module Hydrogen.Syntax.Parser where

import Hydrogen.Prelude hiding ((<|>), many)
import Hydrogen.Syntax.Types

import Hydrogen.Parsing.Char hiding (token, tokens, space)


tokenize :: String -> Parser String (Tokens Token)
tokenize srcName = either mkError sanitize . runIdentity . runParserT tokens () srcName
  where
    tokens = many (liftA2 (,) getPosition token) <* eof

    token = choice [
        TIndent <$> indent
      , something >>= \x -> choice [
          TString x '\'' <$> chars3 "'''"
        , TString x '\"' <$> chars3 "\"\"\""
        , TString x '`' <$> chars3 "```"
        , TString x '\'' <$> chars' '\''
        , TString x '\"' <$> chars' '\"'
        , TString x '`' <$> chars' '`'
        , TSomething <$> return x
        ]
      , TString "" '\'' <$> chars3 "'''"
      , TString "" '\"' <$> chars3 "\"\"\""
      , TString "" '`' <$> chars3 "```"
      , TString "" '\'' <$> chars '\''
      , TString "" '\"' <$> chars '\"'
      , TString "" '`' <$> chars' '`'
      , const TSpaces <$> many1 (char ' ')
      , const TComma <$> char ','
      , const TSemicolon <$> char ';'
      , TBraceOpen "" <$> oneOf "([{"
      , TBraceClose <$> oneOf ")]}"
      ]

    something = many1 (noneOf ("([{)]}\'\"`;, " ++ ['\0' .. '\x1F']))

    indent = many1 (try (many space >> (nl <|> comment))) *> (length <$> many space)
      where
        space = char ' '
        nl = string "\n"
        comment = string ";;" >> many (noneOf "\n") <* char '\n'

    chars' d = inBetween (char d) (many (try (char '\\' >> char d) <|> noneOf (d : "\n")))

    chars d = inBetween (char d) (many (noneOf (d : "\\\n") <|> escape))

    chars3 d = manyBetween (try (string d)) (try (string d)) (noneOf "\\" <|> escape)

    inBetween d p = d *> p <* d

    escape = char '\\' >> choice [
          oneOf "\\\'\"?`"
        , special <$> oneOf (map fst escapes)
        , char 'x' >> (convert <$> count 2 hexDigit)
        , char 'u' >> (convert <$> count 4 hexDigit)
        , char 'U' >> (convert <$> count 8 hexDigit) -- like Python
        ]
      where
        special = fromJust . flip lookup escapes
        escapes = [
            ('0', '\0'),
            ('a', '\a'),
            ('b', '\b'),
            ('f', '\f'),
            ('n', '\n'),
            ('r', '\r'),
            ('t', '\t'),
            ('v', '\v') ]
        convert str = let [(hex, _)] = readHex str in toEnum hex

    sanitize :: [(SourcePos, Token)] -> Either SomethingBad (Tokens Token)

    sanitize (t1@(p1, v1) : ts@(_t2@(p2, v2) : tss)) = case (v1, v2) of

        (TSomething k, TBraceOpen _ t) -> sanitize ((p1, TBraceOpen k t) : tss)

        (TString _ _ _, TSomething _) -> Left [(p2, "")]
        (TString _ _ _, TBraceOpen _ _) -> Left [(p2, "")]

        (TBraceClose _, TSomething _) -> Left [(p2, "")]
        (TBraceClose _, TString _ _ _) -> Left [(p2, "")]
        (TBraceClose _, TBraceOpen _ _) -> Left [(p2, "")]
        
        (TBraceOpen _ _, TIndent _) -> sanitize (t1 : tss)
        (TBraceOpen _ _, TSpaces) -> sanitize (t1 : tss)
        (TBraceOpen _ _, _) ->
            fmap ([t1, (p2, TIndent (sourceColumn p2 - 1))] ++) (sanitize ts)

        (TSpaces, _) -> sanitize ts

        (TIndent _, TBraceOpen _ '{') -> sanitize ts
        (TIndent _, TBraceClose _) -> sanitize ts
        (TIndent _, TSemicolon) -> sanitize ts

        (TSemicolon, TIndent _) -> sanitize (t1 : tss)

        _ -> fmap (t1 :) (sanitize ts)

    sanitize [t@(_, v)] = Right $ case v of
        TSpaces -> []
        TIndent _ -> []
        TSemicolon -> [t]
        _ -> [t]

    sanitize [] = Right []

layout :: Parser (Tokens Token) POPs
layout = runTokenParser (many (liftA2 (,) getPosition (someToken 0)) <* eof)
  where
    someToken :: Monad m => Int -> ParsecT [(SourcePos, Token)] u m POP
    someToken i = choice [
        sourceToken $ \case

            TIndent i' | i' <= i -> Just (Token SomethingT "" ";;")

            TString k '\'' v -> Just (Token AposString k v)
            TString k '"'  v -> Just (Token QuotString k v)
            TString k '`'  v -> Just (Token TickString k v)

            TComma -> Just (Token SomethingT "" ",")
            TSemicolon -> Just (Token SomethingT "" ";")
            TSomething v -> Just (Token SomethingT "" v)

            _ -> Nothing

      , do
            (k, c) <- sourceToken $ \case
                TBraceOpen k c -> curry Just k c
                _ -> Nothing
            i' <- sourceToken $ \(TIndent indent) -> Just indent
            ts <- many (liftA2 (,) getPosition (someToken i'))
            bt <- case c of
                '(' -> sourceToken $ \case TBraceClose ')' -> Just Grouping ; _ -> Nothing
                '[' -> sourceToken $ \case TBraceClose ']' -> Just Brackets ; _ -> Nothing
                '{' -> sourceToken $ \case TBraceClose '}' -> Just Mustache ; _ -> Nothing
                _ -> error "Popsicle!"
            return (Block bt k ts)

      ] <* skipMany (sourceToken lineContinuation)
      where
        lineContinuation = \case TIndent i' | i' > i -> Just () ; _ -> Nothing

parse :: String -> Parser String POPs
parse srcName = tokenize srcName >+> layout