module Hydrogen.Syntax.Parser.TokenParser where

import Hydrogen.Prelude hiding ((<|>), many)
import Hydrogen.Syntax.Types

import Hydrogen.Parsing.Char hiding (token, tokens, space)


data TokenizerConfig = TokenizerConfig {
    cfgSpaces :: [Char]
  , cfgEscapes :: [(Char, Char)]
  , cfgComments :: String
  , cfgOpeningBraces :: [Char]
  , cfgClosingBraces :: [Char]
  , cfgControlCharacters :: [Char]
  , cfgSpecialCharacters :: [Char]
  , cfgSpecialSemantics :: [Char]
  , cfgFoldSpecials :: [Char]
} deriving (Eq, Show)

instance Default TokenizerConfig where
    def = TokenizerConfig {
        cfgSpaces = " "
      , cfgEscapes = [
            ('\\', '\\'),
            ('\'', '\''),
            ('"', '"'),
            ('`', '`'),
            ('0', '\0'),
            ('a', '\a'),
            ('b', '\b'),
            ('f', '\f'),
            ('n', '\n'),
            ('r', '\r'),
            ('t', '\t'),
            ('v', '\v') ]
      , cfgComments = ";;"
      , cfgOpeningBraces = "([{"
      , cfgClosingBraces = ")]}"
      , cfgControlCharacters = ['\0' .. '\x1F']
      , cfgSpecialCharacters = ",;"
      , cfgSpecialSemantics = "{;"
      , cfgFoldSpecials = ";"
    }

parseTokens :: TokenizerConfig -> String -> Parser String (Tokens Token)
parseTokens cfg src = either mkError sanitize . runIdentity . runParserT tokens () src
  where
    TokenizerConfig { .. } = cfg

    tokens = many (liftA2 (,) getPosition token) <* eof

    token = choice [
        TIndent <$> indent
      , something >>= \x -> choice [
          TString x '\'' <$> chars3 "'''"
        , TString x '\"' <$> chars3 "\"\"\""
        , TString x '`' <$> chars3 "```"
        , TString x '\'' <$> chars' '\''
        , TString x '\"' <$> chars' '\"'
        , TString x '`' <$> chars' '`'
        , TSomething <$> return x
        ]
      , TString "" '\'' <$> chars3 "'''"
      , TString "" '\"' <$> chars3 "\"\"\""
      , TString "" '`' <$> chars3 "```"
      , TString "" '\'' <$> chars '\''
      , TString "" '\"' <$> chars '\"'
      , TString "" '`' <$> chars' '`'
      , const TSpaces <$> many1 space
      , TSpecial <$> oneOf cfgSpecialCharacters
      , TBraceOpen "" <$> oneOf cfgOpeningBraces
      , TBraceClose <$> oneOf cfgClosingBraces
      ]

    space = oneOf cfgSpaces

    forbidden = join [
        cfgOpeningBraces
      , cfgClosingBraces
      , "\'\"`"
      , cfgSpecialCharacters
      , cfgSpaces
      , cfgControlCharacters
      ]

    something = many1 (noneOf forbidden)

    indent = many1 (try (many space >> (nl <|> comment))) *> (length <$> many space)
      where
        nl = string "\n"
        comment = try (string cfgComments >> many (noneOf "\n") <* char '\n')

    chars' d = inBetween (char d) (many (try (char '\\' >> char d) <|> noneOf (d : "\n")))

    chars d = inBetween (char d) (many (noneOf (d : "\\\n") <|> escape))

    chars3 d = manyBetween (try (string d)) (try (string d)) (noneOf "\\" <|> escape)

    inBetween d p = d *> p <* d

    escape = char '\\' >> choice [
          special <$> oneOf (map fst cfgEscapes)
        , char 'x' >> (convert <$> count 2 hexDigit)
        , char 'u' >> (convert <$> count 4 hexDigit)
        , char 'U' >> (convert <$> count 8 hexDigit)
        ]
      where
        special = fromJust . flip lookup cfgEscapes
        convert str = let [(hex, _)] = readHex str in toEnum hex

    sanitize :: [(SourcePos, Token)] -> Either SomethingBad (Tokens Token)

    sanitize (t1@(p1, v1) : ts@(_t2@(p2, v2) : tss)) = case (v1, v2) of

        (TSomething k, TBraceOpen _ t) -> sanitize ((p1, TBraceOpen k t) : tss)

        (TString _ _ _, TSomething _) -> Left [(p2, "")]
        (TString _ _ _, TBraceOpen _ _) -> Left [(p2, "")]

        (TBraceClose _, TSomething _) -> Left [(p2, "")]
        (TBraceClose _, TString _ _ _) -> Left [(p2, "")]
        (TBraceClose _, TBraceOpen _ _) -> Left [(p2, "")]
        
        (TBraceOpen _ _, TIndent _) -> sanitize (t1 : tss)
        (TBraceOpen _ _, TSpaces) -> sanitize (t1 : tss)
        (TBraceOpen _ _, _) ->
            fmap ([t1, (p2, TIndent (sourceColumn p2 - 1))] ++) (sanitize ts)

        (TSpaces, _) -> sanitize ts
        (TSpecial _, TSpaces) -> sanitize (t1 : tss) 
        (TSpecial x1, TSpecial x2) | x1 == x2 && x1 `elem` cfgFoldSpecials -> sanitize ts

        (TIndent _, TBraceOpen _ b) | b `elem` cfgSpecialSemantics -> sanitize ts
        (TIndent _, TBraceClose _) -> sanitize ts
        (TIndent _, TSpecial x) | x `elem` cfgSpecialSemantics -> sanitize ts

        (TSpecial x, TIndent _) | x `elem` cfgSpecialSemantics -> sanitize (t1 : tss)

        _ -> fmap (t1 :) (sanitize ts)

    sanitize [t@(_, v)] = Right $ case v of
        TSpaces -> []
        TIndent _ -> []
        TSpecial x | x `elem` cfgSpecialSemantics -> []
        _ -> [t]

    sanitize [] = Right []