module Hydrogen.Syntax.Parser where
import Hydrogen.Prelude hiding ((<|>), many)
import Hydrogen.Syntax.Types
import Hydrogen.Parsing.Char hiding (token, tokens, space)
tokenize :: String -> Parser String (Tokens Token)
tokenize srcName = either mkError sanitize . runIdentity . runParserT tokens () srcName
where
tokens = many (liftA2 (,) getPosition token) <* eof
token = choice [
TIndent <$> indent
, something >>= \x -> choice [
TString x '\'' <$> chars3 "'''"
, TString x '\"' <$> chars3 "\"\"\""
, TString x '`' <$> chars3 "```"
, TString x '\'' <$> chars' '\''
, TString x '\"' <$> chars' '\"'
, TString x '`' <$> chars' '`'
, TSomething <$> return x
]
, TString "" '\'' <$> chars3 "'''"
, TString "" '\"' <$> chars3 "\"\"\""
, TString "" '`' <$> chars3 "```"
, TString "" '\'' <$> chars '\''
, TString "" '\"' <$> chars '\"'
, TString "" '`' <$> chars' '`'
, const TSpaces <$> many1 (char ' ')
, const TComma <$> char ','
, const TSemicolon <$> char ';'
, TBraceOpen "" <$> oneOf "([{"
, TBraceClose <$> oneOf ")]}"
]
something = many1 (noneOf ("([{)]}\'\"`;, " ++ ['\0' .. '\x1F']))
indent = many1 (try (many space >> (nl <|> comment))) *> (length <$> many space)
where
space = char ' '
nl = string "\n"
comment = string ";;" >> many (noneOf "\n") <* char '\n'
chars' d = inBetween (char d) (many (try (char '\\' >> char d) <|> noneOf (d : "\n")))
chars d = inBetween (char d) (many (noneOf (d : "\\\n") <|> escape))
chars3 d = manyBetween (try (string d)) (try (string d)) (noneOf "\\" <|> escape)
inBetween d p = d *> p <* d
escape = char '\\' >> choice [
oneOf "\\\'\"?`"
, special <$> oneOf (map fst escapes)
, char 'x' >> (convert <$> count 2 hexDigit)
, char 'u' >> (convert <$> count 4 hexDigit)
, char 'U' >> (convert <$> count 8 hexDigit)
]
where
special = fromJust . flip lookup escapes
escapes = [
('0', '\0'),
('a', '\a'),
('b', '\b'),
('f', '\f'),
('n', '\n'),
('r', '\r'),
('t', '\t'),
('v', '\v') ]
convert str = let [(hex, _)] = readHex str in toEnum hex
sanitize :: [(SourcePos, Token)] -> Either SomethingBad (Tokens Token)
sanitize (t1@(p1, v1) : ts@(_t2@(p2, v2) : tss)) = case (v1, v2) of
(TSomething k, TBraceOpen _ t) -> sanitize ((p1, TBraceOpen k t) : tss)
(TString _ _ _, TSomething _) -> Left [(p2, "")]
(TString _ _ _, TBraceOpen _ _) -> Left [(p2, "")]
(TBraceClose _, TSomething _) -> Left [(p2, "")]
(TBraceClose _, TString _ _ _) -> Left [(p2, "")]
(TBraceClose _, TBraceOpen _ _) -> Left [(p2, "")]
(TBraceOpen _ _, TIndent _) -> sanitize (t1 : tss)
(TBraceOpen _ _, TSpaces) -> sanitize (t1 : tss)
(TBraceOpen _ _, _) ->
fmap ([t1, (p2, TIndent (sourceColumn p2 1))] ++) (sanitize ts)
(TSpaces, _) -> sanitize ts
(TIndent _, TBraceOpen _ '{') -> sanitize ts
(TIndent _, TBraceClose _) -> sanitize ts
(TIndent _, TSemicolon) -> sanitize ts
(TSemicolon, TIndent _) -> sanitize (t1 : tss)
_ -> fmap (t1 :) (sanitize ts)
sanitize [t@(_, v)] = Right $ case v of
TSpaces -> []
TIndent _ -> []
TSemicolon -> [t]
_ -> [t]
sanitize [] = Right []
layout :: Parser (Tokens Token) POPs
layout = runTokenParser (many (liftA2 (,) getPosition (someToken 0)) <* eof)
where
someToken :: Monad m => Int -> ParsecT [(SourcePos, Token)] u m POP
someToken i = choice [
sourceToken $ \case
TIndent i' | i' <= i -> Just (Token SomethingT "" ";;")
TString k '\'' v -> Just (Token AposString k v)
TString k '"' v -> Just (Token QuotString k v)
TString k '`' v -> Just (Token TickString k v)
TComma -> Just (Token SomethingT "" ",")
TSemicolon -> Just (Token SomethingT "" ";")
TSomething v -> Just (Token SomethingT "" v)
_ -> Nothing
, do
(k, c) <- sourceToken $ \case
TBraceOpen k c -> curry Just k c
_ -> Nothing
i' <- sourceToken $ \(TIndent indent) -> Just indent
ts <- many (liftA2 (,) getPosition (someToken i'))
bt <- case c of
'(' -> sourceToken $ \case TBraceClose ')' -> Just Grouping ; _ -> Nothing
'[' -> sourceToken $ \case TBraceClose ']' -> Just Brackets ; _ -> Nothing
'{' -> sourceToken $ \case TBraceClose '}' -> Just Mustache ; _ -> Nothing
_ -> error "Popsicle!"
return (Block bt k ts)
] <* skipMany (sourceToken lineContinuation)
where
lineContinuation = \case TIndent i' | i' > i -> Just () ; _ -> Nothing
parse :: String -> Parser String POPs
parse srcName = tokenize srcName >+> layout