module Text.Peggy.Parser ( syntax, ) where import Control.Applicative import Data.Char import Numeric import Text.Parsec hiding ((<|>), many) import Text.Parsec.String import Text.Peggy.Syntax syntax :: Parser Syntax syntax = many definition <* skips <* eof definition :: Parser Definition definition = try (Definition <$> identifier <* symbol ":::" <*> haskellType <* symbol "=" <*> (Token <$> expr)) <|> (Definition <$> identifier <* symbol "::" <*> haskellType <* symbol "=" <*> expr) "definition" expr :: Parser Expr expr = choiceExpr choiceExpr :: Parser Expr choiceExpr = sepBy1 semanticExpr (symbol "/") >>= \es -> case es of [e] -> pure e _ -> pure $ Choice es "choice expr" semanticExpr :: Parser Expr semanticExpr = sequenceExpr >>= \e -> option e $ Semantic e <$> (symbol "{" *> codeFragment <* symbol "}") sequenceExpr :: Parser Expr sequenceExpr = some (try (namedExpr <* notFollowedBy (symbol "::" <|> symbol "="))) >>= \es -> case es of [e] -> pure e _ -> pure $ Sequence es namedExpr :: Parser Expr namedExpr = try (Named <$> identifier <* symbol ":" <*> suffixExpr) <|> suffixExpr suffixExpr :: Parser Expr suffixExpr = prefixExpr >>= go where go e = option e (symbol "*" *> go (Many e) <|> symbol "+" *> go (Some e) <|> symbol "?" *> go (Optional e)) prefixExpr :: Parser Expr prefixExpr = (And <$ symbol "?" <*> primExpr) <|> (Not <$ symbol "!" <*> primExpr) <|> primExpr primExpr :: Parser Expr primExpr = terminals <|> (TerminalCmp <$> set "[^") <|> (TerminalSet <$> set "[") <|> (TerminalAny <$ symbol ".") <|> (NonTerminal <$> identifier) <|> try (SepBy <$ symbol "(" <*> expr <* symbol "," <*> expr <* symbol ")") <|> try (SepBy1 <$ symbol "(" <*> expr <* symbol ";" <*> expr <* symbol ")") <|> symbol "(" *> expr <* symbol ")" "primitive expression" terminals :: Parser Expr terminals = lexeme (do b <- oneOf "\"\'" s <- many charLit e <- oneOf "\"\'" return $ Terminals (b=='\"') (e=='\"') s) "terminals" charLit :: Parser Char charLit = escaped <|> noneOf "\"\'" where escaped = char '\\' >> escChar escChar :: Parser Char escChar = ('\n' <$ char 'n' ) <|> ('\r' <$ char 'r' ) <|> ('\t' <$ char 't' ) <|> ('\\' <$ char '\\') <|> ('\"' <$ char '\"') <|> ('\'' <$ char '\'') <|> (chr . fst . head . readHex <$ char 'x' <*> count 2 hexDigit) set :: String -> Parser [CharRange] set st = lexeme $ string st *> many range <* char ']' range :: Parser CharRange range = try (CharRange <$> rchar <* char '-' <*> rchar) <|> (CharOne <$> rchar) where rchar = escaped <|> noneOf "]" escaped = char '\\' >> (escChar <|> (']' <$ char ']') <|> ('^' <$ char '^') <|> ('-' <$ char '-')) haskellType :: Parser HaskellType haskellType = some (noneOf "=") "type signature" codeFragment :: Parser CodeFragment codeFragment = many codePart "code fragment" codePart :: Parser CodePart codePart = try argument <|> Snippet <$> some (try (notFollowedBy argument >> noneOf "}")) argument :: Parser CodePart argument = try $ Argument <$ char '$' <*> number where number = read <$> some digit -- identifier :: Parser String identifier = lexeme ((:) <$> startChar <*> many subsequentChar) "identifier" where startChar = char '_' <|> letter subsequentChar = startChar <|> digit symbol :: String -> Parser String symbol s = lexeme (string s) "symbol: " ++ s lexeme :: Parser a -> Parser a lexeme p = try $ skips *> p skips :: Parser () skips = () <$ many ((() <$ space) <|> comment) comment :: Parser () comment = lineComment <|> regionComment "comment" lineComment :: Parser () lineComment = () <$ try (string "--") <* manyTill anyChar (char '\n') regionComment :: Parser () regionComment = () <$ try (string "{-") <* com <* string "-}" where com = () <$ many (regionComment <|> (notFollowedBy (string "-}") <* anyChar))