-- | -- This module parses the Relapse Grammar using the Parsec Library. module Parser ( -- * Parse Grammar parseGrammar -- * Internal functions -- | These functions are exposed for testing purposes. , grammar, pattern, nameExpr, expr, idLit, bytesCastLit, stringLit, doubleCastLit, uintCastLit, intLit, ws ) where import Text.ParserCombinators.Parsec import Numeric (readDec, readOct, readHex, readFloat) import Data.Char (chr) import Expr import Patterns import ParsePatterns -- | parseGrammar parses the Relapse Grammar. parseGrammar :: String -> Either ParseError Refs parseGrammar = parse (grammar <* eof) "" infixl 4 <++> (<++>) :: CharParser () String -> CharParser () String -> CharParser () String f <++> g = (++) <$> f <*> g infixr 5 <::> (<::>) :: CharParser () Char -> CharParser () String -> CharParser () String f <::> g = (:) <$> f <*> g empty :: CharParser () String empty = return "" opt :: CharParser () Char -> CharParser () String opt p = (:"") <$> p <|> empty _lineComment :: CharParser () () _lineComment = char '/' *> many (noneOf "\n") <* char '\n' *> return () _blockComment :: CharParser () () _blockComment = char '*' *> many (noneOf "*") <* char '*' <* char '/' *> return () _comment :: CharParser () () _comment = char '/' *> (_lineComment <|> _blockComment) _ws :: CharParser () () _ws = _comment <|> () <$ space ws :: CharParser () () ws = () <$ many _ws bool :: CharParser () Bool bool = True <$ string "true" <|> False <$ string "false" _decimalLit :: CharParser () Int _decimalLit = oneOf "123456789" <::> many digit >>= _read readDec _octalLit :: CharParser () Int _octalLit = many1 octDigit >>= _read readOct _hexLit :: CharParser () Int _hexLit = many1 hexDigit >>= _read readHex _read :: ReadS a -> String -> CharParser () a _read read s = case read s of [(n, "")] -> return n ((n, ""):_) -> return n _ -> fail "digit" _optionalSign :: (Num a) => CharParser () a _optionalSign = -1 <$ char '-' <|> return 1 _signedIntLit :: CharParser () Int _signedIntLit = (*) <$> _optionalSign <*> _intLit _intLit :: CharParser () Int _intLit = _decimalLit <|> char '0' *> (_octalLit <|> (oneOf "xX" *> _hexLit) <|> return 0 ) intLit :: CharParser () Int intLit = string "int(" *> _signedIntLit <* char ')' <|> _signedIntLit "int_lit" uintCastLit :: CharParser () Int uintCastLit = string "uint(" *> _intLit <* char ')' _exponent :: CharParser () String _exponent = oneOf "eE" <::> ( oneOf "+-" <::> many1 digit <|> many1 digit) _floatLit :: CharParser () Double _floatLit = do i <- many1 digit e <- _exponent <|> ((string "." <|> empty) <++> (_exponent <|> many1 digit <++> (_exponent <|> empty) ) ) <|> empty _read readFloat (i ++ e) doubleCastLit :: CharParser () Double doubleCastLit = string "double(" *> ((*) <$> _optionalSign <*> _floatLit) <* char ')' idLit :: CharParser () String idLit = (letter <|> char '_') <::> many (alphaNum <|> char '_') _qualid :: CharParser () String _qualid = idLit <++> (concat <$> many (char '.' <::> idLit)) _bigUValue :: CharParser () Char _bigUValue = char 'U' *> do { hs <- count 8 hexDigit; n <- _read readHex hs; return $ toEnum n } _littleUValue :: CharParser () Char _littleUValue = char 'u' *> do { hs <- count 4 hexDigit; n <- _read readHex hs; return $ toEnum n } _escapedChar :: CharParser () Char _escapedChar = choice (zipWith (\c r -> r <$ char c) "abnfrtv'\\\"/" "\a\b\n\f\r\t\v\'\\\"/") _unicodeValue :: CharParser () Char _unicodeValue = (char '\\' *> (_bigUValue <|> _littleUValue <|> _hexByteUValue <|> _escapedChar <|> _octalByteUValue) ) <|> noneOf "\\\"" _interpretedString :: CharParser () String _interpretedString = between (char '"') (char '"') (many _unicodeValue) _rawString :: CharParser () String _rawString = between (char '`') (char '`') (many $ noneOf "`") stringLit :: CharParser () String stringLit = _rawString <|> _interpretedString _hexByteUValue :: CharParser () Char _hexByteUValue = char 'x' *> do { hs <- count 2 hexDigit; n <- _read readHex hs; return $ chr n } _octalByteUValue :: CharParser () Char _octalByteUValue = do { os <- count 3 octDigit; n <- _read readOct os; return $ toEnum n } _byteLit :: CharParser () Char _byteLit = do { i <- _intLit; if i > 255 then fail $ "too large for byte: " ++ show i else return $ chr i } _byteElem :: CharParser () Char _byteElem = _byteLit <|> between (char '\'') (char '\'') (_unicodeValue <|> _octalByteUValue <|> _hexByteUValue) bytesCastLit :: CharParser () String bytesCastLit = string "[]byte{" *> sepBy (ws *> _byteElem <* ws) (char ',') <* char '}' _literal :: CharParser () ParsedExpr _literal = BoolExpr . Const <$> bool <|> IntExpr . Const <$> intLit <|> UintExpr . Const <$> uintCastLit <|> DoubleExpr . Const <$> doubleCastLit <|> StringExpr . Const <$> stringLit <|> BytesExpr . Const <$> bytesCastLit _terminal :: CharParser () ParsedExpr _terminal = (char '$' *> ( BoolExpr BoolVariable <$ string "bool" <|> IntExpr IntVariable <$ string "int" <|> UintExpr UintVariable <$ string "uint" <|> DoubleExpr DoubleVariable <$ string "double" <|> StringExpr StringVariable <$ string "string" <|> BytesExpr BytesVariable <$ string "[]byte" )) <|> _literal _builtinSymbol :: CharParser () String _builtinSymbol = string "==" <|> string "!=" <|> char '<' <::> opt (char '=') <|> char '>' <::> opt (char '=') <|> string "~=" <|> string "*=" <|> string "^=" <|> string "$=" <|> string "::" check :: Either String ParsedExpr -> CharParser () ParsedExpr check (Right r) = return r check (Left l) = fail l _builtin :: CharParser () ParsedExpr _builtin = newBuiltIn <$> _builtinSymbol <*> (ws *> _expr) >>= check _function :: CharParser () ParsedExpr _function = newFunction <$> idLit <*> (char '(' *> sepBy (ws *> _expr <* ws) (char ',') <* char ')') >>= check _listType :: CharParser () String _listType = char '[' <::> char ']' <::> ( string "bool" <|> string "int" <|> string "uint" <|> string "double" <|> string "string" <|> string "[]byte" ) _mustBool :: ParsedExpr -> CharParser () (Expr Bool) _mustBool (BoolExpr e) = return e _mustBool e = fail $ "want BoolExpr, got: " ++ show e _mustInt :: ParsedExpr -> CharParser () (Expr Int) _mustInt (IntExpr e) = return e _mustInt e = fail $ "want IntExpr, got: " ++ show e _mustUint :: ParsedExpr -> CharParser () (Expr Uint) _mustUint (UintExpr e) = return e _mustUint e = fail $ "want UintExpr, got: " ++ show e _mustDouble :: ParsedExpr -> CharParser () (Expr Double) _mustDouble (DoubleExpr e) = return e _mustDouble e = fail $ "want DoubleExpr, got: " ++ show e _mustString :: ParsedExpr -> CharParser () (Expr String) _mustString (StringExpr e) = return e _mustString e = fail $ "want StringExpr, got: " ++ show e _mustBytes :: ParsedExpr -> CharParser () (Expr Bytes) _mustBytes (BytesExpr e) = return e _mustBytes e = fail $ "want BytesExpr, got: " ++ show e newList :: String -> [ParsedExpr] -> CharParser () ParsedExpr newList "[]bool" es = BoolListExpr <$> mapM _mustBool es newList "[]int" es = IntListExpr <$> mapM _mustInt es newList "[]uint" es = UintListExpr <$> mapM _mustUint es newList "[]double" es = DoubleListExpr <$> mapM _mustDouble es newList "[]string" es = StringListExpr <$> mapM _mustString es newList "[][]byte" es = BytesListExpr <$> mapM _mustBytes es _list :: CharParser () ParsedExpr _list = do { ltype <- _listType; es <- ws *> char '{' *> sepBy (ws *> _expr <* ws) (char ',') <* char '}'; newList ltype es } _expr :: CharParser () ParsedExpr _expr = try _terminal <|> _list <|> _function expr :: CharParser () (Expr Bool) expr = (try _terminal <|> _builtin <|> _function) >>= _mustBool _name :: CharParser () (Expr Bool) _name = (newBuiltIn "==" <$> (_literal <|> (StringExpr . Const <$> idLit))) >>= check >>= _mustBool sepBy2 :: CharParser () a -> String -> CharParser () [a] sepBy2 p sep = do { x1 <- p; string sep; x2 <- p; xs <- many (try (string sep *> p)); return (x1:x2:xs) } _nameChoice :: CharParser () (Expr Bool) _nameChoice = foldl1 OrFunc <$> sepBy2 (ws *> nameExpr <* ws) "|" nameExpr :: CharParser () (Expr Bool) nameExpr = (Const True <$ char '_') <|> (NotFunc <$> (char '!' *> ws *> char '(' *> ws *> nameExpr <* ws <* char ')')) <|> (char '(' *> ws *> _nameChoice <* ws <* char ')') <|> _name _concatPattern :: CharParser () Pattern _concatPattern = char '[' *> (foldl1 Concat <$> sepBy2 (ws *> pattern <* ws) ",") <* optional (char ',' <* ws) <* char ']' _interleavePattern :: CharParser () Pattern _interleavePattern = char '{' *> (foldl1 Interleave <$> sepBy2 (ws *> pattern <* ws) ";") <* optional (char ';' <* ws) <* char '}' _parenPattern :: CharParser () Pattern _parenPattern = do { char '('; ws; first <- pattern; ws; ( char ')' *> ws *> ( ZeroOrMore first <$ char '*' <|> Optional first <$ char '?' ) ) <|> ( ( (first <$ char '|' >>= _orList) <|> (first <$ char '&' >>= _andList) ) <* char ')' ) } _orList :: Pattern -> CharParser () Pattern _orList p = Or p . foldl1 Or <$> sepBy1 (ws *> pattern <* ws) (char '|') _andList :: Pattern -> CharParser () Pattern _andList p = And p . foldl1 And <$> sepBy1 (ws *> pattern <* ws) (char '&') _refPattern :: CharParser () Pattern _refPattern = Reference <$> (char '@' *> ws *> idLit) _notPattern :: CharParser () Pattern _notPattern = Not <$> (char '!' *> ws *> char '(' *> ws *> pattern <* ws <* char ')') _emptyPattern :: CharParser () Pattern _emptyPattern = Empty <$ string "" _zanyPattern :: CharParser () Pattern _zanyPattern = ZAny <$ string "*" _containsPattern :: CharParser () Pattern _containsPattern = Contains <$> (char '.' *> pattern) _treenodePattern :: CharParser () Pattern _treenodePattern = Node <$> nameExpr <*> ( ws *> ( try (char ':' *> ws *> pattern) <|> _depthPattern ) ) _depthPattern :: CharParser () Pattern _depthPattern = _concatPattern <|> _interleavePattern <|> _containsPattern <|> flip Node Empty <$> ( (string "->" *> expr ) <|> (_builtin >>= _mustBool) ) newContains :: CharParser () ParsedExpr -> CharParser () Pattern newContains e = flip Node Empty <$> ((newBuiltIn "*=" <$> e) >>= check >>= _mustBool) pattern :: CharParser () Pattern pattern = char '*' *> ( (char '=' *> (newContains (ws *> _expr))) <|> return ZAny ) <|> _parenPattern <|> _refPattern <|> try _emptyPattern <|> try _treenodePattern <|> try _depthPattern <|> _notPattern _patternDecl :: CharParser () Refs _patternDecl = newRef <$> (char '#' *> ws *> idLit) <*> (ws *> char '=' *> ws *> pattern) grammar :: CharParser () Refs grammar = ws *> (foldl1 union <$> many1 (_patternDecl <* ws)) <|> union <$> (newRef "main" <$> pattern) <*> (foldl union emptyRef <$> many (ws *> _patternDecl <* ws))