{-# LANGUAGE CPP, OverloadedStrings #-} {-# OPTIONS_GHC -Wall -fwarn-tabs #-} module Language.Hakaru.Parser.Parser where import Prelude hiding (Real) #if __GLASGOW_HASKELL__ < 710 import Data.Functor ((<$>), (<$)) import Control.Applicative (Applicative(..)) #endif import qualified Control.Monad as M import Data.Functor.Identity import Data.Text (Text) import qualified Data.Text as Text import Data.Ratio ((%)) import Data.Char (digitToInt) import Text.Parsec hiding (Empty) import Text.Parsec.Text () -- instances only import Text.Parsec.Indentation import Text.Parsec.Indentation.Char import qualified Text.Parsec.Indentation.Token as ITok import qualified Text.Parsec.Expr as Ex import qualified Text.Parsec.Token as Tok import Language.Hakaru.Parser.AST ops, types, names :: [String] ops = ["+","*","-","^", "**", ":",".", "<~","==", "=", "_", "<|>"] types = ["->"] names = ["def", "fn", "if", "else", "∞", "expect", "observe", "return", "match", "integrate", "summate", "product", "data", "import"] type ParserStream = IndentStream (CharIndentStream Text) type Parser = ParsecT ParserStream () Identity type Operator a = Ex.Operator ParserStream () Identity a type OperatorTable a = [[Operator a]] style :: Tok.GenLanguageDef ParserStream st Identity style = ITok.makeIndentLanguageDef $ Tok.LanguageDef { Tok.commentStart = "" , Tok.commentEnd = "" , Tok.nestedComments = True , Tok.identStart = letter <|> char '_' , Tok.identLetter = alphaNum <|> oneOf "_'" , Tok.opStart = oneOf "!$%&*+./<=>?@\\^|-~" , Tok.opLetter = oneOf "!$%&*+./<=>?@\\^|-~" , Tok.caseSensitive = True , Tok.commentLine = "#" , Tok.reservedOpNames = ops ++ types , Tok.reservedNames = names } comments :: Parser () comments = string "#" *> manyTill anyChar newline *> return () emptyLine :: Parser () emptyLine = newline *> return () lexer :: Tok.GenTokenParser ParserStream () Identity lexer = ITok.makeTokenParser style whiteSpace :: Parser () whiteSpace = Tok.whiteSpace lexer decimal :: Parser Integer decimal = Tok.decimal lexer integer :: Parser Integer integer = Tok.integer lexer float :: Parser Rational float = (decimal >>= fractExponent) <* whiteSpace fractFloat :: Integer -> Parser (Either Integer Rational) fractFloat n = fractExponent n >>= return . Right fractExponent :: Integer -> Parser Rational fractExponent n = do{ fract <- fraction ; expo <- option 1 exponent' ; return ((fromInteger n + fract)*expo) } <|> do{ expo <- exponent' ; return ((fromInteger n)*expo) } fraction :: Parser Rational fraction = do{ _ <- char '.' ; digits <- many1 digit "fraction" ; return (foldr op 0 digits) } "fraction" where op d f = (f + fromIntegral (digitToInt d))/10 exponent' :: Parser Rational exponent' = do{ _ <- oneOf "eE" ; f <- sign ; e <- decimal "exponent" ; return (power (f e)) } "exponent" where power e | e < 0 = 1.0/power(-e) | otherwise = fromInteger (10^e) sign = (char '-' >> return negate) <|> (char '+' >> return id) <|> return id parens :: Parser a -> Parser a parens = Tok.parens lexer . localIndentation Any braces :: Parser a -> Parser a braces = Tok.parens lexer . localIndentation Any brackets :: Parser a -> Parser a brackets = Tok.brackets lexer . localIndentation Any commaSep :: Parser a -> Parser [a] commaSep = Tok.commaSep lexer semiSep :: Parser a -> Parser [a] semiSep = Tok.semiSep lexer semiSep1 :: Parser a -> Parser [a] semiSep1 = Tok.semiSep1 lexer identifier :: Parser Text identifier = M.liftM Text.pack $ Tok.identifier lexer reserved :: String -> Parser () reserved = Tok.reserved lexer reservedOp :: String -> Parser () reservedOp = Tok.reservedOp lexer symbol :: Text -> Parser Text symbol = M.liftM Text.pack . Tok.symbol lexer . Text.unpack app1 :: Text -> AST' Text -> AST' Text app1 s x@(WithMeta _ m) = WithMeta (Var s `App` x) m app1 s x = Var s `App` x app2 :: Text -> AST' Text -> AST' Text -> AST' Text app2 s x y = Var s `App` x `App` y -- | Smart constructor for divide divide :: AST' Text -> AST' Text -> AST' Text divide (ULiteral x') (ULiteral y') = ULiteral (go x' y') where go :: Literal' -> Literal' -> Literal' go (Nat x) (Nat y) = Prob (x % y) go x y = Real (litToRat x / litToRat y) litToRat :: Literal' -> Rational litToRat (Nat x) = toRational x litToRat (Int x) = toRational x litToRat (Prob x) = toRational x litToRat (Real x) = toRational x divide x y = NaryOp Prod [x, app1 "recip" y] binop :: Text -> AST' Text -> AST' Text -> AST' Text binop s x y | s == "+" = NaryOp Sum [x, y] | s == "-" = NaryOp Sum [x, app1 "negate" y] | s == "*" = NaryOp Prod [x, y] | s == "/" = x `divide` y | s == "<" = app2 "less" x y | s == ">" = app2 "less" y x | s == "==" = app2 "equal" x y | s == "<=" = NaryOp Or [ app2 "less" x y , app2 "equal" x y] | s == ">=" = NaryOp Or [ app2 "less" y x , app2 "equal" x y] | s == "&&" = NaryOp And [x, y] | s == "<|>" = Msum [x, y] | otherwise = app2 s x y binary :: String -> Ex.Assoc -> Operator (AST' Text) binary s = Ex.Infix (binop (Text.pack s) <$ reservedOp s) prefix :: String -> (a -> a) -> Operator a prefix s f = Ex.Prefix (f <$ reservedOp s) postfix :: Parser (a -> a) -> Operator a postfix p = Ex.Postfix . chainl1 p . return $ flip (.) table :: OperatorTable (AST' Text) table = [ [ postfix array_index ] , [ prefix "+" id ] , [ binary "^" Ex.AssocRight , binary "**" Ex.AssocRight] , [ binary "*" Ex.AssocLeft , binary "/" Ex.AssocLeft] , [ binary "+" Ex.AssocLeft , binary "-" Ex.AssocLeft , prefix "-" (app1 "negate")] -- TODO: add "<=", ">=", "/=" -- TODO: do you *really* mean AssocLeft? Shouldn't they be non-assoc? , [ postfix ann_expr ] , [ binary "<|>" Ex.AssocRight] , [ binary "<" Ex.AssocLeft , binary ">" Ex.AssocLeft , binary "<=" Ex.AssocLeft , binary ">=" Ex.AssocLeft , binary "==" Ex.AssocLeft] , [ binary "&&" Ex.AssocLeft]] unit_ :: Parser (AST' a) unit_ = Unit <$ symbol "()" empty_ :: Parser (AST' a) empty_ = Empty <$ symbol "[]" int :: Parser (AST' a) int = do n <- integer return $ if n < 0 then ULiteral $ Int n else ULiteral $ Nat n floating :: Parser (AST' a) floating = do sign <- option '+' (oneOf "+-") n <- float return $ case sign of '-' -> ULiteral $ Real (negate n) '+' -> ULiteral $ Prob n _ -> error "floating: the impossible happened" inf_ :: Parser (AST' Text) inf_ = reserved "∞" *> return Infinity' var :: Parser (AST' Text) var = Var <$> identifier pairs :: Parser (AST' Text) pairs = foldr1 Pair <$> parens (commaSep expr) type_var :: Parser TypeAST' type_var = TypeVar <$> identifier type_app :: Parser TypeAST' type_app = TypeApp <$> identifier <*> parens (commaSep type_expr) type_fun :: Parser TypeAST' type_fun = chainr1 ( try type_app <|> try type_var <|> parens type_fun) (TypeFun <$ reservedOp "->") type_expr :: Parser TypeAST' type_expr = try type_fun <|> try type_app <|> try type_var <|> parens type_expr ann_expr :: Parser (AST' Text -> AST' Text) ann_expr = reservedOp "." *> (flip Ann <$> type_expr) pdat_expr :: Parser (PDatum Text) pdat_expr = DV <$> identifier <*> parens (commaSep pat_expr) pat_expr :: Parser (Pattern' Text) pat_expr = try (PData' <$> pdat_expr) <|> (PData' <$> (DV "pair" <$> parens (commaSep pat_expr))) <|> (PWild' <$ reservedOp "_") <|> (PVar' <$> identifier) -- | Blocks are indicated by colons, and must be indented. blockOfMany :: Parser a -> Parser [a] blockOfMany p = do reservedOp ":" localIndentation Gt (many $ absoluteIndentation p) -- | Semiblocks are like blocks, but indentation is optional. Also, -- there are only 'expr' semiblocks. semiblockExpr :: Parser (AST' Text) semiblockExpr = reservedOp ":" *> localIndentation Ge expr -- | Pseudoblocks seem like semiblocks, but actually they aren't -- indented. -- -- TODO: do we actually want this in our grammar, or did we really -- mean to use 'semiblockExpr' instead? pseudoblockExpr :: Parser (AST' Text) pseudoblockExpr = reservedOp ":" *> expr branch_expr :: Parser (Branch' Text) branch_expr = Branch' <$> pat_expr <*> semiblockExpr match_expr :: Parser (AST' Text) match_expr = reserved "match" *> (Case <$> expr <*> blockOfMany branch_expr ) integrate_expr :: Parser (AST' Text) integrate_expr = reserved "integrate" *> (Integrate <$> identifier <* symbol "from" <*> expr <* symbol "to" <*> expr <*> semiblockExpr ) summate_expr :: Parser (AST' Text) summate_expr = reserved "summate" *> (Summate <$> identifier <* symbol "from" <*> expr <* symbol "to" <*> expr <*> semiblockExpr ) product_expr :: Parser (AST' Text) product_expr = reserved "product" *> (Product <$> identifier <* symbol "from" <*> expr <* symbol "to" <*> expr <*> semiblockExpr ) expect_expr :: Parser (AST' Text) expect_expr = reserved "expect" *> (Expect <$> identifier <*> expr <*> semiblockExpr ) observe_expr :: Parser (AST' Text) observe_expr = reserved "observe" *> (Observe <$> expr <*> expr ) array_expr :: Parser (AST' Text) array_expr = reserved "array" *> (Array <$> identifier <* symbol "of" <*> expr <*> semiblockExpr ) array_index :: Parser (AST' Text -> AST' Text) array_index = flip Index <$> brackets expr array_literal :: Parser (AST' Text) array_literal = checkEmpty <$> brackets (commaSep expr) where checkEmpty [] = Empty checkEmpty xs = Array "" (ULiteral . Nat . fromIntegral . length $ xs) (go 0 xs) go _ [] = error "the impossible happened" go _ [x] = x go n (x:xs) = If (Var "equal" `App` (Var "") `App` (ULiteral $ Nat n)) x (go (n + 1) xs) plate_expr :: Parser (AST' Text) plate_expr = reserved "plate" *> (Plate <$> identifier <* symbol "of" <*> expr <*> semiblockExpr ) chain_expr :: Parser (AST' Text) chain_expr = reserved "chain" *> (Chain <$> identifier <*> expr <*> expr <*> semiblockExpr ) if_expr :: Parser (AST' Text) if_expr = reserved "if" *> (If <$> localIndentation Ge expr <*> semiblockExpr <* reserved "else" <*> semiblockExpr ) lam_expr :: Parser (AST' Text) lam_expr = reserved "fn" *> (Lam <$> identifier <*> type_expr <*> semiblockExpr ) bind_expr :: Parser (AST' Text) bind_expr = Bind <$> identifier <* reservedOp "<~" <*> expr <*> expr let_expr :: Parser (AST' Text) let_expr = Let <$> identifier <* reservedOp "=" <*> expr <*> expr def_expr :: Parser (AST' Text) def_expr = do reserved "def" name <- identifier vars <- parens (commaSep defarg) bodyTyp <- optionMaybe type_expr body <- semiblockExpr let body' = foldr (\(var', varTyp) e -> Lam var' varTyp e) body vars typ = foldr TypeFun <$> bodyTyp <*> return (map snd vars) Let name (maybe id (flip Ann) typ body') <$> expr -- the \"rest\"; i.e., where the 'def' is in scope defarg :: Parser (Text, TypeAST') defarg = (,) <$> identifier <*> type_expr call_expr :: Parser (AST' Text) call_expr = foldl App <$> (Var <$> identifier) <*> parens (commaSep expr) return_expr :: Parser (AST' Text) return_expr = do reserved "return" <|> reserved "dirac" Dirac <$> expr term :: Parser (AST' Text) term = try if_expr <|> try return_expr <|> try lam_expr <|> try def_expr <|> try match_expr -- <|> try data_expr <|> try integrate_expr <|> try summate_expr <|> try product_expr <|> try expect_expr <|> try observe_expr <|> try array_expr <|> try plate_expr <|> try chain_expr <|> try let_expr <|> try bind_expr <|> try call_expr <|> try array_literal <|> try floating <|> try inf_ <|> try unit_ <|> try empty_ <|> try int <|> try var <|> try pairs <|> parens expr "an expression" expr :: Parser (AST' Text) expr = withPos (Ex.buildExpressionParser table (withPos term) "an expression") indentConfig :: Text -> ParserStream indentConfig = mkIndentStream 0 infIndentation True Ge . mkCharIndentStream parseHakaru :: Text -> Either ParseError (AST' Text) parseHakaru = runParser (skipMany (comments <|> emptyLine) *> expr <* eof) () "" . indentConfig parseHakaruWithImports :: Text -> Either ParseError (ASTWithImport' Text) parseHakaruWithImports = runParser (skipMany (comments <|> emptyLine) *> exprWithImport <* eof) () "" . indentConfig withPos :: Parser (AST' a) -> Parser (AST' a) withPos x = do s <- getPosition x' <- x e <- getPosition return $ WithMeta x' (SourceSpan s e) data_expr :: Parser (AST' Text) data_expr = reserved "data" *> (Data <$> identifier <* parens (commaSep identifier) -- TODO: why throw them away? <*> blockOfMany (try type_app <|> type_var) ) import_expr :: Parser (Import Text) import_expr = reserved "import" *> (Import <$> identifier) exprWithImport :: Parser (ASTWithImport' Text) exprWithImport = ASTWithImport' <$> (many import_expr) <*> expr