{-# LANGUAGE TupleSections #-} {- | Module : Language.Egison.ParserNonS Copyright : Satoshi Egi Licence : MIT This module provides the new parser of Egison. -} module Language.Egison.ParserNonS ( -- * Parse a string readTopExprs , readTopExpr , readExprs , readExpr , parseTopExprs , parseTopExpr , parseExprs , parseExpr -- * Parse a file , loadLibraryFile , loadFile ) where import Control.Applicative (pure, (*>), (<$>), (<$), (<*), (<*>)) import Control.Monad.Except (liftIO, throwError) import Control.Monad.State (unless) import Data.Char (isAsciiUpper, isLetter) import Data.Functor (($>)) import Data.List (find, groupBy) import Data.Maybe (fromJust, isJust, isNothing) import Data.Text (pack) import Control.Monad.Combinators.Expr import Text.Megaparsec import Text.Megaparsec.Char import qualified Text.Megaparsec.Char.Lexer as L -- import Text.Megaparsec.Debug (dbg) import Text.Megaparsec.Pos (Pos) import System.Directory (doesFileExist, getHomeDirectory) import System.IO import Language.Egison.AST import Language.Egison.Desugar import Language.Egison.Types import Paths_egison (getDataFileName) readTopExprs :: String -> EgisonM [EgisonTopExpr] readTopExprs = either throwError (mapM desugarTopExpr) . parseTopExprs readTopExpr :: String -> EgisonM EgisonTopExpr readTopExpr = either throwError desugarTopExpr . parseTopExpr readExprs :: String -> EgisonM [EgisonExpr] readExprs = either throwError (mapM desugarExpr) . parseExprs readExpr :: String -> EgisonM EgisonExpr readExpr = either throwError desugarExpr . parseExpr parseTopExprs :: String -> Either EgisonError [EgisonTopExpr] parseTopExprs = doParse $ many (L.nonIndented sc topExpr) <* eof parseTopExpr :: String -> Either EgisonError EgisonTopExpr parseTopExpr = doParse $ sc >> topExpr <* eof parseExprs :: String -> Either EgisonError [EgisonExpr] parseExprs = doParse $ many (L.nonIndented sc expr) <* eof parseExpr :: String -> Either EgisonError EgisonExpr parseExpr = doParse $ sc >> expr <* eof -- |Load a libary file loadLibraryFile :: FilePath -> EgisonM [EgisonTopExpr] loadLibraryFile file = do homeDir <- liftIO getHomeDirectory doesExist <- liftIO $ doesFileExist $ homeDir ++ "/.egison/" ++ file if doesExist then loadFile $ homeDir ++ "/.egison/" ++ file else liftIO (getDataFileName file) >>= loadFile -- |Load a file loadFile :: FilePath -> EgisonM [EgisonTopExpr] loadFile file = do doesExist <- liftIO $ doesFileExist file unless doesExist $ throwError $ Default ("file does not exist: " ++ file) input <- liftIO $ readUTF8File file exprs <- readTopExprs $ shebang input concat <$> mapM recursiveLoad exprs where recursiveLoad (Load file) = loadLibraryFile file recursiveLoad (LoadFile file) = loadFile file recursiveLoad expr = return [expr] shebang :: String -> String shebang ('#':'!':cs) = ';':'#':'!':cs shebang cs = cs readUTF8File :: FilePath -> IO String readUTF8File name = do h <- openFile name ReadMode hSetEncoding h utf8 hGetContents h -- -- Parser -- type Parser = Parsec CustomError String data CustomError = IllFormedSection EgisonBinOp EgisonBinOp | IllFormedDefine deriving (Eq, Ord) instance ShowErrorComponent CustomError where showErrorComponent (IllFormedSection op op') = "The operator " ++ info op ++ " must have lower precedence than " ++ info op' where info op = "'" ++ repr op ++ "' [" ++ show (assoc op) ++ " " ++ show (priority op) ++ "]" showErrorComponent IllFormedDefine = "Failed to parse the left hand side of definition expression." doParse :: Parser a -> String -> Either EgisonError a doParse p input = either (throwError . fromParsecError) return $ parse p "egison" input where fromParsecError :: ParseErrorBundle String CustomError -> EgisonError fromParsecError = Parser . errorBundlePretty -- -- Expressions -- topExpr :: Parser EgisonTopExpr topExpr = Load <$> (reserved "load" >> stringLiteral) <|> LoadFile <$> (reserved "loadFile" >> stringLiteral) <|> defineOrTestExpr "toplevel expression" -- Return type of |convertToDefine|. data ConversionResult = Variable Var -- Definition of a variable with no arguments on lhs. | Function Var [Arg] -- Definition of a function with some arguments on lhs. | IndexedVar VarWithIndices defineOrTestExpr :: Parser EgisonTopExpr defineOrTestExpr = do e <- expr defineExpr e <|> return (Test e) where defineExpr :: EgisonExpr -> Parser EgisonTopExpr defineExpr e = do _ <- symbol ":=" -- When ":=" is observed and the current expression turns out to be a -- definition, we do not start over from scratch but re-interpret -- what's parsed so far as the lhs of definition. case convertToDefine e of Nothing -> customFailure IllFormedDefine Just (Variable var) -> Define var <$> expr Just (Function var args) -> Define var . LambdaExpr args <$> expr Just (IndexedVar var) -> DefineWithIndices var <$> expr convertToDefine :: EgisonExpr -> Maybe ConversionResult convertToDefine (VarExpr var) = return $ Variable var convertToDefine (ApplyExpr (VarExpr var) (TupleExpr args)) = do args' <- mapM ((ScalarArg <$>) . exprToStr) args return $ Function var args' convertToDefine e@(BinaryOpExpr op _ _) | repr op == "*" || repr op == "%" = do args <- exprToArgs e case args of ScalarArg var : args -> return $ Function (Var [var] []) args _ -> Nothing convertToDefine (IndexedExpr True (VarExpr (Var var [])) indices) = do -- [Index EgisonExpr] -> Maybe [Index String] indices' <- mapM (traverse exprToStr) indices return $ IndexedVar (VarWithIndices var indices') convertToDefine _ = Nothing exprToStr :: EgisonExpr -> Maybe String exprToStr (VarExpr (Var [x] [])) = Just x exprToStr _ = Nothing exprToArgs :: EgisonExpr -> Maybe [Arg] exprToArgs (VarExpr (Var [x] [])) = return [ScalarArg x] exprToArgs (ApplyExpr func (TupleExpr args)) = (++) <$> exprToArgs func <*> mapM ((ScalarArg <$>) . exprToStr) args exprToArgs (BinaryOpExpr op lhs rhs) | repr op == "*" = do lhs' <- exprToArgs lhs rhs' <- exprToArgs rhs case rhs' of ScalarArg x : xs -> return (lhs' ++ InvertedScalarArg x : xs) _ -> Nothing exprToArgs (BinaryOpExpr op lhs rhs) | repr op == "%" = do lhs' <- exprToArgs lhs rhs' <- exprToArgs rhs case rhs' of ScalarArg x : xs -> return (lhs' ++ TensorArg x : xs) _ -> Nothing exprToArgs _ = Nothing expr :: Parser EgisonExpr expr = do body <- exprWithoutWhere bindings <- optional whereDefs return $ case bindings of Nothing -> body Just bindings -> LetRecExpr bindings body where whereDefs = do pos <- reserved "where" >> L.indentLevel some (L.indentGuard sc EQ pos >> binding) exprWithoutWhere :: Parser EgisonExpr exprWithoutWhere = ifExpr <|> patternMatchExpr <|> lambdaExpr <|> letExpr <|> withSymbolsExpr <|> doExpr <|> ioExpr <|> matcherExpr <|> algebraicDataMatcherExpr <|> memoizedLambdaExpr <|> procedureExpr <|> generateTensorExpr <|> tensorExpr <|> functionExpr <|> opExpr "expression" -- Also parses atomExpr opExpr :: Parser EgisonExpr opExpr = do pos <- L.indentLevel makeExprParser atomOrApplyExpr (makeTable pos) makeTable :: Pos -> [[Operator Parser EgisonExpr]] makeTable pos = -- prefixes have top priority let prefixes = [ [ Prefix (unary "-") , Prefix (unary "!") ] ] -- Generate binary operator table from |reservedBinops| binops = map (map binOpToOperator) (groupBy (\x y -> priority x == priority y) reservedBinops) in prefixes ++ binops where -- notFollowedBy (in unary and binary) is necessary for section expression. unary :: String -> Parser (EgisonExpr -> EgisonExpr) unary sym = UnaryOpExpr <$> try (operator sym <* notFollowedBy (symbol ")")) binary :: String -> Parser (EgisonExpr -> EgisonExpr -> EgisonExpr) binary sym = do -- TODO: Is this indentation guard necessary? op <- try (L.indentGuard sc GT pos >> binOpLiteral sym <* notFollowedBy (symbol ")")) return $ BinaryOpExpr op binOpToOperator :: EgisonBinOp -> Operator Parser EgisonExpr binOpToOperator op = case assoc op of LeftAssoc -> InfixL (binary (repr op)) RightAssoc -> InfixR (binary (repr op)) NonAssoc -> InfixN (binary (repr op)) ifExpr :: Parser EgisonExpr ifExpr = reserved "if" >> IfExpr <$> expr <* reserved "then" <*> expr <* reserved "else" <*> expr patternMatchExpr :: Parser EgisonExpr patternMatchExpr = makeMatchExpr (reserved "match") (MatchExpr BFSMode) <|> makeMatchExpr (reserved "matchDFS") (MatchExpr DFSMode) <|> makeMatchExpr (reserved "matchAll") (MatchAllExpr BFSMode) <|> makeMatchExpr (reserved "matchAllDFS") (MatchAllExpr DFSMode) "pattern match expression" where makeMatchExpr keyword ctor = ctor <$> (keyword >> expr) <*> (reserved "as" >> expr) <*> (reserved "with" >> matchClauses1) -- Parse more than 1 match clauses. matchClauses1 :: Parser [MatchClause] matchClauses1 = do pos <- L.indentLevel -- If the first bar '|' is missing, then it is expected to have only one match clause. (lookAhead (symbol "|") >> some (matchClause pos)) <|> (:[]) <$> matchClauseWithoutBar where matchClauseWithoutBar :: Parser MatchClause matchClauseWithoutBar = (,) <$> pattern <*> (symbol "->" >> expr) matchClause :: Pos -> Parser MatchClause matchClause pos = (,) <$> (L.indentGuard sc EQ pos >> symbol "|" >> pattern) <*> (symbol "->" >> expr) lambdaExpr :: Parser EgisonExpr lambdaExpr = symbol "\\" >> ( makeMatchLambdaExpr (reserved "match") MatchLambdaExpr <|> makeMatchLambdaExpr (reserved "matchAll") MatchAllLambdaExpr <|> try (LambdaExpr <$> some arg <*> (symbol "->" >> expr)) <|> PatternFunctionExpr <$> some lowerId <*> (symbol "=>" >> pattern)) "lambda or pattern function expression" where makeMatchLambdaExpr keyword ctor = do matcher <- keyword >> reserved "as" >> expr clauses <- reserved "with" >> matchClauses1 return $ ctor matcher clauses arg :: Parser Arg arg = InvertedScalarArg <$> (char '*' >> ident) <|> TensorArg <$> (char '%' >> ident) <|> ScalarArg <$> ident "argument" letExpr :: Parser EgisonExpr letExpr = do pos <- reserved "let" >> L.indentLevel binds <- oneLiner <|> some (L.indentGuard sc EQ pos *> binding) body <- reserved "in" >> expr return $ LetRecExpr binds body where oneLiner :: Parser [BindingExpr] oneLiner = braces $ sepBy binding (symbol ";") binding :: Parser BindingExpr binding = do (vars, args) <- (,[]) <$> parens (sepBy varLiteral comma) <|> do var <- varLiteral args <- many arg return ([var], args) body <- symbol ":=" >> expr return $ case args of [] -> (vars, body) _ -> (vars, LambdaExpr args body) withSymbolsExpr :: Parser EgisonExpr withSymbolsExpr = WithSymbolsExpr <$> (reserved "withSymbols" >> brackets (sepBy ident comma)) <*> expr doExpr :: Parser EgisonExpr doExpr = do pos <- reserved "do" >> L.indentLevel stmts <- oneLiner <|> some (L.indentGuard sc EQ pos >> statement) return $ case last stmts of ([], retExpr@(ApplyExpr (VarExpr (Var ["return"] _)) _)) -> DoExpr (init stmts) retExpr _ -> DoExpr stmts (makeApply' "return" []) where statement :: Parser BindingExpr statement = (reserved "let" >> binding) <|> ([],) <$> expr oneLiner :: Parser [BindingExpr] oneLiner = braces $ sepBy statement (symbol ";") ioExpr :: Parser EgisonExpr ioExpr = IoExpr <$> (reserved "io" >> expr) matcherExpr :: Parser EgisonExpr matcherExpr = do reserved "matcher" pos <- L.indentLevel -- Assuming it is unlikely that users want to write matchers with only 1 -- pattern definition, the first '|' (bar) is made indispensable in matcher -- expression. info <- some (L.indentGuard sc EQ pos >> symbol "|" >> patternDef) return $ MatcherExpr info where patternDef :: Parser (PrimitivePatPattern, EgisonExpr, [(PrimitiveDataPattern, EgisonExpr)]) patternDef = do pp <- ppPattern returnMatcher <- reserved "as" >> expr <* reserved "with" pos <- L.indentLevel datapat <- some (L.indentGuard sc EQ pos >> symbol "|" >> dataCases) return (pp, returnMatcher, datapat) dataCases :: Parser (PrimitiveDataPattern, EgisonExpr) dataCases = (,) <$> pdPattern <*> (symbol "->" >> expr) algebraicDataMatcherExpr :: Parser EgisonExpr algebraicDataMatcherExpr = do reserved "algebraicDataMatcher" pos <- L.indentLevel defs <- some (L.indentGuard sc EQ pos >> symbol "|" >> patternDef) return $ AlgebraicDataMatcherExpr defs where patternDef :: Parser (String, [EgisonExpr]) patternDef = do pos <- L.indentLevel patternCtor <- lowerId args <- many (L.indentGuard sc GT pos >> atomExpr) return (patternCtor, args) memoizedLambdaExpr :: Parser EgisonExpr memoizedLambdaExpr = MemoizedLambdaExpr <$> (reserved "memoizedLambda" >> many lowerId) <*> (symbol "->" >> expr) procedureExpr :: Parser EgisonExpr procedureExpr = ProcedureExpr <$> (reserved "procedure" >> many lowerId) <*> (symbol "->" >> expr) generateTensorExpr :: Parser EgisonExpr generateTensorExpr = GenerateTensorExpr <$> (reserved "generateTensor" >> atomExpr) <*> atomExpr tensorExpr :: Parser EgisonExpr tensorExpr = TensorExpr <$> (reserved "tensor" >> atomExpr) <*> atomExpr <*> option (CollectionExpr []) atomExpr <*> option (CollectionExpr []) atomExpr functionExpr :: Parser EgisonExpr functionExpr = FunctionExpr <$> (reserved "function" >> parens (sepBy expr comma)) collectionExpr :: Parser EgisonExpr collectionExpr = symbol "[" >> (try betweenOrFromExpr <|> elementsExpr) where betweenOrFromExpr = do start <- expr <* symbol ".." end <- optional expr <* symbol "]" case end of Just end' -> return $ makeApply' "between" [start, end'] Nothing -> return $ makeApply' "from" [start] elementsExpr = CollectionExpr <$> (sepBy (ElementExpr <$> expr) comma <* symbol "]") -- Parse an atomic expression starting with '(', which can be: -- * a tuple -- * an arbitrary expression wrapped with parenthesis -- * section tupleOrParenExpr :: Parser EgisonExpr tupleOrParenExpr = do elems <- symbol "(" >> try (sepBy expr comma <* symbol ")") <|> (section <* symbol ")") case elems of [x] -> return x -- expression wrapped in parenthesis _ -> return $ TupleExpr elems -- tuple where section :: Parser [EgisonExpr] -- Start from right, in order to parse expressions like (-1 +) correctly section = (:[]) <$> (rightSection <|> leftSection) -- Sections without the left operand: eg. (+), (+ 1) leftSection :: Parser EgisonExpr leftSection = do op <- choice $ map (binOpLiteral . repr) reservedBinops rarg <- optional expr case rarg of Just (BinaryOpExpr op' _ _) | assoc op' /= RightAssoc && priority op >= priority op' -> customFailure (IllFormedSection op op') _ -> return (makeLambda op Nothing rarg) -- Sections with the left operand but lacks the right operand: eg. (1 +) rightSection :: Parser EgisonExpr rightSection = do larg <- opExpr op <- choice $ map (binOpLiteral . repr) reservedBinops case larg of BinaryOpExpr op' _ _ | assoc op' /= LeftAssoc && priority op >= priority op' -> customFailure (IllFormedSection op op') _ -> return (makeLambda op (Just larg) Nothing) -- TODO(momohatt): Generate fresh variable for argument makeLambda :: EgisonBinOp -> Maybe EgisonExpr -> Maybe EgisonExpr -> EgisonExpr makeLambda op Nothing Nothing = LambdaExpr [TensorArg ":x", TensorArg ":y"] (BinaryOpExpr op (stringToVarExpr ":x") (stringToVarExpr ":y")) makeLambda op Nothing (Just rarg) = LambdaExpr [TensorArg ":x"] (BinaryOpExpr op (stringToVarExpr ":x") rarg) makeLambda op (Just larg) Nothing = LambdaExpr [TensorArg ":y"] (BinaryOpExpr op larg (stringToVarExpr ":y")) arrayExpr :: Parser EgisonExpr arrayExpr = ArrayExpr <$> between (symbol "(|") (symbol "|)") (sepEndBy expr comma) vectorExpr :: Parser EgisonExpr vectorExpr = VectorExpr <$> between (symbol "[|") (symbol "|]") (sepEndBy expr comma) hashExpr :: Parser EgisonExpr hashExpr = HashExpr <$> hashBraces (sepEndBy hashElem comma) where hashBraces = between (symbol "{|") (symbol "|}") hashElem = parens $ (,) <$> expr <*> (comma >> expr) index :: Parser (Index EgisonExpr) index = SupSubscript <$> (string "~_" >> atomExpr') <|> try (char '_' >> subscript) <|> try (char '~' >> superscript) <|> try (Userscript <$> (char '|' >> atomExpr')) "index" where subscript = do e1 <- atomExpr' e2 <- optional (string "..._" >> atomExpr') case e2 of Nothing -> return $ Subscript e1 Just e2' -> return $ MultiSubscript e1 e2' superscript = do e1 <- atomExpr' e2 <- optional (string "...~" >> atomExpr') case e2 of Nothing -> return $ Superscript e1 Just e2' -> return $ MultiSuperscript e1 e2' atomOrApplyExpr :: Parser EgisonExpr atomOrApplyExpr = do pos <- L.indentLevel func <- atomExpr args <- many (L.indentGuard sc GT pos *> atomExpr) return $ case args of [] -> func _ -> makeApply func args -- (Possibly indexed) atomic expressions atomExpr :: Parser EgisonExpr atomExpr = do e <- atomExpr' override <- isNothing <$> optional (try (string "..." <* lookAhead index)) -- TODO(momohatt): "..." (override of index) collides with ContPat indices <- many index return $ case indices of [] -> e _ -> IndexedExpr override e indices -- Atomic expressions without index atomExpr' :: Parser EgisonExpr atomExpr' = partialExpr -- must come before |constantExpr| <|> constantExpr <|> FreshVarExpr <$ symbol "#" <|> VarExpr <$> varLiteral <|> vectorExpr -- must come before |collectionExpr| <|> arrayExpr -- must come before |tupleOrParenExpr| <|> collectionExpr <|> tupleOrParenExpr <|> hashExpr <|> QuoteExpr <$> (char '\'' >> atomExpr') -- must come after |constantExpr| <|> QuoteSymbolExpr <$> (char '`' >> atomExpr') <|> PartialVarExpr <$> try (char '%' >> positiveIntegerLiteral) "atomic expression" partialExpr :: Parser EgisonExpr partialExpr = do n <- try (L.decimal <* char '#') -- No space after the index body <- atomExpr -- No space after '#' return $ PartialExpr n body constantExpr :: Parser EgisonExpr constantExpr = numericExpr <|> BoolExpr <$> boolLiteral <|> CharExpr <$> try charLiteral -- try for quoteExpr <|> StringExpr . pack <$> stringLiteral <|> SomethingExpr <$ reserved "something" <|> UndefinedExpr <$ reserved "undefined" numericExpr :: Parser EgisonExpr numericExpr = FloatExpr <$> try positiveFloatLiteral <|> IntegerExpr <$> positiveIntegerLiteral "numeric expression" -- -- Pattern -- pattern :: Parser EgisonPattern pattern = letPattern <|> loopPattern <|> opPattern "pattern" letPattern :: Parser EgisonPattern letPattern = do pos <- reserved "let" >> L.indentLevel binds <- some (L.indentGuard sc EQ pos *> binding) body <- reserved "in" >> pattern return $ LetPat binds body loopPattern :: Parser EgisonPattern loopPattern = LoopPat <$> (reserved "loop" >> patVarLiteral) <*> loopRange <*> atomPattern <*> atomPattern where loopRange :: Parser LoopRange loopRange = parens $ do start <- expr ends <- option (defaultEnds start) (try $ comma >> expr) as <- option WildCard (comma >> pattern) return $ LoopRange start ends as defaultEnds s = ApplyExpr (stringToVarExpr "from") (makeApply (stringToVarExpr "-'") [s, IntegerExpr 1]) seqPattern :: Parser EgisonPattern seqPattern = do pats <- braces $ sepBy pattern comma return $ foldr SeqConsPat SeqNilPat pats opPattern :: Parser EgisonPattern opPattern = makeExprParser applyOrAtomPattern table where table :: [[Operator Parser EgisonPattern]] table = [ [ Prefix (NotPat <$ symbol "!") ] -- 5 , [ InfixR (inductive2 "cons" "::" ) , InfixR (inductive2 "join" "++") ] -- 3 , [ InfixR (binary AndPat "&") ] -- 2 , [ InfixR (binary OrPat "|") ] ] inductive2 name sym = (\x y -> InductivePat name [x, y]) <$ patOperator sym binary name sym = (\x y -> name [x, y]) <$ patOperator sym applyOrAtomPattern :: Parser EgisonPattern applyOrAtomPattern = do pos <- L.indentLevel func <- atomPattern args <- many (L.indentGuard sc GT pos *> atomPattern) case (func, args) of (_, []) -> return func (InductivePat x [], _) -> return $ InductivePat x args _ -> error (show (func, args)) -- (Possibly indexed) atomic pattern atomPattern :: Parser EgisonPattern atomPattern = do pat <- atomPattern' indices <- many . try $ char '_' >> atomExpr' return $ case indices of [] -> pat _ -> IndexedPat pat indices -- Atomic pattern without index atomPattern' :: Parser EgisonPattern atomPattern' = WildCard <$ symbol "_" <|> PatVar <$> patVarLiteral <|> ValuePat <$> (char '#' >> atomExpr) <|> InductivePat "nil" [] <$ (symbol "[" >> symbol "]") <|> InductivePat <$> lowerId <*> pure [] <|> VarPat <$> (char '~' >> lowerId) <|> PredPat <$> (symbol "?" >> atomExpr) <|> ContPat <$ symbol "..." <|> makeTupleOrParen pattern TuplePat <|> seqPattern <|> LaterPatVar <$ symbol "@" "atomic pattern" ppPattern :: Parser PrimitivePatPattern ppPattern = PPInductivePat <$> lowerId <*> many ppAtom <|> makeExprParser ppAtom table "primitive pattern pattern" where table :: [[Operator Parser PrimitivePatPattern]] table = [ [ InfixR (inductive2 "cons" "::" ) , InfixR (inductive2 "join" "++") ] ] inductive2 name sym = (\x y -> PPInductivePat name [x, y]) <$ operator sym ppAtom :: Parser PrimitivePatPattern ppAtom = PPWildCard <$ symbol "_" <|> PPPatVar <$ symbol "$" <|> PPValuePat <$> (symbol "#$" >> lowerId) <|> PPInductivePat "nil" [] <$ brackets sc <|> makeTupleOrParen ppPattern PPTuplePat pdPattern :: Parser PrimitiveDataPattern pdPattern = PDInductivePat <$> upperId <*> many pdAtom <|> PDSnocPat <$> (symbol "snoc" >> pdAtom) <*> pdAtom <|> makeExprParser pdAtom table "primitive data pattern" where table :: [[Operator Parser PrimitiveDataPattern]] table = [ [ InfixR (PDConsPat <$ symbol "::") ] ] pdAtom :: Parser PrimitiveDataPattern pdAtom = PDWildCard <$ symbol "_" <|> PDPatVar <$> (symbol "$" >> lowerId) <|> PDConstantPat <$> constantExpr <|> PDEmptyPat <$ (symbol "[" >> symbol "]") <|> makeTupleOrParen pdPattern PDTuplePat -- -- Tokens -- -- Space Comsumer sc :: Parser () sc = L.space space1 lineCmnt blockCmnt where lineCmnt = L.skipLineComment "--" blockCmnt = L.skipBlockCommentNested "{-" "-}" lexeme :: Parser a -> Parser a lexeme = L.lexeme sc positiveIntegerLiteral :: Parser Integer positiveIntegerLiteral = lexeme L.decimal "unsinged integer" charLiteral :: Parser Char charLiteral = between (char '\'') (symbol "\'") L.charLiteral "character" stringLiteral :: Parser String stringLiteral = char '\"' *> manyTill L.charLiteral (symbol "\"") "string" boolLiteral :: Parser Bool boolLiteral = reserved "True" $> True <|> reserved "False" $> False "boolean" positiveFloatLiteral :: Parser Double positiveFloatLiteral = lexeme L.float "unsigned float" varLiteral :: Parser Var varLiteral = stringToVar <$> ident patVarLiteral :: Parser Var patVarLiteral = stringToVar <$> (char '$' >> lowerId) binOpLiteral :: String -> Parser EgisonBinOp binOpLiteral sym = try (do wedge <- optional (char '!') opSym <- operator' sym let opInfo = fromJust $ find ((== opSym) . repr) reservedBinops return $ opInfo { isWedge = isJust wedge }) "binary operator" where -- operator without try operator' :: String -> Parser String operator' sym = string sym <* notFollowedBy opChar <* sc reserved :: String -> Parser () reserved w = (lexeme . try) (string w *> notFollowedBy identChar) symbol :: String -> Parser String symbol sym = try $ L.symbol sc sym operator :: String -> Parser String operator sym = try $ string sym <* notFollowedBy opChar <* sc patOperator :: String -> Parser String patOperator sym = try $ string sym <* notFollowedBy patOpChar <* sc -- Characters that can consist expression operators. opChar :: Parser Char opChar = oneOf "%^&*-+\\|:<>.?/'!#@$" -- Characters that can consist pattern operators. -- ! # @ $ are omitted because they can appear at the beginning of atomPattern patOpChar :: Parser Char patOpChar = oneOf "%^&*-+\\|:<>.?/'" -- Characters that consist identifiers. -- Note that 'alphaNumChar' can also parse greek letters. -- TODO(momohatt): Use more natural way to reject "..." identChar :: Parser Char identChar = alphaNumChar <|> oneOf (['?', '\'', '/'] ++ mathSymbols) <|> try (char '.' <* notFollowedBy (char '.')) -- Non-alphabetical symbols that are allowed for identifiers mathSymbols :: String mathSymbols = "∂∇" parens :: Parser a -> Parser a parens = between (symbol "(") (symbol ")") braces :: Parser a -> Parser a braces = between (symbol "{") (symbol "}") brackets :: Parser a -> Parser a brackets = between (symbol "[") (symbol "]") comma :: Parser String comma = symbol "," -- Notes on identifiers: -- * Identifiers must be able to include greek letters and some symbols in -- |mathSymbols|. -- * Only identifiers starting with capital English letters ('A' - 'Z') can be -- parsed as |upperId|. Identifiers starting with capital Greek letters must -- be regarded as |lowerId|. lowerId :: Parser String lowerId = (lexeme . try) (p >>= check) where p = (:) <$> satisfy (\c -> c `elem` mathSymbols || isLetter c && not (isAsciiUpper c)) <*> many identChar check x = if x `elem` lowerReservedWords then fail $ "keyword " ++ show x ++ " cannot be an identifier" else return x upperId :: Parser String upperId = (lexeme . try) (p >>= check) where p = (:) <$> satisfy isAsciiUpper <*> many alphaNumChar check x = if x `elem` upperReservedWords then fail $ "keyword " ++ show x ++ " cannot be an identifier" else return x -- union of lowerId and upperId ident :: Parser String ident = (lexeme . try) (p >>= check) where p = (:) <$> satisfy (\c -> c `elem` mathSymbols || isLetter c) <*> many identChar check x = if x `elem` (lowerReservedWords ++ upperReservedWords) then fail $ "keyword " ++ show x ++ " cannot be an identifier" else return x upperReservedWords :: [String] upperReservedWords = [ "True" , "False" ] lowerReservedWords :: [String] lowerReservedWords = [ "loadFile" , "load" , "if" , "then" , "else" , "seq" , "apply" , "capply" , "memoizedLambda" , "cambda" , "procedure" , "let" , "in" , "where" , "withSymbols" , "loop" , "of" , "match" , "matchDFS" , "matchAll" , "matchAllDFS" , "as" , "with" , "matcher" , "do" , "io" , "something" , "undefined" , "algebraicDataMatcher" , "generateTensor" , "tensor" , "contract" , "subrefs" , "subrefs!" , "suprefs" , "suprefs!" , "userRefs" , "userRefs!" , "function" ] -- -- Utils -- makeTupleOrParen :: Parser a -> ([a] -> a) -> Parser a makeTupleOrParen parser tupleCtor = do elems <- parens $ sepBy parser comma case elems of [elem] -> return elem _ -> return $ tupleCtor elems makeApply :: EgisonExpr -> [EgisonExpr] -> EgisonExpr makeApply (InductiveDataExpr x []) xs = InductiveDataExpr x xs makeApply func xs = ApplyExpr func (TupleExpr xs) makeApply' :: String -> [EgisonExpr] -> EgisonExpr makeApply' func xs = ApplyExpr (stringToVarExpr func) (TupleExpr xs)