{-# OPTIONS_GHC -Wall #-} -- |This module contains everything needed to do the initial -- parsing of either a MUMPS routine or MUMPS commands -- entered at a REPL module HMumps.Parsers ( initLex, strip, comment, parseCommands, command, parseExp, parseVn, parseWriteArg, parseKillArg, parseNewArg, parseDoArg, parseRoutineRef, parseLabel, parseGotoArg, mlist, mlist1, arglist, arglist1, parse, parseFile, eol, ) where import Data.MValue import HMumps.Routine import HMumps.SyntaxTree import Data.Char import Data.String import Control.Monad import Text.Parsec hiding (spaces) import Text.Parsec.String -- import Text.Regex spaces :: Parser () spaces = (many $ oneOf " \t\r") >> return () parseFile :: Parser OldFile parseFile = many $ do tag <- parseTag spaces linelevel <- length `liftM` many (do spaces; x <- char '.'; spaces; return x) cmds <- parseCommands optional comment char_ '\n' return (tag, linelevel, cmds) parseTag :: Parser Tag parseTag = do name <- parseValidName args <- arglist parseValidName return $ Just (name,args) <|> return Nothing -- | The "initLex" function takes in a string representing all of the code -- to be parsed (say, an entire routine) and: -- * Breaks the code into lines -- * Removes comments -- * Removes trailing whitespace initLex :: String -> [String] initLex = map strip . lines strip :: String -> String strip = (dropWhile whitespace) . reverse . (dropWhile whitespace) . reverse . (takeWhile (/=';')) where whitespace x = any (==x) [' ','\t','\r'] -- |Parse Commands is fed a LINE of MUMPS (after line-level has been detrimined). parseCommands :: Parser [Command] parseCommands = (many $ do c <- command spaces return c) <|> (do comment; return []) <|> (eol >> return []) <|> (spaces >> parseCommands) -- I think I do this wrong, because I'm not sure what happens on -- mal-formed input. anyway, I think it's better than it was. -- munch comments comment :: Parser () comment = do char_ ';' _ <- many $ noneOf "\n" return () -- |Parses a single command. command :: Parser Command command = parseBreak <|> parseDo <|> parseElse <|> parseFor <|> parseGoto <|> parseHa -- left factored halt or hang <|> parseIf <|> parseKill <|> parseMerge <|> parseNew <|> parseQuit <|> parseRead <|> parseSet <|> parseWrite <|> parseXecute "MUMPS command" parseBreak :: Parser Command parseBreak = do stringOrPrefix1 "break" cond <- postCondition return $ Break cond postCondition :: Parser (Maybe Expression) postCondition = do char_ ':' cond <- parseExpAtom return $ Just cond <|> return Nothing -- Should work for end-of-line do statements. Need more tests though. parseDo :: Parser Command parseDo = do stringOrPrefix1 "do" cond <- postCondition do char_ ' ' args <- mlist parseDoArg return $ Do cond args <|> do eol return $ Do cond [] parseDoArg :: Parser DoArg parseDoArg = (do char_ '@'; expr <- parseExpAtom; return $ DoArgIndirect expr) <|> (do loc <- parseEntryRef args <- arglist parseFunArg cond <- postCondition return $ DoArg cond loc args) -- very similar to the DO parser - which makes sense, as they do -- similar things. parseGoto :: Parser Command parseGoto = do stringOrPrefix1 "goto" cond <- postCondition do char_ ' ' args <- mlist parseGotoArg return $ Goto cond args <|> do eol return $ Goto cond [] parseGotoArg :: Parser GotoArg parseGotoArg = (try (do char_ '@'; expr <- parseExpAtom; return $ GotoArgIndirect expr)) <|> (do loc <- parseEntryRef cond <- postCondition return $ GotoArg cond loc) parseElse :: Parser Command parseElse = do stringOrPrefix1 "else" eol <|> do char_ ' ' eol <|> char_ ' ' return Else parseFor :: Parser Command parseFor = do stringOrPrefix1 "for" (eol >> return ForInf) <|> (do char_ ' ' (do vn <- parseLvn char_ '=' arg <- forArg return $ For vn arg) <|> (do eol <|> char_ ' ' return ForInf)) where forArg :: Parser ForArg forArg = do args <- colonlist parseExp case length args of 1 -> return $ ForArg1 (head args) 2 -> return $ ForArg2 (args !! 0) (args !! 1) 3 -> return $ ForArg3 (args !! 0) (args !! 1) (args !! 2) _ -> fail "Wrong number of arguments to FOR" parseHa :: Parser Command parseHa = do stringOrPrefix1 "ha" (try parseHang <|> parseHalt) --Not sufficiently left factored parseHang :: Parser Command parseHang = do stringOrPrefix "ng" cond <- postCondition char_ ' ' expr <- parseExp return $ Hang cond expr parseHalt :: Parser Command parseHalt = do stringOrPrefix "lt" cond <- postCondition return $ Halt cond parseIf :: Parser Command parseIf = do stringOrPrefix1 "if" (char_ ' ' >> If `liftM` mlist parseExp) <|> (eol >> (return $ If [])) parseKill :: Parser Command parseKill = do stringOrPrefix1 "kill" cond <- postCondition (do char_ ' ' args <- mlist parseKillArg return $ Kill cond args) <|> (eol >> (return $ Kill cond [])) parseMerge :: Parser Command parseMerge = do stringOrPrefix1 "merge" cond <- postCondition char_ ' ' args <- mlist1 parseMergeArg return $ Merge cond args parseMergeArg :: Parser MergeArg parseMergeArg = (do char_ '@' expr <- parseExpAtom return $ MergeArgIndirect expr) <|> (liftM2 MergeArg parseVn (char_ '=' >> parseVn)) "MERGE argument or indirection" parseNew :: Parser Command parseNew = do stringOrPrefix1 "new" cond <- postCondition (char_ ' ' >> New cond `liftM` (mlist parseNewArg)) <|> (eol >> (return $ New cond [])) parseNewArg :: Parser NewArg parseNewArg = (do char_ '(' args <- mlist litName char_ ')' return $ NewExclusive args) <|> (NewIndirect `liftM` (char_ '@' >> parseExpAtom)) <|> NewSelective `liftM` litName parseQuit :: Parser Command parseQuit = do stringOrPrefix1 "quit" return Quit `ap` postCondition `ap` quitArg where quitArg = (char_ ' ' >> (Just `liftM` parseExp <|> (eol >> return Nothing) <|> (char_ ' ' >> return Nothing))) <|> (eol >> return Nothing) eol :: Parser () eol = notFollowedBy $ noneOf "\n;" parseRead :: Parser Command parseRead = do stringOrPrefix1 "read" cond <- postCondition char_ ' ' args <- mlist1 parseWriteArg case last args of WriteExpression (ExpVn vn) -> return $ Read cond (init args) vn _ -> fail "last argument to READ must be a variable name" parseSet :: Parser Command parseSet = do stringOrPrefix1 "set" return Set `ap` postCondition `ap` (char_ ' ' >> mlist1 setArg) where setArg = do lhs <- arglist1 parseVn <|> liftM (\x->[x]) parseVn char_ '=' rhs <- parseExp return (lhs,rhs) parseWrite :: Parser Command parseWrite = do stringOrPrefix1 "write" return Write `ap` postCondition `ap` (char_ ' ' >> mlist1 parseWriteArg) parseWriteArg :: Parser WriteArg parseWriteArg = (WriteFormat `liftM` many1 parseWriteFormatCode) <|> do char_ '@' expr <- parseExpAtom (char_ '@' >> do args <- arglist parseExp return $ WriteExpression $ ExpVn $ IndirectVn expr args) <|> (return $ WriteIndirect expr) <|> (WriteExpression `liftM` parseExp) parseWriteFormatCode :: Parser WriteFormatCode parseWriteFormatCode = (char_ '#' >> return Formfeed) <|> (char_ '!' >> return Newline) <|> (char_ '?' >> return Tab `ap` parseInt) where parseInt :: Parser Int parseInt = return read `ap` many1 (oneOf ['0'..'9']) parseXecute :: Parser Command parseXecute = do stringOrPrefix1 "x" cond <- postCondition char_ ' ' arg <- parseExp return $ Xecute cond arg parseKillArg :: Parser KillArg parseKillArg = (KillIndirect `liftM` (char_ '@' >> parseExpAtom)) <|> (KillExclusive `liftM` arglist1 litName) <|> (KillSelective `liftM` parseVn) stringOrPrefix :: String -> Parser () stringOrPrefix str = stringOrPrefix1 str <|> return () stringOrPrefix1 :: String -> Parser () stringOrPrefix1 [] = return () stringOrPrefix1 (x:xs) = do char_ (toUpper x) <|> char_ (toLower x) <|> char_ x stringOrPrefix xs parseExpAtom :: Parser Expression parseExpAtom = (parseExpUnop <|> parseExpVn <|> parseExpFuncall <|> parseSubExp <|> parseExpLit) -- |Parse an expression. Is not at all forgiving about extraneous whitespace. parseExp :: Parser Expression parseExp = do let parseWrapper :: Parser ((Expression -> Expression) -> Expression -> Expression) parseWrapper = (do char_ '\''; return $ \f x -> ExpUnop UNot (f x)) <|> (return id) parseTailItem :: Parser (Expression -> Expression) parseTailItem = do wrapper <- parseWrapper; ((do binop <- parseBinop; expr <- parseExpAtom; return $ wrapper $ \x -> ExpBinop binop x expr) <|>(do char_ '?'; pat <- undefined; return $ pat)) exp1 <- parseExpAtom tails <- many parseTailItem return $ foldl (flip (.)) id tails $ exp1 parseExpUnop :: Parser Expression parseExpUnop = (do unop <- parseUnop; expr <- parseExpAtom; return $ ExpUnop unop expr) parseUnop :: Parser UnaryOp parseUnop = (do char_ '\''; return UNot) <|> (do char_ '+'; return UPlus) <|> (do char_ '-'; return UMinus) parseExpVn :: Parser Expression parseExpVn = do vn <- parseVn return $ ExpVn vn parseExpFuncall :: Parser Expression parseExpFuncall = char_ '$' >> (parseBif <|> parseExFun) parseBif :: Parser Expression parseBif = liftM ExpBifCall $ msum [ parseBifC , parseBifX , parseBifY , parseBifT , parseBifO , parseReplace ] parseBifC :: Parser BifCall parseBifC = do stringOrPrefix1 "char" args <- arglist1 parseExp return $ BifChar args parseBifX :: Parser BifCall parseBifX = char_ 'x' >> return BifX parseBifY :: Parser BifCall parseBifY = char_ 'y' >> return BifY parseBifT :: Parser BifCall parseBifT = stringOrPrefix1 "test" >> return BifTest parseBifO :: Parser BifCall parseBifO = do stringOrPrefix1 "order" (vn, dir) <- parse2args parseVn parseExp return $ BifOrder vn dir parseReplace :: Parser BifCall parseReplace = do stringOrPrefix1 "zreplace" args <- arglist1 parseExp case args of [haystack,needle,replacement] -> return $ BifReplace haystack needle replacement _ -> fail "$$ZREPLACE requires three arguments" -- | parse two function arguments where the second is optional parse2args :: Parser a -> Parser b -> Parser (a, Maybe b) parse2args a1 a2 = do char_ '(' v1 <- a1 v2 <- ((char_ ',' >> liftM Just a2) <|> return Nothing) char_ ')' return (v1, v2) parseExFun :: Parser Expression parseExFun = do char_ '$' (do char_ '^' name2 <- parseValidName args <- arglist parseFunArg return $ FunCall "" name2 args) <|> (do name1 <- parseValidName (do char_ '^' name2 <- parseValidName args <- arglist parseFunArg return $ FunCall name1 name2 args) <|> (do args <- arglist parseFunArg return $ FunCall name1 "" args)) parseSubExp :: Parser Expression parseSubExp = do char_ '(' expr <- parseExp char_ ')' return expr -- Take a positive number or a string. Any leading -/+ signs should've -- been picked up by parseExpUnop by now. parseExpLit :: Parser Expression parseExpLit = parseNumLit <|> parseStringLit -- Does not work with scientific notation yet parseNumLit :: Parser Expression parseNumLit = do xs <- many1 digit (do char_ '.'; ys <- many1 digit; (return . ExpLit . fromDouble . read) (xs ++ ['.'] ++ ys)) <|> (return . ExpLit. fromInteger . read) xs -- parse a string literal - uses one char of look-ahead parseStringLit :: Parser Expression parseStringLit = do char_ '"' xs <- many $ (try $ do string_ "\"\"";return '\"') <|> (noneOf "\"") char_ '"' (return . ExpLit . fromString) xs -- No guarantees that the list of binops is complete. parseBinop :: Parser BinOp parseBinop = (char_ '_' >> return Concat) <|> (char_ '+' >> return Add) <|> (char_ '-' >> return Sub) <|> (char_ '*' >> ((char_ '*' >> return Pow) <|> return Mult)) <|> (char_ '/' >> return Div) <|> (char_ '#' >> return Rem) <|> (char_ '\\' >> return Quot) <|> (char_ '&' >> return And) <|> (char_ '!' >> return Or) <|> (char_ '=' >> return Equal) <|> (char_ '<' >> return LessThan) <|> (char_ '>' >> return GreaterThan) <|> (char_ ']' >> ((char_ ']' >> return SortsAfter) <|> return Follows)) <|> (char_ '[' >> return Contains) "binary operator" -- Used in DoArg and GotoArg parseRoutineRef :: Parser Routineref parseRoutineRef = (do char_ '^' (do char_ '@' RoutinerefIndirect `liftM` parseExpAtom) <|> Routineref `liftM` litName) parseEntryRef :: Parser EntryRef parseEntryRef = (Routine `liftM` parseRoutineRef) <|> (do lbl <- parseLabel offset <- parseOffset routine <- parseRoutine return $ Subroutine lbl offset routine) where parseOffset = (char_ '+' >> (Just . read) `liftM` many1 (oneOf "1234567890")) <|> (return Nothing) parseRoutine = (Just `liftM` parseRoutineRef) <|> (return Nothing) parseLabel :: Parser Label parseLabel = (char_ '@' >> LabelIndirect `liftM` parseExpAtom) <|> (Label `liftM` litName) -- Differs from parseExp because a funarg may be either: -- 1) An Expression -- 2) A (local?) variable passed by ref parseFunArg :: Parser FunArg parseFunArg = (do char_ '.' FunArgName `liftM` litName) <|> (FunArgExp `liftM` parseExp) -- |Parses the name of a variable (with subscripts) parseVn :: Parser Vn parseVn = (do char_ '@' expr <- parseExpAtom args <- (do char_ '@' arglist parseExp) <|> return [] return $ IndirectVn expr args) <|> (do char_ '^' name <- litName <|> return "" args <- arglist parseExp return $ Gvn name args) <|> parseLvn "variable name" parseLvn :: Parser Vn parseLvn = return Lvn `ap` litName `ap` arglist parseExp -- |Parses a literal name. litName :: Parser Name litName = parseValidName parseValidName :: Parser String parseValidName = do x <- oneOf (return '%' ++ ident) xs <- many (oneOf (ident ++ digits)) return (x:xs) where ident = ['a'..'z'] ++ ['A'..'Z'] digits = ['0'..'9'] -- |Given a parser, parse a comma separated list of these. mlist :: Parser a -> Parser [a] mlist pa = mlist1 pa <|> return [] -- |Similar to mlist, but must grab at least one element mlist1 :: Parser a -> Parser [a] mlist1 pa = do x <- pa xs <- (do char_ ',' mlist pa) <|> return [] return (x:xs) colonlist :: Parser a -> Parser [a] colonlist pa = do x <- pa xs <- many (char_ ':' >> pa) return (x:xs) -- |Given a parser, parse a comma separated list of these surrounded by parens arglist :: Parser a -> Parser [a] arglist pa = do char_ '(' xs <- mlist pa char_ ')' return xs <|> return [] -- |Given a parser, parse a comma separated non-empty list of these -- surounded by parens arglist1 :: Parser a -> Parser [a] arglist1 pa = do char_ '(' xs <- mlist1 pa char_ ')' return xs char_ :: Char -> Parser () char_ c = char c >>= \_ -> return () string_ :: String -> Parser () string_ str = string str >>= \_ -> return ()