{----------------------------------------------------------------------------- A LIBRARY OF MONADIC PARSER COMBINATORS 29th July 1996 Revised, October 1996 Graham Hutton Erik Meijer University of Nottingham University of Utrecht This Haskell 1.4 script defines a library of parser combinators, and is taken from sections 1-6 of our article "Monadic Parser Combinators". Some changes to the library have been made in the move from Gofer to Haskell: * Do notation is used in place of monad comprehension notation; * The parser datatype is defined using "newtype", to avoid the overhead of tagging and untagging parsers with the P constructor. -----------------------------------------------------------------------------} -- Added to April 1997, for offside rule, {- -} comments, annotations, -- extra characters in identifiers .. - -- extra combinator parsers for skipping over input module ParseLib2 (Parser, item, papply, (+++), sat, many, many1, sepby, sepby1, chainl, chainl1, chainr, chainr1, ops, bracket, char, digit, lower, upper, letter, alphanum, string, ident, nat, int, spaces, comment, junk, parse, token, natural, integer, symbol, identifier, many1_offside,many_offside,off, opt, skipUntil, skipUntilOff,skipUntilParse,skipNest) where import Data.Char import Control.Monad infixr 5 +++ --- The parser monad --------------------------------------------------------- newtype Parser a = P (Pos -> Pstring -> [(a,Pstring)]) type Pstring = (Pos,String) type Pos = (Int,Int) instance Functor Parser where -- fmap :: (a -> b) -> (Parser a -> Parser b) fmap f (P p) = P (\pos inp -> [(f v, out) | (v,out) <- p pos inp]) instance Monad Parser where -- return :: a -> Parser a return v = P (\pos inp -> [(v,inp)]) -- >>= :: Parser a -> (a -> Parser b) -> Parser b (P p) >>= f = P (\pos inp -> concat [papply (f v) pos out | (v,out) <- p pos inp]) fail s = P (\pos inp -> []) instance MonadPlus Parser where -- mzero :: Parser a mzero = P (\pos inp -> []) -- mplus :: Parser a -> Parser a -> Parser a (P p) `mplus` (P q) = P (\pos inp -> (p pos inp ++ q pos inp)) -- bits which donn't fit into Haskell's type classes just yet :-( env :: Parser Pos env = P(\pos inp -> [(pos,inp)]) setenv :: Pos -> Parser a -> Parser a setenv s (P m) = P $ \_ -> m s update :: (Pstring -> Pstring) -> Parser Pstring update f = P( \pos s -> [(s,f s)]) set :: Pstring -> Parser Pstring set s = update (\_ -> s) fetch :: Parser Pstring fetch = update id --- Other primitive parser combinators --------------------------------------- item :: Parser Char item = do (pos,x:_) <- update newstate defpos <- env if onside pos defpos then return x else mzero force :: Parser a -> Parser a force (P p) = P (\pos inp -> let x = p pos inp in (fst (head x), snd (head x)) : tail x) first :: Parser a -> Parser a first (P p) = P (\pos inp -> case p pos inp of [] -> [] (x:xs) -> [x]) papply :: Parser a -> Pos -> Pstring -> [(a,Pstring)] papply (P p) pos inp = p pos inp -- layout handling functions onside :: Pos -> Pos -> Bool onside (l,c) (dl,dc) = (c > dc) || (l == dl) newstate :: Pstring -> Pstring newstate ((l,c),x:xs) = ((l',c'),xs) where (l',c') = case x of '\n' -> (l+1,0) '\t' -> (l,((c `div` 8) +1)*8) _ -> (l,c+1) --- Derived combinators ------------------------------------------------------ (+++) :: Parser a -> Parser a -> Parser a p +++ q = first (p `mplus` q) sat :: (Char -> Bool) -> Parser Char sat p = do {x <- item; if p x then return x else mzero} many :: Parser a -> Parser [a] --many p = force (many1 p +++ return []) many p = (many1 p +++ return []) many1 :: Parser a -> Parser [a] many1 p = do {x <- p; xs <- many p; return (x:xs)} sepby :: Parser a -> Parser b -> Parser [a] p `sepby` sep = (p `sepby1` sep) +++ return [] sepby1 :: Parser a -> Parser b -> Parser [a] p `sepby1` sep = do {x <- p; xs <- many (do {sep; p}); return (x:xs)} chainl :: Parser a -> Parser (a -> a -> a) -> a -> Parser a chainl p op v = (p `chainl1` op) +++ return v chainl1 :: Parser a -> Parser (a -> a -> a) -> Parser a p `chainl1` op = do {x <- p; rest x} where rest x = do {f <- op; y <- p; rest (f x y)} +++ return x chainr :: Parser a -> Parser (a -> a -> a) -> a -> Parser a chainr p op v = (p `chainr1` op) +++ return v chainr1 :: Parser a -> Parser (a -> a -> a) -> Parser a p `chainr1` op = do {x <- p; rest x} where rest x = do {f <- op; y <- p `chainr1` op; return (f x y)} +++ return x ops :: [(Parser a, b)] -> Parser b ops xs = foldr1 (+++) [do {p; return op} | (p,op) <- xs] bracket :: Parser a -> Parser b -> Parser c -> Parser b bracket open p close = do {open; x <- p; close; return x} --- Useful parsers ----------------------------------------------------------- char :: Char -> Parser Char char x = sat (\y -> x == y) digit :: Parser Char digit = sat isDigit lower :: Parser Char lower = sat isLower upper :: Parser Char upper = sat isUpper letter :: Parser Char letter = sat isAlpha alphanum :: Parser Char alphanum = sat (\x -> isAlphaNum x || x `elem` ['\'','_','.','#']) string :: String -> Parser String string "" = return "" string (x:xs) = do {char x; string xs; return (x:xs)} -- parse a Haskell 98 identifier, when the input is a valid Haskell 98 identifier (it's more liberal than H98) ident :: Parser String ident = do {x <- lower +++ char '_' ; xs <- many alphanum; return (x:xs)} nat :: Parser Int nat = do {x <- digit; return (digitToInt x)} `chainl1` return op where m `op` n = 10*m + n int :: Parser Int int = do {char '-'; n <- nat; return (-n)} +++ nat --- Lexical combinators ------------------------------------------------------ spaces :: Parser () spaces = do {many1 (sat isJunk); return ()} isJunk x = isSpace x || (not . isPrint) x || isControl x comment :: Parser () comment = onelinecomment `mplus` bracecomment onelinecomment :: Parser () onelinecomment = do {string "--"; many (sat (\x -> x /= '\n')); return ()} bracecomment :: Parser () bracecomment = skipNest (do{string "{-"; sat (`notElem` ['!','@','*'])}) (do{sat (`notElem` ['!','@','*']);string "-}"}) junk :: Parser () junk = do _ <- setenv (0,-1) (many (spaces +++ comment)) return () parse :: Parser a -> Parser a parse p = do {junk; p} token :: Parser a -> Parser a token p = do {v <- p; junk; return v} --- Token parsers ------------------------------------------------------------ natural :: Parser Int natural = token nat integer :: Parser Int integer = token int symbol :: String -> Parser String symbol xs = token (string xs) identifier :: [String] -> Parser String identifier ks = token (do {x <- ident; if not (elem x ks) then return x else mzero}) --- Offside Parsers --------------------------------------------------------- many1_offside :: Parser a -> Parser [a] many1_offside p = do (pos,_) <- fetch vs <- setenv pos (many1 (off p)) return vs many_offside :: Parser a -> Parser [a] many_offside p = many1_offside p +++ mzero off :: Parser a -> Parser a off p = do (dl,dc) <- env ((l,c),_) <- fetch if c == dc then setenv (l,dc) p else mzero ------------------------------------------------------------------------------ -- Noel's own favourite parsers skipUntil :: Parser a -> Parser a skipUntil p = p +++ do token (many1 (sat (not . isSpace))) skipUntil p skipNest :: Parser a -> Parser b -> Parser () skipNest start finish = let x = do{ finish;return()} +++ do{skipNest start finish;x} +++ do{item;x} in do{start; x} -- this are messy, but make writing incomplete parsers a whole lot -- easier. skipUntilOff :: Parser a -> Parser [a] skipUntilOff p = fmap (concatMap justs) . many_offside $ fmap Just p +++ fmap (const Nothing) (many1 (token (many1 item))) skipUntilParse :: Char -> Parser a -> Parser [a] skipUntilParse u p = fmap (concatMap justs) . many $ do r<- p token (char u) return (Just r) +++ do many . token . many1 . sat $(/= u) token (char u) return Nothing justs (Just a) = [a] justs Nothing = [] opt p = p +++ return []