module Parse ( Derivs(..), ParseError(..), ErrorDescriptor(..), Result(..), eofError, nullError, isDigit, digitToInt, isSpace ) where import Data.Char import Data.List import Pos ---------- Data types used for parsing data ErrorDescriptor = Expected String | Message String data ParseError = ParseError { errorPos :: Pos, errorDescrs :: [ErrorDescriptor] } data Result d v = Parsed v d ParseError | NoParse ParseError newtype Parser d v = Parser (d -> Result d v) class Derivs d where dvPos :: d -> Pos dvChar :: d -> Result d Char ---------- Basic parsing combinators infixl 2 -- ordered choice infixl 1 -- error labeling infixl 1 -- unconditional error labeling -- Standard monadic combinators instance Derivs d => Monad (Parser d) where -- Sequencing combinator (Parser p1) >>= f = Parser parse where parse dvs = first (p1 dvs) first (Parsed val rem err) = let Parser p2 = f val in second err (p2 rem) first (NoParse err) = NoParse err second err1 (Parsed val rem err) = Parsed val rem (joinErrors err1 err) second err1 (NoParse err) = NoParse (joinErrors err1 err) -- Result-producing combinator return x = Parser (\dvs -> Parsed x dvs (nullError dvs)) -- Failure combinator fail [] = Parser (\dvs -> NoParse (nullError dvs)) fail msg = Parser (\dvs -> NoParse (msgError (dvPos dvs) msg)) -- Ordered choice () :: Derivs d => Parser d v -> Parser d v -> Parser d v (Parser p1) (Parser p2) = Parser parse where parse dvs = first dvs (p1 dvs) first dvs (result @ (Parsed val rem err)) = result first dvs (NoParse err) = second err (p2 dvs) second err1 (Parsed val rem err) = Parsed val rem (joinErrors err1 err) second err1 (NoParse err) = NoParse (joinErrors err1 err) -- Semantic predicate: 'satisfy ' acts like -- but only succeeds if the result it generates satisfies . satisfy :: Derivs d => Parser d v -> (v -> Bool) -> Parser d v satisfy (Parser p) test = Parser parse where parse dvs = check dvs (p dvs) check dvs (result @ (Parsed val rem err)) = if test val then result else NoParse (nullError dvs) check dvs none = none -- Syntactic predicate: 'followedBy ' acts like -- but does not consume any input. followedBy :: Derivs d => Parser d v -> Parser d v followedBy (Parser p) = Parser parse where parse dvs = case (p dvs) of Parsed val rem err -> Parsed val dvs (nullError dvs) err -> err -- Negative syntactic predicate: 'followedBy ' invokes , -- then succeeds without consuming any input if fails, -- and fails if succeeds. notFollowedBy :: Derivs d => Parser d v -> Parser d () notFollowedBy (Parser p) = Parser parse where parse dvs = case (p dvs) of Parsed val rem err -> NoParse (nullError dvs) NoParse err -> Parsed () dvs (nullError dvs) -- Optional combinator: 'optional ' invokes , -- then produces the result 'Just ' if produced , -- or else produces the success result 'Nothing' -- without consuming any input if failed. optional :: Derivs d => Parser d v -> Parser d (Maybe v) optional p = (do v <- p; return (Just v)) return Nothing ---------- Iterative combinators -- Note: use of these combinators can break -- a packrat parser's linear-time guarantee. -- Zero or more repetition combinator: -- 'many ' invokes repeatedly until it fails, -- collecting all success result values into a list. -- Always succeeds, producing an empty list in the degenerate case. many :: Derivs d => Parser d v -> Parser d [v] many p = (do { v <- p; vs <- many p; return (v : vs) } ) return [] -- One or more repetition combinator: -- 'many1 ' invokes repeatedly until it fails, -- collecting all success result values into a list. -- Fails if does not succeed even once. many1 :: Derivs d => Parser d v -> Parser d [v] many1 p = do { v <- p; vs <- many p; return (v : vs) } -- One or more repetitions with a separator: -- 'sepBy1 ' scans one or more iterations of , -- with a match of between each instance. -- Only the results of are collected into the final result list. sepBy1 :: Derivs d => Parser d v -> Parser d vsep -> Parser d [v] sepBy1 p psep = do v <- p vs <- many (do { psep; p }) return (v : vs) -- Zero or more repetitions with a separator: -- like sepBy1, but succeeds with an empty list if nothing can be parsed. sepBy :: Derivs d => Parser d v -> Parser d vsep -> Parser d [v] sepBy p psep = sepBy1 p psep return [] -- Zero or more repetitions with a terminator endBy :: Derivs d => Parser d v -> Parser d vend -> Parser d [v] endBy p pend = many (do { v <- p; pend; return v }) -- One or more repetitions with a terminator endBy1 :: Derivs d => Parser d v -> Parser d vend -> Parser d [v] endBy1 p pend = many1 (do { v <- p; pend; return v }) -- One or more repetitions with a separator or terminator: -- 'sepEndBy1 ' scans for a sequence of matches -- in which instances are separated by , -- and if a is found following the last match -- then it is consumed as well. sepEndBy1 :: Derivs d => Parser d v -> Parser d vsep -> Parser d [v] sepEndBy1 p psep = do v <- sepBy1 p psep; optional psep; return v -- Zero or more repetitions with a separator or terminator. sepEndBy :: Derivs d => Parser d v -> Parser d vsep -> Parser d [v] sepEndBy p psep = do v <- sepBy p psep; optional psep; return v -- One or more repetitions separated by left-associative operators. -- 'chainl1 ' matches instances of separated by , -- but uses the result of as a left-associative binary combinator: -- e.g., 't1 op t2 op t3' is interpreted as '(t1 op t2) op t3' chainl1 :: Derivs d => Parser d v -> Parser d (v->v->v) -> Parser d v chainl1 p psep = let psuffix z = (do f <- psep v <- p psuffix (f z v)) return z in do v <- p psuffix v -- Zero or more repetitions separated by left-associative operators. chainl :: Derivs d => Parser d v -> Parser d (v->v->v) -> v -> Parser d v chainl p psep z = chainl1 p psep return z -- One or more repetitions separated by left-associative operators: -- e.g., 't1 op t2 op t3' is interpreted as 't1 op (t2 op t3)' chainr1 :: Derivs d => Parser d v -> Parser d (v->v->v) -> Parser d v chainr1 p psep = (do v <- p f <- psep w <- chainr1 p psep return (f v w)) p -- Zero or more repetitions separated by left-associative operators. chainr :: Derivs d => Parser d v -> Parser d (v->v->v) -> v -> Parser d v chainr p psep z = chainr1 p psep return z -- N-ary ordered choice: -- given a list of parsers producing results of the same type, -- try them all in order and use the first successful result. choice :: Derivs d => [Parser d v] -> Parser d v choice [p] = p choice (p:ps) = p choice ps ---------- Error handling instance Eq ErrorDescriptor where Expected e1 == Expected e2 = e1 == e2 Message m1 == Message m2 = m1 == m2 _ == _ = False failAt :: Derivs d => Pos -> String -> Parser d v failAt pos msg = Parser (\dvs -> NoParse (msgError pos msg)) -- Annotate a parser with a description of the construct to be parsed. -- The resulting parser yields an "expected" error message -- if the construct cannot be parsed -- and if no error information is already available -- indicating a position farther right in the source code -- (which would normally be more localized/detailed information). () :: Derivs d => Parser d v -> String -> Parser d v (Parser p) desc = Parser (\dvs -> munge dvs (p dvs)) where munge dvs (Parsed v rem err) = Parsed v rem (fix dvs err) munge dvs (NoParse err) = NoParse (fix dvs err) fix dvs (err @ (ParseError p ms)) = if p > dvPos dvs then err else expError (dvPos dvs) desc -- Stronger version of the error annotation operator above, -- which unconditionally overrides any existing error information. () :: Derivs d => Parser d v -> String -> Parser d v (Parser p) desc = Parser (\dvs -> munge dvs (p dvs)) where munge dvs (Parsed v rem err) = Parsed v rem (fix dvs err) munge dvs (NoParse err) = NoParse (fix dvs err) fix dvs (err @ (ParseError p ms)) = expError (dvPos dvs) desc -- Join two ParseErrors, giving preference to the one farthest right, -- or merging their descriptor sets if they are at the same position. joinErrors :: ParseError -> ParseError -> ParseError joinErrors (e @ (ParseError p m)) (e' @ (ParseError p' m')) = if p' > p || null m then e' else if p > p' || null m' then e else ParseError p (m `union` m') nullError dvs = ParseError (dvPos dvs) [] expError pos desc = ParseError pos [Expected desc] msgError pos msg = ParseError pos [Message msg] eofError dvs = msgError (dvPos dvs) "end of input" expected :: Derivs d => String -> Parser d v expected desc = Parser (\dvs -> NoParse (expError (dvPos dvs) desc)) unexpected :: Derivs d => String -> Parser d v unexpected str = fail ("unexpected " ++ str) -- Comparison operators for ParseError just compare relative positions. instance Eq ParseError where ParseError p1 m1 == ParseError p2 m2 = p1 == p2 ParseError p1 m1 /= ParseError p2 m2 = p1 /= p2 instance Ord ParseError where ParseError p1 m1 < ParseError p2 m2 = p1 < p2 ParseError p1 m1 > ParseError p2 m2 = p1 > p2 ParseError p1 m1 <= ParseError p2 m2 = p1 <= p2 ParseError p1 m1 >= ParseError p2 m2 = p1 >= p2 -- Special behavior: "max" joins two errors max p1 p2 = joinErrors p1 p2 min p1 p2 = undefined -- Show function for error messages instance Show ParseError where show (ParseError pos []) = show pos ++ ": parse error" show (ParseError pos msgs) = expectmsg expects ++ messages msgs where expects = getExpects msgs getExpects [] = [] getExpects (Expected exp : rest) = exp : getExpects rest getExpects (Message msg : rest) = getExpects rest expectmsg [] = "" expectmsg [exp] = show pos ++ ": expecting " ++ exp ++ "\n" expectmsg [e1, e2] = show pos ++ ": expecting either " ++ e1 ++ " or " ++ e2 ++ "\n" expectmsg (first : rest) = show pos ++ ": expecting one of: " ++ first ++ expectlist rest ++ "\n" expectlist [last] = ", or " ++ last expectlist (mid : rest) = ", " ++ mid ++ expectlist rest messages [] = [] messages (Expected exp : rest) = messages rest messages (Message msg : rest) = show pos ++ ": " ++ msg ++ "\n" ++ messages rest ---------- Character-oriented parsers -- 'anyChar' matches any single character. anyChar :: Derivs d => Parser d Char anyChar = Parser dvChar -- 'char ' matches the specific character . char :: Derivs d => Char -> Parser d Char char ch = satisfy anyChar (\c -> c == ch) show ch -- 'oneOf ' matches any character in string . oneOf :: Derivs d => [Char] -> Parser d Char oneOf chs = satisfy anyChar (\c -> c `elem` chs) ("one of the characters " ++ show chs) -- 'noneOf ' matches any character not in string . noneOf :: Derivs d => [Char] -> Parser d Char noneOf chs = satisfy anyChar (\c -> not (c `elem` chs)) ("any character not in " ++ show chs) -- 'string ' matches all the characters in in sequence. string :: Derivs d => String -> Parser d String string str = p str show str where p [] = return str p (ch:chs) = do { char ch; p chs } -- 'stringFrom ' matches any string in the list of strings . -- If any strings in are prefixes of other strings in , -- then the prefixes must appear later in the list -- in order for the longer strings to be recognized at all. stringFrom :: Derivs d => [String] -> Parser d String stringFrom [str] = string str stringFrom (str : strs) = string str stringFrom strs -- Match an uppercase letter. upper :: Derivs d => Parser d Char upper = satisfy anyChar isUpper "uppercase letter" -- Match a lowercase letter. lower :: Derivs d => Parser d Char lower = satisfy anyChar isLower "lowercase letter" -- Match any letter. letter :: Derivs d => Parser d Char letter = satisfy anyChar isAlpha "letter" -- Match any letter or digit. alphaNum :: Derivs d => Parser d Char alphaNum = satisfy anyChar isAlphaNum "letter or digit" -- Match any digit. digit :: Derivs d => Parser d Char digit = satisfy anyChar isDigit "digit" -- Match any hexadecimal digit. hexDigit :: Derivs d => Parser d Char hexDigit = satisfy anyChar isHexDigit "hexadecimal digit (0-9, a-f)" -- Match any octal digit. octDigit :: Derivs d => Parser d Char octDigit = satisfy anyChar isOctDigit "octal digit (0-7)" -- Match a newline. newline :: Derivs d => Parser d Char newline = char '\n' -- Match a tab character. tab :: Derivs d => Parser d Char tab = char '\t' -- Match any whitespace character (space, tab, newline, etc.). space :: Derivs d => Parser d Char space = satisfy anyChar isSpace "whitespace character" -- Match a sequence of zero or more whitespace characters. spaces :: Derivs d => Parser d [Char] spaces = many space -- Match the end of file (i.e., "the absence of a character"). eof :: Derivs d => Parser d () eof = notFollowedBy anyChar "end of input" ---------- Parser state manipulation combinators -- Combinator to get the Derivs object for the current position: -- e.g., 'dvs <- getDerivs' as part of a 'do' sequence. getDerivs :: Derivs d => Parser d d getDerivs = Parser (\dvs -> Parsed dvs dvs (nullError dvs)) -- Combinator to set the Derivs object used for subsequent parsing; -- typically used to change parsing state elements in the Derivs tuple. setDerivs :: Derivs d => d -> Parser d () setDerivs newdvs = Parser (\dvs -> Parsed () newdvs (nullError dvs)) -- Get the current position in the input text. getPos :: Derivs d => Parser d Pos getPos = Parser (\dvs -> Parsed (dvPos dvs) dvs (nullError dvs))