Copyright | © 2015–2016 Megaparsec contributors © 2007 Paolo Martini © 1999–2001 Daan Leijen |
---|---|
License | FreeBSD |
Maintainer | Mark Karpov <markkarpov@opmbx.org> |
Stability | experimental |
Portability | portable |
Safe Haskell | None |
Language | Haskell2010 |
This module includes everything you need to get started writing a parser. If you are new to Megaparsec and don't know where to begin, take a look at our tutorials https://mrkkrp.github.io/megaparsec/tutorials.html.
By default this module is set up to parse character data. If you'd like to parse the result of your own tokenizer you should start with the following imports:
import Text.Megaparsec.Prim
import Text.Megaparsec.Combinator
Then you can implement your own version of satisfy
on top of the
token
primitive.
Typical import section looks like this:
import Text.Megaparsec
import Text.Megaparsec.String
-- import Text.Megaparsec.ByteString
-- import Text.Megaparsec.ByteString.Lazy
-- import Text.Megaparsec.Text
-- import Text.Megaparsec.Text.Lazy
As you can see, the second import depends on the data type you want to use as
the input stream. It just defines a useful type synonym Parser
.
Megaparsec is capable of a lot. Apart from this standard functionality you can parse permutation phrases with Text.Megaparsec.Perm, expressions with Text.Megaparsec.Expr, and even entire languages with Text.Megaparsec.Lexer. These modules should be imported explicitly along with the two modules mentioned above.
- type Parsec e s = ParsecT e s Identity
- data ParsecT e s m a
- runParser :: Parsec e s a -> String -> s -> Either (ParseError (Token s) e) a
- runParser' :: Parsec e s a -> State s -> (State s, Either (ParseError (Token s) e) a)
- runParserT :: Monad m => ParsecT e s m a -> String -> s -> m (Either (ParseError (Token s) e) a)
- runParserT' :: Monad m => ParsecT e s m a -> State s -> m (State s, Either (ParseError (Token s) e) a)
- parse :: Parsec e s a -> String -> s -> Either (ParseError (Token s) e) a
- parseMaybe :: (ErrorComponent e, Stream s) => Parsec e s a -> s -> Maybe a
- parseTest :: (ShowErrorComponent e, Ord (Token s), ShowToken (Token s), Show a) => Parsec e s a -> s -> IO ()
- (<|>) :: Alternative f => forall a. f a -> f a -> f a
- many :: Alternative f => forall a. f a -> f [a]
- some :: Alternative f => forall a. f a -> f [a]
- optional :: Alternative f => f a -> f (Maybe a)
- unexpected :: MonadParsec e s m => ErrorItem (Token s) -> m a
- failure :: MonadParsec e s m => Set (ErrorItem (Token s)) -> Set (ErrorItem (Token s)) -> Set e -> m a
- (<?>) :: MonadParsec e s m => m a -> String -> m a
- label :: MonadParsec e s m => String -> m a -> m a
- hidden :: MonadParsec e s m => m a -> m a
- try :: MonadParsec e s m => m a -> m a
- lookAhead :: MonadParsec e s m => m a -> m a
- notFollowedBy :: MonadParsec e s m => m a -> m ()
- withRecovery :: MonadParsec e s m => (ParseError (Token s) e -> m a) -> m a -> m a
- eof :: MonadParsec e s m => m ()
- token :: MonadParsec e s m => (Token s -> Either (Set (ErrorItem (Token s)), Set (ErrorItem (Token s)), Set e) a) -> Maybe (Token s) -> m a
- tokens :: MonadParsec e s m => (Token s -> Token s -> Bool) -> [Token s] -> m [Token s]
- between :: Applicative m => m open -> m close -> m a -> m a
- choice :: (Foldable f, Alternative m) => f (m a) -> m a
- count :: Applicative m => Int -> m a -> m [a]
- count' :: Alternative m => Int -> Int -> m a -> m [a]
- eitherP :: Alternative m => m a -> m b -> m (Either a b)
- endBy :: Alternative m => m a -> m sep -> m [a]
- endBy1 :: Alternative m => m a -> m sep -> m [a]
- manyTill :: Alternative m => m a -> m end -> m [a]
- someTill :: Alternative m => m a -> m end -> m [a]
- option :: Alternative m => a -> m a -> m a
- sepBy :: Alternative m => m a -> m sep -> m [a]
- sepBy1 :: Alternative m => m a -> m sep -> m [a]
- sepEndBy :: Alternative m => m a -> m sep -> m [a]
- sepEndBy1 :: Alternative m => m a -> m sep -> m [a]
- skipMany :: Alternative m => m a -> m ()
- skipSome :: Alternative m => m a -> m ()
- newline :: (MonadParsec e s m, Token s ~ Char) => m Char
- crlf :: (MonadParsec e s m, Token s ~ Char) => m String
- eol :: (MonadParsec e s m, Token s ~ Char) => m String
- tab :: (MonadParsec e s m, Token s ~ Char) => m Char
- space :: (MonadParsec e s m, Token s ~ Char) => m ()
- controlChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- spaceChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- upperChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- lowerChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- letterChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- printChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- digitChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- markChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- numberChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- symbolChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- separatorChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- asciiChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- latin1Char :: (MonadParsec e s m, Token s ~ Char) => m Char
- charCategory :: (MonadParsec e s m, Token s ~ Char) => GeneralCategory -> m Char
- char :: (MonadParsec e s m, Token s ~ Char) => Char -> m Char
- char' :: (MonadParsec e s m, Token s ~ Char) => Char -> m Char
- anyChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- oneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
- oneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
- noneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
- noneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
- satisfy :: (MonadParsec e s m, Token s ~ Char) => (Char -> Bool) -> m Char
- string :: (MonadParsec e s m, Token s ~ Char) => String -> m String
- string' :: (MonadParsec e s m, Token s ~ Char) => String -> m String
- data Pos
- mkPos :: (Integral a, MonadThrow m) => a -> m Pos
- unPos :: Pos -> Word
- unsafePos :: Word -> Pos
- data InvalidPosException = InvalidPosException
- data SourcePos = SourcePos {
- sourceName :: FilePath
- sourceLine :: !Pos
- sourceColumn :: !Pos
- initialPos :: String -> SourcePos
- sourcePosPretty :: SourcePos -> String
- data ErrorItem t
- class Ord e => ErrorComponent e where
- data Dec
- data ParseError t e = ParseError {
- errorPos :: NonEmpty SourcePos
- errorUnexpected :: Set (ErrorItem t)
- errorExpected :: Set (ErrorItem t)
- errorCustom :: Set e
- class ShowToken a where
- class Ord a => ShowErrorComponent a where
- parseErrorPretty :: (Ord t, ShowToken t, ShowErrorComponent e) => ParseError t e -> String
- class Ord (Token s) => Stream s where
- data State s = State {
- stateInput :: s
- statePos :: NonEmpty SourcePos
- stateTabWidth :: Pos
- getInput :: MonadParsec e s m => m s
- setInput :: MonadParsec e s m => s -> m ()
- getPosition :: MonadParsec e s m => m SourcePos
- setPosition :: MonadParsec e s m => SourcePos -> m ()
- pushPosition :: MonadParsec e s m => SourcePos -> m ()
- popPosition :: MonadParsec e s m => m ()
- getTabWidth :: MonadParsec e s m => m Pos
- setTabWidth :: MonadParsec e s m => Pos -> m ()
- getParserState :: MonadParsec e s m => m (State s)
- setParserState :: MonadParsec e s m => State s -> m ()
- updateParserState :: MonadParsec e s m => (State s -> State s) -> m ()
Running parser
type Parsec e s = ParsecT e s Identity Source #
Parsec
is a non-transformer variant of the more general ParsecT
monad transformer.
ParsecT e s m a
is a parser with custom data component of error e
,
stream type s
, underlying monad m
and return type a
.
(ErrorComponent e, Stream s) => MonadParsec e s (ParsecT e s m) Source # | |
(ErrorComponent e, Stream s, MonadError e' m) => MonadError e' (ParsecT e s m) Source # | |
(ErrorComponent e, Stream s, MonadReader r m) => MonadReader r (ParsecT e s m) Source # | |
(ErrorComponent e, Stream s, MonadState st m) => MonadState st (ParsecT e s m) Source # | |
MonadTrans (ParsecT e s) Source # | |
(ErrorComponent e, Stream s) => Monad (ParsecT e s m) Source # | |
Functor (ParsecT e s m) Source # | |
(ErrorComponent e, Stream s) => MonadFail (ParsecT e s m) Source # | |
(ErrorComponent e, Stream s) => Applicative (ParsecT e s m) Source # | |
(ErrorComponent e, Stream s, MonadIO m) => MonadIO (ParsecT e s m) Source # | |
(ErrorComponent e, Stream s) => Alternative (ParsecT e s m) Source # | |
(ErrorComponent e, Stream s) => MonadPlus (ParsecT e s m) Source # | |
(ErrorComponent e, Stream s, MonadCont m) => MonadCont (ParsecT e s m) Source # | |
:: Parsec e s a | Parser to run |
-> String | Name of source file |
-> s | Input for parser |
-> Either (ParseError (Token s) e) a |
runParser p file input
runs parser p
on the input list of tokens
input
, obtained from source file
. The file
is only used in error
messages and may be the empty string. Returns either a ParseError
(Left
) or a value of type a
(Right
).
parseFromFile p file = runParser p file <$> readFile file
:: Monad m | |
=> ParsecT e s m a | Parser to run |
-> String | Name of source file |
-> s | Input for parser |
-> m (Either (ParseError (Token s) e) a) |
runParserT p file input
runs parser p
on the input list of tokens
input
, obtained from source file
. The file
is only used in error
messages and may be the empty string. Returns a computation in the
underlying monad m
that returns either a ParseError
(Left
) or a
value of type a
(Right
).
:: Monad m | |
=> ParsecT e s m a | Parser to run |
-> State s | Initial state |
-> m (State s, Either (ParseError (Token s) e) a) |
This function is similar to runParserT
, but like runParser'
it
accepts and returns parser state. This is thus the most general way to
run a parser.
Since: 4.2.0
:: Parsec e s a | Parser to run |
-> String | Name of source file |
-> s | Input for parser |
-> Either (ParseError (Token s) e) a |
parse p file input
runs parser p
over Identity
(see runParserT
if you're using the ParsecT
monad transformer; parse
itself is just a
synonym for runParser
). It returns either a ParseError
(Left
) or a
value of type a
(Right
). parseErrorPretty
can be used to turn
ParseError
into the string representation of the error message. See
Text.Megaparsec.Error if you need to do more advanced error analysis.
main = case (parse numbers "" "11,2,43") of
         Left err -> putStr (parseErrorPretty err)
         Right xs -> print (sum xs)

numbers = integer `sepBy` char ','
parseMaybe :: (ErrorComponent e, Stream s) => Parsec e s a -> s -> Maybe a Source #
parseMaybe p input
runs parser p
on input
and returns result
inside Just
on success and Nothing
on failure. This function also
parses eof
, so if the parser doesn't consume all of its input, it will
fail.
The function is supposed to be useful for lightweight parsing, where error messages (and thus file name) are not important and entire input should be parsed. For example it can be used when parsing of single number according to specification of its format is desired.
:: (ShowErrorComponent e, Ord (Token s), ShowToken (Token s), Show a) | |
=> Parsec e s a | Parser to run |
-> s | Input for parser |
-> IO () |
The expression parseTest p input
applies a parser p
against input
input
and prints the result to stdout. Useful for testing.
Combinators
(<|>) :: Alternative f => forall a. f a -> f a -> f a #
An associative binary operation
This combinator implements choice. The parser p <|> q
first applies
p
. If it succeeds, the value of p
is returned. If p
fails
without consuming any input, parser q
is tried.
The parser is called predictive since q
is only tried when parser p
didn't consume any input (i.e. the look ahead is 1). This
non-backtracking behaviour allows for both an efficient implementation of
the parser combinators and the generation of good error messages.
many :: Alternative f => forall a. f a -> f [a] #
Zero or more.
many p
applies the parser p
zero or more times. Returns a list of
the returned values of p
.
identifier = (:) <$> letter <*> many (alphaNum <|> char '_')
some :: Alternative f => forall a. f a -> f [a] #
One or more.
some p
applies the parser p
one or more times. Returns a list of
the returned values of p
.
word = some letter
optional :: Alternative f => f a -> f (Maybe a) #
One or none.
optional p
tries to apply parser p
. It will parse p
or nothing. It
only fails if p
fails after consuming input. On success result of p
is returned inside of Just
, on failure Nothing
is returned.
unexpected :: MonadParsec e s m => ErrorItem (Token s) -> m a Source #
The parser unexpected item
always fails with an error message telling
about unexpected item item
without consuming any input.
failure :: MonadParsec e s m => Set (ErrorItem (Token s)) -> Set (ErrorItem (Token s)) -> Set e -> m a Source #
The most general way to stop parsing and report ParseError
.
unexpected
is defined in terms of this function:
unexpected item = failure (Set.singleton item) Set.empty Set.empty
Since: 4.2.0
(<?>) :: MonadParsec e s m => m a -> String -> m a infix 0 Source #
A synonym for label
in form of an operator.
label :: MonadParsec e s m => String -> m a -> m a Source #
The parser label name p
behaves as parser p
, but whenever the
parser p
fails without consuming any input, it replaces names of
“expected” tokens with the name name
.
hidden :: MonadParsec e s m => m a -> m a Source #
hidden p
behaves just like parser p
, but it doesn't show any
“expected” tokens in error message when p
fails.
try :: MonadParsec e s m => m a -> m a Source #
The parser try p
behaves like parser p
, except that it
pretends that it hasn't consumed any input when an error occurs.
This combinator is used whenever arbitrary look ahead is needed. Since
it pretends that it hasn't consumed any input when p
fails, the
(<|>
) combinator will try its second alternative even when the
first parser failed while consuming input.
For example, here is a parser that is supposed to parse word “let” or “lexical”:
>>>
parseTest (string "let" <|> string "lexical") "lexical"
1:1: unexpected "lex" expecting "let"
What happens here? First parser consumes “le” and fails (because it
doesn't see a “t”). The second parser, however, isn't tried, since the
first parser has already consumed some input! try
fixes this behavior
and allows backtracking to work:
>>>
parseTest (try (string "let") <|> string "lexical") "lexical"
"lexical"
try
also improves error messages in case of overlapping alternatives,
because Megaparsec's hint system can be used:
>>>
parseTest (try (string "let") <|> string "lexical") "le"
1:1: unexpected "le" expecting "let" or "lexical"
Please note that as of Megaparsec 4.4.0, string
backtracks
automatically (see tokens
), so it does not need try
. However, the
examples above demonstrate the idea behind try
so well that it was
decided to keep them.
lookAhead :: MonadParsec e s m => m a -> m a Source #
lookAhead p
parses p
without consuming any input.
If p
fails and consumes some input, so does lookAhead
. Combine with
try
if this is undesirable.
notFollowedBy :: MonadParsec e s m => m a -> m () Source #
notFollowedBy p
only succeeds when parser p
fails. This parser
does not consume any input and can be used to implement the “longest
match” rule.
withRecovery :: MonadParsec e s m => (ParseError (Token s) e -> m a) -> m a -> m a Source #
withRecovery r p
allows parsing to continue even if parser p
fails.
In this case r
is called with actual ParseError
as its argument.
Typical usage is to return value signifying failure to parse this
particular object and to consume some part of input up to start of next
object.
Note that if r
fails, original error message is reported as if
without withRecovery
. In no way recovering parser r
can influence
error messages.
Since: 4.4.0
eof :: MonadParsec e s m => m () Source #
This parser only succeeds at the end of the input.
token :: MonadParsec e s m => (Token s -> Either (Set (ErrorItem (Token s)), Set (ErrorItem (Token s)), Set e) a) -> Maybe (Token s) -> m a Source #
The parser token test mrep
accepts a token t
with result x
when
the function test t
returns Right x
. mrep
may provide
representation of the token to report in error messages when input
stream is empty.
This is the most primitive combinator for accepting tokens. For
example, the satisfy
parser is implemented as:
satisfy f = token testChar Nothing
  where
    testChar x =
      if f x
        then Right x
        else Left (Set.singleton (Tokens (x:|[])), Set.empty, Set.empty)
tokens :: MonadParsec e s m => (Token s -> Token s -> Bool) -> [Token s] -> m [Token s] Source #
The parser tokens test
parses list of tokens and returns it.
Supplied predicate test
is used to check equality of given and parsed
tokens.
This can be used for example to write string
:
string = tokens (==)
Note that beginning from Megaparsec 4.4.0, this is an auto-backtracking
primitive, which means that if it fails, it never consumes any
input. This is done to make its consumption model match how error
messages for this primitive are reported (which becomes an important
thing as user gets more control with primitives like withRecovery
):
>>>
parseTest (string "abc") "abd"
1:1: unexpected "abd" expecting "abc"
This means, in particular, that it's no longer necessary to use try
with tokens
-based parsers, such as string
and
string'
. This feature does not affect
performance in any way.
between :: Applicative m => m open -> m close -> m a -> m a Source #
between open close p
parses open
, followed by p
and close
.
Returns the value returned by p
.
braces = between (symbol "{") (symbol "}")
choice :: (Foldable f, Alternative m) => f (m a) -> m a Source #
choice ps
tries to apply the parsers in the list ps
in order,
until one of them succeeds. Returns the value of the succeeding parser.
count :: Applicative m => Int -> m a -> m [a] Source #
count n p
parses n
occurrences of p
. If n
is smaller than or
equal to zero, the parser is equivalent to return []
. Returns a list of n
values.
count' :: Alternative m => Int -> Int -> m a -> m [a] Source #
count' m n p
parses from m
to n
occurrences of p
. If n
is
not positive or m > n
, the parser is equivalent to return []
. Returns a list
of parsed values.
Please note that m
may be negative, in this case effect is the same
as if it were equal to zero.
eitherP :: Alternative m => m a -> m b -> m (Either a b) Source #
Combine two alternatives.
Since: 4.4.0
endBy :: Alternative m => m a -> m sep -> m [a] Source #
endBy p sep
parses zero or more occurrences of p
, separated
and ended by sep
. Returns a list of values returned by p
.
cStatements = cStatement `endBy` semicolon
endBy1 :: Alternative m => m a -> m sep -> m [a] Source #
endBy1 p sep
parses one or more occurrences of p
, separated
and ended by sep
. Returns a list of values returned by p
.
manyTill :: Alternative m => m a -> m end -> m [a] Source #
manyTill p end
applies parser p
zero or more times until
parser end
succeeds. Returns the list of values returned by p
. This
parser can be used to scan comments:
simpleComment = string "<!--" >> manyTill anyChar (string "-->")
someTill :: Alternative m => m a -> m end -> m [a] Source #
someTill p end
works similarly to manyTill p end
, but p
should
succeed at least once.
option :: Alternative m => a -> m a -> m a Source #
option x p
tries to apply parser p
. If p
fails without
consuming input, it returns the value x
, otherwise the value returned
by p
.
priority = option 0 (digitToInt <$> digitChar)
sepBy :: Alternative m => m a -> m sep -> m [a] Source #
sepBy p sep
parses zero or more occurrences of p
, separated
by sep
. Returns a list of values returned by p
.
commaSep p = p `sepBy` comma
sepBy1 :: Alternative m => m a -> m sep -> m [a] Source #
sepBy1 p sep
parses one or more occurrences of p
, separated
by sep
. Returns a list of values returned by p
.
sepEndBy :: Alternative m => m a -> m sep -> m [a] Source #
sepEndBy p sep
parses zero or more occurrences of p
,
separated and optionally ended by sep
. Returns a list of values
returned by p
.
sepEndBy1 :: Alternative m => m a -> m sep -> m [a] Source #
sepEndBy1 p sep
parses one or more occurrences of p
,
separated and optionally ended by sep
. Returns a list of values
returned by p
.
skipMany :: Alternative m => m a -> m () Source #
skipMany p
applies the parser p
zero or more times, skipping
its result.
space = skipMany spaceChar
skipSome :: Alternative m => m a -> m () Source #
skipSome p
applies the parser p
one or more times, skipping
its result.
Character parsing
crlf :: (MonadParsec e s m, Token s ~ Char) => m String Source #
Parses a carriage return character followed by a newline character. Returns sequence of characters parsed.
controlChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses control characters, which are the non-printing characters of the Latin-1 subset of Unicode.
spaceChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses a Unicode space character, and the control characters: tab, newline, carriage return, form feed, and vertical tab.
upperChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses an upper-case or title-case alphabetic Unicode character. Title case is used by a small number of letter ligatures like the single-character form of Lj.
lowerChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses a lower-case alphabetic Unicode character.
letterChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses alphabetic Unicode characters: lower-case, upper-case and title-case letters, plus letters of case-less scripts and modifier letters.
alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses alphabetic or numeric digit Unicode characters.
Note that numeric digits outside the ASCII range are parsed by this
parser but not by digitChar
. Such digits may be part of identifiers but
are not used by the printer and reader to represent numbers.
printChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses printable Unicode characters: letters, numbers, marks, punctuation, symbols and spaces.
digitChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses an ASCII digit, i.e. between “0” and “9”.
octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses an octal digit, i.e. between “0” and “7”.
hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses a hexadecimal digit, i.e. between “0” and “9”, or “a” and “f”, or “A” and “F”.
markChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses Unicode mark characters, for example accents and the like, which combine with preceding characters.
numberChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses Unicode numeric characters, including digits from various scripts, Roman numerals, et cetera.
punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses Unicode punctuation characters, including various kinds of connectors, brackets and quotes.
symbolChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses Unicode symbol characters, including mathematical and currency symbols.
separatorChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses Unicode space and separator characters.
asciiChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses a character from the first 128 characters of the Unicode character set, corresponding to the ASCII character set.
latin1Char :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses a character from the first 256 characters of the Unicode character set, corresponding to the ISO 8859-1 (Latin-1) character set.
charCategory :: (MonadParsec e s m, Token s ~ Char) => GeneralCategory -> m Char Source #
charCategory cat
parses a character in Unicode General Category cat
,
see GeneralCategory
.
char :: (MonadParsec e s m, Token s ~ Char) => Char -> m Char Source #
char c
parses a single character c
.
semicolon = char ';'
char' :: (MonadParsec e s m, Token s ~ Char) => Char -> m Char Source #
The same as char
but case-insensitive. This parser returns actually
parsed character preserving its case.
>>>
parseTest (char' 'e') "E"
'E'
>>>
parseTest (char' 'e') "G"
1:1: unexpected 'G' expecting 'E' or 'e'
anyChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
This parser succeeds for any character. Returns the parsed character.
oneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char Source #
oneOf cs
succeeds if the current character is in the supplied
list of characters cs
. Returns the parsed character. Note that this
parser doesn't automatically generate “expected” component of error
message, so usually you should label it manually with label
or
(<?>
).
See also: satisfy
.
digit = oneOf ['0'..'9'] <?> "digit"
oneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char Source #
The same as oneOf
, but case-insensitive. Returns the parsed character
preserving its case.
vowel = oneOf' "aeiou" <?> "vowel"
noneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char Source #
As the dual of oneOf
, noneOf cs
succeeds if the current
character is not in the supplied list of characters cs
. Returns the
parsed character.
noneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char Source #
The same as noneOf
, but case-insensitive.
consonant = noneOf' "aeiou" <?> "consonant"
satisfy :: (MonadParsec e s m, Token s ~ Char) => (Char -> Bool) -> m Char Source #
The parser satisfy f
succeeds for any character for which the
supplied function f
returns True
. Returns the character that is
actually parsed.
digitChar = satisfy isDigit <?> "digit"
oneOf cs  = satisfy (`elem` cs)
string :: (MonadParsec e s m, Token s ~ Char) => String -> m String Source #
string s
parses a sequence of characters given by s
. Returns
the parsed string (i.e. s
).
divOrMod = string "div" <|> string "mod"
string' :: (MonadParsec e s m, Token s ~ Char) => String -> m String Source #
The same as string
, but case-insensitive. On success returns string
cased as actually parsed input.
>>>
parseTest (string' "foobar") "foObAr"
"foObAr"
Textual source position
mkPos :: (Integral a, MonadThrow m) => a -> m Pos Source #
Construction of Pos
from an instance of Integral
. The function
throws InvalidPosException
when given non-positive argument. Note that
the function is polymorphic with respect to MonadThrow
m
, so you can
get result inside of Maybe
, for example.
Since: 5.0.0
unsafePos :: Word -> Pos Source #
Dangerous construction of Pos
. Use when you know for sure that
argument is positive.
Since: 5.0.0
data InvalidPosException Source #
The exception is thrown by mkPos
when its argument is not a positive
number.
Since: 5.0.0
The data type SourcePos
represents source positions. It contains the
name of the source file, a line number, and a column number. Source line
and column positions change intensively during parsing, so we need to
make them strict to avoid memory leaks.
SourcePos | |
|
initialPos :: String -> SourcePos Source #
Construct initial position (line 1, column 1) given name of source file.
Error messages
Data type that is used to represent “unexpected/expected” items in
parse error. The data type is parametrized over token type t
.
Since: 5.0.0
Tokens (NonEmpty t) | Non-empty stream of tokens |
Label (NonEmpty Char) | Label (cannot be empty) |
EndOfInput | End of input |
Eq t => Eq (ErrorItem t) Source # | |
Data t => Data (ErrorItem t) Source # | |
Ord t => Ord (ErrorItem t) Source # | |
Read t => Read (ErrorItem t) Source # | |
Show t => Show (ErrorItem t) Source # | |
Generic (ErrorItem t) Source # | |
NFData t => NFData (ErrorItem t) Source # | |
(Ord t, ShowToken t) => ShowErrorComponent (ErrorItem t) Source # | |
type Rep (ErrorItem t) Source # | |
class Ord e => ErrorComponent e where Source #
The type class defines how to represent information about various
exceptional situations. Data types that are used as custom data component
in ParseError
must be instances of this type class.
Since: 5.0.0
“Default error component”. This is our instance of ErrorComponent
provided out-of-box.
Since: 5.0.0
data ParseError t e Source #
The data type ParseError
represents parse errors. It provides the
stack of source positions, set of expected and unexpected tokens as well
as set of custom associated data. The data type is parametrized over
token type t
and custom data e
.
Note that stack of source positions contains current position as its head, and the rest of positions allows to track full sequence of include files with topmost source file at the end of the list.
Semigroup
(or Monoid
) instance of the data type allows to merge parse
errors from different branches of parsing. When merging two
ParseError
s, longest match is preferred; if positions are the same,
custom data sets and collections of message items are combined.
ParseError | |
|
(Eq t, Eq e) => Eq (ParseError t e) Source # | |
(Data t, Data e, Ord t, Ord e) => Data (ParseError t e) Source # | |
(Ord t, Ord e, Read t, Read e) => Read (ParseError t e) Source # | |
(Show t, Show e) => Show (ParseError t e) Source # | |
Generic (ParseError t e) Source # | |
(Ord t, Ord e) => Semigroup (ParseError t e) Source # | |
(Ord t, Ord e) => Monoid (ParseError t e) Source # | |
(Show t, Typeable * t, Show e, Typeable * e) => Exception (ParseError t e) Source # | |
(NFData t, NFData e) => NFData (ParseError t e) Source # | |
type Rep (ParseError t e) Source # | |
class ShowToken a where Source #
Type class ShowToken
includes methods that allow to pretty-print
single token as well as stream of tokens. This is used for rendering of
error messages.
showTokens :: NonEmpty a -> String Source #
Pretty-print non-empty stream of tokens. This function is also used to print single tokens (represented as singleton lists).
Since: 5.0.0
class Ord a => ShowErrorComponent a where Source #
The type class defines how to print custom data component of
ParseError
.
Since: 5.0.0
showErrorComponent :: a -> String Source #
Pretty-print custom data component of ParseError
.
ShowErrorComponent Dec Source # | |
(Ord t, ShowToken t) => ShowErrorComponent (ErrorItem t) Source # | |
:: (Ord t, ShowToken t, ShowErrorComponent e) | |
=> ParseError t e | Parse error to render |
-> String | Result of rendering |
Pretty-print ParseError
. Note that rendered String
always ends with
a newline.
Since: 5.0.0
Low-level operations
class Ord (Token s) => Stream s where Source #
An instance of Stream s
has stream type s
. Token type is determined
by the stream and can be found via Token
type function.
uncons :: s -> Maybe (Token s, s) Source #
Get next token from the stream. If the stream is empty, return
Nothing
.
updatePos :: Proxy s -> Pos -> SourcePos -> Token s -> (SourcePos, SourcePos) Source #
Update position in stream given tab width, current position, and current token. The result is a tuple where the first element will be used to report parse errors for current token, while the second element is the incremented position that will be stored in parser's state.
When you work with streams where elements do not contain information
about their position in input, result usually consists of the third
argument unchanged and incremented position calculated with respect to
current token. This is how default instances of Stream
work (they use
defaultUpdatePos
, which may be a good starting point for your own
position-advancing function).
When you wish to deal with stream of tokens where every token “knows” its start and end position in input (for example, you have produced the stream with Happy/Alex), then the best strategy is to use the start position as actual element position and provide the end position of the token as incremented one.
Since: 5.0.0
This is Megaparsec's state, it's parametrized over stream type s
.
State | |
|
getInput :: MonadParsec e s m => m s Source #
Return the current input.
setInput :: MonadParsec e s m => s -> m () Source #
getPosition :: MonadParsec e s m => m SourcePos Source #
Return the current source position.
See also: setPosition
, pushPosition
, popPosition
, and SourcePos
.
setPosition :: MonadParsec e s m => SourcePos -> m () Source #
setPosition pos
sets the current source position to pos
.
See also: getPosition
, pushPosition
, popPosition
, and SourcePos
.
pushPosition :: MonadParsec e s m => SourcePos -> m () Source #
Push given position into stack of positions and continue parsing working with this position. Useful for working with include files and the like.
See also: getPosition
, setPosition
, popPosition
, and SourcePos
.
Since: 5.0.0
popPosition :: MonadParsec e s m => m () Source #
Pop a position from stack of positions unless it only contains one
element (in that case stack of positions remains the same). This is how
to return to previous source file after pushPosition
.
See also: getPosition
, setPosition
, pushPosition
, and SourcePos
.
Since: 5.0.0
getTabWidth :: MonadParsec e s m => m Pos Source #
Return tab width. Default tab width is equal to defaultTabWidth
. You
can set different tab width with help of setTabWidth
.
setTabWidth :: MonadParsec e s m => Pos -> m () Source #
Set tab width. If argument of the function is not positive number,
defaultTabWidth
will be used.
getParserState :: MonadParsec e s m => m (State s) Source #
Returns the full parser state as a State
record.
setParserState :: MonadParsec e s m => State s -> m () Source #
setParserState st
sets the full parser state to st
.
updateParserState :: MonadParsec e s m => (State s -> State s) -> m () Source #
updateParserState f
applies function f
to the parser state.