-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | High-performance parsing from strict bytestrings -- -- Flatparse is a high-performance parsing library, focusing on -- programming languages and human-readable data formats. See the README -- for more information: -- https://github.com/AndrasKovacs/flatparse. @package flatparse @version 0.3.5.1 -- | Compatibility layer for numeric primops. -- -- GHC 9.2 standardized unboxed numeric primops. Prior, it was quite -- asymmetric. Many primop functions used the native unboxed numerics -- Word# and Int# even if a sized unboxed numeric was in -- the name, e.g. indexWord8OffAddr# returning Word# -- pre-9.2. All boxed machine integers only stored Word# -- internally! -- -- We target GHC 9.2's better handling. In order to maintain -- compatibility with older GHCs, we define missing primops and wrap ones -- that changed type. Usually, we can write a few wrappers so that 9.2 -- uses sized unboxed numerics everywhere, and pre-9.2 uses native -- unboxed numerics everywhere. Sometimes we really want to work with -- sized unboxed numerics on both, in which case we have to do more -- involved primop wrapping. -- -- The general pattern is as follows: -- --
-- isDigit c = '0' <= c && c <= '9' --isDigit :: Char -> Bool -- |
-- isLatinLetter c = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
--
isLatinLetter :: Char -> Bool
-- |
-- isGreekLetter c = ('Α' <= c && c <= 'Ω') || ('α' <= c && c <= 'ω')
--
isGreekLetter :: Char -> Bool
mul10 :: Int# -> Int#
readInt' :: Int# -> Addr# -> Addr# -> (# Int#, Addr# #)
-- | Read an Int from the input, as a non-empty digit sequence. The
-- Int may overflow in the result.
readInt :: Addr# -> Addr# -> (# (# #) | (# Int#, Addr# #) #)
-- | Read an Integer from the input, as a non-empty digit sequence.
readInteger :: ForeignPtrContents -> Addr# -> Addr# -> (# (# #) | (# Integer, Addr# #) #)
-- | Byte offset counted backwards from the end of the buffer.
newtype Pos
Pos :: Int -> Pos
-- | A pair of positions.
data Span
Span :: !Pos -> !Pos -> Span
addrToPos# :: Addr# -> Addr# -> Pos
posToAddr# :: Addr# -> Pos -> Addr#
-- | Slice into a ByteString using a Span. The result is
-- invalid if the Span is not a valid slice of the first argument.
unsafeSlice :: ByteString -> Span -> ByteString
-- | Convert a String to a UTF-8-coded ByteString.
packUTF8 :: String -> ByteString
charToBytes :: Char -> [Word]
strToBytes :: String -> [Word]
packBytes :: [Word] -> Word
splitBytes :: [Word] -> ([Word], [Word])
derefChar8# :: Addr# -> Char#
data Trie a
Branch :: !a -> !Map Word (Trie a) -> Trie a
type Rule = Maybe Int
nilTrie :: Trie Rule
updRule :: Int -> Maybe Int -> Maybe Int
insert :: Int -> [Word] -> Trie Rule -> Trie Rule
listToTrie :: [(Int, String)] -> Trie Rule
-- | Decorate a trie with the minimum lengths of non-empty paths. This is
-- used later to place ensureBytes#.
mindepths :: Trie Rule -> Trie (Rule, Int)
data Trie' a
Branch' :: !a -> !Map Word (Trie' a) -> Trie' a
Path :: !a -> ![Word] -> !Trie' a -> Trie' a
-- | Compress linear paths.
pathify :: Trie (Rule, Int) -> Trie' (Rule, Int)
-- | Compute where to fall back after we exhausted a branch. If the branch
-- is empty, that means we've succeeded at reading and we jump to the rhs
-- rule.
fallbacks :: Trie' (Rule, Int) -> Trie' (Rule, Int, Int)
-- | Decorate with ensureBytes# invocations, represented as `Maybe
-- Int`.
ensureBytes :: Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Maybe Int)
compileTrie :: [(Int, String)] -> Trie' (Rule, Int, Maybe Int)
instance GHC.Show.Show FlatParse.Internal.Pos
instance GHC.Classes.Eq FlatParse.Internal.Pos
instance GHC.Show.Show FlatParse.Internal.Span
instance GHC.Classes.Eq FlatParse.Internal.Span
instance GHC.Show.Show a => GHC.Show.Show (FlatParse.Internal.Trie a)
instance GHC.Show.Show a => GHC.Show.Show (FlatParse.Internal.Trie' a)
instance GHC.Classes.Ord FlatParse.Internal.Pos
-- | This module implements a Parser supporting custom error types.
-- If you need efficient indentation parsing, use
-- FlatParse.Stateful instead.
--
-- Many internals are exposed for hacking on and extending. These are
-- generally denoted by a # hash suffix.
module FlatParse.Basic
-- | Parser e a has an error type e and a return type
-- a.
newtype Parser e a
Parser :: (ForeignPtrContents -> Addr# -> Addr# -> Res# e a) -> Parser e a
[runParser#] :: Parser e a -> ForeignPtrContents -> Addr# -> Addr# -> Res# e a
-- | Primitive result of a parser. Possible results are given by
-- OK#, Err# and Fail# pattern synonyms.
type Res# e a = (# (# a, Addr# #) | (# #) | (# e #) #)
-- | Contains return value and a pointer to the rest of the input buffer.
pattern OK# :: a -> Addr# -> Res# e a
-- | Constructor for recoverable failure.
pattern Fail# :: Res# e a
-- | Constructor for errors which are by default non-recoverable.
pattern Err# :: e -> Res# e a
-- | Higher-level boxed data type for parsing results.
data Result e a
-- | Contains return value and unconsumed input.
OK :: a -> !ByteString -> Result e a
-- | Recoverable-by-default failure.
Fail :: Result e a
-- | Unrecoverable-by-default error.
Err :: !e -> Result e a
-- | Run a parser.
runParser :: Parser e a -> ByteString -> Result e a
-- | Run a parser on a String input. Reminder:
-- OverloadedStrings for ByteString does not yield a
-- valid UTF-8 encoding! For non-ASCII ByteString literal input,
-- use runParserS or packUTF8 for testing.
runParserS :: Parser e a -> String -> Result e a
-- | The failing parser. By default, parser choice (<|>)
-- arbitrarily backtracks on parser failure.
empty :: Parser e a
-- | Throw a parsing error. By default, parser choice (<|>)
-- can't backtrack on parser error. Use try to convert an error to
-- a recoverable failure.
err :: e -> Parser e a
-- | Save the parsing state, then run a parser, then restore the state.
lookahead :: Parser e a -> Parser e a
-- | Convert a parsing failure to a success.
fails :: Parser e a -> Parser e ()
-- | Convert a parsing error into failure.
try :: Parser e a -> Parser e a
-- | Convert a parsing failure to a Maybe. If possible, use
-- withOption instead.
optional :: Parser e a -> Parser e (Maybe a)
-- | Convert a parsing failure to a ().
optional_ :: Parser e a -> Parser e ()
-- | CPS'd version of optional. This is usually more efficient,
-- since it gets rid of the extra Maybe allocation.
withOption :: Parser e a -> (a -> Parser e b) -> Parser e b -> Parser e b
-- | Convert a parsing failure to an error.
cut :: Parser e a -> e -> Parser e a
-- | Run the parser, if we get a failure, throw the given error, but if we
-- get an error, merge the inner and the newly given errors using the
-- e -> e -> e function. This can be useful for
-- implementing parsing errors which may propagate hints or accumulate
-- contextual information.
cutting :: Parser e a -> e -> (e -> e -> e) -> Parser e a
-- | Succeed if the input is empty.
eof :: Parser e ()
-- | Read the given number of bytes as a ByteString.
--
-- Throws a runtime error if given a negative integer.
takeBs :: Int -> Parser e ByteString
-- | Consume the rest of the input. May return the empty bytestring.
takeRestBs :: Parser e ByteString
-- | Skip forward n bytes. Fails if fewer than n bytes
-- are available.
--
-- Throws a runtime error if given a negative integer.
skip :: Int -> Parser e ()
-- | Parse a UTF-8 character literal. This is a template function, you can
-- use it as $(char 'x'), for example, and the splice in this
-- case has type Parser e ().
char :: Char -> Q Exp
-- | Read a Word8.
byte :: Word8 -> Parser e ()
-- | Read a sequence of bytes. This is a template function, you can use it
-- as $(bytes [3, 4, 5]), for example, and the splice has type
-- Parser e ().
bytes :: [Word] -> Q Exp
-- | Parse a UTF-8 string literal. This is a template function, you can use
-- it as $(string "foo"), for example, and the splice has type
-- Parser e ().
string :: String -> Q Exp
-- | This is a template function which makes it possible to branch on a
-- collection of string literals in an efficient way. By using
-- switch, such branching is compiled to a trie of primitive
-- parsing operations, which has optimized control flow, vectorized reads
-- and grouped checking for needed input bytes.
--
-- The syntax is slightly magical, it overloads the usual case
-- expression. An example:
--
-- -- $(switch [| case _ of -- "foo" -> pure True -- "bar" -> pure False |]) ---- -- The underscore is mandatory in case _ of. Each branch must be -- a string literal, but optionally we may have a default case, like in -- --
-- $(switch [| case _ of -- "foo" -> pure 10 -- "bar" -> pure 20 -- _ -> pure 30 |]) ---- -- All case right hand sides must be parsers with the same type. That -- type is also the type of the whole switch expression. -- -- A switch has longest match semantics, and the order of cases -- does not matter, except for the default case, which may only appear as -- the last case. -- -- If a switch does not have a default case, and no case matches -- the input, then it returns with failure, without having consumed any -- input. A fallthrough to the default case also does not consume any -- input. switch :: Q Exp -> Q Exp -- | Switch expression with an optional first argument for performing a -- post-processing action after every successful branch matching, not -- including the default branch. For example, if we have ws :: Parser -- e () for a whitespace parser, we might want to consume whitespace -- after matching on any of the switch cases. For that case, we can -- define a "lexeme" version of switch as follows. -- --
-- switch' :: Q Exp -> Q Exp -- switch' = switchWithPost (Just [| ws |]) ---- -- Note that this switch' function cannot be used in the same -- module it's defined in, because of the stage restriction of Template -- Haskell. switchWithPost :: Maybe (Q Exp) -> Q Exp -> Q Exp -- | Version of switchWithPost without syntactic sugar. The second -- argument is the list of cases, the third is the default case. rawSwitchWithPost :: Maybe (Q Exp) -> [(String, Q Exp)] -> Maybe (Q Exp) -> Q Exp -- | Parse a UTF-8 Char for which a predicate holds. satisfy :: (Char -> Bool) -> Parser e Char -- | Skip a UTF-8 Char for which a predicate holds. satisfy_ :: (Char -> Bool) -> Parser e () -- | Parse an ASCII Char for which a predicate holds. Assumption: -- the predicate must only return True for ASCII-range characters. -- Otherwise this function might read a 128-255 range byte, thereby -- breaking UTF-8 decoding. satisfyASCII :: (Char -> Bool) -> Parser e Char -- | Skip an ASCII Char for which a predicate holds. Assumption: the -- predicate must only return True for ASCII-range characters. satisfyASCII_ :: (Char -> Bool) -> Parser e () -- | This is a variant of satisfy which allows more optimization. We -- can pick four testing functions for the four cases for the possible -- number of bytes in the UTF-8 character. So in fusedSatisfy f1 f2 -- f3 f4, if we read a one-byte character, the result is scrutinized -- with f1, for two-bytes, with f2, and so on. This can -- result in dramatic lexing speedups. -- -- For example, if we want to accept any letter, the naive solution would -- be to use isLetter, but this accesses a large lookup table of -- Unicode character classes. We can do better with fusedSatisfy -- isLatinLetter isLetter isLetter isLetter, since here the -- isLatinLetter is inlined into the UTF-8 decoding, and it -- probably handles a great majority of all cases without accessing the -- character table. 
fusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> Parser e Char -- | Skipping variant of fusedSatisfy. fusedSatisfy_ :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> Parser e () -- | Parse any Word8 (byte). anyWord8 :: Parser e Word8 -- | Skip any Word8 (byte). anyWord8_ :: Parser e () -- | Parse any Word16. anyWord16 :: Parser e Word16 -- | Skip any Word16. anyWord16_ :: Parser e () -- | Parse any Word32. anyWord32 :: Parser e Word32 -- | Skip any Word32. anyWord32_ :: Parser e () -- | Parse any Word64. anyWord64 :: Parser e Word64 -- | Skip any Word64. anyWord64_ :: Parser e () -- | Parse any Word. anyWord :: Parser e Word -- | Skip any Word. anyWord_ :: Parser e () -- | Parse any Int8. anyInt8 :: Parser e Int8 -- | Parse any Int16. anyInt16 :: Parser e Int16 -- | Parse any Int32. anyInt32 :: Parser e Int32 -- | Parse any Int64. anyInt64 :: Parser e Int64 -- | Parse any Int. anyInt :: Parser e Int -- | Parse any UTF-8-encoded Char. anyChar :: Parser e Char -- | Skip any UTF-8-encoded Char. anyChar_ :: Parser e () -- | Parse any Char in the ASCII range, fail if the next input -- character is not in the range. This is more efficient than -- anyChar if we are only working with ASCII. anyCharASCII :: Parser e Char -- | Skip any Char in the ASCII range. More efficient than -- anyChar_ if we're working only with ASCII. anyCharASCII_ :: Parser e () -- |
-- isDigit c = '0' <= c && c <= '9' --isDigit :: Char -> Bool -- |
-- isGreekLetter c = ('Α' <= c && c <= 'Ω') || ('α' <= c && c <= 'ω')
--
isGreekLetter :: Char -> Bool
-- |
-- isLatinLetter c = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
--
isLatinLetter :: Char -> Bool
-- | Read a non-negative Int from the input, as a non-empty digit
-- sequence. The Int may overflow in the result.
readInt :: Parser e Int
-- | Read a non-negative Integer from the input, as a non-empty
-- digit sequence.
readInteger :: Parser e Integer
-- | Read a null-terminated bytestring (a C-style string).
--
-- Consumes the null terminator.
anyCString :: Parser e ByteString
-- | Parse any Word16 (little-endian).
anyWord16le :: Parser e Word16
-- | Parse any Word16 (big-endian).
anyWord16be :: Parser e Word16
-- | Parse any Word32 (little-endian).
anyWord32le :: Parser e Word32
-- | Parse any Word32 (big-endian).
anyWord32be :: Parser e Word32
-- | Parse any Word64 (little-endian).
anyWord64le :: Parser e Word64
-- | Parse any Word64 (big-endian).
anyWord64be :: Parser e Word64
-- | Parse any Int16 (little-endian).
anyInt16le :: Parser e Int16
-- | Parse any Int16 (big-endian).
anyInt16be :: Parser e Int16
-- | Parse any Int32 (little-endian).
anyInt32le :: Parser e Int32
-- | Parse any Int32 (big-endian).
anyInt32be :: Parser e Int32
-- | Parse any Int64 (little-endian).
anyInt64le :: Parser e Int64
-- | Parse any Int64 (big-endian).
anyInt64be :: Parser e Int64
-- | Choose between two parsers. If the first parser fails, try the second
-- one, but if the first one throws an error, propagate the error.
(<|>) :: Parser e a -> Parser e a -> Parser e a
infixr 6 <|>
-- | Branch on a parser: if the first argument succeeds, continue with the
-- second, else with the third. This can produce slightly more efficient
-- code than (<|>). Moreover, branch does not
-- backtrack from the true/false cases.
branch :: Parser e a -> Parser e b -> Parser e b -> Parser e b
-- | An analogue of the list foldl function: first parse a
-- b, then parse zero or more a-s, and combine the
-- results in a left-nested way by the b -> a -> b
-- function. Note: this is not the usual chainl function from the
-- parsec libraries!
chainl :: (b -> a -> b) -> Parser e b -> Parser e a -> Parser e b
-- | An analogue of the list foldr function: parse zero or more
-- a-s, terminated by a b, and combine the results in a
-- right-nested way using the a -> b -> b function. Note:
-- this is not the usual chainr function from the parsec
-- libraries!
chainr :: (a -> b -> b) -> Parser e a -> Parser e b -> Parser e b
-- | Run a parser zero or more times, collect the results in a list. Note:
-- for optimal performance, try to avoid this. Often it is possible to
-- get rid of the intermediate list by using a combinator or a custom
-- parser.
many :: Parser e a -> Parser e [a]
-- | Skip a parser zero or more times.
many_ :: Parser e a -> Parser e ()
-- | Run a parser one or more times, collect the results in a list. Note:
-- for optimal performance, try to avoid this. Often it is possible to
-- get rid of the intermediate list by using a combinator or a custom
-- parser.
some :: Parser e a -> Parser e [a]
-- | Skip a parser one or more times.
some_ :: Parser e a -> Parser e ()
-- | Succeed if the first parser succeeds and the second one fails.
notFollowedBy :: Parser e a -> Parser e b -> Parser e a
-- | isolate n p runs the parser p isolated to the next
-- n bytes. All isolated bytes must be consumed.
--
-- Throws a runtime error if given a negative integer.
isolate :: Int -> Parser e a -> Parser e a
-- | Byte offset counted backwards from the end of the buffer.
newtype Pos
Pos :: Int -> Pos
-- | A pair of positions.
data Span
Span :: !Pos -> !Pos -> Span
-- | Get the current position in the input.
getPos :: Parser e Pos
-- | Set the input position. Warning: this can result in crashes if the
-- position points outside the current buffer. It is always safe to
-- setPos values which came from getPos with the current
-- input.
setPos :: Pos -> Parser e ()
-- | The end of the input.
endPos :: Pos
-- | Return the consumed span of a parser.
spanOf :: Parser e a -> Parser e Span
-- | Bind the result together with the span of the result. CPS'd version of
-- spanOf for better unboxing.
withSpan :: Parser e a -> (a -> Span -> Parser e b) -> Parser e b
-- | Return the ByteString consumed by a parser. Note: it's more
-- efficient to use spanOf and withSpan instead.
byteStringOf :: Parser e a -> Parser e ByteString
-- | CPS'd version of byteStringOf. Can be more efficient, because
-- the result is more eagerly unboxed by GHC. It's more efficient to use
-- spanOf or withSpan instead.
withByteString :: Parser e a -> (a -> ByteString -> Parser e b) -> Parser e b
-- | Run a parser in a given input span. The input position is
-- restored after the parser is finished, so
-- inSpan does not consume input and has no side effect. Warning:
-- this operation may crash if the given span points outside the current
-- parsing buffer. It's always safe to use inSpan if the span
-- comes from a previous withSpan or spanOf call on the
-- current input.
inSpan :: Span -> Parser e a -> Parser e a
-- | Check whether a Pos points into a ByteString.
validPos :: ByteString -> Pos -> Bool
-- | Compute corresponding line and column numbers for each Pos in a
-- list. Throw an error on invalid positions. Note: computing lines and
-- columns may traverse the ByteString, but it traverses it only
-- once regardless of the length of the position list.
posLineCols :: ByteString -> [Pos] -> [(Int, Int)]
-- | Create a ByteString from a Span. The result is invalid
-- if the Span points outside the current buffer, or if the
-- Span start is greater than the end position.
unsafeSpanToByteString :: Span -> Parser e ByteString
-- | Slice into a ByteString using a Span. The result is
-- invalid if the Span is not a valid slice of the first argument.
unsafeSlice :: ByteString -> Span -> ByteString
-- | Create a Pos from a line and column number. Throws an error on
-- out-of-bounds line and column numbers.
mkPos :: ByteString -> (Int, Int) -> Pos
-- | Break a UTF-8-coded ByteString into lines. Throws an error on
-- invalid input. This is mostly useful for grabbing specific source
-- lines for displaying error messages.
lines :: ByteString -> [String]
-- | Parse the rest of the current line as a String. Assumes UTF-8
-- encoding, throws an error if the encoding is invalid.
takeLine :: Parser e String
-- | Parse the rest of the current line as a String, but restore the
-- parsing state. Assumes UTF-8 encoding. This can be used for debugging.
traceLine :: Parser e String
-- | Take the rest of the input as a String. Assumes UTF-8 encoding.
takeRest :: Parser e String
-- | Get the rest of the input as a String, but restore the parsing
-- state. Assumes UTF-8 encoding. This can be used for debugging.
traceRest :: Parser e String
-- | Convert a String to a UTF-8-coded ByteString.
packUTF8 :: String -> ByteString
-- | Convert a UTF-8-coded ByteString to a String.
unpackUTF8 :: ByteString -> String
-- | Check that the input has at least the given number of bytes.
ensureBytes# :: Int -> Parser e ()
-- | Read the given number of bytes as a ByteString.
--
-- Throws a runtime error if given a negative integer.
takeBs# :: Int# -> Parser e ByteString
-- | Skip forward n bytes and run the given parser. Fails if fewer
-- than n bytes are available.
--
-- Throws a runtime error if given a negative integer.
atSkip# :: Int# -> Parser e a -> Parser e a
-- | Decrease the current input position by the given number of bytes.
setBack# :: Int -> Parser e ()
-- | Run a parser, passing it the current address the parser is at.
--
-- Useful for parsing offset-based data tables. For example, you may use
-- this to save the base address to use together with various 0-indexed
-- offsets.
withAddr# :: (Addr# -> Parser e a) -> Parser e a
-- | takeBsOffAddr# addr# offset# len# moves to addr#, skips
-- offset# bytes, reads len# bytes into a
-- ByteString, and restores the original address.
--
-- The Addr# should be from withAddr#.
--
-- Useful for parsing offset-based data tables. For example, you may use
-- this together with withAddr# to jump to an offset in your input
-- and read some data.
takeBsOffAddr# :: Addr# -> Int# -> Int# -> Parser e ByteString
-- | lookahead, but specify the address to lookahead from.
--
-- The Addr# should be from withAddr#.
lookaheadFromAddr# :: Addr# -> Parser e a -> Parser e a
-- | Run a parser at the given address.
--
-- The Addr# should be from withAddr#.
--
-- This is a highly internal function -- you likely want
-- lookaheadFromAddr#, which will reset the address after running
-- the parser.
atAddr# :: Addr# -> Parser e a -> Parser e a
withAnyWord8# :: (Word8'# -> Parser e a) -> Parser e a
withAnyWord16# :: (Word16'# -> Parser e a) -> Parser e a
withAnyWord32# :: (Word32'# -> Parser e a) -> Parser e a
withAnyWord64# :: (Word# -> Parser e a) -> Parser e a
withAnyInt8# :: (Int8'# -> Parser e a) -> Parser e a
withAnyInt16# :: (Int16'# -> Parser e a) -> Parser e a
withAnyInt32# :: (Int32'# -> Parser e a) -> Parser e a
withAnyInt64# :: (Int# -> Parser e a) -> Parser e a
-- | Read a null-terminated bytestring (a C-style string), where the
-- bytestring is known to be null-terminated somewhere in the input.
--
-- Highly unsafe. Unless you have a guarantee that the string will be
-- null terminated before the input ends, use anyCString instead.
-- Honestly, I'm not sure if this is a good function to define. But here
-- it is.
--
-- Fails on GHC versions older than 9.0, since we make use of the
-- cstringLength# primop introduced in GHC 9.0, and we aren't
-- very useful without it.
--
-- Consumes the null terminator.
anyCStringUnsafe :: Parser e ByteString
-- | Unsafely read a concrete byte from the input. It's not checked that
-- the input has enough bytes.
scan8# :: Word8 -> Parser e ()
-- | Unsafely read two concrete bytes from the input. It's not checked that
-- the input has enough bytes.
scan16# :: Word16 -> Parser e ()
-- | Unsafely read four concrete bytes from the input. It's not checked
-- that the input has enough bytes.
scan32# :: Word32 -> Parser e ()
-- | Unsafely read eight concrete bytes from the input. It's not checked
-- that the input has enough bytes.
scan64# :: Word -> Parser e ()
-- | Unsafely read and return a byte from the input. It's not checked that
-- the input is non-empty.
scanAny8# :: Parser e Word8
-- | Template function, creates a Parser e () which unsafely scans
-- a given sequence of bytes.
scanBytes# :: [Word] -> Q Exp
instance (GHC.Show.Show a, GHC.Show.Show e) => GHC.Show.Show (FlatParse.Basic.Result e a)
instance GHC.Base.Functor (FlatParse.Basic.Result e)
instance GHC.Base.Functor (FlatParse.Basic.Parser e)
instance GHC.Base.Applicative (FlatParse.Basic.Parser e)
instance GHC.Base.Monad (FlatParse.Basic.Parser e)
-- | This module contains lexer and error message primitives for a simple
-- lambda calculus parser. It demonstrates a simple but decently
-- informative implementation of error message propagation.
module FlatParse.Examples.BasicLambda.Lexer
-- | An expected item which is displayed in error messages.
data Expected
-- | An error message.
Msg :: String -> Expected
-- | A literal expected thing.
Lit :: String -> Expected
-- | A parsing error.
data Error
-- | A precisely known error, like leaving out "in" from "let".
Precise :: Pos -> Expected -> Error
-- | An imprecise error, when we expect a number of different things, but
-- parse something else.
Imprecise :: Pos -> [Expected] -> Error
errorPos :: Error -> Pos
-- | Merge two errors. Inner errors (which were thrown at points with more
-- consumed inputs) are preferred. If errors are thrown at identical
-- input positions, we prefer precise errors to imprecise ones.
--
-- The point of prioritizing inner and precise errors is to suppress the
-- deluge of "expected" items, and instead try to point to a concrete
-- issue to fix.
merge :: Error -> Error -> Error
type Parser = Parser Error
-- | Pretty print an error. The ByteString input is the source file.
-- The offending line from the source is displayed in the output.
prettyError :: ByteString -> Error -> String
-- | Imprecise cut: we slap a list of items on inner errors.
cut :: Parser a -> [Expected] -> Parser a
-- | Precise cut: we propagate at most a single error.
cut' :: Parser a -> Expected -> Parser a
runParser :: Parser a -> ByteString -> Result Error a
-- | Run parser, print pretty error on failure.
testParser :: Show a => Parser a -> String -> IO ()
-- | Parse a line comment.
lineComment :: Parser ()
-- | Parse a potentially nested multiline comment.
multilineComment :: Parser ()
-- | Consume whitespace.
ws :: Parser ()
-- | Consume whitespace after running a parser.
token :: Parser a -> Parser a
-- | Read a starting character of an identifier.
identStartChar :: Parser Char
-- | Read a non-starting character of an identifier.
identChar :: Parser Char
-- | Check whether a Span contains exactly a keyword. Does not
-- change parsing state.
isKeyword :: Span -> Parser ()
-- | Parse a non-keyword string.
symbol :: String -> Q Exp
-- | Parse a non-keyword string, throw precise error on failure.
symbol' :: String -> Q Exp
-- | Parse a keyword string.
keyword :: String -> Q Exp
-- | Parse a keyword string, throw precise error on failure.
keyword' :: String -> Q Exp
instance GHC.Classes.Ord FlatParse.Examples.BasicLambda.Lexer.Expected
instance GHC.Show.Show FlatParse.Examples.BasicLambda.Lexer.Expected
instance GHC.Classes.Eq FlatParse.Examples.BasicLambda.Lexer.Expected
instance GHC.Show.Show FlatParse.Examples.BasicLambda.Lexer.Error
instance Data.String.IsString FlatParse.Examples.BasicLambda.Lexer.Expected
-- | This module contains a simple lambda calculus parser. This parser is
-- not optimized for maximum performance; instead it's written in a style
-- which emulates the look and feel of conventional monadic parsers. An
-- optimized implementation would use low-level switch expressions
-- more often.
module FlatParse.Examples.BasicLambda.Parser
type Name = ByteString
-- | A term in the language. The precedences of different constructs are
-- the following, in decreasing order of strength:
--
data Tm
-- |
-- x
--
Var :: Name -> Tm
-- |
-- t u
--
App :: Tm -> Tm -> Tm
-- |
-- lam x. t
--
Lam :: Name -> Tm -> Tm
-- |
-- let x = t in u
--
Let :: Name -> Tm -> Tm -> Tm
-- | true or false.
BoolLit :: Bool -> Tm
-- | A positive Int literal.
IntLit :: Int -> Tm
-- |
-- if t then u else v
--
If :: Tm -> Tm -> Tm -> Tm
-- |
-- t + u
--
Add :: Tm -> Tm -> Tm
-- |
-- t * u
--
Mul :: Tm -> Tm -> Tm
-- |
-- t == u
--
Eq :: Tm -> Tm -> Tm
-- |
-- t < u
--
Lt :: Tm -> Tm -> Tm
-- | Parse an identifier. This parser uses isKeyword to check that
-- an identifier is not a keyword.
ident :: Parser Name
-- | Parse an identifier, throw a precise error on failure.
ident' :: Parser Name
digit :: Parser Int
int :: Parser Int
-- | Parse a literal, identifier or parenthesized expression.
atom :: Parser Tm
atom' :: Parser Tm
-- | Parse an App-level expression.
app' :: Parser Tm
-- | Parse a Mul-level expression.
mul' :: Parser Tm
-- | Parse an Add-level expression.
add' :: Parser Tm
-- | Parse an Eq or Lt-level expression.
eqLt' :: Parser Tm
-- | Parse a Let.
pLet :: Parser Tm
-- | Parse a Lam.
lam :: Parser Tm
-- | Parse an If.
pIf :: Parser Tm
-- | Parse any Tm.
tm' :: Parser Tm
-- | Parse a complete source file.
src' :: Parser Tm
p1 :: String
instance GHC.Show.Show FlatParse.Examples.BasicLambda.Parser.Tm
-- | This module implements a Parser supporting a custom reader
-- environment, custom error types and an Int state.
module FlatParse.Stateful
-- | Parser r e a has a reader environment r, error type
-- e and a return type a.
newtype Parser r e a
Parser :: (ForeignPtrContents -> r -> Addr# -> Addr# -> Int# -> Res# e a) -> Parser r e a
[runParser#] :: Parser r e a -> ForeignPtrContents -> r -> Addr# -> Addr# -> Int# -> Res# e a
-- | Primitive result of a parser. Possible results are given by
-- OK#, Err# and Fail# pattern synonyms.
type Res# e a = (# (# a, Addr#, Int# #) | (# #) | (# e #) #)
-- | Contains return value, pointer to the rest of the input buffer and the
-- next Int state.
pattern OK# :: a -> Addr# -> Int# -> Res# e a
-- | Constructor for recoverable failure.
pattern Fail# :: Res# e a
-- | Constructor for errors which are by default non-recoverable.
pattern Err# :: e -> Res# e a
-- | Higher-level boxed data type for parsing results.
data Result e a
-- | Contains return value, last Int state, unconsumed input.
OK :: a -> Int -> !ByteString -> Result e a
-- | Recoverable-by-default failure.
Fail :: Result e a
-- | Unrecoverable-by-default error.
Err :: !e -> Result e a
-- | Run a parser. The Int argument is the initial state.
runParser :: Parser r e a -> r -> Int -> ByteString -> Result e a
-- | Run a parser on a String input. Reminder:
-- OverloadedStrings for ByteString does not yield a
-- valid UTF-8 encoding! For non-ASCII ByteString literal input,
-- use runParserS or packUTF8 for testing.
runParserS :: Parser r e a -> r -> Int -> String -> Result e a
-- | Query the Int state.
get :: Parser r e Int
-- | Write the Int state.
put :: Int -> Parser r e ()
-- | Modify the Int state.
modify :: (Int -> Int) -> Parser r e ()
-- | Query the environment.
ask :: Parser r e r
-- | Run a parser in a modified environment.
local :: (r -> r) -> Parser r e a -> Parser r e a
-- | The failing parser. By default, parser choice (<|>)
-- arbitrarily backtracks on parser failure.
empty :: Parser r e a
-- | Throw a parsing error. By default, parser choice (<|>)
-- can't backtrack on parser error. Use try to convert an error to
-- a recoverable failure.
err :: e -> Parser r e a
-- | Save the parsing state, then run a parser, then restore the state.
lookahead :: Parser r e a -> Parser r e a
-- | Convert a parsing failure to a success.
fails :: Parser r e a -> Parser r e ()
-- | Convert a parsing error into failure.
try :: Parser r e a -> Parser r e a
-- | Convert a parsing failure to a Maybe. If possible, use
-- withOption instead.
optional :: Parser r e a -> Parser r e (Maybe a)
-- | Convert a parsing failure to a ().
optional_ :: Parser r e a -> Parser r e ()
-- | CPS'd version of optional. This is usually more efficient,
-- since it gets rid of the extra Maybe allocation.
withOption :: Parser r e a -> (a -> Parser r e b) -> Parser r e b -> Parser r e b
-- | Convert a parsing failure to an error.
cut :: Parser r e a -> e -> Parser r e a
-- | Run the parser, if we get a failure, throw the given error, but if we
-- get an error, merge the inner and the newly given errors using the
-- e -> e -> e function. This can be useful for
-- implementing parsing errors which may propagate hints or accumulate
-- contextual information.
cutting :: Parser r e a -> e -> (e -> e -> e) -> Parser r e a
-- | Succeed if the input is empty.
eof :: Parser r e ()
-- | Read the given number of bytes as a ByteString.
--
-- Throws a runtime error if given a negative integer.
takeBs :: Int -> Parser r e ByteString
-- | Consume the rest of the input. May return the empty bytestring.
takeRestBs :: Parser r e ByteString
-- | Parse a UTF-8 character literal. This is a template function, you can
-- use it as $(char 'x'), for example, and the splice in this
-- case has type Parser r e ().
char :: Char -> Q Exp
-- | Read a byte.
byte :: Word8 -> Parser r e ()
-- | Read a sequence of bytes. This is a template function, you can use it
-- as $(bytes [3, 4, 5]), for example, and the splice has type
-- Parser r e ().
bytes :: [Word] -> Q Exp
-- | Parse a UTF-8 string literal. This is a template function, you can use
-- it as $(string "foo"), for example, and the splice has type
-- Parser r e ().
string :: String -> Q Exp
-- | This is a template function which makes it possible to branch on a
-- collection of string literals in an efficient way. By using
-- switch, such branching is compiled to a trie of primitive
-- parsing operations, which has optimized control flow, vectorized reads
-- and grouped checking for needed input bytes.
--
-- The syntax is slightly magical, it overloads the usual case
-- expression. An example:
--
--
-- $(switch [| case _ of
--     "foo" -> pure True
--     "bar" -> pure False |])
--
--
-- The underscore is mandatory in case _ of. Each branch must be
-- a string literal, but optionally we may have a default case, like in
--
--
-- $(switch [| case _ of
--     "foo" -> pure 10
--     "bar" -> pure 20
--     _     -> pure 30 |])
--
--
-- All case right hand sides must be parsers with the same type. That
-- type is also the type of the whole switch expression.
--
-- A switch has longest match semantics, and the order of cases
-- does not matter, except for the default case, which may only appear as
-- the last case.
--
-- If a switch does not have a default case, and no case matches
-- the input, then it returns with failure, without having consumed any
-- input. A fallthrough to the default case also does not consume any
-- input.
switch :: Q Exp -> Q Exp
-- | Switch expression with an optional first argument for performing a
-- post-processing action after every successful branch matching. For
-- example, if we have ws :: Parser r e () for a whitespace
-- parser, we might want to consume whitespace after matching on any of
-- the switch cases. For that case, we can define a "lexeme" version of
-- switch as follows.
--
--
-- switch' :: Q Exp -> Q Exp
-- switch' = switchWithPost (Just [| ws |])
--
--
-- Note that this switch' function cannot be used in the same
-- module it's defined in, because of the stage restriction of Template
-- Haskell.
switchWithPost :: Maybe (Q Exp) -> Q Exp -> Q Exp
-- | Version of switchWithPost without syntactic sugar. The second
-- argument is the list of cases, the third is the default case.
rawSwitchWithPost :: Maybe (Q Exp) -> [(String, Q Exp)] -> Maybe (Q Exp) -> Q Exp
-- | Parse a UTF-8 Char for which a predicate holds.
satisfy :: (Char -> Bool) -> Parser r e Char
-- | Skip a UTF-8 Char for which a predicate holds.
satisfy_ :: (Char -> Bool) -> Parser r e ()
-- | Parse an ASCII Char for which a predicate holds. Assumption:
-- the predicate must only return True for ASCII-range characters.
-- Otherwise this function might read a 128-255 range byte, thereby
-- breaking UTF-8 decoding.
satisfyASCII :: (Char -> Bool) -> Parser r e Char
-- | Skip an ASCII Char for which a predicate holds. Assumption: the
-- predicate must only return True for ASCII-range characters.
satisfyASCII_ :: (Char -> Bool) -> Parser r e ()
-- | This is a variant of satisfy which allows more optimization. We
-- can pick four testing functions for the four cases for the possible
-- number of bytes in the UTF-8 character. So in fusedSatisfy f1 f2
-- f3 f4, if we read a one-byte character, the result is scrutinized
-- with f1, for two-bytes, with f2, and so on. This can
-- result in dramatic lexing speedups.
--
-- For example, if we want to accept any letter, the naive solution would
-- be to use isLetter, but this accesses a large lookup table of
-- Unicode character classes. We can do better with fusedSatisfy
-- isLatinLetter isLetter isLetter isLetter, since here the
-- isLatinLetter is inlined into the UTF-8 decoding, and it
-- probably handles a great majority of all cases without accessing the
-- character table.
fusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> Parser r e Char
-- | Skipping variant of fusedSatisfy.
fusedSatisfy_ :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> Parser r e ()
-- | Parse any Word8 (byte).
anyWord8 :: Parser r e Word8
-- | Skip any Word8 (byte).
anyWord8_ :: Parser r e ()
-- | Parse any Word16.
anyWord16 :: Parser r e Word16
-- | Skip any Word16.
anyWord16_ :: Parser r e ()
-- | Parse any Word32.
anyWord32 :: Parser r e Word32
-- | Skip any Word32.
anyWord32_ :: Parser r e ()
-- | Parse any Word64.
anyWord64 :: Parser r e Word64
-- | Skip any Word64.
anyWord64_ :: Parser r e ()
-- | Parse any Word.
anyWord :: Parser r e Word
-- | Skip any Word.
anyWord_ :: Parser r e ()
-- | Parse any Int8.
anyInt8 :: Parser r e Int8
-- | Parse any Int16.
anyInt16 :: Parser r e Int16
-- | Parse any Int32.
anyInt32 :: Parser r e Int32
-- | Parse any Int64.
anyInt64 :: Parser r e Int64
-- | Parse any Int.
anyInt :: Parser r e Int
-- | Parse any UTF-8-encoded Char.
anyChar :: Parser r e Char
-- | Skip any UTF-8-encoded Char.
anyChar_ :: Parser r e ()
-- | Parse any Char in the ASCII range, fail if the next input
-- character is not in the range. This is more efficient than
-- anyChar if we are only working with ASCII.
anyCharASCII :: Parser r e Char
-- | Skip any Char in the ASCII range. More efficient than
-- anyChar_ if we're working only with ASCII.
anyCharASCII_ :: Parser r e ()
-- |
-- isDigit c = '0' <= c && c <= '9'
--
isDigit :: Char -> Bool
-- |
-- isGreekLetter c = ('Α' <= c && c <= 'Ω') || ('α' <= c && c <= 'ω')
--
isGreekLetter :: Char -> Bool
-- |
-- isLatinLetter c = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
--
isLatinLetter :: Char -> Bool
-- | Read an Int from the input, as a non-empty digit sequence. The
-- Int may overflow in the result.
readInt :: Parser r e Int
-- | Read an Integer from the input, as a non-empty digit sequence.
readInteger :: Parser r e Integer
-- | Parse any Word16 (little-endian).
anyWord16le :: Parser r e Word16
-- | Parse any Word16 (big-endian).
anyWord16be :: Parser r e Word16
-- | Parse any Word32 (little-endian).
anyWord32le :: Parser r e Word32
-- | Parse any Word32 (big-endian).
anyWord32be :: Parser r e Word32
-- | Parse any Word64 (little-endian).
anyWord64le :: Parser r e Word64
-- | Parse any Word64 (big-endian).
anyWord64be :: Parser r e Word64
-- | Parse any Int16 (little-endian).
anyInt16le :: Parser r e Int16
-- | Parse any Int16 (big-endian).
anyInt16be :: Parser r e Int16
-- | Parse any Int32 (little-endian).
anyInt32le :: Parser r e Int32
-- | Parse any Int32 (big-endian).
anyInt32be :: Parser r e Int32
-- | Parse any Int64 (little-endian).
anyInt64le :: Parser r e Int64
-- | Parse any Int64 (big-endian).
anyInt64be :: Parser r e Int64
-- | Choose between two parsers. If the first parser fails, try the second
-- one, but if the first one throws an error, propagate the error.
(<|>) :: Parser r e a -> Parser r e a -> Parser r e a
infixr 6 <|>
-- | Branch on a parser: if the first argument succeeds, continue with the
-- second, else with the third. This can produce slightly more efficient
-- code than (<|>). Moreover, branch does not
-- backtrack from the true/false cases.
branch :: Parser r e a -> Parser r e b -> Parser r e b -> Parser r e b
-- | An analogue of the list foldl function: first parse a
-- b, then parse zero or more a-s, and combine the
-- results in a left-nested way by the b -> a -> b
-- function. Note: this is not the usual chainl function from the
-- parsec libraries!
chainl :: (b -> a -> b) -> Parser r e b -> Parser r e a -> Parser r e b
-- | An analogue of the list foldr function: parse zero or more
-- a-s, terminated by a b, and combine the results in a
-- right-nested way using the a -> b -> b function. Note:
-- this is not the usual chainr function from the parsec
-- libraries!
chainr :: (a -> b -> b) -> Parser r e a -> Parser r e b -> Parser r e b
-- | Run a parser zero or more times, collect the results in a list. Note:
-- for optimal performance, try to avoid this. Often it is possible to
-- get rid of the intermediate list by using a combinator or a custom
-- parser.
many :: Parser r e a -> Parser r e [a]
-- | Skip a parser zero or more times.
many_ :: Parser r e a -> Parser r e ()
-- | Run a parser one or more times, collect the results in a list. Note:
-- for optimal performance, try to avoid this. Often it is possible to
-- get rid of the intermediate list by using a combinator or a custom
-- parser.
some :: Parser r e a -> Parser r e [a]
-- | Skip a parser one or more times.
some_ :: Parser r e a -> Parser r e ()
-- | Succeed if the first parser succeeds and the second one fails. The
-- parsing state is restored to the point of the first argument's
-- success.
notFollowedBy :: Parser r e a -> Parser r e b -> Parser r e a
-- | isolate n p runs the parser p isolated to the next
-- n bytes. All isolated bytes must be consumed.
--
-- Throws a runtime error if given a negative integer.
isolate :: Int -> Parser r e a -> Parser r e a
-- | Byte offset counted backwards from the end of the buffer.
newtype Pos
Pos :: Int -> Pos
-- | A pair of positions.
data Span
Span :: !Pos -> !Pos -> Span
-- | Get the current position in the input.
getPos :: Parser r e Pos
-- | Set the input position. Warning: this can result in crashes if the
-- position points outside the current buffer. It is always safe to
-- setPos values which came from getPos with the current
-- input.
setPos :: Pos -> Parser r e ()
-- | The end of the input.
endPos :: Pos
-- | Return the consumed span of a parser. Use withSpan if possible
-- for better efficiency.
spanOf :: Parser r e a -> Parser r e Span
-- | Bind the result together with the span of the result. CPS'd version of
-- spanOf for better unboxing.
withSpan :: Parser r e a -> (a -> Span -> Parser r e b) -> Parser r e b
-- | Return the ByteString consumed by a parser. Note: it's more
-- efficient to use spanOf and withSpan instead.
byteStringOf :: Parser r e a -> Parser r e ByteString
-- | CPS'd version of byteStringOf. Can be more efficient, because
-- the result is more eagerly unboxed by GHC. It's more efficient to use
-- spanOf or withSpan instead.
withByteString :: Parser r e a -> (a -> ByteString -> Parser r e b) -> Parser r e b
-- | Run a parser in a given input span. The input position and the
-- Int state are restored after the parser is finished, so
-- inSpan does not consume input and has no side effect. Warning:
-- this operation may crash if the given span points outside the current
-- parsing buffer. It's always safe to use inSpan if the span
-- comes from a previous withSpan or spanOf call on the
-- current input.
inSpan :: Span -> Parser r e a -> Parser r e a
-- | Check whether a Pos points into a ByteString.
validPos :: ByteString -> Pos -> Bool
-- | Compute corresponding line and column numbers for each Pos in a
-- list. Throw an error on invalid positions. Note: computing lines and
-- columns may traverse the ByteString, but it traverses it only
-- once regardless of the length of the position list.
posLineCols :: ByteString -> [Pos] -> [(Int, Int)]
-- | Create a ByteString from a Span. The result is invalid
-- if the Span points outside the current buffer, or if the
-- Span start is greater than the end position.
unsafeSpanToByteString :: Span -> Parser r e ByteString
-- | Slice into a ByteString using a Span. The result is
-- invalid if the Span is not a valid slice of the first argument.
unsafeSlice :: ByteString -> Span -> ByteString
-- | Create a Pos from a line and column number. Throws an error on
-- out-of-bounds line and column numbers.
mkPos :: ByteString -> (Int, Int) -> Pos
-- | Break a UTF-8-encoded ByteString into lines. Throws an error on
-- invalid input. This is mostly useful for grabbing specific source
-- lines for displaying error messages.
lines :: ByteString -> [String]
-- | Parse the rest of the current line as a String. Assumes UTF-8
-- encoding, throws an error if the encoding is invalid.
takeLine :: Parser r e String
-- | Parse the rest of the current line as a String, but restore the
-- parsing state. Assumes UTF-8 encoding. This can be used for debugging.
traceLine :: Parser r e String
-- | Take the rest of the input as a String. Assumes UTF-8 encoding.
takeRest :: Parser r e String
-- | Get the rest of the input as a String, but restore the parsing
-- state. Assumes UTF-8 encoding. This can be used for debugging.
traceRest :: Parser r e String
-- | Convert a String to a UTF-8-encoded ByteString.
packUTF8 :: String -> ByteString
-- | Convert a UTF-8-encoded ByteString to a String.
unpackUTF8 :: ByteString -> String
-- | Check that the input has at least the given number of bytes.
ensureBytes# :: Int -> Parser r e ()
-- | Unsafely read a concrete byte from the input. It's not checked that
-- the input has enough bytes.
scan8# :: Word8 -> Parser r e ()
-- | Unsafely read two concrete bytes from the input. It's not checked that
-- the input has enough bytes.
scan16# :: Word16 -> Parser r e ()
-- | Unsafely read four concrete bytes from the input. It's not checked
-- that the input has enough bytes.
scan32# :: Word32 -> Parser r e ()
-- | Unsafely read eight concrete bytes from the input. It's not checked
-- that the input has enough bytes.
scan64# :: Word -> Parser r e ()
-- | Unsafely read and return a byte from the input. It's not checked that
-- the input is non-empty.
scanAny8# :: Parser r e Word8
-- | Template function, creates a Parser r e () which unsafely
-- scans a given sequence of bytes.
scanBytes# :: [Word] -> Q Exp
-- | Decrease the current input position by the given number of bytes.
setBack# :: Int -> Parser r e ()
withAnyWord8# :: (Word8'# -> Parser r e a) -> Parser r e a
withAnyWord16# :: (Word16'# -> Parser r e a) -> Parser r e a
withAnyWord32# :: (Word32'# -> Parser r e a) -> Parser r e a
withAnyWord64# :: (Word# -> Parser r e a) -> Parser r e a
withAnyInt8# :: (Int8'# -> Parser r e a) -> Parser r e a
withAnyInt16# :: (Int16'# -> Parser r e a) -> Parser r e a
withAnyInt32# :: (Int32'# -> Parser r e a) -> Parser r e a
withAnyInt64# :: (Int# -> Parser r e a) -> Parser r e a
instance (GHC.Show.Show a, GHC.Show.Show e) => GHC.Show.Show (FlatParse.Stateful.Result e a)
instance GHC.Base.Functor (FlatParse.Stateful.Result e)
instance GHC.Base.Functor (FlatParse.Stateful.Parser r e)
instance GHC.Base.Applicative (FlatParse.Stateful.Parser r e)
instance GHC.Base.Monad (FlatParse.Stateful.Parser r e)