-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | High-performance parsing from strict bytestrings
--
-- Flatparse is a high-performance parsing library for strict
-- bytestring input. See the README for more information:
-- https://github.com/AndrasKovacs/flatparse.
@package flatparse
@version 0.5.2.1
module FlatParse.Common.Assorted
shortInteger :: Int# -> Integer
-- |
-- isDigit c = '0' <= c && c <= '9'
--
isDigit :: Char -> Bool
-- |
-- isLatinLetter c = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
--
isLatinLetter :: Char -> Bool
-- |
-- isGreekLetter c = ('Α' <= c && c <= 'Ω') || ('α' <= c && c <= 'ω')
--
isGreekLetter :: Char -> Bool
packBytes :: [Word] -> Word
splitBytes :: [Word] -> ([Word], [Word])
charToBytes :: Char -> [Word]
strToBytes :: String -> [Word]
-- | Convert a String to a UTF-8-encoded ByteString.
strToUtf8 :: String -> ByteString
-- | Decode a UTF-8-encoded ByteString into a String.
utf8ToStr :: ByteString -> String
-- | Shortcut for 'indexCharOffAddr# addr# 0#'.
derefChar8# :: Addr# -> Char#
-- | Coerce a Word16 to Int16.
word16ToInt16 :: Word16 -> Int16
-- | Coerce a Word32 to Int32.
word32ToInt32 :: Word32 -> Int32
-- | Coerce a Word64 to Int64.
word64ToInt64 :: Word64 -> Int64
-- | Assert for the given Int# that n >= 0.
--
-- Throws a runtime error if given a negative integer.
withPosInt# :: Int# -> r -> r
-- | Unwrap the Int# from an Int and apply the given
-- function to it.
withIntUnwrap# :: (Int# -> r) -> Int -> r
-- | Index of leftmost null byte, or (number of bytes in type) if not
-- present.
--
-- Adapted from Hacker's Delight 6-1. Useful in big-endian environments.
zbytel :: (FiniteBits a, Num a) => a -> Int
-- | bit mangling, returns 0 for inputs without a null byte
--
-- Separating allows us to skip some index calculation if there was no
-- null byte.
zbytel'intermediate :: (FiniteBits a, Num a) => a -> a
-- | bit mangling, turns intermediate value into an index
--
-- Separating allows us to skip some index calculation if there was no
-- null byte.
zbytel'toIdx :: (FiniteBits a, Num a) => a -> Int
-- | Index of rightmost null byte, or (number of bytes in type) if not
-- present
--
-- Adapted from Hacker's Delight 6-1. Useful in little-endian
-- environments.
zbyter :: (FiniteBits a, Num a) => a -> Int
-- | bit mangling, returns 0 for inputs without a null byte
--
-- Separating allows us to skip some index calculation if there was no
-- null byte.
zbyter'intermediate :: (FiniteBits a, Num a) => a -> a
-- | bit mangling, turns intermediate value into an index
--
-- Separating allows us to skip some index calculation if there was no
-- null byte.
zbyter'toIdx :: (FiniteBits a, Num a) => a -> Int
-- | Exts compatibility wrapper.
module FlatParse.Common.GHCExts
module FlatParse.Common.Numbers
-- | Parse a non-empty ASCII decimal digit sequence as a Word. Fails
-- on overflow.
anyAsciiDecimalWord# :: Addr# -> Addr# -> (# (# #) | (# Word#, Addr# #) #)
-- | Parse a non-empty ASCII decimal digit sequence as a positive
-- Int. Fails on overflow.
anyAsciiDecimalInt# :: Addr# -> Addr# -> (# (# #) | (# Int#, Addr# #) #)
anyAsciiDecimalWord_# :: Word# -> Addr# -> Addr# -> (# (# #) | (# Word#, Addr# #) #)
-- | Parse a non-empty ASCII decimal digit sequence as a positive
-- Int. May overflow.
anyAsciiDecimalIntOverflow# :: Addr# -> Addr# -> (# (# #) | (# Int#, Addr# #) #)
-- | Parse a non-empty ASCII decimal digit sequence as a positive
-- Integer.
anyAsciiDecimalInteger# :: ForeignPtrContents -> Addr# -> Addr# -> (# (# #) | (# Integer, Addr# #) #)
-- | Parse a non-empty ASCII decimal digit sequence as a positive
-- Int. May overflow.
anyAsciiDecimalIntOverflow_# :: Int# -> Addr# -> Addr# -> (# Int#, Addr# #)
-- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence
-- as a Word. Fails on overflow.
anyAsciiHexWord# :: Addr# -> Addr# -> (# (# #) | (# Word#, Addr# #) #)
-- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence
-- as a positive Int. Fails on overflow.
anyAsciiHexInt# :: Addr# -> Addr# -> (# (# #) | (# Int#, Addr# #) #)
anyAsciiHexWord_# :: Word# -> Addr# -> Addr# -> (# (# #) | (# Word#, Addr# #) #)
fromZigzagNative :: Word -> Int
fromZigzagNative# :: Word# -> Int#
toZigzagNative :: Int -> Word
toZigzagNative# :: Int# -> Word#
toZigzagNative'# :: Word# -> Word#
-- | protobuf style (LE, redundant, on continues)
anyVarintProtobuf# :: Addr# -> Addr# -> (# (# #) | (# Int#, Addr#, Int# #) #)
unI# :: Int -> Int#
mul10# :: Int# -> Int#
-- | Common low-level parser definitions.
module FlatParse.Common.Parser
type PureMode = Proxy# Void
type IOMode = State# RealWorld
type STMode s = State# s
-- | Minimal parser definition.
module FlatParse.Basic.Parser
-- | ParserT st e a is a parser with a state token type
-- st, an error type e and a return type a.
-- The different state token types support different embedded effects;
-- see Parser, ParserIO and ParserST below.
newtype ParserT (st :: ZeroBitType) e a
ParserT :: (ForeignPtrContents -> Addr# -> Addr# -> st -> Res# st e a) -> ParserT (st :: ZeroBitType) e a
[runParserT#] :: ParserT (st :: ZeroBitType) e a -> ForeignPtrContents -> Addr# -> Addr# -> st -> Res# st e a
-- | The type of pure parsers.
type Parser = ParserT PureMode
-- | The type of parsers which can embed IO actions.
type ParserIO = ParserT IOMode
-- | The type of parsers which can embed ST actions.
type ParserST s = ParserT (STMode s)
-- | Same as pure for ParserT except that it does not force
-- the returned value.
pureLazy :: a -> ParserT st e a
-- | Primitive parser result wrapped with a state token.
--
-- You should rarely need to manipulate values of this type directly. Use
-- the provided bidirectional pattern synonyms OK#, Fail#
-- and Err#.
type Res# (st :: ZeroBitType) e a = (# st, ResI# e a #)
-- | Res# constructor for a successful parse. Contains the return
-- value and a pointer to the rest of the input buffer, plus a state
-- token.
pattern OK# :: (st :: ZeroBitType) -> a -> Addr# -> Res# st e a
-- | Res# constructor for errors which are by default
-- non-recoverable. Contains the error, plus a state token.
pattern Err# :: (st :: ZeroBitType) -> e -> Res# st e a
-- | Res# constructor for recoverable failure. Contains only a state
-- token.
pattern Fail# :: (st :: ZeroBitType) -> Res# st e a
-- | Primitive parser result.
type ResI# e a = (# (# a, Addr# #) | (# #) | (# e #) #)
-- | Choose between two parsers. If the first parser fails, try the second
-- one, but if the first one throws an error, propagate the error. This
-- operation can arbitrarily backtrack.
--
-- Note: this exported operator has different fixity than the same
-- operator in Applicative. Hide this operator if you want to use
-- the Alternative version.
(<|>) :: ParserT st e a -> ParserT st e a -> ParserT st e a
infixr 6 <|>
instance Control.Monad.IO.Class.MonadIO (FlatParse.Basic.Parser.ParserIO e)
instance GHC.Base.Functor (FlatParse.Basic.Parser.ParserT st e)
instance GHC.Base.Applicative (FlatParse.Basic.Parser.ParserT st e)
instance GHC.Base.Monad (FlatParse.Basic.Parser.ParserT st e)
instance GHC.Base.Alternative (FlatParse.Basic.Parser.ParserT st e)
instance GHC.Base.MonadPlus (FlatParse.Basic.Parser.ParserT st e)
-- | Basic parser building blocks.
module FlatParse.Basic.Base
-- | Succeed if the input is empty.
eof :: ParserT st e ()
-- | Read the given number of bytes as a ByteString.
--
-- Throws a runtime error if given a negative integer.
--
-- This does no copying. The ByteString returned is a "slice" of
-- the input, and will keep it alive. To avoid this, use copy on
-- the output.
take :: Int -> ParserT st e ByteString
-- | Read n# bytes as a ByteString. Fails if fewer than
-- n# bytes are available.
--
-- Throws a runtime error if given a negative integer.
--
-- This does no copying. The ByteString returned is a "slice" of
-- the input, and will keep it alive. To avoid this, use copy on
-- the output.
take# :: Int# -> ParserT st e ByteString
-- | Read n# bytes as a ByteString. Fails if fewer than
-- n# bytes are available.
--
-- Undefined behaviour if given a negative integer.
--
-- This does no copying. The ByteString returned is a "slice" of
-- the input, and will keep it alive. To avoid this, use copy on
-- the output.
takeUnsafe# :: Int# -> ParserT st e ByteString
-- | Consume the rest of the input. May return the empty bytestring.
--
-- This does no copying. The ByteString returned is a "slice" of
-- the input, and will keep it alive. To avoid this, use copy on
-- the output.
takeRest :: ParserT st e ByteString
-- | Skip forward n bytes. Fails if fewer than n bytes
-- are available.
--
-- Throws a runtime error if given a negative integer.
skip :: Int -> ParserT st e ()
-- | Skip forward n# bytes. Fails if fewer than n# bytes
-- are available.
--
-- Throws a runtime error if given a negative integer.
skip# :: Int# -> ParserT st e ()
-- | Go back i bytes in the input. Takes a positive integer.
--
-- Extremely unsafe. Makes no checks. Almost certainly a Bad Idea.
skipBack :: Int -> ParserT st e ()
-- | Go back n# bytes. Takes a positive integer.
--
-- Extremely unsafe. Makes no checks. Almost certainly a Bad Idea.
skipBack# :: Int# -> ParserT st e ()
-- | Skip forward n# bytes and run the given parser. Fails if
-- fewer than n# bytes are available.
--
-- Throws a runtime error if given a negative integer.
atSkip# :: Int# -> ParserT st e a -> ParserT st e a
-- | Skip forward n# bytes and run the given parser. Fails if
-- fewer than n# bytes are available.
--
-- Undefined behaviour if given a negative integer.
atSkipUnsafe# :: Int# -> ParserT st e r -> ParserT st e r
-- | Branch on a parser: if the first argument succeeds, continue with the
-- second, else with the third. This can produce slightly more efficient
-- code than (<|>). Moreover, branch does not
-- backtrack from the true/false cases.
branch :: ParserT st e a -> ParserT st e b -> ParserT st e b -> ParserT st e b
-- | Succeed if the first parser succeeds and the second one fails.
notFollowedBy :: ParserT st e a -> ParserT st e b -> ParserT st e a
-- | An analogue of the list foldl function: first parse a
-- b, then parse zero or more a-s, and combine the
-- results in a left-nested way by the b -> a -> b
-- function. Note: this is not the usual chainl function from the
-- parsec libraries!
chainl :: (b -> a -> b) -> ParserT st e b -> ParserT st e a -> ParserT st e b
-- | An analogue of the list foldr function: parse zero or more
-- a-s, terminated by a b, and combine the results in a
-- right-nested way using the a -> b -> b function. Note:
-- this is not the usual chainr function from the parsec
-- libraries!
chainr :: (a -> b -> b) -> ParserT st e a -> ParserT st e b -> ParserT st e b
-- | Save the parsing state, then run a parser, then restore the state.
lookahead :: ParserT st e a -> ParserT st e a
-- | Assert that there are at least n bytes remaining.
--
-- Undefined behaviour if given a negative integer.
ensure :: Int -> ParserT st e ()
-- | Assert that there are at least n# bytes remaining.
--
-- Undefined behaviour if given a negative integer.
ensure# :: Int# -> ParserT st e ()
-- | Assert that there are at least n bytes remaining (CPS).
--
-- Undefined behaviour if given a negative integer.
withEnsure :: Int -> ParserT st e r -> ParserT st e r
-- | Assert that there is at least 1 byte remaining (CPS).
withEnsure1 :: ParserT st e r -> ParserT st e r
-- | Assert that there are at least n# bytes remaining (CPS).
--
-- Undefined behaviour if given a negative integer.
withEnsure# :: Int# -> ParserT st e r -> ParserT st e r
-- | isolate n p runs the parser p isolated to the next
-- n bytes. All isolated bytes must be consumed.
--
-- Throws a runtime error if given a negative integer.
isolate :: Int -> ParserT st e a -> ParserT st e a
-- | isolate# n# p runs the parser p isolated to the next
-- n# bytes. All isolated bytes must be consumed.
--
-- Throws a runtime error if given a negative integer.
isolate# :: Int# -> ParserT st e a -> ParserT st e a
-- | isolateUnsafe# n# p runs the parser p isolated to
-- the next n# bytes. All isolated bytes must be consumed.
--
-- Undefined behaviour if given a negative integer.
isolateUnsafe# :: Int# -> ParserT st e a -> ParserT st e a
-- | Skip a parser zero or more times.
skipMany :: ParserT st e a -> ParserT st e ()
-- | Skip a parser one or more times.
skipSome :: ParserT st e a -> ParserT st e ()
-- | The failing parser. By default, parser choice (<|>)
-- arbitrarily backtracks on parser failure.
failed :: ParserT st e a
-- | Convert a parsing error into failure.
try :: ParserT st e a -> ParserT st e a
-- | Throw a parsing error. By default, parser choice (<|>)
-- can't backtrack on parser error. Use try to convert an error to
-- a recoverable failure.
err :: e -> ParserT st e a
-- | Run the parser, if an error is thrown, handle it with the given
-- function.
withError :: ParserT st e b -> (e -> ParserT st e b) -> ParserT st e b
-- | Run the parser, and handle each possible result.
withAnyResult :: ParserT st t a -> (a -> ParserT st e b) -> ParserT st e b -> (t -> ParserT st e b) -> ParserT st e b
-- | Convert a parsing failure to a success.
fails :: ParserT st e a -> ParserT st e ()
-- | Convert a parsing failure to an error.
cut :: ParserT st e a -> e -> ParserT st e a
-- | Run the parser, if we get a failure, throw the given error, but if we
-- get an error, merge the inner and the newly given errors using the
-- e -> e -> e function. This can be useful for
-- implementing parsing errors which may propagate hints or accumulate
-- contextual information.
cutting :: ParserT st e a -> e -> (e -> e -> e) -> ParserT st e a
-- | Convert a parsing failure to a Maybe. If possible, use
-- withOption instead.
optional :: ParserT st e a -> ParserT st e (Maybe a)
-- | Convert a parsing failure to a ().
optional_ :: ParserT st e a -> ParserT st e ()
-- | CPS'd version of optional. This is usually more efficient,
-- since it gets rid of the extra Maybe allocation.
withOption :: ParserT st e a -> (a -> ParserT st e r) -> ParserT st e r -> ParserT st e r
-- | Machine integer parsers.
module FlatParse.Basic.Integers
-- | Parse any Word8.
anyWord8 :: ParserT st e Word8
-- | Parse any Word16 (native byte order).
anyWord16 :: ParserT st e Word16
-- | Parse any Word32 (native byte order).
anyWord32 :: ParserT st e Word32
-- | Parse any Word64 (native byte order).
anyWord64 :: ParserT st e Word64
-- | Parse any Int8.
anyInt8 :: ParserT st e Int8
-- | Parse any Int16 (native byte order).
anyInt16 :: ParserT st e Int16
-- | Parse any Int32 (native byte order).
anyInt32 :: ParserT st e Int32
-- | Parse any Int64 (native byte order).
anyInt64 :: ParserT st e Int64
-- | Parse any Word (native size).
anyWord :: ParserT st e Word
-- | Parse any Int (native size).
anyInt :: ParserT st e Int
-- | Parse any Word16 (little-endian).
anyWord16le :: ParserT st e Word16
-- | Parse any Word16 (big-endian).
anyWord16be :: ParserT st e Word16
-- | Parse any Word32 (little-endian).
anyWord32le :: ParserT st e Word32
-- | Parse any Word32 (big-endian).
anyWord32be :: ParserT st e Word32
-- | Parse any Word64 (little-endian).
anyWord64le :: ParserT st e Word64
-- | Parse any Word64 (big-endian).
anyWord64be :: ParserT st e Word64
-- | Parse any Int16 (little-endian).
anyInt16le :: ParserT st e Int16
-- | Parse any Int16 (big-endian).
anyInt16be :: ParserT st e Int16
-- | Parse any Int32 (little-endian).
anyInt32le :: ParserT st e Int32
-- | Parse any Int32 (big-endian).
anyInt32be :: ParserT st e Int32
-- | Parse any Int64 (little-endian).
anyInt64le :: ParserT st e Int64
-- | Parse any Int64 (big-endian).
anyInt64be :: ParserT st e Int64
-- | Read the next 1 byte and assert its value as a Word8.
word8 :: Word8 -> ParserT st e ()
-- | Parse any Word8 (CPS).
withAnyWord8 :: (Word8 -> ParserT st e r) -> ParserT st e r
-- | Parse any Word16 (native byte order) (CPS).
withAnyWord16 :: (Word16 -> ParserT st e r) -> ParserT st e r
-- | Parse any Word32 (native byte order) (CPS).
withAnyWord32 :: (Word32 -> ParserT st e r) -> ParserT st e r
-- | Parse any Word64 (native byte order) (CPS).
withAnyWord64 :: (Word64 -> ParserT st e r) -> ParserT st e r
-- | Parse any Int8 (CPS).
withAnyInt8 :: (Int8 -> ParserT st e r) -> ParserT st e r
-- | Parse any Int16 (native byte order) (CPS).
withAnyInt16 :: (Int16 -> ParserT st e r) -> ParserT st e r
-- | Parse any Int32 (native byte order) (CPS).
withAnyInt32 :: (Int32 -> ParserT st e r) -> ParserT st e r
-- | Parse any Int64 (native byte order) (CPS).
withAnyInt64 :: (Int64 -> ParserT st e r) -> ParserT st e r
-- | Parse any Word (native size) (CPS).
withAnyWord :: (Word -> ParserT st e r) -> ParserT st e r
-- | Parse any Int (native size) (CPS).
withAnyInt :: (Int -> ParserT st e r) -> ParserT st e r
-- | Unsafely parse any Word8, without asserting the input is
-- non-empty.
--
-- The caller must guarantee that the input has enough bytes.
anyWord8Unsafe :: ParserT st e Word8
-- | Unsafely read the next 1 byte and assert its value as a Word8.
--
-- The caller must guarantee that the input has enough bytes.
word8Unsafe :: Word8 -> ParserT st e ()
-- | Unsafely read the next 2 bytes and assert their value as a
-- Word16 (native byte order).
--
-- The caller must guarantee that the input has enough bytes.
word16Unsafe :: Word16 -> ParserT st e ()
-- | Unsafely read the next 4 bytes and assert their value as a
-- Word32 (native byte order).
--
-- The caller must guarantee that the input has enough bytes.
word32Unsafe :: Word32 -> ParserT st e ()
-- | Unsafely read the next 8 bytes and assert their value as a
-- Word64 (native byte order).
--
-- The caller must guarantee that the input has enough bytes.
word64Unsafe :: Word64 -> ParserT st e ()
-- | Helper for defining CPS parsers for types of a constant byte size
-- (i.e. machine integers).
--
-- Call this with an indexXYZOffAddr primop (e.g.
-- indexWord8OffAddr) and the size in bytes of the type you're
-- parsing.
withAnySized# :: Int# -> (Addr# -> Int# -> a) -> (a -> ParserT st e r) -> ParserT st e r
-- | Unsafe helper for defining CPS parsers for types of a constant byte
-- size (i.e. machine integers).
--
-- Is really just syntactic sugar for applying the given parser and
-- shifting the buffer along.
--
-- The caller must guarantee that the input has enough bytes.
withAnySizedUnsafe# :: Int# -> (Addr# -> Int# -> a) -> (a -> ParserT st e r) -> ParserT st e r
-- | Unsafe helper for defining parsers for types of a constant byte size
-- (i.e. machine integers) which assert the parsed value's... value.
--
-- Call this with an indexXYZOffAddr primop (e.g.
-- indexWord8OffAddr), the size in bytes of the type you're
-- parsing, and the expected value to test the parsed value against.
--
-- The caller must guarantee that the input has enough bytes.
sizedUnsafe# :: Eq a => Int# -> (Addr# -> Int# -> a) -> a -> ParserT st e ()
-- | Bytestring parsers.
--
-- Module dependency complications prevent us from placing these in
-- FlatParse.Basic.Base.
module FlatParse.Basic.Bytes
-- | Read a sequence of bytes. This is a template function, you can use it
-- as $(bytes [3, 4, 5]), for example, and the splice has type
-- Parser e (). For a non-TH variant see byteString.
bytes :: [Word] -> Q Exp
-- | Template function, creates a Parser e () which unsafely
-- parses a given sequence of bytes.
--
-- The caller must guarantee that the input has enough bytes.
bytesUnsafe :: [Word] -> Q Exp
-- | Parsers for textual data (UTF-8, ASCII).
module FlatParse.Basic.Text
-- | Parse a UTF-8 character literal. This is a template function, you can
-- use it as $(char 'x'), for example, and the splice in this
-- case has type Parser e ().
char :: Char -> Q Exp
-- | Parse a UTF-8 string literal. This is a template function, you can use
-- it as $(string "foo"), for example, and the splice has type
-- Parser e ().
string :: String -> Q Exp
-- | Parse any single Unicode character encoded using UTF-8 as a
-- Char.
anyChar :: ParserT st e Char
-- | Skip any single Unicode character encoded using UTF-8.
skipAnyChar :: ParserT st e ()
-- | Parse a UTF-8 Char for which a predicate holds.
satisfy :: (Char -> Bool) -> ParserT st e Char
-- | Skip a UTF-8 Char for which a predicate holds.
skipSatisfy :: (Char -> Bool) -> ParserT st e ()
-- | This is a variant of satisfy which allows more optimization. We
-- can pick four testing functions for the four cases for the possible
-- number of bytes in the UTF-8 character. So in fusedSatisfy f1 f2
-- f3 f4, if we read a one-byte character, the result is scrutinized
-- with f1, for two-bytes, with f2, and so on. This can
-- result in dramatic lexing speedups.
--
-- For example, if we want to accept any letter, the naive solution would
-- be to use isLetter, but this accesses a large lookup table of
-- Unicode character classes. We can do better with fusedSatisfy
-- isLatinLetter isLetter isLetter isLetter, since here the
-- isLatinLetter is inlined into the UTF-8 decoding, and it
-- probably handles a great majority of all cases without accessing the
-- character table.
fusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> ParserT st e Char
-- | Skipping variant of fusedSatisfy.
skipFusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> Parser e ()
-- | Parse the rest of the current line as a String. Assumes UTF-8
-- encoding, throws an error if the encoding is invalid.
takeLine :: ParserT st e String
-- | Take the rest of the input as a String. Assumes UTF-8 encoding.
takeRestString :: ParserT st e String
-- | Parse any single ASCII character (a single byte) as a Char.
--
-- More efficient than anyChar for ASCII-only input.
anyAsciiChar :: ParserT st e Char
-- | Skip any single ASCII character (a single byte).
--
-- More efficient than skipAnyChar for ASCII-only input.
skipAnyAsciiChar :: ParserT st e ()
-- | Parse an ASCII Char for which a predicate holds.
--
-- Assumption: the predicate must only return True for ASCII-range
-- characters. Otherwise this function might read a 128-255 range byte,
-- thereby breaking UTF-8 decoding.
satisfyAscii :: (Char -> Bool) -> ParserT st e Char
-- | Skip an ASCII Char for which a predicate holds. Assumption: the
-- predicate must only return True for ASCII-range characters.
skipSatisfyAscii :: (Char -> Bool) -> ParserT st e ()
-- | Parse a non-empty ASCII decimal digit sequence as a Word. Fails
-- on overflow.
anyAsciiDecimalWord :: ParserT st e Word
-- | Parse a non-empty ASCII decimal digit sequence as a positive
-- Int. Fails on overflow.
anyAsciiDecimalInt :: ParserT st e Int
-- | Parse a non-empty ASCII decimal digit sequence as a positive
-- Integer.
anyAsciiDecimalInteger :: ParserT st e Integer
-- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence
-- as a Word. Fails on overflow.
anyAsciiHexWord :: ParserT st e Word
-- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence
-- as a positive Int. Fails on overflow.
anyAsciiHexInt :: ParserT st e Int
-- | Parse the rest of the current line as a String, but restore the
-- parsing state. Assumes UTF-8 encoding. This can be used for debugging.
traceLine :: ParserT st e String
-- | Get the rest of the input as a String, but restore the parsing
-- state. Assumes UTF-8 encoding. This can be used for debugging.
traceRest :: ParserT st e String
-- | Unsafe, highly dangerous parsing primitives using Addr#.
--
-- Be sure to read the documentation before using any definitions from
-- this module.
--
-- This module exports primitives useful for efficiently parsing binary
-- files that store data using an internal index.
--
-- Often, such indices describe records using a starting offset and a
-- length. Offsets are often relative to the file start, or some dynamic
-- address in the file. This way, individual records can be read out
-- efficiently (much faster than opening lots of small files!).
--
-- We may parse these in-place efficiently by adding record offsets to a
-- base memory address somewhere in the input. This is also extremely
-- unsafe, and easy to get catastrophically wrong. Thus, we provide as
-- much utility as reasonable to enable performing such parsing safely.
-- (That means CPS functions.)
--
-- Note that all definitions here should be considered unsafe. Any
-- Int# is not checked for positivity. You must perform any
-- necessary checks when you obtain your offsets and lengths as
-- Int#. Failure to do so may result in undefined behaviour.
module FlatParse.Basic.Addr
-- | Run a parser, passing it the current address the parser is at.
--
-- Useful for parsing offset-based data tables. For example, you may use
-- this to save the base address to use together with various relative
-- offsets.
withAddr# :: (Addr# -> ParserT st e a) -> ParserT st e a
-- | takeOffAddr# addr# offset# len# moves to addr#,
-- skips offset# bytes, reads len# bytes into a
-- ByteString, and restores the original address.
--
-- The Addr# should be from withAddr#.
--
-- Useful for parsing offset-based data tables. Ex: Your file contains an
-- index storing (OFFSET, LENGTH) entries where the offset is
-- the byte position in the file. Begin with withAddr# $
-- \tableBase# -> ..., then read each entry like
-- takeOffAddr# tableBase# OFFSET LENGTH.
--
-- Fails if you attempt to read outside the input.
--
-- Undefined behaviour if offset# or len# is negative.
--
-- Name adopted from the similar-ish indexXOffAddr# primops.
takeOffAddr# :: Addr# -> Int# -> Int# -> ParserT st e ByteString
-- | withOffAddr# addr# offset# p moves to addr#, skips
-- offset# bytes, then runs the given parser p.
--
-- The Addr# should be from withAddr#.
--
-- Fails if you attempt to read outside the input.
--
-- Undefined behaviour if offset# is negative.
--
-- Name adopted from the similar-ish indexXOffAddr# primops.
withOffAddr# :: Addr# -> Int# -> ParserT st e a -> ParserT st e a
-- | lookahead, but specify the address to lookahead from.
--
-- The Addr# should be from withAddr#.
lookaheadFromAddr# :: Addr# -> ParserT st e a -> ParserT st e a
-- | Run a parser at the given address.
--
-- The Addr# should be from withAddr#.
--
-- This is a highly internal function -- you likely want
-- lookaheadFromAddr#, which will reset the address after running
-- the parser.
atAddr# :: Addr# -> ParserT st e a -> ParserT st e a
module FlatParse.Common.Position
-- | Byte offset counted backwards from the end of the buffer. Note: the
-- Ord instance for Pos considers the earlier positions to
-- be smaller.
newtype Pos
Pos :: Int -> Pos
[unPos] :: Pos -> Int
-- | The end of the input.
endPos :: Pos
-- | Very unsafe conversion between a primitive address and a position. The
-- first argument points to the end of the buffer, the second argument is
-- being converted.
addrToPos# :: Addr# -> Addr# -> Pos
-- | Very unsafe conversion between a primitive address and a position. The
-- first argument points to the end of the buffer.
posToAddr# :: Addr# -> Pos -> Addr#
-- | A pair of positions.
data Span
Span :: !Pos -> !Pos -> Span
-- | Slice into a ByteString using a Span. The result is
-- invalid if the Span is not a valid slice of the first argument.
unsafeSlice :: ByteString -> Span -> ByteString
leftPos :: Span -> Pos
rightPos :: Span -> Pos
instance GHC.Classes.Eq FlatParse.Common.Position.Pos
instance GHC.Show.Show FlatParse.Common.Position.Pos
instance GHC.Show.Show FlatParse.Common.Position.Span
instance GHC.Classes.Eq FlatParse.Common.Position.Span
instance GHC.Classes.Ord FlatParse.Common.Position.Pos
module FlatParse.Common.Switch
data Trie a
Branch :: !a -> !Map Word (Trie a) -> Trie a
type Rule = Maybe Int
nilTrie :: Trie Rule
updRule :: Int -> Maybe Int -> Maybe Int
insert :: Int -> [Word] -> Trie Rule -> Trie Rule
listToTrie :: [(Int, String)] -> Trie Rule
-- | Decorate a trie with the minimum lengths of non-empty paths. This is
-- used later to place ensure.
mindepths :: Trie Rule -> Trie (Rule, Int)
data Trie' a
Branch' :: !a -> !Map Word (Trie' a) -> Trie' a
Path :: !a -> ![Word] -> !Trie' a -> Trie' a
-- | Compress linear paths.
pathify :: Trie (Rule, Int) -> Trie' (Rule, Int)
-- | Compute where to fall back after we exhausted a branch. If the branch
-- is empty, that means we've succeeded at reading and we jump to the rhs
-- rule.
fallbacks :: Trie' (Rule, Int) -> Trie' (Rule, Int, Int)
-- | Decorate with ensure invocations, represented as `Maybe Int`.
ensureBytes :: Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Maybe Int)
compileTrie :: [(Int, String)] -> Trie' (Rule, Int, Maybe Int)
parseSwitch :: Q Exp -> Q ([(String, Exp)], Maybe Exp)
makeRawSwitch :: [(String, Q Exp)] -> Maybe (Q Exp) -> Q Exp
instance GHC.Show.Show a => GHC.Show.Show (FlatParse.Common.Switch.Trie a)
instance GHC.Show.Show a => GHC.Show.Show (FlatParse.Common.Switch.Trie' a)
-- | Efficient literal branching using Template Haskell.
module FlatParse.Basic.Switch
-- | This is a template function which makes it possible to branch on a
-- collection of string literals in an efficient way. By using
-- switch, such branching is compiled to a trie of primitive
-- parsing operations, which has optimized control flow, vectorized reads
-- and grouped checking for needed input bytes.
--
-- The syntax is slightly magical, it overloads the usual case
-- expression. An example:
--
-- $(switch [| case _ of
--     "foo" -> pure True
--     "bar" -> pure False |])
--
-- The underscore is mandatory in case _ of. Each branch must be
-- a string literal, but optionally we may have a default case, like in
--
-- $(switch [| case _ of
--     "foo" -> pure 10
--     "bar" -> pure 20
--     _ -> pure 30 |])
--
-- All case right hand sides must be parsers with the same type. That
-- type is also the type of the whole switch expression.
--
-- A switch has longest match semantics, and the order of cases
-- does not matter, except for the default case, which may only appear as
-- the last case.
--
-- If a switch does not have a default case, and no case matches
-- the input, then it returns with failure, without having consumed any
-- input. A fallthrough to the default case also does not consume any
-- input.
switch :: Q Exp -> Q Exp
-- | Switch expression with an optional first argument for performing a
-- post-processing action after every successful branch matching, not
-- including the default branch. For example, if we have ws ::
-- ParserT st e () for a whitespace parser, we might want to consume
-- whitespace after matching on any of the switch cases. For that case,
-- we can define a "lexeme" version of switch as follows.
--
-- switch' :: Q Exp -> Q Exp -- switch' = switchWithPost (Just [| ws |]) ---- -- Note that this switch' function cannot be used in the same -- module it's defined in, because of the stage restriction of Template -- Haskell. switchWithPost :: Maybe (Q Exp) -> Q Exp -> Q Exp -- | Version of switchWithPost without syntactic sugar. The second -- argument is the list of cases, the third is the default case. rawSwitchWithPost :: Maybe (Q Exp) -> [(String, Q Exp)] -> Maybe (Q Exp) -> Q Exp -- | Parser supporting custom error types and embeddable IO or -- ST actions, but no other bells and whistles. -- -- If you need efficient indentation parsing, consider -- FlatParse.Stateful instead. module FlatParse.Basic -- | ParserT st e a is a parser with a state token type -- st, an error type e and a return type a. -- The different state token types support different embedded effects; -- see Parser, ParserIO and ParserST below. newtype ParserT (st :: ZeroBitType) e a ParserT :: (ForeignPtrContents -> Addr# -> Addr# -> st -> Res# st e a) -> ParserT (st :: ZeroBitType) e a [runParserT#] :: ParserT (st :: ZeroBitType) e a -> ForeignPtrContents -> Addr# -> Addr# -> st -> Res# st e a -- | The type of pure parsers. type Parser = ParserT PureMode -- | The type of parsers which can embed IO actions. type ParserIO = ParserT IOMode -- | The type of parsers which can embed ST actions. type ParserST s = ParserT (STMode s) -- | Higher-level boxed data type for parsing results. data Result e a -- | Contains return value and unconsumed input. OK :: a -> !ByteString -> Result e a -- | Recoverable-by-default failure. Fail :: Result e a -- | Unrecoverable-by-default error. Err :: !e -> Result e a -- | Run a pure parser. runParser :: Parser e a -> ByteString -> Result e a -- | Run a pure parser on a String, converting it to the -- corresponding UTF-8 bytes. -- -- Reminder: OverloadedStrings for ByteString does not -- yield a valid UTF-8 encoding!
For non-ASCII ByteString literal -- input, use this wrapper or properly convert your input first. runParserUtf8 :: Parser e a -> String -> Result e a -- | Run an IO-based parser. runParserIO :: ParserIO e a -> ByteString -> IO (Result e a) -- | Run an ST-based parser. runParserST :: ParserST s e a -> ByteString -> ST s (Result e a) -- | Run a ParserST inside any parser. embedParserST :: forall e a s. (forall s. ParserST s e a) -> ParserT s e a -- | Run a pure Parser inside any parser. embedParser :: forall e a s. Parser e a -> ParserT s e a -- | Primitive parser result wrapped with a state token. -- -- You should rarely need to manipulate values of this type directly. Use -- the provided bidirectional pattern synonyms OK#, Fail# -- and Err#. type Res# (st :: ZeroBitType) e a = (# st, ResI# e a #) -- | Res# constructor for a successful parse. Contains the return -- value and a pointer to the rest of the input buffer, plus a state -- token. pattern OK# :: (st :: ZeroBitType) -> a -> Addr# -> Res# st e a -- | Res# constructor for errors which are by default -- non-recoverable. Contains the error, plus a state token. pattern Err# :: (st :: ZeroBitType) -> e -> Res# st e a -- | Res# constructor for recoverable failure. Contains only a state -- token. pattern Fail# :: (st :: ZeroBitType) -> Res# st e a -- | Primitive parser result. type ResI# e a = (# (# a, Addr# #) | (# #) | (# e #) #) -- | Run an ST action in a ParserST. liftST :: ST s a -> ParserST s e a -- | Convert an UTF8-encoded String to a ByteString. strToUtf8 :: String -> ByteString -- | Convert a ByteString to an UTF8-encoded String. utf8ToStr :: ByteString -> String -- |
-- isDigit c = '0' <= c && c <= '9' --isDigit :: Char -> Bool -- |
-- isLatinLetter c = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
--
isLatinLetter :: Char -> Bool
-- |
-- isGreekLetter c = ('Α' <= c && c <= 'Ω') || ('α' <= c && c <= 'ω')
--
isGreekLetter :: Char -> Bool
-- | Succeed if the input is empty.
eof :: ParserT st e ()
-- | Read the given number of bytes as a ByteString.
--
-- Throws a runtime error if given a negative integer.
--
-- This does no copying. The ByteString returned is a "slice" of
-- the input, and will keep it alive. To avoid this, use copy on
-- the output.
take :: Int -> ParserT st e ByteString
-- | Read n# bytes as a ByteString. Fails if fewer than
-- n# bytes are available.
--
-- Throws a runtime error if given a negative integer.
--
-- This does no copying. The ByteString returned is a "slice" of
-- the input, and will keep it alive. To avoid this, use copy on
-- the output.
take# :: Int# -> ParserT st e ByteString
-- | Read n# bytes as a ByteString. Fails if fewer than
-- n# bytes are available.
--
-- Undefined behaviour if given a negative integer.
--
-- This does no copying. The ByteString returned is a "slice" of
-- the input, and will keep it alive. To avoid this, use copy on
-- the output.
takeUnsafe# :: Int# -> ParserT st e ByteString
-- | Consume the rest of the input. May return the empty bytestring.
--
-- This does no copying. The ByteString returned is a "slice" of
-- the input, and will keep it alive. To avoid this, use copy on
-- the output.
takeRest :: ParserT st e ByteString
-- | Skip forward n bytes. Fails if fewer than n bytes
-- are available.
--
-- Throws a runtime error if given a negative integer.
skip :: Int -> ParserT st e ()
-- | Skip forward n# bytes. Fails if fewer than n# bytes
-- are available.
--
-- Throws a runtime error if given a negative integer.
skip# :: Int# -> ParserT st e ()
-- | Go back i bytes in the input. Takes a positive integer.
--
-- Extremely unsafe. Makes no checks. Almost certainly a Bad Idea.
skipBack :: Int -> ParserT st e ()
-- | Go back n# bytes. Takes a positive integer.
--
-- Extremely unsafe. Makes no checks. Almost certainly a Bad Idea.
skipBack# :: Int# -> ParserT st e ()
-- | Skip forward n# bytes and run the given parser. Fails if
-- fewer than n# bytes are available.
--
-- Throws a runtime error if given a negative integer.
atSkip# :: Int# -> ParserT st e a -> ParserT st e a
-- | Skip forward n# bytes and run the given parser. Fails if fewer
-- than n# bytes are available.
--
-- Undefined behaviour if given a negative integer.
atSkipUnsafe# :: Int# -> ParserT st e r -> ParserT st e r
-- | Read a sequence of bytes. This is a template function, you can use it
-- as $(bytes [3, 4, 5]), for example, and the splice has type
-- Parser e (). For a non-TH variant see byteString.
bytes :: [Word] -> Q Exp
-- | Template function, creates a Parser e () which unsafely
-- parses a given sequence of bytes.
--
-- The caller must guarantee that the input has enough bytes.
bytesUnsafe :: [Word] -> Q Exp
-- | Parse a given ByteString.
--
-- If the bytestring is statically known, consider using bytes
-- instead.
byteString :: ByteString -> ParserT st e ()
-- | Read a null-terminated bytestring (a C-style string).
--
-- Consumes the null terminator.
anyCString :: ParserT st e ByteString
-- | Read a protobuf-style varint into a positive Int.
--
-- protobuf-style varints are byte-aligned. For each byte, the lower 7
-- bits are data and the MSB indicates if there are further bytes. Once
-- fully parsed, the 7-bit payloads are concatenated and interpreted as a
-- little-endian unsigned integer.
--
-- Fails if the varint exceeds the positive Int range.
--
-- Really, these are varnats. They also match with the LEB128 varint
-- encoding.
--
-- protobuf encodes negatives in unsigned integers using zigzag encoding.
-- See the fromZigzag family of functions for this
-- functionality.
--
-- Further reading:
-- https://developers.google.com/protocol-buffers/docs/encoding#varints
anyVarintProtobuf :: ParserT st e Int
-- | Choose between two parsers. If the first parser fails, try the second
-- one, but if the first one throws an error, propagate the error. This
-- operation can arbitrarily backtrack.
--
-- Note: this exported operator has different fixity than the same
-- operator in Control.Applicative. Hide this operator if you want to use
-- the Alternative version.
(<|>) :: ParserT st e a -> ParserT st e a -> ParserT st e a
infixr 6 <|>
-- | Branch on a parser: if the first argument succeeds, continue with the
-- second, else with the third. This can produce slightly more efficient
-- code than (<|>). Moreover, branch does not
-- backtrack from the true/false cases.
branch :: ParserT st e a -> ParserT st e b -> ParserT st e b -> ParserT st e b
-- | Succeed if the first parser succeeds and the second one fails.
notFollowedBy :: ParserT st e a -> ParserT st e b -> ParserT st e a
-- | An analogue of the list foldl function: first parse a
-- b, then parse zero or more a-s, and combine the
-- results in a left-nested way by the b -> a -> b
-- function. Note: this is not the usual chainl function from the
-- parsec libraries!
chainl :: (b -> a -> b) -> ParserT st e b -> ParserT st e a -> ParserT st e b
-- | An analogue of the list foldr function: parse zero or more
-- a-s, terminated by a b, and combine the results in a
-- right-nested way using the a -> b -> b function. Note:
-- this is not the usual chainr function from the parsec
-- libraries!
chainr :: (a -> b -> b) -> ParserT st e a -> ParserT st e b -> ParserT st e b
-- | Save the parsing state, then run a parser, then restore the state.
lookahead :: ParserT st e a -> ParserT st e a
-- | Assert that there are at least n bytes remaining.
--
-- Undefined behaviour if given a negative integer.
ensure :: Int -> ParserT st e ()
-- | Assert that there are at least n# bytes remaining.
--
-- Undefined behaviour if given a negative integer.
ensure# :: Int# -> ParserT st e ()
-- | Assert that there are at least n bytes remaining (CPS).
--
-- Undefined behaviour if given a negative integer.
withEnsure :: Int -> ParserT st e r -> ParserT st e r
-- | Assert that there is at least 1 byte remaining (CPS).
--
-- Undefined behaviour if given a negative integer.
withEnsure1 :: ParserT st e r -> ParserT st e r
-- | Assert that there are at least n# bytes remaining (CPS).
--
-- Undefined behaviour if given a negative integer.
withEnsure# :: Int# -> ParserT st e r -> ParserT st e r
-- | isolate n p runs the parser p isolated to the next
-- n bytes. All isolated bytes must be consumed.
--
-- Throws a runtime error if given a negative integer.
isolate :: Int -> ParserT st e a -> ParserT st e a
-- | Isolate the given parser up to (excluding) the next null byte.
--
-- Like isolate, all isolated bytes must be consumed. The null
-- byte is consumed afterwards.
--
-- Useful for defining parsers for null-terminated data.
isolateToNextNull :: ParserT st e a -> ParserT st e a
-- | isolate# n# p runs the parser p isolated to the next
-- n# bytes. All isolated bytes must be consumed.
--
-- Throws a runtime error if given a negative integer.
isolate# :: Int# -> ParserT st e a -> ParserT st e a
-- | isolateUnsafe# n# p runs the parser p isolated to
-- the next n# bytes. All isolated bytes must be consumed.
--
-- Undefined behaviour if given a negative integer.
isolateUnsafe# :: Int# -> ParserT st e a -> ParserT st e a
-- | This is a template function which makes it possible to branch on a
-- collection of string literals in an efficient way. By using
-- switch, such branching is compiled to a trie of primitive
-- parsing operations, which has optimized control flow, vectorized reads
-- and grouped checking for needed input bytes.
--
-- The syntax is slightly magical, it overloads the usual case
-- expression. An example:
--
-- $(switch [| case _ of
--     "foo" -> pure True
--     "bar" -> pure False |])
--
-- The underscore is mandatory in case _ of. Each branch must be
-- a string literal, but optionally we may have a default case, like in
--
-- $(switch [| case _ of
--     "foo" -> pure 10
--     "bar" -> pure 20
--     _ -> pure 30 |])
--
-- All case right hand sides must be parsers with the same type. That
-- type is also the type of the whole switch expression.
--
-- A switch has longest match semantics, and the order of cases
-- does not matter, except for the default case, which may only appear as
-- the last case.
--
-- If a switch does not have a default case, and no case matches
-- the input, then it returns with failure, without having consumed any
-- input. A fallthrough to the default case also does not consume any
-- input.
switch :: Q Exp -> Q Exp
-- | Switch expression with an optional first argument for performing a
-- post-processing action after every successful branch matching, not
-- including the default branch. For example, if we have ws ::
-- ParserT st e () for a whitespace parser, we might want to consume
-- whitespace after matching on any of the switch cases. For that case,
-- we can define a "lexeme" version of switch as follows.
--
-- switch' :: Q Exp -> Q Exp -- switch' = switchWithPost (Just [| ws |]) ---- -- Note that this switch' function cannot be used in the same -- module it's defined in, because of the stage restriction of Template -- Haskell. switchWithPost :: Maybe (Q Exp) -> Q Exp -> Q Exp -- | Version of switchWithPost without syntactic sugar. The second -- argument is the list of cases, the third is the default case. rawSwitchWithPost :: Maybe (Q Exp) -> [(String, Q Exp)] -> Maybe (Q Exp) -> Q Exp -- | Zero or more. many :: Alternative f => f a -> f [a] -- | Skip a parser zero or more times. skipMany :: ParserT st e a -> ParserT st e () -- | One or more. some :: Alternative f => f a -> f [a] -- | Skip a parser one or more times. skipSome :: ParserT st e a -> ParserT st e () -- | The identity of <|> empty :: Alternative f => f a -- | The failing parser. By default, parser choice (<|>) -- arbitrarily backtracks on parser failure. failed :: ParserT st e a -- | Convert a parsing error into failure. try :: ParserT st e a -> ParserT st e a -- | Throw a parsing error. By default, parser choice (<|>) -- can't backtrack on parser error. Use try to convert an error to -- a recoverable failure. err :: e -> ParserT st e a -- | Run the parser, if an error is thrown, handle it with the given -- function. withError :: ParserT st e b -> (e -> ParserT st e b) -> ParserT st e b -- | Run the parser, and handle each possible result. withAnyResult :: ParserT st t a -> (a -> ParserT st e b) -> ParserT st e b -> (t -> ParserT st e b) -> ParserT st e b -- | Convert a parsing failure to a success. fails :: ParserT st e a -> ParserT st e () -- | Convert a parsing failure to an error. cut :: ParserT st e a -> e -> ParserT st e a -- | Run the parser, if we get a failure, throw the given error, but if we -- get an error, merge the inner and the newly given errors using the -- e -> e -> e function. 
This can be useful for -- implementing parsing errors which may propagate hints or accumulate -- contextual information. cutting :: ParserT st e a -> e -> (e -> e -> e) -> ParserT st e a -- | Convert a parsing failure to a Maybe. If possible, use -- withOption instead. optional :: ParserT st e a -> ParserT st e (Maybe a) -- | Convert a parsing failure to a (). optional_ :: ParserT st e a -> ParserT st e () -- | CPS'd version of optional. This is usually more efficient, -- since it gets rid of the extra Maybe allocation. withOption :: ParserT st e a -> (a -> ParserT st e r) -> ParserT st e r -> ParserT st e r -- | Byte offset counted backwards from the end of the buffer. Note: the -- Ord instance for Pos considers the earlier positions to -- be smaller. newtype Pos Pos :: Int -> Pos [unPos] :: Pos -> Int -- | The end of the input. endPos :: Pos -- | Very unsafe conversion between a primitive address and a position. The -- first argument points to the end of the buffer, the second argument is -- being converted. addrToPos# :: Addr# -> Addr# -> Pos -- | Very unsafe conversion between a primitive address and a position. The -- first argument points to the end of the buffer. posToAddr# :: Addr# -> Pos -> Addr# -- | A pair of positions. data Span Span :: !Pos -> !Pos -> Span -- | Slice into a ByteString using a Span. The result is -- invalid if the Span is not a valid slice of the first argument. unsafeSlice :: ByteString -> Span -> ByteString -- | Get the current position in the input. getPos :: ParserT st e Pos -- | Set the input position. -- -- Warning: this can result in crashes if the position points outside the -- current buffer. It is always safe to setPos values which came -- from getPos with the current input. setPos :: Pos -> ParserT st e () -- | Return the consumed span of a parser. spanOf :: ParserT st e a -> ParserT st e Span -- | Bind the result together with the span of the result. CPS'd version of -- spanOf for better unboxing.
withSpan :: ParserT st e a -> (a -> Span -> ParserT st e b) -> ParserT st e b -- | Return the ByteString consumed by a parser. Note: it's more -- efficient to use spanOf and withSpan instead. byteStringOf :: ParserT st e a -> ParserT st e ByteString -- | CPS'd version of byteStringOf. Can be more efficient, because -- the result is more eagerly unboxed by GHC. It's more efficient to use -- spanOf or withSpan instead. withByteString :: ParserT st e a -> (a -> ByteString -> ParserT st e b) -> ParserT st e b -- | Run a parser in a given input Span. -- -- The input position is restored after the parser is finished, so -- inSpan does not consume input and has no side effect. -- -- Warning: this operation may crash if the given span points outside the -- current parsing buffer. It's always safe to use inSpan if the -- Span comes from a previous withSpan or spanOf -- call on the current input. inSpan :: Span -> ParserT st e a -> ParserT st e a -- | Check whether a Pos points into a ByteString. validPos :: ByteString -> Pos -> Bool -- | Compute corresponding line and column numbers (both starting from 0) -- for each Pos in a list, assuming UTF8 encoding. Throw an error -- on invalid positions. Note: computing lines and columns may traverse -- the ByteString, but it traverses it only once regardless of the -- length of the position list. posLineCols :: ByteString -> [Pos] -> [(Int, Int)] -- | Create a Pos from a line and column number. Throws an error on -- out-of-bounds line and column numbers. mkPos :: ByteString -> (Int, Int) -> Pos -- | Parse a UTF-8 character literal. This is a template function, you can -- use it as $(char 'x'), for example, and the splice in this -- case has type Parser e (). char :: Char -> Q Exp -- | Parse a UTF-8 string literal. This is a template function, you can use -- it as $(string "foo"), for example, and the splice has type -- Parser e (). string :: String -> Q Exp -- | Parse any single Unicode character encoded using UTF-8 as a -- Char. 
anyChar :: ParserT st e Char -- | Skip any single Unicode character encoded using UTF-8. skipAnyChar :: ParserT st e () -- | Parse a UTF-8 Char for which a predicate holds. satisfy :: (Char -> Bool) -> ParserT st e Char -- | Skip a UTF-8 Char for which a predicate holds. skipSatisfy :: (Char -> Bool) -> ParserT st e () -- | This is a variant of satisfy which allows more optimization. We -- can pick four testing functions for the four cases for the possible -- number of bytes in the UTF-8 character. So in fusedSatisfy f1 f2 -- f3 f4, if we read a one-byte character, the result is scrutinized -- with f1, for two-bytes, with f2, and so on. This can -- result in dramatic lexing speedups. -- -- For example, if we want to accept any letter, the naive solution would -- be to use isLetter, but this accesses a large lookup table of -- Unicode character classes. We can do better with fusedSatisfy -- isLatinLetter isLetter isLetter isLetter, since here the -- isLatinLetter is inlined into the UTF-8 decoding, and it -- probably handles a great majority of all cases without accessing the -- character table. fusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> ParserT st e Char -- | Skipping variant of fusedSatisfy. skipFusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> Parser e () -- | Parse the rest of the current line as a String. Assumes UTF-8 -- encoding, throws an error if the encoding is invalid. takeLine :: ParserT st e String -- | Take the rest of the input as a String. Assumes UTF-8 encoding. takeRestString :: ParserT st e String -- | Break an UTF-8-coded ByteString to lines. Throws an error on -- invalid input. This is mostly useful for grabbing specific source -- lines for displaying error messages. linesUtf8 :: ByteString -> [String] -- | Parse any single ASCII character (a single byte) as a Char. -- -- More efficient than anyChar for ASCII-only input. 
anyAsciiChar :: ParserT st e Char -- | Skip any single ASCII character (a single byte). -- -- More efficient than skipAnyChar for ASCII-only input. skipAnyAsciiChar :: ParserT st e () -- | Parse an ASCII Char for which a predicate holds. -- -- Assumption: the predicate must only return True for ASCII-range -- characters. Otherwise this function might read a 128-255 range byte, -- thereby breaking UTF-8 decoding. satisfyAscii :: (Char -> Bool) -> ParserT st e Char -- | Skip an ASCII Char for which a predicate holds. Assumption: the -- predicate must only return True for ASCII-range characters. skipSatisfyAscii :: (Char -> Bool) -> ParserT st e () -- | Parse a non-empty ASCII decimal digit sequence as a Word. Fails -- on overflow. anyAsciiDecimalWord :: ParserT st e Word -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Int. Fails on overflow. anyAsciiDecimalInt :: ParserT st e Int -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Integer. anyAsciiDecimalInteger :: ParserT st e Integer -- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence -- as a Word. Fails on overflow. anyAsciiHexWord :: ParserT st e Word -- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence -- as a positive Int. Fails on overflow. anyAsciiHexInt :: ParserT st e Int -- | Parse the rest of the current line as a String, but restore the -- parsing state. Assumes UTF-8 encoding. This can be used for debugging. traceLine :: ParserT st e String -- | Get the rest of the input as a String, but restore the parsing -- state. Assumes UTF-8 encoding. This can be used for debugging. traceRest :: ParserT st e String -- | Create a ByteString from a Span. -- -- The result is invalid if the Span points outside the current -- buffer, or if the Span start is greater than the end position. unsafeSpanToByteString :: Span -> ParserT st e ByteString -- | Embed an IO action in a ParserT. 
This is slightly safer than -- unsafePerformIO because it will be sequenced correctly with -- respect to the surrounding actions, and its execution is guaranteed. unsafeLiftIO :: IO a -> ParserT st e a -- | Read a null-terminated bytestring (a C-style string), where the -- bytestring is known to be null-terminated somewhere in the input. -- -- Highly unsafe. Unless you have a guarantee that the string will be -- null terminated before the input ends, use anyCString instead. -- Honestly, I'm not sure if this is a good function to define. But here -- it is. -- -- Fails on GHC versions older than 9.0, since we make use of the -- cstringLength# primop introduced in GHC 9.0, and we aren't very -- useful without it. -- -- Consumes the null terminator. anyCStringUnsafe :: ParserT st e ByteString instance (GHC.Show.Show a, GHC.Show.Show e) => GHC.Show.Show (FlatParse.Basic.Result e a) instance GHC.Base.Functor (FlatParse.Basic.Result e) -- | This module contains lexer and error message primitives for a simple -- lambda calculus parser. It demonstrates a simple but decently -- informative implementation of error message propagation. module FlatParse.Examples.BasicLambda.Lexer -- | An expected item which is displayed in error messages. data Expected -- | An error message. Msg :: String -> Expected -- | A literal expected thing. Lit :: String -> Expected -- | A parsing error. data Error -- | A precisely known error, like leaving out "in" from "let". Precise :: Pos -> Expected -> Error -- | An imprecise error, when we expect a number of different things, but -- parse something else. Imprecise :: Pos -> [Expected] -> Error errorPos :: Error -> Pos -- | Merge two errors. Inner errors (which were thrown at points with more -- consumed inputs) are preferred. If errors are thrown at identical -- input positions, we prefer precise errors to imprecise ones.
-- -- The point of prioritizing inner and precise errors is to suppress the -- deluge of "expected" items, and instead try to point to a concrete -- issue to fix. merge :: Error -> Error -> Error type Parser = Parser Error -- | Pretty print an error. The ByteString input is the source file. -- The offending line from the source is displayed in the output. prettyError :: ByteString -> Error -> String -- | Imprecise cut: we slap a list of items on inner errors. cut :: Parser a -> [Expected] -> Parser a -- | Precise cut: we propagate at most a single error. cut' :: Parser a -> Expected -> Parser a runParser :: Parser a -> ByteString -> Result Error a -- | Run parser, print pretty error on failure. testParser :: Show a => Parser a -> String -> IO () -- | Parse a line comment. lineComment :: Parser () -- | Parse a potentially nested multiline comment. multilineComment :: Parser () -- | Consume whitespace. ws :: Parser () -- | Consume whitespace after running a parser. token :: Parser a -> Parser a -- | Read a starting character of an identifier. identStartChar :: Parser Char -- | Read a non-starting character of an identifier. identChar :: Parser Char -- | Check whether a Span contains exactly a keyword. Does not -- change parsing state. isKeyword :: Span -> Parser () -- | Parse a non-keyword string. symbol :: String -> Q Exp -- | Parse a non-keyword string, throw precise error on failure. symbol' :: String -> Q Exp -- | Parse a keyword string. keyword :: String -> Q Exp -- | Parse a keyword string, throw precise error on failure. keyword' :: String -> Q Exp instance GHC.Classes.Ord FlatParse.Examples.BasicLambda.Lexer.Expected instance GHC.Show.Show FlatParse.Examples.BasicLambda.Lexer.Expected instance GHC.Classes.Eq FlatParse.Examples.BasicLambda.Lexer.Expected instance GHC.Show.Show FlatParse.Examples.BasicLambda.Lexer.Error instance Data.String.IsString FlatParse.Examples.BasicLambda.Lexer.Expected -- | This module contains a simple lambda calculus parser.
This parser is -- not optimized for maximum performance; instead it's written in a style -- which emulates the look and feel of conventional monadic parsers. An -- optimized implementation would use low-level switch expressions -- more often. module FlatParse.Examples.BasicLambda.Parser type Name = ByteString -- | A term in the language. The precedences of different constructs are -- the following, in decreasing order of strength: -- --
-- x --Var :: Name -> Tm -- |
-- t u --App :: Tm -> Tm -> Tm -- |
-- lam x. t --Lam :: Name -> Tm -> Tm -- |
-- let x = t in u --Let :: Name -> Tm -> Tm -> Tm -- | true or false. BoolLit :: Bool -> Tm -- | A positive Int literal. IntLit :: Int -> Tm -- |
-- if t then u else v --If :: Tm -> Tm -> Tm -> Tm -- |
-- t + u --Add :: Tm -> Tm -> Tm -- |
-- t * u --Mul :: Tm -> Tm -> Tm -- |
-- t == u --Eq :: Tm -> Tm -> Tm -- |
-- t < u --Lt :: Tm -> Tm -> Tm -- | Parse an identifier. This parser uses isKeyword to check that -- an identifier is not a keyword. ident :: Parser Name -- | Parse an identifier, throw a precise error on failure. ident' :: Parser Name digit :: Parser Int int :: Parser Int -- | Parse a literal, identifier or parenthesized expression. atom :: Parser Tm atom' :: Parser Tm -- | Parse an App-level expression. app' :: Parser Tm -- | Parse a Mul-level expression. mul' :: Parser Tm -- | Parse an Add-level expression. add' :: Parser Tm -- | Parse an Eq or Lt-level expression. eqLt' :: Parser Tm -- | Parse a Let. pLet :: Parser Tm -- | Parse a Lam. lam :: Parser Tm -- | Parse an If. pIf :: Parser Tm -- | Parse any Tm. tm' :: Parser Tm -- | Parse a complete source file. src' :: Parser Tm p1 :: String instance GHC.Show.Show FlatParse.Examples.BasicLambda.Parser.Tm -- | Minimal parser definition. module FlatParse.Stateful.Parser -- | ParserT st r e a is a parser with a state token type -- st, a reader environment r, an error type e -- and a return type a. The different state token types support -- different embedded effects; see Parser, ParserIO and -- ParserST below. newtype ParserT (st :: ZeroBitType) r e a ParserT :: (ForeignPtrContents -> r -> Addr# -> Addr# -> Int# -> st -> Res# st e a) -> ParserT (st :: ZeroBitType) r e a [runParserT#] :: ParserT (st :: ZeroBitType) r e a -> ForeignPtrContents -> r -> Addr# -> Addr# -> Int# -> st -> Res# st e a -- | The type of pure parsers. type Parser = ParserT PureMode -- | The type of parsers which can embed IO actions. type ParserIO = ParserT IOMode -- | The type of parsers which can embed ST actions. type ParserST s = ParserT (STMode s) -- | Same as pure for ParserT except that it does not force -- the returned value. pureLazy :: a -> ParserT st r e a -- | Primitive parser result wrapped with a state token. -- -- You should rarely need to manipulate values of this type directly. 
Use -- the provided bidirectional pattern synonyms OK#, Fail# -- and Err#. type Res# (st :: ZeroBitType) e a = (# st, ResI# e a #) -- | Res# constructor for a successful parse. Contains the return -- value, a pointer to the rest of the input buffer, and the next -- Int state, plus a state token. pattern OK# :: (st :: ZeroBitType) -> a -> Addr# -> Int# -> Res# st e a -- | Res# constructor for errors which are by default -- non-recoverable. Contains the error, plus a state token. pattern Err# :: (st :: ZeroBitType) -> e -> Res# st e a -- | Res# constructor for recoverable failure. Contains only a state -- token. pattern Fail# :: (st :: ZeroBitType) -> Res# st e a -- | Primitive parser result. type ResI# e a = (# (# a, Addr#, Int# #) | (# #) | (# e #) #) -- | Choose between two parsers. If the first parser fails, try the second -- one, but if the first one throws an error, propagate the error. This -- operation can arbitrarily backtrack. -- -- Note: this exported operator has different fixity than the same -- operator in Applicative. Hide this operator if you want to use -- the Alternative version. (<|>) :: ParserT st r e a -> ParserT st r e a -> ParserT st r e a infixr 6 <|> instance Control.Monad.IO.Class.MonadIO (FlatParse.Stateful.Parser.ParserT FlatParse.Common.Parser.IOMode r e) instance GHC.Base.Functor (FlatParse.Stateful.Parser.ParserT st r e) instance GHC.Base.Applicative (FlatParse.Stateful.Parser.ParserT st r e) instance GHC.Base.Monad (FlatParse.Stateful.Parser.ParserT st r e) instance GHC.Base.Alternative (FlatParse.Stateful.Parser.ParserT st r e) instance GHC.Base.MonadPlus (FlatParse.Stateful.Parser.ParserT st r e) -- | Basic parser building blocks. module FlatParse.Stateful.Base -- | Succeed if the input is empty. eof :: ParserT st r e () -- | Read n bytes as a ByteString. Fails if fewer than -- n bytes are available. -- -- Throws a runtime error if given a negative integer. -- -- This does no copying. 
-- The ByteString returned is a "slice" of
-- the input, and will keep it alive. To avoid this, use copy on
-- the output.
take :: Int -> ParserT st r e ByteString
-- | Read n# bytes as a ByteString. Fails if fewer than
-- n# bytes are available.
--
-- Throws a runtime error if given a negative integer.
--
-- This does no copying. The ByteString returned is a "slice" of
-- the input, and will keep it alive. To avoid this, use copy on
-- the output.
take# :: Int# -> ParserT st r e ByteString
-- | Read i# bytes as a ByteString. Fails if fewer than
-- i# bytes are available.
--
-- Undefined behaviour if given a negative integer.
--
-- This does no copying. The ByteString returned is a "slice" of
-- the input, and will keep it alive. To avoid this, use copy on
-- the output.
takeUnsafe# :: Int# -> ParserT st r e ByteString
-- | Consume the rest of the input. May return the empty bytestring.
--
-- This does no copying. The ByteString returned is a "slice" of
-- the input, and will keep it alive. To avoid this, use copy on
-- the output.
takeRest :: ParserT st r e ByteString
-- | Skip forward n bytes. Fails if fewer than n bytes
-- are available.
--
-- Throws a runtime error if given a negative integer.
skip :: Int -> ParserT st r e ()
-- | Skip forward n bytes. Fails if fewer than n bytes
-- are available.
--
-- Throws a runtime error if given a negative integer.
skip# :: Int# -> ParserT st r e ()
-- | Go back i bytes in the input. Takes a positive integer.
--
-- Extremely unsafe. Makes no checks. Almost certainly a Bad Idea.
skipBack :: Int -> ParserT st r e ()
-- | Go back i# bytes in the input. Takes a positive integer.
--
-- Extremely unsafe. Makes no checks. Almost certainly a Bad Idea.
skipBack# :: Int# -> ParserT st r e ()
-- | Skip forward n# bytes and run the given parser. Fails if
-- fewer than n# bytes are available.
--
-- Throws a runtime error if given a negative integer.
atSkip# :: Int# -> ParserT st r e ret -> ParserT st r e ret
-- | Skip forward i# bytes and run the given parser. Fails if
-- fewer than i# bytes are available.
--
-- Undefined behaviour if given a negative integer.
atSkipUnsafe# :: Int# -> ParserT st r e ret -> ParserT st r e ret
-- | Branch on a parser: if the first argument succeeds, continue with the
-- second, else with the third. This can produce slightly more efficient
-- code than (<|>). Moreover, branch does not
-- backtrack from the true/false cases.
branch :: ParserT st r e a -> ParserT st r e b -> ParserT st r e b -> ParserT st r e b
-- | Succeed if the first parser succeeds and the second one fails.
notFollowedBy :: ParserT st r e a -> ParserT st r e b -> ParserT st r e a
-- | An analogue of the list foldl function: first parse a
-- b, then parse zero or more a-s, and combine the
-- results in a left-nested way by the b -> a -> b
-- function. Note: this is not the usual chainl function from the
-- parsec libraries!
chainl :: (b -> a -> b) -> ParserT st r e b -> ParserT st r e a -> ParserT st r e b
-- | An analogue of the list foldr function: parse zero or more
-- a-s, terminated by a b, and combine the results in a
-- right-nested way using the a -> b -> b function. Note:
-- this is not the usual chainr function from the parsec
-- libraries!
chainr :: (a -> b -> b) -> ParserT st r e a -> ParserT st r e b -> ParserT st r e b
-- | Save the parsing state, then run a parser, then restore the state.
lookahead :: ParserT st r e a -> ParserT st r e a
-- | Assert that there are at least n bytes remaining.
--
-- Undefined behaviour if given a negative integer.
ensure :: Int -> ParserT st r e ()
-- | Assert that there are at least n# bytes remaining.
--
-- Undefined behaviour if given a negative integer.
ensure# :: Int# -> ParserT st r e ()
-- | Assert that there are at least n bytes remaining (CPS).
--
-- Undefined behaviour if given a negative integer.
withEnsure :: Int -> ParserT st r e ret -> ParserT st r e ret -- | Assert that there is at least 1 byte remaining (CPS). -- -- Undefined behaviour if given a negative integer. withEnsure1 :: ParserT st r e ret -> ParserT st r e ret -- | Assert that there are at least n# bytes remaining (CPS). -- -- Undefined behaviour if given a negative integer. withEnsure# :: Int# -> ParserT st r e ret -> ParserT st r e ret -- | isolate n p runs the parser p isolated to the next -- n bytes. All isolated bytes must be consumed. -- -- Throws a runtime error if given a negative integer. isolate :: Int -> ParserT st r e a -> ParserT st r e a -- | isolate# n# p runs the parser p isolated to the next -- n# bytes. All isolated bytes must be consumed. -- -- Throws a runtime error if given a negative integer. isolate# :: Int# -> ParserT st r e a -> ParserT st r e a -- | isolateUnsafe# i# p runs the parser p isolated to -- the next i# bytes. All isolated bytes must be consumed. -- -- Undefined behaviour if given a negative integer. isolateUnsafe# :: Int# -> ParserT st r e ret -> ParserT st r e ret -- | Skip a parser zero or more times. skipMany :: ParserT st r e a -> ParserT st r e () -- | Skip a parser one or more times. skipSome :: ParserT st r e a -> ParserT st r e () -- | The failing parser. By default, parser choice (<|>) -- arbitrarily backtracks on parser failure. This is a synonym for -- empty. failed :: ParserT st r e a -- | Convert a parsing error into failure. try :: ParserT st r e a -> ParserT st r e a -- | Throw a parsing error. By default, parser choice (<|>) -- can't backtrack on parser error. Use try to convert an error to -- a recoverable failure. err :: e -> ParserT st r e a -- | Run the parser, if an error is thrown, handle it with the given -- function. withError :: ParserT st r e b -> (e -> ParserT st r e b) -> ParserT st r e b -- | Run the parser, and handle each possible result. 
withAnyResult :: ParserT st r t a -> (a -> ParserT st r e b) -> ParserT st r e b -> (t -> ParserT st r e b) -> ParserT st r e b
-- | Convert a parsing failure to a success.
fails :: ParserT st r e a -> ParserT st r e ()
-- | Convert a parsing failure to an error.
cut :: ParserT st r e a -> e -> ParserT st r e a
-- | Run the parser, if we get a failure, throw the given error, but if we
-- get an error, merge the inner and the newly given errors using the
-- e -> e -> e function. This can be useful for
-- implementing parsing errors which may propagate hints or accumulate
-- contextual information.
cutting :: ParserT st r e a -> e -> (e -> e -> e) -> ParserT st r e a
-- | Convert a parsing failure to a Maybe. If possible, use
-- withOption instead.
optional :: ParserT st r e a -> ParserT st r e (Maybe a)
-- | Convert a parsing failure to a ().
optional_ :: ParserT st r e a -> ParserT st r e ()
-- | CPS'd version of optional. This is usually more efficient,
-- since it gets rid of the extra Maybe allocation.
withOption :: ParserT st r e a -> (a -> ParserT st r e ret) -> ParserT st r e ret -> ParserT st r e ret
-- | Machine integer parsers.
module FlatParse.Stateful.Integers
-- | Parse any Word8.
anyWord8 :: ParserT st r e Word8
-- | Parse any Word16 (native byte order).
anyWord16 :: ParserT st r e Word16
-- | Parse any Word32 (native byte order).
anyWord32 :: ParserT st r e Word32
-- | Parse any Word64 (native byte order).
anyWord64 :: ParserT st r e Word64
-- | Parse any Int8.
anyInt8 :: ParserT st r e Int8
-- | Parse any Int16 (native byte order).
anyInt16 :: ParserT st r e Int16
-- | Parse any Int32 (native byte order).
anyInt32 :: ParserT st r e Int32
-- | Parse any Int64 (native byte order).
anyInt64 :: ParserT st r e Int64
-- | Parse any Word (native size).
anyWord :: ParserT st r e Word
-- | Parse any Int (native size).
anyInt :: ParserT st r e Int
-- | Parse any Word16 (little-endian).
anyWord16le :: ParserT st r e Word16
-- | Parse any Word16 (big-endian).
anyWord16be :: ParserT st r e Word16 -- | Parse any Word32 (little-endian). anyWord32le :: ParserT st r e Word32 -- | Parse any Word32 (big-endian). anyWord32be :: ParserT st r e Word32 -- | Parse any Word64 (little-endian). anyWord64le :: ParserT st r e Word64 -- | Parse any Word64 (big-endian). anyWord64be :: ParserT st r e Word64 -- | Parse any Int16 (little-endian). anyInt16le :: ParserT st r e Int16 -- | Parse any Int16 (big-endian). anyInt16be :: ParserT st r e Int16 -- | Parse any Int32 (little-endian). anyInt32le :: ParserT st r e Int32 -- | Parse any Int32 (big-endian). anyInt32be :: ParserT st r e Int32 -- | Parse any Int64 (little-endian). anyInt64le :: ParserT st r e Int64 -- | Parse any Int64 (big-endian). anyInt64be :: ParserT st r e Int64 -- | Read the next 1 byte and assert its value as a Word8. word8 :: Word8 -> ParserT st r e () -- | Parse any Word8 (CPS). withAnyWord8 :: (Word8 -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Word16 (native byte order) (CPS). withAnyWord16 :: (Word16 -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Word32 (native byte order) (CPS). withAnyWord32 :: (Word32 -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Word64 (native byte order) (CPS). withAnyWord64 :: (Word64 -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Int8 (CPS). withAnyInt8 :: (Int8 -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Int16 (native byte order) (CPS). withAnyInt16 :: (Int16 -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Int32 (native byte order) (CPS). withAnyInt32 :: (Int32 -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Int64 (native byte order) (CPS). withAnyInt64 :: (Int64 -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Word (native size) (CPS). withAnyWord :: (Word -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Int (native size) (CPS). 
withAnyInt :: (Int -> ParserT st r e ret) -> ParserT st r e ret -- | Unsafely parse any Word8, without asserting the input is -- non-empty. -- -- The caller must guarantee that the input has enough bytes. anyWord8Unsafe :: ParserT st r e Word8 -- | Unsafely read the next 1 byte and assert its value as a Word8. -- -- The caller must guarantee that the input has enough bytes. word8Unsafe :: Word8 -> ParserT st r e () -- | Unsafely read the next 2 bytes and assert their value as a -- Word16 (native byte order). -- -- The caller must guarantee that the input has enough bytes. word16Unsafe :: Word16 -> ParserT st r e () -- | Unsafely read the next 4 bytes and assert their value as a -- Word32. (native byte order). -- -- The caller must guarantee that the input has enough bytes. word32Unsafe :: Word32 -> ParserT st r e () -- | Unsafely read the next 8 bytes and assert their value as a -- Word64. (native byte order). -- -- The caller must guarantee that the input has enough bytes. word64Unsafe :: Word64 -> ParserT st r e () -- | Helper for defining CPS parsers for types of a constant byte size -- (i.e. machine integers). -- -- Call this with an indexXYZOffAddr primop (e.g. -- indexWord8OffAddr) and the size in bytes of the type you're -- parsing. withAnySized# :: Int# -> (Addr# -> Int# -> a) -> (a -> ParserT st r e ret) -> ParserT st r e ret -- | Unsafe helper for defining CPS parsers for types of a constant byte -- size (i.e. machine integers). -- -- Is really just syntactic sugar for applying the given parser and -- shifting the buffer along. -- -- The caller must guarantee that the input has enough bytes. withAnySizedUnsafe# :: Int# -> (Addr# -> Int# -> a) -> (a -> ParserT st r e ret) -> ParserT st r e ret -- | Unsafe helper for defining parsers for types of a constant byte size -- (i.e. machine integers) which assert the parsed value's... value. -- -- Call this with an indexXYZOffAddr primop (e.g. 
-- indexWord8OffAddr), the size in bytes of the type you're
-- parsing, and the expected value to test the parsed value against.
--
-- The caller must guarantee that the input has enough bytes.
sizedUnsafe# :: Eq a => Int# -> (Addr# -> Int# -> a) -> a -> ParserT st r e ()
-- | Bytestring parsers.
--
-- Module dependency complications prevent us from placing these in
-- FlatParse.Stateful.Base.
module FlatParse.Stateful.Bytes
-- | Read a sequence of bytes. This is a template function, you can use it
-- as $(bytes [3, 4, 5]), for example, and the splice has type
-- Parser e (). For a non-TH variant see byteString.
bytes :: [Word] -> Q Exp
-- | Template function, creates a Parser e () which unsafely
-- parses a given sequence of bytes.
--
-- The caller must guarantee that the input has enough bytes.
bytesUnsafe :: [Word] -> Q Exp
-- | Unsafe, highly dangerous parsing primitives using Addr#.
--
-- Be sure to read the documentation before using any definitions from
-- this module.
--
-- This module exports primitives useful for efficiently parsing binary
-- files that store data using an internal index.
--
-- Often, such indices describe records using a starting offset and a
-- length. Offsets are often relative to the file start, or some dynamic
-- address in the file. This way, individual records can be read out
-- efficiently (much faster than opening lots of small files!).
--
-- We may parse these in-place efficiently by adding record offsets to a
-- base memory address somewhere in the input. This is also extremely
-- unsafe, and easy to get catastrophically wrong. Thus, we provide as
-- much utility as reasonable to enable performing such parsing safely.
-- (That means CPS functions.)
--
-- Note that all definitions here should be considered unsafe. Any
-- Int# is not checked for positivity. You must perform any
-- necessary checks when you obtain your offsets and lengths as
-- Int#. Failure to do so may result in undefined behaviour.
module FlatParse.Stateful.Addr -- | Run a parser, passing it the current address the parser is at. -- -- Useful for parsing offset-based data tables. For example, you may use -- this to save the base address to use together with various relative -- offsets. withAddr# :: (Addr# -> ParserT st r e a) -> ParserT st r e a -- | takeOffAddr# addr# offset# len# moves to addr#, -- skips offset# bytes, reads len# bytes into a -- ByteString, and restores the original address. -- -- The Addr# should be from withAddr#. -- -- Useful for parsing offset-based data tables. Ex: Your file contains an -- index storing (OFFSET, LENGTH) entries where the offset is -- the byte position in the file. Begin with withAddr# $ -- tableBase# -> ..., then read each entry like -- takeOffAddr# tableBase# OFFSET LENGTH. -- -- Fails if you attempt to read outside the input. -- -- Undefined behaviour if offset# or len# is negative. -- -- Name adopted from the similar-ish indexXOffAddr# primops. takeOffAddr# :: Addr# -> Int# -> Int# -> ParserT st r e ByteString -- | withOffAddr# addr# offset# p moves to addr#, skips -- offset# bytes, then runs the given parser p. -- -- The Addr# should be from withAddr#. -- -- Fails if you attempt to read outside the input. -- -- Undefined behaviour if offset# is negative. -- -- Name adopted from the similar-ish indexXOffAddr# primops. withOffAddr# :: Addr# -> Int# -> ParserT st r e a -> ParserT st r e a -- | lookahead, but specify the address to lookahead from. -- -- The Addr# should be from withAddr#. lookaheadFromAddr# :: Addr# -> ParserT st r e a -> ParserT st r e a -- | Run a parser at the given address. -- -- The Addr# should be from withAddr#. -- -- This is a highly internal function -- you likely want -- lookaheadFromAddr#, which will reset the address after running -- the parser. atAddr# :: Addr# -> ParserT st r e a -> ParserT st r e a -- | Efficient literal branching using Template Haskell. 
module FlatParse.Stateful.Switch -- | This is a template function which makes it possible to branch on a -- collection of string literals in an efficient way. By using -- switch, such branching is compiled to a trie of primitive -- parsing operations, which has optimized control flow, vectorized reads -- and grouped checking for needed input bytes. -- -- The syntax is slightly magical, it overloads the usual case -- expression. An example: -- --
--     $(switch [| case _ of
--         "foo" -> pure True
--         "bar" -> pure False |])
--
-- The underscore is mandatory in case _ of. Each branch must be
-- a string literal, but optionally we may have a default case, like in
--
--     $(switch [| case _ of
--         "foo" -> pure 10
--         "bar" -> pure 20
--         _ -> pure 30 |])
--
-- All case right hand sides must be parsers with the same type. That
-- type is also the type of the whole switch expression.
--
-- A switch has longest match semantics, and the order of cases
-- does not matter, except for the default case, which may only appear as
-- the last case.
--
-- If a switch does not have a default case, and no case matches
-- the input, then it returns with failure, without having consumed any
-- input. A fallthrough to the default case also does not consume any
-- input.
switch :: Q Exp -> Q Exp
-- | Switch expression with an optional first argument for performing a
-- post-processing action after every successful branch matching. For
-- example, if we have ws :: ParserT st r e () for a whitespace
-- parser, we might want to consume whitespace after matching on any of
-- the switch cases. For that case, we can define a "lexeme" version of
-- switch as follows.
--
-- switch' :: Q Exp -> Q Exp -- switch' = switchWithPost (Just [| ws |]) ---- -- Note that this switch' function cannot be used in the same -- module it's defined in, because of the stage restriction of Template -- Haskell. switchWithPost :: Maybe (Q Exp) -> Q Exp -> Q Exp -- | Version of switchWithPost without syntactic sugar. The second -- argument is the list of cases, the third is the default case. rawSwitchWithPost :: Maybe (Q Exp) -> [(String, Q Exp)] -> Maybe (Q Exp) -> Q Exp -- | Parsers and textual data (UTF-8, ASCII). module FlatParse.Stateful.Text -- | Parse a UTF-8 character literal. This is a template function, you can -- use it as $(char 'x'), for example, and the splice in this -- case has type Parser e (). char :: Char -> Q Exp -- | Parse a UTF-8 string literal. This is a template function, you can use -- it as $(string "foo"), for example, and the splice has type -- Parser e (). string :: String -> Q Exp -- | Parse any single Unicode character encoded using UTF-8 as a -- Char. anyChar :: ParserT st r e Char -- | Skip any single Unicode character encoded using UTF-8. skipAnyChar :: ParserT st r e () -- | Parse a UTF-8 Char for which a predicate holds. satisfy :: (Char -> Bool) -> ParserT st r e Char -- | Skip a UTF-8 Char for which a predicate holds. skipSatisfy :: (Char -> Bool) -> ParserT st r e () -- | This is a variant of satisfy which allows more optimization. We -- can pick four testing functions for the four cases for the possible -- number of bytes in the UTF-8 character. So in fusedSatisfy f1 f2 -- f3 f4, if we read a one-byte character, the result is scrutinized -- with f1, for two-bytes, with f2, and so on. This can -- result in dramatic lexing speedups. -- -- For example, if we want to accept any letter, the naive solution would -- be to use isLetter, but this accesses a large lookup table of -- Unicode character classes. 
We can do better with fusedSatisfy -- isLatinLetter isLetter isLetter isLetter, since here the -- isLatinLetter is inlined into the UTF-8 decoding, and it -- probably handles a great majority of all cases without accessing the -- character table. fusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> ParserT st r e Char -- | Skipping variant of fusedSatisfy. skipFusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> ParserT st r e () -- | Parse the rest of the current line as a String. Assumes UTF-8 -- encoding, throws an error if the encoding is invalid. takeLine :: ParserT st r e String -- | Take the rest of the input as a String. Assumes UTF-8 encoding. takeRestString :: ParserT st r e String -- | Parse any single ASCII character (a single byte) as a Char. -- -- More efficient than anyChar for ASCII-only input. anyAsciiChar :: ParserT st r e Char -- | Skip any single ASCII character (a single byte). -- -- More efficient than anyChar_ for ASCII-only input. skipAnyAsciiChar :: ParserT st r e () -- | Parse an ASCII Char for which a predicate holds. -- -- Assumption: the predicate must only return True for ASCII-range -- characters. Otherwise this function might read a 128-255 range byte, -- thereby breaking UTF-8 decoding. satisfyAscii :: (Char -> Bool) -> ParserT st r e Char -- | Skip an ASCII Char for which a predicate holds. Assumption: the -- predicate must only return True for ASCII-range characters. skipSatisfyAscii :: (Char -> Bool) -> ParserT st r e () -- | Parse a non-empty ASCII decimal digit sequence as a Word. Fails -- on overflow. anyAsciiDecimalWord :: ParserT st r e Word -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Int. Fails on overflow. anyAsciiDecimalInt :: ParserT st r e Int -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Integer. 
anyAsciiDecimalInteger :: ParserT st r e Integer
-- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence
-- as a Word. Fails on overflow.
anyAsciiHexWord :: ParserT st r e Word
-- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence
-- as a positive Int. Fails on overflow.
anyAsciiHexInt :: ParserT st r e Int
-- | Parse the rest of the current line as a String, but restore the
-- parsing state. Assumes UTF-8 encoding. This can be used for debugging.
traceLine :: ParserT st r e String
-- | Get the rest of the input as a String, but restore the parsing
-- state. Assumes UTF-8 encoding. This can be used for debugging.
traceRest :: ParserT st r e String
-- | Parser supporting a custom reader environment, custom error types and
-- an Int state. A common use case of the Int state is to
-- keep track of column numbers to implement indentation-sensitive
-- parsers.
module FlatParse.Stateful
-- | ParserT st r e a is a parser with a state token type
-- st, a reader environment r, an error type e
-- and a return type a. The different state token types support
-- different embedded effects; see Parser, ParserIO and
-- ParserST below.
newtype ParserT (st :: ZeroBitType) r e a
ParserT :: (ForeignPtrContents -> r -> Addr# -> Addr# -> Int# -> st -> Res# st e a) -> ParserT (st :: ZeroBitType) r e a
[runParserT#] :: ParserT (st :: ZeroBitType) r e a -> ForeignPtrContents -> r -> Addr# -> Addr# -> Int# -> st -> Res# st e a
-- | The type of pure parsers.
type Parser = ParserT PureMode
-- | The type of parsers which can embed IO actions.
type ParserIO = ParserT IOMode
-- | The type of parsers which can embed ST actions.
type ParserST s = ParserT (STMode s)
-- | Higher-level boxed data type for parsing results.
data Result e a
-- | Contains return value, last Int state, unconsumed input.
OK :: a -> Int -> !ByteString -> Result e a
-- | Recoverable-by-default failure.
Fail :: Result e a
-- | Unrecoverable-by-default error.
Err :: !e -> Result e a -- | Run a pure parser. The Int argument is the initial state. runParser :: Parser r e a -> r -> Int -> ByteString -> Result e a -- | Run a parser on a String, converting it to the corresponding -- UTF-8 bytes. The Int argument is the initial state. -- -- Reminder: OverloadedStrings for ByteString does not -- yield a valid UTF-8 encoding! For non-ASCII ByteString literal -- input, use this wrapper or convert your input using -- strToUtf8. runParserUtf8 :: Parser r e a -> r -> Int -> String -> Result e a -- | Run an IO-based parser. The Int argument is the initial -- state. runParserIO :: ParserIO r e a -> r -> Int -> ByteString -> IO (Result e a) -- | Run an ST-based parser. The Int argument is the initial -- state. runParserST :: ParserST s r e a -> r -> Int -> ByteString -> ST s (Result e a) -- | Run a ParserST inside any parser. embedParserST :: forall s r e a. (forall s. ParserST s r e a) -> ParserT s r e a -- | Run a pure Parser inside any parser. embedParser :: forall s r e a. Parser r e a -> ParserT s r e a -- | Primitive parser result wrapped with a state token. -- -- You should rarely need to manipulate values of this type directly. Use -- the provided bidirectional pattern synonyms OK#, Fail# -- and Err#. type Res# (st :: ZeroBitType) e a = (# st, ResI# e a #) -- | Res# constructor for a successful parse. Contains the return -- value, a pointer to the rest of the input buffer, and the next -- Int state, plus a state token. pattern OK# :: (st :: ZeroBitType) -> a -> Addr# -> Int# -> Res# st e a -- | Res# constructor for errors which are by default -- non-recoverable. Contains the error, plus a state token. pattern Err# :: (st :: ZeroBitType) -> e -> Res# st e a -- | Res# constructor for recoverable failure. Contains only a state -- token. pattern Fail# :: (st :: ZeroBitType) -> Res# st e a -- | Primitive parser result. type ResI# e a = (# (# a, Addr#, Int# #) | (# #) | (# e #) #) -- | Run an ST action in a ParserST. 
liftST :: ST s a -> ParserST s r e a -- | Query the environment. ask :: ParserT st r e r -- | Run a parser in a modified environment. local :: (r -> r) -> ParserT st r e a -> ParserT st r e a -- | Query the Int state. get :: ParserT st r e Int -- | Write the Int state. put :: Int -> ParserT st r e () -- | Modify the Int state. modify :: (Int -> Int) -> ParserT st r e () -- | Convert an UTF8-encoded String to a ByteString. strToUtf8 :: String -> ByteString -- | Convert a ByteString to an UTF8-encoded String. utf8ToStr :: ByteString -> String -- |
-- isDigit c = '0' <= c && c <= '9'
--
isDigit :: Char -> Bool
-- |
-- isLatinLetter c = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
--
isLatinLetter :: Char -> Bool
-- |
-- isGreekLetter c = ('Α' <= c && c <= 'Ω') || ('α' <= c && c <= 'ω')
--
isGreekLetter :: Char -> Bool
-- | Succeed if the input is empty.
eof :: ParserT st r e ()
-- | Read n bytes as a ByteString. Fails if fewer than
-- n bytes are available.
--
-- Throws a runtime error if given a negative integer.
--
-- This does no copying. The ByteString returned is a "slice" of
-- the input, and will keep it alive. To avoid this, use copy on
-- the output.
take :: Int -> ParserT st r e ByteString
-- | Read n# bytes as a ByteString. Fails if fewer than
-- n# bytes are available.
--
-- Throws a runtime error if given a negative integer.
--
-- This does no copying. The ByteString returned is a "slice" of
-- the input, and will keep it alive. To avoid this, use copy on
-- the output.
take# :: Int# -> ParserT st r e ByteString
-- | Read i# bytes as a ByteString. Fails if fewer than
-- i# bytes are available.
--
-- Undefined behaviour if given a negative integer.
--
-- This does no copying. The ByteString returned is a "slice" of
-- the input, and will keep it alive. To avoid this, use copy on
-- the output.
takeUnsafe# :: Int# -> ParserT st r e ByteString
-- | Consume the rest of the input. May return the empty bytestring.
--
-- This does no copying. The ByteString returned is a "slice" of
-- the input, and will keep it alive. To avoid this, use copy on
-- the output.
takeRest :: ParserT st r e ByteString
-- | Skip forward n bytes. Fails if fewer than n bytes
-- are available.
--
-- Throws a runtime error if given a negative integer.
skip :: Int -> ParserT st r e ()
-- | Skip forward n# bytes. Fails if fewer than n# bytes
-- are available.
--
-- Throws a runtime error if given a negative integer.
skip# :: Int# -> ParserT st r e ()
-- | Go back i bytes in the input. Takes a positive integer.
--
-- Extremely unsafe. Makes no checks. Almost certainly a Bad Idea.
skipBack :: Int -> ParserT st r e ()
-- | Go back i# bytes in the input. Takes a positive integer.
--
-- Extremely unsafe. Makes no checks. Almost certainly a Bad Idea.
skipBack# :: Int# -> ParserT st r e ()
-- | Skip forward n# bytes and run the given parser. Fails if
-- fewer than n# bytes are available.
--
-- Throws a runtime error if given a negative integer.
atSkip# :: Int# -> ParserT st r e ret -> ParserT st r e ret
-- | Skip forward i# bytes and run the given parser. Fails if
-- fewer than i# bytes are available.
--
-- Undefined behaviour if given a negative integer.
atSkipUnsafe# :: Int# -> ParserT st r e ret -> ParserT st r e ret
-- | Read a sequence of bytes. This is a template function, you can use it
-- as $(bytes [3, 4, 5]), for example, and the splice has type
-- Parser e (). For a non-TH variant see byteString.
bytes :: [Word] -> Q Exp
-- | Template function, creates a Parser e () which unsafely
-- parses a given sequence of bytes.
--
-- The caller must guarantee that the input has enough bytes.
bytesUnsafe :: [Word] -> Q Exp
-- | Parse a given ByteString. If the bytestring is statically
-- known, consider using bytes instead.
byteString :: ByteString -> ParserT st r e ()
-- | Read a null-terminated bytestring (a C-style string).
--
-- Consumes the null terminator.
anyCString :: ParserT st r e ByteString
-- | Read a protobuf-style varint into a positive Int.
--
-- protobuf-style varints are byte-aligned. For each byte, the lower 7
-- bits are data and the MSB indicates if there are further bytes. Once
-- fully parsed, the 7-bit payloads are concatenated and interpreted as a
-- little-endian unsigned integer.
--
-- Fails if the varint exceeds the positive Int range.
--
-- Really, these are varnats. They also match with the LEB128 varint
-- encoding.
--
-- protobuf encodes negatives in unsigned integers using zigzag encoding.
-- See the fromZigzag family of functions for this
-- functionality.
--
-- Further reading:
-- https://developers.google.com/protocol-buffers/docs/encoding#varints
anyVarintProtobuf :: ParserT st r e Int
-- | Choose between two parsers. If the first parser fails, try the second
-- one, but if the first one throws an error, propagate the error. This
-- operation can arbitrarily backtrack.
--
-- Note: this exported operator has different fixity than the same
-- operator in Control.Applicative. Hide this operator if you want to use
-- the Alternative version.
(<|>) :: ParserT st r e a -> ParserT st r e a -> ParserT st r e a
infixr 6 <|>
-- | Branch on a parser: if the first argument succeeds, continue with the
-- second, else with the third. This can produce slightly more efficient
-- code than (<|>). Moreover, branch does not
-- backtrack from the true/false cases.
branch :: ParserT st r e a -> ParserT st r e b -> ParserT st r e b -> ParserT st r e b
-- | Succeed if the first parser succeeds and the second one fails.
notFollowedBy :: ParserT st r e a -> ParserT st r e b -> ParserT st r e a
-- | An analogue of the list foldl function: first parse a
-- b, then parse zero or more a-s, and combine the
-- results in a left-nested way by the b -> a -> b
-- function. Note: this is not the usual chainl function from the
-- parsec libraries!
chainl :: (b -> a -> b) -> ParserT st r e b -> ParserT st r e a -> ParserT st r e b
-- | An analogue of the list foldr function: parse zero or more
-- a-s, terminated by a b, and combine the results in a
-- right-nested way using the a -> b -> b function. Note:
-- this is not the usual chainr function from the parsec
-- libraries!
chainr :: (a -> b -> b) -> ParserT st r e a -> ParserT st r e b -> ParserT st r e b
-- | Save the parsing state, then run a parser, then restore the state.
lookahead :: ParserT st r e a -> ParserT st r e a
-- | Assert that there are at least n bytes remaining.
--
-- Undefined behaviour if given a negative integer.
ensure :: Int -> ParserT st r e ()
-- | Assert that there are at least n# bytes remaining.
--
-- Undefined behaviour if given a negative integer.
ensure# :: Int# -> ParserT st r e ()
-- | Assert that there are at least n bytes remaining (CPS).
--
-- Undefined behaviour if given a negative integer.
withEnsure :: Int -> ParserT st r e ret -> ParserT st r e ret
-- | Assert that there is at least 1 byte remaining (CPS).
--
-- Undefined behaviour if given a negative integer.
withEnsure1 :: ParserT st r e ret -> ParserT st r e ret
-- | Assert that there are at least n# bytes remaining (CPS).
--
-- Undefined behaviour if given a negative integer.
withEnsure# :: Int# -> ParserT st r e ret -> ParserT st r e ret
-- | isolate n p runs the parser p isolated to the next
-- n bytes. All isolated bytes must be consumed.
--
-- Throws a runtime error if given a negative integer.
isolate :: Int -> ParserT st r e a -> ParserT st r e a
-- | Isolate the given parser up to (excluding) the next null byte.
--
-- Like isolate, all isolated bytes must be consumed. The null
-- byte is consumed afterwards.
--
-- Useful for defining parsers for null-terminated data.
isolateToNextNull :: ParserT st r e a -> ParserT st r e a
-- | isolate# n# p runs the parser p isolated to the next
-- n# bytes. All isolated bytes must be consumed.
--
-- Throws a runtime error if given a negative integer.
isolate# :: Int# -> ParserT st r e a -> ParserT st r e a
-- | isolateUnsafe# i# p runs the parser p isolated to
-- the next i# bytes. All isolated bytes must be consumed.
--
-- Undefined behaviour if given a negative integer.
isolateUnsafe# :: Int# -> ParserT st r e ret -> ParserT st r e ret
-- | This is a template function which makes it possible to branch on a
-- collection of string literals in an efficient way. By using
-- switch, such branching is compiled to a trie of primitive
-- parsing operations, which has optimized control flow, vectorized reads
-- and grouped checking for needed input bytes.
--
-- The syntax is slightly magical, it overloads the usual case
-- expression. An example:
--
-- -- $(switch [| case _ of -- "foo" -> pure True -- "bar" -> pure False |]) ---- -- The underscore is mandatory in case _ of. Each branch must be -- a string literal, but optionally we may have a default case, like in -- --
-- $(switch [| case _ of -- "foo" -> pure 10 -- "bar" -> pure 20 -- _ -> pure 30 |]) ---- -- All case right hand sides must be parsers with the same type. That -- type is also the type of the whole switch expression. -- -- A switch has longest match semantics, and the order of cases -- does not matter, except for the default case, which may only appear as -- the last case. -- -- If a switch does not have a default case, and no case matches -- the input, then it returns with failure, without having consumed any -- input. A fallthrough to the default case also does not consume any -- input. switch :: Q Exp -> Q Exp -- | Switch expression with an optional first argument for performing a -- post-processing action after every successful branch matching. For -- example, if we have ws :: ParserT st r e () for a whitespace -- parser, we might want to consume whitespace after matching on any of -- the switch cases. For that case, we can define a "lexeme" version of -- switch as follows. -- --
-- switch' :: Q Exp -> Q Exp -- switch' = switchWithPost (Just [| ws |]) ---- -- Note that this switch' function cannot be used in the same -- module it's defined in, because of the stage restriction of Template -- Haskell. switchWithPost :: Maybe (Q Exp) -> Q Exp -> Q Exp -- | Version of switchWithPost without syntactic sugar. The second -- argument is the list of cases, the third is the default case. rawSwitchWithPost :: Maybe (Q Exp) -> [(String, Q Exp)] -> Maybe (Q Exp) -> Q Exp -- | Zero or more. many :: Alternative f => f a -> f [a] -- | Skip a parser zero or more times. skipMany :: ParserT st r e a -> ParserT st r e () -- | One or more. some :: Alternative f => f a -> f [a] -- | Skip a parser one or more times. skipSome :: ParserT st r e a -> ParserT st r e () -- | The identity of <|> empty :: Alternative f => f a -- | The failing parser. By default, parser choice (<|>) -- arbitrarily backtracks on parser failure. This is a synonym for -- empty. failed :: ParserT st r e a -- | Convert a parsing error into failure. try :: ParserT st r e a -> ParserT st r e a -- | Throw a parsing error. By default, parser choice (<|>) -- can't backtrack on parser error. Use try to convert an error to -- a recoverable failure. err :: e -> ParserT st r e a -- | Run the parser, if an error is thrown, handle it with the given -- function. withError :: ParserT st r e b -> (e -> ParserT st r e b) -> ParserT st r e b -- | Run the parser, and handle each possible result. withAnyResult :: ParserT st r t a -> (a -> ParserT st r e b) -> ParserT st r e b -> (t -> ParserT st r e b) -> ParserT st r e b -- | Convert a parsing failure to a success. fails :: ParserT st r e a -> ParserT st r e () -- | Convert a parsing failure to an error. cut :: ParserT st r e a -> e -> ParserT st r e a -- | Run the parser, if we get a failure, throw the given error, but if we -- get an error, merge the inner and the newly given errors using the -- e -> e -> e function. 
This can be useful for -- implementing parsing errors which may propagate hints or accumulate -- contextual information. cutting :: ParserT st r e a -> e -> (e -> e -> e) -> ParserT st r e a -- | Convert a parsing failure to a Maybe. If possible, use -- withOption instead. optional :: ParserT st r e a -> ParserT st r e (Maybe a) -- | Convert a parsing failure to a (). optional_ :: ParserT st r e a -> ParserT st r e () -- | CPS'd version of optional. This is usually more efficient, -- since it gets rid of the extra Maybe allocation. withOption :: ParserT st r e a -> (a -> ParserT st r e ret) -> ParserT st r e ret -> ParserT st r e ret -- | Byte offset counted backwards from the end of the buffer. Note: the -- Ord instance for Pos considers the earlier positions to -- be smaller. newtype Pos Pos :: Int -> Pos [unPos] :: Pos -> Int -- | The end of the input. endPos :: Pos -- | Very unsafe conversion between a primitive address and a position. The -- first argument points to the end of the buffer, the second argument is -- being converted. addrToPos# :: Addr# -> Addr# -> Pos -- | Very unsafe conversion between a primitive address and a position. The -- first argument points to the end of the buffer. posToAddr# :: Addr# -> Pos -> Addr# -- | A pair of positions. data Span Span :: !Pos -> !Pos -> Span -- | Slice into a ByteString using a Span. The result is -- invalid if the Span is not a valid slice of the first argument. unsafeSlice :: ByteString -> Span -> ByteString -- | Get the current position in the input. getPos :: ParserT st r e Pos -- | Set the input position. -- -- Warning: this can result in crashes if the position points outside the -- current buffer. It is always safe to setPos values which came -- from getPos with the current input. setPos :: Pos -> ParserT st r e () -- | Return the consumed span of a parser. Use withSpan if possible -- for better efficiency. 
spanOf :: ParserT st r e a -> ParserT st r e Span -- | Bind the result together with the span of the result. CPS'd version of -- spanOf for better unboxing. withSpan :: ParserT st r e a -> (a -> Span -> ParserT st r e b) -> ParserT st r e b -- | Return the ByteString consumed by a parser. Note: it's more -- efficient to use spanOf and withSpan instead. byteStringOf :: ParserT st r e a -> ParserT st r e ByteString -- | CPS'd version of byteStringOf. Can be more efficient, because -- the result is more eagerly unboxed by GHC. It's more efficient to use -- spanOf or withSpan instead. withByteString :: ParserT st r e a -> (a -> ByteString -> ParserT st r e b) -> ParserT st r e b -- | Run a parser in a given input Span. -- -- The input position and the parser state is restored after the parser -- is finished, so inSpan does not consume input and has no side -- effect. -- -- Warning: this operation may crash if the given span points outside the -- current parsing buffer. It's always safe to use inSpan if the -- Span comes from a previous withSpan or spanOf -- call on the current input. inSpan :: Span -> ParserT st r e a -> ParserT st r e a -- | Check whether a Pos points into a ByteString. validPos :: ByteString -> Pos -> Bool -- | Compute corresponding line and column numbers (both starting from 0) -- for each Pos in a list, assuming UTF8 encoding. Throw an error -- on invalid positions. Note: computing lines and columns may traverse -- the ByteString, but it traverses it only once regardless of the -- length of the position list. posLineCols :: ByteString -> [Pos] -> [(Int, Int)] -- | Create a Pos from a line and column number. Throws an error on -- out-of-bounds line and column numbers. mkPos :: ByteString -> (Int, Int) -> Pos -- | Parse a UTF-8 character literal. This is a template function, you can -- use it as $(char 'x'), for example, and the splice in this -- case has type Parser e (). char :: Char -> Q Exp -- | Parse a UTF-8 string literal. 
This is a template function, you can use -- it as $(string "foo"), for example, and the splice has type -- Parser e (). string :: String -> Q Exp -- | Parse any single Unicode character encoded using UTF-8 as a -- Char. anyChar :: ParserT st r e Char -- | Skip any single Unicode character encoded using UTF-8. skipAnyChar :: ParserT st r e () -- | Parse a UTF-8 Char for which a predicate holds. satisfy :: (Char -> Bool) -> ParserT st r e Char -- | Skip a UTF-8 Char for which a predicate holds. skipSatisfy :: (Char -> Bool) -> ParserT st r e () -- | This is a variant of satisfy which allows more optimization. We -- can pick four testing functions for the four cases for the possible -- number of bytes in the UTF-8 character. So in fusedSatisfy f1 f2 -- f3 f4, if we read a one-byte character, the result is scrutinized -- with f1, for two-bytes, with f2, and so on. This can -- result in dramatic lexing speedups. -- -- For example, if we want to accept any letter, the naive solution would -- be to use isLetter, but this accesses a large lookup table of -- Unicode character classes. We can do better with fusedSatisfy -- isLatinLetter isLetter isLetter isLetter, since here the -- isLatinLetter is inlined into the UTF-8 decoding, and it -- probably handles a great majority of all cases without accessing the -- character table. fusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> ParserT st r e Char -- | Skipping variant of fusedSatisfy. skipFusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> ParserT st r e () -- | Parse the rest of the current line as a String. Assumes UTF-8 -- encoding, throws an error if the encoding is invalid. takeLine :: ParserT st r e String -- | Take the rest of the input as a String. Assumes UTF-8 encoding. takeRestString :: ParserT st r e String -- | Break an UTF-8-coded ByteString to lines. Throws an error on -- invalid input. 
This is mostly useful for grabbing specific source -- lines for displaying error messages. linesUtf8 :: ByteString -> [String] -- | Parse any single ASCII character (a single byte) as a Char. -- -- More efficient than anyChar for ASCII-only input. anyAsciiChar :: ParserT st r e Char -- | Skip any single ASCII character (a single byte). -- -- More efficient than skipAnyChar for ASCII-only input. skipAnyAsciiChar :: ParserT st r e () -- | Parse an ASCII Char for which a predicate holds. -- -- Assumption: the predicate must only return True for ASCII-range -- characters. Otherwise this function might read a 128-255 range byte, -- thereby breaking UTF-8 decoding. satisfyAscii :: (Char -> Bool) -> ParserT st r e Char -- | Skip an ASCII Char for which a predicate holds. Assumption: the -- predicate must only return True for ASCII-range characters. skipSatisfyAscii :: (Char -> Bool) -> ParserT st r e () -- | Parse a non-empty ASCII decimal digit sequence as a Word. Fails -- on overflow. anyAsciiDecimalWord :: ParserT st r e Word -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Int. Fails on overflow. anyAsciiDecimalInt :: ParserT st r e Int -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Integer. anyAsciiDecimalInteger :: ParserT st r e Integer -- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence -- as a Word. Fails on overflow. anyAsciiHexWord :: ParserT st r e Word -- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence -- as a positive Int. Fails on overflow. anyAsciiHexInt :: ParserT st r e Int -- | Parse the rest of the current line as a String, but restore the -- parsing state. Assumes UTF-8 encoding. This can be used for debugging. traceLine :: ParserT st r e String -- | Get the rest of the input as a String, but restore the parsing -- state. Assumes UTF-8 encoding. This can be used for debugging. traceRest :: ParserT st r e String -- | Create a ByteString from a Span. 
-- -- The result is invalid if the Span points outside the current -- buffer, or if the Span start is greater than the end position. unsafeSpanToByteString :: Span -> ParserT st r e ByteString -- | Embed an IO action in a ParserT. This is slightly safer than -- unsafePerformIO because it will be sequenced correctly with -- respect to the surrounding actions, and its execution is guaranteed. unsafeLiftIO :: IO a -> ParserT st r e a -- | Read a null-terminated bytestring (a C-style string), where the -- bytestring is known to be null-terminated somewhere in the input. -- -- Highly unsafe. Unless you have a guarantee that the string will be -- null terminated before the input ends, use anyCString instead. -- Honestly, I'm not sure if this is a good function to define. But here -- it is. -- -- Fails on GHC versions older than 9.0, since we make use of the -- cstringLength# primop introduced in GHC 9.0, and we aren't very -- useful without it. -- -- Consumes the null terminator. anyCStringUnsafe :: ParserT st r e ByteString instance (GHC.Show.Show a, GHC.Show.Show e) => GHC.Show.Show (FlatParse.Stateful.Result e a) instance GHC.Base.Functor (FlatParse.Stateful.Result e)