-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | High-performance parsing from strict bytestrings -- -- Flatparse is a high-performance parsing library for strict -- bytestring input. See the README for more information: -- https://github.com/AndrasKovacs/flatparse. @package flatparse @version 0.5.1.0 module FlatParse.Common.Assorted shortInteger :: Int# -> Integer -- |
--   isDigit c = '0' <= c && c <= '9'
--   
isDigit :: Char -> Bool -- |
--   isLatinLetter c = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
--   
isLatinLetter :: Char -> Bool -- |
--   isGreekLetter c = ('Α' <= c && c <= 'Ω') || ('α' <= c && c <= 'ω')
--   
isGreekLetter :: Char -> Bool packBytes :: [Word] -> Word splitBytes :: [Word] -> ([Word], [Word]) charToBytes :: Char -> [Word] strToBytes :: String -> [Word] -- | Convert an UTF8-encoded String to a ByteString. strToUtf8 :: String -> ByteString -- | Convert a ByteString to an UTF8-encoded String. utf8ToStr :: ByteString -> String -- | Shortcut for 'indexCharOffAddr# addr# 0#'. derefChar8# :: Addr# -> Char# -- | Coerce a Word16 to Int16. word16ToInt16 :: Word16 -> Int16 -- | Coerce a Word32 to Int32. word32ToInt32 :: Word32 -> Int32 -- | Coerce a Word64 to Int64. word64ToInt64 :: Word64 -> Int64 -- | Assert for the given Int# that n >= 0. -- -- Throws a runtime error if given a negative integer. withPosInt# :: Int# -> r -> r -- | Unwrap the Int# from an Int and apply it to the given -- function. withIntUnwrap# :: (Int# -> r) -> Int -> r -- | Index of leftmost null byte, or (number of bytes in type) if not -- present. -- -- Adapted from Hacker's Delight 6-1. Useful in big-endian environments. zbytel :: (FiniteBits a, Num a) => a -> Int -- | bit mangling, returns 0 for inputs without a null byte -- -- Separating allows us to skip some index calculation if there was no -- null byte. zbytel'intermediate :: (FiniteBits a, Num a) => a -> a -- | bit mangling, turns intermediate value into an index -- -- Separating allows us to skip some index calculation if there was no -- null byte. zbytel'toIdx :: (FiniteBits a, Num a) => a -> Int -- | Index of rightmost null byte, or (number of bytes in type) if not -- present -- -- Adapted from Hacker's Delight 6-1. Useful in little-endian -- environments. zbyter :: (FiniteBits a, Num a) => a -> Int -- | bit mangling, returns 0 for inputs without a null byte -- -- Separating allows us to skip some index calculation if there was no -- null byte. 
zbyter'intermediate :: (FiniteBits a, Num a) => a -> a -- | bit mangling, turns intermediate value into an index -- -- Separating allows us to skip some index calculation if there was no -- null byte. zbyter'toIdx :: (FiniteBits a, Num a) => a -> Int -- | Exts compatibility wrapper. module FlatParse.Common.GHCExts module FlatParse.Common.Numbers -- | Parse a non-empty ASCII decimal digit sequence as a Word. Fails -- on overflow. anyAsciiDecimalWord# :: Addr# -> Addr# -> (# (# #) | (# Word#, Addr# #) #) -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Int. Fails on overflow. anyAsciiDecimalInt# :: Addr# -> Addr# -> (# (# #) | (# Int#, Addr# #) #) anyAsciiDecimalWord_# :: Word# -> Addr# -> Addr# -> (# (# #) | (# Word#, Addr# #) #) -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Int. May overflow. anyAsciiDecimalIntOverflow# :: Addr# -> Addr# -> (# (# #) | (# Int#, Addr# #) #) -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Integer. anyAsciiDecimalInteger# :: ForeignPtrContents -> Addr# -> Addr# -> (# (# #) | (# Integer, Addr# #) #) -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Int. May overflow. anyAsciiDecimalIntOverflow_# :: Int# -> Addr# -> Addr# -> (# Int#, Addr# #) -- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence -- as a Word. Fails on overflow. anyAsciiHexWord# :: Addr# -> Addr# -> (# (# #) | (# Word#, Addr# #) #) -- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence -- as a positive Int. Fails on overflow. 
anyAsciiHexInt# :: Addr# -> Addr# -> (# (# #) | (# Int#, Addr# #) #) anyAsciiHexWord_# :: Word# -> Addr# -> Addr# -> (# (# #) | (# Word#, Addr# #) #) fromZigzagNative :: Word -> Int fromZigzagNative# :: Word# -> Int# toZigzagNative :: Int -> Word toZigzagNative# :: Int# -> Word# toZigzagNative'# :: Word# -> Word# -- | protobuf style (LE, redundant, on continues) anyVarintProtobuf# :: Addr# -> Addr# -> (# (# #) | (# Int#, Addr#, Int# #) #) unI# :: Int -> Int# mul10# :: Int# -> Int# -- | Common low-level parser definitions. module FlatParse.Common.Parser type PureMode = Proxy# Void type IOMode = State# RealWorld type STMode s = State# s -- | Minimal parser definition. module FlatParse.Basic.Parser -- | ParserT st e a is a parser with a state token type -- st, an error type e and a return type a. -- The different state token types support different embedded effects; -- see Parser, ParserIO and ParserST below. newtype ParserT (st :: ZeroBitType) e a ParserT :: (ForeignPtrContents -> Addr# -> Addr# -> st -> Res# st e a) -> ParserT (st :: ZeroBitType) e a [runParserT#] :: ParserT (st :: ZeroBitType) e a -> ForeignPtrContents -> Addr# -> Addr# -> st -> Res# st e a -- | The type of pure parsers. type Parser = ParserT PureMode -- | The type of parsers which can embed IO actions. type ParserIO = ParserT IOMode -- | The type of parsers which can embed ST actions. type ParserST s = ParserT (STMode s) -- | Same as pure for ParserT except that it does not force -- the returned value. pureLazy :: a -> ParserT st e a -- | Primitive parser result wrapped with a state token. -- -- You should rarely need to manipulate values of this type directly. Use -- the provided bidirectional pattern synonyms OK#, Fail# -- and Err#. type Res# (st :: ZeroBitType) e a = (# st, ResI# e a #) -- | Res# constructor for a successful parse. Contains the return -- value and a pointer to the rest of the input buffer, plus a state -- token. 
pattern OK# :: (st :: ZeroBitType) -> a -> Addr# -> Res# st e a -- | Res# constructor for errors which are by default -- non-recoverable. Contains the error, plus a state token. pattern Err# :: (st :: ZeroBitType) -> e -> Res# st e a -- | Res# constructor for recoverable failure. Contains only a state -- token. pattern Fail# :: (st :: ZeroBitType) -> Res# st e a -- | Primitive parser result. type ResI# e a = (# (# a, Addr# #) | (# #) | (# e #) #) -- | Choose between two parsers. If the first parser fails, try the second -- one, but if the first one throws an error, propagate the error. This -- operation can arbitrarily backtrack. -- -- Note: this exported operator has different fixity than the same -- operator in Applicative. Hide this operator if you want to use -- the Alternative version. (<|>) :: ParserT st e a -> ParserT st e a -> ParserT st e a infixr 6 <|> instance Control.Monad.IO.Class.MonadIO (FlatParse.Basic.Parser.ParserIO e) instance GHC.Base.Functor (FlatParse.Basic.Parser.ParserT st e) instance GHC.Base.Applicative (FlatParse.Basic.Parser.ParserT st e) instance GHC.Base.Monad (FlatParse.Basic.Parser.ParserT st e) instance GHC.Base.Alternative (FlatParse.Basic.Parser.ParserT st e) instance GHC.Base.MonadPlus (FlatParse.Basic.Parser.ParserT st e) -- | Basic parser building blocks. module FlatParse.Basic.Base -- | Succeed if the input is empty. eof :: ParserT st e () -- | Read the given number of bytes as a ByteString. -- -- Throws a runtime error if given a negative integer. -- -- This does no copying. The ByteString returned is a "slice" of -- the input, and will keep it alive. To avoid this, use copy on -- the output. take :: Int -> ParserT st e ByteString -- | Read n# bytes as a ByteString. Fails if fewer than -- n# bytes are available. -- -- Throws a runtime error if given a negative integer. -- -- This does no copying. The ByteString returned is a "slice" of -- the input, and will keep it alive. To avoid this, use copy on -- the output. 
take# :: Int# -> ParserT st e ByteString -- | Read n# bytes as a ByteString. Fails if fewer than -- n# bytes are available. -- -- Undefined behaviour if given a negative integer. -- -- This does no copying. The ByteString returned is a "slice" of -- the input, and will keep it alive. To avoid this, use copy on -- the output. takeUnsafe# :: Int# -> ParserT st e ByteString -- | Consume the rest of the input. May return the empty bytestring. -- -- This does no copying. The ByteString returned is a "slice" of -- the input, and will keep it alive. To avoid this, use copy on -- the output. takeRest :: ParserT st e ByteString -- | Skip forward n bytes. Fails if fewer than n bytes -- are available. -- -- Throws a runtime error if given a negative integer. skip :: Int -> ParserT st e () -- | Skip forward n# bytes. Fails if fewer than n# bytes -- are available. -- -- Throws a runtime error if given a negative integer. skip# :: Int# -> ParserT st e () -- | Go back i bytes in the input. Takes a positive integer. -- -- Extremely unsafe. Makes no checks. Almost certainly a Bad Idea. skipBack :: Int -> ParserT st e () -- | Go back n# bytes. Takes a positive integer. -- -- Extremely unsafe. Makes no checks. Almost certainly a Bad Idea. skipBack# :: Int# -> ParserT st e () -- | Skip forward n# bytes and run the given parser. Fails if -- fewer than n# bytes are available. -- -- Throws a runtime error if given a negative integer. atSkip# :: Int# -> ParserT st e a -> ParserT st e a -- | Skip forward n bytes and run the given parser. Fails if fewer -- than n bytes are available. -- -- Undefined behaviour if given a negative integer. atSkipUnsafe# :: Int# -> ParserT st e r -> ParserT st e r -- | Branch on a parser: if the first argument succeeds, continue with the -- second, else with the third. This can produce slightly more efficient -- code than (<|>). Moreover, branch does not -- backtrack from the true/false cases. 
branch :: ParserT st e a -> ParserT st e b -> ParserT st e b -> ParserT st e b -- | Succeed if the first parser succeeds and the second one fails. notFollowedBy :: ParserT st e a -> ParserT st e b -> ParserT st e a -- | An analogue of the list foldl function: first parse a -- b, then parse zero or more a-s, and combine the -- results in a left-nested way by the b -> a -> b -- function. Note: this is not the usual chainl function from the -- parsec libraries! chainl :: (b -> a -> b) -> ParserT st e b -> ParserT st e a -> ParserT st e b -- | An analogue of the list foldr function: parse zero or more -- a-s, terminated by a b, and combine the results in a -- right-nested way using the a -> b -> b function. Note: -- this is not the usual chainr function from the parsec -- libraries! chainr :: (a -> b -> b) -> ParserT st e a -> ParserT st e b -> ParserT st e b -- | Save the parsing state, then run a parser, then restore the state. lookahead :: ParserT st e a -> ParserT st e a -- | Assert that there are at least n bytes remaining. -- -- Undefined behaviour if given a negative integer. ensure :: Int -> ParserT st e () -- | Assert that there are at least n# bytes remaining. -- -- Undefined behaviour if given a negative integer. ensure# :: Int# -> ParserT st e () -- | Assert that there are at least n# bytes remaining (CPS). -- -- Undefined behaviour if given a negative integer. withEnsure :: Int -> ParserT st e r -> ParserT st e r -- | Assert that there is at least 1 byte remaining (CPS). -- -- Undefined behaviour if given a negative integer. withEnsure1 :: ParserT st e r -> ParserT st e r -- | Assert that there are at least n# bytes remaining (CPS). -- -- Undefined behaviour if given a negative integer. withEnsure# :: Int# -> ParserT st e r -> ParserT st e r -- | isolate n p runs the parser p isolated to the next -- n bytes. All isolated bytes must be consumed. -- -- Throws a runtime error if given a negative integer. 
isolate :: Int -> ParserT st e a -> ParserT st e a -- | isolate# n# p runs the parser p isolated to the next -- n# bytes. All isolated bytes must be consumed. -- -- Throws a runtime error if given a negative integer. isolate# :: Int# -> ParserT st e a -> ParserT st e a -- | isolateUnsafe# n# p runs the parser p isolated to -- the next n# bytes. All isolated bytes must be consumed. -- -- Undefined behaviour if given a negative integer. isolateUnsafe# :: Int# -> ParserT st e a -> ParserT st e a -- | Skip a parser zero or more times. skipMany :: ParserT st e a -> ParserT st e () -- | Skip a parser one or more times. skipSome :: ParserT st e a -> ParserT st e () -- | The failing parser. By default, parser choice (<|>) -- arbitrarily backtracks on parser failure. failed :: ParserT st e a -- | Convert a parsing error into failure. try :: ParserT st e a -> ParserT st e a -- | Throw a parsing error. By default, parser choice (<|>) -- can't backtrack on parser error. Use try to convert an error to -- a recoverable failure. err :: e -> ParserT st e a -- | Run the parser, if an error is thrown, handle it with the given -- function. withError :: ParserT st e b -> (e -> ParserT st e b) -> ParserT st e b -- | Convert a parsing failure to a success. fails :: ParserT st e a -> ParserT st e () -- | Convert a parsing failure to an error. cut :: ParserT st e a -> e -> ParserT st e a -- | Run the parser, if we get a failure, throw the given error, but if we -- get an error, merge the inner and the newly given errors using the -- e -> e -> e function. This can be useful for -- implementing parsing errors which may propagate hints or accumulate -- contextual information. cutting :: ParserT st e a -> e -> (e -> e -> e) -> ParserT st e a -- | Convert a parsing failure to a Maybe. If possible, use -- withOption instead. optional :: ParserT st e a -> ParserT st e (Maybe a) -- | Convert a parsing failure to a (). optional_ :: ParserT st e a -> ParserT st e () -- | CPS'd version of optional. 
This is usually more efficient, -- since it gets rid of the extra Maybe allocation. withOption :: ParserT st e a -> (a -> ParserT st e r) -> ParserT st e r -> ParserT st e r -- | Machine integer parsers. module FlatParse.Basic.Integers -- | Parse any Word8. anyWord8 :: ParserT st e Word8 -- | Parse any Word16 (native byte order). anyWord16 :: ParserT st e Word16 -- | Parse any Word32 (native byte order). anyWord32 :: ParserT st e Word32 -- | Parse any Word64 (native byte order). anyWord64 :: ParserT st e Word64 -- | Parse any Int8. anyInt8 :: ParserT st e Int8 -- | Parse any Int16 (native byte order). anyInt16 :: ParserT st e Int16 -- | Parse any Int32 (native byte order). anyInt32 :: ParserT st e Int32 -- | Parse any Int64 (native byte order). anyInt64 :: ParserT st e Int64 -- | Parse any Word (native size). anyWord :: ParserT st e Word -- | Parse any Int (native size). anyInt :: ParserT st e Int -- | Parse any Word16 (little-endian). anyWord16le :: ParserT st e Word16 -- | Parse any Word16 (big-endian). anyWord16be :: ParserT st e Word16 -- | Parse any Word32 (little-endian). anyWord32le :: ParserT st e Word32 -- | Parse any Word32 (big-endian). anyWord32be :: ParserT st e Word32 -- | Parse any Word64 (little-endian). anyWord64le :: ParserT st e Word64 -- | Parse any Word64 (big-endian). anyWord64be :: ParserT st e Word64 -- | Parse any Int16 (little-endian). anyInt16le :: ParserT st e Int16 -- | Parse any Int16 (big-endian). anyInt16be :: ParserT st e Int16 -- | Parse any Int32 (little-endian). anyInt32le :: ParserT st e Int32 -- | Parse any Int32 (big-endian). anyInt32be :: ParserT st e Int32 -- | Parse any Int64 (little-endian). anyInt64le :: ParserT st e Int64 -- | Parse any Int64 (big-endian). anyInt64be :: ParserT st e Int64 -- | Read the next 1 byte and assert its value as a Word8. word8 :: Word8 -> ParserT st e () -- | Parse any Word8 (CPS). withAnyWord8 :: (Word8 -> ParserT st e r) -> ParserT st e r -- | Parse any Word16 (native byte order) (CPS). 
withAnyWord16 :: (Word16 -> ParserT st e r) -> ParserT st e r -- | Parse any Word32 (native byte order) (CPS). withAnyWord32 :: (Word32 -> ParserT st e r) -> ParserT st e r -- | Parse any Word64 (native byte order) (CPS). withAnyWord64 :: (Word64 -> ParserT st e r) -> ParserT st e r -- | Parse any Int8 (CPS). withAnyInt8 :: (Int8 -> ParserT st e r) -> ParserT st e r -- | Parse any Int16 (native byte order) (CPS). withAnyInt16 :: (Int16 -> ParserT st e r) -> ParserT st e r -- | Parse any Int32 (native byte order) (CPS). withAnyInt32 :: (Int32 -> ParserT st e r) -> ParserT st e r -- | Parse any Int64 (native byte order) (CPS). withAnyInt64 :: (Int64 -> ParserT st e r) -> ParserT st e r -- | Parse any Word (native size) (CPS). withAnyWord :: (Word -> ParserT st e r) -> ParserT st e r -- | Parse any Int (native size) (CPS). withAnyInt :: (Int -> ParserT st e r) -> ParserT st e r -- | Unsafely parse any Word8, without asserting the input is -- non-empty. -- -- The caller must guarantee that the input has enough bytes. anyWord8Unsafe :: ParserT st e Word8 -- | Unsafely read the next 1 byte and assert its value as a Word8. -- -- The caller must guarantee that the input has enough bytes. word8Unsafe :: Word8 -> ParserT st e () -- | Unsafely read the next 2 bytes and assert their value as a -- Word16 (native byte order). -- -- The caller must guarantee that the input has enough bytes. word16Unsafe :: Word16 -> ParserT st e () -- | Unsafely read the next 4 bytes and assert their value as a -- Word32. (native byte order). -- -- The caller must guarantee that the input has enough bytes. word32Unsafe :: Word32 -> ParserT st e () -- | Unsafely read the next 8 bytes and assert their value as a -- Word64. (native byte order). -- -- The caller must guarantee that the input has enough bytes. word64Unsafe :: Word64 -> ParserT st e () -- | Helper for defining CPS parsers for types of a constant byte size -- (i.e. machine integers). -- -- Call this with an indexXYZOffAddr primop (e.g. 
-- indexWord8OffAddr) and the size in bytes of the type you're -- parsing. withAnySized# :: Int# -> (Addr# -> Int# -> a) -> (a -> ParserT st e r) -> ParserT st e r -- | Unsafe helper for defining CPS parsers for types of a constant byte -- size (i.e. machine integers). -- -- Is really just syntactic sugar for applying the given parser and -- shifting the buffer along. -- -- The caller must guarantee that the input has enough bytes. withAnySizedUnsafe# :: Int# -> (Addr# -> Int# -> a) -> (a -> ParserT st e r) -> ParserT st e r -- | Unsafe helper for defining parsers for types of a constant byte size -- (i.e. machine integers) which assert the parsed value's... value. -- -- Call this with an indexXYZOffAddr primop (e.g. -- indexWord8OffAddr), the size in bytes of the type you're -- parsing, and the expected value to test the parsed value against. -- -- The caller must guarantee that the input has enough bytes. sizedUnsafe# :: Eq a => Int# -> (Addr# -> Int# -> a) -> a -> ParserT st e () -- | Bytestring parsers. -- -- Module dependency complications prevent us from placing these in -- FlatParse.Basic.Base. module FlatParse.Basic.Bytes -- | Read a sequence of bytes. This is a template function, you can use it -- as $(bytes [3, 4, 5]), for example, and the splice has type -- Parser e (). For a non-TH variant see byteString. bytes :: [Word] -> Q Exp -- | Template function, creates a Parser e () which unsafely -- parses a given sequence of bytes. -- -- The caller must guarantee that the input has enough bytes. bytesUnsafe :: [Word] -> Q Exp -- | Parsers for textual data (UTF-8, ASCII). module FlatParse.Basic.Text -- | Parse a UTF-8 character literal. This is a template function, you can -- use it as $(char 'x'), for example, and the splice in this -- case has type Parser e (). char :: Char -> Q Exp -- | Parse a UTF-8 string literal. This is a template function, you can use -- it as $(string "foo"), for example, and the splice has type -- Parser e (). 
string :: String -> Q Exp -- | Parse any single Unicode character encoded using UTF-8 as a -- Char. anyChar :: ParserT st e Char -- | Skip any single Unicode character encoded using UTF-8. skipAnyChar :: ParserT st e () -- | Parse a UTF-8 Char for which a predicate holds. satisfy :: (Char -> Bool) -> ParserT st e Char -- | Skip a UTF-8 Char for which a predicate holds. skipSatisfy :: (Char -> Bool) -> ParserT st e () -- | This is a variant of satisfy which allows more optimization. We -- can pick four testing functions for the four cases for the possible -- number of bytes in the UTF-8 character. So in fusedSatisfy f1 f2 -- f3 f4, if we read a one-byte character, the result is scrutinized -- with f1, for two-bytes, with f2, and so on. This can -- result in dramatic lexing speedups. -- -- For example, if we want to accept any letter, the naive solution would -- be to use isLetter, but this accesses a large lookup table of -- Unicode character classes. We can do better with fusedSatisfy -- isLatinLetter isLetter isLetter isLetter, since here the -- isLatinLetter is inlined into the UTF-8 decoding, and it -- probably handles a great majority of all cases without accessing the -- character table. fusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> ParserT st e Char -- | Skipping variant of fusedSatisfy. skipFusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> Parser e () -- | Parse the rest of the current line as a String. Assumes UTF-8 -- encoding, throws an error if the encoding is invalid. takeLine :: ParserT st e String -- | Take the rest of the input as a String. Assumes UTF-8 encoding. takeRestString :: ParserT st e String -- | Parse any single ASCII character (a single byte) as a Char. -- -- More efficient than anyChar for ASCII-only input. anyAsciiChar :: ParserT st e Char -- | Skip any single ASCII character (a single byte). -- -- More efficient than skipAnyChar for ASCII-only input. 
skipAnyAsciiChar :: ParserT st e () -- | Parse an ASCII Char for which a predicate holds. -- -- Assumption: the predicate must only return True for ASCII-range -- characters. Otherwise this function might read a 128-255 range byte, -- thereby breaking UTF-8 decoding. satisfyAscii :: (Char -> Bool) -> ParserT st e Char -- | Skip an ASCII Char for which a predicate holds. Assumption: the -- predicate must only return True for ASCII-range characters. skipSatisfyAscii :: (Char -> Bool) -> ParserT st e () -- | Parse a non-empty ASCII decimal digit sequence as a Word. Fails -- on overflow. anyAsciiDecimalWord :: ParserT st e Word -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Int. Fails on overflow. anyAsciiDecimalInt :: ParserT st e Int -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Integer. anyAsciiDecimalInteger :: ParserT st e Integer -- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence -- as a Word. Fails on overflow. anyAsciiHexWord :: ParserT st e Word -- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence -- as a positive Int. Fails on overflow. anyAsciiHexInt :: ParserT st e Int -- | Parse the rest of the current line as a String, but restore the -- parsing state. Assumes UTF-8 encoding. This can be used for debugging. traceLine :: ParserT st e String -- | Get the rest of the input as a String, but restore the parsing -- state. Assumes UTF-8 encoding. This can be used for debugging. traceRest :: ParserT st e String -- | Unsafe, highly dangerous parsing primitives using Addr#. -- -- Be sure to read the documentation before using any definitions from -- this module. -- -- This module exports primitives useful for efficiently parsing binary -- files that store data using an internal index. -- -- Often, such indices describe records using a starting offset and a -- length. Offsets are often relative to the file start, or some dynamic -- address in the file. 
This way, individual records can be read out -- efficiently (much faster than opening lots of small files!). -- -- We may parse these in-place efficiently by adding record offsets to a -- base memory address somewhere in the input. This is also extremely -- unsafe, and easy to get catastrophically wrong. Thus, we provide as -- much utility as reasonable to enable performing such parsing safely. -- (That means CPS functions.) -- -- Note that all definitions here should be considered unsafe. Any -- Int# is not checked for positivity. You must perform any -- necessary checks when you obtain your offsets and lengths as -- Int#. Failure to do so may result in undefined behaviour. module FlatParse.Basic.Addr -- | Run a parser, passing it the current address the parser is at. -- -- Useful for parsing offset-based data tables. For example, you may use -- this to save the base address to use together with various relative -- offsets. withAddr# :: (Addr# -> ParserT st e a) -> ParserT st e a -- | takeOffAddr# addr# offset# len# moves to addr#, -- skips offset# bytes, reads len# bytes into a -- ByteString, and restores the original address. -- -- The Addr# should be from withAddr#. -- -- Useful for parsing offset-based data tables. Ex: Your file contains an -- index storing (OFFSET, LENGTH) entries where the offset is -- the byte position in the file. Begin with withAddr# $ -- tableBase# -> ..., then read each entry like -- takeOffAddr# tableBase# OFFSET LENGTH. -- -- Fails if you attempt to read outside the input. -- -- Undefined behaviour if offset# or len# is negative. -- -- Name adopted from the similar-ish indexXOffAddr# primops. takeOffAddr# :: Addr# -> Int# -> Int# -> ParserT st e ByteString -- | withOffAddr# addr# offset# p moves to addr#, skips -- offset# bytes, then runs the given parser p. -- -- The Addr# should be from withAddr#. -- -- Fails if you attempt to read outside the input. -- -- Undefined behaviour if offset# is negative. 
-- -- Name adopted from the similar-ish indexXOffAddr# primops. withOffAddr# :: Addr# -> Int# -> ParserT st e a -> ParserT st e a -- | lookahead, but specify the address to lookahead from. -- -- The Addr# should be from withAddr#. lookaheadFromAddr# :: Addr# -> ParserT st e a -> ParserT st e a -- | Run a parser at the given address. -- -- The Addr# should be from withAddr#. -- -- This is a highly internal function -- you likely want -- lookaheadFromAddr#, which will reset the address after running -- the parser. atAddr# :: Addr# -> ParserT st e a -> ParserT st e a module FlatParse.Common.Position -- | Byte offset counted backwards from the end of the buffer. Note: the -- Ord instance for Pos considers the earlier positions to -- be smaller. newtype Pos Pos :: Int -> Pos [unPos] :: Pos -> Int -- | The end of the input. endPos :: Pos -- | Very unsafe conversion between a primitive address and a position. The -- first argument points to the end of the buffer, the second argument is -- being converted. addrToPos# :: Addr# -> Addr# -> Pos -- | Very unsafe conversion between a primitive address and a position. The -- first argument points to the end of the buffer. posToAddr# :: Addr# -> Pos -> Addr# -- | A pair of positions. data Span Span :: !Pos -> !Pos -> Span -- | Slice into a ByteString using a Span. The result is -- invalid if the Span is not a valid slice of the first argument. 
unsafeSlice :: ByteString -> Span -> ByteString instance GHC.Classes.Eq FlatParse.Common.Position.Pos instance GHC.Show.Show FlatParse.Common.Position.Pos instance GHC.Show.Show FlatParse.Common.Position.Span instance GHC.Classes.Eq FlatParse.Common.Position.Span instance GHC.Classes.Ord FlatParse.Common.Position.Pos module FlatParse.Common.Switch data Trie a Branch :: !a -> !Map Word (Trie a) -> Trie a type Rule = Maybe Int nilTrie :: Trie Rule updRule :: Int -> Maybe Int -> Maybe Int insert :: Int -> [Word] -> Trie Rule -> Trie Rule listToTrie :: [(Int, String)] -> Trie Rule -- | Decorate a trie with the minimum lengths of non-empty paths. This is -- used later to place ensureBytes#. mindepths :: Trie Rule -> Trie (Rule, Int) data Trie' a Branch' :: !a -> !Map Word (Trie' a) -> Trie' a Path :: !a -> ![Word] -> !Trie' a -> Trie' a -- | Compress linear paths. pathify :: Trie (Rule, Int) -> Trie' (Rule, Int) -- | Compute where to fall back after we exhausted a branch. If the branch -- is empty, that means we've succeeded at reading and we jump to the rhs -- rule. fallbacks :: Trie' (Rule, Int) -> Trie' (Rule, Int, Int) -- | Decorate with ensureBytes# invocations, represented as `Maybe -- Int`. ensureBytes :: Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Maybe Int) compileTrie :: [(Int, String)] -> Trie' (Rule, Int, Maybe Int) instance GHC.Show.Show a => GHC.Show.Show (FlatParse.Common.Switch.Trie a) instance GHC.Show.Show a => GHC.Show.Show (FlatParse.Common.Switch.Trie' a) -- | Efficient literal branching using Template Haskell. module FlatParse.Basic.Switch -- | This is a template function which makes it possible to branch on a -- collection of string literals in an efficient way. By using -- switch, such branching is compiled to a trie of primitive -- parsing operations, which has optimized control flow, vectorized reads -- and grouped checking for needed input bytes. -- -- The syntax is slightly magical, it overloads the usual case -- expression. An example: -- --
--   $(switch [| case _ of
--       "foo" -> pure True
--       "bar" -> pure False |])
--   
-- -- The underscore is mandatory in case _ of. Each branch must be -- a string literal, but optionally we may have a default case, like in -- --
--   $(switch [| case _ of
--       "foo" -> pure 10
--       "bar" -> pure 20
--       _     -> pure 30 |])
--   
-- -- All case right hand sides must be parsers with the same type. That -- type is also the type of the whole switch expression. -- -- A switch has longest match semantics, and the order of cases -- does not matter, except for the default case, which may only appear as -- the last case. -- -- If a switch does not have a default case, and no case matches -- the input, then it returns with failure, without having consumed any -- input. A fallthrough to the default case also does not consume any -- input. switch :: Q Exp -> Q Exp -- | Switch expression with an optional first argument for performing a -- post-processing action after every successful branch matching, not -- including the default branch. For example, if we have ws :: -- ParserT st e () for a whitespace parser, we might want to consume -- whitespace after matching on any of the switch cases. For that case, -- we can define a "lexeme" version of switch as follows. -- --
--   switch' :: Q Exp -> Q Exp
--   switch' = switchWithPost (Just [| ws |])
--   
-- Note that this switch' function cannot be used in the same -- module it's defined in, because of the stage restriction of Template -- Haskell. switchWithPost :: Maybe (Q Exp) -> Q Exp -> Q Exp -- | Version of switchWithPost without syntactic sugar. The second -- argument is the list of cases, the third is the default case. rawSwitchWithPost :: Maybe (Q Exp) -> [(String, Q Exp)] -> Maybe (Q Exp) -> Q Exp -- | Parser supporting custom error types and embeddable IO or -- ST actions, but no other bells and whistles. -- -- If you need efficient indentation parsing, consider -- FlatParse.Stateful instead. module FlatParse.Basic -- | ParserT st e a is a parser with a state token type -- st, an error type e and a return type a. -- The different state token types support different embedded effects; -- see Parser, ParserIO and ParserST below. newtype ParserT (st :: ZeroBitType) e a ParserT :: (ForeignPtrContents -> Addr# -> Addr# -> st -> Res# st e a) -> ParserT (st :: ZeroBitType) e a [runParserT#] :: ParserT (st :: ZeroBitType) e a -> ForeignPtrContents -> Addr# -> Addr# -> st -> Res# st e a -- | The type of pure parsers. type Parser = ParserT PureMode -- | The type of parsers which can embed IO actions. type ParserIO = ParserT IOMode -- | The type of parsers which can embed ST actions. type ParserST s = ParserT (STMode s) -- | Higher-level boxed data type for parsing results. data Result e a -- | Contains return value and unconsumed input. OK :: a -> !ByteString -> Result e a -- | Recoverable-by-default failure. Fail :: Result e a -- | Unrecoverable-by-default error. Err :: !e -> Result e a -- | Run a parser. runParser :: Parser e a -> ByteString -> Result e a -- | Run a parser on a String, converting it to the corresponding -- UTF-8 bytes. -- -- Reminder: OverloadedStrings for ByteString does not -- yield a valid UTF-8 encoding! For non-ASCII ByteString literal -- input, use this wrapper or properly convert your input first. 
runParserUtf8 :: Parser e a -> String -> Result e a -- | Run an IO-based parser. runParserIO :: ParserIO e a -> ByteString -> IO (Result e a) -- | Run an ST-based parser. runParserST :: ParserST s e a -> ByteString -> ST s (Result e a) -- | Run a ParserST inside a pure parser. embedParserST :: forall e a. (forall s. ParserST s e a) -> Parser e a -- | Primitive parser result wrapped with a state token. -- -- You should rarely need to manipulate values of this type directly. Use -- the provided bidirectional pattern synonyms OK#, Fail# -- and Err#. type Res# (st :: ZeroBitType) e a = (# st, ResI# e a #) -- | Res# constructor for a successful parse. Contains the return -- value and a pointer to the rest of the input buffer, plus a state -- token. pattern OK# :: (st :: ZeroBitType) -> a -> Addr# -> Res# st e a -- | Res# constructor for errors which are by default -- non-recoverable. Contains the error, plus a state token. pattern Err# :: (st :: ZeroBitType) -> e -> Res# st e a -- | Res# constructor for recoverable failure. Contains only a state -- token. pattern Fail# :: (st :: ZeroBitType) -> Res# st e a -- | Primitive parser result. type ResI# e a = (# (# a, Addr# #) | (# #) | (# e #) #) -- | Run an ST action in a ParserST. liftST :: ST s a -> ParserST s e a -- | Convert an UTF8-encoded String to a ByteString. strToUtf8 :: String -> ByteString -- | Convert a ByteString to an UTF8-encoded String. utf8ToStr :: ByteString -> String -- |
--   isDigit c = '0' <= c && c <= '9'
--   
isDigit :: Char -> Bool -- |
--   isLatinLetter c = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
--   
isLatinLetter :: Char -> Bool -- |
--   isGreekLetter c = ('Α' <= c && c <= 'Ω') || ('α' <= c && c <= 'ω')
--   
isGreekLetter :: Char -> Bool -- | Succeed if the input is empty. eof :: ParserT st e () -- | Read the given number of bytes as a ByteString. -- -- Throws a runtime error if given a negative integer. -- -- This does no copying. The ByteString returned is a "slice" of -- the input, and will keep it alive. To avoid this, use copy on -- the output. take :: Int -> ParserT st e ByteString -- | Read n# bytes as a ByteString. Fails if fewer than -- n# bytes are available. -- -- Throws a runtime error if given a negative integer. -- -- This does no copying. The ByteString returned is a "slice" of -- the input, and will keep it alive. To avoid this, use copy on -- the output. take# :: Int# -> ParserT st e ByteString -- | Read n# bytes as a ByteString. Fails if fewer than -- n# bytes are available. -- -- Undefined behaviour if given a negative integer. -- -- This does no copying. The ByteString returned is a "slice" of -- the input, and will keep it alive. To avoid this, use copy on -- the output. takeUnsafe# :: Int# -> ParserT st e ByteString -- | Consume the rest of the input. May return the empty bytestring. -- -- This does no copying. The ByteString returned is a "slice" of -- the input, and will keep it alive. To avoid this, use copy on -- the output. takeRest :: ParserT st e ByteString -- | Skip forward n bytes. Fails if fewer than n bytes -- are available. -- -- Throws a runtime error if given a negative integer. skip :: Int -> ParserT st e () -- | Skip forward n# bytes. Fails if fewer than n# bytes -- are available. -- -- Throws a runtime error if given a negative integer. skip# :: Int# -> ParserT st e () -- | Go back i bytes in the input. Takes a positive integer. -- -- Extremely unsafe. Makes no checks. Almost certainly a Bad Idea. skipBack :: Int -> ParserT st e () -- | Go back n# bytes. Takes a positive integer. -- -- Extremely unsafe. Makes no checks. Almost certainly a Bad Idea. 
skipBack# :: Int# -> ParserT st e () -- | Skip forward n# bytes and run the given parser. Fails if -- fewer than n# bytes are available. -- -- Throws a runtime error if given a negative integer. atSkip# :: Int# -> ParserT st e a -> ParserT st e a -- | Skip forward n bytes and run the given parser. Fails if fewer -- than n bytes are available. -- -- Undefined behaviour if given a negative integer. atSkipUnsafe# :: Int# -> ParserT st e r -> ParserT st e r -- | Read a sequence of bytes. This is a template function, you can use it -- as $(bytes [3, 4, 5]), for example, and the splice has type -- Parser e (). For a non-TH variant see byteString. bytes :: [Word] -> Q Exp -- | Template function, creates a Parser e () which unsafely -- parses a given sequence of bytes. -- -- The caller must guarantee that the input has enough bytes. bytesUnsafe :: [Word] -> Q Exp -- | Parse a given ByteString. -- -- If the bytestring is statically known, consider using bytes -- instead. byteString :: ByteString -> ParserT st e () -- | Read a null-terminated bytestring (a C-style string). -- -- Consumes the null terminator. anyCString :: ParserT st e ByteString -- | Read a protobuf-style varint into a positive Int. -- -- protobuf-style varints are byte-aligned. For each byte, the lower 7 -- bits are data and the MSB indicates if there are further bytes. Once -- fully parsed, the 7-bit payloads are concatenated and interpreted as a -- little-endian unsigned integer. -- -- Fails if the varint exceeds the positive Int range. -- -- Really, these are varnats. They also match with the LEB128 varint -- encoding. -- -- protobuf encodes negatives in unsigned integers using zigzag encoding. -- See the fromZigzag family of functions for this -- functionality. -- -- Further reading: -- https://developers.google.com/protocol-buffers/docs/encoding#varints anyVarintProtobuf :: ParserT st e Int -- | Choose between two parsers. 
If the first parser fails, try the second -- one, but if the first one throws an error, propagate the error. This -- operation can arbitrarily backtrack. -- -- Note: this exported operator has different fixity than the same -- operator in Applicative. Hide this operator if you want to use -- the Alternative version. (<|>) :: ParserT st e a -> ParserT st e a -> ParserT st e a infixr 6 <|> -- | Branch on a parser: if the first argument succeeds, continue with the -- second, else with the third. This can produce slightly more efficient -- code than (<|>). Moreover, branch does not -- backtrack from the true/false cases. branch :: ParserT st e a -> ParserT st e b -> ParserT st e b -> ParserT st e b -- | Succeed if the first parser succeeds and the second one fails. notFollowedBy :: ParserT st e a -> ParserT st e b -> ParserT st e a -- | An analogue of the list foldl function: first parse a -- b, then parse zero or more a-s, and combine the -- results in a left-nested way by the b -> a -> b -- function. Note: this is not the usual chainl function from the -- parsec libraries! chainl :: (b -> a -> b) -> ParserT st e b -> ParserT st e a -> ParserT st e b -- | An analogue of the list foldr function: parse zero or more -- a-s, terminated by a b, and combine the results in a -- right-nested way using the a -> b -> b function. Note: -- this is not the usual chainr function from the parsec -- libraries! chainr :: (a -> b -> b) -> ParserT st e a -> ParserT st e b -> ParserT st e b -- | Save the parsing state, then run a parser, then restore the state. lookahead :: ParserT st e a -> ParserT st e a -- | Assert that there are at least n bytes remaining. -- -- Undefined behaviour if given a negative integer. ensure :: Int -> ParserT st e () -- | Assert that there are at least n# bytes remaining. -- -- Undefined behaviour if given a negative integer. ensure# :: Int# -> ParserT st e () -- | Assert that there are at least n# bytes remaining (CPS). 
-- -- Undefined behaviour if given a negative integer. withEnsure :: Int -> ParserT st e r -> ParserT st e r -- | Assert that there is at least 1 byte remaining (CPS). -- -- Undefined behaviour if given a negative integer. withEnsure1 :: ParserT st e r -> ParserT st e r -- | Assert that there are at least n# bytes remaining (CPS). -- -- Undefined behaviour if given a negative integer. withEnsure# :: Int# -> ParserT st e r -> ParserT st e r -- | isolate n p runs the parser p isolated to the next -- n bytes. All isolated bytes must be consumed. -- -- Throws a runtime error if given a negative integer. isolate :: Int -> ParserT st e a -> ParserT st e a -- | Isolate the given parser up to (excluding) the next null byte. -- -- Like isolate, all isolated bytes must be consumed. The null -- byte is consumed afterwards. -- -- Useful for defining parsers for null-terminated data. isolateToNextNull :: ParserT st e a -> ParserT st e a -- | isolate# n# p runs the parser p isolated to the next -- n# bytes. All isolated bytes must be consumed. -- -- Throws a runtime error if given a negative integer. isolate# :: Int# -> ParserT st e a -> ParserT st e a -- | isolateUnsafe# n# p runs the parser p isolated to -- the next n# bytes. All isolated bytes must be consumed. -- -- Undefined behaviour if given a negative integer. isolateUnsafe# :: Int# -> ParserT st e a -> ParserT st e a -- | This is a template function which makes it possible to branch on a -- collection of string literals in an efficient way. By using -- switch, such branching is compiled to a trie of primitive -- parsing operations, which has optimized control flow, vectorized reads -- and grouped checking for needed input bytes. -- -- The syntax is slightly magical, it overloads the usual case -- expression. An example: -- --
--   $(switch [| case _ of
--       "foo" -> pure True
--       "bar" -> pure False |])
--   
-- -- The underscore is mandatory in case _ of. Each branch must be -- a string literal, but optionally we may have a default case, like in -- --
--   $(switch [| case _ of
--       "foo" -> pure 10
--       "bar" -> pure 20
--       _     -> pure 30 |])
--   
-- -- All case right hand sides must be parsers with the same type. That -- type is also the type of the whole switch expression. -- -- A switch has longest match semantics, and the order of cases -- does not matter, except for the default case, which may only appear as -- the last case. -- -- If a switch does not have a default case, and no case matches -- the input, then it returns with failure, without having consumed any -- input. A fallthrough to the default case also does not consume any -- input. switch :: Q Exp -> Q Exp -- | Switch expression with an optional first argument for performing a -- post-processing action after every successful branch matching, not -- including the default branch. For example, if we have ws :: -- ParserT st e () for a whitespace parser, we might want to consume -- whitespace after matching on any of the switch cases. For that case, -- we can define a "lexeme" version of switch as follows. -- --
--   switch' :: Q Exp -> Q Exp
--   switch' = switchWithPost (Just [| ws |])
--   
-- -- Note that this switch' function cannot be used in the same -- module it's defined in, because of the stage restriction of Template -- Haskell. switchWithPost :: Maybe (Q Exp) -> Q Exp -> Q Exp -- | Version of switchWithPost without syntactic sugar. The second -- argument is the list of cases, the third is the default case. rawSwitchWithPost :: Maybe (Q Exp) -> [(String, Q Exp)] -> Maybe (Q Exp) -> Q Exp -- | Zero or more. many :: Alternative f => f a -> f [a] -- | Skip a parser zero or more times. skipMany :: ParserT st e a -> ParserT st e () -- | One or more. some :: Alternative f => f a -> f [a] -- | Skip a parser one or more times. skipSome :: ParserT st e a -> ParserT st e () -- | The identity of <|> empty :: Alternative f => f a -- | The failing parser. By default, parser choice (<|>) -- arbitrarily backtracks on parser failure. failed :: ParserT st e a -- | Convert a parsing error into failure. try :: ParserT st e a -> ParserT st e a -- | Throw a parsing error. By default, parser choice (<|>) -- can't backtrack on parser error. Use try to convert an error to -- a recoverable failure. err :: e -> ParserT st e a -- | Run the parser, if an error is thrown, handle it with the given -- function. withError :: ParserT st e b -> (e -> ParserT st e b) -> ParserT st e b -- | Convert a parsing failure to a success. fails :: ParserT st e a -> ParserT st e () -- | Convert a parsing failure to an error. cut :: ParserT st e a -> e -> ParserT st e a -- | Run the parser, if we get a failure, throw the given error, but if we -- get an error, merge the inner and the newly given errors using the -- e -> e -> e function. This can be useful for -- implementing parsing errors which may propagate hints or accumulate -- contextual information. cutting :: ParserT st e a -> e -> (e -> e -> e) -> ParserT st e a -- | Convert a parsing failure to a Maybe. If possible, use -- withOption instead. 
optional :: ParserT st e a -> ParserT st e (Maybe a) -- | Convert a parsing failure to a (). optional_ :: ParserT st e a -> ParserT st e () -- | CPS'd version of optional. This is usually more efficient, -- since it gets rid of the extra Maybe allocation. withOption :: ParserT st e a -> (a -> ParserT st e r) -> ParserT st e r -> ParserT st e r -- | Byte offset counted backwards from the end of the buffer. Note: the -- Ord instance for Pos considers the earlier positions to -- be smaller. newtype Pos Pos :: Int -> Pos [unPos] :: Pos -> Int -- | The end of the input. endPos :: Pos -- | Very unsafe conversion between a primitive address and a position. The -- first argument points to the end of the buffer, the second argument is -- being converted. addrToPos# :: Addr# -> Addr# -> Pos -- | Very unsafe conversion between a primitive address and a position. The -- first argument points to the end of the buffer. posToAddr# :: Addr# -> Pos -> Addr# -- | A pair of positions. data Span Span :: !Pos -> !Pos -> Span -- | Slice into a ByteString using a Span. The result is -- invalid if the Span is not a valid slice of the first argument. unsafeSlice :: ByteString -> Span -> ByteString -- | Get the current position in the input. getPos :: ParserT st e Pos -- | Set the input position. -- -- Warning: this can result in crashes if the position points outside the -- current buffer. It is always safe to setPos values which came -- from getPos with the current input. setPos :: Pos -> ParserT st e () -- | Return the consumed span of a parser. spanOf :: ParserT st e a -> ParserT st e Span -- | Bind the result together with the span of the result. CPS'd version of -- spanOf for better unboxing. withSpan :: ParserT st e a -> (a -> Span -> ParserT st e b) -> ParserT st e b -- | Return the ByteString consumed by a parser. Note: it's more -- efficient to use spanOf and withSpan instead. byteStringOf :: ParserT st e a -> ParserT st e ByteString -- | CPS'd version of byteStringOf. 
Can be more efficient, because -- the result is more eagerly unboxed by GHC. It's more efficient to use -- spanOf or withSpan instead. withByteString :: ParserT st e a -> (a -> ByteString -> ParserT st e b) -> ParserT st e b -- | Run a parser in a given input Span. -- -- The input position is restored after the parser is finished, so -- inSpan does not consume input and has no side effect. -- -- Warning: this operation may crash if the given span points outside the -- current parsing buffer. It's always safe to use inSpan if the -- Span comes from a previous withSpan or spanOf -- call on the current input. inSpan :: Span -> ParserT st e a -> ParserT st e a -- | Check whether a Pos points into a ByteString. validPos :: ByteString -> Pos -> Bool -- | Compute corresponding line and column numbers for each Pos in a -- list, assuming UTF8 encoding. Throw an error on invalid positions. -- Note: computing lines and columns may traverse the ByteString, -- but it traverses it only once regardless of the length of the position -- list. posLineCols :: ByteString -> [Pos] -> [(Int, Int)] -- | Create a Pos from a line and column number. Throws an error on -- out-of-bounds line and column numbers. mkPos :: ByteString -> (Int, Int) -> Pos -- | Parse a UTF-8 character literal. This is a template function, you can -- use it as $(char 'x'), for example, and the splice in this -- case has type Parser e (). char :: Char -> Q Exp -- | Parse a UTF-8 string literal. This is a template function, you can use -- it as $(string "foo"), for example, and the splice has type -- Parser e (). string :: String -> Q Exp -- | Parse any single Unicode character encoded using UTF-8 as a -- Char. anyChar :: ParserT st e Char -- | Skip any single Unicode character encoded using UTF-8. skipAnyChar :: ParserT st e () -- | Parse a UTF-8 Char for which a predicate holds. satisfy :: (Char -> Bool) -> ParserT st e Char -- | Skip a UTF-8 Char for which a predicate holds. 
skipSatisfy :: (Char -> Bool) -> ParserT st e () -- | This is a variant of satisfy which allows more optimization. We -- can pick four testing functions for the four cases for the possible -- number of bytes in the UTF-8 character. So in fusedSatisfy f1 f2 -- f3 f4, if we read a one-byte character, the result is scrutinized -- with f1, for two-bytes, with f2, and so on. This can -- result in dramatic lexing speedups. -- -- For example, if we want to accept any letter, the naive solution would -- be to use isLetter, but this accesses a large lookup table of -- Unicode character classes. We can do better with fusedSatisfy -- isLatinLetter isLetter isLetter isLetter, since here the -- isLatinLetter is inlined into the UTF-8 decoding, and it -- probably handles a great majority of all cases without accessing the -- character table. fusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> ParserT st e Char -- | Skipping variant of fusedSatisfy. skipFusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> Parser e () -- | Parse the rest of the current line as a String. Assumes UTF-8 -- encoding, throws an error if the encoding is invalid. takeLine :: ParserT st e String -- | Take the rest of the input as a String. Assumes UTF-8 encoding. takeRestString :: ParserT st e String -- | Break an UTF-8-coded ByteString to lines. Throws an error on -- invalid input. This is mostly useful for grabbing specific source -- lines for displaying error messages. linesUtf8 :: ByteString -> [String] -- | Parse any single ASCII character (a single byte) as a Char. -- -- More efficient than anyChar for ASCII-only input. anyAsciiChar :: ParserT st e Char -- | Skip any single ASCII character (a single byte). -- -- More efficient than skipAnyChar for ASCII-only input. skipAnyAsciiChar :: ParserT st e () -- | Parse an ASCII Char for which a predicate holds. 
-- -- Assumption: the predicate must only return True for ASCII-range -- characters. Otherwise this function might read a 128-255 range byte, -- thereby breaking UTF-8 decoding. satisfyAscii :: (Char -> Bool) -> ParserT st e Char -- | Skip an ASCII Char for which a predicate holds. Assumption: the -- predicate must only return True for ASCII-range characters. skipSatisfyAscii :: (Char -> Bool) -> ParserT st e () -- | Parse a non-empty ASCII decimal digit sequence as a Word. Fails -- on overflow. anyAsciiDecimalWord :: ParserT st e Word -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Int. Fails on overflow. anyAsciiDecimalInt :: ParserT st e Int -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Integer. anyAsciiDecimalInteger :: ParserT st e Integer -- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence -- as a Word. Fails on overflow. anyAsciiHexWord :: ParserT st e Word -- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence -- as a positive Int. Fails on overflow. anyAsciiHexInt :: ParserT st e Int -- | Parse the rest of the current line as a String, but restore the -- parsing state. Assumes UTF-8 encoding. This can be used for debugging. traceLine :: ParserT st e String -- | Get the rest of the input as a String, but restore the parsing -- state. Assumes UTF-8 encoding. This can be used for debugging. traceRest :: ParserT st e String -- | Create a ByteString from a Span. -- -- The result is invalid if the Span points outside the current -- buffer, or if the Span start is greater than the end position. unsafeSpanToByteString :: Span -> ParserT st e ByteString -- | Embed an IO action in a ParserT. This is slightly safer than -- unsafePerformIO because it will be sequenced correctly with -- respect to the surrounding actions, and its execution is guaranteed. 
unsafeLiftIO :: IO a -> ParserT st e a -- | Read a null-terminated bytestring (a C-style string), where the -- bytestring is known to be null-terminated somewhere in the input. -- -- Highly unsafe. Unless you have a guarantee that the string will be -- null terminated before the input ends, use anyCString instead. -- Honestly, I'm not sure if this is a good function to define. But here -- it is. -- -- Fails on GHC versions older than 9.0, since we make use of the -- cstringLength# primop introduced in GHC 9.0, and we aren't very -- useful without it. -- -- Consumes the null terminator. anyCStringUnsafe :: ParserT st e ByteString instance (GHC.Show.Show a, GHC.Show.Show e) => GHC.Show.Show (FlatParse.Basic.Result e a) instance GHC.Base.Functor (FlatParse.Basic.Result e) -- | This module contains lexer and error message primitives for a simple -- lambda calculus parser. It demonstrates a simple but decently -- informative implementation of error message propagation. module FlatParse.Examples.BasicLambda.Lexer -- | An expected item which is displayed in error messages. data Expected -- | An error message. Msg :: String -> Expected -- | A literal expected thing. Lit :: String -> Expected -- | A parsing error. data Error -- | A precisely known error, like leaving out "in" from "let". Precise :: Pos -> Expected -> Error -- | An imprecise error, when we expect a number of different things, but -- parse something else. Imprecise :: Pos -> [Expected] -> Error errorPos :: Error -> Pos -- | Merge two errors. Inner errors (which were thrown at points with more -- consumed inputs) are preferred. If errors are thrown at identical -- input positions, we prefer precise errors to imprecise ones. -- -- The point of prioritizing inner and precise errors is to suppress the -- deluge of "expected" items, and instead try to point to a concrete -- issue to fix. merge :: Error -> Error -> Error type Parser = Parser Error -- | Pretty print an error. The ByteString input is the source file. 
-- The offending line from the source is displayed in the output. prettyError :: ByteString -> Error -> String -- | Imprecise cut: we slap a list of items on inner errors. cut :: Parser a -> [Expected] -> Parser a -- | Precise cut: we propagate at most a single error. cut' :: Parser a -> Expected -> Parser a runParser :: Parser a -> ByteString -> Result Error a -- | Run parser, print pretty error on failure. testParser :: Show a => Parser a -> String -> IO () -- | Parse a line comment. lineComment :: Parser () -- | Parse a potentially nested multiline comment. multilineComment :: Parser () -- | Consume whitespace. ws :: Parser () -- | Consume whitespace after running a parser. token :: Parser a -> Parser a -- | Read a starting character of an identifier. identStartChar :: Parser Char -- | Read a non-starting character of an identifier. identChar :: Parser Char -- | Check whether a Span contains exactly a keyword. Does not -- change parsing state. isKeyword :: Span -> Parser () -- | Parse a non-keyword string. symbol :: String -> Q Exp -- | Parse a non-keyword string, throw precise error on failure. symbol' :: String -> Q Exp -- | Parse a keyword string. keyword :: String -> Q Exp -- | Parse a keyword string, throw precise error on failure. keyword' :: String -> Q Exp instance GHC.Classes.Ord FlatParse.Examples.BasicLambda.Lexer.Expected instance GHC.Show.Show FlatParse.Examples.BasicLambda.Lexer.Expected instance GHC.Classes.Eq FlatParse.Examples.BasicLambda.Lexer.Expected instance GHC.Show.Show FlatParse.Examples.BasicLambda.Lexer.Error instance Data.String.IsString FlatParse.Examples.BasicLambda.Lexer.Expected -- | This module contains a simple lambda calculus parser. This parser is -- not optimized for maximum performance; instead it's written in a style -- which emulates the look and feel of conventional monadic parsers. An -- optimized implementation would use low-level switch expressions -- more often. 
module FlatParse.Examples.BasicLambda.Parser type Name = ByteString -- | A term in the language. The precedences of different constructs are -- the following, in decreasing order of strength: -- -- data Tm -- |
--   x
--   
Var :: Name -> Tm -- |
--   t u
--   
App :: Tm -> Tm -> Tm -- |
--   lam x. t
--   
Lam :: Name -> Tm -> Tm -- |
--   let x = t in u
--   
Let :: Name -> Tm -> Tm -> Tm -- | true or false. BoolLit :: Bool -> Tm -- | A positive Int literal. IntLit :: Int -> Tm -- |
--   if t then u else v
--   
If :: Tm -> Tm -> Tm -> Tm -- |
--   t + u
--   
Add :: Tm -> Tm -> Tm -- |
--   t * u
--   
Mul :: Tm -> Tm -> Tm -- |
--   t == u
--   
Eq :: Tm -> Tm -> Tm -- |
--   t < u
--   
Lt :: Tm -> Tm -> Tm -- | Parse an identifier. This parser uses isKeyword to check that -- an identifier is not a keyword. ident :: Parser Name -- | Parse an identifier, throw a precise error on failure. ident' :: Parser Name digit :: Parser Int int :: Parser Int -- | Parse a literal, identifier or parenthesized expression. atom :: Parser Tm atom' :: Parser Tm -- | Parse an App-level expression. app' :: Parser Tm -- | Parse a Mul-level expression. mul' :: Parser Tm -- | Parse an Add-level expression. add' :: Parser Tm -- | Parse an Eq or Lt-level expression. eqLt' :: Parser Tm -- | Parse a Let. pLet :: Parser Tm -- | Parse a Lam. lam :: Parser Tm -- | Parse an If. pIf :: Parser Tm -- | Parse any Tm. tm' :: Parser Tm -- | Parse a complete source file. src' :: Parser Tm p1 :: String instance GHC.Show.Show FlatParse.Examples.BasicLambda.Parser.Tm -- | Minimal parser definition. module FlatParse.Stateful.Parser -- | ParserT st r e a is a parser with a state token type -- st, a reader environment r, an error type e -- and a return type a. The different state token types support -- different embedded effects; see Parser, ParserIO and -- ParserST below. newtype ParserT (st :: ZeroBitType) r e a ParserT :: (ForeignPtrContents -> r -> Addr# -> Addr# -> Int# -> st -> Res# st e a) -> ParserT (st :: ZeroBitType) r e a [runParserT#] :: ParserT (st :: ZeroBitType) r e a -> ForeignPtrContents -> r -> Addr# -> Addr# -> Int# -> st -> Res# st e a -- | The type of pure parsers. type Parser = ParserT PureMode -- | The type of parsers which can embed IO actions. type ParserIO = ParserT IOMode -- | The type of parsers which can embed ST actions. type ParserST s = ParserT (STMode s) -- | Same as pure for ParserT except that it does not force -- the returned value. pureLazy :: a -> ParserT st r e a -- | Primitive parser result wrapped with a state token. -- -- You should rarely need to manipulate values of this type directly. 
Use -- the provided bidirectional pattern synonyms OK#, Fail# -- and Err#. type Res# (st :: ZeroBitType) e a = (# st, ResI# e a #) -- | Res# constructor for a successful parse. Contains the return -- value, a pointer to the rest of the input buffer, and the next -- Int state, plus a state token. pattern OK# :: (st :: ZeroBitType) -> a -> Addr# -> Int# -> Res# st e a -- | Res# constructor for errors which are by default -- non-recoverable. Contains the error, plus a state token. pattern Err# :: (st :: ZeroBitType) -> e -> Res# st e a -- | Res# constructor for recoverable failure. Contains only a state -- token. pattern Fail# :: (st :: ZeroBitType) -> Res# st e a -- | Primitive parser result. type ResI# e a = (# (# a, Addr#, Int# #) | (# #) | (# e #) #) -- | Choose between two parsers. If the first parser fails, try the second -- one, but if the first one throws an error, propagate the error. This -- operation can arbitrarily backtrack. -- -- Note: this exported operator has different fixity than the same -- operator in Applicative. Hide this operator if you want to use -- the Alternative version. (<|>) :: ParserT st r e a -> ParserT st r e a -> ParserT st r e a infixr 6 <|> instance Control.Monad.IO.Class.MonadIO (FlatParse.Stateful.Parser.ParserT FlatParse.Common.Parser.IOMode r e) instance GHC.Base.Functor (FlatParse.Stateful.Parser.ParserT st r e) instance GHC.Base.Applicative (FlatParse.Stateful.Parser.ParserT st r e) instance GHC.Base.Monad (FlatParse.Stateful.Parser.ParserT st r e) instance GHC.Base.Alternative (FlatParse.Stateful.Parser.ParserT st r e) instance GHC.Base.MonadPlus (FlatParse.Stateful.Parser.ParserT st r e) -- | Basic parser building blocks. module FlatParse.Stateful.Base -- | Succeed if the input is empty. eof :: ParserT st r e () -- | Read n bytes as a ByteString. Fails if fewer than -- n bytes are available. -- -- Throws a runtime error if given a negative integer. -- -- This does no copying. 
The ByteString returned is a "slice" of -- the input, and will keep it alive. To avoid this, use copy on -- the output. take :: Int -> ParserT st r e ByteString -- | Read n# bytes as a ByteString. Fails if fewer than -- n# bytes are available. -- -- Throws a runtime error if given a negative integer. -- -- This does no copying. The ByteString returned is a "slice" of -- the input, and will keep it alive. To avoid this, use copy on -- the output. take# :: Int# -> ParserT st r e ByteString -- | Read i# bytes as a ByteString. Fails if fewer than -- i# bytes are available. -- -- Undefined behaviour if given a negative integer. -- -- This does no copying. The ByteString returned is a "slice" of -- the input, and will keep it alive. To avoid this, use copy on -- the output. takeUnsafe# :: Int# -> ParserT st r e ByteString -- | Consume the rest of the input. May return the empty bytestring. -- -- This does no copying. The ByteString returned is a "slice" of -- the input, and will keep it alive. To avoid this, use copy on -- the output. takeRest :: ParserT st r e ByteString -- | Skip forward n bytes. Fails if fewer than n bytes -- are available. -- -- Throws a runtime error if given a negative integer. skip :: Int -> ParserT st r e () -- | Skip forward n bytes. Fails if fewer than n bytes -- are available. -- -- Throws a runtime error if given a negative integer. skip# :: Int# -> ParserT st r e () -- | Go back i bytes in the input. Takes a positive integer. -- -- Extremely unsafe. Makes no checks. Almost certainly a Bad Idea. skipBack :: Int -> ParserT st r e () -- | Go back i# bytes in the input. Takes a positive integer. -- -- Extremely unsafe. Makes no checks. Almost certainly a Bad Idea. skipBack# :: Int# -> ParserT st r e () -- | Skip forward n# bytes and run the given parser. Fails if -- fewer than n# bytes are available. -- -- Throws a runtime error if given a negative integer. 
atSkip# :: Int# -> ParserT st r e ret -> ParserT st r e ret -- | Skip forward i# bytes and run the given parser. Fails if -- fewer than i# bytes are available. -- -- Undefined behaviour if given a negative integer. atSkipUnsafe# :: Int# -> ParserT st r e ret -> ParserT st r e ret -- | Branch on a parser: if the first argument succeeds, continue with the -- second, else with the third. This can produce slightly more efficient -- code than (<|>). Moreover, branch does not -- backtrack from the true/false cases. branch :: ParserT st r e a -> ParserT st r e b -> ParserT st r e b -> ParserT st r e b -- | Succeed if the first parser succeeds and the second one fails. notFollowedBy :: ParserT st r e a -> ParserT st r e b -> ParserT st r e a -- | An analogue of the list foldl function: first parse a -- b, then parse zero or more a-s, and combine the -- results in a left-nested way by the b -> a -> b -- function. Note: this is not the usual chainl function from the -- parsec libraries! chainl :: (b -> a -> b) -> ParserT st r e b -> ParserT st r e a -> ParserT st r e b -- | An analogue of the list foldr function: parse zero or more -- a-s, terminated by a b, and combine the results in a -- right-nested way using the a -> b -> b function. Note: -- this is not the usual chainr function from the parsec -- libraries! chainr :: (a -> b -> b) -> ParserT st r e a -> ParserT st r e b -> ParserT st r e b -- | Save the parsing state, then run a parser, then restore the state. lookahead :: ParserT st r e a -> ParserT st r e a -- | Assert that there are at least n bytes remaining. -- -- Undefined behaviour if given a negative integer. ensure :: Int -> ParserT st r e () -- | Assert that there are at least n# bytes remaining. -- -- Undefined behaviour if given a negative integer. ensure# :: Int# -> ParserT st r e () -- | Assert that there are at least n# bytes remaining (CPS). -- -- Undefined behaviour if given a negative integer. 
withEnsure :: Int -> ParserT st r e ret -> ParserT st r e ret -- | Assert that there is at least 1 byte remaining (CPS). -- -- Undefined behaviour if given a negative integer. withEnsure1 :: ParserT st r e ret -> ParserT st r e ret -- | Assert that there are at least n# bytes remaining (CPS). -- -- Undefined behaviour if given a negative integer. withEnsure# :: Int# -> ParserT st r e ret -> ParserT st r e ret -- | isolate n p runs the parser p isolated to the next -- n bytes. All isolated bytes must be consumed. -- -- Throws a runtime error if given a negative integer. isolate :: Int -> ParserT st r e a -> ParserT st r e a -- | isolate# n# p runs the parser p isolated to the next -- n# bytes. All isolated bytes must be consumed. -- -- Throws a runtime error if given a negative integer. isolate# :: Int# -> ParserT st r e a -> ParserT st r e a -- | isolateUnsafe# i# p runs the parser p isolated to -- the next i# bytes. All isolated bytes must be consumed. -- -- Undefined behaviour if given a negative integer. isolateUnsafe# :: Int# -> ParserT st r e ret -> ParserT st r e ret -- | Skip a parser zero or more times. skipMany :: ParserT st r e a -> ParserT st r e () -- | Skip a parser one or more times. skipSome :: ParserT st r e a -> ParserT st r e () -- | The failing parser. By default, parser choice (<|>) -- arbitrarily backtracks on parser failure. This is a synonym for -- empty. failed :: ParserT st r e a -- | Convert a parsing error into failure. try :: ParserT st r e a -> ParserT st r e a -- | Throw a parsing error. By default, parser choice (<|>) -- can't backtrack on parser error. Use try to convert an error to -- a recoverable failure. err :: e -> ParserT st r e a -- | Run the parser, if an error is thrown, handle it with the given -- function. withError :: ParserT st r e b -> (e -> ParserT st r e b) -> ParserT st r e b -- | Convert a parsing failure to a success. fails :: ParserT st r e a -> ParserT st r e () -- | Convert a parsing failure to an error. 
cut :: ParserT st r e a -> e -> ParserT st r e a -- | Run the parser, if we get a failure, throw the given error, but if we -- get an error, merge the inner and the newly given errors using the -- e -> e -> e function. This can be useful for -- implementing parsing errors which may propagate hints or accumulate -- contextual information. cutting :: ParserT st r e a -> e -> (e -> e -> e) -> ParserT st r e a -- | Convert a parsing failure to a Maybe. If possible, use -- withOption instead. optional :: ParserT st r e a -> ParserT st r e (Maybe a) -- | Convert a parsing failure to a (). optional_ :: ParserT st r e a -> ParserT st r e () -- | CPS'd version of optional. This is usually more efficient, -- since it gets rid of the extra Maybe allocation. withOption :: ParserT st r e a -> (a -> ParserT st r e ret) -> ParserT st r e ret -> ParserT st r e ret -- | Machine integer parsers. module FlatParse.Stateful.Integers -- | Parse any Word8. anyWord8 :: ParserT st r e Word8 -- | Parse any Word16 (native byte order). anyWord16 :: ParserT st r e Word16 -- | Parse any Word32 (native byte order). anyWord32 :: ParserT st r e Word32 -- | Parse any Word64 (native byte order). anyWord64 :: ParserT st r e Word64 -- | Parse any Int8. anyInt8 :: ParserT st r e Int8 -- | Parse any Int16 (native byte order). anyInt16 :: ParserT st r e Int16 -- | Parse any Int32 (native byte order). anyInt32 :: ParserT st r e Int32 -- | Parse any Int64 (native byte order). anyInt64 :: ParserT st r e Int64 -- | Parse any Word (native size). anyWord :: ParserT st r e Word -- | Parse any Int (native size). anyInt :: ParserT st r e Int -- | Parse any Word16 (little-endian). anyWord16le :: ParserT st r e Word16 -- | Parse any Word16 (big-endian). anyWord16be :: ParserT st r e Word16 -- | Parse any Word32 (little-endian). anyWord32le :: ParserT st r e Word32 -- | Parse any Word32 (big-endian). anyWord32be :: ParserT st r e Word32 -- | Parse any Word64 (little-endian). 
anyWord64le :: ParserT st r e Word64 -- | Parse any Word64 (big-endian). anyWord64be :: ParserT st r e Word64 -- | Parse any Int16 (little-endian). anyInt16le :: ParserT st r e Int16 -- | Parse any Int16 (big-endian). anyInt16be :: ParserT st r e Int16 -- | Parse any Int32 (little-endian). anyInt32le :: ParserT st r e Int32 -- | Parse any Int32 (big-endian). anyInt32be :: ParserT st r e Int32 -- | Parse any Int64 (little-endian). anyInt64le :: ParserT st r e Int64 -- | Parse any Int64 (big-endian). anyInt64be :: ParserT st r e Int64 -- | Read the next 1 byte and assert its value as a Word8. word8 :: Word8 -> ParserT st r e () -- | Parse any Word8 (CPS). withAnyWord8 :: (Word8 -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Word16 (native byte order) (CPS). withAnyWord16 :: (Word16 -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Word32 (native byte order) (CPS). withAnyWord32 :: (Word32 -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Word64 (native byte order) (CPS). withAnyWord64 :: (Word64 -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Int8 (CPS). withAnyInt8 :: (Int8 -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Int16 (native byte order) (CPS). withAnyInt16 :: (Int16 -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Int32 (native byte order) (CPS). withAnyInt32 :: (Int32 -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Int64 (native byte order) (CPS). withAnyInt64 :: (Int64 -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Word (native size) (CPS). withAnyWord :: (Word -> ParserT st r e ret) -> ParserT st r e ret -- | Parse any Int (native size) (CPS). withAnyInt :: (Int -> ParserT st r e ret) -> ParserT st r e ret -- | Unsafely parse any Word8, without asserting the input is -- non-empty. -- -- The caller must guarantee that the input has enough bytes. anyWord8Unsafe :: ParserT st r e Word8 -- | Unsafely read the next 1 byte and assert its value as a Word8. 
-- -- The caller must guarantee that the input has enough bytes. word8Unsafe :: Word8 -> ParserT st r e () -- | Unsafely read the next 2 bytes and assert their value as a -- Word16 (native byte order). -- -- The caller must guarantee that the input has enough bytes. word16Unsafe :: Word16 -> ParserT st r e () -- | Unsafely read the next 4 bytes and assert their value as a -- Word32 (native byte order). -- -- The caller must guarantee that the input has enough bytes. word32Unsafe :: Word32 -> ParserT st r e () -- | Unsafely read the next 8 bytes and assert their value as a -- Word64 (native byte order). -- -- The caller must guarantee that the input has enough bytes. word64Unsafe :: Word64 -> ParserT st r e () -- | Helper for defining CPS parsers for types of a constant byte size -- (i.e. machine integers). -- -- Call this with an indexXYZOffAddr primop (e.g. -- indexWord8OffAddr) and the size in bytes of the type you're -- parsing. withAnySized# :: Int# -> (Addr# -> Int# -> a) -> (a -> ParserT st r e ret) -> ParserT st r e ret -- | Unsafe helper for defining CPS parsers for types of a constant byte -- size (i.e. machine integers). -- -- Is really just syntactic sugar for applying the given parser and -- shifting the buffer along. -- -- The caller must guarantee that the input has enough bytes. withAnySizedUnsafe# :: Int# -> (Addr# -> Int# -> a) -> (a -> ParserT st r e ret) -> ParserT st r e ret -- | Unsafe helper for defining parsers for types of a constant byte size -- (i.e. machine integers) which assert the parsed value's... value. -- -- Call this with an indexXYZOffAddr primop (e.g. -- indexWord8OffAddr), the size in bytes of the type you're -- parsing, and the expected value to test the parsed value against. -- -- The caller must guarantee that the input has enough bytes. sizedUnsafe# :: Eq a => Int# -> (Addr# -> Int# -> a) -> a -> ParserT st r e () -- | Bytestring parsers. 
-- -- Module dependency complications prevent us from placing these in -- FlatParse.Stateful.Base. module FlatParse.Stateful.Bytes -- | Read a sequence of bytes. This is a template function, you can use it -- as $(bytes [3, 4, 5]), for example, and the splice has type -- Parser e (). For a non-TH variant see byteString. bytes :: [Word] -> Q Exp -- | Template function, creates a Parser e () which unsafely -- parses a given sequence of bytes. -- -- The caller must guarantee that the input has enough bytes. bytesUnsafe :: [Word] -> Q Exp -- | Unsafe, highly dangerous parsing primitives using Addr#. -- -- Be sure to read the documentation before using any definitions from -- this module. -- -- This module exports primitives useful for efficiently parsing binary -- files that store data using an internal index. -- -- Often, such indices describe records using a starting offset and a -- length. Offsets are often relative to the file start, or some dynamic -- address in the file. This way, individual records can be read out -- efficiently (much faster than opening lots of small files!). -- -- We may parse these in-place efficiently by adding record offsets to a -- base memory address somewhere in the input. This is also extremely -- unsafe, and easy to get catastrophically wrong. Thus, we provide as -- much utility as reasonable to enable performing such parsing safely. -- (That means CPS functions.) -- -- Note that all definitions here should be considered unsafe. Any -- Int# is not checked for positivity. You must perform any -- necessary checks when you obtain your offsets and lengths as -- Int#. Failure to do so may result in undefined behaviour. module FlatParse.Stateful.Addr -- | Run a parser, passing it the current address the parser is at. -- -- Useful for parsing offset-based data tables. For example, you may use -- this to save the base address to use together with various relative -- offsets. 
withAddr# :: (Addr# -> ParserT st r e a) -> ParserT st r e a -- | takeOffAddr# addr# offset# len# moves to addr#, -- skips offset# bytes, reads len# bytes into a -- ByteString, and restores the original address. -- -- The Addr# should be from withAddr#. -- -- Useful for parsing offset-based data tables. Ex: Your file contains an -- index storing (OFFSET, LENGTH) entries where the offset is -- the byte position in the file. Begin with withAddr# $ -- tableBase# -> ..., then read each entry like -- takeOffAddr# tableBase# OFFSET LENGTH. -- -- Fails if you attempt to read outside the input. -- -- Undefined behaviour if offset# or len# is negative. -- -- Name adopted from the similar-ish indexXOffAddr# primops. takeOffAddr# :: Addr# -> Int# -> Int# -> ParserT st r e ByteString -- | withOffAddr# addr# offset# p moves to addr#, skips -- offset# bytes, then runs the given parser p. -- -- The Addr# should be from withAddr#. -- -- Fails if you attempt to read outside the input. -- -- Undefined behaviour if offset# is negative. -- -- Name adopted from the similar-ish indexXOffAddr# primops. withOffAddr# :: Addr# -> Int# -> ParserT st r e a -> ParserT st r e a -- | lookahead, but specify the address to lookahead from. -- -- The Addr# should be from withAddr#. lookaheadFromAddr# :: Addr# -> ParserT st r e a -> ParserT st r e a -- | Run a parser at the given address. -- -- The Addr# should be from withAddr#. -- -- This is a highly internal function -- you likely want -- lookaheadFromAddr#, which will reset the address after running -- the parser. atAddr# :: Addr# -> ParserT st r e a -> ParserT st r e a -- | Efficient literal branching using Template Haskell. module FlatParse.Stateful.Switch -- | This is a template function which makes it possible to branch on a -- collection of string literals in an efficient way. 
By using -- switch, such branching is compiled to a trie of primitive -- parsing operations, which has optimized control flow, vectorized reads -- and grouped checking for needed input bytes. -- -- The syntax is slightly magical, it overloads the usual case -- expression. An example: -- --
--   $(switch [| case _ of
--       "foo" -> pure True
--       "bar" -> pure False |])
--   
-- -- The underscore is mandatory in case _ of. Each branch must be -- a string literal, but optionally we may have a default case, like in -- --
--   $(switch [| case _ of
--       "foo" -> pure 10
--       "bar" -> pure 20
--       _     -> pure 30 |])
--   
-- -- All case right hand sides must be parsers with the same type. That -- type is also the type of the whole switch expression. -- -- A switch has longest match semantics, and the order of cases -- does not matter, except for the default case, which may only appear as -- the last case. -- -- If a switch does not have a default case, and no case matches -- the input, then it returns with failure, without having consumed any -- input. A fallthrough to the default case also does not consume any -- input. switch :: Q Exp -> Q Exp -- | Switch expression with an optional first argument for performing a -- post-processing action after every successful branch matching. For -- example, if we have ws :: ParserT st r e () for a whitespace -- parser, we might want to consume whitespace after matching on any of -- the switch cases. For that case, we can define a "lexeme" version of -- switch as follows. -- --
--   switch' :: Q Exp -> Q Exp
--   switch' = switchWithPost (Just [| ws |])
--   
-- -- Note that this switch' function cannot be used in the same -- module it's defined in, because of the stage restriction of Template -- Haskell. switchWithPost :: Maybe (Q Exp) -> Q Exp -> Q Exp -- | Version of switchWithPost without syntactic sugar. The second -- argument is the list of cases, the third is the default case. rawSwitchWithPost :: Maybe (Q Exp) -> [(String, Q Exp)] -> Maybe (Q Exp) -> Q Exp -- | Parsers and textual data (UTF-8, ASCII). module FlatParse.Stateful.Text -- | Parse a UTF-8 character literal. This is a template function, you can -- use it as $(char 'x'), for example, and the splice in this -- case has type Parser e (). char :: Char -> Q Exp -- | Parse a UTF-8 string literal. This is a template function, you can use -- it as $(string "foo"), for example, and the splice has type -- Parser e (). string :: String -> Q Exp -- | Parse any single Unicode character encoded using UTF-8 as a -- Char. anyChar :: ParserT st r e Char -- | Skip any single Unicode character encoded using UTF-8. skipAnyChar :: ParserT st r e () -- | Parse a UTF-8 Char for which a predicate holds. satisfy :: (Char -> Bool) -> ParserT st r e Char -- | Skip a UTF-8 Char for which a predicate holds. skipSatisfy :: (Char -> Bool) -> ParserT st r e () -- | This is a variant of satisfy which allows more optimization. We -- can pick four testing functions for the four cases for the possible -- number of bytes in the UTF-8 character. So in fusedSatisfy f1 f2 -- f3 f4, if we read a one-byte character, the result is scrutinized -- with f1, for two-bytes, with f2, and so on. This can -- result in dramatic lexing speedups. -- -- For example, if we want to accept any letter, the naive solution would -- be to use isLetter, but this accesses a large lookup table of -- Unicode character classes. 
We can do better with fusedSatisfy -- isLatinLetter isLetter isLetter isLetter, since here the -- isLatinLetter is inlined into the UTF-8 decoding, and it -- probably handles a great majority of all cases without accessing the -- character table. fusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> ParserT st r e Char -- | Skipping variant of fusedSatisfy. skipFusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> ParserT st r e () -- | Parse the rest of the current line as a String. Assumes UTF-8 -- encoding, throws an error if the encoding is invalid. takeLine :: ParserT st r e String -- | Take the rest of the input as a String. Assumes UTF-8 encoding. takeRestString :: ParserT st r e String -- | Parse any single ASCII character (a single byte) as a Char. -- -- More efficient than anyChar for ASCII-only input. anyAsciiChar :: ParserT st r e Char -- | Skip any single ASCII character (a single byte). -- -- More efficient than skipAnyChar for ASCII-only input. skipAnyAsciiChar :: ParserT st r e () -- | Parse an ASCII Char for which a predicate holds. -- -- Assumption: the predicate must only return True for ASCII-range -- characters. Otherwise this function might read a 128-255 range byte, -- thereby breaking UTF-8 decoding. satisfyAscii :: (Char -> Bool) -> ParserT st r e Char -- | Skip an ASCII Char for which a predicate holds. Assumption: the -- predicate must only return True for ASCII-range characters. skipSatisfyAscii :: (Char -> Bool) -> ParserT st r e () -- | Parse a non-empty ASCII decimal digit sequence as a Word. Fails -- on overflow. anyAsciiDecimalWord :: ParserT st r e Word -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Int. Fails on overflow. anyAsciiDecimalInt :: ParserT st r e Int -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Integer. 
anyAsciiDecimalInteger :: ParserT st r e Integer -- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence -- as a Word. Fails on overflow. anyAsciiHexWord :: ParserT st r e Word -- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence -- as a positive Int. Fails on overflow. anyAsciiHexInt :: ParserT st r e Int -- | Parse the rest of the current line as a String, but restore the -- parsing state. Assumes UTF-8 encoding. This can be used for debugging. traceLine :: ParserT st r e String -- | Get the rest of the input as a String, but restore the parsing -- state. Assumes UTF-8 encoding. This can be used for debugging. traceRest :: ParserT st r e String -- | Parser supporting a custom reader environment, custom error types and -- an Int state. A common use case of the Int state is to -- keep track of column numbers to implement indentation-sensitive -- parsers. module FlatParse.Stateful -- | ParserT st r e a is a parser with a state token type -- st, a reader environment r, an error type e -- and a return type a. The different state token types support -- different embedded effects; see Parser, ParserIO and -- ParserST below. newtype ParserT (st :: ZeroBitType) r e a ParserT :: (ForeignPtrContents -> r -> Addr# -> Addr# -> Int# -> st -> Res# st e a) -> ParserT (st :: ZeroBitType) r e a [runParserT#] :: ParserT (st :: ZeroBitType) r e a -> ForeignPtrContents -> r -> Addr# -> Addr# -> Int# -> st -> Res# st e a -- | The type of pure parsers. type Parser = ParserT PureMode -- | The type of parsers which can embed IO actions. type ParserIO = ParserT IOMode -- | The type of parsers which can embed ST actions. type ParserST s = ParserT (STMode s) -- | Higher-level boxed data type for parsing results. data Result e a -- | Contains return value, last Int state, unconsumed input. OK :: a -> Int -> !ByteString -> Result e a -- | Recoverable-by-default failure. Fail :: Result e a -- | Unrecoverable-by-default error. 
Err :: !e -> Result e a -- | Run a pure parser. The Int argument is the initial state. runParser :: Parser r e a -> r -> Int -> ByteString -> Result e a -- | Run a parser on a String, converting it to the corresponding -- UTF-8 bytes. The Int argument is the initial state. -- -- Reminder: OverloadedStrings for ByteString does not -- yield a valid UTF-8 encoding! For non-ASCII ByteString literal -- input, use this wrapper or convert your input using -- strToUtf8. runParserUtf8 :: Parser r e a -> r -> Int -> String -> Result e a -- | Run an IO-based parser. The Int argument is the initial -- state. runParserIO :: ParserIO r e a -> r -> Int -> ByteString -> IO (Result e a) -- | Run an ST-based parser. The Int argument is the initial -- state. runParserST :: ParserST s r e a -> r -> Int -> ByteString -> ST s (Result e a) -- | Run a ParserST inside a pure parser. embedParserST :: forall r e a. (forall s. ParserST s r e a) -> Parser r e a -- | Primitive parser result wrapped with a state token. -- -- You should rarely need to manipulate values of this type directly. Use -- the provided bidirectional pattern synonyms OK#, Fail# -- and Err#. type Res# (st :: ZeroBitType) e a = (# st, ResI# e a #) -- | Res# constructor for a successful parse. Contains the return -- value, a pointer to the rest of the input buffer, and the next -- Int state, plus a state token. pattern OK# :: (st :: ZeroBitType) -> a -> Addr# -> Int# -> Res# st e a -- | Res# constructor for errors which are by default -- non-recoverable. Contains the error, plus a state token. pattern Err# :: (st :: ZeroBitType) -> e -> Res# st e a -- | Res# constructor for recoverable failure. Contains only a state -- token. pattern Fail# :: (st :: ZeroBitType) -> Res# st e a -- | Primitive parser result. type ResI# e a = (# (# a, Addr#, Int# #) | (# #) | (# e #) #) -- | Run an ST action in a ParserST. liftST :: ST s a -> ParserST s r e a -- | Query the environment. 
ask :: ParserT st r e r -- | Run a parser in a modified environment. local :: (r -> r) -> ParserT st r e a -> ParserT st r e a -- | Query the Int state. get :: ParserT st r e Int -- | Write the Int state. put :: Int -> ParserT st r e () -- | Modify the Int state. modify :: (Int -> Int) -> ParserT st r e () -- | Convert an UTF8-encoded String to a ByteString. strToUtf8 :: String -> ByteString -- | Convert a ByteString to an UTF8-encoded String. utf8ToStr :: ByteString -> String -- |
--   isDigit c = '0' <= c && c <= '9'
--   
isDigit :: Char -> Bool -- |
--   isLatinLetter c = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
--   
isLatinLetter :: Char -> Bool -- |
--   isGreekLetter c = ('Α' <= c && c <= 'Ω') || ('α' <= c && c <= 'ω')
--   
isGreekLetter :: Char -> Bool -- | Succeed if the input is empty. eof :: ParserT st r e () -- | Read n bytes as a ByteString. Fails if fewer than -- n bytes are available. -- -- Throws a runtime error if given a negative integer. -- -- This does no copying. The ByteString returned is a "slice" of -- the input, and will keep it alive. To avoid this, use copy on -- the output. take :: Int -> ParserT st r e ByteString -- | Read n# bytes as a ByteString. Fails if fewer than -- n# bytes are available. -- -- Throws a runtime error if given a negative integer. -- -- This does no copying. The ByteString returned is a "slice" of -- the input, and will keep it alive. To avoid this, use copy on -- the output. take# :: Int# -> ParserT st r e ByteString -- | Read i# bytes as a ByteString. Fails if fewer than -- i# bytes are available. -- -- Undefined behaviour if given a negative integer. -- -- This does no copying. The ByteString returned is a "slice" of -- the input, and will keep it alive. To avoid this, use copy on -- the output. takeUnsafe# :: Int# -> ParserT st r e ByteString -- | Consume the rest of the input. May return the empty bytestring. -- -- This does no copying. The ByteString returned is a "slice" of -- the input, and will keep it alive. To avoid this, use copy on -- the output. takeRest :: ParserT st r e ByteString -- | Skip forward n bytes. Fails if fewer than n bytes -- are available. -- -- Throws a runtime error if given a negative integer. skip :: Int -> ParserT st r e () -- | Skip forward n# bytes. Fails if fewer than n# bytes -- are available. -- -- Throws a runtime error if given a negative integer. skip# :: Int# -> ParserT st r e () -- | Go back i bytes in the input. Takes a positive integer. -- -- Extremely unsafe. Makes no checks. Almost certainly a Bad Idea. skipBack :: Int -> ParserT st r e () -- | Go back i# bytes in the input. Takes a positive integer. -- -- Extremely unsafe. Makes no checks. Almost certainly a Bad Idea. 
skipBack# :: Int# -> ParserT st r e () -- | Skip forward n# bytes and run the given parser. Fails if -- fewer than n# bytes are available. -- -- Throws a runtime error if given a negative integer. atSkip# :: Int# -> ParserT st r e ret -> ParserT st r e ret -- | Skip forward i# bytes and run the given parser. Fails if -- fewer than i# bytes are available. -- -- Undefined behaviour if given a negative integer. atSkipUnsafe# :: Int# -> ParserT st r e ret -> ParserT st r e ret -- | Read a sequence of bytes. This is a template function, you can use it -- as $(bytes [3, 4, 5]), for example, and the splice has type -- Parser e (). For a non-TH variant see byteString. bytes :: [Word] -> Q Exp -- | Template function, creates a Parser e () which unsafely -- parses a given sequence of bytes. -- -- The caller must guarantee that the input has enough bytes. bytesUnsafe :: [Word] -> Q Exp -- | Parse a given ByteString. If the bytestring is statically -- known, consider using bytes instead. byteString :: ByteString -> ParserT st r e () -- | Read a null-terminated bytestring (a C-style string). -- -- Consumes the null terminator. anyCString :: ParserT st r e ByteString -- | Read a protobuf-style varint into a positive Int. -- -- protobuf-style varints are byte-aligned. For each byte, the lower 7 -- bits are data and the MSB indicates if there are further bytes. Once -- fully parsed, the 7-bit payloads are concatenated and interpreted as a -- little-endian unsigned integer. -- -- Fails if the varint exceeds the positive Int range. -- -- Really, these are varnats. They also match with the LEB128 varint -- encoding. -- -- protobuf encodes negatives in unsigned integers using zigzag encoding. -- See the fromZigzag family of functions for this -- functionality. -- -- Further reading: -- https://developers.google.com/protocol-buffers/docs/encoding#varints anyVarintProtobuf :: ParserT st r e Int -- | Choose between two parsers. 
If the first parser fails, try the second -- one, but if the first one throws an error, propagate the error. This -- operation can arbitrarily backtrack. -- -- Note: this exported operator has different fixity than the same -- operator in Applicative. Hide this operator if you want to use -- the Alternative version. (<|>) :: ParserT st r e a -> ParserT st r e a -> ParserT st r e a infixr 6 <|> -- | Branch on a parser: if the first argument succeeds, continue with the -- second, else with the third. This can produce slightly more efficient -- code than (<|>). Moreover, branch does not -- backtrack from the true/false cases. branch :: ParserT st r e a -> ParserT st r e b -> ParserT st r e b -> ParserT st r e b -- | Succeed if the first parser succeeds and the second one fails. notFollowedBy :: ParserT st r e a -> ParserT st r e b -> ParserT st r e a -- | An analogue of the list foldl function: first parse a -- b, then parse zero or more a-s, and combine the -- results in a left-nested way by the b -> a -> b -- function. Note: this is not the usual chainl function from the -- parsec libraries! chainl :: (b -> a -> b) -> ParserT st r e b -> ParserT st r e a -> ParserT st r e b -- | An analogue of the list foldr function: parse zero or more -- a-s, terminated by a b, and combine the results in a -- right-nested way using the a -> b -> b function. Note: -- this is not the usual chainr function from the parsec -- libraries! chainr :: (a -> b -> b) -> ParserT st r e a -> ParserT st r e b -> ParserT st r e b -- | Save the parsing state, then run a parser, then restore the state. lookahead :: ParserT st r e a -> ParserT st r e a -- | Assert that there are at least n bytes remaining. -- -- Undefined behaviour if given a negative integer. ensure :: Int -> ParserT st r e () -- | Assert that there are at least n# bytes remaining. -- -- Undefined behaviour if given a negative integer. 
ensure# :: Int# -> ParserT st r e () -- | Assert that there are at least n bytes remaining (CPS). -- -- Undefined behaviour if given a negative integer. withEnsure :: Int -> ParserT st r e ret -> ParserT st r e ret -- | Assert that there is at least 1 byte remaining (CPS). -- -- Undefined behaviour if given a negative integer. withEnsure1 :: ParserT st r e ret -> ParserT st r e ret -- | Assert that there are at least n# bytes remaining (CPS). -- -- Undefined behaviour if given a negative integer. withEnsure# :: Int# -> ParserT st r e ret -> ParserT st r e ret -- | isolate n p runs the parser p isolated to the next -- n bytes. All isolated bytes must be consumed. -- -- Throws a runtime error if given a negative integer. isolate :: Int -> ParserT st r e a -> ParserT st r e a -- | Isolate the given parser up to (excluding) the next null byte. -- -- Like isolate, all isolated bytes must be consumed. The null -- byte is consumed afterwards. -- -- Useful for defining parsers for null-terminated data. isolateToNextNull :: ParserT st r e a -> ParserT st r e a -- | isolate# n# p runs the parser p isolated to the next -- n# bytes. All isolated bytes must be consumed. -- -- Throws a runtime error if given a negative integer. isolate# :: Int# -> ParserT st r e a -> ParserT st r e a -- | isolateUnsafe# i# p runs the parser p isolated to -- the next i# bytes. All isolated bytes must be consumed. -- -- Undefined behaviour if given a negative integer. isolateUnsafe# :: Int# -> ParserT st r e ret -> ParserT st r e ret -- | This is a template function which makes it possible to branch on a -- collection of string literals in an efficient way. By using -- switch, such branching is compiled to a trie of primitive -- parsing operations, which has optimized control flow, vectorized reads -- and grouped checking for needed input bytes. -- -- The syntax is slightly magical, it overloads the usual case -- expression. An example: -- --
--   $(switch [| case _ of
--       "foo" -> pure True
--       "bar" -> pure False |])
--   
-- -- The underscore is mandatory in case _ of. Each branch must be -- a string literal, but optionally we may have a default case, like in -- --
--   $(switch [| case _ of
--       "foo" -> pure 10
--       "bar" -> pure 20
--       _     -> pure 30 |])
--   
-- -- All case right hand sides must be parsers with the same type. That -- type is also the type of the whole switch expression. -- -- A switch has longest match semantics, and the order of cases -- does not matter, except for the default case, which may only appear as -- the last case. -- -- If a switch does not have a default case, and no case matches -- the input, then it returns with failure, without having consumed any -- input. A fallthrough to the default case also does not consume any -- input. switch :: Q Exp -> Q Exp -- | Switch expression with an optional first argument for performing a -- post-processing action after every successful branch matching. For -- example, if we have ws :: ParserT st r e () for a whitespace -- parser, we might want to consume whitespace after matching on any of -- the switch cases. For that case, we can define a "lexeme" version of -- switch as follows. -- --
--   switch' :: Q Exp -> Q Exp
--   switch' = switchWithPost (Just [| ws |])
--   
-- -- Note that this switch' function cannot be used in the same -- module it's defined in, because of the stage restriction of Template -- Haskell. switchWithPost :: Maybe (Q Exp) -> Q Exp -> Q Exp -- | Version of switchWithPost without syntactic sugar. The second -- argument is the list of cases, the third is the default case. rawSwitchWithPost :: Maybe (Q Exp) -> [(String, Q Exp)] -> Maybe (Q Exp) -> Q Exp -- | Zero or more. many :: Alternative f => f a -> f [a] -- | Skip a parser zero or more times. skipMany :: ParserT st r e a -> ParserT st r e () -- | One or more. some :: Alternative f => f a -> f [a] -- | Skip a parser one or more times. skipSome :: ParserT st r e a -> ParserT st r e () -- | The identity of <|> empty :: Alternative f => f a -- | The failing parser. By default, parser choice (<|>) -- arbitrarily backtracks on parser failure. This is a synonym for -- empty. failed :: ParserT st r e a -- | Convert a parsing error into failure. try :: ParserT st r e a -> ParserT st r e a -- | Throw a parsing error. By default, parser choice (<|>) -- can't backtrack on parser error. Use try to convert an error to -- a recoverable failure. err :: e -> ParserT st r e a -- | Run the parser, if an error is thrown, handle it with the given -- function. withError :: ParserT st r e b -> (e -> ParserT st r e b) -> ParserT st r e b -- | Convert a parsing failure to a success. fails :: ParserT st r e a -> ParserT st r e () -- | Convert a parsing failure to an error. cut :: ParserT st r e a -> e -> ParserT st r e a -- | Run the parser, if we get a failure, throw the given error, but if we -- get an error, merge the inner and the newly given errors using the -- e -> e -> e function. This can be useful for -- implementing parsing errors which may propagate hints or accumulate -- contextual information. cutting :: ParserT st r e a -> e -> (e -> e -> e) -> ParserT st r e a -- | Convert a parsing failure to a Maybe. If possible, use -- withOption instead. 
optional :: ParserT st r e a -> ParserT st r e (Maybe a) -- | Convert a parsing failure to a (). optional_ :: ParserT st r e a -> ParserT st r e () -- | CPS'd version of optional. This is usually more efficient, -- since it gets rid of the extra Maybe allocation. withOption :: ParserT st r e a -> (a -> ParserT st r e ret) -> ParserT st r e ret -> ParserT st r e ret -- | Byte offset counted backwards from the end of the buffer. Note: the -- Ord instance for Pos considers the earlier positions to -- be smaller. newtype Pos Pos :: Int -> Pos [unPos] :: Pos -> Int -- | The end of the input. endPos :: Pos -- | Very unsafe conversion between a primitive address and a position. The -- first argument points to the end of the buffer, the second argument is -- being converted. addrToPos# :: Addr# -> Addr# -> Pos -- | Very unsafe conversion between a primitive address and a position. The -- first argument points to the end of the buffer. posToAddr# :: Addr# -> Pos -> Addr# -- | A pair of positions. data Span Span :: !Pos -> !Pos -> Span -- | Slice into a ByteString using a Span. The result is -- invalid if the Span is not a valid slice of the first argument. unsafeSlice :: ByteString -> Span -> ByteString -- | Get the current position in the input. getPos :: ParserT st r e Pos -- | Set the input position. -- -- Warning: this can result in crashes if the position points outside the -- current buffer. It is always safe to setPos values which came -- from getPos with the current input. setPos :: Pos -> ParserT st r e () -- | Return the consumed span of a parser. Use withSpan if possible -- for better efficiency. spanOf :: ParserT st r e a -> ParserT st r e Span -- | Bind the result together with the span of the result. CPS'd version of -- spanOf for better unboxing. withSpan :: ParserT st r e a -> (a -> Span -> ParserT st r e b) -> ParserT st r e b -- | Return the ByteString consumed by a parser. Note: it's more -- efficient to use spanOf and withSpan instead. 
byteStringOf :: ParserT st r e a -> ParserT st r e ByteString -- | CPS'd version of byteStringOf. Can be more efficient, because -- the result is more eagerly unboxed by GHC. It's more efficient to use -- spanOf or withSpan instead. withByteString :: ParserT st r e a -> (a -> ByteString -> ParserT st r e b) -> ParserT st r e b -- | Run a parser in a given input Span. -- -- The input position and the parser state is restored after the parser -- is finished, so inSpan does not consume input and has no side -- effect. -- -- Warning: this operation may crash if the given span points outside the -- current parsing buffer. It's always safe to use inSpan if the -- Span comes from a previous withSpan or spanOf -- call on the current input. inSpan :: Span -> ParserT st r e a -> ParserT st r e a -- | Check whether a Pos points into a ByteString. validPos :: ByteString -> Pos -> Bool -- | Compute corresponding line and column numbers for each Pos in a -- list, assuming UTF8 encoding. Throw an error on invalid positions. -- Note: computing lines and columns may traverse the ByteString, -- but it traverses it only once regardless of the length of the position -- list. posLineCols :: ByteString -> [Pos] -> [(Int, Int)] -- | Create a Pos from a line and column number. Throws an error on -- out-of-bounds line and column numbers. mkPos :: ByteString -> (Int, Int) -> Pos -- | Parse a UTF-8 character literal. This is a template function, you can -- use it as $(char 'x'), for example, and the splice in this -- case has type Parser e (). char :: Char -> Q Exp -- | Parse a UTF-8 string literal. This is a template function, you can use -- it as $(string "foo"), for example, and the splice has type -- Parser e (). string :: String -> Q Exp -- | Parse any single Unicode character encoded using UTF-8 as a -- Char. anyChar :: ParserT st r e Char -- | Skip any single Unicode character encoded using UTF-8. skipAnyChar :: ParserT st r e () -- | Parse a UTF-8 Char for which a predicate holds. 
satisfy :: (Char -> Bool) -> ParserT st r e Char -- | Skip a UTF-8 Char for which a predicate holds. skipSatisfy :: (Char -> Bool) -> ParserT st r e () -- | This is a variant of satisfy which allows more optimization. We -- can pick four testing functions for the four cases for the possible -- number of bytes in the UTF-8 character. So in fusedSatisfy f1 f2 -- f3 f4, if we read a one-byte character, the result is scrutinized -- with f1, for two-bytes, with f2, and so on. This can -- result in dramatic lexing speedups. -- -- For example, if we want to accept any letter, the naive solution would -- be to use isLetter, but this accesses a large lookup table of -- Unicode character classes. We can do better with fusedSatisfy -- isLatinLetter isLetter isLetter isLetter, since here the -- isLatinLetter is inlined into the UTF-8 decoding, and it -- probably handles a great majority of all cases without accessing the -- character table. fusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> ParserT st r e Char -- | Skipping variant of fusedSatisfy. skipFusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> ParserT st r e () -- | Parse the rest of the current line as a String. Assumes UTF-8 -- encoding, throws an error if the encoding is invalid. takeLine :: ParserT st r e String -- | Take the rest of the input as a String. Assumes UTF-8 encoding. takeRestString :: ParserT st r e String -- | Break a UTF-8-encoded ByteString into lines. Throws an error on -- invalid input. This is mostly useful for grabbing specific source -- lines for displaying error messages. linesUtf8 :: ByteString -> [String] -- | Parse any single ASCII character (a single byte) as a Char. -- -- More efficient than anyChar for ASCII-only input. anyAsciiChar :: ParserT st r e Char -- | Skip any single ASCII character (a single byte). -- -- More efficient than skipAnyChar for ASCII-only input. 
skipAnyAsciiChar :: ParserT st r e () -- | Parse an ASCII Char for which a predicate holds. -- -- Assumption: the predicate must only return True for ASCII-range -- characters. Otherwise this function might read a 128-255 range byte, -- thereby breaking UTF-8 decoding. satisfyAscii :: (Char -> Bool) -> ParserT st r e Char -- | Skip an ASCII Char for which a predicate holds. Assumption: the -- predicate must only return True for ASCII-range characters. skipSatisfyAscii :: (Char -> Bool) -> ParserT st r e () -- | Parse a non-empty ASCII decimal digit sequence as a Word. Fails -- on overflow. anyAsciiDecimalWord :: ParserT st r e Word -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Int. Fails on overflow. anyAsciiDecimalInt :: ParserT st r e Int -- | Parse a non-empty ASCII decimal digit sequence as a positive -- Integer. anyAsciiDecimalInteger :: ParserT st r e Integer -- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence -- as a Word. Fails on overflow. anyAsciiHexWord :: ParserT st r e Word -- | Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence -- as a positive Int. Fails on overflow. anyAsciiHexInt :: ParserT st r e Int -- | Parse the rest of the current line as a String, but restore the -- parsing state. Assumes UTF-8 encoding. This can be used for debugging. traceLine :: ParserT st r e String -- | Get the rest of the input as a String, but restore the parsing -- state. Assumes UTF-8 encoding. This can be used for debugging. traceRest :: ParserT st r e String -- | Create a ByteString from a Span. -- -- The result is invalid if the Span points outside the current -- buffer, or if the Span start is greater than the end position. unsafeSpanToByteString :: Span -> ParserT st r e ByteString -- | Embed an IO action in a ParserT. This is slightly safer than -- unsafePerformIO because it will be sequenced correctly with -- respect to the surrounding actions, and its execution is guaranteed. 
unsafeLiftIO :: IO a -> ParserT st r e a -- | Read a null-terminated bytestring (a C-style string), where the -- bytestring is known to be null-terminated somewhere in the input. -- -- Highly unsafe. Unless you have a guarantee that the string will be -- null terminated before the input ends, use anyCString instead. -- Honestly, I'm not sure if this is a good function to define. But here -- it is. -- -- Fails on GHC versions older than 9.0, since we make use of the -- cstringLength# primop introduced in GHC 9.0, and we aren't very -- useful without it. -- -- Consumes the null terminator. anyCStringUnsafe :: ParserT st r e ByteString instance (GHC.Show.Show a, GHC.Show.Show e) => GHC.Show.Show (FlatParse.Stateful.Result e a) instance GHC.Base.Functor (FlatParse.Stateful.Result e)