-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Binary parsing with random access. -- -- Binary parsing with random access. The target file to be parsed is -- loaded into memory at the start (represented as an IOUArray Int -- Word8). Parsing proceeds by moving a cursor around, the array is left -- intact. This allows _jumping_ inside the file and contrasts with other -- parser combinators that progress via consuming input. -- -- * Caveat * - the above of course means that the target file is obliged -- to be small enough to fit into memory. -- -- ** MAJOR CAVEAT ** - kangaroo is somewhat half-baked (maybe now two -- thirds baked). The parsing machinery seems good but the combinators -- need more work. It's on Hackage because I'm using it with Hurdle which -- was already on Hackage and the technique of moving a cursor rather -- than consuming input seems at least interesting. -- -- Currently kangaroo is twinned with its own library of formatting -- combinators (JoinPrint), at some point this may go into its own -- separate package. -- -- Changelog: -- --
    --
  1. 4.0 Changed JoinPrint to have distinct types for single-line -- documents (Doc) and multi-line documents (VDoc). This is because -- single-line docs track their horizontal length.
  2. --
-- -- Added a skip primitive to the Parse Monad. Added local -- and asks to the Reader top-level modules. -- --
    --
  1. 3.0 Documented the primitive parsers. char renamed -- anyChar to match Parsec's convention. Rationalized exports -- from ParseMonad module.
  2. --
  3. 2.0 Changes to ParseMonad - parsing within a region simplified, -- temporarily added JoinPrint.
  4. --
  5. 1.0 First version.
  6. --
@package kangaroo @version 0.4.0 -- | Printing with join-strings. -- -- Note - JoinPrint is just a formatter and not a 'pretty printer'. No -- line fitting takes place - lines are printed exactly as they are -- specified. module Text.PrettyPrint.JoinPrint -- | Doc is the abstract data type representing single line documents. -- -- JoinPrint distinguishes between single-line and multi-line documents. -- Single-line, horizontal documents support some operations that -- multi-line documents do not, e.g. padding, see padl and padr -- and truncating truncl and truncr. data Doc -- | VDoc is the abstract data type representing multi-line documents. -- -- Multi-line documents have a limited set of operations (basically -- concatenation with or without a blank line in between) compared to -- single line docs which support e.g. padding and truncating. data VDoc -- | Create an empty, zero length document. empty :: Doc -- | Test if the doc is empty. null :: Doc -> Bool -- | Get the length of the Doc. -- -- Length is cached in the document's data type so this operation is -- O(1). length :: Doc -> Int -- | Horizontally concatenate two documents with no space between them. (<>) :: Doc -> Doc -> Doc -- | Horizontally concatenate two documents with a single space between -- them. (<+>) :: Doc -> Doc -> Doc -- | Horizontally concatenate a list of documents with (<>). hcat :: [Doc] -> Doc -- | Horizontally concatenate a list of documents with -- (<+>). hsep :: [Doc] -> Doc -- | Vertically concatenate a list of documents, one doc per line. -- -- Note - this function produces a VDoc rather than a Doc. vcat :: [Doc] -> VDoc -- | Vertically concatenate a list of documents, one doc per line with a -- blank line in between. -- -- Note - this function produces a VDoc rather than a Doc. vsep :: [Doc] -> VDoc -- | Prefix the Doc to the start of the VDoc. vcons :: Doc -> VDoc -> VDoc -- | Suffix the VDoc with the Doc. vsnoc :: VDoc -> Doc -> VDoc -- | Concatenate a list of VDoc. 
vconcat :: [VDoc] -> VDoc -- | Concatenate a list of VDoc with a blank line separating them. vconcatSep :: [VDoc] -> VDoc -- | Create a document from a literal string. -- -- The string should not contain tabs or newlines (though this is not -- enforced). To allow padding and truncating, the horizontal width of a -- Doc is cached in the datatype; building a Doc containing tabs -- or newlines leads to unspecified behaviour. text :: String -> Doc -- | Create a document from a literal character. -- -- The char should not be a tab or newline. See text for the -- rationale. char :: Char -> Doc -- | Show the Int as a Doc. -- --
--   int  = text . show
--   
int :: Int -> Doc -- | Show the Integer as a Doc. integer :: Integer -> Doc -- | Show an "integral value" as a Doc via fromIntegral. integral :: Integral a => a -> Doc -- | Show a float as a Doc (note - the argument type is Double). float :: Double -> Doc -- | Show the Double as a Doc. double :: Double -> Doc -- | Create a Doc containing a single space character. sglspace :: Doc -- | Create a Doc containing two space characters. dblspace :: Doc -- | Create a Doc containing a comma, ",". comma :: Doc -- | Create a Doc containing a semicolon, ";". semicolon :: Doc -- | Punctuate the Doc list with the separator, producing a Doc. punctuate :: Doc -> [Doc] -> Doc -- | Enclose the final Doc within the first two. -- -- There are no spaces between the documents: -- --
--   enclose l r d = l <> d <> r
--   
enclose :: Doc -> Doc -> Doc -> Doc -- | Enclose the Doc within single quotes. squotes :: Doc -> Doc -- | Enclose the Doc within double quotes. dquotes :: Doc -> Doc -- | Enclose the Doc within parens (). parens :: Doc -> Doc -- | Enclose the Doc within square brackets []. brackets :: Doc -> Doc -- | Enclose the Doc within curly braces {}. braces :: Doc -> Doc -- | Enclose the Doc within angle brackets <>. angles :: Doc -> Doc -- | Create a Doc containing a left paren, '('. lparen :: Doc -- | Create a Doc containing a right paren, ')'. rparen :: Doc -- | Create a Doc containing a left square bracket, '['. lbracket :: Doc -- | Create a Doc containing a right square bracket, ']'. rbracket :: Doc -- | Create a Doc containing a left curly brace, '{'. lbrace :: Doc -- | Create a Doc containing a right curly brace, '}'. rbrace :: Doc -- | Create a Doc containing a left angle bracket, '<'. langle :: Doc -- | Create a Doc containing a right angle bracket, '>'. rangle :: Doc -- | replicateChar : n * ch -> Doc -- -- Repeat the supplied char (ch), n times. replicateChar :: Int -> Char -> Doc -- | Create a list of space characters of length n. spacer :: Int -> Doc -- | padl : width * ch * doc -> Doc -- -- Pad the supplied Doc to fit width using the char ch. -- Padding is performed at the left, right-justifying the Doc. -- -- If the doc is already wider than supplied width it is returned as-is -- (no truncation takes place). padl :: Int -> Char -> Doc -> Doc -- | padr : width * ch * doc -> Doc -- -- Pad the supplied Doc to fit width using the char ch. -- Padding is performed at the right, left-justifying the Doc. -- -- If the doc is already wider than supplied width it is returned as-is -- (no truncation takes place). padr :: Int -> Char -> Doc -> Doc -- | truncl : width * doc -> Doc -- -- Truncate a doc to the supplied width. Characters are dropped -- from the left until the document fits. 
If the document is shorter than -- the supplied width it is returned as is (no padding takes place). truncl :: Int -> Doc -> Doc -- | truncr : width * doc -> Doc -- -- Truncate a doc to the supplied width. Characters are dropped -- from the right until the document fits. If the document is shorter -- than the supplied width it is returned as is (no padding takes place). truncr :: Int -> Doc -> Doc -- | Rendering the Doc to a String. This is the same as using show. render :: Doc -> String -- | Print the Doc. -- --
--   renderIO = putStrLn . render
--   
renderIO :: Doc -> IO () -- | hex : i -> Doc -- -- Print i as hexadecimal, no zero padding. -- -- Negative numbers are printed as a string of asterisks. hex :: Integral a => a -> Doc -- | Print a Word8 as a 2-digit hex number. hex2 :: Word8 -> Doc -- | Print a Word16 as a 4-digit hex number. hex4 :: Word16 -> Doc -- | Print a Word32 as a 8-digit hex number. hex8 :: Word32 -> Doc -- | oxhex : pad-length * i -> Doc -- -- Print i in hexadecimal, padding with '0' to the supplied -- pad-length and prefixing with "0x". -- -- Negative numbers are printed as a string of asterisks. oxhex :: Integral a => Int -> a -> Doc -- | Print a Word8 as a 2-digit hex number prefixed with "0x". oxhex2 :: Word8 -> Doc -- | Print a Word16 as a 4-digit hex number prefixed with "0x". oxhex4 :: Word16 -> Doc -- | Print a Word32 as a 8-digit hex number prefixed with "0x". oxhex8 :: Word32 -> Doc hexdump :: Int -> Int -> [Word8] -> VDoc hexdumpA :: Int -> Int -> IOUArray Int Word8 -> IO VDoc -- | Kangaroo parse monad with env. module Data.ParserCombinators.KangarooReader type Kangaroo r a = GenKangaroo r a parse :: Kangaroo r a -> r -> FilePath -> IO (Either ParseErr a) runKangaroo :: Kangaroo r a -> r -> FilePath -> IO (Either ParseErr a) -- | Retrieve the environment. ask :: Kangaroo r r asks :: (r -> a) -> Kangaroo r a -- | Execute a computation in a modified environment. local :: (r -> r) -> Kangaroo r a -> Kangaroo r a type ParseErr = String -- | RegionCoda - represents three useful final positions: -- --
    --
  1. dalpunto - 'from the point' - Run the parser within a region and -- return to where you came from.
  2. --
  3. alfermata - 'to the stop' - Run the parser within a region, the -- cursor remains wherever the parse finished.
  4. --
  5. alfine - 'to the end' - Run the parser within a region and jump to -- the right-end of the region after the parse.
  6. --
data RegionCoda Dalpunto :: RegionCoda Alfermata :: RegionCoda Alfine :: RegionCoda type RegionName = String -- | Lift an IO action into the Kangaroo monad. liftIOAction :: IO a -> GenKangaroo ust a -- | Report a parse error. -- -- Source position is appended to the supplied error message reportError :: ParseErr -> GenKangaroo ust a -- | substError : parser * error_msg -> parser -- -- substError is equivalent to Parsec's <?> -- combinator. -- -- Run the supplied parser, if the parse succeeds return the result, -- otherwise override the original error message with the supplied -- error_msg. substError :: GenKangaroo ust a -> ParseErr -> GenKangaroo ust a -- | Parse a single byte. -- -- If the cursor is beyond the end of the current region a parse-error is -- thrown with reportError. word8 :: GenKangaroo ust Word8 -- | satisfy : predicate -> parser -- -- Parse a single byte and apply the predicate to it. On True -- return the parsed byte, on False throw a parse-error with -- reportError. satisfy :: (Word8 -> Bool) -> GenKangaroo ust Word8 -- | checkWord8 : predicate -> opt parser -- -- Byte parser with backtracking when the match fails. -- -- Parse a single byte and apply the predicate to the result. On success -- return (Just answer), on failure move the cursor position -- back one and return Nothing. checkWord8 :: (Word8 -> Bool) -> GenKangaroo ust (Maybe Word8) -- | Backtracking parser similar to Parsec's try. -- -- Try the supplied parser, if the parse succeeds with no parse-errors -- return (Just answer). If a parse-error is generated, discard -- the parse-error, return the cursor to the initial position and return -- Nothing. opt :: GenKangaroo ust a -> GenKangaroo ust (Maybe a) -- | skip : num_bytes -> () -- -- Move the cursor forward by the supplied distance. The distance must be -- positive, negative distances are ignored. -- -- skip performs no range checking. If the cursor is moved beyond -- the region boundary then the next parse will fail. 
skip :: Int -> GenKangaroo ust () -- | position : -> cursor-position -- -- Return the current cursor position position :: GenKangaroo ust Int -- | region : -> (region-start, cursor-position, -- region-end) -- -- Return the current parse region and the current position of the cursor -- within it. region :: GenKangaroo ust (Int, Int, Int) -- | atEnd - is the cursor at the end of the current region? atEnd :: GenKangaroo ust Bool -- | lengthRemaining : -> distance-to-region-end -- -- Distance from the current cursor position to the end of the current -- region lengthRemaining :: GenKangaroo ust Int -- | regionSize : -> region-length -- -- Size of the current region. regionSize :: GenKangaroo ust Int -- | intraparse : name * coda * abs_region_start * region_length -- * parser -> parser -- -- Create a new region within the current one and run the supplied -- parser. The cursor position is moved to the start of the new region. -- The value of coda determines where the cursor is positioned -- after a successful parse. -- -- intraparse throws a parse error if the supplied -- absolute-region-start is not located within the current region, or if -- the right-boundary of the new region (abs_region_start + -- region_length) extends beyond the right-boundary of the current -- region. intraparse :: RegionName -> RegionCoda -> RegionStart -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | advance : name * coda * abs_region_start * parser -> -- parser -- -- A variation of intraparse - the new region starts at the -- supplied abs_region_start and continues to the end of the -- current region. -- -- advance throws a parse error if the new start position is not -- within the current region. advance :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | advanceRelative : name * coda * distance * parser -> -- parser -- -- A variation of advance - the start of the new region is -- calculated from the current-cursor-position + the supplied -- distance. 
-- -- advanceRelative throws a parse error if the new start position -- is not within the current region. advanceRelative :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | restrict : name * coda * distance * parser -> -- parser -- -- A variation of intraparse - create a new region as a -- restriction of the current one and run the supplied parser. The new -- region starts at the current cursor position, the right-boundary is -- restricted to the current-cursor-position + the supplied -- distance. -- -- restrict throws a parse error if the right-boundary of the new -- region extends beyond the current region. restrict :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | restrictToPos : region-name * coda * abs-end-pos * parser -- -> parser -- -- A variation of restrict - the new region takes the current -- cursor position for the left-boundary and the supplied -- absolute-end-position (abs-end-pos) as the right-boundary. -- -- restrictToPos throws a parse error if the abs-end-pos -- extends beyond the right-boundary of the current region. 
restrictToPos :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a printHexAll :: GenKangaroo ust () printHexRange :: (Int, Int) -> GenKangaroo ust () printRegionStack :: GenKangaroo ust () manyTill :: GenKangaroo ust a -> GenKangaroo ust b -> GenKangaroo ust [a] genericManyTill :: (a -> c -> c) -> c -> GenKangaroo ust a -> GenKangaroo ust b -> GenKangaroo ust c manyTillPC :: GenKangaroo ust a -> (a -> Bool) -> GenKangaroo ust ([a], a) genericManyTillPC :: (a -> b -> b) -> b -> GenKangaroo ust a -> (a -> Bool) -> GenKangaroo ust (b, a) count :: Int -> GenKangaroo ust a -> GenKangaroo ust [a] countPrefixed :: Integral i => GenKangaroo ust i -> GenKangaroo ust a -> GenKangaroo ust (i, [a]) genericCount :: (a -> b -> b) -> b -> Int -> GenKangaroo ust a -> GenKangaroo ust b runOn :: GenKangaroo ust a -> GenKangaroo ust [a] genericRunOn :: (a -> b -> b) -> b -> GenKangaroo ust a -> GenKangaroo ust b -- | Apply parse then apply the check, if the check fails report the error -- message. postCheck :: GenKangaroo ust a -> (a -> Bool) -> String -> GenKangaroo ust a -- | Build a value by while the test holds. When the test fails the -- position is not backtracked, instead we use the "failing" element with -- lastOp potentially still building the value with it. buildWhile :: (a -> Bool) -> (a -> b -> b) -> (a -> b -> b) -> b -> GenKangaroo ust a -> GenKangaroo ust b buildPrimitive :: Int -> (Word8 -> Bool) -> (Word8 -> b -> b) -> b -> GenKangaroo ust b -- | Attempt to parse the supplied single character (the supplied char must -- be in the ASCII range 0-255). -- -- If the parse succeeds return the char, otherwise a parse-error will be -- thrown with reportError. char :: Char -> GenKangaroo ust Char -- | Parse any single character. The parser consumes one byte and uses -- chr to convert it. anyChar :: GenKangaroo ust Char -- | Parse a string of the supplied length n. -- -- If n is less than or equal to zero the empty string is -- returned. 
text :: Int -> GenKangaroo ust String -- | Parse the supplied string. All characters should be within the range -- 0-255. -- -- If the parse succeeds return the string, otherwise a parse-error will be -- thrown with reportError. string :: String -> GenKangaroo ust String -- | Parse a null-terminated C-style string. cstring :: GenKangaroo ust String -- | Parse the literal 0x00. w8Zero :: GenKangaroo ust Word8 -- | Get n bytes. -- -- If n is less than or equal to zero an empty list is returned. getBytes :: Integral a => a -> GenKangaroo ust [Word8] -- | Parse a single byte, returning it as an Int8. -- -- The conversion from a byte (0-255) to an Int8 uses the Prelude -- function fromIntegral. -- -- The conversion is summarized as: -- --
--   0..127   = 0..127
--   128      = -128
--   129      = -127
--   130      = -126
--   ...
--   254      = -2
--   255      = -1   
--   
--   wtoi :: Word8 -> Int8
--   wtoi i | i < 128   = i
--          | otherwise = -128 + (clearBit i 7)
--   
int8 :: GenKangaroo ust Int8 -- | Parse a Word16 in big endian form. word16be :: GenKangaroo ust Word16 -- | Parse a "Word24" in big endian form. -- -- 3 bytes are read - the answer is returned as a Word32. word24be :: GenKangaroo ust Word32 -- | Parse a Word32 in big endian form. word32be :: GenKangaroo ust Word32 -- | Parse a Word64 in big endian form. word64be :: GenKangaroo ust Word64 -- | Parse a Word16 in little endian form. word16le :: GenKangaroo ust Word16 -- | Parse a "Word24" in little endian form. -- -- 3 bytes are read - the answer is returned as a Word32. word24le :: GenKangaroo ust Word32 -- | Parse a Word32 in little endian form. word32le :: GenKangaroo ust Word32 -- | Parse an Int16 in big endian form. -- -- The answer is parsed as a Word16 (big endian) then converted to an Int16 -- using the Prelude function fromIntegral. int16be :: GenKangaroo ust Int16 -- | Parse an Int32 in big endian form. -- -- The answer is parsed as a Word32 (big endian) then converted to an Int32 -- using the Prelude function fromIntegral. int32be :: GenKangaroo ust Int32 -- | Parse an Int16 in little endian form. -- -- The answer is parsed as a Word16 (little endian) then converted to an -- Int16 using the Prelude function fromIntegral. int16le :: GenKangaroo ust Int16 -- | Parse an Int32 in little endian form. -- -- The answer is parsed as a Word32 (little endian) then converted to an -- Int32 using the Prelude function fromIntegral. int32le :: GenKangaroo ust Int32 -- | Parse a 4-byte IEEE single precision float. -- -- NOTE - THIS FUNCTION IS UNTESTED! ieeeFloatSP :: Fractional a => GenKangaroo ust a -- | Kangaroo parse monad with user env, logging and state. 
module Data.ParserCombinators.KangarooRWS type Kangaroo r w st a = GenKangaroo (r, w, st) a parse :: Monoid w => Kangaroo r w st a -> r -> st -> FilePath -> IO (Either ParseErr a) runKangaroo :: Monoid w => Kangaroo r w st a -> r -> st -> FilePath -> IO (Either ParseErr a, w, st) evalKangaroo :: Monoid w => Kangaroo r w st a -> r -> st -> FilePath -> IO (Either ParseErr a, w) execKangaroo :: Monoid w => Kangaroo r w st a -> r -> st -> FilePath -> IO st put :: st -> Kangaroo r w st () get :: Kangaroo r w st st modify :: (st -> st) -> Kangaroo r w st () gets :: (st -> a) -> Kangaroo r w st a tell :: Monoid w => w -> Kangaroo r w st () -- | Retrieve the environment. ask :: Kangaroo r w st r asks :: (r -> a) -> Kangaroo r w st a -- | Execute a computation in a modified environment. local :: (r -> r) -> Kangaroo r w st a -> Kangaroo r w st a type ParseErr = String -- | RegionCoda - represents three useful final positions: -- --
    --
  1. dalpunto - 'from the point' - Run the parser within a region and -- return to where you came from.
  2. --
  3. alfermata - 'to the stop' - Run the parser within a region, the -- cursor remains wherever the parse finished.
  4. --
  5. alfine - 'to the end' - Run the parser within a region and jump to -- the right-end of the region after the parse.
  6. --
data RegionCoda Dalpunto :: RegionCoda Alfermata :: RegionCoda Alfine :: RegionCoda type RegionName = String -- | Lift an IO action into the Kangaroo monad. liftIOAction :: IO a -> GenKangaroo ust a -- | Report a parse error. -- -- Source position is appended to the supplied error message reportError :: ParseErr -> GenKangaroo ust a -- | substError : parser * error_msg -> parser -- -- substError is equivalent to Parsec's <?> -- combinator. -- -- Run the supplied parser, if the parse succeeds return the result, -- otherwise override the original error message with the supplied -- error_msg. substError :: GenKangaroo ust a -> ParseErr -> GenKangaroo ust a -- | Parse a single byte. -- -- If the cursor is beyond the end of the current region a parse-error is -- thrown with reportError. word8 :: GenKangaroo ust Word8 -- | satisfy : predicate -> parser -- -- Parse a single byte and apply the predicate to it. On True -- return the parsed byte, on False throw a parse-error with -- reportError. satisfy :: (Word8 -> Bool) -> GenKangaroo ust Word8 -- | checkWord8 : predicate -> opt parser -- -- Byte parser with backtracking when the match fails. -- -- Parse a single byte and apply the predicate to the result. On success -- return (Just answer), on failure move the cursor position -- back one and return Nothing. checkWord8 :: (Word8 -> Bool) -> GenKangaroo ust (Maybe Word8) -- | Backtracking parser similar to Parsec's try. -- -- Try the supplied parser, if the parse succeeds with no parse-errors -- return (Just answer). If a parse-error is generated, discard -- the parse-error, return the cursor to the initial position and return -- Nothing. opt :: GenKangaroo ust a -> GenKangaroo ust (Maybe a) -- | skip : num_bytes -> () -- -- Move the cursor forward by the supplied distance. The distance must be -- positive, negative distances are ignored. -- -- skip performs no range checking. If the cursor is moved beyond -- the region boundary then the next parse will fail. 
skip :: Int -> GenKangaroo ust () -- | position : -> cursor-position -- -- Return the current cursor position position :: GenKangaroo ust Int -- | region : -> (region-start, cursor-position, -- region-end) -- -- Return the current parse region and the current position of the cursor -- within it. region :: GenKangaroo ust (Int, Int, Int) -- | atEnd - is the cursor at the end of the current region? atEnd :: GenKangaroo ust Bool -- | lengthRemaining : -> distance-to-region-end -- -- Distance from the current cursor position to the end of the current -- region lengthRemaining :: GenKangaroo ust Int -- | regionSize : -> region-length -- -- Size of the current region. regionSize :: GenKangaroo ust Int -- | intraparse : name * coda * abs_region_start * region_length -- * parser -> parser -- -- Create a new region within the current one and run the supplied -- parser. The cursor position is moved to the start of the new region. -- The value of coda determines where the cursor is positioned -- after a successful parse. -- -- intraparse throws a parse error if the supplied -- absolute-region-start is not located within the current region, or if -- the right-boundary of the new region (abs_region_start + -- region_length) extends beyond the right-boundary of the current -- region. intraparse :: RegionName -> RegionCoda -> RegionStart -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | advance : name * coda * abs_region_start * parser -> -- parser -- -- A variation of intraparse - the new region starts at the -- supplied abs_region_start and continues to the end of the -- current region. -- -- advance throws a parse error if the new start position is not -- within the current region. advance :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | advanceRelative : name * coda * distance * parser -> -- parser -- -- A variation of advance - the start of the new region is -- calculated from the current-cursor-position + the supplied -- distance. 
-- -- advanceRelative throws a parse error if the new start position -- is not within the current region. advanceRelative :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | restrict : name * coda * distance * parser -> -- parser -- -- A variation of intraparse - create a new region as a -- restriction of the current one and run the supplied parser. The new -- region starts at the current cursor position, the right-boundary is -- restricted to the current-cursor-position + the supplied -- distance. -- -- restrict throws a parse error if the right-boundary of the new -- region extends beyond the current region. restrict :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | restrictToPos : region-name * coda * abs-end-pos * parser -- -> parser -- -- A variation of restrict - the new region takes the current -- cursor position for the left-boundary and the supplied -- absolute-end-position (abs-end-pos) as the right-boundary. -- -- restrictToPos throws a parse error if the abs-end-pos -- extends beyond the right-boundary of the current region. 
restrictToPos :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a printHexAll :: GenKangaroo ust () printHexRange :: (Int, Int) -> GenKangaroo ust () printRegionStack :: GenKangaroo ust () manyTill :: GenKangaroo ust a -> GenKangaroo ust b -> GenKangaroo ust [a] genericManyTill :: (a -> c -> c) -> c -> GenKangaroo ust a -> GenKangaroo ust b -> GenKangaroo ust c manyTillPC :: GenKangaroo ust a -> (a -> Bool) -> GenKangaroo ust ([a], a) genericManyTillPC :: (a -> b -> b) -> b -> GenKangaroo ust a -> (a -> Bool) -> GenKangaroo ust (b, a) count :: Int -> GenKangaroo ust a -> GenKangaroo ust [a] countPrefixed :: Integral i => GenKangaroo ust i -> GenKangaroo ust a -> GenKangaroo ust (i, [a]) genericCount :: (a -> b -> b) -> b -> Int -> GenKangaroo ust a -> GenKangaroo ust b runOn :: GenKangaroo ust a -> GenKangaroo ust [a] genericRunOn :: (a -> b -> b) -> b -> GenKangaroo ust a -> GenKangaroo ust b -- | Apply parse then apply the check, if the check fails report the error -- message. postCheck :: GenKangaroo ust a -> (a -> Bool) -> String -> GenKangaroo ust a -- | Build a value by while the test holds. When the test fails the -- position is not backtracked, instead we use the "failing" element with -- lastOp potentially still building the value with it. buildWhile :: (a -> Bool) -> (a -> b -> b) -> (a -> b -> b) -> b -> GenKangaroo ust a -> GenKangaroo ust b buildPrimitive :: Int -> (Word8 -> Bool) -> (Word8 -> b -> b) -> b -> GenKangaroo ust b -- | Attempt to parse the supplied single character (the supplied char must -- be in the ASCII range 0-255). -- -- If the parse succeeds return the char, otherwise a parse-error will be -- thrown with reportError. char :: Char -> GenKangaroo ust Char -- | Parse any single character. The parser consumes one byte and uses -- chr to convert it. anyChar :: GenKangaroo ust Char -- | Parse a string of the supplied length n. -- -- If n is less than or equal to zero the empty string is -- returned. 
text :: Int -> GenKangaroo ust String -- | Parse the supplied string. All characters should be within the range -- 0-255. -- -- If the parse succeeds return the string, otherwise a parse-error will be -- thrown with reportError. string :: String -> GenKangaroo ust String -- | Parse a null-terminated C-style string. cstring :: GenKangaroo ust String -- | Parse the literal 0x00. w8Zero :: GenKangaroo ust Word8 -- | Get n bytes. -- -- If n is less than or equal to zero an empty list is returned. getBytes :: Integral a => a -> GenKangaroo ust [Word8] -- | Parse a single byte, returning it as an Int8. -- -- The conversion from a byte (0-255) to an Int8 uses the Prelude -- function fromIntegral. -- -- The conversion is summarized as: -- --
--   0..127   = 0..127
--   128      = -128
--   129      = -127
--   130      = -126
--   ...
--   254      = -2
--   255      = -1   
--   
--   wtoi :: Word8 -> Int8
--   wtoi i | i < 128   = i
--          | otherwise = -128 + (clearBit i 7)
--   
int8 :: GenKangaroo ust Int8 -- | Parse a Word16 in big endian form. word16be :: GenKangaroo ust Word16 -- | Parse a "Word24" in big endian form. -- -- 3 bytes are read - the answer is returned as a Word32. word24be :: GenKangaroo ust Word32 -- | Parse a Word32 in big endian form. word32be :: GenKangaroo ust Word32 -- | Parse a Word64 in big endian form. word64be :: GenKangaroo ust Word64 -- | Parse a Word16 in little endian form. word16le :: GenKangaroo ust Word16 -- | Parse a "Word24" in little endian form. -- -- 3 bytes are read - the answer is returned as a Word32. word24le :: GenKangaroo ust Word32 -- | Parse a Word32 in little endian form. word32le :: GenKangaroo ust Word32 -- | Parse an Int16 in big endian form. -- -- The answer is parsed as a Word16 (big endian) then converted to an Int16 -- using the Prelude function fromIntegral. int16be :: GenKangaroo ust Int16 -- | Parse an Int32 in big endian form. -- -- The answer is parsed as a Word32 (big endian) then converted to an Int32 -- using the Prelude function fromIntegral. int32be :: GenKangaroo ust Int32 -- | Parse an Int16 in little endian form. -- -- The answer is parsed as a Word16 (little endian) then converted to an -- Int16 using the Prelude function fromIntegral. int16le :: GenKangaroo ust Int16 -- | Parse an Int32 in little endian form. -- -- The answer is parsed as a Word32 (little endian) then converted to an -- Int32 using the Prelude function fromIntegral. int32le :: GenKangaroo ust Int32 -- | Parse a 4-byte IEEE single precision float. -- -- NOTE - THIS FUNCTION IS UNTESTED! ieeeFloatSP :: Fractional a => GenKangaroo ust a -- | Kangaroo parse monad with user state. 
module Data.ParserCombinators.KangarooState type Kangaroo st a = GenKangaroo st a parse :: Kangaroo st a -> st -> FilePath -> IO (Either ParseErr a) runKangaroo :: Kangaroo st a -> st -> FilePath -> IO (Either ParseErr a, st) evalKangaroo :: Kangaroo st a -> st -> FilePath -> IO (Either ParseErr a) execKangaroo :: Kangaroo st a -> st -> FilePath -> IO st put :: st -> Kangaroo st () get :: Kangaroo st st modify :: (st -> st) -> Kangaroo st () gets :: (st -> a) -> Kangaroo st a type ParseErr = String -- | RegionCoda - represents three useful final positions: -- --
-- 1. dalpunto - 'from the point' - Run the parser within a region and
--    return to where you came from.
-- 2. alfermata - 'to the stop' - Run the parser within a region, the
--    cursor remains wherever the parse finished.
-- 3. alfine - 'to the end' - Run the parser within a region and jump to
--    the right-end of the region after the parse.
data RegionCoda Dalpunto :: RegionCoda Alfermata :: RegionCoda Alfine :: RegionCoda type RegionName = String -- | Lift an IO action into the Kangaroo monad. liftIOAction :: IO a -> GenKangaroo ust a -- | Report a parse error. -- -- Source position is appended to the supplied error message reportError :: ParseErr -> GenKangaroo ust a -- | substError : parser * error_msg -> parser -- -- substError is equivalent to Parsec's <?> -- combinator. -- -- Run the supplied parser, if the parse succeeds return the result, -- otherwise override the original error message with the supplied -- error_msg. substError :: GenKangaroo ust a -> ParseErr -> GenKangaroo ust a -- | Parse a single byte. -- -- If the cursor is beyond the end of the current region a parse-error is -- thrown with reportError. word8 :: GenKangaroo ust Word8 -- | satisfy : predicate -> parser -- -- Parse a single byte and apply the predicate to it. On True -- return the parsed byte, on False throw a parse-error with -- reportError. satisfy :: (Word8 -> Bool) -> GenKangaroo ust Word8 -- | checkWord8 : predicate -> opt parser -- -- Byte parser with backtracking when the match fails. -- -- Parse a single byte and apply the predicate to the result. On success -- return (Just answer), on failure move the cursor position -- back one and return Nothing. checkWord8 :: (Word8 -> Bool) -> GenKangaroo ust (Maybe Word8) -- | Backtracking parser similar to Parsec's try. -- -- Try the supplied parser, if the parse succeeds with no parse-errors -- return (Just answer). If a parse-error is generated, discard -- the parse-error, return the cursor to the initial position and return -- Nothing. opt :: GenKangaroo ust a -> GenKangaroo ust (Maybe a) -- | skip : num_bytes -> () -- -- Move the cursor forward by the supplied distance. The distance must be -- positive, negative distances are ignored. -- -- skip performs no range checking. If the cursor is moved beyond -- the region boundary then the next parse will fail. 
skip :: Int -> GenKangaroo ust () -- | position : -> cursor-position -- -- Return the current cursor position position :: GenKangaroo ust Int -- | region : -> (region-start, cursor-position, -- region-end) -- -- Return the current parse region and the current position of the cursor -- within it. region :: GenKangaroo ust (Int, Int, Int) -- | atEnd - is the cursor at the end of the current region? atEnd :: GenKangaroo ust Bool -- | lengthRemaining : -> distance-to-region-end -- -- Distance from the current cursor position to the end of the current -- region lengthRemaining :: GenKangaroo ust Int -- | regionSize : -> region-length -- -- Size of the current region. regionSize :: GenKangaroo ust Int -- | intraparse : name * coda * abs_region_start * region_length -- * parser -> parser -- -- Create a new region within the current one and run the supplied -- parser. The cursor position is moved to the start of the new region. -- The value of coda determines where the cursor is positioned -- after a successful parse. -- -- intraparse throws a parse error if the supplied -- absolute-region-start is not located within the current region, or if -- the right-boundary of the new region (abs_region_start + -- region_length) extends beyond the right-boundary of the current -- region. intraparse :: RegionName -> RegionCoda -> RegionStart -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | advance : name * coda * abs_region_start * parser -> -- parser -- -- A variation of intraparse - the new region starts at the -- supplied abs_region_start and continues to the end of the -- current region. -- -- advance throws a parse error if the new start position is not -- within the current region. advance :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | advanceRelative : name * coda * distance * parser -> -- parser -- -- A variation of advance - the start of the new region is -- calculated from the current-cursor-position + the supplied -- distance. 
-- -- advanceRelative throws a parse error if the new start position -- is not within the current region. advanceRelative :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | restrict : name * coda * distance * parser -> -- parser -- -- A variation of intraparse - create a new region as a -- restriction of the current one and run the supplied parser. The new -- region starts at the current cursor position, the right-boundary is -- restricted to the current-cursor-position + the supplied -- distance. -- -- restrict throws a parse error if the right-boundary of the new -- region extends beyond the current region. restrict :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | restrictToPos : region-name * coda * abs-end-pos * parser -- -> parser -- -- A variation of restrict - the new region takes the current -- cursor position for the left-boundary and the supplied -- absolute-end-position (abs-end-pos) as the right-boundary. -- -- restrictToPos throws a parse error if the abs-end-pos -- extends beyond the right-boundary of the current region. 
restrictToPos :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a printHexAll :: GenKangaroo ust () printHexRange :: (Int, Int) -> GenKangaroo ust () printRegionStack :: GenKangaroo ust () manyTill :: GenKangaroo ust a -> GenKangaroo ust b -> GenKangaroo ust [a] genericManyTill :: (a -> c -> c) -> c -> GenKangaroo ust a -> GenKangaroo ust b -> GenKangaroo ust c manyTillPC :: GenKangaroo ust a -> (a -> Bool) -> GenKangaroo ust ([a], a) genericManyTillPC :: (a -> b -> b) -> b -> GenKangaroo ust a -> (a -> Bool) -> GenKangaroo ust (b, a) count :: Int -> GenKangaroo ust a -> GenKangaroo ust [a] countPrefixed :: Integral i => GenKangaroo ust i -> GenKangaroo ust a -> GenKangaroo ust (i, [a]) genericCount :: (a -> b -> b) -> b -> Int -> GenKangaroo ust a -> GenKangaroo ust b runOn :: GenKangaroo ust a -> GenKangaroo ust [a] genericRunOn :: (a -> b -> b) -> b -> GenKangaroo ust a -> GenKangaroo ust b -- | Apply parse then apply the check, if the check fails report the error -- message. postCheck :: GenKangaroo ust a -> (a -> Bool) -> String -> GenKangaroo ust a -- | Build a value by while the test holds. When the test fails the -- position is not backtracked, instead we use the "failing" element with -- lastOp potentially still building the value with it. buildWhile :: (a -> Bool) -> (a -> b -> b) -> (a -> b -> b) -> b -> GenKangaroo ust a -> GenKangaroo ust b buildPrimitive :: Int -> (Word8 -> Bool) -> (Word8 -> b -> b) -> b -> GenKangaroo ust b -- | Attempt to parse the supplied single character (the supplied char must -- be in the ASCII range 0-255). -- -- If the parse succeeds return the char, otherwise a parse-error will be -- thrown with reportError. char :: Char -> GenKangaroo ust Char -- | Parse any single character. The parser consumes one byte and uses -- chr to convert it. anyChar :: GenKangaroo ust Char -- | Parse a string of the supplied length n. -- -- If n is less than or equal to zero the empty string is -- returned. 
text :: Int -> GenKangaroo ust String -- | Parse the supplied string. All characters should be within the range -- 0-255. -- -- If the parse succeeds return the char, otherwise a parse-error will be -- thrown with reportError. string :: String -> GenKangaroo ust String -- | Parse a null-terminated C-style string. cstring :: GenKangaroo ust String -- | Parse the literal 0x00. w8Zero :: GenKangaroo ust Word8 -- | Get n bytes. -- -- If n is less than or equal to zero an empty list is returned. getBytes :: Integral a => a -> GenKangaroo ust [Word8] -- | Parse a single byte, returning it as an Int8. -- -- The conversion from a byte (0-255) to an Int8 uses the Prelude -- function fromIntegral. -- -- The conversion is summarized as: -- --
--   0..127   = 0..127
--   128      = -128
--   129      = -127
--   130      = -126
--   ...
--   254      = -2
--   255      = -1   
--   
--   wtoi :: Word8 -> Int8
--   wtoi i | i < 128   = fromIntegral i
--          | otherwise = -128 + fromIntegral (clearBit i 7)
--   
int8 :: GenKangaroo ust Int8 -- | Parse a Word16 in big endian form. word16be :: GenKangaroo ust Word16 -- | Parse a "Word24" in big endian form. -- -- 3 bytes are read - the answer is returned as a Word32. word24be :: GenKangaroo ust Word32 -- | Parse a Word32 in big endian form. word32be :: GenKangaroo ust Word32 -- | Parse a Word64 in big endian form. word64be :: GenKangaroo ust Word64 -- | Parse a Word16 in little endian form. word16le :: GenKangaroo ust Word16 -- | Parse a "Word24" in little endian form. -- -- 3 bytes are read - the answer is returned as a Word32. word24le :: GenKangaroo ust Word32 -- | Parse a Word32 in little endian form. word32le :: GenKangaroo ust Word32 -- | Parse an Int16 in big endian form. -- -- The ans is parsed as a Word16 (big endian) then converted to an Int16 -- using the Prelude function fromIntegral. int16be :: GenKangaroo ust Int16 -- | Parse an Int32 in big endian form. -- -- The ans is parsed as a Word32 (big endian) then converted to an Int32 -- using the Prelude function fromIntegral. int32be :: GenKangaroo ust Int32 -- | Parse an Int16 in little endian form. -- -- The ans is parsed as a Word16 (little endian) then converted to an -- Int16 using the Prelude function fromIntegral. int16le :: GenKangaroo ust Int16 -- | Parse an Int32 in little endian form. -- -- The ans is parsed as a Word32 (little endian) then converted to an -- Int32 using the Prelude function fromIntegral. int32le :: GenKangaroo ust Int32 -- | Parse an 4-byte IEEE single precision float. -- -- NOTE - THIS FUNCTION IS UNTESTED! ieeeFloatSP :: Fractional a => GenKangaroo ust a -- | Kangaroo parse monad with logging. 
module Data.ParserCombinators.KangarooWriter type Kangaroo r a = GenKangaroo r a parse :: Monoid w => Kangaroo w a -> FilePath -> IO (Either ParseErr a, w) runKangaroo :: Monoid w => Kangaroo w a -> FilePath -> IO (Either ParseErr a, w) tell :: Monoid w => w -> Kangaroo w () type ParseErr = String -- | RegionCoda - represents three useful final positions: -- --
-- 1. dalpunto - 'from the point' - Run the parser within a region and
--    return to where you came from.
-- 2. alfermata - 'to the stop' - Run the parser within a region, the
--    cursor remains wherever the parse finished.
-- 3. alfine - 'to the end' - Run the parser within a region and jump to
--    the right-end of the region after the parse.
data RegionCoda Dalpunto :: RegionCoda Alfermata :: RegionCoda Alfine :: RegionCoda type RegionName = String -- | Lift an IO action into the Kangaroo monad. liftIOAction :: IO a -> GenKangaroo ust a -- | Report a parse error. -- -- Source position is appended to the supplied error message reportError :: ParseErr -> GenKangaroo ust a -- | substError : parser * error_msg -> parser -- -- substError is equivalent to Parsec's <?> -- combinator. -- -- Run the supplied parser, if the parse succeeds return the result, -- otherwise override the original error message with the supplied -- error_msg. substError :: GenKangaroo ust a -> ParseErr -> GenKangaroo ust a -- | Parse a single byte. -- -- If the cursor is beyond the end of the current region a parse-error is -- thrown with reportError. word8 :: GenKangaroo ust Word8 -- | satisfy : predicate -> parser -- -- Parse a single byte and apply the predicate to it. On True -- return the parsed byte, on False throw a parse-error with -- reportError. satisfy :: (Word8 -> Bool) -> GenKangaroo ust Word8 -- | checkWord8 : predicate -> opt parser -- -- Byte parser with backtracking when the match fails. -- -- Parse a single byte and apply the predicate to the result. On success -- return (Just answer), on failure move the cursor position -- back one and return Nothing. checkWord8 :: (Word8 -> Bool) -> GenKangaroo ust (Maybe Word8) -- | Backtracking parser similar to Parsec's try. -- -- Try the supplied parser, if the parse succeeds with no parse-errors -- return (Just answer). If a parse-error is generated, discard -- the parse-error, return the cursor to the initial position and return -- Nothing. opt :: GenKangaroo ust a -> GenKangaroo ust (Maybe a) -- | skip : num_bytes -> () -- -- Move the cursor forward by the supplied distance. The distance must be -- positive, negative distances are ignored. -- -- skip performs no range checking. If the cursor is moved beyond -- the region boundary then the next parse will fail. 
skip :: Int -> GenKangaroo ust () -- | position : -> cursor-position -- -- Return the current cursor position position :: GenKangaroo ust Int -- | region : -> (region-start, cursor-position, -- region-end) -- -- Return the current parse region and the current position of the cursor -- within it. region :: GenKangaroo ust (Int, Int, Int) -- | atEnd - is the cursor at the end of the current region? atEnd :: GenKangaroo ust Bool -- | lengthRemaining : -> distance-to-region-end -- -- Distance from the current cursor position to the end of the current -- region lengthRemaining :: GenKangaroo ust Int -- | regionSize : -> region-length -- -- Size of the current region. regionSize :: GenKangaroo ust Int -- | intraparse : name * coda * abs_region_start * region_length -- * parser -> parser -- -- Create a new region within the current one and run the supplied -- parser. The cursor position is moved to the start of the new region. -- The value of coda determines where the cursor is positioned -- after a successful parse. -- -- intraparse throws a parse error if the supplied -- absolute-region-start is not located within the current region, or if -- the right-boundary of the new region (abs_region_start + -- region_length) extends beyond the right-boundary of the current -- region. intraparse :: RegionName -> RegionCoda -> RegionStart -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | advance : name * coda * abs_region_start * parser -> -- parser -- -- A variation of intraparse - the new region starts at the -- supplied abs_region_start and continues to the end of the -- current region. -- -- advance throws a parse error if the new start position is not -- within the current region. advance :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | advanceRelative : name * coda * distance * parser -> -- parser -- -- A variation of advance - the start of the new region is -- calculated from the current-cursor-position + the supplied -- distance. 
-- -- advanceRelative throws a parse error if the new start position -- is not within the current region. advanceRelative :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | restrict : name * coda * distance * parser -> -- parser -- -- A variation of intraparse - create a new region as a -- restriction of the current one and run the supplied parser. The new -- region starts at the current cursor position, the right-boundary is -- restricted to the current-cursor-position + the supplied -- distance. -- -- restrict throws a parse error if the right-boundary of the new -- region extends beyond the current region. restrict :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | restrictToPos : region-name * coda * abs-end-pos * parser -- -> parser -- -- A variation of restrict - the new region takes the current -- cursor position for the left-boundary and the supplied -- absolute-end-position (abs-end-pos) as the right-boundary. -- -- restrictToPos throws a parse error if the abs-end-pos -- extends beyond the right-boundary of the current region. 
restrictToPos :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a printHexAll :: GenKangaroo ust () printHexRange :: (Int, Int) -> GenKangaroo ust () printRegionStack :: GenKangaroo ust () manyTill :: GenKangaroo ust a -> GenKangaroo ust b -> GenKangaroo ust [a] genericManyTill :: (a -> c -> c) -> c -> GenKangaroo ust a -> GenKangaroo ust b -> GenKangaroo ust c manyTillPC :: GenKangaroo ust a -> (a -> Bool) -> GenKangaroo ust ([a], a) genericManyTillPC :: (a -> b -> b) -> b -> GenKangaroo ust a -> (a -> Bool) -> GenKangaroo ust (b, a) count :: Int -> GenKangaroo ust a -> GenKangaroo ust [a] countPrefixed :: Integral i => GenKangaroo ust i -> GenKangaroo ust a -> GenKangaroo ust (i, [a]) genericCount :: (a -> b -> b) -> b -> Int -> GenKangaroo ust a -> GenKangaroo ust b runOn :: GenKangaroo ust a -> GenKangaroo ust [a] genericRunOn :: (a -> b -> b) -> b -> GenKangaroo ust a -> GenKangaroo ust b -- | Apply parse then apply the check, if the check fails report the error -- message. postCheck :: GenKangaroo ust a -> (a -> Bool) -> String -> GenKangaroo ust a -- | Build a value by while the test holds. When the test fails the -- position is not backtracked, instead we use the "failing" element with -- lastOp potentially still building the value with it. buildWhile :: (a -> Bool) -> (a -> b -> b) -> (a -> b -> b) -> b -> GenKangaroo ust a -> GenKangaroo ust b buildPrimitive :: Int -> (Word8 -> Bool) -> (Word8 -> b -> b) -> b -> GenKangaroo ust b -- | Attempt to parse the supplied single character (the supplied char must -- be in the ASCII range 0-255). -- -- If the parse succeeds return the char, otherwise a parse-error will be -- thrown with reportError. char :: Char -> GenKangaroo ust Char -- | Parse any single character. The parser consumes one byte and uses -- chr to convert it. anyChar :: GenKangaroo ust Char -- | Parse a string of the supplied length n. -- -- If n is less than or equal to zero the empty string is -- returned. 
text :: Int -> GenKangaroo ust String -- | Parse the supplied string. All characters should be within the range -- 0-255. -- -- If the parse succeeds return the char, otherwise a parse-error will be -- thrown with reportError. string :: String -> GenKangaroo ust String -- | Parse a null-terminated C-style string. cstring :: GenKangaroo ust String -- | Parse the literal 0x00. w8Zero :: GenKangaroo ust Word8 -- | Get n bytes. -- -- If n is less than or equal to zero an empty list is returned. getBytes :: Integral a => a -> GenKangaroo ust [Word8] -- | Parse a single byte, returning it as an Int8. -- -- The conversion from a byte (0-255) to an Int8 uses the Prelude -- function fromIntegral. -- -- The conversion is summarized as: -- --
--   0..127   = 0..127
--   128      = -128
--   129      = -127
--   130      = -126
--   ...
--   254      = -2
--   255      = -1   
--   
--   wtoi :: Word8 -> Int8
--   wtoi i | i < 128   = fromIntegral i
--          | otherwise = -128 + fromIntegral (clearBit i 7)
--   
int8 :: GenKangaroo ust Int8 -- | Parse a Word16 in big endian form. word16be :: GenKangaroo ust Word16 -- | Parse a "Word24" in big endian form. -- -- 3 bytes are read - the answer is returned as a Word32. word24be :: GenKangaroo ust Word32 -- | Parse a Word32 in big endian form. word32be :: GenKangaroo ust Word32 -- | Parse a Word64 in big endian form. word64be :: GenKangaroo ust Word64 -- | Parse a Word16 in little endian form. word16le :: GenKangaroo ust Word16 -- | Parse a "Word24" in little endian form. -- -- 3 bytes are read - the answer is returned as a Word32. word24le :: GenKangaroo ust Word32 -- | Parse a Word32 in little endian form. word32le :: GenKangaroo ust Word32 -- | Parse an Int16 in big endian form. -- -- The ans is parsed as a Word16 (big endian) then converted to an Int16 -- using the Prelude function fromIntegral. int16be :: GenKangaroo ust Int16 -- | Parse an Int32 in big endian form. -- -- The ans is parsed as a Word32 (big endian) then converted to an Int32 -- using the Prelude function fromIntegral. int32be :: GenKangaroo ust Int32 -- | Parse an Int16 in little endian form. -- -- The ans is parsed as a Word16 (little endian) then converted to an -- Int16 using the Prelude function fromIntegral. int16le :: GenKangaroo ust Int16 -- | Parse an Int32 in little endian form. -- -- The ans is parsed as a Word32 (little endian) then converted to an -- Int32 using the Prelude function fromIntegral. int32le :: GenKangaroo ust Int32 -- | Parse an 4-byte IEEE single precision float. -- -- NOTE - THIS FUNCTION IS UNTESTED! ieeeFloatSP :: Fractional a => GenKangaroo ust a -- | Binary parser combinators with random access module Data.ParserCombinators.Kangaroo type Kangaroo a = GenKangaroo () a runKangaroo :: Kangaroo a -> FilePath -> IO (Either ParseErr a) parse :: Kangaroo a -> FilePath -> IO (Either ParseErr a) type ParseErr = String -- | RegionCoda - represents three useful final positions: -- --
-- 1. dalpunto - 'from the point' - Run the parser within a region and
--    return to where you came from.
-- 2. alfermata - 'to the stop' - Run the parser within a region, the
--    cursor remains wherever the parse finished.
-- 3. alfine - 'to the end' - Run the parser within a region and jump to
--    the right-end of the region after the parse.
data RegionCoda Dalpunto :: RegionCoda Alfermata :: RegionCoda Alfine :: RegionCoda type RegionName = String -- | Lift an IO action into the Kangaroo monad. liftIOAction :: IO a -> GenKangaroo ust a -- | Report a parse error. -- -- Source position is appended to the supplied error message reportError :: ParseErr -> GenKangaroo ust a -- | substError : parser * error_msg -> parser -- -- substError is equivalent to Parsec's <?> -- combinator. -- -- Run the supplied parser, if the parse succeeds return the result, -- otherwise override the original error message with the supplied -- error_msg. substError :: GenKangaroo ust a -> ParseErr -> GenKangaroo ust a -- | Parse a single byte. -- -- If the cursor is beyond the end of the current region a parse-error is -- thrown with reportError. word8 :: GenKangaroo ust Word8 -- | satisfy : predicate -> parser -- -- Parse a single byte and apply the predicate to it. On True -- return the parsed byte, on False throw a parse-error with -- reportError. satisfy :: (Word8 -> Bool) -> GenKangaroo ust Word8 -- | checkWord8 : predicate -> opt parser -- -- Byte parser with backtracking when the match fails. -- -- Parse a single byte and apply the predicate to the result. On success -- return (Just answer), on failure move the cursor position -- back one and return Nothing. checkWord8 :: (Word8 -> Bool) -> GenKangaroo ust (Maybe Word8) -- | Backtracking parser similar to Parsec's try. -- -- Try the supplied parser, if the parse succeeds with no parse-errors -- return (Just answer). If a parse-error is generated, discard -- the parse-error, return the cursor to the initial position and return -- Nothing. opt :: GenKangaroo ust a -> GenKangaroo ust (Maybe a) -- | skip : num_bytes -> () -- -- Move the cursor forward by the supplied distance. The distance must be -- positive, negative distances are ignored. -- -- skip performs no range checking. If the cursor is moved beyond -- the region boundary then the next parse will fail. 
skip :: Int -> GenKangaroo ust () -- | position : -> cursor-position -- -- Return the current cursor position position :: GenKangaroo ust Int -- | region : -> (region-start, cursor-position, -- region-end) -- -- Return the current parse region and the current position of the cursor -- within it. region :: GenKangaroo ust (Int, Int, Int) -- | atEnd - is the cursor at the end of the current region? atEnd :: GenKangaroo ust Bool -- | lengthRemaining : -> distance-to-region-end -- -- Distance from the current cursor position to the end of the current -- region lengthRemaining :: GenKangaroo ust Int -- | regionSize : -> region-length -- -- Size of the current region. regionSize :: GenKangaroo ust Int -- | intraparse : name * coda * abs_region_start * region_length -- * parser -> parser -- -- Create a new region within the current one and run the supplied -- parser. The cursor position is moved to the start of the new region. -- The value of coda determines where the cursor is positioned -- after a successful parse. -- -- intraparse throws a parse error if the supplied -- absolute-region-start is not located within the current region, or if -- the right-boundary of the new region (abs_region_start + -- region_length) extends beyond the right-boundary of the current -- region. intraparse :: RegionName -> RegionCoda -> RegionStart -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | advance : name * coda * abs_region_start * parser -> -- parser -- -- A variation of intraparse - the new region starts at the -- supplied abs_region_start and continues to the end of the -- current region. -- -- advance throws a parse error if the new start position is not -- within the current region. advance :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | advanceRelative : name * coda * distance * parser -> -- parser -- -- A variation of advance - the start of the new region is -- calculated from the current-cursor-position + the supplied -- distance. 
-- -- advanceRelative throws a parse error if the new start position -- is not within the current region. advanceRelative :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | restrict : name * coda * distance * parser -> -- parser -- -- A variation of intraparse - create a new region as a -- restriction of the current one and run the supplied parser. The new -- region starts at the current cursor position, the right-boundary is -- restricted to the current-cursor-position + the supplied -- distance. -- -- restrict throws a parse error if the right-boundary of the new -- region extends beyond the current region. restrict :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a -- | restrictToPos : region-name * coda * abs-end-pos * parser -- -> parser -- -- A variation of restrict - the new region takes the current -- cursor position for the left-boundary and the supplied -- absolute-end-position (abs-end-pos) as the right-boundary. -- -- restrictToPos throws a parse error if the abs-end-pos -- extends beyond the right-boundary of the current region. 
restrictToPos :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a printHexAll :: GenKangaroo ust () printHexRange :: (Int, Int) -> GenKangaroo ust () printRegionStack :: GenKangaroo ust () manyTill :: GenKangaroo ust a -> GenKangaroo ust b -> GenKangaroo ust [a] genericManyTill :: (a -> c -> c) -> c -> GenKangaroo ust a -> GenKangaroo ust b -> GenKangaroo ust c manyTillPC :: GenKangaroo ust a -> (a -> Bool) -> GenKangaroo ust ([a], a) genericManyTillPC :: (a -> b -> b) -> b -> GenKangaroo ust a -> (a -> Bool) -> GenKangaroo ust (b, a) count :: Int -> GenKangaroo ust a -> GenKangaroo ust [a] countPrefixed :: Integral i => GenKangaroo ust i -> GenKangaroo ust a -> GenKangaroo ust (i, [a]) genericCount :: (a -> b -> b) -> b -> Int -> GenKangaroo ust a -> GenKangaroo ust b runOn :: GenKangaroo ust a -> GenKangaroo ust [a] genericRunOn :: (a -> b -> b) -> b -> GenKangaroo ust a -> GenKangaroo ust b -- | Apply parse then apply the check, if the check fails report the error -- message. postCheck :: GenKangaroo ust a -> (a -> Bool) -> String -> GenKangaroo ust a -- | Build a value by while the test holds. When the test fails the -- position is not backtracked, instead we use the "failing" element with -- lastOp potentially still building the value with it. buildWhile :: (a -> Bool) -> (a -> b -> b) -> (a -> b -> b) -> b -> GenKangaroo ust a -> GenKangaroo ust b buildPrimitive :: Int -> (Word8 -> Bool) -> (Word8 -> b -> b) -> b -> GenKangaroo ust b -- | Attempt to parse the supplied single character (the supplied char must -- be in the ASCII range 0-255). -- -- If the parse succeeds return the char, otherwise a parse-error will be -- thrown with reportError. char :: Char -> GenKangaroo ust Char -- | Parse any single character. The parser consumes one byte and uses -- chr to convert it. anyChar :: GenKangaroo ust Char -- | Parse a string of the supplied length n. -- -- If n is less than or equal to zero the empty string is -- returned. 
text :: Int -> GenKangaroo ust String -- | Parse the supplied string. All characters should be within the range -- 0-255. -- -- If the parse succeeds return the char, otherwise a parse-error will be -- thrown with reportError. string :: String -> GenKangaroo ust String -- | Parse a null-terminated C-style string. cstring :: GenKangaroo ust String -- | Parse the literal 0x00. w8Zero :: GenKangaroo ust Word8 -- | Get n bytes. -- -- If n is less than or equal to zero an empty list is returned. getBytes :: Integral a => a -> GenKangaroo ust [Word8] -- | Parse a single byte, returning it as an Int8. -- -- The conversion from a byte (0-255) to an Int8 uses the Prelude -- function fromIntegral. -- -- The conversion is summarized as: -- --
--   0..127   = 0..127
--   128      = -128
--   129      = -127
--   130      = -126
--   ...
--   254      = -2
--   255      = -1   
--   
--   wtoi :: Word8 -> Int8
--   wtoi i | i < 128   = fromIntegral i
--          | otherwise = -128 + fromIntegral (clearBit i 7)
--   
int8 :: GenKangaroo ust Int8 -- | Parse a Word16 in big endian form. word16be :: GenKangaroo ust Word16 -- | Parse a "Word24" in big endian form. -- -- 3 bytes are read - the answer is returned as a Word32. word24be :: GenKangaroo ust Word32 -- | Parse a Word32 in big endian form. word32be :: GenKangaroo ust Word32 -- | Parse a Word64 in big endian form. word64be :: GenKangaroo ust Word64 -- | Parse a Word16 in little endian form. word16le :: GenKangaroo ust Word16 -- | Parse a "Word24" in little endian form. -- -- 3 bytes are read - the answer is returned as a Word32. word24le :: GenKangaroo ust Word32 -- | Parse a Word32 in little endian form. word32le :: GenKangaroo ust Word32 -- | Parse an Int16 in big endian form. -- -- The ans is parsed as a Word16 (big endian) then converted to an Int16 -- using the Prelude function fromIntegral. int16be :: GenKangaroo ust Int16 -- | Parse an Int32 in big endian form. -- -- The ans is parsed as a Word32 (big endian) then converted to an Int32 -- using the Prelude function fromIntegral. int32be :: GenKangaroo ust Int32 -- | Parse an Int16 in little endian form. -- -- The ans is parsed as a Word16 (little endian) then converted to an -- Int16 using the Prelude function fromIntegral. int16le :: GenKangaroo ust Int16 -- | Parse an Int32 in little endian form. -- -- The ans is parsed as a Word32 (little endian) then converted to an -- Int32 using the Prelude function fromIntegral. int32le :: GenKangaroo ust Int32 -- | Parse an 4-byte IEEE single precision float. -- -- NOTE - THIS FUNCTION IS UNTESTED! ieeeFloatSP :: Fractional a => GenKangaroo ust a