{-# OPTIONS -Wall #-} -------------------------------------------------------------------------------- -- | -- Module : Data.ParserCombinators.Kangaroo.ParseMonad -- Copyright : (c) Stephen Tetley 2009-2010 -- License : BSD3 -- -- Maintainer : Stephen Tetley -- Stability : highly unstable -- Portability : to be determined. -- -- Random access parse monad -- -------------------------------------------------------------------------------- module Data.ParserCombinators.Kangaroo.ParseMonad ( -- * Parser types GenKangaroo -- don't export outside the package , ParseErr -- * Region types , RegionCoda(..) , RegionName -- * Monadic run function , runGenKangaroo -- don't export outside the package -- * Lift IO actions , liftIOAction -- * Non proper morphisms , throwErr -- don't export outside the package , getUserSt -- don't export outside the package , putUserSt -- don't export outside the package , modifyUserSt -- don't export outside the package -- * Error reporting and exception handling , reportError , substError -- * Primitive parsers , word8 , satisfy , checkWord8 , opt , skip -- * Query the cursor position , position , region , atEnd , lengthRemaining , regionSize -- * Parse within a region , intraparse , advance , advanceRelative , restrict , restrictToPos -- * Debug , printHexAll , printRegionStack , printHexRange ) where import Data.ParserCombinators.Kangaroo.Debug import Data.ParserCombinators.Kangaroo.Region import Data.ParserCombinators.Kangaroo.Utils import Control.Applicative import Control.Monad import Data.Array.IO import Data.Word import Numeric import System.IO type ParseErr = String type ImageData = IOUArray Int Word8 -- Is Int big enough for index? type St = ParseStack type Env = ImageData -- | Kangaroo is not a transformer as IO is always at the -- \'bottom\' of the effect stack. Like the original Parsec it is -- parametric on user state (refered to as ust). -- newtype GenKangaroo ust a = GenKangaroo { getGenKangaroo :: Env -> St -> ust -> IO (Either ParseErr a, St, ust) } -------------------------------------------------------------------------------- -- Instances and helpers fmapKang :: (a -> b) -> GenKangaroo ust a -> GenKangaroo ust b fmapKang f (GenKangaroo x) = GenKangaroo $ \env st ust -> x env st ust `bindIO` \(a,st',ust') -> return (fmap f a, st', ust') instance Functor (GenKangaroo ust) where fmap = fmapKang returnIO :: a -> IO a returnIO = return infixl 1 `bindIO` bindIO :: IO a -> (a -> IO b) -> IO b bindIO = (>>=) returnKang :: a -> GenKangaroo st a returnKang a = GenKangaroo $ \_ st ust -> returnIO (Right a, st, ust) infixl 1 `bindKang` bindKang :: GenKangaroo ust a -> (a -> GenKangaroo ust b) -> GenKangaroo ust b (GenKangaroo x) `bindKang` f = GenKangaroo $ \env st ust -> x env st ust `bindIO` \(ans, st', ust') -> case ans of Left err -> returnIO (Left err,st',ust') Right a -> getGenKangaroo (f a) env st' ust' instance Monad (GenKangaroo ust) where return = returnKang (>>=) = bindKang instance Applicative (GenKangaroo ust) where pure = return (<*>) = ap -- I don't think Kangaroo has natural implementations of -- Alternative or MonadPlus. -- -- My proposition is that the sort of parsing that Kangaroo -- intends to provide you always now want you want hence there -- is no inbuilt backtracking or support for list-of-successes. -------------------------------------------------------------------------------- -- Run... -- | Primitive monadic run function - other modules should export -- type specific specializations of this function... -- runGenKangaroo :: GenKangaroo ust a -> ust -> FilePath -> IO (Either ParseErr a,ust) runGenKangaroo p user_state filename = withBinaryFile filename ReadMode $ \ handle -> do { sz <- hFileSize handle ; arr <- newArray_ (0,fromIntegral $ sz-1) ; rsz <- hGetArray handle arr (fromIntegral sz) ; (ans,stk,ust) <- runP p rsz arr ; return (answer ans stk,ust) } where runP (GenKangaroo x) upper arr = x arr st0 user_state where st0 = newStack 0 (upper-1) Alfine " -- file -- " answer (Left err) stk = Left $ err ++ ('\n':'\n':printParseStack stk) answer (Right ans) _ = Right ans -------------------------------------------------------------------------------- -- Non proper morphisms throwErr :: ParseErr -> GenKangaroo ust a throwErr msg = GenKangaroo $ \_ st ust -> return (Left msg, st, ust) askEnv :: GenKangaroo ust Env askEnv = GenKangaroo $ \env st ust -> return (Right env, st, ust) getSt :: GenKangaroo ust St getSt = GenKangaroo $ \_ st ust -> return (Right st, st, ust) putSt :: St -> GenKangaroo ust () putSt st = GenKangaroo $ \_ _ ust -> return (Right (), st, ust) getPos :: GenKangaroo ust Pos getPos = liftM location getSt getEnd :: GenKangaroo ust RegionEnd getEnd = liftM regionEnd getSt getStart :: GenKangaroo ust RegionStart getStart = liftM regionStart getSt getUserSt :: GenKangaroo ust ust getUserSt = GenKangaroo $ \_ st ust -> return (Right ust, st, ust) putUserSt :: ust -> GenKangaroo ust () putUserSt ust = GenKangaroo $ \_ st _ -> return (Right (), st, ust) modifyUserSt :: (ust -> ust) -> GenKangaroo ust () modifyUserSt f = GenKangaroo $ \_ st ust -> return (Right (), st, f ust) -- Modifying the position lets the parser go beyond the -- end-of-file. advancePos1 :: GenKangaroo ust () advancePos1 = modifyPos (+1) modifyPos :: (Pos -> Pos) -> GenKangaroo ust () modifyPos f = GenKangaroo $ \_ st ust -> return (Right (), move f st, ust) bracketRegion :: RegionInfo -> GenKangaroo ust a -> GenKangaroo ust a bracketRegion i = bracketM_ pushM popM where pushM = getSt >>= \st -> case push i st of Left err -> throwErr $ getRegionError err Right stk -> putSt stk popM = getSt >>= \st -> putSt (pop st) -------------------------------------------------------------------------------- -- Lift IO actions - we are in the IO -- | Lift an IO action into the Kangaroo monad. -- liftIOAction :: IO a -> GenKangaroo ust a liftIOAction ma = GenKangaroo $ \ _env st ust -> ma >>= \a -> return (Right a, st, ust) -------------------------------------------------------------------------------- -- Helpers -------------------------------------------------------------------------------- -- Error reporting -- | Report a parse error. -- -- Source position is appended to the supplied error message -- reportError :: ParseErr -> GenKangaroo ust a reportError s = do posn <- getPos throwErr $ s ++ posStr posn where posStr pos = concat [ " absolute position " , show pos , " (0x" , showHex pos [] , ")" ] -- | 'substError' : @ parser * error_msg -> parser@ -- -- 'substError' is equivalent to Parsec\'s @\@ combinator. -- -- Run the supplied parser, if the parse succeeds return the -- result, otherwise override the original error message with -- the supplied @error_msg@. -- substError :: GenKangaroo ust a -> ParseErr -> GenKangaroo ust a substError p msg = GenKangaroo $ \env st ust -> (getGenKangaroo p) env st ust >>= \ ans -> case ans of (Left _, st', ust') -> return (Left msg, st', ust') okay -> return okay -------------------------------------------------------------------------------- -- Primitive parsers -- | Parse a single byte. -- -- If the cursor is beyond the end of the current region a -- parse-error is thrown with 'reportError'. -- word8 :: GenKangaroo ust Word8 word8 = do ix <- getPos end <- getEnd when (ix>end) (reportError "word8") -- test emphatically is (>) ! arr <- askEnv a <- liftIOAction $ readArray arr ix advancePos1 return a -- | 'satisfy' : @ predicate -> parser @ -- -- Parse a single byte and apply the predicate to it. On @True@ -- return the parsed byte, on @False@ throw a parse-error with -- 'reportError'. -- satisfy :: (Word8 -> Bool) -> GenKangaroo ust Word8 satisfy p = word8 >>= \x -> if p x then return x else reportError $ "satisfy" -- | 'checkWord8' : @ predicate -> opt parser @ -- -- Byte parser with backtracking when the match fails. -- -- Parse a single byte and apply the predicate to the result. On -- success return @(Just answer)@, on failure move the cursor -- position back one and return @Nothing@. -- checkWord8 :: (Word8 -> Bool) -> GenKangaroo ust (Maybe Word8) checkWord8 check = word8 >>= \ans -> if check ans then return $ Just ans else modifyPos (`subtract` 1) >> return Nothing -- | Backtracking parser similar to Parsec\'s @try@. -- -- Try the supplied parser, if the parse succeeds with no -- parse-errors return @(Just answer)@. If a parse-error is -- generated, discard the parse-error, return the cursor to the -- initial position and return @Nothing@. -- opt :: GenKangaroo ust a -> GenKangaroo ust (Maybe a) opt p = GenKangaroo $ \env st ust -> (getGenKangaroo p) env st ust >>= \ ans -> case ans of (Left _, _, ust') -> return (Right Nothing, st, ust') (Right a, st', ust') -> return (Right $ Just a, st', ust') -- | 'skip' : @ num_bytes -> () @ -- -- Move the cursor forward by the supplied distance. The distance -- must be positive, negative distances are ignored. -- -- 'skip' performs no range checking. If the cursor is -- moved beyond the region boundary then the next parse will -- fail. -- skip :: Int -> GenKangaroo ust () skip n | n <= 0 = return () skip n = modifyPos (n+) -------------------------------------------------------------------------------- -- Querying position -- | 'position' : @-> cursor-position@ -- -- Return the current cursor position -- position :: GenKangaroo ust Int position = getPos -- | 'region' : @-> (region-start, cursor-position, region-end)@ -- -- Return the current parse region and the current position of -- the cursor within it. -- region :: GenKangaroo ust (Int,Int,Int) region = liftM3 (,,) getStart getPos getEnd -- region limits are inclusive so cursor is at the end -- if the position is greater that the end location. -- | 'atEnd' - is the cursor at the end of the current region? -- atEnd :: GenKangaroo ust Bool atEnd = liftM2 (>) getPos getEnd -- | 'lengthRemaining' : @-> distance-to-region-end@ -- -- Distance from the current cursor position to the end of the -- current region -- lengthRemaining :: GenKangaroo ust Int lengthRemaining = liftM2 fn getEnd getPos where fn a b | a <= b = 0 | otherwise = a - b -- | 'regionSize' : @-> region-length@ -- -- Size of the current region. -- regionSize :: GenKangaroo ust Int regionSize = liftM2 (-) getEnd getStart -------------------------------------------------------------------------------- -- The important ones parsing within a /region/ ... -- | 'intraparse' : @name * coda * abs_region_start * region_length * parser -> parser@ -- -- Create a new region within the current one and run the -- supplied parser. The cursor position is moved to the start -- of the new region. The value of @coda@ determines where the -- cursor is positioned after a successful parse. -- -- 'intraparse' throws a parse error if the supplied -- absolute-region-start is not located within the current region, -- or if the right-boundary of the new region -- (@abs_region_start + region_length@) extends beyond the -- right-boundary of the current region. -- intraparse :: RegionName -> RegionCoda -> RegionStart -> Int -> GenKangaroo ust a -> GenKangaroo ust a intraparse name coda intra_start len p = bracketRegion (newRegion intra_start len coda name) p -- | 'advance' : @name * coda * abs_region_start * parser -> parser@ -- -- A variation of 'intraparse' - the new region starts at the -- supplied @abs_region_start@ and continues to the end of the -- current region. -- -- 'advance' throws a parse error if the new start position is -- not within the current region. -- advance :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a advance name coda intra_start p = getEnd >>= \end -> intraparse name coda intra_start (end - intra_start) p -- | 'advanceRelative' : @name * coda * distance * parser -> parser@ -- -- A variation of 'advance' - the start of the new region is -- calculated from the @current-cursor-position@ + the supplied -- @distance@. -- -- 'advanceRelative' throws a parse error if the new start -- position is not within the current region. -- advanceRelative :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a advanceRelative name coda dist p = getPos >>= \pos -> intraparse name coda (pos+dist) dist p -- | 'restrict' : @ name * coda * distance * parser -> parser@ -- -- A variation of 'intraparse' - create a new region as a -- restriction of the current one and run the supplied parser. -- The new region starts at the current coursor position, the -- right-boundary is restricted to the @current-cursor-position@ -- + the supplied @distance@. -- -- 'restrict' throws a parse error if the right-boundary of the -- new region extends beyond the current region. -- restrict :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a restrict name coda len p = getPos >>= \pos -> intraparse name coda pos len p -- | 'restrictToPos' : @region-name * coda * abs-end-pos * parser -> parser@ -- -- A variantion of 'restrict' - the new region takes the current -- cursor position for the left-boundary and the supplied -- absolute-end-position (@abs-end-pos@) as the right-boundary. -- -- 'restrictToPos' throws a parse error if the @abs-end-pos@ -- extends beyond the right-boundary of the current region. -- restrictToPos :: RegionName -> RegionCoda -> Int -> GenKangaroo ust a -> GenKangaroo ust a restrictToPos name coda abs_pos p = getPos >>= \pos -> intraparse name coda pos (abs_pos-pos) p -------------------------------------------------------------------------------- -- Debug printHexAll :: GenKangaroo ust () printHexAll = askEnv >>= liftIOAction . debugHexAll printHexRange :: (Int,Int) -> GenKangaroo ust () printHexRange rng = askEnv >>= liftIOAction . (debugHexRange rng) printRegionStack :: GenKangaroo ust () printRegionStack = getSt >>= liftIOAction . putStrLn . printParseStack