{-# LANGUAGE Rank2Types #-} ------------------------------------------------------------------------ -- | -- Module : Hyena.Parser -- Copyright : (c) Johan Tibell 2008 -- License : BSD3-style (see LICENSE) -- -- Maintainer : johan.tibell@gmail.com -- Stability : experimental -- Portability : portable -- -- A resumable LL(1) parser combinator library for 'ByteString's. -- ------------------------------------------------------------------------ module Hyena.Parser ( -- * The Parser type Parser, Result(..), runParser, -- * Primitive parsers satisfies, byte, bytes, module Control.Applicative ) where import Control.Applicative import qualified Data.ByteString as S import qualified Data.ByteString.Internal as S import Data.Int (Int64) import Data.Word (Word8) import Foreign.ForeignPtr (withForeignPtr) import Foreign.Ptr (Ptr, plusPtr) import Foreign.Storable (peekByteOff) import Prelude hiding (fail, rem, succ) import Text.Show.Functions () -- --------------------------------------------------------------------- -- The Parser type -- | The parse state. data S r = S {-# UNPACK #-} !S.ByteString {-# UNPACK #-} !Int64 {-# UNPACK #-} !Bool {-# UNPACK #-} !(S r -> Result r) deriving Show -- | Set the failure continuation. setFail :: S r -> (S r -> Result r) -> S r setFail (S bs pos eof _) = S bs pos eof {-# INLINE setFail #-} -- | A parse either succeeds, fails or returns a suspension with which -- the parsing can be resumed. data Result a = Finished a S.ByteString -- ^ Parsing succeeded and produced a value of type -- @a@. The returned 'S.ByteString' is the remaining -- unconsumed input. | Failed Int64 -- ^ Parsing failed at the given position. Either -- because the parser didn't match the input or because -- an unexpected end of input was reached during -- parsing. | Partial (Maybe S.ByteString -> Result a) -- ^ The parsing needs more input to continue. Pass in -- @Just input@ to continue parsing and @Nothing@ to -- signal end of input. If @Nothing@ is passed the -- 'Result' is either 'Finished' or 'Failed'. deriving Show -- | A parser takes a parse state, a success continuation and returns -- a 'Result'. newtype Parser a = Parser { unParser :: forall r. S r -> (a -> S r -> Result r) -> Result r } -- --------------------------------------------------------------------- -- Instances instance Functor Parser where fmap f p = Parser $ \s succ -> unParser p s (succ . f) {-# INLINE fmap #-} instance Applicative Parser where pure a = Parser $ \s succ -> succ a s {-# INLINE pure #-} p <*> p' = Parser $ \s succ -> let succ' f s' = unParser p' s' (succ . f) in unParser p s succ' {-# INLINE (<*>) #-} instance Alternative Parser where empty = Parser $ \s@(S _ _ _ fail) _ -> fail s {-# INLINE empty #-} p <|> p' = Parser $ \s@(S _ _ _ fail) succ -> let fail' s' = unParser p' (setFail s' fail) succ in unParser p (setFail s fail') succ {-# INLINE (<|>) #-} -- --------------------------------------------------------------------- -- Running a parser initState :: S.ByteString -> S r initState bs = S bs 0 False failed {-# INLINE initState #-} -- | This is the final continuation that turns a successful parse into -- a 'Result'. finished :: a -> S r -> Result a finished v (S bs _ _ _) = Finished v bs -- | This is the final continuation that turns an unsuccessful parse -- into a 'Result'. failed :: S r -> Result a failed (S _ pos _ _) = Failed pos -- | TODO: Write documentation. runParser :: Parser a -> S.ByteString -> Result a runParser p bs = unParser p (initState bs) finished -- --------------------------------------------------------------------- -- Primitive parsers -- | The parser @satisfies p@ succeeds for any byte for which the -- supplied function @p@ returns 'True'. Returns the byte that is -- actually parsed. satisfies :: (Word8 -> Bool) -> Parser Word8 satisfies p = Parser $ \s@(S bs pos eof fail) succ -> case S.uncons bs of Just (b, bs') -> if p b then succ b (S bs' (pos + 1) eof failed) else fail s Nothing -> if eof then fail s else Partial $ \x -> case x of Just bs' -> retry (S bs' pos eof fail) Nothing -> fail (S bs pos True fail) where retry s' = unParser (satisfies p) s' succ -- | @byte b@ parses a single byte @b@. Returns the parsed byte -- (i.e. @b@). byte :: Word8 -> Parser Word8 byte b = satisfies (== b) -- TODO: Check when we can let go of the failure continuation. -- | @bytes bs@ parses a sequence of bytes @bs@. Returns the parsed -- bytes (i.e. @bs@). bytes :: S.ByteString -> Parser S.ByteString bytes bs = Parser $ \(S bs' pos eof fail) succ -> let go rem inp | len == remLen = succ bs (S (S.drop len inp) newPos eof failed) | len < remLen && inpLen >= remLen = fail (S (S.drop len inp) newPos eof fail) | otherwise = Partial $ \x -> case x of Just bs'' -> go (S.drop len rem) bs'' Nothing -> fail (S S.empty newPos True fail) where len = commonPrefixLen rem inp remLen = S.length rem newPos = pos + fromIntegral len inpLen = S.length inp in go bs bs' -- --------------------------------------------------------------------- -- Internal utilities -- | /O(n)/ @commonPrefixLen xs ys@ returns the length of the longest -- common prefix of @xs@ and @ys@. commonPrefixLen :: S.ByteString -> S.ByteString -> Int commonPrefixLen (S.PS fp1 off1 len1) (S.PS fp2 off2 len2) = S.inlinePerformIO $ withForeignPtr fp1 $ \p1 -> withForeignPtr fp2 $ \p2 -> lcp (p1 `plusPtr` off1) (p2 `plusPtr` off2) 0 len1 len2 lcp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Int lcp p1 p2 n len1 len2 | n == len1 = return len1 | n == len2 = return len2 | otherwise = do a <- peekByteOff p1 n :: IO Word8 b <- peekByteOff p2 n if a == b then lcp p1 p2 (n + 1) len1 len2 else return n