{-# LANGUAGE CPP, RankNTypes, MagicHash, BangPatterns, TypeFamilies #-} -- CPP C style pre-precessing, the #if defined lines -- RankNTypes forall r. statement -- MagicHash the (# unboxing #), also needs GHC.primitives module Data.Binary.Get.Internal ( -- * The Get type Get , runCont , Decoder(..) , runGetIncremental , readN , readNWith -- * Parsing , bytesRead , isolate -- * With input chunks , withInputChunks , Consume , failOnEOF , get , put , ensureN -- * Utility , remaining , getBytes , isEmpty , lookAhead , lookAheadM , lookAheadE , label -- ** ByteStrings , getByteString ) where import Foreign import qualified Data.ByteString as B import qualified Data.ByteString.Unsafe as B import Control.Applicative import Control.Monad #if MIN_VERSION_base(4,9,0) import qualified Control.Monad.Fail as Fail #endif import Data.Binary.Internal ( accursedUnutterablePerformIO ) -- Kolmodin 20100427: at zurihac we discussed of having partial take a -- "Maybe ByteString" and implemented it in this way. -- The reasoning was that you could accidently provide an empty bytestring, -- and it should not terminate the decoding (empty would mean eof). -- However, I'd say that it's also a risk that you get stuck in a loop, -- where you keep providing an empty string. Anyway, no new input should be -- rare, as the RTS should only wake you up if you actually have some data -- to read from your fd. -- | A decoder produced by running a 'Get' monad. data Decoder a = Fail !B.ByteString String -- ^ The decoder ran into an error. The decoder either used -- 'fail' or was not provided enough input. | Partial (Maybe B.ByteString -> Decoder a) -- ^ The decoder has consumed the available input and needs -- more to continue. Provide 'Just' if more input is available -- and 'Nothing' otherwise, and you will get a new 'Decoder'. | Done !B.ByteString a -- ^ The decoder has successfully finished. Except for the -- output value you also get the unused input. | BytesRead {-# UNPACK #-} !Int64 (Int64 -> Decoder a) -- ^ The decoder needs to know the current position in the input. -- Given the number of bytes remaning in the decoder, the outer -- decoder runner needs to calculate the position and -- resume the decoding. -- unrolled codensity/state monad newtype Get a = C { runCont :: forall r. B.ByteString -> Success a r -> Decoder r } type Success a r = B.ByteString -> a -> Decoder r instance Monad Get where return = pure (>>=) = bindG #if !(MIN_VERSION_base(4,9,0)) fail = failG -- base < 4.9 #elif !(MIN_VERSION_base(4,13,0)) fail = Fail.fail -- base < 4.13 #endif -- NB: Starting with base-4.13, the `fail` method -- has been removed from the `Monad`-class -- according to the MonadFail proposal (MFP) schedule -- which completes the process that started with base-4.9. #if MIN_VERSION_base(4,9,0) instance Fail.MonadFail Get where fail = failG #endif bindG :: Get a -> (a -> Get b) -> Get b bindG (C c) f = C $ \i ks -> c i (\i' a -> (runCont (f a)) i' ks) {-# INLINE bindG #-} failG :: String -> Get a failG str = C $ \i _ks -> Fail i str apG :: Get (a -> b) -> Get a -> Get b apG d e = do b <- d a <- e return (b a) {-# INLINE [0] apG #-} fmapG :: (a -> b) -> Get a -> Get b fmapG f m = C $ \i ks -> runCont m i (\i' a -> ks i' (f a)) {-# INLINE fmapG #-} instance Applicative Get where pure = \x -> C $ \s ks -> ks s x {-# INLINE [0] pure #-} (<*>) = apG {-# INLINE (<*>) #-} -- | @since 0.7.1.0 instance MonadPlus Get where mzero = empty mplus = (<|>) instance Functor Get where fmap = fmapG instance Functor Decoder where fmap f (Done s a) = Done s (f a) fmap f (Partial k) = Partial (fmap f . k) fmap _ (Fail s msg) = Fail s msg fmap f (BytesRead b k) = BytesRead b (fmap f . k) instance (Show a) => Show (Decoder a) where show (Fail _ msg) = "Fail: " ++ msg show (Partial _) = "Partial _" show (Done _ a) = "Done: " ++ show a show (BytesRead _ _) = "BytesRead" -- | Run a 'Get' monad. See 'Decoder' for what to do next, like providing -- input, handling decoding errors and to get the output value. runGetIncremental :: Get a -> Decoder a runGetIncremental g = noMeansNo $ runCont g B.empty (\i a -> Done i a) -- | Make sure we don't have to pass Nothing to a Partial twice. -- This way we don't need to pass around an EOF value in the Get monad, it -- can safely ask several times if it needs to. noMeansNo :: Decoder a -> Decoder a noMeansNo r0 = go r0 where go r = case r of Partial k -> Partial $ \ms -> case ms of Just _ -> go (k ms) Nothing -> neverAgain (k ms) BytesRead n k -> BytesRead n (go . k) Done _ _ -> r Fail _ _ -> r neverAgain r = case r of Partial k -> neverAgain (k Nothing) BytesRead n k -> BytesRead n (neverAgain . k) Fail _ _ -> r Done _ _ -> r prompt :: B.ByteString -> Decoder a -> (B.ByteString -> Decoder a) -> Decoder a prompt inp kf ks = prompt' kf (\inp' -> ks (inp `B.append` inp')) prompt' :: Decoder a -> (B.ByteString -> Decoder a) -> Decoder a prompt' kf ks = let loop = Partial $ \sm -> case sm of Just s | B.null s -> loop | otherwise -> ks s Nothing -> kf in loop -- | Get the total number of bytes read to this point. bytesRead :: Get Int64 bytesRead = C $ \inp k -> BytesRead (fromIntegral $ B.length inp) (k inp) -- | Isolate a decoder to operate with a fixed number of bytes, and fail if -- fewer bytes were consumed, or more bytes were attempted to be consumed. -- If the given decoder fails, 'isolate' will also fail. -- Offset from 'bytesRead' will be relative to the start of 'isolate', not the -- absolute of the input. -- -- @since 0.7.2.0 isolate :: Int -- ^ The number of bytes that must be consumed -> Get a -- ^ The decoder to isolate -> Get a isolate n0 act | n0 < 0 = fail "isolate: negative size" | otherwise = go n0 (runCont act B.empty Done) where go !n (Done left x) | n == 0 && B.null left = return x | otherwise = do pushFront left let consumed = n0 - n - B.length left fail $ "isolate: the decoder consumed " ++ show consumed ++ " bytes" ++ " which is less than the expected " ++ show n0 ++ " bytes" go 0 (Partial resume) = go 0 (resume Nothing) go n (Partial resume) = do inp <- C $ \inp k -> do let takeLimited str = let (inp', out) = B.splitAt n str in k out (Just inp') case not (B.null inp) of True -> takeLimited inp False -> prompt inp (k B.empty Nothing) takeLimited case inp of Nothing -> go n (resume Nothing) Just str -> go (n - B.length str) (resume (Just str)) go _ (Fail bs err) = pushFront bs >> fail err go n (BytesRead r resume) = go n (resume $! fromIntegral n0 - fromIntegral n - r) type Consume s = s -> B.ByteString -> Either s (B.ByteString, B.ByteString) withInputChunks :: s -> Consume s -> ([B.ByteString] -> b) -> ([B.ByteString] -> Get b) -> Get b withInputChunks initS consume onSucc onFail = go initS [] where go state acc = C $ \inp ks -> case consume state inp of Left state' -> do let acc' = inp : acc prompt' (runCont (onFail (reverse acc')) B.empty ks) (\str' -> runCont (go state' acc') str' ks) Right (want,rest) -> do ks rest (onSucc (reverse (want:acc))) failOnEOF :: [B.ByteString] -> Get a failOnEOF bs = C $ \_ _ -> Fail (B.concat bs) "not enough bytes" -- | Test whether all input has been consumed, i.e. there are no remaining -- undecoded bytes. isEmpty :: Get Bool isEmpty = C $ \inp ks -> if B.null inp then prompt inp (ks inp True) (\inp' -> ks inp' False) else ks inp False -- | DEPRECATED. Same as 'getByteString'. {-# DEPRECATED getBytes "Use 'getByteString' instead of 'getBytes'." #-} getBytes :: Int -> Get B.ByteString getBytes = getByteString {-# INLINE getBytes #-} -- | @since 0.7.0.0 instance Alternative Get where empty = C $ \inp _ks -> Fail inp "Data.Binary.Get(Alternative).empty" {-# INLINE empty #-} (<|>) f g = do (decoder, bs) <- runAndKeepTrack f case decoder of Done inp x -> C $ \_ ks -> ks inp x Fail _ _ -> pushBack bs >> g _ -> error "Binary: impossible" {-# INLINE (<|>) #-} some p = (:) <$> p <*> many p {-# INLINE some #-} many p = do v <- (Just <$> p) <|> pure Nothing case v of Nothing -> pure [] Just x -> (:) x <$> many p {-# INLINE many #-} -- | Run a decoder and keep track of all the input it consumes. -- Once it's finished, return the final decoder (always 'Done' or 'Fail'), -- and unconsume all the the input the decoder required to run. -- Any additional chunks which was required to run the decoder -- will also be returned. runAndKeepTrack :: Get a -> Get (Decoder a, [B.ByteString]) runAndKeepTrack g = C $ \inp ks -> let r0 = runCont g inp (\inp' a -> Done inp' a) go !acc r = case r of Done inp' a -> ks inp (Done inp' a, reverse acc) Partial k -> Partial $ \minp -> go (maybe acc (:acc) minp) (k minp) Fail inp' s -> ks inp (Fail inp' s, reverse acc) BytesRead unused k -> BytesRead unused (go acc . k) in go [] r0 {-# INLINE runAndKeepTrack #-} pushBack :: [B.ByteString] -> Get () pushBack [] = C $ \ inp ks -> ks inp () pushBack bs = C $ \ inp ks -> ks (B.concat (inp : bs)) () {-# INLINE pushBack #-} pushFront :: B.ByteString -> Get () pushFront bs = C $ \ inp ks -> ks (B.append bs inp) () {-# INLINE pushFront #-} -- | Run the given decoder, but without consuming its input. If the given -- decoder fails, then so will this function. -- -- @since 0.7.0.0 lookAhead :: Get a -> Get a lookAhead g = do (decoder, bs) <- runAndKeepTrack g case decoder of Done _ a -> pushBack bs >> return a Fail inp s -> C $ \_ _ -> Fail inp s _ -> error "Binary: impossible" -- | Run the given decoder, and only consume its input if it returns 'Just'. -- If 'Nothing' is returned, the input will be unconsumed. -- If the given decoder fails, then so will this function. -- -- @since 0.7.0.0 lookAheadM :: Get (Maybe a) -> Get (Maybe a) lookAheadM g = do let g' = maybe (Left ()) Right <$> g either (const Nothing) Just <$> lookAheadE g' -- | Run the given decoder, and only consume its input if it returns 'Right'. -- If 'Left' is returned, the input will be unconsumed. -- If the given decoder fails, then so will this function. -- -- @since 0.7.1.0 lookAheadE :: Get (Either a b) -> Get (Either a b) lookAheadE g = do (decoder, bs) <- runAndKeepTrack g case decoder of Done _ (Left x) -> pushBack bs >> return (Left x) Done inp (Right x) -> C $ \_ ks -> ks inp (Right x) Fail inp s -> C $ \_ _ -> Fail inp s _ -> error "Binary: impossible" -- | Label a decoder. If the decoder fails, the label will be appended on -- a new line to the error message string. -- -- @since 0.7.2.0 label :: String -> Get a -> Get a label msg decoder = C $ \inp ks -> let r0 = runCont decoder inp (\inp' a -> Done inp' a) go r = case r of Done inp' a -> ks inp' a Partial k -> Partial (go . k) Fail inp' s -> Fail inp' (s ++ "\n" ++ msg) BytesRead u k -> BytesRead u (go . k) in go r0 -- | DEPRECATED. Get the number of bytes of remaining input. -- Note that this is an expensive function to use as in order to calculate how -- much input remains, all input has to be read and kept in-memory. -- The decoder keeps the input as a strict bytestring, so you are likely better -- off by calculating the remaining input in another way. {-# DEPRECATED remaining "This will force all remaining input, don't use it." #-} remaining :: Get Int64 remaining = C $ \ inp ks -> let loop acc = Partial $ \ minp -> case minp of Nothing -> let all_inp = B.concat (inp : (reverse acc)) in ks all_inp (fromIntegral $ B.length all_inp) Just inp' -> loop (inp':acc) in loop [] ------------------------------------------------------------------------ -- ByteStrings -- -- | An efficient get method for strict ByteStrings. Fails if fewer than @n@ -- bytes are left in the input. If @n <= 0@ then the empty string is returned. getByteString :: Int -> Get B.ByteString getByteString n | n > 0 = readN n (B.unsafeTake n) | otherwise = return B.empty {-# INLINE getByteString #-} -- | Get the current chunk. get :: Get B.ByteString get = C $ \inp ks -> ks inp inp -- | Replace the current chunk. put :: B.ByteString -> Get () put s = C $ \_inp ks -> ks s () -- | Return at least @n@ bytes, maybe more. If not enough data is available -- the computation will escape with 'Partial'. readN :: Int -> (B.ByteString -> a) -> Get a readN !n f = ensureN n >> unsafeReadN n f {-# INLINE [0] readN #-} {-# RULES "readN/readN merge" forall n m f g. apG (readN n f) (readN m g) = readN (n+m) (\bs -> f bs $ g (B.unsafeDrop n bs)) #-} -- | Ensure that there are at least @n@ bytes available. If not, the -- computation will escape with 'Partial'. ensureN :: Int -> Get () ensureN !n0 = C $ \inp ks -> do if B.length inp >= n0 then ks inp () else runCont (withInputChunks n0 enoughChunks onSucc onFail >>= put) inp ks where -- might look a bit funny, but plays very well with GHC's inliner. -- GHC won't inline recursive functions, so we make ensureN non-recursive enoughChunks n str | B.length str >= n = Right (str,B.empty) | otherwise = Left (n - B.length str) -- Sometimes we will produce leftovers lists of the form [B.empty, nonempty] -- where `nonempty` is a non-empty ByteString. In this case we can avoid a copy -- by simply dropping the empty prefix. In principle ByteString might want -- to gain this optimization as well onSucc = B.concat . dropWhile B.null onFail bss = C $ \_ _ -> Fail (B.concat bss) "not enough bytes" {-# INLINE ensureN #-} unsafeReadN :: Int -> (B.ByteString -> a) -> Get a unsafeReadN !n f = C $ \inp ks -> do ks (B.unsafeDrop n inp) $! f inp -- strict return -- | @readNWith n f@ where @f@ must be deterministic and not have side effects. readNWith :: Int -> (Ptr a -> IO a) -> Get a readNWith n f = do -- It should be safe to use accursedUnutterablePerformIO here. -- The action must be deterministic and not have any external side effects. -- It depends on the value of the ByteString so the value dependencies look OK. readN n $ \s -> accursedUnutterablePerformIO $ B.unsafeUseAsCString s (f . castPtr) {-# INLINE readNWith #-}