{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fglasgow-exts #-}
-- for unboxed shifts

-----------------------------------------------------------------------------
-- |
-- Module      : Data.Binary.Strict.IncrementalGet
-- Copyright   : Lennart Kolmodin
-- License     : BSD3-style (see LICENSE)
--
-- Maintainer  : Adam Langley <agl@imperialviolet.org>
-- Stability   : experimental
-- Portability : portable to Hugs and GHC.
--
-- This is a version of the Get monad for incremental parsing. The parser is
-- written as if a single, huge, strict ByteString was to be parsed. It
-- produces results as it parses by calling yield.
--
-- However, if the parser runs out of data, rather than failing the caller sees
-- a Partial result, which includes the list of yielded values so far and a
-- continuation. By calling the continuation with more data, the parser
-- continues, none the wiser.
--
-- Take the following example
--
-- > testParse = do
-- >   getWord16be >>= yield
-- >   testParse
-- >
-- > test = runGet testParse $ B.pack [1,0,0]
--
-- Here, @testParse@ never completes, but yields Word16 values forever. It's
-- started with a 3 byte ByteString and will yield a single value before
-- running out of data. Thus, @test = Partial cont [256]@. Calling @cont@
-- with a single extra byte will yield another Word16 value etc.
--
-- The lookahead functions have been removed from this parser because of their
-- incompatibility with the incremental monad at the moment.
-----------------------------------------------------------------------------

#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
#include "MachDeps.h"
#endif

module Data.Binary.Strict.IncrementalGet (
    -- * The Get type
      Get
    , Result(..)
    , runGet

    -- * Utility
    , skip
    , yield
    , bytesRead
    , remaining
    , isEmpty

    -- * Parsing particular types
    , getWord8

    -- ** ByteStrings
    , getByteString

    -- ** Big-endian reads
    , getWord16be
    , getWord32be
    , getWord64be

    -- ** Little-endian reads
    , getWord16le
    , getWord32le
    , getWord64le

    -- ** Host-endian, unaligned reads
    , getWordhost
    , getWord16host
    , getWord32host
    , getWord64host
) where

import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as B

import Foreign

#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
import GHC.Base
import GHC.Word
#endif

#ifndef __HADDOCK__
-- | The parse state
data S r = S {-# UNPACK #-} !B.ByteString  -- ^ input
             {-# UNPACK #-} !Int  -- ^ bytes read
             [r]  -- ^ results so far, in reverse order
#endif

-- | The result of a partial parse
data Result a = Failed String
                -- ^ the parse failed with the given error message
              | Finished B.ByteString [a]
                -- ^ the parse finished and produced the given list of
                --   results doing so. Any unparsed data is returned.
              | Partial (B.ByteString -> Result a) [a]
                -- ^ the parse ran out of data before finishing, but produced
                --   the given list of results before doing so. To continue the
                --   parse pass more data to the given continuation

instance (Show a) => Show (Result a) where
  show (Failed err) = "Failed " ++ err
  show (Finished _ rs) = "Finished " ++ show rs
  show (Partial _ rs) = "Partial " ++ show rs

newtype Get r a = Get { unGet :: S r -> (a -> S r -> Result r) -> Result r }

instance Functor (Get r) where
    fmap f m = Get (\s -> \cont -> unGet m s (cont . f))

instance Monad (Get r) where
  return a = Get (\s -> \k -> k a s)
  m >>= k = Get (\s -> \cont -> unGet m s (\a -> \s' -> unGet (k a) s' cont))
  fail err = Get (const $ const $ Failed err)

get :: Get r (S r)
get = Get (\s -> \k -> k s s)

-- | Return a value from the parse
yield :: r -> Get r ()
yield v = Get (\(S a b c) -> \cont -> cont () (S a b (v : c)))

initState :: B.ByteString -> S r
initState input = S input 0 []
{-# INLINE initState #-}

-- | Start a parser and return the first Result.
runGet :: Get r a -> B.ByteString -> Result r
runGet m input =
  unGet m (initState input) (const $ \(S s _ rs) -> Finished s $ reverse rs)

-- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available.
skip :: Int -> Get r ()
skip n = readN (fromIntegral n) (const ())

-- | Get the total number of bytes read to this point.
bytesRead :: Get r Int
bytesRead = do
  S _ b _ <- get
  return b

-- | Get the number of remaining unparsed bytes.
-- Useful for checking whether all input has been consumed.
remaining :: Get r Int
remaining = do
  S s _ _ <- get
  return (fromIntegral (B.length s))

-- | Test whether all input has been consumed,
-- i.e. there are no remaining unparsed bytes.
isEmpty :: Get r Bool
isEmpty = do
  S s _ _ <- get
  return $ B.null s

------------------------------------------------------------------------
-- Utility with ByteStrings

-- | An efficient 'get' method for strict ByteStrings. Fails if fewer
-- than @n@ bytes are left in the input.
getByteString :: Int -> Get r B.ByteString
getByteString n = readN n id
{-# INLINE getByteString #-}

-- | Pull @n@ bytes from the input, as a strict ByteString.
getBytes :: Int -> Get r B.ByteString
getBytes n = Get $ \(S s offset values) -> \cont ->
  if n <= B.length s
     then let (consume, rest) = B.splitAt n s
           in cont consume $ S rest (offset + fromIntegral n) values
     else Partial (\s' -> unGet (getBytes n) (S (B.append s s') offset []) cont) $ reverse values
{-# INLINE getBytes #-}

-- Pull n bytes from the input, and apply a parser to those bytes,
-- yielding a value. If less than @n@ bytes are available, fail with an
-- error. This wraps @getBytes@.
readN :: Int -> (B.ByteString -> a) -> Get r a
readN n f = fmap f $ getBytes n
{-# INLINE readN #-}

getPtr :: Storable a => Int -> Get r a
getPtr n = do
    (fp, o, _) <- readN n B.toForeignPtr
    return . B.inlinePerformIO $ withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o)
{-# INLINE getPtr #-}

getWord8 :: Get r Word8
getWord8 = getPtr (sizeOf (undefined :: Word8))
{-# INLINE getWord8 #-}

-- | Read a Word16 in big endian format
getWord16be :: Get r Word16
getWord16be = do
    s <- readN 2 id
    return $! (fromIntegral (s `B.index` 0) `shiftl_w16` 8) .|.
              (fromIntegral (s `B.index` 1))

-- | Read a Word16 in little endian format
getWord16le :: Get r Word16
getWord16le = do
    s <- readN 2 id
    return $! (fromIntegral (s `B.index` 1) `shiftl_w16` 8) .|.
              (fromIntegral (s `B.index` 0) )
{-# INLINE getWord16le #-}

-- | Read a Word32 in big endian format
getWord32be :: Get r Word32
getWord32be = do
    s <- readN 4 id
    return $! (fromIntegral (s `B.index` 0) `shiftl_w32` 24) .|.
              (fromIntegral (s `B.index` 1) `shiftl_w32` 16) .|.
              (fromIntegral (s `B.index` 2) `shiftl_w32`  8) .|.
              (fromIntegral (s `B.index` 3) )
{-# INLINE getWord32be #-}

-- | Read a Word32 in little endian format
getWord32le :: Get r Word32
getWord32le = do
    s <- readN 4 id
    return $! (fromIntegral (s `B.index` 3) `shiftl_w32` 24) .|.
              (fromIntegral (s `B.index` 2) `shiftl_w32` 16) .|.
              (fromIntegral (s `B.index` 1) `shiftl_w32`  8) .|.
              (fromIntegral (s `B.index` 0) )
{-# INLINE getWord32le #-}

-- | Read a Word64 in big endian format
getWord64be :: Get r Word64
getWord64be = do
    s <- readN 8 id
    return $! (fromIntegral (s `B.index` 0) `shiftl_w64` 56) .|.
              (fromIntegral (s `B.index` 1) `shiftl_w64` 48) .|.
              (fromIntegral (s `B.index` 2) `shiftl_w64` 40) .|.
              (fromIntegral (s `B.index` 3) `shiftl_w64` 32) .|.
              (fromIntegral (s `B.index` 4) `shiftl_w64` 24) .|.
              (fromIntegral (s `B.index` 5) `shiftl_w64` 16) .|.
              (fromIntegral (s `B.index` 6) `shiftl_w64`  8) .|.
              (fromIntegral (s `B.index` 7) )
{-# INLINE getWord64be #-}

-- | Read a Word64 in little endian format
getWord64le :: Get r Word64
getWord64le = do
    s <- readN 8 id
    return $! (fromIntegral (s `B.index` 7) `shiftl_w64` 56) .|.
              (fromIntegral (s `B.index` 6) `shiftl_w64` 48) .|.
              (fromIntegral (s `B.index` 5) `shiftl_w64` 40) .|.
              (fromIntegral (s `B.index` 4) `shiftl_w64` 32) .|.
              (fromIntegral (s `B.index` 3) `shiftl_w64` 24) .|.
              (fromIntegral (s `B.index` 2) `shiftl_w64` 16) .|.
              (fromIntegral (s `B.index` 1) `shiftl_w64`  8) .|.
              (fromIntegral (s `B.index` 0) )

------------------------------------------------------------------------
-- Host-endian reads

-- | /O(1)./ Read a single native machine word. The word is read in
-- host order, host endian form, for the machine you're on. On a 64 bit
-- machine the Word is an 8 byte value, on a 32 bit machine, 4 bytes.
getWordhost :: Get r Word
getWordhost = getPtr (sizeOf (undefined :: Word))
{-# INLINE getWordhost #-}

-- | /O(1)./ Read a 2 byte Word16 in native host order and host endianness.
getWord16host :: Get r Word16
getWord16host = getPtr (sizeOf (undefined :: Word16))
{-# INLINE getWord16host #-}

-- | /O(1)./ Read a Word32 in native host order and host endianness.
getWord32host :: Get r Word32
getWord32host = getPtr  (sizeOf (undefined :: Word32))
{-# INLINE getWord32host #-}

-- | /O(1)./ Read a Word64 in native host order and host endianess.
getWord64host   :: Get r Word64
getWord64host = getPtr  (sizeOf (undefined :: Word64))
{-# INLINE getWord64host #-}


{-# INLINE getWord64le #-}
shiftl_w16 :: Word16 -> Int -> Word16
shiftl_w32 :: Word32 -> Int -> Word32
shiftl_w64 :: Word64 -> Int -> Word64

#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#`   i)
shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#`   i)

#if WORD_SIZE_IN_BITS < 64
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i)
#else
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i)
#endif

#else
shiftl_w16 = shiftL
shiftl_w32 = shiftL
shiftl_w64 = shiftL
#endif