-- |
-- Module      : Streamly.Internal.Data.Serialize.Type
-- Copyright   : (c) 2023 Composewell Technologies
-- License     : BSD3-3-Clause
-- Maintainer  : streamly@composewell.com
-- Portability : GHC
--

module Streamly.Internal.Data.Serialize.Type
    (
      Serialize(..)
    ) where

--------------------------------------------------------------------------------
-- Imports
--------------------------------------------------------------------------------

import Control.Monad (when)
import Data.List (foldl')
import Data.Proxy (Proxy (..))
import Streamly.Internal.Data.Unbox (Unbox)
import Streamly.Internal.Data.MutByteArray.Type (MutByteArray(..))
import Streamly.Internal.Data.Array.Type (Array(..))
import GHC.Int (Int16(..), Int32(..), Int64(..), Int8(..))
import GHC.Word (Word16(..), Word32(..), Word64(..), Word8(..))
import GHC.Stable (StablePtr(..))

import qualified Streamly.Internal.Data.MutByteArray.Type as MBA
import qualified Streamly.Internal.Data.Unbox as Unbox
import qualified Streamly.Internal.Data.Array.Type as Array
import qualified Streamly.Internal.Data.MutArray as MutArray

import GHC.Exts

--------------------------------------------------------------------------------
-- Developer Note
--------------------------------------------------------------------------------

-- IMPORTANT
-- =========
--
-- Don't ever serialize the absolute offsets in the encoding. Serialize length
-- instead. Absolute offsets are NOT stable.
--
-- They will only work if the start offset of the Array when encoding and
-- decoding is the same. This is almost never the case.

--------------------------------------------------------------------------------
-- Types
--------------------------------------------------------------------------------

-- | The 'Serialize' type class provides operations for serialization and
-- deserialization of general Haskell data types to and from their byte stream
-- representation.
--
-- Unlike 'Unbox', 'Serialize' uses variable length encoding, therefore, it can
-- serialize recursive and variable length data types like lists, or variable
-- length sum types where the length of the value may vary depending on a
-- particular constructor. For variable length data types the length is encoded
-- along with the data.
--
-- The 'deserializeAt' operation reads bytes from the mutable byte array and
-- builds a Haskell data type from these bytes, the number of bytes it reads
-- depends on the type and the encoded value it is reading. 'serializeAt'
-- operation converts a Haskell data type to its binary representation which
-- must consist of as many bytes as added by the @addSizeTo@ operation for that
-- value and then stores these bytes into the mutable byte array. The
-- programmer is expected to use the @addSizeTo@ operation and allocate an
-- array of sufficient length before calling 'serializeAt'.
--
-- IMPORTANT: The serialized data's byte ordering remains the same as the host
-- machine's byte order. Therefore, it can not be deserialized from host
-- machines with a different byte ordering.
--
-- Instances can be derived via Template Haskell, or written manually.
--
-- Here is an example, for deriving an instance of this type class using
-- template Haskell:
--
-- >>> :{
-- data Object = Object
--     { _obj1 :: [Int]
--     , _obj2 :: Int
--     }
-- :}
--
-- @
-- import Streamly.Data.MutByteArray (deriveSerialize)
-- \$(deriveSerialize [d|instance Serialize Object|])
-- @
--
-- See 'Streamly.Data.MutByteArray.deriveSerialize' and
-- 'Streamly.Data.MutByteArray.deriveSerializeWith' for more information on
-- deriving using Template Haskell.
--
-- Here is an example of a manual instance.
--
-- >>> import Streamly.Data.MutByteArray (Serialize(..))
--
-- >>> :{
-- instance Serialize Object where
--     addSizeTo acc obj = addSizeTo (addSizeTo acc (_obj1 obj)) (_obj2 obj)
--     deserializeAt i arr len = do
--          -- Check the array bounds before reading
--         (i1, x0) <- deserializeAt i arr len
--         (i2, x1) <- deserializeAt i1 arr len
--         pure (i2, Object x0 x1)
--     serializeAt i arr (Object x0 x1) = do
--         i1 <- serializeAt i arr x0
--         i2 <- serializeAt i1 arr x1
--         pure i2
-- :}
--
class Serialize a where
    -- XXX Use (a -> Sum Int) instead, remove the Size type

    -- A left fold step to fold a generic structure to its serializable size.
    -- It is of the form @Int -> a -> Int@ because you can have tail-recursive
    -- traversal of the structures.

    -- | @addSizeTo accum value@ returns @accum@ incremented by the size of the
    -- serialized representation of @value@ in bytes. Size cannot be zero. It
    -- should be at least 1 byte.
    addSizeTo :: Int -> a -> Int

    -- We can implement the following functions without returning the `Int`
    -- offset but that may require traversing the Haskell structure again to get
    -- the size. Therefore, this is a performance optimization.

    -- | @deserializeAt byte-offset array arrayLen@ deserializes a value from
    -- the given byte-offset in the array. Returns a tuple consisting of the
    -- next byte-offset and the deserialized value.
    --
    -- The arrayLen passed is the entire length of the input buffer. It is to
    -- be used to check if we would overflow the input buffer when
    -- deserializing.
    --
    -- Throws an exception if the operation would exceed the supplied arrayLen.
    deserializeAt :: Int -> MutByteArray -> Int -> IO (Int, a)

    -- | @serializeAt byte-offset array value@ writes the serialized
    -- representation of the @value@ in the array at the given byte-offset.
    -- Returns the next byte-offset.
    --
    -- This is an unsafe operation, the programmer must ensure that the array
    -- has enough space available to serialize the value as determined by the
    -- @addSizeTo@ operation.
    serializeAt :: Int -> MutByteArray -> a -> IO Int

--------------------------------------------------------------------------------
-- Instances
--------------------------------------------------------------------------------

-- _size is the length from array start to the last accessed byte.
#ifdef DEBUG
{-# INLINE checkBounds #-}
checkBounds :: String -> Int -> MutByteArray -> IO ()
checkBounds _label _size _arr = do
    sz <- MBA.sizeOfMutableByteArray _arr
    if (_size > sz)
    then error
        $ _label
            ++ ": accessing array at offset = "
            ++ show (_size - 1)
            ++ " max valid offset = " ++ show (sz - 1)
    else return ()
#endif

-- Note: Instead of passing around the size parameter, we can use
-- (sizeOfMutableByteArray arr) for checking the array bound, but that turns
-- out to be more expensive.
--
-- Another way to optimize this is to avoid the check for fixed size
-- structures. For fixed size structures we can do a check at the top level and
-- then use checkless deserialization using the Unbox type class. That will
-- require ConstSize and VarSize constructors in size. The programmer can
-- bundle all const size fields in a newtype to make serialization faster. This
-- can speed up the computation of size when serializing and checking size when
-- deserialing.
--
-- For variable size non-recursive structures a separate size validation method
-- could be used to validate the size before deserializing. "validate" can also
-- be used to collpase multiple chunks of arrays coming from network into a
-- single array for deserializing. But that can also be done by framing the
-- serialized value with a size header.
--
{-# INLINE deserializeChecked #-}
deserializeChecked :: forall a. Unbox a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeChecked :: forall a. Unbox a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeChecked Int
off MutByteArray
arr Int
sz =
    let next :: Int
next = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Proxy a -> Int
forall a. Unbox a => Proxy a -> Int
Unbox.sizeOf (Proxy a
forall {k} (t :: k). Proxy t
Proxy :: Proxy a)
     in do
        -- Keep likely path in the straight branch.
        if (Int
next Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
sz)
        then Int -> MutByteArray -> IO a
forall a. Unbox a => Int -> MutByteArray -> IO a
Unbox.peekAt Int
off MutByteArray
arr IO a -> (a -> IO (Int, a)) -> IO (Int, a)
forall a b. IO a -> (a -> IO b) -> IO b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \a
val -> (Int, a) -> IO (Int, a)
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
next, a
val)
        else [Char] -> IO (Int, a)
forall a. HasCallStack => [Char] -> a
error
            ([Char] -> IO (Int, a)) -> [Char] -> IO (Int, a)
forall a b. (a -> b) -> a -> b
$ [Char]
"deserializeAt: accessing array at offset = "
                [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show (Int
next Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" max valid offset = " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show (Int
sz Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)

{-# INLINE serializeUnsafe #-}
serializeUnsafe :: forall a. Unbox a => Int -> MutByteArray -> a -> IO Int
serializeUnsafe :: forall a. Unbox a => Int -> MutByteArray -> a -> IO Int
serializeUnsafe Int
off MutByteArray
arr a
val =
    let next :: Int
next = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Proxy a -> Int
forall a. Unbox a => Proxy a -> Int
Unbox.sizeOf (Proxy a
forall {k} (t :: k). Proxy t
Proxy :: Proxy a)
     in do
#ifdef DEBUG
        checkBounds "serializeAt" next arr
#endif
        Int -> MutByteArray -> a -> IO ()
forall a. Unbox a => Int -> MutByteArray -> a -> IO ()
Unbox.pokeAt Int
off MutByteArray
arr a
val
        Int -> IO Int
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
next

#define DERIVE_SERIALIZE_FROM_UNBOX(_type) \
instance Serialize _type where \
; {-# INLINE addSizeTo #-} \
;    addSizeTo acc _ = acc +  Unbox.sizeOf (Proxy :: Proxy _type) \
; {-# INLINE deserializeAt #-} \
;    deserializeAt off arr end = deserializeChecked off arr end :: IO (Int, _type) \
; {-# INLINE serializeAt #-} \
;    serializeAt =  \
        serializeUnsafe :: Int -> MutByteArray -> _type -> IO Int

DERIVE_SERIALIZE_FROM_UNBOX(())
DERIVE_SERIALIZE_FROM_UNBOX(Bool)
DERIVE_SERIALIZE_FROM_UNBOX(Char)
DERIVE_SERIALIZE_FROM_UNBOX(Int8)
DERIVE_SERIALIZE_FROM_UNBOX(Int16)
DERIVE_SERIALIZE_FROM_UNBOX(Int32)
DERIVE_SERIALIZE_FROM_UNBOX(Int)
DERIVE_SERIALIZE_FROM_UNBOX(Int64)
DERIVE_SERIALIZE_FROM_UNBOX(Word)
DERIVE_SERIALIZE_FROM_UNBOX(Word8)
DERIVE_SERIALIZE_FROM_UNBOX(Word16)
DERIVE_SERIALIZE_FROM_UNBOX(Word32)
DERIVE_SERIALIZE_FROM_UNBOX(Word64)
DERIVE_SERIALIZE_FROM_UNBOX(Double)
DERIVE_SERIALIZE_FROM_UNBOX(Float)
DERIVE_SERIALIZE_FROM_UNBOX((StablePtr a))
DERIVE_SERIALIZE_FROM_UNBOX((Ptr a))
DERIVE_SERIALIZE_FROM_UNBOX((FunPtr a))

instance forall a. Serialize a => Serialize [a] where

    -- {-# INLINE addSizeTo #-}
    addSizeTo :: Int -> [a] -> Int
addSizeTo Int
acc [a]
xs =
        (Int -> a -> Int) -> Int -> [a] -> Int
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Int -> a -> Int
forall a. Serialize a => Int -> a -> Int
addSizeTo (Int
acc Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Proxy Int -> Int
forall a. Unbox a => Proxy a -> Int
Unbox.sizeOf (Proxy Int
forall {k} (t :: k). Proxy t
Proxy :: Proxy Int))) [a]
xs

    -- Inlining this causes large compilation times for tests
    {-# INLINABLE deserializeAt #-}
    deserializeAt :: Int -> MutByteArray -> Int -> IO (Int, [a])
deserializeAt Int
off MutByteArray
arr Int
sz = do
        (Int
off1, Int64
len64) <- Int -> MutByteArray -> Int -> IO (Int, Int64)
forall a. Serialize a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeAt Int
off MutByteArray
arr Int
sz :: IO (Int, Int64)
        let len :: Int
len = (Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral :: Int64 -> Int) Int64
len64
            peekList :: ([a] -> b) -> Int -> t -> IO (Int, b)
peekList [a] -> b
f Int
o t
i | t
i t -> t -> Bool
forall a. Ord a => a -> a -> Bool
>= t
3 = do
              -- Unfold the loop three times
              (Int
o1, a
x1) <- Int -> MutByteArray -> Int -> IO (Int, a)
forall a. Serialize a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeAt Int
o MutByteArray
arr Int
sz
              (Int
o2, a
x2) <- Int -> MutByteArray -> Int -> IO (Int, a)
forall a. Serialize a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeAt Int
o1 MutByteArray
arr Int
sz
              (Int
o3, a
x3) <- Int -> MutByteArray -> Int -> IO (Int, a)
forall a. Serialize a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeAt Int
o2 MutByteArray
arr Int
sz
              ([a] -> b) -> Int -> t -> IO (Int, b)
peekList ([a] -> b
f ([a] -> b) -> ([a] -> [a]) -> [a] -> b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (\[a]
xs -> a
x1a -> [a] -> [a]
forall a. a -> [a] -> [a]
:a
x2a -> [a] -> [a]
forall a. a -> [a] -> [a]
:a
x3a -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
xs)) Int
o3 (t
i t -> t -> t
forall a. Num a => a -> a -> a
- t
3)
            peekList [a] -> b
f Int
o t
0 = (Int, b) -> IO (Int, b)
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
o, [a] -> b
f [])
            peekList [a] -> b
f Int
o t
i = do
              (Int
o1, a
x) <- Int -> MutByteArray -> Int -> IO (Int, a)
forall a. Serialize a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeAt Int
o MutByteArray
arr Int
sz
              ([a] -> b) -> Int -> t -> IO (Int, b)
peekList ([a] -> b
f ([a] -> b) -> ([a] -> [a]) -> [a] -> b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a
xa -> [a] -> [a]
forall a. a -> [a] -> [a]
:)) Int
o1 (t
i t -> t -> t
forall a. Num a => a -> a -> a
- t
1)
        ([a] -> [a]) -> Int -> Int -> IO (Int, [a])
forall {t} {a} {b}.
(Ord t, Num t, Serialize a) =>
([a] -> b) -> Int -> t -> IO (Int, b)
peekList [a] -> [a]
forall a. a -> a
id Int
off1 Int
len

    -- Inlining this causes large compilation times for tests
    {-# INLINABLE serializeAt #-}
    serializeAt :: Int -> MutByteArray -> [a] -> IO Int
serializeAt Int
off MutByteArray
arr [a]
val = do
        let off1 :: Int
off1 = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Proxy Int64 -> Int
forall a. Unbox a => Proxy a -> Int
Unbox.sizeOf (Proxy Int64
forall {k} (t :: k). Proxy t
Proxy :: Proxy Int64)
        let pokeList :: Int64 -> Int -> [a] -> IO Int
pokeList Int64
acc Int
o [] =
              Int -> MutByteArray -> Int64 -> IO ()
forall a. Unbox a => Int -> MutByteArray -> a -> IO ()
Unbox.pokeAt Int
off MutByteArray
arr (Int64
acc :: Int64) IO () -> IO Int -> IO Int
forall a b. IO a -> IO b -> IO b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> IO Int
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
o
            pokeList Int64
acc Int
o (a
x:[a]
xs) = do
              Int
o1 <- Int -> MutByteArray -> a -> IO Int
forall a. Serialize a => Int -> MutByteArray -> a -> IO Int
serializeAt Int
o MutByteArray
arr a
x
              Int64 -> Int -> [a] -> IO Int
pokeList (Int64
acc Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int64
1) Int
o1 [a]
xs
        Int64 -> Int -> [a] -> IO Int
forall {a}. Serialize a => Int64 -> Int -> [a] -> IO Int
pokeList Int64
0 Int
off1 [a]
val

instance
#ifdef DEVBUILD
    Unbox a =>
#endif
  Serialize (Array a) where
    {-# INLINE addSizeTo #-}
    addSizeTo :: Int -> Array a -> Int
addSizeTo Int
i (Array {Int
MutByteArray
arrContents :: MutByteArray
arrStart :: Int
arrEnd :: Int
arrContents :: forall a. Array a -> MutByteArray
arrStart :: forall a. Array a -> Int
arrEnd :: forall a. Array a -> Int
..}) = Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
arrEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
arrStart) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
8

    {-# INLINE deserializeAt #-}
    deserializeAt :: Int -> MutByteArray -> Int -> IO (Int, Array a)
deserializeAt Int
off MutByteArray
arr Int
len = do
        (Int
off1, Int
byteLen) <- Int -> MutByteArray -> Int -> IO (Int, Int)
forall a. Serialize a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeAt Int
off MutByteArray
arr Int
len :: IO (Int, Int)
        let off2 :: Int
off2 = Int
off1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
byteLen
        Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
off2 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
len) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$
            [Char] -> IO ()
forall a. HasCallStack => [Char] -> a
error
                ([Char] -> IO ()) -> [Char] -> IO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"deserializeAt: accessing array at offset = "
                    [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show (Int
off2 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                    [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" max valid offset = " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show (Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
        -- XXX Use MutByteArray.cloneSliceUnsafe
        let slice :: MutArray a
slice = MutByteArray -> Int -> Int -> Int -> MutArray a
forall a. MutByteArray -> Int -> Int -> Int -> MutArray a
MutArray.MutArray MutByteArray
arr Int
off1 Int
off2 Int
off2
        MutArray a
newArr <- MutArray a -> IO (MutArray a)
forall (m :: * -> *) a. MonadIO m => MutArray a -> m (MutArray a)
MutArray.clone MutArray a
forall {a}. MutArray a
slice
        (Int, Array a) -> IO (Int, Array a)
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
off2, MutArray a -> Array a
forall a. MutArray a -> Array a
Array.unsafeFreeze MutArray a
newArr)

    {-# INLINE serializeAt #-}
    serializeAt :: Int -> MutByteArray -> Array a -> IO Int
serializeAt Int
off MutByteArray
arr (Array {Int
MutByteArray
arrContents :: forall a. Array a -> MutByteArray
arrStart :: forall a. Array a -> Int
arrEnd :: forall a. Array a -> Int
arrContents :: MutByteArray
arrStart :: Int
arrEnd :: Int
..}) = do
        let arrLen :: Int
arrLen = Int
arrEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
arrStart
        Int
off1 <- Int -> MutByteArray -> Int -> IO Int
forall a. Serialize a => Int -> MutByteArray -> a -> IO Int
serializeAt Int
off MutByteArray
arr Int
arrLen
        MutByteArray -> Int -> MutByteArray -> Int -> Int -> IO ()
forall (m :: * -> *).
MonadIO m =>
MutByteArray -> Int -> MutByteArray -> Int -> Int -> m ()
MBA.putSliceUnsafe MutByteArray
arrContents Int
arrStart MutByteArray
arr Int
off1 Int
arrLen
        Int -> IO Int
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
off1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
arrLen)

instance (Serialize a, Serialize b) => Serialize (a, b) where

    {-# INLINE addSizeTo #-}
    addSizeTo :: Int -> (a, b) -> Int
addSizeTo Int
acc (a
a, b
b) = Int -> b -> Int
forall a. Serialize a => Int -> a -> Int
addSizeTo (Int -> a -> Int
forall a. Serialize a => Int -> a -> Int
addSizeTo Int
acc a
a) b
b

    {-# INLINE serializeAt #-}
    serializeAt :: Int -> MutByteArray -> (a, b) -> IO Int
serializeAt Int
off MutByteArray
arr (a
a, b
b) = do
        Int
off1 <- Int -> MutByteArray -> a -> IO Int
forall a. Serialize a => Int -> MutByteArray -> a -> IO Int
serializeAt Int
off MutByteArray
arr a
a
        Int -> MutByteArray -> b -> IO Int
forall a. Serialize a => Int -> MutByteArray -> a -> IO Int
serializeAt Int
off1 MutByteArray
arr b
b

    {-# INLINE deserializeAt #-}
    deserializeAt :: Int -> MutByteArray -> Int -> IO (Int, (a, b))
deserializeAt Int
off MutByteArray
arr Int
end = do
        (Int
off1, a
a) <- Int -> MutByteArray -> Int -> IO (Int, a)
forall a. Serialize a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeAt Int
off MutByteArray
arr Int
end
        (Int
off2, b
b) <- Int -> MutByteArray -> Int -> IO (Int, b)
forall a. Serialize a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeAt Int
off1 MutByteArray
arr Int
end
        (Int, (a, b)) -> IO (Int, (a, b))
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
off2, (a
a, b
b))