{-# LANGUAGE TemplateHaskell #-}

-- |
-- Module      : Streamly.Internal.Data.Serialize.TH.Bottom
-- Copyright   : (c) 2023 Composewell Technologies
-- License     : BSD3-3-Clause
-- Maintainer  : streamly@composewell.com
-- Stability   : experimental
-- Portability : GHC
--
module Streamly.Internal.Data.Serialize.TH.Bottom
    (
    -- ** Config
      SerializeConfig(..)
    , serializeConfig
    , inlineAddSizeTo
    , inlineSerializeAt
    , inlineDeserializeAt
    , encodeConstrNames
    , encodeRecordFields

    -- ** Other Utilities
    , TypeOfType(..)
    , typeOfType
    , SimpleDataCon(..)
    , simplifyDataCon
    , Field
    , mkFieldName
    , isUnitType
    , isRecordSyntax
    , c2w
    , wListToString
    , xorCmp
    , serializeW8List
    , litIntegral
    , litProxy
    , matchConstructor
    , openConstructor
    , makeI
    , makeN
    , makeA
    , int_w8
    , int_w32
    , w32_int
    , w8_int
    , _acc
    , _arr
    , _endOffset
    , _initialOffset
    , _x
    , _tag
    , _val
    , errorUnsupported
    , errorUnimplemented
    ) where

import Data.Maybe (isJust)
import Data.Char (chr, ord)
import Data.List (foldl')
import Data.Word (Word16, Word32, Word64, Word8)
import Data.Bits (Bits, (.|.), shiftL, zeroBits, xor)
import Streamly.Internal.System.IO (unsafeInlineIO)
import Streamly.Internal.Data.Unbox (Unbox)
import Data.Proxy (Proxy)

import Language.Haskell.TH
import Streamly.Internal.Data.Serialize.Type

import qualified Streamly.Internal.Data.Unbox as Unbox

import Streamly.Internal.Data.Unbox.TH (DataCon(..))

--------------------------------------------------------------------------------
-- Config
--------------------------------------------------------------------------------

-- NOTE: 'Nothing' is not eqvivalant to 'Just Inlinable'. Ie. Having no inline
-- specific pragma and having an Inlinable pragma are different. Having an
-- Inlinable pragma makes GHC put the code in the interface file whereas having
-- no inline specific pragma let's GHC decide whether to put the code in
-- interface file or not.

-- | Configuration to control how the 'Serialize' instance is generated. The
-- configuration is opaque and is modified by composing config modifier
-- functions, for example:
--
-- >>> (inlineSerializeAt (Just NoInline)) . (inlineSerializeAt (Just Inlinable))
--
-- The default configuration settings are:
--
-- * 'inlineAddSizeTo' Nothing
-- * 'inlineSerializeAt' (Just Inline)
-- * 'inlineDeserializeAt' (Just Inline)
--
-- The following experimental options are also available:
--
-- * 'encodeConstrNames' False
-- * 'encodeRecordFields' False
--
data SerializeConfig =
    SerializeConfig
        { cfgInlineSize :: Maybe Inline
        , cfgInlineSerialize :: Maybe Inline
        , cfgInlineDeserialize :: Maybe Inline
        , cfgConstructorTagAsString :: Bool
        , cfgRecordSyntaxWithHeader :: Bool
        }

-- | How should we inline the 'addSizeTo' function? The default is 'Nothing'
-- which means left to the compiler. Forcing inline on @addSizeTo@ function
-- actually worsens some benchmarks and improves none.
inlineAddSizeTo :: Maybe Inline -> SerializeConfig -> SerializeConfig
inlineAddSizeTo v cfg = cfg {cfgInlineSize = v}

-- XXX Should we make the default Inlinable instead?

-- | How should we inline the 'serialize' function? The default 'Just Inline'.
-- However, aggressive inlining can bloat the code and increase in compilation
-- times when there are big functions and too many nesting levels so you can
-- change it accordingly. A 'Nothing' value leaves the decision to the
-- compiler.
inlineSerializeAt :: Maybe Inline -> SerializeConfig -> SerializeConfig
inlineSerializeAt v cfg = cfg {cfgInlineSerialize = v}

-- XXX Should we make the default Inlinable instead?

-- | How should we inline the 'deserialize' function? See guidelines in
-- 'inlineSerializeAt'.
inlineDeserializeAt :: Maybe Inline -> SerializeConfig -> SerializeConfig
inlineDeserializeAt v cfg = cfg {cfgInlineDeserialize = v}

-- | __Experimental__
--
-- In sum types, use Latin-1 encoded original constructor names rather than
-- binary values to identify constructors. This option is not applicable to
-- product types.
--
-- This option enables the following behavior:
--
-- * __Reordering__: Order of the fields can be changed without affecting
-- serialization.
-- * __Addition__: If a field is added in the new version, the old version of
-- the data type can still be deserialized by the new version. The new value
-- would never occur in the old one.
-- * __Deletion__: If a field is deleted in the new version, deserialization
-- of the old version will result in an error. TBD: We can possibly designate a
-- catch-all case to handle this scenario.
--
-- Note that if you change a type, change the semantics of a type, or delete a
-- field and add a new field with the same name, deserialization of old data
-- may result in silent unexpected behavior.
--
-- This option has to be the same on both encoding and decoding side.
--
-- The default is 'False'.
--
encodeConstrNames :: Bool -> SerializeConfig -> SerializeConfig
encodeConstrNames v cfg = cfg {cfgConstructorTagAsString = v}

-- XXX We can deserialize each field to Either, so if there is a
-- deserialization error in any field it can handled independently. Also, a
-- unique type/version identifier of the field (based on the versions of the
-- packages, full module name space + type identifier) can be serialized along
-- with the value for stricter compatibility, semantics checking. Or we can
-- store a type hash.

-- | __Experimental__
--
-- In explicit record types, use Latin-1 encoded record field names rather than
-- binary values to identify the record fields. Note that this option is not
-- applicable to sum types. Also, it does not work on a product type which is
-- not a record, because there are no field names to begin with.
--
-- This option enables the following behavior:
--
-- * __Reordering__: Order of the fields can be changed without affecting
-- serialization.
-- * __Addition__: If a 'Maybe' type field is added in the new version, the old
-- version of the data type can still be deserialized by the new version, the
-- field value in the older version is assumed to be 'Nothing'. If any other
-- type of field is added, deserialization of the older version results in an
-- error but only when that field is actually accessed in the deserialized
-- record.
-- * __Deletion__: If a field is deleted in the new version and it is
-- encountered in a previously serialized version then the field is discarded.
--
-- This option has to be the same on both encoding and decoding side.
--
-- There is a constant performance overhead proportional to the total length of
-- the record field names and the number of record fields.
--
-- The default is 'False'.
--
encodeRecordFields :: Bool -> SerializeConfig -> SerializeConfig
encodeRecordFields v cfg = cfg {cfgRecordSyntaxWithHeader = v}

serializeConfig :: SerializeConfig
serializeConfig =
    SerializeConfig
        { cfgInlineSize = Nothing
        , cfgInlineSerialize = Just Inline
        , cfgInlineDeserialize = Just Inline
        , cfgConstructorTagAsString = False
        , cfgRecordSyntaxWithHeader = False
        }

--------------------------------------------------------------------------------
-- Helpers
--------------------------------------------------------------------------------

type Field = (Maybe Name, Type)

_x :: Name
_x = mkName "x"

_acc :: Name
_acc = mkName "acc"

_arr :: Name
_arr = mkName "arr"

_tag :: Name
_tag = mkName "tag"

_initialOffset :: Name
_initialOffset = mkName "initialOffset"

_endOffset :: Name
_endOffset = mkName "endOffset"

_val :: Name
_val = mkName "val"

mkFieldName :: Int -> Name
mkFieldName i = mkName ("field" ++ show i)

makeI :: Int -> Name
makeI i = mkName $ "i" ++ show i

makeN :: Int -> Name
makeN i = mkName $ "n" ++ show i

makeA :: Int -> Name
makeA i = mkName $ "a" ++ show i

--------------------------------------------------------------------------------
-- Domain specific helpers
--------------------------------------------------------------------------------

openConstructor :: Name -> Int -> Q Pat
openConstructor cname numFields =
    conP cname (map varP (map mkFieldName [0 .. (numFields - 1)]))

matchConstructor :: Name -> Int -> Q Exp -> Q Match
matchConstructor cname numFields exp0 =
    match (openConstructor cname numFields) (normalB exp0) []

--------------------------------------------------------------------------------
-- Constructor types
--------------------------------------------------------------------------------

data SimpleDataCon =
    SimpleDataCon Name [Field]
    deriving (Eq)

simplifyDataCon :: DataCon -> SimpleDataCon
simplifyDataCon (DataCon cname _ _ fields) = SimpleDataCon cname fields

data TypeOfType
    = UnitType Name             -- 1 constructor and 1 field
    | TheType SimpleDataCon      -- 1 constructor and 1+ fields
    | MultiType [SimpleDataCon] -- 1+ constructors
    deriving (Eq)

typeOfType :: Type -> [DataCon] -> TypeOfType
typeOfType headTy [] =
    error
        ("Attempting to get size with no constructors (" ++
         (pprint headTy) ++ ")")
typeOfType _ [DataCon cname _ _ []] = UnitType cname
typeOfType _ [con@(DataCon _ _ _ _)] = TheType $ simplifyDataCon con
typeOfType _ cons = MultiType $ map simplifyDataCon cons

isUnitType :: [DataCon] -> Bool
isUnitType [DataCon _ _ _ []] = True
isUnitType _ = False

isRecordSyntax :: SimpleDataCon -> Bool
isRecordSyntax (SimpleDataCon _ fields) = and (isJust . fst <$> fields)

--------------------------------------------------------------------------------
-- Type casting
--------------------------------------------------------------------------------

int_w8 :: Int -> Word8
int_w8 = fromIntegral

int_w32 :: Int -> Word32
int_w32 = fromIntegral

w8_w16 :: Word8 -> Word16
w8_w16 = fromIntegral

w8_w32 :: Word8 -> Word32
w8_w32 = fromIntegral

w8_w64 :: Word8 -> Word64
w8_w64 = fromIntegral

w8_int :: Word8 -> Int
w8_int = fromIntegral

w32_int :: Word32 -> Int
w32_int = fromIntegral

c2w :: Char -> Word8
c2w = fromIntegral . ord

wListToString :: [Word8] -> String
wListToString = fmap (chr . fromIntegral)

--------------------------------------------------------------------------------
-- Bit manipulation
--------------------------------------------------------------------------------

shiftAdd :: Bits a => (b -> a) -> [b] -> a
shiftAdd conv xs =
    foldl' (.|.) zeroBits $
    map (\(j, x) -> shiftL x (j * 8)) $ zip [0 ..] $ map conv xs

-- Note: This only works in little endian machines
-- TODO:
-- Instead of generating this via TH can't we write it directly in Haskell and
-- use that? Creating one comparison function for each deserialization may be
-- too much code and may not be necessary.
-- Benchmark both the implementations and check.
xorCmp :: [Word8] -> Name -> Name -> Q Exp
xorCmp tag off arr =
    case tagLen of
        x | x < 2 -> [|$(go8 0) == zeroBits|]
        x | x < 4 -> [|$(go16 0) == zeroBits|]
        x | x < 8 -> [|$(go32 0) == zeroBits|]
        _ -> [|$(go64 0) == zeroBits|]
  where
    tagLen = length tag
    go8 i | i >= tagLen = [|zeroBits|]
    go8 i = do
        let wIntegral = litIntegral i
        [|xor (unsafeInlineIO
                   (Unbox.peekAt
                        ($(varE off) + $(litIntegral i))
                        $(varE arr)))
              ($(wIntegral) :: Word8) .|.
          $(go8 (i + 1))|]
    go16 i
        | i >= tagLen = [|zeroBits|]
    go16 i
        | tagLen - i < 2 = go16 (tagLen - 2)
    go16 i = do
        let wIntegral =
                litIntegral
                    (shiftAdd w8_w16 [tag !! i, tag !! (i + 1)] :: Word16)
        [|xor (unsafeInlineIO
                   (Unbox.peekAt
                        ($(varE off) + $(litIntegral i))
                        $(varE arr)))
              ($(wIntegral) :: Word16) .|.
          $(go16 (i + 2))|]
    go32 i
        | i >= tagLen = [|zeroBits|]
    go32 i
        | tagLen - i < 4 = go32 (tagLen - 4)
    go32 i = do
        let wIntegral =
                litIntegral
                    (shiftAdd
                         w8_w32
                         [ tag !! i
                         , tag !! (i + 1)
                         , tag !! (i + 2)
                         , tag !! (i + 3)
                         ] :: Word32)
        [|xor (unsafeInlineIO
                   (Unbox.peekAt
                        ($(varE off) + $(litIntegral i))
                        $(varE arr)))
              ($(wIntegral) :: Word32) .|.
          $(go32 (i + 4))|]
    go64 i
        | i >= tagLen = [|zeroBits|]
    go64 i
        | tagLen - i < 8 = go64 (tagLen - 8)
    go64 i = do
        let wIntegral =
                litIntegral
                    (shiftAdd
                         w8_w64
                         [ tag !! i
                         , tag !! (i + 1)
                         , tag !! (i + 2)
                         , tag !! (i + 3)
                         , tag !! (i + 4)
                         , tag !! (i + 5)
                         , tag !! (i + 6)
                         , tag !! (i + 7)
                         ])
        [|xor (unsafeInlineIO
                   (Unbox.peekAt
                        ($(varE off) + $(litIntegral i))
                        $(varE arr)))
              ($(wIntegral) :: Word64) .|.
          $(go64 (i + 8))|]

--------------------------------------------------------------------------------
-- Primitive serialization
--------------------------------------------------------------------------------

-- TODO:
-- Will this be too much of a code bloat?
-- Loop with the loop body unrolled?
-- Serialize this in batches similar to batch comparision in xorCmp?
serializeW8List :: Name -> Name -> [Word8] -> Q Exp
serializeW8List off arr w8List = do
    [|let $(varP (makeN 0)) = $(varE off)
       in $(doE (fmap makeBind [0 .. (lenW8List - 1)] ++
                 [noBindS ([|pure $(varE (makeN lenW8List))|])]))|]

    where

    lenW8List = length w8List
    makeBind i =
        bindS
            (varP (makeN (i + 1)))
            [|$(varE 'serializeAt)
                  $(varE (makeN i))
                  $(varE arr)
                  ($(litIntegral (w8List !! i)) :: Word8)|]

--------------------------------------------------------------------------------
-- TH Helpers
--------------------------------------------------------------------------------

litIntegral :: Integral a => a -> Q Exp
litIntegral = litE . IntegerL . fromIntegral

litProxy :: Unbox a => Proxy a -> Q Exp
litProxy = litE . IntegerL . fromIntegral . Unbox.sizeOf

--------------------------------------------------------------------------------
-- Error codes
--------------------------------------------------------------------------------

errorUnsupported :: String -> a
errorUnsupported err =
    error
        $ unlines
              [ "Unsupported"
              , "==========="
              , "This is improper use of the library."
              , "This case is unsupported."
              , "Please contact the developer if this case is of interest."
              , ""
              , "Message"
              , "-------"
              , err
              ]

errorUnimplemented :: a
errorUnimplemented =
    error
        $ unlines
              [ "Unimplemented"
              , "============="
              , "Please contact the developer if this case is of interest."
              ]