{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE UnboxedTuples #-}

module Data.Bytes.Builder.Unsafe
  ( -- * Types
    Builder (..)
  , BuilderState (..)
  , Commits (..)

    -- * Execution
  , pasteST
  , pasteIO

    -- * Construction
  , fromEffect

    -- * Builder State
  , newBuilderState
  , closeBuilderState

    -- * Finalization
  , reverseCommitsOntoChunks
  , commitsOntoChunks
  , copyReverseCommits
  , addCommitsLength

    -- * Commit Distance
  , commitDistance
  , commitDistance1

    -- * Safe Functions

    -- | These functions are actually completely safe, but they are defined
    -- here because they are used by typeclass instances. Import them from
    -- @Data.Bytes.Builder@ instead.
  , stringUtf8
  , cstring

    -- * Pasting with Preconditions
  , pasteUtf8TextJson#
  ) where

import Control.Monad.Primitive (primitive_)
import Data.Bytes.Chunks (Chunks (ChunksCons))
import Data.Bytes.Types (Bytes (Bytes))
import Data.Char (ord)
import Data.Primitive (ByteArray (..), MutableByteArray (..))
import Data.Word (Word8)
import Foreign.C.String (CString)
import GHC.Base (unpackCString#, unpackCStringUtf8#)
import GHC.Exts (Addr#, ByteArray#, Char (C#), Int (I#), Int#, IsString, MutableByteArray#, Ptr (Ptr), RealWorld, State#, (+#), (-#), (>#), (>=#))
import GHC.IO (stToIO)
import GHC.ST (ST (ST))

import qualified Compat as C
import qualified Data.Bytes.Builder.Bounded as Bounded
import qualified Data.Bytes.Builder.Bounded.Unsafe as UnsafeBounded
import qualified Data.Primitive as PM
import qualified GHC.Exts as Exts
import qualified Op

{- | An unmaterialized sequence of bytes that may be pasted
into a mutable byte array.
-}
newtype Builder
  = Builder
      ( forall s.
        MutableByteArray# s -> -- buffer we are currently writing to
        Int# -> -- offset into the current buffer
        Int# -> -- number of bytes remaining in the current buffer
        Commits s -> -- buffers and immutable byte slices that we have already committed
        State# s ->
        (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) -- all the same things
      )

{- | A list of committed chunks along with the chunk currently being
written to. This is kind of like a non-empty variant of 'Commmits'
but with the additional invariant that the head chunk is a mutable
byte array.
-}
data BuilderState s
  = BuilderState
      (MutableByteArray# s) -- buffer we are currently writing to
      Int# -- offset into the current buffer
      Int# -- number of bytes remaining in the current buffer
      !(Commits s) -- buffers and immutable byte slices that are already committed

-- | Create an empty 'BuilderState' with a buffer of the given size.
newBuilderState :: Int -> ST s (BuilderState s)
{-# INLINE newBuilderState #-}
newBuilderState n@(I# n#) = do
  MutableByteArray buf <- PM.newByteArray n
  pure (BuilderState buf 0# n# Initial)

{- | Push the active chunk onto the top of the commits.
The @BuilderState@ argument must not be reused after being passed
to this function. That is, its use must be affine.
-}
closeBuilderState :: BuilderState s -> Commits s
closeBuilderState (BuilderState dst off _ cmts) = Mutable dst off cmts

{- | Run a builder, performing an in-place update on the state.
The @BuilderState@ argument must not be reused after being passed
to this function. That is, its use must be affine.
-}
pasteST :: Builder -> BuilderState s -> ST s (BuilderState s)
{-# INLINE pasteST #-}
pasteST (Builder f) (BuilderState buf off len cmts) = ST $ \s0 ->
  case f buf off len cmts s0 of
    (# s1, buf1, off1, len1, cmts1 #) ->
      (# s1, BuilderState buf1 off1 len1 cmts1 #)

-- | Variant of 'pasteST' that runs in 'IO'.
pasteIO :: Builder -> BuilderState RealWorld -> IO (BuilderState RealWorld)
{-# INLINE pasteIO #-}
pasteIO b st = stToIO (pasteST b st)

instance IsString Builder where
  {-# INLINE fromString #-}
  fromString = stringUtf8

instance Semigroup Builder where
  {-# INLINE (<>) #-}
  Builder f <> Builder g = Builder $ \buf0 off0 len0 cs0 s0 -> case f buf0 off0 len0 cs0 s0 of
    (# s1, buf1, off1, len1, cs1 #) -> g buf1 off1 len1 cs1 s1

instance Monoid Builder where
  {-# INLINE mempty #-}
  mempty = Builder $ \buf0 off0 len0 cs0 s0 -> (# s0, buf0, off0, len0, cs0 #)

data Commits s
  = Mutable
      -- | Mutable buffer, start index implicitly zero
      (MutableByteArray# s)
      -- | Length (may be smaller than actual length)
      Int#
      !(Commits s)
  | Immutable
      -- | Immutable chunk
      ByteArray#
      -- | Offset into chunk, not necessarily zero
      Int#
      -- | Length (may be smaller than actual length)
      Int#
      !(Commits s)
  | Initial

{- | Add the total number of bytes in the commits to first
argument.
-}
addCommitsLength :: Int -> Commits s -> Int
addCommitsLength !acc Initial = acc
addCommitsLength !acc (Immutable _ _ x cs) = addCommitsLength (acc + I# x) cs
addCommitsLength !acc (Mutable _ x cs) = addCommitsLength (acc + I# x) cs

{- | Cons the chunks from a list of @Commits@ onto an initial
@Chunks@ list (this argument is often @ChunksNil@). This reverses
the order of the chunks, which is desirable since builders assemble
@Commits@ with the chunks backwards. This performs an in-place shrink
and freezes any mutable byte arrays it encounters. Consequently,
these must not be reused.
-}
reverseCommitsOntoChunks :: Chunks -> Commits s -> ST s Chunks
reverseCommitsOntoChunks !xs Initial = pure xs
reverseCommitsOntoChunks !xs (Immutable arr off len cs) =
  reverseCommitsOntoChunks (ChunksCons (Bytes (ByteArray arr) (I# off) (I# len)) xs) cs
reverseCommitsOntoChunks !xs (Mutable buf len cs) = case len of
  -- Skip over empty byte arrays.
  0# -> reverseCommitsOntoChunks xs cs
  _ -> do
    shrinkMutableByteArray (MutableByteArray buf) (I# len)
    arr <- PM.unsafeFreezeByteArray (MutableByteArray buf)
    reverseCommitsOntoChunks (ChunksCons (Bytes arr 0 (I# len)) xs) cs

{- | Variant of 'reverseCommitsOntoChunks' that does not reverse
the order of the commits. Since commits are built backwards by
consing, this means that the chunks appended to the front will
be backwards. Within each chunk, however, the bytes will be in
the correct order.

Unlike 'reverseCommitsOntoChunks', this function is not tail
recursive.
-}
commitsOntoChunks :: Chunks -> Commits s -> ST s Chunks
commitsOntoChunks !xs0 cs0 = go cs0
 where
  go Initial = pure xs0
  go (Immutable arr off len cs) = do
    xs <- go cs
    pure $! ChunksCons (Bytes (ByteArray arr) (I# off) (I# len)) xs
  go (Mutable buf len cs) = case len of
    -- Skip over empty byte arrays.
    0# -> go cs
    _ -> do
      shrinkMutableByteArray (MutableByteArray buf) (I# len)
      arr <- PM.unsafeFreezeByteArray (MutableByteArray buf)
      xs <- go cs
      pure $! ChunksCons (Bytes arr 0 (I# len)) xs

{- | Copy the contents of the chunks into a mutable array, reversing
the order of the chunks.
Precondition: The destination must have enough space to house the
contents. This is not checked.
-}
copyReverseCommits ::
  -- | Destination
  MutableByteArray s ->
  -- | Destination range successor
  Int ->
  -- | Source
  Commits s ->
  ST s Int
{-# INLINE copyReverseCommits #-}
copyReverseCommits (MutableByteArray dst) (I# off) cs =
  ST
    ( \s0 -> case copyReverseCommits# dst off cs s0 of
        (# s1, nextOff #) -> (# s1, I# nextOff #)
    )

copyReverseCommits# ::
  MutableByteArray# s ->
  Int# ->
  Commits s ->
  State# s ->
  (# State# s, Int# #)
copyReverseCommits# _ off Initial s0 = (# s0, off #)
copyReverseCommits# marr prevOff (Mutable arr sz cs) s0 =
  let !off = prevOff -# sz
   in case Op.copyMutableByteArray# arr 0# marr off sz s0 of
        s1 -> copyReverseCommits# marr off cs s1
copyReverseCommits# marr prevOff (Immutable arr soff sz cs) s0 =
  let !off = prevOff -# sz
   in case Op.copyByteArray# arr soff marr off sz s0 of
        s1 -> copyReverseCommits# marr off cs s1

{- | Create a builder from a cons-list of 'Char'. These
must be UTF-8 encoded.
-}
stringUtf8 :: String -> Builder
{-# INLINE stringUtf8 #-}
stringUtf8 cs = Builder (goString cs)

{- | Create a builder from a @NUL@-terminated 'CString'. This ignores any
textual encoding, copying bytes until @NUL@ is reached.
-}
cstring :: CString -> Builder
{-# INLINE cstring #-}
cstring (Ptr cs) = Builder (goCString cs)

goString ::
  String ->
  MutableByteArray# s ->
  Int# ->
  Int# ->
  Commits s ->
  State# s ->
  (# State# s, MutableByteArray# s, Int#, Int#, Commits s #)
{-# NOINLINE goString #-}
goString [] buf0 off0 len0 cs0 s0 = (# s0, buf0, off0, len0, cs0 #)
goString (c : cs) buf0 off0 len0 cs0 s0 = case len0 ># 3# of
  1# -> case unST (UnsafeBounded.pasteST (Bounded.char c) (MutableByteArray buf0) (I# off0)) s0 of
    (# s1, I# off1 #) -> goString cs buf0 off1 (len0 -# (off1 -# off0)) cs0 s1
  _ -> case Exts.newByteArray# 4080# s0 of
    (# s1, buf1 #) -> case unST (UnsafeBounded.pasteST (Bounded.char c) (MutableByteArray buf1) 0) s1 of
      (# s2, I# off1 #) -> goString cs buf1 off1 (4080# -# off1) (Mutable buf0 off0 cs0) s2

-- We have to have a rule for both unpackCString# and unpackCStringUtf8#
-- since GHC uses a different function based on whether or not non-ASCII
-- codepoints are used in the string.
-- TODO: The UTF-8 variant of this rule is unsound because GHC actually
-- used Modified UTF-8.
{-# RULES
"Builder stringUtf8/cstring" forall s a b c d e.
  goString (unpackCString# s) a b c d e =
    goCString s a b c d e
"Builder stringUtf8/cstring-utf8" forall s a b c d e.
  goString (unpackCStringUtf8# s) a b c d e =
    goCString s a b c d e
  #-}

goCString ::
  Addr# ->
  MutableByteArray# s ->
  Int# ->
  Int# ->
  Commits s ->
  State# s ->
  (# State# s, MutableByteArray# s, Int#, Int#, Commits s #)
goCString addr buf0 off0 len0 cs0 s0 = case C.word8ToWord# (Exts.indexWord8OffAddr# addr 0#) of
  0## -> (# s0, buf0, off0, len0, cs0 #)
  w -> case len0 of
    0# -> case Exts.newByteArray# 4080# s0 of
      (# s1, buf1 #) -> case Exts.writeWord8Array# buf1 0# (C.wordToWord8# w) s1 of
        s2 ->
          goCString
            (Exts.plusAddr# addr 1#)
            buf1
            1#
            (4080# -# 1#)
            (Mutable buf0 off0 cs0)
            s2
    _ -> case Exts.writeWord8Array# buf0 off0 (C.wordToWord8# w) s0 of
      s1 -> goCString (Exts.plusAddr# addr 1#) buf0 (off0 +# 1#) (len0 -# 1#) cs0 s1

fromEffect ::
  -- | Maximum number of bytes the paste function needs
  Int ->
  -- | Paste function. Takes a byte array and an offset and returns
  -- the new offset and having pasted into the buffer.
  (forall s. MutableByteArray s -> Int -> ST s Int) ->
  Builder
{-# INLINE fromEffect #-}
fromEffect (I# req) f = Builder $ \buf0 off0 len0 cs0 s0 ->
  let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of
        1# -> (# s0, buf0, off0, len0, cs0 #)
        _ ->
          let !(I# lenX) = max 4080 (I# req)
           in case Exts.newByteArray# lenX s0 of
                (# sX, bufX #) ->
                  (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #)
   in case unST (f (MutableByteArray buf1) (I# off1)) s1 of
        (# s2, I# off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #)

unST :: ST s a -> State# s -> (# State# s, a #)
unST (ST f) = f

shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s ()
shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
  primitive_ (Exts.shrinkMutableByteArray# arr sz)

{- | Variant of commitDistance where you get to supply a
head of the commit list that has not yet been committed.
-}
commitDistance1 ::
  MutableByteArray# s -> -- target
  Int# -> -- offset into target
  MutableByteArray# s -> -- head of array
  Int# -> -- offset into head of array
  Commits s ->
  Int#
commitDistance1 target offTarget buf0 offBuf cs =
  case Exts.sameMutableByteArray# target buf0 of
    1# -> offBuf -# offTarget
    _ -> commitDistance target offBuf cs -# offTarget

{- | Compute the number of bytes between the last byte and the offset
specified in a chunk. Precondition: the chunk must exist in the
list of committed chunks. This relies on mutable byte arrays having
identity (e.g. it uses @sameMutableByteArray#@).
-}
commitDistance :: MutableByteArray# s -> Int# -> Commits s -> Int#
commitDistance !_ !_ Initial = errorWithoutStackTrace "chunkDistance: chunk not found"
commitDistance target !n (Immutable _ _ len cs) =
  commitDistance target (n +# len) cs
commitDistance target !n (Mutable buf len cs) =
  case Exts.sameMutableByteArray# target buf of
    1# -> n +# len
    _ -> commitDistance target (n +# len) cs

{- | Encode (UTF-8 encoded) text as a JSON string, wrapping it in double quotes.
This escapes all characters with code points below @0x20@.

* Precondition: The slice of the byte argument is UTF-8 encoded text.
* Precondition: There is enough space in the buffer for the result
  to be written to. A simple way to ensure enough space is to allocate
  @6N + 2@ bytes, where N is the length of the argument. However, the
  caller may use clever heuristics to find a lower upper bound.
* Result: The next offset in the destination buffer
-}
pasteUtf8TextJson# ::
  -- | source
  ByteArray# ->
  -- | source offset
  Int# ->
  -- | source length
  Int# ->
  -- | destination buffer
  MutableByteArray# s ->
  -- | offset into destination buffer
  Int# ->
  -- | state token
  State# s ->
  (# State# s, Int# #) -- returns next destination offset
{-# NOINLINE pasteUtf8TextJson# #-}
pasteUtf8TextJson# src# soff0# slen0# dst# doff0# s0# =
  let ST f = do
        let dst = MutableByteArray dst#
        let doff0 = I# doff0#
        PM.writeByteArray dst doff0 (c2w '"')
        let go !soff !slen !doff =
              if slen > 0
                then case indexChar8Array (ByteArray src#) soff of
                  '\\' -> write2 dst doff '\\' '\\' *> go (soff + 1) (slen - 1) (doff + 2)
                  '\"' -> write2 dst doff '\\' '\"' *> go (soff + 1) (slen - 1) (doff + 2)
                  c ->
                    if c >= '\x20'
                      then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1)
                      else case c of
                        '\n' -> write2 dst doff '\\' 'n' *> go (soff + 1) (slen - 1) (doff + 2)
                        '\r' -> write2 dst doff '\\' 'r' *> go (soff + 1) (slen - 1) (doff + 2)
                        '\t' -> write2 dst doff '\\' 't' *> go (soff + 1) (slen - 1) (doff + 2)
                        '\b' -> write2 dst doff '\\' 'b' *> go (soff + 1) (slen - 1) (doff + 2)
                        '\f' -> write2 dst doff '\\' 'f' *> go (soff + 1) (slen - 1) (doff + 2)
                        _ -> do
                          write2 dst doff '\\' 'u'
                          doff' <-
                            UnsafeBounded.pasteST
                              (Bounded.word16PaddedUpperHex (fromIntegral (c2w c)))
                              dst
                              (doff + 2)
                          go (soff + 1) (slen - 1) doff'
                else pure doff
        doffRes <- go (I# soff0#) (I# slen0#) (doff0 + 1)
        PM.writeByteArray dst doffRes (c2w '"')
        pure (doffRes + 1)
      !(# !s1, I# dstFinal #) = f s0#
   in (# s1, dstFinal #)

c2w :: Char -> Word8
{-# INLINE c2w #-}
c2w = fromIntegral . ord

-- Internal. Write two characters in the ASCII plane to a byte array.
write2 :: MutableByteArray s -> Int -> Char -> Char -> ST s ()
write2 marr ix a b = do
  PM.writeByteArray marr ix (c2w a)
  PM.writeByteArray marr (ix + 1) (c2w b)

indexChar8Array :: ByteArray -> Int -> Char
{-# INLINE indexChar8Array #-}
indexChar8Array (ByteArray b) (I# i) = C# (Exts.indexCharArray# b i)