{-# LANGUAGE UnboxedTuples #-}

{- | Low-level bytestring builder using continuation parsing.

bytezap's builder is highly performant. However, one thing it can't do is
/fail/. We have no way to flag an error. If you force it, you will either

* write an initial assert, followed by an unsafe builder that relies on it, or
* build a builder as we assert, then execute it once we're ready

The former is inefficient in situations where the check scales similarly with
the build (e.g. both must iterate over the input). And the latter is very silly
since your builder will be allocating all over.

A naive failable builder might use @'Either' e 'Int'@ to flag errors. After
executing, we check the result: if 'Right', we resize to the given actual
length; if 'Left', we discard the buffer with the given error. This is fine...
but it's an extra allocation, and limits us to 'Either'. A shame.

Instead, we design a builder that takes a finalizer continuation @'Int#' ->
'ByteString'@, which is passed the final offset. The builder calls this as it
finishes, wrapping it as needed (or leaving as 'ByteString' for a non-failable
builder). The runner is expected to pass a continuation to perform any buffer
reallocation necessary (if the actual length was less than the max length), and
return a 'ByteString', possibly wrapped in e.g. 'Right'.

This is much harder to use than the regular builder, and they can't be combined
(the regular builder permits sequencing, which this can't support). But it fills
a gap!

Unlike the regular builder we stick with 'IO', because the continuations get
weird otherwise.
-}

module Bytezap.PokeCPS where

import GHC.Exts ( Int#, Addr#, Ptr )

import Data.Text.Internal ( Text(Text) )
import Data.Text.Array qualified as Text
import Data.Word ( Word8 )
import Data.ByteString ( ByteString )
import Data.Primitive.ByteArray ( ByteArray(ByteArray), indexByteArray )
import GHC.Storable ( writeWord8OffPtr )
import Tmp.BSExt qualified as B

type PokeCPS# r = Addr# -> Int# -> (Int# -> IO ByteString) -> IO r

-- | 'PokeCPS#' newtype wrapper.
--
-- Does not permit a 'Semigroup' instance because pokes do not return offset
-- information.
newtype PokeCPS r = PokeCPS { forall r. PokeCPS r -> PokeCPS# r
unPokeCPS:: PokeCPS# r }

emptyPokeCPS :: PokeCPS ByteString
emptyPokeCPS :: PokeCPS ByteString
emptyPokeCPS = PokeCPS# ByteString -> PokeCPS ByteString
forall r. PokeCPS# r -> PokeCPS r
PokeCPS (PokeCPS# ByteString -> PokeCPS ByteString)
-> PokeCPS# ByteString -> PokeCPS ByteString
forall a b. (a -> b) -> a -> b
$ \Addr#
_base# Int#
os# Int# -> IO ByteString
finalize -> Int# -> IO ByteString
finalize Int#
os#

{- Any unexplained stuff is probably parsing parts of hex bytestrings
Like @xx AA 1a2F@.
-}

{- 2024-08-27 raehik
The best algorithm would probably operate on words (let's assume 8 bytes).
It would involve a shitload of inlining and praying that GHC figures out how to
turn it all into efficient JMPs.
-}

{-
Parse hex bytestring.

Expects that the output buffer can fit the maximum length of the input.

This is a bit overly parametric in the hopes of using it with manual buffering
(e.g. allocate a single buffer and reuse it for every parse). But that's more
complex: we need to carefully set @bufInMax@ so that it also corresponds to the
output buffer as well. But now, we also need to track the input buffer
position... yeah, it's a mess.

Wait, we _are_ tracking input buffer position. We're just not passing it to the
continuation. More reworking, sigh...
-}
full
    :: ByteArray -> Ptr Word8
    -> (Int -> IO r) -> (Word8 -> IO r)
    -> (Word8 -> IO r)
    -> Int -> Int -> Int
    -> IO r
full :: forall r.
ByteArray
-> Ptr Word8
-> (Int -> IO r)
-> (Word8 -> IO r)
-> (Word8 -> IO r)
-> Int
-> Int
-> Int
-> IO r
full ByteArray
bufIn Ptr Word8
bufOut Int -> IO r
fCont Word8 -> IO r
fErrEof Word8 -> IO r
fErrNotHexDigit Int
bufInMax = Int -> Int -> IO r
go
  where
    -- each loop writes a single byte or fails
    go :: Int -> Int -> IO r
go = \Int
bufInOs Int
bufOutOs -> do
        let bufInRemaining :: Int
bufInRemaining = Int
bufInMax Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
bufInOs
        if   Int
bufInRemaining Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
2
        then case forall a. Prim a => ByteArray -> Int -> a
indexByteArray @Word8 ByteArray
bufIn Int
bufInOs of
               Word8
0x20 -> -- next byte is space
                case forall a. Prim a => ByteArray -> Int -> a
indexByteArray @Word8 ByteArray
bufIn (Int
bufInOsInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) of
                  Word8
0x20 -> -- and the byte after that: skip both
                    Int -> Int -> IO r
go (Int
bufInOsInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
2) Int
bufOutOs
                  Word8
d1   ->
                    -- next byte is space, then non-space
                    -- could just skip one, but we've already asserted 2 bytes
                    -- so let's copy-paste for just 1 more byte
                    if   Int
bufInRemaining Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
3
                    then do let d0 :: Word8
d0 = forall a. Prim a => ByteArray -> Int -> a
indexByteArray @Word8 ByteArray
bufIn (Int
bufInOsInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
2)
                            (Word8 -> IO r) -> Word8 -> Word8 -> (Word8 -> IO r) -> IO r
forall r. (Word8 -> r) -> Word8 -> Word8 -> (Word8 -> r) -> r
withHexNibbles Word8 -> IO r
fErrNotHexDigit Word8
d1 Word8
d0 ((Word8 -> IO r) -> IO r) -> (Word8 -> IO r) -> IO r
forall a b. (a -> b) -> a -> b
$ \Word8
b -> do
                                Ptr Word8 -> Int -> Word8 -> IO ()
writeWord8OffPtr Ptr Word8
bufOut Int
bufOutOs Word8
b
                                Int -> Int -> IO r
go (Int
bufInOsInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3) (Int
bufOutOsInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
                    else Word8 -> IO r
fErrEof Word8
d1
               Word8
d1   -> do
                let d0 :: Word8
d0 = forall a. Prim a => ByteArray -> Int -> a
indexByteArray @Word8 ByteArray
bufIn (Int
bufInOsInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
                (Word8 -> IO r) -> Word8 -> Word8 -> (Word8 -> IO r) -> IO r
forall r. (Word8 -> r) -> Word8 -> Word8 -> (Word8 -> r) -> r
withHexNibbles Word8 -> IO r
fErrNotHexDigit Word8
d1 Word8
d0 ((Word8 -> IO r) -> IO r) -> (Word8 -> IO r) -> IO r
forall a b. (a -> b) -> a -> b
$ \Word8
b -> do
                    Ptr Word8 -> Int -> Word8 -> IO ()
writeWord8OffPtr Ptr Word8
bufOut Int
bufOutOs Word8
b
                    Int -> Int -> IO r
go (Int
bufInOsInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
2) (Int
bufOutOsInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
        else if   Int
bufInRemaining Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
             then Int -> IO r
fCont Int
bufOutOs
             else -- 1 byte remaining
                  case forall a. Prim a => ByteArray -> Int -> a
indexByteArray @Word8 ByteArray
bufIn Int
bufInOs of
                    Word8
0x20 -> Int -> IO r
fCont Int
bufOutOs
                    Word8
d1   -> Word8 -> IO r
fErrEof Word8
d1
{-# INLINE full #-}

withHexNibbles ::
    (Word8 -> r) -> Word8 -> Word8 -> (Word8 -> r) -> r
withHexNibbles :: forall r. (Word8 -> r) -> Word8 -> Word8 -> (Word8 -> r) -> r
withHexNibbles Word8 -> r
fFail Word8
d1 Word8
d0 Word8 -> r
fCont =
    Word8 -> (Word8 -> r) -> (Word8 -> r) -> r
forall r. Word8 -> (Word8 -> r) -> (Word8 -> r) -> r
withByteAsHexDigit Word8
d1 Word8 -> r
fFail ((Word8 -> r) -> r) -> (Word8 -> r) -> r
forall a b. (a -> b) -> a -> b
$ \Word8
n1 ->
        Word8 -> (Word8 -> r) -> (Word8 -> r) -> r
forall r. Word8 -> (Word8 -> r) -> (Word8 -> r) -> r
withByteAsHexDigit Word8
d0 Word8 -> r
fFail ((Word8 -> r) -> r) -> (Word8 -> r) -> r
forall a b. (a -> b) -> a -> b
$ \Word8
n0 ->
            Word8 -> r
fCont (Word8 -> r) -> Word8 -> r
forall a b. (a -> b) -> a -> b
$ Word8
0x10Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
*Word8
n1 Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
n0
{-# INLINE withHexNibbles #-}

withByteAsHexDigit :: Word8 -> (Word8 -> r) -> (Word8 -> r) -> r
withByteAsHexDigit :: forall r. Word8 -> (Word8 -> r) -> (Word8 -> r) -> r
withByteAsHexDigit Word8
c Word8 -> r
fFail Word8 -> r
f
  | Word8
dec  Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
9 = Word8 -> r
f Word8
dec
  | Word8
hexl Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
5 = Word8 -> r
f (Word8 -> r) -> Word8 -> r
forall a b. (a -> b) -> a -> b
$ Word8
hexl Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
10
  | Word8
hexu Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
5 = Word8 -> r
f (Word8 -> r) -> Word8 -> r
forall a b. (a -> b) -> a -> b
$ Word8
hexu Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
10
  | Bool
otherwise = Word8 -> r
fFail Word8
c
  where
    dec :: Word8
dec   = Word8
c Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Word8
ord_0
    hexl :: Word8
hexl  = Word8
c Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Word8
ord_a
    hexu :: Word8
hexu  = Word8
c Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Word8
ord_A
    ord_0 :: Word8
ord_0 = Word8
0x30
    ord_a :: Word8
ord_a = Word8
0x61
    ord_A :: Word8
ord_A = Word8
0x41
{-# INLINE withByteAsHexDigit #-}

textToByteStringUptoIO :: Text -> IO (Either String ByteString)
textToByteStringUptoIO :: Text -> IO (Either String ByteString)
textToByteStringUptoIO = \(Text (Text.ByteArray ByteArray#
tarr) Int
tos Int
tlen) ->
    Int
-> (ForeignPtr Word8 -> Int -> IO (Either String ByteString))
-> ((Int -> IO (Either String ByteString))
    -> Ptr Word8 -> IO (Either String ByteString))
-> IO (Either String ByteString)
forall r.
Int
-> (ForeignPtr Word8 -> Int -> IO r)
-> ((Int -> IO r) -> Ptr Word8 -> IO r)
-> IO r
B.createCPS (Int
tlen Int -> Int -> Int
forall a. Integral a => a -> a -> a
`quot` Int
2) ForeignPtr Word8 -> Int -> IO (Either String ByteString)
forall {a}. ForeignPtr Word8 -> Int -> IO (Either a ByteString)
finalizer (((Int -> IO (Either String ByteString))
  -> Ptr Word8 -> IO (Either String ByteString))
 -> IO (Either String ByteString))
-> ((Int -> IO (Either String ByteString))
    -> Ptr Word8 -> IO (Either String ByteString))
-> IO (Either String ByteString)
forall a b. (a -> b) -> a -> b
$ \Int -> IO (Either String ByteString)
finalize Ptr Word8
buf ->
        ByteArray
-> Ptr Word8
-> (Int -> IO (Either String ByteString))
-> (Word8 -> IO (Either String ByteString))
-> (Word8 -> IO (Either String ByteString))
-> Int
-> Int
-> Int
-> IO (Either String ByteString)
forall r.
ByteArray
-> Ptr Word8
-> (Int -> IO r)
-> (Word8 -> IO r)
-> (Word8 -> IO r)
-> Int
-> Int
-> Int
-> IO r
full (ByteArray# -> ByteArray
ByteArray ByteArray#
tarr) Ptr Word8
buf Int -> IO (Either String ByteString)
finalize Word8 -> IO (Either String ByteString)
forall {p} {b}. p -> IO (Either String b)
fErrEof Word8 -> IO (Either String ByteString)
forall {b}. Word8 -> IO (Either String b)
fErrNotHexDigit Int
tlen Int
tos Int
0
  where
    fErrNotHexDigit :: Word8 -> IO (Either String b)
fErrNotHexDigit = \Word8
b -> Either String b -> IO (Either String b)
forall a. a -> IO a
forall (f :: Type -> Type) a. Applicative f => a -> f a
pure (Either String b -> IO (Either String b))
-> Either String b -> IO (Either String b)
forall a b. (a -> b) -> a -> b
$ String -> Either String b
forall a b. a -> Either a b
Left (String
"not a hexadecimal digit: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Word8 -> String
forall a. Show a => a -> String
show Word8
b)
    finalizer :: ForeignPtr Word8 -> Int -> IO (Either a ByteString)
finalizer = \ForeignPtr Word8
fp Int
len -> ByteString -> Either a ByteString
forall a b. b -> Either a b
Right (ByteString -> Either a ByteString)
-> IO ByteString -> IO (Either a ByteString)
forall (f :: Type -> Type) a b. Functor f => (a -> b) -> f a -> f b
<$> ForeignPtr Word8 -> Int -> IO ByteString
B.mkDeferredByteString ForeignPtr Word8
fp Int
len
    fErrEof :: p -> IO (Either String b)
fErrEof = \p
_ -> Either String b -> IO (Either String b)
forall a. a -> IO a
forall (f :: Type -> Type) a. Applicative f => a -> f a
pure (Either String b -> IO (Either String b))
-> Either String b -> IO (Either String b)
forall a b. (a -> b) -> a -> b
$ String -> Either String b
forall a b. a -> Either a b
Left String
"ended during byte (TODO)"
{-# INLINE textToByteStringUptoIO #-}