{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE CPP #-}

module Bytezap.Text where

import Bytezap
import Bytezap.Int

import Data.Text.Internal

-- unused import warnings due to messy CPP
import Bytezap.Bytes
import Data.Text.Array qualified as A
import GHC.Exts

import Data.Char ( ord )
import Data.Foldable ( foldl' )
import Data.Bits ( shiftR, (.&.) )

textUtf8 :: Text -> Write
{-# INLINE textUtf8 #-}
#if MIN_VERSION_text(2,0,0)
textUtf8 :: Text -> Write
textUtf8 (Text (A.ByteArray ByteArray#
arr#) (I# Int#
off#) len :: Int
len@(I# Int#
len#)) =
    Int -> Poke -> Write
Write Int
len (Poke -> Write) -> Poke -> Write
forall a b. (a -> b) -> a -> b
$ ByteArray# -> Int# -> Int# -> Poke
pokeByteArray# ByteArray#
arr# Int#
off# Int#
len#
#else
textUtf8 = error "Bytezap.Text.textUtf8: cba for text-1"
#endif

-- TODO adapted from utf8-string
charUtf8 :: Char -> Write
charUtf8 :: Char -> Write
charUtf8 = Int -> Write
forall {a}. (Integral a, Bits a) => a -> Write
go (Int -> Write) -> (Char -> Int) -> Char -> Write
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord
 where
  go :: a -> Write
go a
oc
   | a
oc a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<= a
0x7f       = Word8 -> Write
w8 (Word8 -> Write) -> Word8 -> Write
forall a b. (a -> b) -> a -> b
$ a -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
oc

   | a
oc a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<= a
0x7ff      =    Word8 -> Write
w8 (a -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
0xc0 a -> a -> a
forall a. Num a => a -> a -> a
+ (a
oc a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
6)))
                        Write -> Write -> Write
forall a. Semigroup a => a -> a -> a
<> Word8 -> Write
w8 (a -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
0x80 a -> a -> a
forall a. Num a => a -> a -> a
+ a
oc a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x3f))

   | a
oc a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<= a
0xffff     =    Word8 -> Write
w8 (a -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
0xe0 a -> a -> a
forall a. Num a => a -> a -> a
+ (a
oc a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
12)))
                        Write -> Write -> Write
forall a. Semigroup a => a -> a -> a
<> Word8 -> Write
w8 (a -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
0x80 a -> a -> a
forall a. Num a => a -> a -> a
+ ((a
oc a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
6) a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x3f)))
                        Write -> Write -> Write
forall a. Semigroup a => a -> a -> a
<> Word8 -> Write
w8 (a -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
0x80 a -> a -> a
forall a. Num a => a -> a -> a
+ a
oc a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x3f))
   | Bool
otherwise        =    Word8 -> Write
w8 (a -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
0xf0 a -> a -> a
forall a. Num a => a -> a -> a
+ (a
oc a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
18)))
                        Write -> Write -> Write
forall a. Semigroup a => a -> a -> a
<> Word8 -> Write
w8 (a -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
0x80 a -> a -> a
forall a. Num a => a -> a -> a
+ ((a
oc a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
12) a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x3f)))
                        Write -> Write -> Write
forall a. Semigroup a => a -> a -> a
<> Word8 -> Write
w8 (a -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
0x80 a -> a -> a
forall a. Num a => a -> a -> a
+ ((a
oc a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
6) a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x3f)))
                        Write -> Write -> Write
forall a. Semigroup a => a -> a -> a
<> Word8 -> Write
w8 (a -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
0x80 a -> a -> a
forall a. Num a => a -> a -> a
+ a
oc a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x3f))
{-# INLINE charUtf8 #-}

-- | TODO
--
-- In a perfect world, functions like this would not exist. But this is not a
-- perfect world. 'String's suck for a number of reasons. One big one is that
-- they are horrendous to serialize. Worse, as of GHC 9.6, type-level strings
-- only reflect to 'String'. This function does the best it can to efficiently
-- serialize 'String's. It would be much easier and probably similarly fast to
-- go through 'Text' instead, but who doesn't like a little challenge?
stringUtf8 :: String -> Write
stringUtf8 :: String -> Write
stringUtf8 = (Write -> Char -> Write) -> Write -> String -> Write
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: Type -> Type) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\Write
w Char
c -> Write
w Write -> Write -> Write
forall a. Semigroup a => a -> a -> a
<> Char -> Write
charUtf8 Char
c) Write
forall a. Monoid a => a
mempty
{-# INLINE stringUtf8 #-}