{-# LANGUAGE CPP #-}

module TextBuilderDev.Allocator
  ( -- * Execution
    allocate,

    -- * Definition
    Allocator,
    force,
    text,
    asciiByteString,
    char,
    unicodeCodePoint,
    utf8CodeUnits1,
    utf8CodeUnits2,
    utf8CodeUnits3,
    utf8CodeUnits4,
    utf16CodeUnits1,
    utf16CodeUnits2,
  )
where

import qualified Data.ByteString as ByteString
import qualified Data.Text as Text
import qualified Data.Text.Array as TextArray
import qualified Data.Text.IO as Text
import qualified Data.Text.Internal as TextInternal
import qualified Data.Text.Lazy as TextLazy
import qualified Data.Text.Lazy.Builder as TextLazyBuilder
import TextBuilderDev.Prelude
import qualified TextBuilderDev.Utf16View as Utf16View
import qualified TextBuilderDev.Utf8View as Utf8View

-- * ArrayWriter

newtype ArrayWriter
  = ArrayWriter (forall s. TextArray.MArray s -> Int -> ST s Int)

instance Semigroup ArrayWriter where
  {-# INLINE (<>) #-}
  ArrayWriter forall s. MArray s -> Int -> ST s Int
writeL <> :: ArrayWriter -> ArrayWriter -> ArrayWriter
<> ArrayWriter forall s. MArray s -> Int -> ST s Int
writeR =
    (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
ArrayWriter forall a b. (a -> b) -> a -> b
$ \MArray s
array Int
offset -> do
      Int
offsetAfter1 <- forall s. MArray s -> Int -> ST s Int
writeL MArray s
array Int
offset
      forall s. MArray s -> Int -> ST s Int
writeR MArray s
array Int
offsetAfter1

instance Monoid ArrayWriter where
  {-# INLINE mempty #-}
  mempty :: ArrayWriter
mempty = (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
ArrayWriter forall a b. (a -> b) -> a -> b
$ forall a b. a -> b -> a
const forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. Monad m => a -> m a
return

-- * Allocator

-- | Execute a builder producing a strict text.
allocate :: Allocator -> Text
allocate :: Allocator -> Text
allocate (Allocator (ArrayWriter forall s. MArray s -> Int -> ST s Int
write) Int
sizeBound) =
  forall a. (forall s. ST s a) -> a
runST forall a b. (a -> b) -> a -> b
$ do
    MArray s
array <- forall s. Int -> ST s (MArray s)
TextArray.new Int
sizeBound
    Int
offsetAfter <- forall s. MArray s -> Int -> ST s Int
write MArray s
array Int
0
    Array
frozenArray <- forall s. MArray s -> ST s Array
TextArray.unsafeFreeze MArray s
array
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Array -> Int -> Int -> Text
TextInternal.text Array
frozenArray Int
0 Int
offsetAfter

-- |
-- Specification of how to efficiently construct strict 'Text'.
-- Provides instances of 'Semigroup' and 'Monoid', which have complexity of /O(1)/.
data Allocator
  = Allocator
      !ArrayWriter
      {-# UNPACK #-} !Int

instance Semigroup Allocator where
  {-# INLINE (<>) #-}
  <> :: Allocator -> Allocator -> Allocator
(<>) (Allocator ArrayWriter
writer1 Int
estimatedArraySize1) (Allocator ArrayWriter
writer2 Int
estimatedArraySize2) =
    ArrayWriter -> Int -> Allocator
Allocator ArrayWriter
writer Int
estimatedArraySize
    where
      writer :: ArrayWriter
writer = ArrayWriter
writer1 forall a. Semigroup a => a -> a -> a
<> ArrayWriter
writer2
      estimatedArraySize :: Int
estimatedArraySize = Int
estimatedArraySize1 forall a. Num a => a -> a -> a
+ Int
estimatedArraySize2

instance Monoid Allocator where
  {-# INLINE mempty #-}
  mempty :: Allocator
mempty = ArrayWriter -> Int -> Allocator
Allocator forall a. Monoid a => a
mempty Int
0

-- |
-- Run the builder and pack the produced text into a new builder.
--
-- Useful to have around builders that you reuse,
-- because a forced builder is much faster,
-- since it's virtually a single call @memcopy@.
{-# INLINE force #-}
force :: Allocator -> Allocator
force :: Allocator -> Allocator
force = Text -> Allocator
text forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Allocator -> Text
allocate

-- | Strict text.
{-# INLINEABLE text #-}
text :: Text -> Allocator
#if MIN_VERSION_text(2,0,0)
text text@(TextInternal.Text array offset length) =
  Allocator writer length
  where
    writer =
      ArrayWriter $ \builderArray builderOffset -> do
        TextArray.copyI length builderArray builderOffset array offset
        return $ builderOffset + length
#else
text :: Text -> Allocator
text text :: Text
text@(TextInternal.Text Array
array Int
offset Int
length) =
  ArrayWriter -> Int -> Allocator
Allocator ArrayWriter
writer Int
length
  where
    writer :: ArrayWriter
writer =
      (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
ArrayWriter forall a b. (a -> b) -> a -> b
$ \MArray s
builderArray Int
builderOffset -> do
        let builderOffsetAfter :: Int
builderOffsetAfter = Int
builderOffset forall a. Num a => a -> a -> a
+ Int
length
        forall s. MArray s -> Int -> Array -> Int -> Int -> ST s ()
TextArray.copyI MArray s
builderArray Int
builderOffset Array
array Int
offset Int
builderOffsetAfter
        forall (m :: * -> *) a. Monad m => a -> m a
return Int
builderOffsetAfter
#endif

-- | ASCII byte string.
--
-- It's your responsibility to ensure that the bytes are in proper range,
-- otherwise the produced text will be broken.
{-# INLINEABLE asciiByteString #-}
asciiByteString :: ByteString -> Allocator
asciiByteString :: ByteString -> Allocator
asciiByteString ByteString
byteString =
  ArrayWriter -> Int -> Allocator
Allocator ArrayWriter
action Int
length
  where
    length :: Int
length = ByteString -> Int
ByteString.length ByteString
byteString
    action :: ArrayWriter
action =
      (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
ArrayWriter forall a b. (a -> b) -> a -> b
$ \MArray s
array ->
        let step :: Word8 -> (Int -> ST s Int) -> Int -> ST s Int
step Word8
byte Int -> ST s Int
next Int
index = do
              forall s. MArray s -> Int -> Word16 -> ST s ()
TextArray.unsafeWrite MArray s
array Int
index (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
byte)
              Int -> ST s Int
next (forall a. Enum a => a -> a
succ Int
index)
         in forall a. (Word8 -> a -> a) -> a -> ByteString -> a
ByteString.foldr Word8 -> (Int -> ST s Int) -> Int -> ST s Int
step forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
byteString

-- | Unicode character.
{-# INLINE char #-}
char :: Char -> Allocator
char :: Char -> Allocator
char = Int -> Allocator
unicodeCodePoint forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Char -> Int
ord

-- | Unicode code point.
{-# INLINE unicodeCodePoint #-}
unicodeCodePoint :: Int -> Allocator
#if MIN_VERSION_text(2,0,0)
unicodeCodePoint x =
  Utf8View.unicodeCodePoint x utf8CodeUnits1 utf8CodeUnits2 utf8CodeUnits3 utf8CodeUnits4
#else
unicodeCodePoint :: Int -> Allocator
unicodeCodePoint Int
x =
  Int -> Utf16View
Utf16View.unicodeCodePoint Int
x Word16 -> Allocator
utf16CodeUnits1 Word16 -> Word16 -> Allocator
utf16CodeUnits2
#endif

-- | Single code-unit UTF-8 character.
utf8CodeUnits1 :: Word8 -> Allocator
#if MIN_VERSION_text(2,0,0)
{-# INLINEABLE utf8CodeUnits1 #-}
utf8CodeUnits1 unit1 = Allocator writer 1 
  where
    writer = ArrayWriter $ \array offset ->
      TextArray.unsafeWrite array offset unit1
        $> succ offset
#else
{-# INLINE utf8CodeUnits1 #-}
utf8CodeUnits1 :: Word8 -> Allocator
utf8CodeUnits1 Word8
unit1 =
  Word8 -> Utf16View
Utf16View.utf8CodeUnits1 Word8
unit1 Word16 -> Allocator
utf16CodeUnits1 Word16 -> Word16 -> Allocator
utf16CodeUnits2
#endif

-- | Double code-unit UTF-8 character.
utf8CodeUnits2 :: Word8 -> Word8 -> Allocator
#if MIN_VERSION_text(2,0,0)
{-# INLINEABLE utf8CodeUnits2 #-}
utf8CodeUnits2 unit1 unit2 = Allocator writer 2 
  where
    writer = ArrayWriter $ \array offset -> do
      TextArray.unsafeWrite array offset unit1
      TextArray.unsafeWrite array (offset + 1) unit2
      return $ offset + 2
#else
{-# INLINE utf8CodeUnits2 #-}
utf8CodeUnits2 :: Word8 -> Word8 -> Allocator
utf8CodeUnits2 Word8
unit1 Word8
unit2 =
  Word8 -> Word8 -> Utf16View
Utf16View.utf8CodeUnits2 Word8
unit1 Word8
unit2 Word16 -> Allocator
utf16CodeUnits1 Word16 -> Word16 -> Allocator
utf16CodeUnits2
#endif

-- | Triple code-unit UTF-8 character.
utf8CodeUnits3 :: Word8 -> Word8 -> Word8 -> Allocator
#if MIN_VERSION_text(2,0,0)
{-# INLINEABLE utf8CodeUnits3 #-}
utf8CodeUnits3 unit1 unit2 unit3 = Allocator writer 3 
  where
    writer = ArrayWriter $ \array offset -> do
      TextArray.unsafeWrite array offset unit1
      TextArray.unsafeWrite array (offset + 1) unit2
      TextArray.unsafeWrite array (offset + 2) unit3
      return $ offset + 3
#else
{-# INLINE utf8CodeUnits3 #-}
utf8CodeUnits3 :: Word8 -> Word8 -> Word8 -> Allocator
utf8CodeUnits3 Word8
unit1 Word8
unit2 Word8
unit3 =
  Word8 -> Word8 -> Word8 -> Utf16View
Utf16View.utf8CodeUnits3 Word8
unit1 Word8
unit2 Word8
unit3 Word16 -> Allocator
utf16CodeUnits1 Word16 -> Word16 -> Allocator
utf16CodeUnits2
#endif

-- | UTF-8 character out of 4 code units.
utf8CodeUnits4 :: Word8 -> Word8 -> Word8 -> Word8 -> Allocator
#if MIN_VERSION_text(2,0,0)
{-# INLINEABLE utf8CodeUnits4 #-}
utf8CodeUnits4 unit1 unit2 unit3 unit4 = Allocator writer 4 
  where
    writer = ArrayWriter $ \array offset -> do
      TextArray.unsafeWrite array offset unit1
      TextArray.unsafeWrite array (offset + 1) unit2
      TextArray.unsafeWrite array (offset + 2) unit3
      TextArray.unsafeWrite array (offset + 3) unit4
      return $ offset + 4
#else
{-# INLINE utf8CodeUnits4 #-}
utf8CodeUnits4 :: Word8 -> Word8 -> Word8 -> Word8 -> Allocator
utf8CodeUnits4 Word8
unit1 Word8
unit2 Word8
unit3 Word8
unit4 =
  Word8 -> Word8 -> Word8 -> Word8 -> Utf16View
Utf16View.utf8CodeUnits4 Word8
unit1 Word8
unit2 Word8
unit3 Word8
unit4 Word16 -> Allocator
utf16CodeUnits1 Word16 -> Word16 -> Allocator
utf16CodeUnits2
#endif

-- | Single code-unit UTF-16 character.
utf16CodeUnits1 :: Word16 -> Allocator
#if MIN_VERSION_text(2,0,0)
{-# INLINE utf16CodeUnits1 #-}
utf16CodeUnits1 = unicodeCodePoint . fromIntegral
#else
{-# INLINEABLE utf16CodeUnits1 #-}
utf16CodeUnits1 :: Word16 -> Allocator
utf16CodeUnits1 Word16
unit =
  ArrayWriter -> Int -> Allocator
Allocator ArrayWriter
writer Int
1
  where
    writer :: ArrayWriter
writer =
      (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
ArrayWriter forall a b. (a -> b) -> a -> b
$ \MArray s
array Int
offset ->
        forall s. MArray s -> Int -> Word16 -> ST s ()
TextArray.unsafeWrite MArray s
array Int
offset Word16
unit
          forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> forall a. Enum a => a -> a
succ Int
offset
#endif

-- | Double code-unit UTF-16 character.
utf16CodeUnits2 :: Word16 -> Word16 -> Allocator
#if MIN_VERSION_text(2,0,0)
{-# INLINE utf16CodeUnits2 #-}
utf16CodeUnits2 unit1 unit2 = unicodeCodePoint cp
  where
    cp = (((fromIntegral unit1 .&. 0x3FF) `shiftL` 10) .|. (fromIntegral unit2 .&. 0x3FF)) + 0x10000
#else
{-# INLINEABLE utf16CodeUnits2 #-}
utf16CodeUnits2 :: Word16 -> Word16 -> Allocator
utf16CodeUnits2 Word16
unit1 Word16
unit2 =
  ArrayWriter -> Int -> Allocator
Allocator ArrayWriter
writer Int
2
  where
    writer :: ArrayWriter
writer =
      (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
ArrayWriter forall a b. (a -> b) -> a -> b
$ \MArray s
array Int
offset -> do
        forall s. MArray s -> Int -> Word16 -> ST s ()
TextArray.unsafeWrite MArray s
array Int
offset Word16
unit1
        forall s. MArray s -> Int -> Word16 -> ST s ()
TextArray.unsafeWrite MArray s
array (forall a. Enum a => a -> a
succ Int
offset) Word16
unit2
        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Int
offset forall a. Num a => a -> a -> a
+ Int
2
#endif