{-# LANGUAGE GeneralizedNewtypeDeriving, MagicHash, BangPatterns #-}

{-|
A library for efficiently building up a buffer of UTF-8-encoded text.  If only
safe functions are used, the resulting 'ByteString' is guaranteed to be valid
UTF-8.

To run a sequence of Utf8Builder actions and retrieve the resulting buffer, use
'runUtf8Builder'.

In special situations, for maximum performance, unsafe functions are
also provided.  The unsafe functions do not guarantee the buffer is
correct UTF-8.

This module is built on top of "Data.BufferBuilder".
-}
module Data.BufferBuilder.Utf8 (

    -- * The Utf8Builder Monad
      Utf8Builder
    , runUtf8Builder

    -- * Text encoding
    , appendText
    , appendString
    , appendChar

    -- * ASCII-7
    , appendByte7
    , appendChar7
    , appendBS7
    , appendLiteral7

    -- * URL percent-encoding
    , appendUrlEncoded

    -- * Printing numbers
    , appendDecimalSignedInt
    , appendDecimalDouble

    -- * Escaped JSON
    , appendEscapedJson
    , appendEscapedJsonLiteral
    , appendEscapedJsonText

    -- * Unsafe append operations
    , unsafeAppendBufferBuilder
    , unsafeAppendByte
    , unsafeAppendChar8
    , unsafeAppendLiteral
    , unsafeAppendLiteralN
    , unsafeAppendBS
    ) where

import GHC.Base
import GHC.Word
import Control.Applicative
import Data.ByteString (ByteString)
import Data.BufferBuilder (BufferBuilder)
import qualified Data.BufferBuilder as BB
import Data.Text (Text)
import Data.Text.Encoding (encodeUtf8)

-- | A builder action that appends UTF-8 text to an underlying buffer.
--
-- This is a thin @newtype@ over 'BufferBuilder'.  The 'Functor',
-- 'Applicative' and 'Monad' instances are lifted from the wrapped type
-- via @GeneralizedNewtypeDeriving@.
--
-- The module's export list exposes only the type, not the constructor, so
-- outside this module an arbitrary 'BufferBuilder' can be wrapped only
-- through 'unsafeAppendBufferBuilder'.
newtype Utf8Builder a = Utf8Builder { unBuilder :: BufferBuilder a }
    deriving (Functor, Applicative, Monad)

-- | Run a sequence of 'Utf8Builder' actions and extract the resulting
-- buffer as a 'ByteString'.
--
-- Unwraps the builder and hands it to 'BB.runBufferBuilder'.
runUtf8Builder :: Utf8Builder () -> ByteString
runUtf8Builder a = BB.runBufferBuilder $ unBuilder a
{-# INLINE runUtf8Builder #-}


-- Text encoding

-- TODO: optimize appendText with custom UTF-16 -> UTF-8 encoder in C

-- | Encodes the given 'Text' in UTF-8, appending it to the buffer.
--
-- The text is first converted to a 'ByteString' with 'encodeUtf8' and that
-- result is then appended with 'BB.appendBS'.
appendText :: Text -> Utf8Builder ()
appendText a = Utf8Builder $ BB.appendBS $ encodeUtf8 a
{-# INLINE appendText #-}

-- | Encodes the given 'String' in UTF-8, appending it to the buffer.
--
-- Each character is appended in order with 'appendChar'.
appendString :: String -> Utf8Builder ()
appendString s = mapM_ appendChar s
{-# INLINABLE appendString #-}

-- | Encodes a single 'Char' in UTF-8, appending it to the buffer.
--
-- Delegates to 'BB.appendCharUtf8'.
appendChar :: Char -> Utf8Builder ()
appendChar c = Utf8Builder $ BB.appendCharUtf8 c
{-# INLINE appendChar #-}


-- ASCII-7

-- | Appends the bottom 7 bits of a byte to the buffer.
--
-- Delegates to 'BB.appendByte7'.
appendByte7 :: Word8 -> Utf8Builder ()
appendByte7 = Utf8Builder . BB.appendByte7
{-# INLINE appendByte7 #-}

-- | Appends the bottom 7 bits of a 'Char' to the buffer.
--
-- Delegates to 'BB.appendChar7'.
appendChar7 :: Char -> Utf8Builder ()
appendChar7 = Utf8Builder . BB.appendChar7
{-# INLINE appendChar7 #-}

-- | Appends the given 'ByteString' to the buffer, taking the bottom
-- 7 bits of each byte.
--
-- Delegates to 'BB.appendBS7'.
appendBS7 :: ByteString -> Utf8Builder ()
appendBS7 = Utf8Builder . BB.appendBS7
{-# INLINE appendBS7 #-}

-- | Appends the zero-terminated byte string at the given address
-- to the buffer, taking the bottom 7 bits of each byte.
--
-- Delegates to 'BB.appendLiteral7'.
appendLiteral7 :: Addr# -> Utf8Builder ()
appendLiteral7 addr = Utf8Builder $ BB.appendLiteral7 addr
{-# INLINE appendLiteral7 #-}


-- URL percent-encoding

-- | Directly calls 'BB.appendUrlEncoded'.  The output from URL
-- percent-encoding is guaranteed to be valid UTF-8.
appendUrlEncoded :: ByteString -> Utf8Builder ()
appendUrlEncoded = Utf8Builder . BB.appendUrlEncoded
{-# INLINE appendUrlEncoded #-}


-- Printing numbers

-- | Appends a printed form of the given 'Int' to the buffer by delegating
-- to 'BB.appendDecimalSignedInt'.
--
-- NOTE(review): presumably this is the signed base-10 rendering; the exact
-- formatting is defined by "Data.BufferBuilder" and is not visible here.
appendDecimalSignedInt :: Int -> Utf8Builder ()
appendDecimalSignedInt a = Utf8Builder $ BB.appendDecimalSignedInt a
{-# INLINE appendDecimalSignedInt #-}

-- | Appends a printed form of the given 'Double' to the buffer by
-- delegating to 'BB.appendDecimalDouble'.
--
-- NOTE(review): the number format (precision, exponent notation, handling
-- of NaN and infinities) is defined by "Data.BufferBuilder" and is not
-- visible here — confirm before relying on it.
appendDecimalDouble :: Double -> Utf8Builder ()
appendDecimalDouble d = Utf8Builder $ BB.appendDecimalDouble d
{-# INLINE appendDecimalDouble #-}


-- Escaped JSON

-- | Appends the byte string at the given address as escaped JSON by
-- delegating to 'BB.appendEscapedJsonLiteral'.
--
-- NOTE(review): presumably the address must point at a zero-terminated
-- literal, as with 'appendLiteral7'; the escaping rules and whether
-- surrounding quotes are emitted are defined by "Data.BufferBuilder" —
-- confirm there.
appendEscapedJsonLiteral :: Addr# -> Utf8Builder ()
appendEscapedJsonLiteral addr = Utf8Builder $ BB.appendEscapedJsonLiteral addr
{-# INLINE appendEscapedJsonLiteral #-}

-- | Appends the given 'ByteString' as escaped JSON by delegating to
-- 'BB.appendEscapedJson'.
--
-- NOTE(review): the bytes are handed to "Data.BufferBuilder" unchanged.
-- Whether input that is not already valid UTF-8 is rejected or sanitized
-- is not visible here — confirm, since this function is exported as a
-- safe operation.
appendEscapedJson :: ByteString -> Utf8Builder ()
appendEscapedJson a = Utf8Builder $ BB.appendEscapedJson a
{-# INLINE appendEscapedJson #-}

-- | Appends the given 'Text' as escaped JSON by delegating to
-- 'BB.appendEscapedJsonText'.
--
-- NOTE(review): the escaping rules and whether surrounding quotes are
-- emitted are defined by "Data.BufferBuilder" — confirm there.
appendEscapedJsonText :: Text -> Utf8Builder ()
appendEscapedJsonText txt = Utf8Builder $ BB.appendEscapedJsonText txt
{-# INLINE appendEscapedJsonText #-}


-- Unsafe

-- | Directly append a 'BufferBuilder' into the UTF-8 code stream.  Incorrect
-- use of this function can result in invalid UTF-8.
--
-- This is the 'Utf8Builder' constructor itself: the wrapped action is not
-- inspected or validated in any way.
unsafeAppendBufferBuilder :: BufferBuilder () -> Utf8Builder ()
unsafeAppendBufferBuilder = Utf8Builder

-- | Directly append a byte into the UTF-8 code stream.  Incorrect use of
-- this function can result in invalid UTF-8.
--
-- Delegates to 'BB.appendByte'.
unsafeAppendByte :: Word8 -> Utf8Builder ()
unsafeAppendByte = Utf8Builder . BB.appendByte
{-# INLINE unsafeAppendByte #-}

-- | Directly append the bottom 8 bits of the given character to the UTF-8
-- code stream.  Incorrect use of this function can result in invalid UTF-8.
--
-- Delegates to 'BB.appendChar8'.
unsafeAppendChar8 :: Char -> Utf8Builder ()
unsafeAppendChar8 = Utf8Builder . BB.appendChar8
{-# INLINE unsafeAppendChar8 #-}

-- | Directly append the zero-terminated byte sequence pointed to by
-- the given address.  Be careful that the referenced byte sequence
-- contains valid UTF-8.
--
-- Delegates to 'BB.appendLiteral'.
unsafeAppendLiteral :: Addr# -> Utf8Builder ()
unsafeAppendLiteral addr = Utf8Builder $ BB.appendLiteral addr
{-# INLINE unsafeAppendLiteral #-}

-- | Directly append the given byte sequence pointed to by the given address.
-- Be careful that the referenced byte sequence contains valid UTF-8.
--
-- The first argument is the length passed on to 'BB.unsafeAppendLiteralN';
-- it is forced by a bang pattern before delegating.
--
-- __WARNING__: passing an incorrect length value is likely to cause an access
-- violation or worse.
unsafeAppendLiteralN :: Int -> Addr# -> Utf8Builder ()
unsafeAppendLiteralN !len addr = Utf8Builder $ BB.unsafeAppendLiteralN len addr
{-# INLINE unsafeAppendLiteralN #-}

-- | Directly append the given 'ByteString' to the output buffer.
-- Be careful that the referenced 'ByteString' contains valid UTF-8.
--
-- Delegates to 'BB.appendBS' without inspecting the bytes.
unsafeAppendBS :: ByteString -> Utf8Builder ()
unsafeAppendBS a = Utf8Builder $ BB.appendBS a
{-# INLINE unsafeAppendBS #-}