-- |
-- Module        : Data.Text.Encoding.NonEmpty
-- Copyright     : Gautier DI FOLCO
-- License       : BSD2
--
-- Maintainer    : Gautier DI FOLCO <gautier.difolco@gmail.com>
-- Stability     : Unstable
-- Portability   : GHC
--
-- 'NonEmpty' wrappers around "Data.Text.Encoding"
module Data.Text.Encoding.NonEmpty
  ( -- * Decoding ByteStrings to Text
    -- $strict
    decodeLatin1,
    decodeUtf8,
    decodeUtf16LE,
    decodeUtf16BE,
    decodeUtf32LE,
    decodeUtf32BE,

    -- ** Catchable failure
    decodeUtf8',

    -- ** Controllable error handling
    decodeUtf8With,
    decodeUtf16LEWith,
    decodeUtf16BEWith,
    decodeUtf32LEWith,
    decodeUtf32BEWith,

    -- ** Stream oriented decoding
    -- $stream
    streamDecodeUtf8,
    streamDecodeUtf8With,
    E.Decoding (..),

    -- * Encoding Text to ByteStrings
    encodeUtf8,
    encodeUtf16LE,
    encodeUtf16BE,
    encodeUtf32LE,
    encodeUtf32BE,

    -- * Encoding Text using ByteString Builders
    encodeUtf8Builder,
    encodeUtf8BuilderEscaped,
  )
where

import Data.ByteString (ByteString)
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Builder.Prim as BP
import Data.NonEmpty
import qualified Data.Text.Encoding as E
import Data.Text.Encoding.Error (OnDecodeError, UnicodeException)
import Data.Text.NonEmpty
import Data.Word (Word8)
import GHC.Stack

-- $strict
--
-- All of the single-parameter functions for decoding bytestrings
-- encoded in one of the Unicode Transformation Formats (UTF) operate
-- in a /strict/ mode: each will throw an exception if given invalid
-- input.
--
-- Each function has a variant, whose name is suffixed with -'With',
-- that gives greater control over the handling of decoding errors.
-- For instance, 'decodeUtf8' will throw an exception, but
-- 'decodeUtf8With' allows the programmer to determine what to do on a
-- decoding error.

-- | Decode a non-empty 'ByteString' containing Latin-1 (aka ISO-8859-1)
-- encoded text.
--
-- This applies 'E.decodeLatin1' under the 'NonEmpty' wrapper via
-- 'overNonEmpty'. The underlying 'E.decodeLatin1' is semantically
-- equivalent to @Data.Text.pack . Data.ByteString.Char8.unpack@.
--
-- This is a total function. However, bear in mind that decoding Latin-1
-- (non-ASCII) characters to UTF-8 requires actual work and is not just
-- buffer copying.
decodeLatin1 :: HasCallStack => NonEmpty ByteString -> NonEmptyStrictText
decodeLatin1 = overNonEmpty E.decodeLatin1
{-# INLINE decodeLatin1 #-}

-- | Decode a non-empty 'ByteString' containing UTF-8 encoded text, using
-- the supplied 'OnDecodeError' handler for invalid input.
--
-- This applies 'E.decodeUtf8With' (partially applied to the handler)
-- under the 'NonEmpty' wrapper via 'overNonEmpty'.
--
-- Surrogate code points in replacement character returned by 'OnDecodeError'
-- will be automatically remapped to the replacement char @U+FFFD@.
decodeUtf8With :: HasCallStack => OnDecodeError -> NonEmpty ByteString -> NonEmptyStrictText
-- NOTE(review): an 'OnDecodeError' handler may return 'Nothing' to drop
-- invalid input, so the decoded text could be empty for all-invalid input;
-- presumably 'overNonEmpty' does not re-check emptiness -- confirm.
decodeUtf8With onError = overNonEmpty $ E.decodeUtf8With onError
{-# INLINE decodeUtf8With #-}

-- | Decode, in a stream oriented way, a non-empty 'ByteString' containing
-- UTF-8 encoded text that is known to be valid.
--
-- This applies 'E.streamDecodeUtf8' under the 'NonEmpty' wrapper via
-- 'overNonEmpty', yielding a wrapped 'E.Decoding'.
--
-- If the input contains any invalid UTF-8 data, an exception will be
-- thrown (either by this function or a continuation) that cannot be
-- caught in pure code.  For more control over the handling of invalid
-- data, use 'streamDecodeUtf8With'.
streamDecodeUtf8 :: HasCallStack => NonEmpty ByteString -> NonEmpty E.Decoding
streamDecodeUtf8 = overNonEmpty E.streamDecodeUtf8
{-# INLINE streamDecodeUtf8 #-}

-- | Decode, in a stream oriented way, a non-empty 'ByteString' containing
-- UTF-8 encoded text, using the supplied 'OnDecodeError' handler for
-- invalid input.
--
-- This applies 'E.streamDecodeUtf8With' (partially applied to the handler)
-- under the 'NonEmpty' wrapper via 'overNonEmpty', yielding a wrapped
-- 'E.Decoding'.
streamDecodeUtf8With :: HasCallStack => OnDecodeError -> NonEmpty ByteString -> NonEmpty E.Decoding
streamDecodeUtf8With onError = overNonEmpty $ E.streamDecodeUtf8With onError
{-# INLINE streamDecodeUtf8With #-}

-- | Decode a non-empty 'ByteString' containing UTF-8 encoded text that is
-- known to be valid.
--
-- This applies 'E.decodeUtf8' under the 'NonEmpty' wrapper via
-- 'overNonEmpty'.
--
-- If the input contains any invalid UTF-8 data, an exception will be
-- thrown that cannot be caught in pure code.  For more control over
-- the handling of invalid data, use 'decodeUtf8'' or
-- 'decodeUtf8With'.
--
-- This is a partial function: it checks that input is a well-formed
-- UTF-8 sequence and copies buffer or throws an error otherwise.
decodeUtf8 :: NonEmpty ByteString -> NonEmptyStrictText
decodeUtf8 = overNonEmpty E.decodeUtf8
{-# INLINE decodeUtf8 #-}

-- | Decode a non-empty 'ByteString' containing UTF-8 encoded text.
--
-- The wrapped bytes are extracted with 'getNonEmpty' and passed to
-- 'E.decodeUtf8''. If the input contains any invalid UTF-8 data, the
-- relevant exception is returned in 'Left'; otherwise the decoded text is
-- returned in 'Right', re-wrapped with 'trustedNonEmpty'.
decodeUtf8' :: HasCallStack => NonEmpty ByteString -> Either UnicodeException NonEmptyStrictText
decodeUtf8' = fmap trustedNonEmpty . E.decodeUtf8' . getNonEmpty
{-# INLINE decodeUtf8' #-}

-- | Encode non-empty text to a ByteString 'B.Builder' using UTF-8 encoding.
--
-- This applies 'E.encodeUtf8Builder' under the 'NonEmpty' wrapper via
-- 'overNonEmpty'.
encodeUtf8Builder :: NonEmptyStrictText -> NonEmpty B.Builder
encodeUtf8Builder = overNonEmpty E.encodeUtf8Builder
{-# INLINE encodeUtf8Builder #-}

-- | Encode non-empty text using UTF-8 encoding and escape the ASCII
-- characters using a 'BP.BoundedPrim'.
--
-- This applies 'E.encodeUtf8BuilderEscaped' (partially applied to the
-- escaping primitive) under the 'NonEmpty' wrapper via 'overNonEmpty'.
--
-- Use this function to implement efficient encoders for text-based formats
-- like JSON or HTML.
encodeUtf8BuilderEscaped :: BP.BoundedPrim Word8 -> NonEmptyStrictText -> NonEmpty B.Builder
encodeUtf8BuilderEscaped be = overNonEmpty $ E.encodeUtf8BuilderEscaped be
{-# INLINE encodeUtf8BuilderEscaped #-}

-- | Encode non-empty text using UTF-8 encoding.
--
-- This applies 'E.encodeUtf8' under the 'NonEmpty' wrapper via
-- 'overNonEmpty'.
encodeUtf8 :: NonEmptyStrictText -> NonEmpty ByteString
encodeUtf8 = overNonEmpty E.encodeUtf8
{-# INLINE encodeUtf8 #-}

-- | Decode non-empty text from little endian UTF-16 encoding, using the
-- supplied 'OnDecodeError' handler for invalid input.
--
-- This applies 'E.decodeUtf16LEWith' (partially applied to the handler)
-- under the 'NonEmpty' wrapper via 'overNonEmpty'.
decodeUtf16LEWith :: OnDecodeError -> NonEmpty ByteString -> NonEmptyStrictText
-- NOTE(review): an 'OnDecodeError' handler may return 'Nothing' to drop
-- invalid input, so the decoded text could be empty for all-invalid input;
-- presumably 'overNonEmpty' does not re-check emptiness -- confirm.
decodeUtf16LEWith onError = overNonEmpty $ E.decodeUtf16LEWith onError
{-# INLINE decodeUtf16LEWith #-}

-- | Decode non-empty text from little endian UTF-16 encoding.
--
-- This applies 'E.decodeUtf16LE' under the 'NonEmpty' wrapper via
-- 'overNonEmpty'.
--
-- If the input contains any invalid little endian UTF-16 data, an
-- exception will be thrown.  For more control over the handling of
-- invalid data, use 'decodeUtf16LEWith'.
decodeUtf16LE :: NonEmpty ByteString -> NonEmptyStrictText
decodeUtf16LE = overNonEmpty E.decodeUtf16LE
{-# INLINE decodeUtf16LE #-}

-- | Decode non-empty text from big endian UTF-16 encoding, using the
-- supplied 'OnDecodeError' handler for invalid input.
--
-- This applies 'E.decodeUtf16BEWith' (partially applied to the handler)
-- under the 'NonEmpty' wrapper via 'overNonEmpty'.
decodeUtf16BEWith :: OnDecodeError -> NonEmpty ByteString -> NonEmptyStrictText
-- NOTE(review): an 'OnDecodeError' handler may return 'Nothing' to drop
-- invalid input, so the decoded text could be empty for all-invalid input;
-- presumably 'overNonEmpty' does not re-check emptiness -- confirm.
decodeUtf16BEWith onError = overNonEmpty $ E.decodeUtf16BEWith onError
{-# INLINE decodeUtf16BEWith #-}

-- | Decode non-empty text from big endian UTF-16 encoding.
--
-- This applies 'E.decodeUtf16BE' under the 'NonEmpty' wrapper via
-- 'overNonEmpty'.
--
-- If the input contains any invalid big endian UTF-16 data, an
-- exception will be thrown.  For more control over the handling of
-- invalid data, use 'decodeUtf16BEWith'.
decodeUtf16BE :: NonEmpty ByteString -> NonEmptyStrictText
decodeUtf16BE = overNonEmpty E.decodeUtf16BE
{-# INLINE decodeUtf16BE #-}

-- | Encode non-empty text using little endian UTF-16 encoding.
--
-- This applies 'E.encodeUtf16LE' under the 'NonEmpty' wrapper via
-- 'overNonEmpty'.
encodeUtf16LE :: NonEmptyStrictText -> NonEmpty ByteString
encodeUtf16LE = overNonEmpty E.encodeUtf16LE
{-# INLINE encodeUtf16LE #-}

-- | Encode non-empty text using big endian UTF-16 encoding.
--
-- This applies 'E.encodeUtf16BE' under the 'NonEmpty' wrapper via
-- 'overNonEmpty'.
encodeUtf16BE :: NonEmptyStrictText -> NonEmpty ByteString
encodeUtf16BE = overNonEmpty E.encodeUtf16BE
{-# INLINE encodeUtf16BE #-}

-- | Decode non-empty text from little endian UTF-32 encoding, using the
-- supplied 'OnDecodeError' handler for invalid input.
--
-- This applies 'E.decodeUtf32LEWith' (partially applied to the handler)
-- under the 'NonEmpty' wrapper via 'overNonEmpty'.
decodeUtf32LEWith :: OnDecodeError -> NonEmpty ByteString -> NonEmptyStrictText
-- NOTE(review): an 'OnDecodeError' handler may return 'Nothing' to drop
-- invalid input, so the decoded text could be empty for all-invalid input;
-- presumably 'overNonEmpty' does not re-check emptiness -- confirm.
decodeUtf32LEWith onError = overNonEmpty $ E.decodeUtf32LEWith onError
{-# INLINE decodeUtf32LEWith #-}

-- | Decode non-empty text from little endian UTF-32 encoding.
--
-- This applies 'E.decodeUtf32LE' under the 'NonEmpty' wrapper via
-- 'overNonEmpty'.
--
-- If the input contains any invalid little endian UTF-32 data, an
-- exception will be thrown.  For more control over the handling of
-- invalid data, use 'decodeUtf32LEWith'.
decodeUtf32LE :: NonEmpty ByteString -> NonEmptyStrictText
decodeUtf32LE = overNonEmpty E.decodeUtf32LE
{-# INLINE decodeUtf32LE #-}

-- | Decode non-empty text from big endian UTF-32 encoding, using the
-- supplied 'OnDecodeError' handler for invalid input.
--
-- This applies 'E.decodeUtf32BEWith' (partially applied to the handler)
-- under the 'NonEmpty' wrapper via 'overNonEmpty'.
decodeUtf32BEWith :: OnDecodeError -> NonEmpty ByteString -> NonEmptyStrictText
-- NOTE(review): an 'OnDecodeError' handler may return 'Nothing' to drop
-- invalid input, so the decoded text could be empty for all-invalid input;
-- presumably 'overNonEmpty' does not re-check emptiness -- confirm.
decodeUtf32BEWith onError = overNonEmpty $ E.decodeUtf32BEWith onError
{-# INLINE decodeUtf32BEWith #-}

-- | Decode non-empty text from big endian UTF-32 encoding.
--
-- This applies 'E.decodeUtf32BE' under the 'NonEmpty' wrapper via
-- 'overNonEmpty'.
--
-- If the input contains any invalid big endian UTF-32 data, an
-- exception will be thrown.  For more control over the handling of
-- invalid data, use 'decodeUtf32BEWith'.
decodeUtf32BE :: NonEmpty ByteString -> NonEmptyStrictText
decodeUtf32BE = overNonEmpty E.decodeUtf32BE
{-# INLINE decodeUtf32BE #-}

-- | Encode non-empty text using little endian UTF-32 encoding.
--
-- This applies 'E.encodeUtf32LE' under the 'NonEmpty' wrapper via
-- 'overNonEmpty'.
encodeUtf32LE :: NonEmptyStrictText -> NonEmpty ByteString
encodeUtf32LE = overNonEmpty E.encodeUtf32LE
{-# INLINE encodeUtf32LE #-}

-- | Encode non-empty text using big endian UTF-32 encoding.
--
-- This applies 'E.encodeUtf32BE' under the 'NonEmpty' wrapper via
-- 'overNonEmpty'.
encodeUtf32BE :: NonEmptyStrictText -> NonEmpty ByteString
encodeUtf32BE = overNonEmpty E.encodeUtf32BE
{-# INLINE encodeUtf32BE #-}