{-# LANGUAGE PackageImports #-}
{-# LANGUAGE Strict         #-}

module Data.String.Interpolate.Conversion.Encoding
  ( bsToTextBuilder, lbsToTextBuilder, encodeCharUTF8 )
where

import qualified Data.ByteString         as B
import qualified Data.ByteString.Builder as LB
import qualified Data.ByteString.Lazy    as LB
import qualified Data.Text.Lazy.Builder  as LT

import qualified "utf8-string" Data.ByteString.Lazy.UTF8 as LUTF8
import qualified "utf8-string" Data.ByteString.UTF8      as UTF8

-- |
-- Convert a strict ByteString into a Text `LT.Builder', converting any invalid
-- characters into the Unicode replacement character � (U+FFFD).
--
-- The input is decoded as UTF-8 by a right fold from "Data.ByteString.UTF8";
-- each decoded 'Char' is prepended to the builder accumulated from the rest of
-- the input, and empty input yields 'mempty'.
bsToTextBuilder :: B.ByteString -> LT.Builder
bsToTextBuilder = UTF8.foldr (\char bldr -> LT.singleton char <> bldr) mempty

-- |
-- Convert a lazy ByteString into a Text `LT.Builder', converting any invalid
-- characters into the Unicode replacement character � (U+FFFD).
--
-- The input is decoded as UTF-8 by a right fold from
-- "Data.ByteString.Lazy.UTF8"; each decoded 'Char' is prepended to the builder
-- accumulated from the rest of the input, and empty input yields 'mempty'.
lbsToTextBuilder :: LB.ByteString -> LT.Builder
lbsToTextBuilder = LUTF8.foldr (\char bldr -> LT.singleton char <> bldr) mempty

-- |
-- UTF-8 encode a single 'Char' as a ByteString `LB.Builder'.
--
-- "Data.ByteString.Builder" provides `charUtf8' to do this, but it doesn't
-- correctly handle invalid characters. To compensate, U+FFFE and U+FFFF are
-- first replaced with the Unicode replacement character (U+FFFD); every other
-- character is handed to `charUtf8' unchanged.
encodeCharUTF8 :: Char -> LB.Builder
encodeCharUTF8 c =
  let normalized = case c of
        '\xFFFE' -> '\xFFFD'
        '\xFFFF' -> '\xFFFD'
        _        -> c
  in LB.charUtf8 normalized