-- | Conversions between lazy 'Text' values and lazy 'B.ByteString's.
--
-- Decoders come in two flavours: a plain version, and a @...With@
-- version that takes an 'OnDecodeError' handler from the caller.
module Data.Text.Lazy.Encoding
(
-- * Decoding ByteStrings to Text
decodeASCII
, decodeUtf8
, decodeUtf16LE
, decodeUtf16BE
, decodeUtf32LE
, decodeUtf32BE
-- ** Decoding with a caller-supplied error handler
, decodeUtf8With
, decodeUtf16LEWith
, decodeUtf16BEWith
, decodeUtf32LEWith
, decodeUtf32BEWith
-- * Encoding Text to ByteStrings
, encodeUtf8
, encodeUtf16LE
, encodeUtf16BE
, encodeUtf32LE
, encodeUtf32BE
) where
import Data.Bits ((.&.))
import Data.Text.Encoding.Error (OnDecodeError, strictDecode)
import Data.Text.Lazy.Internal (Text(..), chunk, empty, foldrChunks)
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Internal as B
import qualified Data.ByteString.Unsafe as S
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Lazy.Encoding.Fusion as E
import qualified Data.Text.Lazy.Fusion as F
-- | Decode a lazy 'B.ByteString' one strict chunk at a time using the
-- strict 'TE.decodeASCII', joining the decoded pieces with 'chunk'.
decodeASCII :: B.ByteString -> Text
decodeASCII bs = B.foldrChunks step empty bs
  where
    -- Decode one strict chunk and put it in front of the lazily
    -- produced remainder.
    step strictBytes rest = chunk (TE.decodeASCII strictBytes) rest
-- | Decode a lazy 'B.ByteString' containing UTF-8 encoded text.
--
-- Every strict chunk is decoded with the strict 'TE.decodeUtf8With',
-- to which @onErr@ is passed unchanged.  A chunk boundary may fall in
-- the middle of a multi-byte sequence; the trailing partial sequence is
-- then carried over and completed with bytes taken from the following
-- chunks before it is decoded.
--
-- If the input ends while bytes are still being carried over, each of
-- those bytes is reported to @onErr@ individually (as @Just byte@),
-- followed by one final report with 'Nothing'.
decodeUtf8With :: OnDecodeError -> B.ByteString -> Text
decodeUtf8With onErr bs0 = fast bs0
  where
    decode = TE.decodeUtf8With onErr

    -- Fast path: decode a whole chunk when it ends on a sequence
    -- boundary; otherwise decode the prefix and carry the tail over.
    fast (B.Chunk p ps)
      -- An empty chunk carries no data.  Skipping it keeps the
      -- unchecked indexing below within bounds.
      | S.null p     = fast ps
      | isComplete p = chunk (decode p) (fast ps)
      | otherwise    = chunk (decode h) (slow t ps)
      where
        (h, t) = S.splitAt pivot p
        -- Split in front of the last lead byte if one sits in the final
        -- two positions, otherwise three bytes from the end.  For chunks
        -- shorter than three bytes the pivot can be negative, in which
        -- case 'S.splitAt' yields an empty prefix.
        pivot
          | at 1      = len - 1
          | at 2      = len - 2
          | otherwise = len - 3
        len  = S.length p
        -- True when the byte @n@ positions from the end has its top two
        -- bits set, i.e. it starts a multi-byte sequence.
        at n = len >= n && S.unsafeIndex p (len - n) .&. 0xc0 == 0xc0
    fast B.Empty = empty

    -- Slow path: extend the carried-over bytes @i@ one byte at a time
    -- until they end on a sequence boundary, then return to 'fast'.
    slow i bs =
      case B.uncons bs of
        Just (w, bs')
          | isComplete i' -> chunk (decode i') (fast bs')
          | otherwise     -> slow i' bs'
          where i' = S.snoc i w
        -- Input exhausted: drain the carried-over bytes through @onErr@.
        Nothing ->
          case S.uncons i of
            Just (j, i') ->
              case onErr desc (Just j) of
                Nothing -> slow i' bs
                Just c  -> Chunk (T.singleton c) (slow i' bs)
            -- Nothing left to drain: one last end-of-input report.
            Nothing ->
              case onErr desc Nothing of
                Nothing -> empty
                Just c  -> Chunk (T.singleton c) empty

    -- Does this non-empty byte string end with what looks like a
    -- finished sequence?  Either the last byte is ASCII, or the byte
    -- 2, 3 or 4 positions from the end is the lead byte of a sequence
    -- of exactly that length.  Callers must not pass an empty string:
    -- the first test indexes without a bounds check.
    isComplete bs =
         ix 1 .&. 0x80 == 0
      || (len >= 2 && ix 2 .&. 0xe0 == 0xc0)
      || (len >= 3 && ix 3 .&. 0xf0 == 0xe0)
      || (len >= 4 && ix 4 .&. 0xf8 == 0xf0)
      where
        len  = S.length bs
        -- Byte @n@ positions from the end (1 = last byte).
        ix n = S.unsafeIndex bs (len - n)

    desc = "Data.Text.Lazy.Encoding.decodeUtf8With: Invalid UTF-8 stream"
-- | Decode a lazy 'B.ByteString' with 'decodeUtf8With', using
-- 'strictDecode' as the error handler.
decodeUtf8 :: B.ByteString -> Text
decodeUtf8 bytes = decodeUtf8With strictDecode bytes
-- | Encode lazy 'Text' chunk by chunk with the strict 'TE.encodeUtf8',
-- producing one 'B.Chunk' per text chunk.
encodeUtf8 :: Text -> B.ByteString
encodeUtf8 txt = foldrChunks step B.Empty txt
  where
    -- Encode one strict text chunk and link it to the lazily encoded
    -- remainder.
    step strictText rest = B.Chunk (TE.encodeUtf8 strictText) rest
-- | Decode little-endian UTF-16 input by running it through
-- 'E.streamUtf16LE' with the given error handler and materialising the
-- resulting stream with 'F.unstream'.
decodeUtf16LEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf16LEWith onErr bs = F.unstream decoded
  where
    decoded = E.streamUtf16LE onErr bs
-- | 'decodeUtf16LEWith' specialised to the 'strictDecode' error handler.
decodeUtf16LE :: B.ByteString -> Text
decodeUtf16LE bytes = decodeUtf16LEWith strictDecode bytes
-- | Decode big-endian UTF-16 input by running it through
-- 'E.streamUtf16BE' with the given error handler and materialising the
-- resulting stream with 'F.unstream'.
decodeUtf16BEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf16BEWith onErr bs = F.unstream decoded
  where
    decoded = E.streamUtf16BE onErr bs
-- | 'decodeUtf16BEWith' specialised to the 'strictDecode' error handler.
decodeUtf16BE :: B.ByteString -> Text
decodeUtf16BE bytes = decodeUtf16BEWith strictDecode bytes
-- | Encode lazy 'Text' by applying the strict 'TE.encodeUtf16LE' to
-- every chunk and assembling the results with 'B.fromChunks'.
encodeUtf16LE :: Text -> B.ByteString
encodeUtf16LE txt = B.fromChunks encodedChunks
  where
    encodedChunks =
      foldrChunks (\strictText rest -> TE.encodeUtf16LE strictText : rest) [] txt
-- | Encode lazy 'Text' by applying the strict 'TE.encodeUtf16BE' to
-- every chunk and assembling the results with 'B.fromChunks'.
encodeUtf16BE :: Text -> B.ByteString
encodeUtf16BE txt = B.fromChunks encodedChunks
  where
    encodedChunks =
      foldrChunks (\strictText rest -> TE.encodeUtf16BE strictText : rest) [] txt
-- | Decode little-endian UTF-32 input by running it through
-- 'E.streamUtf32LE' with the given error handler and materialising the
-- resulting stream with 'F.unstream'.
decodeUtf32LEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf32LEWith onErr bs = F.unstream decoded
  where
    decoded = E.streamUtf32LE onErr bs
-- | 'decodeUtf32LEWith' specialised to the 'strictDecode' error handler.
decodeUtf32LE :: B.ByteString -> Text
decodeUtf32LE bytes = decodeUtf32LEWith strictDecode bytes
-- | Decode big-endian UTF-32 input by running it through
-- 'E.streamUtf32BE' with the given error handler and materialising the
-- resulting stream with 'F.unstream'.
decodeUtf32BEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf32BEWith onErr bs = F.unstream decoded
  where
    decoded = E.streamUtf32BE onErr bs
-- | 'decodeUtf32BEWith' specialised to the 'strictDecode' error handler.
decodeUtf32BE :: B.ByteString -> Text
decodeUtf32BE bytes = decodeUtf32BEWith strictDecode bytes
-- | Encode lazy 'Text' by applying the strict 'TE.encodeUtf32LE' to
-- every chunk and assembling the results with 'B.fromChunks'.
encodeUtf32LE :: Text -> B.ByteString
encodeUtf32LE txt = B.fromChunks encodedChunks
  where
    encodedChunks =
      foldrChunks (\strictText rest -> TE.encodeUtf32LE strictText : rest) [] txt
-- | Encode lazy 'Text' by applying the strict 'TE.encodeUtf32BE' to
-- every chunk and assembling the results with 'B.fromChunks'.
encodeUtf32BE :: Text -> B.ByteString
encodeUtf32BE txt = B.fromChunks encodedChunks
  where
    encodedChunks =
      foldrChunks (\strictText rest -> TE.encodeUtf32BE strictText : rest) [] txt