{-# LANGUAGE CPP #-}
{-# LANGUAGE RankNTypes #-}

module Data.ByteString.Short.Decode (decodeUtf16LE, decodeUtf16LEWith, decodeUtf16LE', decodeUtf16LE'', decodeUtf8, decodeUtf8With, decodeUtf8') where

import Data.ByteString
    ( ByteString )
import Data.ByteString.Short
    ( ShortByteString )
import Data.Text
    ( Text )
import Data.Text.Encoding.Error
    ( OnDecodeError, UnicodeException, strictDecode )
import Data.Text.Internal.Fusion.Types
    ( Step (..), Stream (..) )
import Data.Text.Internal.Fusion.Size
    ( maxSize )
import Data.Text.Internal.Unsafe.Char
#if MIN_VERSION_text(2,0,0)
    ( unsafeChr16, unsafeChr8 )
#else
    ( unsafeChr, unsafeChr8 )
#endif
import Data.Bits
    ( unsafeShiftL, unsafeShiftR )
import Data.Word
    ( Word16, Word8 )

import Control.Exception
    ( evaluate, try )
import qualified Data.ByteString.Short as BS
    ( index, length )
import qualified Data.Text.Encoding as E
import qualified Data.Text.Internal.Encoding.Utf16 as U16
import qualified Data.Text.Internal.Encoding.Utf8 as U8
import GHC.IO
    ( unsafeDupablePerformIO )

#if !MIN_VERSION_text(2,0,0)
unsafeChr16 :: Word16 -> Char
unsafeChr16 = Word16 -> Char
unsafeChr
#endif

-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using UTF-8
-- encoding.
streamUtf8 :: OnDecodeError -> ShortByteString -> Stream Char
streamUtf8 :: OnDecodeError -> ShortByteString -> Stream Char
streamUtf8 OnDecodeError
onErr ShortByteString
bs = (Int -> Step Int Char) -> Int -> Size -> Stream Char
forall a s. (s -> Step s a) -> s -> Size -> Stream a
Stream Int -> Step Int Char
next Int
0 (Int -> Size
maxSize Int
l)
    where
      l :: Int
l = ShortByteString -> Int
BS.length ShortByteString
bs
      next :: Int -> Step Int Char
next Int
i
          | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
l = Step Int Char
forall s a. Step s a
Done
          | Word8 -> Bool
U8.validate1 Word8
x1 = Char -> Int -> Step Int Char
forall s a. a -> s -> Step s a
Yield (Word8 -> Char
unsafeChr8 Word8
x1) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
          | Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
l Bool -> Bool -> Bool
&& Word8 -> Word8 -> Bool
U8.validate2 Word8
x1 Word8
x2 = Char -> Int -> Step Int Char
forall s a. a -> s -> Step s a
Yield (Word8 -> Word8 -> Char
U8.chr2 Word8
x1 Word8
x2) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
2)
          | Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
2 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
l Bool -> Bool -> Bool
&& Word8 -> Word8 -> Word8 -> Bool
U8.validate3 Word8
x1 Word8
x2 Word8
x3 = Char -> Int -> Step Int Char
forall s a. a -> s -> Step s a
Yield (Word8 -> Word8 -> Word8 -> Char
U8.chr3 Word8
x1 Word8
x2 Word8
x3) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3)
          | Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
l Bool -> Bool -> Bool
&& Word8 -> Word8 -> Word8 -> Word8 -> Bool
U8.validate4 Word8
x1 Word8
x2 Word8
x3 Word8
x4 = Char -> Int -> Step Int Char
forall s a. a -> s -> Step s a
Yield (Word8 -> Word8 -> Word8 -> Word8 -> Char
U8.chr4 Word8
x1 Word8
x2 Word8
x3 Word8
x4) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
4)
          | Bool
otherwise = String
-> String -> OnDecodeError -> Maybe Word8 -> Int -> Step Int Char
forall s.
String
-> String -> OnDecodeError -> Maybe Word8 -> s -> Step s Char
decodeError String
"streamUtf8" String
"UTF-8" OnDecodeError
onErr (Word8 -> Maybe Word8
forall a. a -> Maybe a
Just Word8
x1) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
          where
            x1 :: Word8
x1 = Int -> Word8
idx Int
i
            x2 :: Word8
x2 = Int -> Word8
idx (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
            x3 :: Word8
x3 = Int -> Word8
idx (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)
            x4 :: Word8
x4 = Int -> Word8
idx (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
3)
            idx :: Int -> Word8
idx = ShortByteString -> Int -> Word8
BS.index ShortByteString
bs
{-# INLINE [0] streamUtf8 #-}

-- | /O(n)/ Convert a 'ShortByteString' into a 'Stream Char', using little
-- endian UTF-16 encoding.
streamUtf16LE :: OnDecodeError -> ShortByteString -> Stream Char
streamUtf16LE :: OnDecodeError -> ShortByteString -> Stream Char
streamUtf16LE OnDecodeError
onErr ShortByteString
bs = (Int -> Step Int Char) -> Int -> Size -> Stream Char
forall a s. (s -> Step s a) -> s -> Size -> Stream a
Stream Int -> Step Int Char
next Int
0 (Int -> Size
maxSize (Int
l Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
1))
    where
      l :: Int
l = ShortByteString -> Int
BS.length ShortByteString
bs
      {-# INLINE next #-}
      next :: Int -> Step Int Char
next Int
i
          | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
l                         = Step Int Char
forall s a. Step s a
Done
          | Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
l Bool -> Bool -> Bool
&& Word16 -> Bool
U16.validate1 Word16
x1    = Char -> Int -> Step Int Char
forall s a. a -> s -> Step s a
Yield (Word16 -> Char
unsafeChr16 Word16
x1) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
2)
          | Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
l Bool -> Bool -> Bool
&& Word16 -> Word16 -> Bool
U16.validate2 Word16
x1 Word16
x2 = Char -> Int -> Step Int Char
forall s a. a -> s -> Step s a
Yield (Word16 -> Word16 -> Char
U16.chr2 Word16
x1 Word16
x2) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
4)
          | Bool
otherwise = String
-> String -> OnDecodeError -> Maybe Word8 -> Int -> Step Int Char
forall s.
String
-> String -> OnDecodeError -> Maybe Word8 -> s -> Step s Char
decodeError String
"streamUtf16LE" String
"UTF-16LE" OnDecodeError
onErr Maybe Word8
forall a. Maybe a
Nothing (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
          where
            x1 :: Word16
x1    = Int -> Word16
idx Int
i       Word16 -> Word16 -> Word16
forall a. Num a => a -> a -> a
+ (Int -> Word16
idx (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
8)
            x2 :: Word16
x2    = Int -> Word16
idx (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) Word16 -> Word16 -> Word16
forall a. Num a => a -> a -> a
+ (Int -> Word16
idx (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
3) Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
8)
            idx :: Int -> Word16
idx = Word8 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Word16) -> (Int -> Word8) -> Int -> Word16
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> Int -> Word8
BS.index ShortByteString
bs :: Int -> Word16
{-# INLINE [0] streamUtf16LE #-}

-- | Decode text from little endian UTF-16 encoding.
decodeUtf16LEWith :: OnDecodeError -> ShortByteString -> String
decodeUtf16LEWith :: OnDecodeError -> ShortByteString -> String
decodeUtf16LEWith OnDecodeError
onErr ShortByteString
bs = Stream Char -> String
unstream (OnDecodeError -> ShortByteString -> Stream Char
streamUtf16LE OnDecodeError
onErr ShortByteString
bs)
{-# INLINE decodeUtf16LEWith #-}

-- | Decode text from little endian UTF-16 encoding.
--
-- If the input contains any invalid little endian UTF-16 data, an
-- exception will be thrown.  For more control over the handling of
-- invalid data, use 'decodeUtf16LEWith'.
decodeUtf16LE :: ShortByteString -> String
decodeUtf16LE :: ShortByteString -> String
decodeUtf16LE = OnDecodeError -> ShortByteString -> String
decodeUtf16LEWith OnDecodeError
strictDecode
{-# INLINE decodeUtf16LE #-}

-- | Decode text from little endian UTF-16 encoding.
decodeUtf8With :: OnDecodeError -> ShortByteString -> String
decodeUtf8With :: OnDecodeError -> ShortByteString -> String
decodeUtf8With OnDecodeError
onErr ShortByteString
bs = Stream Char -> String
unstream (OnDecodeError -> ShortByteString -> Stream Char
streamUtf8 OnDecodeError
onErr ShortByteString
bs)
{-# INLINE decodeUtf8With #-}

-- | Decode text from little endian UTF-16 encoding.
--
-- If the input contains any invalid little endian UTF-16 data, an
-- exception will be thrown.  For more control over the handling of
-- invalid data, use 'decodeUtf16LEWith'.
decodeUtf8 :: ShortByteString -> String
decodeUtf8 :: ShortByteString -> String
decodeUtf8 = OnDecodeError -> ShortByteString -> String
decodeUtf8With OnDecodeError
strictDecode
{-# INLINE decodeUtf8 #-}


decodeError :: forall s. String -> String -> OnDecodeError -> Maybe Word8
            -> s -> Step s Char
decodeError :: String
-> String -> OnDecodeError -> Maybe Word8 -> s -> Step s Char
decodeError String
func String
kind OnDecodeError
onErr Maybe Word8
mb s
i =
    case OnDecodeError
onErr String
desc Maybe Word8
mb of
      Maybe Char
Nothing -> s -> Step s Char
forall s a. s -> Step s a
Skip s
i
      Just Char
c  -> Char -> s -> Step s Char
forall s a. a -> s -> Step s a
Yield Char
c s
i
    where desc :: String
desc = String
"Data.Text.Internal.Encoding.Fusion." String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
func String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
": Invalid " String -> String -> String
forall a. [a] -> [a] -> [a]
++
                 String
kind String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" stream"

-- | /O(n)/ Convert a 'Stream Char' into a 'Text'.
unstream :: Stream Char -> String
unstream :: Stream Char -> String
unstream (Stream s -> Step s Char
next0 s
s0 Size
_) = s -> String
go s
s0
  where
    go :: s -> String
go s
si =
      case s -> Step s Char
next0 s
si of
          Step s Char
Done        -> []
          Skip s
si'    -> s -> String
go s
si'
          Yield Char
c s
si' -> Char
c Char -> String -> String
forall a. a -> [a] -> [a]
: s -> String
go s
si'
{-# INLINE [0] unstream #-}


-- | Decode a 'ShortByteString' containing UTF-8 encoded text.
--
-- If the input contains any invalid UTF-8 data, the relevant
-- exception will be returned, otherwise the decoded text.
decodeUtf8' :: ShortByteString -> Either UnicodeException String
decodeUtf8' :: ShortByteString -> Either UnicodeException String
decodeUtf8' = IO (Either UnicodeException String)
-> Either UnicodeException String
forall a. IO a -> a
unsafeDupablePerformIO (IO (Either UnicodeException String)
 -> Either UnicodeException String)
-> (ShortByteString -> IO (Either UnicodeException String))
-> ShortByteString
-> Either UnicodeException String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IO String -> IO (Either UnicodeException String)
forall e a. Exception e => IO a -> IO (Either e a)
try (IO String -> IO (Either UnicodeException String))
-> (ShortByteString -> IO String)
-> ShortByteString
-> IO (Either UnicodeException String)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> IO String
forall a. a -> IO a
evaluate (String -> IO String)
-> (ShortByteString -> String) -> ShortByteString -> IO String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. OnDecodeError -> ShortByteString -> String
decodeUtf8With OnDecodeError
strictDecode
{-# INLINE decodeUtf8' #-}


-- | Decode a 'ShortByteString' containing UTF-16 encoded text.
--
-- If the input contains any invalid UTF-16 data, the relevant
-- exception will be returned, otherwise the decoded text.
decodeUtf16LE' :: ShortByteString -> Either UnicodeException String
decodeUtf16LE' :: ShortByteString -> Either UnicodeException String
decodeUtf16LE' = IO (Either UnicodeException String)
-> Either UnicodeException String
forall a. IO a -> a
unsafeDupablePerformIO (IO (Either UnicodeException String)
 -> Either UnicodeException String)
-> (ShortByteString -> IO (Either UnicodeException String))
-> ShortByteString
-> Either UnicodeException String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IO String -> IO (Either UnicodeException String)
forall e a. Exception e => IO a -> IO (Either e a)
try (IO String -> IO (Either UnicodeException String))
-> (ShortByteString -> IO String)
-> ShortByteString
-> IO (Either UnicodeException String)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> IO String
forall a. a -> IO a
evaluate (String -> IO String)
-> (ShortByteString -> String) -> ShortByteString -> IO String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. OnDecodeError -> ShortByteString -> String
decodeUtf16LEWith OnDecodeError
strictDecode
{-# INLINE decodeUtf16LE' #-}


-- | Decode a 'ByteString' containing UTF-16 encoded text.
--
-- If the input contains any invalid UTF-16 data, the relevant
-- exception will be returned, otherwise the decoded text.
decodeUtf16LE'' :: ByteString -> Either UnicodeException Text
decodeUtf16LE'' :: ByteString -> Either UnicodeException Text
decodeUtf16LE'' = IO (Either UnicodeException Text) -> Either UnicodeException Text
forall a. IO a -> a
unsafeDupablePerformIO (IO (Either UnicodeException Text) -> Either UnicodeException Text)
-> (ByteString -> IO (Either UnicodeException Text))
-> ByteString
-> Either UnicodeException Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IO Text -> IO (Either UnicodeException Text)
forall e a. Exception e => IO a -> IO (Either e a)
try (IO Text -> IO (Either UnicodeException Text))
-> (ByteString -> IO Text)
-> ByteString
-> IO (Either UnicodeException Text)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> IO Text
forall a. a -> IO a
evaluate (Text -> IO Text) -> (ByteString -> Text) -> ByteString -> IO Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. OnDecodeError -> ByteString -> Text
E.decodeUtf16LEWith OnDecodeError
strictDecode
{-# INLINE decodeUtf16LE'' #-}