{-# LANGUAGE BangPatterns, Rank2Types #-}

-- |
-- Module      : Data.Text.Lazy.Encoding.Fusion
-- Copyright   : (c) Bryan O'Sullivan 2009
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com, rtharper@aftereternity.co.uk, 
--               duncan@haskell.org
-- Stability   : experimental
-- Portability : portable
--
-- Fusible 'Stream'-oriented functions for converting between lazy
-- 'Text' and several common encodings.

module Data.Text.Lazy.Encoding.Fusion
    (
    -- * Streaming
    --  streamASCII
     streamUtf8
    --, streamUtf16LE
    --, streamUtf16BE
    --, streamUtf32LE
    --, streamUtf32BE

    -- * Unstreaming
    , unstream

    , module Data.Text.Encoding.Fusion.Common
    ) where

import Data.ByteString.Lazy.Internal (ByteString(..), defaultChunkSize)
import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as B
import Data.Text.Encoding.Fusion.Common
import Data.Text.Encoding.Error
import Data.Text.Fusion (Step(..), Stream(..))
import Data.Text.Fusion.Internal (M(..), PairS(..), S(..))
import Data.Text.UnsafeChar (unsafeChr8)
import Data.Word (Word8)
import qualified Data.Text.Encoding.Utf8 as U8
import System.IO.Unsafe (unsafePerformIO)
import Foreign.ForeignPtr (withForeignPtr, ForeignPtr)
import Foreign.Storable (pokeByteOff)
import Data.ByteString.Internal (mallocByteString, memcpy)
import Control.Exception (assert)
import qualified Data.ByteString.Internal as B

unknownLength :: Int
unknownLength = 4

-- | /O(n)/ Convert a lazy 'ByteString' into a 'Stream Char', using
-- UTF-8 encoding.
streamUtf8 :: OnDecodeError -> ByteString -> Stream Char
streamUtf8 onErr bs0 = Stream next (bs0 :!: empty :!: 0) unknownLength
    where
      empty = S N N N N
      {-# INLINE next #-}
      next (bs@(Chunk ps _) :!: S N _ _ _ :!: i)
          | i < len && U8.validate1 a =
              Yield (unsafeChr8 a) (bs :!: empty :!: i+1)
          | i + 1 < len && U8.validate2 a b =
              Yield (U8.chr2 a b) (bs :!: empty :!: i+2)
          | i + 2 < len && U8.validate3 a b c =
              Yield (U8.chr3 a b c) (bs :!: empty :!: i+3)
          | i + 4 < len && U8.validate4 a b c d =
              Yield (U8.chr4 a b c d) (bs :!: empty :!: i+4)
          where len = B.length ps
                a = B.unsafeIndex ps i
                b = B.unsafeIndex ps (i+1)
                c = B.unsafeIndex ps (i+2)
                d = B.unsafeIndex ps (i+3)
      next st@(bs :!: s :!: i) =
        case s of
          S (J a) N _ _             | U8.validate1 a ->
            Yield (unsafeChr8 a) es
          S (J a) (J b) N _         | U8.validate2 a b ->
            Yield (U8.chr2 a b) es
          S (J a) (J b) (J c) N     | U8.validate3 a b c ->
            Yield (U8.chr3 a b c) es
          S (J a) (J b) (J c) (J d) | U8.validate4 a b c d ->
            Yield (U8.chr4 a b c d) es
          _ -> consume st
         where es = bs :!: empty :!: i
      {-# INLINE consume #-}
      consume (bs@(Chunk ps rest) :!: s :!: i)
          | i >= B.length ps = consume (rest :!: s  :!: 0)
          | otherwise =
        case s of
          S N _ _ _ -> next (bs :!: S x N N N :!: i+1)
          S a N _ _ -> next (bs :!: S a x N N :!: i+1)
          S a b N _ -> next (bs :!: S a b x N :!: i+1)
          S a b c N -> next (bs :!: S a b c x :!: i+1)
          S (J a) b c d -> decodeError "streamUtf8" "UTF-8" onErr (Just a)
                           (bs :!: S b c d N :!: i+1)
          where x = J (B.unsafeIndex ps i)
      consume (Empty :!: S N _ _ _ :!: _) = Done
      consume st = decodeError "streamUtf8" "UTF-8" onErr Nothing st
{-# INLINE [0] streamUtf8 #-}

-- | /O(n)/ Convert a 'Stream' 'Word8' to a lazy 'ByteString'.
unstreamChunks :: Int -> Stream Word8 -> ByteString
unstreamChunks chunkSize (Stream next s0 len0) = chunk s0 len0
  where chunk s1 len1 = unsafePerformIO $ do
          let len = min (max len1 unknownLength) chunkSize
          mallocByteString len >>= loop len 0 s1
          where
            loop !n !off !s fp = case next s of
                Done | off == 0 -> return Empty
                     | otherwise -> do
                      bs <- trimUp fp off
                      return $! Chunk bs Empty
                Skip s' -> loop n off s' fp
                Yield x s'
                    | off == chunkSize -> do
                      bs <- trimUp fp off
                      return (Chunk bs (chunk s (n - B.length bs)))
                    | off == n -> realloc fp n off s' x
                    | otherwise -> do
                      withForeignPtr fp $ \p -> pokeByteOff p off x
                      loop n (off+1) s' fp
            {-# NOINLINE realloc #-}
            realloc fp n off s x = do
              let n' = min (n+n) chunkSize
              fp' <- copy0 fp n n'
              withForeignPtr fp' $ \p -> pokeByteOff p off x
              loop n' (off+1) s fp'
            {-# NOINLINE trimUp #-}
            trimUp fp off = return $! B.PS fp 0 off
            copy0 :: ForeignPtr Word8 -> Int -> Int -> IO (ForeignPtr Word8)
            copy0 !src !srcLen !destLen = assert (srcLen <= destLen) $ do
                dest <- mallocByteString destLen
                withForeignPtr src  $ \src'  ->
                    withForeignPtr dest $ \dest' ->
                        memcpy dest' src' (fromIntegral srcLen)
                return dest

-- | /O(n)/ Convert a 'Stream' 'Word8' to a lazy 'ByteString'.
unstream :: Stream Word8 -> ByteString
unstream = unstreamChunks defaultChunkSize

decodeError :: forall s. String -> String -> OnDecodeError -> Maybe Word8
            -> s -> Step s Char
decodeError func kind onErr mb i =
    case onErr desc mb of
      Nothing -> Skip i
      Just c  -> Yield c i
    where desc = "Data.Text.Lazy.Encoding.Fusion." ++ func ++ ": Invalid " ++
                 kind ++ " stream"