module System.IO.Streams.Text
  ( 
    decodeUtf8
  , decodeUtf8With
  , encodeUtf8
  ) where
import           Control.Monad                 (when)
import           Control.Monad.IO.Class        (MonadIO (..))
import           Data.ByteString               (ByteString)
import qualified Data.ByteString               as S
import qualified Data.ByteString.Unsafe        as S
import           Data.Monoid                   (mappend)
import           Data.Text                     (Text)
import qualified Data.Text.Encoding            as T
import           Data.Text.Encoding.Error      (OnDecodeError)
import           Data.Word                     (Word8)
import qualified System.IO.Streams.Combinators as Streams
import           System.IO.Streams.Internal    (InputStream, OutputStream)
import qualified System.IO.Streams.Internal    as Streams
encodeUtf8 :: OutputStream ByteString -> IO (OutputStream Text)
encodeUtf8 = Streams.contramap T.encodeUtf8
decodeUtf8 :: InputStream ByteString -> IO (InputStream Text)
decodeUtf8 = decode T.decodeUtf8
decodeUtf8With :: OnDecodeError
               -> InputStream ByteString
               -> IO (InputStream Text)
decodeUtf8With e = decode (T.decodeUtf8With e)
decode :: (ByteString -> Text)
       -> InputStream ByteString
       -> IO (InputStream Text)
decode decodeFunc input = Streams.fromGenerator $ go Nothing
  where
    go !soFar = liftIO (Streams.read input) >>=
                maybe (finish soFar) (chunk soFar)
    finish Nothing  = return $! ()
    finish (Just x) = Streams.yield $! decodeFunc x
    chunk Nothing  s = process s
    chunk (Just a) b = process $ a `mappend` b
    process !s =
      case findLastFullCode s of
        LastCodeIsComplete x -> (Streams.yield $! decodeFunc x) >> go Nothing
        Split a b            -> do
                                  when (not $ S.null a) $
                                      Streams.yield $! decodeFunc a
                                  go (Just b)
        NoCodesAreComplete x -> go (Just x)
data ByteType = Regular
              | Continuation
              | Start !Int
between :: Word8 -> Word8 -> Word8 -> Bool
between x y z = x >= y && x <= z
characterizeByte :: Word8 -> ByteType
characterizeByte c | between c 0 0x7F    = Regular
                   | between c 0x80 0xBF = Continuation
                   | between c 0xC0 0xDF = Start 1
                   | between c 0xE0 0xEF = Start 2
                   
                   
                   | otherwise           = Start 3
data FindOutput = LastCodeIsComplete !ByteString
                | Split !ByteString !ByteString
                | NoCodesAreComplete !ByteString   
                                                   
findLastFullCode :: ByteString -> FindOutput
findLastFullCode b | len == 0  = LastCodeIsComplete b
                   | otherwise = go
  where
    len = S.length b
    go = let !idx = len  1
             !c   = S.unsafeIndex b idx
         in case characterizeByte c of
              Regular      -> LastCodeIsComplete b
              Continuation -> cont (len  2)
              _            -> Split (S.unsafeTake idx b) (S.unsafeDrop idx b)
    cont !idx | idx < 0 = NoCodesAreComplete b
              | otherwise =
                  let !c = S.unsafeIndex b idx
                  in case characterizeByte c of
                       
                       
                       Regular      -> LastCodeIsComplete b
                       Continuation -> cont (idx  1)
                       Start n      -> if n + idx == len  1
                                         then LastCodeIsComplete b
                                         else Split (S.unsafeTake idx b)
                                                    (S.unsafeDrop idx b)