{-# LANGUAGE BangPatterns     #-}
{-# LANGUAGE CPP              #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE PatternSynonyms  #-}
{-# LANGUAGE RankNTypes       #-}
{-# LANGUAGE RecordWildCards  #-}

-- |
-- Module      : Streamly.Data.Internal.Unicode.Stream
-- Copyright   : (c) 2018 Composewell Technologies
--               (c) Bjoern Hoehrmann 2008-2009
--
-- License     : BSD3
-- Maintainer  : streamly@composewell.com
-- Stability   : experimental
-- Portability : GHC
--

#include "inline.hs"

module Streamly.Internal.Data.Unicode.Stream
    (
    -- * Construction (Decoding)
      decodeLatin1
    , decodeUtf8
    , decodeUtf8Lax
    , DecodeError(..)
    , DecodeState
    , CodePoint
    , decodeUtf8Either
    , resumeDecodeUtf8Either
    , decodeUtf8Arrays
    , decodeUtf8ArraysLenient

    -- * Elimination (Encoding)
    , encodeLatin1
    , encodeLatin1Lax
    , encodeUtf8
    {-
    -- * Operations on character strings
    , strip -- (dropAround isSpace)
    , stripEnd
    -}

    -- * StreamD UTF8 Encoding / Decoding transformations.
    , decodeUtf8D
    , encodeUtf8D
    , decodeUtf8LenientD
    , decodeUtf8EitherD
    , resumeDecodeUtf8EitherD
    , decodeUtf8ArraysD
    , decodeUtf8ArraysLenientD

    -- * Transformation
    , stripStart
    , lines
    , words
    , unlines
    , unwords
    )
where

import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.Bits (shiftR, shiftL, (.|.), (.&.))
import Data.Char (ord)
import Data.Word (Word8)
import Foreign.ForeignPtr (touchForeignPtr)
import Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr)
import Foreign.Storable (Storable(..))
import GHC.Base (assert, unsafeChr)
import GHC.ForeignPtr (ForeignPtr (..))
import GHC.IO.Encoding.Failure (isSurrogate)
import GHC.Ptr (Ptr (..), plusPtr)
import Prelude hiding (String, lines, words, unlines, unwords)
import System.IO.Unsafe (unsafePerformIO)

import Streamly (IsStream)
import Streamly.Data.Fold (Fold)
import Streamly.Memory.Array (Array)
import Streamly.Internal.Data.Unfold (Unfold)
import Streamly.Internal.Data.SVar (adaptState)
import Streamly.Internal.Data.Stream.StreamD (Stream(..), Step (..))
import Streamly.Internal.Data.Strict (Tuple'(..))

#if __GLASGOW_HASKELL__ < 800
import Streamly.Internal.Data.Stream.StreamD (pattern Stream)
#endif

import qualified Streamly.Internal.Memory.Array.Types as A
import qualified Streamly.Internal.Prelude as S
import qualified Streamly.Internal.Data.Stream.StreamD as D

-------------------------------------------------------------------------------
-- Encoding/Decoding Unicode (UTF-8) Characters
-------------------------------------------------------------------------------

-- UTF-8 primitives, Lifted from GHC.IO.Encoding.UTF8.

data WList = WCons !Word8 !WList | WNil

{-# INLINE ord2 #-}
ord2 :: Char -> WList
ord2 c = assert (n >= 0x80 && n <= 0x07ff) (WCons x1 (WCons x2 WNil))
  where
    n = ord c
    x1 = fromIntegral $ (n `shiftR` 6) + 0xC0
    x2 = fromIntegral $ (n .&. 0x3F) + 0x80

{-# INLINE ord3 #-}
ord3 :: Char -> WList
ord3 c = assert (n >= 0x0800 && n <= 0xffff) (WCons x1 (WCons x2 (WCons x3 WNil)))
  where
    n = ord c
    x1 = fromIntegral $ (n `shiftR` 12) + 0xE0
    x2 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80
    x3 = fromIntegral $ (n .&. 0x3F) + 0x80

{-# INLINE ord4 #-}
ord4 :: Char -> WList
ord4 c = assert (n >= 0x10000)  (WCons x1 (WCons x2 (WCons x3 (WCons x4 WNil))))
  where
    n = ord c
    x1 = fromIntegral $ (n `shiftR` 18) + 0xF0
    x2 = fromIntegral $ ((n `shiftR` 12) .&. 0x3F) + 0x80
    x3 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80
    x4 = fromIntegral $ (n .&. 0x3F) + 0x80

data CodingFailureMode
    = TransliterateCodingFailure
    | ErrorOnCodingFailure
    deriving (Show)

{-# INLINE replacementChar #-}
replacementChar :: Char
replacementChar = '\xFFFD'

-- Int helps in cheaper conversion from Int to Char
type CodePoint = Int
type DecodeState = Word8

-- See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.

-- XXX Use names decodeSuccess = 0, decodeFailure = 12

decodeTable :: [Word8]
decodeTable = [
   -- The first part of the table maps bytes to character classes that
   -- to reduce the size of the transition table and create bitmasks.
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,

   -- The second part is a transition table that maps a combination
   -- of a state of the automaton and a character class to a state.
   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
  12,36,12,12,12,12,12,12,12,12,12,12
  ]

utf8d :: A.Array Word8
utf8d =
      unsafePerformIO
    -- Aligning to cacheline makes a barely noticeable difference
    -- XXX currently alignment is not implemented for unmanaged allocation
    $ D.runFold (A.writeNAlignedUnmanaged 64 (length decodeTable))
              (D.fromList decodeTable)

-- | Return element at the specified index without checking the bounds.
-- and without touching the foreign ptr.
{-# INLINE_NORMAL unsafePeekElemOff #-}
unsafePeekElemOff :: forall a. Storable a => Ptr a -> Int -> a
unsafePeekElemOff p i = let !x = A.unsafeInlineIO $ peekElemOff p i in x

-- decode is split into two separate cases to avoid branching instructions.
-- From the higher level flow we already know which case we are in so we can
-- call the appropriate decode function.
--
-- When the state is 0
{-# INLINE decode0 #-}
decode0 :: Ptr Word8 -> Word8 -> Tuple' DecodeState CodePoint
decode0 table byte =
    let !t = table `unsafePeekElemOff` fromIntegral byte
        !codep' = (0xff `shiftR` (fromIntegral t)) .&. fromIntegral byte
        !state' = table `unsafePeekElemOff` (256 + fromIntegral t)
     in assert ((byte > 0x7f || error showByte)
                && (state' /= 0 || error (showByte ++ showTable)))
               (Tuple' state' codep')

    where

    utf8table =
        let !(Ptr addr) = table
            end = table `plusPtr` 364
        in A.Array (ForeignPtr addr undefined) end end :: A.Array Word8
    showByte = "Streamly: decode0: byte: " ++ show byte
    showTable = " table: " ++ show utf8table

-- When the state is not 0
{-# INLINE decode1 #-}
decode1
    :: Ptr Word8
    -> DecodeState
    -> CodePoint
    -> Word8
    -> Tuple' DecodeState CodePoint
decode1 table state codep byte =
    -- Remember codep is Int type!
    -- Can it be unsafe to convert the resulting Int to Char?
    let !t = table `unsafePeekElemOff` fromIntegral byte
        !codep' = (fromIntegral byte .&. 0x3f) .|. (codep `shiftL` 6)
        !state' = table `unsafePeekElemOff`
                    (256 + fromIntegral state + fromIntegral t)
     in assert (codep' <= 0x10FFFF
                    || error (showByte ++ showState state codep))
               (Tuple' state' codep')
    where

    utf8table =
        let !(Ptr addr) = table
            end = table `plusPtr` 364
        in A.Array (ForeignPtr addr undefined) end end :: A.Array Word8
    showByte = "Streamly: decode1: byte: " ++ show byte
    showState st cp =
        " state: " ++ show st ++
        " codepoint: " ++ show cp ++
        " table: " ++ show utf8table

-- We can divide the errors in three general categories:
-- * A non-starter was encountered in a begin state
-- * A starter was encountered without completing a codepoint
-- * The last codepoint was not complete (input underflow)
--
data DecodeError = DecodeError !DecodeState !CodePoint deriving Show

data FreshPoint s a
    = FreshPointDecodeInit s
    | FreshPointDecodeInit1 s Word8
    | FreshPointDecodeFirst s Word8
    | FreshPointDecoding s !DecodeState !CodePoint
    | YieldAndContinue a (FreshPoint s a)
    | Done

-- XXX Add proper error messages
-- XXX Implement this in terms of decodeUtf8Either
{-# INLINE_NORMAL decodeUtf8WithD #-}
decodeUtf8WithD :: Monad m => CodingFailureMode -> Stream m Word8 -> Stream m Char
decodeUtf8WithD cfm (Stream step state) =
    let A.Array p _ _ = utf8d
        !ptr = (unsafeForeignPtrToPtr p)
    in Stream (step' ptr) (FreshPointDecodeInit state)
  where
    {-# INLINE transliterateOrError #-}
    transliterateOrError e s =
        case cfm of
            ErrorOnCodingFailure -> error e
            TransliterateCodingFailure -> YieldAndContinue replacementChar s
    {-# INLINE inputUnderflow #-}
    inputUnderflow =
        case cfm of
            ErrorOnCodingFailure ->
                error "Streamly.Internal.Data.Stream.StreamD.decodeUtf8With: Input Underflow"
            TransliterateCodingFailure -> YieldAndContinue replacementChar Done
    {-# INLINE_LATE step' #-}
    step' _ gst (FreshPointDecodeInit st) = do
        r <- step (adaptState gst) st
        return $ case r of
            Yield x s -> Skip (FreshPointDecodeInit1 s x)
            Skip s -> Skip (FreshPointDecodeInit s)
            Stop   -> Skip Done

    step' _ _ (FreshPointDecodeInit1 st x) = do
        -- Note: It is important to use a ">" instead of a "<=" test
        -- here for GHC to generate code layout for default branch
        -- prediction for the common case. This is fragile and might
        -- change with the compiler versions, we need a more reliable
        -- "likely" primitive to control branch predication.
        case x > 0x7f of
            False ->
                return $ Skip $ YieldAndContinue
                    (unsafeChr (fromIntegral x))
                    (FreshPointDecodeInit st)
            -- Using a separate state here generates a jump to a
            -- separate code block in the core which seems to perform
            -- slightly better for the non-ascii case.
            True -> return $ Skip $ FreshPointDecodeFirst st x

    -- XXX should we merge it with FreshPointDecodeInit1?
    step' table _ (FreshPointDecodeFirst st x) = do
        let (Tuple' sv cp) = decode0 table x
        return $
            case sv of
                12 ->
                    Skip $
                    transliterateOrError
                        "Streamly.Internal.Data.Stream.StreamD.decodeUtf8With: Invalid UTF8 codepoint encountered"
                        (FreshPointDecodeInit st)
                0 -> error "unreachable state"
                _ -> Skip (FreshPointDecoding st sv cp)

    -- We recover by trying the new byte x a starter of a new codepoint.
    -- XXX need to use the same recovery in array decoding routine as well
    step' table gst (FreshPointDecoding st statePtr codepointPtr) = do
        r <- step (adaptState gst) st
        case r of
            Yield x s -> do
                let (Tuple' sv cp) = decode1 table statePtr codepointPtr x
                return $
                    case sv of
                        0 -> Skip $ YieldAndContinue (unsafeChr cp)
                                        (FreshPointDecodeInit s)
                        12 ->
                            Skip $
                            transliterateOrError
                                "Streamly.Internal.Data.Stream.StreamD.decodeUtf8With: Invalid UTF8 codepoint encountered"
                                (FreshPointDecodeInit1 s x)
                        _ -> Skip (FreshPointDecoding s sv cp)
            Skip s -> return $ Skip (FreshPointDecoding s statePtr codepointPtr)
            Stop -> return $ Skip inputUnderflow

    step' _ _ (YieldAndContinue c s) = return $ Yield c s
    step' _ _ Done = return Stop

{-# INLINE decodeUtf8D #-}
decodeUtf8D :: Monad m => Stream m Word8 -> Stream m Char
decodeUtf8D = decodeUtf8WithD ErrorOnCodingFailure

{-# INLINE decodeUtf8LenientD #-}
decodeUtf8LenientD :: Monad m => Stream m Word8 -> Stream m Char
decodeUtf8LenientD = decodeUtf8WithD TransliterateCodingFailure

{-# INLINE_NORMAL resumeDecodeUtf8EitherD #-}
resumeDecodeUtf8EitherD
    :: Monad m
    => DecodeState
    -> CodePoint
    -> Stream m Word8
    -> Stream m (Either DecodeError Char)
resumeDecodeUtf8EitherD dst codep (Stream step state) =
    let A.Array p _ _ = utf8d
        !ptr = (unsafeForeignPtrToPtr p)
        stt =
            if dst == 0
            then FreshPointDecodeInit state
            else FreshPointDecoding state dst codep
    in Stream (step' ptr) stt
  where
    {-# INLINE_LATE step' #-}
    step' _ gst (FreshPointDecodeInit st) = do
        r <- step (adaptState gst) st
        return $ case r of
            Yield x s -> Skip (FreshPointDecodeInit1 s x)
            Skip s -> Skip (FreshPointDecodeInit s)
            Stop   -> Skip Done

    step' _ _ (FreshPointDecodeInit1 st x) = do
        -- Note: It is important to use a ">" instead of a "<=" test
        -- here for GHC to generate code layout for default branch
        -- prediction for the common case. This is fragile and might
        -- change with the compiler versions, we need a more reliable
        -- "likely" primitive to control branch predication.
        case x > 0x7f of
            False ->
                return $ Skip $ YieldAndContinue
                    (Right $ unsafeChr (fromIntegral x))
                    (FreshPointDecodeInit st)
            -- Using a separate state here generates a jump to a
            -- separate code block in the core which seems to perform
            -- slightly better for the non-ascii case.
            True -> return $ Skip $ FreshPointDecodeFirst st x

    -- XXX should we merge it with FreshPointDecodeInit1?
    step' table _ (FreshPointDecodeFirst st x) = do
        let (Tuple' sv cp) = decode0 table x
        return $
            case sv of
                12 ->
                    Skip $ YieldAndContinue (Left $ DecodeError 0 (fromIntegral x))
                                            (FreshPointDecodeInit st)
                0 -> error "unreachable state"
                _ -> Skip (FreshPointDecoding st sv cp)

    -- We recover by trying the new byte x a starter of a new codepoint.
    -- XXX need to use the same recovery in array decoding routine as well
    step' table gst (FreshPointDecoding st statePtr codepointPtr) = do
        r <- step (adaptState gst) st
        case r of
            Yield x s -> do
                let (Tuple' sv cp) = decode1 table statePtr codepointPtr x
                return $
                    case sv of
                        0 -> Skip $ YieldAndContinue (Right $ unsafeChr cp)
                                        (FreshPointDecodeInit s)
                        12 ->
                            Skip $ YieldAndContinue (Left $ DecodeError statePtr codepointPtr)
                                        (FreshPointDecodeInit1 s x)
                        _ -> Skip (FreshPointDecoding s sv cp)
            Skip s -> return $ Skip (FreshPointDecoding s statePtr codepointPtr)
            Stop -> return $ Skip $ YieldAndContinue (Left $ DecodeError statePtr codepointPtr) Done

    step' _ _ (YieldAndContinue c s) = return $ Yield c s
    step' _ _ Done = return Stop

{-# INLINE_NORMAL decodeUtf8EitherD #-}
decodeUtf8EitherD :: Monad m
    => Stream m Word8 -> Stream m (Either DecodeError Char)
decodeUtf8EitherD = resumeDecodeUtf8EitherD 0 0

data FlattenState s a
    = OuterLoop s !(Maybe (DecodeState, CodePoint))
    | InnerLoopDecodeInit s (ForeignPtr a) !(Ptr a) !(Ptr a)
    | InnerLoopDecodeFirst s (ForeignPtr a) !(Ptr a) !(Ptr a) Word8
    | InnerLoopDecoding s (ForeignPtr a) !(Ptr a) !(Ptr a)
        !DecodeState !CodePoint
    | YAndC !Char (FlattenState s a) -- These constructors can be
                                     -- encoded in the FreshPoint
                                     -- type, I prefer to keep these
                                     -- flat even though that means
                                     -- coming up with new names
    | D

-- The normal decodeUtf8 above should fuse with flattenArrays
-- to create this exact code but it doesn't for some reason, as of now this
-- remains the fastest way I could figure out to decodeUtf8.
--
-- XXX Add Proper error messages
{-# INLINE_NORMAL decodeUtf8ArraysWithD #-}
decodeUtf8ArraysWithD ::
       MonadIO m
    => CodingFailureMode
    -> Stream m (A.Array Word8)
    -> Stream m Char
decodeUtf8ArraysWithD cfm (Stream step state) =
    let A.Array p _ _ = utf8d
        !ptr = (unsafeForeignPtrToPtr p)
    in Stream (step' ptr) (OuterLoop state Nothing)
  where
    {-# INLINE transliterateOrError #-}
    transliterateOrError e s =
        case cfm of
            ErrorOnCodingFailure -> error e
            TransliterateCodingFailure -> YAndC replacementChar s
    {-# INLINE inputUnderflow #-}
    inputUnderflow =
        case cfm of
            ErrorOnCodingFailure ->
                error
                    "Streamly.Internal.Data.Stream.StreamD.decodeUtf8ArraysWith: Input Underflow"
            TransliterateCodingFailure -> YAndC replacementChar D
    {-# INLINE_LATE step' #-}
    step' _ gst (OuterLoop st Nothing) = do
        r <- step (adaptState gst) st
        return $
            case r of
                Yield A.Array {..} s ->
                    let p = unsafeForeignPtrToPtr aStart
                     in Skip (InnerLoopDecodeInit s aStart p aEnd)
                Skip s -> Skip (OuterLoop s Nothing)
                Stop -> Skip D
    step' _ gst (OuterLoop st dst@(Just (ds, cp))) = do
        r <- step (adaptState gst) st
        return $
            case r of
                Yield A.Array {..} s ->
                    let p = unsafeForeignPtrToPtr aStart
                     in Skip (InnerLoopDecoding s aStart p aEnd ds cp)
                Skip s -> Skip (OuterLoop s dst)
                Stop -> Skip inputUnderflow
    step' _ _ (InnerLoopDecodeInit st startf p end)
        | p == end = do
            liftIO $ touchForeignPtr startf
            return $ Skip $ OuterLoop st Nothing
    step' _ _ (InnerLoopDecodeInit st startf p end) = do
        x <- liftIO $ peek p
        -- Note: It is important to use a ">" instead of a "<=" test here for
        -- GHC to generate code layout for default branch prediction for the
        -- common case. This is fragile and might change with the compiler
        -- versions, we need a more reliable "likely" primitive to control
        -- branch predication.
        case x > 0x7f of
            False ->
                return $ Skip $ YAndC
                    (unsafeChr (fromIntegral x))
                    (InnerLoopDecodeInit st startf (p `plusPtr` 1) end)
            -- Using a separate state here generates a jump to a separate code
            -- block in the core which seems to perform slightly better for the
            -- non-ascii case.
            True -> return $ Skip $ InnerLoopDecodeFirst st startf p end x

    step' table _ (InnerLoopDecodeFirst st startf p end x) = do
        let (Tuple' sv cp) = decode0 table x
        return $
            case sv of
                12 ->
                    Skip $
                    transliterateOrError
                        "Streamly.Internal.Data.Stream.StreamD.decodeUtf8ArraysWith: Invalid UTF8 codepoint encountered"
                        (InnerLoopDecodeInit st startf (p `plusPtr` 1) end)
                0 -> error "unreachable state"
                _ -> Skip (InnerLoopDecoding st startf (p `plusPtr` 1) end sv cp)
    step' _ _ (InnerLoopDecoding st startf p end sv cp)
        | p == end = do
            liftIO $ touchForeignPtr startf
            return $ Skip $ OuterLoop st (Just (sv, cp))
    step' table _ (InnerLoopDecoding st startf p end statePtr codepointPtr) = do
        x <- liftIO $ peek p
        let (Tuple' sv cp) = decode1 table statePtr codepointPtr x
        return $
            case sv of
                0 ->
                    Skip $
                    YAndC
                        (unsafeChr cp)
                        (InnerLoopDecodeInit st startf (p `plusPtr` 1) end)
                12 ->
                    Skip $
                    transliterateOrError
                        "Streamly.Internal.Data.Stream.StreamD.decodeUtf8ArraysWith: Invalid UTF8 codepoint encountered"
                        (InnerLoopDecodeInit st startf (p `plusPtr` 1) end)
                _ -> Skip (InnerLoopDecoding st startf (p `plusPtr` 1) end sv cp)
    step' _ _ (YAndC c s) = return $ Yield c s
    step' _ _ D = return Stop

{-# INLINE decodeUtf8ArraysD #-}
decodeUtf8ArraysD ::
       MonadIO m
    => Stream m (A.Array Word8)
    -> Stream m Char
decodeUtf8ArraysD = decodeUtf8ArraysWithD ErrorOnCodingFailure

{-# INLINE decodeUtf8ArraysLenientD #-}
decodeUtf8ArraysLenientD ::
       MonadIO m
    => Stream m (A.Array Word8)
    -> Stream m Char
decodeUtf8ArraysLenientD = decodeUtf8ArraysWithD TransliterateCodingFailure

data EncodeState s = EncodeState s !WList

-- More yield points improve performance, but I am not sure if they can cause
-- too much code bloat or some trouble with fusion. So keeping only two yield
-- points for now, one for the ascii chars (fast path) and one for all other
-- paths (slow path).
{-# INLINE_NORMAL encodeUtf8D #-}
encodeUtf8D :: Monad m => Stream m Char -> Stream m Word8
encodeUtf8D (Stream step state) = Stream step' (EncodeState state WNil)
  where
    {-# INLINE_LATE step' #-}
    step' gst (EncodeState st WNil) = do
        r <- step (adaptState gst) st
        return $
            case r of
                Yield c s ->
                    case ord c of
                        x
                            | x <= 0x7F ->
                                Yield (fromIntegral x) (EncodeState s WNil)
                            | x <= 0x7FF -> Skip (EncodeState s (ord2 c))
                            | x <= 0xFFFF ->
                                if isSurrogate c
                                    then error
                                             "Streamly.Internal.Data.Stream.StreamD.encodeUtf8: Encountered a surrogate"
                                    else Skip (EncodeState s (ord3 c))
                            | otherwise -> Skip (EncodeState s (ord4 c))
                Skip s -> Skip (EncodeState s WNil)
                Stop -> Stop
    step' _ (EncodeState s (WCons x xs)) = return $ Yield x (EncodeState s xs)


-- | Decode a stream of bytes to Unicode characters by mapping each byte to a
-- corresponding Unicode 'Char' in 0-255 range.
--
-- /Since: 0.7.0/
{-# INLINE decodeLatin1 #-}
decodeLatin1 :: (IsStream t, Monad m) => t m Word8 -> t m Char
decodeLatin1 = S.map (unsafeChr . fromIntegral)

-- | Encode a stream of Unicode characters to bytes by mapping each character
-- to a byte in 0-255 range. Throws an error if the input stream contains
-- characters beyond 255.
--
-- /Since: 0.7.0/
{-# INLINE encodeLatin1 #-}
encodeLatin1 :: (IsStream t, Monad m) => t m Char -> t m Word8
encodeLatin1 = S.map convert
    where
    convert c =
        let codepoint = ord c
        in if codepoint > 255
           then error $ "Streamly.String.encodeLatin1 invalid " ++
                      "input char codepoint " ++ show codepoint
           else fromIntegral codepoint

-- | Like 'encodeLatin1' but silently truncates and maps input characters beyond
-- 255 to (incorrect) chars in 0-255 range. No error or exception is thrown
-- when such truncation occurs.
--
-- /Since: 0.7.0/
{-# INLINE encodeLatin1Lax #-}
encodeLatin1Lax :: (IsStream t, Monad m) => t m Char -> t m Word8
encodeLatin1Lax = S.map (fromIntegral . ord)

-- | Decode a UTF-8 encoded bytestream to a stream of Unicode characters.
-- The incoming stream is truncated if an invalid codepoint is encountered.
--
-- /Since: 0.7.0/
{-# INLINE decodeUtf8 #-}
decodeUtf8 :: (Monad m, IsStream t) => t m Word8 -> t m Char
decodeUtf8 = D.fromStreamD . decodeUtf8D . D.toStreamD

-- |
--
-- /Internal/
{-# INLINE decodeUtf8Arrays #-}
decodeUtf8Arrays :: (MonadIO m, IsStream t) => t m (Array Word8) -> t m Char
decodeUtf8Arrays = D.fromStreamD . decodeUtf8ArraysD . D.toStreamD

-- | Decode a UTF-8 encoded bytestream to a stream of Unicode characters.
-- Any invalid codepoint encountered is replaced with the unicode replacement
-- character.
--
-- /Since: 0.7.0/
{-# INLINE decodeUtf8Lax #-}
decodeUtf8Lax :: (Monad m, IsStream t) => t m Word8 -> t m Char
decodeUtf8Lax = D.fromStreamD . decodeUtf8LenientD . D.toStreamD

-- |
--
-- /Internal/
{-# INLINE decodeUtf8Either #-}
decodeUtf8Either :: (Monad m, IsStream t)
    => t m Word8 -> t m (Either DecodeError Char)
decodeUtf8Either = D.fromStreamD . decodeUtf8EitherD . D.toStreamD

-- |
--
-- /Internal/
{-# INLINE resumeDecodeUtf8Either #-}
resumeDecodeUtf8Either
    :: (Monad m, IsStream t)
    => DecodeState
    -> CodePoint
    -> t m Word8
    -> t m (Either DecodeError Char)
resumeDecodeUtf8Either st cp =
    D.fromStreamD . resumeDecodeUtf8EitherD st cp . D.toStreamD

-- |
--
-- /Internal/
{-# INLINE decodeUtf8ArraysLenient #-}
decodeUtf8ArraysLenient ::
       (MonadIO m, IsStream t) => t m (Array Word8) -> t m Char
decodeUtf8ArraysLenient =
    D.fromStreamD . decodeUtf8ArraysLenientD . D.toStreamD

-- | Encode a stream of Unicode characters to a UTF-8 encoded bytestream.
--
-- /Since: 0.7.0/
{-# INLINE encodeUtf8 #-}
encodeUtf8 :: (Monad m, IsStream t) => t m Char -> t m Word8
encodeUtf8 = D.fromStreamD . encodeUtf8D . D.toStreamD

{-
-------------------------------------------------------------------------------
-- Utility operations on strings
-------------------------------------------------------------------------------

strip :: IsStream t => t m Char -> t m Char
strip = undefined

stripEnd :: IsStream t => t m Char -> t m Char
stripEnd = undefined
-}

-- | Remove leading whitespace from a string.
--
-- > stripStart = S.dropWhile isSpace
--
-- /Internal/
{-# INLINE stripStart #-}
stripStart :: (Monad m, IsStream t) => t m Char -> t m Char
stripStart = S.dropWhile isSpace

-- | Fold each line of the stream using the supplied 'Fold'
-- and stream the result.
--
-- >>> S.toList $ lines FL.toList (S.fromList "lines\nthis\nstring\n\n\n")
-- ["lines", "this", "string", "", ""]
--
-- > lines = S.splitOnSuffix (== '\n')
--
-- /Internal/
{-# INLINE lines #-}
lines :: (Monad m, IsStream t) => Fold m Char b -> t m Char -> t m b
lines = S.splitOnSuffix (== '\n')

foreign import ccall unsafe "u_iswspace"
  iswspace :: Int -> Int

-- | Code copied from base/Data.Char to INLINE it
{-# INLINE isSpace #-}
isSpace :: Char -> Bool
isSpace c
  | uc <= 0x377 = uc == 32 || uc - 0x9 <= 4 || uc == 0xa0
  | otherwise = iswspace (ord c) /= 0
  where
    uc = fromIntegral (ord c) :: Word

-- | Fold each word of the stream using the supplied 'Fold'
-- and stream the result.
--
-- >>>  S.toList $ words FL.toList (S.fromList "fold these     words")
-- ["fold", "these", "words"]
--
-- > words = S.wordsBy isSpace
--
-- /Internal/
{-# INLINE words #-}
words :: (Monad m, IsStream t) => Fold m Char b -> t m Char -> t m b
words = S.wordsBy isSpace

-- | Unfold a stream to character streams using the supplied 'Unfold'
-- and concat the results suffixing a newline character @\\n@ to each stream.
--
-- /Internal/
{-# INLINE unlines #-}
unlines :: (MonadIO m, IsStream t) => Unfold m a Char -> t m a -> t m Char
unlines = S.interposeSuffix '\n'

-- | Unfold the elements of a stream to character streams using the supplied
-- 'Unfold' and concat the results with a whitespace character infixed between
-- the streams.
--
-- /Internal/
{-# INLINE unwords #-}
unwords :: (MonadIO m, IsStream t) => Unfold m a Char -> t m a -> t m Char
unwords = S.interpose ' '