{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}

-- |
-- @since 0.2.2.0
module Data.TypedEncoding.Conv.Text.Encoding where

import qualified Data.ByteString as B
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE

import           Data.TypedEncoding.Instances.Support
import qualified Data.TypedEncoding.Common.Util.TypeLits as Knds
import           Data.TypedEncoding.Instances.Restriction.UTF8 ()
import           Data.TypedEncoding.Instances.Restriction.ASCII ()
import           Data.TypedEncoding.Unsafe (withUnsafe)


-- $setup
-- >>> :set -XScopedTypeVariables -XOverloadedStrings -XDataKinds -XFlexibleContexts -XTypeApplications
-- >>> import Test.QuickCheck
-- >>> import Test.QuickCheck.Instances.Text()
-- >>> import Test.QuickCheck.Instances.ByteString()
-- >>> import Data.TypedEncoding.Instances.Restriction.BoundedAlphaNums()
-- >>> import qualified Data.ByteString.Char8 as B8
-- >>> import Data.Char
-- >>> import Data.Either
-- >>> import Data.TypedEncoding
-- >>> import Data.TypedEncoding.Conv.Text
-- >>> let emptyUTF8B = unsafeSetPayload () "" :: Enc '["r-UTF8"] () B.ByteString  
-- >>> :{
-- instance Arbitrary (Enc '["r-UTF8"] () B.ByteString) where 
--      arbitrary =  fmap (fromRight emptyUTF8B) 
--                   . flip suchThat isRight 
--                   . fmap (encodeFAll @'["r-UTF8"] @(Either EncodeEx) @(). toEncoding ()) $ arbitrary 
-- instance Arbitrary (Enc '["r-UTF8"] () T.Text) where 
--      arbitrary =  fmap (unsafeSetPayload ()) 
--                         arbitrary 
-- instance Arbitrary (Enc '["r-ASCII"] () B.ByteString) where 
--      arbitrary =  fmap (unsafeSetPayload ()) 
--                   . flip suchThat (B8.all isAscii) 
--                        $ arbitrary 
-- instance Arbitrary (Enc '["r-ASCII"] () T.Text) where 
--      arbitrary =  fmap (unsafeSetPayload ()) 
--                   . flip suchThat (T.all isAscii) 
--                        $ arbitrary 
-- :}


-- |
-- With given constraints 'decodeUtf8' and 'encodeUtf8' can be used on subsets of @"r-UTF8"@
--
-- Note: For example, the @ByteString@ encoding of @"\xd800"@  (@11101101 10100000 10000000@ @ed a0 80@) is considered invalid /UTF8/ by the 'T.Text' library.
-- To be consistent, we make the same assumption and also restrict the representable Unicode chars, as in /Unicode.D76/.
--
-- >>> TE.decodeUtf8 "\237\160\128"
-- "*** Exception: Cannot decode byte '\xed': Data.Text.Internal.Encoding.decodeUtf8: Invalid UTF-8 stream
-- 
-- The "\xdfff" case (@11101101 10111111 10111111@ @ed bf bf@):
-- >>> TE.decodeUtf8 "\237\191\191"
-- "*** Exception: Cannot decode byte '\xed': Data.Text.Internal.Encoding.decodeUtf8: Invalid UTF-8 stream
--
-- >>> displ . decodeUtf8 $ (unsafeSetPayload () "Hello" :: Enc '["r-ASCII"] () B.ByteString)
-- "Enc '[r-ASCII] () (Text Hello)"
--
-- "r-UTF8" is redundant:
--
-- >>> displ . utf8Demote . decodeUtf8 $ (unsafeSetPayload () "Hello" :: Enc '["r-UTF8"] () B.ByteString)
-- "Enc '[] () (Text Hello)"
--
-- @decodeUtf8@ and @encodeUtf8@ now form an isomorphism
-- 
-- prop> \x -> getPayload x == (getPayload . encodeUtf8 . decodeUtf8 @ '["r-UTF8"] @() $ x)
--
-- prop> \x -> getPayload x == (getPayload . decodeUtf8 . encodeUtf8 @ '["r-UTF8"] @() $ x)
--
-- These nicely work as iso's for "r-ASCII" subset
--
-- prop> \x -> getPayload x == (getPayload . encodeUtf8 . decodeUtf8 @ '["r-ASCII"] @() $ x)
-- prop> \x -> getPayload x == (getPayload . decodeUtf8 . encodeUtf8 @ '["r-ASCII"] @() $ x)
--
-- Similarly to 'Data.TypedEncoding.Conv.ByteString.Char8.pack' this function makes unverified assumption
-- that the encoding stack @xs@ does not invalidate UTF8 byte layout.  This is safe for any "r-" encoding as well
-- as any of the "enc-" and "do-" encodings that can be currently found in this library. 
-- Future versions of this method are likely to introduce constraints that guarantee better type safety.
--
--
-- The following use of decodeUtf8 is technically unsafe (even if we ignore the use of @unsafeSetPayload@),
-- since currently @"r-ban:999"@ does not have @ByteString@ instances, and that violates the assumption of
-- matching encoding/decoding stacks on both sides.
-- >>> displ . decodeUtf8 $ (unsafeSetPayload () "123" :: Enc '["r-ban:999"] () B.ByteString)
-- "Enc '[r-ban:999] () (Text 123)"
--
-- See "Data.TypedEncoding.Conv" for more detailed discussion.
--
-- Note: the implementation uses the partial 'TE.decodeUtf8' function but provides a type level guarantee that this function
-- will not error out unless unsafe combinators were used in constructing the encoded input.
--
-- @since 0.4.0.0
decodeUtf8
  :: forall xs c t y ys encs
     -- NOTE(review): @t@ does not occur in the type below; it is kept so the
     -- order of type variables seen by visible type application is unchanged.
   . ( Knds.UnSnoc xs ~ '(,) ys y
     , Superset "r-UTF8" y
     , encs ~ RemoveRs ys
     , AllEncodeInto "r-UTF8" encs
     )
  => Enc xs c B.ByteString
  -> Enc xs c T.Text
-- The payload conversion is the partial 'TE.decodeUtf8', applied through
-- 'withUnsafe'; the constraints in the signature are what is relied on to keep
-- it from being handed bytes that are not valid UTF-8.
decodeUtf8 = withUnsafe (TE.decodeUtf8 <$>)

-- | Simplified version of @decodeUtf8@ that works on single /r-/ encodings.
--
-- @since 0.5.2.0
decodeUtf8_1
  :: Superset "r-UTF8" y
  => Enc '[y] c B.ByteString
  -> Enc '[y] c T.Text
-- Plain alias: 'decodeUtf8' used at a one-element annotation list @'[y]@.
decodeUtf8_1 = decodeUtf8

-- |
-- >>> displ $ encodeUtf8 $ utf8Promote $ toEncoding () ("text" :: T.Text)
-- "Enc '[r-UTF8] () (ByteString text)"
--
-- See 'decodeUtf8'.  Similar type safety concerns apply.
--
-- See "Data.TypedEncoding.Conv" for more detailed discussion.
--
-- @since 0.4.0.0
encodeUtf8
  :: forall xs c t y ys encs
     -- NOTE(review): @t@ does not occur in the type below; it is kept so the
     -- order of type variables seen by visible type application is unchanged.
   . ( Knds.UnSnoc xs ~ '(,) ys y
     , Superset "r-UTF8" y
     , encs ~ RemoveRs ys
     , AllEncodeInto "r-UTF8" encs
     )
  => Enc xs c T.Text
  -> Enc xs c B.ByteString
-- The payload conversion is 'TE.encodeUtf8', applied through 'withUnsafe';
-- the annotation list @xs@ and the configuration type @c@ are the same on
-- both sides of the signature.
encodeUtf8 = withUnsafe (TE.encodeUtf8 <$>)

-- | Simplified version of @encodeUtf8@ that works on single /r-/ encodings.
--
-- @since 0.5.2.0
encodeUtf8_1
  :: Superset "r-UTF8" y
  => Enc '[y] c T.Text
  -> Enc '[y] c B.ByteString
-- Plain alias: 'encodeUtf8' used at a one-element annotation list @'[y]@.
encodeUtf8_1 = encodeUtf8