{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}

module Data.Validity.Text where

import Control.Exception (evaluate, try)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Builder as SBB
import qualified Data.ByteString.Lazy as LB
import Data.Text (Text)
import qualified Data.Text as ST
import qualified Data.Text.Array as A
import qualified Data.Text.Encoding as E
import qualified Data.Text.Encoding.Error as E
import qualified Data.Text.Internal as ST
import qualified Data.Text.Internal.Lazy as LT
import qualified Data.Text.Unsafe as U
import Data.Validity
import Data.Word

-- | A text is valid if the internal structure is consistent.
instance Validity ST.Text where
  validate :: Text -> Validation
validate t :: Text
t@(ST.Text Array
arr Int
off Int
len) =
    [Validation] -> Validation
forall a. Monoid a => [a] -> a
mconcat
      [ String -> Bool -> Validation
declare String
"The length is positive." (Int
len Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0),
        String -> Bool -> Validation
declare String
"The offset is positive." (Int
off Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0),
        String -> Bool -> Validation
declare String
"The offset char is valid" (Bool -> Validation) -> Bool -> Validation
forall a b. (a -> b) -> a -> b
$
          let c :: Word16
c = Array -> Int -> Word16
A.unsafeIndex Array
arr Int
off
           in Int
len Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 Bool -> Bool -> Bool
|| Word16 -> Bool
offsetCharCheck Word16
c,
        String -> Bool -> Validation
declare String
"The array contains bytes in the right encoding" (Bool -> Validation) -> Bool -> Validation
forall a b. (a -> b) -> a -> b
$
          (Either UnicodeException Text
-> Either UnicodeException Text -> Bool
forall a. Eq a => a -> a -> Bool
== (Text -> Either UnicodeException Text
forall a b. b -> Either a b
Right Text
t :: Either E.UnicodeException ST.Text))
            (Either UnicodeException Text -> Bool)
-> ([Word16] -> Either UnicodeException Text) -> [Word16] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IO (Either UnicodeException Text) -> Either UnicodeException Text
forall a. IO a -> a
U.unsafeDupablePerformIO
            (IO (Either UnicodeException Text) -> Either UnicodeException Text)
-> ([Word16] -> IO (Either UnicodeException Text))
-> [Word16]
-> Either UnicodeException Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IO Text -> IO (Either UnicodeException Text)
forall e a. Exception e => IO a -> IO (Either e a)
try
            (IO Text -> IO (Either UnicodeException Text))
-> ([Word16] -> IO Text)
-> [Word16]
-> IO (Either UnicodeException Text)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> IO Text
forall a. a -> IO a
evaluate
            (Text -> IO Text) -> ([Word16] -> Text) -> [Word16] -> IO Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
validityDecoding
            (ByteString -> Text)
-> ([Word16] -> ByteString) -> [Word16] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
LB.toStrict
            (ByteString -> ByteString)
-> ([Word16] -> ByteString) -> [Word16] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
SBB.toLazyByteString
            (Builder -> ByteString)
-> ([Word16] -> Builder) -> [Word16] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat
            ([Builder] -> Builder)
-> ([Word16] -> [Builder]) -> [Word16] -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word16 -> Builder) -> [Word16] -> [Builder]
forall a b. (a -> b) -> [a] -> [b]
map Word16 -> Builder
validityWording
            ([Word16] -> Bool) -> [Word16] -> Bool
forall a b. (a -> b) -> a -> b
$ Array -> Int -> Int -> [Word16]
A.toList Array
arr Int
off Int
len
      ]
    where

#if MIN_VERSION_text(2,0,0)
      offsetCharCheck :: Word8 -> Bool
      offsetCharCheck c =  c < 0x80 || c >= 0xC0 -- Valid UTF8
#else
      offsetCharCheck :: Word16 -> Bool
      offsetCharCheck :: Word16 -> Bool
offsetCharCheck Word16
c = Word16
c Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
< Word16
0xDC00 Bool -> Bool -> Bool
|| Word16
c Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
> Word16
0xDFFF -- Valid UTF16
#endif

#if MIN_VERSION_text(2,0,0)
      validityDecoding :: ByteString -> Text
      validityDecoding = E.decodeUtf8With E.strictDecode
#else
      validityDecoding :: ByteString -> Text
      validityDecoding :: ByteString -> Text
validityDecoding = OnDecodeError -> ByteString -> Text
E.decodeUtf16LEWith OnDecodeError
E.strictDecode
#endif

#if MIN_VERSION_text(2,0,0)
      validityWording :: Word8 -> SBB.Builder
      validityWording = SBB.word8
#else
      validityWording :: Word16 -> SBB.Builder
      validityWording :: Word16 -> Builder
validityWording = Word16 -> Builder
SBB.word16LE
#endif

-- | A lazy text value is valid if all the internal chunks are valid and nonempty
instance Validity LT.Text where
  validate :: Text -> Validation
validate Text
LT.Empty = Validation
valid
  validate (LT.Chunk Text
st Text
lt) =
    [Validation] -> Validation
forall a. Monoid a => [a] -> a
mconcat
      [ String -> Text -> Validation
forall a. Validity a => String -> a -> Validation
delve String
"The strict chunk" Text
st,
        String -> Bool -> Validation
declare String
"The strict chunk is not empty" (Bool -> Validation) -> Bool -> Validation
forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ Text -> Bool
ST.null Text
st,
        String -> Text -> Validation
forall a. Validity a => String -> a -> Validation
delve String
"The lazy chunk" Text
lt
      ]

validateTextSingleLine :: ST.Text -> Validation
validateTextSingleLine :: Text -> Validation
validateTextSingleLine = String -> Validation
validateStringSingleLine (String -> Validation) -> (Text -> String) -> Text -> Validation
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
ST.unpack