{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE CPP #-}

module Data.Validity.Text where

import Control.Exception (evaluate, try)

import Data.Validity

import qualified Data.ByteString.Builder as SBB
import qualified Data.ByteString.Lazy as LB
import qualified Data.Text as ST
import qualified Data.Text.Array as A
import qualified Data.Text.Encoding as E
import qualified Data.Text.Encoding.Error as E
import qualified Data.Text.Internal as ST
import qualified Data.Text.Internal.Lazy as LT
import qualified Data.Text.Unsafe as U

-- | A text is valid if the internal structure is consistent.
instance Validity ST.Text where
    validate t@(ST.Text arr off len) =
        mconcat
            [ check (len >= 0) "The length is positive."
            , check (off >= 0) "The offset is positive."
            , check
                  (let c = A.unsafeIndex arr off
                    in len == 0 || c < 0xDC00 || c > 0xDFFF)
                  "The offset character is valid UTF16."
                 -- It contains a valid UTF16
            , check
                  ((== (Right t :: Either E.UnicodeException ST.Text)) $
                   U.unsafeDupablePerformIO .
                   try .
                   evaluate .
                   E.decodeUtf16LEWith E.strictDecode .
                   LB.toStrict .
                   SBB.toLazyByteString . mconcat . map SBB.word16LE $
                   A.toList arr off len)
                  "The bytes can correctly be decoded as UTF16."
            ]

-- | A lazy text value is valid if all the internal chunks are valid and nonempty
instance Validity LT.Text where
    validate LT.Empty = valid
    validate (LT.Chunk st lt) =
        mconcat
            [ delve "The strict chunk" st
            , declare "The strict chunk is not empty" $ not $ ST.null st
            , delve "The lazy chunk" lt
            ]