{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE CPP #-}

module Data.Validity.Text where

import Control.Exception (evaluate, try)

import Data.Validity

import qualified Data.ByteString.Builder as SBB
import qualified Data.ByteString.Lazy as LB
import Data.Text ()
import qualified Data.Text.Array as A
import qualified Data.Text.Encoding as E
import qualified Data.Text.Encoding.Error as E
import Data.Text.Internal (Text(..))
import qualified Data.Text.Unsafe as U
#if !MIN_VERSION_base(4,8,0)
import Data.Monoid (mconcat)
#endif
-- | A text is valid if the internal structure is consistent.
instance Validity Text where
    isValid t@(Text arr off len) =
        and
            [ len >= 0
            , off >= 0
            , let c = A.unsafeIndex arr off
              in len == 0 || c < 0xDC00 || c > 0xDFFF
                 -- It contains a valid UTF16
            , (== (Right t :: Either E.UnicodeException Text)) $
              U.unsafeDupablePerformIO .
              try .
              evaluate .
              E.decodeUtf16LEWith E.strictDecode .
              LB.toStrict . SBB.toLazyByteString . mconcat . map SBB.word16LE $
              A.toList arr off len
            ]
    validate t@(Text arr off len) =
        mconcat
            [ len >= 0 <?!> "The length is positive."
            , off >= 0 <?!> "The offset is positive."
            , (let c = A.unsafeIndex arr off
               in len == 0 || c < 0xDC00 || c > 0xDFFF) <?!>
              "The offset character is valid UTF16."
                 -- It contains a valid UTF16
            , ((== (Right t :: Either E.UnicodeException Text)) $
               U.unsafeDupablePerformIO .
               try .
               evaluate .
               E.decodeUtf16LEWith E.strictDecode .
               LB.toStrict . SBB.toLazyByteString . mconcat . map SBB.word16LE $
               A.toList arr off len) <?!>
              "The bytes can correctly be decoded as UTF16."
            ]