{-# LANGUAGE MagicHash, UnboxedTuples #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE UnliftedFFITypes #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE CPP #-}
module Std.Data.Text.Base (
  
    Text(..)
  
  , validate
  , validateMaybe
  , replicate
  , cycleN
  , indexMaybe, charByteIndex, indexMaybeR, charByteIndexR
  
  , empty, singleton, copy
  
  , pack, packN, packR, packRN
  , unpack, unpackR
  
  , fromVector
  , toVector
  
  , null
  , length
  , append
  , map', imap'
  , foldl', ifoldl'
  , foldr', ifoldr'
  , concat, concatMap
    
  , count, all, any
    
  , NormalizationResult(..), NormalizeMode(..)
  , isNormalized, isNormalizedTo, normalize, normalizeTo
    
    
  , Locale, localeDefault, localeLithuanian, localeTurkishAndAzeriLatin
  , caseFold, caseFoldWith, toLower, toLowerWith, toUpper, toUpperWith, toTitle, toTitleWith
    
  , isCategory, spanCategory
  , Category
  , categoryLetterUppercase
  , categoryLetterLowercase
  , categoryLetterTitlecase
  , categoryLetterOther
  , categoryLetter
  , categoryCaseMapped
  , categoryMarkNonSpacing
  , categoryMarkSpacing
  , categoryMarkEnclosing
  , categoryMark
  , categoryNumberDecimal
  , categoryNumberLetter
  , categoryNumberOther
  , categoryNumber
  , categoryPunctuationConnector
  , categoryPunctuationDash
  , categoryPunctuationOpen
  , categoryPunctuationClose
  , categoryPunctuationInitial
  , categoryPunctuationFinal
  , categoryPunctuationOther
  , categoryPunctuation
  , categorySymbolMath
  , categorySymbolCurrency
  , categorySymbolModifier
  , categorySymbolOther
  , categorySymbol
  , categorySeparatorSpace
  , categorySeparatorLine
  , categorySeparatorParagraph
  , categorySeparator
  , categoryControl
  , categoryFormat
  , categorySurrogate
  , categoryPrivateUse
  , categoryUnassigned
  , categoryCompatibility
  , categoryIgnoreGraphemeCluste
  , categoryIscntrl
  , categoryIsprint
  , categoryIsspace
  , categoryIsblank
  , categoryIsgraph
  , categoryIspunct
  , categoryIsalnum
  , categoryIsalpha
  , categoryIsupper
  , categoryIslower
  , categoryIsdigit
  , categoryIsxdigit
  
  , c_utf8_validate_ba
  , c_utf8_validate_addr
 ) where
import           Control.DeepSeq
import           Control.Monad.ST
import           Control.Monad
import           Data.Bits
import           Data.Char          hiding (toLower, toUpper, toTitle)
import           Data.Foldable            (foldlM)
import           Data.Hashable            (Hashable(..))
import qualified Data.List                as List
import           Data.Primitive.PrimArray
import           Data.Typeable
import           Data.String              (IsString(..))
import           Data.Word
import           Foreign.C.Types          (CSize(..))
import           GHC.Exts                 (build)
import           GHC.Ptr
import           GHC.Types
import           GHC.Stack
import           GHC.CString              (unpackCString#)
import           Std.Data.Array
import           Std.Data.Text.UTF8Codec
import           Std.Data.Text.UTF8Rewind
import           Std.Data.Vector.Base     (Bytes, PrimVector(..), c_strlen)
import qualified Std.Data.Vector.Base     as V
import qualified Std.Data.Vector.Extra    as V
import qualified Std.Data.Vector.Search   as V
import           Std.Foreign.PrimArray
import           System.IO.Unsafe (unsafeDupablePerformIO)
import           Prelude                       hiding (concat, concatMap,
                                                elem, notElem, null, length, map,
                                                foldl, foldl1, foldr, foldr1,
                                                maximum, minimum, product, sum,
                                                all, any, replicate, traverse)
newtype Text = Text
    { getUTF8Bytes :: Bytes 
    }
instance Eq Text where
    Text b1 == Text b2 = b1 == b2
    {-# INLINE (==) #-}
instance Ord Text where
    Text b1 `compare` Text b2 = b1 `compare` b2 
    {-# INLINE compare #-}
instance Show Text where
    showsPrec p t = showsPrec p (unpack t)
instance Read Text where
    readsPrec p str = [ (pack x, y) | (x, y) <- readsPrec p str ]
instance NFData Text where
    rnf (Text bs) = rnf bs
instance Hashable Text where
    {-# INLINE hashWithSalt #-}
    hashWithSalt salt (Text bs) = hashWithSalt salt bs
instance IsString Text where
    {-# INLINE fromString #-}
    fromString = pack
packStringAddr :: Addr# -> Text
{-# INLINABLE packStringAddr #-}
packStringAddr addr# = validateAndCopy addr#
  where
    len = fromIntegral . unsafeDupablePerformIO $ c_strlen addr#
    valid = unsafeDupablePerformIO $ c_utf8_validate_addr addr# len
    validateAndCopy addr#
        | valid == 0 = pack (unpackCString# addr#)
        | otherwise  = runST $ do
            marr <- newPrimArray len
            copyPtrToMutablePrimArray marr 0 (Ptr addr#) len
            arr <- unsafeFreezePrimArray marr
            return $ Text (PrimVector arr 0 len)
indexMaybe :: Text -> Int -> Maybe Char
{-# INLINABLE indexMaybe #-}
indexMaybe (Text (V.PrimVector ba s l)) n
    | n < 0 = Nothing
    | otherwise = go s 0
  where
    !end = s + l
    go !i !j
        | i >= end = Nothing
        | j >= n = let !c = decodeChar_ ba i in Just c
        | otherwise =
            let l = decodeCharLen ba i in go (i+l) (j+1)
charByteIndex :: Text -> Int -> Int
{-# INLINABLE charByteIndex #-}
charByteIndex (Text (V.PrimVector ba s l)) n
    | n < 0 = s
    | otherwise = go s 0
  where
    !end = s + l
    go !i !j
        | i >= end = i
        | j >= n = i
        | otherwise =
            let l = decodeCharLen ba i in go (i+l) (j+1)
indexMaybeR :: Text -> Int -> Maybe Char
{-# INLINABLE indexMaybeR #-}
indexMaybeR (Text (V.PrimVector ba s l)) n
    | n < 0 = Nothing
    | otherwise = go (s+l-1) 0
  where
    go !i !j
        | i < s = Nothing
        | j >= n = let !c = decodeCharReverse_ ba i in Just c
        | otherwise =
            let l = decodeCharLenReverse ba i in go (i-l) (j+1)
charByteIndexR :: Text -> Int -> Int
{-# INLINABLE charByteIndexR #-}
charByteIndexR (Text (V.PrimVector ba s l)) n
    | n < 0 = s+l
    | otherwise = go (s+l-1) 0
  where
    go !i !j
        | i < s = i
        | j >= n = i
        | otherwise =
            let l = decodeCharLenReverse ba i in go (i-l) (j+1)
validate :: HasCallStack => Bytes -> Text
{-# INLINE validate #-}
validate bs@(V.PrimVector (PrimArray ba#) (I# s#) l@(I# l#))
    | l == 0 = Text bs
    | c_utf8_validate_ba ba# s# l# > 0 = Text bs
    | otherwise = error "invalid UTF8 bytes"
validateMaybe :: Bytes -> Maybe Text
{-# INLINE validateMaybe #-}
validateMaybe bs@(V.PrimVector (PrimArray ba#) (I# s#) l@(I# l#))
    | l == 0 = Just (Text bs)
    | c_utf8_validate_ba ba# s# l# > 0 = Just (Text bs)
    | otherwise = Nothing
foreign import ccall unsafe "text.h utf8_validate"
    c_utf8_validate_ba :: BA# Word8 -> Int# -> Int# -> Int
foreign import ccall unsafe "text.h utf8_validate_addr"
    c_utf8_validate_addr :: Addr# -> Int -> IO Int
pack :: String -> Text
pack = packN V.defaultInitSize
{-# INLINE CONLIKE [1] pack #-}
{-# RULES
    "pack/packStringAddr" forall addr . pack (unpackCString# addr) = packStringAddr addr
  #-}
packN :: Int -> String -> Text
{-# INLINE packN #-}
packN n0 = \ ws0 ->
    Text (V.create' (max 4 n0) (\ marr -> foldlM go (V.IPair 0 marr) ws0))
  where
    
    
    go :: V.IPair (MutablePrimArray s Word8) -> Char -> ST s (V.IPair (MutablePrimArray s Word8))
    go (V.IPair i marr) !c = do
        siz <- getSizeofMutablePrimArray marr
        if i < siz - 3  
        then do
            i' <- encodeChar marr i c
            return (V.IPair i' marr)
        else do
            let !siz' = siz `shiftL` 1
            !marr' <- resizeMutablePrimArray marr siz'
            i' <- encodeChar marr' i c
            return (V.IPair i' marr')
packR :: String -> Text
{-# INLINE packR #-}
packR = packRN V.defaultInitSize
packRN :: Int -> String -> Text
{-# INLINE packRN #-}
packRN n0 = \ ws0 -> runST (do let n = max 4 n0
                               marr <- newArr n
                               (V.IPair i marr') <- foldM go (V.IPair n marr) ws0
                               ba <- unsafeFreezeArr marr'
                               return $! Text (V.fromArr ba i (sizeofArr ba-i))
                           )
  where
    go :: V.IPair (MutablePrimArray s Word8) -> Char -> ST s (V.IPair (MutablePrimArray s Word8))
    go (V.IPair i marr) !c = do
        n <- sizeofMutableArr marr
        let l = encodeCharLength c
        if i >= l
        then do encodeChar marr (i-l) c
                return (V.IPair (i-l) marr)
        else do let !n' = n `shiftL` 1  
                !marr' <- newArr n'
                copyMutableArr marr' (n+i) marr i (n-i)
                let i' = n+i-l
                encodeChar marr' i' c
                return (V.IPair i' marr')
unpack :: Text -> String
{-# INLINE [1] unpack #-}
unpack (Text (V.PrimVector ba s l)) = go s
  where
    !end = s + l
    go !idx
        | idx >= end = []
        | otherwise = let (# c, i #) = decodeChar ba idx in c : go (idx + i)
unpackFB :: Text -> (Char -> a -> a) -> a -> a
{-# INLINE [0] unpackFB #-}
unpackFB (Text (V.PrimVector ba s l)) k z = go s
  where
    !end = s + l
    go !idx
        | idx >= end = z
        | otherwise = let (# c, i #) = decodeChar ba idx in c `k` go (idx + i)
{-# RULES
"unpack" [~1] forall t . unpack t = build (\ k z -> unpackFB t k z)
"unpackFB" [1] forall t . unpackFB t (:) [] = unpack t
 #-}
unpackR :: Text -> String
{-# INLINE [1] unpackR #-}
unpackR (Text (V.PrimVector ba s l)) = go (s+l-1)
  where
    go !idx
        | idx < s = []
        | otherwise = let (# c, i #) = decodeCharReverse ba idx in c : go (idx - i)
unpackRFB :: Text -> (Char -> a -> a) -> a -> a
{-# INLINE [0] unpackRFB #-}
unpackRFB (Text (V.PrimVector ba s l)) k z = go (s+l-1)
  where
    go !idx
        | idx < s = z
        | otherwise = let (# c, i #) = decodeCharReverse ba idx in c `k` go (idx - i)
{-# RULES
"unpackR" [~1] forall t . unpackR t = build (\ k z -> unpackRFB t k z)
"unpackRFB" [1] forall t . unpackRFB t (:) [] = unpackR t
 #-}
singleton :: Char -> Text
{-# INLINABLE singleton #-}
singleton c = Text $ V.createN 4 $ \ marr -> encodeChar marr 0 c
empty :: Text
{-# INLINABLE empty #-}
empty = Text V.empty
copy :: Text -> Text
{-# INLINE copy #-}
copy (Text bs) = Text (V.copy bs)
append :: Text -> Text -> Text
append ta tb = Text ( getUTF8Bytes ta `V.append` getUTF8Bytes tb )
{-# INLINE append #-}
null :: Text -> Bool
{-# INLINABLE null #-}
null (Text bs) = V.null bs
length :: Text -> Int
{-# INLINABLE length #-}
length (Text (V.PrimVector ba s l)) = go s 0
  where
    !end = s + l
    go !i !acc | i >= end = acc
               | otherwise = let j = decodeCharLen ba i in go (i+j) (1+acc)
map' :: (Char -> Char) -> Text -> Text
{-# INLINE map' #-}
map' f (Text (V.PrimVector arr s l)) | l == 0 = empty
                                     | otherwise = Text (V.create' (l+3) (go s 0))
  where
    end = s + l
    
    
    
    go :: Int -> Int -> MutablePrimArray s Word8 -> ST s (V.IPair (MutablePrimArray s Word8))
    go !i !j !marr
        | i >= end = return (V.IPair j marr)
        | otherwise = do
            let (# c, d #) = decodeChar arr i
            j' <- encodeChar marr j (f c)
            let !i' = i + d
            siz <- sizeofMutableArr marr
            if  j' < siz - 3
            then go i' j' marr
            else do
                let !siz' = siz `shiftL` 1
                !marr' <- resizeMutablePrimArray marr siz'
                go i' j' marr'
imap' :: (Int -> Char -> Char) -> Text -> Text
{-# INLINE imap' #-}
imap' f (Text (V.PrimVector arr s l)) | l == 0 = empty
                                      | otherwise = Text (V.create' (l+3) (go s 0 0))
  where
    end = s + l
    go :: Int -> Int -> Int -> MutablePrimArray s Word8 -> ST s (V.IPair (MutablePrimArray s Word8))
    go !i !j !k !marr
        | i >= end = return (V.IPair j marr)
        | otherwise = do
            let (# c, d #) = decodeChar arr i
            j' <- encodeChar marr j (f k c)
            let !i' = i + d
                !k' = k + 1
            siz <- sizeofMutableArr marr
            if  j' < siz - 3
            then go i' j' k' marr
            else do
                let !siz' = siz `shiftL` 1
                !marr' <- resizeMutablePrimArray marr siz'
                go i' j' k' marr'
foldl' :: (b -> Char -> b) -> b -> Text -> b
{-# INLINE foldl' #-}
foldl' f z (Text (V.PrimVector arr s l)) = go z s
  where
    !end = s + l
    
    go !acc !i | i < end  = case decodeChar arr i of
                                (# x, d #) -> go (f acc x) (i + d)
               | otherwise = acc
ifoldl' :: (b -> Int ->  Char -> b) -> b -> Text -> b
{-# INLINE ifoldl' #-}
ifoldl' f z (Text (V.PrimVector arr s l)) = go z s 0
  where
    !end = s + l
    go !acc !i !k | i < end  = case decodeChar arr i of
                                    (# x, d #) -> go (f acc k x) (i + d) (k + 1)
                  | otherwise = acc
foldr' :: (Char -> b -> b) -> b -> Text -> b
{-# INLINE foldr' #-}
foldr' f z (Text (V.PrimVector arr s l)) = go z (s+l-1)
  where
    
    go !acc !i | i >= s    = case decodeCharReverse arr i of
                                (# x, d #) -> go (f x acc) (i - d)
               | otherwise = acc
ifoldr' :: (Int -> Char -> b -> b) -> b -> Text -> b
{-# INLINE ifoldr' #-}
ifoldr' f z (Text (V.PrimVector arr s l)) = go z (s+l-1) 0
  where
    go !acc !i !k | i >= s    = case decodeCharReverse arr i of
                                    (# x, d #) -> go (f k x acc) (i - d) (k + 1)
                  | otherwise = acc
concat :: [Text] -> Text
concat = Text . V.concat . coerce
{-# INLINE concat #-}
concatMap :: (Char -> Text) -> Text -> Text
{-# INLINE concatMap #-}
concatMap f = concat . foldr' ((:) . f) []
count :: Char -> Text -> Int
{-# INLINE count #-}
count c (Text v)
    | encodeCharLength c == 1 = let w = V.c2w c in V.count w v
    | otherwise = let (Text pat) = singleton c
                  in List.length $ V.indices pat v False
any :: (Char -> Bool) -> Text -> Bool
{-# INLINE any #-}
any f (Text (V.PrimVector arr s l))
    | l <= 0    = False
    | otherwise = case decodeChar arr s of
                    (# x0, d #) -> go (f x0) (s+d)
  where
    !end = s+l
    go !acc !i | acc       = True
               | i >= end  = acc
               | otherwise = case decodeChar arr i of
                                (# x, d #) -> go (acc || f x) (i+d)
all :: (Char -> Bool) -> Text -> Bool
{-# INLINE all #-}
all f (Text (V.PrimVector arr s l))
    | l <= 0    = True
    | otherwise = case decodeChar arr s of
                    (# x0, d #) -> go (f x0) (s+d)
  where
    !end = s+l
    go !acc !i | not acc   = False
               | i >= end  = acc
               | otherwise = case decodeChar arr i of
                                (# x, d #) -> go (acc && f x) (i+d)
replicate :: Int -> Char -> Text
{-# INLINE replicate #-}
replicate 0 _ = empty
replicate n c = Text (V.create siz (go 0))
  where
    !csiz = encodeCharLength c
    !siz = n * csiz
    go :: Int -> MutablePrimArray s Word8 -> ST s ()
    go 0 marr = encodeChar marr 0 c >> go csiz marr
    go i marr | i >= siz = return ()
              | otherwise = do copyChar' csiz marr i marr (i-csiz)
                               go (i+csiz) marr
cycleN :: Int -> Text -> Text
{-# INLINE cycleN #-}
cycleN 0 _ = empty
cycleN n (Text v) = Text (V.cycleN n v)
fromVector :: V.PrimVector Char -> Text
{-# INLINE fromVector #-}
fromVector (V.PrimVector arr s l) = Text (V.createN l (go s 0))
  where
    end = s+l
    go !i !j !marr
        | i >= l = return j
        | otherwise = do
            let c = indexPrimArray arr i
            j' <- encodeChar marr j c
            go (i+1) j' marr
toVector :: Text -> V.PrimVector Char
{-# INLINE toVector #-}
toVector (Text (V.PrimVector arr s l)) = V.createN (l*4) (go s 0)
  where
    end = s+l
    go !i !j !marr
        | i >= l = return j
        | otherwise = do
            let (# c, n #) = decodeChar arr i
            writePrimArray marr j c
            go (i+n) (j+1) marr
isNormalized :: Text -> NormalizationResult
{-# INLINE isNormalized #-}
isNormalized = isNormalizedTo NFC
isNormalizedTo :: NormalizeMode -> Text -> NormalizationResult
isNormalizedTo nmode (Text (V.PrimVector (PrimArray arr#) (I# s#) l@(I# l#)))
    | l == 0 = NormalizedYes
    | otherwise =
        let nflag = normalizeModeToFlag nmode
        in toNormalizationResult (utf8_isnormalized arr# s# l# nflag)
normalize :: Text -> Text
{-# INLINE normalize #-}
normalize = normalizeTo NFC
normalizeTo :: NormalizeMode -> Text -> Text
normalizeTo nmode (Text (V.PrimVector (PrimArray arr#) (I# s#) l@(I# l#)))
    | l == 0 = empty
    | otherwise = unsafeDupablePerformIO $ do
        let nflag = normalizeModeToFlag nmode
            l'@(I# l'#) = utf8_normalize_length arr# s# l# nflag
        when (l' < 0) (error "impossible happened!")
        pa@(MutablePrimArray marr#) <- newArr l'
        utf8_normalize arr# s# l# marr# l'# nflag
        arr' <- unsafeFreezeArr pa
        let !v = V.fromArr arr' 0 l'
        return (Text v)
foreign import ccall unsafe utf8_isnormalized ::
    BA# Word8 -> Int# -> Int# -> CSize -> Int
foreign import ccall unsafe utf8_normalize ::
    BA# Word8 -> Int# -> Int# -> MBA# Word8 -> Int# -> CSize -> IO ()
foreign import ccall unsafe utf8_normalize_length ::
    BA# Word8 -> Int# -> Int# -> CSize -> Int
caseFold :: Text -> Text
caseFold = caseFoldWith localeDefault
caseFoldWith :: Locale -> Text -> Text
caseFoldWith locale (Text (V.PrimVector (PrimArray arr#) (I# s#) l@(I# l#)))
    | l == 0 = empty
    | otherwise = unsafeDupablePerformIO $ do
        let l'@(I# l'#) = utf8_casefold_length arr# s# l# locale
        when (l' < 0) (error "impossible happened!")
        pa@(MutablePrimArray marr#) <- newArr l'
        utf8_casefold arr# s# l# marr# l'# locale
        arr' <- unsafeFreezeArr pa
        let !v = V.fromArr arr' 0 l'
        return (Text v)
toLower :: Text -> Text
toLower = toLowerWith localeDefault
toLowerWith :: Locale -> Text -> Text
toLowerWith locale (Text (V.PrimVector (PrimArray arr#) (I# s#) l@(I# l#)))
    | l == 0 = empty
    | otherwise = unsafeDupablePerformIO $ do
        let l'@(I# l'#) = utf8_tolower_length arr# s# l# locale
        when (l' < 0) (error "impossible happened!")
        pa@(MutablePrimArray marr#) <- newArr l'
        utf8_tolower arr# s# l# marr# l'# locale
        arr' <- unsafeFreezeArr pa
        let !v = V.fromArr arr' 0 l'
        return (Text v)
toUpper :: Text -> Text
toUpper = toUpperWith localeDefault
toUpperWith :: Locale -> Text -> Text
toUpperWith locale (Text (V.PrimVector (PrimArray arr#) (I# s#) l@(I# l#)))
    | l == 0 = empty
    | otherwise = unsafeDupablePerformIO $ do
        let l'@(I# l'#) = utf8_toupper_length arr# s# l# locale
        when (l' < 0) (error "impossible happened!")
        pa@(MutablePrimArray marr#) <- newArr l'
        utf8_toupper arr# s# l# marr# l'# locale
        arr' <- unsafeFreezeArr pa
        let !v = V.fromArr arr' 0 l'
        return (Text v)
toTitle :: Text -> Text
toTitle = toTitleWith localeDefault
toTitleWith :: Locale -> Text -> Text
toTitleWith locale (Text (V.PrimVector (PrimArray arr#) (I# s#) l@(I# l#)))
    | l == 0 = empty
    | otherwise = unsafeDupablePerformIO $ do
        let l'@(I# l'#) = utf8_totitle_length arr# s# l# locale
        when (l' < 0) (error "impossible happened!")
        pa@(MutablePrimArray marr#) <- newArr l'
        utf8_totitle arr# s# l# marr# l'# locale
        arr' <- unsafeFreezeArr pa
        let !v = V.fromArr arr' 0 l'
        return (Text v)
foreign import ccall unsafe utf8_casefold ::
    BA# Word8 -> Int# -> Int# -> MBA# Word8 -> Int# -> Locale -> IO ()
foreign import ccall unsafe utf8_casefold_length ::
    BA# Word8 -> Int# -> Int# -> Locale -> Int
foreign import ccall unsafe utf8_tolower ::
    BA# Word8 -> Int# -> Int# -> MBA# Word8 -> Int# -> Locale -> IO ()
foreign import ccall unsafe utf8_tolower_length ::
    BA# Word8 -> Int# -> Int# -> Locale -> Int
foreign import ccall unsafe utf8_toupper ::
    BA# Word8 -> Int# -> Int# -> MBA# Word8 -> Int# -> Locale -> IO ()
foreign import ccall unsafe utf8_toupper_length ::
    BA# Word8 -> Int# -> Int# -> Locale -> Int
foreign import ccall unsafe utf8_totitle ::
    BA# Word8 -> Int# -> Int# -> MBA# Word8 -> Int# -> Locale -> IO ()
foreign import ccall unsafe utf8_totitle_length ::
    BA# Word8 -> Int# -> Int# -> Locale -> Int
isCategory :: Category -> Text -> Bool
isCategory c (Text (V.PrimVector arr@(PrimArray arr#) s@(I# s#) l@(I# l#)))
    | l == 0 = True
    | otherwise = utf8_iscategory arr# s# l# c == l
spanCategory :: Category -> Text -> (Text, Text)
spanCategory c (Text (V.PrimVector arr@(PrimArray arr#) s@(I# s#) l@(I# l#)))
    | l == 0 = (empty, empty)
    | otherwise =
        let i = utf8_iscategory arr# s# l# c
        in (Text (V.PrimVector arr s i), Text (V.PrimVector arr (s+i) (l-i)))
foreign import ccall utf8_iscategory :: BA# Word8 -> Int# -> Int# -> Category -> Int