module Data.Text.Short.Internal
    ( 
      ShortText(..)
      
    , null
    , length
    , isAscii
    , splitAt
    , splitAtEnd
    , indexEndMaybe
    , indexMaybe
    , isPrefixOf
    , stripPrefix
    , isSuffixOf
    , stripSuffix
    , cons
    , snoc
    , uncons
    , unsnoc
    , findIndex
    , find
    , all
    , span
    , spanEnd
    , intersperse
    , intercalate
    , reverse
    , replicate
    , filter
    , dropAround
    , foldl
    , foldl'
    , foldr
    , foldl1
    , foldl1'
    , foldr1
      
      
    , singleton
      
    , Data.Text.Short.Internal.fromString
    , toString
      
    , fromText
    , toText
      
    , fromShortByteString
    , fromShortByteStringUnsafe
    , toShortByteString
    , fromByteString
    , fromByteStringUnsafe
    , toByteString
    , toBuilder
      
      
    , BS.ByteString
    , T.Text
    , module Prelude
      
    , isValidUtf8
    ) where
import           Control.DeepSeq                (NFData)
import           Control.Monad.ST               (stToIO)
import           Data.Binary
import           Data.Bits
import qualified Data.ByteString                as BS
import qualified Data.ByteString.Builder        as BB
import           Data.ByteString.Short          (ShortByteString)
import qualified Data.ByteString.Short          as BSS
import qualified Data.ByteString.Short.Internal as BSSI
import           Data.Char                      (ord)
import           Data.Hashable                  (Hashable)
import qualified Data.List                      as List
import           Data.Maybe                     (fromMaybe, isNothing)
import           Data.Semigroup
import qualified Data.String                    as S
import qualified Data.Text                      as T
import qualified Data.Text.Encoding             as T
import           Foreign.C
import           GHC.Base                       (assert, unsafeChr)
import qualified GHC.CString                    as GHC
import           GHC.Exts                       (Addr#, ByteArray#, Int (I#),
                                                 Int#, MutableByteArray#,
                                                 Ptr (..), RealWorld, Word (W#))
import qualified GHC.Exts
import qualified GHC.Foreign                    as GHC
import           GHC.IO.Encoding
import           GHC.ST
import           Prelude                        hiding (all, any, break, concat,
                                                 drop, dropWhile, filter, foldl,
                                                 foldl1, foldr, foldr1, head,
                                                 init, last, length, null,
                                                 replicate, reverse, span,
                                                 splitAt, tail, take, takeWhile)
import           System.IO.Unsafe
import           Text.Printf                    (PrintfArg, formatArg,
                                                 formatString)
import qualified PrimOps
newtype ShortText = ShortText ShortByteString
                  deriving (Monoid,Data.Semigroup.Semigroup,Hashable,NFData)
instance Eq ShortText where
  
  (==) x y
    | lx /= ly  = False
    | lx ==  0  = True
    | otherwise = case PrimOps.compareByteArrays# (toByteArray# x) 0# (toByteArray# y) 0# n# of
                    0# -> True
                    _  -> False
    where
      !lx@(I# n#) = toLength x
      !ly = toLength y
instance Ord ShortText where
  compare t1 t2
    | n == 0  = compare n1 n2
    | otherwise = case PrimOps.compareByteArrays# ba1# 0# ba2# 0# n# of
        r# | I# r# < 0 -> LT
           | I# r# > 0 -> GT
           | n1 < n2   -> LT
           | n1 > n2   -> GT
           | otherwise -> EQ
    where
      ba1# = toByteArray# t1
      ba2# = toByteArray# t2
      !n1 = toLength t1
      !n2 = toLength t2
      !n@(I# n#) = n1 `min` n2
instance Show ShortText where
    showsPrec p (ShortText b) = showsPrec p (decodeStringShort' utf8 b)
    show (ShortText b)        = show        (decodeStringShort' utf8 b)
instance Read ShortText where
    readsPrec p = map (\(x,s) -> (ShortText $ encodeStringShort utf8 x,s)) . readsPrec p
instance PrintfArg ShortText where
  formatArg txt = formatString $ toString txt
#if MIN_VERSION_binary(0,8,1)
instance Binary ShortText where
    put = put . toShortByteString
    get = do
        sbs <- get
        case fromShortByteString sbs of
          Nothing -> fail "Binary.get(ShortText): Invalid UTF-8 stream"
          Just st -> return st
#else
instance Binary ShortText where
    put = put . toByteString
    get = do
        bs <- get
        case fromByteString bs of
          Nothing -> fail "Binary.get(ShortText): Invalid UTF-8 stream"
          Just st -> return st
#endif
null :: ShortText -> Bool
null = BSS.null . toShortByteString
length :: ShortText -> Int
length st = fromIntegral $ unsafeDupablePerformIO (c_text_short_length (toByteArray# st) (toCSize st))
foreign import ccall unsafe "hs_text_short_length" c_text_short_length :: ByteArray# -> CSize -> IO CSize
isAscii :: ShortText -> Bool
isAscii st = (/= 0) $ unsafeDupablePerformIO (c_text_short_is_ascii (toByteArray# st) sz)
  where
    sz = toCSize st
foreign import ccall unsafe "hs_text_short_is_ascii" c_text_short_is_ascii :: ByteArray# -> CSize -> IO CInt
all :: (Char -> Bool) -> ShortText -> Bool
all p st = isNothing (findOfs (not . p) st (B 0))
find :: (Char -> Bool) -> ShortText -> Maybe Char
find p st = go 0
  where
    go !ofs
      | ofs >= sz  = Nothing
      | otherwise  = let (c,ofs') = decodeCharAtOfs st ofs
                     in c `seq` ofs' `seq`
                        if p c
                        then Just c
                        else go ofs'
    !sz = toB st
findIndex :: (Char -> Bool) -> ShortText -> Maybe Int
findIndex p st = go 0 0
  where
    go !ofs !i
      | ofs >= sz  = Nothing
      | otherwise  = let (c,ofs') = decodeCharAtOfs st ofs
                     in c `seq` ofs' `seq`
                        if p c
                        then Just i
                        else go ofs' (i+1)
    !sz = toB st
findOfs :: (Char -> Bool) -> ShortText -> B -> Maybe B
findOfs p st = go
  where
    go :: B -> Maybe B
    go !ofs | ofs >= sz  = Nothing
    go !ofs | p c       = Just ofs
            | otherwise = go ofs'
      where
        (c,ofs') = decodeCharAtOfs st ofs
    !sz = toB st
findOfsRev :: (Char -> Bool) -> ShortText -> B -> Maybe B
findOfsRev p st = go
  where
    go (B 0) = Nothing
    go !ofs
      | p (cp2ch cp) = Just ofs
      | otherwise    = go (ofscpLen cp)
      where
        !cp = readCodePointRev st ofs
span :: (Char -> Bool) -> ShortText -> (ShortText,ShortText)
span p st
  | Just ofs <- findOfs (not . p) st (B 0) = splitAtOfs ofs st
  | otherwise = (st,mempty)
spanEnd :: (Char -> Bool) -> ShortText -> (ShortText,ShortText)
spanEnd p st
  | Just ofs <- findOfsRev (not . p) st (toB st) = splitAtOfs ofs st
  | otherwise = (mempty,st)
toCSize :: ShortText -> CSize
toCSize = fromIntegral . BSS.length . toShortByteString
toB :: ShortText -> B
toB = fromIntegral . BSS.length . toShortByteString
toLength :: ShortText -> Int
toLength st = I# (toLength# st)
toLength# :: ShortText -> Int#
toLength# st = GHC.Exts.sizeofByteArray# (toByteArray# st)
toByteArray# :: ShortText -> ByteArray#
toByteArray# (ShortText (BSSI.SBS ba#)) = ba#
toShortByteString :: ShortText -> ShortByteString
toShortByteString (ShortText b) = b
toByteString :: ShortText -> BS.ByteString
toByteString = BSS.fromShort . toShortByteString
toBuilder :: ShortText -> BB.Builder
toBuilder = BB.shortByteString . toShortByteString
toString :: ShortText -> String
toString st = go 0
  where
    go !ofs
      | ofs >= sz  = []
      | otherwise  = let (c,ofs') = decodeCharAtOfs st ofs
                     in c `seq` ofs' `seq` (c : go ofs')
    !sz = toB st
foldl :: (a -> Char -> a) -> a -> ShortText -> a
foldl f z st = go 0 z
  where
    go !ofs acc
      | ofs >= sz  = acc
      | otherwise  = let (c,ofs') = decodeCharAtOfs st ofs
                     in c `seq` ofs' `seq` go ofs' (f acc c)
    !sz = toB st
foldl1 :: (Char -> Char -> Char) -> ShortText -> Char
foldl1 f st
  | sz == 0    = error "foldl1: empty ShortText"
  | otherwise  = go c0sz c0
  where
    go !ofs acc
      | ofs >= sz  = acc
      | otherwise  = let (c,ofs') = decodeCharAtOfs st ofs
                     in c `seq` ofs' `seq` go ofs' (f acc c)
    !sz = toB st
    (c0,c0sz) = decodeCharAtOfs st (B 0)
foldl' :: (a -> Char -> a) -> a -> ShortText -> a
foldl' f !z st = go 0 z
  where
    go !ofs !acc
      | ofs >= sz  = acc
      | otherwise  = let (c,ofs') = decodeCharAtOfs st ofs
                     in c `seq` ofs' `seq` go ofs' (f acc c)
    !sz = toB st
foldl1' :: (Char -> Char -> Char) -> ShortText -> Char
foldl1' f st
  | sz == 0    = error "foldl1: empty ShortText"
  | otherwise  = go c0sz c0
  where
    go !ofs !acc
      | ofs >= sz  = acc
      | otherwise  = let (c,ofs') = decodeCharAtOfs st ofs
                     in c `seq` ofs' `seq` go ofs' (f acc c)
    !sz = toB st
    (c0,c0sz) = decodeCharAtOfs st (B 0)
foldr :: (Char -> a -> a) -> a -> ShortText -> a
foldr f z st = go 0
  where
    go !ofs
      | ofs >= sz  = z
      | otherwise  = let (c,ofs') = decodeCharAtOfs st ofs
                     in c `seq` ofs' `seq` f c (go ofs')
    !sz = toB st
foldr1 :: (Char -> Char -> Char) -> ShortText -> Char
foldr1 f st
  | sz == 0    = error "foldr1: empty ShortText"
  | otherwise  = go 0
  where
    go !ofs = let (c,ofs') = decodeCharAtOfs st ofs
              in c `seq` ofs' `seq`
                 (if ofs' >= sz then c else f c (go ofs'))
    !sz = toB st
toText :: ShortText -> T.Text
toText = T.decodeUtf8 . toByteString
fromString :: String -> ShortText
fromString []  = mempty
fromString [c] = singleton c
fromString s = ShortText . encodeStringShort utf8 . map r $ s
  where
    r c | isSurr (ord c) = '\xFFFD'
        | otherwise      = c
fromText :: T.Text -> ShortText
fromText = fromByteStringUnsafe . T.encodeUtf8
fromShortByteString :: ShortByteString -> Maybe ShortText
fromShortByteString sbs
  | isValidUtf8 st  = Just st
  | otherwise       = Nothing
  where
    st = ShortText sbs
fromShortByteStringUnsafe :: ShortByteString -> ShortText
fromShortByteStringUnsafe = ShortText
fromByteString :: BS.ByteString -> Maybe ShortText
fromByteString = fromShortByteString . BSS.toShort
fromByteStringUnsafe :: BS.ByteString -> ShortText
fromByteStringUnsafe = ShortText . BSS.toShort
encodeString :: TextEncoding -> String -> BS.ByteString
encodeString te str = unsafePerformIO $ GHC.withCStringLen te str BS.packCStringLen
decodeString' :: TextEncoding -> BS.ByteString -> String
decodeString' te bs = unsafePerformIO $ BS.useAsCStringLen bs (GHC.peekCStringLen te)
decodeStringShort' :: TextEncoding -> ShortByteString -> String
decodeStringShort' te = decodeString' te . BSS.fromShort
encodeStringShort :: TextEncoding -> String -> BSS.ShortByteString
encodeStringShort te = BSS.toShort . encodeString te
isValidUtf8 :: ShortText -> Bool
isValidUtf8 st = (==0) $ unsafeDupablePerformIO (c_text_short_is_valid_utf8 (toByteArray# st) (toCSize st))
type CCodePoint = Word
foreign import ccall unsafe "hs_text_short_is_valid_utf8" c_text_short_is_valid_utf8 :: ByteArray# -> CSize -> IO CInt
foreign import ccall unsafe "hs_text_short_index_cp" c_text_short_index :: ByteArray# -> CSize -> CSize -> IO CCodePoint
indexMaybe :: ShortText -> Int -> Maybe Char
indexMaybe st i
  | i < 0      = Nothing
  | otherwise  = cp2chSafe cp
  where
    cp = CP $ unsafeDupablePerformIO (c_text_short_index (toByteArray# st) (toCSize st) (fromIntegral i))
indexEndMaybe :: ShortText -> Int -> Maybe Char
indexEndMaybe st i
  | i < 0      = Nothing
  | otherwise  = cp2chSafe cp
  where
    cp = CP $ unsafeDupablePerformIO (c_text_short_index_rev (toByteArray# st) (toCSize st) (fromIntegral i))
foreign import ccall unsafe "hs_text_short_index_cp_rev" c_text_short_index_rev :: ByteArray# -> CSize -> CSize -> IO CCodePoint
splitAt :: Int -> ShortText -> (ShortText,ShortText)
splitAt i st
  | i <= 0    = (mempty,st)
  | otherwise = splitAtOfs ofs st
  where
    ofs   = csizeToB $
            unsafeDupablePerformIO (c_text_short_index_ofs (toByteArray# st) stsz (fromIntegral i))
    stsz  = toCSize st
splitAtEnd :: Int -> ShortText -> (ShortText,ShortText)
splitAtEnd i st
  | i <= 0      = (st,mempty)
  | ofs >= stsz = (mempty,st)
  | otherwise   = splitAtOfs ofs st
  where
    ofs   = csizeToB $
            unsafeDupablePerformIO (c_text_short_index_ofs_rev (toByteArray# st) (toCSize st) (fromIntegral (i1)))
    stsz  = toB st
splitAtOfs :: B -> ShortText -> (ShortText,ShortText)
splitAtOfs ofs st
  | ofs  == 0    = (mempty,st)
  | ofs  >  stsz = (st,mempty)
  | otherwise    = (slice st 0 ofs, slice st ofs (stszofs))
  where
    !stsz  = toB st
foreign import ccall unsafe "hs_text_short_index_ofs" c_text_short_index_ofs :: ByteArray# -> CSize -> CSize -> IO CSize
foreign import ccall unsafe "hs_text_short_index_ofs_rev" c_text_short_index_ofs_rev :: ByteArray# -> CSize -> CSize -> IO CSize
uncons :: ShortText -> Maybe (Char,ShortText)
uncons st
  | null st    = Nothing
  | len2 == 0  = Just (c0, mempty)
  | otherwise  = Just (c0, slice st ofs len2)
  where
    c0  = cp2ch cp0
    cp0 = readCodePoint st 0
    ofs = cpLen cp0
    len2 = toB st  ofs
unsnoc :: ShortText -> Maybe (ShortText,Char)
unsnoc st
  | null st    = Nothing
  | len1 == 0  = Just (mempty, c0)
  | otherwise  = Just (slice st 0 len1, c0)
  where
    c0  = cp2ch cp0
    cp0 = readCodePointRev st stsz
    stsz = toB st
    len1 = stsz  cpLen cp0
isPrefixOf :: ShortText -> ShortText -> Bool
isPrefixOf x y
  | lx > ly = False
  | lx == 0 = True
  | otherwise = case PrimOps.compareByteArrays# (toByteArray# x) 0# (toByteArray# y) 0# n# of
                  0# -> True
                  _  -> False
  where
    !lx@(I# n#) = toLength x
    !ly = toLength y
stripPrefix :: ShortText -> ShortText -> Maybe ShortText
stripPrefix pfx t
  | isPrefixOf pfx t = Just $! snd (splitAtOfs (toB pfx) t)
  | otherwise        = Nothing
isSuffixOf :: ShortText -> ShortText -> Bool
isSuffixOf x y
  | lx > ly = False
  | lx == 0 = True
  | otherwise = case PrimOps.compareByteArrays# (toByteArray# x) 0# (toByteArray# y) ofs2# n# of
                  0# -> True
                  _  -> False
  where
    !(I# ofs2#) = ly  lx
    !lx@(I# n#) = toLength x
    !ly = toLength y
stripSuffix :: ShortText -> ShortText -> Maybe ShortText
stripSuffix sfx t
  | isSuffixOf sfx t = Just $! fst (splitAtOfs pfxLen t)
  | otherwise        = Nothing
  where
    pfxLen = toB t  toB sfx
intersperse :: Char -> ShortText -> ShortText
intersperse c st
  | null st = mempty
  | sn == 1 = st
  | otherwise = create newsz $ \mba -> do
      let !cp0 = readCodePoint st 0
          !cp0sz = cpLen cp0
      writeCodePointN cp0sz mba 0 cp0
      go mba (sn  1) cp0sz cp0sz
  where
    newsz = ssz + ((sn1) `mulB` csz)
    ssz = toB st
    sn  = length st
    csz = cpLen cp
    cp  = ch2cp c
    go :: MBA s -> Int -> B -> B -> ST s ()
    go _   0 !_  !_   = return ()
    go mba n ofs ofs2 = do
      let !cp1 = readCodePoint st ofs2
          !cp1sz = cpLen cp1
      writeCodePointN csz   mba ofs cp
      writeCodePointN cp1sz mba (ofs+csz) cp1
      go mba (n1) (ofs+csz+cp1sz) (ofs2+cp1sz)
intercalate :: ShortText -> [ShortText] -> ShortText
intercalate _ []  = mempty
intercalate _ [t] = t
intercalate sep ts
  | null sep   = mconcat ts
  | otherwise  = mconcat (List.intersperse sep ts)
replicate :: Int -> ShortText -> ShortText
replicate n0 t
  | n0 < 1     = mempty
  | null t    = mempty
  | otherwise = create (n0 `mulB` sz) (go 0)
  where
    go :: Int -> MBA s -> ST s ()
    go j mba
      | j == n0    = return ()
      | otherwise  = do
          copyByteArray t 0 mba (j `mulB` sz) sz
          go (j+1) mba
    sz = toB t
reverse :: ShortText -> ShortText
reverse st
  | null st   = mempty
  | sn == 1   = st
  | otherwise = create sz $ go sn 0
  where
    sz = toB st
    sn = length st
    go :: Int -> B -> MBA s -> ST s ()
    go 0 !_  _   = return ()
    go i ofs mba = do
      let !cp   = readCodePoint st ofs
          !cpsz = cpLen cp
          !ofs' = ofs+cpsz
      writeCodePointN cpsz mba (sz  ofs') cp
      go (i1) ofs' mba
filter :: (Char -> Bool) -> ShortText -> ShortText
filter p t
  = case (mofs1,mofs2) of
      (Nothing,   _)       -> t 
      (Just 0,    Nothing) -> mempty 
      (Just ofs1, Nothing) -> slice t 0 ofs1 
      (Just ofs1, Just ofs2) -> createShrink (t0sz(ofs2ofs1)) $ \mba -> do
        
        copyByteArray t 0 mba 0 ofs1
        
        
        t1sz <- go mba ofs2 ofs1
        return t1sz
  where
    mofs1 = findOfs (not . p) t (B 0) 
    mofs2 = findOfs p t (fromMaybe (B 0) mofs1) 
    t0sz = toB t
    go :: MBA s -> B -> B -> ST s B
    go mba !t0ofs !t1ofs
      | t0ofs >= t0sz = return t1ofs
      | otherwise = let !cp = readCodePoint t t0ofs
                        !cpsz = cpLen cp
                    in if p (cp2ch cp)
                       then writeCodePointN cpsz mba t1ofs cp >>
                            go mba (t0ofs+cpsz) (t1ofs+cpsz)
                       else go mba (t0ofs+cpsz) t1ofs 
dropAround :: (Char -> Bool) -> ShortText -> ShortText
dropAround p t0 = case (mofs1,mofs2) of
                    (Nothing,_) -> mempty
                    (Just ofs1,Just ofs2)
                      | ofs1 == 0, ofs2 == t0sz  -> t0
                      | ofs1 < ofs2  -> create (ofs2ofs1) $ \mba -> do
                          copyByteArray t0 ofs1 mba (B 0) (ofs2ofs1)
                    (_,_) -> error "dropAround: the impossible happened"
  where
    mofs1 = findOfs    (not . p) t0 (B 0)
    mofs2 = findOfsRev (not . p) t0 t0sz
    t0sz = toB t0
slice :: ShortText -> B -> B -> ShortText
slice st ofs len
  | ofs < 0    = error "invalid offset"
  | len < 0    = error "invalid length"
  | len' == 0  = mempty
  | otherwise  = create len' $ \mba -> copyByteArray st ofs' mba 0 len'
  where
    len0 = toB st
    len' = max 0 (min len (len0ofs))
    ofs' = max 0 ofs
newtype B = B { unB :: Int }
          deriving (Ord,Eq,Num)
mulB :: Int -> B -> B
mulB n (B b) = B (n*b)
csizeFromB :: B -> CSize
csizeFromB = fromIntegral . unB
csizeToB :: CSize -> B
csizeToB = B . fromIntegral
data MBA s = MBA# { unMBA# :: MutableByteArray# s }
create :: B -> (forall s. MBA s -> ST s ()) -> ShortText
create n go = runST $ do
  mba <- newByteArray n
  go mba
  unsafeFreeze mba
createShrink :: B -> (forall s. MBA s -> ST s B) -> ShortText
createShrink n go = runST $ do
  mba <- newByteArray n
  n' <- go mba
  if n' < n
    then unsafeFreezeShrink mba n'
    else unsafeFreeze mba
unsafeFreeze :: MBA s -> ST s ShortText
unsafeFreeze (MBA# mba#)
  = ST $ \s -> case GHC.Exts.unsafeFreezeByteArray# mba# s of
                 (# s', ba# #) -> (# s', ShortText (BSSI.SBS ba#) #)
copyByteArray :: ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray (ShortText (BSSI.SBS src#)) (B (I# src_off#)) (MBA# dst#) (B (I# dst_off#)) (B (I# len#))
  = ST $ \s -> case GHC.Exts.copyByteArray# src# src_off# dst# dst_off# len# s of
                 s' -> (# s', () #)
newByteArray :: B -> ST s (MBA s)
newByteArray (B (I# n#))
  = ST $ \s -> case GHC.Exts.newByteArray# n# s of
                 (# s', mba# #) -> (# s', MBA# mba# #)
writeWord8Array :: MBA s -> B -> Word -> ST s ()
writeWord8Array (MBA# mba#) (B (I# i#)) (W# w#)
  = ST $ \s -> case GHC.Exts.writeWord8Array# mba# i# w# s of
                 s' -> (# s', () #)
copyAddrToByteArray :: Ptr a -> MBA RealWorld -> B -> B -> ST RealWorld ()
copyAddrToByteArray (Ptr src#) (MBA# dst#) (B (I# dst_off#)) (B (I# len#))
  = ST $ \s -> case GHC.Exts.copyAddrToByteArray# src# dst# dst_off# len# s of
                 s' -> (# s', () #)
#if __GLASGOW_HASKELL__ >= 710
unsafeFreezeShrink :: MBA s -> B -> ST s ShortText
unsafeFreezeShrink mba n = do
  shrink mba n
  unsafeFreeze mba
shrink :: MBA s -> B -> ST s ()
shrink (MBA# mba#) (B (I# i#))
  = ST $ \s -> case GHC.Exts.shrinkMutableByteArray# mba# i# s of
                 s' -> (# s', () #)
#else
unsafeFreezeShrink :: MBA s -> B -> ST s ShortText
unsafeFreezeShrink mba0 n = do
  mba' <- newByteArray n
  copyByteArray2 mba0 0 mba' 0 n
  unsafeFreeze mba'
copyByteArray2 :: MBA s -> B -> MBA s -> B -> B -> ST s ()
copyByteArray2 (MBA# src#) (B (I# src_off#)) (MBA# dst#) (B (I# dst_off#)) (B( I# len#))
  = ST $ \s -> case GHC.Exts.copyMutableByteArray# src# src_off# dst# dst_off# len# s of
                 s' -> (# s', () #)
#endif
newtype CP = CP Word
ch2cp :: Char -> CP
ch2cp (ord -> ci)
  | isSurr ci  = CP 0xFFFD
  | otherwise  = CP (fromIntegral ci)
isSurr :: (Num i, Bits i) => i -> Bool
isSurr ci = ci .&. 0xfff800 == 0xd800
cp2ch :: CP -> Char
cp2ch (CP w) = (w < 0x110000) `assert` unsafeChr (fromIntegral w)
cp2chSafe :: CP -> Maybe Char
cp2chSafe cp
  | cpNull cp = Nothing
  | otherwise = Just $! cp2ch cp
  where
    cpNull :: CP -> Bool
    cpNull (CP w) = w >= 0x110000
cpLen :: CP -> B
cpLen (CP cp)
  | cp <    0x80  = B 1
  | cp <   0x800  = B 2
  | cp < 0x10000  = B 3
  | otherwise     = B 4
decodeCharAtOfs :: ShortText -> B -> (Char,B)
decodeCharAtOfs st ofs = (c,ofs')
  where
    c    = cp2ch cp
    ofs' = ofs + cpLen cp
    cp   = readCodePoint st ofs
singleton :: Char -> ShortText
singleton = singleton' . ch2cp
singleton' :: CP -> ShortText
singleton' cp@(CP cpw)
  | cpw <    0x80  = create 1 $ \mba -> writeCodePoint1 mba 0 cp
  | cpw <   0x800  = create 2 $ \mba -> writeCodePoint2 mba 0 cp
  | cpw < 0x10000  = create 3 $ \mba -> writeCodePoint3 mba 0 cp
  | otherwise      = create 4 $ \mba -> writeCodePoint4 mba 0 cp
cons :: Char -> ShortText -> ShortText
cons (ch2cp -> cp@(CP cpw)) sfx
  | n == 0         = singleton' cp
  | cpw <    0x80  = create (n+1) $ \mba -> writeCodePoint1 mba 0 cp >> copySfx 1 mba
  | cpw <   0x800  = create (n+2) $ \mba -> writeCodePoint2 mba 0 cp >> copySfx 2 mba
  | cpw < 0x10000  = create (n+3) $ \mba -> writeCodePoint3 mba 0 cp >> copySfx 3 mba
  | otherwise      = create (n+4) $ \mba -> writeCodePoint4 mba 0 cp >> copySfx 4 mba
  where
    !n = toB sfx
    copySfx :: B -> MBA s -> ST s ()
    copySfx ofs mba = copyByteArray sfx 0 mba ofs n
snoc :: ShortText -> Char -> ShortText
snoc pfx (ch2cp -> cp@(CP cpw))
  | n == 0         = singleton' cp
  | cpw <    0x80  = create (n+1) $ \mba -> copyPfx mba >> writeCodePoint1 mba n cp
  | cpw <   0x800  = create (n+2) $ \mba -> copyPfx mba >> writeCodePoint2 mba n cp
  | cpw < 0x10000  = create (n+3) $ \mba -> copyPfx mba >> writeCodePoint3 mba n cp
  | otherwise      = create (n+4) $ \mba -> copyPfx mba >> writeCodePoint4 mba n cp
  where
    !n = toB pfx
    copyPfx :: MBA s -> ST s ()
    copyPfx mba = copyByteArray pfx 0 mba 0 n
writeCodePointN :: B -> MBA s -> B -> CP -> ST s ()
writeCodePointN 1 = writeCodePoint1
writeCodePointN 2 = writeCodePoint2
writeCodePointN 3 = writeCodePoint3
writeCodePointN 4 = writeCodePoint4
writeCodePointN _ = undefined
writeCodePoint1 :: MBA s -> B -> CP -> ST s ()
writeCodePoint1 mba ofs (CP cp) =
  writeWord8Array mba ofs cp
writeCodePoint2 :: MBA s -> B -> CP -> ST s ()
writeCodePoint2 mba ofs (CP cp) = do
  writeWord8Array mba  ofs    (0xc0 .|. (cp `unsafeShiftR` 6))
  writeWord8Array mba (ofs+1) (0x80 .|. (cp                     .&. 0x3f))
writeCodePoint3 :: MBA s -> B -> CP -> ST s ()
writeCodePoint3 mba ofs (CP cp) = do
  writeWord8Array mba  ofs    (0xe0 .|.  (cp `unsafeShiftR` 12))
  writeWord8Array mba (ofs+1) (0x80 .|. ((cp `unsafeShiftR` 6)  .&. 0x3f))
  writeWord8Array mba (ofs+2) (0x80 .|.  (cp                    .&. 0x3f))
writeCodePoint4 :: MBA s -> B -> CP -> ST s ()
writeCodePoint4 mba ofs (CP cp) = do
  writeWord8Array mba  ofs    (0xf0 .|.  (cp `unsafeShiftR` 18))
  writeWord8Array mba (ofs+1) (0x80 .|. ((cp `unsafeShiftR` 12) .&. 0x3f))
  writeWord8Array mba (ofs+2) (0x80 .|. ((cp `unsafeShiftR` 6)  .&. 0x3f))
  writeWord8Array mba (ofs+3) (0x80 .|. (cp                     .&. 0x3f))
readCodePoint :: ShortText -> B -> CP
readCodePoint st (csizeFromB -> ofs)
  = CP $ unsafeDupablePerformIO (c_text_short_ofs_cp (toByteArray# st) ofs)
foreign import ccall unsafe "hs_text_short_ofs_cp" c_text_short_ofs_cp :: ByteArray# -> CSize -> IO CCodePoint
readCodePointRev :: ShortText -> B -> CP
readCodePointRev st (csizeFromB -> ofs)
  = CP $ unsafeDupablePerformIO (c_text_short_ofs_cp_rev (toByteArray# st) ofs)
foreign import ccall unsafe "hs_text_short_ofs_cp_rev" c_text_short_ofs_cp_rev :: ByteArray# -> CSize -> IO CCodePoint
instance GHC.Exts.IsList ShortText where
    type (Item ShortText) = Char
    fromList = fromString
    toList   = toString
instance S.IsString ShortText where
    fromString = fromStringLit
fromStringLit :: String -> ShortText
fromStringLit = fromString
fromLitAsciiAddr# :: Addr# -> ShortText
fromLitAsciiAddr# (Ptr -> ptr) = unsafeDupablePerformIO $ do
  sz <- csizeToB `fmap` c_strlen ptr
  case sz `compare` 0 of
    EQ -> return mempty 
    GT -> stToIO $ do
      mba <- newByteArray sz
      copyAddrToByteArray ptr mba 0 sz
      unsafeFreeze mba
    LT -> return (error "fromLitAsciiAddr#")
          
          
          
foreign import ccall unsafe "strlen" c_strlen :: CString -> IO CSize
fromLitMUtf8Addr# :: Addr# -> ShortText
fromLitMUtf8Addr# (Ptr -> ptr) = unsafeDupablePerformIO $ do
  sz <- B `fmap` c_text_short_mutf8_strlen ptr
  case sz `compare` 0 of
    EQ -> return mempty 
    GT -> stToIO $ do
      mba <- newByteArray sz
      copyAddrToByteArray ptr mba 0 sz
      unsafeFreeze mba
    LT -> do
      mba <- stToIO (newByteArray (abs sz))
      c_text_short_mutf8_trans ptr (unMBA# mba)
      stToIO (unsafeFreeze mba)
foreign import ccall unsafe "hs_text_short_mutf8_strlen" c_text_short_mutf8_strlen :: CString -> IO Int
foreign import ccall unsafe "hs_text_short_mutf8_trans" c_text_short_mutf8_trans :: CString -> MutableByteArray# RealWorld -> IO ()