{-# LANGUAGE CPP, MagicHash, UnboxedTuples #-}
-- |
-- Module      : Data.Text.Unsafe
-- Copyright   : (c) 2009, 2010, 2011 Bryan O'Sullivan
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Portability : portable
--
-- A module containing unsafe 'Text' operations, for very very careful
-- use in heavily tested code.
module Data.Text.Unsafe
    (
      inlineInterleaveST
    , inlinePerformIO
    , unsafeDupablePerformIO
    , Iter(..)
    , iter
    , iterArray
    , iter_
    , reverseIter
    , reverseIterArray
    , reverseIter_
    , unsafeHead
    , unsafeTail
    , lengthWord8
    , takeWord8
    , dropWord8
    ) where

#if defined(ASSERTS)
import Control.Exception (assert)
import GHC.Stack (HasCallStack)
#endif
import Data.Text.Internal.Encoding.Utf8 (chr2, chr3, chr4, utf8LengthByLeader)
import Data.Text.Internal (Text(..))
import Data.Text.Internal.Unsafe (inlineInterleaveST, inlinePerformIO)
import Data.Text.Internal.Unsafe.Char (unsafeChr8)
import qualified Data.Text.Array as A
import GHC.IO (unsafeDupablePerformIO)

-- | /O(1)/ A variant of 'head' for non-empty 'Text'. 'unsafeHead'
-- omits the check for the empty case, so there is an obligation on
-- the programmer to provide a proof that the 'Text' is non-empty.
--
-- The character is decoded from the bytes starting at the 'Text' offset;
-- the number of bytes consumed is chosen by 'utf8LengthByLeader' applied
-- to the first byte. The stored length is not consulted.
unsafeHead :: Text -> Char
unsafeHead (Text arr off _len) = case utf8LengthByLeader m0 of
    1 -> unsafeChr8 m0
    2 -> chr2 m0 m1
    3 -> chr3 m0 m1 m2
    _ -> chr4 m0 m1 m2 m3
  where
    -- These bindings are lazy: a trailing byte is only demanded by the
    -- branch that actually mentions it.
    m0 = A.unsafeIndex arr off
    m1 = A.unsafeIndex arr (off + 1)
    m2 = A.unsafeIndex arr (off + 2)
    m3 = A.unsafeIndex arr (off + 3)
{-# INLINE unsafeHead #-}

-- | /O(1)/ A variant of 'tail' for non-empty 'Text'. 'unsafeTail'
-- omits the check for the empty case, so there is an obligation on
-- the programmer to provide a proof that the 'Text' is non-empty.
--
-- The result shares the underlying array: the offset is advanced, and
-- the length reduced, by the byte width of the first character as
-- reported by 'iter_'. When built with @ASSERTS@, that width is asserted
-- not to exceed the length.
unsafeTail :: Text -> Text
unsafeTail t@(Text arr off len) =
#if defined(ASSERTS)
    assert (d <= len) $
#endif
    Text arr (off + d) (len - d)
  where
    -- Width in bytes of the first character.
    d = iter_ t 0
{-# INLINE unsafeTail #-}

-- | The result of a single iteration step: the decoded character paired
-- with the delta to add to the current offset to reach the next
-- iteration position. The forward iterators in this module produce the
-- character's byte width as the delta; the reverse iterators produce a
-- negative delta.
data Iter = Iter {-# UNPACK #-} !Char {-# UNPACK #-} !Int
  deriving (Show)

-- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-8
-- array, returning the current character and the delta to add to give
-- the next offset to iterate at.
--
-- The index is relative to the start of the 'Text' (it is added to the
-- 'Text' offset before delegating to 'iterArray'). The stored length is
-- ignored, so no bounds check is performed here.
iter ::
#if defined(ASSERTS)
  HasCallStack =>
#endif
  Text -> Int -> Iter
iter (Text arr off _len) i = iterArray arr (off + i)
{-# INLINE iter #-}

-- | Iterate (unsafely) one step forwards through a UTF-8 array starting
-- at the given absolute index, returning the character found there and
-- its width in bytes (the delta to the next character).
--
-- The width is determined by 'utf8LengthByLeader' applied to the byte at
-- the index; no bounds check is performed.
--
-- @since 2.0
iterArray :: A.Array -> Int -> Iter
iterArray arr j = Iter chr l
  where
    -- Leading byte and (lazily demanded) trailing bytes.
    m0 = A.unsafeIndex arr j
    m1 = A.unsafeIndex arr (j + 1)
    m2 = A.unsafeIndex arr (j + 2)
    m3 = A.unsafeIndex arr (j + 3)
    -- Byte width of the character, derived from its leading byte.
    l = utf8LengthByLeader m0
    chr = case l of
        1 -> unsafeChr8 m0
        2 -> chr2 m0 m1
        3 -> chr3 m0 m1 m2
        _ -> chr4 m0 m1 m2 m3
{-# INLINE iterArray #-}

-- | /O(1)/ Iterate one step through a UTF-8 array, returning the
-- delta to add to give the next offset to iterate at.
--
-- Only the byte at the given index (relative to the 'Text' offset) is
-- read; the delta is 'utf8LengthByLeader' of that byte. The stored
-- length is not consulted.
iter_ :: Text -> Int -> Int
iter_ (Text arr off _len) i = utf8LengthByLeader m
  where
    m = A.unsafeIndex arr (off + i)
{-# INLINE iter_ #-}

-- | /O(1)/ Iterate one step backwards through a UTF-8 array,
-- returning the current character and the delta to add (i.e. a
-- negative number) to give the next offset to iterate at.
--
-- The index is relative to the start of the 'Text'; it is added to the
-- 'Text' offset before delegating to 'reverseIterArray'. The stored
-- length is ignored.
reverseIter :: Text -> Int -> Iter
reverseIter (Text arr off _len) i = reverseIterArray arr (off + i)
{-# INLINE reverseIter #-}

-- | Iterate (unsafely) one step backwards through a UTF-8 array. The
-- given absolute index is treated as the position of the /last/ byte of
-- a character: bytes are read at that index and, as needed, at the one,
-- two and three preceding indices. Returns the decoded character and a
-- negative delta (@-1@ to @-4@) equal to minus its byte width.
--
-- No bounds check is performed.
--
-- @since 2.0
reverseIterArray :: A.Array -> Int -> Iter
reverseIterArray arr j
    -- Final byte below 0x80: a single-byte character.
    | m0 <  0x80 = Iter (unsafeChr8 m0) (-1)
    -- A byte >= 0xC0 is not a UTF-8 continuation byte, so the first
    -- preceding byte that satisfies this is taken as the leading byte.
    | m1 >= 0xC0 = Iter (chr2 m1 m0) (-2)
    | m2 >= 0xC0 = Iter (chr3 m2 m1 m0) (-3)
    | otherwise  = Iter (chr4 m3 m2 m1 m0) (-4)
  where
    m0 = A.unsafeIndex arr j
    m1 = A.unsafeIndex arr (j - 1)
    m2 = A.unsafeIndex arr (j - 2)
    m3 = A.unsafeIndex arr (j - 3)
{-# INLINE reverseIterArray #-}

-- | /O(1)/ Iterate one step backwards through a UTF-8 array,
-- returning the delta to add (i.e. a negative number) to give the
-- next offset to iterate at.
--
-- The index is relative to the start of the 'Text' and is treated as
-- the position of the last byte of a character. At most the bytes at
-- that position and the two before it are inspected; the stored length
-- is not consulted.
--
-- @since 1.1.1.0
reverseIter_ :: Text -> Int -> Int
reverseIter_ (Text arr off _len) i
    -- Final byte below 0x80: a single-byte character.
    | m0 <  0x80 = -1
    -- A byte >= 0xC0 is not a UTF-8 continuation byte, so it is taken
    -- as the leading byte of the character.
    | m1 >= 0xC0 = -2
    | m2 >= 0xC0 = -3
    | otherwise  = -4
  where
    -- Absolute index into the underlying array.
    j  = off + i
    m0 = A.unsafeIndex arr j
    m1 = A.unsafeIndex arr (j - 1)
    m2 = A.unsafeIndex arr (j - 2)
{-# INLINE reverseIter_ #-}

-- | /O(1)/ Return the length of a 'Text' in units of 'Word8'.  This
-- is useful for sizing a target array appropriately before using
-- 'unsafeCopyToPtr'.
--
-- This is simply the length field stored in the 'Text' constructor.
--
-- @since 2.0
lengthWord8 :: Text -> Int
lengthWord8 (Text _arr _off len) = len
{-# INLINE lengthWord8 #-}

-- | /O(1)/ Unchecked take of @k@ 'Word8's from the front of a 'Text'.
--
-- The result shares the array and offset of the input and has its
-- length set to @k@ directly; @k@ is not compared against the original
-- length, so the caller must ensure it is in range.
--
-- @since 2.0
takeWord8 :: Int -> Text -> Text
takeWord8 k (Text arr off _len) = Text arr off k
{-# INLINE takeWord8 #-}

-- | /O(1)/ Unchecked drop of @k@ 'Word8's from the front of a 'Text'.
--
-- The result shares the array of the input, with the offset advanced by
-- @k@ and the length reduced by @k@. No check is made that @k@ is within
-- the original length, so the caller must ensure it is in range.
--
-- @since 2.0
dropWord8 :: Int -> Text -> Text
dropWord8 k (Text arr off len) = Text arr (off + k) (len - k)
{-# INLINE dropWord8 #-}