{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE UnboxedTuples #-}
module Data.Text.Utf8
( CodePoint
, CodeUnit
, CodeUnitIndex (..)
, Text (..)
, fromByteList
, isCaseInvariant
, lengthUtf8
, lowerCodePoint
, unlowerCodePoint
, lowerUtf8
, toLowerAscii
, unicode2utf8
, unpackUtf8
, decode2
, decode3
, decode4
, decodeUtf8
, indexCodeUnit
, unsafeIndexCodePoint
, unsafeIndexCodeUnit
, skipCodePointsBackwards
, unsafeCutUtf8
, unsafeSliceUtf8
, arrayContents
, isArrayPinned
, unsafeIndexCodePoint'
, unsafeIndexCodeUnit'
, BackwardsIter (..)
, unsafeIndexEndOfCodePoint'
, unsafeIndexAnywhereInCodePoint'
, Text.concat
, Text.dropWhile
, Text.isInfixOf
, Text.null
, Text.pack
, Text.replicate
, Text.unpack
, TextSearch.indices
) where
import Control.DeepSeq (NFData)
import Data.Bits (Bits (shiftL), shiftR, (.&.), (.|.))
import Data.Hashable (Hashable)
import Data.Text.Internal (Text (..))
import Data.Word (Word8)
import GHC.Generics (Generic)
import Data.Primitive (ByteArray (ByteArray), Prim, byteArrayFromList)
#if defined(HAS_AESON)
import Data.Aeson (FromJSON, ToJSON)
#endif
import Data.Text.Utf8.Unlower (unlowerCodePoint)
import qualified Data.Char as Char
import qualified Data.Text as Text
import qualified Data.Text.Array as TextArray
import qualified Data.Text.Internal.Search as TextSearch
import qualified Data.Text.Unsafe as TextUnsafe
import qualified GHC.Exts as Exts
type CodeUnit = Word8
type CodePoint = Char
newtype CodeUnitIndex = CodeUnitIndex
{ CodeUnitIndex -> Int
codeUnitIndex :: Int
}
deriving stock (CodeUnitIndex -> CodeUnitIndex -> Bool
(CodeUnitIndex -> CodeUnitIndex -> Bool)
-> (CodeUnitIndex -> CodeUnitIndex -> Bool) -> Eq CodeUnitIndex
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: CodeUnitIndex -> CodeUnitIndex -> Bool
== :: CodeUnitIndex -> CodeUnitIndex -> Bool
$c/= :: CodeUnitIndex -> CodeUnitIndex -> Bool
/= :: CodeUnitIndex -> CodeUnitIndex -> Bool
Eq, Eq CodeUnitIndex
Eq CodeUnitIndex =>
(CodeUnitIndex -> CodeUnitIndex -> Ordering)
-> (CodeUnitIndex -> CodeUnitIndex -> Bool)
-> (CodeUnitIndex -> CodeUnitIndex -> Bool)
-> (CodeUnitIndex -> CodeUnitIndex -> Bool)
-> (CodeUnitIndex -> CodeUnitIndex -> Bool)
-> (CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex)
-> (CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex)
-> Ord CodeUnitIndex
CodeUnitIndex -> CodeUnitIndex -> Bool
CodeUnitIndex -> CodeUnitIndex -> Ordering
CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: CodeUnitIndex -> CodeUnitIndex -> Ordering
compare :: CodeUnitIndex -> CodeUnitIndex -> Ordering
$c< :: CodeUnitIndex -> CodeUnitIndex -> Bool
< :: CodeUnitIndex -> CodeUnitIndex -> Bool
$c<= :: CodeUnitIndex -> CodeUnitIndex -> Bool
<= :: CodeUnitIndex -> CodeUnitIndex -> Bool
$c> :: CodeUnitIndex -> CodeUnitIndex -> Bool
> :: CodeUnitIndex -> CodeUnitIndex -> Bool
$c>= :: CodeUnitIndex -> CodeUnitIndex -> Bool
>= :: CodeUnitIndex -> CodeUnitIndex -> Bool
$cmax :: CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
max :: CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
$cmin :: CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
min :: CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
Ord, (forall x. CodeUnitIndex -> Rep CodeUnitIndex x)
-> (forall x. Rep CodeUnitIndex x -> CodeUnitIndex)
-> Generic CodeUnitIndex
forall x. Rep CodeUnitIndex x -> CodeUnitIndex
forall x. CodeUnitIndex -> Rep CodeUnitIndex x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. CodeUnitIndex -> Rep CodeUnitIndex x
from :: forall x. CodeUnitIndex -> Rep CodeUnitIndex x
$cto :: forall x. Rep CodeUnitIndex x -> CodeUnitIndex
to :: forall x. Rep CodeUnitIndex x -> CodeUnitIndex
Generic, CodeUnitIndex
CodeUnitIndex -> CodeUnitIndex -> Bounded CodeUnitIndex
forall a. a -> a -> Bounded a
$cminBound :: CodeUnitIndex
minBound :: CodeUnitIndex
$cmaxBound :: CodeUnitIndex
maxBound :: CodeUnitIndex
Bounded)
#if defined(HAS_AESON)
deriving newtype (Int -> CodeUnitIndex -> ShowS
[CodeUnitIndex] -> ShowS
CodeUnitIndex -> [Char]
(Int -> CodeUnitIndex -> ShowS)
-> (CodeUnitIndex -> [Char])
-> ([CodeUnitIndex] -> ShowS)
-> Show CodeUnitIndex
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> CodeUnitIndex -> ShowS
showsPrec :: Int -> CodeUnitIndex -> ShowS
$cshow :: CodeUnitIndex -> [Char]
show :: CodeUnitIndex -> [Char]
$cshowList :: [CodeUnitIndex] -> ShowS
showList :: [CodeUnitIndex] -> ShowS
Show, Addr# -> Int# -> CodeUnitIndex
ByteArray# -> Int# -> CodeUnitIndex
Proxy CodeUnitIndex -> Int#
CodeUnitIndex -> Int#
(Proxy CodeUnitIndex -> Int#)
-> (CodeUnitIndex -> Int#)
-> (Proxy CodeUnitIndex -> Int#)
-> (CodeUnitIndex -> Int#)
-> (ByteArray# -> Int# -> CodeUnitIndex)
-> (forall s.
MutableByteArray# s
-> Int# -> State# s -> (# State# s, CodeUnitIndex #))
-> (forall s.
MutableByteArray# s
-> Int# -> CodeUnitIndex -> State# s -> State# s)
-> (forall s.
MutableByteArray# s
-> Int# -> Int# -> CodeUnitIndex -> State# s -> State# s)
-> (Addr# -> Int# -> CodeUnitIndex)
-> (forall s.
Addr# -> Int# -> State# s -> (# State# s, CodeUnitIndex #))
-> (forall s.
Addr# -> Int# -> CodeUnitIndex -> State# s -> State# s)
-> (forall s.
Addr# -> Int# -> Int# -> CodeUnitIndex -> State# s -> State# s)
-> Prim CodeUnitIndex
forall s.
Addr# -> Int# -> Int# -> CodeUnitIndex -> State# s -> State# s
forall s.
Addr# -> Int# -> State# s -> (# State# s, CodeUnitIndex #)
forall s. Addr# -> Int# -> CodeUnitIndex -> State# s -> State# s
forall s.
MutableByteArray# s
-> Int# -> Int# -> CodeUnitIndex -> State# s -> State# s
forall s.
MutableByteArray# s
-> Int# -> State# s -> (# State# s, CodeUnitIndex #)
forall s.
MutableByteArray# s
-> Int# -> CodeUnitIndex -> State# s -> State# s
forall a.
(Proxy a -> Int#)
-> (a -> Int#)
-> (Proxy a -> Int#)
-> (a -> Int#)
-> (ByteArray# -> Int# -> a)
-> (forall s.
MutableByteArray# s -> Int# -> State# s -> (# State# s, a #))
-> (forall s.
MutableByteArray# s -> Int# -> a -> State# s -> State# s)
-> (forall s.
MutableByteArray# s -> Int# -> Int# -> a -> State# s -> State# s)
-> (Addr# -> Int# -> a)
-> (forall s. Addr# -> Int# -> State# s -> (# State# s, a #))
-> (forall s. Addr# -> Int# -> a -> State# s -> State# s)
-> (forall s. Addr# -> Int# -> Int# -> a -> State# s -> State# s)
-> Prim a
$csizeOfType# :: Proxy CodeUnitIndex -> Int#
sizeOfType# :: Proxy CodeUnitIndex -> Int#
$csizeOf# :: CodeUnitIndex -> Int#
sizeOf# :: CodeUnitIndex -> Int#
$calignmentOfType# :: Proxy CodeUnitIndex -> Int#
alignmentOfType# :: Proxy CodeUnitIndex -> Int#
$calignment# :: CodeUnitIndex -> Int#
alignment# :: CodeUnitIndex -> Int#
$cindexByteArray# :: ByteArray# -> Int# -> CodeUnitIndex
indexByteArray# :: ByteArray# -> Int# -> CodeUnitIndex
$creadByteArray# :: forall s.
MutableByteArray# s
-> Int# -> State# s -> (# State# s, CodeUnitIndex #)
readByteArray# :: forall s.
MutableByteArray# s
-> Int# -> State# s -> (# State# s, CodeUnitIndex #)
$cwriteByteArray# :: forall s.
MutableByteArray# s
-> Int# -> CodeUnitIndex -> State# s -> State# s
writeByteArray# :: forall s.
MutableByteArray# s
-> Int# -> CodeUnitIndex -> State# s -> State# s
$csetByteArray# :: forall s.
MutableByteArray# s
-> Int# -> Int# -> CodeUnitIndex -> State# s -> State# s
setByteArray# :: forall s.
MutableByteArray# s
-> Int# -> Int# -> CodeUnitIndex -> State# s -> State# s
$cindexOffAddr# :: Addr# -> Int# -> CodeUnitIndex
indexOffAddr# :: Addr# -> Int# -> CodeUnitIndex
$creadOffAddr# :: forall s.
Addr# -> Int# -> State# s -> (# State# s, CodeUnitIndex #)
readOffAddr# :: forall s.
Addr# -> Int# -> State# s -> (# State# s, CodeUnitIndex #)
$cwriteOffAddr# :: forall s. Addr# -> Int# -> CodeUnitIndex -> State# s -> State# s
writeOffAddr# :: forall s. Addr# -> Int# -> CodeUnitIndex -> State# s -> State# s
$csetOffAddr# :: forall s.
Addr# -> Int# -> Int# -> CodeUnitIndex -> State# s -> State# s
setOffAddr# :: forall s.
Addr# -> Int# -> Int# -> CodeUnitIndex -> State# s -> State# s
Prim, Eq CodeUnitIndex
Eq CodeUnitIndex =>
(Int -> CodeUnitIndex -> Int)
-> (CodeUnitIndex -> Int) -> Hashable CodeUnitIndex
Int -> CodeUnitIndex -> Int
CodeUnitIndex -> Int
forall a. Eq a => (Int -> a -> Int) -> (a -> Int) -> Hashable a
$chashWithSalt :: Int -> CodeUnitIndex -> Int
hashWithSalt :: Int -> CodeUnitIndex -> Int
$chash :: CodeUnitIndex -> Int
hash :: CodeUnitIndex -> Int
Hashable, Integer -> CodeUnitIndex
CodeUnitIndex -> CodeUnitIndex
CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
(CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex)
-> (CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex)
-> (CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex)
-> (CodeUnitIndex -> CodeUnitIndex)
-> (CodeUnitIndex -> CodeUnitIndex)
-> (CodeUnitIndex -> CodeUnitIndex)
-> (Integer -> CodeUnitIndex)
-> Num CodeUnitIndex
forall a.
(a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a)
-> (a -> a)
-> (a -> a)
-> (Integer -> a)
-> Num a
$c+ :: CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
+ :: CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
$c- :: CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
- :: CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
$c* :: CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
* :: CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
$cnegate :: CodeUnitIndex -> CodeUnitIndex
negate :: CodeUnitIndex -> CodeUnitIndex
$cabs :: CodeUnitIndex -> CodeUnitIndex
abs :: CodeUnitIndex -> CodeUnitIndex
$csignum :: CodeUnitIndex -> CodeUnitIndex
signum :: CodeUnitIndex -> CodeUnitIndex
$cfromInteger :: Integer -> CodeUnitIndex
fromInteger :: Integer -> CodeUnitIndex
Num, CodeUnitIndex -> ()
(CodeUnitIndex -> ()) -> NFData CodeUnitIndex
forall a. (a -> ()) -> NFData a
$crnf :: CodeUnitIndex -> ()
rnf :: CodeUnitIndex -> ()
NFData, Maybe CodeUnitIndex
Value -> Parser [CodeUnitIndex]
Value -> Parser CodeUnitIndex
(Value -> Parser CodeUnitIndex)
-> (Value -> Parser [CodeUnitIndex])
-> Maybe CodeUnitIndex
-> FromJSON CodeUnitIndex
forall a.
(Value -> Parser a)
-> (Value -> Parser [a]) -> Maybe a -> FromJSON a
$cparseJSON :: Value -> Parser CodeUnitIndex
parseJSON :: Value -> Parser CodeUnitIndex
$cparseJSONList :: Value -> Parser [CodeUnitIndex]
parseJSONList :: Value -> Parser [CodeUnitIndex]
$comittedField :: Maybe CodeUnitIndex
omittedField :: Maybe CodeUnitIndex
FromJSON, [CodeUnitIndex] -> Value
[CodeUnitIndex] -> Encoding
CodeUnitIndex -> Bool
CodeUnitIndex -> Value
CodeUnitIndex -> Encoding
(CodeUnitIndex -> Value)
-> (CodeUnitIndex -> Encoding)
-> ([CodeUnitIndex] -> Value)
-> ([CodeUnitIndex] -> Encoding)
-> (CodeUnitIndex -> Bool)
-> ToJSON CodeUnitIndex
forall a.
(a -> Value)
-> (a -> Encoding)
-> ([a] -> Value)
-> ([a] -> Encoding)
-> (a -> Bool)
-> ToJSON a
$ctoJSON :: CodeUnitIndex -> Value
toJSON :: CodeUnitIndex -> Value
$ctoEncoding :: CodeUnitIndex -> Encoding
toEncoding :: CodeUnitIndex -> Encoding
$ctoJSONList :: [CodeUnitIndex] -> Value
toJSONList :: [CodeUnitIndex] -> Value
$ctoEncodingList :: [CodeUnitIndex] -> Encoding
toEncodingList :: [CodeUnitIndex] -> Encoding
$comitField :: CodeUnitIndex -> Bool
omitField :: CodeUnitIndex -> Bool
ToJSON)
#else
deriving newtype (Show, Prim, Hashable, Num, NFData)
#endif
{-# INLINABLE unpackUtf8 #-}
unpackUtf8 :: Text -> [CodeUnit]
unpackUtf8 :: Text -> [CodeUnit]
unpackUtf8 (Text Array
u8data Int
offset Int
len) =
let
go :: Int -> t -> [CodeUnit]
go Int
_ t
0 = []
go Int
i t
n = Array -> CodeUnitIndex -> CodeUnit
unsafeIndexCodeUnit' Array
u8data (Int -> CodeUnitIndex
CodeUnitIndex Int
i) CodeUnit -> [CodeUnit] -> [CodeUnit]
forall a. a -> [a] -> [a]
: Int -> t -> [CodeUnit]
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (t
n t -> t -> t
forall a. Num a => a -> a -> a
- t
1)
in
Int -> Int -> [CodeUnit]
forall {t}. (Eq t, Num t) => Int -> t -> [CodeUnit]
go Int
offset Int
len
lengthUtf8 :: Text -> CodeUnitIndex
lengthUtf8 :: Text -> CodeUnitIndex
lengthUtf8 (Text Array
_ Int
_ !Int
len) = Int -> CodeUnitIndex
CodeUnitIndex Int
len
{-# INLINE toLowerAscii #-}
toLowerAscii :: Char -> Char
toLowerAscii :: Char -> Char
toLowerAscii Char
cp
| Char -> Bool
Char.isAsciiUpper Char
cp = Int -> Char
Char.chr (Char -> Int
Char.ord Char
cp Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0x20)
| Bool
otherwise = Char
cp
{-# INLINE lowerUtf8 #-}
lowerUtf8 :: Text -> Text
lowerUtf8 :: Text -> Text
lowerUtf8 = (Char -> Char) -> Text -> Text
Text.map Char -> Char
lowerCodePoint
asciiCount :: Int
asciiCount :: Int
asciiCount = Int
128
{-# INLINE lowerCodePoint #-}
lowerCodePoint :: Char -> Char
lowerCodePoint :: Char -> Char
lowerCodePoint Char
cp
| Char -> Int
Char.ord Char
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
asciiCount = Char -> Char
toLowerAscii Char
cp
| Bool
otherwise = Char -> Char
Char.toLower Char
cp
unicode2utf8 :: (Ord a, Num a, Bits a) => a -> [a]
{-# INLINE unicode2utf8 #-}
unicode2utf8 :: forall a. (Ord a, Num a, Bits a) => a -> [a]
unicode2utf8 a
c
| a
c a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
0x80 = [a
c]
| a
c a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
0x800 = [a
0xc0 a -> a -> a
forall a. Bits a => a -> a -> a
.|. (a
c a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
6), a
0x80 a -> a -> a
forall a. Bits a => a -> a -> a
.|. (a
0x3f a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
c)]
| a
c a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
0x10000 = [a
0xe0 a -> a -> a
forall a. Bits a => a -> a -> a
.|. (a
c a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
12), a
0x80 a -> a -> a
forall a. Bits a => a -> a -> a
.|. (a
0x3f a -> a -> a
forall a. Bits a => a -> a -> a
.&. (a
c a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
6)), a
0x80 a -> a -> a
forall a. Bits a => a -> a -> a
.|. (a
0x3f a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
c)]
| Bool
otherwise = [a
0xf0 a -> a -> a
forall a. Bits a => a -> a -> a
.|. (a
c a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
18), a
0x80 a -> a -> a
forall a. Bits a => a -> a -> a
.|. (a
0x3f a -> a -> a
forall a. Bits a => a -> a -> a
.&. (a
c a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
12)), a
0x80 a -> a -> a
forall a. Bits a => a -> a -> a
.|. (a
0x3f a -> a -> a
forall a. Bits a => a -> a -> a
.&. (a
c a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
6)), a
0x80 a -> a -> a
forall a. Bits a => a -> a -> a
.|. (a
0x3f a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
c)]
fromByteList :: [Word8] -> Text
fromByteList :: [CodeUnit] -> Text
fromByteList [CodeUnit]
byteList = Array -> Int -> Int -> Text
Text (ByteArray# -> Array
TextArray.ByteArray ByteArray#
ba#) Int
0 ([CodeUnit] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CodeUnit]
byteList)
where !(ByteArray ByteArray#
ba#) = [CodeUnit] -> ByteArray
forall a. Prim a => [a] -> ByteArray
byteArrayFromList [CodeUnit]
byteList
{-# INLINE isCaseInvariant #-}
isCaseInvariant :: Text -> Bool
isCaseInvariant :: Text -> Bool
isCaseInvariant = (Char -> Bool) -> Text -> Bool
Text.all (\Char
c -> Char -> [Char]
unlowerCodePoint (Char -> Char
lowerCodePoint Char
c) [Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
== [Char
c])
decode1 :: CodeUnit -> CodePoint
decode1 :: CodeUnit -> Char
decode1 CodeUnit
cu0 =
Int -> Char
Char.chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ CodeUnit -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CodeUnit
cu0
{-# INLINE decode2 #-}
decode2 :: CodeUnit -> CodeUnit -> CodePoint
decode2 :: CodeUnit -> CodeUnit -> Char
decode2 CodeUnit
cu0 CodeUnit
cu1 =
Int -> Char
Char.chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ (CodeUnit -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CodeUnit
cu0 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x1f) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
6 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. CodeUnit -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CodeUnit
cu1 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f
{-# INLINE decode3 #-}
decode3 :: CodeUnit -> CodeUnit -> CodeUnit -> CodePoint
decode3 :: CodeUnit -> CodeUnit -> CodeUnit -> Char
decode3 CodeUnit
cu0 CodeUnit
cu1 CodeUnit
cu2 =
Int -> Char
Char.chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ (CodeUnit -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CodeUnit
cu0 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0xf) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
12 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (CodeUnit -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CodeUnit
cu1 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
6 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (CodeUnit -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CodeUnit
cu2 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f)
{-# INLINE decode4 #-}
decode4 :: CodeUnit -> CodeUnit -> CodeUnit -> CodeUnit -> CodePoint
decode4 :: CodeUnit -> CodeUnit -> CodeUnit -> CodeUnit -> Char
decode4 CodeUnit
cu0 CodeUnit
cu1 CodeUnit
cu2 CodeUnit
cu3 =
Int -> Char
Char.chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ (CodeUnit -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CodeUnit
cu0 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x7) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
18 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (CodeUnit -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CodeUnit
cu1 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
12 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (CodeUnit -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CodeUnit
cu2 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
6 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (CodeUnit -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CodeUnit
cu3 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f)
decodeUtf8 :: [CodeUnit] -> [CodePoint]
decodeUtf8 :: [CodeUnit] -> [Char]
decodeUtf8 [] = []
decodeUtf8 (CodeUnit
cu0 : [CodeUnit]
cus) | CodeUnit
cu0 CodeUnit -> CodeUnit -> Bool
forall a. Ord a => a -> a -> Bool
< CodeUnit
0xc0 = Int -> Char
Char.chr (CodeUnit -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CodeUnit
cu0) Char -> ShowS
forall a. a -> [a] -> [a]
: [CodeUnit] -> [Char]
decodeUtf8 [CodeUnit]
cus
decodeUtf8 (CodeUnit
cu0 : CodeUnit
cu1 : [CodeUnit]
cus) | CodeUnit
cu0 CodeUnit -> CodeUnit -> Bool
forall a. Ord a => a -> a -> Bool
< CodeUnit
0xe0 = CodeUnit -> CodeUnit -> Char
decode2 CodeUnit
cu0 CodeUnit
cu1 Char -> ShowS
forall a. a -> [a] -> [a]
: [CodeUnit] -> [Char]
decodeUtf8 [CodeUnit]
cus
decodeUtf8 (CodeUnit
cu0 : CodeUnit
cu1 : CodeUnit
cu2 : [CodeUnit]
cus) | CodeUnit
cu0 CodeUnit -> CodeUnit -> Bool
forall a. Ord a => a -> a -> Bool
< CodeUnit
0xf0 = CodeUnit -> CodeUnit -> CodeUnit -> Char
decode3 CodeUnit
cu0 CodeUnit
cu1 CodeUnit
cu2 Char -> ShowS
forall a. a -> [a] -> [a]
: [CodeUnit] -> [Char]
decodeUtf8 [CodeUnit]
cus
decodeUtf8 (CodeUnit
cu0 : CodeUnit
cu1 : CodeUnit
cu2 : CodeUnit
cu3 : [CodeUnit]
cus) | CodeUnit
cu0 CodeUnit -> CodeUnit -> Bool
forall a. Ord a => a -> a -> Bool
< CodeUnit
0xf8 = CodeUnit -> CodeUnit -> CodeUnit -> CodeUnit -> Char
decode4 CodeUnit
cu0 CodeUnit
cu1 CodeUnit
cu2 CodeUnit
cu3 Char -> ShowS
forall a. a -> [a] -> [a]
: [CodeUnit] -> [Char]
decodeUtf8 [CodeUnit]
cus
decodeUtf8 [CodeUnit]
cus = ShowS
forall a. HasCallStack => [Char] -> a
error ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ [Char]
"Invalid UTF-8 input sequence at " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [CodeUnit] -> [Char]
forall a. Show a => a -> [Char]
show (Int -> [CodeUnit] -> [CodeUnit]
forall a. Int -> [a] -> [a]
take Int
4 [CodeUnit]
cus)
{-# INLINE unsafeIndexCodePoint #-}
unsafeIndexCodePoint :: Text -> CodeUnitIndex -> (CodeUnitIndex, CodePoint)
unsafeIndexCodePoint :: Text -> CodeUnitIndex -> (CodeUnitIndex, Char)
unsafeIndexCodePoint (Text !Array
u8data !Int
off !Int
_len) !CodeUnitIndex
index =
Array -> CodeUnitIndex -> (CodeUnitIndex, Char)
unsafeIndexCodePoint' Array
u8data (CodeUnitIndex -> (CodeUnitIndex, Char))
-> CodeUnitIndex -> (CodeUnitIndex, Char)
forall a b. (a -> b) -> a -> b
$ Int -> CodeUnitIndex
CodeUnitIndex Int
off CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
index
{-# INLINE indexCodeUnit #-}
indexCodeUnit :: Text -> CodeUnitIndex -> CodeUnit
indexCodeUnit :: Text -> CodeUnitIndex -> CodeUnit
indexCodeUnit !Text
text !CodeUnitIndex
index
| CodeUnitIndex
index CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
< CodeUnitIndex
0 Bool -> Bool -> Bool
|| CodeUnitIndex
index CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
>= Text -> CodeUnitIndex
lengthUtf8 Text
text = [Char] -> CodeUnit
forall a. HasCallStack => [Char] -> a
error ([Char] -> CodeUnit) -> [Char] -> CodeUnit
forall a b. (a -> b) -> a -> b
$ [Char]
"Index out of bounds " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ CodeUnitIndex -> [Char]
forall a. Show a => a -> [Char]
show CodeUnitIndex
index
| Bool
otherwise = Text -> CodeUnitIndex -> CodeUnit
unsafeIndexCodeUnit Text
text CodeUnitIndex
index
{-# INLINE unsafeIndexCodeUnit #-}
unsafeIndexCodeUnit :: Text -> CodeUnitIndex -> CodeUnit
unsafeIndexCodeUnit :: Text -> CodeUnitIndex -> CodeUnit
unsafeIndexCodeUnit (Text !Array
u8data !Int
off !Int
_len) !CodeUnitIndex
index =
Array -> CodeUnitIndex -> CodeUnit
unsafeIndexCodeUnit' Array
u8data (CodeUnitIndex -> CodeUnit) -> CodeUnitIndex -> CodeUnit
forall a b. (a -> b) -> a -> b
$ Int -> CodeUnitIndex
CodeUnitIndex Int
off CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
index
{-# INLINE skipCodePointsBackwards #-}
skipCodePointsBackwards :: Text -> CodeUnitIndex -> Int -> CodeUnitIndex
skipCodePointsBackwards :: Text -> CodeUnitIndex -> Int -> CodeUnitIndex
skipCodePointsBackwards (Text !Array
u8data !Int
off !Int
len) !CodeUnitIndex
index0 !Int
n0
| CodeUnitIndex
index0 CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
>= Int -> CodeUnitIndex
CodeUnitIndex Int
len = [Char] -> CodeUnitIndex
forall a. HasCallStack => [Char] -> a
error [Char]
"Invalid use of skipCodePointsBackwards"
| Bool
otherwise = CodeUnitIndex -> Int -> CodeUnitIndex
forall {t}. (Eq t, Num t) => CodeUnitIndex -> t -> CodeUnitIndex
loop (CodeUnitIndex
index0 CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ Int -> CodeUnitIndex
CodeUnitIndex Int
off) Int
n0
where
loop :: CodeUnitIndex -> t -> CodeUnitIndex
loop CodeUnitIndex
index t
n | CodeUnitIndex -> Bool
atTrailingByte CodeUnitIndex
index =
CodeUnitIndex -> t -> CodeUnitIndex
loop (CodeUnitIndex
indexCodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
-CodeUnitIndex
1) t
n
loop CodeUnitIndex
index t
0 | CodeUnitIndex
index CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
< CodeUnitIndex
0 =
[Char] -> CodeUnitIndex
forall a. HasCallStack => [Char] -> a
error [Char]
"Invalid use of skipCodePointsBackwards"
loop CodeUnitIndex
index t
0 =
CodeUnitIndex
index CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- Int -> CodeUnitIndex
CodeUnitIndex Int
off
loop CodeUnitIndex
index t
n =
CodeUnitIndex -> t -> CodeUnitIndex
loop (CodeUnitIndex
indexCodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
-CodeUnitIndex
1) (t
nt -> t -> t
forall a. Num a => a -> a -> a
-t
1)
atTrailingByte :: CodeUnitIndex -> Bool
atTrailingByte !CodeUnitIndex
index = Array -> CodeUnitIndex -> CodeUnit
unsafeIndexCodeUnit' Array
u8data CodeUnitIndex
index CodeUnit -> CodeUnit -> CodeUnit
forall a. Bits a => a -> a -> a
.&. CodeUnit
0b1100_0000 CodeUnit -> CodeUnit -> Bool
forall a. Eq a => a -> a -> Bool
== CodeUnit
0b1000_0000
unsafeCutUtf8 :: CodeUnitIndex
-> CodeUnitIndex
-> Text
-> (Text, Text)
unsafeCutUtf8 :: CodeUnitIndex -> CodeUnitIndex -> Text -> (Text, Text)
unsafeCutUtf8 (CodeUnitIndex !Int
begin) (CodeUnitIndex !Int
len) !Text
text =
( Int -> Text -> Text
TextUnsafe.takeWord8 Int
begin Text
text
, Int -> Text -> Text
TextUnsafe.dropWord8 (Int
begin Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len) Text
text
)
unsafeSliceUtf8 :: CodeUnitIndex -> CodeUnitIndex -> Text -> Text
unsafeSliceUtf8 :: CodeUnitIndex -> CodeUnitIndex -> Text -> Text
unsafeSliceUtf8 (CodeUnitIndex !Int
begin) (CodeUnitIndex !Int
len) !Text
text =
Int -> Text -> Text
TextUnsafe.takeWord8 Int
len (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
TextUnsafe.dropWord8 Int
begin Text
text
isArrayPinned :: TextArray.Array -> Bool
isArrayPinned :: Array -> Bool
isArrayPinned (TextArray.ByteArray ByteArray#
ba#) = Int# -> Bool
Exts.isTrue# (ByteArray# -> Int#
Exts.isByteArrayPinned# ByteArray#
ba#)
arrayContents :: TextArray.Array -> Exts.Ptr Word8
arrayContents :: Array -> Ptr CodeUnit
arrayContents (TextArray.ByteArray ByteArray#
ba#) = Addr# -> Ptr CodeUnit
forall a. Addr# -> Ptr a
Exts.Ptr (ByteArray# -> Addr#
Exts.byteArrayContents# ByteArray#
ba#)
unsafeIndexCodePoint' :: TextArray.Array -> CodeUnitIndex -> (CodeUnitIndex, CodePoint)
{-# INLINE unsafeIndexCodePoint' #-}
unsafeIndexCodePoint' :: Array -> CodeUnitIndex -> (CodeUnitIndex, Char)
unsafeIndexCodePoint' !Array
u8data !CodeUnitIndex
idx =
CodeUnit
-> CodeUnit -> CodeUnit -> CodeUnit -> (CodeUnitIndex, Char)
decodeN (CodeUnitIndex -> CodeUnit
cuAt CodeUnitIndex
0) (CodeUnitIndex -> CodeUnit
cuAt CodeUnitIndex
1) (CodeUnitIndex -> CodeUnit
cuAt CodeUnitIndex
2) (CodeUnitIndex -> CodeUnit
cuAt CodeUnitIndex
3)
where
cuAt :: CodeUnitIndex -> CodeUnit
cuAt !CodeUnitIndex
i = Array -> CodeUnitIndex -> CodeUnit
unsafeIndexCodeUnit' Array
u8data (CodeUnitIndex -> CodeUnit) -> CodeUnitIndex -> CodeUnit
forall a b. (a -> b) -> a -> b
$ CodeUnitIndex
idx CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
i
decodeN :: CodeUnit -> CodeUnit -> CodeUnit -> CodeUnit -> (CodeUnitIndex, CodePoint)
{-# INLINE decodeN #-}
decodeN :: CodeUnit
-> CodeUnit -> CodeUnit -> CodeUnit -> (CodeUnitIndex, Char)
decodeN CodeUnit
cu0 CodeUnit
cu1 CodeUnit
cu2 CodeUnit
cu3
| CodeUnit
cu0 CodeUnit -> CodeUnit -> Bool
forall a. Ord a => a -> a -> Bool
< CodeUnit
0xc0 = (CodeUnitIndex
1, CodeUnit -> Char
decode1 CodeUnit
cu0)
| CodeUnit
cu0 CodeUnit -> CodeUnit -> Bool
forall a. Ord a => a -> a -> Bool
< CodeUnit
0xe0 = (CodeUnitIndex
2, CodeUnit -> CodeUnit -> Char
decode2 CodeUnit
cu0 CodeUnit
cu1)
| CodeUnit
cu0 CodeUnit -> CodeUnit -> Bool
forall a. Ord a => a -> a -> Bool
< CodeUnit
0xf0 = (CodeUnitIndex
3, CodeUnit -> CodeUnit -> CodeUnit -> Char
decode3 CodeUnit
cu0 CodeUnit
cu1 CodeUnit
cu2)
| Bool
otherwise = (CodeUnitIndex
4, CodeUnit -> CodeUnit -> CodeUnit -> CodeUnit -> Char
decode4 CodeUnit
cu0 CodeUnit
cu1 CodeUnit
cu2 CodeUnit
cu3)
data BackwardsIter = BackwardsIter
{ BackwardsIter -> CodeUnitIndex
backwardsIterNext :: {-# UNPACK #-} !CodeUnitIndex
, BackwardsIter -> Char
backwardsIterChar :: {-# UNPACK #-} !CodePoint
, BackwardsIter -> CodeUnitIndex
backwardsIterEndOfChar :: {-# UNPACK #-} !CodeUnitIndex
}
unsafeIndexEndOfCodePoint' :: TextArray.Array -> CodeUnitIndex -> BackwardsIter
{-# INLINE unsafeIndexEndOfCodePoint' #-}
unsafeIndexEndOfCodePoint' :: Array -> CodeUnitIndex -> BackwardsIter
unsafeIndexEndOfCodePoint' !Array
u8data !CodeUnitIndex
idx =
let
cuAt :: CodeUnitIndex -> CodeUnit
cuAt !CodeUnitIndex
i = Array -> CodeUnitIndex -> CodeUnit
unsafeIndexCodeUnit' Array
u8data (CodeUnitIndex -> CodeUnit) -> CodeUnitIndex -> CodeUnit
forall a b. (a -> b) -> a -> b
$ CodeUnitIndex
idx CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
i
isFirstByte :: a -> Bool
isFirstByte !a
cu = a
cu a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0b1100_0000 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
/= a
0b1000_0000
cu0 :: CodeUnit
cu0 = CodeUnitIndex -> CodeUnit
cuAt CodeUnitIndex
0
in
if CodeUnit -> Bool
forall {a}. (Bits a, Num a) => a -> Bool
isFirstByte CodeUnit
cu0
then CodeUnitIndex -> Char -> CodeUnitIndex -> BackwardsIter
BackwardsIter (CodeUnitIndex
idx CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
1) (CodeUnit -> Char
decode1 CodeUnit
cu0) CodeUnitIndex
idx
else
let cu1 :: CodeUnit
cu1 = CodeUnitIndex -> CodeUnit
cuAt CodeUnitIndex
1 in
if CodeUnit -> Bool
forall {a}. (Bits a, Num a) => a -> Bool
isFirstByte CodeUnit
cu1
then CodeUnitIndex -> Char -> CodeUnitIndex -> BackwardsIter
BackwardsIter (CodeUnitIndex
idx CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
2) (CodeUnit -> CodeUnit -> Char
decode2 CodeUnit
cu1 CodeUnit
cu0) CodeUnitIndex
idx
else
let cu2 :: CodeUnit
cu2 = CodeUnitIndex -> CodeUnit
cuAt CodeUnitIndex
2 in
if CodeUnit -> Bool
forall {a}. (Bits a, Num a) => a -> Bool
isFirstByte CodeUnit
cu2
then CodeUnitIndex -> Char -> CodeUnitIndex -> BackwardsIter
BackwardsIter (CodeUnitIndex
idx CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
3) (CodeUnit -> CodeUnit -> CodeUnit -> Char
decode3 CodeUnit
cu2 CodeUnit
cu1 CodeUnit
cu0) CodeUnitIndex
idx
else
let cu3 :: CodeUnit
cu3 = CodeUnitIndex -> CodeUnit
cuAt CodeUnitIndex
3 in
if CodeUnit -> Bool
forall {a}. (Bits a, Num a) => a -> Bool
isFirstByte CodeUnit
cu3
then CodeUnitIndex -> Char -> CodeUnitIndex -> BackwardsIter
BackwardsIter (CodeUnitIndex
idx CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
4) (CodeUnit -> CodeUnit -> CodeUnit -> CodeUnit -> Char
decode4 CodeUnit
cu3 CodeUnit
cu2 CodeUnit
cu1 CodeUnit
cu0) CodeUnitIndex
idx
else
[Char] -> BackwardsIter
forall a. HasCallStack => [Char] -> a
error [Char]
"unsafeIndexEndOfCodePoint' could not find valid UTF8 codepoint"
unsafeIndexAnywhereInCodePoint' :: TextArray.Array -> CodeUnitIndex -> BackwardsIter
{-# INLINE unsafeIndexAnywhereInCodePoint' #-}
unsafeIndexAnywhereInCodePoint' :: Array -> CodeUnitIndex -> BackwardsIter
unsafeIndexAnywhereInCodePoint' !Array
u8data !CodeUnitIndex
idx =
let
cuAt :: CodeUnitIndex -> CodeUnit
cuAt !CodeUnitIndex
i = Array -> CodeUnitIndex -> CodeUnit
unsafeIndexCodeUnit' Array
u8data (CodeUnitIndex -> CodeUnit) -> CodeUnitIndex -> CodeUnit
forall a b. (a -> b) -> a -> b
$ CodeUnitIndex
idx CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
i
isFirstByte :: a -> Bool
isFirstByte !a
cu = a
cu a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0b1100_0000 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
/= a
0b1000_0000
cu0 :: CodeUnit
cu0 = CodeUnitIndex -> CodeUnit
cuAt CodeUnitIndex
0
makeBackwardsIter :: CodeUnitIndex -> (CodeUnitIndex, Char) -> BackwardsIter
makeBackwardsIter CodeUnitIndex
next (CodeUnitIndex
l, Char
cp) = CodeUnitIndex -> Char -> CodeUnitIndex -> BackwardsIter
BackwardsIter CodeUnitIndex
next Char
cp (CodeUnitIndex
next CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
l)
in
if CodeUnit -> Bool
forall {a}. (Bits a, Num a) => a -> Bool
isFirstByte CodeUnit
cu0
then CodeUnitIndex -> (CodeUnitIndex, Char) -> BackwardsIter
makeBackwardsIter (CodeUnitIndex
idx CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
1) ((CodeUnitIndex, Char) -> BackwardsIter)
-> (CodeUnitIndex, Char) -> BackwardsIter
forall a b. (a -> b) -> a -> b
$ CodeUnit
-> CodeUnit -> CodeUnit -> CodeUnit -> (CodeUnitIndex, Char)
decodeN CodeUnit
cu0 (CodeUnitIndex -> CodeUnit
cuAt CodeUnitIndex
1) (CodeUnitIndex -> CodeUnit
cuAt CodeUnitIndex
2) (CodeUnitIndex -> CodeUnit
cuAt CodeUnitIndex
3)
else
let cu00 :: CodeUnit
cu00 = CodeUnitIndex -> CodeUnit
cuAt (-CodeUnitIndex
1) in
if CodeUnit -> Bool
forall {a}. (Bits a, Num a) => a -> Bool
isFirstByte CodeUnit
cu00
then CodeUnitIndex -> (CodeUnitIndex, Char) -> BackwardsIter
makeBackwardsIter (CodeUnitIndex
idx CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
2) ((CodeUnitIndex, Char) -> BackwardsIter)
-> (CodeUnitIndex, Char) -> BackwardsIter
forall a b. (a -> b) -> a -> b
$ CodeUnit
-> CodeUnit -> CodeUnit -> CodeUnit -> (CodeUnitIndex, Char)
decodeN CodeUnit
cu00 CodeUnit
cu0 (CodeUnitIndex -> CodeUnit
cuAt CodeUnitIndex
1) (CodeUnitIndex -> CodeUnit
cuAt CodeUnitIndex
2)
else
let cu000 :: CodeUnit
cu000 = CodeUnitIndex -> CodeUnit
cuAt (-CodeUnitIndex
2) in
if CodeUnit -> Bool
forall {a}. (Bits a, Num a) => a -> Bool
isFirstByte CodeUnit
cu000
then CodeUnitIndex -> (CodeUnitIndex, Char) -> BackwardsIter
makeBackwardsIter (CodeUnitIndex
idx CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
3) ((CodeUnitIndex, Char) -> BackwardsIter)
-> (CodeUnitIndex, Char) -> BackwardsIter
forall a b. (a -> b) -> a -> b
$ CodeUnit
-> CodeUnit -> CodeUnit -> CodeUnit -> (CodeUnitIndex, Char)
decodeN CodeUnit
cu000 CodeUnit
cu00 CodeUnit
cu0 (CodeUnitIndex -> CodeUnit
cuAt CodeUnitIndex
1)
else
let cu0000 :: CodeUnit
cu0000 = CodeUnitIndex -> CodeUnit
cuAt (-CodeUnitIndex
3) in
if CodeUnit -> Bool
forall {a}. (Bits a, Num a) => a -> Bool
isFirstByte CodeUnit
cu0000
then CodeUnitIndex -> (CodeUnitIndex, Char) -> BackwardsIter
makeBackwardsIter (CodeUnitIndex
idx CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
4) ((CodeUnitIndex, Char) -> BackwardsIter)
-> (CodeUnitIndex, Char) -> BackwardsIter
forall a b. (a -> b) -> a -> b
$ CodeUnit
-> CodeUnit -> CodeUnit -> CodeUnit -> (CodeUnitIndex, Char)
decodeN CodeUnit
cu0000 CodeUnit
cu000 CodeUnit
cu00 CodeUnit
cu0
else
[Char] -> BackwardsIter
forall a. HasCallStack => [Char] -> a
error [Char]
"unsafeIndexAnywhereInCodePoint' could not find valid UTF8 codepoint"
{-# INLINE unsafeIndexCodeUnit' #-}
unsafeIndexCodeUnit' :: TextArray.Array -> CodeUnitIndex -> CodeUnit
unsafeIndexCodeUnit' :: Array -> CodeUnitIndex -> CodeUnit
unsafeIndexCodeUnit' !Array
u8data (CodeUnitIndex !Int
idx) = Array -> Int -> CodeUnit
TextArray.unsafeIndex Array
u8data Int
idx