module Foundation.Primitive.UTF8.Addr
( Immutable
, Mutable
, nextAscii
, nextAsciiDigit
, expectAscii
, next
, prev
, prevSkip
, write
, toList
, all
, any
, foldr
, length
, primIndex
, primIndex64
, primRead
, primWrite
) where
import GHC.Int
import GHC.Types
import GHC.Word
import GHC.Prim
import Data.Bits
import Foundation.Internal.Base hiding (toList)
import Foundation.Internal.Primitive
import Foundation.Internal.Proxy
import Foundation.Numerical
import Foundation.Primitive.Types.OffsetSize
import Foundation.Primitive.Monad
import Foundation.Primitive.Types
import Foundation.Primitive.UTF8.Helper
import Foundation.Primitive.UTF8.Table
import Foundation.Primitive.UTF8.Types
type Immutable = Addr#
type Mutable (prim :: * -> *) = Addr#
primWrite :: PrimMonad prim => Mutable prim -> Offset Word8 -> Word8 -> prim ()
primWrite = primAddrWrite
primRead :: PrimMonad prim => Mutable prim -> Offset Word8 -> prim Word8
primRead = primAddrRead
primIndex :: Immutable -> Offset Word8 -> Word8
primIndex = primAddrIndex
primIndex64 :: Immutable -> Offset Word64 -> Word64
primIndex64 = primAddrIndex
nextAscii :: Immutable -> Offset Word8 -> StepASCII
nextAscii ba n = StepASCII w
where
!w = primIndex ba n
nextAsciiDigit :: Immutable -> Offset Word8 -> StepDigit
nextAsciiDigit ba n = StepDigit (primIndex ba n 0x30)
expectAscii :: Immutable -> Offset Word8 -> Word8 -> Bool
expectAscii ba n v = primIndex ba n == v
next :: Immutable -> Offset8 -> Step
next ba n =
case getNbBytes h of
0 -> Step (toChar1 h) (n + Offset 1)
1 -> Step (toChar2 h (primIndex ba (n + Offset 1))) (n + Offset 2)
2 -> Step (toChar3 h (primIndex ba (n + Offset 1))
(primIndex ba (n + Offset 2))) (n + Offset 3)
3 -> Step (toChar4 h (primIndex ba (n + Offset 1))
(primIndex ba (n + Offset 2))
(primIndex ba (n + Offset 3))) (n + Offset 4)
r -> error ("next: internal error: invalid input: offset=" <> show n <> " table=" <> show r <> " h=" <> show h)
where
!h = primIndex ba n
prev :: Immutable -> Offset Word8 -> StepBack
prev ba offset =
case primIndex ba prevOfs1 of
(W8# v1) | isContinuation# v1 -> atLeast2 (maskContinuation# v1)
| otherwise -> StepBack (toChar# v1) prevOfs1
where
sz1 = CountOf 1
!prevOfs1 = offset `offsetMinusE` sz1
prevOfs2 = prevOfs1 `offsetMinusE` sz1
prevOfs3 = prevOfs2 `offsetMinusE` sz1
prevOfs4 = prevOfs3 `offsetMinusE` sz1
atLeast2 !v =
case primIndex ba prevOfs2 of
(W8# v2) | isContinuation# v2 -> atLeast3 (or# (uncheckedShiftL# (maskContinuation# v2) 6#) v)
| otherwise -> StepBack (toChar# (or# (uncheckedShiftL# (maskHeader2# v2) 6#) v)) prevOfs2
atLeast3 !v =
case primIndex ba prevOfs3 of
(W8# v3) | isContinuation# v3 -> atLeast4 (or# (uncheckedShiftL# (maskContinuation# v3) 12#) v)
| otherwise -> StepBack (toChar# (or# (uncheckedShiftL# (maskHeader3# v3) 12#) v)) prevOfs3
atLeast4 !v =
case primIndex ba prevOfs4 of
(W8# v4) -> StepBack (toChar# (or# (uncheckedShiftL# (maskHeader4# v4) 18#) v)) prevOfs4
prevSkip :: Immutable -> Offset Word8 -> Offset Word8
prevSkip ba offset = loop (offset `offsetMinusE` sz1)
where
sz1 = CountOf 1
loop o
| isContinuation (primIndex ba o) = loop (o `offsetMinusE` sz1)
| otherwise = o
write :: PrimMonad prim => Mutable prim -> Offset8 -> Char -> prim Offset8
write mba !i !c
| bool# (ltWord# x 0x80## ) = encode1
| bool# (ltWord# x 0x800## ) = encode2
| bool# (ltWord# x 0x10000##) = encode3
| otherwise = encode4
where
!(I# xi) = fromEnum c
!x = int2Word# xi
encode1 = primWrite mba i (W8# x) >> pure (i + Offset 1)
encode2 = do
let x1 = or# (uncheckedShiftRL# x 6#) 0xc0##
x2 = toContinuation x
primWrite mba i (W8# x1)
primWrite mba (i+1) (W8# x2)
pure (i + Offset 2)
encode3 = do
let x1 = or# (uncheckedShiftRL# x 12#) 0xe0##
x2 = toContinuation (uncheckedShiftRL# x 6#)
x3 = toContinuation x
primWrite mba i (W8# x1)
primWrite mba (i+Offset 1) (W8# x2)
primWrite mba (i+Offset 2) (W8# x3)
pure (i + Offset 3)
encode4 = do
let x1 = or# (uncheckedShiftRL# x 18#) 0xf0##
x2 = toContinuation (uncheckedShiftRL# x 12#)
x3 = toContinuation (uncheckedShiftRL# x 6#)
x4 = toContinuation x
primWrite mba i (W8# x1)
primWrite mba (i+Offset 1) (W8# x2)
primWrite mba (i+Offset 2) (W8# x3)
primWrite mba (i+Offset 3) (W8# x4)
pure (i + Offset 4)
toContinuation :: Word# -> Word#
toContinuation w = or# (and# w 0x3f##) 0x80##
toList :: Immutable -> Offset Word8 -> Offset Word8 -> [Char]
toList ba !start !end = loop start
where
loop !idx
| idx == end = []
| otherwise = c : loop idx'
where (Step c idx') = next ba idx
all :: (Char -> Bool) -> Immutable -> Offset Word8 -> Offset Word8 -> Bool
all predicate ba start end = loop start
where
loop !idx
| idx == end = True
| predicate c = loop idx'
| otherwise = False
where (Step c idx') = next ba idx
any :: (Char -> Bool) -> Immutable -> Offset Word8 -> Offset Word8 -> Bool
any predicate ba start end = loop start
where
loop !idx
| idx == end = False
| predicate c = True
| otherwise = loop idx'
where (Step c idx') = next ba idx
foldr :: Immutable -> Offset Word8 -> Offset Word8 -> (Char -> a -> a) -> a -> a
foldr dat start end f acc = loop start
where
loop !i
| i == end = acc
| otherwise =
let (Step c i') = next dat i
in c `f` loop i'
length :: Immutable -> Offset Word8 -> Offset Word8 -> CountOf Char
length dat start end
| start == end = 0
| otherwise = processStart 0 start
where
end64 :: Offset Word64
end64 = offsetInElements end
prx64 :: Proxy Word64
prx64 = Proxy
mask64_80 :: Word64
mask64_80 = 0x8080808080808080
processStart :: CountOf Char -> Offset Word8 -> CountOf Char
processStart !c !i
| i == end = c
| offsetIsAligned prx64 i = processAligned c (offsetInElements i)
| otherwise =
let h = primIndex dat i
cont = (h .&. 0xc0) == 0x80
c' = if cont then c else c+1
in processStart c' (i+1)
processAligned :: CountOf Char -> Offset Word64 -> CountOf Char
processAligned !c !i
| i >= end64 = processEnd c (offsetInBytes i)
| otherwise =
let !h = primIndex64 dat i
!h80 = h .&. mask64_80
in if h80 == 0
then processAligned (c+8) (i+1)
else let !nbAscii = if h80 == mask64_80 then 0 else CountOf (8 popCount h80)
!nbHigh = CountOf $ popCount (h .&. (h80 `unsafeShiftR` 1))
in processAligned (c + nbAscii + nbHigh) (i+1)
processEnd !c !i
| i == end = c
| otherwise =
let h = primIndex dat i
cont = (h .&. 0xc0) == 0x80
c' = if cont then c else c+1
in processStart c' (i+1)