module Text.PariPari.Decode (
bytesEqual
, byteAt
, utf8Decode
, utf8DecodeFixed
, utf8Width
) where
import Data.Word (Word8)
import Data.Bits (unsafeShiftL, (.|.), (.&.))
import Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
import Foreign.Ptr (plusPtr)
import Foreign.Storable (peekByteOff)
import GHC.Base (unsafeChr)
import qualified Data.ByteString.Internal as B
bytesEqual :: ForeignPtr Word8 -> Int -> ForeignPtr Word8 -> Int -> Int -> Bool
bytesEqual p1 i1 p2 i2 n =
B.accursedUnutterablePerformIO $
withForeignPtr p1 $ \q1 ->
withForeignPtr p2 $ \q2 ->
(== 0) <$> B.memcmp (q1 `plusPtr` i1) (q2 `plusPtr` i2) n
{-# INLINE bytesEqual #-}
byteAt :: ForeignPtr Word8 -> Int -> Word8
byteAt p i = B.accursedUnutterablePerformIO $
withForeignPtr p $ \q -> peekByteOff q i
{-# INLINE byteAt #-}
at :: ForeignPtr Word8 -> Int -> Int
at p i = fromIntegral $ byteAt p i
{-# INLINE at #-}
utf8Decode :: ForeignPtr Word8 -> Int -> (Char, Int)
utf8Decode p i
| a1 <- at p i,
a1 <= 0x7F =
(unsafeChr a1, 1)
| a1 <- at p i, a2 <- at p (i + 1),
(a1 .&. 0xE0) == 0xC0,
(a2 .&. 0xC0) == 0x80 =
(unsafeChr (((a1 .&. 31) `unsafeShiftL` 6)
.|. (a2 .&. 0x3F)), 2)
| a1 <- at p i, a2 <- at p (i + 1), a3 <- at p (i + 2),
(a1 .&. 0xF0) == 0xE0,
(a2 .&. 0xC0) == 0x80,
(a3 .&. 0xC0) == 0x80 =
(unsafeChr (((a1 .&. 15) `unsafeShiftL` 12)
.|. ((a2 .&. 0x3F) `unsafeShiftL` 6)
.|. (a3 .&. 0x3F)), 3)
| a1 <- at p i, a2 <- at p (i + 1), a3 <- at p (i + 2), a4 <- at p (i + 3),
(a1 .&. 0xF8) == 0xF0,
(a2 .&. 0xC0) == 0x80,
(a3 .&. 0xC0) == 0x80,
(a4 .&. 0xC0) == 0x80 =
(unsafeChr (((a1 .&. 7) `unsafeShiftL` 18)
.|. ((a2 .&. 0x3F) `unsafeShiftL` 12)
.|. ((a3 .&. 0x3F) `unsafeShiftL` 6)
.|. (a4 .&. 0x3F)), 4)
| otherwise = ('\0', 0)
{-# INLINE utf8Decode #-}
utf8DecodeFixed :: Int -> ForeignPtr Word8 -> Int -> Char
utf8DecodeFixed w p i = unsafeChr $
case w of
1 -> at p i
2 | a1 <- at p i, a2 <- at p (i + 1) ->
((a1 .&. 31) `unsafeShiftL` 6)
.|. (a2 .&. 0x3F)
3 | a1 <- at p i, a2 <- at p (i + 1), a3 <- at p (i + 2) ->
((a1 .&. 15) `unsafeShiftL` 12)
.|. ((a2 .&. 0x3F) `unsafeShiftL` 6)
.|. (a3 .&. 0x3F)
4 | a1 <- at p i, a2 <- at p (i + 1), a3 <- at p (i + 2), a4 <- at p (i + 3) ->
((a1 .&. 7) `unsafeShiftL` 18)
.|. ((a2 .&. 0x3F) `unsafeShiftL` 12)
.|. ((a3 .&. 0x3F) `unsafeShiftL` 6)
.|. (a4 .&. 0x3F)
_ -> 0
{-# INLINE utf8DecodeFixed #-}
utf8Width :: Char -> Int
utf8Width c | c <= unsafeChr 0x7F = 1
| c <= unsafeChr 0x7FF = 2
| c <= unsafeChr 0xFFFF = 3
| otherwise = 4
{-# INLINE utf8Width #-}