module Text.PariPari.Decode (
  bytesEqual
  , byteAt
  , utf8Decode
  , utf8DecodeFixed
  , utf8Width
) where

import Data.Word (Word8)
import Data.Bits (unsafeShiftL, (.|.), (.&.))
import Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
import Foreign.Ptr (plusPtr)
import Foreign.Storable (peekByteOff)
import GHC.Base (unsafeChr)
import qualified Data.ByteString.Internal as B

bytesEqual :: ForeignPtr Word8 -> Int -> ForeignPtr Word8 -> Int -> Int -> Bool
bytesEqual p1 i1 p2 i2 n =
  B.accursedUnutterablePerformIO $
  withForeignPtr p1 $ \q1 ->
  withForeignPtr p2 $ \q2 ->
  (== 0) <$> B.memcmp (q1 `plusPtr` i1) (q2 `plusPtr` i2) n
{-# INLINE bytesEqual #-}

byteAt :: ForeignPtr Word8 -> Int -> Word8
byteAt p i = B.accursedUnutterablePerformIO $
  withForeignPtr p $ \q -> peekByteOff q i
{-# INLINE byteAt #-}

at :: ForeignPtr Word8 -> Int -> Int
at p i = fromIntegral $ byteAt p i
{-# INLINE at #-}

-- | Decode UTF-8 character at the given offset relative to the pointer
utf8Decode :: ForeignPtr Word8 -> Int -> (Char, Int)
utf8Decode p i
  | a1 <- at p i,
    a1 <= 0x7F =
    (unsafeChr a1, 1)
  | a1 <- at p i, a2 <- at p (i + 1),
    (a1 .&. 0xE0) == 0xC0,
    (a2 .&. 0xC0) == 0x80 =
    (unsafeChr (((a1 .&. 31) `unsafeShiftL` 6)
                .|. (a2 .&. 0x3F)), 2)
  | a1 <- at p i, a2 <- at p (i + 1), a3 <- at p (i + 2),
    (a1 .&. 0xF0) == 0xE0,
    (a2 .&. 0xC0) == 0x80,
    (a3 .&. 0xC0) == 0x80 =
    (unsafeChr (((a1 .&. 15) `unsafeShiftL` 12)
                 .|. ((a2 .&. 0x3F) `unsafeShiftL` 6)
                 .|. (a3 .&. 0x3F)), 3)
  | a1 <- at p i, a2 <- at p (i + 1), a3 <- at p (i + 2), a4 <- at p (i + 3),
    (a1 .&. 0xF8) == 0xF0,
    (a2 .&. 0xC0) == 0x80,
    (a3 .&. 0xC0) == 0x80,
    (a4 .&. 0xC0) == 0x80 =
    (unsafeChr (((a1 .&. 7) `unsafeShiftL` 18)
                 .|. ((a2 .&. 0x3F) `unsafeShiftL` 12)
                 .|. ((a3 .&. 0x3F) `unsafeShiftL` 6)
                 .|. (a4 .&. 0x3F)), 4)
  | otherwise = ('\0', 0)
{-# INLINE utf8Decode #-}

-- | Decode UTF-8 character with known width at the given offset relative to the pointer
utf8DecodeFixed :: Int -> ForeignPtr Word8 -> Int -> Char
utf8DecodeFixed w p i = unsafeChr $
  case w of
    1 -> at p i
    2 | a1 <- at p i, a2 <- at p (i + 1) ->
        ((a1 .&. 31) `unsafeShiftL` 6)
        .|. (a2 .&. 0x3F)
    3 | a1 <- at p i, a2 <- at p (i + 1), a3 <- at p (i + 2) ->
        ((a1 .&. 15) `unsafeShiftL` 12)
        .|. ((a2 .&. 0x3F) `unsafeShiftL` 6)
        .|. (a3 .&. 0x3F)
    4 | a1 <- at p i, a2 <- at p (i + 1), a3 <- at p (i + 2), a4 <- at p (i + 3) ->
        ((a1 .&. 7) `unsafeShiftL` 18)
        .|. ((a2 .&. 0x3F) `unsafeShiftL` 12)
        .|. ((a3 .&. 0x3F) `unsafeShiftL` 6)
        .|. (a4 .&. 0x3F)
    _ -> 0
{-# INLINE utf8DecodeFixed #-}

-- | Bytes width of an UTF-8 character
utf8Width :: Char -> Int
utf8Width c | c <= unsafeChr 0x7F = 1
            | c <= unsafeChr 0x7FF = 2
            | c <= unsafeChr 0xFFFF = 3
            | otherwise = 4
{-# INLINE utf8Width #-}