{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeInType #-}
{-# LANGUAGE UnboxedSums #-}
{-# LANGUAGE UnboxedTuples #-}

{-# OPTIONS_GHC
 -Weverything
 -fno-warn-unsafe
 -fno-warn-implicit-prelude
 -O2
#-}

module Packed.Bytes.Window
  ( findByte
  , foldl'
  , reverse
  , zipAnd
  , zipOr
  , zipXor
  , equality
    -- * Characters
  , isAscii
  , isUtf8
  , findNonAscii'
  ) where

import Data.Primitive (ByteArray(ByteArray))
import Data.Word (Word8)
import GHC.Types (RuntimeRep,TYPE)
import GHC.Int (Int(I#))
import GHC.Word (Word8(W8#),Word(W#))
import GHC.Exts (Int#,Word#,ByteArray#)
import Data.Bits (xor,(.|.),(.&.),complement,unsafeShiftL)
import Control.Monad.ST (ST,runST)
import qualified Data.Primitive as PM

type Maybe# (a :: TYPE (r :: RuntimeRep)) = (# (# #) | a #)

boxMaybeInt :: Maybe# Int# -> Maybe Int
boxMaybeInt = \case
  (# | a #) -> Just (I# a)
  (# (# #) | #) -> Nothing

unboxInt :: Int -> Int#
unboxInt (I# i) = i

unboxWord :: Word -> Word#
unboxWord (W# i) = i

-- | Finds the first occurrence of the given byte.
{-# INLINE findByte #-}
findByte :: Int -> Int -> Word8 -> ByteArray -> Maybe Int
findByte (I# off) (I# len) (W8# w) (ByteArray arr) =
  boxMaybeInt (findByte' off len w arr)

{-# NOINLINE findByte' #-}
findByte' :: Int# -> Int# -> Word# -> ByteArray# -> Maybe# Int#
findByte' !off# !len0# !w0# !arr0# = 
  let !off = I# off#
      !len0 = I# len0#
      !end0 = off + len0
      !beginMachWord = alignUp off
      !endMachWord = alignDown end0
   in if len0 < PM.sizeOf (undefined :: Word)
        then go off end0
        else case go off (beginMachWord * PM.sizeOf (undefined :: Word)) of
          (# | ix #) -> (# | ix #)
          (# (# #) | #) -> case goMachWord beginMachWord endMachWord (broadcastWord8 w) of
            (# | ix #) -> (# | ix #)
            (# (# #) | #) -> case go (endMachWord * PM.sizeOf (undefined :: Word)) end0 of
              (# | ix #) -> (# | ix #)
              (# (# #) | #) -> (# (# #) | #)
  where
  !w = W8# w0#
  !arr = ByteArray arr0#
  go :: Int -> Int -> Maybe# Int#
  go !ix !end = if ix < end
    then if PM.indexByteArray arr ix == w
      then (# | unboxInt ix #)
      else go (ix + 1) end
    else (# (# #) | #)
  -- The start and end index here are given in machine Word elements,
  -- not Word8 elements.
  goMachWord :: Int -> Int -> Word -> Maybe# Int#
  goMachWord !ix !end !artifact = if ix < end
    then case detectArtifact (unsafeIndexWord arr ix) artifact of
      0 -> goMachWord (ix + 1) end artifact
      _ -> go -- this call to go should always return Just
        (ix * PM.sizeOf (undefined :: Word)) 
        ((ix + 1) * PM.sizeOf (undefined :: Word))
    else (# (# #) | #)

-- cast a Word8 index to a machine Word index, rounding up
alignUp :: Int -> Int
alignUp i =
  let !(!quotient,!remainder) = quotRem i (PM.sizeOf (undefined :: Word))
   in case remainder of
        0 -> quotient
        _ -> quotient + 1

-- cast a Word8 index to a machine Word index, rounding down
alignDown :: Int -> Int
alignDown i = quot i (PM.sizeOf (undefined :: Word))

broadcastWord8 :: Word8 -> Word
broadcastWord8 !w0 = go 8 (fromIntegral w0) where
  go :: Int -> Word -> Word
  go !n !w = if n < 8 * PM.sizeOf (undefined :: Word)
    then go (twice n) (unsafeShiftL w n .|. w)
    else w

twice :: Int -> Int
twice n = n * 2

-- returns non-zero if a null byte is present in the machine word
detectNull :: Word -> Word
detectNull x = (x - repeatHexZeroOne) .&. complement x .&. repeatHexEightZero

detectArtifact :: Word -> Word -> Word
detectArtifact x artifact = detectNull (applyArtifact x artifact)

applyArtifact :: Word -> Word -> Word
applyArtifact = xor

repeatHexZeroOne :: Word
repeatHexZeroOne = div maxBound 255

repeatHexEightZero :: Word
repeatHexEightZero = 128 * (div maxBound 255 :: Word)

foldl' :: forall a. Int -> Int -> (a -> Word8 -> a) -> a -> ByteArray -> a
foldl' !off !len f !acc0 !arr = go acc0 off where
  go :: a -> Int -> a
  go !acc !ix = if ix < off + len
    then go (f acc (PM.indexByteArray arr ix)) (ix + 1)
    else acc

-- this is only used internally
unsafeIndexWord :: ByteArray -> Int -> Word
unsafeIndexWord = PM.indexByteArray

safeIndexWord :: ByteArray -> Int -> Word
safeIndexWord arr ix = if ix < 0 || ix >= (div (PM.sizeofByteArray arr) (PM.sizeOf (undefined :: Word)))
  then error ("safeIndexWord: " ++ show ix ++ " is out of bounds")
  else PM.indexByteArray arr ix

-- this is only used internally
unsafeIndex :: ByteArray -> Int -> Word8
unsafeIndex = PM.indexByteArray

safeIndex :: ByteArray -> Int -> Word8
safeIndex arr ix = if ix < 0 || ix >= PM.sizeofByteArray arr
  then error ("safeIndex: " ++ show ix ++ " is out of bounds")
  else PM.indexByteArray arr ix

-- TODO: optimize this. We could do a whole Word64 at a
-- time if the bytearray is pinned. Maybe even if it
-- isn't pinned.
-- reverse :: Int -> Int -> ByteArray -> ByteArray
-- reverse off len arr = runST

-- | Check if the given slice of the two byte arrays
--   is equal.
equality :: 
     Int -- ^ start x
  -> Int -- ^ start y
  -> Int -- ^ length
  -> ByteArray -- ^ array x
  -> ByteArray -- ^ array y
  -> Bool
equality !ixA !ixB !len !arrA !arrB = go 0
  -- TODO: Replace this with compareByteArrays# once GHC 8.4
  -- or GHC 8.6 is released. This will be simpler and much
  -- faster.
  where
  go :: Int -> Bool
  go !ix = if ix < len
    then if safeIndex arrA (ix + ixA) == safeIndex arrB (ix + ixB)
      then go (ix + 1)
      else False
    else True

{-# INLINE zipVectorizable #-}
zipVectorizable ::
     (Word8 -> Word8 -> Word8)
  -> (Word -> Word -> Word)
  -> Int -- start x
  -> Int -- len x
  -> Int -- start y
  -> Int -- len y
  -> ByteArray -- x
  -> ByteArray -- y
  -> ByteArray -- z
zipVectorizable !combine !combineMach !startX !lenX !startY !lenY !x !y = runST action
  where
  action :: forall s. ST s ByteArray
  action = do
    let !len = min lenX lenY
    marr <- PM.newByteArray len
    let !(!quotStartX,!remStartX) = quotRem startX (PM.sizeOf (undefined :: Word))
        !(!quotStartY,!remStartY) = quotRem startY (PM.sizeOf (undefined :: Word))
        go :: Int -> Int -> ST s ()
        go !ix !end = if ix < end
          then do
            PM.writeByteArray marr ix (combine (unsafeIndex x (startX + ix)) (unsafeIndex y (startY + ix)))
            go (ix + 1) end
          else return ()
        goMach :: Int -> Int -> ST s ()
        goMach !ix !end = if ix < end
          then do
            PM.writeByteArray marr ix (combineMach (unsafeIndexWord x (quotStartX + ix)) (unsafeIndexWord y (quotStartY + ix)))
            goMach (ix + 1) end
          else return ()
    if remStartX .|. remStartY == 0 -- if they are both zero
      then do
        let !lenQuotient = quot len (PM.sizeOf (undefined :: Word))
        goMach 0 lenQuotient
        go (lenQuotient * PM.sizeOf (undefined :: Word)) len
      else go 0 len
    PM.unsafeFreezeByteArray marr

zipAnd :: Int -> Int -> Int -> Int -> ByteArray -> ByteArray -> ByteArray
zipAnd x0 xlen y0 ylen x y = zipVectorizable (.&.) (.&.) x0 xlen y0 ylen x y

zipOr :: Int -> Int -> Int -> Int -> ByteArray -> ByteArray -> ByteArray
zipOr x0 xlen y0 ylen x y = zipVectorizable (.|.) (.|.) x0 xlen y0 ylen x y

zipXor :: Int -> Int -> Int -> Int -> ByteArray -> ByteArray -> ByteArray
zipXor x0 xlen y0 ylen x y = zipVectorizable (.|.) (.|.) x0 xlen y0 ylen x y

-- this can be used to implement all predicates or any predicates
{-# INLINE boolVectorizable #-}
boolVectorizable ::
     Bool
  -> (Bool -> Bool -> Bool)
  -> (Word8 -> Bool)
  -> (Word -> Bool)
  -> Int -- start
  -> Int -- len
  -> ByteArray
  -> Bool
boolVectorizable emptyBool together predicate predicateMach !start !len !arr =
  let !end0 = start + len
      !beginMachWord = alignUp start
      !endMachWord = alignDown end0
   in if len < PM.sizeOf (undefined :: Word)
        then go start end0
        else together
          (go start (beginMachWord * PM.sizeOf (undefined :: Word)))
          (together
            (goMachWord beginMachWord endMachWord)
            (go (endMachWord * PM.sizeOf (undefined :: Word)) end0)
          )
  where
  go :: Int -> Int -> Bool
  go !ix !end = if ix < end
    then together (predicate (PM.indexByteArray arr ix)) (go (ix + 1) end)
    else emptyBool
  -- The start and end index here are given in machine Word elements,
  -- not Word8 elements.
  goMachWord :: Int -> Int -> Bool
  goMachWord !ix !end = if ix < end
    then together (predicateMach (unsafeIndexWord arr ix)) (goMachWord (ix + 1) end)
    else emptyBool

{-# INLINE findVectorizable #-}
findVectorizable ::
     (Word8 -> Bool)
  -> (Word -> Bool)
  -> Int -- start
  -> Int -- len
  -> ByteArray
  -> Maybe# Int#
findVectorizable predicate predicateMach !start !len !arr =
  let !end0 = start + len
      !beginMachWord = alignUp start
      !endMachWord = alignDown end0
   in if len < PM.sizeOf (undefined :: Word)
        then go start end0
        else case go start (beginMachWord * PM.sizeOf (undefined :: Word)) of
          (# | ix #) -> (# | ix #)
          (# (# #) | #) -> case goMachWord beginMachWord endMachWord of
            (# | ix #) -> (# | ix #)
            (# (# #) | #) -> case go (endMachWord * PM.sizeOf (undefined :: Word)) end0 of
              (# | ix #) -> (# | ix #)
              (# (# #) | #) -> (# (# #) | #)
  where
  go :: Int -> Int -> Maybe# Int#
  go !ix !end = if ix < end
    then case predicate (safeIndex arr ix) of
      False -> go (ix + 1) end
      True -> (# | unboxInt ix #)
    else (# (# #) | #)
  -- The start and end index here are given in machine Word elements,
  -- not Word8 elements.
  goMachWord :: Int -> Int -> Maybe# Int#
  goMachWord !ix !end = if ix < end
    then case predicateMach (safeIndexWord arr ix) of
      False -> goMachWord (ix + 1) end
      True -> go
        (ix * PM.sizeOf (undefined :: Word)) 
        ((ix + 1) * PM.sizeOf (undefined :: Word))
    else (# (# #) | #)

asciiMask :: Word8
asciiMask = 0x80

asciiMachMask :: Word
asciiMachMask = repeatHexEightZero

isAscii :: Int -> Int -> ByteArray -> Bool
isAscii start len arr = boolVectorizable True (&&)
  (\w -> w .&. asciiMask == 0)
  (\w -> w .&. asciiMachMask == 0)
  start len arr

-- This could be exported but it does not seem generally useful.
findNonAscii' :: Int -> Int -> ByteArray -> Maybe# Int#
findNonAscii' !start !len !arr = findVectorizable
  (\w -> w .&. asciiMask /= 0)
  (\w -> w .&. asciiMachMask /= 0)
  start len arr

-- | The meaning of the result sum elements in order:
--
--   1. An integer representing the first unparseable byte. This
--      may be up to three bytes before the actual byte where
--      the actual problem occurred. The leading byte is given
--      since that gives recovery mechanisms a reasonable place
--      to split the Bytes.
--   2. An empty nullary tuple indicating success with no leftovers
--   3. A triple of three words. The first word is the total number
--      of bytes in the multibyte word (only ever 2, 3, or 4). The
--      second is the number of additional bytes needed (only
--      ever 1, 2, or 3). The third is the fragment of the character built
--      so far. It will need to be bit shifted to the left by some
--      multiple of 6 to be completed.
--   
--   The first tuple element is 0 if everything successfully
--   parsed was acsii and a word with the high bit set to 1
--   if multi-byte characters were present.
--   It is 1 if there were surrogates, characters
--   in the range @U+D800@ to @U+DFFF@, present in the text. If
--   there are surrogates, it is implied that there are multi-byte
--   characters, since a surrogate is multi-byte by definition.
--   The value of this element is still meaningful even if the parse
--   ultimately fails.
isUtf8 :: 
     Int -- start
  -> Int -- length
  -> ByteArray -- bytes
  -> (# Word#, (# Int# | (# #) | (# Word#, Word#, Word# #) #) #)
isUtf8 !start !len !arr = case findNonAscii' start len arr of
  (# (# #) | #) -> (# 0##, (# | (# #) | #) #)
  (# | ix# #) -> case postAsciiIsUtf8 (I# ix#) (len + start - (I# ix#)) arr of
    (# hasSurrogate, (# ixFailure# | | #) #) -> if I# ix# == I# ixFailure#
      then (# 0## , (# ixFailure# | | #) #)
      else (# hasSurrogate, (# ixFailure# | | #) #)
    (# hasSurrogate, (# | (# #) | #) #) -> (# hasSurrogate, (# | (# #) | #) #)
    (# hasSurrogate, (# | | (# w1, w2, w3 #) #) #) -> (# hasSurrogate, (# | | (# w1, w2, w3 #) #) #)
-- Notes on the implementation of isUtf8 There is some careful trickery to
-- ensure that we always correctly report whether or not we encountered any
-- multi-byte characters. We initially do a fast run to get as far as we can
-- on only ascii characters. In this stage, we are able to travel a full
-- machine word at a time. After this, we switch to a slower byte-by-byte
-- UTF-8 recognition function.  If this second stage fails on the very first
-- byte it sees, we report that everything successfully parsed was ascii.
-- If it fails at any point after this, we report that multibyte characters
-- were encounter. If it succeeds, we report that we encounter multi-byte
-- characters. This is accurate because, for it to start running at all,
-- findNonAscii' must have found something that was not ascii.

-- Note that postAsciiIsUtf8 does not return anything indicating whether or not there
-- were any multi-byte characters present. This is because, if this function
-- is called, it should be called with the start index on a non-ascii-encoded
-- character. This function is not expect to perform well. It shouldn't
-- allocate memory, but it has to go byte-by-byte through the ByteArray. The
-- author of this library does not know of a way to vectorize the check
-- for UTF-8 compliance.
--
-- The first element of the response tuple is either 1 or a machine word
-- with the high bit set to 1. If it is the high bit,
-- no surrogates were present. If it is 1, surrogates were present.
-- The second element is a nested unboxed sum with three cases. These
-- are described in the docs for isUtf8.
--
postAsciiIsUtf8 :: 
     Int -- start
  -> Int -- length
  -> ByteArray -- bytes
  -> (# Word#, (# Int# | (# #) | (# Word#, Word#, Word# #) #) #)
postAsciiIsUtf8 !start !len !arr = go start binaryOneThenZeroes
  where
  !end = start + len
  go :: Int 
     -> Word
     -> (# Word#, (# Int# | (# #) | (# Word#, Word#, Word# #) #) #)
  go !ix !hasSurrogate = if ix < end
    then
      let !firstByte = safeIndex arr ix in
       if | oneByteChar firstByte -> go (ix + 1) hasSurrogate
          | twoByteChar firstByte -> if ix + 1 < end
              then if followingByte (safeIndex arr (ix + 1))
                then go (ix + 2) hasSurrogate
                else (# unboxWord hasSurrogate, (# unboxInt ix | | #) #)
              else (# unboxWord hasSurrogate, (# | | (# 2##, 1##, unboxWord (byteTwoPartialOne firstByte) #) #) #)
          | threeByteChar firstByte ->
              if | ix + 2 < end -> 
                     let !secondByte = safeIndex arr (ix + 1) in
                     if followingByte secondByte
                       then 
                         let !thirdByte = safeIndex arr (ix + 2) in
                         if followingByte thirdByte
                           then if surrogate (codepointFromThreeBytes firstByte secondByte thirdByte)
                             then go (ix + 3) 1
                             else go (ix + 3) hasSurrogate
                           else (# unboxWord hasSurrogate, (# unboxInt ix | | #) #)
                       else (# unboxWord hasSurrogate, (# unboxInt ix | | #) #)
                 | ix + 1 < end -> 
                     let !secondByte = safeIndex arr (ix + 1) in
                     if followingByte secondByte
                       then (# unboxWord hasSurrogate, (# | | (# 3##, 1##, unboxWord (byteThreePartialTwo firstByte secondByte) #) #) #)
                       else (# unboxWord hasSurrogate, (# unboxInt ix | | #) #)
                 | otherwise -> (# unboxWord hasSurrogate, (# | | (# 3##, 2##, unboxWord (byteThreePartialOne firstByte) #) #) #)
          | fourByteChar firstByte ->
              if | ix + 3 < end ->
                     let !secondByte = safeIndex arr (ix + 1) in
                     if followingByte secondByte
                       then 
                         let !thirdByte = safeIndex arr (ix + 2) in
                         if followingByte thirdByte
                           then
                             let !fourthByte = safeIndex arr (ix + 3) in
                             if followingByte fourthByte
                               then go (ix + 4) hasSurrogate
                               else (# unboxWord hasSurrogate, (# unboxInt ix | | #) #)
                           else (# unboxWord hasSurrogate, (# unboxInt ix | | #) #)
                       else (# unboxWord hasSurrogate, (# unboxInt ix | | #) #)
                 | ix + 2 < end -> 
                     let !secondByte = safeIndex arr (ix + 1) in
                     if followingByte secondByte
                       then 
                         let !thirdByte = safeIndex arr (ix + 2) in
                         if followingByte thirdByte
                           then (# unboxWord hasSurrogate, (# | | (# 4##, 1##, unboxWord (byteFourPartialThree firstByte secondByte thirdByte) #) #) #)
                           else (# unboxWord hasSurrogate, (# unboxInt ix | | #) #)
                       else (# unboxWord hasSurrogate, (# unboxInt ix | | #) #)
                 | ix + 1 < end -> 
                     let !secondByte = safeIndex arr (ix + 1) in
                     if followingByte secondByte
                       then (# unboxWord hasSurrogate, (# | | (# 4##, 2##, unboxWord (byteFourPartialTwo firstByte secondByte) #) #) #)
                       else (# unboxWord hasSurrogate, (# unboxInt ix | | #) #)
                 | otherwise -> (# unboxWord hasSurrogate, (# | | (# 4##, 3##, unboxWord (byteFourPartialOne firstByte) #) #) #)
          | otherwise -> (# unboxWord hasSurrogate, (# unboxInt ix | | #) #)
    else (# unboxWord hasSurrogate, (# | (# #) | #) #)

byteTwoPartialOne :: Word8 -> Word
byteTwoPartialOne w = word8ToWord w .&. 0b00011111

byteThreePartialTwo :: Word8 -> Word8 -> Word
byteThreePartialTwo a b =
  unsafeShiftL (word8ToWord a .&. 0b00001111) 6 .|.
  (word8ToWord b .&. 0b00111111)

byteThreePartialOne :: Word8 -> Word
byteThreePartialOne a = word8ToWord a .&. 0b00001111

byteFourPartialTwo :: Word8 -> Word8 -> Word
byteFourPartialTwo a b =
  unsafeShiftL (word8ToWord a .&. 0b00000111) 6 .|.
  (word8ToWord b .&. 0b00111111)

byteFourPartialThree :: Word8 -> Word8 -> Word8 -> Word
byteFourPartialThree a b c =
  unsafeShiftL (word8ToWord a .&. 0b00000111) 12 .|.
  unsafeShiftL (word8ToWord b .&. 0b00111111) 6 .|.
  (word8ToWord c .&. 0b00111111)

byteFourPartialOne :: Word8 -> Word
byteFourPartialOne a = word8ToWord a .&. 0b00000111

followingByte :: Word8 -> Bool
followingByte !w = xor w 0b01000000 .&. 0b11000000 == 0b11000000

oneByteChar :: Word8 -> Bool
oneByteChar !w = w .&. 0b10000000 == 0

twoByteChar :: Word8 -> Bool
twoByteChar !w = w .&. 0b11100000 == 0b11000000

threeByteChar :: Word8 -> Bool
threeByteChar !w = w .&. 0b11110000 == 0b11100000

fourByteChar :: Word8 -> Bool
fourByteChar !w = w .&. 0b11111000 == 0b11110000

codepointFromThreeBytes :: Word8 -> Word8 -> Word8 -> Word
codepointFromThreeBytes w1 w2 w3 = 
  unsafeShiftL (word8ToWord w1 .&. 0b00001111) 12 .|. 
  unsafeShiftL (word8ToWord w2 .&. 0b00111111) 6 .|. 
  (word8ToWord w3 .&. 0b00111111)

word8ToWord :: Word8 -> Word
word8ToWord = fromIntegral

surrogate :: Word -> Bool
surrogate codepoint = codepoint >= 0xD800 && codepoint < 0xE000

binaryOneThenZeroes :: Word
binaryOneThenZeroes = maxBound - div (maxBound :: Word) 2