{-# language BangPatterns #-}
{-# language BinaryLiterals #-}
{-# language DataKinds #-}
{-# language DeriveFunctor #-}
{-# language DerivingStrategies #-}
{-# language GADTSyntax #-}
{-# language KindSignatures #-}
{-# language LambdaCase #-}
{-# language MagicHash #-}
{-# language MultiWayIf #-}
{-# language PolyKinds #-}
{-# language RankNTypes #-}
{-# language ScopedTypeVariables #-}
{-# language StandaloneDeriving #-}
{-# language TypeApplications #-}
{-# language UnboxedSums #-}
{-# language UnboxedTuples #-}

-- | Parse input as though it were text encoded by
-- ISO 8859-1 (Latin-1). All byte sequences are valid
-- text under ISO 8859-1.
module Data.Bytes.Parser.Latin
  ( -- * Matching
    -- ** Required
    char
  , char2
  , char3
  , char4
    -- ** Try
  , trySatisfy
  , trySatisfyThen
    -- * Get Character
  , any
  , opt
  , opt#
    -- * Skip
  , skipDigits
  , skipDigits1
  , skipChar
  , skipChar1
  , skipTrailedBy
    -- * Numbers
    -- ** Decimal
    -- *** Unsigned
  , decWord
  , decWord8
  , decWord16
  , decWord32
    -- *** Signed
  , decUnsignedInt
  , decUnsignedInt#
  , decSignedInt
  , decStandardInt
  , decTrailingInt
  , decTrailingInt#
  , decSignedInteger
  , decUnsignedInteger
  , decTrailingInteger
    -- ** Hexadecimal
  , hexWord16
  ) where

import Prelude hiding (length,any,fail,takeWhile)

import Data.Bits ((.|.))
import Data.Bytes.Types (Bytes(..))
import Data.Bytes.Parser.Internal (Parser(..),ST#,uneffectful,Result#,uneffectful#)
import Data.Bytes.Parser.Internal (InternalResult(..),indexLatinCharArray,upcastUnitSuccess)
import Data.Bytes.Parser.Internal (boxBytes)
import Data.Bytes.Parser (bindFromLiftedToInt)
import Data.Word (Word8)
import Data.Char (ord)
import Data.Kind (Type)
import GHC.Exts (Int(I#),Char(C#),Word#,Int#,Char#,(+#),(-#),indexCharArray#)
import GHC.Exts (TYPE,RuntimeRep)
import GHC.Word (Word(W#),Word8(W8#),Word16(W16#),Word32(W32#))

import qualified GHC.Exts as Exts
import qualified Data.Bytes as Bytes
import qualified Data.Primitive as PM

-- | Runs the predicate on the next character in the input. If the
-- predicate is matched, this consumes the character. Otherwise,
-- the character is not consumed. This returns @False@ if the end
-- of the input has been reached. This never fails.
trySatisfy :: (Char -> Bool) -> Parser e s Bool
trySatisfy f = uneffectful $ \chunk -> case length chunk of
  0 -> InternalSuccess False (offset chunk) (length chunk)
  _ -> case f (indexLatinCharArray (array chunk) (offset chunk)) of
    True -> InternalSuccess True (offset chunk + 1) (length chunk - 1)
    False -> InternalSuccess False (offset chunk) (length chunk)

-- | Runs the function on the next character in the input. If the
-- function returns @Just@, this consumes the character and then
-- runs the parser on the remaining input. If the function returns
-- @Nothing@, this does not consume the tested character, and it
-- runs the default parser on the input (which includes the tested
-- character). If there is no input remaining, this also runs the
-- default parser. This combinator never fails.
trySatisfyThen :: forall (r :: RuntimeRep) (e :: Type) (s :: Type) (a :: TYPE r).
     Parser e s a -- ^ Default parser. Runs on @Nothing@ or end of input.
  -> (Char -> Maybe (Parser e s a)) -- ^ Parser-selecting predicate
  -> Parser e s a
{-# inline trySatisfyThen #-}
trySatisfyThen (Parser g) f = Parser
  (\input@(# arr,off0,len0 #) s0 -> case len0 of
    0# -> g input s0
    _ -> case f (C# (indexCharArray# arr off0)) of
      Nothing -> g input s0
      Just (Parser p) -> p (# arr, off0 +# 1#, len0 -# 1# #) s0
  )

-- | Consume the next character, failing if it does not
-- match the expected value or if there is no more input.
char :: e -> Char -> Parser e s ()
-- GHC should decide to inline this after optimization.
char e !c = uneffectful $ \chunk -> if length chunk > 0
  then if indexLatinCharArray (array chunk) (offset chunk) == c
    then InternalSuccess () (offset chunk + 1) (length chunk - 1)
    else InternalFailure e
  else InternalFailure e

-- | Consume the next two characters, failing if they do
-- not match they expected values.
--
-- > char2 e a b === char e a *> char e b
char2 :: e -> Char -> Char -> Parser e s ()
-- GHC should decide to inline this after optimization.
char2 e !c0 !c1 = uneffectful $ \chunk ->
  if | length chunk > 1
     , indexLatinCharArray (array chunk) (offset chunk) == c0
     , indexLatinCharArray (array chunk) (offset chunk + 1) == c1
         -> InternalSuccess () (offset chunk + 2) (length chunk - 2)
     | otherwise -> InternalFailure e

-- | Consume the three characters, failing if they do
-- not match they expected values.
--
-- > char3 e a b c === char e a *> char e b *> char e c
char3 :: e -> Char -> Char -> Char -> Parser e s ()
-- GHC should decide to inline this after optimization.
char3 e !c0 !c1 !c2 = uneffectful $ \chunk ->
  if | length chunk > 2
     , indexLatinCharArray (array chunk) (offset chunk) == c0
     , indexLatinCharArray (array chunk) (offset chunk + 1) == c1
     , indexLatinCharArray (array chunk) (offset chunk + 2) == c2
         -> InternalSuccess () (offset chunk + 3) (length chunk - 3)
     | otherwise -> InternalFailure e

-- | Consume the four characters, failing if they do
-- not match they expected values.
--
-- > char4 e a b c d === char e a *> char e b *> char e c *> char e d
char4 :: e -> Char -> Char -> Char -> Char -> Parser e s ()
-- GHC should decide to inline this after optimization.
char4 e !c0 !c1 !c2 !c3 = uneffectful $ \chunk ->
  if | length chunk > 3
     , indexLatinCharArray (array chunk) (offset chunk) == c0
     , indexLatinCharArray (array chunk) (offset chunk + 1) == c1
     , indexLatinCharArray (array chunk) (offset chunk + 2) == c2
     , indexLatinCharArray (array chunk) (offset chunk + 3) == c3
         -> InternalSuccess () (offset chunk + 4) (length chunk - 4)
     | otherwise -> InternalFailure e

-- | Consumes and returns the next character in the input.
any :: e -> Parser e s Char
any e = uneffectful $ \chunk -> if length chunk > 0
  then
    let c = indexLatinCharArray (array chunk) (offset chunk)
     in InternalSuccess c (offset chunk + 1) (length chunk - 1)
  else InternalFailure e

-- | Consume a character from the input or return Nothing if
-- end of the stream has been reached. Since ISO 8859-1 maps every
-- bytes to a character, this parser never fails.
opt :: Parser e s (Maybe Char)
opt = uneffectful $ \chunk -> case length chunk of
  0 -> InternalSuccess Nothing (offset chunk) (length chunk)
  _ -> InternalSuccess
    (Just (indexLatinCharArray (array chunk) (offset chunk)))
    (offset chunk + 1) (length chunk - 1)

-- | Variant of @opt@ with unboxed result.
opt# :: Parser e s (# (# #) | Char# #)
{-# inline opt# #-}
opt# = Parser
  (\(# arr, off, len #) s0 -> case len of
    0# -> (# s0, (# | (# (# (# #) | #), off, len #) #) #)
    _ -> (# s0, (# | (# (# | indexCharArray# arr off #), off +# 1#, len -# 1# #) #) #)
  )

skipDigitsAsciiLoop ::
     Bytes -- Chunk
  -> (# Int#, Int# #)
skipDigitsAsciiLoop !c = if length c > 0
  then
    let w = indexLatinCharArray (array c) (offset c)
     in if w >= '0' && w <= '9'
          then skipDigitsAsciiLoop (Bytes.unsafeDrop 1 c)
          else (# unI (offset c), unI (length c) #)
  else (# unI (offset c), unI (length c) #)

skipDigitsAscii1LoopStart ::
     e
  -> Bytes -- chunk
  -> Result# e ()
skipDigitsAscii1LoopStart e !c = if length c > 0
  then 
    let w = indexLatinCharArray (array c) (offset c)
     in if w >= '0' && w <= '9'
          then upcastUnitSuccess (skipDigitsAsciiLoop (Bytes.unsafeDrop 1 c))
          else (# e | #)
  else (# e | #)

-- | Variant of 'skipDigits' that requires at least one digit
-- to be present.
skipDigits1 :: e -> Parser e s ()
skipDigits1 e = uneffectful# $ \c ->
  skipDigitsAscii1LoopStart e c

-- | Skip the characters @0-9@ until a non-digit is encountered.
-- This parser does not fail.
skipDigits :: Parser e s ()
skipDigits = uneffectful# $ \c ->
  upcastUnitSuccess (skipDigitsAsciiLoop c)

unI :: Int -> Int#
unI (I# w) = w

-- | Skip the character any number of times. This succeeds
-- even if the character was not present.
skipChar :: Char -> Parser e s ()
skipChar !w = uneffectful# $ \c ->
  upcastUnitSuccess (skipLoop w c)

-- | Skip the character any number of times. It must occur
-- at least once or else this will fail.
skipChar1 :: e -> Char -> Parser e s ()
skipChar1 e !w = uneffectful# $ \c ->
  skipLoop1Start e w c

skipLoop ::
     Char -- byte to match
  -> Bytes -- Chunk
  -> (# Int#, Int# #)
skipLoop !w !c = if length c > 0
  then if indexLatinCharArray (array c) (offset c) == w
    then skipLoop w (Bytes.unsafeDrop 1 c)
    else (# unI (offset c), unI (length c) #)
  else (# unI (offset c), unI (length c) #)

skipLoop1Start ::
     e
  -> Char -- byte to match
  -> Bytes -- chunk
  -> Result# e ()
skipLoop1Start e !w !chunk0 = if length chunk0 > 0
  then if indexLatinCharArray (array chunk0) (offset chunk0) == w
    then upcastUnitSuccess (skipLoop w (Bytes.unsafeDrop 1 chunk0))
    else (# e | #)
  else (# e | #)

-- | Parse a decimal-encoded 8-bit word. If the number is larger
-- than 255, this parser fails.
decWord8 :: e -> Parser e s Word8
decWord8 e = Parser
  (\chunk0 s0 -> case decSmallWordStart e 256 (boxBytes chunk0) s0 of
    (# s1, r #) -> (# s1, upcastWord8Result r #)
  )

-- | Parse a decimal-encoded 16-bit word. If the number is larger
-- than 65535, this parser fails.
decWord16 :: e -> Parser e s Word16
decWord16 e = Parser
  (\chunk0 s0 -> case decSmallWordStart e 65536 (boxBytes chunk0) s0 of
    (# s1, r #) -> (# s1, upcastWord16Result r #)
  )

-- | Parse a decimal-encoded 32-bit word. If the number is larger
-- than 4294967295, this parser fails.
decWord32 :: e -> Parser e s Word32
-- This will not work on 32-bit platforms.
decWord32 e = Parser
  (\chunk0 s0 -> case decSmallWordStart e 4294967296 (boxBytes chunk0) s0 of
    (# s1, r #) -> (# s1, upcastWord32Result r #)
  )

-- | Parse a decimal-encoded number. If the number is too large to be
-- represented by a machine word, this fails with the provided
-- error message. This accepts any number of leading zeroes.
decWord :: e -> Parser e s Word
decWord e = Parser
  (\chunk0 s0 -> case decWordStart e (boxBytes chunk0) s0 of
    (# s1, r #) -> (# s1, upcastWordResult r #)
  )

decSmallWordStart ::
     e -- Error message
  -> Word -- Upper Bound
  -> Bytes -- Chunk
  -> ST# s (Result# e Word# )
decSmallWordStart e !limit !chunk0 s0 = if length chunk0 > 0
  then
    let !w = fromIntegral @Word8 @Word
          (PM.indexByteArray (array chunk0) (offset chunk0)) - 48
     in if w < 10
          then (# s0, decSmallWordMore e w limit (Bytes.unsafeDrop 1 chunk0) #)
          else (# s0, (# e | #) #)
  else (# s0, (# e | #) #)

-- This will not inline since it is recursive, but worker
-- wrapper will still happen.
decWordMore ::
     e -- Error message
  -> Word -- Accumulator
  -> Bytes -- Chunk
  -> Result# e Word#
decWordMore e !acc !chunk0 = if length chunk0 > 0
  then
    let !w = fromIntegral @Word8 @Word
          (PM.indexByteArray (array chunk0) (offset chunk0)) - 48
        !acc' = acc * 10 + w
     in if w < 10 && acc' >= acc
          then decWordMore e acc' (Bytes.unsafeDrop 1 chunk0)
          else (# | (# unW acc, unI (offset chunk0), unI (length chunk0)  #) #)
  else (# | (# unW acc, unI (offset chunk0), 0# #) #)


upcastWordResult :: Result# e Word# -> Result# e Word
upcastWordResult (# e | #) = (# e | #)
upcastWordResult (# | (# a, b, c #) #) = (# | (# W# a, b, c #) #)

decSmallWordMore ::
     e -- Error message
  -> Word -- Accumulator
  -> Word -- Upper Bound
  -> Bytes -- Chunk
  -> Result# e Word#
decSmallWordMore e !acc !limit !chunk0 = if length chunk0 > 0
  then
    let !w = fromIntegral @Word8 @Word
          (PM.indexByteArray (array chunk0) (offset chunk0)) - 48
     in if w < 10
          then
            let w' = acc * 10 + w
             in if w' < limit
                  then decSmallWordMore e w' limit (Bytes.unsafeDrop 1 chunk0)
                  else (# e | #)
          else (# | (# unW acc, unI (offset chunk0), unI (length chunk0)  #) #)
  else (# | (# unW acc, unI (offset chunk0), 0# #) #)

unW :: Word -> Word#
unW (W# w) = w

decWordStart ::
     e -- Error message
  -> Bytes -- Chunk
  -> ST# s (Result# e Word# )
decWordStart e !chunk0 s0 = if length chunk0 > 0
  then
    let !w = fromIntegral @Word8 @Word
          (PM.indexByteArray (array chunk0) (offset chunk0)) - 48
     in if w < 10
          then (# s0, decWordMore e w (Bytes.unsafeDrop 1 chunk0) #)
          else (# s0, (# e | #) #)
  else (# s0, (# e | #) #)

-- Precondition: the word is small enough
upcastWord16Result :: Result# e Word# -> Result# e Word16
upcastWord16Result (# e | #) = (# e | #)
upcastWord16Result (# | (# a, b, c #) #) = (# | (# W16# a, b, c #) #)

-- Precondition: the word is small enough
upcastWord32Result :: Result# e Word# -> Result# e Word32
upcastWord32Result (# e | #) = (# e | #)
upcastWord32Result (# | (# a, b, c #) #) = (# | (# W32# a, b, c #) #)

-- Precondition: the word is small enough
upcastWord8Result :: Result# e Word# -> Result# e Word8
upcastWord8Result (# e | #) = (# e | #)
upcastWord8Result (# | (# a, b, c #) #) = (# | (# W8# a, b, c #) #)

-- | Parse a decimal-encoded number. If the number is too large to be
-- represented by a machine integer, this fails with the provided
-- error message. This rejects input with that is preceeded by plus
-- or minus. Consequently, it does not parse negative numbers. Use
-- 'decStandardInt' or 'decSignedInt' for that purpose. On a 64-bit
-- platform 'decWord' will successfully parse 9223372036854775808
-- (i.e. @2 ^ 63@), but 'decUnsignedInt' will fail. This parser allows
-- leading zeroes.
decUnsignedInt :: e -> Parser e s Int
decUnsignedInt e = Parser
  (\chunk0 s0 -> case decPosIntStart e (boxBytes chunk0) s0 of
    (# s1, r #) -> (# s1, upcastIntResult r #)
  )

-- | Variant of 'decUnsignedInt' with an unboxed result.
decUnsignedInt# :: e -> Parser e s Int#
decUnsignedInt# e = Parser
  (\chunk0 s0 -> decPosIntStart e (boxBytes chunk0) s0)

-- | Parse a decimal-encoded number. If the number is too large to be
-- represented by a machine integer, this fails with the provided
-- error message. This allows the number to optionally be prefixed
-- by plus or minus. If the sign prefix is not present, the number
-- is interpreted as positive. This allows leading zeroes.
decSignedInt :: e -> Parser e s Int
decSignedInt e = Parser
  (\chunk0 s0 -> case runParser (decSignedInt# e) chunk0 s0 of
    (# s1, r #) -> (# s1, upcastIntResult r #)
  )

-- | Variant of 'decUnsignedInt' that lets the caller supply a leading
-- digit. This is useful when parsing formats like JSON where integers with
-- leading zeroes are considered invalid. The calling context must
-- consume the first digit before calling this parser. Results are
-- always positive numbers.
decTrailingInt ::
     e -- ^ Error message
  -> Int -- ^ Leading digit, should be between @0@ and @9@.
  -> Parser e s Int
decTrailingInt e (I# w) = Parser
  (\chunk0 s0 -> case runParser (decTrailingInt# e w) chunk0 s0 of
    (# s1, r #) -> (# s1, upcastIntResult r #)
  )

decTrailingInt# ::
     e -- Error message
  -> Int# -- Leading digit, should be between @0@ and @9@.
  -> Parser e s Int#
decTrailingInt# e !w =
  Parser (\chunk0 s0 -> (# s0, decPosIntMore e (I# w) (boxBytes chunk0) #))

-- | Parse a decimal-encoded number. If the number is too large to be
-- represented by a machine integer, this fails with the provided
-- error message. This allows the number to optionally be prefixed
-- by minus. If the minus prefix is not present, the number
-- is interpreted as positive. The disallows a leading plus sign.
-- For example, 'decStandardInt' rejects @+42@, but 'decSignedInt'
-- allows it.
decStandardInt :: e -> Parser e s Int
decStandardInt e = Parser
  (\chunk0 s0 -> case runParser (decStandardInt# e) chunk0 s0 of
    (# s1, r #) -> (# s1, upcastIntResult r #)
  )

decSignedInt# :: e -> Parser e s Int#
{-# noinline decSignedInt# #-}
decSignedInt# e = any e `bindFromLiftedToInt` \c -> case c of
  '+' -> Parser -- plus sign
    (\chunk0 s0 -> decPosIntStart e (boxBytes chunk0) s0)
  '-' -> Parser -- minus sign
    (\chunk0 s0 -> decNegIntStart e (boxBytes chunk0) s0)
  _ -> Parser -- no sign, there should be a digit here 
    (\chunk0 s0 ->
      let !w = char2Word c - 48
        in if w < 10
             then (# s0, decPosIntMore e (fromIntegral @Word @Int w) (boxBytes chunk0) #)
             else (# s0, (# e | #) #)
    )

-- This is the same as decSignedInt except that we disallow
-- a leading plus sign.
decStandardInt# :: e -> Parser e s Int#
{-# noinline decStandardInt# #-}
decStandardInt# e = any e `bindFromLiftedToInt` \c -> case c of
  '-' -> Parser -- minus sign
    (\chunk0 s0 -> decNegIntStart e (boxBytes chunk0) s0)
  _ -> Parser -- no sign, there should be a digit here 
    (\chunk0 s0 ->
      let !w = char2Word c - 48
        in if w < 10
             then (# s0, decPosIntMore e (fromIntegral @Word @Int w) (boxBytes chunk0) #)
             else (# s0, (# e | #) #)
    )

-- | Variant of 'decUnsignedInteger' that lets the caller supply a leading
-- digit. This is useful when parsing formats like JSON where integers with
-- leading zeroes are considered invalid. The calling context must
-- consume the first digit before calling this parser. Results are
-- always positive numbers.
decTrailingInteger ::
     Int -- ^ Leading digit, should be between @0@ and @9@.
  -> Parser e s Integer
decTrailingInteger (I# w) =
  Parser (\chunk0 s0 -> (# s0, (# | decIntegerChunks (I# w) 10 0 (boxBytes chunk0) #) #))

-- | Parse a decimal-encoded positive integer of arbitrary
-- size. This rejects input that begins with a plus or minus
-- sign.
decUnsignedInteger :: e -> Parser e s Integer
decUnsignedInteger e = Parser
  (\chunk0 s0 -> decUnsignedIntegerStart e (boxBytes chunk0) s0)

-- | Parse a decimal-encoded integer of arbitrary size.
-- This accepts input that begins with a plus or minus sign.
-- Input without a sign prefix is interpreted as positive.
decSignedInteger :: e -> Parser e s Integer
{-# noinline decSignedInteger #-}
decSignedInteger e = any e >>= \c -> case c of
  '+' -> do
    decUnsignedInteger e
  '-' -> do
    x <- decUnsignedInteger e
    pure $! negate x
  _ -> Parser -- no sign, there should be a digit here 
    (\chunk0 s0 ->
      let !w = char2Word c - 48 in
      if w < 10
        then
          let !r = decIntegerChunks
                (fromIntegral @Word @Int w)
                10
                0
                (boxBytes chunk0)
           in (# s0, (# | r #) #)
        else (# s0, (# e | #) #)
    )

decPosIntStart ::
     e -- Error message
  -> Bytes -- Chunk
  -> ST# s (Result# e Int# )
decPosIntStart e !chunk0 s0 = if length chunk0 > 0
  then
    let !w = fromIntegral @Word8 @Word
          (PM.indexByteArray (array chunk0) (offset chunk0)) - 48
     in if w < 10
          then (# s0, decPosIntMore e (fromIntegral @Word @Int w) (Bytes.unsafeDrop 1 chunk0) #)
          else (# s0, (# e | #) #)
  else (# s0, (# e | #) #)

decNegIntStart ::
     e -- Error message
  -> Bytes -- Chunk
  -> ST# s (Result# e Int# )
decNegIntStart e !chunk0 s0 = if length chunk0 > 0
  then
    let !w = fromIntegral @Word8 @Word
          (PM.indexByteArray (array chunk0) (offset chunk0)) - 48
     in if w < 10
          then (# s0, decNegIntMore e (negate (fromIntegral @Word @Int w)) (Bytes.unsafeDrop 1 chunk0) #)
          else (# s0, (# e | #) #)
  else (# s0, (# e | #) #)

decUnsignedIntegerStart ::
     e
  -> Bytes
  -> ST# s (Result# e Integer)
decUnsignedIntegerStart e !chunk0 s0 = if length chunk0 > 0
  then
    let !w = (PM.indexByteArray (array chunk0) (offset chunk0)) - 48
     in if w < (10 :: Word8)
          then
            let !r = decIntegerChunks
                  (fromIntegral @Word8 @Int w)
                  10
                  0
                  (Bytes.unsafeDrop 1 chunk0)
             in (# s0, (# | r #) #)
          else (# s0, (# e | #) #)
  else (# s0, (# e | #) #)

-- This will not inline since it is recursive, but worker
-- wrapper will still happen.
decNegIntMore ::
     e -- Error message
  -> Int -- Accumulator
  -> Bytes -- Chunk
  -> Result# e Int#
decNegIntMore e !acc !chunk0 = if length chunk0 > 0
  then
    let !w = fromIntegral @Word8 @Word
          (PM.indexByteArray (array chunk0) (offset chunk0)) - 48
        !acc' = acc * 10 - (fromIntegral @Word @Int w)
     in if w < 10
          then if acc' <= acc
            then decNegIntMore e acc' (Bytes.unsafeDrop 1 chunk0)
            else (# e | #)
          else (# | (# unI acc, unI (offset chunk0), unI (length chunk0)  #) #)
  else (# | (# unI acc, unI (offset chunk0), 0# #) #)

-- This will not inline since it is recursive, but worker
-- wrapper will still happen. Fails if the accumulator
-- exceeds the size of a machine integer.
decPosIntMore ::
     e -- Error message
  -> Int -- Accumulator
  -> Bytes -- Chunk
  -> Result# e Int#
decPosIntMore e !acc !chunk0 = if len > 0
  then
    let !w = fromIntegral @Word8 @Word
          (PM.indexByteArray (array chunk0) (offset chunk0)) - 48
        !acc' = acc * 10 + (fromIntegral @Word @Int w)
     in if w < 10
          then if acc' >= acc
            then decPosIntMore e acc' (Bytes.unsafeDrop 1 chunk0)
            else (# e | #)
          else (# | (# unI acc, unI (offset chunk0), len# #) #)
  else (# | (# unI acc, unI (offset chunk0), 0# #) #)
  where
  !len@(I# len# ) = length chunk0

-- This will not inline since it is recursive, but worker
-- wrapper will still happen. When the accumulator
-- exceeds the size of a machine integer, this pushes the
-- accumulated machine int and the shift amount onto the
-- stack.
-- We are intentionally lazy in the accumulator. There is
-- no need to force this on every iteration. We do however,
-- force it preemptively every time it changes.
-- Because of how we track overflow, we are able to use the
-- same function for both positive and negative numbers.
decIntegerChunks ::
     Int -- Chunk accumulator (e.g. 236)
  -> Int -- Chunk base-ten bound (e.g. 1000)
  -> Integer -- Accumulator
  -> Bytes -- Chunk
  -> (# Integer, Int#, Int# #)
decIntegerChunks !nAcc !eAcc acc !chunk0 = if len > 0
  then
    let !w = fromIntegral @Word8 @Word
          (PM.indexByteArray (array chunk0) (offset chunk0)) - 48
     in if w < 10
          then let !eAcc' = eAcc * 10 in
            if eAcc' >= eAcc
              then decIntegerChunks
                (nAcc * 10 + fromIntegral @Word @Int w)
                eAcc'
                acc
                (Bytes.unsafeDrop 1 chunk0)
              else
                -- In this case, notice that we deliberately
                -- unconsume the digit that would have caused
                -- an overflow.
                let !r = (acc * fromIntegral @Int @Integer eAcc)
                       + (fromIntegral @Int @Integer nAcc)
                 in decIntegerChunks 0 1 r chunk0
          else
            let !r = (acc * fromIntegral @Int @Integer eAcc)
                   + (fromIntegral @Int @Integer nAcc)
             in (# r, unI (offset chunk0), len# #)
  else
    let !r = (acc * fromIntegral @Int @Integer eAcc)
           + (fromIntegral @Int @Integer nAcc)
     in (# r, unI (offset chunk0), 0# #)
  where
  !len@(I# len# ) = length chunk0

upcastIntResult :: Result# e Int# -> Result# e Int
upcastIntResult (# e | #) = (# e | #)
upcastIntResult (# | (# a, b, c #) #) = (# | (# I# a, b, c #) #)

char2Word :: Char -> Word
char2Word = fromIntegral . ord

-- | Skip all characters until the character from the is encountered
-- and then consume the matching character as well. Visually,
-- @skipTrailedBy \'C\'@ advances the cursor like this:
-- 
-- >  A Z B Y C X C W
-- > |->->->->-|
skipTrailedBy :: e -> Char -> Parser e s ()
skipTrailedBy e !w = uneffectful# $ \c ->
  skipUntilConsumeLoop e w c

skipUntilConsumeLoop ::
     e -- Error message
  -> Char -- byte to match
  -> Bytes -- Chunk
  -> Result# e ()
skipUntilConsumeLoop e !w !c = if length c > 0
  then if indexLatinCharArray (array c) (offset c) /= w
    then skipUntilConsumeLoop e w (Bytes.unsafeDrop 1 c)
    else (# | (# (), unI (offset c + 1), unI (length c - 1) #) #)
  else (# e | #)

-- | Parse exactly four ASCII-encoded characters, interpretting
-- them as the hexadecimal encoding of a 32-bit number. Note that
-- this rejects a sequence such as @5A9@, requiring @05A9@ instead.
-- This is insensitive to case. This is particularly useful when
-- parsing escape sequences in C or JSON, which allow encoding
-- characters in the Basic Multilingual Plane as @\\uhhhh@.
hexWord16 :: e -> Parser e s Word16
{-# inline hexWord16 #-}
hexWord16 e = Parser
  (\x s0 -> case runParser (hexWord16# e) x s0 of
    (# s1, r #) -> case r of
      (# err | #) -> (# s1, (# err | #) #)
      (# | (# a, b, c #) #) -> (# s1, (# | (# W16# a, b, c #) #) #)
  )

hexWord16# :: e -> Parser e s Word#
{-# noinline hexWord16# #-}
hexWord16# e = uneffectfulWord# $ \chunk -> if length chunk >= 4
  then
    let !w0@(W# n0) = oneHex $ PM.indexByteArray (array chunk) (offset chunk)
        !w1@(W# n1) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 1)
        !w2@(W# n2) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 2)
        !w3@(W# n3) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 3)
     in if | w0 .|. w1 .|. w2 .|. w3 /= maxBound ->
             (# |
                (# (n0 `Exts.timesWord#` 4096##) `Exts.plusWord#`
                   (n1 `Exts.timesWord#` 256##) `Exts.plusWord#`
                   (n2 `Exts.timesWord#` 16##) `Exts.plusWord#`
                   n3
                ,  unI (offset chunk) +# 4#
                ,  unI (length chunk) -# 4# #) #)
           | otherwise -> (# e | #)
  else (# e | #)


-- Returns the maximum machine word if the argument is not
-- the ASCII encoding of a hexadecimal digit.
oneHex :: Word8 -> Word
oneHex w
  | w >= 48 && w < 58 = (fromIntegral w - 48)
  | w >= 65 && w < 71 = (fromIntegral w - 55)
  | w >= 97 && w < 103 = (fromIntegral w - 87)
  | otherwise = maxBound

uneffectfulWord# :: (Bytes -> Result# e Word#) -> Parser e s Word#
uneffectfulWord# f = Parser
  ( \b s0 -> (# s0, (f (boxBytes b)) #) )