{-|
Module      : Z.Data.Parser.Base
Description : Efficient deserialization/parse.
Copyright   : (c) Dong Han, 2017-2019
License     : BSD
Maintainer  : winterland1989@gmail.com
Stability   : experimental
Portability : non-portable

This module provide internal data types for a simple resumable 'Parser', which is suitable for binary protocol and simple textual protocol parsing. 'Parser' extensively works on on 'V.Bytes', which is same to 'T.Text' representation.

-}

module Z.Data.Parser.Base
  ( -- * Parser types
    Result(..)
  , ParseError
  , ParseStep
  , Parser(..)
  , (<?>)
    -- * Running a parser
  , parse, parse', parseChunk, parseChunkList, ParseChunks, parseChunks, finishParsing
  , runAndKeepTrack, match
    -- * Basic parsers
  , ensureN, endOfInput, currentChunk, atEnd
    -- * Primitive decoders
  , decodePrim, BE(..), LE(..)
  , decodePrimLE, decodePrimBE
    -- * More parsers
  , scan, scanChunks, peekMaybe, peek, satisfy, satisfyWith
  , anyWord8, word8, char8, anyChar8, anyCharUTF8, charUTF8, char7, anyChar7
  , skipWord8, endOfLine, skip, skipWhile, skipSpaces
  , take, takeN, takeTill, takeWhile, takeWhile1, takeRemaining, takeUTF8,  bytes, bytesCI
  , text
    -- * Error reporting
  , fail', failWithInput, unsafeLiftIO
    -- * Specialized primitive parser
  , decodeWord  , decodeWord64, decodeWord32, decodeWord16, decodeWord8
  , decodeInt   , decodeInt64 , decodeInt32 , decodeInt16 , decodeInt8 , decodeDouble, decodeFloat
  , decodeWordLE  , decodeWord64LE , decodeWord32LE , decodeWord16LE
  , decodeIntLE   , decodeInt64LE , decodeInt32LE , decodeInt16LE , decodeDoubleLE , decodeFloatLE
  , decodeWordBE  , decodeWord64BE , decodeWord32BE , decodeWord16BE
  , decodeIntBE   , decodeInt64BE , decodeInt32BE , decodeInt16BE , decodeDoubleBE , decodeFloatBE
  ) where

import           Control.Applicative
import           Control.Exception                  (assert)
import           Control.Monad
import           Control.Monad.Primitive
import qualified Control.Monad.Fail                 as Fail
import qualified Data.CaseInsensitive               as CI
import qualified Data.Primitive.PrimArray           as A
import           Data.Int
import           Data.Word
import           Data.Bits                          ((.&.))
import           GHC.Types
import           GHC.Exts                           (State#, runRW#, unsafeCoerce#)
import           Prelude                            hiding (take, takeWhile, decodeFloat)
import           Z.Data.Array.Unaligned
import           Z.Data.ASCII
import qualified Z.Data.Text                        as T
import qualified Z.Data.Text.Base                   as T
import qualified Z.Data.Text.UTF8Codec              as T
import qualified Z.Data.Vector.Base                 as V
import qualified Z.Data.Vector.Extra                as V

-- | Simple parsing result, that represent respectively:
--
-- * Success: the remaining unparsed data and the parsed value
--
-- * Failure: the remaining unparsed data and the error message
--
-- * Partial: that need for more input data, supply empty bytes to indicate 'endOfInput'
--
data Result e r
    = Success r !V.Bytes
    | Failure e !V.Bytes
    | Partial (ParseStep e r)

-- | A parse step consumes 'V.Bytes' and produce 'Result'.
type ParseStep e r = V.Bytes -> Result e r

-- | Type alias for error message
type ParseError = [T.Text]

instance Functor (Result e) where
    fmap f (Success a s)   = Success (f a) s
    fmap f (Partial k)     = Partial (fmap f . k)
    fmap _ (Failure e v)   = Failure e v

instance (Show e, Show a) => Show (Result e a) where
    show (Success a _)    = "Success " ++ show a
    show (Partial _)      = "Partial _"
    show (Failure errs _) = "Failure: " ++ show errs

-- | Simple CPSed parser
--
-- A parser takes a failure continuation, and a success one, while the success continuation is
-- usually composed by 'Monad' instance, the failure one is more like a reader part, which can
-- be modified via '<?>'. If you build parsers from ground, a pattern like this can be used:
--
--  @
--    xxParser = do
--      ensureN errMsg ...            -- make sure we have some bytes
--      Parser $ \ kf k s inp ->      -- fail continuation, success continuation, state token and input
--        ...
--        ... kf errMsg (if input not OK)
--        ... k s ... (if we get something useful for next parser)
--  @
newtype Parser a = Parser {
        runParser :: forall r . (ParseError -> ParseStep ParseError r)
                  -> (State# ParserState -> a -> ParseStep ParseError r)
                  -> State# ParserState -> ParseStep ParseError r
    }

-- | State token tag used in `Parser`
data ParserState

-- It seems eta-expand all params to ensure parsers are saturated is helpful
instance Functor Parser where
    fmap = fmapParser
    {-# INLINE fmap #-}
    a <$ Parser pb = Parser (\ kf k s inp -> pb kf (\ s' _ -> k s' a) s inp)
    {-# INLINE (<$) #-}

fmapParser :: (a -> b) -> Parser a -> Parser b
{-# INLINE fmapParser #-}
fmapParser f (Parser pa) = Parser (\ kf k s inp -> pa kf (\ s' -> k s' . f) s inp)

instance Applicative Parser where
    pure x = Parser (\ _ k s inp -> k s x inp)
    {-# INLINE pure #-}
    (<*>) = apParser
    {-# INLINE (<*>) #-}
    Parser pa *> Parser pb = Parser (\ kf k s inp -> pa kf (\ s' _ -> pb kf k s') s inp)
    {-# INLINE (*>) #-}
    Parser pa <* Parser pb = Parser (\ kf k s inp -> pa kf (\ s' x -> pb kf (\ s'' _ -> k s'' x) s') s inp)
    {-# INLINE (<*) #-}

apParser :: Parser (a -> b) -> Parser a -> Parser b
{-# INLINE apParser #-}
apParser (Parser pf) (Parser pa) = Parser (\ kf k s inp -> pf kf (\ s' f -> pa kf (\ s'' -> k s'' . f) s') s inp)

instance Monad Parser where
    return = pure
    {-# INLINE return #-}
    Parser pa >>= f = Parser (\ kf k s inp -> pa kf (\ s' a -> runParser (f a) kf k s') s inp)
    {-# INLINE (>>=) #-}
    (>>) = (*>)
    {-# INLINE (>>) #-}

instance PrimMonad Parser where
    type PrimState Parser = ParserState
    {-# INLINE primitive #-}
    primitive io = Parser (\ _ k st inp ->
        let !(# st', r #) = io st
        in k st' r inp)

{-# RULES "replicateM/Parser" forall n (x :: Parser a). V.replicateM n x = V.replicatePM n x #-}
{-# RULES "traverse/Parser" forall (f :: a -> Parser b). V.traverse f = V.traverseWithIndexPM (const f) #-}
{-# RULES "traverseWithIndex/Parser" forall (f :: Int -> a -> Parser b). V.traverseWithIndex f = V.traverseWithIndexPM f #-}

-- | Unsafely lifted an `IO` action into 'Parser'.
--
-- This is only for debugging purpose(logging, etc). Don't mix compuation from
-- realworld to parsing result, otherwise parsing is not deterministic.
unsafeLiftIO :: IO a -> Parser a
{-# INLINE unsafeLiftIO #-}
unsafeLiftIO (IO io) = Parser $ \ _ k st inp ->
    let !(# st', r #) = io (unsafeCoerce# st)
    in k (unsafeCoerce# st') r inp

instance Fail.MonadFail Parser where
    fail = fail' . T.pack
    {-# INLINE fail #-}

instance MonadPlus Parser where
    mzero = empty
    {-# INLINE mzero #-}
    mplus = (<|>)
    {-# INLINE mplus #-}

instance Alternative Parser where
    empty = fail' "Z.Data.Parser.Base(Alternative).empty"
    {-# INLINE empty #-}
    f <|> g = do
        (r, consumed) <- runAndKeepTrack f
        case r of
            Success x inp   -> Parser (\ _ k s _ -> k s x inp)
            Failure _ _     -> Parser (\ kf k s _ -> runParser g kf k s consumed)
            _               -> error "Z.Data.Parser.Base: impossible"
    {-# INLINE (<|>) #-}

-- | 'T.Text' version of 'fail'.
fail' :: T.Text -> Parser a
{-# INLINE fail' #-}
fail' msg = Parser (\ kf _ _ inp -> kf [msg] inp)

-- | Similar to `fail'`, but can produce error message with current input chunk.
failWithInput :: (V.Bytes -> T.Text) -> Parser a
{-# INLINE failWithInput #-}
failWithInput f = Parser (\ kf _ _ inp -> kf [f inp] inp)

-- | Parse the complete input, without resupplying
parse' :: Parser a -> V.Bytes -> Either ParseError a
{-# INLINABLE parse' #-}
parse' p = snd . parse p

-- | Parse the complete input, without resupplying, return the rest bytes
parse :: Parser a -> V.Bytes -> (V.Bytes, Either ParseError a)
{-# INLINE parse #-}
parse (Parser p) = \ inp ->
    case (runRW# ( \ s -> unsafeCoerce# (p Failure (\ _ r -> Success r) (unsafeCoerce# s) inp))) of
        Success a rest    -> (rest, Right a)
        Failure errs rest -> (rest, Left errs)
        Partial f         -> finishParsing (f V.empty)

-- | Parse the complete input list, without resupplying, return the rest bytes list.
--
-- Parsers in "Z.Data.Parser" will take 'V.empty' as EOF, so please make sure there are no 'V.empty's
-- mixed into the chunk list.
parseChunkList :: Parser a -> [V.Bytes] -> ([V.Bytes], Either ParseError a)
{-# INLINABLE parseChunkList #-}
parseChunkList p (inp:inps) = go (parseChunk p inp) inps
    where
        go r is = case r of
            Partial f -> case is of
                (i:is') -> go (f i) is'
                _ -> let (rest, r') = finishParsing r
                     in (if V.null rest then [] else [rest], r')
            Success a rest    -> (if V.null rest then is else rest:is, Right a)
            Failure errs rest -> (if V.null rest then is else rest:is, Left errs)

-- | Parse an input chunk
parseChunk :: Parser a -> V.Bytes -> Result ParseError a
{-# INLINE parseChunk #-}
parseChunk (Parser p) = runRW# (\ s ->
    unsafeCoerce# (p Failure (\ _ r -> Success r) (unsafeCoerce# s)))

-- | Finish parsing and fetch result, feed empty bytes if it's 'Partial' result.
finishParsing :: Result ParseError a -> (V.Bytes, Either ParseError a)
{-# INLINABLE finishParsing #-}
finishParsing r = case r of
    Success a rest    -> (rest, Right a)
    Failure errs rest -> (rest, Left errs)
    Partial f         -> finishParsing (f V.empty)

-- | Type alias for a streaming parser, draw chunk from Monad m with a initial chunk,
-- return result in @Either err x@.
type ParseChunks m err x = m V.Bytes -> V.Bytes -> m (V.Bytes, Either err x)

-- | Run a chunk parser with an initial input string, and a monadic action
-- that can supply more input if needed.
--
parseChunks :: Monad m => (V.Bytes -> Result e a) -> ParseChunks m e a
{-# INLINE parseChunks #-}
parseChunks pc m inp = go (pc inp)
  where
    go r = case r of
        Partial f -> go . f =<< m
        Success a rest    -> pure (rest, Right a)
        Failure errs rest -> pure (rest, Left errs)

(<?>) :: T.Text -> Parser a -> Parser a
{-# INLINE (<?>) #-}
msg <?> (Parser p) = Parser (\ kf k s inp -> p (kf . (msg:)) k s inp)
infixr 0 <?>

-- | Run a parser and keep track of all the input chunks it consumes.
-- Once it's finished, return the final result (always 'Success' or 'Failure') and
-- all consumed chunks.
--
runAndKeepTrack :: Parser a -> Parser (Result ParseError a, V.Bytes)
{-# INLINE runAndKeepTrack #-}
runAndKeepTrack (Parser pa) = Parser $ \ _ k0 st0 inp ->
    let go !acc r k (st :: State# ParserState) = case r of
            Partial k'      -> Partial (\ inp' -> go (inp':acc) (k' inp') k st)
            Success _ inp' -> let consumed = V.concatR acc in consumed `seq` k st (r, consumed) inp'
            Failure _ inp' -> let consumed = V.concatR acc in consumed `seq` k st (r, consumed) inp'
        r0 = runRW# (\ s ->
                unsafeCoerce# (pa Failure (\ _ r -> Success r) (unsafeCoerce# s) inp))
    in go [inp] r0 k0 st0

-- | Return both the result of a parse and the portion of the input
-- that was consumed while it was being parsed.
--
match :: Parser a -> Parser (V.Bytes, a)
{-# INLINE match #-}
match p = do
    (r, consumed) <- runAndKeepTrack p
    Parser (\ _ k s _ ->
        case r of
            Success r' inp'  -> let consumed' = V.dropR (V.length inp') consumed
                                in consumed' `seq` k s (consumed' , r') inp'
            Failure err inp' -> Failure err inp'
            Partial _        -> error "Z.Data.Parser.Base.match: impossible")

-- | Ensure that there are at least @n@ bytes available. If not, the
-- computation will escape with 'Partial'.
--
-- Since this parser is used in many other parsers, an extra error param is provide
-- to attach custom error info.
ensureN :: Int -> T.Text -> Parser ()
{-# INLINE ensureN #-}
ensureN n0 err = Parser $ \ kf k s inp -> do
    let l = V.length inp
    if n0 <= l
    then k s () inp
    else Partial (ensureNPartial (n0-l) inp kf k s)
  where
    ensureNPartial :: forall r. Int -> V.PrimVector Word8 -> (ParseError -> ParseStep ParseError r)
                   -> (State# ParserState -> () -> ParseStep ParseError r)
                   -> State# ParserState -> ParseStep ParseError r
    ensureNPartial !l0 inp0 kf k s0 =
        let go acc !l s = \ inp -> do
                let l' = V.length inp
                if l' == 0
                then kf [err] (V.concatR (inp:acc))
                else do
                    if l <= l'
                    then let !inp' = V.concatR (inp:acc) in k s () inp'
                    else Partial (go (inp:acc) (l - l') s)
        in go [inp0] l0 s0

-- | Ensure that there are at least @n@ bytes available. If not, the
-- computation will escape with 'Partial'.
--
-- Since this parser is used in many other parsers, an extra error param is provide
-- to attach custom error info.
readN :: forall a. Int -> T.Text -> (V.Bytes -> a) -> Parser a
{-# INLINE readN #-}
readN n0 err f = Parser $ \ kf k s inp -> do
    let l = V.length inp
    if n0 <= l
    then let !r = f inp
             !inp' = V.unsafeDrop n0 inp
             in k s r inp'
    else Partial (readNPartial (n0-l) inp kf k s)
  where
    readNPartial :: forall r. Int -> V.PrimVector Word8 -> (ParseError -> ParseStep ParseError r)
                   -> (State# ParserState -> a -> ParseStep ParseError r)
                   -> State# ParserState -> ParseStep ParseError r
    readNPartial !l0 inp0 kf k s0 =
        let go acc !l s = \ inp -> do
                let l' = V.length inp
                if l' == 0
                then kf [err] (V.concatR (inp:acc))
                else do
                    if l <= l'
                    then let !inp' = V.concatR (inp:acc)
                             !r  = f inp'
                             !inp'' = V.unsafeDrop n0 inp'
                         in k s r inp''
                    else Partial (go (inp:acc) (l - l') s)
        in go [inp0] l0 s0

{- These rules are bascially what inliner do so no need to mess up with them
{-# RULES "readN/fmap"
    forall n f g e. fmapParser f (readN n e g)  = readN n e (f . g) #-}
{-# RULES "readN/merge"
    forall n1 n2 e1 e2 f1 f2. apParser (readN n1 e1 f1) (readN n2 e2 f2) = readN (n1 + n2) (T.concat [e1, ", ", e2]) (\ inp -> f1 inp $! f2 (V.unsafeDrop n1 inp)) #-}
-}

-- | Get current input chunk, draw new chunk if neccessary. 'V.null' means EOF.
--
-- Note this is different from 'takeRemaining', 'currentChunk' only return what's
-- left in current input chunk.
currentChunk :: Parser V.Bytes
{-# INLINE currentChunk #-}
currentChunk =  Parser $ \ _ k s inp ->
    if V.null inp
    then Partial (\ inp' -> k s inp' V.empty)
    else k s inp V.empty

-- | Test whether all input has been consumed, i.e. there are no remaining
-- undecoded bytes. Fail if not 'atEnd'.
endOfInput :: Parser ()
{-# INLINE endOfInput #-}
endOfInput = Parser $ \ kf k s inp ->
    if V.null inp
    then Partial (\ inp' ->
        if (V.null inp')
        then k s () inp'
        else kf ["Z.Data.Parser.Base.endOfInput: end not reached yet"] inp')
    else kf ["Z.Data.Parser.Base.endOfInput: end not reached yet"] inp

-- | Test whether all input has been consumed, i.e. there are no remaining
-- undecoded bytes.
atEnd :: Parser Bool
{-# INLINE atEnd #-}
atEnd = Parser $ \ _ k s inp ->
    if V.null inp
    then Partial (\ inp' -> k s (V.null inp') inp')
    else k s False inp

-- | Decode a primitive type in host byte order.
decodePrim :: forall a. (Unaligned a) => Parser a
{-# INLINE decodePrim #-}
decodePrim = do
    readN n "Z.Data.Parser.Base.decodePrim: not enough bytes" (\ (V.PrimVector ba i _) -> indexPrimWord8ArrayAs ba i)
  where
    n = getUnalignedSize (unalignedSize @a)

#define DECODE_HOST(f, type) \
    f :: Parser type; {-# INLINE f #-}; f = decodePrim; \
    -- ^ Decode type in host endian order.

DECODE_HOST(decodeWord  , Word   )
DECODE_HOST(decodeWord64, Word64 )
DECODE_HOST(decodeWord32, Word32 )
DECODE_HOST(decodeWord16, Word16 )
DECODE_HOST(decodeWord8 , Word8  )
DECODE_HOST(decodeInt   , Int    )
DECODE_HOST(decodeInt64 , Int64  )
DECODE_HOST(decodeInt32 , Int32  )
DECODE_HOST(decodeInt16 , Int16  )
DECODE_HOST(decodeInt8  , Int8   )
DECODE_HOST(decodeDouble, Double )
DECODE_HOST(decodeFloat , Float  )

-- | Decode a primitive type in little endian.
decodePrimLE :: forall a. (Unaligned (LE a)) => Parser a
{-# INLINE decodePrimLE #-}
decodePrimLE = do
    readN n "Z.Data.Parser.Base.decodePrimLE: not enough bytes" (\ (V.PrimVector ba i _) -> getLE (indexPrimWord8ArrayAs ba i))
  where
    n = getUnalignedSize (unalignedSize @(LE a))

#define DECODE_LE(f, type) \
    f :: Parser type; {-# INLINE f #-}; f = decodePrimLE; \
    -- ^ Decode type in little endian order.

DECODE_LE(decodeWordLE  , Word   )
DECODE_LE(decodeWord64LE, Word64 )
DECODE_LE(decodeWord32LE, Word32 )
DECODE_LE(decodeWord16LE, Word16 )
DECODE_LE(decodeIntLE   , Int    )
DECODE_LE(decodeInt64LE , Int64  )
DECODE_LE(decodeInt32LE , Int32  )
DECODE_LE(decodeInt16LE , Int16  )
DECODE_LE(decodeDoubleLE, Double )
DECODE_LE(decodeFloatLE , Float  )

-- | Decode a primitive type in big endian.
decodePrimBE :: forall a. (Unaligned (BE a)) => Parser a
{-# INLINE decodePrimBE #-}
decodePrimBE = do
    readN n "Z.Data.Parser.Base.decodePrimBE: not enough bytes" (\ (V.PrimVector ba i _) -> getBE (indexPrimWord8ArrayAs ba i))
  where
    n = getUnalignedSize (unalignedSize @(BE a))

#define DECODE_BE(f, type) \
    f :: Parser type; {-# INLINE f #-}; f = decodePrimBE; \
    -- ^ Decode type in big endian order.

DECODE_BE(decodeWordBE  , Word   )
DECODE_BE(decodeWord64BE, Word64 )
DECODE_BE(decodeWord32BE, Word32 )
DECODE_BE(decodeWord16BE, Word16 )
DECODE_BE(decodeIntBE   , Int    )
DECODE_BE(decodeInt64BE , Int64  )
DECODE_BE(decodeInt32BE , Int32  )
DECODE_BE(decodeInt16BE , Int16  )
DECODE_BE(decodeDoubleBE, Double )
DECODE_BE(decodeFloatBE , Float  )

-- | A stateful scanner.  The predicate consumes and transforms a
-- state argument, and each transformed state is passed to successive
-- invocations of the predicate on each byte of the input until one
-- returns 'Nothing' or the input ends.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'Nothing' on the first byte of input.
--
scan :: s -> (s -> Word8 -> Maybe s) -> Parser (V.Bytes, s)
{-# INLINE scan #-}
scan s0 f = scanChunks s0 f'
  where
    f' s0' (V.PrimVector arr off l) =
        let !end = off + l
            go !st !i
                | i < end = do
                    let !w = A.indexPrimArray arr i
                    case f st w of
                        Just st' -> go st' (i+1)
                        _        ->
                            let !len1 = i - off
                                !len2 = end - off
                            in Right (V.PrimVector arr off len1, V.PrimVector arr i len2, st)
                | otherwise = Left st
        in go s0' off

-- | Similar to 'scan', but working on 'V.Bytes' chunks, The predicate
-- consumes a 'V.Bytes' chunk and transforms a state argument,
-- and each transformed state is passed to successive invocations of
-- the predicate on each chunk of the input until one chunk got splited to
-- @Right (V.Bytes, V.Bytes)@ or the input ends.
--
-- Note the fields of result triple will not be forced by 'scanChunks', you may need to add `seq` or strict annotation to
-- avoid thunks and unintentional references to buffer.
--
scanChunks :: forall s. s -> (s -> V.Bytes -> Either s (V.Bytes, V.Bytes, s)) -> Parser (V.Bytes, s)
{-# INLINE scanChunks #-}
scanChunks s0 consume = Parser (\ _ k st inp ->
    case consume s0 inp of
        Right (want, rest, s') -> k st (want, s') rest
        Left s' -> Partial (scanChunksPartial s' k st inp))
  where
    -- we want to inline consume if possible
    {-# INLINE scanChunksPartial #-}
    scanChunksPartial :: forall r. s -> (State# ParserState -> (V.PrimVector Word8, s) -> ParseStep ParseError r)
                      -> State# ParserState -> V.PrimVector Word8 -> ParseStep ParseError r
    scanChunksPartial s0' k st0 inp0 =
        let go s acc st = \ inp ->
                if V.null inp
                then k st (V.concatR acc, s) inp
                else case consume s inp of
                        Left s' -> do
                            let acc' = inp : acc
                            Partial (go s' acc' st)
                        Right (want,rest,s') ->
                            let !r = V.concatR (want:acc) in k st (r, s') rest
        in go s0' [inp0] st0

--------------------------------------------------------------------------------

-- | Match any byte, to perform lookahead. Returns 'Nothing' if end of
-- input has been reached. Does not consume any input.
--
peekMaybe :: Parser (Maybe Word8)
{-# INLINE peekMaybe #-}
peekMaybe =
    Parser $ \ _ k s inp ->
        if V.null inp
        then Partial (\ inp' -> k s (if V.null inp'
            then Nothing
            else Just (V.unsafeHead inp')) inp')
        else k s (Just (V.unsafeHead inp)) inp

-- | Match any byte, to perform lookahead.  Does not consume any
-- input, but will fail if end of input has been reached.
--
peek :: Parser Word8
{-# INLINE peek #-}
peek =
    Parser $ \ kf k s inp ->
        if V.null inp
        then Partial (\ inp' ->
            if V.null inp'
            then kf ["Z.Data.Parser.Base.peek: not enough bytes"] inp'
            else k s (V.unsafeHead inp') inp')
        else k s (V.unsafeHead inp) inp

-- | The parser @satisfy p@ succeeds for any byte for which the
-- predicate @p@ returns 'True'. Returns the byte that is actually
-- parsed.
--
-- >digit = satisfy isDigit
-- >    where isDigit w = w >= 48 && w <= 57
--
satisfy :: (Word8 -> Bool) -> Parser Word8
{-# INLINE satisfy #-}
satisfy p = do
    ensureN 1 "Z.Data.Parser.Base.satisfy: not enough bytes"
    Parser $ \ kf k s inp ->
        let w = V.unsafeHead inp
        in if p w
            then k s w (V.unsafeTail inp)
            else kf [ "Z.Data.Parser.Base.satisfy: unsatisfied bytes " <> T.toText (V.take 8 inp) ]
                    (V.unsafeTail inp)

-- | The parser @satisfyWith f p@ transforms a byte, and succeeds if
-- the predicate @p@ returns 'True' on the transformed value. The
-- parser returns the transformed byte that was parsed.
--
satisfyWith :: (Word8 -> a) -> (a -> Bool) -> Parser a
{-# INLINE satisfyWith #-}
satisfyWith f p = do
    ensureN 1 "Z.Data.Parser.Base.satisfyWith: not enough bytes"
    Parser $ \ kf k s inp ->
        let a = f (V.unsafeHead inp)
        in if p a
            then k s a (V.unsafeTail inp)
            else kf ["Z.Data.Parser.Base.satisfyWith: unsatisfied byte"] (V.unsafeTail inp)

-- | Match a specific byte.
--
word8 :: Word8 -> Parser ()
{-# INLINE word8 #-}
word8 w' = do
    ensureN 1 "Z.Data.Parser.Base.word8: not enough bytes"
    Parser (\ kf k s inp ->
        let w = V.unsafeHead inp
        in if w == w'
            then k s () (V.unsafeTail inp)
            else kf [ T.concat [
                 "Z.Data.Parser.Base.word8: mismatch byte, expected "
                , T.toText w'
                , ", meet "
                , T.toText w
                , " at "
                , T.toText (V.take 8 inp)
                ] ] inp)

-- | Return a byte, this is an alias to @decodePrim @Word8@.
--
anyWord8 :: Parser Word8
{-# INLINE anyWord8 #-}
anyWord8 = decodePrim

-- | Match a specific 8bit char.
--
char8 :: Char -> Parser ()
{-# INLINE char8 #-}
char8 = word8 . c2w

-- | Match a specific 7bit char.
--
char7 :: Char -> Parser ()
{-# INLINE char7 #-}
char7 chr = word8 (c2w chr .&. 0x7F)

-- | Match a specific UTF8 char.
--
charUTF8 :: Char -> Parser ()
{-# INLINE charUTF8 #-}
charUTF8 = text . T.singleton

-- | Take a byte and return as a 8bit char.
--
anyChar8 :: Parser Char
{-# INLINE anyChar8 #-}
anyChar8 = do
    w <- anyWord8
    return $! w2c w

-- | Take a byte and return as a 7bit char, fail if exceeds @0x7F@.
--
anyChar7 :: Parser Char
{-# INLINE anyChar7 #-}
anyChar7 = do
    w <- satisfy (<= 0x7f)
    return $! w2c w

-- | Decode next few bytes as an UTF8 char.
--
-- Don't use this method as UTF8 decoder, it's slower than 'T.validate'.
anyCharUTF8 :: Parser Char
{-# INLINE anyCharUTF8 #-}
anyCharUTF8 = do
    r <- Parser $ \ kf k st inp@(V.PrimVector arr s l) -> do
        if l > 0
        then
            let l' = T.decodeCharLen arr s
            in if l' > l
            then k st (Left l') inp
            else do
                case T.validateMaybe (V.unsafeTake l' inp) of
                    Just t -> k st (Right $! T.head t) $! V.unsafeDrop l' inp
                    _ -> kf ["Z.Data.Parser.Base.anyCharUTF8: invalid UTF8 bytes"] inp
        else k st (Left 1) inp
    case r of
        Left d -> do
            ensureN d "Z.Data.Parser.Base.anyCharUTF8: not enough bytes"
            anyCharUTF8
        Right c -> return c

-- | Match either a single newline byte @\'\\n\'@, or a carriage
-- return followed by a newline byte @\"\\r\\n\"@.
endOfLine :: Parser ()
{-# INLINE endOfLine #-}
endOfLine = do
    w <- decodePrim :: Parser Word8
    case w of
        10 -> return ()
        13 -> word8 10

        _  -> Parser (\ kf _ _ inp -> kf [
            T.concat [
             "Z.Data.Parser.Base.endOfLine: mismatch byte, expected 10 or 13, meet "
            , T.toText w
            , " at "
            , T.toText (V.cons w (V.take 8 inp))
            ] ] inp)

--------------------------------------------------------------------------------

-- | 'skip' N bytes.
--
skip :: Int -> Parser ()
{-# INLINE skip #-}
skip n = assert (n > 0) $
    Parser (\ kf k s inp ->
        let l = V.length inp
        in if l >= n
            then k s () $! V.unsafeDrop n inp
            else Partial (skipPartial (n-l) kf k s))
  where
    skipPartial :: Int -> (ParseError -> ParseStep ParseError r)
                -> (State# ParserState -> () -> ParseStep ParseError r)
                -> State# ParserState -> ParseStep ParseError r
    skipPartial n0 kf k s0 =
        let go !n' s = \ inp ->
                let l = V.length inp
                in if l >= n'
                    then k s () $! V.unsafeDrop n' inp
                    else if l == 0
                        then kf ["Z.Data.Parser.Base.skip: not enough bytes"] inp
                        else Partial (go (n'-l) s)
        in go n0 s0

-- | Skip a byte.
--
skipWord8 :: Parser ()
{-# INLINE skipWord8 #-}
skipWord8 =
    Parser $ \ kf k s inp ->
        if V.null inp
        then Partial (\ inp' ->
            if V.null inp'
            then kf ["Z.Data.Parser.Base.skipWord8: not enough bytes"] inp'
            else k s () (V.unsafeTail inp'))
        else k s () (V.unsafeTail inp)

-- | Skip past input for as long as the predicate returns 'True'.
--
skipWhile :: (Word8 -> Bool) -> Parser ()
{-# INLINE skipWhile #-}
skipWhile p =
    Parser (\ _ k s inp ->
        let !rest = V.dropWhile p inp
        in if V.null rest
            then Partial (skipWhilePartial k s)
            else k s () rest)
  where
    skipWhilePartial :: forall r. (State# ParserState -> () -> ParseStep ParseError r)
                     -> State# ParserState -> ParseStep ParseError r
    skipWhilePartial k s0 =
        let go s = \ inp ->
                if V.null inp
                then k s () inp
                else
                    let !rest = V.dropWhile p inp
                    in if V.null rest then Partial (go s) else k s () rest
        in go s0

-- | Skip over white space using 'isSpace'.
--
skipSpaces :: Parser ()
{-# INLINE skipSpaces #-}
skipSpaces = skipWhile isSpace

-- | Take N bytes.
take :: Int -> Parser V.Bytes
{-# INLINE take #-}
take n = assert (n > 0) $ readN n "Z.Data.Parser.Base.take: not enough bytes" (V.unsafeTake n)

-- | Consume input as long as the predicate returns 'False' or reach the end of input,
-- and return the consumed input.
--
takeTill :: (Word8 -> Bool) -> Parser V.Bytes
{-# INLINE takeTill #-}
takeTill p = Parser (\ _ k s inp ->
    let (!want, !rest) = V.break p inp
    in if V.null rest
        then Partial (takeTillPartial k s want)
        else k s want rest)
  where
    takeTillPartial :: forall r. (State# ParserState -> V.PrimVector Word8 -> ParseStep ParseError r)
                    -> State# ParserState -> V.PrimVector Word8 -> ParseStep ParseError r
    takeTillPartial k s0 want =
        let go acc s = \ inp ->
                if V.null inp
                then let !r = V.concatR acc in k s r inp
                else
                    let (!want', !rest) = V.break p inp
                        acc' = want' : acc
                    in if V.null rest
                        then Partial (go acc' s)
                        else let !r = V.concatR acc' in k s r rest
        in go [want] s0

-- | Consume input as long as the predicate returns 'True' or reach the end of input,
-- and return the consumed input.
--
takeWhile :: (Word8 -> Bool) -> Parser V.Bytes
{-# INLINE takeWhile #-}
takeWhile p = Parser (\ _ k s inp ->
    let (!want, !rest) = V.span p inp
    in if V.null rest
        then Partial (takeWhilePartial k s want)
        else k s want rest)
  where
    takeWhilePartial :: forall r. (State# ParserState -> V.PrimVector Word8 -> ParseStep ParseError r)
                     -> State# ParserState -> V.PrimVector Word8 -> ParseStep ParseError r
    takeWhilePartial k s0 want =
        let go acc s = \ inp ->
                if V.null inp
                then let !r = V.concatR acc in k s r inp
                else
                    let (!want', !rest) = V.span p inp
                        acc' = want' : acc
                    in if V.null rest
                        then Partial (go acc' s)
                        else let !r = V.concatR acc' in k s r rest
        in go [want] s0

-- | Similar to 'takeWhile', but requires the predicate to succeed on at least one byte
-- of input: it will fail if the predicate never returns 'True' or reach the end of input
--
takeWhile1 :: (Word8 -> Bool) -> Parser V.Bytes
{-# INLINE takeWhile1 #-}
takeWhile1 p = do
    bs <- takeWhile p
    if V.null bs
    then Parser (\ kf _ _ inp ->
            kf ["Z.Data.Parser.Base.takeWhile1: no satisfied byte at " <> T.toText (V.take 10 inp) ]
               inp)
    else return bs

-- | Take all the remaining input chunks and return as 'V.Bytes'.
takeRemaining :: Parser V.Bytes
{-# INLINE takeRemaining #-}
takeRemaining = Parser (\ _ k s inp -> Partial (takeRemainingPartial k s inp))
  where
    takeRemainingPartial :: forall r. (State# ParserState -> V.PrimVector Word8 -> ParseStep ParseError r)
                         -> State# ParserState -> V.PrimVector Word8 -> ParseStep ParseError r
    takeRemainingPartial k s0 want =
        let go acc s = \ inp ->
                if V.null inp
                then let !r = V.concatR acc in k s r inp
                else Partial (go (inp:acc) s)
        in go [want] s0

-- | Take N bytes and validate as UTF8, failed if not UTF8 encoded.
takeUTF8 :: Int -> Parser T.Text
{-# INLINE takeUTF8 #-}
takeUTF8 n = do
    bs <- take n
    case T.validateMaybe bs of Just t -> pure t
                               _ -> fail' $ "Z.Data.Parser.Base.takeUTF8: illegal UTF8 bytes: " <> T.toText bs

-- | Similar to 'take', but requires the predicate to succeed on next N bytes
-- of input, and take N bytes(no matter if N+1 byte satisfy predicate or not).
--
takeN :: (Word8 -> Bool) -> Int -> Parser V.Bytes
{-# INLINE takeN #-}
takeN p n = do
    bs <- take n
    if go bs 0
    then return bs
    else Parser (\ kf _ _ inp ->
        kf [ "Z.Data.Parser.Base.takeN: byte does not satisfy at " <> T.toText (bs <> V.take 10 inp) ]
            inp)

  where
    go bs@(V.PrimVector _ _ l) !i
        | i < l = p (V.unsafeIndex bs i) && go bs (i+1)
        | otherwise = True

-- | @bytes s@ parses a sequence of bytes that identically match @s@.
--
bytes :: V.Bytes -> Parser ()
{-# INLINE bytes #-}
bytes bs = do
    let n = V.length bs
    ensureN n "Z.Data.Parser.Base.bytes: not enough bytes"
    Parser (\ kf k s inp ->
        if bs == V.unsafeTake n inp
        then k s () $! V.unsafeDrop n inp
        else kf [ T.concat [
             "Z.Data.Parser.Base.bytes: mismatch bytes, expected "
            , T.toText bs
            , ", meet "
            , T.toText (V.take n inp)
            ] ] inp)


-- | Same as 'bytes' but ignoring ASCII case.
bytesCI :: V.Bytes -> Parser ()
{-# INLINE bytesCI #-}
bytesCI bs = do
    let n = V.length bs
    -- casefold an ASCII string should not change it's length
    ensureN n "Z.Data.Parser.Base.bytesCI: not enough bytes"
    Parser (\ kf k s inp ->
        if bs' == CI.foldCase (V.unsafeTake n inp)
        then k s () $! V.unsafeDrop n inp
        else kf [ T.concat [
             "Z.Data.Parser.Base.bytesCI: mismatch bytes, expected "
            , T.toText bs
            , "(case insensitive), meet "
            , T.toText (V.unsafeTake n inp)
            ] ] inp)

  where
    bs' = CI.foldCase bs

-- | @text s@ parses a sequence of UTF8 bytes that identically match @s@.
--
text :: T.Text -> Parser ()
{-# INLINE text #-}
text (T.Text bs) = bytes bs