{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE FlexibleInstances #-}

{-|
Parser supporting custom error types and embeddable `IO` or `ST` actions, but
no other bells and whistles.

If you need efficient indentation parsing, consider "FlatParse.Stateful" instead.
-}

module FlatParse.Basic (
  -- * Parser types
    FP.Parser.ParserT(..)
  , FP.Parser.Parser, FP.Parser.ParserIO, FP.Parser.ParserST

  -- * Running parsers
  , Result(..)
  , runParser
  , runParserUtf8
  , runParserIO
  , runParserST

  -- ** Primitive result types
  , type FP.Parser.Res#
  , pattern FP.Parser.OK#, pattern FP.Parser.Err#, pattern FP.Parser.Fail#
  , type FP.Parser.ResI#

  -- * Embedding `ST` operations
  , liftST

  -- * UTF conversion
  , Common.strToUtf8
  , Common.utf8ToStr

  -- * Character predicates
  , Common.isDigit
  , Common.isLatinLetter
  , Common.isGreekLetter

  -- * Parsers
  -- ** Bytewise
  , FP.Base.eof
  , FP.Base.take
  , FP.Base.take#
  , FP.Base.takeUnsafe#
  , FP.Base.takeRest
  , FP.Base.skip
  , FP.Base.skip#
  , FP.Base.skipBack
  , FP.Base.skipBack#
  , FP.Base.atSkip#
  , FP.Base.atSkipUnsafe#

  , FP.Bytes.bytes
  , FP.Bytes.bytesUnsafe
  , byteString
  , anyCString
  , anyVarintProtobuf

  -- ** Combinators
  , (FP.Parser.<|>)
  , FP.Base.branch
  , FP.Base.notFollowedBy
  , FP.Base.chainl
  , FP.Base.chainr
  , FP.Base.lookahead
  , FP.Base.ensure
  , FP.Base.ensure#
  , FP.Base.withEnsure
  , FP.Base.withEnsure1
  , FP.Base.withEnsure#
  , FP.Base.isolate
  , FP.Base.isolate#
  , FP.Base.isolateUnsafe#
  , FP.Switch.switch
  , FP.Switch.switchWithPost
  , FP.Switch.rawSwitchWithPost
  , Control.Applicative.many
  , FP.Base.skipMany
  , Control.Applicative.some
  , FP.Base.skipSome

  -- ** Errors and failures
  , Control.Applicative.empty
  , FP.Base.failed
  , FP.Base.try
  , FP.Base.err
  , FP.Base.withError
  , FP.Base.fails
  , FP.Base.cut
  , FP.Base.cutting
  , FP.Base.optional
  , FP.Base.optional_
  , FP.Base.withOption

  -- ** Position
  , FlatParse.Common.Position.Pos(..)
  , FlatParse.Common.Position.endPos
  , FlatParse.Common.Position.addrToPos#
  , FlatParse.Common.Position.posToAddr#
  , FlatParse.Common.Position.Span(..)
  , FlatParse.Common.Position.unsafeSlice
  , getPos
  , setPos
  , spanOf
  , withSpan
  , byteStringOf
  , withByteString
  , inSpan
  , validPos
  , posLineCols
  , mkPos

  -- ** Text
  -- *** UTF-8
  , FP.Text.char, FP.Text.string
  , FP.Text.anyChar, FP.Text.skipAnyChar
  , FP.Text.satisfy, FP.Text.skipSatisfy
  , FP.Text.fusedSatisfy, FP.Text.skipFusedSatisfy
  , FP.Text.takeLine
  , FP.Text.takeRestString
  , linesUtf8

  -- *** ASCII
  , FP.Text.anyAsciiChar, FP.Text.skipAnyAsciiChar
  , FP.Text.satisfyAscii, FP.Text.skipSatisfyAscii

  -- *** ASCII-encoded numbers
  , FP.Text.anyAsciiDecimalWord
  , FP.Text.anyAsciiDecimalInt
  , FP.Text.anyAsciiDecimalInteger
  , FP.Text.anyAsciiHexWord
  , FP.Text.anyAsciiHexInt

  -- ** Machine integers
  , module FP.Integers

  -- ** Debugging parsers
  , FP.Text.traceLine
  , FP.Text.traceRest

  -- * Unsafe
  , unsafeSpanToByteString

  -- ** IO
  , unsafeLiftIO

  -- ** Parsers
  , module FP.Addr
  , anyCStringUnsafe

  ) where

import FlatParse.Basic.Parser
import FlatParse.Basic.Base
import FlatParse.Basic.Integers
--import FlatParse.Basic.Bytes
import FlatParse.Basic.Text
--import FlatParse.Basic.Switch
import FlatParse.Basic.Addr
import FlatParse.Common.Position
import qualified FlatParse.Common.Assorted as Common
import qualified FlatParse.Common.Numbers  as Common

-- common prefix for using/exporting parsers with their submodule
import qualified FlatParse.Basic.Parser as FP.Parser
import qualified FlatParse.Basic.Base as FP.Base
import qualified FlatParse.Basic.Integers as FP.Integers
import qualified FlatParse.Basic.Bytes as FP.Bytes
import qualified FlatParse.Basic.Text as FP.Text
import qualified FlatParse.Basic.Switch as FP.Switch
import qualified FlatParse.Basic.Addr as FP.Addr

import qualified Control.Applicative
import GHC.IO (IO(..), unsafeIOToST)
import GHC.Exts
import GHC.ForeignPtr
import GHC.ST (ST(..))
import System.IO.Unsafe
import Data.Ord ( comparing )
import Data.List ( sortBy )

import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as B
import qualified Data.ByteString.Internal as B

-- | Boxed, user-facing outcome of running a parser.
data Result e a
  = OK a !(B.ByteString)
    -- ^ Success: the returned value together with the unconsumed input.
  | Fail
    -- ^ Recoverable-by-default failure.
  | Err !e
    -- ^ Unrecoverable-by-default error.
  deriving Show

-- | Maps over the value stored in 'OK'; 'Fail' and 'Err' are passed through.
instance Functor (Result e) where
  fmap g res = case res of
    -- The mapped value is forced before it is stored back into 'OK'.
    OK x rest -> let !y = g x in OK y rest
    -- 'Fail' and 'Err' hold no value of the mapped type, so the result is
    -- simply re-typed.
    other     -> unsafeCoerce# other
  {-# inline fmap #-}

  x <$ res = case res of
    OK _ rest -> OK x rest
    other     -> unsafeCoerce# other
  {-# inline (<$) #-}

-- | Embed an 'IO' action in a 'ParserT'. This is slightly safer than
-- 'unsafePerformIO' because it will be sequenced correctly with respect to the
-- surrounding actions, and its execution is guaranteed.
--
-- The action's result is forced before the parser returns; the input position
-- is left unchanged.
unsafeLiftIO :: IO a -> ParserT st e a
unsafeLiftIO action = ParserT \_fp _eob cur st ->
  case unsafePerformIO action of
    !result -> OK# st result cur
{-# inline unsafeLiftIO #-}

--------------------------------------------------------------------------------

-- | Run a pure parser on a strict 'B.ByteString'.
--
-- On success, the 'OK' result holds the parsed value and the part of the input
-- that the parser did not consume.
runParser :: Parser e a -> B.ByteString -> Result e a
runParser (ParserT run) input@(B.PS (ForeignPtr _ contents) _ (I# n)) =
  unsafePerformIO (B.unsafeUseAsCString input withBuffer)
  where
    -- Invoke the raw parsing function on the buffer's address range and box
    -- its primitive result.
    withBuffer (Ptr start) =
      let eob = plusAddr# start n
      in pure case run contents eob start proxy# of
           -- The unconsumed input is recovered from how far the parser advanced.
           OK# _st x rest -> OK x (B.drop (I# (minusAddr# rest start)) input)
           Err# _st e     -> Err e
           Fail# _st      -> Fail
{-# noinline runParser #-}
-- We mark this as noinline to allow power users to safely do unsafe state token coercions.
-- Details are discussed in https://github.com/AndrasKovacs/flatparse/pull/34#issuecomment-1326999390

-- | Run a parser on a 'String', after converting it to the corresponding UTF-8
-- bytes with 'Common.strToUtf8'.
--
-- Reminder: @OverloadedStrings@ for 'B.ByteString' does not yield a valid UTF-8
-- encoding! For non-ASCII 'B.ByteString' literal input, use this wrapper or
-- properly convert your input first.
runParserUtf8 :: Parser e a -> String -> Result e a
runParserUtf8 pa = runParser pa . Common.strToUtf8

-- | Run an `ST`-based parser.
--
-- The parser is re-typed as a 'ParserIO', run with 'runParserIO', and the
-- resulting 'IO' action is converted back to 'ST' with 'unsafeIOToST'.
runParserST :: ParserST s e a -> B.ByteString -> ST s (Result e a)
runParserST pst = unsafeIOToST . runParserIO (unsafeCoerce# pst)
{-# inlinable runParserST #-}

-- | Run an `IO`-based parser on a strict 'B.ByteString'.
--
-- The state token of the surrounding 'IO' action is threaded through the
-- parser. On success, the 'OK' result holds the unconsumed part of the input.
runParserIO :: ParserIO e a -> B.ByteString -> IO (Result e a)
runParserIO (ParserT run) input@(B.PS (ForeignPtr _ contents) _ (I# n)) =
  B.unsafeUseAsCString input \(Ptr start) ->
    IO \rw0 -> case run contents (plusAddr# start n) start rw0 of
      OK# rw1 x rest -> (# rw1, OK x (B.drop (I# (minusAddr# rest start)) input) #)
      Err# rw1 e     -> (# rw1, Err e #)
      Fail# rw1      -> (# rw1, Fail #)
{-# inlinable runParserIO #-}

-- | Run an `ST` action inside a `ParserST`.
--
-- The action receives the parser's current state token; the input position is
-- left unchanged.
liftST :: ST s a -> ParserST s e a
liftST (ST step) = ParserT \_fp _eob cur st0 ->
  case step st0 of
    (# st1, x #) -> OK# st1 x cur
{-# inline liftST #-}

--------------------------------------------------------------------------------

-- | Parse a given 'B.ByteString'.
--
-- Succeeds, consuming the matched bytes, if the remaining input starts with
-- the given bytestring; fails otherwise (including when fewer bytes remain
-- than the bytestring is long).
--
-- If the bytestring is statically known, consider using 'bytes' instead.
byteString :: B.ByteString -> ParserT st e ()
byteString (B.PS (ForeignPtr base fcontent) (I# off) (I# len)) =

  let -- Compare 8 bytes at a time while at least 8 bytes of the needle remain,
      -- then hand the remainder over to the bytewise loop.
      go64 :: Addr# -> Addr# -> Addr# -> State# RealWorld -> Res# (State# RealWorld) e ()
      go64 bs bsend s rw =
        let bs' = plusAddr# bs 8# in
        case gtAddr# bs' bsend of
          1# -> go8 bs bsend s rw
#if MIN_VERSION_base(4,17,0)
          _  -> case eqWord64# (indexWord64OffAddr# bs 0#) (indexWord64OffAddr# s 0#) of
#else
          _  -> case eqWord# (indexWord64OffAddr# bs 0#) (indexWord64OffAddr# s 0#) of
#endif
            1# -> go64 bs' bsend (plusAddr# s 8#) rw
            _  -> Fail# rw

      -- Compare the remaining bytes one at a time; succeeds once the needle is
      -- exhausted, returning the advanced input address.
      go8 :: Addr# -> Addr# -> Addr# -> State# RealWorld -> Res# (State# RealWorld) e ()
      go8 bs bsend s rw = case ltAddr# bs bsend of
#if MIN_VERSION_base(4,16,0)
        1# -> case eqWord8# (indexWord8OffAddr# bs 0#) (indexWord8OffAddr# s 0#) of
#else
        1# -> case eqWord# (indexWord8OffAddr# bs 0#) (indexWord8OffAddr# s 0#) of
#endif
          1# -> go8 (plusAddr# bs 1#) bsend (plusAddr# s 1#) rw
          _  -> Fail# rw
        _  -> OK# rw () s

      -- Run the comparison, then 'touch#' the needle's buffer contents so that
      -- they stay reachable until the comparison has finished.
      go :: Addr# -> Addr# -> Addr# -> State# RealWorld -> Res# (State# RealWorld) e ()
      go bs bsend s rw = case go64 bs bsend s rw of
        (# rw', res #) -> case touch# fcontent rw' of
          rw'' -> (# rw'', res #)

  in ParserT \_fp eob s st ->
      -- Only start comparing when enough input is left, so that the loops
      -- never read past the end of the input buffer.
      case len <=# minusAddr# eob s of
           1# ->
             -- Honour the offset field of 'B.PS'. With bytestring < 0.11 a
             -- sliced bytestring starts @off@ bytes into its buffer; with
             -- newer versions the offset is always 0 and this is a no-op.
             let start = plusAddr# base off
             in case runRW# (go start (plusAddr# start len) s) of
                  (# _rw, res #) -> (# st, res #)
           _  -> Fail# st
{-# inline byteString #-}

--------------------------------------------------------------------------------

-- | Return the current input position as a 'Pos', without consuming input.
getPos :: ParserT st e Pos
getPos = ParserT \_fp eob cur st ->
  OK# st (addrToPos# eob cur) cur
{-# inline getPos #-}

-- | Move the parser to the given input position.
--
-- Warning: this can result in crashes if the position points outside the
-- current buffer. It is always safe to 'setPos' values which came from 'getPos'
-- with the current input.
setPos :: Pos -> ParserT st e ()
setPos target = ParserT \_fp eob _cur st ->
  OK# st () (posToAddr# eob target)
{-# inline setPos #-}

-- | Run a parser and return the 'Span' of input it consumed, discarding the
-- parser's own result. Failures and errors of the parser are propagated.
spanOf :: ParserT st e a -> ParserT st e Span
spanOf (ParserT inner) = ParserT \fp eob start st0 ->
  case inner fp eob start st0 of
    OK# st1 _ end ->
      let consumed = Span (addrToPos# eob start) (addrToPos# eob end)
      in OK# st1 consumed end
    failure -> unsafeCoerce# failure
{-# inline spanOf #-}

-- | Run a parser, then pass its result and the 'Span' it consumed to a
--   continuation. CPS'd version of `spanOf` for better unboxing.
withSpan :: ParserT st e a -> (a -> Span -> ParserT st e b) -> ParserT st e b
withSpan (ParserT inner) k = ParserT \fp eob start st0 ->
  case inner fp eob start st0 of
    OK# st1 x end ->
      let consumed = Span (addrToPos# eob start) (addrToPos# eob end)
      -- The continuation resumes where the first parser stopped.
      in runParserT# (k x consumed) fp eob end st1
    failure -> unsafeCoerce# failure
{-# inline withSpan #-}

-- | Run a parser and return the `B.ByteString` it consumed, discarding the
--   parser's own result. The returned bytestring refers to the input buffer
--   rather than a copy. Note: it's more efficient to use `spanOf` and
--   `withSpan` instead.
byteStringOf :: ParserT st e a -> ParserT st e B.ByteString
byteStringOf (ParserT inner) = ParserT \fp eob start st0 ->
  case inner fp eob start st0 of
    OK# st1 _ end ->
      OK# st1 (B.PS (ForeignPtr start fp) 0 (I# (minusAddr# end start))) end
    failure -> unsafeCoerce# failure
{-# inline byteStringOf #-}

-- | Run a parser, then pass its result and the `B.ByteString` it consumed to a
--   continuation. CPS'd version of `byteStringOf`; can be more efficient,
--   because the result is more eagerly unboxed by GHC. It's more efficient to
--   use `spanOf` or `withSpan` instead.
withByteString :: ParserT st e a -> (a -> B.ByteString -> ParserT st e b) -> ParserT st e b
withByteString (ParserT inner) k = ParserT \fp eob start st0 ->
  case inner fp eob start st0 of
    OK# st1 x end ->
      let consumed = B.PS (ForeignPtr start fp) 0 (I# (minusAddr# end start))
      -- The continuation resumes where the first parser stopped.
      in runParserT# (k x consumed) fp eob end st1
    failure -> unsafeCoerce# failure
{-# inline withByteString #-}

-- | Run a parser with the input restricted to the given 'Span'.
--
-- When the parser succeeds, the input position from before the call is
-- restored, so 'inSpan' does not consume input.
--
-- Warning: this operation may crash if the given span points outside the
-- current parsing buffer. It's always safe to use 'inSpan' if the 'Span' comes
-- from a previous 'withSpan' or 'spanOf' call on the current input.
inSpan :: Span -> ParserT st e a -> ParserT st e a
inSpan (Span from to) (ParserT inner) = ParserT \fp eob cur st0 ->
  -- The span's end serves as the end of input for the inner parser.
  case inner fp (posToAddr# eob to) (posToAddr# eob from) st0 of
    OK# st1 x _ -> OK# st1 x cur
    failure     -> unsafeCoerce# failure
{-# inline inSpan #-}

--------------------------------------------------------------------------------

-- | Create a 'B.ByteString' covering the given 'Span' of the current input,
-- without consuming input.
--
-- The result is invalid if the 'Span' points outside the current buffer, or if
-- the 'Span' start is greater than the end position.
unsafeSpanToByteString :: Span -> ParserT st e B.ByteString
unsafeSpanToByteString (Span from to) = lookahead do
  -- Jump to the start of the span, then capture everything up to its end.
  setPos from
  byteStringOf (setPos to)
{-# inline unsafeSpanToByteString #-}

-- | Check whether a `Pos` points into a `B.ByteString`, i.e. whether it lies
--   between the position at the start of the input and 'endPos' (inclusive).
validPos :: B.ByteString -> Pos -> Bool
validPos str pos =
  case runParser inBounds str of
    OK ok _ -> ok
    _       -> error "FlatParse.Basic.validPos: got a non-OK result, impossible"
  where
    -- Compare against the position at the very start of the input.
    inBounds = do
      begin <- getPos
      pure (begin <= pos && pos <= endPos)
{-# inline validPos #-}

-- | Compute the line and column number of every `Pos` in a list, assuming
--   UTF8 encoding. Results are returned in the order of the input list; lines
--   and columns are counted from 0, and columns are counted in characters.
--   Throws an error on invalid positions. Note: computing lines and columns
--   may traverse the `B.ByteString`, but it traverses it only once regardless
--   of the length of the position list.
posLineCols :: B.ByteString -> [Pos] -> [(Int, Int)]
posLineCols str poss =
  case runParser (walk 0 0 ordered) str of
    -- Restore the order of the original position list.
    OK found _ -> map snd (sortBy (comparing fst) found)
    _          -> error "FlatParse.Basic.posLineCols: invalid position"
  where
    -- Tag every position with its index in the input list, then order the
    -- pairs by position so that a single pass over the input suffices.
    ordered :: [(Int, Pos)]
    ordered = sortBy (comparing snd) (zip [0..] poss)

    -- Step through the input character by character, tracking the current
    -- line and column, and record them whenever the next pending position is
    -- reached.
    walk !_ !_ [] = pure []
    walk line col pending@((ix, target) : rest) = do
      here <- getPos
      if target == here
        then ((ix, (line, col)) :) <$> walk line col rest
        else do
          c <- anyChar
          if '\n' == c
            then walk (line + 1) 0 pending
            else walk line (col + 1) pending

-- | Create a `Pos` from a line and column number, both counted from 0, with
--   columns counted in characters. Throws an error on out-of-bounds line and
--   column numbers.
mkPos :: B.ByteString -> (Int, Int) -> Pos
mkPos str (wantLine, wantCol) =
  case runParser (seek 0 0) str of
    OK pos _ -> pos
    _        -> error "FlatParse.Basic.mkPos: got a non-OK result, impossible"
  where
    -- Read characters until the requested line and column are reached; running
    -- out of input before that means the request was out of bounds.
    seek line col
      | line == wantLine && col == wantCol = getPos
      | otherwise = advance <|> error "FlatParse.Basic.mkPos: invalid position"
      where
        advance = do
          c <- anyChar
          if c == '\n'
            then seek (line + 1) 0
            else seek line (col + 1)

-- | Break a UTF-8-encoded `B.ByteString` into lines. Throws an error on invalid
--   input. This is mostly useful for grabbing specific source lines for
--   displaying error messages.
linesUtf8 :: B.ByteString -> [String]
linesUtf8 str =
  case runParser allLines str of
    OK ls _ -> ls
    _       -> error "FlatParse.Basic.linesUtf8: invalid input"
  where
    -- Stop at the end of input; otherwise take one line and continue.
    allLines = ([] <$ eof) <|> ((:) <$> takeLine <*> allLines)

--------------------------------------------------------------------------------

-- | Read a null-terminated bytestring (a C-style string).
--
-- The returned bytestring does not include the null terminator, but the
-- terminator is consumed. Fails if the end of input is reached before a null
-- byte is found.
anyCString :: ParserT st e B.ByteString
anyCString = ParserT \fp eob start st ->
  let -- Scan forward from @start@ until a null byte or the end of input.
      scan cur = case eqAddr# eob cur of
        1# -> Fail# st
        _  ->
#if MIN_VERSION_base(4,16,0)
          -- TODO below is a candidate for improving with ExtendedLiterals!
          case eqWord8# (indexWord8OffAddr# cur 0#) (wordToWord8# 0##) of
#else
          case eqWord# (indexWord8OffAddr# cur 0#) 0## of
#endif
            -- The string length is the distance scanned so far.
            1# -> OK# st (B.PS (ForeignPtr start fp) 0 (I# (minusAddr# cur start)))
                         (plusAddr# cur 1#)
            _  -> scan (plusAddr# cur 1#)
  in scan start
{-# inline anyCString #-}

-- | Read a null-terminated bytestring (a C-style string), where the bytestring
--   is known to be null-terminated somewhere in the input.
--
-- Highly unsafe: the terminator is located with 'cstringLength#', and the only
-- bounds check made is that the input is not already at its end. Unless you
-- have a guarantee that the string will be null terminated before the input
-- ends, use 'anyCString' instead.
--
-- On compilers older than GHC 9.0 / base-4.15 this is defined as a call to
-- 'error', since the 'cstringLength#' primop is not available there.
--
-- Consumes the null terminator.
anyCStringUnsafe :: ParserT st e B.ByteString
{-# inline anyCStringUnsafe #-}
#if MIN_VERSION_base(4,15,0)
anyCStringUnsafe = ParserT \fp eob start st ->
  case eqAddr# eob start of
    1# -> Fail# st
    _  -> case cstringLength# start of
      -- Skip the string itself plus its terminator.
      len -> OK# st (B.PS (ForeignPtr start fp) 0 (I# len))
                    (plusAddr# start (len +# 1#))
#else
anyCStringUnsafe = error "Flatparse.Basic.anyCStringUnsafe: requires GHC 9.0 / base-4.15, not available on this compiler"
#endif

-- | Read a protobuf-style varint into a positive 'Int'.
--
-- protobuf-style varints are byte-aligned. For each byte, the lower 7 bits are
-- data and the MSB indicates if there are further bytes. Once fully parsed, the
-- 7-bit payloads are concatenated and interpreted as a little-endian unsigned
-- integer.
--
-- Fails if the varint exceeds the positive 'Int' range (more than 63 bits of
-- payload), or if the underlying primitive parser fails.
--
-- Really, these are varnats. They also match with the LEB128 varint encoding.
--
-- protobuf encodes negatives in unsigned integers using zigzag encoding. See
-- the @fromZigzag@ family of functions for this functionality.
--
-- Further reading:
-- https://developers.google.com/protocol-buffers/docs/encoding#varints
anyVarintProtobuf :: ParserT st e Int
anyVarintProtobuf = ParserT \_fp eob cur st ->
  case Common.anyVarintProtobuf# eob cur of
    (# | (# val, rest, nbits #) #)
      | isTrue# (nbits <=# 63#) -> OK# st (I# val) rest
      | otherwise               -> Fail# st -- overflow
    (# (##) | #) -> Fail# st
{-# inline anyVarintProtobuf #-}