{-# language BlockArguments #-} {-# language MagicHash #-} {-# language UnboxedTuples #-} {-# language ScopedTypeVariables #-} {-# language UnliftedFFITypes #-} {-# language BangPatterns #-} {-# language RankNTypes #-} {-# language TypeApplications #-} {-# language LambdaCase #-} {-# language AllowAmbiguousTypes #-} {-# language PolyKinds #-} {-# language CPP #-} module Text.Parsnip.Parser ( Parser, KnownBase , parse ---------------------------- , try , atEnd , endOfInput ---------------------------- , tillSubstring , skipTillSubstring , skip , skip0 , take ---------------------------- , Mark , mark , release , snip , snipping ---------------------------- , input , pos , betwixt , rest ---------------------------- , loc ) where import Data.ByteString (ByteString) import qualified Data.ByteString as B import qualified Data.ByteString.Internal as B import qualified Data.ByteString.Unsafe as B import Foreign.C.Types import GHC.ForeignPtr import GHC.Prim import GHC.Ptr import GHC.Types import Prelude hiding (take) import Text.Parsnip.Internal.Mark import Text.Parsnip.Internal.Parser import Text.Parsnip.Internal.Private import Text.Parsnip.Internal.Simple import Text.Parsnip.Location -------------------------------------------------------------------------------- -- * Combinators -------------------------------------------------------------------------------- atEnd :: Parser s Bool atEnd = Parser \p s -> case readCharOffAddr# p 0# s of (# t, c #) -> OK (isTrue# do chr# 0# `eqChar#` c) p t endOfInput :: Parser s () endOfInput = Parser \p s -> case readCharOffAddr# p 0# s of (# t, c #) -> (# if isTrue# do chr# 0# `eqChar#` c then Some () else None, p, t #) take :: forall s. KnownBase s => Int -> Parser s ByteString take = case reflectBase @s of !(Base b g q r) -> \(I# i) -> Parser \p s -> if isTrue# (minusAddr# r p <# i) then Fail p s else OK (B.PS (ForeignPtr (b `plusAddr#` minusAddr# p q) g) 0 (I# i)) (plusAddr# p i) s -- | We can do this two ways, this way is O(1) but needs KnownBase. skip :: forall s. KnownBase s => Int -> Parser s () skip = \(I# i) -> Parser \p s -> if isTrue# (minusAddr# r p <# i) then Fail p s else OK () (plusAddr# p i) s where r = end @s -- | Linear time, but no @KnownBase@ dependency. skip0 :: Int -> Parser s () skip0 n@(I# i) = Parser \p s -> case io (c_memchr p 0 (fromIntegral n)) s of (# t, Ptr q #) -> if isTrue# (q `eqAddr#` nullAddr#) then OK () (plusAddr# p i) t else Fail p s tillSubstring :: KnownBase s => ByteString -> Parser s ByteString tillSubstring needle = relative \bs -> case p bs of (r, _) -> SimpleOK r (B.length r) where p = B.breakSubstring needle foreign import ccall unsafe "string.h" strstr :: Addr# -> Addr# -> IO (Ptr ()) foreign import ccall unsafe "string.h" strlen :: Addr# -> IO CSize skipTillSubstring :: ByteString -> Parser s () skipTillSubstring bneedle = case B.length bneedle of 0 -> pure () 1 -> () <$ word8 (B.unsafeHead bneedle) _ -> let fneedle = packForeignString bneedle in Parser \p s -> case io ( withForeignString fneedle \(Ptr cneedle)-> strstr p cneedle >>= \q -> if q == nullPtr then plusPtr (Ptr p) . fromIntegral <$> strlen p else pure q ) s of (# t, Ptr r #) -> OK () r t --skipTillSubstring :: ByteString -> Parser s () --skipTillSubstring needle = relative \bs -> case p bs of -- (r, _) -> SimpleOK r (B.length r) -- where p = B.breakSubstring needle -- | @input = snip minBound maxBound@ input :: KnownBase s => Parser s ByteString input = absolute \b _ -> SimpleOK b 0 -- | @rest = mark >>= \p -> snip p maxBound@ rest :: KnownBase s => Parser s ByteString rest = relative \b -> SimpleOK b 0 -- | 'snip' is a smidge faster, easier to type, if less fun to say, and -- doesn't need you to fiddle with explicit type application to actually -- apply. -- -- The benefit of this combinator is that it is easy to come up with numbers -- of bytes into a file, and this combinator will automatically trim the -- result to the actual range of bytes available, whereas constructing an -- illegal 'Mark' will error in 'toEnum'/'fromEnum'/'succ' or whatever other -- combinator tries to produce one out of range to maintain the invariant -- that a mark is always a well formed location in the content. betwixt :: forall s. KnownBase s => Int -> Int -> ByteString betwixt i j = B.take (j-i) $ B.drop i $ bytes @s -- | 'mark' is generally faster pos :: forall s. KnownBase s => Parser s Int pos = Parser \ p s -> OK (I# (minusAddr# p (start @s))) p s {-# inline pos #-} loc :: KnownBase s => Parser s Location loc = markLocation <$> mark {-# inline loc #-} -- | Actually looking at one of these is pretty slow, as it has to do a linear -- scan to figure out its line number for display. markLocation :: forall s. KnownBase s => Mark s -> Location markLocation (Mark (Ptr p)) = location (bytes @s) (I# (minusAddr# p (start @s))) {-# inline markLocation #-}