-- | -- Module : Data.ByteString.Parser -- License : CC0-1.0 -- -- Maintainer : mordae@anilinux.org -- Stability : unstable -- Portability : non-portable (ghc) -- -- This module provides a parser for 'ByteString'. -- -- * If you\'d like to parse ASCII text, you might want to take a look at -- "Data.ByteString.Parser.Char8". It reuses the same 'Parser', but -- provides functions working with 'Char' instead of 'Word8' as well as -- more string utilities. -- -- * If you\'d like to parse Unicode text, look instead at the -- "Data.Text.Parser". Is is slower, but in a way more correct. -- module Data.ByteString.Parser ( Parser(..) , Result(..) , parseOnly -- * Bytes , byte , notByte , anyByte , satisfy , peekByte -- * Strings , string , Data.ByteString.Parser.take , scan , runScanner , Data.ByteString.Parser.takeWhile , takeWhile1 , takeTill , takeTill1 -- * Combinators , provided , choice , branch , Snack.Combinators.count , optional , eitherP , option , many , many1 , manyTill , sepBy , sepBy1 , wrap , match , label , unlabel , validate -- * End Of Input , takeByteString , peekByteString , endOfInput , atEnd -- * Position , offset -- * Miscelaneous -- | -- These are all generic methods, but since I sometimes forget about them, -- it is nice to have them listed here for reference what writing parsers. , Control.Applicative.empty , pure , guard , when , unless , void ) where import Prelude hiding (null, length, splitAt, take) import Control.Applicative import Control.Monad import Data.List qualified as List import Data.Maybe import Data.Word import Data.ByteString as BS import Data.ByteString.Unsafe as BS import Snack.Combinators -- | -- Result represents either success or some kind of failure. -- -- You can find the problematic offset by subtracting length of the -- remainder from length of the original input. -- data Result a = Success a {-# UNPACK #-} !ByteString -- ^ Parser successfully matched the input. -- Produces the parsing result and the remainder of the input. | Failure [String] {-# UNPACK #-} !ByteString -- ^ Parser failed to match the input. -- Produces list of expected inputs and the corresponding remainder. | Error String {-# UNPACK #-} !ByteString {-# UNPACK #-} !Int -- ^ Parser ran into an error. Either syntactic or a validation one. deriving (Eq, Show) instance Functor Result where {-# INLINE fmap #-} fmap fn (Success res more) = Success (fn res) more fmap _ (Failure expected more) = Failure expected more fmap _ (Error reason more len) = Error reason more len -- | -- Parser for 'ByteString' inputs. -- newtype Parser a = Parser { runParser :: ByteString -> Result a -- ^ Run the parser on specified input. } instance Functor Parser where {-# INLINE fmap #-} fmap fn Parser{runParser} = Parser \inp -> fmap fn (runParser inp) instance Applicative Parser where {-# INLINE pure #-} pure x = Parser \inp -> Success x inp {-# INLINE (<*>) #-} (Parser runFn) <*> (Parser runArg) = Parser \inp -> case runFn inp of Error reason more len -> Error reason more len Failure expected more -> Failure expected more Success fn rest -> fmap fn (runArg rest) instance Alternative Parser where {-# INLINE empty #-} empty = Parser \inp -> Failure [] inp -- | -- Tries the right branch only if the left brach produces Failure. -- Does not mask Error. -- {-# INLINE (<|>) #-} (Parser runLeft) <|> (Parser runRight) = Parser \inp -> case runLeft inp of Success res more -> Success res more Error reason more len -> Error reason more len Failure expected more -> case runRight inp of Success res' more' -> Success res' more' Error reason' more' len' -> Error reason' more' len' Failure expected' more' -> -- Longer match (shorter remainder) wins. case length more `compare` length more' of LT -> Failure expected more EQ -> Failure (expected <> expected') more GT -> Failure expected' more' instance Monad Parser where {-# INLINE (>>=) #-} (Parser runLeft) >>= right = Parser \inp -> case runLeft inp of Error reason more len -> Error reason more len Failure expected more -> Failure expected more Success res more -> runParser (right res) more instance MonadPlus Parser instance MonadFail Parser where -- | -- Fail the whole parser with given reason. -- -- If you want the best error report possible, fail at the end of a -- relevant 'extent'. -- -- For example, if you are parsing a mapping that is syntactically valid, -- but does not contain some mandatory keys, fail after parsing the whole -- mapping and make sure that the maaping parser and the 'fail' call are -- enclosed in an 'extent'. -- -- That way, the error will indicate the extent remainder and length. -- {-# INLINE CONLIKE fail #-} fail reason = Parser \inp -> Error reason inp 0 -- | -- Accepts a single, matching byte. -- {-# INLINE CONLIKE byte #-} byte :: Word8 -> Parser Word8 byte c = satisfy (c ==) -- | -- Accepts a single, differing byte. -- {-# INLINE CONLIKE notByte #-} notByte :: Word8 -> Parser Word8 notByte c = satisfy (c /=) -- | -- Discards the remaining input and returns just the parse result. -- You might want to combine it with 'endOfInput' for the best effect. -- -- Example: -- -- @ -- parseOnly (pContacts \<* endOfInput) bstr -- @ -- {-# INLINE CONLIKE parseOnly #-} parseOnly :: Parser a -> ByteString -> Either String a parseOnly par = \inp -> case runParser par inp of Success res _ -> Right res Error reason _ _ -> Left reason Failure expected _ -> case expected of [] -> Left $ "Unexpected input." ex -> Left $ "Expected " <> List.intercalate ", " ex <> "." -- | -- Accepts a single byte. -- {-# INLINE anyByte #-} anyByte :: Parser Word8 anyByte = Parser \inp -> if null inp then Failure ["any byte"] inp else Success (unsafeHead inp) (unsafeTail inp) -- | -- Accepts a single byte matching the predicate. -- {-# INLINE CONLIKE satisfy #-} satisfy :: (Word8 -> Bool) -> Parser Word8 satisfy isOk = Parser \inp -> if null inp then Failure ["more input"] inp else let c = unsafeHead inp in if isOk c then Success c (unsafeTail inp) else Failure [] inp -- | -- Peeks ahead, but does not consume. -- -- Be careful, peeking behind end of the input fails. -- You might want to check using 'atEnd' beforehand. -- {-# INLINE peekByte #-} peekByte :: Parser Word8 peekByte = Parser \inp -> if null inp then Failure ["more input"] inp else Success (unsafeHead inp) inp -- | -- Accepts a matching string. -- {-# INLINE CONLIKE string #-} string :: ByteString -> Parser ByteString string str = Parser \inp -> let (pfx, sfx) = splitAt (length str) inp in case pfx == str of True -> Success pfx sfx False -> Failure [show str] inp -- | -- Accepts given number of bytes. -- Fails when not enough bytes are available. -- {-# INLINE CONLIKE take #-} take :: Int -> Parser ByteString take n = Parser \inp -> if n > length inp then Failure [show n <> " more bytes"] inp else Success (unsafeTake n inp) (unsafeDrop n inp) -- | -- Scans ahead statefully and then accepts whatever bytes the scanner liked. -- Scanner returns 'Nothing' to mark end of the acceptable extent. -- {-# INLINE CONLIKE scan #-} scan :: s -> (s -> Word8 -> Maybe s) -> Parser ByteString scan state scanner = fst <$> runScanner state scanner -- | -- Like 'scan', but also returns the final scanner state. -- {-# INLINE CONLIKE runScanner #-} runScanner :: s -> (s -> Word8 -> Maybe s) -> Parser (ByteString, s) runScanner state scanner = Parser \inp -> let (state', n) = scanBytes state scanner 0 (unpack inp) (res, more) = splitAt n inp in Success (res, state') more {-# INLINE scanBytes #-} scanBytes :: s -> (s -> Word8 -> Maybe s) -> Int -> [Word8] -> (s, Int) scanBytes !state _scanner !n [] = (state, n) scanBytes !state scanner !n (x:more) = case scanner state x of Just state' -> scanBytes state' scanner (succ n) more Nothing -> (state, n) -- | -- Efficiently consume as long as the input bytes match the predicate. -- An inverse of 'takeTill'. -- {-# INLINE CONLIKE takeWhile #-} takeWhile :: (Word8 -> Bool) -> Parser ByteString takeWhile test = takeTill (not . test) -- | -- Like 'Data.ByteString.Parser.takeWhile', but requires at least a single byte. -- {-# INLINE CONLIKE takeWhile1 #-} takeWhile1 :: (Word8 -> Bool) -> Parser ByteString takeWhile1 test = Data.ByteString.Parser.takeWhile test `provided` (not . null) -- | -- Efficiently consume until a byte matching the predicate is found. -- An inverse of 'Data.ByteString.Parser.takeWhile'. -- {-# INLINE CONLIKE takeTill #-} takeTill :: (Word8 -> Bool) -> Parser ByteString takeTill test = Parser \inp -> let n = fromMaybe (length inp) $ findIndex test inp in Success (unsafeTake n inp) (unsafeDrop n inp) -- | -- Same as 'takeTill', but requires at least a single byte. -- {-# INLINE CONLIKE takeTill1 #-} takeTill1 :: (Word8 -> Bool) -> Parser ByteString takeTill1 test = Data.ByteString.Parser.takeTill test `provided` (not . null) -- | -- Makes the parser not only return the result, but also the original -- matched extent. -- {-# INLINE CONLIKE match #-} match :: Parser a -> Parser (ByteString, a) match par = Parser \inp -> case runParser par inp of Failure expected more -> Failure expected more Error reason more len -> Error reason more len Success res more -> let n = length more in Success (BS.take n inp, res) more -- | -- Names an extent of the parser. -- -- When the extent returns a Failure, details are discarded and replaced -- with the extent as a whole. -- -- When the extent returns an Error, it is adjusted to cover the whole -- extent, but the reason is left intact. -- -- You should strive to make labeled extents as small as possible, -- approximately of a typical token size. For example: -- -- @ -- pString = label \"string\" $ pStringContents \`wrap\` char \'\"\' -- @ -- {-# INLINE CONLIKE label #-} label :: String -> Parser a -> Parser a label lbl par = Parser \inp -> case runParser par inp of Success res more -> Success res more Failure _expected _more -> Failure [lbl] inp Error reason more len -> let len' = len + (length inp - length more) in Error reason inp len' -- | -- Un-names an extent of the parser. -- -- Same as 'label', but removes any expected values upon Failure. -- Very useful to mark comments and optional whitespace with. -- {-# INLINE CONLIKE unlabel #-} unlabel :: Parser a -> Parser a unlabel par = Parser \inp -> case runParser par inp of Success res more -> Success res more Failure _expected _more -> Failure [] inp Error reason more len -> let len' = len + (length inp - length more) in Error reason inp len' -- | -- Validate parser result and turn it into an Error upon failure. -- {-# INLINE CONLIKE validate #-} validate :: (a -> Either String b) -> Parser a -> Parser b validate test par = Parser \inp -> case runParser par inp of Failure expected more -> Failure expected more Error reason more len -> Error reason more len Success res more -> case test res of Right res' -> Success res' more Left reason -> Error reason inp (length inp - length more) -- | -- Given list of matchers and parsers, runs the first parser whose matcher -- succeeds on the input. This pattern makes for a simpler alternative to -- @try@ used in other parser combinator libraries. -- -- Example: -- -- @ -- pProperty = branch [ ( string "public" <* skipSpace -- , \_ -> Property Public <$> pToken -- ) -- , ( string "private" <* skipSpace -- , \_ -> Property Private <$> pToken -- ) -- ] -- @ -- {-# INLINE CONLIKE branch #-} branch :: [(Parser a, a -> Parser b)] -> Parser b branch [] = Parser \inp -> Failure [] inp branch ((Parser test, finish) : alts) = Parser \inp -> case test inp of Success res more -> runParser (finish res) more Error reason more len -> Error reason more len Failure _expected _more -> runParser (branch alts) inp -- | -- Accept whatever input remains. -- {-# INLINE takeByteString #-} takeByteString :: Parser ByteString takeByteString = Parser \inp -> Success inp mempty -- | -- Peek at whatever input remains. -- {-# INLINE peekByteString #-} peekByteString :: Parser ByteString peekByteString = Parser \inp -> Success inp inp -- | -- Accepts end of input and fails if we are not there yet. -- {-# INLINE endOfInput #-} endOfInput :: Parser () endOfInput = Parser \case inp | null inp -> Success () inp inp -> Failure ["end of input"] inp -- | -- Returns whether we are at the end of the input yet. -- {-# INLINE atEnd #-} atEnd :: Parser Bool atEnd = Parser \inp -> Success (null inp) inp -- | -- Calculate offset from the original input and the remainder. -- offset :: ByteString -> ByteString -> Int offset inp more = length inp - length more -- vim:set ft=haskell sw=2 ts=2 et: