{-# LANGUAGE BangPatterns #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE UndecidableInstances #-} {-# LANGUAGE UnicodeSyntax #-} {-| Module : Data.Nanoparsec.Internal Copyright : © 2011 Maciej Piechotka License : BSD3 Maintainer : uzytkownik2@gmail.com Stability : experimental Portability : -} module Data.Nanoparsec.Internal ( -- * Parser types Parser , Result(..) , S(input) -- * Running parsers , parse -- * Combinators , () , try -- * Parsing individual elements , satisfy , satisfyWith , anyElem , skip , elem , notElem , elem' , notElem' -- * Efficient substring handling , skipWhile , string , stringTransform , take , takeWhile , takeWhile1 , takeTill -- * State observation and manipulation functions , endOfInput , ensure ) where import Control.Applicative import Control.Monad import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as LB import Data.Monoid import Data.String import qualified Data.ListLike as LL import Data.Word import Prelude hiding (take, takeWhile, elem, notElem) (⊕) ∷ Monoid m ⇒ m → m → m (⊕) = mappend {-# INLINE (⊕) #-} ø ∷ Monoid m ⇒ m ø = mempty {-# INLINE ø #-} (≡) ∷ Eq a ⇒ a → a → Bool (≡) = (==) {-# INLINE (≡) #-} (≢) ∷ Eq a ⇒ a → a → Bool (≢) = (/=) {-# INLINE (≢) #-} (≥) ∷ Ord a ⇒ a → a → Bool (≥) = (>=) {-# INLINE (≥) #-} (≫) ∷ Monad m ⇒ m a → m b → m b (≫) = (>>) {-# INLINE (≫) #-} data Result δ r = Fail (S δ) [String] String | Partial (δ → Result δ r) | Done r (S δ) newtype Parser δ a = Parser { runParser ∷ forall r. S δ → Failure δ r → Success δ a r → Result δ r } type Failure δ r = S δ → [String] → String → Result δ r type Success δ a r = a → S δ → Result δ r data More = Complete | Incomplete deriving (Eq, Show) instance Monoid More where mappend Complete _ = Complete mappend _ Complete = Complete mappend _ _ = Incomplete mempty = Incomplete {-# INLINE mappend #-} data S δ = S { input ∷ !δ , _added ∷ !δ , more ∷ !More } deriving (Show) instance (Show δ, Show r) ⇒ Show (Result δ r) where show (Fail _ stack msg) = "Fail " ++ show stack ++ " " ++ show msg show (Partial _) = "Partial _" show (Done str r) = "Done " ++ show str ++ " " ++ show r instance Monoid δ ⇒ Monoid (S δ) where mempty = S mempty mempty mempty {-# INLINE mempty #-} {-# SPECIALIZE mempty ∷ S B.ByteString #-} {-# SPECIALIZE mempty ∷ S LB.ByteString #-} mappend (S inp add mor) (S _ add' mor') = S (inp ⊕ add') (add ⊕ add') (mor ⊕ mor') {-# INLINE mappend #-} {-# SPECIALIZE mappend ∷ S B.ByteString → S B.ByteString → S B.ByteString #-} {-# SPECIALIZE mappend ∷ S LB.ByteString → S LB.ByteString → S LB.ByteString #-} instance Functor (Parser δ) where f `fmap` Parser p = Parser $ \s fa su → p s fa (su . f) {-# INLINE fmap #-} instance Monoid δ ⇒ Applicative (Parser δ) where pure = return {-# INLINE pure #-} {-# SPECIALIZE pure ∷ a → Parser B.ByteString a #-} {-# SPECIALIZE pure ∷ a → Parser LB.ByteString a #-} (<*>) = ap {-# INLINE (<*>) #-} {-# SPECIALIZE (<*>) ∷ Parser B.ByteString (a → b) → Parser B.ByteString a → Parser B.ByteString b #-} {-# SPECIALIZE (<*>) ∷ Parser LB.ByteString (a → b) → Parser LB.ByteString a → Parser LB.ByteString b #-} (*>) = (>>) {-# INLINE (*>) #-} {-# SPECIALIZE (*>) ∷ Parser B.ByteString a → Parser B.ByteString b → Parser B.ByteString b #-} {-# SPECIALIZE (*>) ∷ Parser LB.ByteString a → Parser LB.ByteString b → Parser LB.ByteString b #-} x <* y = x >>= \a → y >> return a {-# INLINE (<*) #-} {-# SPECIALIZE (<*) ∷ Parser B.ByteString a → Parser B.ByteString b → Parser B.ByteString a #-} {-# SPECIALIZE (<*) ∷ Parser LB.ByteString a → Parser LB.ByteString b → Parser LB.ByteString a #-} instance Monoid δ ⇒ Monad (Parser δ) where return x = Parser $ \s _ su → su x s {-# INLINE return #-} {-# SPECIALIZE return ∷ a → Parser B.ByteString a #-} {-# SPECIALIZE return ∷ a → Parser LB.ByteString a #-} Parser p >>= g = Parser $ \s fa su → p s fa (\a s' → runParser (g a) s' fa su) {-# INLINE (>>=) #-} {-# SPECIALIZE (>>=) ∷ Parser B.ByteString a → (a → Parser B.ByteString b) → Parser B.ByteString b #-} {-# SPECIALIZE (>>=) ∷ Parser LB.ByteString a → (a → Parser LB.ByteString b) → Parser LB.ByteString b #-} Parser p >> Parser r = Parser $ \s fa su → p s fa (\_ s' → r s' fa su) {-# INLINE (>>) #-} {-# SPECIALIZE (>>) ∷ Parser B.ByteString a → Parser B.ByteString b → Parser B.ByteString b #-} {-# SPECIALIZE (>>) ∷ Parser LB.ByteString a → Parser LB.ByteString b → Parser LB.ByteString b #-} fail err = Parser (\s fa _ → fa s [] ("Failed reading: " ++ err)) {-# INLINE fail #-} {-# SPECIALIZE fail ∷ String → Parser B.ByteString a #-} {-# SPECIALIZE fail ∷ String → Parser LB.ByteString a #-} instance Monoid δ ⇒ Monoid (Parser δ a) where mempty = fail "mempty" {-# INLINE mempty #-} {-# SPECIALIZE mempty ∷ Parser B.ByteString a #-} {-# SPECIALIZE mempty ∷ Parser LB.ByteString a #-} Parser p `mappend` Parser r = Parser $ \s fa su → let fa' s' _ _ = r (s ⊕ s') fa su !s'' = noAdds s in p s'' fa' su {-# INLINE mappend #-} {-# SPECIALIZE mappend ∷ Parser B.ByteString a → Parser B.ByteString a → Parser B.ByteString a #-} {-# SPECIALIZE mappend ∷ Parser LB.ByteString a → Parser LB.ByteString a → Parser LB.ByteString a #-} instance Monoid δ ⇒ Alternative (Parser δ) where empty = fail "empty" {-# INLINE empty #-} {-# SPECIALIZE empty ∷ Parser B.ByteString a #-} {-# SPECIALIZE empty ∷ Parser LB.ByteString a #-} (<|>) = mappend {-# INLINE (<|>) #-} {-# SPECIALIZE (<|>) ∷ Parser B.ByteString a → Parser B.ByteString a → Parser B.ByteString a #-} {-# SPECIALIZE (<|>) ∷ Parser LB.ByteString a → Parser LB.ByteString a → Parser LB.ByteString a #-} instance Monoid δ ⇒ MonadPlus (Parser δ) where mzero = fail "mzero" {-# INLINE mzero #-} {-# SPECIALIZE mzero ∷ Parser B.ByteString a #-} {-# SPECIALIZE mzero ∷ Parser LB.ByteString a #-} mplus = mappend {-# INLINE mplus #-} {-# SPECIALIZE mplus ∷ Parser B.ByteString a → Parser B.ByteString a → Parser B.ByteString a #-} {-# SPECIALIZE mplus ∷ Parser LB.ByteString a → Parser LB.ByteString a → Parser LB.ByteString a #-} instance (Eq δ, LL.ListLike δ ε, IsString δ) ⇒ IsString (Parser δ δ) where fromString = string . fromString noAdds ∷ Monoid δ ⇒ S δ → S δ noAdds (S s _ c) = S s ø c {-# INLINE noAdds #-} {-# SPECIALIZE noAdds ∷ S B.ByteString → S B.ByteString #-} {-# SPECIALIZE noAdds ∷ S LB.ByteString → S LB.ByteString #-} -- | Succeed only if at least @n@ elements of input are available. ensure ∷ LL.ListLike δ ε ⇒ Int → Parser δ () ensure n = Parser $ \st@(S s _ _) fa su → if LL.length s ≥ n then su () st else runParser (demandInput ≫ ensure n) st fa su {-# SPECIALIZE ensure ∷ Int → Parser B.ByteString () #-} {-# SPECIALIZE ensure ∷ Int → Parser LB.ByteString () #-} -- | Ask for input. If we receive any, pass it to a success -- continuation, otherwise to a failure continuation. prompt ∷ LL.ListLike δ ε ⇒ S δ → (S δ → Result δ r) → (S δ → Result δ r) → Result δ r prompt (S s a _) fa su = Partial $ \p → if LL.null s then fa $ S s a Complete else su $ S (s ⊕ p) (a ⊕ p) Incomplete {-# SPECIALIZE prompt ∷ S B.ByteString → (S B.ByteString → Result B.ByteString r) → (S B.ByteString → Result B.ByteString r) → Result B.ByteString r #-} {-# SPECIALIZE prompt ∷ S LB.ByteString → (S LB.ByteString → Result LB.ByteString r) → (S LB.ByteString → Result LB.ByteString r) → Result LB.ByteString r #-} -- | Immediately demand more input via a 'Partial' continuation -- result. demandInput ∷ LL.ListLike δ ε ⇒ Parser δ () demandInput = Parser $ \st fa su → if more st ≡ Complete then fa st ["demandInput"] "not enough characters" else prompt st (\st' → fa st' ["demandInput"] "not enough characters") (su ()) {-# SPECIALIZE demandInput ∷ Parser B.ByteString () #-} {-# SPECIALIZE demandInput ∷ Parser LB.ByteString () #-} -- | This parser always succeeds. It returns 'True' if any input is -- available either immediately or on demand, and 'False' if the end -- of all input has been reached. wantInput ∷ LL.ListLike δ ε ⇒ Parser δ Bool wantInput = Parser $ \st@(S s _ c) _ su → case () of _ | not (LL.null s) → su True st | c ≡ Complete → su False st | otherwise → prompt st (su False) (su True) {-# SPECIALIZE wantInput ∷ Parser B.ByteString Bool #-} {-# SPECIALIZE wantInput ∷ Parser LB.ByteString Bool #-} get ∷ Parser δ δ get = Parser (\st _ su → su (input st) st) put ∷ δ → Parser δ () put s = Parser (\(S _ a c) _ su → su () (S s a c)) -- | Attempt a parse, and if it fails, rewind the input so that no -- input appears to have been consumed. -- -- This combinator is useful in cases where a parser might consume -- some input before failing, i.e. the parser needs arbitrary -- lookahead. The downside to using this combinator is that it can -- retain input for longer than is desirable. try ∷ Monoid δ ⇒ Parser δ a → Parser δ a try p = Parser (\st fa su → runParser p (noAdds st) (fa . (st ⊕)) su) {-# SPECIALIZE try ∷ Parser B.ByteString a → Parser B.ByteString a #-} {-# SPECIALIZE try ∷ Parser LB.ByteString a → Parser LB.ByteString a #-} -- | The parser @satisfy p@ succeeds for any element for which the -- predicate @p@ returns 'True'. Returns the element that is actually -- parsed. -- -- >digit = satisfy isDigit -- > where isDigit w = w >= 48 && w <= 57 satisfy ∷ LL.ListLike δ ε ⇒ (ε → Bool) → Parser δ ε satisfy p = do ensure 1 s ← get let w = LL.head s if p w then put (LL.tail s) ≫ return w else fail "satisfy" {-# SPECIALIZE satisfy ∷ (Word8 → Bool) → Parser B.ByteString Word8 #-} {-# SPECIALIZE satisfy ∷ (Word8 → Bool) → Parser LB.ByteString Word8 #-} -- | The parser @skip p@ succeeds for any element for which the predicate -- @p@ returns 'True'. -- -- >space = skip isSpace -- > where isDigit w = w == 9 || w == 10 || w == 13 || w == 32 skip ∷ LL.ListLike δ ε ⇒ (ε → Bool) → Parser δ () skip p = do ensure 1 s ← get if p (LL.head s) then put (LL.tail s) else fail "skip" {-# SPECIALIZE skip ∷ (Word8 → Bool) → Parser B.ByteString () #-} {-# SPECIALIZE skip ∷ (Word8 → Bool) → Parser LB.ByteString () #-} -- | The parser @satisfyWith f p@ transforms an element, and succeeds if -- the predicate @p@ returns 'True' on the transformed value. The -- parser returns the transformed element that was parsed. satisfyWith ∷ LL.ListLike δ ε ⇒ (ε → a) → (a → Bool) → Parser δ a satisfyWith f p = do ensure 1 s ← get let c = f (LL.head s) if p c then put (LL.tail s) ≫ return c else fail "satisfyWith" {-# SPECIALIZE satisfyWith ∷ (Word8 → a) → (a → Bool) → Parser B.ByteString a #-} {-# SPECIALIZE satisfyWith ∷ (Word8 → a) → (a → Bool) → Parser LB.ByteString a #-} -- | Consume @n@ element of input, but succeed only if the predicate -- returns 'True'. takeWith ∷ LL.ListLike δ ε ⇒ Int → (δ → Bool) → Parser δ δ takeWith n p = do ensure n s ← get let (h, t) = LL.splitAt n s if p h then put t ≫ return h else fail "takeWith" {-# SPECIALIZE takeWith ∷ Int → (B.ByteString → Bool) → Parser B.ByteString B.ByteString #-} {-# SPECIALIZE takeWith ∷ Int → (LB.ByteString → Bool) → Parser LB.ByteString LB.ByteString #-} -- | Consume exactly @n@ elements of input. take ∷ LL.ListLike δ ε ⇒ Int → Parser δ δ take n = takeWith n (const True) {-# INLINE take #-} {-# SPECIALIZE take ∷ Int → Parser B.ByteString B.ByteString #-} {-# SPECIALIZE take ∷ Int → Parser LB.ByteString LB.ByteString #-} -- | @string s@ parses a sequence of elements that identically match -- @s@. Returns the parsed string (i.e. @s@). This parser consumes no -- input if it fails (even if a partial match). -- -- /Note/: The behaviour of this parser is different to that of the -- similarly-named parser in Parsec, as this one is all-or-nothing. -- To illustrate the difference, the following parser will fail under -- Parsec given an input of @"for"@: -- -- >string "foo" <|> string "for" -- -- The reason for its failure is that that the first branch is a -- partial match, and will consume the letters @\'f\'@ and @\'o\'@ -- before failing. In Nnaoparsec, the above parser will /succeed/ on -- that input, because the failed first branch will consume nothing. string ∷ (Eq δ, LL.ListLike δ ε) ⇒ δ → Parser δ δ string s = takeWith (LL.length s) (≡ s) {-# INLINE string #-} {-# SPECIALIZE string ∷ B.ByteString → Parser B.ByteString B.ByteString #-} {-# SPECIALIZE string ∷ LB.ByteString → Parser LB.ByteString LB.ByteString #-} stringTransform ∷ (LL.ListLike δ ε, Eq δ) ⇒ (δ → δ) → δ → Parser δ δ stringTransform f s = takeWith (LL.length s) ((≡ f s) . f) {-# INLINE stringTransform #-} {-# SPECIALIZE stringTransform ∷ (B.ByteString → B.ByteString) → B.ByteString → Parser B.ByteString B.ByteString #-} {-# SPECIALIZE stringTransform ∷ (LB.ByteString → LB.ByteString) → LB.ByteString → Parser LB.ByteString LB.ByteString #-} skipWhile ∷ LL.ListLike δ ε ⇒ (ε → Bool) → Parser δ () skipWhile p = go where go = do _ ← wantInput t ← LL.dropWhile p <$> get put t when (LL.null t) go {-# SPECIALIZE skipWhile ∷ (Word8 → Bool) → Parser B.ByteString () #-} {-# SPECIALIZE skipWhile ∷ (Word8 → Bool) → Parser LB.ByteString () #-} -- | Consume input as long as the predicate returns 'False' -- (i.e. until it returns 'True'), and return the consumed input. -- -- This parser does not fail. It will return an empty string if the -- predicate returns 'True' on the first element of input. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. takeTill ∷ LL.ListLike δ ε ⇒ (ε → Bool) → Parser δ δ takeTill p = takeWhile (not . p) {-# INLINE takeTill #-} {-# SPECIALIZE takeTill ∷ (Word8 → Bool) → Parser B.ByteString B.ByteString #-} {-# SPECIALIZE takeTill ∷ (Word8 → Bool) → Parser LB.ByteString LB.ByteString #-} -- | Consume input as long as the predicate returns 'True', and return -- the consumed input. -- -- This parser does not fail. It will return an empty string if the -- predicate returns 'False' on the first element of input. -- -- /Note/: Because this parser does not fail, do not use it with -- combinators such as 'many', because such parsers loop until a -- failure occurs. Careless use will thus result in an infinite loop. takeWhile ∷ LL.ListLike δ ε ⇒ (ε → Bool) → Parser δ δ takeWhile p = go ø where go acc = do inp <- wantInput if inp then do (h, t) ← LL.span p <$> get put t if (LL.null t) then go (h ⊕ acc) else return h else return ø {-# SPECIALIZE takeWhile ∷ (Word8 → Bool) → Parser B.ByteString B.ByteString #-} {-# SPECIALIZE takeWhile ∷ (Word8 → Bool) → Parser LB.ByteString LB.ByteString #-} -- | Consume input as long as the predicate returns 'True', and return -- the consumed input. -- -- This parser requires the predicate to succeed on at least one element -- of input: it will fail if the predicate never returns 'True' or if -- there is no input left. takeWhile1 ∷ LL.ListLike δ ε ⇒ (ε → Bool) → Parser δ δ takeWhile1 p = do (`when` demandInput) =<< LL.null <$> get (h, t) ← LL.span p <$> get when (LL.null h) $ fail "takeWhile1" put t if LL.null t then (h ⊕) `fmap` takeWhile p else return h {-# SPECIALIZE takeWhile1 ∷ (Word8 → Bool) → Parser B.ByteString B.ByteString #-} {-# SPECIALIZE takeWhile1 ∷ (Word8 → Bool) → Parser LB.ByteString LB.ByteString #-} anyElem ∷ LL.ListLike δ ε ⇒ Parser δ ε anyElem = satisfy (const True) {-# INLINE anyElem #-} {-# SPECIALIZE anyElem ∷ Parser B.ByteString Word8 #-} {-# SPECIALIZE anyElem ∷ Parser LB.ByteString Word8 #-} elem ∷ (Eq ε, Show ε, LL.ListLike δ ε) ⇒ ε → Parser δ ε elem x = elem' (show x) x {-# INLINE elem #-} {-# SPECIALIZE elem ∷ Word8 → Parser B.ByteString Word8 #-} {-# SPECIALIZE elem ∷ Word8 → Parser LB.ByteString Word8 #-} notElem ∷ (Eq ε, Show ε, LL.ListLike δ ε) ⇒ ε → Parser δ ε notElem x = notElem' ("not " ⊕ show x) x {-# INLINE notElem #-} {-# SPECIALIZE notElem ∷ Word8 → Parser B.ByteString Word8 #-} {-# SPECIALIZE notElem ∷ Word8 → Parser LB.ByteString Word8 #-} elem' ∷ (Eq ε, LL.ListLike δ ε) ⇒ String → ε → Parser δ ε elem' p c = satisfy (≡ c) p {-# INLINE elem' #-} {-# SPECIALIZE elem' ∷ String → Word8 → Parser B.ByteString Word8 #-} {-# SPECIALIZE elem' ∷ String → Word8 → Parser LB.ByteString Word8 #-} notElem' ∷ (Eq ε, LL.ListLike δ ε) ⇒ String → ε → Parser δ ε notElem' p c = satisfy (≢ c) p {-# INLINE notElem' #-} {-# SPECIALIZE notElem' ∷ String → Word8 → Parser B.ByteString Word8 #-} {-# SPECIALIZE notElem' ∷ String → Word8 → Parser LB.ByteString Word8 #-} -- | Match only if all input has been consumed. endOfInput ∷ LL.ListLike δ ε ⇒ Parser δ () endOfInput = Parser $ \st@(S i _ mo) fa su → case () of _ | not (LL.null i) → fa st [] "endOfInput" | mo ≡ Complete → su () st | otherwise → let su' _ st' = su () (st ⊕ st') fa' st' _ _ = fa (st ⊕ st') [] "endOfInput" in runParser demandInput st fa' su' {-# SPECIALIZE endOfInput ∷ Parser B.ByteString () #-} {-# SPECIALIZE endOfInput ∷ Parser LB.ByteString () #-} -- | Match either a single newline character @\'\\n\'@, or a carriage -- return followed by a newline character @\"\\r\\n\"@. () ∷ Parser δ a → String → Parser δ a p ms = Parser $ \s fa su → runParser p s (\s' sts m → fa s' (ms:sts) m) su {-# INLINE () #-} infix 0 -- | Run a parser. parse ∷ Monoid δ ⇒ Parser δ a → δ → Result δ a parse m s = runParser m (S s ø Incomplete) Fail Done {-# INLINE parse #-} {-# SPECIALIZE parse ∷ Parser B.ByteString a → B.ByteString → Result B.ByteString a #-} {-# SPECIALIZE parse ∷ Parser LB.ByteString a → LB.ByteString → Result LB.ByteString a #-}