{-# LANGUAGE UnicodeSyntax #-} {-| Module : Data.Nanoparsec Copyright : © 2011 Maciej Piechotka License : BSD3 Maintainer : uzytkownik2@gmail.com Stability : experimental Portability : unknown Simple, efficient combinator parsing for 'LL.ListLike' lists based on Attoparsec library. -} module Data.Nanoparsec ( -- * Differences from Attoparsec -- $attoparsec -- * Differences from Parsec -- $parsec -- * Performance considerations -- $performance -- * Parser types I.Parser , Result(..) -- ** Typeclass instances -- $instances -- * Running parsers , parse , feed , parseWith , parseTest -- ** Result conversion , maybeResult , eitherResult -- * Combinators , (I.) , I.try , module Data.Nanoparsec.Combinator -- * Parsing infividual elements , I.elem , I.anyElem , I.satisfy , I.satisfyWith , I.skip -- * Efficient sublist handling , I.string , I.skipWhile , I.take , I.takeWhile , I.takeWhile1 , I.takeTill -- * State observation and manipulation functions , I.endOfInput , I.ensure -- * Applicative specializations -- $applicative , (<*.) , (.*>) ) where import Control.Applicative import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as LB import Data.Monoid import Data.Nanoparsec.Combinator import qualified Data.Nanoparsec.Internal as I -- $attoparsec -- -- Compared to Attoparsec and Attoparsec-text Nanoparsec is a little (around -- 6%) slower. However it allows more abstract parsers on similar level as -- Parsec 3 (except the monad transformation). -- $parsec -- -- Compared to Parsec 3, Nanoparsec makes several tradeoffs. It is -- not intended for, or ideal for, all possible uses. -- -- * While Nanoparsec can consume input incrementally, Parsec cannot. -- Incremental input is a huge deal for efficient and secure network -- and system programming, since it gives much more control to users -- of the library over matters such as resource usage and the I/O -- model to use. -- -- * Much of the performance advantage of Nanoparsec is gained via -- high-performance parsers such as 'I.takeWhile' and 'I.string'. -- If you use complicated combinators that return lists of elements or -- characters, there really isn't much performance difference the -- two libraries. -- -- * Unlike Parsec 3, Nanoparsec does not support being used as a -- monad transformer. This is mostly a matter of the implementor -- not having needed that functionality. -- -- * Parsec parsers can produce more helpful error messages than -- Nanoparsec parsers. This is a matter of focus: Nanoparsec avoids -- the extra book-keeping in favour of higher performance. -- $performance -- -- If you write an Nanoparsec-based parser carefully, it can be -- realistic to expect it to perform within a factor of 2.4 of a -- hand-rolled C parser (measuring megabytes parsed per second). -- -- To actually achieve high performance, there are a few guidelines -- that it is useful to follow. -- -- Use the list-oriented parsers whenever possible, -- e.g. 'I.takeWhile1' instead of 'many1' 'I.anyWord8'. There is -- about a factor of 100 difference in performance between the two -- kinds of parser. -- -- Make active use of benchmarking and profiling tools to measure, -- find the problems with, and improve the performance of your parser. -- $instances -- -- The 'I.Parser' type is an instance of the following classes: -- -- * 'Monad', where 'fail' throws an exception (i.e. fails) with an -- error message. -- -- * 'Functor' and 'Applicative', which follow the usual definitions. -- -- * 'Monoid', where 'mempty' fails (with no error message) and -- 'mappend' executes the right-hand parser if the left-hand one -- fails. -- -- * 'MonadPlus' and 'Alternative', which follows 'MonadPlus'. -- -- The 'Result' type is an instance of 'Functor', where 'fmap' -- transforms the value in a 'Done' result. (⊕) ∷ Monoid m ⇒ m → m → m (⊕) = mappend data Result δ r = Fail !δ [String] String | Partial (δ → Result δ r) | Done !δ r instance (Show δ, Show r) ⇒ Show (Result δ r) where show (Fail bs stk msg) = "Fail " ++ show bs ++ " " ++ show stk ++ " " ++ show msg show (Partial _) = "Partial _" show (Done bs r) = "Done " ++ show bs ++ " " ++ show r feed ∷ Monoid δ ⇒ Result δ r → δ → Result δ r feed f@(Fail _ _ _) _ = f feed (Partial k) d = k d feed (Done bs r) d = Done (bs ⊕ d) r {-# SPECIALIZE feed ∷ Result B.ByteString r → B.ByteString → Result B.ByteString r #-} {-# SPECIALIZE feed ∷ Result LB.ByteString r → LB.ByteString → Result LB.ByteString r #-} instance Functor (Result δ) where _ `fmap` Fail st stk msg = Fail st stk msg f `fmap` Partial k = Partial (fmap f . k) f `fmap` Done bs r = Done bs (f r) -- | Run a parser and print its result to standard output. parseTest ∷ (Show a, Show δ, Monoid δ) ⇒ I.Parser δ a → δ → IO () parseTest p s = print (parse p s) {-# SPECIALIZE parseTest ∷ Show a ⇒ I.Parser B.ByteString a → B.ByteString → IO () #-} {-# SPECIALIZE parseTest ∷ Show a ⇒ I.Parser LB.ByteString a → LB.ByteString → IO () #-} translate ∷ I.Result δ a → Result δ a translate (I.Fail st stk msg) = Fail (I.input st) stk msg translate (I.Partial k) = Partial (translate . k) translate (I.Done r st) = Done (I.input st) r -- | Run a parser and return its result. parse :: Monoid δ ⇒ I.Parser δ a → δ → Result δ a parse m s = translate (I.parse m s) {-# INLINE parse #-} {-# SPECIALIZE parse ∷ I.Parser B.ByteString a → B.ByteString → Result B.ByteString a #-} {-# SPECIALIZE parse ∷ I.Parser LB.ByteString a → LB.ByteString → Result LB.ByteString a #-} -- | Run a parser with an initial input string, and a monadic action -- that can supply more input if needed. parseWith ∷ (Monad m, Monoid δ) ⇒ (m δ) -- ^ An action that will be executed to provide the parser -- with more input, if necessary. The action must return an -- 'B.empty' string when there is no more input available. → I.Parser δ a → δ -- ^ Initial input for the parser. → m (Result δ a) parseWith refill p s = step $ I.parse p s where step (I.Fail st stk msg) = return $! Fail (I.input st) stk msg step (I.Partial k) = (step . k) =<< refill step (I.Done r st) = return $! Done (I.input st) r {-# SPECIALIZE parseWith ∷ Monad m ⇒ (m B.ByteString) → I.Parser B.ByteString a → B.ByteString → m (Result B.ByteString a) #-} {-# SPECIALIZE parseWith ∷ Monad m ⇒ (m LB.ByteString) → I.Parser LB.ByteString a → LB.ByteString → m (Result LB.ByteString a) #-} -- | Convert a 'Result' value to a 'Maybe' value. A 'Partial' result -- is treated as failure. maybeResult ∷ Result δ r → Maybe r maybeResult (Done _ r) = Just r maybeResult _ = Nothing -- | Convert a 'Result' value to an 'Either' value. A 'Partial' result -- is treated as failure. eitherResult ∷ Result δ r → Either String r eitherResult (Done _ r) = Right r eitherResult (Fail _ _ msg) = Left msg eitherResult _ = Left "Result: incomplete input" -- $applicative -- -- We provide specializations of @\<*@ and @*\>@ as @\<*.@ and -- @.*\>@, respectively. Together with @IsString@ instance of -- 'I.Parser', you may write parsers applicatively more easily. -- For example: -- -- > paren p = "(" .*> p <*. ")" -- -- instead of the more verbose -- -- > paren p = string "(" *> p <* string ")" -- | Same as @Applicative@'s @\<*@ but specialized. (<*.) :: Monoid δ ⇒ I.Parser δ a → I.Parser δ δ → I.Parser δ a (<*.) = (<*) -- | Same as @Applicative@'s @*\>@ but specialized. (.*>) :: Monoid δ ⇒ I.Parser δ δ → I.Parser δ a → I.Parser δ a (.*>) = (*>)