-- | -- Module : Data.Attoparsec.Text -- Copyright : Felipe Lessa 2010-2011, Bryan O'Sullivan 2007-2010 -- License : BSD3 -- -- Maintainer : felipe.lessa@gmail.com -- Stability : experimental -- Portability : unknown -- -- Simple, efficient combinator parsing for 'T.Text' strings, -- loosely based on the Parsec library. module Data.Attoparsec.Text ( -- * Differences from Parsec -- $parsec -- * Performance considerations -- $performance -- * Parser types I.Parser , Result(..) -- ** Typeclass instances -- $instances -- * Running parsers , parse , parseTest , parseWith , feed -- ** Result conversion , maybeResult , eitherResult -- * Combinators , (I.) , I.try , module Data.Attoparsec.Combinator -- * Parsing individual characters , I.char , I.anyChar , I.notChar , I.satisfy , I.satisfyWith , I.skip -- ** Special character parsers -- $specialcharparsers , I.digit , I.letter , I.space -- ** Character classes , I.inClass , I.notInClass -- * Efficient string handling , I.string , I.skipSpace , I.skipWhile , I.take , I.takeWhile , I.takeWhile1 , I.takeTill -- * Text parsing , I.endOfLine -- * Numeric parsers , I.decimal , I.hexadecimal , I.signed , I.double , I.rational -- * State observation and manipulation functions , I.endOfInput , I.ensure -- * Applicative specializations -- $applicative , (<*.) , (.*>) ) where import Control.Applicative (Applicative, (<*), (*>)) import Data.Attoparsec.Combinator import qualified Data.Attoparsec.Text.Internal as I import qualified Data.Text as T -- $parsec -- -- Compared to Parsec 3, @attoparsec-text@ makes several -- tradeoffs. It is not intended for, or ideal for, all possible -- uses. -- -- * While @attoparsec-text@ can consume input incrementally, -- Parsec cannot. Incremental input is a huge deal for -- efficient and secure network and system programming, since -- it gives much more control to users of the library over -- matters such as resource usage and the I/O model to use. -- -- * Much of the performance advantage of @attoparsec-text@ is -- gained via high-performance parsers such as 'I.takeWhile' -- and 'I.string'. If you use complicated combinators that -- return lists of characters, there really isn't much -- performance difference the two libraries. -- -- * Unlike Parsec 3, @attoparsec-text@ does not support being -- used as a monad transformer. This is mostly a matter of the -- implementor not having needed that functionality. -- -- * @attoparsec-text@ is specialised to deal only with strict -- 'T.Text' input. Efficiency concernts rule out both lists -- and lazy texts. The usual use for lazy texts would be to -- allow consumption of very large input without a large -- footprint. For this need, @attoparsec-text@'s incremental -- input provides an excellent substitute, with much more -- control over when input takes place. -- -- * Parsec parsers can produce more helpful error messages than -- @attoparsec-text@ parsers. This is a matter of focus: -- @attoparsec-text@ avoids the extra book-keeping in favour of -- higher performance. -- $performance -- -- To actually achieve high performance, there are a few guidelines -- that it is useful to follow. -- -- Use the 'T.Text'-oriented parsers whenever possible, -- e.g. 'I.takeWhile1' instead of 'many1' 'I.anyChar'. There is -- about a factor of 100 difference in performance between the -- two kinds of parser. -- -- For very simple character-testing predicates, write them by -- hand instead of using 'I.inClass' or 'I.notInClass'. For -- instance, both of these predicates test for an end-of-line -- character, but the first is much faster than the second: -- -- >endOfLine_fast c = w == '\r' || c == '\n' -- >endOfLine_slow = inClass "\r\n" -- -- Make active use of benchmarking and profiling tools to measure, -- find the problems with, and improve the performance of your parser. -- $instances -- -- The 'I.Parser' type is an instance of the following classes: -- -- * 'Monad', where 'fail' throws an exception (i.e. fails) with an -- error message. -- -- * 'Functor' and 'Applicative', which follow the usual definitions. -- -- * 'MonadPlus', where 'mzero' fails (with no error message) and -- 'mplus' executes the right-hand parser if the left-hand one -- fails. -- -- * 'Alternative', which follows 'MonadPlus'. -- -- The 'Result' type is an instance of 'Functor', where 'fmap' -- transforms the value in a 'Done' result. -- | The result of a parse. data Result r = Fail !T.Text [String] String -- ^ The parse failed. The 'T.Text' is the input -- that had not yet been consumed when the failure -- occurred. The @[@'String'@]@ is a list of contexts -- in which the error occurred. The 'String' is the -- message describing the error, if any. | Partial (T.Text -> Result r) -- ^ Supply this continuation with more input so that -- the parser can resume. To indicate that no more -- input is available, use an 'T.empty' string. | Done !T.Text r -- ^ The parse succeeded. The 'T.Text' is the -- input that had not yet been consumed (if any) when -- the parse succeeded. instance Show r => Show (Result r) where show (Fail bs stk msg) = "Fail " ++ show bs ++ " " ++ show stk ++ " " ++ show msg show (Partial _) = "Partial _" show (Done bs r) = "Done " ++ show bs ++ " " ++ show r -- | If a parser has returned a 'Partial' result, supply it with more -- input. feed :: Result r -> T.Text -> Result r feed f@(Fail _ _ _) _ = f feed (Partial k) d = k d feed (Done bs r) d = Done (T.append bs d) r fmapR :: (a -> b) -> Result a -> Result b fmapR _ (Fail st stk msg) = Fail st stk msg fmapR f (Partial k) = Partial (fmapR f . k) fmapR f (Done bs r) = Done bs (f r) instance Functor Result where fmap = fmapR -- | Run a parser and print its result to standard output. parseTest :: (Show a) => I.Parser a -> T.Text -> IO () parseTest p s = print (parse p s) translate :: I.Result a -> Result a translate (I.Fail st stk msg) = Fail (I.input st) stk msg translate (I.Partial k) = Partial (translate . k) translate (I.Done st r) = Done (I.input st) r -- | Run a parser and return its result. parse :: I.Parser a -> T.Text -> Result a parse m s = translate (I.parse m s) {-# INLINE parse #-} -- | Run a parser with an initial input string, and a monadic action -- that can supply more input if needed. parseWith :: Monad m => (m T.Text) -- ^ An action that will be executed to provide the parser -- with more input, if necessary. The action must return an -- 'T.empty' string when there is no more input available. -> I.Parser a -> T.Text -- ^ Initial input for the parser. -> m (Result a) parseWith refill p s = step $ I.parse p s where step (I.Fail st stk msg) = return $! Fail (I.input st) stk msg step (I.Partial k) = (step . k) =<< refill step (I.Done st r) = return $! Done (I.input st) r -- | Convert a 'Result' value to a 'Maybe' value. A 'Partial' result -- is treated as failure. maybeResult :: Result r -> Maybe r maybeResult (Done _ r) = Just r maybeResult _ = Nothing -- | Convert a 'Result' value to an 'Either' value. A 'Partial' result -- is treated as failure. eitherResult :: Result r -> Either String r eitherResult (Done _ r) = Right r eitherResult (Fail _ _ msg) = Left msg eitherResult _ = Left "Result: incomplete input" -- $specialcharparsers -- -- Special parser for characters. Unlike the original -- @attoparsec@ package, these parsers do work correctly for all -- encodings. Internally "Data.Char" module is used. -- $applicative -- -- We provide specializations of @\<*@ and @*\>@ as @\<*.@ and -- @.*\>@, respectively. Together with @IsString@ instance of -- 'I.Parser', you may write parsers applicatively more easily. -- For example: -- -- > paren p = "(" .*> p <*. ")" -- -- instead of the more verbose -- -- > paren p = string "(" *> p <* string ")" -- | Same as @Applicative@'s @\<*@ but specialized to 'T.Text' -- on the second argument. (<*.) :: Applicative f => f a -> f T.Text -> f a (<*.) = (<*) -- | Same as @Applicative@'s @*\>@ but specialized to 'T.Text' -- on the first argument. (.*>) :: Applicative f => f T.Text -> f a -> f a (.*>) = (*>)