Data/Attoparsec/Text.hs

-- |
-- Module      :  Data.Attoparsec.Text
-- Copyright   :  Felipe Lessa 2010-2011, Bryan O'Sullivan 2007-2010
-- License     :  BSD3
--
-- Maintainer  :  felipe.lessa@gmail.com
-- Stability   :  experimental
-- Portability :  unknown
--
-- Simple, efficient combinator parsing for 'T.Text' strings,
-- loosely based on the Parsec library.

module Data.Attoparsec.Text
    (
    -- * Differences from Parsec
    -- $parsec

    -- * Performance considerations
    -- $performance

    -- * Parser types
      I.Parser
    , Result(..)

    -- ** Typeclass instances
    -- $instances

    -- * Running parsers
    , parse
    , parseTest
    , parseWith
    , feed

    -- ** Result conversion
    , maybeResult
    , eitherResult

    -- * Combinators
    , (I.<?>)
    , I.try
    , module Data.Attoparsec.Combinator

    -- * Parsing individual characters
    , I.char
    , I.anyChar
    , I.notChar
    , I.satisfy
    , I.satisfyWith
    , I.skip

    -- ** Special character parsers
    -- $specialcharparsers
    , I.digit
    , I.letter
    , I.space

    -- ** Character classes
    , I.inClass
    , I.notInClass

    -- * Efficient string handling
    , I.string
    , I.skipSpace
    , I.skipWhile
    , I.take
    , I.takeWhile
    , I.takeWhile1
    , I.takeTill

    -- * Text parsing
    , I.endOfLine

    -- * Numeric parsers
    , I.decimal
    , I.hexadecimal
    , I.signed
    , I.double
    , I.rational

    -- * State observation and manipulation functions
    , I.endOfInput
    , I.ensure

    -- * Applicative specializations
    -- $applicative
    , (<*.)
    , (.*>)
    ) where

import Control.Applicative (Applicative, (<*), (*>))
import Data.Attoparsec.Combinator
import qualified Data.Attoparsec.Text.Internal as I
import qualified Data.Text as T

-- $parsec
--
-- Compared to Parsec 3, @attoparsec-text@ makes several
-- tradeoffs.  It is not intended for, or ideal for, all possible
-- uses.
--
-- * While @attoparsec-text@ can consume input incrementally,
--   Parsec cannot.  Incremental input is a huge deal for
--   efficient and secure network and system programming, since
--   it gives much more control to users of the library over
--   matters such as resource usage and the I/O model to use.
--
-- * Much of the performance advantage of @attoparsec-text@ is
--   gained via high-performance parsers such as 'I.takeWhile'
--   and 'I.string'.  If you use complicated combinators that
--   return lists of characters, there really isn't much
--   performance difference the two libraries.
--
-- * Unlike Parsec 3, @attoparsec-text@ does not support being
--   used as a monad transformer.  This is mostly a matter of the
--   implementor not having needed that functionality.
--
-- * @attoparsec-text@ is specialised to deal only with strict
--   'T.Text' input.  Efficiency concernts rule out both lists
--   and lazy texts.  The usual use for lazy texts would be to
--   allow consumption of very large input without a large
--   footprint.  For this need, @attoparsec-text@'s incremental
--   input provides an excellent substitute, with much more
--   control over when input takes place.
--
-- * Parsec parsers can produce more helpful error messages than
--   @attoparsec-text@ parsers.  This is a matter of focus:
--   @attoparsec-text@ avoids the extra book-keeping in favour of
--   higher performance.

-- $performance
--
-- To actually achieve high performance, there are a few guidelines
-- that it is useful to follow.
--
-- Use the 'T.Text'-oriented parsers whenever possible,
-- e.g. 'I.takeWhile1' instead of 'many1' 'I.anyChar'.  There is
-- about a factor of 100 difference in performance between the
-- two kinds of parser.
--
-- For very simple character-testing predicates, write them by
-- hand instead of using 'I.inClass' or 'I.notInClass'.  For
-- instance, both of these predicates test for an end-of-line
-- character, but the first is much faster than the second:
--
-- >endOfLine_fast c = w == '\r' || c == '\n'
-- >endOfLine_slow   = inClass "\r\n"
--
-- Make active use of benchmarking and profiling tools to measure,
-- find the problems with, and improve the performance of your parser.

-- $instances
--
-- The 'I.Parser' type is an instance of the following classes:
--
-- * 'Monad', where 'fail' throws an exception (i.e. fails) with an
--   error message.
--
-- * 'Functor' and 'Applicative', which follow the usual definitions.
--
-- * 'MonadPlus', where 'mzero' fails (with no error message) and
--   'mplus' executes the right-hand parser if the left-hand one
--   fails.
--
-- * 'Alternative', which follows 'MonadPlus'.
--
-- The 'Result' type is an instance of 'Functor', where 'fmap'
-- transforms the value in a 'Done' result.

-- | The result of a parse.
data Result r = Fail !T.Text [String] String
              -- ^ The parse failed.  The 'T.Text' is the input
              -- that had not yet been consumed when the failure
              -- occurred.  The @[@'String'@]@ is a list of contexts
              -- in which the error occurred.  The 'String' is the
              -- message describing the error, if any.
              | Partial (T.Text -> Result r)
              -- ^ Supply this continuation with more input so that
              -- the parser can resume.  To indicate that no more
              -- input is available, use an 'T.empty' string.
              | Done !T.Text r
              -- ^ The parse succeeded.  The 'T.Text' is the
              -- input that had not yet been consumed (if any) when
              -- the parse succeeded.

instance Show r => Show (Result r) where
    show (Fail bs stk msg) =
        "Fail " ++ show bs ++ " " ++ show stk ++ " " ++ show msg
    show (Partial _)       = "Partial _"
    show (Done bs r)       = "Done " ++ show bs ++ " " ++ show r

-- | If a parser has returned a 'Partial' result, supply it with more
-- input.
feed :: Result r -> T.Text -> Result r
feed f@(Fail _ _ _) _ = f
feed (Partial k) d    = k d
feed (Done bs r) d    = Done (T.append bs d) r

fmapR :: (a -> b) -> Result a -> Result b
fmapR _ (Fail st stk msg) = Fail st stk msg
fmapR f (Partial k)       = Partial (fmapR f . k)
fmapR f (Done bs r)       = Done bs (f r)

instance Functor Result where
    fmap = fmapR

-- | Run a parser and print its result to standard output.
parseTest :: (Show a) => I.Parser a -> T.Text -> IO ()
parseTest p s = print (parse p s)

translate :: I.Result a -> Result a
translate (I.Fail st stk msg) = Fail (I.input st) stk msg
translate (I.Partial k)       = Partial (translate . k)
translate (I.Done st r)       = Done (I.input st) r

-- | Run a parser and return its result.
parse :: I.Parser a -> T.Text -> Result a
parse m s = translate (I.parse m s)
{-# INLINE parse #-}

-- | Run a parser with an initial input string, and a monadic action
-- that can supply more input if needed.
parseWith :: Monad m =>
             (m T.Text)
          -- ^ An action that will be executed to provide the parser
          -- with more input, if necessary.  The action must return an
          -- 'T.empty' string when there is no more input available.
          -> I.Parser a
          -> T.Text
          -- ^ Initial input for the parser.
          -> m (Result a)
parseWith refill p s = step $ I.parse p s
  where step (I.Fail st stk msg) = return $! Fail (I.input st) stk msg
        step (I.Partial k)       = (step . k) =<< refill
        step (I.Done st r)       = return $! Done (I.input st) r

-- | Convert a 'Result' value to a 'Maybe' value. A 'Partial' result
-- is treated as failure.
maybeResult :: Result r -> Maybe r
maybeResult (Done _ r) = Just r
maybeResult _          = Nothing

-- | Convert a 'Result' value to an 'Either' value. A 'Partial' result
-- is treated as failure.
eitherResult :: Result r -> Either String r
eitherResult (Done _ r)     = Right r
eitherResult (Fail _ _ msg) = Left msg
eitherResult _              = Left "Result: incomplete input"

-- $specialcharparsers
--
-- Special parser for characters.  Unlike the original
-- @attoparsec@ package, these parsers do work correctly for all
-- encodings.  Internally "Data.Char" module is used.


-- $applicative
--
-- We provide specializations of @\<*@ and @*\>@ as @\<*.@ and
-- @.*\>@, respectively.  Together with @IsString@ instance of
-- 'I.Parser', you may write parsers applicatively more easily.
-- For example:
--
-- > paren p = "(" .*> p <*. ")"
--
-- instead of the more verbose
--
-- > paren p = string "(" *> p <* string ")"

-- | Same as @Applicative@'s @\<*@ but specialized to 'T.Text'
-- on the second argument.
(<*.) :: Applicative f => f a -> f T.Text -> f a
(<*.) = (<*)

-- | Same as @Applicative@'s @*\>@ but specialized to 'T.Text'
-- on the first argument.
(.*>) :: Applicative f => f T.Text -> f a -> f a
(.*>) = (*>)