Data/Picoparsec.hs

{-# LANGUAGE Haskell2010 #-}
-- |
-- Module      :  Data.Picoparsec
-- Copyright   :  Bryan O'Sullivan 2007-2011, Mario Blažević 2014-2015
-- License     :  BSD3
--
-- Maintainer  :  Mario Blažević
-- Stability   :  experimental
-- Portability :  unknown
--
-- Simple, efficient combinator parsing for
-- 'Data.Monoid.Cancellative.LeftGCDMonoid' and
-- 'Data.Monoid.Factorial.FactorialMonoid' inputs, loosely based on
-- Parsec and derived from Attoparsec.

module Data.Picoparsec
    (
    -- * Differences from Parsec
    -- $parsec

    -- * Differences from Attoparsec
    -- $attoparsec
      
    -- * Incremental input
    -- $incremental

    -- * Performance considerations
    -- $performance

    -- * Parser types
      I.Parser
    , Result
    , T.IResult(..)
    , I.compareResults

    -- * Running parsers
    , parse
    , feed
    , I.parseOnly
    , parseWith
    , parseTest

    -- ** Result conversion
    , maybeResult
    , eitherResult

    -- * Parsing individual tokens
    , I.anyToken
    , I.notToken
    , I.peekToken
    , I.satisfy
    , I.satisfyWith
    , I.skip

    -- ** Parsing individual characters
    , I.anyChar
    , I.char
    , I.notChar
    , I.peekChar
    , I.peekChar'
    , I.satisfyChar
    , I.satisfyCharInput

    -- * Efficient string handling
    , I.scan
    , I.string
    , I.skipWhile
    , I.take
    , I.takeWhile
    , I.takeWhile1
    , I.takeTill

    -- ** Efficient character string handling
    , I.scanChars
    , I.skipCharsWhile
    , I.takeCharsWhile
    , I.takeCharsWhile1
    , I.takeCharsTill
    , I.takeTillChar
    , I.takeTillChar1

    -- ** Consume all remaining input
    , I.takeRest

    -- * Text parsing
    , I.endOfLine

    -- * Combinators
    , try
    , (<?>)
    , choice
    , count
    , option
    , many'
    , many1
    , many1'
    , manyTill
    , manyTill'
    , sepBy
    , sepBy'
    , sepBy1
    , sepBy1'
    , skipMany
    , skipMany1
    , eitherP

    -- * State observation and manipulation functions
    , I.endOfInput
    , I.atEnd
    ) where

import Data.Monoid -- (Monoid)

import Data.Picoparsec.Combinator
import qualified Data.Picoparsec.Monoid.Internal as I
import qualified Data.Picoparsec.Internal as I
import Data.Picoparsec.Monoid.Internal (Result, parse)
import qualified Data.Picoparsec.Internal.Types as T

import Prelude

-- $parsec
--
-- Compared to Parsec 3, Picoparsec makes several tradeoffs.  It is not intended for, or ideal for, all possible uses.
--
-- * While Picoparsec can consume input incrementally, Parsec cannot.  Incremental input is a huge deal for efficient
-- and secure network and system programming, since it gives much more control to users of the library over matters such
-- as resource usage and the I/O model to use.
--
-- * Much of the performance advantage of Picoparsec is gained via high-performance parsers such as 'I.takeWhile' and
-- 'I.string'.  If you use complicated combinators that return lists of bytes or characters, there is less performance
-- difference between the two libraries.
--
-- * Unlike Parsec 3, Picoparsec does not support being used as a monad transformer.
--
-- * Parsec parsers can produce more helpful error messages than Picoparsec parsers.  This is a matter of focus:
-- Picoparsec avoids the extra book-keeping in favour of higher performance.
--
-- * Parsec comes with built-in support for user state. Picoparsec does not maintain any state by default, in order to
-- maximize performance. If your parsing logic needs depends on it, you can track the state by wrapping your input in a
-- 'Stateful' monoid.

-- $attoparsec
--
-- Compared to Attoparsec, Picoparsec trades away some performance for generality. Attoparsec works only with
-- 'ByteString' and 'Text' inputs. If your input type is one of these two, Attoparsec is the better choice. Use
-- Picoparsec if you want your parser to be applicable to a different input type, especially if you wish to leave the
-- choice of that input type to the end user.
--
-- Some Attoparsec primitives like 'word8' are missing because they are specific to ByteString inputs. Picoparsec is
-- otherwise largely compatible with Attoparsec, having copied from it both the core logic and the full set of parsing
-- combinators.

-- $incremental
--
-- Picoparsec supports incremental input, meaning that you can feed it a chunk of input that represents only part of the
-- expected total amount of data to parse. If your parser reaches the end of a fragment of input and could consume more
-- input, it will suspend parsing and return a 'T.Partial' continuation.
--
-- Supplying the 'T.Partial' continuation with another string will resume parsing at the point where it was
-- suspended. You must be prepared for the result of the resumed parse to be another 'T.Partial' continuation.
--
-- To indicate that you have no more input, supply the 'T.Partial' continuation with an empty string.
--
-- Remember that some parsing combinators will not return a result until they reach the end of input.  They may thus
-- cause 'T.Partial' results to be returned.
--
-- If you do not need support for incremental input, consider using the 'I.parseOnly' function to run your parser.  It
-- will never prompt for more input.

-- $performance
--
-- A Picoparsec-based parser applied to a strict ByteString or Text input will generally be somewhat slower than
-- Attoparsec, but if properly optimized and specialized the difference should be less than 50%.
--
-- To actually achieve high performance, there are a few guidelines that it is useful to follow.
--
-- * Use the input-returning parsers whenever possible, e.g. 'I.takeWhile1' instead of 'many1' 'I.anyToken'.  There is a
-- large difference in performance between the two kinds of parsers.
--
-- * If you are parsing textual inputs, use the specialized character parsers; e.g. 'I.takeCharsWhile1' instead of
-- 'I.takeWhile1'.
--
-- * If the 'mappend' operation is slow for the input monoid type, it may drastically slow down the parsing of large
-- inputs. Try wrapping the input with the 'Concat' newtype to make the 'mappend' time constant.
--
-- * Use the INLINE, INLINABLE, and SPECIALIZE pragmas to optimize the more important parts of your parser for the likely
-- input types.
--
-- * Make active use of benchmarking and profiling tools to measure, find the problems with, and improve the performance
-- of your parser.

-- | Run a parser and print its result to standard output.
parseTest :: (Monoid t, Show t, Show a) => I.Parser t a -> t -> IO ()
parseTest p s = print (parse p s)

-- | Run a parser with an initial input string, and a monadic action
-- that can supply more input if needed.
parseWith :: (Monoid t, Monad m) => m t
          -- ^ An action that will be executed to provide the parser
          -- with more input, if necessary.  The action must return an
          -- 'mempty' string when there is no more input available.
          -> I.Parser t a
          -> t
          -- ^ Initial input for the parser.
          -> m (Result t a)
parseWith refill p s = step $ parse p s
  where step (T.Partial k) = (step . k) =<< refill
        step r             = return r
{-# INLINE parseWith #-}

-- | Convert a 'Result' value to a 'Maybe' value. A 'T.Partial' result
-- is treated as failure.
maybeResult :: Result t r -> Maybe r
maybeResult (T.Done _ r) = Just r
maybeResult _            = Nothing

-- | Convert a 'Result' value to an 'Either' value. A 'T.Partial'
-- result is treated as failure.
eitherResult :: Result t r -> Either String r
eitherResult (T.Done _ r)     = Right r
eitherResult (T.Fail _ _ msg) = Left msg
eitherResult _                = Left "Result: incomplete input"