Data/Nanoparsec.hs

{-# LANGUAGE UnicodeSyntax #-}
{-|
Module      : Data.Nanoparsec
Copyright   : © 2011  Maciej Piechotka 
License     : BSD3

Maintainer  : uzytkownik2@gmail.com
Stability   : experimental
Portability : unknown

Simple, efficient combinator parsing for 'LL.ListLike' lists based on
Attoparsec library.
-}
module Data.Nanoparsec
  (
    -- * Differences from Attoparsec
    -- $attoparsec
    
    -- * Differences from Parsec
    -- $parsec
    
    -- * Performance considerations
    -- $performance
    
    -- * Parser types
    I.Parser
  , Result(..)
    
    -- ** Typeclass instances
    -- $instances
    
    -- * Running parsers
  , parse
  , feed
  , parseWith
  , parseTest
    
    -- ** Result conversion
  , maybeResult
  , eitherResult
    
    -- * Combinators
  , (I.<?>)
  , I.try
  , module Data.Nanoparsec.Combinator
    
    -- * Parsing infividual elements
  , I.elem
  , I.anyElem
  , I.satisfy
  , I.satisfyWith
  , I.skip
    
    -- * Efficient sublist handling
  , I.string
  , I.skipWhile
  , I.take
  , I.takeWhile
  , I.takeWhile1
  , I.takeTill
    
    -- * State observation and manipulation functions
  , I.endOfInput
  , I.ensure

    -- * Applicative specializations
    -- $applicative
  , (<*.)
  , (.*>)
  )
where

import Control.Applicative
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as LB
import Data.Monoid
import Data.Nanoparsec.Combinator
import qualified Data.Nanoparsec.Internal as I

-- $attoparsec
--
-- Compared to Attoparsec and Attoparsec-text Nanoparsec is a little (around
-- 6%) slower. However it allows more abstract parsers on similar level as 
-- Parsec 3 (except the monad transformation). 

-- $parsec
--
-- Compared to Parsec 3, Nanoparsec makes several tradeoffs.  It is
-- not intended for, or ideal for, all possible uses.
--
-- * While Nanoparsec can consume input incrementally, Parsec cannot.
--   Incremental input is a huge deal for efficient and secure network
--   and system programming, since it gives much more control to users
--   of the library over matters such as resource usage and the I/O
--   model to use.
--
-- * Much of the performance advantage of Nanoparsec is gained via
--   high-performance parsers such as 'I.takeWhile' and 'I.string'.
--   If you use complicated combinators that return lists of elements or
--   characters, there really isn't much performance difference the
--   two libraries.
--
-- * Unlike Parsec 3, Nanoparsec does not support being used as a
--   monad transformer.  This is mostly a matter of the implementor
--   not having needed that functionality.
--
-- * Parsec parsers can produce more helpful error messages than
--   Nanoparsec parsers.  This is a matter of focus: Nanoparsec avoids
--   the extra book-keeping in favour of higher performance.

-- $performance
--
-- If you write an Nanoparsec-based parser carefully, it can be
-- realistic to expect it to perform within a factor of 2.4 of a
-- hand-rolled C parser (measuring megabytes parsed per second).
--
-- To actually achieve high performance, there are a few guidelines
-- that it is useful to follow.
--
-- Use the list-oriented parsers whenever possible,
-- e.g. 'I.takeWhile1' instead of 'many1' 'I.anyWord8'.  There is
-- about a factor of 100 difference in performance between the two
-- kinds of parser.
--
-- Make active use of benchmarking and profiling tools to measure,
-- find the problems with, and improve the performance of your parser.

-- $instances
--
-- The 'I.Parser' type is an instance of the following classes:
--
-- * 'Monad', where 'fail' throws an exception (i.e. fails) with an
--   error message.
--
-- * 'Functor' and 'Applicative', which follow the usual definitions.
--
-- * 'Monoid', where 'mempty' fails (with no error message) and
--   'mappend' executes the right-hand parser if the left-hand one
--   fails.
--
-- * 'MonadPlus' and 'Alternative', which follows 'MonadPlus'.
--
-- The 'Result' type is an instance of 'Functor', where 'fmap'
-- transforms the value in a 'Done' result.

(⊕) ∷ Monoid m ⇒ m → m → m
(⊕) = mappend

data Result δ r = Fail !δ [String] String
                | Partial (δ → Result δ r)
                | Done !δ r

instance (Show δ, Show r) ⇒ Show (Result δ r) where
    show (Fail bs stk msg)
        = "Fail " ++ show bs ++ " " ++ show stk ++ " " ++ show msg
    show (Partial _) = "Partial _"
    show (Done bs r) = "Done " ++ show bs ++ " " ++ show r

feed ∷ Monoid δ ⇒ Result δ r → δ → Result δ r
feed f@(Fail _ _ _) _ = f
feed (Partial k)    d = k d
feed (Done bs r)    d = Done (bs ⊕ d) r
{-# SPECIALIZE feed ∷ Result B.ByteString r → B.ByteString → Result B.ByteString r #-}
{-# SPECIALIZE feed ∷ Result LB.ByteString r → LB.ByteString → Result LB.ByteString r #-}

instance Functor (Result δ) where
    _ `fmap` Fail st stk msg = Fail st stk msg
    f `fmap` Partial k       = Partial (fmap f . k)
    f `fmap` Done bs r       = Done bs (f r)

-- | Run a parser and print its result to standard output.
parseTest ∷ (Show a, Show δ, Monoid δ) ⇒ I.Parser δ a → δ → IO ()
parseTest p s = print (parse p s)
{-# SPECIALIZE parseTest ∷ Show a ⇒ I.Parser B.ByteString a → B.ByteString → IO () #-}
{-# SPECIALIZE parseTest ∷ Show a ⇒ I.Parser LB.ByteString a → LB.ByteString → IO () #-}

translate ∷ I.Result δ a → Result δ a
translate (I.Fail st stk msg) = Fail (I.input st) stk msg
translate (I.Partial k)       = Partial (translate . k)
translate (I.Done r st)       = Done (I.input st) r

-- | Run a parser and return its result.
parse :: Monoid δ ⇒ I.Parser δ a → δ → Result δ a
parse m s = translate (I.parse m s)
{-# INLINE parse #-}
{-# SPECIALIZE parse ∷ I.Parser B.ByteString a → B.ByteString → Result B.ByteString a #-}
{-# SPECIALIZE parse ∷ I.Parser LB.ByteString a → LB.ByteString → Result LB.ByteString a #-}

-- | Run a parser with an initial input string, and a monadic action
-- that can supply more input if needed.
parseWith ∷ (Monad m, Monoid δ)
          ⇒ (m δ)
          -- ^ An action that will be executed to provide the parser
          -- with more input, if necessary.  The action must return an
          -- 'B.empty' string when there is no more input available.
          → I.Parser δ a
          → δ
          -- ^ Initial input for the parser.
          → m (Result δ a)
parseWith refill p s
    = step $ I.parse p s
    where step (I.Fail st stk msg) = return $! Fail (I.input st) stk msg
          step (I.Partial k)       = (step . k) =<< refill
          step (I.Done r st)       = return $! Done (I.input st) r
{-# SPECIALIZE parseWith ∷ Monad m
                         ⇒ (m B.ByteString)
                         → I.Parser B.ByteString a
                         → B.ByteString
                         → m (Result B.ByteString a) #-}
{-# SPECIALIZE parseWith ∷ Monad m
                         ⇒ (m LB.ByteString)
                         → I.Parser LB.ByteString a
                         → LB.ByteString
                         → m (Result LB.ByteString a) #-}

-- | Convert a 'Result' value to a 'Maybe' value. A 'Partial' result
-- is treated as failure.
maybeResult ∷ Result δ r → Maybe r
maybeResult (Done _ r) = Just r
maybeResult _          = Nothing

-- | Convert a 'Result' value to an 'Either' value. A 'Partial' result
-- is treated as failure.
eitherResult ∷ Result δ r → Either String r
eitherResult (Done _ r)     = Right r
eitherResult (Fail _ _ msg) = Left msg
eitherResult _              = Left "Result: incomplete input"

-- $applicative
--
-- We provide specializations of @\<*@ and @*\>@ as @\<*.@ and
-- @.*\>@, respectively.  Together with @IsString@ instance of
-- 'I.Parser', you may write parsers applicatively more easily.
-- For example:
--
-- > paren p = "(" .*> p <*. ")"
--
-- instead of the more verbose
--
-- > paren p = string "(" *> p <* string ")"

-- | Same as @Applicative@'s @\<*@ but specialized.
(<*.) :: Monoid δ ⇒ I.Parser δ a → I.Parser δ δ → I.Parser δ a
(<*.) = (<*)

-- | Same as @Applicative@'s @*\>@ but specialized.
(.*>) :: Monoid δ ⇒ I.Parser δ δ → I.Parser δ a → I.Parser δ a
(.*>) = (*>)