Text/ParserCombinators/Incremental.hs

{- 
    Copyright 2010-2011 Mario Blazevic

    This file is part of the Streaming Component Combinators (SCC) project.

    The SCC project is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
    License as published by the Free Software Foundation, either version 3 of the License, or (at your moptional) any later
    version.

    SCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along with SCC.  If not, see
    <http://www.gnu.org/licenses/>.
-}

-- | This module defines incremental parsers. 
-- 
-- The exported 'Parser' type can provide partial parsing results from partial input, as long as the output is a
-- 'Monoid'. Construct a parser using the primitives and combinators, supply it with input using functions 'feed' and
-- 'feedEof', and extract the parsed output using 'results'.
-- 
-- Implementation is based on Brzozowski derivatives.

{-# LANGUAGE FlexibleContexts, UndecidableInstances #-}

module Text.ParserCombinators.Incremental (
   -- * The Parser type
   Parser,
   -- * Using a Parser
   feed, feedEof, results, completeResults, resultPrefix,
   -- * Parser primitives
   failure, more, eof, anyToken, token, satisfy, acceptAll, string, takeWhile, takeWhile1,
   -- * Parser combinators
   count, skip, moptional, concatMany, concatSome, manyTill,
   mapType, mapIncremental, (<||>), (<<|>), (><), lookAhead, notFollowedBy, and, andThen,
   -- * Utilities
   isInfallible, showWith
   )
where

import Prelude hiding (and, takeWhile)
import Control.Applicative (Applicative (pure, (<*>), (*>), (<*)), Alternative ((<|>)))
import Control.Applicative.Monoid(MonoidApplicative(..), MonoidAlternative(..))
import Control.Monad (ap)
import Data.Monoid (Monoid, mempty, mappend, (<>))
import Data.Monoid.Cancellative (LeftCancellativeMonoid (mstripPrefix))
import Data.Monoid.Factorial (FactorialMonoid (splitPrimePrefix), mspan)
import Data.Monoid.Null (MonoidNull(mnull))

-- | The central parser type. Its first parameter is the input monoid, the second the output.
data Parser a s r = Failure
                  | Result s r
                  | ResultPart (r -> r) (Parser a s r) (s -> Parser a s r)
                  | Delay (Parser a s r) (s -> Parser a s r)
                  | Choice (Parser a s r) (Parser a s r)

-- | Feeds a chunk of the input to the parser.
feed :: Monoid s => s -> Parser a s r -> Parser a s r
feed s Failure = s `seq` Failure
feed s (Result t r) = Result (mappend t s) r
feed s (ResultPart r _ f) = resultPart r (f s)
feed s (Choice p1 p2) = feed s p1 <||> feed s p2
feed s (Delay _ f) = f s

-- | Signals the end of the input.
feedEof :: Monoid s => Parser a s r -> Parser a s r
feedEof Failure = Failure
feedEof p@Result{} = p
feedEof (ResultPart r e _) = prepend r (feedEof e)
feedEof (Choice p1 p2) = feedEof p1 <||> feedEof p2
feedEof (Delay e _) = feedEof e

-- | Extracts all available parsing results. The first component of the result pair is a list of complete results
-- together with the unconsumed remainder of the input. If the parsing can continue further, the second component of the
-- pair provides the partial result prefix together with the parser for the rest of the input.
results :: Monoid r => Parser a s r -> ([(r, s)], Maybe (r, Parser a s r))
results Failure = ([], Nothing)
results (Result t r) = ([(r, t)], Nothing)
results (ResultPart r e f) = ([], Just (r mempty, ResultPart id e f))
results (Choice p1 p2) | isInfallible p1 = (results1 ++ results2, combine rest1 rest2)
   where (results1, rest1) = results p1
         (results2, rest2) = results p2
         combine Nothing rest = rest
         combine rest Nothing = rest
         combine (Just (r1, p1')) (Just (r2, p2')) = Just (mempty, Choice (prepend (r1 <>) p1') (prepend (r2 <>) p2'))
results p = ([], Just (mempty, p))

-- | Like 'results', but returns only the complete results with the corresponding unconsumed inputs.
completeResults :: Parser a s r -> [(r, s)]
completeResults (Result t r) = [(r, t)]
completeResults (ResultPart r e f) = map (\(r', t)-> (r r', t)) (completeResults e)
completeResults (Choice p1 p2) | isInfallible p1 = completeResults p1 ++ completeResults p2
completeResults _ = []

-- | Like 'results', but returns only the partial result prefix.
resultPrefix :: Monoid r => Parser a s r -> (r, Parser a s r)
resultPrefix (Result t r) = (r, Result t mempty)
resultPrefix (ResultPart r e f) = (r mempty, ResultPart id e f)
resultPrefix p = (mempty, p)

failure :: Parser a s r
failure = Failure

-- | Usage of 'fmap' destroys the incrementality of parsing results, if you need it use 'mapIncremental' instead.
instance Monoid s => Functor (Parser a s) where
   fmap f (Result t r) = Result t (f r)
   fmap g (ResultPart r e f) = ResultPart id (fmap g $ prepend r $ feedEof e) (fmap g . prepend r . f)
   fmap f p = apply (fmap f) p

-- | The '<*>' combinator requires its both arguments to provide complete parsing results, takeWhile '*>' and '<*'
-- preserve the incremental results.
instance Monoid s => Applicative (Parser a s) where
   pure = Result mempty
   (<*>) = ap
   (*>) = (>>)

   Result t r <* p = feed t p *> pure r
   ResultPart r e f <* p | isInfallible p = ResultPart r (e <* p) ((<* p) . f)
   p1 <* p2 = apply (<* p2) p1

-- | Usage of '>>=' destroys the incrementality of its left argument's parsing results, but '>>' is safe to use.
instance Monoid s => Monad (Parser a s) where
   return = Result mempty

   Result t r >>= f = feed t (f r)
   p >>= f = apply (>>= f) p

   Result t _ >> p = feed t p
   ResultPart _ e f >> p | isInfallible p = ResultPart id (e >> p) ((>> p) . f)
                         | otherwise = Delay (e >> p) ((>> p) . f)
   p1 >> p2 = apply (>> p2) p1

instance Monoid s => MonoidApplicative (Parser a s) where
   -- | Join operator on two parsers of the same type, preserving the incremental results.
   _ >< Failure = Failure
   p1 >< p2 | isInfallible p2 = appendIncremental p1 p2
            | otherwise       = append p1 p2

appendIncremental :: (Monoid s, Monoid r) => Parser a s r -> Parser a s r -> Parser a s r
appendIncremental (Result t r) p = resultPart (mappend r) (feed t p)
appendIncremental (ResultPart r e f) p2 = ResultPart r (appendIncremental e p2) (flip appendIncremental p2 . f)
appendIncremental p1 p2 = apply (`appendIncremental` p2) p1

append :: (Monoid s, Monoid r) => Parser a s r -> Parser a s r -> Parser a s r
append (Result t r) p2 = prepend (mappend r) (feed t p2)
append p1 p2 = apply (`append` p2) p1

-- | Two parsers can be sequentially joined.
instance (Monoid s, Monoid r) => Monoid (Parser a s r) where
   mempty = return mempty
   mappend = (><)

instance (Alternative (Parser a s), Monoid s) => MonoidAlternative (Parser a s) where
   moptional p = p <|> mempty
   concatMany = fst . manies
   concatSome = snd . manies

manies :: (Alternative (Parser a s), Monoid s, Monoid r) => Parser a s r -> (Parser a s r, Parser a s r)
manies p = (many, some)
   where many = some <|> mempty
         some = appendIncremental p many

infixl 3 <||>
infixl 3 <<|>

(<||>) :: Parser a s r -> Parser a s r -> Parser a s r
Delay e1 f1 <||> Delay e2 f2 = Delay (e1 <||> e2) (\s-> f1 s <||> f2 s)
Failure <||> p = p
p <||> Failure = p
p1@Result{} <||> p2 = Choice p1 p2
p1@ResultPart{} <||> p2 = Choice p1 p2
Choice p1a p1b <||> p2 | isInfallible p1a = Choice p1a (p1b <||> p2)
p1 <||> p2@Result{} = Choice p2 p1
p1 <||> p2@ResultPart{} = Choice p2 p1
p1 <||> Choice p2a p2b | isInfallible p2a = Choice p2a (p1 <||> p2b)
p1 <||> p2 = Choice p1 p2

(<<|>) :: Monoid s => Parser a s r -> Parser a s r -> Parser a s r
Failure <<|> p = p
p <<|> _ | isInfallible p = p
p <<|> Failure = p
p1 <<|> p2 = if isInfallible p2 then ResultPart id e f else Delay e f
   where e = feedEof p1 <<|> feedEof p2
         f s = feed s p1 <<|> feed s p2

-- instance (Monoid s, Monoid r, Show s, Show r) => Show (Parser a s r) where
--    show = showWith (show . ($ mempty)) show

showWith :: (Monoid s, Monoid r, Show s) => ((s -> Parser a s r) -> String) -> (r -> String) -> Parser a s r -> String
showWith _ _ Failure = "Failure"
showWith _ sr (Result t r) = "(Result " ++ shows t (" " ++ sr r ++ ")")
showWith sm sr (ResultPart r e f) =
   "(ResultPart (mappend " ++ sr (r mempty) ++ ") " ++ showWith sm sr e ++ " " ++ sm f ++ ")"
showWith sm sr (Choice p1 p2) = "(Choice " ++ showWith sm sr p1 ++ " " ++ showWith sm sr p2 ++ ")"
showWith sm sr (Delay e f) = "(Delay " ++ showWith sm sr e ++ " " ++ sm f ++ ")"

-- | Like 'fmap', but capable of mapping partial results, being restricted to 'Monoid' types only.
mapIncremental :: (Monoid s, Monoid a, Monoid b) => (a -> b) -> Parser p s a -> Parser p s b
mapIncremental f (Result t r) = Result t (f r)
mapIncremental g (ResultPart r e f) = 
   ResultPart (mappend $ g $ r mempty) (mapIncremental g e) (mapIncremental g . f)
mapIncremental f p = apply (mapIncremental f) p

-- | Behaves like the argument parser, but without consuming any input.
lookAhead :: Monoid s => Parser a s r -> Parser a s r
lookAhead p = lookAheadInto mempty p
   where lookAheadInto :: Monoid s => s -> Parser a s r -> Parser a s r
         lookAheadInto _ Failure            = Failure
         lookAheadInto t (Result _ r)       = Result t r
         lookAheadInto t (ResultPart r e f) = ResultPart r (lookAheadInto t e) (\s-> lookAheadInto (mappend t s) (f s))
         lookAheadInto t (Choice p1 p2)     = lookAheadInto t p1 <||> lookAheadInto t p2
         lookAheadInto t (Delay e f)        = Delay (lookAheadInto t e) (\s-> lookAheadInto (mappend t s) (f s))

-- | Does not consume any input; succeeds (with 'mempty' result) iff the argument parser fails.
notFollowedBy :: (Monoid s, Monoid r) => Parser a s r' -> Parser a s r
notFollowedBy = lookAheadNotInto mempty
   where lookAheadNotInto :: (Monoid s, Monoid r) => s -> Parser a s r' -> Parser a s r
         lookAheadNotInto t Failure     = Result t mempty
         lookAheadNotInto t (Delay e f) = Delay (lookAheadNotInto t e) (\s-> lookAheadNotInto (mappend t s) (f s))
         lookAheadNotInto t p | isInfallible p = Failure
                              | otherwise = Delay (lookAheadNotInto t $ feedEof p) 
                                                  (\s-> lookAheadNotInto (mappend t s) (feed s p))

-- | Provides a partial parsing result.
resultPart :: Monoid s => (r -> r) -> Parser a s r -> Parser a s r
resultPart _ Failure = error "Internal contradiction"
resultPart f (Result t r) = Result t (f r)
resultPart r1 (ResultPart r2 e f) = ResultPart (r1 . r2) e f
resultPart r p = ResultPart r (feedEof p) (flip feed p)

isInfallible :: Parser a s r -> Bool
isInfallible Result{} = True
isInfallible ResultPart{} = True
isInfallible (Choice p _) = isInfallible p
isInfallible _ = False

prepend :: (r -> r) -> Parser a s r -> Parser a s r
prepend _ Failure = Failure
prepend r1 (Result t r2) = Result t (r1 r2)
prepend r1 (ResultPart r2 e f) = ResultPart (r1 . r2) e f
prepend r (Choice p1 p2) = Choice (prepend r p1) (prepend r p2)
prepend r (Delay e f) = Delay (prepend r e) (prepend r . f)

apply :: Monoid s => (Parser a s r -> Parser a s r') -> Parser a s r -> Parser a s r'
apply _ Failure = Failure
apply f (Choice p1 p2) = f p1 <||> f p2
apply g (Delay e f) = Delay (g e) (g . f)
apply f p = Delay (f $ feedEof p) (\s-> f $ feed s p)

mapType :: (Parser a s r -> Parser b s r) -> Parser a s r -> Parser b s r
mapType _ Failure = Failure
mapType _ (Result s r) = Result s r
mapType g (ResultPart r e f) = ResultPart r (g e) (g . f)
mapType f (Choice p1 p2) = Choice (f p1) (f p2)
mapType g (Delay e f) = Delay (g e) (g . f)

more :: (s -> Parser a s r) -> Parser a s r
more = Delay Failure

-- | A parser that fails on any input and succeeds at its end.
eof :: (MonoidNull s, Monoid r) => Parser a s r
eof = Delay mempty (\s-> if mnull s then eof else Failure)

-- | A parser that accepts any single input atom.
anyToken :: FactorialMonoid s => Parser a s s
anyToken = more f
   where f s = case splitPrimePrefix s
               of Just (first, rest) -> Result rest first
                  Nothing -> anyToken

-- | A parser that accepts a specific input atom.
token :: (Eq s, FactorialMonoid s) => s -> Parser a s s
token x = satisfy (== x)

-- | A parser that accepts an input atom only if it satisfies the given predicate.
satisfy :: FactorialMonoid s => (s -> Bool) -> Parser a s s
satisfy predicate = p
   where p = more f
         f s = case splitPrimePrefix s
               of Just (first, rest) -> if predicate first then Result rest first else Failure
                  Nothing -> p

-- | A parser that consumes and returns the given prefix of the input.
string :: (LeftCancellativeMonoid s, MonoidNull s) => s -> Parser a s s
string x | mnull x = mempty
string x = more (\y-> case (mstripPrefix x y, mstripPrefix y x)
                      of (Just y', _) -> Result y' x
                         (Nothing, Nothing) -> Failure
                         (Nothing, Just x') -> string x' >> return x)

-- | A parser accepting the longest sequence of input atoms that match the given predicate; an optimized version of
-- 'concatMany . satisfy'.
takeWhile :: (FactorialMonoid s, MonoidNull s) => (s -> Bool) -> Parser a s s
takeWhile pred = while
   where while = ResultPart id (return mempty) f
         f s = let (prefix, suffix) = mspan pred s 
               in if mnull suffix then resultPart (mappend prefix) while
                  else Result suffix prefix

-- | A parser accepting the longest non-empty sequence of input atoms that match the given predicate; an optimized
-- version of 'concatSome . satisfy'.
takeWhile1 :: (FactorialMonoid s, MonoidNull s) => (s -> Bool) -> Parser a s s
takeWhile1 pred = more f
   where f s | mnull s = takeWhile1 pred
             | otherwise = let (prefix, suffix) = mspan pred s 
                           in if mnull prefix then Failure
                              else if mnull suffix then resultPart (mappend prefix) (takeWhile pred)
                                   else Result suffix prefix

-- | Accepts the given number of occurrences of the argument parser.
count :: (Monoid s, Monoid r) => Int -> Parser a s r -> Parser a s r
count n p | n > 0 = p >< count (pred n) p
          | otherwise = mempty

-- | Discards the results of the argument parser.
skip :: (Monoid s, Monoid r) => Parser a s r' -> Parser a s r
skip p = p *> mempty

-- | Repeats matching the first argument until the second one succeeds.
manyTill :: (Alternative (Parser a s), Monoid s, Monoid r) => Parser a s r -> Parser a s r' -> Parser a s r
manyTill next end = t
   where t = skip end <|> mappend next t

-- | A parser that accepts all input.
acceptAll :: Monoid s => Parser a s s
acceptAll = ResultPart id mempty f
   where f s = ResultPart (mappend s) mempty f

-- | Parallel parser conjunction: the combined parser keeps accepting input as long as both arguments do.
and :: (Monoid s, Monoid r1, Monoid r2) => Parser a s r1 -> Parser a s r2 -> Parser a s (r1, r2)
Failure `and` _ = Failure
_ `and` Failure = Failure
p `and` Result _ r = fmap (\x-> (x, r)) (feedEof p)
Result _ r `and` p = fmap (\x-> (r, x)) (feedEof p)
ResultPart r e f `and` p | isInfallible p =
   ResultPart (\(r1, r2)-> (r r1, r2)) (e `and` feedEof p) (\s-> f s `and` feed s p)
p `and` ResultPart r e f | isInfallible p =
   ResultPart (\(r1, r2)-> (r1, r r2)) (feedEof p `and` e) (\s-> feed s p `and` f s)
Choice p1a p1b `and` p2 = (p1a `and` p2) <||> (p1b `and` p2)
p1 `and` Choice p2a p2b = (p1 `and` p2a) <||> (p1 `and` p2b)
p1 `and` p2 = Delay (feedEof p1 `and` feedEof p2) (\s-> feed s p1 `and` feed s p2)

-- | A sequence parser that preserves incremental results, otherwise equivalent to 'Alternative.liftA2' (,)
andThen :: (Monoid s, Monoid r1, Monoid r2) => Parser a s r1 -> Parser a s r2 -> Parser a s (r1, r2)
Result t r `andThen` p | isInfallible p = resultPart (mappend (r, mempty)) (feed t (fmap ((,) mempty) p))
ResultPart r e f `andThen` p | isInfallible p = ResultPart (\(r1, r2)-> (r r1, r2)) (e `andThen` p) ((`andThen` p) . f)
p1 `andThen` p2 = apply (`andThen` p2) p1