-- | This module contains functions to help parsing input from within
-- 'Iter's.  Many of the operators are either imported from
-- "Data.Applicative" or inspired by "Text.Parsec".

module Data.IterIO.Parse (-- * Iteratee combinators
                          (<|>), (\/), orEmpty, (<?>), expectedI
                         , someI, foldrI, foldr1I, foldrMinMaxI
                         , foldlI, foldl1I, foldMI, foldM1I
                         , skipI, optionalI, ensureI
                         , eord
                         , skipWhileI, skipWhile1I
                         , whileI, while1I, whileMaxI, whileMinMaxI
                         , concatI, concat1I, concatMinMaxI
                         , readI, eofI
                         -- * Applicative combinators
                         , (<$>), (<$), ($>), (>$>), Applicative(..), (<**>)
                         , (<++>), (<:>), nil
                         -- * Parsing Iteratees
                         -- $Parseclike
                         , many, skipMany, sepBy, endBy, sepEndBy
                         , many1, skipMany1, sepBy1, endBy1, sepEndBy1
                         , satisfy, char, match, string, stringCase
                         ) where

import Prelude hiding (null)
import Control.Applicative (Applicative(..), (<**>), liftA2)
import Control.Monad
import Data.Char
import Data.Functor ((<$>), (<$))
import qualified Data.ListLike as LL
import Data.Monoid

import Data.IterIO.Iter
import Data.IterIO.Inum
import Data.IterIO.ListLike

-- | An infix synonym for 'multiParse' that allows LL(*) parsing of
-- alternatives by executing both Iteratees on input chunks as they
-- arrive.  This is similar to the @\<|>@ method of the
-- @'Alternative'@ class in "Control.Applicative", but the
-- @'Alternative'@ operator has left fixity, while for efficiency this
-- one has:
--
-- > infixr 3 <|>
(<|>) :: (ChunkData t, Monad m) =>
         Iter t m a -> Iter t m a -> Iter t m a
{-# INLINE (<|>) #-}
(<|>) = multiParse
infixr 3 <|>

-- | An infix synonym for 'ifNoParse' that allows LL(*) parsing of
-- alternatives by keeping a copy of input data consumed by the first
-- Iteratee so as to backtrack and execute the second Iteratee if the
-- first one fails.  Returns a function that takes a continuation for
-- the first 'Iter', should it succeed.  The code:
--
-- >     iter1 \/ iter2 $ \iter1Result -> doSomethingWith iter1Result
--
-- Executes @iter1@ (saving a copy of the input for backtracking).  If
-- @iter1@ fails with an exception of class 'IterNoParse', then the
-- input is re-wound and fed to @iter2@.  On the other hand, if
-- @iter1@ succeeds and returns @iter1Result@, then the saved input is
-- discarded (as @iter2@ will not need to be run) and the result of
-- @iter1@ is fed to function @doSomethingWith@.
--
-- For example, to build up a list of results of executing @iter@, one
-- could implement a type-restricted version of 'many' as follows:
--
-- @
--   myMany :: (ChunkData t, Monad m) => Iter t m a -> Iter t m [a]
--   myMany iter = iter \\/ return [] '$' \\r -> 'fmap' ((:) r) (myMany iter)
-- @
--
-- In other words, @myMany@ tries running @iter@.  If @iter@ fails,
-- then @myMany@ returns the empty list.  If @iter@ succeeds, its
-- result @r@ is added to the head of the list returned by calling
-- @myMany@ recursively.  This idiom of partially applying a binary
-- funciton to a result and then applying the resulting function to an
-- 'Iter' via 'fmap' is so common that there is an infix operator for
-- it, @'>$>'@.  Thus, the above code can be written:
--
-- @
--   myMany iter = iter \\/ return [] '$' (:) '>$>' myMany iter
-- @
--
-- Of course, using 'fmap' is not the most efficient way to implement
-- @myMany@.  If you are going to use this pattern for something
-- performance critical, you should use an accumulator rather than
-- build up long chains of 'fmap's.  A faster implementation would be:
--
-- @
--   myMany iter = loop id
--       where loop ac = iter \\/ return (acc []) '$' \a -> loop (acc . (a :))
-- @
--
-- @\\/@ has fixity:
--
-- > infix 2 \/
--
(\/) :: (ChunkData t, Monad m) => 
        Iter t m a -> Iter t m b -> (a -> Iter t m b) -> Iter t m b
{-# INLINE (\/) #-}
(\/) = ifNoParse
infix 2 \/

-- | @(f >$> a) t@ is equivalent to @f t '<$>' a@ (where '<$>' is and
-- infix alias for 'fmap').  Particularly useful with infix
-- combinators such as '\/' and ``orEmpty`` when chaining parse
-- actions.  See examples at '\/' and 'orEmpty'.  Note 'fmap' is not
-- always the most efficient solution (see an example in the
-- description of '\/').
--
-- Has fixity:
--
-- > infixl 3 >$>
--
(>$>) :: (Functor f) => (t -> a -> b) -> f a -> t -> f b
{-# INLINE (>$>) #-}
(>$>) f a = \t -> f t <$> a
infixr 3 >$>

-- | @fa $> b = b <$ fa@ -- replaces the output value of a functor
-- with some pure value.  Has the same fixity as '<$>' and '<$',
-- namely:
--
-- > infixl 4 $>
($>) :: (Functor f) => f a -> b -> f b
{-# INLINE ($>) #-}
a $> b = b <$ a
infixl 4 $>

-- | Defined as @orEmpty = ('\/' return 'mempty')@, and useful when
-- parse failures should just return an empty 'Monoid'.  For example,
-- a type-restricted 'many' can be implemented as:
--
-- @
--   myMany :: (ChunkData t, Monad m) => Iter t m a -> Iter t m [a]
--   myMany iter = iter ``orEmpty`` (:) '>$>' myMany iter
-- @
--
-- Has fixity:
--
-- > infixr 3 `orEmpty`
--
orEmpty :: (ChunkData t, Monad m, Monoid b) =>
           Iter t m a -> (a -> Iter t m b) -> Iter t m b
{-# INLINE orEmpty #-}
orEmpty = (\/ nil)
infixr 3 `orEmpty`

-- | @iter \<?\> token@ replaces any kind of parse failure in @iter@
-- with an exception equivalent to calling @'expectedI' prefix token@
-- where @prefix@ is a prefix of the input that was fed to @iter@ and
-- caused it to fail.
--
-- Has fixity:
--
-- > infix 0 <?>
--
(<?>) :: (ChunkData t, Monad m) => Iter t m a -> String -> Iter t m a
{-# INLINE (<?>) #-}
(<?>) iter expected =
    Iter $ \c -> case runIter iter c of
      r@(Done _ _)   -> r
      r@(Fail e _ _) -> case e of
                          IterException _ -> r
                          _ -> Fail (IterExpected [(show c, expected)])
                                    Nothing Nothing
      r              -> slowPath (show c) expected r
    where
      {-# NOINLINE slowPath #-}
      slowPath saw exp1 = onDoneR $ \r0 ->
        case r0 of
          r@(Fail e _ _) -> case e of
                              IterException _ -> r
                              _ -> Fail (IterExpected [(saw, exp1)])
                                   Nothing Nothing
          r -> r
infix 0 <?>
  
-- | Throw an 'Iter' exception that describes expected input not
-- found.
expectedI :: (ChunkData t) =>
             String             -- ^ Input actually received
          -> String             -- ^ Description of input that was wanted
          -> Iter t m a
expectedI saw target =
    Iter $ \_ -> Fail (IterExpected [(saw, target)]) Nothing Nothing

-- | Takes an 'Iter' returning a 'LL.ListLike' type, executes the
-- 'Iter' once, and throws a parse error if the returned value is
-- 'LL.null'.  (Note that this is quite different from the @'some'@
-- method of the @'Alternative'@ class in "Control.Applicative", which
-- executes a computation one /or more/ times.  This library does not
-- use @'Alternative'@ because @`Alternative`@'s @\<|\>@ operator has
-- left instead of right fixity.)
someI :: (ChunkData t, Monad m, LL.ListLike a e) => Iter t m a -> Iter t m a
someI iter = (<?> "someI") $ do
  a <- iter
  if LL.null a then mzero else return a

-- | Repeatedly invoke an 'Iter' and right-fold a function over the
-- results.
foldrI :: (ChunkData t, Monad m) =>
          (a -> b -> b) -> b -> Iter t m a -> Iter t m b
foldrI = innerFoldrI id

innerFoldrI :: (ChunkData t, Monad m) =>
               (b -> b) -> (a -> b -> b) -> b -> Iter t m a -> Iter t m b
innerFoldrI acc0 f z iter = loop acc0
    where loop acc = iter \/ return (acc z) $ \a -> loop (acc . f a)

-- | A variant of 'foldrI' that requires the 'Iter' to succeed at
-- least once.
foldr1I :: (ChunkData t, Monad m) =>
           (a -> b -> b) -> b -> Iter t m a -> Iter t m b
foldr1I f z iter = iter >>= \a -> innerFoldrI (f a) f z iter

-- | A variant of 'foldrI' that requires the 'Iter' to succeed at
-- least a minimum number of items and stops parsing after executing
-- the 'Iter' some maximum number of times.
foldrMinMaxI :: (ChunkData t, Monad m) =>
                Int             -- ^ Minimum number to parse
             -> Int             -- ^ Maximum number to parse
             -> (a -> b -> b)   -- ^ Folding function
             -> b               -- ^ Rightmost value
             -> Iter t m a      -- ^ Iteratee generating items to fold
             -> Iter t m b
foldrMinMaxI nmin0 nmax0 f z iter
    | nmin0 > nmax0 = throwParseI "foldrMinMaxI: min > max"
    | nmax0 < 0     = throwParseI "foldrMinMaxI: negative max"
    | otherwise = loop id nmin0 nmax0
    where
      loop acc nmin nmax
          | nmax == 0 = return $ acc z
          | nmin > 0  = iter >>= \a -> loop (acc . f a) (nmin - 1) (nmax - 1)
          | otherwise = iter \/ return (acc z) $ \a ->
                        loop (acc . f a) 0 (nmax - 1)

-- | Strict left fold over an 'Iter' (until it throws an 'IterNoParse'
-- exception).  @foldlI f z iter@ is sort of equivalent to:
--
-- > ... (f <$> (f <$> (f z <$> iter) <*> iter) <*> iter) ...
foldlI :: (ChunkData t, Monad m) =>
          (b -> a -> b) -> b -> Iter t m a -> Iter t m b
foldlI f z0 iter = foldNext z0
    where foldNext z = z `seq` iter \/ return z $ \a -> foldNext (f z a)

-- | A version of 'foldlI' that fails if the 'Iter' argument does not
-- succeed at least once.
foldl1I :: (ChunkData t, Monad m) =>
           (b -> a -> b) -> b -> Iter t m a -> Iter t m b
foldl1I f z iter = iter >>= \a -> foldlI f (f z a) iter

-- | @foldMI@ is a left fold in which the folding function can execute
-- monadic actions.  Essentially @foldMI@ is to 'foldlI' as 'foldM' is
-- to @`foldl'`@ in the standard libraries.
foldMI :: (ChunkData t, Monad m) =>
          (b -> a -> Iter t m b) -> b -> Iter t m a -> Iter t m b
foldMI f z0 iter = foldNext z0
    where foldNext z = iter \/ return z $ f z >=> foldNext

-- | A variant of 'foldMI' that requires the 'Iter' to succeed at
-- least once.
foldM1I :: (ChunkData t, Monad m) =>
           (b -> a -> Iter t m b) -> b -> Iter t m a -> Iter t m b
foldM1I f z0 iter = iter >>= f z0 >>= \z -> foldMI f z iter


-- | Discard the result of executing an Iteratee once.  Throws an
-- error if the Iteratee fails.  (Like @skip x = x >> return ()@.)
skipI :: Applicative f => f a -> f ()
skipI = (() <$)

-- | Execute an iteratee.  Discard the result if it succeeds.  Rewind
-- the input and suppress the error if it fails.
optionalI :: (ChunkData t, Monad m) => Iter t m a -> Iter t m ()
optionalI iter = ifParse iter (const $ return ()) (return ())

-- | Ensures the next input element satisfies a predicate or throws a
-- parse error.  Does not consume any input.
ensureI :: (ChunkData t, LL.ListLike t e, Monad m) =>
           (e -> Bool) -> Iter t m ()
ensureI test =
    Iter $ \c@(Chunk t eof) ->
        if LL.null t
           then (if eof then eofFail else IterF (ensureI test))
           else (if test (LL.head t) then Done () c else testFail)
    where testFail = Fail (IterParseErr "ensureI test failed") Nothing Nothing
          eofFail  = Fail (mkIterEOF "ensureI EOF") Nothing Nothing

-- | A variant of the standard library 'ord' function, but that
-- translates a 'Char' into any 'Enum' type, not just 'Int'.
-- Particularly useful for 'Iter's that must work with both 'String's
-- (which consist of 'Char's) and ASCII @'ByteString'@s (which consist
-- of @'Word8'@s).  For example, to skip one or more space or TAB
-- characters, you can use:
--
-- @
--   skipSpace :: ('LL.ListLike' t e, ChunkData t, 'Eq' e, 'Enum' e, Monad m) =>
--                'Iter' t m ()
--   skipSpace = 'skipWhile1I' (\\c -> c == eord ' ' || c == eord '\t')
-- @
eord :: (Enum e) => Char -> e
{-# INLINE eord #-}
eord = toEnum . ord

-- | Skip all input elements encountered until an element is found
-- that does not match the specified predicate.
skipWhileI :: (ChunkData t, LL.ListLike t e, Monad m) =>
              (e -> Bool) -> Iter t m ()
skipWhileI test = loop
    where loop = Iter $ \(Chunk t eof) ->
                 case LL.dropWhile test t of
                   t1 | LL.null t1 && not eof -> IterF loop
                   t1 -> Done () $ Chunk t1 eof

-- | Like 'skipWhileI', but fails if at least one element does not
-- satisfy the predicate.
skipWhile1I :: (ChunkData t, LL.ListLike t e, Monad m) =>
               (e -> Bool) -> Iter t m ()
skipWhile1I test = ensureI test >> skipWhileI test <?> "skipWhile1I"

-- | Return all input elements up to the first one that does not match
-- the specified predicate.
whileI :: (ChunkData t, LL.ListLike t e, Monad m)
          => (e -> Bool) -> Iter t m t
whileI test = more id
    where
      more acc = Iter $ \(Chunk t eof) ->
                 case LL.span test t of
                   (t1, t2) | not (LL.null t2) || eof ->
                                     Done (acc t1) $ Chunk t2 eof
                   (t1, _) -> IterF $ more (acc . LL.append t1)

-- | Like 'whileI', but fails if at least one element does not satisfy
-- the predicate.
while1I :: (ChunkData t, LL.ListLike t e, Monad m) =>
           (e -> Bool) -> Iter t m t
while1I test = ensureI test >> whileI test

-- | A variant of 'whileI' with a maximum number matches.
whileMaxI :: (ChunkData t, LL.ListLike t e, Monad m) =>
             Int                  -- ^ Maximum number to match
          -> (e -> Bool)          -- ^ Predicate test
          -> Iter t m t
whileMaxI nmax test = inumMax nmax .| whileI test

-- | A variant of 'whileI' with a minimum and maximum number matches.
whileMinMaxI :: (ChunkData t, LL.ListLike t e, Monad m) =>
                Int                  -- ^ Minumum number
             -> Int                  -- ^ Maximum number
             -> (e -> Bool)          -- ^ Predicate test
             -> Iter t m t
whileMinMaxI nmin nmax test = do
  result <- whileMaxI nmax test
  if LL.length result >= nmin
    then return result
    else expectedI "too few" "whileMinMaxI minimum"

-- | Repeatedly execute an 'Iter' returning a 'Monoid' and 'mappend'
-- all the results in a right fold.
concatI :: (ChunkData t, Monoid s, Monad m) =>
           Iter t m s -> Iter t m s
concatI iter = foldrI mappend mempty iter

-- | Like 'concatI', but fails if the 'Iter' doesn't return at least
-- once.
concat1I :: (ChunkData t, Monoid s, Monad m) =>
           Iter t m s -> Iter t m s
concat1I iter = foldr1I mappend mempty iter

-- | A version of 'concatI' that takes a minimum and maximum number of
-- items to parse.
concatMinMaxI :: (ChunkData t, Monoid s, Monad m) =>
                 Int            -- ^ Minimum number to parse
              -> Int            -- ^ Maximum number to parse
              -> Iter t m s     -- ^ 'Iter' whose results to concatenate
              -> Iter t m s
concatMinMaxI nmin nmax iter = foldrMinMaxI nmin nmax mappend mempty iter
                               
                               
-- | This 'Iter' parses a 'LL.StringLike' argument.  It does not
-- consume any Iteratee input.  The only reason it is an Iteratee is
-- so that it can throw an Iteratee parse error should it fail to
-- parse the argument string (or should the argument yield an
-- ambiguous parse).
readI :: (ChunkData t, Monad m, LL.StringLike s, Read a) => 
         s -> Iter t m a
readI s' = let s = LL.toString s'
           in case [a | (a,"") <- reads s] of
                [a] -> return a
                []  -> throwParseI $ "readI can't parse: " ++ s
                _   -> throwParseI $ "readI ambiguous: " ++ s

-- | Ensures the input is at the end-of-file marker, or else throws an
-- exception.
eofI :: (ChunkData t, Monad m, Show t) => Iter t m ()
eofI = do
  Chunk t eof <- iterF $ \c -> Done c c
  if eof && null t
    then return ()
    else expectedI (chunkShow t) "EOF"

-- | 'mappend' the result of two 'Applicative' types returning
-- 'Monoid' types (@\<++> = 'liftA2' 'mappend'@).  Has the same fixity
-- as '++', namely:
--
-- > infixr 5 <++>
(<++>) :: (Applicative f, Monoid t) => f t -> f t -> f t
(<++>) = liftA2 mappend
infixr 5 <++>

-- | 'LL.cons' an 'Applicative' type onto an an 'Applicative'
-- 'LL.ListLike' type (@\<:> = 'liftA2' 'LL.cons'@).  Has the same
-- fixity as @:@, namely:
--
-- > infixr 5 <:>
(<:>) :: (LL.ListLike t e, Applicative f) => f e -> f t -> f t
{-# INLINE (<:>) #-}
(<:>) = liftA2 LL.cons
infixr 5 <:>

-- | @nil = 'pure' 'mempty'@--An empty 'Monoid' injected into an
-- 'Applicative' type.
nil :: (Applicative f, Monoid t) => f t
{-# INLINE nil #-}
nil = pure mempty

-- $Parseclike
--
-- These functions are intended to be similar to those supplied by
-- "Text.Parsec".

-- | Run an 'Iter' zero or more times (until it fails) and return a
-- list-like container of the results.
many :: (ChunkData t, LL.ListLike f a, Monad m) => Iter t m a -> Iter t m f
{-# INLINE many #-}
many = foldrI LL.cons LL.empty

-- | Repeatedly run an 'Iter' until it fails and discard all the
-- results.
skipMany :: (ChunkData t, Monad m) => Iter t m a -> Iter t m ()
skipMany = foldlI (\_ _ -> ()) ()

-- | Parses a sequence of the form
-- /Item1 Separator Item2 Separator ... Separator ItemN/
-- and returns the list @[@/Item1/@,@ /Item2/@,@ ...@,@ /ItemN/@]@
-- or a 'LL.ListLike' equivalent.
sepBy :: (ChunkData t, LL.ListLike f a, Monad m) =>
         Iter t m a             -- ^ Item to parse
      -> Iter t m b             -- ^ Separator between items
      -> Iter t m f             -- ^ Returns 'LL.ListLike' list of items
sepBy item sep = item `orEmpty` \a ->
                 innerFoldrI (LL.cons a) LL.cons LL.empty (sep *> item)

-- | Like 'sepBy', but expects a separator after the final item.  In
-- other words, parses a sequence of the form
-- /Item1 Separator Item2 Separator ... Separator ItemN Separator/
-- and returns the list @[@/Item1/@,@ /Item2/@,@ ...@,@ /ItemN/@]@ or
-- a 'LL.ListLike' equivalent.
endBy :: (ChunkData t, LL.ListLike f a, Monad m) =>
         Iter t m a             -- ^ Item to parse
      -> Iter t m b             -- ^ Separator that must follow each item
      -> Iter t m f             -- ^ Returns 'LL.ListLike' list of items
endBy item sep = foldrI LL.cons LL.empty (item <* sep)

-- | Accepts items that would be parsed by either 'sepBy' or 'endBy'.
-- Essentially a version of 'endBy' in which the final separator is
-- optional.
sepEndBy :: (ChunkData t, LL.ListLike f a, Monad m) =>
            Iter t m a -> Iter t m b -> Iter t m f
sepEndBy item sep = sepBy item sep <* optionalI sep


-- | Run an 'Iter' one or more times (until it fails) and return a
-- list-like container of the results.
many1 :: (ChunkData t, LL.ListLike f a, Monad m) => Iter t m a -> Iter t m f
many1 = foldr1I LL.cons LL.empty

-- | A variant of 'skipMany' that throws a parse error if the 'Iter'
-- does not succeed at least once.
skipMany1 :: (ChunkData t, Monad m) => Iter t m a -> Iter t m ()
skipMany1 = foldl1I (\_ _ -> ()) ()

-- | A variant of 'sepBy' that throws a parse error if it cannot
-- return at least one item.
sepBy1 :: (ChunkData t, LL.ListLike f a, Monad m) =>
          Iter t m a -> Iter t m b -> Iter t m f
sepBy1 item sep = item >>= \a ->
                  innerFoldrI (LL.cons a) LL.cons LL.empty (sep *> item)

-- | A variant of 'endBy' that throws a parse error if it cannot
-- return at least one item.
endBy1 :: (ChunkData t, LL.ListLike f a, Monad m) =>
          Iter t m a -> Iter t m b -> Iter t m f
endBy1 item sep = foldr1I LL.cons LL.empty (item <* sep)

-- | A variant of 'sepEndBy' that throws a parse error if it cannot
-- return at least one item.
sepEndBy1 :: (ChunkData t, LL.ListLike f a, Monad m) =>
             Iter t m a -> Iter t m b -> Iter t m f
sepEndBy1 item sep = sepBy1 item sep <* optionalI sep

                 
-- | Read the next input element if it satisfies some predicate.
-- Otherwise throw an error.
satisfy :: (ChunkData t, LL.ListLike t e, Enum e, Monad m) =>
           (e -> Bool) -> Iter t m e
satisfy test =
    Iter $ \c@(Chunk t eof) ->
        if LL.null t
           then (if eof then eofFail else IterF (satisfy test))
           else case LL.head t of
                  h | test h -> 
                        Done h (Chunk (LL.tail t) eof)
                    | otherwise ->
                        Fail (IterExpected [(show $ chr $ fromEnum h
                                           , "satisfy predicate")])
                        Nothing (Just c)
    where eofFail  = Fail (mkIterEOF "satisfy: EOF") Nothing Nothing

-- | Read input that exactly matches a character.
char :: (ChunkData t, LL.ListLike t e, Eq e, Enum e, Monad m) =>
        Char -> Iter t m e
{-# INLINE char #-}
char target = satisfy (eord target ==) <?> show target

-- | Read input that exactly matches some target.
match :: (ChunkData t, LL.ListLike t e, Eq e, Monad m) =>
         t -> Iter t m t
match ft = doMatch ft
    where doMatch target | LL.null target = return ft
                         | otherwise      = do
            m <- data0MaxI $ LL.length target
            if not (LL.null m) && LL.isPrefixOf m target
              then doMatch $ LL.drop (LL.length m) target
              else expectedI (chunkShow m) $ chunkShow target

-- | Read input that exactly matches a string.
string :: (ChunkData t, LL.ListLike t e, LL.StringLike t, Eq e, Monad m) =>
          String -> Iter t m t
{-# INLINE string #-}
string = match . LL.fromString

-- | Read input that matches a string up to case.
stringCase :: (ChunkData t, LL.ListLike t e, Enum e, Eq e, Monad m) =>
              String -> Iter t m t
stringCase ft = doMatch LL.empty $ ft
    where
      prefix a b | LL.null a = True
                 | otherwise =
                     if toLower (chr $ fromEnum $ LL.head a) /= toLower (head b)
                     then False else LL.tail a `prefix` LL.tail b
      doMatch acc target | LL.null target = return acc
                         | otherwise      = do
        m <- data0MaxI $ LL.length target
        if not (LL.null m) && m `prefix` target
          then doMatch (LL.append acc m) $ LL.drop (LL.length m) target
          else expectedI (chunkShow m) $ chunkShow target