-- | This module defines our parsing monad.  In the past there have been lazy
-- and strict parsers in this module.  Currently we have only the strict
-- variant and it is used for parsing patch files.

module Darcs.Patch.ReadMonads (ParserM, Darcs.Patch.ReadMonads.take,
                        parse, parseStrictly, char, int,
                        option, choice, skipSpace, skipWhile, string,
                        lexChar, lexString, lexEof, takeTillChar,
                        myLex', anyChar, endOfInput, takeTill,
                        checkConsumes,
                        linesStartingWith, linesStartingWithEndingWith) where

import Prelude ()
import Darcs.Prelude

import Darcs.Util.ByteString ( dropSpace, breakSpace, breakFirstPS,
                         readIntPS, breakLastPS )
import qualified Data.ByteString as B (null, drop, length, tail, empty,
                                       ByteString)
import qualified Data.ByteString.Char8 as BC ( uncons, dropWhile, break
                                             , splitAt, length, head )
import Control.Applicative ( Alternative(..) )
import Data.Foldable ( asum )
import Control.Monad ( MonadPlus(..) )

-- | 'lexChar' checks if the next space delimited token from
-- the input stream matches a specific 'Char'.
-- Uses 'Maybe' inside 'ParserM' to handle failed matches, so
-- that it always returns () on success.
lexChar :: ParserM m => Char -> m ()
lexChar c = do
  skipSpace
  char c
  return ()

-- | 'lexString' fetches the next whitespace delimited token from
-- from the input and checks if it matches the 'ByteString' input.
-- Uses 'Maybe' inside 'ParserM' to handle failed matches, so
-- that it always returns () on success.
lexString :: ParserM m => B.ByteString -> m ()
lexString str = work
           $ \s -> case myLex s of
                       Just (xs :*: ys) | xs == str -> Just (() :*: ys)
                       _ -> Nothing

-- | Only succeeds if the characters in the input exactly match @str@.
string :: ParserM m => B.ByteString -> m ()
string str = work
        $ \s -> case BC.splitAt (BC.length str) s of
                  (h, t) | h == str -> Just (() :*: t)
                  _ -> Nothing

-- | 'lexEof' looks for optional spaces followed by the end of input.
-- Uses 'Maybe' inside 'ParserM' to handle failed matches, so
-- that it always returns () on success.
lexEof :: ParserM m => m ()
lexEof = work
        $ \s -> if B.null (dropSpace s)
                then Just (() :*: B.empty)
                else Nothing

-- | 'myLex' drops leading spaces and then breaks the string at the
-- next space.  Returns 'Nothing' when the string is empty after
-- dropping leading spaces, otherwise it returns the first sequence
-- of non-spaces and the remainder of the input.
myLex :: B.ByteString -> Maybe (ParserState B.ByteString)
myLex s = let s' = dropSpace s
           in if B.null s'
              then Nothing
              else Just $ stuple $ breakSpace s'

-- | Like 'myLex' except that it is in ParserM
myLex' :: ParserM m => m B.ByteString
myLex' = work myLex

-- | Accepts the next character and returns it.  Only fails at end of
-- input.
anyChar :: ParserM m => m Char
anyChar = work $ \s -> stuple <$> BC.uncons s

-- | Only succeeds at end of input, consumes no characters.
endOfInput :: ParserM m => m ()
endOfInput = work $ \s -> if B.null s
                            then Just (() :*: s)
                            else Nothing

-- | Accepts only the specified character.  Consumes a character, if
-- available.
char :: ParserM m => Char -> m ()
char c = work $ \s ->
  case BC.uncons s of
  Just (c', s') | c == c' -> Just (() :*: s')
  _ -> Nothing

-- | Parse an integer and return it.  Skips leading whitespaces and
-- | uses the efficient ByteString readInt.
int :: ParserM m => m Int
int = work $ \s -> stuple <$> readIntPS s

-- | Discards spaces until a non-space character is encountered.
-- Always succeeds.
skipSpace :: ParserM m => m ()
skipSpace = alterInput dropSpace

-- | Discards any characters as long as @p@ returns True.  Always
-- | succeeds.
skipWhile :: ParserM m => (Char -> Bool) -> m ()
skipWhile p = alterInput (BC.dropWhile p)

-- | Takes characters while @p@ returns True.  Always succeeds.
takeTill :: ParserM m => (Char -> Bool) -> m B.ByteString
takeTill p = work $ \s -> Just $ stuple (BC.break p s)

-- | Equivalent to @takeTill (==c)@, except that it is optimized for
-- | the equality case.
takeTillChar :: ParserM m => Char -> m B.ByteString
takeTillChar c = work $ \s -> Just $ stuple (BC.break (==c) s)

-- | Takes exactly @n@ bytes, or fails.
take :: ParserM m => Int -> m B.ByteString
take n = work $ \s -> if B.length s >= n
                        then Just $ stuple $ BC.splitAt n s
                        else Nothing

-- | This is a highly optimized way to read lines that start with a
-- particular character.  To implement this efficiently we need access
-- to the parser's internal state.  If this is implemented in terms of
-- the other primitives for the parser it requires us to consume one
-- character at a time.  That leads to @(>>=)@ wasting significant
-- time.
linesStartingWith :: ParserM m => Char -> m [B.ByteString]
linesStartingWith c = work $ linesStartingWith' c

-- | Helper function for 'linesStartingWith'.
linesStartingWith' :: Char -> B.ByteString -> Maybe (ParserState [B.ByteString])
linesStartingWith' c thes =
    Just (lsw [] thes)
    where lsw acc s | B.null s || BC.head s /= c = reverse acc :*: s
          lsw acc s = let s' = B.tail s
                  in case breakFirstPS '\n' s' of
                     Just (l, r) -> lsw (l:acc) r
                     Nothing -> reverse (s':acc) :*: B.empty

-- | This is a highly optimized way to read lines that start with a
-- particular character, and stops when it reaches a particular |
-- character.  See 'linesStartingWith' for details on why this |
-- defined here as a primitive.
linesStartingWithEndingWith :: ParserM m => Char -> Char -> m [B.ByteString]
linesStartingWithEndingWith st en = work $ linesStartingWithEndingWith' st en

-- | Helper function for 'linesStartingWithEndingWith'.
linesStartingWithEndingWith' :: Char -> Char -> B.ByteString
                             -> Maybe (ParserState [B.ByteString])
linesStartingWithEndingWith' st en = lswew
    where
  lswew x
    | B.null x = Nothing
    | BC.head x == en = Just $ [] :*: B.tail x
    | BC.head x /= st = Nothing
    | otherwise = case BC.break ('\n' ==) $ B.tail x of
              (l,r) -> case lswew $ B.tail r of
                       Just (ls :*: r') -> Just ((l:ls) :*: r')
                       Nothing ->
                           case breakLastPS en l of
                           Just (l2,_) ->
                               Just ([l2] :*: B.drop (B.length l2+2) x)
                           Nothing -> Nothing


-- | Applies a function to the input stream and discards the
-- result of the function.
alterInput :: ParserM m
            => (B.ByteString -> B.ByteString) -> m ()
alterInput f = work (\s -> Just (() :*: f s))

-- | If @p@ fails it returns @x@, otherwise it returns the result of @p@.
option :: Alternative f => a -> f a -> f a
option x p = p <|> pure x

-- | Attempts each option until one succeeds.
choice :: Alternative f => [f a] -> f a
choice = asum

-- |Ensure that a parser consumes input when producing a result
-- Causes the initial state of the input stream to be held on to while the
-- parser runs, so use with caution.
checkConsumes :: ParserM m => m a -> m a
checkConsumes parser = do
   x <- B.length <$> peekInput
   res <- parser
   x' <- B.length <$> peekInput
   if x' < x then return res else mzero

class (Functor m, Applicative m, Alternative m, Monad m, MonadPlus m) => ParserM m where
    -- | Applies a parsing function inside the 'ParserM' monad.
    work :: (B.ByteString -> Maybe (ParserState a)) -> m a
    -- | Allows for the inspection of the input that is yet to be parsed.
    peekInput :: m B.ByteString
    -- | Run the parser
    parse :: m a -> B.ByteString -> Maybe (a, B.ByteString)

----- Strict Monad -----
-- | 'parseStrictly' applies the parser functions to a string
-- and checks that each parser produced a result as it goes.
-- The strictness is in the 'ParserM' instance for 'SM'.
parseStrictly :: SM a -> B.ByteString -> Maybe (a, B.ByteString)
parseStrictly (SM f) s = case f s of
  Just (a :*: r) -> Just (a, r)
  _ -> Nothing

-- | ParserState represents the internal state of the parser.  We make it
-- strict and specialize it on ByteString.  This is purely to help GHC
-- optimize.  If performance were not a concern, it could be replaced
-- with @(a, ByteString)@.
data ParserState a = !a :*: !B.ByteString

-- | Convert from a lazy tuple to a strict tuple.
stuple :: (a, B.ByteString) -> ParserState a
stuple (a, b) = a :*: b

-- | 'SM' is the Strict Monad for parsing.
newtype SM a = SM (B.ByteString -> Maybe (ParserState a))

bindSM :: SM a -> (a -> SM b) -> SM b
bindSM (SM m) k = SM $ \s -> case m s of
                             Nothing -> Nothing
                             Just (x :*: s') ->
                               case k x of
                               SM y -> y s'
{-# INLINE bindSM #-}
returnSM :: a -> SM a
returnSM x = SM (\s -> Just (x :*: s))
{-# INLINE returnSM #-}
failSM :: String -> SM a
failSM _ = SM (\_ -> Nothing)
{-# INLINE failSM #-}

instance Monad SM where
    (>>=)  = bindSM
    return = returnSM

instance ParserM SM where
    work = SM
    peekInput = SM $ \s -> Just (s :*: s)
    parse = parseStrictly

-- The following instances allow us to use more conventional
-- interfaces provided by other parser libraries. The instances are
-- defined using bindSM, returnSM, and failSM to avoid any infinite,
-- or even unneccessary, recursion of instances between between
-- ParserM and Monad.  Other recursive uses will be fine, such as
-- (<|>) = mplus.
instance MonadPlus SM where
  mzero = failSM ""
  -- | Over using mplus can lead to space leaks.  It's best to push
  -- the use of mplus as far down as possible, because until the the
  -- first parameter completes, we must hold on to the input.
  mplus (SM a) (SM b) = SM $ \s ->
    case a s of
      Nothing -> b s
      r -> r

instance Functor SM where
  fmap f m = m `bindSM` (returnSM . f)

instance Applicative SM where
  pure = returnSM
  a <*> b =
    a `bindSM` \c ->
    b `bindSM` \d ->
    returnSM (c d)

instance Alternative SM where
  empty = failSM ""
  (<|>) = mplus