{-# LANGUAGE BangPatterns, CPP, Rank2Types, OverloadedStrings #-}
-- |
-- Module      :  Data.Picoparsec.Monoid.Internal
-- Copyright   :  Bryan O'Sullivan 2007-2011, Mario Blažević <blamario@yahoo.com> 2014
-- License     :  BSD3
--
-- Maintainer  :  Mario Blažević
-- Stability   :  experimental
-- Portability :  unknown
--
-- Simple, efficient combinator parsing for
-- 'Data.Monoid.Cancellative.LeftGCDMonoid' and
-- 'Data.Monoid.Factorial.FactorialMonoid' inputs, loosely based on
-- the Parsec library.

module Data.Picoparsec.Monoid.Internal
    (
    -- * Parser types
      Parser
    , Result

    -- * Running parsers
    , parse
    , parseOnly

    -- * Combinators
    , module Data.Picoparsec.Combinator

    -- * Parsing individual tokens
    , satisfy
    , satisfyWith
    , anyToken
    , skip
    , peekToken
    
    -- ** Parsing individual characters
    , anyChar
    , char
    , satisfyChar
    , peekChar
    , peekChar'

    -- * Efficient string handling
    , scan
    , skipWhile
    , string
    , stringTransform
    , take
    , takeWhile
    , takeWhile1
    , takeWith
    , takeTill

    -- ** Efficient character string handling
    , scanChars
    , skipCharsWhile
    , takeCharsWhile
    , takeCharsWhile1
    , takeCharsTill
    , takeTillChar
    , takeTillChar1

    -- ** Consume all remaining input
    , takeRest

    -- * Utilities
    , endOfLine
    , ensureOne
    ) where

import Control.Applicative ((<|>), (<$>))
import Control.Monad (when)
import Data.Picoparsec.Combinator
import Data.Picoparsec.Internal.Types
import Data.Monoid (Monoid(..), (<>))
import Data.Monoid.Cancellative (LeftGCDMonoid(..))
import Data.Monoid.Null (MonoidNull(null))
import qualified Data.Monoid.Factorial as Factorial
import Data.Monoid.Factorial (FactorialMonoid)
import Data.Monoid.Textual (TextualMonoid)
import qualified Data.Monoid.Textual as Textual
import Prelude hiding (getChar, null, span, take, takeWhile)
import qualified Data.Picoparsec.Internal.Types as T

type Result = IResult

ensure' :: FactorialMonoid t => Int -> T.Input t -> T.Added t -> More -> T.Failure t r -> T.Success t t r
        -> IResult t r
ensure' !n0 i0 a0 m0 kf0 ks0 =
    T.runParser (demandInput >> go n0) i0 a0 m0 kf0 ks0
  where
    go !n = T.Parser $ \i a m kf ks ->
        if Factorial.length (unI i) >= n
        then ks i a m (unI i)
        else T.runParser (demandInput >> go n) i a m kf ks

-- | If at least one token of input is available, return the current
-- input, otherwise fail.
ensureOne :: FactorialMonoid t => Parser t t
ensureOne = T.Parser $ \i0 a0 m0 kf ks ->
    if null (unI i0)
    -- The uncommon case is kept out-of-line to reduce code size:
    then ensure' 1 i0 a0 m0 kf ks
    else ks i0 a0 m0 (unI i0)
-- Non-recursive so the bounds check can be inlined:
{-# INLINE ensureOne #-}

-- | Ask for input.  If we receive any, pass it to a success
-- continuation, otherwise to a failure continuation.
prompt :: MonoidNull t => Input t -> Added t -> More
       -> (Input t -> Added t -> More -> IResult t r)
       -> (Input t -> Added t -> More -> IResult t r)
       -> IResult t r
prompt i0 a0 _m0 kf ks = Partial $ \s ->
    if null s
    then kf i0 a0 Complete
    else ks (i0 <> I s) (a0 <> A s) Incomplete

-- | Immediately demand more input via a 'Partial' continuation
-- result.
demandInput :: MonoidNull t => Parser t ()
demandInput = T.Parser $ \i0 a0 m0 kf ks ->
    if m0 == Complete
    then kf i0 a0 m0 ["demandInput"] "not enough input"
    else let kf' i a m = kf i a m ["demandInput"] "not enough input"
             ks' i a m = ks i a m ()
         in prompt i0 a0 m0 kf' ks'

-- | This parser always succeeds.  It returns 'True' if any input is
-- available either immediately or on demand, and 'False' if the end
-- of all input has been reached.
wantInput :: MonoidNull t => Parser t Bool
wantInput = T.Parser $ \i0 a0 m0 _kf ks ->
  case () of
    _ | not (null (unI i0)) -> ks i0 a0 m0 True
      | m0 == Complete  -> ks i0 a0 m0 False
      | otherwise       -> let kf' i a m = ks i a m False
                               ks' i a m = ks i a m True
                           in prompt i0 a0 m0 kf' ks'

-- | This parser always succeeds.  It returns 'True' if any input is
-- available on demand, and 'False' if the end of all input has been reached.
wantMoreInput :: MonoidNull t => Parser t Bool
wantMoreInput = T.Parser $ \i0 a0 m0 _kf ks ->
  if m0 == Complete  
  then ks i0 a0 m0 False
  else let kf' i a m = ks i a m False
           ks' i a m = ks i a m True
       in prompt i0 a0 m0 kf' ks'

get :: Parser t t
get  = T.Parser $ \i0 a0 m0 _kf ks -> ks i0 a0 m0 (unI i0)

put :: t -> Parser t ()
put s = T.Parser $ \_i0 a0 m0 _kf ks -> ks (I s) a0 m0 ()

-- | The parser @satisfy p@ succeeds for any prime input token for
-- which the predicate @p@ returns 'True'. Returns the token that is
-- actually parsed.
--
-- >digit = satisfy isDigit
-- >    where isDigit w = w >= "0" && w <= "9"
satisfy :: FactorialMonoid t => (t -> Bool) -> Parser t t
satisfy p = do
  s <- ensureOne
  let Just (first, rest) = Factorial.splitPrimePrefix s
  if p first then put rest >> return first else fail "satisfy"
{-# INLINE satisfy #-}

-- | The parser @satisfy p@ succeeds for any input character for
-- which the predicate @p@ returns 'True'. Returns the character that 
-- is actually parsed.
--
-- >digit = satisfy isDigit
-- >    where isDigit w = w >= "0" && w <= "9"
satisfyChar :: TextualMonoid t => (Char -> Bool) -> Parser t Char
satisfyChar p = do
  s <- ensureOne
  case Textual.splitCharacterPrefix s 
     of Just (first, rest) | p first -> put rest >> return first 
        _ -> fail "satisfy"
{-# INLINE satisfyChar #-}

-- | The parser @skip p@ succeeds for any prime input token for which
-- the predicate @p@ returns 'True'.
--
-- >skipDigit = skip isDigit
-- >    where isDigit w = w >= "0" && w <= "9"
skip :: FactorialMonoid t => (t -> Bool) -> Parser t ()
skip p = do
  s <- ensureOne
  let Just (first, rest) = Factorial.splitPrimePrefix s
  if p first then put rest else fail "skip"

-- | The parser @satisfyWith f p@ transforms an input token, and
-- succeeds if the predicate @p@ returns 'True' on the transformed
-- value. The parser returns the transformed token that was parsed.
satisfyWith :: FactorialMonoid t => (t -> a) -> (a -> Bool) -> Parser t a
satisfyWith f p = do
  s <- ensureOne
  let Just (first, rest) = Factorial.splitPrimePrefix s
      c = f $! first
  if p c then put rest >> return c else fail "satisfyWith"
{-# INLINE satisfyWith #-}

-- | Consume @n@ tokens of input, but succeed only if the predicate
-- returns 'True'.
takeWith :: FactorialMonoid t => Int -> (t -> Bool) -> Parser t t
takeWith n0 p =
  get >>= \i->
  let !(h, t) = Factorial.splitAt n0 i
      n1 = Factorial.length h
  in if null t && n1 < n0
     then put mempty
          >> demandInput
          >> takeWith' h n1 p
     else if p h
          then put t
               >> return h
          else fail "takeWith"
{-# INLINABLE takeWith #-}

-- The uncommon case
takeWith' :: FactorialMonoid t => t -> Int -> (t -> Bool) -> Parser t t
takeWith' h0 n0 p =
  get >>= \i->
  let !(h, t) = Factorial.splitAt n0 i
      n1 = Factorial.length h
      h1 = h0 <> h
  in if null t && n1 < n0
     then put mempty
          >> demandInput
          >> takeWith' h1 n1 p
     else if p h1
          then put t
               >> return h1
          else fail "takeWith"
{-# INLINABLE takeWith' #-}

-- | Consume exactly @n@ prime input tokens.
take :: FactorialMonoid t => Int -> Parser t t
take n = takeWith n (const True)
{-# INLINE take #-}

-- | @string s@ parses a prefix of input that identically matches
-- @s@. Returns the parsed string (i.e. @s@).  This parser consumes no
-- input if it fails (even if a partial match).
--
-- /Note/: The behaviour of this parser is different to that of the
-- similarly-named parser in Parsec, as this one is all-or-nothing.
-- To illustrate the difference, the following parser will fail under
-- Parsec given an input of @\"for\"@:
--
-- >string "foo" <|> string "for"
--
-- The reason for its failure is that the first branch is a
-- partial match, and will consume the letters @\'f\'@ and @\'o\'@
-- before failing.  In Attoparsec, the above parser will /succeed/ on
-- that input, because the failed first branch will consume nothing.
string :: (LeftGCDMonoid t, MonoidNull t) => t -> Parser t t
string s =
   get >>= \i->
   let !(p, s', i') = stripCommonPrefix s i
   in if null s'
      then put i' >> return s
      else if null i'
           then put mempty
                >> demandInput
                >> string' p s'
           else fail "string"
{-# INLINE string #-}

-- The uncommon case
string' :: (LeftGCDMonoid t, MonoidNull t) => t -> t -> Parser t t
string' consumed rest =
   get >>= \i->
   let !(p, s', i') = stripCommonPrefix rest i
   in if null s'
      then put i' >> return (consumed <> rest)
      else if null i'
           then put mempty
                >> demandInput
                >> string' (consumed <> p) s'
           else put (consumed <> i) 
                >> fail "string"

stringTransform :: (FactorialMonoid t, Eq t) => (t -> t) -> t
                -> Parser t t
stringTransform f s = takeWith (Factorial.length s) ((==f s) . f)
{-# INLINE stringTransform #-}

-- | Skip past input for as long as the predicate returns 'True'.
skipWhile :: FactorialMonoid t => (t -> Bool) -> Parser t ()
skipWhile p = go
 where
  go = do
    t <- Factorial.dropWhile p <$> get
    put t
    when (null t) $ do
      input <- wantMoreInput
      when input go
{-# INLINE skipWhile #-}

-- | Skip past input characters for as long as the predicate returns 'True'.
skipCharsWhile :: TextualMonoid t => (Char -> Bool) -> Parser t ()
skipCharsWhile p = go
 where
  go = do
    t <- Textual.dropWhile_ False p <$> get
    put t
    when (null t) $ do
      input <- wantMoreInput
      when input go
{-# INLINE skipCharsWhile #-}

-- | Consume input as long as the predicate returns 'False'
-- (i.e. until it returns 'True'), and return the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'True' on the first input token.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeTill :: FactorialMonoid t => (t -> Bool) -> Parser t t
takeTill p = takeWhile (not . p)
{-# INLINE takeTill #-}

-- | Consume input characters as long as the predicate returns 'False'
-- (i.e. until it returns 'True'), and return the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'True' on the first input token.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeCharsTill :: TextualMonoid t => (Char -> Bool) -> Parser t t
takeCharsTill p = takeCharsWhile (not . p)

-- | Consume all input until the character for which the predicate 
-- returns 'True' and return the consumed input.
--
-- The only difference between 'takeCharsTill' and 'takeTillChar' is
-- in their handling of non-character data: The former never consumes
-- it, the latter always does.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'True' on the first input token.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeTillChar :: TextualMonoid t => (Char -> Bool) -> Parser t t
takeTillChar p = go id
 where
  go acc = do
    (h,t) <- Textual.break_ False p <$> get
    put t
    if null t
      then do
        input <- wantInput
        if input
          then go (acc . mappend h)
          else return (acc h)
      else return (acc h)
{-# INLINE takeTillChar #-}

-- | Consume all input until the character for which the predicate 
-- returns 'True' and return the consumed input.
--
-- This parser always consumes at least one token: it will fail if the 
-- input starts with a character for which the predicate returns 
-- 'True' or if there is no input left.
takeTillChar1 :: TextualMonoid t => (Char -> Bool) -> Parser t t
takeTillChar1 p = do
  (`when` demandInput) =<< null <$> get
  (h,t) <- Textual.break_ False p <$> get
  when (null h && maybe True p (Textual.characterPrefix t)) $ fail "takeTillChar1"
  put t
  if null t
    then (h<>) <$> takeTillChar p
    else return h
{-# INLINE takeTillChar1 #-}

-- | Consume input as long as the predicate returns 'True', and return
-- the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'False' on the first input token.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeWhile :: FactorialMonoid t => (t -> Bool) -> Parser t t
takeWhile p = go id
 where
  go acc = do
    (h,t) <- Factorial.span p <$> get
    put t
    if null t
      then do
        input <- wantMoreInput
        if input
          then go (acc . mappend h)
          else return (acc h)
      else return (acc h)
{-# INLINE takeWhile #-}

-- | Consume input characters as long as the predicate returns 'True', 
-- and return the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'False' on the first input token.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeCharsWhile :: TextualMonoid t => (Char -> Bool) -> Parser t t
takeCharsWhile p = {-# SCC takeCharsWhile #-} go id
 where
  go acc = do
    (h,t) <- Textual.span_ False p <$> get
    put t
    if null t
      then do
        input <- wantMoreInput
        if input
          then go (acc . mappend h)
          else return (acc h)
      else return (acc h)
{-# INLINE takeCharsWhile #-}

-- | Consume all remaining input and return it as a single string.
takeRest :: MonoidNull t => Parser t t
takeRest = go []
 where
  go acc = do
    input <- wantInput
    if input
      then do
        s <- get
        put mempty
        go (s:acc)
      else return (mconcat $ reverse acc)
{-# INLINABLE takeRest #-}

-- | Consume input as long as the predicate returns 'True', and return
-- the consumed input.
--
-- This parser requires the predicate to succeed on at least one input
-- token: it will fail if the predicate never returns 'True'
-- or if there is no input left.
takeWhile1 :: FactorialMonoid t => (t -> Bool) -> Parser t t
takeWhile1 p = do
  (`when` demandInput) =<< null <$> get
  (h,t) <- Factorial.span p <$> get
  when (null h) $ fail "takeWhile1"
  put t
  if null t
    then (h<>) `fmap` takeWhile p
    else return h
{-# INLINE takeWhile1 #-}

takeCharsWhile1 :: TextualMonoid t => (Char -> Bool) -> Parser t t
takeCharsWhile1 p = do
  (`when` demandInput) =<< null <$> get
  (h,t) <- Textual.span_ False p <$> get
  when (null h) $ fail "takeCharsWhile1"
  put t
  if null t
    then (h<>) `fmap` takeCharsWhile p
    else return h
{-# INLINE takeCharsWhile1 #-}

-- | A stateful scanner.  The predicate consumes and transforms a
-- state argument, and each transformed state is passed to successive
-- invocations of the predicate on each token of the input until one
-- returns 'Nothing' or the input ends.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'Nothing' on the first prime input factor.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
scan :: FactorialMonoid t => s -> (s -> t -> Maybe s) -> Parser t t
scan s0 f = go s0 id
 where
  go s acc = do
    (h,t,s') <- Factorial.spanMaybe' s f <$> get
    put t
    if null t
      then do
        input <- wantMoreInput
        if input
          then go s' (acc . mappend h)
          else return (acc h)
      else return (acc h)
{-# INLINE scan #-}

-- | A stateful scanner.  The predicate consumes and transforms a
-- state argument, and each transformed state is passed to successive
-- invocations of the predicate on each token of the input until one
-- returns 'Nothing' or the input ends.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'Nothing' on the first prime input factor.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
scanChars :: TextualMonoid t => s -> (s -> Char -> Maybe s) -> Parser t t
scanChars s0 fc = go s0 id
 where
  go s acc = do
    (h,t,s') <- Textual.spanMaybe_' s fc <$> get
    put t
    if null t
      then do
        input <- wantMoreInput
        if input
          then go s' (acc . mappend h)
          else return (acc h)
      else return (acc h)
{-# INLINE scanChars #-}

-- | Match any prime input token.
anyToken :: FactorialMonoid t => Parser t t
anyToken = satisfy $ const True
{-# INLINE anyToken #-}

-- | Match any prime input token. Returns 'mempty' if end of input
-- has been reached. Does not consume any input.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
peekToken :: FactorialMonoid t => Parser t t
peekToken = T.Parser $ \i0 a0 m0 _kf ks ->
            if null (unI i0)
            then if m0 == Complete
                 then ks i0 a0 m0 mempty
                 else let k' i a m = ks i a m $! Factorial.primePrefix (unI i)
                      in prompt i0 a0 m0 k' k'
            else ks i0 a0 m0 $! Factorial.primePrefix (unI i0)
{-# INLINE peekToken #-}

-- | Match any character.
anyChar :: TextualMonoid t => Parser t Char
anyChar = satisfyChar $ const True
{-# INLINE anyChar #-}

-- | Match a specific character.
char :: TextualMonoid t => Char -> Parser t Char
char c = satisfyChar (== c) <?> show c
{-# INLINE char #-}

-- | Match any input character, if available. Does not consume any input.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
peekChar :: TextualMonoid t => Parser t (Maybe Char)
peekChar = T.Parser $ \i0 a0 m0 _kf ks ->
           if null (unI i0)
           then if m0 == Complete
                then ks i0 a0 m0 Nothing
                else let k' i a m = ks i a m $! Textual.characterPrefix (unI i)
                     in prompt i0 a0 m0 k' k'
           else ks i0 a0 m0 $! Textual.characterPrefix (unI i0)
{-# INLINE peekChar #-}

-- | Match any input character, failing if the input doesn't start
-- with any. Does not consume any input.
peekChar' :: TextualMonoid t => Parser t Char
peekChar' = do
  s <- ensureOne
  case Textual.characterPrefix s 
     of Just c -> return c
        _ -> fail "peekChar'"
{-# INLINE peekChar' #-}

-- | Match either a single newline character @\'\\n\'@, or a carriage
-- return followed by a newline character @\"\\r\\n\"@.
endOfLine :: (Eq t, TextualMonoid t) => Parser t ()
endOfLine = (char '\n' >> return ()) <|> (string "\r\n" >> return ())

-- | Terminal failure continuation.
failK :: Failure t a
failK i0 _a0 _m0 stack msg = Fail (unI i0) stack msg
{-# INLINE failK #-}

-- | Terminal success continuation.
successK :: Success t a a
successK i0 _a0 _m0 a = Done (unI i0) a
{-# INLINE successK #-}

-- | Run a parser.
parse :: Monoid t => Parser t a -> t -> IResult t a
parse m s = T.runParser m (I s) mempty Incomplete failK successK
{-# INLINE parse #-}

-- | Run a parser that cannot be resupplied via a 'Partial' result.
parseOnly :: Monoid t => Parser t a -> t -> Either String a
parseOnly m s = case T.runParser m (I s) mempty Complete failK successK of
                  Fail _ _ err -> Left err
                  Done _ a     -> Right a
                  _            -> error "parseOnly: impossible error!"
{-# INLINE parseOnly #-}