{-# LANGUAGE BangPatterns, CPP #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Data.Attoparsec.Incremental
-- Copyright   :  Bryan O'Sullivan 2009
-- License     :  BSD3
-- 
-- Maintainer  :  bos@serpentine.com
-- Stability   :  experimental
-- Portability :  unknown
--
-- Simple, efficient, and incremental parser combinators for lazy
-- 'L.ByteString' strings, loosely based on the Parsec library.
--
-- This module is heavily influenced by Adam Langley's incremental
-- parser in his @binary-strict@ package.
-- 
-----------------------------------------------------------------------------
module Data.Attoparsec.Incremental
    (
    -- * Incremental parsing
    -- $incremental

    -- * Parser types
      Parser
    , Result(..)

    -- * Running parsers
    , parse
    , parseWith
    , parseTest

    -- * Combinators
    , (<?>)
    , try

    -- * Parsing individual bytes
    , word8
    , notWord8
    , anyWord8
    , satisfy

    -- * Efficient string handling
    , string
    , skipWhile
    , takeCount
    , takeTill
    , takeWhile

    -- * State observation and manipulation functions
    , endOfInput
    , pushBack
    , yield

    -- * Combinators
    , module Data.Attoparsec.Combinator
    ) where

import Data.Attoparsec.Combinator
import Control.Monad (MonadPlus(..), ap)
import Data.Attoparsec.Internal ((+:))
import Data.Word (Word8)
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Internal as L
import Prelude hiding (takeWhile)

-- $incremental
-- Incremental parsing makes it possible to supply a parser with only
-- a limited amount of input.  If the parser cannot complete due to
-- lack of data, it will return a 'Partial' result with a continuation
-- to which more input can be supplied, as follows:
--
-- >   case parse myParser someInput of
-- >     Partial k -> k moreInput
--
-- To signal that no more input is available, pass an empty
-- string to this continuation.

data S = S {-# UNPACK #-} !S.ByteString -- first chunk of input
           L.ByteString                 -- rest of input
           [L.ByteString]               -- input acquired during backtracks
           !Bool                        -- have we hit EOF yet?
           {-# UNPACK #-} !Int          -- failure depth

-- | The result of a partial parse.
data Result a = Failed String
                -- ^ The parse failed, with the given error message.
              | Done L.ByteString a
                -- ^ The parse succeeded, producing the given
                -- result. The 'L.ByteString' contains any unconsumed
                -- input.
              | Partial (L.ByteString -> Result a)
                -- ^ The parse ran out of data before finishing. To
                -- resume the parse, pass more data to the given
                -- continuation.

instance (Show a) => Show (Result a) where
  show (Failed err)      = "Failed " ++ show err
  show (Done L.Empty rs) = "Done Empty " ++ show rs
  show (Done rest rs)    = "Done (" ++ show rest ++ ") " ++ show rs
  show (Partial _)       = "Partial"

-- | This is the internal version of the above. This is the type which
-- is actually used by the code, as it has the extra information
-- needed for backtracking. This is converted to a friendly 'Result'
-- type just before giving it to the outside world.
data IResult a = IFailed S String
               | IDone S a
               | IPartial (L.ByteString -> IResult a)

instance Show (IResult a) where
  show (IFailed _ err) = "IFailed " ++ show err
  show (IDone _ _)     = "IDone"
  show (IPartial _)    = "IPartial"

-- | The parser type.
newtype Parser r a = Parser {
      unParser :: S -> (a -> S -> IResult r) -> IResult r
    }

instance Monad (Parser r) where
  return a = Parser $ \s k -> k a s
  m >>= k = Parser $ \s cont -> unParser m s $ \a s' -> unParser (k a) s' cont
  fail err = Parser $ \s -> const $ IFailed s err

zero :: Parser r a
zero = fail ""

-- | I'm not sure if this is a huge bodge or not. It probably is.
--
-- When performing a choice (in @plus@), the failure depth in the
-- current state is incremented. If a failure is generated inside the
-- attempted path, the state carried in the IFailure will have this
-- incremented failure depth. However, we don't want to backtrack
-- after the attempted path has completed. Thus we insert this cut
-- continuation, which decrements the failure count of any failure
-- passing though, thus it would be caught in @plus@ and doesn't
-- trigger a backtrack.
cutContinuation :: (a -> S -> IResult r) -> a -> S -> IResult r
cutContinuation k v s =
  case k v s of
       IFailed (S lb i adds eof failDepth) err -> IFailed (S lb i adds eof (failDepth - 1)) err
       x -> x

appL :: L.ByteString -> L.ByteString -> L.ByteString
appL xs L.Empty = xs
appL L.Empty ys = ys
appL xs ys      = xs `L.append` ys

plus :: Parser r a -> Parser r a -> Parser r a
plus p1 p2 =
  Parser $ \(S sb lb adds eof failDepth) k ->
    let
      filt f@(IFailed (S _ _ adds' eof' failDepth') _)
        | failDepth' == failDepth + 1 =
            let lb' = lb `appL` L.concat (reverse adds')
            in  unParser p2 (S sb lb' (adds' ++ adds) eof' failDepth) k
        | otherwise = f
      filt (IPartial cont) = IPartial (filt . cont)
      filt v@(IDone _ _) = v
    in
      filt $ unParser p1 (S sb lb [] eof (failDepth + 1)) (cutContinuation k)

-- | This is a no-op combinator for compatibility.
try :: Parser r a -> Parser r a
try p = p

instance Functor (Parser r) where
    fmap f m = Parser $ \s cont -> unParser m s (cont . f)

infix 0 <?>

-- | Name the parser, in case failure occurs.
(<?>) :: Parser r a
      -> String                 -- ^ the name to use if parsing fails
      -> Parser r a
{-# INLINE (<?>) #-}
p <?> msg =
  Parser $ \st k ->
    case unParser p st k of
      IFailed st' _ -> IFailed st' msg
      ok -> ok

initState :: L.ByteString -> S
initState (L.Chunk sb lb) = S sb lb [] False 0
initState _               = S S.empty L.empty [] False 0

mkState :: L.ByteString -> [L.ByteString] -> Bool -> Int -> S
mkState bs adds eof failDepth =
    case bs of
      L.Empty -> S S.empty L.empty adds eof failDepth
      L.Chunk sb lb -> S sb lb adds eof failDepth

addX :: L.ByteString -> [L.ByteString] -> [L.ByteString]
addX s adds | L.null s = adds
            | otherwise = s : adds

-- | Resume our caller, handing back a 'Partial' result. This function
-- is probably not useful, but provided for completeness.
yield :: Parser r ()
yield = Parser $ \(S sb lb adds eof failDepth) k ->
  IPartial $ \s -> k () (S sb (lb `appL` s) (addX s adds) eof failDepth)

continue :: (S -> IResult r) -> Parser r a
         -> (a -> S -> IResult r) -> S -> IResult r
continue onEOF p k (S _sb _lb adds eof failDepth) =
    if eof
    then onEOF (S S.empty L.empty adds True failDepth)
    else IPartial $ \s -> let st = contState s adds failDepth
                          in unParser p st k

takeWith :: (L.ByteString -> (L.ByteString, L.ByteString))
         -> Parser r L.ByteString
takeWith splitf =
  Parser $ \st@(S sb lb adds eof failDepth) k ->
  let (left,rest) = splitf (sb +: lb)
  in if L.null rest
     then continue (k left) (takeWith splitf) (k . appL left) st
     else k left (mkState rest adds eof failDepth)
    
-- | Consume bytes while the predicate succeeds.
takeWhile :: (Word8 -> Bool) -> Parser r L.ByteString
takeWhile = takeWith . L.span

-- | Consume bytes while the predicate fails.  If the predicate never
-- succeeds, the entire input string is returned.
takeTill :: (Word8 -> Bool) -> Parser r L.ByteString
takeTill = takeWith . L.break

-- | Return exactly the given number of bytes.  If not enough are
-- available, fail.
takeCount :: Int -> Parser r L.ByteString
takeCount = tc . fromIntegral where
 tc n = Parser $ \st@(S sb lb adds eof failDepth) k ->
        let (h,t) = L.splitAt n (sb +: lb)
            l = L.length h
        in if L.length h == n
           then k h (mkState t adds eof failDepth)
           else continue (`IFailed` "takeCount: EOF")
                         (tc (n - l)) (k . appL h) st

-- | Match a literal string exactly.
string :: L.ByteString -> Parser r L.ByteString
string s =
  Parser $ \st@(S sb lb adds eof failDepth) k ->
    case L.splitAt (L.length s) (sb +: lb) of
      (h,t)
        | h == s -> k s (mkState t adds eof failDepth)
      (h,L.Empty)
        | h `L.isPrefixOf` s ->
            continue (`IFailed` "string: EOF")
                     (string (L.drop (L.length h) s))
                     (k . appL h)
                     st
      _ -> IFailed st "string failed to match"

contState :: L.ByteString -> [L.ByteString] -> Int -> S
contState s adds failDepth
    | L.null s  = S S.empty L.empty [] True failDepth
    | otherwise = mkState s (addX s adds) False failDepth

-- | Match a single byte based on the given predicate.
satisfy :: (Word8 -> Bool) -> Parser r Word8
satisfy p =
  Parser $ \st@(S sb lb adds eof failDepth) k ->
    case S.uncons sb of
      Just (w, sb') | p w -> k w (S sb' lb adds eof failDepth)
                    | otherwise -> IFailed st "failed to match"
      Nothing -> case L.uncons lb of
                   Just (w, lb') | p w -> k w (mkState lb' adds eof failDepth)
                                 | otherwise -> IFailed st "failed to match"
                   Nothing -> continue (`IFailed` "satisfy: EOF")
                                       (satisfy p) k st

-- | Force the given string to appear next in the input stream.
pushBack :: L.ByteString -> Parser r ()
pushBack bs =
    Parser $ \(S sb lb adds eof failDepth) k ->
        k () (mkState (bs `appL` (sb +: lb)) adds eof failDepth)

-- | Succeed if we have reached the end of the input string.
endOfInput :: Parser r ()
endOfInput = Parser $ \st@(S sb lb _adds _eof _failDepth) k ->
             if not (S.null sb) || not (L.null lb)
             then IFailed st "endOfInput: not EOF"
             else continue (k ()) endOfInput k st

toplevelTranslate :: IResult a -> Result a
toplevelTranslate (IFailed _ err) = Failed err
toplevelTranslate (IDone (S sb lb _ _ _) value) = Done (sb +: lb) value
toplevelTranslate (IPartial k) = Partial $ toplevelTranslate . k

terminalContinuation :: a -> S -> IResult a
terminalContinuation v s = IDone s v

-- | Run a parser.
parse :: Parser r r -> L.ByteString -> Result r
parse m input =
  toplevelTranslate $ unParser m (initState input) terminalContinuation

-- | Run a parser, using the given function to resupply it with input.
--
-- Here's an example that shows how to parse data from a socket, using
-- Johan Tibbell's @network-bytestring@ package.
--
-- >  import qualified Data.ByteString.Lazy as L
-- >  import Data.Attoparsec.Incremental (Parser, Result, parseWith)
-- >  import Network.Socket.ByteString.Lazy (recv_)
-- >  import Network.Socket (Socket)
-- >
-- >  netParse :: Parser r r -> Socket -> IO (Result r)
-- >  netParse p sock = parseWith (recv_ sock 65536) p L.empty
parseWith :: Applicative f => f L.ByteString -- ^ resupply parser with input
          -> Parser r r                      -- ^ parser to run
          -> L.ByteString                    -- ^ initial input
          -> f (Result r)
parseWith refill p s =
  case parse p s of
    Partial k -> k <$> refill
    ok        -> pure ok

-- | Try out a parser, and print its result.
parseTest :: (Show r) => Parser r r -> L.ByteString -> IO ()
parseTest p s = print (parse p s)

#define PARSER Parser r
#include "Word8Boilerplate.h"