{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE UnicodeSyntax #-}
{-|
Module      : Data.Nanoparsec.Internal
Copyright   : © 2011  Maciej Piechotka
License     : BSD3

Maintainer  : uzytkownik2@gmail.com
Stability   : experimental
Portability :
-}
module Data.Nanoparsec.Internal
  (
    -- * Parser types
    Parser
  , Result(..)
  , S(input)
    
    -- * Running parsers
  , parse
    
    -- * Combinators
  , (<?>)
  , try  
    
    -- * Parsing individual elements
  , satisfy
  , satisfyWith
  , anyElem
  , skip
  , elem
  , notElem
  , elem'
  , notElem'
    
    -- * Efficient substring handling
  , skipWhile
  , string
  , stringTransform
  , take
  , takeWhile
  , takeWhile1
  , takeTill
    
    -- * State observation and manipulation functions
  , endOfInput
  , ensure
  )
where

import Control.Applicative
import Control.Monad
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as LB
import Data.Monoid
import Data.String
import qualified Data.ListLike as LL
import Data.Word
import Prelude hiding (take, takeWhile, elem, notElem)

()  Monoid m  m  m  m
() = mappend
{-# INLINE (⊕) #-}

ø  Monoid m  m
ø = mempty
{-# INLINE ø #-}
      
()  Eq a  a  a  Bool
() = (==)
{-# INLINE (≡) #-}

()  Eq a  a  a  Bool
() = (/=)
{-# INLINE (≢) #-}

()  Ord a  a  a  Bool
() = (>=)
{-# INLINE (≥) #-}

()  Monad m  m a  m b  m b
() = (>>)
{-# INLINE (≫) #-}

data Result δ r
    = Fail (S δ) [String] String
    | Partial (δ  Result δ r)
    | Done r (S δ)

newtype Parser δ a
    = Parser { runParser  forall r. S δ
                          Failure     δ   r
                          Success     δ a r
                          Result      δ   r }

type Failure δ   r =     S δ  [String]  String  Result δ r
type Success δ a r = a  S δ                      Result δ r

data More = Complete | Incomplete deriving (Eq, Show)

instance Monoid More where
    mappend Complete _        = Complete
    mappend _        Complete = Complete
    mappend _        _        = Incomplete
    mempty                    = Incomplete
    {-# INLINE mappend #-}

data S δ = S { input  !δ
             , _added  !δ
             , more  !More
             } deriving (Show)

instance (Show δ, Show r)  Show (Result δ r) where
    show (Fail _ stack msg) = "Fail " ++ show stack ++ " " ++ show msg
    show (Partial _)        = "Partial _"
    show (Done str r)       = "Done " ++ show str ++ " " ++ show r

instance Monoid δ  Monoid (S δ) where
    mempty = S mempty mempty mempty
    {-# INLINE mempty #-}
    {-# SPECIALIZE mempty ∷ S B.ByteString #-}
    {-# SPECIALIZE mempty ∷ S LB.ByteString #-}
    mappend (S inp add mor) (S _ add' mor')
        = S (inp  add') (add  add') (mor  mor')
    {-# INLINE mappend #-}
    {-# SPECIALIZE mappend ∷ S B.ByteString → S B.ByteString → S B.ByteString #-}
    {-# SPECIALIZE mappend ∷ S LB.ByteString → S LB.ByteString → S LB.ByteString #-}

instance Functor (Parser δ) where
    f `fmap` Parser p = Parser $ \s fa su  p s fa (su . f)
    {-# INLINE fmap #-}

instance Monoid δ  Applicative (Parser δ) where
    pure = return
    {-# INLINE pure #-}
    {-# SPECIALIZE pure ∷ a → Parser B.ByteString a #-}
    {-# SPECIALIZE pure ∷ a → Parser LB.ByteString a #-}
    (<*>) = ap
    {-# INLINE (<*>) #-}
    {-# SPECIALIZE (<*>) ∷ Parser B.ByteString (a → b) → Parser B.ByteString a → Parser B.ByteString b #-}
    {-# SPECIALIZE (<*>) ∷ Parser LB.ByteString (a → b) → Parser LB.ByteString a → Parser LB.ByteString b #-}
    (*>) = (>>)
    {-# INLINE (*>) #-}
    {-# SPECIALIZE (*>) ∷ Parser B.ByteString a → Parser B.ByteString b → Parser B.ByteString b #-}
    {-# SPECIALIZE (*>) ∷ Parser LB.ByteString a → Parser LB.ByteString b → Parser LB.ByteString b #-}
    x <* y = x >>= \a  y >> return a
    {-# INLINE (<*) #-}
    {-# SPECIALIZE (<*) ∷ Parser B.ByteString a → Parser B.ByteString b → Parser B.ByteString a #-}
    {-# SPECIALIZE (<*) ∷ Parser LB.ByteString a → Parser LB.ByteString b → Parser LB.ByteString a #-}

instance Monoid δ  Monad (Parser δ) where
    return x = Parser $ \s _ su  su x s
    {-# INLINE return #-}
    {-# SPECIALIZE return ∷ a → Parser B.ByteString a #-}
    {-# SPECIALIZE return ∷ a → Parser LB.ByteString a #-}
    Parser p >>= g
        = Parser $ \s fa su  p s fa (\a s'  runParser (g a) s' fa su)
    {-# INLINE (>>=) #-}
    {-# SPECIALIZE (>>=) ∷ Parser B.ByteString a → (a → Parser B.ByteString b) → Parser B.ByteString b #-}
    {-# SPECIALIZE (>>=) ∷ Parser LB.ByteString a → (a → Parser LB.ByteString b) → Parser LB.ByteString b #-}
    Parser p >> Parser r
        = Parser $ \s fa su  p s fa (\_ s'  r s' fa su)
    {-# INLINE (>>) #-}
    {-# SPECIALIZE (>>) ∷ Parser B.ByteString a → Parser B.ByteString b → Parser B.ByteString b #-}
    {-# SPECIALIZE (>>) ∷ Parser LB.ByteString a → Parser LB.ByteString b → Parser LB.ByteString b #-}
    fail err = Parser (\s fa _  fa s [] ("Failed reading: " ++ err))
    {-# INLINE fail #-}
    {-# SPECIALIZE fail ∷ String → Parser B.ByteString a #-}
    {-# SPECIALIZE fail ∷ String → Parser LB.ByteString a #-}

instance Monoid δ  Monoid (Parser δ a) where
    mempty = fail "mempty"
    {-# INLINE mempty #-}
    {-# SPECIALIZE mempty ∷ Parser B.ByteString a #-}
    {-# SPECIALIZE mempty ∷ Parser LB.ByteString a #-}
    Parser p `mappend` Parser r
        = Parser $ \s fa su  let fa' s' _ _ = r (s  s') fa su
                                  !s'' = noAdds s
                              in p s'' fa' su
    {-# INLINE mappend #-}
    {-# SPECIALIZE mappend ∷ Parser B.ByteString a → Parser B.ByteString a → Parser B.ByteString a #-}
    {-# SPECIALIZE mappend ∷ Parser LB.ByteString a → Parser LB.ByteString a → Parser LB.ByteString a #-}

instance Monoid δ  Alternative (Parser δ) where
    empty = fail "empty"
    {-# INLINE empty #-}
    {-# SPECIALIZE empty ∷ Parser B.ByteString a #-}
    {-# SPECIALIZE empty ∷ Parser LB.ByteString a #-}
    (<|>) = mappend
    {-# INLINE (<|>) #-}
    {-# SPECIALIZE (<|>) ∷ Parser B.ByteString a → Parser B.ByteString a → Parser B.ByteString a #-}
    {-# SPECIALIZE (<|>) ∷ Parser LB.ByteString a → Parser LB.ByteString a → Parser LB.ByteString a #-}

instance Monoid δ  MonadPlus (Parser δ) where
    mzero = fail "mzero"
    {-# INLINE mzero #-}
    {-# SPECIALIZE mzero ∷ Parser B.ByteString a #-}
    {-# SPECIALIZE mzero ∷ Parser LB.ByteString a #-}
    mplus = mappend
    {-# INLINE mplus #-}
    {-# SPECIALIZE mplus ∷ Parser B.ByteString a → Parser B.ByteString a → Parser B.ByteString a #-}
    {-# SPECIALIZE mplus ∷ Parser LB.ByteString a → Parser LB.ByteString a → Parser LB.ByteString a #-}

instance (Eq δ, LL.ListLike δ ε, IsString δ)  IsString (Parser δ δ) where
    fromString = string . fromString    

noAdds  Monoid δ  S δ  S δ
noAdds (S s _ c) = S s ø c
{-# INLINE noAdds #-}
{-# SPECIALIZE noAdds ∷ S B.ByteString → S B.ByteString #-}
{-# SPECIALIZE noAdds ∷ S LB.ByteString → S LB.ByteString #-}

-- | Succeed only if at least @n@ elements of input are available.
ensure  LL.ListLike δ ε  Int  Parser δ ()
ensure n = Parser $ \st@(S s _ _) fa su 
    if LL.length s  n
        then su () st
        else runParser (demandInput  ensure n) st fa su
{-# SPECIALIZE ensure ∷ Int → Parser B.ByteString () #-}
{-# SPECIALIZE ensure ∷ Int → Parser LB.ByteString () #-}

-- | Ask for input.  If we receive any, pass it to a success
-- continuation, otherwise to a failure continuation.
prompt  LL.ListLike δ ε
        S δ  (S δ  Result δ r)  (S δ  Result δ r)  Result δ r
prompt (S s a _) fa su = Partial $ \p 
    if LL.null s
        then fa $ S s a Complete
        else su $ S (s  p) (a  p) Incomplete
{-# SPECIALIZE prompt ∷ S B.ByteString
                      → (S B.ByteString → Result B.ByteString r)
                      → (S B.ByteString → Result B.ByteString r)
                      → Result B.ByteString r #-}
{-# SPECIALIZE prompt ∷ S LB.ByteString
                      → (S LB.ByteString → Result LB.ByteString r)
                      → (S LB.ByteString → Result LB.ByteString r)
                      → Result LB.ByteString r #-}

-- | Immediately demand more input via a 'Partial' continuation
-- result.
demandInput  LL.ListLike δ ε  Parser δ ()
demandInput = Parser $ \st fa su 
    if more st  Complete
        then fa st ["demandInput"] "not enough characters"
        else prompt st (\st'  fa st' ["demandInput"] "not enough characters")
                       (su ())
{-# SPECIALIZE demandInput ∷ Parser B.ByteString () #-}
{-# SPECIALIZE demandInput ∷ Parser LB.ByteString () #-}

-- | This parser always succeeds.  It returns 'True' if any input is
-- available either immediately or on demand, and 'False' if the end
-- of all input has been reached.
wantInput  LL.ListLike δ ε  Parser δ Bool
wantInput = Parser $ \st@(S s _ c) _ su  case () of
  _ | not (LL.null s)  su True st
    | c  Complete     su False st
    | otherwise        prompt st (su False) (su True)
{-# SPECIALIZE wantInput ∷ Parser B.ByteString Bool #-}
{-# SPECIALIZE wantInput ∷ Parser LB.ByteString Bool #-}

get  Parser δ δ
get = Parser (\st _ su  su (input st) st)

put  δ  Parser δ ()
put s = Parser (\(S _ a c) _ su  su () (S s a c))

-- | Attempt a parse, and if it fails, rewind the input so that no
-- input appears to have been consumed.
--
-- This combinator is useful in cases where a parser might consume
-- some input before failing, i.e. the parser needs arbitrary
-- lookahead.  The downside to using this combinator is that it can
-- retain input for longer than is desirable.
try  Monoid δ  Parser δ a  Parser δ a
try p = Parser (\st fa su  runParser p (noAdds st) (fa . (st )) su)
{-# SPECIALIZE try ∷ Parser B.ByteString a → Parser B.ByteString a #-}
{-# SPECIALIZE try ∷ Parser LB.ByteString a → Parser LB.ByteString a #-}

-- | The parser @satisfy p@ succeeds for any element for which the
-- predicate @p@ returns 'True'. Returns the element that is actually
-- parsed.
--
-- >digit = satisfy isDigit
-- >    where isDigit w = w >= 48 && w <= 57
satisfy  LL.ListLike δ ε  (ε  Bool)  Parser δ ε
satisfy p = do
    ensure 1
    s  get
    let w = LL.head s
    if p w
        then put (LL.tail s)  return w
        else fail "satisfy"
{-# SPECIALIZE satisfy ∷ (Word8 → Bool) → Parser B.ByteString Word8 #-}
{-# SPECIALIZE satisfy ∷ (Word8 → Bool) → Parser LB.ByteString Word8 #-}

-- | The parser @skip p@ succeeds for any element for which the predicate
-- @p@ returns 'True'.
--
-- >space = skip isSpace
-- >    where isDigit w = w == 9 || w == 10 || w == 13 || w == 32
skip  LL.ListLike δ ε  (ε  Bool)  Parser δ ()
skip p = do
  ensure 1
  s  get
  if p (LL.head s)
      then put (LL.tail s)
      else fail "skip"
{-# SPECIALIZE skip ∷ (Word8 → Bool) → Parser B.ByteString () #-}
{-# SPECIALIZE skip ∷ (Word8 → Bool) → Parser LB.ByteString () #-}

-- | The parser @satisfyWith f p@ transforms an element, and succeeds if
-- the predicate @p@ returns 'True' on the transformed value. The
-- parser returns the transformed element that was parsed.
satisfyWith  LL.ListLike δ ε  (ε  a)  (a  Bool)  Parser δ a
satisfyWith f p = do
    ensure 1
    s  get
    let c = f (LL.head s)
    if p c
        then put (LL.tail s)  return c
        else fail "satisfyWith"
{-# SPECIALIZE satisfyWith ∷ (Word8 → a) → (a → Bool) → Parser B.ByteString a #-}
{-# SPECIALIZE satisfyWith ∷ (Word8 → a) → (a → Bool) → Parser LB.ByteString a #-}

-- | Consume @n@ element of input, but succeed only if the predicate
-- returns 'True'.
takeWith  LL.ListLike δ ε  Int  (δ  Bool)  Parser δ δ
takeWith n p = do
    ensure n
    s  get
    let (h, t) = LL.splitAt n s
    if p h
        then put t  return h
        else fail "takeWith"
{-# SPECIALIZE takeWith ∷ Int → (B.ByteString → Bool) → Parser B.ByteString B.ByteString #-}
{-# SPECIALIZE takeWith ∷ Int → (LB.ByteString → Bool) → Parser LB.ByteString LB.ByteString #-}

-- | Consume exactly @n@ elements of input.
take  LL.ListLike δ ε  Int  Parser δ δ
take n = takeWith n (const True)
{-# INLINE take #-}
{-# SPECIALIZE take ∷ Int → Parser B.ByteString B.ByteString #-}
{-# SPECIALIZE take ∷ Int → Parser LB.ByteString LB.ByteString #-}

-- | @string s@ parses a sequence of elements that identically match
-- @s@. Returns the parsed string (i.e. @s@).  This parser consumes no
-- input if it fails (even if a partial match).
--
-- /Note/: The behaviour of this parser is different to that of the
-- similarly-named parser in Parsec, as this one is all-or-nothing.
-- To illustrate the difference, the following parser will fail under
-- Parsec given an input of @"for"@:
--
-- >string "foo" <|> string "for"
--
-- The reason for its failure is that that the first branch is a
-- partial match, and will consume the letters @\'f\'@ and @\'o\'@
-- before failing.  In Nnaoparsec, the above parser will /succeed/ on
-- that input, because the failed first branch will consume nothing.
string  (Eq δ, LL.ListLike δ ε)  δ  Parser δ δ
string s = takeWith (LL.length s) ( s)
{-# INLINE string #-}
{-# SPECIALIZE string ∷ B.ByteString → Parser B.ByteString B.ByteString #-}
{-# SPECIALIZE string ∷ LB.ByteString → Parser LB.ByteString LB.ByteString #-}

stringTransform  (LL.ListLike δ ε, Eq δ)  (δ  δ)  δ  Parser δ δ
stringTransform f s = takeWith (LL.length s) (( f s) . f)
{-# INLINE stringTransform #-}
{-# SPECIALIZE stringTransform ∷ (B.ByteString → B.ByteString) → B.ByteString → Parser B.ByteString B.ByteString #-}
{-# SPECIALIZE stringTransform ∷ (LB.ByteString → LB.ByteString) → LB.ByteString → Parser LB.ByteString LB.ByteString #-}

skipWhile  LL.ListLike δ ε  (ε  Bool)  Parser δ ()
skipWhile p = go
              where go = do
                      _  wantInput
                      t  LL.dropWhile p <$> get
                      put t
                      when (LL.null t) go
{-# SPECIALIZE skipWhile ∷ (Word8 → Bool) → Parser B.ByteString () #-}
{-# SPECIALIZE skipWhile ∷ (Word8 → Bool) → Parser LB.ByteString () #-}

-- | Consume input as long as the predicate returns 'False'
-- (i.e. until it returns 'True'), and return the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'True' on the first element of input.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeTill  LL.ListLike δ ε  (ε  Bool)  Parser δ δ
takeTill p = takeWhile (not . p)
{-# INLINE takeTill #-}
{-# SPECIALIZE takeTill ∷ (Word8 → Bool) → Parser B.ByteString B.ByteString #-}
{-# SPECIALIZE takeTill ∷ (Word8 → Bool) → Parser LB.ByteString LB.ByteString #-}

-- | Consume input as long as the predicate returns 'True', and return
-- the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'False' on the first element of input.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeWhile  LL.ListLike δ ε  (ε  Bool)  Parser δ δ
takeWhile p = go ø
              where go acc = do
                      inp <- wantInput
                      if inp
                          then do
                            (h, t)  LL.span p <$> get
                            put t
                            if (LL.null t)
                                then go (h  acc)
                                else return h
                          else return ø
{-# SPECIALIZE takeWhile ∷ (Word8 → Bool) → Parser B.ByteString B.ByteString #-}
{-# SPECIALIZE takeWhile ∷ (Word8 → Bool) → Parser LB.ByteString LB.ByteString #-}

-- | Consume input as long as the predicate returns 'True', and return
-- the consumed input.
--
-- This parser requires the predicate to succeed on at least one element
-- of input: it will fail if the predicate never returns 'True' or if
-- there is no input left.
takeWhile1  LL.ListLike δ ε  (ε  Bool)  Parser δ δ
takeWhile1 p = do
  (`when` demandInput) =<< LL.null <$> get
  (h, t)  LL.span p <$> get
  when (LL.null h) $ fail "takeWhile1"
  put t
  if LL.null t
      then (h ) `fmap` takeWhile p
      else return h
{-# SPECIALIZE takeWhile1 ∷ (Word8 → Bool) → Parser B.ByteString B.ByteString #-}
{-# SPECIALIZE takeWhile1 ∷ (Word8 → Bool) → Parser LB.ByteString LB.ByteString #-}

anyElem  LL.ListLike δ ε  Parser δ ε
anyElem = satisfy (const True)
{-# INLINE anyElem #-}
{-# SPECIALIZE anyElem ∷ Parser B.ByteString Word8 #-}
{-# SPECIALIZE anyElem ∷ Parser LB.ByteString Word8 #-}

elem  (Eq ε, Show ε, LL.ListLike δ ε)  ε  Parser δ ε
elem x = elem' (show x) x
{-# INLINE elem #-}
{-# SPECIALIZE elem ∷ Word8 → Parser B.ByteString Word8 #-}
{-# SPECIALIZE elem ∷ Word8 → Parser LB.ByteString Word8 #-}

notElem  (Eq ε, Show ε, LL.ListLike δ ε)  ε  Parser δ ε
notElem x = notElem' ("not "  show x) x
{-# INLINE notElem #-}
{-# SPECIALIZE notElem ∷ Word8 → Parser B.ByteString Word8 #-}
{-# SPECIALIZE notElem ∷ Word8 → Parser LB.ByteString Word8 #-}

elem'  (Eq ε, LL.ListLike δ ε)  String  ε  Parser δ ε
elem' p c = satisfy ( c) <?> p
{-# INLINE elem' #-}
{-# SPECIALIZE elem' ∷ String → Word8 → Parser B.ByteString Word8 #-}
{-# SPECIALIZE elem' ∷ String → Word8 → Parser LB.ByteString Word8 #-}

notElem'  (Eq ε, LL.ListLike δ ε)  String  ε  Parser δ ε
notElem' p c = satisfy ( c) <?> p
{-# INLINE notElem' #-}
{-# SPECIALIZE notElem' ∷ String → Word8 → Parser B.ByteString Word8 #-}
{-# SPECIALIZE notElem' ∷ String → Word8 → Parser LB.ByteString Word8 #-}

-- | Match only if all input has been consumed.
endOfInput  LL.ListLike δ ε  Parser δ ()
endOfInput = Parser $ \st@(S i _ mo) fa su  case () of
    _ | not (LL.null i)  fa    st [] "endOfInput"
      | mo  Complete    su () st
      | otherwise        let su' _ st'     = su () (st  st')
                              fa'   st' _ _ = fa    (st  st') [] "endOfInput"
                          in runParser demandInput st fa' su'
{-# SPECIALIZE endOfInput ∷ Parser B.ByteString () #-}
{-# SPECIALIZE endOfInput ∷ Parser LB.ByteString () #-}

-- | Match either a single newline character @\'\\n\'@, or a carriage
-- return followed by a newline character @\"\\r\\n\"@.
(<?>)  Parser δ a  String  Parser δ a
p <?> ms = Parser $ \s fa su  runParser p s (\s' sts m  fa s' (ms:sts) m) su
{-# INLINE (<?>) #-}
infix 0 <?>

-- | Run a parser.
parse  Monoid δ  Parser δ a  δ  Result δ a
parse m s = runParser m (S s ø Incomplete) Fail Done
{-# INLINE parse #-}
{-# SPECIALIZE parse ∷ Parser B.ByteString a → B.ByteString → Result B.ByteString a #-}
{-# SPECIALIZE parse ∷ Parser LB.ByteString a → LB.ByteString → Result LB.ByteString a #-}