{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE UnicodeSyntax #-}
{-|
Module      : Data.Nanoparsec.Internal
Copyright   : © 2011  Maciej Piechotka
License     : BSD3

Maintainer  : uzytkownik2@gmail.com
Stability   : experimental
Portability :
-}
module Data.Nanoparsec.Internal
  (
    -- * Parser types
    Parser
  , Result(..)
  , S(input)
    
    -- * Running parsers
  , parse
    
    -- * Combinators
  , (<?>)
  , try  
    
    -- * Parsing individual elements
  , satisfy
  , satisfyWith
  , anyElem
  , skip
  , elem
  , notElem
  , elem'
  , notElem'
    
    -- * Efficient substring handling
  , skipWhile
  , string
  , stringTransform
  , take
  , takeWhile
  , takeWhile1
  , takeTill
    
    -- * State observation and manipulation functions
  , endOfInput
  , ensure
  )
where

import Control.Applicative
import Control.Monad
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as LB
import Data.Monoid
import Data.String
import qualified Data.ListLike as LL
import Data.Word
import Prelude hiding (take, takeWhile, elem, notElem)

(⊕) ∷ Monoid m ⇒ m → m → m
(⊕) = mappend
{-# INLINE (⊕) #-}

ø ∷ Monoid m ⇒ m
ø = mempty
{-# INLINE ø #-}
      
(≡) ∷ Eq a ⇒ a → a → Bool
(≡) = (==)
{-# INLINE (≡) #-}

(≢) ∷ Eq a ⇒ a → a → Bool
(≢) = (/=)
{-# INLINE (≢) #-}

(≥) ∷ Ord a ⇒ a → a → Bool
(≥) = (>=)
{-# INLINE (≥) #-}

(≫) ∷ Monad m ⇒ m a → m b → m b
(≫) = (>>)
{-# INLINE (≫) #-}

data Result δ r
    = Fail (S δ) [String] String
    | Partial (δ → Result δ r)
    | Done r (S δ)

newtype Parser δ a
    = Parser { runParser ∷ forall r. S δ
                         → Failure     δ   r
                         → Success     δ a r
                         → Result      δ   r }

type Failure δ   r =     S δ → [String] → String → Result δ r
type Success δ a r = a → S δ →                     Result δ r

data More = Complete | Incomplete deriving (Eq, Show)

instance Monoid More where
    mappend Complete _        = Complete
    mappend _        Complete = Complete
    mappend _        _        = Incomplete
    mempty                    = Incomplete
    {-# INLINE mappend #-}

data S δ = S { input ∷ !δ
             , _added ∷ !δ
             , more ∷ !More
             } deriving (Show)

instance (Show δ, Show r) ⇒ Show (Result δ r) where
    show (Fail _ stack msg) = "Fail " ++ show stack ++ " " ++ show msg
    show (Partial _)        = "Partial _"
    show (Done str r)       = "Done " ++ show str ++ " " ++ show r

instance Monoid δ ⇒ Monoid (S δ) where
    mempty = S mempty mempty mempty
    {-# INLINE mempty #-}
    {-# SPECIALIZE mempty ∷ S B.ByteString #-}
    {-# SPECIALIZE mempty ∷ S LB.ByteString #-}
    mappend (S inp add mor) (S _ add' mor')
        = S (inp ⊕ add') (add ⊕ add') (mor ⊕ mor')
    {-# INLINE mappend #-}
    {-# SPECIALIZE mappend ∷ S B.ByteString → S B.ByteString → S B.ByteString #-}
    {-# SPECIALIZE mappend ∷ S LB.ByteString → S LB.ByteString → S LB.ByteString #-}

instance Functor (Parser δ) where
    f `fmap` Parser p = Parser $ \s fa su → p s fa (su . f)
    {-# INLINE fmap #-}

instance Monoid δ ⇒ Applicative (Parser δ) where
    pure = return
    {-# INLINE pure #-}
    {-# SPECIALIZE pure ∷ a → Parser B.ByteString a #-}
    {-# SPECIALIZE pure ∷ a → Parser LB.ByteString a #-}
    (<*>) = ap
    {-# INLINE (<*>) #-}
    {-# SPECIALIZE (<*>) ∷ Parser B.ByteString (a → b) → Parser B.ByteString a → Parser B.ByteString b #-}
    {-# SPECIALIZE (<*>) ∷ Parser LB.ByteString (a → b) → Parser LB.ByteString a → Parser LB.ByteString b #-}
    (*>) = (>>)
    {-# INLINE (*>) #-}
    {-# SPECIALIZE (*>) ∷ Parser B.ByteString a → Parser B.ByteString b → Parser B.ByteString b #-}
    {-# SPECIALIZE (*>) ∷ Parser LB.ByteString a → Parser LB.ByteString b → Parser LB.ByteString b #-}
    x <* y = x >>= \a → y >> return a
    {-# INLINE (<*) #-}
    {-# SPECIALIZE (<*) ∷ Parser B.ByteString a → Parser B.ByteString b → Parser B.ByteString a #-}
    {-# SPECIALIZE (<*) ∷ Parser LB.ByteString a → Parser LB.ByteString b → Parser LB.ByteString a #-}

instance Monoid δ ⇒ Monad (Parser δ) where
    return x = Parser $ \s _ su → su x s
    {-# INLINE return #-}
    {-# SPECIALIZE return ∷ a → Parser B.ByteString a #-}
    {-# SPECIALIZE return ∷ a → Parser LB.ByteString a #-}
    Parser p >>= g
        = Parser $ \s fa su → p s fa (\a s' → runParser (g a) s' fa su)
    {-# INLINE (>>=) #-}
    {-# SPECIALIZE (>>=) ∷ Parser B.ByteString a → (a → Parser B.ByteString b) → Parser B.ByteString b #-}
    {-# SPECIALIZE (>>=) ∷ Parser LB.ByteString a → (a → Parser LB.ByteString b) → Parser LB.ByteString b #-}
    Parser p >> Parser r
        = Parser $ \s fa su → p s fa (\_ s' → r s' fa su)
    {-# INLINE (>>) #-}
    {-# SPECIALIZE (>>) ∷ Parser B.ByteString a → Parser B.ByteString b → Parser B.ByteString b #-}
    {-# SPECIALIZE (>>) ∷ Parser LB.ByteString a → Parser LB.ByteString b → Parser LB.ByteString b #-}
    fail err = Parser (\s fa _ → fa s [] ("Failed reading: " ++ err))
    {-# INLINE fail #-}
    {-# SPECIALIZE fail ∷ String → Parser B.ByteString a #-}
    {-# SPECIALIZE fail ∷ String → Parser LB.ByteString a #-}

instance Monoid δ ⇒ Monoid (Parser δ a) where
    mempty = fail "mempty"
    {-# INLINE mempty #-}
    {-# SPECIALIZE mempty ∷ Parser B.ByteString a #-}
    {-# SPECIALIZE mempty ∷ Parser LB.ByteString a #-}
    Parser p `mappend` Parser r
        = Parser $ \s fa su → let fa' s' _ _ = r (s ⊕ s') fa su
                                  !s'' = noAdds s
                              in p s'' fa' su
    {-# INLINE mappend #-}
    {-# SPECIALIZE mappend ∷ Parser B.ByteString a → Parser B.ByteString a → Parser B.ByteString a #-}
    {-# SPECIALIZE mappend ∷ Parser LB.ByteString a → Parser LB.ByteString a → Parser LB.ByteString a #-}

instance Monoid δ ⇒ Alternative (Parser δ) where
    empty = fail "empty"
    {-# INLINE empty #-}
    {-# SPECIALIZE empty ∷ Parser B.ByteString a #-}
    {-# SPECIALIZE empty ∷ Parser LB.ByteString a #-}
    (<|>) = mappend
    {-# INLINE (<|>) #-}
    {-# SPECIALIZE (<|>) ∷ Parser B.ByteString a → Parser B.ByteString a → Parser B.ByteString a #-}
    {-# SPECIALIZE (<|>) ∷ Parser LB.ByteString a → Parser LB.ByteString a → Parser LB.ByteString a #-}

instance Monoid δ ⇒ MonadPlus (Parser δ) where
    mzero = fail "mzero"
    {-# INLINE mzero #-}
    {-# SPECIALIZE mzero ∷ Parser B.ByteString a #-}
    {-# SPECIALIZE mzero ∷ Parser LB.ByteString a #-}
    mplus = mappend
    {-# INLINE mplus #-}
    {-# SPECIALIZE mplus ∷ Parser B.ByteString a → Parser B.ByteString a → Parser B.ByteString a #-}
    {-# SPECIALIZE mplus ∷ Parser LB.ByteString a → Parser LB.ByteString a → Parser LB.ByteString a #-}

instance (Eq δ, LL.ListLike δ ε, IsString δ) ⇒ IsString (Parser δ δ) where
    fromString = string . fromString    

noAdds ∷ Monoid δ ⇒ S δ → S δ
noAdds (S s _ c) = S s ø c
{-# INLINE noAdds #-}
{-# SPECIALIZE noAdds ∷ S B.ByteString → S B.ByteString #-}
{-# SPECIALIZE noAdds ∷ S LB.ByteString → S LB.ByteString #-}

-- | Succeed only if at least @n@ elements of input are available.
ensure ∷ LL.ListLike δ ε ⇒ Int → Parser δ ()
ensure n = Parser $ \st@(S s _ _) fa su →
    if LL.length s ≥ n
        then su () st
        else runParser (demandInput ≫ ensure n) st fa su
{-# SPECIALIZE ensure ∷ Int → Parser B.ByteString () #-}
{-# SPECIALIZE ensure ∷ Int → Parser LB.ByteString () #-}

-- | Ask for input.  If we receive any, pass it to a success
-- continuation, otherwise to a failure continuation.
prompt ∷ LL.ListLike δ ε
       ⇒ S δ → (S δ → Result δ r) → (S δ → Result δ r) → Result δ r
prompt (S s a _) fa su = Partial $ \p →
    if LL.null s
        then fa $ S s a Complete
        else su $ S (s ⊕ p) (a ⊕ p) Incomplete
{-# SPECIALIZE prompt ∷ S B.ByteString
                      → (S B.ByteString → Result B.ByteString r)
                      → (S B.ByteString → Result B.ByteString r)
                      → Result B.ByteString r #-}
{-# SPECIALIZE prompt ∷ S LB.ByteString
                      → (S LB.ByteString → Result LB.ByteString r)
                      → (S LB.ByteString → Result LB.ByteString r)
                      → Result LB.ByteString r #-}

-- | Immediately demand more input via a 'Partial' continuation
-- result.
demandInput ∷ LL.ListLike δ ε ⇒ Parser δ ()
demandInput = Parser $ \st fa su →
    if more st ≡ Complete
        then fa st ["demandInput"] "not enough characters"
        else prompt st (\st' → fa st' ["demandInput"] "not enough characters")
                       (su ())
{-# SPECIALIZE demandInput ∷ Parser B.ByteString () #-}
{-# SPECIALIZE demandInput ∷ Parser LB.ByteString () #-}

-- | This parser always succeeds.  It returns 'True' if any input is
-- available either immediately or on demand, and 'False' if the end
-- of all input has been reached.
wantInput ∷ LL.ListLike δ ε ⇒ Parser δ Bool
wantInput = Parser $ \st@(S s _ c) _ su → case () of
  _ | not (LL.null s) → su True st
    | c ≡ Complete    → su False st
    | otherwise       → prompt st (su False) (su True)
{-# SPECIALIZE wantInput ∷ Parser B.ByteString Bool #-}
{-# SPECIALIZE wantInput ∷ Parser LB.ByteString Bool #-}

get ∷ Parser δ δ
get = Parser (\st _ su → su (input st) st)

put ∷ δ → Parser δ ()
put s = Parser (\(S _ a c) _ su → su () (S s a c))

-- | Attempt a parse, and if it fails, rewind the input so that no
-- input appears to have been consumed.
--
-- This combinator is useful in cases where a parser might consume
-- some input before failing, i.e. the parser needs arbitrary
-- lookahead.  The downside to using this combinator is that it can
-- retain input for longer than is desirable.
try ∷ Monoid δ ⇒ Parser δ a → Parser δ a
try p = Parser (\st fa su → runParser p (noAdds st) (fa . (st ⊕)) su)
{-# SPECIALIZE try ∷ Parser B.ByteString a → Parser B.ByteString a #-}
{-# SPECIALIZE try ∷ Parser LB.ByteString a → Parser LB.ByteString a #-}

-- | The parser @satisfy p@ succeeds for any element for which the
-- predicate @p@ returns 'True'. Returns the element that is actually
-- parsed.
--
-- >digit = satisfy isDigit
-- >    where isDigit w = w >= 48 && w <= 57
satisfy ∷ LL.ListLike δ ε ⇒ (ε → Bool) → Parser δ ε
satisfy p = do
    ensure 1
    s ← get
    let w = LL.head s
    if p w
        then put (LL.tail s) ≫ return w
        else fail "satisfy"
{-# SPECIALIZE satisfy ∷ (Word8 → Bool) → Parser B.ByteString Word8 #-}
{-# SPECIALIZE satisfy ∷ (Word8 → Bool) → Parser LB.ByteString Word8 #-}

-- | The parser @skip p@ succeeds for any element for which the predicate
-- @p@ returns 'True'.
--
-- >space = skip isSpace
-- >    where isDigit w = w == 9 || w == 10 || w == 13 || w == 32
skip ∷ LL.ListLike δ ε ⇒ (ε → Bool) → Parser δ ()
skip p = do
  ensure 1
  s ← get
  if p (LL.head s)
      then put (LL.tail s)
      else fail "skip"
{-# SPECIALIZE skip ∷ (Word8 → Bool) → Parser B.ByteString () #-}
{-# SPECIALIZE skip ∷ (Word8 → Bool) → Parser LB.ByteString () #-}

-- | The parser @satisfyWith f p@ transforms an element, and succeeds if
-- the predicate @p@ returns 'True' on the transformed value. The
-- parser returns the transformed element that was parsed.
satisfyWith ∷ LL.ListLike δ ε ⇒ (ε → a) → (a → Bool) → Parser δ a
satisfyWith f p = do
    ensure 1
    s ← get
    let c = f (LL.head s)
    if p c
        then put (LL.tail s) ≫ return c
        else fail "satisfyWith"
{-# SPECIALIZE satisfyWith ∷ (Word8 → a) → (a → Bool) → Parser B.ByteString a #-}
{-# SPECIALIZE satisfyWith ∷ (Word8 → a) → (a → Bool) → Parser LB.ByteString a #-}

-- | Consume @n@ element of input, but succeed only if the predicate
-- returns 'True'.
takeWith ∷ LL.ListLike δ ε ⇒ Int → (δ → Bool) → Parser δ δ
takeWith n p = do
    ensure n
    s ← get
    let (h, t) = LL.splitAt n s
    if p h
        then put t ≫ return h
        else fail "takeWith"
{-# SPECIALIZE takeWith ∷ Int → (B.ByteString → Bool) → Parser B.ByteString B.ByteString #-}
{-# SPECIALIZE takeWith ∷ Int → (LB.ByteString → Bool) → Parser LB.ByteString LB.ByteString #-}

-- | Consume exactly @n@ elements of input.
take ∷ LL.ListLike δ ε ⇒ Int → Parser δ δ
take n = takeWith n (const True)
{-# INLINE take #-}
{-# SPECIALIZE take ∷ Int → Parser B.ByteString B.ByteString #-}
{-# SPECIALIZE take ∷ Int → Parser LB.ByteString LB.ByteString #-}

-- | @string s@ parses a sequence of elements that identically match
-- @s@. Returns the parsed string (i.e. @s@).  This parser consumes no
-- input if it fails (even if a partial match).
--
-- /Note/: The behaviour of this parser is different to that of the
-- similarly-named parser in Parsec, as this one is all-or-nothing.
-- To illustrate the difference, the following parser will fail under
-- Parsec given an input of @"for"@:
--
-- >string "foo" <|> string "for"
--
-- The reason for its failure is that that the first branch is a
-- partial match, and will consume the letters @\'f\'@ and @\'o\'@
-- before failing.  In Nnaoparsec, the above parser will /succeed/ on
-- that input, because the failed first branch will consume nothing.
string ∷ (Eq δ, LL.ListLike δ ε) ⇒ δ → Parser δ δ
string s = takeWith (LL.length s) (≡ s)
{-# INLINE string #-}
{-# SPECIALIZE string ∷ B.ByteString → Parser B.ByteString B.ByteString #-}
{-# SPECIALIZE string ∷ LB.ByteString → Parser LB.ByteString LB.ByteString #-}

stringTransform ∷ (LL.ListLike δ ε, Eq δ) ⇒ (δ → δ) → δ → Parser δ δ
stringTransform f s = takeWith (LL.length s) ((≡ f s) . f)
{-# INLINE stringTransform #-}
{-# SPECIALIZE stringTransform ∷ (B.ByteString → B.ByteString) → B.ByteString → Parser B.ByteString B.ByteString #-}
{-# SPECIALIZE stringTransform ∷ (LB.ByteString → LB.ByteString) → LB.ByteString → Parser LB.ByteString LB.ByteString #-}

skipWhile ∷ LL.ListLike δ ε ⇒ (ε → Bool) → Parser δ ()
skipWhile p = go
              where go = do
                      _ ← wantInput
                      t ← LL.dropWhile p <$> get
                      put t
                      when (LL.null t) go
{-# SPECIALIZE skipWhile ∷ (Word8 → Bool) → Parser B.ByteString () #-}
{-# SPECIALIZE skipWhile ∷ (Word8 → Bool) → Parser LB.ByteString () #-}

-- | Consume input as long as the predicate returns 'False'
-- (i.e. until it returns 'True'), and return the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'True' on the first element of input.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeTill ∷ LL.ListLike δ ε ⇒ (ε → Bool) → Parser δ δ
takeTill p = takeWhile (not . p)
{-# INLINE takeTill #-}
{-# SPECIALIZE takeTill ∷ (Word8 → Bool) → Parser B.ByteString B.ByteString #-}
{-# SPECIALIZE takeTill ∷ (Word8 → Bool) → Parser LB.ByteString LB.ByteString #-}

-- | Consume input as long as the predicate returns 'True', and return
-- the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'False' on the first element of input.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeWhile ∷ LL.ListLike δ ε ⇒ (ε → Bool) → Parser δ δ
takeWhile p = go ø
              where go acc = do
                      inp <- wantInput
                      if inp
                          then do
                            (h, t) ← LL.span p <$> get
                            put t
                            if (LL.null t)
                                then go (h ⊕ acc)
                                else return h
                          else return ø
{-# SPECIALIZE takeWhile ∷ (Word8 → Bool) → Parser B.ByteString B.ByteString #-}
{-# SPECIALIZE takeWhile ∷ (Word8 → Bool) → Parser LB.ByteString LB.ByteString #-}

-- | Consume input as long as the predicate returns 'True', and return
-- the consumed input.
--
-- This parser requires the predicate to succeed on at least one element
-- of input: it will fail if the predicate never returns 'True' or if
-- there is no input left.
takeWhile1 ∷ LL.ListLike δ ε ⇒ (ε → Bool) → Parser δ δ
takeWhile1 p = do
  (`when` demandInput) =<< LL.null <$> get
  (h, t) ← LL.span p <$> get
  when (LL.null h) $ fail "takeWhile1"
  put t
  if LL.null t
      then (h ⊕) `fmap` takeWhile p
      else return h
{-# SPECIALIZE takeWhile1 ∷ (Word8 → Bool) → Parser B.ByteString B.ByteString #-}
{-# SPECIALIZE takeWhile1 ∷ (Word8 → Bool) → Parser LB.ByteString LB.ByteString #-}

anyElem ∷ LL.ListLike δ ε ⇒ Parser δ ε
anyElem = satisfy (const True)
{-# INLINE anyElem #-}
{-# SPECIALIZE anyElem ∷ Parser B.ByteString Word8 #-}
{-# SPECIALIZE anyElem ∷ Parser LB.ByteString Word8 #-}

elem ∷ (Eq ε, Show ε, LL.ListLike δ ε) ⇒ ε → Parser δ ε
elem x = elem' (show x) x
{-# INLINE elem #-}
{-# SPECIALIZE elem ∷ Word8 → Parser B.ByteString Word8 #-}
{-# SPECIALIZE elem ∷ Word8 → Parser LB.ByteString Word8 #-}

notElem ∷ (Eq ε, Show ε, LL.ListLike δ ε) ⇒ ε → Parser δ ε
notElem x = notElem' ("not " ⊕ show x) x
{-# INLINE notElem #-}
{-# SPECIALIZE notElem ∷ Word8 → Parser B.ByteString Word8 #-}
{-# SPECIALIZE notElem ∷ Word8 → Parser LB.ByteString Word8 #-}

elem' ∷ (Eq ε, LL.ListLike δ ε) ⇒ String → ε → Parser δ ε
elem' p c = satisfy (≡ c) <?> p
{-# INLINE elem' #-}
{-# SPECIALIZE elem' ∷ String → Word8 → Parser B.ByteString Word8 #-}
{-# SPECIALIZE elem' ∷ String → Word8 → Parser LB.ByteString Word8 #-}

notElem' ∷ (Eq ε, LL.ListLike δ ε) ⇒ String → ε → Parser δ ε
notElem' p c = satisfy (≢ c) <?> p
{-# INLINE notElem' #-}
{-# SPECIALIZE notElem' ∷ String → Word8 → Parser B.ByteString Word8 #-}
{-# SPECIALIZE notElem' ∷ String → Word8 → Parser LB.ByteString Word8 #-}

-- | Match only if all input has been consumed.
endOfInput ∷ LL.ListLike δ ε ⇒ Parser δ ()
endOfInput = Parser $ \st@(S i _ mo) fa su → case () of
    _ | not (LL.null i) → fa    st [] "endOfInput"
      | mo ≡ Complete   → su () st
      | otherwise       → let su' _ st'     = su () (st ⊕ st')
                              fa'   st' _ _ = fa    (st ⊕ st') [] "endOfInput"
                          in runParser demandInput st fa' su'
{-# SPECIALIZE endOfInput ∷ Parser B.ByteString () #-}
{-# SPECIALIZE endOfInput ∷ Parser LB.ByteString () #-}

-- | Match either a single newline character @\'\\n\'@, or a carriage
-- return followed by a newline character @\"\\r\\n\"@.
(<?>) ∷ Parser δ a → String → Parser δ a
p <?> ms = Parser $ \s fa su → runParser p s (\s' sts m → fa s' (ms:sts) m) su
{-# INLINE (<?>) #-}
infix 0 <?>

-- | Run a parser.
parse ∷ Monoid δ ⇒ Parser δ a → δ → Result δ a
parse m s = runParser m (S s ø Incomplete) Fail Done
{-# INLINE parse #-}
{-# SPECIALIZE parse ∷ Parser B.ByteString a → B.ByteString → Result B.ByteString a #-}
{-# SPECIALIZE parse ∷ Parser LB.ByteString a → LB.ByteString → Result LB.ByteString a #-}