{-# LANGUAGE BangPatterns, Rank2Types, OverloadedStrings, RecordWildCards #-}
-- |
-- Module      :  Data.Attoparsec.Text.Internal
-- Copyright   :  Bryan O'Sullivan 2011
-- License     :  BSD3
--
-- Maintainer  :  bos@serpentine.com
-- Stability   :  experimental
-- Portability :  unknown
--
-- Simple, efficient parser combinators for 'T.Text' strings, loosely
-- based on the Parsec library.

module Data.Attoparsec.Text.Internal
    (
    -- * Parser types
      Parser
    , Result

    -- * Running parsers
    , parse
    , parseOnly

    -- * Combinators
    , (<?>)
    , try
    , module Data.Attoparsec.Combinator

    -- * Parsing individual characters
    , satisfy
    , satisfyWith
    , anyChar
    , skip
    , char
    , notChar

    -- ** Character classes
    , inClass
    , notInClass

    -- * Efficient string handling
    , skipWhile
    , string
    , stringTransform
    , take
    , scan
    , takeWhile
    , takeWhile1
    , takeTill

    -- ** Consume all remaining input
    , takeText
    , takeLazyText

    -- * State observation and manipulation functions
    , endOfInput
    , atEnd
    , ensure

    -- * Utilities
    , endOfLine
    ) where

import Control.Applicative ((<|>), (<$>))
import Control.Monad (when)
import Data.Attoparsec.Combinator
import Data.Attoparsec.Internal.Types hiding (Parser, Input, Added, Failure, Success)
import Data.Text (Text)
import Prelude hiding (getChar, take, takeWhile)
import qualified Data.Attoparsec.Internal.Types as T
import qualified Data.Attoparsec.Text.FastSet as Set
import qualified Data.Text as T
import qualified Data.Text.Lazy as L

type Parser a = T.Parser Text a
type Result a = IResult Text a
type Input = T.Input Text
type Added = T.Added Text
type Failure r = T.Failure Text r
type Success a r = T.Success Text a r

-- | If at least @n@ characters of input are available, return the
-- current input, otherwise fail.
ensure :: Int -> Parser Text
ensure !n = T.Parser $ \i0 a0 m0 kf ks ->
    if T.length (unI i0) >= n
    then ks i0 a0 m0 (unI i0)
    else runParser (demandInput >> ensure n) i0 a0 m0 kf ks

-- | Ask for input.  If we receive any, pass it to a success
-- continuation, otherwise to a failure continuation.
prompt :: Input -> Added -> More
       -> (Input -> Added -> More -> Result r)
       -> (Input -> Added -> More -> Result r)
       -> Result r
prompt i0 a0 _m0 kf ks = Partial $ \s ->
    if T.null s
    then kf i0 a0 Complete
    else ks (I (unI i0 <> s)) (A (unA a0 <> s)) Incomplete

-- | Immediately demand more input via a 'Partial' continuation
-- result.
demandInput :: Parser ()
demandInput = T.Parser $ \i0 a0 m0 kf ks ->
    if m0 == Complete
    then kf i0 a0 m0 ["demandInput"] "not enough input"
    else let kf' i a m = kf i a m ["demandInput"] "not enough input"
             ks' i a m = ks i a m ()
         in prompt i0 a0 m0 kf' ks'

-- | This parser always succeeds.  It returns 'True' if any input is
-- available either immediately or on demand, and 'False' if the end
-- of all input has been reached.
wantInput :: Parser Bool
wantInput = T.Parser $ \i0 a0 m0 _kf ks ->
  case () of
    _ | not (T.null (unI i0)) -> ks i0 a0 m0 True
      | m0 == Complete  -> ks i0 a0 m0 False
      | otherwise       -> let kf' i a m = ks i a m False
                               ks' i a m = ks i a m True
                           in prompt i0 a0 m0 kf' ks'

get :: Parser Text
get  = T.Parser $ \i0 a0 m0 _kf ks -> ks i0 a0 m0 (unI i0)

put :: Text -> Parser ()
put s = T.Parser $ \_i0 a0 m0 _kf ks -> ks (I s) a0 m0 ()

-- | Attempt a parse, and if it fails, rewind the input so that no
-- input appears to have been consumed.
--
-- This combinator is provided for compatibility with Parsec.
-- Attoparsec parsers always backtrack on failure.
try :: Parser a -> Parser a
try p = p
{-# INLINE try #-}

unsafeHead :: Text -> Char
unsafeHead = T.head

unsafeTail :: Text -> Text
unsafeTail = T.tail

unsafeTake :: Int -> Text -> Text
unsafeTake = T.take

unsafeDrop :: Int -> Text -> Text
unsafeDrop = T.drop

-- | The parser @satisfy p@ succeeds for any character for which the
-- predicate @p@ returns 'True'. Returns the character that is
-- actually parsed.
--
-- >digit = satisfy isDigit
-- >    where isDigit c = c >= '0' && c <= '9'
satisfy :: (Char -> Bool) -> Parser Char
satisfy p = do
  s <- ensure 1
  let w = unsafeHead s
  if p w
    then put (unsafeTail s) >> return w
    else fail "satisfy"

-- | The parser @skip p@ succeeds for any character for which the
-- predicate @p@ returns 'True'.
--
-- >skipDigit = skip isDigit
-- >    where isDigit c = c >= '0' && c <= '9'
skip :: (Char -> Bool) -> Parser ()
skip p = do
  s <- ensure 1
  if p (unsafeHead s)
    then put (unsafeTail s)
    else fail "skip"

-- | The parser @satisfyWith f p@ transforms a character, and succeeds
-- if the predicate @p@ returns 'True' on the transformed value. The
-- parser returns the transformed character that was parsed.
satisfyWith :: (Char -> a) -> (a -> Bool) -> Parser a
satisfyWith f p = do
  s <- ensure 1
  let c = f (unsafeHead s)
  if p c
    then put (unsafeTail s) >> return c
    else fail "satisfyWith"

-- | Consume @n@ characters of input, but succeed only if the
-- predicate returns 'True'.
takeWith :: Int -> (Text -> Bool) -> Parser Text
takeWith n p = do
  s <- ensure n
  let h = unsafeTake n s
      t = unsafeDrop n s
  if p h
    then put t >> return h
    else fail "takeWith"

-- | Consume exactly @n@ characters of input.
take :: Int -> Parser Text
take n = takeWith n (const True)
{-# INLINE take #-}

-- | @string s@ parses a sequence of characters that identically match
-- @s@. Returns the parsed string (i.e. @s@).  This parser consumes no
-- input if it fails (even if a partial match).
--
-- /Note/: The behaviour of this parser is different to that of the
-- similarly-named parser in Parsec, as this one is all-or-nothing.
-- To illustrate the difference, the following parser will fail under
-- Parsec given an input of @"for"@:
--
-- >string "foo" <|> string "for"
--
-- The reason for its failure is that that the first branch is a
-- partial match, and will consume the letters @\'f\'@ and @\'o\'@
-- before failing.  In Attoparsec, the above parser will /succeed/ on
-- that input, because the failed first branch will consume nothing.
string :: Text -> Parser Text
string s = takeWith (T.length s) (==s)
{-# INLINE string #-}

stringTransform :: (Text -> Text) -> Text
                -> Parser Text
stringTransform f s = takeWith (T.length s) ((==f s) . f)
{-# INLINE stringTransform #-}

-- | Skip past input for as long as the predicate returns 'True'.
skipWhile :: (Char -> Bool) -> Parser ()
skipWhile p = go
 where
  go = do
    t <- T.dropWhile p <$> get
    put t
    when (T.null t) $ do
      input <- wantInput
      when input go
{-# INLINE skipWhile #-}

-- | Consume input as long as the predicate returns 'False'
-- (i.e. until it returns 'True'), and return the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'True' on the first character of input.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeTill :: (Char -> Bool) -> Parser Text
takeTill p = takeWhile (not . p)
{-# INLINE takeTill #-}

-- | Consume input as long as the predicate returns 'True', and return
-- the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'False' on the first character of input.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeWhile :: (Char -> Bool) -> Parser Text
takeWhile p = (T.concat . reverse) `fmap` go []
 where
  go acc = do
    (h,t) <- T.span p <$> get
    put t
    if T.null t
      then do
        input <- wantInput
        if input
          then go (h:acc)
          else return (h:acc)
      else return (h:acc)

takeRest :: Parser [Text]
takeRest = go []
 where
  go acc = do
    input <- wantInput
    if input
      then do
        s <- get
        put T.empty
        go (s:acc)
      else return (reverse acc)

-- | Consume all remaining input and return it as a single string.
takeText :: Parser Text
takeText = T.concat `fmap` takeRest

-- | Consume all remaining input and return it as a single string.
takeLazyText :: Parser L.Text
takeLazyText = L.fromChunks `fmap` takeRest

data Scan s = Continue s
            | Finished {-# UNPACK #-} !Int T.Text

-- | A stateful scanner.  The predicate consumes and transforms a
-- state argument, and each transformed state is passed to successive
-- invocations of the predicate on each character of the input until one
-- returns 'Nothing' or the input ends.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'Nothing' on the first character of input.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
scan :: s -> (s -> Char -> Maybe s) -> Parser Text
scan s0 p = do
  chunks <- go [] s0
  case chunks of
    [x] -> return x
    xs  -> return . T.concat . reverse $ xs
 where
  scanner s !n t =
    case T.uncons t of
      Just (c,t') -> case p s c of
                       Just s' -> scanner s' (n+1) t'
                       Nothing -> Finished n t
      Nothing     -> Continue s
  go acc s = do
    input <- get
    case scanner s 0 input of
      Continue s'  -> do put T.empty
                         more <- wantInput
                         if more
                           then go (input : acc) s'
                           else return (input : acc)
      Finished n t -> put t >> return (T.take n input : acc)
{-# INLINE scan #-}

-- | Consume input as long as the predicate returns 'True', and return
-- the consumed input.
--
-- This parser requires the predicate to succeed on at least one
-- character of input: it will fail if the predicate never returns
-- 'True' or if there is no input left.
takeWhile1 :: (Char -> Bool) -> Parser Text
takeWhile1 p = do
  (`when` demandInput) =<< T.null <$> get
  (h,t) <- T.span p <$> get
  when (T.null h) $ fail "takeWhile1"
  put t
  if T.null t
    then (h<>) `fmap` takeWhile p
    else return h

-- | Match any character in a set.
--
-- >vowel = inClass "aeiou"
--
-- Range notation is supported.
--
-- >halfAlphabet = inClass "a-nA-N"
--
-- To add a literal @\'-\'@ to a set, place it at the beginning or end
-- of the string.
inClass :: String -> Char -> Bool
inClass s = (`Set.member` mySet)
    where mySet = Set.charClass s
          {-# NOINLINE mySet #-}
{-# INLINE inClass #-}

-- | Match any character not in a set.
notInClass :: String -> Char -> Bool
notInClass s = not . inClass s
{-# INLINE notInClass #-}

-- | Match any character.
anyChar :: Parser Char
anyChar = satisfy $ const True
{-# INLINE anyChar #-}

-- | Match a specific character.
char :: Char -> Parser Char
char c = satisfy (== c) <?> show c
{-# INLINE char #-}

-- | Match any character except the given one.
notChar :: Char -> Parser Char
notChar c = satisfy (/= c) <?> "not " ++ show c
{-# INLINE notChar #-}

-- | Match only if all input has been consumed.
endOfInput :: Parser ()
endOfInput = T.Parser $ \i0 a0 m0 kf ks ->
             if T.null (unI i0)
             then if m0 == Complete
                  then ks i0 a0 m0 ()
                  else let kf' i1 a1 m1 _ _ = addS i0 a0 m0 i1 a1 m1 $
                                              \ i2 a2 m2 -> ks i2 a2 m2 ()
                           ks' i1 a1 m1 _   = addS i0 a0 m0 i1 a1 m1 $
                                              \ i2 a2 m2 -> kf i2 a2 m2 []
                                                            "endOfInput"
                       in  runParser demandInput i0 a0 m0 kf' ks'
             else kf i0 a0 m0 [] "endOfInput"

-- | Return an indication of whether the end of input has been
-- reached.
atEnd :: Parser Bool
atEnd = not <$> wantInput
{-# INLINE atEnd #-}

-- | Match either a single newline character @\'\\n\'@, or a carriage
-- return followed by a newline character @\"\\r\\n\"@.
endOfLine :: Parser ()
endOfLine = (char '\n' >> return ()) <|> (string "\r\n" >> return ())

-- | Name the parser, in case failure occurs.
(<?>) :: Parser a
      -> String                 -- ^ the name to use if parsing fails
      -> Parser a
p <?> msg0 = T.Parser $ \i0 a0 m0 kf ks ->
             let kf' i a m strs msg = kf i a m (msg0:strs) msg
             in runParser p i0 a0 m0 kf' ks
{-# INLINE (<?>) #-}
infix 0 <?>

-- | Terminal failure continuation.
failK :: Failure a
failK i0 _a0 _m0 stack msg = Fail (unI i0) stack msg
{-# INLINE failK #-}

-- | Terminal success continuation.
successK :: Success a a
successK i0 _a0 _m0 a = Done (unI i0) a
{-# INLINE successK #-}

-- | Run a parser.
parse :: Parser a -> Text -> Result a
parse m s = runParser m (I s) (A T.empty) Incomplete failK successK
{-# INLINE parse #-}

-- | Run a parser that cannot be resupplied via a 'Partial' result.
parseOnly :: Parser a -> Text -> Either String a
parseOnly m s = case runParser m (I s) (A T.empty) Complete failK successK of
                  Fail _ _ err -> Left err
                  Done _ a     -> Right a
                  _            -> error "parseOnly: impossible error!"
{-# INLINE parseOnly #-}