-----------------------------------------------------------------------------
-- |
-- Module      :  Data.ParserCombinators.Attoparsec.Internal
-- Copyright   :  Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2008
-- License     :  BSD3
-- 
-- Maintainer  :  bos@serpentine.com
-- Stability   :  experimental
-- Portability :  unknown
--
-- Simple, efficient parser combinators for lazy 'LB.ByteString'
-- strings, loosely based on 'Text.ParserCombinators.Parsec'.
-- 
-----------------------------------------------------------------------------
module Data.ParserCombinators.Attoparsec.Internal
    (
    -- * Parser
      ParseError
    , Parser

    -- * Running parsers
    , parse
    , parseAt
    , parseTest
    , subParse

    -- * Combinators
    , (<?>)

    -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
    , try
    , choice
    , manyTill
    , eof
    , notFollowedBy
    , skipMany
    , skipMany1
    , count
    , lookAhead
    , peek
    , sepBy
    , sepBy1

    -- * Things like in @Parsec.Char@
    , satisfy
    , anyWord8
    , word8
    , notWord8
    , string
    , stringTransform

    -- * Parser converters.
    , eitherP
    , eitherF

    -- * Miscellaneous functions.
    , err
    , getInput
    , getConsumed
    , takeWhile
    , takeWhile1
    , takeTill
    , takeAll
    , skipWhile
    , notEmpty
    , match
    ) where

import Control.Applicative
    (Alternative(..), Applicative(..), (<$>), (<*), (*>))
import Control.Monad (MonadPlus(..), ap, liftM2)
import Control.Monad.Fix (MonadFix(..))
import qualified Data.ByteString as SB
import qualified Data.ByteString.Lazy as LB
import qualified Data.ByteString.Lazy.Internal as LB
import Data.Int (Int64)
import Data.Word (Word8)
import Prelude hiding (takeWhile)

type ParseError = String

data S = S {-# UNPACK #-} !SB.ByteString
           LB.ByteString
           {-# UNPACK #-} !Int64

newtype Parser a = Parser {
      unParser :: S -> Either (LB.ByteString, [String]) (a, S)
    }

instance Functor Parser where
    fmap f p =
        Parser $ \s ->
            case unParser p s of
              Right (a, s') -> Right (f a, s')
              Left err -> Left err

instance Monad Parser where
    return a = Parser $ \s -> Right (a, s)
    m >>= f = Parser $ \s ->
              case unParser m s of
                Right (a, s') -> unParser (f a) s'
                Left (s', msgs) -> Left (s', msgs)
    fail err = Parser $ \(S sb lb _) -> Left (sb +: lb, [err])

instance MonadFix Parser where
    mfix f = Parser $ \s ->
             let r = case r of
                       Right (a, _) -> unParser (f a) s
                       err -> err
             in r

zero :: Parser a
zero = Parser $ \(S sb lb _) -> Left (sb +: lb, [])
{-# INLINE zero #-}

plus :: Parser a -> Parser a -> Parser a
plus p1 p2 =
    Parser $ \s@(S sb lb _) ->
        case unParser p1 s of
          Left (_, msgs1) -> 
              case unParser p2 s of
                Left (_, msgs2) -> Left (sb +: lb, (msgs1 ++ msgs2))
                ok -> ok
          ok -> ok
{-# INLINE plus #-}

instance MonadPlus Parser where
    mzero = zero
    mplus = plus

instance Applicative Parser where
    pure = return
    (<*>) = ap

instance Alternative Parser where
    empty = zero
    (<|>) = plus

mkState :: LB.ByteString -> Int64 -> S
mkState s = case s of
              LB.Empty -> S SB.empty s
              LB.Chunk x xs -> S x xs

-- | Turn our chunked representation back into a normal lazy
-- ByteString.
(+:) :: SB.ByteString -> LB.ByteString -> LB.ByteString
sb +: lb | SB.null sb = lb
         | otherwise = LB.chunk sb lb
{-# INLINE (+:) #-}

infix 0 <?>

-- | Name the parser.
(<?>) :: Parser a -> String -> Parser a
p <?> msg =
    Parser $ \s@(S sb lb _) ->
        case unParser p s of
          (Left _) -> Left (sb +: lb, [msg])
          ok -> ok
{-# INLINE (<?>) #-}

nextChunk :: Parser ()
nextChunk = Parser $ \(S _ lb n) ->
            case lb of
              LB.Chunk sb' lb' -> Right ((), S sb' lb' n)
              LB.Empty -> Left (lb, [])

-- | Get remaining input.
getInput :: Parser LB.ByteString
getInput = Parser $ \s@(S sb lb _) -> Right (sb +: lb, s)

-- | Get number of bytes consumed so far.
getConsumed :: Parser Int64
getConsumed = Parser $ \s@(S _ _ n) -> Right (n, s)

-- | Character parser.
satisfy :: (Word8 -> Bool) -> Parser Word8
satisfy p =
    Parser $ \s@(S sb lb n) ->
           case SB.uncons sb of
             Just (c, sb') | p c -> Right (c, S sb' lb (n + 1))
                           | otherwise -> Left (sb +: lb, [])
             Nothing -> unParser (nextChunk >> satisfy p) s
{-# INLINE satisfy #-}

anyWord8 :: Parser Word8
anyWord8 = satisfy $ const True
{-# INLINE anyWord8 #-}

-- | Satisfy a specific character.
word8 :: Word8 -> Parser Word8
word8 c = satisfy (== c) <?> show c
{-# INLINE word8 #-}

-- | Satisfy a specific character.
notWord8 :: Word8 -> Parser Word8
notWord8 c = satisfy (/= c) <?> "not " ++ show c
{-# INLINE notWord8 #-}

sepBy :: Parser a -> Parser s -> Parser [a]
sepBy p s = liftM2 (:) p ((s >> sepBy1 p s) <|> return []) <|> return []

sepBy1 :: Parser a -> Parser s -> Parser [a]
sepBy1 p s = liftM2 (:) p ((s >> sepBy1 p s) <|> return [])

-- | Satisfy a literal string.
string :: LB.ByteString -> Parser LB.ByteString
string s = Parser $ \(S sb lb n) ->
           let bs = sb +: lb
               l = LB.length s
               (h, t) = LB.splitAt l bs
           in if s == h
              then Right (s, mkState t (n + l))
              else Left (bs, [])
{-# INLINE string #-}

-- | Satisfy a literal string, after applying a transformation to both
-- it and the matching text.
stringTransform :: (LB.ByteString -> LB.ByteString) -> LB.ByteString
                -> Parser LB.ByteString
stringTransform f s = Parser $ \(S sb lb n) ->
             let bs = sb +: lb
                 l = LB.length s
                 (h, t) = LB.splitAt l bs
             in if fs == f h
                then Right (s, mkState t (n + l))
                else Left (bs, [])
    where fs = f s
{-# INLINE stringTransform #-}

-- | Apply the given parser repeatedly, returning every parse result.
count :: Int -> Parser a -> Parser [a]
count n p = sequence (replicate n p)
{-# INLINE count #-}

try :: Parser a -> Parser a
try p = Parser $ \s@(S sb lb _) ->
        case unParser p s of
          Left (_, msgs) -> Left (sb +: lb, msgs)
          ok -> ok

-- | Try multiple parsers.
choice :: [Parser a] -> Parser a
choice = foldr1 (<|>)

-- | Detect 'end of file'.
eof :: Parser ()
eof = Parser $ \s@(S sb lb _) -> if SB.null sb && LB.null lb
                                 then Right ((), s)
                                 else Left (sb +: lb, ["EOF"])

notFollowedBy :: Parser a -> Parser ()
notFollowedBy p = Parser $ \s@(S sb lb _) ->
                  case unParser p s of
                    Left (_, _) -> Right ((), s)
                    _ -> Left (sb +: lb, [])

takeAll :: Parser LB.ByteString
takeAll = Parser $ \(S sb lb n) ->
          let bs = sb +: lb
          in Right (bs, mkState LB.empty (n + LB.length bs))

oneChunk :: SB.ByteString -> LB.ByteString
oneChunk s = LB.chunk s LB.Empty

length64 :: SB.ByteString -> Int64
length64 = fromIntegral . SB.length

-- | Consume characters while the predicate is true.
takeWhile :: (Word8 -> Bool) -> Parser LB.ByteString
takeWhile p = Parser $ \s@(S sb lb n) ->
              let (h, t) = SB.span p sb
              in if SB.null t && (not . LB.null) lb
                 then unParser ((h +:) <$> (nextChunk *> takeWhile p)) s
                 else Right (oneChunk h, S t lb (n + length64 h))
{-# INLINE takeWhile #-}

takeTill :: (Word8 -> Bool) -> Parser LB.ByteString
takeTill p = takeWhile (not . p) <* satisfy p
{-# INLINE takeTill #-}

takeWhile1 :: (Word8 -> Bool) -> Parser LB.ByteString
takeWhile1 p = Parser $ \s@(S sb lb n) ->
               case unParser (takeWhile p) s of
                 Left err -> Left err
                 Right (bs, s') -> if LB.null bs
                                   then Left (sb +: lb, [msg])
                                   else Right (bs, s')
    where msg = "Expected at least one byte to satisfy criterion."
{-# INLINE takeWhile1 #-}

-- | Skip over characters while the predicate is true.
skipWhile :: (Word8 -> Bool) -> Parser ()
skipWhile p = takeWhile p >> return ()
{-# INLINE skipWhile #-}

manyTill :: Parser a -> Parser b -> Parser [a]
manyTill p end = scan
    where scan = (end >> return []) <|> liftM2 (:) p scan

-- |'skipMany' - skip zero or many instances of the parser
skipMany :: Parser a -> Parser ()
skipMany p = scan
    where scan = (p >> scan) <|> return ()

-- |'skipMany1' - skip one or many instances of the parser       
skipMany1 :: Parser  a -> Parser ()
skipMany1 p = p >> skipMany p

-- | Test that a parser returned a non-null ByteString.
notEmpty :: Parser LB.ByteString -> Parser LB.ByteString 
notEmpty p = Parser $ \s ->
             case unParser p s of
               o@(Right (a, _)) ->
                   if LB.null a
                   then Left (a, ["notEmpty"])
                   else o
               x -> x

-- | Parse some input with the given parser and return that input
-- without copying it.
match :: Parser a -> Parser LB.ByteString
match p = do bs <- getInput
             start <- getConsumed
             p
             end <- getConsumed
             return (LB.take (end - start) bs)

eitherP :: Parser a -> Parser b -> Parser (Either a b)
eitherP a b = (Left <$> a) <|> (Right <$> b)
{-# INLINE eitherP #-}

eitherF :: Parser a -> Parser (Either (LB.ByteString, [String]) a) 
eitherF p = Parser $ \s ->
            case unParser p s of
              Right (m, s') -> Right (Right m, s')
              Left err -> Right (Left err, s)
{-# INLINE eitherF #-}

err :: [String] -> Parser a
err strings = Parser $ \(S sb lb _) -> Left (sb +: lb, strings)
{-# INLINE err #-}

subParse :: Parser a -> LB.ByteString -> Parser a
subParse p bs = Parser $ \s@(S sb lb _) ->
                case unParser p (mkState bs 0) of
                  Left (_, msg) -> Left (sb +: lb, "sub parse failure" : msg)
                  Right (x, _) -> Right (x, s)
{-# INLINE subParse #-}

peek :: Parser a -> Parser (Maybe a)
peek p = Parser $ \s ->
         case unParser p s of
           Right (m, _) -> Right (Just m, s)
           _ -> Right (Nothing, s)

lookAhead :: Parser a -> Parser a
lookAhead p = Parser $ \s ->
         case unParser p s of
           Right (m, _) -> Right (m, s)
           Left (e, bs) -> Left (e, bs)

parseAt :: Parser a -> LB.ByteString -> Int64
        -> (LB.ByteString, Either ParseError (a, Int64))
parseAt p bs n = 
    case unParser p (mkState bs n) of
      Left (bs', msg) -> (bs', Left $ showError msg)
      Right (a, S sb lb n') -> (sb +: lb, Right (a, n'))
    where
      showError [msg] = "Parser error, expected:\n" ++ msg ++ "\n"
      showError msgs = "Parser error, expected one of:\n" ++ unlines msgs

-- | Run a parser.
parse :: Parser a -> LB.ByteString
      -> (LB.ByteString, Either ParseError a)
parse p bs = case parseAt p bs 0 of
               (bs', Right (a, _)) -> (bs', Right a)
               (bs', Left err) -> (bs', Left err)

parseTest :: (Show a) => Parser a -> LB.ByteString -> IO ()
parseTest p s =
    case parse p s of
      (st, Left msg) -> putStrLn $ msg ++ "\nGot:\n" ++ show st
      (_, Right r) -> print r