Text/Parsec/Prim.hs

-----------------------------------------------------------------------------
-- |
-- Module      :  Text.Parsec.Prim
-- Copyright   :  (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007
-- License     :  BSD-style (see the LICENSE file)
-- 
-- Maintainer  :  derek.a.elkins@gmail.com
-- Stability   :  provisional
-- Portability :  portable
-- 
-- The primitive parser combinators.
-- 
-----------------------------------------------------------------------------   

{-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies, FlexibleContexts,
             UndecidableInstances #-}

module Text.Parsec.Prim where

import qualified Control.Applicative as Applicative ( Applicative(..), Alternative(..) )
import Control.Monad()
import Control.Monad.Trans
import Control.Monad.Identity

import Control.Monad.Reader.Class
import Control.Monad.State.Class
import Control.Monad.Cont.Class
import Control.Monad.Error.Class

import Text.Parsec.Pos
import Text.Parsec.Error

unknownError :: State s u -> ParseError
unknownError state        = newErrorUnknown (statePos state)

sysUnExpectError :: String -> SourcePos -> Reply s u a
sysUnExpectError msg pos  = Error (newErrorMessage (SysUnExpect msg) pos)

-- | The parser @unexpected msg@ always fails with an unexpected error
-- message @msg@ without consuming any input.
--
-- The parsers 'fail', ('<?>') and @unexpected@ are the three parsers
-- used to generate error messages. Of these, only ('<?>') is commonly
-- used. For an example of the use of @unexpected@, see the definition
-- of 'Text.Parsec.Combinator.notFollowedBy'.

unexpected :: (Stream s m t) => String -> ParsecT s u m a
unexpected msg
    = ParsecT $ \s -> return $ Empty $ return $ 
                        Error (newErrorMessage (UnExpect msg) (statePos s))

-- | ParserT monad transformer and Parser type

-- | @ParsecT s u m a@ is a parser with stream type @s@, user state type @u@,
-- underlying monad @m@ and return type @a@

data ParsecT s u m a
    = ParsecT { runParsecT :: State s u -> m (Consumed (m (Reply s u a))) }

type Parsec s u = ParsecT s u Identity

data Consumed a  = Consumed a
                 | Empty !a

data Reply s u a = Ok !a !(State s u) ParseError
                 | Error ParseError

data State s u = State {
      stateInput :: s,
      statePos   :: !SourcePos,
      stateUser  :: !u
    }

instance Functor Consumed where
    fmap f (Consumed x) = Consumed (f x)
    fmap f (Empty x)    = Empty (f x)

instance Functor (Reply s u) where
    fmap f (Ok x s e) = Ok (f x) s e
    fmap _ (Error e) = Error e -- XXX

instance (Monad m) => Functor (ParsecT s u m) where
    fmap f p = parsecMap f p

parsecMap :: (Monad m) => (a -> b) -> ParsecT s u m a -> ParsecT s u m b
parsecMap f p
    = ParsecT $ \s -> liftM (fmap (liftM (fmap f))) (runParsecT p s)

instance (Monad m) => Applicative.Applicative (ParsecT s u m) where
    pure = return
    (<*>) = ap -- TODO: Can this be optimized?

instance (Monad m) => Applicative.Alternative (ParsecT s u m) where
    empty = mzero
    (<|>) = mplus

instance (Monad m) => Monad (ParsecT s u m) where
    return x = parserReturn x
    p >>= f  = parserBind p f
    fail msg = parserFail msg

instance (MonadIO m) => MonadIO (ParsecT s u m) where
    liftIO = lift . liftIO

instance (MonadReader r m) => MonadReader r (ParsecT s u m) where
    ask = lift ask
    local f p = ParsecT $ \s -> local f (runParsecT p s)

-- I'm presuming the user might want a separate, non-backtracking
-- state aside from the Parsec user state.
instance (MonadState s m) => MonadState s (ParsecT s' u m) where
    get = lift get
    put = lift . put

instance (MonadCont m) => MonadCont (ParsecT s u m) where
    callCC f = ParsecT $ \s ->
          callCC $ \c ->
          runParsecT (f (\a -> ParsecT $ \s' -> c (pack s' a))) s

     where pack s a= Empty $ return (Ok a s (unknownError s))

instance (MonadError e m) => MonadError e (ParsecT s u m) where
    throwError = lift . throwError
    p `catchError` h = ParsecT $ \s ->
        runParsecT p s `catchError` \e ->
            runParsecT (h e) s

parserReturn :: (Monad m) => a -> ParsecT s u m a
parserReturn x
    = ParsecT $ \s -> return $ Empty $ return (Ok x s (unknownError s))

parserBind :: (Monad m)
           => ParsecT s u m a -> (a -> ParsecT s u m b) -> ParsecT s u m b

parserBind p f
    = ParsecT $ \s -> do -- TODO: This was \s@(State _ u _) ???
        res1 <- runParsecT p s
        case res1 of

          Empty mReply1
            -> do reply1 <- mReply1
                  case reply1 of
                    Ok x s' err1 -> do
                      res2 <- runParsecT (f x) s'
                      case res2 of
                        Empty mReply2
                          -> do reply2 <- mReply2
                                return $ Empty $
                                            return $ mergeErrorReply err1 reply2
                        other
                          -> do return $ other
                    Error err1 -> return $ Empty $ return $ Error err1

          Consumed mReply1
            -> do reply1 <- mReply1
                  return $ Consumed $ -- `early' returning
                    case reply1 of
                      Ok x s' err1 -> do
                        res2 <- runParsecT (f x) s'
                        case res2 of
                          Empty mReply2
                            -> do reply2 <- mReply2
                                  return $ mergeErrorReply err1 reply2
                          Consumed reply2 -> reply2
                      Error err1   -> return $ Error err1


mergeErrorReply :: ParseError -> Reply s u a -> Reply s u a
mergeErrorReply err1 reply -- XXX where to put it?
    = case reply of
        Ok x state err2 -> Ok x state (mergeError err1 err2)
        Error err2      -> Error (mergeError err1 err2)

parserFail :: (Monad m) => String -> ParsecT s u m a
parserFail msg
    = ParsecT $ \s -> return $ Empty $ return $
        Error (newErrorMessage (Message msg) (statePos s))

instance (Monad m) => MonadPlus (ParsecT s u m) where
    mzero = parserZero
    mplus p1 p2 = parserPlus p1 p2

-- | @parserZero@ always fails without consuming any input. @parserZero@ is defined
-- equal to the 'mzero' member of the 'MonadPlus' class and to the 'Control.Applicative.empty' member 
-- of the 'Control.Applicative.Applicative' class.

parserZero :: (Monad m) => ParsecT s u m a
parserZero
    = ParsecT $ \s -> return $ Empty $ return $ Error (unknownError s)

parserPlus :: (Monad m)
           => ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
parserPlus (ParsecT p1) (ParsecT p2)
    = ParsecT $ \s -> do
        c1 <- p1 s
        case c1 of
          Empty mReply1
            -> do r1 <- mReply1
                  case r1 of
                    Error err -> do
                      c2 <- p2 s
                      case c2 of
                        Empty mReply2
                          -> do reply2 <- mReply2
                                return $ Empty $ return (mergeErrorReply err reply2)
                        consumed
                          -> return $ consumed
                    other -> return $ Empty $ return $ other
          other -> return $ other

instance MonadTrans (ParsecT s u) where
    lift amb = ParsecT $ \s -> do
                 a <- amb
                 return $ Empty $ return $ Ok a s (unknownError s)

infix  0 <?>
infixr 1 <|>

-- | The parser @p <?> msg@ behaves as parser @p@, but whenever the
-- parser @p@ fails /without consuming any input/, it replaces expect
-- error messages with the expect error message @msg@.
--
-- This is normally used at the end of a set alternatives where we want
-- to return an error message in terms of a higher level construct
-- rather than returning all possible characters. For example, if the
-- @expr@ parser from the 'try' example would fail, the error
-- message is: '...: expecting expression'. Without the @(\<?>)@
-- combinator, the message would be like '...: expecting \"let\" or
-- letter', which is less friendly.

(<?>) :: (Monad m)
      => (ParsecT s u m a) -> String -> (ParsecT s u m a)
p <?> msg = label p msg

-- | This combinator implements choice. The parser @p \<|> q@ first
-- applies @p@. If it succeeds, the value of @p@ is returned. If @p@
-- fails /without consuming any input/, parser @q@ is tried. This
-- combinator is defined equal to the 'mplus' member of the 'MonadPlus'
-- class and the ('Control.Applicative.<|>') member of 'Control.Applicative.Alternative'.
--
-- The parser is called /predictive/ since @q@ is only tried when
-- parser @p@ didn't consume any input (i.e.. the look ahead is 1).
-- This non-backtracking behaviour allows for both an efficient
-- implementation of the parser combinators and the generation of good
-- error messages.

(<|>) :: (Monad m)
      => (ParsecT s u m a) -> (ParsecT s u m a) -> (ParsecT s u m a)
p1 <|> p2 = mplus p1 p2

label :: (Monad m) => ParsecT s u m a -> String -> ParsecT s u m a
label p msg
  = labels p [msg]

labels :: (Monad m) => ParsecT s u m a -> [String] -> ParsecT s u m a
labels p msgs
    = ParsecT $ \s -> do
        r <- runParsecT p s
        case r of
          Empty mReply -> do
            reply <- mReply
            return $ Empty $ case reply of
              Error err
                -> return $ Error (setExpectErrors err msgs)
              Ok x s' err
                | errorIsUnknown err -> return $ reply
                | otherwise -> return (Ok x s' (setExpectErrors err msgs))
          other        -> return $ other
    where
        setExpectErrors err []         = setErrorMessage (Expect "") err
        setExpectErrors err [msg]      = setErrorMessage (Expect msg) err
        setExpectErrors err (msg:msgs)
            = foldr (\msg' err' -> addErrorMessage (Expect msg') err')
                    (setErrorMessage (Expect msg) err) msgs

-- TODO: There should be a stronger statement that can be made about this

-- | An instance of @Stream@ has stream type @s@, underlying monad @m@ and token type @t@ determined by the stream
-- 
-- Some rough guidelines for a \"correct\" instance of Stream:
--
--    * unfoldM uncons gives the [t] corresponding to the stream
--
--    * A @Stream@ instance is responsible for maintaining the \"position within the stream\" in the stream state @s@.  This is trivial unless you are using the monad in a non-trivial way.

class (Monad m) => Stream s m t | s -> t where
    uncons :: s -> m (Maybe (t,s))

tokens :: (Stream s m t, Eq t)
       => ([t] -> String)      -- Pretty print a list of tokens
       -> (SourcePos -> [t] -> SourcePos)
       -> [t]                  -- List of tokens to parse
       -> ParsecT s u m [t]
tokens _ _ []
    = ParsecT $ \s -> return $ Empty $ return $ Ok [] s (unknownError s)
tokens showTokens nextposs tts@(tok:toks)
    = ParsecT $ \(State input pos u) -> 
    let
        errEof = return $ Error (setErrorMessage (Expect (showTokens tts))
                                 (newErrorMessage (SysUnExpect "") pos))
        errExpect x = return $ Error (setErrorMessage (Expect (showTokens tts))
                                 (newErrorMessage (SysUnExpect (showTokens [x])) pos))
        walk []     rs = return (ok rs)
        walk (t:ts) rs = do
          sr <- uncons rs
          case sr of
            Nothing                 -> errEof
            Just (x,xs) | t == x    -> walk ts xs
                        | otherwise -> errExpect x
        ok rs = let pos' = nextposs pos tts
                    s' = State rs pos' u
                in Ok tts s' (newErrorUnknown pos')
    in do
        sr <- uncons input
        return $ case sr of
            Nothing         -> Empty    $ errEof
            Just (x,xs)
                | tok == x    -> Consumed $ walk toks xs
                | otherwise -> Empty    $ errExpect x
        
-- | The parser @try p@ behaves like parser @p@, except that it
-- pretends that it hasn't consumed any input when an error occurs.
--
-- This combinator is used whenever arbitrary look ahead is needed.
-- Since it pretends that it hasn't consumed any input when @p@ fails,
-- the ('<|>') combinator will try its second alternative even when the
-- first parser failed while consuming input.
--
-- The @try@ combinator can for example be used to distinguish
-- identifiers and reserved words. Both reserved words and identifiers
-- are a sequence of letters. Whenever we expect a certain reserved
-- word where we can also expect an identifier we have to use the @try@
-- combinator. Suppose we write:
--
-- >  expr        = letExpr <|> identifier <?> "expression"
-- >
-- >  letExpr     = do{ string "let"; ... }
-- >  identifier  = many1 letter
--
-- If the user writes \"lexical\", the parser fails with: @unexpected
-- \'x\', expecting \'t\' in \"let\"@. Indeed, since the ('<|>') combinator
-- only tries alternatives when the first alternative hasn't consumed
-- input, the @identifier@ parser is never tried (because the prefix
-- \"le\" of the @string \"let\"@ parser is already consumed). The
-- right behaviour can be obtained by adding the @try@ combinator:
--
-- >  expr        = letExpr <|> identifier <?> "expression"
-- >
-- >  letExpr     = do{ try (string "let"); ... }
-- >  identifier  = many1 letter

try :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m a
try (ParsecT p)
    = ParsecT $ \s@(State _ pos _) -> do
        res <- p s
        case res of
          Consumed rep -> do r <- rep
                             case r of
                               Error err -> return $ Empty $ return $ Error
                                                         (setErrorPos pos err)
                               ok        -> return $ Consumed $ return $ ok
          empty        -> return $ empty

-- | The parser @token showTok posFromTok testTok@ accepts a token @t@
-- with result @x@ when the function @testTok t@ returns @'Just' x@. The
-- source position of the @t@ should be returned by @posFromTok t@ and
-- the token can be shown using @showTok t@.
--
-- This combinator is expressed in terms of 'tokenPrim'.
-- It is used to accept user defined token streams. For example,
-- suppose that we have a stream of basic tokens tupled with source
-- positions. We can than define a parser that accepts single tokens as:
--
-- >  mytoken x
-- >    = token showTok posFromTok testTok
-- >    where
-- >      showTok (pos,t)     = show t
-- >      posFromTok (pos,t)  = pos
-- >      testTok (pos,t)     = if x == t then Just t else Nothing

token :: (Stream s Identity t)
      => (t -> String)            -- ^ Token pretty-printing function.
      -> (t -> SourcePos)         -- ^ Computes the position of a token.
      -> (t -> Maybe a)           -- ^ Matching function for the token to parse.
      -> Parsec s u a
token showToken tokpos test = tokenPrim showToken nextpos test
    where
        nextpos _ tok ts = case runIdentity (uncons ts) of
                             Nothing -> tokpos tok
                             Just (tok',_) -> tokpos tok'

-- | The parser @token showTok nextPos testTok@ accepts a token @t@
-- with result @x@ when the function @testTok t@ returns @'Just' x@. The
-- token can be shown using @showTok t@. The position of the /next/
-- token should be returned when @nextPos@ is called with the current
-- source position @pos@, the current token @t@ and the rest of the
-- tokens @toks@, @nextPos pos t toks@.
--
-- This is the most primitive combinator for accepting tokens. For
-- example, the 'Text.Parsec.Char.char' parser could be implemented as:
--
-- >  char c
-- >    = tokenPrim showChar nextPos testChar
-- >    where
-- >      showChar x        = "'" ++ x ++ "'"
-- >      testChar x        = if x == c then Just x else Nothing
-- >      nextPos pos x xs  = updatePosChar pos x

tokenPrim :: (Stream s m t)
          => (t -> String)                      -- ^ Token pretty-printing function.
          -> (SourcePos -> t -> s -> SourcePos) -- ^ Next position calculating function.
          -> (t -> Maybe a)                     -- ^ Matching function for the token to parse.
          -> ParsecT s u m a
tokenPrim showToken nextpos test = tokenPrimEx showToken nextpos Nothing test

tokenPrimEx :: (Stream s m t)
            => (t -> String)      
            -> (SourcePos -> t -> s -> SourcePos)
            -> Maybe (SourcePos -> t -> s -> u -> u)
            -> (t -> Maybe a)     
            -> ParsecT s u m a
tokenPrimEx showToken nextpos mbNextState test
    = case mbNextState of
        Nothing
          -> ParsecT $ \(State input pos user) -> do
              r <- uncons input
              case r of
                Nothing -> return $ Empty $ return (sysUnExpectError "" pos)
                Just (c,cs)
                  -> case test c of
                       Just x  -> let newpos   = nextpos pos c cs
                                      newstate = State cs newpos user
                                  in seq newpos $ seq newstate $
                                     return $ Consumed $ return $
                                       (Ok x newstate (newErrorUnknown newpos))
                       Nothing -> return $ Empty $ return $
                                    (sysUnExpectError (showToken c) pos)
        Just nextState
          -> ParsecT $ \(State input pos user) -> do
              r <- uncons input
              case r of
                Nothing -> return $ Empty $ return (sysUnExpectError "" pos)
                Just (c,cs)
                  -> case test c of
                       Just x  -> let newpos   = nextpos pos c cs
                                      newuser  = nextState pos c cs user
                                      newstate = State cs newpos newuser
                                  in seq newpos $ seq newstate $
                                     return $ Consumed $ return $
                                       (Ok x newstate (newErrorUnknown newpos))
                       Nothing -> return $ Empty $ return $
                                    (sysUnExpectError (showToken c) pos)

-- | @many p@ applies the parser @p@ /zero/ or more times. Returns a
--    list of the returned values of @p@.
--
-- >  identifier  = do{ c  <- letter
-- >                  ; cs <- many (alphaNum <|> char '_')
-- >                  ; return (c:cs)
-- >                  }

many :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m [a]
many p
  = do xs <- manyAccum (:) p
       return (reverse xs)

-- | @skipMany p@ applies the parser @p@ /zero/ or more times, skipping
-- its result.
--
-- >  spaces  = skipMany space

skipMany :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m ()
skipMany p
  = do manyAccum (\_ _ -> []) p
       return ()

manyAccum :: (Stream s m t)
          => (a -> [a] -> [a])
          -> ParsecT s u m a
          -> ParsecT s u m [a]
manyAccum accum p
    = ParsecT $ \s ->
        let walk xs state mr
              = do r <- mr
                   case r of
                     Empty mReply
                         -> do reply <- mReply
                               case reply of
                                 Error err -> return $ Ok xs state err
                                 _         -> error "Text.Parsec.Prim.many: combinator 'many' is applied to a parser that accepts an empty string."
                     Consumed mReply
                         -> do reply <- mReply
                               case reply of
                                 Error err
                                     -> return $ Error err
                                 Ok x s' _err
                                     -> let ys = accum x xs
                                        in seq ys (walk ys s' (runParsecT p s'))
        in do r <- runParsecT p s
              case r of
                Empty mReply
                    -> do reply <- mReply
                          case reply of
                            Ok _ _ _
                                -> error "Text.ParserCombinators.Parsec.Prim.many: combinator 'many' is applied to a parser that accepts an empty string."
                            Error err
                                -> return $ Empty $ return (Ok [] s err)
                consumed
                    -> return $ Consumed $ walk [] s (return consumed)


-- < Running a parser: monadic (runPT) and pure (runP)

runPT :: (Stream s m t)
      => ParsecT s u m a -> u -> SourceName -> s -> m (Either ParseError a)
runPT p u name s
    = do res <- runParsecT p (State s (initialPos name) u)
         r <- parserReply res
         case r of
           Ok x _ _  -> return (Right x)
           Error err -> return (Left err)
    where
        parserReply res
            = case res of
                Consumed r -> r
                Empty    r -> r

runP :: (Stream s Identity t)
     => Parsec s u a -> u -> SourceName -> s -> Either ParseError a
runP p u name s = runIdentity $ runPT p u name s

-- | The most general way to run a parser. @runParserT p state filePath
-- input@ runs parser @p@ on the input list of tokens @input@,
-- obtained from source @filePath@ with the initial user state @st@.
-- The @filePath@ is only used in error messages and may be the empty
-- string. Returns a computation in the underlying monad @m@ that return either a 'ParseError' ('Left') or a
-- value of type @a@ ('Right').

runParserT :: (Stream s m t)
           => ParsecT s u m a -> u -> SourceName -> s -> m (Either ParseError a)
runParserT = runPT

-- | The most general way to run a parser over the Identity monad. @runParser p state filePath
-- input@ runs parser @p@ on the input list of tokens @input@,
-- obtained from source @filePath@ with the initial user state @st@.
-- The @filePath@ is only used in error messages and may be the empty
-- string. Returns either a 'ParseError' ('Left') or a
-- value of type @a@ ('Right').
--
-- >  parseFromFile p fname
-- >    = do{ input <- readFile fname
-- >        ; return (runParser p () fname input)
-- >        }

runParser :: (Stream s Identity t)
          => Parsec s u a -> u -> SourceName -> s -> Either ParseError a
runParser = runP

-- | @parse p filePath input@ runs a parser @p@ over Identity without user
-- state. The @filePath@ is only used in error messages and may be the
-- empty string. Returns either a 'ParseError' ('Left')
-- or a value of type @a@ ('Right').
--
-- >  main    = case (parse numbers "" "11, 2, 43") of
-- >             Left err  -> print err
-- >             Right xs  -> print (sum xs)
-- >
-- >  numbers = commaSep integer

parse :: (Stream s Identity t)
      => Parsec s () a -> SourceName -> s -> Either ParseError a
parse p = runP p ()

-- | The expression @parseTest p input@ applies a parser @p@ against
-- input @input@ and prints the result to stdout. Used for testing
-- parsers.

parseTest :: (Stream s Identity t, Show a)
          => Parsec s () a -> s -> IO ()
parseTest p input
    = case parse p "" input of
        Left err -> do putStr "parse error at "
                       print err
        Right x  -> print x

-- < Parser state combinators

-- | Returns the current source position. See also 'SourcePos'.

getPosition :: (Monad m) => ParsecT s u m SourcePos
getPosition = do state <- getParserState
                 return (statePos state)

-- | Returns the current input 

getInput :: (Monad m) => ParsecT s u m s
getInput = do state <- getParserState
              return (stateInput state)

-- | @setPosition pos@ sets the current source position to @pos@. 

setPosition :: (Monad m) => SourcePos -> ParsecT s u m ()
setPosition pos
    = do updateParserState (\(State input _ user) -> State input pos user)
         return ()

-- | @setInput input@ continues parsing with @input@. The 'getInput' and
-- @setInput@ functions can for example be used to deal with #include
-- files. 

setInput :: (Monad m) => s -> ParsecT s u m ()
setInput input
    = do updateParserState (\(State _ pos user) -> State input pos user)
         return ()

-- | Returns the full parser state as a 'State' record.

getParserState :: (Monad m) => ParsecT s u m (State s u)
getParserState = updateParserState id

-- | @setParserState st@ set the full parser state to @st@. 

setParserState :: (Monad m) => State s u -> ParsecT s u m (State s u)
setParserState st = updateParserState (const st)

-- | @updateParserState f@ applies function @f@ to the parser state.

updateParserState :: (Monad m)
                  => (State s u -> State s u) -> ParsecT s u m (State s u)
updateParserState f
    = ParsecT $ \s -> let s' = f s
                      in return $ Empty $ return (Ok s' s' (unknownError s'))

-- < User state combinators

-- | Returns the current user state. 

getState :: (Monad m) => ParsecT s u m u
getState = stateUser `liftM` getParserState

-- | @putState st@ set the user state to @st@. 

putState :: (Monad m) => u -> ParsecT s u m ()
putState u = do updateParserState $ \s -> s { stateUser = u }
                return ()

-- | @updateState f@ applies function @f@ to the user state. Suppose
-- that we want to count identifiers in a source, we could use the user
-- state as:
--
-- >  expr  = do{ x <- identifier
-- >            ; updateState (+1)
-- >            ; return (Id x)
-- >            }

modifyState :: (Monad m) => (u -> u) -> ParsecT s u m ()
modifyState f = do updateParserState $ \s -> s { stateUser = f (stateUser s) }
                   return ()

-- XXX Compat

-- | An alias for putState for backwards compatibility.

setState :: (Monad m) => u -> ParsecT s u m ()
setState = putState

-- | An alias for modifyState for backwards compatibility.

updateState :: (Monad m) => (u -> u) -> ParsecT s u m ()
updateState = modifyState