src/Parsimony/Prim.hs

-----------------------------------------------------------------------------
-- |
-- Module      :  Parsimony.Prim
-- Copyright   :  (c) Daan Leijen 1999-2001, (c) Iavor S. Diatchki 2009
-- License     :  BSD3
--
-- Maintainer  :  iavor.diatchki@gmail.com
-- Stability   :  provisional
-- Portability :  portable
--
-- The primitive parser combinators.
--
-----------------------------------------------------------------------------

module Parsimony.Prim
  ( Parser, PrimParser, Reply(..)
  , runParser, primParser
  , parseError, try, lookAhead, labels
  , foldMany, foldManyWhile, skipMany, match
  , State(..), getState, updateState, mapState
  ) where

import Parsimony.Pos
import Parsimony.Error
import Control.Applicative(Applicative(..),Alternative(..))
import Control.Monad(liftM,ap,MonadPlus(..))


-- | A parser constructing values of type 'a', with an input
-- buffer of type 't'.
data Parser t a       = P { unP :: State t -> R t a }

-- NOTE: The order of the fields is important!
-- In the rest of the module we use the fact that pattern matching
-- happens left to right to ensure that if matching on the 'Bool'
-- fails, then we will not look at the 'Either' field.
data R s a            = R !Bool (Reply s a)

data Reply s a        = Ok !a !(State s)
                      | Error !ParseError


-- | The parser state.
data State t          = State { stateInput :: !t          -- ^ Token source
                              , statePos   :: !SourcePos  -- ^ Current position
                              }

type PrimParser s a   = State s -> Reply s a

-- | Define a primitive parser.
-- Consumes input on success.
{-# INLINE primParser #-}
primParser           :: PrimParser t a -> Parser t a
primParser prim       = P $ \s -> case prim s of
                                    r@(Error _) -> R False r
                                    r           -> R True r


{-# INLINE runParser #-}
-- | Convert a parser into a 'PrimParser'.
runParser            :: Parser t a -> PrimParser t a
runParser p s         = case unP p s of
                          R _ x -> x

-- | Access the current parser state.
-- Does not consume input.
{-# INLINE getState #-}
getState             :: Parser t (State t)
getState              = P $ \s -> R False (Ok s s)

-- | Modify the current parser state.
-- Returns the old state.
-- Does not consume input.
{-# INLINE updateState #-}
updateState          :: (State s -> State s) -> Parser s ()
updateState f         = P $ \s -> R False $! Ok () (f s)

-- | Change the input stream of a parser.
-- This is useful for extending the input stream with extra information.
-- The first function splits the extended state into a state
-- suitable for use by the given parser and some additional information.
-- The second function combines the extra infomration of the original
-- state with the new partial state, to compute a new extended state.
{-# INLINE mapState #-}
mapState            :: (State big -> (State small,extra))
                    -> (State small -> extra -> State big)
                    -> Parser small a -> Parser big a
mapState extract inject p  = P $ \big ->
  case extract big of
    (small,extra) ->
      case unP p small of
        -- XXX: strict
        R c r -> R c $ case r of
                         Error err    -> Error err
                         Ok a small1  -> Ok a (inject small1 extra)



-- | Fail with the given parser error without consuming any input.
-- The error is applied to the current source position.
{-# INLINE parseError #-}
parseError          :: (SourcePos -> ParseError) -> Parser t a
parseError e         = P $ \s -> R False $ Error $ e $ statePos s




-- | Allow a parser to back-track.  The resulting parser behaves like
-- the input parser unless it fails.  In that case,  we backtrack
-- without consuming any input.  Because we may have to back-track,
-- we keep a hold of the parser input so over-use of this function
-- may result in memory leaks.

{-# INLINE try #-}
try                :: Parser t a -> Parser t a
try p               = P $ \s ->
  case unP p s of
    R True (Error err)  -> R False $ Error $ setErrorPos (statePos s) err
    other               -> other


-- | Applies the given parser without consuming any input.
{-# INLINE lookAhead #-}
lookAhead          :: Parser t a -> Parser t a
lookAhead p         = P $ \s ->
  R False $ case unP p s of
              R _ (Error err) -> Error err
              R _ (Ok a _)    -> Ok a s

-- | The resulting parser behaves like the input parser,
-- except that in case of failure we use the given expectation
-- messages.
{-# INLINE labels #-}
labels             :: Parser t a -> [String] -> Parser t a
labels p msgs0      = P $ \s ->
  case unP p s of
    R c r -> R c (addErr r)

  where setExpectErrors err []         = setErrorMessage (Expect "") err
        setExpectErrors err [msg]      = setErrorMessage (Expect msg) err
        setExpectErrors err (msg:msgs) =
          foldr (\m e -> addErrorMessage (Expect m) e)
             (setErrorMessage (Expect msg) err) msgs

        addErr (Error e)  = Error $ setExpectErrors e msgs0
        addErr r          = r


-- | Apply a parser repeatedly, combining the results with the
-- given functions.  This function is similar to the strict 'foldl'.
-- We stop when an application of the parser fails without consuming any
-- input.  If the parser fails after it has consumed some input, then
-- the repeated parser will also fail.

{-# INLINE foldMany #-}
foldMany :: (b -> a -> b) -> b -> Parser t a -> Parser t b
foldMany cons nil p = P $ \s ->
  case unP p s of
    R False (Ok {})     -> crash "Parsimony.foldMany"
    R False (Error _)   -> R False $ Ok nil s
    R True  (Ok x s1)   -> R True  $ (walk $! cons nil x) s1
    R True  (Error err) -> R True  $ Error err

  -- NOTE: this is written like this because after the first iteration
  -- we already know weather the parser will be consuming input.
  where
  walk xs s =
    case unP p s of
      R False (Ok {})   -> crash "Parsimony.foldMany"
      R False (Error _) -> Ok xs s
      R True  (Ok x s1) -> (walk $! cons xs x) s1
      R True  (Error e) -> Error e


-- | Apply a parser repeatedly, combining the results with the
-- given functions.  This function is similar to the strict 'foldl'.
-- We stop on one of the following conditions:
--   * an application of the parser fails without consuming any input,
--   * the pearser returns 'Nothing' as a result.
-- If the parser fails after it has consumed some input, then
-- the repeated parser will also fail.

{-# INLINE foldManyWhile #-}
foldManyWhile :: (b -> a -> b) -> b -> Parser t (Maybe a) -> Parser t b
foldManyWhile cons nil p = P $ \s ->
  case unP p s of
    R False (Ok Nothing _)   -> R False $ Ok nil s
    R False (Ok {})          -> crash "Parsimony.foldManyWhile"
    R False (Error _)        -> R False $ Ok nil s
    R True  (Ok Nothing s1)  -> R True  $ Ok nil s1
    R True  (Ok (Just x) s1) -> R True  $ (walk $! cons nil x) s1
    R True  (Error err)      -> R True  $ Error err

  -- NOTE: this is written like this because after the first iteration
  -- we already know weather the parser will be consuming input.
  where
  walk xs s =
    case unP p s of
      R False (Ok Nothing _)    -> Ok xs s
      R False (Ok {})           -> crash "Parsimony.foldManyWhile"
      R False (Error _)         -> Ok xs s
      R True  (Ok Nothing s1)   -> Ok xs s1
      R True  (Ok (Just x) s1)  -> (walk $! cons xs x) s1
      R True  (Error e)         -> Error e


-- | Apply a parser repeatedly, ignoring the results.
-- We stop when an application of the parser fails without consuming any
-- input.  If the parser fails after it has consumed some input, then
-- the repeated parser will also fail.

{-# INLINE skipMany #-}
skipMany :: Parser t a -> Parser t ()
skipMany p = P $ \s ->
  -- pFold specialized for a common case

  case unP p s of
    R False (Ok {})     -> crash "Parsimony.skipMany"
    R False (Error _)   -> R False $ Ok () s
    R True  (Ok _ s1)   -> R True  $ walk s1
    R True  (Error err) -> R True  $ Error err

  -- NOTE: this is written like this because after the first iteration
  -- we already know weather the parser will be consuming input.
  where
  walk s =
    case unP p s of
      R False (Ok {})   -> crash "Parsimony.skipMany"
      R False (Error _) -> Ok () s
      R True  (Ok _ s1) -> walk s1
      R True  (Error e) -> Error e


-- | Produces a parser that succeeds if it can extract the list of values
-- specified by the list.
-- The function argument specifies how to show the expectations in
-- error messages.
match :: (Eq a) => (a -> String) -> [a] -> Parser t a -> Parser t ()
match sh goal p = P (outer goal)

  where
  expected x          = addErrorMessage (Expect (sh x))
  unexpected x pos    = newErrorMessage (UnExpect (sh x)) pos

  -- not yet consumed
  outer [] s      = R False $ Ok () s
  outer (x:xs) s  =
     case unP (labels p [sh x]) s of
       R False (Ok a s1)
         | x == a    -> outer xs s1
         | otherwise -> R False $ Error $ expected x $ unexpected a $ statePos s
       R False (Error e) -> R False $ Error e
       R True r -> R True $
         case r of
           Error e -> Error $ expected x e
           Ok a s1
             | x == a    -> inner xs s1
             | otherwise -> Error $ expected x $ unexpected a $ statePos s

  -- we consumed something
  inner [] s      = Ok () s
  inner (x:xs) s  =
    case unP (labels p [sh x]) s of
      R _ (Ok a s1)
        | x == a    -> inner xs s1
        | otherwise -> Error $ expected x $ unexpected a $ statePos s
      R _ (Error e) -> Error e



-- | We use to let the user know that we have entered an infinity loop.
crash :: String -> a
crash f = error $ f ++ " applied to a parser that accepts the empty string."

-- Instances -------------------------------------------------------------------

instance Functor (Parser t) where
  fmap = liftM

instance Monad (Parser t) where
  return a  = pure a
  p >>= f   = P $ \s ->
    case unP p s of
      R True r  -> R True $ case r of
                              Error e -> Error e
                              Ok a s1 ->
                                case unP (f a) s1 of
                                  R _ r1 -> r1
      R False r -> case r of
                     Error e  -> R False $ Error e
                     Ok a s1  -> unP (f a) s1

  fail m  = parseError (newErrorMessage (Message m))

instance Applicative (Parser t) where
  pure a  = P $ \s -> R False $ Ok a s
  (<*>)   = ap

instance Alternative (Parser t) where
  empty     = parseError newErrorUnknown
  p1 <|> p2 = P $ \s ->
    -- WARNING: It is important that we match the 'False' first
    -- because then we can quickly move to the second branch, without
    -- having to perform any actual parsing.
    case unP p1 s of
      R False (Error _) -> unP p2 s
      other             -> other


instance MonadPlus (Parser t) where
  mzero   = empty
  mplus   = (<|>)