Text/Parcom/Core.hs

-- | The core functionality of a Parcom parser: defining and running parsers,
-- lifting, getting tokens and characters from a stream, and the most basic
-- primitive parsers and combinators that cannot easily be expressed in terms
-- of other parsers and combinators.
{-#LANGUAGE MultiParamTypeClasses #-}
module Text.Parcom.Core
( ParcomError (..)
, SourcePosition (..)
, ParcomT, parseT
, Parcom, parse
, peek, next, atEnd
, try, handle, handleB
, notFollowedBy
, (<?>), (<|>), empty
, Stream, Token, Listish, Textish
, peekChar, nextChar
)
where

import qualified Text.Parcom.Stream as Stream
import Text.Parcom.Stream (Stream, Token, Listish, Textish)
import Control.Monad (liftM, ap)
import Control.Applicative
import Control.Monad.Identity
import Control.Monad.Trans.Class

-- | Represents a position in a source file. Both lines and columns are
-- 1-based.
data SourcePosition =
    SourcePosition
        { posFileName :: String
        , posLine :: Int
        , posColumn :: Int
        }
        deriving (Show)

-- | A parser error.
data ParcomError =
    ParcomError
        { peErrorDescription :: String -- ^ Human-readable description of the error
        , peSourcePosition :: SourcePosition -- ^ Position in the source where the error was found.
        }
        deriving (Show)

-- | The parser's internal state.
data ParcomState s =
    ParcomState
        { psSourcePosition :: SourcePosition -- ^ Current source position, for error reporting
        , psStream :: s -- ^ The remaining source stream
        }

-- | Parcom as a pure parser
type Parcom s t a = ParcomT s t Identity a

-- | Parcom as a monad transformer. You can access the underlying monad stack
-- using the usual lifting techniques.
newtype ParcomT s t m a = ParcomT { runParcomT :: ParcomState s -> m (Either ParcomError a, ParcomState s) }

-- | Parcom is a monad. Obviously. Since the Parcom monad handles both failure
-- through 'Either' as well as carrying along its internal state, *and*
-- supporting the transformed parent monad, the implementation is a tiny bit
-- hairy.
instance (Monad m) => Monad (ParcomT s t m) where
    return x = ParcomT (\s -> return (Right x, s))
    fail err = ParcomT (\s -> return (Left $ ParcomError err (psSourcePosition s), s))
    m >>= f = ParcomT $ \s -> do -- in the m Monad
        (a, s') <- runParcomT m s
        case a of
            Left err -> return (Left err, s')
            Right ma -> do
                runParcomT (f ma) s'

-- | ParcomT enables lifting by implementing 'MonadTrans'
instance MonadTrans (ParcomT s t) where
    lift a = ParcomT $ \s -> do
        v <- a
        return (Right v, s)

instance (Monad m) => Functor (ParcomT s t m) where
    fmap f xs = xs >>= return . f

instance (Monad m) => Applicative (ParcomT s t m) where
    pure = return
    (<*>) = ap

instance (Monad m) => Alternative (ParcomT s t m) where
    (<|>) = alt
    empty = fail "empty"

-- | Wrapper that handles setting up a sensible initial state before running a
-- ParcomT
runParserT :: (Stream s t, Token t) => ParcomT s t m a -> String -> s -> m (Either ParcomError a, ParcomState s)
runParserT p fn str =
    runParcomT p state
    where
        state =
            ParcomState
                { psSourcePosition = SourcePosition fn 1 1
                , psStream = str }

-- | Run a parcom transformer and return the result
parseT :: (Stream s t, Token t, Monad m) => ParcomT s t m a -> String -> s -> m (Either ParcomError a)
parseT p fn str = fst `liftM` runParserT p fn str

-- | Run a pure parcom parser and return the result
parse :: (Stream s t, Token t) => Parcom s t a -> String -> s -> Either ParcomError a
parse p fn str = runIdentity $ parseT p fn str

-- | Get the internal parser state
getState :: Monad m => ParcomT s t m (ParcomState s)
getState = ParcomT $ \s -> return (Right s, s)

-- | Get the internal parser state and apply a projection function
useState :: Monad m => (ParcomState s -> a) -> ParcomT s t m a
useState f = ParcomT $ \s -> return (Right $ f s, s)

-- | Overwrite the internal parser state entirely
setState :: Monad m => ParcomState s -> ParcomT s t m ()
setState s = ParcomT $ \_ -> return (Right (), s)

-- | Apply a modification function to the internal parser state
modifyState :: Monad m => (ParcomState s -> ParcomState s) -> ParcomT s t m ()
modifyState f = ParcomT $ \s -> return (Right (), f s)

-- | Wrap a raw parser with error and success handler functions, such that:
--
-- * the handlers can choose whether to fail or succeed
--
-- * the handlers can choose to backtrack (by returning the previous state) or
--   consume (by returning the new state).
--
-- In order to facilitate this, both the error and success handler functions
-- accept three arguments: the error or parsed value, respectively; the parser
-- state before parsing; and the parser state after parsing.
handle' :: Monad m
    => ParcomT s t m a -- ^ The parser we're wrapping
    -> (ParcomError -> ParcomState s -> ParcomState s -> m (Either ParcomError b, ParcomState s)) -- ^ error handler
    -> (a -> ParcomState s -> ParcomState s -> m (Either ParcomError b, ParcomState s)) -- ^ success handler
    -> ParcomT s t m b
handle' p errorH successH = ParcomT $ \s -> do
    (r, s') <- runParcomT p s
    case r of
        -- parse failed: run error handler
        Left e -> errorH e s s'
        -- parse succeeded: run success handler
        Right x -> successH x s s'

-- | Wrap a raw parser to allow handling success and failure. The error and
-- success handlers take the error or parsed value, respectively, and return
-- a parser that should be applied in the error or success case, respectively.
-- No backtracking is performed.
handle :: Monad m
    => ParcomT s t m a
    -> (ParcomError -> ParcomT s t m b)
    -> (a -> ParcomT s t m b)
    -> ParcomT s t m b
handle p f t =
    handle' p (\e _ s' -> runParcomT (f e) s') (\x _ s' -> runParcomT (t x) s')

-- | Same as 'handle', but backtrack on error (that is, if the raw parser
-- fails, any input it has consumed is restored.
handleB :: Monad m
    => ParcomT s t m a
    -> (ParcomError -> ParcomT s t m b)
    -> (a -> ParcomT s t m b)
    -> ParcomT s t m b
handleB p f t =
    handle' p (\e s _ -> runParcomT (f e) s) (\x _ s' -> runParcomT (t x) s')

-- | Backtracking modifier; restores the parser state to the previous situation
-- if the wrapped parser fails.
try :: Monad m => ParcomT s t m a -> ParcomT s t m a
try p = handle' p (\e s _ -> return (Left e, s)) (\x _ s' -> return (Right x, s'))

-- | Return the result of the first parser that succeeds.
alt :: Monad m => ParcomT s t m a -> ParcomT s t m a -> ParcomT s t m a
alt a b = handle' a (\e s _ -> runParcomT b s) (\x _ s' -> return (Right x, s'))

-- | Succeeds iff the given parser fails
notFollowedBy :: (Monad m, Stream s t) => ParcomT s t m a -> ParcomT s t m ()
notFollowedBy p = handle' p (\_ s _ -> return (Right (), s)) (\x s _ -> runParcomT (fail "something followed that shouldn't") s)

-- | Gets the next token from the stream without consuming it.
-- Fails at end-of-input.
peek :: (Monad m, Stream s t) => ParcomT s t m t
peek = do
    str <- psStream `liftM` getState
    if Stream.atEnd str
        then fail "Unexpected end of input"
        else return (Stream.peek str)

-- | Checks whether end-of-input has been reached.
atEnd :: (Monad m, Stream s t) => ParcomT s t m Bool
atEnd = useState (Stream.atEnd . psStream)

-- | Update a source position to proceed to the next line
nextLine :: SourcePosition -> SourcePosition
nextLine s =
    s { posLine = posLine s + 1, posColumn = 1 }

-- | Update a source position to proceed to the next column
nextColumn :: SourcePosition -> SourcePosition
nextColumn s =
    s { posColumn = posColumn s + 1 }

-- | Gets the next token from the stream and consumes it.
-- Fails at end-of-input.
next :: (Monad m, Stream s t, Token t) => ParcomT s t m t
next = do
    str <- psStream `liftM` getState
    if Stream.atEnd str
        then fail "Unexpected end of input"
        else do
            let (t, str') = Stream.pop str
            modifyState $ \state -> state { psStream = str' }
            if Stream.isLineDelimiter t
                then modifyState $ \s -> s { psSourcePosition = nextLine (psSourcePosition s) }
                else modifyState $ \s -> s { psSourcePosition = nextColumn (psSourcePosition s) }
            return t

-- | Get one character from the stream, but do not consume it. Fails when the
-- input stream contains a sequence that does not represent a valid character,
-- or when the end of the input stream has been reached.
peekChar :: (Monad m, Stream s t, Token t, Textish s) => ParcomT s t m Char
peekChar = do
    str <- psStream `liftM` getState
    let (charMay, _) = Stream.peekChar str
    case charMay of
        Just c -> return c
        Nothing -> fail "Tokens do not form a valid character, or end of input reached"
    
-- | Get one character from the stream, and consume it. Fails when the input
-- stream contains a sequence that does not represent a valid character, or
-- when the end of the input stream has been reached.
nextChar :: (Monad m, Stream s t, Token t, Textish s) => ParcomT s t m Char
nextChar = do
    str <- psStream `liftM` getState
    let (charMay, numTokens) = Stream.peekChar str
    case charMay of
        Just c -> do
            replicateM_ numTokens next
            return c
        Nothing -> fail "Tokens do not form a valid character, or end of input reached"

-- | Tags a parser with a human-readable description of the expected entity,
-- generating an "Expected {entity}" type error message on failure.
infixl 3 <?>
(<?>) :: (Monad m, Stream s t) => ParcomT s t m a -> String -> ParcomT s t m a
p <?> expected = p <|> fail ("Expected " ++ expected)