-- | The core functionality of a Parcom parser: defining and running parsers, -- lifting, getting tokens and characters from a stream, and the most basic -- primitive parsers and combinators that cannot easily be expressed in terms -- of other parsers and combinators. {-#LANGUAGE MultiParamTypeClasses #-} module Text.Parcom.Core ( ParcomError (..) , SourcePosition (..) , ParcomT, parseT , Parcom, parse , peek, next, atEnd , try, handle, handleB , notFollowedBy , (), (<|>), empty , Stream, Token, Listish, Textish , peekChar, nextChar ) where import qualified Text.Parcom.Stream as Stream import Text.Parcom.Stream (Stream, Token, Listish, Textish) import Control.Monad (liftM, ap) import Control.Applicative import Control.Monad.Identity import Control.Monad.Trans.Class -- | Represents a position in a source file. Both lines and columns are -- 1-based. data SourcePosition = SourcePosition { posFileName :: String , posLine :: Int , posColumn :: Int } deriving (Show) -- | A parser error. data ParcomError = ParcomError { peErrorDescription :: String -- ^ Human-readable description of the error , peSourcePosition :: SourcePosition -- ^ Position in the source where the error was found. } deriving (Show) -- | The parser's internal state. data ParcomState s = ParcomState { psSourcePosition :: SourcePosition -- ^ Current source position, for error reporting , psStream :: s -- ^ The remaining source stream } -- | Parcom as a pure parser type Parcom s t a = ParcomT s t Identity a -- | Parcom as a monad transformer. You can access the underlying monad stack -- using the usual lifting techniques. newtype ParcomT s t m a = ParcomT { runParcomT :: ParcomState s -> m (Either ParcomError a, ParcomState s) } -- | Parcom is a monad. Obviously. Since the Parcom monad handles both failure -- through 'Either' as well as carrying along its internal state, *and* -- supporting the transformed parent monad, the implementation is a tiny bit -- hairy. instance (Monad m) => Monad (ParcomT s t m) where return x = ParcomT (\s -> return (Right x, s)) fail err = ParcomT (\s -> return (Left $ ParcomError err (psSourcePosition s), s)) m >>= f = ParcomT $ \s -> do -- in the m Monad (a, s') <- runParcomT m s case a of Left err -> return (Left err, s') Right ma -> do runParcomT (f ma) s' -- | ParcomT enables lifting by implementing 'MonadTrans' instance MonadTrans (ParcomT s t) where lift a = ParcomT $ \s -> do v <- a return (Right v, s) instance (Monad m) => Functor (ParcomT s t m) where fmap f xs = xs >>= return . f instance (Monad m) => Applicative (ParcomT s t m) where pure = return (<*>) = ap instance (Monad m) => Alternative (ParcomT s t m) where (<|>) = alt empty = fail "empty" -- | Wrapper that handles setting up a sensible initial state before running a -- ParcomT runParserT :: (Stream s t, Token t) => ParcomT s t m a -> String -> s -> m (Either ParcomError a, ParcomState s) runParserT p fn str = runParcomT p state where state = ParcomState { psSourcePosition = SourcePosition fn 1 1 , psStream = str } -- | Run a parcom transformer and return the result parseT :: (Stream s t, Token t, Monad m) => ParcomT s t m a -> String -> s -> m (Either ParcomError a) parseT p fn str = fst `liftM` runParserT p fn str -- | Run a pure parcom parser and return the result parse :: (Stream s t, Token t) => Parcom s t a -> String -> s -> Either ParcomError a parse p fn str = runIdentity $ parseT p fn str -- | Get the internal parser state getState :: Monad m => ParcomT s t m (ParcomState s) getState = ParcomT $ \s -> return (Right s, s) -- | Get the internal parser state and apply a projection function useState :: Monad m => (ParcomState s -> a) -> ParcomT s t m a useState f = ParcomT $ \s -> return (Right $ f s, s) -- | Overwrite the internal parser state entirely setState :: Monad m => ParcomState s -> ParcomT s t m () setState s = ParcomT $ \_ -> return (Right (), s) -- | Apply a modification function to the internal parser state modifyState :: Monad m => (ParcomState s -> ParcomState s) -> ParcomT s t m () modifyState f = ParcomT $ \s -> return (Right (), f s) -- | Wrap a raw parser with error and success handler functions, such that: -- -- * the handlers can choose whether to fail or succeed -- -- * the handlers can choose to backtrack (by returning the previous state) or -- consume (by returning the new state). -- -- In order to facilitate this, both the error and success handler functions -- accept three arguments: the error or parsed value, respectively; the parser -- state before parsing; and the parser state after parsing. handle' :: Monad m => ParcomT s t m a -- ^ The parser we're wrapping -> (ParcomError -> ParcomState s -> ParcomState s -> m (Either ParcomError b, ParcomState s)) -- ^ error handler -> (a -> ParcomState s -> ParcomState s -> m (Either ParcomError b, ParcomState s)) -- ^ success handler -> ParcomT s t m b handle' p errorH successH = ParcomT $ \s -> do (r, s') <- runParcomT p s case r of -- parse failed: run error handler Left e -> errorH e s s' -- parse succeeded: run success handler Right x -> successH x s s' -- | Wrap a raw parser to allow handling success and failure. The error and -- success handlers take the error or parsed value, respectively, and return -- a parser that should be applied in the error or success case, respectively. -- No backtracking is performed. handle :: Monad m => ParcomT s t m a -> (ParcomError -> ParcomT s t m b) -> (a -> ParcomT s t m b) -> ParcomT s t m b handle p f t = handle' p (\e _ s' -> runParcomT (f e) s') (\x _ s' -> runParcomT (t x) s') -- | Same as 'handle', but backtrack on error (that is, if the raw parser -- fails, any input it has consumed is restored. handleB :: Monad m => ParcomT s t m a -> (ParcomError -> ParcomT s t m b) -> (a -> ParcomT s t m b) -> ParcomT s t m b handleB p f t = handle' p (\e s _ -> runParcomT (f e) s) (\x _ s' -> runParcomT (t x) s') -- | Backtracking modifier; restores the parser state to the previous situation -- if the wrapped parser fails. try :: Monad m => ParcomT s t m a -> ParcomT s t m a try p = handle' p (\e s _ -> return (Left e, s)) (\x _ s' -> return (Right x, s')) -- | Return the result of the first parser that succeeds. alt :: Monad m => ParcomT s t m a -> ParcomT s t m a -> ParcomT s t m a alt a b = handle' a (\e s _ -> runParcomT b s) (\x _ s' -> return (Right x, s')) -- | Succeeds iff the given parser fails notFollowedBy :: (Monad m, Stream s t) => ParcomT s t m a -> ParcomT s t m () notFollowedBy p = handle' p (\_ s _ -> return (Right (), s)) (\x s _ -> runParcomT (fail "something followed that shouldn't") s) -- | Gets the next token from the stream without consuming it. -- Fails at end-of-input. peek :: (Monad m, Stream s t) => ParcomT s t m t peek = do str <- psStream `liftM` getState if Stream.atEnd str then fail "Unexpected end of input" else return (Stream.peek str) -- | Checks whether end-of-input has been reached. atEnd :: (Monad m, Stream s t) => ParcomT s t m Bool atEnd = useState (Stream.atEnd . psStream) -- | Update a source position to proceed to the next line nextLine :: SourcePosition -> SourcePosition nextLine s = s { posLine = posLine s + 1, posColumn = 1 } -- | Update a source position to proceed to the next column nextColumn :: SourcePosition -> SourcePosition nextColumn s = s { posColumn = posColumn s + 1 } -- | Gets the next token from the stream and consumes it. -- Fails at end-of-input. next :: (Monad m, Stream s t, Token t) => ParcomT s t m t next = do str <- psStream `liftM` getState if Stream.atEnd str then fail "Unexpected end of input" else do let (t, str') = Stream.pop str modifyState $ \state -> state { psStream = str' } if Stream.isLineDelimiter t then modifyState $ \s -> s { psSourcePosition = nextLine (psSourcePosition s) } else modifyState $ \s -> s { psSourcePosition = nextColumn (psSourcePosition s) } return t -- | Get one character from the stream, but do not consume it. Fails when the -- input stream contains a sequence that does not represent a valid character, -- or when the end of the input stream has been reached. peekChar :: (Monad m, Stream s t, Token t, Textish s) => ParcomT s t m Char peekChar = do str <- psStream `liftM` getState let (charMay, _) = Stream.peekChar str case charMay of Just c -> return c Nothing -> fail "Tokens do not form a valid character, or end of input reached" -- | Get one character from the stream, and consume it. Fails when the input -- stream contains a sequence that does not represent a valid character, or -- when the end of the input stream has been reached. nextChar :: (Monad m, Stream s t, Token t, Textish s) => ParcomT s t m Char nextChar = do str <- psStream `liftM` getState let (charMay, numTokens) = Stream.peekChar str case charMay of Just c -> do replicateM_ numTokens next return c Nothing -> fail "Tokens do not form a valid character, or end of input reached" -- | Tags a parser with a human-readable description of the expected entity, -- generating an "Expected {entity}" type error message on failure. infixl 3 () :: (Monad m, Stream s t) => ParcomT s t m a -> String -> ParcomT s t m a p expected = p <|> fail ("Expected " ++ expected)