{-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE MultiParamTypeClasses #-} module Agda.Syntax.Parser.Monad ( -- * The parser monad Parser , ParseResult(..) , ParseState(..) , ParseError(..) , LexState , LayoutContext(..) , ParseFlags (..) -- * Running the parser , initState , defaultParseFlags , parse , parsePosString , parseFile -- * Manipulating the state , setParsePos, setLastPos, getParseInterval , setPrevToken , getParseFlags , getLexState, pushLexState, popLexState -- ** Layout , topContext, popContext, pushContext , pushCurrentContext -- ** Errors , parseError, parseErrorAt, parseError' , lexError ) where import Control.Exception import Data.Int import Data.Typeable ( Typeable ) import Control.Monad.State import Control.Applicative import Agda.Syntax.Position import Agda.Utils.Except ( MonadError(catchError, throwError) ) import Agda.Utils.FileName import qualified Agda.Utils.IO.UTF8 as UTF8 import Agda.Utils.Pretty {-------------------------------------------------------------------------- The parse monad --------------------------------------------------------------------------} -- | The parse monad. Equivalent to @StateT 'ParseState' (Either 'ParseError')@ -- except for the definition of @fail@, which builds a suitable 'ParseError' -- object. newtype Parser a = P { unP :: ParseState -> ParseResult a } -- | The parser state. Contains everything the parser and the lexer could ever -- need. data ParseState = PState { parseSrcFile :: !SrcFile , parsePos :: !PositionWithoutFile -- ^ position at current input location , parseLastPos :: !PositionWithoutFile -- ^ position of last token , parseInp :: String -- ^ the current input , parsePrevChar :: !Char -- ^ the character before the input , parsePrevToken:: String -- ^ the previous token , parseLayout :: [LayoutContext] -- ^ the stack of layout contexts , parseLexState :: [LexState] -- ^ the state of the lexer -- (states can be nested so we need a stack) , parseFlags :: ParseFlags -- ^ currently there are no flags } deriving Show {-| To do context sensitive lexing alex provides what is called /start codes/ in the Alex documentation. It is really an integer representing the state of the lexer, so we call it @LexState@ instead. -} type LexState = Int -- | We need to keep track of the context to do layout. The context -- specifies the indentation (if any) of a layout block. See -- "Agda.Syntax.Parser.Layout" for more informaton. data LayoutContext = NoLayout -- ^ no layout | Layout Int32 -- ^ layout at specified column deriving Show -- | There aren't any parser flags at the moment. data ParseFlags = ParseFlags { parseKeepComments :: Bool -- ^ Should comment tokens be returned by the lexer? } deriving Show -- | What you get if parsing fails. data ParseError = ParseError { errSrcFile :: !SrcFile -- ^ The file in which the error occurred. , errPos :: !PositionWithoutFile -- ^ Where the error occurred. , errInput :: String -- ^ The remaining input. , errPrevToken :: String -- ^ The previous token. , errMsg :: String -- ^ Hopefully an explanation of what happened. } deriving (Typeable) instance Exception ParseError -- | The result of parsing something. data ParseResult a = ParseOk ParseState a | ParseFailed ParseError {-------------------------------------------------------------------------- Instances --------------------------------------------------------------------------} instance Monad Parser where return = pure P m >>= f = P $ \s -> case m s of ParseFailed e -> ParseFailed e ParseOk s' x -> unP (f x) s' fail msg = P $ \s -> ParseFailed $ ParseError { errSrcFile = parseSrcFile s , errPos = parseLastPos s , errInput = parseInp s , errPrevToken = parsePrevToken s , errMsg = msg } instance Functor Parser where fmap = liftM instance Applicative Parser where pure x = P $ \s -> ParseOk s x (<*>) = ap instance MonadError ParseError Parser where throwError e = P $ \_ -> ParseFailed e P m `catchError` h = P $ \s -> case m s of ParseFailed err -> unP (h err) s m' -> m' instance MonadState ParseState Parser where get = P $ \s -> ParseOk s s put s = P $ \_ -> ParseOk s () instance Show ParseError where show = prettyShow instance Pretty ParseError where pretty err = vcat [ pretty ((errPos err) { srcFile = errSrcFile err }) <> colon <+> text (errMsg err) , text $ errPrevToken err ++ "" , text $ take 30 (errInput err) ++ "..." ] instance HasRange ParseError where getRange err = posToRange' (errSrcFile err) p p where p = errPos err {-------------------------------------------------------------------------- Running the parser --------------------------------------------------------------------------} initStatePos :: Position -> ParseFlags -> String -> [LexState] -> ParseState initStatePos pos flags inp st = PState { parseSrcFile = srcFile pos , parsePos = pos' , parseLastPos = pos' , parseInp = inp , parsePrevChar = '\n' , parsePrevToken = "" , parseLexState = st , parseLayout = [NoLayout] , parseFlags = flags } where pos' = pos { srcFile = () } -- | Constructs the initial state of the parser. The string argument -- is the input string, the file path is only there because it's part -- of a position. initState :: Maybe AbsolutePath -> ParseFlags -> String -> [LexState] -> ParseState initState file = initStatePos (startPos file) -- | The default flags. defaultParseFlags :: ParseFlags defaultParseFlags = ParseFlags { parseKeepComments = False } -- | The most general way of parsing a string. The "Agda.Syntax.Parser" will define -- more specialised functions that supply the 'ParseFlags' and the -- 'LexState'. parse :: ParseFlags -> [LexState] -> Parser a -> String -> ParseResult a parse flags st p input = unP p (initState Nothing flags input st) -- | The even more general way of parsing a string. parsePosString :: Position -> ParseFlags -> [LexState] -> Parser a -> String -> ParseResult a parsePosString pos flags st p input = unP p (initStatePos pos flags input st) -- | The most general way of parsing a file. The "Agda.Syntax.Parser" will define -- more specialised functions that supply the 'ParseFlags' and the -- 'LexState'. -- -- Note that Agda source files always use the UTF-8 character -- encoding. parseFile :: ParseFlags -> [LexState] -> Parser a -> AbsolutePath -> IO (ParseResult a) parseFile flags st p file = do input <- liftIO $ UTF8.readTextFile $ filePath file return $ unP p (initState (Just file) flags input st) {-------------------------------------------------------------------------- Manipulating the state --------------------------------------------------------------------------} setParsePos :: PositionWithoutFile -> Parser () setParsePos p = modify $ \s -> s { parsePos = p } setLastPos :: PositionWithoutFile -> Parser () setLastPos p = modify $ \s -> s { parseLastPos = p } setPrevToken :: String -> Parser () setPrevToken t = modify $ \s -> s { parsePrevToken = t } getLastPos :: Parser PositionWithoutFile getLastPos = get >>= return . parseLastPos -- | The parse interval is between the last position and the current position. getParseInterval :: Parser Interval getParseInterval = do s <- get return $ posToInterval (parseSrcFile s) (parseLastPos s) (parsePos s) getLexState :: Parser [LexState] getLexState = parseLexState <$> get setLexState :: [LexState] -> Parser () setLexState ls = do s <- get put $ s { parseLexState = ls } pushLexState :: LexState -> Parser () pushLexState l = do s <- getLexState setLexState (l:s) popLexState :: Parser () popLexState = do _:ls <- getLexState setLexState ls getParseFlags :: Parser ParseFlags getParseFlags = parseFlags <$> get -- | @parseError = fail@ parseError :: String -> Parser a parseError = fail -- | Fake a parse error at the specified position. Used, for instance, when -- lexing nested comments, which when failing will always fail at the end -- of the file. A more informative position is the beginning of the failing -- comment. parseErrorAt :: PositionWithoutFile -> String -> Parser a parseErrorAt p msg = do setLastPos p parseError msg -- | Use 'parseErrorAt' or 'parseError' as appropriate. parseError' :: Maybe PositionWithoutFile -> String -> Parser a parseError' = maybe parseError parseErrorAt -- | For lexical errors we want to report the current position as the site of -- the error, whereas for parse errors the previous position is the one -- we're interested in (since this will be the position of the token we just -- lexed). This function does 'parseErrorAt' the current position. lexError :: String -> Parser a lexError msg = do p <- parsePos <$> get parseErrorAt p msg {-------------------------------------------------------------------------- Layout --------------------------------------------------------------------------} getContext :: Parser [LayoutContext] getContext = parseLayout <$> get setContext :: [LayoutContext] -> Parser () setContext ctx = do s <- get put $ s { parseLayout = ctx } -- | Return the current layout context. topContext :: Parser LayoutContext topContext = do ctx <- getContext case ctx of [] -> parseError "No layout context in scope" l:_ -> return l popContext :: Parser () popContext = do ctx <- getContext case ctx of [] -> parseError "There is no layout block to close at this point." _:ctx -> setContext ctx pushContext :: LayoutContext -> Parser () pushContext l = do ctx <- getContext setContext (l : ctx) -- | Should only be used at the beginning of a file. When we start parsing -- we should be in layout mode. Instead of forcing zero indentation we use -- the indentation of the first token. pushCurrentContext :: Parser () pushCurrentContext = do p <- getLastPos pushContext (Layout (posCol p))