---------------------------------------------------------------------------- -- | -- Module : SableCC.Test.Parser -- Copyright : (c) Fontaine 2011 -- License : BSD3 -- -- Maintainer : fontaine@cs.uni-duesseldorf.de -- Stability : experimental -- Portability : GHC-only -- -- -- A lexer and a parser which takes the parser tables as a explicit argument -- (as opposed to importing the tables as generated Haskell modules). ----------------------------------------------------------------------------- {-# LANGUAGE GeneralizedNewtypeDeriving, RecordWildCards #-} module SableCC.Test.Parser where import SableCC.ParserTypes import SableCC.ParserDefinition import SableCC.LexerDefinition import SableCC.XML.ParserDefinitionRaw (Token(..)) import Data.Array as Array import Data.List as List import Data.Map (Map) import qualified Data.Map as Map import Control.Monad.Reader import Control.Monad.IO.Class -- the Parser type Parser x = ReaderT ParserDefinition IO x runParser :: ParserDefinition -> Input -> IO CST runParser parserDef input = flip runReaderT parserDef $ do [cst,_] <- parserLoop (initialStack parserDef, input) return cst parserLoop :: PState -> Parser [CST] parserLoop pState@(stack,input) = do action <- findAction pState case action of Shift newState -> case input of [] -> error "shift action with empty input" (h:rest) -> parserLoop ((newState, CstLeaf h) : stack, rest) Reduce reduction -> performReduction reduction pState >>= parserLoop Accept -> return (map snd stack) Error msg -> do table <- asks errorTable error (table ! msg) findAction :: (Stack,Input) -> Parser Action findAction s = case s of ([],_) -> error "findAction: empty Stack" (_,[]) -> error "findAction: empty Input" (((state,_):_),((tokenID,_):_)) -> do table <- asks actionTable let list = table ! state case Map.lookup tokenID $ transitionMap list of Nothing -> return $ defaultTransition list Just r -> return r performReduction :: Reduction -> PState -> Parser PState performReduction Reduction {..} (stack,input) = do let restStack = drop popCount stack cstNode = CstNode reductionID $ map snd $ take popCount stack newState <- lookupGotoList gotoIndex $ fst $ head restStack return ((newState,cstNode) : restStack, input) lookupGotoList :: GotoIndex -> ParserStateID -> Parser ParserStateID lookupGotoList index state = do gTable <- asks gotoTable let gMap = gTable ! index case Map.lookup state $ gotoMap gMap of Nothing -> return $ defaultGoto gMap Just r -> return r -- the Lexer type Lexer x = ReaderT LexerDefinition IO x runLexer :: LexerDefinition -> String -> IO [SToken] runLexer lexerDef input = flip runReaderT lexerDef $ do acc <- lexerLoop (initLexerState input) [] return $ reverse acc lexerLoop :: LexerState -> [SToken] -> Lexer [SToken] lexerLoop lexerState@LexerState {..} acc = do LexerDefinition {..} <- ask scanToken <- runDFA (snd $ lexerModes ! lexerMode) lexerBuffer case scanToken of Right (newBuff, tokenString,tokenID) -> do let token = lexerTokens ! tokenID newNestingCounter = case token_ename token of "TCommentEnd" -> pred commentNestingCounter "TComment" -> succ commentNestingCounter _ -> commentNestingCounter newMode = if token_ename token == "TCommentEnd" && newNestingCounter == 0 then ModeID 0 else modeTransitions ! (tokenID, lexerMode) sToken = (tokenID, tokenString) newState = lexerState { lexerBuffer = newBuff ,lexerMode = newMode ,commentNestingCounter = newNestingCounter } if null newBuff then return acc else lexerLoop newState (sToken : acc) Left err -> do liftIO $ putStrLn $ "lexError :" ++ err return $ error "lexError" runDFA :: Mode -> String -> Lexer (Either String (String,String,LexTokenID)) runDFA mode input = checkState (Left "noToken recognised") (modeStates mode ! LexStateID 0) input [] where checkState :: Either String (String,String,LexTokenID) -> State -> String -> String -> Lexer (Either String (String,String,LexTokenID)) checkState lastAcceptedToken dfaState input acc = do case (input, stateAcceptedToken dfaState) of ([], Just tokenID) -> return $ Right ("",reverse acc, tokenID) ([], Nothing) -> return $ lastAcceptedToken (_, Just tokenID) -> scanChar (Right (input, reverse acc, tokenID)) dfaState input acc (_, Nothing) -> scanChar lastAcceptedToken dfaState input acc scanChar :: Either String (String,String,LexTokenID) -> State -> String -> String -> Lexer (Either String (String,String,LexTokenID)) scanChar lastAcceptedToken dfaState [] acc = error "scanChar:unreachable" scanChar lastAcceptedToken dfaState input@(h:rest) acc = case lookupInterval h (stateTransitions dfaState) of Nothing -> return $ lastAcceptedToken Just (Epsilon newStateID) -> scanChar lastAcceptedToken (modeStates mode ! newStateID) input acc Just (NotEpsilon newStateID) -> checkState lastAcceptedToken (modeStates mode ! newStateID) rest (h:acc)