{-# LANGUAGE TupleSections, RecordWildCards #-} module Language.CSPM.LexHelper ( lexInclude ,lexIncludePath ,lexPlain ,removeIgnoredToken ,tokenIsComment ,unicodeTokenString ,asciiTokenString ) where import qualified Language.CSPM.Lexer as Lexer (scanner) import Language.CSPM.Token (Token(..), LexError(..)) import Language.CSPM.TokenClasses (PrimToken(..)) import Language.CSPM.UnicodeSymbols (lookupDefaultSymbol) import Control.Monad.IO.Class import qualified Data.Set as Set import qualified Data.DList as DList import Control.Monad.Trans.Either import System.FilePath(isAbsolute,splitDirectories,normalise,joinPath) -- | lex a String . lexPlain :: String -> Either LexError [Token] lexPlain src = fmap reverse $ Lexer.scanner src -- | Convert a token to a String. -- If the tokenClasss has a Unicode symbol return the default Unicode string. unicodeTokenString :: Token -> String unicodeTokenString token = case lookupDefaultSymbol $ tokenClass token of Just (unicodeSymbol, _) -> [unicodeSymbol] Nothing -> tokenString token -- | Convert a token to a String. -- If the tokenClasss has a Unicode symbol return the default ASCII string. asciiTokenString :: Token -> String asciiTokenString token = case lookupDefaultSymbol $ tokenClass token of Just (_, symbol) -> symbol Nothing -> tokenString token type Chunk = [Token] type Chunks = DList.DList Chunk data FilePart = Toks Chunk | Include FilePath deriving Show -- | lex input-string and import all includes files lexInclude :: String -> IO (Either LexError [Token]) lexInclude input = lexIncludePath "" input -- | lex input-string and import all includes files lexIncludePath :: FilePath -> String -> IO (Either LexError [Token]) lexIncludePath srcName input = eitherT (return . Left) (return . Right . concat . DList.toList) $ lexInclude2 srcName input lexInclude2 :: FilePath -> String -> EitherT LexError IO Chunks lexInclude2 srcName input = do hoistEither $ lexPlain input >>= hoistEither . splitIncludes [] >>= mapM (processPart srcName) >>= return . DList.concat processPart :: FilePath -> FilePart -> EitherT LexError IO Chunks processPart srcName part = case part of Toks ch -> return $ DList.singleton $ ch Include fname -> (liftIO $ readFile absolutePath) >>= lexInclude2 absolutePath where absolutePath = getAbsoluteIncludeFileName srcName fname -- | micro-parser for include-statements splitIncludes :: [Token] -> [Token] -> Either LexError [FilePart] splitIncludes acc [] = return [Toks $ reverse acc] splitIncludes acc (h:rest) = case h of tok@(Token _ _ _ L_Include _) -> do r <- scanInclude tok rest return $ (Toks $ reverse acc) : r _ -> splitIncludes (h:acc) rest scanInclude :: Token -> [Token] -> Either LexError [FilePart] scanInclude incl (h:rest) = case h of Token _ _ _ T_WhiteSpace _ -> scanInclude incl rest Token _ _ _ L_String fname -> do r <- splitIncludes [] rest let fileName = reverse $ tail $ reverse $ tail fname -- remove quotes return $ (Include fileName) : r _ -> Left $ LexError { lexEPos = tokenStart incl ,lexEMsg = "Include without filename" } scanInclude incl _ = Left $ LexError { lexEPos = tokenStart incl ,lexEMsg = "Include without filename at end of file" } -- | Remove comments, whitespaces and unneeded newlines. removeIgnoredToken :: [Token] -> [Token] removeIgnoredToken = soakNewlines . removeComments where -- | Remove comments from the token stream. removeComments :: [Token] -> [Token] removeComments = filter (\t -> not (tokenIsComment t || isWhiteSpace t)) isWhiteSpace = (==) T_WhiteSpace . tokenClass -- | Is the token a line-comment, block-comment or a Pragma? tokenIsComment :: Token -> Bool tokenIsComment t = tc == L_LComment || tc == L_BComment || tc == L_Pragma where tc = tokenClass t -- | remove newlines, that do not end a declaration from the token stream. -- For example newlines next to binary operators. -- Remove all trailing newlines. soakNewlines :: [Token] -> [Token] soakNewlines = worker where worker [] = [] worker [x] | tokenClass x ==L_Newline = [] worker [x] = [x] worker (h1:h2:t) = case (tokenClass h1, tokenClass h2) of (L_Newline, L_Newline) -> worker (h1:t) (L_Newline, _) | isH2NewLineConsumer -> worker $ h2:t (L_Newline, _) -> h1 : (worker $ h2:t) (_, L_Newline) | isH1NewLineConsumer -> worker $ h1:t (_, L_Newline) -> h1: (worker $ h2:t) _ -> h1: (worker $ h2:t) where isH2NewLineConsumer = tokenClass h2 `Set.member` consumeNLBeforeToken isH1NewLineConsumer = tokenClass h1 `Set.member` consumeNLAfterToken binaryOperators = [T_is, T_hat, T_hash, T_times, T_slash, T_percent, T_plus, T_minus, T_eq, T_neq, T_ge, T_le, T_not, T_amp, T_semicolon, T_comma, T_triangle, T_box, T_rhd, T_exp, T_sqcap, T_interleave, T_backslash, T_parallel, T_mid, T_at, T_atat, T_rightarrow, T_leftarrow, T_leftrightarrow, T_dot, T_dotdot, T_exclamation, T_questionmark, T_colon, T_openBrack, T_closeBrack, T_openOxBrack, T_closeOxBrack,T_openPBrace, T_openBrackBrack, T_if, T_then,T_else, T_let, T_and, T_or, T_Refine, T_trace,T_failure, T_failureDivergence, T_refusalTesting, T_refusalTestingDiv, T_revivalTesting, T_revivalTestingDiv,T_tauPriorityOp, T_within] consumeNLBeforeToken = Set.fromList ( [T_closeParen, T_gt, T_closeBrace, T_closeBrackBrack, T_closeSpecialBrack, T_closePBrace] ++ binaryOperators) consumeNLAfterToken = Set.fromList ( [T_openParen, T_openBrace, T_lt] ++ binaryOperators) -- Helper function for determining the absolute path of an include file name. -- getAbsoluteIncludeFileName makes it possible to include other CSP modules by -- giving the file path locally w.r.t. the current file path of the CSP module, -- which includes the particular CSP modules. getAbsoluteIncludeFileName :: FilePath -> FilePath -> FilePath getAbsoluteIncludeFileName srcFileName inclFileName = case isAbsolute inclFileName of True -> inclFileName False -> joinPath $ ( take ((length srcDirSequence)-(countBackDirs fileDirSequence)) srcDirSequence ++ (removeBackDirs fileDirSequence)) where fileDirSequence = splitDirectories $ normalise inclFileName srcDirSequence = init $ splitDirectories $ normalise srcFileName countBackDirs = length . filter (".." ==) removeBackDirs = dropWhile (".." == )