module Language.CSPM.LexHelper
(
lexInclude
,lexIncludePath
,lexPlain
,removeIgnoredToken
,tokenIsComment
,unicodeTokenString
,asciiTokenString
)
where
import qualified Language.CSPM.Lexer as Lexer (scanner)
import Language.CSPM.Token (Token(..), LexError(..))
import Language.CSPM.TokenClasses (PrimToken(..))
import Language.CSPM.UnicodeSymbols (lookupDefaultSymbol)
import Control.Monad.IO.Class
import qualified Data.Set as Set
import qualified Data.DList as DList
import Control.Monad.Trans.Either
import System.FilePath(isAbsolute,splitDirectories,normalise,joinPath)
-- | Lex a source string into a token list; no @include@ handling.
-- NOTE(review): the scanner appears to accumulate tokens in reverse
-- order, hence the 'reverse' — confirm against 'Lexer.scanner'.
lexPlain :: String -> Either LexError [Token]
lexPlain src = reverse <$> Lexer.scanner src
-- | Render a token as a string, preferring its unicode symbol when the
-- token class has one; all other tokens use their literal text.
unicodeTokenString :: Token -> String
unicodeTokenString tok =
  maybe (tokenString tok) (\(unicodeSym, _ascii) -> [unicodeSym])
        (lookupDefaultSymbol $ tokenClass tok)
-- | Render a token as a string, preferring the ASCII spelling when the
-- token class has a symbol entry; all other tokens use their literal
-- text.
asciiTokenString :: Token -> String
asciiTokenString tok =
  maybe (tokenString tok) snd (lookupDefaultSymbol $ tokenClass tok)
-- | A contiguous run of tokens from a single file.
type Chunk = [Token]

-- | Difference list of chunks, for cheap concatenation while splicing
-- included files into the surrounding token stream.
type Chunks = DList.DList Chunk

-- | One piece of a source file: either plain tokens, or an @include@
-- directive whose file still has to be read and lexed.
data FilePart
  = Toks Chunk
  | Include FilePath
  deriving Show
-- | Lex a string and process @include@ directives; relative include
-- paths are resolved against an empty source name (i.e. the current
-- directory).
lexInclude :: String -> IO (Either LexError [Token])
lexInclude = lexIncludePath ""
-- | Lex a string and process @include@ directives, resolving relative
-- include paths against the directory of the given source file name.
lexIncludePath :: FilePath -> String -> IO (Either LexError [Token])
lexIncludePath srcName input =
  eitherT (return . Left) (return . Right . flatten) (lexInclude2 srcName input)
  where
    -- collapse the difference list of chunks into one token list
    flatten = concat . DList.toList
-- | Lex an input string and recursively splice in the token streams of
-- all @include@ directives.  Relative include paths are resolved
-- against the directory of @srcName@.
--
-- NOTE(review): the original piped the whole ('>>=') chain through a
-- single @hoistEither $ …@; since ('$') binds loosest, that
-- parenthesises the chain *under* 'hoistEither' and mixes the 'Either'
-- and @EitherT … IO@ monads (ill-typed).  Rewritten as explicit binds
-- in 'EitherT'.
lexInclude2 :: FilePath -> String -> EitherT LexError IO Chunks
lexInclude2 srcName input = do
  tokens <- hoistEither $ lexPlain input
  parts  <- hoistEither $ splitIncludes [] tokens
  chunks <- mapM (processPart srcName) parts
  return $ DList.concat chunks
-- | Turn one 'FilePart' into token chunks: plain tokens pass through
-- unchanged; an @include@ is read from disk and lexed recursively.
processPart :: FilePath -> FilePart -> EitherT LexError IO Chunks
processPart srcName part = case part of
  Toks chunk -> return $ DList.singleton chunk
  Include fname -> do
    let path = getAbsoluteIncludeFileName srcName fname
    -- NOTE(review): lazy 'readFile' defers read errors until the
    -- contents are forced — confirm callers tolerate that.
    contents <- liftIO $ readFile path
    lexInclude2 path contents
-- | Split a token stream into runs of plain tokens and @include@
-- directives.  The first argument accumulates the tokens seen so far,
-- kept in reverse order.
splitIncludes :: [Token] -> [Token] -> Either LexError [FilePart]
splitIncludes revAcc [] = Right [Toks $ reverse revAcc]
splitIncludes revAcc (tok : rest) = case tok of
  Token _ _ _ L_Include _ -> do
    parts <- scanInclude tok rest
    Right $ Toks (reverse revAcc) : parts
  _ -> splitIncludes (tok : revAcc) rest
-- | Parse the file name that must follow an @include@ token (skipping
-- whitespace) and continue splitting the remaining stream.
scanInclude :: Token -> [Token] -> Either LexError [FilePart]
scanInclude incl tokens = case tokens of
  Token _ _ _ T_WhiteSpace _ : rest -> scanInclude incl rest
  Token _ _ _ L_String quoted : rest -> do
    parts <- splitIncludes [] rest
    -- drop the surrounding quote characters of the string literal
    -- (assumes the lexer keeps the quotes in the token text)
    let fileName = init $ tail quoted
    Right $ Include fileName : parts
  _ : _ -> Left LexError
    { lexEPos = tokenStart incl
    , lexEMsg = "Include without filename"
    }
  [] -> Left LexError
    { lexEPos = tokenStart incl
    , lexEMsg = "Include without filename at end of file"
    }
-- | Drop comment and whitespace tokens from a stream, then soak up
-- redundant newline tokens.
removeIgnoredToken :: [Token] -> [Token]
removeIgnoredToken = soakNewlines . filter (not . ignorable)
  where
    -- comments (line, block, pragma) and whitespace are ignorable
    ignorable t = tokenIsComment t || tokenClass t == T_WhiteSpace
-- | True for line comments, block comments, and pragmas.
tokenIsComment :: Token -> Bool
tokenIsComment t = tokenClass t `elem` [L_LComment, L_BComment, L_Pragma]
-- | Remove redundant newline tokens: runs of newlines collapse to one,
-- a trailing newline is dropped, and a newline adjacent to a token that
-- "consumes" it (binary operators, open/close brackets, keywords such
-- as @if@/@then@/@else@) is dropped entirely, so expressions may be
-- split across lines.
soakNewlines :: [Token] -> [Token]
soakNewlines = worker
  where
    worker [] = []
    -- drop a single trailing newline at the end of the stream
    worker [x] | tokenClass x ==L_Newline = []
    worker [x] = [x]
    -- inspect adjacent token pairs; clause order matters here
    worker (h1:h2:t) = case (tokenClass h1, tokenClass h2) of
      -- collapse consecutive newlines (keep the first, rescan)
      (L_Newline, L_Newline) -> worker (h1:t)
      -- newline before a consumer token: drop the newline
      (L_Newline, _) | isH2NewLineConsumer -> worker $ h2:t
      (L_Newline, _) -> h1 : (worker $ h2:t)
      -- newline after a consumer token: drop the newline
      (_, L_Newline) | isH1NewLineConsumer -> worker $ h1:t
      (_, L_Newline) -> h1: (worker $ h2:t)
      _ -> h1: (worker $ h2:t)
      where
        isH2NewLineConsumer = tokenClass h2 `Set.member` consumeNLBeforeToken
        isH1NewLineConsumer = tokenClass h1 `Set.member` consumeNLAfterToken
    -- tokens that act as binary operators: a newline on either side of
    -- one of these never terminates a definition
    binaryOperators =
      [T_is, T_hat, T_hash, T_times, T_slash,
       T_percent, T_plus, T_minus, T_eq, T_neq,
       T_ge, T_le, T_not, T_amp, T_semicolon,
       T_comma, T_triangle, T_box, T_rhd, T_exp,
       T_sqcap, T_interleave, T_backslash, T_parallel,
       T_mid, T_at, T_atat, T_rightarrow, T_leftarrow,
       T_leftrightarrow, T_dot, T_dotdot, T_exclamation,
       T_questionmark, T_colon, T_openBrack, T_closeBrack,
       T_openOxBrack, T_closeOxBrack,T_openPBrace,
       T_openBrackBrack, T_if, T_then,T_else, T_let, T_and,
       T_or, T_Refine, T_trace,T_failure, T_failureDivergence,
       T_refusalTesting, T_refusalTestingDiv, T_revivalTesting,
       T_revivalTestingDiv,T_tauPriorityOp, T_within]
    -- tokens that soak up a newline appearing directly before them
    consumeNLBeforeToken
      = Set.fromList (
          [T_closeParen, T_gt, T_closeBrace, T_closeBrackBrack, T_closeSpecialBrack, T_closePBrace]
          ++ binaryOperators)
    -- tokens that soak up a newline appearing directly after them
    consumeNLAfterToken
      = Set.fromList ( [T_openParen, T_openBrace, T_lt] ++ binaryOperators)
-- | Resolve the file name of an @include@ directive against the
-- including source file.  Absolute include paths are returned
-- unchanged; a relative path is placed next to the including file,
-- with each leading @..@ component stripping one directory from the
-- including file's directory prefix.
--
-- NOTE(review): the original expression
-- @(length srcDirSequence)(countBackDirs fileDirSequence)@ applied an
-- 'Int' as a function — the subtraction operator was missing.  Also
-- guarded the 'init' so an empty source name (as 'lexInclude' passes)
-- cannot crash.
getAbsoluteIncludeFileName :: FilePath -> FilePath -> FilePath
getAbsoluteIncludeFileName srcFileName inclFileName
  | isAbsolute inclFileName = inclFileName
  | otherwise = joinPath $
         take (length srcDirSequence - countBackDirs fileDirSequence) srcDirSequence
      ++ removeBackDirs fileDirSequence
  where
    -- directory components of the include path, e.g. ["..", "a.csp"]
    fileDirSequence = splitDirectories $ normalise inclFileName
    -- directory prefix of the including file (its file name dropped)
    srcDirSequence = case splitDirectories $ normalise srcFileName of
      []   -> []
      dirs -> init dirs
    countBackDirs = length . filter (".." ==)
    removeBackDirs = dropWhile (".." ==)