module Language.CSPM.LexHelper
where
import qualified Language.CSPM.Lexer as Lexer (scanner)
import Language.CSPM.Token (Token(..), LexError(..))
import Language.CSPM.TokenClasses (PrimToken(..))
import Language.CSPM.UnicodeSymbols (lookupDefaultSymbol)
import Control.Monad.IO.Class
import qualified Data.Set as Set
import qualified Data.DList as DList
import Control.Monad.Trans.Either
lexPlain :: String -> Either LexError [Token]
lexPlain src = fmap reverse $ Lexer.scanner src
unicodeTokenString :: Token -> String
unicodeTokenString token
= case lookupDefaultSymbol $ tokenClass token of
Just (unicodeSymbol, _) -> [unicodeSymbol]
Nothing -> tokenString token
asciiTokenString :: Token -> String
asciiTokenString token
= case lookupDefaultSymbol $ tokenClass token of
Just (_, symbol) -> symbol
Nothing -> tokenString token
type Chunk = [Token]
type Chunks = DList.DList Chunk
data FilePart
= Toks Chunk
| Include FilePath
deriving Show
lexInclude :: String -> IO (Either LexError [Token])
lexInclude input
= eitherT (return . Left) (return . Right . concat . DList.toList) $ lexInclude2 input
lexInclude2 :: String -> EitherT LexError IO Chunks
lexInclude2 input = do
hoistEither $ lexPlain input
>>= hoistEither . splitIncludes []
>>= mapM processPart
>>= return . DList.concat
processPart :: FilePart -> EitherT LexError IO Chunks
processPart part = case part of
Toks ch -> return $ DList.singleton $ ch
Include fname -> (liftIO $ readFile fname) >>= lexInclude2
splitIncludes :: [Token] -> [Token] -> Either LexError [FilePart]
splitIncludes acc [] = return [Toks $ reverse acc]
splitIncludes acc (h:rest) = case h of
tok@(Token _ _ _ L_Include _) -> do
r <- scanInclude tok rest
return $ (Toks $ reverse acc) : r
_ -> splitIncludes (h:acc) rest
scanInclude :: Token -> [Token] -> Either LexError [FilePart]
scanInclude incl (h:rest) = case h of
Token _ _ _ T_WhiteSpace _ -> scanInclude incl rest
Token _ _ _ L_String fname -> do
r <- splitIncludes [] rest
let fileName = reverse $ tail $ reverse $ tail fname
return $ (Include fileName) : r
_ -> Left $ LexError {
lexEPos = tokenStart incl
,lexEMsg = "Include without filename"
}
scanInclude incl _ = Left $ LexError {
lexEPos = tokenStart incl
,lexEMsg = "Include without filename at end of file"
}
removeIgnoredToken :: [Token] -> [Token]
removeIgnoredToken = soakNewlines . removeComments
where
removeComments :: [Token] -> [Token]
removeComments = filter (\t -> not (tokenIsComment t || isWhiteSpace t))
isWhiteSpace = (==) T_WhiteSpace . tokenClass
tokenIsComment :: Token -> Bool
tokenIsComment t = tc == L_LComment || tc == L_BComment || tc == L_Pragma
where tc = tokenClass t
soakNewlines :: [Token] -> [Token]
soakNewlines = worker
where
worker [] = []
worker [x] | tokenClass x ==L_Newline = []
worker [x] = [x]
worker (h1:h2:t) = case (tokenClass h1, tokenClass h2) of
(L_Newline, L_Newline) -> worker (h1:t)
(L_Newline, _) | isH2NewLineConsumer -> worker $ h2:t
(L_Newline, _) -> h1 : (worker $ h2:t)
(_, L_Newline) | isH1NewLineConsumer -> worker $ h1:t
(_, L_Newline) -> h1: (worker $ h2:t)
_ -> h1: (worker $ h2:t)
where
isH2NewLineConsumer = tokenClass h2 `Set.member` consumeNLBeforeToken
isH1NewLineConsumer = tokenClass h1 `Set.member` consumeNLAfterToken
binaryOperators =
[T_is, T_hat, T_hash, T_times, T_slash,
T_percent, T_plus, T_minus, T_eq, T_neq,
T_ge, T_le, T_not, T_amp, T_semicolon,
T_comma, T_triangle, T_box, T_rhd, T_exp,
T_sqcap, T_interleave, T_backslash, T_parallel,
T_mid, T_at, T_atat, T_rightarrow, T_leftarrow,
T_leftrightarrow, T_dot, T_dotdot, T_exclamation,
T_questionmark, T_colon, T_openBrack, T_closeBrack,
T_openOxBrack, T_closeOxBrack,T_openPBrace,
T_openBrackBrack, T_if, T_then,T_else, T_let, T_and,
T_or, T_Refine, T_trace,T_failure, T_failureDivergence,
T_refusalTesting, T_refusalTestingDiv, T_revivalTesting,
T_revivalTestingDiv,T_tauPriorityOp, T_within]
consumeNLBeforeToken
= Set.fromList (
[T_closeParen, T_gt, T_closeBrace, T_closeBrackBrack, T_closeSpecialBrack, T_closePBrace]
++ binaryOperators)
consumeNLAfterToken
= Set.fromList ( [T_openParen, T_openBrace, T_lt] ++ binaryOperators)