src/Language/CSPM/LexHelper.hs

{-# LANGUAGE TupleSections, RecordWildCards #-}
module Language.CSPM.LexHelper
(
   lexInclude
  ,lexIncludePath
  ,lexPlain
  ,removeIgnoredToken
  ,tokenIsComment
  ,unicodeTokenString
  ,asciiTokenString
)
where

import qualified Language.CSPM.Lexer as Lexer (scanner)
import Language.CSPM.Token (Token(..), LexError(..))
import Language.CSPM.TokenClasses (PrimToken(..))
import Language.CSPM.UnicodeSymbols (lookupDefaultSymbol)
import Control.Monad.IO.Class
import qualified Data.Set as Set

import qualified Data.DList as DList
import Control.Monad.Trans.Either
import System.FilePath(isAbsolute,splitDirectories,normalise,joinPath)

-- | lex a String .
lexPlain :: String -> Either LexError [Token]
lexPlain src = fmap reverse $ Lexer.scanner src

-- | Convert a token to a String.
--   If the tokenClasss has a Unicode symbol return the default Unicode string.
unicodeTokenString :: Token -> String
unicodeTokenString token
  = case lookupDefaultSymbol $ tokenClass token of
      Just (unicodeSymbol, _) -> [unicodeSymbol]
      Nothing -> tokenString token

-- | Convert a token to a String.
--   If the tokenClasss has a Unicode symbol return the default ASCII string.
asciiTokenString :: Token -> String
asciiTokenString token
  = case lookupDefaultSymbol $ tokenClass token of
      Just (_, symbol) -> symbol
      Nothing -> tokenString token

type Chunk = [Token]
type Chunks = DList.DList Chunk
data FilePart
    = Toks    Chunk
    | Include FilePath
    deriving Show

-- | lex input-string and import all includes files
lexInclude :: String -> IO (Either LexError [Token])
lexInclude input
   = lexIncludePath "" input

-- | lex input-string and import all includes files
lexIncludePath :: FilePath -> String -> IO (Either LexError [Token])
lexIncludePath srcName input
   = eitherT (return . Left) (return . Right . concat . DList.toList) $ lexInclude2 srcName input

lexInclude2 :: FilePath -> String -> EitherT LexError IO Chunks
lexInclude2 srcName input = do
        hoistEither $ lexPlain input
    >>= hoistEither . splitIncludes []
    >>= mapM (processPart srcName)
    >>= return . DList.concat

processPart :: FilePath -> FilePart -> EitherT LexError IO Chunks
processPart srcName part = case part of
    Toks ch -> return $ DList.singleton $ ch
    Include fname -> (liftIO $ readFile absolutePath) >>= lexInclude2 absolutePath
     where
       absolutePath = getAbsoluteIncludeFileName srcName fname 

-- | micro-parser for include-statements
splitIncludes :: [Token] -> [Token] -> Either LexError [FilePart]
splitIncludes acc  []  = return [Toks $ reverse acc]
splitIncludes acc (h:rest) = case h of
    tok@(Token _ _ _ L_Include _) -> do
        r <- scanInclude tok rest
        return $ (Toks $ reverse acc) : r
    _ -> splitIncludes (h:acc) rest

scanInclude :: Token -> [Token] -> Either LexError [FilePart]
scanInclude incl (h:rest) = case h of
    Token _ _ _ T_WhiteSpace _ -> scanInclude incl rest
    Token _ _ _ L_String fname -> do
       r <- splitIncludes [] rest
       let fileName = reverse $ tail $ reverse $ tail fname -- remove quotes
       return $ (Include fileName) : r
    _ -> Left $ LexError {
       lexEPos = tokenStart incl
      ,lexEMsg = "Include without filename"
      }


scanInclude incl _ = Left $ LexError {
       lexEPos = tokenStart incl
      ,lexEMsg = "Include without filename at end of file"
      }


-- | Remove comments, whitespaces and unneeded newlines.
removeIgnoredToken :: [Token] -> [Token]
removeIgnoredToken = soakNewlines . removeComments
  where
    -- | Remove comments from the token stream.
    removeComments :: [Token] -> [Token]
    removeComments = filter (\t -> not (tokenIsComment t || isWhiteSpace t))
    isWhiteSpace = (==) T_WhiteSpace . tokenClass

-- | Is the token a line-comment, block-comment or a Pragma?
tokenIsComment :: Token -> Bool
tokenIsComment t = tc == L_LComment || tc == L_BComment || tc == L_Pragma
  where tc = tokenClass t


-- | remove newlines, that do not end a declaration from the token stream.
-- For example newlines next to binary operators.
-- Remove all trailing newlines.
soakNewlines :: [Token] -> [Token]
soakNewlines = worker
  where
    worker [] = []
    worker [x] | tokenClass x ==L_Newline = []
    worker [x] = [x]
    worker (h1:h2:t) = case (tokenClass h1, tokenClass h2) of
       (L_Newline, L_Newline) -> worker (h1:t)
       (L_Newline, _) | isH2NewLineConsumer -> worker $ h2:t
       (L_Newline, _) -> h1 : (worker $ h2:t)
       (_, L_Newline) | isH1NewLineConsumer -> worker $ h1:t
       (_, L_Newline) -> h1: (worker $ h2:t)
       _   -> h1: (worker $ h2:t)
      where
        isH2NewLineConsumer = tokenClass h2 `Set.member` consumeNLBeforeToken
        isH1NewLineConsumer = tokenClass h1 `Set.member` consumeNLAfterToken

    binaryOperators =
     [T_is, T_hat, T_hash, T_times, T_slash,
      T_percent, T_plus, T_minus, T_eq, T_neq,
      T_ge, T_le, T_not, T_amp, T_semicolon,
      T_comma, T_triangle, T_box, T_rhd, T_exp,
      T_sqcap, T_interleave, T_backslash, T_parallel,
      T_mid, T_at, T_atat, T_rightarrow, T_leftarrow,
      T_leftrightarrow, T_dot, T_dotdot, T_exclamation,
      T_questionmark, T_colon, T_openBrack, T_closeBrack,
      T_openOxBrack, T_closeOxBrack,T_openPBrace,
      T_openBrackBrack, T_if, T_then,T_else, T_let, T_and,
      T_or, T_Refine, T_trace,T_failure, T_failureDivergence,
      T_refusalTesting, T_refusalTestingDiv, T_revivalTesting,
      T_revivalTestingDiv,T_tauPriorityOp, T_within]
    consumeNLBeforeToken
      = Set.fromList (
            [T_closeParen, T_gt, T_closeBrace, T_closeBrackBrack, T_closeSpecialBrack, T_closePBrace]
         ++ binaryOperators)
    consumeNLAfterToken
      = Set.fromList ( [T_openParen, T_openBrace, T_lt] ++ binaryOperators)

-- Helper function for determining the absolute path of an include file name.
-- getAbsoluteIncludeFileName makes it possible to include other CSP modules by 
-- giving the file path locally w.r.t. the current file path of the CSP module,
-- which includes the particular CSP modules.
getAbsoluteIncludeFileName :: FilePath -> FilePath -> FilePath
getAbsoluteIncludeFileName srcFileName inclFileName = 
   case isAbsolute inclFileName of 
	True  -> inclFileName 
	False -> joinPath $ (
		take ((length srcDirSequence)-(countBackDirs fileDirSequence)) srcDirSequence 
			++ (removeBackDirs fileDirSequence))
	where
		fileDirSequence = splitDirectories $ normalise inclFileName
		srcDirSequence  = init $ splitDirectories $ normalise srcFileName
		countBackDirs   = length . filter (".." ==) 
		removeBackDirs  = dropWhile (".." == )