{-# LANGUAGE LambdaCase, ScopedTypeVariables #-} -- | Front-end interface to the pre-processor. module Hpp (parseDefinition, preprocess, liftHpp, errorHpp, getConfig, setConfig, hppReadFile, runErrHppIO) where import Control.Arrow (second) import Control.Exception (catch, IOException) import Control.Monad ((<=<)) import Data.Char (isSpace) import Data.Functor.Identity import System.Directory (doesFileExist) import System.FilePath (()) import Text.Read (readMaybe) import Hpp.Config import Hpp.Env import Hpp.Expansion import Hpp.Expr import Hpp.String import Hpp.Tokens import Hpp.Types -- * Trigraphs -- | The first component of each pair represents the end of a known -- trigraph sequence (each trigraph begins with two consecutive -- question marks (@"??"@). The second component is the -- single-character equivalent that we substitute in for the trigraph. trigraphs :: [(Char, Char)] trigraphs = [ ('=', '#') , ('/', '\\') , ('\'', '^') , ('(', '[') , (')', ']') , ('!', '|') , ('<', '{') , ('>', '}') , ('-', '~') ] -- | Count the prefix @?@ characters as we go. data TrigraphPrefix = TP0 | TP1 | TP2 deriving (Enum, Show) trigraphReplacement :: String -> String trigraphReplacement = go TP0 where go :: TrigraphPrefix -> String -> String go n [] = replicate (fromEnum n) '?' go TP2 ('?':xs) = '?' : go TP2 xs go TP2 (x:xs) = case lookup x trigraphs of Just x' -> x' : go TP0 xs Nothing -> "??" ++ x : go TP0 xs go i ('?':xs) = go (succ i) xs go TP0 (x:xs) = x : go TP0 xs go TP1 (x:xs) = '?' : x : go TP0 xs -- * Line Splicing lineSplicing :: [String] -> [String] lineSplicing [] = [] lineSplicing [x] = [x] lineSplicing ([]:t) = [] : lineSplicing t lineSplicing (x:t@(y:xs)) | last x == '\\' = lineSplicing ((init x++y) : xs) | otherwise = x : lineSplicing t -- FIXME: This doesn't work! we can also end or start a line with an -- operator! A #pragma line shouldn't be spliced. But note also that -- treating a close parenthesis is risky. -- | Applications can run across multiple lines. We join those lines -- to simplify subsequent parsing. We simply look for trailing or -- leading commas to determine which lines to splice. spliceApplications :: [String] -> [String] spliceApplications = go Nothing where go :: Maybe String -> [String] -> [String] go prev [] = maybe [] (:[]) prev go prev (l:ls) | headIs '#' l = let p = maybe [] (:[]) prev in p ++ l : go Nothing ls go (Just prev) (l:ls) | headOp opStarts l = go Nothing ((prev++l) : ls) | otherwise = prev : go Nothing (l:ls) go Nothing (l1:l2:ls) | headOp opEnds $ reverse l1 = go Nothing ((l1++l2):ls) go Nothing (l:ls) = go (Just l) ls opEnds = [',','+','-','*','/','(','=','%','&','|','^'] opStarts = [',','+','-','*','/','(',')','=','%','&','|','^'] headOp ops xs = case dropWhile isSpace xs of c:_ -> c `elem` ops _ -> False headIs x xs = case dropWhile isSpace xs of (y:_) -> y == x _ -> False -- * C Comments breakBlockCommentStart :: String -> Maybe (String, String) breakBlockCommentStart = go id where go _ [] = Nothing go acc ('"' : ts) = skipLiteral (go . (acc .)) ts go acc ('/' : '*' : t) = Just (acc [], t) go acc (c:cs) = go (acc . (c:)) cs breakBlockCommentEnd :: String -> Maybe String breakBlockCommentEnd [] = Nothing breakBlockCommentEnd (_:'"':cs) = skipLiteral (const breakBlockCommentEnd) cs breakBlockCommentEnd ('*':'/':t) = Just (' ':t) breakBlockCommentEnd (_:cs) = breakBlockCommentEnd cs dropOneLineBlockComments :: String -> String dropOneLineBlockComments [] = [] dropOneLineBlockComments (c:'"':cs) = c : skipLiteral (\x y -> x [] ++ dropOneLineBlockComments y) cs dropOneLineBlockComments ('/':'*':cs) = go cs where go [] = "/*" go ('*':'/':t) = ' ' : dropOneLineBlockComments t go (_:t) = go t dropOneLineBlockComments (c:cs) = c : dropOneLineBlockComments cs dropLineComments :: String -> String dropLineComments [] = [] dropLineComments ('/':'/':_) = [] dropLineComments (c:cs) = c : dropLineComments cs removeMultilineComments :: Int -> [String] -> [String] removeMultilineComments _ [] = [] removeMultilineComments lineNum (l:ls) = case breakBlockCommentStart l of Nothing -> l : removeMultilineComments (lineNum+1) ls Just (pre,_) -> case go 0 ls of (numSkipped, []) -> pre : replicate (lineNum+numSkipped) [] (numSkipped, (l':ls')) -> let lineNum' = lineNum + numSkipped in (pre ++ l') : ("#line " ++ show (lineNum'+1)) : removeMultilineComments lineNum' ls' where go :: Int -> [String] -> (Int, [String]) go numSkipped [] = (numSkipped, []) go numSkipped (l':ls') = case breakBlockCommentEnd l' of Nothing -> go (numSkipped + 1) ls' Just rst -> (numSkipped+1, rst : ls') commentRemoval :: [String] -> [String] commentRemoval = map dropLineComments . removeMultilineComments 1 . map dropOneLineBlockComments -- * Token Splices -- | Deal with the two-character '##' token pasting/splicing -- operator. We do so eliminating spaces around the @##@ -- operator. prepTokenSplices :: [Token] -> [Token] prepTokenSplices = dropSpaces [] . mergeTokens [] where -- Merges ## tokens, and reverses the input list mergeTokens acc [] = acc mergeTokens acc (Important "#" : Important "#" : ts) = mergeTokens (Important "##" : acc) (dropWhile (not . isImportant) ts) mergeTokens acc (t:ts) = mergeTokens (t : acc) ts -- Drop trailing spaces and re-reverse the list dropSpaces acc [] = acc dropSpaces acc (t@(Important "##") : ts) = dropSpaces (t : acc) (dropWhile (not . isImportant) ts) dropSpaces acc (t:ts) = dropSpaces (t : acc) ts -- * Function-like macros as Haskell functions -- | @functionMacro parameters body arguments@ substitutes @arguments@ -- for @parameters@ in @body@ and performs stringification for uses of -- the @#@ operator and token concatenation for the @##@ operator. functionMacro :: [String] -> [Token] -> [([Scan],String)] -> [Scan] functionMacro params body = paste . subst body' . zip params where subst toks gamma = go toks where go [] = [] go (p@(Important "##"):t@(Important s):ts) = case lookupKey s gamma of Nothing -> Rescan p : Rescan t : go ts Just ((_,arg),_) -> Rescan p : Rescan (Important arg) : go ts go (t@(Important s):p@(Important "##"):ts) = case lookupKey s gamma of Nothing -> Rescan t : go (p:ts) Just ((_,arg),_) -> Rescan (Important arg) : go (p:ts) go (t@(Important "##"):ts) = Rescan t : go ts go (t@(Important ('#':s)) : ts) = case lookupKey s gamma of Nothing -> Rescan t : go ts Just ((_,arg),_) -> Rescan (Important (stringify arg)) : go ts go (t@(Important s) : ts) = case lookupKey s gamma of Nothing -> Rescan t : go ts Just ((arg,_),_) -> arg ++ go ts go (t:ts) = Rescan t : go ts prepStringify [] = [] prepStringify (Important "#" : ts) = case dropWhile (not . isImportant) ts of (Important t : ts') -> Important ('#':t) : prepStringify ts' _ -> Important "#" : ts prepStringify (t:ts) = t : prepStringify ts body' = prepStringify . prepTokenSplices $ dropWhile (not . isImportant) body paste [] = [] paste (Rescan (Important s) : Rescan (Important "##") : Rescan (Important t) : ts) = paste (Rescan (Important (trimSpaces s ++ dropWhile isSpace t)) : ts) paste (t:ts) = t : paste ts -- * Pre-Processor Capabilities -- | Raise an error condition. errorHpp :: Error -> ErrHpp a errorHpp e = do f <- liftHpp . fmap curFileName $ getConfig ErrHpp . pure $ Left (f, e) -- | Lift an 'Either' into an 'ErrHpp' liftEither :: Either Error a -> ErrHpp a liftEither = either errorHpp pure -- | Lift an 'Hpp' into an 'ErrHpp' liftHpp :: Hpp a -> ErrHpp a liftHpp = ErrHpp . fmap Right -- | Read a file as an 'Hpp' action hppReadFile :: Int -> FilePath -> Hpp String hppReadFile n file = ReadFile n file return -- | Read a file available on the search path after the path -- containing the current file. hppReadNext :: Int -> FilePath -> Hpp String hppReadNext n file = ReadNext n file return -- | Obtain the current 'Config' getConfig :: Hpp Config getConfig = GetConfig return -- | Set the current 'Config' setConfig :: Config -> Hpp () setConfig = flip SetConfig (return ()) -- | Run an action with a substitute 'Config' withConfig :: Config -> ErrHpp a -> ErrHpp a withConfig cfg m = do oldCfg <- liftHpp getConfig liftHpp $ setConfig cfg r <- m liftHpp $ setConfig oldCfg return r -- * Running an Hpp Action includeCandidates :: [FilePath] -> String -> Maybe [FilePath] includeCandidates searchPath nm = case nm of '<':nm' -> Just $ sysSearch (init nm') '"':nm' -> let nm'' = init nm' in Just $ nm'' : sysSearch nm'' _ -> Nothing where sysSearch f = map ( f) searchPath searchForInclude :: [FilePath] -> String -> IO (Maybe FilePath) searchForInclude paths = maybe (return Nothing) aux . includeCandidates paths where aux [] = return Nothing aux (f:fs) = do exists <- doesFileExist f if exists then return (Just f) else aux fs searchForNextInclude :: [FilePath] -> String -> IO (Maybe FilePath) searchForNextInclude paths = maybe (return Nothing) (aux False) . includeCandidates paths where aux _ [] = return Nothing aux n (f:fs) = do exists <- doesFileExist f if exists then if n then return (Just f) else aux True fs else aux n fs runHpp :: Config -> Hpp a -> IO (Either (FilePath,Error) a) runHpp cfg = go where go (Pure x) = return (Right x) go (ReadFile ln file k) = searchForInclude (includePaths cfg) file >>= readAux ln file k go (ReadNext ln file k) = searchForNextInclude (includePaths cfg) file >>= readAux ln file k go (GetConfig k) = go (k cfg) go (SetConfig cfg' k) = runHpp cfg' k curFile = curFileName cfg readAux ln file _ Nothing = return $ Left (curFile, IncludeDoesNotExist ln file) readAux ln file k (Just file') = catch (Just <$> readFile file') (\(_::IOException) -> return Nothing) >>= maybe (return . Left $ (curFile, FailedInclude ln file)) (go . k) -- * Preprocessor -- | Parse the definition of an object-like or function macro. parseDefinition :: [Token] -> Maybe (String, Macro) parseDefinition toks = case dropWhile (not . isImportant) toks of (Important name:Important "(":rst) -> let params = takeWhile (/= ")") $ filter (/= ",") (importants rst) body = trimUnimportant . tail $ dropWhile (/= Important ")") toks macro = Function (length params) (functionMacro params body) in Just (name, macro) (Important name:_) -> let rhs = case dropWhile (/= Important name) toks of [] -> [Important ""] str@(_:t) | all (not . isImportant) str -> [Important ""] | otherwise -> trimUnimportant t in Just (name, Object rhs) _ -> Nothing -- | Trim 'Other' 'Token's from both ends of a list of 'Token's. trimUnimportant :: [Token] -> [Token] trimUnimportant = aux id . dropWhile (not .isImportant) where aux _ [] = [] aux acc (t@(Important _) : ts) = acc (t : aux id ts) aux acc (t@(Other _) : ts) = aux (acc . (t:)) ts -- | Handle preprocessor directives (commands prefixed with an octothorpe). directive :: Config -> LineNum -> String -> ErrHpp ((LineNum -> [String] -> Config -> Env -> [String] -> ErrHpp r) -> Env -> [String] -> ErrHpp r) directive cfg ln s = case importants toks of "pragma":_ -> pure $ \k -> k ln' [] cfg -- Pragmas are ignored "define":_ -> case parseDefinition . tail $ dropWhile (/= Important "define") toks of Nothing -> errorHpp $ BadMacroDefinition ln Just def -> pure $ \k -> k ln' [] cfg . (def :) ["undef",name] -> pure $ \k -> k ln' [] cfg . deleteKey name "include":_ -> includeAux hppReadFile . trimUnimportant . tail $ dropWhile (/= Important "include") toks "include_next":_ -> includeAux hppReadNext . trimUnimportant . tail $ dropWhile (/= Important "include_next") toks ("line":_) -> pure $ \k env lns -> do (env',rst) <- liftEither . expandLine cfg ln env . tail $ dropWhile (/= Important "line") toks case dropWhile (not . isImportant) rst of Important n:optFile -> case readMaybe n of Nothing -> errorHpp $ BadLineArgument ln n Just ln'' -> let cfg' = case optFile of [] -> cfg _ -> let f = unquote . detokenize . dropWhile (not . isImportant) . tail -- line number token . dropWhile (not . isImportant) $ rst in cfg { curFileNameF = pure f } in k ln'' [] cfg' env' lns _ -> errorHpp $ BadLineArgument ln s ["ifdef", x] -> pure $ \k env lns -> case lookupKey x env of Nothing -> do lns' <- liftEither $ dropBranch lns k ln [] cfg env lns' Just _ -> liftEither (takeBranch lns) >>= k ln [] cfg env ["ifndef", x] -> pure $ \k env lns -> case lookupKey x env of Nothing -> liftEither (takeBranch lns) >>= k ln [] cfg env Just (_,env') -> do lns' <- liftEither $ dropBranch lns k ln [] cfg env' lns' ["else"] -> pure $ \k env lns -> liftEither (takeBranch lns) >>= k ln [] cfg env "if":_ -> pure $ ifAux "if" "elif":_ -> pure $ ifAux "elif" ["endif"] -> pure $ \k env -> k ln [] cfg env "error":_ -> errorHpp . UserError ln . detokenize . dropWhile (not . isImportant) . tail $ dropWhile (/= Important "error") toks "warning":_ -> pure $ \k env -> k ln' [] cfg env -- FIXME _ -> errorHpp $ UnknownCommand ln s where toks = tokenize s ln' = ln + 1 toksAfterCommand cmd = tail $ dropWhile (/= Important cmd) toks ifAux c k env lns = do (env',ex) <- liftEither . expandLine cfg ln env . squashDefines env $ toksAfterCommand c let res = evalExpr <$> parseExpr ex -- res' = (if curFileName cfg == "test" -- then trace ("Eval "++show ex++" => "++show res) -- else id) res if maybe False (/= 0) res then either errorHpp (k ln [] cfg env') (takeBranch lns) else either errorHpp (k ln [] cfg env') (dropBranch lns) includeAux readFun fileToks = pure $ \k env lns -> do (env', fileName) <- liftEither $ expandLine cfg ln env fileToks let fileName' = detokenize $ trimUnimportant fileName cfg' = -- trace ("Including "++show fileName') $ cfg { curFileNameF = pure $ unquote fileName' } (env'',inc) <- liftHpp (readFun ln fileName') >>= withConfig cfg' . preprocess env' k ln' [inc] cfg env'' lns -- | We want to expand macros in expressions that must be evaluated -- for condtionalals, but we want to take special care when dealing -- with the meta @defined@ operator of the expression language that is -- a predicate on the evaluation environment. squashDefines :: Env -> [Token] -> [Token] squashDefines _ [] = [] squashDefines env (Important "defined" : ts) = go ts where go (t@(Other _) : ts') = t : go ts' go (Important "(" : ts') = Important "(" : go ts' go (Important t : ts') = case lookupKey t env of Nothing -> Important "0" : squashDefines env ts' Just (_,env') -> Important "1" : squashDefines env' ts' go [] = [] squashDefines env (t : ts) = t : squashDefines env ts getCmd :: String -> Maybe (String,[Token]) getCmd = aux . dropWhile (not . isImportant) . tokenize where aux (Important "#" : ts) = let (Important cmd:toks) = dropWhile (not . isImportant) ts in Just (cmd, toks) aux _ = Nothing -- | Feed an entire conditional block (bounded by @#if@/@#endif@) as -- the first argument to the given continuation, and the remaining -- input as the second argument. takeConditional :: [String] -> (DList String -> [String] -> r) -> r takeConditional lns0 k = go id lns0 where go acc [] = k acc [] go acc (ln:lns) = case getCmd ln of Nothing -> go (acc . (ln:)) lns Just (cmd,_) | cmd == "endif" -> k (acc . (ln:)) lns | cmd `elem` ["if","ifdef","ifndef"] -> takeConditional lns $ \acc' lns' -> go (acc . (ln:) . acc') lns' | otherwise -> go (acc . (ln:)) lns -- | Take everything up to the end of this branch, drop all remaining -- branches (if any), and inject a @line@ update command in the -- remaining stream. The first argument is the first line of branch -- being taken. This is supplied so branches not taken can be -- discounted from the running line count. takeBranch :: [String] -> Either Error [String] takeBranch = go id where go _ [] = Left UnterminatedBranch go acc (ln:lns) = case getCmd ln of Just (cmd,_) | cmd `elem` ["if","ifdef","ifndef"] -> takeConditional lns $ \acc' lns' -> go (acc . (ln:) . acc') lns' | cmd == "endif" -> Right (acc [] ++ lns) | cmd `elem` ["else","elif"] -> case dropAllBranches lns of Right lns' -> Right $ acc [] ++ lns' Left err -> Left err _ -> go (acc . (ln:)) lns dropAllBranches :: [String] -> Either Error [String] dropAllBranches = aux <=< dropBranch where aux :: [String] -> Either Error [String] aux [] = Left UnterminatedBranch aux (ln:lns) = case getCmd ln of Just ("endif",_) -> Right lns _ -> dropAllBranches lns -- | Skip to the end of a conditional branch, returning the remaining -- input. dropBranch :: [String] -> Either Error [String] dropBranch = go where go [] = Left UnterminatedBranch go (l:ls) = case getCmd l of Just (cmd,_) -- Drop nested conditional blocks | cmd `elem` ["if","ifdef","ifndef"] -> dropAllBranches ls >>= go | cmd `elem` ["else","elif","endif"] -> Right (l:ls) | otherwise -> go ls Nothing -> go ls -- | Returns a new macro binding environment and the result of -- expanding the input string. macroExpansion :: Config -- ^ Options controlling the preprocessor -> Env -- ^ Macro binding environment -> [String] -- ^ Input lines -> ErrHpp (Env, [String]) macroExpansion cfg0 macros = go 1 cfg0 macros id where go :: Int -> Config -> [(String, Macro)] -> DList String -> [String] -> ErrHpp (Env, [String]) go _ _ ms acc [] = return (ms, acc []) go lineNum cfg ms acc (x:xs) = case dropWhile isSpace x of [] -> go (lineNum + 1) cfg ms (acc . (x:)) xs ('#':cmd) -> do k <- directive cfg lineNum cmd k (\lineNum' newLines cfg' ms' remainingInput -> go lineNum' cfg' ms' (acc . (newLines++)) remainingInput) ms xs _ -> do (ms',x') <- either errorHpp pure $ expandLine cfg lineNum ms (tokenize x) go (lineNum+1) cfg ms' (acc . (detokenize x':)) xs -- | @preprocess env src@ runs the pre-processor over source code -- @src@ beginning with macro binding environment @env@. preprocess :: Env -> String -> ErrHpp (Env, String) preprocess env inp = do cfg <- liftHpp $ GetConfig return let splicer = if spliceLongLines cfg then lineSplicing else id decomment = if eraseCComments cfg then commentRemoval else id appSplicer = if runIdentity (spliceApplicationsF cfg) then spliceApplications else id go = macroExpansion cfg env -- (\lns -> return (env, lns)) . appSplicer . splicer . decomment . map trigraphReplacement fmap (second unlines) . go $ lines inp -- | Run an 'Hpp' action that might fail with a given initial -- configuration. runErrHppIO :: Config -> ErrHpp a -> IO a runErrHppIO cfg = fmap (either err (either err id)) . runHpp cfg . runErrHpp where err :: (FilePath,Error) -> a err = error . show