{-# LANGUAGE BangPatterns, CPP, OverloadedStrings #-}
module Hpp.Preprocessing
(
trigraphReplacement
, lineSplicing
, cCommentRemoval
, cCommentAndTrigraph
, prepareInput
) where
import Control.Arrow (first)
import Data.Char (isSpace)
#if __GLASGOW_HASKELL__ < 804
import Data.Semigroup ((<>))
#endif
import Data.String (fromString)
import Hpp.Config
import Hpp.StringSig
import Hpp.Tokens (tokenize, Token(..), skipLiteral)
import Hpp.Types (TOKEN, String, HasHppState, getState, config, getL)
import Prelude hiding (String)
trigraphs :: [(Char, Char)]
trigraphs = [ ('=', '#')
, ('/', '\\')
, ('\'', '^')
, ('(', '[')
, (')', ']')
, ('!', '|')
, ('<', '{')
, ('>', '}')
, ('-', '~') ]
trigraphReplacement :: Stringy s => s -> s
trigraphReplacement s = aux (breakOn [("??", ())] s)
where aux Nothing = s
aux (Just (_, pre, pos)) =
case uncons pos of
Nothing -> pre <> "??"
Just (c,t) ->
case lookup c trigraphs of
Just c' -> snoc pre c' <> trigraphReplacement t
Nothing -> snoc pre '?' <> trigraphReplacement (cons '?' pos)
lineSplicing :: Stringy s => [s] -> [s]
lineSplicing = go id
where go acc [] = [acc mempty]
go acc (ln:lns) = case unsnoc ln of
Nothing -> acc mempty : go id lns
Just (ini, '\\') -> go (acc . (ini<>)) lns
Just _ -> acc ln : go id lns
{-# INLINE lineSplicing #-}
breakBlockCommentStart :: Stringy s => s -> Maybe (s, s)
breakBlockCommentStart s =
case breakCharOrSub '"' "/*" s of
NoMatch -> Nothing
CharMatch pre pos -> let (lit, rest) = skipLiteral pos
in first ((pre <> lit) <>) <$>
breakBlockCommentStart rest
SubMatch pre pos -> Just (pre, pos)
breakBlockCommentEnd :: Stringy s => s -> Maybe s
breakBlockCommentEnd s =
case breakCharOrSub '"' "*/" s of
NoMatch -> Nothing
CharMatch _ pos -> let (_, rest) = skipLiteral pos
in breakBlockCommentEnd rest
SubMatch _ pos -> Just pos
dropOneLineBlockComments :: Stringy s => s -> s
dropOneLineBlockComments s =
case breakCharOrSub '"' "/*"s of
NoMatch -> s
CharMatch pre pos ->
let (lit,rest) = skipLiteral pos
in snoc pre '"' <> lit <> dropOneLineBlockComments rest
SubMatch pre pos ->
case breakOn [("*/", ())] pos of
Nothing -> pre <> "/*"
Just (_,_,pos') -> snoc pre ' ' <> dropOneLineBlockComments pos'
removeMultilineComments :: Stringy s => Int -> [s] -> [s]
removeMultilineComments !lineStart = goStart lineStart
where goStart _ [] = []
goStart !curLine (ln:lns) =
case breakBlockCommentStart ln of
Nothing -> ln : goStart (curLine+1) lns
Just (pre,_) -> goEnd (curLine+1) pre lns
goEnd _ _ [] = error "Unmatched /*"
goEnd !curLine pre (ln:lns) =
case breakBlockCommentEnd ln of
Nothing -> goEnd (curLine+1) pre lns
Just pos
| sall isSpace (pre<>pos) ->
("#line "<> fromString (show (curLine+1))) : goStart (curLine + 1) lns
| otherwise -> (pre<>pos)
: ("#line "<> fromString (show (curLine+1)))
: goStart (curLine+1) lns
cCommentRemoval :: Stringy s => [s] -> [s]
cCommentRemoval = removeMultilineComments 1 . map dropOneLineBlockComments
cCommentAndTrigraph :: Stringy s => [s] -> [s]
cCommentAndTrigraph = removeMultilineComments 1
. map (dropOneLineBlockComments . trigraphReplacement)
prepareInput :: (Monad m, HasHppState m) => m ([String] -> [[TOKEN]])
prepareInput =
do cfg <- getL config <$> getState
case () of
_ | eraseCComments cfg && spliceLongLines cfg
&& not (inhibitLinemarkers cfg) -> pure normalCPP
_ | (eraseCComments cfg && spliceLongLines cfg
&& (not (replaceTrigraphs cfg))) ->
pure haskellCPP
_ | otherwise -> pure (genericConfig cfg)
normalCPP :: [String] -> [[TOKEN]]
normalCPP = map ((++ [Other "\n"]) . tokenize)
. lineSplicing
. cCommentAndTrigraph
{-# INLINABLE normalCPP #-}
haskellCPP :: [String] -> [[TOKEN]]
haskellCPP = map ((++[Other "\n"]) . tokenize)
. lineSplicing
. cCommentRemoval
{-# INLINABLE haskellCPP #-}
genericConfig :: Config -> [String] -> [[TOKEN]]
genericConfig cfg = map ((++ [Other "\n"]) . tokenize)
. (if spliceLongLines cfg then lineSplicing else id)
. (if eraseCComments cfg then cCommentRemoval else id)
. (if replaceTrigraphs cfg then map trigraphReplacement else id)