{-# LANGUAGE BangPatterns, CPP, OverloadedStrings #-}
-- | The simplest pre-processing steps are represented as distinct
-- passes over input lines.
module Hpp.Preprocessing
  (
    trigraphReplacement
  , lineSplicing
  , cCommentRemoval
  , cCommentAndTrigraph
  , prepareInput
  ) where
import Control.Arrow (first)
import Data.Char (isSpace)
#if __GLASGOW_HASKELL__ < 804
import Data.Semigroup ((<>))
#endif
import Data.String (fromString)
import Hpp.Config
import Hpp.StringSig
import Hpp.Tokens (tokenize, Token(..), skipLiteral)
import Hpp.Types (TOKEN, String, HasHppState, getState, config, getL)
import Prelude hiding (String)

-- * Trigraphs

-- | The first component of each pair represents the end of a known
-- trigraph sequence (each trigraph begins with two consecutive
-- question marks (@\"??\"@). The second component is the
-- single-character equivalent that we substitute in for the trigraph.
trigraphs :: [(Char, Char)]
trigraphs :: [(Char, Char)]
trigraphs = [ (Char
'=', Char
'#')
            , (Char
'/', Char
'\\')
            , (Char
'\'', Char
'^')
            , (Char
'(', Char
'[')
            , (Char
')', Char
']')
            , (Char
'!', Char
'|')
            , (Char
'<', Char
'{')
            , (Char
'>', Char
'}')
            , (Char
'-', Char
'~') ]

trigraphReplacement :: Stringy s => s -> s
trigraphReplacement :: s -> s
trigraphReplacement s
s = Maybe ((), s, s) -> s
forall a. Maybe (a, s, s) -> s
aux ([(s, ())] -> s -> Maybe ((), s, s)
forall s t. Stringy s => [(s, t)] -> s -> Maybe (t, s, s)
breakOn [(s
"??", ())] s
s)
  where aux :: Maybe (a, s, s) -> s
aux Maybe (a, s, s)
Nothing = s
s
        aux (Just (a
_, s
pre, s
pos)) =
          case s -> Maybe (Char, s)
forall s. Stringy s => s -> Maybe (Char, s)
uncons s
pos of
            Maybe (Char, s)
Nothing -> s
pre s -> s -> s
forall a. Semigroup a => a -> a -> a
<> s
"??"
            Just (Char
c,s
t) ->
              case Char -> [(Char, Char)] -> Maybe Char
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Char
c [(Char, Char)]
trigraphs of
                Just Char
c' -> s -> Char -> s
forall s. Stringy s => s -> Char -> s
snoc s
pre Char
c' s -> s -> s
forall a. Semigroup a => a -> a -> a
<> s -> s
forall s. Stringy s => s -> s
trigraphReplacement s
t
                Maybe Char
Nothing -> s -> Char -> s
forall s. Stringy s => s -> Char -> s
snoc s
pre Char
'?' s -> s -> s
forall a. Semigroup a => a -> a -> a
<> s -> s
forall s. Stringy s => s -> s
trigraphReplacement (Char -> s -> s
forall s. Stringy s => Char -> s -> s
cons Char
'?' s
pos)

-- * Line Splicing

-- | If a line ends with a backslash, it is prepended to the following
-- the line.
lineSplicing :: Stringy s => [s] -> [s]
lineSplicing :: [s] -> [s]
lineSplicing = (s -> s) -> [s] -> [s]
forall a. Stringy a => (a -> a) -> [a] -> [a]
go s -> s
forall a. a -> a
id
  where go :: (a -> a) -> [a] -> [a]
go a -> a
acc [] = [a -> a
acc a
forall a. Monoid a => a
mempty]
        go a -> a
acc (a
ln:[a]
lns) = case a -> Maybe (a, Char)
forall s. Stringy s => s -> Maybe (s, Char)
unsnoc a
ln of
                            Maybe (a, Char)
Nothing -> a -> a
acc a
forall a. Monoid a => a
mempty a -> [a] -> [a]
forall a. a -> [a] -> [a]
: (a -> a) -> [a] -> [a]
go a -> a
forall a. a -> a
id [a]
lns
                            Just (a
ini, Char
'\\') -> (a -> a) -> [a] -> [a]
go (a -> a
acc (a -> a) -> (a -> a) -> a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a
inia -> a -> a
forall a. Semigroup a => a -> a -> a
<>)) [a]
lns
                            Just (a, Char)
_ -> a -> a
acc a
ln a -> [a] -> [a]
forall a. a -> [a] -> [a]
: (a -> a) -> [a] -> [a]
go a -> a
forall a. a -> a
id [a]
lns
{-# INLINE lineSplicing #-}

-- * C Comments

breakBlockCommentStart :: Stringy s => s -> Maybe (s, s)
breakBlockCommentStart :: s -> Maybe (s, s)
breakBlockCommentStart s
s =
  case Char -> s -> s -> CharOrSub s
forall s. Stringy s => Char -> s -> s -> CharOrSub s
breakCharOrSub Char
'"' s
"/*" s
s of
    CharOrSub s
NoMatch -> Maybe (s, s)
forall a. Maybe a
Nothing
    CharMatch s
pre s
pos -> let (s
lit, s
rest) = s -> (s, s)
forall s. Stringy s => s -> (s, s)
skipLiteral s
pos
                         in (s -> s) -> (s, s) -> (s, s)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first ((s
pre s -> s -> s
forall a. Semigroup a => a -> a -> a
<> s
lit) s -> s -> s
forall a. Semigroup a => a -> a -> a
<>) ((s, s) -> (s, s)) -> Maybe (s, s) -> Maybe (s, s)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
                            s -> Maybe (s, s)
forall s. Stringy s => s -> Maybe (s, s)
breakBlockCommentStart s
rest
    SubMatch s
pre s
pos -> (s, s) -> Maybe (s, s)
forall a. a -> Maybe a
Just (s
pre, s
pos)

breakBlockCommentEnd :: Stringy s => s -> Maybe s
breakBlockCommentEnd :: s -> Maybe s
breakBlockCommentEnd s
s =
  case Char -> s -> s -> CharOrSub s
forall s. Stringy s => Char -> s -> s -> CharOrSub s
breakCharOrSub Char
'"' s
"*/" s
s of
    CharOrSub s
NoMatch -> Maybe s
forall a. Maybe a
Nothing
    CharMatch s
_ s
pos -> let (s
_, s
rest) = s -> (s, s)
forall s. Stringy s => s -> (s, s)
skipLiteral s
pos
                       in s -> Maybe s
forall s. Stringy s => s -> Maybe s
breakBlockCommentEnd s
rest
    SubMatch s
_ s
pos -> s -> Maybe s
forall a. a -> Maybe a
Just s
pos

dropOneLineBlockComments :: Stringy s => s -> s
dropOneLineBlockComments :: s -> s
dropOneLineBlockComments s
s =
  case Char -> s -> s -> CharOrSub s
forall s. Stringy s => Char -> s -> s -> CharOrSub s
breakCharOrSub Char
'"' s
"/*"s
s of
    CharOrSub s
NoMatch -> s
s
    CharMatch s
pre s
pos ->
      let (s
lit,s
rest) = s -> (s, s)
forall s. Stringy s => s -> (s, s)
skipLiteral s
pos
      in s -> Char -> s
forall s. Stringy s => s -> Char -> s
snoc s
pre Char
'"' s -> s -> s
forall a. Semigroup a => a -> a -> a
<> s
lit s -> s -> s
forall a. Semigroup a => a -> a -> a
<> s -> s
forall s. Stringy s => s -> s
dropOneLineBlockComments s
rest
    SubMatch s
pre s
pos ->
      case [(s, ())] -> s -> Maybe ((), s, s)
forall s t. Stringy s => [(s, t)] -> s -> Maybe (t, s, s)
breakOn [(s
"*/", ())] s
pos of
        Maybe ((), s, s)
Nothing -> s
pre s -> s -> s
forall a. Semigroup a => a -> a -> a
<> s
"/*"
        Just (()
_,s
_,s
pos') -> s -> Char -> s
forall s. Stringy s => s -> Char -> s
snoc s
pre Char
' ' s -> s -> s
forall a. Semigroup a => a -> a -> a
<> s -> s
forall s. Stringy s => s -> s
dropOneLineBlockComments s
pos'

removeMultilineComments :: Stringy s => Int -> [s] -> [s]
removeMultilineComments :: Int -> [s] -> [s]
removeMultilineComments !Int
lineStart = Int -> [s] -> [s]
forall a a. (Stringy a, Num a, Show a) => a -> [a] -> [a]
goStart Int
lineStart
  where goStart :: a -> [a] -> [a]
goStart a
_ [] = []
        goStart !a
curLine (a
ln:[a]
lns) =
          case a -> Maybe (a, a)
forall s. Stringy s => s -> Maybe (s, s)
breakBlockCommentStart a
ln of
            Maybe (a, a)
Nothing -> a
ln a -> [a] -> [a]
forall a. a -> [a] -> [a]
: a -> [a] -> [a]
goStart (a
curLinea -> a -> a
forall a. Num a => a -> a -> a
+a
1) [a]
lns
            Just (a
pre,a
_) -> a -> a -> [a] -> [a]
goEnd (a
curLinea -> a -> a
forall a. Num a => a -> a -> a
+a
1) a
pre [a]
lns
        goEnd :: a -> a -> [a] -> [a]
goEnd a
_ a
_ [] = [Char] -> [a]
forall a. HasCallStack => [Char] -> a
error [Char]
"Unmatched /*"
        goEnd !a
curLine a
pre (a
ln:[a]
lns) =
          case a -> Maybe a
forall s. Stringy s => s -> Maybe s
breakBlockCommentEnd a
ln of
            Maybe a
Nothing -> a -> a -> [a] -> [a]
goEnd (a
curLinea -> a -> a
forall a. Num a => a -> a -> a
+a
1) a
pre [a]
lns
            Just a
pos
              | (Char -> Bool) -> a -> Bool
forall s. Stringy s => (Char -> Bool) -> s -> Bool
sall Char -> Bool
isSpace (a
prea -> a -> a
forall a. Semigroup a => a -> a -> a
<>a
pos) ->
                (a
"#line "a -> a -> a
forall a. Semigroup a => a -> a -> a
<> [Char] -> a
forall a. IsString a => [Char] -> a
fromString (a -> [Char]
forall a. Show a => a -> [Char]
show (a
curLinea -> a -> a
forall a. Num a => a -> a -> a
+a
1))) a -> [a] -> [a]
forall a. a -> [a] -> [a]
: a -> [a] -> [a]
goStart (a
curLine a -> a -> a
forall a. Num a => a -> a -> a
+ a
1) [a]
lns
              | Bool
otherwise -> (a
prea -> a -> a
forall a. Semigroup a => a -> a -> a
<>a
pos)
                             a -> [a] -> [a]
forall a. a -> [a] -> [a]
: (a
"#line "a -> a -> a
forall a. Semigroup a => a -> a -> a
<> [Char] -> a
forall a. IsString a => [Char] -> a
fromString (a -> [Char]
forall a. Show a => a -> [Char]
show (a
curLinea -> a -> a
forall a. Num a => a -> a -> a
+a
1)))
                             a -> [a] -> [a]
forall a. a -> [a] -> [a]
: a -> [a] -> [a]
goStart (a
curLinea -> a -> a
forall a. Num a => a -> a -> a
+a
1) [a]
lns

-- | Remove C-style comments bracketed by @/*@ and @*/@.
cCommentRemoval :: Stringy s => [s] -> [s]
cCommentRemoval :: [s] -> [s]
cCommentRemoval = Int -> [s] -> [s]
forall s. Stringy s => Int -> [s] -> [s]
removeMultilineComments Int
1 ([s] -> [s]) -> ([s] -> [s]) -> [s] -> [s]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (s -> s) -> [s] -> [s]
forall a b. (a -> b) -> [a] -> [b]
map s -> s
forall s. Stringy s => s -> s
dropOneLineBlockComments

-- | Remove C-style comments bracked by @/*@ and @*/@ and perform
-- trigraph replacement.
cCommentAndTrigraph :: Stringy s => [s] -> [s]
cCommentAndTrigraph :: [s] -> [s]
cCommentAndTrigraph = Int -> [s] -> [s]
forall s. Stringy s => Int -> [s] -> [s]
removeMultilineComments Int
1
                    ([s] -> [s]) -> ([s] -> [s]) -> [s] -> [s]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (s -> s) -> [s] -> [s]
forall a b. (a -> b) -> [a] -> [b]
map (s -> s
forall s. Stringy s => s -> s
dropOneLineBlockComments (s -> s) -> (s -> s) -> s -> s
forall b c a. (b -> c) -> (a -> b) -> a -> c
. s -> s
forall s. Stringy s => s -> s
trigraphReplacement)

prepareInput :: (Monad m, HasHppState m) => m ([String] -> [[TOKEN]])
prepareInput :: m ([String] -> [[TOKEN]])
prepareInput =
  do Config
cfg <- Lens HppState Config -> HppState -> Config
forall s a. Lens s a -> s -> a
getL Lens HppState Config
config (HppState -> Config) -> m HppState -> m Config
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> m HppState
forall (m :: * -> *). HasHppState m => m HppState
getState
     case () of
       ()
_ | Config -> Bool
eraseCComments Config
cfg Bool -> Bool -> Bool
&& Config -> Bool
spliceLongLines Config
cfg
           Bool -> Bool -> Bool
&& Bool -> Bool
not (Config -> Bool
inhibitLinemarkers Config
cfg) -> ([String] -> [[TOKEN]]) -> m ([String] -> [[TOKEN]])
forall (f :: * -> *) a. Applicative f => a -> f a
pure [String] -> [[TOKEN]]
normalCPP
       ()
_ | (Config -> Bool
eraseCComments Config
cfg Bool -> Bool -> Bool
&& Config -> Bool
spliceLongLines Config
cfg
            Bool -> Bool -> Bool
&& (Bool -> Bool
not (Config -> Bool
replaceTrigraphs Config
cfg))) ->
           ([String] -> [[TOKEN]]) -> m ([String] -> [[TOKEN]])
forall (f :: * -> *) a. Applicative f => a -> f a
pure [String] -> [[TOKEN]]
haskellCPP
       ()
_ | Bool -> Bool
not (((Config -> Bool) -> Bool) -> [Config -> Bool] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any ((Config -> Bool) -> Config -> Bool
forall a b. (a -> b) -> a -> b
$ Config
cfg) [ Config -> Bool
eraseCComments
                            , Config -> Bool
spliceLongLines
                            , Config -> Bool
replaceTrigraphs ]) -> ([String] -> [[TOKEN]]) -> m ([String] -> [[TOKEN]])
forall (f :: * -> *) a. Applicative f => a -> f a
pure [String] -> [[TOKEN]]
onlyMacrosCPP
       ()
_ | Bool
otherwise -> ([String] -> [[TOKEN]]) -> m ([String] -> [[TOKEN]])
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Config -> [String] -> [[TOKEN]]
genericConfig Config
cfg)

-- * HPP configurations

-- | Standard CPP settings for processing C files.
normalCPP :: [String] -> [[TOKEN]]
normalCPP :: [String] -> [[TOKEN]]
normalCPP = (String -> [TOKEN]) -> [String] -> [[TOKEN]]
forall a b. (a -> b) -> [a] -> [b]
map (([TOKEN] -> [TOKEN] -> [TOKEN]
forall a. [a] -> [a] -> [a]
++ [String -> TOKEN
forall s. s -> Token s
Other String
"\n"]) ([TOKEN] -> [TOKEN]) -> (String -> [TOKEN]) -> String -> [TOKEN]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [TOKEN]
forall s. Stringy s => s -> [Token s]
tokenize)
          ([String] -> [[TOKEN]])
-> ([String] -> [String]) -> [String] -> [[TOKEN]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> [String]
forall s. Stringy s => [s] -> [s]
lineSplicing
          ([String] -> [String])
-> ([String] -> [String]) -> [String] -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> [String]
forall s. Stringy s => [s] -> [s]
cCommentAndTrigraph
{-# INLINABLE normalCPP #-}

-- | For Haskell we do not want trigraph replacement.
haskellCPP :: [String] -> [[TOKEN]]
haskellCPP :: [String] -> [[TOKEN]]
haskellCPP = (String -> [TOKEN]) -> [String] -> [[TOKEN]]
forall a b. (a -> b) -> [a] -> [b]
map (([TOKEN] -> [TOKEN] -> [TOKEN]
forall a. [a] -> [a] -> [a]
++[String -> TOKEN
forall s. s -> Token s
Other String
"\n"]) ([TOKEN] -> [TOKEN]) -> (String -> [TOKEN]) -> String -> [TOKEN]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [TOKEN]
forall s. Stringy s => s -> [Token s]
tokenize)
           ([String] -> [[TOKEN]])
-> ([String] -> [String]) -> [String] -> [[TOKEN]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> [String]
forall s. Stringy s => [s] -> [s]
lineSplicing
           ([String] -> [String])
-> ([String] -> [String]) -> [String] -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> [String]
forall s. Stringy s => [s] -> [s]
cCommentRemoval
{-# INLINABLE haskellCPP #-}

-- | No C-style comment removal; no line splicing; no trigraph
-- replacement. This variant only supports macros and conditionals.
onlyMacrosCPP :: [String] -> [[TOKEN]]
onlyMacrosCPP :: [String] -> [[TOKEN]]
onlyMacrosCPP = (String -> [TOKEN]) -> [String] -> [[TOKEN]]
forall a b. (a -> b) -> [a] -> [b]
map (([TOKEN] -> [TOKEN] -> [TOKEN]
forall a. [a] -> [a] -> [a]
++[String -> TOKEN
forall s. s -> Token s
Other String
"\n"]) ([TOKEN] -> [TOKEN]) -> (String -> [TOKEN]) -> String -> [TOKEN]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [TOKEN]
forall s. Stringy s => s -> [Token s]
tokenize)
{-# INLINABLE onlyMacrosCPP #-}

-- | If we don't have a predefined processor, we build one based on a
-- 'Config' value.
genericConfig :: Config -> [String] -> [[TOKEN]]
genericConfig :: Config -> [String] -> [[TOKEN]]
genericConfig Config
cfg = (String -> [TOKEN]) -> [String] -> [[TOKEN]]
forall a b. (a -> b) -> [a] -> [b]
map (([TOKEN] -> [TOKEN] -> [TOKEN]
forall a. [a] -> [a] -> [a]
++ [String -> TOKEN
forall s. s -> Token s
Other String
"\n"]) ([TOKEN] -> [TOKEN]) -> (String -> [TOKEN]) -> String -> [TOKEN]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [TOKEN]
forall s. Stringy s => s -> [Token s]
tokenize)
                  ([String] -> [[TOKEN]])
-> ([String] -> [String]) -> [String] -> [[TOKEN]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (if Config -> Bool
spliceLongLines Config
cfg then [String] -> [String]
forall s. Stringy s => [s] -> [s]
lineSplicing else [String] -> [String]
forall a. a -> a
id)
                  ([String] -> [String])
-> ([String] -> [String]) -> [String] -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (if Config -> Bool
eraseCComments Config
cfg then [String] -> [String]
forall s. Stringy s => [s] -> [s]
cCommentRemoval else [String] -> [String]
forall a. a -> a
id)
                  ([String] -> [String])
-> ([String] -> [String]) -> [String] -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (if Config -> Bool
replaceTrigraphs Config
cfg then (String -> String) -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map String -> String
forall s. Stringy s => s -> s
trigraphReplacement else [String] -> [String]
forall a. a -> a
id)