{-# LANGUAGE ViewPatterns #-} {-| Module : Language.C.Preprocessor.Remover.Internal.AddPadding Description : Padding of the Cpp output Copyright : (c) Carlo Nucera, 2016 License : BSD3 Maintainer : meditans@gmail.com Stability : experimental Portability : POSIX After cpp preprocessing, the file is left by the compilation pipeline in the output format of the @cpp@ program, described in <https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html this section> of the C Preprocessor manual. By default, the @cpp@ program inserts blank lines to preserve line numbering, but only if the number of blank lines to be created is not too high (<6 or so). Otherwise a linemarker is created, to reduce the size of the generated file, of the form: @ # linenum filename flags @ As cpp doesn't have an option to output only blank lines and keeping the line numbering, the following functions parse a file with linemarkers separating it in `CppOutputComponents` (the source chunks between the linemarkers), and pad them with the appropriate amount of blank lines. -} module Language.C.Preprocessor.Remover.Internal.AddPadding ( -- * Entry point for padding addPadding -- * Data Types , LineMarker (..) , isLineMarker , parseLineMarker , CppOutputComponent (..) -- * Stages of padding , parseCppOutputComponents , discardUnusefulComponents , reconstructSource ) where import Data.Char (isDigit) import Data.List (isPrefixOf, isSuffixOf) import Data.List.Extra (repeatedly) -------------------------------------------------------------------------------- -- Entry point for padding -------------------------------------------------------------------------------- -- | Substitutes the lineMarker in the content of a file with the appropriate -- blank line padding. addPadding :: FilePath -> String -> String addPadding fp = unlines . reconstructSource . discardUnusefulComponents fp . parseCppOutputComponents . lines -------------------------------------------------------------------------------- -- Data Types -------------------------------------------------------------------------------- -- | A 'LineMarker' follows the structure described -- <https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html here>. We only -- retain the linenumber and the file the line is referring to. Note that the -- filename is surrounded by quotation marks in the cpp output, but not in this -- representation. data LineMarker = LineMarker { beginsAtLine :: Int , filePath :: FilePath } deriving (Show) -- | -- >>> isLineMarker "# 42 \"/path/to/file\"" -- True isLineMarker :: String -> Bool isLineMarker (words -> hash:number:fp:_) = hash == "#" && all isDigit number && isPrefixOf "\"" fp && isSuffixOf "\"" fp isLineMarker _ = False -- | -- >>> parseLineMarker "# 42 \"/path/to/file\"" -- LineMarker {beginsAtLine = 42, filePath = "/path/to/file"} parseLineMarker :: String -> LineMarker parseLineMarker s = LineMarker (read $ words s !! 1) (unquote $ words s !! 2) where unquote = tail . init -- | A 'CppOutputComponent' is constituted by a 'LineMarker' and the block of -- code till the next 'LineMarker'. data CppOutputComponent = CppOutputComponent { lineMarker :: LineMarker , sourceBlock :: [String] } deriving (Show) -------------------------------------------------------------------------------- -- Stages of padding -------------------------------------------------------------------------------- -- | Given the lines of a file, parses the CppOutputComponents. Note that a file -- that doesn't need cpp preprocessing doesn't have any 'LineMarker'. In that -- case a dummy component is created, with an empty path. parseCppOutputComponents :: [String] -> [CppOutputComponent] parseCppOutputComponents ss | any isLineMarker ss = flip repeatedly ss $ \ls -> let (content, rest) = span (not . isLineMarker) (tail ls) cppComponent = CppOutputComponent (parseLineMarker $ head ls) content in (cppComponent, rest) | otherwise = [CppOutputComponent (LineMarker 1 "") ss] -- | Discard the parts of cpp output which correspond to cpp include files. If -- there's a unique component then we return that one, otherwise we return all -- the components relative to our file other than the first (which has no real -- meaning). discardUnusefulComponents :: FilePath -> [CppOutputComponent] -> [CppOutputComponent] discardUnusefulComponents _ [] = error "The function discardUnusefulComponents expects a non-empty list of components" discardUnusefulComponents _ [c] = [c] discardUnusefulComponents fp cs = filter ((== fp) . filePath . lineMarker) cs -- | Adds padding to the source blocks to mantain the correct line numbers of -- the source code. reconstructSource :: [CppOutputComponent] -> [String] reconstructSource = sourceBlock . foldr1 combine where combine (CppOutputComponent lm1 c1) (CppOutputComponent lm2 c2) = let padding = (beginsAtLine lm2 - beginsAtLine lm1 - length c1) in CppOutputComponent lm1 (c1 ++ replicate padding "" ++ c2)