Data/GraphViz/PreProcessing.hs

{- |
   Module      : Data.GraphViz.PreProcessing
   Description : Pre-process imported Dot code.
   Copyright   : (c) Ivan Lazar Miljenovic
   License     : 3-Clause BSD-style
   Maintainer  : Ivan.Miljenovic@gmail.com

   "Real life" Dot code contains various items that are not directly
   parseable by this library.  This module defines the 'preProcess'
   function to remove these components, which include:

     * Comments (both @\/* ... *\/@ style and @\/\/ ... @ style);

     * Pre-processor lines (lines starting with a @#@);

     * Split lines (by inserting a @\\@ the rest of that \"line\" is
       continued on the next line).

     * 'String's concatenated together using @\"...\" + \"...\"@;
       these are concatenated into one big 'String'.
-}
module Data.GraphViz.PreProcessing(preProcess) where

import Data.GraphViz.Parsing

import Control.Monad(liftM)

-- -----------------------------------------------------------------------------
-- Filtering out unwanted Dot items such as comments

-- | Remove unparseable features of Dot, such as comments and
--   multi-line strings (which are converted to single-line strings).
preProcess :: String -> String
preProcess = runParser' parseOutUnwanted
             -- snd should be null

-- | Parse out comments and make quoted strings spread over multiple
--   lines only over a single line.  Should parse the /entire/ input
--   'String'.
parseOutUnwanted :: Parse String
parseOutUnwanted = liftM concat (many getNext)
    where
      getNext = parseConcatStrings
                `onFail`
                parseHTML
                `onFail`
                (parseUnwanted >> return [])
                `onFail`
                liftM return next

-- | Parses an unwanted part of the Dot code (comments and
--   pre-processor lines; also un-splits lines).
parseUnwanted :: Parse ()
parseUnwanted = oneOf [ parseLineComment
                      , parseMultiLineComment
                      , parsePreProcessor
                      , parseSplitLine
                      ]
                >> return ()

-- | Remove pre-processor lines (that is, those that start with a
--   @#@).  Will consume the newline from the beginning of the
--   previous line, but will leave the one from the pre-processor line
--   there (so in the end it just removes the line).
parsePreProcessor :: Parse String
parsePreProcessor = do newline
                       character '#'
                       consumeLine

-- | Parse @//@-style comments.
parseLineComment :: Parse String
parseLineComment = string "//"
                   -- Note: do /not/ consume the newlines, as they're
                   -- needed in case the next line is a pre-processor
                   -- line.
                   >> consumeLine

-- | Parse @/* ... */@-style comments.
parseMultiLineComment :: Parse String
parseMultiLineComment = bracket start end (liftM concat $ many inner)
    where
      start = string "/*"
      end = string "*/"
      inner = many1 (satisfy ((/=) '*'))
              `onFail`
              do ast <- character '*'
                 n <- satisfy ((/=) '/')
                 liftM ((:) ast . (:) n) inner

parseConcatStrings :: Parse String
parseConcatStrings = liftM (wrapQuotes . concat)
                     $ sepBy1 parseString parseConcat
  where
    parseString = quotedParse (liftM concat $ many parseInner)
    parseInner = string "\\\""
                 `onFail`
                 parseSplitLine -- in case there's a split mid-quote
                 `onFail`
                 liftM return (satisfy ((/=) quoteChar))
    parseConcat = parseSep >> character '+' >> parseSep
    parseSep = many $ allWhitespace `onFail` parseUnwanted
    wrapQuotes str = quoteChar : str ++ [quoteChar]


-- | Lines can be split with a @\\@ at the end of the line.
parseSplitLine :: Parse String
parseSplitLine = character '\\' >> newline >> return ""

parseHTML :: Parse String
parseHTML = liftM (addQuotes . concat)
            . parseAngled $ many inner
  where
    inner = parseHTML
            `onFail`
            many1 (satisfy (\c -> c /= open && c /= close))
    addQuotes str = open : str ++ [close]
    open = '<'
    close = '>'