{- | Module : Data.GraphViz.PreProcessing Description : Pre-process imported Dot code. Copyright : (c) Ivan Lazar Miljenovic License : 3-Clause BSD-style Maintainer : Ivan.Miljenovic@gmail.com \"Real life\" Dot code contains various items that are not directly parseable by this library. This module defines the 'preProcess' function to remove these components, which include: * Comments (both @\/* ... *\/@ style and @\/\/ ... @ style); * Pre-processor lines (lines starting with a @#@); * Split lines (by inserting a @\\@ the rest of that \"line\" is continued on the next line). * Strings concatenated together using @\"...\" + \"...\"@; these are concatenated into one big string. -} module Data.GraphViz.PreProcessing(preProcess) where import Data.GraphViz.Parsing import Data.GraphViz.Exception(GraphvizException(NotDotCode), throw) import qualified Data.Text.Lazy as T import Data.Text.Lazy(Text) import qualified Data.Text.Lazy.Builder as B import Data.Text.Lazy.Builder(Builder) import Data.Monoid(Monoid(..), mconcat) -- ----------------------------------------------------------------------------- -- Filtering out unwanted Dot items such as comments -- | Remove unparseable features of Dot, such as comments and -- multi-line strings (which are converted to single-line strings). preProcess :: Text -> Text preProcess t = case fst $ runParser parseOutUnwanted t of (Right r) -> B.toLazyText r (Left l) -> throw (NotDotCode l) -- snd should be null -- | Parse out comments and make quoted strings spread over multiple -- lines only over a single line. Should parse the /entire/ input -- 'Text'. parseOutUnwanted :: Parse Builder parseOutUnwanted = mconcat <$> many getNext where getNext = parseOK `onFail` parseConcatStrings `onFail` parseHTML `onFail` parseUnwanted `onFail` fmap B.singleton next parseOK = B.fromLazyText <$> many1Satisfy (`notElem` ['\n', '\r', '\\', '/', '"', '<']) -- | Parses an unwanted part of the Dot code (comments and -- pre-processor lines; also un-splits lines). parseUnwanted :: (Monoid m) => Parse m parseUnwanted = oneOf [ parseLineComment , parseMultiLineComment , parsePreProcessor , parseSplitLine ] -- | Remove pre-processor lines (that is, those that start with a -- @#@). Will consume the newline from the beginning of the -- previous line, but will leave the one from the pre-processor line -- there (so in the end it just removes the line). parsePreProcessor :: (Monoid m) => Parse m parsePreProcessor = newline *> character '#' *> consumeLine *> pure mempty -- | Parse @//@-style comments. parseLineComment :: (Monoid m) => Parse m parseLineComment = string "//" -- Note: do /not/ consume the newlines, as they're -- needed in case the next line is a pre-processor -- line. *> consumeLine *> pure mempty -- | Parse @/* ... */@-style comments. parseMultiLineComment :: (Monoid m) => Parse m parseMultiLineComment = bracket start end (many inner) *> pure mempty where start = string "/*" end = string "*/" inner = (many1Satisfy ('*' /=) *> pure ()) `onFail` (character '*' *> satisfy ('/' /=) *> inner) parseConcatStrings :: Parse Builder parseConcatStrings = wrapQuotes . mconcat <$> sepBy1 parseString parseConcat where qParse = bracket (character '"') (commit $ character '"') parseString = qParse (mconcat <$> many parseInner) parseInner = (string "\\\"" *> pure (B.fromLazyText $ T.pack "\\\"")) `onFail` -- Need to parse an explicit `\', in case it ends the -- string (and thus the next step would get parsed by the -- previous option). (string "\\\\" *> pure (B.fromLazyText $ T.pack "\\\\")) `onFail` parseSplitLine -- in case there's a split mid-quote `onFail` fmap B.singleton (satisfy (quoteChar /=)) parseConcat = parseSep *> character '+' *> parseSep parseSep = many $ whitespace1 `onFail` parseUnwanted wrapQuotes str = qc `mappend` str `mappend` qc qc = B.singleton '"' -- | Lines can be split with a @\\@ at the end of the line. parseSplitLine :: (Monoid m) => Parse m parseSplitLine = character '\\' *> newline *> pure mempty parseHTML :: Parse Builder parseHTML = fmap (addAngled . mconcat) . parseAngled $ many inner where inner = parseHTML `onFail` (B.fromLazyText <$> many1Satisfy (\c -> c /= open && c /= close)) addAngled str = B.singleton open `mappend` str `mappend` B.singleton close open = '<' close = '>'