{-# LANGUAGE FlexibleInstances, OverloadedStrings #-} {- | Module : Data.GraphViz.Parsing Description : Helper functions for Parsing. Copyright : (c) Matthew Sackman, Ivan Lazar Miljenovic License : 3-Clause BSD-style Maintainer : Ivan.Miljenovic@gmail.com This module defines simple helper functions for use with "Text.ParserCombinators.Poly.Lazy". Note that the 'ParseDot' instances for 'Bool', etc. match those specified for use with Graphviz (e.g. non-zero integers are equivalent to 'True'). You should not be using this module; rather, it is here for informative/documentative reasons. If you want to parse a @'Data.GraphViz.Types.DotRepr'@, you should use @'Data.GraphViz.Types.parseDotGraph'@ rather than its 'ParseDot' instance. -} module Data.GraphViz.Parsing ( -- * Re-exporting pertinent parts of Polyparse. module Text.ParserCombinators.Poly.StateText -- * The ParseDot class. , Parse , ParseDot(..) , parseIt , parseIt' , runParser , runParser' , checkValidParse -- * Convenience parsing combinators. , bracket , onlyBool , quotelessString , stringBlock , numString , isNumString , isIntString , quotedString , parseEscaped , parseAndSpace , string , strings , character , parseStrictFloat , noneOf , whitespace , whitespace' , allWhitespace , allWhitespace' , wrapWhitespace , optionalQuotedString , optionalQuoted , quotedParse , orQuote , quoteChar , newline , newline' , parseComma , parseEq , tryParseList , tryParseList' , consumeLine , parseField , parseFields , parseFieldBool , parseFieldsBool , parseFieldDef , parseFieldsDef , commaSep , commaSepUnqt , commaSep' , stringRep , stringReps , stringParse , stringValue , parseAngled , parseBraced ) where import Data.GraphViz.Util import Data.GraphViz.State -- To avoid orphan instances and cyclic imports import Data.GraphViz.Attributes.ColorScheme import Data.GraphViz.Exception(GraphvizException(NotDotCode), throw) import Text.ParserCombinators.Poly.StateText hiding (bracket, empty, indent, runParser) import qualified Text.ParserCombinators.Poly.StateText as P import Data.Char( isDigit , isSpace , isLower , toLower , toUpper ) import Data.List(groupBy, sortBy) import Data.Function(on) import Data.Maybe(fromMaybe, isNothing, listToMaybe) import Data.Ratio((%)) import qualified Data.Set as Set import qualified Data.Text.Lazy as T import qualified Data.Text.Lazy.Read as T import Data.Text.Lazy(Text) import Data.Word(Word8, Word16) import Control.Arrow(first, second) import Control.Monad(liftM, liftM2, when) -- ----------------------------------------------------------------------------- -- Based off code from Text.Parse in the polyparse library -- | A @ReadS@-like type alias. type Parse a = Parser GraphvizState a runParser :: Parse a -> Text -> (Either String a, Text) runParser p t = let (r,_,t') = P.runParser p initialState t in (r,t') -- | A variant of 'runParser' where it is assumed that the provided -- parsing function consumes all of the 'Text' input (with the -- exception of whitespace at the end). runParser' :: Parse a -> Text -> a runParser' p = checkValidParse . fst . runParser p' where p' = p `discard` (allWhitespace' >> eof) class ParseDot a where parseUnqt :: Parse a parse :: Parse a parse = optionalQuoted parseUnqt parseUnqtList :: Parse [a] parseUnqtList = bracketSep (parseAndSpace $ character '[') ( wrapWhitespace parseComma `onFail` allWhitespace ) (allWhitespace' >> character ']') parseUnqt parseList :: Parse [a] parseList = quotedParse parseUnqtList -- | Parse the required value, returning also the rest of the input -- 'String' that hasn't been parsed (for debugging purposes). parseIt :: (ParseDot a) => Text -> (a, Text) parseIt = first checkValidParse . runParser parse -- | If unable to parse /Dot/ code properly, 'throw' a -- 'GraphvizException'. checkValidParse :: Either String a -> a checkValidParse (Left err) = throw (NotDotCode err) checkValidParse (Right a) = a -- | Parse the required value with the assumption that it will parse -- all of the input 'String'. parseIt' :: (ParseDot a) => Text -> a parseIt' = runParser' parse instance ParseDot Int where parseUnqt = parseInt' instance ParseDot Integer where parseUnqt = parseSigned parseInt instance ParseDot Word8 where parseUnqt = parseInt instance ParseDot Word16 where parseUnqt = parseInt instance ParseDot Double where parseUnqt = parseFloat' parseUnqtList = sepBy1 parseUnqt (character ':') parseList = quotedParse parseUnqtList `onFail` liftM return parse instance ParseDot Bool where parseUnqt = onlyBool `onFail` liftM (zero /=) parseInt' where zero :: Int zero = 0 -- | Use this when you do not want numbers to be treated as 'Bool' values. onlyBool :: Parse Bool onlyBool = oneOf [ stringRep True "true" , stringRep False "false" ] instance ParseDot Char where -- Can't be a quote character. parseUnqt = satisfy (quoteChar /=) parse = satisfy restIDString `onFail` quotedParse parseUnqt parseUnqtList = liftM T.unpack parseUnqt parseList = liftM T.unpack parse instance ParseDot Text where -- Too many problems with using this within other parsers where -- using numString or stringBlock will cause a parse failure. As -- such, this will successfully parse all un-quoted Texts. parseUnqt = quotedString parse = quotelessString `onFail` -- This will also take care of quoted versions of -- above. quotedParse quotedString instance (ParseDot a) => ParseDot [a] where parseUnqt = parseUnqtList parse = parseList -- | Parse a 'String' that doesn't need to be quoted. quotelessString :: Parse Text quotelessString = numString `onFail` stringBlock numString :: Parse Text numString = liftM tShow parseStrictFloat `onFail` liftM tShow parseInt' where tShow :: (Show a) => a -> Text tShow = T.pack . show stringBlock :: Parse Text stringBlock = do frst <- satisfy frstIDString rest <- manySatisfy restIDString return $ frst `T.cons` rest -- | Used when quotes are explicitly required; quotedString :: Parse Text quotedString = parseEscaped True [] [] parseSigned :: (Num a) => Parse a -> Parse a parseSigned p = (character '-' >> liftM negate p) `onFail` p parseInt :: (Integral a) => Parse a parseInt = do cs <- many1Satisfy isDigit case T.decimal cs of Right (n,"") -> return n Right (_,txt) -> fail $ "Trailing digits not parsed as Integral: " ++ T.unpack txt Left err -> fail $ "Could not read Integral: " ++ err `adjustErr` ("Expected one or more digits\n\t"++) parseInt' :: Parse Int parseInt' = parseSigned parseInt -- | Parse a floating point number that actually contains decimals. parseStrictFloat :: Parse Double parseStrictFloat = parseSigned parseFloat parseFloat :: (RealFrac a) => Parse a parseFloat = do ds <- manySatisfy isDigit frac <- optional $ do character '.' manySatisfy isDigit when (T.null ds && noDec frac) (fail "No actual digits in floating point number!") expn <- optional parseExp when (isNothing frac && isNothing expn) (fail "This is an integer, not a floating point number!") let frac' = fromMaybe "" frac expn' = fromMaybe 0 expn ( return . fromRational . (* (10^^(expn' - fromIntegral (T.length frac')))) . (%1) . runParser' parseInt) (ds `T.append` frac') `onFail` fail "Expected a floating point number" where parseExp = do character 'e' ((character '+' >> parseInt) `onFail` parseInt') noDec = maybe True T.null parseFloat' :: Parse Double parseFloat' = parseSigned ( parseFloat `onFail` liftM fI parseInt ) where fI :: Integer -> Double fI = fromIntegral -- ----------------------------------------------------------------------------- -- | Parse a bracketed item, discarding the brackets. -- -- The definition of @bracket@ defined in Polyparse uses -- 'adjustErrBad' and thus doesn't allow backtracking and trying the -- next possible parser. This is a version of @bracket@ that does. bracket :: Parse bra -> Parse ket -> Parse a -> Parse a bracket open close pa = do open `adjustErr` ("Missing opening bracket:\n\t"++) pa `discard` (close `adjustErr` ("Missing closing bracket:\n\t"++)) parseAndSpace :: Parse a -> Parse a parseAndSpace p = p `discard` allWhitespace' string :: String -> Parse () string = mapM_ character stringRep :: a -> String -> Parse a stringRep v = stringReps v . return stringReps :: a -> [String] -> Parse a stringReps v ss = oneOf (map string ss) >> return v stringParse :: [(String, Parse a)] -> Parse a stringParse = toPM . sortBy (flip compare `on` fst) where toPM = oneOf . map mkPM . groupBy ((==) `on` (listToMaybe . fst)) mkPM [("",p)] = p mkPM [(str,p)] = string str >> p mkPM kv = character (head . fst $ head kv) >> toPM (map (first tail) kv) stringValue :: [(String, a)] -> Parse a stringValue = stringParse . map (second return) strings :: [String] -> Parse () strings = oneOf . map string character :: Char -> Parse Char character c = satisfy parseC `adjustErr` (const $ "Not the expected character: " ++ [c]) where parseC c' = c' == c || c == flipCase c' flipCase c' = if isLower c' then toUpper c' else toLower c' noneOf :: [Char] -> Parse Char noneOf t = satisfy (\x -> all (/= x) t) whitespace :: Parse () whitespace = many1Satisfy isSpace >> return () whitespace' :: Parse () whitespace' = manySatisfy isSpace >> return () allWhitespace :: Parse () allWhitespace = (whitespace `onFail` newline) >> allWhitespace' allWhitespace' :: Parse () allWhitespace' = newline' >> whitespace' -- | Parse and discard optional whitespace. wrapWhitespace :: Parse a -> Parse a wrapWhitespace = bracket allWhitespace' allWhitespace' optionalQuotedString :: String -> Parse () optionalQuotedString = optionalQuoted . string optionalQuoted :: Parse a -> Parse a optionalQuoted p = quotedParse p `onFail` p quotedParse :: Parse a -> Parse a quotedParse = bracket parseQuote parseQuote parseQuote :: Parse Char parseQuote = character quoteChar orQuote :: Parse Char -> Parse Char orQuote p = stringRep quoteChar "\\\"" `onFail` p quoteChar :: Char quoteChar = '"' -- | Parse a 'String' where the provided 'Char's (as well as @\"@ and -- @\\@) are escaped and the second list of 'Char's are those that -- are not permitted. Note: does not parse surrounding quotes. The -- 'Bool' value indicates whether empty 'String's are allowed or -- not. parseEscaped :: Bool -> [Char] -> [Char] -> Parse Text parseEscaped empt cs bnd = liftM T.pack . lots $ qPrs `onFail` oth where lots = if empt then many else many1 cs' = quoteChar : slash : cs csSet = Set.fromList cs' bndSet = Set.fromList bnd `Set.union` csSet slash = '\\' -- Have to allow standard slashes qPrs = do character slash mE <- optional $ oneOf (map character cs') return $ fromMaybe slash mE oth = satisfy (`Set.notMember` bndSet) newline :: Parse () newline = strings ["\r\n", "\n", "\r"] -- | Consume all whitespace and newlines until a line with -- non-whitespace is reached. The whitespace on that line is -- not consumed. newline' :: Parse () newline' = many (whitespace' >> newline) >> return () -- | Parses and returns all characters up till the end of the line, -- but does not touch the newline characters. consumeLine :: Parse Text consumeLine = manySatisfy (`notElem` ['\n','\r']) parseEq :: Parse () parseEq = wrapWhitespace (character '=') >> return () parseField :: (ParseDot a) => (a -> b) -> String -> [(String, Parse b)] parseField c fld = [(fld, parseEq >> liftM c parse)] parseFields :: (ParseDot a) => (a -> b) -> [String] -> [(String, Parse b)] parseFields c = concatMap (parseField c) parseFieldBool :: (Bool -> b) -> String -> [(String, Parse b)] parseFieldBool = flip parseFieldDef True parseFieldsBool :: (Bool -> b) -> [String] -> [(String, Parse b)] parseFieldsBool c = concatMap (parseFieldBool c) -- | For 'Bool'-like data structures where the presence of the field -- name without a value implies a default value. parseFieldDef :: (ParseDot a) => (a -> b) -> a -> String -> [(String, Parse b)] parseFieldDef c d fld = [(fld, p)] where p = (parseEq >> liftM c parse) `onFail` do nxt <- optional $ satisfy restIDString bool (fail "Not actually the field you were after") (return $ c d) (isNothing nxt) parseFieldsDef :: (ParseDot a) => (a -> b) -> a -> [String] -> [(String, Parse b)] parseFieldsDef c d = concatMap (parseFieldDef c d) commaSep :: (ParseDot a, ParseDot b) => Parse (a, b) commaSep = commaSep' parse parse commaSepUnqt :: (ParseDot a, ParseDot b) => Parse (a, b) commaSepUnqt = commaSep' parseUnqt parseUnqt commaSep' :: Parse a -> Parse b -> Parse (a,b) commaSep' pa pb = do a <- pa whitespace' parseComma whitespace' b <- pb return (a,b) parseComma :: Parse () parseComma = character ',' >> return () tryParseList :: (ParseDot a) => Parse [a] tryParseList = tryParseList' parse tryParseList' :: Parse [a] -> Parse [a] tryParseList' = liftM (fromMaybe []) . optional parseAngled :: Parse a -> Parse a parseAngled = bracket (character '<') (character '>') parseBraced :: Parse a -> Parse a parseBraced = bracket (character '{') (character '}') -- ----------------------------------------------------------------------------- -- These instances are defined here to avoid cyclic imports and orphan instances instance ParseDot ColorScheme where parseUnqt = do cs <- stringRep X11 "X11" `onFail` liftM Brewer parseUnqt setColorScheme cs return cs instance ParseDot BrewerScheme where parseUnqt = liftM2 BScheme parseUnqt parseUnqt instance ParseDot BrewerName where -- The order is different from above to make sure longer names are -- parsed first. parseUnqt = stringValue [ ("accent", Accent) , ("blues", Blues) , ("brbg", Brbg) , ("bugn", Bugn) , ("bupu", Bupu) , ("dark2", Dark2) , ("gnbu", Gnbu) , ("greens", Greens) , ("greys", Greys) , ("oranges", Oranges) , ("orrd", Orrd) , ("paired", Paired) , ("pastel1", Pastel1) , ("pastel2", Pastel2) , ("piyg", Piyg) , ("prgn", Prgn) , ("pubugn", Pubugn) , ("pubu", Pubu) , ("puor", Puor) , ("purd", Purd) , ("purples", Purples) , ("rdbu", Rdbu) , ("rdgy", Rdgy) , ("rdpu", Rdpu) , ("rdylbu", Rdylbu) , ("rdylgn", Rdylgn) , ("reds", Reds) , ("set1", Set1) , ("set2", Set2) , ("set3", Set3) , ("spectral", Spectral) , ("ylgnbu", Ylgnbu) , ("ylgn", Ylgn) , ("ylorbr", Ylorbr) , ("ylorrd", Ylorrd) ]