{-# LANGUAGE CPP #-} module Language.Dot.Parser ( parseDot #ifdef TEST , parsePort , parseCompass , parseAttribute , parseId #endif ) where import Control.Applicative ((<$>), (<*>), (<*), (*>)) import Control.Monad (when) import Data.Char (digitToInt, toLower) import Data.List (foldl') import Data.Maybe (fromJust, fromMaybe, isJust) import Numeric (readFloat) import Text.Parsec import Text.Parsec.Language import Text.Parsec.String import Text.Parsec.Token import Language.Dot.Syntax -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- parseDot :: String -- ^ origin of the data, e.g., the name of a file -> String -- ^ DOT source code -> Either ParseError Graph parseDot origin = parse (whiteSpace' >> parseGraph) origin . preprocess -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- preprocess :: String -> String preprocess = unlines . map commentPoundLines . lines where commentPoundLines [] = [] commentPoundLines line@(c:_) = if c == '#' then "// " ++ line else line -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- parseGraph :: Parser Graph parseGraph = ( Graph <$> parseGraphStrictness <*> parseGraphDirectedness <*> optionMaybe parseId <*> parseStatementList ) "graph" parseGraphStrictness :: Parser GraphStrictness parseGraphStrictness = ((reserved' "strict" >> return StrictGraph) <|> return UnstrictGraph) "graph strictness" parseGraphDirectedness :: Parser GraphDirectedness parseGraphDirectedness = ( (reserved' "graph" >> return UndirectedGraph) <|> (reserved' "digraph" >> return DirectedGraph) ) "graph directedness" -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- parseStatementList :: Parser [Statement] parseStatementList = braces' (parseStatement `endBy` optional semi') "statement list" parseStatement :: Parser Statement parseStatement = ( try parseEdgeStatement <|> try parseAttributeStatement <|> try parseAssignmentStatement <|> try parseSubgraphStatement <|> parseNodeStatement ) "statement" -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- parseNodeStatement :: Parser Statement parseNodeStatement = ( NodeStatement <$> parseNodeId <*> parseAttributeList ) "node statement" -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- parseEdgeStatement :: Parser Statement parseEdgeStatement = ( EdgeStatement <$> parseEntityList <*> parseAttributeList ) "edge statement" -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- parseAttributeStatement :: Parser Statement parseAttributeStatement = ( AttributeStatement <$> parseAttributeStatementType <*> parseAttributeList ) "attribute statement" parseAttributeStatementType :: Parser AttributeStatementType parseAttributeStatementType = ( (reserved' "graph" >> return GraphAttributeStatement) <|> (reserved' "node" >> return NodeAttributeStatement) <|> (reserved' "edge" >> return EdgeAttributeStatement) ) "attribute statement type" -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- parseAssignmentStatement :: Parser Statement parseAssignmentStatement = ( AssignmentStatement <$> parseId <*> (reservedOp' "=" *> parseId) ) "assignment statement" -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- parseSubgraphStatement :: Parser Statement parseSubgraphStatement = ( SubgraphStatement <$> parseSubgraph ) "subgraph statement" parseSubgraph :: Parser Subgraph parseSubgraph = ( try parseNewSubgraph <|> parseSubgraphRef ) "subgraph" parseNewSubgraph :: Parser Subgraph parseNewSubgraph = ( NewSubgraph <$> (optional (reserved' "subgraph") *> optionMaybe parseId) <*> parseStatementList ) "new subgraph" parseSubgraphRef :: Parser Subgraph parseSubgraphRef = ( SubgraphRef <$> (reserved' "subgraph" *> parseId) ) "subgraph ref" -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- parseEntityList :: Parser [Entity] parseEntityList = ( (:) <$> parseEntity True <*> many1 (parseEntity False) ) "entity list" parseEntity :: Bool -> Parser Entity parseEntity first = ( try (parseENodeId first) <|> parseESubgraph first ) "entity" parseENodeId :: Bool -> Parser Entity parseENodeId first = ( ENodeId <$> (if first then return NoEdge else parseEdgeType) <*> parseNodeId ) "entity node id" parseESubgraph :: Bool -> Parser Entity parseESubgraph first = ( ESubgraph <$> (if first then return NoEdge else parseEdgeType) <*> parseSubgraph ) "entity subgraph" parseEdgeType :: Parser EdgeType parseEdgeType = ( try (reservedOp' "->" >> return DirectedEdge) <|> (reservedOp' "--" >> return UndirectedEdge) ) "edge operator" -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- parseNodeId :: Parser NodeId parseNodeId = ( NodeId <$> parseId <*> optionMaybe parsePort ) "node id" -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- parsePort :: Parser Port parsePort = ( try parsePortC <|> parsePortI ) "port" parsePortC :: Parser Port parsePortC = ( PortC <$> (colon' *> parseCompass) ) "port (compass variant)" parsePortI :: Parser Port parsePortI = ( PortI <$> (colon' *> parseId) <*> optionMaybe (colon' *> parseCompass) ) "port (id variant)" -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- parseCompass :: Parser Compass parseCompass = (fmap convert identifier' >>= maybe err return) "compass" where err = parserFail "invalid compass value" convert = flip lookup table . stringToLower where table = [ ("n", CompassN), ("e", CompassE), ("s", CompassS), ("w", CompassW) , ("ne", CompassNE), ("nw", CompassNW), ("se", CompassSE), ("sw", CompassSW) ] -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- parseAttributeList :: Parser [Attribute] parseAttributeList = (brackets' (parseAttribute `sepBy` optional comma') <|> return []) "attribute list" parseAttribute :: Parser Attribute parseAttribute = ( do id0 <- parseId id1 <- optionMaybe (reservedOp' "=" >> parseId) return $ maybe (AttributeSetTrue id0) (AttributeSetValue id0) id1 ) "attribute" -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- parseId :: Parser Id parseId = ( try parseNameId <|> try parseStringId <|> try parseFloatId <|> try parseIntegerId <|> parseXmlId ) "id" parseNameId :: Parser Id parseNameId = ( NameId <$> identifier' ) "name" parseStringId :: Parser Id parseStringId = ( StringId <$> lexeme' (char '"' *> manyTill stringChar (char '"')) ) "string literal" where stringChar = (try (string "\\\"" >> return '"') <|> noneOf "\"") "string character" -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- | DOT allows floating point numbers having no whole part like @.123@, but -- Parsec 'float' does not accept them. parseFloatId :: Parser Id parseFloatId = lexeme' ( do s <- parseSign l <- fmap (fromMaybe 0) (optionMaybe parseNatural) _ <- char '.' r <- many1 digit maybe err return (make s (show l ++ "." ++ r)) ) "float" where err = parserFail "invalid float value" make s f = case readFloat f of [(v,"")] -> (Just . FloatId . s) v _ -> Nothing parseSign :: (Num a) => Parser (a -> a) parseSign = ( (char '-' >> return negate) <|> (char '+' >> return id) <|> return id ) "sign" -- | Non-'lexeme' variant of 'natural' for parsing the natural part of a float. parseNatural :: Parser Integer parseNatural = ( (char '0' >> return 0) <|> (convert <$> many1 digit) ) "natural" where convert = foldl' (\acc d -> 10 * acc + fromIntegral (digitToInt d)) 0 -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- parseIntegerId :: Parser Id parseIntegerId = ( IntegerId <$> integer' ) "integer" -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- parseXmlId :: Parser Id parseXmlId = ( XmlId <$> angles' parseXml ) "XML id" parseXml :: Parser Xml parseXml = ( try parseXmlEmptyTag <|> try parseXmlTag <|> parseXmlText ) "XML" parseXmlEmptyTag :: Parser Xml parseXmlEmptyTag = ( XmlEmptyTag <$> (char '<' *> parseXmlName) <*> (parseXmlAttributes <* (char '/' >> char '>')) ) "XML empty tag" parseXmlTag :: Parser Xml parseXmlTag = ( do (name, attributes) <- parseXmlTagOpen elements <- manyTill parseXml (lookAhead (try (parseXmlTagClose (Just name)))) parseXmlTagClose (Just name) return $ XmlTag name attributes elements ) "XML tag" parseXmlTagOpen :: Parser (XmlName, [XmlAttribute]) parseXmlTagOpen = ( (,) <$> (char '<' *> parseXmlName) <*> (parseXmlAttributes <* char '>') ) "XML opening tag" parseXmlTagClose :: Maybe XmlName -> Parser () parseXmlTagClose mn0 = ( do _ <- char '<' _ <- char '/' n1 <- parseXmlName _ <- char '>' when (isJust mn0 && fromJust mn0 /= n1) parserZero ) "XML closing tag " ++ "(" ++ which ++ ")" where which = case mn0 of Just (XmlName n) -> "for " ++ show n Nothing -> "any" parseXmlText :: Parser Xml parseXmlText = ( XmlText <$> anyChar `manyTill` lookAhead ( try (parseXmlEmptyTag >> return ()) <|> try (parseXmlTag >> return ()) <|> parseXmlTagClose Nothing ) ) "XML text" parseXmlAttributes :: Parser [XmlAttribute] parseXmlAttributes = many parseXmlAttribute "XML attribute list" parseXmlAttribute :: Parser XmlAttribute parseXmlAttribute = ( XmlAttribute <$> (parseXmlName <* reservedOp' "=") <*> parseXmlAttributeValue ) "XML attribute" parseXmlAttributeValue :: Parser XmlAttributeValue parseXmlAttributeValue = ( XmlAttributeValue <$> stringLiteral' ) "XML attribute value" parseXmlName :: Parser XmlName parseXmlName = ( XmlName <$> ((:) <$> c0 <*> (many c1 <* whiteSpace')) ) "XML name" where c0 = letter <|> cs c1 = alphaNum <|> cs cs = oneOf "-.:_" -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- angles' :: Parser a -> Parser a braces' :: Parser a -> Parser a brackets' :: Parser a -> Parser a colon' :: Parser String comma' :: Parser String identifier' :: Parser String integer' :: Parser Integer lexeme' :: Parser a -> Parser a reserved' :: String -> Parser () reservedOp' :: String -> Parser () semi' :: Parser String stringLiteral' :: Parser String whiteSpace' :: Parser () angles' = angles lexer braces' = braces lexer brackets' = brackets lexer colon' = colon lexer comma' = comma lexer identifier' = identifier lexer integer' = integer lexer lexeme' = lexeme lexer reserved' = reserved lexer reservedOp' = reservedOp lexer semi' = semi lexer stringLiteral' = stringLiteral lexer whiteSpace' = whiteSpace lexer lexer :: TokenParser () lexer = makeTokenParser dotDef where dotDef = emptyDef { commentStart = "/*" , commentEnd = "*/" , commentLine = "//" , nestedComments = True , identStart = letter <|> char '_' , identLetter = alphaNum <|> char '_' , opStart = oneOf "-=" , opLetter = oneOf "" , reservedOpNames = ["->", "--", "="] , reservedNames = ["digraph", "edge", "graph", "node", "strict", "subgraph"] , caseSensitive = False } -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- stringToLower :: String -> String stringToLower = map toLower