{-# LANGUAGE OverloadedStrings, DeriveGeneric, TypeSynonymInstances, FlexibleInstances #-} -- | -- Module: NetSpider.GraphML.Writer -- Description: Serialize a Snapshot graph into GraphML format. -- Maintainer: Toshio Ito -- -- This module defines tools to serialize a 'SnapshotGraph' into -- [GraphML](http://graphml.graphdrawing.org/primer/graphml-primer.html) -- format. -- -- @since 0.3.1.0 module NetSpider.GraphML.Writer ( -- * Functions writeGraphML, writeGraphMLWith, -- * Options WriteOption, defWriteOption, -- ** accessors for WriteOption woptDefaultDirected, -- * NodeID NodeID, ToNodeID(..), nodeIDByShow, -- * Attributes (re-exports) AttributeKey, AttributeValue(..), ToAttributes(..), valueFromAeson, attributesFromAeson ) where import Data.Foldable (foldl') import Data.Greskell.Graph (Property(..)) import Data.Hashable (Hashable(..)) import Data.HashMap.Strict (HashMap) import qualified Data.HashMap.Strict as HM import Data.Int (Int8, Int16, Int32, Int64) import Data.Maybe (catMaybes) import Data.Monoid ((<>), Monoid(..), mconcat) import Data.Semigroup (Semigroup(..)) import Data.Text (Text, pack, unpack) import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Builder as TLB import Data.Time (TimeZone(..)) import Data.Word (Word8, Word16, Word32, Word64) import GHC.Generics (Generic) import NetSpider.GraphML.Attribute ( AttributeKey, AttributeValue(..), ToAttributes(..), valueFromAeson, attributesFromAeson ) import NetSpider.Snapshot ( SnapshotNode, nodeId, nodeTimestamp, isOnBoundary, nodeAttributes, SnapshotLink, sourceNode, destinationNode, linkTimestamp, isDirected, linkAttributes, SnapshotGraph ) import NetSpider.Timestamp ( Timestamp(epochTime, timeZone), showTimestamp ) -- | Node ID in GraphML. type NodeID = Text -- | Type that can be converted to 'NodeID'. class ToNodeID a where toNodeID :: a -> NodeID -- | Make a 'NodeID' by calling 'show'. nodeIDByShow :: Show a => a -> NodeID nodeIDByShow = pack . show instance ToNodeID Text where toNodeID = id instance ToNodeID TL.Text where toNodeID = TL.toStrict instance ToNodeID String where toNodeID = pack instance ToNodeID Int where toNodeID = nodeIDByShow instance ToNodeID Int8 where toNodeID = nodeIDByShow instance ToNodeID Int16 where toNodeID = nodeIDByShow instance ToNodeID Int32 where toNodeID = nodeIDByShow instance ToNodeID Int64 where toNodeID = nodeIDByShow instance ToNodeID Word where toNodeID = nodeIDByShow instance ToNodeID Word8 where toNodeID = nodeIDByShow instance ToNodeID Word16 where toNodeID = nodeIDByShow instance ToNodeID Word32 where toNodeID = nodeIDByShow instance ToNodeID Integer where toNodeID = nodeIDByShow instance ToNodeID Float where toNodeID = nodeIDByShow instance ToNodeID Double where toNodeID = nodeIDByShow instance ToNodeID Bool where toNodeID True = "true" toNodeID False = "false" sbuild :: Show a => a -> TLB.Builder sbuild = TLB.fromString . show showAttributeValue :: AttributeValue -> TLB.Builder showAttributeValue v = case v of AttrBoolean False -> "false" AttrBoolean True -> "true" AttrInt i -> sbuild i AttrLong i -> sbuild i AttrFloat f -> sbuild f AttrDouble d -> sbuild d AttrString t -> encodeXML t -- | Type specifier of 'AttributeValue' data AttributeType = ATBoolean | ATInt | ATLong | ATFloat | ATDouble | ATString deriving (Show,Eq,Ord,Generic) instance Hashable AttributeType valueType :: AttributeValue -> AttributeType valueType v = case v of AttrBoolean _ -> ATBoolean AttrInt _ -> ATInt AttrLong _ -> ATLong AttrFloat _ -> ATFloat AttrDouble _ -> ATDouble AttrString _ -> ATString showAttributeType :: AttributeType -> TLB.Builder showAttributeType t = case t of ATBoolean -> "boolean" ATInt -> "int" ATLong -> "long" ATFloat -> "float" ATDouble -> "double" ATString -> "string" -- | Domain (`for` field of the key) of attribute. data AttributeDomain = DomainGraph | DomainNode | DomainEdge | DomainAll deriving (Show,Eq,Ord,Generic) instance Hashable AttributeDomain showDomain :: AttributeDomain -> TLB.Builder showDomain d = case d of DomainGraph -> "graph" DomainNode -> "node" DomainEdge -> "edge" DomainAll -> "all" -- | Meta data for a key. data KeyMeta = KeyMeta { kmName :: AttributeKey, kmType :: AttributeType, kmDomain :: AttributeDomain } deriving (Show,Eq,Ord,Generic) instance Hashable KeyMeta makeMetaValue :: AttributeDomain -> AttributeKey -> AttributeValue -> (KeyMeta, AttributeValue) makeMetaValue d k v = (meta, v) where meta = KeyMeta { kmName = k, kmType = valueType v, kmDomain = d } -- | Storage of key metadata. data KeyStore = KeyStore { ksMetas :: [KeyMeta], ksIndexFor :: HashMap KeyMeta Int } deriving (Show,Eq,Ord) emptyKeyStore :: KeyStore emptyKeyStore = KeyStore { ksMetas = [], ksIndexFor = HM.empty } addKey :: KeyMeta -> KeyStore -> KeyStore addKey new_meta ks = KeyStore { ksMetas = if HM.member new_meta $ ksIndexFor ks then ksMetas ks else new_meta : ksMetas ks, ksIndexFor = HM.insertWith (\_ old -> old) new_meta (length $ ksMetas ks) $ ksIndexFor ks } keyIndex :: KeyStore -> KeyMeta -> Maybe Int keyIndex ks km = HM.lookup km $ ksIndexFor ks showAllKeys :: TLB.Builder -> KeyStore -> TLB.Builder showAllKeys prefix ks = mconcat $ map (prefix <>) $ catMaybes $ map (showKeyMeta ks) $ reverse $ ksMetas ks showKeyMeta :: KeyStore -> KeyMeta -> Maybe TLB.Builder showKeyMeta ks km = fmap (\i -> showKeyMetaWithIndex i km) $ keyIndex ks km showAttributeID :: Int -> TLB.Builder showAttributeID index = "d" <> sbuild index showKeyMetaWithIndex :: Int -> KeyMeta -> TLB.Builder showKeyMetaWithIndex index km = " id_str <> "\" for=\"" <> domain_str <> "\" attr.name=\"" <> name_str <> "\" attr.type=\"" <> type_str <> "\"/>\n" where id_str = showAttributeID index domain_str = showDomain $ kmDomain km name_str = encodeXML $ kmName km type_str = showAttributeType $ kmType km nodeMetaKeys :: ToAttributes na => SnapshotNode n na -> [KeyMeta] nodeMetaKeys n = map fst $ nodeMetaValues n nodeMetaValues :: ToAttributes na => SnapshotNode n na -> [(KeyMeta, AttributeValue)] nodeMetaValues n = map convert $ base <> attrs where timestamp = case nodeTimestamp n of Nothing -> [] Just t -> toAttributes t base = timestamp <> [("@is_on_boundary", AttrBoolean $ isOnBoundary n)] attrs = toAttributes $ nodeAttributes n convert (k, v) = makeMetaValue DomainNode k v linkMetaKeys :: ToAttributes la => SnapshotLink n la -> [KeyMeta] linkMetaKeys l = map fst $ linkMetaValues l linkMetaValues :: ToAttributes la => SnapshotLink n la -> [(KeyMeta, AttributeValue)] linkMetaValues l = map convert $ base <> attrs where base = toAttributes $ linkTimestamp l attrs = toAttributes $ linkAttributes l convert (k, v) = makeMetaValue DomainEdge k v showAttribute :: KeyStore -> KeyMeta -> AttributeValue -> TLB.Builder showAttribute ks km val = case keyIndex ks km of Nothing -> "" Just index -> " key_id_str <> "\">" <> val_str <> "\n" where key_id_str = showAttributeID index val_str = showAttributeValue val -- | Options to write GraphML. Use 'defWriteOption' to get the default -- values. data WriteOption = WriteOption { woptDefaultDirected :: Bool -- ^ If 'True', set GraphML's @edgedefault@ attribute to -- @directed@. If 'False', set it to @undirected@. Note that -- regardless of this option, each @edge@ element specifies -- @directed@ attribute explicitly. -- -- Default: 'True' } deriving (Show,Eq,Ord) defWriteOption :: WriteOption defWriteOption = WriteOption { woptDefaultDirected = True } -- | 'writeGraphMLWith' the default options. writeGraphML :: (ToNodeID n, ToAttributes na, ToAttributes la) => SnapshotGraph n na la -> TL.Text writeGraphML = writeGraphMLWith defWriteOption -- | Show the 'SnapshotGraph' into GraphML format. 'WriteOption' -- controls the conversion. writeGraphMLWith :: (ToNodeID n, ToAttributes na, ToAttributes la) => WriteOption -> SnapshotGraph n na la -> TL.Text writeGraphMLWith wopt (input_nodes, input_links) = TLB.toLazyText ( xml_header <> graphml_header <> keys <> graph_header <> nodes <> edges <> graph_footer <> graphml_footer ) where xml_header = "\n" graphml_header = " " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" <> " xsi:schemaLocation=\"http://graphml.graphdrawing.org/xmlns http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd\">\n" graphml_footer = "\n" keys = showAllKeys "" key_store key_metas = concat $ map nodeMetaKeys input_nodes <> map linkMetaKeys input_links key_store = foldl' (\acc m -> addKey m acc) emptyKeyStore key_metas edgedefault_str = if woptDefaultDirected wopt then "directed" else "undirected" graph_header = " edgedefault_str <> "\">\n" graph_footer = "\n" nodes = mconcat $ map writeNode input_nodes edges = mconcat $ map writeLink input_links showAttribute' (k,v) = (" " <> ) $ showAttribute key_store k v writeNode n = " (encodeNodeID $ nodeId n) <> "\">\n" <> (mconcat $ map showAttribute' $ nodeMetaValues n) <> " \n" writeLink l = " (encodeNodeID $ sourceNode l) <> "\" target=\"" <> (encodeNodeID $ destinationNode l) <> "\" directed=\"" <> showDirected l <> "\">\n" <> (mconcat $ map showAttribute' $ linkMetaValues l) <> " \n" showDirected l = if isDirected l then "true" else "false" encodeNodeID :: ToNodeID n => n -> TLB.Builder encodeNodeID = encodeXML . toNodeID encodeXML :: Text -> TLB.Builder encodeXML = mconcat . map escape . unpack where escape c = case c of '<' -> "<" '>' -> ">" '&' -> "&" '"' -> """ '\'' -> "'" '\n' -> " " '\r' -> " " _ -> TLB.singleton c