{-# LANGUAGE OverloadedStrings #-} -- | -- Module: NetSpider.Unify -- Description: LinkSampleUnifier type -- Maintainer: Toshio Ito -- -- module NetSpider.Unify ( -- * Types LinkSampleUnifier, LinkSample(..), LinkSampleID, linkSampleId, toLinkSamples, -- * Standard unifiers unifyToOne, unifyToMany, unifyStd, UnifyStdConfig(..), defUnifyStdConfig, -- * Building blocks latestLinkSample, defNegatesLinkSample ) where import Control.Monad (mapM) import Data.Foldable (maximumBy) import Data.Function (on) import Data.Hashable (Hashable(hashWithSalt)) import Data.Maybe (catMaybes) import Data.Monoid ((<>)) import GHC.Exts (groupWith) import NetSpider.Found (FoundLink, LinkState, FoundNode) import qualified NetSpider.Found as Found import NetSpider.Log (WriterLoggingM, logDebugW, spack) import NetSpider.Pair (Pair(..)) import NetSpider.Snapshot (SnapshotNode, nodeTimestamp, nodeId, SnapshotLink) import NetSpider.Timestamp (Timestamp) -- | 'LinkSample' is an intermediate type between -- 'NetSpider.Found.FoundLink' and -- 'NetSpider.Snapshot.SnapshotLink'. 'LinkSample's are collected from -- the history graph, and are unified into -- 'NetSpider.Snapshot.SnapshotLink's. data LinkSample n la = LinkSample { lsSubjectNode :: n, lsTargetNode :: n, lsLinkState :: LinkState, lsTimestamp :: Timestamp, lsLinkAttributes :: la } deriving (Show,Eq,Ord) -- | Link ID of the 'LinkSample'. It's the 'Pair' of 'lsSubjectNode' -- and 'lsTargetNode'. type LinkSampleID n = Pair n -- | Get 'LinkSampleID' of the 'LinkSample'. linkSampleId :: LinkSample n la -> LinkSampleID n linkSampleId l = Pair (lsSubjectNode l, lsTargetNode l) -- | Make 'LinkSample's from 'FoundLink's in 'FoundNode'. -- -- @since 0.4.2.0 toLinkSamples :: FoundNode n na la -> [LinkSample n la] toLinkSamples fn = map fromFoundLink $ Found.neighborLinks fn where fromFoundLink fl = LinkSample { lsSubjectNode = Found.subjectNode fn, lsTargetNode = Found.targetNode fl, lsLinkState = Found.linkState fl, lsTimestamp = Found.foundAt fn, lsLinkAttributes = Found.linkAttributes fl } -- | Function to unify 'LinkSample's collected for the given pair of -- nodes and return 'LinkSample' per physical link. The returned -- 'LinkSample's will be directly converted to 'SnapshotLink's in the -- snapshot graph. -- -- This function has a number of important roles during construction -- of the snapshot graph. -- -- - Because a link can be observed from both of its end nodes, there -- can be multiple 'LinkSample's for one physical link. This -- function is supposed to return one reasonable link sample for the -- physical link from those input link samples. -- - There can be multiple physical links for a given pair of nodes, -- but the 'Spider' has no way to distinguish them. So, this -- function is supposed to distinguish 'LinkSample's for different -- physical links, and return one or more 'LinkSample's, each of -- which corresponds to a physical link. -- - Sometimes a link is found by one end node but not found by the -- other end node. Should 'Spider' treats the link is available or -- not? This function is supposed to answer that question by -- returning non-empty result (if the link is available) or empty -- result (if the link is not available.) -- - Sometimes it is natural to have different data models of link -- attributes for 'FoundLink's (@fla@) and for 'SnapshotLink's -- (@sla@). For example, when you want to combine link attributes -- obtained from both of the end nodes to make the link attributes -- of 'SnapshotLink'. This function is supposed to convert the link -- attribute type. type LinkSampleUnifier n na fla sla = SnapshotNode n na -> SnapshotNode n na -> [LinkSample n fla] -> WriterLoggingM [LinkSample n sla] -- | Unify 'LinkSample's to one. This is the sensible unifier if there -- is at most one physical link for a given pair of nodes. unifyToOne :: (Eq n, Show n) => LinkSampleUnifier n na la la unifyToOne = unifyStd defUnifyStdConfig -- | Unify 'LinkSample's to possibly multiple samples. The input -- samples are partitioned to groups based on the link sub-ID, defined -- by the given getter function. Each group represents one of the -- final samples. unifyToMany :: (Eq n, Show n, Ord lsid) => (LinkSample n fla -> lsid) -- ^ Getter of the link sub-ID -> LinkSampleUnifier n na fla fla unifyToMany getKey = unifyStd conf where conf = defUnifyStdConfig { makeLinkSubId = getKey } -- | Configuration of 'unifyStd'. See 'unifyStd' for detail. data UnifyStdConfig n na fla sla lsid = UnifyStdConfig { makeLinkSubId :: LinkSample n fla -> lsid, -- ^ Function to create the link sub-ID from -- 'LinkSample'. -- -- Default: always return @()@. All 'LinkSample's have the same -- link sub-ID. mergeSamples :: [LinkSample n fla] -> [LinkSample n fla] -> Maybe (LinkSample n sla), -- ^ Function to merge 'LinkSample's for a physical link. The -- input is 'LinkSample's found by each of the end nodes. This -- function optionally converts the link attributes from @fla@ to -- @sla@. -- -- Default: Concatenate the input 'LinkSample's and get -- 'latestLinkSample'. negatesLinkSample :: SnapshotNode n na -> LinkSample n sla -> Bool -- ^ This function is supposed to return 'True' if the -- 'SnapshotNode' negates the presence of the 'LinkSample'. -- -- Default: 'defNegatesLinkSample'. } -- | Default of 'UnifyStdConfig'. defUnifyStdConfig :: Eq n => UnifyStdConfig n na fla fla () defUnifyStdConfig = UnifyStdConfig { makeLinkSubId = const (), mergeSamples = \ls rs -> latestLinkSample (ls ++ rs), negatesLinkSample = defNegatesLinkSample } -- | The standard unifier. This unifier does the following. -- -- 1. It partitions 'LinkSample's based on their link sub-IDs. The -- link sub-ID is defined by 'makeLinkSubId' function. Each link -- sub-ID corresponds to a physical link. -- 2. For each partition, 'LinkSample's are merged to one using -- 'mergeSamples' function. -- 3. After merge, the link is checked against its end nodes. If -- 'negatesLinkSample' returns 'True' for either of the end nodes, -- the link is removed from the final result. unifyStd :: (Eq n, Show n, Ord lsid) => UnifyStdConfig n na fla sla lsid -> LinkSampleUnifier n na fla sla unifyStd conf lnode rnode input_samples = do let groups = groupWith (makeLinkSubId conf) input_samples logDebug ( "Group " <> (spack $ length input_samples) <> " samples into " <> (spack $ length groups) <> " groups by link sub-ID." ) fmap catMaybes $ mapM forGroup $ zip groups ([0 ..] :: [Int]) where logDebug msg = logDebugW ("unifyStd: " <> msg) samplesFor samples sn = filter (\s -> nodeId sn == (lsSubjectNode s)) samples forGroup (samples, group_i) = case mergeSamples conf (samplesFor samples lnode) (samplesFor samples rnode) of Nothing -> do logDebugG ("No link after mergeSamples") return Nothing ml -> maybeNegates rnode =<< maybeNegates lnode ml where logDebugG msg = logDebug ("group " <> spack group_i <> ": " <> msg) maybeNegates _ Nothing = return Nothing maybeNegates sn (Just sample) = if negatesLinkSample conf sn sample then do logDebugG ("Merged sample is negated by node " <> (spack $ nodeId sn)) return Nothing else return $ Just sample -- | Get the 'LinkSample' that has the latest (biggest) timestamp. latestLinkSample :: [LinkSample n la] -> Maybe (LinkSample n la) latestLinkSample [] = Nothing latestLinkSample samples = Just $ maximumBy comp samples where comp = compare `on` lsTimestamp -- | Default of 'negatesLinkSample'. This function returns 'True' if -- all of the following conditions are met. -- -- - The 'SnapshotNode' has 'nodeTimestamp'. -- - The 'nodeTimestamp' is greater (newer) than the link's timestamp. -- - The 'lsSubjectNode' is not the node ID of the 'SnapshotNode'. -- -- If the above conditions are met, it implies that the 'LinkSample' -- is found by the other end node, but the given 'SnapshotNode' does -- not find it. This is possibly because the link has just -- disappeared, so the link should be negated. defNegatesLinkSample :: Eq n => SnapshotNode n na -> LinkSample n la -> Bool defNegatesLinkSample sn l = case nodeTimestamp sn of Nothing -> False Just t -> lsTimestamp l < t && lsSubjectNode l /= nodeId sn