{- |
   Module      : Data.Graph.Analysis
   Description : A Graph-Theoretic Analysis Library.
   Copyright   : (c) Ivan Lazar Miljenovic 2009
   License     : 2-Clause BSD
   Maintainer  : Ivan.Miljenovic@gmail.com

   This is the root module of the /Graphalyze/ library, which aims to
   provide a way of analysing the relationships inherent in discrete
   data as a graph.

   The original version of this library was written as part of my
   mathematics honours thesis,
   /Graph-Theoretic Analysis of the Relationships in Discrete Data/.
 -}
module Data.Graph.Analysis
    ( version,
      -- * Re-exporting other modules
      module Data.Graph.Analysis.Types,
      module Data.Graph.Analysis.Utils,
      module Data.Graph.Analysis.Algorithms,
      module Data.Graph.Analysis.Visualisation,
      module Data.Graph.Analysis.Reporting,
      module Data.Graph.Inductive.Graph,
      -- * Importing data
      ImportParams(..),
      importData,
      -- * Result analysis
      -- $analfuncts
      lengthAnalysis,
      classifyRoots,
      inaccessibleNodes,
      interiorChains,
      collapseAndUpdate,
      collapseAndUpdate',
      levelGraphFromRoot
    ) where

import Data.Graph.Analysis.Internal
import Data.Graph.Analysis.Utils
import Data.Graph.Analysis.Types
import Data.Graph.Analysis.Algorithms
import Data.Graph.Analysis.Visualisation
import Data.Graph.Analysis.Reporting

import Data.Graph.Inductive.Graph

import Data.List(find)
import Data.Maybe(mapMaybe)
import qualified Data.Map as M
import Data.Map(Map)
import qualified Data.Set as S
import Data.Set(Set)
import Control.Arrow(first)

import Data.Version(showVersion)
import qualified Paths_Graphalyze as Paths(version)

-- -----------------------------------------------------------------------------

-- | The library version.
version :: String
version = showVersion Paths.version

{- |
   This represents the information that's being passed in that we want
   to analyse.  If the graph is undirected, it is better to list each
   edge once rather than both directions.
 -}
data ImportParams n e = ImpParams { -- | The discrete points.
                                    dataPoints :: [n]
                                    -- | The relationships between the points.
                                  , relationships :: [Rel n e]
                                    -- | The expected roots of the graph.
                                    --   If @'directed' = 'False'@, then this is ignored.
                                  , roots :: [n]
                                    -- | 'False' if relationships are symmetric
                                    --   (i.e. an undirected graph).
                                  , directed :: Bool
                                  }

{- |
   Import data into a format suitable for analysis.  This function is
   /edge-safe/: if any datums are listed in the edges of
   'ImportParams' that aren't listed in the data points, then those
   edges are ignored.  Thus, no sanitation of the 'relationships' in
   @ImportParams@ is necessary.  The unused relations are stored in
   'unusedRelationships'.  Note that it is assumed that all datums in
   'roots' are also contained within 'dataPoints'.
 -}
importData        :: (Ord n, Ord e) => ImportParams n e -> GraphData n e
importData params = GraphData { graph = dGraph
                              , wantedRootNodes = rootNodes
                              , directedData = isDir
                              , unusedRelationships = unRs
                              }
    where
      isDir = directed params
      -- Adding Node values to each of the data points.
      lNodes = zip [1..] (dataPoints params)
      -- The valid edges in the graph along with the unused relationships.
      (unRs, graphEdges) = relsToEs isDir lNodes (relationships params)
      -- Creating a lookup map from the label to the @Node@ value.
      nodeMap = mkNodeMap lNodes
      -- Validate a node
      validNode l = M.lookup l nodeMap
      -- Construct the root nodes
      rootNodes = if isDir
                  then mapMaybe validNode (roots params)
                  else []
      -- Construct the graph.
      dGraph = mkGraph lNodes graphEdges

-- -----------------------------------------------------------------------------

{- $analfuncts
   Extra functions for data analysis.
 -}

-- | Returns the mean and standard deviations of the lengths of the sublists,
--   as well all those lists more than one standard deviation longer than
--   the mean.
lengthAnalysis    :: [[a]] -> (Int,Int,[(Int,[a])])
lengthAnalysis as = (av,stdDev,as'')
    where
      as' = addLengths as
      ls = map fst as'
      (av,stdDev) = statistics' ls
      as'' = filter (\(l,_) -> l > (av+stdDev)) as'

{- |
   Compare the actual roots in the graph with those that are expected
   (i.e. those in 'wantedRootNodes').  Returns (in order):

   * Those roots that are expected (i.e. elements of 'wantedRootNodes'
     that are roots).

   * Those roots that are expected but not present (i.e. elements of
     'wantedRootNodes' that /aren't/ roots.

   * Unexpected roots (i.e. those roots that aren't present in
     'wantedRootNodes').
 -}
classifyRoots    :: GraphData n e -> (Set Node, Set Node, Set Node)
classifyRoots gd = (areWanted, notRoots, notWanted)
    where
      wntd = S.fromList $ wantedRootNodes gd
      rts = S.fromList $ applyAlg rootsOf' gd
      areWanted = S.intersection wntd rts
      notRoots  = S.difference wntd rts
      notWanted = S.difference rts wntd

-- | Find the nodes that are not reachable from the expected roots
--   (i.e. those in 'wantedRootNodes').
inaccessibleNodes    :: GraphData n e -> Set Node
inaccessibleNodes gd = allNs `S.difference` reachableNs
    where
      -- We can't use accessibleOnlyFrom' on notWanted from
      -- classifyRoots, as there might be nodes that are roots but not
      -- detectable (e.g. a loop).
      allNs = S.fromList $ applyAlg nodes gd
      rs = S.fromList $ wantedRootNodes gd
      reachableNs = applyAlg accessibleFrom' gd rs

-- | Only return those chains (see 'chainsIn') where the non-initial
--   nodes are /not/ expected roots.
interiorChains    :: (Eq n, Eq e) => GraphData n e -> [LNGroup n]
interiorChains gd = filter (not . interiorRoot) chains
    where
      chains = applyAlg chainsIn gd
      rts = wantedRoots gd
      interiorRoot = any (`elem` rts) . tail

-- | As with 'collapseAndReplace', but also update the
--   'wantedRootNodes' to contain the possibly compressed nodes.
--   Since the datums they refer to may no longer exist (as they are
--   compressed), 'unusedRelationships' is set to @[]@.
collapseAndUpdate    :: (Ord n) => [AGr n e -> [(NGroup, n)]]
                        -> GraphData n e -> GraphData n e
collapseAndUpdate fs = fst . collapseAndUpdate' fs

-- | As with 'collapseAndUpdate', but also includes a lookup 'Map'
--   from the old label to the new.
collapseAndUpdate'       :: (Ord n) => [AGr n e -> [(NGroup, n)]]
                            -> GraphData n e -> (GraphData n e, Map n n)
collapseAndUpdate' fs gd = (gd', repLookup)
  where
    gr = graph gd
    (gr', reps) = collapseAndReplace' fs gr
    lns' = mkNodeMap $ labNodes gr'
    reps' = map (first S.fromList) reps
    rs = S.fromList $ wantedRootNodes gd
    replace r = maybe r ((M.!) lns' . snd)
                $ find (S.member r . fst) reps'
    gd' = gd { graph = gr'
             , wantedRootNodes = S.toList $ S.map replace rs
             , unusedRelationships = []
             }
    nlLookup = M.fromList $ labNodes gr
    getLs = mapMaybe (flip M.lookup nlLookup)
    repLookup = M.fromList . spreadOut $ map (first getLs) reps

-- | As with 'levelGraph', but use the expected roots rather than the
--   actual roots.
levelGraphFromRoot    :: (Ord n) => GraphData n e
                         -> GraphData (GenCluster n) e
levelGraphFromRoot gd = updateGraph (levelGraphFrom (wantedRootNodes gd)) gd