```{- Math.Clustering.Hierarchical.Types
Gregory W. Schwartz

Collects the types used in hierarchical clustering.
-}

{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE StandaloneDeriving #-}

module Math.Clustering.Hierarchical.Spectral.Types
( ClusteringTree (..)
, ClusteringVertex (..)
, GenericClusteringTree (..)
, GenericClusteringVertex (..)
, EigenGroup (..)
, clusteringTreeToDendrogram
, clusteringTreeToDendrogramCumulative
, clusteringTreeToGenericClusteringTree
, getClusterItemsDend
, getClusterItemsTree
, getClusterItemsGenericTree
, Q (..)
, NumEigen (..)
, Runs (..)
) where

-- Remote
import Data.Clustering.Hierarchical (Dendrogram (..))
import Data.Maybe (catMaybes)
import Data.Monoid ((<>))
import Data.Tree (Tree (..))
import GHC.Generics (Generic)
import Math.Modularity.Types (Q (..))
import Math.TreeFun.Tree (leaves)
import qualified Data.Aeson as A
import qualified Data.Foldable as F
import qualified Data.Vector as V

-- Local

type Items a         = V.Vector a
type ClusteringTree a = Tree (ClusteringVertex a)
type GenericClusteringTree a = Tree (GenericClusteringVertex a)
type NumEigen = Int
type Runs = Int

data EigenGroup = SignGroup | KMeansGroup deriving (Read, Show, Generic)

data ClusteringVertex a = ClusteringVertex
{ _clusteringItems  :: !(Items a)
, _ngMod            :: !Q
, _pVal           :: !(Maybe Double)
} deriving (Eq, Ord, Read, Show, Generic)

data GenericClusteringVertex a = GenericClusteringVertex
{ _item :: !(Maybe (Items a))
, _distance :: !(Maybe Double)
, _significance :: !(Maybe Double)
} deriving (Eq, Ord, Read, Show, Generic)

-- | Convert a ClusteringTree to a Dendrogram. Modularity is the distance.
clusteringTreeToDendrogram :: ClusteringTree a -> Dendrogram (Items a)
clusteringTreeToDendrogram = go
where
go (Node { rootLabel = !n, subForest = []}) = Leaf \$ _clusteringItems n
go (Node { rootLabel = !n, subForest = [x, y]}) =
Branch (unQ . _ngMod \$ n) (go x) (go y)
go (Node { subForest = xs}) =
error \$ "Clustering tree has "
<> (show \$ length xs)
<> " children. Requires two or none."

-- | Convert a ClusteringTree to a Dendrogram. Modularity is the distance, such
-- that the distance is the modularity plus the maximum distance of each branch.
clusteringTreeToDendrogramCumulative :: ClusteringTree a -> Dendrogram (Items a)
clusteringTreeToDendrogramCumulative = fst . go
where
go (Node { rootLabel = !n, subForest = []}) =
(Leaf (_clusteringItems n), 0)
go (Node { rootLabel = !n, subForest = [x, y]}) =
(Branch newD l r, newD)
where
newD = (unQ . _ngMod \$ n) + max lDist rDist
(!l, !lDist) = go x
(!r, !rDist) = go y
go (Node { subForest = xs}) =
error \$ "Clustering tree has "
<> (show \$ length xs)
<> " children. Requires two or none."

-- | Convert a ClusteringTree to a GenericClusteringVertex tree (more
-- standardized for our purposes).
clusteringTreeToGenericClusteringTree :: ClusteringTree a
-> GenericClusteringTree a
clusteringTreeToGenericClusteringTree = go
where
go (Node { rootLabel = !n, subForest = []}) =
Node { rootLabel = ( GenericClusteringVertex
{ _item = Just \$ _clusteringItems n
, _distance = Nothing
, _significance = Nothing
}
)
, subForest = []
}
go (Node { rootLabel = !n, subForest = xs }) =
Node { rootLabel = ( GenericClusteringVertex
{ _item = Nothing
, _distance = Just . unQ . _ngMod \$ n
, _significance = _pVal n
}
)
, subForest = fmap go xs
}

-- | Gather clusters (leaves) from the dendrogram.
getClusterItemsDend :: Foldable t => t (Items a) -> [Items a]
getClusterItemsDend = F.toList

-- | Gather clusters (leaves) from the tree.
getClusterItemsTree :: ClusteringTree a -> [Items a]
getClusterItemsTree = fmap _clusteringItems . leaves

-- | Gather clusters (leaves) from the generic tree.
getClusterItemsGenericTree :: GenericClusteringTree a -> [Items a]
getClusterItemsGenericTree = catMaybes . fmap _item . leaves