module Data.Clustering.Hierarchical
(Dendrogram(..)
,Linkage(..)
,completeDendrogram
) where
import qualified Data.IntMap as IM
import Control.Applicative ((<$>), (<*>))
import Control.Monad.ST (runST)
import Data.Array (listArray, (!))
import Data.Foldable (Foldable (..))
import Data.Function (on)
import Data.Monoid (mappend)
import Data.Traversable (Traversable(..))
import Data.Clustering.Hierarchical.Internal.DistanceMatrix
data Dendrogram d a =
Leaf a
| Branch d (Dendrogram d a) (Dendrogram d a)
deriving (Eq, Ord, Show)
instance Functor (Dendrogram d) where
fmap f (Leaf d) = Leaf (f d)
fmap f (Branch s c1 c2) = Branch s (fmap f c1) (fmap f c2)
instance Foldable (Dendrogram d) where
foldMap f (Leaf d) = f d
foldMap f (Branch _ c1 c2) = foldMap f c1 `mappend` foldMap f c2
instance Traversable (Dendrogram d) where
traverse f (Leaf d) = Leaf <$> f d
traverse f (Branch s c1 c2) = Branch s <$> traverse f c1 <*> traverse f c2
data Linkage =
SingleLinkage
| CompleteLinkage
| UPGMA
| FakeAverageLinkage
deriving (Eq, Ord, Show, Enum)
clusterDistance :: (Fractional d, Ord d) => Linkage -> ClusterDistance d
clusterDistance SingleLinkage = \_ (_, d1) (_, d2) _ -> d1 `min` d2
clusterDistance CompleteLinkage = \_ (_, d1) (_, d2) _ -> d1 `max` d2
clusterDistance FakeAverageLinkage = \_ (_, d1) (_, d2) _ -> (d1 + d2) / 2
clusterDistance UPGMA = \_ (b1,d1) (b2,d2) _ ->
let n1 = fromIntegral (size b1)
n2 = fromIntegral (size b2)
in (n1 * d1 + n2 * d2) / (n1 + n2)
completeDendrogram :: (Fractional d, Ord d) => Linkage ->
[a] -> (a -> a -> d) -> Dendrogram d a
completeDendrogram linkage items dist = runST (act ())
where
n = length items
cdist = clusterDistance linkage
act _ = do
let xs = listArray (1, n) items
fromDistance (dist `on` (xs !)) n >>= go xs (n1) IM.empty
go xs i ds dm = do
((c1,c2), distance) <- findMin dm
cu <- mergeClusters cdist dm (c1,c2)
let dendro c = case size c of
1 -> Leaf (xs ! key c)
_ -> ds IM.! key c
d1 = dendro c1
d2 = dendro c2
du = Branch distance d1 d2
case i of
1 -> return du
_ -> let ds' = IM.insert (key cu) du $
IM.delete (key c1) $
IM.delete (key c2) ds
in go xs (i1) ds' dm