```module Data.Clustering.Hierarchical
(-- * Dendrogram data type
Dendrogram(..)
,Distance
,elements
,cutAt
-- * Clustering function
,dendrogram
) where

import qualified Data.Clustering.Hierarchical.Internal.DistanceMatrix as DM
import qualified Data.Clustering.Hierarchical.Internal.Optimal as O

-- | List of elements in a dendrogram.
elements :: Dendrogram a -> [a]
elements = go []
where
go acc (Leaf x)       = x : acc
go acc (Branch _ l r) = go (go acc r) l

-- | @dendro \`cutAt\` threshold@ cuts the dendrogram @dendro@ at
-- all branches which have distances strictly greater than
-- @threshold@.
--
-- For example, suppose we have
--
-- @
-- dendro = Branch 0.8
--            (Branch 0.5
--              (Branch 0.2
--                (Leaf \'A\')
--                (Leaf \'B\'))
--              (Leaf \'C\'))
--            (Leaf \'D\')
-- @
--
-- Then:
--
-- @
-- dendro \`cutAt\` 0.9 == dendro \`cutAt\` 0.8 == [dendro] -- no changes
-- dendro \`cutAt\` 0.7 == dendro \`cutAt\` 0.5 == [Branch 0.5 (Branch 0.2 (Leaf \'A\') (Leaf \'B\')) (Leaf \'C\'), Leaf \'D\']
-- dendro \`cutAt\` 0.4 == dendro \`cutAt\` 0.2 == [Branch 0.2 (Leaf \'A\') (Leaf \'B\'), Leaf \'C\', Leaf \'D\']
-- dendro \`cutAt\` 0.1 == [Leaf \'A\', Leaf \'B\', Leaf \'C\', Leaf \'D\'] -- no branches at all
-- @
cutAt :: Dendrogram a -> Distance -> [Dendrogram a]
cutAt dendro threshold = go [] dendro
where
go acc x@(Leaf _)                        = x : acc
go acc x@(Branch d l r) | d <= threshold = x : acc
| otherwise      = go (go acc r) l  -- cut!

-- | Calculates a complete, rooted dendrogram for a list of items
-- and a linkage type.  The following are the time and space
--
-- ['SingleLinkage'] /O(n^2)/ time and /O(n)/ space, using the
--   SLINK algorithm.  This algorithm is optimal in both space
--   and time and gives the same answer as the naive algorithm
--   using a distance matrix.
--
-- ['CompleteLinkage'] /O(n^3)/ time and /O(n^2)/ space, using
--   the naive algorithm with a distance matrix.  Use 'CLINK' if
--   you need more performance.
--
--   space, using the CLINK algorithm.  Note that this algorithm
--   doesn't always give the same answer as the naive algorithm
--   using a distance matrix, but it's much faster.
--
-- ['UPGMA'] /O(n^3)/ time and /O(n^2)/ space, using the naive
--   algorithm with a distance matrix.
--
-- ['FakeAverageLinkage'] /O(n^3)/ time and /O(n^2)/ space, using
-- the naive algorithm with a distance matrix.