------------------------------------------------------------------------------
-- | 
-- Module       : Data.Datamining.Clustering.Gsom
-- Copyright    : (c) 2009 Stephan G√ľnther
-- License      : BSD3
--
-- Maintainer   : gnn.github@gmail.com
-- Stability    : experimental
-- Portability  : non-portable (requires STM)
--
-- This module should contain everything you need to run the GSOM clustering 
-- algorithm. It collects and re-exports all important and needed functions
-- from moduls lower in the hirarchy. 
--
-- Ideally you should never need to look at those modules. If you do need 
-- to do this, it is a design failure and I would appreciate it if you 
-- would drop me a note.
------------------------------------------------------------------------------

module Data.Datamining.Clustering.Gsom (
-- * Algorithm Input
-- | The GSOM algorithm expects a list of input vectors as its input.
-- These input vectors are just lists of @'Double'@s.
  Input, Inputs

-- ** Input processing
, dimension, Bounds, bounds, normalize, unnormalize

-- ** Operations between input vectors
, distance, (*.), (.*), (<+>), (<->)

-- ** Auxiliary Types
, Coordinates

-- * The map created by GSOM
-- | The growing self organizing map builds, as its name suggests, a map
-- representing the input vectors. This map is of type @'Lattice'@ and is 
-- a network of @'Node'@s.

-- ** The @'Node'@s of the map
, Node(..), Nodes, Neighbours, Neighbourhood

-- *** Node Creation
, node

-- *** Modification
, propagate, update, updateError

-- *** Querying
, isLeaf, isNode, neighbourhood, unwrappedNeighbours

-- *** Debugging
, putNode

-- ** The map type
, Lattice

-- *** Creating initial lattices
, newCentered, newRandom

-- *** Querying a lattice
, L.bmu, nodes

-- *** Debugging
, putLattice, putWeights

-- * Running GSOM
-- | The GSOM algorithm builds the map by sequentially @'run'@ning a given
-- number of @'Phases'@ over the input.

-- ** The Phase type
, Phase(..), Phases, Kernel(..), LearningRate(..)

-- *** Predefined phases
, defaultFirst, defaultSecond, defaultThird, defaults

-- *** Running a phase/phases
, phase, run

-- * Constructing a clustering from the map
-- ** The Clustering type
, Cluster(..), Clustering

-- ** Construction and Query
, cluster, clustering, nearestCluster

) where

------------------------------------------------------------------------------
-- Standard modules
------------------------------------------------------------------------------

import Control.Concurrent.STM
import Control.Monad
import Data.Function
import Data.List
import Data.Map(Map(..))
import qualified Data.Map as Map

------------------------------------------------------------------------------
-- Private modules
------------------------------------------------------------------------------

import Data.Datamining.Clustering.Gsom.Coordinates
import Data.Datamining.Clustering.Gsom.Input
import Data.Datamining.Clustering.Gsom.Lattice hiding (bmu)
import qualified Data.Datamining.Clustering.Gsom.Lattice as L(bmu)
import Data.Datamining.Clustering.Gsom.Node
import Data.Datamining.Clustering.Gsom.Phase

------------------------------------------------------------------------------
-- Types
------------------------------------------------------------------------------

-- | The clusters generated by GSOM basically consist of three things:
data Cluster = Cluster {
  -- | the vector which best represents all the vectors belonging to this
  -- cluster.
  center :: Input
  -- | The indices of the input vectors belonging to this cluster. 
  -- That means a cluster is always relative to a set of @'Inputs'@ 
, contents :: [Int]
  -- | the coordinates of this cluster
, coordinates :: Coordinates
} deriving (Read, Show)

-- | The final clustering which is the result of the GSOM algorithm 
-- is a @'Data.Map'@ mapping @'Coordinates'@ to @'Cluster'@s.
type Clustering = Map Coordinates Cluster

------------------------------------------------------------------------------
-- Creation
------------------------------------------------------------------------------

-- | Computes a clustering induced by the given lattice.
--
-- @'clustering' lattice@ uses the @'weights'@ of the @'nodes'@ 
-- stored in @lattice@ to generate clusters and returns the 
-- @'Clustering'@ storing these clusters. Each non leaf node @n@ in @lattice@
-- cluster @c@ with @('coordinates' c = 'location' n)@ and with @'center' c@ 
-- equal to the weight vector of @n@. Each generated clusters contents are 
-- empty.
clustering :: Lattice -> IO Clustering
clustering l = do
  ns <- atomically $ liftM (filter isNode) (nodes l)
  associations <- atomically $ mapM (\n -> do 
    ws <- readTVar $ weights n
    return $! (location n, ws)) ns
  return $! Map.fromList $ map (\(xy, w) -> 
    (xy, Cluster {center = w, contents = [], coordinates = xy})) associations

-- | @'cluster' inputs clustering@ clusters the given @inputs@ according to 
-- the centers of the clusters in @clustering@. That means for each input @i@
-- from @inputs@ the index of @i@ is added to the contents of the cluster 
-- center to which @i@ has minimal distance.
-- TODO: Implement tiebreaker.
cluster :: Inputs -> Clustering -> Clustering
cluster is cs = foldl' f cs (zip [0..] is) where
  f cs (index, i) = let c = bmu i cs in 
    Map.insert (coordinates c) (c{contents = index : contents c}) cs

-- | @'nearestCluster' input clustering@ returns the cluster which has 
-- the center with the smallest distance to @input@.
nearestCluster :: Input -> Clustering -> Cluster
nearestCluster = bmu

------------------------------------------------------------------------------
-- Internal Functions
------------------------------------------------------------------------------

-- | Again computes the best matching unit, only this time it is pure.
bmu :: Input -> Clustering -> Cluster
bmu i = snd . minimumBy (compare `on` (distance i) . center . snd) . Map.assocs