------------------------------------------------------------------------
-- |
-- Module      :  Data.Datamining.Clustering.SOMInternal
-- Copyright   :  (c) Amy de Buitléir 2012-2013
-- License     :  BSD-style
-- Maintainer  :  amy@nualeargais.ie
-- Stability   :  experimental
-- Portability :  portable
--
-- A module containing private @SOM@ internals. Most developers should
-- use @SOM@ instead. This module is subject to change without notice.
--
------------------------------------------------------------------------
{-# LANGUAGE UnicodeSyntax, TypeFamilies, FlexibleContexts, 
    FlexibleInstances, MultiParamTypeClasses #-}

module Data.Datamining.Clustering.SOMInternal
  (
    SOM(..),
    defaultSOM,
    customSOM,
    gaussian,
    decayingGaussian,
    toGridMap
  ) where

import Data.List (foldl', minimumBy)
import Data.Ord (comparing)
import qualified Math.Geometry.Grid as G (Grid(..))
import qualified Math.Geometry.GridMap as GM (GridMap(..))
import Data.Datamining.Pattern (Pattern(..))
import Data.Datamining.Clustering.Classifier(Classifier(..))
import Prelude hiding (lookup)

data SOM gm k p = SOM
  {
    sGridMap ∷ gm p,
    sLearningFunction ∷ Int → Int → Metric p,
    sCounter ∷ Int
  }

currentLearningFunction ∷ SOM gm k p → (Int → Metric p)
currentLearningFunction s = (sLearningFunction s) (sCounter s)

-- | Extract the grid and current models from the SOM.
toGridMap ∷ GM.GridMap gm p ⇒ SOM gm k p → gm p
toGridMap = sGridMap

adjustNode
  ∷ (Pattern p, G.Grid g, k ~ G.Index g) ⇒
     g → (Int → Metric p) → p → k → k → p → p
adjustNode g f target bmu k = makeSimilar target (f d)
  where d = G.distance g bmu k

trainWithBMU
  ∷ (Pattern p, G.Grid (gm p), GM.GridMap gm p,
      G.Index (GM.BaseGrid gm p) ~ G.Index (gm p)) ⇒
     SOM gm k p → G.Index (gm p) → p → SOM gm k p
trainWithBMU s bmu target = s { sGridMap=gm' }
  where gm = sGridMap s
        gm' = GM.mapWithKey (adjustNode gm f target bmu) gm
        f = currentLearningFunction s

justTrain
  ∷ (Ord (Metric p), Pattern p, G.Grid (gm p),
      GM.GridMap gm (Metric p), GM.GridMap gm p,
      G.Index (GM.BaseGrid gm (Metric p)) ~ G.Index (gm p),
      G.Index (GM.BaseGrid gm p) ~ G.Index (gm p)) ⇒
     SOM gm k p → p → SOM gm k p
justTrain s p = trainWithBMU s bmu p
  where ds = GM.toList . GM.map (p `difference`) . sGridMap $ s
        bmu = fst . minimumBy (comparing snd) $ ds

instance 
  (GM.GridMap gm p, k ~ G.Index (GM.BaseGrid gm p), Pattern p, 
  G.Grid (gm p), GM.GridMap gm (Metric p), k ~ G.Index (gm p),
  k ~ G.Index (GM.BaseGrid gm (Metric p)), Ord (Metric p)) ⇒ 
    Classifier (SOM gm) k p where
  toList = GM.toList . sGridMap
  numModels = G.tileCount . sGridMap
  models = GM.elems . sGridMap
  differences s p = GM.toList . GM.map (p `difference`) . sGridMap $ s
  trainBatch s ps = (foldl' justTrain s ps) {sCounter=sCounter s + 1}
  reportAndTrain s p = (bmu, ds, s'')
    where ds = differences s p
          bmu = fst . minimumBy (comparing snd) $ ds
          s' = trainWithBMU s bmu p
          s'' = s' { sCounter=sCounter s + 1}


-- Creates a classifier with a default (bell-shaped) learning function.
defaultSOM 
  ∷ Floating (Metric p) ⇒
  -- | The geometry and initial models for this classifier.
  --   A reasonable choice here is 'lazyGridMap g ps', where 'g' is a
  --   @'Math.Geometry.Grid.HexHexGrid'@, and 'ps' is a set of
  --   random patterns.
  gm p →
  -- | The learning rate to be applied to the BMU (Best Matching Unit)
  --   at "time" zero. The BMU is the model which best matches the
  --   current target pattern.
  Metric p →
  -- | The width of the bell curve at "time" zero.
  Metric p →
  -- | After this time, any learning done by the classifier will be
  --   negligible. Recommend setting this parameter to the number of
  --   patterns (or pattern batches) that will be presented to the
  --   classifier. An estimate is fine.
  Int →
  -- | The result
  SOM gm k p
defaultSOM gm r w t = 
  SOM { 
        sGridMap=gm, 
        sLearningFunction=decayingGaussian r w t, 
        sCounter=0
      }

-- Creates a classifier with a custom learning function.
customSOM ∷ 
  -- | The geometry and initial models for this classifier.
  --   A reasonable choice here is 'lazyGridMap g ps', where 'g' is a
  --   @'Math.Geometry.Grid.HexHexGrid'@, and 'ps' is a set of
  --   random patterns.
  gm p →
  -- | A function used to adjust the models in the classifier.
  --   This function will be invoked with two parameters.
  --   The first parameter will indicate how many patterns (or pattern 
  --   batches) have previously been presented to this classifier. 
  --   Typically this is used to make the learning rate decay over time.
  --   The second parameter to the function is the grid distance from
  --   the node being updated to the BMU (Best Matching Unit).
  --   The output is the learning rate for that node (the amount by
  --   which the node's model should be updated to match the target).
  --   The learning rate should be between zero and one.
  (Int → Int → Metric p) →
  -- | The result
  SOM gm k p
customSOM gm f = 
  SOM {
        sGridMap=gm,
        sLearningFunction=f,
        sCounter=0
      }


-- | Calculates @r/e/^(-d^2/2w^2)@.
--   This form of the Gaussian function is useful as a learning rate
--   function. In @'gaussian' r w d@, @r@ specifies the highest learning
--   rate, which will be applied to the SOM node that best matches the
--   input pattern. The learning rate applied to other nodes will be 
--   applied based on their distance @d@ from the best matching node. 
--   The value @w@ controls the \'width\' of the Gaussian. Higher values
--   of @w@ cause the learning rate to fall off more slowly with 
--   distance @d@.
gaussian ∷ Floating a ⇒ a → a → Int → a
gaussian r w d = r * exp (-d'*d'/(2*w*w))
  where d' = fromIntegral d

-- | Configures a typical learning function for classifiers.
--   @'decayingGaussian r w0 tMax' returns a bell curve-shaped function.
--   At time zero, the maximum learning rate (applied to the BMU) is 
--   @r@, and the neighbourhood width is @w@. Over time the bell curve
--   shrinks and the learning rate tapers off, until at time @tMax@,
--   the learning rate is negligible.
decayingGaussian
  ∷ Floating a ⇒ a → a → Int → (Int → Int → a)
decayingGaussian r w0 tMax = 
  \t d → let t' = fromIntegral t in gaussian r w0 d * exp (-t'/tMax')
  where tMax' = fromIntegral tMax