------------------------------------------------------------------------
-- |
-- Module      :  Data.Datamining.Clustering.SSOMInternal
-- Copyright   :  (c) Amy de Buitléir 2012-2015
-- License     :  BSD-style
-- Maintainer  :  amy@nualeargais.ie
-- Stability   :  experimental
-- Portability :  portable
--
-- A module containing private @SSOM@ internals. Most developers should
-- use @SSOM@ instead. This module is subject to change without notice.
--
------------------------------------------------------------------------
{-# LANGUAGE TypeFamilies, FlexibleContexts, FlexibleInstances,
    MultiParamTypeClasses, DeriveAnyClass, DeriveGeneric #-}

module Data.Datamining.Clustering.SSOMInternal where

import Control.DeepSeq (NFData)
import Data.List (foldl', minimumBy)
import Data.Ord (comparing)
import Data.Datamining.Clustering.Classifier(Classifier(..))
import qualified Data.Map.Strict as M
import GHC.Generics (Generic)
import Prelude hiding (lookup)

-- | A typical learning function for classifiers.
--   @'exponential' r0 d t@ returns the learning rate at time @t@.
--   When @t = 0@, the learning rate is @r0@.
--   Over time the learning rate decays exponentially; the decay rate is
--   @d@.
--   Normally the parameters are chosen such that:
--
--   * 0 < r0 < 1
--
--   * 0 < d
exponential :: Floating a => a -> a -> a -> a
exponential r0 d t = r0 * exp (-d*t)
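
-- As a quick illustration (figures rounded; @r0 = 0.8@ and @d = 0.1@
-- are arbitrary values chosen for the example):
--
-- > exponential 0.8 0.1 0    -- 0.8      (the initial rate, r0)
-- > exponential 0.8 0.1 10   -- ~0.294   (0.8 * exp (-1))
-- > exponential 0.8 0.1 100  -- ~3.6e-5  (learning has almost stopped)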

-- | A Simplified Self-Organising Map (SSOM).
--   @t@ is the type of the counter.
--   @x@ is the type of the learning rate and the difference metric.
--   @k@ is the type of the model indices.
--   @p@ is the type of the input patterns and models.
data SSOM t x k p = SSOM
  {
    -- | Maps node indices to their models.
    sMap :: M.Map k p,
    -- | A function which determines the learning rate for a node.
    --   The input parameter indicates how many patterns (or pattern
    --   batches) have previously been presented to the classifier.
    --   Typically this is used to make the learning rate decay over
    --   time.
    --   The output is the learning rate for that node (the amount by
    --   which the node's model should be updated to match the target).
    --   The learning rate should be between zero and one.
    learningRate :: t -> x,
    -- | A function which compares two patterns and returns a
    --   /non-negative/ number representing how different the patterns
    --   are.
    --   A result of @0@ indicates that the patterns are identical.
    difference :: p -> p -> x,
    -- | A function which updates models.
    --   For example, if this function is @f@, then
    --   @f target amount pattern@ returns a modified copy of @pattern@
    --   that is more similar to @target@ than @pattern@ is.
    --   The magnitude of the adjustment is controlled by the @amount@
    --   parameter, which should be a number between 0 and 1.
    --   Larger values for @amount@ permit greater adjustments.
    --   If @amount@=1, the result should be identical to the @target@.
    --   If @amount@=0, the result should be the unmodified @pattern@.
    makeSimilar :: p -> x -> p -> p,
    -- | A counter used as a "time" parameter.
    --   If you create the SSOM with a counter value @0@, and don't
    --   directly modify it, then the counter will represent the number
    --   of patterns that this SSOM has classified.
    counter :: t
  } deriving (Generic, NFData)
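
-- As a concrete (if artificial) sketch, an SSOM whose models are plain
-- 'Double's could be built as follows. The metric, adjustment function
-- and parameter values here are arbitrary choices for illustration, not
-- anything prescribed by this library:
--
-- > sketch :: SSOM Double Double Int Double
-- > sketch = SSOM
-- >   { sMap         = M.fromList [(0, 1.0), (1, 5.0)]
-- >   , learningRate = exponential 0.5 0.1
-- >   , difference   = \a b -> abs (a - b)
-- >   , makeSimilar  = \target r p -> p + r * (target - p)
-- >   , counter      = 0
-- >   }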

-- | Extracts the current models from the SSOM.
--   A synonym for @'sMap'@.
toMap :: SSOM t x k p -> M.Map k p
toMap = sMap

-- | Trains the specified node to better match a target.
--   Most users should use @'train'@, which automatically determines
--   the BMU and trains it.
trainNode
  :: (Num t, Ord k)
      => SSOM t x k p -> k -> p -> SSOM t x k p
trainNode s k target = s { sMap=gm' }
  where gm = sMap s
        gm' = M.adjust (makeSimilar s target r) k gm
        r = (learningRate s) (counter s)
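
-- Continuing the hypothetical @sketch@ above, @trainNode sketch 1 4.2@
-- leaves the model at key 0 untouched and moves the model at key 1 from
-- 5.0 to 5.0 + 0.5 * (4.2 - 5.0) = 4.6, since the learning rate at
-- counter 0 is @exponential 0.5 0.1 0 = 0.5@.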

-- | Increments the counter, recording that one more pattern (or batch
--   of patterns) has been presented to the classifier.
incrementCounter :: Num t => SSOM t x k p -> SSOM t x k p
incrementCounter s = s { counter=counter s + 1 }

-- | Trains the node whose model best matches the given pattern
--   (the BMU), without incrementing the counter.
--   Most users should use @'train'@ or @'trainBatch'@ instead.
justTrain
  :: (Num t, Ord k, Ord x)
      => SSOM t x k p -> p -> SSOM t x k p
justTrain s p = trainNode s bmu p
  where ds = M.toList . M.map (difference s p) . toMap $ s
        bmu = f ds
        f [] = error "SSOM has no models"
        f xs = fst $ minimumBy (comparing snd) xs
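
-- With the hypothetical @sketch@ above, @justTrain sketch 4.2@ picks
-- key 1 as the BMU (difference 0.8, versus 3.2 for key 0) and then
-- trains that node exactly as in the @trainNode@ example; the counter
-- is left unchanged.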

instance
  (Num t, Ord x, Num x, Ord k)
    => Classifier (SSOM t) x k p where
  toList = M.toList . toMap
  -- TODO: If the # of models is fixed, make more efficient
  numModels = M.size . sMap
  models = M.elems . toMap
  differences s p = M.toList . M.map (difference s p) $ toMap s
  trainBatch s = incrementCounter . foldl' justTrain s
  reportAndTrain s p = (bmu, ds, s')
    where ds = differences s p
          bmu = fst $ minimumBy (comparing snd) ds
          s' = incrementCounter . trainNode s bmu $ p
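
-- How the instance behaves with the hypothetical @sketch@ above
-- (results described rather than shown verbatim; an 'SSOM' holds
-- functions, so it has no 'Show' instance):
--
-- > models sketch                 -- [1.0, 5.0]
-- > differences sketch 4.2        -- [(0, 3.2), (1, 0.8)]
-- > reportAndTrain sketch 4.2     -- (1, [(0,3.2),(1,0.8)], sketch with
-- >                               -- the model at key 1 moved to 4.6 and
-- >                               -- the counter incremented to 1)
-- > trainBatch sketch [4.2, 0.5]  -- trains one BMU per pattern, then
-- >                               -- increments the counter once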