------------------------------------------------------------------------
-- |
-- Module : Data.Datamining.Clustering.SOMInternal
-- Copyright : (c) Amy de BuitlĂ©ir 2012-2018
-- License : BSD-style
-- Maintainer : amy@nualeargais.ie
-- Stability : experimental
-- Portability : portable
--
-- A module containing private @SOM@ internals. Most developers should
-- use @SOM@ instead. This module is subject to change without notice.
--
------------------------------------------------------------------------
{-# LANGUAGE TypeFamilies, FlexibleContexts, FlexibleInstances,
MultiParamTypeClasses, DeriveAnyClass, DeriveGeneric #-}
module Data.Datamining.Clustering.SOMInternal where
import Prelude hiding (lookup)
import Control.DeepSeq (NFData)
import qualified Data.Foldable as F (Foldable, foldr)
import Data.List (foldl', minimumBy)
import Data.Ord (comparing)
import qualified Math.Geometry.Grid as G (Grid(..))
import qualified Math.Geometry.GridMap as GM (GridMap(..))
import Data.Datamining.Clustering.Classifier(Classifier(..))
import GHC.Generics (Generic)
-- | A typical learning function for classifiers.
-- @'decayingGaussian' r0 rf w0 wf tf@ returns a bell curve-shaped
-- function. At time zero, the maximum learning rate (applied to the
-- BMU) is @r0@, and the neighbourhood width is @w0@. Over time the
-- bell curve shrinks and the learning rate tapers off, until at time
-- @tf@, the maximum learning rate (applied to the BMU) is @rf@,
-- and the neighbourhood width is @wf@. Normally the parameters
-- should be chosen such that:
--
-- * 0 < rf << r0 < 1
--
-- * 0 < wf << w0
--
-- * 0 < tf
--
-- where << means "is much smaller than" (not the Haskell @<<@
-- operator!)
decayingGaussian :: Floating x => x -> x -> x -> x -> x -> x -> x -> x
decayingGaussian r0 rf w0 wf tf t d = r * exp (-x/y)
where a = t / tf
r = r0 * ((rf/r0)**a)
w = w0 * ((wf/w0)**a)
x = (d*d)
y = (2*w*w)
-- | A learning function that only updates the BMU and has a constant
-- learning rate.
stepFunction :: (Num d, Fractional x, Eq d) => x -> t -> d -> x
stepFunction r _ d = if d == 0 then r else 0.0
-- | A learning function that updates all nodes with the same, constant
-- learning rate. This can be useful for testing.
constantFunction :: x -> t -> d -> x
constantFunction r _ _ = r
-- | A Self-Organising Map (SOM).
--
-- Although @SOM@ implements @GridMap@, most users will only need the
-- interface provided by @Data.Datamining.Clustering.Classifier@. If
-- you chose to use the @GridMap@ functions, please note:
--
-- 1. The functions @adjust@, and @adjustWithKey@ do not increment the
-- counter. You can do so manually with @incrementCounter@.
--
-- 2. The functions @map@ and @mapWithKey@ are not implemented (they
-- just return an @error@). It would be problematic to implement
-- them because the input SOM and the output SOM would have to have
-- the same @Metric@ type.
data SOM t d gm x k p = SOM
{
-- | Maps patterns to tiles in a regular grid.
-- In the context of a SOM, the tiles are called "nodes"
gridMap :: gm p,
-- | A function which determines the how quickly the SOM learns.
-- For example, if the function is @f@, then @f t d@ returns the
-- learning rate for a node.
-- The parameter @t@ indicates how many patterns (or pattern
-- batches) have previously been presented to the classifier.
-- Typically this is used to make the learning rate decay over
-- time.
-- The parameter @d@ is the grid distance from the node being
-- updated to the BMU (Best Matching Unit).
-- The output is the learning rate for that node (the amount by
-- which the node's model should be updated to match the target).
-- The learning rate should be between zero and one.
learningRate :: t -> d -> x,
-- | A function which compares two patterns and returns a
-- /non-negative/ number representing how different the patterns
-- are.
-- A result of @0@ indicates that the patterns are identical.
difference :: p -> p -> x,
-- | A function which updates models.
-- If this function is @f@, then @f target amount pattern@ returns
-- a modified copy of @pattern@ that is more similar to @target@
-- than @pattern@ is.
-- The magnitude of the adjustment is controlled by the @amount@
-- parameter, which should be a number between 0 and 1.
-- Larger values for @amount@ permit greater adjustments.
-- If @amount@=1, the result should be identical to the @target@.
-- If @amount@=0, the result should be the unmodified @pattern@.
makeSimilar :: p -> x -> p -> p,
-- | A counter used as a "time" parameter.
-- If you create the SOM with a counter value @0@, and don't
-- directly modify it, then the counter will represent the number
-- of patterns that this SOM has classified.
counter :: t
} deriving (Generic, NFData)
instance (F.Foldable gm) => F.Foldable (SOM t d gm x k) where
foldr f x g = F.foldr f x (gridMap g)
instance (G.Grid (gm p)) => G.Grid (SOM t d gm x k p) where
type Index (SOM t d gm x k p) = G.Index (gm p)
type Direction (SOM t d gm x k p) = G.Direction (gm p)
indices = G.indices . gridMap
distance = G.distance . gridMap
neighbours = G.neighbours . gridMap
contains = G.contains . gridMap
viewpoint = G.viewpoint . gridMap
directionTo = G.directionTo . gridMap
tileCount = G.tileCount . gridMap
null = G.null . gridMap
nonNull = G.nonNull . gridMap
instance (F.Foldable gm, GM.GridMap gm p, G.Grid (GM.BaseGrid gm p))
=> GM.GridMap (SOM t d gm x k) p where
type BaseGrid (SOM t d gm x k) p = GM.BaseGrid gm p
toGrid = GM.toGrid . gridMap
toMap = GM.toMap . gridMap
mapWithKey = error "Not implemented"
delete k = withGridMap (GM.delete k)
adjustWithKey f k = withGridMap (GM.adjustWithKey f k)
insertWithKey f k v = withGridMap (GM.insertWithKey f k v)
alter f k = withGridMap (GM.alter f k)
filterWithKey f = withGridMap (GM.filterWithKey f)
-- | Internal method.
withGridMap :: (gm p -> gm p) -> SOM t d gm x k p -> SOM t d gm x k p
withGridMap f s = s { gridMap=gm' }
where gm = gridMap s
gm' = f gm
-- | Returns the learning function currently being used by the SOM.
currentLearningFunction :: (Num t) => SOM t d gm x k p -> (d -> x)
currentLearningFunction s
= (learningRate s) (counter s)
-- | Extracts the grid and current models from the SOM.
-- A synonym for @'gridMap'@.
toGridMap :: GM.GridMap gm p => SOM t d gm x k p -> gm p
toGridMap = gridMap
-- | Internal method.
adjustNode
:: (G.Grid g, k ~ G.Index g, Num t) =>
g -> (t -> x) -> (p -> x -> p -> p) -> p -> k -> k -> p -> p
adjustNode g rateF adjustF target bmu k = adjustF target (rateF d)
where d = fromIntegral $ G.distance g bmu k
-- | Trains the specified node and the neighbourood around it to better
-- match a target.
-- Most users should use @'train'@, which automatically determines
-- the BMU and trains it and its neighbourhood.
trainNeighbourhood
:: (G.Grid (gm p), GM.GridMap gm p,
G.Index (GM.BaseGrid gm p) ~ G.Index (gm p), Num t, Num x,
Num d) =>
SOM t d gm x k p -> G.Index (gm p) -> p -> SOM t d gm x k p
trainNeighbourhood s bmu target = s { gridMap=gm' }
where gm = gridMap s
gm' = GM.mapWithKey (adjustNode gm f1 f2 target bmu) gm
f1 = currentLearningFunction s
f2 = makeSimilar s
-- | Increment the match counter.
incrementCounter :: Num t => SOM t d gm x k p -> SOM t d gm x k p
incrementCounter s = s { counter=counter s + 1}
-- | Internal method.
justTrain
:: (Ord x, G.Grid (gm p), GM.GridMap gm x, GM.GridMap gm p,
G.Index (GM.BaseGrid gm x) ~ G.Index (gm p),
G.Index (GM.BaseGrid gm p) ~ G.Index (gm p), Num t, Num x,
Num d) =>
SOM t d gm x k p -> p -> SOM t d gm x k p
justTrain s p = trainNeighbourhood s bmu p
where ds = GM.toList . GM.map (difference s p) $ gridMap s
bmu = f ds
f [] = error "SOM has no models"
f xs = fst $ minimumBy (comparing snd) xs
instance
(GM.GridMap gm p, k ~ G.Index (GM.BaseGrid gm p), G.Grid (gm p),
GM.GridMap gm x, k ~ G.Index (gm p), k ~ G.Index (GM.BaseGrid gm x),
Num t, Ord x, Num x, Num d)
=> Classifier (SOM t d gm) x k p where
toList = GM.toList . gridMap
numModels = G.tileCount . gridMap
models = GM.elems . gridMap
differences s p = GM.toList . GM.map (difference s p) $ gridMap s
trainBatch s = incrementCounter . foldl' justTrain s
reportAndTrain s p = (bmu, ds, incrementCounter s')
where ds = differences s p
bmu = fst $ minimumBy (comparing snd) ds
s' = trainNeighbourhood s bmu p