------------------------------------------------------------------------ -- | -- Module : Data.Datamining.Clustering.SOMInternal -- Copyright : (c) Amy de Buitléir 2012-2013 -- License : BSD-style -- Maintainer : amy@nualeargais.ie -- Stability : experimental -- Portability : portable -- -- A module containing private @SOM@ internals. Most developers should -- use @SOM@ instead. This module is subject to change without notice. -- ------------------------------------------------------------------------ {-# LANGUAGE UnicodeSyntax, TypeFamilies, FlexibleContexts #-} module Data.Datamining.Clustering.SOMInternal ( adjustNode, adjustVector, classify, classifyAndTrain, diff, diffAndTrain, euclideanDistanceSquared, magnitudeSquared, normalise, NormalisedVector, scale, scaleAll, ScaledVector, train, trainBatch, Pattern(..) ) where import Data.Eq.Unicode ((≡)) import Data.List (foldl', minimumBy) import Data.Ord (comparing) import Math.Geometry.GridMap (GridMap, BaseGrid, mapWithKey, toList) import Math.Geometry.Grid (Grid, Index, distance) import qualified Math.Geometry.GridMap as GM (map) -- | A pattern to be learned or classified by a self-organising map. class Pattern p where type Metric p -- | Compares two patterns and returns a /non-negative/ number -- representing how different the patterns are. A result of @0@ -- indicates that the patterns are identical. difference ∷ p → p → Metric p -- | @'makeSimilar' target amount pattern@ returns a modified copy of -- @pattern@ that is more similar to @target@ than @pattern@ is. The -- magnitude of the adjustment is controlled by the @amount@ -- parameter, which should be a number between 0 and 1. Larger -- values for @amount@ permit greater adjustments. If @amount@=1, -- the result should be identical to the @target@. If @amount@=0, -- the result should be the unmodified @pattern@. makeSimilar ∷ p → Metric p → p → p -- | @'diff' c pattern@ returns the positions of all nodes in -- @c@, paired with the difference between @pattern@ and the node's -- pattern. diff ∷ (GridMap gm p, Pattern p, GridMap gm m, Metric p ~ m, BaseGrid gm p ~ BaseGrid gm m) ⇒ gm p → p → gm m diff c pattern = GM.map (pattern `difference`) c -- | @classify c pattern@ returns the position of the node in @c@ -- whose pattern best matches the input @pattern@. classify ∷ (GridMap gm p, Pattern p, GridMap gm m, Metric p ~ m, Ord m, k ~ Index (BaseGrid gm p), BaseGrid gm m ~ BaseGrid gm p) ⇒ gm p → p → k classify c pattern = fst $ minimumBy (comparing snd) $ toList $ diff c pattern -- | If @f d@ is a function that returns the learning rate to apply to a -- node based on its distance @d@from the node that best matches the -- input pattern, then @'train' c f pattern@ returns a modified copy -- of the classifier @c@ that has partially learned the @target@. train ∷ (Ord m, GridMap gm p, GridMap gm m, GridMap gm (Int, p), GridMap gm (m, p), Grid (gm p), Pattern p, Metric p ~ m, Index (BaseGrid gm p) ~ Index (gm p), BaseGrid gm m ~ BaseGrid gm p) ⇒ gm p → (Int → m) → p → gm p train c f pattern = snd $ classifyAndTrain c f pattern -- | Same as @train@, but applied to multiple patterns. trainBatch ∷ (Ord m, GridMap gm p, GridMap gm m, GridMap gm (Int, p), GridMap gm (m, p), Grid (gm p), Pattern p, Metric p ~ m, Index (BaseGrid gm p) ~ Index (gm p), BaseGrid gm m ~ BaseGrid gm p) ⇒ gm p → (Int → m) → [p] → gm p trainBatch c f ps = foldl' (\som → train som f) c ps -- | If @f@ is a function that returns the learning rate to apply to a -- node based on its distance from the node that best matches the -- @target@, then @'classifyAndTrain' c f target@ returns a tuple -- containing the position of the node in @c@ whose pattern best -- matches the input @target@, and a modified copy of the classifier -- @c@ that has partially learned the @target@. -- Invoking @classifyAndTrain c f p@ may be faster than invoking -- @(p `classify` c, train c f p)@, but they should give identical -- results. classifyAndTrain ∷ (Ord m, GridMap gm p, GridMap gm m, GridMap gm (Int, p), GridMap gm (m, p), Grid (gm p), Pattern p, Metric p ~ m, Index (BaseGrid gm p) ~ Index (gm p), BaseGrid gm m ~ BaseGrid gm p) ⇒ gm p → (Int → m) → p → (Index (gm p), gm p) classifyAndTrain c f pattern = (bmu, c') where (bmu, _, c') = reportAndTrain c f pattern -- | If @f@ is a function that returns the learning rate to apply to a -- node based on its distance from the node that best matches the -- @target@, then @'diffAndTrain' c f target@ returns a tuple -- containing: -- 1. The positions of all nodes in @c@, paired with the difference -- between @pattern@ and the node's pattern -- 2. A modified copy of the classifier @c@ that has partially -- learned the @target@. -- Invoking @diffAndTrain c f p@ may be faster than invoking -- @(p `diff` c, train c f p)@, but they should give identical -- results. diffAndTrain ∷ (Ord m, GridMap gm p, GridMap gm m, GridMap gm (Int, p), GridMap gm (m, p), Grid (gm p), Pattern p, Metric p ~ m, Index (BaseGrid gm p) ~ Index (gm p), BaseGrid gm m ~ BaseGrid gm p) ⇒ gm p → (Int → m) → p → (gm m, gm p) diffAndTrain c f pattern = (ds, c') where (_, ds, c') = reportAndTrain c f pattern reportAndTrain ∷ (Ord m, GridMap gm p, GridMap gm m, GridMap gm (Int, p), GridMap gm (m, p), Grid (gm p), Pattern p, Metric p ~ m, Index (BaseGrid gm p) ~ Index (gm p), BaseGrid gm m ~ BaseGrid gm p) ⇒ gm p → (Int → m) → p → (Index (gm p), gm m, gm p) reportAndTrain c f pattern = (bmu, ds, c') where ds = c `diff` pattern bmu = fst $ minimumBy (comparing snd) $ toList ds c' = trainWithBMU c f bmu pattern trainWithBMU ∷ (GridMap gm p, GridMap gm (Int, p), GridMap gm (m, p), Grid (gm p), Pattern p, Metric p ~ m, k ~ Index (BaseGrid gm p), k ~ Index (gm p)) ⇒ gm p → (Int → m) → k → p → gm p trainWithBMU c f bmu pattern = GM.map (adjustNode pattern) lrMap where dMap = mapWithKey (\k p → (distance c k bmu, p)) c lrMap = GM.map (\(d,p) → (f d, p)) dMap adjustNode ∷ (Pattern p) ⇒ p → (Metric p, p) → p adjustNode target (r,p) = makeSimilar target r p -- -- Using numeric vectors as patterns. -- magnitudeSquared ∷ Num a ⇒ [a] → a magnitudeSquared xs = sum $ map (\x → x*x) xs -- | Calculates the square of the Euclidean distance between two -- vectors. euclideanDistanceSquared ∷ Num a ⇒ [a] → [a] → a euclideanDistanceSquared xs ys = magnitudeSquared $ zipWith (-) xs ys -- | @'adjustVector' target amount vector@ adjusts @vector@ to move it -- closer to @target@. The amount of adjustment is controlled by the -- learning rate @r@, which is a number between 0 and 1. Larger values -- of @r@ permit more adjustment. If @r@=1, the result will be -- identical to the @target@. If @amount@=0, the result will be the -- unmodified @pattern@. adjustVector ∷ (Num a, Ord a, Eq a) ⇒ [a] → a → [a] → [a] adjustVector xs r ys | r < 0 = error "Negative learning rate" | r > 1 = error "Learning rate > 1" | r ≡ 1 = xs | otherwise = zipWith (+) ys deltas where ds = zipWith (-) xs ys deltas = map (r *) ds -- | A vector that has been normalised, i.e., the magnitude of the -- vector = 1. data NormalisedVector a = NormalisedVector [a] deriving Show -- | Normalises a vector normalise ∷ Floating a ⇒ [a] → NormalisedVector a normalise xs = NormalisedVector $ map (/x) xs where x = norm xs norm ∷ Floating a ⇒ [a] → a norm xs = sqrt $ sum (map f xs) where f x = x*x instance (Floating a, Fractional a, Ord a, Eq a) ⇒ Pattern (NormalisedVector a) where type Metric (NormalisedVector a) = a difference (NormalisedVector xs) (NormalisedVector ys) = euclideanDistanceSquared xs ys makeSimilar (NormalisedVector xs) r (NormalisedVector ys) = normalise $ adjustVector xs r ys -- | A vector that has been scaled so that all elements in the vector -- are between zero and one. To scale a set of vectors, use -- @'scaleAll'@. Alternatively, if you can identify a maximum and -- minimum value for each element in a vector, you can scale -- individual vectors using @'scale'@. data ScaledVector a = ScaledVector [a] deriving Show -- | Given a vector @qs@ of pairs of numbers, where each pair represents -- the maximum and minimum value to be expected at each position in -- @xs@, @'scale' qs xs@ scales the vector @xs@ element by element, -- mapping the maximum value expected at that position to one, and the -- minimum value to zero. scale ∷ Fractional a ⇒ [(a,a)] → [a] → ScaledVector a scale qs xs = ScaledVector $ zipWith scaleValue qs xs -- | Scales a set of vectors by determining the maximum and minimum -- values at each position in the vector, and mapping the maximum -- value to one, and the minimum value to zero. scaleAll ∷ (Fractional a, Ord a) ⇒ [[a]] → [ScaledVector a] scaleAll xss = map (scale qs) xss where qs = quantify xss scaleValue ∷ Fractional a ⇒ (a,a) → a → a scaleValue (minX,maxX) x = (x - minX) / (maxX-minX) quantify ∷ Ord a ⇒ [[a]] → [(a,a)] quantify xss = foldl' quantify' qs (tail xss) where qs = zip (head xss) (head xss) quantify' ∷ Ord a ⇒ [(a,a)] → [a] → [(a,a)] quantify' = zipWith f where f (minX, maxX) x = (min minX x, max maxX x) instance (Fractional a, Ord a, Eq a) ⇒ Pattern (ScaledVector a) where type Metric (ScaledVector a) = a difference (ScaledVector xs) (ScaledVector ys) = euclideanDistanceSquared xs ys makeSimilar (ScaledVector xs) r (ScaledVector ys) = ScaledVector $ adjustVector xs r ys