-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Self-Organising Maps
--
@package som
@version 7.2.3
-- | Tools for identifying patterns in data.
module Data.Datamining.Pattern
-- | A pattern to be learned or classified.
class Pattern p where type family Metric p
difference :: Pattern p => p -> p -> Metric p
makeSimilar :: Pattern p => p -> Metric p -> p -> p
adjustNum :: (Num a, Ord a, Eq a) => a -> a -> a -> a
absDifference :: Num a => a -> a -> a
-- | adjustVector target amount vector adjusts
-- vector to move it closer to target. The amount of
-- adjustment is controlled by the learning rate r, which is a
-- number between 0 and 1. Larger values of r permit more
-- adjustment. If r=1, the result will be identical to the
-- target. If amount=0, the result will be the
-- unmodified pattern.
adjustVector :: (Num a, Ord a, Eq a) => [a] -> a -> [a] -> [a]
-- | Calculates the square of the Euclidean distance between two vectors.
euclideanDistanceSquared :: Num a => [a] -> [a] -> a
magnitudeSquared :: Num a => [a] -> a
-- | A vector that has been normalised, i.e., the magnitude of the vector =
-- 1.
data NormalisedVector a
-- | Normalises a vector
normalise :: Floating a => [a] -> NormalisedVector a
-- | A vector that has been scaled so that all elements in the vector are
-- between zero and one. To scale a set of vectors, use
-- scaleAll. Alternatively, if you can identify a maximum
-- and minimum value for each element in a vector, you can scale
-- individual vectors using scale.
data ScaledVector a
-- | Given a vector qs of pairs of numbers, where each pair
-- represents the maximum and minimum value to be expected at each index
-- in xs, scale qs xs scales the vector
-- xs element by element, mapping the maximum value expected at
-- that index to one, and the minimum value to zero.
scale :: Fractional a => [(a, a)] -> [a] -> ScaledVector a
-- | Scales a set of vectors by determining the maximum and minimum values
-- at each index in the vector, and mapping the maximum value to one, and
-- the minimum value to zero.
scaleAll :: (Fractional a, Ord a) => [[a]] -> [ScaledVector a]
instance Show a => Show (NormalisedVector a)
instance Show a => Show (ScaledVector a)
instance (Fractional a, Ord a, Eq a) => Pattern (ScaledVector a)
instance (Floating a, Fractional a, Ord a, Eq a) => Pattern (NormalisedVector a)
-- | Tools for identifying patterns in data.
module Data.Datamining.Clustering.Classifier
-- | A machine which learns to classify input patterns. Minimal complete
-- definition: trainBatch, reportAndTrain.
class Classifier (c :: * -> * -> *) k p where
  classify c p = f $ differences c p
    where f [] = error "classifier has no models"
          f xs = fst $ minimumBy (comparing snd) xs
  train c p = c' where (_, _, c') = reportAndTrain c p
  classifyAndTrain c p = (bmu, c') where (bmu, _, c') = reportAndTrain c p
  diffAndTrain c p = (ds, c') where (_, ds, c') = reportAndTrain c p
toList :: Classifier c k p => c k p -> [(k, p)]
numModels :: Classifier c k p => c k p -> Int
models :: Classifier c k p => c k p -> [p]
differences :: (Classifier c k p, Pattern p, v ~ Metric p) => c k p -> p -> [(k, v)]
classify :: (Classifier c k p, Pattern p, Ord v, v ~ Metric p) => c k p -> p -> k
train :: (Classifier c k p, Ord v, v ~ Metric p) => c k p -> p -> c k p
trainBatch :: Classifier c k p => c k p -> [p] -> c k p
classifyAndTrain :: (Classifier c k p, Ord v, v ~ Metric p) => c k p -> p -> (k, c k p)
diffAndTrain :: (Classifier c k p, Ord v, v ~ Metric p) => c k p -> p -> ([(k, v)], c k p)
reportAndTrain :: (Classifier c k p, Ord v, v ~ Metric p) => c k p -> p -> (k, [(k, v)], c k p)
-- | A module containing private DSOM internals. Most developers
-- should use DSOM instead. This module is subject to change
-- without notice.
module Data.Datamining.Clustering.DSOMInternal
-- | A Self-Organising Map (DSOM).
--
-- Although DSOM implements GridMap, most users will
-- only need the interface provided by
-- Data.Datamining.Clustering.Classifier. If you chose to use
-- the GridMap functions, please note:
--
--
-- - The functions adjust, and adjustWithKey do not
-- increment the counter. You can do so manually with
-- incrementCounter.
-- - The functions map and mapWithKey are not
-- implemented (they just return an error). It would be
-- problematic to implement them because the input DSOM and the output
-- DSOM would have to have the same Metric type.
--
data DSOM gm k p
DSOM :: gm p -> (Metric p -> Metric p -> Metric p -> Metric p) -> DSOM gm k p
sGridMap :: DSOM gm k p -> gm p
sLearningFunction :: DSOM gm k p -> (Metric p -> Metric p -> Metric p -> Metric p)
-- | Extracts the grid and current models from the DSOM.
toGridMap :: GridMap gm p => DSOM gm k p -> gm p
adjustNode :: (Pattern p, FiniteGrid (gm p), GridMap gm p, k ~ Index (gm p), Ord k, k ~ Index (BaseGrid gm p), Num (Metric p), Fractional (Metric p)) => gm p -> (Metric p -> Metric p -> Metric p) -> p -> k -> k -> p -> p
scaleDistance :: (Num a, Fractional a) => Int -> Int -> a
-- | Trains the specified node and the neighbourhood around it to better
-- match a target. Most users should use train, which
-- automatically determines the BMU and trains it and its neighbourhood.
trainNeighbourhood :: (Pattern p, FiniteGrid (gm p), GridMap gm p, Num (Metric p), Ord k, k ~ Index (gm p), k ~ Index (BaseGrid gm p), Fractional (Metric p)) => DSOM gm t p -> k -> p -> DSOM gm k p
justTrain :: (Pattern p, FiniteGrid (gm p), GridMap gm p, Num (Metric p), Ord (Metric p), Ord (Index (gm p)), GridMap gm (Metric p), Fractional (Metric p), Index (BaseGrid gm (Metric p)) ~ Index (gm p), Index (BaseGrid gm p) ~ Index (gm p)) => DSOM gm t p -> p -> DSOM gm (Index (gm p)) p
-- | Creates a classifier with a default (bell-shaped) learning function.
-- Usage is defaultDSOM gm r p, where:
--
--
-- - gm The geometry and initial models for this
-- classifier. A reasonable choice here is lazyGridMap g
-- ps, where g is a HexHexGrid, and
-- ps is a set of random patterns.
-- - r and p are the first two parameters to
-- the rougierLearningFunction.
--
defaultDSOM :: (Eq (Metric p), Ord (Metric p), Floating (Metric p)) => gm p -> Metric p -> Metric p -> DSOM gm k p
-- | Creates a classifier with a custom learning function. Usage is
-- customDSOM gm f, where:
--
--
-- - gm The geometry and initial models for this
-- classifier. A reasonable choice here is lazyGridMap g
-- ps, where g is a HexHexGrid, and
-- ps is a set of random patterns.
-- - f A function used to determine the learning rate
-- (for adjusting the models in the classifier). This function will be
-- invoked with three parameters. The first parameter will indicate how
-- different the BMU is from the input pattern. The second parameter
-- indicates how different the pattern of the node currently being
-- trained is from the input pattern. The third parameter is the grid
-- distance from the BMU to the node currently being trained, as a
-- fraction of the maximum grid distance. The output is the learning rate
-- for that node (the amount by which the node's model should be updated
-- to match the target). The learning rate should be between zero and
-- one.
--
customDSOM :: gm p -> (Metric p -> Metric p -> Metric p -> Metric p) -> DSOM gm k p
-- | Configures a learning function that depends not on the time, but on
-- how good a model we already have for the target. If the BMU is an
-- exact match for the target, no learning occurs. Usage is
-- rougierLearningFunction r p, where r is the
-- maximal learning rate (0 <= r <= 1), and p is the
-- elasticity.
--
-- NOTE: When using this learning function, ensure that abs .
-- difference is always between 0 and 1, inclusive. Otherwise you
-- may get invalid learning rates.
rougierLearningFunction :: (Eq a, Ord a, Floating a) => a -> a -> (a -> a -> a -> a)
instance (GridMap gm p, k ~ Index (BaseGrid gm p), Pattern p, FiniteGrid (gm p), GridMap gm (Metric p), k ~ Index (gm p), k ~ Index (BaseGrid gm (Metric p)), Ord k, Ord (Metric p), Num (Metric p), Fractional (Metric p)) => Classifier (DSOM gm) k p
instance (Foldable gm, GridMap gm p, FiniteGrid (BaseGrid gm p)) => GridMap (DSOM gm k) p
instance Grid (gm p) => Grid (DSOM gm k p)
instance Foldable gm => Foldable (DSOM gm k)
-- | A modified Kohonen Self-organising Map (SOM) which supports a
-- time-independent learning function. (See SOM for a
-- description of a SOM.)
--
-- References:
--
--
-- - Rougier, N. & Boniface, Y. (2011). Dynamic self-organising
-- map. Neurocomputing, 74 (11), 1840-1847.
-- - Kohonen, T. (1982). Self-organized formation of topologically
-- correct feature maps. Biological Cybernetics, 43 (1), 59–69.
--
module Data.Datamining.Clustering.DSOM
-- | A Self-Organising Map (DSOM).
--
-- Although DSOM implements GridMap, most users will
-- only need the interface provided by
-- Data.Datamining.Clustering.Classifier. If you chose to use
-- the GridMap functions, please note:
--
--
-- - The functions adjust, and adjustWithKey do not
-- increment the counter. You can do so manually with
-- incrementCounter.
-- - The functions map and mapWithKey are not
-- implemented (they just return an error). It would be
-- problematic to implement them because the input DSOM and the output
-- DSOM would have to have the same Metric type.
--
data DSOM gm k p
-- | Creates a classifier with a default (bell-shaped) learning function.
-- Usage is defaultDSOM gm r p, where:
--
--
-- - gm The geometry and initial models for this
-- classifier. A reasonable choice here is lazyGridMap g
-- ps, where g is a HexHexGrid, and
-- ps is a set of random patterns.
-- - r and p are the first two parameters to
-- the rougierLearningFunction.
--
defaultDSOM :: (Eq (Metric p), Ord (Metric p), Floating (Metric p)) => gm p -> Metric p -> Metric p -> DSOM gm k p
-- | Creates a classifier with a custom learning function. Usage is
-- customDSOM gm f, where:
--
--
-- - gm The geometry and initial models for this
-- classifier. A reasonable choice here is lazyGridMap g
-- ps, where g is a HexHexGrid, and
-- ps is a set of random patterns.
-- - f A function used to determine the learning rate
-- (for adjusting the models in the classifier). This function will be
-- invoked with three parameters. The first parameter will indicate how
-- different the BMU is from the input pattern. The second parameter
-- indicates how different the pattern of the node currently being
-- trained is from the input pattern. The third parameter is the grid
-- distance from the BMU to the node currently being trained, as a
-- fraction of the maximum grid distance. The output is the learning rate
-- for that node (the amount by which the node's model should be updated
-- to match the target). The learning rate should be between zero and
-- one.
--
customDSOM :: gm p -> (Metric p -> Metric p -> Metric p -> Metric p) -> DSOM gm k p
-- | Configures a learning function that depends not on the time, but on
-- how good a model we already have for the target. If the BMU is an
-- exact match for the target, no learning occurs. Usage is
-- rougierLearningFunction r p, where r is the
-- maximal learning rate (0 <= r <= 1), and p is the
-- elasticity.
--
-- NOTE: When using this learning function, ensure that abs .
-- difference is always between 0 and 1, inclusive. Otherwise you
-- may get invalid learning rates.
rougierLearningFunction :: (Eq a, Ord a, Floating a) => a -> a -> (a -> a -> a -> a)
-- | Extracts the grid and current models from the DSOM.
toGridMap :: GridMap gm p => DSOM gm k p -> gm p
-- | Trains the specified node and the neighbourhood around it to better
-- match a target. Most users should use train, which
-- automatically determines the BMU and trains it and its neighbourhood.
trainNeighbourhood :: (Pattern p, FiniteGrid (gm p), GridMap gm p, Num (Metric p), Ord k, k ~ Index (gm p), k ~ Index (BaseGrid gm p), Fractional (Metric p)) => DSOM gm t p -> k -> p -> DSOM gm k p
-- | A module containing private SOM internals. Most developers
-- should use SOM instead. This module is subject to change
-- without notice.
module Data.Datamining.Clustering.SOMInternal
-- | A function used to adjust the models in a classifier.
class LearningFunction f where type family LearningRate f
rate :: LearningFunction f => f -> LearningRate f -> LearningRate f -> LearningRate f
-- | A typical learning function for classifiers.
-- DecayingGaussian r0 rf w0 wf tf returns a bell
-- curve-shaped function. At time zero, the maximum learning rate
-- (applied to the BMU) is r0, and the neighbourhood width is
-- w0. Over time the bell curve shrinks and the learning rate
-- tapers off, until at time tf, the maximum learning rate
-- (applied to the BMU) is rf, and the neighbourhood width is
-- wf. Normally the parameters should be chosen such that:
--
--
-- - 0 < rf << r0 < 1
-- - 0 < wf << w0
-- - 0 < tf
--
--
-- where << means "is much smaller than" (not the Haskell
-- << operator!)
data DecayingGaussian a
DecayingGaussian :: a -> a -> a -> a -> a -> DecayingGaussian a
-- | A learning function that only updates the BMU and has a constant
-- learning rate.
data StepFunction a
StepFunction :: a -> StepFunction a
-- | A learning function that updates all nodes with the same, constant
-- learning rate. This can be useful for testing.
data ConstantFunction a
ConstantFunction :: a -> ConstantFunction a
-- | A Self-Organising Map (SOM).
--
-- Although SOM implements GridMap, most users will
-- only need the interface provided by
-- Data.Datamining.Clustering.Classifier. If you chose to use
-- the GridMap functions, please note:
--
--
-- - The functions adjust, and adjustWithKey do not
-- increment the counter. You can do so manually with
-- incrementCounter.
-- - The functions map and mapWithKey are not
-- implemented (they just return an error). It would be
-- problematic to implement them because the input SOM and the output SOM
-- would have to have the same Metric type.
--
data SOM f t gm k p
SOM :: gm p -> f -> t -> SOM f t gm k p
-- | Maps patterns to tiles in a regular grid. In the context of a SOM, the
-- tiles are called "nodes"
gridMap :: SOM f t gm k p -> gm p
-- | The function used to update the nodes.
learningFunction :: SOM f t gm k p -> f
-- | A counter used as a "time" parameter. If you create the SOM with a
-- counter value 0, and don't directly modify it, then the
-- counter will represent the number of patterns that this SOM has
-- classified.
counter :: SOM f t gm k p -> t
currentLearningFunction :: (LearningFunction f, Metric p ~ LearningRate f, Num (LearningRate f), Integral t) => SOM f t gm k p -> (LearningRate f -> Metric p)
-- | Extracts the grid and current models from the SOM. A synonym for
-- gridMap.
toGridMap :: GridMap gm p => SOM f t gm k p -> gm p
adjustNode :: (Pattern p, Grid g, k ~ Index g, Num t) => g -> (t -> Metric p) -> p -> k -> k -> p -> p
-- | Trains the specified node and the neighbourhood around it to better
-- match a target. Most users should use train, which
-- automatically determines the BMU and trains it and its neighbourhood.
trainNeighbourhood :: (Pattern p, Grid (gm p), GridMap gm p, Index (BaseGrid gm p) ~ Index (gm p), LearningFunction f, Metric p ~ LearningRate f, Num (LearningRate f), Integral t) => SOM f t gm k p -> Index (gm p) -> p -> SOM f t gm k p
incrementCounter :: Num t => SOM f t gm k p -> SOM f t gm k p
justTrain :: (Ord (Metric p), Pattern p, Grid (gm p), GridMap gm (Metric p), GridMap gm p, Index (BaseGrid gm (Metric p)) ~ Index (gm p), Index (BaseGrid gm p) ~ Index (gm p), LearningFunction f, Metric p ~ LearningRate f, Num (LearningRate f), Integral t) => SOM f t gm k p -> p -> SOM f t gm k p
instance Eq a => Eq (DecayingGaussian a)
instance Show a => Show (DecayingGaussian a)
instance Generic (DecayingGaussian a)
instance Eq a => Eq (StepFunction a)
instance Show a => Show (StepFunction a)
instance Generic (StepFunction a)
instance Eq a => Eq (ConstantFunction a)
instance Show a => Show (ConstantFunction a)
instance Generic (ConstantFunction a)
instance (Eq f, Eq t, Eq (gm p)) => Eq (SOM f t gm k p)
instance (Show f, Show t, Show (gm p)) => Show (SOM f t gm k p)
instance Generic (SOM f t gm k p)
instance Datatype D1DecayingGaussian
instance Constructor C1_0DecayingGaussian
instance Datatype D1StepFunction
instance Constructor C1_0StepFunction
instance Datatype D1ConstantFunction
instance Constructor C1_0ConstantFunction
instance Datatype D1SOM
instance Constructor C1_0SOM
instance Selector S1_0_0SOM
instance Selector S1_0_1SOM
instance Selector S1_0_2SOM
instance (GridMap gm p, k ~ Index (BaseGrid gm p), Pattern p, Grid (gm p), GridMap gm (Metric p), k ~ Index (gm p), k ~ Index (BaseGrid gm (Metric p)), Ord (Metric p), LearningFunction f, Metric p ~ LearningRate f, Num (LearningRate f), Integral t) => Classifier (SOM f t gm) k p
instance (Foldable gm, GridMap gm p, Grid (BaseGrid gm p)) => GridMap (SOM f t gm k) p
instance Grid (gm p) => Grid (SOM f t gm k p)
instance Foldable gm => Foldable (SOM f t gm k)
instance Fractional a => LearningFunction (ConstantFunction a)
instance (Fractional a, Eq a) => LearningFunction (StepFunction a)
instance (Floating a, Fractional a, Num a) => LearningFunction (DecayingGaussian a)
-- | A Kohonen Self-organising Map (SOM). A SOM maps input patterns onto a
-- regular grid (usually two-dimensional) where each node in the grid is
-- a model of the input data, and does so using a method which ensures
-- that any topological relationships within the input data are also
-- represented in the grid. This implementation supports the use of
-- non-numeric patterns.
--
-- In layman's terms, a SOM can be useful when you want to discover
-- the underlying structure of some data. A tutorial is available at
-- https://github.com/mhwombat/som/wiki.
--
-- NOTES:
--
--
-- - Version 5.0 fixed a bug in the decayingGaussian
-- function. If you use defaultSOM (which uses this
-- function), your SOM should now learn more quickly.
-- - The gaussian function has been removed because it is not
-- as useful for SOMs as I originally thought. It was originally designed
-- to be used as a factor in a learning function. However, in most cases
-- the user will want to introduce a time decay into the exponent, rather
-- than simply multiply by a factor.
--
--
-- References:
--
--
-- - Kohonen, T. (1982). Self-organized formation of topologically
-- correct feature maps. Biological Cybernetics, 43 (1), 59–69.
--
module Data.Datamining.Clustering.SOM
-- | A Self-Organising Map (SOM).
--
-- Although SOM implements GridMap, most users will
-- only need the interface provided by
-- Data.Datamining.Clustering.Classifier. If you chose to use
-- the GridMap functions, please note:
--
--
-- - The functions adjust, and adjustWithKey do not
-- increment the counter. You can do so manually with
-- incrementCounter.
-- - The functions map and mapWithKey are not
-- implemented (they just return an error). It would be
-- problematic to implement them because the input SOM and the output SOM
-- would have to have the same Metric type.
--
data SOM f t gm k p
SOM :: gm p -> f -> t -> SOM f t gm k p
-- | Maps patterns to tiles in a regular grid. In the context of a SOM, the
-- tiles are called "nodes"
gridMap :: SOM f t gm k p -> gm p
-- | The function used to update the nodes.
learningFunction :: SOM f t gm k p -> f
-- | A counter used as a "time" parameter. If you create the SOM with a
-- counter value 0, and don't directly modify it, then the
-- counter will represent the number of patterns that this SOM has
-- classified.
counter :: SOM f t gm k p -> t
-- | A typical learning function for classifiers.
-- DecayingGaussian r0 rf w0 wf tf returns a bell
-- curve-shaped function. At time zero, the maximum learning rate
-- (applied to the BMU) is r0, and the neighbourhood width is
-- w0. Over time the bell curve shrinks and the learning rate
-- tapers off, until at time tf, the maximum learning rate
-- (applied to the BMU) is rf, and the neighbourhood width is
-- wf. Normally the parameters should be chosen such that:
--
--
-- - 0 < rf << r0 < 1
-- - 0 < wf << w0
-- - 0 < tf
--
--
-- where << means "is much smaller than" (not the Haskell
-- << operator!)
data DecayingGaussian a
DecayingGaussian :: a -> a -> a -> a -> a -> DecayingGaussian a
-- | Extracts the grid and current models from the SOM. A synonym for
-- gridMap.
toGridMap :: GridMap gm p => SOM f t gm k p -> gm p
-- | Trains the specified node and the neighbourhood around it to better
-- match a target. Most users should use train, which
-- automatically determines the BMU and trains it and its neighbourhood.
trainNeighbourhood :: (Pattern p, Grid (gm p), GridMap gm p, Index (BaseGrid gm p) ~ Index (gm p), LearningFunction f, Metric p ~ LearningRate f, Num (LearningRate f), Integral t) => SOM f t gm k p -> Index (gm p) -> p -> SOM f t gm k p