-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Self-Organising Maps
--
-- Please see the README on GitHub at
-- https://github.com/mhwombat/som#readme
@package som
@version 10.1.2
-- | Tools for identifying patterns in data.
module Data.Datamining.Clustering.Classifier
-- | A machine which learns to classify input patterns. Minimal complete
-- definition: trainBatch, reportAndTrain.
class Classifier (c :: * -> * -> * -> *) v k p
-- | Returns a list of index/model pairs.
toList :: Classifier c v k p => c v k p -> [(k, p)]
-- | Returns the number of models this classifier can learn.
numModels :: Classifier c v k p => c v k p -> Int
-- | Returns the current models of the classifier.
models :: Classifier c v k p => c v k p -> [p]
-- | differences c target returns the indices of all nodes
-- in c, paired with the difference between target and
-- the node's model.
differences :: Classifier c v k p => c v k p -> p -> [(k, v)]
-- | classify c target returns the index of the node in c
-- whose model best matches the target.
classify :: (Classifier c v k p, Ord v) => c v k p -> p -> k
-- | train c target returns a modified copy of the
-- classifier c that has partially learned the target.
train :: Classifier c v k p => c v k p -> p -> c v k p
-- | trainBatch c targets returns a modified copy of the
-- classifier c that has partially learned the targets.
trainBatch :: Classifier c v k p => c v k p -> [p] -> c v k p
-- | classifyAndTrain c target returns a tuple containing
-- the index of the node in c whose model best matches the input
-- target, and a modified copy of the classifier c that
-- has partially learned the target. Invoking
-- classifyAndTrain c p may be faster than invoking (p
-- classify c, train c p), but they should give identical
-- results.
classifyAndTrain :: Classifier c v k p => c v k p -> p -> (k, c v k p)
-- | diffAndTrain c target returns a tuple containing: 1.
-- The indices of all nodes in c, paired with the difference
-- between target and the node's model 2. A modified copy of the
-- classifier c that has partially learned the target.
-- Invoking diffAndTrain c p may be faster than invoking (p
-- diff c, train c p), but they should give identical
-- results.
diffAndTrain :: Classifier c v k p => c v k p -> p -> ([(k, v)], c v k p)
-- | reportAndTrain c f target returns a tuple containing:
-- 1. The index of the node in c whose model best matches the
-- input target 2. The indices of all nodes in c,
-- paired with the difference between target and the node's
-- model 3. A modified copy of the classifier c that has
-- partially learned the target Invoking diffAndTrain c
-- p may be faster than invoking (p diff c, train c
-- p), but they should give identical results.
reportAndTrain :: Classifier c v k p => c v k p -> p -> (k, [(k, v)], c v k p)
-- | A module containing private DSOM internals. Most developers
-- should use DSOM instead. This module is subject to change
-- without notice.
module Data.Datamining.Clustering.DSOMInternal
-- | A Self-Organising Map (DSOM).
--
-- Although DSOM implements GridMap, most users will
-- only need the interface provided by
-- Data.Datamining.Clustering.Classifier. If you chose to use
-- the GridMap functions, please note:
--
--
-- - The functions adjust, and adjustWithKey do not
-- increment the counter. You can do so manually with
-- incrementCounter.
-- - The functions map and mapWithKey are not
-- implemented (they just return an error). It would be
-- problematic to implement them because the input DSOM and the output
-- DSOM would have to have the same Metric type.
--
data DSOM gm x k p
DSOM :: gm p -> (x -> x -> x -> x) -> (p -> p -> x) -> (p -> x -> p -> p) -> DSOM gm x k p
-- | Maps patterns to tiles in a regular grid. In the context of a SOM, the
-- tiles are called "nodes"
[gridMap] :: DSOM gm x k p -> gm p
-- | A function which determines the how quickly the SOM learns.
[learningRate] :: DSOM gm x k p -> x -> x -> x -> x
-- | A function which compares two patterns and returns a
-- non-negative number representing how different the patterns
-- are. A result of 0 indicates that the patterns are identical.
[difference] :: DSOM gm x k p -> p -> p -> x
-- | A function which updates models. If this function is f, then
-- f target amount pattern returns a modified copy of
-- pattern that is more similar to target than
-- pattern is. The magnitude of the adjustment is controlled by
-- the amount parameter, which should be a number between 0 and
-- 1. Larger values for amount permit greater adjustments. If
-- amount=1, the result should be identical to the
-- target. If amount=0, the result should be the
-- unmodified pattern.
[makeSimilar] :: DSOM gm x k p -> p -> x -> p -> p
-- | Internal method.
withGridMap :: (gm p -> gm p) -> DSOM gm x k p -> DSOM gm x k p
-- | Extracts the grid and current models from the DSOM.
toGridMap :: GridMap gm p => DSOM gm x k p -> gm p
-- | Internal method.
adjustNode :: (FiniteGrid (gm p), GridMap gm p, k ~ Index (gm p), k ~ Index (BaseGrid gm p), Ord k, Num x, Fractional x) => gm p -> (p -> x -> p -> p) -> (p -> p -> x) -> (x -> x -> x) -> p -> k -> k -> p -> p
-- | Internal method.
scaleDistance :: (Num a, Fractional a) => Int -> Int -> a
-- | Trains the specified node and the neighbourood around it to better
-- match a target. Most users should use train, which
-- automatically determines the BMU and trains it and its neighbourhood.
trainNeighbourhood :: (FiniteGrid (gm p), GridMap gm p, k ~ Index (gm p), k ~ Index (BaseGrid gm p), Ord k, Num x, Fractional x) => DSOM gm x t p -> k -> p -> DSOM gm x k p
-- | Internal method.
justTrain :: (FiniteGrid (gm p), GridMap gm p, GridMap gm x, k ~ Index (gm p), k ~ Index (gm x), k ~ Index (BaseGrid gm p), k ~ Index (BaseGrid gm x), Ord k, Ord x, Num x, Fractional x) => DSOM gm x t p -> p -> DSOM gm x k p
-- | Configures a learning function that depends not on the time, but on
-- how good a model we already have for the target. If the BMU is an
-- exact match for the target, no learning occurs. Usage is
-- rougierLearningFunction r p, where r is the
-- maximal learning rate (0 <= r <= 1), and p is the
-- elasticity.
--
-- NOTE: When using this learning function, ensure that abs .
-- difference is always between 0 and 1, inclusive. Otherwise you
-- may get invalid learning rates.
rougierLearningFunction :: (Eq a, Ord a, Floating a) => a -> a -> a -> a -> a -> a
instance Control.DeepSeq.NFData (gm p) => Control.DeepSeq.NFData (Data.Datamining.Clustering.DSOMInternal.DSOM gm x k p)
instance GHC.Generics.Generic (Data.Datamining.Clustering.DSOMInternal.DSOM gm x k p)
instance Data.Foldable.Foldable gm => Data.Foldable.Foldable (Data.Datamining.Clustering.DSOMInternal.DSOM gm x k)
instance Math.Geometry.GridInternal.Grid (gm p) => Math.Geometry.GridInternal.Grid (Data.Datamining.Clustering.DSOMInternal.DSOM gm x k p)
instance (Data.Foldable.Foldable gm, Math.Geometry.GridMap.GridMap gm p, Math.Geometry.GridInternal.FiniteGrid (Math.Geometry.GridMap.BaseGrid gm p)) => Math.Geometry.GridMap.GridMap (Data.Datamining.Clustering.DSOMInternal.DSOM gm x k) p
instance (Math.Geometry.GridMap.GridMap gm p, k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (Math.Geometry.GridMap.BaseGrid gm p), Math.Geometry.GridInternal.FiniteGrid (gm p), Math.Geometry.GridMap.GridMap gm x, k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (gm p), k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (gm x), k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (Math.Geometry.GridMap.BaseGrid gm x), GHC.Classes.Ord k, GHC.Classes.Ord x, GHC.Num.Num x, GHC.Real.Fractional x) => Data.Datamining.Clustering.Classifier.Classifier (Data.Datamining.Clustering.DSOMInternal.DSOM gm) x k p
-- | A modified Kohonen Self-organising Map (SOM) which supports a
-- time-independent learning function. (See SOM for a
-- description of a SOM.)
--
-- References:
--
--
-- - Rougier, N. & Boniface, Y. (2011). Dynamic self-organising
-- map. Neurocomputing, 74 (11), 1840-1847.
-- - Kohonen, T. (1982). Self-organized formation of topologically
-- correct feature maps. Biological Cybernetics, 43 (1), 59–69.
--
module Data.Datamining.Clustering.DSOM
-- | A Self-Organising Map (DSOM).
--
-- Although DSOM implements GridMap, most users will
-- only need the interface provided by
-- Data.Datamining.Clustering.Classifier. If you chose to use
-- the GridMap functions, please note:
--
--
-- - The functions adjust, and adjustWithKey do not
-- increment the counter. You can do so manually with
-- incrementCounter.
-- - The functions map and mapWithKey are not
-- implemented (they just return an error). It would be
-- problematic to implement them because the input DSOM and the output
-- DSOM would have to have the same Metric type.
--
data DSOM gm x k p
DSOM :: gm p -> (x -> x -> x -> x) -> (p -> p -> x) -> (p -> x -> p -> p) -> DSOM gm x k p
-- | Maps patterns to tiles in a regular grid. In the context of a SOM, the
-- tiles are called "nodes"
[gridMap] :: DSOM gm x k p -> gm p
-- | A function which determines the how quickly the SOM learns.
[learningRate] :: DSOM gm x k p -> x -> x -> x -> x
-- | A function which compares two patterns and returns a
-- non-negative number representing how different the patterns
-- are. A result of 0 indicates that the patterns are identical.
[difference] :: DSOM gm x k p -> p -> p -> x
-- | A function which updates models. If this function is f, then
-- f target amount pattern returns a modified copy of
-- pattern that is more similar to target than
-- pattern is. The magnitude of the adjustment is controlled by
-- the amount parameter, which should be a number between 0 and
-- 1. Larger values for amount permit greater adjustments. If
-- amount=1, the result should be identical to the
-- target. If amount=0, the result should be the
-- unmodified pattern.
[makeSimilar] :: DSOM gm x k p -> p -> x -> p -> p
-- | Extracts the grid and current models from the DSOM.
toGridMap :: GridMap gm p => DSOM gm x k p -> gm p
-- | Configures a learning function that depends not on the time, but on
-- how good a model we already have for the target. If the BMU is an
-- exact match for the target, no learning occurs. Usage is
-- rougierLearningFunction r p, where r is the
-- maximal learning rate (0 <= r <= 1), and p is the
-- elasticity.
--
-- NOTE: When using this learning function, ensure that abs .
-- difference is always between 0 and 1, inclusive. Otherwise you
-- may get invalid learning rates.
rougierLearningFunction :: (Eq a, Ord a, Floating a) => a -> a -> a -> a -> a -> a
-- | Trains the specified node and the neighbourood around it to better
-- match a target. Most users should use train, which
-- automatically determines the BMU and trains it and its neighbourhood.
trainNeighbourhood :: (FiniteGrid (gm p), GridMap gm p, k ~ Index (gm p), k ~ Index (BaseGrid gm p), Ord k, Num x, Fractional x) => DSOM gm x t p -> k -> p -> DSOM gm x k p
-- | A module containing private SGM internals. Most developers
-- should use SGM instead. This module is subject to change
-- without notice.
module Data.Datamining.Clustering.SGM2Internal
-- | A typical learning function for classifiers. exponential r0
-- d t returns the learning rate at time t. When t =
-- 0, the learning rate is r0. Over time the learning rate
-- decays exponentially; the decay rate is d. Normally the
-- parameters are chosen such that:
--
--
-- - 0 < r0 < 1
-- - 0 < d
--
exponential :: (Floating a, Integral t) => a -> a -> t -> a
-- | A Simplified Self-Organising Map (SGM). t is the type of the
-- counter. x is the type of the learning rate and the
-- difference metric. k is the type of the model indices.
-- p is the type of the input patterns and models.
data SGM t x k p
SGM :: Map k (p, t) -> (t -> x) -> Int -> (p -> p -> x) -> (p -> x -> p -> p) -> k -> SGM t x k p
-- | Maps patterns and match counts to nodes.
[toMap] :: SGM t x k p -> Map k (p, t)
-- | A function which determines the learning rate for a node. The input
-- parameter indicates how many patterns (or pattern batches) have
-- previously been presented to the classifier. Typically this is used to
-- make the learning rate decay over time. The output is the learning
-- rate for that node (the amount by which the node's model should be
-- updated to match the target). The learning rate should be between zero
-- and one.
[learningRate] :: SGM t x k p -> t -> x
-- | The maximum number of models this SGM can hold.
[capacity] :: SGM t x k p -> Int
-- | A function which compares two patterns and returns a
-- non-negative number representing how different the patterns
-- are. A result of 0 indicates that the patterns are identical.
[difference] :: SGM t x k p -> p -> p -> x
-- | A function which updates models. For example, if this function is
-- f, then f target amount pattern returns a modified
-- copy of pattern that is more similar to target than
-- pattern is. The magnitude of the adjustment is controlled by
-- the amount parameter, which should be a number between 0 and
-- 1. Larger values for amount permit greater adjustments. If
-- amount=1, the result should be identical to the
-- target. If amount=0, the result should be the
-- unmodified pattern.
[makeSimilar] :: SGM t x k p -> p -> x -> p -> p
-- | Index for the next node to add to the SGM.
[nextIndex] :: SGM t x k p -> k
-- | makeSGM lr n diff ms creates a new SGM that does not
-- (yet) contain any models. It will learn at the rate determined by the
-- learning function lr, and will be able to hold up to
-- n models. It will create a new model based on a pattern
-- presented to it when the SGM is not at capacity, or a less useful
-- model can be replaced. It will use the function diff to
-- measure the similarity between an input pattern and a model. It will
-- use the function ms to adjust models as needed to make them
-- more similar to input patterns.
makeSGM :: Bounded k => (t -> x) -> Int -> (p -> p -> x) -> (p -> x -> p -> p) -> SGM t x k p
-- | Returns true if the SGM has no models, false otherwise.
isEmpty :: SGM t x k p -> Bool
-- | Returns the number of models the SGM currently contains.
size :: SGM t x k p -> Int
-- | Returns a map from node ID to model.
modelMap :: SGM t x k p -> Map k p
-- | Returns a map from node ID to counter (number of times the node's
-- model has been the closest match to an input pattern).
counterMap :: SGM t x k p -> Map k t
-- | Returns the model at a specified node.
modelAt :: Ord k => SGM t x k p -> k -> p
-- | Returns the match counter for a specified node.
counterAt :: Ord k => SGM t x k p -> k -> t
-- | Returns the current labels.
labels :: SGM t x k p -> [k]
-- | The current "time" (number of times the SGM has been trained).
time :: Num t => SGM t x k p -> t
-- | Adds a new node to the SGM.
addNode :: (Num t, Enum k, Ord k) => p -> SGM t x k p -> SGM t x k p
-- | Increments the counter.
incrementCounter :: (Num t, Ord k) => k -> SGM t x k p -> SGM t x k p
-- | Trains the specified node to better match a target. Most users should
-- use train, which automatically determines the BMU and
-- trains it.
trainNode :: (Num t, Ord k) => SGM t x k p -> k -> p -> SGM t x k p
-- | Calculates the difference between all pairs of non-identical labels in
-- the SGM.
modelDiffs :: (Eq k, Ord k) => SGM t x k p -> [((k, k), x)]
-- | Generates all pairs of non-identical labels in the SGM.
labelPairs :: Eq k => SGM t x k p -> [(k, k)]
-- | Pairs a node label with all labels except itself.
labelPairs' :: Eq k => SGM t x k p -> k -> [(k, k)]
-- | Returns the labels of the two most similar models, and the difference
-- between them.
twoMostSimilar :: (Ord x, Eq k, Ord k) => SGM t x k p -> (k, k, x)
-- | Deletes the least used (least matched) model in a pair, and returns
-- its label (now available) and the updated SGM. TODO: Modify the other
-- model to make it slightly more similar to the one that was deleted?
mergeModels :: (Num t, Ord t, Ord k) => SGM t x k p -> k -> k -> (k, SGM t x k p)
-- | Set the model for a node. Useful when merging two models and replacing
-- one.
setModel :: (Num t, Ord k) => SGM t x k p -> k -> p -> SGM t x k p
-- | Add a new node, making room for it by merging two existing nodes.
mergeAddModel :: (Num t, Ord t, Ord k) => SGM t x k p -> k -> k -> p -> SGM t x k p
-- | classify s p identifies the model s that most
-- closely matches the pattern p. It will not make any changes
-- to the classifier. (I.e., it will not change the models or match
-- counts.) Returns the ID of the node with the best matching model, the
-- difference between the best matching model and the pattern, and the
-- SGM labels paired with the model and the difference between the input
-- and the corresponding model. The final paired list is sorted in
-- decreasing order of similarity.
classify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x))
-- | Order models by ascending difference from the input pattern, then by
-- creation order (label number).
matchOrder :: (Ord a, Ord b) => (a, b) -> (a, b) -> Ordering
-- | trainAndClassify s p identifies the model in
-- s that most closely matches p, and updates it to be
-- a somewhat better match. If necessary, it will create a new node and
-- model. Returns the ID of the node with the best matching model, the
-- difference between the pattern and the best matching model in the
-- original SGM (before training or adding a new model), the differences
-- between the pattern and each model in the updated SGM, and the updated
-- SGM.
trainAndClassify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)
-- | Internal method. NOTE: This function will adjust the model and update
-- the match for the BMU.
trainAndClassify' :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)
-- | Internal method.
addModelTrainAndClassify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)
-- | train s p identifies the model in s that most
-- closely matches p, and updates it to be a somewhat better
-- match. If necessary, it will create a new node and model.
train :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> SGM t x k p
-- | For each pattern p in ps, trainBatch s
-- ps identifies the model in s that most closely matches
-- p, and updates it to be a somewhat better match.
trainBatch :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> [p] -> SGM t x k p
instance (Control.DeepSeq.NFData k, Control.DeepSeq.NFData p, Control.DeepSeq.NFData t) => Control.DeepSeq.NFData (Data.Datamining.Clustering.SGM2Internal.SGM t x k p)
instance GHC.Generics.Generic (Data.Datamining.Clustering.SGM2Internal.SGM t x k p)
-- | A Self-generating Model (SGM). An SGM maps input patterns onto a set,
-- where each element in the set is a model of the input data. An SGM is
-- like a Kohonen Self-organising Map (SOM), except:
--
--
-- - Instead of a grid, it uses a simple set of unconnected models.
-- Since the models are unconnected, only the model that best matches the
-- input is ever updated. This makes it faster, however, topological
-- relationships within the input data are not preserved.
-- - New models are created on-the-fly when no existing model is
-- similar enough to an input pattern. If the SGM is at capacity, the
-- least useful model will be deleted.
--
--
-- This implementation supports the use of non-numeric patterns.
--
-- In layman's terms, a SGM can be useful when you you want to build a
-- set of models on some data. A tutorial is available at
-- https://github.com/mhwombat/som/wiki.
--
-- References:
--
--
-- - Amy de Buitléir, Mark Daly, and Michael Russell. The
-- Self-generating Model: an Adaptation of the Self-organizing Map for
-- Intelligent Agents and Data Mining. In: Artificial Life and
-- Intelligent Agents: Second International Symposium, ALIA 2016,
-- Birmingham, UK, June 14-15, 2016, Revised Selected Papers. Ed. by
-- Peter R. Lewis et al. Springer International Publishing, 2018, pp.
-- 59–72. Available at http://amydebuitleir.eu/publications/.
-- - Amy de Buitléir, Michael Russell, and Mark Daly. Wains: A
-- pattern-seeking artificial life species. Artificial Life,
-- (18)4:399–423, 2012. Available at
-- http://amydebuitleir.eu/publications/.
-- - Kohonen, T. (1982). Self-organized formation of topologically
-- correct feature maps. Biological Cybernetics, 43 (1), 59–69.
--
module Data.Datamining.Clustering.SGM2
-- | A Simplified Self-Organising Map (SGM). t is the type of the
-- counter. x is the type of the learning rate and the
-- difference metric. k is the type of the model indices.
-- p is the type of the input patterns and models.
data SGM t x k p
SGM :: Map k (p, t) -> (t -> x) -> Int -> (p -> p -> x) -> (p -> x -> p -> p) -> k -> SGM t x k p
-- | Maps patterns and match counts to nodes.
[toMap] :: SGM t x k p -> Map k (p, t)
-- | A function which determines the learning rate for a node. The input
-- parameter indicates how many patterns (or pattern batches) have
-- previously been presented to the classifier. Typically this is used to
-- make the learning rate decay over time. The output is the learning
-- rate for that node (the amount by which the node's model should be
-- updated to match the target). The learning rate should be between zero
-- and one.
[learningRate] :: SGM t x k p -> t -> x
-- | The maximum number of models this SGM can hold.
[capacity] :: SGM t x k p -> Int
-- | A function which compares two patterns and returns a
-- non-negative number representing how different the patterns
-- are. A result of 0 indicates that the patterns are identical.
[difference] :: SGM t x k p -> p -> p -> x
-- | A function which updates models. For example, if this function is
-- f, then f target amount pattern returns a modified
-- copy of pattern that is more similar to target than
-- pattern is. The magnitude of the adjustment is controlled by
-- the amount parameter, which should be a number between 0 and
-- 1. Larger values for amount permit greater adjustments. If
-- amount=1, the result should be identical to the
-- target. If amount=0, the result should be the
-- unmodified pattern.
[makeSimilar] :: SGM t x k p -> p -> x -> p -> p
-- | Index for the next node to add to the SGM.
[nextIndex] :: SGM t x k p -> k
-- | makeSGM lr n diff ms creates a new SGM that does not
-- (yet) contain any models. It will learn at the rate determined by the
-- learning function lr, and will be able to hold up to
-- n models. It will create a new model based on a pattern
-- presented to it when the SGM is not at capacity, or a less useful
-- model can be replaced. It will use the function diff to
-- measure the similarity between an input pattern and a model. It will
-- use the function ms to adjust models as needed to make them
-- more similar to input patterns.
makeSGM :: Bounded k => (t -> x) -> Int -> (p -> p -> x) -> (p -> x -> p -> p) -> SGM t x k p
-- | The current "time" (number of times the SGM has been trained).
time :: Num t => SGM t x k p -> t
-- | Returns true if the SGM has no models, false otherwise.
isEmpty :: SGM t x k p -> Bool
-- | Returns the number of models the SGM currently contains.
size :: SGM t x k p -> Int
-- | Returns a map from node ID to model.
modelMap :: SGM t x k p -> Map k p
-- | Returns a map from node ID to counter (number of times the node's
-- model has been the closest match to an input pattern).
counterMap :: SGM t x k p -> Map k t
-- | Returns the model at a specified node.
modelAt :: Ord k => SGM t x k p -> k -> p
-- | A typical learning function for classifiers. exponential r0
-- d t returns the learning rate at time t. When t =
-- 0, the learning rate is r0. Over time the learning rate
-- decays exponentially; the decay rate is d. Normally the
-- parameters are chosen such that:
--
--
-- - 0 < r0 < 1
-- - 0 < d
--
exponential :: (Floating a, Integral t) => a -> a -> t -> a
-- | classify s p identifies the model s that most
-- closely matches the pattern p. It will not make any changes
-- to the classifier. (I.e., it will not change the models or match
-- counts.) Returns the ID of the node with the best matching model, the
-- difference between the best matching model and the pattern, and the
-- SGM labels paired with the model and the difference between the input
-- and the corresponding model. The final paired list is sorted in
-- decreasing order of similarity.
classify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x))
-- | trainAndClassify s p identifies the model in
-- s that most closely matches p, and updates it to be
-- a somewhat better match. If necessary, it will create a new node and
-- model. Returns the ID of the node with the best matching model, the
-- difference between the pattern and the best matching model in the
-- original SGM (before training or adding a new model), the differences
-- between the pattern and each model in the updated SGM, and the updated
-- SGM.
trainAndClassify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)
-- | train s p identifies the model in s that most
-- closely matches p, and updates it to be a somewhat better
-- match. If necessary, it will create a new node and model.
train :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> SGM t x k p
-- | For each pattern p in ps, trainBatch s
-- ps identifies the model in s that most closely matches
-- p, and updates it to be a somewhat better match.
trainBatch :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> [p] -> SGM t x k p
-- | A module containing private SGM internals. Most developers
-- should use SGM instead. This module is subject to change
-- without notice.
module Data.Datamining.Clustering.SGMInternal
-- | A typical learning function for classifiers. exponential r0
-- d t returns the learning rate at time t. When t =
-- 0, the learning rate is r0. Over time the learning rate
-- decays exponentially; the decay rate is d. Normally the
-- parameters are chosen such that:
--
--
-- - 0 < r0 < 1
-- - 0 < d
--
exponential :: (Floating a, Integral t) => a -> a -> t -> a
-- | A Simplified Self-Organising Map (SGM). t is the type of the
-- counter. x is the type of the learning rate and the
-- difference metric. k is the type of the model indices.
-- p is the type of the input patterns and models.
data SGM t x k p
SGM :: Map k (p, t) -> (t -> x) -> Int -> x -> Bool -> (p -> p -> x) -> (p -> x -> p -> p) -> k -> SGM t x k p
-- | Maps patterns and match counts to nodes.
[toMap] :: SGM t x k p -> Map k (p, t)
-- | A function which determines the learning rate for a node. The input
-- parameter indicates how many patterns (or pattern batches) have
-- previously been presented to the classifier. Typically this is used to
-- make the learning rate decay over time. The output is the learning
-- rate for that node (the amount by which the node's model should be
-- updated to match the target). The learning rate should be between zero
-- and one.
[learningRate] :: SGM t x k p -> t -> x
-- | The maximum number of models this SGM can hold.
[maxSize] :: SGM t x k p -> Int
-- | The threshold that triggers creation of a new model.
[diffThreshold] :: SGM t x k p -> x
-- | Delete existing models to make room for new ones? The least useful
-- (least frequently matched) models will be deleted first.
[allowDeletion] :: SGM t x k p -> Bool
-- | A function which compares two patterns and returns a
-- non-negative number representing how different the patterns
-- are. A result of 0 indicates that the patterns are identical.
[difference] :: SGM t x k p -> p -> p -> x
-- | A function which updates models. For example, if this function is
-- f, then f target amount pattern returns a modified
-- copy of pattern that is more similar to target than
-- pattern is. The magnitude of the adjustment is controlled by
-- the amount parameter, which should be a number between 0 and
-- 1. Larger values for amount permit greater adjustments. If
-- amount=1, the result should be identical to the
-- target. If amount=0, the result should be the
-- unmodified pattern.
[makeSimilar] :: SGM t x k p -> p -> x -> p -> p
-- | Index for the next node to add to the SGM.
[nextIndex] :: SGM t x k p -> k
-- | makeSGM lr n dt diff ms creates a new SGM that does
-- not (yet) contain any models. It will learn at the rate determined by
-- the learning function lr, and will be able to hold up to
-- n models. It will create a new model based on a pattern
-- presented to it when (1) the SGM contains no models, or (2) the
-- difference between the pattern and the closest matching model exceeds
-- the threshold dt. It will use the function diff to
-- measure the similarity between an input pattern and a model. It will
-- use the function ms to adjust models as needed to make them
-- more similar to input patterns.
makeSGM :: Bounded k => (t -> x) -> Int -> x -> Bool -> (p -> p -> x) -> (p -> x -> p -> p) -> SGM t x k p
-- | Returns true if the SGM has no models, false otherwise.
isEmpty :: SGM t x k p -> Bool
-- | Returns the number of models the SGM currently contains.
numModels :: SGM t x k p -> Int
-- | Returns a map from node ID to model.
modelMap :: SGM t x k p -> Map k p
-- | Returns a map from node ID to counter (number of times the node's
-- model has been the closest match to an input pattern).
counterMap :: SGM t x k p -> Map k t
-- | Returns the model at a specified node.
modelAt :: Ord k => SGM t x k p -> k -> p
-- | Returns the current labels.
labels :: SGM t x k p -> [k]
-- | Returns the current models.
models :: SGM t x k p -> [p]
-- | Returns the current counters (number of times the node's model has
-- been the closest match to an input pattern).
counters :: SGM t x k p -> [t]
-- | The current "time" (number of times the SGM has been trained).
time :: Num t => SGM t x k p -> t
-- | Adds a new node to the SGM.
addNode :: (Num t, Enum k, Ord k) => p -> SGM t x k p -> SGM t x k p
-- | Removes a node from the SGM. Deleted nodes are never re-used.
deleteNode :: Ord k => k -> SGM t x k p -> SGM t x k p
-- | Increment the match counter.
incrementCounter :: (Num t, Ord k) => k -> SGM t x k p -> SGM t x k p
-- | Trains the specified node to better match a target. Most users should
-- use train, which automatically determines the BMU and
-- trains it.
trainNode :: (Num t, Ord k) => SGM t x k p -> k -> p -> SGM t x k p
-- | Returns the node that has been the BMU least often.
leastUsefulNode :: Ord t => SGM t x k p -> k
-- | Deletes the node that has been the BMU least often.
deleteLeastUsefulNode :: (Ord t, Ord k) => SGM t x k p -> SGM t x k p
-- | Adds a new node to the SGM, deleting the least useful node/model if
-- necessary to make room.
addModel :: (Num t, Ord t, Enum k, Ord k) => p -> SGM t x k p -> SGM t x k p
-- | classify s p identifies the model s that most
-- closely matches the pattern p. It will not make any changes
-- to the classifier. Returns the ID of the node with the best matching
-- model, the difference between the best matching model and the pattern,
-- and the SGM labels paired with the model and the difference between
-- the input and the corresponding model. The final paired list is sorted
-- in decreasing order of similarity.
classify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x))
-- | Internal method. NOTE: This function may create a new model, but it
-- does not modify existing models.
classify' :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)
-- | Order models by ascending difference from the input pattern, then by
-- creation order (label number).
matchOrder :: (Ord a, Ord b) => (a, b) -> (a, b) -> Ordering
-- | trainAndClassify s p identifies the model in
-- s that most closely matches p, and updates it to be
-- a somewhat better match. If necessary, it will create a new node and
-- model. Returns the ID of the node with the best matching model, the
-- difference between the best matching model and the pattern, the
-- differences between the input and each model in the SGM, and the
-- updated SGM.
trainAndClassify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)
-- | train s p identifies the model in s that most
-- closely matches p, and updates it to be a somewhat better
-- match. If necessary, it will create a new node and model.
train :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> SGM t x k p
-- | For each pattern p in ps, trainBatch s
-- ps identifies the model in s that most closely matches
-- p, and updates it to be a somewhat better match.
trainBatch :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> [p] -> SGM t x k p
instance (Control.DeepSeq.NFData k, Control.DeepSeq.NFData p, Control.DeepSeq.NFData t, Control.DeepSeq.NFData x) => Control.DeepSeq.NFData (Data.Datamining.Clustering.SGMInternal.SGM t x k p)
instance GHC.Generics.Generic (Data.Datamining.Clustering.SGMInternal.SGM t x k p)
-- | A Self-generating Model (SGM). An SGM maps input patterns onto a set,
-- where each element in the set is a model of the input data. An SGM is
-- like a Kohonen Self-organising Map (SOM), except:
--
--
-- - Instead of a grid, it uses a simple set of unconnected models.
-- Since the models are unconnected, only the model that best matches the
-- input is ever updated. This makes it faster, however, topological
-- relationships within the input data are not preserved.
-- - New models are created on-the-fly when no existing model is
-- similar enough to an input pattern. If the SGM is at capacity, the
-- least useful model will be deleted.
--
--
-- This implementation supports the use of non-numeric patterns.
--
-- In layman's terms, a SGM can be useful when you you want to build a
-- set of models on some data. A tutorial is available at
-- https://github.com/mhwombat/som/wiki.
--
-- References:
--
--
-- - Amy de Buitléir, Mark Daly, and Michael Russell. The
-- Self-generating Model: an Adaptation of the Self-organizing Map for
-- Intelligent Agents and Data Mining. In: Artificial Life and
-- Intelligent Agents: Second International Symposium, ALIA 2016,
-- Birmingham, UK, June 14-15, 2016, Revised Selected Papers. Ed. by
-- Peter R. Lewis et al. Springer International Publishing, 2018, pp.
-- 59–72. Available at http://amydebuitleir.eu/publications/.
-- - Amy de Buitléir, Michael Russell, and Mark Daly. Wains: A
-- pattern-seeking artificial life species. Artificial Life,
-- (18)4:399–423, 2012. Available at
-- http://amydebuitleir.eu/publications/.
-- - Kohonen, T. (1982). Self-organized formation of topologically
-- correct feature maps. Biological Cybernetics, 43 (1), 59–69.
--
module Data.Datamining.Clustering.SGM
-- | A Simplified Self-Organising Map (SGM). t is the type of the
-- counter. x is the type of the learning rate and the
-- difference metric. k is the type of the model indices.
-- p is the type of the input patterns and models.
data SGM t x k p
SGM :: Map k (p, t) -> (t -> x) -> Int -> x -> Bool -> (p -> p -> x) -> (p -> x -> p -> p) -> k -> SGM t x k p
-- | Maps patterns and match counts to nodes.
[toMap] :: SGM t x k p -> Map k (p, t)
-- | A function which determines the learning rate for a node. The input
-- parameter indicates how many patterns (or pattern batches) have
-- previously been presented to the classifier. Typically this is used to
-- make the learning rate decay over time. The output is the learning
-- rate for that node (the amount by which the node's model should be
-- updated to match the target). The learning rate should be between zero
-- and one.
[learningRate] :: SGM t x k p -> t -> x
-- | The maximum number of models this SGM can hold.
[maxSize] :: SGM t x k p -> Int
-- | The threshold that triggers creation of a new model.
[diffThreshold] :: SGM t x k p -> x
-- | Delete existing models to make room for new ones? The least useful
-- (least frequently matched) models will be deleted first.
[allowDeletion] :: SGM t x k p -> Bool
-- | A function which compares two patterns and returns a
-- non-negative number representing how different the patterns
-- are. A result of 0 indicates that the patterns are identical.
[difference] :: SGM t x k p -> p -> p -> x
-- | A function which updates models. For example, if this function is
-- f, then f target amount pattern returns a modified
-- copy of pattern that is more similar to target than
-- pattern is. The magnitude of the adjustment is controlled by
-- the amount parameter, which should be a number between 0 and
-- 1. Larger values for amount permit greater adjustments. If
-- amount=1, the result should be identical to the
-- target. If amount=0, the result should be the
-- unmodified pattern.
[makeSimilar] :: SGM t x k p -> p -> x -> p -> p
-- | Index for the next node to add to the SGM.
[nextIndex] :: SGM t x k p -> k
-- | makeSGM lr n dt diff ms creates a new SGM that does
-- not (yet) contain any models. It will learn at the rate determined by
-- the learning function lr, and will be able to hold up to
-- n models. It will create a new model based on a pattern
-- presented to it when (1) the SGM contains no models, or (2) the
-- difference between the pattern and the closest matching model exceeds
-- the threshold dt. It will use the function diff to
-- measure the similarity between an input pattern and a model. It will
-- use the function ms to adjust models as needed to make them
-- more similar to input patterns.
makeSGM :: Bounded k => (t -> x) -> Int -> x -> Bool -> (p -> p -> x) -> (p -> x -> p -> p) -> SGM t x k p
-- | The current "time" (number of times the SGM has been trained).
time :: Num t => SGM t x k p -> t
-- | Returns true if the SGM has no models, false otherwise.
isEmpty :: SGM t x k p -> Bool
-- | Returns the number of models the SGM currently contains.
numModels :: SGM t x k p -> Int
-- | Returns a map from node ID to model.
modelMap :: SGM t x k p -> Map k p
-- | Returns a map from node ID to counter (number of times the node's
-- model has been the closest match to an input pattern).
counterMap :: SGM t x k p -> Map k t
-- | Returns the model at a specified node.
modelAt :: Ord k => SGM t x k p -> k -> p
-- | A typical learning function for classifiers. exponential r0
-- d t returns the learning rate at time t. When t =
-- 0, the learning rate is r0. Over time the learning rate
-- decays exponentially; the decay rate is d. Normally the
-- parameters are chosen such that:
--
--
-- - 0 < r0 < 1
-- - 0 < d
--
exponential :: (Floating a, Integral t) => a -> a -> t -> a
-- | classify s p identifies the model s that most
-- closely matches the pattern p. It will not make any changes
-- to the classifier. Returns the ID of the node with the best matching
-- model, the difference between the best matching model and the pattern,
-- and the SGM labels paired with the model and the difference between
-- the input and the corresponding model. The final paired list is sorted
-- in decreasing order of similarity.
classify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x))
-- | trainAndClassify s p identifies the model in
-- s that most closely matches p, and updates it to be
-- a somewhat better match. If necessary, it will create a new node and
-- model. Returns the ID of the node with the best matching model, the
-- difference between the best matching model and the pattern, the
-- differences between the input and each model in the SGM, and the
-- updated SGM.
trainAndClassify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)
-- | train s p identifies the model in s that most
-- closely matches p, and updates it to be a somewhat better
-- match. If necessary, it will create a new node and model.
train :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> SGM t x k p
-- | For each pattern p in ps, trainBatch s
-- ps identifies the model in s that most closely matches
-- p, and updates it to be a somewhat better match.
trainBatch :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> [p] -> SGM t x k p
-- | A module containing private SOM internals. Most developers
-- should use SOM instead. This module is subject to change
-- without notice.
module Data.Datamining.Clustering.SOMInternal
-- | A typical learning function for classifiers.
-- decayingGaussian r0 rf w0 wf tf returns a bell
-- curve-shaped function. At time zero, the maximum learning rate
-- (applied to the BMU) is r0, and the neighbourhood width is
-- w0. Over time the bell curve shrinks and the learning rate
-- tapers off, until at time tf, the maximum learning rate
-- (applied to the BMU) is rf, and the neighbourhood width is
-- wf. Normally the parameters should be chosen such that:
--
--
-- - 0 < rf << r0 < 1
-- - 0 < wf << w0
-- - 0 < tf
--
--
-- where << means "is much smaller than" (not the Haskell
-- << operator!)
decayingGaussian :: Floating x => x -> x -> x -> x -> x -> x -> x -> x
-- | A learning function that only updates the BMU and has a constant
-- learning rate.
stepFunction :: (Num d, Fractional x, Eq d) => x -> t -> d -> x
-- | A learning function that updates all nodes with the same, constant
-- learning rate. This can be useful for testing.
constantFunction :: x -> t -> d -> x
-- | A Self-Organising Map (SOM).
--
-- Although SOM implements GridMap, most users will
-- only need the interface provided by
-- Data.Datamining.Clustering.Classifier. If you chose to use
-- the GridMap functions, please note:
--
--
-- - The functions adjust, and adjustWithKey do not
-- increment the counter. You can do so manually with
-- incrementCounter.
-- - The functions map and mapWithKey are not
-- implemented (they just return an error). It would be
-- problematic to implement them because the input SOM and the output SOM
-- would have to have the same Metric type.
--
data SOM t d gm x k p
SOM :: gm p -> (t -> d -> x) -> (p -> p -> x) -> (p -> x -> p -> p) -> t -> SOM t d gm x k p
-- | Maps patterns to tiles in a regular grid. In the context of a SOM, the
-- tiles are called "nodes"
[gridMap] :: SOM t d gm x k p -> gm p
-- | A function which determines the how quickly the SOM learns. For
-- example, if the function is f, then f t d returns
-- the learning rate for a node. The parameter t indicates how
-- many patterns (or pattern batches) have previously been presented to
-- the classifier. Typically this is used to make the learning rate decay
-- over time. The parameter d is the grid distance from the node
-- being updated to the BMU (Best Matching Unit). The output is the
-- learning rate for that node (the amount by which the node's model
-- should be updated to match the target). The learning rate should be
-- between zero and one.
[learningRate] :: SOM t d gm x k p -> t -> d -> x
-- | A function which compares two patterns and returns a
-- non-negative number representing how different the patterns
-- are. A result of 0 indicates that the patterns are identical.
[difference] :: SOM t d gm x k p -> p -> p -> x
-- | A function which updates models. If this function is f, then
-- f target amount pattern returns a modified copy of
-- pattern that is more similar to target than
-- pattern is. The magnitude of the adjustment is controlled by
-- the amount parameter, which should be a number between 0 and
-- 1. Larger values for amount permit greater adjustments. If
-- amount=1, the result should be identical to the
-- target. If amount=0, the result should be the
-- unmodified pattern.
[makeSimilar] :: SOM t d gm x k p -> p -> x -> p -> p
-- | A counter used as a "time" parameter. If you create the SOM with a
-- counter value 0, and don't directly modify it, then the
-- counter will represent the number of patterns that this SOM has
-- classified.
[counter] :: SOM t d gm x k p -> t
-- | Internal method.
withGridMap :: (gm p -> gm p) -> SOM t d gm x k p -> SOM t d gm x k p
-- | Returns the learning function currently being used by the SOM.
currentLearningFunction :: Num t => SOM t d gm x k p -> d -> x
-- | Extracts the grid and current models from the SOM. A synonym for
-- gridMap.
toGridMap :: GridMap gm p => SOM t d gm x k p -> gm p
-- | Internal method.
adjustNode :: (Grid g, k ~ Index g, Num t) => g -> (t -> x) -> (p -> x -> p -> p) -> p -> k -> k -> p -> p
-- | Trains the specified node and the neighbourood around it to better
-- match a target. Most users should use train, which
-- automatically determines the BMU and trains it and its neighbourhood.
trainNeighbourhood :: (Grid (gm p), GridMap gm p, Index (BaseGrid gm p) ~ Index (gm p), Num t, Num x, Num d) => SOM t d gm x k p -> Index (gm p) -> p -> SOM t d gm x k p
-- | Increment the match counter.
incrementCounter :: Num t => SOM t d gm x k p -> SOM t d gm x k p
-- | Internal method.
justTrain :: (Ord x, Grid (gm p), GridMap gm x, GridMap gm p, Index (BaseGrid gm x) ~ Index (gm p), Index (BaseGrid gm p) ~ Index (gm p), Num t, Num x, Num d) => SOM t d gm x k p -> p -> SOM t d gm x k p
instance (Control.DeepSeq.NFData t, Control.DeepSeq.NFData (gm p)) => Control.DeepSeq.NFData (Data.Datamining.Clustering.SOMInternal.SOM t d gm x k p)
instance GHC.Generics.Generic (Data.Datamining.Clustering.SOMInternal.SOM t d gm x k p)
instance Data.Foldable.Foldable gm => Data.Foldable.Foldable (Data.Datamining.Clustering.SOMInternal.SOM t d gm x k)
instance Math.Geometry.GridInternal.Grid (gm p) => Math.Geometry.GridInternal.Grid (Data.Datamining.Clustering.SOMInternal.SOM t d gm x k p)
instance (Data.Foldable.Foldable gm, Math.Geometry.GridMap.GridMap gm p, Math.Geometry.GridInternal.Grid (Math.Geometry.GridMap.BaseGrid gm p)) => Math.Geometry.GridMap.GridMap (Data.Datamining.Clustering.SOMInternal.SOM t d gm x k) p
instance (Math.Geometry.GridMap.GridMap gm p, k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (Math.Geometry.GridMap.BaseGrid gm p), Math.Geometry.GridInternal.Grid (gm p), Math.Geometry.GridMap.GridMap gm x, k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (gm p), k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (Math.Geometry.GridMap.BaseGrid gm x), GHC.Num.Num t, GHC.Classes.Ord x, GHC.Num.Num x, GHC.Num.Num d) => Data.Datamining.Clustering.Classifier.Classifier (Data.Datamining.Clustering.SOMInternal.SOM t d gm) x k p
-- | A Kohonen Self-organising Map (SOM). A SOM maps input patterns onto a
-- regular grid (usually two-dimensional) where each node in the grid is
-- a model of the input data, and does so using a method which ensures
-- that any topological relationships within the input data are also
-- represented in the grid. This implementation supports the use of
-- non-numeric patterns.
--
-- In layman's terms, a SOM can be useful when you you want to discover
-- the underlying structure of some data. A tutorial is available at
-- https://github.com/mhwombat/som/wiki.
--
-- NOTES:
--
--
-- - Version 5.0 fixed a bug in the decayingGaussian
-- function. If you use defaultSOM (which uses this
-- function), your SOM should now learn more quickly.
-- - The gaussian function has been removed because it is not
-- as useful for SOMs as I originally thought. It was originally designed
-- to be used as a factor in a learning function. However, in most cases
-- the user will want to introduce a time decay into the exponent, rather
-- than simply multiply by a factor.
--
--
-- References:
--
--
-- - Kohonen, T. (1982). Self-organized formation of topologically
-- correct feature maps. Biological Cybernetics, 43 (1), 59–69.
--
module Data.Datamining.Clustering.SOM
-- | A Self-Organising Map (SOM).
--
-- Although SOM implements GridMap, most users will
-- only need the interface provided by
-- Data.Datamining.Clustering.Classifier. If you chose to use
-- the GridMap functions, please note:
--
--
-- - The functions adjust, and adjustWithKey do not
-- increment the counter. You can do so manually with
-- incrementCounter.
-- - The functions map and mapWithKey are not
-- implemented (they just return an error). It would be
-- problematic to implement them because the input SOM and the output SOM
-- would have to have the same Metric type.
--
data SOM t d gm x k p
SOM :: gm p -> (t -> d -> x) -> (p -> p -> x) -> (p -> x -> p -> p) -> t -> SOM t d gm x k p
-- | Maps patterns to tiles in a regular grid. In the context of a SOM, the
-- tiles are called "nodes"
[gridMap] :: SOM t d gm x k p -> gm p
-- | A function which determines the how quickly the SOM learns. For
-- example, if the function is f, then f t d returns
-- the learning rate for a node. The parameter t indicates how
-- many patterns (or pattern batches) have previously been presented to
-- the classifier. Typically this is used to make the learning rate decay
-- over time. The parameter d is the grid distance from the node
-- being updated to the BMU (Best Matching Unit). The output is the
-- learning rate for that node (the amount by which the node's model
-- should be updated to match the target). The learning rate should be
-- between zero and one.
[learningRate] :: SOM t d gm x k p -> t -> d -> x
-- | A function which compares two patterns and returns a
-- non-negative number representing how different the patterns
-- are. A result of 0 indicates that the patterns are identical.
[difference] :: SOM t d gm x k p -> p -> p -> x
-- | A function which updates models. If this function is f, then
-- f target amount pattern returns a modified copy of
-- pattern that is more similar to target than
-- pattern is. The magnitude of the adjustment is controlled by
-- the amount parameter, which should be a number between 0 and
-- 1. Larger values for amount permit greater adjustments. If
-- amount=1, the result should be identical to the
-- target. If amount=0, the result should be the
-- unmodified pattern.
[makeSimilar] :: SOM t d gm x k p -> p -> x -> p -> p
-- | A counter used as a "time" parameter. If you create the SOM with a
-- counter value 0, and don't directly modify it, then the
-- counter will represent the number of patterns that this SOM has
-- classified.
[counter] :: SOM t d gm x k p -> t
-- | Extracts the grid and current models from the SOM. A synonym for
-- gridMap.
toGridMap :: GridMap gm p => SOM t d gm x k p -> gm p
-- | A typical learning function for classifiers.
-- decayingGaussian r0 rf w0 wf tf returns a bell
-- curve-shaped function. At time zero, the maximum learning rate
-- (applied to the BMU) is r0, and the neighbourhood width is
-- w0. Over time the bell curve shrinks and the learning rate
-- tapers off, until at time tf, the maximum learning rate
-- (applied to the BMU) is rf, and the neighbourhood width is
-- wf. Normally the parameters should be chosen such that:
--
--
-- - 0 < rf << r0 < 1
-- - 0 < wf << w0
-- - 0 < tf
--
--
-- where << means "is much smaller than" (not the Haskell
-- << operator!)
decayingGaussian :: Floating x => x -> x -> x -> x -> x -> x -> x -> x
-- | A learning function that only updates the BMU and has a constant
-- learning rate.
stepFunction :: (Num d, Fractional x, Eq d) => x -> t -> d -> x
-- | A learning function that updates all nodes with the same, constant
-- learning rate. This can be useful for testing.
constantFunction :: x -> t -> d -> x
-- | Trains the specified node and the neighbourood around it to better
-- match a target. Most users should use train, which
-- automatically determines the BMU and trains it and its neighbourhood.
trainNeighbourhood :: (Grid (gm p), GridMap gm p, Index (BaseGrid gm p) ~ Index (gm p), Num t, Num x, Num d) => SOM t d gm x k p -> Index (gm p) -> p -> SOM t d gm x k p
-- | Tools for identifying patterns in data.
module Data.Datamining.Pattern
-- | Adjusts a number to make it more similar to the target.
adjustNum :: (Num a, Ord a, Eq a) => a -> a -> a -> a
-- | Returns the absolute difference between two numbers.
absDifference :: Num a => a -> a -> a
-- | adjustVector target amount vector adjusts each element
-- of vector to move it closer to the corresponding element of
-- target. The amount of adjustment is controlled by the
-- learning rate amount, which is a number between 0 and 1.
-- Larger values of amount permit more adjustment. If
-- amount=1, the result will be identical to the
-- target. If amount=0, the result will be the
-- unmodified pattern. If target is shorter than
-- vector, the result will be the same length as
-- target. If target is longer than vector,
-- the result will be the same length as vector.
adjustVector :: (Num a, Ord a, Eq a) => [a] -> a -> [a] -> [a]
-- | Same as adjustVector, except that the result will
-- always be the same length as vector. This means that if
-- target is shorter than vector, the "leftover"
-- elements of vector will be copied the result, unmodified.
adjustVectorPreserveLength :: (Num a, Ord a, Eq a) => [a] -> a -> [a] -> [a]
-- | Calculates the square of the Euclidean distance between two vectors.
euclideanDistanceSquared :: Num a => [a] -> [a] -> a
-- | Returns the sum of the squares of the elements of a vector.
magnitudeSquared :: Num a => [a] -> a
-- | A vector that has been normalised, i.e., the magnitude of the vector =
-- 1.
data NormalisedVector a
-- | Normalises a vector
normalise :: Floating a => [a] -> NormalisedVector a
-- | A vector that has been scaled so that all elements in the vector are
-- between zero and one. To scale a set of vectors, use
-- scaleAll. Alternatively, if you can identify a maximum
-- and minimum value for each element in a vector, you can scale
-- individual vectors using scale.
data ScaledVector a
-- | Given a vector qs of pairs of numbers, where each pair
-- represents the maximum and minimum value to be expected at each index
-- in xs, scale qs xs scales the vector
-- xs element by element, mapping the maximum value expected at
-- that index to one, and the minimum value to zero.
scale :: Fractional a => [(a, a)] -> [a] -> ScaledVector a
-- | Scales a set of vectors by determining the maximum and minimum values
-- at each index in the vector, and mapping the maximum value to one, and
-- the minimum value to zero.
scaleAll :: (Fractional a, Ord a) => [[a]] -> [ScaledVector a]
instance GHC.Show.Show a => GHC.Show.Show (Data.Datamining.Pattern.ScaledVector a)
instance GHC.Show.Show a => GHC.Show.Show (Data.Datamining.Pattern.NormalisedVector a)