-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Self-Organising Maps
--   
--   Please see the README on GitHub at
--   <a>https://github.com/mhwombat/som#readme</a>
@package som
@version 10.1.2


-- | Tools for identifying patterns in data.
module Data.Datamining.Clustering.Classifier

-- | A machine which learns to classify input patterns. Minimal complete
--   definition: <tt>trainBatch</tt>, <tt>reportAndTrain</tt>.
class Classifier (c :: * -> * -> * -> *) v k p

-- | Returns a list of index/model pairs.
toList :: Classifier c v k p => c v k p -> [(k, p)]

-- | Returns the number of models this classifier can learn.
numModels :: Classifier c v k p => c v k p -> Int

-- | Returns the current models of the classifier.
models :: Classifier c v k p => c v k p -> [p]

-- | <tt><a>differences</a> c target</tt> returns the indices of all nodes
--   in <tt>c</tt>, paired with the difference between <tt>target</tt> and
--   the node's model.
differences :: Classifier c v k p => c v k p -> p -> [(k, v)]

-- | <tt>classify c target</tt> returns the index of the node in <tt>c</tt>
--   whose model best matches the <tt>target</tt>.
classify :: (Classifier c v k p, Ord v) => c v k p -> p -> k

-- | <tt><a>train</a> c target</tt> returns a modified copy of the
--   classifier <tt>c</tt> that has partially learned the <tt>target</tt>.
train :: Classifier c v k p => c v k p -> p -> c v k p

-- | <tt><a>trainBatch</a> c targets</tt> returns a modified copy of the
--   classifier <tt>c</tt> that has partially learned the <tt>targets</tt>.
trainBatch :: Classifier c v k p => c v k p -> [p] -> c v k p

-- | <tt><a>classifyAndTrain</a> c target</tt> returns a tuple containing
--   the index of the node in <tt>c</tt> whose model best matches the input
--   <tt>target</tt>, and a modified copy of the classifier <tt>c</tt> that
--   has partially learned the <tt>target</tt>. Invoking
--   <tt>classifyAndTrain c p</tt> may be faster than invoking
--   <tt>(<a>classify</a> c p, <a>train</a> c p)</tt>, but they should give
--   identical results.
classifyAndTrain :: Classifier c v k p => c v k p -> p -> (k, c v k p)

-- | <tt><a>diffAndTrain</a> c target</tt> returns a tuple containing: 1.
--   The indices of all nodes in <tt>c</tt>, paired with the difference
--   between <tt>target</tt> and the node's model; 2. A modified copy of
--   the classifier <tt>c</tt> that has partially learned the
--   <tt>target</tt>. Invoking <tt>diffAndTrain c p</tt> may be faster than
--   invoking <tt>(<a>differences</a> c p, <a>train</a> c p)</tt>, but they
--   should give identical results.
diffAndTrain :: Classifier c v k p => c v k p -> p -> ([(k, v)], c v k p)

-- | <tt><a>reportAndTrain</a> c target</tt> returns a tuple containing:
--   1. The index of the node in <tt>c</tt> whose model best matches the
--   input <tt>target</tt>; 2. The indices of all nodes in <tt>c</tt>,
--   paired with the difference between <tt>target</tt> and the node's
--   model; 3. A modified copy of the classifier <tt>c</tt> that has
--   partially learned the <tt>target</tt>. Invoking <tt>reportAndTrain c
--   p</tt> may be faster than invoking <tt>(<a>classify</a> c p,
--   <a>differences</a> c p, <a>train</a> c p)</tt>, but they should give
--   identical results.
reportAndTrain :: Classifier c v k p => c v k p -> p -> (k, [(k, v)], c v k p)


-- | A module containing private <tt>DSOM</tt> internals. Most developers
--   should use <tt>DSOM</tt> instead. This module is subject to change
--   without notice.
module Data.Datamining.Clustering.DSOMInternal

-- | A Dynamic Self-Organising Map (DSOM).
--   
--   Although <tt>DSOM</tt> implements <tt>GridMap</tt>, most users will
--   only need the interface provided by
--   <tt>Data.Datamining.Clustering.Classifier</tt>. If you choose to use
--   the <tt>GridMap</tt> functions, please note:
--   
--   <ol>
--   <li>The functions <tt>adjust</tt> and <tt>adjustWithKey</tt> do not
--   increment the counter. You can do so manually with
--   <tt>incrementCounter</tt>.</li>
--   <li>The functions <tt>map</tt> and <tt>mapWithKey</tt> are not
--   implemented (they just return an <tt>error</tt>). It would be
--   problematic to implement them because the input DSOM and the output
--   DSOM would have to have the same <tt>Metric</tt> type.</li>
--   </ol>
data DSOM gm x k p
DSOM :: gm p -> (x -> x -> x -> x) -> (p -> p -> x) -> (p -> x -> p -> p) -> DSOM gm x k p

-- | Maps patterns to tiles in a regular grid. In the context of a SOM, the
--   tiles are called "nodes"
[gridMap] :: DSOM gm x k p -> gm p

-- | A function which determines how quickly the SOM learns.
[learningRate] :: DSOM gm x k p -> x -> x -> x -> x

-- | A function which compares two patterns and returns a
--   <i>non-negative</i> number representing how different the patterns
--   are. A result of <tt>0</tt> indicates that the patterns are identical.
[difference] :: DSOM gm x k p -> p -> p -> x

-- | A function which updates models. If this function is <tt>f</tt>, then
--   <tt>f target amount pattern</tt> returns a modified copy of
--   <tt>pattern</tt> that is more similar to <tt>target</tt> than
--   <tt>pattern</tt> is. The magnitude of the adjustment is controlled by
--   the <tt>amount</tt> parameter, which should be a number between 0 and
--   1. Larger values for <tt>amount</tt> permit greater adjustments. If
--   <tt>amount</tt>=1, the result should be identical to the
--   <tt>target</tt>. If <tt>amount</tt>=0, the result should be the
--   unmodified <tt>pattern</tt>.
[makeSimilar] :: DSOM gm x k p -> p -> x -> p -> p

-- | Internal method.
withGridMap :: (gm p -> gm p) -> DSOM gm x k p -> DSOM gm x k p

-- | Extracts the grid and current models from the DSOM.
toGridMap :: GridMap gm p => DSOM gm x k p -> gm p

-- | Internal method.
adjustNode :: (FiniteGrid (gm p), GridMap gm p, k ~ Index (gm p), k ~ Index (BaseGrid gm p), Ord k, Num x, Fractional x) => gm p -> (p -> x -> p -> p) -> (p -> p -> x) -> (x -> x -> x) -> p -> k -> k -> p -> p

-- | Internal method.
scaleDistance :: (Num a, Fractional a) => Int -> Int -> a

-- | Trains the specified node and the neighbourhood around it to better
--   match a target. Most users should use <tt>train</tt>, which
--   automatically determines the BMU and trains it and its neighbourhood.
trainNeighbourhood :: (FiniteGrid (gm p), GridMap gm p, k ~ Index (gm p), k ~ Index (BaseGrid gm p), Ord k, Num x, Fractional x) => DSOM gm x t p -> k -> p -> DSOM gm x k p

-- | Internal method.
justTrain :: (FiniteGrid (gm p), GridMap gm p, GridMap gm x, k ~ Index (gm p), k ~ Index (gm x), k ~ Index (BaseGrid gm p), k ~ Index (BaseGrid gm x), Ord k, Ord x, Num x, Fractional x) => DSOM gm x t p -> p -> DSOM gm x k p

-- | Configures a learning function that depends not on the time, but on
--   how good a model we already have for the target. If the BMU is an
--   exact match for the target, no learning occurs. Usage is
--   <tt><a>rougierLearningFunction</a> r p</tt>, where <tt>r</tt> is the
--   maximal learning rate (0 &lt;= r &lt;= 1), and <tt>p</tt> is the
--   elasticity.
--   
--   NOTE: When using this learning function, ensure that <tt>abs .
--   difference</tt> is always between 0 and 1, inclusive. Otherwise you
--   may get invalid learning rates.
rougierLearningFunction :: (Eq a, Ord a, Floating a) => a -> a -> a -> a -> a -> a
instance Control.DeepSeq.NFData (gm p) => Control.DeepSeq.NFData (Data.Datamining.Clustering.DSOMInternal.DSOM gm x k p)
instance GHC.Generics.Generic (Data.Datamining.Clustering.DSOMInternal.DSOM gm x k p)
instance Data.Foldable.Foldable gm => Data.Foldable.Foldable (Data.Datamining.Clustering.DSOMInternal.DSOM gm x k)
instance Math.Geometry.GridInternal.Grid (gm p) => Math.Geometry.GridInternal.Grid (Data.Datamining.Clustering.DSOMInternal.DSOM gm x k p)
instance (Data.Foldable.Foldable gm, Math.Geometry.GridMap.GridMap gm p, Math.Geometry.GridInternal.FiniteGrid (Math.Geometry.GridMap.BaseGrid gm p)) => Math.Geometry.GridMap.GridMap (Data.Datamining.Clustering.DSOMInternal.DSOM gm x k) p
instance (Math.Geometry.GridMap.GridMap gm p, k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (Math.Geometry.GridMap.BaseGrid gm p), Math.Geometry.GridInternal.FiniteGrid (gm p), Math.Geometry.GridMap.GridMap gm x, k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (gm p), k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (gm x), k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (Math.Geometry.GridMap.BaseGrid gm x), GHC.Classes.Ord k, GHC.Classes.Ord x, GHC.Num.Num x, GHC.Real.Fractional x) => Data.Datamining.Clustering.Classifier.Classifier (Data.Datamining.Clustering.DSOMInternal.DSOM gm) x k p


-- | A modified Kohonen Self-organising Map (SOM) which supports a
--   time-independent learning function. (See <tt><a>SOM</a></tt> for a
--   description of a SOM.)
--   
--   References:
--   
--   <ul>
--   <li>Rougier, N. &amp; Boniface, Y. (2011). Dynamic self-organising
--   map. Neurocomputing, 74 (11), 1840-1847.</li>
--   <li>Kohonen, T. (1982). Self-organized formation of topologically
--   correct feature maps. Biological Cybernetics, 43 (1), 59–69.</li>
--   </ul>
module Data.Datamining.Clustering.DSOM

-- | A Dynamic Self-Organising Map (DSOM).
--   
--   Although <tt>DSOM</tt> implements <tt>GridMap</tt>, most users will
--   only need the interface provided by
--   <tt>Data.Datamining.Clustering.Classifier</tt>. If you choose to use
--   the <tt>GridMap</tt> functions, please note:
--   
--   <ol>
--   <li>The functions <tt>adjust</tt> and <tt>adjustWithKey</tt> do not
--   increment the counter. You can do so manually with
--   <tt>incrementCounter</tt>.</li>
--   <li>The functions <tt>map</tt> and <tt>mapWithKey</tt> are not
--   implemented (they just return an <tt>error</tt>). It would be
--   problematic to implement them because the input DSOM and the output
--   DSOM would have to have the same <tt>Metric</tt> type.</li>
--   </ol>
data DSOM gm x k p
DSOM :: gm p -> (x -> x -> x -> x) -> (p -> p -> x) -> (p -> x -> p -> p) -> DSOM gm x k p

-- | Maps patterns to tiles in a regular grid. In the context of a SOM, the
--   tiles are called "nodes"
[gridMap] :: DSOM gm x k p -> gm p

-- | A function which determines how quickly the SOM learns.
[learningRate] :: DSOM gm x k p -> x -> x -> x -> x

-- | A function which compares two patterns and returns a
--   <i>non-negative</i> number representing how different the patterns
--   are. A result of <tt>0</tt> indicates that the patterns are identical.
[difference] :: DSOM gm x k p -> p -> p -> x

-- | A function which updates models. If this function is <tt>f</tt>, then
--   <tt>f target amount pattern</tt> returns a modified copy of
--   <tt>pattern</tt> that is more similar to <tt>target</tt> than
--   <tt>pattern</tt> is. The magnitude of the adjustment is controlled by
--   the <tt>amount</tt> parameter, which should be a number between 0 and
--   1. Larger values for <tt>amount</tt> permit greater adjustments. If
--   <tt>amount</tt>=1, the result should be identical to the
--   <tt>target</tt>. If <tt>amount</tt>=0, the result should be the
--   unmodified <tt>pattern</tt>.
[makeSimilar] :: DSOM gm x k p -> p -> x -> p -> p

-- | Extracts the grid and current models from the DSOM.
toGridMap :: GridMap gm p => DSOM gm x k p -> gm p

-- | Configures a learning function that depends not on the time, but on
--   how good a model we already have for the target. If the BMU is an
--   exact match for the target, no learning occurs. Usage is
--   <tt><a>rougierLearningFunction</a> r p</tt>, where <tt>r</tt> is the
--   maximal learning rate (0 &lt;= r &lt;= 1), and <tt>p</tt> is the
--   elasticity.
--   
--   NOTE: When using this learning function, ensure that <tt>abs .
--   difference</tt> is always between 0 and 1, inclusive. Otherwise you
--   may get invalid learning rates.
rougierLearningFunction :: (Eq a, Ord a, Floating a) => a -> a -> a -> a -> a -> a

-- | Trains the specified node and the neighbourhood around it to better
--   match a target. Most users should use <tt>train</tt>, which
--   automatically determines the BMU and trains it and its neighbourhood.
trainNeighbourhood :: (FiniteGrid (gm p), GridMap gm p, k ~ Index (gm p), k ~ Index (BaseGrid gm p), Ord k, Num x, Fractional x) => DSOM gm x t p -> k -> p -> DSOM gm x k p


-- | A module containing private <tt>SGM</tt> internals. Most developers
--   should use <tt>SGM2</tt> instead. This module is subject to change
--   without notice.
module Data.Datamining.Clustering.SGM2Internal

-- | A typical learning function for classifiers. <tt><a>exponential</a> r0
--   d t</tt> returns the learning rate at time <tt>t</tt>. When <tt>t =
--   0</tt>, the learning rate is <tt>r0</tt>. Over time the learning rate
--   decays exponentially; the decay rate is <tt>d</tt>. Normally the
--   parameters are chosen such that:
--   
--   <ul>
--   <li>0 &lt; r0 &lt; 1</li>
--   <li>0 &lt; d</li>
--   </ul>
exponential :: (Floating a, Integral t) => a -> a -> t -> a

-- | A Self-generating Model (SGM). <tt>t</tt> is the type of the
--   counter. <tt>x</tt> is the type of the learning rate and the
--   difference metric. <tt>k</tt> is the type of the model indices.
--   <tt>p</tt> is the type of the input patterns and models.
data SGM t x k p
SGM :: Map k (p, t) -> (t -> x) -> Int -> (p -> p -> x) -> (p -> x -> p -> p) -> k -> SGM t x k p

-- | Maps patterns and match counts to nodes.
[toMap] :: SGM t x k p -> Map k (p, t)

-- | A function which determines the learning rate for a node. The input
--   parameter indicates how many patterns (or pattern batches) have
--   previously been presented to the classifier. Typically this is used to
--   make the learning rate decay over time. The output is the learning
--   rate for that node (the amount by which the node's model should be
--   updated to match the target). The learning rate should be between zero
--   and one.
[learningRate] :: SGM t x k p -> t -> x

-- | The maximum number of models this SGM can hold.
[capacity] :: SGM t x k p -> Int

-- | A function which compares two patterns and returns a
--   <i>non-negative</i> number representing how different the patterns
--   are. A result of <tt>0</tt> indicates that the patterns are identical.
[difference] :: SGM t x k p -> p -> p -> x

-- | A function which updates models. For example, if this function is
--   <tt>f</tt>, then <tt>f target amount pattern</tt> returns a modified
--   copy of <tt>pattern</tt> that is more similar to <tt>target</tt> than
--   <tt>pattern</tt> is. The magnitude of the adjustment is controlled by
--   the <tt>amount</tt> parameter, which should be a number between 0 and
--   1. Larger values for <tt>amount</tt> permit greater adjustments. If
--   <tt>amount</tt>=1, the result should be identical to the
--   <tt>target</tt>. If <tt>amount</tt>=0, the result should be the
--   unmodified <tt>pattern</tt>.
[makeSimilar] :: SGM t x k p -> p -> x -> p -> p

-- | Index for the next node to add to the SGM.
[nextIndex] :: SGM t x k p -> k

-- | <tt><a>makeSGM</a> lr n diff ms</tt> creates a new SGM that does not
--   (yet) contain any models. It will learn at the rate determined by the
--   learning function <tt>lr</tt>, and will be able to hold up to
--   <tt>n</tt> models. It will create a new model based on a pattern
--   presented to it when the SGM is not at capacity, or a less useful
--   model can be replaced. It will use the function <tt>diff</tt> to
--   measure the similarity between an input pattern and a model. It will
--   use the function <tt>ms</tt> to adjust models as needed to make them
--   more similar to input patterns.
makeSGM :: Bounded k => (t -> x) -> Int -> (p -> p -> x) -> (p -> x -> p -> p) -> SGM t x k p

-- | Returns true if the SGM has no models, false otherwise.
isEmpty :: SGM t x k p -> Bool

-- | Returns the number of models the SGM currently contains.
size :: SGM t x k p -> Int

-- | Returns a map from node ID to model.
modelMap :: SGM t x k p -> Map k p

-- | Returns a map from node ID to counter (number of times the node's
--   model has been the closest match to an input pattern).
counterMap :: SGM t x k p -> Map k t

-- | Returns the model at a specified node.
modelAt :: Ord k => SGM t x k p -> k -> p

-- | Returns the match counter for a specified node.
counterAt :: Ord k => SGM t x k p -> k -> t

-- | Returns the current labels.
labels :: SGM t x k p -> [k]

-- | The current "time" (number of times the SGM has been trained).
time :: Num t => SGM t x k p -> t

-- | Adds a new node to the SGM.
addNode :: (Num t, Enum k, Ord k) => p -> SGM t x k p -> SGM t x k p

-- | Increments the counter.
incrementCounter :: (Num t, Ord k) => k -> SGM t x k p -> SGM t x k p

-- | Trains the specified node to better match a target. Most users should
--   use <tt><a>train</a></tt>, which automatically determines the BMU and
--   trains it.
trainNode :: (Num t, Ord k) => SGM t x k p -> k -> p -> SGM t x k p

-- | Calculates the difference between all pairs of non-identical labels in
--   the SGM.
modelDiffs :: (Eq k, Ord k) => SGM t x k p -> [((k, k), x)]

-- | Generates all pairs of non-identical labels in the SGM.
labelPairs :: Eq k => SGM t x k p -> [(k, k)]

-- | Pairs a node label with all labels except itself.
labelPairs' :: Eq k => SGM t x k p -> k -> [(k, k)]

-- | Returns the labels of the two most similar models, and the difference
--   between them.
twoMostSimilar :: (Ord x, Eq k, Ord k) => SGM t x k p -> (k, k, x)

-- | Deletes the least used (least matched) model in a pair, and returns
--   its label (now available) and the updated SGM. TODO: Modify the other
--   model to make it slightly more similar to the one that was deleted?
mergeModels :: (Num t, Ord t, Ord k) => SGM t x k p -> k -> k -> (k, SGM t x k p)

-- | Set the model for a node. Useful when merging two models and replacing
--   one.
setModel :: (Num t, Ord k) => SGM t x k p -> k -> p -> SGM t x k p

-- | Add a new node, making room for it by merging two existing nodes.
mergeAddModel :: (Num t, Ord t, Ord k) => SGM t x k p -> k -> k -> p -> SGM t x k p

-- | <tt><a>classify</a> s p</tt> identifies the model in <tt>s</tt> that most
--   closely matches the pattern <tt>p</tt>. It will not make any changes
--   to the classifier. (I.e., it will not change the models or match
--   counts.) Returns the ID of the node with the best matching model, the
--   difference between the best matching model and the pattern, and the
--   SGM labels paired with the model and the difference between the input
--   and the corresponding model. The final paired list is sorted in
--   decreasing order of similarity.
classify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x))

-- | Order models by ascending difference from the input pattern, then by
--   creation order (label number).
matchOrder :: (Ord a, Ord b) => (a, b) -> (a, b) -> Ordering

-- | <tt><a>trainAndClassify</a> s p</tt> identifies the model in
--   <tt>s</tt> that most closely matches <tt>p</tt>, and updates it to be
--   a somewhat better match. If necessary, it will create a new node and
--   model. Returns the ID of the node with the best matching model, the
--   difference between the pattern and the best matching model in the
--   original SGM (before training or adding a new model), the differences
--   between the pattern and each model in the updated SGM, and the updated
--   SGM.
trainAndClassify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)

-- | Internal method. NOTE: This function will adjust the model and update
--   the match count for the BMU.
trainAndClassify' :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)

-- | Internal method.
addModelTrainAndClassify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)

-- | <tt><a>train</a> s p</tt> identifies the model in <tt>s</tt> that most
--   closely matches <tt>p</tt>, and updates it to be a somewhat better
--   match. If necessary, it will create a new node and model.
train :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> SGM t x k p

-- | For each pattern <tt>p</tt> in <tt>ps</tt>, <tt><a>trainBatch</a> s
--   ps</tt> identifies the model in <tt>s</tt> that most closely matches
--   <tt>p</tt>, and updates it to be a somewhat better match.
trainBatch :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> [p] -> SGM t x k p
instance (Control.DeepSeq.NFData k, Control.DeepSeq.NFData p, Control.DeepSeq.NFData t) => Control.DeepSeq.NFData (Data.Datamining.Clustering.SGM2Internal.SGM t x k p)
instance GHC.Generics.Generic (Data.Datamining.Clustering.SGM2Internal.SGM t x k p)


-- | A Self-generating Model (SGM). An SGM maps input patterns onto a set,
--   where each element in the set is a model of the input data. An SGM is
--   like a Kohonen Self-organising Map (SOM), except:
--   
--   <ul>
--   <li>Instead of a grid, it uses a simple set of unconnected models.
--   Since the models are unconnected, only the model that best matches the
--   input is ever updated. This makes it faster, however, topological
--   relationships within the input data are not preserved.</li>
--   <li>New models are created on-the-fly when no existing model is
--   similar enough to an input pattern. If the SGM is at capacity, the
--   least useful model will be deleted.</li>
--   </ul>
--   
--   This implementation supports the use of non-numeric patterns.
--   
--   In layman's terms, an SGM can be useful when you want to build a
--   set of models on some data. A tutorial is available at
--   <a>https://github.com/mhwombat/som/wiki</a>.
--   
--   References:
--   
--   <ul>
--   <li>Amy de Buitléir, Mark Daly, and Michael Russell. The
--   Self-generating Model: an Adaptation of the Self-organizing Map for
--   Intelligent Agents and Data Mining. In: Artificial Life and
--   Intelligent Agents: Second International Symposium, ALIA 2016,
--   Birmingham, UK, June 14-15, 2016, Revised Selected Papers. Ed. by
--   Peter R. Lewis et al. Springer International Publishing, 2018, pp.
--   59–72. Available at <a>http://amydebuitleir.eu/publications/</a>.</li>
--   <li>Amy de Buitléir, Michael Russell, and Mark Daly. Wains: A
--   pattern-seeking artificial life species. Artificial Life,
--   (18)4:399–423, 2012. Available at
--   <a>http://amydebuitleir.eu/publications/</a>.</li>
--   <li>Kohonen, T. (1982). Self-organized formation of topologically
--   correct feature maps. Biological Cybernetics, 43 (1), 59–69.</li>
--   </ul>
module Data.Datamining.Clustering.SGM2

-- | A Self-generating Model (SGM). <tt>t</tt> is the type of the
--   counter. <tt>x</tt> is the type of the learning rate and the
--   difference metric. <tt>k</tt> is the type of the model indices.
--   <tt>p</tt> is the type of the input patterns and models.
data SGM t x k p
SGM :: Map k (p, t) -> (t -> x) -> Int -> (p -> p -> x) -> (p -> x -> p -> p) -> k -> SGM t x k p

-- | Maps patterns and match counts to nodes.
[toMap] :: SGM t x k p -> Map k (p, t)

-- | A function which determines the learning rate for a node. The input
--   parameter indicates how many patterns (or pattern batches) have
--   previously been presented to the classifier. Typically this is used to
--   make the learning rate decay over time. The output is the learning
--   rate for that node (the amount by which the node's model should be
--   updated to match the target). The learning rate should be between zero
--   and one.
[learningRate] :: SGM t x k p -> t -> x

-- | The maximum number of models this SGM can hold.
[capacity] :: SGM t x k p -> Int

-- | A function which compares two patterns and returns a
--   <i>non-negative</i> number representing how different the patterns
--   are. A result of <tt>0</tt> indicates that the patterns are identical.
[difference] :: SGM t x k p -> p -> p -> x

-- | A function which updates models. For example, if this function is
--   <tt>f</tt>, then <tt>f target amount pattern</tt> returns a modified
--   copy of <tt>pattern</tt> that is more similar to <tt>target</tt> than
--   <tt>pattern</tt> is. The magnitude of the adjustment is controlled by
--   the <tt>amount</tt> parameter, which should be a number between 0 and
--   1. Larger values for <tt>amount</tt> permit greater adjustments. If
--   <tt>amount</tt>=1, the result should be identical to the
--   <tt>target</tt>. If <tt>amount</tt>=0, the result should be the
--   unmodified <tt>pattern</tt>.
[makeSimilar] :: SGM t x k p -> p -> x -> p -> p

-- | Index for the next node to add to the SGM.
[nextIndex] :: SGM t x k p -> k

-- | <tt><a>makeSGM</a> lr n diff ms</tt> creates a new SGM that does not
--   (yet) contain any models. It will learn at the rate determined by the
--   learning function <tt>lr</tt>, and will be able to hold up to
--   <tt>n</tt> models. It will create a new model based on a pattern
--   presented to it when the SGM is not at capacity, or a less useful
--   model can be replaced. It will use the function <tt>diff</tt> to
--   measure the similarity between an input pattern and a model. It will
--   use the function <tt>ms</tt> to adjust models as needed to make them
--   more similar to input patterns.
makeSGM :: Bounded k => (t -> x) -> Int -> (p -> p -> x) -> (p -> x -> p -> p) -> SGM t x k p

-- | The current "time" (number of times the SGM has been trained).
time :: Num t => SGM t x k p -> t

-- | Returns true if the SGM has no models, false otherwise.
isEmpty :: SGM t x k p -> Bool

-- | Returns the number of models the SGM currently contains.
size :: SGM t x k p -> Int

-- | Returns a map from node ID to model.
modelMap :: SGM t x k p -> Map k p

-- | Returns a map from node ID to counter (number of times the node's
--   model has been the closest match to an input pattern).
counterMap :: SGM t x k p -> Map k t

-- | Returns the model at a specified node.
modelAt :: Ord k => SGM t x k p -> k -> p

-- | A typical learning function for classifiers. <tt><a>exponential</a> r0
--   d t</tt> returns the learning rate at time <tt>t</tt>. When <tt>t =
--   0</tt>, the learning rate is <tt>r0</tt>. Over time the learning rate
--   decays exponentially; the decay rate is <tt>d</tt>. Normally the
--   parameters are chosen such that:
--   
--   <ul>
--   <li>0 &lt; r0 &lt; 1</li>
--   <li>0 &lt; d</li>
--   </ul>
exponential :: (Floating a, Integral t) => a -> a -> t -> a

-- | <tt><a>classify</a> s p</tt> identifies the model in <tt>s</tt> that most
--   closely matches the pattern <tt>p</tt>. It will not make any changes
--   to the classifier. (I.e., it will not change the models or match
--   counts.) Returns the ID of the node with the best matching model, the
--   difference between the best matching model and the pattern, and the
--   SGM labels paired with the model and the difference between the input
--   and the corresponding model. The final paired list is sorted in
--   decreasing order of similarity.
classify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x))

-- | <tt><a>trainAndClassify</a> s p</tt> identifies the model in
--   <tt>s</tt> that most closely matches <tt>p</tt>, and updates it to be
--   a somewhat better match. If necessary, it will create a new node and
--   model. Returns the ID of the node with the best matching model, the
--   difference between the pattern and the best matching model in the
--   original SGM (before training or adding a new model), the differences
--   between the pattern and each model in the updated SGM, and the updated
--   SGM.
trainAndClassify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)

-- | <tt><a>train</a> s p</tt> identifies the model in <tt>s</tt> that most
--   closely matches <tt>p</tt>, and updates it to be a somewhat better
--   match. If necessary, it will create a new node and model.
train :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> SGM t x k p

-- | For each pattern <tt>p</tt> in <tt>ps</tt>, <tt><a>trainBatch</a> s
--   ps</tt> identifies the model in <tt>s</tt> that most closely matches
--   <tt>p</tt>, and updates it to be a somewhat better match.
trainBatch :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> [p] -> SGM t x k p


-- | A module containing private <tt>SGM</tt> internals. Most developers
--   should use <tt>SGM</tt> instead. This module is subject to change
--   without notice.
module Data.Datamining.Clustering.SGMInternal

-- | A typical learning function for classifiers. <tt><a>exponential</a> r0
--   d t</tt> returns the learning rate at time <tt>t</tt>. When <tt>t =
--   0</tt>, the learning rate is <tt>r0</tt>. Over time the learning rate
--   decays exponentially; the decay rate is <tt>d</tt>. Normally the
--   parameters are chosen such that:
--   
--   <ul>
--   <li>0 &lt; r0 &lt; 1</li>
--   <li>0 &lt; d</li>
--   </ul>
exponential :: (Floating a, Integral t) => a -> a -> t -> a

-- | A Self-generating Model (SGM). <tt>t</tt> is the type of the
--   counter. <tt>x</tt> is the type of the learning rate and the
--   difference metric. <tt>k</tt> is the type of the model indices.
--   <tt>p</tt> is the type of the input patterns and models.
data SGM t x k p
SGM :: Map k (p, t) -> (t -> x) -> Int -> x -> Bool -> (p -> p -> x) -> (p -> x -> p -> p) -> k -> SGM t x k p

-- | Maps patterns and match counts to nodes.
[toMap] :: SGM t x k p -> Map k (p, t)

-- | A function which determines the learning rate for a node. The input
--   parameter indicates how many patterns (or pattern batches) have
--   previously been presented to the classifier. Typically this is used to
--   make the learning rate decay over time. The output is the learning
--   rate for that node (the amount by which the node's model should be
--   updated to match the target). The learning rate should be between zero
--   and one.
[learningRate] :: SGM t x k p -> t -> x

-- | The maximum number of models this SGM can hold.
[maxSize] :: SGM t x k p -> Int

-- | The threshold that triggers creation of a new model.
[diffThreshold] :: SGM t x k p -> x

-- | Delete existing models to make room for new ones? The least useful
--   (least frequently matched) models will be deleted first.
[allowDeletion] :: SGM t x k p -> Bool

-- | A function which compares two patterns and returns a
--   <i>non-negative</i> number representing how different the patterns
--   are. A result of <tt>0</tt> indicates that the patterns are identical.
[difference] :: SGM t x k p -> p -> p -> x

-- | A function which updates models. For example, if this function is
--   <tt>f</tt>, then <tt>f target amount pattern</tt> returns a modified
--   copy of <tt>pattern</tt> that is more similar to <tt>target</tt> than
--   <tt>pattern</tt> is. The magnitude of the adjustment is controlled by
--   the <tt>amount</tt> parameter, which should be a number between 0 and
--   1. Larger values for <tt>amount</tt> permit greater adjustments. If
--   <tt>amount</tt>=1, the result should be identical to the
--   <tt>target</tt>. If <tt>amount</tt>=0, the result should be the
--   unmodified <tt>pattern</tt>.
[makeSimilar] :: SGM t x k p -> p -> x -> p -> p

-- | Index for the next node to add to the SGM.
[nextIndex] :: SGM t x k p -> k

-- | <tt><a>makeSGM</a> lr n dt ad diff ms</tt> creates a new SGM that
--   does not (yet) contain any models. It will learn at the rate
--   determined by the learning function <tt>lr</tt>, and will be able to
--   hold up to <tt>n</tt> models. It will create a new model based on a
--   pattern presented to it when (1) the SGM contains no models, or (2)
--   the difference between the pattern and the closest matching model
--   exceeds the threshold <tt>dt</tt>. If <tt>ad</tt> is true, it may
--   delete existing models to make room for new ones. It will use the
--   function <tt>diff</tt> to measure the difference between an input
--   pattern and a model. It will use the function <tt>ms</tt> to adjust
--   models as needed to make them more similar to input patterns.
makeSGM :: Bounded k => (t -> x) -> Int -> x -> Bool -> (p -> p -> x) -> (p -> x -> p -> p) -> SGM t x k p

-- | Returns true if the SGM has no models, false otherwise.
isEmpty :: SGM t x k p -> Bool

-- | Returns the number of models the SGM currently contains.
numModels :: SGM t x k p -> Int

-- | Returns a map from node ID to model.
modelMap :: SGM t x k p -> Map k p

-- | Returns a map from node ID to counter (number of times the node's
--   model has been the closest match to an input pattern).
counterMap :: SGM t x k p -> Map k t

-- | Returns the model at a specified node.
modelAt :: Ord k => SGM t x k p -> k -> p

-- | Returns the current labels.
labels :: SGM t x k p -> [k]

-- | Returns the current models.
models :: SGM t x k p -> [p]

-- | Returns the current counters (number of times the node's model has
--   been the closest match to an input pattern).
counters :: SGM t x k p -> [t]

-- | The current "time" (number of times the SGM has been trained).
time :: Num t => SGM t x k p -> t

-- | Adds a new node to the SGM.
addNode :: (Num t, Enum k, Ord k) => p -> SGM t x k p -> SGM t x k p

-- | Removes a node from the SGM. Deleted nodes are never re-used.
deleteNode :: Ord k => k -> SGM t x k p -> SGM t x k p

-- | Increment the match counter.
incrementCounter :: (Num t, Ord k) => k -> SGM t x k p -> SGM t x k p

-- | Trains the specified node to better match a target. Most users should
--   use <tt><a>train</a></tt>, which automatically determines the BMU and
--   trains it.
trainNode :: (Num t, Ord k) => SGM t x k p -> k -> p -> SGM t x k p

-- | Returns the node that has been the BMU least often.
leastUsefulNode :: Ord t => SGM t x k p -> k

-- | Deletes the node that has been the BMU least often.
deleteLeastUsefulNode :: (Ord t, Ord k) => SGM t x k p -> SGM t x k p

-- | Adds a new node to the SGM, deleting the least useful node/model if
--   necessary to make room.
addModel :: (Num t, Ord t, Enum k, Ord k) => p -> SGM t x k p -> SGM t x k p

-- | <tt><a>classify</a> s p</tt> identifies the model in <tt>s</tt> that most
--   closely matches the pattern <tt>p</tt>. It will not make any changes
--   to the classifier. Returns the ID of the node with the best matching
--   model, the difference between the best matching model and the pattern,
--   and the SGM labels mapped to the corresponding model and the
--   difference between the input and that model. These are returned as a
--   <tt>Map</tt> keyed by label, not as a list sorted by similarity.
classify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x))

-- | Internal method. NOTE: This function may create a new model, but it
--   does not modify existing models.
classify' :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)

-- | Order models by ascending difference from the input pattern, then by
--   creation order (label number).
matchOrder :: (Ord a, Ord b) => (a, b) -> (a, b) -> Ordering

-- | <tt><a>trainAndClassify</a> s p</tt> identifies the model in
--   <tt>s</tt> that most closely matches <tt>p</tt>, and updates it to be
--   a somewhat better match. If necessary, it will create a new node and
--   model. Returns the ID of the node with the best matching model, the
--   difference between the best matching model and the pattern, the
--   differences between the input and each model in the SGM, and the
--   updated SGM.
trainAndClassify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)

-- | <tt><a>train</a> s p</tt> identifies the model in <tt>s</tt> that most
--   closely matches <tt>p</tt>, and updates it to be a somewhat better
--   match. If necessary, it will create a new node and model.
train :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> SGM t x k p

-- | For each pattern <tt>p</tt> in <tt>ps</tt>, <tt><a>trainBatch</a> s
--   ps</tt> identifies the model in <tt>s</tt> that most closely matches
--   <tt>p</tt>, and updates it to be a somewhat better match.
trainBatch :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> [p] -> SGM t x k p
instance (Control.DeepSeq.NFData k, Control.DeepSeq.NFData p, Control.DeepSeq.NFData t, Control.DeepSeq.NFData x) => Control.DeepSeq.NFData (Data.Datamining.Clustering.SGMInternal.SGM t x k p)
instance GHC.Generics.Generic (Data.Datamining.Clustering.SGMInternal.SGM t x k p)


-- | A Self-generating Model (SGM). An SGM maps input patterns onto a set,
--   where each element in the set is a model of the input data. An SGM is
--   like a Kohonen Self-organising Map (SOM), except:
--   
--   <ul>
--   <li>Instead of a grid, it uses a simple set of unconnected models.
--   Since the models are unconnected, only the model that best matches the
--   input is ever updated. This makes it faster; however, topological
--   relationships within the input data are not preserved.</li>
--   <li>New models are created on-the-fly when no existing model is
--   similar enough to an input pattern. If the SGM is at capacity, the
--   least useful model will be deleted.</li>
--   </ul>
--   
--   This implementation supports the use of non-numeric patterns.
--   
--   In layman's terms, an SGM can be useful when you want to build a
--   set of models on some data. A tutorial is available at
--   <a>https://github.com/mhwombat/som/wiki</a>.
--   
--   References:
--   
--   <ul>
--   <li>Amy de Buitléir, Mark Daly, and Michael Russell. The
--   Self-generating Model: an Adaptation of the Self-organizing Map for
--   Intelligent Agents and Data Mining. In: Artificial Life and
--   Intelligent Agents: Second International Symposium, ALIA 2016,
--   Birmingham, UK, June 14-15, 2016, Revised Selected Papers. Ed. by
--   Peter R. Lewis et al. Springer International Publishing, 2018, pp.
--   59–72. Available at <a>http://amydebuitleir.eu/publications/</a>.</li>
--   <li>Amy de Buitléir, Michael Russell, and Mark Daly. Wains: A
--   pattern-seeking artificial life species. Artificial Life,
--   (18)4:399–423, 2012. Available at
--   <a>http://amydebuitleir.eu/publications/</a>.</li>
--   <li>Kohonen, T. (1982). Self-organized formation of topologically
--   correct feature maps. Biological Cybernetics, 43 (1), 59–69.</li>
--   </ul>
module Data.Datamining.Clustering.SGM

-- | A Self-generating Model (SGM). <tt>t</tt> is the type of the
--   counter. <tt>x</tt> is the type of the learning rate and the
--   difference metric. <tt>k</tt> is the type of the model indices.
--   <tt>p</tt> is the type of the input patterns and models.
data SGM t x k p
SGM :: Map k (p, t) -> (t -> x) -> Int -> x -> Bool -> (p -> p -> x) -> (p -> x -> p -> p) -> k -> SGM t x k p

-- | Maps patterns and match counts to nodes.
[toMap] :: SGM t x k p -> Map k (p, t)

-- | A function which determines the learning rate for a node. The input
--   parameter indicates how many patterns (or pattern batches) have
--   previously been presented to the classifier. Typically this is used to
--   make the learning rate decay over time. The output is the learning
--   rate for that node (the amount by which the node's model should be
--   updated to match the target). The learning rate should be between zero
--   and one.
[learningRate] :: SGM t x k p -> t -> x

-- | The maximum number of models this SGM can hold.
[maxSize] :: SGM t x k p -> Int

-- | The threshold that triggers creation of a new model.
[diffThreshold] :: SGM t x k p -> x

-- | Delete existing models to make room for new ones? The least useful
--   (least frequently matched) models will be deleted first.
[allowDeletion] :: SGM t x k p -> Bool

-- | A function which compares two patterns and returns a
--   <i>non-negative</i> number representing how different the patterns
--   are. A result of <tt>0</tt> indicates that the patterns are identical.
[difference] :: SGM t x k p -> p -> p -> x

-- | A function which updates models. For example, if this function is
--   <tt>f</tt>, then <tt>f target amount pattern</tt> returns a modified
--   copy of <tt>pattern</tt> that is more similar to <tt>target</tt> than
--   <tt>pattern</tt> is. The magnitude of the adjustment is controlled by
--   the <tt>amount</tt> parameter, which should be a number between 0 and
--   1. Larger values for <tt>amount</tt> permit greater adjustments. If
--   <tt>amount</tt>=1, the result should be identical to the
--   <tt>target</tt>. If <tt>amount</tt>=0, the result should be the
--   unmodified <tt>pattern</tt>.
[makeSimilar] :: SGM t x k p -> p -> x -> p -> p

-- | Index for the next node to add to the SGM.
[nextIndex] :: SGM t x k p -> k

-- | <tt><a>makeSGM</a> lr n dt ad diff ms</tt> creates a new SGM that
--   does not (yet) contain any models. It will learn at the rate
--   determined by the learning function <tt>lr</tt>, and will be able to
--   hold up to <tt>n</tt> models. It will create a new model based on a
--   pattern presented to it when (1) the SGM contains no models, or (2)
--   the difference between the pattern and the closest matching model
--   exceeds the threshold <tt>dt</tt>. If <tt>ad</tt> is true, it may
--   delete existing models to make room for new ones. It will use the
--   function <tt>diff</tt> to measure the difference between an input
--   pattern and a model. It will use the function <tt>ms</tt> to adjust
--   models as needed to make them more similar to input patterns.
makeSGM :: Bounded k => (t -> x) -> Int -> x -> Bool -> (p -> p -> x) -> (p -> x -> p -> p) -> SGM t x k p

-- | The current "time" (number of times the SGM has been trained).
time :: Num t => SGM t x k p -> t

-- | Returns true if the SGM has no models, false otherwise.
isEmpty :: SGM t x k p -> Bool

-- | Returns the number of models the SGM currently contains.
numModels :: SGM t x k p -> Int

-- | Returns a map from node ID to model.
modelMap :: SGM t x k p -> Map k p

-- | Returns a map from node ID to counter (number of times the node's
--   model has been the closest match to an input pattern).
counterMap :: SGM t x k p -> Map k t

-- | Returns the model at a specified node.
modelAt :: Ord k => SGM t x k p -> k -> p

-- | A typical learning function for classifiers. <tt><a>exponential</a> r0
--   d t</tt> returns the learning rate at time <tt>t</tt>. When <tt>t =
--   0</tt>, the learning rate is <tt>r0</tt>. Over time the learning rate
--   decays exponentially; the decay rate is <tt>d</tt>. Normally the
--   parameters are chosen such that:
--   
--   <ul>
--   <li>0 &lt; r0 &lt; 1</li>
--   <li>0 &lt; d</li>
--   </ul>
exponential :: (Floating a, Integral t) => a -> a -> t -> a

-- | <tt><a>classify</a> s p</tt> identifies the model in <tt>s</tt> that most
--   closely matches the pattern <tt>p</tt>. It will not make any changes
--   to the classifier. Returns the ID of the node with the best matching
--   model, the difference between the best matching model and the pattern,
--   and the SGM labels mapped to the corresponding model and the
--   difference between the input and that model. These are returned as a
--   <tt>Map</tt> keyed by label, not as a list sorted by similarity.
classify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x))

-- | <tt><a>trainAndClassify</a> s p</tt> identifies the model in
--   <tt>s</tt> that most closely matches <tt>p</tt>, and updates it to be
--   a somewhat better match. If necessary, it will create a new node and
--   model. Returns the ID of the node with the best matching model, the
--   difference between the best matching model and the pattern, the
--   differences between the input and each model in the SGM, and the
--   updated SGM.
trainAndClassify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)

-- | <tt><a>train</a> s p</tt> identifies the model in <tt>s</tt> that most
--   closely matches <tt>p</tt>, and updates it to be a somewhat better
--   match. If necessary, it will create a new node and model.
train :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> SGM t x k p

-- | For each pattern <tt>p</tt> in <tt>ps</tt>, <tt><a>trainBatch</a> s
--   ps</tt> identifies the model in <tt>s</tt> that most closely matches
--   <tt>p</tt>, and updates it to be a somewhat better match.
trainBatch :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> [p] -> SGM t x k p


-- | A module containing private <tt>SOM</tt> internals. Most developers
--   should use <tt>SOM</tt> instead. This module is subject to change
--   without notice.
module Data.Datamining.Clustering.SOMInternal

-- | A typical learning function for classifiers.
--   <tt><a>decayingGaussian</a> r0 rf w0 wf tf</tt> returns a bell
--   curve-shaped function. At time zero, the maximum learning rate
--   (applied to the BMU) is <tt>r0</tt>, and the neighbourhood width is
--   <tt>w0</tt>. Over time the bell curve shrinks and the learning rate
--   tapers off, until at time <tt>tf</tt>, the maximum learning rate
--   (applied to the BMU) is <tt>rf</tt>, and the neighbourhood width is
--   <tt>wf</tt>. Normally the parameters should be chosen such that:
--   
--   <ul>
--   <li>0 &lt; rf &lt;&lt; r0 &lt; 1</li>
--   <li>0 &lt; wf &lt;&lt; w0</li>
--   <li>0 &lt; tf</li>
--   </ul>
--   
--   where &lt;&lt; means "is much smaller than" (not the Haskell
--   <tt>&lt;&lt;</tt> operator!)
decayingGaussian :: Floating x => x -> x -> x -> x -> x -> x -> x -> x

-- | A learning function that only updates the BMU and has a constant
--   learning rate.
stepFunction :: (Num d, Fractional x, Eq d) => x -> t -> d -> x

-- | A learning function that updates all nodes with the same, constant
--   learning rate. This can be useful for testing.
constantFunction :: x -> t -> d -> x

-- | A Self-Organising Map (SOM).
--   
--   Although <tt>SOM</tt> implements <tt>GridMap</tt>, most users will
--   only need the interface provided by
--   <tt>Data.Datamining.Clustering.Classifier</tt>. If you choose to use
--   the <tt>GridMap</tt> functions, please note:
--   
--   <ol>
--   <li>The functions <tt>adjust</tt> and <tt>adjustWithKey</tt> do not
--   increment the counter. You can do so manually with
--   <tt>incrementCounter</tt>.</li>
--   <li>The functions <tt>map</tt> and <tt>mapWithKey</tt> are not
--   implemented (they just return an <tt>error</tt>). It would be
--   problematic to implement them because the input SOM and the output SOM
--   would have to have the same <tt>Metric</tt> type.</li>
--   </ol>
data SOM t d gm x k p
SOM :: gm p -> (t -> d -> x) -> (p -> p -> x) -> (p -> x -> p -> p) -> t -> SOM t d gm x k p

-- | Maps patterns to tiles in a regular grid. In the context of a SOM, the
--   tiles are called "nodes".
[gridMap] :: SOM t d gm x k p -> gm p

-- | A function which determines how quickly the SOM learns. For
--   example, if the function is <tt>f</tt>, then <tt>f t d</tt> returns
--   the learning rate for a node. The parameter <tt>t</tt> indicates how
--   many patterns (or pattern batches) have previously been presented to
--   the classifier. Typically this is used to make the learning rate decay
--   over time. The parameter <tt>d</tt> is the grid distance from the node
--   being updated to the BMU (Best Matching Unit). The output is the
--   learning rate for that node (the amount by which the node's model
--   should be updated to match the target). The learning rate should be
--   between zero and one.
[learningRate] :: SOM t d gm x k p -> t -> d -> x

-- | A function which compares two patterns and returns a
--   <i>non-negative</i> number representing how different the patterns
--   are. A result of <tt>0</tt> indicates that the patterns are identical.
[difference] :: SOM t d gm x k p -> p -> p -> x

-- | A function which updates models. If this function is <tt>f</tt>, then
--   <tt>f target amount pattern</tt> returns a modified copy of
--   <tt>pattern</tt> that is more similar to <tt>target</tt> than
--   <tt>pattern</tt> is. The magnitude of the adjustment is controlled by
--   the <tt>amount</tt> parameter, which should be a number between 0 and
--   1. Larger values for <tt>amount</tt> permit greater adjustments. If
--   <tt>amount</tt>=1, the result should be identical to the
--   <tt>target</tt>. If <tt>amount</tt>=0, the result should be the
--   unmodified <tt>pattern</tt>.
[makeSimilar] :: SOM t d gm x k p -> p -> x -> p -> p

-- | A counter used as a "time" parameter. If you create the SOM with a
--   counter value <tt>0</tt>, and don't directly modify it, then the
--   counter will represent the number of patterns that this SOM has
--   classified.
[counter] :: SOM t d gm x k p -> t

-- | Internal method.
withGridMap :: (gm p -> gm p) -> SOM t d gm x k p -> SOM t d gm x k p

-- | Returns the learning function currently being used by the SOM.
currentLearningFunction :: Num t => SOM t d gm x k p -> d -> x

-- | Extracts the grid and current models from the SOM. A synonym for
--   <tt><a>gridMap</a></tt>.
toGridMap :: GridMap gm p => SOM t d gm x k p -> gm p

-- | Internal method.
adjustNode :: (Grid g, k ~ Index g, Num t) => g -> (t -> x) -> (p -> x -> p -> p) -> p -> k -> k -> p -> p

-- | Trains the specified node and the neighbourhood around it to better
--   match a target. Most users should use <tt><a>train</a></tt>, which
--   automatically determines the BMU and trains it and its neighbourhood.
trainNeighbourhood :: (Grid (gm p), GridMap gm p, Index (BaseGrid gm p) ~ Index (gm p), Num t, Num x, Num d) => SOM t d gm x k p -> Index (gm p) -> p -> SOM t d gm x k p

-- | Increment the match counter.
incrementCounter :: Num t => SOM t d gm x k p -> SOM t d gm x k p

-- | Internal method.
justTrain :: (Ord x, Grid (gm p), GridMap gm x, GridMap gm p, Index (BaseGrid gm x) ~ Index (gm p), Index (BaseGrid gm p) ~ Index (gm p), Num t, Num x, Num d) => SOM t d gm x k p -> p -> SOM t d gm x k p
instance (Control.DeepSeq.NFData t, Control.DeepSeq.NFData (gm p)) => Control.DeepSeq.NFData (Data.Datamining.Clustering.SOMInternal.SOM t d gm x k p)
instance GHC.Generics.Generic (Data.Datamining.Clustering.SOMInternal.SOM t d gm x k p)
instance Data.Foldable.Foldable gm => Data.Foldable.Foldable (Data.Datamining.Clustering.SOMInternal.SOM t d gm x k)
instance Math.Geometry.GridInternal.Grid (gm p) => Math.Geometry.GridInternal.Grid (Data.Datamining.Clustering.SOMInternal.SOM t d gm x k p)
instance (Data.Foldable.Foldable gm, Math.Geometry.GridMap.GridMap gm p, Math.Geometry.GridInternal.Grid (Math.Geometry.GridMap.BaseGrid gm p)) => Math.Geometry.GridMap.GridMap (Data.Datamining.Clustering.SOMInternal.SOM t d gm x k) p
instance (Math.Geometry.GridMap.GridMap gm p, k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (Math.Geometry.GridMap.BaseGrid gm p), Math.Geometry.GridInternal.Grid (gm p), Math.Geometry.GridMap.GridMap gm x, k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (gm p), k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (Math.Geometry.GridMap.BaseGrid gm x), GHC.Num.Num t, GHC.Classes.Ord x, GHC.Num.Num x, GHC.Num.Num d) => Data.Datamining.Clustering.Classifier.Classifier (Data.Datamining.Clustering.SOMInternal.SOM t d gm) x k p


-- | A Kohonen Self-organising Map (SOM). A SOM maps input patterns onto a
--   regular grid (usually two-dimensional) where each node in the grid is
--   a model of the input data, and does so using a method which ensures
--   that any topological relationships within the input data are also
--   represented in the grid. This implementation supports the use of
--   non-numeric patterns.
--   
--   In layman's terms, a SOM can be useful when you want to discover
--   the underlying structure of some data. A tutorial is available at
--   <a>https://github.com/mhwombat/som/wiki</a>.
--   
--   NOTES:
--   
--   <ul>
--   <li>Version 5.0 fixed a bug in the <tt><a>decayingGaussian</a></tt>
--   function. If you use <tt>defaultSOM</tt> (which uses this
--   function), your SOM should now learn more quickly.</li>
--   <li>The <tt>gaussian</tt> function has been removed because it is not
--   as useful for SOMs as I originally thought. It was originally designed
--   to be used as a factor in a learning function. However, in most cases
--   the user will want to introduce a time decay into the exponent, rather
--   than simply multiply by a factor.</li>
--   </ul>
--   
--   References:
--   
--   <ul>
--   <li>Kohonen, T. (1982). Self-organized formation of topologically
--   correct feature maps. Biological Cybernetics, 43 (1), 59–69.</li>
--   </ul>
module Data.Datamining.Clustering.SOM

-- | A Self-Organising Map (SOM).
--   
--   Although <tt>SOM</tt> implements <tt>GridMap</tt>, most users will
--   only need the interface provided by
--   <tt>Data.Datamining.Clustering.Classifier</tt>. If you choose to use
--   the <tt>GridMap</tt> functions, please note:
--   
--   <ol>
--   <li>The functions <tt>adjust</tt> and <tt>adjustWithKey</tt> do not
--   increment the counter. You can do so manually with
--   <tt>incrementCounter</tt>.</li>
--   <li>The functions <tt>map</tt> and <tt>mapWithKey</tt> are not
--   implemented (they just return an <tt>error</tt>). It would be
--   problematic to implement them because the input SOM and the output SOM
--   would have to have the same <tt>Metric</tt> type.</li>
--   </ol>
data SOM t d gm x k p
SOM :: gm p -> (t -> d -> x) -> (p -> p -> x) -> (p -> x -> p -> p) -> t -> SOM t d gm x k p

-- | Maps patterns to tiles in a regular grid. In the context of a SOM, the
--   tiles are called "nodes".
[gridMap] :: SOM t d gm x k p -> gm p

-- | A function which determines how quickly the SOM learns. For
--   example, if the function is <tt>f</tt>, then <tt>f t d</tt> returns
--   the learning rate for a node. The parameter <tt>t</tt> indicates how
--   many patterns (or pattern batches) have previously been presented to
--   the classifier. Typically this is used to make the learning rate decay
--   over time. The parameter <tt>d</tt> is the grid distance from the node
--   being updated to the BMU (Best Matching Unit). The output is the
--   learning rate for that node (the amount by which the node's model
--   should be updated to match the target). The learning rate should be
--   between zero and one.
[learningRate] :: SOM t d gm x k p -> t -> d -> x

-- | A function which compares two patterns and returns a
--   <i>non-negative</i> number representing how different the patterns
--   are. A result of <tt>0</tt> indicates that the patterns are identical.
[difference] :: SOM t d gm x k p -> p -> p -> x

-- | A function which updates models. If this function is <tt>f</tt>, then
--   <tt>f target amount pattern</tt> returns a modified copy of
--   <tt>pattern</tt> that is more similar to <tt>target</tt> than
--   <tt>pattern</tt> is. The magnitude of the adjustment is controlled by
--   the <tt>amount</tt> parameter, which should be a number between 0 and
--   1. Larger values for <tt>amount</tt> permit greater adjustments. If
--   <tt>amount</tt>=1, the result should be identical to the
--   <tt>target</tt>. If <tt>amount</tt>=0, the result should be the
--   unmodified <tt>pattern</tt>.
[makeSimilar] :: SOM t d gm x k p -> p -> x -> p -> p

-- | A counter used as a "time" parameter. If you create the SOM with a
--   counter value <tt>0</tt>, and don't directly modify it, then the
--   counter will represent the number of patterns that this SOM has
--   classified.
[counter] :: SOM t d gm x k p -> t

-- | Extracts the grid and current models from the SOM. A synonym for
--   <tt><a>gridMap</a></tt>.
toGridMap :: GridMap gm p => SOM t d gm x k p -> gm p

-- | A typical learning function for classifiers.
--   <tt><a>decayingGaussian</a> r0 rf w0 wf tf</tt> returns a bell
--   curve-shaped function. At time zero, the maximum learning rate
--   (applied to the BMU) is <tt>r0</tt>, and the neighbourhood width is
--   <tt>w0</tt>. Over time the bell curve shrinks and the learning rate
--   tapers off, until at time <tt>tf</tt>, the maximum learning rate
--   (applied to the BMU) is <tt>rf</tt>, and the neighbourhood width is
--   <tt>wf</tt>. Normally the parameters should be chosen such that:
--   
--   <ul>
--   <li>0 &lt; rf &lt;&lt; r0 &lt; 1</li>
--   <li>0 &lt; wf &lt;&lt; w0</li>
--   <li>0 &lt; tf</li>
--   </ul>
--   
--   where &lt;&lt; means "is much smaller than" (not the Haskell
--   <tt>&lt;&lt;</tt> operator!)
decayingGaussian :: Floating x => x -> x -> x -> x -> x -> x -> x -> x

-- | A learning function that only updates the BMU and has a constant
--   learning rate.
stepFunction :: (Num d, Fractional x, Eq d) => x -> t -> d -> x

-- | A learning function that updates all nodes with the same, constant
--   learning rate. This can be useful for testing.
constantFunction :: x -> t -> d -> x

-- | Trains the specified node and the neighbourhood around it to better
--   match a target. Most users should use <tt><a>train</a></tt>, which
--   automatically determines the BMU and trains it and its neighbourhood.
trainNeighbourhood :: (Grid (gm p), GridMap gm p, Index (BaseGrid gm p) ~ Index (gm p), Num t, Num x, Num d) => SOM t d gm x k p -> Index (gm p) -> p -> SOM t d gm x k p


-- | Tools for identifying patterns in data.
module Data.Datamining.Pattern

-- | Adjusts a number to make it more similar to the target.
adjustNum :: (Num a, Ord a, Eq a) => a -> a -> a -> a

-- | Returns the absolute difference between two numbers.
absDifference :: Num a => a -> a -> a

-- | <tt><a>adjustVector</a> target amount vector</tt> adjusts each element
--   of <tt>vector</tt> to move it closer to the corresponding element of
--   <tt>target</tt>. The amount of adjustment is controlled by the
--   learning rate <tt>amount</tt>, which is a number between 0 and 1.
--   Larger values of <tt>amount</tt> permit more adjustment. If
--   <tt>amount</tt>=1, the result will be identical to the
--   <tt>target</tt>. If <tt>amount</tt>=0, the result will be the
--   unmodified <tt>vector</tt>. If <tt>target</tt> is shorter than
--   <tt>vector</tt>, the result will be the same length as
--   <tt>target</tt>. If <tt>target</tt> is longer than <tt>vector</tt>,
--   the result will be the same length as <tt>vector</tt>.
adjustVector :: (Num a, Ord a, Eq a) => [a] -> a -> [a] -> [a]

-- | Same as <tt><a>adjustVector</a></tt>, except that the result will
--   always be the same length as <tt>vector</tt>. This means that if
--   <tt>target</tt> is shorter than <tt>vector</tt>, the "leftover"
--   elements of <tt>vector</tt> will be copied to the result, unmodified.
adjustVectorPreserveLength :: (Num a, Ord a, Eq a) => [a] -> a -> [a] -> [a]

-- | Calculates the square of the Euclidean distance between two vectors.
euclideanDistanceSquared :: Num a => [a] -> [a] -> a

-- | Returns the sum of the squares of the elements of a vector.
magnitudeSquared :: Num a => [a] -> a

-- | A vector that has been normalised, i.e., the magnitude of the vector =
--   1.
data NormalisedVector a

-- | Normalises a vector.
normalise :: Floating a => [a] -> NormalisedVector a

-- | A vector that has been scaled so that all elements in the vector are
--   between zero and one. To scale a set of vectors, use
--   <tt><a>scaleAll</a></tt>. Alternatively, if you can identify a maximum
--   and minimum value for each element in a vector, you can scale
--   individual vectors using <tt><a>scale</a></tt>.
data ScaledVector a

-- | Given a vector <tt>qs</tt> of pairs of numbers, where each pair
--   represents the maximum and minimum value to be expected at each index
--   in <tt>xs</tt>, <tt><a>scale</a> qs xs</tt> scales the vector
--   <tt>xs</tt> element by element, mapping the maximum value expected at
--   that index to one, and the minimum value to zero.
scale :: Fractional a => [(a, a)] -> [a] -> ScaledVector a

-- | Scales a set of vectors by determining the maximum and minimum values
--   at each index in the vector, and mapping the maximum value to one, and
--   the minimum value to zero.
scaleAll :: (Fractional a, Ord a) => [[a]] -> [ScaledVector a]
instance GHC.Show.Show a => GHC.Show.Show (Data.Datamining.Pattern.ScaledVector a)
instance GHC.Show.Show a => GHC.Show.Show (Data.Datamining.Pattern.NormalisedVector a)