-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Self-Organising Maps
--   
--   A Kohonen Self-organising Map (SOM) maps input patterns onto a regular
--   grid (usually two-dimensional) where each node in the grid is a model
--   of the input data, and does so using a method which ensures that any
--   topological relationships within the input data are also represented
--   in the grid. This implementation supports the use of non-numeric
--   patterns.
--   
--   In layman's terms, a SOM can be useful when you want to discover
--   the underlying structure of some data.
--   
--   The userguide is available at
--   <a>https://github.com/mhwombat/som/wiki</a>.
@package som
@version 4.1


-- | Tools for identifying patterns in data.
module Data.Datamining.Pattern

-- | A pattern to be learned or classified. The associated type
--   <tt>Metric p</tt> is the type in which the difference between two
--   patterns of type <tt>p</tt> is expressed; it is also the type of the
--   second argument of <tt><a>makeSimilar</a></tt>.
class Pattern p where type family Metric p

-- | Compares two patterns and returns the difference between them, as a
--   value of type <tt>Metric p</tt>.
difference :: Pattern p => p -> p -> Metric p

-- | Returns a modified copy of a pattern.
--   
--   NOTE(review): presumably <tt><a>makeSimilar</a> target amount
--   pattern</tt> moves <tt>pattern</tt> closer to <tt>target</tt>, by
--   analogy with <tt><a>adjustVector</a></tt>, but the roles of the
--   arguments are not stated in this index; confirm against the
--   implementation.
makeSimilar :: Pattern p => p -> Metric p -> p -> p

-- | A vector that has been normalised, i.e., the magnitude of the vector
--   is 1. Use <tt><a>normalise</a></tt> to create one from a list of
--   numbers.
data NormalisedVector a

-- | Normalises a vector, returning it as a
--   <tt><a>NormalisedVector</a></tt> (a vector whose magnitude is 1).
--   
--   NOTE(review): the result for an all-zero input vector is not
--   documented here; confirm against the implementation.
normalise :: Floating a => [a] -> NormalisedVector a

-- | A vector that has been scaled so that all elements in the vector are
--   between zero and one. To scale a set of vectors, use
--   <tt><a>scaleAll</a></tt>. Alternatively, if you can identify a maximum
--   and minimum value for each element in a vector, you can scale
--   individual vectors using <tt><a>scale</a></tt>.
data ScaledVector a

-- | Given a vector <tt>qs</tt> of pairs of numbers, where each pair
--   represents the maximum and minimum value to be expected at each index
--   in <tt>xs</tt>, <tt><a>scale</a> qs xs</tt> scales the vector
--   <tt>xs</tt> element by element, mapping the maximum value expected at
--   that index to one, and the minimum value to zero.
--   
--   NOTE(review): this description does not say which component of each
--   pair is the minimum and which is the maximum; confirm the expected
--   order against the implementation before relying on it.
scale :: Fractional a => [(a, a)] -> [a] -> ScaledVector a

-- | Scales a set of vectors by determining the maximum and minimum values
--   at each index in the vector, and mapping the maximum value to one, and
--   the minimum value to zero.
scaleAll :: (Fractional a, Ord a) => [[a]] -> [ScaledVector a]

-- | <tt><a>adjustVector</a> target amount vector</tt> adjusts
--   <tt>vector</tt> to move it closer to <tt>target</tt>. The amount of
--   adjustment is controlled by the learning rate <tt>amount</tt>, which
--   is a number between 0 and 1. Larger values of <tt>amount</tt> permit
--   more adjustment. If <tt>amount</tt>=1, the result will be identical to
--   the <tt>target</tt>. If <tt>amount</tt>=0, the result will be the
--   unmodified <tt>vector</tt>.
adjustVector :: (Num a, Ord a, Eq a) => [a] -> a -> [a] -> [a]

-- | Calculates the square of the Euclidean distance between two vectors.
euclideanDistanceSquared :: Num a => [a] -> [a] -> a

-- | NOTE(review): undocumented in the original. Judging by the name and
--   type, this presumably calculates the square of the magnitude
--   (Euclidean length) of a vector; confirm against the implementation.
magnitudeSquared :: Num a => [a] -> a
instance Show a => Show (NormalisedVector a)
instance Show a => Show (ScaledVector a)
instance (Fractional a, Ord a, Eq a) => Pattern (ScaledVector a)
instance (Floating a, Fractional a, Ord a, Eq a) => Pattern (NormalisedVector a)


-- | Tools for identifying patterns in data.
module Data.Datamining.Clustering.Classifier

-- | A machine which learns to classify input patterns. Minimal complete
--   definition: <tt>trainBatch</tt>, <tt>reportAndTrain</tt>.
--   
--   The class line below also carries the default method definitions
--   (flattened onto one line by the documentation generator). Defaults
--   are shown for <tt>classify</tt>, <tt>train</tt>,
--   <tt>classifyAndTrain</tt> and <tt>diffAndTrain</tt>; the last three
--   are defined in terms of <tt>reportAndTrain</tt>.
--   
--   NOTE(review): no defaults are shown for <tt>toList</tt>,
--   <tt>numModels</tt>, <tt>models</tt> or <tt>differences</tt>, so the
--   minimal complete definition stated above may be incomplete; confirm
--   against the implementation.
class Classifier (c :: * -> * -> *) k p where classify c p = fst . minimumBy (comparing snd) $ differences c p train c p = c' where (_, _, c') = reportAndTrain c p classifyAndTrain c p = (bmu, c') where (bmu, _, c') = reportAndTrain c p diffAndTrain c p = (ds, c') where (_, ds, c') = reportAndTrain c p

-- | Returns the contents of the classifier as a list of pairs.
--   Presumably each pair is a key together with the model stored under
--   that key; confirm against the implementation.
toList :: Classifier c k p => c k p -> [(k, p)]

-- | Presumably returns the number of models in the classifier; confirm
--   against the implementation.
numModels :: Classifier c k p => c k p -> Int

-- | Presumably returns the models currently held by the classifier;
--   confirm against the implementation.
models :: Classifier c k p => c k p -> [p]

-- | <tt><a>differences</a> c p</tt> returns a list of keys, each paired
--   with a value of type <tt>Metric p</tt>. Presumably each value is the
--   difference between <tt>p</tt> and the model stored under that key;
--   confirm against the implementation.
differences :: (Classifier c k p, Pattern p, v ~ Metric p) => c k p -> p -> [(k, v)]

-- | <tt><a>classify</a> c p</tt> returns a key of the classifier. The
--   default definition selects the key of the pair with the smallest
--   second component in <tt><a>differences</a> c p</tt>.
classify :: (Classifier c k p, Pattern p, Ord v, v ~ Metric p) => c k p -> p -> k

-- | <tt><a>train</a> c p</tt> returns an updated classifier. The default
--   definition returns the classifier produced by
--   <tt><a>reportAndTrain</a> c p</tt> and discards the rest of its
--   result.
train :: (Classifier c k p, Ord v, v ~ Metric p) => c k p -> p -> c k p

-- | Takes a classifier and a list of patterns, and returns a classifier.
--   Presumably the result has been trained on each of the patterns;
--   confirm against the implementation.
trainBatch :: Classifier c k p => c k p -> [p] -> c k p

-- | <tt><a>classifyAndTrain</a> c p</tt> returns a key together with an
--   updated classifier. The default definition takes the first and third
--   components of <tt><a>reportAndTrain</a> c p</tt>.
classifyAndTrain :: (Classifier c k p, Ord v, v ~ Metric p) => c k p -> p -> (k, c k p)

-- | <tt><a>diffAndTrain</a> c p</tt> returns a list of key/metric pairs
--   together with an updated classifier. The default definition takes
--   the second and third components of <tt><a>reportAndTrain</a> c
--   p</tt>.
diffAndTrain :: (Classifier c k p, Ord v, v ~ Metric p) => c k p -> p -> ([(k, v)], c k p)

-- | <tt><a>reportAndTrain</a> c p</tt> returns a triple: a key, a list of
--   key/metric pairs, and an updated classifier. Presumably these are the
--   key of the best-matching model, the list that
--   <tt><a>differences</a></tt> would return, and the classifier after
--   training on <tt>p</tt>; confirm against the implementation.
reportAndTrain :: (Classifier c k p, Ord v, v ~ Metric p) => c k p -> p -> (k, [(k, v)], c k p)


-- | A module containing private <tt>SOM</tt> internals. Most developers
--   should use <tt>SOM</tt> instead. This module is subject to change
--   without notice.
module Data.Datamining.Clustering.SOMInternal

-- | A Self-Organising Map (SOM).
--   
--   Although <tt>SOM</tt> implements <tt>GridMap</tt>, most users will
--   only need the interface provided by <tt>Classifier</tt>. If you choose
--   to use the <tt>GridMap</tt> functions, please note:
--   
--   <ol>
--   <li>The functions <tt>adjust</tt> and <tt>adjustWithKey</tt> do not
--   increment the counter. You can do so manually with
--   <tt>incrementCounter</tt>.</li>
--   <li>The functions <tt>map</tt> and <tt>mapWithKey</tt> are not
--   implemented (they just return an <tt>error</tt>). It would be
--   problematic to implement them because the input SOM and the output SOM
--   would have to have the same <tt>Metric</tt> type.</li>
--   </ol>
data SOM gm k p

-- | Constructs a SOM from, in order, a grid map, a learning function and
--   a counter value (see the field accessors that follow).
SOM :: gm p -> (Int -> Int -> Metric p) -> Int -> SOM gm k p

-- | The grid map held by the SOM.
sGridMap :: SOM gm k p -> gm p

-- | The learning function held by the SOM. NOTE(review): presumably this
--   is the function supplied to <tt><a>customSOM</a></tt>, which is
--   invoked with the number of patterns presented so far and the grid
--   distance from the node being updated to the BMU, and returns the
--   learning rate for that node; confirm against the implementation.
sLearningFunction :: SOM gm k p -> Int -> Int -> Metric p

-- | The counter held by the SOM; this is the value that
--   <tt><a>incrementCounter</a></tt> increments. NOTE(review): presumably
--   it counts the patterns (or pattern batches) presented so far; confirm
--   against the implementation.
sCounter :: SOM gm k p -> Int

-- | Creates a classifier with a default (bell-shaped) learning function.
--   Usage is <tt><a>defaultSOM</a> gm r w t</tt>, where:
--   
--   <ul>
--   <li><i><tt>gm</tt></i> The geometry and initial models for this
--   classifier. A reasonable choice here is <tt><tt>lazyGridMap</tt> g
--   ps</tt>, where <tt>g</tt> is a <tt><tt>HexHexGrid</tt></tt>, and
--   <tt>ps</tt> is a set of random patterns.</li>
--   <li><i><tt>r</tt></i> The learning rate to be applied to the BMU (Best
--   Matching Unit) at time zero. The BMU is the model which best matches
--   the current target pattern.</li>
--   <li><i><tt>w</tt></i> The width of the bell curve at time zero.</li>
--   <li><i><tt>t</tt></i> Controls how rapidly the learning rate decays.
--   After this time, any learning done by the classifier will be
--   negligible. We recommend setting this parameter to the number of
--   patterns (or pattern batches) that will be presented to the
--   classifier. An estimate is fine.</li>
--   </ul>
defaultSOM :: Floating (Metric p) => gm p -> Metric p -> Metric p -> Int -> SOM gm k p

-- | Creates a classifier with a custom learning function. Usage is
--   <tt><a>customSOM</a> gm f</tt>, where:
--   
--   <ul>
--   <li><i><tt>gm</tt></i> The geometry and initial models for this
--   classifier. A reasonable choice here is <tt><tt>lazyGridMap</tt> g
--   ps</tt>, where <tt>g</tt> is a <tt><tt>HexHexGrid</tt></tt>, and
--   <tt>ps</tt> is a set of random patterns.</li>
--   <li><i><tt>f</tt></i> A function used to adjust the models in the
--   classifier. This function will be invoked with two parameters. The
--   first parameter will indicate how many patterns (or pattern batches)
--   have previously been presented to this classifier. Typically this is
--   used to make the learning rate decay over time. The second parameter
--   to the function is the grid distance from the node being updated to
--   the BMU (Best Matching Unit). The output is the learning rate for that
--   node (the amount by which the node's model should be updated to match
--   the target). The learning rate should be between zero and one.</li>
--   </ul>
customSOM :: gm p -> (Int -> Int -> Metric p) -> SOM gm k p

-- | Calculates <tt>r * e^(-d^2 / (2 * w^2))</tt>. This form of the
--   Gaussian function is useful as a learning rate function. In
--   <tt><a>gaussian</a> r w d</tt>, <tt>r</tt> specifies the highest
--   learning rate, which will be applied to the SOM node that best matches
--   the input pattern. The learning rate applied to other nodes is
--   determined by their distance <tt>d</tt> from the best matching node.
--   The value <tt>w</tt> controls the 'width' of the Gaussian. Higher
--   values of <tt>w</tt> cause the learning rate to fall off more slowly
--   with distance <tt>d</tt>.
gaussian :: Floating a => a -> a -> Int -> a

-- | Configures a typical learning function for classifiers.
--   <tt><a>decayingGaussian</a> r w0 tMax</tt> returns a bell curve-shaped
--   function. At time zero, the maximum learning rate (applied to the BMU)
--   is <tt>r</tt>, and the neighbourhood width is <tt>w0</tt>. Over time
--   the bell curve shrinks and the learning rate tapers off, until at time
--   <tt>tMax</tt>, the learning rate is negligible.
decayingGaussian :: Floating a => a -> a -> Int -> (Int -> Int -> a)

-- | Extracts the grid and current models from the SOM.
toGridMap :: GridMap gm p => SOM gm k p -> gm p

-- | Trains the specified node and the neighbourhood around it to better
--   match a target. Most users should use <tt>train</tt>, which
--   automatically determines the BMU and trains it and its neighbourhood.
trainNeighbourhood :: (Pattern p, Grid (gm p), GridMap gm p, Index (BaseGrid gm p) ~ Index (gm p)) => SOM gm k p -> Index (gm p) -> p -> SOM gm k p

-- | Increments the SOM's counter. The <tt>GridMap</tt> functions
--   <tt>adjust</tt> and <tt>adjustWithKey</tt> do not increment the
--   counter themselves, so use this function to do so manually after
--   calling them.
incrementCounter :: SOM gm k p -> SOM gm k p
instance (GridMap gm p, k ~ Index (BaseGrid gm p), Pattern p, Grid (gm p), GridMap gm (Metric p), k ~ Index (gm p), k ~ Index (BaseGrid gm (Metric p)), Ord (Metric p)) => Classifier (SOM gm) k p
instance (Foldable gm, GridMap gm p, Grid (BaseGrid gm p)) => GridMap (SOM gm k) p
instance Grid (gm p) => Grid (SOM gm k p)
instance Foldable gm => Foldable (SOM gm k)


-- | A Kohonen Self-organising Map (SOM). A SOM maps input patterns onto a
--   regular grid (usually two-dimensional) where each node in the grid is
--   a model of the input data, and does so using a method which ensures
--   that any topological relationships within the input data are also
--   represented in the grid. This implementation supports the use of
--   non-numeric patterns.
--   
--   In layman's terms, a SOM can be useful when you want to discover
--   the underlying structure of some data. A tutorial is available at
--   <a>https://github.com/mhwombat/som/wiki</a>.
--   
--   References:
--   
--   <ul>
--   <li>Kohonen, T. (1982). Self-organized formation of topologically
--   correct feature maps. Biological Cybernetics, 43 (1), 59–69.</li>
--   </ul>
module Data.Datamining.Clustering.SOM

-- | A Self-Organising Map (SOM).
--   
--   Although <tt>SOM</tt> implements <tt>GridMap</tt>, most users will
--   only need the interface provided by <tt>Classifier</tt>. If you choose
--   to use the <tt>GridMap</tt> functions, please note:
--   
--   <ol>
--   <li>The functions <tt>adjust</tt> and <tt>adjustWithKey</tt> do not
--   increment the counter. You can do so manually with
--   <tt>incrementCounter</tt>.</li>
--   <li>The functions <tt>map</tt> and <tt>mapWithKey</tt> are not
--   implemented (they just return an <tt>error</tt>). It would be
--   problematic to implement them because the input SOM and the output SOM
--   would have to have the same <tt>Metric</tt> type.</li>
--   </ol>
data SOM gm k p

-- | Creates a classifier with a default (bell-shaped) learning function.
--   Usage is <tt><a>defaultSOM</a> gm r w t</tt>, where:
--   
--   <ul>
--   <li><i><tt>gm</tt></i> The geometry and initial models for this
--   classifier. A reasonable choice here is <tt><tt>lazyGridMap</tt> g
--   ps</tt>, where <tt>g</tt> is a <tt><tt>HexHexGrid</tt></tt>, and
--   <tt>ps</tt> is a set of random patterns.</li>
--   <li><i><tt>r</tt></i> The learning rate to be applied to the BMU (Best
--   Matching Unit) at time zero. The BMU is the model which best matches
--   the current target pattern.</li>
--   <li><i><tt>w</tt></i> The width of the bell curve at time zero.</li>
--   <li><i><tt>t</tt></i> Controls how rapidly the learning rate decays.
--   After this time, any learning done by the classifier will be
--   negligible. We recommend setting this parameter to the number of
--   patterns (or pattern batches) that will be presented to the
--   classifier. An estimate is fine.</li>
--   </ul>
defaultSOM :: Floating (Metric p) => gm p -> Metric p -> Metric p -> Int -> SOM gm k p

-- | Creates a classifier with a custom learning function. Usage is
--   <tt><a>customSOM</a> gm f</tt>, where:
--   
--   <ul>
--   <li><i><tt>gm</tt></i> The geometry and initial models for this
--   classifier. A reasonable choice here is <tt><tt>lazyGridMap</tt> g
--   ps</tt>, where <tt>g</tt> is a <tt><tt>HexHexGrid</tt></tt>, and
--   <tt>ps</tt> is a set of random patterns.</li>
--   <li><i><tt>f</tt></i> A function used to adjust the models in the
--   classifier. This function will be invoked with two parameters. The
--   first parameter will indicate how many patterns (or pattern batches)
--   have previously been presented to this classifier. Typically this is
--   used to make the learning rate decay over time. The second parameter
--   to the function is the grid distance from the node being updated to
--   the BMU (Best Matching Unit). The output is the learning rate for that
--   node (the amount by which the node's model should be updated to match
--   the target). The learning rate should be between zero and one.</li>
--   </ul>
customSOM :: gm p -> (Int -> Int -> Metric p) -> SOM gm k p

-- | Calculates <tt>r * e^(-d^2 / (2 * w^2))</tt>. This form of the
--   Gaussian function is useful as a learning rate function. In
--   <tt><a>gaussian</a> r w d</tt>, <tt>r</tt> specifies the highest
--   learning rate, which will be applied to the SOM node that best matches
--   the input pattern. The learning rate applied to other nodes is
--   determined by their distance <tt>d</tt> from the best matching node.
--   The value <tt>w</tt> controls the 'width' of the Gaussian. Higher
--   values of <tt>w</tt> cause the learning rate to fall off more slowly
--   with distance <tt>d</tt>.
gaussian :: Floating a => a -> a -> Int -> a

-- | Configures a typical learning function for classifiers.
--   <tt><a>decayingGaussian</a> r w0 tMax</tt> returns a bell curve-shaped
--   function. At time zero, the maximum learning rate (applied to the BMU)
--   is <tt>r</tt>, and the neighbourhood width is <tt>w0</tt>. Over time
--   the bell curve shrinks and the learning rate tapers off, until at time
--   <tt>tMax</tt>, the learning rate is negligible.
decayingGaussian :: Floating a => a -> a -> Int -> (Int -> Int -> a)

-- | Extracts the grid and current models from the SOM.
toGridMap :: GridMap gm p => SOM gm k p -> gm p

-- | Trains the specified node and the neighbourhood around it to better
--   match a target. Most users should use <tt>train</tt>, which
--   automatically determines the BMU and trains it and its neighbourhood.
trainNeighbourhood :: (Pattern p, Grid (gm p), GridMap gm p, Index (BaseGrid gm p) ~ Index (gm p)) => SOM gm k p -> Index (gm p) -> p -> SOM gm k p

-- | Increments the SOM's counter. The <tt>GridMap</tt> functions
--   <tt>adjust</tt> and <tt>adjustWithKey</tt> do not increment the
--   counter themselves, so use this function to do so manually after
--   calling them.
incrementCounter :: SOM gm k p -> SOM gm k p