-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Self-Organising Maps -- -- Please see the README on GitHub at -- https://github.com/mhwombat/som#readme @package som @version 10.1.2 -- | Tools for identifying patterns in data. module Data.Datamining.Clustering.Classifier -- | A machine which learns to classify input patterns. Minimal complete -- definition: trainBatch, reportAndTrain. class Classifier (c :: * -> * -> * -> *) v k p -- | Returns a list of index/model pairs. toList :: Classifier c v k p => c v k p -> [(k, p)] -- | Returns the number of models this classifier can learn. numModels :: Classifier c v k p => c v k p -> Int -- | Returns the current models of the classifier. models :: Classifier c v k p => c v k p -> [p] -- | differences c target returns the indices of all nodes -- in c, paired with the difference between target and -- the node's model. differences :: Classifier c v k p => c v k p -> p -> [(k, v)] -- | classify c target returns the index of the node in c -- whose model best matches the target. classify :: (Classifier c v k p, Ord v) => c v k p -> p -> k -- | train c target returns a modified copy of the -- classifier c that has partially learned the target. train :: Classifier c v k p => c v k p -> p -> c v k p -- | trainBatch c targets returns a modified copy of the -- classifier c that has partially learned the targets. trainBatch :: Classifier c v k p => c v k p -> [p] -> c v k p -- | classifyAndTrain c target returns a tuple containing -- the index of the node in c whose model best matches the input -- target, and a modified copy of the classifier c that -- has partially learned the target. Invoking -- classifyAndTrain c p may be faster than invoking (p -- classify c, train c p), but they should give identical -- results. classifyAndTrain :: Classifier c v k p => c v k p -> p -> (k, c v k p) -- | diffAndTrain c target returns a tuple containing: 1. -- The indices of all nodes in c, paired with the difference -- between target and the node's model 2. A modified copy of the -- classifier c that has partially learned the target. -- Invoking diffAndTrain c p may be faster than invoking (p -- diff c, train c p), but they should give identical -- results. diffAndTrain :: Classifier c v k p => c v k p -> p -> ([(k, v)], c v k p) -- | reportAndTrain c f target returns a tuple containing: -- 1. The index of the node in c whose model best matches the -- input target 2. The indices of all nodes in c, -- paired with the difference between target and the node's -- model 3. A modified copy of the classifier c that has -- partially learned the target Invoking diffAndTrain c -- p may be faster than invoking (p diff c, train c -- p), but they should give identical results. reportAndTrain :: Classifier c v k p => c v k p -> p -> (k, [(k, v)], c v k p) -- | A module containing private DSOM internals. Most developers -- should use DSOM instead. This module is subject to change -- without notice. module Data.Datamining.Clustering.DSOMInternal -- | A Self-Organising Map (DSOM). -- -- Although DSOM implements GridMap, most users will -- only need the interface provided by -- Data.Datamining.Clustering.Classifier. If you chose to use -- the GridMap functions, please note: -- --
    --
  1. The functions adjust, and adjustWithKey do not -- increment the counter. You can do so manually with -- incrementCounter.
  2. --
  3. The functions map and mapWithKey are not -- implemented (they just return an error). It would be -- problematic to implement them because the input DSOM and the output -- DSOM would have to have the same Metric type.
  4. --
data DSOM gm x k p DSOM :: gm p -> (x -> x -> x -> x) -> (p -> p -> x) -> (p -> x -> p -> p) -> DSOM gm x k p -- | Maps patterns to tiles in a regular grid. In the context of a SOM, the -- tiles are called "nodes" [gridMap] :: DSOM gm x k p -> gm p -- | A function which determines the how quickly the SOM learns. [learningRate] :: DSOM gm x k p -> x -> x -> x -> x -- | A function which compares two patterns and returns a -- non-negative number representing how different the patterns -- are. A result of 0 indicates that the patterns are identical. [difference] :: DSOM gm x k p -> p -> p -> x -- | A function which updates models. If this function is f, then -- f target amount pattern returns a modified copy of -- pattern that is more similar to target than -- pattern is. The magnitude of the adjustment is controlled by -- the amount parameter, which should be a number between 0 and -- 1. Larger values for amount permit greater adjustments. If -- amount=1, the result should be identical to the -- target. If amount=0, the result should be the -- unmodified pattern. [makeSimilar] :: DSOM gm x k p -> p -> x -> p -> p -- | Internal method. withGridMap :: (gm p -> gm p) -> DSOM gm x k p -> DSOM gm x k p -- | Extracts the grid and current models from the DSOM. toGridMap :: GridMap gm p => DSOM gm x k p -> gm p -- | Internal method. adjustNode :: (FiniteGrid (gm p), GridMap gm p, k ~ Index (gm p), k ~ Index (BaseGrid gm p), Ord k, Num x, Fractional x) => gm p -> (p -> x -> p -> p) -> (p -> p -> x) -> (x -> x -> x) -> p -> k -> k -> p -> p -- | Internal method. scaleDistance :: (Num a, Fractional a) => Int -> Int -> a -- | Trains the specified node and the neighbourood around it to better -- match a target. Most users should use train, which -- automatically determines the BMU and trains it and its neighbourhood. trainNeighbourhood :: (FiniteGrid (gm p), GridMap gm p, k ~ Index (gm p), k ~ Index (BaseGrid gm p), Ord k, Num x, Fractional x) => DSOM gm x t p -> k -> p -> DSOM gm x k p -- | Internal method. justTrain :: (FiniteGrid (gm p), GridMap gm p, GridMap gm x, k ~ Index (gm p), k ~ Index (gm x), k ~ Index (BaseGrid gm p), k ~ Index (BaseGrid gm x), Ord k, Ord x, Num x, Fractional x) => DSOM gm x t p -> p -> DSOM gm x k p -- | Configures a learning function that depends not on the time, but on -- how good a model we already have for the target. If the BMU is an -- exact match for the target, no learning occurs. Usage is -- rougierLearningFunction r p, where r is the -- maximal learning rate (0 <= r <= 1), and p is the -- elasticity. -- -- NOTE: When using this learning function, ensure that abs . -- difference is always between 0 and 1, inclusive. Otherwise you -- may get invalid learning rates. rougierLearningFunction :: (Eq a, Ord a, Floating a) => a -> a -> a -> a -> a -> a instance Control.DeepSeq.NFData (gm p) => Control.DeepSeq.NFData (Data.Datamining.Clustering.DSOMInternal.DSOM gm x k p) instance GHC.Generics.Generic (Data.Datamining.Clustering.DSOMInternal.DSOM gm x k p) instance Data.Foldable.Foldable gm => Data.Foldable.Foldable (Data.Datamining.Clustering.DSOMInternal.DSOM gm x k) instance Math.Geometry.GridInternal.Grid (gm p) => Math.Geometry.GridInternal.Grid (Data.Datamining.Clustering.DSOMInternal.DSOM gm x k p) instance (Data.Foldable.Foldable gm, Math.Geometry.GridMap.GridMap gm p, Math.Geometry.GridInternal.FiniteGrid (Math.Geometry.GridMap.BaseGrid gm p)) => Math.Geometry.GridMap.GridMap (Data.Datamining.Clustering.DSOMInternal.DSOM gm x k) p instance (Math.Geometry.GridMap.GridMap gm p, k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (Math.Geometry.GridMap.BaseGrid gm p), Math.Geometry.GridInternal.FiniteGrid (gm p), Math.Geometry.GridMap.GridMap gm x, k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (gm p), k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (gm x), k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (Math.Geometry.GridMap.BaseGrid gm x), GHC.Classes.Ord k, GHC.Classes.Ord x, GHC.Num.Num x, GHC.Real.Fractional x) => Data.Datamining.Clustering.Classifier.Classifier (Data.Datamining.Clustering.DSOMInternal.DSOM gm) x k p -- | A modified Kohonen Self-organising Map (SOM) which supports a -- time-independent learning function. (See SOM for a -- description of a SOM.) -- -- References: -- -- module Data.Datamining.Clustering.DSOM -- | A Self-Organising Map (DSOM). -- -- Although DSOM implements GridMap, most users will -- only need the interface provided by -- Data.Datamining.Clustering.Classifier. If you chose to use -- the GridMap functions, please note: -- --
    --
  1. The functions adjust, and adjustWithKey do not -- increment the counter. You can do so manually with -- incrementCounter.
  2. --
  3. The functions map and mapWithKey are not -- implemented (they just return an error). It would be -- problematic to implement them because the input DSOM and the output -- DSOM would have to have the same Metric type.
  4. --
data DSOM gm x k p DSOM :: gm p -> (x -> x -> x -> x) -> (p -> p -> x) -> (p -> x -> p -> p) -> DSOM gm x k p -- | Maps patterns to tiles in a regular grid. In the context of a SOM, the -- tiles are called "nodes" [gridMap] :: DSOM gm x k p -> gm p -- | A function which determines the how quickly the SOM learns. [learningRate] :: DSOM gm x k p -> x -> x -> x -> x -- | A function which compares two patterns and returns a -- non-negative number representing how different the patterns -- are. A result of 0 indicates that the patterns are identical. [difference] :: DSOM gm x k p -> p -> p -> x -- | A function which updates models. If this function is f, then -- f target amount pattern returns a modified copy of -- pattern that is more similar to target than -- pattern is. The magnitude of the adjustment is controlled by -- the amount parameter, which should be a number between 0 and -- 1. Larger values for amount permit greater adjustments. If -- amount=1, the result should be identical to the -- target. If amount=0, the result should be the -- unmodified pattern. [makeSimilar] :: DSOM gm x k p -> p -> x -> p -> p -- | Extracts the grid and current models from the DSOM. toGridMap :: GridMap gm p => DSOM gm x k p -> gm p -- | Configures a learning function that depends not on the time, but on -- how good a model we already have for the target. If the BMU is an -- exact match for the target, no learning occurs. Usage is -- rougierLearningFunction r p, where r is the -- maximal learning rate (0 <= r <= 1), and p is the -- elasticity. -- -- NOTE: When using this learning function, ensure that abs . -- difference is always between 0 and 1, inclusive. Otherwise you -- may get invalid learning rates. rougierLearningFunction :: (Eq a, Ord a, Floating a) => a -> a -> a -> a -> a -> a -- | Trains the specified node and the neighbourood around it to better -- match a target. Most users should use train, which -- automatically determines the BMU and trains it and its neighbourhood. trainNeighbourhood :: (FiniteGrid (gm p), GridMap gm p, k ~ Index (gm p), k ~ Index (BaseGrid gm p), Ord k, Num x, Fractional x) => DSOM gm x t p -> k -> p -> DSOM gm x k p -- | A module containing private SGM internals. Most developers -- should use SGM instead. This module is subject to change -- without notice. module Data.Datamining.Clustering.SGM2Internal -- | A typical learning function for classifiers. exponential r0 -- d t returns the learning rate at time t. When t = -- 0, the learning rate is r0. Over time the learning rate -- decays exponentially; the decay rate is d. Normally the -- parameters are chosen such that: -- -- exponential :: (Floating a, Integral t) => a -> a -> t -> a -- | A Simplified Self-Organising Map (SGM). t is the type of the -- counter. x is the type of the learning rate and the -- difference metric. k is the type of the model indices. -- p is the type of the input patterns and models. data SGM t x k p SGM :: Map k (p, t) -> (t -> x) -> Int -> (p -> p -> x) -> (p -> x -> p -> p) -> k -> SGM t x k p -- | Maps patterns and match counts to nodes. [toMap] :: SGM t x k p -> Map k (p, t) -- | A function which determines the learning rate for a node. The input -- parameter indicates how many patterns (or pattern batches) have -- previously been presented to the classifier. Typically this is used to -- make the learning rate decay over time. The output is the learning -- rate for that node (the amount by which the node's model should be -- updated to match the target). The learning rate should be between zero -- and one. [learningRate] :: SGM t x k p -> t -> x -- | The maximum number of models this SGM can hold. [capacity] :: SGM t x k p -> Int -- | A function which compares two patterns and returns a -- non-negative number representing how different the patterns -- are. A result of 0 indicates that the patterns are identical. [difference] :: SGM t x k p -> p -> p -> x -- | A function which updates models. For example, if this function is -- f, then f target amount pattern returns a modified -- copy of pattern that is more similar to target than -- pattern is. The magnitude of the adjustment is controlled by -- the amount parameter, which should be a number between 0 and -- 1. Larger values for amount permit greater adjustments. If -- amount=1, the result should be identical to the -- target. If amount=0, the result should be the -- unmodified pattern. [makeSimilar] :: SGM t x k p -> p -> x -> p -> p -- | Index for the next node to add to the SGM. [nextIndex] :: SGM t x k p -> k -- | makeSGM lr n diff ms creates a new SGM that does not -- (yet) contain any models. It will learn at the rate determined by the -- learning function lr, and will be able to hold up to -- n models. It will create a new model based on a pattern -- presented to it when the SGM is not at capacity, or a less useful -- model can be replaced. It will use the function diff to -- measure the similarity between an input pattern and a model. It will -- use the function ms to adjust models as needed to make them -- more similar to input patterns. makeSGM :: Bounded k => (t -> x) -> Int -> (p -> p -> x) -> (p -> x -> p -> p) -> SGM t x k p -- | Returns true if the SGM has no models, false otherwise. isEmpty :: SGM t x k p -> Bool -- | Returns the number of models the SGM currently contains. size :: SGM t x k p -> Int -- | Returns a map from node ID to model. modelMap :: SGM t x k p -> Map k p -- | Returns a map from node ID to counter (number of times the node's -- model has been the closest match to an input pattern). counterMap :: SGM t x k p -> Map k t -- | Returns the model at a specified node. modelAt :: Ord k => SGM t x k p -> k -> p -- | Returns the match counter for a specified node. counterAt :: Ord k => SGM t x k p -> k -> t -- | Returns the current labels. labels :: SGM t x k p -> [k] -- | The current "time" (number of times the SGM has been trained). time :: Num t => SGM t x k p -> t -- | Adds a new node to the SGM. addNode :: (Num t, Enum k, Ord k) => p -> SGM t x k p -> SGM t x k p -- | Increments the counter. incrementCounter :: (Num t, Ord k) => k -> SGM t x k p -> SGM t x k p -- | Trains the specified node to better match a target. Most users should -- use train, which automatically determines the BMU and -- trains it. trainNode :: (Num t, Ord k) => SGM t x k p -> k -> p -> SGM t x k p -- | Calculates the difference between all pairs of non-identical labels in -- the SGM. modelDiffs :: (Eq k, Ord k) => SGM t x k p -> [((k, k), x)] -- | Generates all pairs of non-identical labels in the SGM. labelPairs :: Eq k => SGM t x k p -> [(k, k)] -- | Pairs a node label with all labels except itself. labelPairs' :: Eq k => SGM t x k p -> k -> [(k, k)] -- | Returns the labels of the two most similar models, and the difference -- between them. twoMostSimilar :: (Ord x, Eq k, Ord k) => SGM t x k p -> (k, k, x) -- | Deletes the least used (least matched) model in a pair, and returns -- its label (now available) and the updated SGM. TODO: Modify the other -- model to make it slightly more similar to the one that was deleted? mergeModels :: (Num t, Ord t, Ord k) => SGM t x k p -> k -> k -> (k, SGM t x k p) -- | Set the model for a node. Useful when merging two models and replacing -- one. setModel :: (Num t, Ord k) => SGM t x k p -> k -> p -> SGM t x k p -- | Add a new node, making room for it by merging two existing nodes. mergeAddModel :: (Num t, Ord t, Ord k) => SGM t x k p -> k -> k -> p -> SGM t x k p -- | classify s p identifies the model s that most -- closely matches the pattern p. It will not make any changes -- to the classifier. (I.e., it will not change the models or match -- counts.) Returns the ID of the node with the best matching model, the -- difference between the best matching model and the pattern, and the -- SGM labels paired with the model and the difference between the input -- and the corresponding model. The final paired list is sorted in -- decreasing order of similarity. classify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x)) -- | Order models by ascending difference from the input pattern, then by -- creation order (label number). matchOrder :: (Ord a, Ord b) => (a, b) -> (a, b) -> Ordering -- | trainAndClassify s p identifies the model in -- s that most closely matches p, and updates it to be -- a somewhat better match. If necessary, it will create a new node and -- model. Returns the ID of the node with the best matching model, the -- difference between the pattern and the best matching model in the -- original SGM (before training or adding a new model), the differences -- between the pattern and each model in the updated SGM, and the updated -- SGM. trainAndClassify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p) -- | Internal method. NOTE: This function will adjust the model and update -- the match for the BMU. trainAndClassify' :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p) -- | Internal method. addModelTrainAndClassify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p) -- | train s p identifies the model in s that most -- closely matches p, and updates it to be a somewhat better -- match. If necessary, it will create a new node and model. train :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> SGM t x k p -- | For each pattern p in ps, trainBatch s -- ps identifies the model in s that most closely matches -- p, and updates it to be a somewhat better match. trainBatch :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> [p] -> SGM t x k p instance (Control.DeepSeq.NFData k, Control.DeepSeq.NFData p, Control.DeepSeq.NFData t) => Control.DeepSeq.NFData (Data.Datamining.Clustering.SGM2Internal.SGM t x k p) instance GHC.Generics.Generic (Data.Datamining.Clustering.SGM2Internal.SGM t x k p) -- | A Self-generating Model (SGM). An SGM maps input patterns onto a set, -- where each element in the set is a model of the input data. An SGM is -- like a Kohonen Self-organising Map (SOM), except: -- -- -- -- This implementation supports the use of non-numeric patterns. -- -- In layman's terms, a SGM can be useful when you you want to build a -- set of models on some data. A tutorial is available at -- https://github.com/mhwombat/som/wiki. -- -- References: -- -- module Data.Datamining.Clustering.SGM2 -- | A Simplified Self-Organising Map (SGM). t is the type of the -- counter. x is the type of the learning rate and the -- difference metric. k is the type of the model indices. -- p is the type of the input patterns and models. data SGM t x k p SGM :: Map k (p, t) -> (t -> x) -> Int -> (p -> p -> x) -> (p -> x -> p -> p) -> k -> SGM t x k p -- | Maps patterns and match counts to nodes. [toMap] :: SGM t x k p -> Map k (p, t) -- | A function which determines the learning rate for a node. The input -- parameter indicates how many patterns (or pattern batches) have -- previously been presented to the classifier. Typically this is used to -- make the learning rate decay over time. The output is the learning -- rate for that node (the amount by which the node's model should be -- updated to match the target). The learning rate should be between zero -- and one. [learningRate] :: SGM t x k p -> t -> x -- | The maximum number of models this SGM can hold. [capacity] :: SGM t x k p -> Int -- | A function which compares two patterns and returns a -- non-negative number representing how different the patterns -- are. A result of 0 indicates that the patterns are identical. [difference] :: SGM t x k p -> p -> p -> x -- | A function which updates models. For example, if this function is -- f, then f target amount pattern returns a modified -- copy of pattern that is more similar to target than -- pattern is. The magnitude of the adjustment is controlled by -- the amount parameter, which should be a number between 0 and -- 1. Larger values for amount permit greater adjustments. If -- amount=1, the result should be identical to the -- target. If amount=0, the result should be the -- unmodified pattern. [makeSimilar] :: SGM t x k p -> p -> x -> p -> p -- | Index for the next node to add to the SGM. [nextIndex] :: SGM t x k p -> k -- | makeSGM lr n diff ms creates a new SGM that does not -- (yet) contain any models. It will learn at the rate determined by the -- learning function lr, and will be able to hold up to -- n models. It will create a new model based on a pattern -- presented to it when the SGM is not at capacity, or a less useful -- model can be replaced. It will use the function diff to -- measure the similarity between an input pattern and a model. It will -- use the function ms to adjust models as needed to make them -- more similar to input patterns. makeSGM :: Bounded k => (t -> x) -> Int -> (p -> p -> x) -> (p -> x -> p -> p) -> SGM t x k p -- | The current "time" (number of times the SGM has been trained). time :: Num t => SGM t x k p -> t -- | Returns true if the SGM has no models, false otherwise. isEmpty :: SGM t x k p -> Bool -- | Returns the number of models the SGM currently contains. size :: SGM t x k p -> Int -- | Returns a map from node ID to model. modelMap :: SGM t x k p -> Map k p -- | Returns a map from node ID to counter (number of times the node's -- model has been the closest match to an input pattern). counterMap :: SGM t x k p -> Map k t -- | Returns the model at a specified node. modelAt :: Ord k => SGM t x k p -> k -> p -- | A typical learning function for classifiers. exponential r0 -- d t returns the learning rate at time t. When t = -- 0, the learning rate is r0. Over time the learning rate -- decays exponentially; the decay rate is d. Normally the -- parameters are chosen such that: -- -- exponential :: (Floating a, Integral t) => a -> a -> t -> a -- | classify s p identifies the model s that most -- closely matches the pattern p. It will not make any changes -- to the classifier. (I.e., it will not change the models or match -- counts.) Returns the ID of the node with the best matching model, the -- difference between the best matching model and the pattern, and the -- SGM labels paired with the model and the difference between the input -- and the corresponding model. The final paired list is sorted in -- decreasing order of similarity. classify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x)) -- | trainAndClassify s p identifies the model in -- s that most closely matches p, and updates it to be -- a somewhat better match. If necessary, it will create a new node and -- model. Returns the ID of the node with the best matching model, the -- difference between the pattern and the best matching model in the -- original SGM (before training or adding a new model), the differences -- between the pattern and each model in the updated SGM, and the updated -- SGM. trainAndClassify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p) -- | train s p identifies the model in s that most -- closely matches p, and updates it to be a somewhat better -- match. If necessary, it will create a new node and model. train :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> SGM t x k p -- | For each pattern p in ps, trainBatch s -- ps identifies the model in s that most closely matches -- p, and updates it to be a somewhat better match. trainBatch :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> [p] -> SGM t x k p -- | A module containing private SGM internals. Most developers -- should use SGM instead. This module is subject to change -- without notice. module Data.Datamining.Clustering.SGMInternal -- | A typical learning function for classifiers. exponential r0 -- d t returns the learning rate at time t. When t = -- 0, the learning rate is r0. Over time the learning rate -- decays exponentially; the decay rate is d. Normally the -- parameters are chosen such that: -- -- exponential :: (Floating a, Integral t) => a -> a -> t -> a -- | A Simplified Self-Organising Map (SGM). t is the type of the -- counter. x is the type of the learning rate and the -- difference metric. k is the type of the model indices. -- p is the type of the input patterns and models. data SGM t x k p SGM :: Map k (p, t) -> (t -> x) -> Int -> x -> Bool -> (p -> p -> x) -> (p -> x -> p -> p) -> k -> SGM t x k p -- | Maps patterns and match counts to nodes. [toMap] :: SGM t x k p -> Map k (p, t) -- | A function which determines the learning rate for a node. The input -- parameter indicates how many patterns (or pattern batches) have -- previously been presented to the classifier. Typically this is used to -- make the learning rate decay over time. The output is the learning -- rate for that node (the amount by which the node's model should be -- updated to match the target). The learning rate should be between zero -- and one. [learningRate] :: SGM t x k p -> t -> x -- | The maximum number of models this SGM can hold. [maxSize] :: SGM t x k p -> Int -- | The threshold that triggers creation of a new model. [diffThreshold] :: SGM t x k p -> x -- | Delete existing models to make room for new ones? The least useful -- (least frequently matched) models will be deleted first. [allowDeletion] :: SGM t x k p -> Bool -- | A function which compares two patterns and returns a -- non-negative number representing how different the patterns -- are. A result of 0 indicates that the patterns are identical. [difference] :: SGM t x k p -> p -> p -> x -- | A function which updates models. For example, if this function is -- f, then f target amount pattern returns a modified -- copy of pattern that is more similar to target than -- pattern is. The magnitude of the adjustment is controlled by -- the amount parameter, which should be a number between 0 and -- 1. Larger values for amount permit greater adjustments. If -- amount=1, the result should be identical to the -- target. If amount=0, the result should be the -- unmodified pattern. [makeSimilar] :: SGM t x k p -> p -> x -> p -> p -- | Index for the next node to add to the SGM. [nextIndex] :: SGM t x k p -> k -- | makeSGM lr n dt diff ms creates a new SGM that does -- not (yet) contain any models. It will learn at the rate determined by -- the learning function lr, and will be able to hold up to -- n models. It will create a new model based on a pattern -- presented to it when (1) the SGM contains no models, or (2) the -- difference between the pattern and the closest matching model exceeds -- the threshold dt. It will use the function diff to -- measure the similarity between an input pattern and a model. It will -- use the function ms to adjust models as needed to make them -- more similar to input patterns. makeSGM :: Bounded k => (t -> x) -> Int -> x -> Bool -> (p -> p -> x) -> (p -> x -> p -> p) -> SGM t x k p -- | Returns true if the SGM has no models, false otherwise. isEmpty :: SGM t x k p -> Bool -- | Returns the number of models the SGM currently contains. numModels :: SGM t x k p -> Int -- | Returns a map from node ID to model. modelMap :: SGM t x k p -> Map k p -- | Returns a map from node ID to counter (number of times the node's -- model has been the closest match to an input pattern). counterMap :: SGM t x k p -> Map k t -- | Returns the model at a specified node. modelAt :: Ord k => SGM t x k p -> k -> p -- | Returns the current labels. labels :: SGM t x k p -> [k] -- | Returns the current models. models :: SGM t x k p -> [p] -- | Returns the current counters (number of times the node's model has -- been the closest match to an input pattern). counters :: SGM t x k p -> [t] -- | The current "time" (number of times the SGM has been trained). time :: Num t => SGM t x k p -> t -- | Adds a new node to the SGM. addNode :: (Num t, Enum k, Ord k) => p -> SGM t x k p -> SGM t x k p -- | Removes a node from the SGM. Deleted nodes are never re-used. deleteNode :: Ord k => k -> SGM t x k p -> SGM t x k p -- | Increment the match counter. incrementCounter :: (Num t, Ord k) => k -> SGM t x k p -> SGM t x k p -- | Trains the specified node to better match a target. Most users should -- use train, which automatically determines the BMU and -- trains it. trainNode :: (Num t, Ord k) => SGM t x k p -> k -> p -> SGM t x k p -- | Returns the node that has been the BMU least often. leastUsefulNode :: Ord t => SGM t x k p -> k -- | Deletes the node that has been the BMU least often. deleteLeastUsefulNode :: (Ord t, Ord k) => SGM t x k p -> SGM t x k p -- | Adds a new node to the SGM, deleting the least useful node/model if -- necessary to make room. addModel :: (Num t, Ord t, Enum k, Ord k) => p -> SGM t x k p -> SGM t x k p -- | classify s p identifies the model s that most -- closely matches the pattern p. It will not make any changes -- to the classifier. Returns the ID of the node with the best matching -- model, the difference between the best matching model and the pattern, -- and the SGM labels paired with the model and the difference between -- the input and the corresponding model. The final paired list is sorted -- in decreasing order of similarity. classify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x)) -- | Internal method. NOTE: This function may create a new model, but it -- does not modify existing models. classify' :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p) -- | Order models by ascending difference from the input pattern, then by -- creation order (label number). matchOrder :: (Ord a, Ord b) => (a, b) -> (a, b) -> Ordering -- | trainAndClassify s p identifies the model in -- s that most closely matches p, and updates it to be -- a somewhat better match. If necessary, it will create a new node and -- model. Returns the ID of the node with the best matching model, the -- difference between the best matching model and the pattern, the -- differences between the input and each model in the SGM, and the -- updated SGM. trainAndClassify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p) -- | train s p identifies the model in s that most -- closely matches p, and updates it to be a somewhat better -- match. If necessary, it will create a new node and model. train :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> SGM t x k p -- | For each pattern p in ps, trainBatch s -- ps identifies the model in s that most closely matches -- p, and updates it to be a somewhat better match. trainBatch :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> [p] -> SGM t x k p instance (Control.DeepSeq.NFData k, Control.DeepSeq.NFData p, Control.DeepSeq.NFData t, Control.DeepSeq.NFData x) => Control.DeepSeq.NFData (Data.Datamining.Clustering.SGMInternal.SGM t x k p) instance GHC.Generics.Generic (Data.Datamining.Clustering.SGMInternal.SGM t x k p) -- | A Self-generating Model (SGM). An SGM maps input patterns onto a set, -- where each element in the set is a model of the input data. An SGM is -- like a Kohonen Self-organising Map (SOM), except: -- -- -- -- This implementation supports the use of non-numeric patterns. -- -- In layman's terms, a SGM can be useful when you you want to build a -- set of models on some data. A tutorial is available at -- https://github.com/mhwombat/som/wiki. -- -- References: -- -- module Data.Datamining.Clustering.SGM -- | A Simplified Self-Organising Map (SGM). t is the type of the -- counter. x is the type of the learning rate and the -- difference metric. k is the type of the model indices. -- p is the type of the input patterns and models. data SGM t x k p SGM :: Map k (p, t) -> (t -> x) -> Int -> x -> Bool -> (p -> p -> x) -> (p -> x -> p -> p) -> k -> SGM t x k p -- | Maps patterns and match counts to nodes. [toMap] :: SGM t x k p -> Map k (p, t) -- | A function which determines the learning rate for a node. The input -- parameter indicates how many patterns (or pattern batches) have -- previously been presented to the classifier. Typically this is used to -- make the learning rate decay over time. The output is the learning -- rate for that node (the amount by which the node's model should be -- updated to match the target). The learning rate should be between zero -- and one. [learningRate] :: SGM t x k p -> t -> x -- | The maximum number of models this SGM can hold. [maxSize] :: SGM t x k p -> Int -- | The threshold that triggers creation of a new model. [diffThreshold] :: SGM t x k p -> x -- | Delete existing models to make room for new ones? The least useful -- (least frequently matched) models will be deleted first. [allowDeletion] :: SGM t x k p -> Bool -- | A function which compares two patterns and returns a -- non-negative number representing how different the patterns -- are. A result of 0 indicates that the patterns are identical. [difference] :: SGM t x k p -> p -> p -> x -- | A function which updates models. For example, if this function is -- f, then f target amount pattern returns a modified -- copy of pattern that is more similar to target than -- pattern is. The magnitude of the adjustment is controlled by -- the amount parameter, which should be a number between 0 and -- 1. Larger values for amount permit greater adjustments. If -- amount=1, the result should be identical to the -- target. If amount=0, the result should be the -- unmodified pattern. [makeSimilar] :: SGM t x k p -> p -> x -> p -> p -- | Index for the next node to add to the SGM. [nextIndex] :: SGM t x k p -> k -- | makeSGM lr n dt diff ms creates a new SGM that does -- not (yet) contain any models. It will learn at the rate determined by -- the learning function lr, and will be able to hold up to -- n models. It will create a new model based on a pattern -- presented to it when (1) the SGM contains no models, or (2) the -- difference between the pattern and the closest matching model exceeds -- the threshold dt. It will use the function diff to -- measure the similarity between an input pattern and a model. It will -- use the function ms to adjust models as needed to make them -- more similar to input patterns. makeSGM :: Bounded k => (t -> x) -> Int -> x -> Bool -> (p -> p -> x) -> (p -> x -> p -> p) -> SGM t x k p -- | The current "time" (number of times the SGM has been trained). time :: Num t => SGM t x k p -> t -- | Returns true if the SGM has no models, false otherwise. isEmpty :: SGM t x k p -> Bool -- | Returns the number of models the SGM currently contains. numModels :: SGM t x k p -> Int -- | Returns a map from node ID to model. modelMap :: SGM t x k p -> Map k p -- | Returns a map from node ID to counter (number of times the node's -- model has been the closest match to an input pattern). counterMap :: SGM t x k p -> Map k t -- | Returns the model at a specified node. modelAt :: Ord k => SGM t x k p -> k -> p -- | A typical learning function for classifiers. exponential r0 -- d t returns the learning rate at time t. When t = -- 0, the learning rate is r0. Over time the learning rate -- decays exponentially; the decay rate is d. Normally the -- parameters are chosen such that: -- -- exponential :: (Floating a, Integral t) => a -> a -> t -> a -- | classify s p identifies the model s that most -- closely matches the pattern p. It will not make any changes -- to the classifier. Returns the ID of the node with the best matching -- model, the difference between the best matching model and the pattern, -- and the SGM labels paired with the model and the difference between -- the input and the corresponding model. The final paired list is sorted -- in decreasing order of similarity. classify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x)) -- | trainAndClassify s p identifies the model in -- s that most closely matches p, and updates it to be -- a somewhat better match. If necessary, it will create a new node and -- model. Returns the ID of the node with the best matching model, the -- difference between the best matching model and the pattern, the -- differences between the input and each model in the SGM, and the -- updated SGM. trainAndClassify :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p) -- | train s p identifies the model in s that most -- closely matches p, and updates it to be a somewhat better -- match. If necessary, it will create a new node and model. train :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> p -> SGM t x k p -- | For each pattern p in ps, trainBatch s -- ps identifies the model in s that most closely matches -- p, and updates it to be a somewhat better match. trainBatch :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k) => SGM t x k p -> [p] -> SGM t x k p -- | A module containing private SOM internals. Most developers -- should use SOM instead. This module is subject to change -- without notice. module Data.Datamining.Clustering.SOMInternal -- | A typical learning function for classifiers. -- decayingGaussian r0 rf w0 wf tf returns a bell -- curve-shaped function. At time zero, the maximum learning rate -- (applied to the BMU) is r0, and the neighbourhood width is -- w0. Over time the bell curve shrinks and the learning rate -- tapers off, until at time tf, the maximum learning rate -- (applied to the BMU) is rf, and the neighbourhood width is -- wf. Normally the parameters should be chosen such that: -- -- -- -- where << means "is much smaller than" (not the Haskell -- << operator!) decayingGaussian :: Floating x => x -> x -> x -> x -> x -> x -> x -> x -- | A learning function that only updates the BMU and has a constant -- learning rate. stepFunction :: (Num d, Fractional x, Eq d) => x -> t -> d -> x -- | A learning function that updates all nodes with the same, constant -- learning rate. This can be useful for testing. constantFunction :: x -> t -> d -> x -- | A Self-Organising Map (SOM). -- -- Although SOM implements GridMap, most users will -- only need the interface provided by -- Data.Datamining.Clustering.Classifier. If you chose to use -- the GridMap functions, please note: -- --
    --
  1. The functions adjust, and adjustWithKey do not -- increment the counter. You can do so manually with -- incrementCounter.
  2. --
  3. The functions map and mapWithKey are not -- implemented (they just return an error). It would be -- problematic to implement them because the input SOM and the output SOM -- would have to have the same Metric type.
  4. --
data SOM t d gm x k p SOM :: gm p -> (t -> d -> x) -> (p -> p -> x) -> (p -> x -> p -> p) -> t -> SOM t d gm x k p -- | Maps patterns to tiles in a regular grid. In the context of a SOM, the -- tiles are called "nodes" [gridMap] :: SOM t d gm x k p -> gm p -- | A function which determines the how quickly the SOM learns. For -- example, if the function is f, then f t d returns -- the learning rate for a node. The parameter t indicates how -- many patterns (or pattern batches) have previously been presented to -- the classifier. Typically this is used to make the learning rate decay -- over time. The parameter d is the grid distance from the node -- being updated to the BMU (Best Matching Unit). The output is the -- learning rate for that node (the amount by which the node's model -- should be updated to match the target). The learning rate should be -- between zero and one. [learningRate] :: SOM t d gm x k p -> t -> d -> x -- | A function which compares two patterns and returns a -- non-negative number representing how different the patterns -- are. A result of 0 indicates that the patterns are identical. [difference] :: SOM t d gm x k p -> p -> p -> x -- | A function which updates models. If this function is f, then -- f target amount pattern returns a modified copy of -- pattern that is more similar to target than -- pattern is. The magnitude of the adjustment is controlled by -- the amount parameter, which should be a number between 0 and -- 1. Larger values for amount permit greater adjustments. If -- amount=1, the result should be identical to the -- target. If amount=0, the result should be the -- unmodified pattern. [makeSimilar] :: SOM t d gm x k p -> p -> x -> p -> p -- | A counter used as a "time" parameter. If you create the SOM with a -- counter value 0, and don't directly modify it, then the -- counter will represent the number of patterns that this SOM has -- classified. [counter] :: SOM t d gm x k p -> t -- | Internal method. withGridMap :: (gm p -> gm p) -> SOM t d gm x k p -> SOM t d gm x k p -- | Returns the learning function currently being used by the SOM. currentLearningFunction :: Num t => SOM t d gm x k p -> d -> x -- | Extracts the grid and current models from the SOM. A synonym for -- gridMap. toGridMap :: GridMap gm p => SOM t d gm x k p -> gm p -- | Internal method. adjustNode :: (Grid g, k ~ Index g, Num t) => g -> (t -> x) -> (p -> x -> p -> p) -> p -> k -> k -> p -> p -- | Trains the specified node and the neighbourood around it to better -- match a target. Most users should use train, which -- automatically determines the BMU and trains it and its neighbourhood. trainNeighbourhood :: (Grid (gm p), GridMap gm p, Index (BaseGrid gm p) ~ Index (gm p), Num t, Num x, Num d) => SOM t d gm x k p -> Index (gm p) -> p -> SOM t d gm x k p -- | Increment the match counter. incrementCounter :: Num t => SOM t d gm x k p -> SOM t d gm x k p -- | Internal method. justTrain :: (Ord x, Grid (gm p), GridMap gm x, GridMap gm p, Index (BaseGrid gm x) ~ Index (gm p), Index (BaseGrid gm p) ~ Index (gm p), Num t, Num x, Num d) => SOM t d gm x k p -> p -> SOM t d gm x k p instance (Control.DeepSeq.NFData t, Control.DeepSeq.NFData (gm p)) => Control.DeepSeq.NFData (Data.Datamining.Clustering.SOMInternal.SOM t d gm x k p) instance GHC.Generics.Generic (Data.Datamining.Clustering.SOMInternal.SOM t d gm x k p) instance Data.Foldable.Foldable gm => Data.Foldable.Foldable (Data.Datamining.Clustering.SOMInternal.SOM t d gm x k) instance Math.Geometry.GridInternal.Grid (gm p) => Math.Geometry.GridInternal.Grid (Data.Datamining.Clustering.SOMInternal.SOM t d gm x k p) instance (Data.Foldable.Foldable gm, Math.Geometry.GridMap.GridMap gm p, Math.Geometry.GridInternal.Grid (Math.Geometry.GridMap.BaseGrid gm p)) => Math.Geometry.GridMap.GridMap (Data.Datamining.Clustering.SOMInternal.SOM t d gm x k) p instance (Math.Geometry.GridMap.GridMap gm p, k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (Math.Geometry.GridMap.BaseGrid gm p), Math.Geometry.GridInternal.Grid (gm p), Math.Geometry.GridMap.GridMap gm x, k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (gm p), k Data.Type.Equality.~ Math.Geometry.GridInternal.Index (Math.Geometry.GridMap.BaseGrid gm x), GHC.Num.Num t, GHC.Classes.Ord x, GHC.Num.Num x, GHC.Num.Num d) => Data.Datamining.Clustering.Classifier.Classifier (Data.Datamining.Clustering.SOMInternal.SOM t d gm) x k p -- | A Kohonen Self-organising Map (SOM). A SOM maps input patterns onto a -- regular grid (usually two-dimensional) where each node in the grid is -- a model of the input data, and does so using a method which ensures -- that any topological relationships within the input data are also -- represented in the grid. This implementation supports the use of -- non-numeric patterns. -- -- In layman's terms, a SOM can be useful when you you want to discover -- the underlying structure of some data. A tutorial is available at -- https://github.com/mhwombat/som/wiki. -- -- NOTES: -- -- -- -- References: -- -- module Data.Datamining.Clustering.SOM -- | A Self-Organising Map (SOM). -- -- Although SOM implements GridMap, most users will -- only need the interface provided by -- Data.Datamining.Clustering.Classifier. If you chose to use -- the GridMap functions, please note: -- --
    --
  1. The functions adjust, and adjustWithKey do not -- increment the counter. You can do so manually with -- incrementCounter.
  2. --
  3. The functions map and mapWithKey are not -- implemented (they just return an error). It would be -- problematic to implement them because the input SOM and the output SOM -- would have to have the same Metric type.
  4. --
data SOM t d gm x k p SOM :: gm p -> (t -> d -> x) -> (p -> p -> x) -> (p -> x -> p -> p) -> t -> SOM t d gm x k p -- | Maps patterns to tiles in a regular grid. In the context of a SOM, the -- tiles are called "nodes" [gridMap] :: SOM t d gm x k p -> gm p -- | A function which determines the how quickly the SOM learns. For -- example, if the function is f, then f t d returns -- the learning rate for a node. The parameter t indicates how -- many patterns (or pattern batches) have previously been presented to -- the classifier. Typically this is used to make the learning rate decay -- over time. The parameter d is the grid distance from the node -- being updated to the BMU (Best Matching Unit). The output is the -- learning rate for that node (the amount by which the node's model -- should be updated to match the target). The learning rate should be -- between zero and one. [learningRate] :: SOM t d gm x k p -> t -> d -> x -- | A function which compares two patterns and returns a -- non-negative number representing how different the patterns -- are. A result of 0 indicates that the patterns are identical. [difference] :: SOM t d gm x k p -> p -> p -> x -- | A function which updates models. If this function is f, then -- f target amount pattern returns a modified copy of -- pattern that is more similar to target than -- pattern is. The magnitude of the adjustment is controlled by -- the amount parameter, which should be a number between 0 and -- 1. Larger values for amount permit greater adjustments. If -- amount=1, the result should be identical to the -- target. If amount=0, the result should be the -- unmodified pattern. [makeSimilar] :: SOM t d gm x k p -> p -> x -> p -> p -- | A counter used as a "time" parameter. If you create the SOM with a -- counter value 0, and don't directly modify it, then the -- counter will represent the number of patterns that this SOM has -- classified. [counter] :: SOM t d gm x k p -> t -- | Extracts the grid and current models from the SOM. A synonym for -- gridMap. toGridMap :: GridMap gm p => SOM t d gm x k p -> gm p -- | A typical learning function for classifiers. -- decayingGaussian r0 rf w0 wf tf returns a bell -- curve-shaped function. At time zero, the maximum learning rate -- (applied to the BMU) is r0, and the neighbourhood width is -- w0. Over time the bell curve shrinks and the learning rate -- tapers off, until at time tf, the maximum learning rate -- (applied to the BMU) is rf, and the neighbourhood width is -- wf. Normally the parameters should be chosen such that: -- -- -- -- where << means "is much smaller than" (not the Haskell -- << operator!) decayingGaussian :: Floating x => x -> x -> x -> x -> x -> x -> x -> x -- | A learning function that only updates the BMU and has a constant -- learning rate. stepFunction :: (Num d, Fractional x, Eq d) => x -> t -> d -> x -- | A learning function that updates all nodes with the same, constant -- learning rate. This can be useful for testing. constantFunction :: x -> t -> d -> x -- | Trains the specified node and the neighbourood around it to better -- match a target. Most users should use train, which -- automatically determines the BMU and trains it and its neighbourhood. trainNeighbourhood :: (Grid (gm p), GridMap gm p, Index (BaseGrid gm p) ~ Index (gm p), Num t, Num x, Num d) => SOM t d gm x k p -> Index (gm p) -> p -> SOM t d gm x k p -- | Tools for identifying patterns in data. module Data.Datamining.Pattern -- | Adjusts a number to make it more similar to the target. adjustNum :: (Num a, Ord a, Eq a) => a -> a -> a -> a -- | Returns the absolute difference between two numbers. absDifference :: Num a => a -> a -> a -- | adjustVector target amount vector adjusts each element -- of vector to move it closer to the corresponding element of -- target. The amount of adjustment is controlled by the -- learning rate amount, which is a number between 0 and 1. -- Larger values of amount permit more adjustment. If -- amount=1, the result will be identical to the -- target. If amount=0, the result will be the -- unmodified pattern. If target is shorter than -- vector, the result will be the same length as -- target. If target is longer than vector, -- the result will be the same length as vector. adjustVector :: (Num a, Ord a, Eq a) => [a] -> a -> [a] -> [a] -- | Same as adjustVector, except that the result will -- always be the same length as vector. This means that if -- target is shorter than vector, the "leftover" -- elements of vector will be copied the result, unmodified. adjustVectorPreserveLength :: (Num a, Ord a, Eq a) => [a] -> a -> [a] -> [a] -- | Calculates the square of the Euclidean distance between two vectors. euclideanDistanceSquared :: Num a => [a] -> [a] -> a -- | Returns the sum of the squares of the elements of a vector. magnitudeSquared :: Num a => [a] -> a -- | A vector that has been normalised, i.e., the magnitude of the vector = -- 1. data NormalisedVector a -- | Normalises a vector normalise :: Floating a => [a] -> NormalisedVector a -- | A vector that has been scaled so that all elements in the vector are -- between zero and one. To scale a set of vectors, use -- scaleAll. Alternatively, if you can identify a maximum -- and minimum value for each element in a vector, you can scale -- individual vectors using scale. data ScaledVector a -- | Given a vector qs of pairs of numbers, where each pair -- represents the maximum and minimum value to be expected at each index -- in xs, scale qs xs scales the vector -- xs element by element, mapping the maximum value expected at -- that index to one, and the minimum value to zero. scale :: Fractional a => [(a, a)] -> [a] -> ScaledVector a -- | Scales a set of vectors by determining the maximum and minimum values -- at each index in the vector, and mapping the maximum value to one, and -- the minimum value to zero. scaleAll :: (Fractional a, Ord a) => [[a]] -> [ScaledVector a] instance GHC.Show.Show a => GHC.Show.Show (Data.Datamining.Pattern.ScaledVector a) instance GHC.Show.Show a => GHC.Show.Show (Data.Datamining.Pattern.NormalisedVector a)