------------------------------------------------------------------------
-- |
-- Module      :  Data.Datamining.Clustering.SGM4Internal
-- Copyright   :  (c) 2012-2021 Amy de Buitléir
-- License     :  BSD-style
-- Maintainer  :  amy@nualeargais.ie
-- Stability   :  experimental
-- Portability :  portable
--
-- A module containing private @SGM@ internals. Most developers should
-- use @SGM@ instead. This module is subject to change without notice.
--
------------------------------------------------------------------------
{-# LANGUAGE DeriveAnyClass        #-}
{-# LANGUAGE DeriveGeneric         #-}
{-# LANGUAGE FlexibleContexts      #-}
{-# LANGUAGE FlexibleInstances     #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TypeFamilies          #-}
{-# LANGUAGE UndecidableInstances  #-}

module Data.Datamining.Clustering.SGM4Internal where

import           Prelude         hiding (filter, lookup)

import           Control.DeepSeq (NFData)
import           Data.List       (foldl', minimumBy, sortBy, (\\))
import qualified Data.Map.Strict as M
import           Data.Ord        (comparing)
-- import           Data.Ratio      ((%))
import           GHC.Generics    (Generic)

-- | A typical learning function for classifiers.
--   @'exponential' r0 d t@ returns the learning rate at time @t@.
--   When @t = 0@, the learning rate is @r0@.
--   Over time the learning rate decays exponentially; the decay rate is
--   @d@.
--   Normally the parameters are chosen such that:
--
--   * 0 < r0 < 1
--
--   * 0 < d
exponential :: (Floating x, Integral t) => x -> x -> t -> x
exponential :: x -> x -> t -> x
exponential x
r0 x
d t
t = x
r0 x -> x -> x
forall a. Num a => a -> a -> a
* x -> x
forall a. Floating a => a -> a
exp (-x
dx -> x -> x
forall a. Num a => a -> a -> a
*x
t')
  where t' :: x
t' = t -> x
forall a b. (Integral a, Num b) => a -> b
fromIntegral t
t

-- | A Simplified Self-Organising Map (SGM).
--   @t@ is the type of the counter.
--   @x@ is the type of the learning rate and the difference metric.
--   @k@ is the type of the model indices.
--   @p@ is the type of the input patterns and models.
data SGM t x k p = SGM
  {
    -- | Maps patterns and match counts to nodes.
    SGM t x k p -> Map k (p, t)
toMap        :: M.Map k (p, t),
    -- | A function which determines the learning rate for a node.
    --   The input parameter indicates how many patterns (or pattern
    --   batches) have previously been presented to the classifier.
    --   Typically this is used to make the learning rate decay over
    --   time.
    --   The output is the learning rate for that node (the amount by
    --   which the node's model should be updated to match the target).
    --   The learning rate should be between zero and one.
    SGM t x k p -> t -> x
learningRate :: t -> x,
    -- | The maximum number of models this SGM can hold.
    SGM t x k p -> Int
capacity     :: Int,
    -- | A function which compares two patterns and returns a
    --   /non-negative/ number representing how different the patterns
    --   are.
    --   A result of @0@ indicates that the patterns are identical.
    SGM t x k p -> p -> p -> x
difference   :: p -> p -> x,
    -- | A function which updates models.
    --   For example, if this function is @f@, then
    --   @f target amount pattern@ returns a modified copy of @pattern@
    --   that is more similar to @target@ than @pattern@ is.
    --   The magnitude of the adjustment is controlled by the @amount@
    --   parameter, which should be a number between 0 and 1.
    --   Larger values for @amount@ permit greater adjustments.
    --   If @amount@=1, the result should be identical to the @target@.
    --   If @amount@=0, the result should be the unmodified @pattern@.
    SGM t x k p -> p -> x -> p -> p
makeSimilar  :: p -> x -> p -> p,
    -- | Index for the next node to add to the SGM.
    SGM t x k p -> k
nextIndex    :: k
  } deriving ((forall x. SGM t x k p -> Rep (SGM t x k p) x)
-> (forall x. Rep (SGM t x k p) x -> SGM t x k p)
-> Generic (SGM t x k p)
forall x. Rep (SGM t x k p) x -> SGM t x k p
forall x. SGM t x k p -> Rep (SGM t x k p) x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall t x k p x. Rep (SGM t x k p) x -> SGM t x k p
forall t x k p x. SGM t x k p -> Rep (SGM t x k p) x
$cto :: forall t x k p x. Rep (SGM t x k p) x -> SGM t x k p
$cfrom :: forall t x k p x. SGM t x k p -> Rep (SGM t x k p) x
Generic, SGM t x k p -> ()
(SGM t x k p -> ()) -> NFData (SGM t x k p)
forall a. (a -> ()) -> NFData a
forall t x k p. (NFData k, NFData p, NFData t) => SGM t x k p -> ()
rnf :: SGM t x k p -> ()
$crnf :: forall t x k p. (NFData k, NFData p, NFData t) => SGM t x k p -> ()
NFData)

-- | @'makeSGM' lr n diff ms@ creates a new SGM that does not (yet)
--   contain any models.
--   It will learn at the rate determined by the learning function @lr@,
--   and will be able to hold up to @n@ models.
--   It will create a new model based on a pattern presented to it when
--   the SGM is not at capacity, or a less useful model can be replaced.
--   It will use the function @diff@ to measure the similarity between
--   an input pattern and a model.
--   It will use the function @ms@ to adjust models as needed to make
--   them more similar to input patterns.
makeSGM
  :: Bounded k
    => (t -> x) -> Int -> (p -> p -> x) -> (p -> x -> p -> p) -> SGM t x k p
makeSGM :: (t -> x)
-> Int -> (p -> p -> x) -> (p -> x -> p -> p) -> SGM t x k p
makeSGM t -> x
lr Int
n p -> p -> x
diff p -> x -> p -> p
ms =
  if Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0
    then [Char] -> SGM t x k p
forall a. HasCallStack => [Char] -> a
error [Char]
"max size for SGM <= 0"
    else Map k (p, t)
-> (t -> x)
-> Int
-> (p -> p -> x)
-> (p -> x -> p -> p)
-> k
-> SGM t x k p
forall t x k p.
Map k (p, t)
-> (t -> x)
-> Int
-> (p -> p -> x)
-> (p -> x -> p -> p)
-> k
-> SGM t x k p
SGM Map k (p, t)
forall k a. Map k a
M.empty t -> x
lr Int
n p -> p -> x
diff p -> x -> p -> p
ms k
forall a. Bounded a => a
minBound

-- | Returns true if the SGM has no models, false otherwise.
isEmpty :: SGM t x k p -> Bool
isEmpty :: SGM t x k p -> Bool
isEmpty = Map k (p, t) -> Bool
forall k a. Map k a -> Bool
M.null (Map k (p, t) -> Bool)
-> (SGM t x k p -> Map k (p, t)) -> SGM t x k p -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SGM t x k p -> Map k (p, t)
forall t x k p. SGM t x k p -> Map k (p, t)
toMap

-- | Returns the number of models the SGM currently contains.
size :: SGM t x k p -> Int
size :: SGM t x k p -> Int
size = Map k (p, t) -> Int
forall k a. Map k a -> Int
M.size (Map k (p, t) -> Int)
-> (SGM t x k p -> Map k (p, t)) -> SGM t x k p -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SGM t x k p -> Map k (p, t)
forall t x k p. SGM t x k p -> Map k (p, t)
toMap

-- | Returns a map from node ID to model.
modelMap :: SGM t x k p -> M.Map k p
modelMap :: SGM t x k p -> Map k p
modelMap = ((p, t) -> p) -> Map k (p, t) -> Map k p
forall a b k. (a -> b) -> Map k a -> Map k b
M.map (p, t) -> p
forall a b. (a, b) -> a
fst (Map k (p, t) -> Map k p)
-> (SGM t x k p -> Map k (p, t)) -> SGM t x k p -> Map k p
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SGM t x k p -> Map k (p, t)
forall t x k p. SGM t x k p -> Map k (p, t)
toMap

-- | Returns a map from node ID to counter (number of times the
--   node's model has been the closest match to an input pattern).
counterMap :: SGM t x k p -> M.Map k t
counterMap :: SGM t x k p -> Map k t
counterMap = ((p, t) -> t) -> Map k (p, t) -> Map k t
forall a b k. (a -> b) -> Map k a -> Map k b
M.map (p, t) -> t
forall a b. (a, b) -> b
snd (Map k (p, t) -> Map k t)
-> (SGM t x k p -> Map k (p, t)) -> SGM t x k p -> Map k t
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SGM t x k p -> Map k (p, t)
forall t x k p. SGM t x k p -> Map k (p, t)
toMap

-- | Returns the model at a specified node.
modelAt :: Ord k => SGM t x k p -> k -> p
modelAt :: SGM t x k p -> k -> p
modelAt SGM t x k p
s k
k = (SGM t x k p -> Map k p
forall t x k p. SGM t x k p -> Map k p
modelMap SGM t x k p
s) Map k p -> k -> p
forall k a. Ord k => Map k a -> k -> a
M.! k
k

-- | Returns the match counter for a specified node.
counterAt :: Ord k => SGM t x k p -> k -> t
counterAt :: SGM t x k p -> k -> t
counterAt SGM t x k p
s k
k = (SGM t x k p -> Map k t
forall t x k p. SGM t x k p -> Map k t
counterMap SGM t x k p
s) Map k t -> k -> t
forall k a. Ord k => Map k a -> k -> a
M.! k
k

-- | Returns the current labels.
labels :: SGM t x k p -> [k]
labels :: SGM t x k p -> [k]
labels = Map k (p, t) -> [k]
forall k a. Map k a -> [k]
M.keys (Map k (p, t) -> [k])
-> (SGM t x k p -> Map k (p, t)) -> SGM t x k p -> [k]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SGM t x k p -> Map k (p, t)
forall t x k p. SGM t x k p -> Map k (p, t)
toMap

-- | The current "time" (number of times the SGM has been trained).
time :: Num t => SGM t x k p -> t
time :: SGM t x k p -> t
time = [t] -> t
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([t] -> t) -> (SGM t x k p -> [t]) -> SGM t x k p -> t
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((p, t) -> t) -> [(p, t)] -> [t]
forall a b. (a -> b) -> [a] -> [b]
map (p, t) -> t
forall a b. (a, b) -> b
snd ([(p, t)] -> [t])
-> (SGM t x k p -> [(p, t)]) -> SGM t x k p -> [t]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map k (p, t) -> [(p, t)]
forall k a. Map k a -> [a]
M.elems (Map k (p, t) -> [(p, t)])
-> (SGM t x k p -> Map k (p, t)) -> SGM t x k p -> [(p, t)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SGM t x k p -> Map k (p, t)
forall t x k p. SGM t x k p -> Map k (p, t)
toMap

-- | Adds a new node to the SGM.
addNode
  :: (Num t, Bounded k, Enum k, Ord k)
    => SGM t x k p -> p -> SGM t x k p
addNode :: SGM t x k p -> p -> SGM t x k p
addNode SGM t x k p
s p
p = SGM t x k p -> k -> p -> SGM t x k p
forall t k x p.
(Num t, Bounded k, Enum k, Ord k) =>
SGM t x k p -> k -> p -> SGM t x k p
addNodeAt SGM t x k p
s (SGM t x k p -> k
forall t x k p. SGM t x k p -> k
nextIndex SGM t x k p
s) p
p

addNodeAt
  :: (Num t, Bounded k, Enum k, Ord k)
    => SGM t x k p -> k -> p -> SGM t x k p
addNodeAt :: SGM t x k p -> k -> p -> SGM t x k p
addNodeAt SGM t x k p
s k
k p
p
  | SGM t x k p -> Bool
forall t x k p. SGM t x k p -> Bool
atCapacity SGM t x k p
s   = [Char] -> SGM t x k p
forall a. HasCallStack => [Char] -> a
error [Char]
"SGM is full"
  | SGM t x k p
s SGM t x k p -> k -> Bool
forall k t x p. Ord k => SGM t x k p -> k -> Bool
`hasLabel` k
k = [Char] -> SGM t x k p
forall a. HasCallStack => [Char] -> a
error [Char]
"label already exists"
  | Bool
otherwise      = SGM t x k p
s { toMap :: Map k (p, t)
toMap=Map k (p, t)
gm', nextIndex :: k
nextIndex=k
kNext }
  where gm :: Map k (p, t)
gm = SGM t x k p -> Map k (p, t)
forall t x k p. SGM t x k p -> Map k (p, t)
toMap SGM t x k p
s
        gm' :: Map k (p, t)
gm' = k -> (p, t) -> Map k (p, t) -> Map k (p, t)
forall k a. Ord k => k -> a -> Map k a -> Map k a
M.insert k
k (p
p, t
0) Map k (p, t)
gm
        -- kNext = succ . maximum . M.keys $ gm'
        allPossibleIndices :: [k]
allPossibleIndices = k -> k -> [k]
forall a. Enum a => a -> a -> [a]
enumFromTo k
forall a. Bounded a => a
minBound k
forall a. Bounded a => a
maxBound
        usedIndices :: [k]
usedIndices = Map k (p, t) -> [k]
forall k a. Map k a -> [k]
M.keys Map k (p, t)
gm'
        availableIndices :: [k]
availableIndices = [k]
allPossibleIndices [k] -> [k] -> [k]
forall a. Eq a => [a] -> [a] -> [a]
\\ [k]
usedIndices
        kNext :: k
kNext = [k] -> k
forall a. [a] -> a
head [k]
availableIndices

-- | Increments the match counter.
incrementCounter :: (Num t, Ord k) => k -> SGM t x k p -> SGM t x k p
incrementCounter :: k -> SGM t x k p -> SGM t x k p
incrementCounter k
k SGM t x k p
s = SGM t x k p
s { toMap :: Map k (p, t)
toMap=Map k (p, t)
gm' }
  where gm :: Map k (p, t)
gm = SGM t x k p -> Map k (p, t)
forall t x k p. SGM t x k p -> Map k (p, t)
toMap SGM t x k p
s
        gm' :: Map k (p, t)
gm' | k -> Map k (p, t) -> Bool
forall k a. Ord k => k -> Map k a -> Bool
M.member k
k Map k (p, t)
gm = ((p, t) -> (p, t)) -> k -> Map k (p, t) -> Map k (p, t)
forall k a. Ord k => (a -> a) -> k -> Map k a -> Map k a
M.adjust (p, t) -> (p, t)
forall b a. Num b => (a, b) -> (a, b)
inc k
k Map k (p, t)
gm
            | Bool
otherwise     = [Char] -> Map k (p, t)
forall a. HasCallStack => [Char] -> a
error [Char]
"no such node"
        inc :: (a, b) -> (a, b)
inc (a
p, b
t) = (a
p, b
tb -> b -> b
forall a. Num a => a -> a -> a
+b
1)

-- | Trains the specified node to better match a target.
--   Most users should use @'train'@, which automatically determines
--   the BMU and trains it.
trainNode
  :: (Num t, Ord k)
    => SGM t x k p -> k -> p -> SGM t x k p
trainNode :: SGM t x k p -> k -> p -> SGM t x k p
trainNode SGM t x k p
s k
k p
target = SGM t x k p
s { toMap :: Map k (p, t)
toMap=Map k (p, t)
gm' }
  where gm :: Map k (p, t)
gm = SGM t x k p -> Map k (p, t)
forall t x k p. SGM t x k p -> Map k (p, t)
toMap SGM t x k p
s
        gm' :: Map k (p, t)
gm' = ((p, t) -> (p, t)) -> k -> Map k (p, t) -> Map k (p, t)
forall k a. Ord k => (a -> a) -> k -> Map k a -> Map k a
M.adjust (p, t) -> (p, t)
tweakModel k
k Map k (p, t)
gm
        r :: x
r = (SGM t x k p -> t -> x
forall t x k p. SGM t x k p -> t -> x
learningRate SGM t x k p
s) (SGM t x k p -> t
forall t x k p. Num t => SGM t x k p -> t
time SGM t x k p
s)
        tweakModel :: (p, t) -> (p, t)
tweakModel (p
p, t
t) = (SGM t x k p -> p -> x -> p -> p
forall t x k p. SGM t x k p -> p -> x -> p -> p
makeSimilar SGM t x k p
s p
target x
r p
p, t
t)

hasLabel :: Ord k => SGM t x k p -> k -> Bool
hasLabel :: SGM t x k p -> k -> Bool
hasLabel SGM t x k p
s k
k = k -> Map k (p, t) -> Bool
forall k a. Ord k => k -> Map k a -> Bool
M.member k
k (Map k (p, t) -> Bool)
-> (SGM t x k p -> Map k (p, t)) -> SGM t x k p -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SGM t x k p -> Map k (p, t)
forall t x k p. SGM t x k p -> Map k (p, t)
toMap (SGM t x k p -> Bool) -> SGM t x k p -> Bool
forall a b. (a -> b) -> a -> b
$ SGM t x k p
s

imprint
  :: (Num t, Ord t, Fractional x, Num x, Ord x,
     Bounded k, Enum k, Ord k)
  => SGM t x k p -> k -> p -> SGM t x k p
imprint :: SGM t x k p -> k -> p -> SGM t x k p
imprint SGM t x k p
s k
k p
p
  | SGM t x k p
s SGM t x k p -> k -> Bool
forall k t x p. Ord k => SGM t x k p -> k -> Bool
`hasLabel` k
k = SGM t x k p -> k -> p -> SGM t x k p
forall t k x p.
(Num t, Ord k) =>
SGM t x k p -> k -> p -> SGM t x k p
trainNode SGM t x k p
s k
k p
p
  | SGM t x k p -> Bool
forall t x k p. SGM t x k p -> Bool
atCapacity SGM t x k p
s   = SGM t x k p -> p -> SGM t x k p
forall t x k p.
(Num t, Ord t, Fractional x, Num x, Ord x, Bounded k, Enum k,
 Ord k) =>
SGM t x k p -> p -> SGM t x k p
train SGM t x k p
s p
p
  | Bool
otherwise      = SGM t x k p -> k -> p -> SGM t x k p
forall t k x p.
(Num t, Bounded k, Enum k, Ord k) =>
SGM t x k p -> k -> p -> SGM t x k p
addNodeAt SGM t x k p
s k
k p
p

imprintBatch
  :: (Num t, Ord t, Fractional x, Num x, Ord x,
     Bounded k, Enum k, Ord k)
  => SGM t x k p -> [(k, p)] -> SGM t x k p
imprintBatch :: SGM t x k p -> [(k, p)] -> SGM t x k p
imprintBatch = (SGM t x k p -> (k, p) -> SGM t x k p)
-> SGM t x k p -> [(k, p)] -> SGM t x k p
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' SGM t x k p -> (k, p) -> SGM t x k p
forall x t k p.
(Fractional x, Num t, Bounded k, Enum k, Ord t, Ord x, Ord k) =>
SGM t x k p -> (k, p) -> SGM t x k p
imprintOne
  where imprintOne :: SGM t x k p -> (k, p) -> SGM t x k p
imprintOne SGM t x k p
s' (k
k, p
p) = SGM t x k p -> k -> p -> SGM t x k p
forall t x k p.
(Num t, Ord t, Fractional x, Num x, Ord x, Bounded k, Enum k,
 Ord k) =>
SGM t x k p -> k -> p -> SGM t x k p
imprint SGM t x k p
s' k
k p
p

-- | Calculates the difference between all pairs of non-identical
--   labels in the SGM.
modelDiffs :: (Eq k, Ord k) => SGM t x k p -> [((k, k), x)]
modelDiffs :: SGM t x k p -> [((k, k), x)]
modelDiffs SGM t x k p
s = ((k, k) -> ((k, k), x)) -> [(k, k)] -> [((k, k), x)]
forall a b. (a -> b) -> [a] -> [b]
map (k, k) -> ((k, k), x)
f ([(k, k)] -> [((k, k), x)]) -> [(k, k)] -> [((k, k), x)]
forall a b. (a -> b) -> a -> b
$ SGM t x k p -> [(k, k)]
forall k t x p. Eq k => SGM t x k p -> [(k, k)]
labelPairs SGM t x k p
s
  where f :: (k, k) -> ((k, k), x)
f (k
k, k
k') = ( (k
k, k
k'),
                      SGM t x k p -> p -> p -> x
forall t x k p. SGM t x k p -> p -> p -> x
difference SGM t x k p
s (SGM t x k p
s SGM t x k p -> k -> p
forall k t x p. Ord k => SGM t x k p -> k -> p
`modelAt` k
k) (SGM t x k p
s SGM t x k p -> k -> p
forall k t x p. Ord k => SGM t x k p -> k -> p
`modelAt` k
k') )

-- | Generates all pairs of non-identical labels in the SGM.
labelPairs :: Eq k => SGM t x k p -> [(k, k)]
labelPairs :: SGM t x k p -> [(k, k)]
labelPairs SGM t x k p
s = (k -> [(k, k)]) -> [k] -> [(k, k)]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (SGM t x k p -> k -> [(k, k)]
forall k t x p. Eq k => SGM t x k p -> k -> [(k, k)]
labelPairs' SGM t x k p
s) ([k] -> [(k, k)]) -> [k] -> [(k, k)]
forall a b. (a -> b) -> a -> b
$ SGM t x k p -> [k]
forall t x k p. SGM t x k p -> [k]
labels SGM t x k p
s

-- | Pairs a node label with all labels except itself.
labelPairs' :: Eq k => SGM t x k p -> k -> [(k, k)]
labelPairs' :: SGM t x k p -> k -> [(k, k)]
labelPairs' SGM t x k p
s k
k = (k -> (k, k)) -> [k] -> [(k, k)]
forall a b. (a -> b) -> [a] -> [b]
map (\k
k' -> (k
k, k
k')) ([k] -> [(k, k)]) -> [k] -> [(k, k)]
forall a b. (a -> b) -> a -> b
$ SGM t x k p -> [k]
forall t x k p. SGM t x k p -> [k]
labels SGM t x k p
s [k] -> [k] -> [k]
forall a. Eq a => [a] -> [a] -> [a]
\\ [k
k]

-- | Returns the labels of the two most similar models, and the
--   difference between them.
twoMostSimilar :: (Ord x, Eq k, Ord k) => SGM t x k p -> (k, k, x)
twoMostSimilar :: SGM t x k p -> (k, k, x)
twoMostSimilar SGM t x k p
s
  | SGM t x k p -> Int
forall t x k p. SGM t x k p -> Int
size SGM t x k p
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2 = [Char] -> (k, k, x)
forall a. HasCallStack => [Char] -> a
error [Char]
"there aren't two models to merge"
  | Bool
otherwise  = (k
k, k
k', x
x)
  where ((k
k, k
k'), x
x) = (((k, k), x) -> ((k, k), x) -> Ordering)
-> [((k, k), x)] -> ((k, k), x)
forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
minimumBy ((((k, k), x) -> x) -> ((k, k), x) -> ((k, k), x) -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing ((k, k), x) -> x
forall a b. (a, b) -> b
snd) ([((k, k), x)] -> ((k, k), x)) -> [((k, k), x)] -> ((k, k), x)
forall a b. (a -> b) -> a -> b
$ SGM t x k p -> [((k, k), x)]
forall k t x p. (Eq k, Ord k) => SGM t x k p -> [((k, k), x)]
modelDiffs SGM t x k p
s

-- | Deletes the least used (least matched) model in a pair,
--   and returns its label (now available) and the updated SGM.
--   TODO: Modify the other model to make it slightly more similar to
--   the one that was deleted?
mergeModels :: (Num t, Ord t, Ord k) => SGM t x k p -> k -> k -> (k, SGM t x k p)
mergeModels :: SGM t x k p -> k -> k -> (k, SGM t x k p)
mergeModels SGM t x k p
s k
k1 k
k2
  | Bool -> Bool
not (k -> Map k (p, t) -> Bool
forall k a. Ord k => k -> Map k a -> Bool
M.member k
k1 Map k (p, t)
gm) = [Char] -> (k, SGM t x k p)
forall a. HasCallStack => [Char] -> a
error [Char]
"no such node 1"
  | Bool -> Bool
not (k -> Map k (p, t) -> Bool
forall k a. Ord k => k -> Map k a -> Bool
M.member k
k2 Map k (p, t)
gm) = [Char] -> (k, SGM t x k p)
forall a. HasCallStack => [Char] -> a
error [Char]
"no such node 2"
  | Bool
otherwise          = (k
kDelete, SGM t x k p
s { toMap :: Map k (p, t)
toMap = Map k (p, t)
gm' })
  where c1 :: t
c1 = SGM t x k p
s SGM t x k p -> k -> t
forall k t x p. Ord k => SGM t x k p -> k -> t
`counterAt` k
k1
        c2 :: t
c2 = SGM t x k p
s SGM t x k p -> k -> t
forall k t x p. Ord k => SGM t x k p -> k -> t
`counterAt` k
k2
        (k
kKeep, k
kDelete) | t
c1 t -> t -> Bool
forall a. Ord a => a -> a -> Bool
>= t
c2   = (k
k1, k
k2)
                         | Bool
otherwise = (k
k2, k
k1)
        gm :: Map k (p, t)
gm = SGM t x k p -> Map k (p, t)
forall t x k p. SGM t x k p -> Map k (p, t)
toMap SGM t x k p
s
        gm' :: Map k (p, t)
gm' = ((p, t) -> (p, t)) -> k -> Map k (p, t) -> Map k (p, t)
forall k a. Ord k => (a -> a) -> k -> Map k a -> Map k a
M.adjust (p, t) -> (p, t)
f k
kKeep (Map k (p, t) -> Map k (p, t)) -> Map k (p, t) -> Map k (p, t)
forall a b. (a -> b) -> a -> b
$ k -> Map k (p, t) -> Map k (p, t)
forall k a. Ord k => k -> Map k a -> Map k a
M.delete k
kDelete Map k (p, t)
gm
        f :: (p, t) -> (p, t)
f (p
p, t
_) = (p
p, t
c1 t -> t -> t
forall a. Num a => a -> a -> a
+ t
c2)

-- | Returns True if the SOM is full; returns False if it can add one
--   or more models.
atCapacity :: SGM t x k p -> Bool
atCapacity :: SGM t x k p -> Bool
atCapacity SGM t x k p
s = SGM t x k p -> Int
forall t x k p. SGM t x k p -> Int
size SGM t x k p
s Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== SGM t x k p -> Int
forall t x k p. SGM t x k p -> Int
capacity SGM t x k p
s

-- | @'consolidate' s@ finds the two most similar models, and combines
--   them. This can be used to free up more space for learning. It
--   returns the index of the newly free node, and the updated SGM.
consolidate :: (Num t, Ord t, Ord x, Ord k) => SGM t x k p -> (k, SGM t x k p)
consolidate :: SGM t x k p -> (k, SGM t x k p)
consolidate SGM t x k p
s = (k
k3, SGM t x k p
s2)
  where (k
k1, k
k2, x
_) = SGM t x k p -> (k, k, x)
forall x k t p. (Ord x, Eq k, Ord k) => SGM t x k p -> (k, k, x)
twoMostSimilar SGM t x k p
s
        (k
k3, SGM t x k p
s2) = SGM t x k p -> k -> k -> (k, SGM t x k p)
forall t k x p.
(Num t, Ord t, Ord k) =>
SGM t x k p -> k -> k -> (k, SGM t x k p)
mergeModels SGM t x k p
s k
k1 k
k2

consolidateAndAdd
  :: (Num t, Ord t, Ord x, Bounded k, Enum k, Ord k)
  => SGM t x k p -> p -> SGM t x k p
consolidateAndAdd :: SGM t x k p -> p -> SGM t x k p
consolidateAndAdd SGM t x k p
s p
p = SGM t x k p -> p -> SGM t x k p
forall t k x p.
(Num t, Bounded k, Enum k, Ord k) =>
SGM t x k p -> p -> SGM t x k p
addNode SGM t x k p
s' p
p
  where (k
_, SGM t x k p
s') = SGM t x k p -> (k, SGM t x k p)
forall t x k p.
(Num t, Ord t, Ord x, Ord k) =>
SGM t x k p -> (k, SGM t x k p)
consolidate SGM t x k p
s

-- | Set the model for a node.
--   Useful when merging two models and replacing one.
setModel :: (Num t, Ord k) => SGM t x k p -> k -> p -> SGM t x k p
setModel :: SGM t x k p -> k -> p -> SGM t x k p
setModel SGM t x k p
s k
k p
p
  | k -> Map k (p, t) -> Bool
forall k a. Ord k => k -> Map k a -> Bool
M.member k
k Map k (p, t)
gm = [Char] -> SGM t x k p
forall a. HasCallStack => [Char] -> a
error [Char]
"node already exists"
  | Bool
otherwise     = SGM t x k p
s { toMap :: Map k (p, t)
toMap = Map k (p, t)
gm' }
  where gm :: Map k (p, t)
gm = SGM t x k p -> Map k (p, t)
forall t x k p. SGM t x k p -> Map k (p, t)
toMap SGM t x k p
s
        gm' :: Map k (p, t)
gm' = k -> (p, t) -> Map k (p, t) -> Map k (p, t)
forall k a. Ord k => k -> a -> Map k a -> Map k a
M.insert k
k (p
p, t
0) Map k (p, t)
gm

-- | @'classify' s p@ identifies the model @s@ that most closely
--   matches the pattern @p@.
--   It will not make any changes to the classifier.
--   (I.e., it will not change the models or match counts.)
--   Returns the ID of the node with the best matching model,
--   the difference between the best matching model and the pattern,
--   and the SGM labels paired with the model and the difference
--   between the input and the corresponding model.
--   The final paired list is sorted in decreasing order of similarity.
classify
  :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k)
    => SGM t x k p -> p -> (k, x, M.Map k (p, x))
classify :: SGM t x k p -> p -> (k, x, Map k (p, x))
classify SGM t x k p
s p
p
  | SGM t x k p -> Bool
forall t x k p. SGM t x k p -> Bool
isEmpty SGM t x k p
s = [Char] -> (k, x, Map k (p, x))
forall a. HasCallStack => [Char] -> a
error [Char]
"SGM has no models"
  | Bool
otherwise = (k
bmu, x
bmuDiff, Map k (p, x)
report)
  where report :: Map k (p, x)
report
          = (p -> (p, x)) -> Map k p -> Map k (p, x)
forall a b k. (a -> b) -> Map k a -> Map k b
M.map (\p
p0 -> (p
p0, SGM t x k p -> p -> p -> x
forall t x k p. SGM t x k p -> p -> p -> x
difference SGM t x k p
s p
p p
p0)) (Map k p -> Map k (p, x))
-> (SGM t x k p -> Map k p) -> SGM t x k p -> Map k (p, x)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SGM t x k p -> Map k p
forall t x k p. SGM t x k p -> Map k p
modelMap (SGM t x k p -> Map k (p, x)) -> SGM t x k p -> Map k (p, x)
forall a b. (a -> b) -> a -> b
$ SGM t x k p
s
        (k
bmu, x
bmuDiff)
          = [(k, x)] -> (k, x)
forall a. [a] -> a
head ([(k, x)] -> (k, x))
-> (Map k (p, x) -> [(k, x)]) -> Map k (p, x) -> (k, x)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((k, x) -> (k, x) -> Ordering) -> [(k, x)] -> [(k, x)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (k, x) -> (k, x) -> Ordering
forall a b. (Ord a, Ord b) => (a, b) -> (a, b) -> Ordering
matchOrder ([(k, x)] -> [(k, x)])
-> (Map k (p, x) -> [(k, x)]) -> Map k (p, x) -> [(k, x)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((k, (p, x)) -> (k, x)) -> [(k, (p, x))] -> [(k, x)]
forall a b. (a -> b) -> [a] -> [b]
map (\(k
k, (p
_, x
x)) -> (k
k, x
x))
              ([(k, (p, x))] -> [(k, x)])
-> (Map k (p, x) -> [(k, (p, x))]) -> Map k (p, x) -> [(k, x)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map k (p, x) -> [(k, (p, x))]
forall k a. Map k a -> [(k, a)]
M.toList (Map k (p, x) -> (k, x)) -> Map k (p, x) -> (k, x)
forall a b. (a -> b) -> a -> b
$ Map k (p, x)
report

-- | Order models by ascending difference from the input pattern,
--   then by creation order (label number).
matchOrder :: (Ord a, Ord b) => (a, b) -> (a, b) -> Ordering
matchOrder :: (a, b) -> (a, b) -> Ordering
matchOrder (a
a, b
b) (a
c, b
d) = (b, a) -> (b, a) -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (b
b, a
a) (b
d, a
c)

-- | @'trainAndClassify' s p@ identifies the model in @s@ that most
--   closely matches @p@, and updates it to be a somewhat better match.
--   If necessary, it will create a new node and model.
--   Returns the ID of the node with the best matching model,
--   the difference between the pattern and the best matching model
--   in the original SGM (before training or adding a new model),
--   the differences between the pattern and each model in the updated
--   SGM,
--   and the updated SGM.
trainAndClassify
  :: (Num t, Ord t, Fractional x, Num x, Ord x,
     Bounded k, Enum k, Ord k)
    => SGM t x k p -> p -> (k, x, M.Map k (p, x), SGM t x k p)
trainAndClassify :: SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)
trainAndClassify SGM t x k p
s p
p = SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)
forall t x k p.
(Num t, Ord t, Num x, Ord x, Enum k, Ord k) =>
SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)
trainAndClassify' SGM t x k p
s' p
p
  where s' :: SGM t x k p
s' | SGM t x k p -> Int
forall t x k p. SGM t x k p -> Int
size SGM t x k p
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
1 Bool -> Bool -> Bool
&& x
bmuDiff x -> x -> Bool
forall a. Eq a => a -> a -> Bool
== x
0        = SGM t x k p
s
           | SGM t x k p -> Bool
forall t x k p. SGM t x k p -> Bool
atCapacity SGM t x k p
s Bool -> Bool -> Bool
&& SGM t x k p -> Int
forall t x k p. SGM t x k p -> Int
capacity SGM t x k p
s Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1   = SGM t x k p
s
           | SGM t x k p -> Int
forall t x k p. SGM t x k p -> Int
size SGM t x k p
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2                      = SGM t x k p -> p -> SGM t x k p
forall t k x p.
(Num t, Bounded k, Enum k, Ord k) =>
SGM t x k p -> p -> SGM t x k p
addNode SGM t x k p
s p
p
           | SGM t x k p -> Bool
forall t x k p. SGM t x k p -> Bool
atCapacity SGM t x k p
s Bool -> Bool -> Bool
&& x
bmuDiff x -> x -> Bool
forall a. Ord a => a -> a -> Bool
>= x
cutoff = SGM t x k p -> p -> SGM t x k p
forall t x k p.
(Num t, Ord t, Ord x, Bounded k, Enum k, Ord k) =>
SGM t x k p -> p -> SGM t x k p
consolidateAndAdd SGM t x k p
s p
p
           | SGM t x k p -> Bool
forall t x k p. SGM t x k p -> Bool
atCapacity SGM t x k p
s                    = SGM t x k p
s
           | Bool
otherwise                       = SGM t x k p -> p -> SGM t x k p
forall t k x p.
(Num t, Bounded k, Enum k, Ord k) =>
SGM t x k p -> p -> SGM t x k p
addNode SGM t x k p
s p
p
        (k
_, x
bmuDiff, Map k (p, x)
_) = SGM t x k p -> p -> (k, x, Map k (p, x))
forall t x k p.
(Num t, Ord t, Num x, Ord x, Enum k, Ord k) =>
SGM t x k p -> p -> (k, x, Map k (p, x))
classify SGM t x k p
s p
p
        (k
_, k
_, x
cutoff) = SGM t x k p -> (k, k, x)
forall x k t p. (Ord x, Eq k, Ord k) => SGM t x k p -> (k, k, x)
twoMostSimilar SGM t x k p
s

-- | Internal method.
-- NOTE: This function will adjust the model and update the match
-- for the BMU.
trainAndClassify'
  :: (Num t, Ord t, Num x, Ord x, Enum k, Ord k)
    => SGM t x k p -> p -> (k, x, M.Map k (p, x), SGM t x k p)
trainAndClassify' :: SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)
trainAndClassify' SGM t x k p
s p
p = (k
bmu2, x
bmuDiff, Map k (p, x)
report, SGM t x k p
s3)
  where (k
bmu, x
bmuDiff, Map k (p, x)
_) = SGM t x k p -> p -> (k, x, Map k (p, x))
forall t x k p.
(Num t, Ord t, Num x, Ord x, Enum k, Ord k) =>
SGM t x k p -> p -> (k, x, Map k (p, x))
classify SGM t x k p
s p
p
        s2 :: SGM t x k p
s2 = k -> SGM t x k p -> SGM t x k p
forall t k x p. (Num t, Ord k) => k -> SGM t x k p -> SGM t x k p
incrementCounter k
bmu SGM t x k p
s
        s3 :: SGM t x k p
s3 = SGM t x k p -> k -> p -> SGM t x k p
forall t k x p.
(Num t, Ord k) =>
SGM t x k p -> k -> p -> SGM t x k p
trainNode SGM t x k p
s2 k
bmu p
p
        (k
bmu2, x
_, Map k (p, x)
report) = SGM t x k p -> p -> (k, x, Map k (p, x))
forall t x k p.
(Num t, Ord t, Num x, Ord x, Enum k, Ord k) =>
SGM t x k p -> p -> (k, x, Map k (p, x))
classify SGM t x k p
s3 p
p

-- | @'train' s p@ identifies the model in @s@ that most closely
--   matches @p@, and updates it to be a somewhat better match.
--   If necessary, it will create a new node and model.
train
  :: (Num t, Ord t, Fractional x, Num x, Ord x,
     Bounded k, Enum k, Ord k)
    => SGM t x k p -> p -> SGM t x k p
train :: SGM t x k p -> p -> SGM t x k p
train SGM t x k p
s p
p = SGM t x k p
s'
  where (k
_, x
_, Map k (p, x)
_, SGM t x k p
s') = SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)
forall t x k p.
(Num t, Ord t, Fractional x, Num x, Ord x, Bounded k, Enum k,
 Ord k) =>
SGM t x k p -> p -> (k, x, Map k (p, x), SGM t x k p)
trainAndClassify SGM t x k p
s p
p

-- | For each pattern @p@ in @ps@, @'trainBatch' s ps@ identifies the
--   model in @s@ that most closely matches @p@,
--   and updates it to be a somewhat better match.
trainBatch
  :: (Num t, Ord t, Fractional x, Num x, Ord x,
     Bounded k, Enum k, Ord k)
    => SGM t x k p -> [p] -> SGM t x k p
trainBatch :: SGM t x k p -> [p] -> SGM t x k p
trainBatch = (SGM t x k p -> p -> SGM t x k p)
-> SGM t x k p -> [p] -> SGM t x k p
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' SGM t x k p -> p -> SGM t x k p
forall t x k p.
(Num t, Ord t, Fractional x, Num x, Ord x, Bounded k, Enum k,
 Ord k) =>
SGM t x k p -> p -> SGM t x k p
train

-- | Same as @'size'@.
numModels :: SGM t x k p -> Int
numModels :: SGM t x k p -> Int
numModels = SGM t x k p -> Int
forall t x k p. SGM t x k p -> Int
size

-- | Same as @'capacity'@.
maxSize :: SGM t x k p -> Int
maxSize :: SGM t x k p -> Int
maxSize = SGM t x k p -> Int
forall t x k p. SGM t x k p -> Int
capacity

-- | Returns a copy of the SOM containing only models that satisfy the
--   predicate.
filter :: (p -> Bool) -> SGM t x k p -> SGM t x k p
filter :: (p -> Bool) -> SGM t x k p -> SGM t x k p
filter p -> Bool
f SGM t x k p
s = SGM t x k p
s { toMap :: Map k (p, t)
toMap = Map k (p, t)
pm' }
  where pm :: Map k (p, t)
pm = SGM t x k p -> Map k (p, t)
forall t x k p. SGM t x k p -> Map k (p, t)
toMap SGM t x k p
s
        pm' :: Map k (p, t)
pm' = ((p, t) -> Bool) -> Map k (p, t) -> Map k (p, t)
forall a k. (a -> Bool) -> Map k a -> Map k a
M.filter (\(p
p, t
_) -> p -> Bool
f p
p) Map k (p, t)
pm