------------------------------------------------------------------------
-- |
-- Module      :  Data.Datamining.Clustering.SOMInternal
-- Copyright   :  (c) Amy de Buitléir 2012-2013
-- License     :  BSD-style
-- Maintainer  :  amy@nualeargais.ie
-- Stability   :  experimental
-- Portability :  portable
--
-- A module containing private @SOM@ internals. Most developers should
-- use @SOM@ instead. This module is subject to change without notice.
--
------------------------------------------------------------------------
{-# LANGUAGE UnicodeSyntax, TypeFamilies, FlexibleContexts #-}

module Data.Datamining.Clustering.SOMInternal
  (
    adjustNode,
    adjustVector,
    classify,
    classifyAndTrain,
    diff,
    diffAndTrain,
    euclideanDistanceSquared,
    magnitudeSquared,
    normalise,
    NormalisedVector,
    scale,
    scaleAll,
    ScaledVector,
    train,
    trainBatch,
    Pattern(..)
  ) where

import Data.Eq.Unicode ((≡))
import Data.List (foldl', minimumBy)
import Data.Ord (comparing)
import Math.Geometry.GridMap (GridMap, BaseGrid, mapWithKey, toList)
import Math.Geometry.Grid (Grid, Index, distance)
import qualified Math.Geometry.GridMap as GM (map)

-- | A pattern to be learned or classified by a self-organising map.
class Pattern p where
  type Metric p
  -- | Compares two patterns and returns a /non-negative/ number 
  --   representing how different the patterns are. A result of @0@ 
  --   indicates that the patterns are identical.
  difference ∷ p → p → Metric p
  -- | @'makeSimilar' target amount pattern@ returns a modified copy of
  --   @pattern@ that is more similar to @target@ than @pattern@ is. The 
  --   magnitude of the adjustment is controlled by the @amount@ 
  --   parameter, which should be a number between 0 and 1. Larger 
  --   values for @amount@ permit greater adjustments. If @amount@=1,
  --   the result should be identical to the @target@. If @amount@=0,
  --   the result should be the unmodified @pattern@.
  makeSimilar ∷ p → Metric p → p → p

-- | @'diff' c pattern@ returns the positions of all nodes in 
--   @c@, paired with the difference between @pattern@ and the node's 
--   pattern.
diff 
  ∷ (GridMap gm p, Pattern p, GridMap gm m,
    Metric p ~ m, BaseGrid gm p ~ BaseGrid gm m) ⇒ 
    gm p → p → gm m
diff c pattern = GM.map (pattern `difference`) c

-- | @classify c pattern@ returns the position of the node in @c@ 
--   whose pattern best matches the input @pattern@.
classify
  ∷ (GridMap gm p, Pattern p, GridMap gm m,
      Metric p ~ m, Ord m, k ~ Index (BaseGrid gm p), 
      BaseGrid gm m ~ BaseGrid gm p) ⇒ 
    gm p → p → k
classify c pattern = 
  fst $ minimumBy (comparing snd) $ toList $ diff c pattern

-- | If @f d@ is a function that returns the learning rate to apply to a
--   node based on its distance @d@from the node that best matches the
--   input pattern, then @'train' c f pattern@ returns a modified copy
--   of the classifier @c@ that has partially learned the @target@.
train
  ∷ (Ord m, GridMap gm p, GridMap gm m,
      GridMap gm (Int, p), GridMap gm (m, p), Grid (gm p),
      Pattern p, Metric p ~ m, Index (BaseGrid gm p) ~ Index (gm p),
      BaseGrid gm m ~ BaseGrid gm p) ⇒
    gm p → (Int → m) → p → gm p
train c f pattern = snd $ classifyAndTrain c f pattern

-- | Same as @train@, but applied to multiple patterns.
trainBatch
  ∷ (Ord m, GridMap gm p, GridMap gm m,
      GridMap gm (Int, p), GridMap gm (m, p), Grid (gm p),
      Pattern p, Metric p ~ m, Index (BaseGrid gm p) ~ Index (gm p),
      BaseGrid gm m ~ BaseGrid gm p) ⇒
    gm p → (Int → m) → [p] → gm p
trainBatch c f ps = foldl' (\som → train som f) c ps

-- | If @f@ is a function that returns the learning rate to apply to a
--   node based on its distance from the node that best matches the 
--   @target@, then @'classifyAndTrain' c f target@ returns a tuple 
--   containing the position of the node in @c@ whose pattern best 
--   matches the input @target@, and a modified copy of the classifier 
--   @c@ that has partially learned the @target@.
--   Invoking @classifyAndTrain c f p@ may be faster than invoking
--   @(p `classify` c, train c f p)@, but they should give identical
--   results.
classifyAndTrain
  ∷ (Ord m, GridMap gm p, GridMap gm m,
      GridMap gm (Int, p), GridMap gm (m, p), Grid (gm p),
      Pattern p, Metric p ~ m, Index (BaseGrid gm p) ~ Index (gm p),
      BaseGrid gm m ~ BaseGrid gm p) ⇒
     gm p → (Int → m) → p → (Index (gm p), gm p)
classifyAndTrain c f pattern = (bmu, c')
  where (bmu, _, c') = reportAndTrain c f pattern

-- | If @f@ is a function that returns the learning rate to apply to a
--   node based on its distance from the node that best matches the 
--   @target@, then @'diffAndTrain' c f target@ returns a tuple 
--   containing:
--   1. The positions of all nodes in @c@, paired with the difference
--      between @pattern@ and the node's pattern
--   2. A modified copy of the classifier @c@ that has partially
--      learned the @target@.
--   Invoking @diffAndTrain c f p@ may be faster than invoking
--   @(p `diff` c, train c f p)@, but they should give identical
--   results.
diffAndTrain
  ∷ (Ord m, GridMap gm p, GridMap gm m,
      GridMap gm (Int, p), GridMap gm (m, p), Grid (gm p),
      Pattern p, Metric p ~ m, Index (BaseGrid gm p) ~ Index (gm p),
      BaseGrid gm m ~ BaseGrid gm p) ⇒
     gm p → (Int → m) → p → (gm m, gm p)
diffAndTrain c f pattern = (ds, c')
  where (_, ds, c') = reportAndTrain c f pattern

reportAndTrain
  ∷ (Ord m, GridMap gm p, GridMap gm m,
      GridMap gm (Int, p), GridMap gm (m, p), Grid (gm p),
      Pattern p, Metric p ~ m, Index (BaseGrid gm p) ~ Index (gm p),
      BaseGrid gm m ~ BaseGrid gm p) ⇒
     gm p → (Int → m) → p → (Index (gm p), gm m, gm p)
reportAndTrain c f pattern = (bmu, ds, c')
  where ds = c `diff` pattern
        bmu = fst $ minimumBy (comparing snd) $ toList ds
        c' = trainWithBMU c f bmu pattern

trainWithBMU
  ∷ (GridMap gm p, GridMap gm (Int, p), GridMap gm (m, p),
      Grid (gm p), Pattern p, Metric p ~ m, k ~ Index (BaseGrid gm p), 
      k ~ Index (gm p)) ⇒
    gm p → (Int → m) → k → p → gm p
trainWithBMU c f bmu pattern = GM.map (adjustNode pattern) lrMap
  where dMap = mapWithKey (\k p → (distance c k bmu, p)) c
        lrMap = GM.map (\(d,p) → (f d, p)) dMap

adjustNode ∷ (Pattern p) ⇒ p → (Metric p, p) → p
adjustNode target (r,p) = makeSimilar target r p

--
-- Using numeric vectors as patterns.
-- 

magnitudeSquared ∷ Num a ⇒ [a] → a
magnitudeSquared xs =  sum $ map (\x → x*x) xs

-- | Calculates the square of the Euclidean distance between two 
--   vectors.
euclideanDistanceSquared ∷ Num a ⇒ [a] → [a] → a
euclideanDistanceSquared xs ys = magnitudeSquared $ zipWith (-) xs ys

-- | @'adjustVector' target amount vector@ adjusts @vector@ to move it 
--   closer to @target@. The amount of adjustment is controlled by the
--   learning rate @r@, which is a number between 0 and 1. Larger values
--   of @r@ permit more adjustment. If @r@=1, the result will be 
--   identical to the @target@. If @amount@=0, the result will be the
--   unmodified @pattern@.
adjustVector ∷ (Num a, Ord a, Eq a) ⇒ [a] → a → [a] → [a]
adjustVector xs r ys
  | r < 0     = error "Negative learning rate"
  | r > 1     = error "Learning rate > 1"
  | r ≡ 1     = xs
  | otherwise = zipWith (+) ys deltas
      where ds = zipWith (-) xs ys
            deltas = map (r *) ds

-- | A vector that has been normalised, i.e., the magnitude of the 
--   vector = 1.
data NormalisedVector a = NormalisedVector [a] deriving Show

-- | Normalises a vector
normalise ∷ Floating a ⇒ [a] → NormalisedVector a
normalise xs = NormalisedVector $ map (/x) xs
  where x = norm xs

norm ∷ Floating a ⇒ [a] → a
norm xs = sqrt $ sum (map f xs)
  where f x = x*x

instance (Floating a, Fractional a, Ord a, Eq a) ⇒ 
    Pattern (NormalisedVector a) where
  type Metric (NormalisedVector a) = a
  difference (NormalisedVector xs) (NormalisedVector ys) = 
    euclideanDistanceSquared xs ys
  makeSimilar (NormalisedVector xs) r (NormalisedVector ys) = 
    normalise $ adjustVector xs r ys

-- | A vector that has been scaled so that all elements in the vector 
--   are between zero and one. To scale a set of vectors, use 
--   @'scaleAll'@. Alternatively, if you can identify a maximum and 
--   minimum value for each element in a vector, you can scale 
--   individual vectors using @'scale'@.
data ScaledVector a = ScaledVector [a] deriving Show

-- | Given a vector @qs@ of pairs of numbers, where each pair represents
--   the maximum and minimum value to be expected at each position in 
--   @xs@, @'scale' qs xs@ scales the vector @xs@ element by element, 
--   mapping the maximum value expected at that position to one, and the
--   minimum value to zero.
scale ∷ Fractional a ⇒ [(a,a)] → [a] → ScaledVector a
scale qs xs = ScaledVector $ zipWith scaleValue qs xs

-- | Scales a set of vectors by determining the maximum and minimum
--   values at each position in the vector, and mapping the maximum 
--   value to one, and the minimum value to zero.
scaleAll ∷ (Fractional a, Ord a) ⇒ [[a]] → [ScaledVector a]
scaleAll xss = map (scale qs) xss
  where qs = quantify xss

scaleValue ∷ Fractional a ⇒ (a,a) → a → a
scaleValue (minX,maxX) x = (x - minX) / (maxX-minX)

quantify ∷ Ord a ⇒ [[a]] → [(a,a)]
quantify xss = foldl' quantify' qs (tail xss)
  where qs = zip (head xss) (head xss)

quantify' ∷ Ord a ⇒ [(a,a)] → [a] → [(a,a)]
quantify' = zipWith f
  where f (minX, maxX) x = (min minX x, max maxX x)

instance (Fractional a, Ord a, Eq a) ⇒ Pattern (ScaledVector a) where
  type Metric (ScaledVector a) = a
  difference (ScaledVector xs) (ScaledVector ys) = 
    euclideanDistanceSquared xs ys
  makeSimilar (ScaledVector xs) r (ScaledVector ys) =
    ScaledVector $ adjustVector xs r ys