------------------------------------------------------------------------
-- |
-- Module      :  Data.Datamining.Clustering.Classifier
-- Copyright   :  (c) 2012-2021 Amy de Buitléir
-- License     :  BSD-style
-- Maintainer  :  amy@nualeargais.ie
-- Stability   :  experimental
-- Portability :  portable
--
-- Tools for identifying patterns in data.
--
------------------------------------------------------------------------
{-# LANGUAGE FlexibleContexts      #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TypeFamilies          #-}
module Data.Datamining.Clustering.Classifier
  (
    Classifier(..)
  ) where

import           Data.List (minimumBy)
import           Data.Ord  (comparing)

-- | A machine which learns to classify input patterns.
--   Minimal complete definition: @trainBatch@, @reportAndTrain@.
class Classifier (c :: * -> * -> * -> *) v k p where
  -- | Returns a list of index\/model pairs.
  toList :: c v k p -> [(k, p)]

  -- | Returns the number of models this classifier can learn.
  numModels :: c v k p -> Int

  -- | Returns the current models of the classifier.
  models :: c v k p -> [p]

  -- | @'differences' c target@ returns the indices of all nodes in
  --   @c@, paired with the difference between @target@ and the
  --   node's model.
  differences :: c v k p -> p -> [(k, v)]

  -- | @classify c target@ returns the index of the node in @c@
  --   whose model best matches the @target@.
  classify :: Ord v => c v k p -> p -> k
  classify c v k p
c p
p = [(k, v)] -> k
forall b p. Ord b => [(p, b)] -> p
f ([(k, v)] -> k) -> [(k, v)] -> k
forall a b. (a -> b) -> a -> b
$ c v k p -> p -> [(k, v)]
forall (c :: * -> * -> * -> *) v k p.
Classifier c v k p =>
c v k p -> p -> [(k, v)]
differences c v k p
c p
p
    where f :: [(p, b)] -> p
f [] = [Char] -> p
forall a. HasCallStack => [Char] -> a
error [Char]
"classifier has no models"
          f [(p, b)]
xs = (p, b) -> p
forall a b. (a, b) -> a
fst ((p, b) -> p) -> (p, b) -> p
forall a b. (a -> b) -> a -> b
$ ((p, b) -> (p, b) -> Ordering) -> [(p, b)] -> (p, b)
forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
minimumBy (((p, b) -> b) -> (p, b) -> (p, b) -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (p, b) -> b
forall a b. (a, b) -> b
snd) [(p, b)]
xs

  -- | @'train' c target@ returns a modified copy
  --   of the classifier @c@ that has partially learned the @target@.
  train :: c v k p -> p -> c v k p
  train c v k p
c p
p = c v k p
c'
    where (k
_, [(k, v)]
_, c v k p
c') = c v k p -> p -> (k, [(k, v)], c v k p)
forall (c :: * -> * -> * -> *) v k p.
Classifier c v k p =>
c v k p -> p -> (k, [(k, v)], c v k p)
reportAndTrain c v k p
c p
p

  -- | @'trainBatch' c targets@ returns a modified copy
  --   of the classifier @c@ that has partially learned the @targets@.
  trainBatch :: c v k p -> [p] -> c v k p

  -- | @'classifyAndTrain' c target@ returns a tuple containing the
  --   index of the node in @c@ whose model best matches the input
  --   @target@, and a modified copy of the classifier @c@ that has
  --   partially learned the @target@. Invoking @classifyAndTrain c p@
  --   may be faster than invoking @(p `classify` c, train c p)@, but
  --   they
  --   should give identical results.
  classifyAndTrain :: c v k p -> p -> (k, c v k p)
  classifyAndTrain c v k p
c p
p = (k
bmu, c v k p
c')
    where (k
bmu, [(k, v)]
_, c v k p
c') = c v k p -> p -> (k, [(k, v)], c v k p)
forall (c :: * -> * -> * -> *) v k p.
Classifier c v k p =>
c v k p -> p -> (k, [(k, v)], c v k p)
reportAndTrain c v k p
c p
p

  -- | @'diffAndTrain' c target@ returns a tuple containing:
  --   1. The indices of all nodes in @c@, paired with the difference
  --      between @target@ and the node's model
  --   2. A modified copy of the classifier @c@ that has partially
  --      learned the @target@.
  --   Invoking @diffAndTrain c p@ may be faster than invoking
  --   @(p `diff` c, train c p)@, but they should give identical
  --   results.
  diffAndTrain :: c v k p -> p -> ([(k, v)], c v k p)
  diffAndTrain c v k p
c p
p = ([(k, v)]
ds, c v k p
c')
    where (k
_, [(k, v)]
ds, c v k p
c') = c v k p -> p -> (k, [(k, v)], c v k p)
forall (c :: * -> * -> * -> *) v k p.
Classifier c v k p =>
c v k p -> p -> (k, [(k, v)], c v k p)
reportAndTrain c v k p
c p
p

  -- | @'reportAndTrain' c f target@ returns a tuple containing:
  --   1. The index of the node in @c@ whose model best matches the
  --      input @target@
  --   2. The indices of all nodes in @c@, paired with the difference
  --      between @target@ and the node's model
  --   3. A modified copy of the classifier @c@ that has partially
  --      learned the @target@
  --   Invoking @diffAndTrain c p@ may be faster than invoking
  --   @(p `diff` c, train c p)@, but they should give identical
  --   results.
  reportAndTrain :: c v k p -> p -> (k, [(k, v)], c v k p)