------------------------------------------------------------------------
-- |
-- Module      :  Data.Datamining.Clustering.Classifier
-- Copyright   :  (c) Amy de Buitléir 2012-2018
-- License     :  BSD-style
-- Maintainer  :  amy@nualeargais.ie
-- Stability   :  experimental
-- Portability :  portable
--
-- Tools for identifying patterns in data.
--
------------------------------------------------------------------------
{-# LANGUAGE TypeFamilies, FlexibleContexts, MultiParamTypeClasses #-}

module Data.Datamining.Clustering.Classifier
  (
    Classifier(..)
  ) where

import Data.List (minimumBy)
import Data.Ord (comparing)

-- | A machine which learns to classify input patterns.
--
--   The class is indexed by the classifier type constructor @c@, the
--   type @v@ of a difference between two patterns, the type @k@ of a
--   node index, and the type @p@ of a pattern (model).
--
--   Minimal complete definition: 'toList', 'numModels', 'models',
--   'differences', 'trainBatch', 'reportAndTrain'.
--   The remaining methods have default implementations written in
--   terms of 'differences' and 'reportAndTrain'.
class Classifier (c :: * -> * -> * -> *) v k p where
  -- | Returns a list of index\/model pairs.
  toList :: c v k p -> [(k, p)]

  -- | Returns the number of models this classifier can learn.
  numModels :: c v k p -> Int

  -- | Returns the current models of the classifier.
  models :: c v k p -> [p]

  -- | @'differences' c target@ returns the indices of all nodes in
  --   @c@, paired with the difference between @target@ and the
  --   node's model.
  differences :: c v k p -> p -> [(k, v)]

  -- | @'classify' c target@ returns the index of the node in @c@
  --   whose model best matches the @target@, i.e. the index paired
  --   with the smallest difference reported by 'differences'.
  --
  --   The default implementation calls 'error' if 'differences'
  --   returns an empty list.
  classify :: Ord v => c v k p -> p -> k
  classify c p = f $ differences c p
    where f [] = error "classifier has no models"
          f xs = fst $ minimumBy (comparing snd) xs

  -- | @'train' c target@ returns a modified copy
  --   of the classifier @c@ that has partially learned the @target@.
  --
  --   The default implementation keeps only the classifier component
  --   of 'reportAndTrain'.
  train :: c v k p -> p -> c v k p
  train c p = c'
    where (_, _, c') = reportAndTrain c p

  -- | @'trainBatch' c targets@ returns a modified copy
  --   of the classifier @c@ that has partially learned the @targets@.
  trainBatch :: c v k p -> [p] -> c v k p

  -- | @'classifyAndTrain' c target@ returns a tuple containing the
  --   index of the node in @c@ whose model best matches the input
  --   @target@, and a modified copy of the classifier @c@ that has
  --   partially learned the @target@.
  --   Invoking @classifyAndTrain c p@ may be faster than invoking
  --   @(classify c p, train c p)@, but they should give identical
  --   results.
  classifyAndTrain :: c v k p -> p -> (k, c v k p)
  classifyAndTrain c p = (bmu, c')
    where (bmu, _, c') = reportAndTrain c p

  -- | @'diffAndTrain' c target@ returns a tuple containing:
  --
  --   1. The indices of all nodes in @c@, paired with the difference
  --      between @target@ and the node's model
  --
  --   2. A modified copy of the classifier @c@ that has partially
  --      learned the @target@.
  --
  --   Invoking @diffAndTrain c p@ may be faster than invoking
  --   @(differences c p, train c p)@, but they should give identical
  --   results.
  diffAndTrain :: c v k p -> p -> ([(k, v)], c v k p)
  diffAndTrain c p = (ds, c')
    where (_, ds, c') = reportAndTrain c p

  -- | @'reportAndTrain' c target@ returns a tuple containing:
  --
  --   1. The index of the node in @c@ whose model best matches the
  --      input @target@
  --
  --   2. The indices of all nodes in @c@, paired with the difference
  --      between @target@ and the node's model
  --
  --   3. A modified copy of the classifier @c@ that has partially
  --      learned the @target@
  --
  --   Invoking @reportAndTrain c p@ may be faster than invoking
  --   @(classify c p, differences c p, train c p)@, but they should
  --   give identical results.
  reportAndTrain :: c v k p -> p -> (k, [(k, v)], c v k p)

  {-# MINIMAL toList, numModels, models, differences, trainBatch,
              reportAndTrain #-}