src/Learning/Perceptron.hs

--------------------------------------------------------------------------------
-- |
-- Module      :  Learning.Perceptron
-- Copyright   :  (c) 2013 Stefan Holdermans
-- License     :  BSD-style
-- Maintainer  :  stefan@vectorfabrics.com
-- Stability   :  provisional
-- Portability :  portable
--
-- An implementation of the perceptron learning algorithm for supervised binary
-- linear classification.
--
-- A binary classifier maps so-called feature vectors to Boolean values.
-- In the case of linear binary classification the decision on what Boolean a
-- vector is mapped to is based on the value of a linear combination of the
-- vector's components.
-- The perceptron is an online algorithm for learning such a linear mapping from
-- a set of training data.
--
-- Training data are provided as samples consisting of a feature vector and the
-- Boolean it should be mapped to.
-- Hence, the perceptron implements a form of supervised learning.
-- Only if the training vectors are linearly separable, the perceptron algorithm
-- is guaranteed to terminate.
--
--------------------------------------------------------------------------------

module Learning.Perceptron (
    pla        -- :: (Num a, Ord a) => [([a], Bool)] -> [a] -> Bool
  ) where

-- The algorithm models its hypotheses (potential classifiers) by weight
-- vectors; the classifier corresponding to such a weight vector @ws@ maps a
-- feature vector @xs@ to @True@ if and only if
--
--   @sum (zipWith (*) ws xs) + b > 0@,
--
-- where @b@ is the so-called bias term. By extending each feature vector with
-- an artifical component that is always @0@, we can rewrite this inequality to
--
--   @sum (zipWith (*) (b : ws) (1 : xs)) > 0@,
--
-- so that each hypothesis can be represented by a single list of vector
-- components.
--
-- Starting with an initial hypothesis the algorithm proceeds by selecting in
-- each iteration an element from the training set that does not agree with the
-- hypothesis and then adjusts the hypothesis to make it hold for the selected
-- element.
-- The algorithm terminates when all elements in the training set agree with the
-- hypothesis.

--------------------------------------------------------------------------------
-- Samples
--------------------------------------------------------------------------------

-- An augmented input vector, i.e., an input vector with a constant artificial
-- component for multiplication with the bias term prepended.
newtype Aug a = A [a]

-- Augment an input vector by prepending a constant artificial bias-term
-- component.
aug :: Num a => [a] -> Aug a
aug xs = A (1 : xs)

-- A training sample: an augmented classification, i.e., an augmented input
-- vector and the corresponding desired output.
data Sample a = S (Aug a) !Bool

--------------------------------------------------------------------------------
-- Hypotheses
--------------------------------------------------------------------------------

-- A hypothesis: a weight vector with a bias term prepended
newtype Hyp a = H [a]

-- Apply the hypothesis to the augmented input vector.
ap :: (Num a, Ord a) => Hyp a -> Aug a -> Bool
ap (H ws) (A xs) = sum (zipWith (*) ws xs) > 0

-- Test the hypothesis against the training sample.
test :: (Num a, Ord a) => Hyp a -> Sample a -> Bool
test h (S a y) = ap h a == y

-- Adjust the hypothesis toward the sample.
adjust :: Num a => Hyp a -> Sample a -> Hyp a
adjust (H ws) (S (A xs) y) = H (zipWith op ws xs)
  where
    op | y         = (+)
       | otherwise = (-)

-- "Promote" the hypothesis to a classifier.
lift :: (Num a, Ord a) => Hyp a -> [a] -> Bool
lift h xs = ap h (aug xs)

--------------------------------------------------------------------------------
-- Algorithm
--------------------------------------------------------------------------------

-- | Return a classifier that agrees with the given list of classifications.
-- /Only terminates if the given classifications are linearly separable!/
pla :: (Num a, Ord a) => [([a], Bool)] -> [a] -> Bool
pla ds = go h0 ss
  where
    ss = [S (aug xs) y | (xs, y) <- ds]  -- training samples
    h0 = H (repeat 0)                    -- initial hypothesis

    -- iterate
    go h []                    = lift h
    go h (s : ss') | test h s  = go h ss'
                   | otherwise = go (adjust h s) ss