------------------------------------------------------------------------ -- | -- Module : Data.Datamining.Pattern -- Copyright : (c) Amy de Buitléir 2012-2013 -- License : BSD-style -- Maintainer : amy@nualeargais.ie -- Stability : experimental -- Portability : portable -- -- Tools for identifying patterns in data. -- ------------------------------------------------------------------------ {-# LANGUAGE TypeFamilies, FlexibleContexts, MultiParamTypeClasses #-} module Data.Datamining.Pattern ( -- * Patterns Pattern(..), -- * Numbers as patterns -- $Num adjustNum, absDifference, -- * Numeric vectors as patterns -- ** Raw vectors -- $Vector adjustVector, euclideanDistanceSquared, magnitudeSquared, -- ** Normalised vectors NormalisedVector, normalise, -- ** Scaled vectors ScaledVector, scale, scaleAll ) where import Data.List (foldl') -- | A pattern to be learned or classified. class Pattern p where type Metric p -- | Compares two patterns and returns a /non-negative/ number -- representing how different the patterns are. A result of @0@ -- indicates that the patterns are identical. difference :: p -> p -> Metric p -- | @'makeSimilar' target amount pattern@ returns a modified copy of -- @pattern@ that is more similar to @target@ than @pattern@ is. The -- magnitude of the adjustment is controlled by the @amount@ -- parameter, which should be a number between 0 and 1. Larger -- values for @amount@ permit greater adjustments. If @amount@=1, -- the result should be identical to the @target@. If @amount@=0, -- the result should be the unmodified @pattern@. makeSimilar :: p -> Metric p -> p -> p -- -- Using numbers as patterns. -- absDifference :: Num a => a -> a -> a absDifference x y = abs (x - y) adjustNum :: (Num a, Ord a, Eq a) => a -> a -> a -> a adjustNum target r x | r < 0 = error "Negative learning rate" | r > 1 = error "Learning rate > 1" | r == 1 = x | otherwise = adjustNum' r target x -- Note that parameters are swapped adjustNum' :: Num a => a -> a -> a -> a adjustNum' r target x = x + r*(target - x) {- $Num If you wish to use, say, a @Double@ as a pattern, one option is to use @no-warn-orphans@ and add the following to your code: > instance Double => Pattern Double where > type Metric Double = Double > difference = absDifference > makeSimilar = adjustNum -} -- -- Using numeric vectors as patterns. -- magnitudeSquared :: Num a => [a] -> a magnitudeSquared xs = sum $ map (\x -> x*x) xs -- | Calculates the square of the Euclidean distance between two -- vectors. euclideanDistanceSquared :: Num a => [a] -> [a] -> a euclideanDistanceSquared xs ys = magnitudeSquared $ zipWith (-) xs ys -- | @'adjustVector' target amount vector@ adjusts @vector@ to move it -- closer to @target@. The amount of adjustment is controlled by the -- learning rate @r@, which is a number between 0 and 1. Larger values -- of @r@ permit more adjustment. If @r@=1, the result will be -- identical to the @target@. If @amount@=0, the result will be the -- unmodified @pattern@. adjustVector :: (Num a, Ord a, Eq a) => [a] -> a -> [a] -> [a] adjustVector xs r ys | r < 0 = error "Negative learning rate" | r > 1 = error "Learning rate > 1" | r == 1 = xs | otherwise = zipWith (adjustNum' r) xs ys -- | A vector that has been normalised, i.e., the magnitude of the -- vector = 1. data NormalisedVector a = NormalisedVector [a] deriving Show -- | Normalises a vector normalise :: Floating a => [a] -> NormalisedVector a normalise xs = NormalisedVector $ map (/x) xs where x = norm xs norm :: Floating a => [a] -> a norm xs = sqrt $ sum (map f xs) where f x = x*x instance (Floating a, Fractional a, Ord a, Eq a) => Pattern (NormalisedVector a) where type Metric (NormalisedVector a) = a difference (NormalisedVector xs) (NormalisedVector ys) = euclideanDistanceSquared xs ys makeSimilar (NormalisedVector xs) r (NormalisedVector ys) = normalise $ adjustVector xs r ys -- | A vector that has been scaled so that all elements in the vector -- are between zero and one. To scale a set of vectors, use -- @'scaleAll'@. Alternatively, if you can identify a maximum and -- minimum value for each element in a vector, you can scale -- individual vectors using @'scale'@. data ScaledVector a = ScaledVector [a] deriving Show -- | Given a vector @qs@ of pairs of numbers, where each pair represents -- the maximum and minimum value to be expected at each index in -- @xs@, @'scale' qs xs@ scales the vector @xs@ element by element, -- mapping the maximum value expected at that index to one, and the -- minimum value to zero. scale :: Fractional a => [(a,a)] -> [a] -> ScaledVector a scale qs xs = ScaledVector $ zipWith scaleValue qs xs -- | Scales a set of vectors by determining the maximum and minimum -- values at each index in the vector, and mapping the maximum -- value to one, and the minimum value to zero. scaleAll :: (Fractional a, Ord a) => [[a]] -> [ScaledVector a] scaleAll xss = map (scale qs) xss where qs = quantify xss scaleValue :: Fractional a => (a,a) -> a -> a scaleValue (minX,maxX) x = (x - minX) / (maxX-minX) quantify :: Ord a => [[a]] -> [(a,a)] quantify xss = foldl' quantify' qs (tail xss) where qs = zip (head xss) (head xss) quantify' :: Ord a => [(a,a)] -> [a] -> [(a,a)] quantify' = zipWith f where f (minX, maxX) x = (min minX x, max maxX x) instance (Fractional a, Ord a, Eq a) => Pattern (ScaledVector a) where type Metric (ScaledVector a) = a difference (ScaledVector xs) (ScaledVector ys) = euclideanDistanceSquared xs ys makeSimilar (ScaledVector xs) r (ScaledVector ys) = ScaledVector $ adjustVector xs r ys {- $Vector If you wish to use raw numeric vectors as a pattern, one option is to use @no-warn-orphans@ and add the following to your code: > instance (Floating a, Fractional a, Ord a, Eq a) => Pattern [a] where > type Metric [a] = a > difference = euclideanDistanceSquared > makeSimilar = adjustVector -}