------------------------------------------------------------------------
-- |
-- Module      :  Data.Datamining.Pattern
-- Copyright   :  (c) Amy de Buitléir 2012-2015
-- License     :  BSD-style
-- Maintainer  :  amy@nualeargais.ie
-- Stability   :  experimental
-- Portability :  portable
--
-- Tools for identifying patterns in data.
--
------------------------------------------------------------------------
{-# LANGUAGE TypeFamilies, FlexibleContexts, MultiParamTypeClasses #-}
module Data.Datamining.Pattern
  (
    -- * Numbers as patterns
    adjustNum,
    absDifference,
    -- * Numeric vectors as patterns
    -- ** Raw vectors
    adjustVector,
    adjustVectorPreserveLength,
    euclideanDistanceSquared,
    magnitudeSquared,
    -- ** Normalised vectors
    NormalisedVector,
    normalise,
    -- ** Scaled vectors
    ScaledVector,
    scale,
    scaleAll
  ) where

import Data.List (foldl')

--
-- Using numbers as patterns.
--

absDifference :: Num a => a -> a -> a
absDifference x y = abs (x - y)

adjustNum :: (Num a, Ord a, Eq a) => a -> a -> a -> a
adjustNum target r x
  | r < 0     = error "Negative learning rate"
  | r > 1     = error "Learning rate > 1"
  | otherwise = adjustNum' r target x

-- Note that parameters are swapped
adjustNum' :: Num a => a -> a -> a -> a
adjustNum' r target x = x + r*(target - x)

--
-- Using numeric vectors as patterns.
--

magnitudeSquared :: Num a => [a] -> a
magnitudeSquared xs =  sum $ map (\x -> x*x) xs

-- | Calculates the square of the Euclidean distance between two
--   vectors.
euclideanDistanceSquared :: Num a => [a] -> [a] -> a
euclideanDistanceSquared xs ys = magnitudeSquared $ zipWith (-) xs ys

-- | @'adjustVector' target amount vector@ adjusts each element of
--   @vector@ to move it closer to the corresponding element of
--   @target@.
--   The amount of adjustment is controlled by the learning rate
--   @amount@, which is a number between 0 and 1.
--   Larger values of @amount@ permit more adjustment.
--   If @amount@=1, the result will be identical to the @target@.
--   If @amount@=0, the result will be the unmodified @pattern@.
--   If @target@ is shorter than @vector@, the result will be the same
--   length as @target@.
--   If @target@ is longer than @vector@, the result will be the same
--   length as @vector@.
adjustVector :: (Num a, Ord a, Eq a) => [a] -> a -> [a] -> [a]
adjustVector ts r xs
  | r < 0     = error "Negative learning rate"
  | r > 1     = error "Learning rate > 1"
  | r == 1     = ts
  | otherwise = zipWith (adjustNum' r) ts xs

-- | Same as @'adjustVector'@, except that the result will always be
--   the same length as @vector@.
--   This means that if @target@ is shorter than @vector@, the
--   "leftover" elements of @vector@ will be copied the result,
--   unmodified.
adjustVectorPreserveLength :: (Num a, Ord a, Eq a) => [a] -> a -> [a] -> [a]
adjustVectorPreserveLength ts r xs
  | r < 0     = error "Negative learning rate"
  | r > 1     = error "Learning rate > 1"
  | r == 1     = ts
  | otherwise = avpl ts r xs

avpl :: (Num a, Ord a, Eq a) => [a] -> a -> [a] -> [a]
avpl _ _ [] = []
avpl [] _ x = x
avpl (t:ts) r (x:xs) = (adjustNum' r t x) : (avpl ts r xs)

-- | A vector that has been normalised, i.e., the magnitude of the
--   vector = 1.
data NormalisedVector a = NormalisedVector [a] deriving Show

-- | Normalises a vector
normalise :: Floating a => [a] -> NormalisedVector a
normalise xs = NormalisedVector $ map (/x) xs
  where x = norm xs

norm :: Floating a => [a] -> a
norm xs = sqrt $ sum (map f xs)
  where f x = x*x

-- | A vector that has been scaled so that all elements in the vector
--   are between zero and one. To scale a set of vectors, use
--   @'scaleAll'@. Alternatively, if you can identify a maximum and
--   minimum value for each element in a vector, you can scale
--   individual vectors using @'scale'@.
data ScaledVector a = ScaledVector [a] deriving Show

-- | Given a vector @qs@ of pairs of numbers, where each pair represents
--   the maximum and minimum value to be expected at each index in
--   @xs@, @'scale' qs xs@ scales the vector @xs@ element by element,
--   mapping the maximum value expected at that index to one, and the
--   minimum value to zero.
scale :: Fractional a => [(a,a)] -> [a] -> ScaledVector a
scale qs xs = ScaledVector $ zipWith scaleValue qs xs

-- | Scales a set of vectors by determining the maximum and minimum
--   values at each index in the vector, and mapping the maximum
--   value to one, and the minimum value to zero.
scaleAll :: (Fractional a, Ord a) => [[a]] -> [ScaledVector a]
scaleAll xss = map (scale qs) xss
  where qs = quantify xss

scaleValue :: Fractional a => (a,a) -> a -> a
scaleValue (minX,maxX) x = (x - minX) / (maxX-minX)

quantify :: Ord a => [[a]] -> [(a,a)]
quantify xss = foldl' quantify' qs (tail xss)
  where qs = zip (head xss) (head xss)

quantify' :: Ord a => [(a,a)] -> [a] -> [(a,a)]
quantify' = zipWith f
  where f (minX, maxX) x = (min minX x, max maxX x)