------------------------------------------------------------------------
-- |
-- Module      :  Data.Datamining.Pattern
-- Copyright   :  (c) 2012-2021 Amy de Buitléir
-- License     :  BSD-style
-- Maintainer  :  amy@nualeargais.ie
-- Stability   :  experimental
-- Portability :  portable
--
-- Tools for identifying patterns in data.
--
------------------------------------------------------------------------
{-# LANGUAGE FlexibleContexts      #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TypeFamilies          #-}
module Data.Datamining.Pattern
  (
    -- * Numbers as patterns
    adjustNum,
    absDifference,
    -- * Numeric vectors as patterns
    -- ** Raw vectors
    adjustVector,
    adjustVectorPreserveLength,
    euclideanDistanceSquared,
    magnitudeSquared,
    -- ** Normalised vectors
    NormalisedVector,
    normalise,
    -- ** Scaled vectors
    ScaledVector,
    scale,
    scaleAll
  ) where

import           Data.List (foldl')

--
-- Using numbers as patterns.
--

-- | Returns the absolute difference between two numbers.
absDifference :: Num a => a -> a -> a
absDifference :: a -> a -> a
absDifference a
x a
y = a -> a
forall a. Num a => a -> a
abs (a
x a -> a -> a
forall a. Num a => a -> a -> a
- a
y)

-- | Adjusts a number to make it more similar to the target.
adjustNum :: (Num a, Ord a, Eq a) => a -> a -> a -> a
adjustNum :: a -> a -> a -> a
adjustNum a
target a
r a
x
  | a
r a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
0     = [Char] -> a
forall a. HasCallStack => [Char] -> a
error [Char]
"Negative learning rate"
  | a
r a -> a -> Bool
forall a. Ord a => a -> a -> Bool
> a
1     = [Char] -> a
forall a. HasCallStack => [Char] -> a
error [Char]
"Learning rate > 1"
  | Bool
otherwise = a -> a -> a -> a
forall a. Num a => a -> a -> a -> a
adjustNum' a
r a
target a
x

-- Note that parameters are swapped
adjustNum' :: Num a => a -> a -> a -> a
adjustNum' :: a -> a -> a -> a
adjustNum' a
r a
target a
x = a
x a -> a -> a
forall a. Num a => a -> a -> a
+ a
ra -> a -> a
forall a. Num a => a -> a -> a
*(a
target a -> a -> a
forall a. Num a => a -> a -> a
- a
x)

--
-- Using numeric vectors as patterns.
--

-- | Returns the sum of the squares of the elements of a vector.
magnitudeSquared :: Num a => [a] -> a
magnitudeSquared :: [a] -> a
magnitudeSquared [a]
xs =  [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([a] -> a) -> [a] -> a
forall a b. (a -> b) -> a -> b
$ (a -> a) -> [a] -> [a]
forall a b. (a -> b) -> [a] -> [b]
map (\a
x -> a
xa -> a -> a
forall a. Num a => a -> a -> a
*a
x) [a]
xs

-- | Calculates the square of the Euclidean distance between two
--   vectors.
euclideanDistanceSquared :: Num a => [a] -> [a] -> a
euclideanDistanceSquared :: [a] -> [a] -> a
euclideanDistanceSquared [a]
xs [a]
ys = [a] -> a
forall a. Num a => [a] -> a
magnitudeSquared ([a] -> a) -> [a] -> a
forall a b. (a -> b) -> a -> b
$ (a -> a -> a) -> [a] -> [a] -> [a]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (-) [a]
xs [a]
ys

-- | @'adjustVector' target amount vector@ adjusts each element of
--   @vector@ to move it closer to the corresponding element of
--   @target@.
--   The amount of adjustment is controlled by the learning rate
--   @amount@, which is a number between 0 and 1.
--   Larger values of @amount@ permit more adjustment.
--   If @amount@=1, the result will be identical to the @target@.
--   If @amount@=0, the result will be the unmodified @pattern@.
--   If @target@ is shorter than @vector@, the result will be the same
--   length as @target@.
--   If @target@ is longer than @vector@, the result will be the same
--   length as @vector@.
adjustVector :: (Num a, Ord a, Eq a) => [a] -> a -> [a] -> [a]
adjustVector :: [a] -> a -> [a] -> [a]
adjustVector [a]
ts a
r [a]
xs
  | a
r a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
0     = [Char] -> [a]
forall a. HasCallStack => [Char] -> a
error [Char]
"Negative learning rate"
  | a
r a -> a -> Bool
forall a. Ord a => a -> a -> Bool
> a
1     = [Char] -> [a]
forall a. HasCallStack => [Char] -> a
error [Char]
"Learning rate > 1"
  | a
r a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
1     = [a]
ts
  | Bool
otherwise = (a -> a -> a) -> [a] -> [a] -> [a]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (a -> a -> a -> a
forall a. Num a => a -> a -> a -> a
adjustNum' a
r) [a]
ts [a]
xs

-- | Same as @'adjustVector'@, except that the result will always be
--   the same length as @vector@.
--   This means that if @target@ is shorter than @vector@, the
--   "leftover" elements of @vector@ will be copied the result,
--   unmodified.
adjustVectorPreserveLength :: (Num a, Ord a, Eq a) => [a] -> a -> [a] -> [a]
adjustVectorPreserveLength :: [a] -> a -> [a] -> [a]
adjustVectorPreserveLength [a]
ts a
r [a]
xs
  | a
r a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
0     = [Char] -> [a]
forall a. HasCallStack => [Char] -> a
error [Char]
"Negative learning rate"
  | a
r a -> a -> Bool
forall a. Ord a => a -> a -> Bool
> a
1     = [Char] -> [a]
forall a. HasCallStack => [Char] -> a
error [Char]
"Learning rate > 1"
  | a
r a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
1     = [a]
ts
  | Bool
otherwise = [a] -> a -> [a] -> [a]
forall a. (Num a, Ord a, Eq a) => [a] -> a -> [a] -> [a]
avpl [a]
ts a
r [a]
xs

avpl :: (Num a, Ord a, Eq a) => [a] -> a -> [a] -> [a]
avpl :: [a] -> a -> [a] -> [a]
avpl [a]
_ a
_ []          = []
avpl [] a
_ [a]
x          = [a]
x
avpl (a
t:[a]
ts) a
r (a
x:[a]
xs) = (a -> a -> a -> a
forall a. Num a => a -> a -> a -> a
adjustNum' a
r a
t a
x) a -> [a] -> [a]
forall a. a -> [a] -> [a]
: ([a] -> a -> [a] -> [a]
forall a. (Num a, Ord a, Eq a) => [a] -> a -> [a] -> [a]
avpl [a]
ts a
r [a]
xs)

-- | A vector that has been normalised, i.e., the magnitude of the
--   vector = 1.
data NormalisedVector a = NormalisedVector [a] deriving Int -> NormalisedVector a -> ShowS
[NormalisedVector a] -> ShowS
NormalisedVector a -> [Char]
(Int -> NormalisedVector a -> ShowS)
-> (NormalisedVector a -> [Char])
-> ([NormalisedVector a] -> ShowS)
-> Show (NormalisedVector a)
forall a. Show a => Int -> NormalisedVector a -> ShowS
forall a. Show a => [NormalisedVector a] -> ShowS
forall a. Show a => NormalisedVector a -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [NormalisedVector a] -> ShowS
$cshowList :: forall a. Show a => [NormalisedVector a] -> ShowS
show :: NormalisedVector a -> [Char]
$cshow :: forall a. Show a => NormalisedVector a -> [Char]
showsPrec :: Int -> NormalisedVector a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> NormalisedVector a -> ShowS
Show

-- | Normalises a vector
normalise :: Floating a => [a] -> NormalisedVector a
normalise :: [a] -> NormalisedVector a
normalise [a]
xs = [a] -> NormalisedVector a
forall a. [a] -> NormalisedVector a
NormalisedVector ([a] -> NormalisedVector a) -> [a] -> NormalisedVector a
forall a b. (a -> b) -> a -> b
$ (a -> a) -> [a] -> [a]
forall a b. (a -> b) -> [a] -> [b]
map (a -> a -> a
forall a. Fractional a => a -> a -> a
/a
x) [a]
xs
  where x :: a
x = [a] -> a
forall a. Floating a => [a] -> a
norm [a]
xs

norm :: Floating a => [a] -> a
norm :: [a] -> a
norm [a]
xs = a -> a
forall a. Floating a => a -> a
sqrt (a -> a) -> a -> a
forall a b. (a -> b) -> a -> b
$ [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ((a -> a) -> [a] -> [a]
forall a b. (a -> b) -> [a] -> [b]
map a -> a
forall a. Num a => a -> a
f [a]
xs)
  where f :: a -> a
f a
x = a
xa -> a -> a
forall a. Num a => a -> a -> a
*a
x

-- | A vector that has been scaled so that all elements in the vector
--   are between zero and one. To scale a set of vectors, use
--   @'scaleAll'@. Alternatively, if you can identify a maximum and
--   minimum value for each element in a vector, you can scale
--   individual vectors using @'scale'@.
data ScaledVector a = ScaledVector [a] deriving Int -> ScaledVector a -> ShowS
[ScaledVector a] -> ShowS
ScaledVector a -> [Char]
(Int -> ScaledVector a -> ShowS)
-> (ScaledVector a -> [Char])
-> ([ScaledVector a] -> ShowS)
-> Show (ScaledVector a)
forall a. Show a => Int -> ScaledVector a -> ShowS
forall a. Show a => [ScaledVector a] -> ShowS
forall a. Show a => ScaledVector a -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [ScaledVector a] -> ShowS
$cshowList :: forall a. Show a => [ScaledVector a] -> ShowS
show :: ScaledVector a -> [Char]
$cshow :: forall a. Show a => ScaledVector a -> [Char]
showsPrec :: Int -> ScaledVector a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> ScaledVector a -> ShowS
Show

-- | Given a vector @qs@ of pairs of numbers, where each pair represents
--   the maximum and minimum value to be expected at each index in
--   @xs@, @'scale' qs xs@ scales the vector @xs@ element by element,
--   mapping the maximum value expected at that index to one, and the
--   minimum value to zero.
scale :: Fractional a => [(a,a)] -> [a] -> ScaledVector a
scale :: [(a, a)] -> [a] -> ScaledVector a
scale [(a, a)]
qs [a]
xs = [a] -> ScaledVector a
forall a. [a] -> ScaledVector a
ScaledVector ([a] -> ScaledVector a) -> [a] -> ScaledVector a
forall a b. (a -> b) -> a -> b
$ ((a, a) -> a -> a) -> [(a, a)] -> [a] -> [a]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (a, a) -> a -> a
forall a. Fractional a => (a, a) -> a -> a
scaleValue [(a, a)]
qs [a]
xs

-- | Scales a set of vectors by determining the maximum and minimum
--   values at each index in the vector, and mapping the maximum
--   value to one, and the minimum value to zero.
scaleAll :: (Fractional a, Ord a) => [[a]] -> [ScaledVector a]
scaleAll :: [[a]] -> [ScaledVector a]
scaleAll [[a]]
xss = ([a] -> ScaledVector a) -> [[a]] -> [ScaledVector a]
forall a b. (a -> b) -> [a] -> [b]
map ([(a, a)] -> [a] -> ScaledVector a
forall a. Fractional a => [(a, a)] -> [a] -> ScaledVector a
scale [(a, a)]
qs) [[a]]
xss
  where qs :: [(a, a)]
qs = [[a]] -> [(a, a)]
forall a. Ord a => [[a]] -> [(a, a)]
quantify [[a]]
xss

scaleValue :: Fractional a => (a,a) -> a -> a
scaleValue :: (a, a) -> a -> a
scaleValue (a
minX,a
maxX) a
x = (a
x a -> a -> a
forall a. Num a => a -> a -> a
- a
minX) a -> a -> a
forall a. Fractional a => a -> a -> a
/ (a
maxXa -> a -> a
forall a. Num a => a -> a -> a
-a
minX)

quantify :: Ord a => [[a]] -> [(a,a)]
quantify :: [[a]] -> [(a, a)]
quantify [[a]]
xss = ([(a, a)] -> [a] -> [(a, a)]) -> [(a, a)] -> [[a]] -> [(a, a)]
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' [(a, a)] -> [a] -> [(a, a)]
forall a. Ord a => [(a, a)] -> [a] -> [(a, a)]
quantify' [(a, a)]
qs ([[a]] -> [[a]]
forall a. [a] -> [a]
tail [[a]]
xss)
  where qs :: [(a, a)]
qs = [a] -> [a] -> [(a, a)]
forall a b. [a] -> [b] -> [(a, b)]
zip ([[a]] -> [a]
forall a. [a] -> a
head [[a]]
xss) ([[a]] -> [a]
forall a. [a] -> a
head [[a]]
xss)

quantify' :: Ord a => [(a,a)] -> [a] -> [(a,a)]
quantify' :: [(a, a)] -> [a] -> [(a, a)]
quantify' = ((a, a) -> a -> (a, a)) -> [(a, a)] -> [a] -> [(a, a)]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (a, a) -> a -> (a, a)
forall b. Ord b => (b, b) -> b -> (b, b)
f
  where f :: (b, b) -> b -> (b, b)
f (b
minX, b
maxX) b
x = (b -> b -> b
forall a. Ord a => a -> a -> a
min b
minX b
x, b -> b -> b
forall a. Ord a => a -> a -> a
max b
maxX b
x)