-- | Feature-matrix preprocessing helpers: adding and removing a bias column,
-- per-column mean / standard-deviation normalization, polynomial feature
-- expansion, and splitting a data matrix into features and targets.
module MachineLearning
(
addBiasDimension
, removeBiasDimension
, meanStddev
, featureNormalization
, mapFeatures
, splitToXY
)
where
import MachineLearning.Types (Vector, Matrix)
import qualified Numeric.LinearAlgebra as LA
import Numeric.LinearAlgebra((|||), (??))
import qualified Numeric.GSL.Statistics as Stat
import Control.Monad (replicateM, mfilter, MonadPlus)
import Data.List (sort, group, foldl')
import qualified Data.Vector as V
-- | Prepend a bias column of ones to a feature matrix.
--
-- The literal @1@ is joined to the left of the argument with '|||'; this
-- relies on hmatrix expanding a single-element matrix to the height of its
-- neighbour, so the result has one more column than the input.
addBiasDimension :: Matrix -> Matrix
addBiasDimension = (1 |||)
-- | Drop the first column of a matrix while keeping every row.
--
-- This undoes 'addBiasDimension'.
removeBiasDimension :: Matrix -> Matrix
removeBiasDimension = (?? (LA.All, LA.Drop 1))
-- | Compute the mean and standard deviation of every column of a matrix.
--
-- Returns @(means, stddevs)@ as two single-row matrices with one entry per
-- column of @x@, which is the shape 'featureNormalization' takes as its first
-- argument.
--
-- A column whose standard deviation is not strictly greater than a tiny
-- threshold (a constant column, for instance) is reported with a standard
-- deviation of 1, so that dividing by it leaves the column merely
-- mean-centred instead of dividing by (almost) zero.  The previous guard
-- (@s < 2@) also replaced perfectly ordinary deviations such as 0.5 or 1.5,
-- which silently disabled scaling for those features.
meanStddev :: Matrix -> (Matrix, Matrix)
meanStddev x = (LA.row means, LA.row (map safeStddev stddevs))
  where
    cols = LA.toColumns x
    means = map Stat.mean cols
    -- Pass the already computed mean of each column to 'Stat.stddev_m'.
    stddevs = zipWith Stat.stddev_m means cols

    -- Smallest deviation that is still used as a divisor.
    minStddev :: Double
    minStddev = 1e-10

    -- Written as @s > minStddev@ (rather than @s < minStddev@) so that a NaN
    -- deviation also falls back to 1: every comparison with NaN is False.
    -- NOTE(review): presumably a single-row input is what yields NaN here --
    -- confirm against the GSL documentation.
    safeStddev :: Double -> Double
    safeStddev s
      | s > minStddev = s
      | otherwise = 1
-- | Normalize @x@ with precomputed statistics: subtract @means@, then divide
-- the result by @stddevs@.
--
-- The statistics pair has the shape returned by 'meanStddev'.
featureNormalization :: Fractional a => (a, a) -> a -> a
featureNormalization (means, stddevs) x = centred / stddevs
  where
    centred = x - means
-- | Extend a feature matrix with polynomial terms of its columns.
--
-- For degree 1 the matrix is returned unchanged.  Otherwise the result keeps
-- the original columns and appends, for each degree from 2 up to @degree@ in
-- that order, one column per monomial of exactly that total degree (as
-- enumerated by 'polynomialTerms').
mapFeatures :: Int -> Matrix -> Matrix
mapFeatures 1 x = x
mapFeatures degree x =
  LA.fromColumns (baseColumns ++ concatMap termsOfDegree [2 .. degree])
  where
    baseColumns = LA.toColumns x
    -- Boxed vector of the columns, used for lookup by position.
    indexed = V.fromList baseColumns
    columnCount = V.length indexed
    descendingIndices = [columnCount - 1, columnCount - 2 .. 0]

    -- Element-wise product of @column ^ power@ over the given
    -- @(column index, power)@ pairs, starting from the constant 1.
    monomial :: [(Int, Int)] -> Vector
    monomial factors =
      foldl' (*) 1 [(indexed V.! column) ^ power | (column, power) <- factors]

    -- All monomials of total degree @d@.  Indices are enumerated in
    -- descending order and the list is then reversed; for two columns and
    -- degree 2 this yields x0^2, x0*x1, x1^2.
    termsOfDegree :: Int -> [Vector]
    termsOfDegree d = reverse (map monomial (polynomialTerms d descendingIndices))
-- | Enumerate the monomials of the given total degree over a list of
-- variables.
--
-- Every combination produced by 'combinationsWithReplacement' is run-length
-- encoded, so each monomial is a list of @(variable, power)@ pairs.
polynomialTerms :: Ord a => Int -> [a] -> [[(a, Int)]]
polynomialTerms degree terms =
  [ [(variable, length run) | run@(variable : _) <- group combo]
  | combo <- combinationsWithReplacement degree terms
  ]
-- | Selections of @sample@ elements drawn from @objects@ with repetition,
-- restricted to those that are already in sorted order.
--
-- When the objects are distinct, this keeps exactly one representative per
-- multiset of chosen elements.
combinationsWithReplacement :: (MonadPlus m, Ord a) => Int -> m a -> m [a]
combinationsWithReplacement sample objects = mfilter isSorted candidates
  where
    -- Every ordered selection of @sample@ elements, repetitions allowed.
    candidates = replicateM sample objects
    -- A selection is kept only if sorting leaves it unchanged.
    isSorted picks = sort picks == picks
-- | Split a data matrix into features and targets.
--
-- The first component holds every column except the last one; the second is
-- the last column flattened into a vector.
splitToXY :: LA.Element t => LA.Matrix t -> (LA.Matrix t, LA.Vector t)
splitToXY m = (features, targets)
  where
    features = m ?? (LA.All, LA.DropLast 1)
    targets = LA.flatten (m ?? (LA.All, LA.TakeLast 1))