{-# OPTIONS_HADDOCK show-extensions #-}

{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE BangPatterns #-}

{-|
Module      : Numeric.Neural.Normalization
Description : normalizing data
Copyright   : (c) Lars Brünjes, 2016
License     : MIT
Maintainer  : brunjlar@gmail.com
Stability   : experimental
Portability : portable

This module provides utilities for data normalization.
-}

module Numeric.Neural.Normalization
    ( encode1ofN
    , decode1ofN
    , encodeEquiDist
    , decodeEquiDist
    , crossEntropyError
    , white
    , whiten
    , Classifier
    , mkStdClassifier
    ) where

import Control.Arrow          (first)
import Control.Category
import Data.Proxy
import GHC.TypeLits
import GHC.TypeLits.Witnesses
import Data.MyPrelude
import Data.Utils.Analytic
import Data.Utils.Statistics
import Data.Utils.Traversable
import Data.Utils.Vector
import Numeric.Neural.Layer
import Numeric.Neural.Model
import Prelude                hiding (id, (.))

-- | Provides "1 of @n@" encoding for enumerable types.
--
-- >>> :set -XDataKinds
-- >>> encode1ofN LT :: Vector 3 Int
-- [1,0,0]
--
-- >>> encode1ofN EQ :: Vector 3 Int
-- [0,1,0]
--
-- >>> encode1ofN GT :: Vector 3 Int
-- [0,0,1]
--
encode1ofN :: (Enum a, Num b, KnownNat n) => a -> Vector n b
encode1ofN x = generate $ \i -> if i == fromEnum x then 1 else 0

-- | Provides "1 of @n@" decoding for enumerable types.
--
-- >>> decode1ofN [0.9, 0.3, 0.1 :: Double] :: Ordering
-- LT
--
-- >>> decode1ofN [0.7, 0.8, 0.6 :: Double] :: Ordering
-- EQ
--
-- >>> decode1ofN [0.2, 0.3, 0.8 :: Double] :: Ordering
-- GT
--
decode1ofN :: (Enum a, Num b, Ord b, Foldable f) => f b -> a
decode1ofN = toEnum . fst . maximumBy (compare `on` snd) . zip [0..] . toList

-- Computes the vertices of a regular simplex with @n@ vertices (for @n >= 2@),
-- inscribed in the unit sphere of (n - 1)-dimensional space. The construction
-- is recursive: embed the simplex with @n - 1@ vertices into one more
-- dimension, place an apex on the new axis at the common vertex distance from
-- all old vertices, then recenter on the centroid and rescale so that every
-- vertex has unit norm again. The second component of the accumulator is the
-- current pairwise vertex distance.
polyhedron :: Floating a => Int -> [[a]]
polyhedron = fst . p where

    p 2 = ([[-1], [1]], 2)
    p n = let (xs, d) = p (n - 1)
              y       = sqrt (d * d - 1)
              v       = y : replicate (n - 2) 0
              xs'     = v : ((0 :) <$> xs)
              shift   = y / fromIntegral n
              shifted = (\(z : zs) -> (z - shift : zs)) <$> xs'
              scale   = 1 / (y - shift)
              scaled  = ((scale *) <$>) <$> shifted
          in  (scaled, d * scale)

-- Like 'polyhedron', but indexed by a type-level natural: for @Proxy n@ it
-- computes the @n + 1@ simplex vertices in @n@-dimensional space, matching
-- the codomain @'Vector' n@ of 'encodeEquiDist'.
polyhedron' :: forall a n. (Floating a, KnownNat n) => Proxy n -> [[a]]
polyhedron' p = withNatOp (%+) p (Proxy :: Proxy 1) $
    polyhedron (fromIntegral $ natVal (Proxy :: Proxy (n + 1)))

-- | Provides equidistant encoding for enumerable types.
--
-- >>> :set -XDataKinds
-- >>> encodeEquiDist LT :: Vector 2 Float
-- [1.0,0.0]
--
-- >>> encodeEquiDist EQ :: Vector 2 Float
-- [-0.5,-0.86602545]
--
-- >>> encodeEquiDist GT :: Vector 2 Float
-- [-0.5,0.86602545]
--
encodeEquiDist :: forall a b n. (Enum a, Floating b, KnownNat n) => a -> Vector n b
encodeEquiDist x = let ys = polyhedron' (Proxy :: Proxy n)
                       y  = ys !! fromEnum x
                   in  fromJust (fromList y)

-- | Provides equidistant decoding for enumerable types.
--
-- >>> :set -XDataKinds
-- >>> let u = fromJust (fromList [0.9, 0.2]) :: Vector 2 Double
-- >>> decodeEquiDist u :: Ordering
-- LT
--
-- >>> let v = fromJust (fromList [-0.4, -0.5]) :: Vector 2 Double
-- >>> decodeEquiDist v :: Ordering
-- EQ
--
-- >>> let w = fromJust (fromList [0.1, 0.8]) :: Vector 2 Double
-- >>> decodeEquiDist w :: Ordering
-- GT
--
decodeEquiDist :: forall a b n. (Enum a, Ord b, Floating b, KnownNat n) => Vector n b -> a
decodeEquiDist y = let xs  = polyhedron' (Proxy :: Proxy n)
                       xs' = (fromJust . fromList) <$> xs
                       ds  = [(j, sqDiff x y) | (j, x) <- zip [0..] xs']
                       i   = fst $ minimumBy (compare `on` snd) ds
                   in  toEnum i
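
-- The equidistant codecs above work because the simplex vertices computed by
-- 'polyhedron' are pairwise equidistant and of unit norm. A quick GHCi sanity
-- check of the first property (an illustrative sketch only; the local names
-- @sqDist@ and @ps@ are ad-hoc and not part of this module; distances are
-- rounded to sidestep floating-point noise):
--
-- >>> let sqDist u v = sum [(p - q) * (p - q) | (p, q) <- zip u v]
-- >>> let ps = polyhedron 3 :: [[Double]]
-- >>> [round (100 * sqDist u v) :: Int | (u, i) <- zip ps [0 :: Int ..], (v, j) <- zip ps [0 ..], i < j]
-- [300,300,300]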

-- | Computes the cross entropy error (assuming "1 of @n@" encoding).
--
-- >>> runDiff (crossEntropyError LT) (cons 0.8 (cons 0.1 (cons 0.1 nil))) :: Identity Double
-- Identity 0.2231435513142097
--
-- >>> runDiff (crossEntropyError EQ) (cons 0.8 (cons 0.1 (cons 0.1 nil))) :: Identity Double
-- Identity 2.3025850929940455
--
crossEntropyError :: (Enum a, KnownNat n) => a -> Diff (Vector n) Identity
crossEntropyError a = Diff $ \ys -> Identity $ negate $ log $ encode1ofN a <%> ys

-- | Function 'white' takes a batch of values (of a specific shape)
--   and computes a normalization function which whitens values of that shape,
--   so that each component has zero mean and unit variance.
--   (A component with zero variance is merely centered, i.e. mapped to zero.)
--
-- >>> :set -XDataKinds
-- >>> let xss = [cons 1 (cons 1 nil), cons 1 (cons 2 nil), cons 1 (cons 3 nil)] :: [Vector 2 Double]
-- >>> let f = white xss
-- >>> f <$> xss
-- [[0.0,-1.2247448713915887],[0.0,0.0],[0.0,1.2247448713915887]]
--
white :: (Applicative f, Traversable t, Eq a, Floating a) => t (f a) -> f a -> f a
white xss = ((w <$> sequenceA xss) <*>) where

    w xs = case toList xs of
        []  -> id
        xs' -> let (_, !m, !v) = countMeanVar xs'
                   !s          = if v == 0 then 1 else 1 / sqrt v
               in  \x -> (x - m) * s

-- | Modifies a 'Model' by whitening the input before feeding it into the embedded component.
--
whiten :: (Applicative f, Traversable t)
          => Model f g a b c -- ^ original model
          -> t b             -- ^ batch of input data
          -> Model f g a b c
whiten (Model c e i o) xss = Model c e' i' o where

    w  = white $ i <$> xss
    i' = w . i
    e' = first w . e

-- | A @'Classifier' f n b c@ is a 'Model' that classifies items of type @b@
--   into categories of type @c@, using a component with input shape @f@ and
--   output shape @'Vector' n@.
--
type Classifier f n b c = StdModel f (Vector n) b c

-- | Makes a standard 'Classifier' which uses a softmax layer,
--   "1 of @n@" encoding, and the cross entropy error.
--
mkStdClassifier :: (Functor f, KnownNat n, Enum c)
                   => Component f (Vector n) -- ^ the embedded component
                   -> (b -> f Double)        -- ^ converts input
                   -> Classifier f n b c
mkStdClassifier c i = mkStdModel (cArr (Diff softmax) . c) crossEntropyError i decode1ofN
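
-- A minimal usage sketch for 'mkStdClassifier' (illustrative only: the
-- component @myComponent@ is a hypothetical placeholder and is not defined in
-- this library; any @'Component' f ('Vector' n)@ and any conversion
-- @b -> f Double@ will do, here simply @id@):
--
-- > data Species = Setosa | Versicolor | Virginica deriving (Enum, Show)
-- >
-- > myClassifier :: Classifier (Vector 4) 3 (Vector 4 Double) Species
-- > myClassifier = mkStdClassifier myComponent id
--
-- The resulting model feeds inputs through the component, applies 'softmax'
-- to get class probabilities, is trained against 'crossEntropyError' of the
-- "1 of @n@" code, and decodes network outputs to the most probable
-- @Species@ via 'decode1ofN'.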