module Network.Trainer
( BackpropTrainer(..)
, CostFunction
, CostFunction'
, TrainingData
, Selection
, TrainCompletionPredicate
, trainNTimes
, trainUntilErrorLessThan
, trainUntil
, quadraticCost
, quadraticCost'
, minibatch
, online
, backprop
, inputs
, outputs
, deltas
, hiddenDeltas
, calculateNablas
, fit
, evaluate
) where
import Network.Network
import Network.Neuron
import Network.Layer
import System.Random
import System.Random.Shuffle (shuffle')
import Data.List.Split (chunksOf)
import Numeric.LinearAlgebra
data BackpropTrainer a = BackpropTrainer { eta :: a
, cost :: CostFunction a
, cost' :: CostFunction' a
}
type CostFunction a = Vector a -> Vector a -> a
type CostFunction' a = Vector a -> Vector a -> Vector a
type TrainingData a = (Vector a, Vector a)
type Selection a = [TrainingData a] -> [[TrainingData a]]
type TrainCompletionPredicate a = Network a -> BackpropTrainer a -> [TrainingData a] -> Int -> Bool
trainNTimes :: (Floating (Vector a), Container Vector a, Product a)
=> Network a -> BackpropTrainer a -> Selection a -> [TrainingData a] -> Int -> Network a
trainNTimes network trainer s dat n =
trainUntil network trainer s dat completion 0
where completion _ _ _ n' = (n == n')
trainUntilErrorLessThan :: (Floating (Vector a), Container Vector a, Product a, Ord a)
=> Network a -> BackpropTrainer a -> Selection a -> [TrainingData a] -> a -> Network a
trainUntilErrorLessThan network trainer s dat err =
trainUntil network trainer s dat (networkErrorLessThan err) 0
networkErrorLessThan :: (Floating (Vector a), Container Vector a, Product a, Ord a)
=> a -> Network a -> BackpropTrainer a -> [TrainingData a] -> Int -> Bool
networkErrorLessThan err network trainer dat _ = meanError < err
where meanError = (sum errors) / fromIntegral (length errors)
errors = map (evaluate trainer network) dat
trainUntil :: (Floating (Vector a), Container Vector a, Product a)
=> Network a -> BackpropTrainer a -> Selection a -> [TrainingData a] -> TrainCompletionPredicate a -> Int -> Network a
trainUntil network trainer s dat completion n =
if completion network trainer dat n
then network
else trainUntil network' trainer s dat completion (n+1)
where network' = fit s trainer network dat
quadraticCost :: (Floating (Vector a), Container Vector a)
=> Vector a -> Vector a -> a
quadraticCost y a = sumElements $ 0.5 * (a y) ** 2
quadraticCost' :: (Floating (Vector a))
=> Vector a -> Vector a -> Vector a
quadraticCost' y a = a y
minibatch :: (Floating (Vector a), Container Vector a)
=> Int -> [TrainingData a] -> [[TrainingData a]]
minibatch size = chunksOf size
online :: (Floating (Vector a), Container Vector a)
=> [TrainingData a] -> [[TrainingData a]]
online = minibatch 1
fit :: (Floating (Vector a), Container Vector a, Product a)
=> Selection a -> BackpropTrainer a -> Network a -> [TrainingData a] -> Network a
fit s t n examples = foldl (backprop t) n $
s (shuffle' examples (length examples) (mkStdGen 4))
backprop :: (Floating (Vector a), Container Vector a, Product a)
=> BackpropTrainer a -> Network a -> [TrainingData a] -> Network a
backprop t n es =
updateNetwork (length es) t (foldl (calculateNablas t n) emptyNetwork es) n
updateNetwork :: (Floating (Vector a), Container Vector a, Product a)
=> Int -> BackpropTrainer a -> Network a -> Network a -> Network a
updateNetwork mag t nablas n = addNetworks n
(Network $ map (scaleLayer $ 1 * (eta t) / (fromIntegral mag)) (layers nablas))
calculateNablas :: (Floating (Vector a), Container Vector a, Product a)
=> BackpropTrainer a -> Network a -> Network a -> TrainingData a -> Network a
calculateNablas t n nablas e = Network $ map (updateLayer t) (zip3 (layers n) ds os)
where ds = deltas t n e
os = outputs (fst e) n
updateLayer :: (Floating (Vector a), Container Vector a, Product a)
=> BackpropTrainer a -> (Layer a, Vector a, Vector a) -> Layer a
updateLayer t (l, delta, output) = Layer newWeight newBias n
where n = neuron l
newWeight = ((reshape 1 delta) <> (reshape (dim output) output))
newBias = delta
outputs :: (Floating (Vector a), Container Vector a, Product a)
=> Vector a -> Network a -> [Vector a]
outputs input network = scanl apply input (layers network)
inputs :: (Floating (Vector a), Container Vector a, Product a)
=> Vector a -> Network a -> [Vector a]
inputs input network = if null (layers network) then []
else unactivated : inputs activated (Network (tail $ layers network))
where unactivated = weightMatrix layer <> input + biasVector layer
layer = head $ layers network
activated = mapVector (activation (neuron layer)) unactivated
deltas :: (Floating (Vector a), Container Vector a, Product a)
=> BackpropTrainer a -> Network a -> TrainingData a -> [Vector a]
deltas t n example = hiddenDeltas
(Network (reverse (layers n))) outputDelta (tail $ reverse is)
++ [outputDelta]
where outputDelta = costd (snd example) output *
mapVector activationd lastInput
costd = cost' t
activationd = activation' (neuron (last (layers n)))
output = last os
lastInput = last is
is = inputs (fst example) n
os = outputs (fst example) n
hiddenDeltas :: (Floating (Vector a), Container Vector a, Product a)
=> Network a -> Vector a -> [Vector a] -> [Vector a]
hiddenDeltas n prevDelta is = if length (layers n) <= 1 then []
else delta : hiddenDeltas rest delta (tail is)
where rest = Network (tail $ layers n)
delta = (trans w) <> prevDelta * spv
w = weightMatrix (head $ layers n)
spv = mapVector (activation' (neuron (head $ layers n))) (head is)
evaluate :: (Floating (Vector a), Container Vector a, Product a)
=> BackpropTrainer a -> Network a -> TrainingData a -> a
evaluate t n example = (cost t) (snd example) (predict (fst example) n)