module Hopfield.Boltzmann.ClassificationBoltzmannMachine where
import Data.Maybe
import Control.Monad
import Control.Monad.Random
import Data.List
import Data.Vector ((!))
import qualified Data.Vector as V
import qualified Numeric.Container as NC
import Hopfield.Common
import Hopfield.Util
learningRate :: Double
learningRate = 0.001
data Mode = Hidden | Visible | Classification
deriving(Eq, Show)
data BoltzmannData = BoltzmannData {
weightsB :: Weights
, classificationWeights :: Weights
, biasB :: Bias
, biasC :: Bias
, biasD :: Bias
, patternsB :: [Pattern]
, hiddenCount :: Int
, pattern_to_class :: [(Pattern, Int)]
}
deriving(Show)
getDimension :: Mode -> Weights -> Int
getDimension Hidden ws = V.length $ ws
getDimension Visible ws = V.length $ ws ! 0
getDimension Classification ws = V.length $ ws ! 0
buildCBoltzmannData :: MonadRandom m => [Pattern] -> m BoltzmannData
buildCBoltzmannData [] = error "Train patterns are empty"
buildCBoltzmannData pats =
buildCBoltzmannData' pats nr_visible
where nr_visible = V.length (head pats)
buildCBoltzmannData' :: MonadRandom m => [Pattern] -> Int -> m BoltzmannData
buildCBoltzmannData' [] _ = error "Train patterns are empty"
buildCBoltzmannData' pats nrHidden
| first_len == 0
= error "Cannot have empty patterns"
| any (\x -> V.length x /= first_len) pats
= error "All training patterns must have the same length"
| otherwise = trainBoltzmann pats nrHidden
where
first_len = V.length $ head pats
getActivationSum :: Weights -> Bias -> Pattern -> Int -> Double
getActivationSum ws bias pat index
= bias ! index + dotProduct (columnVector ws index) (toDouble pat)
getActivationProbabilityVisible :: Weights -> Bias -> Pattern -> Int -> Double
getActivationProbabilityVisible ws bias h index
= activation $ getActivationSum ws bias h index
getActivationSumHidden :: Weights -> Weights -> Bias -> Pattern -> Pattern -> Int -> Double
getActivationSumHidden ws u c v y index
| Just e <- validPattern Visible ws v = error e
| Just e <- validPattern Classification u y = error e
| otherwise = c ! index + dotProduct (ws ! index) (toDouble v) + dotProduct (u ! index) (toDouble y)
getHiddenSums :: Weights -> Weights -> Bias -> Pattern -> Pattern -> V.Vector Double
getHiddenSums ws u c v y
= V.fromList [getActivationSumHidden ws u c v y i | i <- [0 .. (V.length ws) 1] ]
getActivationProbabilityHidden :: Weights -> Weights -> Bias -> Pattern -> Pattern -> Int -> Double
getActivationProbabilityHidden ws u c v y index
= activation $ getActivationSumHidden ws u c v y index
updateNeuronVisible :: MonadRandom m => Weights -> Bias -> Pattern -> Int -> m Int
updateNeuronVisible ws bias h index
= gibbsSampling $ getActivationProbabilityVisible ws bias h index
updateNeuronHidden :: MonadRandom m => Weights -> Weights -> Bias -> Pattern -> Pattern -> Int -> m Int
updateNeuronHidden ws u c v y index
= gibbsSampling $ getActivationProbabilityHidden ws u c v y index
updateVisible :: MonadRandom m => Weights -> Bias -> Pattern -> m Pattern
updateVisible ws bias h
| Just e <- validPattern Hidden ws h = error e
| otherwise = V.fromList `liftM` mapM (updateNeuronVisible ws bias h) updatedIndices
where
updatedIndices = [0 .. (V.length $ ws ! 0) 1]
updateHidden :: MonadRandom m => Weights -> Weights -> Bias -> Pattern -> Pattern -> m Pattern
updateHidden ws u c v y
| Just e <- validPattern Visible ws v = error e
| otherwise = V.fromList `liftM` mapM (updateNeuronHidden ws u c v y) updatedIndices
where
updatedIndices = [0 .. (V.length ws) 1 ]
updateClassification :: Weights -> Bias -> Pattern -> Pattern
updateClassification u d h
= V.fromList [ if n == newClass then 1 else 0 | n <- allClasses]
where
expActivation = exp . (getActivationSum u d h)
newClass = maximumBy (compareBy expActivation) allClasses
allClasses = [0 .. nrClasses 1]
nrClasses = V.length d
getClassificationVector :: [(Pattern, Int)] -> Pattern -> Pattern
getClassificationVector pat_classes pat
= V.fromList [ if n == pat_class then 1 else 0 | n <- map snd pat_classes]
where pat_class = fromJust $ lookup pat pat_classes
oneTrainingStep :: MonadRandom m => BoltzmannData -> Pattern -> m BoltzmannData
oneTrainingStep (BoltzmannData ws u b c d pats nr_h pat_to_class) v = do
let y = getClassificationVector pat_to_class v
h_sum = getHiddenSums ws u c v y
h <- updateHidden ws u c v y
v' <- updateVisible ws b h
let y' = updateClassification u d h
(h_sum' :: V.Vector Double) = getHiddenSums ws u c v' y'
getOuterProduct v1 v2 = NC.toLists $ (fromDataVector v1) `NC.outer` (fromDataVector $ toDouble v2)
getDelta pos neg = map (map (* learningRate)) $ combine () pos neg
updateWeights w d_w = vector2D $ combine (+) (list2D w) d_w
deltaBias v1 v2 = V.map ((* learningRate) . fromIntegral) (combineVectors () v1 v2)
deltaBiasC v1 v2 = V.map (* learningRate) (combineVectors () v1 v2)
updateBias bias delta_bias = combineVectors (+) bias delta_bias
pos_ws = getOuterProduct h_sum v
neg_ws = getOuterProduct h_sum' v'
pos_u = getOuterProduct h_sum y
neg_u = getOuterProduct h_sum' y'
d_ws = getDelta pos_ws neg_ws
new_ws = updateWeights ws d_ws
d_u = getDelta pos_u neg_u
new_u = updateWeights u d_u
new_b = updateBias b (deltaBias v v')
new_c = updateBias c (deltaBiasC h_sum h_sum')
new_d = updateBias d (deltaBias y y')
return $ BoltzmannData new_ws new_u new_b new_c new_d pats nr_h pat_to_class
trainBoltzmann :: MonadRandom m => [Pattern] -> Int -> m BoltzmannData
trainBoltzmann pats nr_h = do
ws <- vector2D `liftM` genWeights
u <- vector2D `liftM` genU
foldM oneTrainingStep (BoltzmannData ws u b c d pats nr_h pats_classes) pats
where
genWeights = replicateM nr_h . replicateM nr_visible $ normal 0.0 0.01
genU = replicateM nr_h . replicateM nr_classes $ normal 0.0 0.01
b = V.fromList $ replicate nr_visible 0.0
c = V.fromList $ replicate nr_h 0.0
d = V.fromList $ replicate nr_classes 0.0
nr_classes = length nub_pats
nub_pats = nub pats
pats_classes = zip nub_pats [0 .. ]
nr_visible = V.length $ head pats
matchPatternCBoltzmann :: BoltzmannData -> Pattern -> Int
matchPatternCBoltzmann bm v
| Just e <- validPattern Visible (weightsB bm) v = error e
| otherwise = fromJust $ maxPat `elemIndex` pats
where
pats_classes = pattern_to_class bm
pats = patternsB bm
patternsWithClassifications = [ (p, getClassificationVector pats_classes p) | p <- map fst pats_classes]
probability classification = exp $ (getFreeEnergy bm v classification)
(maxPat, _) = maximumBy (compareBy $ probability . snd) patternsWithClassifications
getFreeEnergy :: BoltzmannData -> Pattern -> Pattern -> Double
getFreeEnergy (BoltzmannData ws u _b c d _pats _nrH _pats_classes) v y
= dotProduct d (toDouble y) (V.sum $ V.map softplus hiddenSums)
where hiddenSums = getHiddenSums ws u c v y
activation :: Double -> Double
activation x = 1.0 / (1.0 + exp (x))
softplus :: Double -> Double
softplus x = log (1.0 + exp x)
validClassificationVector :: Pattern -> Int -> Maybe String
validClassificationVector pat size
| V.length pat /= size = Just "classification vector does not match expected size"
| V.any (\x -> notElem x [0, 1]) pat = Just "Non binary element in classification pattern"
| V.sum pat /=1 = Just "Invalid classification vector"
| otherwise = Nothing
validPattern :: Mode -> Weights -> Pattern -> Maybe String
validPattern mode ws pat
| getDimension mode ws /= V.length pat = Just $ "Size of pattern must match network size in " ++ show mode
| V.any (\x -> notElem x [0, 1]) pat = Just "Non binary element in Boltzmann pattern"
| otherwise = Nothing
validWeights :: Weights -> Maybe String
validWeights ws
| V.null ws = Just "The matrix of weights is empty"
| V.any (\x -> V.length x /= V.length (ws ! 0)) ws = Just "Weights matrix ill formed"
| otherwise = Nothing