{-# LANGUAGE PatternGuards, ScopedTypeVariables #-} -- | Base Restricted Boltzmann machine. module Hopfield.Boltzmann.ClassificationBoltzmannMachine where -- http://en.wikipedia.org/wiki/Restricted_Boltzmann_machine -- Using RBM for recognition -- http://uai.sis.pitt.edu/papers/11/p463-louradour.pdf -- http://www.dmi.usherb.ca/~larocheh/publications/drbm-mitacs-poster.pdf import Data.Maybe import Control.Monad import Control.Monad.Random import Data.List import Data.Vector ((!)) import qualified Data.Vector as V import qualified Numeric.Container as NC import Hopfield.Common import Hopfield.Util -- In the case of the Boltzmann Machine the weight matrix establishes the -- weights between visible and hidden neurons -- w i j - connection between visible neuron i and hidden neuron j -- | determines the rate in which the weights are changed in the training phase. -- http://en.wikipedia.org/wiki/Restricted_Boltzmann_machine#Training_algorithm learningRate :: Double learningRate = 0.001 data Mode = Hidden | Visible | Classification deriving(Eq, Show) data BoltzmannData = BoltzmannData { weightsB :: Weights -- ^ the weights of the network , classificationWeights :: Weights -- weights for classification , biasB :: Bias , biasC :: Bias , biasD :: Bias , patternsB :: [Pattern] -- ^ the patterns which were used to train it -- can be decuded from weights, maybe should be remove now , hiddenCount :: Int -- ^ number of neurons in the hidden layer , pattern_to_class :: [(Pattern, Int)] -- the class of the given pattern -- classes have to be in consecutive order, from 0 } deriving(Show) -- | Retrieves the dimension of the weights matrix corresponding to the given mode. -- For hidden, it is the width of the matrix, and for visible it is the height. -- One has to ensure that the appropriate weights matrix is passed with this function. getDimension :: Mode -> Weights -> Int getDimension Hidden ws = V.length $ ws getDimension Visible ws = V.length $ ws ! 0 getDimension Classification ws = V.length $ ws ! 0 -- | @buildCBoltzmannData patterns@ trains a boltzmann network with @patterns@. -- The number of hidden neurons is set to the number of visible neurons. buildCBoltzmannData :: MonadRandom m => [Pattern] -> m BoltzmannData buildCBoltzmannData [] = error "Train patterns are empty" buildCBoltzmannData pats = buildCBoltzmannData' pats nr_visible where nr_visible = V.length (head pats) -- | @buildCBoltzmannData' patterns nrHidden@: Takes a list of patterns and -- builds a Boltzmann network (by training) in which these patterns are -- stable states. The result of this function can be used to run a pattern -- against the network, by using 'matchPatternBoltzmann'. buildCBoltzmannData' :: MonadRandom m => [Pattern] -> Int -> m BoltzmannData buildCBoltzmannData' [] _ = error "Train patterns are empty" buildCBoltzmannData' pats nrHidden | first_len == 0 = error "Cannot have empty patterns" | any (\x -> V.length x /= first_len) pats = error "All training patterns must have the same length" | otherwise = trainBoltzmann pats nrHidden where first_len = V.length $ head pats -- | @getActivationProbability ws bias pat index@ -- can be used to compute the activation probability for a neuron in the -- visible layer, or for parts of the sums requires for -- the probability of the classifications getActivationSum :: Weights -> Bias -> Pattern -> Int -> Double getActivationSum ws bias pat index = bias ! index + dotProduct (columnVector ws index) (toDouble pat) -- | @getActivationProbabilityVisible ws bias h index@ returns the activation -- probability for a neuron @index@ in a visible pattern, given the weights -- matrix @ws@, the vector of biases @bias@. Applies the activation function -- to the activation sum, in order to obtain the probability. getActivationProbabilityVisible :: Weights -> Bias -> Pattern -> Int -> Double getActivationProbabilityVisible ws bias h index = activation $ getActivationSum ws bias h index -- | @getActivationSumHidden ws bias h index@ returns the activation -- sum for a neuron @index@ in a hidden pattern, given the weights -- matrix @ws@, the vector of biases @bias@. getActivationSumHidden :: Weights -> Weights -> Bias -> Pattern -> Pattern -> Int -> Double getActivationSumHidden ws u c v y index | Just e <- validPattern Visible ws v = error e | Just e <- validPattern Classification u y = error e | otherwise = c ! index + dotProduct (ws ! index) (toDouble v) + dotProduct (u ! index) (toDouble y) -- | @getActivationSumHidden ws bias h index@ returns the activation -- sum for all neurons in the hidden pattern, given the weights -- matrix @ws@, the vector of biases @bias@. getHiddenSums :: Weights -> Weights -> Bias -> Pattern -> Pattern -> V.Vector Double getHiddenSums ws u c v y = V.fromList [getActivationSumHidden ws u c v y i | i <- [0 .. (V.length ws) - 1] ] -- | @getActivationProbabilityVisible ws u bias v index@ returns the activation -- probability for a neuron @index@ in a hidden pattern, given the weights -- matrices @ws@ and @u@, the vector of biases @bias@. Applies the activation function -- to the activation sum, in order to obtain the probability. getActivationProbabilityHidden :: Weights -> Weights -> Bias -> Pattern -> Pattern -> Int -> Double getActivationProbabilityHidden ws u c v y index = activation $ getActivationSumHidden ws u c v y index -- | @updateNeuronVisible ws bias h index@ updates a neuron in the visible layer by using gibbsSampling, according -- to the activation probability updateNeuronVisible :: MonadRandom m => Weights -> Bias -> Pattern -> Int -> m Int updateNeuronVisible ws bias h index = gibbsSampling $ getActivationProbabilityVisible ws bias h index -- | Updates a neuron in the hidden layer by using gibbsSampling, according -- to the activation probability updateNeuronHidden :: MonadRandom m => Weights -> Weights -> Bias -> Pattern -> Pattern -> Int -> m Int updateNeuronHidden ws u c v y index = gibbsSampling $ getActivationProbabilityHidden ws u c v y index -- | Updates the entire visible layer by using gibbsSampling, according -- to the activation probability updateVisible :: MonadRandom m => Weights -> Bias -> Pattern -> m Pattern updateVisible ws bias h | Just e <- validPattern Hidden ws h = error e | otherwise = V.fromList `liftM` mapM (updateNeuronVisible ws bias h) updatedIndices where updatedIndices = [0 .. (V.length $ ws ! 0) - 1] -- | Updates the entire visible layer by using gibbsSampling, according -- to the activation probability updateHidden :: MonadRandom m => Weights -> Weights -> Bias -> Pattern -> Pattern -> m Pattern updateHidden ws u c v y | Just e <- validPattern Visible ws v = error e | otherwise = V.fromList `liftM` mapM (updateNeuronHidden ws u c v y) updatedIndices where updatedIndices = [0 .. (V.length ws) - 1 ] -- | Updates a classification vector given the current state of the network ( -- the u matrix and the vector of biases d, together with a hidden vector h) updateClassification :: Weights -> Bias -> Pattern -> Pattern updateClassification u d h = V.fromList [ if n == newClass then 1 else 0 | n <- allClasses] where -- TODO replace with actual sampling using inverse method (with cdf list) expActivation = exp . (getActivationSum u d h) newClass = maximumBy (compareBy expActivation) allClasses allClasses = [0 .. nrClasses - 1] nrClasses = V.length d -- @getClassificationVector pat_to_classes pat@ returns the classification -- vector of @pat@, by looking up in @pat@ in @pat_to_classes@ to obtain the -- class of the pattern. The classification vector is obtained by -- creating vector with all 0s and only 1 in the position of the class. -- The length of all classification vectors is the number of classes. getClassificationVector :: [(Pattern, Int)] -> Pattern -> Pattern getClassificationVector pat_classes pat = V.fromList [ if n == pat_class then 1 else 0 | n <- map snd pat_classes] where pat_class = fromJust $ lookup pat pat_classes -- | One step which updates the weights in the CD-n training process. -- The weights are changed according to one of the training patterns. -- http://en.wikipedia.org/wiki/Restricted_Boltzmann_machine#Training_algorithm -- @oneTrainingStep bm visible@ updates the parameters of @bm@ (the 2 weight -- matrices and the biases) according to the training instance @v@ -- and its classification, obtained by looking in the map kept in @bm@ oneTrainingStep :: MonadRandom m => BoltzmannData -> Pattern -> m BoltzmannData oneTrainingStep (BoltzmannData ws u b c d pats nr_h pat_to_class) v = do let y = getClassificationVector pat_to_class v h_sum = getHiddenSums ws u c v y h <- updateHidden ws u c v y v' <- updateVisible ws b h let y' = updateClassification u d h (h_sum' :: V.Vector Double) = getHiddenSums ws u c v' y' getOuterProduct v1 v2 = NC.toLists $ (fromDataVector v1) `NC.outer` (fromDataVector $ toDouble v2) getDelta pos neg = map (map (* learningRate)) $ combine (-) pos neg updateWeights w d_w = vector2D $ combine (+) (list2D w) d_w deltaBias v1 v2 = V.map ((* learningRate) . fromIntegral) (combineVectors (-) v1 v2) deltaBiasC v1 v2 = V.map (* learningRate) (combineVectors (-) v1 v2) updateBias bias delta_bias = combineVectors (+) bias delta_bias pos_ws = getOuterProduct h_sum v -- "positive gradient for ws" neg_ws = getOuterProduct h_sum' v' -- "negative gradient for ws" pos_u = getOuterProduct h_sum y -- "positive gradient for u" neg_u = getOuterProduct h_sum' y' -- "negative gradient for u" d_ws = getDelta pos_ws neg_ws -- "delta ws" new_ws = updateWeights ws d_ws d_u = getDelta pos_u neg_u -- "delta u" new_u = updateWeights u d_u new_b = updateBias b (deltaBias v v') new_c = updateBias c (deltaBiasC h_sum h_sum') new_d = updateBias d (deltaBias y y') return $ BoltzmannData new_ws new_u new_b new_c new_d pats nr_h pat_to_class -- | The training function for the Boltzmann Machine. -- We are using the contrastive divergence algorithm CD-1 -- TODO see if making the vis -- (we could extend to CD-n, but "In pratice, CD-1 has been shown to work surprisingly well." -- @trainBoltzmann pats nrHidden@ where @pats@ are the training patterns -- and @nrHidden@ is the number of neurons to be created in the hidden layer. -- http://en.wikipedia.org/wiki/Restricted_Boltzmann_machine#Training_algorithm trainBoltzmann :: MonadRandom m => [Pattern] -> Int -> m BoltzmannData trainBoltzmann pats nr_h = do ws <- vector2D `liftM` genWeights u <- vector2D `liftM` genU foldM oneTrainingStep (BoltzmannData ws u b c d pats nr_h pats_classes) pats where genWeights = replicateM nr_h . replicateM nr_visible $ normal 0.0 0.01 genU = replicateM nr_h . replicateM nr_classes $ normal 0.0 0.01 b = V.fromList $ replicate nr_visible 0.0 c = V.fromList $ replicate nr_h 0.0 d = V.fromList $ replicate nr_classes 0.0 nr_classes = length nub_pats nub_pats = nub pats pats_classes = zip nub_pats [0 .. ] nr_visible = V.length $ head pats -- | @matchPatternBoltzmann bm pat@ given the Boltzmann trained network @bm@ -- recognizes @pat@, by classifying it to one of the patterns the network was -- trained with. This is done by computing the free energy of @pat@ with -- every possible classification, and choosing the classification with -- lowest energy. -- http://uai.sis.pitt.edu/papers/11/p463-louradour.pdf matchPatternCBoltzmann :: BoltzmannData -> Pattern -> Int matchPatternCBoltzmann bm v | Just e <- validPattern Visible (weightsB bm) v = error e | otherwise = fromJust $ maxPat `elemIndex` pats where pats_classes = pattern_to_class bm pats = patternsB bm patternsWithClassifications = [ (p, getClassificationVector pats_classes p) | p <- map fst pats_classes] probability classification = exp $ - (getFreeEnergy bm v classification) (maxPat, _) = maximumBy (compareBy $ probability . snd) patternsWithClassifications -- | @getFreeEnergy bm visible classification_vector@ -- Computes the free energy of @v@ with @classification_vector@, according -- to the trained Boltzmann network @bm@. It is used for classifying a given -- visible vector according to the classes used for training the network @bm@. getFreeEnergy :: BoltzmannData -> Pattern -> Pattern -> Double getFreeEnergy (BoltzmannData ws u _b c d _pats _nrH _pats_classes) v y = - dotProduct d (toDouble y) - (V.sum $ V.map softplus hiddenSums) where hiddenSums = getHiddenSums ws u c v y -- | The activation function for the network (the logistic sigmoid). -- http://en.wikipedia.org/wiki/Sigmoid_function activation :: Double -> Double activation x = 1.0 / (1.0 + exp (-x)) -- | The function used to compute the free energy -- http://uai.sis.pitt.edu/papers/11/p463-louradour.pdf softplus :: Double -> Double softplus x = log (1.0 + exp x) -- TODO move to tests validClassificationVector :: Pattern -> Int -> Maybe String validClassificationVector pat size | V.length pat /= size = Just "classification vector does not match expected size" | V.any (\x -> notElem x [0, 1]) pat = Just "Non binary element in classification pattern" | V.sum pat /=1 = Just "Invalid classification vector" | otherwise = Nothing -- | @validPattern mode weights pattern@ -- Returns an error string in a Just if the @pattern@ is not compatible -- with @weights@ and Nothing otherwise. @mode@ gives the type of the pattern, -- which is checked (Visible or Hidden). validPattern :: Mode -> Weights -> Pattern -> Maybe String validPattern mode ws pat | getDimension mode ws /= V.length pat = Just $ "Size of pattern must match network size in " ++ show mode | V.any (\x -> notElem x [0, 1]) pat = Just "Non binary element in Boltzmann pattern" | otherwise = Nothing -- | @validWeights ws@ checks that a weight matrix is well formed. validWeights :: Weights -> Maybe String validWeights ws | V.null ws = Just "The matrix of weights is empty" | V.any (\x -> V.length x /= V.length (ws ! 0)) ws = Just "Weights matrix ill formed" | otherwise = Nothing