
Deep Learning (8): Learning Restricted Boltzmann Machines (RBM) — work in progress

Published: 2025/3/21

Learning Restricted Boltzmann Machines (RBM)

Original article (source link at the end of this post)

Author: hjimce
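Before the code, it may help to recall the quantity that the `free_energy` method below computes. For an RBM with visible bias b, hidden bias c, and weight matrix W, the free energy of a visible vector v is the standard expression (term-by-term, this matches the `vbias_term` and `hidden_term` lines in the code):

```latex
F(v) \;=\; -\,b^{\top} v \;-\; \sum_{j=1}^{n_h} \log\!\bigl(1 + e^{\,c_j + W_{\cdot j}^{\top} v}\bigr)
```

The CD-k cost built in `get_cost_updates` is then the difference of means, mean F(v_data) − mean F(v_model), whose gradient approximates the gradient of the negative log-likelihood.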



```python
import os
import timeit

try:
    import PIL.Image as Image
except ImportError:
    import Image

import numpy

import theano
import theano.tensor as T

from theano.tensor.shared_randomstreams import RandomStreams

from utils import tile_raster_images
from logistic_sgd import load_data


# RBM network definition; mostly parameter initialization
class RBM(object):
    def __init__(self, input=None, n_visible=784, n_hidden=500, W=None,
                 hbias=None, vbias=None, numpy_rng=None, theano_rng=None):

        self.n_visible = n_visible
        self.n_hidden = n_hidden

        if numpy_rng is None:
            numpy_rng = numpy.random.RandomState(1234)
        if theano_rng is None:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # Weights initialized uniformly in +-4*sqrt(6./(n_visible+n_hidden))
        if W is None:
            initial_W = numpy.asarray(
                numpy_rng.uniform(
                    low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                    high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                    size=(n_visible, n_hidden)
                ),
                dtype=theano.config.floatX
            )
            # Shared variable so the weights can live on the GPU
            W = theano.shared(value=initial_W, name='W', borrow=True)
        # Hidden-layer bias initialized to 0
        if hbias is None:
            hbias = theano.shared(
                value=numpy.zeros(n_hidden, dtype=theano.config.floatX),
                name='hbias', borrow=True)
        # Visible-layer bias initialized to 0
        if vbias is None:
            vbias = theano.shared(
                value=numpy.zeros(n_visible, dtype=theano.config.floatX),
                name='vbias', borrow=True)

        # Network input
        self.input = input
        if not input:
            self.input = T.matrix('input')

        self.W = W
        self.hbias = hbias
        self.vbias = vbias
        self.theano_rng = theano_rng
        # All parameters of the network, kept in a list for later access
        self.params = [self.W, self.hbias, self.vbias]

    # Free energy of a visible configuration
    def free_energy(self, v_sample):
        """Compute the free energy."""
        wx_b = T.dot(v_sample, self.W) + self.hbias
        vbias_term = T.dot(v_sample, self.vbias)
        hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
        return -hidden_term - vbias_term

    # Forward pass from the visible layer to the hidden layer: p(h=1|v)
    def propup(self, vis):
        pre_sigmoid_activation = T.dot(vis, self.W) + self.hbias
        return [pre_sigmoid_activation, T.nnet.sigmoid(pre_sigmoid_activation)]

    # Sample the hidden state given a visible state
    def sample_h_given_v(self, v0_sample):
        # Probability that each hidden unit is on, i.e. p(h=1|v)
        pre_sigmoid_h1, h1_mean = self.propup(v0_sample)
        # Sample from those probabilities, like flipping a coin once per
        # unit: n is the number of trials, p the probability of drawing a 1;
        # binomial returns a 0/1 value for each neuron
        h1_sample = self.theano_rng.binomial(size=h1_mean.shape, n=1,
                                             p=h1_mean,
                                             dtype=theano.config.floatX)
        return [pre_sigmoid_h1, h1_mean, h1_sample]

    # Backward pass from the hidden layer to the visible layer: p(v=1|h)
    def propdown(self, hid):
        pre_sigmoid_activation = T.dot(hid, self.W.T) + self.vbias
        return [pre_sigmoid_activation, T.nnet.sigmoid(pre_sigmoid_activation)]

    # Sample the visible state given a hidden state
    def sample_v_given_h(self, h0_sample):
        pre_sigmoid_v1, v1_mean = self.propdown(h0_sample)
        v1_sample = self.theano_rng.binomial(size=v1_mean.shape,
                                             n=1, p=v1_mean,
                                             dtype=theano.config.floatX)
        return [pre_sigmoid_v1, v1_mean, v1_sample]

    # One hidden -> visible -> hidden transition, i.e. one Gibbs sampling step
    def gibbs_hvh(self, h0_sample):
        pre_sigmoid_v1, v1_mean, v1_sample = self.sample_v_given_h(h0_sample)
        pre_sigmoid_h1, h1_mean, h1_sample = self.sample_h_given_v(v1_sample)
        return [pre_sigmoid_v1, v1_mean, v1_sample,
                pre_sigmoid_h1, h1_mean, h1_sample]

    # One visible -> hidden -> visible transition, i.e. one Gibbs sampling step
    def gibbs_vhv(self, v0_sample):
        pre_sigmoid_h1, h1_mean, h1_sample = self.sample_h_given_v(v0_sample)
        pre_sigmoid_v1, v1_mean, v1_sample = self.sample_v_given_h(h1_sample)
        return [pre_sigmoid_h1, h1_mean, h1_sample,
                pre_sigmoid_v1, v1_mean, v1_sample]

    # k sets the number of Gibbs sampling steps (one round trip = one step)
    def get_cost_updates(self, lr=0.1, persistent=None, k=1):

        # Given the input x, compute the hidden-layer probability
        # distribution and a sample drawn from it
        pre_sigmoid_ph, ph_mean, ph_sample = self.sample_h_given_v(self.input)

        # decide how to initialize persistent chain:
        # for CD, we use the newly generated hidden sample
        # for PCD, we initialize from the old state of the chain
        if persistent is None:
            chain_start = ph_sample
        else:
            chain_start = persistent
        # Run the chain back and forth k times
        (
            [
                pre_sigmoid_nvs,
                nv_means,
                nv_samples,
                pre_sigmoid_nhs,
                nh_means,
                nh_samples
            ],
            updates
        ) = theano.scan(self.gibbs_hvh,
                        outputs_info=[None, None, None, None, None,
                                      chain_start],
                        n_steps=k)
        # State at the end of the chain
        chain_end = nv_samples[-1]
        # Build the cost function
        cost = T.mean(self.free_energy(self.input)) - T.mean(
            self.free_energy(chain_end))
        # Compute the gradients, then take a gradient descent step
        gparams = T.grad(cost, self.params, consider_constant=[chain_end])
        for gparam, param in zip(gparams, self.params):
            updates[param] = param - gparam * T.cast(lr,
                                                     dtype=theano.config.floatX)

        if persistent:
            # Note that this works only if persistent is a shared variable
            updates[persistent] = nh_samples[-1]
            # pseudo-likelihood is a better proxy for PCD
            monitoring_cost = self.get_pseudo_likelihood_cost(updates)
        else:
            # reconstruction cross-entropy is a better proxy for CD
            monitoring_cost = self.get_reconstruction_cost(
                updates, pre_sigmoid_nvs[-1])

        return monitoring_cost, updates
        # end-snippet-4

    def get_pseudo_likelihood_cost(self, updates):
        """Stochastic approximation to the pseudo-likelihood"""

        # index of bit i in expression p(x_i | x_{\i})
        bit_i_idx = theano.shared(value=0, name='bit_i_idx')

        # binarize the input image by rounding to nearest integer
        xi = T.round(self.input)

        # calculate free energy for the given bit configuration
        fe_xi = self.free_energy(xi)

        # flip bit x_i of matrix xi and preserve all other bits x_{\i}
        # Equivalent to xi[:, bit_i_idx] = 1 - xi[:, bit_i_idx], but assigns
        # the result to xi_flip, instead of working in place on xi.
        xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])

        # calculate free energy with bit flipped
        fe_xi_flip = self.free_energy(xi_flip)

        # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i})))
        cost = T.mean(self.n_visible *
                      T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi)))

        # increment bit_i_idx % number as part of updates
        updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible

        return cost

    def get_reconstruction_cost(self, updates, pre_sigmoid_nv):
        """Approximation to the reconstruction error

        Note that this function requires the pre-sigmoid activation as
        input. To understand why, you need to know a bit about how Theano
        works. Whenever you compile a Theano function, the computational
        graph that you pass as input gets optimized for speed and
        stability, by replacing several parts of the subgraphs with
        others. One such optimization expresses terms of the form
        log(sigmoid(x)) in terms of softplus. We need this optimization
        for the cross-entropy, since the sigmoid of numbers larger than
        30 (or even less than that) turns to 1, and numbers smaller than
        -30 turn to 0, which in turn forces Theano to compute log(0), so
        we would get either -inf or NaN as cost. If the value is
        expressed in terms of softplus, we do not get this undesirable
        behaviour. This optimization usually works fine, but here we have
        a special case: the sigmoid is applied inside the scan op, while
        the log is outside. Therefore Theano will only see
        log(scan(..)) instead of log(sigmoid(..)) and will not apply the
        wanted optimization. We cannot replace the sigmoid inside scan
        with something else either, because this only needs to be done on
        the last step. Therefore the easiest and most efficient way is to
        also return the pre-sigmoid activation as an output of scan, and
        apply both the log and the sigmoid outside scan, so that Theano
        can catch and optimize the expression.
        """

        cross_entropy = T.mean(
            T.sum(
                self.input * T.log(T.nnet.sigmoid(pre_sigmoid_nv)) +
                (1 - self.input) * T.log(1 - T.nnet.sigmoid(pre_sigmoid_nv)),
                axis=1
            )
        )

        return cross_entropy


# Train an RBM on MNIST, then sample from the trained model
def test_rbm(learning_rate=0.1, training_epochs=15,
             dataset='mnist.pkl.gz', batch_size=20,
             n_chains=20, n_samples=10, output_folder='rbm_plots',
             n_hidden=500):

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]

    # Number of training minibatches
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    persistent_chain = theano.shared(
        numpy.zeros((batch_size, n_hidden), dtype=theano.config.floatX),
        borrow=True)

    # Build the network: 28*28 visible units, 500 hidden units
    rbm = RBM(input=x, n_visible=28 * 28, n_hidden=n_hidden,
              numpy_rng=rng, theano_rng=theano_rng)

    # get the cost and the gradient corresponding to one step of CD-15
    cost, updates = rbm.get_cost_updates(lr=learning_rate,
                                         persistent=persistent_chain, k=15)

    #################################
    #     Training the RBM          #
    #################################
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    # start-snippet-5
    # it is ok for a theano function to have no output
    # the purpose of train_rbm is solely to update the RBM parameters
    train_rbm = theano.function(
        [index], cost, updates=updates,
        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]},
        name='train_rbm')

    plotting_time = 0.
    start_time = timeit.default_timer()

    # go through training epochs
    for epoch in range(training_epochs):

        # go through the training set
        mean_cost = []
        for batch_index in range(n_train_batches):
            mean_cost += [train_rbm(batch_index)]

        print('Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost))

        # Plot filters after each training epoch
        plotting_start = timeit.default_timer()
        # Construct image from the weight matrix
        image = Image.fromarray(
            tile_raster_images(
                X=rbm.W.get_value(borrow=True).T,
                img_shape=(28, 28),
                tile_shape=(10, 10),
                tile_spacing=(1, 1)
            )
        )
        image.save('filters_at_epoch_%i.png' % epoch)
        plotting_stop = timeit.default_timer()
        plotting_time += (plotting_stop - plotting_start)

    end_time = timeit.default_timer()

    pretraining_time = (end_time - start_time) - plotting_time

    print('Training took %f minutes' % (pretraining_time / 60.))
    # end-snippet-5 start-snippet-6
    #################################
    #     Sampling from the RBM     #
    #################################
    # find out the number of test samples
    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]

    # pick random test examples, with which to initialize the persistent chain
    test_idx = rng.randint(number_of_test_samples - n_chains)
    persistent_vis_chain = theano.shared(
        numpy.asarray(
            test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains],
            dtype=theano.config.floatX
        )
    )
    # end-snippet-6 start-snippet-7
    plot_every = 1000
    # define one step of Gibbs sampling (mf = mean-field); define a
    # function that does `plot_every` steps before returning the
    # sample for plotting
    (
        [
            presig_hids,
            hid_mfs,
            hid_samples,
            presig_vis,
            vis_mfs,
            vis_samples
        ],
        updates
    ) = theano.scan(
        rbm.gibbs_vhv,
        outputs_info=[None, None, None, None, None, persistent_vis_chain],
        n_steps=plot_every
    )

    # add to updates the shared variable that takes care of our persistent
    # chain
    updates.update({persistent_vis_chain: vis_samples[-1]})
    # construct the function that implements our persistent chain.
    # we generate the "mean field" activations for plotting and the actual
    # samples for reinitializing the state of our persistent chain
    sample_fn = theano.function(
        [],
        [
            vis_mfs[-1],
            vis_samples[-1]
        ],
        updates=updates,
        name='sample_fn'
    )

    # create a space to store the image for plotting (we need to leave
    # room for the tile_spacing as well)
    image_data = numpy.zeros(
        (29 * n_samples + 1, 29 * n_chains - 1),
        dtype='uint8'
    )
    for idx in range(n_samples):
        # generate `plot_every` intermediate samples that we discard,
        # because successive samples in the chain are too correlated
        vis_mf, vis_sample = sample_fn()
        print(' ... plotting sample ', idx)
        image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
            X=vis_mf,
            img_shape=(28, 28),
            tile_shape=(1, n_chains),
            tile_spacing=(1, 1)
        )

    # construct image
    image = Image.fromarray(image_data)
    image.save('samples.png')
    # end-snippet-7
    os.chdir('../')


if __name__ == '__main__':
    test_rbm()
```
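For intuition, the CD-k update that `get_cost_updates` builds symbolically can be sketched with plain NumPy for k = 1. This is a minimal sketch, not part of the original code: the function name `cd1_update`, the toy layer sizes, and the use of hidden probabilities (rather than samples) in the gradient statistics are illustrative assumptions.

```python
# A minimal NumPy-only sketch of one CD-1 update for a tiny binary RBM.
# Names and sizes here are illustrative, not from the Theano code above.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def cd1_update(v0, W, vbias, hbias, lr=0.1, rng=None):
    """One contrastive-divergence (CD-1) step on a batch of visible vectors."""
    if rng is None:
        rng = np.random.default_rng(0)
    # Positive phase: p(h=1|v0) and a binary sample h0
    ph0 = sigmoid(v0 @ W + hbias)
    h0 = (rng.random(ph0.shape) < ph0).astype(v0.dtype)
    # Negative phase: reconstruct v1 from h0, then p(h=1|v1)
    pv1 = sigmoid(h0 @ W.T + vbias)
    v1 = (rng.random(pv1.shape) < pv1).astype(v0.dtype)
    ph1 = sigmoid(v1 @ W + hbias)
    n = v0.shape[0]
    # Gradient approximation: <v h>_data - <v h>_model, averaged over the batch
    W += lr * (v0.T @ ph0 - v1.T @ ph1) / n
    vbias += lr * (v0 - v1).mean(axis=0)
    hbias += lr * (ph0 - ph1).mean(axis=0)
    return W, vbias, hbias

rng = np.random.default_rng(1234)
W = 0.01 * rng.standard_normal((6, 3))          # 6 visible, 3 hidden units
vbias = np.zeros(6)
hbias = np.zeros(3)
v0 = (rng.random((20, 6)) < 0.5).astype(float)  # toy binary batch
W, vbias, hbias = cd1_update(v0, W, vbias, hbias)
print(W.shape, vbias.shape, hbias.shape)        # prints: (6, 3) (6,) (3,)
```

The Theano version above differs mainly in that the chain can run for k > 1 steps (via `theano.scan`) and, with `persistent` set, continues a persistent chain (PCD) across minibatches instead of restarting from the data each time.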

Source: http://blog.csdn.net/hjimce/article/details/48949197
