tensorflow学习 (6. AlexNet implementation and cat-vs-dog classification)
For an introduction to AlexNet, see: https://blog.csdn.net/qq_26499769/article/details/82928164
I then tried to implement it myself, following the tutorial:
from skimage import io, transform
import glob
import numpy as np
import tensorflow as tf
#from alexnet import alexnet

def alexnet(x, keep_prob, num_classes):
    # conv1: 11x11 kernel, stride 4, SAME padding -> 57x57x96 for a 227x227x3 input
    with tf.name_scope('conv1') as scope:
        kernel = tf.Variable(tf.truncated_normal([11, 11, 3, 96], dtype=tf.float32, stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(x, kernel, [1, 4, 4, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[96], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(bias, name=scope)

    # lrn1
    with tf.name_scope('lrn1') as scope:
        lrn1 = tf.nn.local_response_normalization(conv1, alpha=1e-4, beta=0.75, depth_radius=2, bias=2.0)

    # pool1: 3x3, stride 2, VALID -> 28x28x96
    with tf.name_scope('pool1') as scope:
        pool1 = tf.nn.max_pool(lrn1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')

    # conv2: split into two groups, as in the original two-GPU AlexNet -> 28x28x256
    with tf.name_scope('conv2') as scope:
        pool1_groups = tf.split(axis=3, value=pool1, num_or_size_splits=2)
        kernel = tf.Variable(tf.truncated_normal([5, 5, 48, 256], dtype=tf.float32, stddev=1e-1), name='weights')
        kernel_groups = tf.split(axis=3, value=kernel, num_or_size_splits=2)
        conv_up = tf.nn.conv2d(pool1_groups[0], kernel_groups[0], [1, 1, 1, 1], padding='SAME')
        conv_down = tf.nn.conv2d(pool1_groups[1], kernel_groups[1], [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32), trainable=True, name='biases')
        biases_groups = tf.split(axis=0, value=biases, num_or_size_splits=2)
        bias_up = tf.nn.bias_add(conv_up, biases_groups[0])
        bias_down = tf.nn.bias_add(conv_down, biases_groups[1])
        bias = tf.concat(axis=3, values=[bias_up, bias_down])
        conv2 = tf.nn.relu(bias, name=scope)

    # lrn2
    with tf.name_scope('lrn2') as scope:
        lrn2 = tf.nn.local_response_normalization(conv2, alpha=1e-4, beta=0.75, depth_radius=2, bias=2.0)

    # pool2 -> 13x13x256
    with tf.name_scope('pool2') as scope:
        pool2 = tf.nn.max_pool(lrn2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')

    # conv3 -> 13x13x384
    with tf.name_scope('conv3') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 384], dtype=tf.float32, stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv3 = tf.nn.relu(bias, name=scope)

    # conv4: grouped -> 13x13x384
    with tf.name_scope('conv4') as scope:
        conv3_groups = tf.split(axis=3, value=conv3, num_or_size_splits=2)
        kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 384], dtype=tf.float32, stddev=1e-1), name='weights')
        kernel_groups = tf.split(axis=3, value=kernel, num_or_size_splits=2)
        conv_up = tf.nn.conv2d(conv3_groups[0], kernel_groups[0], [1, 1, 1, 1], padding='SAME')
        conv_down = tf.nn.conv2d(conv3_groups[1], kernel_groups[1], [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32), trainable=True, name='biases')
        biases_groups = tf.split(axis=0, value=biases, num_or_size_splits=2)
        bias_up = tf.nn.bias_add(conv_up, biases_groups[0])
        bias_down = tf.nn.bias_add(conv_down, biases_groups[1])
        bias = tf.concat(axis=3, values=[bias_up, bias_down])
        conv4 = tf.nn.relu(bias, name=scope)

    # conv5: grouped -> 13x13x256
    with tf.name_scope('conv5') as scope:
        conv4_groups = tf.split(axis=3, value=conv4, num_or_size_splits=2)
        kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 256], dtype=tf.float32, stddev=1e-1), name='weights')
        kernel_groups = tf.split(axis=3, value=kernel, num_or_size_splits=2)
        conv_up = tf.nn.conv2d(conv4_groups[0], kernel_groups[0], [1, 1, 1, 1], padding='SAME')
        conv_down = tf.nn.conv2d(conv4_groups[1], kernel_groups[1], [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32), trainable=True, name='biases')
        biases_groups = tf.split(axis=0, value=biases, num_or_size_splits=2)
        bias_up = tf.nn.bias_add(conv_up, biases_groups[0])
        bias_down = tf.nn.bias_add(conv_down, biases_groups[1])
        bias = tf.concat(axis=3, values=[bias_up, bias_down])
        conv5 = tf.nn.relu(bias, name=scope)

    # pool5 -> 6x6x256
    with tf.name_scope('pool5') as scope:
        pool5 = tf.nn.max_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')

    # flattened6
    with tf.name_scope('flattened6') as scope:
        flattened = tf.reshape(pool5, shape=[-1, 6 * 6 * 256])

    # fc6
    with tf.name_scope('fc6') as scope:
        weights = tf.Variable(tf.truncated_normal([6 * 6 * 256, 4096], dtype=tf.float32, stddev=1e-1), name='weights')
        biases = tf.Variable(tf.constant(0.0, shape=[4096], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.xw_plus_b(flattened, weights, biases)
        fc6 = tf.nn.relu(bias)

    # dropout6
    with tf.name_scope('dropout6') as scope:
        dropout6 = tf.nn.dropout(fc6, keep_prob)

    # fc7
    with tf.name_scope('fc7') as scope:
        weights = tf.Variable(tf.truncated_normal([4096, 4096], dtype=tf.float32, stddev=1e-1), name='weights')
        biases = tf.Variable(tf.constant(0.0, shape=[4096], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.xw_plus_b(dropout6, weights, biases)
        fc7 = tf.nn.relu(bias)

    # dropout7
    with tf.name_scope('dropout7') as scope:
        dropout7 = tf.nn.dropout(fc7, keep_prob)

    # fc8: final logits
    with tf.name_scope('fc8') as scope:
        weights = tf.Variable(tf.truncated_normal([4096, num_classes], dtype=tf.float32, stddev=1e-1), name='weights')
        biases = tf.Variable(tf.constant(0.0, shape=[num_classes], dtype=tf.float32), trainable=True, name='biases')
        fc8 = tf.nn.xw_plus_b(dropout7, weights, biases)

    return fc8

# Resize every image to 227*227*3
w = 227
h = 227
c = 3
num_classes = 2
learning_rate = 0.5  # note: 0.5 is very large for Adam; values around 1e-4 are more usual

# Paths where the training and test images are stored
train_path = "train/"
test_path = "test/"  # not used below; the labelled "test" images are also taken from train/

# Read images and their labels
def read_image(path, start_num, end_num):
    images = []
    labels = []
    for animal in ['cat', 'dog']:
        for img_num in range(start_num, end_num, 1):
            # build the path of each image under the directory
            img = path + '/' + animal + '.' + str(img_num + 1) + '.jpg'
            print("reading the image:%s" % img)
            image = io.imread(img)
            image = transform.resize(image, (w, h, c))
            images.append(image)
            if animal == 'cat':
                labels.append([0, 1])
            if animal == 'dog':
                labels.append([1, 0])
    # np.array and np.asarray both turn structured data into an ndarray; the difference is that
    # when the source is already an ndarray, array copies it (new memory) while asarray does not
    return np.asarray(images, dtype=np.float32), np.asarray(labels, dtype=np.int32)

# Read the training and test data
train_data, train_label = read_image(train_path, 1, 1 + 500)
test_data, test_label = read_image(train_path, 1001, 1001 + 500)

# Shuffle the training and test data
train_image_num = len(train_data)
train_image_index = np.arange(train_image_num)  # arange(start, stop, step, dtype=None) generates an ndarray over the given range with the given step
np.random.shuffle(train_image_index)  # for multi-dimensional arrays, shuffle only permutes the first axis; see https://blog.csdn.net/jasonzzj/article/details/53932645
train_data = train_data[train_image_index]  # shuffled data
train_label = train_label[train_image_index]

test_image_num = len(test_data)
test_image_index = np.arange(test_image_num)
np.random.shuffle(test_image_index)
test_data = test_data[test_image_index]
test_label = test_label[test_image_index]

# Build the AlexNet graph
x = tf.placeholder(tf.float32, [None, w, h, c], name='x')
y_ = tf.placeholder(tf.int32, [None, num_classes], name='y_')
keep_prob = tf.placeholder(tf.float32)
fc8 = alexnet(x, keep_prob, num_classes)

# loss
with tf.name_scope('loss'):
    loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=fc8, labels=y_))

# optimizer
with tf.name_scope('optimizer'):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)

# accuracy
with tf.name_scope("accuracy"):
    correct_pred = tf.equal(tf.argmax(fc8, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

init = tf.global_variables_initializer()

# Tensorboard
filewriter_path = 'alexnet_tmp/tensorboard'
tf.summary.scalar('loss', loss_op)
tf.summary.scalar('accuracy', accuracy)
merged_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter(filewriter_path)

# saver
saver = tf.train.Saver()

# Yield batch_size samples at a time for training or testing
def get_batch(data, label, batch_size):
    for start_index in range(0, len(data) - batch_size + 1, batch_size):
        slice_index = slice(start_index, start_index + batch_size)
        yield data[slice_index], label[slice_index]

# Create the Session
with tf.Session() as sess:
    # Initialize all variables (weights, biases, etc.), then run over all samples
    # train_num times, in batches of 64. train_num can be set larger.
    train_num = 10
    batch_size = 64
    sess.run(init)
    j = 0
    for i in range(train_num):
        print("batchnum:", i)
        train_loss, train_acc, batch_num = 0, 0, 0
        for train_data_batch, train_label_batch in get_batch(train_data, train_label, batch_size):
            _, err, acc = sess.run([train_op, loss_op, accuracy],
                                   feed_dict={x: train_data_batch, y_: train_label_batch, keep_prob: 0.5})
            train_loss += err
            train_acc += acc
            batch_num += 1
            j = j + 1
            result = sess.run(merged_summary,
                              feed_dict={x: train_data_batch, y_: train_label_batch, keep_prob: 0.5})
            writer.add_summary(result, j)
        print("train loss:", train_loss / batch_num)
        print("train acc:", train_acc / batch_num)
        test_loss, test_acc, batch_num = 0, 0, 0
        for test_data_batch, test_label_batch in get_batch(test_data, test_label, batch_size):
            err, acc = sess.run([loss_op, accuracy],
                                feed_dict={x: test_data_batch, y_: test_label_batch, keep_prob: 1})
            test_loss += err
            test_acc += acc
            batch_num += 1
        print("test loss:", test_loss / batch_num)
        print("test acc:", test_acc / batch_num)

Dataset source:
https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/data (I find that when the data is only shared via a netdisk it is basically impossible to download)
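For reference, read_image above assumes the Kaggle train.zip has been unpacked into the train/ directory with the files keeping their original names, roughly like this:

train/cat.1.jpg
train/cat.2.jpg
...
train/dog.1.jpg
train/dog.2.jpg
...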
The training accuracy is only about 50%, probably because so little data was used (the Adam learning rate of 0.5 set in the code may also be too large for the network to converge).
A few quick notes on the remaining problems:
1. The code is not well commented; I will add detailed comments when I have time.
2. Because of my machine's limitations I did not train on much data, so the results are quite poor and do not really validate the network; I will verify the code properly when I get the chance.
3. A problem with the code itself: since the dataset is fairly large, reading it all in at once can run out of memory. The correct approach is to feed the data in chunks: after training on each chunk, save the parameters, then reload them and continue training on the next chunk of data (see the sketch after this list).
4. I have not yet read the AlexNet paper; I will find time to read it carefully.
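As a rough illustration of point 3, here is a minimal sketch of chunked training that reuses read_image, get_batch, the saver and the graph defined above; the chunk size and checkpoint path are made up for illustration only:

ckpt_path = 'alexnet_tmp/model.ckpt'  # assumed checkpoint location
chunk_size = 1000
batch_size = 64

with tf.Session() as sess:
    sess.run(init)
    for chunk_id in range(10):
        # read only one slice of the dataset into memory at a time
        start = 1 + chunk_id * chunk_size
        chunk_data, chunk_label = read_image(train_path, start, start + chunk_size)
        for data_batch, label_batch in get_batch(chunk_data, chunk_label, batch_size):
            sess.run(train_op, feed_dict={x: data_batch, y_: label_batch, keep_prob: 0.5})
        # save the parameters after each chunk; a later run can pick them up again
        # with saver.restore(sess, ckpt_path) instead of sess.run(init)
        saver.save(sess, ckpt_path)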
Since I do not have much free time, these posts cover whatever problems I happen to be thinking about, so please bear with me if the explanations seem disorganized. When I get the chance I may write follow-up posts on topics I care about, for example: variable-size image input, imbalanced samples, samples with few labels, small-object detection, network optimization and overfitting, fitting systems with varying parameters (such as PSF deconvolution where the kernel changes), improving a network's accuracy and suppressing overfitting without massively enlarging the sample set or the computational cost, and the real-time and lightweight requirements of practical projects. If you have insights on any of these, I would be glad to learn together.