U-Net Project Analysis (4): ./src/RetinaNN_predict.py
Project GitHub page: https://github.com/orobix/retina-unet
Reference paper: Retina blood vessel segmentation with a convolution neural network (U-net)
1. Importing the required modules
```python
# Python
import numpy as np
import configparser
from matplotlib import pyplot as plt
# Keras
from keras.models import model_from_json
```

Next, import the sklearn modules. For a detailed introduction to sklearn, see fuqiuai's blog, or consult the official documentation.
sklearn.metrics provides functions for measuring the error between ground-truth values and predictions; the evaluation here focuses on the following metrics:
```python
# scikit-learn
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import jaccard_similarity_score
from sklearn.metrics import f1_score
```

Then import the helper scripts the project depends on.
```python
import sys
sys.path.insert(0, '/home/shenziehng/anaconda/SpyderProject/Retina_NN/lib/')
# help_functions.py
from help_functions import *
# extract_patches.py
from extract_patches import recompone
from extract_patches import recompone_overlap
from extract_patches import paint_border
from extract_patches import kill_border
from extract_patches import pred_only_FOV
from extract_patches import get_data_testing
from extract_patches import get_data_testing_overlap
# pre_processing.py
from pre_processing import my_PreProc
```

2. Loading the configuration file and parsing parameters
```python
config = configparser.RawConfigParser()
config.read('/home/shenziheng/SpyderProject/Retina_NN/configuration.txt')

path_data = config.get('data paths', 'path_local')  # data path

DRIVE_test_imgs_original = path_data + config.get('data paths', 'test_imgs_original')  # HDF5 file with the test images
test_imgs_orig = load_hdf5(DRIVE_test_imgs_original)  # test images
full_img_height = test_imgs_orig.shape[2]
full_img_width = test_imgs_orig.shape[3]

DRIVE_test_border_masks = path_data + config.get('data paths', 'test_border_masks')  # HDF5 file with the test border masks
test_border_masks = load_hdf5(DRIVE_test_border_masks)

# patch dimensions
patch_height = int(config.get('data attributes', 'patch_height'))
patch_width = int(config.get('data attributes', 'patch_width'))

# stride used when extracting patches
stride_height = int(config.get('testing settings', 'stride_height'))
stride_width = int(config.get('testing settings', 'stride_width'))
assert (stride_height < patch_height and stride_width < patch_width)

name_experiment = config.get('experiment name', 'name')
path_experiment = './' + name_experiment + '/'

Imgs_to_test = int(config.get('testing settings', 'full_images_to_test'))  # predict all 20 test images
N_visual = int(config.get('testing settings', 'N_group_visual'))  # 1
average_mode = config.getboolean('testing settings', 'average_mode')  # average=True

# ground truth
gtruth = path_data + config.get('data paths', 'test_groundTruth')  # HDF5 file with the test ground truth
img_truth = load_hdf5(gtruth)

visualize(group_images(test_imgs_orig[0:20,:,:,:],5),'original').show()    # show all test images
visualize(group_images(test_border_masks[0:20,:,:,:],5),'borders').show()  # show all border masks
visualize(group_images(img_truth[0:20,:,:,:],5),'gtruth').show()           # show all ground-truth images
```
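For reference, the keys read above imply a configuration.txt shaped roughly like the following. This is an illustrative sketch only: the section and option names come from the code, but the values are placeholders and may differ from the repository's actual configuration file.

```ini
[data paths]
path_local = ./DRIVE_datasets_training_testing/
test_imgs_original = DRIVE_dataset_imgs_test.hdf5
test_border_masks = DRIVE_dataset_borderMasks_test.hdf5
test_groundTruth = DRIVE_dataset_groundTruth_test.hdf5

[experiment name]
name = test

[data attributes]
patch_height = 48
patch_width = 48

[testing settings]
full_images_to_test = 20
N_group_visual = 1
average_mode = True
stride_height = 5
stride_width = 5
best_last = best
```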
3. Splitting the images into patches and running prediction

```python
patches_imgs_test = None
masks_test = None
patches_masks_test = None
new_height = None
new_width = None
if average_mode == True:
    patches_imgs_test, new_height, new_width, masks_test = get_data_testing_overlap(
        DRIVE_test_imgs_original = DRIVE_test_imgs_original,  # original images
        DRIVE_test_groudTruth = path_data + config.get('data paths', 'test_groundTruth'),  # masks
        Imgs_to_test = int(config.get('testing settings', 'full_images_to_test')),
        patch_height = patch_height,
        patch_width = patch_width,
        stride_height = stride_height,
        stride_width = stride_width)
else:
    patches_imgs_test, patches_masks_test = get_data_testing(
        DRIVE_test_imgs_original = DRIVE_test_imgs_original,  # original images
        DRIVE_test_groudTruth = path_data + config.get('data paths', 'test_groundTruth'),  # masks
        Imgs_to_test = int(config.get('testing settings', 'full_images_to_test')),
        patch_height = patch_height,
        patch_width = patch_width)
```

The former extracts overlapping patches (controlled by the stride); the latter simply tiles the image with adjacent, non-overlapping patches.
```python
#================ Run the prediction of the patches ==================================
best_last = config.get('testing settings', 'best_last')
# load the trained model architecture and its weights
model = model_from_json(open(path_experiment + name_experiment + '_architecture.json').read())
model.load_weights(path_experiment + name_experiment + '_' + best_last + '_weights.h5')
# run the prediction; verbose=1 displays a progress bar
predictions = model.predict(patches_imgs_test, batch_size=32, verbose=1)
print("predicted images size :")
print(predictions.shape)

#===== Convert the prediction arrays into corresponding images
pred_patches = pred_to_imgs(predictions, patch_height, patch_width, "original")
```

pred_to_imgs is a particularly important function here; I will write a separate post examining the patch-extraction and recomposition strategy, how predictions are converted back into images, and the visualization.
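Note that this excerpt jumps from pred_patches straight to pred_imgs, orig_imgs, and gtruth_masks in the next block. In the original script, a recomposition step sits in between, rebuilding full-size images from the predicted patches. A sketch of what that step looks like, based on the functions imported earlier (the grid arguments in the non-average branch are illustrative and may differ from the repository's exact values):

```python
#===== Reconstruct full images from the predicted patches (sketch)
if average_mode == True:
    # overlapping patches: average the overlapping predictions at each pixel
    pred_imgs = recompone_overlap(pred_patches, new_height, new_width, stride_height, stride_width)
    orig_imgs = my_PreProc(test_imgs_orig[0:pred_imgs.shape[0], :, :, :])
    gtruth_masks = masks_test
else:
    # non-overlapping patches: stitch them back in a simple grid (rows x cols)
    pred_imgs = recompone(pred_patches, 13, 12)
    orig_imgs = recompone(patches_imgs_test, 13, 12)
    gtruth_masks = recompone(patches_masks_test, 13, 12)
```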
```python
# zero out every predicted pixel outside the border mask (outside the FOV)
kill_border(pred_imgs, test_border_masks)

## back to original dimensions
orig_imgs = orig_imgs[:, :, 0:full_img_height, 0:full_img_width]
pred_imgs = pred_imgs[:, :, 0:full_img_height, 0:full_img_width]
gtruth_masks = gtruth_masks[:, :, 0:full_img_height, 0:full_img_width]
print("Orig imgs shape: " + str(orig_imgs.shape))
print("pred imgs shape: " + str(pred_imgs.shape))
print("Gtruth imgs shape: " + str(gtruth_masks.shape))

# visualize the results, comparing predictions against the ground truth
assert (orig_imgs.shape[0] == pred_imgs.shape[0] and orig_imgs.shape[0] == gtruth_masks.shape[0])
N_predicted = orig_imgs.shape[0]
group = N_visual
assert (N_predicted % group == 0)
for i in range(int(N_predicted / group)):
    orig_stripe = group_images(orig_imgs[i*group:(i*group)+group, :, :, :], group)
    masks_stripe = group_images(gtruth_masks[i*group:(i*group)+group, :, :, :], group)
    pred_stripe = group_images(pred_imgs[i*group:(i*group)+group, :, :, :], group)
    total_img = np.concatenate((orig_stripe, masks_stripe, pred_stripe), axis=0)
    visualize(total_img, path_experiment + name_experiment + "_Original_GroundTruth_Prediction" + str(i)).show()
```

4. Evaluating the model
The author relies mainly on the model-evaluation functions of the sklearn.metrics module.
- sklearn.metrics.roc_curve: receiver operating characteristic (ROC) curve / accuracy evaluation
Computes the Receiver Operating Characteristic (ROC) curve. It applies only to binary classification problems.
The ROC (receiver operating characteristic) curve is a composite indicator of the sensitivity and specificity of a continuous-valued score, and it exposes the relationship between the two graphically: by sweeping a series of different cut-off values (decision thresholds) over the continuous output, one obtains a sequence of sensitivity/specificity pairs. The curve is drawn with the true positive rate (TPR, i.e. sensitivity) on the vertical axis and the false positive rate (FPR, i.e. 1 - specificity) on the horizontal axis, one point per threshold.

The ROC curve shows the trade-off between the proportion of positives the model identifies correctly and the proportion of negatives it mistakenly labels as positive: any increase in TPR comes at the cost of an increase in FPR. The area under the ROC curve, AUC (Area Under the ROC Curve), is a scalar measure of the model's accuracy.
Vertical axis: true positive rate (TPR), also called sensitivity: TPR = TP / (TP + FN) (correctly predicted positives / actual positives)
Horizontal axis: false positive rate (FPR), i.e. 1 - specificity: FPR = FP / (FP + TN) (negatives predicted as positive / actual negatives)
The function returns three arrays: fpr, tpr, and thresholds. To understand thresholds, recall an important capability of a classifier: probability output, i.e. how strongly the classifier believes a given sample belongs to the positive (or negative) class.

Score denotes the probability that each test sample belongs to the positive class. Going from high to low, each score value is used in turn as the threshold: when a sample's score is greater than or equal to the threshold, it is labeled positive, otherwise negative. Each choice of threshold yields one (FPR, TPR) pair, i.e. one point on the ROC curve. Setting the threshold to 1 and to 0 yields the points (0, 0) and (1, 1), respectively. Connecting all the (FPR, TPR) pairs traces out the ROC curve; the more thresholds are used, the smoother the curve. In fact, we do not even need true probabilities: any rating the classifier assigns to a sample will do, even outside the (0, 1) interval, as long as a higher rating means the classifier is more confident the sample is positive, with each rating value used in turn as the threshold.
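A minimal, self-contained illustration of roc_curve on toy data (unrelated to the retina experiment; the exact contents of the thresholds array vary slightly across scikit-learn versions):

```python
import numpy as np
from sklearn.metrics import roc_curve, roc_auc_score

y_true = np.array([0, 0, 1, 1])              # ground-truth labels
y_scores = np.array([0.1, 0.4, 0.35, 0.8])   # classifier scores for the positive class

fpr, tpr, thresholds = roc_curve(y_true, y_scores)
print(fpr)         # [0.  0.  0.5 0.5 1. ]
print(tpr)         # [0.  0.5 0.5 1.  1. ]
print(thresholds)  # one threshold per ROC point, in decreasing order
print(roc_auc_score(y_true, y_scores))  # 0.75
```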
```python
#====== Evaluate the results
print("\n\n========  Evaluate the results =======================")
# keep only the pixels inside the FOV
y_scores, y_true = pred_only_FOV(pred_imgs, gtruth_masks, test_border_masks)
print("Calculating results only inside the FOV:")
print("y scores pixels: " + str(y_scores.shape[0]) +
      " (radius 270: 270*270*3.14==228906), including background around retina: " +
      str(pred_imgs.shape[0] * pred_imgs.shape[2] * pred_imgs.shape[3]) + " (584*565==329960)")
print("y true pixels: " + str(y_true.shape[0]) +
      " (radius 270: 270*270*3.14==228906), including background around retina: " +
      str(gtruth_masks.shape[2] * gtruth_masks.shape[3] * gtruth_masks.shape[0]) + " (584*565==329960)")

# area under the ROC curve
fpr, tpr, thresholds = roc_curve(y_true, y_scores)
AUC_ROC = roc_auc_score(y_true, y_scores)
# test_integral = np.trapz(tpr, fpr)  # trapz is numpy integration
print("\nArea under the ROC curve: " + str(AUC_ROC))
roc_fig = plt.figure()  # renamed from roc_curve so the sklearn function is not shadowed
plt.plot(fpr, tpr, '-', label='Area Under the Curve (AUC = %0.4f)' % AUC_ROC)
plt.title('ROC curve')
plt.xlabel("FPR (False Positive Rate)")
plt.ylabel("TPR (True Positive Rate)")
plt.legend(loc="lower right")
plt.savefig(path_experiment + "ROC.png")
```
- sklearn.metrics.precision_recall_curve: precision-recall curve
Take a recommendation/retrieval algorithm as an example:
A: retrieved and relevant (found, and wanted)
B: retrieved but irrelevant (found, but useless)
C: not retrieved, yet relevant (not found, though actually wanted)
D: not retrieved and irrelevant (not found, and useless)
If we want to find as much of the relevant content as possible, we are pursuing recall, A / (A + C): the larger, the better.
If we want as much as possible of what we retrieve to be truly relevant, and as little as possible to be irrelevant, we are pursuing precision, A / (A + B): the larger, the better. A tiny worked example follows below.
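A small numeric sketch of these two definitions, using hypothetical counts:

```python
# Hypothetical retrieval counts, matching the A/B/C definitions above
A = 80  # retrieved and relevant
B = 20  # retrieved but irrelevant
C = 40  # not retrieved, yet relevant

recall = A / (A + C)     # 80 / 120 ≈ 0.667: how much of the relevant material was found
precision = A / (A + B)  # 80 / 100 = 0.800: how much of what was found is relevant
print(recall, precision)
```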
```python
# Precision-recall curve
precision, recall, thresholds = precision_recall_curve(y_true, y_scores)
precision = np.fliplr([precision])[0]  # so the array is increasing (you won't get negative AUC)
recall = np.fliplr([recall])[0]        # so the array is increasing (you won't get negative AUC)
AUC_prec_rec = np.trapz(precision, recall)
print("\nArea under Precision-Recall curve: " + str(AUC_prec_rec))
prec_rec_curve = plt.figure()
plt.plot(recall, precision, '-', label='Area Under the Curve (AUC = %0.4f)' % AUC_prec_rec)
plt.title('Precision - Recall curve')
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.legend(loc="lower right")
plt.savefig(path_experiment + "Precision_recall.png")
```

- sklearn.metrics.confusion_matrix: confusion matrix
The confusion matrix is a tool for assessing the accuracy of a supervised classification algorithm: the model's predictions are compared against the test labels, and various metrics derived from the matrix quantify the classification performance.
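For binary labels, sklearn lays the matrix out as [[TN, FP], [FN, TP]]; this is why the code below reads confusion[0,0] as true negatives and confusion[1,1] as true positives. A toy illustration (unrelated to the retina data):

```python
import numpy as np
from sklearn.metrics import confusion_matrix

y_true = np.array([0, 0, 1, 1, 1, 0])
y_pred = np.array([0, 1, 1, 1, 0, 0])
print(confusion_matrix(y_true, y_pred))
# [[2 1]   <- [TN, FP]
#  [1 2]]  <- [FN, TP]
```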
```python
# Confusion matrix
threshold_confusion = 0.5
print("\nConfusion matrix: Custom threshold (for positive) of " + str(threshold_confusion))
y_pred = np.empty((y_scores.shape[0]))
for i in range(y_scores.shape[0]):
    if y_scores[i] >= threshold_confusion:
        y_pred[i] = 1
    else:
        y_pred[i] = 0
confusion = confusion_matrix(y_true, y_pred)
print(confusion)

accuracy = 0
if float(np.sum(confusion)) != 0:
    accuracy = float(confusion[0, 0] + confusion[1, 1]) / float(np.sum(confusion))
print("Global Accuracy: " + str(accuracy))

specificity = 0
if float(confusion[0, 0] + confusion[0, 1]) != 0:
    specificity = float(confusion[0, 0]) / float(confusion[0, 0] + confusion[0, 1])
print("Specificity: " + str(specificity))

sensitivity = 0
if float(confusion[1, 1] + confusion[1, 0]) != 0:
    sensitivity = float(confusion[1, 1]) / float(confusion[1, 1] + confusion[1, 0])
print("Sensitivity: " + str(sensitivity))

precision = 0
if float(confusion[1, 1] + confusion[0, 1]) != 0:
    precision = float(confusion[1, 1]) / float(confusion[1, 1] + confusion[0, 1])
print("Precision: " + str(precision))
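As an aside, the element-wise thresholding loop above can be replaced by a single vectorized comparison; a minimal equivalent sketch:

```python
import numpy as np

threshold_confusion = 0.5
y_scores = np.array([0.2, 0.7, 0.5, 0.9])  # example scores
y_pred = (y_scores >= threshold_confusion).astype(np.float64)
print(y_pred)  # [0. 1. 1. 1.]
```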
- sklearn.metrics.jaccard_similarity_score: Jaccard similarity

The Jaccard index, also called the Jaccard similarity coefficient, is used to compare the similarity and diversity of finite sample sets. Its definition:
Given two sets A and B, the Jaccard coefficient is the size of their intersection divided by the size of their union: J(A, B) = |A ∩ B| / |A ∪ B|. The larger the value, the more similar the sets.
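A toy illustration of this set-based definition on binary vectors (note that the since-removed jaccard_similarity_score reduced to plain accuracy for binary label vectors, which is one reason newer scikit-learn replaced it with jaccard_score):

```python
import numpy as np

y_true = np.array([1, 1, 0, 1, 0, 0])
y_pred = np.array([1, 0, 0, 1, 1, 0])
intersection = np.sum((y_true == 1) & (y_pred == 1))  # |A ∩ B| = 2
union = np.sum((y_true == 1) | (y_pred == 1))         # |A ∪ B| = 4
print(intersection / union)                           # 0.5
```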
```python
# Jaccard similarity index
jaccard_index = jaccard_similarity_score(y_true, y_pred, normalize=True)
print("\nJaccard similarity score: " + str(jaccard_index))
```

(On scikit-learn >= 0.23, jaccard_similarity_score no longer exists; jaccard_score is its replacement.)

- sklearn.metrics.f1_score: F1 score
The F1 score combines precision and recall into a single number: it is their harmonic mean, F1 = 2 * P * R / (P + R), and can be read as an average of the two effects (e.g. P = 0.8 and R = 0.667 give F1 ≈ 0.727).
```python
# F1 score
F1_score = f1_score(y_true, y_pred, labels=None, average='binary', sample_weight=None)
print("\nF1 score (F-measure): " + str(F1_score))
```
Finally, save all the results to a text file.
```python
# Save the results
file_perf = open(path_experiment + 'performances.txt', 'w')
file_perf.write("Area under the ROC curve: " + str(AUC_ROC)
                + "\nArea under Precision-Recall curve: " + str(AUC_prec_rec)
                + "\nJaccard similarity score: " + str(jaccard_index)
                + "\nF1 score (F-measure): " + str(F1_score)
                + "\n\nConfusion matrix:" + str(confusion)
                + "\nACCURACY: " + str(accuracy)
                + "\nSENSITIVITY: " + str(sensitivity)
                + "\nSPECIFICITY: " + str(specificity)
                + "\nPRECISION: " + str(precision))
file_perf.close()
```

Summary

This completes the walkthrough of RetinaNN_predict.py: loading the configuration, splitting the test images into patches, running the trained U-net, recomposing and masking the predictions, and evaluating them with the ROC/AUC, the precision-recall curve, the confusion matrix, Jaccard similarity, and the F1 score.