pytorch(1) -- Image Classification
1. Preface
Right after graduating I used Caffe, which was all the rage back then but has since faded into obscurity. In 2020 I switched to PyTorch, and this series records the work along the way...
This first post documents PyTorch image-classification code.
2. Preparing the Training Set
This post trains and predicts on 7 classes of images. Each class is stored in its own folder, named 1 through 7, and all 7 folders live inside a dataset folder.
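With hypothetical file names, the layout looks like this:

```
dataset/
├── 1/
│   ├── img_0001.jpg
│   └── ...
├── 2/
├── ...
└── 7/
```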
The following script, genTxt.py, generates the txt files used for training, validation, and testing.
```python
#coding=utf-8
#genTxt.py
import os
import random

def writeTxt(root, f, pic_list, txt_name):
    # append "path label" lines; labels are zero-based, so folder "1" becomes label 0
    for pic in pic_list:
        save_path = "dataset" + "/" + f + "/" + pic + " " + str(int(f) - 1) + "\n"
        with open(txt_name, "a") as ftxt:
            ftxt.write(save_path)

root = r"."
folder = ["1", "2", "3", "4", "5", "6", "7"]
val_thr = 0.1   # fraction of each class used for validation
test_thr = 0.1  # fraction of each class used for testing

# start from clean output files
if os.path.exists("val.txt"):
    os.remove("val.txt")
if os.path.exists("test.txt"):
    os.remove("test.txt")
if os.path.exists("train.txt"):
    os.remove("train.txt")

for f in folder:
    pic_root = os.path.join(root, f)
    jpeg_list = os.listdir(pic_root)
    random.shuffle(jpeg_list)
    val_num = int(val_thr * len(jpeg_list))
    test_num = int(test_thr * len(jpeg_list))
    val_list = jpeg_list[0:val_num]
    test_list = jpeg_list[val_num:val_num + test_num]
    train_list = jpeg_list[val_num + test_num:]
    writeTxt(root, f, val_list, "val.txt")
    writeTxt(root, f, test_list, "test.txt")
    writeTxt(root, f, train_list, "train.txt")
```

Running it generates train.txt, val.txt, and test.txt.
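Each line pairs an image path with a zero-based label (the folder name minus one). With hypothetical file names, train.txt looks like:

```
dataset/1/img_0001.jpg 0
dataset/3/img_0042.jpg 2
dataset/7/img_0007.jpg 6
```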
3. Writing the Data Loader
With the dataset ready, we need a loader to read the data. Go back to the directory that contains the dataset folder and write datasetLoader.py:
```python
#coding=utf-8
#datasetLoader.py
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision import transforms
import numpy as np
import os

# ----------------- a custom dataset loader --------------------------
def default_loader(path):
    src = Image.open(path)
    resized = src.resize((48, 48))  # resize the image
    return resized.convert('RGB')   # required for 3-channel input
    # return Image.open(path)  # after np.array() the shape would be (h, w, c)

class MyDataset(Dataset):
    # Three methods must be overridden: __init__, __getitem__ and __len__.
    # __init__: read paths and labels from the txt file into self.imgs;
    #           store transform, target_transform and loader
    # __getitem__: given an index, load the image listed in self.imgs and
    #              return (image, label)
    # __len__: return the dataset size, i.e. len(self.imgs)
    def __init__(self, txt, transform=None, target_transform=None, loader=default_loader):
        fh = open(txt, 'r')
        imgs = []
        for line in fh:
            line = line.strip('\n')
            line = line.rstrip()  # strip trailing whitespace
            if len(line) == 0:
                continue
            words = line.split()  # split on whitespace
            # store (path, label); line[:-2] strips the trailing " N"
            # (assumes a single-digit label). Pixels are only read in __getitem__.
            imgs.append((line[:-2], int(words[-1])))
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        fn, label = self.imgs[index]
        img = self.loader(fn)
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __len__(self):
        return len(self.imgs)

def calMeanStd(root, sample_size=100):  # estimate from sample_size random training images
    train_data = MyDataset(txt=root + 'train.txt', transform=transforms.ToTensor())
    train_loader = DataLoader(dataset=train_data, batch_size=sample_size, shuffle=True)
    train = next(iter(train_loader))[0]  # one batch of data
    mean = np.mean(train.numpy(), axis=(0, 2, 3))
    std = np.std(train.numpy(), axis=(0, 2, 3))
    return mean, std

def dLoader(root, batch_size_train=64, batch_size_test=64, is_shuffle=True, kwargs={}):
    # mean, std = calMeanStd(root)
    # print("mean, std:", mean, std)  # run once, then hard-code the values below
    mean, std = [0.16965699, 0.17889683, 0.1844586], [0.11298971, 0.11618855, 0.11640119]
    trans_train = transforms.Compose([
        # transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(degrees=30),
        transforms.ToTensor(),           # to tensor, channel order (c, h, w), pixels scaled to (0, 1)
        transforms.Normalize(mean, std)  # normalize: (x - mean) / std
    ])  # training-time preprocessing
    trans_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])  # test-time preprocessing
    train_data = MyDataset(txt=os.path.join(root, 'train.txt'), transform=trans_train)
    test_data = MyDataset(txt=os.path.join(root, 'val.txt'), transform=trans_test)
    train_loader = DataLoader(dataset=train_data, batch_size=batch_size_train, shuffle=is_shuffle, **kwargs)
    test_loader = DataLoader(dataset=test_data, batch_size=batch_size_test, **kwargs)  # kwargs passes pin_memory etc.
    return train_loader, test_loader

if __name__ == "__main__":
    print(calMeanStd('dataset/'))
```

datasetLoader.py reads each image and resizes it to (48, 48); the augmentation is random horizontal/vertical flipping plus random rotation. Running the file prints the mean and standard deviation, which can then be hard-coded into dLoader.
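A quick way to check the loader, as a sketch (the shapes follow from the 48×48 resize and the default batch size of 64):

```python
from datasetLoader import dLoader

train_loader, val_loader = dLoader('./dataset/')
images, labels = next(iter(train_loader))
print(images.shape)  # torch.Size([64, 3, 48, 48]) for a full batch
print(labels[:8])    # integer class labels in [0, 6]
```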
4. Writing the Model File
Now build the network. The loader outputs (48, 48) images; the simple classifier below, model.py, stacks 4 convolutional layers, 4 pooling layers, and 3 fully connected layers.
```python
#coding=utf-8
#model.py
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layer1 = nn.Sequential(nn.Conv2d(3, 6, kernel_size=5, padding=2),
                                    nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(nn.Conv2d(6, 12, kernel_size=5, padding=2),
                                    nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer3 = nn.Sequential(nn.Conv2d(12, 24, kernel_size=5, padding=2),
                                    nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer4 = nn.Sequential(nn.Conv2d(24, 36, kernel_size=5, padding=2),
                                    nn.MaxPool2d(kernel_size=2, stride=2))
        self.fc = nn.Sequential(
            nn.Linear(36 * 3 * 3, 128),
            nn.Dropout(0.2),
            nn.ReLU(inplace=True),
            nn.Linear(128, 64),
            nn.Dropout(0.2),
            nn.ReLU(inplace=True),
            nn.Linear(64, 7)  # the last layer must match the number of classes
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, 36 * 3 * 3)
        x = self.fc(x)
        return x
```
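The 36 * 3 * 3 input size of the first linear layer follows from four 2×2 poolings on a 48×48 input (48 → 24 → 12 → 6 → 3). A quick sanity check, as a sketch:

```python
import torch
from model import Net

net = Net()
dummy = torch.randn(1, 3, 48, 48)  # a fake batch of one RGB image at the loader's size
out = net(dummy)
print(out.shape)  # expected: torch.Size([1, 7]), one logit per class
```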
5. Writing the Training Script
Next, write the training script trainVal.py:
```python
#coding=utf-8
#trainVal.py
import argparse, os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim

from datasetLoader import dLoader
from model import Net

def train(args, net, device, train_loader, optimizer, epoch, scheduler):
    running_loss = 0.0
    correct = 0.
    batch_num = 0
    # Important: if the model uses BatchNorm or Dropout, call net.train() before
    # training and net.eval() before testing; otherwise it is not required.
    net.train()
    criterion = nn.CrossEntropyLoss()  # nn losses are objects: create first, then call
    # iterate over batches
    for batch_idx, (inputs, labels) in enumerate(train_loader, 0):
        # move the inputs to the device
        inputs = inputs.to(device)
        labels = labels.to(device)
        # zero the gradients
        optimizer.zero_grad()
        # forward + backward
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        running_loss += loss.item()
        batch_num += 1
        pred = outputs.argmax(dim=1, keepdim=True)  # index of the max log-probability
        correct += pred.eq(labels.view_as(pred)).sum().item()
        # update the parameters
        optimizer.step()
        if batch_idx % args.log_interval == 0:  # log the loss every args.log_interval batches
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (batch_idx + 1) * len(inputs), len(train_loader.dataset),
                100 * (batch_idx + 1) * len(inputs) / len(train_loader.dataset),
                running_loss / (batch_idx + 1)))
    scheduler.step()

def test(args, net, device, test_loader, train_loader, epoch):
    net.eval()  # required when using BatchNorm or Dropout
    test_loss = 0
    correct = 0
    with torch.no_grad():  # no gradients are computed here
        for data, label in test_loader:
            data, label = data.to(device), label.to(device)
            output = net(data)
            test_loss += F.cross_entropy(output, label, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # index of the max log-probability
            correct += pred.eq(label.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

def main():
    parser = argparse.ArgumentParser(description='PyTorch image classification example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=64, metavar='N',
                        help='input batch size for testing (default: 64)')
    parser.add_argument('--epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=8, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=True,
                        help='for saving the current model')
    # parse the arguments
    args = parser.parse_args()
    # use CUDA if available unless it is disabled
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)  # fix the random seed
    device = torch.device("cuda" if use_cuda else "cpu")
    # prepare the data loaders
    kwargs = {'num_workers': 2, 'pin_memory': True} if use_cuda else {}
    data_root = './dataset/'
    train_loader, test_loader = dLoader(data_root, args.batch_size, args.test_batch_size, kwargs=kwargs)  # our own loader
    # build the net; both training and testing need it
    net = Net().to(device)
    print("Create Net:", net)
    # the optimizer is only used during training
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 40, gamma=0.5)
    # training loop
    for epoch in range(args.epochs):
        train(args, net, device, train_loader, optimizer, epoch, scheduler)
        test(args, net, device, test_loader, train_loader, epoch)  # no optimizer needed
    if args.save_model:
        torch.save(net.state_dict(), "./cnn.pth")  # without state_dict(), structure and weights are saved together

if __name__ == "__main__":
    main()
```

After training finishes, you get the weight file cnn.pth.
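Training runs from the command line; for example (all flags are optional and default to the values defined in main()):

python trainVal.py --epochs 100 --batch-size 64 --lr 0.01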
6. Predicting Images
Write predictTest.py to run predictions in two ways: one predicts a single image at a time, the other predicts a whole batch at once.
```python
#coding=utf-8
#predictTest.py
import torch, cv2
import torch.nn.functional as F
from PIL import Image
import numpy as np
from torchvision import transforms

from model import Net

def predictPic(net, trans, img, prob_thr=0.98):
    # Add a batch dimension: the saved model expects 4-D input
    # [batch_size, channels, height, width], while a single image is 3-D,
    # so after unsqueeze the shape is [1, 3, 48, 48].
    img = img.unsqueeze(0)
    with torch.no_grad():
        output = net(img)
    prob = F.softmax(output, dim=1)
    prob = prob.cpu().numpy()  # move GPU tensors back to the cpu before calling .numpy()
    # print(prob)  # probabilities of the 7 classes
    pred = np.argmax(prob)  # pick the most probable class (prob_thr is currently unused)
    pred_idx = pred.item()
    return pred_idx  # predicts one image per call

def predict_1():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = Net()
    net.load_state_dict(torch.load('./cnn.pth'))
    net = net.to(device)
    net.eval()
    mean, std = [0.16965699, 0.17889683, 0.1844586], [0.11298971, 0.11618855, 0.11640119]
    trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
    total = 0
    right = 0
    with open("dataset/test.txt", "r") as ftxt:
        for line in ftxt.readlines():
            line = line.rstrip("\n").strip(" ")
            img_path = line[:-2]
            img_idx = int(line[-1])
            cv_img = cv2.imread(img_path)
            cv_img = cv2.resize(cv_img, (48, 48))
            img = Image.fromarray(cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB))  # cv2 loads BGR; PIL expects RGB
            img = trans(img)
            img = img.to(device)
            res_idx = predictPic(net, trans, img, prob_thr=0.70)
            if res_idx == img_idx:
                right += 1
            total += 1
    print("total:%d right:%d Acc:%f" % (total, right, right / total))

def predictBatch(net, trans, img_batch, prob_thr=0.98):
    with torch.no_grad():
        output = net(img_batch)
    prob = F.softmax(output, dim=1)
    prob = prob.cpu().numpy()
    prob_idx = np.argmax(prob, axis=1)
    return prob_idx  # predicts one whole batch per call

def predict_2():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = Net()
    net.load_state_dict(torch.load('./cnn.pth'))
    net = net.to(device)
    net.eval()
    mean, std = [0.16965699, 0.17889683, 0.1844586], [0.11298971, 0.11618855, 0.11640119]
    trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
    H, W = 48, 48
    images = np.array([])
    real_idx = []
    with open("dataset/test.txt", "r") as ftxt:
        for line in ftxt.readlines():
            line = line.rstrip("\n").strip(" ")
            img_path = line[:-2]
            real_idx.append(int(line[-1]))
            cv_img = cv2.imread(img_path)
            cv_img = cv2.resize(cv_img, (W, H))
            img = Image.fromarray(cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB))  # cv2 loads BGR; PIL expects RGB
            img = trans(img)
            images = np.append(images, img.numpy())
    img_batch = images.reshape(-1, 3, H, W)  # rebuild the 4-D batch from the flat array
    img_batch = torch.from_numpy(img_batch).float()
    img_batch = img_batch.to(device)
    prob_idx = predictBatch(net, trans, img_batch, prob_thr=0.5)
    real_idx = np.array(real_idx)
    total = real_idx.shape[0]
    right = np.sum(real_idx == prob_idx)
    print("total:%d right:%d Acc:%f" % (total, right, right / total))

if __name__ == "__main__":
    predict_1()
```

7. Viewing Train/Loss and Test/Accu Curves with tensorboardX's SummaryWriter
tensorboardX relies on TensorFlow's TensorBoard, so first install TensorFlow with pip, then install tensorboardX.
Then import the package, initialize the writer (which creates a folder named log under the current path), and record the training loss, the test accuracy, and the iteration count.
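A minimal sketch of these three steps, assuming the standard tensorboardX API; the Train/Loss and Test/Accu tag names follow this section's title:

```python
from tensorboardX import SummaryWriter

writer = SummaryWriter('log')  # creates ./log and writes event files into it

# In train(), after each epoch, log the average training loss:
#   writer.add_scalar('Train/Loss', running_loss / batch_num, epoch)
# In test(), after accuracy is computed, log it:
#   writer.add_scalar('Test/Accu', correct / len(test_loader.dataset), epoch)

writer.close()  # flush and close the writer once training ends
```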
Then open a command prompt in the current directory and run:
tensorboard --logdir=./log
Open 127.0.0.1:6006 in a browser to see the curves drawn by tensorboardX.