過年期間由於疫情影響,划水多天後良心發現,遂開始學習。
模型訓練步驟
A typical training procedure for a neural network is as follows:
- Define the neural network that has some learnable parameters (or weights)
- Iterate over a dataset of inputs
- Process input through the network
- Compute the loss (how far is the output from being correct)
- Propagate gradients back into the network's parameters
- Update the weights of the network, typically using a simple update rule:
  weight = weight - learning_rate * gradient
詳細代碼(具體知識見註釋)
pytorch_demo_model.py
import torch.nn as nn
import torch.nn.functional as F
"""
定義一個類,這個類繼承於nn.Module,實現兩個方法:初始化函數和正向傳播
實例化這個類之後,將參數傳入這個類中,進行正向傳播
"""
"""
If running on Windows and you get a BrokenPipeError, try setting
the num_worker of torch.utils.data.DataLoader() to 0.
"""
class LeNet(nn.Module):
    """LeNet-style CNN for CIFAR-10: 3x32x32 input, 10-class logits output."""

    def __init__(self):
        # super() resolves the parent-class call correctly under multiple inheritance.
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        # Fully connected layers consume the flattened feature map; the
        # 120/84 widths follow the official PyTorch tutorial, and the 10
        # outputs correspond to the CIFAR-10 classes.
        self.fc1 = nn.Linear(32 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))   # (3,32,32) -> (16,28,28): 32-5+1 = 28
        x = self.pool1(x)           # -> (16,14,14)
        x = F.relu(self.conv2(x))   # -> (32,10,10): 14-5+1 = 10
        x = self.pool2(x)           # -> (32,5,5)
        x = x.view(-1, 32 * 5 * 5)  # flatten to (N, 800)
        x = F.relu(self.fc1(x))     # -> (N, 120)
        x = F.relu(self.fc2(x))     # -> (N, 84)
        # BUG FIX: no ReLU on the last layer. nn.CrossEntropyLoss expects raw
        # logits; clamping them to >= 0 destroys all negative evidence and
        # cripples training.
        x = self.fc3(x)             # -> (N, 10) raw logits
        return x
pytorch_demo_train.py
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim  # BUG FIX: `optim` is used below but was never imported
from pytorch_demo_model import LeNet
import matplotlib.pyplot as plt  # BUG FIX: was `import matplotlib as plt`; plotting needs pyplot
import torchvision.transforms as transforms
import numpy as np

batch_size = 36
learning_rate = 1e-3

# Per-channel standardization: output = (input - 0.5) / 0.5, i.e. [0,1] -> [-1,1].
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

# 50,000 training images, expected under ./data (download=False assumes they exist).
trainset = torchvision.datasets.CIFAR10(root="./data", train=True, download=False, transform=transform)
# On Windows, num_workers must stay 0 to avoid BrokenPipeError.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=0)

# 10,000 test images.
# BUG FIX: this previously used train=True, so "test accuracy" was measured
# on the training split.
testset = torchvision.datasets.CIFAR10(root="./data", train=False, download=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=10000, shuffle=True, num_workers=0)

test_data_iter = iter(testloader)
# BUG FIX: DataLoader iterators lost their .next() method in modern PyTorch;
# the builtin next() works on every version.
test_img, test_label = next(test_data_iter)

classes = ("plane", "car", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck")
def imshow(img):
    """Undo the Normalize((0.5,)*3, (0.5,)*3) step and display the tensor as an image."""
    unnormalized = img / 2 + 0.5  # inverse of the transform: input = output * 0.5 + 0.5
    pixels = unnormalized.numpy()
    # Tensors are laid out [channel, height, width]; matplotlib expects
    # [height, width, channel], hence the axis permutation (1, 2, 0).
    plt.imshow(np.transpose(pixels, (1, 2, 0)))
    plt.show()
# Preview a few samples (set the test loader's batch_size to ~4 first — no
# need to render all 10,000 images):
# print(''.join('%5s' % classes[test_label[j]] for j in range(4)))
# imshow(torchvision.utils.make_grid(test_img))

import torch.optim as optim  # BUG FIX: `optim` was used below without being imported

Mynet = LeNet()
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(Mynet.parameters(), lr=learning_rate)

for epoch in range(10):  # loop over the dataset multiple times
    running_loss = 0.
    # enumerate yields (batch index, batch) pairs
    for step, data in enumerate(trainloader, start=0):
        # data is a list of [inputs, labels]
        inputs, labels = data
        # Gradients accumulate by default; clear them before each batch.
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = Mynet(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if step % 500 == 499:  # report every 500 mini-batches
            with torch.no_grad():  # skip autograd bookkeeping during evaluation
                outputs = Mynet(test_img)  # [batch, 10]
                # argmax over dim=1 (the class dimension); [1] keeps the indices only
                y_pred = torch.max(outputs, dim=1)[1]
                # .item() converts the 0-dim tensor to a Python number;
                # test_label.size(0) is the number of test samples
                accuracy = (y_pred == test_label).sum().item() / test_label.size(0)
                print('[%d, %5d] train_loss: %.3f test_accuracy: %.3f' %
                      (epoch + 1, step + 1, running_loss / 500, accuracy))  # mean loss over 500 batches
                running_loss = 0.  # reset for the next 500-batch window

print("Training finished")
save_path = './Lenet.pth'
torch.save(Mynet.state_dict(), save_path)
訓練結果
在網上下載一張飛機的圖片,保存在同一路徑下
pytorch_demo_test.py
import torch
import torchvision.transforms as transforms
from PIL import Image
from pytorch_demo_model import LeNet

# Same preprocessing as training, plus a resize to the 32x32 input LeNet expects.
transform = transforms.Compose(
    [transforms.Resize((32, 32)),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]  # output = (input - 0.5) / 0.5
)

classes = ("plane", "car", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck")

net = LeNet()
net.load_state_dict(torch.load('Lenet.pth'))  # load the trained weights

im = Image.open('plane.jpg')
# BUG FIX (robustness): force 3 channels — a grayscale or RGBA file would
# otherwise break the 3-value Normalize and the 3-channel conv1.
im = im.convert('RGB')
im = transform(im)               # -> [C, H, W] tensor
im = torch.unsqueeze(im, dim=0)  # add batch dim -> [N, C, H, W]

with torch.no_grad():
    outputs = net(im)
    # argmax over the class dimension; .numpy() yields the index as an array
    predict = torch.max(outputs, dim=1)[1].data.numpy()
print(classes[int(predict)])