每一段代碼後都有跑出來的結果
參考網址 https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html
1. Neural Networks
1.1 Define the network
# The snippet relies on these names; import them here so it runs on its own.
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small LeNet-style CNN: two conv/pool stages followed by three linear layers.

    With 3x3 convolutions and 2x2 pooling, a (N, 1, 32, 32) input shrinks
    32 -> 30 -> 15 -> 13 -> 6, which is where the 16 * 6 * 6 input size of
    ``fc1`` comes from. The output has 10 scores per sample.
    """

    def __init__(self):
        # Python 2 compatible spelling; on Python 3 this can be super().__init__()
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 3x3 square convolution kernel
        self.conv1 = nn.Conv2d(1, 6, 3)
        # 6 input channels, 16 output channels, 3x3 kernel
        self.conv2 = nn.Conv2d(6, 16, 3)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 6 * 6, 120)  # 6*6 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the forward pass and return the raw scores from ``fc3``."""
        # conv1 -> ReLU -> max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the window is a square you can specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # view is the counterpart of NumPy's reshape: collapse every
        # non-batch dimension so the linear layers get one row per sample
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first."""
        num_features = 1
        for dim in x.size()[1:]:  # all dimensions except the batch dimension
            num_features *= dim
        return num_features
# Build the network, inspect its parameters, then run one forward and one backward pass.
net = Net()
print(net)
params = list(net.parameters()) # learnable parameters: a weight and a bias for each of the 5 layers, 10 tensors in total
#print(params)
print(len(params)) # number of parameter tensors
print(params[0].size()) # conv1's .weight
# NOTE: `input` shadows the builtin of the same name; it is kept because later snippets reuse it
input = torch.randn(1, 1, 32, 32) # random tensor: batch of 1, 1 channel, height and width both 32
out = net(input) # forward pass through the network
print(out)
net.zero_grad() # reset the gradient buffers of all parameters
out.backward(torch.randn(1, 10)) # backpropagate a random upstream gradient shaped like out
這裏沒有結果輸出,但有一個NOTE
torch.nn only supports mini-batches. The entire torch.nn package only supports inputs that are a mini-batch of samples, and not a single sample.
For example, nn.Conv2d will take in a 4D Tensor of nSamples x nChannels x Height x Width.
If you have a single sample, just use input.unsqueeze(0) to add a fake batch dimension.
1.2 Loss Function
# Compute a mean-squared-error loss against a dummy target, then walk a few steps back through the autograd graph.
output = net(input) # forward pass producing the prediction
target = torch.randn(10) # a dummy target, for example
target = target.view(1, -1) # make it the same shape as output
criterion = nn.MSELoss() # mean-squared-error loss
loss = criterion(output, target) # MSE between the output and the target
print(loss)
print(loss.grad_fn) # MSELoss
print(loss.grad_fn.next_functions[0][0]) # Linear, the node that feeds MSELoss
print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) # ReLU, the node that feeds Linear
1.3 Backprop
要反向傳播誤差,我們要做的就是調用loss.backward()。不過,需要清除現存的梯度值,否則梯度將累積到現有的梯度中。
# Clear stale gradients, backpropagate the loss, and show conv1's bias gradient before and after.
net.zero_grad() # zeroes the gradient buffers of all parameters
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad) # bias gradient right after the reset
loss.backward()
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad) # bias gradient filled in by the backward pass
1.4 Update the weights
在SGD中使用的最簡單的更新規則爲:
weight = weight - learning_rate * gradient
用python代碼實現如下
learning_rate = 0.01
# Plain SGD step: weight = weight - learning_rate * gradient.
# Running under no_grad keeps the in-place update out of the autograd graph
# without going through the legacy `.data` alias.
with torch.no_grad():
    for f in net.parameters():
        if f.grad is None:  # parameter took no part in the last backward pass
            continue
        f.sub_(f.grad * learning_rate)  # in-place: f <- f - learning_rate * grad
# One training step driven by a torch.optim optimizer instead of the hand-written update.
import torch.optim as optim # optimizers offering further update rules, e.g. Nesterov-SGD, Adam, RMSProp
# create your optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01) # SGD with a learning rate of 0.01
# in your training loop:
optimizer.zero_grad() # zero the gradient buffers
output = net(input) # forward pass
loss = criterion(output, target) # compute the loss
loss.backward() # backward pass
optimizer.step() # Does the update, applying the optimizer's rule to the parameters
2. Training a classifier
2.1 Loading and normalizing CIFAR10
這部分要CIFAR10數據集,懶得弄了,就不跑了,只寫一下程序註釋。
### torchvision是一個圖像操作的庫
### transforms是對圖像做預處理的包
import torch
import torchvision
import torchvision.transforms as transforms
# Preprocessing applied to every image:
#   1. ToTensor turns the PIL image into a torch.Tensor with values in [0, 1];
#   2. Normalize computes (x - 0.5) / 0.5 per channel, so values end up in [-1, 1].
channel_mean = (0.5, 0.5, 0.5)
channel_std = (0.5, 0.5, 0.5)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# Training split. download=True fetches the dataset from the internet when it
# is not already present under root; every sample goes through `transform`.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
# Mini-batches of 4, reshuffled every epoch. num_workers is the number of
# loader subprocesses (0 would load in the main process).
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=2)

# Test split: identical setup except for train=False.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
# The test data does not need shuffling.
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=2)

# Class names, indexed by label
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)
加載一些數據看看效果:
import matplotlib.pyplot as plt
import numpy as np
# helper to show an image
def imshow(img):
    """Undo the 0.5/0.5 normalization and display ``img`` with matplotlib.

    ``img`` is a 3-D tensor whose first axis is moved to the end before
    plotting (channel-first to channel-last).
    """
    restored = img / 2 + 0.5  # unnormalize: back from [-1, 1] to [0, 1]
    pixels = restored.numpy()
    plt.imshow(pixels.transpose((1, 2, 0)))  # reorder axes for matplotlib
    plt.show()
# get some random training images
dataiter = iter(trainloader)  # each step yields one mini-batch, hence the four pictures
# DataLoader iterators no longer offer a .next() method; the builtin next() always works
images, labels = next(dataiter)
# show images
imshow(torchvision.utils.make_grid(images))  # make_grid tiles the batch into a single image
# print labels (one per image in the batch, whatever its size)
print(' '.join('%5s' % classes[label] for label in labels))
2.2 Define a Convolutional Neural Network
import torch.nn as nn
import torch.nn.functional as F
# Define a network derived from nn.Module with the layout
# conv1 -> ReLU -> pool -> conv2 -> ReLU -> pool -> fc1 -> ReLU -> fc2 -> ReLU -> fc3 -> scores
class Net(nn.Module):
    """CNN for 3-channel 32x32 images producing 10 class scores per sample.

    With 5x5 convolutions and 2x2 pooling the spatial size shrinks
    32 -> 28 -> 14 -> 10 -> 5, hence the 16 * 5 * 5 input size of ``fc1``.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)  # shared by both conv stages
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the raw class scores for a batch ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 16 * 5 * 5), this never regroups elements across samples,
        # so a wrongly sized input fails at fc1 instead of silently
        # producing a different batch size.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


# instantiate the network
net = Net()
2.3 Define a Loss function and optimizer
# Loss function and optimizer used by the training loop.
import torch.optim as optim
criterion = nn.CrossEntropyLoss() # cross-entropy loss
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) # SGD with momentum 0.9 and a learning rate of 0.001
2.4 Train the network
# Train for two passes over the dataset, reporting the mean loss of every
# 2000 mini-batches.
for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(trainloader):  # i counts mini-batches from 0
        # data is a list of [inputs, labels]
        inputs, labels = data

        # clear the gradients left over from the previous step
        optimizer.zero_grad()

        # forward pass, loss, backward pass, parameter update
        outputs = net(inputs)  # class scores
        loss = criterion(outputs, labels)  # cross-entropy of the scores
        loss.backward()
        optimizer.step()  # momentum-SGD update

        # statistics: accumulate the loss until 2000 mini-batches are seen
        running_loss += loss.item()
        step = i + 1
        if step % 2000 != 0:
            continue
        print('[%d, %5d] loss: %.3f' % (epoch + 1, step, running_loss / 2000))
        running_loss = 0.0  # start over for the next 2000 mini-batches

print('Finished Training')
保存網絡:
PATH = './cifar_net.pth' # file the trained weights are written to
torch.save(net.state_dict(), PATH) # saves the network's state_dict rather than the module object itself
2.5 Test the network on the test data
# Just take one batch from the test set to have a look
dataiter = iter(testloader)
# DataLoader iterators no longer offer a .next() method; the builtin next() always works
images, labels = next(dataiter)
# print images
imshow(torchvision.utils.make_grid(images))
# one ground-truth label per image in the batch, whatever its size
print('GroundTruth: ', ' '.join('%5s' % classes[label] for label in labels))
導入訓練好的網絡:
net = Net() # fresh instance of the network
net.load_state_dict(torch.load(PATH)) # load the state_dict stored at PATH into it
前向傳播:
outputs = net(images) # forward pass on the batch held in `images`
根據前向傳播的結果預測類別:
_, predicted = torch.max(outputs, 1) # max over dim 1 (within each row); the returned indices are the predicted classes
# prints the class names of the first four predictions
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]]
                              for j in range(4)))
看看在整個測試集上的效果:
# Measure accuracy over the whole test set.
total = 0    # images seen
correct = 0  # images classified correctly
with torch.no_grad():  # evaluation only: skip gradient tracking to save work
    for data in testloader:
        images, labels = data
        outputs = net(images)
        # index of the highest score in each row is the predicted class
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.size(0)
        hits = predicted == labels
        correct += hits.sum().item()

accuracy = 100 * correct / total
print('Accuracy of the network on the 10000 test images: %d %%' % accuracy)
官方訓練的結果,好像還行,有一半的正確率:
2.6 Training on GPU
想要在GPU加速訓練,可以用如下代碼:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # first CUDA device when one is available, otherwise the CPU
net.to(device) # move the network to that device
# remember to move the inputs and the labels of each batch to the same device
inputs, labels = data[0].to(device), data[1].to(device)