在上一篇《PyTorch實例----CIFAR10數據集分類(VGG)》識別統計的基礎上,本篇主要調整Net()類,設計ResNet網絡(+BN),實現對CIFAR10數據集的分類任務。
ResNet網絡結構編程實現:
# Basic residual block
class ResidualBlock(nn.Module):
    """Residual block: two 3x3 convolutions plus a shortcut connection.

    The main path (``self.left``) is conv3x3 -> BN -> ReLU -> conv3x3 -> BN.
    The shortcut (``self.shortcut``) is an empty ``nn.Sequential`` (identity)
    when the input and output shapes agree; otherwise it is a strided 1x1
    convolution followed by BatchNorm so both paths can be added.

    Args:
        inchannel: number of channels of the input feature map.
        outchannel: number of channels produced by the block.
        stride: stride of the first 3x3 convolution (and of the 1x1
            shortcut convolution when one is created).
    """

    def __init__(self, inchannel, outchannel, stride=1):
        super().__init__()
        # Main path: conv3x3 -> BN -> ReLU -> conv3x3 -> BN
        self.left = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
        )
        # Shortcut: identity by default; a 1x1 conv + BN projection is used
        # when the spatial size or the channel count changes.
        self.shortcut = nn.Sequential()
        if stride != 1 or inchannel != outchannel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannel),
            )

    def forward(self, x):
        """Return ``relu(left(x) + shortcut(x))``."""
        out = self.left(x)
        out += self.shortcut(x)
        out = F.relu(out)
        return out
class ResNet(nn.Module):
    """ResNet classifier built from four stages of residual blocks.

    A 3x3 stem convolution (3 -> 64 channels) is followed by four stages of
    two blocks each (64, 128, 256, 512 channels; stages 2-4 start with a
    stride-2 block), global average pooling and a linear classifier.

    Args:
        ResidualBlock: callable ``block(inchannel, outchannel, stride)``
            returning an ``nn.Module``; used to build every stage.
        num_classes: size of the output of the final linear layer.
    """

    def __init__(self, ResidualBlock, num_classes=10):
        super().__init__()
        # Channel count fed into the next block; updated by make_layer.
        self.inchannel = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        # Use make_layer to stack the residual blocks of each stage
        self.layer1 = self.make_layer(ResidualBlock, 64, 2, stride=1)
        self.layer2 = self.make_layer(ResidualBlock, 128, 2, stride=2)
        self.layer3 = self.make_layer(ResidualBlock, 256, 2, stride=2)
        self.layer4 = self.make_layer(ResidualBlock, 512, 2, stride=2)
        self.fc = nn.Linear(512, num_classes)

    def make_layer(self, block, channels, num_blocks, stride):
        """Build one stage as an ``nn.Sequential`` of ``num_blocks`` blocks.

        Only the first block uses ``stride``; the remaining blocks use
        stride 1. ``self.inchannel`` is updated to ``channels`` so the next
        block (and the next stage) receives the right input width.
        """
        # e.g. stride=2, num_blocks=2 -> [2, 1]
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.inchannel, channels, block_stride))
            self.inchannel = channels
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class scores, shape ``(batch, num_classes)``."""
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling to 1x1. For 32x32 inputs the feature map is
        # 4x4 here, so this matches a 4x4 average pool while also accepting
        # other input resolutions.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out
def ResNet18():
    """Return a ``ResNet`` built from ``ResidualBlock`` with default settings."""
    return ResNet(ResidualBlock)


# Example: create a ResNet18 instance
# net = ResNet18()
整體代碼實現:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
import torchvision.transforms as transforms
from torchvision import models
import matplotlib.pyplot as plt
import numpy as np
def imshow(img):
    """Draw an image tensor with matplotlib.

    Args:
        img: tensor whose values were normalized with mean 0.5 and std 0.5
            (as done by the ``transform`` in this script); presumably laid
            out as (channels, height, width) -- see the transpose below.
    """
    # Undo the normalization: x / 2 + 0.5 maps [-1, 1] back to [0, 1]
    img = img / 2 + 0.5
    np_img = img.numpy()
    # Move axis 0 to the end so matplotlib receives (H, W, C)
    plt.imshow(np.transpose(np_img, (1, 2, 0)))
# Hyperparameters for data loading and training
BATCH_SIZE = 4
EPOCH = 4
# Preprocessing pipeline
# hint: Normalize(mean, std) normalizes each RGB channel as (x - mean) / std
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)
# Training split and its loader (download=False: data must already be in ./data)
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=False,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
# Test split and its loader
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=False,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
# Class names, indexed by label id
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)
# Basic residual block
class ResidualBlock(nn.Module):
    """Residual block: two 3x3 convolutions plus a shortcut connection.

    The main path (``self.left``) is conv3x3 -> BN -> ReLU -> conv3x3 -> BN.
    The shortcut (``self.shortcut``) is an empty ``nn.Sequential`` (identity)
    when the input and output shapes agree; otherwise it is a strided 1x1
    convolution followed by BatchNorm so both paths can be added.

    Args:
        inchannel: number of channels of the input feature map.
        outchannel: number of channels produced by the block.
        stride: stride of the first 3x3 convolution (and of the 1x1
            shortcut convolution when one is created).
    """

    def __init__(self, inchannel, outchannel, stride=1):
        super().__init__()
        # Main path: conv3x3 -> BN -> ReLU -> conv3x3 -> BN
        self.left = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
        )
        # Shortcut: identity by default; a 1x1 conv + BN projection is used
        # when the spatial size or the channel count changes.
        self.shortcut = nn.Sequential()
        if stride != 1 or inchannel != outchannel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannel),
            )

    def forward(self, x):
        """Return ``relu(left(x) + shortcut(x))``."""
        out = self.left(x)
        out += self.shortcut(x)
        out = F.relu(out)
        return out
class ResNet(nn.Module):
    """ResNet classifier built from four stages of residual blocks.

    A 3x3 stem convolution (3 -> 64 channels) is followed by four stages of
    two blocks each (64, 128, 256, 512 channels; stages 2-4 start with a
    stride-2 block), global average pooling and a linear classifier.

    Args:
        ResidualBlock: callable ``block(inchannel, outchannel, stride)``
            returning an ``nn.Module``; used to build every stage.
        num_classes: size of the output of the final linear layer.
    """

    def __init__(self, ResidualBlock, num_classes=10):
        super().__init__()
        # Channel count fed into the next block; updated by make_layer.
        self.inchannel = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        # Use make_layer to stack the residual blocks of each stage
        self.layer1 = self.make_layer(ResidualBlock, 64, 2, stride=1)
        self.layer2 = self.make_layer(ResidualBlock, 128, 2, stride=2)
        self.layer3 = self.make_layer(ResidualBlock, 256, 2, stride=2)
        self.layer4 = self.make_layer(ResidualBlock, 512, 2, stride=2)
        self.fc = nn.Linear(512, num_classes)

    def make_layer(self, block, channels, num_blocks, stride):
        """Build one stage as an ``nn.Sequential`` of ``num_blocks`` blocks.

        Only the first block uses ``stride``; the remaining blocks use
        stride 1. ``self.inchannel`` is updated to ``channels`` so the next
        block (and the next stage) receives the right input width.
        """
        # e.g. stride=2, num_blocks=2 -> [2, 1]
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.inchannel, channels, block_stride))
            self.inchannel = channels
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class scores, shape ``(batch, num_classes)``."""
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling to 1x1. For 32x32 inputs the feature map is
        # 4x4 here, so this matches a 4x4 average pool while also accepting
        # other input resolutions.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out
def ResNet18():
    """Return a ``ResNet`` built from ``ResidualBlock`` with default settings."""
    return ResNet(ResidualBlock)
# Build the network and place it on the GPU when one is available,
# otherwise stay on the CPU (all later tensors follow `device`).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = ResNet18()
net.to(device)
print(net)
# define loss
cost = nn.CrossEntropyLoss()
# define optimizer
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
print('start')
# Training loop over EPOCH passes of the training set
net.train()
for epoch in range(EPOCH):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = cost(outputs, labels)
        loss.backward()
        optimizer.step()
        # Report the mean loss of every 2000 mini-batches
        running_loss += loss.item()
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f'%(epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print('done')
# Switch BatchNorm to its running statistics for all evaluation below
net.eval()
# Show ground truth and prediction for one batch of the (shuffled) test set
dataiter = iter(testloader)
images, labels = next(dataiter)
#imshow(torchvision.utils.make_grid(images))
print('groundTruth: ', ''.join('%6s' %classes[labels[j]] for j in range(len(labels))))
# get the predicted class of each image in the batch
with torch.no_grad():
    outputs = net(images.to(device))
_, pred = torch.max(outputs, 1)
pred = pred.cpu()
print('prediction: ', ''.join('%6s' %classes[pred[j]] for j in range(len(pred))))
# Single pass over the test set: overall and per-class accuracy
correct = 0
total = 0
class_correct = [0.0] * len(classes)
class_total = [0.0] * len(classes)
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images.to(device))
        _, pred = torch.max(outputs, 1)
        # Compare on the CPU so the labels never have to leave it
        hits = pred.cpu() == labels
        total += labels.size(0)
        correct += hits.sum().item()
        # Iterate over the actual batch, whatever its size
        for label, hit in zip(labels.tolist(), hits.tolist()):
            class_correct[label] += float(hit)
            class_total[label] += 1
print('average Accuracy: %d %%' %(100*correct / total))
# list each class prediction
print('each class accuracy: \n')
for i in range(len(classes)):
    print('Accuracy: %6s %2d %%' %(classes[i], 100 * class_correct[i] / class_total[i]))
實驗結果:
【注】:隨着算力的提升,這裏更改了相對較高的training EPOCH, 統計結果如下:
| EPOCH | 2 | 4 | 8 |
| --- | --- | --- | --- |
| Loss | 0.748 (0.789) | 0.455 | 0.152 |
| Acc | 74% (71%) | 79% | 81% |
括號內爲epoch爲2時VGG網絡對應的Loss和Accuracy。可以看到,隨着EPOCH的提升,Loss仍在下降,Accuracy繼續提升;當epoch爲8時,Accuracy比VGG提升了10個百分點,表明將殘差信息傳遞給下一級網絡能有效避免過擬合和訓練困難的問題。在目標檢測中,RetinaNet等以ResNet爲backbone的網絡結構同樣採用了該想法,實現了較好的檢測效果。
practice makes perfect !
github source code : https://github.com/GinkgoX/CAFAR10_Classification_Task/blob/master/CAFAR10_ResNet.ipynb