卷積神經網絡 - 垃圾分類
代碼和數據集可以在 我的AI學習筆記 - github 中獲取
實驗內容
自今年7月1日起,上海市將正式實施 《上海市生活垃圾管理條例》。垃圾分類,看似是微不足道的“小事”,實則關係到13億多人生活環境的改善,理應大力提倡。
垃圾識別分類數據集中包括 glass、cardboard、metal、paper、plastic、trash,共6個類別。
生活垃圾由於種類繁多,具體分類缺乏統一標準,大多人在實際操作時會“選擇困難”,基於深度學習技術建立準確的分類模型,利用技術手段改善人居環境。
數據集
該數據集包含了 2307 個生活垃圾圖片。數據集的創建者將垃圾分爲了 6 個類別,分別是:
序號 | 中文名 | 英文名 | 數據集大小 |
---|---|---|---|
1 | 玻璃 | glass | 457 |
2 | 紙 | paper | 540 |
3 | 硬紙板 | cardboard | 370 |
4 | 塑料 | plastic | 445 |
5 | 金屬 | metal | 380 |
6 | 一般垃圾 | trash | 115 |
物品都是放在白板上在日光/室內光源下拍攝的,壓縮後的尺寸爲 512 * 384.
實驗要求
- 建立深度學習模型,並儘可能將其調到最佳狀態
- 繪製深度學習模型圖、繪製並分析學習曲線等
- 分析模型並試着調試不同學習率等超參數對模型的結果影響
本地環境:
GPU:
- NVIDIA Quadro P600
- 驅動程序版本:442.92
- CUDA:10.1(已添加到系統環境變量)
torch 1.5.0+cu101
torchvision 0.6.0+cu101
安裝使用均沒有報錯,並可以使用GPU進行訓練。
記錄
DNN:最開始提供的示例代碼模型,是一個簡單的全連接神經網絡
# Baseline fully connected network built with the Keras functional API.
inputs = Input(shape=input_shape)
# Flatten the input image into a single feature vector.
dnn = Flatten()(inputs)
# Each stage is Dense -> BatchNormalization -> Activation, optionally followed
# by Dropout. The last stage produces the 6-way softmax output.
for units, activation, drop_rate in (
    (6, 'sigmoid', 0.25),
    (12, 'relu', 0.5),
    (6, 'softmax', None),
):
    dnn = Dense(units)(dnn)
    dnn = BatchNormalization(axis=-1)(dnn)
    dnn = Activation(activation)(dnn)
    if drop_rate is not None:
        dnn = Dropout(drop_rate)(dnn)
outputs = dnn
# Wrap the layer graph into a functional model.
model = Model(inputs=inputs, outputs=outputs)
訓練要好一會兒,模型正確率大概在0.3左右;
嘗試了一個簡單的卷積神經網絡模型
隨便找的
# Small convolutional network: four Conv2D + MaxPooling2D stages, one more
# Conv2D, then a dense head ending in a batch-normalised 6-way softmax.
model = Sequential([
    Conv2D(32, (5, 5), activation='relu', input_shape=input_shape),
    MaxPooling2D((2, 2)),
    Conv2D(32, (5, 5), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    Flatten(),
    Dense(12, activation='relu'),
    Dense(6),
    BatchNormalization(axis=-1),
    Activation('softmax'),
])
訓練正確率大約在0.5 - 0.6,但發生了過擬合,在比賽數據集中只有30%的識別準確率。訓練參數和示例一樣(沒改)
pytorch調參學習
由於是初次接觸pytorch和除了samples以外實際上手調參,還是比較茫然的…
嘗試過了從頭訓練神經網絡和遷移學習兩種方式;
resnet
def getRsn():
    """Build a ResNet-18 (pretrained=True) whose final layer emits 6 logits.

    Returns:
        The torchvision ResNet-18 model with ``fc`` replaced by a new
        ``nn.Linear`` that keeps the original input width and has 6 outputs.
    """
    net = models.resnet18(pretrained=True)
    # Swap only the classification head; the backbone keeps its weights.
    net.fc = nn.Linear(net.fc.in_features, 6)
    return net
MO 上加載 resnet 時,內存似乎有可能出現超限的問題(是不是我哪裏操作不正確?)
mobilenet_v2
於是就嘗試了一下mobilenet_v2,但可能由於過擬合的原因,實際在測試數據上表現並不好;
可以再嘗試嘗試;
def getMbnet():
    """Build a MobileNetV2 (pretrained=True) with a 6-class classifier head.

    Returns:
        The torchvision MobileNetV2 model whose ``classifier`` is replaced by
        Linear(1280 -> 64) -> Dropout(0.5) -> Linear(64 -> 6).
    """
    net = models.mobilenet_v2(pretrained=True)
    head_layers = [
        nn.Linear(in_features=1280, out_features=64),
        nn.Dropout(p=0.5, inplace=False),
        nn.Linear(in_features=64, out_features=6, bias=True),
    ]
    net.classifier = nn.Sequential(*head_layers)
    return net
自己寫的一個簡單的CNN
大致就是普通CNN的結構,先多層卷積層池化層,然後用全連接層解決分類問題;
class MyCNN(nn.Module):
    """Five-stage convolutional classifier for square RGB images.

    Every stage is convolution(s) -> BatchNorm2d -> ReLU -> MaxPool2d (stages
    1 and 2 stack two convolutions). The flattened feature map then passes
    through Dropout -> Linear -> Dropout -> Linear.

    Args:
        image_size: Side length in pixels of the square input images. It
            sizes the first fully connected layer (64 yields 4608 features).
        num_classes: Number of output logits.

    Raises:
        ValueError: If ``image_size`` is too small to survive the pooling
            layers.
    """

    def __init__(self, image_size, num_classes):
        super(MyCNN, self).__init__()
        # conv1: Conv2d -> Conv2d -> BN -> ReLU -> MaxPool (stride 1)
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1),
        )
        # conv2: Conv2d -> Conv2d -> BN -> ReLU -> MaxPool (stride 2)
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # conv3 - conv5: Conv2d -> BN -> ReLU -> MaxPool
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.conv5 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1),
        )

        # The 3x3 convolutions use padding=1 and keep the spatial size, so
        # only the pooling layers shrink it.
        side = image_size - 1      # conv1 pool: kernel 2, stride 1
        for _ in range(3):         # conv2..conv4 pools: kernel 2, stride 2
            side //= 2
        side -= 1                  # conv5 pool: kernel 2, stride 1
        if side < 1:
            raise ValueError(
                "image_size={} is too small for this network".format(image_size))
        flat_features = 128 * side * side  # 4608 when image_size == 64

        # fully connected layers
        self.dp1 = nn.Dropout(0.20)
        self.fc1 = nn.Linear(flat_features, 256)
        self.dp2 = nn.Dropout(0.50)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map a batch of images to class logits.

        input: N * 3 * image_size * image_size
        output: N * num_classes
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        # Flatten (N, C, H, W) into (N, C*H*W) for the linear layers.
        x = x.view(x.size(0), -1)
        x = self.dp1(x)
        x = self.fc1(x)
        x = self.dp2(x)
        output = self.fc2(x)
        return output
訓練超參數:
# Number of images per mini-batch
batch_size = 20
# Number of passes over the training set
num_epochs = 10
# Learning rate
lr = 0.00007
# Number of garbage categories
num_classes = 6
# Side length (pixels) of the square network input
image_size = 64
同時也對圖片進行了一系列變換,如旋轉、翻轉、灰度化以增強穩定度;
輸入的圖片大小爲64*64
最後提交的完整代碼:
模型訓練代碼
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
import time
import os
from torchvision import models
import matplotlib.pyplot as plt
class MyCNN(nn.Module):
    """Five-stage convolutional classifier for square RGB images.

    Every stage is convolution(s) -> BatchNorm2d -> ReLU -> MaxPool2d (stages
    1 and 2 stack two convolutions). The flattened feature map then passes
    through Dropout -> Linear -> Dropout -> Linear.

    Args:
        image_size: Side length in pixels of the square input images. It
            sizes the first fully connected layer (64 yields 4608 features).
        num_classes: Number of output logits.

    Raises:
        ValueError: If ``image_size`` is too small to survive the pooling
            layers.
    """

    def __init__(self, image_size, num_classes):
        super(MyCNN, self).__init__()
        # conv1: Conv2d -> Conv2d -> BN -> ReLU -> MaxPool (stride 1)
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1),
        )
        # conv2: Conv2d -> Conv2d -> BN -> ReLU -> MaxPool (stride 2)
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # conv3 - conv5: Conv2d -> BN -> ReLU -> MaxPool
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.conv5 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1),
        )

        # The 3x3 convolutions use padding=1 and keep the spatial size, so
        # only the pooling layers shrink it.
        side = image_size - 1      # conv1 pool: kernel 2, stride 1
        for _ in range(3):         # conv2..conv4 pools: kernel 2, stride 2
            side //= 2
        side -= 1                  # conv5 pool: kernel 2, stride 1
        if side < 1:
            raise ValueError(
                "image_size={} is too small for this network".format(image_size))
        flat_features = 128 * side * side  # 4608 when image_size == 64

        # fully connected layers
        self.dp1 = nn.Dropout(0.20)
        self.fc1 = nn.Linear(flat_features, 256)
        self.dp2 = nn.Dropout(0.50)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map a batch of images to class logits.

        input: N * 3 * image_size * image_size
        output: N * num_classes
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        # Flatten (N, C, H, W) into (N, C*H*W) for the linear layers.
        x = x.view(x.size(0), -1)
        x = self.dp1(x)
        x = self.fc1(x)
        x = self.dp2(x)
        output = self.fc2(x)
        return output
def getRsn():
    """Build a ResNet-18 (pretrained=True) whose final layer emits 6 logits.

    Returns:
        The torchvision ResNet-18 model with ``fc`` replaced by a new
        ``nn.Linear`` that keeps the original input width and has 6 outputs.
    """
    net = models.resnet18(pretrained=True)
    # Swap only the classification head; the backbone keeps its weights.
    net.fc = nn.Linear(net.fc.in_features, 6)
    return net
def getMbnet():
    """Build a MobileNetV2 (pretrained=True) with a 6-class classifier head.

    Returns:
        The torchvision MobileNetV2 model whose ``classifier`` is replaced by
        Linear(1280 -> 64) -> Dropout(0.5) -> Linear(64 -> 6).
    """
    net = models.mobilenet_v2(pretrained=True)
    head_layers = [
        nn.Linear(in_features=1280, out_features=64),
        nn.Dropout(p=0.5, inplace=False),
        nn.Linear(in_features=64, out_features=6, bias=True),
    ]
    net.classifier = nn.Sequential(*head_layers)
    return net
def train(model, train_loader, loss_func, optimizer, device):
    """Train ``model`` for one epoch and return the mean batch loss.

    Args:
        model: Network to optimise.
        train_loader: Iterable of ``(images, targets)`` mini-batches that also
            supports ``len()``.
        loss_func: Callable computing the loss from ``(outputs, targets)``.
        optimizer: Optimizer holding the parameters of ``model``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Sum of the per-batch losses divided by the number of batches.

    Side effects:
        Prints progress every 100 steps and, once the epoch is finished,
        saves the weights to ``results/cnn.pth`` via ``save_model``.
    """
    # evaluate() leaves the model in eval mode. Switch back so that Dropout
    # and BatchNorm act as training layers on every epoch, not only the first.
    model.train()
    total_loss = 0
    for i, (images, targets) in enumerate(train_loader):
        images = images.to(device)
        targets = targets.to(device)

        # forward
        outputs = model(images)
        loss = loss_func(outputs, targets)

        # backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        if (i + 1) % 100 == 0:
            # Accuracy of the current mini-batch only (cheap progress signal).
            preds = outputs.detach().argmax(dim=1)
            batch_acc = (preds == targets).float().mean().item()
            print("Step [{}/{}] Train Loss: {:.4f} Train acc: {:.4f}".format(
                i + 1, len(train_loader), loss.item(), batch_acc))

    # Checkpoint once per epoch.
    save_model(model, save_path="results/cnn.pth")
    return total_loss / len(train_loader)
def evaluate(model, val_loader, device, name):
    """Compute the classification accuracy of ``model`` on ``val_loader``.

    Args:
        model: Network to evaluate; it is switched to eval mode and left there.
        val_loader: Iterable of ``(images, targets)`` mini-batches.
        device: Device the batches are moved to before the forward pass.
        name: Label of the data split, used only in the printed message.

    Returns:
        Fraction of correctly classified samples in ``[0, 1]``. An empty
        loader yields ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    model.eval()
    correct = 0
    total = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for images, targets in val_loader:
            images = images.to(device)
            targets = targets.to(device)
            outputs = model(images)
            # Index of the largest logit in each row is the predicted class.
            predicted = outputs.argmax(dim=1)
            correct += (predicted == targets).sum().item()
            total += targets.size(0)
    accuracy = correct / total if total else 0.0
    print('Accuracy on '+name+' Set: {:.4f} %'.format(100 * accuracy))
    return accuracy
def save_model(model, save_path="results/cnn.pth"):
    """Save the ``state_dict`` of ``model`` to ``save_path``.

    Args:
        model: Module whose parameters and buffers are serialised.
        save_path: Destination file. Missing parent directories are created,
            because ``torch.save`` does not create them itself.
    """
    parent_dir = os.path.dirname(save_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    torch.save(model.state_dict(), save_path)
def show_curve(ys, title):
    """Plot a loss or accuracy curve against the epoch index.

    Args:
        ys: Sequence of per-epoch values (loss or accuracy).
        title: Name of the plotted quantity; used for the figure title and
            the y-axis label.
    """
    values = np.array(ys)
    epochs = np.array(range(len(ys)))
    plt.plot(epochs, values, c='b')
    plt.axis()
    plt.title('{} curve'.format(title))
    plt.xlabel('epoch')
    plt.ylabel('{}'.format(title))
    plt.show()
def fit(model, num_epochs, optimizer, device):
    """Train and evaluate a classifier for ``num_epochs`` epochs.

    Uses cross-entropy loss together with the supplied optimizer. The data
    comes from the module-level ``train_loader`` and ``test_loader``.

    Args:
        model: Network to train.
        num_epochs: Number of training epochs.
        optimizer: Optimizer that updates the parameters of ``model``.
        device: Device on which training and evaluation run.
    """
    criterion = nn.CrossEntropyLoss()
    model.to(device)
    criterion.to(device)

    # Per-epoch history: training loss, test accuracy, training accuracy.
    loss_history = []
    test_acc_history = []
    train_acc_history = []

    for epoch_idx in range(num_epochs):
        print('Epoch {}/{}:'.format(epoch_idx + 1, num_epochs))
        # train step
        epoch_loss = train(model, train_loader, criterion, optimizer, device)
        loss_history.append(epoch_loss)
        # evaluate step: test split first, then the training split
        test_acc = evaluate(model, test_loader, device, 'test')
        train_acc = evaluate(model, train_loader, device, 'train')
        test_acc_history.append(test_acc)
        train_acc_history.append(train_acc)

    # Draw the three learning curves once training has finished.
    for series, label in (
        (loss_history, "train loss"),
        (test_acc_history, "test accuracy"),
        (train_acc_history, "train accuracy"),
    ):
        show_curve(series, label)
# model = models.vgg16_bn(pretrained=True)
# model_ft= models.resnet18(pretrained=True)
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch
from torchvision import datasets, transforms
from torch.utils import model_zoo
from torch.optim import lr_scheduler
# Hyper-parameters
batch_size = 16
num_epochs = 20
lr = 0.00007
num_classes = 6
image_size = 64
path = "datasets/la1ji1fe1nle4ishu4ju4ji22-momodel/dataset-resized"

# Pre-processing and augmentation applied to every image that is loaded.
transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    # A single number samples the angle from [-30, 30]. The former (30, 30)
    # range rotated every image by exactly 30 degrees, which is not an
    # augmentation and does not match the unrotated images seen at inference.
    transforms.RandomRotation(30),
    transforms.RandomVerticalFlip(0.1),
    transforms.RandomGrayscale(0.1),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

dataset = datasets.ImageFolder(path, transform=transform)
print("dataset.classes",dataset.classes)
print("dataset.class_to_idx",dataset.class_to_idx)
idx_to_class = dict((v, k) for k, v in dataset.class_to_idx.items())
print("idx_to_class",idx_to_class)
print('len(dataset)', len(dataset))

# Split the data into a training set and a validation set. The validation
# size stays at 270 images; the training size is derived from the dataset so
# the split no longer fails when the folder does not hold exactly 2527 images.
# NOTE: both subsets share `dataset`, so validation images go through the same
# random augmentations as training images.
val_size = 270
train_size = len(dataset) - val_size
train_db, val_db = torch.utils.data.random_split(dataset, [train_size, val_size])
print('train:', len(train_db), 'validation:', len(val_db))

# Training loader
train_loader = torch.utils.data.DataLoader(
    train_db,
    batch_size=batch_size,
    shuffle=True,
    drop_last=False)
# Validation loader (named test_loader because fit() reads it under that name)
test_loader = torch.utils.data.DataLoader(
    val_db,
    batch_size=batch_size,
    shuffle=True)

classes = {'cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash'}

# Instantiate the network; swap in one of the alternatives below to try
# transfer learning instead.
mycnn = MyCNN(image_size, num_classes)
# mycnn = getRsn()
# mycnn = getMbnet()
print(mycnn)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
optimizer = torch.optim.Adam(mycnn.parameters(), lr=lr)

# Start training on the garbage classification dataset.
fit(mycnn, num_epochs, optimizer, device)
測試代碼:
丟棄了Dropout層;
對圖片不進行變換;
import torch
from torch import nn
import random
import numpy as np
from PIL import Image
from torchvision.transforms import transforms
import torchvision.transforms.functional as TF
import os
import torch.utils.data as Data
import torchvision
from torchvision import models
class MyCNN(nn.Module):
    """Inference copy of the five-stage convolutional classifier.

    The layer layout matches the training network, so its ``state_dict``
    loads unchanged. The dropout layers are declared but not applied in
    ``forward``.

    Args:
        image_size: Side length in pixels of the square input images. It
            sizes the first fully connected layer (64 yields 4608 features).
        num_classes: Number of output logits.

    Raises:
        ValueError: If ``image_size`` is too small to survive the pooling
            layers.
    """

    def __init__(self, image_size, num_classes):
        super(MyCNN, self).__init__()
        # conv1: Conv2d -> Conv2d -> BN -> ReLU -> MaxPool (stride 1)
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1),
        )
        # conv2: Conv2d -> Conv2d -> BN -> ReLU -> MaxPool (stride 2)
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # conv3 - conv5: Conv2d -> BN -> ReLU -> MaxPool
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.conv5 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1),
        )

        # The 3x3 convolutions use padding=1 and keep the spatial size, so
        # only the pooling layers shrink it.
        side = image_size - 1      # conv1 pool: kernel 2, stride 1
        for _ in range(3):         # conv2..conv4 pools: kernel 2, stride 2
            side //= 2
        side -= 1                  # conv5 pool: kernel 2, stride 1
        if side < 1:
            raise ValueError(
                "image_size={} is too small for this network".format(image_size))
        flat_features = 128 * side * side  # 4608 when image_size == 64

        # fully connected layers
        self.dp1 = nn.Dropout(0.20)
        self.fc1 = nn.Linear(flat_features, 256)
        self.dp2 = nn.Dropout(0.50)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map a batch of images to class logits, without dropout.

        input: N * 3 * image_size * image_size
        output: N * num_classes
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        # Flatten (N, C, H, W) into (N, C*H*W) for the linear layers.
        x = x.view(x.size(0), -1)
        # dp1 / dp2 are intentionally skipped at inference time.
        x = self.fc1(x)
        output = self.fc2(x)
        return output
def getRsn():
    """Build a ResNet-18 (pretrained=False) whose final layer emits 6 logits.

    Returns:
        The torchvision ResNet-18 model with ``fc`` replaced by a new
        ``nn.Linear`` that keeps the original input width and has 6 outputs.
    """
    net = models.resnet18(pretrained=False)
    net.fc = nn.Linear(net.fc.in_features, 6)
    return net
def getMbnet():
    """Build a MobileNetV2 skeleton with a 6-class classifier head.

    The network is created with ``pretrained=False``, matching ``getRsn`` in
    this script: the weights are expected to come from a saved checkpoint, so
    fetching pretrained weights here would be wasted work. The leftover debug
    ``print(model)`` is removed as well.

    Returns:
        The torchvision MobileNetV2 model whose ``classifier`` is replaced by
        Linear(1280 -> 64) -> Dropout(0.5) -> Linear(64 -> 6).
    """
    model = models.mobilenet_v2(pretrained=False)
    model.classifier = nn.Sequential(
        nn.Linear(in_features=1280, out_features=64),
        nn.Dropout(p=0.5, inplace=False),
        nn.Linear(in_features=64, out_features=6, bias=True),
    )
    return model
def load_model(model_path, device):
    """Create a ``MyCNN(64, 6)`` and load its weights from ``model_path``.

    Args:
        model_path: Path of the saved ``state_dict`` file.
        device: Target device; its string form is passed to ``torch.load``
            as ``map_location``.

    Returns:
        The network with the loaded weights (not yet moved to ``device``).
    """
    # Alternative backbones: getRsn() / getMbnet()
    network = MyCNN(64, 6)
    print('loading the model from %s' % model_path)
    weights = torch.load(model_path, map_location=str(device))
    # Drop the bookkeeping attribute if the loaded object carries one.
    if hasattr(weights, '_metadata'):
        del weights._metadata
    network.load_state_dict(weights)
    return network
# Load the model. Note that model_path is a relative path, resolved next to this file.
# For example, if your model is dnn.h5 inside the results folder, then model_path = 'results/dnn.h5'
model_path = 'results/cnn.pth'
# Use the GPU when one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = load_model(model_path, device).to(device)
# Switch the network to evaluation mode before predicting.
model.eval()
def predict(img):
    """Classify a single image with the module-level ``model``.

    :param img: PIL.Image object
    :return: string, the predicted category, one of
        'cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash'
    """
    classes = ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']
    # Deterministic pre-processing only: no random augmentation at inference.
    transform = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    # Grayscale, palette or RGBA inputs would not match the 3-channel
    # Normalize and the 3-channel first convolution, so force RGB.
    batch = transform(img.convert('RGB')).unsqueeze(0).to(device)
    # Gradients are not needed for prediction.
    with torch.no_grad():
        logits = model(batch)
    # Plain Python int, so the list lookup does not rely on tensor indexing.
    class_index = logits.argmax(dim=1).item()
    return classes[class_index]