本文爲《深度學習入門 基於Python的理論與實現》的部分讀書筆記,也參考吳恩達深度學習視頻
代碼以及圖片均參考此書
CNN的實現
本節將實現以下結構的簡單CNN
默認在MNIST數據集上工作,輸入數據形狀爲 $(1, 28, 28)$,卷積層使用 $5 \times 5$ 的濾波器,濾波器個數爲30,不進行填充,步長爲1,因此卷積層之後數據形狀爲 $(30, 24, 24)$。池化層使用 $2 \times 2$ 的濾波器,不進行填充,步長爲2,因此池化層之後數據形狀爲 $(30, 12, 12)$。第一個隱藏層設爲100個神經元,輸出層的神經元個數設爲10。
import sys
# Derive this file's directory and its parent from __file__
# (backslashes normalized so the same slicing works on Windows).
file_path = __file__.replace('\\', '/')
dir_path = file_path[: file_path.rfind('/')] # directory containing this file
pardir_path = dir_path[: dir_path.rfind('/')]
sys.path.append(pardir_path) # make the parent directory importable (func/, layer/, ...)
import numpy as np
from func.gradient import numerical_gradient, gradient_check
from layer.common import *
from collections import OrderedDict
import os
import pickle
class SimpleConvNet:
    """Simple CNN: conv - relu - pool - affine - relu - affine - softmax.

    Assumes the input images are square (width == height).

    Parameters
    ----------
    input_dim : tuple
        Input shape as (channels, height, width); (1, 28, 28) for MNIST.
    conv_param : dict or None
        Convolution settings with keys 'filter_num', 'filter_size',
        'pad', 'stride'.  None selects the defaults shown in __init__.
    hidden_size : int
        Number of neurons in the fully-connected hidden layer.
    output_size : int
        Number of output classes (10 for MNIST).
    pretrain_flag : bool
        When truthy and ``pkl_file_name`` points to an existing file,
        load the pickled model instead of initializing fresh weights.
    pkl_file_name : str or None
        Path of the pickled model file.
    """
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param=None,
                 hidden_size=100, output_size=10,
                 pretrain_flag=True, pkl_file_name=None):
        # Avoid the mutable-default-argument pitfall for conv_param.
        if conv_param is None:
            conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1}
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]  # assumes height == width
        conv_output_size = (input_size - filter_size + 2 * filter_pad) // filter_stride + 1
        # Flattened feature count after the 2x2 / stride-2 pooling layer.
        pool_output_size = filter_num * (conv_output_size // 2) ** 2
        self.pkl_file_name = pkl_file_name
        # Guard against pkl_file_name=None: os.path.exists(None) raises TypeError.
        if pretrain_flag and pkl_file_name is not None and os.path.exists(pkl_file_name):
            self.load_pretrain_model()
        else:
            # He initialization: std = sqrt(2 / fan_in).  The conv fan-in is
            # in_channels * filter_size**2, i.e. input_dim[0] (the original
            # code mistakenly used input_dim[1], the image height).
            self.params = {}
            self.params['W1'] = np.sqrt(2.0 / (input_dim[0] * filter_size ** 2)) * \
                np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
            self.params['b1'] = np.zeros(filter_num)
            self.params['W2'] = np.sqrt(2.0 / pool_output_size) * \
                np.random.randn(pool_output_size, hidden_size)
            self.params['b2'] = np.zeros(hidden_size)
            self.params['W3'] = np.sqrt(2.0 / hidden_size) * \
                np.random.randn(hidden_size, output_size)
            self.params['b3'] = np.zeros(output_size)
            # Build the forward pipeline in execution order.
            self.layers = OrderedDict()
            self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                               conv_param['stride'], conv_param['pad'])
            self.layers['Relu1'] = Relu()
            self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
            self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
            self.layers['Relu2'] = Relu()
            self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
            self.last_layer = SoftmaxWithLoss()

    def load_pretrain_model(self):
        """Restore params/layers/last_layer from the pickled model file.

        NOTE(review): pickle.load can execute arbitrary code -- only load
        model files from a trusted source.
        """
        with open(self.pkl_file_name, 'rb') as f:
            model = pickle.load(f)
        # setattr/getattr instead of exec(): same effect, no dynamic code.
        for key in ('params', 'layers', 'last_layer'):
            setattr(self, key, getattr(model, key))
        print('params loaded!')

    def predict(self, x):
        """Forward pass through every layer (excluding the loss layer)."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Cross-entropy loss of the prediction for x against labels t."""
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        """Classification accuracy over x, evaluated in mini-batches.

        The tail batch (when len(x) is not a multiple of batch_size) is
        now included, so the count matches the denominator x.shape[0];
        the original version silently skipped the remainder samples.
        """
        if t.ndim != 1:
            t = np.argmax(t, axis=1)  # one-hot labels -> class indices
        correct = 0.0
        for i in range(0, x.shape[0], batch_size):
            tx = x[i:i + batch_size]
            tt = t[i:i + batch_size]
            y = np.argmax(self.predict(tx), axis=1)
            correct += np.sum(y == tt)
        return correct / x.shape[0]

    def numerical_gradient(self, x, t):
        """Gradients of the loss w.r.t. all parameters by finite differences.

        Slow; intended only for gradient checking against gradient().
        """
        loss_w = lambda w: self.loss(x, t)
        grads = {}
        for idx in range(1, 4):
            grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)])
        return grads

    def gradient(self, x, t):
        """Gradients of the loss w.r.t. all parameters by backpropagation."""
        # forward
        self.loss(x, t)
        # backward, walking the layers in reverse order
        dout = self.last_layer.backward(1)
        for layer_name in reversed(self.layers):
            dout = self.layers[layer_name].backward(dout)
        # collect the gradients stored on the parameterized layers
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
        grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
        return grads
if __name__ == '__main__':
    from dataset.mnist import load_mnist
    from trainer.trainer import Trainer

    # MNIST as (N, 1, 28, 28) images with one-hot labels.
    (x_train, t_train), (x_test, t_test) = load_mnist(
        normalize=True, flatten=False, one_hot_label=True, shuffle_data=True)

    # --- settings ---
    train_flag = 1      # 1: train the network, otherwise: only evaluate
    gradcheck_flag = 0  # 1: gradient-check the trained network
    pkl_file_name = dir_path + '/convnet.pkl'
    fig_name = dir_path + '/convnet.png'

    net = SimpleConvNet(
        input_dim=(1, 28, 28),
        conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
        hidden_size=100, output_size=10,
        pretrain_flag=True, pkl_file_name=pkl_file_name)
    trainer = Trainer(
        net, x_train, t_train, x_test, t_test,
        epochs=2, mini_batch_size=128,
        optimizer='Adam', optimizer_param={},
        save_model_flag=True, pkl_file_name=pkl_file_name,
        plot_flag=True, fig_name=fig_name,
        evaluate_sample_num_per_epoch=1000, verbose=True)

    if gradcheck_flag == 1:
        gradient_check(net, x_train[:2], t_train[:2])

    if train_flag:
        trainer.train()
    else:
        print('accuracy:', net.accuracy(x_train, t_train))
=============== Final Test Accuracy ===============
test acc:0.9811
可以看到,使用簡單的CNN結構,僅訓練了2個epoch之後預測精度就輕易達到了98%,已經超過了之前實現的全連接層網絡結構能達到的最高識別精度。
CNN的可視化
比較一下學習前和學習後的卷積層的濾波器
import sys
# Derive this file's directory and its parent from __file__
# (backslashes normalized so the same slicing works on Windows).
file_path = __file__.replace('\\', '/')
dir_path = file_path[: file_path.rfind('/')] # directory containing this file
pardir_path = dir_path[: dir_path.rfind('/')]
sys.path.append(pardir_path) # make the parent directory importable (layer/, ...)
import numpy as np
import matplotlib.pyplot as plt
from layer.convnet import SimpleConvNet
def filter_show(filters, nx=8):
    """Visualize the first channel of every filter in an ny x nx grid.

    Parameters
    ----------
    filters : ndarray of shape (FN, C, FH, FW)
        Convolution filters; only channel 0 of each filter is drawn.
    nx : int
        Number of columns in the grid; rows are derived from FN.
    """
    FN, C, FH, FW = filters.shape
    ny = int(np.ceil(FN / nx))  # rows needed to fit FN filters
    # squeeze=False keeps `axes` 2-D even when ny == 1 (or nx == 1);
    # without it, plt.subplots returns a 1-D array for a single row and
    # axes[row, col] indexing below would raise IndexError.
    fig, axes = plt.subplots(ny, nx, squeeze=False)
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
    for i in range(FN):
        # subplot position of filter i
        row, col = divmod(i, nx)
        axes[row, col].set_xticks([])
        axes[row, col].set_yticks([])
        axes[row, col].imshow(filters[i, 0], cmap=plt.cm.gray_r, interpolation='nearest')
    plt.show()
pkl_file_name = dir_path + '/convnet.pkl'
# BUG FIX: SimpleConvNet.__init__ takes no 'weight_init_std' parameter
# (weights are initialized internally), so passing it raised a TypeError.
net = SimpleConvNet(input_dim=(1, 28, 28),
                    conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
                    hidden_size=100, output_size=10,
                    pretrain_flag=False, pkl_file_name=pkl_file_name)
# filters right after random initialization
filter_show(net.params['W1'])
# filters after training
net.load_pretrain_model()
filter_show(net.params['W1'])
- 學習前的卷積層濾波器
- 學習後的卷積層濾波器
對比兩圖,可以看出學習前的濾波器是隨機進行初始化的,所以在黑白的濃淡上沒有規律可循,但學習後的濾波器變成了有規律的圖像。我們發現,通過學習,濾波器被更新成了有規律的濾波器,比如從白到黑漸變的濾波器、含有塊狀區域(稱爲blob)的濾波器等。
如果要問有規律的濾波器在“觀察”什麼,答案就是它在觀察邊緣(顏色變化的分界線)和斑塊(局部的塊狀區域)等。比如,左半部分爲白色、右半部分爲黑色的濾波器會對垂直方向上的邊緣有響應。
由此可知,卷積層的濾波器會提取邊緣或斑塊等原始信息。而剛纔實現的CNN會將這些原始信息傳遞給後面的層。
上面的結果是針對第1層的卷積層得出的。第1層的卷積層中提取了邊緣或斑塊等“低級”信息,根據深度學習的可視化相關的研究,隨着層次加深,提取的信息(正確地講,是反映強烈的神經元)也越來越抽象。如果堆疊了多層卷積層,則隨着層次加深,提取的信息也愈加複雜、抽象,也就是說,隨着層次加深,神經元從簡單的形狀向“高級”信息變化。