Introduction to Deep Learning (10): Implementing and Visualizing a CNN

This article is part of my reading notes on 《深度學習入門 基於Python的理論與實現》 (Deep Learning from Scratch), with additional reference to Andrew Ng's deep learning video course.
The code and figures are adapted from the book.

Implementing the CNN

This section implements a simple CNN with the following structure:
[Figure: conv - relu - pool - affine - relu - affine - softmax]
The network runs on MNIST by default. The input has shape (1, 28, 28). The convolution layer uses 30 filters of shape 1 × 5 × 5, with no padding and a stride of 1, so the data coming out of the convolution layer has shape (30, 24, 24). The pooling layer uses a 2 × 2 window, with no padding and a stride of 2, so the data coming out of the pooling layer has shape (30, 12, 12). The hidden affine layer has 100 neurons, and the output layer has 10.
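To make the shape bookkeeping concrete, here is a minimal sketch that reproduces these numbers (conv_output_size is my own helper name, not part of the book's code):

def conv_output_size(input_size, filter_size, pad=0, stride=1):
    # standard output-size formula: (H + 2P - FH) // S + 1
    return (input_size + 2 * pad - filter_size) // stride + 1

conv_out = conv_output_size(28, 5, pad=0, stride=1)        # 24
pool_out = conv_output_size(conv_out, 2, pad=0, stride=2)  # 12
print((30, conv_out, conv_out))  # (30, 24, 24) after the conv layer
print((30, pool_out, pool_out))  # (30, 12, 12) after the pooling layer
print(30 * pool_out * pool_out)  # 4320 inputs reach the first affine layer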

import sys
file_path = __file__.replace('\\', '/')
dir_path = file_path[: file_path.rfind('/')] # path of the directory containing this file
pardir_path = dir_path[: dir_path.rfind('/')]
sys.path.append(pardir_path) # add the parent of the current directory to the Python module search path

import numpy as np
from func.gradient import numerical_gradient, gradient_check
from layer.common import *
from collections import OrderedDict
import os
import pickle

class SimpleConvNet:
    """
    conv - relu - pool - affine - relu - affine - softmax
    默認傳進來的圖片寬高相同
    Parameters
    ----------
    input_dim : 輸入大小(MNIST的情況下爲(1, 28, 28))
    hidden_size : 隱藏層的神經元數量
    output_size : 輸出大小(MNIST的情況下爲10)
    """
    def __init__(self, input_dim=(1, 28, 28), 
                 conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
                 hidden_size=100, output_size=10,
                 pretrain_flag=True, pkl_file_name=None):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1] 
        conv_output_size = (input_size - filter_size + 2 * filter_pad) // filter_stride + 1  # (H + 2P - FH) // S + 1
        pool_output_size = filter_num * (conv_output_size // 2)**2  # flattened size after 2x2, stride-2 pooling
        self.pkl_file_name = pkl_file_name

        if pretrain_flag and self.pkl_file_name and os.path.exists(self.pkl_file_name):
            self.load_pretrain_model()
        else:
            # initialize weights with He initialization: std = sqrt(2 / fan_in);
            # for the conv layer, fan_in = input channels * filter_size**2
            self.params = {}
            self.params['W1'] = np.sqrt(2.0 / (input_dim[0] * filter_size**2)) * \
                                np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
            self.params['b1'] = np.zeros(filter_num)
            self.params['W2'] = np.sqrt(2.0 / pool_output_size) * \
                                np.random.randn(pool_output_size, hidden_size)
            self.params['b2'] = np.zeros(hidden_size)
            self.params['W3'] = np.sqrt(2.0 / hidden_size) * \
                                np.random.randn(hidden_size, output_size)
            self.params['b3'] = np.zeros(output_size)

            # build the layers
            self.layers = OrderedDict()
            self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                            conv_param['stride'], conv_param['pad'])
            self.layers['Relu1'] = Relu()
            self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
            self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
            self.layers['Relu2'] = Relu()
            self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

            self.last_layer = SoftmaxWithLoss()

    def load_pretrain_model(self):
        with open(self.pkl_file_name, 'rb') as f:
            model = pickle.load(f)
        # copy the trained state over; setattr is clearer and safer than exec
        for key in ('params', 'layers', 'last_layer'):
            setattr(self, key, getattr(model, key))
        print('params loaded!')

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        # evaluate in mini-batches to keep memory usage manageable
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        acc = 0.0

        for i in range(x.shape[0] // batch_size):
            tx = x[i*batch_size:(i+1)*batch_size]
            tt = t[i*batch_size:(i+1)*batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt) 
        
        return acc / x.shape[0]

    def numerical_gradient(self, x, t):
        # w is unused: numerical_gradient reads and perturbs self.params directly
        loss_w = lambda w: self.loss(x, t)

        grads = {}
        for idx in range(1, 4):
            grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)])

        return grads

    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)
        for layer_name in reversed(self.layers):
            dout = self.layers[layer_name].backward(dout)

        # collect the gradients stored by each layer during the backward pass
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
        grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db

        return grads
        
if __name__ == '__main__':
    from dataset.mnist import load_mnist
    from trainer.trainer import Trainer

    (x_train, t_train),  (x_test, t_test) = load_mnist(normalize=True, flatten=False, one_hot_label=True, shuffle_data=True)

    # setting
    train_flag = 1 # 1: train, 0: evaluate a saved model
    gradcheck_flag = 0 # run gradient checking on the trained network
    
    pkl_file_name = dir_path + '/convnet.pkl'
    fig_name = dir_path + '/convnet.png'

    net = SimpleConvNet(input_dim=(1, 28, 28), 
                        conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
                        hidden_size=100, output_size=10,
                        pretrain_flag=True, pkl_file_name=pkl_file_name)

    trainer = Trainer(net, x_train, t_train, x_test, t_test,
                 epochs=2, mini_batch_size=128,
                 optimizer='Adam', optimizer_param={}, 
                 save_model_flag=True, pkl_file_name=pkl_file_name, plot_flag=True, fig_name=fig_name,
                 evaluate_sample_num_per_epoch=1000, verbose=True)

    if gradcheck_flag == 1:
        gradient_check(net, x_train[:2], t_train[:2])

    if train_flag:
        trainer.train()
    else:           
        acc = net.accuracy(x_train, t_train)
        print('accuracy:', acc)
Output after training:

=============== Final Test Accuracy ===============
test acc:0.9811

As you can see, with this simple CNN the test accuracy easily reaches 98% after only 2 epochs of training, already surpassing the best accuracy achieved by the fully connected networks implemented earlier.

Visualizing the CNN

Let us compare the convolution-layer filters before and after training.

import sys
file_path = __file__.replace('\\', '/')
dir_path = file_path[: file_path.rfind('/')] # path of the directory containing this file
pardir_path = dir_path[: dir_path.rfind('/')]
sys.path.append(pardir_path) # add the parent of the current directory to the Python module search path

import numpy as np
import matplotlib.pyplot as plt
from layer.convnet import SimpleConvNet

def filter_show(filters, nx=8):
    """
    可視化所有濾波器的第一個通道
    """
    FN, C, FH, FW = filters.shape
    ny = int(np.ceil(FN / nx)) # np.ceil rounds up

    fig, axes = plt.subplots(ny, nx)
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)

    for i in range(FN):
        # subplot position (row x, column y) for filter i
        x = i // nx
        y = i % nx

        axes[x, y].set_xticks([])
        axes[x, y].set_yticks([])
        axes[x, y].imshow(filters[i, 0], cmap=plt.cm.gray_r, interpolation='nearest')
    
    plt.show()


pkl_file_name = dir_path + '/convnet.pkl'

net = SimpleConvNet(input_dim=(1, 28, 28), 
                    conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
                    hidden_size=100, output_size=10,
                    pretrain_flag=False, pkl_file_name=pkl_file_name)
# randomly initialized weights (before training)
filter_show(net.params['W1'])

# weights after training
net.load_pretrain_model()
filter_show(net.params['W1'])
  • Convolution-layer filters before training
    [Figure: 30 randomly initialized 5×5 filters]
  • Convolution-layer filters after training
    [Figure: 30 learned 5×5 filters]

Comparing the two figures, the filters before training are randomly initialized, so there is no discernible pattern in their light-and-dark shading, but the filters after training have become regular, structured images. Through learning, the filters are updated into regular patterns, such as filters that fade from white to black and filters containing blob-like regions (called blobs).

If you ask what these regular filters are "looking at", the answer is edges (boundaries where the color changes) and blobs (locally blob-shaped regions). For example, a filter whose left half is white and right half is black responds to vertical edges, as the sketch below illustrates.
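This demo is not from the book's code; the input image and the "white left half, black right half" filter are hand-made here. A naive valid convolution (strictly speaking, cross-correlation, as in most deep learning code) shows the response concentrating along the edge:

import numpy as np
import matplotlib.pyplot as plt

# synthetic 28x28 image: dark left half, bright right half -> one vertical edge
img = np.zeros((28, 28))
img[:, 14:] = 1.0

# 5x5 filter: positive (white) weights on the left, negative (black) on the right
w = np.zeros((5, 5))
w[:, :2] = 1.0
w[:, 3:] = -1.0

# naive valid convolution; output shape is (24, 24), matching the conv layer above
out = np.zeros((24, 24))
for i in range(24):
    for j in range(24):
        out[i, j] = np.sum(img[i:i+5, j:j+5] * w)

plt.imshow(np.abs(out), cmap=plt.cm.gray_r, interpolation='nearest')
plt.show()  # only the columns around the edge respond

Flat regions cancel to zero because the filter's positive and negative weights balance; only windows that straddle the edge produce a non-zero response.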

From this we can see that convolution filters extract primitive information such as edges and blobs, and the CNN implemented above passes this primitive information on to the subsequent layers.

The results above come from the first convolution layer, which extracts "low-level" information such as edges and blobs. According to research on visualizing deep networks, the information extracted (more precisely, the stimuli that the neurons respond to most strongly) becomes more abstract as the layers deepen. When many convolution layers are stacked, deeper layers extract increasingly complex and abstract information; in other words, as depth increases, the neurons shift from responding to simple shapes to responding to "higher-level" information.

[Figure: information extracted at successive convolution layers becomes increasingly abstract]
