文章目錄
聲明
本文參考何寬
前言
本文的結構:
首先從底層搭建一個完整的卷積神經網絡的模塊,之後再用tensorflow來實現。
模型結構如下:
要實現的模塊的函數功能:
- 卷積模塊,包含以下函數:
- 使用0擴充邊界
- 卷積窗口
- 前向卷積
- 反向卷積
- 池化模塊,包含以下函數:
- 前向池化
- 創建掩碼
- 值分配
- 反向池化
一、卷積神經網絡
1.1、導入庫
import numpy as np
import h5py
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (5.0,4.0)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
np.random.seed(1)
1.2、邊界填充
用np.pad進行邊界填充。
def zero_pad(X, pad):
    """
    Pad every image in the batch X with `pad` zeros on the height and
    width dimensions; the batch and channel axes are left untouched.

    Arguments:
    X -- image batch, shape (m, n_H, n_W, n_C)
    pad -- integer, amount of zero padding added on each side of height/width

    Returns:
    X_paded -- padded batch, shape (m, n_H + 2*pad, n_W + 2*pad, n_C)
    """
    pad_spec = ((0, 0), (pad, pad), (pad, pad), (0, 0))
    return np.pad(X, pad_spec, 'constant')
測試:
np.random.seed(1)
x = np.random.randn(4,3,3,2)
x_paded = zero_pad(x,2)
print(x.shape,x_paded.shape,x[1,1],x_paded[1,1])
fig,axarr = plt.subplots(1,2)
axarr[0].set_title('x')
axarr[0].imshow(x[0,:,:,0])
axarr[1].set_title('x_paded')
axarr[1].imshow(x_paded[0,:,:,0])
結果:
(4, 3, 3, 2) (4, 7, 7, 2) [[ 0.90085595 -0.68372786]
[-0.12289023 -0.93576943]
[-0.26788808 0.53035547]] [[0. 0.]
[0. 0.]
[0. 0.]
[0. 0.]
[0. 0.]
[0. 0.]
[0. 0.]]
1.3、單步卷積
過濾器大小f=3,步伐s=1,填充pad=2。
def conv_single_step(a_slice_prev, W, b):
    """
    Apply one filter (W, b) to a single slice of the previous layer's
    activation output.  The slice and the filter have the same shape.

    Arguments:
    a_slice_prev -- input slice, shape (f, f, n_C_prev)
    W -- filter weights, shape (f, f, n_C_prev)
    b -- filter bias, shape (1, 1, 1) (or a scalar)

    Returns:
    Z -- scalar, result of convolving the window (W, b) over the slice
    """
    # Element-wise product, sum, then add the bias ONCE.
    # BUG FIX: the previous version computed np.sum(a_slice_prev*W + b),
    # which adds the bias f*f*n_C_prev times instead of once.
    Z = np.sum(a_slice_prev * W) + float(b)
    return Z
測試:
np.random.seed(1)
a_slice_prev = np.random.randn(4,4,3)
W = np.random.randn(4,4,3)
b = np.random.randn(1,1,1)
Z = conv_single_step(a_slice_prev,W,b)
print(Z)
結果:
-23.16021220252078
1.4、卷積層前向傳播
def conv_forward(A_prev, W, b, hparameters):
    """
    Forward pass of a convolution layer.

    Arguments:
    A_prev -- activations of the previous layer,
              shape (m, n_H_prev, n_W_prev, n_C_prev)
    W -- filter weights, shape (f, f, n_C_prev, n_C)
    b -- biases, shape (1, 1, 1, n_C)
    hparameters -- dict holding "stride" and "pad"

    Returns:
    Z -- convolution output, shape (m, n_H, n_W, n_C)
    cache -- values needed by conv_backward()
    """
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    f = W.shape[0]
    n_C = W.shape[3]
    stride = hparameters['stride']
    pad = hparameters['pad']

    # Output spatial size of a valid convolution over the padded input.
    n_H = (n_H_prev + 2 * pad - f) // stride + 1
    n_W = (n_W_prev + 2 * pad - f) // stride + 1

    pad_spec = ((0, 0), (pad, pad), (pad, pad), (0, 0))
    A_prev_pad = np.pad(A_prev, pad_spec, 'constant')
    Z = np.zeros((m, n_H, n_W, n_C))

    for i in range(m):
        sample = A_prev_pad[i]
        for h in range(n_H):
            v0, v1 = h * stride, h * stride + f
            for w in range(n_W):
                c0, c1 = w * stride, w * stride + f
                window = sample[v0:v1, c0:c1, :]
                for c in range(n_C):
                    # NOTE: the bias is added inside the sum, exactly as the
                    # original single-step helper did (it is therefore counted
                    # f*f*n_C_prev times) -- behaviour preserved deliberately.
                    Z[i, h, w, c] = np.sum(window * W[:, :, :, c] + b[0, 0, 0, c])

    cache = (A_prev, W, b, hparameters)
    return (Z, cache)
測試:
np.random.seed(1)
A_prev = np.random.randn(10,4,4,3)
W = np.random.randn(2,2,3,8)
b = np.random.randn(1,1,1,8)
hparameters = {'pad':2,'stride':1}
Z,cache_conv = conv_forward(A_prev,W,b,hparameters)
print(np.mean(Z))
print(cache_conv[0][1][2][3])
結果:
0.15585932488906465
[-0.20075807 0.18656139 0.41005165]
1.5、池化層前向傳播
def pool_forward(A_prev, hparameters, mode='max'):
    """
    Forward pass of the pooling layer.

    Arguments:
    A_prev -- input data, shape (m, n_H_prev, n_W_prev, n_C_prev)
    hparameters -- dict holding "f" and "stride"
    mode -- pooling mode, 'max' or 'average'

    Returns:
    A -- pooled output, shape (m, n_H, n_W, n_C)
    cache -- (A_prev, hparameters), used by the backward pass
    """
    m, n_H_prev, n_W_prev, n_C = A_prev.shape
    f = hparameters['f']
    stride = hparameters['stride']

    # Pooling never pads, so the output shrinks by the window size.
    n_H = (n_H_prev - f) // stride + 1
    n_W = (n_W_prev - f) // stride + 1

    A = np.zeros((m, n_H, n_W, n_C))
    for i in range(m):
        sample = A_prev[i]
        for h in range(n_H):
            rows = slice(h * stride, h * stride + f)
            for w in range(n_W):
                cols = slice(w * stride, w * stride + f)
                for c in range(n_C):
                    window = sample[rows, cols, c]
                    if mode == 'max':
                        A[i, h, w, c] = np.max(window)
                    elif mode == 'average':
                        A[i, h, w, c] = np.mean(window)

    return A, (A_prev, hparameters)
測試:
np.random.seed(1)
A_prev = np.random.randn(2,4,4,3)
hparameters = {'f':4,'stride':1}
A,cache = pool_forward(A_prev,hparameters,mode='max')
print(A)
A,cache = pool_forward(A_prev,hparameters,mode='average')
print(A)
結果:
[[[[1.74481176 1.6924546 2.10025514]]]
[[[1.19891788 1.51981682 2.18557541]]]]
[[[[-0.09498456 0.11180064 -0.14263511]]]
[[[-0.09525108 0.28325018 0.33035185]]]]
1.6、卷積層的反向傳播
1.6.1、計算dA
其中,$W_c$ 是第 $c$ 個過濾器,$dZ_{hw}$ 是一個標量,即卷積層第 h 行第 w 列的使用點乘計算後的輸出 Z 的梯度。需要注意的是在每次更新dA的時候,都會用相同的過濾器乘以不同的dZ,因爲在前向傳播的時候,每個過濾器都與a_slice進行了點乘相加,所以在計算dA的時候,需要把a_slice的梯度也加進來,可以在循環中加一句代碼:
da_perv_pad[vert_start:vert_end,horiz_start:horiz_end,:] += W[:,:,:,c] * dZ[i,h,w,c]
1.6.2、計算dW
其中,$a_{slice}$ 對應着產生激活值 $Z_{hw}$ 的那個輸入切片,由此,可以推導W的梯度,因爲使用了過濾器來對數據進行窗口滑動,在這裏實際上是切出了和過濾器一樣大小的切片,切了多少次就產生了多少個梯度,所以需要把它們加起來得到這個數據集的整體dW。
dW[:,:,:, c] += a_slice * dZ[i , h , w , c]
1.6.3、計算db
db[:,:,:,c] += dZ[ i, h, w, c]
def conv_backward(dZ, cache):
    """
    Backward pass of a convolution layer.

    Arguments:
    dZ -- gradient of the cost w.r.t. the conv output Z, shape (m, n_H, n_W, n_C)
    cache -- (A_prev, W, b, hparameters) stored by conv_forward()

    Returns:
    dA_prev -- gradient w.r.t. the layer input, shape (m, n_H_prev, n_W_prev, n_C_prev)
    dW -- gradient w.r.t. the filters, shape (f, f, n_C_prev, n_C)
    db -- gradient w.r.t. the biases, shape (1, 1, 1, n_C)
    """
    (A_prev, W, b, hparameters) = cache
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    (m, n_H, n_W, n_C) = dZ.shape
    f = W.shape[0]
    pad = hparameters['pad']
    stride = hparameters['stride']

    dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev))
    dW = np.zeros((f, f, n_C_prev, n_C))
    db = np.zeros((1, 1, 1, n_C))

    pad_spec = ((0, 0), (pad, pad), (pad, pad), (0, 0))
    A_prev_pad = np.pad(A_prev, pad_spec, 'constant')
    dA_prev_pad = np.pad(dA_prev, pad_spec, 'constant')

    for i in range(m):
        a_prev_pad = A_prev_pad[i]
        da_prev_pad = dA_prev_pad[i]
        for h in range(n_H):
            for w in range(n_W):
                # BUG FIX: the window corners must advance by `stride`.
                # The old code used h and w directly, which is only correct
                # when stride == 1 and silently mis-attributes gradients
                # (and reads wrong slices) for any other stride.
                vert_start = h * stride
                vert_end = vert_start + f
                horiz_start = w * stride
                horiz_end = horiz_start + f
                a_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                for c in range(n_C):
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:, :, :, c] * dZ[i, h, w, c]
                    dW[:, :, :, c] += a_slice * dZ[i, h, w, c]
                    db[:, :, :, c] += dZ[i, h, w, c]
        # BUG FIX: `[pad:-pad]` returns an EMPTY slice when pad == 0;
        # slicing to an explicit stop index works for every pad value.
        dA_prev[i, :, :, :] = da_prev_pad[pad:pad + n_H_prev, pad:pad + n_W_prev, :]

    assert dA_prev.shape == (m, n_H_prev, n_W_prev, n_C_prev)
    return dA_prev, dW, db
測試:
np.random.seed(1)
A_prev = np.random.randn(10,4,4,3)
W = np.random.randn(2,2,3,8)
b = np.random.randn(1,1,1,8)
hparameters = {'pad':2,'stride':1}
Z,cache_conv = conv_forward(A_prev,W,b,hparameters)
dA_prev,dW,db = conv_backward(Z,cache_conv)
print(np.mean(dA_prev),np.mean(dW),np.mean(db))
結果:
9.608990675868995 10.581741275547566 76.37106919563735
1.7、池化層的反向傳播
1.7.1、最大值池化層的反向傳播
首先,我們需要創建一個create_mask_from_window()
的函數,這個函數創建一個掩碼矩陣,以保存最大值的位置,當爲1的時候表示最大值的位置,其他的爲0,這個是最大值池化層。
爲什麼要創建這個掩碼矩陣呢?因爲我們需要記錄最大值的位置,這樣才能反向傳播到卷積層。
def create_mask_from_window(x):
    """
    Build a boolean mask marking where the maximum of x sits:
    True at the max position(s), False everywhere else.  Used by the
    max-pooling backward pass to route the gradient to the winning entry.

    Arguments:
    x -- array of shape (f, f)

    Returns:
    mask -- boolean array, same shape as x
    """
    window_max = np.max(x)
    return x == window_max
測試:
np.random.seed(1)
x = np.random.randn(2,3)
mask = create_mask_from_window(x)
print(x,mask)
結果:
[[ 1.62434536 -0.61175641 -0.52817175]
[-1.07296862 0.86540763 -2.3015387 ]] [[ True False False]
[False False False]]
1.7.2、均值池化層的反向傳播
和最大值池化層不同,均值池化層取過濾器的均值。
def distribute_value(dz, shape):
    """
    Spread the scalar dz evenly over a matrix of the given shape.
    Used by the average-pooling backward pass: every input of the window
    contributed equally, so each receives an equal share of the gradient.

    Arguments:
    dz -- scalar gradient
    shape -- tuple (n_H, n_W) of the target matrix

    Returns:
    a -- array of the given shape whose every entry equals dz / (n_H * n_W)
    """
    n_H, n_W = shape
    share = dz / (n_H * n_W)
    return np.full(shape, share)
測試:
dz = 2
shape = (2,2)
a = distribute_value(dz,shape)
print(a)
結果:
[[0.5 0.5]
[0.5 0.5]]
1.7.3、池化層反向傳播
def pool_backward(dA, cache, mode="max"):
    """
    Backward pass of the pooling layer.

    Arguments:
    dA -- gradient of the cost w.r.t. the pooling output, same shape as that output
    cache -- (A_prev, hparameters) stored by pool_forward()
    mode -- pooling mode, 'max' or 'average'

    Returns:
    dA_prev -- gradient w.r.t. the pooling input, same shape as A_prev
    """
    (A_prev, hparameters) = cache
    f = hparameters['f']
    stride = hparameters['stride']
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    (m, n_H, n_W, n_C) = dA.shape

    dA_prev = np.zeros_like(A_prev)
    for i in range(m):
        a_prev = A_prev[i]
        for h in range(n_H):
            for w in range(n_W):
                # BUG FIX: the window must advance by `stride`; the old
                # code used h and w directly, which is only correct when
                # stride == 1.
                vert_start = h * stride
                vert_end = vert_start + f
                horiz_start = w * stride
                horiz_end = horiz_start + f
                for c in range(n_C):
                    if mode == 'max':
                        # Route the whole gradient to the max position(s).
                        a_prev_slice = a_prev[vert_start:vert_end, horiz_start:horiz_end, c]
                        mask = a_prev_slice == np.max(a_prev_slice)
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += mask * dA[i, h, w, c]
                    elif mode == 'average':
                        # Spread the gradient evenly over the f x f window.
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += dA[i, h, w, c] / (f * f)

    assert dA_prev.shape == A_prev.shape
    return dA_prev
測試:
np.random.seed(1)
A_prev = np.random.randn(5,5,3,2)
W = np.random.randn(2,2,3,8)
b = np.random.randn(1,1,1,8)
hparameters = {'f':2,'stride':1}
A,cache = pool_forward(A_prev,hparameters)
dA = np.random.randn(5,4,2,2)
dA_prev = pool_backward(dA,cache,mode='max')
print(np.mean(dA),dA_prev[1,1])
dA_prev = pool_backward(dA,cache,mode='average')
print(np.mean(dA),dA_prev[1,1])
結果:
0.14026942447888846 [[ 0. 0. ]
[-0.72334716 2.42826793]
[ 0. 0. ]]
0.14026942447888846 [[-0.0103192 0.16889297]
[-0.18083679 0.69818344]
[-0.17051759 0.52929047]]
二、基於tensorflow的卷積神經網絡
situation:使用tensorflow來實現卷積神經網絡,然後應用到手勢識別中。
2.0、導入庫
import math
import numpy as np
import h5py
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf
from tensorflow.python.framework import ops
import cnn_utils
import tf_utils
np.random.seed(1)
看裏面的內容:
X_train_orig , Y_train_orig , X_test_orig , Y_test_orig , classes = tf_utils.load_dataset()
index = 6
plt.imshow(X_train_orig[index])
print(str(np.squeeze(Y_train_orig[:,index])))
結果:
2
調整數據,看數據的維度:
X_train = X_train_orig/255
X_test = X_test_orig/255
Y_train = cnn_utils.convert_to_one_hot(Y_train_orig,6).T
Y_test = cnn_utils.convert_to_one_hot(Y_test_orig,6).T
print(X_train.shape[0],X_test.shape[0],X_train.shape,Y_train.shape,X_test.shape,Y_test.shape)
結果:
1080 120 (1080, 64, 64, 3) (1080, 6) (120, 64, 64, 3) (120, 6)
2.1、創建placeholders
tensorflow要求爲會話時將輸入到模型中的輸入數據創建佔位符。因爲我們使用的是小批量數據塊,輸入的樣本數量可能不固定,所以在數量那裏我們要使用None作爲可變數量,輸入X的維度爲[None,n_H0,n_W0,n_c0],對應的Y是[None,n_y]。
def create_placeholders(n_H0, n_W0, n_C0, n_y):
    """
    Create the tf placeholders for a session.

    Arguments:
    n_H0 -- integer, height of an input image
    n_W0 -- integer, width of an input image
    n_C0 -- integer, number of input channels
    n_y -- integer, number of classes

    Returns:
    X -- placeholder for the input data, shape [None, n_H0, n_W0, n_C0], dtype float32
    Y -- placeholder for the labels, shape [None, n_y], dtype float32
    """
    # None in the batch dimension lets the mini-batch size vary at run time.
    x_shape = [None, n_H0, n_W0, n_C0]
    y_shape = [None, n_y]
    X = tf.placeholder(tf.float32, x_shape)
    Y = tf.placeholder(tf.float32, y_shape)
    return X, Y
測試:
X,Y = create_placeholders(64,64,3,6)
print(X,Y)
結果:
Tensor("Placeholder:0", shape=(?, 64, 64, 3), dtype=float32) Tensor("Placeholder_1:0", shape=(?, 6), dtype=float32)
2.2、初始化參數
def initialize_parameters():
    """
    Initialize the convolution filter weights.  The shapes are hard-coded:
        W1 : [4, 4, 3, 8]
        W2 : [2, 2, 8, 16]

    Returns:
    parameters -- dict mapping 'W1' and 'W2' to tf variables
    """
    tf.set_random_seed(1)
    shapes = {'W1': [4, 4, 3, 8], 'W2': [2, 2, 8, 16]}
    parameters = {}
    for name, shape in shapes.items():
        # Xavier initialization with a fixed seed for reproducibility.
        parameters[name] = tf.get_variable(
            name, shape, initializer=tf.contrib.layers.xavier_initializer(seed=0))
    return parameters
測試:
tf.reset_default_graph()
with tf.Session() as sess_test:
parameters = initialize_parameters()
init = tf.global_variables_initializer()
sess_test.run(init)
print(parameters['W1'].eval()[1,1,1])
print(parameters['W2'].eval()[1,1,1])
sess_test.close()
結果:
[ 0.00131723 0.1417614 -0.04434952 0.09197326 0.14984085 -0.03514394
-0.06847463 0.05245192]
[-0.08566415 0.17750949 0.11974221 0.16773748 -0.0830943 -0.08058
-0.00577033 -0.14643836 0.24162132 -0.05857408 -0.19055021 0.1345228
-0.22779644 -0.1601823 -0.16117483 -0.10286498]
2.3、前向傳播
模型的結構:
conv2d->relu->maxpool->conv2d->relu->maxpool->fullconnected
步驟和參數:
- Conv2d:步伐:1,填充方式:‘SAME’
- Relu
- Max pool:過濾器大小:8x8,步伐:8x8,填充方式:‘SAME’
- Conv2d:步伐:1,填充方式:‘SAME’
- Relu
- Max pool:過濾器大小:4x4,步伐:4x4,填充方式:‘SAME’
- 一維化上一層的輸出
- 全連接層(FC):使用沒有非線性激活函數的全連接層。這裏不用調用SoftMax,這將導致輸出層中有6個神經元,然後再傳遞到softmax。
def forward_propagation(X, parameters):
    """
    Forward pass:
        conv2d -> relu -> maxpool -> conv2d -> relu -> maxpool -> flatten -> fully connected

    Arguments:
    X -- placeholder for the input data batch
    parameters -- dict holding the tf variables 'W1' and 'W2'

    Returns:
    Z3 -- output of the last linear (fully connected) node: 6 units, no activation
          (softmax is applied later, inside the loss)
    """
    W1 = parameters['W1']
    W2 = parameters['W2']

    # Block 1: conv (stride 1, SAME) -> relu -> 8x8 max-pool (stride 8, SAME)
    A1 = tf.nn.relu(tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME'))
    P1 = tf.nn.max_pool(A1, ksize=[1, 8, 8, 1], strides=[1, 8, 8, 1], padding='SAME')

    # Block 2: conv (stride 1, SAME) -> relu -> 4x4 max-pool (stride 4, SAME)
    A2 = tf.nn.relu(tf.nn.conv2d(P1, W2, strides=[1, 1, 1, 1], padding='SAME'))
    P2 = tf.nn.max_pool(A2, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='SAME')

    # Flatten and map to 6 logits.
    P = tf.contrib.layers.flatten(P2)
    Z3 = tf.contrib.layers.fully_connected(P, 6, activation_fn=None)
    return Z3
測試:
tf.reset_default_graph()
np.random.seed(1)
with tf.Session() as sess_test:
X,Y = create_placeholders(64,64,3,6)
parameters = initialize_parameters()
Z3 = forward_propagation(X,parameters)
init = tf.global_variables_initializer()
sess_test.run(init)
a = sess_test.run(Z3,{X:np.random.randn(2,64,64,3),Y:np.random.randn(2,6)})
print(a)
sess_test.close()
結果:
[[-0.44670227 -1.5720876 -1.5304923 -2.3101304 -1.2910438 0.46852064]
[-0.17601591 -1.5797201 -1.4737016 -2.616721 -1.0081065 0.5747785 ]]
2.4、計算成本
def compute_cost(Z3, Y):
    """
    Compute the softmax cross-entropy cost.

    Arguments:
    Z3 -- logits from forward propagation, shape (samples, 6)
    Y -- placeholder for the true labels, same shape as Z3

    Returns:
    cost -- tf scalar, mean cross-entropy over the batch
    """
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=Z3, labels=Y)
    cost = tf.reduce_mean(per_example_loss)
    return cost
測試:
tf.reset_default_graph()
with tf.Session() as sess_test:
np.random.seed(1)
X,Y = create_placeholders(64,64,3,6)
parameters = initialize_parameters()
Z3 = forward_propagation(X,parameters)
cost = compute_cost(Z3,Y)
init = tf.global_variables_initializer()
sess_test.run(init)
a = sess_test.run(cost,{X:np.random.randn(4,64,64,3),Y:np.random.randn(4,6)})
print(a)
sess_test.close()
結果:
2.9103398
2.5、構建模型
步驟:
- 創建佔位符
- 初始化參數
- 前向傳播
- 計算成本
- 反向傳播
- 創建優化器
最後將創建一個session來運行模型。
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.009,
          num_epochs=100, minibatch_size=64, print_cost=True, isPlot=True):
    """
    Three-layer convnet in tensorflow:
        conv2d -> relu -> maxpool -> conv2d -> relu -> maxpool -> flatten -> fully connected

    Arguments:
    X_train -- training set, shape (None, 64, 64, 3)
    Y_train -- training labels, shape (None, n_y = 6)
    X_test -- test set, shape (None, 64, 64, 3)
    Y_test -- test labels, shape (None, n_y = 6)
    learning_rate -- learning rate for the Adam optimizer
    num_epochs -- number of passes over the training set
    minibatch_size -- size of each mini-batch
    print_cost -- print the epoch cost every 5 epochs
    isPlot -- plot the cost curve after training

    Returns:
    train_accuracy -- accuracy on the training set
    test_accuracy -- accuracy on the test set
    parameters -- the learned parameters
    """
    ops.reset_default_graph()
    tf.set_random_seed(1)
    seed = 3
    (m, n_H0, n_W0, n_C0) = X_train.shape
    n_y = Y_train.shape[1]
    costs = []

    # Build the graph once, before the session starts.
    X, Y = create_placeholders(n_H0, n_W0, n_C0, n_y)
    parameters = initialize_parameters()
    Z3 = forward_propagation(X, parameters)
    cost = compute_cost(Z3, Y)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    init = tf.global_variables_initializer()

    # Loop-invariant: hoisted out of the epoch loop (it was recomputed
    # every epoch in the original).
    num_minibatches = int(m / minibatch_size)

    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(num_epochs):
            minibatch_cost = 0
            seed = seed + 1  # reshuffle differently every epoch
            minibatches = cnn_utils.random_mini_batches(X_train, Y_train, minibatch_size, seed)
            for minibatch in minibatches:
                (minibatch_X, minibatch_Y) = minibatch
                _, temp_cost = sess.run([optimizer, cost],
                                        feed_dict={X: minibatch_X, Y: minibatch_Y})
                minibatch_cost += temp_cost / num_minibatches
            if print_cost and epoch % 5 == 0:
                print('epoch=', epoch, ',minibatch_cost=', minibatch_cost)
            # BUG FIX: cost tracking was nested under `print_cost`, so the
            # plot was empty when printing was disabled; record it always.
            costs.append(minibatch_cost)

        if isPlot:
            plt.plot(np.squeeze(costs))
            plt.ylabel('cost')
            plt.xlabel('iterations(per tens)')
            plt.title('learning rate = ' + str(learning_rate))
            plt.show()

        # BUG FIX: tf.arg_max is deprecated; tf.argmax is the supported API
        # with identical behaviour.
        predict_op = tf.argmax(Z3, 1)
        correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
        print(accuracy)
        train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
        test_accuracy = accuracy.eval({X: X_test, Y: Y_test})
        print(train_accuracy)
        print(test_accuracy)

    return (train_accuracy, test_accuracy, parameters)
測試:
_,_,parameters = model(X_train,Y_train,X_test,Y_test,num_epochs=100)
結果:
epoch= 0 ,minibatch_cost= 1.9179195687174797
epoch= 5 ,minibatch_cost= 1.5324752107262611
epoch= 10 ,minibatch_cost= 1.0148038603365421
epoch= 15 ,minibatch_cost= 0.8851366713643074
epoch= 20 ,minibatch_cost= 0.7669634483754635
epoch= 25 ,minibatch_cost= 0.651207884773612
epoch= 30 ,minibatch_cost= 0.6133557204157114
epoch= 35 ,minibatch_cost= 0.6059311926364899
epoch= 40 ,minibatch_cost= 0.5347129087895155
epoch= 45 ,minibatch_cost= 0.5514022018760443
epoch= 50 ,minibatch_cost= 0.49697646126151085
epoch= 55 ,minibatch_cost= 0.4544383566826582
epoch= 60 ,minibatch_cost= 0.45549566112458706
epoch= 65 ,minibatch_cost= 0.4583591800183058
epoch= 70 ,minibatch_cost= 0.4500396177172661
epoch= 75 ,minibatch_cost= 0.4106866829097271
epoch= 80 ,minibatch_cost= 0.46900513023138046
epoch= 85 ,minibatch_cost= 0.389252956956625
epoch= 90 ,minibatch_cost= 0.3638075301423669
epoch= 95 ,minibatch_cost= 0.3761322880163789
Tensor("Mean_1:0", shape=(), dtype=float32)
0.86851853
0.73333335