前言
BP神經網絡是深度神經網絡的基礎,由於深度神經網絡過於複雜,不便於理解其中參數更新的過程,所以我們一般會用3層網絡來理解這個過程。BP網絡由輸入層,隱藏層和輸出層組成。一次正向傳播輸出結果,算出損失值,然後一次反向傳播,求出各層之間連接的權重和偏置的梯度,通過梯度下降法(或者其他方法)更新參數,從而完成一次loss的最小優化。重複正、反向傳播的過程,直到loss不再減小(或者說減少得很小,可忽略不計)。求梯度的過程是需要仔細推導的,矩陣求導可以記住求導公式(或者自己去推敲),其他函數的求導倒是沒啥問題,激活函數都比較簡單。
編程實現
import numpy as np
import os
import matplotlib.pyplot as plt
# Derivative of the sigmoid, expressed in terms of the sigmoid's output.
def sigmoid_dt(y):
    """Return sigmoid'(x) given y = sigmoid(x), i.e. y * (1 - y)."""
    complement = 1 - y
    return y * complement
# Logistic sigmoid activation; maps any real input into (0, 1).
def sigmoid(x):
    """Return 1 / (1 + exp(-x)); accepts scalars or NumPy arrays."""
    exp_neg = np.exp(-x)
    return 1.0 / (1.0 + exp_neg)
"""
函數說明:加載數據集
set,txt文件的數據格式形如:
-0.017612 14.053064 0
-1.395634 4.662541 1
-0.752157 6.538620 0
-1.322371 7.152853 0
0.423363 11.054677 0
0.406704 7.067335 1
Parameters:
無
Returns:
返回 feature 和 lable
"""
def loadData():
feature = []
lable = []
fr = open('set.txt')
lines = fr.readlines()
for line in lines:
lineArr = line.strip().split()
feature.append([lineArr[0], lineArr[1]])
lable.append([lineArr[-1]])
return np.array(feature, dtype='float64'), np.array(lable, dtype='float64')
class BP(object):
    """Three-layer (input-hidden-output) BP neural network for binary
    classification, trained by single-sample stochastic gradient descent.

    layers: sequence [n_in, n_hidden, n_out] giving each layer's size.
    """

    def __init__(self, layers):
        # Randomly initialize weights and biases in [0, 1); w maps
        # layer i's column vector to layer i+1's column vector.
        self.w1 = np.random.random((layers[1], layers[0]))
        self.b1 = np.random.random((layers[1], 1))
        self.w2 = np.random.random((layers[2], layers[1]))
        self.b2 = np.random.random((layers[2], 1))

    def _forward(self, x):
        """One forward pass for column vector x; returns (hidden, output)."""
        L1 = sigmoid(np.dot(self.w1, x) + self.b1)
        L2 = sigmoid(np.dot(self.w2, L1) + self.b2)
        return L1, L2

    def train(self, X, Y, learn=0.1, epochs=10000):
        """Train the network, then evaluate it on the training set.

        Parameters:
            X: (n_samples, n_features) inputs.
            Y: (n_samples, 1) binary labels (0.0 / 1.0).
            learn: learning rate for gradient descent.
            epochs: number of single-sample update steps.

        Returns:
            (correct_rate, cost): accuracy at threshold 0.5 and mean
            absolute prediction error after training.
        """
        # NOTE: the original looped range(epochs + 1), performing one
        # extra update step beyond the requested count (off-by-one).
        for _ in range(epochs):
            # Pick one random training sample per step (SGD).
            i = np.random.randint(X.shape[0])
            x = np.reshape(X[i], (X[i].shape[0], 1))
            # Forward pass.
            L1, L2 = self._forward(x)
            # Gradient of the squared-error loss w.r.t. the output
            # layer's pre-activation input.
            delta_L2 = (L2 - Y[i]) * sigmoid_dt(L2)
            # Backpropagate through w2 to the hidden layer's input.
            delta_L1 = np.dot(self.w2.T, delta_L2) * sigmoid_dt(L1)
            # Gradient-descent parameter updates
            # (for f(X) = W*X, df/dW contributes the transposed factor).
            self.w2 = self.w2 - learn * np.dot(delta_L2, L1.T)
            self.b2 = self.b2 - learn * delta_L2
            self.w1 = self.w1 - learn * np.dot(delta_L1, x.T)
            self.b1 = self.b1 - learn * delta_L1
        # After training, compute accuracy and mean absolute error.
        correct_count = 0
        cost = 0
        for k, x in enumerate(X):
            prediction = self.predict(x)
            hit = prediction > 0.5
            if hit and Y[k] == 1.0:
                correct_count += 1
            elif not hit and Y[k] == 0.0:
                correct_count += 1
            cost = cost + abs(Y[k] - prediction)
        correct_rate = correct_count / X.shape[0]
        cost = cost / X.shape[0]
        return correct_rate, cost

    def predict(self, x):
        """Run one forward pass for sample x; returns a (1, 1) output array."""
        x = np.reshape(x, (x.shape[0], 1))
        _, L2 = self._forward(x)
        return L2
if __name__ == "__main__":
X, Y = loadData()
bp = BP([2,9,1])
correct_rate = bp.train(X,Y)
print(correct_rate)