一、問題描述
二、算法核心思想分析
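貝葉斯決策論的核心思想是:由先驗概率 $P(\omega_i)$ 和類條件概率密度 $p(x\mid\omega_i)$ 求出後驗概率,再把樣本點判給後驗概率最大的類別。這裏簡化爲二分類問題,並假設各類的類條件概率密度服從正態分佈,因此只需求出各類的均值向量 $\mu_i$、協方差矩陣 $\Sigma_i$、特徵維度 $d$ 和先驗概率 $P(\omega_i)$,代入判別函數 $g_i(x) = -\frac{1}{2}(x-\mu_i)^{T}\Sigma_i^{-1}(x-\mu_i) - \frac{d}{2}\ln 2\pi - \frac{1}{2}\ln|\Sigma_i| + \ln P(\omega_i)$ 計算,再比較 $g_1(x)$ 與 $g_2(x)$ 的大小即可。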
三、代碼參考
我這裏用 Python 實現,主要用到 xlrd 和 numpy 兩個庫。Excel 文件中總共 30 行數據,每列依次爲:x1, x2, x3, w(w 爲類別標籤);代碼讀取時跳過第一行,並且只保留 w 不等於 3 的樣本。
import xlrd
import numpy as np
# Load the samples
def read_data(path="lab1_data.xlsx"):
    """Read the sample rows from the first sheet of an Excel workbook.

    The first row (index 0) is skipped, and every row whose fourth column
    (index 3, the class label) equals 3 is discarded.

    Args:
        path: Workbook to open. Defaults to ``"lab1_data.xlsx"``, the file
            name this function previously hard-coded.

    Returns:
        A list of rows; each row is the list of cell values produced by
        ``row_values`` for that sheet row.
    """
    # NOTE(review): xlrd 2.0 and later read only legacy .xls files; opening
    # an .xlsx workbook needs xlrd < 2.0 -- confirm the installed version.
    workbook = xlrd.open_workbook(path)
    table = workbook.sheets()[0]

    x = []
    for i in range(1, table.nrows):
        row_value = table.row_values(i)
        # Keep only the samples that do not belong to class 3.
        if row_value[3] != 3:
            x.append(row_value)
    return x
# Compute the mean
def get_u(x):
    """Return the column-wise mean of the samples in *x*.

    Args:
        x: Sequence of samples, one sample per row and one feature per
            column.

    Returns:
        The result of ``np.mean(x, axis=0)``: one mean per column.
    """
    return np.mean(x, axis=0)
# Compute the covariance
def get_sigmal(x):
    """Return the covariance of the samples in *x*.

    Args:
        x: Sequence of samples, one sample per row and one feature per
            column.

    Returns:
        The value of ``np.cov`` applied to the transposed samples (features
        as rows, samples as columns). For a single feature ``np.cov``
        squeezes the result to a 0-d array holding the variance.
    """
    # ``np.mat`` was removed in NumPy 2.0. ``np.cov`` only needs a 2-D array
    # with one row per feature, so a plain transposed ndarray is equivalent;
    # ``atleast_2d`` mirrors the way ``np.mat`` promoted 1-D input to a row.
    samples = np.atleast_2d(np.asarray(x, dtype=float))
    return np.cov(samples.T)
# Compute the discriminant g(x)
def get_g(x, u, sigmal, pw, d):
    """Evaluate the Gaussian discriminant function for one sample.

    Computes::

        g(x) = -1/2 (x - u) S^-1 (x - u)^T - d/2 ln(2*pi)
               - 1/2 ln|S| + ln(pw)

    where ``S`` is *sigmal*.

    Args:
        x: Feature values of the sample.
        u: Mean of the class, same length as *x*.
        sigmal: Covariance of the class (a scalar / 0-d array is accepted
            for a single feature).
        pw: Prior probability of the class.
        d: Number of features.

    Returns:
        A 1x1 ``np.matrix`` holding the discriminant value.
    """
    # ``np.mat`` was removed in NumPy 2.0; ``np.asmatrix`` is the drop-in
    # replacement and keeps the matrix semantics (``*`` is a matrix product,
    # ``.I`` is the inverse) the expression below relies on.
    x = np.asmatrix(x)
    u = np.asmatrix(u)
    sigmal = np.asmatrix(sigmal)
    pw = np.asmatrix(pw)

    diff = x - u
    g = (
        -0.5 * diff * sigmal.I * diff.T
        - d / 2 * np.log(2 * np.pi)
        - 0.5 * np.log(np.linalg.det(sigmal))
        + np.log(pw)
    )
    return g
def main(dimension):
    """Fit and evaluate the two-class Gaussian Bayes classifier.

    Loads the samples with ``read_data``, estimates the mean and covariance
    of classes 1 and 2 from the first *dimension* columns, then classifies
    every loaded sample with ``get_g`` and prints the per-sample results
    followed by the accuracy and the error rate.

    Args:
        dimension: Number of leading feature columns to use (1 to 3).

    Raises:
        ValueError: If *dimension* is outside 1..3 or no samples were
            loaded.
    """
    d = dimension
    # Column 3 holds the class label, so at most three columns are features.
    if not 1 <= d <= 3:
        raise ValueError("dimension must be between 1 and 3")

    # Equal priors for the two classes under consideration.
    pw = (0.5, 0.5)

    data = read_data()
    if not data:
        # Without this the accuracy computation below divides by zero.
        raise ValueError("no samples to classify")

    # Per-class parameters, index 0 -> class 1, index 1 -> class 2.
    u = []
    sigmal = []
    for label in (1, 2):
        xi = [row[:d] for row in data if row[3] == label]
        u.append(get_u(xi))
        sigmal.append(get_sigmal(xi))

    count_true = 0
    count_false = 0
    for row in data:
        x = row[:d]
        w = row[3]
        print("x =", x)
        g1 = get_g(x, u[0], sigmal[0], pw[0], d)
        g2 = get_g(x, u[1], sigmal[1], pw[1], d)
        print("g1(x) =", g1, "g2(x) =", g2)
        # Assign the sample to the class with the larger discriminant and
        # check the decision against the recorded label.
        if g1 > g2:
            flag = w == 1
            print("w1", flag)
        else:
            flag = w == 2
            print("w2", flag)
        if flag:
            count_true += 1
        else:
            count_false += 1

    error_rate = count_false / len(data)
    accuracy = count_true / len(data)
    print("Accuracy = ", accuracy)
    print("Error Rate = ", error_rate)
if __name__ == '__main__':
    # Run the evaluation with one, two and then three features.
    for dimension in (1, 2, 3):
        main(dimension)
四、運行結果
1、單特徵值
均值方差:
各樣本點結果:
誤差率30%
2、雙特徵值
均值方差:
各樣本點結果:
誤差率45%
3、三特徵值
均值方差:
各樣本點結果:
誤差率15%
如有錯誤請指正