因此,LDA降維的套路是:
(1)求各個類的均值向量和總的均值向量
(2)求類間散佈矩陣和類內散佈矩陣
(3)計算矩陣乘法 $S = S_w^{-1} S_b$(類內散佈矩陣的逆乘以類間散佈矩陣)
(4)對S進行特徵值分解,得到特徵值和特徵向量
(5)若想降到k維,則按特徵值從大到小排序,把前k個特徵向量作爲行構建投影矩陣
以下是利用LDA降維處理Iris數據集的代碼:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def scatter_matrices(features, labels):
    """Compute the within-class and between-class scatter matrices.

    Args:
        features: 2-D array-like of shape (n_samples, n_features).
        labels: 1-D array-like of length n_samples holding the class label
            of each sample.

    Returns:
        A tuple ``(S_w, S_b)`` of two (n_features, n_features) arrays:
        the within-class scatter and the between-class scatter.
    """
    features = np.asarray(features, dtype=float)
    labels = np.asarray(labels)
    n_features = features.shape[1]

    # Use the mean over all samples. The mean of the class means is only
    # the same thing when every class has the same number of samples.
    overall_mean = features.mean(axis=0)

    S_w = np.zeros((n_features, n_features))
    S_b = np.zeros((n_features, n_features))
    for cls in np.unique(labels):
        x = features[labels == cls]
        class_mean = x.mean(axis=0)
        centered = x - class_mean
        S_w += np.matmul(centered.T, centered)
        # Column vector so that diff @ diff.T is an outer product.
        diff = (class_mean - overall_mean).reshape(-1, 1)
        S_b += len(x) * np.matmul(diff, diff.T)
    return S_w, S_b


def lda_projection_matrix(S_w, S_b, k=2):
    """Build the k-dimensional LDA projection matrix from scatter matrices.

    Args:
        S_w: within-class scatter matrix, shape (d, d); must be invertible.
        S_b: between-class scatter matrix, shape (d, d).
        k: number of target dimensions, 1 <= k <= d.

    Returns:
        A tuple ``(eig_vals, eig_vects, W)``. ``eig_vals`` are the
        eigenvalues of ``inv(S_w) @ S_b`` sorted in descending order,
        ``eig_vects`` holds the matching eigenvectors as columns, and ``W``
        is the (k, d) projection matrix whose rows are the top-k eigenvectors.

    Raises:
        ValueError: if ``k`` is outside ``[1, d]``.
        numpy.linalg.LinAlgError: if ``S_w`` is singular.
    """
    S_w = np.asarray(S_w, dtype=float)
    S_b = np.asarray(S_b, dtype=float)
    if not 1 <= k <= S_w.shape[0]:
        raise ValueError("k must be between 1 and the number of features")

    # Matrix product, not element-wise `*`: on plain ndarrays `*` multiplies
    # entry by entry and does not give S_w^-1 S_b.
    S = np.matmul(np.linalg.inv(S_w), S_b)
    eig_vals, eig_vects = np.linalg.eig(S)

    # S is not symmetric, so eig may return a complex dtype; keep the real
    # parts (the imaginary parts are expected to be numerical noise here).
    eig_vals = np.real(eig_vals)
    eig_vects = np.real(eig_vects)

    # eig does not order its output; sort by eigenvalue, largest first.
    order = np.argsort(eig_vals)[::-1]
    eig_vals = eig_vals[order]
    eig_vects = eig_vects[:, order]

    # Eigenvectors are the COLUMNS of eig_vects; take the first k columns
    # and transpose so that each row of W is one projection direction.
    W = eig_vects[:, :k].T
    return eig_vals, eig_vects, W


if __name__ == "__main__":
    # Read the data and keep the numeric part.
    # NOTE(review): assumes column 0 is an id/index column and columns 1-4
    # hold the numeric features — confirm against iris.csv.
    data = pd.read_csv('iris.csv')
    species = data['Species'].value_counts().index
    X = np.asarray(data.iloc[:, 1:5], dtype=float)
    y = np.asarray(data['Species'])

    # Within-class and between-class scatter matrices.
    S_w, S_b = scatter_matrices(X, y)
    print(S_w)
    print(S_b)

    # Eigen-decomposition of S_w^-1 S_b and the 4 -> 2 projection matrix.
    eigVals, eigVects, W = lda_projection_matrix(S_w, S_b, k=2)
    print(eigVals,"\n",eigVects)

    # Plot every class in the projected 2-D space.
    fig = plt.figure()
    ax1 = fig.add_subplot()
    plt.xlabel('LDA1')
    plt.ylabel('LDA2')
    colors = ['r','g','b']
    for i, name in enumerate(species):
        x_new = np.matmul(X[y == name], W.T)
        ax1.scatter(x_new[:, 0], x_new[:, 1],
                    c=colors[i % len(colors)], label=name)
    # The scatter calls set labels; a legend is needed to display them.
    ax1.legend()
    plt.show()
結果: