import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.spatial.distance import cdist
class KNN(object):
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    Attributes:
        k: Number of neighbours that take part in each vote.
        DataSet: One dict per training sample with the keys ``'Index'``,
            ``'Data'`` and ``'Signal'`` (the class label).
        Distance: Pairwise distance matrix of the training samples.
        TableNN: For every training sample, its nearest *other* training
            samples as dicts with keys ``'Index'``, ``'Target'``, ``'Distance'``.
        Result: Labels predicted by the most recent ``Forecast`` call.
    """

    def __init__(self, k: int = 5):
        """Create an unfitted classifier.

        Args:
            k: Number of neighbours to consult; must be at least 1.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError("k must be a positive integer")
        self.k = k
        self.DataSet = list()
        self.Distance = list()
        self.TableNN = list()
        self.Result = list()

    def fitting(self, X, Y):
        """Build the model: store the samples and their neighbour table.

        Any state left over from a previous call is discarded, so the
        classifier can be refitted.

        Args:
            X: 2-D array-like of training samples, one row per sample.
            Y: Array-like of labels, one per row of ``X``.

        Raises:
            ValueError: If ``X`` and ``Y`` contain a different number of items.
        """
        # Convert up front so rows are always addressed by position; indexing
        # a pandas object with ``obj[i]`` would be a label/column lookup.
        samples = np.asarray(X)
        labels = np.asarray(Y).ravel().tolist()
        if len(labels) != len(samples):
            raise ValueError("X and Y must contain the same number of samples")

        self.Distance = cdist(samples, samples, metric='euclidean')
        self.DataSet = [
            {'Index': index, 'Data': sample, 'Signal': label}
            for index, (sample, label) in enumerate(zip(samples, labels))
        ]

        table = []
        for index, row in enumerate(self.Distance):
            # Stable sort keeps ties in index order.  The sample itself is
            # excluded by index rather than by assuming it sorts first, which
            # is not true when the training set contains duplicates.
            order = np.argsort(row, kind='stable')
            others = [int(j) for j in order if j != index][:self.k]
            table.append([
                {'Index': index, 'Target': j, 'Distance': row[j]}
                for j in others
            ])
        self.TableNN = np.array(table)

    def Forecast(self, X):
        """Predict a label for every row of ``X`` by majority vote.

        The ``k`` closest training samples vote; when several labels share
        the highest count, the label of the closest such neighbour wins.

        Args:
            X: 2-D array-like of query samples, one row per sample.

        Returns:
            list: One predicted label per query row.  The same list is stored
            in ``self.Result``, replacing the previous predictions.

        Raises:
            ValueError: If the classifier has not been fitted yet.
        """
        if not self.DataSet:
            raise ValueError("fitting() must be called before Forecast()")

        train = np.array([entry['Data'] for entry in self.DataSet])
        distances = cdist(np.asarray(X), train, metric='euclidean')

        predictions = []
        for row in distances:
            # Query points are not part of the training set, so the closest
            # neighbour must be kept (no "skip self" offset here).
            nearest = np.argsort(row, kind='stable')[:self.k]
            votes = {}
            for j in nearest:
                # Map the neighbour's position back to its class label.
                label = self.DataSet[j]['Signal']
                votes[label] = votes.get(label, 0) + 1
            # dicts keep insertion order and max() returns the first maximum,
            # so ties resolve towards the closest neighbour's label.
            predictions.append(max(votes, key=votes.get))

        self.Result = predictions
        return self.Result

    def accuracy(self, Y):
        """Print and return the share of the latest predictions matching ``Y``.

        Args:
            Y: Array-like of true labels, in the same order as the rows that
                were passed to ``Forecast``.

        Returns:
            float: Fraction of correct predictions; 0.0 when there are no
            predictions to score.

        Raises:
            IndexError: If ``Y`` holds fewer labels than there are predictions.
        """
        # Positional access: a pandas Series coming out of train_test_split
        # keeps its shuffled index, so ``Y[i]`` would look up by label.
        labels = np.asarray(Y).ravel().tolist()
        true_num = sum(
            1 for i, predicted in enumerate(self.Result)
            if predicted == labels[i]
        )
        score = true_num / len(self.Result) if self.Result else 0.0
        print("The accuracy of KNN is: {}".format(score))
        return score