import numpy as np
import pandas
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso,LassoCV,LassoLarsCV
from sklearn.svm import SVC
from sklearn.svm import LinearSVC #支持向量機
from sklearn.naive_bayes import MultinomialNB #樸素也貝斯
from sklearn.tree import DecisionTreeClassifier #決策樹
from sklearn.ensemble import RandomForestClassifier #隨機森鈴
from sklearn.ensemble import GradientBoostingClassifier #GBDT
from xgboost import XGBClassifier #xgboost
def modelReturn(model, name):
    """Fit ``model`` on the training split and report its test-set accuracy.

    The train/test data is read from the module-level names ``x_train``,
    ``y_train``, ``x_test`` and ``y_test``, which must be defined before this
    function is called.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            fitted in place.
        name: Label printed in front of the score.

    Returns:
        The fraction of test samples whose prediction equals the label
        exactly. The same value is printed as ``name : accuracy``.
    """
    # Use the estimator supplied by the caller. The previous version replaced
    # it with a fresh RandomForestClassifier, so every "model" that was
    # evaluated was really a random forest.
    model.fit(x_train, y_train)
    predictions = model.predict(x_test)

    # Count exact matches between the true labels and the predictions.
    trueNum = sum(
        1 for actual, guessed in zip(y_test, predictions) if actual == guessed
    )
    accuracy = trueNum / len(y_test)
    print(name, ":", accuracy)
    return accuracy
# Load the exported data set and work on its raw value array.
dataframe = pandas.read_csv("export.csv")
dataset = dataframe.values

# Original author's note: the data has 53 columns (0-based); 52 are taken as
# features and column 53 is the label.
# NOTE(review): the label is read from index 53, i.e. the 54th column, which
# does not match a 53-column layout — confirm against export.csv.
# Full-feature alternative kept from the original:
#   X = dataset[:, 0:53].astype(float)
#   Y = dataset[:, 53]
# Currently only columns 3, 4 and 5 are used as features.
# `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
X = dataset[:, 3:6].astype(float)
Y = dataset[:, 53]

# No random_state is passed, so the split differs on every run.
x_train, x_test, y_train, y_test = train_test_split(X, Y)

# The figures in the comments below are the original author's recorded
# results (presumably accuracy in percent).

# XGBoost: 46 50
model = XGBClassifier()
modelReturn(model, "xgboost")

# GBDT: 40-48
model = GradientBoostingClassifier()
modelReturn(model, "GBDT")

# Random forest: 44-46
model = RandomForestClassifier()
modelReturn(model, "隨機森林")

# Decision tree: 36-39
model = DecisionTreeClassifier()
modelReturn(model, "決策樹")

# Naive Bayes: 44-51
model = MultinomialNB()
modelReturn(model, "樸素也貝斯")

# Linear support vector machine: 45-48
model = LinearSVC()
modelReturn(model, "支持向量機")

# SVM: 48-52
model = SVC()
modelReturn(model, "SVM")
# Lasso — original author's recorded result: 68-73%.
# NOTE(review): Lasso is a regression estimator, while modelReturn counts
# exact matches between labels and predictions — confirm that this call
# yields a meaningful score. The disabled snippet right below scores with a
# 0.5 tolerance instead.
model = Lasso(alpha=0.005); # tuning alpha controls the degree of fitting
modelReturn(model,"laoss")

# The three triple-quoted strings below are disabled experiments. They are
# bare string literals, so none of the code inside them runs.

# Disabled experiment 1: fit `model`, print the raw predictions and count a
# prediction as correct when abs(y_test[i]) - abs(predict[i]) is below 0.5.
# NOTE(review): the difference is signed, so any prediction whose magnitude
# exceeds the label's also counts as correct — confirm whether
# abs(y_test[i] - predict[i]) was intended before re-enabling.
"""
model.fit(x_train,y_train);
predict =model.predict(x_test);
trueNum =0;
print(predict)
for i in range(len(y_test)):
if ((abs(y_test[i])-abs(predict[i])< 0.5)):
trueNum += 1;
print(trueNum/len(y_test));
"""
# Disabled experiment 2: project the features onto 27 PCA components and
# print the score of a LogisticRegression on the projected test split.
# NOTE(review): the PCA is fitted a second time on x_test via fit_transform,
# so train and test are projected with different components — presumably
# pca.transform(x_test) was intended; confirm before re-enabling.
# NOTE(review): X currently holds only 3 feature columns, so 27 components
# looks too many for the present feature selection — verify.
"""
pca = PCA(n_components=27);
xTrainPca = pca.fit_transform(x_train);
xTestPca = pca.fit_transform(x_test);
log =LogisticRegression();
log.fit(xTrainPca,y_train);
print("準確率:",log.score(xTestPca,y_test));
"""
# Disabled experiment 3: PCA followed by a KNeighborsClassifier with 11
# neighbours, printing its score on the projected test split.
# NOTE(review): the comment inside the string says "reduce to 10 dimensions"
# but n_components is 50; the same fit_transform-on-test concern as in
# experiment 2 applies here.
"""
#降到10個維度
pca = PCA(n_components=50);
xTrainPca = pca.fit_transform(x_train);
xTestPca = pca.fit_transform(x_test);
knn = KNeighborsClassifier(n_neighbors=11);
knn.fit(xTrainPca,y_train);
print(knn.score(xTestPca,y_test))
"""