Shandong University Practical Training, Day 4

First I compared how several models perform on the dataset. No hyperparameter tuning yet, and none of them do particularly well.

from __future__ import division
import time
import numpy as np
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import learning_curve
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import BayesianRidge, LinearRegression, ElasticNet  # regression algorithms to compare
from sklearn.model_selection import cross_val_score  # cross-validation
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_error, r2_score  # evaluation metrics
from sklearn.ensemble import GradientBoostingRegressor  # ensemble algorithm

 
data = pd.read_csv('data/mooc_data.csv', header=None, index_col=0)
data = data.sample(frac=1)  # shuffle the rows
dataset = np.array(data)
# dataset = np.loadtxt(dir)
index = int(dataset.shape[0] * 0.2)  # split point: first 20% of rows for training, remaining 80% for testing
X_train = dataset[:index, :-1]
y_train = dataset[:index, -1]
X_test = dataset[index:, :-1]
y_test = dataset[index:, -1]
 




model_br = BayesianRidge()  # Bayesian ridge regression
model_lr = LinearRegression()  # ordinary linear regression
model_etc = ElasticNet()  # elastic net regression
model_svr = SVR()  # support vector regression
model_gbr = GradientBoostingRegressor()  # gradient boosting regression
model_names = ['BayesianRidge', 'LinearRegression', 'ElasticNet', 'SVR', 'GBR']  # model names
model_dic = [model_br, model_lr, model_etc, model_svr, model_gbr]  # model objects
cv_score_list = []  # cross-validation scores for each model
pre_y_list = []  # test-set predictions for each model


for model in model_dic:  # iterate over the regression models
    scores = cross_val_score(model, X_train, y_train, cv=5)  # 5-fold cross-validation on the training set
    cv_score_list.append(scores)  # store the CV scores
    pre_y_list.append(model.fit(X_train, y_train).predict(X_test))  # fit on the training set and predict the test set

model_metrics_name = [explained_variance_score, mean_absolute_error, mean_squared_error, r2_score]  # evaluation metric functions
model_metrics_list = []  # metric results per model
for i in range(5):  # loop over the model indices
    tmp_list = []  # metric results for the current model
    for m in model_metrics_name:  # loop over the metric functions
        tmp_score = m(y_test, pre_y_list[i])  # compute the metric on the test set
        tmp_list.append(tmp_score)  # collect the score
    model_metrics_list.append(tmp_list)  # store this model's scores

df2 = pd.DataFrame(model_metrics_list, index=model_names, columns=['ev', 'mae', 'mse', 'r2'])  # metrics table

print(df2)

Final results:

                        ev       mae         mse        r2
BayesianRidge     0.017023  8.514099  149.028967  0.017016
LinearRegression  0.077178  8.024226  139.912144  0.077150
ElasticNet        0.041530  8.250055  145.315903  0.041507
SVR               0.018165  8.407134  149.456046  0.014199
GBR              -0.208560  8.823358  183.961341 -0.213396
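
By the way, cv_score_list gets filled in but never printed. A quick way to look at the 5-fold scores as well would be something like the sketch below (just reusing cv_score_list and model_names from the code above, not part of the original run; the default scorer for regressors is R²):

import numpy as np
import pandas as pd

# one row per model, one column per CV fold, built from the scores gathered above
df_cv = pd.DataFrame(cv_score_list, index=model_names,
                     columns=['fold_%d' % i for i in range(1, 6)])
df_cv['mean'] = df_cv.mean(axis=1)             # average score across the 5 folds
df_cv['std'] = df_cv.iloc[:, :5].std(axis=1)   # fold-to-fold spread
print(df_cv)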

I asked the teacher, who said each course should be modeled separately. I had been treating the course as just another feature, so I split the data up by course.

from __future__ import division
import time
import numpy as np
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import learning_curve
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import BayesianRidge, LinearRegression, ElasticNet  # regression algorithms to compare
from sklearn.model_selection import cross_val_score  # cross-validation
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_error, r2_score  # evaluation metrics
from sklearn.ensemble import GradientBoostingRegressor  # ensemble algorithm
import os

file = []
for root, dirs, files in os.walk('data'):  # collect every per-course file under data/
    for name in files:
        file.append(os.path.join(root, name))
for f in file:
    data = pd.read_csv(f, header=None, index_col=0)
    data_ = data.iloc[:, 3:]  # keep the columns from the 4th onward (features plus target)
    data_ = data_.sample(frac=1)  # shuffle the rows
    dataset = np.array(data_)
    # dataset = np.loadtxt(dir)
    index = int(dataset.shape[0] * 0.2)  # split point: first 20% of rows for training, remaining 80% for testing
    X_train = dataset[:index, :-1]
    y_train = dataset[:index, -1]
    X_test = dataset[index:, :-1]
    y_test = dataset[index:, -1]
 




    model_br = BayesianRidge()  # Bayesian ridge regression
    model_lr = LinearRegression()  # ordinary linear regression
    model_etc = ElasticNet()  # elastic net regression
    model_svr = SVR()  # support vector regression
    model_gbr = GradientBoostingRegressor()  # gradient boosting regression
    model_names = ['BayesianRidge', 'LinearRegression', 'ElasticNet', 'SVR', 'GBR']  # model names
    model_dic = [model_br, model_lr, model_etc, model_svr, model_gbr]  # model objects
    cv_score_list = []  # cross-validation scores for each model
    pre_y_list = []  # test-set predictions for each model


    for model in model_dic:  # iterate over the regression models
        scores = cross_val_score(model, X_train, y_train, cv=5)  # 5-fold cross-validation on the training set
        cv_score_list.append(scores)  # store the CV scores
        pre_y_list.append(model.fit(X_train, y_train).predict(X_test))  # fit on the training set and predict the test set

    model_metrics_name = [explained_variance_score, mean_absolute_error, mean_squared_error, r2_score]  # evaluation metric functions
    model_metrics_list = []  # metric results per model
    for i in range(5):  # loop over the model indices
        tmp_list = []  # metric results for the current model
        for m in model_metrics_name:  # loop over the metric functions
            tmp_score = m(y_test, pre_y_list[i])  # compute the metric on the test set
            tmp_list.append(tmp_score)  # collect the score
        model_metrics_list.append(tmp_list)  # store this model's scores

    df2 = pd.DataFrame(model_metrics_list, index=model_names, columns=['ev', 'mae', 'mse', 'r2'])  # metrics table

    print('=' * 10, f, '=' * 10)
    print(df2)

Then it threw an error... I'll look at it tomorrow.
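
I haven't checked what the error actually is yet, but one likely suspect with per-course files is a course that has too few rows for the 20% training split plus 5-fold CV. A rough guard sketch, purely a guess at the cause, that the loop above could start from:

import os
import pandas as pd

# Hypothetical guard, assuming the failure came from a per-course file that is too small
# for the 20% training split plus 5-fold CV; the real cause has not been confirmed.
MIN_ROWS = 25  # 20% of 25 rows leaves the 5 training samples that 5-fold CV needs at a minimum

file = []
for root, dirs, files in os.walk('data'):
    for name in files:
        file.append(os.path.join(root, name))

for f in file:
    data = pd.read_csv(f, header=None, index_col=0)
    if len(data) < MIN_ROWS:
        print('skipping %s: only %d rows' % (f, len(data)))
        continue
    # ... the per-course model comparison from the block above would go here ...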

Progress this slow is painful, orz.

 
