作業題目如下:
代碼如下:
1.庫的引用
# 1. Library imports
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# model_selection is the replacement module.
from sklearn import datasets, model_selection
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

# 2. Build a synthetic binary-classification dataset and a 10-fold CV splitter.
# make_classification returns (X, y) directly, so unpack in one step.
data, target = datasets.make_classification(n_samples=2000, n_features=15)
# New KFold API: the number of samples is no longer passed to the constructor
# (n_folds became n_splits); fold indices come from kf.split(data) below.
kf = model_selection.KFold(n_splits=10, shuffle=True)

# 3. Train and evaluate each classifier on every fold.
# The three per-classifier stanzas were identical except for the estimator,
# so they are folded into one inner loop over (label, estimator) pairs.
for fold_num, (train_index, test_index) in enumerate(kf.split(data), start=1):
    data_train, target_train = data[train_index], target[train_index]
    data_test, target_test = data[test_index], target[test_index]
    print("Test:", fold_num)
    for label, clf in (
        ("GaussianNB:", GaussianNB()),
        ("SVC:", SVC(C=1e-01, kernel='rbf', gamma=0.1)),
        ("RandomForestClassifier:", RandomForestClassifier(n_estimators=100)),
    ):
        print(label)
        clf.fit(data_train, target_train)
        pred = clf.predict(data_test)
        print("Accuracy:", metrics.accuracy_score(target_test, pred))
        print("F1-score:", metrics.f1_score(target_test, pred))
        # NOTE(review): AUC here is computed from hard 0/1 predictions, not
        # probability scores; with predict_proba / decision_function the AUC
        # would be more informative. Kept as-is to preserve the reported metric.
        print("AUC ROC:", metrics.roc_auc_score(target_test, pred))
    print()
輸出結果:
通過實驗可知,在二分類問題中,隨機森林算法的效果比樸素貝葉斯和支持向量機都更好。