GitChat 章華燕: Machine Learning Algorithms, One by One

"""
KNN
"""

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets

n_neighbors = 15

iris = datasets.load_iris()

X = iris.data[:, :2]  # only the first two features, so the decision boundary can be plotted in 2-D
y = iris.target

h = .02  # step size in the mesh

cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

for weights in ['uniform', 'distance']:

    clf = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights)
    clf.fit(X, y)

    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold,
                edgecolor='k', s=20)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("3-Class classification (k = %i, weights = '%s')"
              % (n_neighbors, weights))

plt.show()
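The plots above only visualize the decision boundary on two features. As a complementary check (a minimal sketch, not from the original article), held-out accuracy on all four iris features can be estimated with a train/test split:

from sklearn.model_selection import train_test_split

# Sketch: evaluate the same k=15 neighbors model on a held-out split.
Xtr, Xte, ytr, yte = train_test_split(iris.data, iris.target,
                                      test_size=0.3, random_state=42)
knn = neighbors.KNeighborsClassifier(n_neighbors=15, weights='distance')
knn.fit(Xtr, ytr)
print('KNN test accuracy: %.2f' % knn.score(Xte, yte))
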
"""
Logistic Regression (LR)
"""

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import datasets

# Load the iris dataset
iris = datasets.load_iris()

X = iris.data
y = iris.target

print('Sample num: ', len(y))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

clf = LogisticRegression(C=1.0, penalty='l1', solver='liblinear', tol=1e-6)  # the l1 penalty needs a solver that supports it

# Train the model
clf.fit(X_train, y_train)

# Predict on the test set
ans = clf.predict(X_test)

# Compute accuracy (the labels are integers, so compare them directly)
cnt = 0
for i in range(len(y_test)):
    if ans[i] == y_test[i]:
        cnt += 1
    # print(ans[i], ' ', y_test[i])

print("Accuracy: ", (cnt * 100.0 / len(y_test)), "%")
"""
Naive Bayes
"""
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn import datasets

# Load the iris dataset
iris = datasets.load_iris()

X = iris.data
y = iris.target

print('Sample num: ', len(y))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

clf = GaussianNB()

# Train the model
clf.fit(X_train, y_train)

# Predict on the test set
ans = clf.predict(X_test)

# Compute accuracy (the labels are integers, so compare them directly)
cnt = 0
for i in range(len(y_test)):
    if ans[i] == y_test[i]:
        cnt += 1
    # print(ans[i], ' ', y_test[i])

print("Accuracy: ", (cnt * 100.0 / len(y_test)), "%")
"""
Decision Tree
"""

from sklearn import tree
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Load the iris dataset
iris = datasets.load_iris()

X = iris.data
y = iris.target

print('Sample num: ', len(y))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train the model
clf = tree.DecisionTreeClassifier()
clf.fit(X_train, y_train)

# Predict on the test set
ans = clf.predict(X_test)
# Compute accuracy (the labels are integers, so compare them directly)
cnt = 0
for i in range(len(y_test)):
    if ans[i] == y_test[i]:
        cnt += 1
    # print(ans[i], ' ', y_test[i])
print("Accuracy: ", (cnt * 100.0 / len(y_test)), "%")
"""
XGBoost
"""

from sklearn.datasets import load_iris
import xgboost as xgb
from xgboost import plot_importance
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

# read in the iris data
iris = load_iris()

X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Train the model (the deprecated silent flag is dropped; recent xgboost versions use verbosity instead)
model = xgb.XGBClassifier(max_depth=5, learning_rate=0.1, n_estimators=160,
                          objective='multi:softmax')
model.fit(X_train, y_train)

# Predict on the test set
ans = model.predict(X_test)

# Compute accuracy
cnt1 = 0
cnt2 = 0
for i in range(len(y_test)):
    if ans[i] == y_test[i]:
        cnt1 += 1
    else:
        cnt2 += 1

print("Accuracy: %.2f %% " % (100 * cnt1 / (cnt1 + cnt2)))

# Plot feature importance
plot_importance(model)
plt.show()
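xgboost also ships a lower-level native API built around DMatrix, which the sklearn wrapper above delegates to; a minimal equivalent sketch:

# Native xgboost API: wrap the arrays in DMatrix and call xgb.train.
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)
params = {'max_depth': 5, 'eta': 0.1,
          'objective': 'multi:softmax', 'num_class': 3}
booster = xgb.train(params, dtrain, num_boost_round=160)
pred = booster.predict(dtest)  # multi:softmax returns class labels
print("Native API accuracy: %.2f %%" % (100 * (pred == y_test).mean()))
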
"""
LightGBM
"""
import lightgbm as lgb
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Load the iris dataset
iris = datasets.load_iris()

X = iris.data
y = iris.target

print('Sample num: ', len(y))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train the model
clf = lgb.LGBMClassifier(boosting_type='gbdt', num_leaves=31, max_depth=7,
                         learning_rate=0.1, subsample_for_bin=5000)

clf.fit(X_train, y_train)

# Predict on the test set
ans = clf.predict(X_test)

# Compute accuracy (the labels are integers, so compare them directly)
cnt1 = 0
cnt2 = 0
for i in range(len(y_test)):
    if ans[i] == y_test[i]:
        cnt1 += 1
    else:
        cnt2 += 1
print("Accuracy: %.2f %%" % (100 * cnt1 / (cnt1 + cnt2)))
"""
KMeans
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

x = np.random.normal(0, 2, 1000)
y = np.random.normal(0, 2, 1000)

x1 = np.random.normal(10, 1, 100)
y1 = np.random.normal(10, 1, 100)

x = np.append(x, x1)
y = np.append(y, y1)

# Pair x and y element-wise into (x, y) coordinates: shape (1100, 2).
# (Appending y to x and reshaping would pair consecutive values instead.)
X = np.column_stack((x, y))

plt.figure(figsize=(10, 4))
plt.subplot(121)
plt.scatter(X[:, 0], X[:, 1], s=5)

y_pred = KMeans(n_clusters=2, random_state=170).fit_predict(X)

plt.subplot(122)
plt.scatter(X[:, 0], X[:, 1], s=5, c=y_pred)

plt.show()
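The fitted cluster centers and inertia (within-cluster sum of squared distances) can be read off the estimator directly; a small sketch:

# Fit explicitly so the estimator's attributes are accessible.
kmeans = KMeans(n_clusters=2, random_state=170).fit(X)
print('Cluster centers:\n', kmeans.cluster_centers_)
print('Inertia: %.1f' % kmeans.inertia_)
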
"""
Neural Network
"""
from sklearn.neural_network import MLPClassifier
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Load the iris dataset
iris = datasets.load_iris()

X = iris.data
y = iris.target

print('Sample num: ', len(y))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train the model; hidden_layer_sizes expects a tuple, so (5,) means one hidden layer of 5 units
clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5,), random_state=1)

clf.fit(X_train, y_train)

# Predict on the test set
ans = clf.predict(X_test)

# Compute accuracy (the labels are integers, so compare them directly)
cnt1 = 0
cnt2 = 0
for i in range(len(y_test)):
    if ans[i] == y_test[i]:
        cnt1 += 1
    else:
        cnt2 += 1
print("Accuracy: %.2f %%" % (100 * cnt1 / (cnt1 + cnt2)))
"""
Dimensionality Reduction with PCA and LDA
"""

import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

iris = datasets.load_iris()

X = iris.data
y = iris.target
target_names = iris.target_names

pca = PCA(n_components=2)
X_r = pca.fit(X).transform(X)

lda = LinearDiscriminantAnalysis(n_components=2)
X_r2 = lda.fit(X, y).transform(X)

# Percentage of variance explained by each component
print('explained variance ratio (first two components): %s'
      % str(pca.explained_variance_ratio_))
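LDA exposes the same attribute, showing how much of the between-class variance each discriminant captures (a small sketch):

print('LDA explained variance ratio: %s'
      % str(lda.explained_variance_ratio_))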

plt.figure()
colors = ['navy', 'turquoise', 'darkorange']
lw = 2

for color, i, target_name in zip(colors, [0, 1, 2], target_names):
    plt.scatter(X_r[y == i, 0], X_r[y == i, 1], color=color, alpha=.8, lw=lw,
                label=target_name)
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('PCA of IRIS dataset')

plt.figure()
for color, i, target_name in zip(colors, [0, 1, 2], target_names):
    plt.scatter(X_r2[y == i, 0], X_r2[y == i, 1], alpha=.8, color=color,
                label=target_name)
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('LDA of IRIS dataset')

plt.show()