在上一篇《統計學習方法》第2章感知機模型中,介紹了感知機的學習模型、策略和算法;這裏通過編程實現感知機,利用iris數據集的萼片長度(sepal length)和萼片寬度(sepal width)兩個特徵進行線性二分類。
Iris 鳶尾花數據集是一個經典數據集,在統計學習和機器學習領域都經常被用作示例。數據集內包含 3 類共 150 條記錄,每類各 50 個數據,每條記錄都有 4 項特徵:萼片長度、萼片寬度、花瓣長度、花瓣寬度,可以通過這 4 個特徵預測鳶尾花屬於(iris-setosa, iris-versicolor, iris-virginica)中的哪一品種。
實驗結果如下:
由於sklearn庫自帶了Perceptron感知機模型,作爲對照,設計了skclassifier分類器,對比二者分類效果:
正如之前討論的那樣,感知機的解並不唯一:採用隨機梯度下降時,不同的訓練初值以及不同的誤分類點選取順序都會得到不同的分離超平面。若希望得到唯一的最優分離超平面,更好的實現方法是採用線性SVM分類器。
基於一般形式的感知機分類器編程實現如下:
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random
'''
perceptron for binary linear classification
'''
class Model:
    """Perceptron for binary linear classification.

    The decision value of a sample ``x`` is ``np.dot(x, w) + b``. Training
    labels are expected to be ``+1`` / ``-1`` so that ``y * decision <= 0``
    identifies a misclassified (or on-boundary) sample.
    """

    def __init__(self, n_features=None, lr=0.5):
        """Set weights to zero, bias to 1 and store the learning rate.

        Args:
            n_features: Length of the weight vector. When omitted, it is
                derived from the module-level ``data`` array (number of
                columns minus the trailing label column).
            lr: Step size applied on every corrective update.
        """
        if n_features is None:
            # Backward-compatible fallback: callers that use ``Model()``
            # rely on the global ``data`` being defined before construction.
            n_features = len(data[0]) - 1
        self.w = np.zeros(n_features, dtype=np.float32)
        self.b = 1
        self.lr = lr

    def sign(self, x, w, b):
        """Return the raw decision value ``np.dot(x, w) + b``.

        Despite the name, the value is not thresholded to -1/+1; ``fit``
        only uses its sign through the product with the label.
        """
        return np.dot(x, w) + b

    def fit(self, x_train, y_train, max_epochs=None):
        """Train with stochastic perceptron updates until an epoch is mistake-free.

        Every epoch visits each training sample exactly once in a fresh
        random order. Sampling indices with replacement could skip a
        misclassified sample and report convergence too early, so a
        permutation is used instead.

        Args:
            x_train: Indexable collection of feature vectors.
            y_train: Indexable collection of labels (+1 / -1), aligned with
                ``x_train``.
            max_epochs: Optional upper bound on the number of epochs. With
                the default ``None`` the loop runs until no sample is
                misclassified, which never happens when the data is not
                linearly separable.

        Returns:
            The string ``'perception model!'``.
        """
        n_samples = len(x_train)
        epoch = 0
        while max_epochs is None or epoch < max_epochs:
            mistakes = 0
            for j in np.random.permutation(n_samples):
                x = x_train[j]
                y = y_train[j]
                # Non-positive margin: wrong side of, or exactly on, the hyperplane.
                if y * self.sign(x, self.w, self.b) <= 0:
                    self.w += self.lr * np.dot(y, x)
                    self.b += self.lr * y
                    mistakes += 1
            if mistakes == 0:
                break
            epoch += 1
        return 'perception model!'
def classifier():
    """Train the hand-written perceptron and plot its decision line.

    Reads the module-level ``x`` (features), ``y`` (labels) and ``data``
    (feature/label array) that the ``__main__`` section prepares, fits a
    ``Model`` on them and shows the resulting line together with the two
    groups of samples (rows 0-49 and rows 50-99 of ``data``).
    """
    perception = Model()
    perception.fit(x, y)

    # Decision line w0 * x1 + w1 * x2 + b = 0, solved for the second feature.
    x_axis = np.linspace(4, 7, 10)
    y_axis = -(perception.w[0] * x_axis + perception.b) / perception.w[1]
    plt.plot(x_axis, y_axis)

    # Marker-only format 'o': a colour letter in the format string would
    # conflict with the explicit ``color`` keyword and trigger a warning.
    plt.plot(data[:50, 0], data[:50, 1], 'o', color='blue', label='0')
    plt.plot(data[50:100, 0], data[50:100, 1], 'o', color='orange', label='1')
    plt.xlabel('sepal length')
    plt.ylabel('sepal width')
    plt.legend()
    plt.title('prediction with classifier result')
    plt.show()
    plt.pause(1)
def skclassifier():
    """Fit scikit-learn's ``Perceptron`` as a reference and plot its decision line.

    Reads the module-level ``x``, ``y`` and ``data`` prepared by the
    ``__main__`` section. The samples drawn are rows 0-49 and rows 50-99 of
    ``data``.
    """
    from sklearn.linear_model import Perceptron

    # The features are not centred, so the intercept has to be learned;
    # with fit_intercept=False the boundary is forced through the origin
    # and ``intercept_`` in the line equation below is always zero.
    perception = Perceptron(fit_intercept=True, max_iter=1000, shuffle=True)
    perception.fit(x, y)

    # Decision line coef0 * x1 + coef1 * x2 + intercept = 0, solved for x2.
    x_axis = np.arange(4, 8)
    y_axis = -(perception.coef_[0][0] * x_axis + perception.intercept_) / perception.coef_[0][1]
    plt.plot(x_axis, y_axis)

    # Marker-only format 'o': a colour letter in the format string would
    # conflict with the explicit ``color`` keyword and trigger a warning.
    plt.plot(data[:50, 0], data[:50, 1], 'o', color='blue', label='0')
    plt.plot(data[50:100, 0], data[50:100, 1], 'o', color='orange', label='1')
    plt.xlabel('sepal length')
    plt.ylabel('sepal width')
    plt.legend()
    plt.title('prediction with skclassifier result')
    plt.show()
def loadDate():
    """Load the iris data, show a scatter plot and return the training array.

    The plot shows sepal length against sepal width for rows 0-49 and rows
    50-99 of the data set (legend entries '0' and '1').

    Returns:
        A numpy array built from the first 100 rows, with the columns
        sepal length, sepal width and label, in that order.
    """
    iris = load_iris()
    # Name the four feature columns directly instead of renaming afterwards.
    df = pd.DataFrame(
        iris.data,
        columns=['sepal length', 'sepal width', 'petal length', 'petal width'],
    )
    df['label'] = iris.target

    for legend_name, rows in (('0', df[:50]), ('1', df[50:100])):
        plt.scatter(rows['sepal length'], rows['sepal width'], label=legend_name)
    plt.xlabel('sepal length')
    plt.ylabel('sepal width')
    plt.legend()
    plt.title('original date')
    plt.show()
    plt.pause(1)

    # Keep columns 0, 1 and -1: sepal length, sepal width and label.
    data = np.array(df.iloc[:100, [0, 1, -1]])
    return data
if __name__ == '__main__':
    # These module-level names are read by classifier() and skclassifier().
    data = loadDate()
    x = data[:, :-1]
    # Map labels to +1 / -1: label 1 stays 1, every other label becomes -1.
    y = np.where(data[:, -1] == 1, 1, -1)

    # Hand-written perceptron first, then the scikit-learn reference.
    classifier()
    skclassifier()
    print("success!")