四、一元線性迴歸
1. 預測函數
輸入 | 輸出 |
---|---|
0 | 1 |
1 | 3 |
2 | 5 |
3 | 7 |
4 | 9 |
… | … |
- 預測函數爲:$y = w_0 + w_1 x$(由上表可得 $w_0 = 1$,$w_1 = 2$,即 $y = 2x + 1$)
預測:輸入10;輸出21
-
根據已知的輸入和輸出,任務就是尋找預測函數中的模型參數 $w_0$ 和 $w_1$,以滿足輸入和輸出之間的聯繫。
2. 單樣本誤差
- 隨機取一個樣本 $(x, y)$,代入預測函數得到 $y' = w_0 + w_1 x$,則單樣本誤差爲:$e = \frac{1}{2}(y - y')^2$
3. 總樣本誤差
- 總樣本誤差爲:$E = \sum_{i} \frac{1}{2}\left(y_i - y_i'\right)^2$
4. 損失函數
- 損失函數爲:$\mathrm{loss}(w_0, w_1) = \frac{1}{2}\sum_{i}\left(y_i - (w_0 + w_1 x_i)\right)^2$
-
任務就是尋找可以使損失函數取得最小值的模型參數 $w_0$ 和 $w_1$。
5. 梯度下降法尋優
-
隨機選擇一組模型參數 $w_0$ 和 $w_1$,計算損失函數在該模型參數處的梯度 $\nabla \mathrm{loss} = \left(\frac{\partial \mathrm{loss}}{\partial w_0}, \frac{\partial \mathrm{loss}}{\partial w_1}\right)$
-
計算與該梯度反方向的修正步長 $-\eta \nabla \mathrm{loss}$(其中 $\eta$ 爲學習率)
-
計算下一組模型參數:$w_0 \leftarrow w_0 - \eta \frac{\partial \mathrm{loss}}{\partial w_0}$,$w_1 \leftarrow w_1 - \eta \frac{\partial \mathrm{loss}}{\partial w_1}$
-
直到滿足迭代終止條件:
- 迭代足夠多次;
- 損失值已經足夠小;
- 損失值已經不再明顯減少。
# gd.py
import numpy as np
import matplotlib.pyplot as mp
from mpl_toolkits.mplot3d import axes3d
# Batch gradient descent for one-variable linear regression y = w0 + w1*x.
# Sample inputs.
train_x = np.array([0.5, 0.6, 0.8, 1.1, 1.4])
# Sample outputs.
train_y = np.array([5.0, 5.5, 6.0, 6.8, 7.0])
# Number of training iterations.
n_epoches = 1000
# Learning rate (gradient step size).
lrate = 0.01
# Histories recorded over the run: epoch numbers and loss values.
epoches, losses = [], []
# Parameter histories, seeded with the initial guess w0 = w1 = 1.
w0, w1 = [1], [1]
for epoch in range(1, n_epoches + 1):
    # Per-sample residual of the current model (computed once per epoch).
    residual = train_y - (w0[-1] + w1[-1] * train_x)
    epoches.append(epoch)
    # Half sum-of-squared-errors loss at the current parameters.
    losses.append((residual ** 2 / 2).sum())
    # Partial derivatives of the loss with respect to w0 and w1.
    grad_w0 = -residual.sum()
    grad_w1 = -(residual * train_x).sum()
    # Step both parameters against the gradient.
    w0.append(w0[-1] - lrate * grad_w0)
    w1.append(w1[-1] - lrate * grad_w1)
# Drop the extra parameter value computed on the final pass so the
# parameter histories line up one-to-one with `epoches`/`losses`.
w0 = np.array(w0[:-1])
w1 = np.array(w1[:-1])
# Indices that sort the samples by input, so curves draw left to right.
order = train_x.argsort()
test_x = train_x[order]
test_y = train_y[order]
# Model predictions at the (sorted) test inputs.
pred_test_y = w0[-1] + w1[-1] * test_x
# Evaluate the loss over a dense (w0, w1) grid for visualisation.
grid_w0, grid_w1 = np.meshgrid(np.linspace(0, 9, 500), np.linspace(0, 3.5, 500))
w0_flat = grid_w0.ravel()
w1_flat = grid_w1.ravel()
# Predictions for every grid point: shape (n_samples, n_grid_points).
flat_pred = w0_flat + np.outer(train_x, w1_flat)
# Half-SSE per grid point, reshaped back onto the grid.
grid_loss = ((((flat_pred - train_y.reshape(-1, 1)) ** 2).sum(axis=0)) / 2).reshape(grid_w0.shape)
# ————————————————————————————————————————————————————————————
# Figure 1: fitted regression line against the sample points.
mp.figure('Linear Regression', dpi=120)
mp.title('Linear Regression', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
# Training samples.
mp.scatter(train_x, train_y, marker='s', c='dodgerblue',
           alpha=0.5, s=80, label='Training')
# Test samples (same points, sorted by input).
mp.scatter(test_x, test_y, marker='D', c='orangered',
           alpha=0.5, s=60, label='Testing')
# Model predictions at the test inputs.
mp.scatter(test_x, pred_test_y, c='orangered',
           alpha=0.5, s=60, label='Predicted')
# Vertical segments showing each prediction's error.
for xi, actual, predicted in zip(test_x, test_y, pred_test_y):
    mp.plot([xi, xi], [actual, predicted], c='orangered',
            alpha=0.5, linewidth=1)
# The regression line implied by the learned parameters.
mp.plot(test_x, pred_test_y, '--', c='limegreen',
        label='Regression', linewidth=1)
mp.legend()
# ————————————————————————————————————————————————————————
# Figure 2: evolution of w0, w1 and the loss over the iterations.
mp.figure('Training Progress', dpi=120)
# One panel per curve: (subplot position, data series, colour, label).
panels = [
    (311, w0, 'dodgerblue', 'w0'),
    (312, w1, 'limegreen', 'w1'),
    (313, losses, 'orangered', 'loss'),
]
for position, series, colour, label in panels:
    mp.subplot(position)
    if position == 311:
        mp.title('Training Progress', fontsize=20)
    if position == 313:
        mp.xlabel('epoch', fontsize=14)
    mp.ylabel(label, fontsize=14)
    # Tick every 100 epochs.
    mp.gca().xaxis.set_major_locator(mp.MultipleLocator(100))
    mp.tick_params(labelsize=10)
    mp.grid(linestyle=':')
    mp.plot(epoches, series, c=colour, label=label)
    mp.legend()
mp.tight_layout()
# Figure 3: 3-D surface of the loss over the (w0, w1) grid, with the
# gradient-descent trajectory drawn on top.
fig = mp.figure('Loss Function', dpi=120)
# FIX: mp.gca(projection='3d') was deprecated in Matplotlib 3.4 and
# removed in 3.6 — create the 3-D axes via add_subplot instead.
ax = fig.add_subplot(projection='3d')
mp.title('Loss Function', fontsize=20)
ax.set_xlabel('w0', fontsize=14)
ax.set_ylabel('w1', fontsize=14)
ax.set_zlabel('loss', fontsize=14)
mp.tick_params(labelsize=10)
ax.plot_surface(grid_w0, grid_w1, grid_loss, rstride=10, cstride=10, cmap='jet')
# Path of (w0, w1, loss) visited by batch gradient descent.
ax.plot(w0, w1, losses, 'o-', c='orangered', label='BGD')
mp.legend()
# Figure 4: contour view of the loss surface with the BGD trajectory.
mp.figure('Batch Gradient Descent', dpi=120)
mp.title('Batch Gradient Descent', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
# Filled background plus ten labelled black contour lines.
mp.contourf(grid_w0, grid_w1, grid_loss, 1000, cmap='jet')
contour_set = mp.contour(grid_w0, grid_w1, grid_loss, 10,
                         colors='black', linewidths=0.5)
mp.clabel(contour_set, inline_spacing=0.1, fmt='%.2f', fontsize=8)
# Descent path projected onto the (w0, w1) plane.
mp.plot(w0, w1, 'o-', c='orangered', label='BGD')
mp.legend()
mp.show()
6. 工具包
import sklearn.linear_model as lm
線性迴歸器 = lm.LinearRegression()
線性迴歸器.fit(已知輸入, 已知輸出) # 計算模型參數
線性迴歸器.predict(新的輸入) ->新的輸出
import pickle # 保存模型
import numpy as np
import sklearn.linear_model as lm
import sklearn.metrics as sm
import matplotlib.pyplot as mp
import pickle
# Read the samples: each line is "feature1,...,featureN,target".
x, y = [], []
with open('../data/single.txt', 'r') as f:
    for line in f:
        values = [float(token) for token in line.split(',')]
        x.append(values[:-1])
        y.append(values[-1])
# Convert to arrays for sklearn.
x = np.array(x)
y = np.array(y)
# To reuse a previously trained model instead of refitting, uncomment:
# with open('../data/linear.pkl', 'rb') as f:
#     model = pickle.load(f)
# Fit an ordinary least-squares linear model.
model = lm.LinearRegression()
model.fit(x, y)
# Predict on the training inputs.
pred_y = model.predict(x)
# R^2 score: 1 means a perfect fit, values near 0 mean large errors.
print(sm.r2_score(y, pred_y))
# To persist the fitted model, uncomment:
# with open('../../data/linear.pkl', 'wb') as f:
#     pickle.dump(model, f)
# Plot the samples and the fitted regression line.
mp.figure('Linear Regression', dpi=120)
mp.title('Linear Regression', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.scatter(x, y, c='dodgerblue', alpha=0.75, s=60, label='Sample')
# Sort by the (flattened) inputs so the line is drawn left to right.
order = x.ravel().argsort()
mp.plot(x[order], pred_y[order], c='orangered', label='Regression')
mp.legend()
mp.show()
0.7362638998481811
五、嶺迴歸
- 通過正則的方法,即在損失函數中加入正則項,以減弱模型參數對訓練數據的匹配度,藉以規避少數明顯偏移正常範圍的異常樣本影響模型的迴歸效果。
lm.Ridge(300, fit_intercept=True)
(正則強度,是否約束)
# rdg.py
import numpy as np
import sklearn.linear_model as lm
import matplotlib.pyplot as mp
# Read samples that contain a few abnormal (outlier) points.
x, y = [], []
with open('../data/abnormal.txt', 'r') as f:
    for line in f:
        values = [float(token) for token in line.split(',')]
        x.append(values[:-1])
        y.append(values[-1])
x = np.array(x)
y = np.array(y)
# Ordinary least-squares regression for comparison.
model1 = lm.LinearRegression()
model1.fit(x, y)
pred_y1 = model1.predict(x)
# Ridge regression; with regularization strength 0 it reduces to OLS.
model2 = lm.Ridge(300, fit_intercept=True)
model2.fit(x, y)
pred_y2 = model2.predict(x)
# Plot both fits over the samples.
mp.figure('Linear & Ridge Regression', dpi=120)
mp.title('Linear & Ridge Regression', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.scatter(x, y, c='dodgerblue', alpha=0.75, s=60, label='Sample')
# Draw both regression lines in input order.
order = x.ravel().argsort()
mp.plot(x[order], pred_y1[order], c='orangered', label='Linear')
mp.plot(x[order], pred_y2[order], c='limegreen', label='Ridge')
mp.legend()
mp.show()
六、多項式迴歸
- 多元線性迴歸:$y = w_0 + w_1 x_1 + w_2 x_2 + \cdots + w_n x_n$
- 將一元多項式中的 $x, x^2, \ldots, x^n$ 分別看作多元線性迴歸中的 $x_1, x_2, \ldots, x_n$,即可用多元線性迴歸求解一元多項式迴歸
- 一元多項式迴歸:$y = w_0 + w_1 x + w_2 x^2 + \cdots + w_n x^n$
x → 多項式特徵擴展器 → x¹…xⁿ → 線性迴歸器 → w0…wn;特徵擴展器與線性迴歸器串聯,整體構成管線(Pipeline)
# poly.py
import numpy as np
import sklearn.pipeline as pl
import sklearn.preprocessing as sp
import sklearn.linear_model as lm
import sklearn.metrics as sm
import matplotlib.pyplot as mp
# Read training samples: each line is "feature1,...,featureN,target".
train_x, train_y = [], []
with open('../data/single.txt', 'r') as f:
    for line in f:
        values = [float(token) for token in line.split(',')]
        train_x.append(values[:-1])
        train_y.append(values[-1])
train_x = np.array(train_x)
train_y = np.array(train_y)
# Pipeline: expand x into polynomial features up to degree 10,
# then fit a linear regressor on the expanded features.
model = pl.make_pipeline(sp.PolynomialFeatures(10), lm.LinearRegression())
model.fit(train_x, train_y)
pred_train_y = model.predict(train_x)
# Goodness of fit on the training data.
print(sm.r2_score(train_y, pred_train_y))
# Dense, evenly spaced inputs to draw a smooth regression curve.
test_x = np.linspace(train_x.min(), train_x.max(), 1000).reshape(-1, 1)
pred_test_y = model.predict(test_x)
# Plot the samples and the polynomial regression curve.
mp.figure('Polynomial Regression', dpi=120)
mp.title('Polynomial Regression', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.scatter(train_x, train_y, c='dodgerblue', alpha=0.75, s=60, label='Sample')
mp.plot(test_x, pred_test_y, c='orangered', label='Regression')
mp.legend()
mp.show()
0.7868629092058498