機器學習--LR算法實現

numpy庫下的實踐:

import numpy as np

import matplotlib.pyplot as plt

#加載數據

def loadDataSet():
    """Load the sample data from 'testSet.txt'.

    Each line holds two whitespace-separated feature values followed by an
    integer class label.

    Returns:
        dataMat:  list of [1.0, x1, x2] rows (leading 1.0 is the bias term).
        labelMat: list of int class labels.
    """
    dataMat = []
    labelMat = []
    # 'with' guarantees the file handle is closed; the original opened the
    # file and never closed it.
    with open('testSet.txt') as fr:
        for line in fr:
            lineArr = line.strip().split()  # strip() drops the trailing newline / surrounding spaces
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])  # feature row
            labelMat.append(int(lineArr[2]))  # label
    return dataMat, labelMat

 

def sigmoid(inx):
    """Logistic function: squash inx (scalar or numpy array) into (0, 1)."""
    neg_exp = np.exp(-inx)
    return 1.0 / (1.0 + neg_exp)

 

#梯度下降法對weights進行優化

def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=600):
    """Fit logistic-regression weights by full-batch gradient descent.

    Args:
        dataMatIn:   m x n list/array of feature rows (first column = bias 1.0).
        classLabels: length-m sequence of 0/1 labels.
        alpha:       step size (was a hard-coded 0.001; now a defaulted parameter).
        maxCycles:   number of iterations (was a hard-coded 600).

    Returns:
        n x 1 np.matrix of fitted weights.
    """
    dataMatrix = np.mat(dataMatIn)
    labelMat = np.mat(classLabels).transpose()  # labels as a column vector
    m, n = np.shape(dataMatrix)
    weights = np.ones((n, 1))  # initialise all coefficients to 1
    for k in range(maxCycles):
        h = sigmoid(dataMatrix * weights)  # predicted probabilities
        error = h - labelMat               # gradient of the log-loss w.r.t. scores
        # descend along the gradient of the loss
        weights = weights - alpha * dataMatrix.transpose() * error
    return weights

 

# Driver: load the data, fit weights by batch gradient descent, show them.

dataMat,labelMat=loadDataSet()

weights=gradAscent(dataMat,labelMat)

print(weights)

 

#畫出決策邊界

def plotBestFit(dataMat, labelMat, weights):
    """Scatter the two classes and draw the fitted decision boundary.

    Args:
        dataMat:  m x 3 rows of [1.0, x1, x2].
        labelMat: length-m sequence of 0/1 labels.
        weights:  length-3 array-like [w0, w1, w2].
    """
    dataArr = np.array(dataMat)
    n = np.shape(dataArr)[0]  # number of samples
    xcode1 = []; ycode1 = []  # class-1 points
    xcode2 = []; ycode2 = []  # class-0 points
    for i in range(n):
        # original wrote int(labelMat[i]==1), which casts the boolean result,
        # not the label; compare the label directly
        if labelMat[i] == 1:
            xcode1.append(dataArr[i, 1]); ycode1.append(dataArr[i, 2])
        else:
            xcode2.append(dataArr[i, 1]); ycode2.append(dataArr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcode1, ycode1, s=30, c='red')
    ax.scatter(xcode2, ycode2, s=30, c='green')
    x = np.arange(-3.0, 3.0, 0.1)
    # boundary where w0 + w1*x1 + w2*x2 = 0  =>  x2 = (-w0 - w1*x1) / w2
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel('X1'); plt.ylabel('X2')
    plt.show()

 

# Driver: plot the points and the learned decision boundary.

plotBestFit(dataMat,labelMat,weights.getA()) # getA() converts the np.matrix to an ndarray

 

##################隨機梯度法##################

import numpy as np

import matplotlib.pyplot as plt

#加載數據

def loadDataSet():
    """Load the sample data from 'testSet.txt'.

    Each line holds two whitespace-separated feature values followed by an
    integer class label.

    Returns:
        dataMat:  list of [1.0, x1, x2] rows (leading 1.0 is the bias term).
        labelMat: list of int class labels.
    """
    dataMat = []
    labelMat = []
    # 'with' guarantees the file handle is closed; the original opened the
    # file and never closed it.
    with open('testSet.txt') as fr:
        for line in fr:
            lineArr = line.strip().split()  # strip() drops the trailing newline / surrounding spaces
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])  # feature row
            labelMat.append(int(lineArr[2]))  # label
    return dataMat, labelMat

 

def sigmoid(inx):
    """Logistic function 1 / (1 + e^-x); works on scalars and numpy arrays."""
    denom = 1.0 + np.exp(-inx)
    return 1.0 / denom

 

#梯度下降法對weights進行優化

def gradAscent(dataMatIn, classLabels):
    """Optimise logistic-regression weights with full-batch gradient steps.

    dataMatIn is an m x n set of feature rows (bias column first); classLabels
    is the matching sequence of 0/1 labels. Returns an n x 1 np.matrix of
    weights after a fixed number of iterations.
    """
    X = np.mat(dataMatIn)
    y = np.mat(classLabels).T          # labels as a column vector
    num_features = np.shape(X)[1]
    step = 0.001                       # learning-rate
    iterations = 600
    w = np.ones((num_features, 1))     # start from all-ones coefficients
    for _ in range(iterations):
        residual = sigmoid(X * w) - y  # prediction minus truth
        w = w - step * X.T * residual  # move against the loss gradient
    return w

 

#隨機梯度下降

def stocGradAscent(dataMatIn, classLabels, alpha=0.01, maxCycles=200):
    """Fit logistic-regression weights by stochastic gradient descent
    (one sample per update, cycled maxCycles times over the data).

    Args:
        dataMatIn:   m x n ndarray of feature rows (bias column first).
        classLabels: length-m sequence of 0/1 labels.
        alpha:       step size (was a hard-coded 0.01; now a defaulted parameter).
        maxCycles:   number of full passes over the data (was a hard-coded 200).

    Returns:
        length-n ndarray of fitted weights.
    """
    m, n = np.shape(dataMatIn)
    weights = np.ones(n)  # initialise all coefficients to 1
    for j in range(maxCycles):
        for i in range(m):
            h = sigmoid(sum(dataMatIn[i] * weights))  # probability for one sample
            # BUG FIX: original read the module-global labelMat[i] instead of
            # the classLabels parameter, silently ignoring the argument.
            error = h - classLabels[i]
            weights = weights - alpha * error * dataMatIn[i]  # per-sample update
    return weights

 

# Driver: load the data, fit weights with stochastic gradient descent, show them.

dataMat,labelMat=loadDataSet()

weights=stocGradAscent(np.array(dataMat),labelMat)

print(weights)

 

# 畫出決策邊界

def plotBestFit(dataMat, labelMat, weights):
    """Scatter the two classes and draw the fitted decision boundary.

    Args:
        dataMat:  m x 3 rows of [1.0, x1, x2].
        labelMat: length-m sequence of 0/1 labels.
        weights:  length-3 array-like [w0, w1, w2].
    """
    dataArr = np.array(dataMat)
    n = np.shape(dataArr)[0]  # number of samples
    xcode1 = []; ycode1 = []  # class-1 points
    xcode2 = []; ycode2 = []  # class-0 points
    for i in range(n):
        # original wrote int(labelMat[i]==1), which casts the boolean result,
        # not the label; compare the label directly
        if labelMat[i] == 1:
            xcode1.append(dataArr[i, 1]); ycode1.append(dataArr[i, 2])
        else:
            xcode2.append(dataArr[i, 1]); ycode2.append(dataArr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcode1, ycode1, s=30, c='red')
    ax.scatter(xcode2, ycode2, s=30, c='green')
    x = np.arange(-3.0, 3.0, 0.1)
    # boundary where w0 + w1*x1 + w2*x2 = 0  =>  x2 = (-w0 - w1*x1) / w2
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel('X1'); plt.ylabel('X2')
    plt.show()

 

# Driver: stochastic-gradient weights are already a 1-D ndarray, so no getA() here.

plotBestFit(dataMat,labelMat,weights)

 

sklearn庫下的實踐:

import numpy as np

from sklearn.model_selection import train_test_split

#加載數據

def loadDataSet():
    """Load the sample data from 'testSet.txt'.

    Each line holds two whitespace-separated feature values followed by an
    integer class label.

    Returns:
        dataMat:  list of [1.0, x1, x2] rows (leading 1.0 is the bias term).
        labelMat: list of int class labels.
    """
    dataMat = []
    labelMat = []
    # 'with' guarantees the file handle is closed; the original opened the
    # file and never closed it.
    with open('testSet.txt') as fr:
        for line in fr:
            lineArr = line.strip().split()  # strip() drops the trailing newline / surrounding spaces
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])  # feature row
            labelMat.append(int(lineArr[2]))  # label
    return dataMat, labelMat

 

# Build the train/test split: 70% train, 30% test, fixed seed for reproducibility.

dataMat,labelMat=loadDataSet()

X_train, X_test, y_train, y_test = train_test_split(dataMat, labelMat, test_size=0.3, random_state=0)

 

# Preprocess: standardise features — fit the scaler on the training set only,
# then apply the same transform to both splits (avoids test-set leakage).
# NOTE(review): column 0 of the data is the constant 1.0 bias term; a
# zero-variance column is centred to all zeros by StandardScaler, so the bias
# column is effectively wiped out here — presumably harmless because
# LogisticRegression fits its own intercept, but confirm this is intended.

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

sc.fit(X_train)

X_train_std = sc.transform(X_train)

X_test_std = sc.transform(X_test)

 

# Train the model; large C (=10000) means very weak L2 regularisation.

from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(C=10000.0, random_state=0)

lr.fit(X_train_std, y_train)

 

#預測

import matplotlib.pyplot as plt

def plotPredict(X_test_std, y_test, model):
    """Plot test points coloured by true class, overlaid with prediction results.

    Markers:
        red / green dots : true class 1 / class 0 (feature columns 1 and 2;
                           column 0 is the bias term)
        yellow triangles : correctly predicted samples
        yellow crosses   : mispredicted samples

    Args:
        X_test_std: standardised test features (n x 3 ndarray).
        y_test:     true labels for the test set.
        model:      fitted classifier exposing predict().
    """
    result = list(model.predict(X_test_std))
    print(y_test)
    print(result)
    n = np.shape(X_test_std)[0]  # number of test samples
    xcode1 = []; ycode1 = []; truePrex = []; truePrey = []
    xcode2 = []; ycode2 = []; falsePrex = []; falsePrey = []
    # Single pass replaces the original's two identical-range loops; the
    # original's int(...) wrapped the comparison (casting the bool), so the
    # comparisons are now written directly.
    for i in range(n):
        if y_test[i] == 1:
            xcode1.append(X_test_std[i, 1]); ycode1.append(X_test_std[i, 2])
        else:
            xcode2.append(X_test_std[i, 1]); ycode2.append(X_test_std[i, 2])
        if result[i] == y_test[i]:
            truePrex.append(X_test_std[i, 1]); truePrey.append(X_test_std[i, 2])
        else:
            falsePrex.append(X_test_std[i, 1]); falsePrey.append(X_test_std[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcode1, ycode1, s=30, c='red')
    ax.scatter(xcode2, ycode2, s=30, c='green')
    ax.scatter(truePrex, truePrey, c='yellow', marker='v')
    ax.scatter(falsePrex, falsePrey, s=30, c='yellow', marker='x')
    plt.show()

 

# Driver: visualise the fitted sklearn model's predictions on the test set.
plotPredict(X_test_std,y_test,lr)

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章