花了兩個晚上做的,代碼部分參考了 GitHub。其中“文件數據轉矩陣”部分在我上一篇文章裏已經寫過了。怕自己忘記,所以把代碼寫在這裏。
嗚嗚嗚,太開心了
from numpy import *
# Load the training set into an (m, 2) array: one sample per line, two
# whitespace-separated numbers per line.
#
# The file is opened exactly once inside a context manager so the handle is
# always closed; the previous version opened it twice and never closed either
# handle.
with open("/Users/zhaoyueqing/Desktop/train_linear.txt") as file:
    filelist = file.readlines()

lenth = len(filelist)  # number of lines read == number of samples
to_mat = zeros((lenth, 2))  # row i receives the two numbers parsed from line i

for index, data in enumerate(filelist):
    # split() with no argument drops the trailing newline and tolerates
    # repeated/trailing blanks, which split(" ") turned into '' tokens that
    # float() cannot parse.
    to_mat[index, :] = [float(x) for x in data.split()]

X = to_mat[:, 0]  # first column, passed to LinearRegression as the inputs
y = to_mat[:, 1]  # second column, passed to LinearRegression as the targets
class LinearRegression(object):
    """Least-squares linear regression fitted by gradient descent.

    The hypothesis is ``h = X . theta`` where ``X`` is the design matrix
    (a leading column of ones followed by the feature columns) and ``theta``
    is an ``(n, 1)`` parameter column that starts as all ones.

    The class relies on the names provided by ``from numpy import *``.
    """

    def __init__(self, X, y):
        """Build the design matrix, target column and initial parameters.

        Args:
            X: Training inputs. A 1-D sequence is treated as ``m`` samples of
                a single feature; a 2-D array is treated as ``(m, k)`` with
                one sample per row.
            y: Training targets, one value per sample. 1-D, row-shaped and
                column-shaped inputs are all stored as an ``(m, 1)`` column.
        """
        self.m = len(X)  # number of training samples

        # asmatrix() replaces the mat() alias, which NumPy 2.0 removed from
        # the top-level namespace; it behaves the same on older releases.
        features = asmatrix(X)
        if ndim(X) == 1:
            # A 1-D input becomes a 1 x m row matrix; turn it into a column.
            features = features.transpose()

        bias = ones((self.m, 1))  # x0 column paired with theta0
        # Keep the matrix type: the per-sample update below depends on
        # self.X[i] being a 1 x n matrix and on `*` being a matrix product.
        self.X = asmatrix(hstack((bias, features)))

        # reshape(-1, 1) equals the old transpose for 1-D targets and also
        # keeps an already column-shaped y as a column instead of flipping it
        # into a row (which silently broadcast to m x m in `y - h`).
        self.y = asmatrix(y).reshape(-1, 1)

        self.n = self.X.shape[1]  # number of columns, bias column included
        self.theta = ones((self.n, 1))  # parameters to learn, n x 1
        self.grad = None  # last batch gradient; set by gradientDescent()

    def costFunction(self):
        """Return half the sum of squared residuals for the current theta.

        Returns ``inf`` when the value is NaN.
        """
        residual = self.y - dot(self.X, self.theta)
        # Convert to a plain array so ** is element-wise, not a matrix power.
        J = 0.5 * (array(residual) ** 2).sum()
        if isnan(J):
            return inf
        return J

    def gradientDescent(self, alpha):
        """Take one batch step: theta += alpha * X^T (y - X theta).

        The direction used is stored in ``self.grad``.
        """
        h = dot(self.X, self.theta)
        self.grad = transpose(self.X).dot(self.y - h)
        self.theta = self.theta + alpha * self.grad

    def stocGradientDescent(self, alpha):
        """Run one pass over the samples, updating theta after each one."""
        for i in range(self.m):
            h = dot(self.X[i], self.theta)  # 1 x 1 prediction for sample i
            error = self.y[i] - h
            # (n x 1) * (1 x 1) matrix product scales the sample column.
            self.theta = self.theta + alpha * self.X[i].transpose() * error

    def train(self, iterations=1000, alpha=0.01, method=0):
        """Fit theta by repeating the selected update rule.

        Args:
            iterations: Number of times the update rule is applied.
            alpha: Step size passed to the update rule.
            method: 0 for batch gradient descent, 1 for the per-sample
                (stochastic) variant.

        Raises:
            ValueError: If ``method`` is neither 0 nor 1. Such a value used to
                be ignored, leaving the model silently untrained.
        """
        if method == 0:
            step = self.gradientDescent
        elif method == 1:
            step = self.stocGradientDescent
        else:
            raise ValueError(
                "method must be 0 (batch) or 1 (stochastic), got %r" % (method,)
            )
        for _ in range(iterations):
            step(alpha)

    def predict(self, threshold=0.5):
        """Return ``X . theta`` for the training inputs as an ``(m, 1)`` column.

        ``threshold`` is accepted for backward compatibility and is not used.
        """
        return self.X.dot(self.theta)
# Build the model from the loaded samples.
a = LinearRegression(X, y)
# Debug aids, left disabled: print(a.m) and print(a.costFunction()).

# Fit with batch gradient descent (method=0) using train()'s default
# iteration count and step size, then show the learned parameter column.
a.train(method=0)
print(a.theta)

# Stochastic variant, left disabled for comparison:
#   build a second LinearRegression(X, y), call train(method=1) on it,
#   and print its theta.

# Model output for the training inputs.
b = a.predict()
print(b)
注:self.n = self.X.shape[1] 是把特徵維度(即合併 x0 之後矩陣 X 的列數)賦值給 n。
x0與X合併後的矩陣:
theta:
預測結果: