(i)處理文件
# -*- coding: utf-8 -*-
import numpy as np
import math
import matplotlib.pyplot as plt
import csv
def read():
    """Smooth every data row of ``quasar_test.csv`` and save the result.

    The first non-empty line of the input is parsed as the sample positions;
    every following non-empty line is parsed as one series of values sampled
    at those positions.  Each series is smoothed with
    ``weighted_linear_regression`` (bandwidth 5) and written to
    ``result1.csv`` as the ``str()`` of a Python list, one series per line.

    NOTE(review): the bracketed ``str(list)`` output format is kept exactly
    as the original script wrote it.  ``read2`` in this file parses
    ``result1.csv`` with ``float()`` on comma-split fields and treats its
    first line as a header -- confirm which format is actually intended.

    Returns:
        numpy array of shape (number of data rows, number of columns)
        holding the smoothed series.

    Raises:
        ValueError: if the input has no data row after the header line.
    """
    # ``with`` closes the handle even if parsing fails further down.
    with open('quasar_test.csv', 'r') as fr:
        stripped = [line.strip() for line in fr]
    # Blank lines (e.g. a trailing newline) carry no data and would make
    # float('') fail, so they are dropped up front.
    arrayline = [line for line in stripped if line]
    if len(arrayline) < 2:
        raise ValueError('quasar_test.csv needs a header line and at least one data row')

    n = len(arrayline[1].split(','))
    x = np.zeros((2, n))
    x[0] = [float(value) for value in arrayline[0].split(',')]
    x[1] = 1  # intercept row of the design matrix

    y2 = np.zeros((len(arrayline) - 1, n))
    with open('result1.csv', 'w') as f1:
        for i, line in enumerate(arrayline[1:]):
            # A real list (not a lazy ``map``) so the values can be reused.
            y = [float(value) for value in line.split(',')]
            y2[i] = weighted_linear_regression(x, y, 5)
            q = list(y2[i])
            if i > 0:
                # The original script echoed every row except the first;
                # that console output is preserved.
                print(q)
            f1.write(str(q))
            f1.write('\n')
    return y2
def weighted_linear_regression(x, y, t):  # weighted linear regression
    """Smooth ``y`` with locally weighted linear regression.

    For every sample position ``x[0][i]`` a straight line is fitted to all
    samples, sample ``j`` being weighted by
    ``exp(-(x[0][i] - x[0][j]) ** 2 / (2 * t ** 2))``.  The fitted line is
    then evaluated at ``x[0][i]``.

    Args:
        x: 2 x n design matrix.  Row 0 holds the sample positions; row 1 is
            the intercept row (the caller in this file fills it with 1, and
            the prediction adds the second coefficient unscaled).
        y: iterable of n target values.
        t: bandwidth of the weighting; expected to be non-zero.

    Returns:
        list with one fitted value per sample position.
    """
    x = np.asarray(x, dtype=float)
    # Materialise y first: it may be a lazy iterator (``map`` on Python 3),
    # which numpy would otherwise wrap as a useless 0-d object array.
    y = np.array(list(y), dtype=float)
    positions = x[0]

    y2 = []
    for i in range(len(positions)):
        # Only the diagonal of the weight matrix is non-zero, so keep it as
        # a vector; ``x * w`` equals ``x . diag(w)`` without the n x n array.
        w = np.exp((positions[i] - positions) ** 2 / (-2.0 * t * t))
        xw = x * w
        # Solve the normal equations (X W X^T) theta = X W y directly
        # instead of forming the explicit inverse.
        theta = np.linalg.solve(np.dot(xw, x.T), np.dot(xw, y))
        y2.append(positions[i] * theta[0] + theta[1])
    return y2
# Run the smoothing pass when the script is executed and keep the smoothed
# rows returned by read() in y2.
y2=read()
(ii)
# -*- coding: utf-8 -*-
import numpy as np
import heapq
import csv
def read():
    """Load ``result.csv`` into a header vector and a 2-D data array.

    The first non-empty line is parsed as comma-separated floats and
    returned as ``x``; every following non-empty line becomes one row of
    ``y``.

    Returns:
        tuple ``(x, y)`` where ``x`` is a list of floats and ``y`` is a
        numpy array with one row per data line and ``len(x)`` columns.

    Raises:
        ValueError: if the file contains no non-empty line.
    """
    # ``with`` guarantees the handle is closed; the original leaked it.
    with open('result.csv') as fr:
        stripped = [line.strip() for line in fr]
    # Skip blank lines (typically a trailing newline) instead of crashing
    # on float('').
    arrayline = [line for line in stripped if line]
    if not arrayline:
        raise ValueError('result.csv contains no data')

    # A real list, so len() works on Python 3 as well as Python 2.
    x = [float(value) for value in arrayline[0].split(',')]
    y = np.zeros((len(arrayline) - 1, len(x)))
    for i, line in enumerate(arrayline[1:]):
        y[i] = [float(value) for value in line.split(',')]
    return x, y
# Load the header vector and the data rows once at module level; error()
# reads x as a global when it calls predict().
x,y=read()
def predict(x, y, m):
    """Estimate the first 50 columns of row ``m`` from its nearest rows.

    Rows are compared by squared Euclidean distance over columns 150-449.
    The three rows closest to row ``m`` (row ``m`` itself excluded) are
    averaged over columns 0-49 with weights ``1 - d / h``, where ``d`` is the
    neighbour's distance and ``h`` is the largest of the selected distances.

    NOTE(review): because ``h`` is the maximum over the *selected*
    neighbours, the farthest of them always receives weight 0.  If all
    selected distances are equal the weights sum to zero and the result is
    NaN.  Both properties are inherited from the original formula.

    Args:
        x: unused; kept so existing call sites keep working.
        y: 2-D array-like with at least 450 columns.
        m: index of the row to estimate.

    Returns:
        list of 50 floats, the estimate for columns 0-49 of row ``m``.

    Raises:
        IndexError: if ``y`` has fewer than 450 columns.
        ValueError: if ``y`` has no row other than ``m``.
    """
    data = np.asarray(y, dtype=float)
    if data.ndim != 2 or data.shape[1] < 450:
        raise IndexError('y must be 2-D with at least 450 columns')

    right = data[:, 150:450]
    dis = ((right - right[m]) ** 2).sum(axis=1)

    # Rank row *indices*, not distance values.  Looking values up again with
    # list.index() maps tied distances onto the same row, and dropping "the
    # smallest value" does not reliably drop row m when another row is
    # identical to it.
    own = m if m >= 0 else m + len(data)
    candidates = [i for i in range(len(data)) if i != own]
    location = heapq.nsmallest(3, candidates, key=lambda i: dis[i])
    if not location:
        raise ValueError('predict() needs at least one row besides row m')

    nearest = dis[location]
    h = nearest.max()
    weights = 1 - nearest / h
    top = np.dot(weights, data[location, :50])
    return (top / weights.sum()).tolist()
def error(y):
    """Print and return the mean squared estimation error over all rows.

    For every row ``m`` of ``y`` the leading columns are estimated with
    ``predict(x, y, m)`` (``x`` is the module-level header vector) and the
    squared differences to the actual values ``y[m][0..]`` are summed.  The
    mean of these per-row sums is printed.

    Args:
        y: 2-D array-like of data rows, as accepted by ``predict``.

    Returns:
        The mean of the per-row summed squared errors (the original only
        printed it; returning it as well is backward-compatible).
    """
    sum_error = []
    for m in range(len(y)):
        f_left = predict(x, y, m)
        row_error = 0
        for n, estimate in enumerate(f_left):
            diff = estimate - y[m][n]
            row_error = row_error + diff * diff
        sum_error.append(row_error)
    mean_error = sum(sum_error) / len(sum_error)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(mean_error)
    return mean_error
# Evaluate the estimator on every row of y and print the mean error.
error(y)
(iii)
# -*- coding: utf-8 -*-
import numpy as np
import heapq
import matplotlib.pyplot as plt
def read1():  # read the data
    """Load ``result.csv`` into a header vector and a 2-D data array.

    The first non-empty line is parsed as comma-separated floats and
    returned as ``x``; every following non-empty line becomes one row of
    ``y``.

    Returns:
        tuple ``(x, y)`` where ``x`` is a list of floats and ``y`` is a
        numpy array with one row per data line and ``len(x)`` columns.

    Raises:
        ValueError: if the file contains no non-empty line.
    """
    # ``with`` guarantees the handle is closed; the original leaked it.
    with open('result.csv') as fr:
        stripped = [line.strip() for line in fr]
    # Skip blank lines (typically a trailing newline) instead of crashing
    # on float('').
    arrayline = [line for line in stripped if line]
    if not arrayline:
        raise ValueError('result.csv contains no data')

    # A real list, so len() works on Python 3 as well as Python 2.
    x = [float(value) for value in arrayline[0].split(',')]
    y = np.zeros((len(arrayline) - 1, len(x)))
    for i, line in enumerate(arrayline[1:]):
        y[i] = [float(value) for value in line.split(',')]
    return x, y
def read2():  # read the prediction data
    """Load ``result1.csv`` into a header vector and a 2-D data array.

    The first non-empty line is parsed as comma-separated floats and
    returned as ``x``; every following non-empty line becomes one row of
    ``y``.

    Returns:
        tuple ``(x, y)`` where ``x`` is a list of floats and ``y`` is a
        numpy array with one row per data line and ``len(x)`` columns.

    Raises:
        ValueError: if the file contains no non-empty line.
    """
    # ``with`` guarantees the handle is closed; the original leaked it.
    with open('result1.csv') as fr:
        stripped = [line.strip() for line in fr]
    # Skip blank lines (typically a trailing newline) instead of crashing
    # on float('').
    arrayline = [line for line in stripped if line]
    if not arrayline:
        raise ValueError('result1.csv contains no data')

    # A real list, so len() works on Python 3 as well as Python 2.
    x = [float(value) for value in arrayline[0].split(',')]
    y = np.zeros((len(arrayline) - 1, len(x)))
    for i, line in enumerate(arrayline[1:]):
        y[i] = [float(value) for value in line.split(',')]
    return x, y
def predict(x, y, y1, m):  # y1 is the prediction data
    """Estimate the first 50 columns for row ``m`` of ``y1`` from ``y``.

    Row ``y1[m]`` is compared with every row of ``y`` by squared Euclidean
    distance over columns 150-449.  The three closest rows of ``y`` are
    averaged over columns 0-49 with weights ``1 - d / h``, where ``d`` is the
    neighbour's distance and ``h`` is the largest of the selected distances.

    NOTE(review): because ``h`` is the maximum over the *selected*
    neighbours, the farthest of them always receives weight 0.  If all
    selected distances are equal the weights sum to zero and the result is
    NaN.  Both properties are inherited from the original formula.

    Args:
        x: unused; kept so existing call sites keep working.
        y: 2-D array-like of reference rows with at least 450 columns.
        y1: 2-D array-like of query rows with at least 450 columns.
        m: index into ``y1`` of the row to estimate.

    Returns:
        list of 50 floats, the estimate for columns 0-49 of ``y1[m]``.

    Raises:
        IndexError: if ``y`` or ``y1[m]`` has fewer than 450 columns.
        ValueError: if ``y`` has no rows.
    """
    data = np.asarray(y, dtype=float)
    if data.size == 0:
        raise ValueError('predict() needs at least one reference row')
    query = np.asarray(y1[m], dtype=float)
    if data.ndim != 2 or data.shape[1] < 450 or query.shape[0] < 450:
        raise IndexError('y and y1[m] must have at least 450 columns')

    dis = ((data[:, 150:450] - query[150:450]) ** 2).sum(axis=1)

    # Rank row *indices*, not distance values: looking the values up again
    # with list.index() maps tied distances onto the same row, so a tied
    # neighbour was counted twice and the other one ignored.
    location = heapq.nsmallest(3, range(len(data)), key=lambda i: dis[i])

    nearest = dis[location]
    h = nearest.max()
    weights = 1 - nearest / h
    top = np.dot(weights, data[location, :50])
    return (top / weights.sum()).tolist()
def error(x, y, y1):
    """Estimate every row of ``y1`` and measure the squared error.

    Each row ``m`` of ``y1`` is estimated with ``predict(x, y, y1, m)``; the
    squared differences between the estimate and the leading values of
    ``y1[m]`` are summed per row.

    Args:
        x: passed through to ``predict``.
        y: reference rows passed through to ``predict``.
        y1: rows to estimate; needs at least six rows because the estimates
            at index 0 and index 5 are returned.

    Returns:
        tuple ``(estimate for row 0, estimate for row 5, mean of the
        per-row summed squared errors)``.
    """
    estimates = [predict(x, y, y1, row) for row in range(len(y1))]

    row_errors = []
    for row, estimate in enumerate(estimates):
        total = 0
        for col, value in enumerate(estimate):
            residual = value - y1[row][col]
            total = total + residual * residual
        row_errors.append(total)

    mean_error = sum(row_errors) / len(row_errors)
    return estimates[0], estimates[5], mean_error
def figure(x, example_1, example_6, y1):  # draw the plots
    """Plot estimated against actual values for rows 0 and 5 of ``y1``.

    Two figures are created.  Each shows the first 50 entries of ``x`` on
    the horizontal axis, with the estimate in blue and the matching row of
    ``y1`` in green, and both are displayed with a single ``plt.show()``.

    Args:
        x: horizontal-axis values; only the first 50 are used.
        example_1: estimated values plotted in figure 1.
        example_6: estimated values plotted in figure 2.
        y1: 2-D data whose rows 0 and 5 supply the actual values.
    """
    # (figure number, estimated values, row of y1 holding the actual values)
    panels = ((1, example_1, 0), (2, example_6, 5))
    for number, estimate, row in panels:
        plt.figure(number)
        plt.xlabel('Wavelength')
        plt.ylabel('Flux')
        plt.scatter(x[0:50], estimate, marker='.', color='b',
                    label='predict value', s=10)
        plt.scatter(x[0:50], y1[row][0:50], marker='.', color='g',
                    label='real value', s=10)
        plt.legend(loc='upper right')
    plt.show()
def main():
    """Load both data files, run the estimation and plot two examples."""
    positions, reference_rows = read1()
    # Only the data rows of the second file are used; its header is ignored.
    _, query_rows = read2()
    first, sixth, _mean_error = error(positions, reference_rows, query_rows)
    figure(positions, first, sixth, query_rows)


if __name__ == '__main__':
    main()