import tensorflow as tf
import numpy as np
import pandas as pd
# Load the breast-cancer training and test splits.
train = pd.read_csv('../Datasets/Breast-Cancer/breast-cancer-train.csv')
test = pd.read_csv('../Datasets/Breast-Cancer/breast-cancer-test.csv')

# Feature matrices are transposed to shape (2, n_samples) so the linear
# model below can be written as y = W @ X + b with W of shape (1, 2).
# NOTE: the original called np.float32(Series.T); transposing a 1-D Series
# is a no-op, so the labels are plain 1-D float32 vectors.
X_train = train[['Clump Thickness', 'Cell Size']].to_numpy(dtype=np.float32).T
y_train = train['Type'].to_numpy(dtype=np.float32)
X_test = test[['Clump Thickness', 'Cell Size']].to_numpy(dtype=np.float32).T
y_test = test['Type'].to_numpy(dtype=np.float32)
# --- Linear model fitted by gradient descent (TensorFlow 1.x graph API) ---

# Intercept b of the linear model, initialised to 0.
b = tf.Variable(tf.zeros([1]))
# Weights W (shape [1, 2]), initialised uniformly at random in [-1.0, 1.0).
W = tf.Variable(tf.random_uniform([1, 2], -1.0, 1.0))
# The linear function itself: y = W @ X_train + b.
y = tf.matmul(W, X_train) + b
# Mean squared error over the training set.
loss = tf.reduce_mean(tf.square(y - y_train))
# Gradient descent with step size 0.01 (similar to scikit-learn's SGDRegressor).
optimizer = tf.train.GradientDescentOptimizer(0.01)
# Minimise the squared loss. Renamed from `train` so the training DataFrame
# loaded earlier is not shadowed by the training op.
train_op = optimizer.minimize(loss)
# Initialise all variables. global_variables_initializer supersedes the
# long-deprecated (and later removed) initialize_all_variables.
init = tf.global_variables_initializer()
# Open a TensorFlow session and run the graph.
sess = tf.Session()
sess.run(init)
# Train for 1000 iterations, logging the parameters every 200 steps.
# (The loop body was unindented in the original, which is a SyntaxError.)
for step in range(1000):
    sess.run(train_op)
    if step % 200 == 0:
        print(step, sess.run(W), sess.run(b))
# Split the test set by class so each class can be drawn with its own marker.
test_negative = test.loc[test['Type'] == 0][['Clump Thickness', 'Cell Size']]
test_positive = test.loc[test['Type'] == 1][['Clump Thickness', 'Cell Size']]

# Plot the test points together with the learned decision boundary.
import matplotlib.pyplot as plot
plot.scatter(test_negative['Clump Thickness'], test_negative['Cell Size'],
             marker='o', s=200, c='red')
plot.scatter(test_positive['Clump Thickness'], test_positive['Cell Size'],
             marker='x', s=150, c='black')
plot.xlabel('Clump Thickness')
plot.ylabel('Cell Size')

# Fetch the trained parameters once, instead of calling sess.run three
# times inside a single expression as the original did.
weights = sess.run(W)
intercept = sess.run(b)
lx = np.arange(0, 12)
# Decision boundary: the set of points where W[0,0]*x + W[0,1]*y + b == 0.5
# (labels are 0/1, so 0.5 is the midpoint threshold).
ly = (0.5 - intercept - lx * weights[0][0]) / weights[0][1]
plot.plot(lx, ly, color='green')
plot.show()
# The resulting figure is shown below.