Introduction to CNTK 03: CNTK 101: Logistic Regression and ML Primer

Reference: https://github.com/Microsoft/CNTK/blob/v2.0.rc2/Tutorials/CNTK_101_LogisticRegression.ipynb

The official version, CNTK_101_LogisticRegression.ipynb, is presented as an interactive notebook. You can view it at the URL above, or open the copy in the Tutorials folder of your local CNTK download, as follows:
Using Jupyter Notebook
Jupyter Notebook (formerly known as IPython Notebook) is an interactive notebook environment that supports more than 40 programming languages.
How do you open the downloaded demos, i.e. the .ipynb files under the Tutorials folder?
Since version 4.0, the notebook has been split out of IPython into a separate project renamed Jupyter, which supports not only Python but also R, MATLAB, and other languages.
sudo pip install jupyter
Then run the following command in the directory containing the .ipynb files:
jupyter notebook

The five steps of a learning algorithm:
Read the data;
Preprocess the data: normalization and similar issues (see the sketch below);
Build the model;
Train the model;
Evaluate the model;
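
As a minimal sketch of the preprocessing step (an illustration with NumPy, not part of the official example; the synthetic data generated below skips normalization):

import numpy as np

def standardize(X):
    # Scale each feature column to zero mean and unit variance.
    return (X - X.mean(axis=0)) / X.std(axis=0)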

CNTK exposes a Python API:

The code below consolidates the official 101 example into a single script:

Run it from the command line: python test101.py

#encoding=utf-8
from __future__ import print_function
import numpy as np
import sys
import os
from cntk import *

import matplotlib.pyplot as plt
# Select the right target device when this notebook is being tested:
if 'TEST_DEVICE' in os.environ:
    import cntk
    if os.environ['TEST_DEVICE'] == 'cpu':
        cntk.device.try_set_default_device(cntk.device.cpu())
    else:
        cntk.device.try_set_default_device(cntk.device.gpu(0))

# Ensure we always get the same amount of randomness
np.random.seed(0)

# Generate synthetic data: Gaussian features shifted and scaled by the class
# label, so the two classes are roughly linearly separable
def generate_random_data_sample(sample_size, feature_dim, num_classes):
    Y = np.random.randint(size=(sample_size, 1), low=0, high=num_classes)
    X = (np.random.randn(sample_size, feature_dim)+3) * (Y+1)
    X = X.astype(np.float32)    
    class_ind = [Y==class_number for class_number in range(num_classes)]
    Y = np.asarray(np.hstack(class_ind), dtype=np.float32)
    return X, Y   
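# Example (illustrative): shapes produced by the helper above, e.g.
#   X, Y = generate_random_data_sample(5, 2, 2)
#   X.shape -> (5, 2)  float32 feature matrix
#   Y.shape -> (5, 2)  one-hot labels built from the stacked boolean masks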

# Plot the features, colored by label
def show_data(features, labels):
    colors = ['r' if l == 0 else 'b' for l in labels[:,0]]

    plt.scatter(features[:,0], features[:,1], c=colors)
    plt.xlabel("Scaled age (in yrs)")
    plt.ylabel("Tumor size (in cm)")
    plt.show()
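
# Note: `labels` is one-hot here, so labels[:,0] is 1.0 for class-0 samples
# (plotted blue) and 0.0 for class-1 samples (plotted red).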

# Computes the mapping from input to output; times() comes from the cntk package.
mydict = {"w": None, "b": None}
def linear_layer(input_var, output_dim):  # e.g. (2, 2)
    input_dim = input_var.shape[0]  # first dimension of input_var's shape, here 2
    weight_param = parameter(shape=(input_dim, output_dim))
    bias_param = parameter(shape=(output_dim))
    
    mydict['w'], mydict['b'] = weight_param, bias_param

    return times(input_var, weight_param) + bias_param
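
# Illustrative note: the layer evaluates z = x*W + b, so with input_dim = 2
# and output_dim = 2, W is a 2x2 parameter matrix and b a length-2 bias vector.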

# Define a utility function to compute the moving average (window size w).
def moving_average(a, w=10):
    if len(a) < w: 
        return a[:]    
    return [val if idx < w else sum(a[(idx-w):idx])/w for idx, val in enumerate(a)]
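
# Example (illustrative): moving_average([1]*5 + [3]*15, w=10) keeps the first
# 10 values unchanged and then averages each trailing window of 10 values.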


# Defines a utility that prints the training progress
def print_training_progress(trainer, mb, frequency, verbose=1):
    training_loss, eval_error = "NA", "NA"
    if mb % frequency == 0:
        training_loss = trainer.previous_minibatch_loss_average
        eval_error = trainer.previous_minibatch_evaluation_average
        if verbose: 
            print ("Minibatch: {0}, Loss: {1:.4f}, Error: {2:.2f}".format(mb, training_loss, eval_error))
        
    return mb, training_loss, eval_error
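
# Note: when mb is not a multiple of `frequency`, the strings "NA" are
# returned; the training loop below filters those out before plotting.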

# Plot the moving-average training loss
def show_plot01(plotdata):
    plt.figure(1)
    plt.subplot(211)
    plt.plot(plotdata["batchsize"], plotdata["avgloss"], 'b--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Loss')
    plt.title('Minibatch run vs. Training loss')

    plt.show()

# Plot the moving-average prediction error
def show_plot02(plotdata):
    plt.subplot(212)
    plt.plot(plotdata["batchsize"], plotdata["avgerror"], 'r--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Label Prediction Error')
    plt.title('Minibatch run vs. Label Prediction Error')
    plt.show()
# Plot the test data together with the learned separating line
def show_plot03(features, labels, bias_vector, weight_matrix):
    colors = ['r' if l == 0 else 'b' for l in labels[:,0]]
    plt.scatter(features[:,0], features[:,1], c=colors)
    plt.plot([0, bias_vector[0]/weight_matrix[0][1]],
             [bias_vector[1]/weight_matrix[0][0], 0], c='g', lw=3)
    plt.xlabel("Scaled age (in yrs)")
    plt.ylabel("Tumor size (in cm)")
    plt.show()
	
if __name__ == '__main__':
    # Generate data and (optionally) visualize it
    mysamplesize = 32
    input_dim = 2
    num_output_classes = 2
    #features, labels = generate_random_data_sample(mysamplesize, input_dim, num_output_classes)
    #show_data(features, labels)

    # Set up the model
    feature = input(input_dim, np.float32)  # in later CNTK releases: input_variable
    output_dim = num_output_classes
    label = input(num_output_classes, np.float32)
    z = linear_layer(feature, output_dim)
    loss = cross_entropy_with_softmax(z, label)
    eval_error = classification_error(z, label)

    learning_rate = 0.5
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
    learner = sgd(z.parameters, lr_schedule)
    trainer = Trainer(z, (loss, eval_error), [learner])
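    # Illustrative note: sgd() performs the plain gradient-descent update
    # w <- w - learning_rate * grad(loss, w) on every minibatch; the schedule
    # above fixes the rate at 0.5 per minibatch.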
	
    # Train
    minibatch_size = 25
    num_samples_to_train = 20000
    num_minibatches_to_train = int(num_samples_to_train / minibatch_size)  # 800
    # Progress-reporting configuration
    training_progress_output_freq = 50
    plotdata = {"batchsize": [], "loss": [], "error": []}

    for i in range(0, num_minibatches_to_train):
        features, labels = generate_random_data_sample(minibatch_size, input_dim, num_output_classes)
        # Train on one minibatch
        trainer.train_minibatch({feature: features, label: labels})
        # Print training progress
        batchsize, loss, error = print_training_progress(trainer, i,
                                                         training_progress_output_freq, verbose=1)
        if not (loss == "NA" or error == "NA"):
            plotdata["batchsize"].append(batchsize)
            plotdata["loss"].append(loss)
            plotdata["error"].append(error)
			
    # Compute the moving average loss to smooth out the noise in SGD
    plotdata["avgloss"] = moving_average(plotdata["loss"])
    plotdata["avgerror"] = moving_average(plotdata["error"])
    show_plot01(plotdata)
    show_plot02(plotdata)

    # Test
    test_minibatch_size = 25
    features, labels = generate_random_data_sample(test_minibatch_size, input_dim, num_output_classes)
    print(trainer.test_minibatch({feature: features, label: labels}))
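    # Illustrative note: test_minibatch returns the average value of the
    # evaluation criterion (classification_error above) over this test batch.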
    # Inspect the predictions
    out = softmax(z)
    result = out.eval({feature: features})
    print("Label    :", [np.argmax(lbl) for lbl in labels])
    print("Predicted:", [np.argmax(result[i,:]) for i in range(len(result))])
    # Print the parameters learned during training
    print(mydict['b'].value)
    bias_vector   = mydict['b'].value
    weight_matrix = mydict['w'].value
    # Visualize the predictions on the test data
    show_plot03(features, labels, bias_vector, weight_matrix)
	



