"""Simple tutorial following the TensorFlow example of a Convolutional Network.
Parag K. Mital, Jan. 2016"""import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'# %% Importsimport tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
from libs.utils import montage, weight_variable, bias_variable
import matplotlib.pyplot as plt
import numpy as np
# %% Setup input to the network and true output label. These are
# simply placeholders which we'll fill in later.
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
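# Note (added here for context): read_data_sets downloads MNIST into
# 'MNIST_data/' on first use. With one_hot=True each label is a length-10
# indicator vector, and each image arrives flattened to 784 = 28*28
# floats scaled to [0, 1].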
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

# %% Since x is currently [batch, height*width], we need to reshape to a
# 4-D tensor to use it in a convolutional graph. If one component of
# `shape` is the special value -1, the size of that dimension is
# computed so that the total size remains constant. Since we haven't
# defined the batch dimension's shape yet, we use -1 to denote this
# dimension should not change size.
x_tensor = tf.reshape(x, [-1, 28, 28, 1])
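# Quick illustration of the -1 behaviour (an added sanity check, not part
# of the original tutorial): the same rule applies in NumPy, so with a
# hypothetical batch of 50 flattened images, -1 resolves to 50.
assert np.zeros((50, 784)).reshape(-1, 28, 28, 1).shape == (50, 28, 28, 1)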
# Define the convolution kernel shape: [window height, window width,
# input channels, output channels].
# Use convolution layers with a stride of 2 and smaller filter sizes in
# place of pooling layers.
# Finally, flatten the convolutional output into one dimension (a 2-D
# array together with the batch dimension) to connect it to the
# fully-connected layer.
# Add a dropout layer for regularization, to reduce overfitting.
# %% We'll setup the first convolutional layer
# Weight matrix is [height x width x input_channels x output_channels]
filter_size = 5
n_filters_1 = 16
W_conv1 = weight_variable([filter_size, filter_size, 1, n_filters_1])

# %% Bias is [output_channels]
b_conv1 = bias_variable([n_filters_1])

# %% Now we can build a graph which does the first layer of convolution:
# we define our stride as batch x height x width x channels
# instead of pooling, we use strides of 2 and more layers
# with smaller filters.
h_conv1 = tf.nn.relu(
    tf.nn.conv2d(input=x_tensor,
                 filter=W_conv1,
                 strides=[1, 2, 2, 1],
                 padding='SAME') +
    b_conv1)
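# Shape check (an illustrative addition, not in the original tutorial):
# with 'SAME' padding and a stride of 2, each spatial dimension is halved,
# so the 28x28 input becomes 14x14 with n_filters_1 channels.
# print(h_conv1.get_shape())  # => (?, 14, 14, 16)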
# %% And just like the first layer, add additional layers to create
# a deep net
n_filters_2 = 16
W_conv2 = weight_variable([filter_size, filter_size, n_filters_1, n_filters_2])
b_conv2 = bias_variable([n_filters_2])
h_conv2 = tf.nn.relu(
    tf.nn.conv2d(input=h_conv1,
                 filter=W_conv2,
                 strides=[1, 2, 2, 1],
                 padding='SAME') +
    b_conv2)

# %% We'll now reshape so we can connect to a fully-connected layer:
h_conv2_flat = tf.reshape(h_conv2, [-1, 7 * 7 * n_filters_2])
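# (Added note) The 7*7 follows from the two stride-2 layers above:
# 28 -> 14 after the first convolution, 14 -> 7 after the second.
# print(h_conv2.get_shape())  # => (?, 7, 7, 16)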
# %% Create a fully-connected layer:
n_fc = 1024
W_fc1 = weight_variable([7 * 7 * n_filters_2, n_fc])
b_fc1 = bias_variable([n_fc])
h_fc1 = tf.nn.relu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1)

# %% We can add dropout for regularizing and to reduce overfitting like so:
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
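# Note (added): keep_prob is a placeholder so dropout can be switched on
# during training (fed as 0.5 below) and off at evaluation (fed as 1.0).
# tf.nn.dropout also scales the kept activations by 1/keep_prob so their
# expected sum stays the same between training and evaluation.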
# %% And finally our softmax layer:
W_fc2 = weight_variable([n_fc, 10])
b_fc2 = bias_variable([10])
y_pred = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

# %% Define loss/eval/training functions
cross_entropy = -tf.reduce_sum(y * tf.log(y_pred))
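# An optional, more numerically stable variant (an addition, not the
# original author's code): computing softmax and cross-entropy in one
# fused TF 1.x op on the raw logits avoids log(0) when y_pred saturates.
# logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
# cross_entropy = tf.reduce_mean(
#     tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))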
optimizer = tf.train.AdamOptimizer().minimize(cross_entropy)

# %% Monitor accuracy
correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
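# (Added illustration) argmax turns one-hot rows into class indices, e.g.
# np.argmax([[0, 0, 1], [1, 0, 0]], 1) gives array([2, 0]); comparing
# predicted and true indices yields a boolean vector whose mean is the
# accuracy.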
# Create a session to run the whole graph.
# Fetch batches via mnist.train.next_batch().
# %% We now create a new session to actually perform the initialization of
# the variables:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# %% We'll train in minibatches and report accuracy:
batch_size = 100
n_epochs = 5
for epoch_i in range(n_epochs):
    for batch_i in range(mnist.train.num_examples // batch_size):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        sess.run(optimizer, feed_dict={
            x: batch_xs, y: batch_ys, keep_prob: 0.5})
    print(sess.run(accuracy,
                   feed_dict={
                       x: mnist.validation.images,
                       y: mnist.validation.labels,
                       keep_prob: 1.0}))
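# Optional final check (an addition, not in the original tutorial):
# evaluate once on the held-out test set after training finishes.
# print(sess.run(accuracy, feed_dict={
#     x: mnist.test.images,
#     y: mnist.test.labels,
#     keep_prob: 1.0}))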
# Visualization: convolution kernels.
# %% Let's take a look at the kernels we've learned
W = sess.run(W_conv1)
plt.imshow(montage(W / np.max(W)), cmap='coolwarm')
plt.show()

# Console output (validation accuracy after each epoch):
# 0.9756
# 0.9828
# 0.9876
# 0.9866
# 0.9882