keras用xception網絡實現貓狗識別

前言

最近想復現一下xception實現分類的任務，但是網絡上只找得到xception的結構實現[1]，或是像VGG[2]、Inception-v3[3]之類的遷移學習。沒有代碼能直接拿來用，那就學習一下，自己寫寫。最終目標當然不只是貓狗分類，不過可以用它來驗證一下正確性。

如果完全不懂keras，可以去看一下這個教程視頻，它的前提是懂得tensorflow或者theano。很基礎，能有個大概的瞭解。

數據集

用的是最基礎的貓狗識別數據集。這裏不附鏈接了（網速太差，上傳慢），這個數據集很容易就可以下載到。

因爲讀取圖片時，圖像生成器會通過target_size把圖片統一縮放到指定大小，所以數據集圖片大小不作要求。但是目錄結構得是下面這種方式[4]：train和test目錄下每個類別各佔一個子文件夾，例如train/cats、train/dogs、test/cats、test/dogs。

其中我的數據集中訓練集貓狗各1000張，測試集各500張。

代碼

本來想模仿[3]來寫，但是找不到xception預訓練好的pb文件，只能找到對應於keras的h5文件（top&notop），所以網絡模型上參考了[1]的內容。雖然keras其實內部有封裝好的xception，但是因爲會有某些路徑錯誤[5]，所以就直接使用了[1]。

from __future__ import print_function
from __future__ import absolute_import

import warnings
import numpy as np
import keras

from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

from keras.models import Model, Sequential
from keras import layers
from keras.layers import Dense, Input, BatchNormalization, Activation, Flatten
from keras.layers import Conv2D, SeparableConv2D, MaxPooling2D, GlobalAveragePooling2D, GlobalMaxPooling2D
from keras.engine.topology import get_source_inputs
from keras.utils.data_utils import get_file
from keras import backend as K
from keras_applications.imagenet_utils import decode_predictions
from keras_applications.imagenet_utils import _obtain_input_shape
# from keras_applications.xception import Xception

# Local .h5 weight file loaded by Xception() when include_top is True.
TF_WEIGHTS_PATH = 'E:/files/my_file/master/deep/xception/weights/xception_weights_tf_dim_ordering_tf_kernels.h5'
# Local .h5 weight file loaded by Xception() when include_top is False.
TF_WEIGHTS_PATH_NO_TOP = 'E:/files/my_file/master/deep/xception/weights/xception_weights_tf_dim_ordering_tf_kernels_notop.h5'
# If further layers are added after Xception, use the NO_TOP weights, i.e. the ones without the final softmax.
# Root data directory: holds the "train" and "test" image folders and the saved bottleneck .npy files.
DATA_PATH = "E:/dl_data/cats_and_dogs_filtered/cats_dogs/"

def Xception(include_top=True, weights='imagenet',
             input_tensor=None, input_shape=None,
             pooling=None,
             classes=1000):
    """Build the Xception network and optionally load local ImageNet weights.

    Args:
        include_top: If True, append global average pooling and a softmax
            `Dense` layer with `classes` units. If False, the convolutional
            feature map is returned, optionally pooled (see `pooling`).
        weights: `'imagenet'` to load weights from `TF_WEIGHTS_PATH` (with
            top) or `TF_WEIGHTS_PATH_NO_TOP` (without top), or `None` to keep
            the randomly initialised weights.
        input_tensor: Optional tensor to use as the model input.
        input_shape: Optional input shape; validated by `_obtain_input_shape`
            with a default size of 299 and a minimum size of 71.
        pooling: Only used when `include_top` is False: `'avg'` applies global
            average pooling, `'max'` global max pooling, anything else leaves
            the feature map unpooled.
        classes: Number of output units of the top layer.

    Returns:
        A `Model` named `'xception'`.

    Raises:
        ValueError: If `weights` is neither `'imagenet'` nor `None`, or if
            ImageNet weights are requested with the top and `classes != 1000`.
        RuntimeError: If the active Keras backend is not TensorFlow.
    """
    if weights not in {'imagenet', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `imagenet` '
                         '(pre-training on ImageNet).')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as imagenet with `include_top`'
                         ' as true, `classes` should be 1000')

    if K.backend() != 'tensorflow':
        raise RuntimeError('The Xception model is only available with '
                           'the TensorFlow backend.')
    if K.image_data_format() != 'channels_last':
        warnings.warn('The Xception model is only available for the '
                      'input data format "channels_last" '
                      '(width, height, channels). '
                      'However your settings specify the default '
                      'data format "channels_first" (channels, width, height). '
                      'You should set `image_data_format="channels_last"` in your Keras '
                      'config located at ~/.keras/keras.json. '
                      'The model being returned right now will expect inputs '
                      'to follow the "channels_last" data format.')
        # Temporarily switch the global data format; it is restored in the
        # `finally` clause below.
        K.set_image_data_format('channels_last')
        old_data_format = 'channels_first'
    else:
        old_data_format = None

    # Everything after the global format switch runs inside try/finally so
    # that a failure (invalid input shape, missing weight file, ...) cannot
    # leave the process-wide Keras data format changed.
    # NOTE(review): layers are created in the same order as before; weight
    # loading presumably relies on that order, so do not reorder the
    # statements below without verifying that the weights still load.
    try:
        input_shape = _obtain_input_shape(input_shape,
                                          default_size=299,
                                          min_size=71,
                                          data_format=K.image_data_format(),
                                          require_flatten=include_top)

        if input_tensor is None:
            img_input = Input(shape=input_shape)
        else:
            if not K.is_keras_tensor(input_tensor):
                img_input = Input(tensor=input_tensor, shape=input_shape)
            else:
                img_input = input_tensor

        # Block 1: two plain convolutions.
        x = Conv2D(32, (3, 3), strides=(2, 2), use_bias=False, name='block1_conv1')(img_input)
        x = BatchNormalization(name='block1_conv1_bn')(x)
        x = Activation('relu', name='block1_conv1_act')(x)
        x = Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x)
        x = BatchNormalization(name='block1_conv2_bn')(x)
        x = Activation('relu', name='block1_conv2_act')(x)

        # Block 2: separable convolutions with a strided 1x1 conv shortcut.
        residual = Conv2D(128, (1, 1), strides=(2, 2),
                          padding='same', use_bias=False)(x)
        residual = BatchNormalization()(residual)

        x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv1')(x)
        x = BatchNormalization(name='block2_sepconv1_bn')(x)
        x = Activation('relu', name='block2_sepconv2_act')(x)
        x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv2')(x)
        x = BatchNormalization(name='block2_sepconv2_bn')(x)

        x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block2_pool')(x)
        x = layers.add([x, residual])

        # Block 3.
        residual = Conv2D(256, (1, 1), strides=(2, 2),
                          padding='same', use_bias=False)(x)
        residual = BatchNormalization()(residual)

        x = Activation('relu', name='block3_sepconv1_act')(x)
        x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv1')(x)
        x = BatchNormalization(name='block3_sepconv1_bn')(x)
        x = Activation('relu', name='block3_sepconv2_act')(x)
        x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv2')(x)
        x = BatchNormalization(name='block3_sepconv2_bn')(x)

        x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block3_pool')(x)
        x = layers.add([x, residual])

        # Block 4.
        residual = Conv2D(728, (1, 1), strides=(2, 2),
                          padding='same', use_bias=False)(x)
        residual = BatchNormalization()(residual)

        x = Activation('relu', name='block4_sepconv1_act')(x)
        x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv1')(x)
        x = BatchNormalization(name='block4_sepconv1_bn')(x)
        x = Activation('relu', name='block4_sepconv2_act')(x)
        x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv2')(x)
        x = BatchNormalization(name='block4_sepconv2_bn')(x)

        x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block4_pool')(x)
        x = layers.add([x, residual])

        # Blocks 5-12: eight identical blocks with an identity shortcut.
        for i in range(8):
            residual = x
            prefix = 'block' + str(i + 5)

            x = Activation('relu', name=prefix + '_sepconv1_act')(x)
            x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv1')(x)
            x = BatchNormalization(name=prefix + '_sepconv1_bn')(x)
            x = Activation('relu', name=prefix + '_sepconv2_act')(x)
            x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv2')(x)
            x = BatchNormalization(name=prefix + '_sepconv2_bn')(x)
            x = Activation('relu', name=prefix + '_sepconv3_act')(x)
            x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv3')(x)
            x = BatchNormalization(name=prefix + '_sepconv3_bn')(x)

            x = layers.add([x, residual])

        # Block 13: last block with a strided shortcut.
        residual = Conv2D(1024, (1, 1), strides=(2, 2),
                          padding='same', use_bias=False)(x)
        residual = BatchNormalization()(residual)

        x = Activation('relu', name='block13_sepconv1_act')(x)
        x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block13_sepconv1')(x)
        x = BatchNormalization(name='block13_sepconv1_bn')(x)
        x = Activation('relu', name='block13_sepconv2_act')(x)
        x = SeparableConv2D(1024, (3, 3), padding='same', use_bias=False, name='block13_sepconv2')(x)
        x = BatchNormalization(name='block13_sepconv2_bn')(x)

        x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block13_pool')(x)
        x = layers.add([x, residual])

        # Block 14: two separable convolutions without a shortcut.
        x = SeparableConv2D(1536, (3, 3), padding='same', use_bias=False, name='block14_sepconv1')(x)
        x = BatchNormalization(name='block14_sepconv1_bn')(x)
        x = Activation('relu', name='block14_sepconv1_act')(x)

        x = SeparableConv2D(2048, (3, 3), padding='same', use_bias=False, name='block14_sepconv2')(x)
        x = BatchNormalization(name='block14_sepconv2_bn')(x)
        x = Activation('relu', name='block14_sepconv2_act')(x)

        if include_top:
            x = GlobalAveragePooling2D(name='avg_pool')(x)
            x = Dense(classes, activation='softmax', name='predictions')(x)
        else:
            if pooling == 'avg':
                x = GlobalAveragePooling2D()(x)
            elif pooling == 'max':
                x = GlobalMaxPooling2D()(x)

        # When the caller supplied a tensor, the model inputs are that
        # tensor's source inputs rather than the Input created above.
        if input_tensor is not None:
            inputs = get_source_inputs(input_tensor)
        else:
            inputs = img_input

        model = Model(inputs, x, name='xception')

        if weights == 'imagenet':
            if include_top:
                weights_path = TF_WEIGHTS_PATH
            else:
                weights_path = TF_WEIGHTS_PATH_NO_TOP
            model.load_weights(weights_path)
    finally:
        if old_data_format:
            K.set_image_data_format(old_data_format)
    return model

def preprocess_input(x):
    """Rescale pixel values so that 0 maps to -1 and 255 maps to 1.

    Args:
        x: Array-like of pixel values. A floating-point ndarray is modified
            in place (and returned); any other input (integer arrays, lists)
            is first converted to a new float32 array, leaving the caller's
            data untouched.

    Returns:
        The rescaled floating-point array.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # In-place true division is not defined for integer arrays, so e.g.
        # uint8 images would raise here; work on a float copy instead.
        x = x.astype(np.float32)
    x /= 255.
    x -= 0.5
    x *= 2.
    return x

# 提取圖片的bottleneck特徵
def get_bottleneck_features():
    """Extract Xception bottleneck features for the train and test images.

    Images are read from `DATA_PATH + "train"` and `DATA_PATH + "test"`,
    passed through the Xception network without its top, and the resulting
    feature arrays are saved as `bottleneck_features_train.npy` and
    `bottleneck_features_test.npy` under `DATA_PATH`.
    """
    # Image generator: only rescales pixel values by 1/255.
    # NOTE(review): preprocess_input() in this file scales to [-1, 1] instead
    # and is never used; confirm which scaling the pretrained weights expect.
    datagen = ImageDataGenerator(rescale=1. / 255)
    # Training-set generator. shuffle=False keeps the directory order so the
    # labels can be reconstructed later; class_mode=None yields images only.
    generator_train = datagen.flow_from_directory(DATA_PATH+"train",
                                                  target_size=(150, 150), batch_size=32, class_mode=None, shuffle=False)
    # Test-set generator, same settings.
    generator_test = datagen.flow_from_directory(DATA_PATH+"test",
                                                 target_size=(150, 150), batch_size=32, class_mode=None, shuffle=False)

    # The pretrained weights are already loaded inside Xception(), so no
    # separate load_weights() call is needed here.
    model = Xception(include_top=False, weights='imagenet')

    # `steps` is the number of batches to draw from the generator. Using
    # len(generator) covers every image exactly once (including the final
    # partial batch) instead of hard-coding `samples / batch_size`, which is
    # fractional on Python 3 and truncates on Python 2.
    bottleneck_features_train = model.predict_generator(generator_train, len(generator_train))
    np.save((DATA_PATH + "bottleneck_features_train.npy"), bottleneck_features_train)

    bottleneck_features_test = model.predict_generator(generator_test, len(generator_test))
    np.save((DATA_PATH + "bottleneck_features_test.npy"), bottleneck_features_test)
    print("over!")

if __name__ == '__main__':
    # Feature extraction is a one-off step; uncomment to (re)generate the
    # bottleneck .npy files before training.
    # get_bottleneck_features()

    # Load the saved bottleneck features.
    train_data = np.load(DATA_PATH+"bottleneck_features_train.npy")
    print("train_data: ", np.shape(train_data))
    # The features were stored unshuffled: 1000 cats followed by 1000 dogs.
    train_labels = np.array([0]*1000+[1]*1000)
    print("train_labels: ", np.shape(train_labels))
    test_data = np.load(DATA_PATH + "bottleneck_features_test.npy")
    test_labels = np.array([0] * 500 + [1] * 500)

    # Convert the labels to one-hot encoding.
    train_labels = keras.utils.to_categorical(train_labels, 2)
    test_labels = keras.utils.to_categorical(test_labels, 2)

    # Classifier on top of the bottleneck features. The per-sample feature
    # shape is taken from the loaded data rather than hard-coded, so it stays
    # correct if the extraction image size changes.
    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(2, activation='softmax'))

    # Compile.
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    print("Training --------------------")
    model.fit(train_data, train_labels, epochs=4, verbose=2, batch_size=100, shuffle=True)
    model.save_weights(DATA_PATH+"cats_dogs_bottleneck.h5")

    # Evaluate on the test set.
    print("\nTesting --------------------")
    loss, accuracy = model.evaluate(test_data, test_labels)

    print("\ntest loss=", loss)
    print("\ntest accuracy=", accuracy)

上述代碼中的其他部分參考[2]，原文代碼直接copy過來有幾個地方報錯，因爲[2]使用的是封裝好的VGG而不是我這裏的自定義的Xception。還有np.save()的報錯問題，均已在上述代碼中修改。

（其他細節方面改天再更）

運行結果

訓練：

測試：

補充

如果要在xception後面再加其它層，比如[6]，只需把上述代碼中網絡結構部分改成下面即可：

# Network structure
# NOTE(review): this fragment is meant to replace the network-structure part of the
# script above; LSTM, n_hidden_unis, n_steps and n_inputs are not defined in this
# file and must be imported/defined by the reader.
    model = Sequential()
    # print("before lstm: ", model.get_layer(index=0).output_shape)
    # LSTM layer (single layer)
    model.add(LSTM(units=n_hidden_unis, input_shape=(n_steps, n_inputs)))
    print("after lstm: ", model.get_layer(index=0).output_shape) # (None, 512)
    # Fully connected layer
    # model.add(Flatten(input_shape=(None, n_hidden_unis)))
    model.add(Dense(2, activation='softmax'))

運行結果：