『自己的工作6』MNIST Plus++競賽(更加困難的mnist數據集競賽)!

MNIST Plus++競賽(更加困難的mnist數據集競賽)!

一. 數據集描述

MNIST Plus++,一個困難的mnist競賽數據集!

  • 對mnist數據集的一個困難擴充
  • 爲了檢驗模型對分類任務的推廣能力和特徵抓取能力

1.1. 訓練集(15,670個jpg文件)

文件格式如下(命名方式)

  • format("%d_%05d.jpg", label, digit_index)
  • label爲圖像文件對應的真值(0-9)
  • digit_index爲圖像文件在所屬類別內的索引(每個類別從0編到該類別的圖像數)

1.2. 測試集(54,330個jpg文件)

文件格式如下(命名方式)

  • format("%05d.jpg", index)
  • index爲文件編號(0-54329)

二. 數據集轉csv文件

2.1. 訓練集及標籤轉csv文件

  • 訓練集及標籤轉csv文件
#coding:utf-8
import  os
import csv
import random
import numpy as np

def create_csv(dirname):
    """Write ``<dirname>.csv`` listing every labelled JPEG in ``./<dirname>/``.

    Each output row is ``[image_path, label]`` where ``image_path`` is
    ``'./<dirname>/<file name>'`` and ``label`` is the run of decimal digits
    before the first underscore of the file name (``"<label>_<index>.jpg"``).
    Files that do not end in ``.jpg`` or do not start with ``<digits>_`` are
    skipped, so a stray image cannot put a non-numeric label into the CSV.
    Rows are written in random order.

    Args:
        dirname: Name of the image directory, relative to the current
            working directory; also the stem of the CSV file that is written.
    """
    path = './' + dirname + '/'             # change to your own image folder
    images = []
    for n in os.listdir(path):
        if not n.endswith('.jpg'):
            continue
        # Take the whole label field rather than only the first character,
        # so labels with more than one digit are kept intact.
        label, sep, _ = n.partition('_')
        if not sep or not label.isdecimal():
            continue
        images.append([path + n, label])

    # A single shuffle already yields a uniformly random order.
    random.shuffle(images)
    # newline='' lets the csv module control the line endings itself.
    with open(dirname + '.csv', mode='w', newline='') as f:
        csv.writer(f).writerows(images)

# 加載數據,分爲訓練和測試部分。
def load_data(path, mode='train', train_ratio=0.99):
    """Read ``<path>.csv`` and return one split of its (image path, label) rows.

    Rows are used in file order; no shuffling happens here. The first
    ``int(train_ratio * n)`` rows form the training split and the remaining
    rows form the held-out split (99% / 1% with the default ratio).

    Args:
        path: CSV path without the ``.csv`` suffix.
        mode: ``'train'`` selects the leading split; any other value selects
            the trailing (held-out) split.
        train_ratio: Fraction of rows assigned to the training split.

    Returns:
        ``(images, labels)``: a list of path strings and a list of int labels.

    Raises:
        ValueError: If a non-blank row does not have exactly two fields or
            its label field is not an integer.
    """
    images, labels = [], []
    with open(path + '.csv', newline='') as f:
        for row in csv.reader(f):
            if not row:                     # tolerate blank lines in the CSV
                continue
            img, label = row
            images.append(img)
            labels.append(int(label))

    # Both lists have the same length, so one split index serves both.
    split = int(train_ratio * len(images))
    if mode == 'train':
        return images[:split], labels[:split]
    return images[split:], labels[split:]

if __name__ == "__main__":
    # create_csv('train')   # run once beforehand to (re)generate train.csv
    # Print how many samples fall into each split of train.csv.
    for split_name in ('train', 'test'):
        _, split_labels = load_data('./train', split_name)
        print(len(split_labels))
    # create_csv('test')

2.2. 測試集轉csv文件

  • 測試集轉csv文件(這就是最終需要預測並且上傳的結果!)
#coding:utf-8
import  os
import csv
import random
import numpy as np
import tensorflow as tf

# 1. 路徑變圖片
def cover_to_image(x):
    """Load the JPEG file at path ``x`` as a 3-channel image resized to 224x224.

    Args:
        x: Path of a JPEG file.

    Returns:
        The tensor produced by ``tf.image.resize``; pixel values are not
        rescaled or normalised here.
    """
    raw_bytes = tf.io.read_file(x)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)   # 3 channels requested
    return tf.image.resize(decoded, [224, 224])

# 2. 計算均值和方差
#1. 0~10000:      [113.85617, 108.830154, 100.53466] [67.78309, 66.85325, 69.263535]
#2. 10000~20000:  [113.80364, 108.86746, 101.23807]  [68.67979, 68.00103, 70.159836]
#3. 20000~30000:  [113.1947, 108.1745, 100.01061]    [68.5435, 67.56627, 69.880646]
#4. 30000~40000:  [112.90792, 108.4116, 100.10866]   [68.22623, 67.380264, 69.61325]
#5. 40000~50000:  [112.88327, 108.186104, 99.96623]  [67.59057, 66.67929, 69.02802]
#6. 50000~60000:  [114.23421, 109.06324, 100.649376] [67.503265, 66.92577, 69.64176]
#7. 60000~70000:  [114.297066, 109.30525, 101.281685] [68.341255, 67.64873, 69.79894]
######################################
# 均值: [113.59671086, 108.69118686, 100.54132729]
# 方差: [68.09538571, 67.29351486, 69.62656957]
def compute_mean_var(image):
    """Return the per-channel mean and standard deviation of ``image``.

    Channels are taken along the last axis. Despite the function name, the
    second return value holds standard deviations (``np.std``), not variances.

    Args:
        image: Array whose last axis indexes channels; the original comment
            gives the layout as ``[image_num, w, h, c]``.

    Returns:
        ``(mean, var)``: two lists with one entry per channel.
    """
    channel_views = [image[..., ch] for ch in range(image.shape[-1])]
    mean = [np.mean(view) for view in channel_views]
    var = [np.std(view) for view in channel_views]
    return mean, var


# 3. 歸一化圖片
def norm_images(image):
    """Standardise the first three channels of ``image``.

    A float32 copy of the input is made; each of channels 0, 1 and 2 then has
    its mean subtracted and is divided by its standard deviation, both
    obtained from ``compute_mean_var`` on that copy.

    Args:
        image: Array whose last axis indexes channels; the original comment
            gives the layout as ``[image_num, w, h, c]``.

    Returns:
        The standardised float32 array.
    """
    normed = image.astype('float32')
    mean, var = compute_mean_var(normed)
    # Only channels 0..2 are touched, exactly as many as the RGB decode yields.
    for ch in range(3):
        normed[..., ch] = (normed[..., ch] - mean[ch]) / var[ch]
    return normed


def create_csv(dirname):
    """Write ``<dirname>.csv`` with one ``./<dirname>/<file>.jpg`` path per row.

    Only directory entries whose name ends in ``.jpg`` are listed. Rows are
    written in sorted file-name order so repeated runs give the same file.

    Args:
        dirname: Name of the image directory, relative to the current
            working directory; also the stem of the CSV file that is written.
    """
    path = './' + dirname + '/'             # change to your own image folder
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    names = sorted(n for n in os.listdir(path) if n.endswith('.jpg'))
    # newline='' stops the csv module's '\r\n' row terminator from being
    # translated a second time, which would leave blank rows on Windows.
    with open(dirname + '.csv', 'w', newline='') as csvfile:
        csv.writer(csvfile).writerows([path + n] for n in names)

# 加載應用的數據。
def load_data(path):
    """Return the first field of every non-blank row of ``<path>.csv``.

    Args:
        path: CSV path without the ``.csv`` suffix.

    Returns:
        A list of strings (the image paths written by ``create_csv``), in
        file order. Blank lines are skipped instead of raising IndexError.
    """
    with open(path + '.csv', newline='') as f:
        return [row[0] for row in csv.reader(f) if row]

if __name__ == "__main__":
    # create_csv('all_data')   # run once beforehand to build all_data.csv
    # Statistics are computed on one slice of the path list per run.
    images_src = load_data('./all_data')[60000:70000]

    decoded = []
    for idx, src in enumerate(images_src):
        print(idx)                                      # progress indicator
        decoded.append(cover_to_image(src).numpy())

    images = np.array(decoded)
    print(images.shape)

    mean, std = compute_mean_var(images)
    print(mean, std)

三. Inception網絡結構

3.1. 訓練模型

  • 訓練部分代碼,使用了預訓練模型!
import os
import tensorflow as tf
import numpy as np

from tensorflow import keras
from tensorflow.python.keras.api._v2.keras import layers, optimizers, losses
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from loadmnist_Inception import load_data

# Seed the TensorFlow and NumPy random number generators with a fixed value.
tf.random.set_seed(22)
np.random.seed(22)
# Set TensorFlow's C++ log-level environment variable to '2'.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
assert tf.__version__.startswith('2.')      # raises AssertionError unless the TensorFlow version string starts with '2.'

########################################################################################################################
# Default 3-element mean / std used by normalize().
# NOTE(review): these look like the usual ImageNet RGB statistics for [0, 1]-scaled pixels — confirm.
img_mean = tf.constant([0.485, 0.456, 0.406])
img_std = tf.constant([0.229, 0.224, 0.225])
def normalize(x, mean=img_mean, std=img_std):
    """Standardise ``x`` element-wise as ``(x - mean) / std``.

    Args:
        x: Image tensor; the original comment gives its shape as
            [224, 224, 3].
        mean: Values subtracted from ``x``; defaults to ``img_mean``.
        std: Values the centred tensor is divided by; defaults to ``img_std``.

    Returns:
        The standardised tensor.
    """
    # The 3-element mean/std broadcast against the trailing axis of x
    # (shapes are right-aligned: [3] -> [1, 1, 3] -> [224, 224, 3]).
    centered = x - mean
    return centered / std

# 預處理的函數,複製過來。
def preprocess(x, y):
    """Turn an (image path, integer label) pair into (image tensor, one-hot label).

    Args:
        x: Path of a JPEG file.
        y: Integer class label.

    Returns:
        ``(image, label)``: the image decoded with 3 channels, resized to
        224x224, rescaled and passed through ``normalize``; the label
        one-hot encoded with depth 10.
    """
    encoded = tf.io.read_file(x)
    image = tf.image.decode_jpeg(encoded, channels=3)       # 3 channels requested
    image = tf.image.resize(image, [224, 224])
    # Augmentation left disabled by the author: pad 4 px on each side of
    # axes 0 and 1, random left/right and up/down flips, random crop to
    # [224, 224, 3].

    # Map pixel values from [0, 255] to [-1, 1] before calling normalize().
    # NOTE(review): normalize() therefore receives [-1, 1] data — confirm its
    # default mean/std are intended for that range.
    unit = tf.cast(image, dtype=tf.float32) / 255.
    image = normalize(2 * (unit - 0.5))

    one_hot = tf.one_hot(tf.convert_to_tensor(y), depth=10)
    return image, one_hot

########################################################################################################################
batchsz = 64

# Training dataset: the only one that is shuffled.
images, labels = load_data('./train', 'train')
db_train = tf.data.Dataset.from_tensor_slices((images, labels))     # Dataset of (path, label) pairs
db_train = db_train.shuffle(1000).map(preprocess).batch(batchsz)    # map() turns each path into image data

# # Validation dataset (disabled)
# images2, labels2 = load_data('./train', 'val')
# db_val = tf.data.Dataset.from_tensor_slices((images2, labels2))
# db_val = db_val.map(preprocess).batch(batchsz)

# Held-out dataset: the rows of train.csv that load_data() does not put in the 'train' split.
images3, labels3 = load_data('./train', 'test')
db_test = tf.data.Dataset.from_tensor_slices((images3, labels3))
db_test = db_test.map(preprocess).batch(batchsz)

##########################################################################################
# Start from a network shipped with keras.applications together with its pretrained weights.
# InceptionV3 is loaded with ImageNet weights; include_top=False drops the ImageNet
# classification head and pooling='max' is requested for the backbone output.
net = keras.applications.InceptionV3(weights='imagenet', include_top=False, pooling='max')
# Optional layer freezing (disabled):
# for i in range(len(net.layers)-16):
#     net.layers[i].trainable = False
model = keras.Sequential([net, layers.Dense(10)])                   # Dense(10) emits one logit per digit class

model.build(input_shape=(None, 224, 224, 3))
model.summary()

##########################################################################################
# Early stopping (disabled):
# early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.00001, patience=10, mode='max')

# reduce_lr multiplies the learning rate by `factor` (0.2) once 'val_accuracy' has not
# improved for `patience` (10) epochs, never going below min_lr.
# NOTE(review): this callback is created but not passed to model.fit(), and fit() is given
# no validation data, so it currently has no effect.
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.2, patience=10, min_lr=0.000003, mode='max')

##########################################################################################
# `learning_rate` is the documented keyword; the `lr` alias is deprecated.
model.compile(optimizer=optimizers.Adam(learning_rate=1e-4),
              loss=losses.CategoricalCrossentropy(from_logits=True),   # labels are one-hot, outputs are logits
              metrics=['accuracy'])

model.fit(db_train, epochs=50, initial_epoch=0)                     # trains only; no validation is run

model.evaluate(db_test)
model.save('./my_transfer_InceptionV3.h5')

3.2. 測試模型

  • 保存預測結果到result.csv文件
import os
import tensorflow as tf
import numpy as np
import csv

from tensorflow import keras
from tensorflow.keras import layers, optimizers, losses
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from predict_test import load_data

# Seed the TensorFlow and NumPy random number generators with a fixed value.
tf.random.set_seed(22)
np.random.seed(22)
# Set TensorFlow's C++ log-level environment variable to '2'.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
assert tf.__version__.startswith('2.') # raises AssertionError unless the TensorFlow version string starts with '2.'

##########################################################################################
# Default 3-element mean / std used by normalize().
# NOTE(review): these look like the usual ImageNet RGB statistics for [0, 1]-scaled pixels — confirm.
img_mean = tf.constant([0.485, 0.456, 0.406])
img_std = tf.constant([0.229, 0.224, 0.225])
def normalize(x, mean=img_mean, std=img_std):
    """Standardise ``x`` element-wise as ``(x - mean) / std``.

    Args:
        x: Image tensor; the original comment gives its shape as
            [224, 224, 3].
        mean: Values subtracted from ``x``; defaults to ``img_mean``.
        std: Values the centred tensor is divided by; defaults to ``img_std``.

    Returns:
        The standardised tensor.
    """
    # The 3-element mean/std broadcast against the trailing axis of x
    # (shapes are right-aligned: [3] -> [1, 1, 3] -> [224, 224, 3]).
    shifted = x - mean
    return shifted / std

# 預處理的函數,複製過來。
def preprocess(x):
    """Turn an image path into the tensor that is fed to the model.

    Args:
        x: Path of a JPEG file.

    Returns:
        The image decoded with 3 channels, resized to 224x224, rescaled and
        passed through ``normalize``.
    """
    encoded = tf.io.read_file(x)
    image = tf.image.decode_jpeg(encoded, channels=3)       # 3 channels requested
    image = tf.image.resize(image, [224, 224])
    # Augmentation left disabled by the author: pad 4 px on each side of
    # axes 0 and 1, random left/right and up/down flips, random crop to
    # [224, 224, 3].

    # Map pixel values from [0, 255] to [-1, 1] before calling normalize().
    unit = tf.cast(image, dtype=tf.float32) / 255.
    return normalize(2 * (unit - 0.5))

##########################################################################################
batchsz = 64
imgs = load_data('./test')
db_test = tf.data.Dataset.from_tensor_slices(imgs)
db_test = db_test.map(preprocess).batch(batchsz)                    # not shuffled: order must match imgs
##########################################################################################
# Build an InceptionV3 backbone followed by Dense(10), then load the saved weights into it.
net = keras.applications.InceptionV3(weights='imagenet', include_top=False, pooling='max')
# Optional layer freezing (disabled):
# for i in range(len(net.layers)-16):
#     net.layers[i].trainable = False
model = keras.Sequential([net, layers.Dense(10)])

model.build(input_shape=(None, 224, 224, 3))
model.summary()
model.load_weights('./my_transfer_InceptionV3.h5')
############################################## Collect all predictions in one list ##########
result_all = []
for i, x in enumerate(db_test, start=1):
    preds = model.predict(x)
    result_all.extend(np.argmax(preds, axis=1))     # predicted class index of every image in the batch
    print('epoch:', i)                              # i counts batches, starting at 1
print(len(result_all))
############################################## Save the predictions to result.csv ###########
# The with-block guarantees the file is flushed and closed; newline='' lets the csv
# module control line endings (otherwise blank rows appear on Windows).
with open("result.csv", "w", newline='') as csvfile:
    writer = csv.writer(csvfile)                    # one writer for the whole file
    for i, (img_path, result_i) in enumerate(zip(imgs, result_all)):
        # './test/01677.jpg' -> '01677'
        img_num = os.path.splitext(os.path.basename(img_path))[0]
        print('圖像數:', i+1, img_num, result_i)
        writer.writerow((img_num, result_i))

3.3. 最終預測結果result.csv

  • 選取了部分結果,第一列是圖片的編號,第二列是圖片預測的數字!
01677,5
17028,2
20216,2
51817,6
52902,9
11626,6
51851,4
11020,0
54059,5
45142,0
13129,6
13544,7
21336,2
04468,8
45685,6
02609,3
03635,0
06451,1
26061,0
22283,2
30184,8
09008,2
04459,6
01228,2
21062,0
39979,9
37160,8
27048,8
04408,4
51317,7
19533,0
42744,1
25122,3
35339,0
15835,1
08527,2
08047,0
52603,4
26161,3
20064,9
  • 最終取得的名次!
發佈了153 篇原創文章 · 獲贊 339 · 訪問量 15萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章