MNIST Plus++競賽(更加困難的mnist數據集競賽)!
文章目錄
一. 數據集描述
- 首先提供數據集百度雲鏈接:提取碼:1eji
MNIST Plus++,一個困難的mnist競賽數據集!
- 對mnist數據集的一個困難擴充
- 爲了檢驗模型對分類任務的推廣能力和特徵抓取能力
1.1. 訓練集(15,670個jpg文件)
文件格式如下(命名方式)
- format("%d_%05d.jpg", label, digit_index)
- label爲圖像文件對應的真值(0-9)
- digit_index爲圖像文件在所屬類別內的索引(每個類別內從0開始編號)
1.2. 測試集(54,330個jpg文件)
文件格式如下(命名方式)
- format("%05d.jpg", index)
- index爲文件編號(0-54329)
二. 數據集轉csv文件
2.1. 訓練集及標籤轉csv文件
- 訓練集及標籤轉csv文件
#coding:utf-8
import os
import csv
import random
import numpy as np
def create_csv(dirname):
    """Index the labelled JPEGs in ``./<dirname>/`` into ``<dirname>.csv``.

    Every file whose name ends in ``.jpg`` becomes one ``image_path,label``
    row. The label is the first character of the file name (training files
    are named ``"%d_%05d.jpg" % (label, digit_index)``). Rows are written in
    random order; files with any other extension are ignored.

    Args:
        dirname: Image directory name, relative to the current working
            directory. It is also the stem of the CSV file that is written.
    """
    path = './' + dirname + '/'  # change this to your own image directory
    images = [[path + n, n[0]] for n in os.listdir(path) if n.endswith('.jpg')]
    # A single shuffle already produces a uniformly random order.
    random.shuffle(images)
    # newline='' lets the csv module control the line endings itself.
    with open(dirname + '.csv', mode='w', newline='') as f:
        csv.writer(f).writerows(images)
def load_data(path, mode='train'):
    """Load image paths and integer labels from ``<path>.csv`` and split them.

    The CSV must contain ``image_path,label`` rows. The first 99% of the rows
    form the training split and the remaining 1% the held-out split; there is
    no separate validation split.

    Args:
        path: CSV path without the ``.csv`` extension.
        mode: ``'train'`` selects the first 99% of the rows; any other value
            selects the last 1%.

    Returns:
        A ``(images, labels)`` tuple of equal-length lists: image path
        strings and ``int`` labels.

    Raises:
        ValueError: If a non-empty row does not have exactly two columns or
            its label is not an integer.
    """
    images, labels = [], []
    with open(path + '.csv', newline='') as f:
        for row in csv.reader(f):
            if not row:
                # Blank lines would otherwise break the two-column unpack.
                continue
            img, label = row
            images.append(img)
            labels.append(int(label))
    # Boundary between the 99% training part and the 1% held-out part.
    split = int(0.99 * len(images))
    if mode == 'train':
        return images[:split], labels[:split]
    return images[split:], labels[split:]
if __name__ == "__main__":
    # Uncomment to (re)build train.csv first:
    # create_csv('train')
    # Report how many samples land in each split.
    for split in ('train', 'test'):
        img, label = load_data('./train', split)
        print(len(label))
    # Uncomment to build test.csv:
    # create_csv('test')
2.2. 測試集轉csv文件
- 測試集轉csv文件(這就是最終需要預測並且上傳的結果!)
#coding:utf-8
import os
import csv
import random
import numpy as np
import tensorflow as tf
# 1. Image path -> image tensor.
def cover_to_image(x):
    """Read the JPEG file at path ``x`` and return it resized to 224x224.

    The image is decoded with ``channels=3`` before resizing.
    """
    raw = tf.io.read_file(x)
    decoded = tf.image.decode_jpeg(raw, channels=3)
    return tf.image.resize(decoded, [224, 224])
# 2. Per-channel mean and spread of a batch of images.
# Recorded statistics per slice of the image list (first list: mean,
# second list: spread -- presumably the np.std output of compute_mean_var):
# 1. 0~10000: [113.85617, 108.830154, 100.53466] [67.78309, 66.85325, 69.263535]
# 2. 10000~20000: [113.80364, 108.86746, 101.23807] [68.67979, 68.00103, 70.159836]
# 3. 20000~30000: [113.1947, 108.1745, 100.01061] [68.5435, 67.56627, 69.880646]
# 4. 30000~40000: [112.90792, 108.4116, 100.10866] [68.22623, 67.380264, 69.61325]
# 5. 40000~50000: [112.88327, 108.186104, 99.96623] [67.59057, 66.67929, 69.02802]
# 6. 50000~60000: [114.23421, 109.06324, 100.649376] [67.503265, 66.92577, 69.64176]
# 7. 60000~70000: [114.297066, 109.30525, 101.281685] [68.341255, 67.64873, 69.79894]
######################################
# Mean:   [113.59671086, 108.69118686, 100.54132729]
# Spread: [68.09538571, 67.29351486, 69.62656957]
def compute_mean_var(image):
    """Return per-channel statistics of ``image``.

    Args:
        image: Array whose last axis is the channel axis, e.g.
            ``[image_num, w, h, c]``.

    Returns:
        ``(mean, var)``: two lists with one entry per channel. ``mean`` holds
        ``np.mean`` of each channel; ``var`` holds ``np.std`` of each channel
        (a standard deviation, despite the name).
    """
    channels = range(image.shape[-1])
    mean = [np.mean(image[..., c]) for c in channels]
    var = [np.std(image[..., c]) for c in channels]
    return mean, var
# 3. Standardise a batch of images channel by channel.
def norm_images(image):
    """Return a float32 copy of ``image`` with its first three channels standardised.

    Each of channels 0, 1 and 2 has its own mean subtracted and is divided by
    its own spread, both taken from ``compute_mean_var`` on this same batch.

    Args:
        image: Array with the channel axis last, e.g. ``[image_num, w, h, c]``.
    """
    image = image.astype('float32')
    mean, var = compute_mean_var(image)
    for c in range(3):
        image[..., c] = (image[..., c] - mean[c]) / var[c]
    return image
def create_csv(dirname):
    """Write ``<dirname>.csv`` with one ``./<dirname>/<file>`` row per JPEG.

    Only file names ending in ``.jpg`` are listed. Names are written in
    sorted order so that the CSV, and any positional slice taken from it, is
    reproducible between runs.

    Args:
        dirname: Image directory name, relative to the current working
            directory. It is also the stem of the CSV file that is written.
    """
    path = './' + dirname + '/'  # change this to your own image directory
    names = sorted(n for n in os.listdir(path) if n.endswith('.jpg'))
    # newline='' stops the csv module's "\r\n" terminator from being
    # translated again, which would leave blank rows on Windows.
    with open(dirname + '.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(['./' + str(dirname) + '/' + str(n)] for n in names)
# Load the image paths that predictions will be made for.
def load_data(path):
    """Return the first column of every non-empty row in ``<path>.csv``.

    Args:
        path: CSV path without the ``.csv`` extension.

    Returns:
        A list of strings (image paths), in file order. Blank rows are
        skipped instead of raising ``IndexError``.
    """
    with open(path + '.csv', newline='') as f:
        return [row[0] for row in csv.reader(f) if row]
if __name__ == "__main__":
    # Uncomment to (re)build all_data.csv first:
    # create_csv('all_data')
    # Statistics are computed on one slice of the image list at a time.
    images_src = load_data('./all_data')[60000:70000]
    images = []
    for i, src in enumerate(images_src):
        print(i)
        img_i = cover_to_image(src)
        images.append(img_i.numpy())
    images = np.array(images)
    print(images.shape)
    mean, std = compute_mean_var(images)
    print(mean, std)
三. Inception網絡結構
3.1. 訓練模型
- 訓練部分代碼,使用了預訓練模型!
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.python.keras.api._v2.keras import layers, optimizers, losses
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from loadmnist_Inception import load_data
tf.random.set_seed(22)
np.random.seed(22)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
assert tf.__version__.startswith('2.') # 判斷tf的版本是否是以‘2.’開頭,如果是,則返回True,否則返回False
########################################################################################################################
# Default per-channel mean / std used by normalize(); the values match the
# widely used ImageNet RGB statistics.
img_mean = tf.constant([0.485, 0.456, 0.406])
img_std = tf.constant([0.229, 0.224, 0.225])


def normalize(x, mean=img_mean, std=img_std):
    """Return ``(x - mean) / std``.

    ``mean`` and ``std`` have shape ``[3]`` and broadcast against the trailing
    channel axis of ``x`` (``[224, 224, 3]`` when called from ``preprocess``).
    """
    centered = x - mean
    return centered / std
# Per-sample preprocessing applied through Dataset.map.
def preprocess(x, y):
    """Turn an (image path, integer label) pair into (image tensor, one-hot label).

    Args:
        x: Path of a JPEG file.
        y: Integer class index.

    Returns:
        ``(image, label)``: the image decoded with 3 channels, resized to
        224x224, rescaled and passed through ``normalize``; the label one-hot
        encoded with depth 10.
    """
    raw = tf.io.read_file(x)
    img = tf.image.decode_jpeg(raw, channels=3)
    img = tf.image.resize(img, [224, 224])
    # Augmentations that are currently disabled:
    # img = tf.pad(img, [[4, 4], [4, 4], [0, 0]])  # pad 4 zeros on each side of axes 0 and 1
    # img = tf.image.random_flip_left_right(img)
    # img = tf.image.random_flip_up_down(img)
    # img = tf.image.random_crop(img, [224, 224, 3])
    # Rescale [0, 255] to [-1, 1], then apply the per-channel normalisation.
    # NOTE(review): normalize()'s default mean/std look like statistics for
    # [0, 1]-ranged input -- confirm the extra [-1, 1] rescale is intended.
    img = tf.cast(img, dtype=tf.float32)
    img = 2 * (img / 255. - 0.5)
    img = normalize(img)
    label = tf.one_hot(tf.convert_to_tensor(y), depth=10)
    return img, label
########################################################################################################################
batchsz = 64

# Training set: shuffled, decoded on the fly by preprocess(), then batched.
images, labels = load_data('./train', 'train')
db_train = tf.data.Dataset.from_tensor_slices((images, labels))
db_train = db_train.shuffle(1000).map(preprocess).batch(batchsz)

# Validation set (currently disabled):
# images2, labels2 = load_data('./train', 'val')
# db_val = tf.data.Dataset.from_tensor_slices((images2, labels2))
# db_val = db_val.map(preprocess).batch(batchsz)

# Held-out set: no shuffling needed.
images3, labels3 = load_data('./train', 'test')
db_test = tf.data.Dataset.from_tensor_slices((images3, labels3))
db_test = db_test.map(preprocess).batch(batchsz)

# Transfer learning: InceptionV3 with ImageNet weights, without its
# 1000-class top layer, followed by a fresh 10-way Dense layer.
net = keras.applications.InceptionV3(weights='imagenet', include_top=False, pooling='max')
# Optional: freeze all but the last 16 layers of the backbone.
# for i in range(len(net.layers)-16):
#     net.layers[i].trainable = False
model = keras.Sequential([net, layers.Dense(10)])
model.build(input_shape=(None, 224, 224, 3))
model.summary()

# Early stopping (currently disabled):
# early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.00001, patience=10, mode='max')
# Multiply the learning rate by 0.2 once val_accuracy has not improved for
# 10 epochs, never going below 3e-6.
# NOTE: this callback is not passed to model.fit() below, and fit() receives
# no validation data, so it currently has no effect.
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.2, patience=10, min_lr=0.000003, mode='max')

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(optimizer=optimizers.Adam(learning_rate=1e-4),
              loss=losses.CategoricalCrossentropy(from_logits=True),  # Dense(10) outputs logits
              metrics=['accuracy'])
model.fit(db_train, epochs=50, initial_epoch=0)
model.evaluate(db_test)
model.save('./my_transfer_InceptionV3.h5')
3.2. 測試模型
- 保存預測結果到result.csv文件
import os
import tensorflow as tf
import numpy as np
import csv
from tensorflow import keras
from tensorflow.keras import layers, optimizers, losses
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from predict_test import load_data
tf.random.set_seed(22)
np.random.seed(22)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
assert tf.__version__.startswith('2.') # 判斷tf的版本是否是以‘2.’開頭,如果是,則返回True,否則返回False
##########################################################################################
# Default per-channel mean / std used by normalize(); the values match the
# widely used ImageNet RGB statistics.
img_mean = tf.constant([0.485, 0.456, 0.406])
img_std = tf.constant([0.229, 0.224, 0.225])


def normalize(x, mean=img_mean, std=img_std):
    """Subtract ``mean`` from ``x`` and divide the result by ``std``.

    ``mean`` and ``std`` have shape ``[3]`` and broadcast against the trailing
    channel axis of ``x`` (``[224, 224, 3]`` when called from ``preprocess``).
    """
    shifted = x - mean
    return shifted / std
# Per-sample preprocessing applied through Dataset.map (no labels at inference).
def preprocess(x):
    """Load the JPEG at path ``x`` and return the preprocessed image tensor.

    The image is decoded with 3 channels, resized to 224x224, rescaled from
    [0, 255] to [-1, 1] and passed through ``normalize``.
    """
    raw = tf.io.read_file(x)
    img = tf.image.decode_jpeg(raw, channels=3)
    img = tf.image.resize(img, [224, 224])
    # Augmentations that are currently disabled:
    # img = tf.pad(img, [[4, 4], [4, 4], [0, 0]])  # pad 4 zeros on each side of axes 0 and 1
    # img = tf.image.random_flip_left_right(img)
    # img = tf.image.random_flip_up_down(img)
    # img = tf.image.random_crop(img, [224, 224, 3])
    # Rescale [0, 255] to [-1, 1], then apply the per-channel normalisation.
    img = tf.cast(img, dtype=tf.float32)
    img = 2 * (img / 255. - 0.5)
    return normalize(img)
##########################################################################################
batchsz = 64
imgs = load_data('./test')
db_test = tf.data.Dataset.from_tensor_slices(imgs)
db_test = db_test.map(preprocess).batch(batchsz)

# Rebuild the same architecture that was trained, then load the saved weights.
net = keras.applications.InceptionV3(weights='imagenet', include_top=False, pooling='max')
# Optional: freeze all but the last 16 layers of the backbone.
# for i in range(len(net.layers)-16):
#     net.layers[i].trainable = False
model = keras.Sequential([net, layers.Dense(10)])
model.build(input_shape=(None, 224, 224, 3))
model.summary()
model.load_weights('./my_transfer_InceptionV3.h5')

# Collect the predicted class of every image, batch by batch, in one list.
result_all = []
for i, x in enumerate(db_test, start=1):
    preds = model.predict(x)
    result_all.extend(np.argmax(preds, axis=1))
    print('epoch:', i)  # counts batches, not training epochs
print(len(result_all))

# Save "<image number>,<predicted digit>" rows to result.csv.
# The with-block guarantees the file is flushed and closed; newline='' lets
# the csv module control the line endings itself.
with open("result.csv", "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for i, (img_path, result_i) in enumerate(zip(imgs, result_all)):
        # './test/01677.jpg' -> '01677'
        img_num = os.path.splitext(os.path.basename(img_path))[0]
        print('圖像數:', i + 1, img_num, result_i)
        writer.writerow((img_num, result_i))
3.3. 最終預測結果result.csv
- 選取了部分結果,第一列是圖片的編號,第二列是圖片預測的數字!
01677,5
17028,2
20216,2
51817,6
52902,9
11626,6
51851,4
11020,0
54059,5
45142,0
13129,6
13544,7
21336,2
04468,8
45685,6
02609,3
03635,0
06451,1
26061,0
22283,2
30184,8
09008,2
04459,6
01228,2
21062,0
39979,9
37160,8
27048,8
04408,4
51317,7
19533,0
42744,1
25122,3
35339,0
15835,1
08527,2
08047,0
52603,4
26161,3
20064,9
- 最終取得的名次!