Kaggle Handwritten Digit Recognition (Digit Recognizer) Notes

Competition page: https://www.kaggle.com/c/digit-recognizer

After finishing cs231n and PyTorch I had nothing to practice on, so I went looking for an entry-level competition on Kaggle and found MNIST handwritten digit recognition. The competition splits the MNIST dataset into a 42,000-image training set and a 28,000-image test set.

I then followed the cs231n pattern and built the network step by step: preprocessing the data with pandas + numpy, rewriting the check_acc / train_part functions, defining a new network architecture, training the model, all the way to producing predictions on the test set. Pitfalls everywhere!!! It took a little over a day, but it's finally done... The final score was 0.99042, and I'm not going to keep tweaking it for now; I'll come back once I pick up new techniques from papers. Hyperparameter tuning also took a long time, endlessly trying different batch_size / lr / lr-decay settings and layer counts. After deepening the network and adjusting the batch_size, the score finally went up by 2% (depth really does help).

The worst part: at test time I fed all 28,000 images in at once and the GPU memory blew up... and my morale nearly blew up with it. 3 GB just isn't enough; I should have bought the 6 GB card back then. Tears.

The code is below, written in a Jupyter notebook. The training part at the top can be run as-is; the test and output part at the bottom needs changes (because of the out-of-memory problem I had to split the test set in half, run one half, free the memory, run the other half, and then concatenate the results before writing them out, so the output part has been edited quite a bit and won't run directly).

I was stunned to see that the top 95 scores on the leaderboard are all 1.0. From the comments it turns out that some of the high scores (though maybe some expert really did reach 100% accuracy using only the 42,000 training images, it's not impossible) come from training directly on the full MNIST dataset. Since the test set is a subset of MNIST, overfitting simply doesn't matter in that case??? Anyway, I'll come back and improve mine once I've learned something new.

Even so, I got a lot out of this: at least I wrote the whole thing by hand once, and going through normalization / standardization / batchnorm in my own training is much more concrete than just reading papers and blog posts!

#%%

import pandas as pd
import random

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler

import numpy as np
import matplotlib.pyplot as plt
import datetime

#%%

### Read the dataset

# read the training csv with pandas
data = pd.read_csv('./digit-recognizer/train.csv')
data.head(5) # show the first five rows

# convert the DataFrame to a numpy array, 42000 samples in total
np_data = np.array(data)

# the first column is the label; separate the labels from the pixel data
digit_data = np_data[:, 1:]
digit_label = np_data[:, 0]
print(digit_label.shape)

# reshape the flat pixel vectors into a 3-D array: the first dimension indexes the samples,
# the second and third are the image height and width, so [i, :, :] is the i-th image
digit_data = digit_data.reshape((42000, 28, 28))
example = digit_data[5, :, :] # take a look at one example image
plt.imshow(example)

#%%

# split the data into train and val sets of 38400 and 3600 samples
train_data = digit_data[0:38400, :, :]
val_data = digit_data[38400:, :, :]
train_exam = train_data[0, :, :]
val_exam = val_data[0, :, :]
print('train and val data: ', train_data.shape, ' ', val_data.shape) # check the shapes

train_label = digit_label[0:38400]
val_label = digit_label[38400:]
print('train and val label: ', train_label.shape, ' ', val_label.shape)

#%%

plt.imshow(train_data[0, :, :])
# convert the train and val data to PyTorch tensors
train_data = torch.tensor(train_data)
val_data = torch.tensor(val_data)
train_label = torch.tensor(train_label)
val_label = torch.tensor(val_label)

#%%

# Normalize the data: the raw pixels are in [0, 255], so scale them into [0, 1]
# by dividing the whole dataset by 255
def data_normalization(tensor):
    return tensor.float() / 255

def data_standardization(tensor):
    return (tensor.float() - tensor.float().mean()) / tensor.float().std()

#%%

# scale the data into [0, 1] and then standardize it
train_data = data_normalization(train_data)
val_data = data_normalization(val_data)

train_data = data_standardization(train_data)
val_data = data_standardization(val_data)

#%%

def read_a_batch(batch_size):
    # draw a random batch of indices from the 38400 training examples
    index = random.sample(range(38400), batch_size)
    return (train_data[index, :, :], train_label[index])
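
#%%

# (Optional sketch, not in the original notebook.) read_a_batch samples indices at
# random, so one "epoch" in train_part below does not necessarily visit every
# training example exactly once. The more usual way to get true epochs is a
# TensorDataset + DataLoader, roughly like this:
from torch.utils.data import TensorDataset, DataLoader

train_loader = DataLoader(TensorDataset(train_data, train_label),
                          batch_size=64, shuffle=True)
# the loop in train_part could then be driven by:  for x, y in train_loader: ...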

#%%

USE_GPU = True

dtype = torch.float32 # we will use float32 throughout

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Constant to control how frequently we print train loss
print_every = 250

print('using device:', device)

#%%

def check_val_accuracy(model, val):
    print('Checking accuracy on validation set')
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        x, y = val
        x = torch.unsqueeze(x, 1)
        x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
        y = y.to(device=device, dtype=torch.long)
        
        scores = model(x)
        preds = torch.argmax(scores, dim=1)
        corr = (preds == y)
        acc = corr.sum().item() / y.size(0) * 100  # the val set has 3600 examples
        print('Got acc %.2f %%' % acc)
        return acc

#%%

def train_part(model, optimizer, scheduler, batch_size=256, epochs=1):
    history = []
    loss_all = []
    for e in range(epochs):
        for i in range(38400 // batch_size):
            x, y = read_a_batch(batch_size)
            x = torch.unsqueeze(x, 1)
            x = x.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=torch.long)
            
            scores = model(x)
            loss = F.cross_entropy(scores, y)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            if i % print_every == 0:
                print('Epoch %d, Iteration %d, loss = %.4f' % (e+1, i, loss.item()))
                val = (val_data, val_label)
                acc = check_val_accuracy(model, val)
                model.train()  # check_val_accuracy switched the model to eval mode; switch back for training
                history.append(acc)
                loss_all.append(loss.item())
                print()
                
        # learning-rate decay: the scheduler multiplies the lr by a factor every 5 epochs
        scheduler.step()
        if (e+1) % 5 == 0:
            print('learning rate decreased...\n')
        
    plt.figure(figsize=(15,6))
    plt.xlabel('iteration')
    plt.ylabel('acc')
    plt.plot(list(range(len(history))), history, 'go-')
    plt.show()
    plt.figure(figsize=(15,6))
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.plot(list(range(len(loss_all))), loss_all, 'ro-')
    plt.show()
    

#%%

model = None
optimizer = None

class MyDigitNet(nn.Module):
    '''
    Input: 28x28x1 single-channel grayscale images
    '''
    def __init__(self, num_classes=10):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1), # input 1x28x28, output 32x14x14
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 64, kernel_size=3, padding=1), # input 32x14x14 (after batchnorm), output 64x14x14
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2), # 64x7x7 after pooling
            nn.Conv2d(64, 128, kernel_size=3),  # input 64x7x7, output 128x5x5
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.Conv2d(128, 128, kernel_size=2),  # input 128x5x5, output 128x4x4
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2), # output feature map is 128x2x2
        )
        self.classifier = nn.Sequential(
            nn.Linear(128*2*2, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(256, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 128*2*2)
        x = self.classifier(x)
        return x
    
# instantiate the network and print its structure
model = MyDigitNet(10)
model.to(device)
print(model)
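
#%%

# (Optional check, not in the original notebook.) The per-layer size comments in
# MyDigitNet can be verified with a dummy forward pass through the feature extractor:
with torch.no_grad():
    dummy = torch.zeros(2, 1, 28, 28, device=device)
    print(model.features(dummy).shape)  # expected: torch.Size([2, 128, 2, 2])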

#%%

# define the optimizer: SGD with Nesterov momentum and weight decay
optimizer = optim.SGD(model.parameters(), lr=0.01,
                     momentum=0.9, nesterov=True, weight_decay=0.0005)
# decay the learning rate every five epochs
scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.3)
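
#%%

# (Worked out for clarity, not in the original notebook.) With StepLR(step_size=5,
# gamma=0.3) and scheduler.step() called once at the end of every epoch, the learning
# rate over the 15 training epochs below is:
#   epochs  1-5 : 0.01
#   epochs  6-10: 0.01 * 0.3   = 0.003
#   epochs 11-15: 0.01 * 0.3^2 = 0.0009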

#%%

# train the network and time how long it takes
start = datetime.datetime.now()
train_part(model, optimizer, scheduler, 64, epochs=15)
end = datetime.datetime.now()
print(end-start)

#%%

# save the model parameters
torch.save(model.state_dict(), './digit-recognizer/CNN.pth')

#%%

# Process the test-set images
test = pd.read_csv('./digit-recognizer/test.csv')
test.head(5) # show the first five rows

# convert the DataFrame to a numpy array, 28000 test samples in total
np_test = np.array(test)

# reshape the flat pixel vectors into a 3-D array: the first dimension indexes the samples,
# the second and third are the image height and width
np_test = np_test.reshape((28000, 28, 28))
example = np_test[1, :, :] # take a look at one example image
plt.imshow(example)

#%%

# convert the data to a torch.tensor and add a channel dimension (moved to the GPU later, in halves)
test_data=torch.tensor(np_test)
test_data = torch.unsqueeze(test_data,dim=1)
#test_data=test_data.to(device=device,dtype=dtype)
print(test_data.shape)
print(test_data.dtype)
print(test_data.device)
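
#%%

# (Consistency note, not part of the original run.) The train/val tensors were scaled
# to [0, 1] and standardized above, but the raw test tensor here still holds 0-255
# values. To feed the network the same kind of input it was trained on, the same
# preprocessing would presumably be applied first, e.g.:
# test_data = data_standardization(data_normalization(test_data))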

#%%

# load the model (optional: reload from the saved state_dict)
#model = MyDigitNet(10)
#model.load_state_dict(torch.load('./digit-recognizer/CNN.pth'))
torch.cuda.memory_allocated()  # check how much GPU memory is currently in use

#%%

#test_data1=test_data[0:14001, :, :, :].to(dtype=dtype,device='cuda:0')
test_data2=test_data[14001:28000, :, :, :].to(dtype=dtype,device='cuda:0')
#%%

# run the network on the test data; 28000 images at once did not fit in GPU memory,
# so test_data is split into two halves, each half is run separately, and the results are merged afterwards

#test1 = test_data[0:14001, :, :, :]
#test2 = test_data[14001:28001,:, :, :]
model.eval()
with torch.no_grad():
    # scores = model(test_data)
    # preds = torch.argmax(scores, dim=1)
    # print(preds)
    # scores1 = model(test_data1)
    # preds1 = torch.argmax(scores1, dim=1)
    # print(preds1)
    scores2 = model(test_data2)
    preds2 = torch.argmax(scores2, dim=1)
    print(preds2)
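
#%%

# (Alternative sketch, not part of the original run; the variable names below are my
# own.) Instead of hard-coding two halves, the test tensor can be pushed through the
# network in small chunks so it never has to sit in GPU memory all at once:
chunk_preds = []
model.eval()
with torch.no_grad():
    for chunk in torch.split(test_data, 1000):  # test_data is still the full CPU tensor
        chunk = chunk.to(device=device, dtype=dtype)
        chunk_preds.append(torch.argmax(model(chunk), dim=1).cpu())
print(torch.cat(chunk_preds).shape)  # expected: torch.Size([28000])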

#%%

# res=preds.cpu().numpy()
# print(res.shape)
res1 = preds1.cpu().numpy()
# print(res1.shape)
res2 = preds2.cpu().numpy()
# print(res2.shape)

#%%

# merge the predictions for the two halves
res = np.hstack((res1, res2))
print(res.shape)
print(res)
#%%

# write the predictions to a .csv submission file
imageID = list(range(1, 28001))
result = {'ImageId':imageID, 
          'Label':res}
dt = pd.DataFrame(result)
dt.head(5)
dt.to_csv('./digit-recognizer/submission2nd.csv', index=False)  # index=False keeps only the ImageId and Label columns

