PyTorch實現簡單卷積神經網絡（CNN）完成手寫數字識別

首先你需要安裝torch和torchvision，然後使用torchvision來下載MNIST數據集。如果下載數據集時遇到問題，請查看《PyTorch用最簡單的多層感知機（深度神經網絡）實現手寫數字識別》和《使用torchvision下載外網數據集mnist沒有進度的解決方案》這兩篇文章。

本文假設你對CNN基本原理有一定的瞭解。

我首先建立了一個tools工具模塊（裏面只有函數，沒有類），這樣主模塊的代碼可以少一些。我把它命名成tools.py：


import torch
import torchvision
from torch.utils.data import DataLoader

def get_trans():
    """Build the preprocessing pipeline applied to every MNIST image.

    The pipeline has two steps:

    1. ``ToTensor`` turns the image into a tensor and rescales pixel values
       from [0, 255] to [0, 1].
    2. ``Normalize([0.5], [0.5])`` computes ``(x - 0.5) / 0.5`` per channel,
       which maps [0, 1] onto [-1, 1]. It shifts and scales the values; it
       does not turn them into a normal distribution.

    Returns:
        A ``torchvision.transforms.Compose`` holding both steps in order.
    """
    to_tensor = torchvision.transforms.ToTensor()
    normalize = torchvision.transforms.Normalize([0.5], [0.5])
    return torchvision.transforms.Compose([to_tensor, normalize])

# Whether torchvision should download MNIST into ./mnist when the datasets are
# built below. With False, the files must already be present under that root.
DOWNLOAD_MNIST = False

# Training split; every image goes through the get_trans() pipeline.
train_data = torchvision.datasets.MNIST(
    root="./mnist",  # root directory of the dataset
    train=True,  # select the training split
    transform=get_trans(),
    download=DOWNLOAD_MNIST,
)

# Test split (train=False), preprocessed the same way.
test_data = torchvision.datasets.MNIST(
    root="./mnist",
    train=False,
    transform=get_trans(),
    download=DOWNLOAD_MNIST,
)
def get_trainLoader(BATCH_SIZE):
    """Return a DataLoader over the module-level training set.

    Args:
        BATCH_SIZE: number of samples in each batch.

    Returns:
        A ``DataLoader`` over ``train_data`` with shuffling enabled.
    """
    return DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

def get_testLoader(BATCH_SIZE):
    """Return a DataLoader over the module-level test set.

    Args:
        BATCH_SIZE: number of samples in each batch.

    Returns:
        A ``DataLoader`` over ``test_data`` with shuffling disabled.
    """
    return DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)

def get_cuda_available():
    """Return whatever ``torch.cuda.is_available()`` reports."""
    return torch.cuda.is_available()

def get_test_data_len():
    """Return the number of samples in the module-level test set."""
    sample_count = len(test_data)
    return sample_count

然後是主模塊，首先引入必要的模塊：

import torch.nn as nn
import torch
import time
import tools

搭建CNN網絡：

class CNN(nn.Module):
    """Small convolutional classifier: two conv/ReLU/max-pool stages + linear.

    Shape notes below are per sample and assume a (1, 28, 28) input, which is
    what the final ``Linear(32 * 7 * 7, 10)`` layer requires.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: (1, 28, 28) -> conv -> (16, 28, 28) -> pool -> (16, 14, 14).
        # padding = (kernel_size - stride) / 2 keeps the spatial size unchanged.
        first_conv = nn.Conv2d(
            in_channels=1,
            out_channels=16,
            kernel_size=5,
            stride=1,
            padding=2,
        )
        self.conv1 = nn.Sequential(
            first_conv,
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        # Stage 2: (16, 14, 14) -> conv -> (32, 14, 14) -> pool -> (32, 7, 7).
        second_conv = nn.Conv2d(
            in_channels=16,
            out_channels=32,
            kernel_size=5,
            stride=1,
            padding=2,
        )
        self.conv2 = nn.Sequential(
            second_conv,
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        # Classifier head: flattened (32 * 7 * 7) features -> 10 outputs.
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Run the forward pass.

        The method name must stay ``forward`` because it overrides the
        ``nn.Module`` hook that is invoked when the model is called.

        Args:
            x: input batch; expected to be (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, 10) with one raw score per class.
        """
        features = self.conv2(self.conv1(x))  # (batch, 32, 7, 7)
        flattened = features.view(features.size(0), -1)  # (batch, 32 * 7 * 7)
        return self.out(flattened)

輸出CNN的結構如下：

CNN(
(conv1): Sequential(
(0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(1): ReLU()
(2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(conv2): Sequential(
(0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(1): ReLU()
(2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(out): Linear(in_features=1568, out_features=10, bias=True)
)

定義常量：

EPOCH=50  # total number of training epochs (full passes over the training loader)
BATCH_SIZE=20  # number of samples per training batch
# Learning rate. The author's note: cross-entropy loss does not need a large one.
# NOTE(review): 0.03 is well above Adam's documented default of 1e-3 -- confirm it trains stably.
LR=0.03
# Whether to download the dataset when the code runs.
# NOTE(review): nothing in the main-module code shown reads this; tools.py defines its own flag.
DOWNLOAD_MNIST=False

定義損失函數和優化器、加載數據，並在GPU可用時把網絡移到GPU上運行：

# Build the network and move it to the GPU when CUDA is available. This is done
# before the optimizer is created from the model's parameters.
cnn = CNN()
cuda_available = tools.get_cuda_available()
if cuda_available:
    cnn = cnn.cuda()

# Adam optimizer over all model parameters, with cross-entropy as the loss.
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
loss_function = nn.CrossEntropyLoss()

# The test loader uses batches ten times larger than the training loader.
train_loader = tools.get_trainLoader(BATCH_SIZE)
test_loader = tools.get_testLoader(BATCH_SIZE * 10)

最後是訓練和測試：

for ep in range(EPOCH):
    # Time one full epoch: training on every batch plus evaluation on the test set.
    # time.clock() was removed in Python 3.8; time.perf_counter() replaces it.
    startTick = time.perf_counter()

    # Training phase. train() changes nothing for the conv/ReLU/pool/linear
    # layers used here, but keeps the loop correct if mode-dependent layers
    # (dropout, batch norm) are added later.
    cnn.train()
    for img, label in train_loader:  # one batch of the training set at a time
        if cuda_available:
            img = img.cuda()
            label = label.cuda()

        out = cnn(img)  # forward pass
        loss = loss_function(out, label)  # loss for this batch

        optimizer.zero_grad()  # clear gradients left from the previous step
        loss.backward()  # back-propagate to compute gradients (no update yet)
        optimizer.step()  # apply the parameter update

    # Evaluation phase: count correct predictions on the test set.
    num_correct = 0
    cnn.eval()
    with torch.no_grad():  # no gradients needed, so skip building the autograd graph
        for img, label in test_loader:
            if cuda_available:
                img = img.cuda()
                label = label.cuda()

            out = cnn(img)

            # torch.max(out, 1) returns (max values, their indices) along dim 1,
            # i.e. for every row the column index of the largest score, which is
            # the predicted class.
            _, prediction = torch.max(out, 1)
            # .item() yields a plain Python int, so the counter never lives on
            # the GPU and stays usable even if the loader yields no batches.
            num_correct += (prediction == label).sum().item()

    accuracy = num_correct / tools.get_test_data_len()  # fraction of test samples classified correctly
    timeSpan = time.perf_counter() - startTick
    print( "第%d迭代期，準確率爲%f,耗時%dS" % (ep + 1, accuracy, timeSpan) )

整個模塊的代碼：

import torch.nn as nn
import torch
import time
import tools

class CNN(nn.Module):
    """Small convolutional classifier: two conv/ReLU/max-pool stages + linear.

    Shape notes below are per sample and assume a (1, 28, 28) input, which is
    what the final ``Linear(32 * 7 * 7, 10)`` layer requires.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: (1, 28, 28) -> conv -> (16, 28, 28) -> pool -> (16, 14, 14).
        # padding = (kernel_size - stride) / 2 keeps the spatial size unchanged.
        first_conv = nn.Conv2d(
            in_channels=1,
            out_channels=16,
            kernel_size=5,
            stride=1,
            padding=2,
        )
        self.conv1 = nn.Sequential(
            first_conv,
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        # Stage 2: (16, 14, 14) -> conv -> (32, 14, 14) -> pool -> (32, 7, 7).
        second_conv = nn.Conv2d(
            in_channels=16,
            out_channels=32,
            kernel_size=5,
            stride=1,
            padding=2,
        )
        self.conv2 = nn.Sequential(
            second_conv,
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        # Classifier head: flattened (32 * 7 * 7) features -> 10 outputs.
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Run the forward pass and return a (batch, 10) tensor of raw scores.

        ``x`` is expected to be (batch, 1, 28, 28).
        """
        features = self.conv2(self.conv1(x))  # (batch, 32, 7, 7)
        flattened = features.view(features.size(0), -1)  # (batch, 32 * 7 * 7)
        return self.out(flattened)
print("start")

# Hyper-parameters.
EPOCH = 50  # total number of training epochs
BATCH_SIZE = 20  # number of samples per training batch
LR = 0.03  # learning rate; author's note: cross-entropy loss does not need a large one
DOWNLOAD_MNIST = False  # whether to download the dataset when the code runs

# Build the network and move it to the GPU when CUDA is available. This is done
# before the optimizer is created from the model's parameters.
cnn = CNN()
cuda_available = tools.get_cuda_available()
if cuda_available:
    cnn = cnn.cuda()

# Adam optimizer over all model parameters, with cross-entropy as the loss.
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
loss_function = nn.CrossEntropyLoss()

# The test loader uses batches ten times larger than the training loader.
train_loader = tools.get_trainLoader(BATCH_SIZE)
test_loader = tools.get_testLoader(BATCH_SIZE * 10)



# Training process
for ep in range(EPOCH):
    # Time one full epoch: training on every batch plus evaluation on the test set.
    # time.clock() was removed in Python 3.8; time.perf_counter() replaces it.
    startTick = time.perf_counter()

    # Training phase. train() changes nothing for the conv/ReLU/pool/linear
    # layers used here, but keeps the loop correct if mode-dependent layers
    # (dropout, batch norm) are added later.
    cnn.train()
    for img, label in train_loader:  # one batch of the training set at a time
        if cuda_available:
            img = img.cuda()
            label = label.cuda()

        out = cnn(img)  # forward pass
        loss = loss_function(out, label)  # loss for this batch

        optimizer.zero_grad()  # clear gradients left from the previous step
        loss.backward()  # back-propagate to compute gradients (no update yet)
        optimizer.step()  # apply the parameter update

    # Evaluation phase: count correct predictions on the test set.
    num_correct = 0
    cnn.eval()
    with torch.no_grad():  # no gradients needed, so skip building the autograd graph
        for img, label in test_loader:
            if cuda_available:
                img = img.cuda()
                label = label.cuda()

            out = cnn(img)

            # torch.max(out, 1) returns (max values, their indices) along dim 1,
            # i.e. for every row the column index of the largest score, which is
            # the predicted class.
            _, prediction = torch.max(out, 1)
            # .item() yields a plain Python int, so the counter never lives on
            # the GPU and stays usable even if the loader yields no batches.
            num_correct += (prediction == label).sum().item()

    accuracy = num_correct / tools.get_test_data_len()  # fraction of test samples classified correctly
    timeSpan = time.perf_counter() - startTick
    print( "第%d迭代期，準確率爲%f,耗時%dS" % (ep + 1, accuracy, timeSpan) )

pytorch實現簡單卷積神經網絡（CNN）網絡完成手寫數字識別

《Python進階》學習筆記

Leetcode 3161. 物塊放置查詢

leetcode 60 排列序列

一個docker容器暴露多個端口

微服務實踐之使用 Visual Studio 2022 調試Dapr 應用程序

wpf附加屬性理解 WPF附加屬性

Flask login頁面無限重定向的可能原因之一

flask多條件查詢並的簡單方式，

np.diff二維數組中使用append和prepend，

textarea的placeholder不起作用的可能原因

pyqt5點擊一次按文件瀏覽框會彈出兩次的可能原因

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結