PyTorch Example: DCGAN Image Generation

Background

Deep learning needs substantially more data than traditional machine learning methods. When the amount of real data is small or not enough for the network to converge, data augmentation is used to generate additional training images. Conventional augmentation applies random crops, flips, and similar transforms to increase the variety of the inputs, but such augmentation is essentially a "linear" transformation of the original images and cannot produce content that is not already present in them. In this setting, a GAN (Generative Adversarial Network) can generate richer data from a limited dataset and improve the effectiveness of network training.

DCGAN replaces the multilayer perceptron (MLP) in the original GAN with convolutional neural networks and makes a few architectural adjustments, which significantly improves the quality of the generated images.

Paper link: Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks

DCGAN Design Tips

  1. Replace all pooling layers with strided convolutions (in the discriminator) and fractionally-strided convolutions (in the generator); a minimal sketch of this substitution follows this list.
  2. Use batch normalization in both the generator and the discriminator.
  3. Remove fully connected hidden layers for deeper architectures.
  4. Use ReLU activations in all layers of the generator, except the output layer, which uses Tanh.
  5. Use LeakyReLU activations in all layers of the discriminator.
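
As a quick illustration of tip 1 (the tensor shapes and layer sizes here are illustrative, not taken from the original code): a stride-2 convolution learns the downsampling that a pooling layer would otherwise perform, and a stride-2 transposed convolution performs the corresponding learned upsampling in the generator.

import torch
import torch.nn as nn

x = torch.randn(1, 64, 32, 32)                              # a dummy 64-channel feature map
down = nn.Conv2d(64, 128, 4, stride=2, padding=1)           # strided conv: learned downsampling instead of pooling
up = nn.ConvTranspose2d(128, 64, 4, stride=2, padding=1)    # fractionally-strided (transposed) conv: learned upsampling
print(down(x).shape)        # torch.Size([1, 128, 16, 16])
print(up(down(x)).shape)    # torch.Size([1, 64, 32, 32])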

A PyTorch-based DCGAN data augmentation method:

Generator:

class NetG(nn.Module):
    def __init__(self, ngf, nz):
        super(NetG, self).__init__()
        # layer1 takes an nz x 1 x 1 random noise vector (nz = 100 by default) and outputs (ngf*8) x 4 x 4
        self.layer1 = nn.Sequential(
            nn.ConvTranspose2d(nz, ngf * 8, kernel_size=4, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(inplace=True)
        )
        # layer2 output: (ngf*4) x 8 x 8
        self.layer2 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(inplace=True)
        )
        # layer3 output: (ngf*2) x 16 x 16
        self.layer3 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(inplace=True)
        )
        # layer4 output: ngf x 32 x 32
        self.layer4 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(inplace=True)
        )
        # layer5 output: 3 x 96 x 96
        self.layer5 = nn.Sequential(
            nn.ConvTranspose2d(ngf, 3, 5, 3, 1, bias=False),
            nn.Tanh()
        )

    # Forward pass of NetG
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        return out
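
A quick sanity check of the generator's output shape (a minimal sketch; ngf=64 and nz=100 match the defaults used in train.py below):

import torch
netG = NetG(ngf=64, nz=100)
z = torch.randn(4, 100, 1, 1)   # a batch of 4 latent vectors
print(netG(z).shape)            # torch.Size([4, 3, 96, 96])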

Discriminator (structurally the reverse of the generator):

# Discriminator network D
class NetD(nn.Module):
    def __init__(self, ndf):
        super(NetD, self).__init__()
        # layer1 input: 3 x 96 x 96, output: ndf x 32 x 32
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, ndf, kernel_size=5, stride=3, padding=1, bias=False),
            nn.BatchNorm2d(ndf),
            nn.LeakyReLU(0.2, inplace=True)
        )
        # layer2 output: (ndf*2) x 16 x 16
        self.layer2 = nn.Sequential(
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True)
        )
        # layer3 output: (ndf*4) x 8 x 8
        self.layer3 = nn.Sequential(
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True)
        )
        # layer4 output: (ndf*8) x 4 x 4
        self.layer4 = nn.Sequential(
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True)
        )
        # layer5 output: a single value (the probability that the input is real)
        self.layer5 = nn.Sequential(
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

    # Forward pass of NetD
    def forward(self,x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        return out
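
A similar shape check for the discriminator (ndf=64 as in the defaults below). Note that the raw output has shape (batch, 1, 1, 1), which is why the training code flattens it with .view(-1) before computing the BCE loss:

import torch
netD = NetD(ndf=64)
imgs = torch.randn(4, 3, 96, 96)   # a batch of 4 dummy images
print(netD(imgs).shape)            # torch.Size([4, 1, 1, 1])
print(netD(imgs).view(-1).shape)   # torch.Size([4])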

Loss function and training step:

criterion = nn.BCELoss()
optimizerG = torch.optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
optimizerD = torch.optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
# One training iteration (netG, netD, label, real_label, fake_label and device are defined in train.py below)
for epoch in range(1, opt.epoch + 1):
    for i, (imgs,_) in enumerate(dataloader):
        # Freeze the generator G and train the discriminator D
        optimizerD.zero_grad()
        ## Push D to classify real images as 1
        imgs = imgs.to(device)
        output = netD(imgs).view(-1)  # flatten (B,1,1,1) to (B,) to match the label shape
        label.data.fill_(real_label)
        label=label.to(device)
        # Loss of D on real images against the real label
        errD_real = criterion(output, label)
        errD_real.backward()
        ## Push D to classify fake images as 0
        label.data.fill_(fake_label)
        noise = torch.randn(opt.batchSize, opt.nz, 1, 1)
        noise=noise.to(device)
        fake = netG(noise)  # generate fake images
        output = netD(fake.detach()).view(-1)  # detach() keeps gradients out of G, which is not updated here
        errD_fake = criterion(output, label)
        errD_fake.backward()
        errD = errD_fake + errD_real
        optimizerD.step()

        # Freeze the discriminator D and train the generator G
        optimizerG.zero_grad()
        # Push D to classify G's fake images as 1
        label.data.fill_(real_label)
        label = label.to(device)
        output = netD(fake).view(-1)
        errG = criterion(output, label)
        errG.backward()
        optimizerG.step()
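
For reference, BCELoss with all-ones or all-zeros labels reduces exactly to the standard GAN discriminator terms -log D(x) and -log(1 - D(G(z))). A minimal sketch verifying this (the probabilities below are made up for illustration):

import torch
import torch.nn as nn

criterion = nn.BCELoss()
p = torch.tensor([0.9, 0.7, 0.2])            # pretend discriminator outputs
print(criterion(p, torch.ones_like(p)))      # equals -log(p).mean()
print(torch.log(p).mean().neg())
print(criterion(p, torch.zeros_like(p)))     # equals -log(1 - p).mean()
print(torch.log(1 - p).mean().neg())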

 

Model: model.py

import torch.nn as nn
# Generator network G
class NetG(nn.Module):
    def __init__(self, ngf, nz):
        super(NetG, self).__init__()
        # layer1 takes an nz x 1 x 1 random noise vector (nz = 100 by default) and outputs (ngf*8) x 4 x 4
        self.layer1 = nn.Sequential(
            nn.ConvTranspose2d(nz, ngf * 8, kernel_size=4, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(inplace=True)
        )
        # layer2 output: (ngf*4) x 8 x 8
        self.layer2 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(inplace=True)
        )
        # layer3 output: (ngf*2) x 16 x 16
        self.layer3 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(inplace=True)
        )
        # layer4 output: ngf x 32 x 32
        self.layer4 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(inplace=True)
        )
        # layer5 output: 3 x 96 x 96
        self.layer5 = nn.Sequential(
            nn.ConvTranspose2d(ngf, 3, 5, 3, 1, bias=False),
            nn.Tanh()
        )

    # Forward pass of NetG
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        return out


# Discriminator network D
class NetD(nn.Module):
    def __init__(self, ndf):
        super(NetD, self).__init__()
        # layer1 input: 3 x 96 x 96, output: ndf x 32 x 32
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, ndf, kernel_size=5, stride=3, padding=1, bias=False),
            nn.BatchNorm2d(ndf),
            nn.LeakyReLU(0.2, inplace=True)
        )
        # layer2 output: (ndf*2) x 16 x 16
        self.layer2 = nn.Sequential(
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True)
        )
        # layer3 output: (ndf*4) x 8 x 8
        self.layer3 = nn.Sequential(
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True)
        )
        # layer4 output: (ndf*8) x 4 x 4
        self.layer4 = nn.Sequential(
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True)
        )
        # layer5 output: a single value (the probability that the input is real)
        self.layer5 = nn.Sequential(
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

    # Forward pass of NetD
    def forward(self,x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        return out
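
The DCGAN paper initializes all weights from a zero-mean Normal distribution with standard deviation 0.02. model.py above does not apply such an initialization; a common way to add it is a helper like the one below (the name weights_init and the BatchNorm settings follow the usual DCGAN convention and are not part of the original code):

def weights_init(m):
    # DCGAN-style init: N(0, 0.02) for conv weights, N(1, 0.02) for BatchNorm scale, 0 for BatchNorm bias
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

# Usage (after building the networks): netG.apply(weights_init); netD.apply(weights_init)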

Training: train.py

import os
import argparse
import torch
import torchvision
import torchvision.utils as vutils
import torch.nn as nn
from model import NetD, NetG

parser = argparse.ArgumentParser()
parser.add_argument('--batchSize', type=int, default=32)
parser.add_argument('--imageSize', type=int, default=96)
parser.add_argument('--nz', type=int, default=100, help='size of the latent z vector')
parser.add_argument('--ngf', type=int, default=64)
parser.add_argument('--ndf', type=int, default=64)
parser.add_argument('--epoch', type=int, default=40000, help='number of epochs to train for')
parser.add_argument('--lr', type=float, default=0.0002, help='learning rate, default=0.0002')
parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
parser.add_argument('--data_path', default='data/', help='folder to train data')
parser.add_argument('--outf', default='imgs/', help='folder to output images and model checkpoints')
opt = parser.parse_args()
# Use the GPU if available
device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")

# Image loading and preprocessing (transforms.Scale is deprecated; Resize with a (h, w) tuple forces square inputs)
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((opt.imageSize, opt.imageSize)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

dataset = torchvision.datasets.ImageFolder(opt.data_path, transform=transform)

dataloader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=opt.batchSize,
    shuffle=True,
    drop_last=True,
)

netG = NetG(opt.ngf, opt.nz).to(device)
netD = NetD(opt.ndf).to(device)

criterion = nn.BCELoss()
optimizerG = torch.optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
optimizerD = torch.optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

label = torch.FloatTensor(opt.batchSize)
real_label = 1
fake_label = 0
save_path = './imgs/epoch{:s}'
j = 0
for epoch in range(1, opt.epoch + 1):
    for i, (imgs,_) in enumerate(dataloader):
        # Freeze the generator G and train the discriminator D
        optimizerD.zero_grad()
        ## Push D to classify real images as 1
        imgs = imgs.to(device)
        output = netD(imgs).view(-1)  # flatten (B,1,1,1) to (B,) to match the label shape
        label.data.fill_(real_label)
        label=label.to(device)
        errD_real = criterion(output, label)
        errD_real.backward()
        ## Push D to classify fake images as 0
        label.data.fill_(fake_label)
        noise = torch.randn(opt.batchSize, opt.nz, 1, 1)
        noise=noise.to(device)
        fake = netG(noise)  # generate fake images
        output = netD(fake.detach()).view(-1)  # detach() keeps gradients out of G, which is not updated here
        errD_fake = criterion(output, label)
        errD_fake.backward()
        errD = errD_fake + errD_real
        optimizerD.step()

        # Freeze the discriminator D and train the generator G
        optimizerG.zero_grad()
        # Push D to classify G's fake images as 1
        label.data.fill_(real_label)
        label = label.to(device)
        output = netD(fake).view(-1)
        errG = criterion(output, label)
        errG.backward()
        optimizerG.step()

        print('[%d/%d][%d/%d] Loss_D: %.3f Loss_G %.3f'
              % (epoch, opt.epoch, i, len(dataloader), errD.item(), errG.item()))
    if epoch % 1000 == 0:
        os.makedirs(save_path.format(str(j)), exist_ok=True)
        for i in range(len(fake.data)):
            vutils.save_image(fake.data[i],
                            '%s/%d.png' % (save_path.format(str(j)), i),
                            normalize=True)
        torch.save(netG.state_dict(), '%s/netG_%03d.pth' % (opt.outf, epoch))
        torch.save(netD.state_dict(), '%s/netD_%03d.pth' % (opt.outf, epoch))
        j = j+1
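
Since the point of this setup is data augmentation, once a checkpoint has been saved the generator alone can be reloaded to synthesize extra training images. A minimal sketch (the checkpoint filename and output path are placeholders; adjust them to whatever the training run actually produced):

import torch
import torchvision.utils as vutils
from model import NetG

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
netG = NetG(64, 100).to(device)                     # ngf and nz must match the trained model
netG.load_state_dict(torch.load('imgs/netG_1000.pth', map_location=device))  # placeholder checkpoint
netG.eval()

with torch.no_grad():
    noise = torch.randn(64, 100, 1, 1, device=device)
    fake = netG(noise)                              # 64 synthetic 3 x 96 x 96 images
vutils.save_image(fake, 'augmented_samples.png', normalize=True)  # placeholder output path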

Reference blog: Pytorch版DCGAN圖像生成技術
