ResNet50 implementation in MXNet

For the detailed network structure, see https://blog.csdn.net/qq_21046135/article/details/81674605 and https://blog.csdn.net/seven_year_promise/article/details/69360488. In short, ResNet50 stacks bottleneck residual blocks (a 1x1 convolution, a 3x3 convolution, then a 1x1 convolution that expands the channels fourfold) in four stages of 3, 4, 6 and 3 blocks; that is exactly the layout built below.

from mxnet.gluon import nn
from mxnet import nd, init, gluon, autograd
import mxnet as mx
import gluonbook as gb
from mxnet.gluon.data.vision import transforms


lr = 0.1
num_epochs = 100
batch_size = 128
ctx = mx.gpu()

transformer = transforms.Compose([
    transforms.ToTensor()   # HWC uint8 image -> CHW float32 in [0, 1]
])

# Load the training and test sets (ImageFolderDataset expects one subfolder per class)
train_data = gluon.data.vision.ImageFolderDataset("/home/user/cf/st/train")
test_data = gluon.data.vision.ImageFolderDataset("/home/user/cf/st/test")

train_iter = gluon.data.DataLoader(train_data.transform_first(transformer), shuffle=True, batch_size=batch_size)
test_iter = gluon.data.DataLoader(test_data.transform_first(transformer), shuffle=False, batch_size=batch_size)
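
# Note: ImageFolderDataset yields images at whatever size they are stored,
# so if the files under /home/user/cf/st are not already a uniform size,
# stacking them into batches will fail. A common fix (an assumption here;
# the images may already be pre-resized) is to resize before ToTensor:
#
#   transformer = transforms.Compose([
#       transforms.Resize(224),   # conventional ResNet input size
#       transforms.ToTensor()
#   ])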

class Residual(nn.Block):
    # ResNet50 bottleneck block: 1x1 conv (reduce) -> 3x3 conv -> 1x1 conv
    # (expand to num_channels*4), with a shortcut connection around it.
    def __init__(self, num_channels, use_1x1conv=False, strides=1, **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.conv1 = nn.Conv2D(num_channels, kernel_size=1, strides=strides)
        self.conv2 = nn.Conv2D(num_channels, kernel_size=3, strides=1, padding=1)
        self.conv3 = nn.Conv2D(num_channels*4, kernel_size=1, strides=1)
        # Create the shortcut projection (and its BatchNorm) only when it is
        # actually used; see the note on DeferredInitializationError below.
        if use_1x1conv:
            self.conv4 = nn.Conv2D(num_channels*4, kernel_size=1, strides=strides)
            self.bn4 = nn.BatchNorm()
        else:
            self.conv4 = None
            self.bn4 = None
        self.bn1 = nn.BatchNorm()
        self.bn2 = nn.BatchNorm()
        self.bn3 = nn.BatchNorm()

    def forward(self, x):
        y = nd.relu(self.bn1(self.conv1(x)))
        y = nd.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        if self.conv4:
            # Project the shortcut so its shape matches the main branch
            x = self.bn4(self.conv4(x))
        return nd.relu(x + y)
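
# Quick sanity check for one bottleneck block (a sketch; the input shape is
# an assumed example):
#
#   blk = Residual(64, use_1x1conv=True, strides=1)
#   blk.initialize()
#   print(blk(nd.zeros((1, 64, 56, 56))).shape)   # -> (1, 256, 56, 56)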

def resnet_block(num_channels, num_residuals, stride=1):
    # One stage of bottleneck blocks. Only the first block needs the 1x1
    # shortcut projection: it changes the channel count (to num_channels*4)
    # and, when stride > 1, also downsamples; later blocks keep shapes as-is.
    blk = nn.Sequential()
    for i in range(num_residuals):
        if i == 0:
            blk.add(Residual(num_channels, use_1x1conv=True, strides=stride))
        else:
            blk.add(Residual(num_channels))
    return blk
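
# For example, resnet_block(128, 4, stride=2) halves the spatial size in its
# first block, and every block in that stage outputs 512 channels (128 * 4),
# so the last stage below ends at 2048 channels, as in standard ResNet50.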

net = nn.Sequential()
net.add(
    nn.Conv2D(64, kernel_size=7, strides=2, padding=3),
    nn.BatchNorm(),
    nn.Activation('relu'),
    nn.MaxPool2D(pool_size=3, strides=2)
)
net.add(
    resnet_block(64, 3, stride=1),
    resnet_block(128, 4, stride=2),
    resnet_block(256, 6, stride=2),
    resnet_block(512, 3, stride=2)
)
# Global average pooling, then a dense output layer for the 2 classes
net.add(nn.GlobalAvgPool2D(),
        nn.Dense(2))

net.initialize(init=init.Xavier(), force_reinit=True, ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate':lr})
loss = gluon.loss.SoftmaxCrossEntropyLoss()
gb.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
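
Incidentally, a handy way to verify the architecture (and to force shape inference before training) is to push one dummy batch through the network and print each top-level block's output shape. A minimal sketch, assuming 3-channel 224x224 inputs:

X = nd.random.uniform(shape=(1, 3, 224, 224), ctx=ctx)
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:', X.shape)

With such inputs the four residual stages produce 256, 512, 1024 and 2048 channels respectively, and the global average pooling collapses the final 7x7 maps into a 2048-dimensional vector before the Dense(2) output.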

When self.bn4 is instead defined unconditionally after self.bn3 (i.e., outside the if/else above), the following error appears: mxnet.gluon.parameter.DeferredInitializationError: Parameter 'batchnorm8_gamma' has not been initialized yet because initialization was deferred. Actual initialization happens during the first forward pass. Please pass one batch of data through the network before accessing Parameters. You can also avoid deferred initialization by specifying in_units, num_features, etc., for network layers.

The reason is that Gluon infers parameter shapes lazily during the first forward pass. If bn4 is created for every block, then in blocks where use_1x1conv is false self.bn4 is never called in forward, so its parameters never receive a shape; as soon as the trainer later touches every parameter of the network, it hits the shapeless bn4 and raises DeferredInitializationError. That is why self.bn4 has to be assigned explicitly in both branches: nn.BatchNorm() when use_1x1conv is True, and None otherwise.
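
The failure mode is easy to reproduce outside of ResNet. The sketch below (hypothetical code, not part of the model above) creates a child layer in __init__ that forward never calls; its parameters never get a shape, so touching them raises the error:

from mxnet import nd
from mxnet.gluon import nn

class Broken(nn.Block):
    def __init__(self, **kwargs):
        super(Broken, self).__init__(**kwargs)
        self.dense = nn.Dense(10)
        self.unused_bn = nn.BatchNorm()   # created but never used in forward

    def forward(self, x):
        return self.dense(x)

net_bad = Broken()
net_bad.initialize()
net_bad(nd.zeros((1, 4)))          # first forward pass: only dense gets a shape
net_bad.unused_bn.gamma.data()     # raises DeferredInitializationError

An alternative fix, suggested by the error message itself, is to give the layer explicit shape information at construction time, e.g. nn.BatchNorm(in_channels=num_channels*4), so its parameters are initialized eagerly rather than deferred.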
