ResNext WSL

作者：沈福利，北京工業大學碩士學位，高級算法專家。產品和技術負責人，專注於NLP、圖像、推薦系統
Author: Facebook AI

ResNext models trained with billion scale weakly-supervised data.

圖1：使用不同規模和參數配置的ResNeXt-101模型在ImageNet和Instagram標記數據集的分類性能的比較

何愷明團隊新作ResNext：Instagram圖片預訓練，挑戰ImageNet新精度

8億參數，刷新ImageNet紀錄：何愷明團隊開源最強ResNeXt預訓練模型

resnext101_32x{4,8,16,32,48}d_wsl，其中wsl是弱監督學習。用Instagram上面的9.4億張圖做了 (弱監督) 預訓練，用ImageNet做了微調。

ImageNet測試中，它的 (32×48d) 分類準確率達到85.4% (Top-1) ，打破了從前的紀錄。

導入庫

# 導入torch 庫
import torch
import torch.nn as nn
from torchvision import transforms
# 導入 經調整後 facebookresearch_WSL_resnext 模型
## 'resnext50_32x4d', 'resnext101_32x8d', 'resnext101_32x16d_wsl'
import models

加載模型

# Build the ResNeXt-101 32x16d WSL network from the local `models` module
# and put it in evaluation (prediction-only) mode.
model_ft = models.resnext101_32x16d_wsl()
# NOTE(review): the result of eval() is bound to `r`, presumably only to keep
# the notebook from echoing the module repr — confirm `r` is not used elsewhere.
r = model_ft.eval()

# Inspect the final fully connected layer: the model has 1000 output classes.
model_ft.fc

Linear(in_features=2048, out_features=1000, bias=True)

加載圖片數據

All pre-trained models expect input images normalized in the same way,
i.e. mini-batches of 3-channel RGB images of shape (3 x H x W), where H and W are expected to be at least 224.
The images have to be loaded into a range of [0, 1] and then normalized using mean = [0.485, 0.456, 0.406]
and std = [0.229, 0.224, 0.225].

Here’s a sample execution.

# sample execution (requires torchvision)
# Open the sample picture with PIL and print the image object and its size.
file_name ='images/yindu.jpg'
from PIL import Image
input_image = Image.open(file_name)
print(input_image)
print(input_image.size) # image size; the original note records 1546 x 1213

# Display the loaded picture (`input_image`, which has not yet been passed
# through the preprocessing pipeline).
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['font.sans-serif'] = ['SimHei']  # font setting so Chinese labels render correctly
plt.rcParams['axes.unicode_minus'] = False  # so the minus sign renders correctly
plt.imshow(input_image)

圖像歸一化是計算機視覺、模式識別等領域廣泛使用的一種技術。所謂圖像歸一化, 就是通過一系列變換, 將待處理的原始圖像轉換成相應的唯一標準形式(該標準形式圖像對平移、旋轉、縮放等仿射變換具有不變特性)

基於矩的圖像歸一化過程包括 4 個步驟即座標中心化、x-shearing 歸一化、縮放歸一化和旋轉歸一化。

圖片數據預處理

# Per-channel statistics consumed by the Normalize step.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Preprocessing pipeline, applied in this order:
#   1. Resize(256): rescale so the shorter edge is 256 pixels (aspect ratio kept).
#   2. CenterCrop(224): cut a 224 x 224 patch out of the centre.
#   3. ToTensor(): turn the PIL Image / ndarray into a tensor scaled to [0, 1]
#      (the scaling is a plain division by 255).
#   4. Normalize(...): per channel, subtract the mean, then divide by the std.
pipeline_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
preprocess = transforms.Compose(pipeline_steps)

# Run the sample image through the pipeline and inspect the result.
input_tensor = preprocess(input_image)
print('input_tensor.shape = ',input_tensor.shape)
print('input_tensor = ',input_tensor)

input_tensor.shape =  torch.Size([3, 224, 224])
input_tensor =  tensor([[[ 0.8104,  0.9646,  1.0502,  ...,  0.3994,  0.4166,  0.4337],
         [ 0.7591,  0.9646,  1.0502,  ...,  0.4337,  0.4337,  0.4508],
         [ 0.7248,  0.9474,  1.0673,  ...,  0.4166,  0.4337,  0.4337],
         ...,
         [ 2.1119,  2.1290,  2.1290,  ..., -0.2513, -0.2513, -0.3198],
         [ 2.0948,  2.1119,  2.0948,  ..., -0.2513, -0.1828, -0.2171],
         [ 2.0777,  2.0948,  2.0948,  ..., -0.3198, -0.1486, -0.1828]],

        [[ 1.5357,  1.6933,  1.7808,  ...,  1.0980,  1.1155,  1.1331],
         [ 1.5007,  1.6758,  1.7808,  ...,  1.1506,  1.1506,  1.1681],
         [ 1.4307,  1.6583,  1.7633,  ...,  1.1681,  1.1856,  1.1856],
         ...,
         [ 2.4111,  2.4111,  2.4286,  ...,  0.0126,  0.0126, -0.0574],
         [ 2.4286,  2.4286,  2.4286,  ...,  0.0126,  0.0826,  0.0476],
         [ 2.4286,  2.4286,  2.4286,  ..., -0.0574,  0.1176,  0.0826]],

        [[ 2.1171,  2.2566,  2.3437,  ...,  1.7163,  1.7337,  1.7337],
         [ 2.0648,  2.2566,  2.3437,  ...,  1.7511,  1.7685,  1.7685],
         [ 2.0125,  2.2391,  2.3263,  ...,  1.7685,  1.7860,  1.7860],
         ...,
         [ 2.6226,  2.6226,  2.6400,  ...,  0.2696,  0.2696,  0.1999],
         [ 2.6400,  2.6400,  2.6400,  ...,  0.2696,  0.3393,  0.3045],
         [ 2.6226,  2.6400,  2.6400,  ...,  0.1999,  0.3742,  0.3393]]])

# Convert the tensor into the data layout the model needs: add a leading
# batch dimension of size 1.
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model
print('input_batch.shape = ',input_batch.shape)
print('input_batch = ',input_batch)

import matplotlib.pyplot as plt
%matplotlib inline
image_tmp = input_tensor.permute(1,2,0) #Changing from 3x224x224 to 224x224x3
print('image_tmp.matplotlib.shape = ',image_tmp.shape)
input_tensor = torch.clamp(input_tensor,0,1)
print('image_tmp.matplotlib.clamp.shape = ',image_tmp.shape)
plt.imshow(image_tmp)

input_batch.shape =  torch.Size([1, 3, 224, 224])
input_batch =  tensor([[[[ 0.8104,  0.9646,  1.0502,  ...,  0.3994,  0.4166,  0.4337],
          [ 0.7591,  0.9646,  1.0502,  ...,  0.4337,  0.4337,  0.4508],
          [ 0.7248,  0.9474,  1.0673,  ...,  0.4166,  0.4337,  0.4337],
          ...,
          [ 2.1119,  2.1290,  2.1290,  ..., -0.2513, -0.2513, -0.3198],
          [ 2.0948,  2.1119,  2.0948,  ..., -0.2513, -0.1828, -0.2171],
          [ 2.0777,  2.0948,  2.0948,  ..., -0.3198, -0.1486, -0.1828]],

         [[ 1.5357,  1.6933,  1.7808,  ...,  1.0980,  1.1155,  1.1331],
          [ 1.5007,  1.6758,  1.7808,  ...,  1.1506,  1.1506,  1.1681],
          [ 1.4307,  1.6583,  1.7633,  ...,  1.1681,  1.1856,  1.1856],
          ...,
          [ 2.4111,  2.4111,  2.4286,  ...,  0.0126,  0.0126, -0.0574],
          [ 2.4286,  2.4286,  2.4286,  ...,  0.0126,  0.0826,  0.0476],
          [ 2.4286,  2.4286,  2.4286,  ..., -0.0574,  0.1176,  0.0826]],

         [[ 2.1171,  2.2566,  2.3437,  ...,  1.7163,  1.7337,  1.7337],
          [ 2.0648,  2.2566,  2.3437,  ...,  1.7511,  1.7685,  1.7685],
          [ 2.0125,  2.2391,  2.3263,  ...,  1.7685,  1.7860,  1.7860],
          ...,
          [ 2.6226,  2.6226,  2.6400,  ...,  0.2696,  0.2696,  0.1999],
          [ 2.6400,  2.6400,  2.6400,  ...,  0.2696,  0.3393,  0.3045],
          [ 2.6226,  2.6400,  2.6400,  ...,  0.1999,  0.3742,  0.3393]]]])
image_tmp.matplotlib.shape =  torch.Size([224, 224, 3])
image_tmp.matplotlib.clamp.shape =  torch.Size([224, 224, 3])

模型在線預測

# move the input and model to GPU for speed if available
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    # The network is bound to `model_ft`; it must live on the same device as
    # the input batch.
    model_ft.to('cuda')

# Prediction only: run the forward pass with gradient tracking disabled.
with torch.no_grad():
    output = model_ft(input_batch)

# Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes
print(output[0].shape)

torch.Size([1000])

# The model emits raw, unnormalized scores; a softmax over the class axis of
# the single batch entry converts them into probabilities.
class_scores = output[0]
result = torch.softmax(class_scores, dim=0)
print(result.shape)

torch.Size([1000])

# Get the predicted label id; the label name is then looked up in the
# ImageNet label table.
# Plain-Python copy of the probabilities, kept available for later inspection.
v_list = result.cpu().numpy().tolist()

# torch.max along dim 0 returns the largest probability together with its
# index, replacing a hand-written scan over the list.
top_prob, top_index = torch.max(result, dim=0)
v_max = top_prob.item()  # Python float
idx = top_index.item()  # Python int, usable as a dictionary key

print('v_max = ',v_max)
print('idx = ',idx)

v_max =  0.3861195147037506
idx =  638

加載ImageNet 標籤，然後獲取結果

imagenet數據集類別標籤和對應的英文中文對照表：data/ImageNet1k_label.txt

import codecs

# Build the lookup table: class id (int) -> label name.
# Every non-empty line of the label file is parsed as "<id>:<name>".
ImageNet_dict = {}
# The context manager guarantees the file handle is closed after parsing.
with codecs.open('data/ImageNet1k_label.txt','r',encoding='utf-8') as label_file:
    for line in label_file:
        line = line.strip()
        if not line:
            # Skip blank lines instead of failing on int('').
            continue
        fields = line.split(":")  # split once, reuse both parts
        _id = fields[0]
        _name = fields[1]
        # Remove non-breaking spaces (\xa0) from the label name.
        ImageNet_dict[int(_id)] = _name.replace('\xa0',"")

# Label name for the predicted class id.
ImageNet_dict[idx]