ResNext WSL
作者:沈福利 北京工業大學碩士學位,高級算法專家。產品和技術負責人,專注於NLP、圖像、推薦系統
Author: Facebook AI
ResNext models trained with billion scale weakly-supervised data.
圖1:使用不同規模和參數配置的ResNeXt-101模型在ImageNet和Instagram標記數據集的分類性能的比較
何愷明團隊新作ResNext:Instagram圖片預訓練,挑戰ImageNet新精度
8億參數,刷新ImageNet紀錄:何愷明團隊開源最強ResNeXt預訓練模型
resnext101_32x{4,8,16,32,48}d_wsl,其中wsl是弱監督學習。用Instagram上面的9.4億張圖做了 (弱監督) 預訓練,用ImageNet做了微調。
ImageNet測試中,它的 (32×48d) 分類準確率達到85.4% (Top-1) ,打破了從前的紀錄。
導入庫
# 導入torch 庫
import torch
import torch.nn as nn
from torchvision import transforms
# 導入 經調整後 facebookresearch_WSL_resnext 模型
## 'resnext50_32x4d', 'resnext101_32x8d', 'resnext101_32x16d_wsl'
import models
加載模型
# Load the model and put it in inference-only (eval) mode
model_ft = models.resnext101_32x16d_wsl()
# eval() result is bound to r, presumably to keep the notebook from echoing it
r = model_ft.eval()
# The model predicts 1000 classes; show the final fully connected layer
model_ft.fc
Linear(in_features=2048, out_features=1000, bias=True)
加載圖片數據
All pre-trained models expect input images normalized in the same way, i.e. mini-batches of 3-channel RGB images of shape (3 x H x W), where H and W are expected to be at least 224.
The images have to be loaded into a range of [0, 1] and then normalized using mean = [0.485, 0.456, 0.406] and std = [0.229, 0.224, 0.225].
Here’s a sample execution.
# Sample execution (requires torchvision): load the example image from disk
file_name ='images/yindu.jpg'
from PIL import Image
input_image = Image.open(file_name)
print(input_image)
print(input_image.size) # image size; 1546 x 1213 for this sample
# Display the image exactly as loaded, before any preprocessing is applied
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['font.sans-serif'] = ['SimHei'] # lets Chinese labels render correctly
plt.rcParams['axes.unicode_minus'] = False # lets the minus sign render correctly
plt.imshow(input_image)
圖像歸一化是計算機視覺、模式識別等領域廣泛使用的一種技術。所謂圖像歸一化, 就是通過一系列變換, 將待處理的原始圖像轉換成相應的唯一標準形式(該標準形式圖像對平移、旋轉、縮放等仿射變換具有不變特性)
基於矩的圖像歸一化過程包括 4 個步驟,即座標中心化、x-shearing 歸一化、縮放歸一化和旋轉歸一化。
圖片數據預處理
# Preprocessing pipeline that turns the loaded PIL image into the model's input tensor
preprocess = transforms.Compose([
# 1. Resize: with an int argument the shorter edge is scaled to 256 and the aspect ratio is kept (the result is not forced to 256 x 256)
transforms.Resize(256),
# 2. Crop: cut the central 224 x 224 region
transforms.CenterCrop(224),
# 3. Convert the PIL Image (or ndarray) to a tensor scaled to [0, 1]; the scaling is a plain division by 255
transforms.ToTensor(),
# 4. Standardize each channel: subtract the mean, then divide by the standard deviation
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),# per-channel normalization
])
input_tensor = preprocess(input_image)
print('input_tensor.shape = ',input_tensor.shape)
print('input_tensor = ',input_tensor)
input_tensor.shape = torch.Size([3, 224, 224])
input_tensor = tensor([[[ 0.8104, 0.9646, 1.0502, ..., 0.3994, 0.4166, 0.4337],
[ 0.7591, 0.9646, 1.0502, ..., 0.4337, 0.4337, 0.4508],
[ 0.7248, 0.9474, 1.0673, ..., 0.4166, 0.4337, 0.4337],
...,
[ 2.1119, 2.1290, 2.1290, ..., -0.2513, -0.2513, -0.3198],
[ 2.0948, 2.1119, 2.0948, ..., -0.2513, -0.1828, -0.2171],
[ 2.0777, 2.0948, 2.0948, ..., -0.3198, -0.1486, -0.1828]],
[[ 1.5357, 1.6933, 1.7808, ..., 1.0980, 1.1155, 1.1331],
[ 1.5007, 1.6758, 1.7808, ..., 1.1506, 1.1506, 1.1681],
[ 1.4307, 1.6583, 1.7633, ..., 1.1681, 1.1856, 1.1856],
...,
[ 2.4111, 2.4111, 2.4286, ..., 0.0126, 0.0126, -0.0574],
[ 2.4286, 2.4286, 2.4286, ..., 0.0126, 0.0826, 0.0476],
[ 2.4286, 2.4286, 2.4286, ..., -0.0574, 0.1176, 0.0826]],
[[ 2.1171, 2.2566, 2.3437, ..., 1.7163, 1.7337, 1.7337],
[ 2.0648, 2.2566, 2.3437, ..., 1.7511, 1.7685, 1.7685],
[ 2.0125, 2.2391, 2.3263, ..., 1.7685, 1.7860, 1.7860],
...,
[ 2.6226, 2.6226, 2.6400, ..., 0.2696, 0.2696, 0.1999],
[ 2.6400, 2.6400, 2.6400, ..., 0.2696, 0.3393, 0.3045],
[ 2.6226, 2.6400, 2.6400, ..., 0.1999, 0.3742, 0.3393]]])
# 轉換模型需要數據格式
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model
print('input_batch.shape = ',input_batch.shape)
print('input_batch = ',input_batch)
import matplotlib.pyplot as plt
%matplotlib inline
image_tmp = input_tensor.permute(1,2,0) #Changing from 3x224x224 to 224x224x3
print('image_tmp.matplotlib.shape = ',image_tmp.shape)
input_tensor = torch.clamp(input_tensor,0,1)
print('image_tmp.matplotlib.clamp.shape = ',image_tmp.shape)
plt.imshow(image_tmp)
input_batch.shape = torch.Size([1, 3, 224, 224])
input_batch = tensor([[[[ 0.8104, 0.9646, 1.0502, ..., 0.3994, 0.4166, 0.4337],
[ 0.7591, 0.9646, 1.0502, ..., 0.4337, 0.4337, 0.4508],
[ 0.7248, 0.9474, 1.0673, ..., 0.4166, 0.4337, 0.4337],
...,
[ 2.1119, 2.1290, 2.1290, ..., -0.2513, -0.2513, -0.3198],
[ 2.0948, 2.1119, 2.0948, ..., -0.2513, -0.1828, -0.2171],
[ 2.0777, 2.0948, 2.0948, ..., -0.3198, -0.1486, -0.1828]],
[[ 1.5357, 1.6933, 1.7808, ..., 1.0980, 1.1155, 1.1331],
[ 1.5007, 1.6758, 1.7808, ..., 1.1506, 1.1506, 1.1681],
[ 1.4307, 1.6583, 1.7633, ..., 1.1681, 1.1856, 1.1856],
...,
[ 2.4111, 2.4111, 2.4286, ..., 0.0126, 0.0126, -0.0574],
[ 2.4286, 2.4286, 2.4286, ..., 0.0126, 0.0826, 0.0476],
[ 2.4286, 2.4286, 2.4286, ..., -0.0574, 0.1176, 0.0826]],
[[ 2.1171, 2.2566, 2.3437, ..., 1.7163, 1.7337, 1.7337],
[ 2.0648, 2.2566, 2.3437, ..., 1.7511, 1.7685, 1.7685],
[ 2.0125, 2.2391, 2.3263, ..., 1.7685, 1.7860, 1.7860],
...,
[ 2.6226, 2.6226, 2.6400, ..., 0.2696, 0.2696, 0.1999],
[ 2.6400, 2.6400, 2.6400, ..., 0.2696, 0.3393, 0.3045],
[ 2.6226, 2.6400, 2.6400, ..., 0.1999, 0.3742, 0.3393]]]])
image_tmp.matplotlib.shape = torch.Size([224, 224, 3])
image_tmp.matplotlib.clamp.shape = torch.Size([224, 224, 3])
模型在線預測
# Move the input and the model to the GPU for speed if one is available.
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    # The network loaded in this notebook is bound to model_ft; the name
    # `model` is never defined here and would raise NameError on a GPU machine.
    model_ft.to('cuda')
# Inference only: no_grad() skips gradient tracking.
with torch.no_grad():
    output = model_ft(input_batch)
# Tensor of shape 1000, with confidence scores over ImageNet's 1000 classes
print(output[0].shape)
torch.Size([1000])
# The network emits raw, unnormalized scores for the single image in the batch.
# A softmax over the class axis turns them into probabilities.
class_scores = output[0]
result = torch.softmax(class_scores, dim=0)
print(result.shape)
torch.Size([1000])
# Pick the predicted class: the index of the highest probability, which is
# then looked up in the ImageNet label table to get the class name.
# torch.max along dim 0 returns (max value, index of that value) in one call,
# replacing the manual scan over a Python list copy of the probabilities.
best_prob, best_idx = torch.max(result, dim=0)
# .item() converts the 0-dim tensors to a plain Python float / int
v_max = best_prob.item()
idx = best_idx.item()
print('v_max = ',v_max)
print('idx = ',idx)
v_max = 0.3861195147037506
idx = 638
加載ImageNet 標籤,然後獲取結果
imagenet數據集類別標籤和對應的英文中文對照表:data/ImageNet1k_label.txt
# Build {class id -> label text} from the ImageNet-1k label file, which holds
# one "id:name" entry per line.
ImageNet_dict = {}
# The with-block closes the file handle as soon as parsing is finished.
with open('data/ImageNet1k_label.txt', 'r', encoding='utf-8') as label_file:
    for line in label_file:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        # Split on the first colon only, so a colon inside the label text
        # is kept as part of the name.
        _id, sep, _name = line.partition(":")
        if not sep:
            raise ValueError('malformed label line (no ":"): ' + repr(line))
        # Remove non-breaking spaces from the label text.
        ImageNet_dict[int(_id)] = _name.replace('\xa0',"")
# Look up the label text for the predicted class id.
ImageNet_dict[idx]
" 'maillot',"
maillot 中文的意思是:緊身衣;女子游泳衣;緊身體操衣.