A Complete Walkthrough of the YOLO v3 Configuration File

# [net] is a special section that configures the network as a whole
[net]

# Test mode: each forward pass handles batch/subdivisions = 1/1 = 1 image
# Testing
# batch=1
# subdivisions=1

# Training mode: images per forward pass = batch/subdivisions = 16/1 = 16
# Training
# A larger batch generally gives more stable training; a larger subdivisions value reduces GPU memory pressure, since each sub-batch holds only batch/subdivisions images
batch=16
subdivisions=1

# Network input width, height, and channels. Width and height must be multiples of 32 and are conventionally set equal; a higher resolution helps detect smaller objects and thus affects precision
width=416
height=416
channels=3

# Momentum term for SGD; affects how quickly gradient descent moves toward the optimum. 0.9 is the usual default
momentum=0.9
# Weight decay: the L2-regularization coefficient, used to prevent overfitting
decay=0.0005

# Rotation angle (degrees) used to generate more training samples
angle=0
# Saturation jitter used to generate more training samples
saturation = 1.5
# Exposure jitter used to generate more training samples
exposure = 1.5
# Hue jitter used to generate more training samples
hue=.1

# The learning rate sets how fast the weights are updated: too high and training can overshoot the optimum; too low and updates are slow and inefficient. It is usually changed over the course of training, starting higher and decaying later; typical values lie between 0.01 and 0.001
learning_rate=0.001
# Warm-up control: for iterations below burn_in the learning rate follows a separate ramp-up rule; only after burn_in does the policy below take over
burn_in=1000
# Total number of training iterations. Darknet saves weights every 100 iterations during the first 1000, and every 10000 iterations after that
max_batches = 500200
# Learning-rate policy: how the rate is decayed
policy=steps
# Iterations at which the learning rate changes
steps=400000,450000
# Decay factors: at iteration 400000 the learning rate is multiplied by 0.1, and at iteration 450000 it is multiplied by 0.1 again on top of that
scales=.1,.1
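
Putting the [net] schedule together, here is a minimal Python sketch of how Darknet's learning rate evolves under policy=steps with burn_in (the power-4 warm-up exponent is Darknet's default and is an assumption here, since this file does not set power):

```python
def learning_rate(batch_num, base_lr=0.001, burn_in=1000,
                  steps=(400000, 450000), scales=(0.1, 0.1), power=4):
    """Sketch of Darknet's steps policy with burn-in warm-up."""
    if batch_num < burn_in:
        # Warm-up: ramp smoothly from 0 up to base_lr over burn_in iterations.
        return base_lr * (batch_num / burn_in) ** power
    lr = base_lr
    for step, scale in zip(steps, scales):
        if batch_num >= step:
            lr *= scale  # decay by 10x at 400000 and again at 450000
    return lr

print(learning_rate(500))      # warm-up phase: 6.25e-05
print(learning_rate(200000))   # plateau: 0.001
print(learning_rate(420000))   # after first step: 0.0001
print(learning_rate(480000))   # after second step: 1e-05 (approx.)
```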

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
# If pad is 0, the padding is taken from the padding parameter; if pad is 1, the padding is size/2
pad=1
activation=leaky
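
For reference, the spatial output size of a [convolutional] layer follows standard convolution arithmetic; a small sketch assuming the pad rule described above:

```python
def conv_out(in_size, size, stride, pad=1, padding=0):
    """Spatial output size of a Darknet [convolutional] layer (sketch)."""
    p = size // 2 if pad == 1 else padding  # pad=1 means padding = size/2
    return (in_size + 2 * p - size) // stride + 1

print(conv_out(416, size=3, stride=1))  # 416 -> 416 (resolution preserved)
print(conv_out(416, size=3, stride=2))  # 416 -> 208 (the Downsample blocks)
```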

[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[shortcut]
# Residual connection: which earlier layer to fuse with; -3 means the layer three back
from=-3
# Available activations include: logistic, loggy, relu, elu, relie, plse, hardtan, lhtan, linear, ramp, leaky, tanh, stair
activation=linear
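
What [shortcut] does is just an element-wise residual addition; a minimal NumPy sketch (shapes are illustrative for a 416 input at this depth):

```python
import numpy as np

# [shortcut] from=-3 adds the previous layer's output to the output of the
# layer three back; activation=linear passes the sum through unchanged.
prev_out = np.random.rand(1, 64, 208, 208)        # output of layer -1
three_back_out = np.random.rand(1, 64, 208, 208)  # output of layer -3
shortcut_out = prev_out + three_back_out          # shapes must match
```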

# Downsample

[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

######################
# After layer 79, a few more convolutions produce the 1/32-scale (13*13) prediction. With the highest downsampling factor, this feature map has the largest receptive field, making it suitable for detecting large objects.
# That result is then upsampled and concatenated with the output of layer 61; several more convolutions give the 1/16-scale prediction, whose medium receptive field suits medium-sized objects.
# The output of layer 91 is upsampled and concatenated with the output of layer 36; after several convolutions this yields the 1/8-scale prediction, which has the smallest receptive field and suits small objects.

# YOLOv2 introduced k-means clustering to choose the prior-box sizes, and YOLOv3 keeps this approach, assigning 3 priors to each downsampling scale for a total of 9 clustered sizes.
# On the COCO dataset the 9 priors are: (10x13),(16x30),(33x23),(30x61),(62x45),(59x119),(116x90),(156x198),(373x326).
# Assignment: the smallest 13*13 feature map (largest receptive field) uses the largest priors (116x90),(156x198),(373x326), suited to large objects; the medium 26*26 map (medium receptive field) uses the medium priors (30x61),(62x45),(59x119), suited to medium objects; the largest 52*52 map (smallest receptive field) uses the smallest priors (10x13),(16x30),(33x23), suited to small objects.
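
As a concrete illustration of the clustering step, here is a minimal Python sketch (my own, not Darknet's implementation) that clusters labelled box sizes into 9 priors with k-means under a 1 - IoU distance:

```python
import numpy as np

def iou_wh(boxes, anchors):
    """IoU between (w, h) pairs, as if all boxes shared one corner."""
    inter = (np.minimum(boxes[:, None, 0], anchors[None, :, 0]) *
             np.minimum(boxes[:, None, 1], anchors[None, :, 1]))
    areas = boxes[:, 0] * boxes[:, 1]
    anchor_areas = anchors[:, 0] * anchors[:, 1]
    return inter / (areas[:, None] + anchor_areas[None, :] - inter)

def kmeans_anchors(boxes, k=9, iters=100, seed=0):
    """Cluster an (N, 2) float array of box sizes into k priors."""
    rng = np.random.default_rng(seed)
    anchors = boxes[rng.choice(len(boxes), size=k, replace=False)]
    for _ in range(iters):
        nearest = np.argmax(iou_wh(boxes, anchors), axis=1)  # max IoU = min distance
        for j in range(k):
            if np.any(nearest == j):
                anchors[j] = boxes[nearest == j].mean(axis=0)
    return anchors[np.argsort(anchors[:, 0] * anchors[:, 1])]  # smallest first
```

Sorting the priors by area matches how the masks below are laid out: indices 0-2 go to the 52*52 head, 3-5 to the 26*26 head, and 6-8 to the 13*13 head.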

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
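
The filters=255 in the layer above is not arbitrary: each of the 3 anchors selected by this head's mask predicts 4 box offsets, 1 objectness score, and 80 class scores. A quick check:

```python
anchors_per_head = 3   # len(mask) in the [yolo] section below
classes = 80
filters = anchors_per_head * (4 + 1 + classes)
print(filters)  # 255 -- change classes and this layer must change too
```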


[yolo]
mask = 6,7,8
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
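
Here mask = 6,7,8 selects the three largest priors for this 13*13 head. To make the parameters concrete, a sketch of decoding one predicted box, following the formulas in the YOLOv3 paper (the raw outputs below are made-up numbers):

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

tx, ty, tw, th = 0.2, -0.1, 0.05, 0.3  # raw network outputs (illustrative)
cx, cy = 6, 6                          # grid cell containing the box
pw, ph = 116, 90                       # first anchor selected by mask 6,7,8
grid, input_size = 13, 416

bx = (sigmoid(tx) + cx) / grid * input_size  # box center x, in input pixels
by = (sigmoid(ty) + cy) / grid * input_size
bw = pw * np.exp(tw)                         # box width scaled from the prior
bh = ph * np.exp(th)
```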


[route]
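# A [route] layer with a single index outputs a copy of that layer's feature map; -4 means four layers back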
layers = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[upsample]
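# Upsample the feature map 2x (nearest-neighbor) so it can be merged with an earlier, higher-resolution layer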
stride=2

[route]
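# With multiple indices, [route] concatenates the listed outputs along the channel dimension: here the upsampled map (-1) is joined with layer 61, forming the 26*26 branch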
layers = -1, 61

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear


[yolo]
mask = 3,4,5
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

[route]
layers = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride=2

[route]
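# Concatenate the upsampled map (-1) with layer 36 to build the 52*52 branch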
layers = -1, 36

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear


[yolo]
# This head uses the first three anchor sizes; each [yolo] layer actually predicts only the 3 anchors selected by its mask
mask = 0,1,2
# Anchors can be computed ahead of time from the command line; they are prior boxes that depend on the number of images, the width and height, and the cluster count (the num value below, i.e. how many anchors to use). They can be hand-picked or learned from the training labels with k-means
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=80
# Number of boxes each grid cell predicts, which must match the number of anchors. To use more anchors, increase num; if Obj then trends toward 0 during training, try increasing object_scale
num=9
# Random crop jitter, used as augmentation to reduce overfitting
jitter=.3
# ignore_thresh is the IoU threshold used in the loss calculation.
# When a predicted box's best IoU with a ground-truth box exceeds ignore_thresh, that box is excluded from the background (objectness) loss; otherwise it is penalized as background. The value therefore controls how many predicted boxes take part in the loss: set too high (close to 1), few predictions are ignored and training can overfit; set too low, a large share of predictions drop out of the loss and box regression can underfit.
# Typical values lie between 0.5 and 0.7. Earlier setups used 0.7 on the small-scale (13*13) head and 0.5 on the 26*26 head; here 0.5 has been changed to 0.7 as well.
ignore_thresh = .7
truth_thresh = 1
# Multi-scale training. With random=1, the network input size is re-drawn every few iterations from 320 to 608 in steps of 32, which can improve precision; set it to 0 (e.g. when GPU memory is limited) to always train at the configured input size
random=1
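
Finally, a rough sketch of the two rules above, modelled on (but simplified from) Darknet's yolo layer and multi-scale resizing:

```python
import random

def objectness_delta(pred_objectness, best_iou, ignore_thresh=0.7):
    """Background-loss gradient for one predicted box (simplified sketch)."""
    if best_iou > ignore_thresh:
        return 0.0                    # overlaps a truth box well: ignored
    return 0.0 - pred_objectness      # otherwise penalized as background

# random=1: every few iterations training picks a new square input size
# from 320 to 608 in steps of 32.
sizes = list(range(320, 609, 32))
new_size = random.choice(sizes)       # e.g. 416
```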
