A Simple Implementation of YOLOv5 Based on PyTorch
[GiantPandaCV Foreword] This article focuses on re-implementing the network-structure code of the PyTorch version of YOLOv5, making the code easier to understand and simplifying the configuration file, while also sorting out the four YOLOv5 network sizes; working through this gives a deeper understanding of the v5 network. I hope readers take something away from this article, and I would be glad to discuss ways the code could be written better.
1. Complete Network Code
- Implementation idea: the common-module code from v5 is kept, because that part is easy to follow and the overall code is fairly simple; the main work is assembling the full network structure, since building the network by parsing a yaml file is not very friendly for some developers.
- Variables used in the network (a short sketch of how the scaling factors are applied follows this list):
  c1: input channels, c2: output channels, k: kernel size, s: stride, p: padding, g: groups, act: activation function, e: channel expansion factor
  gw: network width factor, gd: network depth factor, n: number of module repeats, nc: number of classes
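A minimal sketch (my own illustration, not part of the article's code) of how the scaling factors work: channel counts are multiplied by gw and repeat counts by gd, then rounded, exactly as the round(c * e) and round(n * e[0]) calls in the code below do.
def scale_channels(c, gw):      # hypothetical helper, mirrors round(c * e) in CBL / CSP1_n
    return round(c * gw)
def scale_depth(n, gd):         # hypothetical helper, mirrors round(n * e[0]) in CSP1_n
    return round(n * gd)
# yolov5s uses gd=0.33, gw=0.50 (see the config dict at the end of the full code):
print(scale_channels(1024, 0.50))   # 512 channels instead of 1024
print(scale_depth(9, 0.33))         # 3 residual-unit repeats instead of 9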
- Backbone network code: CSPDarknet53
import torch
import torch.nn as nn
def autopad(k, p=None): # kernel, padding
# Pad to 'same'
if p is None:
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
return p
class CBL(nn.Module):
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, e=1.0):
super(CBL, self).__init__()
c1 = round(c1 * e)
c2 = round(c2 * e)
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
self.bn = nn.BatchNorm2d(c2)
self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
def forward(self, x):
return self.act(self.bn(self.conv(x)))
class Focus(nn.Module):
def __init__(self, c1, c2, k=3, s=1, p=1, g=1, act=True, e=1.0):
super(Focus, self).__init__()
c2 = round(c2 * e)
self.conv = CBL(c1 * 4, c2, k, s, p, g, act)
def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
flatten_channel = torch.cat([x[..., 0::2, 0::2],
x[..., 1::2, 0::2],
x[..., 0::2, 1::2],
x[..., 1::2, 1::2]], dim=1)
return self.conv(flatten_channel)
class SPP(nn.Module):
def __init__(self, c1, c2, k=(5, 9, 13), e=1.0):
super(SPP, self).__init__()
c1 = round(c1 * e)
c2 = round(c2 * e)
c_ = c1 // 2
self.cbl_before = CBL(c1, c_, 1, 1)
self.max_pool = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
self.cbl_after = CBL(c_ * 4, c2, 1, 1)
def forward(self, x):
x = self.cbl_before(x)
x_cat = torch.cat([x] + [m(x) for m in self.max_pool], 1)
return self.cbl_after(x_cat)
class ResUnit_n(nn.Module):
def __init__(self, c1, c2, n):
super(ResUnit_n, self).__init__()
self.shortcut = c1 == c2
        # build an independent residual unit for each repeat so the n repeats do not share weights
        self.res_unit_n = nn.Sequential(*[
            nn.Sequential(
                CBL(c1, c1, k=1, s=1, p=0),
                CBL(c1, c2, k=3, s=1, p=1)
            ) for _ in range(n)
        ])
def forward(self, x):
return x + self.res_unit_n(x) if self.shortcut else self.res_unit_n(x)
class CSP1_n(nn.Module):
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, n=1, e=None):
super(CSP1_n, self).__init__()
c1 = round(c1 * e[1])
c2 = round(c2 * e[1])
n = round(n * e[0])
c_ = c2 // 2
self.up = nn.Sequential(
CBL(c1, c_, k, s, autopad(k, p), g, act),
ResUnit_n(c_, c_, n),
            # nn.Conv2d(c_, c_, 1, 1, 0, bias=False)  # removed in the latest yolov5 code, so this differs slightly from the structure diagrams found online
)
self.bottom = nn.Conv2d(c1, c_, 1, 1, 0)
self.tie = nn.Sequential(
nn.BatchNorm2d(c_ * 2),
nn.LeakyReLU(),
nn.Conv2d(c_ * 2, c2, 1, 1, 0, bias=False)
)
def forward(self, x):
total = torch.cat([self.up(x), self.bottom(x)], dim=1)
out = self.tie(total)
return out
class CSPDarkNet(nn.Module):
def __init__(self, gd=0.33, gw=0.5):
super(CSPDarkNet, self).__init__()
self.truck_big = nn.Sequential(
Focus(3, 64, e=gw),
CBL(64, 128, k=3, s=2, p=1, e=gw),
CSP1_n(128, 128, n=3, e=[gd, gw]),
CBL(128, 256, k=3, s=2, p=1, e=gw),
CSP1_n(256, 256, n=9, e=[gd, gw]),
)
self.truck_middle = nn.Sequential(
CBL(256, 512, k=3, s=2, p=1, e=gw),
CSP1_n(512, 512, n=9, e=[gd, gw]),
)
self.truck_small = nn.Sequential(
CBL(512, 1024, k=3, s=2, p=1, e=gw),
SPP(1024, 1024, e=gw)
)
def forward(self, x):
h_big = self.truck_big(x) # torch.Size([2, 128, 76, 76])
h_middle = self.truck_middle(h_big)
h_small = self.truck_small(h_middle)
return h_big, h_middle, h_small
def darknet53(gd, gw, pretrained, **kwargs):
model = CSPDarkNet(gd, gw)
if pretrained:
if isinstance(pretrained, str):
model.load_state_dict(torch.load(pretrained))
else:
            raise Exception(f"darknet requires a pretrained path, got [{pretrained}]")
    return model
- Building the full network
import torch
import torch.nn as nn
from cspdarknet53v5 import darknet53
def autopad(k, p=None): # kernel, padding
# Pad to 'same'
if p is None:
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
return p
class UpSample(nn.Module):
def __init__(self):
super(UpSample, self).__init__()
self.up_sample = nn.Upsample(scale_factor=2, mode='nearest')
def forward(self, x):
return self.up_sample(x)
class CBL(nn.Module):
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, e=1.0):
super(CBL, self).__init__()
c1 = round(c1 * e)
c2 = round(c2 * e)
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
self.bn = nn.BatchNorm2d(c2)
self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
def forward(self, x):
return self.act(self.bn(self.conv(x)))
class ResUnit_n(nn.Module):
def __init__(self, c1, c2, n):
super(ResUnit_n, self).__init__()
self.shortcut = c1 == c2
        # build an independent residual unit for each repeat so the n repeats do not share weights
        self.res_unit_n = nn.Sequential(*[
            nn.Sequential(
                CBL(c1, c1, k=1, s=1, p=0),
                CBL(c1, c2, k=3, s=1, p=1)
            ) for _ in range(n)
        ])
def forward(self, x):
return x + self.res_unit_n(x) if self.shortcut else self.res_unit_n(x)
class CSP1_n(nn.Module):
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, n=1, e=None):
super(CSP1_n, self).__init__()
c1 = round(c1 * e[1])
c2 = round(c2 * e[1])
n = round(n * e[0])
c_ = c2 // 2
self.up = nn.Sequential(
CBL(c1, c_, k, s, autopad(k, p), g, act),
ResUnit_n(c_, c_, n),
            # nn.Conv2d(c_, c_, 1, 1, 0, bias=False)  # removed in the latest yolov5 code, so this differs slightly from the structure diagrams found online
)
self.bottom = nn.Conv2d(c1, c_, 1, 1, 0)
self.tie = nn.Sequential(
nn.BatchNorm2d(c_ * 2),
nn.LeakyReLU(),
nn.Conv2d(c_ * 2, c2, 1, 1, 0, bias=False)
)
def forward(self, x):
total = torch.cat([self.up(x), self.bottom(x)], dim=1)
out = self.tie(total)
return out
class CSP2_n(nn.Module):
def __init__(self, c1, c2, e=0.5, n=1):
super(CSP2_n, self).__init__()
c_ = int(c1 * e)
cbl_2 = nn.Sequential(
CBL(c1, c_, 1, 1, 0),
CBL(c_, c_, 1, 1, 0),
)
self.cbl_2n = nn.Sequential(*[cbl_2 for _ in range(n)])
self.conv_up = nn.Conv2d(c_, c_, 1, 1, 0)
self.conv_bottom = nn.Conv2d(c1, c_, 1, 1, 0)
self.tie = nn.Sequential(
nn.BatchNorm2d(c_ * 2),
nn.LeakyReLU(),
nn.Conv2d(c_ * 2, c2, 1, 1, 0)
)
def forward(self, x):
up = self.conv_up(self.cbl_2n(x))
total = torch.cat([up, self.conv_bottom(x)], dim=1)
out = self.tie(total)
return out
class yolov5(nn.Module):
def __init__(self, nc=80, gd=0.33, gw=0.5):
super(yolov5, self).__init__()
# ------------------------------Backbone--------------------------------
self.backbone = darknet53(gd, gw, None)
# ------------------------------Neck------------------------------------
self.neck_small = nn.Sequential(
CSP1_n(1024, 1024, n=3, e=[gd, gw]),
CBL(1024, 512, 1, 1, 0, e=gw)
)
self.up_middle = nn.Sequential(
UpSample()
)
self.out_set_middle = nn.Sequential(
CSP1_n(1024, 512, n=3, e=[gd, gw]),
CBL(512, 256, 1, 1, 0, e=gw),
)
self.up_big = nn.Sequential(
UpSample()
)
self.out_set_tie_big = nn.Sequential(
CSP1_n(512, 256, n=3, e=[gd, gw])
)
self.pan_middle = nn.Sequential(
CBL(256, 256, 3, 2, 1, e=gw)
)
self.out_set_tie_middle = nn.Sequential(
CSP1_n(512, 512, n=3, e=[gd, gw])
)
self.pan_small = nn.Sequential(
CBL(512, 512, 3, 2, 1, e=gw)
)
self.out_set_tie_small = nn.Sequential(
CSP1_n(1024, 1024, n=3, e=[gd, gw])
)
# ------------------------------Prediction--------------------------------
# prediction
big_ = round(256 * gw)
middle = round(512 * gw)
small_ = round(1024 * gw)
self.out_big = nn.Sequential(
nn.Conv2d(big_, 3 * (5 + nc), 1, 1, 0)
)
self.out_middle = nn.Sequential(
nn.Conv2d(middle, 3 * (5 + nc), 1, 1, 0)
)
self.out_small = nn.Sequential(
nn.Conv2d(small_, 3 * (5 + nc), 1, 1, 0)
)
def forward(self, x):
h_big, h_middle, h_small = self.backbone(x)
neck_small = self.neck_small(h_small)
# ----------------------------up sample 38*38-------------------------------
up_middle = self.up_middle(neck_small)
middle_cat = torch.cat([up_middle, h_middle], dim=1)
out_set_middle = self.out_set_middle(middle_cat)
# ----------------------------up sample 76*76-------------------------------
up_big = self.up_big(out_set_middle) # torch.Size([2, 128, 76, 76])
big_cat = torch.cat([up_big, h_big], dim=1)
out_set_tie_big = self.out_set_tie_big(big_cat)
        # ----------------------------PAN 38*38-------------------------------------
neck_tie_middle = torch.cat([self.pan_middle(out_set_tie_big), out_set_middle], dim=1)
up_middle = self.out_set_tie_middle(neck_tie_middle)
        # ----------------------------PAN 19*19-------------------------------------
neck_tie_small = torch.cat([self.pan_small(up_middle), neck_small], dim=1)
out_set_small = self.out_set_tie_small(neck_tie_small)
# ----------------------------prediction-------------------------------------
out_small = self.out_small(out_set_small)
out_middle = self.out_middle(up_middle)
out_big = self.out_big(out_set_tie_big)
return out_small, out_middle, out_big
if __name__ == '__main__':
    # how the configuration is written
config = {
# gd gw
'yolov5s': [0.33, 0.50],
'yolov5m': [0.67, 0.75],
'yolov5l': [1.00, 1.00],
'yolov5x': [1.33, 1.25]
}
    # change this key to switch the model size
net_size = config['yolov5x']
net = yolov5(nc=80, gd=net_size[0], gw=net_size[1])
print(net)
a = torch.randn(2, 3, 416, 416)
y = net(a)
print(y[0].shape, y[1].shape, y[2].shape)
2. Analysis of the Network Structure
- Residual block ResUnit_n (a small usage sketch follows the code)
class ResUnit_n(nn.Module):
def __init__(self, c1, c2, n):
super(ResUnit_n, self).__init__()
self.shortcut = c1 == c2
        # build an independent residual unit for each repeat so the n repeats do not share weights
        self.res_unit_n = nn.Sequential(*[
            nn.Sequential(
                CBL(c1, c1, k=1, s=1, p=0),
                CBL(c1, c2, k=3, s=1, p=1)
            ) for _ in range(n)
        ])
def forward(self, x):
        return x + self.res_unit_n(x) if self.shortcut else self.res_unit_n(x)
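A small usage sketch (my own check, meant to run in the same file as the classes above): when c1 == c2 the shortcut is active and the input is added back to the output, so the shape is preserved.
import torch
x = torch.randn(1, 64, 52, 52)
m = ResUnit_n(64, 64, n=2)   # c1 == c2, so the residual add is used
print(m(x).shape)            # torch.Size([1, 64, 52, 52])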
- CSP1_x structure
  Construction idea: the CSP1_n code is reorganized by picturing the CSP block as an animal lying on its belly with its head on the left and its tail on the right: up is the branch closer to the sky, bottom is the branch closer to the ground, and tie is the animal's tail where the two branches are joined. A small channel-count check follows the code.
class CSP1_n(nn.Module):
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, n=1, e=None):
super(CSP1_n, self).__init__()
c1 = round(c1 * e[1])
c2 = round(c2 * e[1])
n = round(n * e[0])
c_ = c2 // 2
self.up = nn.Sequential(
CBL(c1, c_, k, s, autopad(k, p), g, act),
ResUnit_n(c_, c_, n),
            # nn.Conv2d(c_, c_, 1, 1, 0, bias=False)  # removed in the latest yolov5 code, so this differs slightly from the structure diagrams found online
)
self.bottom = nn.Conv2d(c1, c_, 1, 1, 0)
self.tie = nn.Sequential(
nn.BatchNorm2d(c_ * 2),
nn.LeakyReLU(),
nn.Conv2d(c_ * 2, c2, 1, 1, 0, bias=False)
)
def forward(self, x):
total = torch.cat([self.up(x), self.bottom(x)], dim=1)
out = self.tie(total)
        return out
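A quick channel-count check (my own sketch, run in the same file as the classes above): with e = [gd, gw] = [0.33, 0.5], CSP1_n(128, 128, n=3) is built with round(128 * 0.5) = 64 input/output channels and round(3 * 0.33) = 1 residual unit, so the incoming tensor must already carry the gw-scaled channel count.
import torch
m = CSP1_n(128, 128, n=3, e=[0.33, 0.5])
x = torch.randn(1, 64, 80, 80)   # 64 = round(128 * 0.5) channels
print(m(x).shape)                # torch.Size([1, 64, 80, 80])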
- Building the CSPDarknet backbone (a quick shape check follows the code)
class CSPDarkNet(nn.Module):
def __init__(self, gd=0.33, gw=0.5):
super(CSPDarkNet, self).__init__()
self.truck_big = nn.Sequential(
Focus(3, 64, e=gw),
CBL(64, 128, k=3, s=2, p=1, e=gw),
CSP1_n(128, 128, n=3, e=[gd, gw]),
CBL(128, 256, k=3, s=2, p=1, e=gw),
CSP1_n(256, 256, n=9, e=[gd, gw]),
)
self.truck_middle = nn.Sequential(
CBL(256, 512, k=3, s=2, p=1, e=gw),
CSP1_n(512, 512, n=9, e=[gd, gw]),
)
self.truck_small = nn.Sequential(
CBL(512, 1024, k=3, s=2, p=1, e=gw),
SPP(1024, 1024, e=gw)
)
def forward(self, x):
h_big = self.truck_big(x)
h_middle = self.truck_middle(h_big)
h_small = self.truck_small(h_middle)
        return h_big, h_middle, h_small
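A quick shape check (my own sketch, assuming the backbone code above is saved as cspdarknet53v5.py, matching the import used by the full network): with gd=0.33, gw=0.5 and a 608x608 input, the three feature maps come out at strides 8, 16 and 32.
import torch
from cspdarknet53v5 import CSPDarkNet

backbone = CSPDarkNet(gd=0.33, gw=0.5)
x = torch.randn(1, 3, 608, 608)
h_big, h_middle, h_small = backbone(x)
print(h_big.shape)     # torch.Size([1, 128, 76, 76])  stride 8
print(h_middle.shape)  # torch.Size([1, 256, 38, 38])  stride 16
print(h_small.shape)   # torch.Size([1, 512, 19, 19])  stride 32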
- Building the full network (a sketch of how the head outputs can be reshaped follows the code)
class yolov5(nn.Module):
def __init__(self, nc=80, gd=0.33, gw=0.5):
super(yolov5, self).__init__()
# ------------------------------Backbone------------------------------------
self.backbone = darknet53(gd, gw, None)
# ------------------------------Neck------------------------------------
self.neck_small = nn.Sequential(
CSP1_n(1024, 1024, n=3, e=[gd, gw]),
CBL(1024, 512, 1, 1, 0, e=gw)
)
        # FPN: two upsampling steps, top-down, to strengthen semantic information
self.up_middle = nn.Sequential(
UpSample()
)
self.out_set_middle = nn.Sequential(
CSP1_n(1024, 512, n=3, e=[gd, gw]),
CBL(512, 256, 1, 1, 0, e=gw),
)
self.up_big = nn.Sequential(
UpSample()
)
self.out_set_tie_big = nn.Sequential(
CSP1_n(512, 256, n=3, e=[gd, gw])
)
        # PAN: two downsampling steps, bottom-up, to strengthen localization information
self.pan_middle = nn.Sequential(
CBL(256, 256, 3, 2, 1, e=gw)
)
self.out_set_tie_middle = nn.Sequential(
CSP1_n(512, 512, n=3, e=[gd, gw])
)
self.pan_small = nn.Sequential(
CBL(512, 512, 3, 2, 1, e=gw)
)
self.out_set_tie_small = nn.Sequential(
# CSP2_n(512, 512)
CSP1_n(1024, 1024, n=3, e=[gd, gw])
)
# ------------------------------Prediction------------------------------------
# prediction
big_ = round(256 * gw)
middle = round(512 * gw)
small_ = round(1024 * gw)
self.out_big = nn.Sequential(
nn.Conv2d(big_, 3 * (5 + nc), 1, 1, 0)
)
self.out_middle = nn.Sequential(
nn.Conv2d(middle, 3 * (5 + nc), 1, 1, 0)
)
self.out_small = nn.Sequential(
nn.Conv2d(small_, 3 * (5 + nc), 1, 1, 0)
)
def forward(self, x):
h_big, h_middle, h_small = self.backbone(x)
neck_small = self.neck_small(h_small)
# ----------------------------up sample 38*38--------------------------------
up_middle = self.up_middle(neck_small)
middle_cat = torch.cat([up_middle, h_middle], dim=1)
out_set_middle = self.out_set_middle(middle_cat)
# ----------------------------up sample 76*76--------------------------------
up_big = self.up_big(out_set_middle) # torch.Size([2, 128, 76, 76])
big_cat = torch.cat([up_big, h_big], dim=1)
out_set_tie_big = self.out_set_tie_big(big_cat)
        # ----------------------------PAN 38*38-------------------------------------
neck_tie_middle = torch.cat([self.pan_middle(out_set_tie_big), out_set_middle], dim=1)
up_middle = self.out_set_tie_middle(neck_tie_middle)
        # ----------------------------PAN 19*19-------------------------------------
neck_tie_small = torch.cat([self.pan_small(up_middle), neck_small], dim=1)
out_set_small = self.out_set_tie_small(neck_tie_small)
# ----------------------------prediction-------------------------------------
out_small = self.out_small(out_set_small)
out_middle = self.out_middle(up_middle)
out_big = self.out_big(out_set_tie_big)
        return out_small, out_middle, out_big
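Each prediction head outputs 3 * (5 + nc) channels per location: 3 anchors, each with 4 box offsets, 1 objectness score and nc class scores. A minimal sketch (my own illustration, not part of the article's code) of reshaping one head output into that layout:
import torch

nc = 80
out_small = torch.randn(2, 3 * (5 + nc), 19, 19)   # e.g. the stride-32 head output for a 608 input
bs, _, ny, nx = out_small.shape
# (bs, 3*(5+nc), ny, nx) -> (bs, 3, ny, nx, 5+nc): one prediction vector per anchor per cell
pred = out_small.view(bs, 3, 5 + nc, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
print(pred.shape)                                  # torch.Size([2, 3, 19, 19, 85])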
- How the configurations of the four model sizes are written: they are placed in the config dict below, which only holds the model-scaling parameters; the other hyper-parameters were not moved into the configuration, although the number of classes could be placed there as well (a sketch of such an extended config follows this snippet). In the network code above, the width factor is simply the variable e, which is passed into every module.
config = {
# gd gw
'yolov5s': [0.33, 0.50],
'yolov5m': [0.67, 0.75],
'yolov5l': [1.00, 1.00],
'yolov5x': [1.33, 1.25]
}
# change this key to switch the model size
net_size = config['yolov5x']
net = yolov5(nc=80, gd=net_size[0], gw=net_size[1])
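As mentioned above, the number of classes could also be moved into the configuration; a possible sketch (my own variant, not the article's code):
config = {
    # nc, gd, gw
    'yolov5s': {'nc': 80, 'gd': 0.33, 'gw': 0.50},
    'yolov5m': {'nc': 80, 'gd': 0.67, 'gw': 0.75},
    'yolov5l': {'nc': 80, 'gd': 1.00, 'gw': 1.00},
    'yolov5x': {'nc': 80, 'gd': 1.33, 'gw': 1.25},
}
cfg = config['yolov5s']
net = yolov5(nc=cfg['nc'], gd=cfg['gd'], gw=cfg['gw'])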
The original v5 code splits the Head part of v3 out into a separate Detect class, mainly because v5 uses a number of training tricks, so Detect contains both a training branch and an inference branch. The original v5 code is fairly hard on beginners, above all because of the way the network is written, which demands relatively strong coding skills. That said, configuring a network through a yaml file like this is already used in many companies and may well become the standard way to write production code, so it is still worth mastering. A much-simplified sketch of such a two-branch head is given below.
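For reference, here is a much-simplified sketch of a two-branch head (my own illustration, not the original ultralytics Detect class; the real one also decodes boxes with anchors and grid offsets, which is omitted here):
import torch
import torch.nn as nn

class DetectSketch(nn.Module):
    def __init__(self, nc=80, na=3):
        super().__init__()
        self.nc = nc        # number of classes
        self.na = na        # anchors per scale
        self.no = nc + 5    # x, y, w, h, objectness + class scores

    def forward(self, feats):
        # feats: list of raw head outputs, each of shape (bs, na*no, ny, nx)
        outs = []
        for x in feats:
            bs, _, ny, nx = x.shape
            outs.append(x.view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous())
        if self.training:
            return outs     # training branch: raw predictions, the loss is computed on these
        # inference branch: apply sigmoid and flatten the predictions of all scales
        return torch.cat([o.sigmoid().view(o.shape[0], -1, self.no) for o in outs], dim=1)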
3. Summary
- My personal feeling is that designing networks like this, and writing the code for them, calls for an unconstrained imagination, so that the code has the kind of effortless flow you find in a wuxia novel. (There are many structure diagrams of this network online; I followed Jiang Dabai's diagrams and adjusted them against the latest v5 code.)
- Transformer blocks have appeared in the latest v5 network structure, which feels like a sea change for engineering practice in computer vision; it is worth looking into.