Building a ResNet Model in PyTorch for CIFAR10 Classification: A Summary

Building the ResNet Model

Structure Analysis

General Framework

Based on the paper Deep Residual Learning for Image Recognition, the general framework of a standard ResNet model is as follows:

| layer_name | out_size | out_channel (18/34 layers) | out_channel (50/101/152 layers) | kernel_size | stride | padding |
| --- | --- | --- | --- | --- | --- | --- |
| Input | 224*224 | 3 | 3 | None | None | None |
| Conv1 | 112*112 | 64 | 64 | 7 | 2 | 3 |
| Maxpool | 56*56 | 64 | 64 | 3 | 2 | 1 |
| Conv2_x | 56*56 | 64 | 64*4=256 | - | - | - |
| Conv3_x | 28*28 | 128 | 128*4=512 | - | - | - |
| Conv4_x | 14*14 | 256 | 256*4=1024 | - | - | - |
| Conv5_x | 7*7 | 512 | 512*4=2048 | - | - | - |
| Avgpool | 1*1 | 512 | 2048 | None | None | None |
| Flatten | 512 / 2048 | 1 | 1 | None | None | None |
| FC | 1000 | 1 | 1 | None | None | None |

The Conv2_x, Conv3_x, Conv4_x, and Conv5_x stages are built by stacking two kinds of basic blocks, BasicBlock and Bottleneck:

| layer_name | ResNet18 | ResNet34 | ResNet50 | ResNet101 | ResNet152 |
| --- | --- | --- | --- | --- | --- |
| Conv2_x | BasicBlock*2 | BasicBlock*3 | Bottleneck*3 | Bottleneck*3 | Bottleneck*3 |
| Conv3_x | BasicBlock*2 | BasicBlock*4 | Bottleneck*4 | Bottleneck*4 | Bottleneck*8 |
| Conv4_x | BasicBlock*2 | BasicBlock*6 | Bottleneck*6 | Bottleneck*23 | Bottleneck*36 |
| Conv5_x | BasicBlock*2 | BasicBlock*3 | Bottleneck*3 | Bottleneck*3 | Bottleneck*3 |

Basic Blocks

ResNet 18/34 use the BasicBlock structure below:

| layer_name | in_size | out_size | out_channel | kernel_size | stride | padding |
| --- | --- | --- | --- | --- | --- | --- |
| Conv1 | x*x | (x/stride)*(x/stride) | out_channel | 3 | stride | 1 |
| Conv2 | x'*x' | x'*x' | out_channel | 3 | 1 | 1 |

Here x' = x/stride, and identity (the input, downsampled when stride=2) is added to Conv2's output before the final ReLU.

ResNet 50/101/152 use the Bottleneck structure below:

| layer_name | in_size | out_size | out_channel | kernel_size | stride | padding |
| --- | --- | --- | --- | --- | --- | --- |
| Conv1 | x*x | x*x | out_channel | 1 | 1 | 0 |
| Conv2 | x*x | (x/stride)*(x/stride) | out_channel | 3 | stride | 1 |
| Conv3 | x'*x' | x'*x' | out_channel*4 | 1 | 1 | 0 |

Here x' = x/stride (the downsampling sits in Conv2, matching the implementation below), and identity is added to Conv3's output before the final ReLU.

stride and identity

When a block is the first block of Conv3_x, Conv4_x, or Conv5_x, stride=2 and identity is the input passed through a downsampling shortcut. (In the Bottleneck networks the first block of Conv2_x also needs this 1*1 projection, with stride=1, because the channel count jumps from 64 to 64*4=256; the condition in _make_layer below accounts for this.)

nn.Sequential(
    nn.Conv2d(self.in_channel, out_channel * block.expansion, kernel_size=1, stride=stride, bias=False),
    nn.BatchNorm2d(out_channel * block.expansion),
)

For every other block, stride=1 and identity is the input itself:

nn.Sequential()
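
In the implementation below this corresponds to downsample=None; an empty nn.Sequential() returns its input unchanged, so both express the identity mapping.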

Network Implementation
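
The implementations below assume the following imports at the top of the file (exactly the names the code uses):

from typing import List, Optional, Type, Union

import torch
import torch.nn as nn
from torch import Tensor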

BasicBlock

class BasicBlock(nn.Module):
expansion: int = 1

def __init__(
self,
in_channel: int,
out_channel: int,
stride: int = 1,
downsample: Optional[nn.Module] = None,
) -> None:
super().__init__()
# transforming (batch_size * x * x * input_channel) to (batch_size * x * x * output_channel)
# or (batch_size * x/2 * x/2 * output_channel)
# floor(((x - 3 + 2 * 1) / stride) + 1) => floor(x) stride = 1
# => floor(x/2) stride = 2
self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channel)
# transforming (batch_size * x' * x' * output_channel) to (batch_size * x' * x' * output_channel)
# floor(((x' - 3 + 2 * 1) / 1) + 1) => floor(x')
self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_channel)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride

def forward(self, x: Tensor) -> Tensor:

if self.downsample is not None:
identity = self.downsample(x)
else:
identity = x

out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)

out = self.conv2(out)
out = self.bn2(out)

out += identity
out = self.relu(out)

return out

Bottleneck

class Bottleneck(nn.Module):
expansion: int = 4

def __init__(
self,
in_channel: int,
out_channel: int,
stride: int = 1,
downsample: Optional[nn.Module] = None,
) -> None:
super().__init__()

        # transforming (batch_size * x * x * input_channel) to (batch_size * x * x * output_channel)
        # floor(((x - 1 + 2 * 0) / 1) + 1) => floor(x)
self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channel)
# transforming (batch_size * x * x * output_channel) to (batch_size * x * x * output_channel)
# or (batch_size * x/2 * x/2 * output_channel)
# floor(((x - 3 + 2 * 1) / stride) + 1) => floor(x) stride = 1
# => floor(x/2) stride = 2
self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_channel)
        # transforming (batch_size * x' * x' * output_channel) to (batch_size * x' * x' * (output_channel * expansion))
        # floor(((x' - 1 + 2 * 0) / 1) + 1) => floor(x')
self.conv3 = nn.Conv2d(out_channel, out_channel * self.expansion, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(out_channel * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride

def forward(self, x: Tensor) -> Tensor:

if self.downsample is not None:
identity = self.downsample(x)
else:
identity = x

out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)

out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)

out = self.conv3(out)
out = self.bn3(out)

out += identity
out = self.relu(out)

return out
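
As a quick sanity check (the sizes here are illustrative, not part of the original project), the first Bottleneck of Conv2_x expands 64 channels to 256 while keeping the 56*56 resolution:

down = nn.Sequential(
    nn.Conv2d(64, 256, kernel_size=1, stride=1, bias=False),
    nn.BatchNorm2d(256),
)
block = Bottleneck(64, 64, stride=1, downsample=down)
print(block(torch.randn(1, 64, 56, 56)).shape)  # torch.Size([1, 256, 56, 56])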

General Framework

class ResNet(nn.Module):
def __init__(self, block: Type[Union[BasicBlock, Bottleneck]], num_block: List[int], num_classes: int = 1000) -> None:
super().__init__()
self.in_channel = 64
# transforming (batch_size * 224 * 224 * input_channel) to (batch_size * 112 * 112 * 64)
# floor(((224 - 7 + 2 * 3) / 2) + 1) => floor(112.5) => floor(112)
self.conv1 = nn.Sequential(
nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2, padding=3, bias=False),
nn.BatchNorm2d(self.in_channel),
nn.ReLU(inplace=True),
)
# transforming (batch_size * 112 * 112 * 64) to (batch_size * 56 * 56 * 64)
# floor(((112 - 3 + 2 * 1) / 2) + 1) => floor(56.5) => floor(56)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
# transforming (batch_size * 56 * 56 * 64) to (batch_size * 56 * 56 * (64 * block.expansion))
self.conv2_x = self._make_layer(block, 64, num_block[0], stride=1)
# transforming (batch_size * 56 * 56 * (64 * block.expansion)) to (batch_size * 28 * 28 * (128 * block.expansion))
self.conv3_x = self._make_layer(block, 128, num_block[1], stride=2)
# transforming (batch_size * 28 * 28 * (128 * block.expansion)) to (batch_size * 14 * 14 * (256 * block.expansion))
self.conv4_x = self._make_layer(block, 256, num_block[2], stride=2)
# transforming (batch_size * 14 * 14 * (256 * block.expansion)) to (batch_size * 7 * 7 * (512 * block.expansion))
self.conv5_x = self._make_layer(block, 512, num_block[3], stride=2)
# transforming (batch_size * 7 * 7 * (512 * block.expansion)) to (batch_size * 1 * 1 * (512 * block.expansion))
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # transforming (batch_size * (512 * block.expansion)) to (batch_size * num_classes)
self.fc = nn.Linear(512 * block.expansion, num_classes)

def _make_layer(
self, block: Type[Union[BasicBlock, Bottleneck]], out_channel: int, num_blocks: int, stride: int = 1
) -> nn.Sequential:
        downsample = None
        # a projection shortcut is needed whenever the spatial size or the channel count changes;
        # checking only the stride would break the first Bottleneck of Conv2_x (64 -> 256 channels)
        if stride != 1 or self.in_channel != out_channel * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, out_channel * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channel * block.expansion),
            )
layers = []
layers.append(block(self.in_channel, out_channel, stride, downsample))
self.in_channel = out_channel * block.expansion
for _ in range(1, num_blocks):
layers.append(block(self.in_channel, out_channel))
return nn.Sequential(*layers)

def forward(self, x: Tensor) -> Tensor:
x = self.conv1(x)
x = self.maxpool(x)
x = self.conv2_x(x)
x = self.conv3_x(x)
x = self.conv4_x(x)
x = self.conv5_x(x)
x = self.avgpool(x)
x = torch.flatten(x, 1)
x = self.fc(x)

return x

Constructing the Networks

def ResNet18() -> ResNet:
return ResNet(BasicBlock, [2, 2, 2, 2])


def ResNet34() -> ResNet:
return ResNet(BasicBlock, [3, 4, 6, 3])


def ResNet50() -> ResNet:
return ResNet(Bottleneck, [3, 4, 6, 3])


def ResNet101() -> ResNet:
return ResNet(Bottleneck, [3, 4, 23, 3])


def ResNet152() -> ResNet:
return ResNet(Bottleneck, [3, 8, 36, 3])
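
A minimal smoke test (the batch size and 224*224 input are illustrative):

model = ResNet50()
print(model(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 1000])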

References

PyTorch - SOURCE CODE FOR TORCHVISION.MODELS.RESNET

明素 - ResNet Explained (ResNet详解)

Recap

Setting the bias parameter of nn.Conv2d()

When nn.Conv2d() is followed by nn.BatchNorm2d(), the bias parameter can be set to False.

This is because BN subtracts the per-channel batch mean right after the convolution, so any constant bias the convolution adds is cancelled out and has no effect on the output.

Omitting the bias also saves some GPU memory.
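
A quick check of this claim (the shapes here are illustrative): in training mode BN normalizes with the per-channel batch mean, so a constant per-channel offset, which is exactly what a conv bias adds, cancels out:

bn = nn.BatchNorm2d(8)
x = torch.randn(4, 8, 16, 16)
offset = torch.randn(1, 8, 1, 1)  # plays the role of a per-channel conv bias
print(torch.allclose(bn(x), bn(x + offset), atol=1e-5))  # True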

Reference

7s记忆的鱼 - [PyTorch] When to use the bias parameter in Conv2d() and when not to

The nn.AdaptiveAvgPool2d function
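
Unlike nn.AvgPool2d, which takes a kernel size, nn.AdaptiveAvgPool2d takes the target output size and derives the pooling window itself, so it maps any input resolution to a fixed shape. A quick illustration:

pool = nn.AdaptiveAvgPool2d((1, 1))
print(pool(torch.randn(1, 64, 7, 7)).shape)  # torch.Size([1, 64, 1, 1])
print(pool(torch.randn(1, 64, 8, 8)).shape)  # torch.Size([1, 64, 1, 1])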


What the * operator does

The * operator unpacks a sequence into individual positional arguments:

a = (0,1,2,3,4,5,6,7,8,9)
b = [0,1,2,3,4,5,6,7,8,9]
print(*a)
print(*b)

It unpacks the elements of a list or tuple one by one:

0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
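
This is exactly how nn.Sequential(*layers) in _make_layer works: the blocks collected in the layers list are unpacked into separate positional arguments.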

Reference

TEDxPY - Notes on Python *args usage

pip install defaults to the base environment

When using pip install inside a conda virtual environment, run it through the environment's interpreter so packages land in the active environment rather than base:

python -m pip install <package>

Reference

timertimer - Fixing pip installs landing in the base environment instead of the active conda environment

CIFAR10-Specific Model

| layer_name | out_size | out_channel | kernel_size | stride | padding |
| --- | --- | --- | --- | --- | --- |
| Input | 32*32 | 3 | None | None | None |
| Conv1 | 32*32 | 16 | 3 | 1 | 1 |
| Conv2_x | 32*32 | 16 | - | - | - |
| Conv3_x | 16*16 | 32 | - | - | - |
| Conv4_x | 8*8 | 64 | - | - | - |
| Avgpool | 1*1 | 64 | None | None | None |
| Flatten | 64 | 1 | None | None | None |
| FC | 10 | 1 | None | None | None |

The Conv2_x, Conv3_x, and Conv4_x stages are composed of Block modules:

| layer_name | ResNet_CIFAR10 |
| --- | --- |
| Conv2_x | Block*n |
| Conv3_x | Block*n |
| Conv4_x | Block*n |
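
Each stage stacks n blocks with two 3*3 convolutions each, so together with the stem convolution and the FC layer the total depth is 6n + 2; n = 3, 5, 7, 9, 18, 200 yields ResNet 20/32/44/56/110/1202.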

The Block structure is as follows:

| layer_name | in_size | out_size | out_channel | kernel_size | stride | padding |
| --- | --- | --- | --- | --- | --- | --- |
| Conv1 | x*x | (x/stride)*(x/stride) | out_channel | 3 | stride | 1 |
| Conv2 | x'*x' | x'*x' | out_channel | 3 | 1 | 1 |

Here x' = x/stride, and identity (the input, downsampled when stride=2) is added to Conv2's output before the final ReLU.

When a block is the first block of Conv3_x or Conv4_x, stride=2 and identity is the downsampled input:

nn.Sequential(
    nn.Conv2d(self.in_channel, out_channel * block.expansion, kernel_size=1, stride=stride, bias=False),
    nn.BatchNorm2d(out_channel * block.expansion),
)

For every other block (and all of Conv2_x), stride=1 and identity is the input itself:

nn.Sequential()

Model Implementation

class Block(nn.Module):
expansion: int = 1

def __init__(
self,
in_channel: int,
out_channel: int,
stride: int = 1,
downsample: Optional[nn.Module] = None,
) -> None:
super().__init__()
# transforming (batch_size * x * x * input_channel) to (batch_size * x * x * output_channel)
# or (batch_size * x/2 * x/2 * output_channel)
# floor(((x - 3 + 2 * 1) / stride) + 1) => floor(x) stride = 1
# => floor(x/2) stride = 2
self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channel)
# transforming (batch_size * x' * x' * output_channel) to (batch_size * x' * x' * output_channel)
# floor(((x' - 3 + 2 * 1) / 1) + 1) => floor(x')
self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_channel)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride

def forward(self, x: Tensor) -> Tensor:
if self.downsample is not None:
identity = self.downsample(x)
else:
identity = x

out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)

out = self.conv2(out)
out = self.bn2(out)

out += identity
out = self.relu(out)

return out
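
The CIFAR10 ResNet mirrors the general framework, with a 3*3 stem, three stages, and explicit Kaiming weight initialization added: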
class ResNet(nn.Module):
    def __init__(self, block: Type[Block], num_block: int, num_classes: int = 10) -> None:
super().__init__()
self.in_channel = 16
# transforming (batch_size * 32 * 32 * input_channel) to (batch_size * 32 * 32 * 16)
        # floor(((32 - 3 + 2 * 1) / 1) + 1) => floor(32)
self.conv1 = nn.Sequential(
nn.Conv2d(3, self.in_channel, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(self.in_channel),
nn.ReLU(inplace=True),
)
# transforming (batch_size * 32 * 32 * 16) to (batch_size * 32 * 32 * 16)
self.conv2_x = self._make_layer(block, 16, num_block, stride=1)
# transforming (batch_size * 32 * 32 * 16) to (batch_size * 16 * 16 * 32)
self.conv3_x = self._make_layer(block, 32, num_block, stride=2)
        # transforming (batch_size * 16 * 16 * 32) to (batch_size * 8 * 8 * 64)
self.conv4_x = self._make_layer(block, 64, num_block, stride=2)
# transforming (batch_size * 8 * 8 * 64) to (batch_size * 1 * 1 * 64)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
# transforming (batch_size * 64) to (batch_size * num_classes)
self.fc = nn.Linear(64 * block.expansion, num_classes)

for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)

    def _make_layer(self, block: Type[Block], out_channel: int, num_blocks: int, stride: int = 1) -> nn.Sequential:
        downsample = None
        # expansion is 1 here, so only a stride change requires the projection shortcut
        if stride != 1:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, out_channel * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channel * block.expansion),
            )
layers = []
layers.append(block(self.in_channel, out_channel, stride, downsample))
self.in_channel = out_channel * block.expansion
for _ in range(1, num_blocks):
layers.append(block(self.in_channel, out_channel))
return nn.Sequential(*layers)

def forward(self, x: Tensor) -> Tensor:
x = self.conv1(x)
x = self.conv2_x(x)
x = self.conv3_x(x)
x = self.conv4_x(x)
x = self.avgpool(x)
x = torch.flatten(x, 1)
x = self.fc(x)

return x
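
The constructors pass the per-stage block count n directly: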
def ResNet20() -> ResNet:
return ResNet(Block, 3)


def ResNet32() -> ResNet:
return ResNet(Block, 5)


def ResNet44() -> ResNet:
return ResNet(Block, 7)


def ResNet56() -> ResNet:
return ResNet(Block, 9)


def ResNet110() -> ResNet:
return ResNet(Block, 18)


def ResNet1202() -> ResNet:
return ResNet(Block, 200)
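
A minimal smoke test (the batch size is illustrative):

model = ResNet20()
print(model(torch.randn(1, 3, 32, 32)).shape)  # torch.Size([1, 10])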

Model Training

The training procedure is essentially the same as in the AlexNet_CIFAR10 project, so shared details are not repeated here.

Packaging a Custom Python Library

In this experiment, the dataset mean/standard-deviation computation from the AlexNet_CIFAR10 project was packaged under the utils folder.

An empty __init__.py file must be created inside the utils folder to mark it as an importable Python package.
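
A minimal sketch of the layout (the module and function names here are illustrative, not necessarily the project's actual ones):

utils/
├── __init__.py    # empty; marks utils as an importable package
└── mean_std.py    # e.g. defines compute_mean_std(dataset)

after which the helper can be imported as `from utils.mean_std import compute_mean_std`.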