Dear all,
I noticed in the architecture of some of the pretrained resnet models that the first downsample HybridBlock has kernel = (1,1) and stride = (1,1). I was under the impression that all downsampling operations are performed with kernel = 1, stride = 2. This does not occur in all architectures: resnet18 and resnet34 (v1 and v2) have stride = 2 in all downsample layers, whereas all other resnet architectures have stride = 1 in the first downsample layer and stride = 2 in subsequent downsample layers. Is this normal?
For example, resnet18_v1:
from mxnet.gluon.model_zoo import vision as models
myresnet = models.resnet18_v1(prefix="resnet18_v1_",classes=20)
myresnet.features
Output
HybridSequential(
(0): Conv2D(3 -> 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(1): BatchNorm(fix_gamma=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None)
(2): Activation(relu)
(3): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(1, 1), ceil_mode=False)
(4): HybridSequential(
(0): BasicBlockV1(
(body): HybridSequential(
(0): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm(fix_gamma=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None)
(2): Activation(relu)
(3): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(4): BatchNorm(fix_gamma=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None)
)
)
(1): BasicBlockV1(
(body): HybridSequential(
(0): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm(fix_gamma=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None)
(2): Activation(relu)
(3): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(4): BatchNorm(fix_gamma=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None)
)
)
)
(5): HybridSequential(
(0): BasicBlockV1(
(body): HybridSequential(
(0): Conv2D(64 -> 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(1): BatchNorm(fix_gamma=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None)
(2): Activation(relu)
(3): Conv2D(128 -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(4): BatchNorm(fix_gamma=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None)
)
(downsample): HybridSequential( # <== HERE IS THE STRIDE S=2
(0): Conv2D(64 -> 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm(fix_gamma=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None)
)
)
# more layers output, subsequent downsample operations stride = 2
whereas for resnet50_v2:
from mxnet.gluon.model_zoo import vision as models
myresnet = models.resnet50_v2(prefix="resnet50_v2_",classes=20)
myresnet.features
Output
HybridSequential(
(0): BatchNorm(fix_gamma=True, eps=1e-05, momentum=0.9, axis=1, in_channels=None)
(1): Conv2D(3 -> 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(2): BatchNorm(fix_gamma=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None)
(3): Activation(relu)
(4): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(1, 1), ceil_mode=False)
(5): HybridSequential(
(0): BottleneckV2(
(bn1): BatchNorm(fix_gamma=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None)
(bn3): BatchNorm(fix_gamma=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None)
(bn2): BatchNorm(fix_gamma=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None)
(conv3): Conv2D(None -> 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(conv2): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(conv1): Conv2D(None -> 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(downsample): # <==== HERE IS STRIDE = 1
Conv2D(64 -> 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
# more layers output, subsequent downsample operations stride = 2