Thanks,
My model is defined as follows:
ssd_net.py
def gen_sequential(num, block):
    """Build ``num`` empty ``nn.HybridSequential`` containers owned by ``block``.

    Each container is explicitly registered as a child of ``block`` via
    ``block.register_child``. The containers are handed back in a plain
    Python list, so this explicit registration is presumably what makes
    their parameters visible to ``block`` (the original author flagged the
    call as important).

    Args:
        num: Number of containers to create.
        block: Parent block on which every container is registered.

    Returns:
        A list with the ``num`` newly created containers, in creation order.
    """
    containers = [nn.HybridSequential() for _ in range(num)]
    for container in containers:
        # Registration is what ties the container to the parent block.
        block.register_child(container)
    return containers
class MiniSSD(HybridBlock):
    """Small SSD-style detector built on the first VGG16 feature layers.

    The network has ``num_layers`` (6) stages. Every stage produces a
    feature map; on each feature map a class-prediction head, a
    box-regression head and a ``MultiBoxPrior`` anchor generator are applied.

    Args:
        ctx: Context on which the pretrained VGG16 weights are loaded.
        classes: Number of object classes; one extra class is added
            internally (presumably the background class).
        **kwargs: Forwarded to ``HybridBlock.__init__``.
    """

    def __init__(self, ctx, classes=20, **kwargs):
        super(MiniSSD, self).__init__(**kwargs)
        # The hyper-parameters below assume a 3*310*310 input image.
        self.num_layers = 6
        # Object classes + 1 (21 for VOC).
        self.num_class = classes + 1
        with self.name_scope():
            # Conv2D stages that generate the feature maps.
            self.conv_layers = gen_sequential(self.num_layers, self)
            self.conv_params = {
                # Index 0 is a placeholder: the first stage is copied from
                # VGG16 and does not use these parameters.
                "channels": [-1, 1024, 512, 256, 256, 256],
                "kernel_size": [-1, 3, 3, 3, 3, 3],
                "strides": [-1, 1, 1, 1, 1, 1],
                "padding": [-1, 1, 2, 1, 2, 1],
                "pool_type": [-1, "maxpool", "maxpool", "maxpool", "maxpool", "avgpool"],
                "pool_size": [-1, 2, 2, 2, 2, 2],
            }
            # Heads that infer the object class on every feature map.
            self.cls_layers = gen_sequential(self.num_layers, self)
            # Heads that infer the object location on every feature map.
            self.loc_layers = gen_sequential(self.num_layers, self)
            # Kept for compatibility; never filled by this class.
            self.prior_boxes = []

            # Construct the feature-map stages.
            for i in range(self.num_layers):
                if i == 0:
                    # Load the pretrained VGG16 model directly onto ``ctx``
                    # and reuse its feature layers 0-20 as the first stage.
                    base_model = vision.get_model("vgg16", pretrained=True, ctx=ctx)
                    for layer in base_model.features[0:21]:
                        self.conv_layers[0].add(layer)
                else:
                    # conv2d + batchnorm + relu + maxpool/avgpool
                    self.conv_layers[i].add(
                        nn.Conv2D(
                            channels=self.conv_params['channels'][i],
                            kernel_size=self.conv_params['kernel_size'][i],
                            strides=self.conv_params['strides'][i],
                            padding=self.conv_params['padding'][i],
                        )
                    )
                    self.conv_layers[i].add(nn.BatchNorm())
                    self.conv_layers[i].add(nn.Activation("relu"))
                    if self.conv_params['pool_type'][i] == "maxpool":
                        self.conv_layers[i].add(nn.MaxPool2D(self.conv_params['pool_size'][i]))
                    else:
                        self.conv_layers[i].add(nn.AvgPool2D(self.conv_params['pool_size'][i]))

            # Number of prior boxes per feature-map cell, one entry per stage.
            self.boxes_per_cell = [4, 6, 6, 6, 6, 4]

            # Class prediction heads: batchnorm + conv2d.
            for i in range(self.num_layers):
                self.cls_layers[i].add(nn.BatchNorm())
                self.cls_layers[i].add(
                    nn.Conv2D(
                        channels=self.num_class * self.boxes_per_cell[i],
                        kernel_size=3,
                        strides=1,
                        padding=1,
                    )
                )

            # Location prediction heads: batchnorm + conv2d.
            # Each head outputs 4 values per prior box per feature-map cell.
            for i in range(self.num_layers):
                self.loc_layers[i].add(nn.BatchNorm())
                self.loc_layers[i].add(
                    nn.Conv2D(
                        channels=4 * self.boxes_per_cell[i],
                        kernel_size=3,
                        strides=1,
                        padding=1,
                    )
                )

        # Parameters handed to MultiBoxPrior in the forward pass, one entry
        # per stage. Float literals keep the fractions correct even under
        # Python 2 integer division (where 1/3 and 8/310 would both be 0).
        self.prior_box_params = {
            "sizes": [[0.2, 0.3], [0.3, 0.4], [0.4, 0.5], [0.5, 0.6], [0.6, 0.7], [0.8, 0.9]],
            "ratios": [
                [0.5, 1, 2],
                [1, 2, 0.5, 3, 1.0 / 3],
                [1, 2, 0.5, 3, 1.0 / 3],
                [1, 2, 0.5, 3, 1.0 / 3],
                [1, 2, 0.5, 3, 1.0 / 3],
                [1, 2, 0.5],
            ],
            "clip": [False, False, False, False, False, False],
            "steps": [[s / 310.0, s / 310.0] for s in (8, 16, 31, 62, 103, 310)],
        }

    def hybrid_forward(self, F, x, *args, **kwargs):
        """Run all stages and collect predictions and prior boxes.

        All operators go through ``F`` so the block works both imperatively
        (``F`` is ``mxnet.ndarray``) and after ``hybridize()`` (``F`` is
        ``mxnet.symbol``).

        Args:
            F: Operator namespace supplied by Gluon.
            x: Input batch.

        Returns:
            ``[cls_pred, loc_pred, default_box]`` where ``cls_pred`` is
            reshaped to ``(batch, num_class, boxes)``, ``loc_pred`` is the
            flattened concatenation of all location heads, and
            ``default_box`` is reshaped to ``(batch, boxes, 4)``.
        """
        cls_pred_list = []
        loc_pred_list = []
        default_box_list = []
        for i in range(self.num_layers):
            # Feature map of this stage; it also feeds the next stage.
            x = self.conv_layers[i](x)
            # Move channels last before flattening so that the values that
            # belong to one cell/box stay contiguous.
            cls_pred_list.append(
                F.flatten(F.transpose(self.cls_layers[i](x), axes=(0, 2, 3, 1)))
            )
            loc_pred_list.append(
                F.flatten(F.transpose(self.loc_layers[i](x), axes=(0, 2, 3, 1)))
            )
            # MultiBoxPrior derives the anchors from the feature-map size
            # only and returns a single (1, boxes, 4) array whatever the
            # batch size is, so one call per stage is enough; no per-sample
            # Python loop is required.
            anchors = F.contrib.MultiBoxPrior(
                x,
                sizes=self.prior_box_params["sizes"][i],
                ratios=self.prior_box_params["ratios"][i],
                # clip=self.prior_box_params["clip"][i],
                steps=self.prior_box_params["steps"][i],
                offsets=[0.5, 0.5],
            )
            # Replicate along the batch axis so callers still receive the
            # (batch, boxes, 4) layout this block has always returned.
            default_box_list.append(
                F.broadcast_like(anchors, x, lhs_axes=(0,), rhs_axes=(0,))
            )

        cls_pred = F.transpose(
            F.reshape(F.concat(*cls_pred_list, dim=1), shape=(0, -1, self.num_class)),
            axes=(0, 2, 1),
        )
        loc_pred = F.concat(*loc_pred_list, dim=1)
        default_box = F.reshape(F.concat(*default_box_list, dim=1), shape=(0, -1, 4))
        return [cls_pred, loc_pred, default_box]

    def initialize(self, init=None, ctx=None, verbose=False, force_reinit=False):
        """Initialize only the parameters that do not hold data yet.

        Parameters that already carry data (the pretrained VGG16 layers) are
        moved to ``ctx`` instead of being re-initialized, so their weights
        are preserved.

        Args:
            init: Initializer for the parameters without data.
            ctx: Target context for every parameter.
            verbose: Accepted for signature compatibility; not consulted.
            force_reinit: Accepted for signature compatibility; not
                consulted (parameters without data are always initialized
                with ``force_reinit=True``).
        """
        for param in self.collect_params().values():
            # NOTE: ``_data`` is a private attribute of the parameter; it is
            # used here to tell loaded parameters from fresh ones.
            if param._data is not None:
                param.reset_ctx(ctx)
            else:
                param.initialize(init=init, ctx=ctx, force_reinit=True)
-------------------------------------------------------------------------------------------
ssd_train.py:
# Training script for MiniSSD: one pass over an ImageDetIter list file with
# SGD, printing the running average loss and saving the final parameters.
import os

ctx = mx.gpu()
#train_lst_file = "./trainimage-bak.lst"
#train_lst_file = "./VOC-train-all.lst"
#train_lst_file = "./VOC-mini.lst"
#train_lst_file = "./trainimage.lst"
train_lst_file = "./VOC-train-1000.lst"
model_prefix = "minissd"
batch_size = 32
train_iter = mx.image.ImageDetIter(
    batch_size=batch_size,
    data_shape=(3, 310, 310),
    path_imglist=train_lst_file,
    shuffle=True)
num_samples = 1000

# Make sure the checkpoint directory exists before training starts, so a
# missing directory cannot make the final save fail after all the work.
param_dir = "./param"
if not os.path.isdir(param_dir):
    os.makedirs(param_dir)

# Profiling (disabled):
# profiler.set_config(profile_all=True,
#                     filename='chrome_tracing_profile.json',
#                     continuous_dump=True,
#                     aggregate_stats=True)
# profiler.set_state('run')

net = MiniSSD(ctx=ctx)
#net.initialize( init=mx.initializer.Xavier(magnitude=2),ctx=ctx )
net.initialize(init=mx.initializer.Normal(), ctx=ctx)
#net.initialize( ctx=ctx )
HuberLoss = mx.gluon.loss.HuberLoss(rho=1)
SoftmaxCrossEntropyLoss = mx.gluon.loss.SoftmaxCrossEntropyLoss(
    axis=-1, sparse_label=True, from_logits=False, weight=None, batch_axis=0)
# Weight of the classification loss relative to the localization loss.
loss_weight = 1.0
trainer = mx.gluon.Trainer(net.collect_params(),
                           'sgd', {'learning_rate': 0.001, 'wd': 5e-4})
num_epochs = 1
print_batches = 1
period = 1
#net.hybridize()
for epoch in range(num_epochs):
    train_iter.reset()
    # Running loss sum and number of samples seen in this epoch.
    train_loss, n = 0.0, 0
    tic = time.time()
    for i, batch in enumerate(train_iter):
        batch_data = batch.data[0].as_in_context(ctx)
        batch_label = batch.label[0].as_in_context(ctx)
        with autograd.record():
            cls_pred, loc_pred, default_box = net(batch_data)
            tmp = nd.contrib.MultiBoxTarget(
                default_box[0:1, :, :],  # Anchors are shared across batches
                batch_label,
                cls_pred,
                overlap_threshold=0.5,
                ignore_label=-1,
                negative_mining_ratio=3,
                minimum_negative_samples=0,
                negative_mining_thresh=.5,
                variances=(0.1, 0.1, 0.2, 0.2)
            )
            loc_target = tmp[0]
            loc_target_mask = tmp[1]
            cls_target = tmp[2]
            # The mask zeroes the location terms of unmatched anchors.
            loc_loss = HuberLoss(loc_target_mask * loc_pred, loc_target_mask * loc_target)
            # NOTE(review): with negative mining enabled, cls_target may
            # contain the ignore label -1; the loss below applies no mask
            # for it -- confirm that ignored anchors are handled as intended.
            cls_loss = SoftmaxCrossEntropyLoss(nd.transpose(cls_pred, axes=(0, 2, 1)), cls_target)
            loss = loc_loss + cls_loss * loss_weight
        loss.backward()
        trainer.step(batch_size)
        # One device-to-host transfer per batch instead of one per sample.
        train_loss += loss.sum().asscalar()
        n += batch_size
        #sys.exit()
        if (i + 1) % print_batches == 0:
            print(
                "Epoch [%d]. Batch [%d]. Loss [%f]. Time %.1f sec" %
                (epoch, n, train_loss / n, time.time() - tic))
    # save checkpoint
    #profiler.set_state('stop')
    # Per-epoch checkpointing (disabled):
    # if (epoch + 1) % period == 0:
    #     net.save_parameters("./param/" + model_prefix + "-{}.params".format(epoch + 1))
    #     print("Saved checkpoint to {}-{}.params".format(model_prefix, epoch + 1))
net.save_parameters("./param/" + model_prefix + "-last.params")
My GPU is a 1080 Ti with 11 GB of memory.
Thanks!