I have a piece of code that fails with the following error at the trainer.step line -
UserWarning: Gradient of Parameter `resnetv20_conv0_weight` on context gpu(0) has not been updated by backward since last `step`. This could mean a bug in your model that made it only use a subset of the Parameters (Blocks) for this iteration. If you are intentionally only using a subset, call step with ignore_stale_grad=True to suppress this warning and skip updating of Parameters with stale gradient
I cannot understand why.
What’s happening in this code is that I define a new autograd.function with a custom backward pass and apply it on the model parameters. My goal is to be able to re-weight of the gradients based on certain input matrix, which has the same dimension as the gradient -
class weight_layer(mx.autograd.Function):
def forward(self, x, weights):
self.save_for_backward(weights)
y = 1.0 * x
return y
def backward(self, dy):
weights, = self.saved_tensors
return dy * weights, mx.nd.zeros(shape=weights.shape, ctx=mx.gpu(0))
weight_ly = weight_layer()
with ag.record():
output = net_forward(net, data)
output = [weight_ly(x, mx.nd.ones_like(x, ctx=ctx)) for x in output]
[x.attach_grad() for x in output]
loss = [loss_fns[0](yhat, y) for yhat, y in zip(output, label)]
for l_idx in range(len(loss)):
ag.backward(loss[l_idx]) # Ignore the confidence loss here
trainer.step(batch_size)
Update -
I switched to using a Custom layer and I get the same error. I believe it has something to do with the weight layer I introduce but I can’t figure out what it is.
class GradientWeight(mx.operator.CustomOp):
def forward(self, is_train, req, in_data, out_data, aux):
self.assign(out_data[0], req[0], in_data[0])
def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
weight = in_data[1]
y = in_grad[0] * weight
self.assign(in_grad[0], req[0], y)
@mx.operator.register("gradient_weight")
class GradientWeightProp(mx.operator.CustomOpProp):
def __init__(self):
super(GradientWeightProp, self).__init__(need_top_grad=False)
def list_arguments(self):
return ['data', 'weight']
def list_outputs(self):
return ['output']
def infer_shape(self, in_shape):
data_shape = in_shape[0]
weight_shape = in_shape[0]
output_shape = in_shape[0]
return [data_shape, weight_shape], [output_shape], []
def create_operator(self, ctx, shapes, dtypes):
return GradientWeight()
with ag.record():
output = net_forward(net, data)
output = [mx.nd.Custom(x, mx.nd.ones_like(x, ctx=ctx), op_type="gradient_weight") for x in output]
[x.attach_grad() for x in output]
loss = [loss_fns[0](yhat, y) for yhat, y in zip(output, label)]
All help is appreciated. Thanks