# Round-trip sanity check: run a batch, checkpoint the net truncated at
# fc1, reload a pretrained checkpoint, re-run the same batch, and print
# the element-wise output difference.
# NOTE(review): reconstructed from a whitespace-mangled paste — the loop
# extent is assumed to cover the whole snippet (TODO confirm against the
# original file); smart quotes replaced with plain ASCII quotes so the
# snippet is valid Python.
for batch in data_iter:
    model.forward(batch)
    out1 = model.get_outputs()[0].asnumpy()
    print('saving')
    arg, aux = model.get_params()
    all_layers = model.symbol.get_internals()
    # Truncate the symbol at the fc1 embedding output before saving.
    _sym = all_layers['fc1_output']
    _arg = dict(arg)  # shallow copy instead of a manual key-by-key loop
    mx.model.save_checkpoint(prefix, 1, _sym, _arg, aux)
    sym, arg_params, aux_params = mx.model.load_checkpoint(
        os.path.join(args.pretrained, "model"), args.pretrained_epoch)
    model.set_params(arg_params, aux_params)
    model.forward(batch)
    out2 = model.get_outputs()[0].asnumpy()
    # All-zero diff means save/load preserved the parameters exactly.
    print(f"diff_output:{np.abs(out1 - out2)}")
Below is the working code:
# NOTE(review): reconstructed from a whitespace-mangled paste; indentation
# restored so the snippet is valid Python. Runtime behavior unchanged.
# Run exactly one batch, checkpoint, reload, re-run the same batch, and
# print the element-wise output difference (all zeros expected).
for batch in data_iter:
    model.forward(batch)
    out1 = model.get_outputs()[0]
    break  # one batch is enough for the round-trip check

model.save_checkpoint('my-model', epoch=1)
sym, arg_params, aux_params = mx.model.load_checkpoint('my-model', epoch=1)
model.set_params(arg_params, aux_params)
model.forward(batch)
out2 = model.get_outputs()[0]
print(f'diff_output:{nd.abs(out1 - out2)}')
Prints:
diff_output:
[[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
...
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]]
<NDArray 128x10 @gpu(0)>
For more info regarding saving and loading model parameters, visit here.
And welcome to the community.
Hi @mouryarishik! Which version of MXNet do you use? I did the same as you wrote and got:
[[0.85546875 0.67749023 0.5878906 … 0.06152344 0.03369141 1.9970703 ]
[0.39019775 0.7111206 0.478302 … 0.13378906 0.2421875 0.20977402]
[0.15838623 1.2324219 1.3285522 … 0.4419098 1.2724609 0.41748047]
…
[0.73339844 0.22021484 0.58673096 … 0.28814697 0.38052177 0.15722656]
[1.0070801 0.08300781 0.8363037 … 1.9019775 1.3452148 0.3955078 ]
[0.24072266 0.23144531 0.34838867 … 0.23730469 0.8791504 0.39160156]]
<NDArray 600x512 @gpu(0)>
1.4.1 it is.
Show me your full code if possible.
@mouryarishik This is not my code; it is from [Insightface_repo], but I am testing and editing it according to my own ideas.
(https://github.com/deepinsight/insightface/blob/master/recognition/train.py)
Below is a link to the code where I am testing outputs, lines 315-331. The code is a bit messy; the main part is in those lines.
LINK to py file
The main part is here in batch_callback:
def _batch_callback(param):
    """Per-batch training callback.

    Logs a 20-batch moving average of loss/accuracy to the SummaryWriter,
    steps the learning-rate schedule, forwards to the wrapped speedometer
    callback, and every 10 batches runs a save/load round-trip sanity
    check on the model outputs.

    Relies on enclosing-scope state: ``avr_loss``, ``avr_acc``, ``sw``,
    ``global_step``, ``lr_steps``, ``opt``, ``_cb``, ``model``,
    ``train_dataiter2``.

    NOTE(review): reconstructed from a whitespace-mangled paste;
    indentation restored so the function is valid Python.
    """
    # Hoist the metric lookup so get_name_value() runs once, not twice.
    name_value = param.eval_metric.get_name_value()
    avr_loss.append(name_value[1][1])
    avr_acc.append(name_value[0][1])
    if len(avr_loss) == 20:
        # Flush the 20-batch averages to the summary writer, then reset.
        sw.add_scalar(tag='loss', value=np.mean(avr_loss),
                      global_step=global_step[0])
        sw.add_scalar(tag='accuracy', value=np.mean(avr_acc),
                      global_step=global_step[0])
        avr_loss.clear()
        avr_acc.clear()
    global_step[0] += 1
    mbatch = global_step[0]
    # Learning-rate schedule: decay by 10x once per configured step.
    # (Membership test replaces the original loop-with-break.)
    if mbatch in lr_steps:
        opt.lr *= 0.1
        print('lr change to', opt.lr)
    _cb(param)
    if mbatch % 10 == 0:
        print('Lets check with save and without')
        # NOTE(review): the original also built an unused random array
        # (np.random.rand(1, 3, 112, 112)) here; removed as dead code.
        data_iter = train_dataiter2
        for batch in data_iter:
            model.forward(batch)
            out1 = model.get_outputs()[0]
            break  # only the first batch is needed for the check
        # Save, reload, and re-run the same batch; an all-zero diff means
        # the checkpoint round-trip preserved the parameters exactly.
        model.save_checkpoint('my-model', epoch=1)
        sym, arg_params, aux_params = mx.model.load_checkpoint(
            "my-model", epoch=1)
        model.set_params(arg_params, aux_params)
        model.forward(batch)
        out2 = model.get_outputs()[0]
        print(f'diff_output:{nd.abs(out1 - out2)}')
@mouryarishik, I have a pretrained model, but when I pass everything to model.fit it shows 0 training accuracy. All simple tests on saving and loading worked well outside model.fit and batch_callback. Inside batch_callback it doesn't work.
Show me your training code.
Please give me your email and I will send it. I can't find a convenient way to insert it here.