Save and load params in Gluon

I am trying to save and load the model architecture and params files, but I am facing the error below. Can anyone please help me solve this? I train on Linux and run inference on Linux.

Error:

Traceback (most recent call last):
  File "CNN.py", line 137, in <module>
    deserialized_net.load_parameters('modelConvLSTM-0010.params', ctx=ctx)
  File "/home/test/anaconda3/lib/python3.7/site-packages/mxnet/gluon/block.py", line 386, in load_parameters
    filename, ctx, allow_missing, ignore_extra, self.prefix)
  File "/home/test/anaconda3/lib/python3.7/site-packages/mxnet/gluon/parameter.py", line 911, in load
    name[lprefix:], filename, _brief_print_list(arg_dict.keys()))
**AssertionError: Parameter 'data0' is missing in file 'modelConvLSTM-0010.params', which contains parameters: 'net0_conv_lstm0_i2h_weight', 'net0_conv_lstm0_h2h_weight', 'net0_conv_lstm0_i2h_bias', ..., 'net0_dense0_weight', 'net0_dense0_bias', 'net0_dense1_weight', 'net0_dense1_bias'. Please make sure source and target networks have the same prefix.**

Code:

class Net(gluon.HybridBlock):

    def __init__(self, **kwargs):
        super(Net, self).__init__(**kwargs)
        with self.name_scope():
            self.encoder = gluon.rnn.HybridSequentialRNNCell()
            self.encoder.add(mx.gluon.contrib.rnn.Conv3DLSTMCell(
                input_shape=(1, q, rows, columns), hidden_channels=64,
                activation='relu', i2h_kernel=(5, 5, 5), i2h_pad=(2, 2, 2),
                h2h_kernel=(5, 5, 5), conv_layout='NCDHW'))
            self.encoder.add(mx.gluon.contrib.rnn.Conv3DLSTMCell(
                input_shape=(64, q, rows, columns), hidden_channels=32,
                activation='relu', i2h_kernel=(3, 3, 3), i2h_pad=(1, 1, 1),
                h2h_kernel=(3, 3, 3), conv_layout='NCDHW'))

            self.middle = gluon.nn.HybridSequential()
            self.middle.add(mx.gluon.nn.BatchNorm())
            self.middle.add(mx.gluon.nn.Dropout(dropout))

            self.decoder = gluon.nn.HybridSequential()
            self.decoder.add(mx.gluon.nn.Dense(10, flatten=True))
            self.decoder.add(mx.gluon.nn.Dense(1))

    def hybrid_forward(self, F, x, states=None):
        x, states = self.encoder(x, states)
        x = self.middle(x)
        x = self.decoder(x)
        return x, states
def evaluate_accuracy(model, dataloader):

    eval_metrics_1 = mx.metric.MAE()
    eval_metrics_2 = mx.metric.MSE()
    eval_metrics_3 = mx.metric.RMSE()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [eval_metrics_1, eval_metrics_2, eval_metrics_3]:
        eval_metrics.add(child_metric)

    for i, (data, label) in enumerate(dataloader):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        # The ConvLSTM cells need explicit initial states for each batch.
        states = model.encoder.begin_state(batch_size=data.shape[0], ctx=ctx)
        preds, states = model(data, states)
        eval_metrics.update(labels=label, preds=preds)
    return eval_metrics.get()

def fit(model):

    train_loss = []
    val_loss = []

    for e in range(epochs):
        tick = time.time()
        for i, (data, label) in enumerate(train_iter):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)

            states = model.encoder.begin_state(batch_size=data.shape[0], ctx=ctx)

            with autograd.record():
                Y_pred, states = model(data, states)
                loss = loss1(Y_pred, label)
            loss.backward()
            trainer.step(batch_size)

            if i == 0:
                moving_loss = nd.mean(loss).asscalar()
            else:
                moving_loss = .99 * moving_loss + .01 * nd.mean(loss).asscalar()

        train_loss.append(moving_loss)

        val_err = evaluate_accuracy(model, val_iter)
        val_loss.append(val_err[1])
        print("Epoch %s. Loss: %.10f Validation: %s time: %.2f s" % (e, moving_loss, val_err, time.time() - tick))
    return train_loss, val_loss, model

ctx = mx.gpu()
net = Net()
net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
loss1 = mx.gluon.loss.L1Loss()
trainer = mx.gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': lr})
print("Loss before training: ", evaluate_accuracy(net, test_iter))
t_loss, v_loss, net = fit(net)
net.hybridize()
test = evaluate_accuracy(net, test_iter)
print("Test: ", test)

Printing the net params gives:

net0_ (
  Parameter net0_conv_lstm0_i2h_weight (shape=(256, 1, 5, 5, 5), dtype=<class 'numpy.float32'>)
  Parameter net0_conv_lstm0_h2h_weight (shape=(256, 64, 5, 5, 5), dtype=<class 'numpy.float32'>)
  Parameter net0_conv_lstm0_i2h_bias (shape=(256,), dtype=<class 'numpy.float32'>)
  Parameter net0_conv_lstm0_h2h_bias (shape=(256,), dtype=<class 'numpy.float32'>)
  Parameter net0_conv_lstm1_i2h_weight (shape=(128, 64, 3, 3, 3), dtype=<class 'numpy.float32'>)
  Parameter net0_conv_lstm1_h2h_weight (shape=(128, 32, 3, 3, 3), dtype=<class 'numpy.float32'>)
  Parameter net0_conv_lstm1_i2h_bias (shape=(128,), dtype=<class 'numpy.float32'>)
  Parameter net0_conv_lstm1_h2h_bias (shape=(128,), dtype=<class 'numpy.float32'>)
  Parameter net0_batchnorm0_gamma (shape=(32,), dtype=<class 'numpy.float32'>)
  Parameter net0_batchnorm0_beta (shape=(32,), dtype=<class 'numpy.float32'>)
  Parameter net0_batchnorm0_running_mean (shape=(32,), dtype=<class 'numpy.float32'>)
  Parameter net0_batchnorm0_running_var (shape=(32,), dtype=<class 'numpy.float32'>)
  Parameter net0_dense0_weight (shape=(10, 812544), dtype=float32)
  Parameter net0_dense0_bias (shape=(10,), dtype=float32)
  Parameter net0_dense1_weight (shape=(1, 10), dtype=float32)
  Parameter net0_dense1_bias (shape=(1,), dtype=float32)
)

To load the JSON and params files I use:

sym = mx.sym.load_json(open('modelConvLSTM-symbol.json', 'r').read())
deserialized_net = gluon.nn.SymbolBlock(outputs=sym, inputs=mx.sym.var('data'))
deserialized_net.load_parameters('modelConvLSTM-0010.params', ctx=ctx)
test=evaluate_accuracy(deserialized_net, test_iter)
print("Test : ", test)

Please help me find out where I am going wrong here.
Thank you.

.. note:: When there is only one input, it will have the name `data`. When
          there are more than one input, they will be named `data0`, `data1`, etc.

I know this is the reason why I’m not able to load… but I want to know the solution for it.


deserialized_net.load_parameters('ConvLSTM.params', allow_missing=True, ctx=ctx, ignore_extra=True)

This solves the problem but generates another error:

Parameter 'data0' has not been initialized. Note that you should initialize parameters and create Trainer with Block.collect_params() instead of Block.params because the later does not include Parameters of nested child Blocks

When I print the loaded parameters:

(
Parameter data0 (shape=None, dtype=<class 'numpy.float32'>)
Parameter net0_conv_lstm0_i2h_weight (shape=None, dtype=<class 'numpy.float32'>)
Parameter net0_conv_lstm0_i2h_bias (shape=None, dtype=<class 'numpy.float32'>)
Parameter data1 (shape=None, dtype=<class 'numpy.float32'>)
Parameter net0_conv_lstm0_h2h_weight (shape=None, dtype=<class 'numpy.float32'>)
Parameter net0_conv_lstm0_h2h_bias (shape=None, dtype=<class 'numpy.float32'>)
Parameter data2 (shape=None, dtype=<class 'numpy.float32'>)
Parameter net0_conv_lstm1_i2h_weight (shape=None, dtype=<class 'numpy.float32'>)
Parameter net0_conv_lstm1_i2h_bias (shape=None, dtype=<class 'numpy.float32'>)
Parameter data3 (shape=None, dtype=<class 'numpy.float32'>)
Parameter net0_conv_lstm1_h2h_weight (shape=None, dtype=<class 'numpy.float32'>)
Parameter net0_conv_lstm1_h2h_bias (shape=None, dtype=<class 'numpy.float32'>)
Parameter data4 (shape=None, dtype=<class 'numpy.float32'>)
Parameter net0_batchnorm0_gamma (shape=None, dtype=<class 'numpy.float32'>)
Parameter net0_batchnorm0_beta (shape=None, dtype=<class 'numpy.float32'>)
Parameter net0_dense0_weight (shape=None, dtype=<class 'numpy.float32'>)
Parameter net0_dense0_bias (shape=None, dtype=<class 'numpy.float32'>)
Parameter net0_dense1_weight (shape=None, dtype=<class 'numpy.float32'>)
Parameter net0_dense1_bias (shape=None, dtype=<class 'numpy.float32'>)
Parameter net0_batchnorm0_running_mean (shape=None, dtype=<class 'numpy.float32'>)
Parameter net0_batchnorm0_running_var (shape=None, dtype=<class 'numpy.float32'>)
)

Please help… I know I have to initialize the states, but how? Do I have to initialize data0, data1, data2, and data3 separately?

Hi @komal,

Unfortunately I couldn’t get your code running; there are too many undefined terms. It would be useful to provide a minimal reproducible example of this. I created a little test with an LSTM network and don’t see the same issue:

import mxnet as mx

net = mx.gluon.rnn.LSTM(hidden_size=5)
sample = mx.nd.random.uniform(shape=(2,3,4))
net.initialize()
net(sample)
net.export('test', 0)
deserialized_net = mx.gluon.nn.SymbolBlock.imports("test-symbol.json", ['data'], "test-0000.params")
deserialized_net(sample)

So it could be down to the different usage of SymbolBlock. Check to see if my usage helps your case. Good luck! Cheers, Thom

One quick hint on post formatting: try using ``` above and below your code blocks. You’ll find it renders much more clearly after you submit. I added this to your top post.

Here’s the code; you can try executing it:

Please help, and thank you for the suggestion. :sweat_smile::relaxed::relaxed::innocent:

import pandas as pd
import mxnet as mx
from mxnet import gluon, autograd, nd
from mxnet.gluon import nn, rnn
from mxnet.gluon.data import ArrayDataset, DataLoader

rows = 14
columns = 14
batch_size = 16
dropout = 0.2
lr = 0.001
n_out = 1
epochs = 1
q = 3
horizon = 36
sample = mx.nd.random.uniform(shape=(2, 1, q, rows, columns))

class Net(gluon.HybridBlock):

    def __init__(self, **kwargs):
        super(Net, self).__init__(**kwargs)
        with self.name_scope():
            self.encoder = gluon.rnn.HybridSequentialRNNCell()
            self.encoder.add(mx.gluon.contrib.rnn.Conv3DLSTMCell(
                input_shape=(1, q, rows, columns), hidden_channels=64,
                activation='relu', i2h_kernel=(5, 5, 5), i2h_pad=(2, 2, 2),
                h2h_kernel=(5, 5, 5), conv_layout='NCDHW'))
            self.encoder.add(mx.gluon.contrib.rnn.Conv3DLSTMCell(
                input_shape=(64, q, rows, columns), hidden_channels=32,
                activation='relu', i2h_kernel=(3, 3, 3), i2h_pad=(1, 1, 1),
                h2h_kernel=(3, 3, 3), conv_layout='NCDHW'))

            self.middle = gluon.nn.HybridSequential()
            self.middle.add(mx.gluon.nn.BatchNorm())
            self.middle.add(mx.gluon.nn.Dropout(dropout))

            self.decoder = gluon.nn.HybridSequential()
            self.decoder.add(mx.gluon.nn.Dense(10, flatten=True))
            self.decoder.add(mx.gluon.nn.Dense(1))

    def hybrid_forward(self, F, x, states=None):
        x, states = self.encoder(x, states)
        x = self.middle(x)
        x = self.decoder(x)
        return x, states

ctx = mx.cpu()
net = Net()

net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
loss1 = mx.gluon.loss.L1Loss()
trainer = mx.gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': lr})
states = net.encoder.begin_state(batch_size=sample.shape[0], ctx=ctx)
pred, states = net(sample, states)
net.hybridize()

pred, states = net(sample, states)
net.export('ConvLSTM', epoch=epochs)

#deserialized_net = mx.gluon.nn.SymbolBlock.imports("ConvLSTM-symbol.json", ['data'], "ConvLSTM-0001.params")

sym = mx.sym.load_json(open("ConvLSTM-symbol.json", 'r').read())
deserialized_net = gluon.nn.SymbolBlock(outputs=sym, inputs=mx.sym.var('data'))
deserialized_net.load_parameters('ConvLSTM-0001.params', allow_missing=True, ctx=ctx, ignore_extra=True)
sample2 = mx.nd.random.uniform(shape=(2, 1, q, rows, columns))
deserialized_net(sample2)

Hi @komal,

Great, thanks for the reproducible example! State adds complexity here: the states are the additional data nodes in your graph. I used Netron to visualise the graph and looked at the part showing the first Conv3DLSTMCell.

You can see there are 3 data nodes for the first Conv3DLSTMCell: data0 is your actual data, while data1 and data2 are the states that are typical in LSTM models. You’ll need to include all of these as input nodes.

block = mx.gluon.nn.SymbolBlock.imports("ConvLSTM-symbol.json",
                                        ['data' + str(i) for i in range(5)],
                                        "ConvLSTM-0001.params")

And then you’ll have to pass these states in when you want to run the model.
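
For example, a sketch of a forward pass (the state shapes and their ordering are my assumption, read off your two Conv3DLSTMCell definitions; each cell carries two states, matching data1 to data4):

# SymbolBlock has no begin_state, so build zero initial states by hand.
# Shapes assumed from hidden_channels=64/32 with the NCDHW layout.
batch_size = 2
states = [mx.nd.zeros((batch_size, 64, q, rows, columns)),  # cell 0, state 1
          mx.nd.zeros((batch_size, 64, q, rows, columns)),  # cell 0, state 2
          mx.nd.zeros((batch_size, 32, q, rows, columns)),  # cell 1, state 1
          mx.nd.zeros((batch_size, 32, q, rows, columns))]  # cell 1, state 2
sample = mx.nd.random.uniform(shape=(batch_size, 1, q, rows, columns))
out = block(sample, *states)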

Are you sure you need SymbolBlock here? You can simplify all of this by just reinitializing the network (i.e. Net) and calling load_parameters. You can then use begin_state and call your network as you do during training, as in the sketch below.
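
A minimal sketch of that route (assuming you save with save_parameters on the training side and have the Net class definition available at inference time):

# Training side: save the weights only.
net.save_parameters('ConvLSTM.params')

# Inference side: rebuild the architecture, then load the weights.
net2 = Net()
net2.load_parameters('ConvLSTM.params', ctx=ctx)
states = net2.encoder.begin_state(batch_size=sample.shape[0], ctx=ctx)
pred, states = net2(sample, states)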


Thank you. Yes, I can reinitialize the network and use load_parameters, but I’m trying to run inference on a Linux machine (I’m currently on Windows), which is why I need to use the JSON and params files…

Also, is it possible to use ConvLSTM without states? The states are causing me huge problems in getting inference to work.

I changed the last part of the above code to something like this:

sym = mx.sym.load_json(open("ConvLSTM-symbol.json", 'r').read())
deserialized_net = gluon.nn.SymbolBlock(outputs=sym, inputs=mx.sym.var('data'))
deserialized_net.load_parameters('ConvLSTM-0001.params', allow_missing=True, ctx=ctx, ignore_extra=True)
print(deserialized_net.collect_params())
sample2 = mx.nd.random.uniform(shape=(2,1,q,rows,columns))
states2 =deserialized_net.encoder.begin_state( batch_size=sample2.shape[0],ctx=ctx)
deserialized_net(sample2,states2)

but it still doesn’t help; I get a different error:

  Parameter data0 (shape=None, dtype=<class 'numpy.float32'>)
  Parameter net6_conv_lstm0_i2h_weight (shape=None, dtype=<class 'numpy.float32'>)
  Parameter net6_conv_lstm0_i2h_bias (shape=None, dtype=<class 'numpy.float32'>)
  Parameter data1 (shape=None, dtype=<class 'numpy.float32'>)
  Parameter net6_conv_lstm0_h2h_weight (shape=None, dtype=<class 'numpy.float32'>)
  Parameter net6_conv_lstm0_h2h_bias (shape=None, dtype=<class 'numpy.float32'>)
  Parameter data2 (shape=None, dtype=<class 'numpy.float32'>)
  Parameter net6_conv_lstm1_i2h_weight (shape=None, dtype=<class 'numpy.float32'>)
  Parameter net6_conv_lstm1_i2h_bias (shape=None, dtype=<class 'numpy.float32'>)
  Parameter data3 (shape=None, dtype=<class 'numpy.float32'>)
  Parameter net6_conv_lstm1_h2h_weight (shape=None, dtype=<class 'numpy.float32'>)
  Parameter net6_conv_lstm1_h2h_bias (shape=None, dtype=<class 'numpy.float32'>)
  Parameter data4 (shape=None, dtype=<class 'numpy.float32'>)
  Parameter net6_batchnorm0_gamma (shape=None, dtype=<class 'numpy.float32'>)
  Parameter net6_batchnorm0_beta (shape=None, dtype=<class 'numpy.float32'>)
  Parameter net6_dense0_weight (shape=None, dtype=<class 'numpy.float32'>)
  Parameter net6_dense0_bias (shape=None, dtype=<class 'numpy.float32'>)
  Parameter net6_dense1_weight (shape=None, dtype=<class 'numpy.float32'>)
  Parameter net6_dense1_bias (shape=None, dtype=<class 'numpy.float32'>)
  Parameter net6_batchnorm0_running_mean (shape=None, dtype=<class 'numpy.float32'>)
  Parameter net6_batchnorm0_running_var (shape=None, dtype=<class 'numpy.float32'>)
)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-7-786461bdfd93> in <module>
     60 print(deserialized_net.collect_params())
     61 sample2 = mx.nd.random.uniform(shape=(2,1,q,rows,columns))
---> 62 states2 =deserialized_net.encoder.begin_state( batch_size=sample2.shape[0],ctx=ctx)
     63 deserialized_net(sample2,states2)

AttributeError: 'SymbolBlock' object has no attribute 'encoder'

This is not an active thread, but I had to drop a note of thanks for taking the time to give this solution. It saved me from going further down the rabbit hole (the docs) and coming out without solid leads. Thank you.
Curious: how did you figure out that the data input nodes were the answer? The log (assertion error) is the least helpful part.
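
One way to discover the input nodes without a visualiser, sketched under the assumption that the files come from export: a symbol’s arguments cover both parameters and data inputs, so any argument name missing from the params file must be a data input.

import mxnet as mx

sym = mx.sym.load('ConvLSTM-symbol.json')
saved = mx.nd.load('ConvLSTM-0001.params')          # keys look like 'arg:...' or 'aux:...'
saved_names = {k.split(':', 1)[-1] for k in saved}

# Arguments that were not saved as parameters must be inputs.
inputs = [a for a in sym.list_arguments() if a not in saved_names]
print(inputs)  # expected: ['data0', 'data1', 'data2', 'data3', 'data4']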