MXNet C++ code does not work properly, but the equivalent Python code does

Hello everyone
I have created and trained a GAN with 3D convolutions in Python using MXNet Gluon, and exported the trained network and its parameters. When I use a Python script to load the network and parameters and forward data through the generator, it works perfectly fine. However, I also have C++ code that is supposed to do the same thing, and for some reason its output is pretty much useless: the generator should output 32x3 floats between 0 and 1, but the C++ code only produces zeros and ones.
My Python code is the following:

# NOTE(review): the imports were not part of the pasted snippet; they are added
# here so the script is self-contained.
import mxnet as mx
import numpy as np

# Prefix of the output file and location of the exported generator.
filePath = "myPath"
network_dir = "/model/threedgan.Generator/"
mx_context = mx.gpu()

# Rebuild the exported generator from its symbol file (single input named
# "data") and load the trained parameters onto the same context.
gen_net = mx.gluon.nn.SymbolBlock.imports(network_dir + "model_0_newest-symbol.json", ["data"], ctx=mx_context)
gen_net.load_parameters(network_dir + "model_0_newest-0000.params", ctx=mx_context)

# Create random vector: one latent sample of length 200 drawn from N(0, 1).
data_np = np.random.normal(0, 1, (1, 200))
data = mx.nd.array(data_np, ctx=mx_context)

# Forward pass; keep the first entry of the result and move it to host memory.
out = gen_net(data)[0]
out = out.detach().asnumpy()

# Save to npy file.
# NOTE(review): in the pasted snippet this call had been fused into the comment
# above (`#Save to npy file + "object", out)`); restored as np.save — confirm.
np.save(filePath + "object", out)

This is the C++ code:


#include <mxnet-cpp/MxNetCpp.h>

#include <cassert>
#include <string>
#include <vector>
#include <CNNModelLoader.h>
#include <CNNLAOptimizer_threedgan_connector_translator.h>

using namespace mxnet::cpp;    
class CNNPredictor_threedgan_connector_translator_0{
    const std::string file_prefix = "model/threedgan.Generator/model_0_newest";
    const std::vector<std::string> network_input_keys = {
    const std::vector<std::vector<mx_uint>> network_input_shapes = {{1,200}};
    std::vector<mx_uint> network_input_sizes;
    std::vector<std::vector<std::string>> network_arg_names;
    std::vector<Executor *> network_handles;
    Context ctx = Context::cpu(); //Will be updated later in init according to use_gpu
    int dtype = 0; //use data type (float32=0 float64=1 ...)
    explicit CNNPredictor_threedgan_connector_translator_0(){
        init(file_prefix, network_input_keys, network_input_shapes);

        for(Executor * handle : network_handles){
            delete handle;

    void predict(const std::vector<float> &in_noise_,
                 std::vector<float> &out_data_){

        NDArray input_temp;
        input_temp = NDArray(network_input_shapes[0], ctx, false, dtype);
        input_temp.SyncCopyFromCPU(, network_input_sizes[0]);
        std::cout << "Input "<<"--------\n";
        std::cout << input_temp;
        CheckMXNetError("Forward, predict, handle ind. 0");
        std::vector<NDArray> output = network_handles.back()->outputs;
        std::vector<mx_uint> curr_output_shape;
        size_t curr_output_size; 
        curr_output_shape = output[0].GetShape();
        std::cout << "From GPU "<< << << << << <<"--------\n";
        std::cout << output[0];
        curr_output_size = 1;
        for (mx_uint i : curr_output_shape) curr_output_size *= i;
        //Fix due to a bug in the in how the output arrays are initialized when there are multiple outputs
        assert((curr_output_size == out_data_.size()) || (curr_output_size == out_data_[0]));
    Executor* initExecutor(Symbol &sym,
                           std::map<std::string, NDArray> &param_map,
                           const std::vector<std::string> &exec_input_keys,
                           const std::vector<std::vector<mx_uint>> &exec_input_shapes){

        const mx_uint num_exec_input_nodes = exec_input_keys.size();
        for(mx_uint i = 0; i < num_exec_input_nodes; i++){
            param_map[exec_input_keys[i]] = NDArray(exec_input_shapes[i], ctx, false, dtype);

        std::vector<NDArray> param_arrays;
        std::vector<NDArray> grad_array;
        std::vector<OpReqType> grad_reqs;
        std::vector<NDArray> aux_arrays;
        std::map< std::string, NDArray> aux_map;

        sym.InferExecutorArrays(ctx, &param_arrays, &grad_array, &grad_reqs,
                                    &aux_arrays, param_map, std::map<std::string, NDArray>(),
                                    std::map<std::string, OpReqType>(), aux_map);

        Executor *handle = new Executor(sym, ctx, param_arrays, grad_array, grad_reqs, aux_arrays);
        return handle;

    std::vector<mx_uint> getSizesOfShapes(const std::vector<std::vector<mx_uint>> shapes){
        std::vector<mx_uint> sizes;
        for(std::vector<mx_uint> shape : shapes){
            mx_uint val = 1;
            for(mx_uint i: shape){
                val *= i;
        return sizes;

    void CheckMXNetError(std::string loc){
        const char* err = MXGetLastError();
        if (err && err[0] != 0) {
            std::cout << "MXNet error at " << loc << err << std::endl;
    void init(const std::string &file_prefix,
              const std::vector<std::string> &network_input_keys,
              const std::vector<std::vector<mx_uint>> &network_input_shapes){

        CNNLAOptimizer_threedgan_connector_translator optimizer_creator = CNNLAOptimizer_threedgan_connector_translator();
        if(optimizer_creator.getContextName() == "gpu"){
            ctx = Context::gpu();
        network_input_sizes = getSizesOfShapes(network_input_shapes);

        ModelLoader model_loader(file_prefix, 0, ctx);
        std::vector<Symbol> network_symbols = model_loader.GetNetworkSymbols();
        std::vector<std::map<std::string, NDArray>> network_param_maps;
        network_param_maps = model_loader.GetNetworkParamMaps();
        //Init handles
        std::map<std::string, std::vector<mx_uint>> in_shape_map;
        for(mx_uint i=0; i < network_input_keys.size(); i++){
            in_shape_map[network_input_keys[i]] = network_input_shapes[i];
        std::vector<std::vector<mx_uint>> in_shapes;
        std::vector<std::vector<mx_uint>> aux_shapes;
        std::vector<std::vector<mx_uint>> out_shapes;
        network_symbols[0].InferShape(in_shape_map, &in_shapes, &aux_shapes, &out_shapes);
        network_handles.push_back(initExecutor(network_symbols[0], network_param_maps[0], network_input_keys, network_input_shapes));

And the model loader:


#include <mxnet-cpp/MxNetCpp.h>

#include <stdio.h>
#include <iostream>
#include <fstream>

using namespace mxnet::cpp;

// Read files to load moddel symbol and parameters
class ModelLoader {
    Context ctx = Context::cpu();
    std::vector<Symbol> network_symbol_list;
    std::vector<std::map<std::string, NDArray>> network_param_map_list;

    std::vector<Symbol> query_symbol_list;
    std::vector<std::map<std::string, NDArray>> query_param_map_list;

    std::vector<std::map<std::string, NDArray>> replay_memory;

    std::vector<Symbol> loss_symbol;
    std::vector<std::map<std::string, NDArray>> loss_param_map;

    void checkFile(std::string file_path){
        std::ifstream ifs(file_path.c_str(), std::ios::in | std::ios::binary);
        if (!ifs) {
            std::cerr << "Can't open the file. Please check " << file_path << ". \n";

        int length_;
        ifs.seekg(0, std::ios::end);
        length_ = ifs.tellg();
        ifs.seekg(0, std::ios::beg);
        std::cout << file_path.c_str() << " ... "<< length_ << " bytes\n";

    void loadComponent(std::string json_path,
                       std::string param_path,
                       std::vector<Symbol> &symbols_list,
                       std::vector<std::map<std::string, NDArray>> &param_map_list){
        std::map<std::string, NDArray> params;
        NDArray::Load(param_path, 0, &params);

    std::map<std::string, NDArray> processParamMap(std::map<std::string, NDArray> param_map){
        std::map<std::string, NDArray> processed_param_map;
            for (const auto &pair : param_map) {
                std::string name = pair.first.substr(4); //the first four letters would be the type (arg: or aux:, but we don't have aux parameters? <- need to make sure)
                processed_param_map[name] = pair.second.Copy(ctx);
        return processed_param_map;

    explicit ModelLoader(std::string file_prefix, mx_uint num_subnets, Context ctx_param){

        ctx = ctx_param;
        std::string network_json_path;
        std::string network_param_path;
        std::string query_json_path;
        std::string query_param_path;
        std::string memory_path;
        std::string loss_json_path;
        std::string loss_param_path;

        //Load network
            network_json_path = file_prefix + "-symbol.json";
            network_param_path = file_prefix + "-0000.params";
            loadComponent(network_json_path, network_param_path, network_symbol_list, network_param_map_list);
            for(int i=0; i < num_subnets; i++){
                network_json_path = file_prefix + "_episodic_sub_net_" + std::to_string(i) + "-symbol.json";
                network_param_path = file_prefix + "_episodic_sub_net_" + std::to_string(i) + "-0000.params";
                loadComponent(network_json_path, network_param_path, network_symbol_list, network_param_map_list);
                if(i >= 1){
                    query_json_path = file_prefix + "_episodic_query_net_" + std::to_string(i) + "-symbol.json";
                    query_param_path = file_prefix + "_episodic_query_net_" + std::to_string(i) + "-0000.params";
                    loadComponent(query_json_path, query_param_path, query_symbol_list, query_param_map_list);
                    memory_path = file_prefix + "_episodic_memory_sub_net_" + std::to_string(i) + "-0000";

                    std::map<std::string, NDArray> mem_map = NDArray::LoadToMap(memory_path);
                    for(auto &mem : mem_map){
                        mem.second = mem.second.Copy(ctx);

        //Load Loss
        //loss_json_path = file_prefix + "_loss-symbol.json";
        //loss_param_path = file_prefix + "_loss-0000.params";
        //loadComponent(loss_json_path, loss_param_path, loss_symbol, loss_param_map);


    std::vector<Symbol> GetNetworkSymbols() {
        return network_symbol_list;

    std::vector<std::map<std::string, NDArray>> GetNetworkParamMaps() {
        return network_param_map_list;

    Symbol GetLoss() {
        return loss_symbol[0];

    std::map<std::string, NDArray> GetLossParamMap() {
        return loss_param_map[0];

    std::vector<Symbol> GetQuerySymbols() {
        return query_symbol_list;

    std::vector<std::map<std::string, NDArray>>  GetQueryParamMaps() {
        return query_param_map_list;

    std::vector<std::map<std::string, NDArray>> GetReplayMemory(){
        return replay_memory;

Any help is greatly appreciated. Thank you!