C++ API. Linear regression example

Hello,

I’m trying to implement a simple linear regression fit example (similar to the Python one at https://gluon.mxnet.io/chapter02_supervised-learning/linear-regression-scratch.html) in C++ using the MXNet C++ API.

I’m using a single fully connected layer (FullyConnected symbol) with LinearRegressionOutput and the SGD optimizer.

The problem I have is that the RMSE doesn’t drop below ~0.64 regardless of the number of training iterations applied. The Python example achieves much lower RMSE values.

Can anyone suggest what the problem might be? The source code is below.

Thanks,
Eugene
//=========================================================
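#include <stdlib.h>
// NOTE: the next two header names were stripped from the post when it was
// extracted; <chrono> and <iostream> are reconstructed from what the code uses
// (chrono::system_clock, std::cout).
#include <chrono>
#include <iostream>
#include "mxnet-cpp/MxNetCpp.h"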

using namespace std;
using namespace mxnet::cpp;

int main(int argc, char** argv)
{
const int max_epoch = 10;
const float learning_rate = 0.01;
const float momentum = 0.9;
const float weight_decay = 0.;
const int N = 1000;
const int M = 2;
const int batch_size = 5;
Context ctx = Context::cpu(); // Use CPU for training

srand((unsigned)time(NULL));

std::vector<mx_float> x(N*M);
for (int i=0; i<x.size(); ++i) x[i]=((mx_float)rand())/RAND_MAX;
NDArray X = NDArray(x,Shape(N, M),ctx);

// fill Y[i] = X[i,0] + 2X[i,1] + 3
std::vector<mx_float> y(N);
for (int i=0; i<y.size(); ++i)
y[i]=x[i] + 2
x[i+N] + 3;

NDArray labels = NDArray(y,Shape(N),ctx);

//auto net = mlp({1});
auto fc = FullyConnected(Symbol::Variable(“X”), Symbol::Variable(“w0”), Symbol::Variable(“b0”), 1 );
auto net = LinearRegressionOutput(“linreg”, fc, Symbol::Variable(“label”));

std::map<string, NDArray> args;
args[“X”] = NDArray(Shape(batch_size, M), ctx);
args[“label”] = NDArray(Shape(batch_size), ctx);
net.InferArgsMap(ctx, &args, args);

auto initializer = Uniform(0.01);
for (auto& arg : args) {
initializer(arg.first, &arg.second);
}

// optim
Optimizer* opt = OptimizerRegistry::Find(“sgd”);
opt->SetParam(“lr”, learning_rate)
->SetParam(“wd”, weight_decay);

// binding parameters to the model
auto arg_names = net.ListArguments();
for (size_t i = 0; i < arg_names.size(); ++i)
std::cout << arg_names[i] << std::endl;

int num_batches = N/batch_size;

// training
for (int iter = 0; iter < max_epoch; ++iter) {

auto tic = chrono::system_clock::now();
for (int slice = 0; slice < num_batches ; slice ++ ) {

  int from_index = slice*batch_size;
  int to_index   = from_index + batch_size; if (to_index>N) to_index  = N;

  NDArray l = labels.Slice(from_index,to_index);
  args["X"] = X.Slice(from_index,to_index).Copy(ctx);
  args["label"] = l.Copy(ctx);

  {
    auto *exec = net.SimpleBind(ctx, args);
    exec->Forward(true);
    exec->Backward();
    // Update parameters
    for (size_t i = 0; i < arg_names.size(); ++i) {
      if (arg_names[i] == "X" || arg_names[i] == "label") continue;
      opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
      //LG << arg_names[i] << "|" << exec->arg_arrays[i] << "|" << exec->grad_arrays[i];
    }
    delete exec;
  }
}

RMSE acc;
{
  // eval
  args["X"] = X.Copy(ctx);
  args["label"] = labels.Copy(ctx);
  auto *exec = net.SimpleBind(ctx, args);
  exec->Forward(false);
  acc.Update(labels, exec->outputs[0]);
  delete exec;
}

auto toc = chrono::system_clock::now();
float duration = chrono::duration_cast<chrono::milliseconds>(toc - tic).count() / 1000.0;
LG << "Epoch: " << iter << " RMSE: " << acc.Get();

}
//LG << exec->outputs[0].Slice(0,batch_size);
//LG << labels.Slice(0,batch_size);

MXNotifyShutdown();

return 0;
}

An update:

I’ve figured out the problem: initializing a two-dimensional array of Shape(N,M) from a flat std::vector expects row-by-row packing. I had initialized the values assuming column-by-column packing, so the data did not describe a linear regression problem. That’s why it couldn’t achieve a decent RMSE.
Here is a working example:
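#include <stdlib.h>
// NOTE: the next two header names were stripped from the post when it was
// extracted; <chrono> and <iostream> are reconstructed from what the code uses
// (chrono::system_clock, std::cout).
#include <chrono>
#include <iostream>
#include "mxnet-cpp/MxNetCpp.h"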

    using namespace std;
    using namespace mxnet::cpp;

    int main(int argc, char** argv) 
    {
      const int max_epoch = 10;
      const float learning_rate = 0.01;
      const float momentum = 0.9;
      const float weight_decay = 0.;
      const int N = 1000;
      const int M = 2;
      const int batch_size = 5;
      Context ctx = Context::cpu();  // Use CPU for training
      
      srand((unsigned)time(NULL));

      std::vector<mx_float> x(N*M);
      for (int i=0; i<x.size(); ++i) x[i]=((mx_float)rand())/RAND_MAX;
      NDArray X =  NDArray(x,Shape(N, M),ctx); 

      // fill Y[i] = X[i,0] + 2*X[i,1] + 3
      std::vector<mx_float> y(N);
      for (int i=0; i<y.size(); ++i) 
        y[i]=x[2*i] + 2*x[2*i+1] + 3;

      NDArray labels =  NDArray(y,Shape(N),ctx); 

      //auto net = mlp({1});
      auto fc = FullyConnected(Symbol::Variable("X"), Symbol::Variable("w0"), Symbol::Variable("b0"), 1 );
      auto net = LinearRegressionOutput("linreg", fc, Symbol::Variable("label"));

      std::map<string, NDArray> args;
      args["X"] = NDArray(Shape(batch_size, M), ctx);
      args["label"] = NDArray(Shape(batch_size), ctx);
      net.InferArgsMap(ctx, &args, args);
      
      auto initializer = Uniform(0.01);
      for (auto& arg : args) {
        initializer(arg.first, &arg.second);
      }

      // optim
      Optimizer* opt = OptimizerRegistry::Find("sgd");
      opt->SetParam("lr", learning_rate)
         ->SetParam("wd", weight_decay);
      
      // binding parameters to the model
      auto arg_names = net.ListArguments();
      for (size_t i = 0; i < arg_names.size(); ++i) 
        std::cout << arg_names[i] << std::endl;

      int num_batches = N/batch_size;
      NDArray w0,b0;

      RMSE acc;
      {
        // eval
        args["X"] = X.Copy(ctx);
        args["label"] = labels.Copy(ctx);
        auto *exec = net.SimpleBind(ctx, args);
        exec->Forward(false);
        acc.Update(labels, exec->outputs[0]);
        delete exec;
        LG << "Epoch: " << "before" << " RMSE: " << acc.Get();
      }

      // training
      for (int iter = 0; iter < max_epoch; ++iter) {

        auto tic = chrono::system_clock::now();
        for (int slice = 0; slice < num_batches ; slice ++ ) {

          int from_index = slice*batch_size;
          int to_index   = from_index + batch_size; if (to_index>N) to_index  = N;

          NDArray l = labels.Slice(from_index,to_index);
          args["X"] = X.Slice(from_index,to_index).Copy(ctx);
          args["label"] = l.Copy(ctx);

          {
            auto *exec = net.SimpleBind(ctx, args);
            exec->Forward(true);
            exec->Backward();
            // Update parameters
            for (size_t i = 0; i < arg_names.size(); ++i) {
              if (arg_names[i] == "X" || arg_names[i] == "label") continue;
              opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
              if (arg_names[i] == "w0") w0 = exec->arg_arrays[i];
              if (arg_names[i] == "b0") b0 = exec->arg_arrays[i];
            }
            delete exec;
          }
        }

        RMSE acc;
        {
          // eval
          args["X"] = X.Copy(ctx);
          args["label"] = labels.Copy(ctx);
          auto *exec = net.SimpleBind(ctx, args);
          exec->Forward(false);
          acc.Update(labels, exec->outputs[0]);
          delete exec;
        }

        auto toc = chrono::system_clock::now();
        float duration = chrono::duration_cast<chrono::milliseconds>(toc - tic).count() / 1000.0;
        LG << "Epoch: " << iter << " RMSE: " << acc.Get();
      }

      LG << "w=" << w0;
      LG << "b=" << b0;

      MXNotifyShutdown();

      return 0;
    }

I copied your code, but the results show that the acc becomes lower and lower.
Is there anything wrong?

Yes, as I mentioned in my second post, the problem was the wrong initialization of the data. Now everything works fine and it achieves a good RMSE in just a few steps.

Thanks,

Eugene