RNN example needed

Is there any example of how to use the RNN layer with a simple sine wave or similar as input?
I can only find examples with language or words, but none with a really simple mathematical function.
For some reason my network only predicts linear values, and all I need is a reference implementation so that I can compare my code against it.

I haven’t found any example of predicting a sine wave with MXNet, so I created something for you based on a Keras example

The code is below. The data creation is taken directly from that example, and I use an LSTM instead of a vanilla RNN. I also use a GPU, but feel free to change it to CPU if you need:

import math
import random
import numpy as np
import pandas as pd

import mxnet as mx
from mxnet import autograd
from mxnet.gluon import Trainer
from mxnet.gluon.data import DataLoader, ArrayDataset
from mxnet.gluon.nn import HybridSequential, Dense
from mxnet.gluon.rnn import LSTM
from mxnet.initializer import Xavier
from mxnet.metric import RMSE


def noisy_sin(steps_per_cycle=50, number_of_cycles=500, random_factor=0.4):
    """Generate a noisy sine wave as a pandas DataFrame.

    Parameters:
    steps_per_cycle  : number of time steps that make up one full cycle
    number_of_cycles : number of cycles to generate
    random_factor    : amount of noise in the sine wave. 0 = no noise

    Returns:
    pd.DataFrame with columns:
      t           - integer time step
      sin_t       - noisy sine value at step t
      sin_t_clean - noise-free sine value at step t
    """
    df = pd.DataFrame(np.arange(steps_per_cycle * number_of_cycles + 1), columns=["t"])
    # Noise is injected as a random phase shift in [-random_factor, +random_factor].
    df["sin_t"] = df.t.apply(lambda x: math.sin(
        x * (2 * math.pi / steps_per_cycle) + random.uniform(-1.0, +1.0) * random_factor))
    df["sin_t_clean"] = df.t.apply(lambda x: math.sin(x * (2 * math.pi / steps_per_cycle)))
    print("create period-{} sin wave with {} cycles".format(steps_per_cycle, number_of_cycles))
    print("In total, the sin wave time series length is {}".format(
        steps_per_cycle * number_of_cycles + 1))
    return df

# Build the noisy sine-wave dataset: 500 cycles, 10 samples per cycle.
steps_per_cycle = 10
df = noisy_sin(steps_per_cycle=steps_per_cycle, random_factor=0.2)

# Sanity-check plot of the first few generated cycles.
n_plot = 8
df[["sin_t"]].head(n_plot * steps_per_cycle).plot(
    figsize=(15, 3),
    title="Generated first {} cycles".format(n_plot))

def _load_data(data, n_prev=100):
    data should be pd.DataFrame()

    docX, docY = [], []
    for i in range(len(data) - n_prev):
        docX.append(data.iloc[i:i + n_prev].as_matrix())
        docY.append(data.iloc[i + n_prev].as_matrix())
    alsX = np.array(docX)
    alsY = np.array(docY)

    return alsX, alsY

length_of_sequences = 50
test_size = 0.4

# Chronological split: the first (1 - test_size) fraction of the series
# is for training, the remainder for testing.
ntr = int(len(df) * (1 - test_size))
df_train, df_test = df[["sin_t"]].iloc[:ntr], df[["sin_t"]].iloc[ntr:]
X_train, y_train = _load_data(df_train, n_prev=length_of_sequences)
X_test, y_test = _load_data(df_test, n_prev=length_of_sequences)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# Wrap the arrays as float32 Gluon datasets; only the training batches
# are shuffled.
train_dataset = ArrayDataset(X_train.astype('float32'), y_train.astype('float32'))
train_dataloader = DataLoader(train_dataset, batch_size=100, shuffle=True)
test_dataset = ArrayDataset(X_test.astype('float32'), y_test.astype('float32'))
test_dataloader = DataLoader(test_dataset, batch_size=100, shuffle=False)

def define_model():
    """Build the regression network: an LSTM encoder followed by a
    single-unit Dense head mapping the sequence features to the
    predicted next value.

    Returns:
    An uninitialized HybridSequential expecting input of shape
    (batch, time, channels), i.e. 'NTC' layout.
    """
    model = HybridSequential()
    model.add(LSTM(hidden_size=100, layout='NTC'))
    # Project the 100 LSTM features down to one regression output per
    # sample (Dense flattens the time/feature axes by default). Without
    # this head the network emits 100 values per time step while the
    # target is a single scalar — and the Dense import above goes unused.
    model.add(Dense(1))
    return model

# Run on the GPU; switch to mx.cpu() if no GPU is available.
ctx = mx.gpu()

net = define_model()
net.initialize(Xavier(magnitude=2.24), ctx=ctx)
# net.hybridize(static_alloc=True)  # optional: compile the graph for speed
trainer = Trainer(params=net.collect_params(),
                  optimizer='adam',
                  optimizer_params={"learning_rate": 0.001})
loss_fn = mx.gluon.loss.L2Loss()
train_rmse = RMSE()
test_rmse = RMSE()

# train
for e in range(10):
    for i, (x, y) in enumerate(train_dataloader):
        # move to GPU if needed
        x = x.as_in_context(ctx)
        y = y.as_in_context(ctx)

        with autograd.record():
            output = net(x)
            loss = loss_fn(output, y)

        # Backpropagate and update the parameters — without these two
        # calls the network never learns anything.
        loss.backward()
        trainer.step(batch_size=x.shape[0])

        train_rmse.update(y, output)

    # Evaluate on held-out data; no gradient recording needed here.
    for i, (x, y) in enumerate(test_dataloader):
        # move to GPU if needed
        x = x.as_in_context(ctx)
        y = y.as_in_context(ctx)

        output = net(x)
        test_rmse.update(y, output)

    # NOTE(review): the RMSE metrics are never reset, so they accumulate
    # over all epochs — which matches the slowly decreasing values in the
    # sample output below. Call .reset() each epoch for per-epoch metrics.
    print("Epoch {}: train loss: {:.5f}, train RMSE {:.3f}, test RMSE {:.3f}".format(
        e, loss.mean().asscalar(), float(train_rmse.get()[1]), float(test_rmse.get()[1])))

If I run it on my machine I get the following output:

Epoch 0: train loss: 0.00809, train RMSE 0.261, test RMSE 0.115
Epoch 1: train loss: 0.00301, train RMSE 0.178, test RMSE 0.099
Epoch 2: train loss: 0.00324, train RMSE 0.146, test RMSE 0.094
Epoch 3: train loss: 0.00260, train RMSE 0.131, test RMSE 0.091
Epoch 4: train loss: 0.00248, train RMSE 0.121, test RMSE 0.089
Epoch 5: train loss: 0.00291, train RMSE 0.115, test RMSE 0.088
Epoch 6: train loss: 0.00391, train RMSE 0.110, test RMSE 0.087
Epoch 7: train loss: 0.00377, train RMSE 0.107, test RMSE 0.087
Epoch 8: train loss: 0.00334, train RMSE 0.104, test RMSE 0.086
Epoch 9: train loss: 0.00329, train RMSE 0.102, test RMSE 0.086

The test RMSE comes out lower than the train RMSE mostly because the metrics are never reset between epochs, so the train RMSE still carries the poor predictions from the earliest batches (and the training data is evaluated mid-update). Play with the example to see if it matches your needs.

P.S. if you are thinking of doing time series forecast (especially multivariate one), then I recommend to look into LSTNet: https://github.com/safrooze/LSTNet-Gluon


Thank you! You are my hero.