Hi,
I’m new in the topic of neural network and even newer in recurrent network.
I tried tried to implement a very basic recurrent network, just to see if it can learn a noisy sinus.
Input data:
A, T, epsilon, l =10, 10, 1, 3000
data = Anp.sin(2np.pi*np.arange(l)/T) + np.random.normal(0, epsilon, l)
train_features = data[:2000]
Recurrent network model:
class RnnTest(nn.HybridBlock):
def __init__(self, size_hidden, **kwargs):
super().__init__(**kwargs)
with self.name_scope():
self.size_hidden = size_hidden
self.rnn = rnn.RNN(self.size_hidden)
self.dense = nn.Dense(1)
def hybrid_forward(self, F, x, state):
out, state = self.rnn(x, state)
return self.dense(out.reshape(-1, self.size_hidden)), state
def begin_state(self, *args, **kwargs):
return self.rnn.begin_state(*args, **kwargs)
I used the L2Loss from gluon for the training:
size_hidden=16
num_epochs, lr, clip, batch_size, num_steps = 500, 0.01, 10, 32, 10
model = RnnTest(size_hidden)
model.initialize(init.Normal(sigma=0.1), force_reinit=True)
model.hybridize()
adam_optimizer = mx.optimizer.Adam(clip_gradient=clip, learning_rate=lr, wd=0)
trainer = gluon.Trainer(model.collect_params(), optimizer=adam_optimizer)
From that, I iterate over the data (the complete code is just below).
But in the end, the loss oscillates around a constant value, corresponding to the mean of the data (or zero, it is the same number as I used sinusoidal data).
I spent a lot of time playing with the hyperparameters, the optimizer (I first used sgd).
Here the complete code (I used some convenient functions described in the book d2l.ai):
import mxnet as mx
from mxnet import gluon, init, np, npx, autograd
npx.set_np()
from mxnet.gluon import nn, rnn
from d2l import mxnet as d2l
import random
A, T, epsilon, l =10, 10, 1, 3000
data = A*np.sin(2*np.pi*np.arange(l)/T) + np.random.normal(0, epsilon, l)
train_features = data[:2000]
def create_random_iter(data, batch_size, num_steps):
offset = random.randint(0, num_steps)
data = data[offset:]
num_examples = ((len(data) - 1) // num_steps)
example_indices = list(range(0, num_examples * num_steps, num_steps))
random.shuffle(example_indices)
num_batches = num_examples // batch_size
for i in range(0, batch_size * num_batches, batch_size):
batch_indices = example_indices[i:(i+batch_size)]
X = np.stack([data[j: j+num_steps] for j in batch_indices]).T
Y = np.stack([data[j+1: j+1+num_steps] for j in batch_indices]).T
yield X.reshape(X.shape[0], batch_size, 1), Y.reshape(Y.shape[0], batch_size, 1)
class RnnTest(nn.HybridBlock):
def __init__(self, size_hidden, **kwargs):
super().__init__(**kwargs)
with self.name_scope():
self.size_hidden = size_hidden
self.rnn = rnn.RNN(self.size_hidden)
self.dense = nn.Dense(1)
def hybrid_forward(self, F, x, state):
out, state = self.rnn(x, state)
return self.dense(out.reshape(-1, self.size_hidden)), state
def begin_state(self, *args, **kwargs):
return self.rnn.begin_state(*args, **kwargs)
def train_epoch(model, train_features, loss, trainer, batch_size=8, num_steps=30, use_random_iter=True):
train_iter = create_random_iter(data, batch_size=batch_size, num_steps=num_steps)
metric = d2l.Accumulator(2)
for X, Y in train_iter:
state = model.begin_state(batch_size=batch_size)
y = Y.T.reshape(-1, 1)
with autograd.record():
pred, state = model(X, state)
l = loss(pred, y)
l.backward()
trainer.step(batch_size=batch_size)
metric.add(float(l.mean()) * Y.shape[0], Y.shape[0])
return metric[0]/metric[1]
def train(model, train_features, loss, trainer, batch_size=4, num_steps=10):
animator = d2l.Animator(xlabel='epoch', ylabel='loss', legend=['train'], xlim=[1, num_epochs])
for i in range(num_epochs):
err=train_epoch(model, train_features, loss, trainer, batch_size=batch_size, num_steps=num_steps)
if i % 10 == 0:
print(err)
animator.add(i+1, [err])
print('final loss:', err)
size_hidden=16
num_epochs, lr, clip, batch_size, num_steps = 100, 0.01, 10, 32, 10
model = RnnTest(size_hidden)
model.initialize(init.Normal(sigma=0.1), force_reinit=True)
model.hybridize()
adam_optimizer = mx.optimizer.Adam(clip_gradient=clip, learning_rate=lr, wd=0)
trainer = gluon.Trainer(model.collect_params(), optimizer=adam_optimizer)
loss = gluon.loss.L2Loss()
train(model, train_features, loss, trainer, batch_size=batch_size, num_steps=num_steps)