I have the following actor-critic model:
class actor(gluon.HybridBlock):
    """Policy network: maps an observation vector to a softmax probability
    distribution over 2 discrete actions.

    Parameters
    ----------
    r : int
        Observation radius; kept for interface compatibility (the observation
        size 2*r + 1 is not used because Dense layers infer input size).
    """

    def __init__(self, r, **kwargs):
        super(actor, self).__init__(**kwargs)
        with self.name_scope():
            n_outputs = 2  # number of discrete actions
            self.ac1 = gluon.nn.Dense(4, activation='relu')
            self.ac3 = gluon.nn.Dense(n_outputs)

    def hybrid_forward(self, f, x):
        """Forward pass.

        `f` is the backend module (mx.nd or mx.sym depending on hybridization).
        Returns action probabilities of shape (batch, 2).
        """
        hidden = self.ac1(x)
        # NOTE(review): taking softmax here and log() later in the policy-gradient
        # loss is numerically unstable — when a probability saturates to 0 the log
        # produces -inf and gradients become NaN. Prefer f.log_softmax(...) in the
        # loss path (or clip probabilities) — this is the usual cause of weights
        # turning NaN mid-training.
        action = f.softmax(self.ac3(hidden))
        return action
class critic(gluon.HybridBlock):
    """Value network: maps an observation vector to a scalar state-value estimate.

    Parameters
    ----------
    r : int
        Observation radius; kept for interface compatibility (the observation
        size 2*r + 1 is not used because Dense layers infer input size).
    """

    def __init__(self, r, **kwargs):
        super(critic, self).__init__(**kwargs)
        with self.name_scope():
            self.cr1 = gluon.nn.Dense(4, activation='relu')
            self.cr3 = gluon.nn.Dense(1)  # scalar V(s) output

    def hybrid_forward(self, f, x):
        """Forward pass; returns the state-value estimate of shape (batch, 1)."""
        return self.cr3(self.cr1(x))
And I’m training the reinforcement learning agent over 5000 episodes, and some of the weights of the actor become NaN after about 2000 episodes (learning rate = 0.001).
What can be the reason for that?