I’m wondering how to use mx.nd operations in the forward pass of Gluon to implement cross product style architectures (e.g. bilinear models) and that need to look at pairs of vectors
I was trying to implement a Relational Network: https://arxiv.org/abs/1706.01427
Relevant implementation in PyTorch: https://github.com/kimhc6028/relational-networks/blob/170327761521160af99f6269333a29536335a58b/model.py#L83
My first attempt at an implementation is below - but I’m new to mxnet so I’m probably making a million mistakes - it would be useful if someone could guide me along with how to implement this.
class RecurrentRelational(gluon.Block):
def __init__(self, dim=100, num_layers=1, layout='NTC',
**kwargs):
super(RecurrentRelational, self).__init__(**kwargs)
self.key = 'recurrent-relational'
with self.name_scope():
# layers created in name_scope will inherit name space
# from parent layer.
#self.dropout = nn.Dropout(0.3)
self.hidden_size = dim
self.num_layers = num_layers
self.layout = layout
self.bn = nn.BatchNorm()
# Recurrent Encoder
self.rnn = rnn.RNN(self.hidden_size, self.num_layers,
layout=self.layout, bidirectional=True)
# Relational Network
self.g_hidden = 100
self.relational_g1 = nn.Dense(self.g_hidden, activation='relu')
self.relational_g2 = nn.Dense(self.g_hidden, activation='relu')
self.relational_f = nn.Dense(100, activation='relu')
# End RN
self.binary = nn.Dense(2)
def activate_relation(self, relation_vector):
g_z = self.relational_g1(relation_vector)
g_z = self.bn(g_z)
g_z = self.relational_g2(g_z)
return g_z
def activate_aggregation(self, aggregation):
return self.relational_f(self.bn(aggregation))
def forward(self, (x1, x2)):
z1 = self.rnn(x1)
z2 = self.rnn(x2)
batch_size, seq_len, hidden_dim = z1.shape
num_objects = z1.shape[1]
all_object_pairs = []
for i in range(num_objects):
first_object = z1[:, i, :]
for j in range(num_objects):
second_object = z2[:, j, :]
relation_vector = mx.nd.concat(first_object, second_object, dim=1)
all_object_pairs.append(relation_vector)
all_relations = mx.nd.concat(*all_object_pairs, dim=0)
z_rel = self.activate_relation(all_relations).reshape((-1, num_objects * num_objects,
self.g_hidden))
z_agg = mx.nd.sum(z_rel, axis=1)
return self.binary(self.activate_aggregation(z_agg))
The error I’m getting is
libc++abi.dylib: terminating with uncaught exception of type dmlc::Error: [16:57:57] src/engine/./threaded_engine.h:347: [16:57:57] src/operator/tensor/./matrix_op-inl.h:964: CropAssign only supports kWriteTo
Is there a different way to implement this that may avoid this issue?