Hi, I’m trying to using following code to predict with a resnet50:
ctxs = [mx.gpu(0),mx.gpu(1)]
def test(net, test_data_loader, threshold, ctxs):
predicted = [ ]for i, data in enumerate(test_data_loader): data = gluon.utils.split_and_load(data, ctx_list=ctxs, batch_axis=0, even_split=False) outputs = [net(X) for X in data] for output in outputs: output = output.as_in_context(ctxs[0]) outputs = nd.concat(*outputs,dim=0) score_predicts = nd.sigmoid(outputs) label_predicts = [np.arange(28) [ np.argwhere( (score_predict).asnumpy() > threshold ) ] \ for score_predict in score_predicts] str_predict_labels = [' '.join(str(np.asscalar(l)) for l in label_predict) for label_predict in label_predicts] predicted.extend(str_predict_labels)
But this code only works occasionally. At most time it come back with following error message:
---------------------------------------------------------------------------
MXNetError Traceback (most recent call last)
<ipython-input-31-18b005eb63dd> in <module>()
5 test_dataset = ProteinDataset(root=TEST,size=size,transform=joint_transform,istrain=False)
6 test_data_loader = mx.gluon.data.DataLoader(test_dataset,batch_size=batch_size,shuffle=False,num_workers=num_workers)
----> 7 test(finetune_net, test_dataset,test_data_loader, threshold, ctxs,file_name)
<ipython-input-30-9ddd755ca136> in test(net, test_dataset, test_data_loader, threshold, ctxs, file_name)
14 score_predicts = nd.sigmoid(outputs)
15
---> 16 label_predicts = [np.arange(28) [ np.argwhere( (score_predict).asnumpy() > threshold ) ] for score_predict in score_predicts]
17 str_predict_labels = [' '.join(str(np.asscalar(l)) for l in label_predict) for label_predict in label_predicts]
18 predicted.extend(str_predict_labels)
<ipython-input-30-9ddd755ca136> in <listcomp>(.0)
14 score_predicts = nd.sigmoid(outputs)
15
---> 16 label_predicts = [np.arange(28) [ np.argwhere( (score_predict).asnumpy() > threshold ) ] for score_predict in score_predicts]
17 str_predict_labels = [' '.join(str(np.asscalar(l)) for l in label_predict) for label_predict in label_predicts]
18 predicted.extend(str_predict_labels)
~/anaconda3/lib/python3.7/site-packages/mxnet/ndarray/ndarray.py in asnumpy(self)
1970 self.handle,
1971 data.ctypes.data_as(ctypes.c_void_p),
-> 1972 ctypes.c_size_t(data.size)))
1973 return data
1974
~/anaconda3/lib/python3.7/site-packages/mxnet/base.py in check_call(ret)
250 """
251 if ret != 0:
--> 252 raise MXNetError(py_str(_LIB.MXGetLastError()))
253
254
MXNetError: [19:33:11] src/operator/nn/./cudnn/cudnn_activation-inl.h:129: Check failed: e == CUDNN_STATUS_SUCCESS (8 vs. 0) cuDNN: CUDNN_STATUS_EXECUTION_FAILED
Stack trace returned 10 entries:
[bt] (0) /home/jumpywizard/anaconda3/lib/python3.7/site-packages/mxnet/libmxnet.so(+0x3617ba) [0x7f125e1d97ba]
[bt] (1) /home/jumpywizard/anaconda3/lib/python3.7/site-packages/mxnet/libmxnet.so(+0x361dd1) [0x7f125e1d9dd1]
[bt] (2) /home/jumpywizard/anaconda3/lib/python3.7/site-packages/mxnet/libmxnet.so(+0x336b6c5) [0x7f12611e36c5]
[bt] (3) /home/jumpywizard/anaconda3/lib/python3.7/site-packages/mxnet/libmxnet.so(+0x336de58) [0x7f12611e5e58]
[bt] (4) /home/jumpywizard/anaconda3/lib/python3.7/site-packages/mxnet/libmxnet.so(+0x2aead6a) [0x7f1260962d6a]
[bt] (5) /home/jumpywizard/anaconda3/lib/python3.7/site-packages/mxnet/libmxnet.so(+0x2a4ba67) [0x7f12608c3a67]
[bt] (6) /home/jumpywizard/anaconda3/lib/python3.7/site-packages/mxnet/libmxnet.so(+0x2a4ba67) [0x7f12608c3a67]
[bt] (7) /home/jumpywizard/anaconda3/lib/python3.7/site-packages/mxnet/libmxnet.so(+0x2a4ba67) [0x7f12608c3a67]
[bt] (8) /home/jumpywizard/anaconda3/lib/python3.7/site-packages/mxnet/libmxnet.so(+0x2a4ba67) [0x7f12608c3a67]
[bt] (9) /home/jumpywizard/anaconda3/lib/python3.7/site-packages/mxnet/libmxnet.so(+0x2a4ba67) [0x7f12608c3a67]
Where is the problem? Thanks!
By the way, I’m using UBUNTU 18.04, CUDA 9.2, CUDNN 7.1.4 on GTX 1080Ti and MXNet 1.3.0.