Hey everyone, I’m currently trying to implement a paper in MXNet. Its idea is to segment vessels in the retina (paper), and it was originally written in PyTorch. I found an implementation of the base model that the paper used and tried to change the code to suit the paper. However, I am running into the following error in the update function of EvalMetric:
<ipython-input-20-f3c422a173c1> in <module>()
240 trainer.step(batch_size)
241 for m in metrics:
--> 242 m.update(labels=llist[0], preds=preds)
243
244
<ipython-input-20-f3c422a173c1> in update(self, labels, preds)
172
173 for l ,p in zip(labels, preds):
--> 174 lnp = l.asnumpy()
175 pnp = p.asnumpy()
176 pl = np.argmax(pnp, axis=0)
/usr/local/lib/python3.6/dist-packages/mxnet/ndarray/ndarray.py in asnumpy(self)
1978 self.handle,
1979 data.ctypes.data_as(ctypes.c_void_p),
--> 1980 ctypes.c_size_t(data.size)))
1981 return data
1982
/usr/local/lib/python3.6/dist-packages/mxnet/base.py in check_call(ret)
250 """
251 if ret != 0:
--> 252 raise MXNetError(py_str(_LIB.MXGetLastError()))
253
254
MXNetError: [02:52:48] src/operator/./crop-inl.h:126: Check failed: data_shape[2] >= out_shape[2] (56 vs. 65) data_shape'height should be larger than that of out_shape
From the call stack, I saw that it was a problem with a check_call in asnumpy, so I commented it out, but then I got another error:
<ipython-input-13-f3c422a173c1> in <module>()
240 trainer.step(batch_size)
241 for m in metrics:
--> 242 m.update(labels=llist[0], preds=preds)
243
244
<ipython-input-13-f3c422a173c1> in update(self, labels, preds)
172
173 for l ,p in zip(labels, preds):
--> 174 lnp = l.asnumpy()
175 pnp = p.asnumpy()
176 pl = np.argmax(pnp, axis=0)
/usr/local/lib/python3.6/dist-packages/mxnet/ndarray/ndarray.py in asnumpy(self)
1978 # self.handle,
1979 # data.ctypes.data_as(ctypes.c_void_p),
--> 1980 # ctypes.c_size_t(data.size)))
1981 return data
1982
/usr/local/lib/python3.6/dist-packages/mxnet/base.py in check_call(ret)
250 """
251 if ret != 0:
--> 252 raise MXNetError(py_str(_LIB.MXGetLastError()))
253
254
MXNetError: [02:15:09] src/nnvm/legacy_op_util.cc:134: Check failed: fwd_init_
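(In hindsight, asnumpy() is probably just where MXNet’s asynchronous engine reports a failure from an earlier operator, so commenting out check_call only suppresses the report rather than fixing anything. Here is a minimal sketch of how the error can be surfaced right after the forward pass instead; it assumes the net, dlist, and ag defined in the training code below:)

import mxnet as mx
import mxnet.autograd as ag

# Sketch: force MXNet's async engine to finish all queued work so a
# failing operator raises immediately, instead of later inside asnumpy().
with ag.record():
    preds = net(dlist[0])   # `net` and `dlist` as in the training loop below
mx.nd.waitall()             # the shape-check error, if any, surfaces here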
What’s weird is that when I comment the check_call out on my laptop, it works, but when I do it on Google Colab I get the error above. Here is the full code (very messy, I know):
import os
import mxnet as mx
import mxnet.ndarray as nd
import mxnet.gluon as gluon
import mxnet.gluon.nn as nn
import mxnet.autograd as ag
from mxnet.gluon.data import Dataset, DataLoader
from mxnet.gluon.loss import Loss, _apply_weighting, _reshape_like
from mxnet import image
import matplotlib.pyplot as plt
import numpy as np
import numpy.random as random
import cv2
import time
class MyDataSet(Dataset):
    def __init__(self, root, split, transform=None, use_mask=True):
        self.root = os.path.join(root, split)
        self.transform = transform
        self.img_paths = []
        self.mask_paths = []
        self.lbl_paths = []
        self._img = os.path.join(root, split, 'image', '{}.png')
        self._use_mask = use_mask
        if self._use_mask:
            self._mask = os.path.join(root, split, 'mask', '{}.png')
        self._lbl = os.path.join(root, split, 'label', '{}.png')
        for fn in os.listdir(os.path.join(root, split, 'image')):
            if len(fn) > 3 and fn[-4:] == '.png':
                self.img_paths.append(fn[:-4])
        for fn in os.listdir(os.path.join(root, split, 'mask')):
            if len(fn) > 3 and fn[-4:] == '.png':
                self.mask_paths.append(fn[:-4])
        for fn in os.listdir(os.path.join(root, split, 'label')):
            if len(fn) > 3 and fn[-4:] == '.png':
                self.lbl_paths.append(fn[:-4])

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self._img.format(self.img_paths[idx])
        if self._use_mask:
            mask_path = self._mask.format(self.mask_paths[idx])
        lbl_path = self._lbl.format(self.lbl_paths[idx])
        img = cv2.imread(img_path)
        img_g = img[:, :, 1]                # green channel only
        clahe = cv2.createCLAHE()
        img_gc = clahe.apply(img_g)
        lbl = cv2.imread(lbl_path, cv2.IMREAD_GRAYSCALE)
        # per-image class-balancing weights (inverse class frequency)
        all_count = np.prod(lbl.shape)
        fg_count = np.count_nonzero(lbl)
        bg_count = all_count - fg_count
        alpha = 1. / fg_count
        beta = 1. / bg_count
        total = alpha + beta                # normalize with the original values,
        alpha = alpha / total               # not with the already-updated alpha
        beta = beta / total
        if self._use_mask:
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            lbl = np.bitwise_and(mask, lbl)
            img_gc = np.bitwise_and(mask, img_gc)
        lbl = lbl / 255
        img_gc = img_gc / 255
        img_gc = img_gc[np.newaxis, ...]
        img_gc = mx.nd.array(img_gc)
        lbl = mx.nd.array(lbl)
        weight = lbl * alpha + (1 - lbl) * beta
        return img_gc, lbl, weight
my_train = MyDataSet('/content/drive/My Drive/DRIVE', 'train')
my_valid = MyDataSet('/content/drive/My Drive/DRIVE', 'valid')
train_loader = DataLoader(my_train, batch_size=4, shuffle=True, last_batch='rollover')
valid_loader = DataLoader(my_valid, batch_size=4, shuffle=True, last_batch='rollover')
ctx = [mx.gpu(0)]
net = UNet()
net.hybridize()
net.collect_params().initialize(ctx=ctx)
num_epochs = 1
num_steps = len(my_train) // 8
trainer = gluon.Trainer(net.collect_params(), 'adam', {
    'learning_rate': 0.001,
    'wd': 0.0005,
    'lr_scheduler': mx.lr_scheduler.PolyScheduler(num_steps * num_epochs, 0.001, 2, 0.00001)
})
criterion = gluon.loss.SoftmaxCELoss(axis=1)
class SegMetric(mx.metric.EvalMetric):
    """Calculate metrics for Seg training."""
    def __init__(self, eps=1e-8, use_mask=False):
        super(SegMetric, self).__init__('Seg')
        self.eps = eps
        self.num = 2
        self.ac = 0
        self.ce = 0
        self.name = ['Accuracy_background', 'Accuracy_foreground']
        self.use_mask = use_mask
        self.reset()

    def reset(self):
        """Override reset behavior."""
        if getattr(self, 'num', None) is None:
            self.num_inst = 0
            self.sum_metric = 0.0
        else:
            self.num_inst = [0] * self.num
            self.sum_metric = [0.0] * self.num

    def update(self, labels, preds):
        """Implementation of updating metrics."""
        # get generated multi label from network
        for l, p in zip(labels, preds):
            lnp = l.asnumpy()
            pnp = p.asnumpy()
            pl = np.argmax(pnp, axis=0)
            if self.use_mask:
                m = lnp != 255
                m255 = 255 - m * 255
                pl = np.bitwise_or(pl, m255)
            bg_gt = lnp == 0
            fg_gt = lnp == 1
            bg = bg_gt * (pl == 0)  # np.bitwise_and(bg_gt, pl==0)
            fg = fg_gt * (pl == 1)  # np.bitwise_and(fg_gt, pl==1)
            self.sum_metric[0] += bg.sum()
            self.sum_metric[1] += fg.sum()
            # print(fg.sum())
            self.num_inst[0] += bg_gt.sum()
            self.num_inst[1] += fg_gt.sum()

    def get(self):
        if self.num is None:
            if self.num_inst == 0:
                return (self.name, float('nan'))
            else:
                return (self.name, self.sum_metric / float(self.num_inst))
        else:
            names = ['%s' % (self.name[i]) for i in range(self.num)]
            values = []  # collect one value per class instead of overwriting a scalar
            for x, y in zip(self.sum_metric, self.num_inst):
                if y != 0:
                    values.append(x / y)
                else:
                    values.append(float('nan'))
            return (names, values)
metrics = [SegMetric(use_mask=True)]
for epoch in range(num_epochs):
    t0 = time.time()
    total_loss = 0
    for m in metrics:
        m.reset()
    for data, label, weight in train_loader:
        batch_size = data.shape[0]
        dlist = gluon.utils.split_and_load(data, ctx)
        llist = gluon.utils.split_and_load(label, ctx)
        wlist = gluon.utils.split_and_load(weight, ctx)
        with ag.record():
            preds = net(dlist[0])
            losses = []
            # the network output is smaller than the label, so center-crop
            # the label and weight down to the prediction size
            batch, chan, preds_y, preds_x = preds.shape
            _, label_y, label_x = label.shape
            xoff = (label_x - preds_x) // 2
            yoff = (label_y - preds_y) // 2
            llist[0] = llist[0][:, yoff:-yoff, xoff:-xoff]
            wlist[0] = wlist[0][:, yoff:-yoff, xoff:-xoff]
            # compute the loss once; the old loop appended the same loss
            # object len(preds) times
            l = criterion(preds, llist[0], wlist[0])
            losses.append(l)
        ag.backward(losses)
        total_loss += sum([l.sum() for l in losses])
        trainer.step(batch_size)
        for m in metrics:
            m.update(labels=llist[0], preds=preds)
    for m in metrics:
        name, value = m.get()
    t1 = time.time()
    print(epoch, t1 - t0, total_loss, name, value)
And here is the model:
import mxnet as mx
import mxnet.gluon.nn as nn
def ConvBlock(channels):
    out = nn.HybridSequential()
    out.add(
        nn.Conv2D(channels, 3),
        nn.BatchNorm(),
        nn.Activation('relu'),
        nn.Conv2D(channels, 3),
        nn.BatchNorm(),
        nn.Activation('relu')
    )
    return out
class up_block(nn.HybridBlock):
    def __init__(self, channels, **kwargs):
        super(up_block, self).__init__(**kwargs)
        self.upsampler = nn.Conv2DTranspose(channels=channels // 2, kernel_size=2,
                                            strides=2, weight_initializer=mx.init.Bilinear())
        # was 'gred_req', a typo that silently left the bilinear weights trainable
        self.upsampler.collect_params().setattr('grad_req', 'null')
        self.conv1 = ConvBlock(channels)

    def hybrid_forward(self, F, x, s):
        x = self.upsampler(x)
        x = F.Crop(*[x, s], center_crop=True)
        x = F.concat(s, x, dim=1)
        out = self.conv1(x)
        return out
class UNet(nn.HybridBlock):
    def __init__(self, first_channels=64, **kwargs):
        super(UNet, self).__init__(**kwargs)
        with self.name_scope():
            self.d0 = ConvBlock(first_channels)
            self.d1 = nn.HybridSequential()
            self.d1.add(nn.MaxPool2D(2, 2), ConvBlock(first_channels * 2))
            self.d2 = nn.HybridSequential()
            self.d2.add(nn.MaxPool2D(2, 2), ConvBlock(first_channels * 2**2))
            self.d3 = nn.HybridSequential()
            self.d3.add(nn.MaxPool2D(2, 2), ConvBlock(first_channels * 2**3))
            self.d4 = nn.HybridSequential()
            self.d4.add(nn.MaxPool2D(2, 2), ConvBlock(first_channels * 2**4))
            self.u3 = up_block(first_channels * 2**3)
            self.u2 = up_block(first_channels * 2**2)
            self.u1 = up_block(first_channels * 2)
            self.u0 = up_block(first_channels)
            self.conv = nn.Conv2D(2, 1)

    def hybrid_forward(self, F, x):
        x0 = self.d0(x)
        x1 = self.d1(x0)
        x2 = self.d2(x1)
        x3 = self.d3(x2)
        x4 = self.d4(x3)
        y3 = self.u3(x4, x3)
        y2 = self.u2(y3, x2)
        y1 = self.u1(y2, x1)
        y0 = self.u0(y1, x0)
        out = self.conv(y0)
        return out
I am using the DRIVE dataset as input to the network; its images are 565x584.
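As a sanity check on the shapes (my own back-of-the-envelope arithmetic, not something from the paper): each ConvBlock applies two unpadded 3x3 convolutions and so shaves 4 pixels off each spatial dimension, each MaxPool2D(2, 2) halves with floor, and each Conv2DTranspose(2, 2) doubles. Tracing the 584-pixel height through the encoder:

# Sketch: trace the feature-map height through the encoder above
# for a 584-pixel input (the DRIVE image height).
def conv_block(s):
    return s - 4                # two unpadded 3x3 convolutions

s = 584                         # DRIVE image height
skips = []
for _ in range(4):              # d0..d3 feed the skip connections
    s = conv_block(s)
    skips.append(s)
    s = s // 2                  # MaxPool2D(2, 2); odd sizes get floored
s = conv_block(s)               # d4, the bottleneck
print(skips, s)                 # [580, 286, 139, 65] 28

u3 then upsamples 28 to 56 while the matching skip x3 is 65 pixels tall, and (if I read the Crop operator right, with the first input cropped to the second’s shape) a 56-pixel map cannot be cropped to 65 pixels, which is exactly the "(56 vs 65)" shape check that fails in the first traceback.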
Update:
Here’s a more cleaned-up version of the code:
import os
import mxnet as mx
import mxnet.ndarray as nd
import mxnet.gluon as gluon
import mxnet.gluon.nn as nn
import mxnet.autograd as ag
from mxnet.gluon.data import Dataset, DataLoader
from mxnet.gluon.loss import Loss, _apply_weighting, _reshape_like
from mxnet import image
import matplotlib.pyplot as plt
import numpy as np
import numpy.random as random
import cv2
import time
from model_unet3 import UNet
class MyDataSet(Dataset):
    def __init__(self, root, split, transform=None, use_mask=True):
        self.root = os.path.join(root, split)
        self.transform = transform
        self.img_paths = []
        self.mask_paths = []
        self.lbl_paths = []
        self._img = os.path.join(root, split, 'image', '{}.png')
        self._use_mask = use_mask
        if self._use_mask:
            self._mask = os.path.join(root, split, 'mask', '{}.png')
        self._lbl = os.path.join(root, split, 'label', '{}.png')
        img_list = os.listdir(os.path.join(root, split, 'image'))
        img_list.sort()
        lbl_list = os.listdir(os.path.join(root, split, 'label'))
        lbl_list.sort()
        if self._use_mask:
            mask_list = os.listdir(os.path.join(root, split, 'mask'))
            mask_list.sort()
        for fn in img_list:
            if len(fn) > 3 and fn[-4:] == '.png':
                self.img_paths.append(fn[:-4])
        if self._use_mask:
            for fn in mask_list:
                if len(fn) > 3 and fn[-4:] == '.png':
                    self.mask_paths.append(fn[:-4])
        for fn in lbl_list:
            if len(fn) > 3 and fn[-4:] == '.png':
                self.lbl_paths.append(fn[:-4])

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self._img.format(self.img_paths[idx])
        if self._use_mask:
            mask_path = self._mask.format(self.mask_paths[idx])
        lbl_path = self._lbl.format(self.lbl_paths[idx])
        img = cv2.imread(img_path)
        img_g = img[:, :, 1]
        clahe = cv2.createCLAHE()
        img_gc = clahe.apply(img_g)
        lbl = cv2.imread(lbl_path, cv2.IMREAD_GRAYSCALE)
        if self._use_mask:
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            lbl = np.bitwise_and(mask, lbl)
            img_gc = np.bitwise_and(mask, img_gc)
        lbl = lbl / 255
        img_gc = img_gc / 255
        lbl = lbl[4:-4, 4:-4]
        img_gc = img_gc[np.newaxis, ...]
        lbl = lbl.astype('float32')
        img_gc = img_gc.astype('float32')
        return img_gc, lbl
class SegMetric(mx.metric.EvalMetric):
    def __init__(self, use_mask=False):
        super(SegMetric, self).__init__('Seg')
        self.num = 2
        self.name = ['Accuracy_background', 'Accuracy_foreground']
        self.use_mask = use_mask
        self.reset()

    def reset(self):
        if getattr(self, 'num', None) is None:
            self.num_inst = 0
            self.sum_metric = 0.0
        else:
            self.num_inst = [0] * self.num
            self.sum_metric = [0.0] * self.num

    def update(self, labels, preds):
        for l, p in zip(labels, preds):
            l = l.asnumpy()
            p = p.asnumpy()
            pl = np.argmax(p, axis=0)
            bg_gt = l == 0
            fg_gt = l == 1
            bg = bg_gt * (pl == 0)
            fg = fg_gt * (pl == 1)
            self.sum_metric[0] += bg.sum()
            self.sum_metric[1] += fg.sum()
            self.num_inst[0] += bg_gt.sum()
            self.num_inst[1] += fg_gt.sum()

    def get(self):
        if self.num is None:
            if self.num_inst == 0:
                return (self.name, float('nan'))
            else:
                return (self.name, self.sum_metric / float(self.num_inst))
        else:
            names = ['%s' % (self.name[i]) for i in range(self.num)]
            values = []
            for x, y in zip(self.sum_metric, self.num_inst):
                if y != 0:
                    values.append(x / y)
                else:
                    values.append(float('nan'))
            return [names, values]
my_train = MyDataSet('/Users/pinky/Documents/hsb/nn-dl/datasets/DRIVE', 'train')
my_valid = MyDataSet('/Users/pinky/Documents/hsb/nn-dl/datasets/DRIVE', 'valid')
train_loader = DataLoader(my_train, batch_size=4, shuffle=True, last_batch='rollover')
valid_loader = DataLoader(my_valid, batch_size=4, shuffle=True, last_batch='rollover')
net = UNet()
net.initialize(mx.init.Xavier())
num_epochs = 50
trainer = gluon.Trainer(net.collect_params(), 'adam', optimizer_params={'learning_rate': 0.001,'wd': 0.0005})
criterion = gluon.loss.SoftmaxCELoss(axis=1, sparse_label=False)
metrics = [SegMetric(use_mask=True)]
for epoch in range(num_epochs):
    t0 = time.time()
    for data, label in train_loader:
        batch_size = data.shape[0]
        with ag.record():
            preds = net(data)
            loss = criterion(preds, label)
        loss.backward()
        for m in metrics:
            m.update(labels=label, preds=preds)
        trainer.step(batch_size)
    for m in metrics:
        name, value = m.get()
    t1 = time.time()
    print(epoch, t1 - t0, name, value)
    for data, label in valid_loader:
        preds = net(data)
        for m in metrics:
            m.update(labels=label, preds=preds)
    for m in metrics:
        name, value = m.get()
    print('Valid:', name, value)
Still not working on Colab, but I found a problem when running it on my laptop: after net(data) is called, the label gets zeroed out, which makes the loss and the metrics meaningless. I have no idea why this is happening, as I’m not getting any errors or warnings.
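To narrow it down, this is the minimal check I’m running on my laptop (a sketch that assumes the net and train_loader defined above):

import mxnet as mx

# Snapshot the label before the forward pass and compare afterwards,
# to confirm whether net(data) really clobbers it.
for data, label in train_loader:
    before = label.copy()        # deep copy, so the snapshot can't be mutated
    preds = net(data)
    mx.nd.waitall()              # make sure the forward pass has actually run
    intact = bool((before == label).min().asscalar())
    print('label intact after forward:', intact)
    break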