I copied all the code from the original book into a single .py file, and it seems to work fine for me.
The only problem I ran into is related to multiprocessing in the data loading: for some reason, if I run the same code several times, one of the runs prints the error message "Pipe is broken", which doesn’t stop the training but looks ugly. So the only change I made to the original code is that I copied the data loading code into my .py file in order to set num_workers to 0.
Try this code out.
import sys
import gluonbook as gb
import os
from mxnet import autograd, nd
from mxnet.gluon import data
# Minibatch size; passed to load_data_fashion_mnist and train_ch3 further down.
batch_size = 256
def load_data_fashion_mnist(batch_size, resize=None, root=os.path.join(
        '~', '.mxnet', 'datasets', 'fashion-mnist'), num_workers=0):
    """Download the Fashion-MNIST dataset and return train/test data loaders.

    Args:
        batch_size: Number of examples per minibatch in both loaders.
        resize: Optional size handed to ``transforms.Resize``. When falsy,
            no resize step is added.
        root: Directory used for the dataset files; a leading ``~`` is
            expanded to the user's home directory.
        num_workers: Number of worker processes given to each
            ``DataLoader``. Defaults to 0, which keeps loading in the main
            process and avoids the intermittent broken-pipe messages
            reported with multiprocess loading.

    Returns:
        A ``(train_iter, test_iter)`` tuple of ``DataLoader`` objects. Only
        the training loader shuffles its examples.
    """
    root = os.path.expanduser(root)
    transforms = data.vision.transforms

    # Build the transform pipeline: optional resize first, then ToTensor.
    steps = []
    if resize:
        steps.append(transforms.Resize(resize))
    steps.append(transforms.ToTensor())
    transformer = transforms.Compose(steps)

    mnist_train = data.vision.FashionMNIST(root=root, train=True)
    mnist_test = data.vision.FashionMNIST(root=root, train=False)

    # transform_first applies the pipeline to the image only, not the label.
    train_iter = data.DataLoader(mnist_train.transform_first(transformer),
                                 batch_size, shuffle=True,
                                 num_workers=num_workers)
    test_iter = data.DataLoader(mnist_test.transform_first(transformer),
                                batch_size, shuffle=False,
                                num_workers=num_workers)
    return train_iter, test_iter
# Build the training and test loaders with the chosen minibatch size.
train_iter, test_iter = load_data_fashion_mnist(batch_size)
# net() flattens every example to num_inputs features; the model emits
# num_outputs scores per example (presumably 28*28 pixels and 10 classes).
num_inputs = 784
num_outputs = 10
# Model parameters: small random weights and a zero bias.
W = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))
b = nd.zeros(num_outputs)
# Attach gradient buffers so backward() can store gradients for W and b.
W.attach_grad()
b.attach_grad()
# Demo of summing along each axis with keepdims=True. The tuple on the last
# line is only displayed in an interactive session; in a script it is discarded.
X = nd.array([[1, 2, 3], [4, 5, 6]])
X.sum(axis=0, keepdims=True), X.sum(axis=1, keepdims=True)
def softmax(X):
    """Return the row-wise softmax of a 2-D array of scores.

    The maximum of each row is subtracted before exponentiating. Softmax is
    invariant to a per-row shift, so the result is mathematically unchanged,
    but the largest exponent becomes ``exp(0) == 1``. This prevents ``exp``
    from overflowing to ``inf`` (and the division from producing ``nan``)
    when the scores are large.

    Args:
        X: 2-D array whose rows are examples and whose columns are class
            scores. It must provide ``max``, ``exp`` and ``sum`` methods that
            accept ``axis``/``keepdims`` (as MXNet NDArrays do).

    Returns:
        An array of the same shape as ``X`` whose rows are non-negative and
        sum to 1.
    """
    # Shift each row so its largest entry is 0; keeps exp() in a safe range.
    shifted = X - X.max(axis=1, keepdims=True)
    X_exp = shifted.exp()
    partition = X_exp.sum(axis=1, keepdims=True)
    return X_exp / partition  # The broadcast mechanism is applied here.
# Try softmax on a random 2x5 matrix of scores.
X = nd.random.normal(shape=(2, 5))
X_prob = softmax(X)
# Probabilities and their per-row sums; only displayed in an interactive
# session, discarded when run as a script.
X_prob, X_prob.sum(axis=1)
def net(X):
    """Apply the softmax-regression model to a batch of inputs.

    The batch is flattened to rows of ``num_inputs`` features, passed through
    the affine map defined by the module-level ``W`` and ``b``, and the
    resulting scores are normalised with ``softmax``.
    """
    flat = X.reshape((-1, num_inputs))
    scores = nd.dot(flat, W) + b
    return softmax(scores)
# Toy example: predicted probabilities for two examples over three classes,
# together with their labels.
y_hat = nd.array([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = nd.array([0, 2])
# Select from each row of y_hat the entry indexed by the matching label in y
# (displayed only in an interactive session).
nd.pick(y_hat, y)
def cross_entropy(y_hat, y):
    """Return the cross-entropy loss of each example.

    For every row of ``y_hat`` the entry selected by the corresponding label
    in ``y`` is picked, and its negative logarithm is returned.
    """
    label_prob = nd.pick(y_hat, y)
    return -label_prob.log()
def accuracy(y_hat, y):
    """Return the fraction of rows of ``y_hat`` whose arg-max equals ``y``.

    The result is extracted from the array as a scalar.
    """
    predicted = y_hat.argmax(axis=1)
    # Labels are cast to float32 before the element-wise comparison.
    labels = y.astype('float32')
    hits = predicted == labels
    return hits.mean().asscalar()
# Accuracy on the toy y_hat / y example; the value is only displayed in an
# interactive session and is discarded when run as a script.
accuracy(y_hat, y)
# The function will be gradually improved: the complete implementation will be
# discussed in the "Image Augmentation" section.
def evaluate_accuracy(data_iter, net):
    """Return the accuracy of ``net`` over all examples in ``data_iter``.

    Each batch's accuracy is weighted by the number of examples in that
    batch, so a smaller final batch does not skew the result the way a plain
    average of per-batch accuracies would.

    Args:
        data_iter: Iterable yielding ``(X, y)`` batches; ``y.shape[0]`` is
            taken as the number of examples in the batch.
        net: Callable mapping a batch ``X`` to per-class scores.

    Returns:
        The fraction of correctly classified examples.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no examples.
    """
    correct = 0.0
    total = 0
    for X, y in data_iter:
        batch_n = y.shape[0]
        # accuracy() is a per-batch mean; scale it back to a count of hits.
        correct += accuracy(net(X), y) * batch_n
        total += batch_n
    return correct / total
# Test accuracy of the model before any training; the value is only displayed
# in an interactive session and is discarded when run as a script.
evaluate_accuracy(test_iter, net)
# Training hyperparameters: number of epochs and learning rate.
num_epochs, lr = 5, 0.1
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, trainer=None):
    """Train ``net`` and print loss and accuracy after every epoch.

    The reported training loss and accuracy are averaged over examples:
    each batch's mean is weighted by its size, so a smaller final batch does
    not distort the epoch statistics.

    Args:
        net: Callable mapping a batch ``X`` to predictions.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches used for the test accuracy.
        loss: Callable ``loss(y_hat, y)`` returning an array to backpropagate.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Batch size passed to the update step.
        params: Parameters handed to ``gb.sgd`` when ``trainer`` is None.
        lr: Learning rate handed to ``gb.sgd`` when ``trainer`` is None.
        trainer: Optional object with a ``step(batch_size)`` method; when
            given it is used instead of ``gb.sgd``.
    """
    for epoch in range(num_epochs):
        train_l_sum = 0.0
        train_acc_sum = 0.0
        n_examples = 0
        for X, y in train_iter:
            # Record the forward pass so backward() can compute gradients.
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            if trainer is None:
                gb.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)  # This will be illustrated in the next section.
            # Weight the per-batch means by the batch size so that the epoch
            # statistics are true per-example averages.
            batch_n = y.shape[0]
            train_l_sum += l.mean().asscalar() * batch_n
            train_acc_sum += accuracy(y_hat, y) * batch_n
            n_examples += batch_n
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n_examples,
                 train_acc_sum / n_examples, test_acc))
# Train the model, updating W and b through the gb.sgd branch of train_ch3.
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size,
          [W, b], lr)

# Keep only the first batch of the test set for the visual check.
for X, y in test_iter:
    break

true_labels = gb.get_fashion_mnist_labels(y.asnumpy())
pred_labels = gb.get_fashion_mnist_labels(net(X).argmax(axis=1).asnumpy())
# Each title shows the true label on the first line and the prediction below.
titles = ['\n'.join(pair) for pair in zip(true_labels, pred_labels)]
# Show the first nine images of the batch with their titles.
gb.show_fashion_mnist(X[:9], titles[:9])
If it still doesn’t work for you, check your version of MXNet - I use 1.4.0.