Train Simple Image Classifier¶
Task: Train a simple image classifier using logistic regression and cross-entropy loss
(Experimental transparency label for language-model assistance: @assist=Copilot @mode=predictive @level=0.1. This means I used GitHub Copilot as a predictive-text assistant and estimate that it was involved in generating around 10% of the characters here.)
Setup¶
from fastai.vision.all import *
if sys.platform == "darwin":
    # https://stackoverflow.com/a/64855500/69707
    import os
    os.environ['OMP_NUM_THREADS'] = '1'
Load up the MNIST dataset. It has 10 digits.
path = untar_data(URLs.MNIST)
Create a subset of the images, so we train faster. We do this by taking 500 random images of each digit.
set_seed(0)
num_imgs_per_digit = 500
items = L([
    p
    for split in ['training', 'testing']
    for digit in range(10)
    for p in (path/split/str(digit)).ls().shuffle()[:num_imgs_per_digit]
])
Create the dataloaders. We need a slightly special ImageBlock because we want grayscale images.
block = DataBlock(
    blocks=(ImageBlock(PILImageBW), CategoryBlock),
    get_y=parent_label,
    splitter=GrandparentSplitter(train_name='training', valid_name='testing'),
)
dataloaders = block.dataloaders(items, bs=16)
print(f"{dataloaders.train.n} training images, {dataloaders.valid.n} validation images")
5000 training images, 5000 validation images
Let's inspect a batch of data.
dataloaders.train.show_batch()
print(f"Available categories: {dataloaders.train.vocab}")
Available categories: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
Task¶
Let's make a neural network to predict which digit was written, using the raw pixel values. We'll keep it at a single layer today, so this is actually just a fancy way of doing logistic regression. But it'll give us a chance to work with minibatches and loss functions.
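To make the "fancy logistic regression" framing concrete, here is a small optional sketch (not part of the assignment; the demo_ names are just illustrative) of what the model we're about to build computes for a single flattened image:
W_demo = torch.randn(10, 784)        # one row of weights per digit class
b_demo = torch.zeros(10)             # one bias per digit class
x_demo = torch.rand(784)             # stand-in for one flattened 28x28 image
demo_logits = W_demo @ x_demo + b_demo   # 10 scores, one per digit
demo_probs = demo_logits.softmax(dim=-1) # softmax turns scores into probabilities that sum to 1
This is exactly multinomial (softmax) logistic regression on raw pixel values.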
First, let's get a batch of data to try out. (Later we'll do this in a loop.)
images, labels = dataloaders.train.one_batch()
images = cast(images, Tensor) # work around a fastai quirk
labels = cast(labels, Tensor)
images.shape
torch.Size([16, 1, 28, 28])
show_images(images)
What do the values look like? Let's check the range, then the mean and standard deviation.
images.max(), images.min()
(tensor(1.), tensor(0.))
images.mean(), images.std()
(tensor(0.1356), tensor(0.3120))
We'll flatten the images before giving them to the linear layer. They are 28 pixels square, so 28*28 = 784 pixels total.
An implication of this step is that we're throwing away the spatial structure of the image. Convolutional models like ResNet are able to use the spatial structure to improve performance.
flattener = nn.Flatten()
Let's show what the flattener does to our example batch. Make sure you can explain this shape.
flattener(images).shape
torch.Size([16, 784])
Step 1: Create a linear layer of the appropriate dimensionality.
- The classifier will be trying to predict which of 10 options (0 through 9) each digit is. Remember that each output of the linear layer is one logit. So think about how many logit outputs we need.
linear_1 = nn.Linear(in_features=..., out_features=..., bias=True)
# nn.Sequential just connects the output of one function into the input of the next.
# In this case we'll use it to connect the flattener to the linear layer.
model = nn.Sequential(
    flattener,
    linear_1,
)
In case we're running on a machine with a GPU, the data loader might put images on the GPU -- but the model is still on CPU. So we'll move the model to the same device as the images.
model.to(images.device)
Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=10, bias=True)
)
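If you want to double-check the device placement, here is a quick optional sketch (not part of the assignment):
# Both of these should print the same device (e.g., cpu or cuda:0).
print(next(model.parameters()).device)
print(images.device)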
Let's test our model on one batch of data by calling it and looking at what it returns.
Note: calling a model object basically just calls its forward method.
logits = model(images) # if this fails, check the input dimensionality of the model.
print("Logits shape:", logits.shape)
assert logits.shape == (16, 10) # if this fails, check the output dimensionality of the model.
Logits shape: torch.Size([16, 10])
Make sure you can explain why logits.shape contains those two numbers.
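To see that the earlier note about forward holds for our model, here is a small optional sketch (it assumes no hooks are registered, which is true here):
# Calling the module and calling .forward directly give the same result for this model.
assert torch.allclose(model(images), model.forward(images))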
Let's look at what the predictions of this (randomly initialized) model are for one image, in particular, the first one in the batch. First we look at the logits (what we sometimes call the score of each class).
logits[0]
tensor([ 0.1896, 0.1378, 0.0330, -0.1807, 0.0758, -0.0851, -0.1845, -0.1535,
-0.3112, 0.2156], grad_fn=<SelectBackward0>)
What prediction would this model make for this image? Let's look at the probabilities. (Note: since the model was initialized randomly, the specific values are meaningless here.)
probs = logits.softmax(dim=-1)
plt.barh(torch.arange(10), probs[0].detach().cpu()); plt.xlabel("Probability"); plt.ylabel("Digit");
# side note: the `detach` is needed because we don't need to take the gradient of this *plot*.
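As a quick sanity check (optional sketch), softmax should produce probabilities that sum to 1 for each image:
# Each row of probs is a distribution over the 10 digits, so each row should sum to (approximately) 1.
print(probs.sum(dim=-1))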
Was that correct? Unlikely, since the model was randomly initialized. To make a better model, we'll need to look at the labels. Make sure you can explain both the shape and values of labels.
labels
tensor([0, 9, 8, 6, 0, 6, 8, 3, 1, 6, 5, 0, 0, 7, 7, 9])
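The values are integer indices into the vocab we printed earlier. Here is a small sketch (assuming the labels batch from above) that decodes them back to category strings:
# Map each integer label back to its category string via the dataloaders' vocab.
print([dataloaders.train.vocab[i] for i in labels.tolist()])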
Now let's compute the cross-entropy loss. We'll use F.cross_entropy from PyTorch, which has the following basic signature (simplified somewhat from the official docs):
F.cross_entropy(
    logits: Tensor[Batch, Categories],  # the unnormalized scores of each class, for each item in the batch
    target: TensorCategory[Batch],      # the correct label index (an int) for each item in the batch
    reduction: str = 'mean',            # whether to return a single number for the average loss across the batch ('mean') or not ('none')
    label_smoothing: float = 0.0,       # how much label smoothing to apply (none by default)
)
Let's try it on our logits and labels for this batch.
loss = F.cross_entropy(logits, labels, reduction='none')
loss
tensor([2.1014, 2.2606, 1.9505, 2.4734, 2.4618, 2.4118, 2.1921, 2.2975, 2.1964,
2.4857, 2.2314, 2.2812, 2.3104, 2.3450, 2.3184, 2.2362],
grad_fn=<NllLossBackward0>)
loss.mean()
tensor(2.2846, grad_fn=<MeanBackward0>)
Side note: we could have computed the same result using the following code, but it's less numerically stable than just using F.cross_entropy directly.
# An alternative, less numerically stable way to compute the loss:
probs = logits.softmax(dim=-1)
F.nll_loss(probs.log(), labels, reduction='none').mean()
tensor(2.2846, grad_fn=<MeanBackward0>)
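The numerically stable way to get log-probabilities is F.log_softmax, which is essentially what F.cross_entropy combines with F.nll_loss internally:
# Stable equivalent: compute log-probabilities directly instead of log(softmax(...)).
log_probs = F.log_softmax(logits, dim=-1)
F.nll_loss(log_probs, labels, reduction='none').mean()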
We can use argmax to ask which category got the highest score for each image. This will be useful for computing a metric like accuracy.
predictions = logits.argmax(dim=1) # note: we could use `probs` instead of `logits`. Why?
print(predictions.shape)
predictions
torch.Size([16])
tensor([9, 2, 8, 8, 9, 7, 2, 8, 8, 1, 4, 5, 5, 4, 2, 2])
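For example, accuracy on this batch is just the fraction of images where the prediction matches the label (a small sketch; with random weights, expect roughly chance level):
# Compare predictions to labels and take the mean of the 0/1 matches.
batch_accuracy = (predictions == labels).float().mean()
print(batch_accuracy)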
Now, let's put the pieces together. Fill in the blanks in the code below to train the model. Make sure you understand what each line is doing.
# Make sure we don't accidentally reuse global variables from our example.
# This is a common source of bugs in Jupyter notebooks.
del images, labels, logits, loss, probs, predictions
num_epochs = 1  # increase this to 10 or 20 once your training loop is working
learning_rate = .1
losses = []
# Re-initialize the parameters of the model, so training restarts when this block starts.
linear_1.reset_parameters()
for epoch in range(num_epochs):
    # Keep track of some things for each epoch.
    total_images = 0
    total_correct = 0
    # Loop over the training data in batches.
    for images, labels in dataloaders.train:
        images = cast(images, Tensor)  # work around a quirk in fastai, ignore this
        labels = cast(labels, Tensor)
        logits = ...
        loss = ...
        # Take an SGD step.
        loss.backward()
        for parameter in model.parameters():
            parameter.data -= learning_rate * parameter.grad
        model.zero_grad()
        # Track metrics.
        predictions = logits.argmax(axis=1)
        num_accurate = (predictions == labels).sum()
        total_images += len(labels)
        total_correct += num_accurate
        # Track losses.
        # The .item method converts a 1-element tensor to a Python number.
        losses.append(loss.item())
    # Epoch done, print some stats.
    avg_loss_this_epoch = np.mean(losses[-len(dataloaders.train):])  # average over just this epoch's batches
    print(f"Epoch {epoch:2d}: loss={avg_loss_this_epoch:.2f}, train accuracy {total_correct:3d}/{total_images}")
# Plot the un-smoothed loss
#plt.plot(losses)
# Plot a smoothed version of the loss (easier to see the trend)
pd.Series(losses).ewm(alpha = .1).mean().plot()
plt.xlabel("Iteration")
plt.ylabel("Cross-Entropy Loss");
Epoch 0: loss=0.67, train accuracy 4109/4992
Let's inspect the weights of our trained network. Since we have a single layer, it's relatively easy to do this. First, look at the weights of the linear_1 layer:
linear_1.weight.shape
torch.Size([10, 784])
Now let's show the weights as an image. We'll need to reshape them to be 28x28 pixels. We'll use a color map where blue indicates highly positive weights, red indicates highly negative weights, and white indicates weight values that are close to 0. (You're welcome to try a different diverging colormap.)
weight_images = linear_1.weight.data.view((10, 28, 28))
with matplotlib.rc_context(rc={'image.cmap': 'RdBu'}):
    show_images(weight_images)
Analysis¶
Q1: Why is logits.shape 16 by 10?
your answer here
Q2: Before we trained the model (i.e., while it just had random weights), the cross-entropy loss was roughly the same number for every image. What was that number, and why? Hint:
np.log(10)
2.302585092994046
your answer here
Q3: Adjust the learning rate parameter. Give an example of a learning rate that is too high, one that is too low, and one that is good. For each, explain your answer by describing what the loss curve looks like; how do its shape and its values indicate good or bad training?
your answer here
Q4: Why do the weight images look the way they do? (Why might they look similar to the digits in question? Why might they not look exactly like the digits in question?)
your answer here
Extension¶
PyTorch gives us optimizer objects that do all the work of updating parameters. This not only saves code, it also lets us swap in fancier optimizers. See the torch.optim documentation.
- Compare this code with the code block above.
- Fill in the blanks in the same way and observe how you get the same result with fewer lines of code.
- Add weight_decay=0.01 to the SGD constructor. Rerun the chunk that visualizes the weights. What difference do you notice? What do you notice about the loss? Accuracy?
- Replace SGD with AdamW and compare the results.
num_epochs = 10
learning_rate = .1
losses = []
# Initialize the optimizer.
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
# Re-initialize the parameters of the model, so training restarts when this block starts.
linear_1.reset_parameters()
for epoch in range(num_epochs):
    # Keep track of some things for each epoch.
    total_images = 0
    total_correct = 0
    for images, labels in dataloaders.train:
        images = TensorBase(images)  # work around a quirk in fastai, ignore this
        logits = ...
        loss = ...
        # Take an SGD step.
        loss.backward()
        optimizer.step()
        model.zero_grad()
        # Track metrics.
        predictions = logits.argmax(axis=1)
        num_accurate = (predictions == labels).sum()
        total_images += len(labels)
        total_correct += num_accurate
        # Track losses.
        losses.append(loss.item())
    # Epoch done, print some stats.
    avg_loss_this_epoch = np.mean(losses[-len(dataloaders.train):])  # average over just this epoch's batches
    print(f"Epoch {epoch:2d}: loss={avg_loss_this_epoch:.2f}, train accuracy {total_correct:3d}/{total_images}")
# Plot the un-smoothed loss
#plt.plot(losses)
# Plot a smoothed version of the loss (easier to see the trend)
pd.Series(losses).ewm(alpha = .1).mean().plot()
plt.xlabel("Iteration")
plt.ylabel("Cross-Entropy Loss");
Comments from grading this assignment last year:
- Why 10 output features? One score (logit) for each of the 10 digits.
- Describe why you conclude that the loss curves are bad.
- Learning rates: the basic shape will typically be something like 1/x, but with differences in shape and, most significantly, ending values.
- Think more about why the weights sorta look like the digits in this case. See below.
- The weight images won't look exactly like a digit even at convergence--why?
Notes (mostly generic notes I'm giving to everyone):
- The initialization was good because the predicted probabilities before training ended up approximately equal across the 10 categories, so the model was rightly telling us "I don't know" before training. We could see that because the cross-entropy (log loss) was about -ln(1/10), meaning that whatever the true digit was, the classifier was giving it a probability of about 1/10. Because the loss wasn't huge early in training, the gradient updates weren't huge either.
- Look at both the shape of the loss curve and also the values it achieves. Note, for example, that both too-small and too-large learning rates end up with loss values that are much higher than what you see with a reasonable learning rate, though they take a different trajectory to get there. (Why?)
- Importantly: the problem with learning rates like 1.0 or 0.5 isn't the steep initial descent (actually that can be a good thing!), it's the fact that the weights jump around too much between batches so the weights never get close enough to their best values. You can see this because the loss jumps around too.
- The weight images do look like the digits because the dot product that linear_1 does is basically comparing them with the input images, pixel by pixel, to see how much overlap there is (see the short sketch after these notes).
- The weight images don't look like the digits because:
- they're trained on the differences between the digit images, not the images themselves (think about how this relates to the softmax properties we studied in hw5),
- all of the values could have been shifted up or down by a constant without affecting the result, so negative values aren't meaningful per se (if we had used any kind of regularization this wouldn't be true anymore), and
- some digits can be drawn in different ways. Without any hidden layers to build up representations, the only thing this network can do with digits that are drawn different ways is to take the average of the two.
- This model massively underfits--much better performance is possible. (But hm, look at the weight images... could it have also been overfitting? Yes you can have both at the same time.)
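To make the dot-product point above concrete, here is a small optional sketch. It assumes the variables from earlier cells (flattener, linear_1, model, and an images batch) are still defined; the logits for one image are just dot products between that flattened image and each of the ten weight images, plus a bias:
# Reproduce the model's logits for the first image in the batch "by hand".
x = flattener(images)[0]                                        # one flattened image, shape (784,)
manual_logits = linear_1.weight.data @ x + linear_1.bias.data   # ten dot products + biases
print(manual_logits)
print(model(images)[0])  # should match the manual computation (up to floating-point noise)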