from fastai.vision.all import *
# Seed the random number generators so results are repeatable from run to run.
set_seed(0)


# Download and extract the example letter images.
train_url = 'https://students.cs.calvin.edu/~ka37/example_letter_images.zip'
train_dataset_path = untar_data(train_url)
# Collect every image file; an image's label is the name of the folder that contains it.
train_images = get_image_files(train_dataset_path)
train_labels = [image_path.parent.name for image_path in train_images]


# Tally how many training images carry each label.
Counter(train_labels)


# Build the train/validation DataLoaders from the parallel lists of image paths and labels.
dataloaders = ImageDataLoaders.from_lists(
    path=train_dataset_path,
    fnames=train_images,
    labels=train_labels,
    # Hold out 20% of the images for validation; the seed fixes which ones are held out.
    valid_pct=0.2,
    seed=42,
    # Two images per batch.
    bs=2,
    # Per image: random crop, resized to 224 pixels.
    item_tfms=RandomResizedCrop(224),
    # Per batch: data augmentation, output size 224.
    batch_tfms=aug_transforms(size=224),
)
# Display a sample batch as a visual sanity check.
dataloaders.show_batch()


# Number of items in the training split.
dataloaders.train.n

21


# Walk through one epoch of the training DataLoader and record how many images each batch holds.
sizes_of_images_in_batch = [len(image_batch) for image_batch, _ in dataloaders.train]

# How the images were divided between the training and validation sets.
print(f'{len(train_images)} total images: {dataloaders.train.n} in train set, {dataloaders.valid.n} in valid set')
# How many batches one epoch produced.
print(f'Training data loader gave us {len(sizes_of_images_in_batch)} batches in an epoch')
# The distinct batch sizes that occurred.
print(f'Each batch had {set(sizes_of_images_in_batch)} images.')
# The number of images actually delivered in the epoch, which can be fewer than the training-set size.
print(f'So the learner will get trained on a total of {sum(sizes_of_images_in_batch)} images.')

26 total images: 21 in train set, 5 in valid set
Training data loader gave us 10 batches in an epoch
Each batch had {2} images.
So the learner will get trained on a total of 20 images.


# Build a learner around a ResNet-18 (its pretrained weights are downloaded on first use)
# and report accuracy as the metric.
learn = vision_learner(dataloaders, resnet18, metrics=[accuracy])

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


# Learner.save / Learner.load resolve the name under learn.path/learn.model_dir and
# append '.pth' themselves, so pass the bare stem here. Passing 'classifier.pth'
# would store the weights in a file called 'classifier.pth.pth'.
saved_clf_filename = 'classifier'
try:
    # Reuse previously saved weights when they exist.
    learn.load(saved_clf_filename)
except FileNotFoundError:
    # No saved weights yet: fine-tune the model, plot the loss curve, and
    # save the result so later runs can skip training.
    # Maybe we should be using learn.export()?
    print("Running fine-tuning")
    learn.fine_tune(epochs=10)
    learn.recorder.plot_loss()
    learn.save(saved_clf_filename)
else:
    print("Loaded saved learner")

Running fine-tuning


# Download and extract the test dataset.
test_url = 'https://students.cs.calvin.edu/~ka37/letter_images_dataset_v0.zip'
test_dataset_path = untar_data(test_url)


# Gather every test image. Its label is the containing folder's name, upper-cased
# (presumably so it matches the training label names).
test_images = get_image_files(test_dataset_path)
test_labels = [image_path.parent.name.upper() for image_path in test_images]


# Peek at the list of test image paths.
test_images

(#944) [Path('/root/.fastai/data/letter_images_dataset_v0/c/group_5_c3.jpg'),Path('/root/.fastai/data/letter_images_dataset_v0/c/group_9_lower_c_1.png'),Path('/root/.fastai/data/letter_images_dataset_v0/c/group_4_C_0.png'),Path('/root/.fastai/data/letter_images_dataset_v0/c/group_0_C_30.png'),Path('/root/.fastai/data/letter_images_dataset_v0/c/group_0_C_7.png'),Path('/root/.fastai/data/letter_images_dataset_v0/c/group_10_c20.png'),Path('/root/.fastai/data/letter_images_dataset_v0/c/group_1_C_34.png'),Path('/root/.fastai/data/letter_images_dataset_v0/c/group_8_c13.png'),Path('/root/.fastai/data/letter_images_dataset_v0/c/group_1_C_4.png'),Path('/root/.fastai/data/letter_images_dataset_v0/c/group_3_C_0.png')...]


# Note: each test item is a (filename, label) pair, so we "zip" the filenames
# together with the corresponding labels.
# To see how `zip` works, try looking at the output of `list(zip(test_images[:5], test_labels))`.
labeled_test_items = list(zip(test_images, test_labels))
test_dl = dataloaders.test_dl(labeled_test_items, with_labels=True)


# Display a sample batch from the test DataLoader.
test_dl.show_batch()


# Build an interpretation object from the learner's results on the test DataLoader.
interp = ClassificationInterpretation.from_learner(learn, dl=test_dl)


# Print per-class precision, recall, F1-score and support.
interp.print_classification_report()

              precision    recall  f1-score   support

           A       0.54      0.48      0.51       332
           B       0.63      0.12      0.21       346
           C       0.40      0.88      0.55       266

    accuracy                           0.46       944
   macro avg       0.52      0.49      0.42       944
weighted avg       0.53      0.46      0.41       944


# Plot the confusion matrix for the test-set predictions.
interp.plot_confusion_matrix()


# Show the 25 test images with the highest loss.
interp.plot_top_losses(25)


# Run the model over the test DataLoader, collecting predicted probabilities and targets.
predicted_probs, targets = learn.get_preds(
    dl=test_dl,
    with_preds=True,
    with_targs=True,
)
# The predicted class is whichever one received the highest probability.
predicted_classes = predicted_probs.argmax(dim=1)


# Accuracy computed by hand.
# True wherever the model's prediction equals the target label supplied by the
# DataLoader (both came from get_preds above).
is_correct = predicted_classes == targets
# Convert True to 1.0 and False to 0.0; the mean is then the fraction of True's.
is_correct.to(float).mean()

TensorBase(0.4608, dtype=torch.float64)


# Cross-check: the `accuracy` metric applied to the same predictions and targets.
accuracy(predicted_probs, targets)

TensorBase(0.4608)

epoch	train_loss	valid_loss	accuracy	time
0	1.140317	0.691164	0.600000	00:01
1	1.484382	0.387477	0.800000	00:00
2	1.547534	0.572340	0.600000	00:00
3	1.446050	0.660998	0.800000	00:01
4	1.446995	0.613119	0.800000	00:00
5	1.395874	0.627853	0.800000	00:00
6	1.380330	0.485403	0.800000	00:00
7	1.356561	0.420344	0.800000	00:00
8	1.269402	0.440142	0.800000	00:00
9	1.277924	0.311430	0.800000	00:00

Train the model

Load the test set

Evaluate our learner on the testing data