16. Tensorboard

16.1. Add image

[1]:
import matplotlib.pyplot as plt
from torchvision import datasets, models, transforms
import torch.optim as optim
import torch.nn as nn
import torchvision
from torchvision.transforms import *
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch
import numpy as np
from collections import defaultdict
import time
import copy

def matplotlib_imshow(img, one_channel=False):
    if one_channel:
        img = img.mean(dim=0)
    # img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    if one_channel:
        plt.imshow(npimg, cmap="Greys")
    else:
        plt.imshow(np.transpose(npimg, (1, 2, 0)))

def get_dataloaders():
    def clean_up(path):
        import os
        from shutil import rmtree

        ipynb_checkpoints = f'{path}/.ipynb_checkpoints'
        if os.path.exists(ipynb_checkpoints):
            rmtree(ipynb_checkpoints)

    def get_dataloader(phase):
        path = f'./shapes/{phase}'
        clean_up(path)

        if phase in ['valid']:
            transform = transforms.Compose([
                Resize(224),
                ToTensor()])
        else:
            transform = transforms.Compose([
                Resize(224),
                RandomAffine(degrees=(30, 50), shear=5),
                ToTensor()])

        image_folder = datasets.ImageFolder(path, transform=transform)
        # print(path, image_folder.classes)
        return DataLoader(image_folder, batch_size=4, shuffle=True, num_workers=4)

    return {phase: get_dataloader(phase) for phase in ['train', 'test', 'valid']}

np.random.seed(37)
torch.manual_seed(37)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
pretrained=True
num_classes = 3
writer = SummaryWriter('/root/tensorboard/shape_experiment_1')

dataloaders = get_dataloaders()
[2]:
data_iter = iter(dataloaders['train'])
images, labels = data_iter.next()

img_grid = torchvision.utils.make_grid(images)

matplotlib_imshow(img_grid, one_channel=False)

writer.add_image('four_shape_images', img_grid)
writer.close()
_images/tensorboard_3_0.png

16.2. Add model

[3]:
model = models.resnet18(pretrained=pretrained)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)
Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/checkpoints/resnet18-5c106cde.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 47.7MB/s]
[4]:
writer.add_graph(model, images.to(device))
writer.close()

16.3. Add embedding

[5]:
def get_data(dataloader, device):
    imgs = []
    lbls = []

    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        imgs.append(inputs)
        lbls.append(labels)

    imgs = torch.cat(imgs)
    lbls = torch.cat(lbls)

    return imgs, lbls

def select_n_random(data, labels, n=100):
    assert len(data) == len(labels)

    perm = torch.randperm(len(data))
    return data[perm][:n], labels[perm][:n]

images, labels = get_data(dataloaders['train'], device)
images, labels = select_n_random(images, labels)

idx_to_class = {v: k for k, v in dataloaders['train'].dataset.class_to_idx.items()}
class_labels = [idx_to_class[lab] for lab in labels.cpu().detach().numpy()]

writer.add_embedding(images.mean(dim=1).view(-1, 224 * 224), metadata=class_labels, label_img=images, global_step=1)
writer.close()

16.4. Add training loss

[6]:
import torch.nn.functional as F

def images_to_probs(net, images):
    output = net(images)
    _, preds_tensor = torch.max(output, 1)
    preds = np.squeeze(preds_tensor.cpu().detach().numpy())
    return preds, [F.softmax(el, dim=0)[i].item() for i, el in zip(preds, output)]


def plot_classes_preds(net, images, labels, classes):
    preds, probs = images_to_probs(net, images)

    fig = plt.figure(figsize=(12, 48))

    n_images = images.shape[0]

    for idx in np.arange(n_images):
        ax = fig.add_subplot(1, n_images, idx+1, xticks=[], yticks=[])
        matplotlib_imshow(images[idx].cpu().detach(), one_channel=False)

        clazz_pred = classes[preds[idx]]
        clazz_true = classes[labels[idx].cpu().detach().numpy().item()]
        prob_pred = probs[idx] * 100.0
        color = 'green' if preds[idx]==labels[idx].item() else 'red'

        ax.set_title(f'pred={clazz_pred}, {prob_pred:.2f}\ntrue={clazz_true}', color=color)

    return fig

def train(dataloaders, model, criterion, optimizer, scheduler, classes, device, writer, num_epochs=20):
    def format_start_stop(start, stop):
        elapsed = stop - start
        return f'{elapsed//60:.0f}m {elapsed%50:.0f}s'

    best_model_weights = copy.deepcopy(model.state_dict())
    best_acc = -1.0

    loop_start = time.time()

    for epoch in range(num_epochs):
        train_loss, train_acc = 0.0, 0.0
        test_loss, test_acc = 0.0, 0.0
        train_start, test_start = 0.0, 0.0
        train_stop, test_stop = 0.0, 0.0

        for i, phase in enumerate(['train', 'test']):
            if phase == 'train':
                optimizer.step()
                scheduler.step()
                model.train()
                train_start = time.time()
            else:
                model.eval()
                test_start = time.time()

            running_loss = 0.0
            running_corrects = 0

            n = 0
            dataloader = dataloaders[phase]

            for inputs, labels in dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.mean() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                n += len(labels)

            if phase == 'train':
                writer.add_scalar('training loss', running_loss, epoch * len(dataloader) + i)

                writer.add_figure('predictions vs. actuals',
                        plot_classes_preds(model, inputs, labels, classes),
                        global_step=epoch * len(dataloader) + i)

            epoch_loss = running_loss / float(n)
            epoch_acc = running_corrects.double() / float(n)

            if phase == 'train':
                train_stop = time.time()
                train_loss, train_acc = epoch_loss, epoch_acc
            else:
                test_stop = time.time()
                test_loss, test_acc = epoch_loss, epoch_acc

                if epoch_acc > best_acc:
                    best_model_weights = copy.deepcopy(model.state_dict())
                    best_acc = epoch_acc

        train_time = format_start_stop(train_start, train_stop)
        test_time = format_start_stop(test_start, test_stop)

        train_metrics = f'TRAIN: {train_loss:.4f}, {train_acc:.4f}, {train_time}'
        test_metrics = f'TEST: {test_loss:.4f}, {test_acc:.4f}, {test_time}'
        print(f'epoch {str(epoch + 1).zfill(2)}/{str(num_epochs).zfill(2)} | {train_metrics} | {test_metrics}')

    loop_stop = time.time()
    loop_time = format_start_stop(loop_start, loop_stop)
    print(f'completed learning in {loop_time}, best accuracy {best_acc:.4f}')

    model.load_state_dict(best_model_weights)
    writer.close()
    return model
[7]:
num_epochs = 20

model = models.resnet18(pretrained=pretrained)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Rprop(model.parameters(), lr=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1)

model = train(dataloaders, model, criterion, optimizer, scheduler, idx_to_class, device, writer, num_epochs=num_epochs)
epoch 01/20 | TRAIN: 1.0803, 0.4667, 0m 1s | TEST: 0.0827, 1.0000, 0m 0s
epoch 02/20 | TRAIN: 0.5203, 0.8333, 0m 1s | TEST: 0.0882, 1.0000, 0m 0s
epoch 03/20 | TRAIN: 0.7947, 0.8000, 0m 1s | TEST: 0.1361, 0.9333, 0m 0s
epoch 04/20 | TRAIN: 0.1635, 0.9333, 0m 1s | TEST: 0.0064, 1.0000, 0m 0s
epoch 05/20 | TRAIN: 0.1467, 0.9333, 0m 1s | TEST: 0.0036, 1.0000, 0m 0s
epoch 06/20 | TRAIN: 0.1230, 0.9333, 0m 1s | TEST: 0.0016, 1.0000, 0m 0s
epoch 07/20 | TRAIN: 0.3416, 0.9000, 0m 1s | TEST: 0.0015, 1.0000, 0m 0s
epoch 08/20 | TRAIN: 0.0392, 1.0000, 0m 1s | TEST: 0.0029, 1.0000, 0m 0s
epoch 09/20 | TRAIN: 0.3981, 0.8000, 0m 1s | TEST: 0.0006, 1.0000, 0m 0s
epoch 10/20 | TRAIN: 0.1058, 0.9667, 0m 1s | TEST: 0.0010, 1.0000, 0m 0s
epoch 11/20 | TRAIN: 0.3351, 0.8667, 0m 1s | TEST: 0.0007, 1.0000, 0m 0s
epoch 12/20 | TRAIN: 0.0615, 1.0000, 0m 1s | TEST: 0.0011, 1.0000, 0m 0s
epoch 13/20 | TRAIN: 0.0738, 0.9667, 0m 1s | TEST: 0.0367, 1.0000, 0m 0s
epoch 14/20 | TRAIN: 0.2468, 0.9333, 0m 1s | TEST: 0.0108, 1.0000, 0m 0s
epoch 15/20 | TRAIN: 0.2451, 0.9333, 0m 1s | TEST: 0.0019, 1.0000, 0m 0s
epoch 16/20 | TRAIN: 0.1233, 0.9333, 0m 1s | TEST: 0.0038, 1.0000, 0m 0s
epoch 17/20 | TRAIN: 0.3774, 0.8333, 0m 1s | TEST: 0.0005, 1.0000, 0m 0s
epoch 18/20 | TRAIN: 0.1251, 0.9333, 0m 1s | TEST: 0.0039, 1.0000, 0m 0s
epoch 19/20 | TRAIN: 0.2142, 0.9333, 0m 1s | TEST: 0.0005, 1.0000, 0m 0s
epoch 20/20 | TRAIN: 0.2402, 0.8667, 0m 1s | TEST: 0.0014, 1.0000, 0m 0s
completed learning in 0m 20s, best accuracy 1.0000

16.5. Add precision-recall curve

[8]:
class_probs = []
class_preds = []

with torch.no_grad():
    for data in dataloaders['valid']:
        images, labels = data
        images, labels = images.to(device), labels.to(device)

        output = model(images)
        class_probs_batch = [F.softmax(el, dim=0) for el in output]
        _, class_preds_batch = torch.max(output, 1)

        class_probs.append(class_probs_batch)
        class_preds.append(class_preds_batch)

test_probs = torch.cat([torch.stack(batch) for batch in class_probs])
test_preds = torch.cat(class_preds)
[9]:
test_probs
[9]:
tensor([[9.9846e-01, 1.1767e-03, 3.6261e-04],
        [9.9739e-01, 1.9887e-03, 6.2254e-04],
        [9.9639e-01, 2.3840e-03, 1.2309e-03],
        [9.2196e-05, 9.9922e-01, 6.8980e-04],
        [8.1074e-03, 2.1422e-01, 7.7767e-01],
        [4.3053e-04, 3.0992e-02, 9.6858e-01],
        [7.7913e-10, 9.9995e-01, 5.1609e-05],
        [9.9911e-01, 6.2751e-04, 2.5947e-04],
        [1.1694e-10, 1.0000e+00, 5.2564e-08],
        [9.9906e-12, 1.0000e+00, 2.7644e-08],
        [1.0453e-08, 9.9999e-01, 1.1971e-05],
        [2.8406e-05, 4.6754e-02, 9.5322e-01],
        [7.9143e-13, 1.0000e+00, 3.6505e-09],
        [9.9958e-01, 2.8400e-04, 1.3418e-04],
        [3.1156e-03, 1.6711e-01, 8.2978e-01]], device='cuda:0')
[10]:
test_preds
[10]:
tensor([0, 0, 0, 1, 2, 2, 1, 0, 1, 1, 1, 2, 1, 0, 2], device='cuda:0')
[11]:
def add_pr_curve_tensorboard(class_index, test_probs, test_preds, idx_to_class, global_step=0):
    tensorboard_preds = test_preds == class_index
    tensorboard_probs = test_probs[:, class_index]

    writer.add_pr_curve(idx_to_class[class_index],
                        tensorboard_preds,
                        tensorboard_probs,
                        global_step=global_step)
    writer.close()

n_classes = len(dataloaders['valid'].dataset.classes)

for i in range(n_classes):
    add_pr_curve_tensorboard(i, test_probs, test_preds, idx_to_class)