7. Optimizer

[1]:
%matplotlib inline
import matplotlib.pyplot as plt
from torchvision import datasets, models, transforms
import torch.optim as optim
import torch.nn as nn
from torchvision.transforms import Resize, ToTensor
from torch.utils.data import DataLoader
import torch
import numpy as np
from collections import namedtuple
import pandas as pd

def train(dataloader, model, criterion, optimizer, scheduler, num_epochs=20):
    results = []
    for epoch in range(num_epochs):
        model.train()

        running_loss = 0.0
        running_corrects = 0

        n = 0
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            with torch.set_grad_enabled(True):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                _, preds = torch.max(outputs, 1)

                loss.backward()
                optimizer.step()

            # accumulate loss weighted by batch size, plus the number of correct predictions
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
            n += len(labels)

        # decay the learning rate once per epoch, after the per-batch optimizer updates
        scheduler.step()

        epoch_loss = running_loss / float(n)
        epoch_acc = running_corrects.double() / float(n)

        print(f'epoch {epoch}/{num_epochs} : {epoch_loss:.5f}, {epoch_acc:.5f}')
        results.append(EpochProgress(epoch, epoch_loss, epoch_acc.item()))
    return pd.DataFrame(results)

def plot_results(df, figsize=(10, 5)):
    fig, ax1 = plt.subplots(figsize=figsize)

    ax1.set_xlabel('epoch')
    ax1.set_ylabel('loss', color='tab:red')
    ax1.plot(df['epoch'], df['loss'], color='tab:red')

    ax2 = ax1.twinx()
    ax2.set_ylabel('accuracy', color='tab:blue')
    ax2.plot(df['epoch'], df['accuracy'], color='tab:blue')

    fig.tight_layout()

np.random.seed(37)
torch.manual_seed(37)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

num_classes = 3
pretrained = True
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

EpochProgress = namedtuple('EpochProgress', 'epoch, loss, accuracy')

transform = transforms.Compose([Resize(224), ToTensor()])
image_folder = datasets.ImageFolder('./shapes/train', transform=transform)
dataloader = DataLoader(image_folder, batch_size=4, shuffle=True, num_workers=4)
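
Every experiment below follows the same recipe: rebuild a ResNet-18 with a 3-class head, swap in a different optimizer, train for 20 epochs with a StepLR scheduler, and plot loss and accuracy. A minimal sketch of how the same comparison could be driven from one loop (the make_model helper and the optimizers dict are hypothetical, not part of the original notebook):

def make_model():
    # fresh ResNet-18 with a 3-class head, exactly as each cell below does
    m = models.resnet18(pretrained=pretrained)
    m.fc = nn.Linear(m.fc.in_features, num_classes)
    return m.to(device)

# hypothetical name-to-constructor mapping mirroring the sections below
optimizers = {
    'sgd': lambda p: optim.SGD(p, lr=0.01),
    'adam': lambda p: optim.Adam(p, lr=0.01),
    'rmsprop': lambda p: optim.RMSprop(p, lr=0.01),
}

all_results = {}
for name, make_opt in optimizers.items():
    model = make_model()
    criterion = nn.CrossEntropyLoss()
    optimizer = make_opt(model.parameters())
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1)
    all_results[name] = train(dataloader, model, criterion, optimizer, scheduler)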

7.1. Stochastic gradient descent
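
With no momentum configured, this is plain stochastic gradient descent: each mini-batch gradient g_t nudges the parameters by a step scaled by the learning rate,

$$\theta_{t+1} = \theta_t - \eta\, g_t, \qquad g_t = \nabla_\theta \mathcal{L}(\theta_t).$$

Note that StepLR(optimizer, step_size=1) keeps its default gamma=0.1, so the learning rate is cut by a factor of 10 after every epoch; the same scheduler is used in every run below, so most of the useful movement tends to happen in the first few epochs.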

[2]:
model = models.resnet18(pretrained=pretrained)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

params_to_update = model.parameters()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params_to_update, lr=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1)

results = train(dataloader, model, criterion, optimizer, scheduler)
plot_results(results)
epoch 0/20 : 1.35156, 0.40000
epoch 1/20 : 1.13637, 0.43333
epoch 2/20 : 1.06040, 0.50000
epoch 3/20 : 1.02444, 0.56667
epoch 4/20 : 1.13440, 0.33333
epoch 5/20 : 1.08239, 0.56667
epoch 6/20 : 1.08502, 0.53333
epoch 7/20 : 1.08369, 0.43333
epoch 8/20 : 1.06111, 0.46667
epoch 9/20 : 1.09906, 0.43333
epoch 10/20 : 1.09626, 0.43333
epoch 11/20 : 1.07304, 0.50000
epoch 12/20 : 1.11257, 0.43333
epoch 13/20 : 1.14465, 0.50000
epoch 14/20 : 1.09183, 0.53333
epoch 15/20 : 1.07681, 0.56667
epoch 16/20 : 1.10339, 0.53333
epoch 17/20 : 1.13121, 0.43333
epoch 18/20 : 1.11461, 0.43333
epoch 19/20 : 1.06282, 0.56667
[Figure: training loss (red, left axis) and accuracy (blue, right axis) per epoch for SGD]

7.2. AdaDelta
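
Adadelta adapts each parameter's step from running averages of squared gradients and squared updates, so it is designed to need little manual learning-rate tuning:

$$E[g^2]_t = \rho E[g^2]_{t-1} + (1-\rho)\,g_t^2, \qquad
\Delta\theta_t = -\frac{\sqrt{E[\Delta\theta^2]_{t-1} + \epsilon}}{\sqrt{E[g^2]_t + \epsilon}}\; g_t, \qquad
\theta_{t+1} = \theta_t + \eta\,\Delta\theta_t,$$

with E[Δθ²] updated from the Δθ values in the same exponential-average fashion. PyTorch's Adadelta defaults to lr=1.0, so passing lr=0.01 scales the already-adaptive step down by a factor of 100, which likely explains why the loss barely moves in this run.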

[3]:
model = models.resnet18(pretrained=pretrained)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

params_to_update = model.parameters()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adadelta(params_to_update, lr=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1)

results = train(dataloader, model, criterion, optimizer, scheduler)
plot_results(results)
epoch 0/20 : 1.19977, 0.33333
epoch 1/20 : 1.19553, 0.40000
epoch 2/20 : 1.18889, 0.36667
epoch 3/20 : 1.15373, 0.33333
epoch 4/20 : 1.19890, 0.40000
epoch 5/20 : 1.16365, 0.36667
epoch 6/20 : 1.16017, 0.36667
epoch 7/20 : 1.14592, 0.36667
epoch 8/20 : 1.16320, 0.43333
epoch 9/20 : 1.19652, 0.36667
epoch 10/20 : 1.18728, 0.33333
epoch 11/20 : 1.18054, 0.36667
epoch 12/20 : 1.21949, 0.33333
epoch 13/20 : 1.16084, 0.40000
epoch 14/20 : 1.15557, 0.36667
epoch 15/20 : 1.17068, 0.36667
epoch 16/20 : 1.14629, 0.33333
epoch 17/20 : 1.12268, 0.43333
epoch 18/20 : 1.17228, 0.36667
epoch 19/20 : 1.14354, 0.36667
[Figure: training loss (red, left axis) and accuracy (blue, right axis) per epoch for Adadelta]

7.3. AdaGrad
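
Adagrad divides the learning rate by the square root of the sum of all past squared gradients, so frequently-updated parameters take progressively smaller steps:

$$G_t = G_{t-1} + g_t^2, \qquad
\theta_{t+1} = \theta_t - \frac{\eta}{\sqrt{G_t} + \epsilon}\; g_t.$$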

[4]:
model = models.resnet18(pretrained=pretrained)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

params_to_update = model.parameters()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adagrad(params_to_update, lr=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1)

results = train(dataloader, model, criterion, optimizer, scheduler)
plot_results(results)
epoch 0/20 : 0.77216, 0.66667
epoch 1/20 : 0.78004, 0.73333
epoch 2/20 : 0.57943, 0.76667
epoch 3/20 : 0.21080, 0.96667
epoch 4/20 : 0.28196, 0.90000
epoch 5/20 : 0.34823, 0.90000
epoch 6/20 : 0.23991, 0.93333
epoch 7/20 : 0.36666, 0.86667
epoch 8/20 : 0.34647, 0.86667
epoch 9/20 : 0.53740, 0.70000
epoch 10/20 : 0.25844, 0.86667
epoch 11/20 : 0.37599, 0.83333
epoch 12/20 : 0.51929, 0.73333
epoch 13/20 : 0.55198, 0.76667
epoch 14/20 : 0.39852, 0.80000
epoch 15/20 : 0.32558, 0.86667
epoch 16/20 : 0.71763, 0.83333
epoch 17/20 : 0.45249, 0.80000
epoch 18/20 : 0.39796, 0.83333
epoch 19/20 : 0.50014, 0.76667
[Figure: training loss (red, left axis) and accuracy (blue, right axis) per epoch for Adagrad]

7.4. Adam
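
Adam keeps exponential moving averages of the gradient and of its square, bias-corrects both, and scales the step per parameter:

$$m_t = \beta_1 m_{t-1} + (1-\beta_1)\,g_t, \qquad
v_t = \beta_2 v_{t-1} + (1-\beta_2)\,g_t^2,$$

$$\hat m_t = \frac{m_t}{1-\beta_1^t}, \qquad
\hat v_t = \frac{v_t}{1-\beta_2^t}, \qquad
\theta_{t+1} = \theta_t - \eta\,\frac{\hat m_t}{\sqrt{\hat v_t} + \epsilon}.$$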

[5]:
model = models.resnet18(pretrained=pretrained)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

params_to_update = model.parameters()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params_to_update, lr=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1)

results = train(dataloader, model, criterion, optimizer, scheduler)
plot_results(results)
epoch 0/20 : 1.08423, 0.60000
epoch 1/20 : 1.63394, 0.53333
epoch 2/20 : 0.49516, 0.76667
epoch 3/20 : 0.60868, 0.76667
epoch 4/20 : 0.57865, 0.83333
epoch 5/20 : 0.60903, 0.76667
epoch 6/20 : 0.54606, 0.80000
epoch 7/20 : 0.38538, 0.86667
epoch 8/20 : 0.81709, 0.73333
epoch 9/20 : 0.41252, 0.83333
epoch 10/20 : 0.39877, 0.90000
epoch 11/20 : 0.58696, 0.76667
epoch 12/20 : 0.52193, 0.73333
epoch 13/20 : 0.42973, 0.76667
epoch 14/20 : 0.31566, 0.90000
epoch 15/20 : 0.64462, 0.66667
epoch 16/20 : 0.61575, 0.70000
epoch 17/20 : 0.40781, 0.83333
epoch 18/20 : 0.57845, 0.83333
epoch 19/20 : 0.75139, 0.73333
[Figure: training loss (red, left axis) and accuracy (blue, right axis) per epoch for Adam]

7.5. AdamW
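
AdamW is Adam with the weight decay decoupled from the adaptive gradient step, so the decay acts directly on the weights rather than being folded into the gradient:

$$\theta_{t+1} = \theta_t - \eta\left(\frac{\hat m_t}{\sqrt{\hat v_t} + \epsilon} + \lambda\,\theta_t\right).$$

Unlike Adam, PyTorch's AdamW uses a non-zero weight decay by default (weight_decay=0.01).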

[6]:
model = models.resnet18(pretrained=pretrained)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

params_to_update = model.parameters()

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(params_to_update, lr=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1)

results = train(dataloader, model, criterion, optimizer, scheduler)
plot_results(results)
epoch 0/20 : 1.16534, 0.60000
epoch 1/20 : 0.83021, 0.70000
epoch 2/20 : 0.56983, 0.80000
epoch 3/20 : 0.54591, 0.83333
epoch 4/20 : 0.65391, 0.73333
epoch 5/20 : 0.42384, 0.86667
epoch 6/20 : 0.43772, 0.83333
epoch 7/20 : 0.33034, 0.86667
epoch 8/20 : 0.57162, 0.73333
epoch 9/20 : 0.58920, 0.76667
epoch 10/20 : 0.59165, 0.83333
epoch 11/20 : 0.58105, 0.80000
epoch 12/20 : 0.48469, 0.80000
epoch 13/20 : 0.52863, 0.86667
epoch 14/20 : 0.68455, 0.80000
epoch 15/20 : 0.90342, 0.70000
epoch 16/20 : 0.58743, 0.80000
epoch 17/20 : 0.89801, 0.66667
epoch 18/20 : 0.30146, 0.86667
epoch 19/20 : 0.39773, 0.83333
[Figure: training loss (red, left axis) and accuracy (blue, right axis) per epoch for AdamW]

7.6. Adamax
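
Adamax is the infinity-norm variant of Adam: the second-moment estimate is replaced by a running max of gradient magnitudes:

$$u_t = \max(\beta_2\,u_{t-1},\ |g_t|), \qquad
\theta_{t+1} = \theta_t - \frac{\eta}{1-\beta_1^t}\,\frac{m_t}{u_t}.$$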

[7]:
model = models.resnet18(pretrained=pretrained)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

params_to_update = model.parameters()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adamax(params_to_update, lr=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1)

results = train(dataloader, model, criterion, optimizer, scheduler)
plot_results(results)
epoch 0/20 : 1.05041, 0.56667
epoch 1/20 : 1.25037, 0.70000
epoch 2/20 : 0.89263, 0.83333
epoch 3/20 : 1.09394, 0.66667
epoch 4/20 : 1.38519, 0.66667
epoch 5/20 : 0.80390, 0.70000
epoch 6/20 : 0.81821, 0.76667
epoch 7/20 : 1.03320, 0.66667
epoch 8/20 : 0.69888, 0.83333
epoch 9/20 : 0.94823, 0.76667
epoch 10/20 : 1.13064, 0.70000
epoch 11/20 : 1.17754, 0.70000
epoch 12/20 : 0.87697, 0.80000
epoch 13/20 : 0.84306, 0.83333
epoch 14/20 : 1.26252, 0.63333
epoch 15/20 : 0.92927, 0.73333
epoch 16/20 : 0.85536, 0.76667
epoch 17/20 : 0.99645, 0.73333
epoch 18/20 : 0.91567, 0.70000
epoch 19/20 : 1.01903, 0.63333
[Figure: training loss (red, left axis) and accuracy (blue, right axis) per epoch for Adamax]

7.7. ASGD
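
Averaged SGD takes ordinary SGD steps while also keeping a running average of the parameter iterates (stored separately by the optimizer), which is the quantity the method's theory recommends using at evaluation time. Roughly, with averaging starting after a burn-in of t_0 steps:

$$\theta_{t+1} = \theta_t - \eta\,g_t, \qquad
\bar\theta = \frac{1}{t - t_0}\sum_{k > t_0} \theta_k.$$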

[8]:
model = models.resnet18(pretrained=pretrained)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

params_to_update = model.parameters()

criterion = nn.CrossEntropyLoss()
optimizer = optim.ASGD(params_to_update, lr=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1)

results = train(dataloader, model, criterion, optimizer, scheduler)
plot_results(results)
epoch 0/20 : 1.17415, 0.40000
epoch 1/20 : 1.02151, 0.53333
epoch 2/20 : 0.95373, 0.56667
epoch 3/20 : 1.01674, 0.50000
epoch 4/20 : 0.91915, 0.66667
epoch 5/20 : 0.95214, 0.50000
epoch 6/20 : 0.97350, 0.56667
epoch 7/20 : 1.00653, 0.53333
epoch 8/20 : 0.93291, 0.56667
epoch 9/20 : 1.02682, 0.50000
epoch 10/20 : 0.94823, 0.53333
epoch 11/20 : 0.96903, 0.46667
epoch 12/20 : 0.95601, 0.63333
epoch 13/20 : 0.97117, 0.50000
epoch 14/20 : 0.89461, 0.63333
epoch 15/20 : 0.95025, 0.53333
epoch 16/20 : 0.96510, 0.60000
epoch 17/20 : 1.01499, 0.43333
epoch 18/20 : 0.95498, 0.50000
epoch 19/20 : 0.98825, 0.63333
[Figure: training loss (red, left axis) and accuracy (blue, right axis) per epoch for ASGD]

7.8. RMSprop
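
RMSprop divides the learning rate by a decaying average of squared gradients:

$$E[g^2]_t = \alpha E[g^2]_{t-1} + (1-\alpha)\,g_t^2, \qquad
\theta_{t+1} = \theta_t - \frac{\eta}{\sqrt{E[g^2]_t} + \epsilon}\; g_t.$$

The comparatively high losses below suggest that lr=0.01 may be on the aggressive side for RMSprop when fine-tuning a pretrained network.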

[9]:
model = models.resnet18(pretrained=pretrained)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

params_to_update = model.parameters()

criterion = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(params_to_update, lr=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1)

results = train(dataloader, model, criterion, optimizer, scheduler)
plot_results(results)
epoch 0/20 : 2.38401, 0.36667
epoch 1/20 : 3.06042, 0.43333
epoch 2/20 : 2.37027, 0.43333
epoch 3/20 : 2.27388, 0.50000
epoch 4/20 : 2.42956, 0.43333
epoch 5/20 : 2.28123, 0.46667
epoch 6/20 : 2.32184, 0.46667
epoch 7/20 : 2.07055, 0.56667
epoch 8/20 : 2.22991, 0.50000
epoch 9/20 : 2.06145, 0.50000
epoch 10/20 : 2.68841, 0.40000
epoch 11/20 : 2.19093, 0.50000
epoch 12/20 : 2.10353, 0.50000
epoch 13/20 : 2.06072, 0.53333
epoch 14/20 : 2.31504, 0.40000
epoch 15/20 : 2.37040, 0.40000
epoch 16/20 : 2.16376, 0.46667
epoch 17/20 : 2.37319, 0.53333
epoch 18/20 : 2.35661, 0.43333
epoch 19/20 : 2.22608, 0.46667
[Figure: training loss (red, left axis) and accuracy (blue, right axis) per epoch for RMSprop]

7.9. Rprop
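
Rprop (resilient backpropagation) uses only the sign of the gradient: each parameter carries its own step size, which grows while the gradient sign stays the same and shrinks when it flips:

$$\theta_{t+1} = \theta_t - \operatorname{sign}(g_t)\,\Delta_t, \qquad
\Delta_t = \begin{cases}
\min(\Delta_{t-1}\,\eta^{+},\ \Delta_{\max}) & \text{if } g_t\,g_{t-1} > 0,\\
\max(\Delta_{t-1}\,\eta^{-},\ \Delta_{\min}) & \text{if } g_t\,g_{t-1} < 0,\\
\Delta_{t-1} & \text{otherwise.}
\end{cases}$$

Rprop was designed for full-batch gradients, so its strong showing on this small shapes dataset should be read with some caution; with mini-batch training on larger datasets it is rarely the first choice.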

[10]:
model = models.resnet18(pretrained=pretrained)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

params_to_update = model.parameters()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Rprop(params_to_update, lr=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1)

results = train(dataloader, model, criterion, optimizer, scheduler)
plot_results(results)
epoch 0/20 : 1.41951, 0.43333
epoch 1/20 : 1.00923, 0.63333
epoch 2/20 : 0.27935, 0.93333
epoch 3/20 : 0.47481, 0.83333
epoch 4/20 : 0.06080, 0.96667
epoch 5/20 : 0.49697, 0.86667
epoch 6/20 : 0.33636, 0.86667
epoch 7/20 : 0.15372, 0.96667
epoch 8/20 : 0.05433, 1.00000
epoch 9/20 : 0.00329, 1.00000
epoch 10/20 : 0.05734, 0.96667
epoch 11/20 : 0.07089, 0.96667
epoch 12/20 : 0.03279, 1.00000
epoch 13/20 : 0.04594, 0.96667
epoch 14/20 : 0.05307, 0.96667
epoch 15/20 : 0.01468, 1.00000
epoch 16/20 : 0.06959, 0.96667
epoch 17/20 : 0.04229, 0.96667
epoch 18/20 : 0.07761, 0.96667
epoch 19/20 : 0.00851, 1.00000
[Figure: training loss (red, left axis) and accuracy (blue, right axis) per epoch for Rprop]