
# -*- coding: utf-8 -*-
"""Copie de Efficient hyperparameter optimization with Optuna framework.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1JIuTDS5JFXy21B-VG7fK5WDMkrWTyLhe

https://colab.research.google.com/drive/1OegNO802ZrluOqO4upRhy9OYiIH6KG6k?usp=sharing&ref=broutonlab.com

# Efficient hyperparameter optimization with Optuna framework: practical example

---

Hyperparameter optimization is an important part of any data science project, but the more hyperparameters there are to tune, the harder it becomes to do so by hand. To speed up project development, we can automate this work with the **Optuna** framework.

In this practical example of hyperparameter optimization, we will address a binary image classification problem.

*(based on http://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html)*

## Let's get into practice!

Install and import **Optuna**
"""

!pip install optuna
import optuna

import os
import time
import copy

import torch
import torch.nn as nn
import torch.optim as optim

import torchvision
from torchvision import datasets, models, transforms

import numpy as np
import matplotlib.pyplot as plt

"""We will use the Ants vs Bees dataset, which is part of the ImageNet dataset. You will need to download it from here: [Ants vs Bees](https://download.pytorch.org/tutorial/hymenoptera_data.zip). It contains 400 pictures, ~250 training, and ~150 validation (test)."""

!wget https://download.pytorch.org/tutorial/hymenoptera_data.zip
!unzip hymenoptera_data.zip

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = './hymenoptera_data'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
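
"""As an optional sanity check, we can look at one training batch, a minimal sketch using the numpy/matplotlib imports above (the helper name `imshow_batch` is ours, not part of the original tutorial). It undoes the Normalize transform with the same ImageNet statistics used in `data_transforms`:"""

def imshow_batch(inputs, labels):
    # Arrange the batch into a grid and convert to HxWxC for matplotlib
    grid = torchvision.utils.make_grid(inputs).numpy().transpose((1, 2, 0))
    # Undo the Normalize transform so colors look natural
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    grid = np.clip(std * grid + mean, 0, 1)
    plt.imshow(grid)
    plt.title(', '.join(class_names[l] for l in labels))
    plt.axis('off')

inputs, labels = next(iter(dataloaders['train']))
imshow_batch(inputs, labels)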

"""In order to get a pretrained model by its name, we will add a function *get_model*:"""

def get_model(model_name: str = "resnet18"):
    # Load an ImageNet-pretrained backbone and replace its classification
    # head with a 2-class layer. (Newer torchvision versions use the
    # weights= argument instead of pretrained=True.)
    if model_name == "resnet18":
        model = models.resnet18(pretrained=True)
        in_features = model.fc.in_features
        model.fc = nn.Linear(in_features, 2)
    elif model_name == "alexnet":
        model = models.alexnet(pretrained=True)
        # AlexNet's features are flattened before the classifier,
        # so a single linear layer can replace the whole classifier
        in_features = model.classifier[1].in_features
        model.classifier = nn.Linear(in_features, 2)
    elif model_name == "vgg16":
        model = models.vgg16(pretrained=True)
        in_features = model.classifier[0].in_features
        model.classifier = nn.Linear(in_features, 2)
    else:
        raise ValueError(f"Unknown model name: {model_name}")
    return model
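
"""A quick sanity check (illustrative only): each variant should map a batch of 224x224 RGB images to two logits, one per class."""

model_check = get_model("resnet18").eval()
with torch.no_grad():
    dummy = torch.randn(1, 3, 224, 224)
    print(model_check(dummy).shape)  # expected: torch.Size([1, 2])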

"""The following function will be used to train the model:"""

def train_model(trial, model, criterion, optimizer, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

        # After the 'val' phase, epoch_acc holds the validation accuracy.
        # Report it to Optuna (as a plain float, not a 0-dim tensor) so the
        # pruner can stop unpromising trials early.
        trial.report(float(epoch_acc), epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    model.load_state_dict(best_model_wts)
    # Return the accuracy as a plain float, since Optuna expects a number
    return model, float(best_acc)

"""We need to create the **Objective Function**. It takes a configuration of hyperparameters and returns its evaluation score (Objective value). By maximizing or minimizing the **Objective Function**, Optuna solves the problem of hyperparameter optimization.

Within **Objective Function**, we should define the hyperparameters we want to optimize. In our example, we will optimize 3 hyperparameters:

1.   Pretrained network. Since Ants vs Bees is a small dataset, we will use transfer learning to achieve a good quality model. We choose one of the networks trained on ImageNet and replace the last fully connected layers responsible for classification.
2.   Optimizer: SGD, Adam.
3.   Learning Rate: from 1e-4 to 1e-2.

"""

def objective(trial):

    # Hyperparameters we want to optimize
    params = {
        "model_name": trial.suggest_categorical('model_name', ["resnet18", "alexnet", "vgg16"]),
        # suggest_float(..., log=True) is the current API; suggest_loguniform is deprecated
        "lr": trial.suggest_float('lr', 1e-4, 1e-2, log=True),
        "optimizer_name": trial.suggest_categorical('optimizer_name', ["SGD", "Adam"])
    }

    # Get pretrained model
    model = get_model(params["model_name"])
    model = model.to(device)

    # Define criterion
    criterion = nn.CrossEntropyLoss()

    # Configure optimizer
    optimizer = getattr(
        torch.optim, params["optimizer_name"]
    )(model.parameters(), lr=params["lr"])

    # Train model
    # best_model, best_acc = train_model(trial, model, criterion, optimizer, num_epochs=20)
    best_model, best_acc = train_model(trial, model, criterion, optimizer, num_epochs=5)

    # Save best model for each trial
    # torch.save(best_model.state_dict(), f"model_trial_{trial.number}.pth")

    # Return accuracy (Objective Value) of the current trial
    return best_acc
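
"""Before launching a full search, the objective can be smoke-tested with a fixed configuration via optuna.trial.FixedTrial, a standard Optuna utility for debugging objective functions (with a FixedTrial, report() is a no-op and should_prune() always returns False). Uncomment to run a single training with fixed hyperparameters:"""

# fixed_trial = optuna.trial.FixedTrial(
#     {"model_name": "resnet18", "lr": 1e-3, "optimizer_name": "Adam"})
# objective(fixed_trial)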

"""To start optimizing our **Objective Function**, we create a new **study**:

"""

# sampler: we use the TPE (Tree-structured Parzen Estimator) sampler
# pruner: we use a MedianPruner to interrupt unpromising trials early
# direction: "maximize", because we want to maximize validation accuracy
# n_trials: the number of trials to run

sampler = optuna.samplers.TPESampler()
study = optuna.create_study(
    sampler=sampler,
    pruner=optuna.pruners.MedianPruner(
        n_startup_trials=3, n_warmup_steps=5, interval_steps=3
    ),
    direction='maximize')
study.optimize(func=objective, n_trials=20)

print("Best trial: ")
print(study.best_trial)
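
"""The study also exposes the best objective value and the best hyperparameters directly (standard Optuna attributes):"""

print("Best value (val accuracy):", study.best_value)
print("Best hyperparameters:", study.best_params)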

"""Optuna helps to visually assess the impact of hyperparameters on the accuracy of the predictions. Let’s visualize the dependence between the learning rate, optimizer_name, model_name and accuracy (Objective Value):

"""

optuna.visualization.plot_parallel_coordinate(study)

optuna.visualization.plot_contour(study, params=['optimizer_name','model_name'])

"""Slice plots for each of the hyperparameters:"""

optuna.visualization.plot_slice(study)

"""Hyperparameter importances:"""

optuna.visualization.plot_param_importances(study)

"""Plot the optimization history of all trials in a study:"""

optuna.visualization.plot_optimization_history(study)

""" Learning curves of the trials:"""

optuna.visualization.plot_intermediate_values(study)
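
"""In a Colab notebook, the Plotly figures above render inline. In a plain script, keep the returned figure and show or export it explicitly (static image export requires the optional kaleido package):"""

fig = optuna.visualization.plot_optimization_history(study)
fig.show()
# fig.write_image("optimization_history.png")  # requires: pip install kaleido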