# -*- coding: utf-8 -*-
"""Copie de Efficient hyperparameter optimization with Optuna framework.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1JIuTDS5JFXy21B-VG7fK5WDMkrWTyLhe
https://colab.research.google.com/drive/1OegNO802ZrluOqO4upRhy9OYiIH6KG6k?usp=sharing&ref=broutonlab.com
# Efficient hyperparameter optimization with Optuna framework: practical example
---
Hyperparameter optimization is an important part of any data science project, but the more parameters there are to tune, the harder it becomes to do by hand. To speed up development, we can automate this work, and for that we will use the **Optuna** framework.
In this practical example of hyperparameter optimization, we will address a binary classification problem.
*(based on http://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html)*
## Let's get into practice!
Install and import **Optuna**
"""
!pip install optuna
import optuna
import os
import time
import copy
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms
import numpy as np
import matplotlib.pyplot as plt
"""We will use the Ants vs Bees dataset, which is part of the ImageNet dataset. You will need to download it from here: [Ants vs Bees](https://download.pytorch.org/tutorial/hymenoptera_data.zip). It contains 400 pictures, ~250 training, and ~150 validation (test)."""
!wget https://download.pytorch.org/tutorial/hymenoptera_data.zip
!unzip hymenoptera_data.zip
# Standard ImageNet preprocessing: random crops and flips augment the training
# set; validation uses a deterministic resize + center crop.
data_transforms = {
'train': transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
'val': transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
}
data_dir = './hymenoptera_data'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
data_transforms[x])
for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
shuffle=True, num_workers=4)
for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
"""In order to get a pretrained model by its name, we will add a function *get_model*:"""
def get_model(model_name: str = "resnet18"):
    # Each backbone is loaded with ImageNet weights, and its classification
    # head is replaced with a single 2-class linear layer.
    if model_name == "resnet18":
        model = models.resnet18(pretrained=True)
        in_features = model.fc.in_features
        model.fc = nn.Linear(in_features, 2)
    elif model_name == "alexnet":
        model = models.alexnet(pretrained=True)
        # classifier[1] is the first Linear layer; its in_features matches
        # the flattened feature map fed into the classifier.
        in_features = model.classifier[1].in_features
        model.classifier = nn.Linear(in_features, 2)
    elif model_name == "vgg16":
        model = models.vgg16(pretrained=True)
        in_features = model.classifier[0].in_features
        model.classifier = nn.Linear(in_features, 2)
    else:
        raise ValueError(f"Unknown model name: {model_name}")
    return model
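"""A quick sanity check (our addition): each backbone should build and emit two logits for a dummy 224x224 batch. Note that this downloads the pretrained weights on first use."""
for name in ["resnet18", "alexnet", "vgg16"]:
    with torch.no_grad():
        out = get_model(name)(torch.randn(1, 3, 224, 224))
    print(name, tuple(out.shape))  # expect (1, 2)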
"""The following function will be used to train the model:"""
def train_model(trial, model, criterion, optimizer, num_epochs=25):
since = time.time()
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
for phase in ['train', 'val']:
if phase == 'train':
model.train()
else:
model.eval()
running_loss = 0.0
running_corrects = 0
for inputs, labels in dataloaders[phase]:
inputs = inputs.to(device)
labels = labels.to(device)
optimizer.zero_grad()
with torch.set_grad_enabled(phase == 'train'):
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
if phase == 'train':
loss.backward()
optimizer.step()
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
epoch_loss = running_loss / dataset_sizes[phase]
            # .item() converts the tensor to a plain float for printing,
            # comparison, and Optuna's trial.report below.
            epoch_acc = (running_corrects.double() / dataset_sizes[phase]).item()
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
if phase == 'val' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_wts = copy.deepcopy(model.state_dict())
print()
        # Report the epoch's validation accuracy so the pruner can stop
        # unpromising trials early.
        trial.report(epoch_acc, epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))
model.load_state_dict(best_model_wts)
return model, best_acc
"""We need to create the **Objective Function**. It takes a configuration of hyperparameters and returns its evaluation score (Objective value). By maximizing or minimizing the **Objective Function**, Optuna solves the problem of hyperparameter optimization.
Within **Objective Function**, we should define the hyperparameters we want to optimize. In our example, we will optimize 3 hyperparameters:
1. Pretrained network. Since Ants vs Bees is a small dataset, we will use transfer learning to achieve a good quality model. We choose one of the networks trained on ImageNet and replace the last fully connected layers responsible for classification.
2. Optimizer: SGD, Adam.
3. Learning Rate: from 1e-4 to 1e-2.
"""
def objective(trial):
    # Hyperparameters we want to optimize
params = {
"model_name": trial.suggest_categorical('model_name',["resnet18", "alexnet", "vgg16"]),
"lr": trial.suggest_loguniform('lr', 1e-4, 1e-2),
"optimizer_name": trial.suggest_categorical('optimizer_name',["SGD", "Adam"])
}
# Get pretrained model
model = get_model(params["model_name"])
model = model.to(device)
# Define criterion
criterion = nn.CrossEntropyLoss()
# Configure optimizer
optimizer = getattr(
torch.optim, params["optimizer_name"]
)(model.parameters(), lr=params["lr"])
# Train model
# best_model, best_acc = train_model(trial, model, criterion, optimizer, num_epochs=20)
best_model, best_acc = train_model(trial, model, criterion, optimizer, num_epochs=5)
# Save best model for each trial
# torch.save(best_model.state_dict(), f"model_trial_{trial.number}.pth")
# Return accuracy (Objective Value) of the current trial
return best_acc
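"""Before launching a full study, it can be useful to smoke-test the objective with a fixed set of hyperparameters via optuna.trial.FixedTrial, whose should_prune() always returns False, so no pruning occurs. This optional check is our addition, and the values below are arbitrary:"""
smoke_trial = optuna.trial.FixedTrial({
    "model_name": "resnet18",
    "lr": 1e-3,
    "optimizer_name": "Adam",
})
# Runs one full (short) training; confirms the objective executes end to end
print("Smoke-test accuracy:", objective(smoke_trial))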
"""To start optimizing our **Objective Function**, we create a new **study**:
"""
# sampler: We want to use a TPE sampler
# pruner: We use a MedianPruner in order to interrupt unpromising trials
# direction: The direction of the study is "maximize" because we want to maximize the accuracy
# n_trials: Number of trials
sampler = optuna.samplers.TPESampler()
study = optuna.create_study(
sampler=sampler,
pruner=optuna.pruners.MedianPruner(
n_startup_trials=3, n_warmup_steps=5, interval_steps=3
),
direction='maximize')
study.optimize(func=objective, n_trials=20)
print("Best trial: ")
print(study.best_trial)
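"""Beyond best_trial, the study object exposes the winning configuration and score directly, and can export every trial to a pandas DataFrame for further analysis:"""
print("Best value: ", study.best_value)
print("Best params:", study.best_params)
# One row per trial, including pruned ones
print(study.trials_dataframe()[["number", "value", "state"]].head())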
"""Optuna helps to visually assess the impact of hyperparameters on the accuracy of the predictions. Let’s visualize the dependence between the learning rate, optimizer_name, model_name and accuracy (Objective Value):
"""
optuna.visualization.plot_parallel_coordinate(study)
optuna.visualization.plot_contour(study, params=['optimizer_name', 'model_name'])
"""Slice plots for each of the hyperparameters:"""
optuna.visualization.plot_slice(study)
"""Hyperparameter importances:"""
optuna.visualization.plot_param_importances(study)
"""Plot the optimization history of all trials in a study:"""
optuna.visualization.plot_optimization_history(study)
""" Learning curves of the trials:"""
optuna.visualization.plot_intermediate_values(study)
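"""When running this file as a plain script rather than in a notebook, the Plotly figures returned by optuna.visualization will not render inline; they can be saved as standalone HTML instead (the filename below is arbitrary):"""
fig = optuna.visualization.plot_optimization_history(study)
fig.write_html("optimization_history.html")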