Source code for holocron.trainer.segmentation

# Copyright (C) 2019-2024, François-Guillaume Fernandez.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0> for full license details.

from typing import Any, Dict

import torch

from .core import Trainer

__all__ = ["SegmentationTrainer"]


class SegmentationTrainer(Trainer):
    """Semantic segmentation trainer class.

    Args:
        model: model to train
        train_loader: training loader
        val_loader: validation loader
        criterion: loss criterion
        optimizer: parameter optimizer
        gpu: index of the GPU to use
        output_file: path where checkpoints will be saved
        amp: whether to use automatic mixed precision
        skip_nan_loss: whether the optimizer step should be skipped when the loss is NaN
        nan_tolerance: number of consecutive batches with NaN loss before stopping the training
        gradient_acc: number of batches to accumulate the gradient of before performing the update step
        gradient_clip: the gradient clip value
        on_epoch_end: callback triggered at the end of an epoch
        num_classes: number of output classes
    """

    def __init__(self, *args: Any, num_classes: int = 10, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        self.num_classes = num_classes
    @torch.inference_mode()
    def evaluate(self, ignore_index: int = 255) -> Dict[str, float]:
        """Evaluate the model on the validation set

        Args:
            ignore_index (int, optional): index of the class to ignore in evaluation

        Returns:
            dict: evaluation metrics
        """
        self.model.eval()

        val_loss, num_valid_batches = 0.0, 0
        conf_mat = torch.zeros(
            (self.num_classes, self.num_classes), dtype=torch.int64, device=next(self.model.parameters()).device
        )
        for x, target in self.val_loader:
            x, target = self.to_cuda(x, target)
            _loss, out = self._get_loss(x, target, return_logits=True)

            # Safeguard for NaN loss
            if not torch.isnan(_loss) and not torch.isinf(_loss):
                val_loss += _loss.item()
                num_valid_batches += 1

            # borrowed from https://github.com/pytorch/vision/blob/master/references/segmentation/train.py
            pred = out.argmax(dim=1).flatten()
            target = target.flatten()
            # Keep only pixels whose label is a valid class index; this also drops
            # ignored pixels (e.g. label 255) as long as num_classes is smaller
            k = (target >= 0) & (target < self.num_classes)
            inds = self.num_classes * target[k].to(torch.int64) + pred[k]
            nc = self.num_classes
            conf_mat += torch.bincount(inds, minlength=nc**2).reshape(nc, nc)

        val_loss /= num_valid_batches
        acc_global = (torch.diag(conf_mat).sum() / conf_mat.sum()).item()
        mean_iou = (torch.diag(conf_mat) / (conf_mat.sum(1) + conf_mat.sum(0) - torch.diag(conf_mat))).mean().item()

        return {"val_loss": val_loss, "acc_global": acc_global, "mean_iou": mean_iou}
    @staticmethod
    def _eval_metrics_str(eval_metrics: Dict[str, float]) -> str:
        return (
            f"Validation loss: {eval_metrics['val_loss']:.4} "
            f"(Acc: {eval_metrics['acc_global']:.2%} | Mean IoU: {eval_metrics['mean_iou']:.2%})"
        )
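
For reference, the confusion-matrix update that evaluate borrows from torchvision can be checked on toy tensors. Below is a minimal, self-contained sketch (num_classes and the pixel values are made up for illustration) mirroring the masking, bincount, and mean-IoU steps:

import torch

num_classes = 3
# Ground-truth and predicted class indices for 6 pixels; 255 marks an ignored pixel
target = torch.tensor([0, 1, 2, 2, 255, 1])
pred = torch.tensor([0, 1, 1, 2, 0, 1])

# Keep only pixels whose label is a valid class index (drops the 255 entry)
k = (target >= 0) & (target < num_classes)
# Encode each (target, pred) pair as one index into a flattened num_classes x num_classes matrix
inds = num_classes * target[k].to(torch.int64) + pred[k]
conf_mat = torch.bincount(inds, minlength=num_classes**2).reshape(num_classes, num_classes)

# Per-class IoU = diag / (row sum + col sum - diag); averaging over classes gives the mean IoU
inter = torch.diag(conf_mat)
union = conf_mat.sum(1) + conf_mat.sum(0) - inter
print((inter / union).mean().item())  # ~0.722 for the tensors above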
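
And a hypothetical usage sketch: the model, dataset, and hyperparameters below are placeholders chosen for illustration, and the import path assumes holocron.trainer re-exports the class (as __all__ above suggests):

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

from holocron.trainer import SegmentationTrainer

num_classes = 3
# Tiny synthetic dataset: 8 RGB images of 16x16 pixels with random per-pixel labels
images = torch.rand(8, 3, 16, 16)
masks = torch.randint(0, num_classes, (8, 16, 16))
loader = DataLoader(TensorDataset(images, masks), batch_size=4)

# A 1x1 convolution stands in for a real segmentation model producing (N, num_classes, H, W) logits
model = nn.Conv2d(3, num_classes, 1)
criterion = nn.CrossEntropyLoss(ignore_index=255)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

trainer = SegmentationTrainer(model, loader, loader, criterion, optimizer, num_classes=num_classes)
eval_metrics = trainer.evaluate()
print(trainer._eval_metrics_str(eval_metrics))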