# Source code for holocron.ops.boxes

# Copyright (C) 2019-2024, François-Guillaume Fernandez.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0> for full license details.

import math
from typing import Tuple, cast

import torch
from torch import Tensor
from torchvision.ops.boxes import box_area, box_iou

__all__ = ["box_giou", "ciou_loss", "diou_loss"]


def _box_iou(boxes1: Tensor, boxes2: Tensor) -> Tuple[Tensor, Tensor]:
    """Computes the pairwise IoU of two sets of boxes along with the matching union areas.

    Adapted from https://github.com/facebookresearch/detr/blob/master/util/box_ops.py

    Args:
        boxes1 (torch.Tensor[N, 4]): bounding boxes
        boxes2 (torch.Tensor[M, 4]): bounding boxes

    Returns:
        tuple: the pairwise IoU and the pairwise union area, each with one row per box of `boxes1`
        and one column per box of `boxes2`
    """
    # Corners of the overlap region for every (boxes1, boxes2) pair
    top_left = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
    bottom_right = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]

    # Pairs that do not overlap get a negative extent, which is clamped to an empty intersection
    overlap_wh = torch.clamp(bottom_right - top_left, min=0)  # [N,M,2]
    intersection = overlap_wh[..., 0] * overlap_wh[..., 1]  # [N,M]

    union = box_area(boxes1)[:, None] + box_area(boxes2) - intersection

    return intersection / union, union



# [docs]
def box_giou(boxes1: Tensor, boxes2: Tensor) -> Tensor:
    r"""Computes the Generalized-IoU as described in `"Generalized Intersection over Union: A Metric and A Loss
    for Bounding Box Regression" <https://arxiv.org/pdf/1902.09630.pdf>`_. This implementation was adapted
    from https://github.com/facebookresearch/detr/blob/master/util/box_ops.py

    The generalized IoU is defined as follows:

    .. math::
        GIoU = IoU - \frac{|C - A \cup B|}{|C|}

    where :math:`IoU` is the Intersection over Union,
    :math:`A \cup B` is the area of the boxes' union,
    and :math:`C` is the area of the smallest enclosing box covering the two boxes.

    Args:
        boxes1 (torch.Tensor[M, 4]): bounding boxes
        boxes2 (torch.Tensor[N, 4]): bounding boxes

    Returns:
        torch.Tensor[M, N]: Generalized-IoU

    Raises:
        AssertionError: if any box has one of its last two coordinates smaller than the matching
            one of its first two coordinates
    """
    # Degenerate boxes lead to inf / nan results, so they are rejected upfront
    for boxes in (boxes1, boxes2):
        if torch.any(boxes[:, 2:] < boxes[:, :2]):
            raise AssertionError("Incorrect coordinate format")

    iou, union = _box_iou(boxes1, boxes2)

    # Smallest box enclosing each pair: min over the first corners, max over the second corners
    hull_top_left = torch.min(boxes1[:, None, :2], boxes2[:, :2])
    hull_bottom_right = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])

    hull_wh = torch.clamp(hull_bottom_right - hull_top_left, min=0)  # [M,N,2]
    hull_area = hull_wh[..., 0] * hull_wh[..., 1]

    return iou - (hull_area - union) / hull_area



def iou_penalty(boxes1: Tensor, boxes2: Tensor) -> Tensor:
    """Implements the penalty term for the Distance-IoU loss: the squared distance between box centers
    divided by the squared diagonal length of the smallest box enclosing both boxes.

    The computation runs in the promoted dtype of both inputs (never narrower than the default
    floating dtype), so double-precision boxes are no longer truncated to single precision.

    Args:
        boxes1 (torch.Tensor[M, 4]): bounding boxes
        boxes2 (torch.Tensor[N, 4]): bounding boxes

    Returns:
        torch.Tensor[M, N]: penalty term
    """
    # Common floating dtype for both inputs, at least as wide as the default one
    dtype = torch.promote_types(
        torch.promote_types(boxes1.dtype, boxes2.dtype),
        torch.get_default_dtype(),
    )
    # Lay the boxes out for pairwise broadcasting
    b1 = boxes1.to(dtype=dtype)[:, None, :]  # [M,1,4]
    b2 = boxes2.to(dtype=dtype)[None, :, :]  # [1,N,4]

    # Squared diagonal length of the smallest enclosing box
    enclosing_wh = torch.max(b1[..., 2:], b2[..., 2:]) - torch.min(b1[..., :2], b2[..., :2])  # [M,N,2]
    c2 = enclosing_wh.pow(2).sum(dim=-1)

    # Squared L2 distance between box centers: the corner sums are twice the centers,
    # hence the division by 4 once squared
    center_diff = (b1[..., :2] + b1[..., 2:]) - (b2[..., :2] + b2[..., 2:])  # [M,N,2]
    center_dist2 = center_diff.pow(2).sum(dim=-1) / 4

    # NOTE: a pair of identical zero-size boxes has a zero diagonal and therefore yields nan
    return center_dist2 / c2



# [docs]
def diou_loss(boxes1: Tensor, boxes2: Tensor) -> Tensor:
    r"""Computes the Distance-IoU loss as described in `"Distance-IoU Loss: Faster and Better Learning for
    Bounding Box Regression" <https://arxiv.org/pdf/1911.08287.pdf>`_.

    The loss is defined as follows:

    .. math::
        \mathcal{L}_{DIoU} = 1 - IoU + \frac{\rho^2(b, b^{GT})}{c^2}

    where :math:`IoU` is the Intersection over Union,
    :math:`b` and :math:`b^{GT}` are the centers of the box and the ground truth box respectively,
    :math:`c` is the diagonal length of the smallest enclosing box covering the two boxes,
    and :math:`\rho(.)` is the Euclidean distance.

    .. image:: https://github.com/frgfm/Holocron/releases/download/v0.1.3/diou_loss.png
        :align: center

    Args:
        boxes1 (torch.Tensor[M, 4]): bounding boxes
        boxes2 (torch.Tensor[N, 4]): bounding boxes

    Returns:
        torch.Tensor[M, N]: Distance-IoU loss
    """
    pairwise_iou = box_iou(boxes1, boxes2)
    # Normalized squared center distance
    center_penalty = iou_penalty(boxes1, boxes2)

    return cast(Tensor, 1 - pairwise_iou + center_penalty)



def aspect_ratio(boxes: Tensor) -> Tensor:
    """Computes the arctangent of the width-to-height ratio of each box, where the width is the
    difference between columns 2 and 0, and the height the difference between columns 3 and 1.

    Args:
        boxes (torch.Tensor[N, 4]): bounding boxes

    Returns:
        torch.Tensor[N]: arctangent of the aspect ratio
    """
    width = boxes[:, 2] - boxes[:, 0]
    height = boxes[:, 3] - boxes[:, 1]

    return (width / height).atan()


def aspect_ratio_consistency(boxes1: Tensor, boxes2: Tensor) -> Tensor:
    """Computes the aspect ratio consistency term of the complete IoU loss: the squared pairwise
    difference of the aspect ratio arctangents, scaled by 4 / pi^2.

    Args:
        boxes1 (torch.Tensor[M, 4]): bounding boxes
        boxes2 (torch.Tensor[N, 4]): bounding boxes

    Returns:
        torch.Tensor[M, N]: aspect ratio consistency
    """
    # One row per box of boxes1, one column per box of boxes2
    angles1 = aspect_ratio(boxes1)[..., None]
    angles2 = aspect_ratio(boxes2)[..., None, :]

    return (angles1 - angles2).pow(2) * (4 / math.pi**2)



# [docs]
def ciou_loss(boxes1: Tensor, boxes2: Tensor) -> Tensor:
    r"""Computes the Complete IoU loss as described in `"Distance-IoU Loss: Faster and Better Learning for
    Bounding Box Regression" <https://arxiv.org/pdf/1911.08287.pdf>`_.

    The loss is defined as follows:

    .. math::
        \mathcal{L}_{CIoU} = 1 - IoU + \frac{\rho^2(b, b^{GT})}{c^2} + \alpha v

    where :math:`IoU` is the Intersection over Union,
    :math:`b` and :math:`b^{GT}` are the centers of the box and the ground truth box respectively,
    :math:`c` is the diagonal length of the smallest enclosing box covering the two boxes,
    :math:`\rho(.)` is the Euclidean distance,
    :math:`\alpha` is a positive trade-off parameter,
    and :math:`v` is the aspect ratio consistency.

    More specifically:

    .. math::
        v = \frac{4}{\pi^2} \Big(\arctan{\frac{w^{GT}}{h^{GT}}} - \arctan{\frac{w}{h}}\Big)^2

    and

    .. math::
        \alpha = \frac{v}{(1 - IoU) + v}

    The :math:`\alpha v` term is only applied to pairs where both :math:`v` and :math:`IoU` are non-zero.

    Args:
        boxes1 (torch.Tensor[M, 4]): bounding boxes
        boxes2 (torch.Tensor[N, 4]): bounding boxes

    Returns:
        torch.Tensor[M, N]: Complete IoU loss

    Example:
        >>> import torch
        >>> from holocron.ops.boxes import ciou_loss
        >>> boxes1 = torch.tensor([[0, 0, 100, 100], [100, 100, 200, 200]], dtype=torch.float32)
        >>> boxes2 = torch.tensor([[50, 50, 150, 150]], dtype=torch.float32)
        >>> ciou_loss(boxes1, boxes2)
    """
    iou = box_iou(boxes1, boxes2)
    v = aspect_ratio_consistency(boxes1, boxes2)

    # Distance-IoU part of the loss
    loss = cast(Tensor, 1 - iou + iou_penalty(boxes1, boxes2))

    # Pairs that receive the aspect ratio term
    mask = (v != 0) & (iou != 0)
    # Neutral denominator outside the mask, so no 0 / 0 is ever evaluated (v == 0 with IoU == 1)
    denom = torch.where(mask, 1 - iou + v, torch.ones_like(v))
    # alpha * v = v^2 / ((1 - IoU) + v). Computed out-of-place on purpose: boolean-mask indexing
    # returns a copy, so an in-place update applied to `loss[mask]` would be silently discarded
    aspect_term = torch.where(mask, v * v / denom, torch.zeros_like(v))

    return cast(Tensor, loss + aspect_term)