YOLOv8 Source Code Analysis (42)
.\yolov8\ultralytics\utils\loss.py
# Import the required modules from PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F

# Import specific utilities from the Ultralytics toolkit
from ultralytics.utils.metrics import OKS_SIGMA
from ultralytics.utils.ops import crop_mask, xywh2xyxy, xyxy2xywh
from ultralytics.utils.tal import RotatedTaskAlignedAssigner, TaskAlignedAssigner, dist2bbox, dist2rbox, make_anchors
from ultralytics.utils.torch_utils import autocast

# Import the bbox_iou and probiou functions from the local .metrics module
from .metrics import bbox_iou, probiou
# Import the bbox2dist function from the local .tal module
from .tal import bbox2dist


# A PyTorch module named VarifocalLoss, inheriting from nn.Module
class VarifocalLoss(nn.Module):
    """Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367."""

    def __init__(self):
        """Initialize the VarifocalLoss class."""
        super().__init__()

    @staticmethod
    def forward(pred_score, gt_score, label, alpha=0.75, gamma=2.0):
        """Computes varifocal loss."""
        # Per-element weighting factor
        weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label) + gt_score * label
        # Disable automatic mixed precision for the loss computation
        with autocast(enabled=False):
            loss = (
                (F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), reduction="none") * weight)
                .mean(1)  # mean over dimension 1
                .sum()  # sum over the batch
            )
        return loss


class FocalLoss(nn.Module):
    """Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)."""

    def __init__(self):
        """Initializer for FocalLoss class with no parameters."""
        super().__init__()

    @staticmethod
    def forward(pred, label, gamma=1.5, alpha=0.25):
        """Calculates focal loss for object detection/classification tasks."""
        # Binary cross-entropy loss on the raw logits
        loss = F.binary_cross_entropy_with_logits(pred, label, reduction="none")
        # Convert logits to probabilities
        pred_prob = pred.sigmoid()
        # p_t from the focal loss paper
        p_t = label * pred_prob + (1 - label) * (1 - pred_prob)
        # Modulating factor down-weights easy examples
        modulating_factor = (1.0 - p_t) ** gamma
        loss *= modulating_factor
        # Optional alpha class-balancing factor
        if alpha > 0:
            alpha_factor = label * alpha + (1 - label) * (1 - alpha)
            loss *= alpha_factor
        return loss.mean(1).sum()


class DFLoss(nn.Module):
    """Criterion class for computing DFL losses during training."""

    def __init__(self, reg_max=16) -> None:
        """Initialize the DFL module."""
        super().__init__()
        self.reg_max = reg_max

    def __call__(self, pred_dist, target):
        """
        Return sum of left and right DFL losses.

        Distribution Focal Loss (DFL) proposed in Generalized Focal Loss
        https://ieeexplore.ieee.org/document/9792391
        """
        # Clamp the target to [0, reg_max - 1 - 0.01]
        target = target.clamp_(0, self.reg_max - 1 - 0.01)
        tl = target.long()  # target left (integer bin below)
        tr = tl + 1  # target right (integer bin above)
        wl = tr - target  # weight for the left bin
        wr = 1 - wl  # weight for the right bin
        # Cross-entropy against both neighbouring bins, weighted by the distance to each
        left_loss = F.cross_entropy(pred_dist, tl.view(-1), reduction="none").view(tl.shape) * wl
        right_loss = F.cross_entropy(pred_dist, tr.view(-1), reduction="none").view(tl.shape) * wr
        # Mean of the left and right losses over the last dimension, keeping the dimension
        return (left_loss + right_loss).mean(-1, keepdim=True)
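As a quick sanity check of the left/right decomposition above, here is a minimal standalone sketch (not part of loss.py; the values are illustrative): a continuous target of 3.7 is split between integer bins 3 and 4 with weights of roughly 0.3 and 0.7.

import torch
import torch.nn.functional as F

reg_max = 16
target = torch.tensor([3.7])  # continuous ltrb distance, in bin units
tl = target.long()  # left bin: 3
wl = tl + 1 - target  # left weight: ~0.3
wr = 1 - wl  # right weight: ~0.7
pred_dist = torch.randn(1, reg_max)  # random logits over the 16 bins
loss = (
    F.cross_entropy(pred_dist, tl, reduction="none") * wl
    + F.cross_entropy(pred_dist, tl + 1, reduction="none") * wr
)
print(tl.item(), wl.item(), wr.item(), loss.item())  # 3, ~0.3, ~0.7, <loss value>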
# Module for computing bounding-box losses
class BboxLoss(nn.Module):
    """Criterion class for computing training losses during training."""

    def __init__(self, reg_max=16):
        """Initialize the BboxLoss module with regularization maximum and DFL settings."""
        super().__init__()
        # Create a DFLoss instance when reg_max > 1, otherwise disable DFL
        self.dfl_loss = DFLoss(reg_max) if reg_max > 1 else None

    def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
        """Compute IoU loss."""
        # Sum of target scores over the foreground anchors, used as per-anchor weights
        weight = target_scores.sum(-1)[fg_mask].unsqueeze(-1)
        # IoU (CIoU variant) between predicted and target boxes on foreground anchors
        iou = bbox_iou(pred_bboxes[fg_mask], target_bboxes[fg_mask], xywh=False, CIoU=True)
        loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum

        # DFL loss
        if self.dfl_loss:
            # Convert anchor points and target boxes to ltrb distances
            target_ltrb = bbox2dist(anchor_points, target_bboxes, self.dfl_loss.reg_max - 1)
            loss_dfl = self.dfl_loss(pred_dist[fg_mask].view(-1, self.dfl_loss.reg_max), target_ltrb[fg_mask]) * weight
            loss_dfl = loss_dfl.sum() / target_scores_sum
        else:
            # No DFL: return a zero loss
            loss_dfl = torch.tensor(0.0).to(pred_dist.device)

        return loss_iou, loss_dfl


# Subclass of BboxLoss that handles rotated bounding boxes
class RotatedBboxLoss(BboxLoss):
    """Criterion class for computing training losses during training."""

    def __init__(self, reg_max):
        """Initialize the RotatedBboxLoss module with regularization maximum and DFL settings."""
        super().__init__(reg_max)

    def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
        """Compute IoU loss for rotated bounding boxes."""
        # Sum of target scores over the foreground anchors, used as per-anchor weights
        weight = target_scores.sum(-1)[fg_mask].unsqueeze(-1)
        # Probabilistic IoU between predicted and target rotated boxes
        iou = probiou(pred_bboxes[fg_mask], target_bboxes[fg_mask])
        loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum

        # DFL loss
        if self.dfl_loss:
            # Convert anchor points and the xywh part of the target boxes to ltrb distances
            target_ltrb = bbox2dist(anchor_points, xywh2xyxy(target_bboxes[..., :4]), self.dfl_loss.reg_max - 1)
            loss_dfl = self.dfl_loss(pred_dist[fg_mask].view(-1, self.dfl_loss.reg_max), target_ltrb[fg_mask]) * weight
            loss_dfl = loss_dfl.sum() / target_scores_sum
        else:
            # No DFL: return a zero loss
            loss_dfl = torch.tensor(0.0).to(pred_dist.device)

        return loss_iou, loss_dfl


# Module for computing keypoint losses
class KeypointLoss(nn.Module):
    """Criterion class for computing training losses."""

    def __init__(self, sigmas) -> None:
        """Initialize the KeypointLoss class with the given sigmas."""
        super().__init__()
        self.sigmas = sigmas

    def forward(self, pred_kpts, gt_kpts, kpt_mask, area):
        """Calculates the keypoint loss factor and Euclidean distance loss between predicted and ground-truth keypoints."""
        # Squared Euclidean distance between predicted and ground-truth keypoints
        d = (pred_kpts[..., 0] - gt_kpts[..., 0]).pow(2) + (pred_kpts[..., 1] - gt_kpts[..., 1]).pow(2)
        # Loss factor rescales the per-image loss so that images with few labelled keypoints are not under-weighted
        kpt_loss_factor = kpt_mask.shape[1] / (torch.sum(kpt_mask != 0, dim=1) + 1e-9)
        # OKS-style exponent from cocoeval, scaled by the per-keypoint sigmas and the object area
        e = d / ((2 * self.sigmas).pow(2) * (area + 1e-9) * 2)
        # Mask out unlabelled keypoints and return the weighted mean loss
        return (kpt_loss_factor.view(-1, 1) * ((1 - torch.exp(-e)) * kpt_mask)).mean()
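The OKS-style term is easy to exercise in isolation. A small self-contained sketch (the sigmas, coordinates, and area below are made up): each keypoint contributes 1 - exp(-d / ((2*sigma)^2 * 2*area)), masked by visibility.

import torch

sigmas = torch.tensor([0.26, 0.79]) / 10  # two hypothetical keypoint sigmas
pred = torch.tensor([[[10.0, 10.0], [20.0, 20.0]]])  # (batch=1, kpts=2, xy)
gt = torch.tensor([[[11.0, 10.0], [20.0, 25.0]]])
mask = torch.tensor([[1.0, 1.0]])  # both keypoints labelled
area = torch.tensor([[400.0]])  # object area

d = (pred[..., 0] - gt[..., 0]).pow(2) + (pred[..., 1] - gt[..., 1]).pow(2)
e = d / ((2 * sigmas).pow(2) * (area + 1e-9) * 2)
kpt_loss_factor = mask.shape[1] / (torch.sum(mask != 0, dim=1) + 1e-9)
loss = (kpt_loss_factor.view(-1, 1) * ((1 - torch.exp(-e)) * mask)).mean()
print(loss)  # small scalar; grows as the keypoint error d grows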
class v8DetectionLoss:
    """Criterion class for computing training losses."""

    def __init__(self, model, tal_topk=10):  # model must be de-paralleled
        """Initializes v8DetectionLoss with the model, defining model-related properties and BCE loss function."""
        device = next(model.parameters()).device  # get model device
        h = model.args  # hyperparameters

        m = model.model[-1]  # Detect() module (last model component)
        self.bce = nn.BCEWithLogitsLoss(reduction="none")  # un-reduced BCE loss
        self.hyp = h
        self.stride = m.stride  # model strides
        self.nc = m.nc  # number of classes
        self.no = m.nc + m.reg_max * 4  # outputs per anchor: classes plus box distribution
        self.reg_max = m.reg_max
        self.device = device

        # Use DFL (Distribution Focal Loss) only when reg_max > 1
        self.use_dfl = m.reg_max > 1

        # Task-aligned assigner matches ground-truth boxes to anchor points
        self.assigner = TaskAlignedAssigner(topk=tal_topk, num_classes=self.nc, alpha=0.5, beta=6.0)
        self.bbox_loss = BboxLoss(m.reg_max).to(device)
        # Projection vector used to decode the predicted distribution into distances
        self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device)

    def preprocess(self, targets, batch_size, scale_tensor):
        """Preprocesses the target counts and matches with the input batch size to output a tensor."""
        nl, ne = targets.shape
        if nl == 0:
            # No labels in this batch: return an empty tensor
            out = torch.zeros(batch_size, 0, ne - 1, device=self.device)
        else:
            i = targets[:, 0]  # image index
            _, counts = i.unique(return_counts=True)
            counts = counts.to(dtype=torch.int32)
            # Allocate one row per image, padded to the maximum label count
            out = torch.zeros(batch_size, counts.max(), ne - 1, device=self.device)
            for j in range(batch_size):
                # Select the targets belonging to image j
                matches = i == j
                n = matches.sum()
                if n:
                    out[j, :n] = targets[matches, 1:]
            # Scale the normalized xywh boxes to pixels and convert to xyxy
            out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor))
        return out

    def bbox_decode(self, anchor_points, pred_dist):
        """Decode predicted object bounding box coordinates from anchor points and distribution."""
        if self.use_dfl:
            b, a, c = pred_dist.shape  # batch, anchors, channels
            # Softmax over the reg_max bins, then take the expected value via the projection vector
            pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
            # Two equivalent alternative decodings:
            # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2, 3).softmax(3).matmul(self.proj.type(pred_dist.dtype))
            # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2)
        return dist2bbox(pred_dist, anchor_points, xywh=False)

    def __call__(self, preds, batch):
        """Calculate the sum of the loss for box, cls and dfl multiplied by batch size."""
        loss = torch.zeros(3, device=self.device)  # box, cls, dfl
        # If preds is a tuple, take the second element as the feature maps
        feats = preds[1] if isinstance(preds, tuple) else preds
        # Concatenate the per-level features and split into box distribution and class scores
        pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
            (self.reg_max * 4, self.nc), 1
        )

        # Rearrange dimensions for the computations below
        pred_scores = pred_scores.permute(0, 2, 1).contiguous()
        pred_distri = pred_distri.permute(0, 2, 1).contiguous()

        dtype = pred_scores.dtype
        batch_size = pred_scores.shape[0]
        # Image size in pixels, as a tensor
        imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
        # Generate anchor points and the per-anchor stride tensor
        anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

        # Targets: image index, class, and boxes, moved to the model device
        targets = torch.cat((batch["batch_idx"].view(-1, 1), batch["cls"].view(-1, 1), batch["bboxes"]), 1)
        targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
        gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
        # Mask marking valid (non-empty) ground-truth boxes
        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0.0)

        # Decode the predicted distribution into xyxy boxes
        pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)

        # Assign targets to anchors with the task-aligned assigner
        _, target_bboxes, target_scores, fg_mask, _ = self.assigner(
            pred_scores.detach().sigmoid(),
            (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
            anchor_points * stride_tensor,
            gt_labels,
            gt_bboxes,
            mask_gt,
        )

        # Normalizer for the losses
        target_scores_sum = max(target_scores.sum(), 1)

        # Classification loss (binary cross-entropy)
        loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

        # Box and DFL losses, computed only when there are foreground anchors
        if fg_mask.sum():
            target_bboxes /= stride_tensor
            loss[0], loss[2] = self.bbox_loss(
                pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask
            )

        # Apply the per-component gains from the hyperparameters
        loss[0] *= self.hyp.box  # box gain
        loss[1] *= self.hyp.cls  # cls gain
        loss[2] *= self.hyp.dfl  # dfl gain

        # Return the total loss scaled by batch size, plus the detached per-component losses
        return loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)
class v8SegmentationLoss(v8DetectionLoss):
    """Criterion class for computing training losses."""

    def __init__(self, model):  # model must be de-paralleled
        """Initializes the v8SegmentationLoss class, taking a de-paralleled model as argument."""
        super().__init__(model)
        self.overlap = model.args.overlap_mask

    @staticmethod
    def single_mask_loss(
        gt_mask: torch.Tensor, pred: torch.Tensor, proto: torch.Tensor, xyxy: torch.Tensor, area: torch.Tensor
    ) -> torch.Tensor:
        """
        Compute the instance segmentation loss for a single image.

        Args:
            gt_mask (torch.Tensor): Ground truth mask of shape (n, H, W), where n is the number of objects.
            pred (torch.Tensor): Predicted mask coefficients of shape (n, 32).
            proto (torch.Tensor): Prototype masks of shape (32, H, W).
            xyxy (torch.Tensor): Ground truth bounding boxes in xyxy format, normalized to [0, 1], of shape (n, 4).
            area (torch.Tensor): Area of each ground truth bounding box of shape (n,).

        Returns:
            (torch.Tensor): The calculated mask loss for a single image.

        Notes:
            The function uses the equation pred_mask = torch.einsum('in,nhw->ihw', pred, proto) to produce the
            predicted masks from the prototype masks and predicted mask coefficients.
        """
        # Compute predicted masks from the prototype masks and coefficients
        pred_mask = torch.einsum("in,nhw->ihw", pred, proto)  # (n, 32) @ (32, H, W) -> (n, H, W)
        # Binary cross-entropy between predicted and ground-truth masks
        loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none")
        # Crop the loss to each box, average per instance, normalize by area, and sum over instances
        return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).sum()

    def calculate_segmentation_loss(
        self,
        fg_mask: torch.Tensor,
        masks: torch.Tensor,
        target_gt_idx: torch.Tensor,
        target_bboxes: torch.Tensor,
        batch_idx: torch.Tensor,
        proto: torch.Tensor,
        pred_masks: torch.Tensor,
        imgsz: torch.Tensor,
        overlap: bool,
    ) -> torch.Tensor:
        """
        Calculate the loss for instance segmentation.

        Args:
            fg_mask (torch.Tensor): A binary tensor of shape (BS, N_anchors) indicating which anchors are positive.
            masks (torch.Tensor): Ground truth masks of shape (BS, H, W) if `overlap` is False, otherwise (BS, ?, H, W).
            target_gt_idx (torch.Tensor): Indexes of ground truth objects for each anchor of shape (BS, N_anchors).
            target_bboxes (torch.Tensor): Ground truth bounding boxes for each anchor of shape (BS, N_anchors, 4).
            batch_idx (torch.Tensor): Batch indices of shape (N_labels_in_batch, 1).
            proto (torch.Tensor): Prototype masks of shape (BS, 32, H, W).
            pred_masks (torch.Tensor): Predicted masks for each anchor of shape (BS, N_anchors, 32).
            imgsz (torch.Tensor): Size of the input image as a tensor of shape (2), i.e., (H, W).
            overlap (bool): Whether the masks in `masks` tensor overlap.

        Returns:
            (torch.Tensor): The calculated loss for instance segmentation.

        Notes:
            The batch loss can be computed for improved speed at higher memory usage.
            For example, pred_mask can be computed as follows:
                pred_mask = torch.einsum('in,nhw->ihw', pred, proto)  # (i, 32) @ (32, 160, 160) -> (i, 160, 160)
        """
        _, _, mask_h, mask_w = proto.shape  # height and width of the prototype masks
        loss = 0

        # Normalize the target boxes to 0-1
        target_bboxes_normalized = target_bboxes / imgsz[[1, 0, 1, 0]]

        # Areas of the target boxes
        marea = xyxy2xywh(target_bboxes_normalized)[..., 2:].prod(2)

        # Scale the normalized boxes to the mask size
        mxyxy = target_bboxes_normalized * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=proto.device)

        # Iterate over each image's foreground mask, target indices, predictions, prototypes, boxes, areas, and masks
        for i, single_i in enumerate(zip(fg_mask, target_gt_idx, pred_masks, proto, mxyxy, marea, masks)):
            fg_mask_i, target_gt_idx_i, pred_masks_i, proto_i, mxyxy_i, marea_i, masks_i = single_i
            if fg_mask_i.any():  # this image has at least one positive anchor
                mask_idx = target_gt_idx_i[fg_mask_i]  # ground-truth index for each positive anchor
                if overlap:
                    # Masks share one channel with instance ids; recover a binary mask per instance
                    gt_mask = masks_i == (mask_idx + 1).view(-1, 1, 1)
                    gt_mask = gt_mask.float()
                else:
                    gt_mask = masks[batch_idx.view(-1) == i][mask_idx]

                # Accumulate the per-image mask loss
                loss += self.single_mask_loss(
                    gt_mask, pred_masks_i[fg_mask_i], proto_i, mxyxy_i[fg_mask_i], marea_i[fg_mask_i]
                )

            # WARNING: lines below prevent Multi-GPU DDP 'unused gradient' PyTorch errors, do not remove
            else:
                loss += (proto * 0).sum() + (pred_masks * 0).sum()  # inf sums may lead to nan loss

        # Average the loss over all foreground anchors
        return loss / fg_mask.sum()
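The heart of single_mask_loss is the einsum that mixes the 32 shared prototype masks with per-instance coefficients. A self-contained sketch with made-up shapes:

import torch

n, h, w = 3, 160, 160  # 3 hypothetical instances, 160x160 prototype resolution
coeffs = torch.randn(n, 32)  # predicted mask coefficients, one row per instance
proto = torch.randn(32, h, w)  # prototype masks shared across instances
pred_mask = torch.einsum("in,nhw->ihw", coeffs, proto)  # linear combination per instance
print(pred_mask.shape)  # torch.Size([3, 160, 160])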
class v8PoseLoss(v8DetectionLoss):
    """Criterion class for computing training losses."""

    def __init__(self, model):  # model must be de-paralleled
        """Initializes v8PoseLoss with model, sets keypoint variables and declares a keypoint loss instance."""
        super().__init__(model)
        self.kpt_shape = model.model[-1].kpt_shape
        self.bce_pose = nn.BCEWithLogitsLoss()
        # Check if the model deals with pose keypoints (17 keypoints with 3 coordinates each)
        is_pose = self.kpt_shape == [17, 3]
        nkpt = self.kpt_shape[0]  # number of keypoints
        # Set sigmas for the keypoint loss based on whether this is the COCO pose layout or not
        sigmas = torch.from_numpy(OKS_SIGMA).to(self.device) if is_pose else torch.ones(nkpt, device=self.device) / nkpt
        self.keypoint_loss = KeypointLoss(sigmas=sigmas)

    @staticmethod
    def kpts_decode(anchor_points, pred_kpts):
        """Decodes predicted keypoints to image coordinates."""
        y = pred_kpts.clone()
        # Scale keypoint coordinates
        y[..., :2] *= 2.0
        # Translate keypoints to their anchor points
        y[..., 0] += anchor_points[:, [0]] - 0.5
        y[..., 1] += anchor_points[:, [1]] - 0.5
        return y

    def calculate_keypoints_loss(
        self, masks, target_gt_idx, keypoints, batch_idx, stride_tensor, target_bboxes, pred_kpts
    ):
        """Calculate the keypoints loss."""
        # Implementation of keypoints loss calculation goes here
        pass  # Placeholder for actual implementation


class v8ClassificationLoss:
    """Criterion class for computing training losses."""

    def __call__(self, preds, batch):
        """Compute the classification loss between predictions and true labels."""
        loss = F.cross_entropy(preds, batch["cls"], reduction="mean")
        loss_items = loss.detach()
        return loss, loss_items


class v8OBBLoss(v8DetectionLoss):
    """Calculates losses for object detection, classification, and box distribution in rotated YOLO models."""

    def __init__(self, model):
        """Initializes v8OBBLoss with model, assigner, and rotated bbox loss; note model must be de-paralleled."""
        super().__init__(model)
        self.assigner = RotatedTaskAlignedAssigner(topk=10, num_classes=self.nc, alpha=0.5, beta=6.0)
        self.bbox_loss = RotatedBboxLoss(self.reg_max).to(self.device)

    def preprocess(self, targets, batch_size, scale_tensor):
        """Preprocesses the target counts and matches with the input batch size to output a tensor."""
        if targets.shape[0] == 0:
            # No targets: return an empty tensor
            out = torch.zeros(batch_size, 0, 6, device=self.device)
        else:
            i = targets[:, 0]  # image index
            _, counts = i.unique(return_counts=True)
            counts = counts.to(dtype=torch.int32)
            # Allocate one row per image, padded to the maximum label count
            out = torch.zeros(batch_size, counts.max(), 6, device=self.device)
            for j in range(batch_size):
                matches = i == j
                n = matches.sum()
                if n:
                    # Extract and scale bounding boxes, then concatenate with class labels
                    bboxes = targets[matches, 2:]
                    bboxes[..., :4].mul_(scale_tensor)
                    out[j, :n] = torch.cat([targets[matches, 1:2], bboxes], dim=-1)
        return out

    def bbox_decode(self, anchor_points, pred_dist, pred_angle):
        """
        Decode predicted object bounding box coordinates from anchor points and distribution.

        Args:
            anchor_points (torch.Tensor): Anchor points, (h*w, 2).
            pred_dist (torch.Tensor): Predicted rotated distance, (bs, h*w, 4).
            pred_angle (torch.Tensor): Predicted angle, (bs, h*w, 1).

        Returns:
            (torch.Tensor): Predicted rotated bounding boxes with angles, (bs, h*w, 5).
        """
        # When DFL (Distribution Focal Loss) is enabled, reduce the predicted distribution to expected distances
        if self.use_dfl:
            b, a, c = pred_dist.shape  # batch, anchors, channels
            # Reshape, softmax over the bins, then project to expected values
            pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
        # Concatenate the decoded rotated box coordinates with the predicted angle
        return torch.cat((dist2rbox(pred_dist, pred_angle, anchor_points), pred_angle), dim=-1)
# A class named E2EDetectLoss for computing training losses
class E2EDetectLoss:
    """Criterion class for computing training losses."""

    def __init__(self, model):
        """Initialize E2EDetectLoss with one-to-many and one-to-one detection losses using the provided model."""
        self.one2many = v8DetectionLoss(model, tal_topk=10)  # one-to-many detection loss
        self.one2one = v8DetectionLoss(model, tal_topk=1)  # one-to-one detection loss

    def __call__(self, preds, batch):
        """Calculate the sum of the loss for box, cls and dfl multiplied by batch size."""
        preds = preds[1] if isinstance(preds, tuple) else preds  # use the second element if preds is a tuple
        one2many = preds["one2many"]  # one-to-many predictions
        loss_one2many = self.one2many(one2many, batch)
        one2one = preds["one2one"]  # one-to-one predictions
        loss_one2one = self.one2one(one2one, batch)
        # Return the combined total loss and the combined per-component (box, cls, dfl) losses
        return loss_one2many[0] + loss_one2one[0], loss_one2many[1] + loss_one2one[1]
.\yolov8\ultralytics\utils\metrics.py
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""Model validation metrics."""import math
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch

from ultralytics.utils import LOGGER, SimpleClass, TryExcept, plt_settings

# Object Keypoint Similarity (OKS) sigmas for different keypoints
OKS_SIGMA = (
    np.array([0.26, 0.25, 0.25, 0.35, 0.35, 0.79, 0.79, 0.72, 0.72, 0.62, 0.62, 1.07, 1.07, 0.87, 0.87, 0.89, 0.89])
    / 10.0
)


def bbox_ioa(box1, box2, iou=False, eps=1e-7):
    """
    Calculate the intersection over box2 area given box1 and box2. Boxes are in x1y1x2y2 format.

    Args:
        box1 (np.ndarray): A numpy array of shape (n, 4) representing n bounding boxes.
        box2 (np.ndarray): A numpy array of shape (m, 4) representing m bounding boxes.
        iou (bool): Calculate the standard IoU if True else return inter_area/box2_area.
        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.

    Returns:
        (np.ndarray): A numpy array of shape (n, m) representing the intersection over box2 area.
    """
    # Get the coordinates of bounding boxes
    b1_x1, b1_y1, b1_x2, b1_y2 = box1.T
    b2_x1, b2_y1, b2_x2, b2_y2 = box2.T

    # Intersection area calculation
    inter_area = (np.minimum(b1_x2[:, None], b2_x2) - np.maximum(b1_x1[:, None], b2_x1)).clip(0) * (
        np.minimum(b1_y2[:, None], b2_y2) - np.maximum(b1_y1[:, None], b2_y1)
    ).clip(0)

    # Box2 area calculation
    area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
    if iou:
        box1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
        area = area + box1_area[:, None] - inter_area

    # Intersection over box2 area
    return inter_area / (area + eps)


def box_iou(box1, box2, eps=1e-7):
    """
    Calculate intersection-over-union (IoU) of boxes. Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Based on https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py

    Args:
        box1 (torch.Tensor): A tensor of shape (N, 4) representing N bounding boxes.
        box2 (torch.Tensor): A tensor of shape (M, 4) representing M bounding boxes.
        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.

    Returns:
        (torch.Tensor): An NxM tensor containing the pairwise IoU values for every element in box1 and box2.
    """
    # Convert box coordinates to float for accurate computation
    (a1, a2), (b1, b2) = box1.float().unsqueeze(1).chunk(2, 2), box2.float().unsqueeze(0).chunk(2, 2)

    # Calculate intersection area
    inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp_(0).prod(2)

    # Compute IoU
    return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)


def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
    """
    Calculate Intersection over Union (IoU) of box1(1, 4) to box2(n, 4).

    This function calculates IoU considering different variants such as Generalized IoU (GIoU),
    Distance IoU (DIoU), and Complete IoU (CIoU) if specified.

    Args:
        box1 (torch.Tensor): A tensor representing a single bounding box of shape (1, 4).
        box2 (torch.Tensor): A tensor representing n bounding boxes of shape (n, 4).
        xywh (bool, optional): If True, input boxes are in (x, y, w, h) format. If False, input boxes are in
            (x1, y1, x2, y2) format. Defaults to True.
        GIoU (bool, optional): If True, calculate Generalized IoU. Defaults to False.
        DIoU (bool, optional): If True, calculate Distance IoU. Defaults to False.
        CIoU (bool, optional): If True, calculate Complete IoU. Defaults to False.
        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.

    Returns:
        (torch.Tensor): IoU, GIoU, DIoU, or CIoU values depending on the specified flags, of shape (n,).
    """
    # Get the corner coordinates of the boxes, depending on the input format
    if xywh:  # (x, y, w, h) format
        # Split box1 and box2 into centers and sizes
        (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1)
        # Half widths and heights
        w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2
        # Corner coordinates of both boxes
        b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_
        b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_
    else:  # (x1, y1, x2, y2) format
        b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, -1)
        b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, -1)
        # Widths and heights, with a small eps to avoid division by zero
        w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
        w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps

    # Intersection area
    inter = (b1_x2.minimum(b2_x2) - b1_x1.maximum(b2_x1)).clamp_(0) * (
        b1_y2.minimum(b2_y2) - b1_y1.maximum(b2_y1)
    ).clamp_(0)

    # Union area
    union = w1 * h1 + w2 * h2 - inter + eps

    # IoU
    iou = inter / union
    if CIoU or DIoU or GIoU:
        # Width and height of the smallest enclosing box
        cw = b1_x2.maximum(b2_x2) - b1_x1.minimum(b2_x1)
        ch = b1_y2.maximum(b2_y2) - b1_y1.minimum(b2_y1)
        if CIoU or DIoU:  # Distance IoU or Complete IoU
            # Squared diagonal of the enclosing box
            c2 = cw.pow(2) + ch.pow(2) + eps
            # Squared distance between box centers
            rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2).pow(2) + (b2_y1 + b2_y2 - b1_y1 - b1_y2).pow(2)) / 4
            if CIoU:  # Complete IoU adds an aspect-ratio consistency term
                v = (4 / math.pi**2) * ((w2 / h2).atan() - (w1 / h1).atan()).pow(2)
                with torch.no_grad():
                    alpha = v / (v - iou + (1 + eps))
                return iou - (rho2 / c2 + v * alpha)  # CIoU
            return iou - rho2 / c2  # DIoU
        # GIoU penalizes the empty area of the enclosing box
        c_area = cw * ch + eps
        return iou - (c_area - union) / c_area  # GIoU
    return iou  # IoU
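A quick numeric check of bbox_iou, assuming the ultralytics package is importable: two 2x2 boxes offset by (1, 1) overlap in a 1x1 square, so the plain IoU is 1 / (4 + 4 - 1) ≈ 0.143, and CIoU comes out smaller because the center-distance penalty is subtracted.

import torch
from ultralytics.utils.metrics import bbox_iou  # assumes ultralytics is installed

box1 = torch.tensor([[0.0, 0.0, 2.0, 2.0]])
box2 = torch.tensor([[1.0, 1.0, 3.0, 3.0]])
print(bbox_iou(box1, box2, xywh=False))  # ~0.1428
print(bbox_iou(box1, box2, xywh=False, CIoU=True))  # smaller: center-distance penalty subtracted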
# Compute the probabilistic IoU between oriented bounding boxes, see https://arxiv.org/pdf/2106.06072v1.pdf
def _get_covariance_matrix(boxes):
    """
    Generate covariance matrix components from oriented bounding boxes.

    Args:
        boxes (torch.Tensor): A tensor of shape (N, 5) representing rotated bounding boxes, with xywhr format.

    Returns:
        (torch.Tensor): Covariance matrix components corresponding to the original rotated bounding boxes.
    """
    # Gaussian bounding boxes; the center points (first two columns) are ignored since they are not needed here
    gbbs = torch.cat((boxes[:, 2:4].pow(2) / 12, boxes[:, 4:]), dim=-1)
    a, b, c = gbbs.split(1, dim=-1)
    cos = c.cos()
    sin = c.sin()
    cos2 = cos.pow(2)
    sin2 = sin.pow(2)
    # Covariance matrix components of the rotated boxes
    return a * cos2 + b * sin2, a * sin2 + b * cos2, (a - b) * cos * sin


def probiou(obb1, obb2, CIoU=False, eps=1e-7):
    """
    Calculate the prob IoU between oriented bounding boxes, https://arxiv.org/pdf/2106.06072v1.pdf.

    Args:
        obb1 (torch.Tensor): A tensor of shape (N, 5) representing ground truth obbs, with xywhr format.
        obb2 (torch.Tensor): A tensor of shape (N, 5) representing predicted obbs, with xywhr format.
        CIoU (bool, optional): If True, compute Complete IoU. Defaults to False.
        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.

    Returns:
        (torch.Tensor): A tensor of shape (N,) representing obb similarities.
    """
    # Split the x and y coordinates from obb1 and obb2
    x1, y1 = obb1[..., :2].split(1, dim=-1)
    x2, y2 = obb2[..., :2].split(1, dim=-1)

    # Covariance matrix components for obb1 and obb2
    a1, b1, c1 = _get_covariance_matrix(obb1)
    a2, b2, c2 = _get_covariance_matrix(obb2)

    # Terms t1, t2, and t3 of the Bhattacharyya-distance-based IoU
    t1 = (
        ((a1 + a2) * (y1 - y2).pow(2) + (b1 + b2) * (x1 - x2).pow(2))
        / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps)
    ) * 0.25
    t2 = (((c1 + c2) * (x2 - x1) * (y1 - y2)) / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps)) * 0.5
    t3 = (
        ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2))
        / (4 * ((a1 * b1 - c1.pow(2)).clamp_(0) * (a2 * b2 - c2.pow(2)).clamp_(0)).sqrt() + eps)
        + eps
    ).log() * 0.5

    # Combined distance term, clamped to a numerically safe range
    bd = (t1 + t2 + t3).clamp(eps, 100.0)
    # Hellinger distance
    hd = (1.0 - (-bd).exp() + eps).sqrt()
    # IoU from the distance
    iou = 1 - hd

    # Compute Complete IoU (CIoU) if the CIoU flag is True
    if CIoU:
        # Width and height components of obb1 and obb2
        w1, h1 = obb1[..., 2:4].split(1, dim=-1)
        w2, h2 = obb2[..., 2:4].split(1, dim=-1)
        # Aspect-ratio consistency term
        v = (4 / math.pi**2) * ((w2 / h2).atan() - (w1 / h1).atan()).pow(2)
        # Compute the alpha factor and adjust IoU for CIoU
        with torch.no_grad():
            alpha = v / (v - iou + (1 + eps))
        return iou - v * alpha  # CIoU
    # Return the regular probabilistic IoU otherwise
    return iou
# Compute the pairwise probabilistic IoU between batches of oriented bounding boxes, see https://arxiv.org/pdf/2106.06072v1.pdf
def batch_probiou(obb1, obb2, eps=1e-7):
    """
    Calculate the prob IoU between oriented bounding boxes, https://arxiv.org/pdf/2106.06072v1.pdf.

    Args:
        obb1 (torch.Tensor | np.ndarray): A tensor of shape (N, 5) representing ground truth obbs, with xywhr format.
        obb2 (torch.Tensor | np.ndarray): A tensor of shape (M, 5) representing predicted obbs, with xywhr format.
        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.

    Returns:
        (torch.Tensor): A tensor of shape (N, M) representing obb similarities.
    """
    # Convert np.ndarray inputs to torch.Tensor
    obb1 = torch.from_numpy(obb1) if isinstance(obb1, np.ndarray) else obb1
    obb2 = torch.from_numpy(obb2) if isinstance(obb2, np.ndarray) else obb2

    # Split the xy centers; obb2's are reshaped for pairwise broadcasting
    x1, y1 = obb1[..., :2].split(1, dim=-1)
    x2, y2 = (x.squeeze(-1)[None] for x in obb2[..., :2].split(1, dim=-1))
    # Covariance matrix components
    a1, b1, c1 = _get_covariance_matrix(obb1)
    a2, b2, c2 = (x.squeeze(-1)[None] for x in _get_covariance_matrix(obb2))

    # The three terms of the probabilistic IoU
    t1 = (
        ((a1 + a2) * (y1 - y2).pow(2) + (b1 + b2) * (x1 - x2).pow(2))
        / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps)
    ) * 0.25
    t2 = (((c1 + c2) * (x2 - x1) * (y1 - y2)) / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps)) * 0.5
    t3 = (
        ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2))
        / (4 * ((a1 * b1 - c1.pow(2)).clamp_(0) * (a2 * b2 - c2.pow(2)).clamp_(0)).sqrt() + eps)
        + eps
    ).log() * 0.5

    # Combine the terms, clamped to a numerically safe range
    bd = (t1 + t2 + t3).clamp(eps, 100.0)
    hd = (1.0 - (-bd).exp() + eps).sqrt()
    # Return 1 minus the Hellinger distance
    return 1 - hd
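The covariance components produced by _get_covariance_matrix are easy to verify for the axis-aligned case (rotation r = 0), where they reduce to a = w²/12, b = h²/12, c = 0. A standalone sketch replicating that computation:

import torch

obb = torch.tensor([[50.0, 50.0, 12.0, 6.0, 0.0]])  # xywhr, rotation 0
gbbs = torch.cat((obb[:, 2:4].pow(2) / 12, obb[:, 4:]), dim=-1)
a, b, c = gbbs.split(1, dim=-1)
cos, sin = c.cos(), c.sin()
print(a * cos**2 + b * sin**2)  # 12^2 / 12 = 12
print(a * sin**2 + b * cos**2)  # 6^2 / 12 = 3
print((a - b) * cos * sin)  # 0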
# Compute smoothed positive and negative binary cross-entropy targets
def smooth_BCE(eps=0.1):
    """
    Computes smoothed positive and negative Binary Cross-Entropy targets.

    This function calculates positive and negative label smoothing BCE targets based on a given epsilon value.
    For implementation details, refer to https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441.

    Args:
        eps (float, optional): The epsilon value for label smoothing. Defaults to 0.1.

    Returns:
        (tuple): A tuple containing the positive and negative label smoothing BCE targets.
    """
    return 1.0 - 0.5 * eps, 0.5 * eps


class ConfusionMatrix:
    """
    A class for calculating and updating a confusion matrix for object detection and classification tasks.

    Attributes:
        task (str): The type of task, either 'detect' or 'classify'.
        matrix (np.ndarray): The confusion matrix, with dimensions depending on the task.
        nc (int): The number of classes.
        conf (float): The confidence threshold for detections.
        iou_thres (float): The Intersection over Union threshold.
    """

    def __init__(self, nc, conf=0.25, iou_thres=0.45, task="detect"):
        """
        Initialize attributes for the YOLO model.

        Args:
            nc (int): Number of classes.
            conf (float): Confidence threshold, adjusted to 0.25 if None or 0.001 is passed.
            iou_thres (float): IoU (Intersection over Union) threshold.
            task (str): Task type, either "detect" or other.
        """
        self.task = task
        # Detection uses an extra row/column for the background class
        self.matrix = np.zeros((nc + 1, nc + 1)) if self.task == "detect" else np.zeros((nc, nc))
        self.nc = nc  # number of classes
        self.conf = 0.25 if conf in {None, 0.001} else conf  # apply 0.25 if the default val conf is passed
        self.iou_thres = iou_thres

    def update_matrix(self, targets, preds):
        """
        Update the confusion matrix with new target and prediction entries.

        Args:
            targets (np.ndarray): An array containing the ground truth labels.
            preds (np.ndarray): An array containing the predicted labels.
        """
        for t, p in zip(targets, preds):
            self.matrix[t, p] += 1

    def reset_matrix(self):
        """Reset the confusion matrix to all zeros."""
        self.matrix.fill(0)

    def print_matrix(self):
        """Print the current state of the confusion matrix."""
        print(self.matrix)

    def process_cls_preds(self, preds, targets):
        """
        Update confusion matrix for classification task.

        Args:
            preds (Array[N, min(nc,5)]): Predicted class labels.
            targets (Array[N, 1]): Ground truth class labels.
        """
        preds, targets = torch.cat(preds)[:, 0], torch.cat(targets)
        for p, t in zip(preds.cpu().numpy(), targets.cpu().numpy()):
            self.matrix[p][t] += 1

    def process_batch(self, detections, gt_bboxes, gt_cls):
        """
        Update confusion matrix for object detection task.

        Args:
            detections (Array[N, 6] | Array[N, 7]): Detected bounding boxes and their associated information.
                Each row should contain (x1, y1, x2, y2, conf, class)
                or with an additional element `angle` when it's obb.
            gt_bboxes (Array[M, 4] | Array[M, 5]): Ground truth bounding boxes with xyxy/xyxyr format.
            gt_cls (Array[M]): The class labels.
        """
        # Case 1: no ground-truth labels in this image
        if gt_cls.shape[0] == 0:
            if detections is not None:
                # Filter out low-confidence detections
                detections = detections[detections[:, 4] > self.conf]
                detection_classes = detections[:, 5].int()
                for dc in detection_classes:
                    self.matrix[dc, self.nc] += 1  # false positives
            return
        # Case 2: no detections in this image
        if detections is None:
            gt_classes = gt_cls.int()
            for gc in gt_classes:
                self.matrix[self.nc, gc] += 1  # background FN
            return

        # Filter out low-confidence detections
        detections = detections[detections[:, 4] > self.conf]
        gt_classes = gt_cls.int()
        detection_classes = detections[:, 5].int()
        # Detections with an angle column and 5-column ground truth indicate OBBs
        is_obb = detections.shape[1] == 7 and gt_bboxes.shape[1] == 5
        # IoU between ground truth and detections
        iou = (
            batch_probiou(gt_bboxes, torch.cat([detections[:, :4], detections[:, -1:]], dim=-1))
            if is_obb
            else box_iou(gt_bboxes, detections[:, :4])
        )

        # Keep pairs above the IoU threshold and resolve duplicates greedily by IoU
        x = torch.where(iou > self.iou_thres)
        if x[0].shape[0]:
            matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
            if x[0].shape[0] > 1:
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
        else:
            matches = np.zeros((0, 3))

        n = matches.shape[0] > 0
        m0, m1, _ = matches.transpose().astype(int)
        # Update the matrix for each ground-truth object
        for i, gc in enumerate(gt_classes):
            j = m0 == i
            if n and sum(j) == 1:
                self.matrix[detection_classes[m1[j]], gc] += 1  # correct
            else:
                self.matrix[self.nc, gc] += 1  # true background (missed)
        # Unmatched detections count as predicted background
        if n:
            for i, dc in enumerate(detection_classes):
                if not any(m1 == i):
                    self.matrix[dc, self.nc] += 1  # predicted background

    def tp_fp(self):
        """Returns true positives and false positives."""
        tp = self.matrix.diagonal()  # true positives on the diagonal
        fp = self.matrix.sum(1) - tp  # row sums minus the diagonal give false positives
        # fn = self.matrix.sum(0) - tp  # false negatives (missed detections)
        return (tp[:-1], fp[:-1]) if self.task == "detect" else (tp, fp)  # remove background class if task=detect

    @TryExcept("WARNING ⚠️ ConfusionMatrix plot failure")
    @plt_settings()
    def plot(self, normalize=True, save_dir="", names=(), on_plot=None):
        """
        Plot the confusion matrix using seaborn and save it to a file.

        Args:
            normalize (bool): Whether to normalize the confusion matrix.
            save_dir (str): Directory where the plot will be saved.
            names (tuple): Names of classes, used as labels on the plot.
            on_plot (func): An optional callback to pass plots path and data when they are rendered.
        """
        import seaborn  # used for drawing the confusion matrix

        # Column-normalize the matrix if requested
        array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1e-9) if normalize else 1)
        array[array < 0.005] = np.nan  # values below 0.005 are not annotated

        fig, ax = plt.subplots(1, 1, figsize=(12, 9), tight_layout=True)
        nc, nn = self.nc, len(names)  # number of classes, number of names
        seaborn.set_theme(font_scale=1.0 if nc < 50 else 0.8)  # adjust label size to the class count
        labels = (0 < nn < 99) and (nn == nc)  # apply names to ticklabels only when they match the class count
        ticklabels = (list(names) + ["background"]) if labels else "auto"
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # suppress empty-matrix RuntimeWarning: All-NaN slice encountered
            seaborn.heatmap(
                array,
                ax=ax,
                annot=nc < 30,  # annotate cell values only for small class counts
                annot_kws={"size": 8},  # annotation font size
                cmap="Blues",
                fmt=".2f" if normalize else ".0f",  # two decimals when normalized, integers otherwise
                square=True,
                vmin=0.0,
                xticklabels=ticklabels,
                yticklabels=ticklabels,
            ).set_facecolor((1, 1, 1))  # white background
        title = "Confusion Matrix" + " Normalized" * normalize
        ax.set_xlabel("True")
        ax.set_ylabel("Predicted")
        ax.set_title(title)
        plot_fname = Path(save_dir) / f'{title.lower().replace(" ", "_")}.png'
        fig.savefig(plot_fname, dpi=250)
        plt.close(fig)
        if on_plot:
            on_plot(plot_fname)  # invoke the callback with the saved plot path

    def print(self):
        """Print the confusion matrix to the console."""
        for i in range(self.nc + 1):  # one row per class plus background
            LOGGER.info(" ".join(map(str, self.matrix[i])))
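A hypothetical usage sketch for the classification path, assuming ultralytics is installed; process_cls_preds expects lists of per-batch tensors, and the example labels below are made up.

import torch
from ultralytics.utils.metrics import ConfusionMatrix

cm = ConfusionMatrix(nc=3, task="classify")
preds = [torch.tensor([[0], [2], [1]])]  # top-1 predictions, shape (N, 1)
targets = [torch.tensor([0, 1, 1])]  # ground-truth labels
cm.process_cls_preds(preds, targets)
print(cm.matrix)  # 3x3 matrix with entries at (0,0), (2,1), (1,1)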
def compute_ap(recall, precision):
    """
    Compute the average precision (AP) given the recall and precision curves.

    Args:
        recall (list): The recall curve.
        precision (list): The precision curve.

    Returns:
        (float): Average precision.
        (np.ndarray): Precision envelope curve.
        (np.ndarray): Modified recall curve with sentinel values added at the beginning and end.
    """
    # Append sentinel values to beginning and end
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([1.0], precision, [0.0]))

    # Compute the precision envelope
    mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))

    # Integrate area under curve
    method = "interp"  # 'interp' (101-point interpolation) or 'continuous'
    if method == "interp":
        x = np.linspace(0, 1, 101)  # 101 evenly spaced points on [0, 1] (COCO)
        ap = np.trapz(np.interp(x, mrec, mpre), x)  # trapezoidal rule on the interpolated curve
    else:  # 'continuous'
        i = np.where(mrec[1:] != mrec[:-1])[0]  # indices where the recall value changes
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])  # area under curve

    return ap, mpre, mrec


def ap_per_class(
    tp, conf, pred_cls, target_cls, plot=False, on_plot=None, save_dir=Path(), names={}, eps=1e-16, prefix=""
):
    """Computes the average precision per class for object detection evaluation."""
    # Sort indices by object confidence, descending
    i = np.argsort(-conf)
    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]

    # Find unique classes and their counts
    unique_classes, nt = np.unique(target_cls, return_counts=True)
    nc = unique_classes.shape[0]  # number of classes, number of detections

    # Create the Precision-Recall curve and compute AP for each class
    x, prec_values = np.linspace(0, 1, 1000), []

    # Arrays holding average precision (AP), precision, and recall curves
    ap, p_curve, r_curve = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
    for ci, c in enumerate(unique_classes):
        # ci is the index of class c in unique_classes
        i = pred_cls == c
        n_l = nt[ci]  # number of labels
        n_p = i.sum()  # number of predictions
        # Skip classes with no predictions or no labels
        if n_p == 0 or n_l == 0:
            continue

        # Accumulate false positives and true positives
        fpc = (1 - tp[i]).cumsum(0)
        tpc = tp[i].cumsum(0)

        # Recall
        recall = tpc / (n_l + eps)  # recall curve
        # Interpolate on negative x because xp must be increasing and conf decreases
        r_curve[ci] = np.interp(-x, -conf[i], recall[:, 0], left=0)

        # Precision
        precision = tpc / (tpc + fpc)  # precision curve
        p_curve[ci] = np.interp(-x, -conf[i], precision[:, 0], left=1)  # p at pr_score

        # AP from the recall-precision curve
        for j in range(tp.shape[1]):
            ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
            # Record the precision values at mAP@0.5 for plotting
            if plot and j == 0:
                prec_values.append(np.interp(x, mrec, mpre))  # precision at mAP@0.5

    prec_values = np.array(prec_values)  # (nc, 1000)

    # F1 curve (harmonic mean of precision and recall)
    f1_curve = 2 * p_curve * r_curve / (p_curve + r_curve + eps)
    # Keep only the classes that have data
    names = [v for k, v in names.items() if k in unique_classes]  # list: only classes that have data
    names = dict(enumerate(names))
    # Optionally plot the precision-recall, F1, precision, and recall curves
    if plot:
        plot_pr_curve(x, prec_values, ap, save_dir / f"{prefix}PR_curve.png", names, on_plot=on_plot)
        plot_mc_curve(x, f1_curve, save_dir / f"{prefix}F1_curve.png", names, ylabel="F1", on_plot=on_plot)
        plot_mc_curve(x, p_curve, save_dir / f"{prefix}P_curve.png", names, ylabel="Precision", on_plot=on_plot)
        plot_mc_curve(x, r_curve, save_dir / f"{prefix}R_curve.png", names, ylabel="Recall", on_plot=on_plot)

    # Index of the maximum value on the smoothed mean F1 curve
    i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index
    p, r, f1 = p_curve[:, i], r_curve[:, i], f1_curve[:, i]  # max-F1 precision, recall, F1 values
    tp = (r * nt).round()  # true positives
    fp = (tp / (p + eps) - tp).round()  # false positives
    # Return TP, FP, precision, recall, F1, AP, unique classes, and the curves
    return tp, fp, p, r, f1, ap, unique_classes.astype(int), p_curve, r_curve, f1_curve, x, prec_values
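A toy check of compute_ap, assuming ultralytics is importable: a detector whose precision stays at 1.0 across every recall level should score an AP of approximately 1.0 under the 101-point interpolation (interpolation at the curve endpoints may shave off a fraction).

import numpy as np
from ultralytics.utils.metrics import compute_ap  # assumes ultralytics is installed

recall = np.array([0.2, 0.4, 0.6, 0.8, 1.0])
precision = np.array([1.0, 1.0, 1.0, 1.0, 1.0])
ap, mpre, mrec = compute_ap(recall, precision)
print(round(ap, 3))  # ~1.0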
class Metric(SimpleClass):
    """
    Class for computing evaluation metrics for YOLOv8 model.

    Attributes:
        p (list): Precision for each class. Shape: (nc,).
        r (list): Recall for each class. Shape: (nc,).
        f1 (list): F1 score for each class. Shape: (nc,).
        all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10).
        ap_class_index (list): Index of class for each AP score. Shape: (nc,).
        nc (int): Number of classes.

    Methods:
        ap50(): AP at IoU threshold of 0.5 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
        ap(): AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
        mp(): Mean precision of all classes. Returns: Float.
        mr(): Mean recall of all classes. Returns: Float.
        map50(): Mean AP at IoU threshold of 0.5 for all classes. Returns: Float.
        map75(): Mean AP at IoU threshold of 0.75 for all classes. Returns: Float.
        map(): Mean AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: Float.
        mean_results(): Mean of results, returns mp, mr, map50, map.
        class_result(i): Class-aware result, returns p[i], r[i], ap50[i], ap[i].
        maps(): mAP of each class. Returns: Array of mAP scores, shape: (nc,).
        fitness(): Model fitness as a weighted combination of metrics. Returns: Float.
        update(results): Update metric attributes with new evaluation results.
    """

    def __init__(self) -> None:
        """Initializes a Metric instance for computing evaluation metrics for the YOLOv8 model."""
        self.p = []  # precision per class
        self.r = []  # recall per class
        self.f1 = []  # F1 score per class
        self.all_ap = []  # AP scores per class and IoU threshold
        self.ap_class_index = []  # class index for each AP entry
        self.nc = 0  # number of classes

    @property
    def ap50(self):
        """
        Returns the Average Precision (AP) at an IoU threshold of 0.5 for all classes.

        Returns:
            (np.ndarray, list): Array of shape (nc,) with AP50 values per class, or an empty list if not available.
        """
        return self.all_ap[:, 0] if len(self.all_ap) else []

    @property
    def ap(self):
        """
        Returns the Average Precision (AP) at IoU thresholds from 0.5 to 0.95 for all classes.

        Returns:
            (np.ndarray, list): Array of shape (nc,) with mean AP values per class, or an empty list if not available.
        """
        return self.all_ap.mean(1) if len(self.all_ap) else []

    @property
    def mp(self):
        """
        Returns the Mean Precision of all classes.

        Returns:
            (float): The mean precision of all classes.
        """
        return self.p.mean() if len(self.p) else 0.0

    @property
    def mr(self):
        """
        Returns the Mean Recall of all classes.

        Returns:
            (float): The mean recall of all classes.
        """
        return self.r.mean() if len(self.r) else 0.0

    @property
    def map50(self):
        """
        Returns the mean Average Precision (mAP) at an IoU threshold of 0.5.

        Returns:
            (float): The mAP at an IoU threshold of 0.5.
        """
        return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0

    @property
    def map75(self):
        """
        Returns the mean Average Precision (mAP) at an IoU threshold of 0.75.

        Returns:
            (float): The mAP at an IoU threshold of 0.75.
        """
        return self.all_ap[:, 5].mean() if len(self.all_ap) else 0.0

    @property
    def map(self):
        """
        Returns the mean Average Precision (mAP) over IoU thresholds of 0.5 - 0.95 in steps of 0.05.

        Returns:
            (float): The mAP over IoU thresholds of 0.5 - 0.95 in steps of 0.05.
        """
        return self.all_ap.mean() if len(self.all_ap) else 0.0

    def mean_results(self):
        """Mean of results, return mp, mr, map50, map."""
        return [self.mp, self.mr, self.map50, self.map]

    def class_result(self, i):
        """Class-aware result, return p[i], r[i], ap50[i], ap[i]."""
        return self.p[i], self.r[i], self.ap50[i], self.ap[i]

    @property
    def maps(self):
        """MAP of each class."""
        maps = np.zeros(self.nc) + self.map
        for i, c in enumerate(self.ap_class_index):
            maps[c] = self.ap[i]
        return maps

    def fitness(self):
        """Model fitness as a weighted combination of metrics."""
        w = [0.0, 0.0, 0.1, 0.9]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
        return (np.array(self.mean_results()) * w).sum()

    def update(self, results):
        """
        Updates the evaluation metrics of the model with a new set of results.

        Args:
            results (tuple): A tuple containing the following evaluation metrics:
                - p (list): Precision for each class. Shape: (nc,).
                - r (list): Recall for each class. Shape: (nc,).
                - f1 (list): F1 score for each class. Shape: (nc,).
                - all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10).
                - ap_class_index (list): Index of class for each AP score. Shape: (nc,).

        Side Effects:
            Updates the class attributes `self.p`, `self.r`, `self.f1`, `self.all_ap`, and `self.ap_class_index` based
            on the values provided in the `results` tuple.
        """
        (
            self.p,
            self.r,
            self.f1,
            self.all_ap,
            self.ap_class_index,
            self.p_curve,
            self.r_curve,
            self.f1_curve,
            self.px,
            self.prec_values,
        ) = results

    @property
    def curves(self):
        """Returns a list of curves for accessing specific metrics curves."""
        return []

    @property
    def curves_results(self):
        """Returns a list of curves for accessing specific metrics curves."""
        return [
            [self.px, self.prec_values, "Recall", "Precision"],  # Precision-Recall curve
            [self.px, self.f1_curve, "Confidence", "F1"],  # F1-Confidence curve
            [self.px, self.p_curve, "Confidence", "Precision"],  # Precision-Confidence curve
            [self.px, self.r_curve, "Confidence", "Recall"],  # Recall-Confidence curve
        ]
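The fitness weighting is easy to verify by hand: with w = [0.0, 0.0, 0.1, 0.9] the score is dominated by mAP50-95. The numbers below are made up.

import numpy as np

mp, mr, map50, map5095 = 0.8, 0.7, 0.65, 0.45  # hypothetical mean results
w = [0.0, 0.0, 0.1, 0.9]
fitness = (np.array([mp, mr, map50, map5095]) * w).sum()
print(fitness)  # 0.065 + 0.405 = 0.47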
class DetMetrics(SimpleClass):
    """
    This class is a utility class for computing detection metrics such as precision, recall, and mean average precision
    (mAP) of an object detection model.

    Args:
        save_dir (Path): A path to the directory where the output plots will be saved. Defaults to current directory.
        plot (bool): A flag that indicates whether to plot precision-recall curves for each class. Defaults to False.
        on_plot (func): An optional callback to pass plots path and data when they are rendered. Defaults to None.
        names (tuple of str): A tuple of strings that represents the names of the classes. Defaults to an empty tuple.

    Attributes:
        save_dir (Path): A path to the directory where the output plots will be saved.
        plot (bool): A flag that indicates whether to plot the precision-recall curves for each class.
        on_plot (func): An optional callback to pass plots path and data when they are rendered.
        names (tuple of str): A tuple of strings that represents the names of the classes.
        box (Metric): An instance of the Metric class for storing the results of the detection metrics.
        speed (dict): A dictionary for storing the execution time of different parts of the detection process.

    Methods:
        process(tp, conf, pred_cls, target_cls): Updates the metric results with the latest batch of predictions.
        keys: Returns a list of keys for accessing the computed detection metrics.
        mean_results: Returns a list of mean values for the computed detection metrics.
        class_result(i): Returns a list of values for the computed detection metrics for a specific class.
        maps: Returns a dictionary of mean average precision (mAP) values for different IoU thresholds.
        fitness: Computes the fitness score based on the computed detection metrics.
        ap_class_index: Returns a list of class indices sorted by their average precision (AP) values.
        results_dict: Returns a dictionary that maps detection metric keys to their computed values.
        curves: Returns the names of the available metric curves.
        curves_results: Returns the computed metric curves.
    """

    def __init__(self, save_dir=Path("."), plot=False, on_plot=None, names=()) -> None:
        """Initialize a DetMetrics instance with a save directory, plot flag, callback function, and class names."""
        self.save_dir = save_dir  # directory for output plots, defaults to the current directory
        self.plot = plot  # whether to plot per-class precision-recall curves
        self.on_plot = on_plot  # optional callback invoked with plot path and data
        self.names = names  # class names covered by the detection model
        self.box = Metric()  # stores the detection metric results
        # Execution time of the different phases of the detection pipeline
        self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}
        self.task = "detect"

    @property
    def keys(self):
        """Returns a list of keys for accessing specific metrics."""
        return ["metrics/precision(B)", "metrics/recall(B)", "metrics/mAP50(B)", "metrics/mAP50-95(B)"]

    def mean_results(self):
        """Calculate mean of detected objects & return precision, recall, mAP50, and mAP50-95."""
        return self.box.mean_results()

    def class_result(self, i):
        """Return the result of evaluating the performance of an object detection model on a specific class."""
        return self.box.class_result(i)

    @property
    def maps(self):
        """Returns mean Average Precision (mAP) scores per class."""
        return self.box.maps

    @property
    def fitness(self):
        """Returns the fitness of box object."""
        return self.box.fitness()

    @property
    def ap_class_index(self):
        """Returns the average precision index per class."""
        return self.box.ap_class_index

    @property
    def results_dict(self):
        """Returns dictionary of computed performance metrics and statistics."""
        return dict(zip(self.keys + ["fitness"], self.mean_results() + [self.fitness]))

    @property
    def curves(self):
        """Returns a list of curves for accessing specific metrics curves."""
        return ["Precision-Recall(B)", "F1-Confidence(B)", "Precision-Confidence(B)", "Recall-Confidence(B)"]

    @property
    def curves_results(self):
        """Returns dictionary of computed performance metrics and statistics."""
        return self.box.curves_results
# SegmentMetrics: inherits from SimpleClass and aggregates detection and segmentation metrics over a set of classes
class SegmentMetrics(SimpleClass):
    """
    Calculates and aggregates detection and segmentation metrics over a given set of classes.

    Args:
        save_dir (Path): Path to the directory where the output plots should be saved. Default is the current directory.
        plot (bool): Whether to save the detection and segmentation plots. Default is False.
        on_plot (func): An optional callback to pass plots path and data when they are rendered. Defaults to None.
        names (list): List of class names. Default is an empty list.

    Attributes:
        save_dir (Path): Path to the directory where the output plots should be saved.
        plot (bool): Whether to save the detection and segmentation plots.
        on_plot (func): An optional callback to pass plots path and data when they are rendered.
        names (list): List of class names.
        box (Metric): An instance of the Metric class to calculate box detection metrics.
        seg (Metric): An instance of the Metric class to calculate mask segmentation metrics.
        speed (dict): Dictionary to store the time taken in different phases of inference.

    Methods:
        process(tp_m, tp_b, conf, pred_cls, target_cls): Processes metrics over the given set of predictions.
        mean_results(): Returns the mean of the detection and segmentation metrics over all the classes.
        class_result(i): Returns the detection and segmentation metrics of class `i`.
        maps: Returns the mean Average Precision (mAP) scores for IoU thresholds ranging from 0.50 to 0.95.
        fitness: Returns the fitness scores, which are a single weighted combination of metrics.
        ap_class_index: Returns the list of indices of classes used to compute Average Precision (AP).
        results_dict: Returns the dictionary containing all the detection and segmentation metrics and fitness score.
    """

    def __init__(self, save_dir=Path("."), plot=False, on_plot=None, names=()) -> None:
        """Initialize a SegmentMetrics instance with a save directory, plot flag, callback function, and class names."""
        self.save_dir = save_dir  # directory for result plots
        self.plot = plot  # whether to save detection and segmentation plots
        self.on_plot = on_plot  # optional callback invoked with plot path and data
        self.names = names  # class names
        self.box = Metric()  # box detection metrics
        self.seg = Metric()  # mask segmentation metrics
        # Time taken in the different phases of inference
        self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}
        self.task = "segment"

    @property
    def keys(self):
        """Returns a list of keys for accessing metrics."""
        return [
            "metrics/precision(B)",  # precision (boxes)
            "metrics/recall(B)",  # recall (boxes)
            "metrics/mAP50(B)",  # mAP @ IoU 50% (boxes)
            "metrics/mAP50-95(B)",  # mAP @ IoU 50%-95% (boxes)
            "metrics/precision(M)",  # precision (masks)
            "metrics/recall(M)",  # recall (masks)
            "metrics/mAP50(M)",  # mAP @ IoU 50% (masks)
            "metrics/mAP50-95(M)",  # mAP @ IoU 50%-95% (masks)
        ]

    def mean_results(self):
        """Return the mean metrics for bounding box and segmentation results."""
        return self.box.mean_results() + self.seg.mean_results()

    def class_result(self, i):
        """Returns classification results for a specified class index."""
        return self.box.class_result(i) + self.seg.class_result(i)

    @property
    def maps(self):
        """Returns mAP scores for object detection and semantic segmentation models."""
        return self.box.maps + self.seg.maps

    @property
    def fitness(self):
        """Get the fitness score for both segmentation and bounding box models."""
        return self.seg.fitness() + self.box.fitness()

    @property
    def ap_class_index(self):
        """Boxes and masks have the same ap_class_index."""
        return self.box.ap_class_index

    @property
    def results_dict(self):
        """Returns results of object detection model for evaluation."""
        return dict(zip(self.keys + ["fitness"], self.mean_results() + [self.fitness]))

    @property
    def curves(self):
        """Returns a list of curves for accessing specific metrics curves."""
        return [
            "Precision-Recall(B)",
            "F1-Confidence(B)",
            "Precision-Confidence(B)",
            "Recall-Confidence(B)",
            "Precision-Recall(M)",
            "F1-Confidence(M)",
            "Precision-Confidence(M)",
            "Recall-Confidence(M)",
        ]

    @property
    def curves_results(self):
        """Returns dictionary of computed performance metrics and statistics."""
        return self.box.curves_results + self.seg.curves_results
class PoseMetrics(SegmentMetrics):
    """
    Calculates and aggregates detection and pose metrics over a given set of classes.

    Args:
        save_dir (Path): Path to the directory where the output plots should be saved. Default is the current directory.
        plot (bool): Whether to save the detection and pose plots. Default is False.
        on_plot (func): An optional callback to pass plots path and data when they are rendered. Defaults to None.
        names (list): List of class names. Default is an empty list.

    Attributes:
        save_dir (Path): Path to the directory where the output plots should be saved.
        plot (bool): Whether to save the detection and pose plots.
        on_plot (func): An optional callback to pass plots path and data when they are rendered.
        names (list): List of class names.
        box (Metric): An instance of the Metric class to calculate box detection metrics.
        pose (Metric): An instance of the Metric class to calculate pose keypoint metrics.
        speed (dict): Dictionary to store the time taken in different phases of inference.

    Methods:
        process(tp_m, tp_b, conf, pred_cls, target_cls): Processes metrics over the given set of predictions.
        mean_results(): Returns the mean of the detection and pose metrics over all the classes.
        class_result(i): Returns the detection and pose metrics of class `i`.
        maps: Returns the mean Average Precision (mAP) scores for IoU thresholds ranging from 0.50 to 0.95.
        fitness: Returns the fitness scores, which are a single weighted combination of metrics.
        ap_class_index: Returns the list of indices of classes used to compute Average Precision (AP).
        results_dict: Returns the dictionary containing all the detection and pose metrics and fitness score.
    """

    def __init__(self, save_dir=Path("."), plot=False, on_plot=None, names=()) -> None:
        """Initialize the PoseMetrics class with directory path, class names, and plotting options."""
        # Initialize the SegmentMetrics base class attributes
        super().__init__(save_dir, plot, names)
        self.save_dir = save_dir  # directory for output plots
        self.plot = plot  # whether to save detection and pose plots
        self.on_plot = on_plot  # optional callback invoked with plot path and data
        self.names = names  # class names
        self.box = Metric()  # box detection metrics
        self.pose = Metric()  # pose keypoint metrics
        # Time taken in the different phases of inference
        self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}
        self.task = "pose"

    def process(self, tp, tp_p, conf, pred_cls, target_cls):
        """
        Processes the detection and pose metrics over the given set of predictions.

        Args:
            tp (list): List of True Positive boxes.
            tp_p (list): List of True Positive keypoints.
            conf (list): List of confidence scores.
            pred_cls (list): List of predicted classes.
            target_cls (list): List of target classes.
        """
        # Calculate pose metrics per class and update the pose evaluator
        results_pose = ap_per_class(
            tp_p,
            conf,
            pred_cls,
            target_cls,
            plot=self.plot,
            on_plot=self.on_plot,
            save_dir=self.save_dir,
            names=self.names,
            prefix="Pose",
        )[2:]
        self.pose.nc = len(self.names)
        self.pose.update(results_pose)

        # Calculate box metrics per class and update the box evaluator
        results_box = ap_per_class(
            tp,
            conf,
            pred_cls,
            target_cls,
            plot=self.plot,
            on_plot=self.on_plot,
            save_dir=self.save_dir,
            names=self.names,
            prefix="Box",
        )[2:]
        self.box.nc = len(self.names)
        self.box.update(results_box)

    @property
    def keys(self):
        """Returns list of evaluation metric keys."""
        return [
            "metrics/precision(B)",
            "metrics/recall(B)",
            "metrics/mAP50(B)",
            "metrics/mAP50-95(B)",
            "metrics/precision(P)",
            "metrics/recall(P)",
            "metrics/mAP50(P)",
            "metrics/mAP50-95(P)",
        ]

    def mean_results(self):
        """Return the mean results of box and pose."""
        return self.box.mean_results() + self.pose.mean_results()

    def class_result(self, i):
        """Return the class-wise detection results for a specific class i."""
        return self.box.class_result(i) + self.pose.class_result(i)

    @property
    def maps(self):
        """Returns the mean average precision (mAP) per class for both box and pose detections."""
        return self.box.maps + self.pose.maps

    @property
    def fitness(self):
        """Returns the combined fitness score of the box and pose metrics."""
        return self.pose.fitness() + self.box.fitness()

    @property
    def curves(self):
        """Returns a list of curves for accessing specific metrics curves."""
        return [
            "Precision-Recall(B)",
            "F1-Confidence(B)",
            "Precision-Confidence(B)",
            "Recall-Confidence(B)",
            "Precision-Recall(P)",
            "F1-Confidence(P)",
            "Precision-Confidence(P)",
            "Recall-Confidence(P)",
        ]

    @property
    def curves_results(self):
        """Returns dictionary of computed performance metrics and statistics."""
        return self.box.curves_results + self.pose.curves_results

class ClassifyMetrics(SimpleClass):
    """
    Class for computing classification metrics including top-1 and top-5 accuracy.

    Attributes:
        top1 (float): The top-1 accuracy.
        top5 (float): The top-5 accuracy.
        speed (Dict[str, float]): A dictionary containing the time taken for each step in the pipeline.
        fitness (float): The fitness of the model, the mean of top-1 and top-5 accuracy.
        results_dict (Dict[str, Union[float, str]]): A dictionary containing the classification metrics and fitness.
        keys (List[str]): A list of keys for the results_dict.

    Methods:
        process(targets, pred): Processes the targets and predictions to compute classification metrics.
    """

    def __init__(self) -> None:
        """Initialize a ClassifyMetrics instance."""
        self.top1 = 0  # top-1 accuracy
        self.top5 = 0  # top-5 accuracy
        # Time taken per pipeline phase, all initialized to 0.0
        self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}
        self.task = "classify"  # task type

    def process(self, targets, pred):
        """Target classes and predicted classes."""
        # Concatenate predictions and targets across batches before computing accuracy
        pred, targets = torch.cat(pred), torch.cat(targets)
        # Per-sample correctness against each of the top-k predicted classes
        correct = (targets[:, None] == pred).float()
        acc = torch.stack((correct[:, 0], correct.max(1).values), dim=1)  # (top1, top5) accuracy
        self.top1, self.top5 = acc.mean(0).tolist()

    @property
    def fitness(self):
        """Returns mean of top-1 and top-5 accuracies as fitness score."""
        return (self.top1 + self.top5) / 2

    @property
    def results_dict(self):
        """Returns a dictionary with model's performance metrics and fitness score."""
        return dict(zip(self.keys + ["fitness"], [self.top1, self.top5, self.fitness]))

    @property
    def keys(self):
        """Returns a list of keys for the results_dict property."""
        return ["metrics/accuracy_top1", "metrics/accuracy_top5"]

    @property
    def curves(self):
        """Returns a list of curves for accessing specific metrics curves."""
        # Classification exposes no metric curves, so return an empty list
        return []

    @property
    def curves_results(self):
        """Returns a list of curves for accessing specific metrics curves."""
        return []


class OBBMetrics(SimpleClass):
    """Metrics for evaluating oriented bounding box (OBB) detection, see https://arxiv.org/pdf/2106.06072.pdf."""

    def __init__(self, save_dir=Path("."), plot=False, on_plot=None, names=()) -> None:
        """Initialize an OBBMetrics instance with directory, plotting, callback, and class names."""
        self.save_dir = save_dir  # directory where output plots are saved
        self.plot = plot  # whether to plot results
        self.on_plot = on_plot  # optional plotting callback
        self.names = names  # list of class names
        self.box = Metric()  # Metric instance for box detection metrics
        # Time taken per pipeline phase, all initialized to 0.0
        self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}

    def process(self, tp, conf, pred_cls, target_cls):
        """Process predicted results for object detection and update metrics."""
        # ap_per_class computes per-class AP statistics; drop the first two returned elements
        results = ap_per_class(
            tp,
            conf,
            pred_cls,
            target_cls,
            plot=self.plot,
            save_dir=self.save_dir,
            names=self.names,
            on_plot=self.on_plot,
        )[2:]
        self.box.nc = len(self.names)  # update the number of classes
        self.box.update(results)  # update box metrics with the calculated results

    @property
    def keys(self):
        """Returns a list of keys for accessing specific metrics."""
        return ["metrics/precision(B)", "metrics/recall(B)", "metrics/mAP50(B)", "metrics/mAP50-95(B)"]

    def mean_results(self):
        """Calculate mean of detected objects & return precision, recall, mAP50, and mAP50-95."""
        return self.box.mean_results()

    def class_result(self, i):
        """Return the result of evaluating the performance of an object detection model on a specific class."""
        return self.box.class_result(i)

    @property
    def maps(self):
        """Returns mean Average Precision (mAP) scores per class."""
        return self.box.maps

    @property
    def fitness(self):
        """Returns the fitness of box object."""
        return self.box.fitness()

    @property
    def ap_class_index(self):
        """Returns the average precision index per class."""
        return self.box.ap_class_index

    @property
    def results_dict(self):
        """Returns dictionary of computed performance metrics and statistics."""
        return dict(zip(self.keys + ["fitness"], self.mean_results() + [self.fitness]))

    @property
    def curves(self):
        """Returns a list of curves for accessing specific metrics curves."""
        # OBB evaluation exposes no metric curves, so return an empty list
        return []

    @property
    def curves_results(self):
        """Returns a list of curves for accessing specific metrics curves."""
        return []
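To make the aggregation concrete, here is a minimal usage sketch (my own illustration, not part of the source; the tensors are dummy data) showing how ClassifyMetrics.process turns stacked top-5 predictions into top-1/top-5 accuracy:

import torch
from ultralytics.utils.metrics import ClassifyMetrics

metrics = ClassifyMetrics()
# pred: one tensor per batch, each row the class indices sorted by descending confidence (top-5)
pred = [torch.tensor([[0, 2, 1, 3, 4], [2, 0, 1, 3, 4]])]
targets = [torch.tensor([0, 1])]  # ground-truth class per image
metrics.process(targets, pred)
print(metrics.top1, metrics.top5, metrics.fitness)  # 0.5, 1.0, 0.75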
.\yolov8\ultralytics\utils\ops.py
# Ultralytics YOLO 🚀, AGPL-3.0 license

import contextlib  # context-manager utilities
import math  # math functions
import re  # regular expressions
import time  # timing utilities

import cv2  # OpenCV
import numpy as np  # NumPy
import torch  # PyTorch
import torch.nn.functional as F  # PyTorch functional API

from ultralytics.utils import LOGGER  # logger object
from ultralytics.utils.metrics import batch_probiou


class Profile(contextlib.ContextDecorator):
    """
    YOLOv8 Profile class. Use as a decorator with @Profile() or as a context manager with 'with Profile():'.

    Example:
        ```py
        from ultralytics.utils.ops import Profile

        with Profile(device=device) as dt:
            pass  # slow operation here

        print(dt)  # prints "Elapsed time is 9.5367431640625e-07 s"
        ```
    """

    def __init__(self, t=0.0, device: torch.device = None):
        """
        Initialize the Profile class.

        Args:
            t (float): Initial time. Defaults to 0.0.
            device (torch.device): Device used for model inference. Defaults to None (cpu).
        """
        self.t = t  # accumulated elapsed time
        self.device = device
        self.cuda = bool(device and str(device).startswith("cuda"))  # whether CUDA timing is needed

    def __enter__(self):
        """Start timing."""
        self.start = self.time()  # record the start time
        return self

    def __exit__(self, type, value, traceback):  # noqa
        """Stop timing."""
        self.dt = self.time() - self.start  # elapsed time of this block
        self.t += self.dt  # accumulate into the total time

    def __str__(self):
        """Returns a human-readable string representing the accumulated elapsed time in the profiler."""
        return f"Elapsed time is {self.t} s"

    def time(self):
        """Get current time."""
        if self.cuda:
            torch.cuda.synchronize(self.device)  # synchronize the CUDA stream for accurate timing
        return time.time()


def segment2box(segment, width=640, height=640):
    """
    Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy).

    Args:
        segment (torch.Tensor): the segment label
        width (int): the width of the image. Defaults to 640
        height (int): The height of the image. Defaults to 640

    Returns:
        (np.ndarray): the minimum and maximum x and y values of the segment.
    """
    x, y = segment.T  # segment xy coordinates
    inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)  # keep only points inside the image
    x = x[inside]
    y = y[inside]
    # Return the min/max xy of the surviving points, or zeros if none remain
    return (
        np.array([x.min(), y.min(), x.max(), y.max()], dtype=segment.dtype)
        if any(x)
        else np.zeros(4, dtype=segment.dtype)
    )


def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True, xywh=False):
    """
    Rescales bounding boxes (in the format of xyxy by default) from the shape of the image they were originally
    specified in (img1_shape) to the shape of a different image (img0_shape).

    Args:
        img1_shape (tuple): Shape of the image the boxes are for, in the format of (height, width).
        boxes (torch.Tensor): Bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2).
        img0_shape (tuple): Shape of the target image, in the format of (height, width).
        ratio_pad (tuple): A tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad are
            calculated from the size difference between the two images.
        padding (bool): If True, assumes the boxes are based on an image augmented by YOLO-style letterboxing. If
            False, performs regular rescaling.
        xywh (bool): Whether the box format is xywh. Defaults to False.

    Returns:
        boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2).
    """
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = target size / original size
        pad = (
            round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1),  # width padding
            round((img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1),  # height padding
        )
    else:
        gain = ratio_pad[0][0]  # scaling ratio from the provided ratio_pad
        pad = ratio_pad[1]  # padding from the provided ratio_pad

    if padding:
        boxes[..., 0] -= pad[0]  # subtract x padding
        boxes[..., 1] -= pad[1]  # subtract y padding
        if not xywh:
            boxes[..., 2] -= pad[0]  # also shift x2 for xyxy boxes
            boxes[..., 3] -= pad[1]  # also shift y2 for xyxy boxes
    boxes[..., :4] /= gain  # rescale the box coordinates
    return clip_boxes(boxes, img0_shape)  # clip the boxes so they stay inside the image
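A minimal usage sketch (my own illustration, dummy data): map boxes predicted on a 640x640 letterboxed image back to the original 480x640 frame. The 80-pixel top/bottom padding is removed and the coordinates are then clipped:

import torch
from ultralytics.utils.ops import scale_boxes

pred_boxes = torch.tensor([[120.0, 200.0, 360.0, 440.0]])  # xyxy on the 640x640 letterboxed image
restored = scale_boxes((640, 640), pred_boxes.clone(), (480, 640))
print(restored)  # tensor([[120., 120., 360., 360.]])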
def nms_rotated(boxes, scores, threshold=0.45):
    """
    NMS for oriented bounding boxes, powered by probiou and fast-nms.

    Args:
        boxes (torch.Tensor): Rotated bounding boxes of shape (N, 5) in xywhr format.
        scores (torch.Tensor): Confidence scores of shape (N,).
        threshold (float): IoU threshold. Defaults to 0.45.

    Returns:
        (torch.Tensor): Indices of the boxes to keep.
    """
    # If there are no boxes, return an empty index array
    if len(boxes) == 0:
        return np.empty((0,), dtype=np.int8)
    # Sort boxes by confidence in descending order
    sorted_idx = torch.argsort(scores, descending=True)
    boxes = boxes[sorted_idx]
    # Pairwise probiou matrix between all boxes; keep the upper triangle only
    ious = batch_probiou(boxes, boxes).triu_(diagonal=1)
    # Keep boxes whose max IoU with any higher-scoring box is below the threshold
    pick = torch.nonzero(ious.max(dim=0)[0] < threshold).squeeze_(-1)
    return sorted_idx[pick]


def non_max_suppression(
    prediction,
    conf_thres=0.25,  # confidence threshold; boxes below it are discarded
    iou_thres=0.45,  # IoU threshold for suppressing overlapping boxes
    classes=None,  # optional list of class indices to keep
    agnostic=False,  # if True, ignore class when suppressing boxes
    multi_label=False,  # if True, allow multiple labels per box
    labels=(),  # a priori labels to merge into the predictions
    max_det=300,  # maximum number of detections to keep
    nc=0,  # number of classes (optional)
    max_time_img=0.05,  # maximum processing time per image
    max_nms=30000,  # maximum number of boxes fed into NMS
    max_wh=7680,  # maximum box width and height
    in_place=True,  # whether to modify the input tensor in place
    rotated=False,  # whether the boxes are rotated (OBB)
):
    """Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box."""
    import torchvision  # scoped here to keep 'import ultralytics' fast

    # Check that the thresholds are valid, i.e. within [0, 1]
    assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
    assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"
    # If prediction is a list/tuple (e.g. YOLOv8 model in validation mode), select the inference output
    if isinstance(prediction, (list, tuple)):
        prediction = prediction[0]  # select only inference output
    # Move the requested classes onto the prediction device
    if classes is not None:
        classes = torch.tensor(classes, device=prediction.device)

    # End-to-end model output (BNC format, i.e. 1,300,6): filter by confidence (and classes) directly
    if prediction.shape[-1] == 6:
        output = [pred[pred[:, 4] > conf_thres] for pred in prediction]
        if classes is not None:
            output = [pred[(pred[:, 5:6] == classes).any(1)] for pred in output]
        return output

    bs = prediction.shape[0]  # batch size (BCN format, i.e. 1,84,6300)
    nc = nc or (prediction.shape[1] - 4)  # number of classes
    nm = prediction.shape[1] - nc - 4  # number of masks
    mi = 4 + nc  # mask start index
    xc = prediction[:, 4:mi].amax(1) > conf_thres  # candidates above the confidence threshold

    # Settings
    time_limit = 2.0 + max_time_img * bs  # seconds to quit after
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)

    prediction = prediction.transpose(-1, -2)  # shape(1,84,6300) to shape(1,6300,84)
    if not rotated:
        if in_place:
            prediction[..., :4] = xywh2xyxy(prediction[..., :4])  # xywh to xyxy, in-place modification
        else:
            # Concatenate the converted boxes with the remaining channels when not operating in place
            prediction = torch.cat((xywh2xyxy(prediction[..., :4]), prediction[..., 4:]), dim=-1)  # xywh to xyxy

    t = time.time()
    # One empty (0, 6 + nm) tensor per image as the initial output
    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # keep only candidates above the confidence threshold

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]) and not rotated:
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
            v[:, :4] = xywh2xyxy(lb[:, 1:5])  # box
            v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
            x = torch.cat((x, v), 0)  # merge the a priori labels with the predictions

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Detections matrix nx6 (xyxy, conf, cls)
        box, cls, mask = x.split((4, nc, nm), 1)

        if multi_label:
            # Keep every (box, class) pair above the confidence threshold
            i, j = torch.where(cls > conf_thres)
            x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
        else:  # best class only
            conf, j = cls.max(1, keepdim=True)
            x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == classes).any(1)]  # keep only predictions matching the requested classes

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        if n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence and keep the top max_nms

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # class offsets so NMS runs per class
        scores = x[:, 4]  # scores
        if rotated:
            boxes = torch.cat((x[:, :2] + c, x[:, 2:4], x[:, -1:]), dim=-1)  # xywhr
            i = nms_rotated(boxes, scores, iou_thres)  # rotated NMS
        else:
            boxes = x[:, :4] + c  # offset boxes by class
            i = torchvision.ops.nms(boxes, scores, iou_thres)  # standard NMS
        i = i[:max_det]  # limit detections

        output[xi] = x[i]  # store the surviving predictions for this image
        if (time.time() - t) > time_limit:
            LOGGER.warning(f"WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded")
            break  # time limit exceeded

    return output
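A minimal usage sketch (my own illustration; the raw head output is random dummy data): run non_max_suppression on a fake (batch=1, 4+nc, anchors) tensor with nc=2 classes:

import torch
from ultralytics.utils.ops import non_max_suppression

prediction = torch.rand(1, 6, 100)  # (1, 4+nc, anchors): xywh rows plus 2 class-score rows
prediction[:, :4] *= 640            # scale only the xywh rows into pixel space
out = non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45)
print(out[0].shape)                 # (n, 6): x1, y1, x2, y2, conf, cls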
def scale_image(masks, im0_shape, ratio_pad=None):
    """
    Takes a mask, and resizes it to the original image size.

    Args:
        masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3].
        im0_shape (tuple): the original image shape
        ratio_pad (tuple): the ratio of the padding to the original image.

    Returns:
        masks (np.ndarray): The masks that are being returned with shape [h, w, num].
    """
    im1_shape = masks.shape  # current mask shape
    if im1_shape[:2] == im0_shape[:2]:
        return masks  # already the original size, nothing to do
    if ratio_pad is None:  # calculate gain and pad from im0_shape
        gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # scaling ratio
        pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # wh padding
    else:
        pad = ratio_pad[1]  # use the padding from the provided ratio_pad

    # Integer crop bounds: top, left, bottom, right
    top, left = int(pad[1]), int(pad[0])  # y, x
    bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0])

    if len(masks.shape) < 2:
        raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
    masks = masks[top:bottom, left:right]  # remove the padding
    masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]))  # resize to the original image size
    if len(masks.shape) == 2:
        masks = masks[:, :, None]  # restore the channel dimension dropped by cv2.resize

    return masks
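A quick sketch (my own illustration, dummy data): un-letterbox a 640x640 mask back to a 480x640 frame; the 80-pixel top/bottom padding is cropped before resizing:

import numpy as np
from ultralytics.utils.ops import scale_image

masks = np.zeros((640, 640, 1), dtype=np.float32)  # padded model-space masks
restored = scale_image(masks, (480, 640, 3))
print(restored.shape)  # (480, 640, 1)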
def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
    """
    Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format,
    where x, y, width and height are normalized to image dimensions.

    Args:
        x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format.
        w (int): Width of the image. Defaults to 640.
        h (int): Height of the image. Defaults to 640.
        clip (bool): If True, clip the boxes to the image boundary. Defaults to False.
        eps (float): The minimum value of the box's width and height. Defaults to 0.0.

    Returns:
        y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height, normalized) format.
    """
    if clip:
        x = clip_boxes(x, (h - eps, w - eps))  # clip the boxes to the image bounds (h - eps, w - eps)
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x)  # faster than clone/copy
    y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w  # x center, normalized by image width
    y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h  # y center, normalized by image height
    y[..., 2] = (x[..., 2] - x[..., 0]) / w  # width, normalized by image width
    y[..., 3] = (x[..., 3] - x[..., 1]) / h  # height, normalized by image height
    return y
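A round-trip sketch (my own illustration, dummy data): normalize a pixel-space xyxy box, then invert it with xywhn2xyxy from the same module:

import torch
from ultralytics.utils.ops import xyxy2xywhn, xywhn2xyxy

box = torch.tensor([[64.0, 96.0, 320.0, 480.0]])
norm = xyxy2xywhn(box, w=640, h=640)   # -> [[0.3, 0.45, 0.4, 0.6]]
print(xywhn2xyxy(norm, w=640, h=640))  # -> back to [[64., 96., 320., 480.]]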
def xywh2ltwh(x):
    """
    Convert the bounding box format from [x, y, w, h] to [x1, y1, w, h], where x1, y1 are the top-left coordinates.

    Args:
        x (np.ndarray | torch.Tensor): The input tensor with xywh box coordinates

    Returns:
        y (np.ndarray | torch.Tensor): The output tensor with xyltwh box coordinates
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top-left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top-left y
    return y


def xyxy2ltwh(x):
    """
    Convert boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1 is the top-left and xy2 the bottom-right corner.

    Args:
        x (np.ndarray | torch.Tensor): The input tensor with xyxy box coordinates

    Returns:
        y (np.ndarray | torch.Tensor): The output tensor with xyltwh box coordinates
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 2] = x[..., 2] - x[..., 0]  # width
    y[..., 3] = x[..., 3] - x[..., 1]  # height
    return y


def ltwh2xywh(x):
    """
    Convert boxes from [x1, y1, w, h] to [x, y, w, h], where xy1 is the top-left corner and xy is the center.

    Args:
        x (torch.Tensor): the input tensor

    Returns:
        y (np.ndarray | torch.Tensor): The output tensor with xywh box coordinates
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] + x[..., 2] / 2  # center x
    y[..., 1] = x[..., 1] + x[..., 3] / 2  # center y
    return y


def xyxyxyxy2xywhr(x):
    """
    Convert batched oriented bounding boxes (OBB) from [xy1, xy2, xy3, xy4] to [cx, cy, w, h, rotation] format.
    Rotation values are returned in radians from 0 to pi/2.

    Args:
        x (numpy.ndarray | torch.Tensor): Input corner points [xy1, xy2, xy3, xy4] of shape (n, 8).

    Returns:
        (numpy.ndarray | torch.Tensor): Converted data of shape (n, 5) in [cx, cy, w, h, rotation] format.
    """
    is_torch = isinstance(x, torch.Tensor)
    points = x.cpu().numpy() if is_torch else x
    points = points.reshape(len(x), -1, 2)
    rboxes = []
    for pts in points:
        # NOTE: use cv2.minAreaRect to get an accurate xywhr,
        # especially when some objects are cut off by augmentations in the dataloader.
        (cx, cy), (w, h), angle = cv2.minAreaRect(pts)
        rboxes.append([cx, cy, w, h, angle / 180 * np.pi])  # convert the angle from degrees to radians
    return torch.tensor(rboxes, device=x.device, dtype=x.dtype) if is_torch else np.asarray(rboxes)


def xywhr2xyxyxyxy(x):
    """
    Convert batched oriented bounding boxes (OBB) from [cx, cy, w, h, rotation] to [xy1, xy2, xy3, xy4] format.
    Rotation values should be in radians from 0 to pi/2.

    Args:
        x (numpy.ndarray | torch.Tensor): Boxes in [cx, cy, w, h, rotation] format of shape (n, 5) or (b, n, 5).

    Returns:
        (numpy.ndarray | torch.Tensor): Converted corner points of shape (n, 4, 2) or (b, n, 4, 2).
    """
    # Pick the math functions matching the input type (torch vs numpy)
    cos, sin, cat, stack = (
        (torch.cos, torch.sin, torch.cat, torch.stack)
        if isinstance(x, torch.Tensor)
        else (np.cos, np.sin, np.concatenate, np.stack)
    )

    ctr = x[..., :2]  # box centers
    w, h, angle = (x[..., i : i + 1] for i in range(2, 5))  # width, height, rotation
    cos_value, sin_value = cos(angle), sin(angle)
    # Half-extent vectors along the rotated width and height axes
    vec1 = [w / 2 * cos_value, w / 2 * sin_value]
    vec2 = [-h / 2 * sin_value, h / 2 * cos_value]
    vec1 = cat(vec1, -1)
    vec2 = cat(vec2, -1)
    # The four corner points of the rotated rectangle
    pt1 = ctr + vec1 + vec2
    pt2 = ctr + vec1 - vec2
    pt3 = ctr - vec1 - vec2
    pt4 = ctr - vec1 + vec2
    return stack([pt1, pt2, pt3, pt4], -2)  # stack the corners along a new second-to-last dimension
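A round-trip sketch (my own illustration, dummy data) through the two OBB converters. Note that cv2.minAreaRect may return the recovered box with w/h swapped and a complementary angle, so the result is equivalent rather than bit-identical:

import torch
from ultralytics.utils.ops import xywhr2xyxyxyxy, xyxyxyxy2xywhr

rbox = torch.tensor([[100.0, 100.0, 60.0, 40.0, 0.3]])  # cx, cy, w, h, angle (rad)
corners = xywhr2xyxyxyxy(rbox)       # -> shape (1, 4, 2)
recovered = xyxyxyxy2xywhr(corners)  # ≈ rbox, up to a w/h swap with complementary angle
print(corners.shape, recovered)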
def ltwh2xyxy(x):
    """
    Convert boxes from [x1, y1, w, h] to [x1, y1, x2, y2], where xy1 is the top-left and xy2 the bottom-right corner.

    Args:
        x (np.ndarray | torch.Tensor): the input boxes

    Returns:
        y (np.ndarray | torch.Tensor): the xyxy coordinates of the boxes
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 2] = x[..., 2] + x[..., 0]  # bottom-right x = width + top-left x
    y[..., 3] = x[..., 3] + x[..., 1]  # bottom-right y = height + top-left y
    return y


def segments2boxes(segments):
    """
    Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh).

    Args:
        segments (list): list of segments; each segment is a list of points, each point a pair of x, y coordinates

    Returns:
        (np.ndarray): the xywh coordinates of the bounding boxes
    """
    boxes = []
    for s in segments:
        x, y = s.T  # segment xy coordinates
        boxes.append([x.min(), y.min(), x.max(), y.max()])  # xyxy of the segment
    return xyxy2xywh(np.array(boxes))  # convert to xywh


def resample_segments(segments, n=1000):
    """
    Resample a list of (samples, 2) segments, upsampling each to n points.

    Args:
        segments (list): a list of (samples, 2) arrays, where samples is the number of points in the segment.
        n (int): number of points to resample each segment to. Defaults to 1000.

    Returns:
        segments (list): the resampled segments.
    """
    for i, s in enumerate(segments):
        s = np.concatenate((s, s[0:1, :]), axis=0)  # append the first point to close the segment
        x = np.linspace(0, len(s) - 1, n)
        xp = np.arange(len(s))
        segments[i] = (
            np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)], dtype=np.float32).reshape(2, -1).T
        )  # interpolate x and y separately to get the upsampled points
    return segments


def crop_mask(masks, boxes):
    """
    Crop masks to their bounding boxes and return the cropped masks.

    Args:
        masks (torch.Tensor): [n, h, w] tensor of masks
        boxes (torch.Tensor): [n, 4] tensor of box coordinates in relative point form

    Returns:
        (torch.Tensor): the cropped masks
    """
    _, h, w = masks.shape
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # split the box coordinates
    r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # column (x) indices
    c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # row (y) indices
    # Zero out everything outside each box
    return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))


def process_mask(protos, masks_in, bboxes, shape, upsample=False):
    """
    Apply masks to bounding boxes using the output of the mask head.

    Args:
        protos (torch.Tensor): [mask_dim, mask_h, mask_w] tensor of mask prototypes
        masks_in (torch.Tensor): [n, mask_dim] mask coefficients, n is the number of masks after NMS
        bboxes (torch.Tensor): [n, 4] box coordinates, n is the number of masks after NMS
        shape (tuple): the size of the input image as (height, width)
        upsample (bool): whether to upsample the masks to the input image size. Defaults to False.

    Returns:
        (torch.Tensor): [n, h, w] binary mask tensor
    """
    c, mh, mw = protos.shape  # CHW
    ih, iw = shape  # input image height and width
    # Each output mask is a linear combination of the prototypes, reshaped to [n, mh, mw]
    masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)  # CHW
    # Width/height ratios for scaling the boxes down to prototype resolution
    width_ratio = mw / iw
    height_ratio = mh / ih

    # Scale the box corners by the ratios
    downsampled_bboxes = bboxes.clone()
    downsampled_bboxes[:, 0] *= width_ratio
    downsampled_bboxes[:, 2] *= width_ratio
    downsampled_bboxes[:, 3] *= height_ratio
    downsampled_bboxes[:, 1] *= height_ratio

    masks = crop_mask(masks, downsampled_bboxes)  # crop the masks to the downsampled boxes, CHW
    if upsample:
        masks = F.interpolate(masks[None], shape, mode="bilinear", align_corners=False)[0]  # upsample to input size
    # Threshold at 0.0 to obtain binary masks of shape [n, h, w]
    return masks.gt_(0.0)
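A quick sketch (my own illustration, dummy data): crop_mask zeroes everything outside each box, here keeping only the central 2x2 region of a 4x4 mask of ones:

import torch
from ultralytics.utils.ops import crop_mask

masks = torch.ones(1, 4, 4)
boxes = torch.tensor([[1.0, 1.0, 3.0, 3.0]])  # x1, y1, x2, y2 in mask pixels
print(crop_mask(masks, boxes)[0])  # ones at rows/cols 1-2, zeros elsewhere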
def process_mask_native(protos, masks_in, bboxes, shape):
    """
    It takes the output of the mask head, and crops it after upsampling to the bounding boxes.

    Args:
        protos (torch.Tensor): [mask_dim, mask_h, mask_w] tensor of mask prototypes
        masks_in (torch.Tensor): [n, mask_dim] mask coefficients, n is the number of masks after NMS
        bboxes (torch.Tensor): [n, 4] box coordinates, n is the number of masks after NMS
        shape (tuple): the size of the input image as (height, width)

    Returns:
        masks (torch.Tensor): the processed masks
    """
    c, mh, mw = protos.shape  # prototype channels, height, width
    masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)  # combine the prototypes into masks
    masks = scale_masks(masks[None], shape)[0]  # rescale the masks to the target shape
    masks = crop_mask(masks, bboxes)  # crop the masks to the boxes
    return masks.gt_(0.0)  # threshold at zero to get binary masks
def scale_masks(masks, shape, padding=True):
    """
    Rescale segment masks to shape.

    Args:
        masks (torch.Tensor): (N, C, H, W) tensor of masks
        shape (tuple): target height and width
        padding (bool): If True, assumes the masks are based on a YOLO-style letterboxed image. If False, performs
            regular rescaling.

    Returns:
        masks (torch.Tensor): the rescaled masks
    """
    mh, mw = masks.shape[2:]  # mask height and width
    gain = min(mh / shape[0], mw / shape[1])  # gain = old size / new size
    pad = [mw - shape[1] * gain, mh - shape[0] * gain]  # wh padding
    if padding:
        pad[0] /= 2  # split the width padding between left and right
        pad[1] /= 2  # split the height padding between top and bottom
    top, left = (int(pad[1]), int(pad[0])) if padding else (0, 0)  # top-left crop bound
    bottom, right = (int(mh - pad[1]), int(mw - pad[0]))  # bottom-right crop bound
    masks = masks[..., top:bottom, left:right]  # remove the padding
    masks = F.interpolate(masks, shape, mode="bilinear", align_corners=False)  # resize with bilinear interpolation
    return masks
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False, padding=True):
    """
    Rescale segment coordinates (xy) from img1_shape to img0_shape.

    Args:
        img1_shape (tuple): The shape of the image that the coords are from.
        coords (torch.Tensor): the coords to be scaled of shape n,2.
        img0_shape (tuple): the shape of the image that the segmentation is being applied to.
        ratio_pad (tuple): the ratio of the image size to the padded image size.
        normalize (bool): If True, the coordinates will be normalized to the range [0, 1]. Defaults to False.
        padding (bool): If True, assumes the coords are based on a YOLO-style letterboxed image. If False, performs
            regular rescaling.

    Returns:
        coords (torch.Tensor): the scaled coordinates.
    """
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old size / new size
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]  # scaling gain from the provided ratio_pad
        pad = ratio_pad[1]  # padding from the provided ratio_pad

    if padding:
        coords[..., 0] -= pad[0]  # subtract x padding
        coords[..., 1] -= pad[1]  # subtract y padding
    coords[..., 0] /= gain  # rescale x
    coords[..., 1] /= gain  # rescale y
    coords = clip_coords(coords, img0_shape)  # clip the coordinates to the image bounds
    if normalize:
        coords[..., 0] /= img0_shape[1]  # normalize x by image width
        coords[..., 1] /= img0_shape[0]  # normalize y by image height
    return coords
def regularize_rboxes(rboxes):
    """
    Regularize rotated boxes in range [0, pi/2].

    Args:
        rboxes (torch.Tensor): (N, 5) boxes in xywhr format.

    Returns:
        (torch.Tensor): The regularized boxes.
    """
    x, y, w, h, t = rboxes.unbind(dim=-1)
    # Swap edge lengths and rotate the angle by pi/2 if h >= w, so the long edge always comes first
    w_ = torch.where(w > h, w, h)  # the longer edge
    h_ = torch.where(w > h, h, w)  # the shorter edge
    t = torch.where(w > h, t, t + math.pi / 2) % math.pi  # adjusted angle, wrapped with period pi
    return torch.stack([x, y, w_, h_, t], dim=-1)  # stack the regularized boxes
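A quick sketch (my own illustration, dummy data): a box with h > w and angle 0.2 rad is rewritten so the long edge comes first and the angle is shifted by pi/2:

import torch
from ultralytics.utils.ops import regularize_rboxes

rbox = torch.tensor([[50.0, 50.0, 20.0, 80.0, 0.2]])  # w < h
print(regularize_rboxes(rbox))  # -> [[50., 50., 80., 20., 0.2 + pi/2]]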
def masks2segments(masks, strategy="largest"):
    """
    Convert a list of masks (n, h, w) into a list of segments (n, xy).

    Args:
        masks (torch.Tensor): (n, h, w) tensor of masks
        strategy (str): 'concat' to concatenate all contours, or 'largest' to keep only the largest contour.

    Returns:
        segments (list): list of (m, 2) float32 contour arrays, one per mask.
    """
    segments = []
    for x in masks.int().cpu().numpy().astype("uint8"):
        # Find the external contours in the mask image
        c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
        if c:
            if strategy == "concat":  # concatenate all segments
                c = np.concatenate([x.reshape(-1, 2) for x in c])
            elif strategy == "largest":  # select largest segment
                c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
        else:
            c = np.zeros((0, 2))  # no segments found
        segments.append(c.astype("float32"))
    return segments
def convert_torch2numpy_batch(batch: torch.Tensor) -> np.ndarray:
    """Convert a batch of FP32 torch tensors (0.0-1.0) to a NumPy uint8 array (0-255), changing from BCHW to BHWC layout."""
    return (batch.permute(0, 2, 3, 1).contiguous() * 255).clamp(0, 255).to(torch.uint8).cpu().numpy()
def clean_str(s):
    """Clean a string by replacing special characters with underscore '_'."""
    return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
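A trivial demo (my own illustration): every character in the pattern's character class is replaced by an underscore:

from ultralytics.utils.ops import clean_str

print(clean_str("exp(1):run?#7"))  # -> 'exp_1__run__7'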