2024-简单点-ultralytics库解析-data模块-编程知识

data模块

overview布局
- \_\_init__.py
- - from .base import BaseDataset
  - \_\_all__
- annotator.py
- augment.py
- - `class BaseTransform`
  - `class Compose`
  - `class BaseMixTransform`
  - `class Mosaic`
  - - 静态方法更新label
  - `class MixUp`
  - `RandomPerspective`
  - `class RandomHSV`

overview布局

在这里插入图片描述
从上往下解析

init.py

在这里插入图片描述

from .base import BaseDataset

__init__.py 文件在 Python 中扮演着重要的角色，它是 Python 包结构的一部分。

当一个目录包含 __init__.py 文件时，Python 会将这个目录视为一个包，允许你导入其中的模块和类。这个文件的目的是告诉 Python 解释器，目录内的内容是一个可导入的模块集合，也就是一个包。

在这个特定的例子中，__init__.py 文件是 ultralytics 库（假设是 ultralytics 包）的一部分，它导出了 base.py、build.py 和 dataset.py 中的一些关键类和函数。通过这种方式，用户可以直接从 ultralytics 包中导入这些导出的类和函数，而不需要知道它们在哪个具体的模块里。

例如，用户可以这样导入和使用这些类和函数：

from ultralytics import BaseDataset, YOLODataset, build_yolo_datasetdataset = BaseDataset()
yolo_dataset = YOLODataset()
dataloader = build_yolo_dataset(dataset)

这种组织方式提高了代码的可读性和可维护性，因为用户只需要记住在包级别上可用的接口，而不是每个单独模块的细节。同时，__init__.py 也可以用来执行一些初始化逻辑，比如设置配置、注册自定义类型等。

all

在这里插入图片描述

annotator.py

在这里插入图片描述

# Ultralytics YOLO 🚀, AGPL-3.0 licensefrom pathlib import Pathfrom ultralytics import SAM, YOLOdef auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="", output_dir=None):"""Automatically annotates images using a YOLO object detection model and a SAM segmentation model.Args:data (str): Path to a folder containing images to be annotated.det_model (str, optional): Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'.sam_model (str, optional): Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'.device (str, optional): Device to run the models on. Defaults to an empty string (CPU or GPU, if available).output_dir (str | None | optional): Directory to save the annotated results.Defaults to a 'labels' folder in the same directory as 'data'.Example:```pythonfrom ultralytics.data.annotator import auto_annotateauto_annotate(data='ultralytics/assets', det_model='yolov8n.pt', sam_model='mobile_sam.pt')```"""det_model = YOLO(det_model)sam_model = SAM(sam_model)data = Path(data)if not output_dir:output_dir = data.parent / f"{data.stem}_auto_annotate_labels"Path(output_dir).mkdir(exist_ok=True, parents=True)det_results = det_model(data, stream=True, device=device)for result in det_results:class_ids = result.boxes.cls.int().tolist()  # noqaif len(class_ids):boxes = result.boxes.xyxy  # Boxes object for bbox outputssam_results = sam_model(result.orig_img, bboxes=boxes, verbose=False, save=False, device=device)segments = sam_results[0].masks.xyn  # noqawith open(f"{Path(output_dir) / Path(result.path).stem}.txt", "w") as f:for i in range(len(segments)):s = segments[i]if len(s) == 0:continuesegment = map(str, segments[i].reshape(-1).tolist())f.write(f"{class_ids[i]} " + " ".join(segment) + "\n")

在这里插入图片描述

augment.py

数据增强
在这里插入图片描述

`class BaseTransform`

# TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
class BaseTransform:"""Base class for image transformations.This is a generic transformation class that can be extended for specific image processing needs.The class is designed to be compatible with both classification and semantic segmentation tasks.Methods:__init__: Initializes the BaseTransform object.apply_image: Applies image transformation to labels.apply_instances: Applies transformations to object instances in labels.apply_semantic: Applies semantic segmentation to an image.__call__: Applies all label transformations to an image, instances, and semantic masks."""def __init__(self) -> None:"""Initializes the BaseTransform object."""passdef apply_image(self, labels):"""Applies image transformations to labels."""passdef apply_instances(self, labels):"""Applies transformations to object instances in labels."""passdef apply_semantic(self, labels):"""Applies semantic segmentation to an image."""passdef __call__(self, labels):"""Applies all label transformations to an image, instances, and semantic masks."""self.apply_image(labels)self.apply_instances(labels)self.apply_semantic(labels)

在这里插入图片描述

`class Compose`

class Compose:"""Class for composing multiple image transformations."""def __init__(self, transforms):"""Initializes the Compose object with a list of transforms."""self.transforms = transforms if isinstance(transforms, list) else [transforms]def __call__(self, data):"""Applies a series of transformations to input data."""for t in self.transforms:data = t(data)return datadef append(self, transform):"""Appends a new transform to the existing list of transforms."""self.transforms.append(transform)def insert(self, index, transform):"""Inserts a new transform to the existing list of transforms."""self.transforms.insert(index, transform)def __getitem__(self, index: Union[list, int]) -> "Compose":"""Retrieve a specific transform or a set of transforms using indexing."""assert isinstance(index, (int, list)), f"The indices should be either list or int type but got {type(index)}"index = [index] if isinstance(index, int) else indexreturn Compose([self.transforms[i] for i in index])def __setitem__(self, index: Union[list, int], value: Union[list, int]) -> None:"""Retrieve a specific transform or a set of transforms using indexing."""assert isinstance(index, (int, list)), f"The indices should be either list or int type but got {type(index)}"if isinstance(index, list):assert isinstance(value, list), f"The indices should be the same type as values, but got {type(index)} and {type(value)}"if isinstance(index, int):index, value = [index], [value]for i, v in zip(index, value):assert i < len(self.transforms), f"list index {i} out of range {len(self.transforms)}."self.transforms[i] = vdef tolist(self):"""Converts the list of transforms to a standard Python list."""return self.transformsdef __repr__(self):"""Returns a string representation of the object."""return f"{self.__class__.__name__}({', '.join([f'{t}' for t in self.transforms])})"

在这里插入图片描述

`class BaseMixTransform`

class BaseMixTransform:"""Class for base mix (MixUp/Mosaic) transformations.This implementation is from mmyolo."""def __init__(self, dataset, pre_transform=None, p=0.0) -> None:"""Initializes the BaseMixTransform object with dataset, pre_transform, and probability."""self.dataset = datasetself.pre_transform = pre_transformself.p = pdef __call__(self, labels):"""Applies pre-processing transforms and mixup/mosaic transforms to labels data."""if random.uniform(0, 1) > self.p:return labels# Get index of one or three other imagesindexes = self.get_indexes()if isinstance(indexes, int):indexes = [indexes]# Get images information will be used for Mosaic or MixUpmix_labels = [self.dataset.get_image_and_label(i) for i in indexes]if self.pre_transform is not None:for i, data in enumerate(mix_labels):mix_labels[i] = self.pre_transform(data)labels["mix_labels"] = mix_labels# Update cls and textslabels = self._update_label_text(labels)# Mosaic or MixUplabels = self._mix_transform(labels)labels.pop("mix_labels", None)return labelsdef _mix_transform(self, labels):"""Applies MixUp or Mosaic augmentation to the label dictionary."""raise NotImplementedErrordef get_indexes(self):"""Gets a list of shuffled indexes for mosaic augmentation."""raise NotImplementedErrordef _update_label_text(self, labels):"""Update label text."""if "texts" not in labels:return labelsmix_texts = sum([labels["texts"]] + [x["texts"] for x in labels["mix_labels"]], [])mix_texts = list({tuple(x) for x in mix_texts})text2id = {text: i for i, text in enumerate(mix_texts)}for label in [labels] + labels["mix_labels"]:for i, cls in enumerate(label["cls"].squeeze(-1).tolist()):text = label["texts"][int(cls)]label["cls"][i] = text2id[tuple(text)]label["texts"] = mix_textsreturn labels

在这里插入图片描述

`class Mosaic`

class Mosaic(BaseMixTransform):"""Mosaic augmentation.This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image.The augmentation is applied to a dataset with a given probability.Attributes:dataset: The dataset on which the mosaic augmentation is applied.imgsz (int, optional): Image size (height and width) after mosaic pipeline of a single image. Default to 640.p (float, optional): Probability of applying the mosaic augmentation. Must be in the range 0-1. Default to 1.0.n (int, optional): The grid size, either 4 (for 2x2) or 9 (for 3x3)."""def __init__(self, dataset, imgsz=640, p=1.0, n=4):"""Initializes the object with a dataset, image size, probability, and border."""assert 0 <= p <= 1.0, f"The probability should be in range [0, 1], but got {p}."assert n in {4, 9}, "grid must be equal to 4 or 9."super().__init__(dataset=dataset, p=p)self.dataset = datasetself.imgsz = imgszself.border = (-imgsz // 2, -imgsz // 2)  # width, heightself.n = ndef get_indexes(self, buffer=True):"""Return a list of random indexes from the dataset."""if buffer:  # select images from bufferreturn random.choices(list(self.dataset.buffer), k=self.n - 1)else:  # select any imagesreturn [random.randint(0, len(self.dataset) - 1) for _ in range(self.n - 1)]def _mix_transform(self, labels):"""Apply mixup transformation to the input image and labels."""assert labels.get("rect_shape", None) is None, "rect and mosaic are mutually exclusive."assert len(labels.get("mix_labels", [])), "There are no other images for mosaic augment."return (self._mosaic3(labels) if self.n == 3 else self._mosaic4(labels) if self.n == 4 else self._mosaic9(labels))  # This code is modified for mosaic3 method.def _mosaic3(self, labels):"""Create a 1x3 image mosaic."""mosaic_labels = []s = self.imgszfor i in range(3):labels_patch = labels if i == 0 else labels["mix_labels"][i - 1]# Load imageimg = labels_patch["img"]h, w = labels_patch.pop("resized_shape")# Place img in img3if i == 0:  # centerimg3 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 3 tilesh0, w0 = h, wc = s, s, s + w, s + h  # xmin, ymin, xmax, ymax (base) coordinateselif i == 1:  # rightc = s + w0, s, s + w0 + w, s + helif i == 2:  # leftc = s - w, s + h0 - h, s, s + h0padw, padh = c[:2]x1, y1, x2, y2 = (max(x, 0) for x in c)  # allocate coordsimg3[y1:y2, x1:x2] = img[y1 - padh :, x1 - padw :]  # img3[ymin:ymax, xmin:xmax]# hp, wp = h, w  # height, width previous for next iteration# Labels assuming imgsz*2 mosaic sizelabels_patch = self._update_labels(labels_patch, padw + self.border[0], padh + self.border[1])mosaic_labels.append(labels_patch)final_labels = self._cat_labels(mosaic_labels)final_labels["img"] = img3[-self.border[0] : self.border[0], -self.border[1] : self.border[1]]return final_labelsdef _mosaic4(self, labels):"""Create a 2x2 image mosaic."""mosaic_labels = []s = self.imgszyc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.border)  # mosaic center x, yfor i in range(4):labels_patch = labels if i == 0 else labels["mix_labels"][i - 1]# Load imageimg = labels_patch["img"]h, w = labels_patch.pop("resized_shape")# Place img in img4if i == 0:  # top leftimg4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tilesx1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)elif i == 1:  # top rightx1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), ycx1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), helif i == 2:  # bottom leftx1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)elif i == 3:  # bottom rightx1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]padw = x1a - x1bpadh = y1a - y1blabels_patch = self._update_labels(labels_patch, padw, padh)mosaic_labels.append(labels_patch)final_labels = self._cat_labels(mosaic_labels)final_labels["img"] = img4return final_labelsdef _mosaic9(self, labels):"""Create a 3x3 image mosaic."""mosaic_labels = []s = self.imgszhp, wp = -1, -1  # height, width previousfor i in range(9):labels_patch = labels if i == 0 else labels["mix_labels"][i - 1]# Load imageimg = labels_patch["img"]h, w = labels_patch.pop("resized_shape")# Place img in img9if i == 0:  # centerimg9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tilesh0, w0 = h, wc = s, s, s + w, s + h  # xmin, ymin, xmax, ymax (base) coordinateselif i == 1:  # topc = s, s - h, s + w, selif i == 2:  # top rightc = s + wp, s - h, s + wp + w, selif i == 3:  # rightc = s + w0, s, s + w0 + w, s + helif i == 4:  # bottom rightc = s + w0, s + hp, s + w0 + w, s + hp + helif i == 5:  # bottomc = s + w0 - w, s + h0, s + w0, s + h0 + helif i == 6:  # bottom leftc = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + helif i == 7:  # leftc = s - w, s + h0 - h, s, s + h0elif i == 8:  # top leftc = s - w, s + h0 - hp - h, s, s + h0 - hppadw, padh = c[:2]x1, y1, x2, y2 = (max(x, 0) for x in c)  # allocate coords# Imageimg9[y1:y2, x1:x2] = img[y1 - padh :, x1 - padw :]  # img9[ymin:ymax, xmin:xmax]hp, wp = h, w  # height, width previous for next iteration# Labels assuming imgsz*2 mosaic sizelabels_patch = self._update_labels(labels_patch, padw + self.border[0], padh + self.border[1])mosaic_labels.append(labels_patch)final_labels = self._cat_labels(mosaic_labels)final_labels["img"] = img9[-self.border[0] : self.border[0], -self.border[1] : self.border[1]]return final_labels@staticmethoddef _update_labels(labels, padw, padh):"""Update labels."""nh, nw = labels["img"].shape[:2]labels["instances"].convert_bbox(format="xyxy")labels["instances"].denormalize(nw, nh)labels["instances"].add_padding(padw, padh)return labelsdef _cat_labels(self, mosaic_labels):"""Return labels with mosaic border instances clipped."""if len(mosaic_labels) == 0:return {}cls = []instances = []imgsz = self.imgsz * 2  # mosaic imgszfor labels in mosaic_labels:cls.append(labels["cls"])instances.append(labels["instances"])# Final labelsfinal_labels = {"im_file": mosaic_labels[0]["im_file"],"ori_shape": mosaic_labels[0]["ori_shape"],"resized_shape": (imgsz, imgsz),"cls": np.concatenate(cls, 0),"instances": Instances.concatenate(instances, axis=0),"mosaic_border": self.border,}final_labels["instances"].clip(imgsz, imgsz)good = final_labels["instances"].remove_zero_area_boxes()final_labels["cls"] = final_labels["cls"][good]if "texts" in mosaic_labels[0]:final_labels["texts"] = mosaic_labels[0]["texts"]return final_labels

在这里插入图片描述
具体介绍一下_mosaic3

静态方法更新label

    # 静态方法，用于更新单个图像的标签@staticmethoddef _update_labels(labels, padw, padh):# 获取图像的高度和宽度nh, nw = labels["img"].shape[:2]# 将实例（boxes）转换为xyxy格式labels["instances"].convert_bbox(format="xyxy")# 反标准化实例坐标，基于图像的实际尺寸labels["instances"].denormalize(nw, nh)# 添加填充到实例坐标labels["instances"].add_padding(padw, padh)# 返回更新后的标签return labels# 静态方法，用于将多个图像的标签组合成一个拼贴图像的标签@staticmethoddef _cat_labels(mosaic_labels):# 检查输入列表是否为空if len(mosaic_labels) == 0:return {}# 初始化类别列表和实例列表cls = []instances = []# 拼贴图像的大小是原始图像大小的两倍imgsz = self.imgsz * 2# 遍历每个图像的标签for labels in mosaic_labels:# 收集类别cls.append(labels["cls"])# 收集实例instances.append(labels["instances"])# 创建最终的标签字典final_labels = {"im_file": mosaic_labels[0]["im_file"],  # 图像的文件名"ori_shape": mosaic_labels[0]["ori_shape"],  # 图像的原始形状"resized_shape": (imgsz, imgsz),  # 拼贴图像的尺寸"cls": np.concatenate(cls, 0),  # 合并所有图像的类别"instances": Instances.concatenate(instances, axis=0),  # 合并所有图像的实例"mosaic_border": self.border,  # 拼贴图像的边框信息}# 裁剪超出拼贴图像边界的实例final_labels["instances"].clip(imgsz, imgsz)# 移除面积为零的实例good = final_labels["instances"].remove_zero_area_boxes()# 根据有效的实例更新类别final_labels["cls"] = final_labels["cls"][good]# 如果原始标签中有"texts"字段，将其添加到最终标签中if "texts" in mosaic_labels[0]:final_labels["texts"] = mosaic_labels[0]["texts"]# 返回组合后的标签return final_labels

`class MixUp`

class MixUp(BaseMixTransform):"""Class for applying MixUp augmentation to the dataset."""def __init__(self, dataset, pre_transform=None, p=0.0) -> None:"""初始化MixUp对象，传入数据集、预处理变换和应用MixUp的概率。参数:- dataset: 数据集对象- pre_transform: 可选的预处理变换- p: 应用MixUp的概率，默认为0.0，表示默认不应用MixUp"""super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)def get_indexes(self):"""从数据集中随机获取一个索引。返回:- 一个随机生成的整数索引，范围在0到数据集长度减1之间。"""return random.randint(0, len(self.dataset) - 1)def _mix_transform(self, labels):"""根据https://arxiv.org/pdf/1710.09412.pdf中的描述应用MixUp数据增强。参数:- labels: 包含图像和对应标签的字典，如{'img': 图像, 'instances': 实例, 'cls': 类别}返回:- 应用MixUp后的混合标签字典。"""# 生成MixUp的混合比例，这里使用alpha=beta=32.0r = np.random.beta(32.0, 32.0)# 获取第二个图像及其标签labels2 = labels["mix_labels"][0]# 混合两个图像mixed_img = (labels["img"] * r + labels2["img"] * (1 - r)).astype(np.uint8)# 混合两个实例（如边界框）mixed_instances = Instances.concatenate([labels["instances"], labels2["instances"]], axis=0)# 混合两个类别mixed_cls = np.concatenate([labels["cls"], labels2["cls"]], 0)# 返回混合后的标签字典return {"img": mixed_img, "instances": mixed_instances, "cls": mixed_cls}

`RandomPerspective`

class RandomPerspective:"""Implements random perspective and affine transformations on images and corresponding bounding boxes, segments, andkeypoints. These transformations include rotation, translation, scaling, and shearing. The class also offers theoption to apply these transformations conditionally with a specified probability.Attributes:degrees (float): Degree range for random rotations.translate (float): Fraction of total width and height for random translation.scale (float): Scaling factor interval, e.g., a scale factor of 0.1 allows a resize between 90%-110%.shear (float): Shear intensity (angle in degrees).perspective (float): Perspective distortion factor.border (tuple): Tuple specifying mosaic border.pre_transform (callable): A function/transform to apply to the image before starting the random transformation.Methods:affine_transform(img, border): Applies a series of affine transformations to the image.apply_bboxes(bboxes, M): Transforms bounding boxes using the calculated affine matrix.apply_segments(segments, M): Transforms segments and generates new bounding boxes.apply_keypoints(keypoints, M): Transforms keypoints.__call__(labels): Main method to apply transformations to both images and their corresponding annotations.box_candidates(box1, box2): Filters out bounding boxes that don't meet certain criteria post-transformation."""def __init__(self, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, border=(0, 0), pre_transform=None):"""Initializes RandomPerspective object with transformation parameters."""self.degrees = degreesself.translate = translateself.scale = scaleself.shear = shearself.perspective = perspectiveself.border = border  # mosaic borderself.pre_transform = pre_transformdef affine_transform(self, img, border):"""Applies a sequence of affine transformations centered around the image center.Args:img (ndarray): Input image.border (tuple): Border dimensions.Returns:img (ndarray): Transformed image.M (ndarray): Transformation matrix.s (float): Scale factor."""# CenterC = np.eye(3, dtype=np.float32)C[0, 2] = -img.shape[1] / 2  # x translation (pixels)C[1, 2] = -img.shape[0] / 2  # y translation (pixels)# PerspectiveP = np.eye(3, dtype=np.float32)P[2, 0] = random.uniform(-self.perspective, self.perspective)  # x perspective (about y)P[2, 1] = random.uniform(-self.perspective, self.perspective)  # y perspective (about x)# Rotation and ScaleR = np.eye(3, dtype=np.float32)a = random.uniform(-self.degrees, self.degrees)# a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotationss = random.uniform(1 - self.scale, 1 + self.scale)# s = 2 ** random.uniform(-scale, scale)R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)# ShearS = np.eye(3, dtype=np.float32)S[0, 1] = math.tan(random.uniform(-self.shear, self.shear) * math.pi / 180)  # x shear (deg)S[1, 0] = math.tan(random.uniform(-self.shear, self.shear) * math.pi / 180)  # y shear (deg)# TranslationT = np.eye(3, dtype=np.float32)T[0, 2] = random.uniform(0.5 - self.translate, 0.5 + self.translate) * self.size[0]  # x translation (pixels)T[1, 2] = random.uniform(0.5 - self.translate, 0.5 + self.translate) * self.size[1]  # y translation (pixels)# Combined rotation matrixM = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT# Affine imageif (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changedif self.perspective:img = cv2.warpPerspective(img, M, dsize=self.size, borderValue=(114, 114, 114))else:  # affineimg = cv2.warpAffine(img, M[:2], dsize=self.size, borderValue=(114, 114, 114))return img, M, sdef apply_bboxes(self, bboxes, M):"""Apply affine to bboxes only.Args:bboxes (ndarray): list of bboxes, xyxy format, with shape (num_bboxes, 4).M (ndarray): affine matrix.Returns:new_bboxes (ndarray): bboxes after affine, [num_bboxes, 4]."""n = len(bboxes)if n == 0:return bboxesxy = np.ones((n * 4, 3), dtype=bboxes.dtype)xy[:, :2] = bboxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1xy = xy @ M.T  # transformxy = (xy[:, :2] / xy[:, 2:3] if self.perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine# Create new boxesx = xy[:, [0, 2, 4, 6]]y = xy[:, [1, 3, 5, 7]]return np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1)), dtype=bboxes.dtype).reshape(4, n).Tdef apply_segments(self, segments, M):"""Apply affine to segments and generate new bboxes from segments.Args:segments (ndarray): list of segments, [num_samples, 500, 2].M (ndarray): affine matrix.Returns:new_segments (ndarray): list of segments after affine, [num_samples, 500, 2].new_bboxes (ndarray): bboxes after affine, [N, 4]."""n, num = segments.shape[:2]if n == 0:return [], segmentsxy = np.ones((n * num, 3), dtype=segments.dtype)segments = segments.reshape(-1, 2)xy[:, :2] = segmentsxy = xy @ M.T  # transformxy = xy[:, :2] / xy[:, 2:3]segments = xy.reshape(n, -1, 2)bboxes = np.stack([segment2box(xy, self.size[0], self.size[1]) for xy in segments], 0)segments[..., 0] = segments[..., 0].clip(bboxes[:, 0:1], bboxes[:, 2:3])segments[..., 1] = segments[..., 1].clip(bboxes[:, 1:2], bboxes[:, 3:4])return bboxes, segmentsdef apply_keypoints(self, keypoints, M):"""Apply affine to keypoints.Args:keypoints (ndarray): keypoints, [N, 17, 3].M (ndarray): affine matrix.Returns:new_keypoints (ndarray): keypoints after affine, [N, 17, 3]."""n, nkpt = keypoints.shape[:2]if n == 0:return keypointsxy = np.ones((n * nkpt, 3), dtype=keypoints.dtype)visible = keypoints[..., 2].reshape(n * nkpt, 1)xy[:, :2] = keypoints[..., :2].reshape(n * nkpt, 2)xy = xy @ M.T  # transformxy = xy[:, :2] / xy[:, 2:3]  # perspective rescale or affineout_mask = (xy[:, 0] < 0) | (xy[:, 1] < 0) | (xy[:, 0] > self.size[0]) | (xy[:, 1] > self.size[1])visible[out_mask] = 0return np.concatenate([xy, visible], axis=-1).reshape(n, nkpt, 3)def __call__(self, labels):"""Affine images and targets.Args:labels (dict): a dict of `bboxes`, `segments`, `keypoints`."""if self.pre_transform and "mosaic_border" not in labels:labels = self.pre_transform(labels)labels.pop("ratio_pad", None)  # do not need ratio padimg = labels["img"]cls = labels["cls"]instances = labels.pop("instances")# Make sure the coord formats are rightinstances.convert_bbox(format="xyxy")instances.denormalize(*img.shape[:2][::-1])border = labels.pop("mosaic_border", self.border)self.size = img.shape[1] + border[1] * 2, img.shape[0] + border[0] * 2  # w, h# M is affine matrix# Scale for func:`box_candidates`img, M, scale = self.affine_transform(img, border)bboxes = self.apply_bboxes(instances.bboxes, M)segments = instances.segmentskeypoints = instances.keypoints# Update bboxes if there are segments.if len(segments):bboxes, segments = self.apply_segments(segments, M)if keypoints is not None:keypoints = self.apply_keypoints(keypoints, M)new_instances = Instances(bboxes, segments, keypoints, bbox_format="xyxy", normalized=False)# Clipnew_instances.clip(*self.size)# Filter instancesinstances.scale(scale_w=scale, scale_h=scale, bbox_only=True)# Make the bboxes have the same scale with new_bboxesi = self.box_candidates(box1=instances.bboxes.T, box2=new_instances.bboxes.T, area_thr=0.01 if len(segments) else 0.10)labels["instances"] = new_instances[i]labels["cls"] = cls[i]labels["img"] = imglabels["resized_shape"] = img.shape[:2]return labelsdef box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):"""Compute box candidates based on a set of thresholds. This method compares the characteristics of the boxesbefore and after augmentation to decide whether a box is a candidate for further processing.Args:box1 (numpy.ndarray): The 4,n bounding box before augmentation, represented as [x1, y1, x2, y2].box2 (numpy.ndarray): The 4,n bounding box after augmentation, represented as [x1, y1, x2, y2].wh_thr (float, optional): The width and height threshold in pixels. Default is 2.ar_thr (float, optional): The aspect ratio threshold. Default is 100.area_thr (float, optional): The area ratio threshold. Default is 0.1.eps (float, optional): A small epsilon value to prevent division by zero. Default is 1e-16.Returns:(numpy.ndarray): A boolean array indicating which boxes are candidates based on the given thresholds."""w1, h1 = box1[2] - box1[0], box1[3] - box1[1]w2, h2 = box2[2] - box2[0], box2[3] - box2[1]ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratioreturn (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)  # candidates

M = T @ S @ R @ P @ C
在这里插入图片描述
关于这种矩阵变换可以看下面这个网址：
矩阵变换

在这里插入图片描述

`class RandomHSV`

class RandomHSV:"""This class is responsible for performing random adjustments to the Hue, Saturation, and Value (HSV) channels of animage.The adjustments are random but within limits set by hgain, sgain, and vgain."""def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None:"""Initialize RandomHSV class with gains for each HSV channel.Args:hgain (float, optional): Maximum variation for hue. Default is 0.5.sgain (float, optional): Maximum variation for saturation. Default is 0.5.vgain (float, optional): Maximum variation for value. Default is 0.5."""self.hgain = hgainself.sgain = sgainself.vgain = vgaindef __call__(self, labels):"""Applies random HSV augmentation to an image within the predefined limits.The modified image replaces the original image in the input 'labels' dict."""img = labels["img"]if self.hgain or self.sgain or self.vgain:r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1  # random gainshue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))dtype = img.dtype  # uint8x = np.arange(0, 256, dtype=r.dtype)lut_hue = ((x * r[0]) % 180).astype(dtype)lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)lut_val = np.clip(x * r[2], 0, 255).astype(dtype)im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return neededreturn labels