[yolox优化] 提升ap

1.增加尺度变换

去掉马赛克之后,就什么数据增强都没有了,需要增加尺度变换
YOLOX-main/yolox/data/data_augment.py 增加一个函数 #163行

def random_perspective_rotation_scale(
    img,
    targets=(),
    degrees=10,
    scale=0.1,
    perspective=0.0,
    border=(0, 0),
):
    """Apply a random rotation + scale to an image and remap its boxes.

    Args:
        img: Image array; its height and width are read from ``img.shape[0]``
            and ``img.shape[1]``.
        targets: 2-D array of labels whose first four columns are the box
            corners ``x1, y1, x2, y2``; any further columns are carried along
            untouched. An empty value is returned as-is.
        degrees: The rotation angle is drawn uniformly from
            ``[-degrees, degrees]``.
        scale: Either a ``(low, high)`` pair the scale factor is drawn from,
            or a single number ``d`` meaning the range ``(1 - d, 1 + d)``.
        perspective: When truthy the image is warped with
            ``cv2.warpPerspective`` and the full 3x3 matrix; otherwise
            ``cv2.warpAffine`` is used with its top two rows.
        border: ``(vertical, horizontal)`` padding; twice each value is added
            to the output canvas size.

    Returns:
        Tuple ``(img, targets)`` holding the warped image and the labels that
        survive ``box_candidates``, with their boxes replaced by the
        axis-aligned bounds of the transformed corners, clipped to the canvas.
    """
    height = img.shape[0] + border[0] * 2  # shape is (h, w, c)
    width = img.shape[1] + border[1] * 2

    # Accept both a (low, high) pair and a scalar spread. The scalar form is
    # what the default ``scale=0.1`` needs; indexing it used to raise TypeError.
    if np.ndim(scale) == 0:
        scale_low, scale_high = 1 - scale, 1 + scale
    else:
        scale_low, scale_high = scale[0], scale[1]

    # Rotation and scale about the origin; the angle is drawn before the
    # scale factor, so the order of random draws is fixed.
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    s = random.uniform(scale_low, scale_high)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    M = R

    # Only warp when the canvas grows or the transform is not the identity.
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():
        if perspective:
            img = cv2.warpPerspective(
                img, M, dsize=(width, height), borderValue=(114, 114, 114)
            )
        else:  # affine
            img = cv2.warpAffine(
                img, M[:2], dsize=(width, height), borderValue=(114, 114, 114)
            )

    # The labels have to follow the same transform as the pixels.
    n = len(targets)
    if n:
        # Expand every box into its four corners in homogeneous coordinates:
        # (x1, y1), (x2, y2), (x1, y2), (x2, y1).
        xy = np.ones((n * 4, 3))
        xy[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)
        xy = xy @ M.T  # .T is the matrix transpose
        if perspective:
            xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)  # homogeneous divide
        else:  # affine
            xy = xy[:, :2].reshape(n, 8)

        # New axis-aligned boxes: the bounds of the four transformed corners.
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # Clip boxes to the output canvas.
        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)

        # Keep only the boxes box_candidates accepts, comparing the scaled
        # original box against the transformed one.
        i = box_candidates(box1=targets[:, :4].T * s, box2=xy.T)
        targets = targets[i]
        targets[:, :4] = xy[i]

    return img, targets

在 YOLOX-main/yolox/data/datasets/mosaicdetection.py 中加入导入语句:from ..data_augment import random_affine, random_perspective_rotation_scale

在这里插入图片描述
在这里插入图片描述

2.修改loss_obj为focal_loss

在 YOLOX-main/yolox/models/yolo_head.py 中将 loss_obj 改为 focal_loss(第131行、第397行)
在这里插入图片描述

   # Focal-loss criterion (gamma=2, alpha=0.8) used for the objectness loss.
   self.focal_loss = Focal_Loss(gamma=2, alpha=0.8)

 
# Original objectness loss (BCE with logits), now disabled:
#         loss_obj = (
#             self.bcewithlog_loss(obj_preds.view(-1, 1), obj_targets)
#         ).sum() / num_fg
# Replacement: focal loss on the flattened objectness predictions, summed and
# divided by num_fg in the same way as the disabled expression above.
loss_obj = self.focal_loss(obj_preds.view(-1, 1), obj_targets).sum() / num_fg  # add focal_loss

在 YOLOX-main/yolox/models/losses.py 第55行加入 Focal_Loss 类的定义

class Focal_Loss(nn.Module):
    """Focal-loss wrapper around a logits-based criterion.

    The wrapped criterion is always evaluated element-wise; each element is
    then multiplied by an alpha balancing factor and by
    ``(1 - p_t) ** gamma``. The reduction the criterion was configured with
    (``"mean"``, ``"sum"`` or anything else for no reduction) is applied
    afterwards.

    Args:
        loss_fcn: Criterion to wrap, expected to be ``nn.BCEWithLogitsLoss``.
            ``None`` creates a fresh ``nn.BCEWithLogitsLoss(reduction="none")``.
            A supplied criterion is deep-copied, so the caller's object is
            never modified.
        gamma: Exponent of the modulating factor.
        alpha: Weight applied where the target is 1; ``1 - alpha`` is applied
            where the target is 0.
    """

    def __init__(self, loss_fcn=None, gamma=2, alpha=0.25):
        super(Focal_Loss, self).__init__()
        import copy  # local import so the class stays self-contained

        if loss_fcn is None:
            # Build the default per instance rather than sharing one module
            # object created at definition time.
            loss_fcn = nn.BCEWithLogitsLoss(reduction="none")
            reduction = "none"
        else:
            reduction = loss_fcn.reduction
            # Work on a private copy forced to element-wise output. The focal
            # weights must multiply each element before any reduction; a
            # criterion that already reduced to a scalar cannot be re-weighted.
            loss_fcn = copy.deepcopy(loss_fcn)
            loss_fcn.reduction = "none"

        self.loss_fcn = loss_fcn
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = reduction

    def forward(self, pred, true):
        """Return the focal loss of logits ``pred`` against targets ``true``.

        The result is reduced according to ``self.reduction``: a mean for
        ``"mean"``, a sum for ``"sum"``, otherwise the element-wise tensor.
        """
        loss = self.loss_fcn(pred, true)  # element-wise
        pred_prob = torch.sigmoid(pred)  # probabilities from logits
        # p_t is the probability assigned to the true class.
        p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
        modulating_factor = (1.0 - p_t) ** self.gamma
        loss = loss * (alpha_factor * modulating_factor)

        if self.reduction == "mean":
            return loss.mean()
        if self.reduction == "sum":
            return loss.sum()
        return loss