[yolox优化] 提升ap
1.增加尺度变换
去掉马赛克之后,就什么数据增强都没有了,需要增加尺度变换
YOLOX-main/yolox/data/data_augment.py 增加一个函数 #163行
def random_perspective_rotation_scale(  # newly added rotation/scale augmentation
    img,
    targets=(),
    degrees=10,
    scale=0.1,
    perspective=0.0,
    border=(0, 0),
):
    """Randomly rotate and scale an image and remap its boxes to match.

    Args:
        img: Image array; ``img.shape[0]`` and ``img.shape[1]`` are read as
            height and width.
        targets: Label array with one row per box. Columns 0-3 are read as
            ``x1, y1, x2, y2``; any further columns are carried through
            unchanged. An empty value is returned as is.
        degrees: The rotation angle is drawn uniformly from
            ``[-degrees, degrees]``.
        scale: Either a ``(low, high)`` pair bounding the scale factor, or a
            single number ``d`` meaning the range ``[1 - d, 1 + d]``.
        perspective: If truthy the image is warped with
            ``cv2.warpPerspective``, otherwise with ``cv2.warpAffine``. The
            matrix built here contains no perspective terms.
        border: ``(b0, b1)``; the output size is
            ``(height + 2 * b0, width + 2 * b1)``.

    Returns:
        ``(img, targets)``: the warped image and the boxes that survived
        clipping and the ``box_candidates`` filter.
    """
    height = img.shape[0] + border[0] * 2  # shape is (h, w, c)
    width = img.shape[1] + border[1] * 2

    # The default ``scale=0.1`` is a scalar, so indexing it directly would
    # raise TypeError; accept both a scalar spread and a (low, high) pair.
    if np.ndim(scale) == 0:
        scale_low, scale_high = 1 - scale, 1 + scale
    else:
        scale_low, scale_high = scale[0], scale[1]

    # Rotation and scale. The rotation is taken about the image origin
    # (center=(0, 0)); no re-centring or translation is applied here.
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    s = random.uniform(scale_low, scale_high)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
    M = R

    # To disable this augmentation outside of mosaic, use s = 1.0 and
    # M = np.eye(3) instead.

    # Only warp when the transform or the border actually changes the image.
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():
        if perspective:
            img = cv2.warpPerspective(
                img, M, dsize=(width, height), borderValue=(114, 114, 114)
            )
        else:  # affine
            img = cv2.warpAffine(
                img, M[:2], dsize=(width, height), borderValue=(114, 114, 114)
            )

    # The labels have to go through the same transform as the image.
    n = len(targets)
    if n:
        # Homogeneous coordinates of the four corners of every box, in the
        # order (x1, y1), (x2, y2), (x1, y2), (x2, y1).
        corners = np.ones((n * 4, 3))
        corners[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)
        corners = corners @ M.T  # .T is the matrix transpose
        if perspective:
            corners = (corners[:, :2] / corners[:, 2:3]).reshape(n, 8)  # rescale
        else:  # affine
            corners = corners[:, :2].reshape(n, 8)

        # New axis-aligned boxes: the bounding box of the warped corners.
        x = corners[:, [0, 2, 4, 6]]
        y = corners[:, [1, 3, 5, 7]]
        xy = np.concatenate(
            (x.min(1), y.min(1), x.max(1), y.max(1))
        ).reshape(4, n).T

        # Clip coordinates that fall outside the output image.
        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)

        # Filter candidates. box_candidates is defined elsewhere; per the
        # original note it drops boxes that became too thin or too small
        # relative to the (scaled) source box -- see that helper for the
        # exact thresholds.
        keep = box_candidates(box1=targets[:, :4].T * s, box2=xy.T)
        targets = targets[keep]
        targets[:, :4] = xy[keep]

    return img, targets
在 YOLOX-main/yolox/data/datasets/mosaicdetection.py 文件开头的导入部分加入:
from ..data_augment import random_affine, random_perspective_rotation_scale
2.修改loss_obj为focal_loss
YOLOX-main/yolox/models/yolo_head.py 修改loss_obj为focal_loss #131、397
# Create the focal-loss module and keep it on the head. alpha=0.8 replaces
# Focal_Loss's own default of 0.25.
# NOTE(review): presumably this assignment belongs in the head's constructor
# and Focal_Loss has to be imported from losses.py -- confirm.
self.focal_loss = Focal_Loss(gamma=2, alpha=0.8)
# Previous objectness loss, kept for reference but commented out:
# loss_obj = ( # obj_loss 666s
# self.bcewithlog_loss(obj_preds.view(-1, 1), obj_targets)
# ).sum() / num_fg
# Replacement: focal loss on the objectness logits, summed and then divided
# by num_fg, mirroring the expression it replaces.
loss_obj = self.focal_loss(obj_preds.view(-1, 1), obj_targets).sum() / num_fg # add focal_loss
在 YOLOX-main/yolox/models/losses.py 第55行处加入 Focal_Loss 类的定义:
class Focal_Loss(nn.Module):
    """Focal loss wrapped around a binary cross-entropy-with-logits criterion.

    Each element's BCE loss is multiplied by an alpha balancing factor and by
    the modulating factor ``(1 - p_t) ** gamma``.

    Args:
        loss_fcn: The base criterion; must be an ``nn.BCEWithLogitsLoss``.
            Its ``reduction`` setting selects how ``forward`` reduces the
            focal loss (``'none'``, ``'mean'`` or ``'sum'``). ``None`` (the
            default) creates a fresh ``nn.BCEWithLogitsLoss(reduction="none")``.
        gamma: Exponent of the modulating factor.
        alpha: Weight applied where ``true`` is 1; ``1 - alpha`` is applied
            where ``true`` is 0.
    """

    def __init__(self, loss_fcn=None, gamma=2, alpha=0.25):
        super().__init__()
        if loss_fcn is None:
            # Build the default per instance instead of sharing one module
            # object created at function-definition time.
            loss_fcn = nn.BCEWithLogitsLoss(reduction="none")
        self.gamma = gamma
        self.alpha = alpha
        # Remember the reduction the caller asked for; forward() applies it
        # after the focal weighting.
        self.reduction = loss_fcn.reduction
        if loss_fcn.reduction != "none":
            # The focal factors are applied per element, so the wrapped
            # criterion must not reduce. Work on a shallow copy so the
            # caller's module keeps its own reduction setting.
            import copy

            loss_fcn = copy.copy(loss_fcn)
            loss_fcn.reduction = "none"
        self.loss_fcn = loss_fcn

    def forward(self, pred, true):
        """Return the focal loss of logits ``pred`` against targets ``true``.

        The result is elementwise when ``self.reduction`` is ``'none'`` and a
        scalar for ``'mean'`` / ``'sum'``.
        """
        loss = self.loss_fcn(pred, true)  # elementwise BCE
        pred_prob = torch.sigmoid(pred)  # probabilities from logits
        # p_t is the probability assigned to the correct label.
        p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
        modulating_factor = (1.0 - p_t) ** self.gamma
        loss = loss * (alpha_factor * modulating_factor)

        if self.reduction == "mean":
            return loss.mean()
        if self.reduction == "sum":
            return loss.sum()
        return loss