00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 """
00011 This file has functions about generating bounding box regression targets
00012 """
00013
00014 import numpy as np
00015
00016 from bbox_transform import bbox_overlaps, bbox_transform
00017
00018
def compute_bbox_regression_targets(rois, overlaps, labels, cfg):
    """
    given rois, overlaps, gt labels, compute bounding box regression targets
    :param rois: roidb[i]['boxes'] k * 4
    :param overlaps: roidb[i]['max_overlaps'] k * 1
    :param labels: roidb[i]['max_classes'] k * 1
    :param cfg: config object; cfg.TRAIN.BBOX_REGRESSION_THRESH is the minimum overlap of an example roi
    :return: targets[i][class, dx, dy, dw, dh] k * 5; rows of rois that are not examples stay all zero
    """
    # ensure rois are floats; np.float was only an alias of the builtin float
    # (float64) and no longer exists in recent numpy releases
    rois = rois.astype(np.float64, copy=False)

    # sanity check: one overlap value is expected per roi
    if len(rois) != len(overlaps):
        print('bbox regression: this should not happen')

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)

    # rois whose overlap is exactly 1 are taken as the ground truth boxes
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        print('something wrong : zero ground truth rois')
        # nothing to regress towards; the argmax below would raise on an empty gt axis
        return targets

    # examples are the rois whose overlap reaches the regression threshold
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_REGRESSION_THRESH)[0]

    # overlaps between the example rois and the ground truth rois; rows index
    # examples and columns index ground truth (as the argmax over axis 1 assumes)
    ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :])

    # each example roi is paired with the ground truth roi it overlaps most
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets
00054
00055
def add_bbox_regression_targets(roidb, cfg):
    """
    given roidb, add ['bbox_targets'] and normalize bounding box regression targets
    :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb
    :param cfg: config object; reads CLASS_AGNOSTIC, TRAIN.BBOX_NORMALIZATION_PRECOMPUTED and,
        when precomputed, TRAIN.BBOX_MEANS / TRAIN.BBOX_STDS
    :return: means, std variances of targets, both flattened with ravel()
    """
    print('add bounding box regression targets')
    assert len(roidb) > 0
    assert 'max_classes' in roidb[0]

    num_images = len(roidb)
    # class agnostic regression keeps only two slots: background (0) and foreground (1)
    num_classes = 2 if cfg.CLASS_AGNOSTIC else roidb[0]['gt_overlaps'].shape[1]

    for im_i in range(num_images):
        rois = roidb[im_i]['boxes']
        max_overlaps = roidb[im_i]['max_overlaps']
        max_classes = roidb[im_i]['max_classes']
        roidb[im_i]['bbox_targets'] = compute_bbox_regression_targets(rois, max_overlaps, max_classes, cfg)

    if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
        # use the configured means and stds for every class instead of empirical values
        means = np.tile(np.array(cfg.TRAIN.BBOX_MEANS), (num_classes, 1))
        stds = np.tile(np.array(cfg.TRAIN.BBOX_STDS), (num_classes, 1))
    else:
        # accumulate per class sums over all images; the epsilon keeps classes
        # without any sample from dividing by zero
        class_counts = np.zeros((num_classes, 1)) + 1e-14
        sums = np.zeros((num_classes, 4))
        squared_sums = np.zeros((num_classes, 4))
        for im_i in range(num_images):
            targets = roidb[im_i]['bbox_targets']
            for cls in range(1, num_classes):
                cls_indexes = _bbox_target_rows(targets, cls, cfg.CLASS_AGNOSTIC)
                if cls_indexes.size > 0:
                    class_counts[cls] += cls_indexes.size
                    sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0)
                    squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0)

        means = sums / class_counts
        # var(x) = E(x^2) - E(x)^2; rounding can push it slightly below zero,
        # which would turn the std into nan, so clamp before the square root
        stds = np.sqrt(np.maximum(squared_sums / class_counts - means ** 2, 0))

    # row 0 (background) is skipped in the averaged summaries
    print('bbox target means:')
    print(means)
    print(means[1:, :].mean(axis=0))
    print('bbox target stdevs:')
    print(stds)
    print(stds[1:, :].mean(axis=0))

    # normalize the targets in place; `targets` is the very array stored in roidb
    for im_i in range(num_images):
        targets = roidb[im_i]['bbox_targets']
        for cls in range(1, num_classes):
            cls_indexes = _bbox_target_rows(targets, cls, cfg.CLASS_AGNOSTIC)
            targets[cls_indexes, 1:] -= means[cls, :]
            targets[cls_indexes, 1:] /= stds[cls, :]

    return means.ravel(), stds.ravel()


def _bbox_target_rows(targets, cls, class_agnostic):
    """
    row indexes of targets labelled cls; every row with a positive label when class agnostic
    """
    if class_agnostic:
        return np.where(targets[:, 0] > 0)[0]
    return np.where(targets[:, 0] == cls)[0]
00114
00115
def expand_bbox_regression_targets(bbox_targets_data, num_classes, cfg):
    """
    expand from 5 to 4 * num_classes; only the right class has non-zero bbox regression targets
    :param bbox_targets_data: [k * 5]
    :param num_classes: number of classes; replaced by 2 when cfg.CLASS_AGNOSTIC is set
    :param cfg: config object; reads CLASS_AGNOSTIC and TRAIN.BBOX_WEIGHTS
    :return: bbox target processed [k * 4 num_classes]
    bbox_weights ! only foreground boxes have bbox regression computation!
    """
    classes = bbox_targets_data[:, 0]
    class_agnostic = cfg.CLASS_AGNOSTIC
    if class_agnostic:
        num_classes = 2
    bbox_targets = np.zeros((classes.size, 4 * num_classes), dtype=np.float32)
    bbox_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
    # only rows with a positive class label receive targets and weights
    for index in np.where(classes > 0)[0]:
        # class agnostic: every foreground row uses the second 4-column slot;
        # otherwise the slot is picked by the class label
        start = 4 if class_agnostic else int(4 * classes[index])
        end = start + 4
        bbox_targets[index, start:end] = bbox_targets_data[index, 1:]
        bbox_weights[index, start:end] = cfg.TRAIN.BBOX_WEIGHTS
    return bbox_targets, bbox_weights
00137