bbox_regression.py
Go to the documentation of this file.
00001 # --------------------------------------------------------
00002 # Deformable Convolutional Networks
00003 # Copyright (c) 2016 by Contributors
00004 # Copyright (c) 2017 Microsoft
00005 # Licensed under The Apache-2.0 License [see LICENSE for details]
00006 # Modified by Yuwen Xiong, from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
00007 # --------------------------------------------------------
00008 
00009 
00010 """
00011 This module provides functions for generating bounding box regression targets
00012 """
00013 
00014 import numpy as np
00015 
00016 from bbox_transform import bbox_overlaps, bbox_transform
00017 
00018 
def compute_bbox_regression_targets(rois, overlaps, labels, cfg):
    """
    given rois, overlaps, gt labels, compute bounding box regression targets
    :param rois: roidb[i]['boxes'] k * 4
    :param overlaps: roidb[i]['max_overlaps'] k * 1
    :param labels: roidb[i]['max_classes'] k * 1
    :param cfg: config object; cfg.TRAIN.BBOX_REGRESSION_THRESH is the minimum
        overlap a roi needs in order to receive a regression target
    :return: targets[i][class, dx, dy, dw, dh] k * 5 (float32). Rows of rois
        below the threshold stay all-zero. If no roi has overlap == 1 (no
        ground truth in the image) every row is zero.
    """
    # Ensure ROIs are floats. np.float was only an alias of the builtin float
    # (float64) and no longer exists in NumPy >= 1.24.
    rois = rois.astype(np.float64, copy=False)

    # Rows that never receive a target keep class 0 and zero deltas.
    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)

    # Sanity check (only reported, processing continues)
    if len(rois) != len(overlaps):
        print('bbox regression: this should not happen')

    # Indices of ground-truth ROIs: a gt box overlaps itself completely
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        print('something wrong : zero ground truth rois')
        # Without any gt roi there is nothing to regress to; taking argmax
        # over an empty gt axis below would raise ValueError, so bail out.
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_REGRESSION_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :])

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets
00054 
00055 
def add_bbox_regression_targets(roidb, cfg):
    """
    given roidb, add ['bbox_targets'] and normalize bounding box regression targets
    :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb
        (each entry is read for 'boxes', 'max_overlaps', 'max_classes'; the first
        entry's 'gt_overlaps' gives the class count unless cfg.CLASS_AGNOSTIC)
    :param cfg: config object; reads CLASS_AGNOSTIC,
        TRAIN.BBOX_NORMALIZATION_PRECOMPUTED and, when that is set,
        TRAIN.BBOX_MEANS / TRAIN.BBOX_STDS
    :return: means, std variances of targets, each flattened from
        (num_classes, 4) to 1-D
    """
    print('add bounding box regression targets')
    assert len(roidb) > 0
    assert 'max_classes' in roidb[0]

    num_images = len(roidb)
    # class agnostic regression only distinguishes background / foreground
    num_classes = 2 if cfg.CLASS_AGNOSTIC else roidb[0]['gt_overlaps'].shape[1]

    for im_i in range(num_images):
        rois = roidb[im_i]['boxes']
        max_overlaps = roidb[im_i]['max_overlaps']
        max_classes = roidb[im_i]['max_classes']
        roidb[im_i]['bbox_targets'] = compute_bbox_regression_targets(rois, max_overlaps, max_classes, cfg)

    if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
        # use fixed / precomputed means and stds instead of empirical values
        means = np.tile(np.array(cfg.TRAIN.BBOX_MEANS), (num_classes, 1))
        stds = np.tile(np.array(cfg.TRAIN.BBOX_STDS), (num_classes, 1))
    else:
        # compute mean, std values
        # the tiny epsilon keeps the division defined for classes without samples
        class_counts = np.zeros((num_classes, 1)) + 1e-14
        sums = np.zeros((num_classes, 4))
        squared_sums = np.zeros((num_classes, 4))
        for im_i in range(num_images):
            targets = roidb[im_i]['bbox_targets']
            for cls in range(1, num_classes):
                cls_indexes = _bbox_target_rows(targets, cls, cfg.CLASS_AGNOSTIC)
                if cls_indexes.size > 0:
                    class_counts[cls] += cls_indexes.size
                    sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0)
                    squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0)

        means = sums / class_counts
        # var(x) = E(x^2) - E(x)^2
        # NOTE(review): a class whose targets are all identical gets std 0 (or
        # NaN through rounding), which makes the division below non-finite.
        stds = np.sqrt(squared_sums / class_counts - means ** 2)

    print('bbox target means:')
    print(means)
    print(means[1:, :].mean(axis=0))  # ignore bg class
    print('bbox target stdevs:')
    print(stds)
    print(stds[1:, :].mean(axis=0))  # ignore bg class

    # normalized targets (in place on roidb[im_i]['bbox_targets'])
    for im_i in range(num_images):
        targets = roidb[im_i]['bbox_targets']
        for cls in range(1, num_classes):
            cls_indexes = _bbox_target_rows(targets, cls, cfg.CLASS_AGNOSTIC)
            targets[cls_indexes, 1:] -= means[cls, :]
            targets[cls_indexes, 1:] /= stds[cls, :]

    return means.ravel(), stds.ravel()


def _bbox_target_rows(targets, cls, class_agnostic):
    """Return the 1-D row indexes of targets that belong to cls (any foreground row when class agnostic)."""
    if class_agnostic:
        return np.where(targets[:, 0] > 0)[0]
    return np.where(targets[:, 0] == cls)[0]
00114 
00115 
def expand_bbox_regression_targets(bbox_targets_data, num_classes, cfg):
    """
    Spread compact [class, dx, dy, dw, dh] rows into one 4-wide slot per class;
    only the slot of the row's own class is filled, everything else stays zero.
    :param bbox_targets_data: [k * 5]
    :param num_classes: number of classes (replaced by 2 when cfg.CLASS_AGNOSTIC)
    :param cfg: config object; reads CLASS_AGNOSTIC and TRAIN.BBOX_WEIGHTS
    :return: (bbox_targets, bbox_weights), both float32 of shape [k * 4 num_classes];
    bbox_weights is non-zero only for foreground boxes (class > 0)
    """
    agnostic = cfg.CLASS_AGNOSTIC
    slot_count = 2 if agnostic else num_classes
    labels = bbox_targets_data[:, 0]

    expanded = np.zeros((labels.size, 4 * slot_count), dtype=np.float32)
    weights = np.zeros(expanded.shape, dtype=np.float32)

    # Background rows (class 0) are skipped, so every visited label is > 0.
    for row in np.where(labels > 0)[0]:
        if agnostic:
            # single foreground slot right after the background slot
            first_col = 4
        else:
            first_col = int(4 * labels[row])
        last_col = first_col + 4
        expanded[row, first_col:last_col] = bbox_targets_data[row, 1:]
        weights[row, first_col:last_col] = cfg.TRAIN.BBOX_WEIGHTS
    return expanded, weights
00137 


rail_object_detector
Author(s):
autogenerated on Sat Jun 8 2019 20:26:29