rcnn.py
Go to the documentation of this file.
00001 # --------------------------------------------------------
00002 # Deformable Convolutional Networks
00003 # Copyright (c) 2016 by Contributors
00004 # Copyright (c) 2017 Microsoft
00005 # Licensed under The Apache-2.0 License [see LICENSE for details]
00006 # Modified by Yuwen Xiong
00007 # --------------------------------------------------------
00008 """
00009 Fast R-CNN:
00010 data =
00011     {'data': [num_images, c, h, w],
00012     'rois': [num_rois, 5]}
00013 label =
00014     {'label': [num_rois],
00015     'bbox_target': [num_rois, 4 * num_classes],
00016     'bbox_weight': [num_rois, 4 * num_classes]}
00017 roidb extended format [image_index]
00018     ['image', 'height', 'width', 'flipped',
00019      'boxes', 'gt_classes', 'gt_overlaps', 'max_classes', 'max_overlaps', 'bbox_targets']
00020 """
00021 
00022 import numpy as np
00023 import numpy.random as npr
00024 
00025 from utils.image import get_image, tensor_vstack
00026 from bbox.bbox_transform import bbox_overlaps, bbox_transform
00027 from bbox.bbox_regression import expand_bbox_regression_targets
00028 
00029 
def get_rcnn_testbatch(roidb, cfg):
    """
    Build a Fast R-CNN test batch (one dict per image).

    :param roidb: list of roidb dicts with ['image', 'flipped'] + ['boxes'];
                  entries are updated by get_image (adds 'im_info')
    :param cfg: config object consumed by get_image for scaling/transforms
    :return: (data, label, im_info) where data is a list of
             {'data': image tensor, 'rois': [n, 5] array}, label is an
             empty dict at test time, and im_info is a list of [1, 3] arrays
    """
    # assert len(roidb) == 1, 'Single batch only'
    imgs, roidb = get_image(roidb, cfg)
    im_array = imgs
    im_info = [np.array([roidb[i]['im_info']], dtype=np.float32) for i in range(len(roidb))]

    im_rois = [roidb[i]['boxes'] for i in range(len(roidb))]
    rois = im_rois
    # prepend the batch-index column; it is always 0 here because each
    # data dict holds a single image (np.zeros == the original 0 * np.ones)
    rois_array = [np.hstack((np.zeros((rois[i].shape[0], 1)), rois[i])) for i in range(len(rois))]

    data = [{'data': im_array[i],
             'rois': rois_array[i]} for i in range(len(roidb))]
    label = {}

    return data, label, im_info
00050 
00051 
def get_rcnn_batch(roidb, cfg):
    """
    Build a Fast R-CNN training batch from multiple images.

    :param roidb: list of roidb dicts, whose length controls the batch size;
                  ['images', 'flipped'] + ['gt_boxes', 'boxes', 'gt_overlap'] => ['bbox_targets']
    :param cfg: config with TRAIN.BATCH_ROIS, TRAIN.BATCH_IMAGES, TRAIN.FG_FRACTION
    :return: (data, label) dicts; 'rois' carry a per-image batch index column
    """
    num_images = len(roidb)
    imgs, roidb = get_image(roidb, cfg)
    im_array = tensor_vstack(imgs)

    assert cfg.TRAIN.BATCH_ROIS == -1 or cfg.TRAIN.BATCH_ROIS % cfg.TRAIN.BATCH_IMAGES == 0, \
        'BATCHIMAGES {} must divide BATCH_ROIS {}'.format(cfg.TRAIN.BATCH_IMAGES, cfg.TRAIN.BATCH_ROIS)

    if cfg.TRAIN.BATCH_ROIS == -1:
        # take every roi from every image; all of them are treated as fg budget
        rois_per_image = np.sum([iroidb['boxes'].shape[0] for iroidb in roidb])
        fg_rois_per_image = rois_per_image
    else:
        # floor division: under Python 3, '/' would make this a float and
        # poison every downstream size computation; the assert above
        # guarantees exact divisibility, so '//' is lossless
        rois_per_image = cfg.TRAIN.BATCH_ROIS // cfg.TRAIN.BATCH_IMAGES
        fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image).astype(int)

    rois_array = list()
    labels_array = list()
    bbox_targets_array = list()
    bbox_weights_array = list()

    for im_i in range(num_images):
        roi_rec = roidb[im_i]

        # infer num_classes from gt_overlaps
        num_classes = roi_rec['gt_overlaps'].shape[1]

        # label = class RoI has max overlap with
        rois = roi_rec['boxes']
        labels = roi_rec['max_classes']
        overlaps = roi_rec['max_overlaps']
        bbox_targets = roi_rec['bbox_targets']

        im_rois, labels, bbox_targets, bbox_weights = \
            sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                        labels, overlaps, bbox_targets)

        # project im_rois
        # do not round roi
        rois = im_rois
        # batch index column ties each roi back to its image in im_array
        batch_index = im_i * np.ones((rois.shape[0], 1))
        rois_array_this_image = np.hstack((batch_index, rois))
        rois_array.append(rois_array_this_image)

        # add labels
        labels_array.append(labels)
        bbox_targets_array.append(bbox_targets)
        bbox_weights_array.append(bbox_weights)

    # per-image roi counts are equal (fixed rois_per_image), so np.array
    # stacks these into [num_images, rois_per_image, ...] tensors
    rois_array = np.array(rois_array)
    labels_array = np.array(labels_array)
    bbox_targets_array = np.array(bbox_targets_array)
    bbox_weights_array = np.array(bbox_weights_array)

    data = {'data': im_array,
            'rois': rois_array}
    label = {'label': labels_array,
             'bbox_target': bbox_targets_array,
             'bbox_weight': bbox_weights_array}

    return data, label
00118 
00119 
def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None):
    """
    Generate a random sample of RoIs comprising foreground and background examples.

    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index in column 0
    :param fg_rois_per_image: foreground roi budget per image
    :param rois_per_image: total roi number per image
    :param num_classes: number of classes
    :param cfg: config with TRAIN.FG_THRESH, BG_THRESH_HI/LO, and bbox
                normalization settings
    :param labels: maybe precomputed (Fast R-CNN path)
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed; when None, gt_boxes must be
                         given AND labels must be None, since the target
                         computation reuses gt_assignment from the e2e branch
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :return: (rois, labels, bbox_targets, bbox_weights)
    """
    if labels is None:
        # e2e path: assign each roi to its best-overlapping gt box.
        # builtin float replaces np.float, which was removed in NumPy 1.24
        # (it was always an alias for the builtin / float64)
        overlaps = bbox_overlaps(rois[:, 1:].astype(float), gt_boxes[:, :4].astype(float))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected: foreground first, then background — the order matters
    # for the label zeroing below
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad with random (possibly duplicate) rois to ensure a fixed minibatch size
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels
    labels = labels[keep_indexes]
    # set labels of bg (and padding) rois to be 0 — everything after the fg slice
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        # e2e only: gt_assignment comes from the 'labels is None' branch above
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS))
                       / np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    # expand class-agnostic targets into per-class layout [n, 4 * num_classes]
    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    return rois, labels, bbox_targets, bbox_weights
00186 


rail_object_detector
Author(s):
autogenerated on Sat Jun 8 2019 20:26:30