00001
00002
00003
00004
00005
00006
00007
00008 """
00009 Fast R-CNN:
00010 data =
00011 {'data': [num_images, c, h, w],
00012 'rois': [num_rois, 5]}
00013 label =
00014 {'label': [num_rois],
00015 'bbox_target': [num_rois, 4 * num_classes],
00016 'bbox_weight': [num_rois, 4 * num_classes]}
00017 roidb extended format [image_index]
00018 ['image', 'height', 'width', 'flipped',
00019 'boxes', 'gt_classes', 'gt_overlaps', 'max_classes', 'max_overlaps', 'bbox_targets']
00020 """
00021
00022 import numpy as np
00023 import numpy.random as npr
00024
00025 from utils.image import get_image, tensor_vstack
00026 from bbox.bbox_transform import bbox_overlaps, bbox_transform
00027 from bbox.bbox_regression import expand_bbox_regression_targets
00028
00029
def get_rcnn_testbatch(roidb, cfg):
    """
    assemble the per-image test inputs
    :param roidb: ['image', 'flipped'] + ['boxes']
    :param cfg: configuration object, forwarded to get_image
    :return: data, label, im_info
        data is a list with one {'data', 'rois'} dict per roidb record,
        label is always an empty dict,
        im_info is a list with one float32 array per roidb record
    """
    # get_image hands back the loaded images together with an updated roidb;
    # the 'im_info' entries read below come from that updated roidb.
    imgs, roidb = get_image(roidb, cfg)

    im_info = [np.array([rec['im_info']], dtype=np.float32) for rec in roidb]

    # Each image is its own batch at test time, so every roi gets a leading
    # column of zeros as its batch index.
    rois_array = [np.hstack((np.zeros((rec['boxes'].shape[0], 1)), rec['boxes']))
                  for rec in roidb]

    data = [{'data': imgs[idx], 'rois': image_rois}
            for idx, image_rois in enumerate(rois_array)]
    label = {}

    return data, label, im_info
00050
00051
def get_rcnn_batch(roidb, cfg):
    """
    return a dict of multiple images
    :param roidb: a list of dict, whose length controls batch size.
        Each record is read for 'boxes', 'gt_overlaps', 'max_classes',
        'max_overlaps' and 'bbox_targets' (plus whatever get_image needs).
    :param cfg: configuration object; reads cfg.TRAIN.BATCH_ROIS,
        cfg.TRAIN.BATCH_IMAGES and cfg.TRAIN.FG_FRACTION, and is forwarded to
        get_image and sample_rois
    :return: data, label
        data = {'data': stacked images, 'rois': sampled rois with a leading
        batch-index column}
        label = {'label', 'bbox_target', 'bbox_weight'}
        Each per-image result is collected in a list and converted with
        np.array, so the leading axis indexes the image.
    """
    num_images = len(roidb)
    imgs, roidb = get_image(roidb, cfg)
    im_array = tensor_vstack(imgs)

    assert cfg.TRAIN.BATCH_ROIS == -1 or cfg.TRAIN.BATCH_ROIS % cfg.TRAIN.BATCH_IMAGES == 0, \
        'BATCHIMAGES {} must divide BATCH_ROIS {}'.format(cfg.TRAIN.BATCH_IMAGES, cfg.TRAIN.BATCH_ROIS)

    if cfg.TRAIN.BATCH_ROIS == -1:
        # -1 means "no fixed budget": use the total number of boxes in the
        # batch as both the roi budget and the foreground budget.
        rois_per_image = np.sum([iroidb['boxes'].shape[0] for iroidb in roidb])
        fg_rois_per_image = rois_per_image
    else:
        # Floor division keeps the budget an integer on Python 3 as well; true
        # division would yield a float that cannot be used as a sample size.
        # The assert above guarantees the division is exact.
        rois_per_image = cfg.TRAIN.BATCH_ROIS // cfg.TRAIN.BATCH_IMAGES
        fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image).astype(int)

    rois_array = list()
    labels_array = list()
    bbox_targets_array = list()
    bbox_weights_array = list()

    for im_i in range(num_images):
        roi_rec = roidb[im_i]

        # one column of gt_overlaps per class
        num_classes = roi_rec['gt_overlaps'].shape[1]

        # label = class RoI has max overlap with
        rois = roi_rec['boxes']
        labels = roi_rec['max_classes']
        overlaps = roi_rec['max_overlaps']
        bbox_targets = roi_rec['bbox_targets']

        im_rois, labels, bbox_targets, bbox_weights = \
            sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                        labels, overlaps, bbox_targets)

        # prepend the index of this image within the batch to every sampled roi
        batch_index = im_i * np.ones((im_rois.shape[0], 1))
        rois_array.append(np.hstack((batch_index, im_rois)))

        labels_array.append(labels)
        bbox_targets_array.append(bbox_targets)
        bbox_weights_array.append(bbox_weights)

    rois_array = np.array(rois_array)
    labels_array = np.array(labels_array)
    bbox_targets_array = np.array(bbox_targets_array)
    bbox_weights_array = np.array(bbox_weights_array)

    data = {'data': im_array,
            'rois': rois_array}
    label = {'label': labels_array,
             'bbox_target': bbox_targets_array,
             'bbox_weight': bbox_weights_array}

    return data, label
00118
00119
def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None):
    """
    generate random sample of ROIs comprising foreground and background examples
    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param cfg: configuration object; reads cfg.TRAIN.FG_THRESH,
        cfg.TRAIN.BG_THRESH_HI, cfg.TRAIN.BG_THRESH_LO and, when targets are
        computed here, cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED,
        cfg.TRAIN.BBOX_MEANS and cfg.TRAIN.BBOX_STDS
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :return: (rois, labels, bbox_targets, bbox_weights)
    :raises ValueError: if padding is required but rois is empty, or if
        bbox_targets must be computed here while labels were passed in
        (the gt assignment is only known when labels are derived from gt_boxes)
    """
    # The budgets are used as sample sizes below, which must be integers; a
    # caller that derived them with true division would pass floats.
    fg_rois_per_image = int(fg_rois_per_image)
    rois_per_image = int(rois_per_image)

    gt_assignment = None
    if labels is None:
        # builtin float is the dtype the removed np.float alias referred to
        overlaps = bbox_overlaps(rois[:, 1:].astype(float), gt_boxes[:, :4].astype(float))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # background fills whatever part of the budget the foreground did not use
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # foreground indexes first, then background
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad with randomly drawn rois until the budget is met
    num_rois = len(rois)
    while keep_indexes.shape[0] < rois_per_image:
        if num_rois == 0:
            # drawing zero samples from nothing would never make progress
            raise ValueError(
                'cannot pad to {} rois: no rois were given'.format(rois_per_image))
        gap = np.minimum(num_rois, rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(num_rois), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # fancy indexing copies, so the caller's labels array is not modified
    labels = labels[keep_indexes]
    # everything after the sampled foreground entries gets label 0
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        if gt_assignment is None:
            raise ValueError(
                'bbox_targets can only be computed here when labels are derived from gt_boxes')
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS))
                       / np.array(cfg.TRAIN.BBOX_STDS))
        # first column carries the class label, remaining columns the targets
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    return rois, labels, bbox_targets, bbox_weights
00186