rail_object_detector: proposal.py Source File

Go to the documentation of this file.
00001 """
00002 Proposal Operator transform anchor coordinates into ROI coordinates with prediction results on
00003 classification probability and bounding box prediction results, and image size and scale information.
00004 """
00005 
00006 import mxnet as mx
00007 import numpy as np
00008 import numpy.random as npr
00009 from distutils.util import strtobool
00010 
00011 from bbox.bbox_transform import bbox_pred, clip_boxes
00012 from rpn.generate_anchor import generate_anchors
00013 from nms.nms import py_nms_wrapper, cpu_nms_wrapper, gpu_nms_wrapper
00014 
00015 DEBUG = False
00016 
00017 
00018 class ProposalOperator(mx.operator.CustomOp):
00019     def __init__(self, feat_stride, scales, ratios, output_score,
00020                  rpn_pre_nms_top_n, rpn_post_nms_top_n, threshold, rpn_min_size):
00021         super(ProposalOperator, self).__init__()
00022         self._feat_stride = feat_stride
00023         self._scales = np.fromstring(scales[1:-1], dtype=float, sep=',')
00024         self._ratios = np.fromstring(ratios[1:-1], dtype=float, sep=',')
00025         self._anchors = generate_anchors(base_size=self._feat_stride, scales=self._scales, ratios=self._ratios)
00026         self._num_anchors = self._anchors.shape[0]
00027         self._output_score = output_score
00028         self._rpn_pre_nms_top_n = rpn_pre_nms_top_n
00029         self._rpn_post_nms_top_n = rpn_post_nms_top_n
00030         self._threshold = threshold
00031         self._rpn_min_size = rpn_min_size
00032 
00033         if DEBUG:
00034             print 'feat_stride: {}'.format(self._feat_stride)
00035             print 'anchors:'
00036             print self._anchors
00037 
00038     def forward(self, is_train, req, in_data, out_data, aux):
00039         nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)
00040 
00041         batch_size = in_data[0].shape[0]
00042         if batch_size > 1:
00043             raise ValueError("Sorry, multiple images each device is not implemented")
00044 
00045         # for each (H, W) location i
00046         #   generate A anchor boxes centered on cell i
00047         #   apply predicted bbox deltas at cell i to each of the A anchors
00048         # clip predicted boxes to image
00049         # remove predicted boxes with either height or width < threshold
00050         # sort all (proposal, score) pairs by score from highest to lowest
00051         # take top pre_nms_topN proposals before NMS
00052         # apply NMS with threshold 0.7 to remaining proposals
00053         # take after_nms_topN proposals after NMS
00054         # return the top proposals (-> RoIs top, scores top)
00055 
00056         pre_nms_topN = self._rpn_pre_nms_top_n
00057         post_nms_topN = self._rpn_post_nms_top_n
00058         min_size = self._rpn_min_size
00059 
00060         # the first set of anchors are background probabilities
00061         # keep the second part
00062         scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
00063         bbox_deltas = in_data[1].asnumpy()
00064         im_info = in_data[2].asnumpy()[0, :]
00065 
00066         if DEBUG:
00067             print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
00068             print 'scale: {}'.format(im_info[2])
00069 
00070         # 1. Generate proposals from bbox_deltas and shifted anchors
00071         # use real image size instead of padded feature map sizes
00072         height, width = int(im_info[0] / self._feat_stride), int(im_info[1] / self._feat_stride)
00073 
00074         if DEBUG:
00075             print 'score map size: {}'.format(scores.shape)
00076             print "resudial: {}".format((scores.shape[2] - height, scores.shape[3] - width))
00077 
00078         # Enumerate all shifts
00079         shift_x = np.arange(0, width) * self._feat_stride
00080         shift_y = np.arange(0, height) * self._feat_stride
00081         shift_x, shift_y = np.meshgrid(shift_x, shift_y)
00082         shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
00083 
00084         # Enumerate all shifted anchors:
00085         #
00086         # add A anchors (1, A, 4) to
00087         # cell K shifts (K, 1, 4) to get
00088         # shift anchors (K, A, 4)
00089         # reshape to (K*A, 4) shifted anchors
00090         A = self._num_anchors
00091         K = shifts.shape[0]
00092         anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
00093         anchors = anchors.reshape((K * A, 4))
00094 
00095         # Transpose and reshape predicted bbox transformations to get them
00096         # into the same order as the anchors:
00097         #
00098         # bbox deltas will be (1, 4 * A, H, W) format
00099         # transpose to (1, H, W, 4 * A)
00100         # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
00101         # in slowest to fastest order
00102         bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
00103         bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
00104 
00105         # Same story for the scores:
00106         #
00107         # scores are (1, A, H, W) format
00108         # transpose to (1, H, W, A)
00109         # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
00110         scores = self._clip_pad(scores, (height, width))
00111         scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
00112 
00113         # Convert anchors into proposals via bbox transformations
00114         proposals = bbox_pred(anchors, bbox_deltas)
00115 
00116         # 2. clip predicted boxes to image
00117         proposals = clip_boxes(proposals, im_info[:2])
00118 
00119         # 3. remove predicted boxes with either height or width < threshold
00120         # (NOTE: convert min_size to input image scale stored in im_info[2])
00121         keep = self._filter_boxes(proposals, min_size * im_info[2])
00122         proposals = proposals[keep, :]
00123         scores = scores[keep]
00124 
00125         # 4. sort all (proposal, score) pairs by score from highest to lowest
00126         # 5. take top pre_nms_topN (e.g. 6000)
00127         order = scores.ravel().argsort()[::-1]
00128         if pre_nms_topN > 0:
00129             order = order[:pre_nms_topN]
00130         proposals = proposals[order, :]
00131         scores = scores[order]
00132 
00133         # 6. apply nms (e.g. threshold = 0.7)
00134         # 7. take after_nms_topN (e.g. 300)
00135         # 8. return the top proposals (-> RoIs top)
00136         det = np.hstack((proposals, scores)).astype(np.float32)
00137         keep = nms(det)
00138         if post_nms_topN > 0:
00139             keep = keep[:post_nms_topN]
00140         # pad to ensure output size remains unchanged
00141         if len(keep) < post_nms_topN:
00142             pad = npr.choice(keep, size=post_nms_topN - len(keep))
00143             keep = np.hstack((keep, pad))
00144         proposals = proposals[keep, :]
00145         scores = scores[keep]
00146 
00147         # Output rois array
00148         # Our RPN implementation only supports a single input image, so all
00149         # batch inds are 0
00150         batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
00151         blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
00152         self.assign(out_data[0], req[0], blob)
00153 
00154         if self._output_score:
00155             self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
00156 
00157     def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
00158         self.assign(in_grad[0], req[0], 0)
00159         self.assign(in_grad[1], req[1], 0)
00160         self.assign(in_grad[2], req[2], 0)
00161 
00162     @staticmethod
00163     def _filter_boxes(boxes, min_size):
00164         """ Remove all boxes with any side smaller than min_size """
00165         ws = boxes[:, 2] - boxes[:, 0] + 1
00166         hs = boxes[:, 3] - boxes[:, 1] + 1
00167         keep = np.where((ws >= min_size) & (hs >= min_size))[0]
00168         return keep
00169 
00170     @staticmethod
00171     def _clip_pad(tensor, pad_shape):
00172         """
00173         Clip boxes of the pad area.
00174         :param tensor: [n, c, H, W]
00175         :param pad_shape: [h, w]
00176         :return: [n, c, h, w]
00177         """
00178         H, W = tensor.shape[2:]
00179         h, w = pad_shape
00180 
00181         if h < H or w < W:
00182             tensor = tensor[:, :, :h, :w].copy()
00183 
00184         return tensor
00185 
00186 
00187 @mx.operator.register("proposal")
00188 class ProposalProp(mx.operator.CustomOpProp):
00189     def __init__(self, feat_stride='16', scales='(8, 16, 32)', ratios='(0.5, 1, 2)', output_score='False',
00190                  rpn_pre_nms_top_n='6000', rpn_post_nms_top_n='300', threshold='0.3', rpn_min_size='16'):
00191         super(ProposalProp, self).__init__(need_top_grad=False)
00192         self._feat_stride = int(feat_stride)
00193         self._scales = scales
00194         self._ratios = ratios
00195         self._output_score = strtobool(output_score)
00196         self._rpn_pre_nms_top_n = int(rpn_pre_nms_top_n)
00197         self._rpn_post_nms_top_n = int(rpn_post_nms_top_n)
00198         self._threshold = float(threshold)
00199         self._rpn_min_size = int(rpn_min_size)
00200 
00201     def list_arguments(self):
00202         return ['cls_prob', 'bbox_pred', 'im_info']
00203 
00204     def list_outputs(self):
00205         if self._output_score:
00206             return ['output', 'score']
00207         else:
00208             return ['output']
00209 
00210     def infer_shape(self, in_shape):
00211         cls_prob_shape = in_shape[0]
00212         bbox_pred_shape = in_shape[1]
00213         assert cls_prob_shape[0] == bbox_pred_shape[0], 'ROI number does not equal in cls and reg'
00214 
00215         batch_size = cls_prob_shape[0]
00216         im_info_shape = (batch_size, 3)
00217         output_shape = (self._rpn_post_nms_top_n, 5)
00218         score_shape = (self._rpn_post_nms_top_n, 1)
00219 
00220         if self._output_score:
00221             return [cls_prob_shape, bbox_pred_shape, im_info_shape], [output_shape, score_shape]
00222         else:
00223             return [cls_prob_shape, bbox_pred_shape, im_info_shape], [output_shape]
00224 
00225     def create_operator(self, ctx, shapes, dtypes):
00226         return ProposalOperator(self._feat_stride, self._scales, self._ratios, self._output_score,
00227                                 self._rpn_pre_nms_top_n, self._rpn_post_nms_top_n, self._threshold, self._rpn_min_size)
00228 
00229     def declare_backward_dependency(self, out_grad, in_data, out_data):
00230         return []