00001 """
00002 Proposal Operator transform anchor coordinates into ROI coordinates with prediction results on
00003 classification probability and bounding box prediction results, and image size and scale information.
00004 """
00005
00006 import mxnet as mx
00007 import numpy as np
00008 import numpy.random as npr
00009 from distutils.util import strtobool
00010
00011 from bbox.bbox_transform import bbox_pred, clip_boxes
00012 from rpn.generate_anchor import generate_anchors
00013 from nms.nms import py_nms_wrapper, cpu_nms_wrapper, gpu_nms_wrapper
00014
00015 DEBUG = False
00016
00017
00018 class ProposalOperator(mx.operator.CustomOp):
00019 def __init__(self, feat_stride, scales, ratios, output_score,
00020 rpn_pre_nms_top_n, rpn_post_nms_top_n, threshold, rpn_min_size):
00021 super(ProposalOperator, self).__init__()
00022 self._feat_stride = feat_stride
00023 self._scales = np.fromstring(scales[1:-1], dtype=float, sep=',')
00024 self._ratios = np.fromstring(ratios[1:-1], dtype=float, sep=',')
00025 self._anchors = generate_anchors(base_size=self._feat_stride, scales=self._scales, ratios=self._ratios)
00026 self._num_anchors = self._anchors.shape[0]
00027 self._output_score = output_score
00028 self._rpn_pre_nms_top_n = rpn_pre_nms_top_n
00029 self._rpn_post_nms_top_n = rpn_post_nms_top_n
00030 self._threshold = threshold
00031 self._rpn_min_size = rpn_min_size
00032
00033 if DEBUG:
00034 print 'feat_stride: {}'.format(self._feat_stride)
00035 print 'anchors:'
00036 print self._anchors
00037
00038 def forward(self, is_train, req, in_data, out_data, aux):
00039 nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)
00040
00041 batch_size = in_data[0].shape[0]
00042 if batch_size > 1:
00043 raise ValueError("Sorry, multiple images each device is not implemented")
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056 pre_nms_topN = self._rpn_pre_nms_top_n
00057 post_nms_topN = self._rpn_post_nms_top_n
00058 min_size = self._rpn_min_size
00059
00060
00061
00062 scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
00063 bbox_deltas = in_data[1].asnumpy()
00064 im_info = in_data[2].asnumpy()[0, :]
00065
00066 if DEBUG:
00067 print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
00068 print 'scale: {}'.format(im_info[2])
00069
00070
00071
00072 height, width = int(im_info[0] / self._feat_stride), int(im_info[1] / self._feat_stride)
00073
00074 if DEBUG:
00075 print 'score map size: {}'.format(scores.shape)
00076 print "resudial: {}".format((scores.shape[2] - height, scores.shape[3] - width))
00077
00078
00079 shift_x = np.arange(0, width) * self._feat_stride
00080 shift_y = np.arange(0, height) * self._feat_stride
00081 shift_x, shift_y = np.meshgrid(shift_x, shift_y)
00082 shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
00083
00084
00085
00086
00087
00088
00089
00090 A = self._num_anchors
00091 K = shifts.shape[0]
00092 anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
00093 anchors = anchors.reshape((K * A, 4))
00094
00095
00096
00097
00098
00099
00100
00101
00102 bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
00103 bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
00104
00105
00106
00107
00108
00109
00110 scores = self._clip_pad(scores, (height, width))
00111 scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
00112
00113
00114 proposals = bbox_pred(anchors, bbox_deltas)
00115
00116
00117 proposals = clip_boxes(proposals, im_info[:2])
00118
00119
00120
00121 keep = self._filter_boxes(proposals, min_size * im_info[2])
00122 proposals = proposals[keep, :]
00123 scores = scores[keep]
00124
00125
00126
00127 order = scores.ravel().argsort()[::-1]
00128 if pre_nms_topN > 0:
00129 order = order[:pre_nms_topN]
00130 proposals = proposals[order, :]
00131 scores = scores[order]
00132
00133
00134
00135
00136 det = np.hstack((proposals, scores)).astype(np.float32)
00137 keep = nms(det)
00138 if post_nms_topN > 0:
00139 keep = keep[:post_nms_topN]
00140
00141 if len(keep) < post_nms_topN:
00142 pad = npr.choice(keep, size=post_nms_topN - len(keep))
00143 keep = np.hstack((keep, pad))
00144 proposals = proposals[keep, :]
00145 scores = scores[keep]
00146
00147
00148
00149
00150 batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
00151 blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
00152 self.assign(out_data[0], req[0], blob)
00153
00154 if self._output_score:
00155 self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
00156
00157 def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
00158 self.assign(in_grad[0], req[0], 0)
00159 self.assign(in_grad[1], req[1], 0)
00160 self.assign(in_grad[2], req[2], 0)
00161
00162 @staticmethod
00163 def _filter_boxes(boxes, min_size):
00164 """ Remove all boxes with any side smaller than min_size """
00165 ws = boxes[:, 2] - boxes[:, 0] + 1
00166 hs = boxes[:, 3] - boxes[:, 1] + 1
00167 keep = np.where((ws >= min_size) & (hs >= min_size))[0]
00168 return keep
00169
00170 @staticmethod
00171 def _clip_pad(tensor, pad_shape):
00172 """
00173 Clip boxes of the pad area.
00174 :param tensor: [n, c, H, W]
00175 :param pad_shape: [h, w]
00176 :return: [n, c, h, w]
00177 """
00178 H, W = tensor.shape[2:]
00179 h, w = pad_shape
00180
00181 if h < H or w < W:
00182 tensor = tensor[:, :, :h, :w].copy()
00183
00184 return tensor
00185
00186
00187 @mx.operator.register("proposal")
00188 class ProposalProp(mx.operator.CustomOpProp):
00189 def __init__(self, feat_stride='16', scales='(8, 16, 32)', ratios='(0.5, 1, 2)', output_score='False',
00190 rpn_pre_nms_top_n='6000', rpn_post_nms_top_n='300', threshold='0.3', rpn_min_size='16'):
00191 super(ProposalProp, self).__init__(need_top_grad=False)
00192 self._feat_stride = int(feat_stride)
00193 self._scales = scales
00194 self._ratios = ratios
00195 self._output_score = strtobool(output_score)
00196 self._rpn_pre_nms_top_n = int(rpn_pre_nms_top_n)
00197 self._rpn_post_nms_top_n = int(rpn_post_nms_top_n)
00198 self._threshold = float(threshold)
00199 self._rpn_min_size = int(rpn_min_size)
00200
00201 def list_arguments(self):
00202 return ['cls_prob', 'bbox_pred', 'im_info']
00203
00204 def list_outputs(self):
00205 if self._output_score:
00206 return ['output', 'score']
00207 else:
00208 return ['output']
00209
00210 def infer_shape(self, in_shape):
00211 cls_prob_shape = in_shape[0]
00212 bbox_pred_shape = in_shape[1]
00213 assert cls_prob_shape[0] == bbox_pred_shape[0], 'ROI number does not equal in cls and reg'
00214
00215 batch_size = cls_prob_shape[0]
00216 im_info_shape = (batch_size, 3)
00217 output_shape = (self._rpn_post_nms_top_n, 5)
00218 score_shape = (self._rpn_post_nms_top_n, 1)
00219
00220 if self._output_score:
00221 return [cls_prob_shape, bbox_pred_shape, im_info_shape], [output_shape, score_shape]
00222 else:
00223 return [cls_prob_shape, bbox_pred_shape, im_info_shape], [output_shape]
00224
00225 def create_operator(self, ctx, shapes, dtypes):
00226 return ProposalOperator(self._feat_stride, self._scales, self._ratios, self._output_score,
00227 self._rpn_pre_nms_top_n, self._rpn_post_nms_top_n, self._threshold, self._rpn_min_size)
00228
00229 def declare_backward_dependency(self, out_grad, in_data, out_data):
00230 return []