34 from chainer
import cuda
35 from chainer
import function
36 from chainer.utils
import type_check
40 start = int(numpy.floor(size * stride))
41 end = int(numpy.ceil((size + 1) * stride))
43 start = min(max(start + roi_offset, 0), max_size)
44 end = min(max(end + roi_offset, 0), max_size)
46 return slice(start, end), end - start
51 """RoI pooling over a set of 2d planes.""" 54 self.outh, self.
outw = outh, outw
58 type_check.expect(in_types.size() == 2)
60 x_type, roi_type = in_types
62 x_type.dtype == numpy.float32,
64 roi_type.dtype == numpy.float32,
66 roi_type.shape[1] == 5,
70 bottom_data, bottom_rois = inputs
71 n_rois, channels, height, width = bottom_data.shape
72 top_data = numpy.empty((n_rois, channels, self.outh, self.
outw),
74 self.
argmax_data = numpy.empty_like(top_data).astype(numpy.int32)
76 for i_roi
in six.moves.range(n_rois):
77 idx, xmin, ymin, xmax, ymax = bottom_rois[i_roi]
82 roi_width = max(xmax - xmin + 1, 1)
83 roi_height = max(ymax - ymin + 1, 1)
84 strideh = 1. * roi_height / self.outh
85 stridew = 1. * roi_width / self.
outw 87 for outh
in six.moves.range(self.outh):
89 outh, strideh, height, ymin)
90 if sliceh.stop <= sliceh.start:
92 for outw
in six.moves.range(self.
outw):
94 outw, stridew, width, xmin)
95 if slicew.stop <= slicew.start:
97 roi_data = bottom_data[int(idx), :, sliceh, slicew]\
98 .reshape(channels, -1)
99 top_data[i_roi, :, outh, outw] =\
100 numpy.max(roi_data, axis=1)
103 max_idx_slice = numpy.unravel_index(
104 numpy.argmax(roi_data, axis=1), (lenh, lenw))
105 max_idx_slice_h = max_idx_slice[0] + sliceh.start
106 max_idx_slice_w = max_idx_slice[1] + slicew.start
107 max_idx_slice = max_idx_slice_h * width + max_idx_slice_w
108 self.
argmax_data[i_roi, :, outh, outw] = max_idx_slice
112 bottom_data, bottom_rois = inputs
113 channels, height, width = bottom_data.shape[1:]
114 n_rois = bottom_rois.shape[0]
115 top_data = cuda.cupy.empty((n_rois, channels, self.outh,
116 self.
outw), dtype=numpy.float32)
117 self.
argmax_data = cuda.cupy.empty_like(top_data).astype(numpy.int32)
118 cuda.cupy.ElementwiseKernel(
120 raw float32 bottom_data, float32 spatial_scale, int32 channels, 121 int32 height, int32 width, int32 pooled_height, int32 pooled_width, 122 raw float32 bottom_rois 124 'float32 top_data, int32 argmax_data',
126 // pos in output filter 127 int pw = i % pooled_width; 128 int ph = (i / pooled_width) % pooled_height; 129 int c = (i / pooled_width / pooled_height) % channels; 130 int num = i / pooled_width / pooled_height / channels; 132 int roi_batch_ind = bottom_rois[num * 5 + 0]; 133 int roi_start_w = round(bottom_rois[num * 5 + 1] * spatial_scale); 134 int roi_start_h = round(bottom_rois[num * 5 + 2] * spatial_scale); 135 int roi_end_w = round(bottom_rois[num * 5 + 3] * spatial_scale); 136 int roi_end_h = round(bottom_rois[num * 5 + 4] * spatial_scale); 138 // Force malformed ROIs to be 1x1 139 int roi_width = max(roi_end_w - roi_start_w + 1, 1); 140 int roi_height = max(roi_end_h - roi_start_h + 1, 1); 141 float bin_size_h = static_cast<float>(roi_height) 142 / static_cast<float>(pooled_height); 143 float bin_size_w = static_cast<float>(roi_width) 144 / static_cast<float>(pooled_width); 146 int hstart = static_cast<int>(floor(static_cast<float>(ph) 148 int wstart = static_cast<int>(floor(static_cast<float>(pw) 150 int hend = static_cast<int>(ceil(static_cast<float>(ph + 1) 152 int wend = static_cast<int>(ceil(static_cast<float>(pw + 1) 155 // Add roi offsets and clip to input boundaries 156 hstart = min(max(hstart + roi_start_h, 0), height); 157 hend = min(max(hend + roi_start_h, 0), height); 158 wstart = min(max(wstart + roi_start_w, 0), width); 159 wend = min(max(wend + roi_start_w, 0), width); 160 bool is_empty = (hend <= hstart) || (wend <= wstart); 162 // Define an empty pooling region to be zero 163 float maxval = is_empty ? 
0 : -1E+37; 164 // If nothing is pooled, argmax=-1 causes nothing to be backprop'd 166 int data_offset = (roi_batch_ind * channels + c) * height * width; 167 for (int h = hstart; h < hend; ++h) { 168 for (int w = wstart; w < wend; ++w) { 169 int bottom_index = h * width + w; 170 if (bottom_data[data_offset + bottom_index] > maxval) { 171 maxval = bottom_data[data_offset + bottom_index]; 172 maxidx = bottom_index; 177 argmax_data = maxidx; 178 ''',
'roi_poolig_2d_fwd' 180 self.outh, self.
outw, bottom_rois, top_data,
186 bottom_data, bottom_rois = inputs
187 n_rois, channels, height, width = bottom_data.shape
188 bottom_delta = numpy.zeros_like(bottom_data, dtype=numpy.float32)
190 for i_roi
in six.moves.range(n_rois):
191 idx, xmin, ymin, xmax, ymax = bottom_rois[i_roi]
197 roi_width = max(xmax - xmin + 1, 1)
198 roi_height = max(ymax - ymin + 1, 1)
200 strideh = float(roi_height) / float(self.outh)
201 stridew = float(roi_width) / float(self.
outw)
204 for w
in six.moves.range(xmin, xmax + 1):
205 for h
in six.moves.range(ymin, ymax + 1):
206 phstart = int(numpy.floor(float(h - ymin) / strideh))
207 phend = int(numpy.ceil(float(h - ymin + 1) / strideh))
208 pwstart = int(numpy.floor(float(w - xmin) / stridew))
209 pwend = int(numpy.ceil(float(w - xmin + 1) / stridew))
211 phstart = min(max(phstart, 0), self.outh)
212 phend = min(max(phend, 0), self.outh)
213 pwstart = min(max(pwstart, 0), self.
outw)
214 pwend = min(max(pwend, 0), self.
outw)
216 for ph
in six.moves.range(phstart, phend):
217 for pw
in six.moves.range(pwstart, pwend):
219 for c
in six.moves.range(channels):
220 if max_idx_tmp[c] == (h * width + w):
221 bottom_delta[idx, c, h, w] += \
222 gy[0][i_roi, c, ph, pw]
223 return bottom_delta,
None 226 bottom_data, bottom_rois = inputs
227 channels, height, width = bottom_data.shape[1:]
228 bottom_diff = cuda.cupy.zeros_like(bottom_data, dtype=numpy.float32)
229 cuda.cupy.ElementwiseKernel(
231 raw float32 top_diff, raw int32 argmax_data, int32 num_rois, 232 float32 spatial_scale, int32 channels, int32 height, int32 width, 233 int32 pooled_height, int32 pooled_width, raw float32 bottom_rois 235 'float32 bottom_diff',
238 int h = (i / width) % height; 239 int c = (i / (width * height)) % channels; 240 int num = i / (width * height * channels); 243 // Accumulate gradient over all ROIs that pooled this element 244 for (int roi_n = 0; roi_n < num_rois; ++roi_n) { 245 // Skip if ROI's batch index doesn't match num 246 if (num != static_cast<int>(bottom_rois[roi_n * 5])) { 250 int roi_start_w = round(bottom_rois[roi_n * 5 + 1] 252 int roi_start_h = round(bottom_rois[roi_n * 5 + 2] 254 int roi_end_w = round(bottom_rois[roi_n * 5 + 3] 256 int roi_end_h = round(bottom_rois[roi_n * 5 + 4] 259 // Skip if ROI doesn't include (h, w) 260 const bool in_roi = (w >= roi_start_w && w <= roi_end_w && 261 h >= roi_start_h && h <= roi_end_h); 266 int offset = (roi_n * channels + c) * pooled_height 269 // Compute feasible set of pooled units that could have pooled 272 // Force malformed ROIs to be 1x1 273 int roi_width = max(roi_end_w - roi_start_w + 1, 1); 274 int roi_height = max(roi_end_h - roi_start_h + 1, 1); 276 float bin_size_h = static_cast<float>(roi_height) 277 / static_cast<float>(pooled_height); 278 float bin_size_w = static_cast<float>(roi_width) 279 / static_cast<float>(pooled_width); 281 int phstart = floor(static_cast<float>(h - roi_start_h) 283 int phend = ceil(static_cast<float>(h - roi_start_h + 1) 285 int pwstart = floor(static_cast<float>(w - roi_start_w) 287 int pwend = ceil(static_cast<float>(w - roi_start_w + 1) 290 phstart = min(max(phstart, 0), pooled_height); 291 phend = min(max(phend, 0), pooled_height); 292 pwstart = min(max(pwstart, 0), pooled_width); 293 pwend = min(max(pwend, 0), pooled_width); 295 for (int ph = phstart; ph < phend; ++ph) { 296 for (int pw = pwstart; pw < pwend; ++pw) { 297 int index_ = ph * pooled_width + pw + offset; 298 if (argmax_data[index_] == (h * width + w)) { 299 gradient += top_diff[index_]; 304 bottom_diff = gradient; 305 ''',
'roi_pooling_2d_bwd' 307 channels, height, width, self.outh, self.
outw,
308 bottom_rois, bottom_diff)
310 return bottom_diff,
None 314 """Spatial Region of Interest (ROI) pooling function. 316 This function acts similarly to :class:`~functions.MaxPooling2D`, but 317 it computes the maximum of the input spatial patch for each channel 318 within the region of interest. 321 x (~chainer.Variable): Input variable. 322 rois (~chainer.Variable): Input roi variable. 323 outh (int): Height of output image after pooling. 324 outw (int): Width of output image after pooling. 325 spatial_scale (float): Scale factor applied to the roi coordinates. 328 ~chainer.Variable: Output variable. 330 See the original paper proposing ROIPooling: 331 `Fast R-CNN <http://arxiv.org/abs/1504.08083>`_. def backward_cpu(self, inputs, gy)
def __init__(self, outh, outw, spatial_scale)
def check_type_forward(self, in_types)
def _roi_pooling_slice(size, stride, max_size, roi_offset)
def forward_cpu(self, inputs)
def roi_pooling_2d(x, rois, outh, outw, spatial_scale)
def backward_gpu(self, inputs, gy)
def forward_gpu(self, inputs)