roi_pooling_2d.py
Go to the documentation of this file.
1 # Modified work:
2 # -----------------------------------------------------------------------------
3 # Copyright (c) 2015 Preferred Infrastructure, Inc.
4 # Copyright (c) 2015 Preferred Networks, Inc.
5 # -----------------------------------------------------------------------------
6 
7 # Original work of _roi_pooling_slice, forward_cpu and backward_cpu:
8 # -----------------------------------------------------------------------------
9 # Copyright 2014 Nervana Systems Inc.
10 # Licensed under the Apache License, Version 2.0 (the "License");
11 # you may not use this file except in compliance with the License.
12 # You may obtain a copy of the License at
13 #
14 # http://www.apache.org/licenses/LICENSE-2.0
15 #
16 # Unless required by applicable law or agreed to in writing, software
17 # distributed under the License is distributed on an "AS IS" BASIS,
18 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 # See the License for the specific language governing permissions and
20 # limitations under the License.
21 # -----------------------------------------------------------------------------
22 
23 # Original work of forward_gpu and backward_gpu:
24 # -----------------------------------------------------------------------------
25 # Fast R-CNN
26 # Copyright (c) 2015 Microsoft
27 # Licensed under The MIT License [see fast-rcnn/LICENSE for details]
28 # Written by Ross Girshick
29 # -----------------------------------------------------------------------------
30 
31 import numpy
32 import six
33 
34 from chainer import cuda
35 from chainer import function
36 from chainer.utils import type_check
37 
38 
39 def _roi_pooling_slice(size, stride, max_size, roi_offset):
40  start = int(numpy.floor(size * stride))
41  end = int(numpy.ceil((size + 1) * stride))
42 
43  start = min(max(start + roi_offset, 0), max_size)
44  end = min(max(end + roi_offset, 0), max_size)
45 
46  return slice(start, end), end - start
47 
48 
class ROIPooling2D(function.Function):

    """RoI max pooling over a set of 2d planes.

    Each row of the RoI input is ``(batch_index, x_min, y_min, x_max,
    y_max)``.  The four coordinates are multiplied by ``spatial_scale`` and
    rounded to obtain feature-map coordinates; the resulting window is split
    into ``outh x outw`` bins and the maximum of every bin is taken per
    channel.  Bins that do not overlap the feature map produce ``0`` and an
    argmax of ``-1`` so that nothing is back-propagated through them.

    Args:
        outh (int): Number of output bins along the height.
        outw (int): Number of output bins along the width.
        spatial_scale (float): Factor applied to the RoI coordinates.

    """

    def __init__(self, outh, outw, spatial_scale):
        self.outh, self.outw = outh, outw
        self.spatial_scale = spatial_scale

    def check_type_forward(self, in_types):
        """Require a 4-d float32 input and a float32 ``(n_rois, 5)`` RoI."""
        type_check.expect(in_types.size() == 2)

        x_type, roi_type = in_types
        type_check.expect(
            x_type.dtype == numpy.float32,
            x_type.ndim == 4,
            roi_type.dtype == numpy.float32,
            roi_type.ndim == 2,
            roi_type.shape[1] == 5,
        )

    def _scaled_roi(self, roi):
        """Convert one RoI row to integer feature-map coordinates.

        Returns:
            tuple: ``(batch_index, xmin, ymin, xmax, ymax)`` as Python ints.

        """
        idx, xmin, ymin, xmax, ymax = roi
        xmin = int(round(xmin * self.spatial_scale))
        xmax = int(round(xmax * self.spatial_scale))
        ymin = int(round(ymin * self.spatial_scale))
        ymax = int(round(ymax * self.spatial_scale))
        return int(idx), xmin, ymin, xmax, ymax

    def forward_cpu(self, inputs):
        """Pool every RoI on the CPU.

        Stores the flat ``h * width + w`` index of every maximum in
        ``self.argmax_data`` for use by :meth:`backward_cpu`.

        Returns:
            tuple: One array of shape ``(n_rois, channels, outh, outw)``.

        """
        bottom_data, bottom_rois = inputs
        channels, height, width = bottom_data.shape[1:]
        # The number of outputs is the number of RoIs, not the batch size.
        n_rois = bottom_rois.shape[0]
        # Empty bins are skipped below, so both buffers must start out
        # initialised: 0 for the pooled value and -1 ("no source element")
        # for the argmax, which is what the GPU kernel produces as well.
        top_data = numpy.zeros((n_rois, channels, self.outh, self.outw),
                               dtype=numpy.float32)
        self.argmax_data = numpy.full(top_data.shape, -1, dtype=numpy.int32)

        for i_roi in six.moves.range(n_rois):
            idx, xmin, ymin, xmax, ymax = self._scaled_roi(
                bottom_rois[i_roi])
            # Malformed RoIs are forced to be at least 1x1.
            roi_width = max(xmax - xmin + 1, 1)
            roi_height = max(ymax - ymin + 1, 1)
            strideh = 1. * roi_height / self.outh
            stridew = 1. * roi_width / self.outw

            for outh in six.moves.range(self.outh):
                sliceh, lenh = _roi_pooling_slice(
                    outh, strideh, height, ymin)
                if sliceh.stop <= sliceh.start:
                    continue
                for outw in six.moves.range(self.outw):
                    slicew, lenw = _roi_pooling_slice(
                        outw, stridew, width, xmin)
                    if slicew.stop <= slicew.start:
                        continue
                    roi_data = bottom_data[idx, :, sliceh, slicew]\
                        .reshape(channels, -1)
                    top_data[i_roi, :, outh, outw] =\
                        numpy.max(roi_data, axis=1)

                    # Translate the argmax inside the bin into a flat index
                    # in feature-map coordinates.
                    max_idx_slice = numpy.unravel_index(
                        numpy.argmax(roi_data, axis=1), (lenh, lenw))
                    max_idx_slice_h = max_idx_slice[0] + sliceh.start
                    max_idx_slice_w = max_idx_slice[1] + slicew.start
                    max_idx_slice = max_idx_slice_h * width + max_idx_slice_w
                    self.argmax_data[i_roi, :, outh, outw] = max_idx_slice
        return top_data,

    def forward_gpu(self, inputs):
        """Pool every RoI on the GPU, one output element per kernel index.

        Stores the flat ``h * width + w`` index of every maximum (``-1`` for
        empty bins) in ``self.argmax_data``.

        Returns:
            tuple: One array of shape ``(n_rois, channels, outh, outw)``.

        """
        bottom_data, bottom_rois = inputs
        channels, height, width = bottom_data.shape[1:]
        n_rois = bottom_rois.shape[0]
        top_data = cuda.cupy.empty((n_rois, channels, self.outh,
                                    self.outw), dtype=numpy.float32)
        # The kernel writes every element, so no initialisation is needed.
        self.argmax_data = cuda.cupy.empty(top_data.shape,
                                           dtype=numpy.int32)
        cuda.cupy.ElementwiseKernel(
            '''
            raw float32 bottom_data, float32 spatial_scale, int32 channels,
            int32 height, int32 width, int32 pooled_height, int32 pooled_width,
            raw float32 bottom_rois
            ''',
            'float32 top_data, int32 argmax_data',
            '''
            // pos in output filter
            int pw = i % pooled_width;
            int ph = (i / pooled_width) % pooled_height;
            int c = (i / pooled_width / pooled_height) % channels;
            int num = i / pooled_width / pooled_height / channels;

            int roi_batch_ind = bottom_rois[num * 5 + 0];
            int roi_start_w = round(bottom_rois[num * 5 + 1] * spatial_scale);
            int roi_start_h = round(bottom_rois[num * 5 + 2] * spatial_scale);
            int roi_end_w = round(bottom_rois[num * 5 + 3] * spatial_scale);
            int roi_end_h = round(bottom_rois[num * 5 + 4] * spatial_scale);

            // Force malformed ROIs to be 1x1
            int roi_width = max(roi_end_w - roi_start_w + 1, 1);
            int roi_height = max(roi_end_h - roi_start_h + 1, 1);
            float bin_size_h = static_cast<float>(roi_height)
                           / static_cast<float>(pooled_height);
            float bin_size_w = static_cast<float>(roi_width)
                           / static_cast<float>(pooled_width);

            int hstart = static_cast<int>(floor(static_cast<float>(ph)
                                          * bin_size_h));
            int wstart = static_cast<int>(floor(static_cast<float>(pw)
                                          * bin_size_w));
            int hend = static_cast<int>(ceil(static_cast<float>(ph + 1)
                                        * bin_size_h));
            int wend = static_cast<int>(ceil(static_cast<float>(pw + 1)
                                        * bin_size_w));

            // Add roi offsets and clip to input boundaries
            hstart = min(max(hstart + roi_start_h, 0), height);
            hend = min(max(hend + roi_start_h, 0), height);
            wstart = min(max(wstart + roi_start_w, 0), width);
            wend = min(max(wend + roi_start_w, 0), width);
            bool is_empty = (hend <= hstart) || (wend <= wstart);

            // Define an empty pooling region to be zero
            float maxval = is_empty ? 0 : -1E+37;
            // If nothing is pooled, argmax=-1 causes nothing to be backprop'd
            int maxidx = -1;
            int data_offset = (roi_batch_ind * channels + c) * height * width;
            for (int h = hstart; h < hend; ++h) {
                for (int w = wstart; w < wend; ++w) {
                    int bottom_index = h * width + w;
                    if (bottom_data[data_offset + bottom_index] > maxval) {
                        maxval = bottom_data[data_offset + bottom_index];
                        maxidx = bottom_index;
                    }
                }
            }
            top_data = maxval;
            argmax_data = maxidx;
            ''', 'roi_poolig_2d_fwd'
        )(bottom_data, self.spatial_scale, channels, height, width,
          self.outh, self.outw, bottom_rois, top_data,
          self.argmax_data)

        return top_data,

    def backward_cpu(self, inputs, gy):
        """Route the output gradient back to the argmax positions (CPU).

        Relies on ``self.argmax_data`` written by :meth:`forward_cpu`.

        Returns:
            tuple: ``(gradient w.r.t. the input, None)``; the RoIs receive
            no gradient.

        """
        bottom_data, bottom_rois = inputs
        channels, height, width = bottom_data.shape[1:]
        # Iterate over the RoIs, whose count is independent of the batch size.
        n_rois = bottom_rois.shape[0]
        bottom_delta = numpy.zeros_like(bottom_data, dtype=numpy.float32)
        top_delta = gy[0]

        for i_roi in six.moves.range(n_rois):
            idx, xmin, ymin, xmax, ymax = self._scaled_roi(
                bottom_rois[i_roi])
            roi_width = max(xmax - xmin + 1, 1)
            roi_height = max(ymax - ymin + 1, 1)

            strideh = float(roi_height) / float(self.outh)
            stridew = float(roi_width) / float(self.outw)

            # Visit every feature-map cell covered by this RoI.  The ranges
            # are clipped to the map so that RoIs reaching outside of it can
            # neither raise IndexError nor wrap around via negative indices.
            for w in six.moves.range(max(xmin, 0), min(xmax + 1, width)):
                pwstart = int(numpy.floor(float(w - xmin) / stridew))
                pwend = int(numpy.ceil(float(w - xmin + 1) / stridew))
                pwstart = min(max(pwstart, 0), self.outw)
                pwend = min(max(pwend, 0), self.outw)

                for h in six.moves.range(max(ymin, 0),
                                         min(ymax + 1, height)):
                    phstart = int(numpy.floor(float(h - ymin) / strideh))
                    phend = int(numpy.ceil(float(h - ymin + 1) / strideh))
                    phstart = min(max(phstart, 0), self.outh)
                    phend = min(max(phend, 0), self.outh)

                    flat_index = h * width + w
                    # Only output bins in this window can have pooled (h, w).
                    for ph in six.moves.range(phstart, phend):
                        for pw in six.moves.range(pwstart, pwend):
                            # Channels whose maximum came from (h, w).
                            hit = (self.argmax_data[i_roi, :, ph, pw]
                                   == flat_index)
                            bottom_delta[idx, hit, h, w] += \
                                top_delta[i_roi, hit, ph, pw]
        return bottom_delta, None

    def backward_gpu(self, inputs, gy):
        """Route the output gradient back to the argmax positions (GPU).

        Relies on ``self.argmax_data`` written by :meth:`forward_gpu`.  Each
        kernel index handles one input element and sums the gradient of all
        output bins, over all RoIs, whose argmax points at it.

        Returns:
            tuple: ``(gradient w.r.t. the input, None)``; the RoIs receive
            no gradient.

        """
        bottom_data, bottom_rois = inputs
        channels, height, width = bottom_data.shape[1:]
        bottom_diff = cuda.cupy.zeros_like(bottom_data, dtype=numpy.float32)
        cuda.cupy.ElementwiseKernel(
            '''
            raw float32 top_diff, raw int32 argmax_data, int32 num_rois,
            float32 spatial_scale, int32 channels, int32 height, int32 width,
            int32 pooled_height, int32 pooled_width, raw float32 bottom_rois
            ''',
            'float32 bottom_diff',
            '''
            int w = i % width;
            int h = (i / width) % height;
            int c = (i / (width * height)) % channels;
            int num = i / (width * height * channels);

            float gradient = 0;
            // Accumulate gradient over all ROIs that pooled this element
            for (int roi_n = 0; roi_n < num_rois; ++roi_n) {
                // Skip if ROI's batch index doesn't match num
                if (num != static_cast<int>(bottom_rois[roi_n * 5])) {
                    continue;
                }

                int roi_start_w = round(bottom_rois[roi_n * 5 + 1]
                                        * spatial_scale);
                int roi_start_h = round(bottom_rois[roi_n * 5 + 2]
                                        * spatial_scale);
                int roi_end_w = round(bottom_rois[roi_n * 5 + 3]
                                      * spatial_scale);
                int roi_end_h = round(bottom_rois[roi_n * 5 + 4]
                                      * spatial_scale);

                // Skip if ROI doesn't include (h, w)
                const bool in_roi = (w >= roi_start_w && w <= roi_end_w &&
                                     h >= roi_start_h && h <= roi_end_h);
                if (!in_roi) {
                    continue;
                }

                int offset = (roi_n * channels + c) * pooled_height
                             * pooled_width;

                // Compute feasible set of pooled units that could have pooled
                // this bottom unit

                // Force malformed ROIs to be 1x1
                int roi_width = max(roi_end_w - roi_start_w + 1, 1);
                int roi_height = max(roi_end_h - roi_start_h + 1, 1);

                float bin_size_h = static_cast<float>(roi_height)
                               / static_cast<float>(pooled_height);
                float bin_size_w = static_cast<float>(roi_width)
                               / static_cast<float>(pooled_width);

                int phstart = floor(static_cast<float>(h - roi_start_h)
                                    / bin_size_h);
                int phend = ceil(static_cast<float>(h - roi_start_h + 1)
                                 / bin_size_h);
                int pwstart = floor(static_cast<float>(w - roi_start_w)
                                    / bin_size_w);
                int pwend = ceil(static_cast<float>(w - roi_start_w + 1)
                                 / bin_size_w);

                phstart = min(max(phstart, 0), pooled_height);
                phend = min(max(phend, 0), pooled_height);
                pwstart = min(max(pwstart, 0), pooled_width);
                pwend = min(max(pwend, 0), pooled_width);

                for (int ph = phstart; ph < phend; ++ph) {
                    for (int pw = pwstart; pw < pwend; ++pw) {
                        int index_ = ph * pooled_width + pw + offset;
                        if (argmax_data[index_] == (h * width + w)) {
                            gradient += top_diff[index_];
                        }
                    }
                }
            }
            bottom_diff = gradient;
            ''', 'roi_pooling_2d_bwd'
        )(gy[0], self.argmax_data, bottom_rois.shape[0], self.spatial_scale,
          channels, height, width, self.outh, self.outw,
          bottom_rois, bottom_diff)

        return bottom_diff, None
311 
312 
def roi_pooling_2d(x, rois, outh, outw, spatial_scale):
    """Spatial Region of Interest (ROI) pooling function.

    This function acts similarly to :class:`~functions.MaxPooling2D`, but
    it computes the maximum of the input spatial patch for each channel
    within each region of interest.

    Args:
        x (~chainer.Variable): Input variable.
        rois (~chainer.Variable): Input roi variable. Each row holds five
            values: the batch index followed by ``x_min, y_min, x_max,
            y_max``.
        outh (int): Height of the output image after pooling.
        outw (int): Width of the output image after pooling.
        spatial_scale (float): Scale by which the roi coordinates are
            multiplied.

    Returns:
        ~chainer.Variable: Output variable.

    See the original paper proposing ROIPooling:
    `Fast R-CNN <http://arxiv.org/abs/1504.08083>`_.

    """
    pooling = ROIPooling2D(outh, outw, spatial_scale)
    return pooling(x, rois)
def _roi_pooling_slice(size, stride, max_size, roi_offset)
def roi_pooling_2d(x, rois, outh, outw, spatial_scale)


jsk_recognition_utils
Author(s):
autogenerated on Mon May 3 2021 03:03:03