fcn8s_depth_predicition.py
Go to the documentation of this file.
1 import chainer
2 from chainer import cuda
3 import chainer.functions as F
4 import chainer.links as L
5 import fcn
6 import numpy as np
7 
8 
class FCN8sDepthPrediction(chainer.Chain):
    """FCN-8s style network with a label-mask head and a depth head.

    Two VGG16-shaped convolution trunks are registered:

    * ``conv_rgb_*`` consumes ``rgb`` and feeds the mask head
      (``rgb_fc6``, ``rgb_fc7``, ``mask_*``).
    * ``conv_depth_*`` consumes ``depth_viz`` and feeds the depth head
      (``concat_fc6`` or ``depth_fc6``, ``concat_fc7``, ``depth_*``).

    Both heads fuse pool3/pool4 skip scores and upsample back to the
    spatial size of ``rgb``.
    """

    # [0.2, 3]
    # NOTE: the methods of this class never read min_depth / max_depth.
    min_depth = 0.2
    max_depth = 3.

    # Absolute-error thresholds reported as 'depth_acc<...' by compute_loss.
    _depth_acc_thresholds = (
        0.01, 0.02, 0.03, 0.04, 0.05, 0.07, 0.10, 0.15, 0.20,
        0.25, 0.30, 0.40, 0.50, 0.70, 1.00)

    def __init__(self, n_class, masking=True, concat=True):
        """Register all links of the network.

        Args:
            n_class: Number of output channels of the mask head.
            masking: When ``True``, ``predict_depth`` zeroes the depth
                pool5 features wherever the predicted mask label is not 0.
            concat: When ``True``, rgb pool5 and depth pool5 features are
                concatenated and fed to ``concat_fc6`` (1024 input
                channels); otherwise only the depth features are fed to
                ``depth_fc6`` (512 input channels).
        """
        self.n_class = n_class
        self.masking = masking
        self.concat = concat
        kwargs = {
            'initialW': chainer.initializers.Zero(),
            'initial_bias': chainer.initializers.Zero(),
        }
        # Name the class explicitly: ``super(self.__class__, self)`` resolves
        # to this very constructor again when called on a subclass instance
        # and therefore recurses without end.
        super(FCN8sDepthPrediction, self).__init__()
        with self.init_scope():
            # ---- RGB trunk (VGG16 layout, first conv padded by 100) ----
            self.conv_rgb_1_1 = L.Convolution2D(3, 64, 3, 1, 100, **kwargs)
            self.conv_rgb_1_2 = L.Convolution2D(64, 64, 3, 1, 1, **kwargs)

            self.conv_rgb_2_1 = L.Convolution2D(64, 128, 3, 1, 1, **kwargs)
            self.conv_rgb_2_2 = L.Convolution2D(128, 128, 3, 1, 1, **kwargs)

            self.conv_rgb_3_1 = L.Convolution2D(128, 256, 3, 1, 1, **kwargs)
            self.conv_rgb_3_2 = L.Convolution2D(256, 256, 3, 1, 1, **kwargs)
            self.conv_rgb_3_3 = L.Convolution2D(256, 256, 3, 1, 1, **kwargs)

            self.conv_rgb_4_1 = L.Convolution2D(256, 512, 3, 1, 1, **kwargs)
            self.conv_rgb_4_2 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv_rgb_4_3 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)

            self.conv_rgb_5_1 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv_rgb_5_2 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv_rgb_5_3 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)

            self.rgb_fc6 = L.Convolution2D(512, 4096, 7, 1, 0, **kwargs)
            self.rgb_fc7 = L.Convolution2D(4096, 4096, 1, 1, 0, **kwargs)

            # ---- mask head ----
            self.mask_score_fr = L.Convolution2D(
                4096, n_class, 1, 1, 0, **kwargs)

            self.mask_upscore2 = L.Deconvolution2D(
                n_class, n_class, 4, 2, 0, nobias=True,
                initialW=fcn.initializers.UpsamplingDeconvWeight())
            self.mask_upscore8 = L.Deconvolution2D(
                n_class, n_class, 16, 8, 0, nobias=True,
                initialW=fcn.initializers.UpsamplingDeconvWeight())

            self.mask_score_pool3 = L.Convolution2D(
                256, n_class, 1, 1, 0, **kwargs)
            self.mask_score_pool4 = L.Convolution2D(
                512, n_class, 1, 1, 0, **kwargs)

            self.mask_upscore_pool4 = L.Deconvolution2D(
                n_class, n_class, 4, 2, 0, nobias=True,
                initialW=fcn.initializers.UpsamplingDeconvWeight())

            # ---- depth trunk (same layout as the RGB trunk) ----
            self.conv_depth_1_1 = L.Convolution2D(3, 64, 3, 1, 100, **kwargs)
            self.conv_depth_1_2 = L.Convolution2D(64, 64, 3, 1, 1, **kwargs)

            self.conv_depth_2_1 = L.Convolution2D(64, 128, 3, 1, 1, **kwargs)
            self.conv_depth_2_2 = L.Convolution2D(128, 128, 3, 1, 1, **kwargs)

            self.conv_depth_3_1 = L.Convolution2D(128, 256, 3, 1, 1, **kwargs)
            self.conv_depth_3_2 = L.Convolution2D(256, 256, 3, 1, 1, **kwargs)
            self.conv_depth_3_3 = L.Convolution2D(256, 256, 3, 1, 1, **kwargs)

            self.conv_depth_4_1 = L.Convolution2D(256, 512, 3, 1, 1, **kwargs)
            self.conv_depth_4_2 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv_depth_4_3 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)

            self.conv_depth_5_1 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv_depth_5_2 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv_depth_5_3 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)

            # ---- depth head ----
            if self.concat is True:
                self.concat_fc6 = L.Convolution2D(
                    1024, 4096, 7, 1, 0, **kwargs)
            else:
                self.depth_fc6 = L.Convolution2D(512, 4096, 7, 1, 0, **kwargs)

            self.concat_fc7 = L.Convolution2D(4096, 4096, 1, 1, 0, **kwargs)

            self.depth_score_fr = L.Convolution2D(4096, 1, 1, 1, 0, **kwargs)

            self.depth_upscore2 = L.Deconvolution2D(
                1, 1, 4, 2, 0, nobias=True,
                initialW=fcn.initializers.UpsamplingDeconvWeight())
            self.depth_upscore8 = L.Deconvolution2D(
                1, 1, 16, 8, 0, nobias=True,
                initialW=fcn.initializers.UpsamplingDeconvWeight())

            self.depth_score_pool3 = L.Convolution2D(
                256, 1, 1, 1, 0, **kwargs)
            self.depth_score_pool4 = L.Convolution2D(
                512, 1, 1, 1, 0, **kwargs)

            self.depth_upscore_pool4 = L.Deconvolution2D(
                1, 1, 4, 2, 0, nobias=True,
                initialW=fcn.initializers.UpsamplingDeconvWeight())

    def _vgg_trunk(self, branch, x):
        """Run the five conv stages ``conv_<branch>_<stage>_<i>`` on ``x``.

        Every convolution is followed by ReLU and every stage by a 2x2
        max pooling with stride 2.

        Returns:
            The outputs of pooling stages 3, 4 and 5 (1/8, 1/16 and 1/32).
        """
        pooled = []
        h = x
        for stage, n_conv in enumerate((2, 2, 3, 3, 3), start=1):
            for i in range(1, n_conv + 1):
                conv = getattr(self, 'conv_%s_%d_%d' % (branch, stage, i))
                h = F.relu(conv(h))
            h = F.max_pooling_2d(h, 2, stride=2, pad=0)
            pooled.append(h)
        return pooled[2], pooled[3], pooled[4]

    def _decode_fcn8s(self, head, score_fr, pool3, pool4, rgb):
        """Fuse skip scores and upsample ``score_fr`` to the size of ``rgb``.

        Uses the links ``<head>_score_pool3``, ``<head>_score_pool4``,
        ``<head>_upscore2``, ``<head>_upscore_pool4`` and
        ``<head>_upscore8``.  The skip scores and the final output are
        cropped at the fixed offsets 9, 5 and 31 respectively.
        """
        score_pool3 = getattr(self, head + '_score_pool3')(
            0.0001 * pool3)  # 1/8
        score_pool4 = getattr(self, head + '_score_pool4')(
            0.01 * pool4)  # 1/16

        upscore2 = getattr(self, head + '_upscore2')(score_fr)  # 1/16
        score_pool4c = score_pool4[:, :,
                                   5:5 + upscore2.data.shape[2],
                                   5:5 + upscore2.data.shape[3]]
        fuse_pool4 = upscore2 + score_pool4c  # 1/16

        upscore_pool4 = getattr(self, head + '_upscore_pool4')(
            fuse_pool4)  # 1/8
        score_pool3c = score_pool3[:, :,
                                   9:9 + upscore_pool4.data.shape[2],
                                   9:9 + upscore_pool4.data.shape[3]]
        fuse_pool3 = upscore_pool4 + score_pool3c  # 1/8

        upscore8 = getattr(self, head + '_upscore8')(fuse_pool3)  # 1/1
        return upscore8[:, :,
                        31:31 + rgb.shape[2],
                        31:31 + rgb.shape[3]]

    def predict_mask(self, rgb, return_pool5=False):
        """Compute the per-class mask score from ``rgb``.

        Args:
            rgb: Input batch; indexed as (N, C, H, W).
            return_pool5: When true, also return the RGB pool5 features.

        Returns:
            ``mask_score`` cropped to the spatial size of ``rgb``, or the
            tuple ``(mask_score, rgb_pool5)`` when ``return_pool5`` is true.
        """
        rgb_pool3, rgb_pool4, rgb_pool5 = self._vgg_trunk('rgb', rgb)

        h = F.dropout(F.relu(self.rgb_fc6(rgb_pool5)), ratio=.5)  # 1/32
        h = F.dropout(F.relu(self.rgb_fc7(h)), ratio=.5)  # 1/32
        mask_score_fr = self.mask_score_fr(h)  # 1/32

        mask_score = self._decode_fcn8s(
            'mask', mask_score_fr, rgb_pool3, rgb_pool4, rgb)  # 1/1

        if return_pool5:
            return mask_score, rgb_pool5
        return mask_score

    def predict_depth(self, rgb, mask_score, depth_viz, rgb_pool5):
        """Compute the depth score from ``depth_viz`` (and ``rgb_pool5``).

        Args:
            rgb: Input batch; only its spatial size is used here (for the
                final crop).
            mask_score: Mask score used to build the feature mask when
                ``self.masking`` is ``True``.  ``None`` falls back to
                ``self.mask_score`` as stored by ``__call__``.
            depth_viz: Input of the depth trunk.
            rgb_pool5: RGB pool5 features; used only when ``self.concat``
                is ``True``.

        Returns:
            The single-channel depth score cropped to the spatial size of
            ``rgb``.
        """
        depth_pool3, depth_pool4, depth_pool5 = self._vgg_trunk(
            'depth', depth_viz)

        if self.masking is True:
            # The argument used to be ignored in favour of self.mask_score;
            # honour it, but keep the attribute as a fallback.
            if mask_score is None:
                mask_score = self.mask_score
            # (N, C, H, W) -> (N, H, W)
            mask_pred = F.argmax(mask_score, axis=1)
            # (N, H, W) -> (N, 1, H, W), float required for resizing
            mask_pred = mask_pred[:, None, :, :].data.astype(
                self.xp.float32)  # 1/1
            resized_mask_pred = F.resize_images(
                mask_pred,
                (depth_pool5.shape[2], depth_pool5.shape[3]))  # 1/32
            # Keep features only where the resized label is exactly 0.
            keep = (resized_mask_pred.data == 0.0).astype(self.xp.float32)
            masked_depth_pool5 = depth_pool5 * keep
        else:
            masked_depth_pool5 = depth_pool5

        if self.concat is True:
            concat_pool5 = F.concat((rgb_pool5, masked_depth_pool5), axis=1)
            h = F.relu(self.concat_fc6(concat_pool5))
        else:
            h = F.relu(self.depth_fc6(masked_depth_pool5))
        h = F.dropout(h, ratio=.5)  # 1/32

        h = F.dropout(F.relu(self.concat_fc7(h)), ratio=.5)  # 1/32
        depth_score_fr = self.depth_score_fr(h)  # 1/32

        return self._decode_fcn8s(
            'depth', depth_score_fr, depth_pool3, depth_pool4, rgb)  # 1/1

    def compute_loss_mask(self, mask_score, true_mask):
        """Return the normalized softmax cross entropy of the mask score."""
        return F.softmax_cross_entropy(
            mask_score, true_mask, normalize=True)

    def _logcosh_loss(self, depth_pred, true_depth, keep, n_pixels):
        """Sum of log-cosh errors over ``keep``, divided by ``n_pixels``.

        Returns the plain integer 0 when ``keep`` selects nothing.
        """
        if self.xp.sum(keep) == 0:
            return 0
        error = (depth_pred[keep[self.xp.newaxis, :, :, :]] -
                 true_depth[keep])
        return F.sum(F.log(F.cosh(error))) / n_pixels

    def compute_loss_depth(self, depth_pred, true_mask, true_depth):
        """Return the depth regression loss.

        The loss is the sum of two log-cosh terms: one over every pixel
        whose ground-truth depth is not NaN, and one restricted to pixels
        that additionally have ``true_mask > 0``.  Both terms are divided
        by ``true_depth.shape[1] * true_depth.shape[2]``.  (The second
        term used to be divided by the *sum* of the two sizes, which was
        inconsistent with the first term.)

        Args:
            depth_pred: Predicted depth (chainer Variable).
            true_mask: Ground-truth labels; must be int32.
            true_depth: Ground-truth depth; NaN marks missing values.
        """
        assert true_mask.dtype == self.xp.int32

        n_pixels = true_depth.shape[1] * true_depth.shape[2]

        # Whole region
        keep_regardless_mask = ~self.xp.isnan(true_depth)
        depth_loss_regardless_mask = self._logcosh_loss(
            depth_pred, true_depth, keep_regardless_mask, n_pixels)

        # Only masked region
        keep_only_mask = self.xp.logical_and(
            true_mask > 0, ~self.xp.isnan(true_depth))
        depth_loss_only_mask = self._logcosh_loss(
            depth_pred, true_depth, keep_only_mask, n_pixels)

        # Regression loss
        coef = [1, 1]
        return (coef[0] * depth_loss_regardless_mask +
                coef[1] * depth_loss_only_mask)

    @staticmethod
    def _depth_accuracy(true_mask, mask_pred, true_depth, depth_pred,
                        thresholds):
        """Depth accuracy per threshold, keyed by ``'%.2f' % threshold``.

        Accuracy is {TP and (|error| < thresh)} / (TP or FP or FN), where
        positives are pixels with label > 0.  Without any true foreground
        the accuracy is 1.0 if nothing was predicted as foreground either,
        else 0.0.
        """
        t_lbl_fg = true_mask > 0
        p_lbl_fg = mask_pred > 0
        if np.sum(t_lbl_fg) == 0:
            acc = 1.0 if np.sum(p_lbl_fg) == 0 else 0.0
            return dict(('%.2f' % thresh, acc) for thresh in thresholds)

        # Everything below is independent of the threshold: compute it once.
        true_depth_cp = np.copy(true_depth)
        # NaN ground truth must never count as a hit.
        true_depth_cp[np.isnan(true_depth_cp)] = np.inf
        abs_error = np.abs(true_depth_cp - depth_pred)
        true_positive = np.logical_and(t_lbl_fg, p_lbl_fg)
        denom = np.sum(np.logical_or(t_lbl_fg, p_lbl_fg))

        depth_acc = {}
        for thresh in thresholds:
            numer = np.sum(np.logical_and(true_positive, abs_error < thresh))
            depth_acc['%.2f' % thresh] = 1. * numer / denom
        return depth_acc

    def compute_loss(self, mask_score, depth_pred, true_mask, true_depth):
        """Return the total loss and report metrics to the chainer reporter.

        The loss is ``seg_loss + reg_loss``.  Reported values are ``loss``,
        ``seg_loss``, ``reg_loss``, ``miou`` and one ``depth_acc<T`` entry
        per threshold in ``_depth_acc_thresholds``.

        Raises:
            ValueError: If the loss is NaN.
            AssertionError: If the batch size is not 1.
        """
        seg_loss = self.compute_loss_mask(mask_score, true_mask)
        reg_loss = self.compute_loss_depth(depth_pred, true_mask, true_depth)

        # Loss
        coef = [1, 1]
        loss = coef[0] * seg_loss + coef[1] * reg_loss
        if self.xp.isnan(float(loss.data)):
            raise ValueError('Loss is nan.')

        batch_size = len(mask_score)
        assert batch_size == 1

        # GPU -> CPU, and drop the batch axis.
        true_mask = cuda.to_cpu(true_mask)[0]
        # Use the score that was passed in: this class never sets
        # ``self.score_label`` (its __call__ stores ``self.mask_score``).
        mask_pred = cuda.to_cpu(F.argmax(mask_score, axis=1).data)[0]
        true_depth = cuda.to_cpu(true_depth)[0]
        depth_pred = cuda.to_cpu(depth_pred.data)[0]

        # Evaluate Mask IU
        mask_iu = fcn.utils.label_accuracy_score(
            [true_mask], [mask_pred], n_class=2)[2]

        # Evaluate Depth Accuracy
        depth_acc = self._depth_accuracy(
            true_mask, mask_pred, true_depth, depth_pred,
            self._depth_acc_thresholds)

        report = {
            'loss': loss,
            'seg_loss': seg_loss,
            'reg_loss': reg_loss,
            'miou': mask_iu,
        }
        for key, acc in depth_acc.items():
            report['depth_acc<' + key] = acc
        chainer.reporter.report(report, self)

        return loss

    def __call__(self, rgb, depth_viz):
        """Run inference and store the results on the instance.

        Stores ``self.mask_score`` and ``self.depth_score`` and returns
        ``None``.  Asserts that chainer is not in training mode.
        """
        mask_score, rgb_pool5 = self.predict_mask(rgb, return_pool5=True)
        self.mask_score = mask_score

        depth_score = self.predict_depth(
            rgb, mask_score, depth_viz, rgb_pool5)
        self.depth_score = depth_score

        assert not chainer.config.train
        return

    @staticmethod
    def _copy_reshaped(src, dst):
        """Copy W and b of ``src`` into ``dst``, reshaping to ``dst``."""
        assert src.W.size == dst.W.size
        assert src.b.size == dst.b.size
        dst.W.data[...] = src.W.data.reshape(dst.W.shape)
        dst.b.data[...] = src.b.data.reshape(dst.b.shape)

    def init_from_vgg16(self, vgg16):
        """Copy pretrained weights from a VGG16 model into this network.

        * ``conv_<branch>_<s>_<i>`` is copied from ``vgg16.conv<s>_<i>``.
        * ``rgb_fc6`` / ``rgb_fc7`` / ``concat_fc7`` (and ``depth_fc6``
          when ``self.concat`` is ``False``) are copied from ``fc6`` /
          ``fc7`` after reshaping.
        * ``concat_fc6`` (when ``self.concat`` is ``True``) receives the
          reshaped ``fc6`` weights in both halves of its input channels.

        All other links are left untouched.
        """
        for child in self.children():
            name = child.name
            if name.startswith('conv'):
                parts = name.split('_')
                src = getattr(vgg16, parts[0] + parts[2] + '_' + parts[3])
                dst = getattr(self, name)
                assert src.W.shape == dst.W.shape
                assert src.b.shape == dst.b.shape
                dst.W.data[...] = src.W.data
                dst.b.data[...] = src.b.data
            elif (name in ('rgb_fc6', 'rgb_fc7', 'concat_fc7') or
                    (name == 'depth_fc6' and self.concat is False)):
                src = getattr(vgg16, name.split('_')[1])
                self._copy_reshaped(src, getattr(self, name))
            elif name == 'concat_fc6' and self.concat is True:
                src = getattr(vgg16, 'fc6')
                dst = getattr(self, name)
                assert src.W.size * 2 == dst.W.size
                assert src.b.size == dst.b.size
                half = dst.W.shape[1] // 2
                fc6_w = src.W.data.reshape(
                    (dst.W.shape[0], half, dst.W.shape[2], dst.W.shape[3]))
                # The same pretrained kernel seeds the rgb half and the
                # depth half of the concatenated input.
                dst.W.data[:, :half, :, :] = fc6_w
                dst.W.data[:, half:, :, :] = fc6_w
                dst.b.data[...] = src.b.data.reshape(dst.b.shape)
498 
499 
501 
# Depth range [min_depth, max_depth] = [0.4, 5.1]; predict_depth rescales
# its sigmoid output into this interval.
min_depth, max_depth = 0.4, 5.1
505 
def __init__(self, n_class, masking=True):
    """Register the links of the early-concatenation variant.

    The RGB trunk and mask head take a 3-channel input; the depth trunk
    starts with a 6-channel convolution and ends in its own
    ``depth_fc6`` / ``depth_fc7``.

    Args:
        n_class: Number of output channels of the mask head.
        masking: Stored as ``self.masking``.
    """
    self.n_class = n_class
    self.masking = masking

    zero_init = {
        'initialW': chainer.initializers.Zero(),
        'initial_bias': chainer.initializers.Zero(),
    }

    def conv3x3(n_in, n_out, pad=1):
        # 3x3 convolution, stride 1.
        return L.Convolution2D(n_in, n_out, 3, 1, pad, **zero_init)

    def conv_nopad(n_in, n_out, ksize):
        # ksize x ksize convolution, stride 1, no padding.
        return L.Convolution2D(n_in, n_out, ksize, 1, 0, **zero_init)

    def upsample(n_ch, ksize, stride):
        # Bias-free deconvolution with upsampling-initialized weights.
        return L.Deconvolution2D(
            n_ch, n_ch, ksize, stride, 0, nobias=True,
            initialW=fcn.initializers.UpsamplingDeconvWeight())

    # Deliberately skips FCN8sDepthPrediction.__init__ and calls the next
    # constructor in the MRO, so none of the parent's links are created.
    super(FCN8sDepthPrediction, self).__init__()
    with self.init_scope():
        # RGB trunk
        self.conv_rgb_1_1 = conv3x3(3, 64, pad=100)
        self.conv_rgb_1_2 = conv3x3(64, 64)

        self.conv_rgb_2_1 = conv3x3(64, 128)
        self.conv_rgb_2_2 = conv3x3(128, 128)

        self.conv_rgb_3_1 = conv3x3(128, 256)
        self.conv_rgb_3_2 = conv3x3(256, 256)
        self.conv_rgb_3_3 = conv3x3(256, 256)

        self.conv_rgb_4_1 = conv3x3(256, 512)
        self.conv_rgb_4_2 = conv3x3(512, 512)
        self.conv_rgb_4_3 = conv3x3(512, 512)

        self.conv_rgb_5_1 = conv3x3(512, 512)
        self.conv_rgb_5_2 = conv3x3(512, 512)
        self.conv_rgb_5_3 = conv3x3(512, 512)

        self.rgb_fc6 = conv_nopad(512, 4096, 7)
        self.rgb_fc7 = conv_nopad(4096, 4096, 1)

        # Mask head
        self.mask_score_fr = conv_nopad(4096, n_class, 1)

        self.mask_upscore2 = upsample(n_class, 4, 2)
        self.mask_upscore8 = upsample(n_class, 16, 8)

        self.mask_score_pool3 = conv_nopad(256, n_class, 1)
        self.mask_score_pool4 = conv_nopad(512, n_class, 1)

        self.mask_upscore_pool4 = upsample(n_class, 4, 2)

        # Depth trunk: the first convolution takes 6 input channels.
        self.conv_depth_1_1 = conv3x3(6, 64, pad=100)
        self.conv_depth_1_2 = conv3x3(64, 64)

        self.conv_depth_2_1 = conv3x3(64, 128)
        self.conv_depth_2_2 = conv3x3(128, 128)

        self.conv_depth_3_1 = conv3x3(128, 256)
        self.conv_depth_3_2 = conv3x3(256, 256)
        self.conv_depth_3_3 = conv3x3(256, 256)

        self.conv_depth_4_1 = conv3x3(256, 512)
        self.conv_depth_4_2 = conv3x3(512, 512)
        self.conv_depth_4_3 = conv3x3(512, 512)

        self.conv_depth_5_1 = conv3x3(512, 512)
        self.conv_depth_5_2 = conv3x3(512, 512)
        self.conv_depth_5_3 = conv3x3(512, 512)

        self.depth_fc6 = conv_nopad(512, 4096, 7)
        self.depth_fc7 = conv_nopad(4096, 4096, 1)

        # Depth head (single output channel)
        self.depth_score_fr = conv_nopad(4096, 1, 1)

        self.depth_upscore2 = upsample(1, 4, 2)
        self.depth_upscore8 = upsample(1, 16, 8)

        self.depth_score_pool3 = conv_nopad(256, 1, 1)
        self.depth_score_pool4 = conv_nopad(512, 1, 1)

        self.depth_upscore_pool4 = upsample(1, 4, 2)
593 
def predict_depth(self, rgb, mask_score, depth_viz):
    """Predict depth from the channel-wise concatenation of both inputs.

    ``rgb`` and ``depth_viz`` are concatenated along axis 1 and passed
    through the ``conv_depth_*`` trunk, ``depth_fc6`` / ``depth_fc7`` and
    the skip-fusing upsampling layers.  The resulting score is squashed
    with a sigmoid and rescaled to ``(self.min_depth, self.max_depth)``.

    Args:
        rgb: RGB batch; also defines the spatial size of the output.
        mask_score: Mask score used to build the feature mask when
            ``self.masking`` is ``True``.  It used to be ignored in favour
            of ``self.score_label``; it is honoured now, and ``None``
            still falls back to that attribute.
        depth_viz: Batch concatenated to ``rgb`` along axis 1.

    Returns:
        The depth prediction, cropped to the spatial size of ``rgb``.
    """
    h = F.concat((rgb, depth_viz), axis=1)

    # conv_depth_1 .. conv_depth_5: ReLU convolutions, then 2x2 max pooling.
    pooled = {}
    for stage, n_conv in ((1, 2), (2, 2), (3, 3), (4, 3), (5, 3)):
        for i in range(1, n_conv + 1):
            conv = getattr(self, 'conv_depth_%d_%d' % (stage, i))
            h = F.relu(conv(h))
        h = F.max_pooling_2d(h, 2, stride=2, pad=0)
        pooled[stage] = h
    depth_pool3 = pooled[3]  # 1/8
    depth_pool4 = pooled[4]  # 1/16
    depth_pool5 = pooled[5]  # 1/32

    if self.masking is True:
        if mask_score is None:
            mask_score = self.score_label
        # (N, C, H, W) -> (N, H, W)
        mask_pred = F.argmax(mask_score, axis=1)
        # (N, H, W) -> (N, 1, H, W), float required for resizing
        mask_pred = mask_pred[:, None, :, :].data.astype(
            self.xp.float32)  # 1/1
        resized_mask_pred = F.resize_images(
            mask_pred,
            (depth_pool5.shape[2], depth_pool5.shape[3]))  # 1/32
        # Keep features only where the resized label is exactly 0.
        keep = (resized_mask_pred.data == 0.0).astype(self.xp.float32)
        masked_depth_pool5 = depth_pool5 * keep
    else:
        masked_depth_pool5 = depth_pool5

    # depth_fc6, depth_fc7
    h = F.dropout(F.relu(self.depth_fc6(masked_depth_pool5)), ratio=.5)
    h = F.dropout(F.relu(self.depth_fc7(h)), ratio=.5)

    depth_score_fr = self.depth_score_fr(h)  # 1/32
    depth_score_pool3 = self.depth_score_pool3(0.0001 * depth_pool3)  # 1/8
    depth_score_pool4 = self.depth_score_pool4(0.01 * depth_pool4)  # 1/16

    # Upsample x2 and fuse with the cropped pool4 score.
    depth_upscore2 = self.depth_upscore2(depth_score_fr)  # 1/16
    depth_score_pool4c = depth_score_pool4[
        :, :,
        5:5 + depth_upscore2.data.shape[2],
        5:5 + depth_upscore2.data.shape[3]]
    depth_fuse_pool4 = depth_upscore2 + depth_score_pool4c  # 1/16

    # Upsample x2 again and fuse with the cropped pool3 score.
    depth_upscore_pool4 = self.depth_upscore_pool4(depth_fuse_pool4)  # 1/8
    depth_score_pool3c = depth_score_pool3[
        :, :,
        9:9 + depth_upscore_pool4.data.shape[2],
        9:9 + depth_upscore_pool4.data.shape[3]]
    depth_fuse_pool3 = depth_upscore_pool4 + depth_score_pool3c  # 1/8

    # Upsample x8 and crop to the input size.
    depth_upscore8 = self.depth_upscore8(depth_fuse_pool3)  # 1/1
    depth_score = depth_upscore8[:, :,
                                 31:31 + rgb.shape[2],
                                 31:31 + rgb.shape[3]]

    # (-inf, inf) -> (0, 1) -> (min_depth, max_depth)
    depth_pred = (F.sigmoid(depth_score) *
                  (self.max_depth - self.min_depth) + self.min_depth)

    return depth_pred
714 
def __call__(self, rgb, depth_viz, mask_gt=None, depth_gt=None):
    """Run the network and, when ground truth is given, return the loss.

    The mask score is stored as ``self.score_label`` and the depth
    prediction as ``self.depth_score``.

    Returns:
        The value of ``self.compute_loss(...)`` when both ``mask_gt`` and
        ``depth_gt`` are given.  Otherwise ``None``; in that case chainer
        must not be in training mode (asserted).
    """
    mask_logits = self.predict_mask(rgb, return_pool5=False)
    self.score_label = mask_logits
    predicted_depth = self.predict_depth(rgb, mask_logits, depth_viz)
    self.depth_score = predicted_depth

    have_ground_truth = mask_gt is not None and depth_gt is not None
    if have_ground_truth:
        return self.compute_loss(
            mask_logits, predicted_depth, mask_gt, depth_gt)

    # Inference only: outputs are read from the attributes set above.
    assert not chainer.config.train
    return None
729 
def init_from_vgg16(self, vgg16):
    """Copy pretrained weights from a VGG16 model into this network.

    * ``conv_depth_1_1`` has twice the input channels of ``vgg16.conv1_1``;
      the pretrained kernel is written into both halves.
    * Every other ``conv_<branch>_<s>_<i>`` is copied from
      ``vgg16.conv<s>_<i>`` (shapes must match exactly).
    * ``rgb_fc6``, ``rgb_fc7``, ``depth_fc6`` and ``depth_fc7`` are copied
      from ``vgg16.fc6`` / ``vgg16.fc7`` after reshaping (sizes must match).

    Links with any other name are left untouched.
    """
    fc_links = ('rgb_fc6', 'rgb_fc7', 'depth_fc6', 'depth_fc7')

    for child in self.children():
        name = child.name

        if name == 'conv_depth_1_1':
            src = getattr(vgg16, 'conv1_1')
            dst = getattr(self, name)
            assert src.W.size * 2 == dst.W.size
            assert src.b.size == dst.b.size
            half = dst.W.shape[1] // 2
            kernel = src.W.data.reshape(
                (dst.W.shape[0], half, dst.W.shape[2], dst.W.shape[3]))
            dst.W.data[:, :half, :, :] = kernel
            dst.W.data[:, half:, :, :] = kernel
            dst.b.data[...] = src.b.data.reshape(dst.b.shape)
            continue

        if name.startswith('conv'):
            # conv_<branch>_<stage>_<index> -> conv<stage>_<index>
            parts = name.split('_')
            src = getattr(vgg16, parts[0] + parts[2] + '_' + parts[3])
            dst = getattr(self, name)
            assert src.W.shape == dst.W.shape
            assert src.b.shape == dst.b.shape
            dst.W.data[...] = src.W.data
            dst.b.data[...] = src.b.data
            continue

        if name in fc_links:
            # <branch>_fc6 -> fc6, <branch>_fc7 -> fc7
            src = getattr(vgg16, name.split('_')[1])
            dst = getattr(self, name)
            assert src.W.size == dst.W.size
            assert src.b.size == dst.b.size
            dst.W.data[...] = src.W.data.reshape(dst.W.shape)
            dst.b.data[...] = src.b.data.reshape(dst.b.shape)


jsk_recognition_utils
Author(s):
autogenerated on Mon May 3 2021 03:03:03