2 from chainer
import cuda
3 import chainer.functions
as F
4 import chainer.links
as L
15 def __init__(self, n_class, masking=True, concat=True):
20 'initialW': chainer.initializers.Zero(),
21 'initial_bias': chainer.initializers.Zero(),
23 super(self.__class__, self).
__init__()
24 with self.init_scope():
43 self.
rgb_fc6 = L.Convolution2D(512, 4096, 7, 1, 0, **kwargs)
44 self.
rgb_fc7 = L.Convolution2D(4096, 4096, 1, 1, 0, **kwargs)
47 4096, n_class, 1, 1, 0, **kwargs)
50 n_class, n_class, 4, 2, 0, nobias=
True,
51 initialW=fcn.initializers.UpsamplingDeconvWeight())
53 n_class, n_class, 16, 8, 0, nobias=
True,
54 initialW=fcn.initializers.UpsamplingDeconvWeight())
57 256, n_class, 1, 1, 0, **kwargs)
59 512, n_class, 1, 1, 0, **kwargs)
62 n_class, n_class, 4, 2, 0, nobias=
True,
63 initialW=fcn.initializers.UpsamplingDeconvWeight())
85 1024, 4096, 7, 1, 0, **kwargs)
87 self.
depth_fc6 = L.Convolution2D(512, 4096, 7, 1, 0, **kwargs)
89 self.
concat_fc7 = L.Convolution2D(4096, 4096, 1, 1, 0, **kwargs)
94 1, 1, 4, 2, 0, nobias=
True,
95 initialW=fcn.initializers.UpsamplingDeconvWeight())
97 1, 1, 16, 8, 0, nobias=
True,
98 initialW=fcn.initializers.UpsamplingDeconvWeight())
101 256, 1, 1, 1, 0, **kwargs)
103 512, 1, 1, 1, 0, **kwargs)
106 1, 1, 4, 2, 0, nobias=
True,
107 initialW=fcn.initializers.UpsamplingDeconvWeight())
113 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
119 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
126 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
133 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
140 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
144 h = F.relu(self.
rgb_fc6(rgb_pool5))
145 h = F.dropout(h, ratio=.5)
149 h = F.relu(self.
rgb_fc7(rgb_fc6))
150 h = F.dropout(h, ratio=.5)
158 scale_rgb_pool3 = 0.0001 * rgb_pool3
163 scale_rgb_pool4 = 0.01 * rgb_pool4
172 h = mask_score_pool4[:, :,
173 5:5 + mask_upscore2.data.shape[2],
174 5:5 + mask_upscore2.data.shape[3]]
175 mask_score_pool4c = h
178 h = mask_upscore2 + mask_score_pool4c
183 mask_upscore_pool4 = h
186 h = mask_score_pool3[:, :,
187 9:9 + mask_upscore_pool4.data.shape[2],
188 9:9 + mask_upscore_pool4.data.shape[3]]
189 mask_score_pool3c = h
192 h = mask_upscore_pool4 + mask_score_pool3c
200 h = mask_upscore8[:, :,
201 31:31 + rgb.shape[2], 31:31 + rgb.shape[3]]
205 return mask_score, rgb_pool5
213 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
219 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
226 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
233 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
240 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
246 mask_pred_tmp = F.argmax(self.
mask_score, axis=1)
248 mask_pred_tmp = mask_pred_tmp[:,
None, :, :].data.astype(
250 resized_mask_pred = F.resize_images(
252 (depth_pool5.shape[2], depth_pool5.shape[3]))
253 depth_pool5_cp = depth_pool5
254 masked_depth_pool5 = depth_pool5_cp * \
255 (resized_mask_pred.data == 0.0).astype(self.xp.float32)
257 masked_depth_pool5 = depth_pool5
261 concat_pool5 = F.concat((rgb_pool5, masked_depth_pool5), axis=1)
265 h = F.dropout(h, ratio=.5)
269 h = F.relu(self.
depth_fc6(masked_depth_pool5))
270 h = F.dropout(h, ratio=.5)
275 h = F.dropout(h, ratio=.5)
283 scale_depth_pool3 = 0.0001 * depth_pool3
285 depth_score_pool3 = h
288 scale_depth_pool4 = 0.01 * depth_pool4
290 depth_score_pool4 = h
297 h = depth_score_pool4[:, :,
298 5:5 + depth_upscore2.data.shape[2],
299 5:5 + depth_upscore2.data.shape[3]]
300 depth_score_pool4c = h
303 h = depth_upscore2 + depth_score_pool4c
308 depth_upscore_pool4 = h
311 h = depth_score_pool3[:, :,
312 9:9 + depth_upscore_pool4.data.shape[2],
313 9:9 + depth_upscore_pool4.data.shape[3]]
314 depth_score_pool3c = h
317 h = depth_upscore_pool4 + depth_score_pool3c
325 h = depth_upscore8[:, :,
326 31:31 + rgb.shape[2],
327 31:31 + rgb.shape[3]]
334 seg_loss = F.softmax_cross_entropy(
335 mask_score, true_mask, normalize=
True)
340 assert true_mask.dtype == self.xp.int32
343 keep_regardless_mask = ~self.xp.isnan(true_depth)
344 if self.xp.sum(keep_regardless_mask) == 0:
345 depth_loss_regardless_mask = 0
347 depth_loss_regardless_mask = F.sum(F.log(F.cosh(
348 depth_pred[keep_regardless_mask[self.xp.newaxis, :, :, :]] -
349 true_depth[keep_regardless_mask])))
350 depth_loss_regardless_mask /= \
351 true_depth.shape[1] * true_depth.shape[2]
354 keep_only_mask = self.xp.logical_and(
355 true_mask > 0, ~self.xp.isnan(true_depth))
356 if self.xp.sum(keep_only_mask) == 0:
357 depth_loss_only_mask = 0
359 depth_loss_only_mask = F.sum(F.log(F.cosh(
360 depth_pred[keep_only_mask[self.xp.newaxis, :, :, :]] -
361 true_depth[keep_only_mask])))
362 depth_loss_only_mask /= true_depth.shape[1] + true_depth.shape[2]
366 reg_loss = (coef[0] * depth_loss_regardless_mask +
367 coef[1] * depth_loss_only_mask)
377 loss = coef[0] * seg_loss + coef[1] * reg_loss
378 if self.xp.isnan(float(loss.data)):
379 raise ValueError(
'Loss is nan.')
381 batch_size = len(mask_score)
382 assert batch_size == 1
386 true_mask = cuda.to_cpu(true_mask)[0]
387 mask_pred = cuda.to_cpu(F.argmax(self.score_label, axis=1).data)[0]
388 true_depth = cuda.to_cpu(true_depth)[0]
389 depth_pred = cuda.to_cpu(depth_pred.data)[0]
392 mask_iu = fcn.utils.label_accuracy_score(
393 [true_mask], [mask_pred], n_class=2)[2]
397 for thresh
in [0.01, 0.02, 0.03, 0.04, 0.05, 0.07, 0.10, 0.15, 0.20,
398 0.25, 0.30, 0.40, 0.50, 0.70, 1.00]:
399 t_lbl_fg = true_mask > 0
400 p_lbl_fg = mask_pred > 0
401 if np.sum(t_lbl_fg) == 0
and np.sum(p_lbl_fg) == 0:
403 elif np.sum(t_lbl_fg) == 0:
407 true_depth_cp = np.copy(true_depth)
408 true_depth_cp[np.isnan(true_depth_cp)] = np.inf
411 np.logical_and(t_lbl_fg, p_lbl_fg),
412 np.abs(true_depth_cp - depth_pred) < thresh))
413 denom = np.sum(np.logical_or(t_lbl_fg, p_lbl_fg))
414 acc = 1. * numer / denom
415 depth_acc[
'%.2f' % thresh] = acc
417 chainer.reporter.report({
419 'seg_loss': seg_loss,
420 'reg_loss': reg_loss,
422 'depth_acc<0.01': depth_acc[
'0.01'],
423 'depth_acc<0.02': depth_acc[
'0.02'],
424 'depth_acc<0.03': depth_acc[
'0.03'],
425 'depth_acc<0.04': depth_acc[
'0.04'],
426 'depth_acc<0.05': depth_acc[
'0.05'],
427 'depth_acc<0.07': depth_acc[
'0.07'],
428 'depth_acc<0.10': depth_acc[
'0.10'],
429 'depth_acc<0.15': depth_acc[
'0.15'],
430 'depth_acc<0.20': depth_acc[
'0.20'],
431 'depth_acc<0.25': depth_acc[
'0.25'],
432 'depth_acc<0.30': depth_acc[
'0.30'],
433 'depth_acc<0.40': depth_acc[
'0.40'],
434 'depth_acc<0.50': depth_acc[
'0.50'],
435 'depth_acc<0.70': depth_acc[
'0.70'],
436 'depth_acc<1.00': depth_acc[
'1.00'],
442 mask_score, rgb_pool5 = self.
predict_mask(rgb, return_pool5=
True)
446 rgb, mask_score, depth_viz, rgb_pool5)
449 assert not chainer.config.train
453 for l
in self.children():
454 if l.name.startswith(
'conv'):
456 l.name.split(
'_')[0] + l.name.split(
'_')[2] +
457 '_' + l.name.split(
'_')[3])
458 l2 = getattr(self, l.name)
459 assert l1.W.shape == l2.W.shape
460 assert l1.b.shape == l2.b.shape
461 l2.W.data[...] = l1.W.data[...]
462 l2.b.data[...] = l1.b.data[...]
463 elif l.name
in [
'rgb_fc6',
'rgb_fc7']:
464 l1 = getattr(vgg16, l.name.split(
'_')[1])
465 l2 = getattr(self, l.name)
466 assert l1.W.size == l2.W.size
467 assert l1.b.size == l2.b.size
468 l2.W.data[...] = l1.W.data.reshape(l2.W.shape)[...]
469 l2.b.data[...] = l1.b.data.reshape(l2.b.shape)[...]
470 elif l.name ==
'depth_fc6' and self.
concat is False:
471 l1 = getattr(vgg16,
'fc6')
472 l2 = getattr(self, l.name)
473 assert l1.W.size == l2.W.size
474 assert l1.b.size == l2.b.size
475 l2.W.data[...] = l1.W.data.reshape(l2.W.shape)[...]
476 l2.b.data[...] = l1.b.data.reshape(l2.b.shape)[...]
477 elif l.name ==
'concat_fc6' and self.
concat is True:
478 l1 = getattr(vgg16,
'fc6')
479 l2 = getattr(self, l.name)
480 assert l1.W.size * 2 == l2.W.size
481 assert l1.b.size == l2.b.size
482 l2.W.data[:, :int(l2.W.shape[1] / 2), :, :] = \
484 (l2.W.shape[0], int(l2.W.shape[1] / 2),
485 l2.W.shape[2], l2.W.shape[3]))[...]
486 l2.W.data[:, int(l2.W.shape[1] / 2):, :, :] = \
488 (l2.W.shape[0], int(l2.W.shape[1] / 2),
489 l2.W.shape[2], l2.W.shape[3]))[...]
490 l2.b.data[...] = l1.b.data.reshape(l2.b.shape)[...]
491 elif l.name ==
'concat_fc7':
492 l1 = getattr(vgg16,
'fc7')
493 l2 = getattr(self, l.name)
494 assert l1.W.size == l2.W.size
495 assert l1.b.size == l2.b.size
496 l2.W.data[...] = l1.W.data.reshape(l2.W.shape)[...]
497 l2.b.data[...] = l1.b.data.reshape(l2.b.shape)[...]
510 'initialW': chainer.initializers.Zero(),
511 'initial_bias': chainer.initializers.Zero(),
513 super(FCN8sDepthPrediction, self).
__init__()
514 with self.init_scope():
533 self.
rgb_fc6 = L.Convolution2D(512, 4096, 7, 1, 0, **kwargs)
534 self.
rgb_fc7 = L.Convolution2D(4096, 4096, 1, 1, 0, **kwargs)
537 4096, n_class, 1, 1, 0, **kwargs)
540 n_class, n_class, 4, 2, 0, nobias=
True,
541 initialW=fcn.initializers.UpsamplingDeconvWeight())
543 n_class, n_class, 16, 8, 0, nobias=
True,
544 initialW=fcn.initializers.UpsamplingDeconvWeight())
547 256, n_class, 1, 1, 0, **kwargs)
549 512, n_class, 1, 1, 0, **kwargs)
552 n_class, n_class, 4, 2, 0, nobias=
True,
553 initialW=fcn.initializers.UpsamplingDeconvWeight())
573 self.
depth_fc6 = L.Convolution2D(512, 4096, 7, 1, 0, **kwargs)
574 self.
depth_fc7 = L.Convolution2D(4096, 4096, 1, 1, 0, **kwargs)
579 1, 1, 4, 2, 0, nobias=
True,
580 initialW=fcn.initializers.UpsamplingDeconvWeight())
582 1, 1, 16, 8, 0, nobias=
True,
583 initialW=fcn.initializers.UpsamplingDeconvWeight())
586 256, 1, 1, 1, 0, **kwargs)
588 512, 1, 1, 1, 0, **kwargs)
591 1, 1, 4, 2, 0, nobias=
True,
592 initialW=fcn.initializers.UpsamplingDeconvWeight())
596 concat_input = F.concat((rgb, depth_viz), axis=1)
601 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
607 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
614 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
621 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
628 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
635 mask_pred_tmp = mask_pred_tmp[:,
None, :, :].data.astype(
637 resized_mask_pred = F.resize_images(
639 (depth_pool5.shape[2], depth_pool5.shape[3]))
640 depth_pool5_cp = depth_pool5
641 masked_depth_pool5 = depth_pool5_cp * \
642 (resized_mask_pred.data == 0.0).astype(self.xp.float32)
644 masked_depth_pool5 = depth_pool5
647 h = F.relu(self.
depth_fc6(masked_depth_pool5))
648 h = F.dropout(h, ratio=.5)
653 h = F.dropout(h, ratio=.5)
661 scale_depth_pool3 = 0.0001 * depth_pool3
663 depth_score_pool3 = h
666 scale_depth_pool4 = 0.01 * depth_pool4
668 depth_score_pool4 = h
675 h = depth_score_pool4[:, :,
676 5:5 + depth_upscore2.data.shape[2],
677 5:5 + depth_upscore2.data.shape[3]]
678 depth_score_pool4c = h
681 h = depth_upscore2 + depth_score_pool4c
686 depth_upscore_pool4 = h
689 h = depth_score_pool3[:, :,
690 9:9 + depth_upscore_pool4.data.shape[2],
691 9:9 + depth_upscore_pool4.data.shape[3]]
692 depth_score_pool3c = h
695 h = depth_upscore_pool4 + depth_score_pool3c
703 h = depth_upscore8[:, :,
704 31:31 + rgb.shape[2],
705 31:31 + rgb.shape[3]]
709 h = F.sigmoid(depth_score)
715 def __call__(self, rgb, depth_viz, mask_gt=None, depth_gt=None):
716 score_label = self.
predict_mask(rgb, return_pool5=
False)
721 if mask_gt
is None or depth_gt
is None:
722 assert not chainer.config.train
726 score_label, depth_pred, mask_gt, depth_gt)
731 for l
in self.children():
732 if l.name ==
'conv_depth_1_1':
733 l1 = getattr(vgg16,
'conv1_1')
734 l2 = getattr(self, l.name)
735 assert l1.W.size * 2 == l2.W.size
736 assert l1.b.size == l2.b.size
737 l2.W.data[:, :int(l2.W.shape[1] / 2), :, :] = \
739 (l2.W.shape[0], int(l2.W.shape[1] / 2),
740 l2.W.shape[2], l2.W.shape[3]))[...]
741 l2.W.data[:, int(l2.W.shape[1] / 2):, :, :] = \
743 (l2.W.shape[0], int(l2.W.shape[1] / 2),
744 l2.W.shape[2], l2.W.shape[3]))[...]
745 l2.b.data[...] = l1.b.data.reshape(l2.b.shape)[...]
746 elif l.name.startswith(
'conv'):
748 l.name.split(
'_')[0] + l.name.split(
'_')[2] +
749 '_' + l.name.split(
'_')[3])
750 l2 = getattr(self, l.name)
751 assert l1.W.shape == l2.W.shape
752 assert l1.b.shape == l2.b.shape
753 l2.W.data[...] = l1.W.data[...]
754 l2.b.data[...] = l1.b.data[...]
755 elif l.name
in [
'rgb_fc6',
'rgb_fc7']:
756 l1 = getattr(vgg16, l.name.split(
'_')[1])
757 l2 = getattr(self, l.name)
758 assert l1.W.size == l2.W.size
759 assert l1.b.size == l2.b.size
760 l2.W.data[...] = l1.W.data.reshape(l2.W.shape)[...]
761 l2.b.data[...] = l1.b.data.reshape(l2.b.shape)[...]
762 elif l.name
in [
'depth_fc6',
'depth_fc7']:
763 l1 = getattr(vgg16, l.name.split(
'_')[1])
764 l2 = getattr(self, l.name)
765 assert l1.W.size == l2.W.size
766 assert l1.b.size == l2.b.size
767 l2.W.data[...] = l1.W.data.reshape(l2.W.shape)[...]
768 l2.b.data[...] = l1.b.data.reshape(l2.b.shape)[...]
def compute_loss(self, mask_score, depth_pred, true_mask, true_depth)
def compute_loss_depth(self, depth_pred, true_mask, true_depth)
def __call__(self, rgb, depth_viz, mask_gt=None, depth_gt=None)
def init_from_vgg16(self, vgg16)
def compute_loss_mask(self, mask_score, true_mask)
def __init__(self, n_class, masking=True, concat=True)
def predict_mask(self, rgb, return_pool5=False)
def __call__(self, rgb, depth_viz)
def predict_depth(self, rgb, mask_score, depth_viz)
def __init__(self, n_class, masking=True)
def init_from_vgg16(self, vgg16)
def predict_depth(self, rgb, mask_score, depth_viz, rgb_pool5)