00001
00002
00003 from distutils.version import LooseVersion
00004
00005 import chainer
00006 from chainer import cuda
00007 import chainer.functions as F
00008 import chainer.links as L
00009 import chainer.serializers as S
00010
00011 import fcn
00012 import matplotlib.cm
00013 import numpy as np
00014
00015 import cv_bridge
00016 from jsk_topic_tools import ConnectionBasedTransport
00017 import message_filters
00018 import rospy
00019 from sensor_msgs.msg import Image
00020
00021
00022 def colorize_depth(depth, min_value=None, max_value=None):
00023 min_value = np.nanmin(depth) if min_value is None else min_value
00024 max_value = np.nanmax(depth) if max_value is None else max_value
00025 if np.isinf(min_value) or np.isinf(max_value):
00026 rospy.logwarn('Min or max value for depth colorization is inf.')
00027
00028 colorized = depth.copy()
00029 nan_mask = np.isnan(colorized)
00030 colorized[nan_mask] = 0
00031 colorized = 1. * (colorized - min_value) / (max_value - min_value)
00032 colorized = matplotlib.cm.jet(colorized)[:, :, :3]
00033 colorized = (colorized * 255).astype(np.uint8)
00034 colorized[nan_mask] = (0, 0, 0)
00035 return colorized
00036
00037
00038 class FCNDepthPrediction(ConnectionBasedTransport):
00039
00040 def __init__(self):
00041 super(self.__class__, self).__init__()
00042 self.backend = rospy.get_param('~backend', 'chainer')
00043 self.model_name = rospy.get_param('~model_name')
00044 self.model_file = rospy.get_param('~model_file')
00045 self.gpu = rospy.get_param('~gpu', -1)
00046 self.target_names = rospy.get_param('~target_names')
00047 self.bg_label = rospy.get_param('~bg_label', 0)
00048 self.proba_threshold = rospy.get_param('~proba_threshold', 0.0)
00049 self.mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434])
00050 self._load_model()
00051 self.pub_depth = self.advertise('~output', Image, queue_size=1)
00052 self.pub_depth_raw = self.advertise(
00053 '~output/depth_pred_raw', Image, queue_size=1)
00054 self.pub_label = self.advertise('~output/label', Image, queue_size=1)
00055 self.pub_proba = self.advertise(
00056 '~output/proba_image', Image, queue_size=1)
00057
00058 def _load_model(self):
00059 if self.backend == 'chainer':
00060 self._load_chainer_model()
00061 else:
00062 raise RuntimeError('Unsupported backend: %s', self.backend)
00063
00064 def _load_chainer_model(self):
00065 n_class = len(self.target_names)
00066 if self.model_name == 'fcn8s_depth_prediction':
00067 self.model = FCN8sDepthPrediction(n_class=n_class)
00068 else:
00069 raise ValueError(
00070 'Unsupported ~model_name: {}'.format(self.model_name))
00071 rospy.loginfo('Loading trained model: {0}'.format(self.model_file))
00072 if self.model_file.endswith('.npz'):
00073 S.load_npz(self.model_file, self.model)
00074 rospy.loginfo(
00075 'Finished loading trained model: {0}'.format(self.model_file))
00076 if self.gpu != -1:
00077 self.model.to_gpu(self.gpu)
00078 if LooseVersion(chainer.__version__) < LooseVersion('2.0.0'):
00079 self.model.train = False
00080
00081 def subscribe(self):
00082 queue_size = rospy.get_param('~queue_size', 10)
00083 sub_img = message_filters.Subscriber(
00084 '~input', Image, queue_size=1, buff_size=2**24)
00085 sub_depth = message_filters.Subscriber(
00086 '~input/depth', Image, queue_size=1, buff_size=2**24)
00087 self.subs = [sub_img, sub_depth]
00088 if rospy.get_param('~approximate_sync', False):
00089 slop = rospy.get_param('~slop', 0.1)
00090 sync = message_filters.ApproximateTimeSynchronizer(
00091 fs=self.subs, queue_size=queue_size, slop=slop)
00092 else:
00093 sync = message_filters.TimeSynchronizer(
00094 fs=self.subs, queue_size=queue_size)
00095 sync.registerCallback(self._cb)
00096
00097 def unsubscribe(self):
00098 for sub in self.subs:
00099 sub.unregister()
00100
00101 def transform_depth(self, depth):
00102 if depth.dtype == np.uint16:
00103 depth = depth.astype(np.float32) * 0.001
00104 min_value = self.model.min_depth
00105 max_value = self.model.max_depth
00106 depth_viz_rgb = colorize_depth(
00107 depth,
00108 min_value=min_value, max_value=max_value
00109 )
00110 depth_viz_bgr = depth_viz_rgb[:, :, ::-1].astype(np.float32)
00111 depth_viz_bgr = (depth_viz_bgr - self.mean_bgr).transpose((2, 0, 1))
00112 return depth_viz_bgr
00113
00114 def _cb(self, img_msg, depth_msg):
00115 br = cv_bridge.CvBridge()
00116 bgr_img = br.imgmsg_to_cv2(img_msg, desired_encoding='bgr8')
00117 depth_img = br.imgmsg_to_cv2(depth_msg, desired_encoding='passthrough')
00118 if depth_img.ndim > 2:
00119 depth_img = np.squeeze(depth_img, axis=2)
00120 bgr_img = (bgr_img - self.mean_bgr).transpose((2, 0, 1))
00121 depth_viz_bgr = self.transform_depth(depth_img)
00122
00123 label_pred, proba_img, depth_pred = \
00124 self.predict_depth(bgr_img, depth_viz_bgr)
00125 depth_pred_raw = depth_pred.copy()
00126 depth_pred[label_pred == 0] = depth_img[label_pred == 0]
00127
00128 label_msg = br.cv2_to_imgmsg(label_pred.astype(np.int32), '32SC1')
00129 label_msg.header = img_msg.header
00130 self.pub_label.publish(label_msg)
00131 proba_msg = br.cv2_to_imgmsg(proba_img.astype(np.float32))
00132 proba_msg.header = img_msg.header
00133 self.pub_proba.publish(proba_msg)
00134 depth_msg = br.cv2_to_imgmsg(depth_pred.astype(np.float32))
00135 depth_msg.header = img_msg.header
00136 self.pub_depth.publish(depth_msg)
00137 depth_raw_msg = br.cv2_to_imgmsg(depth_pred_raw.astype(np.float32))
00138 depth_raw_msg.header = img_msg.header
00139 self.pub_depth_raw.publish(depth_raw_msg)
00140
00141 def predict_depth(self, bgr, depth_bgr=None):
00142 if self.backend == 'chainer':
00143 return self._predict_depth_chainer_backend(bgr, depth_bgr)
00144 raise ValueError('Unsupported backend: {0}'.format(self.backend))
00145
00146 def _predict_depth_chainer_backend(self, bgr, depth_bgr=None):
00147 bgr_data = np.array([bgr], dtype=np.float32)
00148 depth_bgr_data = np.array([depth_bgr], dtype=np.float32)
00149 if self.gpu != -1:
00150 bgr_data = cuda.to_gpu(bgr_data, device=self.gpu)
00151 depth_bgr_data = cuda.to_gpu(depth_bgr_data, device=self.gpu)
00152 if LooseVersion(chainer.__version__) < LooseVersion('2.0.0'):
00153 bgr = chainer.Variable(bgr_data, volatile=True)
00154 depth_bgr = chainer.Variable(depth_bgr_data, volatile=True)
00155 self.model(bgr, depth_bgr)
00156 else:
00157 with chainer.using_config('train', False):
00158 with chainer.no_backprop_mode():
00159 bgr = chainer.Variable(bgr_data)
00160 depth_bgr = chainer.Variable(depth_bgr_data)
00161 self.model(bgr, depth_bgr)
00162
00163 proba_img = F.softmax(self.model.mask_score)
00164 label_pred = F.argmax(self.model.mask_score, axis=1)
00165 depth_pred = F.sigmoid(self.model.depth_score)
00166 proba_img = F.transpose(proba_img, (0, 2, 3, 1))
00167 max_proba_img = F.max(proba_img, axis=-1)
00168
00169 proba_img = cuda.to_cpu(proba_img.data)[0]
00170 max_proba_img = cuda.to_cpu(max_proba_img.data)[0]
00171 label_pred = cuda.to_cpu(label_pred.data)[0]
00172 depth_pred = cuda.to_cpu(depth_pred.data)[0]
00173
00174 label_pred[max_proba_img < self.proba_threshold] = self.bg_label
00175
00176 depth_pred = depth_pred[0, :, :]
00177 depth_pred *= (self.model.max_depth - self.model.min_depth)
00178 depth_pred += self.model.min_depth
00179
00180 return label_pred, proba_img, depth_pred
00181
00182
00183 class FCN8sDepthPrediction(chainer.Chain):
00184
00185
00186 min_depth = 0.2
00187 max_depth = 3.
00188
00189 def __init__(self, n_class, masking=True, concat=True):
00190 self.n_class = n_class
00191 self.masking = masking
00192 self.concat = concat
00193 kwargs = {
00194 'initialW': chainer.initializers.Zero(),
00195 'initial_bias': chainer.initializers.Zero(),
00196 }
00197 super(self.__class__, self).__init__()
00198 with self.init_scope():
00199 self.conv_rgb_1_1 = L.Convolution2D(3, 64, 3, 1, 100, **kwargs)
00200 self.conv_rgb_1_2 = L.Convolution2D(64, 64, 3, 1, 1, **kwargs)
00201
00202 self.conv_rgb_2_1 = L.Convolution2D(64, 128, 3, 1, 1, **kwargs)
00203 self.conv_rgb_2_2 = L.Convolution2D(128, 128, 3, 1, 1, **kwargs)
00204
00205 self.conv_rgb_3_1 = L.Convolution2D(128, 256, 3, 1, 1, **kwargs)
00206 self.conv_rgb_3_2 = L.Convolution2D(256, 256, 3, 1, 1, **kwargs)
00207 self.conv_rgb_3_3 = L.Convolution2D(256, 256, 3, 1, 1, **kwargs)
00208
00209 self.conv_rgb_4_1 = L.Convolution2D(256, 512, 3, 1, 1, **kwargs)
00210 self.conv_rgb_4_2 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)
00211 self.conv_rgb_4_3 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)
00212
00213 self.conv_rgb_5_1 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)
00214 self.conv_rgb_5_2 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)
00215 self.conv_rgb_5_3 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)
00216
00217 self.rgb_fc6 = L.Convolution2D(512, 4096, 7, 1, 0, **kwargs)
00218 self.rgb_fc7 = L.Convolution2D(4096, 4096, 1, 1, 0, **kwargs)
00219
00220 self.mask_score_fr = L.Convolution2D(
00221 4096, n_class, 1, 1, 0, **kwargs)
00222
00223 self.mask_upscore2 = L.Deconvolution2D(
00224 n_class, n_class, 4, 2, 0, nobias=True,
00225 initialW=fcn.initializers.UpsamplingDeconvWeight())
00226 self.mask_upscore8 = L.Deconvolution2D(
00227 n_class, n_class, 16, 8, 0, nobias=True,
00228 initialW=fcn.initializers.UpsamplingDeconvWeight())
00229
00230 self.mask_score_pool3 = L.Convolution2D(
00231 256, n_class, 1, 1, 0, **kwargs)
00232 self.mask_score_pool4 = L.Convolution2D(
00233 512, n_class, 1, 1, 0, **kwargs)
00234
00235 self.mask_upscore_pool4 = L.Deconvolution2D(
00236 n_class, n_class, 4, 2, 0, nobias=True,
00237 initialW=fcn.initializers.UpsamplingDeconvWeight())
00238
00239 self.conv_depth_1_1 = L.Convolution2D(3, 64, 3, 1, 100, **kwargs)
00240 self.conv_depth_1_2 = L.Convolution2D(64, 64, 3, 1, 1, **kwargs)
00241
00242 self.conv_depth_2_1 = L.Convolution2D(64, 128, 3, 1, 1, **kwargs)
00243 self.conv_depth_2_2 = L.Convolution2D(128, 128, 3, 1, 1, **kwargs)
00244
00245 self.conv_depth_3_1 = L.Convolution2D(128, 256, 3, 1, 1, **kwargs)
00246 self.conv_depth_3_2 = L.Convolution2D(256, 256, 3, 1, 1, **kwargs)
00247 self.conv_depth_3_3 = L.Convolution2D(256, 256, 3, 1, 1, **kwargs)
00248
00249 self.conv_depth_4_1 = L.Convolution2D(256, 512, 3, 1, 1, **kwargs)
00250 self.conv_depth_4_2 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)
00251 self.conv_depth_4_3 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)
00252
00253 self.conv_depth_5_1 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)
00254 self.conv_depth_5_2 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)
00255 self.conv_depth_5_3 = L.Convolution2D(512, 512, 3, 1, 1, **kwargs)
00256
00257 if self.concat is True:
00258 self.concat_fc6 = L.Convolution2D(
00259 1024, 4096, 7, 1, 0, **kwargs)
00260 else:
00261 self.depth_fc6 = L.Convolution2D(512, 4096, 7, 1, 0, **kwargs)
00262
00263 self.concat_fc7 = L.Convolution2D(4096, 4096, 1, 1, 0, **kwargs)
00264
00265 self.depth_score_fr = L.Convolution2D(4096, 1, 1, 1, 0, **kwargs)
00266
00267 self.depth_upscore2 = L.Deconvolution2D(
00268 1, 1, 4, 2, 0, nobias=True,
00269 initialW=fcn.initializers.UpsamplingDeconvWeight())
00270 self.depth_upscore8 = L.Deconvolution2D(
00271 1, 1, 16, 8, 0, nobias=True,
00272 initialW=fcn.initializers.UpsamplingDeconvWeight())
00273
00274 self.depth_score_pool3 = L.Convolution2D(
00275 256, 1, 1, 1, 0, **kwargs)
00276 self.depth_score_pool4 = L.Convolution2D(
00277 512, 1, 1, 1, 0, **kwargs)
00278
00279 self.depth_upscore_pool4 = L.Deconvolution2D(
00280 1, 1, 4, 2, 0, nobias=True,
00281 initialW=fcn.initializers.UpsamplingDeconvWeight())
00282
00283 def predict_mask(self, rgb, return_pool5=False):
00284
00285 h = F.relu(self.conv_rgb_1_1(rgb))
00286 h = F.relu(self.conv_rgb_1_2(h))
00287 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
00288 rgb_pool1 = h
00289
00290
00291 h = F.relu(self.conv_rgb_2_1(rgb_pool1))
00292 h = F.relu(self.conv_rgb_2_2(h))
00293 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
00294 rgb_pool2 = h
00295
00296
00297 h = F.relu(self.conv_rgb_3_1(rgb_pool2))
00298 h = F.relu(self.conv_rgb_3_2(h))
00299 h = F.relu(self.conv_rgb_3_3(h))
00300 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
00301 rgb_pool3 = h
00302
00303
00304 h = F.relu(self.conv_rgb_4_1(rgb_pool3))
00305 h = F.relu(self.conv_rgb_4_2(h))
00306 h = F.relu(self.conv_rgb_4_3(h))
00307 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
00308 rgb_pool4 = h
00309
00310
00311 h = F.relu(self.conv_rgb_5_1(rgb_pool4))
00312 h = F.relu(self.conv_rgb_5_2(h))
00313 h = F.relu(self.conv_rgb_5_3(h))
00314 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
00315 rgb_pool5 = h
00316
00317
00318 h = F.relu(self.rgb_fc6(rgb_pool5))
00319 h = F.dropout(h, ratio=.5)
00320 rgb_fc6 = h
00321
00322
00323 h = F.relu(self.rgb_fc7(rgb_fc6))
00324 h = F.dropout(h, ratio=.5)
00325 rgb_fc7 = h
00326
00327
00328 h = self.mask_score_fr(rgb_fc7)
00329 mask_score_fr = h
00330
00331
00332 scale_rgb_pool3 = 0.0001 * rgb_pool3
00333 h = self.mask_score_pool3(scale_rgb_pool3)
00334 mask_score_pool3 = h
00335
00336
00337 scale_rgb_pool4 = 0.01 * rgb_pool4
00338 h = self.mask_score_pool4(scale_rgb_pool4)
00339 mask_score_pool4 = h
00340
00341
00342 h = self.mask_upscore2(mask_score_fr)
00343 mask_upscore2 = h
00344
00345
00346 h = mask_score_pool4[:, :,
00347 5:5 + mask_upscore2.data.shape[2],
00348 5:5 + mask_upscore2.data.shape[3]]
00349 mask_score_pool4c = h
00350
00351
00352 h = mask_upscore2 + mask_score_pool4c
00353 mask_fuse_pool4 = h
00354
00355
00356 h = self.mask_upscore_pool4(mask_fuse_pool4)
00357 mask_upscore_pool4 = h
00358
00359
00360 h = mask_score_pool3[:, :,
00361 9:9 + mask_upscore_pool4.data.shape[2],
00362 9:9 + mask_upscore_pool4.data.shape[3]]
00363 mask_score_pool3c = h
00364
00365
00366 h = mask_upscore_pool4 + mask_score_pool3c
00367 mask_fuse_pool3 = h
00368
00369
00370 h = self.mask_upscore8(mask_fuse_pool3)
00371 mask_upscore8 = h
00372
00373
00374 h = mask_upscore8[:, :,
00375 31:31 + rgb.shape[2], 31:31 + rgb.shape[3]]
00376 mask_score = h
00377
00378 if return_pool5:
00379 return mask_score, rgb_pool5
00380 else:
00381 return mask_score
00382
00383 def predict_depth(self, rgb, mask_score, depth_viz, rgb_pool5):
00384
00385 h = F.relu(self.conv_depth_1_1(depth_viz))
00386 h = F.relu(self.conv_depth_1_2(h))
00387 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
00388 depth_pool1 = h
00389
00390
00391 h = F.relu(self.conv_depth_2_1(depth_pool1))
00392 h = F.relu(self.conv_depth_2_2(h))
00393 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
00394 depth_pool2 = h
00395
00396
00397 h = F.relu(self.conv_depth_3_1(depth_pool2))
00398 h = F.relu(self.conv_depth_3_2(h))
00399 h = F.relu(self.conv_depth_3_3(h))
00400 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
00401 depth_pool3 = h
00402
00403
00404 h = F.relu(self.conv_depth_4_1(depth_pool3))
00405 h = F.relu(self.conv_depth_4_2(h))
00406 h = F.relu(self.conv_depth_4_3(h))
00407 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
00408 depth_pool4 = h
00409
00410
00411 h = F.relu(self.conv_depth_5_1(depth_pool4))
00412 h = F.relu(self.conv_depth_5_2(h))
00413 h = F.relu(self.conv_depth_5_3(h))
00414 h = F.max_pooling_2d(h, 2, stride=2, pad=0)
00415 depth_pool5 = h
00416
00417 if self.masking is True:
00418
00419
00420 mask_pred_tmp = F.argmax(self.mask_score, axis=1)
00421
00422 mask_pred_tmp = mask_pred_tmp[:, None, :, :].data.astype(
00423 self.xp.float32)
00424 resized_mask_pred = F.resize_images(
00425 mask_pred_tmp,
00426 (depth_pool5.shape[2], depth_pool5.shape[3]))
00427 depth_pool5_cp = depth_pool5
00428 masked_depth_pool5 = depth_pool5_cp * \
00429 (resized_mask_pred.data == 0.0).astype(self.xp.float32)
00430 else:
00431 masked_depth_pool5 = depth_pool5
00432
00433 if self.concat is True:
00434
00435 concat_pool5 = F.concat((rgb_pool5, masked_depth_pool5), axis=1)
00436
00437
00438 h = F.relu(self.concat_fc6(concat_pool5))
00439 h = F.dropout(h, ratio=.5)
00440 concat_fc6 = h
00441 else:
00442
00443 h = F.relu(self.depth_fc6(masked_depth_pool5))
00444 h = F.dropout(h, ratio=.5)
00445 concat_fc6 = h
00446
00447
00448 h = F.relu(self.concat_fc7(concat_fc6))
00449 h = F.dropout(h, ratio=.5)
00450 concat_fc7 = h
00451
00452
00453 h = self.depth_score_fr(concat_fc7)
00454 depth_score_fr = h
00455
00456
00457 scale_depth_pool3 = 0.0001 * depth_pool3
00458 h = self.depth_score_pool3(scale_depth_pool3)
00459 depth_score_pool3 = h
00460
00461
00462 scale_depth_pool4 = 0.01 * depth_pool4
00463 h = self.depth_score_pool4(scale_depth_pool4)
00464 depth_score_pool4 = h
00465
00466
00467 h = self.depth_upscore2(depth_score_fr)
00468 depth_upscore2 = h
00469
00470
00471 h = depth_score_pool4[:, :,
00472 5:5 + depth_upscore2.data.shape[2],
00473 5:5 + depth_upscore2.data.shape[3]]
00474 depth_score_pool4c = h
00475
00476
00477 h = depth_upscore2 + depth_score_pool4c
00478 depth_fuse_pool4 = h
00479
00480
00481 h = self.depth_upscore_pool4(depth_fuse_pool4)
00482 depth_upscore_pool4 = h
00483
00484
00485 h = depth_score_pool3[:, :,
00486 9:9 + depth_upscore_pool4.data.shape[2],
00487 9:9 + depth_upscore_pool4.data.shape[3]]
00488 depth_score_pool3c = h
00489
00490
00491 h = depth_upscore_pool4 + depth_score_pool3c
00492 depth_fuse_pool3 = h
00493
00494
00495 h = self.depth_upscore8(depth_fuse_pool3)
00496 depth_upscore8 = h
00497
00498
00499 h = depth_upscore8[:, :,
00500 31:31 + rgb.shape[2],
00501 31:31 + rgb.shape[3]]
00502 depth_score = h
00503
00504 return depth_score
00505
00506 def __call__(self, rgb, depth_viz):
00507 mask_score, rgb_pool5 = self.predict_mask(rgb, return_pool5=True)
00508 self.mask_score = mask_score
00509
00510 depth_score = self.predict_depth(
00511 rgb, mask_score, depth_viz, rgb_pool5)
00512 self.depth_score = depth_score
00513
00514 assert not chainer.config.train
00515 return
00516
00517
00518 if __name__ == '__main__':
00519 rospy.init_node('fcn_depth_prediction')
00520 FCNDepthPrediction()
00521 rospy.spin()