jsk_perception: fast_rcnn.py Source File

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 
00003 import os.path as osp
00004 import sys
00005 
00006 import chainer
00007 from chainer import cuda
00008 import chainer.serializers as S
00009 from chainer import Variable
00010 import cv2
00011 from distutils.version import LooseVersion
00012 import numpy as np
00013 
00014 import cv_bridge
00015 from dynamic_reconfigure.server import Server
00016 from jsk_perception.cfg import FastRCNNConfig as Config
00017 from jsk_recognition_msgs.msg import Rect, RectArray
00018 from jsk_recognition_msgs.msg import ClassificationResult
00019 import jsk_recognition_utils
00020 from jsk_recognition_utils.chainermodels import VGG16FastRCNN
00021 from jsk_recognition_utils.chainermodels import VGG_CNN_M_1024
00022 from jsk_recognition_utils.nms import nms
00023 from jsk_topic_tools import ConnectionBasedTransport
00024 import message_filters
00025 import rospkg
00026 import rospy
00027 from sensor_msgs.msg import Image
00028 
00029 
00030 def img_preprocessing(orig_img, pixel_means, max_size=1000, scale=600):
00031     img = orig_img.astype(np.float32, copy=True)
00032     img -= pixel_means
00033     im_size_min = np.min(img.shape[0:2])
00034     im_size_max = np.max(img.shape[0:2])
00035     im_scale = float(scale) / float(im_size_min)
00036     if np.rint(im_scale * im_size_max) > max_size:
00037         im_scale = float(max_size) / float(im_size_max)
00038     img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale,
00039                      interpolation=cv2.INTER_LINEAR)
00040     return img.transpose([2, 0, 1]).astype(np.float32), im_scale
00041 
00042 
00043 class FastRCNN(ConnectionBasedTransport):
00044 
00045     def __init__(self, model, target_names, pixel_means, use_gpu):
00046         super(FastRCNN, self).__init__()
00047 
00048         self._srv = Server(Config, self.configCallback)
00049 
00050         self.model = model
00051         self._pub_rects = self.advertise('~output/rect_array',
00052                                          RectArray, queue_size=1)
00053         self._pub_class = self.advertise('~output/class',
00054                                          ClassificationResult, queue_size=1)
00055         self.target_names = target_names
00056         self.pixel_means = np.array(pixel_means, dtype=np.float32)
00057         self.use_gpu = use_gpu
00058         self.classifier_name = rospy.get_param("~classifier_name", rospy.get_name())
00059 
00060     def configCallback(self, config, level):
00061         self.nms_thresh = config.nms_thresh
00062         self.conf_thresh = config.conf_thresh
00063         return config
00064 
00065     def subscribe(self):
00066         self._sub = message_filters.Subscriber('~input', Image)
00067         self._sub_rects = message_filters.Subscriber('~input/rect_array',
00068                                                      RectArray)
00069         use_async = rospy.get_param('~approximate_sync', False)
00070         queue_size = rospy.get_param('~queue_size', 100)
00071         subs = [self._sub, self._sub_rects]
00072         if use_async:
00073             slop = rospy.get_param('~slop', 0.1)
00074             sync = message_filters.ApproximateTimeSynchronizer(
00075                 subs, queue_size, slop)
00076         else:
00077             sync = message_filters.TimeSynchronizer(subs, queue_size)
00078         sync.registerCallback(self._detect)
00079 
00080     def unsubscribe(self):
00081         self._sub.unregister()
00082         self._sub_rects.unregister()
00083 
00084     def _detect(self, imgmsg, rects_msg):
00085         bridge = cv_bridge.CvBridge()
00086         im_orig = bridge.imgmsg_to_cv2(imgmsg, desired_encoding='bgr8')
00087         im, im_scale = img_preprocessing(im_orig, self.pixel_means)
00088         rects_orig = jsk_recognition_utils.rects_msg_to_ndarray(rects_msg)
00089         if len(rects_orig) == 0:
00090             return
00091         rects = rects_orig * im_scale
00092         scores, bbox_pred = self._im_detect(im, rects)
00093 
00094         rects = RectArray(header=imgmsg.header)
00095         labels = []
00096         label_proba = []
00097         for cls_id in range(1, len(self.target_names)):
00098             _cls = scores[:, cls_id][:, np.newaxis]
00099             _bbx = bbox_pred[:, cls_id * 4: (cls_id + 1) * 4]
00100             dets = np.hstack((_bbx, _cls))
00101             keep = nms(dets, self.nms_thresh)
00102             dets = dets[keep, :]
00103             orig_rects = cuda.cupy.asnumpy(rects_orig)[keep, :]
00104 
00105             inds = np.where(dets[:, -1] >= self.conf_thresh)[0]
00106 
00107             for i in inds:
00108                 _bbox = dets[i, :4]
00109                 x1, y1, x2, y2 = orig_rects[i]
00110                 width = x2 - x1
00111                 height = y2 - y1
00112                 center_x = x1 + 0.5 * width
00113                 center_y = y1 + 0.5 * height
00114 
00115                 dx, dy, dw, dh = _bbox
00116                 _center_x = dx * width + center_x
00117                 _center_y = dy * height + center_y
00118                 _width = np.exp(dw) * width
00119                 _height = np.exp(dh) * height
00120 
00121                 x1 = _center_x - 0.5 * _width
00122                 y1 = _center_y - 0.5 * _height
00123                 x2 = _center_x + 0.5 * _width
00124                 y2 = _center_y + 0.5 * _height
00125                 rect = Rect(x=x1, y=y1, width=x2-x1, height=y2-y1)
00126                 rects.rects.append(rect)
00127                 labels.append(cls_id)
00128                 label_proba.append(dets[:, -1][i])
00129 
00130         # publish classification result
00131         clss = ClassificationResult(
00132             header=imgmsg.header,
00133             classifier=self.classifier_name,
00134             target_names=self.target_names,
00135             labels=labels,
00136             label_names=[self.target_names[l] for l in labels],
00137             label_proba=label_proba,
00138         )
00139         self._pub_rects.publish(rects)
00140         self._pub_class.publish(clss)
00141 
00142     def _im_detect(self, im, rects):
00143         xp = cuda.cupy if self.use_gpu else np
00144         im = xp.asarray(im)
00145         rects = xp.asarray(rects)
00146         x_data = im[xp.newaxis, :, :, :]
00147         # batch_indices is always 0 when batch size is 1
00148         batch_indices = xp.zeros((len(rects), 1), dtype=np.float32)
00149         rects = xp.hstack((batch_indices, rects))
00150         if LooseVersion(chainer.__version__).version[0] < 2:
00151             x = Variable(x_data, volatile=True)
00152             rects_val = Variable(rects, volatile=True)
00153             self.model.train = False
00154             cls_score, bbox_pred = self.model(x, rects_val)
00155         else:
00156             with chainer.using_config('train', False), \
00157                  chainer.no_backprop_mode():
00158                 x = Variable(x_data)
00159                 rects_val = Variable(rects)
00160                 cls_score, bbox_pred = self.model(x, rects_val)
00161 
00162         scores = cuda.to_cpu(cls_score.data)
00163         bbox_pred = cuda.to_cpu(bbox_pred.data)
00164         return scores, bbox_pred
00165 
00166 
00167 def main():
00168     rospy.init_node('fast_rcnn_caffenet')
00169 
00170     # get parameters
00171     try:
00172         model_name = rospy.get_param('~model')
00173     except KeyError as e:
00174         rospy.logerr('Unspecified rosparam: {0}'.format(e))
00175         sys.exit(1)
00176 
00177     gpu = rospy.get_param('~gpu', -1)
00178     use_gpu = True if gpu >= 0 else False
00179 
00180     # setup model
00181     PKG = 'jsk_perception'
00182     rp = rospkg.RosPack()
00183     data_path = osp.join(rp.get_path(PKG), 'trained_data')
00184     if model_name == 'vgg_cnn_m_1024':
00185         model = VGG_CNN_M_1024()
00186         chainermodel = osp.join(data_path, 'vgg_cnn_m_1024.chainermodel')
00187     elif model_name == 'vgg16':
00188         model = VGG16FastRCNN()
00189         chainermodel = osp.join(data_path, 'vgg16_fast_rcnn.chainermodel')
00190     else:
00191         rospy.logerr('Unsupported model: {0}'.format(model_name))
00192         sys.exit(1)
00193     rospy.loginfo('Loading chainermodel')
00194     S.load_hdf5(chainermodel, model)
00195     if use_gpu:
00196         model.to_gpu(gpu)
00197     rospy.loginfo('Finished loading chainermodel')
00198 
00199     # assumptions
00200     target_names = [
00201         '__background__',
00202         'aeroplane',
00203         'bicycle',
00204         'bird',
00205         'boat',
00206         'bottle',
00207         'bus',
00208         'car',
00209         'cat',
00210         'chair',
00211         'cow',
00212         'diningtable',
00213         'dog',
00214         'horse',
00215         'motorbike',
00216         'person',
00217         'pottedplant',
00218         'sheep',
00219         'sofa',
00220         'train',
00221         'tvmonitor',
00222     ]
00223     pixel_means = [102.9801, 115.9465, 122.7717]
00224 
00225     fast_rcnn = FastRCNN(
00226         model=model, target_names=target_names,
00227         pixel_means=pixel_means, use_gpu=use_gpu)
00228     rospy.spin()
00229 
00230 
00231 if __name__ == '__main__':
00232     main()