jsk_perception: fast_rcnn.py Source File

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 
00003 import os.path as osp
00004 import sys
00005 
00006 from chainer import cuda
00007 import chainer.serializers as S
00008 from chainer import Variable
00009 import cv2
00010 import numpy as np
00011 
00012 import cv_bridge
00013 from dynamic_reconfigure.server import Server
00014 from jsk_perception.cfg import FastRCNNConfig as Config
00015 from jsk_recognition_msgs.msg import Rect, RectArray
00016 from jsk_recognition_msgs.msg import ClassificationResult
00017 import jsk_recognition_utils
00018 from jsk_recognition_utils.chainermodels import VGG16FastRCNN
00019 from jsk_recognition_utils.chainermodels import VGG_CNN_M_1024
00020 from jsk_recognition_utils.nms import nms
00021 from jsk_topic_tools import ConnectionBasedTransport
00022 import message_filters
00023 import rospkg
00024 import rospy
00025 from sensor_msgs.msg import Image
00026 
00027 
00028 def img_preprocessing(orig_img, pixel_means, max_size=1000, scale=600):
00029     img = orig_img.astype(np.float32, copy=True)
00030     img -= pixel_means
00031     im_size_min = np.min(img.shape[0:2])
00032     im_size_max = np.max(img.shape[0:2])
00033     im_scale = float(scale) / float(im_size_min)
00034     if np.rint(im_scale * im_size_max) > max_size:
00035         im_scale = float(max_size) / float(im_size_max)
00036     img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale,
00037                      interpolation=cv2.INTER_LINEAR)
00038     return img.transpose([2, 0, 1]).astype(np.float32), im_scale
00039 
00040 
00041 class FastRCNN(ConnectionBasedTransport):
00042 
00043     def __init__(self, model, target_names, pixel_means, use_gpu):
00044         super(FastRCNN, self).__init__()
00045 
00046         self._srv = Server(Config, self.configCallback)
00047 
00048         self.model = model
00049         self._pub_rects = self.advertise('~output/rect_array',
00050                                          RectArray, queue_size=1)
00051         self._pub_class = self.advertise('~output/class',
00052                                          ClassificationResult, queue_size=1)
00053         self.target_names = target_names
00054         self.pixel_means = np.array(pixel_means, dtype=np.float32)
00055         self.use_gpu = use_gpu
00056         self.classifier_name = rospy.get_param("~classifier_name", rospy.get_name())
00057 
00058     def configCallback(self, config, level):
00059         self.nms_thresh = config.nms_thresh
00060         self.conf_thresh = config.conf_thresh
00061         return config
00062 
00063     def subscribe(self):
00064         self._sub = message_filters.Subscriber('~input', Image)
00065         self._sub_rects = message_filters.Subscriber('~input/rect_array',
00066                                                      RectArray)
00067         use_async = rospy.get_param('~approximate_sync', False)
00068         queue_size = rospy.get_param('~queue_size', 100)
00069         subs = [self._sub, self._sub_rects]
00070         if use_async:
00071             slop = rospy.get_param('~slop', 0.1)
00072             sync = message_filters.ApproximateTimeSynchronizer(
00073                 subs, queue_size, slop)
00074         else:
00075             sync = message_filters.TimeSynchronizer(subs, queue_size)
00076         sync.registerCallback(self._detect)
00077 
00078     def unsubscribe(self):
00079         self._sub.unregister()
00080         self._sub_rects.unregister()
00081 
00082     def _detect(self, imgmsg, rects_msg):
00083         bridge = cv_bridge.CvBridge()
00084         im_orig = bridge.imgmsg_to_cv2(imgmsg, desired_encoding='bgr8')
00085         im, im_scale = img_preprocessing(im_orig, self.pixel_means)
00086         rects_orig = jsk_recognition_utils.rects_msg_to_ndarray(rects_msg)
00087         if len(rects_orig) == 0:
00088             return
00089         rects = rects_orig * im_scale
00090         scores, bbox_pred = self._im_detect(im, rects)
00091 
00092         rects = RectArray(header=imgmsg.header)
00093         labels = []
00094         label_proba = []
00095         for cls_id in range(1, len(self.target_names)):
00096             _cls = scores[:, cls_id][:, np.newaxis]
00097             _bbx = bbox_pred[:, cls_id * 4: (cls_id + 1) * 4]
00098             dets = np.hstack((_bbx, _cls))
00099             keep = nms(dets, self.nms_thresh)
00100             dets = dets[keep, :]
00101             orig_rects = cuda.cupy.asnumpy(rects_orig)[keep, :]
00102 
00103             inds = np.where(dets[:, -1] >= self.conf_thresh)[0]
00104 
00105             for i in inds:
00106                 _bbox = dets[i, :4]
00107                 x1, y1, x2, y2 = orig_rects[i]
00108                 width = x2 - x1
00109                 height = y2 - y1
00110                 center_x = x1 + 0.5 * width
00111                 center_y = y1 + 0.5 * height
00112 
00113                 dx, dy, dw, dh = _bbox
00114                 _center_x = dx * width + center_x
00115                 _center_y = dy * height + center_y
00116                 _width = np.exp(dw) * width
00117                 _height = np.exp(dh) * height
00118 
00119                 x1 = _center_x - 0.5 * _width
00120                 y1 = _center_y - 0.5 * _height
00121                 x2 = _center_x + 0.5 * _width
00122                 y2 = _center_y + 0.5 * _height
00123                 rect = Rect(x=x1, y=y1, width=x2-x1, height=y2-y1)
00124                 rects.rects.append(rect)
00125                 labels.append(cls_id)
00126                 label_proba.append(dets[:, -1][i])
00127 
00128         # publish classification result
00129         clss = ClassificationResult(
00130             header=imgmsg.header,
00131             classifier=self.classifier_name,
00132             target_names=self.target_names,
00133             labels=labels,
00134             label_names=[self.target_names[l] for l in labels],
00135             label_proba=label_proba,
00136         )
00137         self._pub_rects.publish(rects)
00138         self._pub_class.publish(clss)
00139 
00140     def _im_detect(self, im, rects):
00141         xp = cuda.cupy if self.use_gpu else np
00142         im = xp.asarray(im)
00143         rects = xp.asarray(rects)
00144         x_data = im[xp.newaxis, :, :, :]
00145         # batch_indices is always 0 when batch size is 1
00146         batch_indices = xp.zeros((len(rects), 1), dtype=np.float32)
00147         rects = xp.hstack((batch_indices, rects))
00148         x = Variable(x_data, volatile=True)
00149         rects_val = Variable(rects, volatile=True)
00150         self.model.train = False
00151         cls_score, bbox_pred = self.model(x, rects_val)
00152         scores = cuda.to_cpu(cls_score.data)
00153         bbox_pred = cuda.to_cpu(bbox_pred.data)
00154         return scores, bbox_pred
00155 
00156 
00157 def main():
00158     rospy.init_node('fast_rcnn_caffenet')
00159 
00160     # get parameters
00161     try:
00162         model_name = rospy.get_param('~model')
00163     except KeyError as e:
00164         rospy.logerr('Unspecified rosparam: {0}'.format(e))
00165         sys.exit(1)
00166 
00167     # FIXME: In CPU mode, there is no detections.
00168     if not cuda.available:
00169         rospy.logfatal('CUDA environment is required.')
00170         sys.exit(1)
00171     use_gpu = True
00172 
00173     # setup model
00174     PKG = 'jsk_perception'
00175     rp = rospkg.RosPack()
00176     data_path = osp.join(rp.get_path(PKG), 'trained_data')
00177     if model_name == 'vgg_cnn_m_1024':
00178         model = VGG_CNN_M_1024()
00179         chainermodel = osp.join(data_path, 'vgg_cnn_m_1024.chainermodel')
00180     elif model_name == 'vgg16':
00181         model = VGG16FastRCNN()
00182         chainermodel = osp.join(data_path, 'vgg16_fast_rcnn.chainermodel')
00183     else:
00184         rospy.logerr('Unsupported model: {0}'.format(model_name))
00185         sys.exit(1)
00186     rospy.loginfo('Loading chainermodel')
00187     S.load_hdf5(chainermodel, model)
00188     if use_gpu:
00189         model.to_gpu()
00190     rospy.loginfo('Finished loading chainermodel')
00191 
00192     # assumptions
00193     target_names = [
00194         '__background__',
00195         'aeroplane',
00196         'bicycle',
00197         'bird',
00198         'boat',
00199         'bottle',
00200         'bus',
00201         'car',
00202         'cat',
00203         'chair',
00204         'cow',
00205         'diningtable',
00206         'dog',
00207         'horse',
00208         'motorbike',
00209         'person',
00210         'pottedplant',
00211         'sheep',
00212         'sofa',
00213         'train',
00214         'tvmonitor',
00215     ]
00216     pixel_means = [102.9801, 115.9465, 122.7717]
00217 
00218     fast_rcnn = FastRCNN(
00219         model=model, target_names=target_names,
00220         pixel_means=pixel_means, use_gpu=use_gpu)
00221     rospy.spin()
00222 
00223 
00224 if __name__ == '__main__':
00225     main()