00001
00002
00003 import os.path as osp
00004 import sys
00005
00006 import chainer
00007 from chainer import cuda
00008 import chainer.serializers as S
00009 from chainer import Variable
00010 import cv2
00011 from distutils.version import LooseVersion
00012 import numpy as np
00013
00014 import cv_bridge
00015 from dynamic_reconfigure.server import Server
00016 from jsk_perception.cfg import FastRCNNConfig as Config
00017 from jsk_recognition_msgs.msg import Rect, RectArray
00018 from jsk_recognition_msgs.msg import ClassificationResult
00019 import jsk_recognition_utils
00020 from jsk_recognition_utils.chainermodels import VGG16FastRCNN
00021 from jsk_recognition_utils.chainermodels import VGG_CNN_M_1024
00022 from jsk_recognition_utils.nms import nms
00023 from jsk_topic_tools import ConnectionBasedTransport
00024 import message_filters
00025 import rospkg
00026 import rospy
00027 from sensor_msgs.msg import Image
00028
00029
00030 def img_preprocessing(orig_img, pixel_means, max_size=1000, scale=600):
00031 img = orig_img.astype(np.float32, copy=True)
00032 img -= pixel_means
00033 im_size_min = np.min(img.shape[0:2])
00034 im_size_max = np.max(img.shape[0:2])
00035 im_scale = float(scale) / float(im_size_min)
00036 if np.rint(im_scale * im_size_max) > max_size:
00037 im_scale = float(max_size) / float(im_size_max)
00038 img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale,
00039 interpolation=cv2.INTER_LINEAR)
00040 return img.transpose([2, 0, 1]).astype(np.float32), im_scale
00041
00042
00043 class FastRCNN(ConnectionBasedTransport):
00044
00045 def __init__(self, model, target_names, pixel_means, use_gpu):
00046 super(FastRCNN, self).__init__()
00047
00048 self._srv = Server(Config, self.configCallback)
00049
00050 self.model = model
00051 self._pub_rects = self.advertise('~output/rect_array',
00052 RectArray, queue_size=1)
00053 self._pub_class = self.advertise('~output/class',
00054 ClassificationResult, queue_size=1)
00055 self.target_names = target_names
00056 self.pixel_means = np.array(pixel_means, dtype=np.float32)
00057 self.use_gpu = use_gpu
00058 self.classifier_name = rospy.get_param("~classifier_name", rospy.get_name())
00059
00060 def configCallback(self, config, level):
00061 self.nms_thresh = config.nms_thresh
00062 self.conf_thresh = config.conf_thresh
00063 return config
00064
00065 def subscribe(self):
00066 self._sub = message_filters.Subscriber('~input', Image)
00067 self._sub_rects = message_filters.Subscriber('~input/rect_array',
00068 RectArray)
00069 use_async = rospy.get_param('~approximate_sync', False)
00070 queue_size = rospy.get_param('~queue_size', 100)
00071 subs = [self._sub, self._sub_rects]
00072 if use_async:
00073 slop = rospy.get_param('~slop', 0.1)
00074 sync = message_filters.ApproximateTimeSynchronizer(
00075 subs, queue_size, slop)
00076 else:
00077 sync = message_filters.TimeSynchronizer(subs, queue_size)
00078 sync.registerCallback(self._detect)
00079
00080 def unsubscribe(self):
00081 self._sub.unregister()
00082 self._sub_rects.unregister()
00083
00084 def _detect(self, imgmsg, rects_msg):
00085 bridge = cv_bridge.CvBridge()
00086 im_orig = bridge.imgmsg_to_cv2(imgmsg, desired_encoding='bgr8')
00087 im, im_scale = img_preprocessing(im_orig, self.pixel_means)
00088 rects_orig = jsk_recognition_utils.rects_msg_to_ndarray(rects_msg)
00089 if len(rects_orig) == 0:
00090 return
00091 rects = rects_orig * im_scale
00092 scores, bbox_pred = self._im_detect(im, rects)
00093
00094 rects = RectArray(header=imgmsg.header)
00095 labels = []
00096 label_proba = []
00097 for cls_id in range(1, len(self.target_names)):
00098 _cls = scores[:, cls_id][:, np.newaxis]
00099 _bbx = bbox_pred[:, cls_id * 4: (cls_id + 1) * 4]
00100 dets = np.hstack((_bbx, _cls))
00101 keep = nms(dets, self.nms_thresh)
00102 dets = dets[keep, :]
00103 orig_rects = cuda.cupy.asnumpy(rects_orig)[keep, :]
00104
00105 inds = np.where(dets[:, -1] >= self.conf_thresh)[0]
00106
00107 for i in inds:
00108 _bbox = dets[i, :4]
00109 x1, y1, x2, y2 = orig_rects[i]
00110 width = x2 - x1
00111 height = y2 - y1
00112 center_x = x1 + 0.5 * width
00113 center_y = y1 + 0.5 * height
00114
00115 dx, dy, dw, dh = _bbox
00116 _center_x = dx * width + center_x
00117 _center_y = dy * height + center_y
00118 _width = np.exp(dw) * width
00119 _height = np.exp(dh) * height
00120
00121 x1 = _center_x - 0.5 * _width
00122 y1 = _center_y - 0.5 * _height
00123 x2 = _center_x + 0.5 * _width
00124 y2 = _center_y + 0.5 * _height
00125 rect = Rect(x=x1, y=y1, width=x2-x1, height=y2-y1)
00126 rects.rects.append(rect)
00127 labels.append(cls_id)
00128 label_proba.append(dets[:, -1][i])
00129
00130
00131 clss = ClassificationResult(
00132 header=imgmsg.header,
00133 classifier=self.classifier_name,
00134 target_names=self.target_names,
00135 labels=labels,
00136 label_names=[self.target_names[l] for l in labels],
00137 label_proba=label_proba,
00138 )
00139 self._pub_rects.publish(rects)
00140 self._pub_class.publish(clss)
00141
00142 def _im_detect(self, im, rects):
00143 xp = cuda.cupy if self.use_gpu else np
00144 im = xp.asarray(im)
00145 rects = xp.asarray(rects)
00146 x_data = im[xp.newaxis, :, :, :]
00147
00148 batch_indices = xp.zeros((len(rects), 1), dtype=np.float32)
00149 rects = xp.hstack((batch_indices, rects))
00150 if LooseVersion(chainer.__version__).version[0] < 2:
00151 x = Variable(x_data, volatile=True)
00152 rects_val = Variable(rects, volatile=True)
00153 self.model.train = False
00154 cls_score, bbox_pred = self.model(x, rects_val)
00155 else:
00156 with chainer.using_config('train', False), \
00157 chainer.no_backprop_mode():
00158 x = Variable(x_data)
00159 rects_val = Variable(rects)
00160 cls_score, bbox_pred = self.model(x, rects_val)
00161
00162 scores = cuda.to_cpu(cls_score.data)
00163 bbox_pred = cuda.to_cpu(bbox_pred.data)
00164 return scores, bbox_pred
00165
00166
00167 def main():
00168 rospy.init_node('fast_rcnn_caffenet')
00169
00170
00171 try:
00172 model_name = rospy.get_param('~model')
00173 except KeyError as e:
00174 rospy.logerr('Unspecified rosparam: {0}'.format(e))
00175 sys.exit(1)
00176
00177 gpu = rospy.get_param('~gpu', -1)
00178 use_gpu = True if gpu >= 0 else False
00179
00180
00181 PKG = 'jsk_perception'
00182 rp = rospkg.RosPack()
00183 data_path = osp.join(rp.get_path(PKG), 'trained_data')
00184 if model_name == 'vgg_cnn_m_1024':
00185 model = VGG_CNN_M_1024()
00186 chainermodel = osp.join(data_path, 'vgg_cnn_m_1024.chainermodel')
00187 elif model_name == 'vgg16':
00188 model = VGG16FastRCNN()
00189 chainermodel = osp.join(data_path, 'vgg16_fast_rcnn.chainermodel')
00190 else:
00191 rospy.logerr('Unsupported model: {0}'.format(model_name))
00192 sys.exit(1)
00193 rospy.loginfo('Loading chainermodel')
00194 S.load_hdf5(chainermodel, model)
00195 if use_gpu:
00196 model.to_gpu(gpu)
00197 rospy.loginfo('Finished loading chainermodel')
00198
00199
00200 target_names = [
00201 '__background__',
00202 'aeroplane',
00203 'bicycle',
00204 'bird',
00205 'boat',
00206 'bottle',
00207 'bus',
00208 'car',
00209 'cat',
00210 'chair',
00211 'cow',
00212 'diningtable',
00213 'dog',
00214 'horse',
00215 'motorbike',
00216 'person',
00217 'pottedplant',
00218 'sheep',
00219 'sofa',
00220 'train',
00221 'tvmonitor',
00222 ]
00223 pixel_means = [102.9801, 115.9465, 122.7717]
00224
00225 fast_rcnn = FastRCNN(
00226 model=model, target_names=target_names,
00227 pixel_means=pixel_means, use_gpu=use_gpu)
00228 rospy.spin()
00229
00230
00231 if __name__ == '__main__':
00232 main()