00001
00002
00003
00004
00005 import cv2
00006 import numpy as np
00007 import os
00008 import traceback
00009
00010 import chainer
00011 from chainer.dataset import download
00012 from chainer.serializers import load_npz
00013 import chainer.functions as F
00014 import chainer.links as L
00015
00016 import cv_bridge
00017 from dynamic_reconfigure.server import Server
00018 from jsk_topic_tools import ConnectionBasedTransport
00019 import message_filters
00020 import rospy
00021 import tf.transformations as T
00022
00023 from geometry_msgs.msg import PoseArray
00024 from jsk_perception.cfg import FacePoseEstimationConfig as Config
00025 from jsk_recognition_msgs.msg import ClassificationResult
00026 from jsk_recognition_msgs.msg import Rect, RectArray
00027 from jsk_recognition_msgs.msg import PeoplePoseArray
00028 from sensor_msgs.msg import Image
00029
00030
def _get(var):
    """Unwrap a chainer.Variable and move any GPU array to host memory."""
    data = var.data if isinstance(var, chainer.Variable) else var
    # cupy arrays expose .get() to copy device memory to a numpy array;
    # plain numpy arrays do not, so they pass through unchanged.
    return data.get() if hasattr(data, 'get') else data
00037
00038
00039 def _project_plane_yz(vec):
00040 x = vec.dot(np.array([0, 1, 0], dtype=np.float32))
00041 y = vec.dot(np.array([0, 0, 1], dtype=np.float32))
00042 return np.array([x, -y], dtype=np.float32)
00043
00044
def _draw_line(img, pt1, pt2, color, thickness=2):
    """Draw a line on ``img``, truncating endpoint coordinates to int pixels."""
    start = (int(pt1[0]), int(pt1[1]))
    end = (int(pt2[0]), int(pt2[1]))
    # cv2.line requires integer endpoints and an integer thickness.
    return cv2.line(img, start, end, color, int(thickness))
00049
00050
class HyperFaceModel(chainer.Chain):
    """HyperFace multi-task CNN (AlexNet-style backbone).

    From a 227x227 RGB face crop the forward pass predicts:
      - detection: probability that the crop contains a face
      - landmark: 21 (x, y) facial landmark coordinates (42 values)
      - visibility: a visibility score per landmark (21 values)
      - pose: 3 head pose angles
      - gender: probability for the positive gender class
    """

    def __init__(self, pretrained_model='auto'):
        """Build the network and optionally load pretrained weights.

        pretrained_model: 'auto' downloads the reference weights from S3,
        a path string loads that npz file, a falsy value skips loading.
        """
        super(HyperFaceModel, self).__init__(
            conv1 = L.Convolution2D(3, 96, 11, stride=4, pad=0),
            conv1a = L.Convolution2D(96, 256, 4, stride=4, pad=0),
            conv2 = L.Convolution2D(96, 256, 5, stride=1, pad=2),
            conv3 = L.Convolution2D(256, 384, 3, stride=1, pad=1),
            conv3a = L.Convolution2D(384, 256, 2, stride=2, pad=0),
            conv4 = L.Convolution2D(384, 384, 3, stride=1, pad=1),
            conv5 = L.Convolution2D(384, 256, 3, stride=1, pad=1),
            conv_all = L.Convolution2D(768, 192, 1, stride=1, pad=0),
            fc_full = L.Linear(6 * 6 * 192, 3072),
            fc_detection1 = L.Linear(3072, 512),
            fc_detection2 = L.Linear(512, 2),
            fc_landmarks1 = L.Linear(3072, 512),
            fc_landmarks2 = L.Linear(512, 42),
            fc_visibility1 = L.Linear(3072, 512),
            fc_visibility2 = L.Linear(512, 21),
            fc_pose1 = L.Linear(3072, 512),
            fc_pose2 = L.Linear(512, 3),
            fc_gender1 = L.Linear(3072, 512),
            fc_gender2 = L.Linear(512, 2),
        )

        if pretrained_model == 'auto':
            rospy.loginfo("Loading pretrained model. (This may take some minutes.)")
            url = 'https://jsk-ros-pkg.s3.amazonaws.com/chainer/hyperface_model_epoch_190.npz'
            load_npz(download.cached_download(url), self)
            rospy.loginfo("Model loaded")
        elif pretrained_model:
            rospy.loginfo("Loading pretrained model: %s" % pretrained_model)
            load_npz(pretrained_model, self)
            rospy.loginfo("Model loaded")
        else:
            rospy.logwarn("No pretrained model is loaded.")

    def __call__(self, x):
        """Forward pass. ``x`` is an NCHW float batch of 227x227 crops."""
        c1 = F.relu(self.conv1(x))
        m1 = F.max_pooling_2d(c1, 3, stride=2, pad=0)
        m1_n = F.local_response_normalization(m1)
        c1a = F.relu(self.conv1a(m1_n))
        c2 = F.relu(self.conv2(m1_n))
        m2 = F.max_pooling_2d(c2, 3, stride=2, pad=0)
        m2_n = F.local_response_normalization(m2)
        c3 = F.relu(self.conv3(m2_n))
        c3a = F.relu(self.conv3a(c3))
        c4 = F.relu(self.conv4(c3))
        c5 = F.relu(self.conv5(c4))
        m5 = F.max_pooling_2d(c5, 3, stride=2, pad=0)

        # Multi-scale feature fusion: side branches c1a/c3a plus m5.
        c = F.concat((c1a, c3a, m5))

        c_all = F.relu(self.conv_all(c))
        fc = F.relu(self.fc_full(c_all))

        # BUGFIX: softmax was previously applied TWICE to the detection
        # and gender heads (once on the raw logits, then again before
        # slicing the positive column), which squashes the probabilities
        # toward 0.5. Apply it exactly once, just before slicing.
        detection = F.relu(self.fc_detection1(fc))
        detection = self.fc_detection2(detection)
        landmark = F.relu(self.fc_landmarks1(fc))
        landmark = self.fc_landmarks2(landmark)
        visibility = F.relu(self.fc_visibility1(fc))
        visibility = self.fc_visibility2(visibility)
        pose = F.relu(self.fc_pose1(fc))
        pose = self.fc_pose2(pose)
        gender = F.relu(self.fc_gender1(fc))
        gender = self.fc_gender2(gender)

        # Take the probability of the positive class for each binary head.
        detection = F.softmax(detection)[:, 1]
        gender = F.softmax(gender)[:, 1]

        return {'detection': detection,
                'landmark': landmark,
                'visibility': visibility,
                'gender': gender,
                'pose': pose}
00129
00130
class HyperFacePredictor(object):
    """Thin inference wrapper around :class:`HyperFaceModel`.

    Handles image preprocessing, optional GPU placement, and unpacking
    of the batched network outputs into one dictionary per face.
    """

    def __init__(self, model, gpu=-1):
        assert isinstance(model, HyperFaceModel)
        self.gpu = gpu

        # Inference-mode flags (chainer v1 style attributes).
        model.train = False
        model.report = False
        model.backward = False

        if self.gpu >= 0:
            model.to_gpu(self.gpu)

        self.model = model

    def preprocess(self, img):
        """Resize a BGR crop to 227x227, rescale to [-0.5, 0.5], make CHW."""
        assert img.size > 0
        n_rows, n_cols, _ = img.shape
        assert n_rows > 0 and n_cols > 0

        scaled = img.astype(np.float32) / 255.0
        scaled = cv2.resize(scaled, (227, 227))
        # Min-max normalize the crop into the [-0.5, 0.5] range.
        scaled = cv2.normalize(scaled, None, -0.5, 0.5, cv2.NORM_MINMAX)
        return np.transpose(scaled, (2, 0, 1))

    def forward(self, imgs):
        """Run the network on a list of BGR crops; return per-face dicts."""
        xp = self.model.xp
        batch = xp.asarray([self.preprocess(im) for im in imgs])

        x = chainer.Variable(batch)
        if self.gpu >= 0:
            x.to_gpu()
        outputs = self.model(x)

        keys = ("detection", "landmark", "visibility", "pose", "gender")
        arrays = {key: _get(outputs[key]) for key in keys}

        n_faces = len(arrays["detection"])
        return [{key: arrays[key][i] for key in keys}
                for i in range(n_faces)]

    def __call__(self, imgs):
        if self.gpu >= 0:
            chainer.cuda.get_device_from_id(self.gpu).use()
        return self.forward(imgs)
00190
00191
class FacePoseEstimator(ConnectionBasedTransport):
    """ROS node: estimate head pose and gender for faces found via poses.

    Synchronizes an input image with 2D and 3D ``PeoplePoseArray``
    messages, crops a square region around each detected nose (sized by
    the nose-neck distance plus padding), runs the HyperFace predictor
    on the crops, and publishes face rectangles, gender classification
    and head poses (plus an optional debug visualization image).
    """

    def __init__(self):
        super(FacePoseEstimator, self).__init__()

        self.cv_bridge = cv_bridge.CvBridge()

        gpu = rospy.get_param("~gpu", -1)  # -1 means CPU-only

        model = HyperFaceModel(pretrained_model=rospy.get_param("~model_path", None))
        self.predictor = HyperFacePredictor(model=model, gpu=gpu)
        rospy.loginfo("hyperface predictor initialized ({})".format(
            "GPU: %d" % gpu if gpu >= 0 else "CPU mode"))

        # Synchronizer defaults; actual values come from dynamic_reconfigure.
        self.approximate_sync = False
        self.queue_size = 10
        self.slop = 0.1
        self.classifier_name = rospy.get_param("~classifier_name", rospy.get_name())
        self.srv = Server(Config, self.config_callback)

        self.pub_pose = self.advertise("~output/pose", PoseArray, queue_size=1)
        self.pub_gender = self.advertise("~output/gender", ClassificationResult, queue_size=1)
        self.pub_rect = self.advertise("~output/rects", RectArray, queue_size=1)
        self.pub_debug_image = self.advertise("~output/debug", Image, queue_size=1)

    @property
    def visualize(self):
        # Only render the debug image when somebody subscribes to it.
        return self.pub_debug_image.get_num_connections() > 0

    def config_callback(self, config, level):
        """dynamic_reconfigure callback: update params, resubscribe if needed."""
        # BUGFIX: the original compared `self.slop != self.slop` (always
        # False), so changing only the slop never triggered a resubscribe.
        need_resubscribe = (
            self.approximate_sync != config.approximate_sync or
            self.queue_size != config.queue_size or
            self.slop != config.slop)
        self.approximate_sync = config.approximate_sync
        self.queue_size = config.queue_size
        self.slop = config.slop

        self.face_padding = config.face_padding
        self.face_threshold = config.face_threshold

        if need_resubscribe and self.is_subscribed():
            self.unsubscribe()
            self.subscribe()

        return config

    def subscribe(self):
        # BUGFIX: removed a stray, unused `message_filters.Subscriber("~input",
        # Image)` local that opened a duplicate subscription which was never
        # synchronized nor unregistered.
        self.subscribers = [
            message_filters.Subscriber("~input", Image),
            message_filters.Subscriber("~input/pose_2d", PeoplePoseArray),
            message_filters.Subscriber("~input/pose", PeoplePoseArray),
        ]

        if self.approximate_sync:
            self.sync = message_filters.ApproximateTimeSynchronizer(
                self.subscribers, self.queue_size, self.slop)
        else:
            self.sync = message_filters.TimeSynchronizer(
                self.subscribers, self.queue_size)
        self.sync.registerCallback(self.callback)

    def unsubscribe(self):
        for s in self.subscribers:
            s.unregister()

    def callback(self, img, pose2d, pose3d):
        """Synchronized callback: crop faces, run prediction, publish."""
        header = img.header
        try:
            img = self.cv_bridge.imgmsg_to_cv2(img, "bgr8")
        except cv_bridge.CvBridgeError as e:
            rospy.logerr("Failed to convert image: %s" % str(e))
            rospy.logerr(traceback.format_exc())
            return

        faces = []
        rects = []
        face_origins = []
        face_2d_origins = []
        for p2d, p3d in zip(pose2d.poses, pose3d.poses):
            try:
                score = p2d.scores[p2d.limb_names.index("Nose")]
                if score < self.face_threshold:
                    continue
                nose = p2d.poses[p2d.limb_names.index("Nose")]
                neck = p2d.poses[p2d.limb_names.index("Neck")]
                # Nose-neck distance (plus padding) gives the half-width
                # of a square crop centered at the nose.
                width_half = np.sqrt((neck.position.x - nose.position.x) ** 2 +
                                     (neck.position.y - nose.position.y) ** 2)
                width_half *= 1.0 + self.face_padding
                rect = Rect(x=int(nose.position.x-width_half),
                            y=int(nose.position.y-width_half),
                            width=int(width_half*2),
                            height=int(width_half*2))
                face_origin = p3d.poses[p3d.limb_names.index("Nose")]
                face_2d_origin = nose
            except ValueError:
                # This person has no Nose/Neck limb; skip them.
                continue

            try:
                face = img[rect.y:rect.y+rect.height, rect.x:rect.x+rect.width]
                if face.size <= 0:
                    continue
            except Exception as e:
                rospy.logerr("Failed to crop image: %s" % str(e))
                rospy.logerr(traceback.format_exc())
                continue
            rects.append(rect)
            face_origins.append(face_origin)
            face_2d_origins.append(face_2d_origin)
            faces.append(face)

        if not faces:
            rospy.logdebug("No face found")
            return

        try:
            results = self.predictor(faces)
        except OverflowError:
            rospy.logfatal(traceback.format_exc())
            rospy.signal_shutdown("shutdown")
            # BUGFIX: without this return, execution fell through to the
            # publishing code with `results` unbound (NameError).
            return
        except Exception as e:
            rospy.logerr("Failed to predict: %s" % str(e))
            rospy.logerr(traceback.format_exc())
            return

        for i in range(len(results)):
            results[i].update({
                "face_origin": face_origins[i],
                "face_2d_origin": face_2d_origins[i],
                "rect": rects[i],
            })

        self.publish_face_rects(header, results)
        self.publish_gender(header, results)
        self.publish_pose(header, results, img)

    def publish_face_rects(self, header, results):
        """Publish the face bounding boxes as a RectArray."""
        rects = RectArray(
            header=header,
            rects=[r["rect"] for r in results],
        )
        self.pub_rect.publish(rects)

    def publish_gender(self, header, results):
        """Publish per-face gender classification results."""
        target_names = ["Male", "Female"]
        # r["gender"] holds the softmax probability of the second class.
        labels = [0 if r["gender"] < 0.5 else 1 for r in results]
        msg = ClassificationResult(
            header=header,
            classifier=self.classifier_name,
            target_names=target_names,
            labels=labels,
            label_names=[target_names[l] for l in labels],
            # BUGFIX: report the probability of the *predicted gender*
            # label; the original published the face detection score here,
            # which is unrelated to gender confidence.
            label_proba=[r["gender"] if l == 1 else 1.0 - r["gender"]
                         for r, l in zip(results, labels)],
        )
        self.pub_gender.publish(msg)

    def publish_pose(self, header, results, img):
        """Publish head poses; optionally draw pose axes on a debug image."""
        msg = PoseArray(header=header)
        for r in results:
            pose = r["face_origin"]
            pose_2d = r['face_2d_origin']
            ori = r["pose"]
            # Convert the network's 3 pose angles to a quaternion, then
            # rotate into the camera frame convention.
            mat = T.euler_matrix(-ori[0], -ori[1], -ori[2])
            rotmat = mat[:3, :3]
            quat = T.quaternion_from_matrix(mat)
            quat = T.quaternion_multiply(
                [0.5, 0.5, -0.5, 0.5], quat)

            pose.orientation.x = quat[0]
            pose.orientation.y = quat[1]
            pose.orientation.z = quat[2]
            pose.orientation.w = quat[3]
            msg.poses.append(pose)

            if self.visualize:
                # Project the rotated unit axes onto the image plane and
                # draw them (50 px long) from the 2D nose position.
                zvec = np.array([0, 0, 1], np.float32)
                yvec = np.array([0, 1, 0], np.float32)
                xvec = np.array([1, 0, 0], np.float32)
                zvec = _project_plane_yz(rotmat.dot(zvec)) * 50.0
                yvec = _project_plane_yz(rotmat.dot(yvec)) * 50.0
                xvec = _project_plane_yz(rotmat.dot(xvec)) * 50.0

                face_2d_center = np.array([pose_2d.position.x, pose_2d.position.y])
                img = _draw_line(img, face_2d_center,
                                 face_2d_center + zvec, (255, 0, 0), 3)
                img = _draw_line(img, face_2d_center,
                                 face_2d_center + yvec, (0, 255, 0), 3)
                img = _draw_line(img, face_2d_center,
                                 face_2d_center + xvec, (0, 0, 255), 3)

        self.pub_pose.publish(msg)
        if self.visualize:
            img_msg = self.cv_bridge.cv2_to_imgmsg(img, "bgr8")
            img_msg.header = header
            self.pub_debug_image.publish(img_msg)
00387
00388
if __name__ == '__main__':
    # Start the face pose estimation node and process callbacks until shutdown.
    rospy.init_node("face_pose_estimation")
    estimator = FacePoseEstimator()
    rospy.spin()