face_pose_estimation.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 # -*- coding: utf-8 -*-
00003 # Author: Furushchev <furushchev@jsk.imi.i.u-tokyo.ac.jp>
00004 
00005 import cv2
00006 import numpy as np
00007 import os
00008 import traceback
00009 
00010 import chainer
00011 from chainer.dataset import download
00012 from chainer.serializers import load_npz
00013 import chainer.functions as F
00014 import chainer.links as L
00015 
00016 import cv_bridge
00017 from dynamic_reconfigure.server import Server
00018 from jsk_topic_tools import ConnectionBasedTransport
00019 import message_filters
00020 import rospy
00021 import tf.transformations as T
00022 
00023 from geometry_msgs.msg import PoseArray
00024 from jsk_perception.cfg import FacePoseEstimationConfig as Config
00025 from jsk_recognition_msgs.msg import ClassificationResult
00026 from jsk_recognition_msgs.msg import Rect, RectArray
00027 from jsk_recognition_msgs.msg import PeoplePoseArray
00028 from sensor_msgs.msg import Image
00029 
00030 
def _get(var):
    """Unwrap a chainer.Variable into a plain host (CPU) array.

    Non-Variable inputs are returned untouched; device (cupy) arrays
    are copied back to host via their ``.get()`` method.
    """
    if not isinstance(var, chainer.Variable):
        return var
    data = var.data
    # cupy arrays expose .get() to copy device memory to host
    return data.get() if hasattr(data, 'get') else data
00037 
00038 
00039 def _project_plane_yz(vec):
00040     x = vec.dot(np.array([0, 1, 0], dtype=np.float32))
00041     y = vec.dot(np.array([0, 0, 1], dtype=np.float32))
00042     return np.array([x, -y], dtype=np.float32)  # y flip
00043 
00044 
def _draw_line(img, pt1, pt2, color, thickness=2):
    """Draw a line on ``img``; endpoints are truncated to integer pixels."""
    start = (int(pt1[0]), int(pt1[1]))
    end = (int(pt2[0]), int(pt2[1]))
    # cv2.line requires integer coordinates and thickness
    return cv2.line(img, start, end, color, int(thickness))
00049 
00050 
class HyperFaceModel(chainer.Chain):
    """HyperFace CNN for multi-task face analysis.

    Given a 227x227 RGB image batch, predicts per face:
      - detection: probability that the crop contains a face
      - landmark: 21 facial landmarks (42 values, x/y interleaved)
      - visibility: per-landmark visibility scores (21 values)
      - pose: head orientation (roll, pitch, yaw -- 3 values)
      - gender: probability of the positive gender class
    """

    def __init__(self, pretrained_model='auto'):
        """Build the network and optionally load pretrained weights.

        Args:
            pretrained_model: 'auto' to download the jsk pretrained weights,
                a path to a .npz snapshot, or a falsy value to skip loading.
        """
        super(HyperFaceModel, self).__init__(
            conv1  = L.Convolution2D(3,   96, 11, stride=4, pad=0),
            conv1a = L.Convolution2D(96,  256, 4, stride=4, pad=0),
            conv2  = L.Convolution2D(96,  256, 5, stride=1, pad=2),
            conv3  = L.Convolution2D(256, 384, 3, stride=1, pad=1),
            conv3a = L.Convolution2D(384, 256, 2, stride=2, pad=0),
            conv4  = L.Convolution2D(384, 384, 3, stride=1, pad=1),
            conv5  = L.Convolution2D(384, 256, 3, stride=1, pad=1),
            conv_all = L.Convolution2D(768, 192, 1, stride=1, pad=0),
            fc_full  = L.Linear(6 * 6 * 192, 3072),
            fc_detection1  = L.Linear(3072, 512),
            fc_detection2  = L.Linear(512,  2),
            fc_landmarks1  = L.Linear(3072, 512),
            fc_landmarks2  = L.Linear(512,  42),
            fc_visibility1 = L.Linear(3072, 512),
            fc_visibility2 = L.Linear(512,  21),
            fc_pose1       = L.Linear(3072, 512),
            fc_pose2       = L.Linear(512,  3),
            fc_gender1     = L.Linear(3072, 512),
            fc_gender2     = L.Linear(512,  2),
        )

        # download pretrained weights
        if pretrained_model == 'auto':
            rospy.loginfo("Loading pretrained model. (This may take some minutes.)")
            url = 'https://jsk-ros-pkg.s3.amazonaws.com/chainer/hyperface_model_epoch_190.npz'
            load_npz(download.cached_download(url), self)
            rospy.loginfo("Model loaded")
        elif pretrained_model:
            rospy.loginfo("Loading pretrained model: %s" % pretrained_model)
            load_npz(pretrained_model, self)
            rospy.loginfo("Model loaded")
        else:
            rospy.logwarn("No pretrained model is loaded.")

    def __call__(self, x):
        """Forward pass; returns a dict of task outputs (see class docstring)."""
        c1 = F.relu(self.conv1(x))
        m1 = F.max_pooling_2d(c1, 3, stride=2, pad=0)
        m1_n = F.local_response_normalization(m1)
        c1a = F.relu(self.conv1a(m1_n))
        c2 = F.relu(self.conv2(m1_n))
        m2 = F.max_pooling_2d(c2, 3, stride=2, pad=0)
        m2_n = F.local_response_normalization(m2)
        c3 = F.relu(self.conv3(m2_n))
        c3a = F.relu(self.conv3a(c3))
        c4 = F.relu(self.conv4(c3))
        c5 = F.relu(self.conv5(c4))
        m5 = F.max_pooling_2d(c5, 3, stride=2, pad=0)

        # fuse multi-scale features before the task-specific heads
        c = F.concat((c1a, c3a, m5))

        c_all = F.relu(self.conv_all(c))
        fc = F.relu(self.fc_full(c_all))

        detection = F.relu(self.fc_detection1(fc))
        detection = self.fc_detection2(detection)
        landmark = F.relu(self.fc_landmarks1(fc))
        landmark = self.fc_landmarks2(landmark)
        visibility = F.relu(self.fc_visibility1(fc))
        visibility = self.fc_visibility2(visibility)
        pose = F.relu(self.fc_pose1(fc))
        pose = self.fc_pose2(pose)
        gender = F.relu(self.fc_gender1(fc))
        gender = self.fc_gender2(gender)

        # BUGFIX: softmax was previously applied TWICE to the detection and
        # gender logits (once right after the head, and again when slicing
        # the positive-class column), which flattens the probabilities
        # toward 0.5. Apply it exactly once. (Double softmax is monotone
        # around 0.5, so any `< 0.5` thresholding downstream is unaffected.)
        detection = F.softmax(detection)[:, 1]
        gender = F.softmax(gender)[:, 1]

        return {'detection': detection,
                'landmark': landmark,
                'visibility': visibility,
                'gender': gender,
                'pose': pose}
00129 
00130 
class HyperFacePredictor(object):
    """Thin inference-only wrapper around :class:`HyperFaceModel`."""

    def __init__(self, model, gpu=-1):
        """Store the model, flag it for inference, and move it to GPU if asked.

        Args:
            model: a HyperFaceModel instance.
            gpu: CUDA device id, or a negative value for CPU-only execution.
        """
        assert isinstance(model, HyperFaceModel)
        self.gpu = gpu

        # inference only: disable training-time behavior on the model
        model.train = False
        model.report = False
        model.backward = False

        if self.gpu >= 0:
            model.to_gpu(self.gpu)

        self.model = model

    def preprocess(self, img):
        """Turn a BGR uint8 image into a normalized CHW float32 tensor."""
        assert img.size > 0
        height, width, _ = img.shape
        assert height > 0 and width > 0

        # scale to [0, 1], resize to the network input size, then
        # min-max normalize to [-0.5, 0.5] and reorder HWC -> CHW
        scaled = img.astype(np.float32) / 255.0
        resized = cv2.resize(scaled, (227, 227))
        normalized = cv2.normalize(resized, None, -0.5, 0.5, cv2.NORM_MINMAX)
        return np.transpose(normalized, (2, 0, 1))

    def forward(self, imgs):
        """Run the model on a list of face crops; returns a list of dicts."""
        xp = self.model.xp
        batch = xp.asarray([self.preprocess(i) for i in imgs])

        x = chainer.Variable(batch)
        if self.gpu >= 0:
            x.to_gpu()
        out = self.model(x)

        # pull every task output back to host arrays
        keys = ("detection", "landmark", "visibility", "pose", "gender")
        arrays = dict((k, _get(out[k])) for k in keys)

        # one result dict per input image
        return [dict((k, arrays[k][i]) for k in keys)
                for i in range(len(arrays["detection"]))]

    def __call__(self, imgs):
        if self.gpu >= 0:
            chainer.cuda.get_device_from_id(self.gpu).use()
        return self.forward(imgs)
00190 
00191 
class FacePoseEstimator(ConnectionBasedTransport):
    """ROS node estimating head pose and gender for faces found by 2D/3D skeletons.

    Subscribes (time-synchronized):
      ~input          (sensor_msgs/Image)                     camera image
      ~input/pose_2d  (jsk_recognition_msgs/PeoplePoseArray)  2D skeletons
      ~input/pose     (jsk_recognition_msgs/PeoplePoseArray)  3D skeletons

    Publishes:
      ~output/pose    (geometry_msgs/PoseArray)
      ~output/gender  (jsk_recognition_msgs/ClassificationResult)
      ~output/rects   (jsk_recognition_msgs/RectArray)
      ~output/debug   (sensor_msgs/Image)  visualization, only when subscribed
    """

    def __init__(self):
        super(FacePoseEstimator, self).__init__()

        self.cv_bridge = cv_bridge.CvBridge()

        gpu = rospy.get_param("~gpu", -1)  # -1 == cpu only

        model = HyperFaceModel(pretrained_model=rospy.get_param("~model_path", None))
        self.predictor = HyperFacePredictor(model=model, gpu=gpu)
        rospy.loginfo("hyperface predictor initialized ({})".format(
            "GPU: %d" % gpu if gpu >= 0 else "CPU mode"))

        # defaults; overwritten by the initial dynamic_reconfigure callback
        self.approximate_sync = False
        self.queue_size = 10
        self.slop = 0.1
        self.classifier_name = rospy.get_param("~classifier_name", rospy.get_name())
        self.srv = Server(Config, self.config_callback)

        self.pub_pose = self.advertise("~output/pose", PoseArray, queue_size=1)
        self.pub_gender = self.advertise("~output/gender", ClassificationResult, queue_size=1)
        self.pub_rect = self.advertise("~output/rects", RectArray, queue_size=1)
        self.pub_debug_image = self.advertise("~output/debug", Image, queue_size=1)

    @property
    def visualize(self):
        # only render the debug overlay when someone is listening
        return self.pub_debug_image.get_num_connections() > 0

    def config_callback(self, config, level):
        """dynamic_reconfigure callback; resubscribes when sync params change."""
        # BUGFIX: the original compared self.slop to itself (always False),
        # so changing slop alone never triggered a resubscribe.
        need_resubscribe = (
            self.approximate_sync != config.approximate_sync or
            self.queue_size != config.queue_size or
            self.slop != config.slop)
        self.approximate_sync = config.approximate_sync
        self.queue_size = config.queue_size
        self.slop = config.slop

        self.face_padding = config.face_padding
        self.face_threshold = config.face_threshold

        if need_resubscribe and self.is_subscribed():
            self.unsubscribe()
            self.subscribe()

        return config

    def subscribe(self):
        # BUGFIX: removed a stray extra Subscriber("~input") that was created,
        # discarded, and never unregistered (duplicate subscription leak).
        self.subscribers = [
            message_filters.Subscriber("~input", Image),
            message_filters.Subscriber("~input/pose_2d", PeoplePoseArray),
            message_filters.Subscriber("~input/pose", PeoplePoseArray),
        ]

        if self.approximate_sync:
            self.sync = message_filters.ApproximateTimeSynchronizer(
                self.subscribers, self.queue_size, self.slop)
        else:
            self.sync = message_filters.TimeSynchronizer(
                self.subscribers, self.queue_size)
        self.sync.registerCallback(self.callback)

    def unsubscribe(self):
        for s in self.subscribers:
            s.unregister()

    def callback(self, img, pose2d, pose3d):
        """Synchronized callback: crop a face around each nose and run inference."""
        header = img.header
        try:
            img = self.cv_bridge.imgmsg_to_cv2(img, "bgr8")
        except cv_bridge.CvBridgeError as e:
            rospy.logerr("Failed to convert image: %s" % str(e))
            rospy.logerr(traceback.format_exc())
            return

        faces = []
        rects = []
        face_origins = []
        face_2d_origins = []
        for p2d, p3d in zip(pose2d.poses, pose3d.poses):
            try:
                score = p2d.scores[p2d.limb_names.index("Nose")]
                if score < self.face_threshold:
                    continue
                nose = p2d.poses[p2d.limb_names.index("Nose")]
                neck = p2d.poses[p2d.limb_names.index("Neck")]
                # nose-neck distance as a proxy for face size
                width_half = np.sqrt((neck.position.x - nose.position.x) ** 2 +
                                     (neck.position.y - nose.position.y) ** 2)
                width_half *= 1.0 + self.face_padding
                rect = Rect(x=int(nose.position.x-width_half),
                            y=int(nose.position.y-width_half),
                            width=int(width_half*2),
                            height=int(width_half*2))
                face_origin = p3d.poses[p3d.limb_names.index("Nose")]
                face_2d_origin = nose
            except ValueError:
                # "Nose" or "Neck" not among the detected limbs
                continue

            try:
                face = img[rect.y:rect.y+rect.height, rect.x:rect.x+rect.width]
                if face.size <= 0:
                    continue
            except Exception as e:
                rospy.logerr("Failed to crop image: %s" % str(e))
                rospy.logerr(traceback.format_exc())
                continue
            rects.append(rect)
            face_origins.append(face_origin)
            face_2d_origins.append(face_2d_origin)
            faces.append(face)

        if not faces:
            rospy.logdebug("No face found")
            return

        try:
            results = self.predictor(faces)
        except OverflowError:
            rospy.logfatal(traceback.format_exc())
            rospy.signal_shutdown("shutdown")
            # BUGFIX: without this return, execution fell through to the
            # loop below with `results` unbound, raising NameError.
            return
        except Exception as e:
            rospy.logerr("Failed to predict: %s" % str(e))
            rospy.logerr(traceback.format_exc())
            return

        # attach per-face geometry so the publishers can use it
        for i in range(len(results)):
            results[i].update({
                "face_origin": face_origins[i],
                "face_2d_origin": face_2d_origins[i],
                "rect": rects[i],
            })

        self.publish_face_rects(header, results)
        self.publish_gender(header, results)
        self.publish_pose(header, results, img)

    def publish_face_rects(self, header, results):
        """Publish the face bounding boxes as a RectArray."""
        rects = RectArray(
            header=header,
            rects=[r["rect"] for r in results],
        )
        self.pub_rect.publish(rects)

    def publish_gender(self, header, results):
        """Publish per-face gender classification."""
        target_names = ["Male", "Female"]
        labels = [0 if r["gender"] < 0.5 else 1 for r in results]
        # NOTE(review): label_proba carries the face *detection* score, not
        # the gender probability -- looks suspicious, but kept as-is because
        # downstream consumers may rely on it; verify against subscribers.
        msg = ClassificationResult(
            header=header,
            classifier=self.classifier_name,
            target_names=target_names,
            labels=labels,
            label_names=[target_names[l] for l in labels],
            label_proba=[r["detection"] for r in results],
        )
        self.pub_gender.publish(msg)

    def publish_pose(self, header, results, img):
        """Publish head poses; optionally draw axis overlays on the debug image."""
        msg = PoseArray(header=header)
        for r in results:
            pose = r["face_origin"]
            pose_2d = r['face_2d_origin']
            ori = r["pose"]
            mat = T.euler_matrix(-ori[0], -ori[1], -ori[2])
            rotmat = mat[:3, :3]
            quat = T.quaternion_from_matrix(mat)
            # rotate into the camera/optical frame convention
            quat = T.quaternion_multiply(
                [0.5, 0.5, -0.5, 0.5], quat)

            pose.orientation.x = quat[0]
            pose.orientation.y = quat[1]
            pose.orientation.z = quat[2]
            pose.orientation.w = quat[3]
            msg.poses.append(pose)

            if self.visualize:
                # project the rotated unit axes to 2D and scale for display
                zvec = np.array([0, 0, 1], np.float32)
                yvec = np.array([0, 1, 0], np.float32)
                xvec = np.array([1, 0, 0], np.float32)
                zvec = _project_plane_yz(rotmat.dot(zvec)) * 50.0
                yvec = _project_plane_yz(rotmat.dot(yvec)) * 50.0
                xvec = _project_plane_yz(rotmat.dot(xvec)) * 50.0

                face_2d_center = np.array([pose_2d.position.x, pose_2d.position.y])
                img = _draw_line(img, face_2d_center,
                                 face_2d_center + zvec, (255, 0, 0), 3)
                img = _draw_line(img, face_2d_center,
                                 face_2d_center + yvec, (0, 255, 0), 3)
                img = _draw_line(img, face_2d_center,
                                 face_2d_center + xvec, (0, 0, 255), 3)

        self.pub_pose.publish(msg)
        if self.visualize:
            img_msg = self.cv_bridge.cv2_to_imgmsg(img, "bgr8")
            img_msg.header = header
            self.pub_debug_image.publish(img_msg)
00387 
00388 
if __name__ == '__main__':
    rospy.init_node("face_pose_estimation")
    estimator = FacePoseEstimator()  # keeps subscriptions alive for the node's lifetime
    rospy.spin()


jsk_perception
Author(s): Manabu Saito, Ryohei Ueda
autogenerated on Tue Jul 2 2019 19:41:07