face_pose_estimation.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Furushchev <furushchev@jsk.imi.i.u-tokyo.ac.jp>

from __future__ import print_function

import cv2
import numpy as np
import os
import traceback

import itertools, pkg_resources, sys
from distutils.version import LooseVersion
if LooseVersion(pkg_resources.get_distribution("chainer").version) >= LooseVersion('7.0.0') and \
   sys.version_info.major == 2:
    print('''Please install chainer < 7.0.0:

    sudo pip install chainer==6.7.0

c.f. https://github.com/jsk-ros-pkg/jsk_recognition/pull/2485
''', file=sys.stderr)
    sys.exit(1)
if [p for p in list(itertools.chain(*[pkg_resources.find_distributions(_) for _ in sys.path])) if "cupy-" in p.project_name] == []:
    print('''Please install CuPy

    sudo pip install cupy-cuda[your cuda version]
i.e.
    sudo pip install cupy-cuda91

''', file=sys.stderr)
    # sys.exit(1)
import chainer
from chainer.dataset import download
from chainer.serializers import load_npz
import chainer.functions as F
import chainer.links as L

import cv_bridge
from dynamic_reconfigure.server import Server
from jsk_topic_tools import ConnectionBasedTransport
import message_filters
import rospy
import tf.transformations as T

from geometry_msgs.msg import PoseArray
from jsk_perception.cfg import FacePoseEstimationConfig as Config
from jsk_recognition_msgs.msg import ClassificationResult
from jsk_recognition_msgs.msg import Rect, RectArray
from jsk_recognition_msgs.msg import PeoplePoseArray
from sensor_msgs.msg import Image


def _get(var):
    # Unwrap a chainer.Variable and copy its array from GPU to host memory
    # when necessary (cupy arrays implement .get()).
    if isinstance(var, chainer.Variable):
        var = var.data
    if hasattr(var, 'get'):
        var = var.get()
    return var


def _project_plane_yz(vec):
    # Project a 3D vector onto the image plane spanned by the world y/z
    # axes; the y component is flipped because image y grows downwards.
    x = vec.dot(np.array([0, 1, 0], dtype=np.float32))
    y = vec.dot(np.array([0, 0, 1], dtype=np.float32))
    return np.array([x, -y], dtype=np.float32)  # y flip


def _draw_line(img, pt1, pt2, color, thickness=2):
    pt1 = (int(pt1[0]), int(pt1[1]))
    pt2 = (int(pt2[0]), int(pt2[1]))
    return cv2.line(img, pt1, pt2, color, int(thickness))


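# HyperFace (Ranjan et al.) is a multi-task CNN: from a single 227x227 RGB
# face crop it jointly predicts a detection score, 21 facial landmarks
# (42 values), per-landmark visibility, head pose (3 Euler angles) and
# gender. The conv1a/conv3a/conv_all links below fuse features from several
# depths before the shared fully-connected layer.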
class HyperFaceModel(chainer.Chain):

    def __init__(self, pretrained_model='auto'):
        super(HyperFaceModel, self).__init__(
            conv1=L.Convolution2D(3, 96, 11, stride=4, pad=0),
            conv1a=L.Convolution2D(96, 256, 4, stride=4, pad=0),
            conv2=L.Convolution2D(96, 256, 5, stride=1, pad=2),
            conv3=L.Convolution2D(256, 384, 3, stride=1, pad=1),
            conv3a=L.Convolution2D(384, 256, 2, stride=2, pad=0),
            conv4=L.Convolution2D(384, 384, 3, stride=1, pad=1),
            conv5=L.Convolution2D(384, 256, 3, stride=1, pad=1),
            conv_all=L.Convolution2D(768, 192, 1, stride=1, pad=0),
            fc_full=L.Linear(6 * 6 * 192, 3072),
            fc_detection1=L.Linear(3072, 512),
            fc_detection2=L.Linear(512, 2),
            fc_landmarks1=L.Linear(3072, 512),
            fc_landmarks2=L.Linear(512, 42),
            fc_visibility1=L.Linear(3072, 512),
            fc_visibility2=L.Linear(512, 21),
            fc_pose1=L.Linear(3072, 512),
            fc_pose2=L.Linear(512, 3),
            fc_gender1=L.Linear(3072, 512),
            fc_gender2=L.Linear(512, 2),
        )

        # download pretrained weights
        if pretrained_model == 'auto':
            rospy.loginfo("Loading pretrained model. (This may take some minutes.)")
            url = 'https://jsk-ros-pkg.s3.amazonaws.com/chainer/hyperface_model_epoch_190.npz'
            load_npz(download.cached_download(url), self)
            rospy.loginfo("Model loaded")
        elif pretrained_model:
            rospy.loginfo("Loading pretrained model: %s" % pretrained_model)
            load_npz(pretrained_model, self)
            rospy.loginfo("Model loaded")
        else:
            rospy.logwarn("No pretrained model is loaded.")

    def __call__(self, x):
        c1 = F.relu(self.conv1(x))
        m1 = F.max_pooling_2d(c1, 3, stride=2, pad=0)
        m1_n = F.local_response_normalization(m1)
        c1a = F.relu(self.conv1a(m1_n))
        c2 = F.relu(self.conv2(m1_n))
        m2 = F.max_pooling_2d(c2, 3, stride=2, pad=0)
        m2_n = F.local_response_normalization(m2)
        c3 = F.relu(self.conv3(m2_n))
        c3a = F.relu(self.conv3a(c3))
        c4 = F.relu(self.conv4(c3))
        c5 = F.relu(self.conv5(c4))
        m5 = F.max_pooling_2d(c5, 3, stride=2, pad=0)

        # fuse features from three depths (each 6x6x256, 768 channels total)
        c = F.concat((c1a, c3a, m5))

        c_all = F.relu(self.conv_all(c))
        fc = F.relu(self.fc_full(c_all))

        detection = F.relu(self.fc_detection1(fc))
        detection = self.fc_detection2(detection)
        landmark = F.relu(self.fc_landmarks1(fc))
        landmark = self.fc_landmarks2(landmark)
        visibility = F.relu(self.fc_visibility1(fc))
        visibility = self.fc_visibility2(visibility)
        pose = F.relu(self.fc_pose1(fc))
        pose = self.fc_pose2(pose)
        gender = F.relu(self.fc_gender1(fc))
        gender = self.fc_gender2(gender)

        # apply softmax once and keep the probability of the positive class
        detection = F.softmax(detection)[:, 1]
        gender = F.softmax(gender)[:, 1]

        return {'detection': detection,
                'landmark': landmark,
                'visibility': visibility,
                'gender': gender,
                'pose': pose}

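# Minimal standalone usage sketch (an illustration, not executed by this
# node; assumes the pretrained checkpoint can be downloaded):
#
#   model = HyperFaceModel(pretrained_model='auto')
#   x = np.zeros((1, 3, 227, 227), dtype=np.float32)  # dummy CHW batch
#   with chainer.using_config('train', False):
#       y = model(x)
#   print(_get(y['pose']))  # -> (1, 3) array of Euler angles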

class HyperFacePredictor(object):

    def __init__(self, model, gpu=-1):
        assert isinstance(model, HyperFaceModel)
        self.gpu = gpu

        # inference only: disable training-specific behaviour
        model.train = False
        model.report = False
        model.backward = False

        if self.gpu >= 0:
            model.to_gpu(self.gpu)

        self.model = model

    def preprocess(self, img):
        # sanity checks
        assert img.size > 0
        orig_h, orig_w, _ = img.shape
        assert orig_h > 0 and orig_w > 0

        # resize to the network input and stretch each crop's own min/max
        # to [-0.5, 0.5] (NORM_MINMAX is per-image contrast normalization)
        img = img.astype(np.float32) / 255.0
        img = cv2.resize(img, (227, 227))
        img = cv2.normalize(img, None, -0.5, 0.5, cv2.NORM_MINMAX)
        img = np.transpose(img, (2, 0, 1))  # HWC -> CHW

        return img

    def forward(self, imgs):
        xp = self.model.xp
        imgs = xp.asarray([self.preprocess(img) for img in imgs])

        # forwarding
        x = chainer.Variable(imgs)
        if self.gpu >= 0:
            x.to_gpu()
        y = self.model(x)

        detection = _get(y["detection"])
        landmark = _get(y["landmark"])
        visibility = _get(y["visibility"])
        pose = _get(y["pose"])
        gender = _get(y["gender"])

        result = []
        for i in range(len(detection)):
            result.append({
                "detection": detection[i],
                "landmark": landmark[i],
                "visibility": visibility[i],
                "pose": pose[i],
                "gender": gender[i]
            })
        return result

    def __call__(self, imgs):
        if self.gpu >= 0:
            chainer.cuda.get_device_from_id(self.gpu).use()
        return self.forward(imgs)


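# ROS node wrapper: synchronizes the camera image with 2D/3D people poses,
# crops a square region around each nose keypoint, runs HyperFace on the
# crops, and publishes head poses, genders, face rectangles and an optional
# debug image.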
class FacePoseEstimator(ConnectionBasedTransport):
    def __init__(self):
        super(FacePoseEstimator, self).__init__()

        self.cv_bridge = cv_bridge.CvBridge()

        gpu = rospy.get_param("~gpu", -1)  # -1 == cpu only

        model = HyperFaceModel(pretrained_model=rospy.get_param("~model_path", None))
        self.predictor = HyperFacePredictor(model=model, gpu=gpu)
        rospy.loginfo("hyperface predictor initialized ({})".format(
            "GPU: %d" % gpu if gpu >= 0 else "CPU mode"))

        self.approximate_sync = False
        self.queue_size = 10
        self.slop = 0.1
        self.classifier_name = rospy.get_param("~classifier_name", rospy.get_name())
        self.srv = Server(Config, self.config_callback)

        self.pub_pose = self.advertise("~output/pose", PoseArray, queue_size=1)
        self.pub_gender = self.advertise("~output/gender", ClassificationResult, queue_size=1)
        self.pub_rect = self.advertise("~output/rects", RectArray, queue_size=1)
        self.pub_debug_image = self.advertise("~output/debug", Image, queue_size=1)

    @property
    def visualize(self):
        return self.pub_debug_image.get_num_connections() > 0

    def config_callback(self, config, level):
        need_resubscribe = (
            self.approximate_sync != config.approximate_sync or
            self.queue_size != config.queue_size or
            self.slop != config.slop)
        self.approximate_sync = config.approximate_sync
        self.queue_size = config.queue_size
        self.slop = config.slop

        self.face_padding = config.face_padding
        self.face_threshold = config.face_threshold

        if need_resubscribe and self.is_subscribed():
            self.unsubscribe()
            self.subscribe()

        return config

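    # NOTE: subscribe()/unsubscribe() are re-invoked from config_callback when
    # dynamic_reconfigure changes any synchronizer setting, so updated
    # queue_size/slop values take effect on the fly.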
    def subscribe(self):
        self.subscribers = [
            message_filters.Subscriber("~input", Image),
            message_filters.Subscriber("~input/pose_2d", PeoplePoseArray),
            message_filters.Subscriber("~input/pose", PeoplePoseArray),
        ]

        if self.approximate_sync:
            self.sync = message_filters.ApproximateTimeSynchronizer(
                self.subscribers, self.queue_size, self.slop)
        else:
            self.sync = message_filters.TimeSynchronizer(
                self.subscribers, self.queue_size)
        self.sync.registerCallback(self.callback)

    def unsubscribe(self):
        for s in self.subscribers:
            s.unregister()

    def callback(self, img, pose2d, pose3d):
        header = img.header
        try:
            img = self.cv_bridge.imgmsg_to_cv2(img, "bgr8")
        except cv_bridge.CvBridgeError as e:
            rospy.logerr("Failed to convert image: %s" % str(e))
            rospy.logerr(traceback.format_exc())
            return

        faces = []
        rects = []
        face_origins = []
        face_2d_origins = []
        for p2d, p3d in zip(pose2d.poses, pose3d.poses):
            try:
                score = p2d.scores[p2d.limb_names.index("Nose")]
                if score < self.face_threshold:
                    continue
                nose = p2d.poses[p2d.limb_names.index("Nose")]
                neck = p2d.poses[p2d.limb_names.index("Neck")]
                # estimate the face size from the nose-neck distance
                width_half = np.sqrt((neck.position.x - nose.position.x) ** 2 +
                                     (neck.position.y - nose.position.y) ** 2)
                width_half *= 1.0 + self.face_padding
                # clamp to the image origin so the slicing below cannot wrap
                # around via negative indices
                rect = Rect(x=max(0, int(nose.position.x - width_half)),
                            y=max(0, int(nose.position.y - width_half)),
                            width=int(width_half * 2),
                            height=int(width_half * 2))
                face_origin = p3d.poses[p3d.limb_names.index("Nose")]
                face_2d_origin = nose
            except ValueError:
                # this person has no Nose/Neck keypoint
                continue

            try:
                face = img[rect.y:rect.y + rect.height, rect.x:rect.x + rect.width]
                if face.size <= 0:
                    continue
            except Exception as e:
                rospy.logerr("Failed to crop image: %s" % str(e))
                rospy.logerr(traceback.format_exc())
                continue
            rects.append(rect)
            face_origins.append(face_origin)
            face_2d_origins.append(face_2d_origin)
            faces.append(face)

        if not faces:
            rospy.logdebug("No face found")
            return

        try:
            results = self.predictor(faces)
        except OverflowError:
            rospy.logfatal(traceback.format_exc())
            rospy.signal_shutdown("shutdown")
            return
        except Exception as e:
            rospy.logerr("Failed to predict: %s" % str(e))
            rospy.logerr(traceback.format_exc())
            return

        for i in range(len(results)):
            results[i].update({
                "face_origin": face_origins[i],
                "face_2d_origin": face_2d_origins[i],
                "rect": rects[i],
            })

        self.publish_face_rects(header, results)
        self.publish_gender(header, results)
        self.publish_pose(header, results, img)

    def publish_face_rects(self, header, results):
        rects = RectArray(
            header=header,
            rects=[r["rect"] for r in results],
        )
        self.pub_rect.publish(rects)

    def publish_gender(self, header, results):
        target_names = ["Male", "Female"]
        labels = [0 if r["gender"] < 0.5 else 1 for r in results]
        msg = ClassificationResult(
            header=header,
            classifier=self.classifier_name,
            target_names=target_names,
            labels=labels,
            label_names=[target_names[l] for l in labels],
            # note: the face detection confidence is reported as label_proba
            label_proba=[r["detection"] for r in results],
        )
        self.pub_gender.publish(msg)

    def publish_pose(self, header, results, img):
        msg = PoseArray(header=header)
        for r in results:
            pose = r["face_origin"]
            pose_2d = r["face_2d_origin"]
            ori = r["pose"]
            mat = T.euler_matrix(-ori[0], -ori[1], -ori[2])
            rotmat = mat[:3, :3]
            quat = T.quaternion_from_matrix(mat)
            # fixed rotation aligning the estimated head frame with the
            # camera frame
            quat = T.quaternion_multiply(
                [0.5, 0.5, -0.5, 0.5], quat)

            pose.orientation.x = quat[0]
            pose.orientation.y = quat[1]
            pose.orientation.z = quat[2]
            pose.orientation.w = quat[3]
            msg.poses.append(pose)

            if self.visualize:
                # project the head frame axes into the image and scale them
                # for display
                zvec = np.array([0, 0, 1], np.float32)
                yvec = np.array([0, 1, 0], np.float32)
                xvec = np.array([1, 0, 0], np.float32)
                zvec = _project_plane_yz(rotmat.dot(zvec)) * 50.0
                yvec = _project_plane_yz(rotmat.dot(yvec)) * 50.0
                xvec = _project_plane_yz(rotmat.dot(xvec)) * 50.0

                face_2d_center = np.array([pose_2d.position.x, pose_2d.position.y])
                img = _draw_line(img, face_2d_center,
                                 face_2d_center + zvec, (255, 0, 0), 3)
                img = _draw_line(img, face_2d_center,
                                 face_2d_center + yvec, (0, 255, 0), 3)
                img = _draw_line(img, face_2d_center,
                                 face_2d_center + xvec, (0, 0, 255), 3)

        self.pub_pose.publish(msg)
        if self.visualize:
            img_msg = self.cv_bridge.cv2_to_imgmsg(img, "bgr8")
            img_msg.header = header
            self.pub_debug_image.publish(img_msg)


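# Example invocation (a sketch; the pose topic names are hypothetical and
# depend on which people-pose estimator is running upstream):
#
#   rosrun jsk_perception face_pose_estimation.py \
#       ~input:=/camera/rgb/image_rect_color \
#       ~input/pose_2d:=/people_pose_estimation_2d/pose_2d \
#       ~input/pose:=/people_pose_estimation_2d/pose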

if __name__ == '__main__':
    rospy.init_node("face_pose_estimation")
    estimator = FacePoseEstimator()
    rospy.spin()

