human_mesh_recovery.py
#!/usr/bin/env python
# -*- coding:utf-8 -*-

# ROS wrapper of Human Mesh Recovery
# See: End-to-end Recovery of Human Shape and Pose
# https://akanazawa.github.io/hmr/

from __future__ import print_function

import itertools, pkg_resources, sys
from distutils.version import LooseVersion
if LooseVersion(pkg_resources.get_distribution("chainer").version) >= LooseVersion('7.0.0') and \
        sys.version_info.major == 2:
    print('''Please install chainer < 7.0.0:

    sudo pip install chainer==6.7.0

cf. https://github.com/jsk-ros-pkg/jsk_recognition/pull/2485
''', file=sys.stderr)
    sys.exit(1)
if [p for p in list(itertools.chain(*[pkg_resources.find_distributions(_) for _ in sys.path])) if "cupy-" in p.project_name] == []:
    print('''Please install CuPy

    sudo pip install cupy-cuda[your cuda version]
e.g.
    sudo pip install cupy-cuda91

''', file=sys.stderr)
    # sys.exit(1)
import chainer
import chainer.functions as F
from chainer import Variable
import cv2
import numpy as np
import pylab as plt  # NOQA

import tf
import cv_bridge
import message_filters
import rospy
from jsk_topic_tools import ConnectionBasedTransport
from geometry_msgs.msg import Pose
from geometry_msgs.msg import Point
from geometry_msgs.msg import Quaternion
from jsk_recognition_msgs.msg import PeoplePose
from jsk_recognition_msgs.msg import PeoplePoseArray
from sensor_msgs.msg import Image

from hmr.smpl import SMPL
from hmr.net import EncoderFC3Dropout
from hmr.resnet_v2_50 import ResNet_v2_50
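
# Overview: this node regresses SMPL human body parameters from a monocular
# image. It subscribes to an image topic (optionally synchronized with 2D
# people poses) and publishes the per-joint transforms of each detected
# person as a jsk_recognition_msgs/PeoplePoseArray on ~output/pose.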


def format_pose_msg(person_pose):
    # Flatten a PeoplePose message into an (N, 3) float32 array of
    # [x, y, score] rows, one row per joint.
    key_points = []
    for pose, score in zip(person_pose.poses, person_pose.scores):
        key_points.append(pose.position.x)
        key_points.append(pose.position.y)
        key_points.append(score)
    return np.array(key_points, 'f').reshape(-1, 3)


def resize_img(img, scale_factor):
    new_size = (np.floor(np.array(img.shape[0:2]) * scale_factor)).astype(int)
    new_img = cv2.resize(img, (new_size[1], new_size[0]))
    # This is the actual scale factor of [height, width], i.e. [y, x]
    actual_factor = [
        new_size[0] / float(img.shape[0]), new_size[1] / float(img.shape[1])
    ]
    return new_img, actual_factor


def scale_and_crop(image, scale, center, img_size):
    image_scaled, scale_factors = resize_img(image, scale)
    # Swap so it's [x, y]
    scale_factors = [scale_factors[1], scale_factors[0]]
    center_scaled = np.round(center * scale_factors).astype(int)

    margin = int(img_size / 2)
    image_pad = np.pad(
        image_scaled, ((margin, ), (margin, ), (0, )), mode='edge')
    center_pad = center_scaled + margin
    # figure out starting point
    start_pt = center_pad - margin
    end_pt = center_pad + margin
    # crop:
    crop = image_pad[start_pt[1]:end_pt[1], start_pt[0]:end_pt[0], :]
    proc_param = {
        'scale': scale,
        'start_pt': start_pt,
        'end_pt': end_pt,
        'img_size': img_size
    }

    return crop, proc_param


def get_bbox(key_points, vis_thr=0.2):
    # Keep only the keypoints detected with enough confidence.
    vis = key_points[:, 2] > vis_thr
    vis_kp = key_points[vis, :2]
    if len(vis_kp) == 0:
        return False, False
    min_pt = np.min(vis_kp, axis=0)
    max_pt = np.max(vis_kp, axis=0)
    person_height = np.linalg.norm(max_pt - min_pt)
    if person_height == 0:
        return False, False
    center = (min_pt + max_pt) / 2.
    # Scale so that the person spans roughly 150 px before cropping.
    scale = 150. / person_height
    return scale, center


def preprocess_image(img, key_points=None, img_size=224):
    if key_points is None:
        scale = 1.
        center = np.round(np.array(img.shape[:2]) / 2).astype(int)
        # image center in (x, y)
        center = center[::-1]
    else:
        scale, center = get_bbox(key_points, vis_thr=0.1)
        if scale is False:
            scale = 1.
            center = np.round(np.array(img.shape[:2]) / 2).astype(int)
            # image center in (x, y)
            center = center[::-1]
    crop_img, proc_param = scale_and_crop(img, scale, center, img_size)

    # Normalize image to [-1, 1]
    crop_img = 2 * ((crop_img / 255.) - 0.5)
    return crop_img, proc_param
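
# A minimal standalone sketch of the preprocessing above (the file name is
# hypothetical; any HxWx3 BGR uint8 image works):
#
#     img = cv2.imread('person.jpg')
#     crop_img, proc_param = preprocess_image(img)
#     # crop_img: 224x224x3, float, normalized to [-1, 1]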


mean = np.array([[
    0.90365213, -0.00383353, 0.03301106, 3.14986515, -0.01883755,
    0.16895422, -0.15615709, -0.0058559, 0.07191881, -0.18924442,
    -0.04396844, -0.05114707, 0.24385466, 0.00881136, 0.02384637,
    0.2066803, -0.10190887, -0.03373535, 0.27340922, 0.00637481,
    0.07408072, -0.03409823, -0.00971786, 0.03841642, 0.0191336,
    0.10812955, -0.06782207, -0.08026548, -0.18373352, 0.16556455,
    0.03735897, -0.02497507, 0.02688527, -0.18802814, 0.17772846,
    0.13135587, 0.01438429, 0.15891947, -0.2279436, -0.07497088,
    0.05169746, 0.08784129, 0.02147929, 0.02320284, -0.42375749,
    -0.04963749, 0.08627309, 0.47963148, 0.26528436, -0.1028522,
    -0.02501041, 0.05762934, -0.26270828, -0.8297376, 0.13903582,
    0.30412629, 0.79824799, 0.12842464, -0.64139324, 0.16469972,
    -0.08669609, 0.55955994, -0.16742738, -0.03153928, -0.02245264,
    -0.02357809, 0.02061746, 0.02320515, 0.00869796, -0.1655257,
    -0.07094092, -0.1663706, -0.10953037, 0.11675739, 0.20495811,
    0.10592803, 0.14583197, -0.31755996, 0.13645983, 0.28833047,
    0.06303538, 0.48629287, 0.23359743, -0.02812484, 0.23948504]], 'f')
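
# `mean` is the 85-D initial guess of the HMR parameter vector theta,
# laid out as 3 weak-perspective camera parameters, 72 SMPL pose
# parameters (24 joints x 3 axis-angle values), and 10 shape coefficients;
# this matches the num_cam/num_theta split in pose_estimate() below.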


class HumanMeshRecovery(ConnectionBasedTransport):

    def __init__(self):
        super(HumanMeshRecovery, self).__init__()
        self.gpu = rospy.get_param('~gpu', -1)  # -1 is CPU mode
        self.with_people_pose = rospy.get_param('~with_people_pose', False)
        self.num_stage = rospy.get_param('~num_stage', 3)

        self.smpl = SMPL()
        self.encoder_fc3_model = EncoderFC3Dropout()
        self.resnet_v2 = ResNet_v2_50()
        self._load_model()

        self.br = cv_bridge.CvBridge()
        self.pose_pub = self.advertise(
            '~output/pose', PeoplePoseArray, queue_size=1)

    def _load_model(self):
        smpl_model_file = rospy.get_param('~smpl_model_file')
        chainer.serializers.load_npz(smpl_model_file, self.smpl)
        encoder_fc3_model_file = rospy.get_param('~encoder_model_file')
        chainer.serializers.load_npz(
            encoder_fc3_model_file, self.encoder_fc3_model)
        resnet_v2_50_model_file = rospy.get_param('~resnet_v2_50_model_file')
        chainer.serializers.load_npz(resnet_v2_50_model_file, self.resnet_v2)

        rospy.loginfo('Finished loading trained model')
        if self.gpu >= 0:
            chainer.cuda.get_device_from_id(self.gpu).use()
            self.smpl.to_gpu()
            self.encoder_fc3_model.to_gpu()
            self.resnet_v2.to_gpu()
        chainer.global_config.train = False
        chainer.global_config.enable_backprop = False

    def subscribe(self):
        if self.with_people_pose:
            queue_size = rospy.get_param('~queue_size', 10)
            sub_img = message_filters.Subscriber(
                '~input', Image, queue_size=queue_size, buff_size=2**24)
            sub_pose = message_filters.Subscriber(
                '~input/pose', PeoplePoseArray,
                queue_size=queue_size, buff_size=2**24)
            self.subs = [sub_img, sub_pose]

            if rospy.get_param('~approximate_sync', False):
                slop = rospy.get_param('~slop', 0.1)
                sync = message_filters.ApproximateTimeSynchronizer(
                    fs=self.subs, queue_size=queue_size, slop=slop)
            else:
                sync = message_filters.TimeSynchronizer(
                    fs=self.subs, queue_size=queue_size)
            sync.registerCallback(self._cb_with_pose)
        else:
            sub_img = rospy.Subscriber(
                '~input', Image, self._cb,
                queue_size=1, buff_size=2**24)
            self.subs = [sub_img]

    def unsubscribe(self):
        for sub in self.subs:
            sub.unregister()

    def _cb(self, img_msg):
        br = self.br
        img = br.imgmsg_to_cv2(img_msg, desired_encoding='bgr8')
        img, _ = preprocess_image(img)
        # HWC -> NCHW with a batch dimension of 1
        imgs = img.transpose(2, 0, 1)[None, ]
        verts, Js, Rs, A, cams, poses, shapes = self.pose_estimate(imgs)

        people_pose_msg = self._create_people_pose_array_msgs(
            chainer.cuda.to_cpu(A.data), img_msg.header)
        self.pose_pub.publish(people_pose_msg)

    def _cb_with_pose(self, img_msg, people_pose_msg):
        br = self.br
        img = br.imgmsg_to_cv2(img_msg, desired_encoding='bgr8')

        imgs = []
        for person_pose in people_pose_msg.poses:
            key_points = format_pose_msg(person_pose)
            crop_img, _ = preprocess_image(img, key_points)
            imgs.append(crop_img)
        if len(imgs) == 0:
            img, _ = preprocess_image(img)
            imgs = np.array(img[None, ], 'f').transpose(0, 3, 1, 2)
        else:
            imgs = np.array(imgs, 'f').transpose(0, 3, 1, 2)
        verts, Js, Rs, A, cams, poses, shapes = self.pose_estimate(imgs)

        people_pose_msg = self._create_people_pose_array_msgs(
            chainer.cuda.to_cpu(A.data), img_msg.header)
        self.pose_pub.publish(people_pose_msg)

    def _create_people_pose_array_msgs(self, people_joint_positions, header):
        people_pose_msg = PeoplePoseArray(header=header)
        for i, person_joint_positions in enumerate(people_joint_positions):
            pose_msg = PeoplePose()
            # Each joint_pose is a 4x4 homogeneous transform of one joint.
            for joint_pose in person_joint_positions:
                pose_msg.limb_names.append(str(i))
                pose_msg.scores.append(0.0)
                q_xyzw = tf.transformations.quaternion_from_matrix(joint_pose)
                pose_msg.poses.append(
                    Pose(position=Point(
                        x=joint_pose[0, 3],
                        y=joint_pose[1, 3],
                        z=joint_pose[2, 3]),
                        orientation=Quaternion(
                            x=q_xyzw[0],
                            y=q_xyzw[1],
                            z=q_xyzw[2],
                            w=q_xyzw[3])))
            people_pose_msg.poses.append(pose_msg)
        return people_pose_msg

    def pose_estimate(self, imgs):
        batch_size = imgs.shape[0]
        imgs = Variable(self.resnet_v2.xp.array(imgs, 'f'))
        img_feat = self.resnet_v2(imgs).reshape(batch_size, -1)

        theta_prev = F.tile(
            Variable(self.encoder_fc3_model.xp.array(mean, 'f')),
            (batch_size, 1))
        num_cam = 3
        num_theta = 72
        for i in range(self.num_stage):
            state = F.concat([img_feat, theta_prev], axis=1)
            delta_theta = self.encoder_fc3_model(state)
            theta_here = theta_prev + delta_theta
            # cam: N x 3, pose: N x num_theta, shape: N x 10
            cams = theta_here[:, :num_cam]
            poses = theta_here[:, num_cam:(num_cam + num_theta)]
            shapes = theta_here[:, (num_cam + num_theta):]

            verts, Js, Rs, A = self.smpl(shapes, poses)
            # Project to 2D!
            # pred_kp = batch_orth_proj_idrot(
            #     Js, cams, name='proj_2d_stage%d' % i)
            theta_prev = theta_here
        return verts, Js, Rs, A, cams, poses, shapes
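
# The loop in pose_estimate() is the iterative error feedback (IEF)
# regression from the HMR paper: starting from the mean parameters, the
# encoder predicts a residual update delta_theta at each stage,
# conditioned on the image feature and the current estimate.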


if __name__ == '__main__':
    rospy.init_node('human_mesh_recovery')
    HumanMeshRecovery()
    rospy.spin()
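
# A minimal roslaunch sketch (model paths are placeholders; point them at
# your own trained .npz files):
#
#   <node name="human_mesh_recovery" pkg="jsk_perception"
#         type="human_mesh_recovery.py" output="screen">
#     <remap from="~input" to="/camera/rgb/image_rect_color"/>
#     <rosparam>
#       gpu: 0
#       smpl_model_file: /path/to/smpl.npz
#       encoder_model_file: /path/to/encoder_fc3.npz
#       resnet_v2_50_model_file: /path/to/resnet_v2_50.npz
#     </rosparam>
#   </node>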