people_pose_estimation_2d.py
#!/usr/bin/env python
# -*- coding:utf-8 -*-

# See: https://arxiv.org/abs/1611.08050
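#
# 2D multi-person pose estimation ROS node based on OpenPose (Cao et al.,
# "Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields").
# The node subscribes to an RGB image (optionally synchronized with depth
# and camera info) and publishes PeoplePoseArray / HumanSkeletonArray
# messages describing the detected people.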

from __future__ import print_function

import math

import itertools, pkg_resources, sys
from distutils.version import LooseVersion
if LooseVersion(pkg_resources.get_distribution("chainer").version) >= LooseVersion('7.0.0') and \
        sys.version_info.major == 2:
    print('''Please install chainer < 7.0.0:

  sudo pip install chainer==6.7.0

c.f. https://github.com/jsk-ros-pkg/jsk_recognition/pull/2485
''', file=sys.stderr)
    sys.exit(1)
if [p for p in list(itertools.chain(*[pkg_resources.find_distributions(_) for _ in sys.path])) if "cupy-" in p.project_name] == []:
    print('''Please install CuPy

  sudo pip install cupy-cuda[your cuda version]
i.e.
  sudo pip install cupy-cuda91

''', file=sys.stderr)
    sys.exit(1)
import chainer
import chainer.functions as F
from chainer import cuda
import cv2
import matplotlib
import numpy as np
from scipy.ndimage.filters import gaussian_filter
import pylab as plt  # NOQA

import cv_bridge
import message_filters
import rospy
from jsk_topic_tools import ConnectionBasedTransport
from geometry_msgs.msg import Pose
from geometry_msgs.msg import Point
from geometry_msgs.msg import Quaternion
from jsk_recognition_msgs.msg import HumanSkeleton
from jsk_recognition_msgs.msg import HumanSkeletonArray
from jsk_recognition_msgs.msg import PeoplePose
from jsk_recognition_msgs.msg import PeoplePoseArray
from jsk_recognition_msgs.msg import Segment
from sensor_msgs.msg import CameraInfo
from sensor_msgs.msg import Image

from openpose import PoseNet, HandNet


def find_joint(limb, jts):
    jt = [jt for jt in jts if jt['limb'] == limb]
    if jt:
        return jt[0]
    else:
        return None


def padRightDownCorner(img, stride, padValue):
    h = img.shape[0]
    w = img.shape[1]

    pad = 4 * [None]
    pad[0] = 0  # up
    pad[1] = 0  # left
    pad[2] = 0 if (h % stride == 0) else stride - (h % stride)  # down
    pad[3] = 0 if (w % stride == 0) else stride - (w % stride)  # right

    img_padded = img
    pad_up = np.tile(img_padded[0:1, :, :] * 0 + padValue, (pad[0], 1, 1))
    img_padded = np.concatenate((pad_up, img_padded), axis=0)
    pad_left = np.tile(img_padded[:, 0:1, :] * 0 + padValue, (1, pad[1], 1))
    img_padded = np.concatenate((pad_left, img_padded), axis=1)
    pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + padValue, (pad[2], 1, 1))
    img_padded = np.concatenate((img_padded, pad_down), axis=0)
    pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + padValue, (1, pad[3], 1))
    img_padded = np.concatenate((img_padded, pad_right), axis=1)

    return img_padded, pad
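
# A quick illustration of padRightDownCorner (not executed here): with
# stride=8 a 100x110 image must grow to the next multiples of 8, so only
# the bottom and right borders are padded:
#
#   img = np.zeros((100, 110, 3), dtype=np.uint8)
#   padded, pad = padRightDownCorner(img, 8, 128)
#   assert padded.shape[:2] == (104, 112)
#   assert pad == [0, 0, 4, 2]  # [up, left, down, right]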


class PeoplePoseEstimation2D(ConnectionBasedTransport):
    # Note: the order of these sequences is important;
    # connections are searched in the sequence specified here
    # (center 29 is in position 15).
    limb_sequence = [[ 2,  1], [ 1, 16], [ 1, 15], [ 6, 18], [ 3, 17],
                     [ 2,  3], [ 2,  6], [ 3,  4], [ 4,  5], [ 6,  7],
                     [ 7,  8], [ 2,  9], [ 9, 10], [10, 11], [ 2, 12],
                     [12, 13], [13, 14], [15, 17], [16, 18]]
    # correspondence between limbs and the PAF (middle joint) heatmap channels
    map_idx = [[47, 48], [49, 50], [51, 52], [37, 38], [45, 46],
               [31, 32], [39, 40], [33, 34], [35, 36], [41, 42],
               [43, 44], [19, 20], [21, 22], [23, 24], [25, 26],
               [27, 28], [29, 30], [53, 54], [55, 56]]
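    # For example, limb_sequence[0] = [2, 1] is the Neck->Nose connection
    # (indices are 1-based into index2limbname) and map_idx[0] = [47, 48]
    # names the x/y PAF channels of that limb in the 57-channel network
    # output (19 part heatmaps + 38 PAF channels); the `x - 19` offset in
    # _pose_estimate_chainer_backend converts these to indices into paf_avg.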
    # ratios used to estimate the hand ROI width from each limb's length
    limb_length_hand_ratio = [0.6, 0.2, 0.2, 0.85, 0.85,
                              0.6, 0.6, 0.93, 0.65, 0.95,
                              0.65, 2.2, 1.7, 1.7, 2.2,
                              1.7, 1.7, 0.25, 0.25]
    # hand joint connection sequence
    hand_sequence = [[0, 1], [1, 2], [2, 3], [3, 4],
                     [0, 5], [5, 6], [6, 7], [7, 8],
                     [0, 9], [9, 10], [10, 11], [11, 12],
                     [0, 13], [13, 14], [14, 15], [15, 16],
                     [0, 17], [17, 18], [18, 19], [19, 20]]

    index2limbname = ["Nose",
                      "Neck",
                      "RShoulder",
                      "RElbow",
                      "RWrist",
                      "LShoulder",
                      "LElbow",
                      "LWrist",
                      "RHip",
                      "RKnee",
                      "RAnkle",
                      "LHip",
                      "LKnee",
                      "LAnkle",
                      "REye",
                      "LEye",
                      "REar",
                      "LEar",
                      "Bkg"]

    index2handname = ["RHand{}".format(i) for i in range(21)] +\
                     ["LHand{}".format(i) for i in range(21)]

    def __init__(self):
        super(PeoplePoseEstimation2D, self).__init__()
        self.backend = rospy.get_param('~backend', 'chainer')
        self.scales = rospy.get_param('~scales', [0.38])
        self.stride = rospy.get_param('~stride', 8)
        self.pad_value = rospy.get_param('~pad_value', 128)
        self.thre1 = rospy.get_param('~thre1', 0.1)
        self.thre2 = rospy.get_param('~thre2', 0.05)
        self.width = rospy.get_param('~width', None)
        self.height = rospy.get_param('~height', None)
        self.check_wh()
        self.gpu = rospy.get_param('~gpu', -1)  # -1 is CPU mode
        self.with_depth = rospy.get_param('~with_depth', False)
        # hand detection
        self.use_hand = rospy.get_param('~hand/enable', False)
        self.hand_gaussian_ksize = rospy.get_param('~hand/gaussian_ksize', 17)
        self.hand_gaussian_sigma = rospy.get_param('~hand/gaussian_sigma', 2.5)
        self.hand_thre1 = rospy.get_param('~hand/thre1', 20)
        self.hand_thre2 = rospy.get_param('~hand/thre2', 0.1)
        self.hand_width_offset = rospy.get_param('~hand/width_offset', 0)
        # model loading
        self._load_model()
        # topic advertise
        self.image_pub = self.advertise('~output', Image, queue_size=1)
        self.pose_pub = self.advertise('~pose', PeoplePoseArray, queue_size=1)
        self.sub_info = None
        if self.with_depth is True:
            self.pose_2d_pub = self.advertise('~pose_2d', PeoplePoseArray, queue_size=1)
        # visualization rviz plugin: https://github.com/jsk-ros-pkg/jsk_visualization/pull/740
        self.skeleton_pub = self.advertise(
            '~skeleton', HumanSkeletonArray, queue_size=1)

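    # Example launch snippet (illustrative only; the topic remapping and the
    # model path depend on your setup):
    #
    #   <node name="people_pose_estimation_2d" pkg="jsk_perception"
    #         type="people_pose_estimation_2d.py">
    #     <remap from="~input" to="/camera/rgb/image_rect_color"/>
    #     <param name="gpu" value="0"/>
    #     <param name="model_file" value="/path/to/pose_model.npz"/>
    #   </node>
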
    def check_wh(self):
        if (self.width is None) != (self.height is None):
            rospy.logwarn('Both ~width and ~height should be specified, '
                          'but only ~{} is given.'
                          .format('height' if self.height else 'width'))

    @property
    def visualize(self):
        return self.image_pub.get_num_connections() > 0

    def _load_model(self):
        if self.backend == 'chainer':
            self._load_chainer_model()
        else:
            raise RuntimeError('Unsupported backend: %s' % self.backend)

    def _load_chainer_model(self):
        model_file = rospy.get_param('~model_file')
        self.pose_net = PoseNet(pretrained_model=model_file)
        rospy.loginfo('Finished loading trained model: {0}'.format(model_file))
        # hand net
        if self.use_hand:
            model_file = rospy.get_param('~hand/model_file')
            self.hand_net = HandNet(pretrained_model=model_file)
            rospy.loginfo('Finished loading trained hand model: {}'.format(model_file))
        #
        if self.gpu >= 0:
            self.pose_net.to_gpu(self.gpu)
            if self.use_hand:
                self.hand_net.to_gpu(self.gpu)
            # create a gaussian kernel for smoothing the hand heatmaps on the
            # GPU (the CPU path uses scipy's gaussian_filter instead)
            ksize = self.hand_gaussian_ksize
            sigma = self.hand_gaussian_sigma
            c = ksize // 2
            k = np.zeros((1, 1, ksize, ksize), dtype=np.float32)
            for y in range(ksize):
                dy = abs(y - c)
                for x in range(ksize):
                    dx = abs(x - c)
                    e = np.exp(- (dx ** 2 + dy ** 2) / (2 * sigma ** 2))
                    k[0][0][y][x] = 1 / (sigma ** 2 * 2 * np.pi) * e
            k = chainer.cuda.to_gpu(k, device=self.gpu)
            self.hand_gaussian_kernel = k
        chainer.global_config.train = False
        chainer.global_config.enable_backprop = False

    def subscribe(self):
        if self.with_depth:
            queue_size = rospy.get_param('~queue_size', 10)
            sub_img = message_filters.Subscriber(
                '~input', Image, queue_size=1, buff_size=2**24)
            sub_depth = message_filters.Subscriber(
                '~input/depth', Image, queue_size=1, buff_size=2**24)
            self.subs = [sub_img, sub_depth]

            # NOTE: Camera info is not synchronized by default.
            # See https://github.com/jsk-ros-pkg/jsk_recognition/issues/2165
            sync_cam_info = rospy.get_param("~sync_camera_info", False)
            if sync_cam_info:
                sub_info = message_filters.Subscriber(
                    '~input/info', CameraInfo, queue_size=1, buff_size=2**24)
                self.subs.append(sub_info)
            else:
                self.sub_info = rospy.Subscriber(
                    '~input/info', CameraInfo, self._cb_cam_info)

            if rospy.get_param('~approximate_sync', True):
                slop = rospy.get_param('~slop', 0.1)
                sync = message_filters.ApproximateTimeSynchronizer(
                    fs=self.subs, queue_size=queue_size, slop=slop)
            else:
                sync = message_filters.TimeSynchronizer(
                    fs=self.subs, queue_size=queue_size)
            if sync_cam_info:
                sync.registerCallback(self._cb_with_depth_info)
            else:
                self.camera_info_msg = None
                sync.registerCallback(self._cb_with_depth)
        else:
            sub_img = rospy.Subscriber(
                '~input', Image, self._cb, queue_size=1, buff_size=2**24)
            self.subs = [sub_img]

    def unsubscribe(self):
        for sub in self.subs:
            sub.unregister()
        if self.sub_info is not None:
            self.sub_info.unregister()
            self.sub_info = None

    def _cb_cam_info(self, msg):
        self.camera_info_msg = msg
        self.sub_info.unregister()
        self.sub_info = None
        rospy.loginfo("Received camera info")

    def _cb_with_depth(self, img_msg, depth_msg):
        if self.camera_info_msg is None:
            return
        self._cb_with_depth_info(img_msg, depth_msg, self.camera_info_msg)

    def _cb_with_depth_info(self, img_msg, depth_msg, camera_info_msg):
        br = cv_bridge.CvBridge()
        img = br.imgmsg_to_cv2(img_msg, desired_encoding='bgr8')
        depth_img = br.imgmsg_to_cv2(depth_msg, 'passthrough')
        if depth_msg.encoding == '16UC1':
            depth_img = np.asarray(depth_img, dtype=np.float32)
            depth_img /= 1000  # convert metric: mm -> m
        elif depth_msg.encoding != '32FC1':
            rospy.logerr('Unsupported depth encoding: %s' % depth_msg.encoding)

        people_joint_positions, all_peaks = self.pose_estimate(img)
        if self.use_hand:
            people_joint_positions = self.hand_estimate(
                img, people_joint_positions)

        people_pose_msg = PeoplePoseArray()
        people_pose_msg.header = img_msg.header
        people_pose_2d_msg = self._create_2d_people_pose_array_msgs(
            people_joint_positions,
            img_msg.header)
        skeleton_msgs = HumanSkeletonArray(header=img_msg.header)

        # calculate xyz-position
        fx = camera_info_msg.K[0]
        fy = camera_info_msg.K[4]
        cx = camera_info_msg.K[2]
        cy = camera_info_msg.K[5]
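        # Pinhole camera back-projection: a pixel (u, v) with depth z maps to
        #   x = (u - cx) * z / fx,  y = (v - cy) * z / fy
        # in the camera frame, which is exactly what the loop below computes.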
        for person_joint_positions in people_joint_positions:
            pose_msg = PeoplePose()
            skeleton_msg = HumanSkeleton(header=img_msg.header)
            for joint_pos in person_joint_positions:
                if joint_pos['score'] < 0:
                    continue
                if 0 <= joint_pos['y'] < depth_img.shape[0] and \
                   0 <= joint_pos['x'] < depth_img.shape[1]:
                    z = float(depth_img[int(joint_pos['y'])][int(joint_pos['x'])])
                else:
                    continue
                if np.isnan(z):
                    continue
                x = (joint_pos['x'] - cx) * z / fx
                y = (joint_pos['y'] - cy) * z / fy
                pose_msg.limb_names.append(joint_pos['limb'])
                pose_msg.scores.append(joint_pos['score'])
                pose_msg.poses.append(Pose(position=Point(x=x, y=y, z=z),
                                           orientation=Quaternion(w=1)))
            people_pose_msg.poses.append(pose_msg)

            for i, conn in enumerate(self.limb_sequence):
                j1_name = self.index2limbname[conn[0] - 1]
                j2_name = self.index2limbname[conn[1] - 1]
                if j1_name not in pose_msg.limb_names \
                        or j2_name not in pose_msg.limb_names:
                    continue
                j1_index = pose_msg.limb_names.index(j1_name)
                j2_index = pose_msg.limb_names.index(j2_name)
                bone_name = '{}->{}'.format(j1_name, j2_name)
                bone = Segment(
                    start_point=pose_msg.poses[j1_index].position,
                    end_point=pose_msg.poses[j2_index].position)
                skeleton_msg.bones.append(bone)
                skeleton_msg.bone_names.append(bone_name)
            skeleton_msgs.skeletons.append(skeleton_msg)

        self.pose_2d_pub.publish(people_pose_2d_msg)
        self.pose_pub.publish(people_pose_msg)
        self.skeleton_pub.publish(skeleton_msgs)

        if self.visualize:
            vis_img = self._draw_joints(img, people_joint_positions, all_peaks)
            vis_msg = br.cv2_to_imgmsg(vis_img, encoding='bgr8')
            vis_msg.header.stamp = img_msg.header.stamp
            self.image_pub.publish(vis_msg)

    def _cb(self, img_msg):
        br = cv_bridge.CvBridge()
        img = br.imgmsg_to_cv2(img_msg, desired_encoding='bgr8')
        people_joint_positions, all_peaks = self.pose_estimate(img)
        if self.use_hand:
            people_joint_positions = self.hand_estimate(
                img, people_joint_positions)

        people_pose_msg = self._create_2d_people_pose_array_msgs(
            people_joint_positions,
            img_msg.header)

        self.pose_pub.publish(people_pose_msg)

        if self.visualize:
            vis_img = self._draw_joints(img, people_joint_positions, all_peaks)
            vis_msg = br.cv2_to_imgmsg(vis_img, encoding='bgr8')
            vis_msg.header.stamp = img_msg.header.stamp
            self.image_pub.publish(vis_msg)

    def _create_2d_people_pose_array_msgs(self, people_joint_positions, header):
        people_pose_msg = PeoplePoseArray(header=header)
        for person_joint_positions in people_joint_positions:
            pose_msg = PeoplePose()
            for joint_pos in person_joint_positions:
                if joint_pos['score'] < 0:
                    continue
                pose_msg.limb_names.append(joint_pos['limb'])
                pose_msg.scores.append(joint_pos['score'])
                pose_msg.poses.append(Pose(position=Point(x=joint_pos['x'],
                                                          y=joint_pos['y'],
                                                          z=0)))
            people_pose_msg.poses.append(pose_msg)
        return people_pose_msg

    def pose_estimate(self, bgr):
        if self.backend == 'chainer':
            return self._pose_estimate_chainer_backend(bgr)
        raise ValueError('Unsupported backend: {0}'.format(self.backend))

    def _pose_estimate_chainer_backend(self, bgr_img):
        if self.gpu >= 0:
            chainer.cuda.get_device_from_id(self.gpu).use()
        xp = self.pose_net.xp

        org_h, org_w, _ = bgr_img.shape
        if not (self.width is None or self.height is None):
            bgr_img = cv2.resize(bgr_img, (self.width, self.height))

        heatmap_avg = xp.zeros((bgr_img.shape[0], bgr_img.shape[1], 19),
                               dtype=np.float32)
        paf_avg = xp.zeros((bgr_img.shape[0], bgr_img.shape[1], 38),
                           dtype=np.float32)

        for scale in self.scales:
            img = cv2.resize(bgr_img, (0, 0), fx=scale,
                             fy=scale, interpolation=cv2.INTER_CUBIC)
            padded_img, pad = padRightDownCorner(
                img, self.stride, self.pad_value)
            x = np.transpose(np.float32(
                padded_img[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
            if self.gpu >= 0:
                x = chainer.cuda.to_gpu(x)
            x = chainer.Variable(x)
            pafs, heatmaps = self.pose_net(x)
            paf = pafs[-1]
            heatmap = heatmaps[-1]

            # extract outputs, resize, and remove padding
            heatmap = F.resize_images(
                heatmap, (heatmap.data.shape[2] * self.stride,
                          heatmap.data.shape[3] * self.stride))
            heatmap = heatmap[:, :, :padded_img.shape[0] -
                              pad[2], :padded_img.shape[1] - pad[3]]
            heatmap = F.resize_images(
                heatmap, (bgr_img.shape[0], bgr_img.shape[1]))
            heatmap = xp.transpose(xp.squeeze(heatmap.data), (1, 2, 0))
            paf = F.resize_images(
                paf, (paf.data.shape[2] * self.stride,
                      paf.data.shape[3] * self.stride))
            paf = paf[:, :, :padded_img.shape[0] -
                      pad[2], :padded_img.shape[1] - pad[3]]
            paf = F.resize_images(paf, (bgr_img.shape[0], bgr_img.shape[1]))
            paf = xp.transpose(xp.squeeze(paf.data), (1, 2, 0))

            coeff = 1.0 / len(self.scales)
            paf_avg += paf * coeff
            heatmap_avg += heatmap * coeff

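        # Non-maximum suppression on the averaged heatmaps: a pixel is kept
        # as a peak only if it is >= each of its four neighbours
        # (left/right/up/down) and exceeds ~thre1. The shifted copies below
        # implement the neighbour comparison without an explicit loop.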
        heatmap_left = xp.zeros_like(heatmap_avg)
        heatmap_left[1:, :] = heatmap_avg[:-1, :]
        heatmap_right = xp.zeros_like(heatmap_avg)
        heatmap_right[:-1, :] = heatmap_avg[1:, :]
        heatmap_up = xp.zeros_like(heatmap_avg)
        heatmap_up[:, 1:] = heatmap_avg[:, :-1]
        heatmap_down = xp.zeros_like(heatmap_avg)
        heatmap_down[:, :-1] = heatmap_avg[:, 1:]
        peaks_binary = (heatmap_avg >= heatmap_left) & \
                       (heatmap_avg >= heatmap_right) & \
                       (heatmap_avg >= heatmap_up) & \
                       (heatmap_avg >= heatmap_down) & \
                       (heatmap_avg > self.thre1)

        peaks = xp.array(xp.nonzero(
            peaks_binary[..., :len(self.index2limbname) - 1]),
            dtype=np.int32).T
        peak_counter = peaks.shape[0]
        all_peaks = xp.zeros((peak_counter, 4), dtype=np.float32)
        all_peaks[:, 0] = peaks[:, 1]
        all_peaks[:, 1] = peaks[:, 0]
        all_peaks[:, 2] = heatmap_avg[peaks.T.tolist()]
        peaks_order = peaks[..., 2]
        try:
            all_peaks = all_peaks[xp.argsort(peaks_order)]
        except AttributeError:
            # cupy.argsort is not available at cupy==1.0.1
            peaks_order = chainer.cuda.to_cpu(peaks_order)
            all_peaks = all_peaks[np.argsort(peaks_order)]
        all_peaks[:, 3] = xp.arange(peak_counter, dtype=np.float32)
        if self.gpu >= 0:
            all_peaks = chainer.cuda.to_cpu(all_peaks)
            peaks_order = chainer.cuda.to_cpu(peaks_order)
        all_peaks = np.split(all_peaks, np.cumsum(
            np.bincount(peaks_order, minlength=len(self.index2limbname) - 1)))
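
        # Part Affinity Field (PAF) scoring (cf. the paper linked above): for
        # every candidate joint pair (A, B), the PAF is sampled at mid_num
        # points along the segment A->B and projected onto the unit vector
        # from A to B. A connection is kept only if at least 80% of the
        # samples exceed ~thre2 (c1) and the mean projection, penalized for
        # overly long limbs, stays positive (c2).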
        connection_all = []
        mid_num = 10
        eps = 1e-8
        score_mid = paf_avg[:, :, [[x - 19 for x in self.map_idx[k]]
                                   for k in range(len(self.map_idx))]]

        cands = np.array(all_peaks, dtype=object)[
            np.array(self.limb_sequence, dtype=np.int32) - 1]
        candAs = cands[:, 0]
        candBs = cands[:, 1]
        nAs = np.array([len(candA) for candA in candAs])
        nBs = np.array([len(candB) for candB in candBs])
        target_indices = np.nonzero(np.logical_and(nAs != 0, nBs != 0))[0]
        if len(target_indices) == 0:
            return [], []

        all_candidates_A = [np.repeat(np.array(tmp_candA, dtype=np.float32), nB, axis=0)
                            for tmp_candA, nB in zip(candAs, nBs)]
        all_candidates_B = [np.tile(np.array(tmp_candB, dtype=np.float32), (nA, 1))
                            for tmp_candB, nA in zip(candBs, nAs)]

        target_candidates_B = [all_candidates_B[index]
                               for index in target_indices]
        target_candidates_A = [all_candidates_A[index]
                               for index in target_indices]

        vec = np.vstack(target_candidates_B)[
            :, :2] - np.vstack(target_candidates_A)[:, :2]
        if self.gpu >= 0:
            vec = chainer.cuda.to_gpu(vec)
        norm = xp.sqrt(xp.sum(vec ** 2, axis=1)) + eps
        vec = vec / norm[:, None]
        # list() is needed on Python 3, where zip returns an iterator that
        # np.concatenate below cannot consume
        start_end = list(zip(np.round(np.mgrid[np.vstack(target_candidates_A)[:, 1].reshape(-1, 1):np.vstack(target_candidates_B)[:, 1].reshape(-1, 1):(mid_num * 1j)]).astype(np.int32),
                             np.round(np.mgrid[np.vstack(target_candidates_A)[:, 0].reshape(-1, 1):np.vstack(
                                 target_candidates_B)[:, 0].reshape(-1, 1):(mid_num * 1j)]).astype(np.int32),
                             np.concatenate([[[index] * mid_num for i in range(len(c))] for index, c in zip(target_indices, target_candidates_B)])))

        v = score_mid[np.concatenate(
            start_end, axis=1).tolist()].reshape(-1, mid_num, 2)
        score_midpts = xp.sum(v * xp.repeat(vec, (mid_num),
                                            axis=0).reshape(-1, mid_num, 2), axis=2)
        score_with_dist_prior = xp.sum(score_midpts, axis=1) / mid_num + \
            xp.minimum(0.5 * bgr_img.shape[0] / norm - 1,
                       xp.zeros_like(norm, dtype=np.float32))
        c1 = xp.sum(score_midpts > self.thre2, axis=1) > 0.8 * mid_num
        c2 = score_with_dist_prior > 0.0
        criterion = xp.logical_and(c1, c2)

        indices_bins = np.cumsum(nAs * nBs)
        indices_bins = np.concatenate(
            [np.zeros(1), indices_bins]).astype(np.int32)
        target_candidate_indices = xp.nonzero(criterion)[0]
        if self.gpu >= 0:
            target_candidate_indices = chainer.cuda.to_cpu(
                target_candidate_indices)
            score_with_dist_prior = chainer.cuda.to_cpu(score_with_dist_prior)

        k_s = np.digitize(target_candidate_indices, indices_bins) - 1
        i_s = (target_candidate_indices - (indices_bins[k_s])) // nBs[k_s]
        j_s = (target_candidate_indices - (indices_bins[k_s])) % nBs[k_s]

        connection_candidate = np.concatenate([k_s.reshape(-1, 1),
                                               i_s.reshape(-1, 1),
                                               j_s.reshape(-1, 1),
                                               score_with_dist_prior[
                                                   target_candidate_indices][None, ].T,
                                               (score_with_dist_prior[target_candidate_indices][None, ] +
                                                np.concatenate(target_candidates_A)[target_candidate_indices, 2] + np.concatenate(target_candidates_B)[target_candidate_indices, 2]).T], axis=1)

        sorted_indices = np.argsort(
            connection_candidate[:, 0] * 100 - connection_candidate[:, 3])

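        # Greedy assignment: candidates are processed limb type by limb type
        # (the k * 100 term dominates the sort key) in descending score order,
        # and a connection is accepted only if neither endpoint is already
        # used by a previously accepted connection of the same limb type.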
        connection_all = []
        for _ in range(0, 19):
            connection = np.zeros((0, 5), dtype=np.float32)
            connection_all.append(connection)

        for c_candidate in connection_candidate[sorted_indices]:
            k, i, j = c_candidate[0:3].astype(np.int32)
            score = c_candidate[3]
            if len(connection_all[k]) >= min(nAs[k], nBs[k]):
                continue
            # all_candidates_A[k] repeats each A-candidate nBs[k] times,
            # so scale i to index the right row
            i *= nBs[k]
            if i not in connection_all[k][:, 3] and j not in connection_all[k][:, 4]:
                connection_all[k] = np.vstack([connection_all[k], np.array(
                    [all_candidates_A[k][i][3], all_candidates_B[k][j][3], score, i, j], dtype=np.float32)])

        joint_cands_indices = -1 * np.ones((0, 20))
        candidate = np.array(
            [item for sublist in all_peaks for item in sublist])
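        # Group limbs into people: each row of joint_cands_indices describes
        # one person; columns 0-17 hold the peak index assigned to each body
        # part (-1 if missing), column -2 the accumulated score and column -1
        # the number of parts found. Rows that turn out to share a joint are
        # merged below.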
        for k in range(len(self.map_idx)):
            partAs = connection_all[k][:, 0]
            partBs = connection_all[k][:, 1]
            indexA, indexB = np.array(self.limb_sequence[k]) - 1
            for i in range(len(connection_all[k])):  # = 1:size(temp,1)
                found = 0
                joint_cands_indices_idx = [-1, -1]
                # 1:size(joint_cands_indices,1):
                for j in range(len(joint_cands_indices)):
                    if joint_cands_indices[j][indexA] == float(partAs[i]) or joint_cands_indices[j][indexB] == float(partBs[i]):
                        joint_cands_indices_idx[found] = j
                        found += 1

                if found == 1:
                    j = joint_cands_indices_idx[0]
                    if joint_cands_indices[j][indexB] != float(partBs[i]):
                        joint_cands_indices[j][indexB] = partBs[i]
                        joint_cands_indices[j][-1] += 1
                        joint_cands_indices[
                            j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                elif found == 2:  # if found 2 and disjoint, merge them
                    j1, j2 = joint_cands_indices_idx
                    membership = ((joint_cands_indices[j1] >= 0).astype(
                        int) + (joint_cands_indices[j2] >= 0).astype(int))[:-2]
                    if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                        joint_cands_indices[j1][
                            :-2] += (joint_cands_indices[j2][:-2] + 1)
                        joint_cands_indices[
                            j1][-2:] += joint_cands_indices[j2][-2:]
                        joint_cands_indices[j1][-2] += connection_all[k][i][2]
                        joint_cands_indices = np.delete(
                            joint_cands_indices, j2, 0)
                    else:  # as like found == 1
                        joint_cands_indices[j1][indexB] = partBs[i]
                        joint_cands_indices[j1][-1] += 1
                        joint_cands_indices[
                            j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

                # if no partA is found in joint_cands_indices,
                # create a new row (person)
                elif not found and k < len(self.index2limbname) - 2:
                    row = -1 * np.ones(20)
                    row[indexA] = partAs[i]
                    row[indexB] = partBs[i]
                    row[-1] = 2
                    row[-2] = sum(candidate[connection_all[k]
                                            [i, :2].astype(int), 2]) + connection_all[k][i][2]
                    joint_cands_indices = np.vstack([joint_cands_indices, row])

        # delete rows of joint_cands_indices which have too few parts
        # or too low an average score
        deleteIdx = []
        for i in range(len(joint_cands_indices)):
            if joint_cands_indices[i][-1] < 4 or joint_cands_indices[i][-2] / joint_cands_indices[i][-1] < 0.4:
                deleteIdx.append(i)
        joint_cands_indices = np.delete(joint_cands_indices, deleteIdx, axis=0)

        return self._extract_joint_position(joint_cands_indices, candidate), all_peaks

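    # _extract_joint_position flattens the index table into a list with one
    # entry per person, each a list of joint dicts such as (values
    # illustrative):
    #   [{'limb': 'Nose', 'x': 320.0, 'y': 120.0, 'score': 0.91}, ...]
    # Joints that were not detected are filled in with score -1.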
    def _extract_joint_position(self, joint_cands_indices, candidate):
        people_joint_positions = []
        for joint_cand_idx in joint_cands_indices:
            person_joint_positions = []
            for i, limb_name in enumerate(self.index2limbname):
                cand_idx = int(joint_cand_idx[i])
                if cand_idx == -1 or cand_idx >= candidate.shape[0]:
                    person_joint_positions.append(dict(limb=limb_name,
                                                       x=0,
                                                       y=0,
                                                       score=-1))
                    continue
                X, Y = candidate[cand_idx, :2]
                person_joint_positions.append(dict(limb=limb_name,
                                                   x=X,
                                                   y=Y,
                                                   score=candidate[cand_idx, 2]))
            people_joint_positions.append(person_joint_positions)

        return people_joint_positions

    def _draw_joints(self, img, people_joint_positions, all_peaks):
        # color map and limb thickness shared by all drawing stages below
        cmap = matplotlib.cm.get_cmap('hsv')
        stickwidth = 4
        if all_peaks:
            # keypoints
            n = len(self.index2limbname) - 1
            for i in range(len(self.index2limbname) - 1):
                rgba = np.array(cmap(1. * i / n))
                color = rgba[:3] * 255
                for j in range(len(all_peaks[i])):
                    cv2.circle(img, (int(all_peaks[i][j][0]), int(
                        all_peaks[i][j][1])), 4, color, thickness=-1)

            # connections
            for joint_positions in people_joint_positions:
                n = len(self.limb_sequence)
                for i, conn in enumerate(self.limb_sequence):
                    rgba = np.array(cmap(1. * i / n))
                    color = rgba[:3] * 255
                    j1, j2 = joint_positions[conn[0] - 1], joint_positions[conn[1] - 1]
                    if j1['score'] < 0 or j2['score'] < 0:
                        continue
                    cx, cy = int((j1['x'] + j2['x']) / 2.), int((j1['y'] + j2['y']) / 2.)
                    dx, dy = j1['x'] - j2['x'], j1['y'] - j2['y']
                    length = np.linalg.norm([dx, dy])
                    angle = int(np.degrees(np.arctan2(dy, dx)))
                    # draw each limb as a semi-transparent ellipse
                    polygon = cv2.ellipse2Poly((cx, cy), (int(length / 2.), stickwidth),
                                               angle, 0, 360, 1)
                    top, left = np.min(polygon[:, 1]), np.min(polygon[:, 0])
                    bottom, right = np.max(polygon[:, 1]), np.max(polygon[:, 0])
                    roi = img[top:bottom, left:right]
                    roi2 = roi.copy()
                    cv2.fillConvexPoly(roi2, polygon - np.array([left, top]), color)
                    cv2.addWeighted(roi, 0.4, roi2, 0.6, 0.0, dst=roi)

        # for hand
        if self.use_hand:
            offset = len(self.limb_sequence)
            for joint_positions in people_joint_positions:
                n = len(joint_positions[offset:])
                for i, jt in enumerate(joint_positions[offset:]):
                    if jt['score'] < 0.0:
                        continue
                    rgba = np.array(cmap(1. * i / n))
                    color = rgba[:3] * 255
                    cv2.circle(img, (int(jt['x']), int(jt['y'])),
                               2, color, thickness=-1)

            for joint_positions in people_joint_positions:
                offset = len(self.limb_sequence)
                n = len(self.hand_sequence)
                for _ in range(2):
                    # for both hands
                    for i, conn in enumerate(self.hand_sequence):
                        rgba = np.array(cmap(1. * i / n))
                        color = rgba[:3] * 255
                        j1 = joint_positions[offset + conn[0]]
                        j2 = joint_positions[offset + conn[1]]
                        if j1['score'] < 0 or j2['score'] < 0:
                            continue
                        cx, cy = int((j1['x'] + j2['x']) / 2.), int((j1['y'] + j2['y']) / 2.)
                        dx, dy = j1['x'] - j2['x'], j1['y'] - j2['y']
                        length = np.linalg.norm([dx, dy])
                        angle = int(np.degrees(np.arctan2(dy, dx)))
                        polygon = cv2.ellipse2Poly((cx, cy), (int(length / 2.), stickwidth),
                                                   angle, 0, 360, 1)
                        top, left = np.min(polygon[:, 1]), np.min(polygon[:, 0])
                        bottom, right = np.max(polygon[:, 1]), np.max(polygon[:, 0])
                        roi = img[top:bottom, left:right]
                        roi2 = roi.copy()
                        cv2.fillConvexPoly(roi2, polygon - np.array([left, top]), color)
                        cv2.addWeighted(roi, 0.4, roi2, 0.6, 0.0, dst=roi)
                    # move on to the other hand's joints
                    # (integer division keeps the index an int on Python 3)
                    offset += len(self.index2handname) // 2

        return img

    def hand_estimate(self, bgr, people_joint_positions):
        if self.backend == 'chainer':
            return self._hand_estimate_chainer_backend(bgr, people_joint_positions)
        raise ValueError('Unsupported backend: {0}'.format(self.backend))

    def _hand_estimate_chainer_backend(self, bgr, people_joint_positions):
        if self.gpu >= 0:
            chainer.cuda.get_device_from_id(self.gpu).use()
        # https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/29ea7e24dce4abae30faecf769855823ad7bb637/src/openpose/hand/handDetector.cpp
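        # The hand centre is estimated from the body pose: starting at the
        # wrist, extrapolate 0.3x the elbow->wrist vector beyond the wrist.
        # The square ROI width comes from _get_hand_roi_width() plus
        # ~hand/width_offset; hands are skipped when the ROI is smaller than
        # ~hand/thre1 or the wrist score is below ~hand/thre2.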
        for joint_positions in people_joint_positions:
            # crop hand image for each person
            width = self._get_hand_roi_width(joint_positions) + self.hand_width_offset
            hand_joint_positions = []
            if width > self.hand_thre1:
                rwrist = find_joint('RWrist', joint_positions)
                if rwrist['score'] > self.hand_thre2:
                    cx, cy = rwrist['x'], rwrist['y']
                    relbow = find_joint('RElbow', joint_positions)
                    if relbow['score'] > 0:
                        cx += 0.3 * (cx - relbow['x'])
                        cy += 0.3 * (cy - relbow['y'])
                    hand_bgr = self._crop_square_image(bgr, cx, cy, width)
                    hand_joints = self._hand_estimate_chainer_backend_each(
                        hand_bgr, cx, cy, False)
                    hand_joint_positions.extend(hand_joints)
                    if self.visualize:
                        cv2.circle(bgr, (int(cx), int(cy)), int(width / 2.),
                                   (255, 0, 0), thickness=1)

                lwrist = find_joint('LWrist', joint_positions)
                if lwrist['score'] > self.hand_thre2:
                    cx, cy = lwrist['x'], lwrist['y']
                    lelbow = find_joint('LElbow', joint_positions)
                    if lelbow['score'] > 0:
                        cx += 0.3 * (cx - lelbow['x'])
                        cy += 0.3 * (cy - lelbow['y'])
                    hand_bgr = self._crop_square_image(bgr, cx, cy, width)
                    hand_joints = self._hand_estimate_chainer_backend_each(
                        hand_bgr, cx, cy, True)
                    hand_joint_positions.extend(hand_joints)
                    if self.visualize:
                        cv2.circle(bgr, (int(cx), int(cy)), int(width / 2.),
                                   (255, 0, 0), thickness=1)

            for limb in self.index2handname:
                jt = find_joint(limb, hand_joint_positions)
                if jt is not None:
                    joint_positions.append(jt)
                else:
                    joint_positions.append({
                        'x': 0, 'y': 0, 'score': -1, 'limb': limb})

        return people_joint_positions

    def _hand_estimate_chainer_backend_each(self, hand_bgr, cx, cy, left_hand):
        xp = self.hand_net.xp

        if left_hand:
            # flipCode=1 mirrors the image horizontally (around the y-axis),
            # so the left hand can be fed to the right-hand model
            hand_bgr = cv2.flip(hand_bgr, 1)

        resized = cv2.resize(hand_bgr, (368, 368), interpolation=cv2.INTER_CUBIC)
        x = np.array(resized[np.newaxis], dtype=np.float32)
        x = x.transpose(0, 3, 1, 2)
        x = x / 256 - 0.5

        if self.gpu >= 0:
            x = chainer.cuda.to_gpu(x)
        x = chainer.Variable(x)

        heatmaps = self.hand_net(x)
        heatmaps = F.resize_images(heatmaps[-1], hand_bgr.shape[:2])[0]
        if self.gpu >= 0:
            heatmaps.to_cpu()
        heatmaps = heatmaps.array

        if left_hand:
            heatmaps = heatmaps.transpose(1, 2, 0)
            heatmaps = cv2.flip(heatmaps, 1)
            heatmaps = heatmaps.transpose(2, 0, 1)

        # get peak on heatmap
        hmaps = []
        if xp == np:
            # cpu
            for i in range(heatmaps.shape[0] - 1):
                heatmap = gaussian_filter(heatmaps[i], sigma=self.hand_gaussian_sigma)
                hmaps.append(heatmap)
        else:
            heatmaps = chainer.cuda.to_gpu(heatmaps)
            heatmaps = F.convolution_2d(
                heatmaps[:, xp.newaxis], self.hand_gaussian_kernel,
                stride=1, pad=int(self.hand_gaussian_ksize / 2))
            heatmaps = chainer.cuda.to_cpu(xp.squeeze(heatmaps.array))
            for heatmap in heatmaps[:-1]:
                hmaps.append(heatmap)
        keypoints = []
        idx_offset = 0
        if left_hand:
            idx_offset += len(hmaps)
        for i, heatmap in enumerate(hmaps):
            conf = heatmap.max()
            cds = np.array(np.where(heatmap == conf)).flatten().tolist()
            py = cy + cds[0] - hand_bgr.shape[0] / 2
            px = cx + cds[1] - hand_bgr.shape[1] / 2
            keypoints.append({'x': px, 'y': py, 'score': conf,
                              'limb': self.index2handname[idx_offset + i]})
        return keypoints

    def _crop_square_image(self, img, cx, cy, width):
        cx, cy, width = int(cx), int(cy), int(width)
        left, right = cx - int(width / 2), cx + int(width / 2)
        top, bottom = cy - int(width / 2), cy + int(width / 2)
        imh, imw, imc = img.shape
        cropped = img[max(0, top):max(min(imh, bottom), 0),
                      max(0, left):max(min(imw, right), 0)]
        ch, cw = cropped.shape[:2]
        bx, by = max(0, -left), max(0, -top)
        padded = np.zeros((bottom - top, right - left, imc), dtype=np.uint8)
        padded[by:by + ch, bx:bx + cw] = cropped
        return padded
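
    # Example behaviour (illustrative): cropping a 100x100 image at
    # cx=10, cy=10 with width=40 requests the region [-10, 30) on both axes;
    # the part outside the image is zero-padded, so a full 40x40 patch is
    # always returned with the requested centre kept at the patch centre.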

    def _get_hand_roi_width(self, joint_positions):
        lengths = []
        for conn, ratio in zip(self.limb_sequence, self.limb_length_hand_ratio):
            j1, j2 = joint_positions[conn[0] - 1], joint_positions[conn[1] - 1]
            length = 0
            if j1['score'] > 0 and j2['score'] > 0:
                dx, dy = j1['x'] - j2['x'], j1['y'] - j2['y']
                length = np.linalg.norm([dx, dy])
            lengths.append(length / ratio)
        if np.sum(lengths[:5]) > 0:
            lengths = lengths[:5]
        rospy.logdebug('length: %s' % lengths)
        return np.sum(lengths) / len(np.nonzero(lengths)[0])


if __name__ == '__main__':
    rospy.init_node('people_pose_estimation_2d')
    PeoplePoseEstimation2D()
    rospy.spin()