jsk_perception: people_pose_estimation

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 # -*- coding:utf-8 -*-
00003 
00004 #    See: https://arxiv.org/abs/1611.08050
00005 
00006 import math
00007 
00008 import chainer
00009 import chainer.functions as F
00010 from chainer import cuda
00011 import cv2
00012 import matplotlib
00013 import numpy as np
00014 from scipy.ndimage.filters import gaussian_filter
00015 import pylab as plt  # NOQA
00016 
00017 import cv_bridge
00018 import message_filters
00019 import rospy
00020 from jsk_topic_tools import ConnectionBasedTransport
00021 from geometry_msgs.msg import Pose
00022 from geometry_msgs.msg import Point
00023 from geometry_msgs.msg import Quaternion
00024 from jsk_recognition_msgs.msg import PeoplePose
00025 from jsk_recognition_msgs.msg import PeoplePoseArray
00026 from sensor_msgs.msg import CameraInfo
00027 from sensor_msgs.msg import Image
00028 
00029 from openpose import PoseNet, HandNet
00030 
00031 
00032 def find_joint(limb, jts):
00033     jt = [jt for jt in jts if jt['limb'] == limb]
00034     if jt:
00035         return jt[0]
00036     else:
00037         return None
00038 
00039 
00040 def padRightDownCorner(img, stride, padValue):
00041     h = img.shape[0]
00042     w = img.shape[1]
00043 
00044     pad = 4 * [None]
00045     pad[0] = 0  # up
00046     pad[1] = 0  # left
00047     pad[2] = 0 if (h % stride == 0) else stride - (h % stride)  # down
00048     pad[3] = 0 if (w % stride == 0) else stride - (w % stride)  # right
00049 
00050     img_padded = img
00051     pad_up = np.tile(img_padded[0:1, :, :] * 0 + padValue, (pad[0], 1, 1))
00052     img_padded = np.concatenate((pad_up, img_padded), axis=0)
00053     pad_left = np.tile(img_padded[:, 0:1, :] * 0 + padValue, (1, pad[1], 1))
00054     img_padded = np.concatenate((pad_left, img_padded), axis=1)
00055     pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + padValue, (pad[2], 1, 1))
00056     img_padded = np.concatenate((img_padded, pad_down), axis=0)
00057     pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + padValue, (1, pad[3], 1))
00058     img_padded = np.concatenate((img_padded, pad_right), axis=1)
00059 
00060     return img_padded, pad
00061 
00062 
00063 class PeoplePoseEstimation2D(ConnectionBasedTransport):
00064     # Note: the order of this sequences is important
00065     # find connection in the specified sequence,
00066     # center 29 is in the position 15
00067     limb_sequence = [[ 2,  1], [ 1, 16], [ 1, 15], [ 6, 18], [ 3, 17],
00068                      [ 2,  3], [ 2,  6], [ 3,  4], [ 4,  5], [ 6,  7],
00069                      [ 7,  8], [ 2,  9], [ 9, 10], [10, 11], [ 2, 12],
00070                      [12, 13], [13, 14], [15, 17], [16, 18]]
00071     # the middle joints heatmap correpondence
00072     map_idx = [[47, 48], [49, 50], [51, 52], [37, 38], [45, 46],
00073                [31, 32], [39, 40], [33, 34], [35, 36], [41, 42],
00074                [43, 44], [19, 20], [21, 22], [23, 24], [25, 26],
00075                [27, 28], [29, 30], [53, 54], [55, 56]]
00076     # length ratio from connections
00077     limb_length_hand_ratio = [ 0.6,  0.2,  0.2, 0.85, 0.85,
00078                                0.6,  0.6, 0.93, 0.65, 0.95,
00079                               0.65,  2.2,  1.7,  1.7,  2.2,
00080                                1.7,  1.7, 0.25, 0.25]
00081     # hand joint connection sequence
00082     hand_sequence = [[0, 1],   [1, 2],   [2, 3],   [3, 4],
00083                      [0, 5],   [5, 6],   [6, 7],   [7, 8],
00084                      [0, 9],   [9, 10],  [10, 11], [11, 12],
00085                      [0, 13],  [13, 14], [14, 15], [15, 16],
00086                      [0, 17],  [17, 18], [18, 19], [19, 20],]
00087 
00088     index2limbname = ["Nose",
00089                       "Neck",
00090                       "RShoulder",
00091                       "RElbow",
00092                       "RWrist",
00093                       "LShoulder",
00094                       "LElbow",
00095                       "LWrist",
00096                       "RHip",
00097                       "RKnee",
00098                       "RAnkle",
00099                       "LHip",
00100                       "LKnee",
00101                       "LAnkle",
00102                       "REye",
00103                       "LEye",
00104                       "REar",
00105                       "LEar",
00106                       "Bkg"]
00107 
00108     index2handname = ["RHand{}".format(i) for i in range(21)] +\
00109                      ["LHand{}".format(i) for i in range(21)]
00110 
00111     def __init__(self):
00112         super(self.__class__, self).__init__()
00113         self.backend = rospy.get_param('~backend', 'chainer')
00114         self.scales = rospy.get_param('~scales', [0.38])
00115         self.stride = rospy.get_param('~stride', 8)
00116         self.pad_value = rospy.get_param('~pad_value', 128)
00117         self.thre1 = rospy.get_param('~thre1', 0.1)
00118         self.thre2 = rospy.get_param('~thre2', 0.05)
00119         self.width = rospy.get_param('~width', None)
00120         self.height = rospy.get_param('~height', None)
00121         self.check_wh()
00122         self.gpu = rospy.get_param('~gpu', -1)  # -1 is cpu mode
00123         self.with_depth = rospy.get_param('~with_depth', False)
00124         # hand detection
00125         self.use_hand = rospy.get_param('~hand/enable', False)
00126         self.hand_gaussian_ksize = rospy.get_param('~hand/gaussian_ksize', 17)
00127         self.hand_gaussian_sigma = rospy.get_param('~hand/gaussian_sigma', 2.5)
00128         self.hand_thre1 = rospy.get_param('~hand/thre1', 20)
00129         self.hand_thre2 = rospy.get_param('~hand/thre2', 0.1)
00130         self.hand_width_offset = rospy.get_param('~hand/width_offset', 0)
00131         # model loading
00132         self._load_model()
00133         # topic advertise
00134         self.image_pub = self.advertise('~output', Image, queue_size=1)
00135         self.pose_pub = self.advertise('~pose', PeoplePoseArray, queue_size=1)
00136         self.sub_info = None
00137         if self.with_depth is True:
00138             self.pose_2d_pub = self.advertise('~pose_2d', PeoplePoseArray, queue_size=1)
00139 
00140     def check_wh(self):
00141         if (self.width is None) != (self.height is None):
00142             rospy.logwarn('width and height should be specified, but '
00143                           'specified only {}'
00144                           .format('height' if self.height else 'width'))
00145 
00146     @property
00147     def visualize(self):
00148         return self.image_pub.get_num_connections() > 0
00149 
00150     def _load_model(self):
00151         if self.backend == 'chainer':
00152             self._load_chainer_model()
00153         else:
00154             raise RuntimeError('Unsupported backend: %s', self.backend)
00155 
00156     def _load_chainer_model(self):
00157         model_file = rospy.get_param('~model_file')
00158         self.pose_net = PoseNet(pretrained_model=model_file)
00159         rospy.loginfo('Finished loading trained model: {0}'.format(model_file))
00160         # hand net
00161         if self.use_hand:
00162             model_file = rospy.get_param('~hand/model_file')
00163             self.hand_net = HandNet(pretrained_model=model_file)
00164             rospy.loginfo('Finished loading trained hand model: {}'.format(model_file))
00165         #
00166         if self.gpu >= 0:
00167             self.pose_net.to_gpu(self.gpu)
00168             if self.use_hand:
00169                 self.hand_net.to_gpu(self.gpu)
00170                 # create gaussian kernel
00171                 ksize = self.hand_gaussian_ksize
00172                 sigma = self.hand_gaussian_sigma
00173                 c = ksize // 2
00174                 k = np.zeros((1, 1, ksize, ksize), dtype=np.float32)
00175                 for y in range(ksize):
00176                     dy = abs(y - c)
00177                     for x in range(ksize):
00178                         dx = abs(x - c)
00179                         e = np.exp(- (dx ** 2 + dy ** 2) / (2 * sigma ** 2))
00180                         k[0][0][y][x] = 1 / (sigma ** 2 * 2 * np.pi) * e
00181                 k = chainer.cuda.to_gpu(k, device=self.gpu)
00182                 self.hand_gaussian_kernel = k
00183         chainer.global_config.train = False
00184         chainer.global_config.enable_backprop = False
00185 
00186     def subscribe(self):
00187         if self.with_depth:
00188             queue_size = rospy.get_param('~queue_size', 10)
00189             sub_img = message_filters.Subscriber(
00190                 '~input', Image, queue_size=1, buff_size=2**24)
00191             sub_depth = message_filters.Subscriber(
00192                 '~input/depth', Image, queue_size=1, buff_size=2**24)
00193             self.subs = [sub_img, sub_depth]
00194 
00195             # NOTE: Camera info is not synchronized by default.
00196             # See https://github.com/jsk-ros-pkg/jsk_recognition/issues/2165
00197             sync_cam_info = rospy.get_param("~sync_camera_info", False)
00198             if sync_cam_info:
00199                 sub_info = message_filters.Subscriber(
00200                     '~input/info', CameraInfo, queue_size=1, buff_size=2**24)
00201                 self.subs.append(sub_info)
00202             else:
00203                 self.sub_info = rospy.Subscriber(
00204                     '~input/info', CameraInfo, self._cb_cam_info)
00205 
00206             if rospy.get_param('~approximate_sync', True):
00207                 slop = rospy.get_param('~slop', 0.1)
00208                 sync = message_filters.ApproximateTimeSynchronizer(
00209                     fs=self.subs, queue_size=queue_size, slop=slop)
00210             else:
00211                 sync = message_filters.TimeSynchronizer(
00212                     fs=self.subs, queue_size=queue_size)
00213             if sync_cam_info:
00214                 sync.registerCallback(self._cb_with_depth_info)
00215             else:
00216                 self.camera_info_msg = None
00217                 sync.registerCallback(self._cb_with_depth)
00218         else:
00219             sub_img = rospy.Subscriber(
00220                 '~input', Image, self._cb, queue_size=1, buff_size=2**24)
00221             self.subs = [sub_img]
00222 
00223     def unsubscribe(self):
00224         for sub in self.subs:
00225             sub.unregister()
00226         if self.sub_info is not None:
00227             self.sub_info.unregister()
00228             self.sub_info = None
00229 
00230     def _cb_cam_info(self, msg):
00231         self.camera_info_msg = msg
00232         self.sub_info.unregister()
00233         self.sub_info = None
00234         rospy.loginfo("Received camera info")
00235 
00236     def _cb_with_depth(self, img_msg, depth_msg):
00237         if self.camera_info_msg is None:
00238             return
00239         self._cb_with_depth_info(img_msg, depth_msg, self.camera_info_msg)
00240 
00241     def _cb_with_depth_info(self, img_msg, depth_msg, camera_info_msg):
00242         br = cv_bridge.CvBridge()
00243         img = br.imgmsg_to_cv2(img_msg, desired_encoding='bgr8')
00244         depth_img = br.imgmsg_to_cv2(depth_msg, 'passthrough')
00245         if depth_msg.encoding == '16UC1':
00246             depth_img = np.asarray(depth_img, dtype=np.float32)
00247             depth_img /= 1000  # convert metric: mm -> m
00248         elif depth_msg.encoding != '32FC1':
00249             rospy.logerr('Unsupported depth encoding: %s' % depth_msg.encoding)
00250 
00251         people_joint_positions, all_peaks = self.pose_estimate(img)
00252         if self.use_hand:
00253             people_joint_positions = self.hand_estimate(
00254                 img, people_joint_positions)
00255 
00256         people_pose_msg = PeoplePoseArray()
00257         people_pose_msg.header = img_msg.header
00258         people_pose_2d_msg = self._create_2d_people_pose_array_msgs(
00259             people_joint_positions,
00260             img_msg.header)
00261 
00262         # calculate xyz-position
00263         fx = camera_info_msg.K[0]
00264         fy = camera_info_msg.K[4]
00265         cx = camera_info_msg.K[2]
00266         cy = camera_info_msg.K[5]
00267         for person_joint_positions in people_joint_positions:
00268             pose_msg = PeoplePose()
00269             for joint_pos in person_joint_positions:
00270                 if joint_pos['score'] < 0:
00271                     continue
00272                 if 0 <= joint_pos['y'] < depth_img.shape[0] and\
00273                    0 <= joint_pos['x'] < depth_img.shape[1]:
00274                     z = float(depth_img[int(joint_pos['y'])][int(joint_pos['x'])])
00275                 else:
00276                     continue
00277                 if np.isnan(z):
00278                     continue
00279                 x = (joint_pos['x'] - cx) * z / fx
00280                 y = (joint_pos['y'] - cy) * z / fy
00281                 pose_msg.limb_names.append(joint_pos['limb'])
00282                 pose_msg.scores.append(joint_pos['score'])
00283                 pose_msg.poses.append(Pose(position=Point(x=x, y=y, z=z),
00284                                            orientation=Quaternion(w=1)))
00285             people_pose_msg.poses.append(pose_msg)
00286 
00287         self.pose_2d_pub.publish(people_pose_2d_msg)
00288         self.pose_pub.publish(people_pose_msg)
00289 
00290         if self.visualize:
00291             vis_img = self._draw_joints(img, people_joint_positions, all_peaks)
00292             vis_msg = br.cv2_to_imgmsg(vis_img, encoding='bgr8')
00293             vis_msg.header.stamp = img_msg.header.stamp
00294             self.image_pub.publish(vis_msg)
00295 
00296     def _cb(self, img_msg):
00297         br = cv_bridge.CvBridge()
00298         img = br.imgmsg_to_cv2(img_msg, desired_encoding='bgr8')
00299         people_joint_positions, all_peaks = self.pose_estimate(img)
00300         if self.use_hand:
00301             people_joint_positions = self.hand_estimate(
00302                 img, people_joint_positions)
00303 
00304         people_pose_msg = self._create_2d_people_pose_array_msgs(
00305             people_joint_positions,
00306             img_msg.header)
00307 
00308         self.pose_pub.publish(people_pose_msg)
00309 
00310         if self.visualize:
00311             vis_img = self._draw_joints(img, people_joint_positions, all_peaks)
00312             vis_msg = br.cv2_to_imgmsg(vis_img, encoding='bgr8')
00313             vis_msg.header.stamp = img_msg.header.stamp
00314             self.image_pub.publish(vis_msg)
00315 
00316     def _create_2d_people_pose_array_msgs(self, people_joint_positions, header):
00317         people_pose_msg = PeoplePoseArray(header=header)
00318         for person_joint_positions in people_joint_positions:
00319             pose_msg = PeoplePose()
00320             for joint_pos in person_joint_positions:
00321                 if joint_pos['score'] < 0:
00322                     continue
00323                 pose_msg.limb_names.append(joint_pos['limb'])
00324                 pose_msg.scores.append(joint_pos['score'])
00325                 pose_msg.poses.append(Pose(position=Point(x=joint_pos['x'],
00326                                                           y=joint_pos['y'],
00327                                                           z=0)))
00328             people_pose_msg.poses.append(pose_msg)
00329         return people_pose_msg
00330 
00331     def pose_estimate(self, bgr):
00332         if self.backend == 'chainer':
00333             return self._pose_estimate_chainer_backend(bgr)
00334         raise ValueError('Unsupported backend: {0}'.format(self.backend))
00335 
00336     def _pose_estimate_chainer_backend(self, bgr_img):
00337         if self.gpu >= 0:
00338             chainer.cuda.get_device_from_id(self.gpu).use()
00339         xp = self.pose_net.xp
00340 
00341         org_h, org_w, _ = bgr_img.shape
00342         if not (self.width is None or self.height is None):
00343             bgr_img = cv2.resize(bgr_img, (self.width, self.height))
00344 
00345         heatmap_avg = xp.zeros((bgr_img.shape[0], bgr_img.shape[1], 19),
00346                                dtype=np.float32)
00347         paf_avg = xp.zeros((bgr_img.shape[0], bgr_img.shape[1], 38),
00348                            dtype=np.float32)
00349 
00350         for scale in self.scales:
00351             img = cv2.resize(bgr_img, (0, 0), fx=scale,
00352                              fy=scale, interpolation=cv2.INTER_CUBIC)
00353             padded_img, pad = padRightDownCorner(
00354                 img, self.stride, self.pad_value)
00355             x = np.transpose(np.float32(
00356                 padded_img[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
00357             if self.gpu >= 0:
00358                 x = chainer.cuda.to_gpu(x)
00359             x = chainer.Variable(x)
00360             pafs, heatmaps = self.pose_net(x)
00361             paf = pafs[-1]
00362             heatmap = heatmaps[-1]
00363 
00364             # extract outputs, resize, and remove padding
00365             heatmap = F.resize_images(
00366                 heatmap, (heatmap.data.shape[2] * self.stride,
00367                           heatmap.data.shape[3] * self.stride))
00368             heatmap = heatmap[:, :, :padded_img.shape[0] -
00369                               pad[2], :padded_img.shape[1] - pad[3]]
00370             heatmap = F.resize_images(
00371                 heatmap, (bgr_img.shape[0], bgr_img.shape[1]))
00372             heatmap = xp.transpose(xp.squeeze(heatmap.data), (1, 2, 0))
00373             paf = F.resize_images(
00374                 paf, (paf.data.shape[2] * self.stride,
00375                       paf.data.shape[3] * self.stride))
00376             paf = paf[:, :, :padded_img.shape[0] -
00377                       pad[2], :padded_img.shape[1] - pad[3]]
00378             paf = F.resize_images(paf, (bgr_img.shape[0], bgr_img.shape[1]))
00379             paf = xp.transpose(xp.squeeze(paf.data), (1, 2, 0))
00380 
00381             coeff = 1.0 / len(self.scales)
00382             paf_avg += paf * coeff
00383             heatmap_avg += heatmap * coeff
00384 
00385         heatmav_left = xp.zeros_like(heatmap_avg)
00386         heatmav_left[1:, :] = heatmap_avg[:-1, :]
00387         heatmav_right = xp.zeros_like(heatmap_avg)
00388         heatmav_right[:-1, :] = heatmap_avg[1:, :]
00389         heatmav_up = xp.zeros_like(heatmap_avg)
00390         heatmav_up[:, 1:] = heatmap_avg[:, :-1]
00391         heatmav_down = xp.zeros_like(heatmap_avg)
00392         heatmav_down[:, :-1] = heatmap_avg[:, 1:]
00393         peaks_binary = (heatmap_avg >= heatmav_left) & \
00394                        (heatmap_avg >= heatmav_right) & \
00395                        (heatmap_avg >= heatmav_up) & \
00396                        (heatmap_avg >= heatmav_down) & \
00397                        (heatmap_avg > self.thre1)
00398 
00399         peaks = xp.array(xp.nonzero(peaks_binary[..., :len(self.index2limbname)-1]), dtype=np.int32).T
00400         peak_counter = peaks.shape[0]
00401         all_peaks = xp.zeros((peak_counter, 4), dtype=np.float32)
00402         all_peaks[:, 0] = peaks[:, 1]
00403         all_peaks[:, 1] = peaks[:, 0]
00404         all_peaks[:, 2] = heatmap_avg[peaks.T.tolist()]
00405         peaks_order = peaks[..., 2]
00406         try:
00407             all_peaks = all_peaks[xp.argsort(peaks_order)]
00408         except AttributeError:
00409             # cupy.argsort is not available at cupy==1.0.1
00410             peaks_order = chainer.cuda.to_cpu(peaks_order)
00411             all_peaks = all_peaks[np.argsort(peaks_order)]
00412         all_peaks[:, 3] = xp.arange(peak_counter, dtype=np.float32)
00413         if self.gpu >= 0:
00414             all_peaks = chainer.cuda.to_cpu(all_peaks)
00415             peaks_order = chainer.cuda.to_cpu(peaks_order)
00416         all_peaks = np.split(all_peaks, np.cumsum(
00417             np.bincount(peaks_order, minlength=len(self.index2limbname)-1)))
00418         connection_all = []
00419         mid_num = 10
00420         eps = 1e-8
00421         score_mid = paf_avg[:, :, [[x - 19 for x in self.map_idx[k]]
00422                                    for k in range(len(self.map_idx))]]
00423 
00424         cands = np.array(all_peaks, dtype=object)[
00425             np.array(self.limb_sequence, dtype=np.int32) - 1]
00426         candAs = cands[:, 0]
00427         candBs = cands[:, 1]
00428         nAs = np.array([len(candA) for candA in candAs])
00429         nBs = np.array([len(candB) for candB in candBs])
00430         target_indices = np.nonzero(np.logical_and(nAs != 0, nBs != 0))[0]
00431         if len(target_indices) == 0:
00432             return [], []
00433 
00434         all_candidates_A = [np.repeat(np.array(tmp_candA, dtype=np.float32), nB, axis=0)
00435                             for tmp_candA, nB in zip(candAs, nBs)]
00436         all_candidates_B = [np.tile(np.array(tmp_candB, dtype=np.float32), (nA, 1))
00437                             for tmp_candB, nA in zip(candBs, nAs)]
00438 
00439         target_candidates_B = [all_candidates_B[index]
00440                                for index in target_indices]
00441         target_candidates_A = [all_candidates_A[index]
00442                                for index in target_indices]
00443 
00444         vec = np.vstack(target_candidates_B)[
00445             :, :2] - np.vstack(target_candidates_A)[:, :2]
00446         if self.gpu >= 0:
00447             vec = chainer.cuda.to_gpu(vec)
00448         norm = xp.sqrt(xp.sum(vec ** 2, axis=1)) + eps
00449         vec = vec / norm[:, None]
00450         start_end = zip(np.round(np.mgrid[np.vstack(target_candidates_A)[:, 1].reshape(-1, 1):np.vstack(target_candidates_B)[:, 1].reshape(-1, 1):(mid_num * 1j)]).astype(np.int32),
00451                         np.round(np.mgrid[np.vstack(target_candidates_A)[:, 0].reshape(-1, 1):np.vstack(
00452                             target_candidates_B)[:, 0].reshape(-1, 1):(mid_num * 1j)]).astype(np.int32),
00453                         np.concatenate([[[index] * mid_num for i in range(len(c))] for index, c in zip(target_indices, target_candidates_B)]),)
00454 
00455         v = score_mid[np.concatenate(
00456             start_end, axis=1).tolist()].reshape(-1, mid_num, 2)
00457         score_midpts = xp.sum(v * xp.repeat(vec, (mid_num),
00458                                             axis=0).reshape(-1, mid_num, 2), axis=2)
00459         score_with_dist_prior = xp.sum(score_midpts, axis=1) / mid_num + \
00460             xp.minimum(0.5 * bgr_img.shape[0] / norm - 1,
00461                        xp.zeros_like(norm, dtype=np.float32))
00462         c1 = xp.sum(score_midpts > self.thre2, axis=1) > 0.8 * mid_num
00463         c2 = score_with_dist_prior > 0.0
00464         criterion = xp.logical_and(c1, c2)
00465 
00466         indices_bins = np.cumsum(nAs * nBs)
00467         indices_bins = np.concatenate(
00468             [np.zeros(1), indices_bins]).astype(np.int32)
00469         target_candidate_indices = xp.nonzero(criterion)[0]
00470         if self.gpu >= 0:
00471             target_candidate_indices = chainer.cuda.to_cpu(
00472                 target_candidate_indices)
00473             score_with_dist_prior = chainer.cuda.to_cpu(score_with_dist_prior)
00474 
00475         k_s = np.digitize(target_candidate_indices, indices_bins) - 1
00476         i_s = (target_candidate_indices - (indices_bins[k_s])) // nBs[k_s]
00477         j_s = (target_candidate_indices - (indices_bins[k_s])) % nBs[k_s]
00478 
00479         connection_candidate = np.concatenate([k_s.reshape(-1, 1),
00480                                                i_s.reshape(-1, 1),
00481                                                j_s.reshape(-1, 1),
00482                                                score_with_dist_prior[
00483                                                    target_candidate_indices][None, ].T,
00484                                                (score_with_dist_prior[target_candidate_indices][None, ] +
00485                                                 np.concatenate(target_candidates_A)[target_candidate_indices, 2] + np.concatenate(target_candidates_B)[target_candidate_indices, 2]).T], axis=1)
00486 
00487         sorted_indices = np.argsort(
00488             connection_candidate[:, 0] * 100 - connection_candidate[:, 3])
00489 
00490         connection_all = []
00491         for _ in range(0, 19):
00492             connection = np.zeros((0, 5), dtype=np.float32)
00493             connection_all.append(connection)
00494 
00495         for c_candidate in connection_candidate[sorted_indices]:
00496             k, i, j = c_candidate[0:3].astype(np.int32)
00497             score = c_candidate[3]
00498             if(len(connection_all[k]) >= min(nAs[k], nBs[k])):
00499                 continue
00500             i *= nBs[k]
00501             if(i not in connection_all[k][:, 3] and j not in connection_all[k][:, 4]):
00502                 connection_all[k] = np.vstack([connection_all[k], np.array(
00503                     [all_candidates_A[k][i][3], all_candidates_B[k][j][3], score, i, j], dtype=np.float32)])
00504 
00505         joint_cands_indices = -1 * np.ones((0, 20))
00506         candidate = np.array(
00507             [item for sublist in all_peaks for item in sublist])
00508         for k in range(len(self.map_idx)):
00509             partAs = connection_all[k][:, 0]
00510             partBs = connection_all[k][:, 1]
00511             indexA, indexB = np.array(self.limb_sequence[k]) - 1
00512             for i in range(len(connection_all[k])):  # = 1:size(temp,1)
00513                 found = 0
00514                 joint_cands_indices_idx = [-1, -1]
00515                 # 1:size(joint_cands_indices,1):
00516                 for j in range(len(joint_cands_indices)):
00517                     if joint_cands_indices[j][indexA] == float(partAs[i]) or joint_cands_indices[j][indexB] == float(partBs[i]):
00518                         joint_cands_indices_idx[found] = j
00519                         found += 1
00520 
00521                 if found == 1:
00522                     j = joint_cands_indices_idx[0]
00523                     if(joint_cands_indices[j][indexB] != float(partBs[i])):
00524                         joint_cands_indices[j][indexB] = partBs[i]
00525                         joint_cands_indices[j][-1] += 1
00526                         joint_cands_indices[
00527                             j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
00528                         joint_cands_indices[
00529                             j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
00530                 elif found == 2:  # if found 2 and disjoint, merge them
00531                     j1, j2 = joint_cands_indices_idx
00532                     membership = ((joint_cands_indices[j1] >= 0).astype(
00533                         int) + (joint_cands_indices[j2] >= 0).astype(int))[:-2]
00534                     if len(np.nonzero(membership == 2)[0]) == 0:  # merge
00535                         joint_cands_indices[j1][
00536                             :-2] += (joint_cands_indices[j2][:-2] + 1)
00537                         joint_cands_indices[
00538                             j1][-2:] += joint_cands_indices[j2][-2:]
00539                         joint_cands_indices[j1][-2] += connection_all[k][i][2]
00540                         joint_cands_indices = np.delete(
00541                             joint_cands_indices, j2, 0)
00542                     else:  # as like found == 1
00543                         joint_cands_indices[j1][indexB] = partBs[i]
00544                         joint_cands_indices[j1][-1] += 1
00545                         joint_cands_indices[
00546                             j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
00547 
00548                 # if find no partA in the joint_cands_indices, create a new
00549                 # joint_cands_indices
00550                 elif not found and k < len(self.index2limbname) - 2:
00551                     row = -1 * np.ones(20)
00552                     row[indexA] = partAs[i]
00553                     row[indexB] = partBs[i]
00554                     row[-1] = 2
00555                     row[-2] = sum(candidate[connection_all[k]
00556                                             [i, :2].astype(int), 2]) + connection_all[k][i][2]
00557                     joint_cands_indices = np.vstack([joint_cands_indices, row])
00558 
00559         # delete some rows of joint_cands_indices which has few parts occur
00560         deleteIdx = []
00561         for i in range(len(joint_cands_indices)):
00562             if joint_cands_indices[i][-1] < 4 or joint_cands_indices[i][-2] / joint_cands_indices[i][-1] < 0.4:
00563                 deleteIdx.append(i)
00564         joint_cands_indices = np.delete(joint_cands_indices, deleteIdx, axis=0)
00565 
00566         return self._extract_joint_position(joint_cands_indices, candidate), all_peaks
00567 
00568     def _extract_joint_position(self, joint_cands_indices, candidate):
00569         people_joint_positions = []
00570         for joint_cand_idx in joint_cands_indices:
00571             person_joint_positions = []
00572             for i, limb_name in enumerate(self.index2limbname):
00573                 cand_idx = int(joint_cand_idx[i])
00574                 if cand_idx == -1 or cand_idx >= candidate.shape[0]:
00575                     person_joint_positions.append(dict(limb=limb_name,
00576                                                        x=0,
00577                                                        y=0,
00578                                                        score=-1))
00579                     continue
00580                 X, Y = candidate[cand_idx, :2]
00581                 person_joint_positions.append(dict(limb=limb_name,
00582                                                    x=X,
00583                                                    y=Y,
00584                                                    score=candidate[cand_idx, 2]))
00585             people_joint_positions.append(person_joint_positions)
00586 
00587         return people_joint_positions
00588 
00589     def _draw_joints(self, img, people_joint_positions, all_peaks):
00590         if all_peaks:
00591             # keypoints
00592             cmap = matplotlib.cm.get_cmap('hsv')
00593             n = len(self.index2limbname)-1
00594             for i in range(len(self.index2limbname)-1):
00595                 rgba = np.array(cmap(1. * i / n))
00596                 color = rgba[:3] * 255
00597                 for j in range(len(all_peaks[i])):
00598                     cv2.circle(img, (int(all_peaks[i][j][0]), int(
00599                         all_peaks[i][j][1])), 4, color, thickness=-1)
00600 
00601         # connections
00602         stickwidth = 4
00603         for joint_positions in people_joint_positions:
00604             n = len(self.limb_sequence)
00605             for i, conn in enumerate(self.limb_sequence):
00606                 rgba = np.array(cmap(1. * i / n))
00607                 color = rgba[:3] * 255
00608                 j1, j2 = joint_positions[conn[0]-1], joint_positions[conn[1]-1]
00609                 if j1['score'] < 0 or j2['score'] < 0:
00610                     continue
00611                 cx, cy = int((j1['x'] + j2['x']) / 2.), int((j1['y'] + j2['y']) / 2.)
00612                 dx, dy = j1['x'] - j2['x'], j1['y'] - j2['y']
00613                 length = np.linalg.norm([dx, dy])
00614                 angle = int(np.degrees(np.arctan2(dy, dx)))
00615                 polygon = cv2.ellipse2Poly((cx, cy), (int(length / 2.), stickwidth),
00616                                            angle, 0, 360, 1)
00617                 top, left = np.min(polygon[:,1]), np.min(polygon[:,0])
00618                 bottom, right = np.max(polygon[:,1]), np.max(polygon[:,0])
00619                 roi = img[top:bottom,left:right]
00620                 roi2 = roi.copy()
00621                 cv2.fillConvexPoly(roi2, polygon - np.array([left, top]), color)
00622                 cv2.addWeighted(roi, 0.4, roi2, 0.6, 0.0, dst=roi)
00623 
00624         # for hand
00625         if self.use_hand:
00626             offset = len(self.limb_sequence)
00627             for joint_positions in people_joint_positions:
00628                 n = len(joint_positions[offset:])
00629                 for i, jt in enumerate(joint_positions[offset:]):
00630                     if jt['score'] < 0.0:
00631                         continue
00632                     rgba = np.array(cmap(1. * i / n))
00633                     color = rgba[:3] * 255
00634                     cv2.circle(img, (int(jt['x']), int(jt['y'])),
00635                                2, color, thickness=-1)
00636 
00637             for joint_positions in people_joint_positions:
00638                 offset = len(self.limb_sequence)
00639                 n = len(self.hand_sequence)
00640                 for _ in range(2):
00641                     # for both hands
00642                     for i, conn in enumerate(self.hand_sequence):
00643                         rgba = np.array(cmap(1. * i / n))
00644                         color = rgba[:3] * 255
00645                         j1 = joint_positions[offset + conn[0]]
00646                         j2 = joint_positions[offset + conn[1]]
00647                         if j1['score'] < 0 or j2['score'] < 0:
00648                             continue
00649                         cx, cy = int((j1['x'] + j2['x']) / 2.), int((j1['y'] + j2['y']) / 2.)
00650                         dx, dy = j1['x'] - j2['x'], j1['y'] - j2['y']
00651                         length = np.linalg.norm([dx, dy])
00652                         angle = int(np.degrees(np.arctan2(dy, dx)))
00653                         polygon = cv2.ellipse2Poly((cx, cy), (int(length / 2.), stickwidth),
00654                                                    angle, 0, 360, 1)
00655                         top, left = np.min(polygon[:,1]), np.min(polygon[:,0])
00656                         bottom, right = np.max(polygon[:,1]), np.max(polygon[:,0])
00657                         roi = img[top:bottom,left:right]
00658                         roi2 = roi.copy()
00659                         cv2.fillConvexPoly(roi2, polygon - np.array([left, top]), color)
00660                         cv2.addWeighted(roi, 0.4, roi2, 0.6, 0.0, dst=roi)
00661                     #
00662                     offset += len(self.index2handname) / 2
00663 
00664         return img
00665 
00666     def hand_estimate(self, bgr, people_joint_positions):
00667         if self.backend == 'chainer':
00668             return self._hand_estimate_chainer_backend(bgr, people_joint_positions)
00669         raise ValueError('Unsupported backend: {0}'.format(self.backend))
00670 
00671     def _hand_estimate_chainer_backend(self, bgr, people_joint_positions):
00672         if self.gpu >= 0:
00673             chainer.cuda.get_device_from_id(self.gpu).use()
00674         # https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/29ea7e24dce4abae30faecf769855823ad7bb637/src/openpose/hand/handDetector.cpp
00675         for joint_positions in people_joint_positions:
00676             # crop hand image for each person
00677             width = self._get_hand_roi_width(joint_positions) + self.hand_width_offset
00678             hand_joint_positions = []
00679             if width > self.hand_thre1:
00680                 rwrist = find_joint('RWrist', joint_positions)
00681                 if rwrist['score'] > self.hand_thre2:
00682                     cx, cy = rwrist['x'], rwrist['y']
00683                     relbow = find_joint('RElbow', joint_positions)
00684                     if relbow['score'] > 0:
00685                         cx += 0.3 * (cx - relbow['x'])
00686                         cy += 0.3 * (cy - relbow['y'])
00687                     hand_bgr = self._crop_square_image(bgr, cx, cy, width)
00688                     hand_joints = self._hand_estimate_chainer_backend_each(
00689                         hand_bgr, cx, cy, False)
00690                     hand_joint_positions.extend(hand_joints)
00691                     if self.visualize:
00692                         cv2.circle(bgr, (int(cx), int(cy)), int(width/2.),
00693                                    (255, 0, 0), thickness=1)
00694 
00695                 lwrist = find_joint('LWrist', joint_positions)
00696                 if lwrist['score'] > self.hand_thre2:
00697                     cx, cy = lwrist['x'], lwrist['y']
00698                     lelbow = find_joint('LElbow', joint_positions)
00699                     if lelbow['score'] > 0:
00700                         cx += 0.3 * (cx - lelbow['x'])
00701                         cy += 0.3 * (cy - lelbow['y'])
00702                     hand_bgr = self._crop_square_image(bgr, cx, cy, width)
00703                     hand_joints = self._hand_estimate_chainer_backend_each(
00704                         hand_bgr, cx, cy, True)
00705                     hand_joint_positions.extend(hand_joints)
00706                     if self.visualize:
00707                         cv2.circle(bgr, (int(cx), int(cy)), int(width/2.),
00708                                    (255, 0, 0), thickness=1)
00709 
00710             for limb in self.index2handname:
00711                 jt = find_joint(limb, hand_joint_positions)
00712                 if jt is not None:
00713                     joint_positions.append(jt)
00714                 else:
00715                     joint_positions.append({
00716                         'x': 0, 'y': 0, 'score': -1, 'limb': limb})
00717 
00718         return people_joint_positions
00719 
00720     def _hand_estimate_chainer_backend_each(self, hand_bgr, cx, cy, left_hand):
00721         xp = self.hand_net.xp
00722 
00723         if left_hand:
00724             hand_bgr = cv2.flip(hand_bgr, 1)  # 1 = vertical
00725 
00726         resized = cv2.resize(hand_bgr, (368, 368), interpolation=cv2.INTER_CUBIC)
00727         x = np.array(resized[np.newaxis], dtype=np.float32)
00728         x = x.transpose(0, 3, 1, 2)
00729         x = x / 256 - 0.5
00730 
00731         if self.gpu >= 0:
00732             x = chainer.cuda.to_gpu(x)
00733         x = chainer.Variable(x)
00734 
00735         heatmaps = self.hand_net(x)
00736         heatmaps = F.resize_images(heatmaps[-1], hand_bgr.shape[:2])[0]
00737         if self.gpu >= 0:
00738             heatmaps.to_cpu()
00739         heatmaps = heatmaps.array
00740 
00741         if left_hand:
00742             heatmaps = heatmaps.transpose(1, 2, 0)
00743             heatmaps = cv2.flip(heatmaps, 1)
00744             heatmaps = heatmaps.transpose(2, 0, 1)
00745 
00746         # get peak on heatmap
00747         hmaps = []
00748         if xp == np:
00749             # cpu
00750             for i in range(heatmaps.shape[0] - 1):
00751                 heatmap = gaussian_filter(heatmaps[i], sigma=self.hand_gaussian_sigma)
00752                 hmaps.append(heatmap)
00753         else:
00754             heatmaps = chainer.cuda.to_gpu(heatmaps)
00755             heatmaps = F.convolution_2d(
00756                 heatmaps[:, xp.newaxis], self.hand_gaussian_kernel,
00757                 stride=1, pad=int(self.hand_gaussian_ksize / 2))
00758             heatmaps = chainer.cuda.to_cpu(xp.squeeze(heatmaps.array))
00759             for heatmap in heatmaps[:-1]:
00760                 hmaps.append(heatmap)
00761         keypoints = []
00762         idx_offset = 0
00763         if left_hand:
00764             idx_offset += len(hmaps)
00765         for i, heatmap in enumerate(hmaps):
00766             conf = heatmap.max()
00767             cds = np.array(np.where(heatmap==conf)).flatten().tolist()
00768             py = cy + cds[0] - hand_bgr.shape[0] / 2
00769             px = cx + cds[1] - hand_bgr.shape[1] / 2
00770             keypoints.append({'x': px, 'y': py, 'score': conf,
00771                               'limb': self.index2handname[idx_offset+i]})
00772         return keypoints
00773 
00774     def _crop_square_image(self, img, cx, cy, width):
00775         cx, cy, width = int(cx), int(cy), int(width)
00776         left, right = cx - int(width / 2), cx + int(width / 2)
00777         top, bottom = cy - int(width / 2), cy + int(width / 2)
00778         imh, imw, imc = img.shape
00779         cropped = img[max(0, top):min(imh, bottom), max(0, left):min(imw, right)]
00780         ch, cw = cropped.shape[:2]
00781         bx, by = max(0, -left), max(0, -top)
00782         padded = np.zeros((bottom - top, right - left, imc), dtype=np.uint8)
00783         padded[by:by+ch,bx:bx+cw] = cropped
00784         return padded
00785 
00786     def _get_hand_roi_width(self, joint_positions):
00787         lengths = []
00788         for conn, ratio in zip(self.limb_sequence, self.limb_length_hand_ratio):
00789             j1, j2 = joint_positions[conn[0]-1], joint_positions[conn[1]-1]
00790             length = 0
00791             if j1['score'] > 0 and j2['score'] > 0:
00792                 dx, dy = j1['x'] - j2['x'], j1['y'] - j2['y']
00793                 length = np.linalg.norm([dx, dy])
00794             lengths.append(length / ratio)
00795         if np.sum(lengths[:5]) > 0:
00796             lengths = lengths[:5]
00797         rospy.logdebug('length: %s' % lengths)
00798         return np.sum(lengths) / len(np.nonzero(lengths)[0])
00799 
00800 
00801 if __name__ == '__main__':
00802     rospy.init_node('people_pose_estimation_2d')
00803     PeoplePoseEstimation2D()
00804     rospy.spin()