from __future__ import division
from __future__ import print_function

from distutils.version import LooseVersion
import pkg_resources  # needed for the torch version check below
if LooseVersion(pkg_resources.get_distribution("torch").version) \
        < LooseVersion('1.1.0'):
    print('''torch >= 1.1.0 is required:
Download the wheel files from https://download.pytorch.org/whl/cu90/torch_stable.html (melodic)
and install them with pip as follows:
pip install --user torch-1.1.0-cp27-cp27mu-linux_x86_64.whl
pip install --user torchvision-0.3.0-cp27-cp27mu-manylinux1_x86_64.whl''')
import message_filters
from skimage import feature

from image_geometry import PinholeCameraModel

from jsk_recognition_msgs.msg import HandPose
from jsk_recognition_msgs.msg import HandPoseArray
from jsk_recognition_msgs.msg import HumanSkeleton
from jsk_recognition_msgs.msg import HumanSkeletonArray
from jsk_recognition_msgs.msg import Segment
from jsk_topic_tools import ConnectionBasedTransport
from sensor_msgs.msg import Image
from sensor_msgs.msg import CameraInfo
FINGERNAME2INDEX = {name: i for i, name in enumerate(INDEX2FINGERNAME)}
NUMBER_OF_FINGERS = 5
NUMBER_OF_FINGER_JOINT = 3
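# Assumed layout: SRHandNet predicts 21 keypoints per hand (see the
# 21-element point lists built in detect_hand below). Each (a, b) pair in
# CONNECTION_PAIR names the two keypoint indices joined by one drawn and
# published bone.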
CONNECTION_PAIR = [(0, 1), (0, 1), (1, 2), (2, 3),
                   (0, 5), (4, 5), (5, 6), (6, 7),
                   (0, 9), (8, 9), (9, 10), (10, 11),
                   (0, 13), (12, 13), (13, 14), (14, 15),
                   (0, 17), (16, 17), (17, 18), (18, 19)]
        super(self.__class__, self).__init__()
        self.backend = rospy.get_param('~backend', 'torch')
        self.gpu = rospy.get_param('~gpu', -1)  # -1 runs inference on CPU
        self.image_pub = self.advertise(
            '~output/vis', Image, queue_size=1)
        # the three attribute names below are reconstructions; only the
        # topic names and message types are from the original
        self.pose_pub = self.advertise(
            '~output/pose', HandPoseArray, queue_size=1)
        self.pose_2d_pub = self.advertise(
            '~output/pose_2d', HandPoseArray, queue_size=1)
        self.skeleton_pub = self.advertise(
            '~skeleton', HumanSkeletonArray, queue_size=1)
        return self.image_pub.get_num_connections() > 0
        raise RuntimeError('Unsupported backend: %s' % self.backend)
        rospy.loginfo('Loading model')
        model_file = rospy.get_param('~model_file', None)
        if self.gpu >= 0 and torch.cuda.is_available():
            self.model = torch.jit.load(
                model_file, map_location=torch.device('cuda'))
            rospy.loginfo('Finished loading SRHandNet model to gpu')
        else:
            self.model = torch.jit.load(
                model_file, map_location=torch.device('cpu'))
            rospy.loginfo('Finished loading SRHandNet model to cpu')
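        # torch.jit.load() restores a serialized TorchScript archive, so no
        # Python class definition of SRHandNet is needed here; map_location
        # controls which device the weights are materialized on.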
        rospy.loginfo(
            'You need to check version match between PyTorch and CUDA. '
            'See https://jsk-docs.readthedocs.io/projects/jsk_recognition/en/latest/install_chainer_gpu.html '
            'and https://github.com/jsk-ros-pkg/jsk_recognition/pull/2601#issuecomment-876405219 '
            'for more info')
        pkg_name = 'jsk_perception'
        filepath = 'trained_data/SRHandNet.pts'
        jsk_data.download_data(
            pkg_name=pkg_name,
            path=filepath,
            url='https://drive.google.com/'
                'uc?id=10oGfGALjIIwdIWO9MQftN07A3TRdvp3b',
            md5='9f39d3baa43cf1c962c8f752c009eb14',
        )
        rp = rospkg.RosPack()
        pkgpath = rp.get_path(pkg_name)
        return osp.join(pkgpath, filepath)
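        # jsk_data.download_data() is expected to fetch the pretrained
        # weights once, verify the md5 checksum, and cache the file under the
        # jsk_perception source tree, so the returned path is always usable.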
        queue_size = rospy.get_param('~queue_size', 10)
        sub_img = message_filters.Subscriber(
            '~input', Image, queue_size=1, buff_size=2**24)
        sub_depth = message_filters.Subscriber(
            '~input/depth', Image, queue_size=1, buff_size=2**24)
        self.subs = [sub_img, sub_depth]
        sync_cam_info = rospy.get_param("~sync_camera_info", False)
        if sync_cam_info:
            sub_info = message_filters.Subscriber(
                '~input/info', CameraInfo, queue_size=1, buff_size=2**24)
            self.subs.append(sub_info)
        if rospy.get_param('~approximate_sync', True):
            slop = rospy.get_param('~slop', 0.1)
            sync = message_filters.ApproximateTimeSynchronizer(
                fs=self.subs, queue_size=queue_size, slop=slop)
        else:
            sync = message_filters.TimeSynchronizer(
                fs=self.subs, queue_size=queue_size)
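        # ApproximateTimeSynchronizer fires when the subscribed topics carry
        # stamps within ~slop seconds of one another (default 0.1 s);
        # TimeSynchronizer instead requires exactly matching stamps.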
        sub_img = rospy.Subscriber(
            '~input', Image, self._cb,
            queue_size=1, buff_size=2**24)
        self.subs = [sub_img]
        for sub in self.subs:
            sub.unregister()
        rospy.loginfo("Received camera info")
        camera_model = PinholeCameraModel()
        camera_model.fromCameraInfo(camera_info_msg)
        br = cv_bridge.CvBridge()
        img = br.imgmsg_to_cv2(img_msg, desired_encoding='bgr8')
        depth_img = br.imgmsg_to_cv2(depth_msg, 'passthrough')
        if depth_msg.encoding == '16UC1':
            depth_img = np.asarray(depth_img, dtype=np.float32)
            depth_img *= 0.001  # convert [mm] to [m]
        elif depth_msg.encoding != '32FC1':
            rospy.logerr('Unsupported depth encoding: %s' % depth_msg.encoding)
            return

        hands_points, hands_point_scores, hands_score = \
            self.hand_pose_estimate(img, img_msg.header)
        hand_pose_array_msg = HandPoseArray(header=img_msg.header)
        for hand in hand_pose_2d_msg.poses:
            hand_pose_msg = HandPose(hand_score=hand.hand_score)
            for pose, finger_name, score in zip(
                    hand.poses, hand.finger_names, hand.point_scores):
                u = int(pose.position.x)
                v = int(pose.position.y)
                if 0 <= u < depth_img.shape[1] and \
                        0 <= v < depth_img.shape[0]:
                    z = float(depth_img[v][u])
                else:
                    continue
                if np.isnan(z) or z <= 0:
                    continue
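                # Pinhole back-projection of pixel (u, v) at depth z using
                # the camera intrinsics (fx, fy, cx, cy):
                #   x = (u - cx) * z / fx,  y = (v - cy) * z / fy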
                x = (u - camera_model.cx()) * z / camera_model.fx()
                y = (v - camera_model.cy()) * z / camera_model.fy()
                hand_pose_msg.finger_names.append(finger_name)
                hand_pose_msg.poses.append(
                    Pose(position=Point(x=x, y=y, z=z),
                         orientation=Quaternion(w=1)))
                hand_pose_msg.point_scores.append(score)
            hand_pose_array_msg.poses.append(hand_pose_msg)
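        # Each 3D hand pose is re-published as a HumanSkeleton: every
        # CONNECTION_PAIR entry whose two endpoints were successfully
        # back-projected above becomes one named Segment bone.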
        skeleton_array_msg = HumanSkeletonArray(header=img_msg.header)
        for hand_pose_msg in hand_pose_array_msg.poses:
            hand_joint_index_to_list_index = {
                FINGERNAME2INDEX[name]: i
                for i, name in enumerate(hand_pose_msg.finger_names)}
            skeleton_msg = HumanSkeleton(header=img_msg.header)
            for index_a, index_b in CONNECTION_PAIR:
                if index_a not in hand_joint_index_to_list_index \
                        or index_b not in hand_joint_index_to_list_index:
                    continue
                joint_a_index = hand_joint_index_to_list_index[index_a]
                joint_b_index = hand_joint_index_to_list_index[index_b]
                bone = Segment(
                    start_point=hand_pose_msg.poses[joint_a_index].position,
                    end_point=hand_pose_msg.poses[joint_b_index].position)
                bone_name = '{}->{}'.format(
                    hand_pose_msg.finger_names[joint_a_index],
                    hand_pose_msg.finger_names[joint_b_index])
                skeleton_msg.bones.append(bone)
                skeleton_msg.bone_names.append(bone_name)
            skeleton_array_msg.skeletons.append(skeleton_msg)
        img = self.bridge.imgmsg_to_cv2(
            img_msg, desired_encoding='bgr8')
        hands_points, hands_point_scores, hands_score = \
            self.hand_pose_estimate(img, img_msg.header)
        hand_pose_msg = HandPoseArray(header=header)
        for hand_points, point_scores, hand_score in \
                zip(hands_points, hands_point_scores, hands_score):
            pose_msg = HandPose()
            pose_msg.hand_score = hand_score
            for index, joint_pos in enumerate(hand_points):
                pose_msg.point_scores.append(point_scores[index])
                if len(joint_pos) == 2:
                    pose_msg.poses.append(
                        Pose(position=Point(
                            x=joint_pos[0], y=joint_pos[1], z=0.)))
                else:
                    pose_msg.poses.append(
                        Pose(position=Point(x=0., y=0., z=0.)))
            hand_pose_msg.poses.append(pose_msg)
        raise ValueError(
            'Unsupported backend: {0}'.format(self.backend))
        # _estimate_hands is a hypothetical name (the original call target
        # was not preserved); the matching routine returning this 4-tuple
        # appears near the end of this file.
        hands_points, hands_rect, hands_point_scores, hands_score = \
            self._estimate_hands(frame)
        vis_img = self._draw_joints(frame, hands_points, hands_rect)
        vis_msg = self.bridge.cv2_to_imgmsg(vis_img, encoding='bgr8')
        vis_msg.header = header
        return hands_points, hands_point_scores, hands_score
        for rect_idx, points in enumerate(hands_points):
            rect = hands_rect[rect_idx]
            cv2.rectangle(frame, rect[0:2], rect[2:4], (0, 0, 255), 6)
            missing = 0
            for i, point in enumerate(points):
                if point is None or len(point) == 0:
                    missing += 1
                    continue
                cv2.circle(frame, point, 6, self.HAND_COLORS[i], 6)
            if (len(points[cnt]) != 0
                    and len(points[cnt+1]) != 0):
                cv2.line(
                    frame, points[cnt], points[cnt+1],
                    self.HAND_COLORS[cnt], 3)  # color/width are assumptions
            # percentage of the 21 joints that were actually detected
            per = '{}%'.format(int(2100 - missing*100) // 21)
            text_pos = hands_rect[rect_idx][0:2]
            text_pos = (text_pos[0], text_pos[1] + 5)
            cv2.putText(frame, per, text_pos, 1, 3, (0, 0, 255), 3)
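        # skimage's feature.peak_local_max returns the (row, col) coordinates
        # of every local maximum above threshold_abs; pairing each coordinate
        # with its response value lets callers rank candidates by confidence.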
        peak_loc = feature.peak_local_max(
            src, min_distance=2, threshold_abs=threshold,
            exclude_border=True)
        peak_pair = [(src[x][y], (x, y)) for x, y in peak_loc]
    def _set_transform_image(  # hypothetical name; original def line elided
            self, tensor, src_img, hands_rect=None, tensor_idx=0):
        img = src_img
        if hands_rect is not None:
            l, t, r, b = hands_rect[tensor_idx]
            img = src_img[t:b, l:r]
        rows, cols = img.shape[:2]
        ratio = min(tensor.shape[2] / rows, tensor.shape[3] / cols)
        mat = np.array([[ratio, 0, 0], [0, ratio, 0]], dtype=np.float32)
        dst = cv2.warpAffine(
            img, mat, (tensor.shape[3], tensor.shape[2]),
            flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT,
            borderValue=(128, 128, 128))
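        # The affine resize keeps the aspect ratio (a single scale factor in
        # both matrix rows) and pads the unused area with mid-gray
        # (128, 128, 128), which is roughly zero after the normalization
        # below.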
        dst = dst / 255. - 0.5
        b, g, r = cv2.split(dst)
        if self.gpu >= 0 and torch.cuda.is_available():
            tensor[tensor_idx][0] = torch.tensor(
                b, device=torch.device('cuda')).float()
            tensor[tensor_idx][1] = torch.tensor(
                g, device=torch.device('cuda')).float()
            tensor[tensor_idx][2] = torch.tensor(
                r, device=torch.device('cuda')).float()
        else:
            tensor[tensor_idx][0] = torch.tensor(
                b, device=torch.device('cpu')).float()
            tensor[tensor_idx][1] = torch.tensor(
                g, device=torch.device('cpu')).float()
            tensor[tensor_idx][2] = torch.tensor(
                r, device=torch.device('cpu')).float()
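        # Design note: the two branches above differ only in the target
        # device; computing
        #   device = torch.device('cuda' if self.gpu >= 0 and
        #                         torch.cuda.is_available() else 'cpu')
        # once would let the three channel assignments be shared.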
        if self.gpu >= 0 and torch.cuda.is_available():
            tensor = torch.zeros(
                device=torch.device('cuda'))
        else:
            tensor = torch.zeros(
                device=torch.device('cpu'))
        rows, cols, _ = input_image.shape
        with torch.no_grad():
            heatmap = self.model.forward(tensor)[3]
        # reconstructed: the net's input-to-heatmap downsampling factor
        ratio_net_downsample = tensor.shape[2] / heatmap.shape[2]
        rect_map_idx = heatmap.shape[1] - 3
        rectmap = []
        for i in range(3):
            rectmap.append(
                np.copy(heatmap[0][i+rect_map_idx].cpu().detach().numpy()))
        for loc_val, points in locations:
            pos_x, pos_y = points
            ratio_width = ratio_height = pixelcount = 0
            for m in range(
                    max(pos_x-2, 0), min(pos_x+3, int(heatmap.shape[2]))):
                for n in range(
                        max(pos_y-2, 0), min(pos_y+3, int(heatmap.shape[3]))):
                    ratio_width += rectmap[1][m][n]
                    ratio_height += rectmap[2][m][n]
                    pixelcount += 1
            ratio_width = min(max(ratio_width / pixelcount, 0), 1)
            ratio_height = min(max(ratio_height / pixelcount, 0), 1)
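            # ratio_width / ratio_height are the rect-map responses averaged
            # over an (at most) 5x5 window around the detected peak and
            # clamped to [0, 1].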
            ratio = ratio_net_downsample / ratio_input_to_net
            l_t = (
                max(int(pos_x - rect_h/2), 0),
                max(int(pos_y - rect_w/2), 0))
            r_b = (
                min(int(pos_x + rect_h/2), rows - 1),
                min(int(pos_y + rect_w/2), cols - 1))
            hands_rect.append((l_t[1], l_t[0], r_b[1], r_b[0]))
            hands_score.append(loc_val)
        return hands_rect, hands_score
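        # hands_rect entries are (left, top, right, bottom) in image pixels:
        # the (row, col) peak coordinates are swapped into (x, y) order when
        # the corners are appended above, matching the `l, t, r, b`
        # unpacking used during preprocessing.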
        if len(hands_rect) == 0:
            return [], []
        ratio_input_to_net = [None] * len(hands_rect)
        if self.gpu >= 0 and torch.cuda.is_available():
            tensor = torch.zeros(
                device=torch.device('cuda'))
        else:
            tensor = torch.zeros(
                device=torch.device('cpu'))
        for i in range(len(hands_rect)):
            # hypothetical helper name; this is the resize-and-normalize
            # routine defined above
            self._set_transform_image(
                tensor, input_image, hands_rect, i)
        with torch.no_grad():
            heatmaps = self.model.forward(tensor)[3]
        hands_points = []
        hands_point_scores = []
        for rect_idx in range(len(hands_rect)):
            hand_points = [[] for i in range(21)]
            point_scores = [0. for i in range(21)]
            x, y, _, _ = hands_rect[rect_idx]
            ratio = ratio_net_downsample / ratio_input_to_net[rect_idx]
            for i in range(21):
                heatmap = heatmaps[rect_idx][i].cpu().detach().numpy()
                # points: (score, (row, col)) peaks extracted from this
                # heatmap; points[0] is taken as the best candidate
                score, point = points[0]
                hand_points[i] = (
                    int(point[1]*ratio) + x,
                    int(point[0]*ratio) + y)
                point_scores[i] = score
            hands_points.append(hand_points)
            hands_point_scores.append(point_scores)
        return hands_points, hands_point_scores
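        # detect_hand() therefore returns, per detected rectangle, 21
        # keypoints in full-image coordinates: each per-crop peak is scaled
        # back by `ratio` and offset by the crop origin (x, y) taken from
        # hands_rect.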
        hands_rect, hands_score = self.detect_bbox(input_image)
        if len(hands_rect) == 0:
            return [], [], [], []
        hands_points, hands_point_scores = self.detect_hand(
            input_image, hands_rect)
        # drop any hand for which some joint was not recovered as a
        # valid (x, y) pair
        for i in range(len(hands_rect)-1, -1, -1):
            for j in range(21):
                if len(hands_points[i][j]) != 2:
                    hands_points.pop(i)
                    hands_rect.pop(i)
                    hands_score.pop(i)
                    hands_point_scores.pop(i)
                    break
        return hands_points, hands_rect, hands_point_scores, hands_score
if __name__ == '__main__':
    rospy.init_node('hand_pose_estimation_2d')