aws_detect_faces.py
#!/usr/bin/env python

#
# ROS driver for AWS Rekognition (detect_faces)
#
# https://github.com/awsdocs/amazon-rekognition-developer-guide/blob/master/doc_source/faces-detect-images.md
#
#
# Author: Kei Okada <k-okada@jsk.t.u-tokyo.ac.jp>
#

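# The node reads AWS credentials from the file given by ~aws_credentials_path.
# A minimal example of the expected aws.json (the values are placeholders):
# {
#     "region": "us-east-1",
#     "aws_access_key_id": "YOUR_ACCESS_KEY_ID",
#     "aws_secret_access_key": "YOUR_SECRET_ACCESS_KEY"
# }
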
from __future__ import division

import rospy
from dynamic_reconfigure.server import Server
from jsk_perception.cfg import AWSDetectFacesConfig

from jsk_topic_tools import ConnectionBasedTransport
from geometry_msgs.msg import PoseArray, Pose, Point
from sensor_msgs.msg import CompressedImage, Image
from opencv_apps.msg import FaceArrayStamped, Face, Rect
from jsk_recognition_msgs.msg import ClassificationResult, PeoplePoseArray, PeoplePose
from tf.transformations import quaternion_from_euler

import numpy as np
import math

import boto3

import cv2
import cv_bridge

import json

import sys

COLORS = [
    (100, 100, 100),
    (100, 0, 0),
    (150, 0, 0),
    (200, 0, 0),
    (255, 0, 0),
    (100, 100, 0),
    (150, 150, 0),
    (200, 200, 0),
    (255, 255, 0),
    (0, 100, 50),
    (0, 150, 75),
    (0, 200, 100),
    (0, 255, 125),
    (0, 50, 100),
    (0, 75, 150),
    (0, 100, 200),
    (0, 125, 255),
    (100, 0, 100),
    (150, 0, 150),
    (200, 0, 200),
    (255, 0, 255),
]


class DetectFaces(ConnectionBasedTransport):

    def __init__(self):
        super(DetectFaces, self).__init__()

        aws_credentials_path = rospy.get_param('~aws_credentials_path', 'aws.json')
        rospy.loginfo("Loading AWS credentials from {}".format(aws_credentials_path))
        try:
            with open(aws_credentials_path) as f:
                aws_credentials = json.load(f)
        except IOError:
            rospy.logerr('Cannot open "{}".\n Please put region/aws_access_key_id/aws_secret_access_key to aws.json.'.format(aws_credentials_path))
            sys.exit(1)

        try:
            aws_access_key_id = aws_credentials['aws_access_key_id']
            aws_secret_access_key = aws_credentials['aws_secret_access_key']
            region_name = aws_credentials['region']
        except KeyError:
            rospy.logerr('Invalid config file: it must contain region, aws_access_key_id and aws_secret_access_key')
            raise

        self.rekognition = boto3.client(
            'rekognition',
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            region_name=region_name)

        self.bridge = cv_bridge.CvBridge()

        self.always_publish = rospy.get_param('~always_publish', True)
        rospy.loginfo("Publish even if no face is found : {}".format(self.always_publish))

        self.use_window = rospy.get_param('~use_window', False)
        rospy.loginfo("Launch image window : {}".format(self.use_window))

        # attributes can also be changed at runtime via Dynamic Reconfigure
        self.attributes = rospy.get_param('~attributes', 'ALL')
        rospy.loginfo("Facial attributes to be returned : {}".format(self.attributes))
        self.old_config = {self.attributes: False}
        self.dynamic_reconfigure_server = Server(AWSDetectFacesConfig, self.reconfigure_callback)

        self.faces_pub = self.advertise('~faces', FaceArrayStamped, queue_size=1)
        self.poses_pub = self.advertise('~poses', PoseArray, queue_size=1)
        self.attributes_pub = self.advertise('~attributes', ClassificationResult, queue_size=1)
        self.landmarks_pub = self.advertise('~landmarks', PeoplePoseArray, queue_size=1)
        self.image_pub = self.advertise('~output', Image, queue_size=1)
        self.image_comp_pub = self.advertise('~output/compressed', CompressedImage, queue_size=1)
        self.orig_image_pub = self.advertise('~image/compressed', CompressedImage, queue_size=1)
        #
        # To process the latest message, buff_size must be large enough:
        # it has to exceed the amount of data that can queue up while one
        # callback runs.  The default assumes
        # 640*480*3 (image size) / 5 (expected compression ratio) *
        # 10 (messages that may pile up: 10 x 30 msec = 300 msec of processing time).
        #
        # c.f. https://answers.ros.org/question/220502/image-subscriber-lag-despite-queue-1/
        #
        self.buff_size = rospy.get_param('~buff_size', 640 * 480 * 3 // 5 * 10)
        rospy.loginfo("rospy.Subscriber buffer size : {}".format(self.buff_size))

    def subscribe(self):
        self.image_sub = rospy.Subscriber('{}/compressed'.format(rospy.resolve_name('image')), CompressedImage, self.image_callback, queue_size=1, buff_size=self.buff_size)

    def unsubscribe(self):
        self.image_sub.unregister()

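    # Keep the ALL/DEFAULT checkboxes and the individual attribute checkboxes
    # mutually exclusive: choosing ALL or DEFAULT clears the individual
    # attributes, and choosing any individual attribute clears ALL and DEFAULT.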
    def reconfigure_callback(self, config, level):
        new_config = {}
        if self.old_config:
            new_config = {k: config[k] for k in config if k in self.old_config and config[k] != self.old_config[k]}

        # If ALL is chosen, set all the other attributes to False
        if 'ALL' in new_config and new_config['ALL'] is True:
            self.attributes = ["ALL"]
            for key in config.keys():
                if key in ['ALL', 'groups']:
                    continue
                config[key] = False

        # If DEFAULT is chosen, set all the other attributes to False
        elif 'DEFAULT' in new_config and new_config['DEFAULT'] is True:
            self.attributes = ["DEFAULT"]
            for key in config.keys():
                if key in ['DEFAULT', 'groups']:
                    continue
                config[key] = False

        else:
            self.attributes = []
            for key in config.keys():
                if key in ['ALL', 'DEFAULT', 'groups']:
                    continue
                if config[key]:
                    self.attributes.append(key)
            # If attributes are chosen individually, clear ALL/DEFAULT
            if self.attributes:
                config['ALL'] = False
                config['DEFAULT'] = False

        self.old_config = config
        return config

    def process_attributes(self, text, img, bbox):
        rospy.logdebug(" {}".format(text))
        if self.use_window:
            cv2.putText(img, text,
                        (bbox.x + bbox.height // 2 + 8, bbox.y - bbox.width // 2 + self.offset), cv2.FONT_HERSHEY_PLAIN,
                        fontScale=1, color=(0, 255, 0), thickness=1, lineType=cv2.LINE_AA)
            self.offset += 16

    @property
    def visualize(self):
        return self.use_window \
            or self.image_pub.get_num_connections() > 0 \
            or self.image_comp_pub.get_num_connections() > 0

    def image_callback(self, image):
        start_time = rospy.Time.now()
        # decode compressed image
        np_arr = np.frombuffer(image.data, np.uint8)
        img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)

        if image.format.find("compressed rgb") > -1:
            img = img[:, :, ::-1]

        img_gray = None
        img_width = img.shape[1]
        img_height = img.shape[0]
        visualize = self.visualize
        if visualize:
            img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img_gray = cv2.cvtColor(img_gray, cv2.COLOR_GRAY2BGR)

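        # DetectFaces takes the image as raw bytes, so re-encode the frame as
        # JPEG before sending it to AWS (images passed as bytes are limited to
        # 5 MB by the Rekognition API).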
        _, buf = cv2.imencode('.jpg', img)
        faces = self.rekognition.detect_faces(Image={'Bytes': buf.tobytes()}, Attributes=self.attributes)

        face_msgs = FaceArrayStamped()
        face_msgs.header = image.header
        face_msgs.faces = []

        pose_msgs = PoseArray()
        pose_msgs.header = image.header
        pose_msgs.poses = []

        attributes_msgs = ClassificationResult()
        attributes_msgs.header = image.header
        attributes_msgs.label_names = []
        attributes_msgs.probabilities = []

        landmarks_msgs = PeoplePoseArray()
        landmarks_msgs.header = image.header
        landmarks_msgs.poses = []

        # See https://docs.aws.amazon.com/rekognition/latest/dg/API_DetectFaces.html for details
        rospy.logdebug("Found {} faces".format(len(faces['FaceDetails'])))
        for face in faces['FaceDetails']:

            # Bounding box of the face
            face_msg = Face()
            bbox_msg = Rect()  # Rect data type; x, y is the center point
            if 'BoundingBox' in face:
                top = int(face['BoundingBox']['Top'] * img_height)
                left = int(face['BoundingBox']['Left'] * img_width)
                width = int(face['BoundingBox']['Width'] * img_width)
                height = int(face['BoundingBox']['Height'] * img_height)
                bbox_msg.x = left + width // 2
                bbox_msg.y = top + height // 2
                bbox_msg.width = width
                bbox_msg.height = height

                face_msg.face = bbox_msg

                if visualize:
                    cv2.rectangle(img_gray, (left, top), (left + width, top + height), color=(0, 255, 0), thickness=2)

            # Indicates the location of landmarks on the face.
            if 'Landmarks' in face:
                landmark_msg = PeoplePose()
                landmark_msg.limb_names = []
                landmark_msg.poses = []
                for i in range(len(face['Landmarks'])):
                    landmark = face['Landmarks'][i]
                    px = int(landmark['X'] * img_width)
                    py = int(landmark['Y'] * img_height)

                    landmark_msg.limb_names.append(landmark['Type'])
                    landmark_msg.poses.append(Pose(position=Point(x=px, y=py)))

                    if visualize:
                        cv2.circle(img_gray, (px, py), 1, COLORS[i % len(COLORS)], thickness=-1)

                landmarks_msgs.poses.append(landmark_msg)

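                # Build eye bounding boxes from the eye-center landmarks
                # (eyeLeft/eyeRight) and the eye corner/lid landmarks.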
                eye_left_msg = Rect()
                eye_right_msg = Rect()
                landmark = next((x for x in face['Landmarks'] if x['Type'] == 'eyeLeft'), False)
                if landmark:
                    eye_left_msg.x = int(landmark['X'] * img_width)
                    eye_left_msg.y = int(landmark['Y'] * img_height)

                landmark1 = next((x for x in face['Landmarks'] if x['Type'] == 'leftEyeLeft'), False)
                landmark2 = next((x for x in face['Landmarks'] if x['Type'] == 'leftEyeRight'), False)
                if landmark1 and landmark2:
                    eye_left_msg.width = int((landmark2['X'] - landmark1['X']) * img_width)
                landmark1 = next((x for x in face['Landmarks'] if x['Type'] == 'leftEyeUp'), False)
                landmark2 = next((x for x in face['Landmarks'] if x['Type'] == 'leftEyeDown'), False)
                if landmark1 and landmark2:
                    eye_left_msg.height = int((landmark2['Y'] - landmark1['Y']) * img_height)

                landmark = next((x for x in face['Landmarks'] if x['Type'] == 'eyeRight'), False)
                if landmark:
                    eye_right_msg.x = int(landmark['X'] * img_width)
                    eye_right_msg.y = int(landmark['Y'] * img_height)

                landmark1 = next((x for x in face['Landmarks'] if x['Type'] == 'rightEyeLeft'), False)
                landmark2 = next((x for x in face['Landmarks'] if x['Type'] == 'rightEyeRight'), False)
                if landmark1 and landmark2:
                    eye_right_msg.width = int((landmark2['X'] - landmark1['X']) * img_width)
                landmark1 = next((x for x in face['Landmarks'] if x['Type'] == 'rightEyeUp'), False)
                landmark2 = next((x for x in face['Landmarks'] if x['Type'] == 'rightEyeDown'), False)
                if landmark1 and landmark2:
                    eye_right_msg.height = int((landmark2['Y'] - landmark1['Y']) * img_height)

                face_msg.eyes = [eye_left_msg, eye_right_msg]

            # initialize the vertical offset used when drawing text in the window
            self.offset = 16

            # Confidence level that the bounding box contains a face.
            if 'Confidence' in face:
                confidence = face['Confidence']
                face_msg.confidence = confidence
                attributes_msgs.label_names.append('confidence')
                attributes_msgs.probabilities.append(confidence)
                self.process_attributes("Confidence : {:.3f}".format(confidence), img_gray, bbox_msg)

            # The estimated age range, in years, for the face. Low represents the lowest estimated age and High represents the highest estimated age.
            if 'AgeRange' in face:
                self.process_attributes("Age Range : {} - {}".format(face['AgeRange']['Low'], face['AgeRange']['High']), img_gray, bbox_msg)

            # Indicates the pose of the face as determined by its pitch, roll, and yaw.
            pose_msg = None
            if 'Pose' in face:
                yaw = face['Pose']['Yaw']
                roll = face['Pose']['Roll']
                pitch = face['Pose']['Pitch']
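                # Rekognition reports the angles in degrees, while
                # quaternion_from_euler expects radians (tf default 'sxyz' axes).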
                q = quaternion_from_euler(roll * math.pi / 180, pitch * math.pi / 180, yaw * math.pi / 180)
                pose_msg = Pose()
                pose_msg.orientation.x = q[0]
                pose_msg.orientation.y = q[1]
                pose_msg.orientation.z = q[2]
                pose_msg.orientation.w = q[3]
                self.process_attributes("Pose : Yaw {:.3f}, Roll {:.3f}, Pitch {:.3f}".format(yaw, roll, pitch), img_gray, bbox_msg)

            # Identifies image brightness and sharpness.
            if 'Quality' in face:
                sharpness = face['Quality']['Sharpness']
                brightness = face['Quality']['Brightness']
                attributes_msgs.label_names.append('sharpness')
                attributes_msgs.probabilities.append(sharpness)
                attributes_msgs.label_names.append('brightness')
                attributes_msgs.probabilities.append(brightness)
                self.process_attributes("Quality : Sharpness {:.3f}, Brightness {:.3f}".format(sharpness, brightness), img_gray, bbox_msg)

            # The emotions that appear to be expressed on the face, and the confidence level in the determination.
            if 'Emotions' in face:
                for emotion in face['Emotions']:
                    if emotion['Confidence'] > 50:
                        face_msg.label += "; {}".format(emotion['Type'])
                    attributes_msgs.label_names.append(emotion['Type'])
                    attributes_msgs.probabilities.append(emotion['Confidence'])
                    self.process_attributes("{}: {:.3f}".format(emotion['Type'], emotion['Confidence']), img_gray, bbox_msg)

            # Other attributes in https://docs.aws.amazon.com/sdkfornet/v3/apidocs/items/Rekognition/TFaceDetail.html
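            # e.g. Smile, Eyeglasses, Sunglasses, Beard, Mustache, EyesOpen and
            # MouthOpen carry a boolean Value, while Gender carries a string Value.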
            for key in face.keys():
                if isinstance(face[key], dict) and 'Confidence' in face[key] and 'Value' in face[key]:
                    if face[key]['Value'] is True:
                        face_msg.label += "; {}".format(key)
                    if face[key]['Value'] not in [True, False]:
                        face_msg.label += "; {}".format(face[key]['Value'])
                    if face[key]['Value'] is True:
                        attributes_msgs.label_names.append(key)
                        attributes_msgs.probabilities.append(face[key]['Confidence'])
                    elif face[key]['Value'] is False:  # if the attribute is False, use 100 - confidence
                        attributes_msgs.label_names.append(key)
                        attributes_msgs.probabilities.append(100 - face[key]['Confidence'])
                    else:
                        attributes_msgs.label_names.append(face[key]['Value'])
                        attributes_msgs.probabilities.append(face[key]['Confidence'])
                    self.process_attributes("{} : {} ({:.3f})".format(key, face[key]['Value'], face[key]['Confidence']), img_gray, bbox_msg)

            # Construct face message
            face_msg.label = face_msg.label[2:]  # skip the leading "; "
            face_msgs.faces.append(face_msg)
            # Construct pose message
            if pose_msg is not None:
                pose_msgs.poses.append(pose_msg)

        if self.use_window:
            cv2.imshow(image._connection_header['topic'], img_gray)
            cv2.waitKey(1)

        # if always_publish is False and no face was detected, do not publish any results
        if not self.always_publish and len(faces['FaceDetails']) <= 0:
            # debug info
            rospy.loginfo("processing time {}".format((rospy.Time.now() - start_time).to_sec()))
            return

        if self.image_pub.get_num_connections() > 0:
            self.image_pub.publish(self.bridge.cv2_to_imgmsg(
                img_gray, encoding='bgr8'))

        if self.image_comp_pub.get_num_connections() > 0:
            msg = CompressedImage()
            msg.header = image.header
            msg.format = "jpeg"
            msg.data = np.array(cv2.imencode('.jpg', img_gray)[1]).tobytes()
            self.image_comp_pub.publish(msg)

        if self.orig_image_pub.get_num_connections() > 0:
            self.orig_image_pub.publish(image)

        self.faces_pub.publish(face_msgs)
        self.poses_pub.publish(pose_msgs)
        self.attributes_pub.publish(attributes_msgs)
        self.landmarks_pub.publish(landmarks_msgs)

        # debug info
        rospy.logdebug("processing time {} on message taken at {} sec ago".format(
            (rospy.Time.now() - start_time).to_sec(),
            (rospy.Time.now() - image.header.stamp).to_sec()))


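# Example invocation (a sketch; remap 'image' to your camera topic and point
# ~aws_credentials_path at your credentials file):
#   rosrun jsk_perception aws_detect_faces.py image:=/camera/rgb/image_raw \
#       _aws_credentials_path:=/path/to/aws.json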
if __name__ == '__main__':
    rospy.init_node('aws_detect_faces')
    rospy.loginfo("ROS node initialized as {}".format(rospy.get_name()))
    detect_faces = DetectFaces()
    rospy.spin()

