openvino-face-detection.cpp
Go to the documentation of this file.
1 // License: Apache 2.0. See LICENSE file in root directory.
2 // Copyright(c) 2020 Intel Corporation. All Rights Reserved.
3 
4 // NOTE: This file will be compiled only with INTEL_OPENVINO_DIR pointing to an OpenVINO install!
5 
8 
12 #include <cv-helpers.hpp>
13 
14 namespace openvino = InferenceEngine;
15 
16 /* We need to extend the basic detected_object to include facial characteristics
17 */
19 {
20  float _age;
21  float _male_score, _female_score; // cumulative - see update_gender()
22 
23 public:
24  using ptr = std::shared_ptr< detected_face >;
25 
26  explicit detected_face( size_t id,
27  cv::Rect const& location,
28  float male_prob,
29  float age,
30  cv::Rect const& depth_location = cv::Rect{},
31  float intensity = 1,
32  float depth = 0 )
33  : detected_object( id, std::string(), location, depth_location, intensity, depth )
34  , _age( age )
35  , _male_score( male_prob > 0.5f ? male_prob - 0.5f : 0.f )
36  , _female_score( male_prob > 0.5f ? 0.f : 0.5f - male_prob )
37  {
38  }
39 
41  {
42  _age = (_age == -1) ? value : 0.95f * _age + 0.05f * value;
43  }
44 
46  {
47  if( value >= 0 )
48  {
49  if( value > 0.5 )
50  _male_score += value - 0.5f;
51  else
52  _female_score += 0.5f - value;
53  }
54  }
55 
56  int get_age() const { return static_cast< int >( std::floor( _age + 0.5f )); }
57  bool is_male() const { return( _male_score > _female_score ); }
58  bool is_female() const { return !is_male(); }
59 };
60 
61 
62 /* Define a filter that will perform facial detection using OpenVINO
63 */
65 {
66  InferenceEngine::Core _ie;
70  size_t _id = 0;
71 
72  std::shared_ptr< atomic_objects_in_frame > _objects;
73 
74 public:
77  /*
78  This face detector is from the OpenCV Model Zoo:
79  https://github.com/opencv/open_model_zoo/blob/master/models/intel/face-detection-adas-0001/description/face-detection-adas-0001.md
80  */
81  , _face_detector(
82  "face-detection-adas-0001.xml",
83  0.5, // Probability threshold
84  false ) // Not async
85  /*
86  */
87  , _age_detector(
88  "age-gender-recognition-retail-0013.xml",
89  false ) // Not async
90  {
91  }
92 
94  {
95  // Complete background worker to ensure it releases the instance's resources in controlled manner
96  release_background_worker();
97  }
98 
99 public:
// Called when post-processing starts for a sub-device: share the model's
// detected-objects list so the viewer can render whatever we find.
// NOTE(review): the doc extraction dropped one original line (102) at the top of
// this body -- presumably a base-class start() call; confirm against the real source.
void start( rs2::subdevice_model & model ) override
{
    _objects = model.detected_objects;
}
105 
106 private:
107  void worker_start() override
108  {
109  LOG(INFO) << "Loading CPU extensions...";
110  std::string const device_name{ "CPU" };
111 
112 #ifdef OPENVINO2019
113  _ie.AddExtension(std::make_shared< openvino::Extensions::Cpu::CpuExtensions >(), device_name);
114 #endif
115 
116  _face_detector.load_into( _ie, device_name);
117  _age_detector.load_into( _ie, device_name);
118  }
119 
120  /*
121  Returns the "intensity" of the face in the picture, and calculates the distance to it, ignoring
122  Invalid depth pixels or those outside a range that would be appropriate for a face.
123  */
124  static float calc_face_attrs(
125  const rs2::video_frame & cf,
126  const rs2::depth_frame & df,
127  cv::Rect const & depth_bbox,
128  float * p_mean_depth )
129  {
130  uint16_t const * const pdw = reinterpret_cast<const uint16_t*>( df.get_data() );
131  uint8_t const * const pcb = reinterpret_cast<uint8_t*>(const_cast<void*>( cf.get_data() ));
132  float const depth_scale = df.get_units();
133 
134  int const depth_width = df.get_width();
135  int const color_width = cf.get_width();
136  int const color_bpp = cf.get_bytes_per_pixel();
137 
138  int const top = depth_bbox.y;
139  int const bot = top + depth_bbox.height;
140  int const left = depth_bbox.x;
141  int const right = left + depth_bbox.width;
142 
143  // Find a center point that has a depth on it
144  int center_x = (left + right) / 2;
145  int center_index = (top + bot) / 2 * depth_width + center_x;
146  for( int d = 1; !pdw[center_index] && d < 10; ++d )
147  {
148  if( pdw[center_index + d] ) center_index += d;
149  if( pdw[center_index - d] ) center_index -= d;
150  if( pdw[center_index + depth_width] ) center_index += depth_width;
151  if( pdw[center_index - depth_width] ) center_index -= depth_width;
152  }
153  if( !pdw[center_index] )
154  {
155  if( p_mean_depth )
156  *p_mean_depth = 0;
157  return 1;
158  }
159  float const d_center = pdw[center_index] * depth_scale;
160 
161  // Set a "near" and "far" threshold -- anything closer or father, respectively,
162  // would be deemed not a part of the face and therefore background:
163  float const d_far_threshold = d_center + 0.2f;
164  float const d_near_threshold = std::max( d_center - 0.5f, 0.001f );
165  // Average human head diameter ~= 7.5" or ~19cm
166  // Assume that the center point is in the front of the face, so the near threshold
167  // should be very close to that, while the far farther...
168 
169  float total_luminance = 0;
170  float total_depth = 0;
171  unsigned pixel_count = 0;
172 #pragma omp parallel for schedule(dynamic) //Using OpenMP to try to parallelise the loop
173  for( int y = top; y < bot; ++y )
174  {
175  auto depth_pixel_index = y * depth_width + left;
176  for( int x = left; x < right; ++x, ++depth_pixel_index )
177  {
178  // Get the depth value of the current pixel
179  auto d = depth_scale * pdw[depth_pixel_index];
180 
181  // Check if the depth value is invalid (<=0) or greater than the threashold
182  if( d >= d_near_threshold && d <= d_far_threshold )
183  {
184  // Calculate the offset in other frame's buffer to current pixel
185  auto const coffset = depth_pixel_index * color_bpp;
186  auto const pc = &pcb[coffset];
187 
188  // Using RGB...
189  auto r = pc[0], g = pc[1], b = pc[2];
190  total_luminance += 0.2989f * r + 0.5870f * g + 0.1140f * b; // CCIR 601 -- see https://en.wikipedia.org/wiki/Luma_(video)
191  ++pixel_count;
192 
193  // And get a mean depth, too
194  total_depth += d;
195  }
196  }
197  }
198  if( p_mean_depth )
199  *p_mean_depth = pixel_count ? total_depth / pixel_count : 0;
200  return pixel_count ? total_luminance / pixel_count : 1;
201  }
202 
// Per-frame worker: detect faces in the color frame, match them against faces
// tracked in previous frames, run the age/gender network on any face that looks
// new, and publish the labeled bounding boxes for the viewer to draw.
void worker_body( rs2::frame f ) override
{
    auto fs = f.as< rs2::frameset >();
    auto cf = f;
    rs2::depth_frame df = NULL;
    if (fs)
    {
        cf = fs.get_color_frame();
        df = fs.get_depth_frame();
    }

    // No color frame available (or a lone non-color frame): nothing to detect on
    if ((!fs && f.get_profile().stream_name() != "Color") || (fs && !cf))
    {
        _objects->clear();
        return;
    }

    // A color video frame is the minimum we need for detection
    if( cf.get_profile().format() != RS2_FORMAT_RGB8 )
    {
        LOG(ERROR) << get_context(fs) << "color format must be RGB8; it's " << cf.get_profile().format();
        return;
    }

    // A depth frame is optional: if not enabled, we won't get it, and we simply won't provide depth info...

    if (df && df.get_profile().format() != RS2_FORMAT_Z16)
    {
        LOG(ERROR) << get_context(fs) << "depth format must be Z16; it's " << df.get_profile().format();
        return;
    }
    try
    {
        rs2_intrinsics color_intrin, depth_intrin;
        rs2_extrinsics color_extrin, depth_extrin;
        get_trinsics( cf, df, color_intrin, depth_intrin, color_extrin, depth_extrin );

        objects_in_frame objects;

        // Wrap the color data in a cv::Mat (no copy) and run face detection on it
        cv::Mat image( color_intrin.height, color_intrin.width, CV_8UC3, const_cast<void *>(cf.get_data()), cv::Mat::AUTO_STEP );
        _face_detector.enqueue( image );
        _face_detector.submit_request();
        auto results = _face_detector.fetch_results();

        // Keep last frame's faces around so we can recognize the same face again
        openvino_helpers::detected_objects prev_faces { std::move( _faces ) };
        _faces.clear();
        for( auto && result : results )
        {
            // Clip the detection to the image bounds
            cv::Rect rect = result.location & cv::Rect( 0, 0, image.cols, image.rows );
            detected_face::ptr face = std::dynamic_pointer_cast< detected_face >(
                openvino_helpers::find_object( rect, prev_faces ));
            try
            {
                // Use a mean of the face intensity to help identify faces -- if the intensity changes too much,
                // it's not the same face...
                float depth = 0, intensity = 1;
                cv::Rect depth_rect;
                if( df )
                {
                    // Map the color-space bbox into depth space so we can measure distance
                    rs2::rect depth_bbox = project_rect_to_depth(
                        rs2::rect { float( rect.x ), float( rect.y ), float( rect.width ), float( rect.height ) },
                        df,
                        color_intrin, depth_intrin, color_extrin, depth_extrin
                    );
                    // It is possible to get back an invalid rect!
                    if( depth_bbox == depth_bbox.intersection( rs2::rect { 0.f, 0.f, float( depth_intrin.width ), float( depth_intrin.height) } ) )
                    {
                        depth_rect = cv::Rect( int( depth_bbox.x ), int( depth_bbox.y ), int( depth_bbox.w ), int( depth_bbox.h ) );
                        intensity = calc_face_attrs( cf, df, depth_rect, &depth );
                    }
                    else
                    {
                        LOG(DEBUG) << get_context(fs) << "depth_bbox is no good!";
                    }
                }
                else
                {
                    // No depth stream: fall back to color-only intensity
                    intensity = openvino_helpers::calc_intensity( image( rect ) );
                }
                // Relative change vs. the matched face; a big jump means it's a different face
                float intensity_change = face ? std::abs( intensity - face->get_intensity() ) / face->get_intensity() : 1;
                float depth_change = ( face && face->get_depth() ) ? std::abs( depth - face->get_depth() ) / face->get_depth() : 0;

                if( intensity_change > 0.07f || depth_change > 0.2f )
                {
                    // Figure out the age for this new face
                    float age = 0, maleProb = 0.5;
                    // Enlarge the bounding box around the detected face for more robust operation of face analytics networks
                    // NOTE(review): the doc extraction dropped original line 291 here -- by the
                    // surrounding comment, presumably an openvino_helpers::adjust_face_bbox(rect, ...)
                    // expression whose result is intersected with the image rect; confirm against
                    // the real source before relying on this text.
                    cv::Mat face_image = image(
                        & cv::Rect( 0, 0, image.cols, image.rows ) );
                    _age_detector.enqueue( face_image );
                    _age_detector.submit_request();
                    _age_detector.wait();
                    auto age_gender = _age_detector[0];
                    age = age_gender.age;
                    maleProb = age_gender.maleProb;
                    // Note: we may want to update the gender/age for each frame, as it may change...
                    face = std::make_shared< detected_face >( _id++, rect, maleProb, age, depth_rect, intensity, depth );
                }
                else
                {
                    // Same face as before: just move it to the new location
                    face->move( rect, depth_rect, intensity, depth );
                }

                _faces.push_back( face );
            }
            catch( ... )
            {
                LOG(ERROR) << get_context(fs) << "Unhandled exception!!!";
            }
        }

        // Convert tracked faces into normalized, labeled objects for the viewer
        for( auto && object : _faces )
        {
            auto face = std::dynamic_pointer_cast<detected_face>( object );
            cv::Rect const & loc = face->get_location();
            rs2::rect bbox { float( loc.x ), float( loc.y ), float( loc.width ), float( loc.height ) };
            rs2::rect normalized_color_bbox = bbox.normalize( rs2::rect { 0, 0, float(color_intrin.width), float(color_intrin.height) } );
            rs2::rect normalized_depth_bbox = normalized_color_bbox;
            if( df )
            {
                cv::Rect const & depth_loc = face->get_depth_location();
                rs2::rect depth_bbox { float( depth_loc.x ), float( depth_loc.y ), float( depth_loc.width ), float( depth_loc.height ) };
                normalized_depth_bbox = depth_bbox.normalize( rs2::rect { 0, 0, float( df.get_width() ), float( df.get_height() ) } );
            }
            // Label is a gender icon (Font Awesome glyph) followed by the age estimate
            objects.emplace_back(
                face->get_id(),
                rs2::to_string() << (face->is_male() ? u8"\uF183" : u8"\uF182") << " " << face->get_age(),
                normalized_color_bbox,
                normalized_depth_bbox,
                face->get_depth()
            );
        }

        // Publish under the shared lock; only swap in results when the block is
        // enabled and the sensor is on, otherwise wipe what's shown
        std::lock_guard< std::mutex > lock( _objects->mutex );
        if( is_pb_enabled() )
        {
            if( _objects->sensor_is_on )
                _objects->swap( objects );
        }
        else
        {
            _objects->clear();
        }
    }
    catch( const std::exception & e )
    {
        LOG(ERROR) << get_context(fs) << e.what();
    }
    catch( ... )
    {
        LOG(ERROR) << get_context(fs) << "Unhandled exception caught in openvino_face_detection";
    }
}
357 
// Called when the user toggles this processing block on/off; on disable, wipe
// any published objects so stale boxes don't remain on screen.
// NOTE(review): the doc extraction dropped one original line (360) at the top of
// this body -- presumably a base-class call; confirm against the real source.
void on_processing_block_enable( bool e ) override
{
    if( !e )
    {
        // Make sure all the objects go away!
        std::lock_guard< std::mutex > lock( _objects->mutex );
        _objects->clear();
    }
}
368 
369 };
370 
371 
372 static auto it = post_processing_filters_list::register_filter< openvino_face_detection >( "Face Detection : OpenVINO" );
373 
float y
Definition: rendering.h:499
static const textual_icon lock
Definition: model-views.h:218
openvino_helpers::age_gender_detection _age_detector
GLboolean GLboolean GLboolean b
GLboolean GLboolean g
void on_processing_block_enable(bool e) override
GLint y
float calc_intensity(const cv::Mat &src)
GLuint const GLchar * name
GLdouble GLdouble GLdouble top
detected_object(size_t id, std::string const &label, cv::Rect const &location, cv::Rect const &depth_location=cv::Rect{}, float intensity=1, float depth=0)
rect intersection(const rect &other) const
Definition: rendering.h:543
GLint location
GLfloat value
std::list< detected_object::ptr > detected_objects
stream_profile get_profile() const
Definition: rs_frame.hpp:557
int get_bytes_per_pixel() const
Definition: rs_frame.hpp:707
UINT8_TYPE u8
Definition: sqlite3.c:11450
float get_units() const
Definition: rs_frame.hpp:845
GLint GLint GLsizei GLsizei GLsizei depth
LOG(INFO)<< "Log message to default logger"
const void * get_data() const
Definition: rs_frame.hpp:545
unsigned short uint16_t
Definition: stdint.h:79
std::string stream_name() const
Definition: rs_frame.hpp:113
GLsizei const GLchar *const * string
d
Definition: rmse.py:171
std::array< point3d, 4 > object
unsigned char uint8_t
Definition: stdint.h:78
e
Definition: rmse.py:177
std::shared_ptr< atomic_objects_in_frame > _objects
GLenum GLenum GLsizei void * image
video_frame get_color_frame() const
Definition: rs_frame.hpp:1015
float x
Definition: rendering.h:499
std::shared_ptr< atomic_objects_in_frame > detected_objects
Definition: model-views.h:623
GLdouble f
cv::Rect adjust_face_bbox(cv::Rect const &r, float enlarge_coefficient=1, float dx_coefficient=1, float dy_coefficient=1)
float h
Definition: rendering.h:500
GLdouble GLdouble r
void load_into(InferenceEngine::Core &ie, const std::string &deviceName)
GLdouble x
void start(rs2::subdevice_model &model) override
cv::Rect const & get_location() const
virtual void on_processing_block_enable(bool e)
std::vector< object_in_frame > objects_in_frame
void update_age(float value)
GLint left
Definition: glext.h:1963
void worker_body(rs2::frame f) override
float w
Definition: rendering.h:500
void update_gender(float value)
Cross-stream extrinsics: encodes the topology describing how the different devices are oriented...
Definition: rs_sensor.h:96
Definition: example.hpp:70
rs2_format format() const
Definition: rs_frame.hpp:44
void start(rs2::subdevice_model &model) override
GLdouble right
static auto it
int get_height() const
Definition: rs_frame.hpp:671
detected_object::ptr find_object(cv::Rect rect, detected_objects const &objects)
typename::boost::move_detail::remove_reference< T >::type && move(T &&t) BOOST_NOEXCEPT
Video stream intrinsics.
Definition: rs_types.h:58
rect normalize(const rect &normalize_to) const
Definition: rendering.h:525
static float calc_face_attrs(const rs2::video_frame &cf, const rs2::depth_frame &df, cv::Rect const &depth_bbox, float *p_mean_depth)
openvino_helpers::object_detection _face_detector
#define NULL
Definition: tinycthread.c:47
#define INFO(msg)
Definition: catch.hpp:17429
openvino_face_detection(std::string const &name)
void enqueue(const cv::Mat &frame)
GLuint64EXT * result
Definition: glext.h:10921
std::shared_ptr< detected_face > ptr
int get_width() const
Definition: rs_frame.hpp:659
GLenum GLuint GLint GLenum face
Definition: glext.h:3377
std::vector< Result > fetch_results()
openvino_helpers::detected_objects _faces
detected_face(size_t id, cv::Rect const &location, float male_prob, float age, cv::Rect const &depth_location=cv::Rect{}, float intensity=1, float depth=0)
T as() const
Definition: rs_frame.hpp:580
std::string to_string(T value)


librealsense2
Author(s): Sergey Dorodnicov , Doron Hirshberg , Mark Horn , Reagan Lopez , Itay Carpis
autogenerated on Mon May 3 2021 02:47:38