object-detection.cpp
Go to the documentation of this file.
1 // License: Apache 2.0. See LICENSE file in root directory.
2 // Copyright(c) 2019 Intel Corporation. All Rights Reserved.
3 
4 
7 #include <easylogging++.h>
8 
9 
10 using namespace InferenceEngine;
11 
12 
const size_t DETECTED_OBJECT_SIZE = 7; // per-detection layout: [image_id, label, confidence, x_min, y_min, x_max, y_max]
14 
15 
16 namespace openvino_helpers
17 {
    // Construct an object-detection wrapper on top of base_detection.
    //
    // @param pathToModel         path to the OpenVINO IR (.xml) model file
    // @param detectionThreshold  minimum confidence for a detection to be returned by fetch_results()
    // @param isAsync             if true, inference requests are submitted asynchronously
    // @param maxBatch            batch size to configure on the network
    // @param isBatchDynamic      whether the batch size may change at runtime
    // @param doRawOutputMessages if true, every raw detection is logged, even below-threshold ones
    //
    // NOTE(review): "_n_enqued_frames" is misspelled ("enqueued") — the member is
    // declared elsewhere (header not visible here), so the name is left as-is.
    object_detection::object_detection(
        const std::string &pathToModel,
        double detectionThreshold,
        bool isAsync,
        int maxBatch, bool isBatchDynamic,
        bool doRawOutputMessages
    )
        : base_detection( "object detection", pathToModel, maxBatch, isBatchDynamic, isAsync, doRawOutputMessages )
        , _detection_threshold( detectionThreshold )
        , _max_results( 0 )
        , _n_enqued_frames( 0 ), _width( 0 ), _height( 0 )
    {
    }
31 
32 
34  {
35  if( !_n_enqued_frames )
36  return;
37  _n_enqued_frames = 0;
39  }
40 
41 
42  void object_detection::enqueue( const cv::Mat & frame )
43  {
44  if( !enabled() )
45  return;
46 
47  if( !_request )
48  _request = net.CreateInferRequestPtr();
49 
50  _width = static_cast<float>(frame.cols);
51  _height = static_cast<float>(frame.rows);
52 
53  Blob::Ptr inputBlob = _request->GetBlob( _input_layer_name );
54  matU8ToBlob<uint8_t>( frame, inputBlob );
55 
56  if( ! _im_info_name.empty() )
57  {
58  Blob::Ptr infoBlob = _request->GetBlob( _im_info_name );
59 
60  // (height, width, image_scale)
61  float * p = infoBlob->buffer().as< PrecisionTrait< Precision::FP32 >::value_type * >();
62  p[0] = static_cast< float >( _input_width );
63  p[1] = static_cast< float >( _input_height );
64  for( size_t k = 2; k < _im_info_size; k++ )
65  p[k] = 1.f; // all scale factors are set to 1.0
66  }
67 
68  _n_enqued_frames = 1;
69  }
70 
71 
73  {
74  LOG(INFO) << "Loading " << topoName << " model from: " << pathToModel;
75 
76  CNNNetReader netReader;
78  netReader.ReadNetwork( pathToModel );
80  //LOG(DEBUG) << "Batch size is set to " << maxBatch;
81  netReader.getNetwork().setBatchSize( maxBatch );
82 
84  std::string binFileName = remove_ext( pathToModel ) + ".bin";
85  netReader.ReadWeights( binFileName );
86 
87  // We support networks with one or two inputs, though others may be possible...
88  InputsDataMap inputInfo( netReader.getNetwork().getInputsInfo() );
89  if( inputInfo.size() != 1 && inputInfo.size() != 2 )
90  throw std::logic_error( "Object detection network should have only one or two inputs" );
91  for( auto & item : inputInfo )
92  {
93  if( item.second->getInputData()->getTensorDesc().getDims().size() == 4 )
94  {
95  // Blob "data" (1x4) will contain the actual image data (e.g., 1,3,224,224 or 1,3,300,300)
96  _input_layer_name = item.first;
97  _input_width = item.second->getTensorDesc().getDims()[2];
98  _input_height = item.second->getTensorDesc().getDims()[3];
99  item.second->setPrecision( Precision::U8 );
100  }
101  else if( item.second->getInputData()->getTensorDesc().getDims().size() == 2 )
102  {
103  // Blob "im_info" is optional: 1x3 (height, width, image_scale)
104  _im_info_name = item.first;
105  auto const & dims = item.second->getTensorDesc().getDims();
106  if( dims[0] != 1 )
107  throw std::logic_error( "Invalid input info: layer \"" + _im_info_name + "\" should be 1x3 or 1x6" );
108  _im_info_size = dims[1];
109  item.second->setPrecision( Precision::FP32 );
110  if( _im_info_size != 3 && _im_info_size != 6 )
111  throw std::logic_error( "Invalid input info: layer \"" + _im_info_name + "\" should be 1x3 or 1x6" );
112  }
113  }
114  if( _input_layer_name.empty() )
115  throw std::logic_error( "Could not find input \"data\" layer in network" );
116 
117  // Only a single "DetectionOuput" layer is expected
118  OutputsDataMap outputInfo( netReader.getNetwork().getOutputsInfo() );
119  if( outputInfo.size() != 1 )
120  throw std::logic_error(
121  "Object detection network should have only one output" );
122  _output_layer_name = outputInfo.begin()->first;
123  DataPtr & outputDataPtr = outputInfo.begin()->second;
124  const CNNLayerPtr outputLayer = netReader.getNetwork().getLayerByName( _output_layer_name.c_str() );
125  if( outputLayer->type != "DetectionOutput" )
126  throw std::logic_error(
127  "Object detection network output layer(" + outputLayer->name +
128  ") should be DetectionOutput, but was " + outputLayer->type );
129  if( outputLayer->params.find( "num_classes" ) == outputLayer->params.end() )
130  throw std::logic_error(
131  "Object detection network output layer (" +
132  _output_layer_name + ") should have num_classes integer attribute" );
133 
134  /*
135  Expect a blob of [1, 1, N, 7], where N is the number of detected bounding boxes.
136  For each detection, the description has the format: [image_id, label, conf, x_min, y_min, x_max, y_max]
137  image_id - ID of the image in the batch
138  label - predicted class ID
139  conf - confidence for the predicted class
140  (x_min, y_min) - coordinates of the top left bounding box corner
141  (x_max, y_max) - coordinates of the bottom right bounding box corner.
142  */
143  const SizeVector & outputDims = outputDataPtr->getTensorDesc().getDims();
144  if( outputDims.size() != 4 )
145  throw std::logic_error(
146  "Object detection network output dimensions should be 4, but was " + std::to_string( outputDims.size() ) );
147  size_t objectSize = outputDims[3];
148  if( objectSize != DETECTED_OBJECT_SIZE )
149  throw std::logic_error(
150  "Object detection network output layer last dimension should be " +
151  std::to_string( DETECTED_OBJECT_SIZE ) + "; got " + std::to_string( objectSize ) );
152  _max_results = outputDims[2];
153  outputDataPtr->setPrecision( Precision::FP32 );
154 
155  return netReader.getNetwork();
156  }
157 
158 
159  std::vector< object_detection::Result > object_detection::fetch_results()
160  {
161  std::vector< Result > results;
162  const float *detections = _request->GetBlob( _output_layer_name )->buffer().as<float *>();
163 
164  for( size_t i = 0; i < _max_results; i++ )
165  {
166  float image_id = detections[i * DETECTED_OBJECT_SIZE + 0];
167  if( image_id < 0 )
168  break;
169 
170  // [image_id, label, confidence, x_min, y_min, x_max, y_max]
171  Result r;
172  r.label = static_cast<int>(detections[i * DETECTED_OBJECT_SIZE + 1]);
173  r.confidence = detections[i * DETECTED_OBJECT_SIZE + 2];
175  continue;
176  r.location.x = static_cast<int>(detections[i * DETECTED_OBJECT_SIZE + 3] * _width);
177  r.location.y = static_cast<int>(detections[i * DETECTED_OBJECT_SIZE + 4] * _height);
178  r.location.width = static_cast<int>(detections[i * DETECTED_OBJECT_SIZE + 5] * _width - r.location.x);
179  r.location.height = static_cast<int>(detections[i * DETECTED_OBJECT_SIZE + 6] * _height - r.location.y);
180 
181  if( doRawOutputMessages )
182  {
183  LOG(DEBUG)
184  << "[" << i << "," << r.label << "] element, prob = " << r.confidence
185  << " (" << r.location.x << "," << r.location.y << ")-(" << r.location.width << ","
186  << r.location.height << ")"
187  << ((r.confidence > _detection_threshold) ? " WILL BE RENDERED!" : "");
188  }
189 
191  results.push_back( r );
192  }
193 
194  return results;
195  }
196 }
GLfloat GLfloat p
Definition: glext.h:12687
InferenceEngine::InferRequest::Ptr _request
std::string remove_ext(const std::string &filepath)
InferenceEngine::ExecutableNetwork net
LOG(INFO)<< "Log message to default logger"
GLsizei const GLchar *const * string
const size_t DETECTED_OBJECT_SIZE
GLdouble f
GLdouble GLdouble r
InferenceEngine::CNNNetwork read_network() override
int i
#define INFO(msg)
Definition: catch.hpp:17429
void enqueue(const cv::Mat &frame)
std::vector< Result > fetch_results()
std::string to_string(T value)


librealsense2
Author(s): Sergey Dorodnicov , Doron Hirshberg , Mark Horn , Reagan Lopez , Itay Carpis
autogenerated on Mon May 3 2021 02:47:38