spatial_mobilenet_mono.cpp
Go to the documentation of this file.
#include <atomic>
#include <chrono>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Includes common necessary includes for development using depthai library
#include "depthai/depthai.hpp"
// Class labels for the 21-class PASCAL VOC MobileNet-SSD model.
// The NN reports a numeric label index which indexes into this vector
// (index 0 is the "background" class).
static const std::vector<std::string> labelMap = {
    "background", "aeroplane",   "bicycle", "bird",  "boat",      "bottle", "bus",
    "car",        "cat",         "chair",   "cow",   "diningtable", "dog",  "horse",
    "motorbike",  "person",      "pottedplant", "sheep", "sofa",  "train",  "tvmonitor"};
10 
11 static std::atomic<bool> syncNN{true};
12 
13 int main(int argc, char** argv) {
14  using namespace std;
15  using namespace std::chrono;
16  std::string nnPath(BLOB_PATH);
17 
18  // If path to blob specified, use that
19  if(argc > 1) {
20  nnPath = std::string(argv[1]);
21  }
22 
23  // Print which blob we are using
24  printf("Using blob at path: %s\n", nnPath.c_str());
25 
26  // Create pipeline
27  dai::Pipeline pipeline;
28 
29  // Define sources and outputs
30  auto monoLeft = pipeline.create<dai::node::MonoCamera>();
31  auto monoRight = pipeline.create<dai::node::MonoCamera>();
32  auto stereo = pipeline.create<dai::node::StereoDepth>();
33  auto spatialDetectionNetwork = pipeline.create<dai::node::MobileNetSpatialDetectionNetwork>();
34  auto imageManip = pipeline.create<dai::node::ImageManip>();
35 
36  auto xoutManip = pipeline.create<dai::node::XLinkOut>();
37  auto nnOut = pipeline.create<dai::node::XLinkOut>();
38  auto xoutDepth = pipeline.create<dai::node::XLinkOut>();
39 
40  xoutManip->setStreamName("right");
41  nnOut->setStreamName("detections");
42  xoutDepth->setStreamName("depth");
43 
44  // Properties
45  imageManip->initialConfig.setResize(300, 300);
46  // The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
47  imageManip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);
48 
50  monoLeft->setCamera("left");
52  monoRight->setCamera("right");
53 
54  // StereoDepth
55  stereo->setDefaultProfilePreset(dai::node::StereoDepth::PresetMode::HIGH_DENSITY);
56 
57  // Define a neural network that will make predictions based on the source frames
58  spatialDetectionNetwork->setConfidenceThreshold(0.5f);
59  spatialDetectionNetwork->setBlobPath(nnPath);
60  spatialDetectionNetwork->input.setBlocking(false);
61  spatialDetectionNetwork->setBoundingBoxScaleFactor(0.5);
62  spatialDetectionNetwork->setDepthLowerThreshold(100);
63  spatialDetectionNetwork->setDepthUpperThreshold(5000);
64 
65  // Linking
66  monoLeft->out.link(stereo->left);
67  monoRight->out.link(stereo->right);
68 
69  imageManip->out.link(spatialDetectionNetwork->input);
70  if(syncNN) {
71  spatialDetectionNetwork->passthrough.link(xoutManip->input);
72  } else {
73  imageManip->out.link(xoutManip->input);
74  }
75 
76  spatialDetectionNetwork->out.link(nnOut->input);
77 
78  stereo->rectifiedRight.link(imageManip->inputImage);
79  stereo->depth.link(spatialDetectionNetwork->inputDepth);
80  spatialDetectionNetwork->passthroughDepth.link(xoutDepth->input);
81 
82  // Connect to device and start pipeline
83  dai::Device device(pipeline);
84 
85  // Output queues will be used to get the rgb frames and nn data from the outputs defined above
86  auto previewQueue = device.getOutputQueue("right", 4, false);
87  auto detectionNNQueue = device.getOutputQueue("detections", 4, false);
88  auto depthQueue = device.getOutputQueue("depth", 4, false);
89 
90  auto startTime = steady_clock::now();
91  int counter = 0;
92  float fps = 0;
93  auto color = cv::Scalar(255, 255, 255);
94 
95  while(true) {
96  auto inRectified = previewQueue->get<dai::ImgFrame>();
97  auto inDet = detectionNNQueue->get<dai::SpatialImgDetections>();
98  auto inDepth = depthQueue->get<dai::ImgFrame>();
99 
100  counter++;
101  auto currentTime = steady_clock::now();
102  auto elapsed = duration_cast<duration<float>>(currentTime - startTime);
103  if(elapsed > seconds(1)) {
104  fps = counter / elapsed.count();
105  counter = 0;
106  startTime = currentTime;
107  }
108 
109  cv::Mat rectifiedRight = inRectified->getCvFrame();
110 
111  cv::Mat depthFrame = inDepth->getFrame(); // depthFrame values are in millimeters
112  cv::Mat depthFrameColor;
113 
114  cv::normalize(depthFrame, depthFrameColor, 255, 0, cv::NORM_INF, CV_8UC1);
115  cv::equalizeHist(depthFrameColor, depthFrameColor);
116  cv::applyColorMap(depthFrameColor, depthFrameColor, cv::COLORMAP_HOT);
117 
118  auto detections = inDet->detections;
119 
120  for(auto& detection : detections) {
121  auto roiData = detection.boundingBoxMapping;
122  auto roi = roiData.roi;
123  roi = roi.denormalize(depthFrameColor.cols, depthFrameColor.rows);
124  auto topLeft = roi.topLeft();
125  auto bottomRight = roi.bottomRight();
126  auto xmin = (int)topLeft.x;
127  auto ymin = (int)topLeft.y;
128  auto xmax = (int)bottomRight.x;
129  auto ymax = (int)bottomRight.y;
130  cv::rectangle(depthFrameColor, cv::Rect(cv::Point(xmin, ymin), cv::Point(xmax, ymax)), color, cv::FONT_HERSHEY_SIMPLEX);
131 
132  int x1 = detection.xmin * rectifiedRight.cols;
133  int y1 = detection.ymin * rectifiedRight.rows;
134  int x2 = detection.xmax * rectifiedRight.cols;
135  int y2 = detection.ymax * rectifiedRight.rows;
136 
137  uint32_t labelIndex = detection.label;
138  std::string labelStr = to_string(labelIndex);
139  if(labelIndex < labelMap.size()) {
140  labelStr = labelMap[labelIndex];
141  }
142  cv::putText(rectifiedRight, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
143  std::stringstream confStr;
144  confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
145  cv::putText(rectifiedRight, confStr.str(), cv::Point(x1 + 10, y1 + 35), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
146 
147  std::stringstream depthX;
148  depthX << "X: " << (int)detection.spatialCoordinates.x << " mm";
149  cv::putText(rectifiedRight, depthX.str(), cv::Point(x1 + 10, y1 + 50), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
150  std::stringstream depthY;
151  depthY << "Y: " << (int)detection.spatialCoordinates.y << " mm";
152  cv::putText(rectifiedRight, depthY.str(), cv::Point(x1 + 10, y1 + 65), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
153  std::stringstream depthZ;
154  depthZ << "Z: " << (int)detection.spatialCoordinates.z << " mm";
155  cv::putText(rectifiedRight, depthZ.str(), cv::Point(x1 + 10, y1 + 80), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
156 
157  cv::rectangle(rectifiedRight, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
158  }
159 
160  std::stringstream fpsStr;
161  fpsStr << std::fixed << std::setprecision(2) << fps;
162  cv::putText(rectifiedRight, fpsStr.str(), cv::Point(2, rectifiedRight.rows - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color);
163 
164  cv::imshow("depth", depthFrameColor);
165  cv::imshow("rectified right", rectifiedRight);
166 
167  int key = cv::waitKey(1);
168  if(key == 'q' || key == 'Q') {
169  return 0;
170  }
171  }
172  return 0;
173 }
dai::node::MonoCamera::out
Output out
Definition: MonoCamera.hpp:47
dai::node::XLinkOut
XLinkOut node. Sends messages over XLink.
Definition: XLinkOut.hpp:14
dai::node::MonoCamera::setCamera
void setCamera(std::string name)
Definition: MonoCamera.cpp:36
dai::Pipeline
Represents the pipeline, set of nodes and connections between them.
Definition: Pipeline.hpp:100
dai::node::StereoDepth
StereoDepth node. Compute stereo disparity and depth from left-right image pair.
Definition: StereoDepth.hpp:15
fps
static constexpr int fps
Definition: rgb_depth_aligned.cpp:12
syncNN
static std::atomic< bool > syncNN
Definition: spatial_mobilenet_mono.cpp:11
dai::node::MonoCamera
MonoCamera node. For use with grayscale sensors.
Definition: MonoCamera.hpp:17
dai::SpatialImgDetections
Definition: SpatialImgDetections.hpp:15
dai::RawImgFrame::Type::BGR888p
@ BGR888p
dai::Device::getOutputQueue
std::shared_ptr< DataOutputQueue > getOutputQueue(const std::string &name)
Definition: Device.cpp:86
dai::MonoCameraProperties::SensorResolution::THE_400_P
@ THE_400_P
dai::node::XLinkOut::input
Input input
Definition: XLinkOut.hpp:27
depthai.hpp
dai::Pipeline::create
std::shared_ptr< N > create()
Definition: Pipeline.hpp:145
dai::node::MonoCamera::setResolution
void setResolution(Properties::SensorResolution resolution)
Set sensor resolution.
Definition: MonoCamera.cpp:82
dai::ImgFrame::getCvFrame
void getCvFrame(T...)
Definition: ImgFrame.hpp:222
dai::ImgFrame
Definition: ImgFrame.hpp:25
nanorpc::core::exception::to_string
std::string to_string(std::exception const &e)
Definition: exception.h:46
dai::node::MobileNetSpatialDetectionNetwork
Definition: SpatialDetectionNetwork.hpp:109
dai::node::ImageManip
ImageManip node. Capability to crop, resize, warp, ... incoming image frames.
Definition: ImageManip.hpp:15
main
int main(int argc, char **argv)
Definition: spatial_mobilenet_mono.cpp:13
dai::node::StereoDepth::PresetMode::HIGH_DENSITY
@ HIGH_DENSITY
dai::Device
Definition: Device.hpp:21
std
Definition: Node.hpp:366
dai::Node::Output::link
void link(const Input &in)
Definition: Node.cpp:84
dai::node::XLinkOut::setStreamName
void setStreamName(const std::string &name)
Definition: XLinkOut.cpp:13
dai::node::StereoDepth::setConfidenceThreshold
void setConfidenceThreshold(int confThr)
Definition: StereoDepth.cpp:114
labelMap
static const std::vector< std::string > labelMap
Definition: spatial_mobilenet_mono.cpp:7


depthai
Author(s): Martin Peterlin
autogenerated on Sat Mar 22 2025 02:58:19