spatial_tiny_yolo.cpp
Go to the documentation of this file.
1 #include <chrono>
2 
3 #include "utility.hpp"
4 
5 // Includes common necessary includes for development using depthai library
6 #include "depthai/depthai.hpp"
7 
// Detection class labels, indexed by the network's label id.
// These are the standard 80 COCO class names (the network below is
// configured with setNumClasses(80)); a label id outside this range is
// printed numerically instead.
static const std::vector<std::string> labelMap = {
    "person",         "bicycle",    "car",           "motorbike",     "aeroplane",   "bus",           "train",
    "truck",          "boat",       "traffic light", "fire hydrant",  "stop sign",   "parking meter", "bench",
    "bird",           "cat",        "dog",           "horse",         "sheep",       "cow",           "elephant",
    "bear",           "zebra",      "giraffe",       "backpack",      "umbrella",    "handbag",       "tie",
    "suitcase",       "frisbee",    "skis",          "snowboard",     "sports ball", "kite",          "baseball bat",
    "baseball glove", "skateboard", "surfboard",     "tennis racket", "bottle",      "wine glass",    "cup",
    "fork",           "knife",      "spoon",         "bowl",          "banana",      "apple",         "sandwich",
    "orange",         "broccoli",   "carrot",        "hot dog",       "pizza",       "donut",         "cake",
    "chair",          "sofa",       "pottedplant",   "bed",           "diningtable", "toilet",        "tvmonitor",
    "laptop",         "mouse",      "remote",        "keyboard",      "cell phone",  "microwave",     "oven",
    "toaster",        "sink",       "refrigerator",  "book",          "clock",       "vase",          "scissors",
    "teddy bear",     "hair drier", "toothbrush"};
18 
19 static std::atomic<bool> syncNN{true};
20 
21 int main(int argc, char** argv) {
22  using namespace std;
23  using namespace std::chrono;
24  std::string nnPath(BLOB_PATH);
25 
26  // If path to blob specified, use that
27  if(argc > 1) {
28  nnPath = std::string(argv[1]);
29  }
30 
31  // Print which blob we are using
32  printf("Using blob at path: %s\n", nnPath.c_str());
33 
34  // Create pipeline
35  dai::Pipeline pipeline;
36 
37  // Define sources and outputs
38  auto camRgb = pipeline.create<dai::node::ColorCamera>();
39  auto spatialDetectionNetwork = pipeline.create<dai::node::YoloSpatialDetectionNetwork>();
40  auto monoLeft = pipeline.create<dai::node::MonoCamera>();
41  auto monoRight = pipeline.create<dai::node::MonoCamera>();
42  auto stereo = pipeline.create<dai::node::StereoDepth>();
43 
44  auto xoutRgb = pipeline.create<dai::node::XLinkOut>();
45  auto xoutNN = pipeline.create<dai::node::XLinkOut>();
46  auto xoutDepth = pipeline.create<dai::node::XLinkOut>();
47  auto nnNetworkOut = pipeline.create<dai::node::XLinkOut>();
48 
49  xoutRgb->setStreamName("rgb");
50  xoutNN->setStreamName("detections");
51  xoutDepth->setStreamName("depth");
52  nnNetworkOut->setStreamName("nnNetwork");
53 
54  // Properties
55  camRgb->setPreviewSize(416, 416);
57  camRgb->setInterleaved(false);
58  camRgb->setColorOrder(dai::ColorCameraProperties::ColorOrder::BGR);
59 
61  monoLeft->setCamera("left");
63  monoRight->setCamera("right");
64 
65  // setting node configs
66  stereo->setDefaultProfilePreset(dai::node::StereoDepth::PresetMode::HIGH_DENSITY);
67  // Align depth map to the perspective of RGB camera, on which inference is done
68  stereo->setDepthAlign(dai::CameraBoardSocket::CAM_A);
69  stereo->setOutputSize(monoLeft->getResolutionWidth(), monoLeft->getResolutionHeight());
70 
71  spatialDetectionNetwork->setBlobPath(nnPath);
72  spatialDetectionNetwork->setConfidenceThreshold(0.5f);
73  spatialDetectionNetwork->input.setBlocking(false);
74  spatialDetectionNetwork->setBoundingBoxScaleFactor(0.5);
75  spatialDetectionNetwork->setDepthLowerThreshold(100);
76  spatialDetectionNetwork->setDepthUpperThreshold(5000);
77 
78  // yolo specific parameters
79  spatialDetectionNetwork->setNumClasses(80);
80  spatialDetectionNetwork->setCoordinateSize(4);
81  spatialDetectionNetwork->setAnchors({10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319});
82  spatialDetectionNetwork->setAnchorMasks({{"side26", {1, 2, 3}}, {"side13", {3, 4, 5}}});
83  spatialDetectionNetwork->setIouThreshold(0.5f);
84 
85  // Linking
86  monoLeft->out.link(stereo->left);
87  monoRight->out.link(stereo->right);
88 
89  camRgb->preview.link(spatialDetectionNetwork->input);
90  if(syncNN) {
91  spatialDetectionNetwork->passthrough.link(xoutRgb->input);
92  } else {
93  camRgb->preview.link(xoutRgb->input);
94  }
95 
96  spatialDetectionNetwork->out.link(xoutNN->input);
97 
98  stereo->depth.link(spatialDetectionNetwork->inputDepth);
99  spatialDetectionNetwork->passthroughDepth.link(xoutDepth->input);
100  spatialDetectionNetwork->outNetwork.link(nnNetworkOut->input);
101 
102  // Connect to device and start pipeline
103  dai::Device device(pipeline);
104 
105  // Output queues will be used to get the rgb frames and nn data from the outputs defined above
106  auto previewQueue = device.getOutputQueue("rgb", 4, false);
107  auto detectionNNQueue = device.getOutputQueue("detections", 4, false);
108  auto depthQueue = device.getOutputQueue("depth", 4, false);
109  auto networkQueue = device.getOutputQueue("nnNetwork", 4, false);
110 
111  auto startTime = steady_clock::now();
112  int counter = 0;
113  float fps = 0;
114  auto color = cv::Scalar(255, 255, 255);
115  bool printOutputLayersOnce = true;
116 
117  while(true) {
118  auto imgFrame = previewQueue->get<dai::ImgFrame>();
119  auto inDet = detectionNNQueue->get<dai::SpatialImgDetections>();
120  auto depth = depthQueue->get<dai::ImgFrame>();
121  auto inNN = networkQueue->get<dai::NNData>();
122 
123  if(printOutputLayersOnce && inNN) {
124  std::cout << "Output layer names: ";
125  for(const auto& ten : inNN->getAllLayerNames()) {
126  std::cout << ten << ", ";
127  }
128  std::cout << std::endl;
129  printOutputLayersOnce = false;
130  }
131 
132  cv::Mat frame = imgFrame->getCvFrame();
133  cv::Mat depthFrame = depth->getFrame(); // depthFrame values are in millimeters
134 
135  cv::Mat depthFrameColor;
136  cv::normalize(depthFrame, depthFrameColor, 255, 0, cv::NORM_INF, CV_8UC1);
137  cv::equalizeHist(depthFrameColor, depthFrameColor);
138  cv::applyColorMap(depthFrameColor, depthFrameColor, cv::COLORMAP_HOT);
139 
140  counter++;
141  auto currentTime = steady_clock::now();
142  auto elapsed = duration_cast<duration<float>>(currentTime - startTime);
143  if(elapsed > seconds(1)) {
144  fps = counter / elapsed.count();
145  counter = 0;
146  startTime = currentTime;
147  }
148 
149  auto detections = inDet->detections;
150 
151  for(const auto& detection : detections) {
152  auto roiData = detection.boundingBoxMapping;
153  auto roi = roiData.roi;
154  roi = roi.denormalize(depthFrameColor.cols, depthFrameColor.rows);
155  auto topLeft = roi.topLeft();
156  auto bottomRight = roi.bottomRight();
157  auto xmin = (int)topLeft.x;
158  auto ymin = (int)topLeft.y;
159  auto xmax = (int)bottomRight.x;
160  auto ymax = (int)bottomRight.y;
161  cv::rectangle(depthFrameColor, cv::Rect(cv::Point(xmin, ymin), cv::Point(xmax, ymax)), color, cv::FONT_HERSHEY_SIMPLEX);
162 
163  int x1 = detection.xmin * frame.cols;
164  int y1 = detection.ymin * frame.rows;
165  int x2 = detection.xmax * frame.cols;
166  int y2 = detection.ymax * frame.rows;
167 
168  uint32_t labelIndex = detection.label;
169  std::string labelStr = to_string(labelIndex);
170  if(labelIndex < labelMap.size()) {
171  labelStr = labelMap[labelIndex];
172  }
173  cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
174  std::stringstream confStr;
175  confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
176  cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 35), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
177 
178  std::stringstream depthX;
179  depthX << "X: " << (int)detection.spatialCoordinates.x << " mm";
180  cv::putText(frame, depthX.str(), cv::Point(x1 + 10, y1 + 50), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
181  std::stringstream depthY;
182  depthY << "Y: " << (int)detection.spatialCoordinates.y << " mm";
183  cv::putText(frame, depthY.str(), cv::Point(x1 + 10, y1 + 65), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
184  std::stringstream depthZ;
185  depthZ << "Z: " << (int)detection.spatialCoordinates.z << " mm";
186  cv::putText(frame, depthZ.str(), cv::Point(x1 + 10, y1 + 80), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
187 
188  cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
189  }
190 
191  std::stringstream fpsStr;
192  fpsStr << std::fixed << std::setprecision(2) << fps;
193  cv::putText(frame, fpsStr.str(), cv::Point(2, imgFrame->getHeight() - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color);
194 
195  cv::imshow("depth", depthFrameColor);
196  cv::imshow("rgb", frame);
197 
198  int key = cv::waitKey(1);
199  if(key == 'q' || key == 'Q') {
200  return 0;
201  }
202  }
203  return 0;
204 }
dai::node::MonoCamera::out
Output out
Definition: MonoCamera.hpp:47
dai::node::XLinkOut
XLinkOut node. Sends messages over XLink.
Definition: XLinkOut.hpp:14
dai::node::MonoCamera::setCamera
void setCamera(std::string name)
Definition: MonoCamera.cpp:36
dai::Pipeline
Represents the pipeline, set of nodes and connections between them.
Definition: Pipeline.hpp:100
dai::NNData
Definition: NNData.hpp:16
dai::node::StereoDepth
StereoDepth node. Compute stereo disparity and depth from left-right image pair.
Definition: StereoDepth.hpp:15
fps
static constexpr int fps
Definition: rgb_depth_aligned.cpp:12
dai::CameraBoardSocket::CAM_A
@ CAM_A
dai::node::MonoCamera
MonoCamera node. For use with grayscale sensors.
Definition: MonoCamera.hpp:17
dai::node::ColorCamera
ColorCamera node. For use with color sensors.
Definition: ColorCamera.hpp:16
dai::SpatialImgDetections
Definition: SpatialImgDetections.hpp:15
dai::Device::getOutputQueue
std::shared_ptr< DataOutputQueue > getOutputQueue(const std::string &name)
Definition: Device.cpp:86
dai::MonoCameraProperties::SensorResolution::THE_400_P
@ THE_400_P
dai::node::YoloSpatialDetectionNetwork
Definition: SpatialDetectionNetwork.hpp:117
main
int main(int argc, char **argv)
Definition: spatial_tiny_yolo.cpp:21
dai::node::XLinkOut::input
Input input
Definition: XLinkOut.hpp:27
depthai.hpp
dai::Pipeline::create
std::shared_ptr< N > create()
Definition: Pipeline.hpp:145
dai::node::MonoCamera::setResolution
void setResolution(Properties::SensorResolution resolution)
Set sensor resolution.
Definition: MonoCamera.cpp:82
dai::ColorCameraProperties::ColorOrder::BGR
@ BGR
dai::ImgFrame
Definition: ImgFrame.hpp:25
nanorpc::core::exception::to_string
std::string to_string(std::exception const &e)
Definition: exception.h:46
dai::ColorCameraProperties::SensorResolution::THE_1080_P
@ THE_1080_P
1920 × 1080
dai::node::StereoDepth::PresetMode::HIGH_DENSITY
@ HIGH_DENSITY
dai::Device
Definition: Device.hpp:21
dai::node::ColorCamera::preview
Output preview
Definition: ColorCamera.hpp:69
std
Definition: Node.hpp:366
dai::Node::Output::link
void link(const Input &in)
Definition: Node.cpp:84
dai::node::XLinkOut::setStreamName
void setStreamName(const std::string &name)
Definition: XLinkOut.cpp:13
utility.hpp
syncNN
static std::atomic< bool > syncNN
Definition: spatial_tiny_yolo.cpp:19
labelMap
static const std::vector< std::string > labelMap
Definition: spatial_tiny_yolo.cpp:8


depthai
Author(s): Martin Peterlin
autogenerated on Sat Mar 22 2025 02:58:19