8 static const std::vector<std::string>
labelMap = {
9 "person",
"bicycle",
"car",
"motorbike",
"aeroplane",
"bus",
"train",
"truck",
"boat",
10 "traffic light",
"fire hydrant",
"stop sign",
"parking meter",
"bench",
"bird",
"cat",
"dog",
"horse",
11 "sheep",
"cow",
"elephant",
"bear",
"zebra",
"giraffe",
"backpack",
"umbrella",
"handbag",
12 "tie",
"suitcase",
"frisbee",
"skis",
"snowboard",
"sports ball",
"kite",
"baseball bat",
"baseball glove",
13 "skateboard",
"surfboard",
"tennis racket",
"bottle",
"wine glass",
"cup",
"fork",
"knife",
"spoon",
14 "bowl",
"banana",
"apple",
"sandwich",
"orange",
"broccoli",
"carrot",
"hot dog",
"pizza",
15 "donut",
"cake",
"chair",
"sofa",
"pottedplant",
"bed",
"diningtable",
"toilet",
"tvmonitor",
16 "laptop",
"mouse",
"remote",
"keyboard",
"cell phone",
"microwave",
"oven",
"toaster",
"sink",
17 "refrigerator",
"book",
"clock",
"vase",
"scissors",
"teddy bear",
"hair drier",
"toothbrush"};
19 static std::atomic<bool>
syncNN{
true};
21 int main(
int argc,
char** argv) {
23 using namespace std::chrono;
24 std::string nnPath(BLOB_PATH);
28 nnPath = std::string(argv[1]);
32 printf(
"Using blob at path: %s\n", nnPath.c_str());
49 xoutRgb->setStreamName(
"rgb");
55 camRgb->setPreviewSize(416, 416);
57 camRgb->setInterleaved(
false);
61 monoLeft->setCamera(
"left");
69 stereo->setOutputSize(monoLeft->getResolutionWidth(), monoLeft->getResolutionHeight());
71 spatialDetectionNetwork->setBlobPath(nnPath);
72 spatialDetectionNetwork->setConfidenceThreshold(0.5f);
73 spatialDetectionNetwork->input.setBlocking(
false);
74 spatialDetectionNetwork->setBoundingBoxScaleFactor(0.5);
75 spatialDetectionNetwork->setDepthLowerThreshold(100);
76 spatialDetectionNetwork->setDepthUpperThreshold(5000);
79 spatialDetectionNetwork->setNumClasses(80);
80 spatialDetectionNetwork->setCoordinateSize(4);
81 spatialDetectionNetwork->setAnchors({10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319});
82 spatialDetectionNetwork->setAnchorMasks({{
"side26", {1, 2, 3}}, {
"side13", {3, 4, 5}}});
83 spatialDetectionNetwork->setIouThreshold(0.5f);
86 monoLeft->out.link(stereo->left);
87 monoRight->
out.
link(stereo->right);
89 camRgb->preview.link(spatialDetectionNetwork->input);
91 spatialDetectionNetwork->passthrough.link(xoutRgb->input);
96 spatialDetectionNetwork->out.link(xoutNN->
input);
98 stereo->depth.link(spatialDetectionNetwork->inputDepth);
99 spatialDetectionNetwork->passthroughDepth.link(xoutDepth->
input);
100 spatialDetectionNetwork->outNetwork.link(nnNetworkOut->
input);
107 auto detectionNNQueue = device.
getOutputQueue(
"detections", 4,
false);
111 auto startTime = steady_clock::now();
114 auto color = cv::Scalar(255, 255, 255);
115 bool printOutputLayersOnce =
true;
123 if(printOutputLayersOnce && inNN) {
124 std::cout <<
"Output layer names: ";
125 for(
const auto& ten : inNN->getAllLayerNames()) {
126 std::cout << ten <<
", ";
128 std::cout << std::endl;
129 printOutputLayersOnce =
false;
132 cv::Mat frame = imgFrame->getCvFrame();
133 cv::Mat depthFrame = depth->getFrame();
135 cv::Mat depthFrameColor;
136 cv::normalize(depthFrame, depthFrameColor, 255, 0, cv::NORM_INF, CV_8UC1);
137 cv::equalizeHist(depthFrameColor, depthFrameColor);
138 cv::applyColorMap(depthFrameColor, depthFrameColor, cv::COLORMAP_HOT);
141 auto currentTime = steady_clock::now();
142 auto elapsed = duration_cast<duration<float>>(currentTime - startTime);
143 if(elapsed > seconds(1)) {
144 fps = counter / elapsed.count();
146 startTime = currentTime;
149 auto detections = inDet->detections;
151 for(
const auto& detection : detections) {
152 auto roiData = detection.boundingBoxMapping;
153 auto roi = roiData.roi;
154 roi = roi.denormalize(depthFrameColor.cols, depthFrameColor.rows);
155 auto topLeft = roi.topLeft();
156 auto bottomRight = roi.bottomRight();
157 auto xmin = (int)topLeft.x;
158 auto ymin = (
int)topLeft.y;
159 auto xmax = (int)bottomRight.x;
160 auto ymax = (
int)bottomRight.y;
161 cv::rectangle(depthFrameColor, cv::Rect(cv::Point(xmin, ymin), cv::Point(xmax, ymax)), color, cv::FONT_HERSHEY_SIMPLEX);
163 int x1 = detection.xmin * frame.cols;
164 int y1 = detection.ymin * frame.rows;
165 int x2 = detection.xmax * frame.cols;
166 int y2 = detection.ymax * frame.rows;
168 uint32_t labelIndex = detection.label;
169 std::string labelStr =
to_string(labelIndex);
173 cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
174 std::stringstream confStr;
175 confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
176 cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 35), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
178 std::stringstream depthX;
179 depthX <<
"X: " << (int)detection.spatialCoordinates.x <<
" mm";
180 cv::putText(frame, depthX.str(), cv::Point(x1 + 10, y1 + 50), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
181 std::stringstream depthY;
182 depthY <<
"Y: " << (int)detection.spatialCoordinates.y <<
" mm";
183 cv::putText(frame, depthY.str(), cv::Point(x1 + 10, y1 + 65), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
184 std::stringstream depthZ;
185 depthZ <<
"Z: " << (int)detection.spatialCoordinates.z <<
" mm";
186 cv::putText(frame, depthZ.str(), cv::Point(x1 + 10, y1 + 80), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
188 cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
191 std::stringstream fpsStr;
192 fpsStr << std::fixed << std::setprecision(2) <<
fps;
193 cv::putText(frame, fpsStr.str(), cv::Point(2, imgFrame->getHeight() - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color);
195 cv::imshow(
"depth", depthFrameColor);
196 cv::imshow(
"rgb", frame);
198 int key = cv::waitKey(1);
199 if(key ==
'q' || key ==
'Q') {