mono_mobilenet.cpp
#include <iomanip>
#include <iostream>
#include <sstream>

#include "utility.hpp"

// Common includes for development using the depthai library
#include "depthai/depthai.hpp"

// MobilenetSSD label texts
static const std::vector<std::string> labelMap = {"background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
                                                  "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
                                                  "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};

int main(int argc, char** argv) {
    using namespace std;
    // Default blob path, provided by the Hunter private-data download
    // (intended only to make running the example easier)
    std::string nnPath(BLOB_PATH);

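    // A custom model can be supplied as the first argument, e.g.:
    //   ./mono_mobilenet <path/to/blob>   (the binary name depends on the build setup)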
    // If a path to a blob is specified, use that instead
    if(argc > 1) {
        nnPath = std::string(argv[1]);
    }

    // Print which blob we are using
    printf("Using blob at path: %s\n", nnPath.c_str());

    // Create pipeline
    dai::Pipeline pipeline;

    // Define sources and outputs
    auto monoRight = pipeline.create<dai::node::MonoCamera>();
    auto manip = pipeline.create<dai::node::ImageManip>();
    auto nn = pipeline.create<dai::node::MobileNetDetectionNetwork>();
    auto manipOut = pipeline.create<dai::node::XLinkOut>();
    auto nnOut = pipeline.create<dai::node::XLinkOut>();

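    // The stream names set below must match the names requested via Device::getOutputQueue() further down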
    manipOut->setStreamName("right");
    nnOut->setStreamName("nn");

    // Properties
    monoRight->setCamera("right");
    monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_720_P);

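    // MobileNet-SSD expects a 300x300 BGR input, which the ImageManip node below produces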
    // Convert the grayscale frame into a form the NN accepts
    manip->initialConfig.setResize(300, 300);
    // The NN model expects BGR input; by default the ImageManip output type is the same as its input (grayscale here)
    manip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);

    nn->setConfidenceThreshold(0.5);
    nn->setBlobPath(nnPath);
    nn->setNumInferenceThreads(2);
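    // Non-blocking input: frames are dropped rather than stalling upstream nodes when the NN cannot keep up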
    nn->input.setBlocking(false);

    // Linking
    monoRight->out.link(manip->inputImage);
    manip->out.link(nn->input);
    manip->out.link(manipOut->input);
    nn->out.link(nnOut->input);
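    // Note that manip->out fans out to two inputs: the detection network and the host-bound "right" stream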

    // Connect to device and start pipeline
    dai::Device device(pipeline);

    // Output queues will be used to get the grayscale frames and nn data from the outputs defined above
    auto qRight = device.getOutputQueue("right", 4, false);
    auto qDet = device.getOutputQueue("nn", 4, false);
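    // maxSize=4, blocking=false: if the host falls behind, the oldest queued messages are dropped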

    cv::Mat frame;
    std::vector<dai::ImgDetection> detections;

    // Add bounding boxes and text to the frame and show it to the user
    auto displayFrame = [](std::string name, cv::Mat frame, std::vector<dai::ImgDetection>& detections) {
        auto color = cv::Scalar(255, 0, 0);
        // NN bounding-box coordinates are normalized to the <0..1> range - scale them by the frame width/height
        for(auto& detection : detections) {
            int x1 = detection.xmin * frame.cols;
            int y1 = detection.ymin * frame.rows;
            int x2 = detection.xmax * frame.cols;
            int y2 = detection.ymax * frame.rows;

            uint32_t labelIndex = detection.label;
            std::string labelStr = to_string(labelIndex);
            if(labelIndex < labelMap.size()) {
                labelStr = labelMap[labelIndex];
            }
            cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
            std::stringstream confStr;
            confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
            cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
            cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, 1);
        }
        // Show the frame
        cv::imshow(name, frame);
    };

    while(true) {
        // Instead of get (blocking), we use tryGet (non-blocking), which returns the available data or nullptr otherwise
        auto inRight = qRight->tryGet<dai::ImgFrame>();
        auto inDet = qDet->tryGet<dai::ImgDetections>();

        if(inRight) {
            frame = inRight->getCvFrame();
        }

        if(inDet) {
            detections = inDet->detections;
        }

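        // Frame and detection messages arrive asynchronously; the latest of each is kept,
        // so detections may occasionally be drawn on a slightly newer frame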
        if(!frame.empty()) {
            displayFrame("right", frame, detections);
        }

        int key = cv::waitKey(1);
        if(key == 'q' || key == 'Q') return 0;
    }
    return 0;
}