object_tracker_video.cpp
#include <chrono>
#include <iomanip>
#include <iostream>
#include <sstream>

#include "utility.hpp"

// Includes common necessary includes for development using depthai library
#include "depthai/depthai.hpp"

static const std::vector<std::string> labelMap = {"", "person"};

// Declared for parity with the camera-based tracker example; not used below
static std::atomic<bool> fullFrameTracking{false};
int main(int argc, char** argv) {
    using namespace std;
    using namespace std::chrono;
    std::string nnPath(BLOB_PATH);
    std::string videoPath(VIDEO_PATH);

    // If blob and video paths are specified as arguments, use those instead of the defaults
    if(argc > 2) {
        nnPath = std::string(argv[1]);
        videoPath = std::string(argv[2]);
    }

    // Print which blob and video we are using
    printf("Using blob at path: %s\n", nnPath.c_str());
    printf("Using video at path: %s\n", videoPath.c_str());

    // Create pipeline
    dai::Pipeline pipeline;

    // Define sources and outputs
    auto manip = pipeline.create<dai::node::ImageManip>();
    auto objectTracker = pipeline.create<dai::node::ObjectTracker>();
    auto detectionNetwork = pipeline.create<dai::node::MobileNetDetectionNetwork>();

    auto manipOut = pipeline.create<dai::node::XLinkOut>();
    auto xinFrame = pipeline.create<dai::node::XLinkIn>();
    auto trackerOut = pipeline.create<dai::node::XLinkOut>();
    auto xlinkOut = pipeline.create<dai::node::XLinkOut>();
    auto nnOut = pipeline.create<dai::node::XLinkOut>();

    manipOut->setStreamName("manip");
    xinFrame->setStreamName("inFrame");
    xlinkOut->setStreamName("trackerFrame");
    trackerOut->setStreamName("tracklets");
    nnOut->setStreamName("nn");

    // Properties
    xinFrame->setMaxDataSize(1920 * 1080 * 3);  // 1080p BGR frame, 3 bytes per pixel

    manip->initialConfig.setResizeThumbnail(544, 320);
    // manip->initialConfig.setResize(384, 384);
    // manip->initialConfig.setKeepAspectRatio(false); // squash the image to not lose FOV
    // The NN model expects BGR input. By default the ImageManip output type is the same as its input (gray in this case)
    manip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);
    manip->inputImage.setBlocking(true);

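    // The thumbnail resize above letterboxes the input to 544x320. A sketch, assuming
    // the ImageManipConfig overload that also takes a background colour for the bars:
    //
    //     manip->initialConfig.setResizeThumbnail(544, 320, 0, 0, 0);  // black letterbox bars
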
    // Set node configs
    detectionNetwork->setBlobPath(nnPath);
    detectionNetwork->setConfidenceThreshold(0.5);
    detectionNetwork->input.setBlocking(true);

    objectTracker->inputTrackerFrame.setBlocking(true);
    objectTracker->inputDetectionFrame.setBlocking(true);
    objectTracker->inputDetections.setBlocking(true);
    objectTracker->setDetectionLabelsToTrack({1});  // track only persons
    // Possible tracking types: ZERO_TERM_COLOR_HISTOGRAM, ZERO_TERM_IMAGELESS, SHORT_TERM_IMAGELESS, SHORT_TERM_KCF
    objectTracker->setTrackerType(dai::TrackerType::ZERO_TERM_COLOR_HISTOGRAM);
    // Take the smallest available ID when a new object is tracked; possible options: SMALLEST_ID, UNIQUE_ID
    objectTracker->setTrackerIdAssignmentPolicy(dai::TrackerIdAssignmentPolicy::SMALLEST_ID);

    // Linking
    manip->out.link(manipOut->input);
    manip->out.link(detectionNetwork->input);
    xinFrame->out.link(manip->inputImage);
    xinFrame->out.link(objectTracker->inputTrackerFrame);
    detectionNetwork->out.link(nnOut->input);
    detectionNetwork->out.link(objectTracker->inputDetections);
    detectionNetwork->passthrough.link(objectTracker->inputDetectionFrame);
    objectTracker->out.link(trackerOut->input);
    objectTracker->passthroughTrackerFrame.link(xlinkOut->input);

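    // Note: the fullFrameTracking flag declared at the top is unused in this example;
    // the tracker always runs on the full 1920x1080 input frame. A sketch of how the
    // flag could select the tracker input instead (an assumption, mirroring the
    // camera-based object_tracker example) would replace the inputTrackerFrame link:
    //
    //     if(fullFrameTracking) {
    //         xinFrame->out.link(objectTracker->inputTrackerFrame);                  // track on the full frame
    //     } else {
    //         detectionNetwork->passthrough.link(objectTracker->inputTrackerFrame);  // track on the NN-sized frame
    //     }
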
    // Connect to device and start pipeline
    dai::Device device(pipeline);

    auto qIn = device.getInputQueue("inFrame", 4);
    auto trackerFrameQ = device.getOutputQueue("trackerFrame", 4);
    auto tracklets = device.getOutputQueue("tracklets", 4);
    auto qManip = device.getOutputQueue("manip", 4);
    auto qDet = device.getOutputQueue("nn", 4);

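    // The queues above are blocking by default, which paces the host loop to the
    // device. A sketch of the non-blocking variant (third argument of getOutputQueue),
    // which drops the oldest messages instead of stalling:
    //
    //     auto trackerFrameQ = device.getOutputQueue("trackerFrame", 4, false);
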
    auto startTime = steady_clock::now();
    int counter = 0;
    float fps = 0;
    cv::Mat frame;
    cv::Mat manipFrame;
    std::vector<dai::ImgDetection> detections;

    // Add bounding boxes and text to the frame and show it to the user
    auto displayFrame = [](std::string name, cv::Mat frame, std::vector<dai::ImgDetection>& detections) {
        auto color = cv::Scalar(255, 0, 0);
        // NN bounding-box coordinates are in the <0..1> range; scale them by the frame width/height
        for(auto& detection : detections) {
            int x1 = detection.xmin * frame.cols;
            int y1 = detection.ymin * frame.rows;
            int x2 = detection.xmax * frame.cols;
            int y2 = detection.ymax * frame.rows;

            uint32_t labelIndex = detection.label;
            std::string labelStr = to_string(labelIndex);
            if(labelIndex < labelMap.size()) {
                labelStr = labelMap[labelIndex];
            }
            cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
            std::stringstream confStr;
            confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
            cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
            cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, 1);
        }
        // Show the frame
        cv::imshow(name, frame);
    };

    cv::VideoCapture cap(videoPath);
    auto baseTs = steady_clock::now();
    float simulatedFps = 30;

    while(cap.isOpened()) {
        // Read frame from video
        cap >> frame;
        if(frame.empty()) break;

        auto img = std::make_shared<dai::ImgFrame>();
        frame = resizeKeepAspectRatio(frame, cv::Size(1920, 1080), cv::Scalar(0));
        toPlanar(frame, img->getData());
        img->setTimestamp(baseTs);
        baseTs += steady_clock::duration(static_cast<int64_t>((1000 * 1000 * 1000 / simulatedFps)));
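        // The increment above assumes steady_clock::duration ticks in nanoseconds
        // (true on common standard libraries). A duration-cast form that avoids that
        // assumption, as a sketch:
        //
        //     baseTs += duration_cast<steady_clock::duration>(duration<double>(1.0 / simulatedFps));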
        img->setWidth(1920);
        img->setHeight(1080);
        img->setType(dai::ImgFrame::Type::BGR888p);
        qIn->send(img);

        // tryGet() is non-blocking; if no tracker frame has arrived yet, feed the next video frame
        auto trackFrame = trackerFrameQ->tryGet<dai::ImgFrame>();
        if(!trackFrame) {
            continue;
        }

        auto track = tracklets->get<dai::Tracklets>();
        auto inManip = qManip->get<dai::ImgFrame>();
        auto inDet = qDet->get<dai::ImgDetections>();

        counter++;
        auto currentTime = steady_clock::now();
        auto elapsed = duration_cast<duration<float>>(currentTime - startTime);
        if(elapsed > seconds(1)) {
            fps = counter / elapsed.count();
            counter = 0;
            startTime = currentTime;
        }

        detections = inDet->detections;
        manipFrame = inManip->getCvFrame();
        displayFrame("nn", manipFrame, detections);

        auto color = cv::Scalar(255, 0, 0);
        cv::Mat trackerFrame = trackFrame->getCvFrame();
        auto trackletsData = track->tracklets;
        for(auto& t : trackletsData) {
            auto roi = t.roi.denormalize(trackerFrame.cols, trackerFrame.rows);
            int x1 = roi.topLeft().x;
            int y1 = roi.topLeft().y;
            int x2 = roi.bottomRight().x;
            int y2 = roi.bottomRight().y;

            uint32_t labelIndex = t.label;
            std::string labelStr = to_string(labelIndex);
            if(labelIndex < labelMap.size()) {
                labelStr = labelMap[labelIndex];
            }
            cv::putText(trackerFrame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);

            std::stringstream idStr;
            idStr << "ID: " << t.id;
            cv::putText(trackerFrame, idStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
            std::stringstream statusStr;
            statusStr << "Status: " << t.status;
            cv::putText(trackerFrame, statusStr.str(), cv::Point(x1 + 10, y1 + 60), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);

            cv::rectangle(trackerFrame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, 1);
        }

        std::stringstream fpsStr;
        fpsStr << "NN fps:" << std::fixed << std::setprecision(2) << fps;
        cv::putText(trackerFrame, fpsStr.str(), cv::Point(2, trackFrame->getHeight() - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color);

        cv::imshow("tracker", trackerFrame);

        int key = cv::waitKey(1);
        if(key == 'q' || key == 'Q') {
            return 0;
        }
    }
    return 0;
}
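
A typical invocation, assuming the example binary is named after the source file (with no arguments, the compiled-in BLOB_PATH and VIDEO_PATH defaults are used; the paths below are illustrative):

    ./object_tracker_video path/to/model.blob path/to/video.mp4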