from __future__ import division, print_function, absolute_import

import argparse
import os

import cv2
import numpy as np

from application_util import preprocessing
from application_util import visualization
from deep_sort import nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker


def gather_sequence_info(sequence_dir, detection_file):
    """Gather sequence information, such as image filenames, detections,
    groundtruth (if available).

    Parameters
    ----------
    sequence_dir : str
        Path to the MOTChallenge sequence directory.
    detection_file : str
        Path to the detection file.

    Returns
    -------
    Dict
        A dictionary of the following sequence information:

        * sequence_name: Name of the sequence.
        * image_filenames: A dictionary that maps frame indices to image
          filenames.
        * detections: A numpy array of detections in MOTChallenge format.
        * groundtruth: A numpy array of ground truth in MOTChallenge format.
        * image_size: Image size (height, width).
        * min_frame_idx: Index of the first frame.
        * max_frame_idx: Index of the last frame.
        * feature_dim: Dimensionality of the appearance descriptors stored in
          the detection matrix (0 if no detections are given).
        * update_ms: Time between consecutive frames in milliseconds, taken
          from seqinfo.ini (None if that file is missing).

    """
    image_dir = os.path.join(sequence_dir, "img1")
    image_filenames = {
        int(os.path.splitext(f)[0]): os.path.join(image_dir, f)
        for f in os.listdir(image_dir)}
    groundtruth_file = os.path.join(sequence_dir, "gt/gt.txt")

    detections = None
    if detection_file is not None:
        detections = np.load(detection_file)
    groundtruth = None
    if os.path.exists(groundtruth_file):
        groundtruth = np.loadtxt(groundtruth_file, delimiter=',')

    if len(image_filenames) > 0:
        image = cv2.imread(next(iter(image_filenames.values())),
                           cv2.IMREAD_GRAYSCALE)
        image_size = image.shape
    else:
        image_size = None

    if len(image_filenames) > 0:
        min_frame_idx = min(image_filenames.keys())
        max_frame_idx = max(image_filenames.keys())
    else:
        min_frame_idx = int(detections[:, 0].min())
        max_frame_idx = int(detections[:, 0].max())

    info_filename = os.path.join(sequence_dir, "seqinfo.ini")
    if os.path.exists(info_filename):
        with open(info_filename, "r") as f:
            line_splits = [l.split('=') for l in f.read().splitlines()[1:]]
            info_dict = dict(
                s for s in line_splits if isinstance(s, list) and len(s) == 2)

        update_ms = 1000 / int(info_dict["frameRate"])
    else:
        update_ms = None

    feature_dim = detections.shape[1] - 10 if detections is not None else 0
    seq_info = {
        "sequence_name": os.path.basename(sequence_dir),
        "image_filenames": image_filenames,
        "detections": detections,
        "groundtruth": groundtruth,
        "image_size": image_size,
        "min_frame_idx": min_frame_idx,
        "max_frame_idx": max_frame_idx,
        "feature_dim": feature_dim,
        "update_ms": update_ms
    }
    return seq_info
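
# Illustrative sketch of the sequence layout gather_sequence_info() expects;
# the directory and file names below are examples, not files shipped with
# this repository:
#
#     MOT16-06/
#         img1/            000001.jpg, 000002.jpg, ...  (frames named by index)
#         gt/gt.txt        optional ground truth in MOTChallenge format
#         seqinfo.ini      optional metadata, provides "frameRate"
#
#     seq_info = gather_sequence_info("MOT16-06", "detections/MOT16-06.npy")
#     first, last = seq_info["min_frame_idx"], seq_info["max_frame_idx"]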


def create_detections(detection_mat, frame_idx, min_height=0):
    """Create detections for given frame index from the raw detection matrix.

    Parameters
    ----------
    detection_mat : ndarray
        Matrix of detections. The first 10 columns of the detection matrix are
        in the standard MOTChallenge detection format. The remaining columns
        store the feature vector associated with each detection.
    frame_idx : int
        The frame index.
    min_height : Optional[int]
        A minimum detection bounding box height. Detections that are smaller
        than this value are disregarded.

    Returns
    -------
    List[deep_sort.detection.Detection]
        Returns detection responses at given frame index.

    """
    frame_indices = detection_mat[:, 0].astype(int)
    mask = frame_indices == frame_idx

    detection_list = []
    for row in detection_mat[mask]:
        bbox, confidence, feature = row[2:6], row[6], row[10:]
        if bbox[3] < min_height:
            continue
        detection_list.append(Detection(bbox, confidence, feature))
    return detection_list
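
# A minimal usage sketch, assuming a detection matrix whose first 10 columns
# follow the MOTChallenge layout (frame, id, x, y, w, h, confidence, ...) and
# whose remaining columns hold the appearance feature; the file name
# "det.npy" is a placeholder:
#
#     detection_mat = np.load("det.npy")
#     detections = create_detections(detection_mat, frame_idx=1, min_height=30)
#     boxes = np.array([d.tlwh for d in detections])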


def run(sequence_dir, detection_file, output_file, min_confidence,
        nms_max_overlap, min_detection_height, max_cosine_distance,
        nn_budget, display):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    sequence_dir : str
        Path to the MOTChallenge sequence directory.
    detection_file : str
        Path to the detections file.
    output_file : str
        Path to the tracking output file. This file will contain the tracking
        results on completion.
    min_confidence : float
        Detection confidence threshold. Disregard all detections that have
        a confidence lower than this value.
    nms_max_overlap : float
        Maximum detection overlap (non-maxima suppression threshold).
    min_detection_height : int
        Detection height threshold. Disregard all detections that have
        a height lower than this value.
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.

    """
    seq_info = gather_sequence_info(sequence_dir, detection_file)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    results = []

    def frame_callback(vis, frame_idx):
        print("Processing frame %05d" % frame_idx)

        # Load detections for this frame and apply the confidence threshold.
        detections = create_detections(
            seq_info["detections"], frame_idx, min_detection_height)
        detections = [d for d in detections if d.confidence >= min_confidence]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(
            boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        tracker.predict()
        tracker.update(detections)

        # Update visualization.
        if display:
            image = cv2.imread(
                seq_info["image_filenames"][frame_idx], cv2.IMREAD_COLOR)
            vis.set_image(image.copy())
            vis.draw_detections(detections)
            vis.draw_trackers(tracker.tracks)

        # Store results for confirmed, recently updated tracks.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlwh()
            results.append([
                frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]])

    # Run the tracker over the full sequence.
    if display:
        visualizer = visualization.Visualization(seq_info, update_ms=5)
    else:
        visualizer = visualization.NoVisualization(seq_info)
    visualizer.run(frame_callback)

    # Write results in MOTChallenge format.
    with open(output_file, 'w') as f:
        for row in results:
            print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' % (
                row[0], row[1], row[2], row[3], row[4], row[5]), file=f)
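
# A minimal sketch of calling run() directly; the sequence directory,
# detection file, and threshold values below are illustrative placeholders:
#
#     run("MOT16/train/MOT16-02", "detections/MOT16-02.npy",
#         "/tmp/hypotheses.txt", min_confidence=0.3, nms_max_overlap=1.0,
#         min_detection_height=0, max_cosine_distance=0.2, nn_budget=100,
#         display=True)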


def parse_args():
    """ Parse command line arguments.
    """
    parser = argparse.ArgumentParser(description="Deep SORT")
    parser.add_argument(
        "--sequence_dir", help="Path to MOTChallenge sequence directory",
        default=None, required=True)
    parser.add_argument(
        "--detection_file", help="Path to custom detections.", default=None,
        required=True)
    parser.add_argument(
        "--output_file", help="Path to the tracking output file. This file will"
        " contain the tracking results on completion.",
        default="/tmp/hypotheses.txt")
    parser.add_argument(
        "--min_confidence", help="Detection confidence threshold. Disregard "
        "all detections that have a confidence lower than this value.",
        default=0.8, type=float)
    parser.add_argument(
        "--min_detection_height", help="Threshold on the detection bounding "
        "box height. Detections with height smaller than this value are "
        "disregarded.", default=0, type=int)
    parser.add_argument(
        "--nms_max_overlap", help="Non-maxima suppression threshold: Maximum "
        "detection overlap.", default=1.0, type=float)
    parser.add_argument(
        "--max_cosine_distance", help="Gating threshold for cosine distance "
        "metric (object appearance).", type=float, default=0.2)
    parser.add_argument(
        "--nn_budget", help="Maximum size of the appearance descriptors "
        "gallery. If None, no budget is enforced.", type=int, default=None)
    # argparse's built-in bool() treats any non-empty string (including
    # "False") as True, so parse the flag value explicitly.
    parser.add_argument(
        "--display", help="Show intermediate tracking results",
        default=True, type=lambda s: str(s).lower() in ("1", "true", "yes"))
    return parser.parse_args()
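
# Example invocation, assuming this module is saved as deep_sort_app.py and
# that the sequence directory and detection file exist locally (both paths
# are placeholders):
#
#     python deep_sort_app.py \
#         --sequence_dir=./MOT16/test/MOT16-06 \
#         --detection_file=./detections/MOT16-06.npy \
#         --min_confidence=0.3 \
#         --nn_budget=100 \
#         --display=True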


if __name__ == "__main__":
    args = parse_args()
    run(
        args.sequence_dir, args.detection_file, args.output_file,
        args.min_confidence, args.nms_max_overlap, args.min_detection_height,
        args.max_cosine_distance, args.nn_budget, args.display)