from __future__ import division, print_function, absolute_import

import argparse
import os

import cv2
import numpy as np

from application_util import preprocessing
from application_util import visualization
from deep_sort import nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker


def gather_sequence_info(sequence_dir, detection_file):
    """Gather sequence information, such as image filenames, detections,
    groundtruth (if available).

    Parameters
    ----------
    sequence_dir : str
        Path to the MOTChallenge sequence directory.
    detection_file : str
        Path to the detection file.

    Returns
    -------
    Dict
        A dictionary of the following sequence information:

        * sequence_name: Name of the sequence.
        * image_filenames: A dictionary that maps frame indices to image
          filenames.
        * detections: A numpy array of detections in MOTChallenge format.
        * groundtruth: A numpy array of ground truth in MOTChallenge format.
        * image_size: Image size (height, width).
        * min_frame_idx: Index of the first frame.
        * max_frame_idx: Index of the last frame.
        * feature_dim: Dimensionality of the appearance descriptors stored in
          the detection matrix (0 if no detections are given).
        * update_ms: Time between consecutive frames in milliseconds, taken
          from seqinfo.ini (None if that file is missing).

    """
    image_dir = os.path.join(sequence_dir, "img1")
    image_filenames = {
        int(os.path.splitext(f)[0]): os.path.join(image_dir, f)
        for f in os.listdir(image_dir)}
    groundtruth_file = os.path.join(sequence_dir, "gt/gt.txt")

    detections = None
    if detection_file is not None:
        detections = np.load(detection_file)
    groundtruth = None
    if os.path.exists(groundtruth_file):
        groundtruth = np.loadtxt(groundtruth_file, delimiter=',')

    if len(image_filenames) > 0:
        image = cv2.imread(next(iter(image_filenames.values())),
                           cv2.IMREAD_GRAYSCALE)
        image_size = image.shape
    else:
        image_size = None

    if len(image_filenames) > 0:
        min_frame_idx = min(image_filenames.keys())
        max_frame_idx = max(image_filenames.keys())
    else:
        min_frame_idx = int(detections[:, 0].min())
        max_frame_idx = int(detections[:, 0].max())

    info_filename = os.path.join(sequence_dir, "seqinfo.ini")
    if os.path.exists(info_filename):
        with open(info_filename, "r") as f:
            line_splits = [l.split('=') for l in f.read().splitlines()[1:]]
            info_dict = dict(
                s for s in line_splits if isinstance(s, list) and len(s) == 2)

        update_ms = 1000 / int(info_dict["frameRate"])
    else:
        update_ms = None

    feature_dim = detections.shape[1] - 10 if detections is not None else 0
    seq_info = {
        "sequence_name": os.path.basename(sequence_dir),
        "image_filenames": image_filenames,
        "detections": detections,
        "groundtruth": groundtruth,
        "image_size": image_size,
        "min_frame_idx": min_frame_idx,
        "max_frame_idx": max_frame_idx,
        "feature_dim": feature_dim,
        "update_ms": update_ms
    }
    return seq_info
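
# Illustrative sketch of the sequence layout gather_sequence_info() expects;
# the directory and file names below are examples, not files shipped with
# this repository:
#
#     MOT16-06/
#         img1/            000001.jpg, 000002.jpg, ...  (frames named by index)
#         gt/gt.txt        optional ground truth in MOTChallenge format
#         seqinfo.ini      optional metadata, provides "frameRate"
#
#     seq_info = gather_sequence_info("MOT16-06", "detections/MOT16-06.npy")
#     first, last = seq_info["min_frame_idx"], seq_info["max_frame_idx"]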


def create_detections(detection_mat, frame_idx, min_height=0):
    """Create detections for given frame index from the raw detection matrix.

    Parameters
    ----------
    detection_mat : ndarray
        Matrix of detections. The first 10 columns of the detection matrix are
        in the standard MOTChallenge detection format. The remaining columns
        store the feature vector associated with each detection.
    frame_idx : int
        The frame index.
    min_height : Optional[int]
        A minimum detection bounding box height. Detections that are smaller
        than this value are disregarded.

    Returns
    -------
    List[deep_sort.detection.Detection]
        Returns detection responses at given frame index.

    """
    frame_indices = detection_mat[:, 0].astype(int)
    mask = frame_indices == frame_idx

    detection_list = []
    for row in detection_mat[mask]:
        bbox, confidence, feature = row[2:6], row[6], row[10:]
        if bbox[3] < min_height:
            continue
        detection_list.append(Detection(bbox, confidence, feature))
    return detection_list
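
# A minimal usage sketch, assuming a detection matrix whose first 10 columns
# follow the MOTChallenge layout (frame, id, x, y, w, h, confidence, ...) and
# whose remaining columns hold the appearance feature; the file name
# "det.npy" is a placeholder:
#
#     detection_mat = np.load("det.npy")
#     detections = create_detections(detection_mat, frame_idx=1, min_height=30)
#     boxes = np.array([d.tlwh for d in detections])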


def run(sequence_dir, detection_file, output_file, min_confidence,
        nms_max_overlap, min_detection_height, max_cosine_distance,
        nn_budget, display):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    sequence_dir : str
        Path to the MOTChallenge sequence directory.
    detection_file : str
        Path to the detections file.
    output_file : str
        Path to the tracking output file. This file will contain the tracking
        results on completion.
    min_confidence : float
        Detection confidence threshold. Disregard all detections that have
        a confidence lower than this value.
    nms_max_overlap : float
        Maximum detection overlap (non-maxima suppression threshold).
    min_detection_height : int
        Detection height threshold. Disregard all detections that have
        a height lower than this value.
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.

    """
    seq_info = gather_sequence_info(sequence_dir, detection_file)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    results = []

    def frame_callback(vis, frame_idx):
        print("Processing frame %05d" % frame_idx)

        # Load detections for this frame and apply the confidence threshold.
        detections = create_detections(
            seq_info["detections"], frame_idx, min_detection_height)
        detections = [d for d in detections if d.confidence >= min_confidence]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(
            boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        tracker.predict()
        tracker.update(detections)

        # Update visualization.
        if display:
            image = cv2.imread(
                seq_info["image_filenames"][frame_idx], cv2.IMREAD_COLOR)
            vis.set_image(image.copy())
            vis.draw_detections(detections)
            vis.draw_trackers(tracker.tracks)

        # Store results for confirmed, recently updated tracks.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlwh()
            results.append([
                frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]])

    # Run the tracker over the full sequence.
    if display:
        visualizer = visualization.Visualization(seq_info, update_ms=5)
    else:
        visualizer = visualization.NoVisualization(seq_info)
    visualizer.run(frame_callback)

    # Write results in MOTChallenge format.
    with open(output_file, 'w') as f:
        for row in results:
            print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' % (
                row[0], row[1], row[2], row[3], row[4], row[5]), file=f)
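
# A minimal sketch of calling run() directly; the sequence directory,
# detection file, and threshold values below are illustrative placeholders:
#
#     run("MOT16/train/MOT16-02", "detections/MOT16-02.npy",
#         "/tmp/hypotheses.txt", min_confidence=0.3, nms_max_overlap=1.0,
#         min_detection_height=0, max_cosine_distance=0.2, nn_budget=100,
#         display=True)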


def parse_args():
    """ Parse command line arguments.
    """
    parser = argparse.ArgumentParser(description="Deep SORT")
    parser.add_argument(
        "--sequence_dir", help="Path to MOTChallenge sequence directory",
        default=None, required=True)
    parser.add_argument(
        "--detection_file", help="Path to custom detections.", default=None,
        required=True)
    parser.add_argument(
        "--output_file", help="Path to the tracking output file. This file will"
        " contain the tracking results on completion.",
        default="/tmp/hypotheses.txt")
    parser.add_argument(
        "--min_confidence", help="Detection confidence threshold. Disregard "
        "all detections that have a confidence lower than this value.",
        default=0.8, type=float)
    parser.add_argument(
        "--min_detection_height", help="Threshold on the detection bounding "
        "box height. Detections with height smaller than this value are "
        "disregarded.", default=0, type=int)
    parser.add_argument(
        "--nms_max_overlap", help="Non-maxima suppression threshold: Maximum "
        "detection overlap.", default=1.0, type=float)
    parser.add_argument(
        "--max_cosine_distance", help="Gating threshold for cosine distance "
        "metric (object appearance).", type=float, default=0.2)
    parser.add_argument(
        "--nn_budget", help="Maximum size of the appearance descriptors "
        "gallery. If None, no budget is enforced.", type=int, default=None)
    # argparse's built-in bool() treats any non-empty string (including
    # "False") as True, so parse the flag value explicitly.
    parser.add_argument(
        "--display", help="Show intermediate tracking results",
        default=True, type=lambda s: str(s).lower() in ("1", "true", "yes"))
    return parser.parse_args()
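
# Example invocation, assuming this module is saved as deep_sort_app.py and
# that the sequence directory and detection file exist locally (both paths
# are placeholders):
#
#     python deep_sort_app.py \
#         --sequence_dir=./MOT16/test/MOT16-06 \
#         --detection_file=./detections/MOT16-06.npy \
#         --min_confidence=0.3 \
#         --nn_budget=100 \
#         --display=True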


if __name__ == "__main__":
    args = parse_args()
    run(
        args.sequence_dir, args.detection_file, args.output_file,
        args.min_confidence, args.nms_max_overlap, args.min_detection_height,
        args.max_cosine_distance, args.nn_budget, args.display)