deep_sort_tracker.py
from __future__ import print_function

import cv2
import numpy as np
import itertools, pkg_resources, sys
from distutils.version import LooseVersion
if LooseVersion(pkg_resources.get_distribution("chainer").version) >= LooseVersion('7.0.0') and \
        sys.version_info.major == 2:
    print('''Please install chainer < 7.0.0:

    sudo pip install chainer==6.7.0

cf. https://github.com/jsk-ros-pkg/jsk_recognition/pull/2485
''', file=sys.stderr)
    sys.exit(1)
if [p for p in list(itertools.chain(*[pkg_resources.find_distributions(_) for _ in sys.path])) if "cupy-" in p.project_name] == []:
    print('''Please install CuPy

    sudo pip install cupy-cuda[your cuda version]
i.e.
    sudo pip install cupy-cuda91

''', file=sys.stderr)
    # sys.exit(1)
import chainer

# NOTE: the first line of this two-line import is missing from the generated
# listing; the module path is reconstructed from upstream jsk_recognition.
from jsk_recognition_utils.chainermodels.deep_sort_net \
    import DeepSortFeatureExtractor

from vis_bboxes import vis_bboxes
import deep_sort


def extract_image_patch(image, bbox, patch_shape):
    """Extract an image patch from a bounding box.

    Copied from
    https://github.com/nwojke/deep_sort/blob/master/tools/generate_detections.py

    Parameters
    ----------
    image : ndarray
        The full image.
    bbox : array_like
        The bounding box in format (x, y, width, height).
    patch_shape : Optional[array_like]
        This parameter can be used to enforce a desired patch shape
        (height, width). First, the `bbox` is adapted to the aspect ratio
        of the patch shape, then it is clipped at the image boundaries.
        If None, the shape is computed from :arg:`bbox`.

    Returns
    -------
    ndarray | NoneType
        An image patch showing the :arg:`bbox`, optionally reshaped to
        :arg:`patch_shape`.
        Returns None if the bounding box is empty or fully outside of
        the image boundaries.

    """
    bbox = np.array(bbox)
    if patch_shape is not None:
        # correct aspect ratio to patch shape
        target_aspect = float(patch_shape[1]) / patch_shape[0]
        new_width = target_aspect * bbox[3]
        bbox[0] -= (new_width - bbox[2]) / 2
        bbox[2] = new_width

    # convert to top left, bottom right
    bbox[2:] += bbox[:2]
    bbox = bbox.astype(int)  # np.int is removed in NumPy >= 1.24

    # clip at image boundaries
    bbox[:2] = np.maximum(0, bbox[:2])
    bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:])
    if np.any(bbox[:2] >= bbox[2:]):
        return None
    sx, sy, ex, ey = bbox
    image = image[sy:ey, sx:ex]
    image = cv2.resize(image, tuple(patch_shape[::-1]))
    return image
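
# Usage sketch (illustrative, not part of the upstream file): boxes are given
# as (x, y, width, height) and the patch shape as (height, width), so a
# successful call returns a patch resized to exactly that shape.
#
#   frame = np.zeros((480, 640, 3), dtype=np.uint8)
#   patch = extract_image_patch(frame, (10, 20, 50, 100), (128, 64))
#   if patch is not None:  # None means the box fell outside the image
#       assert patch.shape[:2] == (128, 64)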


def encoder(image_encoder):

    def _encoder(image, boxes):
        image_shape = 128, 64, 3
        image_patches = []
        for box in boxes:
            patch = extract_image_patch(
                image, box, image_shape[:2])
            if patch is None:
                # fall back to a random patch when the box is degenerate
                # or fully outside the image
                patch = np.random.uniform(
                    0., 255., image_shape).astype(np.uint8)
            image_patches.append(patch)
        image_patches = np.asarray(image_patches, 'f')
        # NHWC -> NCHW for Chainer
        image_patches = image_patches.transpose(0, 3, 1, 2)
        image_patches = image_encoder.xp.asarray(image_patches)
        with chainer.using_config('train', False):
            ret = image_encoder(image_patches)
        return chainer.cuda.to_cpu(ret.data)

    return _encoder
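
# Usage sketch (illustrative, not part of the upstream file): `encoder` wraps
# a Chainer feature extractor into a callable that crops each (x, y, w, h) box
# out of the frame and returns one appearance feature vector per box.
#
#   extractor = DeepSortFeatureExtractor()  # untrained weights in this sketch
#   encode = encoder(extractor)
#   features = encode(frame, boxes)         # len(features) == len(boxes)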


class DeepSortTracker(object):

    def __init__(self, gpu=-1,
                 pretrained_model=None,
                 nms_max_overlap=1.0,
                 max_cosine_distance=0.2,
                 budget=None):
        self.max_cosine_distance = max_cosine_distance
        self.nms_max_overlap = nms_max_overlap
        self.budget = budget

        # feature extractor
        self.gpu = gpu
        self.extractor = DeepSortFeatureExtractor()
        if pretrained_model is not None:
            chainer.serializers.load_npz(
                pretrained_model, self.extractor)
        if self.gpu >= 0:
            self.extractor = self.extractor.to_gpu()
        self.encoder = encoder(self.extractor)

        # variables for tracking objects
        self.n_tracked = 0  # number of tracked objects
        self.tracker = None
        self.reset()

    def reset(self):
        self.track_id_to_object_id = {}
        self.tracking_objects = {}
        # cosine-distance nearest-neighbor metric over appearance features;
        # `budget` caps how many past features are kept per track
        metric = deep_sort.deep_sort.nn_matching.NearestNeighborDistanceMetric(
            'cosine',
            matching_threshold=self.max_cosine_distance,
            budget=self.budget)
        self.tracker = deep_sort.deep_sort.tracker.Tracker(metric)

    def track(self, frame, bboxes, scores):
        # run non-maximum suppression.
        indices = deep_sort.application_util.preprocessing.non_max_suppression(
            bboxes, self.nms_max_overlap, scores)
        bboxes = bboxes[indices]
        scores = scores[indices]

        # generate detections.
        features = self.encoder(frame, np.array(bboxes))
        n_bbox = len(bboxes)
        detections = [
            deep_sort.deep_sort.detection.Detection(
                bboxes[i], scores[i], features[i]) for i in range(n_bbox)]

        # update tracker.
        self.tracker.predict()
        self.tracker.update(detections)

        # mark everything out of frame; confirmed tracks seen in this
        # frame are flipped back below
        for target_object in self.tracking_objects.values():
            target_object['out_of_frame'] = True

        # store results
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlwh()

            if track.track_id in self.track_id_to_object_id:
                # update tracked object
                target_object = self.tracking_objects[
                    self.track_id_to_object_id[track.track_id]]
                target_object['out_of_frame'] = False
                target_object['bbox'] = bbox
            else:
                # detected for the first time
                object_id = self.n_tracked
                self.n_tracked += 1
                self.track_id_to_object_id[track.track_id] = object_id
                self.tracking_objects[object_id] = dict(
                    out_of_frame=False,
                    bbox=bbox)

    def visualize(self, frame, bboxes):
        vis_frame = frame.copy()
        # draw raw detections in white
        for x1, y1, w, h in bboxes:
            x2, y2 = x1 + w, y1 + h
            x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
            cv2.rectangle(vis_frame,
                          (x1, y1), (x2, y2),
                          (255, 255, 255), 3)
        # draw tracked objects labeled with their object ids
        labels, bboxes = [], []
        for object_id, target_object in self.tracking_objects.items():
            if target_object['out_of_frame']:
                continue
            labels.append(object_id)
            bboxes.append(target_object['bbox'])
        vis_bboxes(vis_frame, bboxes, labels)
        return vis_frame
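

# End-to-end usage sketch (illustrative; `detect`, `frames`, and the model
# path are assumptions, not part of this file):
#
#   tracker = DeepSortTracker(gpu=-1, pretrained_model='deep_sort_net.npz')
#   for frame in frames:                    # BGR images, e.g. from cv2
#       bboxes, scores = detect(frame)      # ndarrays: (N, 4) of (x, y, w, h)
#                                           # boxes and (N,) confidence scores
#       tracker.track(frame, bboxes, scores)
#       vis = tracker.visualize(frame, bboxes)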