00001
00002 import os
00003 import errno
00004 import argparse
00005 import numpy as np
00006 import cv2
00007 import tensorflow as tf
00008
00009
def _run_in_batches(f, data_dict, out, batch_size):
    """Evaluate `f` on consecutive slices of the inputs, filling `out` in place.

    Parameters
    ----------
    f : Callable[dict] -> array_like
        Function applied to each batch. It receives a dictionary with the
        same keys as `data_dict` whose values are the sliced inputs.
    data_dict : dict
        Maps each key to a sliceable sequence. Every value is sliced with the
        same index range for a given batch.
    out : ndarray
        Pre-allocated output buffer. Its length defines the number of items
        that are processed; results are written into it slice by slice.
    batch_size : int
        Number of items per full batch.

    """
    total = len(out)
    full_batches = int(total / batch_size)

    # `stop` stays at 0 when no full batch fits, so the tail branch below
    # then covers the whole input in a single call.
    stop = 0
    for start in range(0, full_batches * batch_size, batch_size):
        stop = start + batch_size
        chunk = {key: value[start:stop] for key, value in data_dict.items()}
        out[start:stop] = f(chunk)

    # Whatever is left over after the full batches is evaluated in one go.
    if stop < total:
        remainder = {key: value[stop:] for key, value in data_dict.items()}
        out[stop:] = f(remainder)
00022
00023
def extract_image_patch(image, bbox, patch_shape):
    """Extract image patch from bounding box.

    Parameters
    ----------
    image : ndarray
        The full image.
    bbox : array_like
        The bounding box in format (x, y, width, height).
    patch_shape : Optional[array_like]
        This parameter can be used to enforce a desired patch shape
        (height, width). First, the `bbox` is adapted to the aspect ratio
        of the patch shape, then it is clipped at the image boundaries.
        If None, the shape is computed from :arg:`bbox` and the cropped
        region is returned without resizing.

    Returns
    -------
    ndarray | NoneType
        An image patch showing the :arg:`bbox`, optionally reshaped to
        :arg:`patch_shape`.
        Returns None if the bounding box is empty or fully outside of the image
        boundaries.

    """
    # Work on a float copy: the caller's box is left untouched and an
    # integer-typed box does not truncate the aspect-ratio correction below.
    bbox = np.array(bbox, dtype=float)
    if patch_shape is not None:
        # Correct the aspect ratio to the patch shape by changing the width
        # around the horizontal center of the box.
        target_aspect = float(patch_shape[1]) / patch_shape[0]
        new_width = target_aspect * bbox[3]
        bbox[0] -= (new_width - bbox[2]) / 2
        bbox[2] = new_width

    # Convert (x, y, w, h) to (x_min, y_min, x_max, y_max).
    bbox[2:] += bbox[:2]
    # The builtin `int` replaces the `np.int` alias removed in NumPy 1.24.
    bbox = bbox.astype(int)

    # Clip at the image boundaries. The upper bound is the last valid pixel
    # index and the slice end below is exclusive, so the last row and column
    # of the image are never part of a patch (kept as in the original).
    bbox[:2] = np.maximum(0, bbox[:2])
    bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:])
    if np.any(bbox[:2] >= bbox[2:]):
        return None
    sx, sy, ex, ey = bbox
    patch = image[sy:ey, sx:ex]
    if patch_shape is None:
        # No target shape requested: return the crop as-is.
        return patch
    # cv2.resize expects the target size as (width, height).
    return cv2.resize(patch, tuple(patch_shape[::-1]))
00069
00070
class ImageEncoder(object):
    """Runs a frozen TensorFlow inference graph on batches of inputs.

    The graph is imported under the name scope ``net``; the input and output
    tensors are looked up by name inside that scope.

    Attributes set by the constructor:

    * ``session`` -- the TensorFlow session used for inference.
    * ``input_var`` / ``output_var`` -- the graph's input and output tensors.
    * ``feature_dim`` -- last dimension of the output tensor.
    * ``image_shape`` -- shape of the input tensor without its first
      dimension.

    """

    def __init__(self, checkpoint_filename, input_name="images",
                 output_name="features"):
        """Load the graph and resolve its input/output tensors.

        Parameters
        ----------
        checkpoint_filename : str
            Path to a file holding a serialized `GraphDef` protobuf.
        input_name : str
            Name of the input tensor (without the ``net/`` prefix and
            ``:0`` suffix).
        output_name : str
            Name of the output tensor (without the ``net/`` prefix and
            ``:0`` suffix).

        """
        # TensorFlow 2.x only exposes the graph/session API below under
        # `tf.compat.v1`; old 1.x releases without that module keep using the
        # top-level names.
        tf_v1 = getattr(getattr(tf, "compat", None), "v1", tf)

        self.session = tf_v1.Session()
        with tf_v1.gfile.GFile(checkpoint_filename, "rb") as file_handle:
            graph_def = tf_v1.GraphDef()
            graph_def.ParseFromString(file_handle.read())
        tf_v1.import_graph_def(graph_def, name="net")

        graph = tf_v1.get_default_graph()
        self.input_var = graph.get_tensor_by_name("net/%s:0" % input_name)
        self.output_var = graph.get_tensor_by_name("net/%s:0" % output_name)

        # The output must be rank 2 and the input rank 4; the attributes
        # below rely on those ranks.
        assert len(self.output_var.get_shape()) == 2
        assert len(self.input_var.get_shape()) == 4
        self.feature_dim = self.output_var.get_shape().as_list()[-1]
        self.image_shape = self.input_var.get_shape().as_list()[1:]

    def __call__(self, data_x, batch_size=32):
        """Compute the output tensor for every item in `data_x`.

        Parameters
        ----------
        data_x : ndarray
            Inputs fed to the input tensor, evaluated `batch_size` items at
            a time. Presumably of shape ``(N,) + image_shape`` -- confirm
            against the loaded model.
        batch_size : int
            Number of items per session run.

        Returns
        -------
        ndarray
            A float32 matrix of shape ``(len(data_x), feature_dim)``.

        """
        out = np.zeros((len(data_x), self.feature_dim), np.float32)
        _run_in_batches(
            lambda x: self.session.run(self.output_var, feed_dict=x),
            {self.input_var: data_x}, out, batch_size)
        return out
00096
00097
def create_box_encoder(model_filename, input_name="images",
                       output_name="features", batch_size=32):
    """Build a function that encodes bounding boxes of an image.

    Parameters
    ----------
    model_filename : str
        Path passed to `ImageEncoder` to load the network.
    input_name : str
        Name of the network's input tensor.
    output_name : str
        Name of the network's output tensor.
    batch_size : int
        Number of patches evaluated per network call.

    Returns
    -------
    Callable[image, boxes] -> ndarray
        A function that crops one patch per box from the image, runs all
        patches through the network and returns the resulting features.

    """
    image_encoder = ImageEncoder(model_filename, input_name, output_name)
    image_shape = image_encoder.image_shape

    def _patch_or_noise(image, box):
        # Crop the box; if that is not possible, warn and substitute a patch
        # of uniform random values so the box still yields a feature row.
        patch = extract_image_patch(image, box, image_shape[:2])
        if patch is not None:
            return patch
        print("WARNING: Failed to extract image patch: %s." % str(box))
        return np.random.uniform(0., 255., image_shape).astype(np.uint8)

    def encoder(image, boxes):
        image_patches = np.asarray(
            [_patch_or_noise(image, box) for box in boxes])
        return image_encoder(image_patches, batch_size)

    return encoder
00116
00117
def generate_detections(encoder, mot_dir, output_dir, detection_dir=None):
    """Generate detections with features.

    For every sequence found in `mot_dir`, the detection rows are read,
    extended by the feature vector of their bounding box and written to
    ``<output_dir>/<sequence>.npy``.

    Parameters
    ----------
    encoder : Callable[image, ndarray] -> ndarray
        The encoder function takes as input a BGR color image and a matrix of
        bounding boxes in format `(x, y, w, h)` and returns a matrix of
        corresponding feature vectors.
    mot_dir : str
        Path to the MOTChallenge directory (can be either train or test).
    output_dir
        Path to the output directory. Will be created if it does not exist.
    detection_dir
        Path to custom detections. The directory structure should be the default
        MOTChallenge structure: `[sequence]/det/det.txt`. If None, uses the
        standard MOTChallenge detections.

    Raises
    ------
    ValueError
        If the output directory cannot be created.

    """
    if detection_dir is None:
        detection_dir = mot_dir
    try:
        os.makedirs(output_dir)
    except OSError as exception:
        # An already existing directory is fine; anything else is fatal.
        if not (exception.errno == errno.EEXIST
                and os.path.isdir(output_dir)):
            raise ValueError(
                "Failed to created output directory '%s'" % output_dir)

    for sequence in os.listdir(mot_dir):
        print("Processing %s" % sequence)
        sequence_dir = os.path.join(mot_dir, sequence)

        # Map the frame index (the numeric file stem) to the image path.
        image_dir = os.path.join(sequence_dir, "img1")
        image_filenames = {
            int(os.path.splitext(f)[0]): os.path.join(image_dir, f)
            for f in os.listdir(image_dir)}

        detection_file = os.path.join(
            detection_dir, sequence, "det/det.txt")
        # ndmin=2 keeps a file with a single detection row two-dimensional,
        # so the column indexing below works for it as well.
        detections_in = np.loadtxt(detection_file, delimiter=',', ndmin=2)
        detections_out = []

        # The builtin `int` replaces the `np.int` alias removed in NumPy 1.24.
        frame_indices = detections_in[:, 0].astype(int)
        min_frame_idx = frame_indices.min()
        max_frame_idx = frame_indices.max()
        for frame_idx in range(min_frame_idx, max_frame_idx + 1):
            print("Frame %05d/%05d" % (frame_idx, max_frame_idx))
            mask = frame_indices == frame_idx
            rows = detections_in[mask]

            if frame_idx not in image_filenames:
                print("WARNING could not find image for frame %d" % frame_idx)
                continue
            bgr_image = cv2.imread(
                image_filenames[frame_idx], cv2.IMREAD_COLOR)
            if bgr_image is None:
                # cv2.imread signals an unreadable file by returning None;
                # skip the frame instead of failing inside the encoder.
                print("WARNING could not read image for frame %d" % frame_idx)
                continue
            # Columns 2..5 hold the box; a copy keeps the encoder from
            # modifying the detection rows.
            features = encoder(bgr_image, rows[:, 2:6].copy())
            detections_out += [np.r_[(row, feature)] for row, feature
                               in zip(rows, features)]

        output_filename = os.path.join(output_dir, "%s.npy" % sequence)
        np.save(
            output_filename, np.asarray(detections_out), allow_pickle=False)
00182
00183
def parse_args():
    """Build the command line parser and parse the process arguments.

    Returns
    -------
    argparse.Namespace
        Namespace with the attributes `model`, `mot_dir`, `detection_dir`
        and `output_dir`.

    """
    # Options are registered in the listed order, which is also the order
    # in which they appear in the generated help text.
    options = (
        ("--model", dict(
            default="resources/networks/mars-small128.pb",
            help="Path to freezed inference graph protobuf.")),
        ("--mot_dir", dict(
            help="Path to MOTChallenge directory (train or test)",
            required=True)),
        ("--detection_dir", dict(
            help="Path to custom detections. Defaults to "
            "standard MOT detections Directory structure should be the default "
            "MOTChallenge structure: [sequence]/det/det.txt",
            default=None)),
        ("--output_dir", dict(
            help="Output directory. Will be created if it does not"
            " exist.",
            default="detections")),
    )

    parser = argparse.ArgumentParser(description="Re-ID feature extractor")
    for flag, settings in options:
        parser.add_argument(flag, **settings)
    return parser.parse_args()
00203
00204
def main():
    """Command line entry point.

    Parses the arguments, builds the box encoder from the given model and
    writes the detections with features for the given MOT directory.

    """
    options = parse_args()
    box_encoder = create_box_encoder(options.model, batch_size=32)
    generate_detections(
        box_encoder,
        options.mot_dir,
        options.output_dir,
        options.detection_dir,
    )
00210
00211
# Run the feature extraction only when executed as a script, not on import.
if __name__ == "__main__":
    main()