jsk_recognition_utils: deep_sort

Go to the documentation of this file.
00001 import chainer
00002 import chainer.functions as F
00003 import chainer.links as L
00004 from chainer import initializers
00005 
00006 
class BlockA(chainer.Chain):
    """Residual block built from two 3x3 convolutions.

    The forward pass computes
    ``skip(x) + conv2(dropout(act(bn1(conv1(pre(x))))))`` where ``pre`` is
    ``act(bn0(.))`` (or the identity when ``is_first`` is set) and ``skip``
    is a 1x1 projection convolution when the channel count changes, the
    identity otherwise.

    Args:
        in_size (int): Number of input channels.
        out_size (int): Number of output channels. Must be either
            ``in_size`` or ``2 * in_size``.
        stride (int): Stride of the first convolution and of the projection.
        activation (callable): Activation applied after each batch
            normalization.
        is_first (bool): If true, the leading batch normalization and
            activation are skipped and ``bn0`` is not created.
        pad (int): Padding of the 3x3 convolutions.
        projection_pad (int): Padding of the 1x1 projection convolution.
        flag (bool): If true, the first convolution uses no padding of its
            own; instead the input is zero padded by one pixel on the
            bottom and right sides only before the convolution.

    Raises:
        ValueError: If ``out_size`` is neither ``in_size`` nor
            ``2 * in_size``.
    """

    def __init__(self, in_size, out_size, stride=1, activation=F.elu,
                 is_first=False, pad=1, projection_pad=0, flag=False):
        super(BlockA, self).__init__()
        initialW = initializers.HeNormal()

        self.in_size = in_size
        self.out_size = out_size
        self.is_first = is_first
        self.activation = activation
        self.flag = flag
        with self.init_scope():
            # Use truthiness here, exactly like __call__ does, so that the
            # layers that get built always match the layers that get used
            # (e.g. is_first=None or flag=0 no longer diverge).
            if not is_first:
                self.bn0 = L.BatchNormalization(in_size)
            # With ``flag`` the padding is applied by hand in __call__.
            conv1_pad = 0 if flag else pad
            self.conv1 = L.Convolution2D(
                in_size, out_size, 3, stride, conv1_pad,
                initialW=initialW, nobias=True)
            self.bn1 = L.BatchNormalization(out_size)
            self.conv2 = L.Convolution2D(
                out_size, out_size, 3, 1,
                pad, initialW=initialW, nobias=False)
            if in_size != out_size:
                if out_size != 2 * in_size:
                    raise ValueError(
                        'out_size should be 2 * in_size: '
                        '{} != {}'.format(out_size, 2 * in_size))
                self.projection = L.Convolution2D(
                    in_size, out_size,
                    1, stride, projection_pad, nobias=True)

    def __call__(self, x):
        """Apply the block to ``x``.

        Args:
            x: Input of shape ``(batchsize, channel, height, width)``.

        Returns:
            The sum of the (possibly projected) input and the residual
            branch output.
        """
        batchsize, channel, height, width = x.shape
        if self.is_first:
            h = x
        else:
            h = self.activation(self.bn0(x))
        if self.flag:
            # (top, bottom, left, right) padding of (0, 1, 0, 1): append one
            # row of zeros at the bottom, then one column at the right.
            # The zeros take the dtype of ``h`` because F.concat requires
            # all of its inputs to share one dtype.
            # NOTE(review): presumably reproduces asymmetric padding of the
            # reference model this network was ported from -- confirm.
            bottom = self.xp.zeros(
                (batchsize, channel, 1, width), dtype=h.dtype)
            right = self.xp.zeros(
                (batchsize, channel, height + 1, 1), dtype=h.dtype)
            h = F.concat([F.concat([h, bottom], axis=2), right], axis=3)
        h = self.activation(self.bn1(self.conv1(h)))
        h = F.dropout(h, ratio=0.6)
        h = self.conv2(h)
        if self.in_size != self.out_size:
            # Channel count changed: project the input to the new shape.
            return self.projection(x) + h
        return x + h
00066 
00067 
class DeepSortFeatureExtractor(chainer.Chain):
    """Convolutional network producing scaled cosine-similarity logits.

    Two 3x3 convolutions and a max pooling are followed by six ``BlockA``
    residual blocks, a 128-unit fully connected layer and a cosine
    classifier against 1501 learned mean vectors.
    """

    def __init__(self):
        super(DeepSortFeatureExtractor, self).__init__()

        with self.init_scope():
            # Stem: two 3x3 convolutions, each followed by batch norm.
            self.conv1_1 = L.Convolution2D(
                in_channels=3, out_channels=32,
                ksize=3, stride=1, pad=1, nobias=True)
            self.bn1 = L.BatchNormalization(32)
            self.conv1_2 = L.Convolution2D(
                in_channels=32, out_channels=32,
                ksize=3, stride=1, pad=1, nobias=True)
            self.bn2 = L.BatchNormalization(32)

            # Residual stages: 32 -> 64 -> 128 channels.
            self.conv2_1 = BlockA(32, 32, stride=1, is_first=True)
            self.conv2_3 = BlockA(32, 32, stride=1)
            self.conv3_1 = BlockA(
                32, 64, stride=2, pad=1, projection_pad=0)
            self.conv3_3 = BlockA(64, 64, stride=1)
            self.conv4_1 = BlockA(
                64, 128, stride=2, pad=1, projection_pad=0, flag=True)
            self.conv4_3 = BlockA(128, 128, stride=1)

            # 16384 = 128 channels * 16 * 8, the flattened feature map size
            # for a (3, 128, 64) input.
            self.fc1 = L.Linear(16384, 128, nobias=True)
            self.fc1_bn = L.BatchNormalization(128)

            # Cosine classifier: one 128-d mean vector and one scale per
            # class.
            self.ball = L.BatchNormalization(128)
            self.mean_vectors = chainer.Parameter(0, shape=[128, 1501])
            self.scale = chainer.Parameter(0, shape=[1501])

    def __call__(self, x):
        """Compute the class logits for a batch of images.

        Args:
            x: Input batch; x.shape == (batchsize, 3, 128, 64).

        Returns:
            Logits of shape ``(batchsize, 1501)``.
        """
        n_samples = x.shape[0]

        out = F.elu(self.bn1(self.conv1_1(x)))
        out = F.elu(self.bn2(self.conv1_2(out)))
        out = F.max_pooling_2d(out, ksize=3, stride=2, cover_all=False)

        residual_blocks = (
            self.conv2_1, self.conv2_3,
            self.conv3_1, self.conv3_3,
            self.conv4_1, self.conv4_3,
        )
        for block in residual_blocks:
            out = block(out)

        # Flatten, then project to the 128-d embedding.
        out = F.reshape(out, (n_samples, -1))
        out = F.dropout(out, ratio=0.6)
        out = F.elu(self.fc1_bn(self.fc1(out)))

        # Features in rows: L2-normalize along axis 1 (the default axis).
        unit_features = F.normalize(self.ball(out), eps=1e-8)
        # Mean vectors in columns: L2-normalize along axis 0.
        unit_means = F.normalize(self.mean_vectors, axis=0, eps=1e-8)
        cosine = F.matmul(unit_features, unit_means)

        # Softplus of the per-class scale, repeated for every sample.
        per_class_scale = F.softplus(self.scale)
        tiled_scale = F.tile(per_class_scale[None], (n_samples, 1))
        return tiled_scale * cosine
00120         return logits