jsk_recognition_utils: vgg_cnn_m

Go to the documentation of this file.
00001 import chainer
00002 from chainer import cuda
00003 import chainer.functions as F
00004 import chainer.links as L
00005 from chainer import Variable
00006 
00007 from roi_pooling_2d import roi_pooling_2d
00008 
00009 
class VGG_CNN_M_1024(chainer.Chain):
    """Convolutional network with an RoI-pooling classification/regression head.

    Five convolution layers feed an RoI pooling layer (6x6 output per RoI),
    followed by two fully connected layers and two sibling output layers:
    ``cls_score`` (``n_class`` outputs) and ``bbox_pred`` (``4 * n_class``
    outputs).

    Attributes set by this class:
        n_class: number of output classes given to the constructor.
        bg_label: label value treated as background when weighting the
            bounding-box loss.
        cls_loss, bbox_loss: set by ``__call__`` on the training path only.
    """

    def __init__(self, n_class=21, bg_label=-1):
        """Build the layers.

        Args:
            n_class: number of classes scored by ``cls_score``; ``bbox_pred``
                emits 4 values per class.
            bg_label: value in the class targets that marks background RoIs;
                such RoIs get zero weight on the bounding-box loss.
        """
        super(VGG_CNN_M_1024, self).__init__(
            conv1=L.Convolution2D(3, 96, ksize=7, stride=2),
            conv2=L.Convolution2D(96, 256, ksize=5, stride=2, pad=1),
            conv3=L.Convolution2D(256, 512, ksize=3, stride=1, pad=1),
            conv4=L.Convolution2D(512, 512, ksize=3, stride=1, pad=1),
            conv5=L.Convolution2D(512, 512, ksize=3, stride=1, pad=1),
            # 18432 = 512 channels * 6 * 6 pooled cells per RoI.
            fc6=L.Linear(18432, 4096),
            fc7=L.Linear(4096, 1024),
            cls_score=L.Linear(1024, n_class),
            bbox_pred=L.Linear(1024, 4 * n_class)
        )
        self.n_class = n_class
        self.bg_label = bg_label

    def __call__(self, x, rois, t=None, train=False):
        """Run the network; return predictions or the training loss.

        Args:
            x: input variable fed to ``conv1`` (3 input channels).
                Assumes an image batch in NCHW layout -- TODO confirm.
            rois: regions of interest, passed unchanged to
                ``roi_pooling_2d``. Layout is defined by that helper.
            t: ``None`` for inference, or a pair ``(t_cls, t_bbox)`` of
                class and bounding-box targets for training.
            train: must be false when ``t`` is ``None`` and true otherwise;
                also switches dropout on.

        Returns:
            ``(cls_score, bbox_pred)`` when ``t`` is ``None``, where
            ``cls_score`` is the softmax of the class scores. Otherwise the
            combined loss ``cls_loss + sum(lambda * bbox_loss)``.

        Raises:
            AssertionError: if ``train`` is inconsistent with ``t``.
        """
        # Convolutional trunk: the first two stages use LRN + max pooling.
        h = self.conv1(x)
        h = F.relu(h)
        h = F.local_response_normalization(h, n=5, k=2, alpha=5e-4, beta=.75)
        h = F.max_pooling_2d(h, ksize=3, stride=2)

        h = self.conv2(h)
        h = F.relu(h)
        h = F.local_response_normalization(h, n=5, k=2, alpha=5e-4, beta=.75)
        h = F.max_pooling_2d(h, ksize=3, stride=2)

        h = F.relu(self.conv3(h))
        h = F.relu(self.conv4(h))
        h = F.relu(self.conv5(h))

        # Pool each RoI to a fixed 6x6 grid. spatial_scale is 1/16;
        # presumably it maps image coordinates onto the conv5 feature map
        # (semantics live in roi_pooling_2d -- verify there).
        h = roi_pooling_2d(h, rois, 6, 6, spatial_scale=0.0625)

        h = self.fc6(h)
        h = F.relu(h)
        h = F.dropout(h, train=train, ratio=.5)

        h = self.fc7(h)
        h = F.relu(h)
        h = F.dropout(h, train=train, ratio=.5)

        h_cls_score = self.cls_score(h)
        bbox_pred = self.bbox_pred(h)

        if t is None:
            assert train is False, 'train must be False when t is None'
            # Softmax is only needed for inference; the training loss
            # below consumes the raw scores.
            cls_score = F.softmax(h_cls_score)
            return cls_score, bbox_pred

        assert train, 'train must be true when targets are given'
        t_cls, t_bbox = t
        self.cls_loss = F.softmax_cross_entropy(h_cls_score, t_cls)
        self.bbox_loss = F.smooth_l1_loss(bbox_pred, t_bbox)

        # Weight the box loss by 0.5 for non-background targets and by 0
        # for targets equal to bg_label.
        xp = cuda.get_array_module(x.data)
        lambda_ = (0.5 * (t_cls.data != self.bg_label)).astype(xp.float32)
        lambda_ = Variable(lambda_, volatile=not train)
        # Named `loss` rather than `L` so the chainer.links alias is not
        # shadowed inside this method.
        loss = self.cls_loss + F.sum(lambda_ * self.bbox_loss)
        return loss