00001 import numpy as np
00002 import os
00003 import cv2
00004 import random
00005 from PIL import Image
00006 from bbox.bbox_transform import clip_boxes
00007
00008
00009
def get_image(roidb, config):
    """
    preprocess image and return processed roidb
    :param roidb: a list of roidb
    :return: list of img as in mxnet format
    roidb add new item['im_info']
    0 --- x (width, second dim of im)
    |
    y (height, first dim of im)
    """
    num_images = len(roidb)
    processed_ims = []
    processed_roidb = []
    for i in range(num_images):
        roi_rec = roidb[i]
        # bug fix: '%s' is a printf placeholder that str.format never substitutes,
        # so the old message always read literally "%s does not exist"
        assert os.path.exists(roi_rec['image']), '{} does not exist'.format(roi_rec['image'])
        # IMREAD_IGNORE_ORIENTATION stops cv2 from applying EXIF rotation, keeping
        # pixel coordinates aligned with the stored box annotations
        im = cv2.imread(roi_rec['image'], cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if roi_rec['flipped']:
            # horizontal flip: reverse the width axis
            im = im[:, ::-1, :]
        new_rec = roi_rec.copy()
        # pick one (target_size, max_size) pair at random for scale augmentation
        scale_ind = random.randrange(len(config.SCALES))
        target_size = config.SCALES[scale_ind][0]
        max_size = config.SCALES[scale_ind][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        processed_ims.append(im_tensor)
        # im_info = [height, width, scale] of the resized (and possibly padded) image
        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        # rescale gt boxes to the resized image and clip them to its bounds
        new_rec['boxes'] = clip_boxes(np.round(roi_rec['boxes'].copy() * im_scale), im_info[:2])
        new_rec['im_info'] = im_info
        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb
00041
00042
def get_segmentation_image(segdb, config):
    """
    preprocess image and return segdb
    :param segdb: a list of segdb
    :return: list of img as mxnet format
    """
    num_images = len(segdb)
    assert num_images > 0, 'No images'
    processed_ims = []
    processed_segdb = []
    processed_seg_cls_gt = []
    for i in range(num_images):
        seg_rec = segdb[i]
        # bug fix: '%s' is a printf placeholder that str.format never substitutes,
        # so the old message always read literally "%s does not exist"
        assert os.path.exists(seg_rec['image']), '{} does not exist'.format(seg_rec['image'])
        im = np.array(cv2.imread(seg_rec['image']))

        new_rec = seg_rec.copy()

        # pick one (target_size, max_size) pair at random for scale augmentation
        scale_ind = random.randrange(len(config.SCALES))
        target_size = config.SCALES[scale_ind][0]
        max_size = config.SCALES[scale_ind][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        # im_info = [height, width, scale] of the resized (and possibly padded) image
        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        new_rec['im_info'] = im_info

        # resize the class-label mask with NEAREST so labels are never interpolated
        seg_cls_gt = np.array(Image.open(seg_rec['seg_cls_path']))
        seg_cls_gt, seg_cls_gt_scale = resize(
            seg_cls_gt, target_size, max_size, stride=config.network.IMAGE_STRIDE, interpolation=cv2.INTER_NEAREST)
        seg_cls_gt_tensor = transform_seg_gt(seg_cls_gt)

        processed_ims.append(im_tensor)
        processed_segdb.append(new_rec)
        processed_seg_cls_gt.append(seg_cls_gt_tensor)

    return processed_ims, processed_seg_cls_gt, processed_segdb
00079
def resize(im, target_size, max_size, stride=0, interpolation=cv2.INTER_LINEAR):
    """
    only resize input image to target size and return scale
    :param im: BGR image input by opencv (H x W x C, or a 2-D H x W array such as a label mask)
    :param target_size: one dimensional size (the short side)
    :param max_size: one dimensional max size (the long side)
    :param stride: if given, pad the image to designated stride
    :param interpolation: if given, using given interpolation method to resize image
    :return: resized (and possibly zero-padded) image, and the scale factor applied
    """
    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    # prevent the longer side from exceeding max_size
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=interpolation)

    if stride == 0:
        return im, im_scale
    else:
        # pad bottom/right with zeros up to the next multiple of stride
        im_height = int(np.ceil(im.shape[0] / float(stride)) * stride)
        im_width = int(np.ceil(im.shape[1] / float(stride)) * stride)
        # generalized: the old code read im.shape[2] and so crashed on 2-D inputs
        # (e.g. the segmentation gt masks this file loads via PIL) when stride > 0;
        # keep any trailing dims as-is so 3-D behavior is unchanged
        padded_im = np.zeros((im_height, im_width) + im.shape[2:])
        padded_im[:im.shape[0], :im.shape[1], ...] = im
        return padded_im, im_scale
00109
def transform(im, pixel_means):
    """
    transform into mxnet tensor
    substract pixel size and transform to correct format
    :param im: [height, width, channel] in BGR
    :param pixel_means: [B, G, R pixel means]
    :return: [batch, channel, height, width]
    """
    rows, cols = im.shape[0], im.shape[1]
    im_tensor = np.zeros((1, 3, rows, cols))
    # reverse the channel order (BGR -> RGB) while removing each channel's mean
    for out_c, in_c in enumerate((2, 1, 0)):
        im_tensor[0, out_c] = im[:, :, in_c] - pixel_means[in_c]
    return im_tensor
00122
def transform_seg_gt(gt):
    """
    transform segmentation gt image into mxnet tensor
    :param gt: [height, width, channel = 1]
    :return: [batch, channel = 1, height, width]
    """
    height, width = gt.shape[0], gt.shape[1]
    # allocate the 4-D tensor and copy the mask into the single channel slot
    gt_tensor = np.zeros((1, 1, height, width))
    gt_tensor[0, 0] = gt[:, :]
    return gt_tensor
00133
def transform_inverse(im_tensor, pixel_means):
    """
    transform from mxnet im_tensor to ordinary RGB image
    im_tensor is limited to one image
    :param im_tensor: [batch, channel, height, width]
    :param pixel_means: [B, G, R pixel means]
    :return: im [height, width, channel(RGB)]
    """
    assert im_tensor.shape[0] == 1
    # work on a copy so the caller's tensor is untouched;
    # move channel axis last: [1, c, h, w] -> [h, w, c]
    im = im_tensor.copy().transpose((0, 2, 3, 1))[0]
    assert im.shape[2] == 3
    # add the means back in RGB order (pixel_means is stored as [B, G, R])
    im += pixel_means[[2, 1, 0]]
    return im.astype(np.uint8)
00152
def tensor_vstack(tensor_list, pad=0):
    """
    vertically stack tensors
    :param tensor_list: list of tensor to be stacked vertically
    :param pad: label to pad with
    :return: tensor with max shape
    """
    ndim = len(tensor_list[0].shape)
    dtype = tensor_list[0].dtype
    # output: first dim is the sum of all first dims, remaining dims are per-dim maxima
    dimensions = [sum(tensor.shape[0] for tensor in tensor_list)]
    for dim in range(1, ndim):
        dimensions.append(max(tensor.shape[dim] for tensor in tensor_list))
    if pad == 0:
        all_tensor = np.zeros(tuple(dimensions), dtype=dtype)
    elif pad == 1:
        all_tensor = np.ones(tuple(dimensions), dtype=dtype)
    else:
        all_tensor = np.full(tuple(dimensions), pad, dtype=dtype)
    # copy each tensor into its slot; areas beyond a tensor's shape keep the pad value.
    # Generalized over the old explicit ndim 1..4 branches (any rank now works), and
    # fixed: the old code indexed every tensor with tensor_list[0].shape[0] ('islice'),
    # which misplaced data whenever first dims differed; track the true offset instead.
    offset = 0
    for tensor in tensor_list:
        index = (slice(offset, offset + tensor.shape[0]),) + \
                tuple(slice(0, size) for size in tensor.shape[1:])
        all_tensor[index] = tensor
        offset += tensor.shape[0]
    return all_tensor