image.py
Go to the documentation of this file.
00001 import numpy as np
00002 import os
00003 import cv2
00004 import random
00005 from PIL import Image
00006 from bbox.bbox_transform import clip_boxes
00007 
00008 
# TODO: This two functions should be merged with individual data loader
def get_image(roidb, config):
    """
    Preprocess images and return the processed roidb.
    :param roidb: a list of roidb entries (dicts with 'image', 'flipped', 'boxes')
    :param config: config with SCALES, network.IMAGE_STRIDE, network.PIXEL_MEANS
    :return: (processed_ims, processed_roidb); each new record gains 'im_info'
    0 --- x (width, second dim of im)
    |
    y (height, first dim of im)
    """
    num_images = len(roidb)
    processed_ims = []
    processed_roidb = []
    for i in range(num_images):
        roi_rec = roidb[i]
        # bug fix: original mixed a %-style placeholder with str.format, so the
        # offending path was never interpolated into the assertion message
        assert os.path.exists(roi_rec['image']), '{} does not exist'.format(roi_rec['image'])
        im = cv2.imread(roi_rec['image'], cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if roi_rec['flipped']:
            # horizontal flip; gt boxes were already flipped by the roidb builder
            im = im[:, ::-1, :]
        new_rec = roi_rec.copy()
        # pick a random (target, max) scale pair for multi-scale training
        scale_ind = random.randrange(len(config.SCALES))
        target_size = config.SCALES[scale_ind][0]
        max_size = config.SCALES[scale_ind][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        processed_ims.append(im_tensor)
        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        # scale gt boxes to the resized image and clip them to its padded bounds
        new_rec['boxes'] = clip_boxes(np.round(roi_rec['boxes'].copy() * im_scale), im_info[:2])
        new_rec['im_info'] = im_info
        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb
00041 
00042 
def get_segmentation_image(segdb, config):
    """
    Preprocess images and return the processed segdb.
    :param segdb: a list of segdb entries (dicts with 'image', 'seg_cls_path')
    :param config: config with SCALES, network.IMAGE_STRIDE, network.PIXEL_MEANS
    :return: (processed_ims, processed_seg_cls_gt, processed_segdb), all lists
    """
    num_images = len(segdb)
    assert num_images > 0, 'No images'
    processed_ims = []
    processed_segdb = []
    processed_seg_cls_gt = []
    for i in range(num_images):
        seg_rec = segdb[i]
        # bug fix: original mixed a %-style placeholder with str.format, so the
        # offending path was never interpolated into the assertion message
        assert os.path.exists(seg_rec['image']), '{} does not exist'.format(seg_rec['image'])
        im = np.array(cv2.imread(seg_rec['image']))

        new_rec = seg_rec.copy()

        # pick a random (target, max) scale pair for multi-scale training
        scale_ind = random.randrange(len(config.SCALES))
        target_size = config.SCALES[scale_ind][0]
        max_size = config.SCALES[scale_ind][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        new_rec['im_info'] = im_info

        # the gt label map is resized with nearest neighbour so class ids are
        # never blended into non-existent labels
        seg_cls_gt = np.array(Image.open(seg_rec['seg_cls_path']))
        seg_cls_gt, seg_cls_gt_scale = resize(
            seg_cls_gt, target_size, max_size, stride=config.network.IMAGE_STRIDE, interpolation=cv2.INTER_NEAREST)
        seg_cls_gt_tensor = transform_seg_gt(seg_cls_gt)

        processed_ims.append(im_tensor)
        processed_segdb.append(new_rec)
        processed_seg_cls_gt.append(seg_cls_gt_tensor)

    return processed_ims, processed_seg_cls_gt, processed_segdb
00079 
def resize(im, target_size, max_size, stride=0, interpolation=cv2.INTER_LINEAR):
    """
    Resize the input image to the target size and return the scale used.
    :param im: BGR image input by opencv (HxWxC), or a 2-D label map (HxW)
    :param target_size: one dimensional size (the short side)
    :param max_size: one dimensional max size (the long side)
    :param stride: if given, pad the resized image up to a multiple of stride
    :param interpolation: interpolation method used by cv2.resize
    :return: (resized image, im_scale)
    """
    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    # prevent bigger axis from being more than max_size:
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=interpolation)

    if stride == 0:
        return im, im_scale
    # pad to a multiple of stride with zeros (bottom/right)
    im_height = int(np.ceil(im.shape[0] / float(stride)) * stride)
    im_width = int(np.ceil(im.shape[1] / float(stride)) * stride)
    if im.ndim == 3:
        padded_im = np.zeros((im_height, im_width, im.shape[2]))
        padded_im[:im.shape[0], :im.shape[1], :] = im
    else:
        # bug fix: 2-D inputs (e.g. the segmentation gt map fed in by
        # get_segmentation_image) previously raised IndexError on im.shape[2]
        padded_im = np.zeros((im_height, im_width))
        padded_im[:im.shape[0], :im.shape[1]] = im
    return padded_im, im_scale
00109 
def transform(im, pixel_means):
    """
    Convert a BGR image into an mxnet-style tensor.
    Subtracts the per-channel pixel means and reorders channels to RGB.
    :param im: [height, width, channel] in BGR
    :param pixel_means: [B, G, R pixel means]
    :return: float tensor [batch=1, channel=3, height, width] in RGB order
    """
    # flip BGR -> RGB and subtract the matching means in one vectorized step,
    # then move channels first and prepend the batch axis
    means_rgb = np.asarray(pixel_means, dtype=np.float64)[::-1]
    centered = im[:, :, ::-1].astype(np.float64) - means_rgb
    return centered.transpose(2, 0, 1)[np.newaxis, :, :, :]
00122 
def transform_seg_gt(gt):
    """
    Wrap a segmentation gt label map into an mxnet-style tensor.
    :param gt: [height, width] class-id map
    :return: float tensor [batch=1, channel=1, height, width]
    """
    # cast to float (the original filled a float64 zeros buffer) and
    # prepend the batch and channel axes
    return np.asarray(gt, dtype=np.float64)[np.newaxis, np.newaxis, :, :]
00133 
def transform_inverse(im_tensor, pixel_means):
    """
    Recover an ordinary RGB image from an mxnet im_tensor.
    im_tensor is limited to a single-image batch.
    :param im_tensor: [batch=1, channel=3, height, width]
    :param pixel_means: [B, G, R pixel means] (numpy array)
    :return: uint8 im [height, width, channel(RGB)]
    """
    assert im_tensor.shape[0] == 1
    # work on a copy so the caller's tensor is untouched; CHW -> HWC, drop batch
    im = im_tensor.copy().transpose((0, 2, 3, 1))[0]
    assert im.shape[2] == 3
    # add the means back in RGB order, then quantize to bytes
    im += pixel_means[[2, 1, 0]]
    return im.astype(np.uint8)
00152 
def tensor_vstack(tensor_list, pad=0):
    """
    Vertically stack tensors, padding trailing dims to the per-dim maximum.
    :param tensor_list: list of tensors to be stacked along the first axis
    :param pad: value used to fill the padded region
    :return: one tensor of dtype tensor_list[0].dtype with max trailing shape
    """
    ndim = tensor_list[0].ndim
    dtype = tensor_list[0].dtype
    # output first dim is the sum of all first dims; every other dim is the max
    dimensions = [sum(t.shape[0] for t in tensor_list)]
    for dim in range(1, ndim):
        dimensions.append(max(t.shape[dim] for t in tensor_list))
    # np.full covers the old zeros/ones/full three-way branch uniformly
    all_tensor = np.full(tuple(dimensions), pad, dtype=dtype)
    # copy each tensor into its row block via a slice tuple; this generalizes
    # the original hard-coded ndim 1..4 dispatch to any rank, and fixes the
    # misplacement bug when tensors had unequal first dims (the old code
    # assumed every tensor shared tensor_list[0].shape[0])
    offset = 0
    for tensor in tensor_list:
        index = (slice(offset, offset + tensor.shape[0]),) + \
                tuple(slice(0, size) for size in tensor.shape[1:])
        all_tensor[index] = tensor
        offset += tensor.shape[0]
    return all_tensor


rail_object_detector
Author(s):
autogenerated on Sat Jun 8 2019 20:26:30