import numpy as np
import cv2


def resize(im, target_size, max_size):
    """
    only resize the input image so its short side equals target_size
    (capped so the long side does not exceed max_size) and return the scale
    :param im: BGR image as read by OpenCV
    :param target_size: size of the short side after resizing
    :param max_size: maximum allowed size of the long side
    :return: resized image and the scale factor applied
    """
    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    # cap the scale so the long side does not exceed max_size
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
    return im, im_scale


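# Usage sketch (illustrative): typical Faster R-CNN style settings with a
# 600-pixel short side and a 1000-pixel long-side cap; 'example.jpg' is a
# placeholder path, not a file this module expects.
#
#     im = cv2.imread('example.jpg')               # BGR, shape [H, W, 3]
#     im_resized, im_scale = resize(im, 600, 1000)
#     # boxes in the original image are mapped by multiplying with im_scale

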
def transform(im, pixel_means, need_mean=False):
    """
    transform an image into an mxnet tensor:
    convert BGR to RGB, optionally subtract the pixel means and reorder to NCHW
    :param im: [height, width, channel] in BGR
    :param pixel_means: [[[R, G, B pixel means]]]
    :param need_mean: subtract pixel_means from the image if True
    :return: [batch, channel, height, width]
    """
    im = im.copy()
    im[:, :, (0, 1, 2)] = im[:, :, (2, 1, 0)]  # BGR -> RGB
    im = im.astype(float)
    if need_mean:
        im -= pixel_means
    im_tensor = im[np.newaxis, :]
    # [batch, height, width, channel] -> [batch, channel, height, width]
    channel_swap = (0, 3, 1, 2)
    im_tensor = im_tensor.transpose(channel_swap)
    return im_tensor


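# Usage sketch (illustrative): convert a resized BGR image into an NCHW float
# tensor; the pixel means shown are common ImageNet-style RGB means, given
# only as an example and not required by this module.
#
#     pixel_means = np.array([[[123.68, 116.78, 103.94]]])
#     im_tensor = transform(im_resized, pixel_means, need_mean=True)
#     # im_tensor.shape == (1, 3, height, width)

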
def transform_inverse(im_tensor, pixel_means):
    """
    transform an mxnet im_tensor back into an ordinary RGB image
    im_tensor is limited to a single image
    :param im_tensor: [batch, channel, height, width]
    :param pixel_means: [[[R, G, B pixel means]]]
    :return: im [height, width, channel] in RGB
    """
    assert im_tensor.shape[0] == 1
    im_tensor = im_tensor.copy()
    # [batch, channel, height, width] -> [batch, height, width, channel]
    channel_swap = (0, 2, 3, 1)
    im_tensor = im_tensor.transpose(channel_swap)
    im = im_tensor[0]
    assert im.shape[2] == 3
    im += pixel_means
    im = im.astype(np.uint8)
    return im


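# Usage sketch (illustrative): recover a viewable image from the tensor built
# by transform above; the result is RGB, so convert back to BGR before saving
# or displaying with OpenCV. 'debug.jpg' is a placeholder output path.
#
#     im_rgb = transform_inverse(im_tensor, pixel_means)
#     cv2.imwrite('debug.jpg', cv2.cvtColor(im_rgb, cv2.COLOR_RGB2BGR))

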
def tensor_vstack(tensor_list, pad=0):
    """
    vertically stack tensors, padding every trailing dimension to the max shape
    :param tensor_list: list of tensors to be stacked along the first axis
    :param pad: value to pad with
    :return: stacked tensor with the maximum shape in each trailing dimension
    """
    ndim = len(tensor_list[0].shape)
    if ndim == 1:
        return np.hstack(tensor_list)
    # maximum extent of every dimension except the stacking axis
    dimensions = [0]
    for dim in range(1, ndim):
        dimensions.append(max([tensor.shape[dim] for tensor in tensor_list]))
    # pad each tensor up to the common shape before stacking
    for ind, tensor in enumerate(tensor_list):
        pad_shape = [(0, 0)]
        for dim in range(1, ndim):
            pad_shape.append((0, dimensions[dim] - tensor.shape[dim]))
        tensor_list[ind] = np.pad(tensor, pad_shape, 'constant', constant_values=pad)
    all_tensor = np.vstack(tensor_list)
    return all_tensor
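

if __name__ == '__main__':
    # Minimal usage sketch: exercises the helpers above on a synthetic image.
    # The 600/1000 sizes and the ImageNet-style RGB pixel means are assumed
    # example values, not values this module requires.
    pixel_means = np.array([[[123.68, 116.78, 103.94]]])
    im = np.random.randint(0, 256, size=(480, 640, 3), dtype=np.uint8)  # fake BGR image
    im_resized, im_scale = resize(im, 600, 1000)
    print('resized shape:', im_resized.shape, 'scale:', im_scale)
    im_tensor = transform(im_resized, pixel_means, need_mean=True)
    print('tensor shape:', im_tensor.shape)     # (1, 3, height, width)
    im_rgb = transform_inverse(im_tensor, pixel_means)
    print('recovered shape:', im_rgb.shape)     # (height, width, 3) in RGB
    stacked = tensor_vstack([np.ones((1, 2, 3)), np.ones((1, 2, 5))], pad=0)
    print('stacked shape:', stacked.shape)      # (2, 2, 5), shorter tensor padded with 0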