rail_object_detector: image_processing.py Source File

Go to the documentation of this file.
00001 import numpy as np
00002 import cv2
00003 
00004 
def resize(im, target_size, max_size):
    """
    Rescale an image uniformly so its short side matches target_size,
    unless that would push the long side past max_size.
    :param im: image array whose first two axes are height and width
               (original note: BGR image as loaded by opencv)
    :param target_size: desired length of the shorter side
    :param max_size: upper bound for the length of the longer side
    :return: tuple (resized image, scale factor that was applied)
    """
    height_width = im.shape[0:2]
    short_side = np.min(height_width)
    long_side = np.max(height_width)

    # First try to bring the short side to the requested size.
    im_scale = float(target_size) / float(short_side)

    # If the long side would then overshoot, scale by the long side instead.
    if np.round(im_scale * long_side) > max_size:
        im_scale = float(max_size) / float(long_side)

    resized = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                         interpolation=cv2.INTER_LINEAR)
    return resized, im_scale
00022 
00023 
def transform(im, pixel_means, need_mean=False):
    """
    Convert an image into a batched, channel-first tensor.

    NOTE: this function is currently disabled. Its first statement is an
    unconditional failing assert, so every call raises AssertionError.
    The rest of the body only runs if assertions are stripped (python -O).

    :param im: [height, width, channel] in BGR
    :param pixel_means: [[[R, G, B pixel means]]]
    :param need_mean: when true, subtract pixel_means after the channel swap
    :return: [batch, channel, height, width]
    """
    assert False, "shouldn't reach here."
    swapped = im.copy()
    # Reverse the order of the first three channels (BGR -> RGB).
    swapped[:, :, (0, 1, 2)] = swapped[:, :, (2, 1, 0)]
    swapped = swapped.astype(float)
    if need_mean:
        swapped -= pixel_means
    # Add a leading batch axis, then move the channel axis to position 1.
    batched = swapped[np.newaxis, :]
    return batched.transpose((0, 3, 1, 2))
00043 
00044 
def transform_inverse(im_tensor, pixel_means):
    """
    Convert a single-image, channel-first tensor back to a uint8 image.

    The input tensor is not modified. pixel_means is added back, the
    result is clipped to [0, 255], and then cast to uint8. The clip stops
    out-of-range values from wrapping around in the cast.

    :param im_tensor: [batch, channel, height, width], batch must be 1
    :param pixel_means: [[[R, G, B pixel means]]], broadcast over the image
    :return: im [height, width, channel(RGB)] as np.uint8
    :raises AssertionError: if batch size is not 1 or channel count is not 3
    """
    assert im_tensor.shape[0] == 1, "im_tensor must contain exactly one image"
    # Move channels last and copy, so the caller's tensor is left untouched.
    im = np.array(im_tensor[0].transpose((1, 2, 0)))
    assert im.shape[2] == 3, "im_tensor must have exactly 3 channels"
    # In-place addition of float means into an integer array raises a
    # casting error, so promote non-float input to float first. Float
    # inputs keep their dtype and therefore their original arithmetic.
    if not np.issubdtype(im.dtype, np.floating):
        im = im.astype(float)
    im += pixel_means
    # Saturate instead of letting the uint8 cast wrap out-of-range values.
    np.clip(im, 0, 255, out=im)
    return im.astype(np.uint8)
00063 
00064 
def tensor_vstack(tensor_list, pad=0):
    """
    Stack tensors along the first axis, padding trailing axes to match.

    For tensors with more than one dimension, every axis except the first
    is padded at its end with `pad` up to the largest size found in
    tensor_list, and the padded tensors are joined with np.vstack.
    One-dimensional tensors are simply concatenated with np.hstack.
    The caller's list and its tensors are left unmodified.

    :param tensor_list: non-empty list of arrays sharing the same number
                        of dimensions (taken from the first element)
    :param pad: constant value used to fill the padded region
    :return: single stacked array with the maximal trailing shape
    """
    ndim = len(tensor_list[0].shape)
    if ndim == 1:
        return np.hstack(tensor_list)

    # Largest extent along every axis after the first.
    max_dims = [max(tensor.shape[dim] for tensor in tensor_list)
                for dim in range(1, ndim)]

    padded = []
    for tensor in tensor_list:
        # Never pad the stacking axis; pad only at the end of the others.
        pad_width = [(0, 0)]
        pad_width.extend((0, max_dims[dim - 1] - tensor.shape[dim])
                         for dim in range(1, ndim))
        # np.pad is the public API; the np.lib.pad alias is not relied on.
        padded.append(np.pad(tensor, pad_width, 'constant',
                             constant_values=pad))
    return np.vstack(padded)
00084     return all_tensor