jsk_perception: image_viewer.py Source File

Go to the documentation of this file.
00001 # vim: expandtab:ts=4:sw=4
00002 """
00003 This module contains an image viewer and drawing routines based on OpenCV.
00004 """
00005 import numpy as np
00006 import cv2
00007 import time
00008 
00009 
def is_in_bounds(mat, roi):
    """Check if ROI is fully contained in the image.

    A ROI whose right/bottom edge coincides with the image border is
    considered contained, i.e., the ROI covering the whole image
    ``(0, 0, width, height)`` is valid.

    Parameters
    ----------
    mat : ndarray
        An ndarray of ndim>=2. The first axis is interpreted as the y-axis
        (rows), the second axis as the x-axis (columns).
    roi : (int, int, int, int)
        Region of interest (x, y, width, height) where (x, y) is the top-left
        corner.

    Returns
    -------
    bool
        Returns True if the ROI is contained in mat.

    """
    x, y, width, height = roi[0], roi[1], roi[2], roi[3]
    # The end coordinates are exclusive (they are used as slice bounds), so
    # they may be equal to the image extent but must not exceed it.
    if x < 0 or x + width > mat.shape[1]:
        return False
    if y < 0 or y + height > mat.shape[0]:
        return False
    return True
00032 
00033 
def view_roi(mat, roi):
    """Return the sub-array of an image that is covered by a ROI.

    The ROI is expected to be valid, i.e., fully contained in the image.
    The result is obtained by basic slicing of `mat`.

    Parameters
    ----------
    mat : ndarray
        An ndarray of ndim=2 or ndim=3.
    roi : (int, int, int, int)
        Region of interest (x, y, width, height) where (x, y) is the top-left
        corner.

    Returns
    -------
    ndarray
        A view of the roi.

    """
    # Columns correspond to the x-axis, rows to the y-axis.
    cols = slice(roi[0], roi[0] + roi[2])
    rows = slice(roi[1], roi[1] + roi[3])
    if mat.ndim == 2:
        return mat[rows, cols]
    # Keep the trailing (channel) axis untouched.
    return mat[rows, cols, :]
00059 
00060 
class ImageViewer(object):
    """An image viewer with drawing routines and video capture capabilities.

    Key Bindings:

    * 'SPACE' : pause
    * 's' : advance a single frame and pause
    * 'ESC' : quit

    Parameters
    ----------
    update_ms : int
        Number of milliseconds between frames (1000 / frames per second).
    window_shape : (int, int)
        Shape of the window (width, height).
    caption : Optional[str]
        Title of the window.

    Attributes
    ----------
    image : ndarray
        Color image of shape (height, width, 3). You may directly manipulate
        this image to change the view. Otherwise, you may call any of the
        drawing routines of this class. Internally, the image is treated as
        being in BGR color space.

        Note that the image is resized to the image viewer's window_shape
        just prior to visualization. Therefore, you may pass differently sized
        images and call drawing routines with the appropriate, original point
        coordinates.
    color : (int, int, int)
        Current BGR color code that applies to all drawing routines.
        Values are in range [0-255].
    text_color : (int, int, int)
        Current BGR text color code that applies to all text rendering
        routines. Values are in range [0-255].
    thickness : int
        Stroke width in pixels that applies to all drawing routines.

    """

    def __init__(self, update_ms, window_shape=(640, 480), caption="Figure 1"):
        self._window_shape = tuple(window_shape)
        self._caption = caption
        self._update_ms = update_ms
        self._video_writer = None
        self._user_fun = lambda: None
        self._terminate = False

        # window_shape is (width, height) whereas the image buffer is indexed
        # (row, column), i.e., it must be allocated as (height, width, 3).
        width, height = self._window_shape[:2]
        self.image = np.zeros((height, width, 3), dtype=np.uint8)
        self._color = (0, 0, 0)
        self.text_color = (255, 255, 255)
        self.thickness = 1

    @property
    def color(self):
        """(int, int, int): Current BGR drawing color."""
        return self._color

    @color.setter
    def color(self, value):
        if len(value) != 3:
            raise ValueError("color must be tuple of 3")
        # OpenCV drawing routines expect plain Python integers.
        self._color = tuple(int(c) for c in value)

    def rectangle(self, x, y, w, h, label=None):
        """Draw a rectangle.

        Parameters
        ----------
        x : float | int
            Top left corner of the rectangle (x-axis).
        y : float | int
            Top left corner of the rectangle (y-axis).
        w : float | int
            Width of the rectangle.
        h : float | int
            Height of the rectangle.
        label : Optional[str]
            A text label that is placed at the top left corner of the
            rectangle.

        """
        pt1 = int(x), int(y)
        pt2 = int(x + w), int(y + h)
        cv2.rectangle(self.image, pt1, pt2, self._color, self.thickness)
        if label is not None:
            text_size = cv2.getTextSize(
                label, cv2.FONT_HERSHEY_PLAIN, 1, self.thickness)
            text_width, text_height = text_size[0]

            # Filled box in the drawing color (5 px padding on each side)
            # that serves as background of the label.
            text_origin = pt1[0] + 5, pt1[1] + 5 + text_height
            box_end = pt1[0] + 10 + text_width, pt1[1] + 10 + text_height
            cv2.rectangle(self.image, pt1, box_end, self._color, -1)
            cv2.putText(self.image, label, text_origin,
                        cv2.FONT_HERSHEY_PLAIN, 1, self.text_color,
                        self.thickness)

    def circle(self, x, y, radius, label=None):
        """Draw a circle.

        Nothing is drawn if the circle (including its stroke) is not fully
        contained in the image.

        Parameters
        ----------
        x : float | int
            Center of the circle (x-axis).
        y : float | int
            Center of the circle (y-axis).
        radius : float | int
            Radius of the circle in pixels.
        label : Optional[str]
            A text label that is placed at the center of the circle.

        """
        half_size = int(radius + self.thickness + 1.5)
        roi = int(x - half_size), int(y - half_size), \
            int(2 * half_size), int(2 * half_size)
        if not is_in_bounds(self.image, roi):
            return

        patch = view_roi(self.image, roi)
        local_center = patch.shape[1] // 2, patch.shape[0] // 2
        cv2.circle(
            patch, local_center, int(radius + .5), self._color,
            self.thickness)
        if label is not None:
            # The label is rendered into the full image, hence the center
            # must be translated from patch to image coordinates.
            center = roi[0] + local_center[0], roi[1] + local_center[1]
            cv2.putText(
                self.image, label, center, cv2.FONT_HERSHEY_PLAIN,
                2, self.text_color, 2)

    def gaussian(self, mean, covariance, label=None):
        """Draw 95% confidence ellipse of a 2-D Gaussian distribution.

        Parameters
        ----------
        mean : array_like
            The mean vector of the Gaussian distribution (ndim=1).
        covariance : array_like
            The 2x2 covariance matrix of the Gaussian distribution.
        label : Optional[str]
            A text label that is placed at the center of the ellipse.

        """
        # chi2inv(0.95, 2) = 5.9915
        covariance = np.asarray(covariance, dtype=float)
        vals, vecs = np.linalg.eigh(5.9915 * covariance)
        indices = vals.argsort()[::-1]
        # Round-off may yield slightly negative eigenvalues for (nearly)
        # singular matrices; clip them so that the square root stays finite.
        vals = np.sqrt(np.maximum(vals[indices], 0.))
        vecs = vecs[:, indices]

        center = int(mean[0] + .5), int(mean[1] + .5)
        axes = int(vals[0] + .5), int(vals[1] + .5)
        # Orientation of the major axis in degrees.
        angle = int(180. * np.arctan2(vecs[1, 0], vecs[0, 0]) / np.pi)
        cv2.ellipse(
            self.image, center, axes, angle, 0, 360, self._color, 2)
        if label is not None:
            cv2.putText(self.image, label, center, cv2.FONT_HERSHEY_PLAIN,
                        2, self.text_color, 2)

    def annotate(self, x, y, text):
        """Draws a text string at a given location.

        Parameters
        ----------
        x : int | float
            Bottom-left corner of the text in the image (x-axis).
        y : int | float
            Bottom-left corner of the text in the image (y-axis).
        text : str
            The text to be drawn.

        """
        cv2.putText(self.image, text, (int(x), int(y)), cv2.FONT_HERSHEY_PLAIN,
                    2, self.text_color, 2)

    def colored_points(self, points, colors=None, skip_index_check=False):
        """Draw a collection of points.

        The point size is fixed to 1.

        Parameters
        ----------
        points : ndarray
            The Nx2 array of image locations, where the first dimension is
            the x-coordinate and the second dimension is the y-coordinate.
        colors : Optional[ndarray]
            The Nx3 array of colors (dtype=np.uint8). If None, the current
            color attribute is used.
        skip_index_check : Optional[bool]
            If True, index range checks are skipped. This is faster, but
            requires all points to lie within the image dimensions.

        """
        points = np.asarray(points)
        # Round to the nearest pixel.
        indices = (points + .5).astype(int)
        if not skip_index_check:
            height, width = self.image.shape[:2]
            # The upper bound is tested on the rounded pixel index, since
            # rounding may move a point just inside the border out of range.
            valid = np.logical_and.reduce((
                points[:, 0] >= 0, indices[:, 0] < width,
                points[:, 1] >= 0, indices[:, 1] < height))
            indices = indices[valid]
            if colors is not None:
                colors = np.asarray(colors)
                if colors.ndim == 2:
                    # Keep per-point colors aligned with the kept points.
                    colors = colors[valid]
        if colors is None:
            # A single color broadcasts over all points.
            colors = self._color
        self.image[indices[:, 1], indices[:, 0], :] = colors

    def enable_videowriter(self, output_filename, fourcc_string="MJPG",
                           fps=None):
        """ Write images to video file.

        A video writer that has been enabled before is released first.

        Parameters
        ----------
        output_filename : str
            Output filename.
        fourcc_string : str
            The OpenCV FOURCC code that defines the video codec (check OpenCV
            documentation for more information).
        fps : Optional[float]
            Frames per second. If None, configured according to current
            parameters.

        """
        self.disable_videowriter()
        fourcc = cv2.VideoWriter_fourcc(*fourcc_string)
        if fps is None:
            fps = int(1000. / self._update_ms)
        self._video_writer = cv2.VideoWriter(
            output_filename, fourcc, fps, self._window_shape)

    def disable_videowriter(self):
        """ Disable writing videos.

        Releases the current video writer, if any, so that the output file
        is finalized.
        """
        if self._video_writer is not None:
            self._video_writer.release()
        self._video_writer = None

    def run(self, update_fun=None):
        """Start the image viewer.

        This method blocks until the user requests to close the window, the
        update function returns a falsy value, or `stop` is called.

        Parameters
        ----------
        update_fun : Optional[Callable[] -> bool]
            An optional callable that is invoked at each frame. May be used
            to play an animation/a video sequence. The viewer keeps running
            as long as the callable returns a truthy value; a falsy return
            value (including None) terminates the loop. The callable is
            remembered for subsequent calls of this method. Note that the
            initial default callable returns None, i.e., without an update
            function the loop ends after the first frame.

        """
        if update_fun is not None:
            self._user_fun = update_fun

        self._terminate, is_paused = False, False
        # print("ImageViewer is paused, press space to start.")
        while not self._terminate:
            t0 = time.time()
            if not is_paused:
                self._terminate = not self._user_fun()
                if self._video_writer is not None:
                    self._video_writer.write(
                        cv2.resize(self.image, self._window_shape))
            t1 = time.time()
            # Wait for the remainder of the frame period. waitKey(0) would
            # block forever, so wait for at least one millisecond.
            remaining_time = max(1, int(self._update_ms - 1e3*(t1-t0)))
            cv2.imshow(
                self._caption, cv2.resize(self.image, self._window_shape[:2]))
            key = cv2.waitKey(remaining_time)
            if key & 255 == 27:  # ESC
                print("terminating")
                self._terminate = True
            elif key & 255 == 32:  # ' '
                print("toggeling pause: " + str(not is_paused))
                is_paused = not is_paused
            elif key & 255 == 115:  # 's'
                print("stepping")
                self._terminate = not self._user_fun()
                is_paused = True

        # Due to a bug in OpenCV we must call imshow after destroying the
        # window. This will make the window appear again as soon as waitKey
        # is called.
        #
        # see https://github.com/Itseez/opencv/issues/4535
        self.image[:] = 0
        cv2.destroyWindow(self._caption)
        cv2.waitKey(1)
        cv2.imshow(self._caption, self.image)

    def stop(self):
        """Stop the control loop.

        After calling this method, the viewer will stop execution before the
        next frame and hand over control flow to the user.

        """
        self._terminate = True
00343 
00344         """
00345         self._terminate = True