pal_vision_segmentation: image_processing.cpp Source File

Go to the documentation of this file.
00001 /*
00002  * Software License Agreement (Modified BSD License)
00003  *
00004  *  Copyright (c) 2012, PAL Robotics, S.L.
00005  *  All rights reserved.
00006  *
00007  *  Redistribution and use in source and binary forms, with or without
00008  *  modification, are permitted provided that the following conditions
00009  *  are met:
00010  *
00011  *   * Redistributions of source code must retain the above copyright
00012  *     notice, this list of conditions and the following disclaimer.
00013  *   * Redistributions in binary form must reproduce the above
00014  *     copyright notice, this list of conditions and the following
00015  *     disclaimer in the documentation and/or other materials provided
00016  *     with the distribution.
00017  *   * Neither the name of PAL Robotics, S.L. nor the names of its
00018  *     contributors may be used to endorse or promote products derived
00019  *     from this software without specific prior written permission.
00020  *
00021  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00024  *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
00025  *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00026  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00027  *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00028  *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00029  *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00030  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00031  *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00032  *  POSSIBILITY OF SUCH DAMAGE.
00033  */
00034 
00037 #include <pal_vision_segmentation/image_processing.h>
00038 #include <opencv2/imgproc/imgproc.hpp>
00039 #include <opencv2/imgproc/imgproc_c.h>
00040 #include <iostream>
00041 
00042 namespace pal_vision_util
00043 {
00044 
00045   void getConnectedComponents(const cv::Mat& binaryImage,
00046                               std::vector< std::vector<cv::Point2i> >& components,
00047                               std::vector< cv::Rect >& boundingBoxes)
00048   {
00049     components.clear();
00050 
00051     //make sure to use a binary image where background pixels are set to 0 and the remaining ones to 1
00052     cv::Mat tempBinaryImage;
00053     cv::threshold(binaryImage, tempBinaryImage, 1, 1, cv::THRESH_BINARY );
00054 
00055     // Fill the label_image with the blobs
00056     // 0  - background
00057     // 1  - unlabelled foreground
00058     // 2+ - labelled foreground
00059 
00060     cv::Mat label_image;
00061     tempBinaryImage.convertTo(label_image, CV_32FC1); // weird it doesn't support CV_32S!
00062 
00063     int label_count = 2; // starts at 2 because 0,1 are used already
00064 
00065     for(int y=0; y < binaryImage.rows; y++)
00066     {
00067       for(int x=0; x < binaryImage.cols; x++)
00068       {
00069         if ( static_cast<int>(label_image.at<float>(y,x)) != 1 )
00070         {
00071           continue;
00072         }
00073 
00074         cv::Rect rect;
00075         cv::floodFill(label_image, cv::Point(x,y), cv::Scalar(label_count), &rect, cv::Scalar(0), cv::Scalar(0), 4);
00076 
00077         std::vector <cv::Point2i> component;
00078 
00079         for(int i=rect.y; i < (rect.y+rect.height); i++)
00080         {
00081           for(int j=rect.x; j < (rect.x+rect.width); j++)
00082           {
00083             if ( static_cast<int>(label_image.at<float>(i,j)) != label_count )
00084             {
00085               continue;
00086             }
00087 
00088             component.push_back(cv::Point2i(j,i));
00089             boundingBoxes.push_back(rect);
00090           }
00091         }
00092 
00093         components.push_back(component);
00094 
00095         label_count++;
00096       }
00097     }
00098   }
00099 
00100   ImageRoi::ImageRoi()
00101   {
00102     clear();
00103   }
00104 
00105   ImageRoi::ImageRoi(const ImageRoi& roi)
00106   {
00107     x = roi.x;
00108     y = roi.y;
00109     width = roi.width;
00110     height = roi.height;
00111   }
00112 
00113   ImageRoi::ImageRoi(int left, int top, int w, int h): x(left), y(top), width(w), height(h)
00114   {
00115   }
00116 
00117   ImageRoi::ImageRoi(CvRect rect)
00118   {
00119     set(rect);
00120   }
00121 
00122   ImageRoi::~ImageRoi()
00123   {
00124   }
00125 
00126   void ImageRoi::set(CvRect rect)
00127   {
00128     x      = rect.x;
00129     y      = rect.y;
00130     width  = rect.width;
00131     height = rect.height;
00132   }
00133 
00134   void ImageRoi::clear()
00135   {
00136     x = 0;
00137     y = 0;
00138     width = 0;
00139     height = 0;
00140   }
00141 
00142   bool ImageRoi::empty() const
00143   {
00144     return x == 0 && y == 0 && width == 0 && height == 0;
00145   }
00146 
00147   CvRect ImageRoi::toCvRect() const
00148   {
00149     return cvRect(x, y, width, height);
00150   }
00151 
00156   void ImageRoi::keepInside(int imgWidth, int imgHeight)
00157   {
00158     if ( !empty() )
00159     {
00160       if ( x < 0 )
00161       {
00162         width += x;
00163         x = 0;
00164       }
00165       if ( y < 0 )
00166       {
00167         height += y;
00168         y = 0;
00169       }
00170       if ( x + width - 1 >= imgWidth )
00171       {
00172         width = - x + imgWidth;
00173       }
00174       if ( y + height - 1 >= imgHeight )
00175       {
00176         height = - y + imgHeight;
00177       }
00178     }
00179   }
00180 
00184   const ImageRoi& ImageRoi::resize(double factorHorizontal, double factorVertical)
00185   {
00186     int cx = x + width/2;
00187     int cy = y + height/2;
00188 
00189     int newWidth  = static_cast<int>( factorHorizontal * static_cast<double>(width) );
00190     int newHeight = static_cast<int>( factorVertical * static_cast<double>(height) );
00191 
00192     x = cx - newWidth/2;
00193     y = cy - newHeight/2;
00194     width = newWidth;
00195     height = newHeight;
00196 
00197     return *this;
00198   }
00199 
00200   Image::Image()
00201   {
00202     _img = NULL;
00203     _img = cvCreateImage(cvSize(10, 10), 8, 3);
00204     _releaseOnDestroyer = true;
00205   }
00206 
00207   Image::Image(IplImage *img)
00208   {
00209     _img = img;
00210     _releaseOnDestroyer = true;
00211   }
00212 
00213   Image::Image(const cv::Mat& mat)
00214   {
00215     _iplFromMat = mat;
00216     _img = &_iplFromMat;
00217     _releaseOnDestroyer = false;
00218   }
00219 
00220   Image::~Image()
00221   {
00222     freeImage();
00223   }
00224 
00225   void Image::freeImage()
00226   {
00227     if ( _img != NULL && _releaseOnDestroyer )
00228     {
00229       cvReleaseImage(&_img);
00230       _img = NULL;
00231     }
00232   }
00233 
00234   void Image::loadImage(const std::string& fileName)
00235   {
00236 
00237   }
00238 
00239   void Image::set(IplImage *img)
00240   {
00241     freeImage();
00242     _img = img;
00243     _releaseOnDestroyer = true;
00244   }
00245 
00246   IplImage* Image::release()
00247   {
00248     IplImage *img = _img;
00249     _img = NULL;
00250     return img;
00251   }
00252 
00253   void Image::resizeImage(int width, int height, int channels, int depth)
00254   {
00255     freeImage();
00256     _img = cvCreateImage(cvSize(width, height), depth, channels);
00257   }
00258 
00259   unsigned char Image::getPixelB(int col, int row, int ch) const
00260   {
00261     unsigned char value = 0;
00262 
00263     value = *((unsigned char *)((unsigned long)_img->imageData + (unsigned long)ch + (unsigned long)((col)*_img->nChannels) + (unsigned long)((row)*_img->widthStep)));
00264 
00265     return value;
00266   }
00267 
00268   float Image::getPixelF(int col, int row, int ch) const
00269   {
00270     float value = 0;
00271 
00272     value = *((float *)((unsigned long)(_img->imageData) + (unsigned long)((ch)*sizeof(float)) + (unsigned long)((col)*_img->nChannels*sizeof(float)) + (unsigned long)((row)*_img->widthStep)));
00273 
00274     return value;
00275   }
00276 
00277   short Image::getPixelS(int col, int row, int ch) const
00278   {
00279     short value = 0;
00280 
00281     value = *((short *)((unsigned long)_img->imageData + (unsigned long)((ch)*sizeof(short)) + (unsigned long)((col)*_img->nChannels*sizeof(short)) + (unsigned long)((row)*_img->widthStep)));
00282 
00283     return value;
00284   }
00285 
00286 
00287   void Image::setPixelB(int col, int row, int ch, unsigned char value)
00288   {
00289     *((unsigned char *)((unsigned long)(_img->imageData) + (unsigned long)ch + (unsigned long)(col*_img->nChannels) + (unsigned long)(row*_img->widthStep))) = value;
00290   }
00291 
00292   void Image::setPixelS(int col, int row, int ch, short value)
00293   {
00294     *((short *)((unsigned long)(_img->imageData) + (unsigned long)(ch*sizeof(short)) + (unsigned long)(col*_img->nChannels*sizeof(short)) + (unsigned long)((row)*_img->widthStep))) = value;
00295   }
00296 
00297   void Image::setPixelF(int col, int row, int ch, float value)
00298   {
00299     *((float *)((unsigned long)(_img->imageData) + (unsigned long)((ch)*sizeof(float)) + (unsigned long)((col)*(_img->nChannels)*sizeof(float)) + (unsigned long)((row)*_img->widthStep))) = value;
00300 
00301   }
00302 
00303   void getLab(const Image& img, Image& L, Image& a, Image& b)
00304   {
00305     if ( img.getChannel() != 3 || img.getDepth() != IPL_DEPTH_8U )
00306     {
00307       std::cout << std::endl << "Error in getLightnessFromLab: the input image is not 3-channel 8 bit image" << std::endl << std::endl;
00308       exit(0);
00309     }
00310 
00311     Image labImg;
00312     labImg.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 3, 8);
00313     L.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 1, 8);
00314     a.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 1, 8);
00315     b.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 1, 8);
00316 
00317     cvCvtColor(img, labImg, CV_BGR2Lab);
00318     cvSplit(labImg, L, a, b, NULL);
00319   }
00320 
00321   void setLab(const Image& L, const Image& a, const Image& b, Image& img)
00322   {
00323     if ( L.getChannel() != 1 || a.getChannel() != 1 || b.getChannel() != 1 ||
00324          L.getDepth() != 8 || a.getDepth() != 8 || b.getDepth() != 8 )
00325     {
00326       std::cout << std::endl << "Error in setLab: L, a, b images must be 1-channel 8 bit" << std::endl << std::endl;
00327     }
00328 
00329     img.resizeImage(cvGetImageROI(L).width, cvGetImageROI(L).height, 3, 8);
00330     cvMerge(L, a, b, NULL, img);
00331   }
00332 
00333 
00334   void dctNormalization(const Image& img, Image& normalizedImg, int coefficientsToCancel, const ImageRoi& roi)
00335   {
00337     // Unfortunately, DCT may only be computed on even-sized images
00338     // First of all, define image as a suitable sized copy of img
00340     bool oddSize = false;
00341     int oddWidth = img.getWidth(), oddHeight = img.getHeight();
00342     ImageRoi evenRoi(roi);
00343 
00344     //check if no roi is provided but image size is odd
00345     if ( roi.width == 0 && roi.height == 0 && (img.getWidth() % 2 != 0 || img.getHeight() % 2 != 0) )
00346     {
00347       oddSize   = true;
00348       oddWidth  = img.getWidth();
00349       oddHeight = img.getHeight();
00350     }
00351     //check if roi is provided and it has odd size
00352     else if ( roi.width != 0 && roi.height != 0 && (roi.width % 2 != 0 || roi.height % 2 != 0) )
00353     {
00354       oddSize   = true;
00355       oddWidth  = roi.width;
00356       oddHeight = roi.height;
00357     }
00358 
00359     //if either the roi or the image is odd-sized, use an even-sized roi to fix it
00360     if ( oddSize )
00361     {
00362       int evenWidth = oddWidth, evenHeight = oddHeight;
00363       if ( evenWidth % 2 != 0 )
00364         --evenWidth;
00365       if ( evenHeight % 2 != 0 )
00366         --evenHeight;
00367 
00368       evenRoi.width  = evenWidth;
00369       evenRoi.height = evenHeight;
00370       dctNormalization(img, normalizedImg, coefficientsToCancel, evenRoi);
00371       return;
00372     }
00373 
00374     if ( img.getDepth() != 8 )
00375     {
00376       std::cout << std::endl << "Error in dctNormalization: input image must have 8 bit depth " << std::endl << std::endl;
00377       exit(0);
00378     }
00379 
00380     if ( img.getChannel() == 3 )
00381     {
00382       Image L, a, b;
00383 
00384       normalizedImg.resizeImage(img.getWidth(), img.getHeight(), 3, 8);
00385       cvZero(normalizedImg);
00386 
00387       if ( roi.width != 0 && roi.height != 0 )
00388       {
00389         cvSetImageROI(img, cvRect(roi.x, roi.y, roi.width, roi.height));
00390         cvSetImageROI(normalizedImg, cvRect(roi.x, roi.y, roi.width, roi.height));
00391       }
00392 
00393       getLab(img, L, a, b);
00394 
00395       //L.seeWindow("L", 5);
00396       Image normalizedL;
00397       dctNormalization(L, normalizedL, coefficientsToCancel);
00398       //normalizedL.seeWindow("L normalized", 5);
00399       Image LabImg;
00400       setLab(normalizedL, a, b, LabImg);
00401       cvCvtColor(LabImg, normalizedImg, CV_Lab2BGR);
00402 
00403       if ( roi.width != 0 && roi.height != 0 )
00404       {
00405         cvResetImageROI(img);
00406         cvResetImageROI(normalizedImg);
00407       }
00408     }
00409     else if ( img.getChannel() ==  1 )
00410     {
00411       Image X32, Y32;
00412       double minVal1, maxVal1;
00413       cvMinMaxLoc(img, &minVal1, &maxVal1);
00414 
00415       normalizedImg.resizeImage(img.getWidth(), img.getHeight(), img.getChannel(), img.getDepth());
00416       cvZero(normalizedImg);
00417 
00418       if ( roi.width != 0 && roi.height != 0 )
00419       {
00420         cvSetImageROI(img, cvRect(roi.x, roi.y, roi.width, roi.height));
00421         cvSetImageROI(normalizedImg, cvRect(roi.x, roi.y, roi.width, roi.height));
00422       }
00423 
00424       X32.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 1, IPL_DEPTH_32F);
00425       Y32.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 1, IPL_DEPTH_32F);
00426 
00427       cvZero(Y32);
00428       cvConvert(img, X32);
00429 
00430       for (int row = 0; row < X32.getHeight(); ++row)
00431         for (int col = 0; col < X32.getWidth(); ++col)
00432         {
00433           float f = X32.getPixelF(col, row, 0);
00434           if ( f == 0 ) f = 1; //to avoid log(0)
00435           X32.setPixelF(col, row, 0, log(f));
00436         }
00437 
00438       cvDCT(X32, Y32, CV_DXT_FORWARD);
00439 
00440       //cancel low frequency coefficients
00441       for (int row = 0; row < coefficientsToCancel && row < Y32.getHeight(); ++row)
00442         for (int col = 0; col < coefficientsToCancel && col < Y32.getWidth(); ++col)
00443         {
00444           Y32.setPixelF(col, row, 0, 0);
00445         }
00446 
00447       cvZero(X32);
00448       cvDCT(Y32, X32, CV_DXT_INVERSE);
00449 
00450       double minVal2, maxVal2, m = 0, M = 255;
00451       cvMinMaxLoc(X32, &minVal2, &maxVal2);
00452       cvConvertScaleAbs(X32, normalizedImg, (M-m)/(maxVal2-minVal2), (m-M)*minVal2/(maxVal2-minVal2));
00453       //cvConvertScaleAbs(X32, normalizedImg, 255.0/(maxVal2-minVal2), -255.0*minVal2/(maxVal2-minVal2));
00454       //cvConvertScaleAbs(X32, normalizedImg, (maxVal1-minVal1)/(maxVal2-minVal2), -minVal2*(maxVal1-minVal1)/(maxVal2-minVal2));
00455 
00456       if ( roi.width != 0 && roi.height != 0 )
00457       {
00458         cvResetImageROI(img);
00459         cvResetImageROI(normalizedImg);
00460       }
00461     }
00462     else
00463     {
00464       std::cout << std::endl << "Error in dctNormalization: input image format not supported" << std::endl << std::endl;
00465     }
00466   }
00467 
00468   void dctNormalization(const cv::Mat& img, cv::Mat& normalizedImg, int coefficientsToCancel, const ImageRoi& roi)
00469   {
00470     IplImage ipl = img;
00471     Image input(&ipl);
00472     Image output;
00473     dctNormalization(input, output, coefficientsToCancel, roi);
00474     normalizedImg = cv::Mat(output.getIplImg(), true);
00475     input.release();
00476   }
00477 
00478 } // namespace pal_vision_util