pal_vision_segmentation: image_processing.cpp Source File

Go to the documentation of this file.
00001 /*
00002  * Software License Agreement (Modified BSD License)
00003  *
00004  *  Copyright (c) 2012, PAL Robotics, S.L.
00005  *  All rights reserved.
00006  *
00007  *  Redistribution and use in source and binary forms, with or without
00008  *  modification, are permitted provided that the following conditions
00009  *  are met:
00010  *
00011  *   * Redistributions of source code must retain the above copyright
00012  *     notice, this list of conditions and the following disclaimer.
00013  *   * Redistributions in binary form must reproduce the above
00014  *     copyright notice, this list of conditions and the following
00015  *     disclaimer in the documentation and/or other materials provided
00016  *     with the distribution.
00017  *   * Neither the name of PAL Robotics, S.L. nor the names of its
00018  *     contributors may be used to endorse or promote products derived
00019  *     from this software without specific prior written permission.
00020  *
00021  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00024  *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
00025  *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00026  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00027  *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00028  *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00029  *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00030  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00031  *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00032  *  POSSIBILITY OF SUCH DAMAGE.
00033  */
00034 
00037 #include <pal_vision_segmentation/image_processing.h>
00038 #include <opencv2/imgproc/imgproc.hpp>
00039 #include <opencv2/imgproc/imgproc_c.h>
00040 #include <iostream>
00041 
00042 namespace pal_vision_util
00043 {
00044 
00045   void getConnectedComponents(const cv::Mat& binaryImage,
00046                               std::vector< std::vector<cv::Point2i> >& components,
00047                               std::vector< cv::Rect >& boundingBoxes)
00048   {
00049     components.clear();
00050 
00051     //make sure to use a binary image where background pixels are set to 0 and the remaining ones to 1
00052     cv::Mat tempBinaryImage;
00053     cv::threshold(binaryImage, tempBinaryImage, 1, 1, cv::THRESH_BINARY );
00054 
00055     // Fill the label_image with the blobs
00056     // 0  - background
00057     // 1  - unlabelled foreground
00058     // 2+ - labelled foreground
00059 
00060     cv::Mat label_image;
00061     tempBinaryImage.convertTo(label_image, CV_32FC1); // weird it doesn't support CV_32S!
00062 
00063     int label_count = 2; // starts at 2 because 0,1 are used already
00064 
00065     for(int y=0; y < binaryImage.rows; y++)
00066     {
00067       for(int x=0; x < binaryImage.cols; x++)
00068       {
00069         if ( static_cast<int>(label_image.at<float>(y,x)) != 1 )
00070         {
00071           continue;
00072         }
00073 
00074         cv::Rect rect;
00075         cv::floodFill(label_image, cv::Point(x,y), cv::Scalar(label_count), &rect, cv::Scalar(0), cv::Scalar(0), 4);
00076 
00077         std::vector <cv::Point2i> component;
00078 
00079         for(int i=rect.y; i < (rect.y+rect.height); i++)
00080         {
00081           for(int j=rect.x; j < (rect.x+rect.width); j++)
00082           {
00083             if ( static_cast<int>(label_image.at<float>(i,j)) != label_count )
00084             {
00085               continue;
00086             }
00087 
00088             component.push_back(cv::Point2i(j,i));
00089             boundingBoxes.push_back(rect);
00090           }
00091         }
00092 
00093         components.push_back(component);
00094 
00095         label_count++;
00096       }
00097     }
00098   }
00099 
00100   ImageRoi::ImageRoi()
00101   {
00102     clear();
00103   }
00104 
00105   ImageRoi::ImageRoi(const ImageRoi& roi)
00106   {
00107     x = roi.x;
00108     y = roi.y;
00109     width = roi.width;
00110     height = roi.height;
00111   }
00112 
00113   ImageRoi::ImageRoi(int left, int top, int w, int h): x(left), y(top), width(w), height(h)
00114   {
00115   }
00116 
00117   ImageRoi::ImageRoi(CvRect rect)
00118   {
00119     set(rect);
00120   }
00121 
00122   ImageRoi::~ImageRoi()
00123   {
00124   }
00125 
00126   void ImageRoi::set(CvRect rect)
00127   {
00128     x      = rect.x;
00129     y      = rect.y;
00130     width  = rect.width;
00131     height = rect.height;
00132   }
00133 
00134   void ImageRoi::clear()
00135   {
00136     x = 0;
00137     y = 0;
00138     width = 0;
00139     height = 0;
00140   }
00141 
00142   bool ImageRoi::empty() const
00143   {
00144     return x == 0 && y == 0 && width == 0 && height == 0;
00145   }
00146 
00147   CvRect ImageRoi::toCvRect() const
00148   {
00149     return cvRect(x, y, width, height);
00150   }
00151 
00156   void ImageRoi::keepInside(int imgWidth, int imgHeight)
00157   {
00158     if ( !empty() )
00159     {
00160       if ( x < 0 )
00161       {
00162         width += x;
00163         x = 0;
00164       }
00165       if ( y < 0 )
00166       {
00167         height += y;
00168         y = 0;
00169       }
00170       if ( x + width - 1 >= imgWidth )
00171       {
00172         width = - x + imgWidth;
00173       }
00174       if ( y + height - 1 >= imgHeight )
00175       {
00176         height = - y + imgHeight;
00177       }
00178     }
00179   }
00180 
00184   const ImageRoi& ImageRoi::resize(double factorHorizontal, double factorVertical)
00185   {
00186     int cx = x + width/2;
00187     int cy = y + height/2;
00188 
00189     int newWidth  = static_cast<int>( factorHorizontal * static_cast<double>(width) );
00190     int newHeight = static_cast<int>( factorVertical * static_cast<double>(height) );
00191 
00192     x = cx - newWidth/2;
00193     y = cy - newHeight/2;
00194     width = newWidth;
00195     height = newHeight;
00196 
00197     return *this;
00198   }
00199 
00200   Image::Image()
00201   {
00202     _img = NULL;
00203     _img = cvCreateImage(cvSize(10, 10), 8, 3);
00204   }
00205 
00206   Image::Image(IplImage *img)
00207   {
00208     _img = img;
00209   }
00210 
00211   Image::~Image()
00212   {
00213     freeImage();
00214   }
00215 
00216   void Image::freeImage()
00217   {
00218     if ( _img != NULL )
00219     {
00220       cvReleaseImage(&_img);
00221       _img = NULL;
00222     }
00223   }
00224 
00225   void Image::loadImage(const std::string& fileName)
00226   {
00227 
00228   }
00229 
00230   void Image::set(IplImage *img)
00231   {
00232     freeImage();
00233     _img = img;
00234   }
00235 
00236   IplImage* Image::release()
00237   {
00238     IplImage *img = _img;
00239     _img = NULL;
00240     return img;
00241   }
00242 
00243   void Image::resizeImage(int width, int height, int channels, int depth)
00244   {
00245     freeImage();
00246     _img = cvCreateImage(cvSize(width, height), depth, channels);
00247   }
00248 
00249   unsigned char Image::getPixelB(int col, int row, int ch) const
00250   {
00251     unsigned char value = 0;
00252 
00253     value = *((unsigned char *)((unsigned long)_img->imageData + (unsigned long)ch + (unsigned long)((col)*_img->nChannels) + (unsigned long)((row)*_img->widthStep)));
00254 
00255     return value;
00256   }
00257 
00258   float Image::getPixelF(int col, int row, int ch) const
00259   {
00260     float value = 0;
00261 
00262     value = *((float *)((unsigned long)(_img->imageData) + (unsigned long)((ch)*sizeof(float)) + (unsigned long)((col)*_img->nChannels*sizeof(float)) + (unsigned long)((row)*_img->widthStep)));
00263 
00264     return value;
00265   }
00266 
00267   short Image::getPixelS(int col, int row, int ch) const
00268   {
00269     short value = 0;
00270 
00271     value = *((short *)((unsigned long)_img->imageData + (unsigned long)((ch)*sizeof(short)) + (unsigned long)((col)*_img->nChannels*sizeof(short)) + (unsigned long)((row)*_img->widthStep)));
00272 
00273     return value;
00274   }
00275 
00276 
00277   void Image::setPixelB(int col, int row, int ch, unsigned char value)
00278   {
00279     *((unsigned char *)((unsigned long)(_img->imageData) + (unsigned long)ch + (unsigned long)(col*_img->nChannels) + (unsigned long)(row*_img->widthStep))) = value;
00280   }
00281 
00282   void Image::setPixelS(int col, int row, int ch, short value)
00283   {
00284     *((short *)((unsigned long)(_img->imageData) + (unsigned long)(ch*sizeof(short)) + (unsigned long)(col*_img->nChannels*sizeof(short)) + (unsigned long)((row)*_img->widthStep))) = value;
00285   }
00286 
00287   void Image::setPixelF(int col, int row, int ch, float value)
00288   {
00289     *((float *)((unsigned long)(_img->imageData) + (unsigned long)((ch)*sizeof(float)) + (unsigned long)((col)*(_img->nChannels)*sizeof(float)) + (unsigned long)((row)*_img->widthStep))) = value;
00290 
00291   }
00292 
00293   void getLab(const Image& img, Image& L, Image& a, Image& b)
00294   {
00295     if ( img.getChannel() != 3 || img.getDepth() != IPL_DEPTH_8U )
00296     {
00297       std::cout << std::endl << "Error in getLightnessFromLab: the input image is not 3-channel 8 bit image" << std::endl << std::endl;
00298       exit(0);
00299     }
00300 
00301     Image labImg;
00302     labImg.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 3, 8);
00303     L.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 1, 8);
00304     a.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 1, 8);
00305     b.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 1, 8);
00306 
00307     cvCvtColor(img, labImg, CV_BGR2Lab);
00308     cvSplit(labImg, L, a, b, NULL);
00309   }
00310 
00311   void setLab(const Image& L, const Image& a, const Image& b, Image& img)
00312   {
00313     if ( L.getChannel() != 1 || a.getChannel() != 1 || b.getChannel() != 1 ||
00314          L.getDepth() != 8 || a.getDepth() != 8 || b.getDepth() != 8 )
00315     {
00316       std::cout << std::endl << "Error in setLab: L, a, b images must be 1-channel 8 bit" << std::endl << std::endl;
00317     }
00318 
00319     img.resizeImage(cvGetImageROI(L).width, cvGetImageROI(L).height, 3, 8);
00320     cvMerge(L, a, b, NULL, img);
00321   }
00322 
00323 
00324   void dctNormalization(const Image& img, Image& normalizedImg, int coefficientsToCancel, const ImageRoi& roi)
00325   {
00327     // Unfortunately, DCT may only be computed on even-sized images
00328     // First of all, define image as a suitable sized copy of img
00330     bool oddSize = false;
00331     int oddWidth = img.getWidth(), oddHeight = img.getHeight();
00332     ImageRoi evenRoi(roi);
00333 
00334     //check if no roi is provided but image size is odd
00335     if ( roi.width == 0 && roi.height == 0 && (img.getWidth() % 2 != 0 || img.getHeight() % 2 != 0) )
00336     {
00337       oddSize   = true;
00338       oddWidth  = img.getWidth();
00339       oddHeight = img.getHeight();
00340     }
00341     //check if roi is provided and it has odd size
00342     else if ( roi.width != 0 && roi.height != 0 && (roi.width % 2 != 0 || roi.height % 2 != 0) )
00343     {
00344       oddSize   = true;
00345       oddWidth  = roi.width;
00346       oddHeight = roi.height;
00347     }
00348 
00349     //if either the roi or the image is odd-sized, use an even-sized roi to fix it
00350     if ( oddSize )
00351     {
00352       int evenWidth = oddWidth, evenHeight = oddHeight;
00353       if ( evenWidth % 2 != 0 )
00354         --evenWidth;
00355       if ( evenHeight % 2 != 0 )
00356         --evenHeight;
00357 
00358       evenRoi.width  = evenWidth;
00359       evenRoi.height = evenHeight;
00360       dctNormalization(img, normalizedImg, coefficientsToCancel, evenRoi);
00361       return;
00362     }
00363 
00364     if ( img.getDepth() != 8 )
00365     {
00366       std::cout << std::endl << "Error in dctNormalization: input image must have 8 bit depth " << std::endl << std::endl;
00367       exit(0);
00368     }
00369 
00370     if ( img.getChannel() == 3 )
00371     {
00372       Image L, a, b;
00373 
00374       normalizedImg.resizeImage(img.getWidth(), img.getHeight(), 3, 8);
00375       cvZero(normalizedImg);
00376 
00377       if ( roi.width != 0 && roi.height != 0 )
00378       {
00379         cvSetImageROI(img, cvRect(roi.x, roi.y, roi.width, roi.height));
00380         cvSetImageROI(normalizedImg, cvRect(roi.x, roi.y, roi.width, roi.height));
00381       }
00382 
00383       getLab(img, L, a, b);
00384 
00385       //L.seeWindow("L", 5);
00386       Image normalizedL;
00387       dctNormalization(L, normalizedL, coefficientsToCancel);
00388       //normalizedL.seeWindow("L normalized", 5);
00389       Image LabImg;
00390       setLab(normalizedL, a, b, LabImg);
00391       cvCvtColor(LabImg, normalizedImg, CV_Lab2BGR);
00392 
00393       if ( roi.width != 0 && roi.height != 0 )
00394       {
00395         cvResetImageROI(img);
00396         cvResetImageROI(normalizedImg);
00397       }
00398     }
00399     else if ( img.getChannel() ==  1 )
00400     {
00401       Image X32, Y32;
00402       double minVal1, maxVal1;
00403       cvMinMaxLoc(img, &minVal1, &maxVal1);
00404 
00405       normalizedImg.resizeImage(img.getWidth(), img.getHeight(), img.getChannel(), img.getDepth());
00406       cvZero(normalizedImg);
00407 
00408       if ( roi.width != 0 && roi.height != 0 )
00409       {
00410         cvSetImageROI(img, cvRect(roi.x, roi.y, roi.width, roi.height));
00411         cvSetImageROI(normalizedImg, cvRect(roi.x, roi.y, roi.width, roi.height));
00412       }
00413 
00414       X32.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 1, IPL_DEPTH_32F);
00415       Y32.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 1, IPL_DEPTH_32F);
00416 
00417       cvZero(Y32);
00418       cvConvert(img, X32);
00419 
00420       for (int row = 0; row < X32.getHeight(); ++row)
00421         for (int col = 0; col < X32.getWidth(); ++col)
00422         {
00423           float f = X32.getPixelF(col, row, 0);
00424           if ( f == 0 ) f = 1; //to avoid log(0)
00425           X32.setPixelF(col, row, 0, log(f));
00426         }
00427 
00428       cvDCT(X32, Y32, CV_DXT_FORWARD);
00429 
00430       //cancel low frequency coefficients
00431       for (int row = 0; row < coefficientsToCancel && row < Y32.getHeight(); ++row)
00432         for (int col = 0; col < coefficientsToCancel && col < Y32.getWidth(); ++col)
00433         {
00434           Y32.setPixelF(col, row, 0, 0);
00435         }
00436 
00437       cvZero(X32);
00438       cvDCT(Y32, X32, CV_DXT_INVERSE);
00439 
00440       double minVal2, maxVal2, m = 0, M = 255;
00441       cvMinMaxLoc(X32, &minVal2, &maxVal2);
00442       cvConvertScaleAbs(X32, normalizedImg, (M-m)/(maxVal2-minVal2), (m-M)*minVal2/(maxVal2-minVal2));
00443       //cvConvertScaleAbs(X32, normalizedImg, 255.0/(maxVal2-minVal2), -255.0*minVal2/(maxVal2-minVal2));
00444       //cvConvertScaleAbs(X32, normalizedImg, (maxVal1-minVal1)/(maxVal2-minVal2), -minVal2*(maxVal1-minVal1)/(maxVal2-minVal2));
00445 
00446       if ( roi.width != 0 && roi.height != 0 )
00447       {
00448         cvResetImageROI(img);
00449         cvResetImageROI(normalizedImg);
00450       }
00451     }
00452     else
00453     {
00454       std::cout << std::endl << "Error in dctNormalization: input image format not supported" << std::endl << std::endl;
00455     }
00456   }
00457 
00458   void dctNormalization(const cv::Mat& img, cv::Mat& normalizedImg, int coefficientsToCancel, const ImageRoi& roi)
00459   {
00460     IplImage ipl = img;
00461     Image input(&ipl);
00462     Image output;
00463     dctNormalization(input, output, coefficientsToCancel, roi);
00464     normalizedImg = cv::Mat(output.getIplImg(), true);
00465     input.release();
00466   }
00467 
00468 } // namespace pal_vision_util