pal_vision_segmentation: pal_vision_segmentation: image

00001 /*
00002  * Software License Agreement (Modified BSD License)
00003  *
00004  *  Copyright (c) 2012, PAL Robotics, S.L.
00005  *  All rights reserved.
00006  *
00007  *  Redistribution and use in source and binary forms, with or without
00008  *  modification, are permitted provided that the following conditions
00009  *  are met:
00010  *
00011  *   * Redistributions of source code must retain the above copyright
00012  *     notice, this list of conditions and the following disclaimer.
00013  *   * Redistributions in binary form must reproduce the above
00014  *     copyright notice, this list of conditions and the following
00015  *     disclaimer in the documentation and/or other materials provided
00016  *     with the distribution.
00017  *   * Neither the name of PAL Robotics, S.L. nor the names of its
00018  *     contributors may be used to endorse or promote products derived
00019  *     from this software without specific prior written permission.
00020  *
00021  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00024  *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
00025  *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00026  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00027  *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00028  *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00029  *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00030  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00031  *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00032  *  POSSIBILITY OF SUCH DAMAGE.
00033  */
00034 
00037 #include <pal_vision_segmentation/image_processing.h>
00038 
00039 namespace pal_vision_util
00040 {
00041 
00042   void getConnectedComponents(const cv::Mat& binaryImage,
00043                               std::vector< std::vector<cv::Point2i> >& components,
00044                               std::vector< cv::Rect >& boundingBoxes)
00045   {
00046     components.clear();
00047 
00048     //make sure to use a binary image where background pixels are set to 0 and the remaining ones to 1
00049     cv::Mat tempBinaryImage;
00050     cv::threshold(binaryImage, tempBinaryImage, 1, 1, cv::THRESH_BINARY );
00051 
00052     // Fill the label_image with the blobs
00053     // 0  - background
00054     // 1  - unlabelled foreground
00055     // 2+ - labelled foreground
00056 
00057     cv::Mat label_image;
00058     tempBinaryImage.convertTo(label_image, CV_32FC1); // weird it doesn't support CV_32S!
00059 
00060     int label_count = 2; // starts at 2 because 0,1 are used already
00061 
00062     for(int y=0; y < binaryImage.rows; y++)
00063     {
00064       for(int x=0; x < binaryImage.cols; x++)
00065       {
00066         if ( static_cast<int>(label_image.at<float>(y,x)) != 1 )
00067         {
00068           continue;
00069         }
00070 
00071         cv::Rect rect;
00072         cv::floodFill(label_image, cv::Point(x,y), cv::Scalar(label_count), &rect, cv::Scalar(0), cv::Scalar(0), 4);
00073 
00074         std::vector <cv::Point2i> component;
00075 
00076         for(int i=rect.y; i < (rect.y+rect.height); i++)
00077         {
00078           for(int j=rect.x; j < (rect.x+rect.width); j++)
00079           {
00080             if ( static_cast<int>(label_image.at<float>(i,j)) != label_count )
00081             {
00082               continue;
00083             }
00084 
00085             component.push_back(cv::Point2i(j,i));
00086             boundingBoxes.push_back(rect);
00087           }
00088         }
00089 
00090         components.push_back(component);
00091 
00092         label_count++;
00093       }
00094     }
00095   }
00096 
00097   ImageRoi::ImageRoi()
00098   {
00099     clear();
00100   }
00101 
00102   ImageRoi::ImageRoi(const ImageRoi& roi)
00103   {
00104     x = roi.x;
00105     y = roi.y;
00106     width = roi.width;
00107     height = roi.height;
00108   }
00109 
00110   ImageRoi::ImageRoi(int left, int top, int w, int h): x(left), y(top), width(w), height(h)
00111   {
00112   }
00113 
00114   ImageRoi::ImageRoi(CvRect rect)
00115   {
00116     set(rect);
00117   }
00118 
00119   ImageRoi::~ImageRoi()
00120   {
00121   }
00122 
00123   void ImageRoi::set(CvRect rect)
00124   {
00125     x      = rect.x;
00126     y      = rect.y;
00127     width  = rect.width;
00128     height = rect.height;
00129   }
00130 
00131   void ImageRoi::clear()
00132   {
00133     x = 0;
00134     y = 0;
00135     width = 0;
00136     height = 0;
00137   }
00138 
00139   bool ImageRoi::empty() const
00140   {
00141     return x == 0 && y == 0 && width == 0 && height == 0;
00142   }
00143 
00144   CvRect ImageRoi::toCvRect() const
00145   {
00146     return cvRect(x, y, width, height);
00147   }
00148 
00153   void ImageRoi::keepInside(int imgWidth, int imgHeight)
00154   {
00155     if ( !empty() )
00156     {
00157       if ( x < 0 )
00158       {
00159         width += x;
00160         x = 0;
00161       }
00162       if ( y < 0 )
00163       {
00164         height += y;
00165         y = 0;
00166       }
00167       if ( x + width - 1 >= imgWidth )
00168       {
00169         width = - x + imgWidth;
00170       }
00171       if ( y + height - 1 >= imgHeight )
00172       {
00173         height = - y + imgHeight;
00174       }
00175     }
00176   }
00177 
00181   const ImageRoi& ImageRoi::resize(double factorHorizontal, double factorVertical)
00182   {
00183     int cx = x + width/2;
00184     int cy = y + height/2;
00185 
00186     int newWidth  = static_cast<int>( factorHorizontal * static_cast<double>(width) );
00187     int newHeight = static_cast<int>( factorVertical * static_cast<double>(height) );
00188 
00189     x = cx - newWidth/2;
00190     y = cy - newHeight/2;
00191     width = newWidth;
00192     height = newHeight;
00193 
00194     return *this;
00195   }
00196 
00197   Image::Image()
00198   {
00199     _img = NULL;
00200     _img = cvCreateImage(cvSize(10, 10), 8, 3);
00201   }
00202 
00203   Image::Image(IplImage *img)
00204   {
00205     _img = img;
00206   }
00207 
00208   Image::~Image()
00209   {
00210     freeImage();
00211   }
00212 
00213   void Image::freeImage()
00214   {
00215     if ( _img != NULL )
00216     {
00217       cvReleaseImage(&_img);
00218       _img = NULL;
00219     }
00220   }
00221 
00222   void Image::loadImage(const std::string& fileName)
00223   {
00224 
00225   }
00226 
00227   void Image::set(IplImage *img)
00228   {
00229     freeImage();
00230     _img = img;
00231   }
00232 
00233   IplImage* Image::release()
00234   {
00235     IplImage *img = _img;
00236     _img = NULL;
00237     return img;
00238   }
00239 
00240   void Image::resizeImage(int width, int height, int channels, int depth)
00241   {
00242     freeImage();
00243     _img = cvCreateImage(cvSize(width, height), depth, channels);
00244   }
00245 
00246   unsigned char Image::getPixelB(int col, int row, int ch) const
00247   {
00248     unsigned char value = 0;
00249 
00250     value = *((unsigned char *)((unsigned long)_img->imageData + (unsigned long)ch + (unsigned long)((col)*_img->nChannels) + (unsigned long)((row)*_img->widthStep)));
00251 
00252     return value;
00253   }
00254 
00255   float Image::getPixelF(int col, int row, int ch) const
00256   {
00257     float value = 0;
00258 
00259     value = *((float *)((unsigned long)(_img->imageData) + (unsigned long)((ch)*sizeof(float)) + (unsigned long)((col)*_img->nChannels*sizeof(float)) + (unsigned long)((row)*_img->widthStep)));
00260 
00261     return value;
00262   }
00263 
00264   short Image::getPixelS(int col, int row, int ch) const
00265   {
00266     short value = 0;
00267 
00268     value = *((short *)((unsigned long)_img->imageData + (unsigned long)((ch)*sizeof(short)) + (unsigned long)((col)*_img->nChannels*sizeof(short)) + (unsigned long)((row)*_img->widthStep)));
00269 
00270     return value;
00271   }
00272 
00273 
00274   void Image::setPixelB(int col, int row, int ch, unsigned char value)
00275   {
00276     *((unsigned char *)((unsigned long)(_img->imageData) + (unsigned long)ch + (unsigned long)(col*_img->nChannels) + (unsigned long)(row*_img->widthStep))) = value;
00277   }
00278 
00279   void Image::setPixelS(int col, int row, int ch, short value)
00280   {
00281     *((short *)((unsigned long)(_img->imageData) + (unsigned long)(ch*sizeof(short)) + (unsigned long)(col*_img->nChannels*sizeof(short)) + (unsigned long)((row)*_img->widthStep))) = value;
00282   }
00283 
00284   void Image::setPixelF(int col, int row, int ch, float value)
00285   {
00286     *((float *)((unsigned long)(_img->imageData) + (unsigned long)((ch)*sizeof(float)) + (unsigned long)((col)*(_img->nChannels)*sizeof(float)) + (unsigned long)((row)*_img->widthStep))) = value;
00287 
00288   }
00289 
00290   void getLab(const Image& img, Image& L, Image& a, Image& b)
00291   {
00292     if ( img.getChannel() != 3 || img.getDepth() != IPL_DEPTH_8U )
00293     {
00294       std::cout << std::endl << "Error in getLightnessFromLab: the input image is not 3-channel 8 bit image" << std::endl << std::endl;
00295       exit(0);
00296     }
00297 
00298     Image labImg;
00299     labImg.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 3, 8);
00300     L.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 1, 8);
00301     a.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 1, 8);
00302     b.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 1, 8);
00303 
00304     cvCvtColor(img, labImg, CV_BGR2Lab);
00305     cvSplit(labImg, L, a, b, NULL);
00306   }
00307 
00308   void setLab(const Image& L, const Image& a, const Image& b, Image& img)
00309   {
00310     if ( L.getChannel() != 1 || a.getChannel() != 1 || b.getChannel() != 1 ||
00311          L.getDepth() != 8 || a.getDepth() != 8 || b.getDepth() != 8 )
00312     {
00313       std::cout << std::endl << "Error in setLab: L, a, b images must be 1-channel 8 bit" << std::endl << std::endl;
00314     }
00315 
00316     img.resizeImage(cvGetImageROI(L).width, cvGetImageROI(L).height, 3, 8);
00317     cvMerge(L, a, b, NULL, img);
00318   }
00319 
00320 
00321   void dctNormalization(const Image& img, Image& normalizedImg, int coefficientsToCancel, const ImageRoi& roi)
00322   {
00324     // Unfortunately, DCT may only be computed on even-sized images
00325     // First of all, define image as a suitable sized copy of img
00327     bool oddSize = false;
00328     int oddWidth = img.getWidth(), oddHeight = img.getHeight();
00329     ImageRoi evenRoi(roi);
00330 
00331     //check if no roi is provided but image size is odd
00332     if ( roi.width == 0 && roi.height == 0 && (img.getWidth() % 2 != 0 || img.getHeight() % 2 != 0) )
00333     {
00334       oddSize   = true;
00335       oddWidth  = img.getWidth();
00336       oddHeight = img.getHeight();
00337     }
00338     //check if roi is provided and it has odd size
00339     else if ( roi.width != 0 && roi.height != 0 && (roi.width % 2 != 0 || roi.height % 2 != 0) )
00340     {
00341       oddSize   = true;
00342       oddWidth  = roi.width;
00343       oddHeight = roi.height;
00344     }
00345 
00346     //if either the roi or the image is odd-sized, use an even-sized roi to fix it
00347     if ( oddSize )
00348     {
00349       int evenWidth = oddWidth, evenHeight = oddHeight;
00350       if ( evenWidth % 2 != 0 )
00351         --evenWidth;
00352       if ( evenHeight % 2 != 0 )
00353         --evenHeight;
00354 
00355       evenRoi.width  = evenWidth;
00356       evenRoi.height = evenHeight;
00357       dctNormalization(img, normalizedImg, coefficientsToCancel, evenRoi);
00358       return;
00359     }
00360 
00361     if ( img.getDepth() != 8 )
00362     {
00363       std::cout << std::endl << "Error in dctNormalization: input image must have 8 bit depth " << std::endl << std::endl;
00364       exit(0);
00365     }
00366 
00367     if ( img.getChannel() == 3 )
00368     {
00369       Image L, a, b;
00370 
00371       normalizedImg.resizeImage(img.getWidth(), img.getHeight(), 3, 8);
00372       cvZero(normalizedImg);
00373 
00374       if ( roi.width != 0 && roi.height != 0 )
00375       {
00376         cvSetImageROI(img, cvRect(roi.x, roi.y, roi.width, roi.height));
00377         cvSetImageROI(normalizedImg, cvRect(roi.x, roi.y, roi.width, roi.height));
00378       }
00379 
00380       getLab(img, L, a, b);
00381 
00382       //L.seeWindow("L", 5);
00383       Image normalizedL;
00384       dctNormalization(L, normalizedL, coefficientsToCancel);
00385       //normalizedL.seeWindow("L normalized", 5);
00386       Image LabImg;
00387       setLab(normalizedL, a, b, LabImg);
00388       cvCvtColor(LabImg, normalizedImg, CV_Lab2BGR);
00389 
00390       if ( roi.width != 0 && roi.height != 0 )
00391       {
00392         cvResetImageROI(img);
00393         cvResetImageROI(normalizedImg);
00394       }
00395     }
00396     else if ( img.getChannel() ==  1 )
00397     {
00398       Image X32, Y32;
00399       double minVal1, maxVal1;
00400       cvMinMaxLoc(img, &minVal1, &maxVal1);
00401 
00402       normalizedImg.resizeImage(img.getWidth(), img.getHeight(), img.getChannel(), img.getDepth());
00403       cvZero(normalizedImg);
00404 
00405       if ( roi.width != 0 && roi.height != 0 )
00406       {
00407         cvSetImageROI(img, cvRect(roi.x, roi.y, roi.width, roi.height));
00408         cvSetImageROI(normalizedImg, cvRect(roi.x, roi.y, roi.width, roi.height));
00409       }
00410 
00411       X32.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 1, IPL_DEPTH_32F);
00412       Y32.resizeImage(cvGetImageROI(img).width, cvGetImageROI(img).height, 1, IPL_DEPTH_32F);
00413 
00414       cvZero(Y32);
00415       cvConvert(img, X32);
00416 
00417       for (int row = 0; row < X32.getHeight(); ++row)
00418         for (int col = 0; col < X32.getWidth(); ++col)
00419         {
00420           float f = X32.getPixelF(col, row, 0);
00421           if ( f == 0 ) f = 1; //to avoid log(0)
00422           X32.setPixelF(col, row, 0, log(f));
00423         }
00424 
00425       cvDCT(X32, Y32, CV_DXT_FORWARD);
00426 
00427       //cancel low frequency coefficients
00428       for (int row = 0; row < coefficientsToCancel && row < Y32.getHeight(); ++row)
00429         for (int col = 0; col < coefficientsToCancel && col < Y32.getWidth(); ++col)
00430         {
00431           Y32.setPixelF(col, row, 0, 0);
00432         }
00433 
00434       cvZero(X32);
00435       cvDCT(Y32, X32, CV_DXT_INVERSE);
00436 
00437       double minVal2, maxVal2, m = 0, M = 255;
00438       cvMinMaxLoc(X32, &minVal2, &maxVal2);
00439       cvConvertScaleAbs(X32, normalizedImg, (M-m)/(maxVal2-minVal2), (m-M)*minVal2/(maxVal2-minVal2));
00440       //cvConvertScaleAbs(X32, normalizedImg, 255.0/(maxVal2-minVal2), -255.0*minVal2/(maxVal2-minVal2));
00441       //cvConvertScaleAbs(X32, normalizedImg, (maxVal1-minVal1)/(maxVal2-minVal2), -minVal2*(maxVal1-minVal1)/(maxVal2-minVal2));
00442 
00443       if ( roi.width != 0 && roi.height != 0 )
00444       {
00445         cvResetImageROI(img);
00446         cvResetImageROI(normalizedImg);
00447       }
00448     }
00449     else
00450     {
00451       std::cout << std::endl << "Error in dctNormalization: input image format not supported" << std::endl << std::endl;
00452     }
00453   }
00454 
00455   void dctNormalization(const cv::Mat& img, cv::Mat& normalizedImg, int coefficientsToCancel, const ImageRoi& roi)
00456   {
00457     IplImage ipl = img;
00458     Image input(&ipl);
00459     Image output;
00460     dctNormalization(input, output, coefficientsToCancel, roi);
00461     normalizedImg = cv::Mat(output.getIplImg(), true);
00462     input.release();
00463   }
00464 
00465 } // namespace pal_vision_util
image_processing.cpp