linemod.cpp
Go to the documentation of this file.
00001 /*
00002  * Software License Agreement (BSD License)
00003  *
00004  *  Point Cloud Library (PCL) - www.pointclouds.org
00005  *  Copyright (c) 2010-2011, Willow Garage, Inc.
00006  *
00007  *  All rights reserved. 
00008  *
00009  *  Redistribution and use in source and binary forms, with or without
00010  *  modification, are permitted provided that the following conditions
00011  *  are met:
00012  *
00013  *   * Redistributions of source code must retain the above copyright
00014  *     notice, this list of conditions and the following disclaimer.
00015  *   * Redistributions in binary form must reproduce the above
00016  *     copyright notice, this list of conditions and the following
00017  *     disclaimer in the documentation and/or other materials provided
00018  *     with the distribution.
00019  *   * Neither the name of Willow Garage, Inc. nor the names of its
00020  *     contributors may be used to endorse or promote products derived
00021  *     from this software without specific prior written permission.
00022  *
00023  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00024  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00025  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00026  *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
00027  *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00028  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00029  *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00030  *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00031  *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00032  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00033  *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00034  *  POSSIBILITY OF SUCH DAMAGE.
00035  *
00036  */
00037 
00038 //#define __SSE2__
00039 
00040 #include <pcl/recognition/linemod.h>
00041 
00042 #ifdef __SSE2__
00043 #include <emmintrin.h>
00044 #endif
00045 
00046 #include <fstream>
00047 
00048 //#define LINEMOD_USE_SEPARATE_ENERGY_MAPS
00049 
00051 pcl::LINEMOD::LINEMOD () 
00052   : template_threshold_ (0.75f)
00053   , use_non_max_suppression_ (false)
00054   , average_detections_ (false)
00055   , templates_ ()
00056 {
00057 }
00058 
00060 pcl::LINEMOD::~LINEMOD()
00061 {
00062 }
00063 
00065 int 
00066 pcl::LINEMOD::createAndAddTemplate (const std::vector<pcl::QuantizableModality*> & modalities,
00067                       const std::vector<pcl::MaskMap*> & masks,
00068                       const pcl::RegionXY & region)
00069 {
00070   // assuming width and height is same for all modalities; should we check this??
00071   //const int width = modalities[0]->getQuantizedMap().getWidth ();
00072   //const int height = modalities[0]->getQuantizedMap().getHeight ();
00073 
00074   SparseQuantizedMultiModTemplate linemod_template;
00075 
00076   // select N features from every modality (N = 50, hardcoded; CHANGE this to a parameter!!!)
00077   const size_t nr_features_per_modality = 63;
00078   const size_t nr_modalities = modalities.size();
00079   for (size_t modality_index = 0; modality_index < nr_modalities; ++modality_index)
00080   {
00081     const MaskMap & mask = *(masks[modality_index]);
00082     modalities[modality_index]->extractFeatures(mask, nr_features_per_modality, modality_index,
00083                                                 linemod_template.features);
00084   }
00085 
00086   // up to now all features are relative to the input frame; make them relative to the region center
00087   //const int centerX = region.x+region.width/2;
00088   //const int centerY = region.y+region.height/2;
00089 
00090   const size_t nr_features = linemod_template.features.size();
00091   for (size_t feature_index = 0; feature_index < nr_features; ++feature_index)
00092   {
00093     //linemod_template.features[feature_index].x -= centerX;
00094     //linemod_template.features[feature_index].y -= centerY;
00095     linemod_template.features[feature_index].x -= region.x;
00096     linemod_template.features[feature_index].y -= region.y;
00097   }
00098 
00099   // set region relative to the center
00100   linemod_template.region.x = 0;
00101   linemod_template.region.y = 0;
00102   //linemod_template.region.x = region.x - centerX;
00103   //linemod_template.region.y = region.y - centerY;
00104   linemod_template.region.width = region.width;
00105   linemod_template.region.height = region.height;
00106 
00107   // add template to template storage
00108   templates_.push_back(linemod_template);
00109 
00110   return static_cast<int> (templates_.size () - 1);
00111 }
00112 
00114 int 
00115 pcl::LINEMOD::addTemplate (const SparseQuantizedMultiModTemplate & linemod_template)
00116 {
00117   // add template to template storage
00118   templates_.push_back(linemod_template);
00119 
00120   return static_cast<int> (templates_.size () - 1);
00121 }
00122 
00124 void
00125 pcl::LINEMOD::matchTemplates (const std::vector<QuantizableModality*> & modalities, std::vector<LINEMODDetection> & detections) const
00126 {
00127   // create energy maps
00128   std::vector<EnergyMaps> modality_energy_maps;
00129   const size_t nr_modalities = modalities.size();
00130   for (size_t modality_index = 0; modality_index < nr_modalities; ++modality_index)
00131   {
00132     const QuantizedMap & quantized_map = modalities[modality_index]->getSpreadedQuantizedMap ();
00133 
00134     const size_t width = quantized_map.getWidth ();
00135     const size_t height = quantized_map.getHeight ();
00136 
00137     const unsigned char * quantized_data = quantized_map.getData ();
00138 
00139     const unsigned char nr_bins = 8;
00140     EnergyMaps energy_maps;
00141     energy_maps.initialize (width, height, nr_bins);
00142     //std::vector< unsigned char* > energy_maps(nr_bins);
00143     for (unsigned char bin_index = 0; bin_index < nr_bins; ++bin_index)
00144     {
00145       //energy_maps[bin_index] = new unsigned char[width*height];
00146       //memset (energy_maps[bin_index], 0, width*height);
00147 
00148       const unsigned char base_bit = static_cast<unsigned char> (0x1);
00149       unsigned char val0 = static_cast<unsigned char> (base_bit << bin_index); // e.g. 00100000
00150       unsigned char val1 = static_cast<unsigned char> (val0 | (base_bit << (bin_index+1)&7) | (base_bit << (bin_index+7)&7)); // e.g. 01110000
00151       unsigned char val2 = static_cast<unsigned char> (val1 | (base_bit << (bin_index+2)&7) | (base_bit << (bin_index+6)&7)); // e.g. 11111000
00152       unsigned char val3 = static_cast<unsigned char> (val2 | (base_bit << (bin_index+3)&7) | (base_bit << (bin_index+5)&7)); // e.g. 11111101
00153       for (size_t index = 0; index < width*height; ++index)
00154       {
00155         if ((val0 & quantized_data[index]) != 0)
00156           ++energy_maps (bin_index, index);
00157         if ((val1 & quantized_data[index]) != 0)
00158           ++energy_maps (bin_index, index);
00159         if ((val2 & quantized_data[index]) != 0)
00160           ++energy_maps (bin_index, index);
00161         if ((val3 & quantized_data[index]) != 0)
00162           ++energy_maps (bin_index, index);
00163       }
00164     }
00165 
00166     modality_energy_maps.push_back (energy_maps);
00167   }
00168 
00169   // create linearized maps
00170   const size_t step_size = 8;
00171   std::vector<std::vector<LinearizedMaps> > modality_linearized_maps;
00172   for (size_t modality_index = 0; modality_index < nr_modalities; ++modality_index)
00173   {
00174     const size_t width = modality_energy_maps[modality_index].getWidth ();
00175     const size_t height = modality_energy_maps[modality_index].getHeight ();
00176 
00177     std::vector<LinearizedMaps> linearized_maps;
00178     const size_t nr_bins = modality_energy_maps[modality_index].getNumOfBins ();
00179     for (size_t bin_index = 0; bin_index < nr_bins; ++bin_index)
00180     {
00181       unsigned char * energy_map = modality_energy_maps[modality_index] (bin_index);
00182 
00183       LinearizedMaps maps;
00184       maps.initialize (width, height, step_size);
00185       for (size_t map_row = 0; map_row < step_size; ++map_row)
00186       {
00187         for (size_t map_col = 0; map_col < step_size; ++map_col)
00188         {
00189           unsigned char * linearized_map = maps (map_col, map_row);
00190 
00191           // copy data from energy maps
00192           const size_t lin_width = width/step_size;
00193           const size_t lin_height = height/step_size;
00194           for (size_t row_index = 0; row_index < lin_height; ++row_index)
00195           {
00196             for (size_t col_index = 0; col_index < lin_width; ++col_index)
00197             {
00198               const size_t tmp_col_index = col_index*step_size + map_col;
00199               const size_t tmp_row_index = row_index*step_size + map_row;
00200 
00201               linearized_map[row_index*lin_width + col_index] = energy_map[tmp_row_index*width + tmp_col_index];
00202             }
00203           }
00204         }
00205       }
00206 
00207       linearized_maps.push_back (maps);
00208     }
00209 
00210     modality_linearized_maps.push_back (linearized_maps);
00211   }
00212 
00213   // compute scores for templates
00214   const size_t width = modality_energy_maps[0].getWidth ();
00215   const size_t height = modality_energy_maps[0].getHeight ();
00216   for (size_t template_index = 0; template_index < templates_.size (); ++template_index)
00217   {
00218     const size_t mem_width = width / step_size;
00219     const size_t mem_height = height / step_size;
00220     const size_t mem_size = mem_width * mem_height;
00221 
00222 #ifdef __SSE2__
00223     unsigned short * score_sums = reinterpret_cast<unsigned short*> (aligned_malloc (mem_size*sizeof(unsigned short)));
00224     unsigned char * tmp_score_sums = reinterpret_cast<unsigned char*> (aligned_malloc (mem_size*sizeof(unsigned char)));
00225     memset (score_sums, 0, mem_size*sizeof (score_sums[0]));
00226     memset (tmp_score_sums, 0, mem_size*sizeof (tmp_score_sums[0]));
00227 
00228     //__m128i * score_sums_m128i = reinterpret_cast<__m128i*> (score_sums);
00229     __m128i * tmp_score_sums_m128i = reinterpret_cast<__m128i*> (tmp_score_sums);
00230 
00231     const size_t mem_size_16 = mem_size / 16;
00232     //const size_t mem_size_mod_16 = mem_size & 15;
00233     const size_t mem_size_mod_16_base = mem_size_16 * 16;
00234 
00235     size_t max_score = 0;
00236     size_t copy_back_counter = 0;
00237     for (size_t feature_index = 0; feature_index < templates_[template_index].features.size (); ++feature_index)
00238     {
00239       const QuantizedMultiModFeature & feature = templates_[template_index].features[feature_index];
00240 
00241       for (size_t bin_index = 0; bin_index < 8; ++bin_index)
00242       {
00243         if ((feature.quantized_value & (0x1<<bin_index)) != 0)
00244         {
00245           max_score += 4;
00246 
00247           unsigned char * data = modality_linearized_maps[feature.modality_index][bin_index].getOffsetMap (feature.x, feature.y);
00248           __m128i * data_m128i = reinterpret_cast<__m128i*> (data);
00249 
00250           for (size_t mem_index = 0; mem_index < mem_size_16; ++mem_index)
00251           {
00252             __m128i aligned_data_m128i = _mm_loadu_si128 (reinterpret_cast<const __m128i*> (data_m128i + mem_index)); // SSE2
00253             //__m128i aligned_data_m128i = _mm_lddqu_si128 (reinterpret_cast<const __m128i*> (data_m128i + mem_index)); // SSE3
00254             tmp_score_sums_m128i[mem_index] = _mm_add_epi8 (tmp_score_sums_m128i[mem_index], aligned_data_m128i);
00255           }
00256           for (size_t mem_index = mem_size_mod_16_base; mem_index < mem_size; ++mem_index)
00257           {
00258             tmp_score_sums[mem_index] = static_cast<unsigned char> (tmp_score_sums[mem_index] + data[mem_index]);
00259           }
00260         }
00261       }
00262 
00263       ++copy_back_counter;
00264 
00265       //if ((feature_index & 7) == 7)
00266       //if ((feature_index & 63) == 63)
00267       if (copy_back_counter > 63) // only valid if each feature has only one bit set..
00268       {
00269         copy_back_counter = 0;
00270 
00271         for (size_t mem_index = 0; mem_index < mem_size; mem_index += 16)
00272         {
00273           score_sums[mem_index+0]  = static_cast<unsigned short> (score_sums[mem_index+0]  + tmp_score_sums[mem_index+0]);
00274           score_sums[mem_index+1]  = static_cast<unsigned short> (score_sums[mem_index+1]  + tmp_score_sums[mem_index+1]);
00275           score_sums[mem_index+2]  = static_cast<unsigned short> (score_sums[mem_index+2]  + tmp_score_sums[mem_index+2]);
00276           score_sums[mem_index+3]  = static_cast<unsigned short> (score_sums[mem_index+3]  + tmp_score_sums[mem_index+3]);
00277           score_sums[mem_index+4]  = static_cast<unsigned short> (score_sums[mem_index+4]  + tmp_score_sums[mem_index+4]);
00278           score_sums[mem_index+5]  = static_cast<unsigned short> (score_sums[mem_index+5]  + tmp_score_sums[mem_index+5]);
00279           score_sums[mem_index+6]  = static_cast<unsigned short> (score_sums[mem_index+6]  + tmp_score_sums[mem_index+6]);
00280           score_sums[mem_index+7]  = static_cast<unsigned short> (score_sums[mem_index+7]  + tmp_score_sums[mem_index+7]);
00281           score_sums[mem_index+8]  = static_cast<unsigned short> (score_sums[mem_index+8]  + tmp_score_sums[mem_index+8]);
00282           score_sums[mem_index+9]  = static_cast<unsigned short> (score_sums[mem_index+9]  + tmp_score_sums[mem_index+9]);
00283           score_sums[mem_index+10] = static_cast<unsigned short> (score_sums[mem_index+10] + tmp_score_sums[mem_index+10]);
00284           score_sums[mem_index+11] = static_cast<unsigned short> (score_sums[mem_index+11] + tmp_score_sums[mem_index+11]);
00285           score_sums[mem_index+12] = static_cast<unsigned short> (score_sums[mem_index+12] + tmp_score_sums[mem_index+12]);
00286           score_sums[mem_index+13] = static_cast<unsigned short> (score_sums[mem_index+13] + tmp_score_sums[mem_index+13]);
00287           score_sums[mem_index+14] = static_cast<unsigned short> (score_sums[mem_index+14] + tmp_score_sums[mem_index+14]);
00288           score_sums[mem_index+15] = static_cast<unsigned short> (score_sums[mem_index+15] + tmp_score_sums[mem_index+15]);
00289         }
00290         for (size_t mem_index = mem_size_mod_16_base; mem_index < mem_size; ++mem_index)
00291         {
00292           score_sums[mem_index] = static_cast<unsigned short> (score_sums[mem_index] + tmp_score_sums[mem_index]);
00293         }
00294 
00295         memset (tmp_score_sums, 0, mem_size*sizeof (tmp_score_sums[0]));
00296       }
00297     }
00298     {
00299       for (size_t mem_index = 0; mem_index < mem_size; ++mem_index)
00300       {
00301         score_sums[mem_index] = static_cast<unsigned short> (score_sums[mem_index] + tmp_score_sums[mem_index]);
00302       }
00303         
00304       memset (tmp_score_sums, 0, mem_size*sizeof (tmp_score_sums[0]));
00305     }
00306 #else
00307     unsigned short * score_sums = new unsigned short[mem_size];
00308     //unsigned char * score_sums = new unsigned char[mem_size];
00309     memset (score_sums, 0, mem_size*sizeof (score_sums[0]));
00310 
00311     size_t max_score = 0;
00312     for (size_t feature_index = 0; feature_index < templates_[template_index].features.size (); ++feature_index)
00313     {
00314       const QuantizedMultiModFeature & feature = templates_[template_index].features[feature_index];
00315 
00316       //feature.modality_index;
00317       for (size_t bin_index = 0; bin_index < 8; ++bin_index)
00318       {
00319         if ((feature.quantized_value & (0x1<<bin_index)) != 0)
00320         {
00321           max_score += 4;
00322 
00323           unsigned char * data = modality_linearized_maps[feature.modality_index][bin_index].getOffsetMap (feature.x, feature.y);
00324           for (size_t mem_index = 0; mem_index < mem_size; ++mem_index)
00325           {
00326             score_sums[mem_index] += data[mem_index];
00327           }
00328         }
00329       }
00330     }
00331 #endif
00332 
00333     const float inv_max_score = 1.0f / float (max_score);
00334     
00335     size_t max_value = 0;
00336     size_t max_index = 0;
00337     for (size_t mem_index = 0; mem_index < mem_size; ++mem_index)
00338     {
00339       if (score_sums[mem_index] > max_value) 
00340       {
00341         max_value = score_sums[mem_index];
00342         max_index = mem_index;
00343       }
00344     }
00345 
00346     const size_t max_col_index = (max_index % mem_width) * step_size;
00347     const size_t max_row_index = (max_index / mem_width) * step_size;
00348 
00349     LINEMODDetection detection;
00350     detection.x = static_cast<int> (max_col_index);
00351     detection.y = static_cast<int> (max_row_index);
00352     detection.template_id = static_cast<int> (template_index);
00353     detection.score = static_cast<float> (max_value) * inv_max_score;
00354 
00355     detections.push_back (detection);
00356 
00357     delete[] score_sums;
00358   }
00359 
00360   // release data
00361   for (size_t modality_index = 0; modality_index < modality_linearized_maps.size (); ++modality_index)
00362   {
00363     modality_energy_maps[modality_index].releaseAll ();
00364     for (size_t bin_index = 0; bin_index < modality_linearized_maps[modality_index].size (); ++bin_index)
00365       modality_linearized_maps[modality_index][bin_index].releaseAll ();
00366   }
00367 }
00368 
00370 void
00371 pcl::LINEMOD::detectTemplates (const std::vector<QuantizableModality*> & modalities, std::vector<LINEMODDetection> & detections) const
00372 {
00373   // create energy maps
00374   std::vector<EnergyMaps> modality_energy_maps;
00375 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00376   std::vector<EnergyMaps> modality_energy_maps_1;
00377   std::vector<EnergyMaps> modality_energy_maps_2;
00378   std::vector<EnergyMaps> modality_energy_maps_3;
00379 #endif
00380   const size_t nr_modalities = modalities.size();
00381   for (size_t modality_index = 0; modality_index < nr_modalities; ++modality_index)
00382   {
00383     const QuantizedMap & quantized_map = modalities[modality_index]->getSpreadedQuantizedMap ();
00384 
00385     const size_t width = quantized_map.getWidth ();
00386     const size_t height = quantized_map.getHeight ();
00387 
00388     const unsigned char * quantized_data = quantized_map.getData ();
00389 
00390     const int nr_bins = 8;
00391     EnergyMaps energy_maps;
00392     energy_maps.initialize (width, height, nr_bins);
00393 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00394     EnergyMaps energy_maps_1;
00395     EnergyMaps energy_maps_2;
00396     EnergyMaps energy_maps_3;
00397     energy_maps_1.initialize (width, height, nr_bins);
00398     energy_maps_2.initialize (width, height, nr_bins);
00399     energy_maps_3.initialize (width, height, nr_bins);
00400 #endif
00401     //std::vector< unsigned char* > energy_maps(nr_bins);
00402     for (int bin_index = 0; bin_index < nr_bins; ++bin_index)
00403     {
00404       //energy_maps[bin_index] = new unsigned char[width*height];
00405       //memset (energy_maps[bin_index], 0, width*height);
00406 
00407       const unsigned char base_bit = static_cast<unsigned char> (0x1);
00408       unsigned char val0 = static_cast<unsigned char> (base_bit << bin_index); // e.g. 00100000
00409       unsigned char val1 = static_cast<unsigned char> (val0 | (base_bit << ((bin_index+1)%8)) | (base_bit << ((bin_index+7)%8))); // e.g. 01110000
00410       unsigned char val2 = static_cast<unsigned char> (val1 | (base_bit << ((bin_index+2)%8)) | (base_bit << ((bin_index+6)%8))); // e.g. 11111000
00411       unsigned char val3 = static_cast<unsigned char> (val2 | (base_bit << ((bin_index+3)%8)) | (base_bit << ((bin_index+5)%8))); // e.g. 11111101
00412       for (size_t index = 0; index < width*height; ++index)
00413       {
00414         if ((val0 & quantized_data[index]) != 0)
00415           ++energy_maps (bin_index, index);
00416 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00417         if ((val1 & quantized_data[index]) != 0)
00418           ++energy_maps_1 (bin_index, index);
00419         if ((val2 & quantized_data[index]) != 0)
00420           ++energy_maps_2 (bin_index, index);
00421         if ((val3 & quantized_data[index]) != 0)
00422           ++energy_maps_3 (bin_index, index);
00423 #else
00424         if ((val1 & quantized_data[index]) != 0)
00425           ++energy_maps (bin_index, index);
00426         if ((val2 & quantized_data[index]) != 0)
00427           ++energy_maps (bin_index, index);
00428         if ((val3 & quantized_data[index]) != 0)
00429           ++energy_maps (bin_index, index);
00430 #endif
00431       }
00432     }
00433 
00434     modality_energy_maps.push_back (energy_maps);
00435 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00436     modality_energy_maps_1.push_back (energy_maps_1);
00437     modality_energy_maps_2.push_back (energy_maps_2);
00438     modality_energy_maps_3.push_back (energy_maps_3);
00439 #endif
00440   }
00441 
00442   // create linearized maps
00443   const size_t step_size = 8;
00444   std::vector<std::vector<LinearizedMaps> > modality_linearized_maps;
00445 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00446   std::vector<std::vector<LinearizedMaps> > modality_linearized_maps_1;
00447   std::vector<std::vector<LinearizedMaps> > modality_linearized_maps_2;
00448   std::vector<std::vector<LinearizedMaps> > modality_linearized_maps_3;
00449 #endif
00450   for (size_t modality_index = 0; modality_index < nr_modalities; ++modality_index)
00451   {
00452     const size_t width = modality_energy_maps[modality_index].getWidth ();
00453     const size_t height = modality_energy_maps[modality_index].getHeight ();
00454 
00455     std::vector<LinearizedMaps> linearized_maps;
00456 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00457     std::vector<LinearizedMaps> linearized_maps_1;
00458     std::vector<LinearizedMaps> linearized_maps_2;
00459     std::vector<LinearizedMaps> linearized_maps_3;
00460 #endif
00461     const size_t nr_bins = modality_energy_maps[modality_index].getNumOfBins ();
00462     for (size_t bin_index = 0; bin_index < nr_bins; ++bin_index)
00463     {
00464       unsigned char * energy_map = modality_energy_maps[modality_index] (bin_index);
00465 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00466       unsigned char * energy_map_1 = modality_energy_maps_1[modality_index] (bin_index);
00467       unsigned char * energy_map_2 = modality_energy_maps_2[modality_index] (bin_index);
00468       unsigned char * energy_map_3 = modality_energy_maps_3[modality_index] (bin_index);
00469 #endif
00470 
00471       LinearizedMaps maps;
00472       maps.initialize (width, height, step_size);
00473 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00474       LinearizedMaps maps_1;
00475       LinearizedMaps maps_2;
00476       LinearizedMaps maps_3;
00477       maps_1.initialize (width, height, step_size);
00478       maps_2.initialize (width, height, step_size);
00479       maps_3.initialize (width, height, step_size);
00480 #endif
00481       for (size_t map_row = 0; map_row < step_size; ++map_row)
00482       {
00483         for (size_t map_col = 0; map_col < step_size; ++map_col)
00484         {
00485           unsigned char * linearized_map = maps (map_col, map_row);
00486 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00487           unsigned char * linearized_map_1 = maps_1 (map_col, map_row);
00488           unsigned char * linearized_map_2 = maps_2 (map_col, map_row);
00489           unsigned char * linearized_map_3 = maps_3 (map_col, map_row);
00490 #endif
00491 
00492           // copy data from energy maps
00493           const size_t lin_width = width/step_size;
00494           const size_t lin_height = height/step_size;
00495           for (size_t row_index = 0; row_index < lin_height; ++row_index)
00496           {
00497             for (size_t col_index = 0; col_index < lin_width; ++col_index)
00498             {
00499               const size_t tmp_col_index = col_index*step_size + map_col;
00500               const size_t tmp_row_index = row_index*step_size + map_row;
00501 
00502               linearized_map[row_index*lin_width + col_index] = energy_map[tmp_row_index*width + tmp_col_index];
00503 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00504               linearized_map_1[row_index*lin_width + col_index] = energy_map_1[tmp_row_index*width + tmp_col_index];
00505               linearized_map_2[row_index*lin_width + col_index] = energy_map_2[tmp_row_index*width + tmp_col_index];
00506               linearized_map_3[row_index*lin_width + col_index] = energy_map_3[tmp_row_index*width + tmp_col_index];
00507 #endif
00508             }
00509           }
00510         }
00511       }
00512 
00513       linearized_maps.push_back (maps);
00514 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00515       linearized_maps_1.push_back (maps_1);
00516       linearized_maps_2.push_back (maps_2);
00517       linearized_maps_3.push_back (maps_3);
00518 #endif
00519     }
00520 
00521     modality_linearized_maps.push_back (linearized_maps);
00522 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00523     modality_linearized_maps_1.push_back (linearized_maps_1);
00524     modality_linearized_maps_2.push_back (linearized_maps_2);
00525     modality_linearized_maps_3.push_back (linearized_maps_3);
00526 #endif
00527   }
00528 
00529   // compute scores for templates
00530   const size_t width = modality_energy_maps[0].getWidth ();
00531   const size_t height = modality_energy_maps[0].getHeight ();
00532   for (size_t template_index = 0; template_index < templates_.size (); ++template_index)
00533   {
00534     const size_t mem_width = width / step_size;
00535     const size_t mem_height = height / step_size;
00536     const size_t mem_size = mem_width * mem_height;
00537 
00538 #ifdef __SSE2__
00539     unsigned short * score_sums = reinterpret_cast<unsigned short*> (aligned_malloc (mem_size*sizeof(unsigned short)));
00540     unsigned char * tmp_score_sums = reinterpret_cast<unsigned char*> (aligned_malloc (mem_size*sizeof(unsigned char)));
00541     memset (score_sums, 0, mem_size*sizeof (score_sums[0]));
00542     memset (tmp_score_sums, 0, mem_size*sizeof (tmp_score_sums[0]));
00543 
00544     //__m128i * score_sums_m128i = reinterpret_cast<__m128i*> (score_sums);
00545     __m128i * tmp_score_sums_m128i = reinterpret_cast<__m128i*> (tmp_score_sums);
00546 
00547     const size_t mem_size_16 = mem_size / 16;
00548     //const size_t mem_size_mod_16 = mem_size & 15;
00549     const size_t mem_size_mod_16_base = mem_size_16 * 16;
00550 
00551     int max_score = 0;
00552     size_t copy_back_counter = 0;
00553     for (size_t feature_index = 0; feature_index < templates_[template_index].features.size (); ++feature_index)
00554     {
00555       const QuantizedMultiModFeature & feature = templates_[template_index].features[feature_index];
00556 
00557       for (size_t bin_index = 0; bin_index < 8; ++bin_index)
00558       {
00559         if ((feature.quantized_value & (0x1<<bin_index)) != 0)
00560         {
00561           max_score += 4;
00562 
00563           unsigned char * data = modality_linearized_maps[feature.modality_index][bin_index].getOffsetMap (feature.x, feature.y);
00564           __m128i * data_m128i = reinterpret_cast<__m128i*> (data);
00565 
00566           for (size_t mem_index = 0; mem_index < mem_size_16; ++mem_index)
00567           {
00568             __m128i aligned_data_m128i = _mm_loadu_si128 (reinterpret_cast<const __m128i*> (data_m128i + mem_index)); // SSE2
00569             //__m128i aligned_data_m128i = _mm_lddqu_si128 (reinterpret_cast<const __m128i*> (data_m128i + mem_index)); // SSE3
00570             tmp_score_sums_m128i[mem_index] = _mm_add_epi8 (tmp_score_sums_m128i[mem_index], aligned_data_m128i);
00571           }
00572           for (size_t mem_index = mem_size_mod_16_base; mem_index < mem_size; ++mem_index)
00573           {
00574             tmp_score_sums[mem_index] = static_cast<unsigned char> (tmp_score_sums[mem_index] + data[mem_index]);
00575           }
00576         }
00577       }
00578 
00579       ++copy_back_counter;
00580 
00581       //if ((feature_index & 7) == 7)
00582       //if ((feature_index & 63) == 63)
00583       if (copy_back_counter > 63) // only valid if each feature has only one bit set..
00584       {
00585         copy_back_counter = 0;
00586 
00587         for (size_t mem_index = 0; mem_index < mem_size; mem_index += 16)
00588         {
00589           score_sums[mem_index+0]  = static_cast<unsigned short> (score_sums[mem_index+0]  + tmp_score_sums[mem_index+0]);
00590           score_sums[mem_index+1]  = static_cast<unsigned short> (score_sums[mem_index+1]  + tmp_score_sums[mem_index+1]);
00591           score_sums[mem_index+2]  = static_cast<unsigned short> (score_sums[mem_index+2]  + tmp_score_sums[mem_index+2]);
00592           score_sums[mem_index+3]  = static_cast<unsigned short> (score_sums[mem_index+3]  + tmp_score_sums[mem_index+3]);
00593           score_sums[mem_index+4]  = static_cast<unsigned short> (score_sums[mem_index+4]  + tmp_score_sums[mem_index+4]);
00594           score_sums[mem_index+5]  = static_cast<unsigned short> (score_sums[mem_index+5]  + tmp_score_sums[mem_index+5]);
00595           score_sums[mem_index+6]  = static_cast<unsigned short> (score_sums[mem_index+6]  + tmp_score_sums[mem_index+6]);
00596           score_sums[mem_index+7]  = static_cast<unsigned short> (score_sums[mem_index+7]  + tmp_score_sums[mem_index+7]);
00597           score_sums[mem_index+8]  = static_cast<unsigned short> (score_sums[mem_index+8]  + tmp_score_sums[mem_index+8]);
00598           score_sums[mem_index+9]  = static_cast<unsigned short> (score_sums[mem_index+9]  + tmp_score_sums[mem_index+9]);
00599           score_sums[mem_index+10] = static_cast<unsigned short> (score_sums[mem_index+10] + tmp_score_sums[mem_index+10]);
00600           score_sums[mem_index+11] = static_cast<unsigned short> (score_sums[mem_index+11] + tmp_score_sums[mem_index+11]);
00601           score_sums[mem_index+12] = static_cast<unsigned short> (score_sums[mem_index+12] + tmp_score_sums[mem_index+12]);
00602           score_sums[mem_index+13] = static_cast<unsigned short> (score_sums[mem_index+13] + tmp_score_sums[mem_index+13]);
00603           score_sums[mem_index+14] = static_cast<unsigned short> (score_sums[mem_index+14] + tmp_score_sums[mem_index+14]);
00604           score_sums[mem_index+15] = static_cast<unsigned short> (score_sums[mem_index+15] + tmp_score_sums[mem_index+15]);
00605         }
00606         for (size_t mem_index = mem_size_mod_16_base; mem_index < mem_size; ++mem_index)
00607         {
00608           score_sums[mem_index] = static_cast<unsigned short> (score_sums[mem_index] + tmp_score_sums[mem_index]);
00609         }
00610 
00611         memset (tmp_score_sums, 0, mem_size*sizeof (tmp_score_sums[0]));
00612       }
00613     }
00614     {
00615       for (size_t mem_index = 0; mem_index < mem_size; ++mem_index)
00616       {
00617         score_sums[mem_index] = static_cast<unsigned short> (score_sums[mem_index] + tmp_score_sums[mem_index]);
00618       }
00619         
00620       memset (tmp_score_sums, 0, mem_size*sizeof (tmp_score_sums[0]));
00621     }
00622 #else
00623     unsigned short * score_sums = new unsigned short[mem_size];
00624     //unsigned char * score_sums = new unsigned char[mem_size];
00625     memset (score_sums, 0, mem_size*sizeof (score_sums[0]));
00626 
00627 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00628     unsigned short * score_sums_1 = new unsigned short[mem_size];
00629     unsigned short * score_sums_2 = new unsigned short[mem_size];
00630     unsigned short * score_sums_3 = new unsigned short[mem_size];
00631     memset (score_sums_1, 0, mem_size*sizeof (score_sums_1[0]));
00632     memset (score_sums_2, 0, mem_size*sizeof (score_sums_2[0]));
00633     memset (score_sums_3, 0, mem_size*sizeof (score_sums_3[0]));
00634 #endif
00635 
00636     int max_score = 0;
00637     for (size_t feature_index = 0; feature_index < templates_[template_index].features.size (); ++feature_index)
00638     {
00639       const QuantizedMultiModFeature & feature = templates_[template_index].features[feature_index];
00640 
00641       //feature.modality_index;
00642       for (size_t bin_index = 0; bin_index < 8; ++bin_index)
00643       {
00644         if ((feature.quantized_value & (0x1<<bin_index)) != 0)
00645         {
00646 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00647           ++max_score;
00648 
00649           unsigned char * data = modality_linearized_maps[feature.modality_index][bin_index].getOffsetMap (feature.x, feature.y);
00650           unsigned char * data_1 = modality_linearized_maps_1[feature.modality_index][bin_index].getOffsetMap (feature.x, feature.y);
00651           unsigned char * data_2 = modality_linearized_maps_2[feature.modality_index][bin_index].getOffsetMap (feature.x, feature.y);
00652           unsigned char * data_3 = modality_linearized_maps_3[feature.modality_index][bin_index].getOffsetMap (feature.x, feature.y);
00653           for (size_t mem_index = 0; mem_index < mem_size; ++mem_index)
00654           {
00655             score_sums[mem_index] += data[mem_index];
00656             score_sums_1[mem_index] += data_1[mem_index];
00657             score_sums_2[mem_index] += data_2[mem_index];
00658             score_sums_3[mem_index] += data_3[mem_index];
00659           }
00660 #else
00661           max_score += 4;
00662 
00663           unsigned char * data = modality_linearized_maps[feature.modality_index][bin_index].getOffsetMap (feature.x, feature.y);
00664           for (size_t mem_index = 0; mem_index < mem_size; ++mem_index)
00665           {
00666             score_sums[mem_index] += data[mem_index];
00667           }
00668 #endif
00669         }
00670       }
00671     }
00672 #endif
00673 
00674     const float inv_max_score = 1.0f / float (max_score);
00675 
00676     // we compute a new threshold based on the threshold supplied by the user;
00677     // this is due to the use of the cosine approx. in the response computation;
00678 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00679     const float raw_threshold = (4.0f * float (max_score) / 2.0f + template_threshold_ * (4.0f * float (max_score) / 2.0f));
00680 #else
00681     const float raw_threshold = (float (max_score) / 2.0f + template_threshold_ * (float (max_score) / 2.0f));
00682 #endif
00683 
00684     //int max_value = 0;
00685     //size_t max_index = 0;
00686     for (size_t mem_index = 0; mem_index < mem_size; ++mem_index)
00687     {
00688       //const float score = score_sums[mem_index] * inv_max_score;
00689 
00690 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00691       const float raw_score = score_sums[mem_index] 
00692         + score_sums_1[mem_index]
00693         + score_sums_2[mem_index]
00694         + score_sums_3[mem_index];
00695 
00696       const float score = 2.0f * static_cast<float> (raw_score) * 0.25f * inv_max_score - 1.0f;
00697 #else
00698       const float raw_score = score_sums[mem_index];
00699 
00700       const float score = 2.0f * static_cast<float> (raw_score) * inv_max_score - 1.0f;
00701 #endif
00702 
00703 
00704       //if (score > template_threshold_) 
00705       if (raw_score > raw_threshold) 
00706       {
00707         const size_t mem_col_index = (mem_index % mem_width);
00708         const size_t mem_row_index = (mem_index / mem_width);
00709 
00710         if (use_non_max_suppression_)
00711         {
00712           bool is_local_max = true;
00713           for (size_t sup_row_index = mem_row_index-1; sup_row_index <= mem_row_index+1 && is_local_max; ++sup_row_index)
00714           {
00715             if (sup_row_index >= mem_height)
00716               continue;
00717 
00718             for (size_t sup_col_index = mem_col_index-1; sup_col_index <= mem_col_index+1; ++sup_col_index)
00719             {
00720               if (sup_col_index >= mem_width)
00721                 continue;
00722 
00723               if (score_sums[mem_index] < score_sums[sup_row_index*mem_width + sup_col_index])
00724               {
00725                 is_local_max = false;
00726                 break;
00727               }
00728             } 
00729           }
00730 
00731           if (!is_local_max)
00732             continue;
00733         }
00734 
00735         LINEMODDetection detection;
00736 
00737         if (average_detections_)
00738         {
00739           size_t average_col = 0;
00740           size_t average_row = 0;
00741           size_t sum = 0;
00742 
00743           for (size_t sup_row_index = mem_row_index-1; sup_row_index <= mem_row_index+1; ++sup_row_index)
00744           {
00745             if (sup_row_index >= mem_height)
00746               continue;
00747 
00748             for (size_t sup_col_index = mem_col_index-1; sup_col_index <= mem_col_index+1; ++sup_col_index)
00749             {
00750               if (sup_col_index >= mem_width)
00751                 continue;
00752 
00753               const size_t weight = static_cast<size_t> (score_sums[sup_row_index*mem_width + sup_col_index]);
00754               average_col += sup_col_index * weight;
00755               average_row += sup_row_index * weight;
00756               sum += weight;
00757             } 
00758           }
00759 
00760           average_col *= step_size;
00761           average_row *= step_size;
00762 
00763           average_col /= sum;
00764           average_row /= sum;
00765 
00766           //std::cerr << mem_col_index << ", " << mem_row_index << " - " << average_col << ", " << average_row << std::endl;
00767           std::cerr << mem_col_index*step_size << ", " << mem_row_index*step_size << " - " << average_col << ", " << average_row << std::endl;
00768 
00769           const size_t detection_col_index = average_col;// * step_size;
00770           const size_t detection_row_index = average_row;// * step_size;
00771 
00772           detection.x = static_cast<int> (detection_col_index);
00773           detection.y = static_cast<int> (detection_row_index);
00774         }
00775         else
00776         {
00777           const size_t detection_col_index = mem_col_index * step_size;
00778           const size_t detection_row_index = mem_row_index * step_size;
00779 
00780           detection.x = static_cast<int> (detection_col_index);
00781           detection.y = static_cast<int> (detection_row_index);
00782         }
00783 
00784         detection.template_id = static_cast<int> (template_index);
00785         detection.score = score;
00786 
00787 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00788         std::cerr << "score: " << static_cast<float> (raw_score) * inv_max_score * 0.25f << ", " << (2.0f * static_cast<float> (raw_score) * inv_max_score - 1.0f) << std::endl;
00789         std::cerr << "score0: " << static_cast<float> (score_sums[mem_index]) * inv_max_score << ", " << (2.0f * static_cast<float> (score_sums[mem_index]) * inv_max_score - 1.0f) << std::endl;
00790         std::cerr << "score1: " << static_cast<float> (score_sums_1[mem_index]) * inv_max_score << ", " << (2.0f * static_cast<float> (score_sums_1[mem_index]) * inv_max_score - 1.0f) << std::endl;
00791         std::cerr << "score2: " << static_cast<float> (score_sums_2[mem_index]) * inv_max_score << ", " << (2.0f * static_cast<float> (score_sums_2[mem_index]) * inv_max_score - 1.0f) << std::endl;
00792         std::cerr << "score3: " << static_cast<float> (score_sums_3[mem_index]) * inv_max_score << ", " << (2.0f * static_cast<float> (score_sums_3[mem_index]) * inv_max_score - 1.0f) << std::endl;
00793 #endif
00794 
00795 
00796         detections.push_back (detection);
00797       }
00798     }
00799 
00800 #ifdef __SSE2__
00801     aligned_free (score_sums);
00802     aligned_free (tmp_score_sums);
00803 #else
00804     delete[] score_sums;
00805 #endif
00806 
00807 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00808     delete[] score_sums_1;
00809     delete[] score_sums_2;
00810     delete[] score_sums_3;
00811 #endif
00812   }
00813 
00814   // release data
00815   for (size_t modality_index = 0; modality_index < modality_linearized_maps.size (); ++modality_index)
00816   {
00817     modality_energy_maps[modality_index].releaseAll ();
00818 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00819     modality_energy_maps_1[modality_index].releaseAll ();
00820     modality_energy_maps_2[modality_index].releaseAll ();
00821     modality_energy_maps_3[modality_index].releaseAll ();
00822 #endif
00823     for (size_t bin_index = 0; bin_index < modality_linearized_maps[modality_index].size (); ++bin_index)
00824     {
00825       modality_linearized_maps[modality_index][bin_index].releaseAll ();
00826 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00827       modality_linearized_maps_1[modality_index][bin_index].releaseAll ();
00828       modality_linearized_maps_2[modality_index][bin_index].releaseAll ();
00829       modality_linearized_maps_3[modality_index][bin_index].releaseAll ();
00830 #endif
00831     }
00832   }
00833 }
00834 
00836 void
00837 pcl::LINEMOD::detectTemplatesSemiScaleInvariant (
00838     const std::vector<QuantizableModality*> & modalities,
00839     std::vector<LINEMODDetection> & detections,
00840     const float min_scale,
00841     const float max_scale,
00842     const float scale_multiplier) const
00843 {
00844   // create energy maps
00845   std::vector<EnergyMaps> modality_energy_maps;
00846 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00847   std::vector<EnergyMaps> modality_energy_maps_1;
00848   std::vector<EnergyMaps> modality_energy_maps_2;
00849   std::vector<EnergyMaps> modality_energy_maps_3;
00850 #endif
00851   const size_t nr_modalities = modalities.size();
00852   for (size_t modality_index = 0; modality_index < nr_modalities; ++modality_index)
00853   {
00854     const QuantizedMap & quantized_map = modalities[modality_index]->getSpreadedQuantizedMap ();
00855 
00856     const size_t width = quantized_map.getWidth ();
00857     const size_t height = quantized_map.getHeight ();
00858 
00859     const unsigned char * quantized_data = quantized_map.getData ();
00860 
00861     const int nr_bins = 8;
00862     EnergyMaps energy_maps;
00863     energy_maps.initialize (width, height, nr_bins);
00864 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00865     EnergyMaps energy_maps_1;
00866     EnergyMaps energy_maps_2;
00867     EnergyMaps energy_maps_3;
00868     energy_maps_1.initialize (width, height, nr_bins);
00869     energy_maps_2.initialize (width, height, nr_bins);
00870     energy_maps_3.initialize (width, height, nr_bins);
00871 #endif
00872     //std::vector< unsigned char* > energy_maps(nr_bins);
00873     for (int bin_index = 0; bin_index < nr_bins; ++bin_index)
00874     {
00875       //energy_maps[bin_index] = new unsigned char[width*height];
00876       //memset (energy_maps[bin_index], 0, width*height);
00877 
00878       const unsigned char base_bit = static_cast<unsigned char> (0x1);
00879       unsigned char val0 = static_cast<unsigned char> (base_bit << bin_index); // e.g. 00100000
00880       unsigned char val1 = static_cast<unsigned char> (val0 | (base_bit << ((bin_index+1)%8)) | (base_bit << ((bin_index+7)%8))); // e.g. 01110000
00881       unsigned char val2 = static_cast<unsigned char> (val1 | (base_bit << ((bin_index+2)%8)) | (base_bit << ((bin_index+6)%8))); // e.g. 11111000
00882       unsigned char val3 = static_cast<unsigned char> (val2 | (base_bit << ((bin_index+3)%8)) | (base_bit << ((bin_index+5)%8))); // e.g. 11111101
00883       for (size_t index = 0; index < width*height; ++index)
00884       {
00885         if ((val0 & quantized_data[index]) != 0)
00886           ++energy_maps (bin_index, index);
00887 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00888         if ((val1 & quantized_data[index]) != 0)
00889           ++energy_maps_1 (bin_index, index);
00890         if ((val2 & quantized_data[index]) != 0)
00891           ++energy_maps_2 (bin_index, index);
00892         if ((val3 & quantized_data[index]) != 0)
00893           ++energy_maps_3 (bin_index, index);
00894 #else
00895         if ((val1 & quantized_data[index]) != 0)
00896           ++energy_maps (bin_index, index);
00897         if ((val2 & quantized_data[index]) != 0)
00898           ++energy_maps (bin_index, index);
00899         if ((val3 & quantized_data[index]) != 0)
00900           ++energy_maps (bin_index, index);
00901 #endif
00902       }
00903     }
00904 
00905     modality_energy_maps.push_back (energy_maps);
00906 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00907     modality_energy_maps_1.push_back (energy_maps_1);
00908     modality_energy_maps_2.push_back (energy_maps_2);
00909     modality_energy_maps_3.push_back (energy_maps_3);
00910 #endif
00911   }
00912 
00913   // create linearized maps
00914   const size_t step_size = 8;
00915   std::vector<std::vector<LinearizedMaps> > modality_linearized_maps;
00916 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00917   std::vector<std::vector<LinearizedMaps> > modality_linearized_maps_1;
00918   std::vector<std::vector<LinearizedMaps> > modality_linearized_maps_2;
00919   std::vector<std::vector<LinearizedMaps> > modality_linearized_maps_3;
00920 #endif
00921   for (size_t modality_index = 0; modality_index < nr_modalities; ++modality_index)
00922   {
00923     const size_t width = modality_energy_maps[modality_index].getWidth ();
00924     const size_t height = modality_energy_maps[modality_index].getHeight ();
00925 
00926     std::vector<LinearizedMaps> linearized_maps;
00927 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00928     std::vector<LinearizedMaps> linearized_maps_1;
00929     std::vector<LinearizedMaps> linearized_maps_2;
00930     std::vector<LinearizedMaps> linearized_maps_3;
00931 #endif
00932     const size_t nr_bins = modality_energy_maps[modality_index].getNumOfBins ();
00933     for (size_t bin_index = 0; bin_index < nr_bins; ++bin_index)
00934     {
00935       unsigned char * energy_map = modality_energy_maps[modality_index] (bin_index);
00936 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00937       unsigned char * energy_map_1 = modality_energy_maps_1[modality_index] (bin_index);
00938       unsigned char * energy_map_2 = modality_energy_maps_2[modality_index] (bin_index);
00939       unsigned char * energy_map_3 = modality_energy_maps_3[modality_index] (bin_index);
00940 #endif
00941 
00942       LinearizedMaps maps;
00943       maps.initialize (width, height, step_size);
00944 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00945       LinearizedMaps maps_1;
00946       LinearizedMaps maps_2;
00947       LinearizedMaps maps_3;
00948       maps_1.initialize (width, height, step_size);
00949       maps_2.initialize (width, height, step_size);
00950       maps_3.initialize (width, height, step_size);
00951 #endif
00952       for (size_t map_row = 0; map_row < step_size; ++map_row)
00953       {
00954         for (size_t map_col = 0; map_col < step_size; ++map_col)
00955         {
00956           unsigned char * linearized_map = maps (map_col, map_row);
00957 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00958           unsigned char * linearized_map_1 = maps_1 (map_col, map_row);
00959           unsigned char * linearized_map_2 = maps_2 (map_col, map_row);
00960           unsigned char * linearized_map_3 = maps_3 (map_col, map_row);
00961 #endif
00962 
00963           // copy data from energy maps
00964           const size_t lin_width = width/step_size;
00965           const size_t lin_height = height/step_size;
00966           for (size_t row_index = 0; row_index < lin_height; ++row_index)
00967           {
00968             for (size_t col_index = 0; col_index < lin_width; ++col_index)
00969             {
00970               const size_t tmp_col_index = col_index*step_size + map_col;
00971               const size_t tmp_row_index = row_index*step_size + map_row;
00972 
00973               linearized_map[row_index*lin_width + col_index] = energy_map[tmp_row_index*width + tmp_col_index];
00974 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00975               linearized_map_1[row_index*lin_width + col_index] = energy_map_1[tmp_row_index*width + tmp_col_index];
00976               linearized_map_2[row_index*lin_width + col_index] = energy_map_2[tmp_row_index*width + tmp_col_index];
00977               linearized_map_3[row_index*lin_width + col_index] = energy_map_3[tmp_row_index*width + tmp_col_index];
00978 #endif
00979             }
00980           }
00981         }
00982       }
00983 
00984       linearized_maps.push_back (maps);
00985 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00986       linearized_maps_1.push_back (maps_1);
00987       linearized_maps_2.push_back (maps_2);
00988       linearized_maps_3.push_back (maps_3);
00989 #endif
00990     }
00991 
00992     modality_linearized_maps.push_back (linearized_maps);
00993 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
00994     modality_linearized_maps_1.push_back (linearized_maps_1);
00995     modality_linearized_maps_2.push_back (linearized_maps_2);
00996     modality_linearized_maps_3.push_back (linearized_maps_3);
00997 #endif
00998   }
00999 
01000   // compute scores for templates
01001   const size_t width = modality_energy_maps[0].getWidth ();
01002   const size_t height = modality_energy_maps[0].getHeight ();
01003   for (size_t template_index = 0; template_index < templates_.size (); ++template_index)
01004   {
01005     const size_t mem_width = width / step_size;
01006     const size_t mem_height = height / step_size;
01007     const size_t mem_size = mem_width * mem_height;
01008 
01009     for (float scale = min_scale; scale <= max_scale; scale *= scale_multiplier)
01010     {
01011 #ifdef __SSE2__
01012       unsigned short * score_sums = reinterpret_cast<unsigned short*> (aligned_malloc (mem_size*sizeof(unsigned short)));
01013       unsigned char * tmp_score_sums = reinterpret_cast<unsigned char*> (aligned_malloc (mem_size*sizeof(unsigned char)));
01014       memset (score_sums, 0, mem_size*sizeof (score_sums[0]));
01015       memset (tmp_score_sums, 0, mem_size*sizeof (tmp_score_sums[0]));
01016 
01017       //__m128i * score_sums_m128i = reinterpret_cast<__m128i*> (score_sums);
01018       __m128i * tmp_score_sums_m128i = reinterpret_cast<__m128i*> (tmp_score_sums);
01019 
01020       const size_t mem_size_16 = mem_size / 16;
01021       //const size_t mem_size_mod_16 = mem_size & 15;
01022       const size_t mem_size_mod_16_base = mem_size_16 * 16;
01023 
01024       int max_score = 0;
01025       size_t copy_back_counter = 0;
01026       for (size_t feature_index = 0; feature_index < templates_[template_index].features.size (); ++feature_index)
01027       {
01028         const QuantizedMultiModFeature & feature = templates_[template_index].features[feature_index];
01029 
01030         for (size_t bin_index = 0; bin_index < 8; ++bin_index)
01031         {
01032           if ((feature.quantized_value & (0x1<<bin_index)) != 0)
01033           {
01034             max_score += 4;
01035 
01036             unsigned char *data = modality_linearized_maps[feature.modality_index][bin_index].getOffsetMap (
01037                 size_t (float (feature.x) * scale), size_t (float (feature.y) * scale));
01038             __m128i * data_m128i = reinterpret_cast<__m128i*> (data);
01039 
01040             for (size_t mem_index = 0; mem_index < mem_size_16; ++mem_index)
01041             {
01042               __m128i aligned_data_m128i = _mm_loadu_si128 (reinterpret_cast<const __m128i*> (data_m128i + mem_index)); // SSE2
01043               //__m128i aligned_data_m128i = _mm_lddqu_si128 (reinterpret_cast<const __m128i*> (data_m128i + mem_index)); // SSE3
01044               tmp_score_sums_m128i[mem_index] = _mm_add_epi8 (tmp_score_sums_m128i[mem_index], aligned_data_m128i);
01045             }
01046             for (size_t mem_index = mem_size_mod_16_base; mem_index < mem_size; ++mem_index)
01047             {
01048               tmp_score_sums[mem_index] = static_cast<unsigned char> (tmp_score_sums[mem_index] + data[mem_index]);
01049             }
01050           }
01051         }
01052 
01053         ++copy_back_counter;
01054 
01055         //if ((feature_index & 7) == 7)
01056         //if ((feature_index & 63) == 63)
01057         if (copy_back_counter > 63) // only valid if each feature has only one bit set..
01058         {
01059           copy_back_counter = 0;
01060 
01061           for (size_t mem_index = 0; mem_index < mem_size; mem_index += 16)
01062           {
01063             score_sums[mem_index+0]  = static_cast<unsigned short> (score_sums[mem_index+0]  + tmp_score_sums[mem_index+0]);
01064             score_sums[mem_index+1]  = static_cast<unsigned short> (score_sums[mem_index+1]  + tmp_score_sums[mem_index+1]);
01065             score_sums[mem_index+2]  = static_cast<unsigned short> (score_sums[mem_index+2]  + tmp_score_sums[mem_index+2]);
01066             score_sums[mem_index+3]  = static_cast<unsigned short> (score_sums[mem_index+3]  + tmp_score_sums[mem_index+3]);
01067             score_sums[mem_index+4]  = static_cast<unsigned short> (score_sums[mem_index+4]  + tmp_score_sums[mem_index+4]);
01068             score_sums[mem_index+5]  = static_cast<unsigned short> (score_sums[mem_index+5]  + tmp_score_sums[mem_index+5]);
01069             score_sums[mem_index+6]  = static_cast<unsigned short> (score_sums[mem_index+6]  + tmp_score_sums[mem_index+6]);
01070             score_sums[mem_index+7]  = static_cast<unsigned short> (score_sums[mem_index+7]  + tmp_score_sums[mem_index+7]);
01071             score_sums[mem_index+8]  = static_cast<unsigned short> (score_sums[mem_index+8]  + tmp_score_sums[mem_index+8]);
01072             score_sums[mem_index+9]  = static_cast<unsigned short> (score_sums[mem_index+9]  + tmp_score_sums[mem_index+9]);
01073             score_sums[mem_index+10] = static_cast<unsigned short> (score_sums[mem_index+10] + tmp_score_sums[mem_index+10]);
01074             score_sums[mem_index+11] = static_cast<unsigned short> (score_sums[mem_index+11] + tmp_score_sums[mem_index+11]);
01075             score_sums[mem_index+12] = static_cast<unsigned short> (score_sums[mem_index+12] + tmp_score_sums[mem_index+12]);
01076             score_sums[mem_index+13] = static_cast<unsigned short> (score_sums[mem_index+13] + tmp_score_sums[mem_index+13]);
01077             score_sums[mem_index+14] = static_cast<unsigned short> (score_sums[mem_index+14] + tmp_score_sums[mem_index+14]);
01078             score_sums[mem_index+15] = static_cast<unsigned short> (score_sums[mem_index+15] + tmp_score_sums[mem_index+15]);
01079           }
01080           for (size_t mem_index = mem_size_mod_16_base; mem_index < mem_size; ++mem_index)
01081           {
01082             score_sums[mem_index] = static_cast<unsigned short> (score_sums[mem_index] + tmp_score_sums[mem_index]);
01083           }
01084 
01085           memset (tmp_score_sums, 0, mem_size*sizeof (tmp_score_sums[0]));
01086         }
01087       }
01088       {
01089         for (size_t mem_index = 0; mem_index < mem_size; ++mem_index)
01090         {
01091           score_sums[mem_index] = static_cast<unsigned short> (score_sums[mem_index] + tmp_score_sums[mem_index]);
01092         }
01093         
01094         memset (tmp_score_sums, 0, mem_size*sizeof (tmp_score_sums[0]));
01095       }
01096 #else
01097       unsigned short * score_sums = new unsigned short[mem_size];
01098       //unsigned char * score_sums = new unsigned char[mem_size];
01099       memset (score_sums, 0, mem_size*sizeof (score_sums[0]));
01100 
01101 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
01102       unsigned short * score_sums_1 = new unsigned short[mem_size];
01103       unsigned short * score_sums_2 = new unsigned short[mem_size];
01104       unsigned short * score_sums_3 = new unsigned short[mem_size];
01105       memset (score_sums_1, 0, mem_size*sizeof (score_sums_1[0]));
01106       memset (score_sums_2, 0, mem_size*sizeof (score_sums_2[0]));
01107       memset (score_sums_3, 0, mem_size*sizeof (score_sums_3[0]));
01108 #endif
01109 
01110       int max_score = 0;
01111       for (size_t feature_index = 0; feature_index < templates_[template_index].features.size (); ++feature_index)
01112       {
01113         const QuantizedMultiModFeature & feature = templates_[template_index].features[feature_index];
01114 
01115         //feature.modality_index;
01116         for (size_t bin_index = 0; bin_index < 8; ++bin_index)
01117         {
01118           if ((feature.quantized_value & (0x1<<bin_index)) != 0)
01119           {
01120 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
01121             ++max_score;
01122 
01123             unsigned char * data = modality_linearized_maps[feature.modality_index][bin_index].getOffsetMap (feature.x*scale, feature.y*scale);
01124             unsigned char * data_1 = modality_linearized_maps_1[feature.modality_index][bin_index].getOffsetMap (feature.x*scale, feature.y*scale);
01125             unsigned char * data_2 = modality_linearized_maps_2[feature.modality_index][bin_index].getOffsetMap (feature.x*scale, feature.y*scale);
01126             unsigned char * data_3 = modality_linearized_maps_3[feature.modality_index][bin_index].getOffsetMap (feature.x*scale, feature.y*scale);
01127             for (size_t mem_index = 0; mem_index < mem_size; ++mem_index)
01128             {
01129               score_sums[mem_index] += data[mem_index];
01130               score_sums_1[mem_index] += data_1[mem_index];
01131               score_sums_2[mem_index] += data_2[mem_index];
01132               score_sums_3[mem_index] += data_3[mem_index];
01133             }
01134 #else
01135             max_score += 4;
01136 
01137             unsigned char * data = modality_linearized_maps[feature.modality_index][bin_index].getOffsetMap (static_cast<size_t> (feature.x*scale), static_cast<size_t> (feature.y*scale));
01138             for (size_t mem_index = 0; mem_index < mem_size; ++mem_index)
01139             {
01140               score_sums[mem_index] += data[mem_index];
01141             }
01142 #endif
01143           }
01144         }
01145       }
01146 #endif
01147 
01148       const float inv_max_score = 1.0f / float (max_score);
01149 
01150       // we compute a new threshold based on the threshold supplied by the user;
01151       // this is due to the use of the cosine approx. in the response computation;
01152 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
01153       const float raw_threshold = (4.0f * float (max_score) / 2.0f + template_threshold_ * (4.0f * float (max_score) / 2.0f));
01154 #else
01155       const float raw_threshold = (float (max_score) / 2.0f + template_threshold_ * (float (max_score) / 2.0f));
01156 #endif
01157 
01158       //int max_value = 0;
01159       //size_t max_index = 0;
01160       for (size_t mem_index = 0; mem_index < mem_size; ++mem_index)
01161       {
01162         //const float score = score_sums[mem_index] * inv_max_score;
01163 
01164 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
01165         const float raw_score = score_sums[mem_index] 
01166           + score_sums_1[mem_index]
01167           + score_sums_2[mem_index]
01168           + score_sums_3[mem_index];
01169 
01170         const float score = 2.0f * static_cast<float> (raw_score) * 0.25f * inv_max_score - 1.0f;
01171 #else
01172         const float raw_score = score_sums[mem_index];
01173 
01174         const float score = 2.0f * static_cast<float> (raw_score) * inv_max_score - 1.0f;
01175 #endif
01176 
01177 
01178         //if (score > template_threshold_) 
01179         if (raw_score > raw_threshold) 
01180         {
01181           const size_t mem_col_index = (mem_index % mem_width);
01182           const size_t mem_row_index = (mem_index / mem_width);
01183 
01184           if (use_non_max_suppression_)
01185           {
01186             bool is_local_max = true;
01187             for (size_t sup_row_index = mem_row_index-1; sup_row_index <= mem_row_index+1 && is_local_max; ++sup_row_index)
01188             {
01189               if (sup_row_index >= mem_height)
01190                 continue;
01191 
01192               for (size_t sup_col_index = mem_col_index-1; sup_col_index <= mem_col_index+1; ++sup_col_index)
01193               {
01194                 if (sup_col_index >= mem_width)
01195                   continue;
01196 
01197                 if (score_sums[mem_index] < score_sums[sup_row_index*mem_width + sup_col_index])
01198                 {
01199                   is_local_max = false;
01200                   break;
01201                 }
01202               } 
01203             }
01204 
01205             if (!is_local_max)
01206               continue;
01207           }
01208 
01209           LINEMODDetection detection;
01210 
01211           if (average_detections_)
01212           {
01213             size_t average_col = 0;
01214             size_t average_row = 0;
01215             size_t sum = 0;
01216 
01217             for (size_t sup_row_index = mem_row_index-1; sup_row_index <= mem_row_index+1; ++sup_row_index)
01218             {
01219               if (sup_row_index >= mem_height)
01220                 continue;
01221 
01222               for (size_t sup_col_index = mem_col_index-1; sup_col_index <= mem_col_index+1; ++sup_col_index)
01223               {
01224                 if (sup_col_index >= mem_width)
01225                   continue;
01226 
01227                 const size_t weight = static_cast<size_t> (score_sums[sup_row_index*mem_width + sup_col_index]);
01228                 average_col += sup_col_index * weight;
01229                 average_row += sup_row_index * weight;
01230                 sum += weight;
01231               } 
01232             }
01233 
01234             average_col *= step_size;
01235             average_row *= step_size;
01236 
01237             average_col /= sum;
01238             average_row /= sum;
01239 
01240             //std::cerr << mem_col_index << ", " << mem_row_index << " - " << average_col << ", " << average_row << std::endl;
01241             std::cerr << mem_col_index*step_size << ", " << mem_row_index*step_size << " - " << average_col << ", " << average_row << std::endl;
01242 
01243             const size_t detection_col_index = average_col;// * step_size;
01244             const size_t detection_row_index = average_row;// * step_size;
01245 
01246             detection.x = static_cast<int> (detection_col_index);
01247             detection.y = static_cast<int> (detection_row_index);
01248           }
01249           else
01250           {
01251             const size_t detection_col_index = mem_col_index * step_size;
01252             const size_t detection_row_index = mem_row_index * step_size;
01253 
01254             detection.x = static_cast<int> (detection_col_index);
01255             detection.y = static_cast<int> (detection_row_index);
01256           }
01257 
01258           detection.template_id = static_cast<int> (template_index);
01259           detection.score = score;
01260           detection.scale = scale;
01261 
01262 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
01263           std::cerr << "score: " << static_cast<float> (raw_score) * inv_max_score * 0.25f << ", " << (2.0f * static_cast<float> (raw_score) * inv_max_score - 1.0f) << std::endl;
01264           std::cerr << "score0: " << static_cast<float> (score_sums[mem_index]) * inv_max_score << ", " << (2.0f * static_cast<float> (score_sums[mem_index]) * inv_max_score - 1.0f) << std::endl;
01265           std::cerr << "score1: " << static_cast<float> (score_sums_1[mem_index]) * inv_max_score << ", " << (2.0f * static_cast<float> (score_sums_1[mem_index]) * inv_max_score - 1.0f) << std::endl;
01266           std::cerr << "score2: " << static_cast<float> (score_sums_2[mem_index]) * inv_max_score << ", " << (2.0f * static_cast<float> (score_sums_2[mem_index]) * inv_max_score - 1.0f) << std::endl;
01267           std::cerr << "score3: " << static_cast<float> (score_sums_3[mem_index]) * inv_max_score << ", " << (2.0f * static_cast<float> (score_sums_3[mem_index]) * inv_max_score - 1.0f) << std::endl;
01268 #endif
01269 
01270 
01271           detections.push_back (detection);
01272         }
01273       }
01274 
01275       delete[] score_sums;
01276 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
01277       delete[] score_sums_1;
01278       delete[] score_sums_2;
01279       delete[] score_sums_3;
01280 #endif
01281     }
01282   }
01283 
01284   // release data
01285   for (size_t modality_index = 0; modality_index < modality_linearized_maps.size (); ++modality_index)
01286   {
01287     modality_energy_maps[modality_index].releaseAll ();
01288 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
01289     modality_energy_maps_1[modality_index].releaseAll ();
01290     modality_energy_maps_2[modality_index].releaseAll ();
01291     modality_energy_maps_3[modality_index].releaseAll ();
01292 #endif
01293     for (size_t bin_index = 0; bin_index < modality_linearized_maps[modality_index].size (); ++bin_index)
01294     {
01295       modality_linearized_maps[modality_index][bin_index].releaseAll ();
01296 #ifdef LINEMOD_USE_SEPARATE_ENERGY_MAPS
01297       modality_linearized_maps_1[modality_index][bin_index].releaseAll ();
01298       modality_linearized_maps_2[modality_index][bin_index].releaseAll ();
01299       modality_linearized_maps_3[modality_index][bin_index].releaseAll ();
01300 #endif
01301     }
01302   }
01303 }
01304 
01306 void
01307 pcl::LINEMOD::saveTemplates (const char * file_name) const
01308 {
01309   std::ofstream file_stream;
01310   file_stream.open (file_name, std::ofstream::out | std::ofstream::binary);
01311 
01312   serialize (file_stream);
01313 
01314   file_stream.close ();
01315 }
01316 
01318 void
01319 pcl::LINEMOD::loadTemplates (const char * file_name)
01320 {
01321   std::ifstream file_stream;
01322   file_stream.open (file_name, std::ofstream::in | std::ofstream::binary);
01323 
01324   deserialize (file_stream);
01325 
01326   file_stream.close ();
01327 }
01328 
01330 void
01331 pcl::LINEMOD::serialize (std::ostream & stream) const
01332 {
01333   const int nr_templates = static_cast<int> (templates_.size ());
01334   write (stream, nr_templates);
01335   for (int template_index = 0; template_index < nr_templates; ++template_index)
01336     templates_[template_index].serialize (stream);
01337 }
01338 
01340 void 
01341 pcl::LINEMOD::deserialize (std::istream & stream)
01342 {
01343   templates_.clear ();
01344 
01345   int nr_templates;
01346   read (stream, nr_templates);
01347   templates_.resize (nr_templates);
01348   for (int template_index = 0; template_index < nr_templates; ++template_index)
01349     templates_[template_index].deserialize (stream);
01350 }


pcl
Author(s): Open Perception
autogenerated on Wed Aug 26 2015 15:25:13