ProgramGLSL.cpp
Go to the documentation of this file.
00001 
00002 //      File:           ProgramGLSL.cpp
00003 //      Author:         Changchang Wu
00004 //      Description : GLSL related classes
00005 //              class ProgramGLSL               A simple wrapper of GLSL programs
00006 //              class ShaderBagGLSL             GLSL shaders for SIFT
00007 //              class FilterGLSL                GLSL gaussian filters for SIFT
00008 //
00009 //      Copyright (c) 2007 University of North Carolina at Chapel Hill
00010 //      All Rights Reserved
00011 //
00012 //      Permission to use, copy, modify and distribute this software and its
00013 //      documentation for educational, research and non-profit purposes, without
00014 //      fee, and without a written agreement is hereby granted, provided that the
00015 //      above copyright notice and the following paragraph appear in all copies.
00016 //      
00017 //      The University of North Carolina at Chapel Hill make no representations
00018 //      about the suitability of this software for any purpose. It is provided
00019 //      'as is' without express or implied warranty. 
00020 //
00021 //      Please send BUG REPORTS to ccwu@cs.unc.edu
00022 //
00024 
00025   
00026 #include "GL/glew.h"
00027 #include <string.h>
00028 #include <stdio.h>
00029 #include <iomanip>
00030 #include <iostream>
00031 #include <sstream>
00032 #include <vector>
00033 #include <algorithm>
00034 #include <math.h>
00035 using namespace std;
00036 
00037 #include "GlobalUtil.h"
00038 #include "ProgramGLSL.h"
00039 #include "GLTexImage.h"
00040 #include "ShaderMan.h"
00041 #include "SiftGPU.h"
00042 
00043 ProgramGLSL::ShaderObject::ShaderObject(int shadertype, const char * source, int filesource)
00044 {
00045 
00046 
00047         _type = shadertype; 
00048         _compiled = 0;
00049 
00050 
00051         _shaderID = glCreateShader(shadertype);
00052         if(_shaderID == 0) return;
00053         
00054         if(source)
00055         {
00056 
00057                 GLint                           code_length;
00058                 if(filesource ==0)
00059                 {
00060                         const char* code  = source;
00061                         code_length = (GLint) strlen(code);
00062                         glShaderSource(_shaderID, 1, (const char **) &code, &code_length);
00063                 }else
00064                 {
00065                         char * code = NULL;
00066                         if ( (code_length = ReadShaderFile(source, code)) == 0 ) 
00067                         {
00068                           if ( code != NULL )
00069                             delete[] code;
00070                           return;
00071                         }
00072                         glShaderSource(_shaderID, 1, (const char **) &code, &code_length);
00073                         delete[] code;
00074                 }
00075 
00076                 glCompileShader(_shaderID);
00077 
00078                 CheckCompileLog();
00079 
00080                 if(!_compiled)          std::cout << source;
00081         }
00082 
00083 
00084 
00085 
00086 }
00087 
00088 int ProgramGLSL::ShaderObject::ReadShaderFile(const char *sourcefile,  char*& code )
00089 {
00090         code = NULL;
00091         FILE * file;
00092         int    len=0;
00093 
00094         if(sourcefile == NULL) return 0;
00095 
00096         file = fopen(sourcefile,"rt");
00097         if(file == NULL) return 0;
00098 
00099         
00100         fseek(file, 0, SEEK_END);
00101         len = ftell(file);
00102         rewind(file);
00103         if(len >1)
00104         {
00105                 code = new  char[len+1];
00106                 fread(code, sizeof( char), len, file);
00107                 code[len] = 0;
00108         }else
00109         {
00110                 len = 0;
00111         }
00112 
00113         fclose(file);
00114 
00115         return len;
00116         
00117 }
00118 
00119 void ProgramGLSL::ShaderObject::CheckCompileLog()
00120 {
00121 
00122         GLint status;
00123         glGetShaderiv(_shaderID, GL_COMPILE_STATUS, &status);
00124         _compiled = (status ==GL_TRUE);
00125 
00126         if(_compiled == 0)      PrintCompileLog(std::cout);
00127 
00128 
00129 }
00130 
00131 ProgramGLSL::ShaderObject::~ShaderObject()
00132 {
00133         if(_shaderID)   glDeleteShader(_shaderID);
00134 
00135 }
00136 
00137 int ProgramGLSL::ShaderObject::IsValidFragmentShader()
00138 {
00139         return _type == GL_FRAGMENT_SHADER && _shaderID && _compiled;
00140 }
00141 
00142 int  ProgramGLSL::ShaderObject::IsValidVertexShader()
00143 {
00144         return _type == GL_VERTEX_SHADER && _shaderID && _compiled;
00145 }
00146 
00147 
00148 void ProgramGLSL::ShaderObject::PrintCompileLog(ostream&os)
00149 {
00150         GLint len = 0;  
00151 
00152         glGetShaderiv(_shaderID, GL_INFO_LOG_LENGTH , &len);
00153         if(len <=1) return;
00154         
00155         char * compileLog = new char[len+1];
00156         if(compileLog == NULL) return;
00157 
00158         glGetShaderInfoLog(_shaderID, len, &len, compileLog);
00159         
00160 
00161         os<<"Compile Log\n"<<compileLog<<"\n";
00162 
00163         delete[] compileLog;
00164 }
00165 
00166 
00167 ProgramGLSL::ProgramGLSL()
00168 {
00169         _linked = 0;
00170         _TextureParam0 = -1;
00171         _programID = glCreateProgram();
00172 }
00173 ProgramGLSL::~ProgramGLSL()
00174 {
00175         if(_programID)glDeleteProgram(_programID);
00176 }
00177 void ProgramGLSL::AttachShaderObject(ShaderObject &shader)
00178 {
00179         if(_programID  && shader.IsValidShaderObject()) 
00180                 glAttachShader(_programID, shader.GetShaderID());
00181 }
00182 void ProgramGLSL::DetachShaderObject(ShaderObject &shader)
00183 {
00184         if(_programID  && shader.IsValidShaderObject()) 
00185                 glDetachShader(_programID, shader.GetShaderID());
00186 }
00187 int ProgramGLSL::LinkProgram()
00188 {
00189         _linked = 0;
00190 
00191         if(_programID==0) return 0;
00192 
00193         glLinkProgram(_programID);
00194 
00195         CheckLinkLog();
00196 
00197 //      GlobalUtil::StartTimer("100 link test");
00198 //      for(int i = 0; i<100; i++) glLinkProgram(_programID);
00199 //      GlobalUtil::StopTimer();
00200 
00201         return _linked;
00202 }
00203 
00204 void ProgramGLSL::CheckLinkLog()
00205 {
00206         GLint status;
00207         glGetProgramiv(_programID, GL_LINK_STATUS, &status);
00208 
00209         _linked = (status == GL_TRUE);
00210 
00211 }
00212 
00213 
00214 int ProgramGLSL::ValidateProgram()
00215 {
00216         if(_programID && _linked)
00217         {
00219 //              glValidateProgram(_programID);
00220 //              glGetProgramiv(_programID, GL_VALIDATE_STATUS, &status);
00221 //              return status == GL_TRUE;
00222                 return 1;
00223         }
00224         else
00225                 return 0;
00226 }
00227 
00228 void ProgramGLSL::PrintLinkLog(std::ostream &os)
00229 {
00230         GLint len = 0;  
00231 
00232         glGetProgramiv(_programID, GL_INFO_LOG_LENGTH , &len);
00233         if(len <=1) return;
00234         
00235         char* linkLog = new char[len+1];
00236         if(linkLog == NULL) return;
00237 
00238         glGetProgramInfoLog(_programID, len, &len, linkLog);
00239         
00240         linkLog[len] = 0;
00241 
00242         if(strstr(linkLog, "failed"))
00243         {
00244                 os<<linkLog + (linkLog[0] == ' '? 1:0)<<"\n";
00245                 _linked = 0;
00246         }
00247 
00248         delete[] linkLog;
00249 }
00250 
00251 int ProgramGLSL::UseProgram()
00252 {
00253         if(ValidateProgram())
00254         {
00255                 glUseProgram(_programID);
00256                 if (_TextureParam0 >= 0) glUniform1i(_TextureParam0, 0);
00257                 return true;
00258         }
00259         else
00260         {
00261                 return false;
00262         }
00263 }
00264 
00265 
00266 ProgramGLSL::ProgramGLSL(const char *frag_source)
00267 {
00268         _linked = 0;
00269         _programID = glCreateProgram();
00270         _TextureParam0 = -1;
00271         ShaderObject shader(GL_FRAGMENT_SHADER, frag_source);
00272 
00273         if(shader.IsValidFragmentShader())
00274         {
00275                 AttachShaderObject(shader);
00276                 LinkProgram();
00277 
00278                 if(!_linked)
00279                 {
00280                         //shader.PrintCompileLog(std::cout);
00281                         PrintLinkLog(std::cout);
00282                 } else
00283                 {
00284                         _TextureParam0 = glGetUniformLocation(_programID, "tex");
00285                 }
00286         }else
00287         {
00288                 _linked = 0;
00289         }
00290         
00291 }
00292 
00293 /*
00294 ProgramGLSL::ProgramGLSL(char*frag_source, char * vert_source)
00295 {
00296         _used = 0;
00297         _linked = 0;
00298         _programID = glCreateProgram();
00299         ShaderObject shader(GL_FRAGMENT_SHADER, frag_source);
00300         ShaderObject vertex_shader(GL_VERTEX_SHADER, vert_source);
00301         AttachShaderObject(shader);
00302         AttachShaderObject(vertex_shader);
00303         LinkProgram();
00304         if(!_linked)
00305         {
00306                 shader.PrintCompileLog(std::cout);
00307                 vertex_shader.PrintCompileLog(std::cout);
00308                 PrintLinkLog(std::cout);
00309                 std::cout<<vert_source;
00310                 std::cout<<frag_source;
00311         }
00312 
00313 }
00314 */
00315 
00316 
00317 
00318 void ProgramGLSL::ReLink()
00319 {
00320         glLinkProgram(_programID);
00321 }
00322 
00323 int ProgramGLSL::IsNative()
00324 {
00325         return _linked;
00326 }
00327 
00328 FilterGLSL::FilterGLSL(float sigma) 
00329 {
00330         //pixel inside 3*sigma box
00331         int sz = int( ceil( GlobalUtil::_FilterWidthFactor * sigma -0.5) ) ;//
00332         int width = 2*sz + 1;
00333 
00334         //filter size truncation
00335         if(GlobalUtil::_MaxFilterWidth >0 && width > GlobalUtil::_MaxFilterWidth)
00336         {
00337                 std::cout<<"Filter size truncated from "<<width<<" to "<<GlobalUtil::_MaxFilterWidth<<endl;
00338                 sz = GlobalUtil::_MaxFilterWidth>>1;
00339                 width = 2 * sz + 1;
00340         }
00341 
00342         int i;
00343         float * kernel = new float[width];
00344         float   rv = 1.0f/(sigma*sigma);
00345         float   v, ksum =0; 
00346 
00347         // pre-compute filter
00348         for( i = -sz ; i <= sz ; ++i) 
00349         {
00350                 kernel[i+sz] =  v = exp(-0.5f * i * i *rv) ;
00351                 ksum += v;
00352         }
00353 
00354         //normalize the kernel
00355         rv = 1.0f / ksum;
00356         for(i = 0; i< width ;i++) kernel[i]*=rv;
00357         //
00358         
00359     MakeFilterProgram(kernel, width);
00360 
00361         _size = sz;
00362 
00363         delete[] kernel; 
00364     if(GlobalUtil::_verbose && GlobalUtil::_timingL) std::cout<<"Filter: sigma = "<<sigma<<", size = "<<width<<"x"<<width<<endl;
00365 }
00366 
00367 
00368 void FilterGLSL::MakeFilterProgram(float kernel[], int width)
00369 {
00370         if(GlobalUtil::_usePackedTex)
00371         {
00372                 s_shader_h = CreateFilterHPK(kernel, width);
00373                 s_shader_v = CreateFilterVPK(kernel, width);
00374         }else
00375         {
00376                 s_shader_h = CreateFilterH(kernel, width);
00377                 s_shader_v = CreateFilterV(kernel, width);
00378         }
00379 }
00380 
00381 ProgramGPU* FilterGLSL::CreateFilterH(float kernel[], int width)
00382 {
00383         ostringstream out;
00384         out<<setprecision(8);
00385 
00386         out<<  "uniform sampler2DRect tex;";
00387         out<< "\nvoid main(void){ float intensity = 0.0 ;  vec2 pos;\n";
00388 
00389     int half_width = width / 2; 
00390         for(int i = 0; i< width; i++)
00391         {
00392                 if(i == half_width)
00393                 {
00394 
00395                         out<<"float or = texture2DRect(tex, gl_TexCoord[0].st).r;\n";
00396                         out<<"intensity+= or * "<<kernel[i]<<";\n";
00397                 }else
00398                 {
00399                         out<<"pos = gl_TexCoord[0].st + vec2(float("<< (i - half_width) <<") , 0);\n";
00400                         out<<"intensity+= "<<kernel[i]<<"*texture2DRect(tex, pos).r;\n";
00401                 }
00402         }
00403 
00404         //copy original data to red channel
00405         out<<"gl_FragColor.r = or;\n"; 
00406         out<<"gl_FragColor.b  = intensity;}\n"<<'\0';
00407 
00408         return new ProgramGLSL(out.str().c_str());
00409 }
00410 
00411 
00412 ProgramGPU* FilterGLSL::CreateFilterV(float kernel[], int height)
00413 {
00414         ostringstream out;
00415         out<<setprecision(8);
00416 
00417         out<<  "uniform sampler2DRect tex;";
00418         out<< "\nvoid main(void){ float intensity = 0.0;vec2 pos; \n";
00419     int half_height = height / 2; 
00420         for(int i = 0; i< height; i++)
00421         {
00422 
00423                 if(i == half_height)
00424                 {
00425                         out<<"vec2 orb = texture2DRect(tex, gl_TexCoord[0].st).rb;\n";
00426                         out<<"intensity+= orb.y * "<<kernel[i]<<";\n";
00427 
00428                 }else
00429                 {
00430                         out<<"pos = gl_TexCoord[0].st + vec2(0, float("<<(i - half_height) <<") );\n";
00431                         out<<"intensity+= texture2DRect(tex, pos).b * "<<kernel[i]<<";\n";
00432                 }
00433                 
00434         }
00435 
00436         out<<"gl_FragColor.b = orb.y;\n";
00437         out<<"gl_FragColor.g = intensity - orb.x;\n"; // difference of gaussian..
00438         out<<"gl_FragColor.r = intensity;}\n"<<'\0';
00439         
00440 //      std::cout<<buffer<<endl;
00441         return new ProgramGLSL(out.str().c_str());
00442 }
00443 
00444 
00445 
00446 ProgramGPU* FilterGLSL::CreateFilterHPK(float kernel[], int width)
00447 {
00448         //both h and v are packed...
00449         int i, j , xw, xwn;
00450 
00451         int halfwidth  = width >>1;
00452         float * pf = kernel + halfwidth;
00453         int nhpixel = (halfwidth+1)>>1; //how many neighbour pixels need to be looked up
00454         int npixel  = (nhpixel<<1)+1;//
00455         float weight[3];
00456         ostringstream out;;
00457         out<<setprecision(8);
00458 
00459         out<<  "uniform sampler2DRect tex;";
00460         out<< "\nvoid main(void){ vec4 result = vec4(0, 0, 0, 0);\n";
00462         out<<"vec4 pc; vec2 coord; \n";
00463         for( i = 0 ; i < npixel ; i++)
00464         {
00465                 out<<"coord = gl_TexCoord[0].xy + vec2(float("<<i-nhpixel<<"),0);\n";
00466                 out<<"pc=texture2DRect(tex, coord);\n";
00467                 if(GlobalUtil::_PreciseBorder)          out<<"if(coord.x < 0.0) pc = pc.rrbb;\n";
00468                 //for each sub-pixel j  in center, the weight of sub-pixel k 
00469                 xw = (i - nhpixel)*2;
00470                 for( j = 0; j < 3; j++)
00471                 {
00472                         xwn = xw  + j  -1;
00473                         weight[j] = xwn < -halfwidth || xwn > halfwidth? 0 : pf[xwn];
00474                 }
00475                 if(weight[1] == 0.0)
00476                 {
00477                         out<<"result += vec4("<<weight[2]<<","<<weight[0]<<","<<weight[2]<<","<<weight[0]<<")*pc.grab;\n";
00478                 }
00479                 else
00480                 {
00481                         out<<"result += vec4("<<weight[1]<<", "<<weight[0]<<", "<<weight[1]<<", "<<weight[0]<<")*pc.rrbb;\n";
00482                         out<<"result += vec4("<<weight[2]<<", "<<weight[1]<<", "<<weight[2]<<", "<<weight[1]<<")*pc.ggaa;\n";
00483                 }       
00484         
00485         }
00486         out<<"gl_FragColor = result;}\n"<<'\0';
00487         
00488         return new ProgramGLSL(out.str().c_str());
00489 
00490 
00491 }
00492 
00493 
00494 ProgramGPU* FilterGLSL::CreateFilterVPK(float kernel[], int height)
00495 {
00496 
00497         //both h and v are packed...
00498         int i, j, yw, ywn;
00499 
00500         int halfh  = height >>1;
00501         float * pf = kernel + halfh;
00502         int nhpixel = (halfh+1)>>1;     //how many neighbour pixels need to be looked up
00503         int npixel  = (nhpixel<<1)+1;//
00504         float weight[3];
00505         ostringstream out;;
00506         out<<setprecision(8);
00507 
00508         out<<  "uniform sampler2DRect tex;";
00509         out<< "\nvoid main(void){ vec4 result = vec4(0, 0, 0, 0);\n";
00511         out<<"vec4 pc; vec2 coord;\n";
00512         for( i = 0 ; i < npixel ; i++)
00513         {
00514                 out<<"coord = gl_TexCoord[0].xy + vec2(0, float("<<i-nhpixel<<"));\n";
00515                 out<<"pc=texture2DRect(tex, coord);\n";
00516                 if(GlobalUtil::_PreciseBorder)  out<<"if(coord.y < 0.0) pc = pc.rgrg;\n";
00517 
00518                 //for each sub-pixel j  in center, the weight of sub-pixel k 
00519                 yw = (i - nhpixel)*2;
00520                 for( j = 0; j < 3; j++)
00521                 {
00522                         ywn = yw + j  -1;
00523                         weight[j] = ywn < -halfh || ywn > halfh? 0 : pf[ywn];
00524                 }
00525                 if(weight[1] == 0.0)
00526                 {
00527                         out<<"result += vec4("<<weight[2]<<","<<weight[2]<<","<<weight[0]<<","<<weight[0]<<")*pc.barg;\n";
00528                 }else
00529                 {
00530                         out<<"result += vec4("<<weight[1]<<","<<weight[1]<<","<<weight[0]<<","<<weight[0]<<")*pc.rgrg;\n";
00531                         out<<"result += vec4("<<weight[2]<<","<<weight[2]<<","<<weight[1]<<","<<weight[1]<<")*pc.baba;\n";
00532                 }
00533         }
00534         out<<"gl_FragColor = result;}\n"<<'\0';
00535 
00536         return new ProgramGLSL(out.str().c_str());
00537 }
00538 
00539 
00540 
00541 ShaderBag::ShaderBag()
00542 {
00543         s_debug = 0;
00544         s_orientation = 0;
00545         s_display_gaussian = 0;
00546         s_display_dog = 0;
00547         s_display_grad = 0;
00548         s_display_keys = 0;
00549         s_sampling = 0;
00550         s_grad_pass = 0;
00551         s_dog_pass = 0;
00552         s_keypoint = 0;
00553         s_genlist_init_tight = 0;
00554         s_genlist_init_ex = 0;
00555         s_genlist_histo = 0;
00556         s_genlist_start = 0;
00557         s_genlist_step = 0;
00558         s_genlist_end = 0;
00559         s_vertex_list = 0;
00560         s_descriptor_fp = 0;
00561         s_margin_copy = 0;
00563     f_gaussian_skip0 = NULL;
00564     f_gaussian_skip1 = NULL;
00565     f_gaussian_step = NULL;
00566     _gaussian_step_num = 0; 
00567 
00568 }
00569 
00570 ShaderBag::~ShaderBag()
00571 {
00572         if(s_debug)delete s_debug;
00573         if(s_orientation)delete s_orientation;
00574         if(s_display_gaussian)delete s_display_gaussian;
00575         if(s_display_dog)delete s_display_dog;
00576         if(s_display_grad)delete s_display_grad;
00577         if(s_display_keys)delete s_display_keys;
00578         if(s_sampling)delete s_sampling;
00579         if(s_grad_pass)delete s_grad_pass;
00580         if(s_dog_pass) delete s_dog_pass;
00581         if(s_keypoint)delete s_keypoint;
00582         if(s_genlist_init_tight)delete s_genlist_init_tight;
00583         if(s_genlist_init_ex)delete s_genlist_init_ex;
00584         if(s_genlist_histo)delete s_genlist_histo;
00585         if(s_genlist_start)delete s_genlist_start;
00586         if(s_genlist_step)delete s_genlist_step;
00587         if(s_genlist_end)delete s_genlist_end;
00588         if(s_vertex_list)delete s_vertex_list;
00589         if(s_descriptor_fp)delete s_descriptor_fp;
00590         if(s_margin_copy) delete s_margin_copy;
00591 
00593     if(f_gaussian_skip1) delete f_gaussian_skip1;
00594 
00595     for(unsigned int i = 0; i < f_gaussian_skip0_v.size(); i++)
00596     {
00597             if(f_gaussian_skip0_v[i]) delete f_gaussian_skip0_v[i];
00598     }
00599     if(f_gaussian_step && _gaussian_step_num > 0) 
00600     {
00601             for(int i = 0; i< _gaussian_step_num; i++)
00602             {
00603                     delete f_gaussian_step[i];
00604             }
00605             delete[] f_gaussian_step;
00606     }
00607 }
00608 
00609 
00610 void ShaderBag::SelectInitialSmoothingFilter(int octave_min, SiftParam&param)
00611 {
00612     float sigma = param.GetInitialSmoothSigma(octave_min);
00613     if(sigma == 0) 
00614     {
00615        f_gaussian_skip0 = NULL; 
00616     }else
00617     {
00618             for(unsigned int i = 0; i < f_gaussian_skip0_v.size(); i++)
00619             {
00620                     if(f_gaussian_skip0_v[i]->_id == octave_min)
00621                     {
00622                             f_gaussian_skip0 = f_gaussian_skip0_v[i];
00623                             return ;
00624                     }
00625             }
00626             FilterGLSL * filter = new FilterGLSL(sigma); 
00627             filter->_id = octave_min;
00628             f_gaussian_skip0_v.push_back(filter);
00629             f_gaussian_skip0 = filter; 
00630     }
00631 }
00632 
00633 void ShaderBag::CreateGaussianFilters(SiftParam&param)
00634 {
00635         if(param._sigma_skip0>0.0f) 
00636         {
00637         FilterGLSL * filter;
00638                 f_gaussian_skip0 = filter = new FilterGLSL(param._sigma_skip0);
00639                 filter->_id = GlobalUtil::_octave_min_default; 
00640                 f_gaussian_skip0_v.push_back(filter);
00641         }
00642         if(param._sigma_skip1>0.0f) 
00643         {
00644                 f_gaussian_skip1 = new FilterGLSL(param._sigma_skip1);
00645         }
00646 
00647         f_gaussian_step = new FilterProgram*[param._sigma_num];
00648         for(int i = 0; i< param._sigma_num; i++)
00649         {
00650                 f_gaussian_step[i] =  new FilterGLSL(param._sigma[i]);
00651         }
00652     _gaussian_step_num = param._sigma_num;
00653 }
00654 
00655 
00656 void ShaderBag::LoadDynamicShaders(SiftParam& param)
00657 {
00658     LoadKeypointShader(param._dog_threshold, param._edge_threshold);
00659     LoadGenListShader(param._dog_level_num, 0);
00660     CreateGaussianFilters(param);
00661 }
00662 
00663 
00664 void ShaderBagGLSL::LoadFixedShaders()
00665 {
00666 
00667 
00668         s_gray = new ProgramGLSL( 
00669                 "uniform sampler2DRect tex; void main(void){\n"
00670                 "float intensity = dot(vec3(0.299, 0.587, 0.114), texture2DRect(tex, gl_TexCoord[0].st ).rgb);\n"
00671                 "gl_FragColor = vec4(intensity, intensity, intensity, 1.0);}");
00672 
00673 
00674         s_debug = new ProgramGLSL( "void main(void){gl_FragColor.rg =  gl_TexCoord[0].st;}");
00675 
00676 
00677         s_sampling = new ProgramGLSL(
00678                 "uniform sampler2DRect tex; void main(void){gl_FragColor.rg= texture2DRect(tex, gl_TexCoord[0].st).rg;}");
00679 
00680         //
00681         s_grad_pass = new ProgramGLSL(
00682         "uniform sampler2DRect tex; void main ()\n"
00683         "{\n"
00684         "       vec4 v1, v2, gg;\n"
00685         "       vec4 cc  = texture2DRect(tex, gl_TexCoord[0].xy);\n"
00686         "       gg.x = texture2DRect(tex, gl_TexCoord[1].xy).r;\n"
00687         "       gg.y = texture2DRect(tex, gl_TexCoord[2].xy).r;\n"
00688         "       gg.z = texture2DRect(tex, gl_TexCoord[3].xy).r;\n"
00689         "       gg.w = texture2DRect(tex, gl_TexCoord[4].xy).r;\n"
00690         "       vec2 dxdy = (gg.yw - gg.xz); \n"
00691         "       float grad = 0.5*length(dxdy);\n"
00692         "       float theta = grad==0.0? 0.0: atan(dxdy.y, dxdy.x);\n"
00693         "       gl_FragData[0] = vec4(cc.rg, grad, theta);\n"
00694         "}\n\0");
00695 
00696         ProgramGLSL * program;
00697         s_margin_copy = program = new ProgramGLSL(
00698         "uniform sampler2DRect tex; uniform vec2 truncate;\n"
00699         "void main(){ gl_FragColor = texture2DRect(tex, min(gl_TexCoord[0].xy, truncate)); }");
00700 
00701         _param_margin_copy_truncate = glGetUniformLocation(*program, "truncate");
00702 
00703 
00704         GlobalUtil::_OrientationPack2 = 0;
00705         LoadOrientationShader();
00706 
00707         if(s_orientation == NULL)
00708         {
00709                 //Load a simplified version if the right version is not supported
00710                 s_orientation = program =  new ProgramGLSL(
00711                 "uniform sampler2DRect tex; uniform sampler2DRect oTex;\n"
00712         "       uniform float size; void main(){\n"
00713         "       vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);\n"
00714         "       vec4 oo = texture2DRect(oTex, cc.rg);\n"
00715         "       gl_FragColor.rg = cc.rg;\n"
00716         "       gl_FragColor.b = oo.a;\n"
00717         "       gl_FragColor.a = size;}");  
00718 
00719                 _param_orientation_gtex = glGetUniformLocation(*program, "oTex");
00720                 _param_orientation_size = glGetUniformLocation(*program, "size");
00721                 GlobalUtil::_MaxOrientation = 0;
00722                 GlobalUtil::_FullSupported = 0;
00723                 std::cerr<<"Orientation simplified on this hardware"<<endl;
00724         }
00725 
00726         if(GlobalUtil::_DescriptorPPT) LoadDescriptorShader();
00727         if(s_descriptor_fp == NULL) 
00728         {
00729                 GlobalUtil::_DescriptorPPT = GlobalUtil::_FullSupported = 0; 
00730                 std::cerr<<"Descriptor ignored on this hardware"<<endl;
00731         }
00732 
00733         s_zero_pass = new ProgramGLSL("void main(){gl_FragColor = vec4(0.0);}");
00734 }
00735 
00736 
00737 void ShaderBagGLSL::LoadDisplayShaders()
00738 {
00739         s_copy_key = new ProgramGLSL(
00740                 "uniform sampler2DRect tex; void main(){\n"
00741         "gl_FragColor.rg= texture2DRect(tex, gl_TexCoord[0].st).rg; gl_FragColor.ba = vec2(0.0,1.0);    }");
00742         
00743 
00744         ProgramGLSL * program;
00745         s_vertex_list = program = new ProgramGLSL(
00746         "uniform vec4 sizes; uniform sampler2DRect tex;\n"
00747         "void main(void){\n"
00748         "float fwidth = sizes.y; float twidth = sizes.z; float rwidth = sizes.w; \n"
00749         "float index = 0.1*(fwidth*floor(gl_TexCoord[0].y) + gl_TexCoord[0].x);\n"
00750         "float px = mod(index, twidth);\n"
00751         "vec2 tpos= floor(vec2(px, index*rwidth))+0.5;\n"
00752         "vec4 cc = texture2DRect(tex, tpos );\n"
00753         "float size = 3.0 * cc.a; //sizes.x;// \n"
00754         "gl_FragColor.zw = vec2(0.0, 1.0);\n"
00755         "if(any(lessThan(cc.xy,vec2(0.0))))  {gl_FragColor.xy = cc.xy; }\n"
00756         "else {float type = fract(px);\n"
00757         "vec2 dxy = vec2(0); \n"
00758         "dxy.x = type < 0.1 ? 0.0 : (((type <0.5) || (type > 0.9))? size : -size);\n"
00759         "dxy.y = type < 0.2 ? 0.0 : (((type < 0.3) || (type > 0.7) )? -size :size); \n"
00760         "float s = sin(cc.b); float c = cos(cc.b); \n"
00761         "gl_FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
00762         "gl_FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n}\n");
00763 
00764         _param_genvbo_size = glGetUniformLocation(*program, "sizes");
00765 
00766         s_display_gaussian =  new ProgramGLSL(
00767         "uniform sampler2DRect tex; void main(void){float r = texture2DRect(tex, gl_TexCoord[0].st).r;\n"
00768         "gl_FragColor = vec4(r, r, r, 1);}" );
00769 
00770         s_display_dog =  new ProgramGLSL(
00771         "uniform sampler2DRect tex; void main(void){float g = 0.5+(20.0*texture2DRect(tex, gl_TexCoord[0].st).g);\n"
00772         "gl_FragColor = vec4(g, g, g, 0.0);}" );
00773 
00774         s_display_grad = new ProgramGLSL(
00775                 "uniform sampler2DRect tex; void main(void){\n"
00776     "   vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);gl_FragColor = vec4(5.0* cc.bbb, 1.0);}");
00777 
00778         s_display_keys= new ProgramGLSL(
00779                 "uniform sampler2DRect tex; void main(void){\n"
00780         "       vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);\n"
00781         "       if(cc.r ==0.0) discard; gl_FragColor =  (cc.r==1.0? vec4(1.0, 0.0, 0,1.0):vec4(0.0,1.0,0.0,1.0));}");
00782 }
00783 
00784 void ShaderBagGLSL::LoadKeypointShader(float threshold, float edge_threshold)
00785 {
00786         float threshold0 = threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f);    
00787         float threshold1 = threshold;
00788         float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
00789         ostringstream out;;
00790         streampos pos;
00791 
00792         //tex(X)(Y)
00793         //X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)  
00794         //Y: (CDU) (CENTER 0, DOWN -1, UP    +1) 
00795         if(GlobalUtil::_DarknessAdaption)
00796         {
00797                 out <<  "#define THRESHOLD0 (" << threshold0 << " * min(2.0 * cc.r + 0.1, 1.0))\n"
00798                                 "#define THRESHOLD1 (" << threshold1 << " * min(2.0 * cc.r + 0.1, 1.0))\n"
00799                                 "#define THRESHOLD2 " << threshold2 << "\n";
00800         }else
00801         {
00802                 out <<  "#define THRESHOLD0 " << threshold0 << "\n"
00803                                 "#define THRESHOLD1 " << threshold1 << "\n"
00804                                 "#define THRESHOLD2 " << threshold2 << "\n";
00805         }
00806 
00807         out<<
00808         "uniform sampler2DRect tex, texU, texD; void main ()\n"
00809         "{\n"
00810         "       vec4 v1, v2, gg, temp;\n"
00811         "       vec2 TexRU = vec2(gl_TexCoord[2].x, gl_TexCoord[4].y); \n"
00812         "       vec4 cc  = texture2DRect(tex, gl_TexCoord[0].xy);\n"
00813         "       temp =  texture2DRect(tex, gl_TexCoord[1].xy);\n"
00814         "       v1.x =  temp.g;                 gg.x = temp.r;\n"
00815         "       temp = texture2DRect(tex, gl_TexCoord[2].xy) ;\n"
00816         "       v1.y = temp.g;                  gg.y = temp.r;\n"
00817         "       temp = texture2DRect(tex, gl_TexCoord[3].xy) ;\n"
00818         "       v1.z = temp.g;                  gg.z = temp.r;\n"
00819         "       temp = texture2DRect(tex, gl_TexCoord[4].xy) ;\n"
00820         "       v1.w = temp.g;                  gg.w = temp.r;\n"
00821         "       v2.x = texture2DRect(tex, gl_TexCoord[5].xy).g;\n"
00822         "       v2.y = texture2DRect(tex, gl_TexCoord[6].xy).g;\n"
00823         "       v2.z = texture2DRect(tex, gl_TexCoord[7].xy).g;\n"
00824         "       v2.w = texture2DRect(tex, TexRU.xy).g;\n"
00825         "       vec2 dxdy = (gg.yw - gg.xz); \n"
00826         "       float grad = 0.5*length(dxdy);\n"
00827         "       float theta = grad==0.0? 0.0: atan(dxdy.y, dxdy.x);\n"
00828         "       gl_FragData[0] = vec4(cc.rg, grad, theta);\n"
00829 
00830         //test against 8 neighbours
00831         //use variable to identify type of extremum
00832         //1.0 for local maximum and 0.5 for minimum
00833         <<
00834         "       float dog = 0.0; \n"
00835         "       gl_FragData[1] = vec4(0, 0, 0, 0); \n"
00836         "       dog = cc.g > float(THRESHOLD0) && all(greaterThan(cc.gggg, max(v1, v2)))?1.0: 0.0;\n"
00837         "       dog = cc.g < float(-THRESHOLD0) && all(lessThan(cc.gggg, min(v1, v2)))?0.5: dog;\n"
00838         "       if(dog == 0.0) return;\n";
00839 
00840         pos = out.tellp();
00841         //do edge supression first.. 
00842         //vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
00843         //vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
00844 
00845         out<<
00846         "       float fxx, fyy, fxy; \n"
00847         "       vec4 D2 = v1.xyzw - cc.gggg;\n"
00848         "       vec2 D4 = v2.xw - v2.yz;\n"
00849         "       fxx = D2.x + D2.y;\n"
00850         "       fyy = D2.z + D2.w;\n"
00851         "       fxy = 0.25*(D4.x + D4.y);\n"
00852         "       float fxx_plus_fyy = fxx + fyy;\n"
00853         "       float score_up = fxx_plus_fyy*fxx_plus_fyy; \n"
00854         "       float score_down = (fxx*fyy - fxy*fxy);\n"
00855         "       if( score_down <= 0.0 || score_up > THRESHOLD2 * score_down)return;\n";
00856 
00857         //...
00858         out<<" \n"
00859         "       vec2 D5 = 0.5*(v1.yw-v1.xz); \n"
00860         "       float fx = D5.x, fy = D5.y ; \n"
00861         "       float fs, fss , fxs, fys ; \n"
00862         "       vec2 v3; vec4 v4, v5, v6;\n"
00863         //read 9 pixels of upper level
00864         <<
00865         "       v3.x = texture2DRect(texU, gl_TexCoord[0].xy).g;\n"
00866         "       v4.x = texture2DRect(texU, gl_TexCoord[1].xy).g;\n"
00867         "       v4.y = texture2DRect(texU, gl_TexCoord[2].xy).g;\n"
00868         "       v4.z = texture2DRect(texU, gl_TexCoord[3].xy).g;\n"
00869         "       v4.w = texture2DRect(texU, gl_TexCoord[4].xy).g;\n"
00870         "       v6.x = texture2DRect(texU, gl_TexCoord[5].xy).g;\n"
00871         "       v6.y = texture2DRect(texU, gl_TexCoord[6].xy).g;\n"
00872         "       v6.z = texture2DRect(texU, gl_TexCoord[7].xy).g;\n"
00873         "       v6.w = texture2DRect(texU, TexRU.xy).g;\n"
00874         //compare with 9 pixels of upper level
00875         //read and compare with 9 pixels of lower level
00876         //the maximum case
00877         <<
00878         "       if(dog == 1.0)\n"
00879         "       {\n"
00880         "               if(cc.g < v3.x || any(lessThan(cc.gggg, v4)) ||any(lessThan(cc.gggg, v6)))return; \n"
00881         "               v3.y = texture2DRect(texD, gl_TexCoord[0].xy).g;\n"
00882         "               v5.x = texture2DRect(texD, gl_TexCoord[1].xy).g;\n"
00883         "               v5.y = texture2DRect(texD, gl_TexCoord[2].xy).g;\n"
00884         "               v5.z = texture2DRect(texD, gl_TexCoord[3].xy).g;\n"
00885         "               v5.w = texture2DRect(texD, gl_TexCoord[4].xy).g;\n"
00886         "               v6.x = texture2DRect(texD, gl_TexCoord[5].xy).g;\n"
00887         "               v6.y = texture2DRect(texD, gl_TexCoord[6].xy).g;\n"
00888         "               v6.z = texture2DRect(texD, gl_TexCoord[7].xy).g;\n"
00889         "               v6.w = texture2DRect(texD, TexRU.xy).g;\n"
00890         "               if(cc.g < v3.y || any(lessThan(cc.gggg, v5)) ||any(lessThan(cc.gggg, v6)))return; \n"
00891         "       }\n"
00892         //the minimum case
00893         <<
00894         "       else{\n"
00895         "       if(cc.g > v3.x || any(greaterThan(cc.gggg, v4)) ||any(greaterThan(cc.gggg, v6)))return; \n"
00896         "               v3.y = texture2DRect(texD, gl_TexCoord[0].xy).g;\n"
00897         "               v5.x = texture2DRect(texD, gl_TexCoord[1].xy).g;\n"
00898         "               v5.y = texture2DRect(texD, gl_TexCoord[2].xy).g;\n"
00899         "               v5.z = texture2DRect(texD, gl_TexCoord[3].xy).g;\n"
00900         "               v5.w = texture2DRect(texD, gl_TexCoord[4].xy).g;\n"
00901         "               v6.x = texture2DRect(texD, gl_TexCoord[5].xy).g;\n"
00902         "               v6.y = texture2DRect(texD, gl_TexCoord[6].xy).g;\n"
00903         "               v6.z = texture2DRect(texD, gl_TexCoord[7].xy).g;\n"
00904         "               v6.w = texture2DRect(texD, TexRU.xy).g;\n"
00905         "               if(cc.g > v3.y || any(greaterThan(cc.gggg, v5)) ||any(greaterThan(cc.gggg, v6)))return; \n"
00906         "       }\n";
00907 
00908         if(GlobalUtil::_SubpixelLocalization)
00909 
00910         // sub-pixel localization FragData1 = vec4(dog, 0, 0, 0); return;
00911         out <<
00912         "       fs = 0.5*( v3.x - v3.y );  \n"
00913         "       fss = v3.x + v3.y - cc.g - cc.g;\n"
00914         "       fxs = 0.25 * ( v4.y + v5.x - v4.x - v5.y);\n"
00915         "       fys = 0.25 * ( v4.w + v5.z - v4.z - v5.w);\n"
00916         
00917         // 
00918         // let dog difference be quatratic function  of dx, dy, ds; 
00919         // df(dx, dy, ds) = fx * dx + fy*dy + fs * ds + 
00920         //                                + 0.5 * ( fxx * dx * dx + fyy * dy * dy + fss * ds * ds)
00921         //                                + (fxy * dx * dy + fxs * dx * ds + fys * dy * ds)
00922         // (fx, fy, fs, fxx, fyy, fss, fxy, fxs, fys are the derivatives)
00923         
00924         //the local extremum satisfies
00925         // df/dx = 0, df/dy = 0, df/dz = 0
00926         
00927         //that is 
00928         // |-fx|     | fxx fxy fxs |   |dx|
00929         // |-fy|  =  | fxy fyy fys | * |dy|
00930         // |-fs|     | fxs fys fss |   |ds|
00931         // need to solve dx, dy, ds
00932 
00933         // Use Gauss elimination to solve the linear system
00934     <<
00935         "       vec3 dxys = vec3(0.0);          \n"
00936         "       vec4 A0, A1, A2 ;                       \n"
00937         "       A0 = vec4(fxx, fxy, fxs, -fx);  \n"
00938         "       A1 = vec4(fxy, fyy, fys, -fy);  \n"
00939         "       A2 = vec4(fxs, fys, fss, -fs);  \n"
00940         "       vec3 x3 = abs(vec3(fxx, fxy, fxs));             \n"
00941         "       float maxa = max(max(x3.x, x3.y), x3.z);        \n"
00942         "       if(maxa >= 1e-10 ) {                                            \n"
00943         "               if(x3.y ==maxa )                                                        \n"
00944         "               {                                                                                       \n"
00945         "                       vec4 TEMP = A1; A1 = A0; A0 = TEMP;     \n"
00946         "               }else if( x3.z == maxa )                                        \n"
00947         "               {                                                                                       \n"
00948         "                       vec4 TEMP = A2; A2 = A0; A0 = TEMP;     \n"
00949         "               }                                                                                       \n"
00950         "               A0 /= A0.x;                                                                     \n"
00951         "               A1 -= A1.x * A0;                                                        \n"
00952         "               A2 -= A2.x * A0;                                                        \n"
00953         "               vec2 x2 = abs(vec2(A1.y, A2.y));                \n"
00954         "               if( x2.y > x2.x )                                                       \n"
00955         "               {                                                                                       \n"
00956         "                       vec3 TEMP = A2.yzw;                                     \n"
00957         "                       A2.yzw = A1.yzw;                                                \n"
00958         "                       A1.yzw = TEMP;                                                  \n"
00959         "                       x2.x = x2.y;                                                    \n"
00960         "               }                                                                                       \n"
00961         "               if(x2.x >= 1e-10) {                                             \n"
00962         "                       A1.yzw /= A1.y;                                                         \n"
00963         "                       A2.yzw -= A2.y * A1.yzw;                                        \n"
00964         "                       if(abs(A2.z) >= 1e-10) {                \n"
00965         // compute dx, dy, ds: 
00966         <<
00967         "                               \n"
00968         "                               dxys.z = A2.w /A2.z;                                \n"
00969         "                               dxys.y = A1.w - dxys.z*A1.z;                        \n"
00970         "                               dxys.x = A0.w - dxys.z*A0.z - dxys.y*A0.y;      \n"
00971 
00972         //one more threshold which I forgot in versions prior to 286
00973         <<
00974         "                               bool dog_test = (abs(cc.g + 0.5*dot(vec3(fx, fy, fs), dxys ))<= float(THRESHOLD1)) ;\n"
00975         "                               if(dog_test || any(greaterThan(abs(dxys), vec3(1.0)))) dog = 0.0;\n"
00976         "                       }\n"
00977         "               }\n"
00978         "       }\n"
00979     //keep the point when the offset is less than 1
00980         <<
00981         "       gl_FragData[1] = vec4( dog, dxys); \n";
00982         else
00983 
00984         out<<
00985         "       gl_FragData[1] =  vec4( dog, 0.0, 0.0, 0.0) ;   \n";
00986 
00987         out<<
00988         "}\n" <<'\0';
00989 
00990 
00991 
00992         ProgramGLSL * program = new ProgramGLSL(out.str().c_str()); 
00993         if(program->IsNative())
00994         {
00995                 s_keypoint = program ;
00996                 //parameter
00997         }else
00998         {
00999                 delete program;
01000                 out.seekp(pos);
01001                 out << 
01002         "       gl_FragData[1] =  vec4(dog, 0.0, 0.0, 0.0) ;    \n"
01003         "}\n" <<'\0';
01004                 s_keypoint = program = new ProgramGLSL(out.str().c_str());
01005                 GlobalUtil::_SubpixelLocalization = 0;
01006                 std::cerr<<"Detection simplified on this hardware"<<endl;
01007         }
01008 
01009         _param_dog_texu = glGetUniformLocation(*program, "texU");
01010         _param_dog_texd = glGetUniformLocation(*program, "texD");
01011 }
01012 
01013 
01014 void ShaderBagGLSL::SetDogTexParam(int texU, int texD)
01015 {
01016         glUniform1i(_param_dog_texu, 1);
01017         glUniform1i(_param_dog_texd, 2);
01018 }
01019 
01020 void ShaderBagGLSL::SetGenListStepParam(int tex, int tex0)
01021 {
01022         glUniform1i(_param_genlist_step_tex0, 1);       
01023 }
01024 void ShaderBagGLSL::SetGenVBOParam( float width, float fwidth,  float size)
01025 {
01026         float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width};
01027         glUniform4fv(_param_genvbo_size, 1, sizes);
01028 
01029 }
01030 
01031 
01032 
01033 void ShaderBagGLSL::UnloadProgram()
01034 {
01035         glUseProgram(0);
01036 } 
01037 
01038 
01039 
01040 void ShaderBagGLSL::LoadGenListShader(int ndoglev, int nlev)
01041 {
01042         ProgramGLSL * program;
01043 
01044         s_genlist_init_tight = new ProgramGLSL(
01045         "uniform sampler2DRect tex; void main (void){\n"
01046         "vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r,  texture2DRect(tex, gl_TexCoord[1].xy).r,\n"
01047         "texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);\n"
01048         "gl_FragColor = vec4(greaterThan(helper, vec4(0.0,0.0,0.0,0.0)));\n"
01049         "}");
01050 
01051         
01052         s_genlist_init_ex = program = new ProgramGLSL(
01053         "uniform sampler2DRect tex;uniform vec2 bbox;\n"
01054         "void main (void ){\n"
01055         "vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r,  texture2DRect(tex, gl_TexCoord[1].xy).r,\n"
01056         "texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);\n"
01057         "bvec4 helper2 = bvec4( \n"
01058         "all(lessThan(gl_TexCoord[0].xy , bbox)) && helper.x >0.0,\n"
01059         "all(lessThan(gl_TexCoord[1].xy , bbox)) && helper.y >0.0,\n"
01060         "all(lessThan(gl_TexCoord[2].xy , bbox)) && helper.z >0.0,\n"
01061         "all(lessThan(gl_TexCoord[3].xy , bbox)) && helper.w >0.0);\n"
01062         "gl_FragColor = vec4(helper2);\n"
01063         "}");
01064         _param_genlist_init_bbox = glGetUniformLocation( *program, "bbox");
01065 
01066 
01067         //reduction ...
01068         s_genlist_histo = new ProgramGLSL(
01069         "uniform sampler2DRect tex; void main (void){\n"
01070         "vec4 helper; vec4 helper2; \n"
01071         "helper = texture2DRect(tex, gl_TexCoord[0].xy); helper2.xy = helper.xy + helper.zw; \n"
01072         "helper = texture2DRect(tex, gl_TexCoord[1].xy); helper2.zw = helper.xy + helper.zw; \n"
01073         "gl_FragColor.rg = helper2.xz + helper2.yw;\n"
01074         "helper = texture2DRect(tex, gl_TexCoord[2].xy); helper2.xy = helper.xy + helper.zw; \n"
01075         "helper = texture2DRect(tex, gl_TexCoord[3].xy); helper2.zw = helper.xy + helper.zw; \n"
01076         "gl_FragColor.ba= helper2.xz+helper2.yw;\n"
01077         "}");
01078 
01079 
01080         //read of the first part, which generates tex coordinates 
01081         s_genlist_start= program =  LoadGenListStepShader(1, 1);
01082         _param_ftex_width= glGetUniformLocation(*program, "width");
01083         _param_genlist_start_tex0 = glGetUniformLocation(*program, "tex0");
01084         //stepping
01085         s_genlist_step = program = LoadGenListStepShader(0, 1);
01086         _param_genlist_step_tex0= glGetUniformLocation(*program, "tex0");
01087 
01088 }
01089 
01090 void ShaderBagGLSL::SetMarginCopyParam(int xmax, int ymax)
01091 {
01092         float truncate[2] = {xmax - 0.5f , ymax - 0.5f};
01093         glUniform2fv(_param_margin_copy_truncate, 1, truncate);
01094 }
01095 
01096 void ShaderBagGLSL::SetGenListInitParam(int w, int h)
01097 {
01098         float bbox[2] = {w - 1.0f, h - 1.0f};
01099         glUniform2fv(_param_genlist_init_bbox, 1, bbox);
01100 }
01101 void ShaderBagGLSL::SetGenListStartParam(float width, int tex0)
01102 {
01103         glUniform1f(_param_ftex_width, width);
01104         glUniform1i(_param_genlist_start_tex0, 0);
01105 }
01106 
01107 
01108 ProgramGLSL* ShaderBagGLSL::LoadGenListStepShader(int start, int step)
01109 {
01110         int i;
01111         // char chanels[5] = "rgba";
01112         ostringstream out;
01113 
01114         for(i = 0; i < step; i++) out<<"uniform sampler2DRect tex"<<i<<";\n";
01115         if(start)
01116         {
01117                 out<<"uniform float width;\n";
01118                 out<<"void main(void){\n";
01119                 out<<"float  index = floor(gl_TexCoord[0].y) * width + floor(gl_TexCoord[0].x);\n";
01120                 out<<"vec2 pos = vec2(0.5, 0.5);\n";
01121         }else
01122         {
01123                 out<<"uniform sampler2DRect tex;\n";
01124                 out<<"void main(void){\n";
01125                 out<<"vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n";
01126                 out<<"vec2 pos = tc.rg; float index = tc.b;\n";
01127         }
01128         out<<"vec2 sum;         vec4 cc;\n";
01129 
01130 
01131         if(step>0)
01132         {
01133                 out<<"vec2 cpos = vec2(-0.5, 0.5);\t vec2 opos;\n";
01134                 for(i = 0; i < step; i++)
01135                 {
01136 
01137                         out<<"cc = texture2DRect(tex"<<i<<", pos);\n";
01138                         out<<"sum.x = cc.r + cc.g; sum.y = sum.x + cc.b;  \n";
01139                         out<<"if (index <cc.r){ opos = cpos.xx;}\n";
01140                         out<<"else if(index < sum.x ) {opos = cpos.yx; index -= cc.r;}\n";
01141                         out<<"else if(index < sum.y ) {opos = cpos.xy; index -= sum.x;}\n";
01142                         out<<"else {opos = cpos.yy; index -= sum.y;}\n";
01143                         out<<"pos = (pos + pos + opos);\n";
01144                 }
01145         }
01146         out<<"gl_FragColor = vec4(pos, index, 1.0);\n";
01147         out<<"}\n"<<'\0';
01148         return new ProgramGLSL(out.str().c_str());
01149 }
01150 
01151 
01152 void ShaderBagGLSL::LoadOrientationShader()
01153 {
01154         ostringstream out;
01155 
01156         if(GlobalUtil::_IsNvidia)
01157         {
01158         out <<  "#pragma optionNV(ifcvt none)\n"
01159                         "#pragma optionNV(unroll all)\n";
01160         }
01161 
01162         out<<"\n"
01163         "#define GAUSSIAN_WF float("<<GlobalUtil::_OrientationGaussianFactor<<") \n"
01164         "#define SAMPLE_WF float("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
01165         "#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
01166         "uniform sampler2DRect tex;                                     \n"
01167         "uniform sampler2DRect gradTex;                         \n"
01168         "uniform vec4 size;                                             \n"
01169         << ((GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)? "     uniform sampler2DRect texS;     \n" : " ")      <<
01170         "void main()            \n"
01171         "{                                                                                                      \n"
01172         "       vec4 bins[10];                                                          \n"
01173         "       bins[0] = vec4(0.0);bins[1] = vec4(0.0);bins[2] = vec4(0.0);    \n"
01174         "       bins[3] = vec4(0.0);bins[4] = vec4(0.0);bins[5] = vec4(0.0);    \n"
01175         "       bins[6] = vec4(0.0);bins[7] = vec4(0.0);bins[8] = vec4(0.0);    \n"
01176         "       vec4 loc = texture2DRect(tex, gl_TexCoord[0].xy);       \n"
01177         "       vec2 pos = loc.xy;              \n"
01178         "       bool orientation_mode = (size.z != 0.0);                        \n"
01179         "       float sigma = orientation_mode? abs(size.z) : loc.w; \n";
01180         if(GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)
01181         {
01182                 out<<
01183         "       if(orientation_mode){\n"
01184         "               vec4 offset = texture2DRect(texS, pos);\n"
01185         "               pos.xy = pos.xy + offset.yz; \n"
01186         "               sigma = sigma * pow(size.w, offset.w);\n"
01187         "               #if "<< GlobalUtil::_KeepExtremumSign << "\n"
01188         "                       if(offset.x < 0.6) sigma = -sigma; \n"
01189         "               #endif\n"
01190         "       }\n";
01191         }
01192         out<<
01193         "       //bool fixed_orientation = (size.z < 0.0);              \n"
01194         "       if(size.z < 0.0) {gl_FragData[0] = vec4(pos, 0.0, sigma); return;}"
01195         "       float gsigma = sigma * GAUSSIAN_WF;                             \n"
01196         "       vec2 win = abs(vec2(sigma * (SAMPLE_WF * GAUSSIAN_WF))) ;       \n"
01197         "       vec2 dim = size.xy;                                                     \n"
01198         "       float dist_threshold = win.x*win.x+0.5;                 \n"
01199         "       float factor = -0.5/(gsigma*gsigma);                    \n"
01200         "       vec4 sz;        vec2 spos;                                              \n"
01201         "       //if(any(pos.xy <= 1)) discard;                                 \n"
01202         "       sz.xy = max( pos - win, vec2(1,1));                     \n"
01203         "       sz.zw = min( pos + win, dim-vec2(2, 2));                                \n"
01204         "       sz = floor(sz)+0.5;";
01205         //loop to get the histogram
01206 
01207         out<<"\n"
01208         "       for(spos.y = sz.y; spos.y <= sz.w;      spos.y+=1.0)                            \n"
01209         "       {                                                                                                                               \n"
01210         "               for(spos.x = sz.x; spos.x <= sz.z;      spos.x+=1.0)                    \n"
01211         "               {                                                                                                                       \n"
01212         "                       vec2 offset = spos - pos;                                                               \n"
01213         "                       float sq_dist = dot(offset,offset);                                             \n"
01214         "                       if( sq_dist < dist_threshold){                                                  \n"
01215         "                               vec4 cc = texture2DRect(gradTex, spos);                         \n"
01216         "                               float grad = cc.b;      float theta = cc.a;                             \n"
01217         "                               float idx = floor(degrees(theta)*0.1);                          \n"
01218         "                               if(idx < 0.0 ) idx += 36.0;                                                                     \n"
01219         "                               float weight = grad*exp(sq_dist * factor);                              \n"
01220         "                               float vidx = fract(idx * 0.25) * 4.0;//mod(idx, 4.0) ;                                                  \n"
01221         "                               vec4 inc = weight*vec4(equal(vec4(vidx), vec4(0.0,1.0,2.0,3.0)));";
01222 
01223         if(GlobalUtil::_UseDynamicIndexing)
01224         {
01225                 //dynamic indexing may not be faster
01226                 out<<"\n"
01227         "                               int iidx = int((idx*0.25));     \n"
01228         "                               bins[iidx]+=inc;                                        \n"
01229         "                       }                                                                               \n"
01230         "               }                                                                                       \n"
01231         "       }";
01232 
01233         }else
01234         {
01235                 //nvfp40 still does not support dynamic array indexing
01236                 //unrolled binary search...
01237                 out<<"\n"
01238         "                               if(idx < 16.0)                                                  \n"
01239         "                               {                                                                               \n"
01240         "                                       if(idx < 8.0)                                                   \n"
01241         "                                       {                                                                       \n"
01242         "                                               if(idx < 4.0)   {       bins[0]+=inc;}  \n"
01243         "                                               else            {       bins[1]+=inc;}  \n"
01244         "                                       }else                                                           \n"
01245         "                                       {                                                                       \n"
01246         "                                               if(idx < 12.0){ bins[2]+=inc;}  \n"
01247         "                                               else            {       bins[3]+=inc;}  \n"
01248         "                                       }                                                                       \n"
01249         "                               }else if(idx < 32.0)                                            \n"
01250         "                               {                                                                               \n"
01251         "                                       if(idx < 24.0)                                          \n"
01252         "                                       {                                                                       \n"
01253         "                                               if(idx <20.0)   {       bins[4]+=inc;}  \n"
01254         "                                               else            {       bins[5]+=inc;}  \n"
01255         "                                       }else                                                           \n"
01256         "                                       {                                                                       \n"
01257         "                                               if(idx < 28.0){ bins[6]+=inc;}  \n"
01258         "                                               else            {       bins[7]+=inc;}  \n"
01259         "                                       }                                                                       \n"
01260         "                               }else                                           \n"
01261         "                               {                                                                               \n"
01262         "                                       bins[8]+=inc;                                           \n"
01263         "                               }                                                                               \n"
01264         "                       }                                                                               \n"
01265         "               }                                                                                       \n"
01266         "       }";
01267 
01268         }
01269 
01270         WriteOrientationCodeToStream(out);
01271 
01272         ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
01273         if(program->IsNative())
01274         {
01275                 s_orientation = program ;
01276                 _param_orientation_gtex = glGetUniformLocation(*program, "gradTex");
01277                 _param_orientation_size = glGetUniformLocation(*program, "size");
01278                 _param_orientation_stex = glGetUniformLocation(*program, "texS");
01279         }else
01280         {
01281                 delete program;
01282         }
01283 }
01284 
01285 
01286 void ShaderBagGLSL::WriteOrientationCodeToStream(std::ostream& out)
01287 {
01288         //smooth histogram and find the largest
01289 /*
01290         smoothing kernel:        (1 3 6 7 6 3 1 )/27
01291         the same as 3 pass of (1 1 1)/3 averaging
01292         maybe better to use 4 pass on the vectors...
01293 */
01294 
01295 
01296         //the inner loop on different array numbers is always unrolled in fp40
01297 
01298         //bug fixed here:)
01299         out<<"\n"
01300         "       //mat3 m1 = mat3(1, 0, 0, 3, 1, 0, 6, 3, 1)/27.0;  \n"
01301         "       mat3 m1 = mat3(1, 3, 6, 0, 1, 3,0, 0, 1)/27.0;  \n"
01302         "       mat4 m2 = mat4(7, 6, 3, 1, 6, 7, 6, 3, 3, 6, 7, 6, 1, 3, 6, 7)/27.0;\n"
01303         "       #define FILTER_CODE(i) {                                                \\\n"
01304         "                       vec4 newb       =       (bins[i]* m2);                  \\\n"
01305         "                       newb.xyz        +=      ( prev.yzw * m1);               \\\n"
01306         "                       prev = bins[i];                                                 \\\n"
01307         "                       newb.wzy        +=      ( bins[i+1].zyx *m1);   \\\n"
01308         "                       bins[i] = newb;}\n"
01309         "       for (int j=0; j<2; j++)                                                         \n"
01310         "       {                                                                                               \n"
01311         "               vec4 prev  = bins[8];                                           \n"
01312         "               bins[9]          = bins[0];                                             \n";
01313 
01314         if(GlobalUtil::_KeepShaderLoop)
01315         {
01316                 out<<
01317         "               for (int i=0; i<9; i++)                                                 \n"
01318         "               {                                                                                               \n"
01319         "                       FILTER_CODE(i);                                                         \n"
01320         "               }                                                                                               \n"
01321         "       }";
01322 
01323         }else
01324         {
01325                 //manually unroll the loop for ATI.
01326                 out << 
01327         "          FILTER_CODE(0);\n"
01328         "          FILTER_CODE(1);\n"
01329         "          FILTER_CODE(2);\n"
01330         "          FILTER_CODE(3);\n"
01331         "          FILTER_CODE(4);\n"
01332         "          FILTER_CODE(5);\n"
01333         "          FILTER_CODE(6);\n"
01334         "          FILTER_CODE(7);\n"
01335         "          FILTER_CODE(8);\n"
01336         "       }\n";
01337         }
01338         //find the maximum voting
01339         out<<"\n"
01340         "       vec4 maxh; vec2 maxh2;  \n"
01341         "       vec4 maxh4 = max(max(max(max(max(max(max(max(bins[0], bins[1]), bins[2]), \n"
01342         "                       bins[3]), bins[4]), bins[5]), bins[6]), bins[7]), bins[8]);\n"
01343         "       maxh2 = max(maxh4.xy, maxh4.zw); maxh = vec4(max(maxh2.x, maxh2.y));";
01344 
01345         char *testpeak_code;
01346         char *savepeak_code;
01347 
01348         //save two/three/four orientations with the largest votings?
01349 
01350         if(GlobalUtil::_MaxOrientation>1)
01351         {
01352                 out<<"\n"
01353                 "       vec4 Orientations = vec4(0.0, 0.0, 0.0, 0.0);                           \n"
01354                 "       vec4 weights = vec4(0.0,0.0,0.0,0.0);           ";      
01355                 
01356                 testpeak_code = "\\\n"
01357                 "       {test = greaterThan(bins[i], hh);";
01358 
01359                 //save the orientations in weight-decreasing order
01360                 if(GlobalUtil::_MaxOrientation ==2)
01361                 {
01362                 savepeak_code = "\\\n"
01363                 "                       if(weight <=weights.g){}\\\n"
01364                 "                       else if(weight >weights.r)\\\n"
01365                 "                       {weights.rg = vec2(weight, weights.r); Orientations.rg = vec2(th, Orientations.r);}\\\n"
01366                 "                       else {weights.g = weight; Orientations.g = th;}";
01367                 }else if(GlobalUtil::_MaxOrientation ==3)
01368                 {
01369                 savepeak_code = "\\\n"
01370                 "                       if(weight <=weights.b){}\\\n"
01371                 "                       else if(weight >weights.r)\\\n"
01372                 "                       {weights.rgb = vec3(weight, weights.rg); Orientations.rgb = vec3(th, Orientations.rg);}\\\n"
01373                 "                       else if(weight >weights.g)\\\n"
01374                 "                       {weights.gb = vec2(weight, weights.g); Orientations.gb = vec2(th, Orientations.g);}\\\n"
01375                 "                       else {weights.b = weight; Orientations.b = th;}";
01376                 }else
01377                 {
01378                 savepeak_code = "\\\n"
01379                 "                       if(weight <=weights.a){}\\\n"
01380                 "                       else if(weight >weights.r)\\\n"
01381                 "                       {weights = vec4(weight, weights.rgb); Orientations = vec4(th, Orientations.rgb);}\\\n"
01382                 "                       else if(weight >weights.g)\\\n"
01383                 "                       {weights.gba = vec3(weight, weights.gb); Orientations.gba = vec3(th, Orientations.gb);}\\\n"
01384                 "                       else if(weight >weights.b)\\\n"
01385                 "                       {weights.ba = vec2(weight, weights.b); Orientations.ba = vec2(th, Orientations.b);}\\\n"
01386                 "                       else {weights.a = weight; Orientations.a = th;}";
01387                 }
01388 
01389         }else
01390         {
01391                 out<<"\n"
01392                 "       float Orientation;                              ";
01393                 testpeak_code ="\\\n"
01394                 "       if(npeaks<=0.0){\\\n"
01395                 "       test = equal(bins[i], maxh)     ;";
01396                 savepeak_code="\\\n"
01397                 "                       npeaks++;       \\\n"
01398                 "                       Orientation = th;";
01399 
01400         }
01401         //find the peaks
01402         out <<"\n"
01403         "       #define FINDPEAK(i, k)" <<testpeak_code<<"\\\n"
01404         "       if( any ( test) )                                                       \\\n"
01405         "       {                                                                                       \\\n"
01406         "               if(test.r && bins[i].x > prevb && bins[i].x > bins[i].y )       \\\n"
01407         "               {                                                                                       \\\n"
01408         "                   float       di = -0.5 * (bins[i].y-prevb) / (bins[i].y+prevb-bins[i].x - bins[i].x) ; \\\n"
01409         "                   float       th = (k+di+0.5);        float weight = bins[i].x;"
01410                                 <<savepeak_code<<"\\\n"
01411         "               }\\\n"
01412         "               else if(test.g && all( greaterThan(bins[i].yy , bins[i].xz)) )  \\\n"
01413         "               {                                                                                       \\\n"
01414         "                   float       di = -0.5 * (bins[i].z-bins[i].x) / (bins[i].z+bins[i].x-bins[i].y- bins[i].y) ; \\\n"
01415         "                   float       th = (k+di+1.5);        float weight = bins[i].y;                               "
01416                                 <<savepeak_code<<"      \\\n"
01417         "               }\\\n"
01418         "               if(test.b && all( greaterThan( bins[i].zz , bins[i].yw)) )      \\\n"
01419         "               {                                                                                       \\\n"
01420         "                   float       di = -0.5 * (bins[i].w-bins[i].y) / (bins[i].w+bins[i].y-bins[i].z- bins[i].z) ; \\\n"
01421         "                   float       th = (k+di+2.5);        float weight = bins[i].z;                               "
01422                                 <<savepeak_code<<"      \\\n"
01423         "               }\\\n"
01424         "               else if(test.a && bins[i].w > bins[i].z && bins[i].w > bins[i+1].x )    \\\n"
01425         "               {                                                                                       \\\n"
01426         "                   float       di = -0.5 * (bins[i+1].x-bins[i].z) / (bins[i+1].x+bins[i].z-bins[i].w - bins[i].w) ; \\\n"
01427         "                   float       th = (k+di+3.5);        float weight = bins[i].w;                               "
01428                                 <<savepeak_code<<"      \\\n"
01429         "               }\\\n"
01430         "       }}\\\n"
01431         "       prevb = bins[i].w;";
01432         //the following loop will be unrolled anyway in fp40,
01433         //taking more than 1000 instrucsions..
01434         //....
01435         if(GlobalUtil::_KeepShaderLoop)
01436         {
01437         out<<"\n"
01438         "       vec4 hh = maxh * ORIENTATION_THRESHOLD; bvec4 test;     \n"
01439         "       bins[9] = bins[0];                                                              \n"
01440         "       float npeaks = 0.0, k = 0.0;                                            \n"
01441         "       float prevb     = bins[8].w;                                            \n"
01442         "       for (int i = 0; i < 9; i++)                                             \n"
01443         "       {\n"
01444         "               FINDPEAK(i, k);\n"
01445         "               k = k + 4.0;    \n"
01446         "       }";
01447         }else
01448         {
01449                 //loop unroll for ATI.
01450         out <<"\n"
01451         "       vec4 hh = maxh * ORIENTATION_THRESHOLD; bvec4 test;\n"
01452         "       bins[9] = bins[0];                                                              \n"
01453         "       float npeaks = 0.0;                                                             \n"
01454         "       float prevb     = bins[8].w;                                            \n"
01455         "       FINDPEAK(0, 0.0);\n"
01456         "       FINDPEAK(1, 4.0);\n"
01457         "       FINDPEAK(2, 8.0);\n"
01458         "       FINDPEAK(3, 12.0);\n"
01459         "       FINDPEAK(4, 16.0);\n"
01460         "       FINDPEAK(5, 20.0);\n"
01461         "       FINDPEAK(6, 24.0);\n"
01462         "       FINDPEAK(7, 28.0);\n"
01463         "       FINDPEAK(8, 32.0);\n";
01464         }
01465         //WRITE output
01466         if(GlobalUtil::_MaxOrientation>1)
01467         {
01468         out<<"\n"
01469         "       if(orientation_mode){\n"
01470         "               npeaks = dot(vec4(1,1,"
01471                         <<(GlobalUtil::_MaxOrientation>2 ? 1 : 0)<<","
01472                         <<(GlobalUtil::_MaxOrientation >3? 1 : 0)<<"), vec4(greaterThan(weights, hh)));\n"
01473         "               gl_FragData[0] = vec4(pos, npeaks, sigma);\n"
01474         "               gl_FragData[1] = radians((Orientations )*10.0);\n"
01475         "       }else{\n"
01476         "               gl_FragData[0] = vec4(pos, radians((Orientations.x)*10.0), sigma);\n"
01477         "       }\n";
01478         }else
01479         {
01480         out<<"\n"
01481         "        gl_FragData[0] = vec4(pos, radians((Orientation)*10.0), sigma);\n";
01482         }
01483         //end
01484         out<<"\n"
01485         "}\n"<<'\0';
01486 
01487 
01488 }
01489 
01490 void ShaderBagGLSL::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
01491 {
01492         glUniform1i(_param_orientation_gtex, 1);
01493         glUniform1f(_param_orientation_size, sigma);
01494 }
01495 
01496 
01497 
01498 
01499 void ShaderBagGLSL::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int stex, float step)
01500 {
01502         glUniform1i(_param_orientation_gtex, 1);        
01503 
01504         if((GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)&& stex)
01505         {
01506                 //specify texutre for subpixel subscale localization
01507                 glUniform1i(_param_orientation_stex, 2);
01508         }
01509 
01510         float size[4];
01511         size[0] = (float)width;
01512         size[1] = (float)height;
01513         size[2] = sigma;
01514         size[3] = step;
01515         glUniform4fv(_param_orientation_size, 1, size);
01516 }
01517 
01518 
01519 void ShaderBagGLSL::LoadDescriptorShaderF2()
01520 {
01521         //one shader outpout 128/8 = 16 , each fragout encodes 4
01522         //const double twopi = 2.0*3.14159265358979323846;
01523         //const double rpi  = 8.0/twopi;
01524         ostringstream out;
01525         out<<setprecision(8);
01526 
01527         out<<"\n"
01528         "#define M_PI 3.14159265358979323846\n"
01529         "#define TWO_PI (2.0*M_PI)\n"
01530         "#define RPI 1.2732395447351626861510701069801\n"
01531         "#define WF  size.z\n"
01532         "uniform sampler2DRect tex;                             \n"
01533         "uniform sampler2DRect gradTex;                 \n"
01534         "uniform vec4 dsize;                                            \n"
01535         "uniform vec3 size;                                             \n"
01536         "void main()            \n"
01537         "{\n"
01538         "       vec2 dim        = size.xy;      //image size                    \n"
01539         "       float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
01540         "       float idx = 8.0 * fract(index * 0.125) + 8.0 * floor(2.0 * fract(gl_TexCoord[0].y * 0.5));              \n"
01541         "       index = floor(index*0.125) + 0.49;  \n"
01542         "       vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
01543         "       vec2 pos = texture2DRect(tex, coord).xy;                \n"
01544         "       if(any(lessThanEqual(pos.xy,  vec2(1.0))) || any(greaterThanEqual(pos.xy, dim-1.0)))// discard; \n"
01545         "       { gl_FragData[0] = gl_FragData[1] = vec4(0.0); return; }\n"
01546         "       float  anglef = texture2DRect(tex, coord).z;\n"
01547         "       if(anglef > M_PI) anglef -= TWO_PI;\n"
01548         "       float sigma = texture2DRect(tex, coord).w; \n"
01549         "       float spt  = abs(sigma * WF);   //default to be 3*sigma \n";
01550 
01551         //rotation
01552         out<<
01553         "       vec4 cscs, rots;                                                                \n"
01554         "       cscs.y = sin(anglef);   cscs.x = cos(anglef);   \n"
01555         "       cscs.zw = - cscs.xy;                                                    \n"
01556         "       rots = cscs /spt;                                                               \n"
01557         "       cscs *= spt; \n";
01558 
01559         //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
01560         //and rots is  (cos, sin, -cos, -sin ) /(factor*sigma)
01561         //devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
01562         //To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side
01563 
01564         out<<
01565         "vec4 temp; vec2 pt, offsetpt;                          \n"
01566         "       /*the fraction part of idx is .5*/                      \n"
01567         "       offsetpt.x = 4.0* fract(idx*0.25) - 2.0;                                \n"
01568         "       offsetpt.y = floor(idx*0.25) - 1.5;                     \n"
01569         "       temp = cscs.xwyx*offsetpt.xyxy;                         \n"
01570         "       pt = pos + temp.xz + temp.yw;                           \n";
01571         
01572         //get a horizontal bounding box of the rotated rectangle
01573         out<<
01574         "       vec2 bwin = abs(cscs.xy);                                       \n"
01575         "       float bsz = bwin.x + bwin.y;                                    \n"
01576         "       vec4 sz;                                        \n"
01577         "       sz.xy = max(pt - vec2(bsz), vec2(1,1));\n"
01578         "       sz.zw = min(pt + vec2(bsz), dim - vec2(2, 2));          \n"
01579         "       sz = floor(sz)+0.5;"; //move sample point to pixel center
01580         //get voting for two box
01581 
01582         out<<"\n"
01583         "       vec4 DA, DB; vec2 spos;                 \n"
01584         "       DA = DB  = vec4(0.0, 0.0, 0.0, 0.0);            \n"
01585         "       for(spos.y = sz.y; spos.y <= sz.w;      spos.y+=1.0)                            \n"
01586         "       {                                                                                                                               \n"
01587         "               for(spos.x = sz.x; spos.x <= sz.z;      spos.x+=1.0)                    \n"
01588         "               {                                                                                                                       \n"
01589         "                       vec2 diff = spos - pt;                                                          \n"
01590         "                       temp = rots.xywx * diff.xyxy;\n"
01591         "                       vec2 nxy = (temp.xz + temp.yw); \n"
01592         "                       vec2 nxyn = abs(nxy);                   \n"
01593         "                       if(all( lessThan(nxyn, vec2(1.0)) ))\n"
01594         "                       {\n"
01595         "                               vec4 cc = texture2DRect(gradTex, spos);                                         \n"
01596         "                               float mod = cc.b;       float angle = cc.a;                                     \n"
01597         "                               float theta0 = RPI * (anglef - angle);                          \n"
01598         "                               float theta = theta0 < 0.0? theta0 + 8.0 : theta0;;\n"
01599         "                               diff = nxy + offsetpt.xy;                                                               \n"
01600         "                               float ww = exp(-0.125*dot(diff, diff));\n"
01601         "                               vec2 weights = vec2(1) - nxyn;\n"
01602         "                               float weight = weights.x * weights.y *mod*ww; \n"
01603         "                               float theta1 = floor(theta); \n"
01604         "                               float weight2 = (theta - theta1) * weight;\n"
01605         "                               float weight1 = weight - weight2;\n"
01606         "                               DA += vec4(equal(vec4(theta1),  vec4(0, 1, 2, 3)))*weight1;\n"
01607         "                               DA += vec4(equal(vec4(theta1),  vec4(7, 0, 1, 2)))*weight2; \n"
01608         "                               DB += vec4(equal(vec4(theta1),  vec4(4, 5, 6, 7)))*weight1;\n"
01609         "                               DB += vec4(equal(vec4(theta1),  vec4(3, 4, 5, 6)))*weight2; \n"
01610         "                       }\n"
01611         "               }\n"
01612         "       }\n";
01613 
01614         out<<
01615         "        gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
01616         "}\n"<<'\0';
01617 
01618         ProgramGLSL * program =  new ProgramGLSL(out.str().c_str()); 
01619 
01620         if(program->IsNative())
01621         {
01622                 s_descriptor_fp = program ;
01623                 _param_descriptor_gtex = glGetUniformLocation(*program, "gradTex");
01624                 _param_descriptor_size = glGetUniformLocation(*program, "size");
01625                 _param_descriptor_dsize = glGetUniformLocation(*program, "dsize");
01626         }else
01627         {
01628                 delete program;
01629         }
01630 
01631 
01632 }
01633 
01634 void ShaderBagGLSL::LoadDescriptorShader()
01635 {
01636         GlobalUtil::_DescriptorPPT = 16;
01637         LoadDescriptorShaderF2();
01638 }
01639 
01640 
01641 void ShaderBagGLSL::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth,  float width, float height, float sigma)
01642 {
01644         glUniform1i(_param_descriptor_gtex, 1); 
01645 
01646         float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
01647         glUniform4fv(_param_descriptor_dsize, 1, dsize);
01648         float size[3];
01649         size[0] = width;
01650         size[1] = height;
01651         size[2] = GlobalUtil::_DescriptorWindowFactor;
01652         glUniform3fv(_param_descriptor_size, 1, size);
01653 
01654 }
01655 
01657 
01658 void ShaderBagPKSL::LoadFixedShaders()
01659 {
01660         ProgramGLSL * program;
01661 
01662 
01663         s_gray = new ProgramGLSL( 
01664         "uniform sampler2DRect tex; void main(){\n"
01665         "float intensity = dot(vec3(0.299, 0.587, 0.114), texture2DRect(tex,gl_TexCoord[0].xy ).rgb);\n"
01666         "gl_FragColor= vec4(intensity, intensity, intensity, 1.0);}"    );
01667 
01668 
01669         s_sampling = new ProgramGLSL(
01670         "uniform sampler2DRect tex; void main(){\n"
01671         "gl_FragColor= vec4(    texture2DRect(tex,gl_TexCoord[0].st ).r,texture2DRect(tex,gl_TexCoord[1].st ).r,\n"
01672         "                                               texture2DRect(tex,gl_TexCoord[2].st ).r,texture2DRect(tex,gl_TexCoord[3].st ).r);}"     );
01673 
01674 
01675         s_margin_copy = program = new ProgramGLSL(
01676         "uniform sampler2DRect tex;  uniform vec4 truncate; void main(){\n"
01677         "vec4 cc = texture2DRect(tex, min(gl_TexCoord[0].xy, truncate.xy)); \n"
01678         "bvec2 ob = lessThan(gl_TexCoord[0].xy, truncate.xy);\n"
01679         "if(ob.y) { gl_FragColor = (truncate.z ==0.0 ? cc.rrbb : cc.ggaa); } \n"
01680         "else if(ob.x) {gl_FragColor = (truncate.w <1.5 ? cc.rgrg : cc.baba);} \n"
01681         "else { vec4 weights = vec4(vec4(0.0, 1.0, 2.0, 3.0) == truncate.wwww);\n"
01682         "float v = dot(weights, cc); gl_FragColor = vec4(v);}}");
01683 
01684         _param_margin_copy_truncate = glGetUniformLocation(*program, "truncate");
01685 
01686 
01687 
01688         s_zero_pass = new ProgramGLSL("void main(){gl_FragColor = vec4(0.0);}");
01689 
01690 
01691 
01692         s_grad_pass = program = new ProgramGLSL(
01693         "uniform sampler2DRect tex; uniform sampler2DRect texp; void main ()\n"
01694         "{\n"
01695         "       vec4 v1, v2, gg;\n"
01696         "       vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
01697         "       vec4 cp = texture2DRect(texp, gl_TexCoord[0].xy);\n"
01698         "       gl_FragData[0] = cc - cp; \n"
01699         "       vec4 cl = texture2DRect(tex, gl_TexCoord[1].xy); vec4 cr = texture2DRect(tex, gl_TexCoord[2].xy);\n"
01700         "       vec4 cd = texture2DRect(tex, gl_TexCoord[3].xy); vec4 cu = texture2DRect(tex, gl_TexCoord[4].xy);\n"
01701         "       vec4 dx = (vec4(cr.rb, cc.ga) - vec4(cc.rb, cl.ga)).zxwy;\n"
01702         "       vec4 dy = (vec4(cu.rg, cc.ba) - vec4(cc.rg, cd.ba)).zwxy;\n"
01703         "       vec4 grad = 0.5 * sqrt(dx*dx + dy * dy);\n"
01704         "       gl_FragData[1] = grad;\n"
01705         "       vec4 invalid = vec4(equal(grad, vec4(0.0)));    \n"
01706         "       vec4 ov = atan(dy, dx + invalid);               \n"
01707         "       gl_FragData[2] = ov; \n"
01708         "}\n\0"); //when 
01709 
01710         _param_grad_pass_texp = glGetUniformLocation(*program, "texp");
01711 
01712 
01713         GlobalUtil::_OrientationPack2 = 0;
01714         LoadOrientationShader();
01715 
01716         if(s_orientation == NULL)
01717         {
01718                 //Load a simplified version if the right version is not supported
01719                 s_orientation = program =  new ProgramGLSL(
01720                 "uniform sampler2DRect tex; uniform sampler2DRect oTex; uniform vec2 size; void main(){\n"
01721                 "       vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
01722                 "       vec2 co = cc.xy * 0.5; \n"
01723                 "       vec4 oo = texture2DRect(oTex, co);\n"
01724                 "       bvec2 bo = lessThan(fract(co), vec2(0.5)); \n"
01725                 "       float o = bo.y? (bo.x? oo.r : oo.g) : (bo.x? oo.b : oo.a); \n"
01726                 "       gl_FragColor = vec4(cc.rg, o, size.x * pow(size.y, cc.a));}");  
01727 
01728                 _param_orientation_gtex= glGetUniformLocation(*program, "oTex");
01729                 _param_orientation_size= glGetUniformLocation(*program, "size");
01730                 GlobalUtil::_MaxOrientation = 0;
01731                 GlobalUtil::_FullSupported = 0;
01732                 std::cerr<<"Orientation simplified on this hardware"<<endl;
01733         }
01734 
01735         if(GlobalUtil::_DescriptorPPT)
01736         {
01737                 LoadDescriptorShader();
01738                 if(s_descriptor_fp == NULL) 
01739                 {
01740                         GlobalUtil::_DescriptorPPT = GlobalUtil::_FullSupported = 0; 
01741                         std::cerr<<"Descriptor ignored on this hardware"<<endl;
01742                 }
01743         }
01744 }
01745 
01746 
01747 void ShaderBagPKSL::LoadDisplayShaders()
01748 {
01749         ProgramGLSL * program;
01750 
01751         s_copy_key = new ProgramGLSL(
01752         "uniform sampler2DRect tex;void main(){\n"
01753         "gl_FragColor= vec4(texture2DRect(tex, gl_TexCoord[0].xy).rg, 0,1);}");
01754 
01755         //shader used to write a vertex buffer object
01756         //which is used to draw the quads of each feature
01757         s_vertex_list = program = new ProgramGLSL(
01758         "uniform sampler2DRect tex; uniform vec4 sizes; void main(){\n"
01759         "float fwidth = sizes.y; \n"
01760         "float twidth = sizes.z; \n"
01761         "float rwidth = sizes.w; \n"
01762         "float index = 0.1*(fwidth*floor(gl_TexCoord[0].y) + gl_TexCoord[0].x);\n"
01763         "float px = mod(index, twidth);\n"
01764         "vec2 tpos= floor(vec2(px, index*rwidth))+0.5;\n"
01765         "vec4 cc = texture2DRect(tex, tpos );\n"
01766         "float size = 3.0 * cc.a; \n"
01767         "gl_FragColor.zw = vec2(0.0, 1.0);\n"
01768         "if(any(lessThan(cc.xy,vec2(0.0)))) {gl_FragColor.xy = cc.xy;}else \n"
01769         "{\n"
01770         "       float type = fract(px);\n"
01771         "       vec2 dxy; float s, c;\n"
01772         "       dxy.x = type < 0.1 ? 0.0 : (((type <0.5) || (type > 0.9))? size : -size);\n"
01773         "       dxy.y = type < 0.2 ? 0.0 : (((type < 0.3) || (type > 0.7) )? -size :size); \n"
01774         "       s = sin(cc.b); c = cos(cc.b); \n"
01775         "       gl_FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
01776         "       gl_FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n"
01777         "}\n\0");
01778         /*gl_FragColor = vec4(tpos, 0.0, 1.0);}\n\0");*/
01779 
01780         _param_genvbo_size = glGetUniformLocation(*program, "sizes");
01781 
01782         s_display_gaussian = new ProgramGLSL(
01783         "uniform sampler2DRect tex; void main(){\n"
01784     "vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy);   bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
01785     "float v = ff.y?(ff.x? pc.r : pc.g):(ff.x?pc.b:pc.a); gl_FragColor = vec4(vec3(v), 1.0);}");
01786 
01787         s_display_dog =  new ProgramGLSL(
01788         "uniform sampler2DRect tex; void main(){\n"
01789         "vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
01790         "float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a);float g = (0.5+20.0*v);\n"
01791         "gl_FragColor = vec4(g, g, g, 1.0);}" );
01792 
01793 
01794         s_display_grad = new ProgramGLSL(
01795         "uniform sampler2DRect tex; void main(){\n"
01796         "vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
01797         "float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a); gl_FragColor = vec4(5.0 *vec3(v), 1.0); }");
01798 
01799         s_display_keys= new ProgramGLSL(
01800         "uniform sampler2DRect tex; void main(){\n"
01801         "vec4 oc = texture2DRect(tex, gl_TexCoord[0].xy); \n"
01802         "vec4 cc = vec4(equal(abs(oc.rrrr), vec4(1.0, 2.0, 3.0, 4.0))); \n"
01803         "bvec2 ff = lessThan(fract(gl_TexCoord[0].xy) , vec2(0.5));\n"
01804         "float v = ff.y ?(ff.x ? cc.r : cc.g):(ff.x ? cc.b : cc.a);\n"
01805         "if(v == 0.0) discard;  \n"
01806         "else if(oc.r > 0.0) gl_FragColor = vec4(1.0, 0.0, 0,1.0); \n"
01807         "else gl_FragColor = vec4(0.0,1.0,0.0,1.0);     }" );
01808 }
01809 
01810 void ShaderBagPKSL::LoadOrientationShader(void)
01811 {
01812         ostringstream out;
01813         if(GlobalUtil::_IsNvidia)
01814         {
01815                 out <<  "#pragma optionNV(ifcvt none)\n"
01816                                 "#pragma optionNV(unroll all)\n";
01817         }
01818         out<<"\n"
01819         "#define GAUSSIAN_WF float("<<GlobalUtil::_OrientationGaussianFactor<<") \n"
01820         "#define SAMPLE_WF float("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
01821         "#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
01822         "uniform sampler2DRect tex;     uniform sampler2DRect gtex;\n"
01823         "uniform sampler2DRect otex; uniform vec4 size;\n" 
01824         "void main()            \n"
01825         "{                                                                                                      \n"
01826         "       vec4 bins[10];                                                          \n"
01827         "       bins[0] = vec4(0.0);bins[1] = vec4(0.0);bins[2] = vec4(0.0);    \n"
01828         "       bins[3] = vec4(0.0);bins[4] = vec4(0.0);bins[5] = vec4(0.0);    \n"
01829         "       bins[6] = vec4(0.0);bins[7] = vec4(0.0);bins[8] = vec4(0.0);    \n"
01830         "       vec4 sift = texture2DRect(tex, gl_TexCoord[0].xy);      \n"
01831         "       vec2 pos = sift.xy; \n"
01832         "       bool orientation_mode = (size.z != 0.0);                \n"
01833         "       float sigma = orientation_mode? (abs(size.z) * pow(size.w, sift.w) * sift.z) : (sift.w); \n"
01834         "       //bool fixed_orientation = (size.z < 0.0);              \n"
01835         "       if(size.z < 0.0) {gl_FragData[0] = vec4(pos, 0.0, sigma); return;}"
01836         "       float gsigma = sigma * GAUSSIAN_WF;                             \n"
01837         "       vec2 win = abs(vec2(sigma * (SAMPLE_WF * GAUSSIAN_WF)));        \n"
01838         "       vec2 dim = size.xy;                                                     \n"
01839         "       vec4 dist_threshold = vec4(win.x*win.x+0.5);                    \n"
01840         "       float factor = -0.5/(gsigma*gsigma);                    \n"
01841         "       vec4 sz;        vec2 spos;                                              \n"
01842         "       //if(any(pos.xy <= float(1))) discard;                                  \n"
01843         "       sz.xy = max( pos - win, vec2(2.0,2.0));                 \n"
01844         "       sz.zw = min( pos + win, dim-vec2(3.0));                         \n"
01845         "       sz = floor(sz*0.5) + 0.5; ";
01846                 //loop to get the histogram
01847 
01848         out<<"\n"
01849         "       for(spos.y = sz.y; spos.y <= sz.w;      spos.y+=1.0)                            \n"
01850         "       {                                                                                                                               \n"
01851         "               for(spos.x = sz.x; spos.x <= sz.z;      spos.x+=1.0)                    \n"
01852         "               {                                                                                                                       \n"
01853         "                       vec2 offset = 2.0 * spos - pos - vec2(0.5);                                     \n"
01854         "                       vec4 off = vec4(offset, offset + vec2(1));                              \n"
01855         "                       vec4 distsq = off.xzxz * off.xzxz + off.yyww * off.yyww;        \n"
01856         "                       bvec4 inside = lessThan(distsq, dist_threshold);                        \n"
01857         "                       if(any(inside))                                                                         \n"
01858         "                       {                                                                                                               \n"
01859         "                               vec4 gg = texture2DRect(gtex, spos);                            \n"
01860         "                               vec4 oo = texture2DRect(otex, spos);                            \n"
01861         "                               vec4 weight = gg * exp(distsq * factor);                        \n"
01862         "                               vec4 idxv  = floor(degrees(oo)*0.1);                            \n"
01863         "                               idxv+= (vec4(lessThan(idxv, vec4(0.0)))*36.0);                  \n"
01864         "                               vec4 vidx = fract(idxv * 0.25) * 4.0;//mod(idxv, 4.0);  \n";
01865         //
01866         if(GlobalUtil::_UseDynamicIndexing)
01867         {
01868                 // it might be slow on some GPUs
01869                 out<<"\n"
01870         "                               for(int i = 0 ; i < 4; i++)\n"
01871         "                               {\n"
01872         "                                       if(inside[i])\n"
01873         "                                       {\n"
01874         "                                               float idx = idxv[i];                                                            \n"
01875         "                                               vec4 inc = weight[i] * vec4(equal(vec4(vidx[i]), vec4(0.0,1.0,2.0,3.0)));       \n"
01876         "                                               int iidx = int(floor(idx*0.25));        \n"
01877         "                                               bins[iidx]+=inc;                                        \n"
01878         "                                       }                                                                               \n"
01879         "                               }                                                                                       \n"
01880         "                       }                                                                                               \n"
01881         "               }                                                                                                       \n"
01882         "       }";
01883 
01884         }else
01885         {
01886                 //nvfp40 still does not support dynamic array indexing
01887                 //unrolled binary search
01888                 //it seems to be faster than the dyanmic indexing version on some GPUs
01889                 out<<"\n"
01890         "                               for(int i = 0 ; i < 4; i++)\n"
01891         "                               {\n"
01892         "                                       if(inside[i])\n"
01893         "                                       {\n"
01894         "                                               float idx = idxv[i];                                                                            \n"
01895         "                                               vec4 inc = weight[i] * vec4(equal(vec4(vidx[i]), vec4(0,1,2,3)));       \n"
01896         "                                               if(idx < 16.0)                                                  \n"
01897         "                                               {                                                                               \n"
01898         "                                                       if(idx < 8.0)                                                   \n"
01899         "                                                       {                                                                       \n"
01900         "                                                               if(idx < 4.0)   {       bins[0]+=inc;}  \n"
01901         "                                                               else            {       bins[1]+=inc;}  \n"
01902         "                                                       }else                                                           \n"
01903         "                                                       {                                                                       \n"
01904         "                                                               if(idx < 12.0){ bins[2]+=inc;}  \n"
01905         "                                                               else            {       bins[3]+=inc;}  \n"
01906         "                                                       }                                                                       \n"
01907         "                                               }else if(idx < 32.0)                                            \n"
01908         "                                               {                                                                               \n"
01909         "                                                       if(idx < 24.0)                                          \n"
01910         "                                                       {                                                                       \n"
01911         "                                                               if(idx <20.0)   {       bins[4]+=inc;}  \n"
01912         "                                                               else            {       bins[5]+=inc;}  \n"
01913         "                                                       }else                                                           \n"
01914         "                                                       {                                                                       \n"
01915         "                                                               if(idx < 28.0){ bins[6]+=inc;}  \n"
01916         "                                                               else            {       bins[7]+=inc;}  \n"
01917         "                                                       }                                                                       \n"
01918         "                                               }else                                           \n"
01919         "                                               {                                                                               \n"
01920         "                                                       bins[8]+=inc;                                           \n"
01921         "                                               }                                                                               \n"
01922         "                                       }                                                                                       \n"
01923         "                               }                                                                                               \n"
01924         "                       }                                                                               \n"
01925         "               }                                                                                       \n"
01926         "       }";
01927 
01928         }
01929 
01930         //reuse the code from the unpacked version..
01931         ShaderBagGLSL::WriteOrientationCodeToStream(out);
01932 
01933 
01934 
01935         ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
01936         if(program->IsNative())
01937         {
01938                 s_orientation = program ;
01939                 _param_orientation_gtex = glGetUniformLocation(*program, "gtex");
01940                 _param_orientation_otex = glGetUniformLocation(*program, "otex");
01941                 _param_orientation_size = glGetUniformLocation(*program, "size");
01942         }else
01943         {
01944                 delete program;
01945         }
01946 }
01947 
01948 void ShaderBagPKSL::SetGenListStartParam(float width, int tex0)
01949 {
01950         glUniform1f(_param_ftex_width, width);
01951         glUniform1i(_param_genlist_start_tex0, 0);
01952 }
01953 
01954 void ShaderBagPKSL::LoadGenListShader(int ndoglev,int nlev)
01955 {
01956         ProgramGLSL * program;
01957 
01958         s_genlist_init_tight = new ProgramGLSL(
01959         "uniform sampler2DRect tex; void main ()\n"
01960         "{\n"
01961         "       vec4 key = vec4(texture2DRect(tex, gl_TexCoord[0].xy).r, \n"
01962         "                                       texture2DRect(tex, gl_TexCoord[1].xy).r, \n"
01963         "                                       texture2DRect(tex, gl_TexCoord[2].xy).r, \n"
01964         "                                       texture2DRect(tex, gl_TexCoord[3].xy).r); \n"
01965         "                                       gl_FragColor = vec4(notEqual(key, vec4(0.0))); \n"
01966         "}");
01967 
01968         s_genlist_init_ex = program = new ProgramGLSL(
01969         "uniform sampler2DRect tex; uniform vec4 bbox; void main ()\n"
01970         "{\n"
01971         "       vec4 helper1 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[0].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
01972         "       vec4 helper2 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[1].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
01973         "       vec4 helper3 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[2].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
01974         "       vec4 helper4 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[3].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
01975         "       vec4 bx1 = vec4(lessThan(gl_TexCoord[0].xxyy, bbox)); \n"
01976         "       vec4 bx4 = vec4(lessThan(gl_TexCoord[3].xxyy, bbox)); \n"
01977         "       vec4 bx2 = vec4(bx4.xy, bx1.zw); \n"
01978         "       vec4 bx3 = vec4(bx1.xy, bx4.zw);\n"
01979         "       helper1 = min(min(bx1.xyxy, bx1.zzww), helper1);\n"
01980         "       helper2 = min(min(bx2.xyxy, bx2.zzww), helper2);\n"
01981         "       helper3 = min(min(bx3.xyxy, bx3.zzww), helper3);\n"
01982         "       helper4 = min(min(bx4.xyxy, bx4.zzww), helper4);\n"
01983         "       gl_FragColor.r = float(any(greaterThan(max(helper1.xy, helper1.zw), vec2(0.0))));       \n"
01984         "       gl_FragColor.g = float(any(greaterThan(max(helper2.xy, helper2.zw), vec2(0.0))));       \n"
01985         "       gl_FragColor.b = float(any(greaterThan(max(helper3.xy, helper3.zw), vec2(0.0))));       \n"
01986         "       gl_FragColor.a = float(any(greaterThan(max(helper4.xy, helper4.zw), vec2(0.0))));       \n"
01987         "}");
01988         _param_genlist_init_bbox = glGetUniformLocation( *program, "bbox");
01989 
01990         s_genlist_end = program = new ProgramGLSL(
01991                 GlobalUtil::_KeepExtremumSign == 0 ? 
01992         
01993         "uniform sampler2DRect tex; uniform sampler2DRect ktex; void main()\n"
01994         "{\n"
01995         "       vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n"
01996         "       vec2 pos = tc.rg; float index = tc.b;\n"
01997         "       vec4 tk = texture2DRect( ktex, pos); \n"
01998         "       vec4 keys = vec4(equal(abs(tk.rrrr), vec4(1.0, 2.0, 3.0, 4.0))); \n"
01999         "       vec2 opos; \n"
02000         "       opos.x = dot(keys, vec4(-0.5, 0.5, -0.5, 0.5));\n"
02001         "       opos.y = dot(keys, vec4(-0.5, -0.5, 0.5, 0.5));\n"
02002         "       gl_FragColor = vec4(opos + pos * 2.0 + tk.yz, 1.0, tk.w);\n"
02003         "}" : 
02004         
02005         "uniform sampler2DRect tex; uniform sampler2DRect ktex; void main()\n"
02006         "{\n"
02007         "       vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n"
02008         "       vec2 pos = tc.rg; float index = tc.b;\n"
02009         "       vec4 tk = texture2DRect( ktex, pos); \n"
02010         "       vec4 keys = vec4(equal(abs(tk.rrrr), vec4(1.0, 2.0, 3.0, 4.0))) \n"
02011         "       vec2 opos; \n"
02012         "       opos.x = dot(keys, vec4(-0.5, 0.5, -0.5, 0.5));\n"
02013         "       opos.y = dot(keys, vec4(-0.5, -0.5, 0.5, 0.5));\n"
02014         "       gl_FragColor = vec4(opos + pos * 2.0 + tk.yz, sign(tk.r), tk.w);\n"
02015         "}"     
02016         );
02017 
02018         _param_genlist_end_ktex = glGetUniformLocation(*program, "ktex");
02019 
02020         //reduction ...
02021         s_genlist_histo = new ProgramGLSL(
02022         "uniform sampler2DRect tex; void main ()\n"
02023         "{\n"
02024         "       vec4 helper; vec4 helper2; \n"
02025         "       helper = texture2DRect(tex, gl_TexCoord[0].xy); helper2.xy = helper.xy + helper.zw; \n"
02026         "       helper = texture2DRect(tex, gl_TexCoord[1].xy); helper2.zw = helper.xy + helper.zw; \n"
02027         "       gl_FragColor.rg = helper2.xz + helper2.yw;\n"
02028         "       helper = texture2DRect(tex, gl_TexCoord[2].xy); helper2.xy = helper.xy + helper.zw; \n"
02029         "       helper = texture2DRect(tex, gl_TexCoord[3].xy); helper2.zw = helper.xy + helper.zw; \n"
02030         "       gl_FragColor.ba= helper2.xz+helper2.yw;\n"
02031         "}");
02032 
02033 
02034         //read of the first part, which generates tex coordinates 
02035 
02036         s_genlist_start= program =  ShaderBagGLSL::LoadGenListStepShader(1, 1);
02037         _param_ftex_width= glGetUniformLocation(*program, "width");
02038         _param_genlist_start_tex0 = glGetUniformLocation(*program, "tex0");
02039         //stepping
02040         s_genlist_step = program = ShaderBagGLSL::LoadGenListStepShader(0, 1);
02041         _param_genlist_step_tex0= glGetUniformLocation(*program, "tex0");
02042 
02043 }
02044 void ShaderBagPKSL::UnloadProgram(void)
02045 {
02046         glUseProgram(0);
02047 }
02048 void ShaderBagPKSL::LoadKeypointShader(float dog_threshold, float edge_threshold)
02049 {
02050         float threshold0 = dog_threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f);
02051         float threshold1 = dog_threshold;
02052         float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
02053         ostringstream out;;
02054         out<<setprecision(8);
02055 
02056         if(GlobalUtil::_IsNvidia)
02057         {
02058                 out << "#pragma optionNV(ifcvt none)\n"
02059                                 "#pragma optionNV(unroll all)\n";
02060 
02061         }
02062         if(GlobalUtil::_KeepShaderLoop)
02063         {
02064                 out <<  "#define REPEAT4(FUNCTION)\\\n"
02065                                 "for(int i = 0; i < 4; ++i)\\\n"
02066                                 "{\\\n"
02067                                 "       FUNCTION(i);\\\n"
02068                                 "}\n";
02069         }else
02070         {
02071                 //loop unroll
02072                 out <<  "#define REPEAT4(FUNCTION)\\\n"
02073                                 "FUNCTION(0);\\\n"
02074                                 "FUNCTION(1);\\\n"
02075                                 "FUNCTION(2);\\\n"
02076                                 "FUNCTION(3);\n";
02077         }
02078         //tex(X)(Y)
02079         //X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)  
02080         //Y: (CDU) (CENTER 0, DOWN -1, UP    +1) 
02081 
02082         if(GlobalUtil::_DarknessAdaption)
02083         {
02084                 out <<  "#define THRESHOLD0(i) (" << threshold0 << "* ii[i])\n"
02085                                 "#define THRESHOLD1 (" << threshold1 << "* ii[0])\n"
02086                                 "#define THRESHOLD2 " << threshold2 << "\n"
02087                                 "#define DEFINE_EXTRA() vec4 ii = texture2DRect(texI, gl_TexCoord[0].xy); "
02088                                 "ii = min(2.0 * ii + 0.1, 1.0) \n"
02089                                 "#define MOVE_EXTRA(idx)        ii[0] = ii[idx]\n";
02090                 out << "uniform sampler2DRect texI;\n";
02091         }else
02092         {
02093                 out <<  "#define THRESHOLD0(i) " << threshold0 << "\n"
02094                                 "#define THRESHOLD1 " << threshold1 << "\n"
02095                                 "#define THRESHOLD2 " << threshold2 << "\n"
02096                                 "#define DEFINE_EXTRA()\n"
02097                                 "#define MOVE_EXTRA(idx) \n"    ;
02098         }
02099 
02100         out<<
02101         "uniform sampler2DRect tex; uniform sampler2DRect texU;\n"
02102         "uniform sampler2DRect texD; void main ()\n"
02103         "{\n"
02104         "       vec2 TexRU = vec2(gl_TexCoord[2].x, gl_TexCoord[4].y); \n"
02105         "       vec4 ccc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
02106         "       vec4 clc = texture2DRect(tex, gl_TexCoord[1].xy);\n"
02107         "       vec4 crc = texture2DRect(tex, gl_TexCoord[2].xy);\n"
02108         "       vec4 ccd = texture2DRect(tex, gl_TexCoord[3].xy);\n"
02109         "       vec4 ccu = texture2DRect(tex, gl_TexCoord[4].xy);\n"
02110         "       vec4 cld = texture2DRect(tex, gl_TexCoord[5].xy);\n"
02111         "       vec4 clu = texture2DRect(tex, gl_TexCoord[6].xy);\n"
02112         "       vec4 crd = texture2DRect(tex, gl_TexCoord[7].xy);\n"
02113         "       vec4 cru = texture2DRect(tex, TexRU.xy);\n"
02114         "       vec4  cc = ccc;\n"
02115         "       vec4  v1[4], v2[4];\n"
02116         "       v1[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
02117         "       v1[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
02118         "       v1[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
02119         "       v1[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
02120         "       v2[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
02121         "       v2[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
02122         "       v2[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
02123         "       v2[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
02124         "       DEFINE_EXTRA();\n";
02125 
02126         //test against 8 neighbours
02127         //use variable to identify type of extremum
02128         //1.0 for local maximum and -1.0 for minimum
02129         out <<
02130         "       vec4 key = vec4(0.0); \n"
02131         "       #define KEYTEST_STEP0(i) \\\n"
02132         "       {\\\n"
02133         "               bvec4 test1 = greaterThan(vec4(cc[i]), max(v1[i], v2[i])), test2 = lessThan(vec4(cc[i]), min(v1[i], v2[i]));\\\n"
02134         "               key[i] = cc[i] > float(THRESHOLD0(i)) && all(test1)?1.0: 0.0;\\\n"
02135         "               key[i] = cc[i] < float(-THRESHOLD0(i)) && all(test2)? -1.0: key[i];\\\n"
02136         "       }\n"
02137         "       REPEAT4(KEYTEST_STEP0);\n"
02138         "       if(gl_TexCoord[0].x < 1.0) {key.rb = vec2(0.0);}\n"
02139         "       if(gl_TexCoord[0].y < 1.0) {key.rg = vec2(0.0);}\n"
02140         "       gl_FragColor = vec4(0.0);\n"
02141         "       if(any(notEqual(key, vec4(0.0)))) {\n";
02142 
02143         //do edge supression first.. 
02144         //vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
02145         //vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
02146 
02147         out<<
02148         "       float fxx[4], fyy[4], fxy[4], fx[4], fy[4];\n"
02149         "       #define EDGE_SUPPRESION(i) \\\n"
02150         "       if(key[i] != 0.0)\\\n"
02151         "       {\\\n"
02152         "               vec4 D2 = v1[i].xyzw - cc[i];\\\n"
02153         "               vec2 D4 = v2[i].xw - v2[i].yz;\\\n"
02154         "               vec2 D5 = 0.5*(v1[i].yw-v1[i].xz); \\\n"
02155         "               fx[i] = D5.x;   fy[i] = D5.y ;\\\n"
02156         "               fxx[i] = D2.x + D2.y;\\\n"
02157         "               fyy[i] = D2.z + D2.w;\\\n"
02158         "               fxy[i] = 0.25*(D4.x + D4.y);\\\n"
02159         "               float fxx_plus_fyy = fxx[i] + fyy[i];\\\n"
02160         "               float score_up = fxx_plus_fyy*fxx_plus_fyy; \\\n"
02161         "               float score_down = (fxx[i]*fyy[i] - fxy[i]*fxy[i]);\\\n"
02162         "               if( score_down <= 0.0 || score_up > THRESHOLD2 * score_down)key[i] = 0.0;\\\n"
02163         "       }\n"
02164         "       REPEAT4(EDGE_SUPPRESION);\n"
02165         "       if(any(notEqual(key, vec4(0.0)))) {\n";
02166         
02168         //read 9 pixels of upper/lower level
02169         out<<
02170         "       vec4  v4[4], v5[4], v6[4];\n"
02171         "       ccc = texture2DRect(texU, gl_TexCoord[0].xy);\n"
02172         "       clc = texture2DRect(texU, gl_TexCoord[1].xy);\n"
02173         "       crc = texture2DRect(texU, gl_TexCoord[2].xy);\n"
02174         "       ccd = texture2DRect(texU, gl_TexCoord[3].xy);\n"
02175         "       ccu = texture2DRect(texU, gl_TexCoord[4].xy);\n"
02176         "       cld = texture2DRect(texU, gl_TexCoord[5].xy);\n"
02177         "       clu = texture2DRect(texU, gl_TexCoord[6].xy);\n"
02178         "       crd = texture2DRect(texU, gl_TexCoord[7].xy);\n"
02179         "       cru = texture2DRect(texU, TexRU.xy);\n"
02180         "       vec4 cu = ccc;\n"
02181         "       v4[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
02182         "       v4[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
02183         "       v4[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
02184         "       v4[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
02185         "       v6[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
02186         "       v6[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
02187         "       v6[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
02188         "       v6[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
02189         <<
02190         "       #define KEYTEST_STEP1(i)\\\n"
02191         "       if(key[i] == 1.0)\\\n"
02192         "       {\\\n"
02193         "               bvec4 test = lessThan(vec4(cc[i]), max(v4[i], v6[i])); \\\n"
02194         "               if(cc[i] < cu[i] || any(test))key[i] = 0.0; \\\n"
02195         "       }else if(key[i] == -1.0)\\\n"
02196         "       {\\\n"
02197         "               bvec4 test = greaterThan(vec4(cc[i]), min(v4[i], v6[i])); \\\n"
02198         "               if(cc[i] > cu[i] || any(test) )key[i] = 0.0; \\\n"
02199         "       }\n"
02200         "       REPEAT4(KEYTEST_STEP1);\n"
02201         "       if(any(notEqual(key, vec4(0.0)))) { \n"
02202         <<
02203         "       ccc = texture2DRect(texD, gl_TexCoord[0].xy);\n"
02204         "       clc = texture2DRect(texD, gl_TexCoord[1].xy);\n"
02205         "       crc = texture2DRect(texD, gl_TexCoord[2].xy);\n"
02206         "       ccd = texture2DRect(texD, gl_TexCoord[3].xy);\n"
02207         "       ccu = texture2DRect(texD, gl_TexCoord[4].xy);\n"
02208         "       cld = texture2DRect(texD, gl_TexCoord[5].xy);\n"
02209         "       clu = texture2DRect(texD, gl_TexCoord[6].xy);\n"
02210         "       crd = texture2DRect(texD, gl_TexCoord[7].xy);\n"
02211         "       cru = texture2DRect(texD, TexRU.xy);\n"
02212         "       vec4 cd = ccc;\n"
02213         "       v5[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
02214         "       v5[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
02215         "       v5[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
02216         "       v5[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
02217         "       v6[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
02218         "       v6[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
02219         "       v6[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
02220         "       v6[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
02221         <<
02222         "       #define KEYTEST_STEP2(i)\\\n"
02223         "       if(key[i] == 1.0)\\\n"
02224         "       {\\\n"
02225         "               bvec4 test = lessThan(vec4(cc[i]), max(v5[i], v6[i]));\\\n"
02226         "               if(cc[i] < cd[i] || any(test))key[i] = 0.0; \\\n"
02227         "       }else if(key[i] == -1.0)\\\n"
02228         "       {\\\n"
02229         "               bvec4 test = greaterThan(vec4(cc[i]), min(v5[i], v6[i]));\\\n"
02230         "               if(cc[i] > cd[i] || any(test))key[i] = 0.0; \\\n"
02231         "       }\n"
02232         "       REPEAT4(KEYTEST_STEP2);\n"
02233         "       float keysum = dot(abs(key), vec4(1, 1, 1, 1)) ;\n"
02234         "       //assume there is only one keypoint in the four. \n"
02235         "       if(keysum==1.0) {\n";
02236 
02238         if(GlobalUtil::_SubpixelLocalization)
02239 
02240         out <<
02241         "       vec3 offset = vec3(0.0, 0.0, 0.0); \n"
02242         "       #define TESTMOVE_KEYPOINT(idx) \\\n"
02243         "       if(key[idx] != 0.0) \\\n"
02244         "       {\\\n"
02245         "               cu[0] = cu[idx];        cd[0] = cd[idx];        cc[0] = cc[idx];        \\\n"
02246         "               v4[0] = v4[idx];        v5[0] = v5[idx];                                                \\\n"
02247         "               fxy[0] = fxy[idx];      fxx[0] = fxx[idx];      fyy[0] = fyy[idx];      \\\n"
02248         "               fx[0] = fx[idx];        fy[0] = fy[idx];        MOVE_EXTRA(idx);  \\\n"
02249         "       }\n"
02250         "       TESTMOVE_KEYPOINT(1);\n"
02251         "       TESTMOVE_KEYPOINT(2);\n"
02252         "       TESTMOVE_KEYPOINT(3);\n"
02253         <<
02254                 
02255         "       float fs = 0.5*( cu[0] - cd[0] );                               \n"
02256         "       float fss = cu[0] + cd[0] - cc[0] - cc[0];\n"
02257         "       float fxs = 0.25 * (v4[0].y + v5[0].x - v4[0].x - v5[0].y);\n"
02258         "       float fys = 0.25 * (v4[0].w + v5[0].z - v4[0].z - v5[0].w);\n"
02259         "       vec4 A0, A1, A2 ;                       \n"
02260         "       A0 = vec4(fxx[0], fxy[0], fxs, -fx[0]); \n"
02261         "       A1 = vec4(fxy[0], fyy[0], fys, -fy[0]); \n"
02262         "       A2 = vec4(fxs, fys, fss, -fs);  \n"
02263         "       vec3 x3 = abs(vec3(fxx[0], fxy[0], fxs));               \n"
02264         "       float maxa = max(max(x3.x, x3.y), x3.z);        \n"
02265         "       if(maxa >= 1e-10 ) \n"
02266         "       {                                                                                               \n"
02267         "               if(x3.y ==maxa )                                                        \n"
02268         "               {                                                                                       \n"
02269         "                       vec4 TEMP = A1; A1 = A0; A0 = TEMP;     \n"
02270         "               }else if( x3.z == maxa )                                        \n"
02271         "               {                                                                                       \n"
02272         "                       vec4 TEMP = A2; A2 = A0; A0 = TEMP;     \n"
02273         "               }                                                                                       \n"
02274         "               A0 /= A0.x;                                                                     \n"
02275         "               A1 -= A1.x * A0;                                                        \n"
02276         "               A2 -= A2.x * A0;                                                        \n"
02277         "               vec2 x2 = abs(vec2(A1.y, A2.y));                \n"
02278         "               if( x2.y > x2.x )                                                       \n"
02279         "               {                                                                                       \n"
02280         "                       vec3 TEMP = A2.yzw;                                     \n"
02281         "                       A2.yzw = A1.yzw;                                                \n"
02282         "                       A1.yzw = TEMP;                                                  \n"
02283         "                       x2.x = x2.y;                                                    \n"
02284         "               }                                                                                       \n"
02285         "               if(x2.x >= 1e-10) {                                                             \n"
02286         "                       A1.yzw /= A1.y;                                                         \n"
02287         "                       A2.yzw -= A2.y * A1.yzw;                                        \n"
02288         "                       if(abs(A2.z) >= 1e-10) {\n"
02289         "                               offset.z = A2.w /A2.z;                              \n"
02290         "                               offset.y = A1.w - offset.z*A1.z;                            \n"
02291         "                               offset.x = A0.w - offset.z*A0.z - offset.y*A0.y;        \n"
02292         "                               bool test = (abs(cc[0] + 0.5*dot(vec3(fx[0], fy[0], fs), offset ))>float(THRESHOLD1)) ;\n"
02293         "                               if(!test || any( greaterThan(abs(offset), vec3(1.0)))) key = vec4(0.0);\n"
02294         "                       }\n"
02295         "               }\n"
02296         "       }\n"
02297         <<"\n"
02298         "       float keyv = dot(key, vec4(1.0, 2.0, 3.0, 4.0));\n"
02299         "       gl_FragColor = vec4(keyv,  offset);\n"
02300         "       }}}}\n"
02301         "}\n"   <<'\0';
02302 
02303         else out << "\n"
02304         "       float keyv = dot(key, vec4(1.0, 2.0, 3.0, 4.0));\n"
02305         "       gl_FragColor =  vec4(keyv, 0.0, 0.0, 0.0);\n"
02306         "       }}}}\n"
02307         "}\n"   <<'\0';
02308 
02309         ProgramGLSL * program = new ProgramGLSL(out.str().c_str()); 
02310         s_keypoint = program ;
02311 
02312         //parameter
02313         _param_dog_texu = glGetUniformLocation(*program, "texU");
02314         _param_dog_texd = glGetUniformLocation(*program, "texD");
02315         if(GlobalUtil::_DarknessAdaption)       _param_dog_texi = glGetUniformLocation(*program, "texI");
02316 }
02317 void ShaderBagPKSL::SetDogTexParam(int texU, int texD)
02318 {
02319         glUniform1i(_param_dog_texu, 1);
02320         glUniform1i(_param_dog_texd, 2);
02321         if(GlobalUtil::_DarknessAdaption)glUniform1i(_param_dog_texi, 3);
02322 }
02323 void ShaderBagPKSL::SetGenListStepParam(int tex, int tex0)
02324 {
02325         glUniform1i(_param_genlist_step_tex0, 1);       
02326 }
02327 
02328 void ShaderBagPKSL::SetGenVBOParam(float width, float fwidth,float size)
02329 {
02330         float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width};
02331         glUniform4fv(_param_genvbo_size, 1, sizes);
02332 }
02333 void ShaderBagPKSL::SetGradPassParam(int texP)
02334 {
02335         glUniform1i(_param_grad_pass_texp, 1);
02336 }
02337 
02338 void ShaderBagPKSL::LoadDescriptorShader()
02339 {
02340         GlobalUtil::_DescriptorPPT = 16;
02341         LoadDescriptorShaderF2();
02342     s_rect_description = LoadDescriptorProgramRECT();
02343 }
02344 
02345 ProgramGLSL* ShaderBagPKSL::LoadDescriptorProgramRECT()
02346 {
02347         //one shader outpout 128/8 = 16 , each fragout encodes 4
02348         //const double twopi = 2.0*3.14159265358979323846;
02349         //const double rpi  = 8.0/twopi;
02350         ostringstream out;
02351         out<<setprecision(8);
02352         if(GlobalUtil::_KeepShaderLoop)
02353         {
02354                 out <<  "#define REPEAT4(FUNCTION)\\\n"
02355                                 "for(int i = 0; i < 4; ++i)\\\n"
02356                                 "{\\\n"
02357                                 "       FUNCTION(i);\\\n"
02358                                 "}\n";
02359         }else
02360         {
02361                 //loop unroll for ATI
02362                 out <<  "#define REPEAT4(FUNCTION)\\\n"
02363                                 "FUNCTION(0);\\\n"
02364                                 "FUNCTION(1);\\\n"
02365                                 "FUNCTION(2);\\\n"
02366                                 "FUNCTION(3);\n";
02367         }
02368 
02369         out<<"\n"
02370         "#define M_PI 3.14159265358979323846\n"
02371         "#define TWO_PI (2.0*M_PI)\n"
02372         "#define RPI 1.2732395447351626861510701069801\n"
02373         "#define WF size.z\n"
02374         "uniform sampler2DRect tex;                     \n"
02375         "uniform sampler2DRect gtex;                    \n"
02376         "uniform sampler2DRect otex;                    \n"
02377         "uniform vec4           dsize;                          \n"
02378         "uniform vec3           size;                           \n"
02379         "void main()                    \n"
02380         "{\n"
02381         "       vec2 dim        = size.xy;      //image size                    \n"
02382         "       float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
02383         "       float idx = 8.0* fract(index * 0.125) + 8.0 * floor(2.0* fract(gl_TexCoord[0].y * 0.5));                \n"
02384         "       index = floor(index*0.125)+ 0.49;  \n"
02385         "       vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
02386         "       vec2 pos = texture2DRect(tex, coord).xy;                \n"
02387         "       vec2 wsz = texture2DRect(tex, coord).zw;\n"
02388     "   float aspect_ratio = wsz.y / wsz.x;\n"
02389     "   float aspect_sq = aspect_ratio * aspect_ratio; \n"
02390         "       vec2 spt  = wsz * 0.25; vec2 ispt = 1.0 / spt; \n";
02391 
02392         //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
02393         //and rots is  (cos, sin, -cos, -sin ) /(factor*sigma)
02394         //devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
02395         //To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side
02396         out<<
02397         "       vec4 temp; vec2 pt;                             \n"
02398     "   pt.x = pos.x + fract(idx*0.25) * wsz.x;                         \n"
02399         "       pt.y = pos.y + (floor(idx*0.25) + 0.5) * spt.y;                 \n";
02400         
02401         //get a horizontal bounding box of the rotated rectangle
02402         out<<
02403     "   vec4 sz;                                        \n"
02404         "       sz.xy = max(pt - spt, vec2(2,2));\n"
02405         "       sz.zw = min(pt + spt, dim - vec2(3));           \n"
02406         "       sz = floor(sz * 0.5)+0.5;"; //move sample point to pixel center
02407         //get voting for two box
02408 
02409         out<<"\n"
02410         "       vec4 DA, DB;   vec2 spos;                       \n"
02411         "       DA = DB  = vec4(0.0, 0.0, 0.0, 0.0);            \n"
02412         "       vec4 nox = vec4(0.0, 1.0, 0.0, 1.0);                                    \n"
02413         "       vec4 noy = vec4(0.0, 0.0, 1.0, 1.0);                                    \n"
02414         "       for(spos.y = sz.y; spos.y <= sz.w;      spos.y+=1.0)                            \n"
02415         "       {                                                                                                                               \n"
02416         "               for(spos.x = sz.x; spos.x <= sz.z;      spos.x+=1.0)                    \n"
02417         "               {                                                                                                                       \n"
02418         "                       vec2 tpt = spos * 2.0 - pt - 0.5;                                       \n"
02419     "                   vec4 nx = (tpt.x + nox) * ispt.x;                                                               \n"
02420     "                   vec4 ny = (tpt.y + noy) * ispt.y;                       \n"
02421         "                       vec4 nxn = abs(nx), nyn = abs(ny);                                              \n"
02422     "                   bvec4 inside = lessThan(max(nxn, nyn) , vec4(1.0));     \n"
02423         "                       if(any(inside))\n"
02424         "                       {\n"
02425         "                               vec4 gg = texture2DRect(gtex, spos);\n"
02426         "                               vec4 oo = texture2DRect(otex, spos);\n"
02427     //"               vec4 cc = cos(oo), ss = sin(oo); \n"
02428     //"               oo = atan(ss* aspect_ratio, cc); \n"
02429     //"               gg = gg * sqrt(ss * ss * aspect_sq + cc * cc); \n "
02430         "                               vec4 theta0 = (- oo)*RPI;\n"
02431         "                               vec4 theta = 8.0 * fract(1.0 + 0.125 * theta0);                 \n"
02432         "                               vec4 theta1 = floor(theta);                                                             \n"
02433         "                               vec4 weight = (vec4(1) - nxn) * (vec4(1) - nyn) * gg; \n"
02434         "                               vec4 weight2 = (theta - theta1) * weight;                               \n"
02435         "                               vec4 weight1 = weight - weight2;                                                \n"
02436         "                               #define ADD_DESCRIPTOR(i) \\\n"
02437         "                               if(inside[i])\\\n"
02438         "                               {\\\n"
02439         "                                       DA += vec4(equal(vec4(theta1[i]), vec4(0, 1, 2, 3)))*weight1[i]; \\\n"
02440         "                                       DA += vec4(equal(vec4(theta1[i]), vec4(7, 0, 1, 2)))*weight2[i]; \\\n"
02441         "                                       DB += vec4(equal(vec4(theta1[i]), vec4(4, 5, 6, 7)))*weight1[i]; \\\n"
02442         "                                       DB += vec4(equal(vec4(theta1[i]), vec4(3, 4, 5, 6)))*weight2[i]; \\\n"
02443         "                               }\n"
02444         "                               REPEAT4(ADD_DESCRIPTOR);\n"
02445         "                       }\n"
02446         "               }\n"
02447         "       }\n";
02448         out<<
02449         "        gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
02450         "}\n"<<'\0';
02451 
02452         ProgramGLSL * program =  new ProgramGLSL(out.str().c_str()); 
02453         if(program->IsNative()) 
02454         {
02455                 return program;
02456         }
02457         else
02458         {
02459                 delete program;
02460                 return NULL;
02461         }
02462 }
02463 
02464 ProgramGLSL* ShaderBagPKSL::LoadDescriptorProgramPKSL()
02465 {
02466         //one shader outpout 128/8 = 16 , each fragout encodes 4
02467         //const double twopi = 2.0*3.14159265358979323846;
02468         //const double rpi  = 8.0/twopi;
02469         ostringstream out;
02470         out<<setprecision(8);
02471 
02472         if(GlobalUtil::_KeepShaderLoop)
02473         {
02474                 out <<  "#define REPEAT4(FUNCTION)\\\n"
02475                                 "for(int i = 0; i < 4; ++i)\\\n"
02476                                 "{\\\n"
02477                                 "       FUNCTION(i);\\\n"
02478                                 "}\n";
02479         }else
02480         {
02481                 //loop unroll for ATI
02482                 out <<  "#define REPEAT4(FUNCTION)\\\n"
02483                                 "FUNCTION(0);\\\n"
02484                                 "FUNCTION(1);\\\n"
02485                                 "FUNCTION(2);\\\n"
02486                                 "FUNCTION(3);\n";
02487         }
02488 
02489         out<<"\n"
02490         "#define M_PI 3.14159265358979323846\n"
02491         "#define TWO_PI (2.0*M_PI)\n"
02492         "#define RPI 1.2732395447351626861510701069801\n"
02493         "#define WF size.z\n"
02494         "uniform sampler2DRect tex;                     \n"
02495         "uniform sampler2DRect gtex;                    \n"
02496         "uniform sampler2DRect otex;                    \n"
02497         "uniform vec4           dsize;                          \n"
02498         "uniform vec3           size;                           \n"
02499         "void main()                    \n"
02500         "{\n"
02501         "       vec2 dim        = size.xy;      //image size                    \n"
02502         "       float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
02503         "       float idx = 8.0* fract(index * 0.125) + 8.0 * floor(2.0* fract(gl_TexCoord[0].y * 0.5));                \n"
02504         "       index = floor(index*0.125)+ 0.49;  \n"
02505         "       vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
02506         "       vec2 pos = texture2DRect(tex, coord).xy;                \n"
02507         "       if(any(lessThan(pos.xy, vec2(1.0))) || any(greaterThan(pos.xy, dim-1.0))) "
02508         "       //discard;      \n" 
02509         "       { gl_FragData[0] = gl_FragData[1] = vec4(0.0); return; }\n"
02510         "       float anglef = texture2DRect(tex, coord).z;\n"
02511         "       if(anglef > M_PI) anglef -= TWO_PI;\n"
02512         "       float sigma = texture2DRect(tex, coord).w; \n"
02513         "       float spt  = abs(sigma * WF);   //default to be 3*sigma \n";
02514         //rotation
02515         out<<
02516         "       vec4 cscs, rots;                                                \n"
02517         "       cscs.x = cos(anglef); cscs.y = sin(anglef);     \n"
02518         "       cscs.zw = - cscs.xy;                                                    \n"
02519         "       rots = cscs /spt;                                                               \n"
02520         "       cscs *= spt; \n";
02521 
02522         //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
02523         //and rots is  (cos, sin, -cos, -sin ) /(factor*sigma)
02524         //devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
02525         //To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side
02526         out<<
02527         "       vec4 temp; vec2 pt, offsetpt;                           \n"
02528         "       /*the fraction part of idx is .5*/                      \n"
02529         "       offsetpt.x = 4.0* fract(idx*0.25) - 2.0;                                \n"
02530         "       offsetpt.y = floor(idx*0.25) - 1.5;                     \n"
02531         "       temp = cscs.xwyx*offsetpt.xyxy;                         \n"
02532         "       pt = pos + temp.xz + temp.yw;                           \n";
02533         
02534         //get a horizontal bounding box of the rotated rectangle
02535         out<<
02536         "       vec2 bwin = abs(cscs.xy);                                       \n"
02537         "       float bsz = bwin.x + bwin.y;                                    \n"
02538         "       vec4 sz;                                        \n"
02539         "       sz.xy = max(pt - vec2(bsz), vec2(2,2));\n"
02540         "       sz.zw = min(pt + vec2(bsz), dim - vec2(3));             \n"
02541         "       sz = floor(sz * 0.5)+0.5;"; //move sample point to pixel center
02542         //get voting for two box
02543 
02544         out<<"\n"
02545         "       vec4 DA, DB;   vec2 spos;                       \n"
02546         "       DA = DB  = vec4(0.0, 0.0, 0.0, 0.0);            \n"
02547         "       vec4 nox = vec4(0.0, rots.xy, rots.x + rots.y);                                 \n"
02548         "       vec4 noy = vec4(0.0, rots.wx, rots.w + rots.x);                                 \n"
02549         "       for(spos.y = sz.y; spos.y <= sz.w;      spos.y+=1.0)                            \n"
02550         "       {                                                                                                                               \n"
02551         "               for(spos.x = sz.x; spos.x <= sz.z;      spos.x+=1.0)                    \n"
02552         "               {                                                                                                                       \n"
02553         "                       vec2 tpt = spos * 2.0 - pt - 0.5;                                       \n"
02554         "                       vec4 temp = rots.xywx * tpt.xyxy;                                               \n"
02555         "                       vec2 temp2 = temp.xz + temp.yw;                                         \n"
02556         "                       vec4 nx = temp2.x + nox;                                                                \n"
02557         "                       vec4 ny = temp2.y + noy;                        \n"
02558         "                       vec4 nxn = abs(nx), nyn = abs(ny);                                              \n"
02559         "                       bvec4 inside = lessThan(max(nxn, nyn) , vec4(1.0));     \n"
02560         "                       if(any(inside))\n"
02561         "                       {\n"
02562         "                               vec4 gg = texture2DRect(gtex, spos);\n"
02563         "                               vec4 oo = texture2DRect(otex, spos);\n"
02564         "                               vec4 theta0 = (anglef - oo)*RPI;\n"
02565         "                               vec4 theta = 8.0 * fract(1.0 + 0.125 * theta0);                 \n"
02566         "                               vec4 theta1 = floor(theta);                                                             \n"
02567         "                               vec4 diffx = nx + offsetpt.x, diffy = ny + offsetpt.y;  \n"
02568         "                               vec4 ww = exp(-0.125 * (diffx * diffx + diffy * diffy ));       \n"
02569         "                               vec4 weight = (vec4(1) - nxn) * (vec4(1) - nyn) * gg * ww; \n"
02570         "                               vec4 weight2 = (theta - theta1) * weight;                               \n"
02571         "                               vec4 weight1 = weight - weight2;                                                \n"
02572         "       #define ADD_DESCRIPTOR(i) \\\n"
02573         "                               if(inside[i])\\\n"
02574         "                               {\\\n"
02575         "                                       DA += vec4(equal(vec4(theta1[i]), vec4(0, 1, 2, 3)))*weight1[i]; \\\n"
02576         "                                       DA += vec4(equal(vec4(theta1[i]), vec4(7, 0, 1, 2)))*weight2[i]; \\\n"
02577         "                                       DB += vec4(equal(vec4(theta1[i]), vec4(4, 5, 6, 7)))*weight1[i]; \\\n"
02578         "                                       DB += vec4(equal(vec4(theta1[i]), vec4(3, 4, 5, 6)))*weight2[i]; \\\n"
02579         "                               }\n"
02580         "                               REPEAT4(ADD_DESCRIPTOR);\n"
02581         "                       }\n"
02582         "               }\n"
02583         "       }\n";
02584         out<<
02585         "        gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
02586         "}\n"<<'\0';
02587 
02588         ProgramGLSL * program =  new ProgramGLSL(out.str().c_str()); 
02589         if(program->IsNative()) 
02590         {
02591                 return program;
02592         }
02593         else
02594         {
02595                 delete program;
02596                 return NULL;
02597         }
02598 }
02599 
02600 void ShaderBagPKSL::LoadDescriptorShaderF2()
02601 {
02602 
02603         ProgramGLSL * program = LoadDescriptorProgramPKSL();
02604         if( program )
02605         {
02606                 s_descriptor_fp = program;
02607                 _param_descriptor_gtex = glGetUniformLocation(*program, "gtex");
02608                 _param_descriptor_otex = glGetUniformLocation(*program, "otex");
02609                 _param_descriptor_size = glGetUniformLocation(*program, "size");
02610                 _param_descriptor_dsize = glGetUniformLocation(*program, "dsize");
02611         }
02612 }
02613 
02614 
02615 
02616 void ShaderBagPKSL::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
02617 {
02618         glUniform1i(_param_orientation_gtex, 1);
02619         glUniform2f(_param_orientation_size, sigma, sigma_step);
02620 }
02621 
02622 
02623 void ShaderBagPKSL::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int otex, float step)
02624 {
02626         glUniform1i(_param_orientation_gtex, 1);        
02627         glUniform1i(_param_orientation_otex, 2);        
02628 
02629         float size[4];
02630         size[0] = (float)width;
02631         size[1] = (float)height;
02632         size[2] = sigma;
02633         size[3] = step;
02634         glUniform4fv(_param_orientation_size, 1, size);
02635 }
02636 
02637 void ShaderBagPKSL::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth,  float width, float height, float sigma)
02638 {
02639     if(sigma == 0 && s_rect_description)
02640     {
02641         //rectangle description mode
02642         s_rect_description->UseProgram();       
02643         GLint param_descriptor_gtex = glGetUniformLocation(*s_rect_description, "gtex");
02644                 GLint param_descriptor_otex = glGetUniformLocation(*s_rect_description, "otex");
02645                 GLint param_descriptor_size = glGetUniformLocation(*s_rect_description, "size");
02646                 GLint param_descriptor_dsize = glGetUniformLocation(*s_rect_description, "dsize");
02648             glUniform1i(param_descriptor_gtex, 1);      
02649             glUniform1i(param_descriptor_otex, 2);      
02650 
02651             float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
02652             glUniform4fv(param_descriptor_dsize, 1, dsize);
02653             float size[3];
02654             size[0] = width;
02655             size[1] = height;
02656             size[2] = GlobalUtil::_DescriptorWindowFactor;
02657             glUniform3fv(param_descriptor_size, 1, size);
02658     }else
02659     {
02661             glUniform1i(_param_descriptor_gtex, 1);     
02662             glUniform1i(_param_descriptor_otex, 2);     
02663 
02664 
02665             float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
02666             glUniform4fv(_param_descriptor_dsize, 1, dsize);
02667             float size[3];
02668             size[0] = width;
02669             size[1] = height;
02670             size[2] = GlobalUtil::_DescriptorWindowFactor;
02671             glUniform3fv(_param_descriptor_size, 1, size);
02672     }
02673 
02674 }
02675 
02676 
02677 void ShaderBagPKSL::SetGenListEndParam(int ktex)
02678 {
02679         glUniform1i(_param_genlist_end_ktex, 1);
02680 }
02681 void ShaderBagPKSL::SetGenListInitParam(int w, int h)
02682 {
02683         float bbox[4] = {(w -1.0f) * 0.5f +0.25f, (w-1.0f) * 0.5f - 0.25f,  (h - 1.0f) * 0.5f + 0.25f, (h-1.0f) * 0.5f - 0.25f};
02684         glUniform4fv(_param_genlist_init_bbox, 1, bbox);
02685 }
02686 
02687 void ShaderBagPKSL::SetMarginCopyParam(int xmax, int ymax)
02688 {
02689         float truncate[4];
02690         truncate[0] = (xmax - 0.5f) * 0.5f; //((xmax + 1)  >> 1) - 0.5f;
02691         truncate[1] = (ymax - 0.5f) * 0.5f; //((ymax + 1)  >> 1) - 0.5f;
02692         truncate[2] = (xmax %2 == 1)? 0.0f: 1.0f;
02693         truncate[3] = truncate[2] +  (((ymax % 2) == 1)? 0.0f : 2.0f);
02694         glUniform4fv(_param_margin_copy_truncate, 1,  truncate);
02695 }


siftgpu
Author(s): Changchang Wu (library), Bence Magyar (ROS wrapper)
autogenerated on Thu Jan 2 2014 11:38:01