ProgramCG.cpp
Go to the documentation of this file.
00001 
00002 //      File:           ProgramCG.cpp
00003 //      Author:         Changchang Wu
00004 //      Description :   implementation of cg related class.
00005 //              class ProgramCG                 A simple wrapper of Cg programs
00006 //              class ShaderBagCG               cg shaders for SIFT
00007 //              class FilterCGGL                cg gaussian filters for SIFT
00008 //
00009 //      Copyright (c) 2007 University of North Carolina at Chapel Hill
00010 //      All Rights Reserved
00011 //
00012 //      Permission to use, copy, modify and distribute this software and its
00013 //      documentation for educational, research and non-profit purposes, without
00014 //      fee, and without a written agreement is hereby granted, provided that the
00015 //      above copyright notice and the following paragraph appear in all copies.
00016 //      
00017 //      The University of North Carolina at Chapel Hill make no representations
00018 //      about the suitability of this software for any purpose. It is provided
00019 //      'as is' without express or implied warranty. 
00020 //
00021 //      Please send BUG REPORTS to ccwu@cs.unc.edu
00022 //
00024 
00025 #if defined(CG_SIFTGPU_ENABLED) 
00026 
00027 #include "GL/glew.h"
00028 
00029 #include <iostream>
00030 #include <iomanip>
00031 #include <vector>
00032 #include <strstream>
00033 #include <algorithm>
00034 #include <stdlib.h>
00035 #include <math.h>
00036 #include <string.h>
00037 using namespace std;
00038 
00039 #include "GlobalUtil.h"
00040 #include "ProgramCG.h"
00041 #include "GLTexImage.h"
00042 #include "ShaderMan.h"
00043 #include "FrameBufferObject.h"
00044 
00045 
00046 
00047 #if  defined(_WIN32) 
00048         #pragma comment (lib, "../../lib/cg.lib")
00049         #pragma comment (lib, "../../lib/cggl.lib")
00050 #endif
00051 
00052 CGcontext       ProgramCG::_Context     =0;
00053 CGprofile       ProgramCG::_FProfile;
00054 
00056 // Construction/Destruction
00058 
00059 ProgramCG::ProgramCG()
00060 {
00061         _programID = NULL;
00062 }
00063 
00064 ProgramCG::~ProgramCG()
00065 {
00066         if(_programID) cgDestroyProgram(_programID);
00067 }
00068 
00069 ProgramCG::ProgramCG(const char *code, const char** cg_compile_args, CGprofile profile)
00070 {
00071         _valid = 0;
00072         _profile = profile;
00073         GLint epos;
00074         const char* ati_args[] = {"-po", "ATI_draw_buffers",0}; 
00075         const char* fp40_args[] = {"-ifcvt", "none","-unroll", "all", GlobalUtil::_UseFastMath? "-fastmath" : 0, 0};
00076         if(cg_compile_args == NULL) cg_compile_args = GlobalUtil::_IsNvidia? (GlobalUtil::_SupportFP40? fp40_args:NULL) : ati_args;
00077         _programID = ::cgCreateProgram(_Context, CG_SOURCE, code, profile, NULL, cg_compile_args);
00078         if(_programID)
00079         {
00080                 cgGLLoadProgram(_programID );
00081                 //_texParamID = cgGetNamedParameter(_programID, "tex");
00082 
00083                 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &epos);
00084                 if(epos >=0)
00085                 {
00086                         std::cout<<cgGetProgramString(_programID, CG_COMPILED_PROGRAM)<<endl;
00087                         std::cerr<<glGetString(GL_PROGRAM_ERROR_STRING_ARB)<<endl;
00088                 }else
00089                 {
00090                         _valid = 1;
00091                 }
00092         }else
00093         {
00094                 std::cerr<<code<<endl;
00095                 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &epos);
00096                 if(epos >=0)
00097                 {
00098                         std::cout<<cgGetProgramString(_programID, CG_COMPILED_PROGRAM)<<endl;
00099                         std::cerr<<glGetString(GL_PROGRAM_ERROR_STRING_ARB)<<endl;
00100                 }else
00101                 {
00102                         std::cout<<glGetString(GL_PROGRAM_ERROR_STRING_ARB)<<endl;
00103                 }
00104         }
00105 
00106 }
00107 
00108 void ProgramCG::ErrorCallback()
00109 {
00110         CGerror err = cgGetError();
00111         if(err)
00112         {
00113                 std::cerr<< cgGetErrorString(err)<<endl;
00114         }
00115 }
00116 
00117 
00118 void ProgramCG::InitContext()
00119 {
00120         if(_Context == 0)
00121         {
00122                 _Context        = cgCreateContext();
00123  
00125                 _FProfile = cgGLGetLatestProfile(CG_GL_FRAGMENT);
00126                 cgGLSetOptimalOptions(_FProfile);
00127 
00128                 if(GlobalUtil::_verbose) std::cout<<"Shader Profile: "<<cgGetProfileString(_FProfile)<<endl;
00129 
00130                 cgSetErrorCallback(ErrorCallback);
00131         }
00132 }
00133 
00134 void ProgramCG::DestroyContext()
00135 {
00136         cgDestroyContext(_Context);
00137 }
00138 
00139 ShaderBagCG::ShaderBagCG()
00140 {
00141         ProgramCG::InitContext();
00142 }
00143 
00144 
00145 int ProgramCG::UseProgram()
00146 {
00147         if(_programID)
00148         {
00149                 cgGLEnableProfile(_profile);
00150                 cgGLBindProgram(_programID);
00151 
00152                 return 1;
00153         }else
00154         {
00155                 return 0;
00156         }
00157 }
00158 
00159 void ShaderBagCG::UnloadProgram()
00160 {
00161 
00162         cgGLUnbindProgram(ProgramCG::_FProfile);
00163         cgGLDisableProfile(ProgramCG::_FProfile);
00164 }
00165 
00166 
00167 void ShaderBagCG::LoadFixedShaders()
00168 {
00169 //      s_debug = new ProgramCG( "void main(float4 TexCoord0:TEXCOORD0, out float4 FragColor:COLOR0,"
00170 //              "uniform samplerRECT tex){ gl_FragColor.rg =  gl_TexCoord[0].st;}");
00171 
00172         s_gray = new ProgramCG( 
00173         "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
00174         "float intensity = dot(float3(0.299, 0.587, 0.114), texRECT(tex,TexCoord0.xy ).rgb);\n"
00175         "FragColor= float4(intensity, intensity, intensity, 1.0);}"     );
00176 
00177 
00178         s_sampling = new ProgramCG(
00179         "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
00180         "float4 cc = texRECT(tex, TexCoord0.xy);        FragColor = float4(cc.rg, 0.0, 0.0);    }"      );
00181 
00182 
00183         s_zero_pass = new ProgramCG("void main(out float4 FragColor : COLOR0){FragColor = 0;}");
00184 
00185 
00186         ProgramCG * program;
00187         s_margin_copy = program = new ProgramCG(
00188         "void main(float4 texCoord0: TEXCOORD0, out float4 FragColor: COLOR0, \n"
00189         "uniform samplerRECT tex, uniform float2 truncate){\n"
00190         "FragColor = texRECT(tex, min(texCoord0.xy, truncate)); }");
00191 
00192         _param_margin_copy_truncate = cgGetNamedParameter(*program, "truncate");
00193 
00194 
00195         s_grad_pass = new ProgramCG(
00196         "void main (\n"
00197         "float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"
00198         "float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, float4 TexCU : TEXCOORD4,\n"
00199         "out float4 FragData0 : COLOR0, uniform samplerRECT tex)\n"
00200         "{\n"
00201         "       float4 v1, v2, gg;\n"
00202         "       float4 cc  = texRECT(tex, TexCC.xy);\n"
00203         "       gg.x = texRECT(tex, TexLC.xy).r;\n"
00204         "       gg.y = texRECT(tex, TexRC.xy).r;\n"
00205         "       gg.z = texRECT(tex, TexCD.xy).r;\n"
00206         "       gg.w = texRECT(tex, TexCU.xy).r;\n"
00207         "       float2 dxdy = (gg.yw - gg.xz); \n"
00208         "       float grad = 0.5*length(dxdy);\n"
00209         "       float theta = grad==0? 0: atan2(dxdy.y, dxdy.x);\n"
00210         "       FragData0 = float4(cc.rg, grad, theta);\n"
00211         "}\n\0");
00212 
00213 
00214         if(GlobalUtil::_SupportFP40)
00215         {
00216                 //use the packing mode for cpu list reshape and two orientations
00217                 if(GlobalUtil::_MaxOrientation != 2) GlobalUtil::_OrientationPack2 = 0;
00218 
00219                 LoadOrientationShader();
00220 
00221 
00222                 if(GlobalUtil::_DescriptorPPT)          LoadDescriptorShader();
00223 
00224         }else
00225         {
00226                 s_orientation = program =  new ProgramCG(
00227                 "void main(out float4 FragColor : COLOR0, \n"
00228         "       uniform samplerRECT fTex, uniform samplerRECT oTex, \n"
00229         "       uniform float size, \n"
00230         "       in float2 tpos : TEXCOORD0){\n"
00231         "       float4 cc = texRECT(fTex, tpos);\n"
00232         "       float4 oo = texRECT(oTex, cc.rg);\n"
00233         "       FragColor = float4(cc.rg, oo.a, size);}");  
00234                 _param_orientation_gtex= cgGetNamedParameter(*program, "oTex");
00235                 _param_orientation_size= cgGetNamedParameter(*program, "size");
00236 
00237 
00239                 GlobalUtil::_FullSupported = 0;
00240                 GlobalUtil::_MaxOrientation = 0;  //0 for simplified version
00241                 GlobalUtil::_DescriptorPPT = 0;
00242                 std::cerr<<"Orientation simplified on this hardware"<<endl;
00243                 std::cerr<<"Descriptor ignored on this hardware"<<endl;
00244         }
00245 
00246 
00247 }
00248 
00249 void ShaderBagCG::LoadDisplayShaders()
00250 {
00251         s_copy_key = new ProgramCG(
00252         "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
00253         "FragColor.rg= texRECT(tex, TexCoord0.xy).rg; FragColor.ba = float2(0,1);       }");
00254 
00255         //shader used to write a vertex buffer object
00256         //which is used to draw the quads of each feature
00257         ProgramCG * program;
00258         s_vertex_list = program = new ProgramCG(
00259         "void main(in float4 TexCoord0: TEXCOORD0,\n"
00260         "uniform float4 sizes, \n"
00261         "uniform samplerRECT tex, \n"
00262         "out float4 FragColor: COLOR0){\n"
00263         "float fwidth = sizes.y; \n"
00264         "float twidth = sizes.z; \n"
00265         "float rwidth = sizes.w; \n"
00266         "float index = 0.1*(fwidth*floor(TexCoord0.y) + TexCoord0.x);\n"
00267         "float px = fmod(index, twidth);\n"
00268         "float2 tpos= floor(float2(px, index*rwidth))+0.5;\n"
00269         "float4 cc = texRECT(tex, tpos );\n"
00270         "float size = cc.a * 3.0f;//sizes.x;// \n"
00271         "FragColor.zw = float2(0.0, 1.0);\n"
00272         "if(any(cc.xy <=0)) {FragColor.xy = cc.xy;}else \n"
00273         "{\n"
00274         "       float type = frac(px);\n"
00275         "       float2 dxy; float s, c;\n"
00276         "       dxy.x = type < 0.1 ? 0 : ((type <0.5 || type > 0.9)? size : -size);\n"
00277         "       dxy.y = type < 0.2 ? 0 : ((type < 0.3 || type > 0.7 )? -size :size); \n"
00278         "       sincos(cc.b, s, c);\n"
00279         "       FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
00280         "       FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n"
00281         "}\n\0");
00282         /*FragColor = float4(tpos, 0.0, 1.0);}\n\0");*/
00283 
00284         _param_genvbo_size = cgGetNamedParameter(*program, "sizes");
00285 
00286 
00287         s_display_gaussian =  new ProgramCG(
00288         "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
00289         "float r = texRECT(tex, TexCoord0.xy).r;\n"
00290         "FragColor = float4(r, r, r, 1.0);}");
00291 
00292 
00293         s_display_dog =  new ProgramCG(
00294         "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
00295         "float g = (0.5+20.0*texRECT(tex, TexCoord0.xy).g);\n"
00296         "FragColor = float4(g, g, g, 1.0);}" );
00297 
00298 
00299         s_display_grad = new ProgramCG(
00300         "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
00301         "float4 cc = texRECT(tex, TexCoord0.xy); FragColor = float4(5.0 * cc.bbb, 1.0); }");
00302 
00303 
00304         s_display_keys= new ProgramCG(
00305         "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
00306         "float4 cc = texRECT(tex, TexCoord0.xy);\n"
00307         "if(cc.r ==1.0) FragColor = float4(1.0, 0, 0,1.0); \n"
00308         "else {if (cc.r ==0.5) FragColor = float4(0.0,1.0,0.0,1.0);     else discard;}}");      
00309 
00310 }
00311 
00312 void ShaderBagCG::SetMarginCopyParam(int xmax, int ymax)
00313 {
00314         float truncate[2] = {xmax - 0.5f , ymax - 0.5f};
00315         cgGLSetParameter2fv(_param_margin_copy_truncate, truncate);
00316 }
00317 
00318 
// Builds the Cg fragment program for keypoint detection with iterative
// sub-pixel refinement and stores it in s_keypoint.
// ("MR" presumably stands for multiple refinement -- TODO confirm.)
//
// The program text is assembled in a fixed 10240-byte stack buffer through an
// ostrstream: four #define lines (THRESHOLD0, THRESHOLD1, THRESHOLD2,
// MAX_REFINE) followed by the shader body and an explicit '\0' terminator.
// After the ProgramCG is created, its "texU" and "texD" parameters are looked
// up and cached in _param_dog_texu / _param_dog_texd.
//
//   threshold      : 0.8*threshold becomes THRESHOLD0 (initial extremum test on
//                    the .g channel), threshold itself becomes THRESHOLD1
//                    (test applied after refinement).
//   edge_threshold : r in THRESHOLD2 = (r+1)^2 / r, used in the final
//                    (fxx+fyy)^2 < THRESHOLD2 * (fxx*fyy - fxy^2) test.
//
// Returns 1 unconditionally; the validity of the created program is not
// checked here.
00319 int ShaderBagCG::LoadKeypointShaderMR(float threshold, float edge_threshold)
00320 {
00321         char buffer[10240];
00322         float threshold0 = threshold * 0.8f;
00323         float threshold1 = threshold;
00324         float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
        // upper bound on passes through the refinement loop; never below 2
00325         int   max_refine = max(2, GlobalUtil::_SubpixelLocalization);
00326         ostrstream out(buffer, 10240);
00327 
00328         out <<  "#define THRESHOLD0 " << threshold0 << "\n"
00329                         "#define THRESHOLD1 " << threshold1 << "\n"
00330                         "#define THRESHOLD2 " << threshold2 << "\n"
00331                         "#define MAX_REFINE " << max_refine << "\n";
        // Shader part 1: read the .g channel of the 8 neighbours in tex
        // (v1: left/right/down/up, v2: the four diagonals) and the .r channel of
        // the 4 axis neighbours (gg); write gradient magnitude and angle to FragData0.
00332         out<<
00333         "void main (\n"
00334         "float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"
00335         "float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, \n"
00336         "float4 TexCU : TEXCOORD4, float4 TexLD : TEXCOORD5, \n"
00337         "float4 TexLU : TEXCOORD6, float4 TexRD : TEXCOORD7,\n"
00338         "out float4 FragData0 : COLOR0, out float4 FragData1 : COLOR1, \n"
00339         "uniform samplerRECT tex, uniform samplerRECT texU, uniform samplerRECT texD)\n"
00340         "{\n"
00341         "       float4 v1, v2, gg;\n"
00342         "       float2 TexRU = float2(TexRC.x, TexCU.y); \n"
00343         "       float4 cc  = texRECT(tex, TexCC.xy);\n"
00344         "       v1.x = texRECT(tex, TexLC.xy).g;\n"
00345         "       gg.x = texRECT(tex, TexLC.xy).r;\n"
00346         "       v1.y = texRECT(tex, TexRC.xy).g;\n"
00347         "       gg.y = texRECT(tex, TexRC.xy).r;\n"
00348         "       v1.z = texRECT(tex, TexCD.xy).g;\n"
00349         "       gg.z = texRECT(tex, TexCD.xy).r;\n"
00350         "       v1.w = texRECT(tex, TexCU.xy).g;\n"
00351         "       gg.w = texRECT(tex, TexCU.xy).r;\n"
00352         "       v2.x = texRECT(tex, TexLD.xy).g;\n"
00353         "       v2.y = texRECT(tex, TexLU.xy).g;\n"
00354         "       v2.z = texRECT(tex, TexRD.xy).g;\n"
00355         "       v2.w = texRECT(tex, TexRU.xy).g;\n"
00356         "       float2 dxdy = 0.5*(gg.yw - gg.xz); \n"
00357         "       float grad = length(dxdy);\n"
00358         "       float theta = grad==0? 0: atan2(dxdy.y, dxdy.x);\n"
00359         "       FragData0 = float4(cc.rg, grad, theta);\n"
00360         <<
00361         "       float dog = 0.0; \n"
00362         "       FragData1 = float4(0, 0, 0, 0); \n"
00363         "       float2 v3; float4 v4, v5, v6;\n"
        // the maximum case: the centre must exceed all 8 neighbours in tex and
        // must not be smaller than any of the 9 samples in texU and in texD;
        // dog is then set to 1.0
00364         <<
00365         "       if( cc.g > THRESHOLD0 && all(cc.gggg > max(v1, v2)))\n"
00366         "       {\n"
00367         "               v3.x = texRECT(texU, TexCC.xy).g;\n"
00368         "               v4.x = texRECT(texU, TexLC.xy).g;\n"
00369         "               v4.y = texRECT(texU, TexRC.xy).g;\n"
00370         "               v4.z = texRECT(texU, TexCD.xy).g;\n"
00371         "               v4.w = texRECT(texU, TexCU.xy).g;\n"
00372         "               v6.x = texRECT(texU, TexLD.xy).g;\n"
00373         "               v6.y = texRECT(texU, TexLU.xy).g;\n"
00374         "               v6.z = texRECT(texU, TexRD.xy).g;\n"
00375         "               v6.w = texRECT(texU, TexRU.xy).g;\n"
00376         "               if(cc.g < v3.x || any(cc.gggg<v4.xyzw || cc.gggg<v6.xyzw))return; \n"
00377         "               v3.y = texRECT(texD, TexCC.xy).g;\n"
00378         "               v5.x = texRECT(texD, TexLC.xy).g;\n"
00379         "               v5.y = texRECT(texD, TexRC.xy).g;\n"
00380         "               v5.z = texRECT(texD, TexCD.xy).g;\n"
00381         "               v5.w = texRECT(texD, TexCU.xy).g;\n"
00382         "               v6.x = texRECT(texD, TexLD.xy).g;\n"
00383         "               v6.y = texRECT(texD, TexLU.xy).g;\n"
00384         "               v6.z = texRECT(texD, TexRD.xy).g;\n"
00385         "               v6.w = texRECT(texD, TexRU.xy).g;\n"
00386         "               if(cc.g < v3.y || any(cc.gggg<v5.xyzw || cc.gggg<v6.xyzw))return; \n"
00387         "               dog = 1.0; \n"
00388         "       }\n"
00389         //the minimum case
        // (mirror of the maximum case with the comparisons reversed; dog = 0.5)
00390         <<
00391         "  else if(cc.g < -THRESHOLD0 && all(cc.gggg < min(v1, v2)))\n"
00392         "  {\n"
00393         "               v3.x = texRECT(texU, TexCC.xy).g;\n"
00394         "               v4.x = texRECT(texU, TexLC.xy).g;\n"
00395         "               v4.y = texRECT(texU, TexRC.xy).g;\n"
00396         "               v4.z = texRECT(texU, TexCD.xy).g;\n"
00397         "               v4.w = texRECT(texU, TexCU.xy).g;\n"
00398         "               v6.x = texRECT(texU, TexLD.xy).g;\n"
00399         "               v6.y = texRECT(texU, TexLU.xy).g;\n"
00400         "               v6.z = texRECT(texU, TexRD.xy).g;\n"
00401         "               v6.w = texRECT(texU, TexRU.xy).g;\n"
00402         "               if(cc.g > v3.x || any(cc.gggg>v4.xyzw || cc.gggg>v6.xyzw))return; \n"
00403         "               v3.y = texRECT(texD, TexCC.xy).g;\n"
00404         "               v5.x = texRECT(texD, TexLC.xy).g;\n"
00405         "               v5.y = texRECT(texD, TexRC.xy).g;\n"
00406         "               v5.z = texRECT(texD, TexCD.xy).g;\n"
00407         "               v5.w = texRECT(texD, TexCU.xy).g;\n"
00408         "               v6.x = texRECT(texD, TexLD.xy).g;\n"
00409         "               v6.y = texRECT(texD, TexLU.xy).g;\n"
00410         "               v6.z = texRECT(texD, TexRD.xy).g;\n"
00411         "               v6.w = texRECT(texD, TexRU.xy).g;\n"
00412         "               if(cc.g > v3.y || any(cc.gggg>v5.xyzw || cc.gggg>v6.xyzw))return; \n"
00413         "               dog = 0.5 ; \n"
00414         "       }\n"
00415         "       else\n"
00416         "               return;\n"
        // Refinement loop: form first/second differences of the .g channel in
        // x, y and across tex/texU/texD, then solve the 3x3 system
        //   [fxx fxy fxs; fxy fyy fys; fxs fys fss] * dxys = -[fx fy fs]
        // by row elimination, swapping rows so the pivot has the largest
        // magnitude. dxys stays (0,0,0) when a pivot is <= 1e-10.
00417         <<
00418         "  int i = 0; \n"
00419         "  float2 offset = float2(0, 0);\n"
00420         "  float2 offsets = float2(0, 0);\n"
00421         "  float3 dxys;         bool key_moved; \n"
00422         "  float fx, fy, fs; \n"
00423         "  float fxx, fyy, fxy; \n"
00424         "  float fxs, fys, fss; \n"
00425         "  do\n"
00426         "  {\n"
00427         "       dxys = float3(0, 0, 0);\n"
00428         "       offset = float2(0, 0);\n"
00429         "       float4 D2 = v1.xyzw - cc.gggg;\n"
00430         "       fxx = D2.x + D2.y;\n"
00431         "       fyy = D2.z + D2.w;\n"
00432         "       float2 D4 = v2.xw - v2.yz;\n"
00433         "       fxy = 0.25*(D4.x + D4.y);\n"
00434         "       float2 D5 = 0.5*(v1.yw-v1.xz); \n"
00435         "       fx = D5.x;\n"
00436         "       fy = D5.y ; \n"
00437         "       fs = 0.5*( v3.x - v3.y ); \n"
00438         "       fss = v3.x + v3.y - cc.g - cc.g;\n"
00439         "       fxs = 0.25 * ( v4.y + v5.x - v4.x - v5.y);\n"
00440         "       fys = 0.25 * ( v4.w + v5.z - v4.z - v5.w);\n"
00441         "       float4 A0, A1, A2 ;                     \n"
00442         "       A0 = float4(fxx, fxy, fxs, -fx);        \n"
00443         "       A1 = float4(fxy, fyy, fys, -fy);        \n"
00444         "       A2 = float4(fxs, fys, fss, -fs);        \n"
00445         "       float3 x3 = abs(float3(fxx, fxy, fxs));         \n"
00446         "       float maxa = max(max(x3.x, x3.y), x3.z);        \n"
00447         "       if(maxa > 1e-10 )                                                       \n"
00448         "       {\n"
00449         "               if(x3.y ==maxa )                                                        \n"
00450         "               {                                                                                       \n"
00451         "                       float4 TEMP = A1; A1 = A0; A0 = TEMP;   \n"
00452         "               }else if( x3.z == maxa )                                        \n"
00453         "               {                                                                                       \n"
00454         "                       float4 TEMP = A2; A2 = A0; A0 = TEMP;   \n"
00455         "               }                                                                                       \n"
00456         "               A0 /= A0.x;                                                                     \n"
00457         "               A1 -= A1.x * A0;                                                        \n"
00458         "               A2 -= A2.x * A0;                                                        \n"
00459         "               float2 x2 = abs(float2(A1.y, A2.y));            \n"
00460         "               if( x2.y > x2.x )                                                       \n"
00461         "               {                                                                                       \n"
00462         "                       float3 TEMP = A2.yzw;                                   \n"
00463         "                       A2.yzw = A1.yzw;                                                \n"
00464         "                       A1.yzw = TEMP;                                                  \n"
00465         "                       x2.x = x2.y;                                                    \n"
00466         "               }                                                                                       \n"
00467         "               if(x2.x > 1e-10)                                                        \n"
00468         "               {\n"
00469         "                       A1.yzw /= A1.y;                                                 \n"
00470         "                       A2.yzw -= A2.y * A1.yzw;                                \n"
00471         "                       if(abs(A2.z) > 1e-10)                                   \n"
00472         "                       {\n"
00473         // compute dx, dy, ds: 
00474         <<
00475         "                               dxys.z = A2.w /A2.z;                                            \n"
00476         "                               dxys.y = A1.w - dxys.z*A1.z;                        \n"
00477         "                               dxys.x = A0.w - dxys.z*A0.z - dxys.y*A0.y;      \n"
00478         "                       }\n"
00479         "               }\n"
00480         "       }\n"
        // A solved x or y offset beyond +-0.6 moves the sample window by one
        // pixel in that direction; the neighbourhood is then re-read at the
        // accumulated offset and the loop repeats while i < MAX_REFINE.
00481         "       offset.x = dxys.x > 0.6 ? 1 : 0 + dxys.x < -0.6 ? -1 : 0;\n"
00482         "       offset.y = dxys.y > 0.6 ? 1 : 0 + dxys.y < - 0.6? -1 : 0;\n"
00483         "       i++; key_moved = i < MAX_REFINE && any(abs(offset)>0) ; \n"
00484         "       if(key_moved)\n"
00485         "       {\n"
00486         "               offsets += offset; \n"
00487         "           cc  =  texRECT(tex, TexCC.xy  + offsets);\n"
00488         "               v1.x = texRECT(tex , TexLC.xy + offsets).g;\n"
00489         "               v1.y = texRECT(tex , TexRC.xy + offsets).g;\n"
00490         "               v1.z = texRECT(tex , TexCD.xy + offsets).g;\n"
00491         "               v1.w = texRECT(tex , TexCU.xy + offsets).g;\n"
00492         "               v2.x = texRECT(tex , TexLD.xy + offsets).g;\n"
00493         "               v2.y = texRECT(tex , TexLU.xy + offsets).g;\n"
00494         "               v2.z = texRECT(tex , TexRD.xy + offsets).g;\n"
00495         "               v2.w = texRECT(tex , TexRU.xy + offsets).g;\n"
00496         "               v3.x = texRECT(texU, TexCC.xy + offsets).g;\n"
00497         "               v4.x = texRECT(texU, TexLC.xy + offsets).g;\n"
00498         "               v4.y = texRECT(texU, TexRC.xy + offsets).g;\n"
00499         "               v4.z = texRECT(texU, TexCD.xy + offsets).g;\n"
00500         "               v4.w = texRECT(texU, TexCU.xy + offsets).g;\n"
00501         "               v3.y = texRECT(texD, TexCC.xy + offsets).g;\n"
00502         "               v5.x = texRECT(texD, TexLC.xy + offsets).g;\n"
00503         "               v5.y = texRECT(texD, TexRC.xy + offsets).g;\n"
00504         "               v5.z = texRECT(texD, TexCD.xy + offsets).g;\n"
00505         "               v5.w = texRECT(texD, TexCU.xy + offsets).g;\n"
00506         "       }\n"
00507         "  }while(key_moved);\n"
        // test1: magnitude of the refined .g value against THRESHOLD1;
        // test2: edge test on the 2x2 second-difference terms against THRESHOLD2
00508           <<
00509         "  bool test1 = (abs(cc.g + 0.5*dot(float3(fx, fy, fs), dxys ))> THRESHOLD1) ;\n"
00510         "  float test2_v1= fxx*fyy - fxy *fxy; \n"
00511         "  float test2_v2 = (fxx+fyy); \n"
00512         "  test2_v2 = test2_v2*test2_v2;\n"
00513         "  bool test2 = test2_v1>0 && test2_v2 < THRESHOLD2 * test2_v1; \n "
00514     //keep the point when the offset is less than 1
        // (FragData1 = (dog, dxys.xy + offsets, dxys.z) when kept, all zeros otherwise)
00515         <<
00516         "  FragData1 = test1 && test2 && all( abs(dxys) < 1)? float4( dog, dxys.xy+offsets, dxys.z) : float4(0, 0, 0, 0); \n"
00517         "}\n"   
00518         <<'\0';
00519 
        // compile the assembled source and cache the handles of the two
        // neighbouring-level samplers
00520         ProgramCG * program; 
00521         s_keypoint = program = new ProgramCG(buffer);
00522         //parameter
00523         _param_dog_texu = cgGetNamedParameter(*program, "texU");
00524         _param_dog_texd = cgGetNamedParameter(*program, "texD");
00525 
00526         return 1;
00527 
00528 }
00529 
00530 //keypoint detection shader
00531 //1. compare with 26 neighbours
00532 //2. sub-pixel sub-scale localization
00533 //3. output: [dog, offset(x,y,s)]
00534 
00535 void ShaderBagCG:: LoadKeypointShader(float threshold, float edge_threshold)
00536 {
00537         char buffer[10240];
00538         float threshold0 = threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f);
00539         float threshold1 = threshold;
00540         float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
00541         ostrstream out(buffer, 10240);
00542         out<<setprecision(8);
00543         streampos pos;
00544         //tex(X)(Y)
00545         //X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)  
00546         //Y: (CDU) (CENTER 0, DOWN -1, UP    +1) 
00547 
00548         out <<  "#define THRESHOLD0 " << threshold0 << "\n"
00549                         "#define THRESHOLD1 " << threshold1 << "\n"
00550                         "#define THRESHOLD2 " << threshold2 << "\n";
00551         out<<
00552         "void main (\n"
00553         "float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"
00554         "float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, \n"
00555         "float4 TexCU : TEXCOORD4, float4 TexLD : TEXCOORD5, \n"
00556         "float4 TexLU : TEXCOORD6, float4 TexRD : TEXCOORD7,\n"
00557         "out float4 FragData0 : COLOR0, out float4 FragData1 : COLOR1, \n"
00558         "uniform samplerRECT tex, uniform samplerRECT texU, uniform samplerRECT texD)\n"
00559         "{\n"
00560         "       float4 v1, v2, gg;\n"
00561         "       float2 TexRU = float2(TexRC.x, TexCU.y); \n"
00562         "       float4 cc  = texRECT(tex, TexCC.xy);\n"
00563         "       v1.x = texRECT(tex, TexLC.xy).g;\n"
00564         "       gg.x = texRECT(tex, TexLC.xy).r;\n"
00565         "       v1.y = texRECT(tex, TexRC.xy).g;\n"
00566         "       gg.y = texRECT(tex, TexRC.xy).r;\n"
00567         "       v1.z = texRECT(tex, TexCD.xy).g;\n"
00568         "       gg.z = texRECT(tex, TexCD.xy).r;\n"
00569         "       v1.w = texRECT(tex, TexCU.xy).g;\n"
00570         "       gg.w = texRECT(tex, TexCU.xy).r;\n"
00571         "       v2.x = texRECT(tex, TexLD.xy).g;\n"
00572         "       v2.y = texRECT(tex, TexLU.xy).g;\n"
00573         "       v2.z = texRECT(tex, TexRD.xy).g;\n"
00574         "       v2.w = texRECT(tex, TexRU.xy).g;\n"
00575         "       float2 dxdy = (gg.yw - gg.xz); \n"
00576         "       float grad = 0.5*length(dxdy);\n"
00577         "       float theta = grad==0? 0: atan2(dxdy.y, dxdy.x);\n"
00578         "       FragData0 = float4(cc.rg, grad, theta);\n"
00579 
00580         //test against 8 neighbours
00581         //use variable to identify type of extremum
00582         //1.0 for local maximum and 0.5 for minimum
00583         <<
00584         "       float dog = 0.0; \n"
00585         "       FragData1 = float4(0, 0, 0, 0); \n"
00586         "       dog = cc.g > THRESHOLD0 && all(cc.gggg > max(v1, v2))?1.0: 0.0;\n"
00587         "       dog = cc.g < -THRESHOLD0 && all(cc.gggg < min(v1, v2))?0.5: dog;\n";
00588 
00589         pos = out.tellp();
00590         //do edge supression first.. 
00591         //vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
00592         //vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
00593 
00594         out<<
00595         "       if(dog == 0.0) return;\n"
00596         "       float fxx, fyy, fxy; \n"
00597         "       float4 D2 = v1.xyzw - cc.gggg;\n"
00598         "       float2 D4 = v2.xw - v2.yz;\n"
00599         "       fxx = D2.x + D2.y;\n"
00600         "       fyy = D2.z + D2.w;\n"
00601         "       fxy = 0.25*(D4.x + D4.y);\n"
00602         "       float fxx_plus_fyy = fxx + fyy;\n"
00603         "       float score_up = fxx_plus_fyy*fxx_plus_fyy; \n"
00604         "       float score_down = (fxx*fyy - fxy*fxy);\n"
00605         "       if( score_down <= 0 || score_up > THRESHOLD2 * score_down)return;\n"
00606         //...
00607         <<
00608         "       float2 D5 = 0.5*(v1.yw-v1.xz); \n"
00609         "       float fx = D5.x, fy = D5.y ; \n"
00610         "       float fs, fss , fxs, fys ; \n"
00611         "       float2 v3; float4 v4, v5, v6;\n"
00612         //read 9 pixels of upper level
00613         <<
00614         "       v3.x = texRECT(texU, TexCC.xy).g;\n"
00615         "       v4.x = texRECT(texU, TexLC.xy).g;\n"
00616         "       v4.y = texRECT(texU, TexRC.xy).g;\n"
00617         "       v4.z = texRECT(texU, TexCD.xy).g;\n"
00618         "       v4.w = texRECT(texU, TexCU.xy).g;\n"
00619         "       v6.x = texRECT(texU, TexLD.xy).g;\n"
00620         "       v6.y = texRECT(texU, TexLU.xy).g;\n"
00621         "       v6.z = texRECT(texU, TexRD.xy).g;\n"
00622         "       v6.w = texRECT(texU, TexRU.xy).g;\n"
00623         //compare with 9 pixels of upper level
00624         //read and compare with 9 pixels of lower level
00625         //the maximum case
00626         <<
00627         "       if(dog == 1.0)\n"
00628         "       {\n"
00629         "               bool4 test = cc.gggg < max(v4, v6); \n"
00630         "               if(cc.g < v3.x || any(test.xy||test.zw))return; \n"
00631         "               v3.y = texRECT(texD, TexCC.xy).g;\n"
00632         "               v5.x = texRECT(texD, TexLC.xy).g;\n"
00633         "               v5.y = texRECT(texD, TexRC.xy).g;\n"
00634         "               v5.z = texRECT(texD, TexCD.xy).g;\n"
00635         "               v5.w = texRECT(texD, TexCU.xy).g;\n"
00636         "               v6.x = texRECT(texD, TexLD.xy).g;\n"
00637         "               v6.y = texRECT(texD, TexLU.xy).g;\n"
00638         "               v6.z = texRECT(texD, TexRD.xy).g;\n"
00639         "               v6.w = texRECT(texD, TexRU.xy).g;\n"
00640         "               test = cc.gggg<max(v5, v6); \n"
00641         "               if(cc.g < v3.y || any(test.xy||test.zw))return; \n"
00642         "       }\n"
00643         //the minimum case
00644         <<
00645         "       else{\n"
00646         "               bool4 test = cc.gggg>min(v4, v6); \n"
00647         "               if(cc.g > v3.x || any(test.xy||test.zw))return; \n"
00648         "               v3.y = texRECT(texD, TexCC.xy).g;\n"
00649         "               v5.x = texRECT(texD, TexLC.xy).g;\n"
00650         "               v5.y = texRECT(texD, TexRC.xy).g;\n"
00651         "               v5.z = texRECT(texD, TexCD.xy).g;\n"
00652         "               v5.w = texRECT(texD, TexCU.xy).g;\n"
00653         "               v6.x = texRECT(texD, TexLD.xy).g;\n"
00654         "               v6.y = texRECT(texD, TexLU.xy).g;\n"
00655         "               v6.z = texRECT(texD, TexRD.xy).g;\n"
00656         "               v6.w = texRECT(texD, TexRU.xy).g;\n"
00657         "               test = cc.gggg>min(v5, v6); \n"
00658         "               if(cc.g > v3.y || any(test.xy||test.zw))return; \n"
00659         "       }\n";
00660 
00661         if(GlobalUtil::_SubpixelLocalization)
00662 
00663         // sub-pixel localization FragData1 = float4(dog, 0, 0, 0); return;
00664         out <<
00665         "       fs = 0.5*( v3.x - v3.y ); //bug fix 9/12/2007 \n"
00666         "       fss = v3.x + v3.y - cc.g - cc.g;\n"
00667         "       fxs = 0.25 * ( v4.y + v5.x - v4.x - v5.y);\n"
00668         "       fys = 0.25 * ( v4.w + v5.z - v4.z - v5.w);\n"
00669         
00671         // let dog difference be quatratic function  of dx, dy, ds; 
00672         // df(dx, dy, ds) = fx * dx + fy*dy + fs * ds + 
00673         //                                + 0.5 * ( fxx * dx * dx + fyy * dy * dy + fss * ds * ds)
00674         //                                + (fxy * dx * dy + fxs * dx * ds + fys * dy * ds)
00675         // (fx, fy, fs, fxx, fyy, fss, fxy, fxs, fys are the derivatives)
00676         
00677         //the local extremum satisfies
00678         // df/dx = 0, df/dy = 0, df/dz = 0
00679         
00680         //that is 
00681         // |-fx|     | fxx fxy fxs |   |dx|
00682         // |-fy|  =  | fxy fyy fys | * |dy|
00683         // |-fs|     | fxs fys fss |   |ds|
00684         // need to solve dx, dy, ds
00685 
00686         // Use Gauss elimination to solve the linear system
00687     <<
00688         "       float3 dxys = float3(0.0);                      \n"
00689         "       float4 A0, A1, A2 ;                     \n"
00690         "       A0 = float4(fxx, fxy, fxs, -fx);        \n"
00691         "       A1 = float4(fxy, fyy, fys, -fy);        \n"
00692         "       A2 = float4(fxs, fys, fss, -fs);        \n"
00693         "       float3 x3 = abs(float3(fxx, fxy, fxs));         \n"
00694         "       float maxa = max(max(x3.x, x3.y), x3.z);        \n"
00695         "       if(maxa >= 1e-10 ) {                                            \n"
00696         "       if(x3.y ==maxa )                                                        \n"
00697         "       {                                                                                       \n"
00698         "               float4 TEMP = A1; A1 = A0; A0 = TEMP;   \n"
00699         "       }else if( x3.z == maxa )                                        \n"
00700         "       {                                                                                       \n"
00701         "               float4 TEMP = A2; A2 = A0; A0 = TEMP;   \n"
00702         "       }                                                                                       \n"
00703         "       A0 /= A0.x;                                                                     \n"
00704         "       A1 -= A1.x * A0;                                                        \n"
00705         "       A2 -= A2.x * A0;                                                        \n"
00706         "       float2 x2 = abs(float2(A1.y, A2.y));            \n"
00707         "       if( x2.y > x2.x )                                                       \n"
00708         "       {                                                                                       \n"
00709         "               float3 TEMP = A2.yzw;                                   \n"
00710         "               A2.yzw = A1.yzw;                                                \n"
00711         "               A1.yzw = TEMP;                                                  \n"
00712         "               x2.x = x2.y;                                                    \n"
00713         "       }                                                                                       \n"
00714         "       if(x2.x >= 1e-10) {                                             \n"
00715         "       A1.yzw /= A1.y;                                                         \n"
00716         "       A2.yzw -= A2.y * A1.yzw;                                        \n"
00717         "       if(abs(A2.z) >= 1e-10) {                                \n"
00718         // compute dx, dy, ds: 
00719         <<
00720         "       dxys.z = A2.w /A2.z;                                \n"
00721         "       dxys.y = A1.w - dxys.z*A1.z;                        \n"
00722         "       dxys.x = A0.w - dxys.z*A0.z - dxys.y*A0.y;      \n"
00723 
00724         //one more threshold which I forgot in  versions prior to 286
00725         <<
00726         "       bool bugfix_test = (abs(cc.g + 0.5*dot(float3(fx, fy, fs), dxys )) < THRESHOLD1) ;\n"
00727         "       if(bugfix_test || any(abs(dxys) >= 1.0)) dog = 0; \n"
00728         "       }}}\n"
00729     //keep the point when the offset is less than 1
00730         <<
00731         "       FragData1 = float4( dog, dxys); \n"
00732         "}\n"   <<'\0';
00733 
00734         else            out<<
00735         "       FragData1 =  float4( dog, 0, 0, 0) ;    \n"
00736         "}\n"   <<'\0';
00737 
00738         ProgramCG * program; 
00739         s_keypoint = program = new ProgramCG(buffer);
00740         if(!program->IsValidProgram())
00741         {
00742                 delete program;
00743                 out.seekp(pos);
00744                 out << 
00745         "       FragData1 =  float4( fabs(cc.g) > 2.0 * THRESHOLD0? dog : 0, 0, 0, 0) ; \n"
00746         "}\n" <<'\0';
00747                 s_keypoint = program = new ProgramCG(buffer);
00748                 GlobalUtil::_SubpixelLocalization = 0;
00749                 std::cerr<<"Detection simplified on this hardware"<<endl;
00750         }
00751         //parameter
00752         _param_dog_texu = cgGetNamedParameter(*program, "texU");
00753         _param_dog_texd = cgGetNamedParameter(*program, "texD");
00754 
00755 
00756 
00757 
00758 }
00759 
00760 
00761 void ShaderBagCG::SetDogTexParam(int texU, int texD)
00762 {
00763         cgGLSetTextureParameter(_param_dog_texu, texU);
00764         cgGLEnableTextureParameter(_param_dog_texu);
00765         cgGLSetTextureParameter(_param_dog_texd, texD);
00766         cgGLEnableTextureParameter(_param_dog_texd);
00767 }
00768 
00769 void ShaderBagCG::SetGenListStepParam(int tex, int tex0)
00770 {
00771         cgGLSetTextureParameter(_param_genlist_step_tex, tex);
00772         cgGLEnableTextureParameter(_param_genlist_step_tex);
00773         cgGLSetTextureParameter(_param_genlist_step_tex0, tex0);
00774         cgGLEnableTextureParameter(_param_genlist_step_tex0);
00775 }
00776 
00777 void ShaderBagCG::SetGenVBOParam(float width, float fwidth, float size)
00778 {
00779         float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width};
00780         cgGLSetParameter4fv(_param_genvbo_size, sizes);
00781 }
00782 
00783 
00784 ProgramGPU* FilterGLCG::CreateFilterH(float kernel[], float offset[], int width)
00785 {
00786 
00787 
00788         char buffer[10240];
00789         ostrstream out(buffer, 10240);
00790 
00791         out<<setprecision(8);
00792 
00793         if(GlobalUtil::_BetaFilter)
00794         {
00795                 out<< "void main(uniform samplerRECT tex,";
00796                 out<<"\n\tin float4 TexCoord0: TEXCOORD0,";
00797                 out<<"\n\tout float4 FragColor : COLOR0 )";
00798                 out<<"\n{\n\tfloat4 intensity4 = float4(0, 0, 0, 0), data;\n";
00799                 out<<"float or = texRECT(tex, TexCoord0.xy).r, intensity;\n";
00800 
00801                 for(int i = 0; i< width; i+=4)
00802                 {
00803                         out <<"data = float4(";
00804                         for(int j = i; j < i + 4; j++)
00805                         {
00806                                 if(j != i) out <<", \n";
00807                                 if(j >= width)
00808                                 {
00809                                         out<<"0";
00810                                 }else if(offset[j]==0.0)
00811                                 {
00812                                         out<<"or";
00813                                 }else
00814                                 {
00815                                         out<<"texRECT(tex, TexCoord0.xy + float2(float("<<offset[j] <<") , 0)).r";
00816                                 }
00817                         }
00818                         out << ");\n";
00819                         out << "intensity4 += data * float4(";
00820                         for(int k = i; k < i + 4; k++)
00821                         {
00822                                 if(k != i) out <<", ";
00823                                 if(k >= width)  out<<"0";
00824                                 else                    out<<kernel[k];
00825                         }
00826                         out << ");\n";
00827 
00828                 }
00829                 out << "intensity4.xy += intensity4.zw;\n";
00830                 out << "intensity = intensity4.x + intensity4.y;\n";
00831         }else
00832         {
00833                 out<< "void main(uniform samplerRECT tex,";
00834                 out<<"\n\tin float4 TexCoord0: TEXCOORD0,";
00835                 out<<"\n\tout float4 FragColor : COLOR0 )";
00836                 out<<"\n{\n\tfloat intensity = 0.0 ;  float2 pos;\n";
00837 
00838                 for(int i = 0; i< width; i++)
00839                 {
00840                         if(offset[i]==0.0)
00841                         {
00842                                 out<<"float or = texRECT(tex, TexCoord0.xy).r;\n";
00843                                 out<<"intensity+= or * "<<kernel[i]<<";\n";
00844 
00845                         }else
00846                         {
00847                                 out<<"pos = TexCoord0.xy + float2(float("<<offset[i] <<") , 0);\n";
00848                                 out<<"intensity+= "<<kernel[i]<<"*texRECT(tex, pos).r;\n";
00849                         }
00850                 }
00851         }
00852         //copy original data to red channel
00853         out<<"FragColor.r = or;\n"; 
00854         out<<"FragColor.b  = intensity;}\n"<<'\0';
00855 
00856         return new ProgramCG( buffer);
00857 }
00858 
00859 
00860 ProgramGPU* FilterGLCG::CreateFilterV(float kernel[], float offset[], int height)
00861 {
00862         char buffer[10240];
00863         ostrstream out(buffer, 10240);
00864         out<<setprecision(8);
00865 
00866         if(GlobalUtil::_BetaFilter)
00867         {
00868                 out<< "void main(uniform samplerRECT tex,";
00869                 out<<"\n\tin float4 TexCoord0: TEXCOORD0,";
00870                 out<<"\n\tout float4 FragColor : COLOR0 )";
00871                 out<<"\n{\n\tfloat4 intensity4 = float4(0, 0, 0, 0), data;\n";
00872                 out<<"float2 orb = texRECT(tex, TexCoord0.xy).rb; float intensity;\n";
00873 
00874                 for(int i = 0; i< height; i+=4)
00875                 {
00876                         out <<"data = float4(";
00877                         for(int j = i; j < i + 4; j++)
00878                         {
00879                                 if(j != i) out <<", \n";
00880                                 if(j >= height)
00881                                 {
00882                                         out<<"0";
00883                                 }else if(offset[j]==0.0)
00884                                 {
00885                                         out<<"orb.y";
00886                                 }else
00887                                 {
00888                                         out<<"texRECT(tex, TexCoord0.xy + float2(0, float("<<offset[j] <<"))).b";
00889                                 }
00890                         }
00891                         out << ");\n";
00892                         out << "intensity4 += data * float4(";
00893                         for(int k = i; k < i + 4; k++)
00894                         {
00895                                 if(k != i) out <<", ";
00896                                 if(k >= height) out<<"0";
00897                                 else                    out<<kernel[k];
00898                         }
00899                         out << ");\n";
00900 
00901                 }
00902                 out << "intensity4.xy += intensity4.zw;\n";
00903                 out << "intensity = intensity4.x + intensity4.y;\n";
00904         }else
00905         {
00906                 out<< "void main(uniform samplerRECT tex,";
00907                 out<<"\n\tin float4 TexCoord0: TEXCOORD0,";
00908                 out<<"\n\tout float4 FragColor : COLOR0 )";
00909                 out<<"\n{\n\tfloat intensity = 0.0 ;  float2 pos;\n";
00910 
00911                 for(int i = 0; i< height; i++)
00912                 {
00913                         if(offset[i]==0.0)
00914                         {
00915                                 out<<"float2 orb = texRECT(tex, TexCoord0.xy).rb;\n";
00916                                 out<<"intensity+= orb.y * "<<kernel[i]<<";\n";
00917 
00918                         }else
00919                         {
00920                                 out<<"pos = TexCoord0.xy + float2(0, float("<<offset[i] <<"));\n";
00921                                 out<<"intensity+= "<<kernel[i]<<"*texRECT(tex, pos).b;\n";
00922                         }
00923                 }
00924         }
00925         out<<"FragColor.b = orb.y;\n";
00926         out<<"FragColor.g = intensity - orb.x;\n"; // difference of gaussian..
00927         out<<"FragColor.r = intensity;}\n"<<'\0';
00928         
00929         return new ProgramCG( buffer);
00930 }
00931 
00932 
00933 ProgramGPU* FilterGLCG::CreateFilterHPK(float kernel[], float offset[], int width)
00934 {
00935         //both h and v are packed...
00936         int i, j , xw, xwn;
00937         int halfwidth  = width >>1;
00938         float * pf = kernel + halfwidth;
00939         int nhpixel = (halfwidth+1)>>1; //how many neighbour pixels need to be looked up
00940         int npixel  = (nhpixel<<1)+1;//
00941         char buffer[10240];
00942         float weight[3];
00943         ostrstream out(buffer, 10240);
00944         out<<setprecision(8);
00945 
00946         out<< "void main(uniform samplerRECT tex, float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0 ){\n";
00947         out<< "float4 result = float4(0, 0, 0, 0); \nfloat4 pc; float2 coord; \n";
00949         for( i = 0 ; i < npixel ; i++)
00950         {
00951 
00952                 out<<"coord = TexCoord0.xy + float2(float("<<i-nhpixel<<"),0);\n";
00953                 out<<"pc=texRECT(tex, coord);\n";
00954                 if(GlobalUtil::_PreciseBorder)          out<<"if(coord.x < 0) pc = pc.rrbb;\n";
00955 
00956                 //for each sub-pixel j  in center, the weight of sub-pixel k 
00957                 xw = (i - nhpixel)*2;
00958                 for( j = 0; j < 3; j++)
00959                 {
00960                         xwn = xw  + j  -1;
00961                         weight[j] = xwn < -halfwidth || xwn > halfwidth? 0 : pf[xwn];
00962                 }
00963                 //if(weight[1]!=0.0)    out<<"FragColor += "<<weight[1]<<"*pc;\n";
00964                 //out<<"FragColor += float4("<<weight[2]<<","<<weight[0]<<","<<weight[2]<<","<<weight[0]<<")*pc.grab;\n";
00965 
00966                 if(weight[1] == 0.0)
00967                 {
00968                         out<<"result += float4("<<weight[2]<<","<<weight[0]<<","<<weight[2]<<","<<weight[0]<<")*pc.grab;\n";
00969                 }
00970                 else
00971                 {
00972                         out<<"result += float4("<<weight[1]<<", "<<weight[0]<<", "<<weight[1]<<", "<<weight[0]<<")*pc.rrbb;\n";
00973                         out<<"result += float4("<<weight[2]<<", "<<weight[1]<<", "<<weight[2]<<", "<<weight[1]<<")*pc.ggaa;\n";
00974                 }
00975 
00976         }
00977         out<<
00978         "        FragColor = result; }\n"<<'\0';
00979         return new ProgramCG( buffer);
00980 }
00981 
00982 ProgramGPU* FilterGLCG::CreateFilterVPK(float kernel[], float offset[], int height)
00983 {
00984 
00985         //both h and v are packed...
00986         int i, j , yw, ywn;
00987         int halfh  = height >>1;
00988         float * pf = kernel + halfh;
00989         int nhpixel = (halfh+1)>>1;     //how many neighbour pixels need to be looked up
00990         int npixel  = (nhpixel<<1)+1;//
00991         char buffer[10240];
00992         float weight[3];
00993         ostrstream out(buffer, 10240);
00994         out<<setprecision(8);
00995 
00996         out<< "void main(uniform samplerRECT tex, float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0 ){\n";
00997         out<< "float4 result = float4(0, 0, 0, 0);\nfloat4 pc; float2 coord;\n";
00999 
01000         for( i = 0 ; i < npixel ; i++)
01001         {
01002 
01003                 out<<"coord = TexCoord0.xy + float2(0, float("<<i-nhpixel<<"));\n";
01004                 out<<"pc=texRECT(tex, coord);\n";
01005                 if(GlobalUtil::_PreciseBorder)  out<<"if(coord.y < 0) pc = pc.rgrg;\n";
01006                 //for each sub-pixel j  in center, the weight of sub-pixel k 
01007                 yw = (i - nhpixel)*2;
01008                 for( j = 0; j < 3; j++)
01009                 {
01010                         ywn = yw + j  -1;
01011                         weight[j] = ywn < -halfh || ywn > halfh? 0 : pf[ywn];
01012                 }
01013                 //if(weight[1]!=0.0)    out<<"FragColor += "<<weight[1]<<"*pc;\n";
01014                 //out<<"FragColor += float4("<<weight[2]<<","<<weight[2]<<","<<weight[0]<<","<<weight[0]<<")*pc.barg;\n";
01015                 if(weight[1] == 0.0)
01016                 {
01017                         out<<"result += float4("<<weight[2]<<","<<weight[2]<<","<<weight[0]<<","<<weight[0]<<")*pc.barg;\n";
01018                 }else
01019                 {
01020                         out<<"result += float4("<<weight[1]<<","<<weight[1]<<","<<weight[0]<<","<<weight[0]<<")*pc.rgrg;\n";
01021                         out<<"result += float4("<<weight[2]<<","<<weight[2]<<","<<weight[1]<<","<<weight[1]<<")*pc.baba;\n";
01022                 }
01023         }
01024         out<<
01025         "        FragColor = result; }\n"<<'\0';
01026         return new ProgramCG( buffer);
01027 }
01028 
01029 
01030 void ShaderBagCG::LoadGenListShader(int ndoglev, int nlev)
01031 {
01032         ProgramCG * program;
01033 
01034         s_genlist_init_tight = new ProgramCG(
01035         "void main (\n"
01036         "uniform samplerRECT tex, in float4 TexCoord0 : TEXCOORD0,\n"
01037         "in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, in float4 TexCoord3 : TEXCOORD3,\n"
01038         "out float4 FragColor : COLOR0){\n"
01039         "float4 helper = float4( texRECT(tex, TexCoord0.xy).r,  texRECT(tex, TexCoord1.xy).r,\n"
01040         "texRECT(tex, TexCoord2.xy).r, texRECT(tex, TexCoord3.xy).r);\n"
01041         "FragColor = float4(helper>0.0);\n"
01042         "}");
01043 
01044         s_genlist_init_ex = program = new ProgramCG(
01045         "void main (uniform float2 bbox, \n"
01046         "uniform samplerRECT tex, \n"
01047         "in float4 TexCoord0 : TEXCOORD0,\n"
01048         "in float4 TexCoord1 : TEXCOORD1, \n"
01049         "in float4 TexCoord2 : TEXCOORD2, \n"
01050         "in float4 TexCoord3 : TEXCOORD3,\n"
01051         "out float4 FragColor : COLOR0){\n"
01052         "float4 helper = float4( \n"
01053         "texRECT(tex, TexCoord0.xy).r, texRECT(tex, TexCoord1.xy).r,\n"
01054         "texRECT(tex, TexCoord2.xy).r, texRECT(tex, TexCoord3.xy).r);\n"
01055         "bool4 helper4 = bool4(TexCoord0.xy < bbox, TexCoord3.xy < bbox); \n"
01056         "bool4 helper2 = helper4.xzxz && helper4.yyww; \n"
01057         "FragColor = float4(helper2 && (helper>0.0 ));\n"
01058         "}");
01059         _param_genlist_init_bbox = cgGetNamedParameter( *program, "bbox");
01060 
01061 
01062         //reduction ...
01063         s_genlist_histo = new ProgramCG(
01064         "void main (\n"
01065         "uniform samplerRECT tex, in float2 TexCoord0 : TEXCOORD0,\n"
01066         "in float2 TexCoord1 : TEXCOORD1, in float2 TexCoord2 : TEXCOORD2, in float2 TexCoord3 : TEXCOORD3,\n"
01067         "out float4 FragColor : COLOR0){\n"
01068         "float4 helper; float4 helper2; \n"
01069         "helper = texRECT(tex, TexCoord0); helper2.xy = helper.xy + helper.zw; \n"
01070         "helper = texRECT(tex, TexCoord1); helper2.zw = helper.xy + helper.zw; \n"
01071         "FragColor.rg = helper2.xz + helper2.yw;\n"
01072         "helper = texRECT(tex, TexCoord2); helper2.xy = helper.xy + helper.zw; \n"
01073         "helper = texRECT(tex, TexCoord3); helper2.zw = helper.xy + helper.zw; \n"
01074         "FragColor.ba= helper2.xz+helper2.yw;\n"
01075         "}");
01076 
01077 
01078         //read of the first part, which generates tex coordinates 
01079 
01080         s_genlist_start= program =  LoadGenListStepShader(1, 1);
01081         _param_ftex_width= cgGetNamedParameter(*program, "width");
01082         _param_genlist_start_tex0 = cgGetNamedParameter(*program, "tex0");
01083         //stepping
01084         s_genlist_step = program = LoadGenListStepShader(0, 1);
01085         _param_genlist_step_tex= cgGetNamedParameter(*program, "tex");
01086         _param_genlist_step_tex0= cgGetNamedParameter(*program, "tex0");
01087 
01088 
01089 }
01090 
01091 ProgramCG* ShaderBagCG::LoadGenListStepShader(int start, int step)
01092 {
01093         int i;
01094         char buffer[10240];
01095         //char chanels[5] = "rgba";
01096         ostrstream out(buffer, 10240);
01097         out<<"void main(out float4 FragColor : COLOR0, \n";
01098 
01099         for(i = 0; i < step; i++) out<<"uniform samplerRECT tex"<<i<<",\n";
01100 
01101         if(start)
01102         {
01103                 out<<"uniform float width, \nin float2 tpos : TEXCOORD0){\n";
01104                 out<<"float  index = floor(tpos.y) * width + floor(tpos.x) + 0.0001;\n";
01105                 out<<"float2 pos = float2(0.5, 0.5);\n";
01106         }else
01107         {
01108                 out<<"uniform samplerRECT tex, in float2 tpos: TEXCOORD0 ){\n";
01109                 out<<"float4 tc = texRECT( tex, tpos);\n";
01110                 out<<"float2 pos = tc.rg; float index = tc.b;\n";
01111         }
01112         out<<"float2 sum;       float4 cc;\n";
01113 
01114 
01115 
01116         if(step>0)
01117         {
01118                 out<<"float2 cpos = float2(-0.5, 0.5);\t float2 opos;\n";
01119                 for(i = 0; i < step; i++)
01120                 {
01121 //#define SETP_CODE_2
01122 
01123 #ifndef SETP_CODE_2
01124 /*                      out<<"cc = texRECT(tex"<<i<<", pos);\n";
01125                         out<<"float sum3[3] = {cc.r, cc.r + cc.g, cc.r + cc.g + cc.b};\n";
01126                         out<<"float3 cmp = float3(index > float3(sum3[0], sum3[1], sum3[2]));\n";
01127                         out<<"opos.y = -0.5 + cmp.y; opos.x = -0.5 + cmp.x + (cmp.z - cmp.y);\n";
01128                         out<<"index -= dot(cmp, cc.rgb);\n";
01129                         out<<"pos = (pos + pos + opos);\n";*/
01130 
01131                         out<<"cc = texRECT(tex"<<i<<", pos); sum.x = cc.r + cc.g;\n";
01132                         out<<"if (index < sum.x){ if(index < cc.r) opos = cpos.xx; else {opos = cpos.yx; index -= cc.r;}}\n";
01133                         out<<"else {index -= sum.x; if(index < cc.b) opos = cpos.xy; else{opos = cpos.yy; index -= cc.b;}}";
01134                         out<<"pos = (pos + pos + opos);\n";
01135 
01136 /*                      out<<"cc = texRECT(tex"<<i<<", pos);\n";
01137                         out<<"if (index <cc.r){ opos = cpos.xx;}\n";
01138                         out<<"else{sum.x = cc.r + cc.g;";
01139                                         out<<"if(index < sum.x ) {opos = cpos.yx; index -= cc.r;}\n";
01140                                         out<<"else{sum.y = sum.x + cc.b;";
01141                                                         out<<"if(index < sum.y ) {opos = cpos.xy; index -= sum.x;}\n";
01142                                                         out<<"else {opos = cpos.yy; index -= sum.y;}}}\n";
01143                         out<<"pos = (pos + pos + opos);\n";*/
01144 
01145 #else
01146                         out<<"cc = texRECT(tex"<<i<<", pos);\n";
01147                         out<<"if (index < cc.r) opos = cpos.xx;\n";
01148                         out<<"else if (index < cc.r + cc.g){opos = cpos.yx; index -= cc.r;}\n";
01149                         out<<"else if (index < cc.r + cc.g + cc.b){opos = cpos.xy; index -= (cc.r + cc.g);}\n";
01150                         out<<"else {opos = cpos.yy; index -= (cc.r + cc.g + cc.b);}\n";
01151                         out<<"pos = (pos + pos + opos);\n";
01152 #endif
01153                 }
01154         }
01155         out<<"FragColor = float4(pos, index, 1);\n";
01156         out<<"}\n"<<'\0';
01157         return new ProgramCG(buffer);
01158 }
01159 
01160 void ShaderBagCG::SetGenListInitParam(int w, int h)
01161 {
01162         float bbox[2] = {w -1.0f, h - 1.0f};
01163         cgGLSetParameter2fv(_param_genlist_init_bbox, bbox);
01164 }
01165 
01166 void ShaderBagCG::SetGenListStartParam(float width, int tex0)
01167 {
01168         cgGLSetParameter1f(_param_ftex_width, width);
01169 
01170         if(_param_genlist_start_tex0)
01171         {
01172                 cgGLSetTextureParameter(_param_genlist_start_tex0, tex0);
01173                 cgGLEnableTextureParameter(_param_genlist_start_tex0);
01174         }
01175 }
01176 
01177 void ShaderBagCG::LoadDescriptorShaderF2()
01178 {
01179         //one shader outpout 128/8 = 16 , each fragout encodes 4
01180         //const double twopi = 2.0*3.14159265358979323846;
01181         //const double rpi  = 8.0/twopi;
01182         char buffer[10240];
01183         ostrstream out(buffer, 10240);
01184 
01185         out<<setprecision(8);
01186 
01187         out<<"\n"
01188         "#define M_PI 3.14159265358979323846\n"
01189         "#define TWO_PI (2.0*M_PI)\n"
01190         "#define RPI 1.2732395447351626861510701069801\n"
01191         "#define WF size.z\n"
01192         "void main(uniform samplerRECT tex,             \n"
01193         "uniform        samplerRECT gradTex,                    \n"
01194         "uniform float4         dsize,                          \n"
01195         "uniform float3         size,                           \n"
01196         "in             float2  TexCoord0 : TEXCOORD0,  \n"
01197         "out            float4  FragData0:COLOR0,               \n"
01198         "out            float4  FragData1:COLOR1)               \n"
01199         "{\n"
01200         "       float2 dim      = size.xy;      //image size                    \n"
01201         "       float index = dsize.x * floor(TexCoord0.y * 0.5) + TexCoord0.x;\n"
01202         "       float idx = 8.0 * frac(index * 0.125) + 8.0 * floor(2.0 * frac(TexCoord0.y * 0.5));             \n"
01203         "       index = floor(index*0.125) + 0.49;  \n"
01204         "       float2 coord = floor( float2( fmod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
01205         "       float2 pos = texRECT(tex, coord).xy;            \n"
01206         "       if(any(pos.xy <= 1) || any(pos.xy >=dim-1)) "
01207         "       //discard;      \n"
01208         "       { FragData0 = FragData1 = float4(0.0); return; }\n"
01209         "       float  anglef = texRECT(tex, coord).z;\n"
01210         "       if(anglef > M_PI) anglef -= TWO_PI;\n"
01211         "       float sigma = texRECT(tex, coord).w; \n"
01212         "       float spt  = abs(sigma * WF);   //default to be 3*sigma \n";
01213 
01214         //rotation
01215         out<<
01216         "       float4 cscs, rots;                                                              \n"
01217         "       sincos(anglef, cscs.y, cscs.x);                                 \n"
01218         "       cscs.zw = - cscs.xy;                                                    \n"
01219         "       rots = cscs /spt;                                                               \n"
01220         "       cscs *= spt; \n";
01221 
01222         //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
01223         //and rots is  (cos, sin, -cos, -sin ) /(factor*sigma)
01224         //devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
01225         //To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side
01226         out<<
01227         "       float4 temp; float2 pt, offsetpt;                               \n"
01228         "       /*the fraction part of idx is .5*/                      \n"
01229         "       offsetpt.x = 4.0 * frac(idx*0.25) - 2.0;                                \n"
01230         "       offsetpt.y = floor(idx*0.25) - 1.5;                     \n"
01231         "       temp = cscs.xwyx*offsetpt.xyxy;                         \n"
01232         "       pt = pos + temp.xz + temp.yw;                           \n";
01233         
01234         //get a horizontal bounding box of the rotated rectangle
01235         out<<
01236         "       float2 bwin = abs(cscs.xy);                                     \n"
01237         "       float bsz = bwin.x + bwin.y;                                    \n"
01238         "       float4 sz;      float2 spos;                                    \n"
01239         "       sz.xy = max(pt - bsz, float2(1,1));\n"
01240         "       sz.zw = min(pt + bsz, dim - 2);         \n"
01241         "       sz = floor(sz)+0.5;"; //move sample point to pixel center
01242 
01243         //get voting for two box
01244         out<<"\n"
01245         "       float4 DA, DB;                  \n"
01246         "       DA = DB  = float4(0, 0, 0, 0);          \n"
01247         "       for(spos.y = sz.y; spos.y <= sz.w;      spos.y+=1.0)                            \n"
01248         "       {                                                                                                                               \n"
01249         "               for(spos.x = sz.x; spos.x <= sz.z;      spos.x+=1.0)                    \n"
01250         "               {                                                                                                                       \n"
01251         "                       float2 diff = spos - pt;                                                                \n"
01252         "                       temp = rots.xywx * diff.xyxy;                                                   \n"
01253         "                       float2 nxy = (temp.xz + temp.yw);                                               \n"
01254         "                       float2 nxyn = abs(nxy);                                                                 \n"
01255         "                       if(all(nxyn < float2(1.0)))\n"
01256         "                       {\n"
01257         "                               float4 cc = texRECT(gradTex, spos);                                             \n"
01258         "                               float mod = cc.b;       float angle = cc.a;                                     \n"
01259         "                               float theta0 = (anglef - angle)*RPI;                            \n"
01260         "                               float theta = theta0 < 0? theta0 + 8.0 : theta0; // fmod(theta0 + 8.0, 8.0); \n"
01261         "                               diff = nxy + offsetpt.xy;                                                               \n"
01262         "                               float ww = exp(-0.125*dot(diff, diff));\n"
01263         "                               float2 weights = 1 - nxyn;\n"
01264         "                               float weight = weights.x * weights.y *mod*ww; \n"
01265         "                               float theta1 = floor(theta); \n"
01266         "                               float weight2 = (theta - theta1) * weight; \n"
01267         "                               float weight1 = weight - weight2;\n"
01268         "                               DA += float4(theta1 == float4(0, 1, 2, 3))*weight1; \n"
01269         "                               DA += float4(theta1 == float4(7, 0, 1, 2))*weight2; \n"
01270         "                               DB += float4(theta1 == float4(4, 5, 6, 7))*weight1;     \n"
01271         "                               DB += float4(theta1 == float4(3, 4, 5, 6))*weight2; \n"
01272         "                       }\n"
01273         "               }\n"
01274         "       }\n";
01275 
01276         out<<
01277         "       FragData0 = DA; FragData1 = DB;\n"
01278         "}\n"<<'\0';
01279 
01280         ProgramCG * program; 
01281         s_descriptor_fp = program =  new ProgramCG(buffer);
01282         _param_descriptor_gtex = cgGetNamedParameter(*program, "gradTex");
01283         _param_descriptor_size = cgGetNamedParameter(*program, "size");
01284         _param_descriptor_dsize = cgGetNamedParameter(*program, "dsize");
01285 
01286 
01287 }
01288 
01289 //the shader that computes the descriptors
01290 void ShaderBagCG::LoadDescriptorShader()
01291 {
01292         GlobalUtil::_DescriptorPPT = 16;
01293         LoadDescriptorShaderF2();
01294 }
01295 
01296 void ShaderBagCG::LoadOrientationShader()
01297 {
01298 
01299         char buffer[10240];
01300         ostrstream out(buffer,10240);
01301 
01302 
01303         out<<"\n"
01304         "#define GAUSSIAN_WF "<<GlobalUtil::_OrientationGaussianFactor<<" \n"
01305         "#define SAMPLE_WF ("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
01306         "#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
01307         "void main(uniform samplerRECT tex,                     \n"
01308         "uniform samplerRECT gradTex,           \n"
01309         "               uniform float4 size,                            \n"
01310         "               in float2 TexCoord0 : TEXCOORD0,        \n"
01311         "               out float4 FeatureData : COLOR0 ";
01312 
01313         //multi orientation output
01314         //use one additional texture to store up to four orientations
01315         //when we use one 32bit float to store two orientations, no extra texture is required
01316 
01317         if(GlobalUtil::_MaxOrientation >1  && GlobalUtil::_OrientationPack2 == 0)
01318                 out<<", out float4 OrientationData : COLOR1";
01319 
01320         if(GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)
01321         {
01322                 //data for sub-pixel localization
01323                 out<<", uniform samplerRECT texS";
01324         }
01325 
01326         //use 9 float4 to store histogram of 36 directions
01327         out<<")         \n"
01328         "{                                                                                                      \n"
01329         "       float4 bins[10];                                                                \n"
01330         "       for (int i=0; i<9; i++) bins[i] = float4(0,0,0,0);      \n"
01331         "       const float4 loc = texRECT(tex, TexCoord0);                     \n"
01332         "       const bool orientation_mode = (size.z != 0);                    \n"
01333         "       float2 pos = loc.xy;                                                    \n"
01334         "       float sigma = orientation_mode? abs(size.z) : loc.w; \n";
01335         if(GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)
01336         {
01337                 out<<
01338         "       if(orientation_mode) {\n"
01339         "               float4 keyx = texRECT(texS, pos);\n"
01340         "               sigma = sigma * pow(size.w, keyx.w); \n"
01341         "               pos.xy = pos.xy + keyx.yz; \n"
01342         "               #if " << GlobalUtil::_KeepExtremumSign << "\n"
01343         "                       if(keyx.x<0.6) sigma = - sigma;\n"
01344         "               #endif\n"
01345         "       }\n";
01346         }
01347 
01348         out<<
01349         "       //bool fixed_orientation = (size.z < 0);                \n"
01350         "       if(size.z < 0) {FeatureData = float4(pos, 0, sigma); return;}"
01351         "       const float gsigma = sigma * GAUSSIAN_WF;                               \n"
01352         "       const float2 win = abs(sigma.xx) * (SAMPLE_WF * GAUSSIAN_WF);   \n"
01353         "       const float2 dim = size.xy;                                                     \n"
01354         "       const float dist_threshold = win.x*win.x+0.5;                   \n"
01355         "       const float factor = -0.5/(gsigma*gsigma);                      \n"
01356         "       float4 sz;      float2 spos;                                            \n"
01357         "       //if(any(pos.xy <= 1)) discard;                                 \n"
01358         "       sz.xy = max( pos - win, float2(1,1));                   \n"
01359         "       sz.zw = min( pos + win, dim-2);                         \n"
01360         "       sz = floor(sz)+0.5;";
01361         //loop to get the histogram
01362 
01363         out<<"\n"
01364         "       for(spos.y = sz.y; spos.y <= sz.w;      spos.y+=1.0)                            \n"
01365         "       {                                                                                                                               \n"
01366         "               for(spos.x = sz.x; spos.x <= sz.z;      spos.x+=1.0)                    \n"
01367         "               {                                                                                                                       \n"
01368         "                       const float2 offset = spos - pos;                                               \n"
01369         "                       const float sq_dist = dot(offset,offset);                               \n"
01370         "                       if( sq_dist < dist_threshold){                                                  \n"
01371         "                               const float4 cc = texRECT(gradTex, spos);                       \n"
01372         "                               const float grad = cc.b;        float theta = cc.a;             \n"
01373         "                               float idx = floor(degrees(theta)*0.1);          \n"
01374         "                               const float weight = grad*exp(sq_dist * factor);                                \n"
01375         "                               if(idx < 0 ) idx += 36;                                                                 \n"
01376         "                               const float vidx = 4.0 * fract(idx * 0.25);//fmod(idx, 4);                                                              \n"
01377         "                               const float4 inc = weight*float4(vidx == float4(0,1,2,3));      ";
01378 
01379         if(GlobalUtil::_UseDynamicIndexing && strcmp(cgGetProfileString(ProgramCG::_FProfile), "gp4fp")==0)
01380 //      if(ProgramCG::_FProfile == CG_PROFILE_GPU_FP) this enumerant is not defined in cg1.5
01381         {
01382                 //gp_fp supports dynamic indexing
01383                 out<<"\n"
01384         "                               int iidx = int(floor(idx*0.25));        \n"
01385         "                               bins[iidx]+=inc;                                        \n"
01386         "                       }                                                                               \n"
01387         "               }                                                                                       \n"
01388         "       }";
01389 
01390         }else
01391         {
01392                 //nvfp40 still does not support dynamic array indexing
01393                 //unrolled binary search...
01394                 out<<"\n"
01395         "                               if(idx < 16)                                                    \n"
01396         "                               {                                                                               \n"
01397         "                                       if(idx < 8)                                                     \n"
01398         "                                       {                                                                       \n"
01399         "                                               if(idx < 4)     {       bins[0]+=inc;}  \n"
01400         "                                               else            {       bins[1]+=inc;}  \n"
01401         "                                       }else                                                           \n"
01402         "                                       {                                                                       \n"
01403         "                                               if(idx < 12){   bins[2]+=inc;}  \n"
01404         "                                               else            {       bins[3]+=inc;}  \n"
01405         "                                       }                                                                       \n"
01406         "                               }else if(idx < 32)                                              \n"
01407         "                               {                                                                               \n"
01408         "                                       if(idx < 24)                                            \n"
01409         "                                       {                                                                       \n"
01410         "                                               if(idx <20)     {       bins[4]+=inc;}  \n"
01411         "                                               else            {       bins[5]+=inc;}  \n"
01412         "                                       }else                                                           \n"
01413         "                                       {                                                                       \n"
01414         "                                               if(idx < 28){   bins[6]+=inc;}  \n"
01415         "                                               else            {       bins[7]+=inc;}  \n"
01416         "                                       }                                                                       \n"
01417         "                               }else                                           \n"
01418         "                               {                                                                               \n"
01419         "                                       bins[8]+=inc;                                           \n"
01420         "                               }                                                                               \n"
01421         "                       }                                                                               \n"
01422         "               }                                                                                       \n"
01423         "       }";
01424 
01425         }
01426 
01427         WriteOrientationCodeToStream(out);
01428 
01429         ProgramCG * program;
01430         s_orientation = program = new ProgramCG(buffer);
01431         _param_orientation_gtex = cgGetNamedParameter(*program, "gradTex");
01432         _param_orientation_size = cgGetNamedParameter(*program, "size");
01433         _param_orientation_stex = cgGetNamedParameter(*program, "texS");
01434 }
01435 
01436 void ShaderBagCG::WriteOrientationCodeToStream(std::ostream& out)
01437 {
01438         //smooth histogram and find the largest
01439 /*
01440         smoothing kernel:        (1 3 6 7 6 3 1 )/27
01441         the same as 3 pass of (1 1 1)/3 averaging
01442         maybe better to use 4 pass on the vectors...
01443 */
01444 
01445 
01446         //the inner loop on different array numbers is always unrolled in fp40
01447 
01448         //bug fixed here:)
01449         out<<"\n"
01450         "       float3x3 mat1 = float3x3(1, 0, 0, 3, 1, 0, 6, 3, 1)/27.0;; //bug fix.. \n"
01451         "       float4x4 mat2 = float4x4( 7, 6, 3, 1, 6, 7, 6, 3, 3, 6, 7, 6, 1, 3, 6, 7)/27.0;;\n"
01452         "       for (int j=0; j<2; j++)                                                         \n"
01453         "       {                                                                                               \n"
01454         "               float4 prev  = bins[8];                                         \n"
01455         "               bins[9]          = bins[0];                                             \n"
01456         "               for (int i=0; i<9; i++)                                                 \n"
01457         "               {                                                                                               \n"
01458         "                       float4 newb     =       mul ( bins[i], mat2);           \n"
01459         "                       newb.xyz        +=      mul ( prev.yzw, mat1);          \n"
01460         "                       prev = bins[i];                                                         \n"
01461         "                       newb.wzy        +=      mul     ( bins[i+1].zyx, mat1); \n"
01462         "                       bins[i] = newb;                                                 \n"
01463         "               }                                                                                               \n"
01464         "       }";
01465 
01466 
01467         //find the maximum voting
01468         out<<"\n"
01469         "       float4 maxh; float2 maxh2; float4 maxh4 = bins[0];                              \n"
01470         "       for (int i=1; i<9; i++) maxh4 = max(maxh4, bins[i]);                            \n"
01471         "       maxh2 = max(maxh4.xy, maxh4.zw); maxh = float4(max(maxh2.x, maxh2.y));";
01472 
01473         char *testpeak_code;
01474         char *savepeak_code;
01475 
01476 
01477 
01478         //save two/three/four orientations with the largest votings?
01479 
01480         //
01481         if(GlobalUtil::_MaxOrientation>1)
01482         {
01483                 out<<"\n"
01484         "       float4 Orientations = float4(0, 0, 0, 0);                               \n"
01485         "       float4 weights = float4(0,0,0,0);               ";      
01486                 
01487                 testpeak_code = "\n"
01488         "               {test = bins[i]>hh;";
01489 
01490                 //save the orientations in weight-decreasing order
01491                 if(GlobalUtil::_MaxOrientation ==2)
01492                 {
01493                 savepeak_code = "\n"
01494         "               if(weight <=weights.g){}\n"
01495         "               else if(weight >weights.r)\n"
01496         "               {weights.rg = float2(weight, weights.r); Orientations.rg = float2(th, Orientations.r);}\n"
01497         "               else {weights.g = weight; Orientations.g = th;}";
01498 
01499                 }else if(GlobalUtil::_MaxOrientation ==3)
01500                 {
01501                 savepeak_code = "\n"
01502         "               if(weight <=weights.b){}\n"
01503         "               else if(weight >weights.r)\n"
01504         "               {weights.rgb = float3(weight, weights.rg); Orientations.rgb = float3(th, Orientations.rg);}\n"
01505         "               else if(weight >weights.g)\n"
01506         "               {weights.gb = float2(weight, weights.g); Orientations.gb = float2(th, Orientations.g);}\n"
01507         "               else {weights.b = weight; Orientations.b = th;}";
01508                 }else
01509                 {
01510                 savepeak_code = "\n"
01511         "               if(weight <=weights.a){}\n"
01512         "               else if(weight >weights.r)\n"
01513         "               {weights = float4(weight, weights.rgb); Orientations = float4(th, Orientations.rgb);}\n"
01514         "               else if(weight >weights.g)\n"
01515         "               {weights.gba = float3(weight, weights.gb); Orientations.gba = float3(th, Orientations.gb);}\n"
01516         "               else if(weight >weights.b)\n"
01517         "               {weights.ba = float2(weight, weights.b); Orientations.ba = float2(th, Orientations.b);}\n"
01518         "               else {weights.a = weight; Orientations.a = th;}";
01519                 }
01520 
01521         }else
01522         {
01523                 out<<"\n"
01524         "       float Orientations = 0;                         ";
01525                 testpeak_code ="\n"
01526         "               if(npeaks==0){                                                          \n"
01527         "               test = (bins[i] >= maxh)        ;";
01528                 savepeak_code="\n"
01529         "                               npeaks++;                                                               \n"
01530         "                               Orientations = th.x;";
01531 
01532         }
01533 
01534         //find the peaks
01535         //the following loop will be unrolled
01536 
01537         out<<"\n"
01538         "       const float4 hh = maxh * ORIENTATION_THRESHOLD; bool4 test;     \n"
01539         "       bins[9] = bins[0];                                                              \n"
01540         "       float npeaks = 0, k = 0;                        \n"
01541         "       float prevb     = bins[8].w;                                            \n"
01542         "       for (int i = 0; i <9 ; i++)                                             \n"
01543         "       {"
01544                 <<testpeak_code<<"                                                                      \n"
01545         "               if( any ( test.xy || test.zw) )                                                 \n"
01546         "               {                                                                                       \n"
01547         "                       if(test.r && bins[i].x > prevb && bins[i].x > bins[i].y )       \n"
01548         "                       {                                                                                       \n"
01549         "                           float       di = 0.5 * (bins[i].y-prevb) / (bins[i].x *2.0 -bins[i].y -prevb) ; \n"
01550         "                               float   th = (k+di+0.5);        float weight = bins[i].x;"
01551                                         <<savepeak_code<<"\n"
01552         "                       }\n"
01553         "                       else if(test.g && all( bins[i].yy > bins[i].xz) )       \n"
01554         "                       {                                                                                       \n"
01555         "                           float       di = 0.5 * (bins[i].z-bins[i].x) / (bins[i].y * 2.0 - bins[i].z - bins[i].x) ; \n"
01556         "                               float   th = (k+di+1.5);        float weight = bins[i].y;                               "
01557                                         <<savepeak_code<<"      \n"
01558         "                       }"
01559                 <<"\n"
01560         "                       if(test.b && all( bins[i].zz > bins[i].yw) )    \n"
01561         "                       {                                                                                       \n"
01562         "                           float       di = 0.5 * (bins[i].w-bins[i].y) / (bins[i].z * 2.0-bins[i].w-bins[i].y) ; \n"
01563         "                               float   th = (k+di+2.5);        float weight = bins[i].z;                               "
01564                                         <<savepeak_code<<"      \n"
01565         "                       }\n"
01566         "                       else if(test.a && bins[i].w > bins[i].z && bins[i].w > bins[i+1].x )    \n"
01567         "                       {                                                                                       \n"
01568         "                           float       di = 0.5 * (bins[i+1].x-bins[i].z) / (bins[i].w * 2.0- bins[i+1].x-bins[i].z) ; \n"
01569         "                               float   th = (k+di+3.5);        float weight = bins[i].w;                               "
01570                                         <<savepeak_code<<"      \n"
01571         "                       }\n"
01572         "               }}\n"
01573         "               k = k + 4.0;                                            \n"
01574         "               prevb = bins[i].w;\n"
01575         "       }";
01576         //WRITE output
01577         if(GlobalUtil::_OrientationPack2)
01578         {
01579                 //pack two orientations in one float
01580         out<<"\n"
01581         "        if(orientation_mode){\n"
01582         "               Orientations.xy = frac(Orientations.xy / 36.0 + 1.0);\n"
01583         "               if(weights.x <= 0) Orientations.x = 1.0;\n"
01584         "               if(weights.y <= 0) Orientations.y = 1.0;\n"
01585         "               float packed_orientation = pack_2ushort(Orientations.xy); \n"
01586         "               FeatureData = float4(pos, packed_orientation, sigma);\n"
01587         "       }else{\n"
01588         "               FeatureData = float4(pos, radians((Orientations.x)*10.0), sigma);\n"
01589         "       }\n";           
01590         }else if(GlobalUtil::_MaxOrientation>1)
01591         {
01592         out<<"\n"
01593         "        if(orientation_mode){\n"
01594         "        npeaks = dot(float4(1,1,"
01595                         <<(GlobalUtil::_MaxOrientation>2 ? 1 : 0)<<","
01596                         <<(GlobalUtil::_MaxOrientation >3? 1 : 0)<<"), float4(weights>hh));\n"
01597         "               OrientationData = radians((Orientations )*10.0);\n"
01598         "               FeatureData = float4(pos, npeaks, sigma);\n"
01599         "       }else{\n"
01600         "               FeatureData = float4(pos, radians((Orientations.x)*10.0), sigma);\n"
01601         "       }\n";
01602         }else
01603         {
01604         out<<"\n"
01605         "        FeatureData = float4(pos, radians((Orientations.x)*10.0), sigma);";
01606         }
01607         //end
01608         out<<"\n"
01609         "}\n"<<'\0';
01610 
01611 
01612 }
01613 
01614 void ShaderBagCG::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
01615 {
01616         cgGLSetTextureParameter(_param_orientation_gtex, oTex);
01617         cgGLEnableTextureParameter(_param_orientation_gtex);
01618         cgGLSetParameter1f(_param_orientation_size, sigma);
01619 }
01620 
01621 void ShaderBagCG::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int stex, float step)
01622 {
01624         cgGLSetTextureParameter(_param_orientation_gtex, gtex); 
01625         cgGLEnableTextureParameter(_param_orientation_gtex);
01626 
01627         if((GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)&& stex)
01628         {
01629                 //specify texutre for subpixel subscale localization
01630                 cgGLSetTextureParameter(_param_orientation_stex, stex);
01631                 cgGLEnableTextureParameter(_param_orientation_stex);
01632         }
01633 
01634         float size[4];
01635         size[0] = (float)width;
01636         size[1] = (float)height;
01637         size[2] = sigma;
01638         size[3] = step;
01639         cgGLSetParameter4fv(_param_orientation_size, size);
01640 
01641 }
01642 
01643 void ShaderBagCG::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth,  float width, float height, float sigma)
01644 {
01646         cgGLSetTextureParameter(_param_descriptor_gtex, gtex);  
01647         cgGLEnableTextureParameter(_param_descriptor_gtex);
01648 
01649         float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
01650         cgGLSetParameter4fv(_param_descriptor_dsize, dsize);
01651         float size[3];
01652         size[0] = width;
01653         size[1] = height;
01654         size[2] = GlobalUtil::_DescriptorWindowFactor;  
01655         cgGLSetParameter3fv(_param_descriptor_size, size);
01656 }
01657 
01658 
01661 
01662 ShaderBagPKCG::ShaderBagPKCG()
01663 {
01664         ProgramCG::InitContext();
01665 }
01666 
01667 void ShaderBagPKCG::UnloadProgram()
01668 {
01669 
01670         cgGLUnbindProgram(ProgramCG::_FProfile);
01671         cgGLDisableProfile(ProgramCG::_FProfile);
01672 }
01673 
01674 void ShaderBagPKCG::LoadFixedShaders()
01675 {
01676         ProgramCG * program;
01677 
01678         /*
01679         char *rgb2gray_packing_code =
01680                 "void main(uniform samplerRECT rgbTex, in float4 TexCoord0 : TEXCOORD0, \n"
01681         "               in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, \n"
01682         "               in float4 TexCoord3 : TEXCOORD3, out float4 FragData : COLOR0){\n"
01683         "               const float3 weight = vec3(0.299, 0.587, 0.114);\n"
01684         "               FragData.r = dot(weight, texRECT(rgbTex,TexCoord0.st ).rgb);\n"
01685         "               FragData.g = dot(weight, texRECT(rgbTex,TexCoord1.st ).rgb);\n"
01686         "               FragData.b = dot(weight, texRECT(rgbTex,TexCoord2.st ).rgb);\n"
01687         "               FragData.a = dot(weight, texRECT(rgbTex,TexCoord3.st ).rgb);}";//
01688         s_gray = new ProgramCG( rgb2gray_packing_code);
01689         */
01690 
01691         s_gray = new ProgramCG( 
01692         "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
01693         "float intensity = dot(float3(0.299, 0.587, 0.114), texRECT(tex,TexCoord0.xy ).rgb);\n"
01694         "FragColor= float4(intensity, intensity, intensity, 1.0);}"     );
01695 
01696 
01697         s_sampling = new ProgramCG(
01698         "void main(uniform samplerRECT tex, in float4 TexCoord0 : TEXCOORD0, \n"
01699         "               in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, \n"
01700         "               in float4 TexCoord3 : TEXCOORD3, out float4 FragData : COLOR0 ){\n"
01701         "               FragData= float4(       texRECT(tex,TexCoord0.st ).r,texRECT(tex,TexCoord1.st ).r,\n"
01702         "                                                       texRECT(tex,TexCoord2.st ).r,texRECT(tex,TexCoord3.st ).r);}"   );
01703 
01704 
01705         s_margin_copy = program = new ProgramCG(
01706         "void main(in float4 texCoord0: TEXCOORD0, out float4 FragColor: COLOR0, \n"
01707         "uniform samplerRECT tex, uniform float4 truncate){\n"
01708         "float4 cc = texRECT(tex, min(texCoord0.xy, truncate.xy)); \n"
01709         "bool2 ob = texCoord0.xy < truncate.xy;\n"
01710         "if(ob.y) { FragColor = (truncate.z ==0 ? cc.rrbb : cc.ggaa); } \n"
01711         "else if(ob.x) {FragColor = (truncate.w <1.5 ? cc.rgrg : cc.baba);} \n"
01712         "else { float4 weights = float4(float4(0, 1, 2, 3) == truncate.w);\n"
01713         "float v = dot(weights, cc); FragColor = v.xxxx;}}");
01714 
01715         _param_margin_copy_truncate = cgGetNamedParameter(*program, "truncate");
01716 
01717 
01718         s_zero_pass = new ProgramCG("void main(out float4 FragColor : COLOR0){FragColor = 0;}");
01719 
01720         s_grad_pass = program = new ProgramCG(
01721         "void main (\n"
01722         "float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"
01723         "float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, float4 TexCU : TEXCOORD4,\n"
01724         "out float4 FragData0 : COLOR0, out float4 FragData1 : COLOR1, \n"
01725         "out float4 FragData2 : COLOR2, uniform samplerRECT tex, uniform samplerRECT texp)\n"
01726         "{\n"
01727         "       float4 v1, v2, gg;\n"
01728         "       float4 cc = texRECT(tex, TexCC.xy);\n"
01729         "       float4 cp = texRECT(texp, TexCC.xy);\n"
01730         "       FragData0 = cc - cp; \n"
01731         "       float4 cl = texRECT(tex, TexLC.xy);     float4 cr = texRECT(tex, TexRC.xy);\n"
01732         "       float4 cd = texRECT(tex, TexCD.xy); float4 cu = texRECT(tex, TexCU.xy);\n"
01733         "       float4 dx = (float4(cr.rb, cc.ga) - float4(cc.rb, cl.ga)).zxwy;\n"
01734         "       float4 dy = (float4(cu.rg, cc.ba) - float4(cc.rg, cd.ba)).zwxy;\n"
01735         "       FragData1 = 0.5 * sqrt(dx*dx + dy * dy);\n"
01736         "       FragData2 = FragData1 > 0?  atan2(dy, dx) : float4(0);\n"
01737         "}\n\0");
01738 
01739         _param_grad_pass_texp = cgGetNamedParameter(*program, "texp");
01740 
01741 
01742         s_dog_pass = program = new ProgramCG(
01743         "void main (float4 TexCC : TEXCOORD0, out float4 FragData0 : COLOR0, \n"
01744         "       uniform samplerRECT tex, uniform samplerRECT texp)\n"
01745         "{\n"
01746         "       float4 cc = texRECT(tex, TexCC.xy);\n"
01747         "       float4 cp = texRECT(texp, TexCC.xy);\n"
01748         "       FragData0 = cc - cp; \n"
01749         "}\n\0");
01750 
01752         if(GlobalUtil::_SupportFP40)
01753         {
01754                 LoadOrientationShader();
01755                 if(GlobalUtil::_DescriptorPPT) LoadDescriptorShader();
01756         }else
01757         {
01758                 s_orientation = program =  new ProgramCG(
01759                 "void main(out float4 FragColor : COLOR0, \n"
01760         "       uniform samplerRECT fTex, uniform samplerRECT oTex, \n"
01761         "       uniform float2 size, \n"
01762         "       in float2 tpos : TEXCOORD0){\n"
01763         "       float4 cc = texRECT(fTex, tpos);\n"
01764         "       float2 co = cc.xy * 0.5; \n"
01765         "       float4 oo = texRECT(oTex, co);\n"
01766         "       bool2 bo = frac(co) < 0.5; \n"
01767         "       float o = bo.y? (bo.x? oo.r : oo.g) : (bo.x? oo.b : oo.a); \n"
01768         "       FragColor = float4(cc.rg, o, size.x * pow(size.y, cc.a));}");  
01769                 _param_orientation_gtex= cgGetNamedParameter(*program, "oTex");
01770                 _param_orientation_size= cgGetNamedParameter(*program, "size");
01771 
01772                 GlobalUtil::_FullSupported = 0;
01773                 GlobalUtil::_MaxOrientation = 0;
01774                 GlobalUtil::_DescriptorPPT = 0;
01775                 std::cerr<<"Orientation simplified on this hardware"<<endl;
01776                 std::cerr<<"Descriptor ignored on this hardware"<<endl;
01777         }
01778 }
01779 
01780 void ShaderBagPKCG::LoadDisplayShaders()
01781 {
01782         ProgramCG * program;
01783 
01784         s_copy_key = new ProgramCG(
01785         "void main(in float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
01786         "FragColor.rg= texRECT(tex, TexCoord0.xy).rg; FragColor.ba = float2(0,1);       }");
01787 
01788         //shader used to write a vertex buffer object
01789         //which is used to draw the quads of each feature
01790         s_vertex_list = program = new ProgramCG(
01791         "void main(in float4 TexCoord0: TEXCOORD0,\n"
01792         "uniform float4 sizes, \n"
01793         "uniform samplerRECT tex, \n"
01794         "out float4 FragColor: COLOR0){\n"
01795         "float fwidth = sizes.y; \n"
01796         "float twidth = sizes.z; \n"
01797         "float rwidth = sizes.w; \n"
01798         "float index = 0.1*(fwidth*floor(TexCoord0.y) + TexCoord0.x);\n"
01799         "float px = fmod(index, twidth);\n"
01800         "float2 tpos= floor(float2(px, index*rwidth))+0.5;\n"
01801         "float4 cc = texRECT(tex, tpos );\n"
01802         "float size = 3.0f * cc.a;// sizes.x;// \n"
01803         "FragColor.zw = float2(0.0, 1.0);\n"
01804         "if(any(cc.xy <=0)) {FragColor.xy = cc.xy;}else \n"
01805         "{\n"
01806         "       float type = frac(px);\n"
01807         "       float2 dxy; float s, c;\n"
01808         "       dxy.x = type < 0.1 ? 0 : ((type <0.5 || type > 0.9)? size : -size);\n"
01809         "       dxy.y = type < 0.2 ? 0 : ((type < 0.3 || type > 0.7 )? -size :size); \n"
01810         "       sincos(cc.b, s, c);\n"
01811         "       FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
01812         "       FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n"
01813         "}\n\0");
01814         /*FragColor = float4(tpos, 0.0, 1.0);}\n\0");*/
01815 
01816         _param_genvbo_size = cgGetNamedParameter(*program, "sizes");
01817 
01818         s_display_gaussian = new ProgramCG(
01819                 "void main(uniform samplerRECT tex, in float4 TexCoord0:TEXCOORD0, out float4 FragData: COLOR0 ){\n"
01820                 "float4 pc = texRECT(tex, TexCoord0.xy);         bool2 ff = (frac(TexCoord0.xy) < 0.5);\n"
01821     "float v = ff.y?(ff.x? pc.r : pc.g):(ff.x?pc.b:pc.a); FragData = float4(v.xxx, 1.0);}");
01822 
01823         s_display_dog =  new ProgramCG(
01824         "void main(in float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
01825         "float4 pc = texRECT(tex, TexCoord0.xy); bool2 ff = (frac(TexCoord0.xy) < 0.5);\n"
01826         "float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a);float g = (0.5+20.0*v);\n"
01827         "FragColor = float4(g, g, g, 1.0);}" );
01828 
01829 
01830         s_display_grad = new ProgramCG(
01831         "void main(in float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
01832         "float4 pc = texRECT(tex, TexCoord0.xy); bool2 ff = (frac(TexCoord0.xy) < 0.5);\n"
01833         "float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a); FragColor = float4(5.0 *v.xxx, 1.0); }");
01834 
01835         s_display_keys= new ProgramCG(
01836         "void main(in float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
01837         "float4 oc = texRECT(tex, TexCoord0.xy); \n"
01838         "float4 cc = float4(abs(oc.r) == float4(1.0, 2.0, 3.0, 4.0));\n"
01839         "bool2 ff = (frac(TexCoord0.xy) < 0.5);\n"
01840         "float v = ff.y ?(ff.x ? cc.r : cc.g):(ff.x ? cc.b : cc.a);\n"
01841         "if(oc.r == 0) discard;\n"
01842         "else if(oc.r > 0) FragColor = float4(1.0, 0, 0,1.0); \n"
01843         "else FragColor = float4(0.0,1.0,0.0,1.0); }" );                
01844 }
01845 
01846 void ShaderBagPKCG::LoadGenListShader(int ndoglev, int nlev)
01847 {
01848 
01849         //the V2 algorithms are only slightly faster, but way more complicated
01850         //LoadGenListShaderV2(ndoglev, nlev); return; 
01851         ProgramCG * program;
01852 
01853         s_genlist_init_tight = new ProgramCG(
01854         "void main (uniform samplerRECT tex, in float4 TexCoord0 : TEXCOORD0,\n"
01855         "in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, \n"
01856         "in float4 TexCoord3 : TEXCOORD3, out float4 FragColor : COLOR0)\n"
01857         "{\n"
01858         "       float4 data = float4(   texRECT(tex, TexCoord0.xy).r,\n"
01859         "                                                       texRECT(tex, TexCoord1.xy).r,\n"
01860         "                                                       texRECT(tex, TexCoord2.xy).r,\n"
01861         "                                                       texRECT(tex, TexCoord3.xy).r);\n"
01862         "       FragColor = float4(data != 0);\n"
01863         "}");
01864 
01865         s_genlist_init_ex = program = new ProgramCG(
01866         "void main (uniform float4 bbox, uniform samplerRECT tex, \n"
01867         "in float4 TexCoord0 : TEXCOORD0, in float4 TexCoord1 : TEXCOORD1, \n"
01868         "in float4 TexCoord2 : TEXCOORD2, in float4 TexCoord3 : TEXCOORD3,\n"
01869         "out float4 FragColor : COLOR0)\n"
01870         "{\n"
01871         "       bool4 helper1 = abs(texRECT(tex, TexCoord0.xy).r)== float4(1.0, 2.0, 3.0, 4.0); \n"
01872         "       bool4 helper2 = abs(texRECT(tex, TexCoord1.xy).r)== float4(1.0, 2.0, 3.0, 4.0);\n"
01873         "       bool4 helper3 = abs(texRECT(tex, TexCoord2.xy).r)== float4(1.0, 2.0, 3.0, 4.0);\n"
01874         "       bool4 helper4 = abs(texRECT(tex, TexCoord3.xy).r)== float4(1.0, 2.0, 3.0, 4.0);\n"
01875         "       bool4 bx1 = TexCoord0.xxyy < bbox; \n"
01876         "       bool4 bx4 = TexCoord3.xxyy < bbox; \n"
01877         "       bool4 bx2 = bool4(bx4.xy, bx1.zw); \n"
01878         "       bool4 bx3 = bool4(bx1.xy, bx4.zw);\n"
01879         "       helper1 = (bx1.xyxy && bx1.zzww && helper1);\n"
01880         "       helper2 = (bx2.xyxy && bx2.zzww && helper2);\n"
01881         "       helper3 = (bx3.xyxy && bx3.zzww && helper3);\n"
01882         "       helper4 = (bx4.xyxy && bx4.zzww && helper4);\n"
01883         "       FragColor.r = any(helper1.xy || helper1.zw);    \n"
01884         "       FragColor.g = any(helper2.xy || helper2.zw);    \n"
01885         "       FragColor.b = any(helper3.xy || helper3.zw);    \n"
01886         "       FragColor.a = any(helper4.xy || helper4.zw);    \n"
01887         "}");
01888         _param_genlist_init_bbox = cgGetNamedParameter( *program, "bbox");
01889 
01890         s_genlist_end = program = new ProgramCG(
01891                 GlobalUtil::_KeepExtremumSign == 0 ? 
01892         
01893         "void main(     uniform samplerRECT tex, uniform samplerRECT ktex,\n"
01894         "                       in float4 tpos : TEXCOORD0,     out float4 FragColor : COLOR0)\n"
01895         "{\n"
01896         "       float4 tc = texRECT( tex, tpos.xy);\n"
01897         "       float2 pos = tc.rg; float index = tc.b;\n"
01898         "       float4 tk = texRECT( ktex, pos); \n"
01899         "       float4 keys = float4(abs(tk.x) == float4(1.0, 2.0, 3.0, 4.0)); \n"
01900         "       float2 opos; \n"
01901         "       opos.x = dot(keys, float4(-0.5, 0.5, -0.5, 0.5));\n"
01902         "       opos.y = dot(keys, float4(-0.5, -0.5, 0.5, 0.5));\n"
01903         "       FragColor = float4(opos + pos + pos + tk.yz, 1.0, tk.w);\n"
01904         "}" : 
01905         
01906         "void main(     uniform samplerRECT tex, uniform samplerRECT ktex,\n"
01907         "                       in float4 tpos : TEXCOORD0,     out float4 FragColor : COLOR0)\n"
01908         "{\n"
01909         "       float4 tc = texRECT( tex, tpos.xy);\n"
01910         "       float2 pos = tc.rg; float index = tc.b;\n"
01911         "       float4 tk = texRECT( ktex, pos); \n"
01912         "       float4 keys = float4(abs(tk.x) == float4(1.0, 2.0, 3.0, 4.0)); \n"
01913         "       float2 opos; \n"
01914         "       opos.x = dot(keys, float4(-0.5, 0.5, -0.5, 0.5));\n"
01915         "       opos.y = dot(keys, float4(-0.5, -0.5, 0.5, 0.5));\n"
01916         "       FragColor = float4(opos + pos + pos + tk.yz, sign(tk.x), tk.w);\n"
01917         "}"     
01918         );
01919         _param_genlist_end_ktex = cgGetNamedParameter(*program, "ktex");
01920 
01921         //reduction ...
01922         s_genlist_histo = new ProgramCG(
01923         "void main (uniform samplerRECT tex, in float2 TexCoord0 : TEXCOORD0,\n"
01924         "in float2 TexCoord1 : TEXCOORD1, in float2 TexCoord2 : TEXCOORD2, \n"
01925         "in float2 TexCoord3 : TEXCOORD3, out float4 FragColor : COLOR0)\n"
01926         "{\n"
01927         "       float4 helper; float4 helper2; \n"
01928         "       helper = texRECT(tex, TexCoord0); helper2.xy = helper.xy + helper.zw; \n"
01929         "       helper = texRECT(tex, TexCoord1); helper2.zw = helper.xy + helper.zw; \n"
01930         "       FragColor.rg = helper2.xz + helper2.yw;\n"
01931         "       helper = texRECT(tex, TexCoord2); helper2.xy = helper.xy + helper.zw; \n"
01932         "       helper = texRECT(tex, TexCoord3); helper2.zw = helper.xy + helper.zw; \n"
01933         "       FragColor.ba= helper2.xz+helper2.yw;\n"
01934         "}");
01935 
01936 
01937         //read of the first part, which generates tex coordinates 
01938 
01939         s_genlist_start= program =  ShaderBagCG::LoadGenListStepShader(1, 1);
01940         _param_ftex_width= cgGetNamedParameter(*program, "width");
01941         _param_genlist_start_tex0 = cgGetNamedParameter(*program, "tex0");
01942         //stepping
01943         s_genlist_step = program = ShaderBagCG::LoadGenListStepShader(0, 1);
01944         _param_genlist_step_tex= cgGetNamedParameter(*program, "tex");
01945         _param_genlist_step_tex0= cgGetNamedParameter(*program, "tex0");
01946 
01947 
01948 }
01949 
01950 
01951 
01952 void ShaderBagPKCG::LoadGenListShaderV2(int ndoglev, int nlev)
01953 {
01954         ProgramCG * program;
01955 
01956         s_genlist_init_tight = new ProgramCG(
01957         "void main (uniform samplerRECT tex, in float4 TexCoord0 : TEXCOORD0,\n"
01958         "in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, \n"
01959         "in float4 TexCoord3 : TEXCOORD3, out float4 FragColor : COLOR0)\n"
01960         "{\n"
01961         "       float4 data1 = texRECT(tex, TexCoord0.xy);\n"
01962         "       float4 data2 = texRECT(tex, TexCoord1.xy);\n"
01963         "       float4 data3 = texRECT(tex, TexCoord2.xy);\n"
01964         "       float4 data4 = texRECT(tex, TexCoord3.xy);\n"
01965         "       bool4 helper1 = (abs(data1.r), float4(1.0, 2.0, 3.0, 4.0)); \n"
01966         "       bool4 helper2 = (abs(data2.r), float4(1.0, 2.0, 3.0, 4.0));\n"
01967         "       bool4 helper3 = (abs(data3.r), float4(1.0, 2.0, 3.0, 4.0));\n"
01968         "       bool4 helper4 = (abs(data4.r), float4(1.0, 2.0, 3.0, 4.0));\n"
01969         "       FragColor.r = any(helper1.xy || helper1.zw);    \n"
01970         "       FragColor.g = any(helper2.xy || helper2.zw);    \n"
01971         "       FragColor.b = any(helper3.xy || helper3.zw);    \n"
01972         "       FragColor.a = any(helper4.xy || helper4.zw);    \n"
01973         "       if(dot(FragColor, float4(1,1,1,1)) == 1) \n"
01974         "       {\n"
01975         "               //use a special method if there is only one in the 16, \n"
01976         "               float4 data, helper; float2 pos, opos; \n"
01977         "               if(FragColor.r){ \n"
01978         "                       data = data1; helper = helper1; pos = TexCoord0.xy;\n"
01979         "               }else if(FragColor.g){\n"
01980         "                       data = data2; helper = helper2; pos = TexCoord1.xy;\n"
01981         "               }else if(FragColor.b){\n"
01982         "                       data = data3; helper = helper3; pos = TexCoord2.xy;\n"
01983         "               }else{\n"
01984         "                       data = data4; helper = helper4; pos = TexCoord3.xy;\n"
01985         "               }\n"
01986         "               opos.x = dot(helper, float4(-0.5, 0.5, -0.5, 0.5));\n"
01987         "               opos.y = dot(helper, float4(-0.5, -0.5, 0.5, 0.5));\n"
01988         "               FragColor = float4( pos + pos + opos + data.yz, -1, data.w); \n"
01989         "       }\n"
01990         "}");
01991 
01992         s_genlist_init_ex = program = new ProgramCG(
01993         "void main (uniform float4 bbox, uniform samplerRECT tex, \n"
01994         "in float4 TexCoord0 : TEXCOORD0, in float4 TexCoord1 : TEXCOORD1, \n"
01995         "in float4 TexCoord2 : TEXCOORD2, in float4 TexCoord3 : TEXCOORD3,\n"
01996         "out float4 FragColor : COLOR0)\n"
01997         "{\n"
01998         "       float4 data1 = texRECT(tex, TexCoord0.xy);\n"
01999         "       float4 data2 = texRECT(tex, TexCoord1.xy);\n"
02000         "       float4 data3 = texRECT(tex, TexCoord2.xy);\n"
02001         "       float4 data4 = texRECT(tex, TexCoord3.xy);\n"
02002         "       bool4 helper1 = (abs(data1.r), float4(1.0, 2.0, 3.0, 4.0)); \n"
02003         "       bool4 helper2 = (abs(data2.r), float4(1.0, 2.0, 3.0, 4.0));\n"
02004         "       bool4 helper3 = (abs(data3.r), float4(1.0, 2.0, 3.0, 4.0));\n"
02005         "       bool4 helper4 = (abs(data4.r), float4(1.0, 2.0, 3.0, 4.0));\n"
02006         "       bool4 bx1 = TexCoord0.xxyy < bbox; \n"
02007         "       bool4 bx4 = TexCoord3.xxyy < bbox; \n"
02008         "       bool4 bx2 = bool4(bx4.xy, bx1.zw); \n"
02009         "       bool4 bx3 = bool4(bx1.xy, bx4.zw);\n"
02010         "       helper1 = bx1.xyxy && bx1.zzww && helper1; \n"
02011         "       helper2 = bx2.xyxy && bx2.zzww && helper2; \n"
02012         "       helper3 = bx3.xyxy && bx3.zzww && helper3; \n"
02013         "       helper4 = bx4.xyxy && bx4.zzww && helper4; \n"
02014         "       FragColor.r = any(helper1.xy || helper1.zw);    \n"
02015         "       FragColor.g = any(helper2.xy || helper2.zw);    \n"
02016         "       FragColor.b = any(helper3.xy || helper3.zw);    \n"
02017         "       FragColor.a = any(helper4.xy || helper4.zw);    \n"
02018         "       if(dot(FragColor, float4(1,1,1,1)) == 1) \n"
02019         "       {\n"
02020         "               //use a special method if there is only one in the 16, \n"
02021         "               float4 data, helper; bool4 bhelper; float2 pos, opos; \n"
02022         "               if(FragColor.r){ \n"
02023         "                       data = data1; bhelper = helper1; pos = TexCoord0.xy;\n"
02024         "               }else if(FragColor.g){\n"
02025         "                       data = data2; bhelper = helper2; pos = TexCoord1.xy;\n"
02026         "               }else if(FragColor.b){\n"
02027         "                       data = data3; bhelper = helper3; pos = TexCoord2.xy;\n"
02028         "               }else{\n"
02029         "                       data = data4; bhelper = helper4; pos = TexCoord3.xy;\n"
02030         "               }\n"
02031         "               helper = float4(bhelper); \n"
02032         "               opos.x = dot(helper, float4(-0.5, 0.5, -0.5, 0.5));\n"
02033         "               opos.y = dot(helper, float4(-0.5, -0.5, 0.5, 0.5));\n"
02034         "               FragColor = float4(pos + pos + opos + data.yz, -1, data.w); \n"
02035         "       }\n"
02036         "}");
02037         _param_genlist_init_bbox = cgGetNamedParameter( *program, "bbox");
02038 
02039         s_genlist_end = program = new ProgramCG(
02040         
02041         "void main(     uniform samplerRECT tex, uniform samplerRECT ktex,\n"
02042         "                       in float4 tpos : TEXCOORD0,     out float4 FragColor : COLOR0)\n"
02043         "{\n"
02044         "       float4 tc = texRECT( tex, tpos.xy);\n"
02045         "       float2 pos = tc.rg; float index = tc.b;\n"
02046         "       if(index == -1)\n"
02047         "       {\n"
02048         "               FragColor = float4(tc.xy, 0, tc.w);\n"
02049         "       }else\n"
02050         "       {\n"
02051         "               float4 tk = texRECT( ktex, pos); \n"
02052         "               float4 keys = float4(abs(tk.r) == float4(1.0, 2.0, 3.0, 4.0)); \n"
02053         "               float2 opos; \n"
02054         "               opos.x = dot(keys, float4(-0.5, 0.5, -0.5, 0.5));\n"
02055         "               opos.y = dot(keys, float4(-0.5, -0.5, 0.5, 0.5));\n"
02056         "               FragColor = float4(opos + pos + pos + tk.yz, 0, tk.w);\n"
02057         "       }\n"
02058         "}");
02059         _param_genlist_end_ktex = cgGetNamedParameter(*program, "ktex");
02060 
02061         //reduction ...
02062         s_genlist_histo = new ProgramCG(
02063         "void main (uniform samplerRECT tex, in float2 TexCoord0 : TEXCOORD0,\n"
02064         "in float2 TexCoord1 : TEXCOORD1, in float2 TexCoord2 : TEXCOORD2, \n"
02065         "in float2 TexCoord3 : TEXCOORD3, out float4 FragColor : COLOR0)\n"
02066         "{\n"
02067         "       float4 helper[4]; float4 helper2; \n"
02068         "       helper[0] = texRECT(tex, TexCoord0); helper2.xy = helper[0].xy + helper[0].zw; \n"
02069         "       helper[1] = texRECT(tex, TexCoord1); helper2.zw = helper[1].xy + helper[1].zw; \n"
02070         "       FragColor.rg = helper2.xz + helper2.yw;\n"
02071         "       helper[2] = texRECT(tex, TexCoord2); helper2.xy = helper[2].xy + helper[2].zw; \n"
02072         "       helper[3] = texRECT(tex, TexCoord3); helper2.zw = helper[3].xy + helper[3].zw; \n"
02073         "       FragColor.ba= helper2.xz+helper2.yw;\n"
02074         "       bool4 keyt = float4(helper[0].z, helper[1].z, helper[2].z, helper[3].z) == -1.0; \n"
02075         "       float keyc = dot(float4(keyt), float4(1,1,1,1)); \n"
02076         "       if(keyc == 1.0 && dot(FragColor, float4(1,1,1,1)) == -1.0) \n"
02077         "       {\n"
02078         "               if(keyt.x) FragColor = helper[0];\n"
02079         "               else if(keyt.y) FragColor = helper[1]; \n"
02080         "               else if(keyt.z) FragColor = helper[2]; \n"
02081         "               else FragColor = helper[3]; \n"
02082         "       }else\n"
02083         "       {\n"
02084         "               FragColor = keyt? float4(1,1,1,1) : FragColor;\n"
02085         "       }\n"
02086         "}");
02087 
02088         //read of the first part, which generates tex coordinates 
02089 
02090         s_genlist_start= program =  ShaderBagCG::LoadGenListStepShaderV2(1, 1);
02091         _param_ftex_width= cgGetNamedParameter(*program, "width");
02092         _param_genlist_start_tex0 = cgGetNamedParameter(*program, "tex0");
02093         //stepping
02094         s_genlist_step = program = ShaderBagCG::LoadGenListStepShaderV2(0, 1);
02095         _param_genlist_step_tex= cgGetNamedParameter(*program, "tex");
02096         _param_genlist_step_tex0= cgGetNamedParameter(*program, "tex0");
02097 
02098 
02099 }
02100 
02101 
02102 
02103 ProgramCG* ShaderBagCG::LoadGenListStepShaderV2(int start, int step)
02104 {
02105         int i;
02106         char buffer[10240];
02107         //char chanels[5] = "rgba";
02108         ostrstream out(buffer, 10240);
02109         out<<"void main(out float4 FragColor : COLOR0, \n";
02110 
02111         for(i = 0; i < step; i++) out<<"uniform samplerRECT tex"<<i<<",\n";
02112 
02113         if(start)
02114         {
02115                 out<<"uniform float width, \nin float2 tpos : TEXCOORD0){\n";
02116                 out<<"float  index = floor(tpos.y) * width + floor(tpos.x);\n";
02117                 out<<"float2 pos = float2(0.5, 0.5);\n";
02118         }else
02119         {
02120                 out<<"uniform samplerRECT tex, in float2 tpos: TEXCOORD0 ){\n";
02121                 out<<"float4 tc = texRECT( tex, tpos);\n";
02122                 out<<"float2 pos = tc.rg; float index = tc.b;\n";
02123                 out<<"if(index==-1) {FragColor = tc; return;}\n";
02124         }
02125         out<<"float2 sum;       float4 cc;\n";
02126 
02127 
02128 
02129         if(step>0)
02130         {
02131                 out<<"float2 cpos = float2(-0.5, 0.5);\t float2 opos;\n";
02132                 for(i = 0; i < step; i++)
02133                 {
02134 
02135                         out<<"cc = texRECT(tex"<<i<<", pos);\n";
02136                         out<<"if(cc.z == -1){FragColor = cc; return;}";
02137                         out<<"sum.x = cc.r + cc.g;if (index < sum.x){ if(index < cc.r) opos = cpos.xx; else {opos = cpos.yx; index -= cc.r;}}\n";
02138                         out<<"else {index -= sum.x; if(index < cc.b) opos = cpos.xy; else{opos = cpos.yy; index -= cc.b;}}";
02139                         out<<"pos = (pos + pos + opos);\n";
02140                 }
02141         }
02142         out<<"FragColor = float4(pos, index, 1);\n";
02143         out<<"}\n"<<'\0';
02144         return new ProgramCG(buffer);
02145 }
02146 
02147 
02148 void ShaderBagPKCG:: LoadKeypointShader(float threshold, float edge_threshold)
02149 {
02150         //
02151         ProgramCG * program;
02152         char buffer[10240];
02153         float threshold0 = threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f);
02154         float threshold1 = threshold;
02155         float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
02156         ostrstream out(buffer, 10240);
02157         out<<setprecision(8);
02158         //tex(X)(Y)
02159         //X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)  
02160         //Y: (CDU) (CENTER 0, DOWN -1, UP    +1) 
02161         out <<  "#define THRESHOLD0 " << threshold0 << "\n"
02162                         "#define THRESHOLD1 " << threshold1 << "\n"
02163                         "#define THRESHOLD2 " << threshold2 << "\n";
02164 
02165         out<<
02166         "void main (\n"
02167         "float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"
02168         "float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, \n"
02169         "float4 TexCU : TEXCOORD4, float4 TexLD : TEXCOORD5, \n"
02170         "float4 TexLU : TEXCOORD6, float4 TexRD : TEXCOORD7,\n"
02171         "out float4 FragData0 : COLOR0, uniform samplerRECT tex, \n"
02172         "uniform samplerRECT texU, uniform samplerRECT texD)\n"
02173         "{\n"
02174         "       float2 TexRU = float2(TexRC.x, TexCU.y); \n"
02175         "       float4 ccc = texRECT(tex, TexCC.xy);\n"
02176         "       float4 clc = texRECT(tex, TexLC.xy);\n"
02177         "       float4 crc = texRECT(tex, TexRC.xy);\n"
02178         "       float4 ccd = texRECT(tex, TexCD.xy);\n"
02179         "       float4 ccu = texRECT(tex, TexCU.xy);\n"
02180         "       float4 cld = texRECT(tex, TexLD.xy);\n"
02181         "       float4 clu = texRECT(tex, TexLU.xy);\n"
02182         "       float4 crd = texRECT(tex, TexRD.xy);\n"
02183         "       float4 cru = texRECT(tex, TexRU.xy);\n"
02184         "       float4  cc = ccc;\n"
02185         "       float4  v1[4], v2[4];\n"
02186         "       v1[0] = float4(clc.g, ccc.g, ccd.b, ccc.b);\n"
02187         "       v1[1] = float4(ccc.r, crc.r, ccd.a, ccc.a);\n"
02188         "       v1[2] = float4(clc.a, ccc.a, ccc.r, ccu.r);\n"
02189         "       v1[3] = float4(ccc.b, crc.b, ccc.g, ccu.g);\n"
02190         "       v2[0] = float4(cld.a, clc.a, ccd.a, ccc.a);\n"
02191         "       v2[1] = float4(ccd.b, ccc.b, crd.b, crc.b);\n"
02192         "       v2[2] = float4(clc.g, clu.g, ccc.g, ccu.g);\n"
02193         "       v2[3] = float4(ccc.r, ccu.r, crc.r, cru.r);\n"
02194 
02195         //test against 8 neighbours
02196         //use variable to identify type of extremum
02197         //1.0 for local maximum and -1.0 for minimum
02198         <<
02199         "       float4 key ={0, 0, 0, 0}; \n"
02200         "       for(int i = 0; i < 4; i++)\n"
02201         "       {\n"
02202         "               bool4 test1 = cc[i] > max(v1[i], v2[i]), test2 = cc[i] < min(v1[i], v2[i]);\n"
02203         "               key[i] = cc[i] > THRESHOLD0 && all(test1.xy&&test1.zw)?1.0: 0.0;\n"
02204         "               key[i] = cc[i] < -THRESHOLD0 && all(test2.xy&&test2.zw)? -1.0: key[i];\n"
02205         "       }\n"
02206         "       if(TexCC.x < 1.0) {key.rb = 0;}\n"
02207         "       if(TexCC.y < 1.0) {key.rg = 0;}\n"
02208         "       FragData0 = float4(0.0);\n"
02209         "       if(all(key == 0.0)) return; \n";
02210 
02211         //do edge supression first.. 
02212         //vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
02213         //vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
02214 
02215         out<<
02216         "       float fxx[4], fyy[4], fxy[4], fx[4], fy[4];\n"
02217         "       for(int i = 0; i < 4; i++) \n"
02218         "       {\n"
02219         "               if(key[i] != 0)\n"
02220         "               {\n"
02221         "                       float4 D2 = v1[i].xyzw - cc[i];\n"
02222         "                       float2 D4 = v2[i].xw - v2[i].yz;\n"
02223         "                       float2 D5 = 0.5*(v1[i].yw-v1[i].xz); \n"
02224         "                       fx[i] = D5.x;\n"
02225         "                       fy[i] = D5.y ;\n"
02226         "                       fxx[i] = D2.x + D2.y;\n"
02227         "                       fyy[i] = D2.z + D2.w;\n"
02228         "                       fxy[i] = 0.25*(D4.x + D4.y);\n"
02229         "                       float fxx_plus_fyy = fxx[i] + fyy[i];\n"
02230         "                       float score_up = fxx_plus_fyy*fxx_plus_fyy; \n"
02231         "                       float score_down = (fxx[i]*fyy[i] - fxy[i]*fxy[i]);\n"
02232         "                       if( score_down <= 0 || score_up > THRESHOLD2 * score_down)key[i] = 0;\n"
02233         "               }\n"
02234         "       }\n"
02235         "       if(all(key == 0.0)) return; \n\n";
02236 
02238         //read 9 pixels of upper/lower level
02239         out<<
02240         "       float4  v4[4], v5[4], v6[4];\n"
02241         "       ccc = texRECT(texU, TexCC.xy);\n"
02242         "       clc = texRECT(texU, TexLC.xy);\n"
02243         "       crc = texRECT(texU, TexRC.xy);\n"
02244         "       ccd = texRECT(texU, TexCD.xy);\n"
02245         "       ccu = texRECT(texU, TexCU.xy);\n"
02246         "       cld = texRECT(texU, TexLD.xy);\n"
02247         "       clu = texRECT(texU, TexLU.xy);\n"
02248         "       crd = texRECT(texU, TexRD.xy);\n"
02249         "       cru = texRECT(texU, TexRU.xy);\n"
02250         "       float4  cu = ccc;\n"
02251         "       v4[0] = float4(clc.g, ccc.g, ccd.b, ccc.b);\n"
02252         "       v4[1] = float4(ccc.r, crc.r, ccd.a, ccc.a);\n"
02253         "       v4[2] = float4(clc.a, ccc.a, ccc.r, ccu.r);\n"
02254         "       v4[3] = float4(ccc.b, crc.b, ccc.g, ccu.g);\n"
02255         "       v6[0] = float4(cld.a, clc.a, ccd.a, ccc.a);\n"
02256         "       v6[1] = float4(ccd.b, ccc.b, crd.b, crc.b);\n"
02257         "       v6[2] = float4(clc.g, clu.g, ccc.g, ccu.g);\n"
02258         "       v6[3] = float4(ccc.r, ccu.r, crc.r, cru.r);\n"
02259         <<
02260         "       for(int i = 0; i < 4; i++)\n"
02261         "       {\n"
02262         "               if(key[i] == 1.0)\n"
02263         "               {\n"
02264         "                       bool4 test = cc[i]< max(v4[i], v6[i]); \n"
02265         "                       if(cc[i] < cu[i] || any(test.xy||test.zw))key[i] = 0.0; \n"
02266         "               }else if(key[i] == -1.0)\n"
02267         "               {\n"
02268         "                       bool4 test = cc[i]> min( v4[i], v6[i]); \n"
02269         "                       if(cc[i] > cu[i] || any(test.xy||test.zw))key[i] = 0.0; \n"
02270         "               }\n"
02271         "       }\n"
02272         "       if(all(key == 0.0)) return; \n"
02273         <<
02274         "       ccc = texRECT(texD, TexCC.xy);\n"
02275         "       clc = texRECT(texD, TexLC.xy);\n"
02276         "       crc = texRECT(texD, TexRC.xy);\n"
02277         "       ccd = texRECT(texD, TexCD.xy);\n"
02278         "       ccu = texRECT(texD, TexCU.xy);\n"
02279         "       cld = texRECT(texD, TexLD.xy);\n"
02280         "       clu = texRECT(texD, TexLU.xy);\n"
02281         "       crd = texRECT(texD, TexRD.xy);\n"
02282         "       cru = texRECT(texD, TexRU.xy);\n"
02283         "       float4  cd = ccc;\n"
02284         "       v5[0] = float4(clc.g, ccc.g, ccd.b, ccc.b);\n"
02285         "       v5[1] = float4(ccc.r, crc.r, ccd.a, ccc.a);\n"
02286         "       v5[2] = float4(clc.a, ccc.a, ccc.r, ccu.r);\n"
02287         "       v5[3] = float4(ccc.b, crc.b, ccc.g, ccu.g);\n"
02288         "       v6[0] = float4(cld.a, clc.a, ccd.a, ccc.a);\n"
02289         "       v6[1] = float4(ccd.b, ccc.b, crd.b, crc.b);\n"
02290         "       v6[2] = float4(clc.g, clu.g, ccc.g, ccu.g);\n"
02291         "       v6[3] = float4(ccc.r, ccu.r, crc.r, cru.r);\n"
02292         <<
02293         "       for(int i = 0; i < 4; i++)\n"
02294         "       {\n"
02295         "               if(key[i] == 1.0)\n"
02296         "               {\n"
02297         "                       bool4 test = cc[i]< max(v5[i], v6[i]);\n"
02298         "                       if(cc[i] < cd[i] || any(test.xy||test.zw))key[i] = 0.0; \n"
02299         "               }else if(key[i] == -1.0)\n"
02300         "               {\n"
02301         "                       bool4 test = cc[i]>min(v5[i],v6[i]);\n"
02302         "                       if(cc[i] > cd[i] || any(test.xy||test.zw))key[i] = 0.0; \n"
02303         "               }\n"
02304         "       }\n"
02305         "       float keysum = dot(abs(key), float4(1, 1, 1, 1)) ;\n"
02306         "       //assume there is only one keypoint in the four. \n"
02307         "       if(keysum != 1.0) return;       \n";
02308 
02310         if(GlobalUtil::_SubpixelLocalization)
02311 
02312         out <<
02313         "       float3 offset = float3(0, 0, 0); \n"
02314         "       /*The unrolled follwing loop is faster than a dynamic indexing version.*/\n"
02315         "       for(int idx = 1; idx < 4; idx++)\n"
02316         "       {\n"
02317         "               if(key[idx] != 0) \n"
02318         "               {\n"
02319         "                       cu[0] = cu[idx];        cd[0] = cd[idx];        cc[0] = cc[idx];        \n"
02320         "                       v4[0] = v4[idx];        v5[0] = v5[idx];                                                \n"
02321         "                       fxy[0] = fxy[idx];      fxx[0] = fxx[idx];      fyy[0] = fyy[idx];      \n"
02322         "                       fx[0] = fx[idx];        fy[0] = fy[idx];                                                \n"
02323         "               }\n"
02324         "       }\n"
02325         <<
02326                 
02327         "       float fs = 0.5*( cu[0] - cd[0] );                               \n"
02328         "       float fss = cu[0] + cd[0] - cc[0] - cc[0];\n"
02329         "       float fxs = 0.25 * (v4[0].y + v5[0].x - v4[0].x - v5[0].y);\n"
02330         "       float fys = 0.25 * (v4[0].w + v5[0].z - v4[0].z - v5[0].w);\n"
02331         "       float4 A0, A1, A2 ;                     \n"
02332         "       A0 = float4(fxx[0], fxy[0], fxs, -fx[0]);       \n"
02333         "       A1 = float4(fxy[0], fyy[0], fys, -fy[0]);       \n"
02334         "       A2 = float4(fxs, fys, fss, -fs);        \n"
02335         "       float3 x3 = abs(float3(fxx[0], fxy[0], fxs));           \n"
02336         "       float maxa = max(max(x3.x, x3.y), x3.z);        \n"
02337         "       if(maxa >= 1e-10 ) \n"
02338         "       {                                                                                               \n"
02339         "               if(x3.y ==maxa )                                                        \n"
02340         "               {                                                                                       \n"
02341         "                       float4 TEMP = A1; A1 = A0; A0 = TEMP;   \n"
02342         "               }else if( x3.z == maxa )                                        \n"
02343         "               {                                                                                       \n"
02344         "                       float4 TEMP = A2; A2 = A0; A0 = TEMP;   \n"
02345         "               }                                                                                       \n"
02346         "               A0 /= A0.x;                                                                     \n"
02347         "               A1 -= A1.x * A0;                                                        \n"
02348         "               A2 -= A2.x * A0;                                                        \n"
02349         "               float2 x2 = abs(float2(A1.y, A2.y));            \n"
02350         "               if( x2.y > x2.x )                                                       \n"
02351         "               {                                                                                       \n"
02352         "                       float3 TEMP = A2.yzw;                                   \n"
02353         "                       A2.yzw = A1.yzw;                                                \n"
02354         "                       A1.yzw = TEMP;                                                  \n"
02355         "                       x2.x = x2.y;                                                    \n"
02356         "               }                                                                                       \n"
02357         "               if(x2.x >= 1e-10) {                                                             \n"
02358         "                       A1.yzw /= A1.y;                                                         \n"
02359         "                       A2.yzw -= A2.y * A1.yzw;                                        \n"
02360         "                       if(abs(A2.z) >= 1e-10) {\n"
02361         "                               offset.z = A2.w /A2.z;                              \n"
02362         "                               offset.y = A1.w - offset.z*A1.z;                            \n"
02363         "                               offset.x = A0.w - offset.z*A0.z - offset.y*A0.y;        \n"
02364         "                               bool test = (abs(cc[0] + 0.5*dot(float3(fx[0], fy[0], fs), offset ))>THRESHOLD1) ;\n"
02365         "                               if(!test || any( abs(offset) >= 1.0)) return;\n"
02366         "                       }\n"
02367         "               }\n"
02368         "       }\n"
02369         <<"\n"
02370         "       float keyv = dot(key, float4(1.0, 2.0, 3.0, 4.0));\n"
02371         "       FragData0 = float4(keyv,  offset);\n"
02372         "}\n"   <<'\0';
02373 
02374         else out << "\n"
02375         "       float keyv = dot(key, float4(1.0, 2.0, 3.0, 4.0));\n"
02376         "       FragData0 =  float4(keyv, 0, 0, 0);\n"
02377         "}\n"   <<'\0';
02378 
02379         s_keypoint = program = new ProgramCG(buffer);
02380         //parameter
02381         _param_dog_texu = cgGetNamedParameter(*program, "texU");
02382         _param_dog_texd = cgGetNamedParameter(*program, "texD");
02383 }
02384 
// Builds and compiles the Cg program that accumulates the orientation
// histogram, then caches the handles of its "gtex", "otex" and "size"
// parameters.
//
// The shader text is streamed into a fixed 10240-byte stack buffer in three
// parts:
//   1. #defines taken from GlobalUtil (_OrientationGaussianFactor,
//      _OrientationWindowFactor, _MulitiOrientationThreshold) and the signature
//      of main(). A second output (OrientationData : COLOR1) is declared only
//      when _MaxOrientation > 1 and _OrientationPack2 == 0.
//   2. The sampling loop that votes samples into bins[]. The bin update is
//      emitted either with dynamic array indexing (when _UseDynamicIndexing is
//      set and the fragment profile string is "gp4fp") or as an unrolled chain
//      of comparisons.
//   3. The remainder of the program, appended by
//      ShaderBagCG::WriteOrientationCodeToStream().
// The compiled program is stored in s_orientation.
//
// NOTE(review): nothing here checks that the generated text fits in the buffer,
// and this function does not write the terminating '\0' itself -- presumably
// WriteOrientationCodeToStream() does; confirm.
02385 void ShaderBagPKCG::LoadOrientationShader()
02386 {
02387         char buffer[10240];
02388         ostrstream out(buffer,10240);
02389 
        // Tunable constants are baked into the shader source as #defines.
02390         out<<"\n"
02391         "#define GAUSSIAN_WF "<<GlobalUtil::_OrientationGaussianFactor<<" \n"
02392         "#define SAMPLE_WF ("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
02393         "#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
02394         "void main(uniform samplerRECT tex,     uniform samplerRECT gtex,               \n"
02395         "               uniform samplerRECT otex,       uniform float4 size, in float2 TexCoord0 : TEXCOORD0,   \n"
02396         "               out float4 FeatureData : COLOR0 ";
02397 
02398         //multi orientation output
02399         //use one additional texture to store up to four orientations
02400         //when we use one 32bit float to store two orientations, no extra texture is required
02401 
02402         if(GlobalUtil::_MaxOrientation >1  && GlobalUtil::_OrientationPack2 == 0)
02403                 out<<", out float4 OrientationData : COLOR1";
02404 
02405 
02406         //use 9 float4 to store histogram of 36 directions
        // NOTE(review): the shader declares bins[10] but zeroes only bins[0..8];
        // whether bins[9] is used depends on the code appended later by
        // WriteOrientationCodeToStream() -- confirm.
02407         out<<")         \n"
02408         "{                                                                                                      \n"
02409         "       float4 bins[10];                                                                \n"
02410         "       for (int i=0; i<9; i++) bins[i] = float4(0,0,0,0);      \n"
02411         "       float4 sift = texRECT(tex, TexCoord0);          \n"
02412         "       float2 pos = sift.xy; \n"
02413         "       bool orientation_mode = (size.z != 0);          \n"
02414         "       float sigma = orientation_mode? (abs(size.z) * pow(size.w, sift.w) * sift.z) : (sift.w); \n"
02415         "       //bool fixed_orientation = (size.z < 0);                \n"
02416         "       if(size.z < 0) {FeatureData = float4(pos, 0, sigma); return;}"
02417         "       float gsigma = sigma * GAUSSIAN_WF;                             \n"
02418         "       float2 win = abs(sigma.xx) * (SAMPLE_WF * GAUSSIAN_WF); \n"
02419         "       float2 dim = size.xy;                                                   \n"
02420         "       float4 dist_threshold = float4(win.x*win.x+0.5);        \n"
02421         "       float factor = -0.5/(gsigma*gsigma);                    \n"
02422         "       float4 sz;      float2 spos;                                            \n"
02423         "       //if(any(pos.xy <= 1)) discard;                                 \n"
02424         "       sz.xy = max( pos - win, float2(2,2));                   \n"
02425         "       sz.zw = min( pos + win, dim-3);                         \n"
02426         "       sz = floor(sz*0.5) + 0.5; ";
02427         //loop to get the histogram
02428 
02429         out<<"\n"
02430         "       for(spos.y = sz.y; spos.y <= sz.w;      spos.y+=1.0)                            \n"
02431         "       {                                                                                                                               \n"
02432         "               for(spos.x = sz.x; spos.x <= sz.z;      spos.x+=1.0)                    \n"
02433         "               {                                                                                                                       \n"
02434         "                       float2 offset = 2* spos - pos - 0.5;                                    \n"
02435         "                       float4 off = float4(offset, offset + 1);                                \n"
02436         "                       float4 distsq = off.xzxz * off.xzxz + off.yyww * off.yyww;      \n"
02437         "                       bool4 inside = distsq < dist_threshold;                                 \n"
02438         "                       if(any(inside.xy||inside.zw))                                                   \n"
02439         "                       {                                                                                                               \n"
02440         "                               float4 gg = texRECT(gtex, spos);                                        \n"
02441         "                               float4 oo = texRECT(otex, spos);                                        \n"
02442         "                               float4 weight = gg * exp(distsq * factor);                      \n"
02443         "                               float4 idxv  = floor(degrees(oo)*0.1);                          \n"
02444         "                               idxv = idxv<0? idxv + 36.0: idxv;                                       \n"
02445         "                               float4 vidx = 4.0* fract(idxv * 0.25);//fmod(idxv, 4.0);\n";
02446 
02447         //
        // Pick how the shader adds a sample to bins[]: the profile is compared by
        // name because the matching enumerant is unavailable in Cg 1.5 (see below).
02448         if(GlobalUtil::_UseDynamicIndexing && strcmp(cgGetProfileString(ProgramCG::_FProfile), "gp4fp")==0)
02449         //if(ProgramCG::_FProfile == CG_PROFILE_GPU_FP) this enumerant is not defined in cg1.5
02450         {
02451                 //gp4fp supports dynamic indexing, but it might be slow on some GPUs
02452                 out<<"\n"
02453         "                               for(int i = 0 ; i < 4; i++)\n"
02454         "                               {\n"
02455         "                                       if(inside[i])\n"
02456         "                                       {\n"
02457         "                                               float idx = idxv[i];                                                            \n"
02458         "                                               float4 inc = weight[i] * float4(vidx[i] == float4(0,1,2,3));    \n"
02459         "                                               int iidx = int(floor(idx*0.25));        \n"
02460         "                                               bins[iidx]+=inc;                                        \n"
02461         "                                       }                                                                               \n"
02462         "                               }                                                                                       \n"
02463         "                       }                                                                                               \n"
02464         "               }                                                                                                       \n"
02465         "       }";
02466 
02467         }else
02468         {
02469                 //nvfp40 still does not support dynamic array indexing
02470                 //unrolled binary search
02471                 //it seems to be faster than the dynamic indexing version on some GPUs
02472                 out<<"\n"
02473         "                               for(int i = 0 ; i < 4; i++)\n"
02474         "                               {\n"
02475         "                                       if(inside[i])\n"
02476         "                                       {\n"
02477         "                                               float idx = idxv[i];                                                                                    \n"
02478         "                                               float4 inc = weight[i] * float4(vidx[i] == float4(0,1,2,3));    \n"
02479         "                                               if(idx < 16)                                                    \n"
02480         "                                               {                                                                               \n"
02481         "                                                       if(idx < 8)                                                     \n"
02482         "                                                       {                                                                       \n"
02483         "                                                               if(idx < 4)     {       bins[0]+=inc;}  \n"
02484         "                                                               else            {       bins[1]+=inc;}  \n"
02485         "                                                       }else                                                           \n"
02486         "                                                       {                                                                       \n"
02487         "                                                               if(idx < 12){   bins[2]+=inc;}  \n"
02488         "                                                               else            {       bins[3]+=inc;}  \n"
02489         "                                                       }                                                                       \n"
02490         "                                               }else if(idx < 32)                                              \n"
02491         "                                               {                                                                               \n"
02492         "                                                       if(idx < 24)                                            \n"
02493         "                                                       {                                                                       \n"
02494         "                                                               if(idx <20)     {       bins[4]+=inc;}  \n"
02495         "                                                               else            {       bins[5]+=inc;}  \n"
02496         "                                                       }else                                                           \n"
02497         "                                                       {                                                                       \n"
02498         "                                                               if(idx < 28){   bins[6]+=inc;}  \n"
02499         "                                                               else            {       bins[7]+=inc;}  \n"
02500         "                                                       }                                                                       \n"
02501         "                                               }else                                           \n"
02502         "                                               {                                                                               \n"
02503         "                                                       bins[8]+=inc;                                           \n"
02504         "                                               }                                                                               \n"
02505         "                                       }                                                                                       \n"
02506         "                               }                                                                                               \n"
02507         "                       }                                                                               \n"
02508         "               }                                                                                       \n"
02509         "       }";
02510 
02511         }
02512 
02513         //reuse the code from the unpacked version..
02514         ShaderBagCG::WriteOrientationCodeToStream(out);
02515 
02516 
        // Compile the generated text and cache the parameter handles that the
        // orientation setters in this class write to.
02517         ProgramCG * program;
02518         s_orientation = program = new ProgramCG(buffer);
02519         _param_orientation_gtex = cgGetNamedParameter(*program, "gtex");
02520         _param_orientation_otex = cgGetNamedParameter(*program, "otex");
02521         _param_orientation_size = cgGetNamedParameter(*program, "size");
02522 
02523 
02524 }
02525 
// Sets GlobalUtil::_DescriptorPPT to 16 and then builds the descriptor program
// through LoadDescriptorShaderF2().
// NOTE(review): 16 presumably matches the "128/8 = 16" remark at the top of
// LoadDescriptorShaderF2() -- confirm what _DescriptorPPT stands for.
02526 void ShaderBagPKCG::LoadDescriptorShader()
02527 {
02528         GlobalUtil::_DescriptorPPT = 16;
02529         LoadDescriptorShaderF2();
02530 
02531 }
02532 
// Builds and compiles the Cg program that computes descriptor values, stores
// it in s_descriptor_fp, and caches the handles of its "gtex", "otex", "size"
// and "dsize" parameters.
//
// What the generated main() does, as far as the text below shows:
//   - derives a feature index and a sub-block index (idx) from TexCoord0 and
//     dsize, and reads position (xy), angle (z) and sigma (w) from tex;
//   - writes zeros and returns when the position is within one pixel of the
//     border given by size.xy;
//   - rotates/scales a sampling window by the angle and by sigma * size.z (WF),
//     then loops over gtex/otex samples inside that window;
//   - accumulates eight weighted bins, four in DA and four in DB, which are
//     written to FragData0 and FragData1.
// The shader text is streamed into a fixed 10240-byte stack buffer and is
// explicitly terminated with '\0' before compilation.
02533 void ShaderBagPKCG::LoadDescriptorShaderF2()
02534 {
02535         //one shader output 128/8 = 16 , each fragout encodes 4
02536         //const double twopi = 2.0*3.14159265358979323846;
02537         //const double rpi  = 8.0/twopi;
02538         char buffer[10240];
02539         ostrstream out(buffer, 10240);
02540 
        // NOTE(review): only string literals and '\0' are streamed below, so this
        // precision setting has no visible effect within this function.
02541         out<<setprecision(8);
02542 
02543         out<<"\n"
02544         "#define M_PI 3.14159265358979323846\n"
02545         "#define TWO_PI (2.0*M_PI)\n"
02546         "#define RPI 1.2732395447351626861510701069801\n"
02547         "#define WF size.z\n"
02548         "void main(uniform samplerRECT tex,             \n"
02549         "uniform        samplerRECT gtex,                               \n"
02550         "uniform samplerRECT otex,                              \n"
02551         "uniform float4         dsize,                          \n"
02552         "uniform float3         size,                           \n"
02553         "in             float2  TexCoord0 : TEXCOORD0,  \n"
02554         "out            float4  FragData0:COLOR0,               \n"
02555         "out            float4  FragData1:COLOR1)               \n"
02556         "{\n"
02557         "       float2 dim      = size.xy;      //image size                    \n"
02558         "       float index = dsize.x*floor(TexCoord0.y * 0.5) + TexCoord0.x;\n"
02559         "       float idx = 8.0 * frac(index * 0.125) + 8.0 * floor(2.0 * frac(TexCoord0.y * 0.5));             \n"
02560         "       index = floor(index*0.125)+ 0.49;  \n"
02561         "       float2 coord = floor( float2( fmod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
02562         "       float2 pos = texRECT(tex, coord).xy;            \n"
02563         "       if(any(pos.xy <= 1) || any(pos.xy >=dim-1)) "
02564         "       //discard;      \n"
02565         "       { FragData0 = FragData1 = float4(0.0); return; }\n"
02566         "       float anglef = texRECT(tex, coord).z;\n"
02567         "       if(anglef > M_PI) anglef -= TWO_PI;\n"
02568         "       float sigma = texRECT(tex, coord).w; \n"
02569         "       float spt  = abs(sigma * WF);   //default to be 3*sigma \n";
02570         //rotation
02571         out<<
02572         "       float4 cscs, rots;                                                              \n"
02573         "       sincos(anglef, cscs.y, cscs.x);                                 \n"
02574         "       cscs.zw = - cscs.xy;                                                    \n"
02575         "       rots = cscs /spt;                                                               \n"
02576         "       cscs *= spt; \n";
02577 
02578         //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
02579         //and rots is  (cos, sin, -cos, -sin ) /(factor*sigma)
02580         //divide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
02581         //To use linear interpolation, 1x1 is increased to 2x2, by adding 0.5 to each side
02582         out<<
02583         "       float4 temp; float2 pt, offsetpt;                               \n"
02584         "       /*the fraction part of idx is .5*/                      \n"
02585         "       offsetpt.x = 4.0 * fract(idx * 0.25) - 2.0;                             \n"
02586         "       offsetpt.y = floor(idx*0.25) - 1.5;                     \n"
02587         "       temp = cscs.xwyx*offsetpt.xyxy;                         \n"
02588         "       pt = pos + temp.xz + temp.yw;                           \n";
02589         
02590         //get a horizontal bounding box of the rotated rectangle
02591         out<<
02592         "       float2 bwin = abs(cscs.xy);                                     \n"
02593         "       float bsz = bwin.x + bwin.y;                                    \n"
02594         "       float4 sz;      float2 spos;                                    \n"
02595         "       sz.xy = max(pt - bsz, float2(2,2));\n"
02596         "       sz.zw = min(pt + bsz, dim - 3);         \n"
02597         "       sz = floor(sz * 0.5) + 0.5;"; //move sample point to pixel center
02598         //get voting for two box
02599 
02600         out<<"\n"
02601         "       float4 DA, DB;                                          \n"
02602         "       DA = DB  = float4(0, 0, 0, 0);          \n"
02603         "       float4 nox = float4(0, rots.xy, rots.x + rots.y);                                       \n"
02604         "       float4 noy = float4(0, rots.wx, rots.w + rots.x);                                       \n"
02605         "       for(spos.y = sz.y; spos.y <= sz.w;      spos.y+=1.0)                            \n"
02606         "       {                                                                                                                               \n"
02607         "               for(spos.x = sz.x; spos.x <= sz.z;      spos.x+=1.0)                    \n"
02608         "               {                                                                                                                       \n"
02609         "                       float2 tpt = spos * 2.0 - pt - 0.5;                                     \n"
02610         "                       float4 temp = rots.xywx * tpt.xyxy;                                             \n"
02611         "                       float2 temp2 = temp.xz + temp.yw;                                               \n"
02612         "                       float4 nx = temp2.x + nox;                                                              \n"
02613         "                       float4 ny = temp2.y + noy;                      \n"
02614         "                       float4 nxn = abs(nx), nyn = abs(ny);                                            \n"
02615         "                       bool4 inside = (max(nxn, nyn) < 1.0);   \n"
02616         "                       if(any(inside.xy || inside.zw))\n"
02617         "                       {\n"
02618         "                               float4 gg = texRECT(gtex, spos);\n"
02619         "                               float4 oo = texRECT(otex, spos);\n"
02620         "                               float4 theta0 = (anglef - oo)*RPI;\n"
02621         "                               float4 theta = theta0 < 0? theta0 + 8.0 : theta0;//8.0 * frac(1.0 + 0.125 * theta0);//          \n"
02622         "                               float4 theta1 = floor(theta); \n"
02623         "                               float4 diffx = nx + offsetpt.x, diffy = ny + offsetpt.y;        \n"
02624         "                               float4 ww = exp(-0.125 * (diffx * diffx + diffy * diffy ));     \n"
02625         "                               float4 weight = (1 - nxn) * (1 - nyn) * gg * ww; \n"
02626         "                               float4 weight2 = (theta - theta1) * weight;                                                     \n"
02627         "                               float4 weight1 = weight - weight2;                                              \n"
02628         "                               for(int i = 0;i < 4; i++)\n"
02629         "                               {\n"
02630         "                                       if(inside[i])\n"
02631         "                                       {\n"
02632         "                                               DA += float4(theta1[i] == float4(0, 1, 2, 3))*weight1[i];       \n"
02633         "                                               DA += float4(theta1[i] == float4(7, 0, 1, 2))*weight2[i];       \n"
02634         "                                               DB += float4(theta1[i] == float4(4, 5, 6, 7))*weight1[i];       \n"
02635         "                                               DB += float4(theta1[i] == float4(3, 4, 5, 6))*weight2[i];       \n"
02636         "                                       }\n"
02637         "                               }\n"
02638         "                       }\n"
02639         "               }\n"
02640         "       }\n";
        // Emit the two outputs and terminate the buffer so it can be read as a
        // C string by ProgramCG.
02641         out<<
02642         "        FragData0 = DA; FragData1 = DB;\n"
02643         "}\n"<<'\0';
02644         ProgramCG * program; 
02645 
02646         s_descriptor_fp = program =  new ProgramCG(buffer);
02647         _param_descriptor_gtex = cgGetNamedParameter(*program, "gtex");
02648         _param_descriptor_otex = cgGetNamedParameter(*program, "otex");
02649         _param_descriptor_size = cgGetNamedParameter(*program, "size");
02650         _param_descriptor_dsize = cgGetNamedParameter(*program, "dsize");
02651 
02652 }
02653 
02654 void ShaderBagPKCG::SetMarginCopyParam(int xmax, int ymax)
02655 {
02656         float truncate[4];
02657         truncate[0] = (xmax - 0.5f) * 0.5f; //((xmax + 1)  >> 1) - 0.5f;
02658         truncate[1] = (ymax - 0.5f) * 0.5f; //((ymax + 1)  >> 1) - 0.5f;
02659         truncate[2] = (xmax %2 == 1)? 0.0f: 1.0f;
02660         truncate[3] = truncate[2] +  (((ymax % 2) == 1)? 0.0f : 2.0f);
02661         cgGLSetParameter4fv(_param_margin_copy_truncate, truncate);
02662 }
02663 
// Assigns texture texP to the sampler parameter _param_grad_pass_texp and then
// enables that texture parameter.
02664 void ShaderBagPKCG::SetGradPassParam(int texP)
02665 {
02666         cgGLSetTextureParameter(_param_grad_pass_texp, texP);
02667         cgGLEnableTextureParameter(_param_grad_pass_texp);
02668 }
02669 
// Assigns texture ktex to the sampler parameter _param_genlist_end_ktex and
// then enables that texture parameter.
02670 void ShaderBagPKCG::SetGenListEndParam(int ktex)
02671 {
02672         cgGLSetTextureParameter(_param_genlist_end_ktex, ktex);
02673         cgGLEnableTextureParameter(_param_genlist_end_ktex);
02674 }
02675 
// Assigns textures texU and texD to the sampler parameters _param_dog_texu and
// _param_dog_texd (the keypoint program's "texU" and "texD" inputs), enabling
// each one right after it is set.
02676 void ShaderBagPKCG::SetDogTexParam(int texU, int texD)
02677 {
02678         cgGLSetTextureParameter(_param_dog_texu, texU);
02679         cgGLEnableTextureParameter(_param_dog_texu);
02680         cgGLSetTextureParameter(_param_dog_texd, texD);
02681         cgGLEnableTextureParameter(_param_dog_texd);
02682 }
02683 
02684 void ShaderBagPKCG::SetGenListInitParam(int w, int h)
02685 {
02686         float bbox[4] = {(w -1.0f) * 0.5f +0.25f, (w-1.0f) * 0.5f - 0.25f,  (h - 1.0f) * 0.5f + 0.25f, (h-1.0f) * 0.5f - 0.25f};
02687         cgGLSetParameter4fv(_param_genlist_init_bbox, bbox);
02688 }
02689 
02690 
02691 void ShaderBagPKCG::SetGenListStartParam(float width, int tex0)
02692 {
02693         cgGLSetParameter1f(_param_ftex_width, width);
02694 
02695         if(_param_genlist_start_tex0)
02696         {
02697                 cgGLSetTextureParameter(_param_genlist_start_tex0, tex0);
02698                 cgGLEnableTextureParameter(_param_genlist_start_tex0);
02699         }
02700 }
02701 
02702 
02703 
// Assigns texture tex to _param_genlist_step_tex and texture tex0 to
// _param_genlist_step_tex0, enabling each parameter right after it is set.
02704 void ShaderBagPKCG::SetGenListStepParam(int tex, int tex0)
02705 {
02706         cgGLSetTextureParameter(_param_genlist_step_tex, tex);
02707         cgGLEnableTextureParameter(_param_genlist_step_tex);
02708         cgGLSetTextureParameter(_param_genlist_step_tex0, tex0);
02709         cgGLEnableTextureParameter(_param_genlist_step_tex0);
02710 }
02711 
02712 void ShaderBagPKCG::SetGenVBOParam(float width, float fwidth, float size)
02713 {
02714         float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width};
02715         cgGLSetParameter4fv(_param_genvbo_size, sizes);
02716 }
02717 
// Assigns texture oTex to _param_orientation_gtex, enables it, and writes
// (sigma, sigma_step) as two floats to _param_orientation_size.
// NOTE(review): only two components are written here, whereas
// SetFeatureOrientationParam() writes four to the same handle -- confirm which
// program is expected to be active when this is called.
02718 void ShaderBagPKCG::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
02719 {
02720         cgGLSetTextureParameter(_param_orientation_gtex, oTex);
02721         cgGLEnableTextureParameter(_param_orientation_gtex);
02722         cgGLSetParameter2f(_param_orientation_size, sigma, sigma_step);
02723 }
02724 
02725 
02726 void ShaderBagPKCG::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int otex, float step)
02727 {
02729         cgGLSetTextureParameter(_param_orientation_gtex, gtex); 
02730         cgGLEnableTextureParameter(_param_orientation_gtex);
02731         cgGLSetTextureParameter(_param_orientation_otex, otex); 
02732         cgGLEnableTextureParameter(_param_orientation_otex);
02733 
02734         float size[4];
02735         size[0] = (float)width;
02736         size[1] = (float)height;
02737         size[2] = sigma;
02738         size[3] = step;
02739         cgGLSetParameter4fv(_param_orientation_size, size);
02740 
02741 }
02742 
02743 void ShaderBagPKCG::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth,  float width, float height, float sigma)
02744 {
02746 
02747         cgGLSetTextureParameter(_param_descriptor_gtex, gtex);  
02748         cgGLEnableTextureParameter(_param_descriptor_gtex);
02749         cgGLSetTextureParameter(_param_descriptor_otex, otex);  
02750         cgGLEnableTextureParameter(_param_descriptor_otex);
02751 
02752 
02753         float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
02754         cgGLSetParameter4fv(_param_descriptor_dsize, dsize);
02755         float size[3];
02756         size[0] = width;
02757         size[1] = height;
02758         size[2] = GlobalUtil::_DescriptorWindowFactor;
02759         cgGLSetParameter3fv(_param_descriptor_size, size);
02760 
02761 
02762 }
02763 
02764 #endif
02765 


siftgpu
Author(s): Changchang Wu
autogenerated on Wed Aug 26 2015 15:24:06