opencl_utils.cpp
Go to the documentation of this file.
00001 #include <opencl_utils.h>
00002 
00003 #define HDASHLINE "-----------------------------------------------------------\n"
00004 
00005 #include <fstream>
00006 #include <vector>
00007 #include <string.h>
00008 #include <iostream>
00009 #include <algorithm>
00010 #include <stdarg.h>
00011 #include <stdlib.h>
00012 
00013 cl_context g_clDeviceContext;      // File-level OpenCL context: assigned by oclInit(), returned by oclGetGlobalContext().
00014 cl_command_queue g_clDeviceQueue;  // File-level command queue: created by oclInit() on g_clDeviceContext, returned by oclGetGlobalQueue().
00015 
00016 cl_context oclGetGlobalContext( ) {
00017   return g_clDeviceContext;
00018 }
00019 
00020 cl_command_queue oclGetGlobalQueue( ) {
00021   return g_clDeviceQueue;
00022 }
00023 
00030 cl_int oclGetPlatformID(cl_platform_id* clSelectedPlatformID)
00031 {
00032   char chBuffer[1024];
00033   cl_uint num_platforms;
00034   cl_platform_id* clPlatformIDs;
00035   cl_int ciErrNum;
00036   *clSelectedPlatformID = NULL;
00037 
00038   // Get OpenCL platform count
00039   ciErrNum = clGetPlatformIDs (0, NULL, &num_platforms);
00040   if (ciErrNum != CL_SUCCESS)
00041   {
00042     printf(" Error %i in clGetPlatformIDs Call !!!\n\n", ciErrNum);
00043     return -1000;
00044   }
00045   else
00046   {
00047     if(num_platforms == 0)
00048     {
00049       printf("No OpenCL platform found!\n\n");
00050       return -2000;
00051     }
00052     else
00053     {
00054       // if there's a platform or more, make space for ID's
00055       if ((clPlatformIDs = (cl_platform_id*)malloc(num_platforms * sizeof(cl_platform_id))) == NULL)
00056       {
00057         printf("Failed to allocate memory for cl_platform ID's!\n\n");
00058         return -3000;
00059       }
00060 
00061       // get platform info for each platform and trap the NVIDIA platform if found
00062       ciErrNum = clGetPlatformIDs (num_platforms, clPlatformIDs, NULL);
00063       for(cl_uint i = 0; i < num_platforms; ++i)
00064       {
00065         ciErrNum = clGetPlatformInfo (clPlatformIDs[i], CL_PLATFORM_NAME, 1024, &chBuffer, NULL);
00066         if(ciErrNum == CL_SUCCESS)
00067         {
00068           if(strstr(chBuffer, "NVIDIA") != NULL)
00069           {
00070             *clSelectedPlatformID = clPlatformIDs[i];
00071             break;
00072           }
00073         }
00074       }
00075 
00076       // default to zeroeth platform if NVIDIA not found
00077       if(*clSelectedPlatformID == NULL)
00078       {
00079         printf("WARNING: NVIDIA OpenCL platform not found - defaulting to first platform!\n\n");
00080         *clSelectedPlatformID = clPlatformIDs[0];
00081       }
00082 
00083       free(clPlatformIDs);
00084     }
00085   }
00086 
00087   return CL_SUCCESS;
00088 }
00089 
00096 cl_device_id oclGetFirstDev(cl_context cxGPUContext)
00097 {
00098     size_t szParmDataBytes;
00099     cl_device_id* cdDevices;
00100 
00101     // get the list of GPU devices associated with context
00102     clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
00103     cdDevices = (cl_device_id*) malloc(szParmDataBytes);
00104 
00105     clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
00106 
00107     cl_device_id first = cdDevices[0];
00108     free(cdDevices);
00109 
00110     return first;
00111 }
00112 
00116 void oclInit( ) {
00117   cl_platform_id cpPlatform;
00118   cl_device_id cdDevice;
00119   cl_int ciErrNum;
00120 
00121   ciErrNum = oclGetPlatformID(&cpPlatform);
00122   oclCheckError(ciErrNum, CL_SUCCESS);
00123 
00124   ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &cdDevice, NULL);
00125   oclCheckError(ciErrNum, CL_SUCCESS);
00126 
00127   g_clDeviceContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErrNum);
00128   oclCheckError(ciErrNum, CL_SUCCESS);
00129 
00130   g_clDeviceQueue = clCreateCommandQueue(g_clDeviceContext, cdDevice, 0, &ciErrNum);
00131   oclCheckError(ciErrNum, CL_SUCCESS);
00132 }
00133 
00140 void oclShutdown(cl_context clContext, cl_command_queue clQueue) {
00141   cl_int ciErrNum;
00142   ciErrNum  = clReleaseCommandQueue(clQueue);
00143   ciErrNum |= clReleaseContext(clContext);
00144   oclCheckError(ciErrNum, CL_SUCCESS);
00145 }
00146 
// Loads a program source file into a newly allocated, NUL-terminated string,
// with an optional preamble prepended.
//
// cFilename      path of the file to read (opened in binary mode).
// cPreamble      text placed in front of the file contents; NULL is treated
//                as an empty preamble.
// szFinalLength  optional out: receives the combined length (preamble + file),
//                not counting the terminating NUL. May be NULL.
//
// Returns a malloc'ed buffer the caller must free(), or NULL if the file cannot
// be opened, measured, or read completely, or if memory cannot be allocated.
// An empty file is not an error: the result is just the preamble.
char* oclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength)
{
  if (cFilename == NULL)
  {
    return NULL;
  }
  if (cPreamble == NULL)
  {
    cPreamble = "";
  }

  // open the OpenCL source code file
  FILE* pFileStream = NULL;
#ifdef _WIN32   // Windows version
  if (fopen_s(&pFileStream, cFilename, "rb") != 0)
  {
    return NULL;
  }
#else           // Linux version
  pFileStream = fopen(cFilename, "rb");
  if (pFileStream == NULL)
  {
    return NULL;
  }
#endif

  const size_t szPreambleLength = strlen(cPreamble);

  // get the length of the source code; ftell reports failure as a negative value
  long lSourceLength = -1;
  if (fseek(pFileStream, 0, SEEK_END) == 0)
  {
    lSourceLength = ftell(pFileStream);
  }
  if (lSourceLength < 0 || fseek(pFileStream, 0, SEEK_SET) != 0)
  {
    fclose(pFileStream);
    return NULL;
  }
  const size_t szSourceLength = static_cast<size_t>(lSourceLength);

  // allocate a buffer for preamble + source + terminating NUL
  char* cSourceString = static_cast<char*>(malloc(szSourceLength + szPreambleLength + 1));
  if (cSourceString == NULL)
  {
    fclose(pFileStream);
    return NULL;
  }
  memcpy(cSourceString, cPreamble, szPreambleLength);

  // Read byte-wise so that a zero-length file yields 0 == 0 (success) instead
  // of being mistaken for a short read.
  const size_t szRead = fread(cSourceString + szPreambleLength, 1, szSourceLength, pFileStream);
  fclose(pFileStream);
  if (szRead != szSourceLength)
  {
    free(cSourceString);
    return NULL;
  }

  // terminate and report the total length of the combined (preamble + source) string
  cSourceString[szSourceLength + szPreambleLength] = '\0';
  if (szFinalLength != NULL)
  {
    *szFinalLength = szSourceLength + szPreambleLength;
  }

  return cSourceString;
}
00202 
00211 void oclGetProgBinary( cl_program cpProgram, cl_device_id cdDevice, char** binary, size_t* length)
00212 {
00213   // Grab the number of devices associated witht the program
00214   cl_uint num_devices;
00215   clGetProgramInfo(cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL);
00216 
00217   // Grab the device ids
00218   cl_device_id* devices = (cl_device_id*) malloc(num_devices * sizeof(cl_device_id));
00219   clGetProgramInfo(cpProgram, CL_PROGRAM_DEVICES, num_devices * sizeof(cl_device_id), devices, 0);
00220 
00221   // Grab the sizes of the binaries
00222   size_t* binary_sizes = (size_t*)malloc(num_devices * sizeof(size_t));
00223   clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, num_devices * sizeof(size_t), binary_sizes, NULL);
00224 
00225   // Now get the binaries
00226   char** ptx_code = (char**) malloc(num_devices * sizeof(char*));
00227   for( unsigned int i=0; i<num_devices; ++i) {
00228     ptx_code[i]= (char*)malloc(binary_sizes[i]);
00229   }
00230   clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, 0, ptx_code, NULL);
00231 
00232   // Find the index of the device of interest
00233   unsigned int idx = 0;
00234   while( idx<num_devices && devices[idx] != cdDevice ) ++idx;
00235 
00236   // If it is associated prepare the result
00237   if( idx < num_devices )
00238   {
00239     *binary = ptx_code[idx];
00240     *length = binary_sizes[idx];
00241   }
00242 
00243   // Cleanup
00244   free( devices );
00245   free( binary_sizes );
00246   for( unsigned int i=0; i<num_devices; ++i) {
00247     if( i != idx ) free(ptx_code[i]);
00248   }
00249   free( ptx_code );
00250 }
00251 
00259 void oclLogPtx(cl_program cpProgram, cl_device_id cdDevice, const char* cPtxFileName)
00260 {
00261   // Grab the number of devices associated with the program
00262   cl_uint num_devices;
00263   clGetProgramInfo(cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL);
00264 
00265   // Grab the device ids
00266   cl_device_id* devices = (cl_device_id*) malloc(num_devices * sizeof(cl_device_id));
00267   clGetProgramInfo(cpProgram, CL_PROGRAM_DEVICES, num_devices * sizeof(cl_device_id), devices, 0);
00268 
00269   // Grab the sizes of the binaries
00270   size_t* binary_sizes = (size_t*)malloc(num_devices * sizeof(size_t));
00271   clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, num_devices * sizeof(size_t), binary_sizes, NULL);
00272 
00273   // Now get the binaries
00274   char** ptx_code = (char**)malloc(num_devices * sizeof(char*));
00275   for( unsigned int i=0; i<num_devices; ++i)
00276   {
00277     ptx_code[i] = (char*)malloc(binary_sizes[i]);
00278   }
00279   clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, 0, ptx_code, NULL);
00280 
00281   // Find the index of the device of interest
00282   unsigned int idx = 0;
00283   while((idx < num_devices) && (devices[idx] != cdDevice))
00284   {
00285     ++idx;
00286   }
00287 
00288   // If the index is associated, log the result
00289   if(idx < num_devices)
00290   {
00291 
00292     // if a separate filename is supplied, dump ptx there
00293     if (NULL != cPtxFileName)
00294     {
00295       printf("\nWriting ptx to separate file: %s ...\n\n", cPtxFileName);
00296       FILE* pFileStream = NULL;
00297 #ifdef _WIN32
00298       fopen_s(&pFileStream, cPtxFileName, "wb");
00299 #else
00300       pFileStream = fopen(cPtxFileName, "wb");
00301 #endif
00302 
00303       fwrite(ptx_code[idx], binary_sizes[idx], 1, pFileStream);
00304       fclose(pFileStream);
00305     }
00306     else // log to logfile and console if no ptx file specified
00307     {
00308       printf("\n%s\nProgram Binary:\n%s\n%s\n", HDASHLINE, ptx_code[idx], HDASHLINE);
00309     }
00310   }
00311 
00312   // Cleanup
00313   free(devices);
00314   free(binary_sizes);
00315   for(unsigned int i = 0; i < num_devices; ++i)
00316   {
00317     free(ptx_code[i]);
00318   }
00319   free( ptx_code );
00320 }
00321 
00328 void oclLogBuildInfo(cl_program cpProgram, cl_device_id cdDevice)
00329 {
00330   char *cBuildLog;
00331   size_t retValSize;
00332 
00333   clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &retValSize);
00334   cBuildLog = new char[retValSize+1];
00335 
00336   clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, retValSize, cBuildLog, NULL);
00337   cBuildLog[retValSize] = '\0';
00338 
00339   printf("\n%s\nBUILD LOG:\n%s\n%s\n", HDASHLINE, cBuildLog, HDASHLINE);
00340 
00341   delete[] cBuildLog;
00342 }
00343 
00344 // Helper function to get OpenCL error string from constant
00345 // *********************************************************************
00346 const char* oclErrorString(cl_int error)
00347 {
00348   static const char* errorString[] = {
00349     "CL_SUCCESS",
00350     "CL_DEVICE_NOT_FOUND",
00351     "CL_DEVICE_NOT_AVAILABLE",
00352     "CL_COMPILER_NOT_AVAILABLE",
00353     "CL_MEM_OBJECT_ALLOCATION_FAILURE",
00354     "CL_OUT_OF_RESOURCES",
00355     "CL_OUT_OF_HOST_MEMORY",
00356     "CL_PROFILING_INFO_NOT_AVAILABLE",
00357     "CL_MEM_COPY_OVERLAP",
00358     "CL_IMAGE_FORMAT_MISMATCH",
00359     "CL_IMAGE_FORMAT_NOT_SUPPORTED",
00360     "CL_BUILD_PROGRAM_FAILURE",
00361     "CL_MAP_FAILURE",
00362     "",
00363     "",
00364     "",
00365     "",
00366     "",
00367     "",
00368     "",
00369     "",
00370     "",
00371     "",
00372     "",
00373     "",
00374     "",
00375     "",
00376     "",
00377     "",
00378     "",
00379     "CL_INVALID_VALUE",
00380     "CL_INVALID_DEVICE_TYPE",
00381     "CL_INVALID_PLATFORM",
00382     "CL_INVALID_DEVICE",
00383     "CL_INVALID_CONTEXT",
00384     "CL_INVALID_QUEUE_PROPERTIES",
00385     "CL_INVALID_COMMAND_QUEUE",
00386     "CL_INVALID_HOST_PTR",
00387     "CL_INVALID_MEM_OBJECT",
00388     "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR",
00389     "CL_INVALID_IMAGE_SIZE",
00390     "CL_INVALID_SAMPLER",
00391     "CL_INVALID_BINARY",
00392     "CL_INVALID_BUILD_OPTIONS",
00393     "CL_INVALID_PROGRAM",
00394     "CL_INVALID_PROGRAM_EXECUTABLE",
00395     "CL_INVALID_KERNEL_NAME",
00396     "CL_INVALID_KERNEL_DEFINITION",
00397     "CL_INVALID_KERNEL",
00398     "CL_INVALID_ARG_INDEX",
00399     "CL_INVALID_ARG_VALUE",
00400     "CL_INVALID_ARG_SIZE",
00401     "CL_INVALID_KERNEL_ARGS",
00402     "CL_INVALID_WORK_DIMENSION",
00403     "CL_INVALID_WORK_GROUP_SIZE",
00404     "CL_INVALID_WORK_ITEM_SIZE",
00405     "CL_INVALID_GLOBAL_OFFSET",
00406     "CL_INVALID_EVENT_WAIT_LIST",
00407     "CL_INVALID_EVENT",
00408     "CL_INVALID_OPERATION",
00409     "CL_INVALID_GL_OBJECT",
00410     "CL_INVALID_BUFFER_SIZE",
00411     "CL_INVALID_MIP_LEVEL",
00412     "CL_INVALID_GLOBAL_WORK_SIZE",
00413   };
00414 
00415   const int errorCount = sizeof(errorString) / sizeof(errorString[0]);
00416   const int index = -error;
00417 
00418   return (index >= 0 && index < errorCount) ? errorString[index] : "Unspecified Error";
00419 }
00420 
// Searches a fixed list of relative directories for a companion file.
//
// filename         name of the file to look for; appended to each search
//                  directory. NULL yields a NULL result.
// executable_path  optional path of the running executable. Its base name
//                  (directory part removed; on Windows a trailing ".exe" is
//                  removed too) replaces the "<executable_name>" placeholder in
//                  the search directories. If NULL, directories containing the
//                  placeholder are skipped.
//
// Returns a malloc'ed, NUL-terminated path of the first candidate that can be
// opened for reading (the caller must free() it), or NULL if no candidate
// exists or memory cannot be allocated.
char* oclFindFilePath(const char* filename, const char* executable_path)
{
  if (filename == 0)
  {
    return 0;
  }

  // <executable_name> defines a variable that is replaced with the name of the executable

  // Typical relative search paths to locate needed companion files (e.g. sample input data, or JIT source files)
  // The origin for the relative search may be the .exe file, a .bat file launching an .exe, a browser .exe launching the .exe or .bat, etc
  static const char* const searchPath[] =
      {
        "./",                                       // same dir
        "./data/",                                  // "/data/" subdir
        "./src/",                                   // "/src/" subdir
        "./src/<executable_name>/data/",            // "/src/<executable_name>/data/" subdir
        "./include/",                               // "/include/" subdir
        "./include/<executable_name>/",             // "/include/<executable_name>/" subdir
        "../",                                      // up 1 in tree
        "../data/",                                 // up 1 in tree, "/data/" subdir
        "../src/",                                  // up 1 in tree, "/src/" subdir
        "../include/",                              // up 1 in tree, "/include/" subdir
        "../OpenCL/src/<executable_name>/",         // up 1 in tree, "/OpenCL/src/<executable_name>/" subdir
        "../OpenCL/src/<executable_name>/data/",    // up 1 in tree, "/OpenCL/src/<executable_name>/data/" subdir
        "../OpenCL/src/<executable_name>/src/",     // up 1 in tree, "/OpenCL/src/<executable_name>/src/" subdir
        "../OpenCL/src/<executable_name>/inc/",     // up 1 in tree, "/OpenCL/src/<executable_name>/inc/" subdir
        "../C/src/<executable_name>/",              // up 1 in tree, "/C/src/<executable_name>/" subdir
        "../C/src/<executable_name>/data/",         // up 1 in tree, "/C/src/<executable_name>/data/" subdir
        "../C/src/<executable_name>/src/",          // up 1 in tree, "/C/src/<executable_name>/src/" subdir
        "../C/src/<executable_name>/inc/",          // up 1 in tree, "/C/src/<executable_name>/inc/" subdir
        "../../",                                   // up 2 in tree
        "../../data/",                              // up 2 in tree, "/data/" subdir
        "../../src/",                               // up 2 in tree, "/src/" subdir
        "../../inc/",                               // up 2 in tree, "/inc/" subdir
        "../../../",                                // up 3 in tree
        "../../../src/<executable_name>/",          // up 3 in tree, "/src/<executable_name>/" subdir
        "../../../src/<executable_name>/data/",     // up 3 in tree, "/src/<executable_name>/data/" subdir
        "../../../src/<executable_name>/src/",      // up 3 in tree, "/src/<executable_name>/src/" subdir
        "../../../src/<executable_name>/inc/",      // up 3 in tree, "/src/<executable_name>/inc/" subdir
        "../../../sandbox/<executable_name>/",      // up 3 in tree, "/sandbox/<executable_name>/" subdir
        "../../../sandbox/<executable_name>/data/", // up 3 in tree, "/sandbox/<executable_name>/data/" subdir
        "../../../sandbox/<executable_name>/src/",  // up 3 in tree, "/sandbox/<executable_name>/src/" subdir
        "../../../sandbox/<executable_name>/inc/"   // up 3 in tree, "/sandbox/<executable_name>/inc/" subdir
      };
  const size_t searchPathCount = sizeof(searchPath) / sizeof(searchPath[0]);
  const std::string placeholder("<executable_name>");

  // Extract the executable name
  std::string executable_name;
  if (executable_path != 0)
  {
    executable_name = executable_path;

#ifdef _WIN32
    // Windows path delimiters (both separators are accepted by the OS)
    const size_t delimiter_pos = executable_name.find_last_of("\\/");
#else
    // Linux & OSX path delimiter
    const size_t delimiter_pos = executable_name.find_last_of('/');
#endif
    if (delimiter_pos != std::string::npos)
    {
      executable_name.erase(0, delimiter_pos + 1);
    }

#ifdef _WIN32
    // Strip ".exe" only when it really is the suffix; a ".exe" elsewhere in the
    // name must not cause the last four characters to be chopped off.
    const std::string exe_suffix(".exe");
    if (executable_name.size() >= exe_suffix.size() &&
        executable_name.compare(executable_name.size() - exe_suffix.size(),
                                exe_suffix.size(), exe_suffix) == 0)
    {
      executable_name.resize(executable_name.size() - exe_suffix.size());
    }
#endif
  }

  // Loop over all search paths and return the first hit
  for (size_t i = 0; i < searchPathCount; ++i)
  {
    std::string path(searchPath[i]);
    const size_t executable_name_pos = path.find(placeholder);

    // If the search path contains the placeholder, substitute the executable
    // name, or skip the entry when no executable path was given.
    if (executable_name_pos != std::string::npos)
    {
      if (executable_path == 0)
      {
        continue;
      }
      path.replace(executable_name_pos, placeholder.length(), executable_name);
    }

    // Test if the file exists
    path.append(filename);
    std::fstream fh(path.c_str(), std::fstream::in);
    if (fh.good())
    {
      // File found
      // returning an allocated array here for backwards compatibility reasons
      char* file_path = static_cast<char*>(malloc(path.length() + 1));
      if (file_path == 0)
      {
        return 0;
      }
      memcpy(file_path, path.c_str(), path.length() + 1);   // includes the NUL
      return file_path;
    }
  }

  // File not found
  return 0;
}


parallel_quickstep
Author(s): Jared Duke
autogenerated on Wed Apr 23 2014 10:23:51