$search
00001 #include <opencl_utils.h> 00002 00003 #define HDASHLINE "-----------------------------------------------------------\n" 00004 00005 #include <fstream> 00006 #include <vector> 00007 #include <string.h> 00008 #include <iostream> 00009 #include <algorithm> 00010 #include <stdarg.h> 00011 #include <stdlib.h> 00012 00013 cl_context g_clDeviceContext; 00014 cl_command_queue g_clDeviceQueue; 00015 00016 cl_context oclGetGlobalContext( ) { 00017 return g_clDeviceContext; 00018 } 00019 00020 cl_command_queue oclGetGlobalQueue( ) { 00021 return g_clDeviceQueue; 00022 } 00023 00030 cl_int oclGetPlatformID(cl_platform_id* clSelectedPlatformID) 00031 { 00032 char chBuffer[1024]; 00033 cl_uint num_platforms; 00034 cl_platform_id* clPlatformIDs; 00035 cl_int ciErrNum; 00036 *clSelectedPlatformID = NULL; 00037 00038 // Get OpenCL platform count 00039 ciErrNum = clGetPlatformIDs (0, NULL, &num_platforms); 00040 if (ciErrNum != CL_SUCCESS) 00041 { 00042 printf(" Error %i in clGetPlatformIDs Call !!!\n\n", ciErrNum); 00043 return -1000; 00044 } 00045 else 00046 { 00047 if(num_platforms == 0) 00048 { 00049 printf("No OpenCL platform found!\n\n"); 00050 return -2000; 00051 } 00052 else 00053 { 00054 // if there's a platform or more, make space for ID's 00055 if ((clPlatformIDs = (cl_platform_id*)malloc(num_platforms * sizeof(cl_platform_id))) == NULL) 00056 { 00057 printf("Failed to allocate memory for cl_platform ID's!\n\n"); 00058 return -3000; 00059 } 00060 00061 // get platform info for each platform and trap the NVIDIA platform if found 00062 ciErrNum = clGetPlatformIDs (num_platforms, clPlatformIDs, NULL); 00063 for(cl_uint i = 0; i < num_platforms; ++i) 00064 { 00065 ciErrNum = clGetPlatformInfo (clPlatformIDs[i], CL_PLATFORM_NAME, 1024, &chBuffer, NULL); 00066 if(ciErrNum == CL_SUCCESS) 00067 { 00068 if(strstr(chBuffer, "NVIDIA") != NULL) 00069 { 00070 *clSelectedPlatformID = clPlatformIDs[i]; 00071 break; 00072 } 00073 } 00074 } 00075 00076 // default to zeroeth platform if NVIDIA not found 00077 if(*clSelectedPlatformID == NULL) 00078 { 00079 printf("WARNING: NVIDIA OpenCL platform not found - defaulting to first platform!\n\n"); 00080 *clSelectedPlatformID = clPlatformIDs[0]; 00081 } 00082 00083 free(clPlatformIDs); 00084 } 00085 } 00086 00087 return CL_SUCCESS; 00088 } 00089 00096 cl_device_id oclGetFirstDev(cl_context cxGPUContext) 00097 { 00098 size_t szParmDataBytes; 00099 cl_device_id* cdDevices; 00100 00101 // get the list of GPU devices associated with context 00102 clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); 00103 cdDevices = (cl_device_id*) malloc(szParmDataBytes); 00104 00105 clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); 00106 00107 cl_device_id first = cdDevices[0]; 00108 free(cdDevices); 00109 00110 return first; 00111 } 00112 00116 void oclInit( ) { 00117 cl_platform_id cpPlatform; 00118 cl_device_id cdDevice; 00119 cl_int ciErrNum; 00120 00121 ciErrNum = oclGetPlatformID(&cpPlatform); 00122 oclCheckError(ciErrNum, CL_SUCCESS); 00123 00124 ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &cdDevice, NULL); 00125 oclCheckError(ciErrNum, CL_SUCCESS); 00126 00127 g_clDeviceContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErrNum); 00128 oclCheckError(ciErrNum, CL_SUCCESS); 00129 00130 g_clDeviceQueue = clCreateCommandQueue(g_clDeviceContext, cdDevice, 0, &ciErrNum); 00131 oclCheckError(ciErrNum, CL_SUCCESS); 00132 } 00133 00140 void oclShutdown(cl_context clContext, cl_command_queue clQueue) { 00141 cl_int ciErrNum; 00142 ciErrNum = clReleaseCommandQueue(clQueue); 00143 ciErrNum |= clReleaseContext(clContext); 00144 oclCheckError(ciErrNum, CL_SUCCESS); 00145 } 00146 00155 char* oclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength) 00156 { 00157 // locals 00158 FILE* pFileStream = NULL; 00159 size_t szSourceLength; 00160 00161 // open the OpenCL source code file 00162 #ifdef _WIN32 // Windows version 00163 if(fopen_s(&pFileStream, cFilename, "rb") != 0) 00164 { 00165 return NULL; 00166 } 00167 #else // Linux version 00168 pFileStream = fopen(cFilename, "rb"); 00169 if(pFileStream == 0) 00170 { 00171 return NULL; 00172 } 00173 #endif 00174 00175 size_t szPreambleLength = strlen(cPreamble); 00176 00177 // get the length of the source code 00178 fseek(pFileStream, 0, SEEK_END); 00179 szSourceLength = ftell(pFileStream); 00180 fseek(pFileStream, 0, SEEK_SET); 00181 00182 // allocate a buffer for the source code string and read it in 00183 char* cSourceString = (char *)malloc(szSourceLength + szPreambleLength + 1); 00184 memcpy(cSourceString, cPreamble, szPreambleLength); 00185 if (fread((cSourceString) + szPreambleLength, szSourceLength, 1, pFileStream) != 1) 00186 { 00187 fclose(pFileStream); 00188 free(cSourceString); 00189 return 0; 00190 } 00191 00192 // close the file and return the total length of the combined (preamble + source) string 00193 fclose(pFileStream); 00194 if(szFinalLength != 0) 00195 { 00196 *szFinalLength = szSourceLength + szPreambleLength; 00197 } 00198 cSourceString[szSourceLength + szPreambleLength] = '\0'; 00199 00200 return cSourceString; 00201 } 00202 00211 void oclGetProgBinary( cl_program cpProgram, cl_device_id cdDevice, char** binary, size_t* length) 00212 { 00213 // Grab the number of devices associated witht the program 00214 cl_uint num_devices; 00215 clGetProgramInfo(cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL); 00216 00217 // Grab the device ids 00218 cl_device_id* devices = (cl_device_id*) malloc(num_devices * sizeof(cl_device_id)); 00219 clGetProgramInfo(cpProgram, CL_PROGRAM_DEVICES, num_devices * sizeof(cl_device_id), devices, 0); 00220 00221 // Grab the sizes of the binaries 00222 size_t* binary_sizes = (size_t*)malloc(num_devices * sizeof(size_t)); 00223 clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, num_devices * sizeof(size_t), binary_sizes, NULL); 00224 00225 // Now get the binaries 00226 char** ptx_code = (char**) malloc(num_devices * sizeof(char*)); 00227 for( unsigned int i=0; i<num_devices; ++i) { 00228 ptx_code[i]= (char*)malloc(binary_sizes[i]); 00229 } 00230 clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, 0, ptx_code, NULL); 00231 00232 // Find the index of the device of interest 00233 unsigned int idx = 0; 00234 while( idx<num_devices && devices[idx] != cdDevice ) ++idx; 00235 00236 // If it is associated prepare the result 00237 if( idx < num_devices ) 00238 { 00239 *binary = ptx_code[idx]; 00240 *length = binary_sizes[idx]; 00241 } 00242 00243 // Cleanup 00244 free( devices ); 00245 free( binary_sizes ); 00246 for( unsigned int i=0; i<num_devices; ++i) { 00247 if( i != idx ) free(ptx_code[i]); 00248 } 00249 free( ptx_code ); 00250 } 00251 00259 void oclLogPtx(cl_program cpProgram, cl_device_id cdDevice, const char* cPtxFileName) 00260 { 00261 // Grab the number of devices associated with the program 00262 cl_uint num_devices; 00263 clGetProgramInfo(cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL); 00264 00265 // Grab the device ids 00266 cl_device_id* devices = (cl_device_id*) malloc(num_devices * sizeof(cl_device_id)); 00267 clGetProgramInfo(cpProgram, CL_PROGRAM_DEVICES, num_devices * sizeof(cl_device_id), devices, 0); 00268 00269 // Grab the sizes of the binaries 00270 size_t* binary_sizes = (size_t*)malloc(num_devices * sizeof(size_t)); 00271 clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, num_devices * sizeof(size_t), binary_sizes, NULL); 00272 00273 // Now get the binaries 00274 char** ptx_code = (char**)malloc(num_devices * sizeof(char*)); 00275 for( unsigned int i=0; i<num_devices; ++i) 00276 { 00277 ptx_code[i] = (char*)malloc(binary_sizes[i]); 00278 } 00279 clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, 0, ptx_code, NULL); 00280 00281 // Find the index of the device of interest 00282 unsigned int idx = 0; 00283 while((idx < num_devices) && (devices[idx] != cdDevice)) 00284 { 00285 ++idx; 00286 } 00287 00288 // If the index is associated, log the result 00289 if(idx < num_devices) 00290 { 00291 00292 // if a separate filename is supplied, dump ptx there 00293 if (NULL != cPtxFileName) 00294 { 00295 printf("\nWriting ptx to separate file: %s ...\n\n", cPtxFileName); 00296 FILE* pFileStream = NULL; 00297 #ifdef _WIN32 00298 fopen_s(&pFileStream, cPtxFileName, "wb"); 00299 #else 00300 pFileStream = fopen(cPtxFileName, "wb"); 00301 #endif 00302 00303 fwrite(ptx_code[idx], binary_sizes[idx], 1, pFileStream); 00304 fclose(pFileStream); 00305 } 00306 else // log to logfile and console if no ptx file specified 00307 { 00308 printf("\n%s\nProgram Binary:\n%s\n%s\n", HDASHLINE, ptx_code[idx], HDASHLINE); 00309 } 00310 } 00311 00312 // Cleanup 00313 free(devices); 00314 free(binary_sizes); 00315 for(unsigned int i = 0; i < num_devices; ++i) 00316 { 00317 free(ptx_code[i]); 00318 } 00319 free( ptx_code ); 00320 } 00321 00328 void oclLogBuildInfo(cl_program cpProgram, cl_device_id cdDevice) 00329 { 00330 char *cBuildLog; 00331 size_t retValSize; 00332 00333 clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &retValSize); 00334 cBuildLog = new char[retValSize+1]; 00335 00336 clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, retValSize, cBuildLog, NULL); 00337 cBuildLog[retValSize] = '\0'; 00338 00339 printf("\n%s\nBUILD LOG:\n%s\n%s\n", HDASHLINE, cBuildLog, HDASHLINE); 00340 00341 delete[] cBuildLog; 00342 } 00343 00344 // Helper function to get OpenCL error string from constant 00345 // ********************************************************************* 00346 const char* oclErrorString(cl_int error) 00347 { 00348 static const char* errorString[] = { 00349 "CL_SUCCESS", 00350 "CL_DEVICE_NOT_FOUND", 00351 "CL_DEVICE_NOT_AVAILABLE", 00352 "CL_COMPILER_NOT_AVAILABLE", 00353 "CL_MEM_OBJECT_ALLOCATION_FAILURE", 00354 "CL_OUT_OF_RESOURCES", 00355 "CL_OUT_OF_HOST_MEMORY", 00356 "CL_PROFILING_INFO_NOT_AVAILABLE", 00357 "CL_MEM_COPY_OVERLAP", 00358 "CL_IMAGE_FORMAT_MISMATCH", 00359 "CL_IMAGE_FORMAT_NOT_SUPPORTED", 00360 "CL_BUILD_PROGRAM_FAILURE", 00361 "CL_MAP_FAILURE", 00362 "", 00363 "", 00364 "", 00365 "", 00366 "", 00367 "", 00368 "", 00369 "", 00370 "", 00371 "", 00372 "", 00373 "", 00374 "", 00375 "", 00376 "", 00377 "", 00378 "", 00379 "CL_INVALID_VALUE", 00380 "CL_INVALID_DEVICE_TYPE", 00381 "CL_INVALID_PLATFORM", 00382 "CL_INVALID_DEVICE", 00383 "CL_INVALID_CONTEXT", 00384 "CL_INVALID_QUEUE_PROPERTIES", 00385 "CL_INVALID_COMMAND_QUEUE", 00386 "CL_INVALID_HOST_PTR", 00387 "CL_INVALID_MEM_OBJECT", 00388 "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR", 00389 "CL_INVALID_IMAGE_SIZE", 00390 "CL_INVALID_SAMPLER", 00391 "CL_INVALID_BINARY", 00392 "CL_INVALID_BUILD_OPTIONS", 00393 "CL_INVALID_PROGRAM", 00394 "CL_INVALID_PROGRAM_EXECUTABLE", 00395 "CL_INVALID_KERNEL_NAME", 00396 "CL_INVALID_KERNEL_DEFINITION", 00397 "CL_INVALID_KERNEL", 00398 "CL_INVALID_ARG_INDEX", 00399 "CL_INVALID_ARG_VALUE", 00400 "CL_INVALID_ARG_SIZE", 00401 "CL_INVALID_KERNEL_ARGS", 00402 "CL_INVALID_WORK_DIMENSION", 00403 "CL_INVALID_WORK_GROUP_SIZE", 00404 "CL_INVALID_WORK_ITEM_SIZE", 00405 "CL_INVALID_GLOBAL_OFFSET", 00406 "CL_INVALID_EVENT_WAIT_LIST", 00407 "CL_INVALID_EVENT", 00408 "CL_INVALID_OPERATION", 00409 "CL_INVALID_GL_OBJECT", 00410 "CL_INVALID_BUFFER_SIZE", 00411 "CL_INVALID_MIP_LEVEL", 00412 "CL_INVALID_GLOBAL_WORK_SIZE", 00413 }; 00414 00415 const int errorCount = sizeof(errorString) / sizeof(errorString[0]); 00416 const int index = -error; 00417 00418 return (index >= 0 && index < errorCount) ? errorString[index] : "Unspecified Error"; 00419 } 00420 00421 char* oclFindFilePath(const char* filename, const char* executable_path) 00422 { 00423 // <executable_name> defines a variable that is replaced with the name of the executable 00424 00425 // Typical relative search paths to locate needed companion files (e.g. sample input data, or JIT source files) 00426 // The origin for the relative search may be the .exe file, a .bat file launching an .exe, a browser .exe launching the .exe or .bat, etc 00427 const char* searchPath[] = 00428 { 00429 "./", // same dir 00430 "./data/", // "/data/" subdir 00431 "./src/", // "/src/" subdir 00432 "./src/<executable_name>/data/", // "/src/<executable_name>/data/" subdir 00433 "./include/", // "/include/" subdir 00434 "./include/<executable_name>/", // "/include/<executable_name/" subdir 00435 "../", // up 1 in tree 00436 "../data/", // up 1 in tree, "/data/" subdir 00437 "../src/", // up 1 in tree, "/src/" subdir 00438 "../include/", // up 1 in tree, "/include/" subdir 00439 "../OpenCL/src/<executable_name>/", // up 1 in tree, "/OpenCL/src/<executable_name>/" subdir 00440 "../OpenCL/src/<executable_name>/data/", // up 1 in tree, "/OpenCL/src/<executable_name>/data/" subdir 00441 "../OpenCL/src/<executable_name>/src/", // up 1 in tree, "/OpenCL/src/<executable_name>/src/" subdir 00442 "../OpenCL/src/<executable_name>/inc/", // up 1 in tree, "/OpenCL/src/<executable_name>/inc/" subdir 00443 "../C/src/<executable_name>/", // up 1 in tree, "/C/src/<executable_name>/" subdir 00444 "../C/src/<executable_name>/data/", // up 1 in tree, "/C/src/<executable_name>/data/" subdir 00445 "../C/src/<executable_name>/src/", // up 1 in tree, "/C/src/<executable_name>/src/" subdir 00446 "../C/src/<executable_name>/inc/", // up 1 in tree, "/C/src/<executable_name>/inc/" subdir 00447 "../../", // up 2 in tree 00448 "../../data/", // up 2 in tree, "/data/" subdir 00449 "../../src/", // up 2 in tree, "/src/" subdir 00450 "../../inc/", // up 2 in tree, "/inc/" subdir 00451 "../../../", // up 3 in tree 00452 "../../../src/<executable_name>/", // up 3 in tree, "/src/<executable_name>/" subdir 00453 "../../../src/<executable_name>/data/", // up 3 in tree, "/src/<executable_name>/data/" subdir 00454 "../../../src/<executable_name>/src/", // up 3 in tree, "/src/<executable_name>/src/" subdir 00455 "../../../src/<executable_name>/inc/", // up 3 in tree, "/src/<executable_name>/inc/" subdir 00456 "../../../sandbox/<executable_name>/", // up 3 in tree, "/sandbox/<executable_name>/" subdir 00457 "../../../sandbox/<executable_name>/data/", // up 3 in tree, "/sandbox/<executable_name>/data/" subdir 00458 "../../../sandbox/<executable_name>/src/", // up 3 in tree, "/sandbox/<executable_name>/src/" subdir 00459 "../../../sandbox/<executable_name>/inc/" // up 3 in tree, "/sandbox/<executable_name>/inc/" subdir 00460 }; 00461 00462 // Extract the executable name 00463 std::string executable_name; 00464 if (executable_path != 0) 00465 { 00466 executable_name = std::string(executable_path); 00467 00468 #ifdef _WIN32 00469 // Windows path delimiter 00470 size_t delimiter_pos = executable_name.find_last_of('\\'); 00471 executable_name.erase(0, delimiter_pos + 1); 00472 00473 if (executable_name.rfind(".exe") != string::npos) 00474 { 00475 // we strip .exe, only if the .exe is found 00476 executable_name.resize(executable_name.size() - 4); 00477 } 00478 #else 00479 // Linux & OSX path delimiter 00480 size_t delimiter_pos = executable_name.find_last_of('/'); 00481 executable_name.erase(0,delimiter_pos+1); 00482 #endif 00483 00484 } 00485 00486 // Loop over all search paths and return the first hit 00487 for( unsigned int i = 0; i < sizeof(searchPath)/sizeof(char*); ++i ) 00488 { 00489 std::string path(searchPath[i]); 00490 size_t executable_name_pos = path.find("<executable_name>"); 00491 00492 // If there is executable_name variable in the searchPath 00493 // replace it with the value 00494 if(executable_name_pos != std::string::npos) 00495 { 00496 if(executable_path != 0) 00497 { 00498 path.replace(executable_name_pos, strlen("<executable_name>"), executable_name); 00499 00500 } 00501 else 00502 { 00503 // Skip this path entry if no executable argument is given 00504 continue; 00505 } 00506 } 00507 00508 // Test if the file exists 00509 path.append(filename); 00510 std::fstream fh(path.c_str(), std::fstream::in); 00511 if (fh.good()) 00512 { 00513 // File found 00514 // returning an allocated array here for backwards compatibility reasons 00515 char* file_path = (char*) malloc(path.length() + 1); 00516 #ifdef _WIN32 00517 strcpy_s(file_path, path.length() + 1, path.c_str()); 00518 #else 00519 strcpy(file_path, path.c_str()); 00520 #endif 00521 return file_path; 00522 } 00523 } 00524 00525 // File not found 00526 return 0; 00527 }