// This file is part of BOINC.
// http://boinc.berkeley.edu
// Copyright (C) 2009 University of California
//
// BOINC is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// BOINC is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with BOINC.  If not, see http://www.gnu.org/licenses/.

// client-specific GPU code.  Mostly GPU detection

// NOTE(review): this copy of the file is damaged.  Text that used to sit
// between angle brackets (template arguments, system header names and
// several loop bodies that ran from a less-than comparison to a later
// greater-than) is missing.  The comments added below describe only what is
// still visible; the damaged statements are reproduced unchanged.

// Debug/test switches.  FAKENVIDIACUDA0 and DEBUGFOROLIVER are tested with
// #if further down; FAKE2NVIDIAOPENCLS is not referenced in the visible code.
#define FAKENVIDIACUDA0 0
#define FAKE2NVIDIAOPENCLS 0
#define DEBUGFOROLIVER 1

#include "cpp.h"

#ifdef _WIN32
#include "boinc_win.h"
#ifndef SIM
#include    // NOTE(review): header name lost
#endif
#else
#ifdef __APPLE__
// Suppress obsolete warning when building for OS 10.3.9
#define DLOPEN_NO_WARN
#include    // NOTE(review): header name lost
#endif
#include "config.h"
#include    // NOTE(review): header name lost
#include    // NOTE(review): header name lost
#include    // NOTE(review): header name lost
#endif

#include "coproc.h"
#include "str_util.h"
#include "util.h"

#include "client_state.h"
#include "client_msgs.h"

using std::string;
using std::vector;

// NOTE(review): the body of in_vector() and whatever followed it up to the
// declaration of ati_gpus is missing.  The definitions of segv_handler and
// resume, which COPROCS::get() uses below, are not visible in this copy and
// were presumably in that gap -- confirm against the pristine file.
static bool in_vector(int n, vector& v) {
    for (unsigned int i=0; i ati_gpus;
vector nvidia_gpus;

// Top-level GPU detection: runs NVIDIA (CUDA), ATI (CAL) and OpenCL
// detection one after another.
//
// Each step is guarded so that a fault inside a vendor library is turned
// into an entry in "warnings" and the remaining steps still run:
// - on Windows each call is wrapped in try / catch (...)
//   (NOTE(review): whether catch (...) intercepts access violations depends
//   on the compiler's exception-handling mode -- confirm);
// - elsewhere a SIGSEGV handler (segv_handler) is installed for the duration
//   of the function and setjmp(resume) marks the point control comes back to
//   (presumably via longjmp from the handler -- its definition is not
//   visible here).  The previous handler is restored afterwards.
//
// On Apple builds the ATI/CAL step is compiled out.
// ati_gpus and nvidia_gpus are scratch lists used during detection and are
// emptied before returning.
//
void COPROCS::get(
    bool use_all, vector&descs, vector&warnings,
    vector& ignore_nvidia_dev, vector& ignore_ati_dev
) {
#ifdef _WIN32
    try {
        nvidia.get(use_all, descs, warnings, ignore_nvidia_dev);
    }
    catch (...) {
        warnings.push_back("Caught SIGSEGV in NVIDIA GPU detection");
    }
    try {
        ati.get(use_all, descs, warnings, ignore_ati_dev);
    }
    catch (...) {
        warnings.push_back("Caught SIGSEGV in ATI GPU detection");
    }
    try {
        // note the argument order: ATI ignore list first, then NVIDIA
        get_opencl(use_all, descs, warnings, ignore_ati_dev, ignore_nvidia_dev);
    }
    catch (...) {
        warnings.push_back("Caught SIGSEGV in OpenCL detection");
    }
#else
    void (*old_sig)(int) = signal(SIGSEGV, segv_handler);
    if (setjmp(resume)) {
        warnings.push_back("Caught SIGSEGV in NVIDIA GPU detection");
    } else {
        nvidia.get(use_all, descs, warnings, ignore_nvidia_dev);
    }
#ifndef __APPLE__       // ATI does not yet support CAL on Macs
    if (setjmp(resume)) {
        warnings.push_back("Caught SIGSEGV in ATI GPU detection");
    } else {
        ati.get(use_all, descs, warnings, ignore_ati_dev);
    }
#endif
    if (setjmp(resume)) {
        warnings.push_back("Caught SIGSEGV in OpenCL detection");
    } else {
        get_opencl(use_all, descs, warnings, ignore_ati_dev, ignore_nvidia_dev);
    }
    signal(SIGSEGV, old_sig);
#endif
    ati_gpus.clear();
    nvidia_gpus.clear();
}

//////////////////  OPENCL STARTS HERE  /////////////////
//

// Handle of the dynamically loaded OpenCL library and pointers to the four
// OpenCL entry points used in this file.  They are filled in by
// COPROCS::get_opencl() via GetProcAddress() (Windows) or dlsym() (elsewhere).
#ifdef _WIN32

HMODULE opencl_lib = NULL;

typedef cl_int (__stdcall *CL_PLATFORMIDS) (cl_uint, cl_platform_id*, cl_uint*);
typedef cl_int (__stdcall *CL_PLATFORMINFO) (cl_platform_id, cl_platform_info, size_t, void*, size_t*);
typedef cl_int (__stdcall *CL_DEVICEIDS)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*);
typedef cl_int (__stdcall *CL_INFO) (cl_device_id, cl_device_info, size_t, void*, size_t*);

CL_PLATFORMIDS  __clGetPlatformIDs = NULL;
CL_PLATFORMINFO __clGetPlatformInfo = NULL;
CL_DEVICEIDS    __clGetDeviceIDs = NULL;
CL_INFO         __clGetDeviceInfo = NULL;

#else

void* opencl_lib = NULL;

cl_int (*__clGetPlatformIDs)(cl_uint /* num_entries */,
    cl_platform_id * /* platforms */,
    cl_uint * /* num_platforms */);
cl_int (*__clGetPlatformInfo)(cl_platform_id /* platform */,
    cl_platform_info /* param_name */,
    size_t /* param_value_size */,
    void * /* param_value */,
    size_t * /* param_value_size_ret */);
cl_int (*__clGetDeviceIDs)(cl_platform_id /* platform */,
    cl_device_type /* device_type */,
    cl_uint /* num_entries */,
    cl_device_id * /* devices */,
    cl_uint * /* num_devices */);
cl_int (*__clGetDeviceInfo)(cl_device_id /* device */,
    cl_device_info /* param_name */,
    size_t /* param_value_size */,
    void * /* param_value */,
    size_t * /* param_value_size_ret */);

#endif

// Three-way comparison of two OpenCL devices: returns 1 if c1 ranks higher,
// -1 if c2 ranks higher, 0 if they are considered equal.
//
// Ranking order: OpenCL device version first, then global memory size,
// then peak FLOPS.
//
// If "loose", tolerate small diff
// (c1 only wins on memory if it has more than 1.4x c2's, only loses if it
// has less than 0.7x, and peak FLOPS is not consulted at all)
//
int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) {
    if (c1.opencl_device_version_int > c2.opencl_device_version_int) return 1;
    if (c1.opencl_device_version_int < c2.opencl_device_version_int) return -1;
    if (loose) {
        if (c1.global_mem_size > 1.4*c2.global_mem_size) return 1;
        if (c1.global_mem_size < .7*c2.global_mem_size) return -1;
        return 0;
    }
    if (c1.global_mem_size > c2.global_mem_size) return 1;
    if (c1.global_mem_size < c2.global_mem_size) return -1;
    if (c1.peak_flops > c2.peak_flops) return 1;
    if (c1.peak_flops < c2.peak_flops) return -1;
    return 0;
}

// OpenCL interfaces are documented here:
// http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/ and
// http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/

// Detect OpenCL-capable GPUs.
//
// Visible steps:
// 1. load the OpenCL library and resolve the four entry points; any failure
//    adds a message to "warnings" and returns;
// 2. enumerate platforms with clGetPlatformIDs();
// 3. (damaged) walk platforms/devices, sorting each device into
//    nvidia_opencls or ati_opencls according to its vendor string;
// 4. merge the OpenCL data into the CUDA result when CUDA detection
//    succeeded, otherwise pick the best NVIDIA device from OpenCL data alone;
// 5. (damaged) build descriptions; the rest of the function is missing.
//
// NOTE(review): no FreeLibrary()/dlclose() appears in the visible code, so
// the library handle seems to stay open on every return path -- confirm
// whether that is intentional.
//
void COPROCS::get_opencl(
    bool use_all,
    vector& descs,
    vector& warnings,
    vector& ignore_ati_dev,
    vector& ignore_nvidia_dev
) {
    cl_int ciErrNum;
    cl_platform_id platforms[MAX_OPENCL_PLATFORMS];
    cl_uint num_platforms, platform_index, num_devices, device_index;
    cl_device_id devices[MAX_COPROC_INSTANCES];
    char platform_version[256];
    OPENCL_DEVICE_PROP prop;
    vector nvidia_opencls;
    vector ati_opencls;
    COPROC_NVIDIA nvidia_temp;
    COPROC_ATI ati_temp;
    unsigned int i;
    int current_CUDA_index;
    char buf[256];

#ifdef _WIN32
    opencl_lib = LoadLibrary("OpenCL.dll");
    if (!opencl_lib) {
        warnings.push_back("No OpenCL library found");
        return;
    }

    __clGetPlatformIDs = (CL_PLATFORMIDS)GetProcAddress( opencl_lib, "clGetPlatformIDs" );
    __clGetPlatformInfo = (CL_PLATFORMINFO)GetProcAddress( opencl_lib, "clGetPlatformInfo" );
    __clGetDeviceIDs = (CL_DEVICEIDS)GetProcAddress( opencl_lib, "clGetDeviceIDs" );
    __clGetDeviceInfo = (CL_INFO)GetProcAddress( opencl_lib, "clGetDeviceInfo" );
#else
#ifdef __APPLE__
    opencl_lib = dlopen("/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL", RTLD_NOW);
#else
//TODO: Is this correct?
    opencl_lib = dlopen("libOpenCL.so", RTLD_NOW);
#endif
    if (!opencl_lib) {
        warnings.push_back("No OpenCL library found");
        return;
    }
    __clGetPlatformIDs = (cl_int(*)(cl_uint, cl_platform_id*, cl_uint*)) dlsym( opencl_lib, "clGetPlatformIDs" );
    __clGetPlatformInfo = (cl_int(*)(cl_platform_id, cl_platform_info, size_t, void*, size_t*)) dlsym( opencl_lib, "clGetPlatformInfo" );
    __clGetDeviceIDs = (cl_int(*)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*)) dlsym( opencl_lib, "clGetDeviceIDs" );
    __clGetDeviceInfo = (cl_int(*)(cl_device_id, cl_device_info, size_t, void*, size_t*)) dlsym( opencl_lib, "clGetDeviceInfo" );
#endif

    // Refuse to continue unless every entry point was resolved.
    if (!__clGetPlatformIDs) {
        warnings.push_back("clGetPlatformIDs() missing from OpenCL library");
        return;
    }
    if (!__clGetPlatformInfo) {
        warnings.push_back("clGetPlatformInfo() missing from OpenCL library");
        return;
    }
    if (!__clGetDeviceIDs) {
        warnings.push_back("clGetDeviceIDs() missing from OpenCL library");
        return;
    }
    if (!__clGetDeviceInfo) {
        warnings.push_back("clGetDeviceInfo() missing from OpenCL library");
        return;
    }

    ciErrNum = (*__clGetPlatformIDs)(MAX_OPENCL_PLATFORMS, platforms, &num_platforms);
    if ((ciErrNum != CL_SUCCESS) || (num_platforms == 0)) {
        warnings.push_back("clGetPlatformIDs() failed to return any OpenCL platforms");
        return;
    }

    // NOTE(review): the next statement is damaged.  Everything between the
    // start of the platform loop and the bounds check of current_CUDA_index
    // against nvidia_gpus.size() is missing (platform/device enumeration,
    // the call that fills "prop", and the NVIDIA vendor test, judging by
    // what follows).
    for (platform_index=0; platform_index= (int)(nvidia_gpus.size())) {
                        if (log_flags.coproc_debug) {
                            msg_printf(0, MSG_INFO,
                                "[coproc] OpenCL NVIDIA index #%d does not match any CUDA device",
                                device_index
                            );
                        }
                        return; // Should never happen
                    }
                }
                prop.device_num = current_CUDA_index;
                prop.opencl_device_index = device_index;

                // Without CUDA there is no peak_flops yet; compute it from
                // the OpenCL properties using a temporary COPROC_NVIDIA.
                if (!nvidia.have_cuda) {
                    COPROC_NVIDIA c;
                    c.opencl_prop = prop;
                    c.set_peak_flops();
                    prop.peak_flops = c.peak_flops;
                }
                if (nvidia_gpus.size()) {
                    // Assumes OpenCL and CUDA return the devices in the same order
                    prop.opencl_available_ram = nvidia_gpus[prop.device_num].available_ram;
                } else {
                    prop.opencl_available_ram = prop.global_mem_size;
                }
#if DEBUGFOROLIVER
                if (log_flags.coproc_debug) {
                    msg_printf(0, MSG_INFO,
                        "[coproc] OpenCL device %d: clGetDeviceInfo got available memory size %.0fMB",
                        device_index, prop.opencl_available_ram/MEGA
                    );
                }
#endif
                nvidia_opencls.push_back(prop);
            }
            // ATI/AMD devices are recognised by any of three vendor substrings.
            if ((strstr(prop.vendor, GPU_TYPE_ATI))
                || (strstr(prop.vendor, "AMD"))
                || (strstr(prop.vendor, "Advanced Micro Devices, Inc."))
            ) {
                // Unlike the NVIDIA branch, both indices are the position in
                // ati_opencls rather than the OpenCL device_index.
                prop.device_num = (int)(ati_opencls.size());
                prop.opencl_device_index = prop.device_num;

#ifdef __APPLE__
                // Work around a bug in OpenCL which returns only
                // 1/2 of total global RAM size.
                // This bug applies only to ATI GPUs, not to NVIDIA
                // This has already been fixed on latest Catalyst
                // drivers, but Mac does not use Catalyst drivers.
                // Assume this will be fixed in openCL 1.2.
                // See also further workaround code for systems with
                // CAL support.
                //
                // NOTE(review): strstr() takes (haystack, needle); here the
                // short literal is passed as the haystack and the platform
                // version string as the needle, so each strstr() can only
                // succeed if the version string is a substring of "1.0" /
                // "1.1".  With both results negated and joined by ||, the
                // condition is true unless both succeed, i.e. memory is
                // doubled for practically every version, including 1.2+.
                // That contradicts the comment above; the intent looks like
                // "version contains 1.0 or 1.1" -- confirm and fix.
                if ((!strstr("1.0", prop.opencl_platform_version))
                    || (!strstr("1.1", prop.opencl_platform_version))
                ){
                    prop.global_mem_size *= 2;
                }
#endif
                // Same peak_flops fallback as for NVIDIA, when CAL is absent.
                if (!ati.have_cal) {
                    COPROC_ATI c;
                    c.opencl_prop = prop;
                    c.set_peak_flops();
                    prop.peak_flops = c.peak_flops;
                }
                if (ati_gpus.size()) {
                    // Assumes OpenCL and CAL return the same device with the same index
                    prop.opencl_available_ram = ati_gpus[prop.device_num].available_ram;
                } else {
                    prop.opencl_available_ram = prop.global_mem_size;
                }
                ati_opencls.push_back(prop);
            }
        }
    }

    if ((nvidia_opencls.size() == 0) && (ati_opencls.size() == 0)) {
        warnings.push_back("OpenCL library present but no OpenCL-capable GPUs found");
        return;
    }

    if (nvidia.have_cuda) { // If CUDA already found the "best" NVIDIA GPU
        nvidia.merge_opencl(nvidia_opencls, ignore_nvidia_dev);
    } else {
        // No CUDA data: choose the best device from OpenCL info and fill the
        // CUDA-style fields from it (clock frequency is scaled by 1000).
        nvidia.find_best_opencls(use_all, nvidia_opencls, ignore_nvidia_dev);
        nvidia.prop.totalGlobalMem = nvidia.opencl_prop.global_mem_size;
        nvidia.available_ram = nvidia.opencl_prop.global_mem_size;
        nvidia.prop.clockRate = nvidia.opencl_prop.max_clock_frequency * 1000;
    }

#if DEBUGFOROLIVER
    if (log_flags.coproc_debug) {
        msg_printf(0, MSG_INFO,
            "[coproc] Creating descriptions for %d NVIDIA OpenCL devices",
            (int)nvidia_opencls.size()
        );
    }
#endif

    // Create descriptions for OpenCL NVIDIA GPUs
    //
    // NOTE(review): the next statement is damaged.  The remainder of
    // get_opencl() and the beginning of the following function (its name,
    // return type and leading parameters) are missing.  From the body below,
    // that function takes at least "prop", "device_index" and "warnings"
    // and returns a cl_int status.
    //
    // What the visible body does: queries one OpenCL device attribute after
    // another with clGetDeviceInfo() and stores each into "prop".  On the
    // first failure it adds a message to "warnings" and returns the OpenCL
    // error code; if every query succeeds it returns CL_SUCCESS.
    // CL_DEVICE_HALF_FP_CONFIG and CL_DEVICE_DOUBLE_FP_CONFIG are optional:
    // CL_INVALID_VALUE / CL_INVALID_OPERATION there just zero the field.
    for (i=0; i&warnings
) {
    cl_int ciErrNum;
    char buf[256];

    // NOTE(review): here and for the vendor query below, an empty string
    // with a successful call takes the error branch but returns ciErrNum,
    // which is CL_SUCCESS in that case -- the caller is told the query
    // succeeded.  Confirm whether that is intended.
    ciErrNum = (*__clGetDeviceInfo)(prop.device_id, CL_DEVICE_NAME, sizeof(prop.name), prop.name, NULL);
    if ((ciErrNum != CL_SUCCESS) || (prop.name[0] == 0)) {
        sprintf(buf, "clGetDeviceInfo failed to get name for GPU %d", (int)device_index);
        warnings.push_back(buf);
        return ciErrNum;
    }

#if DEBUGFOROLIVER
    if (log_flags.coproc_debug) {
        msg_printf(0, MSG_INFO,
            "[coproc] OpenCL device %d: clGetDeviceInfo got name %s",
            device_index, prop.name
        );
    }
#endif

    ciErrNum = (*__clGetDeviceInfo)(prop.device_id, CL_DEVICE_VENDOR, sizeof(prop.vendor), prop.vendor, NULL);
    if ((ciErrNum != CL_SUCCESS) || (prop.vendor[0] == 0)) {
        sprintf(buf, "clGetDeviceInfo failed to get vendor for GPU %d", (int)device_index);
        warnings.push_back(buf);
        return ciErrNum;
    }

#if DEBUGFOROLIVER
    if (log_flags.coproc_debug) {
        msg_printf(0, MSG_INFO,
            "[coproc] OpenCL device %d: clGetDeviceInfo got vendor %s",
            device_index, prop.vendor
        );
    }
#endif

    ciErrNum = (*__clGetDeviceInfo)(prop.device_id, CL_DEVICE_VENDOR_ID, sizeof(prop.vendor_id), &prop.vendor_id, NULL);
    if (ciErrNum != CL_SUCCESS) {
        sprintf(buf, "clGetDeviceInfo failed to get vendor ID for GPU %d", (int)device_index);
        warnings.push_back(buf);
        return ciErrNum;
    }

    ciErrNum = (*__clGetDeviceInfo)(prop.device_id, CL_DEVICE_AVAILABLE, sizeof(prop.available), &prop.available, NULL);
    if (ciErrNum != CL_SUCCESS) {
        sprintf(buf, "clGetDeviceInfo failed to get availability for GPU %d", (int)device_index);
        warnings.push_back(buf);
        return ciErrNum;
    }

    ciErrNum = (*__clGetDeviceInfo)(
        prop.device_id, CL_DEVICE_HALF_FP_CONFIG,
        sizeof(prop.half_fp_config), &prop.half_fp_config, NULL
    );
    if (ciErrNum != CL_SUCCESS) {
        if ((ciErrNum == CL_INVALID_VALUE) || (ciErrNum == CL_INVALID_OPERATION)) {
            prop.half_fp_config = 0;  // Not supported by OpenCL 1.0
        } else {
            sprintf(buf, "clGetDeviceInfo failed to get half-precision floating point capabilities for GPU %d", (int)device_index);
            warnings.push_back(buf);
            return ciErrNum;
        }
    }

    ciErrNum = (*__clGetDeviceInfo)(
        prop.device_id, CL_DEVICE_SINGLE_FP_CONFIG,
        sizeof(prop.single_fp_config), &prop.single_fp_config, NULL
    );
    if (ciErrNum != CL_SUCCESS) {
        sprintf(buf, "clGetDeviceInfo failed to get single-precision floating point capabilities for GPU %d", (int)device_index);
        warnings.push_back(buf);
        return ciErrNum;
    }

    ciErrNum = (*__clGetDeviceInfo)(
        prop.device_id, CL_DEVICE_DOUBLE_FP_CONFIG,
        sizeof(prop.double_fp_config), &prop.double_fp_config, NULL
    );
    if (ciErrNum != CL_SUCCESS) {
        if ((ciErrNum == CL_INVALID_VALUE) || (ciErrNum == CL_INVALID_OPERATION)) {
            prop.double_fp_config = 0;  // Not supported by OpenCL 1.0
        } else {
            sprintf(buf, "clGetDeviceInfo failed to get double-precision floating point capabilities for GPU %d", (int)device_index);
            warnings.push_back(buf);
            return ciErrNum;
        }
    }

    ciErrNum = (*__clGetDeviceInfo)(
        prop.device_id, CL_DEVICE_ENDIAN_LITTLE, sizeof(prop.endian_little),
        &prop.endian_little, NULL
    );
    if (ciErrNum != CL_SUCCESS) {
        sprintf(buf, "clGetDeviceInfo failed to get little or big endian for GPU %d", (int)device_index);
        warnings.push_back(buf);
        return ciErrNum;
    }

    ciErrNum = (*__clGetDeviceInfo)(
        prop.device_id, CL_DEVICE_EXECUTION_CAPABILITIES,
        sizeof(prop.execution_capabilities), &prop.execution_capabilities, NULL
    );
    if (ciErrNum != CL_SUCCESS) {
        sprintf(buf, "clGetDeviceInfo failed to get execution capabilities for GPU %d", (int)device_index);
        warnings.push_back(buf);
        return ciErrNum;
    }

    ciErrNum = (*__clGetDeviceInfo)(
        prop.device_id, CL_DEVICE_EXTENSIONS, sizeof(prop.extensions),
        prop.extensions, NULL
    );
    if (ciErrNum != CL_SUCCESS) {
        sprintf(buf, "clGetDeviceInfo failed to get device extensions for GPU %d", (int)device_index);
        warnings.push_back(buf);
        return ciErrNum;
    }

    ciErrNum = (*__clGetDeviceInfo)(
        prop.device_id, CL_DEVICE_GLOBAL_MEM_SIZE,
        sizeof(prop.global_mem_size), &prop.global_mem_size, NULL
    );
    if (ciErrNum != CL_SUCCESS) {
        sprintf(buf, "clGetDeviceInfo failed to get global memory size for GPU %d", (int)device_index);
        warnings.push_back(buf);
        return ciErrNum;
    }

#if DEBUGFOROLIVER
    if (log_flags.coproc_debug) {
        msg_printf(0, MSG_INFO,
            "[coproc] OpenCL device %d: clGetDeviceInfo got global memory size %.0fMB",
            device_index, prop.global_mem_size/MEGA
        );
    }
#endif

    ciErrNum = (*__clGetDeviceInfo)(
        prop.device_id, CL_DEVICE_LOCAL_MEM_SIZE,
        sizeof(prop.local_mem_size), &prop.local_mem_size, NULL
    );
    if (ciErrNum != CL_SUCCESS) {
        sprintf(buf, "clGetDeviceInfo failed to get local memory size for GPU %d", (int)device_index);
        warnings.push_back(buf);
        return ciErrNum;
    }

    ciErrNum = (*__clGetDeviceInfo)(
        prop.device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY,
        sizeof(prop.max_clock_frequency), &prop.max_clock_frequency, NULL
    );
    if (ciErrNum != CL_SUCCESS) {
        sprintf(buf, "clGetDeviceInfo failed to get max clock frequency for GPU %d", (int)device_index);
        warnings.push_back(buf);
        return ciErrNum;
    }

    ciErrNum = (*__clGetDeviceInfo)(
        prop.device_id, CL_DEVICE_MAX_COMPUTE_UNITS,
        sizeof(prop.max_compute_units), &prop.max_compute_units, NULL
    );
    if (ciErrNum != CL_SUCCESS) {
        sprintf(buf, "clGetDeviceInfo failed to get max compute units for GPU %d", (int)device_index);
        warnings.push_back(buf);
        return ciErrNum;
    }

    ciErrNum = (*__clGetDeviceInfo)(prop.device_id, CL_DEVICE_VERSION, sizeof(prop.opencl_device_version), prop.opencl_device_version, NULL);
    if (ciErrNum != CL_SUCCESS) {
        sprintf(buf, "clGetDeviceInfo failed to get OpenCL version supported by GPU %d", (int)device_index);
        warnings.push_back(buf);
        return ciErrNum;
    }

    ciErrNum = (*__clGetDeviceInfo)(prop.device_id, CL_DRIVER_VERSION, sizeof(prop.opencl_driver_version), prop.opencl_driver_version, NULL);
    if (ciErrNum != CL_SUCCESS) {
        sprintf(buf, "clGetDeviceInfo failed to get OpenCL driver version for GPU %d", (int)device_index);
        warnings.push_back(buf);
        return ciErrNum;
    }

    return CL_SUCCESS;
}

// This is called for ATI GPUs with CAL or NVIDIA GPUs with
CUDA, to merge
// the OpenCL info into the CAL or CUDA data for the "best" CAL or CUDA GPU.
// This assumes that, for each GPU, we have previously correlated its CAL
// or CUDA device_num with its opencl_device_index.
//
// NOTE(review): this copy of the file is damaged (text between angle
// brackets was lost).  The body of merge_opencl() after the start of its
// first loop is missing, together with the name and return type of the
// function that follows it.  COPROCS::get_opencl() calls
// find_best_opencls(use_all, opencls, ignore_dev), so the second parameter
// list below presumably belongs to that function -- confirm against the
// pristine file.
void COPROC::merge_opencl(
    vector &opencls,
    vector& ignore_dev
) {
    unsigned int i, j;

    for (i=0; i &opencls,
    vector& ignore_dev
) {
    unsigned int i;

    // identify the most capable ATI or NVIDIA OpenCL GPU
    //
    // NOTE(review): the loop header and the start of its body (including the
    // declaration of is_best and the comparison whose result is tested
    // against 0) are missing from the next statement.
    bool first = true;
    for (i=0; i 0) {
            is_best = true;
        }
        if (is_best) {
            // fill in what info we have
            opencl_prop = opencls[i];
            device_num = opencls[i].device_num;
            peak_flops = opencls[i].peak_flops;
            have_opencl = true;
        }
    }

    // see which other instances are equivalent, and set the count,
    // device_nums, opencl_device_count and opencl_device_ids fields
    //
    // NOTE(review): the loop below is damaged: its body, the end of this
    // function and the header of the next one are missing.  What remains is
    // the tail of a three-way comparison of two CUDA devices (c1, c2, bool
    // loose) returning 1 / -1 / 0.  Visible ranking order: compute
    // capability major, then minor, then CUDA version, then available RAM,
    // then peak FLOPS.  In loose mode RAM only decides when c1 has more than
    // 1.4x or less than 0.7x of c2's, and peak FLOPS is not consulted.
    count = 0;
    opencl_device_count = 0;
    for (i=0; i c2.prop.major) return 1;
    if (c1.prop.major < c2.prop.major) return -1;
    if (c1.prop.minor > c2.prop.minor) return 1;
    if (c1.prop.minor < c2.prop.minor) return -1;
    if (c1.cuda_version > c2.cuda_version) return 1;
    if (c1.cuda_version < c2.cuda_version) return -1;
    if (loose) {
        if (c1.available_ram> 1.4*c2.available_ram) return 1;
        if (c1.available_ram < .7* c2.available_ram) return -1;
        return 0;
    }
    if (c1.available_ram > c2.available_ram) return 1;
    if (c1.available_ram < c2.available_ram) return -1;
    double s1 = c1.peak_flops;
    double s2 = c2.peak_flops;
    if (s1 > s2) return 1;
    if (s1 < s2) return -1;
    return 0;
}

// Device attribute selectors passed as an int to cuDeviceGetAttribute().
// Declared locally, presumably so this file does not need the CUDA SDK
// headers -- the values should match the CUDA driver API; confirm against
// cuda.h before changing any of them.
enum CUdevice_attribute_enum {
  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
  CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,
  CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
  CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
  CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
  CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,
  CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
  CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
  CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
  CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
  CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
  CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
  CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
  CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20
};

// Pointers to the CUDA driver entry points used in this file.  They are
// resolved at run time by COPROC_NVIDIA::get() with GetProcAddress()
// (Windows) or dlsym() (elsewhere), so the client does not link against the
// CUDA library.
//
// NOTE(review): device handles, contexts and memory sizes are declared with
// plain int / void* / unsigned int here.  In particular cuDeviceTotalMem and
// cuMemGetInfo report sizes through unsigned int* -- confirm this matches
// the ABI of the driver entry points being loaded.
#ifdef _WIN32
typedef int (__stdcall *CUDA_GDC)(int *count);
typedef int (__stdcall *CUDA_GDV)(int* version);
typedef int (__stdcall *CUDA_GDI)(int);
typedef int (__stdcall *CUDA_GDG)(int*, int);
typedef int (__stdcall *CUDA_GDA)(int*, int, int);
typedef int (__stdcall *CUDA_GDN)(char*, int, int);
typedef int (__stdcall *CUDA_GDM)(unsigned int*, int);
typedef int (__stdcall *CUDA_GDCC)(int*, int*, int);
typedef int (__stdcall *CUDA_CC)(void**, unsigned int, unsigned int);
typedef int (__stdcall *CUDA_CD)(void*);
typedef int (__stdcall *CUDA_MA)(unsigned int*, unsigned int);
typedef int (__stdcall *CUDA_MF)(unsigned int);
typedef int (__stdcall *CUDA_MGI)(unsigned int*, unsigned int*);

CUDA_GDC __cuDeviceGetCount = NULL;
CUDA_GDV __cuDriverGetVersion = NULL;
CUDA_GDI __cuInit = NULL;
CUDA_GDG __cuDeviceGet = NULL;
CUDA_GDA __cuDeviceGetAttribute = NULL;
CUDA_GDN __cuDeviceGetName = NULL;
CUDA_GDM __cuDeviceTotalMem = NULL;
CUDA_GDCC __cuDeviceComputeCapability = NULL;
CUDA_CC __cuCtxCreate = NULL;
CUDA_CD __cuCtxDestroy = NULL;
CUDA_MA __cuMemAlloc = NULL;
CUDA_MF __cuMemFree = NULL;
CUDA_MGI __cuMemGetInfo = NULL;
#else
void* cudalib;
int (*__cuInit)(int);
int (*__cuDeviceGetCount)(int*);
int (*__cuDriverGetVersion)(int*);
int (*__cuDeviceGet)(int*, int);
int (*__cuDeviceGetAttribute)(int*, int, int);
int (*__cuDeviceGetName)(char*, int, int);
int (*__cuDeviceTotalMem)(unsigned int*, int);
int (*__cuDeviceComputeCapability)(int*, int*, int);
int (*__cuCtxCreate)(void**, unsigned int, unsigned int);
int (*__cuCtxDestroy)(void*);
int (*__cuMemAlloc)(unsigned int*, unsigned int);
int (*__cuMemFree)(unsigned int);
int (*__cuMemGetInfo)(unsigned int*, unsigned int*);
#endif

// NVIDIA interfaces are documented here:
// http://developer.download.nvidia.com/compute/cuda/2_3/toolkit/docs/online/index.html

// Detect NVIDIA GPUs through the CUDA driver library.
//
// Visible steps:
// 1. load the CUDA library and resolve the driver entry points; a missing
//    library or entry point adds a message to "warnings" and returns;
// 2. on Windows (non-SIM builds) also query the display driver version
//    through NVAPI;
// 3. cuInit(), cuDriverGetVersion(), cuDeviceGetCount(); the device count is
//    reported through "warnings" as well;
// 4. (damaged) per-device query loop that appends to nvidia_gpus;
// 5. (damaged) pick the most capable non-ignored device into *this and
//    count the equivalent ones.
//
// NOTE(review): this copy of the file is damaged (text between angle
// brackets was lost), so template arguments in the signature and parts of
// the loops below are missing.
// NOTE(review): no FreeLibrary()/dlclose() appears in the visible code, so
// the library handle seems to stay open on every return path -- confirm
// whether that is intentional.
//
void COPROC_NVIDIA::get(
    bool use_all,    // if false, use only those equivalent to most capable
    vector& descs,
    vector& warnings,
    vector& ignore_devs
) {
    int cuda_ndevs, retval;
    char buf[256];

#ifdef _WIN32
    // Note: on Windows the library handle is a local; the non-Windows branch
    // assigns the file-scope "cudalib" instead.
    HMODULE cudalib = LoadLibrary("nvcuda.dll");
    if (!cudalib) {
        warnings.push_back("No NVIDIA library found");
        return;
    }
    __cuDeviceGetCount = (CUDA_GDC)GetProcAddress( cudalib, "cuDeviceGetCount" );
    __cuDriverGetVersion = (CUDA_GDV)GetProcAddress( cudalib, "cuDriverGetVersion" );
    __cuInit = (CUDA_GDI)GetProcAddress( cudalib, "cuInit" );
    __cuDeviceGet = (CUDA_GDG)GetProcAddress( cudalib, "cuDeviceGet" );
    __cuDeviceGetAttribute = (CUDA_GDA)GetProcAddress( cudalib, "cuDeviceGetAttribute" );
    __cuDeviceGetName = (CUDA_GDN)GetProcAddress( cudalib, "cuDeviceGetName" );
    __cuDeviceTotalMem = (CUDA_GDM)GetProcAddress( cudalib, "cuDeviceTotalMem" );
    __cuDeviceComputeCapability = (CUDA_GDCC)GetProcAddress( cudalib, "cuDeviceComputeCapability" );
    __cuCtxCreate = (CUDA_CC)GetProcAddress( cudalib, "cuCtxCreate" );
    __cuCtxDestroy = (CUDA_CD)GetProcAddress( cudalib, "cuCtxDestroy" );
    __cuMemAlloc = (CUDA_MA)GetProcAddress( cudalib, "cuMemAlloc" );
    __cuMemFree = (CUDA_MF)GetProcAddress( cudalib, "cuMemFree" );
    __cuMemGetInfo = (CUDA_MGI)GetProcAddress( cudalib, "cuMemGetInfo" );

#ifndef SIM
    // NOTE(review): nvapiStatus is assigned but not tested anywhere in the
    // visible code, and the return value of NvAPI_Initialize() is ignored;
    // Version.drvVersion is used further down regardless of the outcome
    // (it stays 0 from the memset if the call fails).
    NvAPI_Status nvapiStatus;
    NV_DISPLAY_DRIVER_VERSION Version;
    memset(&Version, 0, sizeof(Version));
    Version.version = NV_DISPLAY_DRIVER_VERSION_VER;

    NvAPI_Initialize();
    nvapiStatus = NvAPI_GetDisplayDriverVersion(NULL, &Version);
#endif
#else

#ifdef __APPLE__
    cudalib = dlopen("/usr/local/cuda/lib/libcuda.dylib", RTLD_NOW);
#else
    cudalib = dlopen("libcuda.so", RTLD_NOW);
#endif
    if (!cudalib) {
        warnings.push_back("No NVIDIA library found");
        return;
    }
    __cuDeviceGetCount = (int(*)(int*)) dlsym(cudalib, "cuDeviceGetCount");
    __cuDriverGetVersion = (int(*)(int*)) dlsym( cudalib, "cuDriverGetVersion" );
    __cuInit = (int(*)(int)) dlsym( cudalib, "cuInit" );
    __cuDeviceGet = (int(*)(int*, int)) dlsym( cudalib, "cuDeviceGet" );
    __cuDeviceGetAttribute = (int(*)(int*, int, int)) dlsym( cudalib, "cuDeviceGetAttribute" );
    __cuDeviceGetName = (int(*)(char*, int, int)) dlsym( cudalib, "cuDeviceGetName" );
    __cuDeviceTotalMem = (int(*)(unsigned int*, int)) dlsym( cudalib, "cuDeviceTotalMem" );
    __cuDeviceComputeCapability = (int(*)(int*, int*, int)) dlsym( cudalib, "cuDeviceComputeCapability" );
    __cuCtxCreate = (int(*)(void**, unsigned int, unsigned int)) dlsym( cudalib, "cuCtxCreate" );
    __cuCtxDestroy = (int(*)(void*)) dlsym( cudalib, "cuCtxDestroy" );
    __cuMemAlloc = (int(*)(unsigned int*, unsigned int)) dlsym( cudalib, "cuMemAlloc" );
    __cuMemFree = (int(*)(unsigned int)) dlsym( cudalib, "cuMemFree" );
    __cuMemGetInfo = (int(*)(unsigned int*, unsigned int*)) dlsym( cudalib, "cuMemGetInfo" );
#endif

    // Refuse to continue unless the entry points were resolved.
    // NOTE(review): __cuDeviceGetName is loaded above but is the only
    // pointer without a check here.  If the (missing) device loop below
    // calls it, a library lacking that symbol would lead to a call through
    // a null pointer -- confirm and add a check if so.
    if (!__cuDriverGetVersion) {
        warnings.push_back("cuDriverGetVersion() missing from NVIDIA library");
        return;
    }
    if (!__cuInit) {
        warnings.push_back("cuInit() missing from NVIDIA library");
        return;
    }
    if (!__cuDeviceGetCount) {
        warnings.push_back("cuDeviceGetCount() missing from NVIDIA library");
        return;
    }
    if (!__cuDeviceGet) {
        warnings.push_back("cuDeviceGet() missing from NVIDIA library");
        return;
    }
    if (!__cuDeviceGetAttribute) {
        warnings.push_back("cuDeviceGetAttribute() missing from NVIDIA library");
        return;
    }
    if (!__cuDeviceTotalMem) {
        warnings.push_back("cuDeviceTotalMem() missing from NVIDIA library");
        return;
    }
    if (!__cuDeviceComputeCapability) {
        warnings.push_back("cuDeviceComputeCapability() missing from NVIDIA library");
        return;
    }
    if (!__cuCtxCreate) {
        warnings.push_back("cuCtxCreate() missing from NVIDIA library");
        return;
    }
    if (!__cuCtxDestroy) {
        warnings.push_back("cuCtxDestroy() missing from NVIDIA library");
        return;
    }
    if (!__cuMemAlloc) {
        warnings.push_back("cuMemAlloc() missing from NVIDIA library");
        return;
    }
    if (!__cuMemFree) {
        warnings.push_back("cuMemFree() missing from NVIDIA library");
        return;
    }
    if (!__cuMemGetInfo) {
        warnings.push_back("cuMemGetInfo() missing from NVIDIA library");
        return;
    }

    // Any non-zero cuInit() result is reported with the same fixed message;
    // the actual error code is not included.
    retval = (*__cuInit)(0);
    if (retval) {
        sprintf(buf, "NVIDIA drivers present but no GPUs found");
        warnings.push_back(buf);
        return;
    }

    retval = (*__cuDriverGetVersion)(&cuda_version);
    if (retval) {
        sprintf(buf, "cuDriverGetVersion() returned %d", retval);
        warnings.push_back(buf);
        return;
    }

    retval = (*__cuDeviceGetCount)(&cuda_ndevs);
    if (retval) {
        sprintf(buf, "cuDeviceGetCount() returned %d", retval);
        warnings.push_back(buf);
        return;
    }
    // The device count is informational but goes through "warnings" too.
    sprintf(buf, "NVIDIA library reports %d GPU%s", cuda_ndevs, (cuda_ndevs==1)?"":"s");
    warnings.push_back(buf);

    int j;
    unsigned int i;
    COPROC_NVIDIA cc;
    string s;
    // NOTE(review): the loop below is damaged.  Everything between the start
    // of its header and a range check ending in "100" is missing (the
    // per-device CUDA queries that fill "cc", judging by what follows).
    for (j=0; j 100) continue;  // e.g. 9999 is an error
#if defined(_WIN32) && !defined(SIM)
        cc.display_driver_version = Version.drvVersion;
#elif defined(__APPLE__)
        cc.display_driver_version = NSVersionOfRunTimeLibrary("cuda");
#else
        cc.display_driver_version = 0;
#endif
        cc.have_cuda = true;
        cc.cuda_version = cuda_version;
        cc.device_num = j;
        cc.set_peak_flops();
        cc.get_available_ram();
#if FAKENVIDIACUDA0
        // test switch: shift CUDA device numbers by one
        cc.device_num = j+1;
#endif
        nvidia_gpus.push_back(cc);
    }

    if (!nvidia_gpus.size()) {
        warnings.push_back("No CUDA-capable NVIDIA GPUs found");
        return;
    }

    // identify the most capable non-ignored instance
    //
    // NOTE(review): the loop header and the comparison that selects the best
    // device are missing from the next statement.
    bool first = true;
    for (i=0; i 0) {
            *this = nvidia_gpus[i];
        }
    }

    // see which other instances are equivalent,
    // and set the "count" and "device_nums" fields
    //
    // NOTE(review): the loop below is damaged: its body, the end of
    // COPROC_NVIDIA::get() and the header of the next function are missing.
    // What remains is the tail of a three-way comparison of two ATI devices
    // (c1, c2, "loose" flag) returning 1 / -1 / 0, ranking by
    // attribs.localRAM and then peak FLOPS; in loose mode RAM only decides
    // when c1 has more than 1.4x or less than 0.7x of c2's.
    count = 0;
    for (i=0; i 1.4*c2.attribs.localRAM) return 1;
        if (c1.attribs.localRAM< .7* c2.attribs.localRAM) return -1;
        return 0;
    }
    if (c1.attribs.localRAM > c2.attribs.localRAM) return 1;
    if (c1.attribs.localRAM < c2.attribs.localRAM) return -1;
    double s1 = c1.peak_flops;
    double s2 = c2.peak_flops;
    if (s1 > s2) return 1;
    if (s1 < s2) return -1;
    return 0;
}

// Function-pointer types for the ATI CAL entry points (Windows flavour).
#ifdef _WIN32
typedef int (__stdcall *ATI_ATTRIBS) (CALdeviceattribs *attribs, CALuint ordinal);
typedef int (__stdcall *ATI_CLOSE)(void);
typedef int (__stdcall *ATI_GDC)(CALuint *numDevices);
typedef int (__stdcall *ATI_GDI)(void);
typedef int (__stdcall *ATI_INFO) (CALdeviceinfo *info, CALuint ordinal);
typedef int (__stdcall *ATI_VER) (CALuint *cal_major, CALuint *cal_minor, CALuint *cal_imp);
typedef int (__stdcall *ATI_STATUS) (CALdevicestatus*, CALdevice);
typedef int (__stdcall *ATI_DEVICEOPEN) (CALdevice*, CALuint);
typedef int (__stdcall *ATI_DEVICECLOSE) (CALdevice);

// Pointers to the CAL runtime entry points used in this file.  They are
// resolved at run time by COPROC_ATI::get() with GetProcAddress() (Windows)
// or dlsym() (elsewhere).
ATI_ATTRIBS __calDeviceGetAttribs = NULL;
ATI_CLOSE   __calShutdown = NULL;
ATI_GDC     __calDeviceGetCount = NULL;
ATI_GDI     __calInit = NULL;
ATI_INFO    __calDeviceGetInfo = NULL;
ATI_VER     __calGetVersion = NULL;
ATI_STATUS  __calDeviceGetStatus = NULL;
ATI_DEVICEOPEN  __calDeviceOpen = NULL;
ATI_DEVICECLOSE  __calDeviceClose = NULL;

#else

int (*__calInit)();
int (*__calGetVersion)(CALuint*, CALuint*, CALuint*);
int (*__calDeviceGetCount)(CALuint*);
int (*__calDeviceGetAttribs)(CALdeviceattribs*, CALuint);
int (*__calShutdown)();
int (*__calDeviceGetInfo)(CALdeviceinfo*, CALuint);
int (*__calDeviceGetStatus)(CALdevicestatus*, CALdevice);
int (*__calDeviceOpen)(CALdevice*, CALuint);
int (*__calDeviceClose)(CALdevice);

#endif

// Detect ATI/AMD GPUs through the CAL runtime library.
//
// Visible steps:
// 1. load the CAL runtime (on Windows the "ati" DLL name is tried first,
//    then the "amd" one; atirt_detected / amdrt_detected record which was
//    found) and resolve its entry points; a missing library or entry point
//    adds a message to "warnings" and returns;
// 2. calInit(), calDeviceGetCount(), calGetVersion(); any result other than
//    CAL_RESULT_OK, or a device count of zero, adds a message and returns;
// 3. (damaged) per-device loop and selection of the best device into *this.
//
// NOTE(review): this copy of the file is damaged (text between angle
// brackets was lost) and the function is cut off at the end of the visible
// text, so the comments cover only what can be seen.
// NOTE(review): no FreeLibrary()/dlclose() appears in the visible code, so
// the library handle seems to stay open on every return path -- confirm
// whether that is intentional.
//
void COPROC_ATI::get(
    bool use_all,
    vector& descs, vector& warnings, vector& ignore_devs
) {
    CALuint numDevices, cal_major, cal_minor, cal_imp;
    char buf[256];
    int retval;

    attribs.struct_size = sizeof(CALdeviceattribs);
    numDevices =0;

#ifdef _WIN32

    // 64-bit builds use the "64"-suffixed DLL names.
#if defined _M_X64
    const char* atilib_name = "aticalrt64.dll";
    const char* amdlib_name = "amdcalrt64.dll";
#else
    const char* atilib_name = "aticalrt.dll";
    const char* amdlib_name = "amdcalrt.dll";
#endif

    HINSTANCE callib = LoadLibrary(atilib_name);
    if (callib) {
        atirt_detected = true;
    } else {
        callib = LoadLibrary(amdlib_name);
        if (callib) {
            amdrt_detected = true;
        }
    }

    if (!callib) {
        warnings.push_back("No ATI library found.");
        return;
    }

    __calInit = (ATI_GDI)GetProcAddress(callib, "calInit" );
    __calGetVersion = (ATI_VER)GetProcAddress(callib, "calGetVersion" );
    __calDeviceGetCount = (ATI_GDC)GetProcAddress(callib, "calDeviceGetCount" );
    __calDeviceGetAttribs =(ATI_ATTRIBS)GetProcAddress(callib, "calDeviceGetAttribs" );
    __calShutdown = (ATI_CLOSE)GetProcAddress(callib, "calShutdown" );
    __calDeviceGetInfo = (ATI_INFO)GetProcAddress(callib, "calDeviceGetInfo" );
    __calDeviceGetStatus = (ATI_STATUS)GetProcAddress(callib, "calDeviceGetStatus" );
    __calDeviceOpen = (ATI_DEVICEOPEN)GetProcAddress(callib, "calDeviceOpen" );
    __calDeviceClose = (ATI_DEVICECLOSE)GetProcAddress(callib, "calDeviceClose" );

#else

    void* callib;

    callib = dlopen("libaticalrt.so", RTLD_NOW);
    if (!callib) {
        warnings.push_back("No ATI library found");
        return;
    }

    atirt_detected = true;

    __calInit = (int(*)()) dlsym(callib, "calInit");
    __calGetVersion = (int(*)(CALuint*, CALuint*, CALuint*)) dlsym(callib, "calGetVersion");
    __calDeviceGetCount = (int(*)(CALuint*)) dlsym(callib, "calDeviceGetCount");
    __calDeviceGetAttribs = (int(*)(CALdeviceattribs*, CALuint)) dlsym(callib, "calDeviceGetAttribs");
    __calShutdown = (int(*)()) dlsym(callib, "calShutdown");
    __calDeviceGetInfo = (int(*)(CALdeviceinfo*, CALuint)) dlsym(callib, "calDeviceGetInfo");
    __calDeviceGetStatus = (int(*)(CALdevicestatus*, CALdevice)) dlsym(callib, "calDeviceGetStatus");
    __calDeviceOpen = (int(*)(CALdevice*, CALuint)) dlsym(callib, "calDeviceOpen");
    __calDeviceClose = (int(*)(CALdevice)) dlsym(callib, "calDeviceClose");

#endif

    // Refuse to continue unless the entry points were resolved.
    // NOTE(review): __calShutdown is loaded above but is the only pointer
    // without a check here.  If the part of this function that is not
    // visible calls it, a library lacking that symbol would lead to a call
    // through a null pointer -- confirm and add a check if so.
    if (!__calInit) {
        warnings.push_back("calInit() missing from CAL library");
        return;
    }
    if (!__calGetVersion) {
        warnings.push_back("calGetVersion() missing from CAL library");
        return;
    }
    if (!__calDeviceGetCount) {
        warnings.push_back("calDeviceGetCount() missing from CAL library");
        return;
    }
    if (!__calDeviceGetAttribs) {
        warnings.push_back("calDeviceGetAttribs() missing from CAL library");
        return;
    }
    if (!__calDeviceGetInfo) {
        warnings.push_back("calDeviceGetInfo() missing from CAL library");
        return;
    }
    if (!__calDeviceGetStatus) {
        warnings.push_back("calDeviceGetStatus() missing from CAL library");
        return;
    }
    if (!__calDeviceOpen) {
        warnings.push_back("calDeviceOpen() missing from CAL library");
        return;
    }
    if (!__calDeviceClose) {
        warnings.push_back("calDeviceClose() missing from CAL library");
        return;
    }

    retval = (*__calInit)();
    if (retval != CAL_RESULT_OK) {
        sprintf(buf, "calInit() returned %d", retval);
        warnings.push_back(buf);
        return;
    }
    retval = (*__calDeviceGetCount)(&numDevices);
    if (retval != CAL_RESULT_OK) {
        sprintf(buf, "calDeviceGetCount() returned %d", retval);
        warnings.push_back(buf);
        return;
    }
    retval = (*__calGetVersion)(&cal_major, &cal_minor, &cal_imp);
    if (retval != CAL_RESULT_OK) {
        sprintf(buf, "calGetVersion() returned %d", retval);
        warnings.push_back(buf);
        return;
    }

    if (!numDevices) {
        warnings.push_back("No usable CAL devices found");
        return;
    }

    COPROC_ATI cc, cc2;
    string s, gpu_name;
    // NOTE(review): the loop below is damaged.  Everything between the start
    // of its header and the tail of the best-device selection is missing
    // (the per-device CAL queries and the comparison whose result is tested
    // against 0).
    for (CALuint i=0; i 0) {
            *this = ati_gpus[i];
        }
    }

    // see which other instances are equivalent,
    // and set the "count" and "device_nums" fields
    //
    // NOTE(review): the visible text ends in the middle of the next loop
    // header; the rest of the function is not available here.
    count = 0;
    for (i=0; i