// This file is part of BOINC. // http://boinc.berkeley.edu // Copyright (C) 2009 University of California // // BOINC is free software; you can redistribute it and/or modify it // under the terms of the GNU Lesser General Public License // as published by the Free Software Foundation, // either version 3 of the License, or (at your option) any later version. // // BOINC is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // See the GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License // along with BOINC. If not, see . // client-specific GPU code. Mostly GPU detection #ifdef _WIN32 #ifndef SIM #include #endif #else #ifdef __APPLE__ // Suppress obsolete warning when building for OS 10.3.9 #define DLOPEN_NO_WARN #endif #include #include #include #endif #include "str_util.h" #include "coproc.h" using std::string; using std::vector; static bool in_vector(int n, vector& v) { for (unsigned int i=0; i&descs, vector&warnings, vector& ignore_cuda_dev, vector& ignore_ati_dev ) { #ifdef _WIN32 COPROC_CUDA::get(*this, use_all, descs, warnings, ignore_cuda_dev); COPROC_ATI::get(*this, descs, warnings, ignore_ati_dev); #else void (*old_sig)(int) = signal(SIGSEGV, segv_handler); if (setjmp(resume)) { warnings.push_back("Caught SIGSEGV in NVIDIA GPU detection"); } else { COPROC_CUDA::get(*this, use_all, descs, warnings, ignore_cuda_dev); } #ifndef __APPLE__ // ATI does not yet support CAL on Macs if (setjmp(resume)) { warnings.push_back("Caught SIGSEGV in ATI GPU detection"); } else { COPROC_ATI::get(*this, descs, warnings, ignore_ati_dev); } #endif signal(SIGSEGV, old_sig); #endif } // return 1/-1/0 if device 1 is more/less/same capable than device 2. // If "loose", ignore FLOPS and tolerate small memory diff // int cuda_compare(COPROC_CUDA& c1, COPROC_CUDA& c2, bool loose) { if (c1.prop.major > c2.prop.major) return 1; if (c1.prop.major < c2.prop.major) return -1; if (c1.prop.minor > c2.prop.minor) return 1; if (c1.prop.minor < c2.prop.minor) return -1; if (c1.cuda_version > c2.cuda_version) return 1; if (c1.cuda_version < c2.cuda_version) return -1; if (loose) { if (c1.prop.totalGlobalMem > 1.4*c2.prop.totalGlobalMem) return 1; if (c1.prop.totalGlobalMem < .7* c2.prop.totalGlobalMem) return -1; return 0; } if (c1.prop.totalGlobalMem > c2.prop.totalGlobalMem) return 1; if (c1.prop.totalGlobalMem < c2.prop.totalGlobalMem) return -1; double s1 = c1.peak_flops(); double s2 = c2.peak_flops(); if (s1 > s2) return 1; if (s1 < s2) return -1; return 0; } #ifdef _WIN32 typedef int (__stdcall *CUDA_GDC)(int *count); typedef int (__stdcall *CUDA_GDV)(int* version); typedef int (__stdcall *CUDA_GDI)(int); typedef int (__stdcall *CUDA_GDG)(int*, int); typedef int (__stdcall *CUDA_GDA)(int*, int, int); typedef int (__stdcall *CUDA_GDN)(char*, int, int); typedef int (__stdcall *CUDA_GDM)(unsigned int*, int); typedef int (__stdcall *CUDA_GDCC)(int*, int*, int); typedef int (__stdcall *CUDA_CC)(unsigned int*, unsigned int, unsigned int); typedef int (__stdcall *CUDA_CD)(unsigned int); typedef int (__stdcall *CUDA_MA)(unsigned int*, unsigned int); typedef int (__stdcall *CUDA_MF)(unsigned int); typedef int (__stdcall *CUDA_MGI)(unsigned int*, unsigned int*); CUDA_GDC __cuDeviceGetCount = NULL; CUDA_GDV __cuDriverGetVersion = NULL; CUDA_GDI __cuInit = NULL; CUDA_GDG __cuDeviceGet = NULL; CUDA_GDA __cuDeviceGetAttribute = NULL; CUDA_GDN __cuDeviceGetName = NULL; CUDA_GDM __cuDeviceTotalMem = NULL; CUDA_GDCC __cuDeviceComputeCapability = NULL; CUDA_CC __cuCtxCreate = NULL; CUDA_CD __cuCtxDestroy = NULL; CUDA_MA __cuMemAlloc = NULL; CUDA_MF __cuMemFree = NULL; CUDA_MGI __cuMemGetInfo = NULL; #else void* cudalib; int (*__cuInit)(int); int (*__cuDeviceGetCount)(int*); int (*__cuDriverGetVersion)(int*); int (*__cuDeviceGet)(int*, int); int (*__cuDeviceGetAttribute)(int*, int, int); int (*__cuDeviceGetName)(char*, int, int); int (*__cuDeviceTotalMem)(unsigned int*, int); int (*__cuDeviceComputeCapability)(int*, int*, int); int (*__cuCtxCreate)(unsigned int*, unsigned int, unsigned int); int (*__cuCtxDestroy)(unsigned int); int (*__cuMemAlloc)(unsigned int*, unsigned int); int (*__cuMemFree)(unsigned int); int (*__cuMemGetInfo)(unsigned int*, unsigned int*); #endif // NVIDIA interfaces are documented here: // http://developer.download.nvidia.com/compute/cuda/2_3/toolkit/docs/online/index.html void COPROC_CUDA::get( COPROCS& coprocs, bool use_all, // if false, use only those equivalent to most capable vector& descs, vector& warnings, vector& ignore_devs ) { int count, retval; char buf[256]; #ifdef _WIN32 HMODULE cudalib = LoadLibrary("nvcuda.dll"); if (!cudalib) { warnings.push_back("No NVIDIA library found"); return; } __cuDeviceGetCount = (CUDA_GDC)GetProcAddress( cudalib, "cuDeviceGetCount" ); __cuDriverGetVersion = (CUDA_GDV)GetProcAddress( cudalib, "cuDriverGetVersion" ); __cuInit = (CUDA_GDI)GetProcAddress( cudalib, "cuInit" ); __cuDeviceGet = (CUDA_GDG)GetProcAddress( cudalib, "cuDeviceGet" ); __cuDeviceGetAttribute = (CUDA_GDA)GetProcAddress( cudalib, "cuDeviceGetAttribute" ); __cuDeviceGetName = (CUDA_GDN)GetProcAddress( cudalib, "cuDeviceGetName" ); __cuDeviceTotalMem = (CUDA_GDM)GetProcAddress( cudalib, "cuDeviceTotalMem" ); __cuDeviceComputeCapability = (CUDA_GDCC)GetProcAddress( cudalib, "cuDeviceComputeCapability" ); __cuCtxCreate = (CUDA_CC)GetProcAddress( cudalib, "cuCtxCreate" ); __cuCtxDestroy = (CUDA_CD)GetProcAddress( cudalib, "cuCtxDestroy" ); __cuMemAlloc = (CUDA_MA)GetProcAddress( cudalib, "cuMemAlloc" ); __cuMemFree = (CUDA_MF)GetProcAddress( cudalib, "cuMemFree" ); __cuMemGetInfo = (CUDA_MGI)GetProcAddress( cudalib, "cuMemGetInfo" ); #ifndef SIM NvAPI_Status nvapiStatus; NvDisplayHandle hDisplay; NV_DISPLAY_DRIVER_VERSION Version; memset(&Version, 0, sizeof(Version)); Version.version = NV_DISPLAY_DRIVER_VERSION_VER; NvAPI_Initialize(); for (int i=0; ; i++) { nvapiStatus = NvAPI_EnumNvidiaDisplayHandle(i, &hDisplay); if (nvapiStatus != NVAPI_OK) break; nvapiStatus = NvAPI_GetDisplayDriverVersion(hDisplay, &Version); if (nvapiStatus == NVAPI_OK) break; } #endif #else #ifdef __APPLE__ cudalib = dlopen("/usr/local/cuda/lib/libcuda.dylib", RTLD_NOW); #else cudalib = dlopen("libcuda.so", RTLD_NOW); #endif if (!cudalib) { warnings.push_back("No NVIDIA library found"); return; } __cuDeviceGetCount = (int(*)(int*)) dlsym(cudalib, "cuDeviceGetCount"); __cuDriverGetVersion = (int(*)(int*)) dlsym( cudalib, "cuDriverGetVersion" ); __cuInit = (int(*)(int)) dlsym( cudalib, "cuInit" ); __cuDeviceGet = (int(*)(int*, int)) dlsym( cudalib, "cuDeviceGet" ); __cuDeviceGetAttribute = (int(*)(int*, int, int)) dlsym( cudalib, "cuDeviceGetAttribute" ); __cuDeviceGetName = (int(*)(char*, int, int)) dlsym( cudalib, "cuDeviceGetName" ); __cuDeviceTotalMem = (int(*)(unsigned int*, int)) dlsym( cudalib, "cuDeviceTotalMem" ); __cuDeviceComputeCapability = (int(*)(int*, int*, int)) dlsym( cudalib, "cuDeviceComputeCapability" ); __cuCtxCreate = (int(*)(unsigned int*, unsigned int, unsigned int)) dlsym( cudalib, "cuCtxCreate" ); __cuCtxDestroy = (int(*)(unsigned int)) dlsym( cudalib, "cuCtxDestroy" ); __cuMemAlloc = (int(*)(unsigned int*, unsigned int)) dlsym( cudalib, "cuMemAlloc" ); __cuMemFree = (int(*)(unsigned int)) dlsym( cudalib, "cuMemFree" ); #endif if (!__cuDriverGetVersion) { warnings.push_back("cuDriverGetVersion() missing from NVIDIA library"); return; } if (!__cuInit) { warnings.push_back("cuInit() missing from NVIDIA library"); return; } if (!__cuDeviceGetCount) { warnings.push_back("cuDeviceGetCount() missing from NVIDIA library"); return; } if (!__cuDeviceGet) { warnings.push_back("cuDeviceGet() missing from NVIDIA library"); return; } if (!__cuDeviceGetAttribute) { warnings.push_back("cuDeviceGetAttribute() missing from NVIDIA library"); return; } if (!__cuDeviceTotalMem) { warnings.push_back("cuDeviceTotalMem() missing from NVIDIA library"); return; } if (!__cuDeviceComputeCapability) { warnings.push_back("cuDeviceComputeCapability() missing from NVIDIA library"); return; } if (!__cuCtxCreate) { warnings.push_back("cuCtxCreate() missing from NVIDIA library"); return; } if (!__cuCtxDestroy) { warnings.push_back("cuCtxDestroy() missing from NVIDIA library"); return; } if (!__cuMemAlloc) { warnings.push_back("cuMemAlloc() missing from NVIDIA library"); return; } if (!__cuMemFree) { warnings.push_back("cuMemFree() missing from NVIDIA library"); return; } if (!__cuMemGetInfo) { warnings.push_back("cuMemGetInfo() missing from NVIDIA library"); return; } retval = (*__cuInit)(0); if (retval) { sprintf(buf, "NVIDIA drivers present but no GPUs found"); warnings.push_back(buf); return; } int cuda_version; retval = (*__cuDriverGetVersion)(&cuda_version); if (retval) { sprintf(buf, "cuDriverGetVersion() returned %d", retval); warnings.push_back(buf); return; } vector gpus; retval = (*__cuDeviceGetCount)(&count); if (retval) { sprintf(buf, "cuDeviceGetCount() returned %d", retval); warnings.push_back(buf); return; } sprintf(buf, "NVIDIA library reports %d GPU%s", count, (count==1)?"":"s"); warnings.push_back(buf); int j; unsigned int i; COPROC_CUDA cc; string s; for (j=0; j 100) continue; // e.g. 9999 is an error #if defined(_WIN32) && !defined(SIM) cc.display_driver_version = Version.drvVersion; #else cc.display_driver_version = 0; #endif cc.cuda_version = cuda_version; cc.device_num = j; gpus.push_back(cc); } if (!gpus.size()) { warnings.push_back("No CUDA-capable NVIDIA GPUs found"); return; } // identify the most capable non-ignored instance // COPROC_CUDA best; bool first = true; for (i=0; i 0) { best = gpus[i]; } } // see which other instances are equivalent, // and set the "count" and "device_nums" fields // best.count = 0; for (i=0; itype, "CUDA"); cc->count = count; for (int i=0; idevice_nums[i] = i; } cc->display_driver_version = 18000; cc->cuda_version = 2020; strcpy(cc->prop.name, "Fake NVIDIA GPU"); cc->prop.totalGlobalMem = 256*1024*1024; cc->prop.sharedMemPerBlock = 100; cc->prop.regsPerBlock = 8; cc->prop.warpSize = 10; cc->prop.memPitch = 10; cc->prop.maxThreadsPerBlock = 20; cc->prop.maxThreadsDim[0] = 2; cc->prop.maxThreadsDim[1] = 2; cc->prop.maxThreadsDim[2] = 2; cc->prop.maxGridSize[0] = 10; cc->prop.maxGridSize[1] = 10; cc->prop.maxGridSize[2] = 10; cc->prop.totalConstMem = 10; cc->prop.major = 1; cc->prop.minor = 2; cc->prop.clockRate = 1250000; cc->prop.textureAlignment = 1000; cc->prop.multiProcessorCount = 14; coprocs.coprocs.push_back(cc); } int COPROC_CUDA::available_ram(int devnum, double& ar) { int device; unsigned int memfree, memtotal; unsigned int ctx; int retval = (*__cuDeviceGet)(&device, devnum); if (retval) return retval; retval = (*__cuCtxCreate)(&ctx, 0, device); if (retval) return retval; retval = (*__cuMemGetInfo)(&memfree, &memtotal); if (retval) return retval; retval = (*__cuCtxDestroy)(ctx); ar = (double) memfree; return 0; } // check whether each GPU is running a graphics app (assume yes) // return true if there's been a change since last time // bool COPROC_CUDA::check_running_graphics_app() { int retval, j; bool change = false; for (j=0; j& descs, vector& warnings, vector& ignore_devs ) { CALuint numDevices, cal_major, cal_minor, cal_imp; CALdevice device; CALdeviceinfo info; CALdeviceattribs attribs; char buf[256]; bool amdrt_detected = false; bool atirt_detected = false; int retval; attribs.struct_size = sizeof(CALdeviceattribs); device = 0; numDevices =0; #ifdef _WIN32 #if defined _M_X64 const char* atilib_name = "aticalrt64.dll"; const char* amdlib_name = "amdcalrt64.dll"; #else const char* atilib_name = "aticalrt.dll"; const char* amdlib_name = "amdcalrt.dll"; #endif HINSTANCE callib = LoadLibrary(atilib_name); if (callib) { atirt_detected = true; } else { callib = LoadLibrary(amdlib_name); if (callib) { amdrt_detected = true; } } if (!callib) { warnings.push_back("No ATI library found."); return; } __calInit = (ATI_GDI)GetProcAddress(callib, "calInit" ); __calGetVersion = (ATI_VER)GetProcAddress(callib, "calGetVersion" ); __calDeviceGetCount = (ATI_GDC)GetProcAddress(callib, "calDeviceGetCount" ); __calDeviceGetAttribs =(ATI_ATTRIBS)GetProcAddress(callib, "calDeviceGetAttribs" ); __calShutdown = (ATI_CLOSE)GetProcAddress(callib, "calShutdown" ); __calDeviceGetInfo = (ATI_INFO)GetProcAddress(callib, "calDeviceGetInfo" ); __calDeviceGetStatus = (ATI_STATUS)GetProcAddress(callib, "calDeviceGetStatus" ); #else void* callib; callib = dlopen("libaticalrt.so", RTLD_NOW); if (!callib) { warnings.push_back("No ATI library found"); return; } atirt_detected = true; __calInit = (int(*)()) dlsym(callib, "calInit"); __calGetVersion = (int(*)(CALuint*, CALuint*, CALuint*)) dlsym(callib, "calGetVersion"); __calDeviceGetCount = (int(*)(CALuint*)) dlsym(callib, "calDeviceGetCount"); __calDeviceGetAttribs = (int(*)(CALdeviceattribs*, CALuint)) dlsym(callib, "calDeviceGetAttribs"); __calShutdown = (int(*)()) dlsym(callib, "calShutdown"); __calDeviceGetInfo = (int(*)(CALdeviceinfo*, CALuint)) dlsym(callib, "calDeviceGetInfo"); __calDeviceGetStatus = (int(*)(CALdevicestatus*, CALdevice)) dlsym(callib, "calDeviceGetStatus"); #endif if (!__calInit) { warnings.push_back("calInit() missing from CAL library"); return; } if (!__calGetVersion) { warnings.push_back("calGetVersion() missing from CAL library"); return; } if (!__calDeviceGetCount) { warnings.push_back("calDeviceGetCount() missing from CAL library"); return; } if (!__calDeviceGetAttribs) { warnings.push_back("calDeviceGetAttribs() missing from CAL library"); return; } if (!__calDeviceGetInfo) { warnings.push_back("calDeviceGetInfo() missing from CAL library"); return; } if (!__calDeviceGetStatus) { warnings.push_back("calDeviceGetStatus() missing from CAL library"); return; } retval = (*__calInit)(); if (retval != CAL_RESULT_OK) { sprintf(buf, "calInit() returned %d", retval); warnings.push_back(buf); return; } retval = (*__calDeviceGetCount)(&numDevices); if (retval != CAL_RESULT_OK) { sprintf(buf, "calDeviceGetCount() returned %d", retval); warnings.push_back(buf); return; } retval = (*__calGetVersion)(&cal_major, &cal_minor, &cal_imp); if (retval != CAL_RESULT_OK) { sprintf(buf, "calGetVersion() returned %d", retval); warnings.push_back(buf); return; } if (!numDevices) { warnings.push_back("No usable CAL devices found"); return; } COPROC_ATI cc, cc2; string s, gpu_name; vector gpus; for (CALuint i=0; i best.peak_flops()) { best = gpus[i]; } sprintf(buf2, "ATI GPU %d: %s", gpus[i].device_num, buf); } descs.push_back(buf2); } best.count = 0; for (unsigned int i=0; itype, "ATI"); coprocs.coprocs.push_back(ccp); } void fake_ati(COPROCS& coprocs, int count) { COPROC_ATI* cc = new COPROC_ATI; strcpy(cc->type, "ATI"); strcpy(cc->version, "1.4.3"); cc->count = count; cc->attribs.numberOfSIMD = 32; cc->attribs.wavefrontSize = 32; cc->attribs.engineClock = 50; for (int i=0; idevice_nums[i] = i; } coprocs.coprocs.push_back(cc); } int COPROC_ATI::available_ram(int devnum, double& ar) { CALdevicestatus st; int retval = (*__calDeviceGetStatus)(&st, devnum); if (retval) return retval; ar = st.availLocalRAM*MEGA; return 0; }