// This file is part of BOINC. // http://boinc.berkeley.edu // Copyright (C) 2008 University of California // // BOINC is free software; you can redistribute it and/or modify it // under the terms of the GNU Lesser General Public License // as published by the Free Software Foundation, // either version 3 of the License, or (at your option) any later version. // // BOINC is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // See the GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License // along with BOINC. If not, see . #ifndef _USING_FCGI_ #include "boinc_fcgi.h" #else #include #endif #include #include #ifdef _WIN32 #ifndef SIM #include #endif #else #ifdef __APPLE__ // Suppress obsolete warning when building for OS 10.3.9 #define DLOPEN_NO_WARN #endif #include #endif #include "error_numbers.h" #include "filesys.h" #include "parse.h" #include "str_util.h" #include "coproc.h" using std::string; using std::vector; #ifndef _USING_FCGI_ using std::perror; #endif #ifndef _USING_FCGI_ void COPROC::write_xml(MIOFILE& f) { f.printf( "\n" " %s\n" " %d\n" "\n", type, count ); } #endif int COPROC::parse(MIOFILE& fin) { char buf[1024]; strcpy(type, ""); count = 0; used = 0; req_secs = 0; estimated_delay = 0; req_instances = 0; while (fin.fgets(buf, sizeof(buf))) { if (match_tag(buf, "")) { if (!strlen(type)) return ERR_XML_PARSE; return 0; } if (parse_str(buf, "", type, sizeof(type))) continue; if (parse_int(buf, "", count)) continue; if (parse_double(buf, "", req_secs)) continue; if (parse_int(buf, "", req_instances)) continue; if (parse_double(buf, "", estimated_delay)) continue; } return ERR_XML_PARSE; } void COPROCS::summary_string(char* buf, int len) { char bigbuf[8192], buf2[1024]; strcpy(bigbuf, ""); for (unsigned int i=0; itype, "CUDA")) { COPROC_CUDA* cp2 = (COPROC_CUDA*) 
cp; int mem = (int)(cp2->prop.dtotalGlobalMem/MEGA); sprintf(buf2, "[CUDA|%s|%d|%dMB|%d]", cp2->prop.name, cp2->count, mem, cp2->display_driver_version ); strcat(bigbuf, buf2); } else if (!strcmp(cp->type, "CAL")){ COPROC_ATI* cp2 =(COPROC_ATI*) cp; sprintf(buf2,"[CAL|%s|%d|%dMB|%s]", cp2->name, cp2->count, cp2->attribs.localRAM, cp2->version ); strcat(bigbuf,buf2); } } bigbuf[len-1] = 0; strcpy(buf, bigbuf); } vector COPROCS::get(bool use_all) { vector strings; COPROC_CUDA::get(*this, strings, use_all); COPROC_ATI::get(*this, strings); return strings; } // used only to parse scheduler request messages // int COPROCS::parse(FILE* fin) { char buf[1024]; while (fgets(buf, sizeof(buf), fin)) { if (match_tag(buf, "")) { return 0; } if (strstr(buf, "")) { COPROC_CUDA* cc = new COPROC_CUDA; int retval = cc->parse(fin); if (!retval) { coprocs.push_back(cc); } } if (strstr(buf, "")) { COPROC_ATI* cc = new COPROC_ATI; int retval = cc->parse(fin); if (!retval) { coprocs.push_back(cc); } } } return ERR_XML_PARSE; } COPROC* COPROCS::lookup(const char* type) { for (unsigned int i=0; itype)) return cp; } return NULL; } #ifdef _WIN32 #endif // return 1/-1/0 if device 1 is more/less/same capable than device 2. 
// ---------------------------------------------------------------------------
// NVIDIA (CUDA) and ATI (CAL) coprocessor detection, XML output and parsing.
//
// Definitions in this region, in order:
//   cuda_compare()           compares prop.major, then prop.minor, then
//                            cuda_version.  In "loose" mode memory only counts
//                            if c1 has more than 1.4x or less than 0.7x the
//                            memory of c2; otherwise memory is compared
//                            exactly and flops_estimate() breaks ties.
//   COPROC_CUDA::get()       loads nvcuda.dll (Windows) or libcuda (elsewhere)
//                            at run time, resolves the cu* entry points by
//                            name, and reports problems by appending messages
//                            to "strings".  On Windows (non-SIM) the display
//                            driver version is read through NvAPI.
//   (placeholder device)     code that fills a COPROC_CUDA with fixed values
//                            and appends it to coprocs.coprocs; its function
//                            header is not visible here - presumably a
//                            simulator/test helper, TODO confirm.
//   COPROC_CUDA::write_xml() prints the fields through MIOFILE::printf.
//   COPROC_CUDA::clear()     zeroes all fields; estimated_delay is set to -1
//                            to mark it as absent.
//   COPROC_CUDA::parse()     reads lines until the closing tag (returns 0);
//                            returns ERR_XML_PARSE if input ends first.
//                            The two 3-element arrays are split with
//                            atoi/strchr because sscanf can't be used (FCGI).
//   COPROC_ATI::get()        loads aticalrt[64].dll, falling back to
//                            amdcalrt[64].dll (Windows), or libcal.so;
//                            calls calInit, calDeviceGetCount, calGetVersion;
//                            stores type "ATI" and count = numDevices.
//   COPROC_ATI::write_xml(), clear(), parse(), description()
//
// NOTE(review): several spans in this region look truncated (loop headers,
//   tag string literals, template arguments).  Verify against a pristine
//   copy of the file before changing any logic here.
// NOTE(review): the function pointers returned by GetProcAddress/dlsym are
//   called without a NULL check; the only check visible is for
//   __cuDriverGetVersion under __APPLE__.
// NOTE(review): "retval" from cuInit, cuDriverGetVersion and cuDeviceGetCount
//   is assigned but not examined in the visible code.
// NOTE(review): COPROC_ATI::get() returns on "!numDevices" after calInit
//   without calling __calShutdown.
// NOTE(review): no FreeLibrary/dlclose call is visible for cudalib or callib.
// NOTE(review): COPROC_CUDA::parse() passes several prop fields to parse_int
//   through (int&) casts; presumably these fields are wider than int on some
//   platforms, in which case only part of the object is written - confirm.
// NOTE(review): in COPROC_ATI::description(), "attribs.localRAM/1024.*1024."
//   groups as (localRAM/1024.)*1024., so the value printed is localRAM itself.
//
// cuda_compare(): returns 1/-1/0 as device 1 is more/less/same capable than device 2.
// If "loose", ignore FLOPS and tolerate small memory diff // int cuda_compare(COPROC_CUDA& c1, COPROC_CUDA& c2, bool loose) { if (c1.prop.major > c2.prop.major) return 1; if (c1.prop.major < c2.prop.major) return -1; if (c1.prop.minor > c2.prop.minor) return 1; if (c1.prop.minor < c2.prop.minor) return -1; if (c1.cuda_version > c2.cuda_version) return 1; if (c1.cuda_version < c2.cuda_version) return -1; if (loose) { if (c1.prop.totalGlobalMem > 1.4*c2.prop.totalGlobalMem) return 1; if (c1.prop.totalGlobalMem < .7* c2.prop.totalGlobalMem) return -1; return 0; } if (c1.prop.totalGlobalMem > c2.prop.totalGlobalMem) return 1; if (c1.prop.totalGlobalMem < c2.prop.totalGlobalMem) return -1; double s1 = c1.flops_estimate(); double s2 = c2.flops_estimate(); if (s1 > s2) return 1; if (s1 < s2) return -1; return 0; } void COPROC_CUDA::get( COPROCS& coprocs, vector& strings, bool use_all // if false, use only those equivalent to most capable ) { int count, retval; #ifdef _WIN32 typedef int (__stdcall *PCGDC)(int *count); typedef int (__stdcall *PCGDP)(struct cudaDeviceProp *prop, int device); typedef int (__stdcall *PCGDV)(int* version); typedef int (__stdcall *PCGDI)(int); typedef int (__stdcall *PCGDG)(int*, int); typedef int (__stdcall *PCGDA)(int*, int, int); typedef int (__stdcall *PCGDN)(char*, int, int); typedef int (__stdcall *PCGDM)(unsigned int*, int); typedef int (__stdcall *PCGDCC)(int*, int*, int); PCGDC __cuDeviceGetCount = NULL; PCGDP __cuDeviceGetProperties = NULL; PCGDV __cuDriverGetVersion = NULL; PCGDI __cuInit = NULL; PCGDG __cuDeviceGet = NULL; PCGDA __cuDeviceGetAttribute = NULL; PCGDN __cuDeviceGetName = NULL; PCGDM __cuDeviceTotalMem = NULL; PCGDCC __cuDeviceComputeCapability = NULL; HMODULE cudalib = LoadLibrary("nvcuda.dll"); if (!cudalib) { strings.push_back("Can't load library nvcuda.dll"); return; } __cuDeviceGetCount = (PCGDC)GetProcAddress(cudalib, "cuDeviceGetCount"); __cuDeviceGetProperties = (PCGDP)GetProcAddress(cudalib, 
"cuDeviceGetProperties"); __cuDriverGetVersion = (PCGDV)GetProcAddress(cudalib, "cuDriverGetVersion" ); __cuInit = (PCGDI)GetProcAddress(cudalib, "cuInit" ); __cuDeviceGet = (PCGDG)GetProcAddress(cudalib, "cuDeviceGet" ); __cuDeviceGetAttribute = (PCGDA)GetProcAddress(cudalib, "cuDeviceGetAttribute" ); __cuDeviceGetName = (PCGDN)GetProcAddress(cudalib, "cuDeviceGetName" ); __cuDeviceTotalMem = (PCGDM)GetProcAddress(cudalib, "cuDeviceTotalMem" ); __cuDeviceComputeCapability = (PCGDCC)GetProcAddress(cudalib, "cuDeviceComputeCapability" ); #ifndef SIM NvAPI_Status nvapiStatus; NvDisplayHandle hDisplay; NV_DISPLAY_DRIVER_VERSION Version; memset(&Version, 0, sizeof(Version)); Version.version = NV_DISPLAY_DRIVER_VERSION_VER; NvAPI_Initialize(); for (int i=0; ; i++) { nvapiStatus = NvAPI_EnumNvidiaDisplayHandle(i, &hDisplay); if (nvapiStatus != NVAPI_OK) break; nvapiStatus = NvAPI_GetDisplayDriverVersion(hDisplay, &Version); if (nvapiStatus == NVAPI_OK) break; } #endif #else void* cudalib; int (*__cuInit)(int); int (*__cuDeviceGetCount)(int*); int (*__cuDeviceGetProperties)(cudaDeviceProp*, int); int (*__cuDriverGetVersion)(int*); int (*__cuDeviceGet)(int*, int); int (*__cuDeviceGetAttribute)(int*, int, int); int (*__cuDeviceGetName)(char*, int, int); int (*__cuDeviceTotalMem)(unsigned int*, int); int (*__cuDeviceComputeCapability)(int*, int*, int); #ifdef __APPLE__ cudalib = dlopen("/usr/local/cuda/lib/libcuda.dylib", RTLD_NOW); #else cudalib = dlopen("libcuda.so", RTLD_NOW); #endif if (!cudalib) { strings.push_back("Can't load library libcuda"); return; } __cuDeviceGetCount = (int(*)(int*)) dlsym(cudalib, "cuDeviceGetCount"); __cuDeviceGetProperties = (int(*)(cudaDeviceProp*, int)) dlsym( cudalib, "cuDeviceGetProperties" ); __cuDriverGetVersion = (int(*)(int*)) dlsym( cudalib, "cuDriverGetVersion" ); __cuInit = (int(*)(int)) dlsym( cudalib, "cuInit" ); __cuDeviceGet = (int(*)(int*, int)) dlsym( cudalib, "cuDeviceGet" ); __cuDeviceGetAttribute = (int(*)(int*, int, int)) 
dlsym( cudalib, "cuDeviceGetAttribute" ); __cuDeviceGetName = (int(*)(char*, int, int)) dlsym( cudalib, "cuDeviceGetName" ); __cuDeviceTotalMem = (int(*)(unsigned int*, int)) dlsym( cudalib, "cuDeviceTotalMem" ); __cuDeviceComputeCapability = (int(*)(int*, int*, int)) dlsym( cudalib, "cuDeviceComputeCapability" ); #endif #ifdef __APPLE__ if (!__cuDriverGetVersion) { strings.push_back("CUDA driver is out of date. Please install CUDA driver 2.3 or later."); return; } #endif retval = (*__cuInit)(0); int cuda_version; retval = (*__cuDriverGetVersion)(&cuda_version); vector gpus; retval = (*__cuDeviceGetCount)(&count); int j; unsigned int i; COPROC_CUDA cc; string s; for (j=0; j 100) continue; // e.g. 9999 is an error #if defined(_WIN32) && !defined(SIM) cc.display_driver_version = Version.drvVersion; #else cc.display_driver_version = 0; #endif cc.cuda_version = cuda_version; cc.device_num = j; gpus.push_back(cc); } if (!gpus.size()) { strings.push_back("No CUDA-capable NVIDIA GPUs found"); return; } // identify the most capable instance // COPROC_CUDA best; for (i=0; i 0) { best = gpus[i]; } } // see which other instances are equivalent, // and set the "count" and "device_nums" fields // best.count = 0; for (i=0; itype, "CUDA"); cc->count = count; for (int i=0; idevice_nums[i] = i; } cc->display_driver_version = 18000; cc->cuda_version = 2020; strcpy(cc->prop.name, "CUDA NVIDIA chip"); cc->prop.totalGlobalMem = 256*1024*1024; cc->prop.sharedMemPerBlock = 100; cc->prop.regsPerBlock = 8; cc->prop.warpSize = 10; cc->prop.memPitch = 10; cc->prop.maxThreadsPerBlock = 20; cc->prop.maxThreadsDim[0] = 2; cc->prop.maxThreadsDim[1] = 2; cc->prop.maxThreadsDim[2] = 2; cc->prop.maxGridSize[0] = 10; cc->prop.maxGridSize[1] = 10; cc->prop.maxGridSize[2] = 10; cc->prop.totalConstMem = 10; cc->prop.major = 1; cc->prop.minor = 2; cc->prop.clockRate = 1250000; cc->prop.textureAlignment = 1000; cc->prop.multiProcessorCount = 14; coprocs.coprocs.push_back(cc); } #ifndef _USING_FCGI_ void 
COPROC_CUDA::write_xml(MIOFILE& f) { f.printf( "\n" " %d\n" " %s\n" " %f\n" " %d\n" " %f\n" " %d\n" " %d\n" " %u\n" " %u\n" " %d\n" " %d\n" " %u\n" " %d\n" " %d %d %d\n" " %d %d %d\n" " %u\n" " %d\n" " %d\n" " %d\n" " %u\n" " %d\n" " %d\n" "\n", count, prop.name, req_secs, req_instances, estimated_delay, display_driver_version, cuda_version, (unsigned int)prop.totalGlobalMem, (unsigned int)prop.sharedMemPerBlock, prop.regsPerBlock, prop.warpSize, (unsigned int)prop.memPitch, prop.maxThreadsPerBlock, prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2], prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2], (unsigned int)prop.totalConstMem, prop.major, prop.minor, prop.clockRate, (unsigned int)prop.textureAlignment, prop.deviceOverlap, prop.multiProcessorCount ); } #endif void COPROC_CUDA::clear() { count = 0; used = 0; req_secs = 0; req_instances = 0; estimated_delay = -1; // mark as absent cuda_version = 0; display_driver_version = 0; strcpy(prop.name, ""); prop.totalGlobalMem = 0; prop.sharedMemPerBlock = 0; prop.regsPerBlock = 0; prop.warpSize = 0; prop.memPitch = 0; prop.maxThreadsPerBlock = 0; prop.maxThreadsDim[0] = 0; prop.maxThreadsDim[1] = 0; prop.maxThreadsDim[2] = 0; prop.maxGridSize[0] = 0; prop.maxGridSize[1] = 0; prop.maxGridSize[2] = 0; prop.clockRate = 0; prop.totalConstMem = 0; prop.major = 0; prop.minor = 0; prop.textureAlignment = 0; prop.deviceOverlap = 0; prop.multiProcessorCount = 0; } int COPROC_CUDA::parse(FILE* fin) { char buf[1024], buf2[256]; clear(); while (fgets(buf, sizeof(buf), fin)) { if (strstr(buf, "")) { return 0; } if (parse_int(buf, "", count)) continue; if (parse_double(buf, "", req_secs)) continue; if (parse_int(buf, "", req_instances)) continue; if (parse_double(buf, "", estimated_delay)) continue; if (parse_str(buf, "", prop.name, sizeof(prop.name))) continue; if (parse_int(buf, "", display_driver_version)) continue; if (parse_int(buf, "", cuda_version)) continue; if (parse_double(buf, "", 
prop.dtotalGlobalMem)) continue; if (parse_int(buf, "", (int&)prop.sharedMemPerBlock)) continue; if (parse_int(buf, "", prop.regsPerBlock)) continue; if (parse_int(buf, "", prop.warpSize)) continue; if (parse_int(buf, "", (int&)prop.memPitch)) continue; if (parse_int(buf, "", prop.maxThreadsPerBlock)) continue; if (parse_str(buf, "", buf2, sizeof(buf2))) { // can't use sscanf here (FCGI) // prop.maxThreadsDim[0] = atoi(buf2); char* p = strchr(buf2, ' '); if (p) { p++; prop.maxThreadsDim[1] = atoi(p); p = strchr(p, ' '); if (p) { p++; prop.maxThreadsDim[2] = atoi(p); } } continue; } if (parse_str(buf, "", buf2, sizeof(buf2))) { prop.maxGridSize[0] = atoi(buf2); char* p = strchr(buf2, ' '); if (p) { p++; prop.maxGridSize[1] = atoi(p); p = strchr(p, ' '); if (p) { p++; prop.maxGridSize[2] = atoi(p); } } continue; } if (parse_int(buf, "", prop.clockRate)) continue; if (parse_int(buf, "", (int&)prop.totalConstMem)) continue; if (parse_int(buf, "", prop.major)) continue; if (parse_int(buf, "", prop.minor)) continue; if (parse_int(buf, "", (int&)prop.textureAlignment)) continue; if (parse_int(buf, "", prop.deviceOverlap)) continue; if (parse_int(buf, "", prop.multiProcessorCount)) continue; } return ERR_XML_PARSE; } void COPROC_ATI::get(COPROCS& coprocs, vector& strings) { CALuint numDevices, cal_major, cal_minor, cal_imp; CALdevice device; CALdeviceinfo info; CALdeviceattribs attribs; attribs.struct_size = sizeof(CALdeviceattribs); device = 0; numDevices =0; #ifdef _WIN32 typedef int (__stdcall *PCGDC)(CALuint *numDevices); typedef int (__stdcall *ATTRIBS) (CALdeviceattribs *attribs, CALuint ordinal); typedef int (__stdcall *INFO) (CALdeviceinfo *info, CALuint ordinal); typedef int (__stdcall *VER) (CALuint *cal_major, CALuint *cal_minor, CALuint *cal_imp); typedef int (__stdcall *PCGDI)(void); typedef int (__stdcall *CLOSE)(void); PCGDI __calInit = NULL; VER __calGetVersion = NULL; PCGDC __calDeviceGetCount = NULL; ATTRIBS __calDeviceGetAttribs = NULL; INFO 
__calDeviceGetInfo = NULL; CLOSE __calShutdown = NULL; #if defined _M_X64 // TRY CAL 1.4 first driver > 9.2 HINSTANCE callib = LoadLibrary("aticalrt64.dll"); if (!callib) { callib = LoadLibrary("amdcalrt64.dll"); } #else HINSTANCE callib = LoadLibrary("aticalrt.dll"); if (!callib) { callib = LoadLibrary("amdcalrt.dll"); } #endif if (!callib) { strings.push_back("No CAL Runtime Libraries installed."); return; } __calInit = (PCGDI)GetProcAddress(callib, "calInit" ); __calDeviceGetCount = (PCGDC)GetProcAddress(callib, "calDeviceGetCount" ); __calGetVersion = (VER)GetProcAddress(callib, "calGetVersion" ); __calDeviceGetInfo = (INFO)GetProcAddress(callib, "calDeviceGetInfo" ); __calDeviceGetAttribs =(ATTRIBS)GetProcAddress(callib, "calDeviceGetAttribs" ); __calShutdown = (CLOSE)GetProcAddress(callib, "calShutdown" ); #else void* callib; int (*__calInit)(); int (*__calGetVersion)(CALuint*, CALuint*, CALuint*); int (*__calDeviceGetCount)(CALuint*); int (*__calDeviceGetAttribs)(CALdeviceattribs*, CALuint); int (*__calDeviceGetInfo)(CALdeviceinfo*, CALuint); int (*__calShutdown)(); callib = dlopen("libcal.so", RTLD_NOW); if (!callib) { strings.push_back("Can't load library libcal.so"); return; } __calInit = (int(*)()) dlsym(callib, "calInit"); __calGetVersion = (int(*)(CALuint*, CALuint*, CALuint*)) dlsym(callib, "calGetVersion"); __calDeviceGetCount = (int(*)(CALuint*)) dlsym(callib, "calDeviceGetCount"); __calDeviceGetAttribs = (int(*)(CALdeviceattribs*, CALuint)) dlsym(callib, "calDeviceGetAttribs"); __calDeviceGetInfo = (int(*)(CALdeviceinfo*, CALuint)) dlsym(callib, "calDeviceGetInfo"); __calShutdown = (int(*)()) dlsym(callib, "calShutdown"); #endif (*__calInit)(); (*__calDeviceGetCount)(&numDevices); (*__calGetVersion)(&cal_major,&cal_minor,&cal_imp); if (!numDevices) { strings.push_back("No usable CAL devices found"); return; } COPROC_ATI cc, cc2; string s, gpu_name; vector gpus; for (CALuint i=0; i best.flops()) { best = gpus[i]; } gpus[i].description(buf); 
sprintf(buf2, "ATI GPU %d: %s", gpus[i].device_num, buf); strings.push_back(buf2); } for (unsigned int i=0; iversion, "%d.%d.%d", cal_major, cal_minor, cal_imp); strcpy(ccp->type, "ATI"); ccp->count = numDevices; coprocs.coprocs.push_back(ccp); __calShutdown(); } #ifndef _USING_FCGI_ void COPROC_ATI::write_xml(MIOFILE& f) { f.printf( "\n" " %d\n" " %s\n" " %d\n" " %d\n" " %d\n" " %u\n" " %d\n" " %d\n" " %d\n" " %d\n" " %d\n" " %d\n" " %s\n" "\n", count, name, attribs.localRAM, attribs.uncachedRemoteRAM, attribs.cachedRemoteRAM, attribs.engineClock, attribs.memoryClock, attribs.wavefrontSize, attribs.numberOfSIMD, attribs.doublePrecision, attribs.pitch_alignment, attribs.surface_alignment, version ); }; #endif void COPROC_ATI::clear() { count = 0; strcpy(name, ""); strcpy(version, ""); attribs.localRAM = 0; attribs.uncachedRemoteRAM = 0; attribs.cachedRemoteRAM = 0; attribs.engineClock = 0; attribs.memoryClock = 0; attribs.wavefrontSize = 0; attribs.numberOfSIMD = 0; attribs.doublePrecision = CAL_FALSE; attribs.pitch_alignment = 0; attribs.surface_alignment = 0; } int COPROC_ATI::parse(FILE* fin) { char buf[1024]; int n; clear(); while (fgets(buf, sizeof(buf), fin)) { if (strstr(buf, "")) return 0; if (parse_int(buf, "", count)) continue; if (parse_str(buf, "", name, sizeof(name))) continue; if (parse_int(buf, "", n)) { attribs.localRAM = n; continue; } if (parse_int(buf, "", n)) { attribs.uncachedRemoteRAM = n; continue; } if (parse_int(buf, "", n)) { attribs.cachedRemoteRAM = n; continue; } if (parse_int(buf, "", n)) { attribs.engineClock = n; continue; } if (parse_int(buf, "", n)) { attribs.memoryClock = n; continue; } if (parse_int(buf, "", n)) { attribs.wavefrontSize = n; continue; } if (parse_int(buf, "" , n)) { attribs.numberOfSIMD = n; continue; } if (parse_int(buf, "", n)) { attribs.doublePrecision = n?CAL_TRUE:CAL_FALSE; continue; } if (parse_int(buf, "", n)) { attribs.pitch_alignment = n; continue; } if (parse_int(buf, "", n)) { attribs.surface_alignment 
= n; continue; } if (parse_str(buf, "", version, sizeof(version))) continue; } return ERR_XML_PARSE; } void COPROC_ATI::description(char* buf) { sprintf(buf, "%s (CAL version %s, %.0fMB, %.0fGFLOPS)", name, version, attribs.localRAM/1024.*1024., flops()/1.e9 ); }