// This file is part of BOINC. // http://boinc.berkeley.edu // Copyright (C) 2008 University of California // // BOINC is free software; you can redistribute it and/or modify it // under the terms of the GNU Lesser General Public License // as published by the Free Software Foundation, // either version 3 of the License, or (at your option) any later version. // // BOINC is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // See the GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License // along with BOINC. If not, see . #ifndef _USING_FCGI_ #include "boinc_fcgi.h" #else #include #endif #include #include #ifdef _WIN32 #ifndef SIM #include #endif #else #ifdef __APPLE__ // Suppress obsolete warning when building for OS 10.3.9 #define DLOPEN_NO_WARN #endif #include #endif #include "error_numbers.h" #include "filesys.h" #include "parse.h" #include "str_util.h" #include "coproc.h" using std::string; using std::vector; #ifndef _USING_FCGI_ using std::perror; #endif #ifndef _USING_FCGI_ void COPROC::write_xml(MIOFILE& f) { f.printf( "\n" " %s\n" " %d\n" "\n", type, count ); } #endif int COPROC::parse(MIOFILE& fin) { char buf[1024]; strcpy(type, ""); count = 0; used = 0; req_secs = 0; estimated_delay = 0; req_instances = 0; while (fin.fgets(buf, sizeof(buf))) { if (match_tag(buf, "")) { if (!strlen(type)) return ERR_XML_PARSE; return 0; } if (parse_str(buf, "", type, sizeof(type))) continue; if (parse_int(buf, "", count)) continue; if (parse_double(buf, "", req_secs)) continue; if (parse_int(buf, "", req_instances)) continue; if (parse_double(buf, "", estimated_delay)) continue; } return ERR_XML_PARSE; } void COPROCS::summary_string(char* buf, int len) { char bigbuf[8192], buf2[1024]; strcpy(bigbuf, ""); for (unsigned int i=0; itype, "CUDA")) { COPROC_CUDA* cp2 = (COPROC_CUDA*) cp; int mem = (int)(cp2->prop.dtotalGlobalMem/MEGA); sprintf(buf2, "[CUDA|%s|%d|%dMB|%d]", cp2->prop.name, cp2->count, mem, cp2->drvVersion ); strcat(bigbuf, buf2); } } bigbuf[len-1] = 0; strcpy(buf, bigbuf); } vector COPROCS::get() { vector strings; string s = COPROC_CUDA::get(*this); if (s.size()) strings.push_back(s); s = COPROC_CELL_SPE::get(*this); if (s.size()) strings.push_back(s); return strings; } // used only to parse scheduler request messages // int COPROCS::parse(FILE* fin) { char buf[1024]; while (fgets(buf, sizeof(buf), fin)) { if (match_tag(buf, "")) { return 0; } if (strstr(buf, "")) { COPROC_CUDA* cc = new COPROC_CUDA; int retval = cc->parse(fin); if (!retval) { coprocs.push_back(cc); } } } return ERR_XML_PARSE; } COPROC* COPROCS::lookup(const char* type) { for (unsigned int i=0; itype)) return cp; } return NULL; } #ifdef _WIN32 #endif string COPROC_CUDA::get(COPROCS& coprocs) { int count; #ifdef _WIN32 typedef int (__stdcall *PCGDC)(int *count); typedef int (__stdcall *PCGDP)(struct cudaDeviceProp *prop, int device); PCGDC __cudaGetDeviceCount = NULL; PCGDP __cudaGetDeviceProperties = NULL; HMODULE cudalib = LoadLibrary("cudart.dll"); if (!cudalib) { return "Can't load library cudart.dll"; } __cudaGetDeviceCount = (PCGDC)GetProcAddress( cudalib, "cudaGetDeviceCount" ); if (!__cudaGetDeviceCount) { return "Library doesn't have cudaGetDeviceCount()"; } __cudaGetDeviceProperties = (PCGDP)GetProcAddress( cudalib, "cudaGetDeviceProperties" ); if (!__cudaGetDeviceProperties) { return "Library doesn't have cudaGetDeviceProperties()"; } #ifndef SIM NvAPI_Status nvapiStatus; NvDisplayHandle hDisplay; NV_DISPLAY_DRIVER_VERSION Version; memset(&Version, 0, sizeof(Version)); Version.version = NV_DISPLAY_DRIVER_VERSION_VER; NvAPI_Initialize(); for (int i=0; ; i++) { nvapiStatus = NvAPI_EnumNvidiaDisplayHandle(i, &hDisplay); if (nvapiStatus != NVAPI_OK) break; nvapiStatus = NvAPI_GetDisplayDriverVersion(hDisplay, &Version); if (nvapiStatus == NVAPI_OK) break; } #endif #else void* cudalib; void (*__cudaGetDeviceCount)(int*); void (*__cudaGetDeviceProperties)(cudaDeviceProp*, int); #ifdef __APPLE__ cudalib = dlopen("/usr/local/cuda/lib/libcudart.dylib", RTLD_NOW); #else // libcudart.so is included with the BOINC install for linux, // so look for it in the current dir. // cudalib = dlopen("./libcudart.so", RTLD_NOW); if (!cudalib) { // If that fails, look for it in the library search path // cudalib = dlopen("libcudart.so", RTLD_NOW); } #endif if (!cudalib) { return "Can't load library libcudart"; #ifdef _USING_FCGI_ FCGI::perror("dlopen"); #else std::perror("dlopen"); #endif } __cudaGetDeviceCount = (void(*)(int*)) dlsym(cudalib, "cudaGetDeviceCount"); if(!__cudaGetDeviceCount) { return "Library doesn't have cudaGetDeviceCount()"; } __cudaGetDeviceProperties = (void(*)(cudaDeviceProp*, int)) dlsym( cudalib, "cudaGetDeviceProperties" ); if (!__cudaGetDeviceProperties) { return "Library doesn't have cudaGetDeviceProperties()"; } #endif // NOTE: our design is slightly flawed: // there's no provision for having two coprocs of type CUDA. // So on systems with two GPUs of different hardware types // we have to count them as two of the same type. // Pick the fastest. // (*__cudaGetDeviceCount)(&count); int real_count = 0; COPROC_CUDA cc, cc2; string s; for (int i=0; i 100) continue; // e.g. 9999 is an error #if defined(_WIN32) && !defined(SIM) cc.drvVersion = Version.drvVersion; #else cc.drvVersion = 0; #endif cc.description(buf); if (real_count) { if (cc.flops_estimate() > cc2.flops_estimate()) { cc2 = cc; } s += ", "; s += buf; } else { s = buf; cc2 = cc; } real_count++; } if (!real_count) { return "No CUDA devices found"; } COPROC_CUDA* ccp = new COPROC_CUDA; *ccp = cc2; ccp->count = real_count; strcpy(ccp->type, "CUDA"); coprocs.coprocs.push_back(ccp); if (real_count == 1) { return "CUDA device: "+s; } else { return "CUDA devices: "+s; } } void COPROC_CUDA::description(char* buf) { sprintf(buf, "%s (driver version %d, CUDA version %d.%d, %.0fMB, est. %.0fGFLOPS)", prop.name, drvVersion, prop.major, prop.minor, prop.totalGlobalMem/(1024.*1024.), flops_estimate()/1e9 ); } // add a non-existent CUDA coproc (for debugging) // void fake_cuda(COPROCS& coprocs, int count) { COPROC_CUDA* cc = new COPROC_CUDA; strcpy(cc->type, "CUDA"); cc->count = count; strcpy(cc->prop.name, "CUDA NVIDIA chip"); cc->prop.totalGlobalMem = 256*1024*1024; cc->prop.sharedMemPerBlock = 100; cc->prop.regsPerBlock = 8; cc->prop.warpSize = 10; cc->prop.memPitch = 10; cc->prop.maxThreadsPerBlock = 20; cc->prop.maxThreadsDim[0] = 2; cc->prop.maxThreadsDim[1] = 2; cc->prop.maxThreadsDim[2] = 2; cc->prop.maxGridSize[0] = 10; cc->prop.maxGridSize[1] = 10; cc->prop.maxGridSize[2] = 10; cc->prop.totalConstMem = 10; cc->prop.major = 1; cc->prop.minor = 2; cc->prop.clockRate = 1250000; cc->prop.textureAlignment = 1000; cc->prop.multiProcessorCount = 14; coprocs.coprocs.push_back(cc); } #ifndef _USING_FCGI_ void COPROC_CUDA::write_xml(MIOFILE& f) { f.printf( "\n" " %d\n" " %s\n" " %f\n" " %d\n" " %f\n" " %d\n" " %u\n" " %u\n" " %d\n" " %d\n" " %u\n" " %d\n" " %d %d %d\n" " %d %d %d\n" " %u\n" " %d\n" " %d\n" " %d\n" " %u\n" " %d\n" " %d\n" "\n", count, prop.name, req_secs, req_instances, estimated_delay, drvVersion, (unsigned int)prop.totalGlobalMem, (unsigned int)prop.sharedMemPerBlock, prop.regsPerBlock, prop.warpSize, (unsigned int)prop.memPitch, prop.maxThreadsPerBlock, prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2], prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2], (unsigned int)prop.totalConstMem, prop.major, prop.minor, prop.clockRate, (unsigned int)prop.textureAlignment, prop.deviceOverlap, prop.multiProcessorCount ); } #endif void COPROC_CUDA::clear() { count = 0; used = 0; req_secs = 0; req_instances = 0; estimated_delay = -1; // mark as absent strcpy(prop.name, ""); prop.totalGlobalMem = 0; prop.sharedMemPerBlock = 0; prop.regsPerBlock = 0; prop.warpSize = 0; prop.memPitch = 0; prop.maxThreadsPerBlock = 0; prop.maxThreadsDim[0] = 0; prop.maxThreadsDim[1] = 0; prop.maxThreadsDim[2] = 0; prop.maxGridSize[0] = 0; prop.maxGridSize[1] = 0; prop.maxGridSize[2] = 0; prop.clockRate = 0; prop.totalConstMem = 0; prop.major = 0; prop.minor = 0; prop.textureAlignment = 0; prop.deviceOverlap = 0; prop.multiProcessorCount = 0; } int COPROC_CUDA::parse(FILE* fin) { char buf[1024], buf2[256]; clear(); while (fgets(buf, sizeof(buf), fin)) { if (strstr(buf, "")) { return 0; } if (parse_int(buf, "", count)) continue; if (parse_double(buf, "", req_secs)) continue; if (parse_int(buf, "", req_instances)) continue; if (parse_double(buf, "", estimated_delay)) continue; if (parse_str(buf, "", prop.name, sizeof(prop.name))) continue; if (parse_int(buf, "", drvVersion)) continue; if (parse_double(buf, "", prop.dtotalGlobalMem)) continue; if (parse_int(buf, "", (int&)prop.sharedMemPerBlock)) continue; if (parse_int(buf, "", prop.regsPerBlock)) continue; if (parse_int(buf, "", prop.warpSize)) continue; if (parse_int(buf, "", (int&)prop.memPitch)) continue; if (parse_int(buf, "", prop.maxThreadsPerBlock)) continue; if (parse_str(buf, "", buf2, sizeof(buf2))) { // can't use sscanf here (FCGI) // prop.maxThreadsDim[0] = atoi(buf2); char* p = strchr(buf2, ' '); if (p) { p++; prop.maxThreadsDim[1] = atoi(p); p = strchr(p, ' '); if (p) { p++; prop.maxThreadsDim[2] = atoi(p); } } continue; } if (parse_str(buf, "", buf2, sizeof(buf2))) { prop.maxGridSize[0] = atoi(buf2); char* p = strchr(buf2, ' '); if (p) { p++; prop.maxGridSize[1] = atoi(p); p = strchr(p, ' '); if (p) { p++; prop.maxGridSize[2] = atoi(p); } } continue; } if (parse_int(buf, "", prop.clockRate)) continue; if (parse_int(buf, "", (int&)prop.totalConstMem)) continue; if (parse_int(buf, "", prop.major)) continue; if (parse_int(buf, "", prop.minor)) continue; if (parse_int(buf, "", (int&)prop.textureAlignment)) continue; if (parse_int(buf, "", prop.deviceOverlap)) continue; if (parse_int(buf, "", prop.multiProcessorCount)) continue; } return ERR_XML_PARSE; } string COPROC_CELL_SPE::get(COPROCS&) { return ""; }