mirror of https://github.com/BOINC/boinc.git
svn path=/trunk/boinc/; revision=18770
This commit is contained in:
parent
ff5d082e86
commit
94e75fd4b1
|
@ -93,7 +93,7 @@ void COPROCS::summary_string(char* buf, int len) {
|
|||
COPROC_CUDA* cp2 = (COPROC_CUDA*) cp;
|
||||
int mem = (int)(cp2->prop.dtotalGlobalMem/MEGA);
|
||||
sprintf(buf2, "[CUDA|%s|%d|%dMB|%d]",
|
||||
cp2->prop.name, cp2->count, mem, cp2->drvVersion
|
||||
cp2->prop.name, cp2->count, mem, cp2->display_driver_version
|
||||
);
|
||||
strcat(bigbuf, buf2);
|
||||
}
|
||||
|
@ -149,8 +149,8 @@ int cuda_compare(COPROC_CUDA& c1, COPROC_CUDA& c2, bool loose) {
|
|||
if (c1.prop.major < c2.prop.major) return -1;
|
||||
if (c1.prop.minor > c2.prop.minor) return 1;
|
||||
if (c1.prop.minor < c2.prop.minor) return -1;
|
||||
if (c1.drvVersion > c2.drvVersion) return 1;
|
||||
if (c1.drvVersion < c2.drvVersion) return -1;
|
||||
if (c1.cuda_version > c2.cuda_version) return 1;
|
||||
if (c1.cuda_version < c2.cuda_version) return -1;
|
||||
if (loose) {
|
||||
if (c1.prop.totalGlobalMem > 1.4*c2.prop.totalGlobalMem) return 1;
|
||||
if (c1.prop.totalGlobalMem < .7* c2.prop.totalGlobalMem) return -1;
|
||||
|
@ -169,17 +169,29 @@ void COPROC_CUDA::get(
|
|||
COPROCS& coprocs, vector<string>& strings,
|
||||
bool use_all // if false, use only those equivalent to most capable
|
||||
) {
|
||||
int count;
|
||||
int count, retval;
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
typedef int (__stdcall *PCGDC)(int *count);
|
||||
typedef int (__stdcall *PCGDP)(struct cudaDeviceProp *prop, int device);
|
||||
typedef int (__stdcall *PCGDV)(int* version);
|
||||
typedef int (__stdcall *PCGDI)(int);
|
||||
typedef int (__stdcall *PCGDG)(int*, int);
|
||||
typedef int (__stdcall *PCGDA)(int*, int, int);
|
||||
typedef int (__stdcall *PCGDN)(char*, int, int);
|
||||
typedef int (__stdcall *PCGDM)(unsigned int*, int);
|
||||
typedef int (__stdcall *PCGDCC)(int*, int*, int);
|
||||
|
||||
PCGDC __cudaGetDeviceCount = NULL;
|
||||
PCGDP __cudaGetDeviceProperties = NULL;
|
||||
PCGDV __cudaGetDriverVersion = NULL;
|
||||
PCGDI __cudaInit = NULL;
|
||||
PCGDG __cuDeviceGet = NULL;
|
||||
PCGDA __cuDeviceGetAttribute = NULL;
|
||||
PCGDN __cuDeviceGetName = NULL;
|
||||
PCGDM __cuDeviceTotalMem = NULL;
|
||||
PCGDCC __cuDeviceComputeCapability = NULL;
|
||||
|
||||
HMODULE cudalib = LoadLibrary("nvcuda.dll");
|
||||
if (!cudalib) {
|
||||
|
@ -204,7 +216,28 @@ void COPROC_CUDA::get(
|
|||
strings.push_back("Library doesn't have cuDriverGetVersion()");
|
||||
return;
|
||||
}
|
||||
__cudaInit = (PCGDI)GetProcAddress(cudalib, "cuInit" );
|
||||
__cuDeviceGet = (PCGDG)GetProcAddress(cudalib, "cuDeviceGet" );
|
||||
__cuDeviceGetAttribute = (PCGDA)GetProcAddress(cudalib, "cuDeviceGetAttribute" );
|
||||
__cuDeviceGetName = (PCGDN)GetProcAddress(cudalib, "cuDeviceGetName" );
|
||||
__cuDeviceTotalMem = (PCGDM)GetProcAddress(cudalib, "cuDeviceTotalMem" );
|
||||
__cuDeviceComputeCapability = (PCGDCC)GetProcAddress(cudalib, "cuDeviceComputeCapability" );
|
||||
|
||||
#ifndef SIM
|
||||
NvAPI_Status nvapiStatus;
|
||||
NvDisplayHandle hDisplay;
|
||||
NV_DISPLAY_DRIVER_VERSION Version;
|
||||
memset(&Version, 0, sizeof(Version));
|
||||
Version.version = NV_DISPLAY_DRIVER_VERSION_VER;
|
||||
|
||||
NvAPI_Initialize();
|
||||
for (int i=0; ; i++) {
|
||||
nvapiStatus = NvAPI_EnumNvidiaDisplayHandle(i, &hDisplay);
|
||||
if (nvapiStatus != NVAPI_OK) break;
|
||||
nvapiStatus = NvAPI_GetDisplayDriverVersion(hDisplay, &Version);
|
||||
if (nvapiStatus == NVAPI_OK) break;
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
void* cudalib;
|
||||
void (*__cudaGetDeviceCount)(int*);
|
||||
|
@ -237,20 +270,50 @@ void COPROC_CUDA::get(
|
|||
}
|
||||
#endif
|
||||
|
||||
int driver_version;
|
||||
(*__cudaGetDriverVersion)(&driver_version);
|
||||
|
||||
retval = (*__cudaInit)(0);
|
||||
|
||||
int cuda_version;
|
||||
retval = (*__cudaGetDriverVersion)(&cuda_version);
|
||||
|
||||
vector<COPROC_CUDA> gpus;
|
||||
(*__cudaGetDeviceCount)(&count);
|
||||
retval = (*__cudaGetDeviceCount)(&count);
|
||||
int j;
|
||||
unsigned int i;
|
||||
COPROC_CUDA cc;
|
||||
string s;
|
||||
for (j=0; j<count; j++) {
|
||||
(*__cudaGetDeviceProperties)(&cc.prop, j);
|
||||
memset(&cc.prop, 0, sizeof(cc.prop));
|
||||
int device;
|
||||
retval = (*__cuDeviceGet)(&device, j);
|
||||
(*__cuDeviceGetName)(cc.prop.name, 256, device);
|
||||
(*__cuDeviceComputeCapability)(&cc.prop.major, &cc.prop.minor, device);
|
||||
(*__cuDeviceTotalMem)(&cc.prop.totalGlobalMem, device);
|
||||
(*__cuDeviceGetAttribute)(&cc.prop.sharedMemPerBlock, CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK, device);
|
||||
(*__cuDeviceGetAttribute)(&cc.prop.regsPerBlock, CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK, device);
|
||||
(*__cuDeviceGetAttribute)(&cc.prop.warpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, device);
|
||||
(*__cuDeviceGetAttribute)(&cc.prop.memPitch, CU_DEVICE_ATTRIBUTE_MAX_PITCH, device);
|
||||
retval = (*__cuDeviceGetAttribute)(&cc.prop.maxThreadsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device);
|
||||
retval = (*__cuDeviceGetAttribute)(&cc.prop.maxThreadsDim[0], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, device);
|
||||
(*__cuDeviceGetAttribute)(&cc.prop.maxThreadsDim[1], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, device);
|
||||
(*__cuDeviceGetAttribute)(&cc.prop.maxThreadsDim[2], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, device);
|
||||
(*__cuDeviceGetAttribute)(&cc.prop.maxGridSize[0], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, device);
|
||||
(*__cuDeviceGetAttribute)(&cc.prop.maxGridSize[1], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, device);
|
||||
(*__cuDeviceGetAttribute)(&cc.prop.maxGridSize[2], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, device);
|
||||
(*__cuDeviceGetAttribute)(&cc.prop.clockRate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device);
|
||||
(*__cuDeviceGetAttribute)(&cc.prop.totalConstMem, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, device);
|
||||
(*__cuDeviceGetAttribute)(&cc.prop.textureAlignment, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, device);
|
||||
(*__cuDeviceGetAttribute)(&cc.prop.deviceOverlap, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, device);
|
||||
retval = (*__cuDeviceGetAttribute)(&cc.prop.multiProcessorCount, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device);
|
||||
//retval = (*__cudaGetDeviceProperties)(&cc.prop, device);
|
||||
if (cc.prop.major <= 0) continue; // major == 0 means emulation
|
||||
if (cc.prop.major > 100) continue; // e.g. 9999 is an error
|
||||
cc.drvVersion = driver_version;
|
||||
#if defined(_WIN32) && !defined(SIM)
|
||||
cc.display_driver_version = Version.drvVersion;
|
||||
#else
|
||||
cc.display_driver_version = 0;
|
||||
#endif
|
||||
cc.cuda_version = cuda_version;
|
||||
cc.device_num = j;
|
||||
gpus.push_back(cc);
|
||||
}
|
||||
|
@ -294,8 +357,8 @@ void COPROC_CUDA::get(
|
|||
}
|
||||
|
||||
void COPROC_CUDA::description(char* buf) {
|
||||
sprintf(buf, "%s (driver version %d, compute capability %d.%d, %.0fMB, est. %.0fGFLOPS)",
|
||||
prop.name, drvVersion, prop.major, prop.minor, prop.totalGlobalMem/(1024.*1024.), flops_estimate()/1e9
|
||||
sprintf(buf, "%s (driver version %d, CUDA version %d, compute capability %d.%d, %.0fMB, est. %.0fGFLOPS)",
|
||||
prop.name, display_driver_version, cuda_version, prop.major, prop.minor, prop.totalGlobalMem/(1024.*1024.), flops_estimate()/1e9
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -337,6 +400,7 @@ void COPROC_CUDA::write_xml(MIOFILE& f) {
|
|||
" <req_instances>%d</req_instances>\n"
|
||||
" <estimated_delay>%f</estimated_delay>\n"
|
||||
" <drvVersion>%d</drvVersion>\n"
|
||||
" <cudaVersion>%d</cudaVersion>\n"
|
||||
" <totalGlobalMem>%u</totalGlobalMem>\n"
|
||||
" <sharedMemPerBlock>%u</sharedMemPerBlock>\n"
|
||||
" <regsPerBlock>%d</regsPerBlock>\n"
|
||||
|
@ -358,7 +422,8 @@ void COPROC_CUDA::write_xml(MIOFILE& f) {
|
|||
req_secs,
|
||||
req_instances,
|
||||
estimated_delay,
|
||||
drvVersion,
|
||||
display_driver_version,
|
||||
cuda_version,
|
||||
(unsigned int)prop.totalGlobalMem,
|
||||
(unsigned int)prop.sharedMemPerBlock,
|
||||
prop.regsPerBlock,
|
||||
|
@ -384,6 +449,8 @@ void COPROC_CUDA::clear() {
|
|||
req_secs = 0;
|
||||
req_instances = 0;
|
||||
estimated_delay = -1; // mark as absent
|
||||
cuda_version = 0;
|
||||
display_driver_version = 0;
|
||||
strcpy(prop.name, "");
|
||||
prop.totalGlobalMem = 0;
|
||||
prop.sharedMemPerBlock = 0;
|
||||
|
@ -419,7 +486,8 @@ int COPROC_CUDA::parse(FILE* fin) {
|
|||
if (parse_int(buf, "<req_instances>", req_instances)) continue;
|
||||
if (parse_double(buf, "<estimated_delay>", estimated_delay)) continue;
|
||||
if (parse_str(buf, "<name>", prop.name, sizeof(prop.name))) continue;
|
||||
if (parse_int(buf, "<drvVersion>", drvVersion)) continue;
|
||||
if (parse_int(buf, "<drvVersion>", display_driver_version)) continue;
|
||||
if (parse_int(buf, "<cudaVersion>", cuda_version)) continue;
|
||||
if (parse_double(buf, "<totalGlobalMem>", prop.dtotalGlobalMem)) continue;
|
||||
if (parse_int(buf, "<sharedMemPerBlock>", (int&)prop.sharedMemPerBlock)) continue;
|
||||
if (parse_int(buf, "<regsPerBlock>", prop.regsPerBlock)) continue;
|
||||
|
|
37
lib/coproc.h
37
lib/coproc.h
|
@ -174,29 +174,29 @@ struct COPROCS {
|
|||
//
|
||||
struct cudaDeviceProp {
|
||||
char name[256];
|
||||
size_t totalGlobalMem;
|
||||
unsigned int totalGlobalMem;
|
||||
// not used on the server; dtotalGlobalMem is used instead
|
||||
// (since some boards have >= 4GB)
|
||||
size_t sharedMemPerBlock;
|
||||
int sharedMemPerBlock;
|
||||
int regsPerBlock;
|
||||
int warpSize;
|
||||
size_t memPitch;
|
||||
int memPitch;
|
||||
int maxThreadsPerBlock;
|
||||
int maxThreadsDim[3];
|
||||
int maxGridSize[3];
|
||||
int clockRate;
|
||||
size_t totalConstMem;
|
||||
int totalConstMem;
|
||||
int major;
|
||||
int minor;
|
||||
size_t textureAlignment;
|
||||
int textureAlignment;
|
||||
int deviceOverlap;
|
||||
int multiProcessorCount;
|
||||
int __cudaReserved[40];
|
||||
double dtotalGlobalMem; // not defined in client
|
||||
};
|
||||
|
||||
struct COPROC_CUDA : public COPROC {
|
||||
int drvVersion; // display driver version, obtained from NVAPI
|
||||
int cuda_version; // CUDA runtime version
|
||||
int display_driver_version;
|
||||
cudaDeviceProp prop;
|
||||
|
||||
#ifndef _USING_FCGI_
|
||||
|
@ -222,4 +222,27 @@ struct COPROC_CUDA : public COPROC {
|
|||
|
||||
void fake_cuda(COPROCS&, int);
|
||||
|
||||
enum CUdevice_attribute_enum {
|
||||
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
|
||||
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
|
||||
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
|
||||
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
|
||||
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
|
||||
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
|
||||
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
|
||||
CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,
|
||||
CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
|
||||
CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
|
||||
CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
|
||||
CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,
|
||||
CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
|
||||
CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
|
||||
CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
|
||||
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
|
||||
CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
|
||||
CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
|
||||
CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
|
||||
CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue