From 631e236b0887fdc0baa0060773cd7978b366b143 Mon Sep 17 00:00:00 2001 From: Charlie Fenton Date: Thu, 27 Jun 2013 02:36:20 -0700 Subject: [PATCH] client: tweaks to code for detecting GPUs via a child process. Added safety features requested by Rom Walton: * Change COPROC_ATI::get_available_ram and COPROC_NVIDIA::get_available_ram to static routines to prevent calling them without first loading CAL or CUDA libraries. * Add tests for NULL library calls in these routines. * Add comments warning about need to call from a separate child process on dual-GPU laptops, proper library initialization, etc. --- client/gpu_amd.cpp | 56 +++++++++++++++++++---------- client/gpu_nvidia.cpp | 84 ++++++++++++++++++++++++++++++++----------- lib/coproc.h | 3 -- 3 files changed, 100 insertions(+), 43 deletions(-) diff --git a/client/gpu_amd.cpp b/client/gpu_amd.cpp index 0e2506b5ae..05a851ea02 100644 --- a/client/gpu_amd.cpp +++ b/client/gpu_amd.cpp @@ -49,6 +49,8 @@ using std::string; #include "client_msgs.h" #include "gpu_detect.h" +static void get_available_ati_ram(COPROC_ATI &cc, vector& warnings); + // criteria: // // - double precision support @@ -196,18 +198,6 @@ void COPROC_ATI::get( warnings.push_back("calDeviceGetInfo() missing from CAL library"); return; } - if (!__calDeviceGetStatus) { - warnings.push_back("calDeviceGetStatus() missing from CAL library"); - return; - } - if (!__calDeviceOpen) { - warnings.push_back("calDeviceOpen() missing from CAL library"); - return; - } - if (!__calDeviceClose) { - warnings.push_back("calDeviceClose() missing from CAL library"); - return; - } retval = (*__calInit)(); if (retval != CAL_RESULT_OK) { @@ -375,7 +365,7 @@ void COPROC_ATI::get( cc.atirt_detected = atirt_detected; cc.device_num = i; cc.set_peak_flops(); - cc.get_available_ram(warnings); + get_available_ati_ram(cc, warnings); ati_gpus.push_back(cc); } @@ -431,20 +421,48 @@ void COPROC_ATI::correlate( // get available RAM of ATI GPU // -void COPROC_ATI::get_available_ram(vector& warnings) { +// CAUTION: as currently written, this method should be +// called only from COPROC_ATI::get(). If in the future +// you wish to call it from additional places: +// * It must be called from a separate child process on +// dual-GPU laptops (e.g., Macbook Pros) with the results +// communicated to the main client process via IPC or a +// temp file. See the comments about dual-GPU laptops +// in gpu_detect.cpp and main.cpp for more details. +// * The CAL library must be loaded and calInit() called +// first. +// * See client/coproc_detect.cpp and cpu_sched.cpp in +// BOINC 6.12.36 for an earlier attempt to call this +// from the scheduler. Note that it was abandoned +// due to repeated calls crashing the driver. +// +static void get_available_ati_ram(COPROC_ATI &cc, vector& warnings) { CALdevicestatus st; CALdevice dev; char buf[256]; int retval; - available_ram = attribs.localRAM*MEGA; + cc.available_ram = cc.attribs.localRAM*MEGA; st.struct_size = sizeof(CALdevicestatus); - retval = (*__calDeviceOpen)(&dev, device_num); + if (!__calDeviceOpen) { + warnings.push_back("calDeviceOpen() missing from CAL library"); + return; + } + if (!__calDeviceGetStatus) { + warnings.push_back("calDeviceGetStatus() missing from CAL library"); + return; + } + if (!__calDeviceClose) { + warnings.push_back("calDeviceClose() missing from CAL library"); + return; + } + + retval = (*__calDeviceOpen)(&dev, cc.device_num); if (retval) { snprintf(buf, sizeof(buf), - "[coproc] calDeviceOpen(%d) returned %d", device_num, retval + "[coproc] calDeviceOpen(%d) returned %d", cc.device_num, retval ); warnings.push_back(buf); return; @@ -453,12 +471,12 @@ void COPROC_ATI::get_available_ram(vector& warnings) { if (retval) { snprintf(buf, sizeof(buf), "[coproc] calDeviceGetStatus(%d) returned %d", - device_num, retval + cc.device_num, retval ); warnings.push_back(buf); (*__calDeviceClose)(dev); return; } - available_ram = st.availLocalRAM*MEGA; + cc.available_ram = st.availLocalRAM*MEGA; (*__calDeviceClose)(dev); } diff --git a/client/gpu_nvidia.cpp b/client/gpu_nvidia.cpp index 95cce53856..f279f525c5 100644 --- a/client/gpu_nvidia.cpp +++ b/client/gpu_nvidia.cpp @@ -45,6 +45,8 @@ using std::string; #include "client_msgs.h" #include "gpu_detect.h" +static void get_available_nvidia_ram(COPROC_NVIDIA &cc, vector& warnings); + // return 1/-1/0 if device 1 is more/less/same capable than device 2. // factors (decreasing priority): // - compute capability @@ -237,14 +239,6 @@ void COPROC_NVIDIA::get( warnings.push_back("cuDeviceComputeCapability() missing from NVIDIA library"); return; } - if (!__cuCtxCreate) { - warnings.push_back("cuCtxCreate() missing from NVIDIA library"); - return; - } - if (!__cuCtxDestroy) { - warnings.push_back("cuCtxDestroy() missing from NVIDIA library"); - return; - } if (!__cuMemAlloc) { warnings.push_back("cuMemAlloc() missing from NVIDIA library"); return; @@ -253,10 +247,6 @@ void COPROC_NVIDIA::get( warnings.push_back("cuMemFree() missing from NVIDIA library"); return; } - if (!__cuMemGetInfo) { - warnings.push_back("cuMemGetInfo() missing from NVIDIA library"); - return; - } retval = (*__cuInit)(0); if (retval) { @@ -340,7 +330,7 @@ void COPROC_NVIDIA::get( cc.cuda_version = cuda_version; cc.device_num = j; cc.set_peak_flops(); - cc.get_available_ram(warnings); + get_available_nvidia_ram(cc, warnings); nvidia_gpus.push_back(cc); } if (!nvidia_gpus.size()) { @@ -390,18 +380,50 @@ void COPROC_NVIDIA::correlate( // See how much RAM is available on this GPU. // -void COPROC_NVIDIA::get_available_ram(vector& warnings) { +// CAUTION: as currently written, this method should be +// called only from COPROC_NVIDIA::get(). If in the +// future you wish to call it from additional places: +// * It must be called from a separate child process on +// dual-GPU laptops (e.g., Macbook Pros) with the results +// communicated to the main client process via IPC or a +// temp file. See the comments about dual-GPU laptops +// in gpu_detect.cpp and main.cpp for more details. +// * The CUDA library must be loaded and cuInit() called +// first. +// * See client/coproc_detect.cpp and cpu_sched.cpp in +// BOINC 6.12.36 for an earlier attempt to call this +// from the scheduler. Note that it was abandoned +// due to repeated calls crashing the driver. +// +static void get_available_nvidia_ram(COPROC_NVIDIA &cc, vector& warnings) { int retval; size_t memfree = 0, memtotal = 0; int device; void* ctx; char buf[256]; - available_ram = prop.totalGlobalMem; - retval = (*__cuDeviceGet)(&device, device_num); + cc.available_ram = cc.prop.totalGlobalMem; + if (!__cuDeviceGet) { + warnings.push_back("cuDeviceGet() missing from NVIDIA library"); + return; + } + if (!__cuCtxCreate) { + warnings.push_back("cuCtxCreate() missing from NVIDIA library"); + return; + } + if (!__cuCtxDestroy) { + warnings.push_back("cuCtxDestroy() missing from NVIDIA library"); + return; + } + if (!__cuMemGetInfo) { + warnings.push_back("cuMemGetInfo() missing from NVIDIA library"); + return; + } + + retval = (*__cuDeviceGet)(&device, cc.device_num); if (retval) { snprintf(buf, sizeof(buf), - "[coproc] cuDeviceGet(%d) returned %d", device_num, retval + "[coproc] cuDeviceGet(%d) returned %d", cc.device_num, retval ); warnings.push_back(buf); return; @@ -409,7 +431,7 @@ void COPROC_NVIDIA::get_available_ram(vector& warnings) { retval = (*__cuCtxCreate)(&ctx, 0, device); if (retval) { snprintf(buf, sizeof(buf), - "[coproc] cuCtxCreate(%d) returned %d", device_num, retval + "[coproc] cuCtxCreate(%d) returned %d", cc.device_num, retval ); warnings.push_back(buf); return; @@ -417,22 +439,42 @@ void COPROC_NVIDIA::get_available_ram(vector& warnings) { retval = (*__cuMemGetInfo)(&memfree, &memtotal); if (retval) { snprintf(buf, sizeof(buf), - "[coproc] cuMemGetInfo(%d) returned %d", device_num, retval + "[coproc] cuMemGetInfo(%d) returned %d", cc.device_num, retval ); warnings.push_back(buf); (*__cuCtxDestroy)(ctx); return; } (*__cuCtxDestroy)(ctx); - available_ram = (double) memfree; + cc.available_ram = (double) memfree; } // check whether each GPU is running a graphics app (assume yes) // return true if there's been a change since last time // +// CAUTION: this method is not currently used. If you wish +// to call it in the future: +// * It must be called from a separate child process on +// dual-GPU laptops (e.g., Macbook Pros) with the results +// communicated to the main client process via IPC or a +// temp file. See the comments about dual-GPU laptops +// in gpu_detect.cpp and main.cpp for more details. +// * The CUDA library must be loaded and cuInit() called +// first. +// +#if 0 bool COPROC_NVIDIA::check_running_graphics_app() { int retval, j; bool change = false; + if (!__cuDeviceGet) { + warnings.push_back("cuDeviceGet() missing from NVIDIA library"); + return; + } + if (!__cuDeviceGetAttribute) { + warnings.push_back("cuDeviceGetAttribute() missing from NVIDIA library"); + return; + } + for (j=0; j& warnings); void set_peak_flops(); - bool check_running_graphics_app(); void fake(int driver_version, double ram, double avail_ram, int count); }; @@ -371,7 +369,6 @@ struct COPROC_ATI : public COPROC { void description(char* buf, int buflen); void clear(); int parse(XML_PARSER&); - void get_available_ram(std::vector& warnings); void set_peak_flops(); void fake(double ram, double avail_ram, int); };