client: tweaks to code for detecting GPUs via a child process.

Added safety features requested by Rom Walton:
* Change COPROC_ATI::get_available_ram and  COPROC_NVIDIA::get_available_ram to static routines to prevent calling them without first loading CAL or CUDA libraries.
* Add tests for NULL library calls in these routines.
* Add comments warning about need to call from a separate child process on dual-GPU laptops, proper library initialization, etc.
This commit is contained in:
Charlie Fenton 2013-06-27 02:36:20 -07:00
parent 31e3bf2783
commit 631e236b08
3 changed files with 100 additions and 43 deletions

View File

@ -49,6 +49,8 @@ using std::string;
#include "client_msgs.h"
#include "gpu_detect.h"
static void get_available_ati_ram(COPROC_ATI &cc, vector<string>& warnings);
// criteria:
//
// - double precision support
@ -196,18 +198,6 @@ void COPROC_ATI::get(
warnings.push_back("calDeviceGetInfo() missing from CAL library");
return;
}
if (!__calDeviceGetStatus) {
warnings.push_back("calDeviceGetStatus() missing from CAL library");
return;
}
if (!__calDeviceOpen) {
warnings.push_back("calDeviceOpen() missing from CAL library");
return;
}
if (!__calDeviceClose) {
warnings.push_back("calDeviceClose() missing from CAL library");
return;
}
retval = (*__calInit)();
if (retval != CAL_RESULT_OK) {
@ -375,7 +365,7 @@ void COPROC_ATI::get(
cc.atirt_detected = atirt_detected;
cc.device_num = i;
cc.set_peak_flops();
cc.get_available_ram(warnings);
get_available_ati_ram(cc, warnings);
ati_gpus.push_back(cc);
}
@ -431,20 +421,48 @@ void COPROC_ATI::correlate(
// get available RAM of ATI GPU
//
void COPROC_ATI::get_available_ram(vector<string>& warnings) {
// CAUTION: as currently written, this method should be
// called only from COPROC_ATI::get(). If in the future
// you wish to call it from additional places:
// * It must be called from a separate child process on
// dual-GPU laptops (e.g., Macbook Pros) with the results
// communicated to the main client process via IPC or a
// temp file. See the comments about dual-GPU laptops
// in gpu_detect.cpp and main.cpp for more details.
// * The CAL library must be loaded and calInit() called
// first.
// * See client/coproc_detect.cpp and cpu_sched.cpp in
// BOINC 6.12.36 for an earlier attempt to call this
// from the scheduler. Note that it was abandoned
// due to repeated calls crashing the driver.
//
static void get_available_ati_ram(COPROC_ATI &cc, vector<string>& warnings) {
CALdevicestatus st;
CALdevice dev;
char buf[256];
int retval;
available_ram = attribs.localRAM*MEGA;
cc.available_ram = cc.attribs.localRAM*MEGA;
st.struct_size = sizeof(CALdevicestatus);
retval = (*__calDeviceOpen)(&dev, device_num);
if (!__calDeviceOpen) {
warnings.push_back("calDeviceOpen() missing from CAL library");
return;
}
if (!__calDeviceGetStatus) {
warnings.push_back("calDeviceGetStatus() missing from CAL library");
return;
}
if (!__calDeviceClose) {
warnings.push_back("calDeviceClose() missing from CAL library");
return;
}
retval = (*__calDeviceOpen)(&dev, cc.device_num);
if (retval) {
snprintf(buf, sizeof(buf),
"[coproc] calDeviceOpen(%d) returned %d", device_num, retval
"[coproc] calDeviceOpen(%d) returned %d", cc.device_num, retval
);
warnings.push_back(buf);
return;
@ -453,12 +471,12 @@ void COPROC_ATI::get_available_ram(vector<string>& warnings) {
if (retval) {
snprintf(buf, sizeof(buf),
"[coproc] calDeviceGetStatus(%d) returned %d",
device_num, retval
cc.device_num, retval
);
warnings.push_back(buf);
(*__calDeviceClose)(dev);
return;
}
available_ram = st.availLocalRAM*MEGA;
cc.available_ram = st.availLocalRAM*MEGA;
(*__calDeviceClose)(dev);
}

View File

@ -45,6 +45,8 @@ using std::string;
#include "client_msgs.h"
#include "gpu_detect.h"
static void get_available_nvidia_ram(COPROC_NVIDIA &cc, vector<string>& warnings);
// return 1/-1/0 if device 1 is more/less/same capable than device 2.
// factors (decreasing priority):
// - compute capability
@ -237,14 +239,6 @@ void COPROC_NVIDIA::get(
warnings.push_back("cuDeviceComputeCapability() missing from NVIDIA library");
return;
}
if (!__cuCtxCreate) {
warnings.push_back("cuCtxCreate() missing from NVIDIA library");
return;
}
if (!__cuCtxDestroy) {
warnings.push_back("cuCtxDestroy() missing from NVIDIA library");
return;
}
if (!__cuMemAlloc) {
warnings.push_back("cuMemAlloc() missing from NVIDIA library");
return;
@ -253,10 +247,6 @@ void COPROC_NVIDIA::get(
warnings.push_back("cuMemFree() missing from NVIDIA library");
return;
}
if (!__cuMemGetInfo) {
warnings.push_back("cuMemGetInfo() missing from NVIDIA library");
return;
}
retval = (*__cuInit)(0);
if (retval) {
@ -340,7 +330,7 @@ void COPROC_NVIDIA::get(
cc.cuda_version = cuda_version;
cc.device_num = j;
cc.set_peak_flops();
cc.get_available_ram(warnings);
get_available_nvidia_ram(cc, warnings);
nvidia_gpus.push_back(cc);
}
if (!nvidia_gpus.size()) {
@ -390,18 +380,50 @@ void COPROC_NVIDIA::correlate(
// See how much RAM is available on this GPU.
//
void COPROC_NVIDIA::get_available_ram(vector<string>& warnings) {
// CAUTION: as currently written, this method should be
// called only from COPROC_NVIDIA::get(). If in the
// future you wish to call it from additional places:
// * It must be called from a separate child process on
// dual-GPU laptops (e.g., Macbook Pros) with the results
// communicated to the main client process via IPC or a
// temp file. See the comments about dual-GPU laptops
// in gpu_detect.cpp and main.cpp for more details.
// * The CUDA library must be loaded and cuInit() called
// first.
// * See client/coproc_detect.cpp and cpu_sched.cpp in
// BOINC 6.12.36 for an earlier attempt to call this
// from the scheduler. Note that it was abandoned
// due to repeated calls crashing the driver.
//
static void get_available_nvidia_ram(COPROC_NVIDIA &cc, vector<string>& warnings) {
int retval;
size_t memfree = 0, memtotal = 0;
int device;
void* ctx;
char buf[256];
available_ram = prop.totalGlobalMem;
retval = (*__cuDeviceGet)(&device, device_num);
cc.available_ram = cc.prop.totalGlobalMem;
if (!__cuDeviceGet) {
warnings.push_back("cuDeviceGet() missing from NVIDIA library");
return;
}
if (!__cuCtxCreate) {
warnings.push_back("cuCtxCreate() missing from NVIDIA library");
return;
}
if (!__cuCtxDestroy) {
warnings.push_back("cuCtxDestroy() missing from NVIDIA library");
return;
}
if (!__cuMemGetInfo) {
warnings.push_back("cuMemGetInfo() missing from NVIDIA library");
return;
}
retval = (*__cuDeviceGet)(&device, cc.device_num);
if (retval) {
snprintf(buf, sizeof(buf),
"[coproc] cuDeviceGet(%d) returned %d", device_num, retval
"[coproc] cuDeviceGet(%d) returned %d", cc.device_num, retval
);
warnings.push_back(buf);
return;
@ -409,7 +431,7 @@ void COPROC_NVIDIA::get_available_ram(vector<string>& warnings) {
retval = (*__cuCtxCreate)(&ctx, 0, device);
if (retval) {
snprintf(buf, sizeof(buf),
"[coproc] cuCtxCreate(%d) returned %d", device_num, retval
"[coproc] cuCtxCreate(%d) returned %d", cc.device_num, retval
);
warnings.push_back(buf);
return;
@ -417,22 +439,42 @@ void COPROC_NVIDIA::get_available_ram(vector<string>& warnings) {
retval = (*__cuMemGetInfo)(&memfree, &memtotal);
if (retval) {
snprintf(buf, sizeof(buf),
"[coproc] cuMemGetInfo(%d) returned %d", device_num, retval
"[coproc] cuMemGetInfo(%d) returned %d", cc.device_num, retval
);
warnings.push_back(buf);
(*__cuCtxDestroy)(ctx);
return;
}
(*__cuCtxDestroy)(ctx);
available_ram = (double) memfree;
cc.available_ram = (double) memfree;
}
// check whether each GPU is running a graphics app (assume yes)
// return true if there's been a change since last time
//
// CAUTION: this method is not currently used. If you wish
// to call it in the future:
// * It must be called from a separate child process on
// dual-GPU laptops (e.g., Macbook Pros) with the results
// communicated to the main client process via IPC or a
// temp file. See the comments about dual-GPU laptops
// in gpu_detect.cpp and main.cpp for more details.
// * The CUDA library must be loaded and cuInit() called
// first.
//
#if 0
bool COPROC_NVIDIA::check_running_graphics_app() {
int retval, j;
bool change = false;
if (!__cuDeviceGet) {
warnings.push_back("cuDeviceGet() missing from NVIDIA library");
return;
}
if (!__cuDeviceGetAttribute) {
warnings.push_back("cuDeviceGetAttribute() missing from NVIDIA library");
return;
}
for (j=0; j<count; j++) {
bool new_val = true;
int device, kernel_timeout;
@ -450,4 +492,4 @@ bool COPROC_NVIDIA::check_running_graphics_app() {
}
return change;
}
#endif

View File

@ -332,9 +332,7 @@ struct COPROC_NVIDIA : public COPROC {
void description(char* buf, int buflen);
void clear();
int parse(XML_PARSER&);
void get_available_ram(std::vector<std::string>& warnings);
void set_peak_flops();
bool check_running_graphics_app();
void fake(int driver_version, double ram, double avail_ram, int count);
};
@ -371,7 +369,6 @@ struct COPROC_ATI : public COPROC {
void description(char* buf, int buflen);
void clear();
int parse(XML_PARSER&);
void get_available_ram(std::vector<std::string>& warnings);
void set_peak_flops();
void fake(double ram, double avail_ram, int);
};