mirror of https://github.com/BOINC/boinc.git
client: tweaks to code for detecting GPUs via a child process.
Added safety features requested by Rom Walton:
* Change COPROC_ATI::get_available_ram and COPROC_NVIDIA::get_available_ram to static routines to prevent calling them without first loading the CAL or CUDA libraries.
* Add checks for NULL library function pointers in these routines.
* Add comments warning about the need to call these routines from a separate child process on dual-GPU laptops, the need for proper library initialization, etc.
This commit is contained in:
parent
31e3bf2783
commit
631e236b08
|
@ -49,6 +49,8 @@ using std::string;
|
|||
#include "client_msgs.h"
|
||||
#include "gpu_detect.h"
|
||||
|
||||
static void get_available_ati_ram(COPROC_ATI &cc, vector<string>& warnings);
|
||||
|
||||
// criteria:
|
||||
//
|
||||
// - double precision support
|
||||
|
@ -196,18 +198,6 @@ void COPROC_ATI::get(
|
|||
warnings.push_back("calDeviceGetInfo() missing from CAL library");
|
||||
return;
|
||||
}
|
||||
if (!__calDeviceGetStatus) {
|
||||
warnings.push_back("calDeviceGetStatus() missing from CAL library");
|
||||
return;
|
||||
}
|
||||
if (!__calDeviceOpen) {
|
||||
warnings.push_back("calDeviceOpen() missing from CAL library");
|
||||
return;
|
||||
}
|
||||
if (!__calDeviceClose) {
|
||||
warnings.push_back("calDeviceClose() missing from CAL library");
|
||||
return;
|
||||
}
|
||||
|
||||
retval = (*__calInit)();
|
||||
if (retval != CAL_RESULT_OK) {
|
||||
|
@ -375,7 +365,7 @@ void COPROC_ATI::get(
|
|||
cc.atirt_detected = atirt_detected;
|
||||
cc.device_num = i;
|
||||
cc.set_peak_flops();
|
||||
cc.get_available_ram(warnings);
|
||||
get_available_ati_ram(cc, warnings);
|
||||
ati_gpus.push_back(cc);
|
||||
}
|
||||
|
||||
|
@ -431,20 +421,48 @@ void COPROC_ATI::correlate(
|
|||
|
||||
// get available RAM of ATI GPU
|
||||
//
|
||||
void COPROC_ATI::get_available_ram(vector<string>& warnings) {
|
||||
// CAUTION: as currently written, this method should be
|
||||
// called only from COPROC_ATI::get(). If in the future
|
||||
// you wish to call it from additional places:
|
||||
// * It must be called from a separate child process on
|
||||
// dual-GPU laptops (e.g., Macbook Pros) with the results
|
||||
// communicated to the main client process via IPC or a
|
||||
// temp file. See the comments about dual-GPU laptops
|
||||
// in gpu_detect.cpp and main.cpp for more details.
|
||||
// * The CAL library must be loaded and calInit() called
|
||||
// first.
|
||||
// * See client/coproc_detect.cpp and cpu_sched.cpp in
|
||||
// BOINC 6.12.36 for an earlier attempt to call this
|
||||
// from the scheduler. Note that it was abandoned
|
||||
// due to repeated calls crashing the driver.
|
||||
//
|
||||
static void get_available_ati_ram(COPROC_ATI &cc, vector<string>& warnings) {
|
||||
CALdevicestatus st;
|
||||
CALdevice dev;
|
||||
char buf[256];
|
||||
int retval;
|
||||
|
||||
available_ram = attribs.localRAM*MEGA;
|
||||
cc.available_ram = cc.attribs.localRAM*MEGA;
|
||||
|
||||
st.struct_size = sizeof(CALdevicestatus);
|
||||
|
||||
retval = (*__calDeviceOpen)(&dev, device_num);
|
||||
if (!__calDeviceOpen) {
|
||||
warnings.push_back("calDeviceOpen() missing from CAL library");
|
||||
return;
|
||||
}
|
||||
if (!__calDeviceGetStatus) {
|
||||
warnings.push_back("calDeviceGetStatus() missing from CAL library");
|
||||
return;
|
||||
}
|
||||
if (!__calDeviceClose) {
|
||||
warnings.push_back("calDeviceClose() missing from CAL library");
|
||||
return;
|
||||
}
|
||||
|
||||
retval = (*__calDeviceOpen)(&dev, cc.device_num);
|
||||
if (retval) {
|
||||
snprintf(buf, sizeof(buf),
|
||||
"[coproc] calDeviceOpen(%d) returned %d", device_num, retval
|
||||
"[coproc] calDeviceOpen(%d) returned %d", cc.device_num, retval
|
||||
);
|
||||
warnings.push_back(buf);
|
||||
return;
|
||||
|
@ -453,12 +471,12 @@ void COPROC_ATI::get_available_ram(vector<string>& warnings) {
|
|||
if (retval) {
|
||||
snprintf(buf, sizeof(buf),
|
||||
"[coproc] calDeviceGetStatus(%d) returned %d",
|
||||
device_num, retval
|
||||
cc.device_num, retval
|
||||
);
|
||||
warnings.push_back(buf);
|
||||
(*__calDeviceClose)(dev);
|
||||
return;
|
||||
}
|
||||
available_ram = st.availLocalRAM*MEGA;
|
||||
cc.available_ram = st.availLocalRAM*MEGA;
|
||||
(*__calDeviceClose)(dev);
|
||||
}
|
||||
|
|
|
@ -45,6 +45,8 @@ using std::string;
|
|||
#include "client_msgs.h"
|
||||
#include "gpu_detect.h"
|
||||
|
||||
static void get_available_nvidia_ram(COPROC_NVIDIA &cc, vector<string>& warnings);
|
||||
|
||||
// return 1/-1/0 if device 1 is more/less/same capable than device 2.
|
||||
// factors (decreasing priority):
|
||||
// - compute capability
|
||||
|
@ -237,14 +239,6 @@ void COPROC_NVIDIA::get(
|
|||
warnings.push_back("cuDeviceComputeCapability() missing from NVIDIA library");
|
||||
return;
|
||||
}
|
||||
if (!__cuCtxCreate) {
|
||||
warnings.push_back("cuCtxCreate() missing from NVIDIA library");
|
||||
return;
|
||||
}
|
||||
if (!__cuCtxDestroy) {
|
||||
warnings.push_back("cuCtxDestroy() missing from NVIDIA library");
|
||||
return;
|
||||
}
|
||||
if (!__cuMemAlloc) {
|
||||
warnings.push_back("cuMemAlloc() missing from NVIDIA library");
|
||||
return;
|
||||
|
@ -253,10 +247,6 @@ void COPROC_NVIDIA::get(
|
|||
warnings.push_back("cuMemFree() missing from NVIDIA library");
|
||||
return;
|
||||
}
|
||||
if (!__cuMemGetInfo) {
|
||||
warnings.push_back("cuMemGetInfo() missing from NVIDIA library");
|
||||
return;
|
||||
}
|
||||
|
||||
retval = (*__cuInit)(0);
|
||||
if (retval) {
|
||||
|
@ -340,7 +330,7 @@ void COPROC_NVIDIA::get(
|
|||
cc.cuda_version = cuda_version;
|
||||
cc.device_num = j;
|
||||
cc.set_peak_flops();
|
||||
cc.get_available_ram(warnings);
|
||||
get_available_nvidia_ram(cc, warnings);
|
||||
nvidia_gpus.push_back(cc);
|
||||
}
|
||||
if (!nvidia_gpus.size()) {
|
||||
|
@ -390,18 +380,50 @@ void COPROC_NVIDIA::correlate(
|
|||
|
||||
// See how much RAM is available on this GPU.
|
||||
//
|
||||
void COPROC_NVIDIA::get_available_ram(vector<string>& warnings) {
|
||||
// CAUTION: as currently written, this method should be
|
||||
// called only from COPROC_NVIDIA::get(). If in the
|
||||
// future you wish to call it from additional places:
|
||||
// * It must be called from a separate child process on
|
||||
// dual-GPU laptops (e.g., Macbook Pros) with the results
|
||||
// communicated to the main client process via IPC or a
|
||||
// temp file. See the comments about dual-GPU laptops
|
||||
// in gpu_detect.cpp and main.cpp for more details.
|
||||
// * The CUDA library must be loaded and cuInit() called
|
||||
// first.
|
||||
// * See client/coproc_detect.cpp and cpu_sched.cpp in
|
||||
// BOINC 6.12.36 for an earlier attempt to call this
|
||||
// from the scheduler. Note that it was abandoned
|
||||
// due to repeated calls crashing the driver.
|
||||
//
|
||||
static void get_available_nvidia_ram(COPROC_NVIDIA &cc, vector<string>& warnings) {
|
||||
int retval;
|
||||
size_t memfree = 0, memtotal = 0;
|
||||
int device;
|
||||
void* ctx;
|
||||
char buf[256];
|
||||
|
||||
available_ram = prop.totalGlobalMem;
|
||||
retval = (*__cuDeviceGet)(&device, device_num);
|
||||
cc.available_ram = cc.prop.totalGlobalMem;
|
||||
if (!__cuDeviceGet) {
|
||||
warnings.push_back("cuDeviceGet() missing from NVIDIA library");
|
||||
return;
|
||||
}
|
||||
if (!__cuCtxCreate) {
|
||||
warnings.push_back("cuCtxCreate() missing from NVIDIA library");
|
||||
return;
|
||||
}
|
||||
if (!__cuCtxDestroy) {
|
||||
warnings.push_back("cuCtxDestroy() missing from NVIDIA library");
|
||||
return;
|
||||
}
|
||||
if (!__cuMemGetInfo) {
|
||||
warnings.push_back("cuMemGetInfo() missing from NVIDIA library");
|
||||
return;
|
||||
}
|
||||
|
||||
retval = (*__cuDeviceGet)(&device, cc.device_num);
|
||||
if (retval) {
|
||||
snprintf(buf, sizeof(buf),
|
||||
"[coproc] cuDeviceGet(%d) returned %d", device_num, retval
|
||||
"[coproc] cuDeviceGet(%d) returned %d", cc.device_num, retval
|
||||
);
|
||||
warnings.push_back(buf);
|
||||
return;
|
||||
|
@ -409,7 +431,7 @@ void COPROC_NVIDIA::get_available_ram(vector<string>& warnings) {
|
|||
retval = (*__cuCtxCreate)(&ctx, 0, device);
|
||||
if (retval) {
|
||||
snprintf(buf, sizeof(buf),
|
||||
"[coproc] cuCtxCreate(%d) returned %d", device_num, retval
|
||||
"[coproc] cuCtxCreate(%d) returned %d", cc.device_num, retval
|
||||
);
|
||||
warnings.push_back(buf);
|
||||
return;
|
||||
|
@ -417,22 +439,42 @@ void COPROC_NVIDIA::get_available_ram(vector<string>& warnings) {
|
|||
retval = (*__cuMemGetInfo)(&memfree, &memtotal);
|
||||
if (retval) {
|
||||
snprintf(buf, sizeof(buf),
|
||||
"[coproc] cuMemGetInfo(%d) returned %d", device_num, retval
|
||||
"[coproc] cuMemGetInfo(%d) returned %d", cc.device_num, retval
|
||||
);
|
||||
warnings.push_back(buf);
|
||||
(*__cuCtxDestroy)(ctx);
|
||||
return;
|
||||
}
|
||||
(*__cuCtxDestroy)(ctx);
|
||||
available_ram = (double) memfree;
|
||||
cc.available_ram = (double) memfree;
|
||||
}
|
||||
|
||||
// check whether each GPU is running a graphics app (assume yes)
|
||||
// return true if there's been a change since last time
|
||||
//
|
||||
// CAUTION: this method is not currently used. If you wish
|
||||
// to call it in the future:
|
||||
// * It must be called from a separate child process on
|
||||
// dual-GPU laptops (e.g., Macbook Pros) with the results
|
||||
// communicated to the main client process via IPC or a
|
||||
// temp file. See the comments about dual-GPU laptops
|
||||
// in gpu_detect.cpp and main.cpp for more details.
|
||||
// * The CUDA library must be loaded and cuInit() called
|
||||
// first.
|
||||
//
|
||||
#if 0
|
||||
bool COPROC_NVIDIA::check_running_graphics_app() {
|
||||
int retval, j;
|
||||
bool change = false;
|
||||
if (!__cuDeviceGet) {
|
||||
warnings.push_back("cuDeviceGet() missing from NVIDIA library");
|
||||
return;
|
||||
}
|
||||
if (!__cuDeviceGetAttribute) {
|
||||
warnings.push_back("cuDeviceGetAttribute() missing from NVIDIA library");
|
||||
return;
|
||||
}
|
||||
|
||||
for (j=0; j<count; j++) {
|
||||
bool new_val = true;
|
||||
int device, kernel_timeout;
|
||||
|
@ -450,4 +492,4 @@ bool COPROC_NVIDIA::check_running_graphics_app() {
|
|||
}
|
||||
return change;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -332,9 +332,7 @@ struct COPROC_NVIDIA : public COPROC {
|
|||
void description(char* buf, int buflen);
|
||||
void clear();
|
||||
int parse(XML_PARSER&);
|
||||
void get_available_ram(std::vector<std::string>& warnings);
|
||||
void set_peak_flops();
|
||||
bool check_running_graphics_app();
|
||||
void fake(int driver_version, double ram, double avail_ram, int count);
|
||||
|
||||
};
|
||||
|
@ -371,7 +369,6 @@ struct COPROC_ATI : public COPROC {
|
|||
void description(char* buf, int buflen);
|
||||
void clear();
|
||||
int parse(XML_PARSER&);
|
||||
void get_available_ram(std::vector<std::string>& warnings);
|
||||
void set_peak_flops();
|
||||
void fake(double ram, double avail_ram, int);
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue