mirror of https://github.com/BOINC/boinc.git
Merge pull request #3001 from BOINC/dpa_opencl
client: fix possible overflow in peak FLOPS calculation for OpenCL GPUs
commit dc70ff9100
@@ -377,6 +377,9 @@ void COPROC_ATI::get(
         cc.atirt_detected = atirt_detected;
         cc.device_num = i;
         cc.set_peak_flops();
+        if (cc.bad_gpu_peak_flops("CAL", s)) {
+            warnings.push_back(s);
+        }
         get_available_ati_ram(cc, warnings);
         ati_gpus.push_back(cc);
     }
@@ -240,6 +240,7 @@ void COPROC_NVIDIA::get(
     char buf[256];
     int j, itemp;
     size_t global_mem = 0;
+    string s;
     COPROC_NVIDIA cc;
 
 #ifdef _WIN32
@@ -444,6 +445,9 @@ void* cudalib = NULL;
         cc.cuda_version = cuda_version;
         cc.device_num = j;
         cc.set_peak_flops();
+        if (cc.bad_gpu_peak_flops("CUDA", s)) {
+            warnings.push_back(s);
+        }
         get_available_nvidia_ram(cc, warnings);
         nvidia_gpus.push_back(cc);
     }
@@ -202,6 +202,7 @@ void COPROCS::get_opencl(
     vector<int>devnums_pci_slot_sort;
     vector<OPENCL_DEVICE_PROP>::iterator it;
     int max_other_coprocs = MAX_RSC-1;  // coprocs[0] is reserved for CPU
+    string s;
 
     if (cc_config.no_opencl) {
         return;
@@ -486,6 +487,9 @@ void COPROCS::get_opencl(
             COPROC_NVIDIA c;
             c.opencl_prop = prop;
             c.set_peak_flops();
+            if (c.bad_gpu_peak_flops("NVIDIA OpenCL", s)) {
+                warnings.push_back(s);
+            }
             prop.peak_flops = c.peak_flops;
         }
         if (cuda_match_found) {
@@ -552,6 +556,9 @@ void COPROCS::get_opencl(
             COPROC_ATI c;
             c.opencl_prop = prop;
             c.set_peak_flops();
+            if (c.bad_gpu_peak_flops("AMD OpenCL", s)) {
+                warnings.push_back(s);
+            }
             prop.peak_flops = c.peak_flops;
         }
 
@@ -576,6 +583,9 @@ void COPROCS::get_opencl(
             safe_strcpy(c.version, prop.opencl_driver_version);
 
             c.set_peak_flops();
+            if (c.bad_gpu_peak_flops("Intel OpenCL", s)) {
+                warnings.push_back(s);
+            }
             prop.peak_flops = c.peak_flops;
             prop.opencl_available_ram = prop.global_mem_size;
 
@@ -606,12 +616,22 @@ void COPROCS::get_opencl(
         prop.opencl_available_ram = prop.global_mem_size;
         prop.is_used = COPROC_USED;
 
-        // TODO: Find a better way to calculate / estimate peak_flops for future coprocessors?
+        // TODO: is there a better way to estimate peak_flops?
+        //
         prop.peak_flops = 0;
         if (prop.max_compute_units) {
-            prop.peak_flops = prop.max_compute_units * prop.max_clock_frequency * MEGA;
+            double freq = ((double)prop.max_clock_frequency) * MEGA;
+            prop.peak_flops = ((double)prop.max_compute_units) * freq;
         }
-        if (prop.peak_flops <= 0) prop.peak_flops = 45e9;
+        if (prop.peak_flops <= 0 || prop.peak_flops > GPU_MAX_PEAK_FLOPS) {
+            char buf2[256];
+            sprintf(buf2,
+                "OpenCL generic: bad peak FLOPS; Max units %d, max freq %d MHz",
+                prop.max_compute_units, prop.max_clock_frequency
+            );
+            warnings.push_back(buf2);
+            prop.peak_flops = GPU_DEFAULT_PEAK_FLOPS;
+        }
 
         other_opencls.push_back(prop);
     }
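The hunk above is the heart of the fix named in the commit title: max_compute_units and max_clock_frequency are reported as 32-bit integer device properties, so the old expression formed their product in integer arithmetic before MEGA widened it to double, and implausible values from a misbehaving OpenCL driver could wrap. A minimal stand-alone sketch of the failure mode and the promote-to-double-first remedy (the magnitudes are hypothetical, and this is illustration code, not the BOINC sources):

#include <cstdint>
#include <cstdio>

int main() {
    uint32_t max_compute_units   = 4000000;   // hypothetical garbage from a buggy driver
    uint32_t max_clock_frequency = 2000000;   // MHz, also hypothetical garbage
    const double MEGA = 1e6;                  // stands in for BOINC's MEGA constant

    // Old pattern: the 32-bit product wraps before it is widened to double.
    double old_style = max_compute_units * max_clock_frequency * MEGA;

    // New pattern (as in the hunk above): promote to double before multiplying.
    double freq = ((double)max_clock_frequency) * MEGA;
    double new_style = ((double)max_compute_units) * freq;

    printf("wrapped:  %g\n", old_style);   // wrong: the product wrapped modulo 2^32
    printf("intended: %g\n", new_style);   // 8e+18
    return 0;
}

Either result still fails the new GPU_MAX_PEAK_FLOPS sanity check, so the generic-OpenCL path now also emits a warning and falls back to GPU_DEFAULT_PEAK_FLOPS instead of keeping the wrapped number.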
@@ -600,7 +600,7 @@ void COPROC_NVIDIA::set_peak_flops() {
         //
         x = opencl_prop.max_compute_units * 48 * 2 * opencl_prop.max_clock_frequency * 1e6;
     }
-    peak_flops = (x>0)?x:5e10;
+    peak_flops = x;
 }
 
 // fake a NVIDIA GPU (for debugging)
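For orientation, the OpenCL fallback in COPROC_NVIDIA::set_peak_flops() shown above estimates peak FLOPS as max_compute_units * 48 * 2 * max_clock_frequency * 1e6; the 48 * 2 factor appears to assume roughly 48 cores per compute unit, each doing one FMA (2 FLOPs) per clock. A worked example with hypothetical device numbers, not taken from any real GPU:

#include <cstdio>

int main() {
    int max_compute_units   = 16;     // hypothetical
    int max_clock_frequency = 1500;   // MHz, hypothetical
    // Evaluates the heuristic from the hunk above.
    double x = max_compute_units * 48 * 2 * max_clock_frequency * 1e6;
    printf("estimated NVIDIA OpenCL peak: %.4g FLOPS\n", x);   // 2.304e+12
    return 0;
}

The change in this commit is only the last line of each set_peak_flops(): the per-class fallback (x>0)?x:5e10 / 45e9 is dropped, and callers now run the shared bad_gpu_peak_flops() check instead.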
@@ -868,7 +868,7 @@ void COPROC_ATI::set_peak_flops() {
         //
         x = opencl_prop.max_compute_units * 16 * 5 * opencl_prop.max_clock_frequency * 1e6;
     }
-    peak_flops = (x>0)?x:5e10;
+    peak_flops = x;
 }
 
 void COPROC_ATI::fake(double ram, double avail_ram, int n) {
@@ -980,7 +980,7 @@ void COPROC_INTEL::set_peak_flops() {
     if (opencl_prop.max_compute_units) {
         x = opencl_prop.max_compute_units * 8 * opencl_prop.max_clock_frequency * 1e6;
     }
-    peak_flops = (x>0)?x:45e9;
+    peak_flops = x;
 }
 
 void COPROC_INTEL::fake(double ram, double avail_ram, int n) {
lib/coproc.h (23 changed lines)
@@ -91,6 +91,14 @@
 #define MAX_COPROC_INSTANCES 64
 #define MAX_RSC 8
     // max # of processing resources types
+#define GPU_MAX_PEAK_FLOPS 1.e15
+    // sanity-check bound for peak FLOPS
+    // for now (Feb 2019) 1000 TeraFLOPS.
+    // As of now, the fastest GPU is 20 TeraFLOPS (NVIDIA).
+    // May need to increase this at some point
+#define GPU_DEFAULT_PEAK_FLOPS 100.e9
+    // value to use if sanity check fails
+    // as of now (Feb 2019) 100 GigaFLOPS is a typical low-end GPU
 
 // arguments to proc_type_name() and proc_type_name_xml().
 //
@@ -246,6 +254,21 @@ struct COPROC {
         std::vector<OPENCL_DEVICE_PROP> &opencls,
         std::vector<int>& ignore_dev
     );
+
+    // sanity check GPU peak FLOPS
+    //
+    inline bool bad_gpu_peak_flops(const char* source, std::string& msg) {
+        if (peak_flops <= 0 || peak_flops > GPU_MAX_PEAK_FLOPS) {
+            char buf[256];
+            sprintf(buf, "%s reported bad GPU peak FLOPS %f; using %f",
+                source, peak_flops, GPU_DEFAULT_PEAK_FLOPS
+            );
+            msg = buf;
+            peak_flops = GPU_DEFAULT_PEAK_FLOPS;
+            return true;
+        }
+        return false;
+    }
 };
 
 // Based on cudaDeviceProp from /usr/local/cuda/include/driver_types.h
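The new inline helper can be exercised on its own. Below is a minimal sketch with a stand-in struct (not the BOINC sources) that reuses the same bounds and message format, showing how a detection routine clamps an implausible value and collects the warning, mirroring the set_peak_flops() / bad_gpu_peak_flops() / warnings.push_back() pattern in the client hunks:

#include <cstdio>
#include <string>
#include <vector>

// Same bounds as the new defines in lib/coproc.h.
#define GPU_MAX_PEAK_FLOPS 1.e15
#define GPU_DEFAULT_PEAK_FLOPS 100.e9

// Minimal stand-in for COPROC, just enough to run the sanity check.
struct FAKE_COPROC {
    double peak_flops = 0;
    bool bad_gpu_peak_flops(const char* source, std::string& msg) {
        if (peak_flops <= 0 || peak_flops > GPU_MAX_PEAK_FLOPS) {
            char buf[256];
            snprintf(buf, sizeof(buf),
                "%s reported bad GPU peak FLOPS %f; using %f",
                source, peak_flops, GPU_DEFAULT_PEAK_FLOPS
            );
            msg = buf;
            peak_flops = GPU_DEFAULT_PEAK_FLOPS;
            return true;
        }
        return false;
    }
};

int main() {
    std::vector<std::string> warnings;
    std::string s;

    FAKE_COPROC cc;
    cc.peak_flops = 2.77e15;   // implausible, e.g. the result of a wrapped product
    if (cc.bad_gpu_peak_flops("Example OpenCL", s)) {
        warnings.push_back(s);                 // same pattern as the client hunks
    }
    printf("%s\n", warnings.back().c_str());
    printf("clamped peak_flops = %g\n", cc.peak_flops);   // 1e+11
    return 0;
}

Routing the message through warnings (client) or log_messages (scheduler) keeps the bogus driver value visible in the logs rather than silently replacing it.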
@@ -181,6 +181,7 @@ bool PLAN_CLASS_SPEC::opencl_check(OPENCL_DEVICE_PROP& opencl_prop) {
 bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKUNIT* wu) {
     COPROC* cpp = NULL;
     bool can_use_multicore = true;
+    string msg;
 
     if (infeasible_random && drand()<infeasible_random) {
         return false;
@@ -594,7 +595,9 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKU
             return false;
         }
 
         cp.set_peak_flops();
+        if (cp.bad_gpu_peak_flops("AMD", msg)) {
+            log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str());
+        }
         gpu_ram = cp.opencl_prop.global_mem_size;
 
         driver_version = 0;
@@ -686,7 +689,9 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKU
             }
         }
         gpu_ram = cp.prop.totalGlobalMem;
         cp.set_peak_flops();
+        if (cp.bad_gpu_peak_flops("NVIDIA", msg)) {
+            log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str());
+        }
 
     // Intel GPU
     //
@@ -705,6 +710,9 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKU
         if (min_gpu_ram_mb) {
             gpu_requirements[PROC_TYPE_INTEL_GPU].update(0, min_gpu_ram_mb * MEGA);
         }
+        if (cp.bad_gpu_peak_flops("Intel GPU", msg)) {
+            log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str());
+        }
 
     // custom GPU type
     //
@@ -723,6 +731,9 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKU
                 "[version] plan_class_spec: Custom coproc %s found\n", gpu_type
             );
         }
+        if (cpp->bad_gpu_peak_flops("Custom GPU", msg)) {
+            log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str());
+        }
     }
 
     if (opencl) {