From e77a6c3b6cb8e127c984d6a59449b20a4bb8f18b Mon Sep 17 00:00:00 2001 From: David Anderson Date: Sat, 2 Feb 2019 18:04:31 -0800 Subject: [PATCH 1/2] client: fix possible overflow in peak FLOPS calculation for OpenCL GPUs ... and generate a warning message if impossible values --- client/gpu_opencl.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/client/gpu_opencl.cpp b/client/gpu_opencl.cpp index 152ebadd92..05be8cd699 100644 --- a/client/gpu_opencl.cpp +++ b/client/gpu_opencl.cpp @@ -606,12 +606,22 @@ void COPROCS::get_opencl( prop.opencl_available_ram = prop.global_mem_size; prop.is_used = COPROC_USED; - // TODO: Find a better way to calculate / estimate peak_flops for future coprocessors? + // TODO: is there a better way to estimate peak_flops? + // prop.peak_flops = 0; if (prop.max_compute_units) { - prop.peak_flops = prop.max_compute_units * prop.max_clock_frequency * MEGA; + double freq = ((double)prop.max_clock_frequency) * MEGA; + prop.peak_flops = ((double)prop.max_compute_units) * freq; + } + if (prop.peak_flops <= 0 || prop.peak_flops > 1.e15) { + char buf2[256]; + sprintf(buf2, + "bad peak flops; Max units %d, max freq %d MHz", + prop.max_compute_units, prop.max_clock_frequency + ); + warnings.push_back(buf2); + prop.peak_flops = 1.e12; } - if (prop.peak_flops <= 0) prop.peak_flops = 45e9; other_opencls.push_back(prop); } From ccd0e64b2bf2cb45d617d94db334347c5313a60c Mon Sep 17 00:00:00 2001 From: David Anderson Date: Thu, 7 Feb 2019 13:52:35 -0800 Subject: [PATCH 2/2] sanity-check GPU peak FLOPS in both client and server If the calculated peak FLOPS is nonpositive or greater than 1000 TeraFLOPS, use a default (100 GigaFLOPS) instead. Server: use the value reported by client rather than recalculating it; the client is probably more recent than you are. Sanity-check it in any case. 
--- client/gpu_amd.cpp | 3 +++ client/gpu_nvidia.cpp | 4 ++++ client/gpu_opencl.cpp | 16 +++++++++++++--- lib/coproc.cpp | 6 +++--- lib/coproc.h | 23 +++++++++++++++++++++++ sched/plan_class_spec.cpp | 15 +++++++++++++-- 6 files changed, 59 insertions(+), 8 deletions(-) diff --git a/client/gpu_amd.cpp b/client/gpu_amd.cpp index ec594a09b8..9c0c2abffd 100644 --- a/client/gpu_amd.cpp +++ b/client/gpu_amd.cpp @@ -377,6 +377,9 @@ void COPROC_ATI::get( cc.atirt_detected = atirt_detected; cc.device_num = i; cc.set_peak_flops(); + if (cc.bad_gpu_peak_flops("CAL", s)) { + warnings.push_back(s); + } get_available_ati_ram(cc, warnings); ati_gpus.push_back(cc); } diff --git a/client/gpu_nvidia.cpp b/client/gpu_nvidia.cpp index aa9232dbbf..a03e98d3b6 100644 --- a/client/gpu_nvidia.cpp +++ b/client/gpu_nvidia.cpp @@ -240,6 +240,7 @@ void COPROC_NVIDIA::get( char buf[256]; int j, itemp; size_t global_mem = 0; + string s; COPROC_NVIDIA cc; #ifdef _WIN32 @@ -444,6 +445,9 @@ void* cudalib = NULL; cc.cuda_version = cuda_version; cc.device_num = j; cc.set_peak_flops(); + if (cc.bad_gpu_peak_flops("CUDA", s)) { + warnings.push_back(s); + } get_available_nvidia_ram(cc, warnings); nvidia_gpus.push_back(cc); } diff --git a/client/gpu_opencl.cpp b/client/gpu_opencl.cpp index 05be8cd699..391061534f 100644 --- a/client/gpu_opencl.cpp +++ b/client/gpu_opencl.cpp @@ -202,6 +202,7 @@ void COPROCS::get_opencl( vectordevnums_pci_slot_sort; vector::iterator it; int max_other_coprocs = MAX_RSC-1; // coprocs[0] is reserved for CPU + string s; if (cc_config.no_opencl) { return; @@ -486,6 +487,9 @@ void COPROCS::get_opencl( COPROC_NVIDIA c; c.opencl_prop = prop; c.set_peak_flops(); + if (c.bad_gpu_peak_flops("NVIDIA OpenCL", s)) { + warnings.push_back(s); + } prop.peak_flops = c.peak_flops; } if (cuda_match_found) { @@ -552,6 +556,9 @@ void COPROCS::get_opencl( COPROC_ATI c; c.opencl_prop = prop; c.set_peak_flops(); + if (c.bad_gpu_peak_flops("AMD OpenCL", s)) { + warnings.push_back(s); + } 
prop.peak_flops = c.peak_flops; } @@ -576,6 +583,9 @@ void COPROCS::get_opencl( safe_strcpy(c.version, prop.opencl_driver_version); c.set_peak_flops(); + if (c.bad_gpu_peak_flops("Intel OpenCL", s)) { + warnings.push_back(s); + } prop.peak_flops = c.peak_flops; prop.opencl_available_ram = prop.global_mem_size; @@ -613,14 +623,14 @@ void COPROCS::get_opencl( double freq = ((double)prop.max_clock_frequency) * MEGA; prop.peak_flops = ((double)prop.max_compute_units) * freq; } - if (prop.peak_flops <= 0 || prop.peak_flops > 1.e15) { + if (prop.peak_flops <= 0 || prop.peak_flops > GPU_MAX_PEAK_FLOPS) { char buf2[256]; sprintf(buf2, - "bad peak flops; Max units %d, max freq %d MHz", + "OpenCL generic: bad peak FLOPS; Max units %d, max freq %d MHz", prop.max_compute_units, prop.max_clock_frequency ); warnings.push_back(buf2); - prop.peak_flops = 1.e12; + prop.peak_flops = GPU_DEFAULT_PEAK_FLOPS; } other_opencls.push_back(prop); diff --git a/lib/coproc.cpp b/lib/coproc.cpp index 08c5cf0d2f..81a1d387b0 100644 --- a/lib/coproc.cpp +++ b/lib/coproc.cpp @@ -600,7 +600,7 @@ void COPROC_NVIDIA::set_peak_flops() { // x = opencl_prop.max_compute_units * 48 * 2 * opencl_prop.max_clock_frequency * 1e6; } - peak_flops = (x>0)?x:5e10; + peak_flops = x; } // fake a NVIDIA GPU (for debugging) @@ -868,7 +868,7 @@ void COPROC_ATI::set_peak_flops() { // x = opencl_prop.max_compute_units * 16 * 5 * opencl_prop.max_clock_frequency * 1e6; } - peak_flops = (x>0)?x:5e10; + peak_flops = x; } void COPROC_ATI::fake(double ram, double avail_ram, int n) { @@ -980,7 +980,7 @@ void COPROC_INTEL::set_peak_flops() { if (opencl_prop.max_compute_units) { x = opencl_prop.max_compute_units * 8 * opencl_prop.max_clock_frequency * 1e6; } - peak_flops = (x>0)?x:45e9; + peak_flops = x; } void COPROC_INTEL::fake(double ram, double avail_ram, int n) { diff --git a/lib/coproc.h b/lib/coproc.h index b5ee738ad1..ddf7c16315 100644 --- a/lib/coproc.h +++ b/lib/coproc.h @@ -91,6 +91,14 @@ #define MAX_COPROC_INSTANCES 
64 #define MAX_RSC 8 // max # of processing resources types +#define GPU_MAX_PEAK_FLOPS 1.e15 + // sanity-check bound for peak FLOPS + // for now (Feb 2019) 1000 TeraFLOPS. + // As of now, the fastest GPU is 20 TeraFLOPS (NVIDIA). + // May need to increase this at some point +#define GPU_DEFAULT_PEAK_FLOPS 100.e9 + // value to use if sanity check fails + // as of now (Feb 2019) 100 GigaFLOPS is a typical low-end GPU // arguments to proc_type_name() and proc_type_name_xml(). // @@ -246,6 +254,21 @@ struct COPROC { std::vector &opencls, std::vector& ignore_dev ); + + // sanity check GPU peak FLOPS: if it is nonpositive, NaN, or > GPU_MAX_PEAK_FLOPS, describe the problem in msg, substitute GPU_DEFAULT_PEAK_FLOPS and return true; else return false + // (the negated range test also rejects NaN; snprintf is required because "%f" of a huge double can expand past sizeof(buf)) + inline bool bad_gpu_peak_flops(const char* source, std::string& msg) { + if (!(peak_flops > 0 && peak_flops <= GPU_MAX_PEAK_FLOPS)) { + char buf[256]; + snprintf(buf, sizeof(buf), "%s reported bad GPU peak FLOPS %f; using %f", + source, peak_flops, GPU_DEFAULT_PEAK_FLOPS + ); + msg = buf; + peak_flops = GPU_DEFAULT_PEAK_FLOPS; + return true; + } + return false; + } }; // Based on cudaDeviceProp from /usr/local/cuda/include/driver_types.h diff --git a/sched/plan_class_spec.cpp b/sched/plan_class_spec.cpp index f77a7b7edd..bbf1b40912 100644 --- a/sched/plan_class_spec.cpp +++ b/sched/plan_class_spec.cpp @@ -181,6 +181,7 @@ bool PLAN_CLASS_SPEC::opencl_check(OPENCL_DEVICE_PROP& opencl_prop) { bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKUNIT* wu) { COPROC* cpp = NULL; bool can_use_multicore = true; + string msg; if (infeasible_random && drand()bad_gpu_peak_flops("Custom GPU", msg)) { + log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str()); + } } if (opencl) {