diff --git a/client/gpu_amd.cpp b/client/gpu_amd.cpp index ec594a09b8..9c0c2abffd 100644 --- a/client/gpu_amd.cpp +++ b/client/gpu_amd.cpp @@ -377,6 +377,9 @@ void COPROC_ATI::get( cc.atirt_detected = atirt_detected; cc.device_num = i; cc.set_peak_flops(); + if (cc.bad_gpu_peak_flops("CAL", s)) { + warnings.push_back(s); + } get_available_ati_ram(cc, warnings); ati_gpus.push_back(cc); } diff --git a/client/gpu_nvidia.cpp b/client/gpu_nvidia.cpp index aa9232dbbf..a03e98d3b6 100644 --- a/client/gpu_nvidia.cpp +++ b/client/gpu_nvidia.cpp @@ -240,6 +240,7 @@ void COPROC_NVIDIA::get( char buf[256]; int j, itemp; size_t global_mem = 0; + string s; COPROC_NVIDIA cc; #ifdef _WIN32 @@ -444,6 +445,9 @@ void* cudalib = NULL; cc.cuda_version = cuda_version; cc.device_num = j; cc.set_peak_flops(); + if (cc.bad_gpu_peak_flops("CUDA", s)) { + warnings.push_back(s); + } get_available_nvidia_ram(cc, warnings); nvidia_gpus.push_back(cc); } diff --git a/client/gpu_opencl.cpp b/client/gpu_opencl.cpp index 152ebadd92..391061534f 100644 --- a/client/gpu_opencl.cpp +++ b/client/gpu_opencl.cpp @@ -202,6 +202,7 @@ void COPROCS::get_opencl( vectordevnums_pci_slot_sort; vector::iterator it; int max_other_coprocs = MAX_RSC-1; // coprocs[0] is reserved for CPU + string s; if (cc_config.no_opencl) { return; @@ -486,6 +487,9 @@ void COPROCS::get_opencl( COPROC_NVIDIA c; c.opencl_prop = prop; c.set_peak_flops(); + if (c.bad_gpu_peak_flops("NVIDIA OpenCL", s)) { + warnings.push_back(s); + } prop.peak_flops = c.peak_flops; } if (cuda_match_found) { @@ -552,6 +556,9 @@ void COPROCS::get_opencl( COPROC_ATI c; c.opencl_prop = prop; c.set_peak_flops(); + if (c.bad_gpu_peak_flops("AMD OpenCL", s)) { + warnings.push_back(s); + } prop.peak_flops = c.peak_flops; } @@ -576,6 +583,9 @@ void COPROCS::get_opencl( safe_strcpy(c.version, prop.opencl_driver_version); c.set_peak_flops(); + if (c.bad_gpu_peak_flops("Intel OpenCL", s)) { + warnings.push_back(s); + } prop.peak_flops = c.peak_flops; 
prop.opencl_available_ram = prop.global_mem_size; @@ -606,12 +616,22 @@ void COPROCS::get_opencl( prop.opencl_available_ram = prop.global_mem_size; prop.is_used = COPROC_USED; - // TODO: Find a better way to calculate / estimate peak_flops for future coprocessors? + // TODO: is there a better way to estimate peak_flops? + // prop.peak_flops = 0; if (prop.max_compute_units) { - prop.peak_flops = prop.max_compute_units * prop.max_clock_frequency * MEGA; + double freq = ((double)prop.max_clock_frequency) * MEGA; + prop.peak_flops = ((double)prop.max_compute_units) * freq; + } + if (prop.peak_flops <= 0 || prop.peak_flops > GPU_MAX_PEAK_FLOPS) { + char buf2[256]; + sprintf(buf2, + "OpenCL generic: bad peak FLOPS; Max units %d, max freq %d MHz", + prop.max_compute_units, prop.max_clock_frequency + ); + warnings.push_back(buf2); + prop.peak_flops = GPU_DEFAULT_PEAK_FLOPS; } - if (prop.peak_flops <= 0) prop.peak_flops = 45e9; other_opencls.push_back(prop); } diff --git a/lib/coproc.cpp b/lib/coproc.cpp index 08c5cf0d2f..81a1d387b0 100644 --- a/lib/coproc.cpp +++ b/lib/coproc.cpp @@ -600,7 +600,7 @@ void COPROC_NVIDIA::set_peak_flops() { // x = opencl_prop.max_compute_units * 48 * 2 * opencl_prop.max_clock_frequency * 1e6; } - peak_flops = (x>0)?x:5e10; + peak_flops = x; } // fake a NVIDIA GPU (for debugging) @@ -868,7 +868,7 @@ void COPROC_ATI::set_peak_flops() { // x = opencl_prop.max_compute_units * 16 * 5 * opencl_prop.max_clock_frequency * 1e6; } - peak_flops = (x>0)?x:5e10; + peak_flops = x; } void COPROC_ATI::fake(double ram, double avail_ram, int n) { @@ -980,7 +980,7 @@ void COPROC_INTEL::set_peak_flops() { if (opencl_prop.max_compute_units) { x = opencl_prop.max_compute_units * 8 * opencl_prop.max_clock_frequency * 1e6; } - peak_flops = (x>0)?x:45e9; + peak_flops = x; } void COPROC_INTEL::fake(double ram, double avail_ram, int n) { diff --git a/lib/coproc.h b/lib/coproc.h index b5ee738ad1..ddf7c16315 100644 --- a/lib/coproc.h +++ b/lib/coproc.h @@ -91,6 +91,14 @@ 
#define MAX_COPROC_INSTANCES 64
 #define MAX_RSC 8
     // max # of processing resources types
+#define GPU_MAX_PEAK_FLOPS 1.e15
+    // sanity-check bound for peak FLOPS
+    // for now (Feb 2019) 1000 TeraFLOPS.
+    // As of now, the fastest GPU is 20 TeraFLOPS (NVIDIA).
+    // May need to increase this at some point
+#define GPU_DEFAULT_PEAK_FLOPS 100.e9
+    // value to use if sanity check fails
+    // as of now (Feb 2019) 100 GigaFLOPS is a typical low-end GPU
 
 // arguments to proc_type_name() and proc_type_name_xml().
 //
@@ -246,6 +254,30 @@ struct COPROC {
         std::vector &opencls,
         std::vector& ignore_dev
     );
+
+    // Sanity-check the peak FLOPS value of this coprocessor.
+    // If peak_flops is not in (0, GPU_MAX_PEAK_FLOPS] (this includes NaN),
+    // replace it with GPU_DEFAULT_PEAK_FLOPS, describe the problem in msg,
+    // and return true.  Otherwise change nothing and return false.
+    // source: label naming where the value came from (used only in msg)
+    //
+    inline bool bad_gpu_peak_flops(const char* source, std::string& msg) {
+        // written as a negated range check so that NaN,
+        // for which every ordered comparison is false, is rejected too
+        if (!(peak_flops > 0 && peak_flops <= GPU_MAX_PEAK_FLOPS)) {
+            char buf[256];
+            // %f prints every integer digit of a huge double (300+ chars),
+            // so bound the write to the buffer; the text may be truncated
+            snprintf(buf, sizeof(buf),
+                "%s reported bad GPU peak FLOPS %f; using %f",
+                source, peak_flops, GPU_DEFAULT_PEAK_FLOPS
+            );
+            msg = buf;
+            peak_flops = GPU_DEFAULT_PEAK_FLOPS;
+            return true;
+        }
+        return false;
+    }
 };
 
 // Based on cudaDeviceProp from /usr/local/cuda/include/driver_types.h
diff --git a/sched/plan_class_spec.cpp b/sched/plan_class_spec.cpp
index f77a7b7edd..bbf1b40912 100644
--- a/sched/plan_class_spec.cpp
+++ b/sched/plan_class_spec.cpp
@@ -181,6 +181,7 @@ bool PLAN_CLASS_SPEC::opencl_check(OPENCL_DEVICE_PROP& opencl_prop) {
 bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKUNIT* wu) {
     COPROC* cpp = NULL;
     bool can_use_multicore = true;
+    string msg;
     if (infeasible_random && drand()bad_gpu_peak_flops("Custom GPU", msg)) {
+            log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str());
+        }
         }
         if (opencl) {