From e77a6c3b6cb8e127c984d6a59449b20a4bb8f18b Mon Sep 17 00:00:00 2001
From: David Anderson <davea@ssl.berkeley.edu>
Date: Sat, 2 Feb 2019 18:04:31 -0800
Subject: [PATCH 1/2] client: fix possible overflow in peak FLOPS calculation
 for OpenCL GPUs

... and generate a warning message if the computed value is impossible
---
 client/gpu_opencl.cpp | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/client/gpu_opencl.cpp b/client/gpu_opencl.cpp
index 152ebadd92..05be8cd699 100644
--- a/client/gpu_opencl.cpp
+++ b/client/gpu_opencl.cpp
@@ -606,12 +606,22 @@ void COPROCS::get_opencl(
                 prop.opencl_available_ram = prop.global_mem_size;
                 prop.is_used = COPROC_USED;
 
-                // TODO: Find a better way to calculate / estimate peak_flops for future coprocessors?
+                // TODO: is there a better way to estimate peak_flops?
+                //
                 prop.peak_flops = 0;
                 if (prop.max_compute_units) {
-                    prop.peak_flops = prop.max_compute_units * prop.max_clock_frequency * MEGA;
+                    double freq = ((double)prop.max_clock_frequency) * MEGA;
+                    prop.peak_flops = ((double)prop.max_compute_units) * freq;
+                }
+                if (prop.peak_flops <= 0 || prop.peak_flops > 1.e15) {
+                    char buf2[256];
+                    sprintf(buf2,
+                        "bad peak flops; Max units %d, max freq %d MHz",
+                        prop.max_compute_units, prop.max_clock_frequency
+                    );
+                    warnings.push_back(buf2);
+                    prop.peak_flops = 1.e12;
                 }
-                if (prop.peak_flops <= 0) prop.peak_flops = 45e9;
 
                 other_opencls.push_back(prop);
             }

From ccd0e64b2bf2cb45d617d94db334347c5313a60c Mon Sep 17 00:00:00 2001
From: David Anderson <davea@ssl.berkeley.edu>
Date: Thu, 7 Feb 2019 13:52:35 -0800
Subject: [PATCH 2/2] sanity-check GPU peak FLOPS in both client and server

If the calculated peak FLOPS is nonpositive or greater than 1000 TeraFLOPS,
use a default (100 GigaFLOPS) instead.

Server: use the peak FLOPS value reported by the client rather than
recalculating it; the client software is probably more recent than the server's.
Sanity-check it in any case.
---
 client/gpu_amd.cpp        |  3 +++
 client/gpu_nvidia.cpp     |  4 ++++
 client/gpu_opencl.cpp     | 16 +++++++++++++---
 lib/coproc.cpp            |  6 +++---
 lib/coproc.h              | 23 +++++++++++++++++++++++
 sched/plan_class_spec.cpp | 15 +++++++++++++--
 6 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/client/gpu_amd.cpp b/client/gpu_amd.cpp
index ec594a09b8..9c0c2abffd 100644
--- a/client/gpu_amd.cpp
+++ b/client/gpu_amd.cpp
@@ -377,6 +377,9 @@ void COPROC_ATI::get(
         cc.atirt_detected = atirt_detected;
         cc.device_num = i;
         cc.set_peak_flops();
+        if (cc.bad_gpu_peak_flops("CAL", s)) {
+            warnings.push_back(s);
+        }
         get_available_ati_ram(cc, warnings);
         ati_gpus.push_back(cc);
     }
diff --git a/client/gpu_nvidia.cpp b/client/gpu_nvidia.cpp
index aa9232dbbf..a03e98d3b6 100644
--- a/client/gpu_nvidia.cpp
+++ b/client/gpu_nvidia.cpp
@@ -240,6 +240,7 @@ void COPROC_NVIDIA::get(
     char buf[256];
     int j, itemp;
     size_t global_mem = 0;
+    string s;
     COPROC_NVIDIA cc;
 
 #ifdef _WIN32
@@ -444,6 +445,9 @@ void* cudalib = NULL;
         cc.cuda_version = cuda_version;
         cc.device_num = j;
         cc.set_peak_flops();
+        if (cc.bad_gpu_peak_flops("CUDA", s)) {
+            warnings.push_back(s);
+        }
         get_available_nvidia_ram(cc, warnings);
         nvidia_gpus.push_back(cc);
     }
diff --git a/client/gpu_opencl.cpp b/client/gpu_opencl.cpp
index 05be8cd699..391061534f 100644
--- a/client/gpu_opencl.cpp
+++ b/client/gpu_opencl.cpp
@@ -202,6 +202,7 @@ void COPROCS::get_opencl(
     vector<int>devnums_pci_slot_sort;
     vector<OPENCL_DEVICE_PROP>::iterator it;
     int max_other_coprocs = MAX_RSC-1;  // coprocs[0] is reserved for CPU
+    string s;
 
     if (cc_config.no_opencl) {
         return;
@@ -486,6 +487,9 @@ void COPROCS::get_opencl(
                     COPROC_NVIDIA c;
                     c.opencl_prop = prop;
                     c.set_peak_flops();
+                    if (c.bad_gpu_peak_flops("NVIDIA OpenCL", s)) {
+                        warnings.push_back(s);
+                    }
                     prop.peak_flops = c.peak_flops;
                 }
                 if (cuda_match_found) {
@@ -552,6 +556,9 @@ void COPROCS::get_opencl(
                     COPROC_ATI c;
                     c.opencl_prop = prop;
                     c.set_peak_flops();
+                    if (c.bad_gpu_peak_flops("AMD OpenCL", s)) {
+                        warnings.push_back(s);
+                    }
                     prop.peak_flops = c.peak_flops;
                 }
 
@@ -576,6 +583,9 @@ void COPROCS::get_opencl(
                 safe_strcpy(c.version, prop.opencl_driver_version);
 
                 c.set_peak_flops();
+                if (c.bad_gpu_peak_flops("Intel OpenCL", s)) {
+                    warnings.push_back(s);
+                }
                 prop.peak_flops = c.peak_flops;
                 prop.opencl_available_ram = prop.global_mem_size;
 
@@ -613,14 +623,14 @@ void COPROCS::get_opencl(
                     double freq = ((double)prop.max_clock_frequency) * MEGA;
                     prop.peak_flops = ((double)prop.max_compute_units) * freq;
                 }
-                if (prop.peak_flops <= 0 || prop.peak_flops > 1.e15) {
+                if (prop.peak_flops <= 0 || prop.peak_flops > GPU_MAX_PEAK_FLOPS) {
                     char buf2[256];
                     sprintf(buf2,
-                        "bad peak flops; Max units %d, max freq %d MHz",
+                        "OpenCL generic: bad peak FLOPS; Max units %d, max freq %d MHz",
                         prop.max_compute_units, prop.max_clock_frequency
                     );
                     warnings.push_back(buf2);
-                    prop.peak_flops = 1.e12;
+                    prop.peak_flops = GPU_DEFAULT_PEAK_FLOPS;
                 }
 
                 other_opencls.push_back(prop);
diff --git a/lib/coproc.cpp b/lib/coproc.cpp
index 08c5cf0d2f..81a1d387b0 100644
--- a/lib/coproc.cpp
+++ b/lib/coproc.cpp
@@ -600,7 +600,7 @@ void COPROC_NVIDIA::set_peak_flops() {
         //
         x = opencl_prop.max_compute_units * 48 * 2 * opencl_prop.max_clock_frequency * 1e6;
     }
-    peak_flops =  (x>0)?x:5e10;
+    peak_flops = x;
 }
 
 // fake a NVIDIA GPU (for debugging)
@@ -868,7 +868,7 @@ void COPROC_ATI::set_peak_flops() {
         //
         x = opencl_prop.max_compute_units * 16 * 5 * opencl_prop.max_clock_frequency * 1e6;
     }
-    peak_flops = (x>0)?x:5e10;
+    peak_flops = x;
 }
 
 void COPROC_ATI::fake(double ram, double avail_ram, int n) {
@@ -980,7 +980,7 @@ void COPROC_INTEL::set_peak_flops() {
     if (opencl_prop.max_compute_units) {
         x = opencl_prop.max_compute_units * 8 * opencl_prop.max_clock_frequency * 1e6;
     }
-    peak_flops = (x>0)?x:45e9;
+    peak_flops = x;
 }
 
 void COPROC_INTEL::fake(double ram, double avail_ram, int n) {
diff --git a/lib/coproc.h b/lib/coproc.h
index b5ee738ad1..ddf7c16315 100644
--- a/lib/coproc.h
+++ b/lib/coproc.h
@@ -91,6 +91,14 @@
 #define MAX_COPROC_INSTANCES 64
 #define MAX_RSC 8
     // max # of processing resources types
+#define GPU_MAX_PEAK_FLOPS  1.e15
+    // sanity-check bound for peak FLOPS
+    // for now (Feb 2019) 1000 TeraFLOPS.
+    // As of now, the fastest GPU is 20 TeraFLOPS (NVIDIA).
+    // May need to increase this at some point
+#define GPU_DEFAULT_PEAK_FLOPS  100.e9
+    // value to use if sanity check fails
+    // as of now (Feb 2019) 100 GigaFLOPS is a typical low-end GPU
 
 // arguments to proc_type_name() and proc_type_name_xml().
 //
@@ -246,6 +254,21 @@ struct COPROC {
         std::vector<OPENCL_DEVICE_PROP> &opencls,
         std::vector<int>& ignore_dev
     );
+
+    // Sanity-check peak_flops. If it is nonpositive, NaN, or above
+    // GPU_MAX_PEAK_FLOPS: describe it in msg, reset it to the default, return true.
+    inline bool bad_gpu_peak_flops(const char* source, std::string& msg) {
+        if (!(peak_flops > 0 && peak_flops <= GPU_MAX_PEAK_FLOPS)) {    // negated form is also true for NaN
+            char buf[256];
+            snprintf(buf, sizeof(buf), "%s reported bad GPU peak FLOPS %g; using %g",
+                source, peak_flops, GPU_DEFAULT_PEAK_FLOPS   // bounded write + %g: a huge value can't overrun buf
+            );
+            msg = buf;
+            peak_flops = GPU_DEFAULT_PEAK_FLOPS;
+            return true;
+        }
+        return false;
+    }
 };
 
 // Based on cudaDeviceProp from /usr/local/cuda/include/driver_types.h
diff --git a/sched/plan_class_spec.cpp b/sched/plan_class_spec.cpp
index f77a7b7edd..bbf1b40912 100644
--- a/sched/plan_class_spec.cpp
+++ b/sched/plan_class_spec.cpp
@@ -181,6 +181,7 @@ bool PLAN_CLASS_SPEC::opencl_check(OPENCL_DEVICE_PROP& opencl_prop) {
 bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKUNIT* wu) {
     COPROC* cpp = NULL;
     bool can_use_multicore = true;
+    string msg;
 
     if (infeasible_random && drand()<infeasible_random) {
         return false;
@@ -594,7 +595,9 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKU
             return false;
         }
 
-        cp.set_peak_flops();
+        if (cp.bad_gpu_peak_flops("AMD", msg)) {
+            log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str());
+        }
         gpu_ram = cp.opencl_prop.global_mem_size;
 
         driver_version = 0;
@@ -686,7 +689,9 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKU
             }
         }
         gpu_ram = cp.prop.totalGlobalMem;
-        cp.set_peak_flops();
+        if (cp.bad_gpu_peak_flops("NVIDIA", msg)) {
+            log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str());
+        }
 
     // Intel GPU
     //
@@ -705,6 +710,9 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKU
         if (min_gpu_ram_mb) {
             gpu_requirements[PROC_TYPE_INTEL_GPU].update(0, min_gpu_ram_mb * MEGA);
         }
+        if (cp.bad_gpu_peak_flops("Intel GPU", msg)) {
+            log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str());
+        }
 
     // custom GPU type
     //
@@ -723,6 +731,9 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, const WORKU
                 "[version] plan_class_spec: Custom coproc %s found\n", gpu_type
             );
         }
+        if (cpp->bad_gpu_peak_flops("Custom GPU", msg)) {
+            log_messages.printf(MSG_NORMAL, "%s\n", msg.c_str());
+        }
     }
 
     if (opencl) {