From 7411dd60aa0506319f0ee7e598c70b033cfc685c Mon Sep 17 00:00:00 2001
From: David Anderson <davea@ssl.berkeley.edu>
Date: Wed, 14 Sep 2011 22:45:26 +0000
Subject: [PATCH] - client: change in the use of GPU available RAM:
 - measure the available RAM of each GPU when BOINC starts up. If this
 fails, set available = physical. Show available RAM in startup
 messages. - use available RAM rather than physical RAM in selecting
 the "best" GPU instance - report available RAM to the scheduler TODO:
 change the scheduler to use available rather than physical if it's
 reported

svn path=/trunk/boinc/; revision=24210
---
 checkin_notes                      | 20 ++++++
 client/client_state.cpp            | 10 +--
 client/coproc_detect.cpp           | 98 +++++++++++-------------------
 client/cpu_sched.cpp               | 38 ++++--------
 doc/index.php                      |  4 +-
 lib/coproc.cpp                     |  8 +--
 lib/coproc.h                       | 21 +++----
 sched/sample_trivial_validator.cpp | 15 ++---
 8 files changed, 86 insertions(+), 128 deletions(-)

diff --git a/checkin_notes b/checkin_notes
index dadfe483e0..f6f38e8ad4 100644
--- a/checkin_notes
+++ b/checkin_notes
@@ -6032,3 +6032,23 @@ Rom    14 Sept 2011
         
     client/
         hostinfo_win.cpp
+
+David  14 Sept 2011
+    - client: change in the use of GPU available RAM:
+        - measure the available RAM of each GPU when BOINC starts up.
+            If this fails, set available = physical.
+            Show available RAM in startup messages.
+        - use available RAM rather than physical RAM in selecting
+            the "best" GPU instance
+        - report available RAM to the scheduler
+    TODO: change the scheduler to use available rather than physical
+        if it's reported
+
+    sched/
+        sample_trivial_validator.cpp
+    lib/
+        coproc.cpp,h
+    client/
+        client_state.cpp
+        coproc_detect.cpp
+        cpu_sched.cpp
diff --git a/client/client_state.cpp b/client/client_state.cpp
index 7fa101d2b1..db285a5da1 100644
--- a/client/client_state.cpp
+++ b/client/client_state.cpp
@@ -369,15 +369,11 @@ int CLIENT_STATE::init() {
         }
 #if 0
         msg_printf(NULL, MSG_INFO, "Faking an NVIDIA GPU");
-        coprocs.nvidia.fake(18000, 256*MEGA, 2);
-        coprocs.nvidia.available_ram_fake[0] = 256*MEGA;
-        coprocs.nvidia.available_ram_fake[1] = 192*MEGA;
+        coprocs.nvidia.fake(18000, 256*MEGA, 192*MEGA, 2);
 #endif
 #if 0
         msg_printf(NULL, MSG_INFO, "Faking an ATI GPU");
-        coprocs.ati.fake(512*MEGA, 2);
-        coprocs.ati.available_ram_fake[0] = 256*MEGA;
-        coprocs.ati.available_ram_fake[1] = 192*MEGA;
+        coprocs.ati.fake(512*MEGA, 256*MEGA, 2);
 #endif
     }
 
@@ -386,7 +382,6 @@ int CLIENT_STATE::init() {
             msg_printf(NULL, MSG_INFO, "NVIDIA GPU info taken from cc_config.xml");
         } else {
             coprocs.add(coprocs.nvidia);
-            coprocs.nvidia.print_available_ram();
         }
     }
     if (coprocs.have_ati()) {
@@ -394,7 +389,6 @@ int CLIENT_STATE::init() {
             msg_printf(NULL, MSG_INFO, "ATI GPU info taken from cc_config.xml");
         } else {
             coprocs.add(coprocs.ati);
-            coprocs.ati.print_available_ram();
         }
     }
     host_info._coprocs = coprocs;
diff --git a/client/coproc_detect.cpp b/client/coproc_detect.cpp
index 9f6374b168..1efe05233c 100644
--- a/client/coproc_detect.cpp
+++ b/client/coproc_detect.cpp
@@ -101,24 +101,6 @@ cl_int (*__clGetDeviceInfo)(cl_device_id    /* device */,
 
 #endif
 
-void COPROC::print_available_ram() {
-    for (int i=0; i<count; i++) {
-        if (available_ram_unknown[i]) {
-            msg_printf(0, MSG_INFO,
-                "[coproc] %s device %d: available RAM unknown",
-                type, device_nums[i]
-            );
-        } else {
-            msg_printf(0, MSG_INFO,
-                "[coproc] %s device %d: available RAM %d MB",
-                type, device_nums[i],
-                (int)(available_ram[i]/MEGA)
-            );
-        }
-    }
-}
-
-
 //TODO: Determine how we want to compare OpenCL devices - this is only a placeholder
 // return 1/-1/0 if device 1 is more/less/same capable than device 2.
 // factors (decreasing priority):
@@ -614,7 +596,7 @@ void COPROCS::get(
 // factors (decreasing priority):
 // - compute capability
 // - software version
-// - memory
+// - available memory
 // - speed
 //
 // If "loose", ignore FLOPS and tolerate small memory diff
@@ -627,12 +609,12 @@ int nvidia_compare(COPROC_NVIDIA& c1, COPROC_NVIDIA& c2, bool loose) {
     if (c1.cuda_version > c2.cuda_version) return 1;
     if (c1.cuda_version < c2.cuda_version) return -1;
     if (loose) {
-        if (c1.prop.totalGlobalMem > 1.4*c2.prop.totalGlobalMem) return 1;
-        if (c1.prop.totalGlobalMem < .7* c2.prop.totalGlobalMem) return -1;
+        if (c1.available_ram> 1.4*c2.available_ram) return 1;
+        if (c1.available_ram < .7* c2.available_ram) return -1;
         return 0;
     }
-    if (c1.prop.totalGlobalMem > c2.prop.totalGlobalMem) return 1;
-    if (c1.prop.totalGlobalMem < c2.prop.totalGlobalMem) return -1;
+    if (c1.available_ram > c2.available_ram) return 1;
+    if (c1.available_ram < c2.available_ram) return -1;
     double s1 = c1.peak_flops;
     double s2 = c2.peak_flops;
     if (s1 > s2) return 1;
@@ -944,12 +926,15 @@ void COPROC_NVIDIA::get(
 
 // fake a NVIDIA GPU (for debugging)
 //
-void COPROC_NVIDIA::fake(int driver_version, double ram, int n) {
+void COPROC_NVIDIA::fake(
+    int driver_version, double ram, double avail_ram, int n
+) {
    strcpy(type, GPU_TYPE_NVIDIA);
    count = n;
    for (int i=0; i<count; i++) {
        device_nums[i] = i;
    }
+   available_ram = avail_ram;
    display_driver_version = driver_version;
    cuda_version = 2020;
    strcpy(prop.name, "Fake NVIDIA GPU");
@@ -974,8 +959,7 @@ void COPROC_NVIDIA::fake(int driver_version, double ram, int n) {
    set_peak_flops();
 }
 
-// See how much RAM is available on each GPU.
-// If this fails, set "available_ram_unknown"
+// See how much RAM is available on this GPU.
 //
 void COPROC_NVIDIA::get_available_ram() {
     int retval;
@@ -983,8 +967,7 @@ void COPROC_NVIDIA::get_available_ram() {
 	int device;
     void* ctx;
     
-    available_ram[0] = 0;
-    available_ram_unknown[0] = true;
+    available_ram = prop.dtotalGlobalMem;
     retval = (*__cuDeviceGet)(&device, device_num);
     if (retval) {
         if (log_flags.coproc_debug) {
@@ -1014,8 +997,7 @@ void COPROC_NVIDIA::get_available_ram() {
         return;
     }
     (*__cuCtxDestroy)(ctx);
-    available_ram[0] = (double) memfree;
-    available_ram_unknown[0] = false;
+    available_ram = (double) memfree;
 }
 
 // check whether each GPU is running a graphics app (assume yes)
@@ -1429,11 +1411,12 @@ void COPROC_ATI::get(
     }
 }
 
-void COPROC_ATI::fake(double ram, int n) {
+void COPROC_ATI::fake(double ram, double avail_ram, int n) {
     strcpy(type, GPU_TYPE_ATI);
     strcpy(version, "1.4.3");
     strcpy(name, "foobar");
     count = n;
+    available_ram = avail_ram;
     memset(&attribs, 0, sizeof(attribs));
     memset(&info, 0, sizeof(info));
     attribs.localRAM = (int)(ram/MEGA);
@@ -1446,52 +1429,39 @@ void COPROC_ATI::fake(double ram, int n) {
     set_peak_flops();
 }
 
-// get available RAM of ATI GPUs
-// NOTE: last time we checked, repeated calls to this crash the driver
+// get available RAM of ATI GPU
 //
 void COPROC_ATI::get_available_ram() {
     CALdevicestatus st;
     CALdevice dev;
-    int i, retval;
+    int retval;
+
+    available_ram = attribs.localRAM*MEGA;
 
     st.struct_size = sizeof(CALdevicestatus);
 
-    // avoid crash if faked GPU
-    if (!__calInit) {
-        for (i=0; i<count; i++) {
-            available_ram[i] = available_ram_fake[i];
-            available_ram_unknown[i] = false;
+    retval = (*__calDeviceOpen)(&dev, device_num);
+    if (retval) {
+        if (log_flags.coproc_debug) {
+            msg_printf(0, MSG_INFO,
+                "[coproc] calDeviceOpen(%d) returned %d", device_num, retval
+            );
         }
         return;
     }
-    for (i=0; i<count; i++) {
-        available_ram[i] = 0;
-        available_ram_unknown[i] = true;
-        int devnum = device_nums[i];
-        retval = (*__calDeviceOpen)(&dev, devnum);
-        if (retval) {
-            if (log_flags.coproc_debug) {
-                msg_printf(0, MSG_INFO,
-                    "[coproc] calDeviceOpen(%d) returned %d", devnum, retval
-                );
-            }
-            continue;
+    retval = (*__calDeviceGetStatus)(&st, dev);
+    if (retval) {
+        if (log_flags.coproc_debug) {
+            msg_printf(0, MSG_INFO,
+                "[coproc] calDeviceGetStatus(%d) returned %d",
+                device_num, retval
+            );
         }
-        retval = (*__calDeviceGetStatus)(&st, dev);
-        if (retval) {
-            if (log_flags.coproc_debug) {
-                msg_printf(0, MSG_INFO,
-                    "[coproc] calDeviceGetStatus(%d) returned %d",
-                    devnum, retval
-                );
-            }
-            (*__calDeviceClose)(dev);
-            continue;
-        }
-        available_ram[i] = st.availLocalRAM*MEGA;
-        available_ram_unknown[i] = false;
         (*__calDeviceClose)(dev);
+        return;
     }
+    available_ram = st.availLocalRAM*MEGA;
+    (*__calDeviceClose)(dev);
 }
 
 bool COPROC_ATI::matches(OPENCL_DEVICE_PROP& OpenCLprop) {
diff --git a/client/cpu_sched.cpp b/client/cpu_sched.cpp
index 6a934c9c3c..533a7dd5af 100644
--- a/client/cpu_sched.cpp
+++ b/client/cpu_sched.cpp
@@ -1156,7 +1156,7 @@ static inline void confirm_current_assignment(
                 cp->type, j, rp->name
             );
         }
-        cp->available_ram[j] -= rp->avp->gpu_ram;
+        cp->available_ram_temp[j] -= rp->avp->gpu_ram;
     }
 }
 
@@ -1169,22 +1169,19 @@ static inline bool get_fractional_assignment(
     // try to assign an instance that's already fractionally assigned
     //
     for (i=0; i<cp->count; i++) {
-        if (cp->available_ram_unknown[i]) {
-            continue;
-        }
         if (excluded(rp, cp, i)) {
             continue;
         }
         if ((cp->usage[i] || cp->pending_usage[i])
             && (cp->usage[i] + cp->pending_usage[i] + usage <= 1)
         ) {
-            if (rp->avp->gpu_ram > cp->available_ram[i]) {
+            if (rp->avp->gpu_ram > cp->available_ram_temp[i]) {
                 defer_sched = true;
                 continue;
             }
             rp->coproc_indices[0] = i;
             cp->usage[i] += usage;
-            cp->available_ram[i] -= rp->avp->gpu_ram;
+            cp->available_ram_temp[i] -= rp->avp->gpu_ram;
             if (log_flags.coproc_debug) {
                 msg_printf(rp->project, MSG_INFO,
                     "[coproc] Assigning %f of %s instance %d to %s",
@@ -1198,20 +1195,17 @@ static inline bool get_fractional_assignment(
     // failing that, assign an unreserved instance
     //
     for (i=0; i<cp->count; i++) {
-        if (cp->available_ram_unknown[i]) {
-            continue;
-        }
         if (excluded(rp, cp, i)) {
             continue;
         }
         if (!cp->usage[i]) {
-            if (rp->avp->gpu_ram > cp->available_ram[i]) {
+            if (rp->avp->gpu_ram > cp->available_ram_temp[i]) {
                 defer_sched = true;
                 continue;
             }
             rp->coproc_indices[0] = i;
             cp->usage[i] += usage;
-            cp->available_ram[i] -= rp->avp->gpu_ram;
+            cp->available_ram_temp[i] -= rp->avp->gpu_ram;
             if (log_flags.coproc_debug) {
                 msg_printf(rp->project, MSG_INFO,
                     "[coproc] Assigning %f of %s free instance %d to %s",
@@ -1241,14 +1235,11 @@ static inline bool get_integer_assignment(
     //
     int nfree = 0;
     for (i=0; i<cp->count; i++) {
-        if (cp->available_ram_unknown[i]) {
-            continue;
-        }
         if (excluded(rp, cp, i)) {
             continue;
         }
         if (!cp->usage[i]) {
-            if (rp->avp->gpu_ram > cp->available_ram[i]) {
+            if (rp->avp->gpu_ram > cp->available_ram_temp[i]) {
                 defer_sched = true;
                 continue;
             };
@@ -1275,18 +1266,15 @@ static inline bool get_integer_assignment(
     // assign non-pending instances first
 
     for (i=0; i<cp->count; i++) {
-        if (cp->available_ram_unknown[i]) {
-            continue;
-        }
         if (excluded(rp, cp, i)) {
             continue;
         }
         if (!cp->usage[i]
             && !cp->pending_usage[i]
-            && (rp->avp->gpu_ram <= cp->available_ram[i])
+            && (rp->avp->gpu_ram <= cp->available_ram_temp[i])
         ) {
             cp->usage[i] = 1;
-            cp->available_ram[i] -= rp->avp->gpu_ram;
+            cp->available_ram_temp[i] -= rp->avp->gpu_ram;
             rp->coproc_indices[n++] = i;
             if (log_flags.coproc_debug) {
                 msg_printf(rp->project, MSG_INFO,
@@ -1301,17 +1289,14 @@ static inline bool get_integer_assignment(
     // if needed, assign pending instances
 
     for (i=0; i<cp->count; i++) {
-        if (cp->available_ram_unknown[i]) {
-            continue;
-        }
         if (excluded(rp, cp, i)) {
             continue;
         }
         if (!cp->usage[i]
-            && (rp->avp->gpu_ram <= cp->available_ram[i])
+            && (rp->avp->gpu_ram <= cp->available_ram_temp[i])
         ) {
             cp->usage[i] = 1;
-            cp->available_ram[i] -= rp->avp->gpu_ram;
+            cp->available_ram_temp[i] -= rp->avp->gpu_ram;
             rp->coproc_indices[n++] = i;
             if (log_flags.coproc_debug) {
                 msg_printf(rp->project, MSG_INFO,
@@ -1344,8 +1329,7 @@ static void copy_available_ram(COPROC& cp, const char* name) {
     int rt = rsc_index(name);
     if (rt > 0) {
         for (int i=0; i<MAX_COPROC_INSTANCES; i++) {
-            coprocs.coprocs[rt].available_ram[i] = cp.available_ram[i];
-            coprocs.coprocs[rt].available_ram_unknown[i] = cp.available_ram_unknown[i];
+            coprocs.coprocs[rt].available_ram_temp[i] = cp.available_ram;
         }
     }
 }
diff --git a/doc/index.php b/doc/index.php
index de23ab7ff7..cf37a87f71 100644
--- a/doc/index.php
+++ b/doc/index.php
@@ -49,9 +49,9 @@ function show_totals() {
     $users = number_format($users);
     $hosts = number_format($hosts);
 
-    $teraflops = number_format($credit_day/200000, 2);
+    $petaflops = number_format($credit_day/200000000, 3);
     echo tra("Active:")." $users ".tra("volunteers,")." $hosts ".tra("computers.
-")."        <br>".tra("24-hour average:")." $teraflops ".tra("TeraFLOPS.")."
+")."        <br>".tra("24-hour average:")." $petaflops ".tra("PetaFLOPS.")."
         <hr size=1 width=\"80%\">
     ";
 }
diff --git a/lib/coproc.cpp b/lib/coproc.cpp
index 8ceaf54451..3ec76e878b 100644
--- a/lib/coproc.cpp
+++ b/lib/coproc.cpp
@@ -300,9 +300,9 @@ void COPROC_NVIDIA::description(char* buf) {
     } else {
         strcpy(vers, "unknown");
     }
-    sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0f GFLOPS peak)",
+    sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0fMB available, %.0f GFLOPS peak)",
         prop.name, vers, cuda_version, prop.major, prop.minor,
-        prop.totalGlobalMem/(1024.*1024.), peak_flops/1e9
+        prop.totalGlobalMem/MEGA, available_ram/MEGA, peak_flops/1e9
     );
 }
 
@@ -663,7 +663,10 @@ int COPROC_ATI::parse(XML_PARSER& xp) {
 }
 
+// Write a one-line, human-readable summary of this ATI GPU into buf
+// (buf is written with sprintf, so the caller must supply a large enough buffer).
+// attribs.localRAM is already in MB; available_ram is in bytes.
 void COPROC_ATI::description(char* buf) {
-    sprintf(buf, "%s (CAL version %s, %.0fMB, %.0f GFLOPS peak)",
-        name, version, attribs.localRAM/1024.*1024., peak_flops/1.e9
+    sprintf(buf, "%s (CAL version %s, %.0fMB, %.0fMB available, %.0f GFLOPS peak)",
+        name, version, (double)attribs.localRAM, available_ram/MEGA, peak_flops/1.e9
     );
 }
diff --git a/lib/coproc.h b/lib/coproc.h
index 0716e2da1c..3b625d8f02 100644
--- a/lib/coproc.h
+++ b/lib/coproc.h
@@ -95,8 +95,9 @@ struct COPROC_REQ {
     int parse(XML_PARSER&);
 };
 
-// For now, there will be some duplication between the values present in 
-// the OPENCL_DEVICE_PROP struct and the NVIDA and / or ATI structs
+// For now, there will be some duplication between the values in 
+// the OPENCL_DEVICE_PROP struct and the NVIDIA/ATI structs
+//
 struct OPENCL_DEVICE_PROP {
     cl_device_id device_id;
     char name[256];                     // Device name
@@ -159,10 +160,9 @@ struct COPROC {
     int opencl_device_count;
     bool running_graphics_app[MAX_COPROC_INSTANCES];
         // is this GPU running a graphics app (NVIDIA only)
-    double available_ram[MAX_COPROC_INSTANCES];
-    bool available_ram_unknown[MAX_COPROC_INSTANCES];
-        // couldn't get available RAM; don't start new apps on this instance
-    double available_ram_fake[MAX_COPROC_INSTANCES];
+    double available_ram;
+    double available_ram_temp[MAX_COPROC_INSTANCES];
+        // used during job scheduling
 
     double last_print_time;
     
@@ -189,13 +189,11 @@ struct COPROC {
         req_instances = 0;
         opencl_device_count = 0;
         estimated_delay = 0;
+        available_ram = 0;
         for (int i=0; i<MAX_COPROC_INSTANCES; i++) {
             device_nums[i] = 0;
             opencl_device_ids[i] = 0;
             running_graphics_app[i] = true;
-            available_ram[i] = 0;
-            available_ram_fake[i] = 0;
-            available_ram_unknown[i] = true;
         }
         memset(&opencl_prop, 0, sizeof(opencl_prop));
     }
@@ -212,7 +210,6 @@ struct COPROC {
     COPROC() {
         clear();
     }
-    void print_available_ram();
 };
 
 // based on cudaDeviceProp from /usr/local/cuda/include/driver_types.h
@@ -285,7 +282,7 @@ struct COPROC_NVIDIA : public COPROC {
 
     bool check_running_graphics_app();
     bool matches(OPENCL_DEVICE_PROP& OpenCLprop);
-    void fake(int driver_version, double ram, int count);
+    void fake(int driver_version, double ram, double avail_ram, int count);
 
 };
 
@@ -318,7 +315,7 @@ struct COPROC_ATI : public COPROC {
         // clock is in MHz
         peak_flops = (x>0)?x:5e10;
 	}
-    void fake(double, int);
+    void fake(double ram, double avail_ram, int);
 };
 
 struct COPROCS {
diff --git a/sched/sample_trivial_validator.cpp b/sched/sample_trivial_validator.cpp
index d713a17032..92ff335af2 100644
--- a/sched/sample_trivial_validator.cpp
+++ b/sched/sample_trivial_validator.cpp
@@ -15,8 +15,7 @@
 // You should have received a copy of the GNU Lesser General Public License
 // along with BOINC.  If not, see <http://www.gnu.org/licenses/>.
 
-// A sample validator that grants credit to any result whose CPU time is above
-// a certain minimum
+// A sample validator that accepts all results
 
 #include <cstdlib>
 #include "config.h"
@@ -24,18 +23,12 @@
 
 using std::vector;
 
-static const double MIN_CPU_TIME = 0;
-
-int init_result(RESULT& /*result*/, void*& /*data*/) {
+int init_result(RESULT&, void*&) {
     return 0;
 }
 
-int compare_results(
-    RESULT & r1, void* /*data1*/,
-    RESULT const& r2, void* /*data2*/,
-    bool& match
-) {
-    match = (r1.cpu_time >= MIN_CPU_TIME && r2.cpu_time >= MIN_CPU_TIME);
+int compare_results(RESULT&, void*, RESULT const&, void*, bool& match) {
+    match = true;
     return 0;
 }