From fe2a18f282bbcdd2b1486ff6b5f1fde85af963d5 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Fri, 16 Oct 2009 00:13:01 +0000 Subject: [PATCH] - client/scheduler: standardize the FLOPS estimate between NVIDIA and ATI. Make them both peak FLOPS, according to the formula supplied by the manufacturer. The impact on the client is minor: - the startup message describing the GPU - the weight of the resource type in computing long-term debt On the server, I changed the example app_plan() function to assume that app FLOPS is 20% of peak FLOPS (that's about what it is for SETI@home) svn path=/trunk/boinc/; revision=19310 --- checkin_notes | 29 +++++++++++++++++++++++++---- checkin_notes_2008 | 4 ++-- client/client_state.cpp | 2 -- client/work_fetch.cpp | 6 ++++-- doc/links.php | 1 + lib/coproc.cpp | 14 +++++++------- lib/coproc.h | 17 ++++++++++------- sched/sched_customize.cpp | 34 +++++++++++----------------------- 8 files changed, 60 insertions(+), 47 deletions(-) diff --git a/checkin_notes b/checkin_notes index 3ee02de836..2019d338c8 100644 --- a/checkin_notes +++ b/checkin_notes @@ -377,7 +377,7 @@ David 14 Jan 2009 app_control.cpp David 14 Jan 2009 - - client: clamp long term debts tp +- 1 week + - client: clamp long term debts to +- 1 week - client: fix CUDA debt calculation - client: don't accumulate debt if project->dont_request_more_work - client: improves messages @@ -1027,7 +1027,7 @@ David 30 Jan 2009 scheduler_op.cpp David 31 Jan 2009 - - client: there was a problem with how the round simulator + - client: there was a problem with how the round-robin simulator worked in the presence of coprocessors. The simulator maintained per-project queues of pending jobs. When a job finished (in the simulation) it would get @@ -5236,7 +5236,7 @@ David 10 June 2009 sched_result.cpp David 10 June 2009 - - web: allow projects to account email addresses in certain domains. + - web: allow projects to ban email addresses in certain domains. Add the following to html/project/project.inc: $banned_email_domains = array( @@ -6455,7 +6455,7 @@ David 23 July 2009 David 24 July 2009 - client: in get_project_config_poll() GUI RPC, - return ERR_IN_PROGRESS is the reference site check is in progress. + return ERR_IN_PROGRESS if the reference site check is in progress. This hopefully fixes a bug where: - the user is connected via a proxy - the manager is run for the first time, and an attach is tried @@ -8674,3 +8674,24 @@ David 14 Oct 2009 David 14 Oct 2009 - undo the above + +David 15 Oct 2009 + - client/scheduler: standardize the FLOPS estimate between NVIDIA and ATI. + Make them both peak FLOPS, + according to the formula supplied by the manufacturer. + + The impact on the client is minor: + - the startup message describing the GPU + - the weight of the resource type in computing long-term debt + + On the server, I changed the example app_plan() function + to assume that app FLOPS is 20% of peak FLOPS + (that's about what it is for SETI@home) + + client/ + client_state.cpp + work_fetch.cpp + lib/ + coproc.cpp,h + sched/ + sched_customize.cpp diff --git a/checkin_notes_2008 b/checkin_notes_2008 index ce4bf1efe4..a9f9dafdc5 100644 --- a/checkin_notes_2008 +++ b/checkin_notes_2008 @@ -9253,7 +9253,7 @@ David 5 Nov 2008 David 5 Nov 2008 - client: add OS name into the hash for host CPID (for multi-OS hosts) - - scheduler: use sqrt(x) instead of x in stop-checking + - scheduler: use sqrt(x) instead of x in spot-checking for single redundancy. 
client/ @@ -9285,7 +9285,7 @@ Charlie 5 Nov 2008 project.pbxproj David 6 Nov - - API: remove debugging printf from trickly down code + - API: remove debugging printf from trickle down code - API: use non-verbose option to zip - scheduler: if multiple_client_per_host is set, don't mark results as over if get repeat CPID diff --git a/client/client_state.cpp b/client/client_state.cpp index 648e12cd8e..52a70e10af 100644 --- a/client/client_state.cpp +++ b/client/client_state.cpp @@ -322,11 +322,9 @@ int CLIENT_STATE::init() { // assume app will run at peak CPU speed, not peak GPU // if (avp->ncudas) { - //avp->flops += avp->ncudas * coproc_cuda->flops_estimate(); avp->flops += avp->ncudas * host_info.p_fpops; } if (avp->natis) { - //avp->flops += avp->natis * coproc_ati->flops_estimate(); avp->flops += avp->natis * host_info.p_fpops; } } diff --git a/client/work_fetch.cpp b/client/work_fetch.cpp index 22dc1e02e3..c2877e12ea 100644 --- a/client/work_fetch.cpp +++ b/client/work_fetch.cpp @@ -899,17 +899,19 @@ void WORK_FETCH::set_initial_work_request() { void WORK_FETCH::init() { cpu_work_fetch.init(RSC_TYPE_CPU, gstate.ncpus, 1); + // use 20% as a rough estimate of GPU efficiency + if (coproc_cuda) { cuda_work_fetch.init( RSC_TYPE_CUDA, coproc_cuda->count, - coproc_cuda->flops_estimate()/gstate.host_info.p_fpops + 0.2*coproc_cuda->peak_flops()/gstate.host_info.p_fpops ); } if (coproc_ati) { ati_work_fetch.init( RSC_TYPE_ATI, coproc_ati->count, - coproc_ati->flops_estimate()/gstate.host_info.p_fpops + 0.2*coproc_ati->peak_flops()/gstate.host_info.p_fpops ); } diff --git a/doc/links.php b/doc/links.php index e58b850bbf..43cf9d470b 100644 --- a/doc/links.php +++ b/doc/links.php @@ -295,6 +295,7 @@ language("Polish", array( site("http://www.boinc.prv.pl", "BOINC@Kolobrzeg"), site("http://www.boincatpoland.org", "BOINC@Poland"), //site("http://www.boinc.pl", "www.boinc.pl"), + site("http://www.tomaszpawel.republika.pl/", "TomaszPawelTeam"), )); language("Portuguese", array( site( "http://portugalathome.pt.vu/", "Portugal@home"), diff --git a/lib/coproc.cpp b/lib/coproc.cpp index 73687d1c49..0a8f2d39e5 100644 --- a/lib/coproc.cpp +++ b/lib/coproc.cpp @@ -191,8 +191,8 @@ int cuda_compare(COPROC_CUDA& c1, COPROC_CUDA& c2, bool loose) { } if (c1.prop.totalGlobalMem > c2.prop.totalGlobalMem) return 1; if (c1.prop.totalGlobalMem < c2.prop.totalGlobalMem) return -1; - double s1 = c1.flops_estimate(); - double s2 = c2.flops_estimate(); + double s1 = c1.peak_flops(); + double s2 = c2.peak_flops(); if (s1 > s2) return 1; if (s1 < s2) return -1; return 0; @@ -488,9 +488,9 @@ void COPROC_CUDA::description(char* buf) { } else { strcpy(vers, "unknown"); } - sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, est. 
%.0fGFLOPS)", + sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0f GFLOPS peak)", prop.name, vers, cuda_version, prop.major, prop.minor, - prop.totalGlobalMem/(1024.*1024.), flops_estimate()/1e9 + prop.totalGlobalMem/(1024.*1024.), peak_flops()/1e9 ); } @@ -918,7 +918,7 @@ void COPROC_ATI::get(COPROCS& coprocs, char buf[256], buf2[256]; if (i == 0) { best = gpus[i]; - } else if (gpus[i].flops_estimate() > best.flops_estimate()) { + } else if (gpus[i].peak_flops() > best.peak_flops()) { best = gpus[i]; } gpus[i].description(buf); @@ -1102,8 +1102,8 @@ int COPROC_ATI::parse(FILE* fin) { } void COPROC_ATI::description(char* buf) { - sprintf(buf, "%s (CAL version %s, %.0fMB, %.0fGFLOPS)", - name, version, attribs.localRAM/1024.*1024., flops_estimate()/1.e9 + sprintf(buf, "%s (CAL version %s, %.0fMB, %.0f GFLOPS peak)", + name, version, attribs.localRAM/1024.*1024., peak_flops()/1.e9 ); } diff --git a/lib/coproc.h b/lib/coproc.h index 9f78fa85f7..fadceb938c 100644 --- a/lib/coproc.h +++ b/lib/coproc.h @@ -256,13 +256,16 @@ struct COPROC_CUDA : public COPROC { int parse(FILE*); virtual bool is_usable(); - // rough estimate of FLOPS - // The following is based on SETI@home CUDA, - // which gets 50 GFLOPS on a Quadro FX 3700, - // which has 14 MPs and a clock rate of 1.25 MHz + // Estimate of peak FLOPS. + // FLOPS for a given app may be much less; + // e.g. for SETI@home it's about 0.18 of the peak // - inline double flops_estimate() { - double x = (prop.clockRate * prop.multiProcessorCount)*5e10/(14*1.25e6); + inline double peak_flops() { + // clock rate is scaled down by 1000; + // each processor has 8 cores; + // each core can do 2 ops per clock + // + double x = (1000.*prop.clockRate) * prop.multiProcessorCount * 8. * 2.; return x?x:5e10; } @@ -314,7 +317,7 @@ struct COPROC_ATI : public COPROC { void clear(); int parse(FILE*); virtual bool is_usable(); - inline double flops_estimate() { + inline double peak_flops() { double x = attribs.numberOfSIMD * attribs.wavefrontSize * 2.5 * attribs.engineClock * 1.e6; // clock is in MHz return x?x:5e10; diff --git a/sched/sched_customize.cpp b/sched/sched_customize.cpp index 97a23649c0..49f818be75 100644 --- a/sched/sched_customize.cpp +++ b/sched/sched_customize.cpp @@ -71,9 +71,9 @@ bool wu_is_infeasible_custom(WORKUNIT& wu, APP& app, BEST_APP_VERSION& bav) { // if (bav.host_usage.ncudas) { if (!strstr(wu.name, "slow")) { - bav.host_usage.flops = g_request->coproc_cuda->flops_estimate()/2; + bav.host_usage.flops = g_request->coproc_cuda->peak_flops()/10; } else { - bav.host_usage.flops = g_request->coproc_cuda->flops_estimate(); + bav.host_usage.flops = g_request->coproc_cuda->peak_flops()/5; } } #endif @@ -217,7 +217,7 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) { // 2. ati13ati // 3. ati13amd // 4. ati - hu.flops = cp->flops_estimate(); + hu.flops = cp->peak_flops()/5; if (!strcmp(plan_class, "ati13amd")) { hu.flops *= 1.01; } @@ -273,6 +273,8 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) { return false; } + double min_ram; + // for CUDA 2.3, we need to check the CUDA RT version. 
// Old BOINC clients report display driver version; // newer ones report CUDA RT version @@ -298,22 +300,7 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) { add_no_work_message("CUDA version 2.3 needed"); return false; } -#ifdef PLAN_CUDA23_MIN_RAM - if (cp->prop.dtotalGlobalMem < PLAN_CUDA23_MIN_RAM) { - if (config.debug_version_select) { - log_messages.printf(MSG_NORMAL, - "[version] CUDA23 mem %d < %d\n", - cp->prop.dtotalGlobalMem, PLAN_CUDA23_MIN_RAM - ); - } - sprintf(buf, - "Your NVIDIA GPU has insufficient memory (need %.0fMB)", - PLAN_CUDA23_MIN_RAM/MEGA - ); - add_no_work_message(buf); - return false; - } -#endif + min_ram = PLAN_CUDA23_MIN_RAM; } else { if (cp->display_driver_version && cp->display_driver_version < PLAN_CUDA_MIN_DRIVER_VERSION) { if (config.debug_version_select) { @@ -328,24 +315,25 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) { add_no_work_message(buf); return false; } + min_ram = PLAN_CUDA_MIN_RAM; } - if (cp->prop.dtotalGlobalMem < PLAN_CUDA_MIN_RAM) { + if (cp->prop.dtotalGlobalMem < min_ram) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] CUDA mem %d < %d\n", - cp->prop.dtotalGlobalMem, PLAN_CUDA_MIN_RAM + cp->prop.dtotalGlobalMem, min_ram ); } sprintf(buf, "Your NVIDIA GPU has insufficient memory (need %.0fMB)", - PLAN_CUDA_MIN_RAM/MEGA + min_ram/MEGA ); add_no_work_message(buf); return false; } - hu.flops = cp->flops_estimate(); + hu.flops = cp->peak_flops()/5; if (!strcmp(plan_class, "cuda23")) { hu.flops *= 1.01; }
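
For reference, the arithmetic behind the new estimates can be sketched as a small standalone C++ program. This is only an illustration of the formulas introduced above, not part of the patch; the NVIDIA figures reuse the 14-multiprocessor, 1.25 GHz example from the comment the patch removes, and the ATI figures (10 SIMDs, wavefront size 64, 750 MHz engine clock) are assumed values chosen for illustration.

    // Sketch of the peak-FLOPS formulas introduced above and the
    // 20%-of-peak app estimate used by the example app_plan() and work fetch.
    // Example GPU parameters only; nothing here is read from real hardware.
    #include <cstdio>

    int main() {
        // NVIDIA: clockRate is reported in kHz (hence the factor of 1000);
        // each multiprocessor has 8 cores, each doing 2 ops per clock.
        double clock_rate_khz = 1.25e6;     // 1.25 GHz, from the removed comment
        double n_multiprocessors = 14;      // from the removed comment
        double nvidia_peak = (1000.*clock_rate_khz) * n_multiprocessors * 8. * 2.;

        // ATI: engineClock is in MHz; 2.5 ops per wavefront slot per clock.
        double n_simd = 10, wavefront_size = 64, engine_clock_mhz = 750;  // assumed
        double ati_peak = n_simd * wavefront_size * 2.5 * engine_clock_mhz * 1e6;

        // App FLOPS is taken to be roughly 20% of peak (peak/5).
        printf("NVIDIA: %.0f GFLOPS peak, ~%.0f GFLOPS app\n",
            nvidia_peak/1e9, 0.2*nvidia_peak/1e9
        );
        printf("ATI:    %.0f GFLOPS peak, ~%.0f GFLOPS app\n",
            ati_peak/1e9, 0.2*ati_peak/1e9
        );
        return 0;
    }

With these example inputs the NVIDIA case works out to about 280 GFLOPS peak and ~56 GFLOPS of app FLOPS, consistent with the "50 GFLOPS on a Quadro FX 3700" figure in the old flops_estimate() comment that this patch removes.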