- client/scheduler: standardize the FLOPS estimate between NVIDIA and ATI.

Make them both peak FLOPS, according to the formula supplied by the manufacturer. The impact on the client is minor: (1) the startup message describing the GPU; (2) the weight of the resource type in computing long-term debt. On the server, I changed the example app_plan() function to assume that app FLOPS is 20% of peak FLOPS (that's about what it is for SETI@home). svn path=/trunk/boinc/; revision=19310
2009-10-16 00:13:01 +00:00 · 2009-10-16 00:13:01 +00:00 · fe2a18f282
parent 40c159c6b0
commit fe2a18f282
8 changed files with 60 additions and 47 deletions
--- a/29
+++ b/29
@ -377,7 +377,7 @@ David  14 Jan 2009
        app_control.cpp

 David  14 Jan 2009
-    - client: clamp long term debts tp +- 1 week
+    - client: clamp long term debts to +- 1 week
    - client: fix CUDA debt calculation
    - client: don't accumulate debt if project->dont_request_more_work
    - client: improves messages
@ -1027,7 +1027,7 @@ David  30 Jan 2009
        scheduler_op.cpp

 David  31 Jan 2009
-    - client: there was a problem with how the round simulator
+    - client: there was a problem with how the round-robin simulator
        worked in the presence of coprocessors.
        The simulator maintained per-project queues of pending jobs.
        When a job finished (in the simulation) it would get
@ -5236,7 +5236,7 @@ David  10 June 2009
        sched_result.cpp

 David  10 June 2009
-    - web: allow projects to account email addresses in certain domains.
+    - web: allow projects to ban email addresses in certain domains.
        Add the following to html/project/project.inc:

        $banned_email_domains = array(
@ -6455,7 +6455,7 @@ David  23 July 2009

 David  24 July 2009
    - client: in get_project_config_poll() GUI RPC,
-        return ERR_IN_PROGRESS is the reference site check is in progress.
+        return ERR_IN_PROGRESS if the reference site check is in progress.
        This hopefully fixes a bug where:
        - the user is connected via a proxy
        - the manager is run for the first time, and an attach is tried
@ -8674,3 +8674,24 @@ David  14 Oct 2009

 David  14 Oct 2009
    - undo the above
+
+David  15 Oct 2009
+    - client/scheduler: standardize the FLOPS estimate between NVIDIA and ATI.
+        Make them both peak FLOPS,
+        according to the formula supplied by the manufacturer.
+
+        The impact on the client is minor:
+        - the startup message describing the GPU
+        - the weight of the resource type in computing long-term debt
+
+        On the server, I changed the example app_plan() function
+        to assume that app FLOPS is 20% of peak FLOPS
+        (that's about what it is for SETI@home)
+
+    client/
+        client_state.cpp
+        work_fetch.cpp
+    lib/
+        coproc.cpp,h
+    sched/
+        sched_customize.cpp
--- a/4
+++ b/4
@ -9253,7 +9253,7 @@ David  5 Nov 2008
 David  5 Nov 2008
    - client: add OS name into the hash for host CPID
        (for multi-OS hosts)
-    - scheduler: use sqrt(x) instead of x in stop-checking
+    - scheduler: use sqrt(x) instead of x in spot-checking
        for single redundancy.

    client/
@ -9285,7 +9285,7 @@ Charlie 5 Nov 2008
            project.pbxproj

 David  6 Nov
-    - API: remove debugging printf from trickly down code
+    - API: remove debugging printf from trickle down code
    - API: use non-verbose option to zip
    - scheduler: if multiple_client_per_host is set,
        don't mark results as over if get repeat CPID
--- a/client/client_state.cpp
+++ b/client/client_state.cpp
@ -322,11 +322,9 @@ int CLIENT_STATE::init() {
            // assume app will run at peak CPU speed, not peak GPU
            //
            if (avp->ncudas) {
-                //avp->flops += avp->ncudas * coproc_cuda->flops_estimate();
                avp->flops += avp->ncudas * host_info.p_fpops;
            }
            if (avp->natis) {
-                //avp->flops += avp->natis * coproc_ati->flops_estimate();
                avp->flops += avp->natis * host_info.p_fpops;
            }
        }
--- a/client/work_fetch.cpp
+++ b/client/work_fetch.cpp
@ -899,17 +899,19 @@ void WORK_FETCH::set_initial_work_request() {
 void WORK_FETCH::init() {
    cpu_work_fetch.init(RSC_TYPE_CPU, gstate.ncpus, 1);

+    // use 20% as a rough estimate of GPU efficiency
+
    if (coproc_cuda) {
        cuda_work_fetch.init(
            RSC_TYPE_CUDA, coproc_cuda->count,
-            coproc_cuda->flops_estimate()/gstate.host_info.p_fpops
+            0.2*coproc_cuda->peak_flops()/gstate.host_info.p_fpops
        );
    }
    if (coproc_ati) {
        ati_work_fetch.init(
            RSC_TYPE_ATI,
            coproc_ati->count,
-            coproc_ati->flops_estimate()/gstate.host_info.p_fpops
+            0.2*coproc_ati->peak_flops()/gstate.host_info.p_fpops
        );
    }

--- a/doc/links.php
+++ b/doc/links.php
@ -295,6 +295,7 @@ language("Polish", array(
    site("http://www.boinc.prv.pl", "BOINC@Kolobrzeg"),
    site("http://www.boincatpoland.org", "BOINC@Poland"),
    //site("http://www.boinc.pl", "www.boinc.pl"),
+    site("http://www.tomaszpawel.republika.pl/", "TomaszPawelTeam"),
 ));
 language("Portuguese", array(
    site( "http://portugalathome.pt.vu/", "Portugal@home"),
--- a/lib/coproc.cpp
+++ b/lib/coproc.cpp
@ -191,8 +191,8 @@ int cuda_compare(COPROC_CUDA& c1, COPROC_CUDA& c2, bool loose) {
    }
    if (c1.prop.totalGlobalMem > c2.prop.totalGlobalMem) return 1;
    if (c1.prop.totalGlobalMem < c2.prop.totalGlobalMem) return -1;
-    double s1 = c1.flops_estimate();
-    double s2 = c2.flops_estimate();
+    double s1 = c1.peak_flops();
+    double s2 = c2.peak_flops();
    if (s1 > s2) return 1;
    if (s1 < s2) return -1;
    return 0;
@ -488,9 +488,9 @@ void COPROC_CUDA::description(char* buf) {
    } else {
        strcpy(vers, "unknown");
    }
-    sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, est. %.0fGFLOPS)",
+    sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0f GFLOPS peak)",
        prop.name, vers, cuda_version, prop.major, prop.minor,
-        prop.totalGlobalMem/(1024.*1024.), flops_estimate()/1e9
+        prop.totalGlobalMem/(1024.*1024.), peak_flops()/1e9
    );
 }

@ -918,7 +918,7 @@ void COPROC_ATI::get(COPROCS& coprocs,
        char buf[256], buf2[256];
        if (i == 0) {
            best = gpus[i];
-        } else if (gpus[i].flops_estimate() > best.flops_estimate()) {
+        } else if (gpus[i].peak_flops() > best.peak_flops()) {
            best = gpus[i];
        }
        gpus[i].description(buf);
@ -1102,8 +1102,8 @@ int COPROC_ATI::parse(FILE* fin) {
 }

 void COPROC_ATI::description(char* buf) {
-    sprintf(buf, "%s (CAL version %s, %.0fMB, %.0fGFLOPS)",
-        name, version, attribs.localRAM/1024.*1024., flops_estimate()/1.e9
+    sprintf(buf, "%s (CAL version %s, %.0fMB, %.0f GFLOPS peak)",
+        name, version, attribs.localRAM/1024.*1024., peak_flops()/1.e9
    );
 }

--- a/lib/coproc.h
+++ b/lib/coproc.h
@ -256,13 +256,16 @@ struct COPROC_CUDA : public COPROC {
    int parse(FILE*);
    virtual bool is_usable();

-    // rough estimate of FLOPS
-    // The following is based on SETI@home CUDA,
-    // which gets 50 GFLOPS on a Quadro FX 3700,
-    // which has 14 MPs and a clock rate of 1.25 MHz
+    // Estimate of peak FLOPS.
+    // FLOPS for a given app may be much less;
+    // e.g. for SETI@home it's about 0.18 of the peak
    //
-    inline double flops_estimate() {
-        double x = (prop.clockRate * prop.multiProcessorCount)*5e10/(14*1.25e6);
+    inline double peak_flops() {
+        // clock rate is scaled down by 1000;
+        // each processor has 8 cores;
+        // each core can do 2 ops per clock
+        //
+        double x = (1000.*prop.clockRate) * prop.multiProcessorCount * 8. * 2.;
        return x?x:5e10;
    }

@ -314,7 +317,7 @@ struct COPROC_ATI : public COPROC {
    void clear();
    int parse(FILE*);
    virtual bool is_usable();
-    inline double flops_estimate() {
+    inline double peak_flops() {
 		double x = attribs.numberOfSIMD * attribs.wavefrontSize * 2.5 * attribs.engineClock * 1.e6;
        // clock is in MHz
        return x?x:5e10;
--- a/sched/sched_customize.cpp
+++ b/sched/sched_customize.cpp
@ -71,9 +71,9 @@ bool wu_is_infeasible_custom(WORKUNIT& wu, APP& app, BEST_APP_VERSION& bav) {
    //
    if (bav.host_usage.ncudas) {
        if (!strstr(wu.name, "slow")) {
-            bav.host_usage.flops = g_request->coproc_cuda->flops_estimate()/2;
+            bav.host_usage.flops = g_request->coproc_cuda->peak_flops()/10;
        } else {
-            bav.host_usage.flops = g_request->coproc_cuda->flops_estimate();
+            bav.host_usage.flops = g_request->coproc_cuda->peak_flops()/5;
        }
    }
 #endif
@ -217,7 +217,7 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
        //   2. ati13ati
        //   3. ati13amd
        //   4. ati
-        hu.flops = cp->flops_estimate();
+        hu.flops = cp->peak_flops()/5;
        if (!strcmp(plan_class, "ati13amd")) {
            hu.flops *= 1.01;
        }
@ -273,6 +273,8 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
            return false;
        } 

+        double min_ram;
+
        // for CUDA 2.3, we need to check the CUDA RT version.
        // Old BOINC clients report display driver version;
        // newer ones report CUDA RT version
@ -298,22 +300,7 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
                add_no_work_message("CUDA version 2.3 needed");
                return false;
            }
-#ifdef PLAN_CUDA23_MIN_RAM
-            if (cp->prop.dtotalGlobalMem < PLAN_CUDA23_MIN_RAM) {
-                if (config.debug_version_select) {
-                    log_messages.printf(MSG_NORMAL,
-                        "[version] CUDA23 mem %d < %d\n",
-                        cp->prop.dtotalGlobalMem, PLAN_CUDA23_MIN_RAM
-                    );
-                }
-                sprintf(buf,
-                    "Your NVIDIA GPU has insufficient memory (need %.0fMB)",
-                    PLAN_CUDA23_MIN_RAM/MEGA
-                );
-                add_no_work_message(buf);
-                return false;
-            }
-#endif
+            min_ram = PLAN_CUDA23_MIN_RAM;
        } else {
            if (cp->display_driver_version && cp->display_driver_version < PLAN_CUDA_MIN_DRIVER_VERSION) {
                if (config.debug_version_select) {
@ -328,24 +315,25 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
                add_no_work_message(buf);
                return false;
            }
+            min_ram = PLAN_CUDA_MIN_RAM;
        }

-        if (cp->prop.dtotalGlobalMem < PLAN_CUDA_MIN_RAM) {
+        if (cp->prop.dtotalGlobalMem < min_ram) {
            if (config.debug_version_select) {
                log_messages.printf(MSG_NORMAL,
                    "[version] CUDA mem %d < %d\n",
-                    cp->prop.dtotalGlobalMem, PLAN_CUDA_MIN_RAM
+                    cp->prop.dtotalGlobalMem, min_ram
                );
            }
            sprintf(buf,
                "Your NVIDIA GPU has insufficient memory (need %.0fMB)",
-                PLAN_CUDA_MIN_RAM/MEGA
+                min_ram/MEGA
            );
            add_no_work_message(buf);
            return false;
        }

-        hu.flops = cp->flops_estimate();
+        hu.flops = cp->peak_flops()/5;
        if (!strcmp(plan_class, "cuda23")) {
            hu.flops *= 1.01;
        }