From fe2a18f282bbcdd2b1486ff6b5f1fde85af963d5 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Fri, 16 Oct 2009 00:13:01 +0000 Subject: [PATCH] - client/scheduler: standardize the FLOPS estimate between NVIDIA and ATI. Make them both peak FLOPS, according to the formula supplied by the manufacturer. The impact on the client is minor: - the startup message describing the GPU - the weight of the resource type in computing long-term debt On the server, I changed the example app_plan() function to assume that app FLOPS is 20% of peak FLOPS (that's about what it is for SETI@home) svn path=/trunk/boinc/; revision=19310 --- checkin_notes | 29 +++++++++++++++++++++++++---- checkin_notes_2008 | 4 ++-- client/client_state.cpp | 2 -- client/work_fetch.cpp | 6 ++++-- doc/links.php | 1 + lib/coproc.cpp | 14 +++++++------- lib/coproc.h | 17 ++++++++++------- sched/sched_customize.cpp | 34 +++++++++++----------------------- 8 files changed, 60 insertions(+), 47 deletions(-) diff --git a/checkin_notes b/checkin_notes index 3ee02de836..2019d338c8 100644 --- a/checkin_notes +++ b/checkin_notes @@ -377,7 +377,7 @@ David 14 Jan 2009 app_control.cpp David 14 Jan 2009 - - client: clamp long term debts tp +- 1 week + - client: clamp long term debts to +- 1 week - client: fix CUDA debt calculation - client: don't accumulate debt if project->dont_request_more_work - client: improves messages @@ -1027,7 +1027,7 @@ David 30 Jan 2009 scheduler_op.cpp David 31 Jan 2009 - - client: there was a problem with how the round simulator + - client: there was a problem with how the round-robin simulator worked in the presence of coprocessors. The simulator maintained per-project queues of pending jobs. When a job finished (in the simulation) it would get @@ -5236,7 +5236,7 @@ David 10 June 2009 sched_result.cpp David 10 June 2009 - - web: allow projects to account email addresses in certain domains. + - web: allow projects to ban email addresses in certain domains. Add the following to html/project/project.inc: $banned_email_domains = array( @@ -6455,7 +6455,7 @@ David 23 July 2009 David 24 July 2009 - client: in get_project_config_poll() GUI RPC, - return ERR_IN_PROGRESS is the reference site check is in progress. + return ERR_IN_PROGRESS if the reference site check is in progress. This hopefully fixes a bug where: - the user is connected via a proxy - the manager is run for the first time, and an attach is tried @@ -8674,3 +8674,24 @@ David 14 Oct 2009 David 14 Oct 2009 - undo the above + +David 15 Oct 2009 + - client/scheduler: standardize the FLOPS estimate between NVIDIA and ATI. + Make them both peak FLOPS, + according to the formula supplied by the manufacturer. + + The impact on the client is minor: + - the startup message describing the GPU + - the weight of the resource type in computing long-term debt + + On the server, I changed the example app_plan() function + to assume that app FLOPS is 20% of peak FLOPS + (that's about what it is for SETI@home) + + client/ + client_state.cpp + work_fetch.cpp + lib/ + coproc.cpp,h + sched/ + sched_customize.cpp diff --git a/checkin_notes_2008 b/checkin_notes_2008 index ce4bf1efe4..a9f9dafdc5 100644 --- a/checkin_notes_2008 +++ b/checkin_notes_2008 @@ -9253,7 +9253,7 @@ David 5 Nov 2008 David 5 Nov 2008 - client: add OS name into the hash for host CPID (for multi-OS hosts) - - scheduler: use sqrt(x) instead of x in stop-checking + - scheduler: use sqrt(x) instead of x in spot-checking for single redundancy. 
client/ @@ -9285,7 +9285,7 @@ Charlie 5 Nov 2008 project.pbxproj David 6 Nov - - API: remove debugging printf from trickly down code + - API: remove debugging printf from trickle down code - API: use non-verbose option to zip - scheduler: if multiple_client_per_host is set, don't mark results as over if get repeat CPID diff --git a/client/client_state.cpp b/client/client_state.cpp index 648e12cd8e..52a70e10af 100644 --- a/client/client_state.cpp +++ b/client/client_state.cpp @@ -322,11 +322,9 @@ int CLIENT_STATE::init() { // assume app will run at peak CPU speed, not peak GPU // if (avp->ncudas) { - //avp->flops += avp->ncudas * coproc_cuda->flops_estimate(); avp->flops += avp->ncudas * host_info.p_fpops; } if (avp->natis) { - //avp->flops += avp->natis * coproc_ati->flops_estimate(); avp->flops += avp->natis * host_info.p_fpops; } } diff --git a/client/work_fetch.cpp b/client/work_fetch.cpp index 22dc1e02e3..c2877e12ea 100644 --- a/client/work_fetch.cpp +++ b/client/work_fetch.cpp @@ -899,17 +899,19 @@ void WORK_FETCH::set_initial_work_request() { void WORK_FETCH::init() { cpu_work_fetch.init(RSC_TYPE_CPU, gstate.ncpus, 1); + // use 20% as a rough estimate of GPU efficiency + if (coproc_cuda) { cuda_work_fetch.init( RSC_TYPE_CUDA, coproc_cuda->count, - coproc_cuda->flops_estimate()/gstate.host_info.p_fpops + 0.2*coproc_cuda->peak_flops()/gstate.host_info.p_fpops ); } if (coproc_ati) { ati_work_fetch.init( RSC_TYPE_ATI, coproc_ati->count, - coproc_ati->flops_estimate()/gstate.host_info.p_fpops + 0.2*coproc_ati->peak_flops()/gstate.host_info.p_fpops ); } diff --git a/doc/links.php b/doc/links.php index e58b850bbf..43cf9d470b 100644 --- a/doc/links.php +++ b/doc/links.php @@ -295,6 +295,7 @@ language("Polish", array( site("http://www.boinc.prv.pl", "BOINC@Kolobrzeg"), site("http://www.boincatpoland.org", "BOINC@Poland"), //site("http://www.boinc.pl", "www.boinc.pl"), + site("http://www.tomaszpawel.republika.pl/", "TomaszPawelTeam"), )); language("Portuguese", array( site( "http://portugalathome.pt.vu/", "Portugal@home"), diff --git a/lib/coproc.cpp b/lib/coproc.cpp index 73687d1c49..0a8f2d39e5 100644 --- a/lib/coproc.cpp +++ b/lib/coproc.cpp @@ -191,8 +191,8 @@ int cuda_compare(COPROC_CUDA& c1, COPROC_CUDA& c2, bool loose) { } if (c1.prop.totalGlobalMem > c2.prop.totalGlobalMem) return 1; if (c1.prop.totalGlobalMem < c2.prop.totalGlobalMem) return -1; - double s1 = c1.flops_estimate(); - double s2 = c2.flops_estimate(); + double s1 = c1.peak_flops(); + double s2 = c2.peak_flops(); if (s1 > s2) return 1; if (s1 < s2) return -1; return 0; @@ -488,9 +488,9 @@ void COPROC_CUDA::description(char* buf) { } else { strcpy(vers, "unknown"); } - sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, est. 
%.0fGFLOPS)", + sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0f GFLOPS peak)", prop.name, vers, cuda_version, prop.major, prop.minor, - prop.totalGlobalMem/(1024.*1024.), flops_estimate()/1e9 + prop.totalGlobalMem/(1024.*1024.), peak_flops()/1e9 ); } @@ -918,7 +918,7 @@ void COPROC_ATI::get(COPROCS& coprocs, char buf[256], buf2[256]; if (i == 0) { best = gpus[i]; - } else if (gpus[i].flops_estimate() > best.flops_estimate()) { + } else if (gpus[i].peak_flops() > best.peak_flops()) { best = gpus[i]; } gpus[i].description(buf); @@ -1102,8 +1102,8 @@ int COPROC_ATI::parse(FILE* fin) { } void COPROC_ATI::description(char* buf) { - sprintf(buf, "%s (CAL version %s, %.0fMB, %.0fGFLOPS)", - name, version, attribs.localRAM/1024.*1024., flops_estimate()/1.e9 + sprintf(buf, "%s (CAL version %s, %.0fMB, %.0f GFLOPS peak)", + name, version, attribs.localRAM/1024.*1024., peak_flops()/1.e9 ); } diff --git a/lib/coproc.h b/lib/coproc.h index 9f78fa85f7..fadceb938c 100644 --- a/lib/coproc.h +++ b/lib/coproc.h @@ -256,13 +256,16 @@ struct COPROC_CUDA : public COPROC { int parse(FILE*); virtual bool is_usable(); - // rough estimate of FLOPS - // The following is based on SETI@home CUDA, - // which gets 50 GFLOPS on a Quadro FX 3700, - // which has 14 MPs and a clock rate of 1.25 MHz + // Estimate of peak FLOPS. + // FLOPS for a given app may be much less; + // e.g. for SETI@home it's about 0.18 of the peak // - inline double flops_estimate() { - double x = (prop.clockRate * prop.multiProcessorCount)*5e10/(14*1.25e6); + inline double peak_flops() { + // clock rate is scaled down by 1000; + // each processor has 8 cores; + // each core can do 2 ops per clock + // + double x = (1000.*prop.clockRate) * prop.multiProcessorCount * 8. * 2.; return x?x:5e10; } @@ -314,7 +317,7 @@ struct COPROC_ATI : public COPROC { void clear(); int parse(FILE*); virtual bool is_usable(); - inline double flops_estimate() { + inline double peak_flops() { double x = attribs.numberOfSIMD * attribs.wavefrontSize * 2.5 * attribs.engineClock * 1.e6; // clock is in MHz return x?x:5e10; diff --git a/sched/sched_customize.cpp b/sched/sched_customize.cpp index 97a23649c0..49f818be75 100644 --- a/sched/sched_customize.cpp +++ b/sched/sched_customize.cpp @@ -71,9 +71,9 @@ bool wu_is_infeasible_custom(WORKUNIT& wu, APP& app, BEST_APP_VERSION& bav) { // if (bav.host_usage.ncudas) { if (!strstr(wu.name, "slow")) { - bav.host_usage.flops = g_request->coproc_cuda->flops_estimate()/2; + bav.host_usage.flops = g_request->coproc_cuda->peak_flops()/10; } else { - bav.host_usage.flops = g_request->coproc_cuda->flops_estimate(); + bav.host_usage.flops = g_request->coproc_cuda->peak_flops()/5; } } #endif @@ -217,7 +217,7 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) { // 2. ati13ati // 3. ati13amd // 4. ati - hu.flops = cp->flops_estimate(); + hu.flops = cp->peak_flops()/5; if (!strcmp(plan_class, "ati13amd")) { hu.flops *= 1.01; } @@ -273,6 +273,8 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) { return false; } + double min_ram; + // for CUDA 2.3, we need to check the CUDA RT version. 
// Old BOINC clients report display driver version; // newer ones report CUDA RT version @@ -298,22 +300,7 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) { add_no_work_message("CUDA version 2.3 needed"); return false; } -#ifdef PLAN_CUDA23_MIN_RAM - if (cp->prop.dtotalGlobalMem < PLAN_CUDA23_MIN_RAM) { - if (config.debug_version_select) { - log_messages.printf(MSG_NORMAL, - "[version] CUDA23 mem %d < %d\n", - cp->prop.dtotalGlobalMem, PLAN_CUDA23_MIN_RAM - ); - } - sprintf(buf, - "Your NVIDIA GPU has insufficient memory (need %.0fMB)", - PLAN_CUDA23_MIN_RAM/MEGA - ); - add_no_work_message(buf); - return false; - } -#endif + min_ram = PLAN_CUDA23_MIN_RAM; } else { if (cp->display_driver_version && cp->display_driver_version < PLAN_CUDA_MIN_DRIVER_VERSION) { if (config.debug_version_select) { @@ -328,24 +315,25 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) { add_no_work_message(buf); return false; } + min_ram = PLAN_CUDA_MIN_RAM; } - if (cp->prop.dtotalGlobalMem < PLAN_CUDA_MIN_RAM) { + if (cp->prop.dtotalGlobalMem < min_ram) { if (config.debug_version_select) { log_messages.printf(MSG_NORMAL, "[version] CUDA mem %d < %d\n", - cp->prop.dtotalGlobalMem, PLAN_CUDA_MIN_RAM + cp->prop.dtotalGlobalMem, min_ram ); } sprintf(buf, "Your NVIDIA GPU has insufficient memory (need %.0fMB)", - PLAN_CUDA_MIN_RAM/MEGA + min_ram/MEGA ); add_no_work_message(buf); return false; } - hu.flops = cp->flops_estimate(); + hu.flops = cp->peak_flops()/5; if (!strcmp(plan_class, "cuda23")) { hu.flops *= 1.01; }
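
For reference, the arithmetic behind the new estimates can be sketched as a small standalone C++ program. This is only an illustration of the formulas introduced above, not part of the patch; the NVIDIA figures reuse the 14-multiprocessor, 1.25 GHz example from the comment the patch removes, and the ATI figures (10 SIMDs, wavefront size 64, 750 MHz engine clock) are assumed values chosen for illustration.

    // Sketch of the peak-FLOPS formulas introduced above and the
    // 20%-of-peak app estimate used by the example app_plan() and work fetch.
    // Example GPU parameters only; nothing here is read from real hardware.
    #include <cstdio>

    int main() {
        // NVIDIA: clockRate is reported in kHz (hence the factor of 1000);
        // each multiprocessor has 8 cores, each doing 2 ops per clock.
        double clock_rate_khz = 1.25e6;     // 1.25 GHz, from the removed comment
        double n_multiprocessors = 14;      // from the removed comment
        double nvidia_peak = (1000.*clock_rate_khz) * n_multiprocessors * 8. * 2.;

        // ATI: engineClock is in MHz; 2.5 ops per wavefront slot per clock.
        double n_simd = 10, wavefront_size = 64, engine_clock_mhz = 750;  // assumed
        double ati_peak = n_simd * wavefront_size * 2.5 * engine_clock_mhz * 1e6;

        // App FLOPS is taken to be roughly 20% of peak (peak/5).
        printf("NVIDIA: %.0f GFLOPS peak, ~%.0f GFLOPS app\n",
            nvidia_peak/1e9, 0.2*nvidia_peak/1e9
        );
        printf("ATI:    %.0f GFLOPS peak, ~%.0f GFLOPS app\n",
            ati_peak/1e9, 0.2*ati_peak/1e9
        );
        return 0;
    }

With these example inputs the NVIDIA case works out to about 280 GFLOPS peak and ~56 GFLOPS of app FLOPS, consistent with the "50 GFLOPS on a Quadro FX 3700" figure in the old flops_estimate() comment that this patch removes.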