From 7411dd60aa0506319f0ee7e598c70b033cfc685c Mon Sep 17 00:00:00 2001 From: David Anderson Date: Wed, 14 Sep 2011 22:45:26 +0000 Subject: [PATCH] - client: change in the use of GPU available RAM: - measure the available RAM of each GPU when BOINC starts up. If this fails, set available = physical. Show available RAM in startup messages. - use available RAM rather than physical RAM in selecting the "best" GPU instance - report available RAM to the scheduler TODO: change the scheduler to use available rather than physical if it's reported svn path=/trunk/boinc/; revision=24210 --- checkin_notes | 20 ++++++ client/client_state.cpp | 10 +-- client/coproc_detect.cpp | 98 +++++++++++------------------- client/cpu_sched.cpp | 38 ++++-------- doc/index.php | 4 +- lib/coproc.cpp | 8 +-- lib/coproc.h | 21 +++---- sched/sample_trivial_validator.cpp | 15 ++--- 8 files changed, 86 insertions(+), 128 deletions(-) diff --git a/checkin_notes b/checkin_notes index dadfe483e0..f6f38e8ad4 100644 --- a/checkin_notes +++ b/checkin_notes @@ -6032,3 +6032,23 @@ Rom 14 Sept 2011 client/ hostinfo_win.cpp + +David 14 Sept 2011 + - client: change in the use of GPU available RAM: + - measure the available RAM of each GPU when BOINC starts up. + If this fails, set available = physical. + Show available RAM in startup messages. + - use available RAM rather than physical RAM in selecting + the "best" GPU instance + - report available RAM to the scheduler + TODO: change the scheduler to use available rather than physical + if it's reported + + sched/ + sample_trivial_validator.cpp + lib/ + coproc.cpp,h + client/ + client_state.cpp + coproc_detect.cpp + cpu_sched.cpp diff --git a/client/client_state.cpp b/client/client_state.cpp index 7fa101d2b1..db285a5da1 100644 --- a/client/client_state.cpp +++ b/client/client_state.cpp @@ -369,15 +369,11 @@ int CLIENT_STATE::init() { } #if 0 msg_printf(NULL, MSG_INFO, "Faking an NVIDIA GPU"); - coprocs.nvidia.fake(18000, 256*MEGA, 2); - coprocs.nvidia.available_ram_fake[0] = 256*MEGA; - coprocs.nvidia.available_ram_fake[1] = 192*MEGA; + coprocs.nvidia.fake(18000, 256*MEGA, 192*MEGA, 2); #endif #if 0 msg_printf(NULL, MSG_INFO, "Faking an ATI GPU"); - coprocs.ati.fake(512*MEGA, 2); - coprocs.ati.available_ram_fake[0] = 256*MEGA; - coprocs.ati.available_ram_fake[1] = 192*MEGA; + coprocs.ati.fake(512*MEGA, 256*MEGA, 2); #endif } @@ -386,7 +382,6 @@ int CLIENT_STATE::init() { msg_printf(NULL, MSG_INFO, "NVIDIA GPU info taken from cc_config.xml"); } else { coprocs.add(coprocs.nvidia); - coprocs.nvidia.print_available_ram(); } } if (coprocs.have_ati()) { @@ -394,7 +389,6 @@ int CLIENT_STATE::init() { msg_printf(NULL, MSG_INFO, "ATI GPU info taken from cc_config.xml"); } else { coprocs.add(coprocs.ati); - coprocs.ati.print_available_ram(); } } host_info._coprocs = coprocs; diff --git a/client/coproc_detect.cpp b/client/coproc_detect.cpp index 9f6374b168..1efe05233c 100644 --- a/client/coproc_detect.cpp +++ b/client/coproc_detect.cpp @@ -101,24 +101,6 @@ cl_int (*__clGetDeviceInfo)(cl_device_id /* device */, #endif -void COPROC::print_available_ram() { - for (int i=0; i c2.cuda_version) return 1; if (c1.cuda_version < c2.cuda_version) return -1; if (loose) { - if (c1.prop.totalGlobalMem > 1.4*c2.prop.totalGlobalMem) return 1; - if (c1.prop.totalGlobalMem < .7* c2.prop.totalGlobalMem) return -1; + if (c1.available_ram> 1.4*c2.available_ram) return 1; + if (c1.available_ram < .7* c2.available_ram) return -1; return 0; } - if (c1.prop.totalGlobalMem > c2.prop.totalGlobalMem) return 1; - if (c1.prop.totalGlobalMem < c2.prop.totalGlobalMem) return -1; + if (c1.available_ram > c2.available_ram) return 1; + if (c1.available_ram < c2.available_ram) return -1; double s1 = c1.peak_flops; double s2 = c2.peak_flops; if (s1 > s2) return 1; @@ -944,12 +926,15 @@ void COPROC_NVIDIA::get( // fake a NVIDIA GPU (for debugging) // -void COPROC_NVIDIA::fake(int driver_version, double ram, int n) { +void COPROC_NVIDIA::fake( + int driver_version, double ram, double avail_ram, int n +) { strcpy(type, GPU_TYPE_NVIDIA); count = n; for (int i=0; itype, j, rp->name ); } - cp->available_ram[j] -= rp->avp->gpu_ram; + cp->available_ram_temp[j] -= rp->avp->gpu_ram; } } @@ -1169,22 +1169,19 @@ static inline bool get_fractional_assignment( // try to assign an instance that's already fractionally assigned // for (i=0; icount; i++) { - if (cp->available_ram_unknown[i]) { - continue; - } if (excluded(rp, cp, i)) { continue; } if ((cp->usage[i] || cp->pending_usage[i]) && (cp->usage[i] + cp->pending_usage[i] + usage <= 1) ) { - if (rp->avp->gpu_ram > cp->available_ram[i]) { + if (rp->avp->gpu_ram > cp->available_ram_temp[i]) { defer_sched = true; continue; } rp->coproc_indices[0] = i; cp->usage[i] += usage; - cp->available_ram[i] -= rp->avp->gpu_ram; + cp->available_ram_temp[i] -= rp->avp->gpu_ram; if (log_flags.coproc_debug) { msg_printf(rp->project, MSG_INFO, "[coproc] Assigning %f of %s instance %d to %s", @@ -1198,20 +1195,17 @@ static inline bool get_fractional_assignment( // failing that, assign an unreserved instance // for (i=0; icount; i++) { - if (cp->available_ram_unknown[i]) { - continue; - } if (excluded(rp, cp, i)) { continue; } if (!cp->usage[i]) { - if (rp->avp->gpu_ram > cp->available_ram[i]) { + if (rp->avp->gpu_ram > cp->available_ram_temp[i]) { defer_sched = true; continue; } rp->coproc_indices[0] = i; cp->usage[i] += usage; - cp->available_ram[i] -= rp->avp->gpu_ram; + cp->available_ram_temp[i] -= rp->avp->gpu_ram; if (log_flags.coproc_debug) { msg_printf(rp->project, MSG_INFO, "[coproc] Assigning %f of %s free instance %d to %s", @@ -1241,14 +1235,11 @@ static inline bool get_integer_assignment( // int nfree = 0; for (i=0; icount; i++) { - if (cp->available_ram_unknown[i]) { - continue; - } if (excluded(rp, cp, i)) { continue; } if (!cp->usage[i]) { - if (rp->avp->gpu_ram > cp->available_ram[i]) { + if (rp->avp->gpu_ram > cp->available_ram_temp[i]) { defer_sched = true; continue; }; @@ -1275,18 +1266,15 @@ static inline bool get_integer_assignment( // assign non-pending instances first for (i=0; icount; i++) { - if (cp->available_ram_unknown[i]) { - continue; - } if (excluded(rp, cp, i)) { continue; } if (!cp->usage[i] && !cp->pending_usage[i] - && (rp->avp->gpu_ram <= cp->available_ram[i]) + && (rp->avp->gpu_ram <= cp->available_ram_temp[i]) ) { cp->usage[i] = 1; - cp->available_ram[i] -= rp->avp->gpu_ram; + cp->available_ram_temp[i] -= rp->avp->gpu_ram; rp->coproc_indices[n++] = i; if (log_flags.coproc_debug) { msg_printf(rp->project, MSG_INFO, @@ -1301,17 +1289,14 @@ static inline bool get_integer_assignment( // if needed, assign pending instances for (i=0; icount; i++) { - if (cp->available_ram_unknown[i]) { - continue; - } if (excluded(rp, cp, i)) { continue; } if (!cp->usage[i] - && (rp->avp->gpu_ram <= cp->available_ram[i]) + && (rp->avp->gpu_ram <= cp->available_ram_temp[i]) ) { cp->usage[i] = 1; - cp->available_ram[i] -= rp->avp->gpu_ram; + cp->available_ram_temp[i] -= rp->avp->gpu_ram; rp->coproc_indices[n++] = i; if (log_flags.coproc_debug) { msg_printf(rp->project, MSG_INFO, @@ -1344,8 +1329,7 @@ static void copy_available_ram(COPROC& cp, const char* name) { int rt = rsc_index(name); if (rt > 0) { for (int i=0; i".tra("24-hour average:")." $teraflops ".tra("TeraFLOPS.")." +")."
".tra("24-hour average:")." $petaflops ".tra("PetaFLOPS.")."
"; } diff --git a/lib/coproc.cpp b/lib/coproc.cpp index 8ceaf54451..3ec76e878b 100644 --- a/lib/coproc.cpp +++ b/lib/coproc.cpp @@ -300,9 +300,9 @@ void COPROC_NVIDIA::description(char* buf) { } else { strcpy(vers, "unknown"); } - sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0f GFLOPS peak)", + sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0fMB available, %.0f GFLOPS peak)", prop.name, vers, cuda_version, prop.major, prop.minor, - prop.totalGlobalMem/(1024.*1024.), peak_flops/1e9 + prop.totalGlobalMem/MEGA, available_ram/MEGA, peak_flops/1e9 ); } @@ -663,7 +663,7 @@ int COPROC_ATI::parse(XML_PARSER& xp) { } void COPROC_ATI::description(char* buf) { - sprintf(buf, "%s (CAL version %s, %.0fMB, %.0f GFLOPS peak)", - name, version, attribs.localRAM/1024.*1024., peak_flops/1.e9 + sprintf(buf, "%s (CAL version %s, %.0fMB, %.0fMB available, %.0f GFLOPS peak)", + name, version, attribs.localRAM/MEGA, available_ram/MEGA, peak_flops/1.e9 ); } diff --git a/lib/coproc.h b/lib/coproc.h index 0716e2da1c..3b625d8f02 100644 --- a/lib/coproc.h +++ b/lib/coproc.h @@ -95,8 +95,9 @@ struct COPROC_REQ { int parse(XML_PARSER&); }; -// For now, there will be some duplication between the values present in -// the OPENCL_DEVICE_PROP struct and the NVIDA and / or ATI structs +// For now, there will be some duplication between the values in +// the OPENCL_DEVICE_PROP struct and the NVIDIA/ATI structs +// struct OPENCL_DEVICE_PROP { cl_device_id device_id; char name[256]; // Device name @@ -159,10 +160,9 @@ struct COPROC { int opencl_device_count; bool running_graphics_app[MAX_COPROC_INSTANCES]; // is this GPU running a graphics app (NVIDIA only) - double available_ram[MAX_COPROC_INSTANCES]; - bool available_ram_unknown[MAX_COPROC_INSTANCES]; - // couldn't get available RAM; don't start new apps on this instance - double available_ram_fake[MAX_COPROC_INSTANCES]; + double available_ram; + double available_ram_temp[MAX_COPROC_INSTANCES]; + // used during job scheduling double last_print_time; @@ -189,13 +189,11 @@ struct COPROC { req_instances = 0; opencl_device_count = 0; estimated_delay = 0; + available_ram = 0; for (int i=0; i0)?x:5e10; } - void fake(double, int); + void fake(double ram, double avail_ram, int); }; struct COPROCS { diff --git a/sched/sample_trivial_validator.cpp b/sched/sample_trivial_validator.cpp index d713a17032..92ff335af2 100644 --- a/sched/sample_trivial_validator.cpp +++ b/sched/sample_trivial_validator.cpp @@ -15,8 +15,7 @@ // You should have received a copy of the GNU Lesser General Public License // along with BOINC. If not, see . -// A sample validator that grants credit to any result whose CPU time is above -// a certain minimum +// A sample validator that accepts all results #include #include "config.h" @@ -24,18 +23,12 @@ using std::vector; -static const double MIN_CPU_TIME = 0; - -int init_result(RESULT& /*result*/, void*& /*data*/) { +int init_result(RESULT&, void*&) { return 0; } -int compare_results( - RESULT & r1, void* /*data1*/, - RESULT const& r2, void* /*data2*/, - bool& match -) { - match = (r1.cpu_time >= MIN_CPU_TIME && r2.cpu_time >= MIN_CPU_TIME); +int compare_results(RESULT&, void*, RESULT const&, void*, bool& match) { + match = true; return 0; }