From 90f863f08ca86ad20af2e66e6ab06f26123afd58 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Tue, 21 Apr 2009 08:11:28 +0000 Subject: [PATCH] - partial checkin so I can edit locally (bad network connection) svn path=/trunk/boinc/; revision=17852 --- checkin_notes | 6 ++++++ lib/coproc.cpp | 24 ++++++++++++++++++------ lib/coproc.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 6 deletions(-) diff --git a/checkin_notes b/checkin_notes index 226c1bae6e..edddc7e078 100644 --- a/checkin_notes +++ b/checkin_notes @@ -4025,3 +4025,9 @@ David 20 Apr 2009 sched/ db_purge.cpp + +David 20 Apr 2009 + - partial checkin so I can edit locally (bad network connection) + + lib/ + coproc.cpp,h diff --git a/lib/coproc.cpp b/lib/coproc.cpp index d65f1d0666..ecb8453874 100644 --- a/lib/coproc.cpp +++ b/lib/coproc.cpp @@ -142,6 +142,24 @@ COPROC* COPROCS::lookup(const char* type) { #endif +// return 1/-1/0 if device 1 is more/less/same capable than device 2 (criteria, in decreasing priority: compute capability major then minor, driver version, RAM size, est. FLOPS) +// +int cuda_compare(COPROC_CUDA& c1, COPROC_CUDA& c2) { + if (c1.prop.major > c2.prop.major) return 1; + if (c1.prop.major < c2.prop.major) return -1; + if (c1.prop.minor > c2.prop.minor) return 1; + if (c1.prop.minor < c2.prop.minor) return -1; + if (c1.drvVersion > c2.drvVersion) return 1; + if (c1.drvVersion < c2.drvVersion) return -1; + if (c1.prop.dtotalGlobalMem > c2.prop.dtotalGlobalMem) return 1; + if (c1.prop.dtotalGlobalMem < c2.prop.dtotalGlobalMem) return -1; + double s1 = c1.flops_estimate(); + double s2 = c2.flops_estimate(); + if (s1 > s2) return 1; + if (s1 < s2) return -1; + return 0; +} + void COPROC_CUDA::get(COPROCS& coprocs, vector& strings) { int count; @@ -220,12 +238,6 @@ void COPROC_CUDA::get(COPROCS& coprocs, vector& strings) { } #endif - // NOTE: our design is slightly flawed: - // there's no provision for having two coprocs of type CUDA. - // So on systems with two GPUs of different hardware types - // we have to count them as two of the same type. 
- // Pick the fastest. - // (*__cudaGetDeviceCount)(&count); int real_count = 0; COPROC_CUDA cc, cc2; diff --git a/lib/coproc.h b/lib/coproc.h index 1c9f582bac..d651a5f4da 100644 --- a/lib/coproc.h +++ b/lib/coproc.h @@ -15,6 +15,50 @@ // You should have received a copy of the GNU Lesser General Public License // along with BOINC. If not, see <http://www.gnu.org/licenses/>. +// Structures representing coprocessors (e.g. GPUs); +// used in both client and server. +// +// Notes: +// +// 1) The use of "CUDA" is misleading; it really means "NVIDIA GPU". +// 2) The design treats each resource type as a pool of identical devices; +// for example, there is a single "CUDA long-term debt" per project, +// and a scheduler request contains a request (#instances, instance-seconds) +// for CUDA jobs. +// In reality, the instances of a resource type can have different properties: +// In the case of CUDA, "compute capability", driver version, RAM, speed, etc. +// How to resolve this discrepancy? +// +// Prior to 21 Apr 09 we identified the fastest instance +// and pretended that the others were identical to it. +// This approach has a serious flaw: +// suppose that the fastest instance has characteristics +// (version, RAM etc.) that satisfy the project's requirements, +// but other instances do not. +// Then BOINC executes jobs on GPUs that can't handle them, +// the jobs fail, the host is punished, etc. +// +// We could treat each GPU as a separate resource, +// with its own set of debts, backoffs, etc. +// However, this would imply tying jobs to instances, +// which is undesirable from a scheduling viewpoint. +// It would also be a big code change in both client and server. +// +// Instead, (as of 21 Apr 09) our approach is to identify a +// "most capable" instance, which in the case of CUDA is based on +// a) compute capability +// b) driver version +// c) RAM size +// d) est. FLOPS +// (in decreasing priority). +// We ignore and don't use any instances that are less capable +// on any of these axes. 
+// +// This design avoids running coprocessor apps on instances +// that are incapable of handling them, and it involves no server changes. +// Its drawback is that, on systems with multiple and differing GPUs, +// it may not use some GPUs that actually could be used. + #ifndef _COPROC_ #define _COPROC_ @@ -46,6 +90,11 @@ struct COPROC { // void* owner[MAX_COPROC_INSTANCES]; + // the device number of each instance + // These are not sequential if we omit instances (see above) + // + int device_num[MAX_COPROC_INSTANCES]; + #ifndef _USING_FCGI_ virtual void write_xml(MIOFILE&); #endif