- client: change how GPU available RAM is used:

    - measure the available RAM of each GPU when BOINC starts up.
        If the measurement fails, set available = physical.
        Show available RAM in the startup messages.
    - use available RAM rather than physical RAM in selecting
        the "best" GPU instance
    - report available RAM to the scheduler
    TODO: change the scheduler to use available RAM rather than
        physical RAM when the client reports it


svn path=/trunk/boinc/; revision=24210
This commit is contained in:
David Anderson 2011-09-14 22:45:26 +00:00
parent 9e1cafbf4d
commit 7411dd60aa
8 changed files with 86 additions and 128 deletions

View File

@ -6032,3 +6032,23 @@ Rom 14 Sept 2011
client/
hostinfo_win.cpp
David 14 Sept 2011
- client: change how GPU available RAM is used:
- measure the available RAM of each GPU when BOINC starts up.
If the measurement fails, set available = physical.
Show available RAM in the startup messages.
- use available RAM rather than physical RAM in selecting
the "best" GPU instance
- report available RAM to the scheduler
TODO: change the scheduler to use available RAM rather than
physical RAM when the client reports it
sched/
sample_trivial_validator.cpp
lib/
coproc.cpp,h
client/
client_state.cpp
coproc_detect.cpp
cpu_sched.cpp

View File

@ -369,15 +369,11 @@ int CLIENT_STATE::init() {
}
#if 0
msg_printf(NULL, MSG_INFO, "Faking an NVIDIA GPU");
coprocs.nvidia.fake(18000, 256*MEGA, 2);
coprocs.nvidia.available_ram_fake[0] = 256*MEGA;
coprocs.nvidia.available_ram_fake[1] = 192*MEGA;
coprocs.nvidia.fake(18000, 256*MEGA, 192*MEGA, 2);
#endif
#if 0
msg_printf(NULL, MSG_INFO, "Faking an ATI GPU");
coprocs.ati.fake(512*MEGA, 2);
coprocs.ati.available_ram_fake[0] = 256*MEGA;
coprocs.ati.available_ram_fake[1] = 192*MEGA;
coprocs.ati.fake(512*MEGA, 256*MEGA, 2);
#endif
}
@ -386,7 +382,6 @@ int CLIENT_STATE::init() {
msg_printf(NULL, MSG_INFO, "NVIDIA GPU info taken from cc_config.xml");
} else {
coprocs.add(coprocs.nvidia);
coprocs.nvidia.print_available_ram();
}
}
if (coprocs.have_ati()) {
@ -394,7 +389,6 @@ int CLIENT_STATE::init() {
msg_printf(NULL, MSG_INFO, "ATI GPU info taken from cc_config.xml");
} else {
coprocs.add(coprocs.ati);
coprocs.ati.print_available_ram();
}
}
host_info._coprocs = coprocs;

View File

@ -101,24 +101,6 @@ cl_int (*__clGetDeviceInfo)(cl_device_id /* device */,
#endif
// Log one [coproc] message per GPU instance (indices 0..count-1)
// describing its available RAM.
// For each instance, the message is either "available RAM unknown"
// (when available_ram_unknown[i] is set) or the value of
// available_ram[i] expressed in MB.
//
void COPROC::print_available_ram() {
for (int i=0; i<count; i++) {
if (available_ram_unknown[i]) {
// no usable measurement is recorded for this instance
msg_printf(0, MSG_INFO,
"[coproc] %s device %d: available RAM unknown",
type, device_nums[i]
);
} else {
// available_ram[i] is divided by MEGA and truncated to an int for display
msg_printf(0, MSG_INFO,
"[coproc] %s device %d: available RAM %d MB",
type, device_nums[i],
(int)(available_ram[i]/MEGA)
);
}
}
}
//TODO: Determine how we want to compare OpenCL devices - this is only a placeholder
// return 1/-1/0 if device 1 is more/less/same capable than device 2.
// factors (decreasing priority):
@ -614,7 +596,7 @@ void COPROCS::get(
// factors (decreasing priority):
// - compute capability
// - software version
// - memory
// - available memory
// - speed
//
// If "loose", ignore FLOPS and tolerate small memory diff
@ -627,12 +609,12 @@ int nvidia_compare(COPROC_NVIDIA& c1, COPROC_NVIDIA& c2, bool loose) {
if (c1.cuda_version > c2.cuda_version) return 1;
if (c1.cuda_version < c2.cuda_version) return -1;
if (loose) {
if (c1.prop.totalGlobalMem > 1.4*c2.prop.totalGlobalMem) return 1;
if (c1.prop.totalGlobalMem < .7* c2.prop.totalGlobalMem) return -1;
if (c1.available_ram> 1.4*c2.available_ram) return 1;
if (c1.available_ram < .7* c2.available_ram) return -1;
return 0;
}
if (c1.prop.totalGlobalMem > c2.prop.totalGlobalMem) return 1;
if (c1.prop.totalGlobalMem < c2.prop.totalGlobalMem) return -1;
if (c1.available_ram > c2.available_ram) return 1;
if (c1.available_ram < c2.available_ram) return -1;
double s1 = c1.peak_flops;
double s2 = c2.peak_flops;
if (s1 > s2) return 1;
@ -944,12 +926,15 @@ void COPROC_NVIDIA::get(
// fake an NVIDIA GPU (for debugging)
//
void COPROC_NVIDIA::fake(int driver_version, double ram, int n) {
void COPROC_NVIDIA::fake(
int driver_version, double ram, double avail_ram, int n
) {
strcpy(type, GPU_TYPE_NVIDIA);
count = n;
for (int i=0; i<count; i++) {
device_nums[i] = i;
}
available_ram = avail_ram;
display_driver_version = driver_version;
cuda_version = 2020;
strcpy(prop.name, "Fake NVIDIA GPU");
@ -974,8 +959,7 @@ void COPROC_NVIDIA::fake(int driver_version, double ram, int n) {
set_peak_flops();
}
// See how much RAM is available on each GPU.
// If this fails, set "available_ram_unknown"
// See how much RAM is available on this GPU.
//
void COPROC_NVIDIA::get_available_ram() {
int retval;
@ -983,8 +967,7 @@ void COPROC_NVIDIA::get_available_ram() {
int device;
void* ctx;
available_ram[0] = 0;
available_ram_unknown[0] = true;
available_ram = prop.dtotalGlobalMem;
retval = (*__cuDeviceGet)(&device, device_num);
if (retval) {
if (log_flags.coproc_debug) {
@ -1014,8 +997,7 @@ void COPROC_NVIDIA::get_available_ram() {
return;
}
(*__cuCtxDestroy)(ctx);
available_ram[0] = (double) memfree;
available_ram_unknown[0] = false;
available_ram = (double) memfree;
}
// check whether each GPU is running a graphics app (assume yes)
@ -1429,11 +1411,12 @@ void COPROC_ATI::get(
}
}
void COPROC_ATI::fake(double ram, int n) {
void COPROC_ATI::fake(double ram, double avail_ram, int n) {
strcpy(type, GPU_TYPE_ATI);
strcpy(version, "1.4.3");
strcpy(name, "foobar");
count = n;
available_ram = avail_ram;
memset(&attribs, 0, sizeof(attribs));
memset(&info, 0, sizeof(info));
attribs.localRAM = (int)(ram/MEGA);
@ -1446,52 +1429,39 @@ void COPROC_ATI::fake(double ram, int n) {
set_peak_flops();
}
// get available RAM of ATI GPUs
// NOTE: last time we checked, repeated calls to this crash the driver
// get available RAM of ATI GPU
//
void COPROC_ATI::get_available_ram() {
CALdevicestatus st;
CALdevice dev;
int i, retval;
int retval;
available_ram = attribs.localRAM*MEGA;
st.struct_size = sizeof(CALdevicestatus);
// avoid crash if faked GPU
if (!__calInit) {
for (i=0; i<count; i++) {
available_ram[i] = available_ram_fake[i];
available_ram_unknown[i] = false;
retval = (*__calDeviceOpen)(&dev, device_num);
if (retval) {
if (log_flags.coproc_debug) {
msg_printf(0, MSG_INFO,
"[coproc] calDeviceOpen(%d) returned %d", device_num, retval
);
}
return;
}
for (i=0; i<count; i++) {
available_ram[i] = 0;
available_ram_unknown[i] = true;
int devnum = device_nums[i];
retval = (*__calDeviceOpen)(&dev, devnum);
if (retval) {
if (log_flags.coproc_debug) {
msg_printf(0, MSG_INFO,
"[coproc] calDeviceOpen(%d) returned %d", devnum, retval
);
}
continue;
retval = (*__calDeviceGetStatus)(&st, dev);
if (retval) {
if (log_flags.coproc_debug) {
msg_printf(0, MSG_INFO,
"[coproc] calDeviceGetStatus(%d) returned %d",
device_num, retval
);
}
retval = (*__calDeviceGetStatus)(&st, dev);
if (retval) {
if (log_flags.coproc_debug) {
msg_printf(0, MSG_INFO,
"[coproc] calDeviceGetStatus(%d) returned %d",
devnum, retval
);
}
(*__calDeviceClose)(dev);
continue;
}
available_ram[i] = st.availLocalRAM*MEGA;
available_ram_unknown[i] = false;
(*__calDeviceClose)(dev);
return;
}
available_ram = st.availLocalRAM*MEGA;
(*__calDeviceClose)(dev);
}
bool COPROC_ATI::matches(OPENCL_DEVICE_PROP& OpenCLprop) {

View File

@ -1156,7 +1156,7 @@ static inline void confirm_current_assignment(
cp->type, j, rp->name
);
}
cp->available_ram[j] -= rp->avp->gpu_ram;
cp->available_ram_temp[j] -= rp->avp->gpu_ram;
}
}
@ -1169,22 +1169,19 @@ static inline bool get_fractional_assignment(
// try to assign an instance that's already fractionally assigned
//
for (i=0; i<cp->count; i++) {
if (cp->available_ram_unknown[i]) {
continue;
}
if (excluded(rp, cp, i)) {
continue;
}
if ((cp->usage[i] || cp->pending_usage[i])
&& (cp->usage[i] + cp->pending_usage[i] + usage <= 1)
) {
if (rp->avp->gpu_ram > cp->available_ram[i]) {
if (rp->avp->gpu_ram > cp->available_ram_temp[i]) {
defer_sched = true;
continue;
}
rp->coproc_indices[0] = i;
cp->usage[i] += usage;
cp->available_ram[i] -= rp->avp->gpu_ram;
cp->available_ram_temp[i] -= rp->avp->gpu_ram;
if (log_flags.coproc_debug) {
msg_printf(rp->project, MSG_INFO,
"[coproc] Assigning %f of %s instance %d to %s",
@ -1198,20 +1195,17 @@ static inline bool get_fractional_assignment(
// failing that, assign an unreserved instance
//
for (i=0; i<cp->count; i++) {
if (cp->available_ram_unknown[i]) {
continue;
}
if (excluded(rp, cp, i)) {
continue;
}
if (!cp->usage[i]) {
if (rp->avp->gpu_ram > cp->available_ram[i]) {
if (rp->avp->gpu_ram > cp->available_ram_temp[i]) {
defer_sched = true;
continue;
}
rp->coproc_indices[0] = i;
cp->usage[i] += usage;
cp->available_ram[i] -= rp->avp->gpu_ram;
cp->available_ram_temp[i] -= rp->avp->gpu_ram;
if (log_flags.coproc_debug) {
msg_printf(rp->project, MSG_INFO,
"[coproc] Assigning %f of %s free instance %d to %s",
@ -1241,14 +1235,11 @@ static inline bool get_integer_assignment(
//
int nfree = 0;
for (i=0; i<cp->count; i++) {
if (cp->available_ram_unknown[i]) {
continue;
}
if (excluded(rp, cp, i)) {
continue;
}
if (!cp->usage[i]) {
if (rp->avp->gpu_ram > cp->available_ram[i]) {
if (rp->avp->gpu_ram > cp->available_ram_temp[i]) {
defer_sched = true;
continue;
};
@ -1275,18 +1266,15 @@ static inline bool get_integer_assignment(
// assign non-pending instances first
for (i=0; i<cp->count; i++) {
if (cp->available_ram_unknown[i]) {
continue;
}
if (excluded(rp, cp, i)) {
continue;
}
if (!cp->usage[i]
&& !cp->pending_usage[i]
&& (rp->avp->gpu_ram <= cp->available_ram[i])
&& (rp->avp->gpu_ram <= cp->available_ram_temp[i])
) {
cp->usage[i] = 1;
cp->available_ram[i] -= rp->avp->gpu_ram;
cp->available_ram_temp[i] -= rp->avp->gpu_ram;
rp->coproc_indices[n++] = i;
if (log_flags.coproc_debug) {
msg_printf(rp->project, MSG_INFO,
@ -1301,17 +1289,14 @@ static inline bool get_integer_assignment(
// if needed, assign pending instances
for (i=0; i<cp->count; i++) {
if (cp->available_ram_unknown[i]) {
continue;
}
if (excluded(rp, cp, i)) {
continue;
}
if (!cp->usage[i]
&& (rp->avp->gpu_ram <= cp->available_ram[i])
&& (rp->avp->gpu_ram <= cp->available_ram_temp[i])
) {
cp->usage[i] = 1;
cp->available_ram[i] -= rp->avp->gpu_ram;
cp->available_ram_temp[i] -= rp->avp->gpu_ram;
rp->coproc_indices[n++] = i;
if (log_flags.coproc_debug) {
msg_printf(rp->project, MSG_INFO,
@ -1344,8 +1329,7 @@ static void copy_available_ram(COPROC& cp, const char* name) {
int rt = rsc_index(name);
if (rt > 0) {
for (int i=0; i<MAX_COPROC_INSTANCES; i++) {
coprocs.coprocs[rt].available_ram[i] = cp.available_ram[i];
coprocs.coprocs[rt].available_ram_unknown[i] = cp.available_ram_unknown[i];
coprocs.coprocs[rt].available_ram_temp[i] = cp.available_ram;
}
}
}

View File

@ -49,9 +49,9 @@ function show_totals() {
$users = number_format($users);
$hosts = number_format($hosts);
$teraflops = number_format($credit_day/200000, 2);
$petaflops = number_format($credit_day/200000000, 3);
echo tra("Active:")." $users ".tra("volunteers,")." $hosts ".tra("computers.
")." <br>".tra("24-hour average:")." $teraflops ".tra("TeraFLOPS.")."
")." <br>".tra("24-hour average:")." $petaflops ".tra("PetaFLOPS.")."
<hr size=1 width=\"80%\">
";
}

View File

@ -300,9 +300,9 @@ void COPROC_NVIDIA::description(char* buf) {
} else {
strcpy(vers, "unknown");
}
sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0f GFLOPS peak)",
sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0fMB available, %.0f GFLOPS peak)",
prop.name, vers, cuda_version, prop.major, prop.minor,
prop.totalGlobalMem/(1024.*1024.), peak_flops/1e9
prop.totalGlobalMem/MEGA, available_ram/MEGA, peak_flops/1e9
);
}
@ -663,7 +663,7 @@ int COPROC_ATI::parse(XML_PARSER& xp) {
}
void COPROC_ATI::description(char* buf) {
sprintf(buf, "%s (CAL version %s, %.0fMB, %.0f GFLOPS peak)",
name, version, attribs.localRAM/1024.*1024., peak_flops/1.e9
sprintf(buf, "%s (CAL version %s, %.0fMB, %.0fMB available, %.0f GFLOPS peak)",
name, version, attribs.localRAM/MEGA, available_ram/MEGA, peak_flops/1.e9
);
}

View File

@ -95,8 +95,9 @@ struct COPROC_REQ {
int parse(XML_PARSER&);
};
// For now, there will be some duplication between the values present in
// the OPENCL_DEVICE_PROP struct and the NVIDA and / or ATI structs
// For now, there will be some duplication between the values in
// the OPENCL_DEVICE_PROP struct and the NVIDIA/ATI structs
//
struct OPENCL_DEVICE_PROP {
cl_device_id device_id;
char name[256]; // Device name
@ -159,10 +160,9 @@ struct COPROC {
int opencl_device_count;
bool running_graphics_app[MAX_COPROC_INSTANCES];
// is this GPU running a graphics app (NVIDIA only)
double available_ram[MAX_COPROC_INSTANCES];
bool available_ram_unknown[MAX_COPROC_INSTANCES];
// couldn't get available RAM; don't start new apps on this instance
double available_ram_fake[MAX_COPROC_INSTANCES];
double available_ram;
double available_ram_temp[MAX_COPROC_INSTANCES];
// used during job scheduling
double last_print_time;
@ -189,13 +189,11 @@ struct COPROC {
req_instances = 0;
opencl_device_count = 0;
estimated_delay = 0;
available_ram = 0;
for (int i=0; i<MAX_COPROC_INSTANCES; i++) {
device_nums[i] = 0;
opencl_device_ids[i] = 0;
running_graphics_app[i] = true;
available_ram[i] = 0;
available_ram_fake[i] = 0;
available_ram_unknown[i] = true;
}
memset(&opencl_prop, 0, sizeof(opencl_prop));
}
@ -212,7 +210,6 @@ struct COPROC {
COPROC() {
clear();
}
void print_available_ram();
};
// based on cudaDeviceProp from /usr/local/cuda/include/driver_types.h
@ -285,7 +282,7 @@ struct COPROC_NVIDIA : public COPROC {
bool check_running_graphics_app();
bool matches(OPENCL_DEVICE_PROP& OpenCLprop);
void fake(int driver_version, double ram, int count);
void fake(int driver_version, double ram, double avail_ram, int count);
};
@ -318,7 +315,7 @@ struct COPROC_ATI : public COPROC {
// clock is in MHz
peak_flops = (x>0)?x:5e10;
}
void fake(double, int);
void fake(double ram, double avail_ram, int);
};
struct COPROCS {

View File

@ -15,8 +15,7 @@
// You should have received a copy of the GNU Lesser General Public License
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
// A sample validator that grants credit to any result whose CPU time is above
// a certain minimum
// A sample validator that accepts all results
#include <cstdlib>
#include "config.h"
@ -24,18 +23,12 @@
using std::vector;
static const double MIN_CPU_TIME = 0;
int init_result(RESULT& /*result*/, void*& /*data*/) {
int init_result(RESULT&, void*&) {
return 0;
}
int compare_results(
RESULT & r1, void* /*data1*/,
RESULT const& r2, void* /*data2*/,
bool& match
) {
match = (r1.cpu_time >= MIN_CPU_TIME && r2.cpu_time >= MIN_CPU_TIME);
int compare_results(RESULT&, void*, RESULT const&, void*, bool& match) {
match = true;
return 0;
}