mirror of https://github.com/BOINC/boinc.git
- client: change in the use of GPU available RAM:
- measure the available RAM of each GPU when BOINC starts up. If this fails, set available = physical. Show available RAM in startup messages. - use available RAM rather than physical RAM in selecting the "best" GPU instance. - report available RAM to the scheduler. TODO: change the scheduler to use available rather than physical if it's reported. svn path=/trunk/boinc/; revision=24210
This commit is contained in:
parent
9e1cafbf4d
commit
7411dd60aa
|
@ -6032,3 +6032,23 @@ Rom 14 Sept 2011
|
|||
|
||||
client/
|
||||
hostinfo_win.cpp
|
||||
|
||||
David 14 Sept 2011
|
||||
- client: change in the use of GPU available RAM:
|
||||
- measure the available RAM of each GPU when BOINC starts up.
|
||||
If this fails, set available = physical.
|
||||
Show available RAM in startup messages.
|
||||
- use available RAM rather than physical RAM in selecting
|
||||
the "best" GPU instance
|
||||
- report available RAM to the scheduler
|
||||
TODO: change the scheduler to use available rather than physical
|
||||
if it's reported
|
||||
|
||||
sched/
|
||||
sample_trivial_validator.cpp
|
||||
lib/
|
||||
coproc.cpp,h
|
||||
client/
|
||||
client_state.cpp
|
||||
coproc_detect.cpp
|
||||
cpu_sched.cpp
|
||||
|
|
|
@ -369,15 +369,11 @@ int CLIENT_STATE::init() {
|
|||
}
|
||||
#if 0
|
||||
msg_printf(NULL, MSG_INFO, "Faking an NVIDIA GPU");
|
||||
coprocs.nvidia.fake(18000, 256*MEGA, 2);
|
||||
coprocs.nvidia.available_ram_fake[0] = 256*MEGA;
|
||||
coprocs.nvidia.available_ram_fake[1] = 192*MEGA;
|
||||
coprocs.nvidia.fake(18000, 256*MEGA, 192*MEGA, 2);
|
||||
#endif
|
||||
#if 0
|
||||
msg_printf(NULL, MSG_INFO, "Faking an ATI GPU");
|
||||
coprocs.ati.fake(512*MEGA, 2);
|
||||
coprocs.ati.available_ram_fake[0] = 256*MEGA;
|
||||
coprocs.ati.available_ram_fake[1] = 192*MEGA;
|
||||
coprocs.ati.fake(512*MEGA, 256*MEGA, 2);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -386,7 +382,6 @@ int CLIENT_STATE::init() {
|
|||
msg_printf(NULL, MSG_INFO, "NVIDIA GPU info taken from cc_config.xml");
|
||||
} else {
|
||||
coprocs.add(coprocs.nvidia);
|
||||
coprocs.nvidia.print_available_ram();
|
||||
}
|
||||
}
|
||||
if (coprocs.have_ati()) {
|
||||
|
@ -394,7 +389,6 @@ int CLIENT_STATE::init() {
|
|||
msg_printf(NULL, MSG_INFO, "ATI GPU info taken from cc_config.xml");
|
||||
} else {
|
||||
coprocs.add(coprocs.ati);
|
||||
coprocs.ati.print_available_ram();
|
||||
}
|
||||
}
|
||||
host_info._coprocs = coprocs;
|
||||
|
|
|
@ -101,24 +101,6 @@ cl_int (*__clGetDeviceInfo)(cl_device_id /* device */,
|
|||
|
||||
#endif
|
||||
|
||||
void COPROC::print_available_ram() {
|
||||
for (int i=0; i<count; i++) {
|
||||
if (available_ram_unknown[i]) {
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[coproc] %s device %d: available RAM unknown",
|
||||
type, device_nums[i]
|
||||
);
|
||||
} else {
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[coproc] %s device %d: available RAM %d MB",
|
||||
type, device_nums[i],
|
||||
(int)(available_ram[i]/MEGA)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//TODO: Determine how we want to compare OpenCL devices - this is only a placeholder
|
||||
// return 1/-1/0 if device 1 is more/less/same capable than device 2.
|
||||
// factors (decreasing priority):
|
||||
|
@ -614,7 +596,7 @@ void COPROCS::get(
|
|||
// factors (decreasing priority):
|
||||
// - compute capability
|
||||
// - software version
|
||||
// - memory
|
||||
// - available memory
|
||||
// - speed
|
||||
//
|
||||
// If "loose", ignore FLOPS and tolerate small memory diff
|
||||
|
@ -627,12 +609,12 @@ int nvidia_compare(COPROC_NVIDIA& c1, COPROC_NVIDIA& c2, bool loose) {
|
|||
if (c1.cuda_version > c2.cuda_version) return 1;
|
||||
if (c1.cuda_version < c2.cuda_version) return -1;
|
||||
if (loose) {
|
||||
if (c1.prop.totalGlobalMem > 1.4*c2.prop.totalGlobalMem) return 1;
|
||||
if (c1.prop.totalGlobalMem < .7* c2.prop.totalGlobalMem) return -1;
|
||||
if (c1.available_ram> 1.4*c2.available_ram) return 1;
|
||||
if (c1.available_ram < .7* c2.available_ram) return -1;
|
||||
return 0;
|
||||
}
|
||||
if (c1.prop.totalGlobalMem > c2.prop.totalGlobalMem) return 1;
|
||||
if (c1.prop.totalGlobalMem < c2.prop.totalGlobalMem) return -1;
|
||||
if (c1.available_ram > c2.available_ram) return 1;
|
||||
if (c1.available_ram < c2.available_ram) return -1;
|
||||
double s1 = c1.peak_flops;
|
||||
double s2 = c2.peak_flops;
|
||||
if (s1 > s2) return 1;
|
||||
|
@ -944,12 +926,15 @@ void COPROC_NVIDIA::get(
|
|||
|
||||
// fake a NVIDIA GPU (for debugging)
|
||||
//
|
||||
void COPROC_NVIDIA::fake(int driver_version, double ram, int n) {
|
||||
void COPROC_NVIDIA::fake(
|
||||
int driver_version, double ram, double avail_ram, int n
|
||||
) {
|
||||
strcpy(type, GPU_TYPE_NVIDIA);
|
||||
count = n;
|
||||
for (int i=0; i<count; i++) {
|
||||
device_nums[i] = i;
|
||||
}
|
||||
available_ram = avail_ram;
|
||||
display_driver_version = driver_version;
|
||||
cuda_version = 2020;
|
||||
strcpy(prop.name, "Fake NVIDIA GPU");
|
||||
|
@ -974,8 +959,7 @@ void COPROC_NVIDIA::fake(int driver_version, double ram, int n) {
|
|||
set_peak_flops();
|
||||
}
|
||||
|
||||
// See how much RAM is available on each GPU.
|
||||
// If this fails, set "available_ram_unknown"
|
||||
// See how much RAM is available on this GPU.
|
||||
//
|
||||
void COPROC_NVIDIA::get_available_ram() {
|
||||
int retval;
|
||||
|
@ -983,8 +967,7 @@ void COPROC_NVIDIA::get_available_ram() {
|
|||
int device;
|
||||
void* ctx;
|
||||
|
||||
available_ram[0] = 0;
|
||||
available_ram_unknown[0] = true;
|
||||
available_ram = prop.dtotalGlobalMem;
|
||||
retval = (*__cuDeviceGet)(&device, device_num);
|
||||
if (retval) {
|
||||
if (log_flags.coproc_debug) {
|
||||
|
@ -1014,8 +997,7 @@ void COPROC_NVIDIA::get_available_ram() {
|
|||
return;
|
||||
}
|
||||
(*__cuCtxDestroy)(ctx);
|
||||
available_ram[0] = (double) memfree;
|
||||
available_ram_unknown[0] = false;
|
||||
available_ram = (double) memfree;
|
||||
}
|
||||
|
||||
// check whether each GPU is running a graphics app (assume yes)
|
||||
|
@ -1429,11 +1411,12 @@ void COPROC_ATI::get(
|
|||
}
|
||||
}
|
||||
|
||||
void COPROC_ATI::fake(double ram, int n) {
|
||||
void COPROC_ATI::fake(double ram, double avail_ram, int n) {
|
||||
strcpy(type, GPU_TYPE_ATI);
|
||||
strcpy(version, "1.4.3");
|
||||
strcpy(name, "foobar");
|
||||
count = n;
|
||||
available_ram = avail_ram;
|
||||
memset(&attribs, 0, sizeof(attribs));
|
||||
memset(&info, 0, sizeof(info));
|
||||
attribs.localRAM = (int)(ram/MEGA);
|
||||
|
@ -1446,52 +1429,39 @@ void COPROC_ATI::fake(double ram, int n) {
|
|||
set_peak_flops();
|
||||
}
|
||||
|
||||
// get available RAM of ATI GPUs
|
||||
// NOTE: last time we checked, repeated calls to this crash the driver
|
||||
// get available RAM of ATI GPU
|
||||
//
|
||||
void COPROC_ATI::get_available_ram() {
|
||||
CALdevicestatus st;
|
||||
CALdevice dev;
|
||||
int i, retval;
|
||||
int retval;
|
||||
|
||||
available_ram = attribs.localRAM*MEGA;
|
||||
|
||||
st.struct_size = sizeof(CALdevicestatus);
|
||||
|
||||
// avoid crash if faked GPU
|
||||
if (!__calInit) {
|
||||
for (i=0; i<count; i++) {
|
||||
available_ram[i] = available_ram_fake[i];
|
||||
available_ram_unknown[i] = false;
|
||||
retval = (*__calDeviceOpen)(&dev, device_num);
|
||||
if (retval) {
|
||||
if (log_flags.coproc_debug) {
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[coproc] calDeviceOpen(%d) returned %d", device_num, retval
|
||||
);
|
||||
}
|
||||
return;
|
||||
}
|
||||
for (i=0; i<count; i++) {
|
||||
available_ram[i] = 0;
|
||||
available_ram_unknown[i] = true;
|
||||
int devnum = device_nums[i];
|
||||
retval = (*__calDeviceOpen)(&dev, devnum);
|
||||
if (retval) {
|
||||
if (log_flags.coproc_debug) {
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[coproc] calDeviceOpen(%d) returned %d", devnum, retval
|
||||
);
|
||||
}
|
||||
continue;
|
||||
retval = (*__calDeviceGetStatus)(&st, dev);
|
||||
if (retval) {
|
||||
if (log_flags.coproc_debug) {
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[coproc] calDeviceGetStatus(%d) returned %d",
|
||||
device_num, retval
|
||||
);
|
||||
}
|
||||
retval = (*__calDeviceGetStatus)(&st, dev);
|
||||
if (retval) {
|
||||
if (log_flags.coproc_debug) {
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[coproc] calDeviceGetStatus(%d) returned %d",
|
||||
devnum, retval
|
||||
);
|
||||
}
|
||||
(*__calDeviceClose)(dev);
|
||||
continue;
|
||||
}
|
||||
available_ram[i] = st.availLocalRAM*MEGA;
|
||||
available_ram_unknown[i] = false;
|
||||
(*__calDeviceClose)(dev);
|
||||
return;
|
||||
}
|
||||
available_ram = st.availLocalRAM*MEGA;
|
||||
(*__calDeviceClose)(dev);
|
||||
}
|
||||
|
||||
bool COPROC_ATI::matches(OPENCL_DEVICE_PROP& OpenCLprop) {
|
||||
|
|
|
@ -1156,7 +1156,7 @@ static inline void confirm_current_assignment(
|
|||
cp->type, j, rp->name
|
||||
);
|
||||
}
|
||||
cp->available_ram[j] -= rp->avp->gpu_ram;
|
||||
cp->available_ram_temp[j] -= rp->avp->gpu_ram;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1169,22 +1169,19 @@ static inline bool get_fractional_assignment(
|
|||
// try to assign an instance that's already fractionally assigned
|
||||
//
|
||||
for (i=0; i<cp->count; i++) {
|
||||
if (cp->available_ram_unknown[i]) {
|
||||
continue;
|
||||
}
|
||||
if (excluded(rp, cp, i)) {
|
||||
continue;
|
||||
}
|
||||
if ((cp->usage[i] || cp->pending_usage[i])
|
||||
&& (cp->usage[i] + cp->pending_usage[i] + usage <= 1)
|
||||
) {
|
||||
if (rp->avp->gpu_ram > cp->available_ram[i]) {
|
||||
if (rp->avp->gpu_ram > cp->available_ram_temp[i]) {
|
||||
defer_sched = true;
|
||||
continue;
|
||||
}
|
||||
rp->coproc_indices[0] = i;
|
||||
cp->usage[i] += usage;
|
||||
cp->available_ram[i] -= rp->avp->gpu_ram;
|
||||
cp->available_ram_temp[i] -= rp->avp->gpu_ram;
|
||||
if (log_flags.coproc_debug) {
|
||||
msg_printf(rp->project, MSG_INFO,
|
||||
"[coproc] Assigning %f of %s instance %d to %s",
|
||||
|
@ -1198,20 +1195,17 @@ static inline bool get_fractional_assignment(
|
|||
// failing that, assign an unreserved instance
|
||||
//
|
||||
for (i=0; i<cp->count; i++) {
|
||||
if (cp->available_ram_unknown[i]) {
|
||||
continue;
|
||||
}
|
||||
if (excluded(rp, cp, i)) {
|
||||
continue;
|
||||
}
|
||||
if (!cp->usage[i]) {
|
||||
if (rp->avp->gpu_ram > cp->available_ram[i]) {
|
||||
if (rp->avp->gpu_ram > cp->available_ram_temp[i]) {
|
||||
defer_sched = true;
|
||||
continue;
|
||||
}
|
||||
rp->coproc_indices[0] = i;
|
||||
cp->usage[i] += usage;
|
||||
cp->available_ram[i] -= rp->avp->gpu_ram;
|
||||
cp->available_ram_temp[i] -= rp->avp->gpu_ram;
|
||||
if (log_flags.coproc_debug) {
|
||||
msg_printf(rp->project, MSG_INFO,
|
||||
"[coproc] Assigning %f of %s free instance %d to %s",
|
||||
|
@ -1241,14 +1235,11 @@ static inline bool get_integer_assignment(
|
|||
//
|
||||
int nfree = 0;
|
||||
for (i=0; i<cp->count; i++) {
|
||||
if (cp->available_ram_unknown[i]) {
|
||||
continue;
|
||||
}
|
||||
if (excluded(rp, cp, i)) {
|
||||
continue;
|
||||
}
|
||||
if (!cp->usage[i]) {
|
||||
if (rp->avp->gpu_ram > cp->available_ram[i]) {
|
||||
if (rp->avp->gpu_ram > cp->available_ram_temp[i]) {
|
||||
defer_sched = true;
|
||||
continue;
|
||||
};
|
||||
|
@ -1275,18 +1266,15 @@ static inline bool get_integer_assignment(
|
|||
// assign non-pending instances first
|
||||
|
||||
for (i=0; i<cp->count; i++) {
|
||||
if (cp->available_ram_unknown[i]) {
|
||||
continue;
|
||||
}
|
||||
if (excluded(rp, cp, i)) {
|
||||
continue;
|
||||
}
|
||||
if (!cp->usage[i]
|
||||
&& !cp->pending_usage[i]
|
||||
&& (rp->avp->gpu_ram <= cp->available_ram[i])
|
||||
&& (rp->avp->gpu_ram <= cp->available_ram_temp[i])
|
||||
) {
|
||||
cp->usage[i] = 1;
|
||||
cp->available_ram[i] -= rp->avp->gpu_ram;
|
||||
cp->available_ram_temp[i] -= rp->avp->gpu_ram;
|
||||
rp->coproc_indices[n++] = i;
|
||||
if (log_flags.coproc_debug) {
|
||||
msg_printf(rp->project, MSG_INFO,
|
||||
|
@ -1301,17 +1289,14 @@ static inline bool get_integer_assignment(
|
|||
// if needed, assign pending instances
|
||||
|
||||
for (i=0; i<cp->count; i++) {
|
||||
if (cp->available_ram_unknown[i]) {
|
||||
continue;
|
||||
}
|
||||
if (excluded(rp, cp, i)) {
|
||||
continue;
|
||||
}
|
||||
if (!cp->usage[i]
|
||||
&& (rp->avp->gpu_ram <= cp->available_ram[i])
|
||||
&& (rp->avp->gpu_ram <= cp->available_ram_temp[i])
|
||||
) {
|
||||
cp->usage[i] = 1;
|
||||
cp->available_ram[i] -= rp->avp->gpu_ram;
|
||||
cp->available_ram_temp[i] -= rp->avp->gpu_ram;
|
||||
rp->coproc_indices[n++] = i;
|
||||
if (log_flags.coproc_debug) {
|
||||
msg_printf(rp->project, MSG_INFO,
|
||||
|
@ -1344,8 +1329,7 @@ static void copy_available_ram(COPROC& cp, const char* name) {
|
|||
int rt = rsc_index(name);
|
||||
if (rt > 0) {
|
||||
for (int i=0; i<MAX_COPROC_INSTANCES; i++) {
|
||||
coprocs.coprocs[rt].available_ram[i] = cp.available_ram[i];
|
||||
coprocs.coprocs[rt].available_ram_unknown[i] = cp.available_ram_unknown[i];
|
||||
coprocs.coprocs[rt].available_ram_temp[i] = cp.available_ram;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -49,9 +49,9 @@ function show_totals() {
|
|||
$users = number_format($users);
|
||||
$hosts = number_format($hosts);
|
||||
|
||||
$teraflops = number_format($credit_day/200000, 2);
|
||||
$petaflops = number_format($credit_day/200000000, 3);
|
||||
echo tra("Active:")." $users ".tra("volunteers,")." $hosts ".tra("computers.
|
||||
")." <br>".tra("24-hour average:")." $teraflops ".tra("TeraFLOPS.")."
|
||||
")." <br>".tra("24-hour average:")." $petaflops ".tra("PetaFLOPS.")."
|
||||
<hr size=1 width=\"80%\">
|
||||
";
|
||||
}
|
||||
|
|
|
@ -300,9 +300,9 @@ void COPROC_NVIDIA::description(char* buf) {
|
|||
} else {
|
||||
strcpy(vers, "unknown");
|
||||
}
|
||||
sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0f GFLOPS peak)",
|
||||
sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0fMB available, %.0f GFLOPS peak)",
|
||||
prop.name, vers, cuda_version, prop.major, prop.minor,
|
||||
prop.totalGlobalMem/(1024.*1024.), peak_flops/1e9
|
||||
prop.totalGlobalMem/MEGA, available_ram/MEGA, peak_flops/1e9
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -663,7 +663,7 @@ int COPROC_ATI::parse(XML_PARSER& xp) {
|
|||
}
|
||||
|
||||
void COPROC_ATI::description(char* buf) {
|
||||
sprintf(buf, "%s (CAL version %s, %.0fMB, %.0f GFLOPS peak)",
|
||||
name, version, attribs.localRAM/1024.*1024., peak_flops/1.e9
|
||||
sprintf(buf, "%s (CAL version %s, %.0fMB, %.0fMB available, %.0f GFLOPS peak)",
|
||||
name, version, attribs.localRAM/MEGA, available_ram/MEGA, peak_flops/1.e9
|
||||
);
|
||||
}
|
||||
|
|
21
lib/coproc.h
21
lib/coproc.h
|
@ -95,8 +95,9 @@ struct COPROC_REQ {
|
|||
int parse(XML_PARSER&);
|
||||
};
|
||||
|
||||
// For now, there will be some duplication between the values present in
|
||||
// the OPENCL_DEVICE_PROP struct and the NVIDA and / or ATI structs
|
||||
// For now, there will be some duplication between the values in
|
||||
// the OPENCL_DEVICE_PROP struct and the NVIDIA/ATI structs
|
||||
//
|
||||
struct OPENCL_DEVICE_PROP {
|
||||
cl_device_id device_id;
|
||||
char name[256]; // Device name
|
||||
|
@ -159,10 +160,9 @@ struct COPROC {
|
|||
int opencl_device_count;
|
||||
bool running_graphics_app[MAX_COPROC_INSTANCES];
|
||||
// is this GPU running a graphics app (NVIDIA only)
|
||||
double available_ram[MAX_COPROC_INSTANCES];
|
||||
bool available_ram_unknown[MAX_COPROC_INSTANCES];
|
||||
// couldn't get available RAM; don't start new apps on this instance
|
||||
double available_ram_fake[MAX_COPROC_INSTANCES];
|
||||
double available_ram;
|
||||
double available_ram_temp[MAX_COPROC_INSTANCES];
|
||||
// used during job scheduling
|
||||
|
||||
double last_print_time;
|
||||
|
||||
|
@ -189,13 +189,11 @@ struct COPROC {
|
|||
req_instances = 0;
|
||||
opencl_device_count = 0;
|
||||
estimated_delay = 0;
|
||||
available_ram = 0;
|
||||
for (int i=0; i<MAX_COPROC_INSTANCES; i++) {
|
||||
device_nums[i] = 0;
|
||||
opencl_device_ids[i] = 0;
|
||||
running_graphics_app[i] = true;
|
||||
available_ram[i] = 0;
|
||||
available_ram_fake[i] = 0;
|
||||
available_ram_unknown[i] = true;
|
||||
}
|
||||
memset(&opencl_prop, 0, sizeof(opencl_prop));
|
||||
}
|
||||
|
@ -212,7 +210,6 @@ struct COPROC {
|
|||
COPROC() {
|
||||
clear();
|
||||
}
|
||||
void print_available_ram();
|
||||
};
|
||||
|
||||
// based on cudaDeviceProp from /usr/local/cuda/include/driver_types.h
|
||||
|
@ -285,7 +282,7 @@ struct COPROC_NVIDIA : public COPROC {
|
|||
|
||||
bool check_running_graphics_app();
|
||||
bool matches(OPENCL_DEVICE_PROP& OpenCLprop);
|
||||
void fake(int driver_version, double ram, int count);
|
||||
void fake(int driver_version, double ram, double avail_ram, int count);
|
||||
|
||||
};
|
||||
|
||||
|
@ -318,7 +315,7 @@ struct COPROC_ATI : public COPROC {
|
|||
// clock is in MHz
|
||||
peak_flops = (x>0)?x:5e10;
|
||||
}
|
||||
void fake(double, int);
|
||||
void fake(double ram, double avail_ram, int);
|
||||
};
|
||||
|
||||
struct COPROCS {
|
||||
|
|
|
@ -15,8 +15,7 @@
|
|||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// A sample validator that grants credit to any result whose CPU time is above
|
||||
// a certain minimum
|
||||
// A sample validator that accepts all results
|
||||
|
||||
#include <cstdlib>
|
||||
#include "config.h"
|
||||
|
@ -24,18 +23,12 @@
|
|||
|
||||
using std::vector;
|
||||
|
||||
static const double MIN_CPU_TIME = 0;
|
||||
|
||||
int init_result(RESULT& /*result*/, void*& /*data*/) {
|
||||
int init_result(RESULT&, void*&) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int compare_results(
|
||||
RESULT & r1, void* /*data1*/,
|
||||
RESULT const& r2, void* /*data2*/,
|
||||
bool& match
|
||||
) {
|
||||
match = (r1.cpu_time >= MIN_CPU_TIME && r2.cpu_time >= MIN_CPU_TIME);
|
||||
int compare_results(RESULT&, void*, RESULT const&, void*, bool& match) {
|
||||
match = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue