mirror of https://github.com/BOINC/boinc.git
client: fix work-fetch bug that can cause idle GPUs when GPU exclusions are used
Round-robin simulation, among other things, creates a bitmap "sim_excluded_instances" of instances that are idle because of GPU exclusions. There was a problem in how this was computed: when there are fewer jobs than GPU instances, it could fail to set any bits, so no work fetch would happen. My solution is a bit of a kludge, but should work in most cases. The long-term solution is to treat GPU instances separately, eliminating the need for GPU exclusions.
This commit is contained in:
parent
8caa9bc444
commit
424b8c4034
|
@ -60,6 +60,23 @@ inline void rsc_string(RESULT* rp, char* buf) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set up to "nused" bits of the source bitmap in the dest bitmap.
//
// src:   bitmap of candidate bits
// nused: how many of src's set bits to copy; may be fractional,
//        in which case a positive remainder still copies one more bit
// dst:   bitmap updated in place; bits already set in it are kept
//
// Bits of src are visited from least significant to most significant.
// Every set bit of src that is visited counts against nused,
// whether or not it was already set in dst.
//
static inline void set_bits(int src, double nused, int& dst) {
    // if all bits of src are already set in dst, we're done.
    // The parentheses matter: == binds tighter than &,
    // so "src&dst == dst" would evaluate as "src & 1".
    //
    if ((src & dst) == src) return;

    // use an unsigned mask so that shifting through bit 31 is well defined
    //
    unsigned int bit = 1;
    for (int i=0; i<32; i++) {
        if (nused <= 0) break;
        if (bit & (unsigned int)src) {
            dst |= (int)bit;
            nused -= 1;
        }
        bit <<= 1;
    }
}
|
||||||
|
|
||||||
// this is here (rather than rr_sim.h) because its inline functions
|
// this is here (rather than rr_sim.h) because its inline functions
|
||||||
// refer to RESULT
|
// refer to RESULT
|
||||||
//
|
//
|
||||||
|
@ -74,8 +91,12 @@ struct RR_SIM {
|
||||||
int rt = rp->avp->gpu_usage.rsc_type;
|
int rt = rp->avp->gpu_usage.rsc_type;
|
||||||
if (rt) {
|
if (rt) {
|
||||||
rsc_work_fetch[rt].sim_nused += rp->avp->gpu_usage.usage;
|
rsc_work_fetch[rt].sim_nused += rp->avp->gpu_usage.usage;
|
||||||
rsc_work_fetch[rt].sim_used_instances |= rp->app->non_excluded_instances[rt];
|
|
||||||
p->rsc_pwf[rt].sim_nused += rp->avp->gpu_usage.usage;
|
p->rsc_pwf[rt].sim_nused += rp->avp->gpu_usage.usage;
|
||||||
|
set_bits(
|
||||||
|
rp->app->non_excluded_instances[rt],
|
||||||
|
p->rsc_pwf[rt].sim_nused,
|
||||||
|
rsc_work_fetch[rt].sim_used_instances
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1261,7 +1261,7 @@ void get_app_params() {
|
||||||
// if missing app params, fill in defaults
|
// if missing app params, fill in defaults
|
||||||
//
|
//
|
||||||
if (!app->fpops_est) {
|
if (!app->fpops_est) {
|
||||||
app->fpops_est = 3600e9;
|
app->fpops_est = 3600e11;
|
||||||
}
|
}
|
||||||
if (!app->latency_bound) {
|
if (!app->latency_bound) {
|
||||||
app->latency_bound = 864000;
|
app->latency_bound = 864000;
|
||||||
|
|
Loading…
Reference in New Issue