mirror of https://github.com/BOINC/boinc.git
client: fix work-fetch bug that can cause idle GPUs when GPU exclusions are in use
Round-robin simulation, among other things, creates a bitmap "sim_excluded_instances" of instances that are idle because of GPU exclusions. There was a problem in how this was computed: when there are fewer jobs than GPU instances, it could fail to set any bits, so no work fetch would happen. My solution is a bit of a kludge, but should work in most cases. The long-term solution is to treat GPU instances separately, eliminating the need for GPU exclusions.
This commit is contained in:
parent
8caa9bc444
commit
424b8c4034
|
@ -60,6 +60,23 @@ inline void rsc_string(RESULT* rp, char* buf) {
|
|||
}
|
||||
}
|
||||
|
||||
// Mark up to "nused" instances from the source bitmap as set in the
// destination bitmap.
//
// src:   bitmap of candidate instances (bit i set => instance i is eligible)
// nused: number of instances to mark; may be fractional,
//        in which case any remaining positive amount still marks one bit
// dst:   bitmap that is updated in place (bits are only ever added)
//
// Candidate bits are visited from bit 0 upward.
// Each candidate bit visited counts against "nused",
// whether or not it was already set in dst.
//
static inline void set_bits(int src, double nused, int& dst) {
    // if every candidate bit is already set in dst, we're done.
    // The parentheses matter: == binds more tightly than &,
    // so "src&dst == dst" would evaluate as "src & (dst == dst)".
    //
    if ((src & dst) == src) return;

    // use an unsigned mask so that shifting through bit 31 is well defined
    //
    unsigned int bit = 1;
    for (int i=0; i<32; i++) {
        if (nused <= 0) break;
        if (bit & (unsigned int)src) {
            dst = (int)((unsigned int)dst | bit);
            nused -= 1;
        }
        bit <<= 1;
    }
}
|
||||
|
||||
// this is here (rather than rr_sim.h) because its inline functions
|
||||
// refer to RESULT
|
||||
//
|
||||
|
@ -74,8 +91,12 @@ struct RR_SIM {
|
|||
int rt = rp->avp->gpu_usage.rsc_type;
|
||||
if (rt) {
|
||||
rsc_work_fetch[rt].sim_nused += rp->avp->gpu_usage.usage;
|
||||
rsc_work_fetch[rt].sim_used_instances |= rp->app->non_excluded_instances[rt];
|
||||
p->rsc_pwf[rt].sim_nused += rp->avp->gpu_usage.usage;
|
||||
set_bits(
|
||||
rp->app->non_excluded_instances[rt],
|
||||
p->rsc_pwf[rt].sim_nused,
|
||||
rsc_work_fetch[rt].sim_used_instances
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1261,7 +1261,7 @@ void get_app_params() {
|
|||
// if missing app params, fill in defaults
|
||||
//
|
||||
if (!app->fpops_est) {
|
||||
app->fpops_est = 3600e9;
|
||||
app->fpops_est = 3600e11;
|
||||
}
|
||||
if (!app->latency_bound) {
|
||||
app->latency_bound = 864000;
|
||||
|
|
Loading…
Reference in New Issue