client: fix work-fetch bug that can cause idle GPUs when GPU exclusions are used

Round-robin simulation, among other things, creates a bitmap
"sim_excluded_instances" of instances that are idle because of GPU exclusions.
There was a problem in how this was computed;
in the situation where there are fewer jobs than GPU instances
it could fail to set any bits, so no work fetch would happen.

My solution is a bit of a kludge, but should work in most cases.
The long-term solution is to treat GPU instances separately,
eliminating the need for GPU exclusions.
This commit is contained in:
David Anderson 2013-06-08 16:25:53 -07:00
parent 8caa9bc444
commit 424b8c4034
2 changed files with 23 additions and 2 deletions

View File

@ -60,6 +60,23 @@ inline void rsc_string(RESULT* rp, char* buf) {
}
}
// Set up to "nused" bits of the source bitmap in the dest bitmap.
//
// src:   bitmap of candidate bits
// nused: how many candidate bits to mark; a fractional value
//        still consumes a whole bit (the loop runs while nused > 0)
// dst:   bitmap updated in place; bits are only ever added, never cleared
//
// Candidate bits are taken from the lowest bit upward.
// A candidate bit counts against nused even if it was already set in dst.
//
static inline void set_bits(int src, double nused, int& dst) {
    // if all bits of src are already set in dst, we're done.
    // NOTE: == binds tighter than &, so the parentheses are required;
    // without them the test degenerates to (src & 1).
    //
    if ((src & dst) == src) return;

    // use an unsigned mask so that shifting into and out of
    // the top bit is well defined
    //
    unsigned int bit = 1;
    for (int i=0; i<32; i++) {
        if (nused <= 0) break;
        if (bit & (unsigned int)src) {
            dst = (int)((unsigned int)dst | bit);
            nused -= 1;
        }
        bit <<= 1;
    }
}
// this is here (rather than rr_sim.h) because its inline functions
// refer to RESULT
//
@ -74,8 +91,12 @@ struct RR_SIM {
int rt = rp->avp->gpu_usage.rsc_type;
if (rt) {
rsc_work_fetch[rt].sim_nused += rp->avp->gpu_usage.usage;
rsc_work_fetch[rt].sim_used_instances |= rp->app->non_excluded_instances[rt];
p->rsc_pwf[rt].sim_nused += rp->avp->gpu_usage.usage;
set_bits(
rp->app->non_excluded_instances[rt],
p->rsc_pwf[rt].sim_nused,
rsc_work_fetch[rt].sim_used_instances
);
}
}

View File

@ -1261,7 +1261,7 @@ void get_app_params() {
// if missing app params, fill in defaults
//
if (!app->fpops_est) {
app->fpops_est = 3600e9;
app->fpops_est = 3600e11;
}
if (!app->latency_bound) {
app->latency_bound = 864000;