- client: change work fetch policy to avoid starving GPUs in situations where GPU exclusions are used. - client: fix bug in round-robin simulation when GPU exclusions are used.

Note: this fixes a major problem (starvation)
    with project-level GPU exclusion.
    However, project-level GPU exclusion interferes with most of
    the client's scheduling policies.
    E.g., round-robin simulation doesn't take GPU exclusion into account,
    and the resulting completion estimates and device shortfalls
    can be wrong by an order of magnitude.

    The only way I can see to fix this would be to model each
    GPU instance as a separate resource,
    and to associate each job with a particular GPU instance.
    This would be a sweeping change in both client and server.
This commit is contained in:
David Anderson 2012-10-20 00:43:44 -07:00 committed by Oliver Bock
parent 02213ba568
commit 777f1f11e8
9 changed files with 147 additions and 20 deletions

View File

@ -6274,3 +6274,29 @@ Rom 19 Oct 2012
uc2_graphics.vcproj
vboxwrapper.vcproj
wrapper.vcproj
David 19 Oct 2012
- client: change work fetch policy to avoid starving GPUs
in situations where GPU exclusions are used.
- client: fix bug in round-robin simulation when GPU exclusions are used.
Note: this fixes a major problem (starvation)
with project-level GPU exclusion.
However, project-level GPU exclusion interferes with most of
the client's scheduling policies.
E.g., round-robin simulation doesn't take GPU exclusion into account,
and the resulting completion estimates and device shortfalls
can be wrong by an order of magnitude.
The only way I can see to fix this would be to model each
GPU instance as a separate resource,
and to associate each job with a particular GPU instance.
This would be a sweeping change in both client and server.
client/
log_flags.cpp
project.cpp,h
rr_sim.cpp
work_fetch.cpp,h
lib/
coproc.h

View File

@ -531,6 +531,7 @@ void process_gpu_exclusions() {
for (int k=1; k<coprocs.n_rsc; k++) {
int n=0;
COPROC& cp = coprocs.coprocs[k];
p->rsc_pwf[k].non_excluded_instances = (1<<cp.count)-1; // all 1's
for (j=0; j<config.exclude_gpus.size(); j++) {
EXCLUDE_GPU& eg = config.exclude_gpus[j];
if (strcmp(eg.url.c_str(), p->master_url)) continue;
@ -539,14 +540,16 @@ void process_gpu_exclusions() {
if (eg.device_num >= 0) {
// exclusion may refer to nonexistent GPU
//
if (cp.device_num_exists(eg.device_num)) {
int ind = cp.device_num_index(eg.device_num);
if (ind >= 0) {
n++;
p->rsc_pwf[k].non_excluded_instances &= ~(1<<ind);
}
} else {
n = cp.count;
}
}
p->ncoprocs_excluded[k] = n;
p->rsc_pwf[k].ncoprocs_excluded = n;
}
}

View File

@ -419,7 +419,7 @@ int PROJECT::write_state(MIOFILE& out, bool gui_rpc) {
if (no_rsc_pref[j]) {
out.printf(" <no_rsc_pref>%s</no_rsc_pref>\n", rsc_name(j));
}
if (j>0 && gui_rpc && (ncoprocs_excluded[j] == rsc_work_fetch[j].ninstances)) {
if (j>0 && gui_rpc && (rsc_pwf[j].ncoprocs_excluded == rsc_work_fetch[j].ninstances)) {
out.printf(" <no_rsc_config>%s</no_rsc_config>\n", rsc_name(j));
}
}

View File

@ -246,8 +246,6 @@ struct PROJECT : PROJ_AM {
//
double rr_sim_cpu_share;
bool rr_sim_active;
int ncoprocs_excluded[MAX_RSC];
// number of excluded instances per processor type
bool operator<(const PROJECT& p) {
return sched_priority > p.sched_priority;
}

View File

@ -74,6 +74,7 @@ struct RR_SIM {
int rt = rp->avp->gpu_usage.rsc_type;
if (rt) {
rsc_work_fetch[rt].sim_nused += rp->avp->gpu_usage.usage;
rsc_work_fetch[rt].sim_used_instances |= p->rsc_pwf[rt].non_excluded_instances;
p->rsc_pwf[rt].sim_nused += rp->avp->gpu_usage.usage;
}
}
@ -169,10 +170,14 @@ void RR_SIM::init_pending_lists() {
}
}
// pick jobs to run; put them in "active" list.
// Pick jobs to run, putting them in "active" list.
// Simulate what the job scheduler would do:
// pick a job from the project P with highest scheduling priority,
// then adjust P's scheduling priority
// then adjust P's scheduling priority.
//
// This is called at the start of the simulation,
// and again each time a job finishes.
// In the latter case, some resources may be saturated.
//
void RR_SIM::pick_jobs_to_run(double reltime) {
active.clear();
@ -241,7 +246,18 @@ void RR_SIM::pick_jobs_to_run(double reltime) {
// check whether resource is saturated
//
if (rt) {
if (rsc_work_fetch[rt].sim_nused >= coprocs.coprocs[rt].count - p->ncoprocs_excluded[rt]) break;
if (rsc_work_fetch[rt].sim_nused >= coprocs.coprocs[rt].count) {
break;
}
// if a GPU isn't saturated but this project is using
// its max given exclusions, remove it from project heap
//
if (p->rsc_pwf[rt].sim_nused >= coprocs.coprocs[rt].count - p->rsc_pwf[rt].ncoprocs_excluded) {
pop_heap(project_heap.begin(), project_heap.end());
project_heap.pop_back();
continue;
}
} else {
if (rsc_work_fetch[rt].sim_nused >= gstate.ncpus) break;
}
@ -255,7 +271,7 @@ void RR_SIM::pick_jobs_to_run(double reltime) {
pop_heap(project_heap.begin(), project_heap.end());
project_heap.pop_back();
} else if (!rp->rrsim_done) {
// Otherwise reshuffle the heap
// Otherwise reshuffle the project heap
//
make_heap(project_heap.begin(), project_heap.end());
}
@ -401,7 +417,9 @@ void RR_SIM::simulate() {
}
}
}
// adjust FLOPS left
// adjust FLOPS left of other active jobs
//
for (unsigned int i=0; i<active.size(); i++) {
rp = active[i];
rp->rrsim_flops_left -= rp->rrsim_flops*delta_t;
@ -464,6 +482,19 @@ void RR_SIM::simulate() {
sim_now += delta_t;
}
// identify GPU instances starved because of exclusions
//
for (int i=1; i<coprocs.n_rsc; i++) {
RSC_WORK_FETCH& rwf = rsc_work_fetch[i];
COPROC& cp = coprocs.coprocs[i];
int mask = (1<<cp.count)-1;
rwf.sim_excluded_instances = ~(rwf.sim_used_instances) & mask;
msg_printf(0, MSG_INFO,
"rsc %d: sim_used_inst %d mask %d sim_excluded_instances %d",
i, rwf.sim_used_instances, mask, rwf.sim_excluded_instances
);
}
// if simulation ends before end of buffer, take the tail into account
//
if (sim_now < buf_end) {

View File

@ -187,6 +187,7 @@ void RSC_WORK_FETCH::rr_init() {
deadline_missed_instances = 0;
saturated_time = 0;
busy_time_estimator.reset();
sim_used_instances = 0;
}
void RSC_WORK_FETCH::accumulate_shortfall(double d_time) {
@ -204,6 +205,7 @@ void RSC_WORK_FETCH::accumulate_shortfall(double d_time) {
void RSC_WORK_FETCH::update_saturated_time(double dt) {
double idle = ninstances - sim_nused;
//msg_printf(0, MSG_INFO, "update_saturated rsc %d idle %f dt %f", rsc_type, idle, dt);
if (idle < 1e-6) {
saturated_time = dt;
}
@ -222,6 +224,10 @@ static bool wacky_dcf(PROJECT* p) {
// If this resource is below min buffer level,
// return the highest-priority project that may have jobs for it.
//
// If the resource has instances starved because of exclusions,
// return the highest-priority project that may have jobs
// and doesn't exclude those instances.
//
// If strict is true, enforce hysteresis and backoff rules
// (which are there to limit rate of scheduler RPCs).
// Otherwise, we're going to do a scheduler RPC anyway
@ -230,12 +236,21 @@ static bool wacky_dcf(PROJECT* p) {
//
PROJECT* RSC_WORK_FETCH::choose_project_hyst(bool strict) {
PROJECT* pbest = NULL;
bool buffer_low = true;
if (strict) {
if (saturated_time > gstate.work_buf_min()) return NULL;
if (saturated_time > gstate.work_buf_min()) buffer_low = false;
} else {
if (saturated_time > gstate.work_buf_total()) return NULL;
if (saturated_time > gstate.work_buf_total()) buffer_low = false;
}
if (saturated_time > gstate.work_buf_total()) return NULL;
if (log_flags.work_fetch_debug) {
msg_printf(0, MSG_INFO,
"[work_fetch] buffer_low: %s; sim_excluded_instances %d\n",
buffer_low?"yes":"no", sim_excluded_instances
);
}
if (!buffer_low && !sim_excluded_instances) return NULL;
for (unsigned i=0; i<gstate.projects.size(); i++) {
PROJECT* p = gstate.projects[i];
@ -270,11 +285,11 @@ PROJECT* RSC_WORK_FETCH::choose_project_hyst(bool strict) {
// computing shortfall etc. on a per-project basis
//
if (rsc_type) {
int n_not_excluded = ninstances - p->ncoprocs_excluded[rsc_type];
int n_not_excluded = ninstances - p->rsc_pwf[rsc_type].ncoprocs_excluded;
if (n_not_excluded == 0) {
continue;
}
if (p->ncoprocs_excluded[rsc_type]
if (p->rsc_pwf[rsc_type].ncoprocs_excluded
&& p->rsc_pwf[rsc_type].n_runnable_jobs > n_not_excluded
) {
continue;
@ -283,6 +298,16 @@ PROJECT* RSC_WORK_FETCH::choose_project_hyst(bool strict) {
RSC_PROJECT_WORK_FETCH& rpwf = project_state(p);
if (rpwf.anon_skip) continue;
// if we're sending work only because of exclusion starvation,
// make sure this project can use the starved instances
//
if (!buffer_low) {
if ((sim_excluded_instances & rpwf.non_excluded_instances) == 0) {
continue;
}
}
if (pbest) {
if (pbest->sched_priority > p->sched_priority) {
continue;
@ -292,7 +317,11 @@ PROJECT* RSC_WORK_FETCH::choose_project_hyst(bool strict) {
}
if (!pbest) return NULL;
work_fetch.clear_request();
if (buffer_low) {
work_fetch.set_all_requests_hyst(pbest, rsc_type);
} else {
set_request_excluded(pbest);
}
return pbest;
}
@ -442,6 +471,29 @@ void RSC_WORK_FETCH::set_request(PROJECT* p) {
}
}
// We're fetching work because some instances are starved because
// of exclusions.
// See how many (N) of these starved instances this project is
// allowed to use, then ask for N instances and N*work_buf_total()
// seconds of work.
//
void RSC_WORK_FETCH::set_request_excluded(PROJECT* p) {
    RSC_PROJECT_WORK_FETCH& pwf = project_state(p);

    // instances that are starved by exclusions AND usable by this project
    //
    int inst_mask = sim_excluded_instances & pwf.non_excluded_instances;

    // count the set bits; (1<<i) selects instance i's bit.
    // (was (i<<i), which tests the wrong bit for every i except 1,
    // and tests no bit at all for i == 0)
    //
    int n = 0;
    for (int i=0; i<ninstances; i++) {
        if ((1<<i) & inst_mask) {
            n++;
        }
    }
    req_instances = n;
    if (p->resource_share == 0 || config.fetch_minimal_work) {
        req_secs = 1;
    } else {
        req_secs = n*gstate.work_buf_total();
    }
}
void RSC_WORK_FETCH::print_state(const char* name) {
msg_printf(0, MSG_INFO,
"[work_fetch] %s: shortfall %.2f nidle %.2f saturated %.2f busy %.2f",
@ -877,7 +929,7 @@ void WORK_FETCH::set_initial_work_request(PROJECT* p) {
rsc_work_fetch[i].req_secs = 1;
if (i) {
RSC_WORK_FETCH& rwf = rsc_work_fetch[i];
if (rwf.ninstances == p->ncoprocs_excluded[i]) {
if (rwf.ninstances == p->rsc_pwf[i].ncoprocs_excluded) {
rsc_work_fetch[i].req_secs = 0;
}
}

View File

@ -100,6 +100,11 @@ struct RSC_PROJECT_WORK_FETCH {
int n_runnable_jobs;
double sim_nused;
double nused_total; // sum of instances over all runnable jobs
int ncoprocs_excluded;
// number of excluded instances
int non_excluded_instances;
// bitmap of non-excluded instances
// (i.e. instances this project's jobs can run on)
int deadlines_missed;
int deadlines_missed_copy;
// copy of the above used during schedule_cpus()
@ -116,6 +121,8 @@ struct RSC_PROJECT_WORK_FETCH {
n_runnable_jobs = 0;
sim_nused = 0;
nused_total = 0;
ncoprocs_excluded = 0;
non_excluded_instances = 0;
deadlines_missed = 0;
deadlines_missed_copy = 0;
}
@ -201,6 +208,11 @@ struct RSC_WORK_FETCH {
// seconds of idle instances between now and now+work_buf_total()
double nidle_now;
double sim_nused;
int sim_used_instances;
// bitmap of instances used in simulation,
// taking into account GPU exclusions
int sim_excluded_instances;
// bitmap of instances not used (i.e. starved because of exclusion)
double total_fetchable_share;
// total RS of projects from which we could fetch jobs for this device
double saturated_time;
@ -241,6 +253,7 @@ struct RSC_WORK_FETCH {
void print_state(const char*);
void clear_request();
void set_request(PROJECT*);
void set_request_excluded(PROJECT*);
bool may_have_work(PROJECT*);
RSC_WORK_FETCH() {
rsc_type = 0;

View File

@ -339,6 +339,7 @@ function log_flag_boxes() {
return "
<input type=checkbox name=cpu_sched_debug> CPU scheduling debug
<br> <input type=checkbox name=rr_simulation> Round-robin simulation info
<br> <input type=checkbox name=rrsim_detail> Round-robin simulation details
<br> <input type=checkbox name=work_fetch_debug> Work fetch debug
";
}
@ -440,6 +441,9 @@ function simulation_action() {
if (post_str("rr_simulation", true)) {
$x .= "<rr_simulation/>\n";
}
if (post_str("rrsim_detail", true)) {
$x .= "<rrsim_detail/>\n";
}
if (post_str("work_fetch_debug", true)) {
$x .= "<work_fetch_debug/>\n";
}

View File

@ -264,11 +264,11 @@ struct COPROC {
COPROC() {
clear();
}
bool device_num_exists(int n) {
int device_num_index(int n) {
for (int i=0; i<count; i++) {
if (device_nums[i] == n) return true;
if (device_nums[i] == n) return i;
}
return false;
return -1;
}
void merge_opencl(
std::vector<OPENCL_DEVICE_PROP> &opencls,