From 777f1f11e8cdf9f61113e138ef5b935dbb234dec Mon Sep 17 00:00:00 2001
From: David Anderson
Date: Sat, 20 Oct 2012 00:43:44 -0700
Subject: [PATCH] - client: change work fetch policy to avoid starving GPUs
 in situations where GPU exclusions are used.
 - client: fix bug in round-robin simulation when GPU exclusions are used.

Note: this fixes a major problem (starvation)
with project-level GPU exclusion.
However, project-level GPU exclusion interferes with most of
the client's scheduling policies.
E.g., round-robin simulation doesn't take GPU exclusion into account,
and the resulting completion estimates and device shortfalls
can be wrong by an order of magnitude.

The only way I can see to fix this would be to model each
GPU instance as a separate resource,
and to associate each job with a particular GPU instance.
This would be a sweeping change in both client and server.
---
 checkin_notes         | 26 +++++++++++++++++
 client/log_flags.cpp  |  7 +++--
 client/project.cpp    |  2 +-
 client/project.h      |  2 --
 client/rr_sim.cpp     | 41 +++++++++++++++++++++++----
 client/work_fetch.cpp | 66 ++++++++++++++++++++++++++++++++++++++-----
 client/work_fetch.h   | 13 +++++++++
 doc/sim/sim_web.php   |  4 +++
 lib/coproc.h          |  6 ++--
 9 files changed, 147 insertions(+), 20 deletions(-)

diff --git a/checkin_notes b/checkin_notes
index c5799a7870..8378c30ae2 100644
--- a/checkin_notes
+++ b/checkin_notes
@@ -6274,3 +6274,29 @@ Rom 19 Oct 2012
         uc2_graphics.vcproj
         vboxwrapper.vcproj
         wrapper.vcproj
+
+David  19 Oct 2012
+    - client: change work fetch policy to avoid starving GPUs
+      in situations where GPU exclusions are used.
+    - client: fix bug in round-robin simulation when GPU exclusions are used.
+
+    Note: this fixes a major problem (starvation)
+    with project-level GPU exclusion.
+    However, project-level GPU exclusion interferes with most of
+    the client's scheduling policies.
+    E.g., round-robin simulation doesn't take GPU exclusion into account,
+    and the resulting completion estimates and device shortfalls
+    can be wrong by an order of magnitude.
+
+    The only way I can see to fix this would be to model each
+    GPU instance as a separate resource,
+    and to associate each job with a particular GPU instance.
+    This would be a sweeping change in both client and server.
+
+    client/
+        log_flags.cpp
+        project.cpp,h
+        rr_sim.cpp
+        work_fetch.cpp,h
+    lib/
+        coproc.h
diff --git a/client/log_flags.cpp b/client/log_flags.cpp
index b4ed996861..c548b2be16 100644
--- a/client/log_flags.cpp
+++ b/client/log_flags.cpp
@@ -531,6 +531,7 @@ void process_gpu_exclusions() {
         for (int k=1; krsc_pwf[k].non_excluded_instances = (1<master_url)) continue;
@@ -539,14 +540,16 @@ void process_gpu_exclusions() {
             if (eg.device_num >= 0) {
                 // exclusion may refer to nonexistent GPU
                 //
-                if (cp.device_num_exists(eg.device_num)) {
+                int ind = cp.device_num_index(eg.device_num);
+                if (ind >= 0) {
                     n++;
+                    p->rsc_pwf[k].non_excluded_instances &= ~(1<ncoprocs_excluded[k] = n;
+        p->rsc_pwf[k].ncoprocs_excluded = n;
     }
 }
diff --git a/client/project.cpp b/client/project.cpp
index 69f90d44ca..b86c8b5aee 100644
--- a/client/project.cpp
+++ b/client/project.cpp
@@ -419,7 +419,7 @@ int PROJECT::write_state(MIOFILE& out, bool gui_rpc) {
         if (no_rsc_pref[j]) {
             out.printf(" %s\n", rsc_name(j));
         }
-        if (j>0 && gui_rpc && (ncoprocs_excluded[j] == rsc_work_fetch[j].ninstances)) {
+        if (j>0 && gui_rpc && (rsc_pwf[j].ncoprocs_excluded == rsc_work_fetch[j].ninstances)) {
             out.printf(" %s\n", rsc_name(j));
         }
     }
diff --git a/client/project.h b/client/project.h
index 74dacb8805..ffc30e1af3 100644
--- a/client/project.h
+++ b/client/project.h
@@ -246,8 +246,6 @@ struct PROJECT : PROJ_AM {
         //
     double rr_sim_cpu_share;
     bool rr_sim_active;
-    int ncoprocs_excluded[MAX_RSC];
-        // number of excluded instances per processor type
     bool operator<(const PROJECT& p) {
         return sched_priority > p.sched_priority;
     }
diff --git a/client/rr_sim.cpp b/client/rr_sim.cpp
index 552a8d7e90..4f0bf3adf3 100644
--- a/client/rr_sim.cpp
+++ b/client/rr_sim.cpp
@@ -74,6 +74,7 @@ struct RR_SIM {
         int rt = rp->avp->gpu_usage.rsc_type;
         if (rt) {
             rsc_work_fetch[rt].sim_nused += rp->avp->gpu_usage.usage;
+            rsc_work_fetch[rt].sim_used_instances |= p->rsc_pwf[rt].non_excluded_instances;
             p->rsc_pwf[rt].sim_nused += rp->avp->gpu_usage.usage;
         }
     }
@@ -169,10 +170,14 @@ void RR_SIM::init_pending_lists() {
     }
 }
 
-// pick jobs to run; put them in "active" list.
+// Pick jobs to run, putting them in "active" list.
 // Simulate what the job scheduler would do:
 // pick a job from the project P with highest scheduling priority,
-// then adjust P's scheduling priority
+// then adjust P's scheduling priority.
+//
+// This is called at the start of the simulation,
+// and again each time a job finishes.
+// In the latter case, some resources may be saturated.
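
The bookkeeping added in process_gpu_exclusions() above boils down to two per-project values: a bitmap of the GPU instances the project may use, and a count of the instances it may not. The following is a minimal, self-contained sketch of that derivation in plain C++ -- not BOINC source; ProjectGpuState, apply_exclusions, and gpu_count are hypothetical stand-ins for RSC_PROJECT_WORK_FETCH and the per-project loop over config.exclude_gpus.

    // Minimal sketch, assuming a single GPU type with gpu_count instances.
    #include <cstdio>
    #include <vector>

    struct ProjectGpuState {          // stand-in for RSC_PROJECT_WORK_FETCH
        int non_excluded_instances;   // bitmap: bit i set => this project may use instance i
        int ncoprocs_excluded;        // how many instances are excluded for this project
    };

    ProjectGpuState apply_exclusions(int gpu_count, const std::vector<int>& excluded_instances) {
        ProjectGpuState s;
        s.non_excluded_instances = (1 << gpu_count) - 1;    // start with all instances usable
        s.ncoprocs_excluded = 0;
        for (size_t j = 0; j < excluded_instances.size(); j++) {
            int ind = excluded_instances[j];
            if (ind < 0 || ind >= gpu_count) continue;      // exclusion may refer to a nonexistent GPU
            if (s.non_excluded_instances & (1 << ind)) {
                s.non_excluded_instances &= ~(1 << ind);    // clear the bit for the excluded instance
                s.ncoprocs_excluded++;
            }
        }
        return s;
    }

    int main() {
        // three GPUs; this project is excluded from instance 1
        ProjectGpuState s = apply_exclusions(3, std::vector<int>(1, 1));
        printf("non_excluded_instances=0x%x ncoprocs_excluded=%d\n",
            s.non_excluded_instances, s.ncoprocs_excluded);  // prints 0x5 and 1
        return 0;
    }

Keeping the exclusions as a per-project bitmap is what lets the later checks in rr_sim.cpp and work_fetch.cpp be single AND/OR operations instead of rescans of the exclusion list.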
 //
 void RR_SIM::pick_jobs_to_run(double reltime) {
     active.clear();
@@ -241,7 +246,18 @@ void RR_SIM::pick_jobs_to_run(double reltime) {
         // check whether resource is saturated
         //
         if (rt) {
-            if (rsc_work_fetch[rt].sim_nused >= coprocs.coprocs[rt].count - p->ncoprocs_excluded[rt]) break;
+            if (rsc_work_fetch[rt].sim_nused >= coprocs.coprocs[rt].count) {
+                break;
+            }
+
+            // if a GPU isn't saturated but this project is using
+            // its max given exclusions, remove it from project heap
+            //
+            if (p->rsc_pwf[rt].sim_nused >= coprocs.coprocs[rt].count - p->rsc_pwf[rt].ncoprocs_excluded) {
+                pop_heap(project_heap.begin(), project_heap.end());
+                project_heap.pop_back();
+                continue;
+            }
         } else {
             if (rsc_work_fetch[rt].sim_nused >= gstate.ncpus) break;
         }
@@ -255,7 +271,7 @@ void RR_SIM::pick_jobs_to_run(double reltime) {
             pop_heap(project_heap.begin(), project_heap.end());
             project_heap.pop_back();
         } else if (!rp->rrsim_done) {
-            // Otherwise reshuffle the heap
+            // Otherwise reshuffle the project heap
             //
             make_heap(project_heap.begin(), project_heap.end());
         }
@@ -401,7 +417,9 @@ void RR_SIM::simulate() {
                 }
             }
         }
-        // adjust FLOPS left
+
+        // adjust FLOPS left of other active jobs
+        //
         for (unsigned int i=0; irrsim_flops_left -= rp->rrsim_flops*delta_t;
@@ -464,6 +482,19 @@ void RR_SIM::simulate() {
         sim_now += delta_t;
     }
 
+    // identify GPU instances starved because of exclusions
+    //
+    for (int i=1; i gstate.work_buf_min()) return NULL;
+        if (saturated_time > gstate.work_buf_min()) buffer_low = false;
     } else {
-        if (saturated_time > gstate.work_buf_total()) return NULL;
+        if (saturated_time > gstate.work_buf_total()) buffer_low = false;
     }
-    if (saturated_time > gstate.work_buf_total()) return NULL;
+
+    if (log_flags.work_fetch_debug) {
+        msg_printf(0, MSG_INFO,
+            "[work_fetch] buffer_low: %s; sim_excluded_instances %d\n",
+            buffer_low?"yes":"no", sim_excluded_instances
+        );
+    }
+
+    if (!buffer_low && !sim_excluded_instances) return NULL;
     for (unsigned i=0; incoprocs_excluded[rsc_type];
+        int n_not_excluded = ninstances - p->rsc_pwf[rsc_type].ncoprocs_excluded;
         if (n_not_excluded == 0) {
             continue;
         }
-        if (p->ncoprocs_excluded[rsc_type]
+        if (p->rsc_pwf[rsc_type].ncoprocs_excluded
             && p->rsc_pwf[rsc_type].n_runnable_jobs > n_not_excluded
         ) {
             continue;
@@ -283,6 +298,16 @@ PROJECT* RSC_WORK_FETCH::choose_project_hyst(bool strict) {
         RSC_PROJECT_WORK_FETCH& rpwf = project_state(p);
         if (rpwf.anon_skip) continue;
+
+        // if we're sending work only because of exclusion starvation,
+        // make sure this project can use the starved instances
+        //
+        if (!buffer_low) {
+            if ((sim_excluded_instances & rpwf.non_excluded_instances) == 0) {
+                continue;
+            }
+        }
+
         if (pbest) {
             if (pbest->sched_priority > p->sched_priority) {
                 continue;
@@ -292,7 +317,11 @@ PROJECT* RSC_WORK_FETCH::choose_project_hyst(bool strict) {
     }
     if (!pbest) return NULL;
     work_fetch.clear_request();
-    work_fetch.set_all_requests_hyst(pbest, rsc_type);
+    if (buffer_low) {
+        work_fetch.set_all_requests_hyst(pbest, rsc_type);
+    } else {
+        set_request_excluded(pbest);
+    }
     return pbest;
 }
@@ -442,6 +471,29 @@ void RSC_WORK_FETCH::set_request(PROJECT* p) {
     }
 }
 
+// We're fetching work because some instances are starved because
+// of exclusions.
+// See how many N of these instances are not excluded for this project.
+// Ask for N instances and for N*work_buf_min seconds.
+//
+void RSC_WORK_FETCH::set_request_excluded(PROJECT* p) {
+    RSC_PROJECT_WORK_FETCH& pwf = project_state(p);
+
+    int inst_mask = sim_excluded_instances & pwf.non_excluded_instances;
+    int n = 0;
+    for (int i=0; iresource_share == 0 || config.fetch_minimal_work) {
+        req_secs = 1;
+    } else {
+        req_secs = n*gstate.work_buf_total();
+    }
+}
+
 void RSC_WORK_FETCH::print_state(const char* name) {
     msg_printf(0, MSG_INFO,
         "[work_fetch] %s: shortfall %.2f nidle %.2f saturated %.2f busy %.2f",
@@ -877,7 +929,7 @@ void WORK_FETCH::set_initial_work_request(PROJECT* p) {
         rsc_work_fetch[i].req_secs = 1;
         if (i) {
             RSC_WORK_FETCH& rwf = rsc_work_fetch[i];
-            if (rwf.ninstances == p->ncoprocs_excluded[i]) {
+            if (rwf.ninstances == p->rsc_pwf[i].ncoprocs_excluded) {
                 rsc_work_fetch[i].req_secs = 0;
             }
         }
diff --git a/client/work_fetch.h b/client/work_fetch.h
index 975d14503c..4f1a1d371d 100644
--- a/client/work_fetch.h
+++ b/client/work_fetch.h
@@ -100,6 +100,11 @@ struct RSC_PROJECT_WORK_FETCH {
     int n_runnable_jobs;
     double sim_nused;
     double nused_total;     // sum of instances over all runnable jobs
+    int ncoprocs_excluded;
+        // number of excluded instances
+    int non_excluded_instances;
+        // bitmap of non-excluded instances
+        // (i.e. instances this project's jobs can run on)
     int deadlines_missed;
     int deadlines_missed_copy;
         // copy of the above used during schedule_cpus()
@@ -116,6 +121,8 @@ struct RSC_PROJECT_WORK_FETCH {
         n_runnable_jobs = 0;
         sim_nused = 0;
         nused_total = 0;
+        ncoprocs_excluded = 0;
+        non_excluded_instances = 0;
         deadlines_missed = 0;
         deadlines_missed_copy = 0;
     }
@@ -201,6 +208,11 @@ struct RSC_WORK_FETCH {
         // seconds of idle instances between now and now+work_buf_total()
     double nidle_now;
     double sim_nused;
+    int sim_used_instances;
+        // bitmap of instances used in simulation,
+        // taking into account GPU exclusions
+    int sim_excluded_instances;
+        // bitmap of instances not used (i.e. starved because of exclusion)
     double total_fetchable_share;
         // total RS of projects from which we could fetch jobs for this device
     double saturated_time;
@@ -241,6 +253,7 @@ struct RSC_WORK_FETCH {
     void print_state(const char*);
     void clear_request();
     void set_request(PROJECT*);
+    void set_request_excluded(PROJECT*);
     bool may_have_work(PROJECT*);
     RSC_WORK_FETCH() {
         rsc_type = 0;
diff --git a/doc/sim/sim_web.php b/doc/sim/sim_web.php
index 73d184ff3d..8a91e025d0 100644
--- a/doc/sim/sim_web.php
+++ b/doc/sim/sim_web.php
@@ -339,6 +339,7 @@ function log_flag_boxes() {
     return "
         CPU scheduling debug
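
Taken together, the work_fetch.cpp and work_fetch.h changes reduce "starved by exclusion" to bitmap arithmetic: the round-robin simulation records which instances could receive work (sim_used_instances), the complement is the starved set (sim_excluded_instances), choose_project_hyst() only keeps a project whose non_excluded_instances overlaps that set, and set_request_excluded() sizes the request from the overlap. Below is a compact, self-contained sketch of that arithmetic with assumed, simplified names (work_buf_secs stands in for gstate.work_buf_total()); it is an illustration, not the client's code.

    #include <cstdio>

    // Count how many of the starved instances this project may actually use,
    // and size a work request accordingly (n instances, n buffers worth of seconds).
    void request_for_starvation(
        int ninstances, int sim_used_instances, int project_non_excluded,
        double work_buf_secs, double& req_instances, double& req_secs
    ) {
        int all_instances = (1 << ninstances) - 1;
        int starved = all_instances & ~sim_used_instances;   // sim_excluded_instances
        int usable  = starved & project_non_excluded;        // starved instances this project can fill
        int n = 0;
        for (int i = 0; i < ninstances; i++) {
            if (usable & (1 << i)) n++;
        }
        req_instances = n;
        req_secs = n * work_buf_secs;
    }

    int main() {
        double req_inst, req_secs;
        // 4 GPUs; the simulation only placed work on instances 0 and 1;
        // this project is excluded from instance 0 only.
        request_for_starvation(4, 0x3, 0xE, 86400, req_inst, req_secs);
        printf("request %.0f instances, %.0f seconds\n", req_inst, req_secs);  // 2 and 172800
        return 0;
    }

If the overlap is empty the project is skipped entirely, which is exactly the gate added to choose_project_hyst() above.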
        Round-robin simulation info
+       Round-robin simulation details
        Work fetch debug
    ";
}
@@ -440,6 +441,9 @@ function simulation_action() {
     if (post_str("rr_simulation", true)) {
         $x .= "\n";
     }
+    if (post_str("rrsim_detail", true)) {
+        $x .= "\n";
+    }
     if (post_str("work_fetch_debug", true)) {
         $x .= "\n";
     }
diff --git a/lib/coproc.h b/lib/coproc.h
index 1b9095a759..5971dddeea 100644
--- a/lib/coproc.h
+++ b/lib/coproc.h
@@ -264,11 +264,11 @@ struct COPROC {
     COPROC() {
         clear();
     }
-    bool device_num_exists(int n) {
+    int device_num_index(int n) {
         for (int i=0; i &opencls,
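
The lib/coproc.h change replaces the boolean device_num_exists() with device_num_index(), which returns the instance index (or -1), so that process_gpu_exclusions() can clear the matching bit in a project's non_excluded_instances. A small sketch of that lookup and its use follows; Gpu is a hypothetical, simplified stand-in for COPROC, and the eight device slots are an arbitrary choice for the example.

    #include <cstdio>

    struct Gpu {                      // simplified stand-in for COPROC
        int count;                    // number of usable instances of this GPU type
        int device_nums[8];           // OS-level device number of each instance

        // Return the instance index for an OS device number, or -1 if absent.
        int device_num_index(int n) const {
            for (int i = 0; i < count; i++) {
                if (device_nums[i] == n) return i;
            }
            return -1;
        }
    };

    int main() {
        Gpu gpu;
        gpu.count = 2;
        gpu.device_nums[0] = 0;
        gpu.device_nums[1] = 2;       // e.g. device 1 was filtered out earlier

        int non_excluded_instances = (1 << gpu.count) - 1;
        int ind = gpu.device_num_index(2);          // an exclusion names OS device 2
        if (ind >= 0) {
            non_excluded_instances &= ~(1 << ind);  // clear the bit for instance 1
        }
        printf("index=%d mask=0x%x\n", ind, non_excluded_instances);  // index=1 mask=0x1
        return 0;
    }

Returning the index rather than a yes/no answer matters because OS device numbers and instance indices can differ once some devices have been filtered out.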