diff --git a/checkin_notes b/checkin_notes index 9fa5f1a603..300eef7162 100644 --- a/checkin_notes +++ b/checkin_notes @@ -5592,3 +5592,12 @@ David 17 Aug 2012 sched_vda.cpp vda_lib2.cpp vda_lib.h + +David 17 Aug 2012 + - client: if a project has excluded GPUs of a given type, + allow it to fetch work of that type if the # of runnable + jobs is <= the # of non-excluded instances (rather than 0). + + client/ + work_fetch.cpp,h + rr_sim.cpp diff --git a/client/rr_sim.cpp b/client/rr_sim.cpp index 06489e3642..552a8d7e90 100644 --- a/client/rr_sim.cpp +++ b/client/rr_sim.cpp @@ -156,12 +156,12 @@ void RR_SIM::init_pending_lists() { // if it's past its deadline, we need to mark it as such PROJECT* p = rp->project; - p->pwf.has_runnable_jobs = true; + p->pwf.n_runnable_jobs++; p->rsc_pwf[0].nused_total += rp->avp->avg_ncpus; int rt = rp->avp->gpu_usage.rsc_type; if (rt) { p->rsc_pwf[rt].nused_total += rp->avp->gpu_usage.usage; - p->rsc_pwf[rt].has_runnable_jobs = true; + p->rsc_pwf[rt].n_runnable_jobs++; } p->rsc_pwf[rt].pending.push_back(rp); set_rrsim_flops(rp); diff --git a/client/work_fetch.cpp b/client/work_fetch.cpp index dd9073e686..ef816e6e91 100644 --- a/client/work_fetch.cpp +++ b/client/work_fetch.cpp @@ -143,7 +143,7 @@ bool RSC_PROJECT_WORK_FETCH::compute_may_have_work(PROJECT* p, int rsc_type) { void RSC_PROJECT_WORK_FETCH::rr_init(PROJECT* p, int rsc_type) { may_have_work = compute_may_have_work(p, rsc_type); fetchable_share = 0; - has_runnable_jobs = false; + n_runnable_jobs = 0; sim_nused = 0; nused_total = 0; deadlines_missed = 0; @@ -264,18 +264,19 @@ PROJECT* RSC_WORK_FETCH::choose_project_hyst(bool strict) { } // if project has excluded GPUs of this type, - // and it has runnable jobs for this type, + // and it has more runnable jobs than non-excluded instances, // don't fetch work for it. // TODO: THIS IS CRUDE. Making it smarter would require // computing shortfall etc. 
on a per-project basis // if (rsc_type) { - if (p->ncoprocs_excluded[rsc_type] == ninstances) { + int n_not_excluded = ninstances - p->ncoprocs_excluded[rsc_type]; + if (n_not_excluded == 0) { continue; } if (p->ncoprocs_excluded[rsc_type] - && p->rsc_pwf[rsc_type].has_runnable_jobs - ){ + && p->rsc_pwf[rsc_type].n_runnable_jobs > n_not_excluded + ) { continue; } } @@ -503,7 +504,7 @@ void WORK_FETCH::rr_init() { for (unsigned int i=0; ipwf.cant_fetch_work_reason = p->pwf.compute_cant_fetch_work_reason(p); - p->pwf.has_runnable_jobs = false; + p->pwf.n_runnable_jobs = 0; for (int j=0; jrsc_pwf[j].rr_init(p, j); } diff --git a/client/work_fetch.h b/client/work_fetch.h index 5bfa0ea541..1633882671 100644 --- a/client/work_fetch.h +++ b/client/work_fetch.h @@ -97,7 +97,7 @@ struct RSC_PROJECT_WORK_FETCH { // this project's share relative to projects from which // we could probably get work for this resource; // determines how many instances this project deserves - bool has_runnable_jobs; + int n_runnable_jobs; double sim_nused; double nused_total; // sum of instances over all runnable jobs int deadlines_missed; @@ -113,7 +113,7 @@ struct RSC_PROJECT_WORK_FETCH { queue_est = 0; anon_skip = false; fetchable_share = 0; - has_runnable_jobs = false; + n_runnable_jobs = 0; sim_nused = 0; nused_total = 0; deadlines_missed = 0; @@ -269,7 +269,7 @@ struct PROJECT_WORK_FETCH { // temporary used during RR simulation int cant_fetch_work_reason; int compute_cant_fetch_work_reason(PROJECT*); - bool has_runnable_jobs; + int n_runnable_jobs; PROJECT_WORK_FETCH() { memset(this, 0, sizeof(*this)); }