Client: fix bug that prevented work fetch for zero-share projects

In work fetch setup, rsc_project_reason was being computed
before the round-robin simulation was run.
It must be computed afterwards, because it depends on the number of idle devices,
which is computed by the simulation.
This commit is contained in:
David Anderson 2014-11-17 13:56:06 -08:00
parent 07ba0a57d7
commit fbc6e40dca
2 changed files with 18 additions and 6 deletions

View File

@ -72,8 +72,7 @@ inline bool has_coproc_app(PROJECT* p, int rsc_type) {
/////////////// RSC_PROJECT_WORK_FETCH /////////////// /////////////// RSC_PROJECT_WORK_FETCH ///////////////
void RSC_PROJECT_WORK_FETCH::rr_init(PROJECT* p, int rsc_type) { void RSC_PROJECT_WORK_FETCH::rr_init() {
rsc_project_reason = compute_rsc_project_reason(p, rsc_type);
fetchable_share = 0; fetchable_share = 0;
n_runnable_jobs = 0; n_runnable_jobs = 0;
sim_nused = 0; sim_nused = 0;
@ -397,7 +396,7 @@ void WORK_FETCH::rr_init() {
PROJECT* p = gstate.projects[i]; PROJECT* p = gstate.projects[i];
p->pwf.rr_init(p); p->pwf.rr_init(p);
for (int j=0; j<coprocs.n_rsc; j++) { for (int j=0; j<coprocs.n_rsc; j++) {
p->rsc_pwf[j].rr_init(p, j); p->rsc_pwf[j].rr_init();
} }
} }
} }
@ -588,6 +587,19 @@ void WORK_FETCH::setup() {
gstate.compute_nuploading_results(); gstate.compute_nuploading_results();
rr_simulation(); rr_simulation();
// Compute rsc_project_reason.
// Must do this after rr_simulation() because the logic for
// zero-resource-share projects uses #idle instances
//
for (unsigned int i=0; i<gstate.projects.size(); i++) {
PROJECT* p = gstate.projects[i];
for (int j=0; j<coprocs.n_rsc; j++) {
RSC_PROJECT_WORK_FETCH& rpwf = p->rsc_pwf[j];
rpwf.rsc_project_reason = rpwf.compute_rsc_project_reason(p, j);
}
}
compute_shares(); compute_shares();
project_priority_init(true); project_priority_init(true);
clear_request(); clear_request();
@ -649,7 +661,7 @@ PROJECT* WORK_FETCH::choose_project() {
p = gstate.projects[j]; p = gstate.projects[j];
WF_DEBUG(msg_printf(p, MSG_INFO, "scanning");) WF_DEBUG(msg_printf(p, MSG_INFO, "scanning");)
if (p->pwf.project_reason) { if (p->pwf.project_reason) {
WF_DEBUG(msg_printf(p, MSG_INFO, "skip: cfwr %d", p->pwf.cant_fetch_work_reason);) WF_DEBUG(msg_printf(p, MSG_INFO, "skip: cfwr %d", p->pwf.project_reason);)
continue; continue;
} }
@ -671,7 +683,7 @@ PROJECT* WORK_FETCH::choose_project() {
} }
WF_DEBUG(msg_printf(p, MSG_INFO, "can fetch %s", rsc_name_long(i));) WF_DEBUG(msg_printf(p, MSG_INFO, "can fetch %s", rsc_name_long(i));)
} else { } else {
WF_DEBUG(msg_printf(p, MSG_INFO, "can't fetch %s", rsc_name_long(i));) WF_DEBUG(msg_printf(p, MSG_INFO, "can't fetch %s: %s", rsc_name_long(i), rsc_project_reason_string(rpwf.rsc_project_reason));)
continue; continue;
} }
if (rwf.saturated_time < gstate.work_buf_min()) { if (rwf.saturated_time < gstate.work_buf_min()) {

View File

@ -127,7 +127,7 @@ struct RSC_PROJECT_WORK_FETCH {
int rsc_project_reason; int rsc_project_reason;
int compute_rsc_project_reason(PROJECT*, int rsc_type); int compute_rsc_project_reason(PROJECT*, int rsc_type);
void resource_backoff(PROJECT*, const char*); void resource_backoff(PROJECT*, const char*);
void rr_init(PROJECT*, int rsc_type); void rr_init();
void clear_backoff() { void clear_backoff() {
backoff_time = 0; backoff_time = 0;
backoff_interval = 0; backoff_interval = 0;