- client: fix work-fetch bug that caused infinite fetch;

cleanup/reorganization of work fetch logic

svn path=/trunk/boinc/; revision=17337
This commit is contained in:
David Anderson 2009-02-23 20:35:52 +00:00
parent b402864705
commit f257101d36
3 changed files with 116 additions and 105 deletions

View File

@ -1938,4 +1938,10 @@ Rom 22 Feb 2009
boinc_ss.rc
screensaver.cpp
screensaver_win.cpp, .h
David 22 Feb 2009
- client: fix work-fetch bug that caused infinite fetch;
cleanup/reorganization of work fetch logic
client/
work_fetch.cpp,h

View File

@ -140,25 +140,45 @@ bool RSC_PROJECT_WORK_FETCH::overworked() {
return (debt < -x);
}
#define FETCH_IF_DEVICE_IDLE 1
// If a resource is idle, get work for it
// from the project with greatest LTD, even if it's overworked.
#define FETCH_IF_SHORTFALL 2
// If a resource has a shortfall,
// get work for it from the non-overworked project with greatest LTD.
#define FETCH_IF_PROJECT_STARVED 3
// If any project is not overworked and has no runnable jobs for the rsc,
// get work from the one with greatest LTD.
// Choose the best project to ask for work for this resource,
// given some constraints.
// given the specific criterion
//
PROJECT* RSC_WORK_FETCH::choose_project(
bool allow_overworked, // consider overworked projects
bool only_starved // consider only starved projects
) {
PROJECT* RSC_WORK_FETCH::choose_project(int criterion) {
double req;
PROJECT* pbest = NULL;
switch (criterion) {
case FETCH_IF_DEVICE_IDLE:
if (!nidle_now) return NULL;
break;
case FETCH_IF_SHORTFALL:
if (!shortfall) return NULL;
break;
}
for (unsigned i=0; i<gstate.projects.size(); i++) {
PROJECT* p = gstate.projects[i];
if (!p->pwf.can_fetch_work) continue;
if (!project_state(p).may_have_work) continue;
RSC_PROJECT_WORK_FETCH& rpwf = project_state(p);
if (!allow_overworked && rpwf.overworked()) {
continue;
}
if (only_starved && rpwf.has_runnable_jobs) {
continue;
switch (criterion) {
case FETCH_IF_SHORTFALL:
if (rpwf.overworked()) continue;
break;
case FETCH_IF_PROJECT_STARVED:
if (rpwf.overworked()) continue;
if (rpwf.has_runnable_jobs) continue;
break;
}
if (pbest) {
if (pbest->pwf.overall_debt > p->pwf.overall_debt) {
@ -168,9 +188,55 @@ PROJECT* RSC_WORK_FETCH::choose_project(
}
pbest = p;
}
if (!pbest) return NULL;
// decide how much work to request from each resource
//
work_fetch.clear_request();
switch (criterion) {
case FETCH_IF_DEVICE_IDLE:
if (log_flags.work_fetch_debug) {
msg_printf(pbest, MSG_INFO,
"chosen: %s idle instance", rsc_name(rsc_type)
);
}
req = share_request(pbest);
if (req > shortfall) req = shortfall;
set_request(pbest, req);
break;
case FETCH_IF_SHORTFALL:
if (log_flags.work_fetch_debug) {
msg_printf(pbest, MSG_INFO,
"chosen: %s shortfall", rsc_name(rsc_type)
);
}
work_fetch.set_shortfall_requests(pbest);
break;
case FETCH_IF_PROJECT_STARVED:
if (log_flags.work_fetch_debug) {
msg_printf(pbest, MSG_INFO,
"chosen: %s idle instance", rsc_name(rsc_type)
);
}
req = share_request(pbest);
set_request(pbest, req);
break;
}
return pbest;
}
// Ask project p to cover the shortfall of each resource type:
// always the CPU, and CUDA as well when a CUDA coprocessor is present
// (coproc_cuda is non-null).
//
void WORK_FETCH::set_shortfall_requests(PROJECT* p) {
    cpu_work_fetch.set_shortfall_request(p);

    // no CUDA coprocessor: nothing more to request
    //
    if (!coproc_cuda) return;

    cuda_work_fetch.set_shortfall_request(p);
}
// If this resource has a nonzero shortfall,
// request that whole amount from project p;
// otherwise leave the current request untouched.
//
void RSC_WORK_FETCH::set_shortfall_request(PROJECT* p) {
    if (shortfall) {
        set_request(p, shortfall);
    }
}
void WORK_FETCH::set_overall_debts() {
for (unsigned i=0; i<gstate.projects.size(); i++) {
PROJECT* p = gstate.projects[i];
@ -316,98 +382,29 @@ PROJECT* WORK_FETCH::choose_project() {
gstate.rr_simulation();
set_overall_debts();
bool cpu_emergency = false;
bool cuda_emergency = false;
// If a resource is idle, it's an "emergency";
// get work for it from the project with greatest LTD,
// even if it's overworked.
// Give GPU priority over CPU
//
if (coproc_cuda && cuda_work_fetch.nidle_now) {
p = cuda_work_fetch.choose_project(true, false);
if (p) {
cuda_emergency = true;
if (log_flags.work_fetch_debug) {
msg_printf(p, MSG_INFO, "chosen: CUDA idle instance");
}
}
}
if (!p && cpu_work_fetch.nidle_now) {
p = cpu_work_fetch.choose_project(true, false);
if (p) {
cpu_emergency = true;
if (log_flags.work_fetch_debug) {
msg_printf(p, MSG_INFO, "chosen: CPU idle instance");
}
}
}
// If a resource has a shortfall,
// get work for it from the non-overworked project with greatest LTD.
//
if (!p && coproc_cuda && cuda_work_fetch.shortfall) {
p = cuda_work_fetch.choose_project(false, false);
if (p) {
if (log_flags.work_fetch_debug) {
msg_printf(p, MSG_INFO, "chosen: CUDA shortfall");
}
}
}
if (!p && cpu_work_fetch.shortfall) {
p = cpu_work_fetch.choose_project(false, false);
if (p) {
if (log_flags.work_fetch_debug) {
msg_printf(p, MSG_INFO, "chosen: CPU shortfall");
}
}
}
// If any project is not overworked and has no runnable jobs,
// get work from the one with greatest LTD.
//
if (!p && coproc_cuda) {
p = cuda_work_fetch.choose_project(false, true);
if (p) {
if (log_flags.work_fetch_debug) {
msg_printf(p, MSG_INFO, "chosen: project has no CUDA jobs");
}
}
if (coproc_cuda) {
p = cuda_work_fetch.choose_project(FETCH_IF_DEVICE_IDLE);
}
if (!p) {
p = cpu_work_fetch.choose_project(false, true);
if (p) {
if (log_flags.work_fetch_debug) {
msg_printf(p, MSG_INFO, "chosen: project has no CPU jobs");
}
}
p = cpu_work_fetch.choose_project(FETCH_IF_DEVICE_IDLE);
}
// decide how much work to request for each resource
//
clear_request();
if (p) {
// in emergency cases, get work only for that resource
//
if (cpu_emergency) {
cpu_work_fetch.set_request(p);
} else if (cuda_emergency) {
cuda_work_fetch.set_request(p);
} else {
// in non-emergency cases, get work for any resource
// for which the project is not overworked
//
if (!cpu_work_fetch.project_state(p).overworked()) {
cpu_work_fetch.set_request(p);
}
if (coproc_cuda) {
if (!cuda_work_fetch.project_state(p).overworked()) {
cuda_work_fetch.set_request(p);
}
}
}
if (!p && coproc_cuda) {
p = cuda_work_fetch.choose_project(FETCH_IF_SHORTFALL);
}
if (coproc_cuda) {
if (!p) {
p = cpu_work_fetch.choose_project(FETCH_IF_SHORTFALL);
}
if (!p && coproc_cuda) {
p = cuda_work_fetch.choose_project(FETCH_IF_PROJECT_STARVED);
}
if (!p) {
p = cpu_work_fetch.choose_project(FETCH_IF_PROJECT_STARVED);
}
if (p && coproc_cuda) {
coproc_cuda->req_secs = cuda_work_fetch.req_secs;
coproc_cuda->req_instances = cuda_work_fetch.req_instances;
coproc_cuda->estimated_delay = cuda_work_fetch.estimated_delay;
@ -424,19 +421,24 @@ PROJECT* WORK_FETCH::choose_project() {
return p;
}
void RSC_WORK_FETCH::set_request(PROJECT* p) {
RSC_PROJECT_WORK_FETCH& w = project_state(p);
if (p->duration_correction_factor < 0.02 || p->duration_correction_factor > 80.0) {
double RSC_WORK_FETCH::share_request(PROJECT* p) {
double dcf = p->duration_correction_factor;
if (dcf < 0.02 || dcf > 80.0) {
// if project's DCF is too big or small,
// its completion time estimates are useless; just ask for 1 second
//
req_secs = 1;
return 1;
} else {
// otherwise ask for the project's share
//
req_secs = gstate.work_buf_total()*w.fetchable_share;
RSC_PROJECT_WORK_FETCH& w = project_state(p);
return gstate.work_buf_total()*w.fetchable_share;
}
}
// Record a work request for this resource on behalf of project p:
// req_secs is set to r, and req_instances to the project's fetchable share
// of the currently idle instances (nidle_now), rounded up.
//
void RSC_WORK_FETCH::set_request(PROJECT* p, double r) {
    RSC_PROJECT_WORK_FETCH& state = project_state(p);
    req_secs = r;

    // round up so that any positive share of idle instances
    // asks for at least one instance
    //
    double idle_share = ceil(state.fetchable_share*nidle_now);
    req_instances = static_cast<int>(idle_share);
}

View File

@ -116,13 +116,15 @@ struct RSC_WORK_FETCH {
void rr_init();
void accumulate_shortfall(double d_time);
void update_estimated_delay(double dt);
PROJECT* choose_project(bool allow_overworked, bool only_starved);
PROJECT* choose_project(int);
void accumulate_debt();
RSC_PROJECT_WORK_FETCH& project_state(PROJECT*);
void update_debts();
void print_state(char*);
void clear_request();
void set_request(PROJECT*);
void set_request(PROJECT*, double);
double share_request(PROJECT*);
void set_shortfall_request(PROJECT*);
bool may_have_work(PROJECT*);
RSC_WORK_FETCH() {
memset(this, 0, sizeof(*this));
@ -156,6 +158,7 @@ struct WORK_FETCH {
void write_request(FILE*);
void handle_reply(PROJECT*, std::vector<RESULT*>new_results);
void set_initial_work_request();
void set_shortfall_requests(PROJECT*);
void print_state();
void init();
void rr_init();