mirror of https://github.com/BOINC/boinc.git
- client: fix work-fetch bug that caused infinite fetch;
cleanup/reorganization of work fetch logic svn path=/trunk/boinc/; revision=17337
This commit is contained in:
parent
b402864705
commit
f257101d36
|
@ -1938,4 +1938,10 @@ Rom 22 Feb 2009
|
|||
boinc_ss.rc
|
||||
screensaver.cpp
|
||||
screensaver_win.cpp, .h
|
||||
|
||||
|
||||
David 22 Feb 2009
|
||||
- client: fix work-fetch bug that caused infinite fetch;
|
||||
cleanup/reorganization of work fetch logic
|
||||
|
||||
client/
|
||||
work_fetch.cpp,h
|
||||
|
|
|
@ -140,25 +140,45 @@ bool RSC_PROJECT_WORK_FETCH::overworked() {
|
|||
return (debt < -x);
|
||||
}
|
||||
|
||||
// Work-fetch criteria passed to RSC_WORK_FETCH::choose_project(int criterion).
// Each value selects which projects are eligible for a work request
// for a given resource.
// NOTE(review): "LTD" below is presumably long-term debt; the selection
// loop compares pwf.overall_debt - confirm.
#define FETCH_IF_DEVICE_IDLE 1
|
||||
// If a resource is idle, get work for it
|
||||
// from the project with greatest LTD, even if it's overworked.
|
||||
#define FETCH_IF_SHORTFALL 2
|
||||
// If a resource has a shortfall,
|
||||
// get work for it from the non-overworked project with greatest LTD.
|
||||
#define FETCH_IF_PROJECT_STARVED 3
|
||||
// If any project is not overworked and has no runnable jobs for the resource,
|
||||
// get work from the one with greatest LTD.
|
||||
|
||||
// Choose the best project to ask for work for this resource,
|
||||
// given some constraints.
|
||||
// given the specific criterion
|
||||
//
|
||||
PROJECT* RSC_WORK_FETCH::choose_project(
|
||||
bool allow_overworked, // consider overworked projects
|
||||
bool only_starved // consider only starved projects
|
||||
) {
|
||||
PROJECT* RSC_WORK_FETCH::choose_project(int criterion) {
|
||||
double req;
|
||||
PROJECT* pbest = NULL;
|
||||
|
||||
switch (criterion) {
|
||||
case FETCH_IF_DEVICE_IDLE:
|
||||
if (!nidle_now) return NULL;
|
||||
break;
|
||||
case FETCH_IF_SHORTFALL:
|
||||
if (!shortfall) return NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
for (unsigned i=0; i<gstate.projects.size(); i++) {
|
||||
PROJECT* p = gstate.projects[i];
|
||||
if (!p->pwf.can_fetch_work) continue;
|
||||
if (!project_state(p).may_have_work) continue;
|
||||
RSC_PROJECT_WORK_FETCH& rpwf = project_state(p);
|
||||
if (!allow_overworked && rpwf.overworked()) {
|
||||
continue;
|
||||
}
|
||||
if (only_starved && rpwf.has_runnable_jobs) {
|
||||
continue;
|
||||
switch (criterion) {
|
||||
case FETCH_IF_SHORTFALL:
|
||||
if (rpwf.overworked()) continue;
|
||||
break;
|
||||
case FETCH_IF_PROJECT_STARVED:
|
||||
if (rpwf.overworked()) continue;
|
||||
if (rpwf.has_runnable_jobs) continue;
|
||||
break;
|
||||
}
|
||||
if (pbest) {
|
||||
if (pbest->pwf.overall_debt > p->pwf.overall_debt) {
|
||||
|
@ -168,9 +188,55 @@ PROJECT* RSC_WORK_FETCH::choose_project(
|
|||
}
|
||||
pbest = p;
|
||||
}
|
||||
if (!pbest) return NULL;
|
||||
|
||||
// decide how much work to request from each resource
|
||||
//
|
||||
work_fetch.clear_request();
|
||||
switch (criterion) {
|
||||
case FETCH_IF_DEVICE_IDLE:
|
||||
if (log_flags.work_fetch_debug) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"chosen: %s idle instance", rsc_name(rsc_type)
|
||||
);
|
||||
}
|
||||
req = share_request(pbest);
|
||||
if (req > shortfall) req = shortfall;
|
||||
set_request(pbest, req);
|
||||
break;
|
||||
case FETCH_IF_SHORTFALL:
|
||||
if (log_flags.work_fetch_debug) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"chosen: %s shortfall", rsc_name(rsc_type)
|
||||
);
|
||||
}
|
||||
work_fetch.set_shortfall_requests(pbest);
|
||||
break;
|
||||
case FETCH_IF_PROJECT_STARVED:
|
||||
if (log_flags.work_fetch_debug) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"chosen: %s idle instance", rsc_name(rsc_type)
|
||||
);
|
||||
}
|
||||
req = share_request(pbest);
|
||||
set_request(pbest, req);
|
||||
break;
|
||||
}
|
||||
return pbest;
|
||||
}
|
||||
|
||||
// Ask project p to cover the shortfall of every resource:
// always the CPU, and CUDA as well when coproc_cuda is non-null.
// Each per-resource call does nothing if that resource has no shortfall
// (see RSC_WORK_FETCH::set_shortfall_request).
//
void WORK_FETCH::set_shortfall_requests(PROJECT* p) {
|
||||
cpu_work_fetch.set_shortfall_request(p);
|
||||
if (coproc_cuda) {
|
||||
cuda_work_fetch.set_shortfall_request(p);
|
||||
}
|
||||
}
|
||||
|
||||
// Request this resource's entire shortfall from project p.
// If the shortfall is zero, return without touching the current request.
//
void RSC_WORK_FETCH::set_shortfall_request(PROJECT* p) {
|
||||
if (!shortfall) return;
|
||||
set_request(p, shortfall);
|
||||
}
|
||||
|
||||
void WORK_FETCH::set_overall_debts() {
|
||||
for (unsigned i=0; i<gstate.projects.size(); i++) {
|
||||
PROJECT* p = gstate.projects[i];
|
||||
|
@ -316,98 +382,29 @@ PROJECT* WORK_FETCH::choose_project() {
|
|||
|
||||
gstate.rr_simulation();
|
||||
set_overall_debts();
|
||||
bool cpu_emergency = false;
|
||||
bool cuda_emergency = false;
|
||||
|
||||
// If a resource is idle, it's an "emergency";
|
||||
// get work for it from the project with greatest LTD,
|
||||
// even if it's overworked.
|
||||
// Give GPU priority over CPU
|
||||
//
|
||||
if (coproc_cuda && cuda_work_fetch.nidle_now) {
|
||||
p = cuda_work_fetch.choose_project(true, false);
|
||||
if (p) {
|
||||
cuda_emergency = true;
|
||||
if (log_flags.work_fetch_debug) {
|
||||
msg_printf(p, MSG_INFO, "chosen: CUDA idle instance");
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!p && cpu_work_fetch.nidle_now) {
|
||||
p = cpu_work_fetch.choose_project(true, false);
|
||||
if (p) {
|
||||
cpu_emergency = true;
|
||||
if (log_flags.work_fetch_debug) {
|
||||
msg_printf(p, MSG_INFO, "chosen: CPU idle instance");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If a resource has a shortfall,
|
||||
// get work for it from the non-overworked project with greatest LTD.
|
||||
//
|
||||
if (!p && coproc_cuda && cuda_work_fetch.shortfall) {
|
||||
p = cuda_work_fetch.choose_project(false, false);
|
||||
if (p) {
|
||||
if (log_flags.work_fetch_debug) {
|
||||
msg_printf(p, MSG_INFO, "chosen: CUDA shortfall");
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!p && cpu_work_fetch.shortfall) {
|
||||
p = cpu_work_fetch.choose_project(false, false);
|
||||
if (p) {
|
||||
if (log_flags.work_fetch_debug) {
|
||||
msg_printf(p, MSG_INFO, "chosen: CPU shortfall");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If any project is not overworked and has no runnable jobs,
|
||||
// get work from the one with greatest LTD.
|
||||
//
|
||||
if (!p && coproc_cuda) {
|
||||
p = cuda_work_fetch.choose_project(false, true);
|
||||
if (p) {
|
||||
if (log_flags.work_fetch_debug) {
|
||||
msg_printf(p, MSG_INFO, "chosen: project has no CUDA jobs");
|
||||
}
|
||||
}
|
||||
if (coproc_cuda) {
|
||||
p = cuda_work_fetch.choose_project(FETCH_IF_DEVICE_IDLE);
|
||||
}
|
||||
if (!p) {
|
||||
p = cpu_work_fetch.choose_project(false, true);
|
||||
if (p) {
|
||||
if (log_flags.work_fetch_debug) {
|
||||
msg_printf(p, MSG_INFO, "chosen: project has no CPU jobs");
|
||||
}
|
||||
}
|
||||
p = cpu_work_fetch.choose_project(FETCH_IF_DEVICE_IDLE);
|
||||
}
|
||||
|
||||
// decide how much work to request for each resource
|
||||
//
|
||||
clear_request();
|
||||
if (p) {
|
||||
// in emergency cases, get work only for that resource
|
||||
//
|
||||
if (cpu_emergency) {
|
||||
cpu_work_fetch.set_request(p);
|
||||
} else if (cuda_emergency) {
|
||||
cuda_work_fetch.set_request(p);
|
||||
} else {
|
||||
// in non-emergency cases, get work for any resource
|
||||
// for which the project is not overworked
|
||||
//
|
||||
if (!cpu_work_fetch.project_state(p).overworked()) {
|
||||
cpu_work_fetch.set_request(p);
|
||||
}
|
||||
if (coproc_cuda) {
|
||||
if (!cuda_work_fetch.project_state(p).overworked()) {
|
||||
cuda_work_fetch.set_request(p);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!p && coproc_cuda) {
|
||||
p = cuda_work_fetch.choose_project(FETCH_IF_SHORTFALL);
|
||||
}
|
||||
if (coproc_cuda) {
|
||||
if (!p) {
|
||||
p = cpu_work_fetch.choose_project(FETCH_IF_SHORTFALL);
|
||||
}
|
||||
|
||||
if (!p && coproc_cuda) {
|
||||
p = cuda_work_fetch.choose_project(FETCH_IF_PROJECT_STARVED);
|
||||
}
|
||||
if (!p) {
|
||||
p = cpu_work_fetch.choose_project(FETCH_IF_PROJECT_STARVED);
|
||||
}
|
||||
|
||||
if (p && coproc_cuda) {
|
||||
coproc_cuda->req_secs = cuda_work_fetch.req_secs;
|
||||
coproc_cuda->req_instances = cuda_work_fetch.req_instances;
|
||||
coproc_cuda->estimated_delay = cuda_work_fetch.estimated_delay;
|
||||
|
@ -424,19 +421,24 @@ PROJECT* WORK_FETCH::choose_project() {
|
|||
return p;
|
||||
}
|
||||
|
||||
void RSC_WORK_FETCH::set_request(PROJECT* p) {
|
||||
RSC_PROJECT_WORK_FETCH& w = project_state(p);
|
||||
|
||||
if (p->duration_correction_factor < 0.02 || p->duration_correction_factor > 80.0) {
|
||||
double RSC_WORK_FETCH::share_request(PROJECT* p) {
|
||||
double dcf = p->duration_correction_factor;
|
||||
if (dcf < 0.02 || dcf > 80.0) {
|
||||
// if project's DCF is too big or small,
|
||||
// its completion time estimates are useless; just ask for 1 second
|
||||
//
|
||||
req_secs = 1;
|
||||
return 1;
|
||||
} else {
|
||||
// otherwise ask for the project's share
|
||||
//
|
||||
req_secs = gstate.work_buf_total()*w.fetchable_share;
|
||||
RSC_PROJECT_WORK_FETCH& w = project_state(p);
|
||||
return gstate.work_buf_total()*w.fetchable_share;
|
||||
}
|
||||
}
|
||||
|
||||
// Record a work request for this resource on behalf of project p.
// req_secs is set to r, the amount chosen by the caller
// (presumably seconds of work, going by the field name).
// req_instances is p's fetchable share of nidle_now, rounded up
// to a whole number of instances.
//
void RSC_WORK_FETCH::set_request(PROJECT* p, double r) {
|
||||
RSC_PROJECT_WORK_FETCH& w = project_state(p);
|
||||
req_secs = r;
|
||||
req_instances = (int)ceil(w.fetchable_share*nidle_now);
|
||||
}
|
||||
|
||||
|
|
|
@ -116,13 +116,15 @@ struct RSC_WORK_FETCH {
|
|||
void rr_init();
|
||||
void accumulate_shortfall(double d_time);
|
||||
void update_estimated_delay(double dt);
|
||||
PROJECT* choose_project(bool allow_overworked, bool only_starved);
|
||||
PROJECT* choose_project(int);
|
||||
void accumulate_debt();
|
||||
RSC_PROJECT_WORK_FETCH& project_state(PROJECT*);
|
||||
void update_debts();
|
||||
void print_state(char*);
|
||||
void clear_request();
|
||||
void set_request(PROJECT*);
|
||||
void set_request(PROJECT*, double);
|
||||
double share_request(PROJECT*);
|
||||
void set_shortfall_request(PROJECT*);
|
||||
bool may_have_work(PROJECT*);
|
||||
RSC_WORK_FETCH() {
|
||||
memset(this, 0, sizeof(*this));
|
||||
|
@ -156,6 +158,7 @@ struct WORK_FETCH {
|
|||
void write_request(FILE*);
|
||||
void handle_reply(PROJECT*, std::vector<RESULT*>new_results);
|
||||
void set_initial_work_request();
|
||||
void set_shortfall_requests(PROJECT*);
|
||||
void print_state();
|
||||
void init();
|
||||
void rr_init();
|
||||
|
|
Loading…
Reference in New Issue