- client: fix small work fetch bug that caused the client to not add a piggyback work request when it should have.
This commit is contained in:
David Anderson 2013-03-12 11:16:34 -07:00 committed by Oliver Bock
parent fc6b050883
commit 546ea233a0
3 changed files with 26 additions and 54 deletions

View File

@ -491,7 +491,7 @@ bool CLIENT_STATE::scheduler_rpc_poll() {
&& !(config.fetch_minimal_work && had_or_requested_work)
) {
p = work_fetch.choose_project(true);
p = work_fetch.choose_project(true, NULL);
if (p) {
if (actively_uploading(p)) {
if (log_flags.work_fetch_debug) {

View File

@ -38,32 +38,6 @@ using std::vector;
RSC_WORK_FETCH rsc_work_fetch[MAX_RSC];
WORK_FETCH work_fetch;
#define FETCH_IF_IDLE_INSTANCE 0
// If resource has an idle instance,
// get work for it from the project with greatest LTD,
// even if it's overworked.
#define FETCH_IF_MAJOR_SHORTFALL 1
// If resource is saturated for less than work_buf_min(),
// get work for it from the project with greatest LTD,
// even if it's overworked.
#define FETCH_IF_MINOR_SHORTFALL 2
// If resource is saturated for less than work_buf_total(),
// get work for it from the non-overworked project with greatest LTD.
#define FETCH_IF_PROJECT_STARVED 3
// If any project is not overworked and has too few jobs
// to use its instance share,
// get work from the one with greatest LTD.
static const char* criterion_name(int criterion) {
switch (criterion) {
case FETCH_IF_IDLE_INSTANCE: return "idle instance";
case FETCH_IF_MAJOR_SHORTFALL: return "major shortfall";
case FETCH_IF_MINOR_SHORTFALL: return "minor shortfall";
case FETCH_IF_PROJECT_STARVED: return "starved";
}
return "unknown";
}
inline bool dont_fetch(PROJECT* p, int rsc_type) {
if (p->no_rsc_pref[rsc_type]) return true;
if (p->no_rsc_config[rsc_type]) return true;
@ -169,14 +143,6 @@ RSC_PROJECT_WORK_FETCH& RSC_WORK_FETCH::project_state(PROJECT* p) {
return p->rsc_pwf[rsc_type];
}
#if 0
bool RSC_WORK_FETCH::may_have_work(PROJECT* p) {
if (dont_fetch(p, rsc_type)) return false;
RSC_PROJECT_WORK_FETCH& w = project_state(p);
return (w.backoff_time < gstate.now);
}
#endif
void RSC_WORK_FETCH::rr_init() {
shortfall = 0;
nidle_now = 0;
@ -221,16 +187,21 @@ static bool wacky_dcf(PROJECT* p) {
// return the highest-priority project that may have jobs
// and doesn't exclude those instances.
//
// If strict is true, enforce hysteresis and backoff rules
// (which are there to limit rate of scheduler RPCs).
// Otherwise, we're going to do a scheduler RPC anyway
// and we're deciding whether to piggyback a work request,
// so there is no reason to enforce these rules.
// Only choose a project if the buffer is below min level;
// if strict_hyst is false, relax this to max level
//
PROJECT* RSC_WORK_FETCH::choose_project_hyst(bool strict) {
// If backoff_exempt_project is non-NULL,
// don't enforce resource backoffs for that project;
// this is for when we're going to do a scheduler RPC anyway
// and we're deciding whether to piggyback a work request
//
PROJECT* RSC_WORK_FETCH::choose_project_hyst(
bool strict_hyst,
PROJECT* backoff_exempt_project
) {
PROJECT* pbest = NULL;
bool buffer_low = true;
if (strict) {
if (strict_hyst) {
if (saturated_time > gstate.work_buf_min()) buffer_low = false;
} else {
if (saturated_time > gstate.work_buf_total()) buffer_low = false;
@ -255,16 +226,17 @@ PROJECT* RSC_WORK_FETCH::choose_project_hyst(bool strict) {
continue;
}
// check whether we can fetch work of this type
// see whether work fetch for this resource is banned
// by prefs, config, project, or acct mgr
//
if (dont_fetch(p, rsc_type)) {
//msg_printf(p, MSG_INFO, "skip: dont_fetch");
continue;
}
// if strict, check backoff
// check backoff
//
if (strict) {
if (p != backoff_exempt_project) {
if (project_state(p).backoff_time > gstate.now) {
//msg_printf(p, MSG_INFO, "skip: backoff");
continue;
@ -624,7 +596,7 @@ void WORK_FETCH::piggyback_work_request(PROJECT* p) {
return;
}
compute_cant_fetch_work_reason();
PROJECT* bestp = choose_project(false);
PROJECT* bestp = choose_project(false, p);
if (p != bestp) {
if (p->pwf.cant_fetch_work_reason == 0) {
if (bestp) {
@ -661,9 +633,11 @@ PROJECT* WORK_FETCH::non_cpu_intensive_project_needing_work() {
// choose a project to fetch work from,
// and set the request fields of resource objects.
// If strict is true, enforce hysteresis and backoff rules
//
PROJECT* WORK_FETCH::choose_project(bool strict) {
PROJECT* WORK_FETCH::choose_project(
bool strict_hyst,
PROJECT* backoff_exempt_project
) {
PROJECT* p;
if (log_flags.work_fetch_debug) {
@ -695,12 +669,12 @@ PROJECT* WORK_FETCH::choose_project(bool strict) {
p = 0;
if (gpus_usable) {
for (int i=1; i<coprocs.n_rsc; i++) {
p = rsc_work_fetch[i].choose_project_hyst(strict);
p = rsc_work_fetch[i].choose_project_hyst(strict_hyst, backoff_exempt_project);
if (p) break;
}
}
if (!p) {
p = rsc_work_fetch[0].choose_project_hyst(strict);
p = rsc_work_fetch[0].choose_project_hyst(strict_hyst, backoff_exempt_project);
}
if (log_flags.work_fetch_debug) {

View File

@ -24,8 +24,6 @@
#include <vector>
#include <deque>
extern bool use_hyst_fetch;
#define RSC_TYPE_ANY -1
#define RSC_TYPE_CPU 0
@ -244,7 +242,7 @@ struct RSC_WORK_FETCH {
void rr_init();
void update_stats(double sim_now, double dt, double buf_end);
void update_busy_time(double dur, double nused);
PROJECT* choose_project_hyst(bool strict);
PROJECT* choose_project_hyst(bool strict_hyst, PROJECT*);
void supplement(PROJECT*);
RSC_PROJECT_WORK_FETCH& project_state(PROJECT*);
void print_state(const char*);
@ -289,7 +287,7 @@ struct PROJECT_WORK_FETCH {
// global work fetch state
//
struct WORK_FETCH {
PROJECT* choose_project(bool strict);
PROJECT* choose_project(bool strict_hyst, PROJECT*);
// Find a project to ask for work.
// If strict_hyst is false, consider requesting work
// even if buffer is above min level