- client: major overhaul of work-fetch logic based on suggestions

by Jacob Klein.
    The new policy is roughly as follows:
    - find the highest-priority project P that is allowed
        to fetch work for a resource below buf_min
    - Ask P for work for all resources R below buf_max
        for which it's allowed to fetch work,
        unless there's a higher-priority project allowed
        to request work for R.
    If we're going to do an RPC to P for reasons other than work fetch,
    the policy is:
    - for each resource R for which P is the highest-priority project
        allowed to fetch work, and R is below buf_max,
        request work for R.
This commit is contained in:
David Anderson 2013-04-02 12:32:28 -07:00
parent 1b9ad86694
commit f6a61fe801
4 changed files with 255 additions and 194 deletions

View File

@ -491,7 +491,7 @@ bool CLIENT_STATE::scheduler_rpc_poll() {
&& !(config.fetch_minimal_work && had_or_requested_work)
) {
p = work_fetch.choose_project(true, NULL);
p = work_fetch.choose_project();
if (p) {
if (actively_uploading(p)) {
if (log_flags.work_fetch_debug) {

View File

@ -525,7 +525,7 @@ bool CLIENT_STATE::scheduler_rpc_poll() {
p = find_project_with_overdue_results(false);
if (p) {
//printf("doing RPC to %s to report results\n", p->project_name);
msg_printf(p, MSG_INFO, "doing RPC to report results");
work_fetch.piggyback_work_request(p);
action = simulate_rpc(p);
break;
@ -546,6 +546,7 @@ bool CLIENT_STATE::scheduler_rpc_poll() {
p = work_fetch.choose_project(true, NULL);
if (p) {
msg_printf(p, MSG_INFO, "doing RPC to get work");
action = simulate_rpc(p);
break;
}
@ -870,7 +871,7 @@ void show_resource(int rsc_type) {
found = true;
fprintf(html_out,
"<table>\n"
"<tr><th>#devs</th><th>Job name</th><th>GFLOPs left</th>%s</tr>\n",
"<tr><th>#devs</th><th>Job name (* = high priority)</th><th>GFLOPs left</th>%s</tr>\n",
rsc_type?"<th>GPU</th>":""
);
}
@ -882,7 +883,7 @@ void show_resource(int rsc_type) {
fprintf(html_out, "<tr><td>%.2f</td><td bgcolor=%s><font color=#ffffff>%s%s</font></td><td>%.0f</td>%s</tr>\n",
ninst,
colors[p->index%NCOLORS],
rp->rr_sim_misses_deadline?"*":"",
rp->edf_scheduled?"*":"",
rp->name,
rp->sim_flops_left/1e9,
buf
@ -1006,8 +1007,17 @@ void set_initial_rec() {
}
}
// Comparison function for std::sort:
// orders projects by project name, using strcmp() ordering.
//
static bool compare_names(PROJECT* p1, PROJECT* p2) {
    const int cmp = strcmp(p1->project_name, p2->project_name);
    return cmp < 0;
}
void write_recs() {
fprintf(rec_file, "%f ", gstate.now);
std::sort(
gstate.projects.begin(),
gstate.projects.end(),
compare_names
);
for (unsigned int i=0; i<gstate.projects.size(); i++) {
PROJECT* p = gstate.projects[i];
fprintf(rec_file, "%f ", p->pwf.rec);

View File

@ -33,6 +33,9 @@
#include "work_fetch.h"
//#define DEBUG(x) x
#define DEBUG(X)
using std::vector;
RSC_WORK_FETCH rsc_work_fetch[MAX_RSC];
@ -180,142 +183,6 @@ static bool wacky_dcf(PROJECT* p) {
return (dcf < 0.02 || dcf > 80.0);
}
// If this resource is below min buffer level,
// return the highest-priority project that may have jobs for it.
//
// If the resource has instances starved because of exclusions,
// return the highest-priority project that may have jobs
// and doesn't exclude those instances.
//
// If strict_hyst is true, only choose a project if the buffer
// is below min level (work_buf_min());
// otherwise relax this to max level (work_buf_total())
//
// If backoff_exempt_project is non-NULL,
// don't enforce resource backoffs for that project;
// this is for when we're going to do a scheduler RPC anyway
// and we're deciding whether to piggyback a work request
//
// If a project is chosen, the global work request is cleared and
// request fields are set for that project
// (via set_all_requests_hyst() or set_request_excluded()).
// Returns NULL if the resource doesn't need work
// or no project is eligible.
//
PROJECT* RSC_WORK_FETCH::choose_project_hyst(
bool strict_hyst,
PROJECT* backoff_exempt_project
) {
PROJECT* pbest = NULL;
// buffer_low: saturated_time doesn't exceed the applicable threshold
// (min level if strict_hyst, else max level)
//
bool buffer_low = true;
if (strict_hyst) {
if (saturated_time > gstate.work_buf_min()) buffer_low = false;
} else {
if (saturated_time > gstate.work_buf_total()) buffer_low = false;
}
if (log_flags.work_fetch_debug) {
msg_printf(0, MSG_INFO,
"[work_fetch] choose_project() for %s: buffer_low: %s; sim_excluded_instances %d\n",
rsc_name(rsc_type), buffer_low?"yes":"no", sim_excluded_instances
);
}
// nothing to do if the buffer is OK and no instances are starved
//
if (!buffer_low && !sim_excluded_instances) return NULL;
for (unsigned i=0; i<gstate.projects.size(); i++) {
PROJECT* p = gstate.projects[i];
// check whether we can fetch work of any type from this project
//
if (p->pwf.cant_fetch_work_reason) {
//msg_printf(p, MSG_INFO, "skip: cfwr %d", p->pwf.cant_fetch_work_reason);
continue;
}
// see whether work fetch for this resource is banned
// by prefs, config, project, or acct mgr
//
if (dont_fetch(p, rsc_type)) {
//msg_printf(p, MSG_INFO, "skip: dont_fetch");
continue;
}
// check backoff
//
if (p != backoff_exempt_project) {
if (project_state(p).backoff_time > gstate.now) {
//msg_printf(p, MSG_INFO, "skip: backoff");
continue;
}
}
// if project has zero resource share,
// only fetch work if a device is idle
//
if (p->resource_share == 0 && nidle_now == 0) {
//msg_printf(p, MSG_INFO, "skip: zero share");
continue;
}
// if project has excluded GPUs of this type,
// we need to avoid fetching work just because there's an idle instance
// or a shortfall;
// fetching work might not alleviate either of these,
// and we'd end up fetching unbounded work.
// At the same time, we want to respect work buf params if possible.
//
// Current policy:
// don't fetch work if remaining time of this project's jobs
// exceeds work_buf_min * (#usable instances / #instances)
//
// TODO: THIS IS FAIRLY CRUDE. Making it smarter would require
// computing shortfall etc. on a per-project basis
//
int nexcl = p->rsc_pwf[rsc_type].ncoprocs_excluded;
if (rsc_type && nexcl) {
int n_not_excluded = ninstances - nexcl;
if (p->rsc_pwf[rsc_type].queue_est > (gstate.work_buf_min() * n_not_excluded)/ninstances) {
//msg_printf(p, MSG_INFO, "skip: too much work");
continue;
}
}
RSC_PROJECT_WORK_FETCH& rpwf = project_state(p);
if (rpwf.anon_skip) {
//msg_printf(p, MSG_INFO, "skip: anon");
continue;
}
// if we're sending work only because of exclusion starvation,
// make sure this project can use the starved instances
//
if (!buffer_low) {
if ((sim_excluded_instances & rpwf.non_excluded_instances) == 0) {
//msg_printf(p, MSG_INFO, "skip: excl");
continue;
}
}
// keep the current best if it has strictly higher priority;
// on a tie, the later project in the list replaces it
//
if (pbest) {
if (pbest->sched_priority > p->sched_priority) {
//msg_printf(p, MSG_INFO, "skip: prio");
continue;
}
}
pbest = p;
}
if (!pbest) {
if (log_flags.work_fetch_debug) {
msg_printf(0, MSG_INFO,
"[work_fetch] no eligible project for %s",
rsc_name(rsc_type)
);
}
return NULL;
}
// found a project: reset the request, then fill it in according to
// whether we're here because of a low buffer or starved instances
//
work_fetch.clear_request();
if (buffer_low) {
work_fetch.set_all_requests_hyst(pbest, rsc_type);
} else {
set_request_excluded(pbest);
}
return pbest;
}
// request this project's share of shortfall and instances.
// don't request anything if project is backed off.
//
@ -483,6 +350,7 @@ void WORK_FETCH::rr_init() {
}
}
#if 0
// if the given project is highest-priority among the projects
// eligible for the resource, set request fields
//
@ -546,6 +414,7 @@ void WORK_FETCH::set_all_requests(PROJECT* p) {
}
}
}
#endif
void WORK_FETCH::print_state() {
msg_printf(0, MSG_INFO, "[work_fetch] ------- start work fetch state -------");
@ -590,7 +459,7 @@ void WORK_FETCH::clear_request() {
// decide if we should piggy-back a work fetch request.
//
void WORK_FETCH::piggyback_work_request(PROJECT* p) {
clear_request();
DEBUG(msg_printf(p, MSG_INFO, "piggyback_work_request()");)
if (config.fetch_minimal_work && gstate.had_or_requested_work) return;
if (p->dont_request_more_work) return;
if (p->non_cpu_intensive) {
@ -601,29 +470,53 @@ void WORK_FETCH::piggyback_work_request(PROJECT* p) {
}
// if project was updated from manager and config says so,
// always fetch work if needed
// fetch work for a resource even if there are higher-prio projects
// able to fetch it
//
bool check_higher_priority_projects = true;
if (p->sched_rpc_pending && config.fetch_on_update) {
set_all_requests_hyst(p, -1);
check_higher_priority_projects = false;
return;
}
compute_cant_fetch_work_reason();
PROJECT* bestp = choose_project(false, p);
if (p != bestp) {
if (p->pwf.cant_fetch_work_reason == 0) {
if (bestp) {
p->pwf.cant_fetch_work_reason = CANT_FETCH_WORK_NOT_HIGHEST_PRIORITY;
if (log_flags.work_fetch_debug) {
msg_printf(0, MSG_INFO,
"[work_fetch] not piggybacking work req: %s has higher priority",
bestp->get_project_name()
);
}
} else {
p->pwf.cant_fetch_work_reason = CANT_FETCH_WORK_DONT_NEED;
}
setup();
// For each resource, scan projects in decreasing priority,
// seeing if there's one that's higher-priority than this
// able to fetch work for the resource.
// If not, and the resource needs topping off, do so
//
for (int i=0; i<coprocs.n_rsc; i++) {
DEBUG(msg_printf(p, MSG_INFO, "piggyback: resource %s", rsc_name(i));)
if (i && !gpus_usable) continue;
RSC_WORK_FETCH& rwf = rsc_work_fetch[i];
if (!rwf.can_fetch(p)) {
DEBUG(msg_printf(p, MSG_INFO, "piggyback: can't fetch %s", rsc_name(i));)
continue;
}
bool buffer_low = (rwf.saturated_time < gstate.work_buf_total());
if (!buffer_low && !rwf.uses_starved_excluded_instances(p)) {
DEBUG(msg_printf(p, MSG_INFO, "piggyback: don't need %s", rsc_name(i));)
continue;
}
if (check_higher_priority_projects) {
PROJECT* p2 = NULL;
for (unsigned int j=0; j<gstate.projects.size(); j++) {
p2 = gstate.projects[j];
if (p2 == p) break;
if (rwf.can_fetch(p2)) {
DEBUG(msg_printf(p, MSG_INFO, "piggyback: better proj %s", p2->project_name);)
break;
}
}
if (p != p2) continue;
}
DEBUG(msg_printf(p, MSG_INFO, "piggyback: requesting %s", rsc_name(i));)
if (buffer_low) {
rwf.set_request(p);
} else {
rwf.set_request_excluded(p);
}
clear_request();
}
}
@ -643,13 +536,117 @@ PROJECT* WORK_FETCH::non_cpu_intensive_project_needing_work() {
return 0;
}
// choose a project to fetch work from,
// and set the request fields of resource objects.
// Comparison function for std::sort:
// puts projects with larger sched_priority first.
//
static bool higher_priority(PROJECT *p1, PROJECT *p2) {
    return (p2->sched_priority < p1->sched_priority);
}
// a variety of checks for whether we should ask this project
// for work of this type
//
PROJECT* WORK_FETCH::choose_project(
bool strict_hyst,
PROJECT* backoff_exempt_project
) {
// Decide whether this resource may ask project p for work.
// Each check below can veto the request;
// return true only if none of them does.
//
bool RSC_WORK_FETCH::can_fetch(PROJECT *p) {
    // is work fetch for this resource banned
    // by prefs, config, project, or acct mgr?
    //
    if (dont_fetch(p, rsc_type)) {
        DEBUG(msg_printf(p, MSG_INFO, "skip: dont_fetch");)
        return false;
    }

    // is the project still backed off for this resource?
    //
    if (gstate.now < project_state(p).backoff_time) {
        DEBUG(msg_printf(p, MSG_INFO, "skip: backoff");)
        return false;
    }

    // a zero-resource-share project gets work
    // only when a device is idle
    //
    if (nidle_now == 0 && p->resource_share == 0) {
        DEBUG(msg_printf(p, MSG_INFO, "skip: zero share");)
        return false;
    }

    // If the project excludes some GPUs of this type,
    // an idle instance or a shortfall is not enough reason to fetch:
    // new work might not alleviate either one,
    // and we'd end up fetching unbounded work.
    // We'd still like to respect the work buf params where possible.
    //
    // Current policy:
    // don't fetch if the remaining time of this project's jobs
    // exceeds work_buf_min * (#usable instances / #instances)
    //
    // TODO: THIS IS FAIRLY CRUDE. Making it smarter would require
    // computing shortfall etc. on a per-project basis
    //
    const int excluded = p->rsc_pwf[rsc_type].ncoprocs_excluded;
    if (rsc_type && excluded) {
        const int usable = ninstances - excluded;
        if (p->rsc_pwf[rsc_type].queue_est > (gstate.work_buf_min() * usable)/ninstances) {
            DEBUG(msg_printf(p, MSG_INFO, "skip: too much work");)
            return false;
        }
    }

    // skip if the per-project state for this resource has anon_skip set
    //
    if (project_state(p).anon_skip) {
        DEBUG(msg_printf(p, MSG_INFO, "skip: anon");)
        return false;
    }
    return true;
}
// Return true if some instances of this resource are starved
// because of exclusions (sim_excluded_instances is nonzero)
// and at least one of them is not excluded for project p.
//
bool RSC_WORK_FETCH::uses_starved_excluded_instances(PROJECT* p) {
    RSC_PROJECT_WORK_FETCH& state = project_state(p);

    // no exclusion starvation at all
    if (sim_excluded_instances == 0) return false;

    // the bitmaps overlap: p can use a starved instance
    if (sim_excluded_instances & state.non_excluded_instances) {
        return true;
    }
    DEBUG(msg_printf(p, MSG_INFO, "skip: excl");)
    return false;
}
// setup for choose_project() and piggyback()
//
void WORK_FETCH::setup() {
gstate.compute_nuploading_results();
rr_simulation();
compute_shares();
project_priority_init(true);
clear_request();
// Decrement the priority of projects that have work queued.
// Specifically, subtract
// (FLOPs queued for P)/(FLOPs of max queue)
// which will generally be between 0 and 1.
// This is a little arbitrary but I can't think of anything better.
//
double max_queued_flops = gstate.work_buf_total()*total_peak_flops();
for (unsigned int i=0; i<gstate.results.size(); i++) {
RESULT* rp = gstate.results[i];
PROJECT* p = rp->project;
p->sched_priority -= rp->estimated_flops_remaining()/max_queued_flops;
}
std::sort(
gstate.projects.begin(),
gstate.projects.end(),
higher_priority
);
}
// Choose a project to fetch work from,
// and set the request fields of resource objects.
// Require that some resource the project can fetch for be below min buf
// (or have starved excluded instances that the project can use);
// its other resources are then requested if they're below max buf.
//
PROJECT* WORK_FETCH::choose_project() {
PROJECT* p;
if (log_flags.work_fetch_debug) {
@ -659,34 +656,86 @@ PROJECT* WORK_FETCH::choose_project(
p = non_cpu_intensive_project_needing_work();
if (p) return p;
gstate.compute_nuploading_results();
setup();
rr_simulation();
compute_shares();
project_priority_init(true);
for (int i=0; i<coprocs.n_rsc; i++) {
rsc_work_fetch[i].found_project = NULL;
}
// Decrement the priority of projects that have a lot of work queued.
// Specifically, subtract
// (FLOPs queued for P)/(FLOPs of max queue)
// which will generally be between 0 and 1.
// This is a little arbitrary but I can't think of anything better.
// scan projects in order of decreasing priority
//
double max_queued_flops = gstate.work_buf_total()*total_peak_flops();
for (unsigned int i=0; i<gstate.results.size(); i++) {
RESULT* rp = gstate.results[i];
p = rp->project;
p->sched_priority -= rp->estimated_flops_remaining()/max_queued_flops;
}
p = 0;
if (gpus_usable) {
for (int i=1; i<coprocs.n_rsc; i++) {
p = rsc_work_fetch[i].choose_project_hyst(strict_hyst, backoff_exempt_project);
if (p) break;
for (unsigned int j=0; j<gstate.projects.size(); j++) {
p = gstate.projects[j];
DEBUG(msg_printf(p, MSG_INFO, "scanning");)
if (p->pwf.cant_fetch_work_reason) {
DEBUG(msg_printf(p, MSG_INFO, "skip: cfwr %d", p->pwf.cant_fetch_work_reason);)
continue;
}
// For each resource type:
// - See if we can ask this project for work of that type;
// if so set a flag so that lower-priority projects
// won't request it
// - If so, see if work is needed for this type;
// if so, set "found"
//
int rsc_index = -1;
for (int i=0; i<coprocs.n_rsc; i++) {
if (i && !gpus_usable) continue;
RSC_WORK_FETCH& rwf = rsc_work_fetch[i];
if (rwf.can_fetch(p)) {
if (!rwf.found_project) {
rwf.found_project = p;
}
DEBUG(msg_printf(p, MSG_INFO, "can fetch %s", rsc_name(i));)
} else {
DEBUG(msg_printf(p, MSG_INFO, "can't fetch %s", rsc_name(i));)
continue;
}
bool buffer_low = (rwf.saturated_time < gstate.work_buf_min());
if (buffer_low || rwf.uses_starved_excluded_instances(p)) {
DEBUG(msg_printf(p, MSG_INFO, "%s needs work", rsc_name(i));)
rsc_index = i;
break;
}
}
// If rsc_index is non-negative, it's a resource that this project
// can ask for work, and which needs work.
// And this is the highest-priority project having this property.
// Request work from this resource,
// and any others for which this is the highest-priority project
// able to request work
//
if (rsc_index >= 0) {
for (int i=0; i<coprocs.n_rsc; i++) {
if (i && !gpus_usable) continue;
RSC_WORK_FETCH& rwf = rsc_work_fetch[i];
bool buffer_low;
if (i == rsc_index) {
buffer_low = (rwf.saturated_time < gstate.work_buf_min());
} else {
if (rwf.found_project && rwf.found_project != p) {
continue;
}
buffer_low = (rwf.saturated_time < gstate.work_buf_total());
if (!buffer_low && !rwf.uses_starved_excluded_instances(p)) {
continue;
}
if (!rsc_work_fetch[i].can_fetch(p)) {
continue;
}
}
if (buffer_low) {
rwf.set_request(p);
} else {
rwf.set_request_excluded(p);
}
}
break;
} else {
p = NULL;
}
}
if (!p) {
p = rsc_work_fetch[0].choose_project_hyst(strict_hyst, backoff_exempt_project);
}
if (log_flags.work_fetch_debug) {

View File

@ -239,10 +239,12 @@ struct RSC_WORK_FETCH {
this->secs_this_rec_interval = 0;
}
// temp in choose_project()
PROJECT* found_project; // a project able to ask for this work
void rr_init();
void update_stats(double sim_now, double dt, double buf_end);
void update_busy_time(double dur, double nused);
PROJECT* choose_project_hyst(bool strict_hyst, PROJECT*);
void supplement(PROJECT*);
RSC_PROJECT_WORK_FETCH& project_state(PROJECT*);
void print_state(const char*);
@ -250,6 +252,8 @@ struct RSC_WORK_FETCH {
void set_request(PROJECT*);
void set_request_excluded(PROJECT*);
bool may_have_work(PROJECT*);
bool can_fetch(PROJECT*);
bool uses_starved_excluded_instances(PROJECT*);
RSC_WORK_FETCH() {
rsc_type = 0;
ninstances = 0;
@ -287,11 +291,9 @@ struct PROJECT_WORK_FETCH {
// global work fetch state
//
struct WORK_FETCH {
PROJECT* choose_project(bool strict_hyst, PROJECT*);
void setup();
PROJECT* choose_project();
// Find a project to ask for work.
// If strict is false consider requesting work
// even if buffer is above min level
// or project is backed off for a resource type
PROJECT* non_cpu_intensive_project_needing_work();
void piggyback_work_request(PROJECT*);
// we're going to contact this project anyway;