- client: remove code related to debt-based scheduling

svn path=/trunk/boinc/; revision=24163
This commit is contained in:
David Anderson 2011-09-12 17:57:31 +00:00
parent be1d379f6a
commit 9856f795ed
7 changed files with 56 additions and 654 deletions

View File

@ -5671,11 +5671,11 @@ David 6 Sept 2011
submit_app.php
David 7 Sept 2011
- scheduler: fix crashing bug
- scheduler: fix crashing bug
sched/
sched_version.cpp
sched_assign.cpp
sched/
sched_version.cpp
sched_assign.cpp
Rom 7 Sept 2011
- Tag for 6.13.2 release, all platforms
@ -5791,10 +5791,10 @@ David 10 Sept 2011
app_ipc.cpp,h
David 10 Sept 2011
- client: fix error in parsing of XML comments
- client: fix error in parsing of XML comments
lib/
parse.cpp
lib/
parse.cpp
David 11 Sept 2011
- client: finish next-to-last checkin
@ -5805,22 +5805,22 @@ David 11 Sept 2011
app.h
David 11 Sept 2011
- manager, simple view:
- edit label text
- increase opacity of main area;
otherwise some text is hard to read
- change borders from blue to light gray.
In general shouldn't use primary colors
- display credit as integer
- reduce size of project name;
otherwise long names don't fit (on Win)
- don't use "(in slot x)".
"Slot" is not part of the vocabulary here.
- manager, simple view:
- edit label text
- increase opacity of main area;
otherwise some text is hard to read
- change borders from blue to light gray.
In general shouldn't use primary colors
- display credit as integer
- reduce size of project name;
otherwise long names don't fit (on Win)
- don't use "(in slot x)".
"Slot" is not part of the vocabulary here.
clientgui/
sg_PanelBase.cpp
sg_ProjectPanel.cpp
sg_TaskPanel.cpp
clientgui/
sg_PanelBase.cpp
sg_ProjectPanel.cpp
sg_TaskPanel.cpp
David 11 Sept 2011
- API: change boinc_get_opencl_ids() to use APP_INIT_DATA
@ -5853,3 +5853,12 @@ David 12 Sept 2011
client_types.cpp,h
rr_sim.cpp,h
cpu_sched.cpp
David 12 Sept 2011
- client: remove code related to debt-based scheduling
client/
work_fetch.cpp,h
client_types.cpp,h
sim.cpp
cpu_sched.cpp

View File

@ -242,13 +242,8 @@ int PROJECT::parse_state(XML_PARSER& xp) {
if (xp.parse_bool("dont_request_more_work", dont_request_more_work)) continue;
if (xp.parse_bool("detach_when_done", detach_when_done)) continue;
if (xp.parse_bool("ended", ended)) continue;
//#ifdef USE_REC
if (xp.parse_double("rec", pwf.rec)) continue;
if (xp.parse_double("rec_time", pwf.rec_time)) continue;
//#else
if (xp.parse_double("short_term_debt", rsc_pwf[0].short_term_debt)) continue;
if (xp.parse_double("long_term_debt", rsc_pwf[0].long_term_debt)) continue;
//#endif
if (xp.parse_double("cpu_backoff_interval", rsc_pwf[0].backoff_interval)) continue;
if (xp.parse_double("cpu_backoff_time", rsc_pwf[0].backoff_time)) {
if (rsc_pwf[0].backoff_time > gstate.now + 28*SECONDS_PER_DAY) {
@ -256,20 +251,6 @@ int PROJECT::parse_state(XML_PARSER& xp) {
}
continue;
}
//#ifndef USE_REC
if (xp.match_tag("rsc_short_term_debt")) {
if (parse_rsc_param(xp, "/rsc_short_term_debt", rt, x)) {
rsc_pwf[rt].short_term_debt = x;
}
continue;
}
if (xp.match_tag("rsc_long_term_debt")) {
if (parse_rsc_param(xp, "/rsc_long_term_debt", rt, x)) {
rsc_pwf[rt].long_term_debt = x;
}
continue;
}
//#endif
if (xp.match_tag("rsc_backoff_interval")) {
if (parse_rsc_param(xp, "/rsc_backoff_interval", rt, x)) {
rsc_pwf[rt].backoff_interval = x;
@ -377,10 +358,8 @@ int PROJECT::write_state(MIOFILE& out, bool gui_rpc) {
" <master_fetch_failures>%d</master_fetch_failures>\n"
" <min_rpc_time>%f</min_rpc_time>\n"
" <next_rpc_time>%f</next_rpc_time>\n"
//#ifdef USE_REC
" <rec>%f</rec>\n"
" <rec_time>%f</rec_time>\n"
//#endif
" <resource_share>%f</resource_share>\n"
" <duration_correction_factor>%f</duration_correction_factor>\n"
@ -411,10 +390,8 @@ int PROJECT::write_state(MIOFILE& out, bool gui_rpc) {
master_fetch_failures,
min_rpc_time,
next_rpc_time,
//#ifdef USE_REC
pwf.rec,
pwf.rec_time,
//#else
resource_share,
duration_correction_factor,
sched_rpc_pending,
@ -436,16 +413,6 @@ int PROJECT::write_state(MIOFILE& out, bool gui_rpc) {
);
for (int j=0; j<coprocs.n_rsc; j++) {
out.printf(
//#ifndef USE_REC
" <rsc_short_term_debt>\n"
" <name>%s</name>\n"
" <value>%f</value>\n"
" </rsc_short_term_debt>\n"
" <rsc_long_term_debt>\n"
" <name>%s</name>\n"
" <value>%f</value>\n"
" </rsc_long_term_debt>\n"
//#endif
" <rsc_backoff_time>\n"
" <name>%s</name>\n"
" <value>%f</value>\n"
@ -454,10 +421,6 @@ int PROJECT::write_state(MIOFILE& out, bool gui_rpc) {
" <name>%s</name>\n"
" <value>%f</value>\n"
" </rsc_backoff_interval>\n",
//#ifndef USE_REC
rsc_name(j), rsc_pwf[j].short_term_debt,
rsc_name(j), rsc_pwf[j].long_term_debt,
//#endif
rsc_name(j), rsc_pwf[j].backoff_interval,
rsc_name(j), rsc_pwf[j].backoff_time
);

View File

@ -451,11 +451,6 @@ struct PROJECT : PROJ_AM {
// Accessor: the deadlines_missed value stored in this project's
// per-resource work-fetch state (rsc_pwf) for resource rsc_type.
// rsc_type is used directly as an index into rsc_pwf; no range check is done.
inline int deadlines_missed(int rsc_type) {
return rsc_pwf[rsc_type].deadlines_missed;
}
//#ifndef USE_REC
// Accessor: the anticipated_debt value stored in this project's
// per-resource work-fetch state (rsc_pwf) for resource rsc_type.
// rsc_type is used directly as an index into rsc_pwf; no range check is done.
inline double anticipated_debt(int rsc_type) {
return rsc_pwf[rsc_type].anticipated_debt;
}
//#endif
void get_task_durs(double& not_started_dur, double& in_progress_dur);
int nresults_returned;

View File

@ -340,31 +340,14 @@ RESULT* CLIENT_STATE::largest_debt_project_best_result() {
PROJECT* p = projects[i];
if (!p->next_runnable_result) continue;
if (p->non_cpu_intensive) continue;
if (use_rec) {
if (first || project_priority(p)> best_debt) {
first = false;
best_project = p;
best_debt = project_priority(p);
}
} else {
if (first || p->rsc_pwf[0].anticipated_debt > best_debt) {
first = false;
best_project = p;
best_debt = p->rsc_pwf[0].anticipated_debt;
}
if (first || project_priority(p)> best_debt) {
first = false;
best_project = p;
best_debt = project_priority(p);
}
}
if (!best_project) return NULL;
if (!use_rec) {
if (log_flags.cpu_sched_debug) {
msg_printf(best_project, MSG_INFO,
"[cpu_sched_debug] highest debt: %f %s",
best_project->rsc_pwf[0].anticipated_debt,
best_project->next_runnable_result->name
);
}
}
RESULT* rp = best_project->next_runnable_result;
best_project->next_runnable_result = 0;
return rp;
@ -392,11 +375,7 @@ RESULT* first_coproc_result(int rsc_type) {
if (!rp->runnable()) continue;
if (rp->non_cpu_intensive()) continue;
if (rp->already_selected) continue;
if (use_rec) {
std = project_priority(rp->project);
} else {
std = rp->project->anticipated_debt(rsc_type);
}
std = project_priority(rp->project);
if (!best) {
best = rp;
best_std = std;
@ -683,14 +662,7 @@ void CLIENT_STATE::adjust_debts() {
work_fetch.accumulate_inst_sec(atp, elapsed_time);
}
if (use_rec) {
update_rec();
} else {
for (int j=0; j<coprocs.n_rsc; j++) {
rsc_work_fetch[j].update_long_term_debts();
rsc_work_fetch[j].update_short_term_debts();
}
}
update_rec();
reset_debt_accounting();
}
@ -757,21 +729,12 @@ static bool schedule_if_possible(
if (log_flags.cpu_sched_debug) {
msg_printf(rp->project, MSG_INFO,
"[cpu_sched_debug] scheduling %s (%s) (%f)", rp->name, description,
use_rec?project_priority(rp->project):0
project_priority(rp->project)
);
}
proc_rsc.schedule(rp, atp);
if (use_rec) {
adjust_rec_sched(rp);
} else {
// project STD at end of time slice
//
double dt = gstate.global_prefs.cpu_scheduling_period();
for (int i=0; i<coprocs.n_rsc; i++) {
rp->project->rsc_pwf[i].anticipated_debt -= dt*rp->avp->avg_ncpus/rsc_work_fetch[i].ninstances;
}
}
adjust_rec_sched(rp);
return true;
}
@ -893,9 +856,7 @@ void CLIENT_STATE::make_run_list(vector<RESULT*>& run_list) {
// set temporary variables
//
if (use_rec) {
project_priority_init();
}
project_priority_init();
for (i=0; i<results.size(); i++) {
rp = results[i];
rp->already_selected = false;
@ -904,11 +865,6 @@ void CLIENT_STATE::make_run_list(vector<RESULT*>& run_list) {
for (i=0; i<projects.size(); i++) {
p = projects[i];
p->next_runnable_result = NULL;
if (!use_rec) {
for (int j=0; j<coprocs.n_rsc; j++) {
p->rsc_pwf[j].anticipated_debt = p->rsc_pwf[j].short_term_debt;
}
}
for (int j=0; j<coprocs.n_rsc; j++) {
p->rsc_pwf[j].deadlines_missed_copy = p->rsc_pwf[j].deadlines_missed;
}

View File

@ -59,8 +59,6 @@
// simulate use of EDF sim by scheduler
// [--cpu_sched_rr_only]
// use only RR scheduling
// [--use_rec]
// client scheduling is based on recent estimated credit (REC)
// [--use_hyst_fetch]
// client work fetch uses hysteresis
// [--rec_half_life X]
@ -127,7 +125,6 @@ void usage(char* prog) {
"[--delta X]\n"
"[--server_uses_workload]\n"
"[--cpu_sched_rr_only]\n"
"[--use_rec]\n"
"[--use_hyst_fetch]\n"
"[--rec_half_life X]\n",
prog
@ -919,8 +916,6 @@ void html_end() {
fclose(html_out);
}
//#ifdef USE_REC
void set_initial_rec() {
unsigned int i;
double sum=0;
@ -969,84 +964,6 @@ void make_graph(const char* title, const char* fname, int field) {
system(cmd);
}
//#else
// lines in the debt file have these fields:
// time
// per project:
// overall LTD
// CPU LTD
// CPU STD
// [NVIDIA LTD]
// [NVIDIA STD]
// [ATI LTD]
// [ATI STD]
//
void write_debts() {
fprintf(debt_file, "%f ", gstate.now);
for (unsigned int i=0; i<gstate.projects.size(); i++) {
PROJECT* p = gstate.projects[i];
fprintf(debt_file, "%f %f %f ",
p->pwf.overall_debt,
p->rsc_pwf[0].long_term_debt,
p->rsc_pwf[0].short_term_debt
);
for (int j=1; j<coprocs.n_rsc; j++) {
fprintf(debt_file, "%f %f ",
p->rsc_pwf[j].long_term_debt,
p->rsc_pwf[j].short_term_debt
);
}
}
fprintf(debt_file, "\n");
}
// Generate one debt graph.
//
// Writes a gnuplot script "<outfile_prefix><fname>.gp" that plots,
// for every project, one column of "<outfile_prefix>debt.dat" against
// column 1; runs gnuplot to render it to "<outfile_prefix><fname>.png";
// and appends a link to the PNG to index_file.
//
// title:   graph title (also used as the link text)
// fname:   base name of the generated .gp and .png files
// field:   index of the per-project field to plot (0 = first)
// nfields: number of fields each project occupies in a debt-file line
//
// If the script file can't be created, a message is written to stderr
// and no graph or link is produced.
//
void make_graph(const char* title, const char* fname, int field, int nfields) {
    char gp_fname[256], png_fname[256];
    // the command embeds both file names, so it needs room for both
    // plus the fixed "gnuplot < ... > ..." text
    char cmd[sizeof(gp_fname) + sizeof(png_fname) + 32];

    // snprintf rather than sprintf: a long prefix or name is truncated
    // instead of overrunning the buffer
    //
    snprintf(gp_fname, sizeof(gp_fname), "%s%s.gp", outfile_prefix, fname);
    FILE* f = fopen(gp_fname, "w");
    if (!f) {
        fprintf(stderr, "make_graph: can't open %s for writing\n", gp_fname);
        return;
    }
    fprintf(f,
        "set terminal png small size 1024, 768\n"
        "set title \"%s\"\n"
        "plot ",
        title
    );
    for (unsigned int i=0; i<gstate.projects.size(); i++) {
        PROJECT* p = gstate.projects[i];
        // column 1 is the x axis; project i's fields start at 2+i*nfields
        //
        int column = 2 + field + (int)i*nfields;
        // every plot clause but the last ends with a gnuplot line continuation
        //
        fprintf(f, "\"%sdebt.dat\" using 1:%d title \"%s\" with lines%s",
            outfile_prefix, column, p->project_name,
            (i==gstate.projects.size()-1)?"\n":", \\\n"
        );
    }
    fclose(f);
    snprintf(png_fname, sizeof(png_fname), "%s%s.png", outfile_prefix, fname);
    snprintf(cmd, sizeof(cmd), "gnuplot < %s > %s", gp_fname, png_fname);
    fprintf(index_file, "<br><a href=%s.png>Graph of %s</a>\n", fname, title);
    system(cmd);
}
// Produce the full set of debt graphs:
// overall debt, CPU LTD and STD, and an LTD/STD pair
// for each GPU type (NVIDIA, ATI) that coprocs reports as present.
// Each project has 3 fields in the debt file, plus 2 per GPU type present;
// the NVIDIA pair (if any) precedes the ATI pair.
//
void debt_graphs() {
    const bool nvidia = coprocs.have_nvidia();
    const bool ati = coprocs.have_ati();

    int nfields = 3;
    if (nvidia) nfields += 2;
    if (ati) nfields += 2;

    make_graph("Overall debt", "debt_overall", 0, nfields);
    make_graph("CPU LTD", "debt_cpu_ltd", 1, nfields);
    make_graph("CPU STD", "debt_cpu_std", 2, nfields);

    // index of the first GPU field; advances past each GPU type handled
    //
    int gpu_field = 3;
    if (nvidia) {
        make_graph("NVIDIA LTD", "debt_nvidia_ltd", gpu_field, nfields);
        make_graph("NVIDIA STD", "debt_nvidia_std", gpu_field+1, nfields);
        gpu_field += 2;
    }
    if (ati) {
        make_graph("ATI LTD", "debt_ati_ltd", gpu_field, nfields);
        make_graph("ATI STD", "debt_ati_std", gpu_field+1, nfields);
    }
}
//#endif
static void write_inputs() {
char buf[256];
sprintf(buf, "%s/%s", outfile_prefix, INPUTS_FNAME);
@ -1054,18 +971,14 @@ static void write_inputs() {
fprintf(f,
"Round-robin only: %s\n"
"scheduler EDF sim: %s\n"
"hysteresis work fetch: %s\n"
"REC-based scheduling: %s\n",
"hysteresis work fetch: %s\n",
cpu_sched_rr_only?"yes":"no",
server_uses_workload?"yes":"no",
use_hyst_fetch?"yes":"no",
use_rec?"yes":"no"
use_hyst_fetch?"yes":"no"
);
fprintf(f,
"REC half-life: %f\n", config.rec_half_life
);
if (use_rec) {
fprintf(f,
"REC half-life: %f\n", config.rec_half_life
);
}
fprintf(f,
"Simulation duration: %f\nTime step: %f\n",
duration, delta
@ -1097,20 +1010,16 @@ void simulate() {
"Scheduling policies\n"
" Round-robin only: %s\n"
" Scheduler EDF simulation: %s\n"
" Hysteresis work fetch: %s\n"
" REC-based scheduling: %s\n",
" Hysteresis work fetch: %s\n",
gstate.work_buf_min(), gstate.work_buf_total(),
gstate.global_prefs.cpu_scheduling_period(),
cpu_sched_rr_only?"yes":"no",
server_uses_workload?"yes":"no",
use_hyst_fetch?"yes":"no",
use_rec?"yes":"no"
use_hyst_fetch?"yes":"no"
);
fprintf(summary_file,
" REC half-life: %f\n", config.rec_half_life
);
if (use_rec) {
fprintf(summary_file,
" REC half-life: %f\n", config.rec_half_life
);
}
fprintf(summary_file,
"Simulation parameters\n"
" time step %f, duration %f\n"
@ -1155,11 +1064,7 @@ void simulate() {
}
}
html_rec();
if (use_rec) {
write_recs();
} else {
write_debts();
}
gstate.now += delta;
if (gstate.now > start + duration) break;
}
@ -1372,9 +1277,7 @@ void do_client_simulation() {
gstate.set_ncpus();
work_fetch.init();
if (use_rec) {
set_initial_rec();
}
gstate.request_work_fetch("init");
simulate();
@ -1406,11 +1309,7 @@ if (use_rec) {
);
print_project_results(summary_file);
if (use_rec) {
make_graph("REC", "rec", 0);
} else {
debt_graphs();
}
}
char* next_arg(int argc, char** argv, int& i) {
@ -1442,8 +1341,6 @@ int main(int argc, char** argv) {
server_uses_workload = true;
} else if (!strcmp(opt, "--cpu_sched_rr_only")) {
cpu_sched_rr_only = true;
} else if (!strcmp(opt, "--use_rec")) {
use_rec = true;
} else if (!strcmp(opt, "--use_hyst_fetch")) {
use_hyst_fetch = true;
} else if (!strcmp(opt, "--rec_half_life")) {

View File

@ -33,7 +33,6 @@
using std::vector;
bool use_rec = true;
bool use_hyst_fetch = true;
RSC_WORK_FETCH rsc_work_fetch[MAX_RSC];
@ -141,7 +140,6 @@ bool RSC_PROJECT_WORK_FETCH::compute_may_have_work(PROJECT* p, int rsc_type) {
void RSC_PROJECT_WORK_FETCH::rr_init(PROJECT* p, int rsc_type) {
may_have_work = compute_may_have_work(p, rsc_type);
runnable_share = 0;
fetchable_share = 0;
has_runnable_jobs = false;
sim_nused = 0;
@ -149,49 +147,6 @@ void RSC_PROJECT_WORK_FETCH::rr_init(PROJECT* p, int rsc_type) {
deadlines_missed = 0;
}
//#ifndef USE_REC
// Is the project's long-term debt more negative than would normally happen?
// If so we conclude that it had a long job that ran in EDF mode,
// and callers avoid asking it for work unless absolutely necessary.
// The limit is the total work buffer plus one CPU scheduling period,
// but never less than one day (86400 sec).
//
bool RSC_PROJECT_WORK_FETCH::overworked() {
    double limit = gstate.work_buf_total() + gstate.global_prefs.cpu_scheduling_period();
    limit = (limit < 86400) ? 86400 : limit;
    return long_term_debt < -limit;
}
// Decide whether this project should be accumulating LTD for this resource.
//
bool RSC_PROJECT_WORK_FETCH::debt_eligible(PROJECT* p, RSC_WORK_FETCH& rwf) {
    if (p->non_cpu_intensive
        || p->suspended_via_gui
        || p->some_result_suspended()
    ) {
        return false;
    }

    // A project with runnable jobs is eligible.
    // This test must precede the dont_request_more_work check
    //
    if (has_runnable_jobs) return true;

    // NOTE: it's critical that every condition that might prevent
    // us from asking the project for work of this type
    // is included in the tests below.
    // Otherwise we might get in a state where debt accumulates,
    // pushing other projects into overworked state
    //
    if (p->dont_request_more_work
        || (backoff_time > gstate.now)
        || dont_fetch(p, rwf.rsc_type)
    ) {
        return false;
    }

    // The last time we asked for work we didn't get any,
    // but it's been a while since we asked.
    // In this case, accumulate debt until we reach (around) zero, then stop.
    //
    if ((backoff_interval == WF_MAX_BACKOFF_INTERVAL)
        && (long_term_debt > -DEBT_ADJUST_PERIOD)
    ) {
        return false;
    }

    return !(p->min_rpc_time > gstate.now);
}
//#endif
void RSC_PROJECT_WORK_FETCH::backoff(PROJECT* p, const char* name) {
if (backoff_interval) {
backoff_interval *= 2;
@ -225,7 +180,6 @@ void RSC_WORK_FETCH::rr_init() {
nidle_now = 0;
sim_nused = 0;
total_fetchable_share = 0;
total_runnable_share = 0;
deadline_missed_instances = 0;
saturated_time = 0;
pending.clear();
@ -271,15 +225,9 @@ PROJECT* RSC_WORK_FETCH::choose_project_hyst() {
RSC_PROJECT_WORK_FETCH& rpwf = project_state(p);
if (rpwf.anon_skip) continue;
if (pbest) {
if (use_rec) {
if (project_priority(pbest) > project_priority(p)) {
continue;
}
} else {
if (pbest->pwf.overall_debt > p->pwf.overall_debt) {
continue;
}
}
}
pbest = p;
}
@ -318,9 +266,6 @@ PROJECT* RSC_WORK_FETCH::choose_project(int criterion) {
if (rpwf.anon_skip) continue;
switch (criterion) {
case FETCH_IF_MINOR_SHORTFALL:
if (!use_rec) {
if (rpwf.overworked()) continue;
}
if (wacky_dcf(p)) continue;
if (!p->resource_share) continue;
break;
@ -329,13 +274,8 @@ if (!use_rec) {
if (!p->resource_share) continue;
break;
case FETCH_IF_PROJECT_STARVED:
if (!use_rec) {
if (rpwf.overworked()) continue;
if (rpwf.nused_total >= ninstances*rpwf.fetchable_share) continue;
} else {
if (project_priority(p) < 0) continue;
if (rpwf.nused_total >= ninstances) continue;
}
if (!p->resource_share) continue;
break;
}
@ -344,15 +284,9 @@ if (!use_rec) {
if (!p->resource_share) {
continue;
}
if (use_rec) {
if (project_priority(pbest) > project_priority(p)) {
continue;
}
} else {
if (pbest->pwf.overall_debt > p->pwf.overall_debt) {
continue;
}
}
}
pbest = p;
}
@ -372,7 +306,7 @@ if (use_rec) {
case FETCH_IF_MINOR_SHORTFALL:
// in this case, potentially request work for all resources
//
if (use_rec && (project_priority(pbest) < 0)) {
if (project_priority(pbest) < 0) {
set_request(pbest, true);
} else {
work_fetch.set_all_requests(pbest);
@ -423,9 +357,6 @@ void RSC_WORK_FETCH::set_request(PROJECT* p, bool allow_overworked) {
RSC_PROJECT_WORK_FETCH& w = project_state(p);
if (!w.may_have_work) return;
if (w.anon_skip) return;
if (!use_rec) {
if (!allow_overworked && w.overworked()) return;
}
if (shortfall) {
if (wacky_dcf(p)) {
// if project's DCF is too big or small,
@ -433,22 +364,14 @@ if (!use_rec) {
//
req_secs = 1;
} else {
if (use_rec) {
req_secs = shortfall;
} else {
req_secs = shortfall * w.fetchable_share;
}
req_secs = shortfall;
}
}
// the number of additional instances needed to have our share
//
double x1;
if (use_rec) {
x1 = ninstances - w.nused_total;
} else {
x1 = (ninstances * w.fetchable_share) - w.nused_total;
}
x1 = ninstances - w.nused_total;
// our share of the idle instances
//
@ -462,10 +385,10 @@ if (!use_rec) {
void RSC_WORK_FETCH::print_state(const char* name) {
msg_printf(0, MSG_INFO,
"[work_fetch] %s: shortfall %.2f nidle %.2f saturated %.2f busy %.2f RS fetchable %.2f runnable %.2f",
"[work_fetch] %s: shortfall %.2f nidle %.2f saturated %.2f busy %.2f",
name,
shortfall, nidle_now, saturated_time, busy_time_estimator.get_busy_time(),
total_fetchable_share, total_runnable_share
shortfall, nidle_now, saturated_time,
busy_time_estimator.get_busy_time()
);
for (unsigned int i=0; i<gstate.projects.size(); i++) {
PROJECT* p = gstate.projects[i];
@ -476,7 +399,6 @@ void RSC_WORK_FETCH::print_state(const char* name) {
bool no_rsc_apps = p->no_rsc_apps[rsc_type];
bool no_rsc_ams = p->no_rsc_ams[rsc_type];
double bt = pwf.backoff_time>gstate.now?pwf.backoff_time-gstate.now:0;
if (use_rec) {
msg_printf(p, MSG_INFO,
"[work_fetch] %s: fetch share %.2f rec %.5f prio %.5f backoff dt %.2f int %.2f%s%s%s%s%s%s%s%s%s",
name,
@ -491,21 +413,6 @@ if (use_rec) {
no_rsc_ams?" (blocked by account manager)":"",
no_rsc_config?" (blocked by configuration file)":""
);
} else {
msg_printf(p, MSG_INFO,
"[work_fetch] %s: fetch share %.2f LTD %.2f backoff dt %.2f int %.2f%s%s%s%s%s%s%s%s",
name,
pwf.fetchable_share, pwf.long_term_debt, bt, pwf.backoff_interval,
p->suspended_via_gui?" (susp via GUI)":"",
p->master_url_fetch_pending?" (master fetch pending)":"",
p->min_rpc_time > gstate.now?" (comm deferred)":"",
p->dont_request_more_work?" (no new tasks)":"",
pwf.overworked()?" (overworked)":"",
p->too_many_uploading_results?" (too many uploads)":"",
no_rsc_pref?" (blocked by prefs)":"",
no_rsc_config?" (blocked by configuration file)":""
);
}
}
}
@ -514,202 +421,6 @@ void RSC_WORK_FETCH::clear_request() {
req_instances = 0;
}
//#ifndef USE_REC
// update long-term debts for a resource.
//
// Three passes over gstate.projects:
// 1) total the resource shares of debt-eligible projects
//    (return without changes if there are none);
// 2) change each eligible project's LTD by
//    (share of secs_this_debt_interval it was owed - what it got)/ninstances,
//    and remember the largest resulting LTD;
// 3) add a common offset (see below) and cap every LTD at zero.
//
void RSC_WORK_FETCH::update_long_term_debts() {
unsigned int i;
int neligible = 0;
double ders = 0;
// ders: sum of resource shares of debt-eligible projects
PROJECT* p;
// find the total resource share of eligible projects
//
for (i=0; i<gstate.projects.size(); i++) {
p = gstate.projects[i];
RSC_PROJECT_WORK_FETCH& w = project_state(p);
if (w.debt_eligible(p, *this)) {
ders += p->resource_share;
neligible++;
}
}
if (!neligible) {
if (log_flags.debt_debug) {
msg_printf(0, MSG_INFO,
"[debt] %s: no eligible projects", rsc_name(rsc_type)
);
}
return;
}
// max_debt: largest LTD among eligible projects after this update;
// "first" marks that no eligible project has been seen yet
double max_debt=0;
bool first = true;
for (i=0; i<gstate.projects.size(); i++) {
p = gstate.projects[i];
if (p->non_cpu_intensive) continue;
if (!p->resource_share) continue;
RSC_PROJECT_WORK_FETCH& w = project_state(p);
if (w.debt_eligible(p, *this)) {
double share_frac = p->resource_share/ders;
// the change to a project's debt is:
// (how much it's owed) - (how much it got)
//
double delta = share_frac*secs_this_debt_interval - w.secs_this_debt_interval;
delta /= ninstances;
w.long_term_debt += delta;
if (log_flags.debt_debug) {
msg_printf(p, MSG_INFO,
"[debt] %s LTD %.2f delta %.2f (%.2f*%.2f - %.2f)/%d",
rsc_name(rsc_type),
w.long_term_debt, delta, share_frac,
secs_this_debt_interval,
w.secs_this_debt_interval,
ninstances
);
}
if (first) {
max_debt = w.long_term_debt;
first = false;
} else {
if (w.long_term_debt > max_debt) {
max_debt = w.long_term_debt;
}
}
} else {
if (log_flags.debt_debug) {
msg_printf(p, MSG_INFO,
"[debt] %s ineligible; LTD %.2f",
rsc_name(rsc_type), w.long_term_debt
);
}
}
}
// The net change may be
// - positive if the resource wasn't fully utilized during the debt interval
// - negative if it was overcommitted (e.g., CPU)
// We need to keep eligible projects from diverging from non-eligible ones;
// also, if all the debts are large negative we need to gradually
// shift them towards zero.
// To do this, we add an offset as follows:
// delta_limit is the largest rate at which any project's debt
// could increase or decrease.
// If the largest debt is close to zero (relative to delta_limit)
// then add an offset that will bring it exactly to zero.
// Otherwise add an offset of 2*delta_limit,
// which will gradually bring all the debts towards zero
//
// The policy of keeping the max debt at zero is important;
// it means that new projects will begin in parity with high-debt projects,
// and won't wait for months to get work.
//
double offset;
double delta_limit = secs_this_debt_interval;
if (max_debt > -2*delta_limit) {
// treat a tiny max debt as exactly zero
if (fabs(max_debt) < 1e-6) max_debt = 0;
offset = max_debt?-max_debt:0; // avoid -0
} else {
offset = 2*delta_limit;
}
if (log_flags.debt_debug) {
msg_printf(0, MSG_INFO, "[debt] %s LTD: adding offset %f",
rsc_name(rsc_type), offset
);
}
// apply the offset:
// eligible projects always get it;
// ineligible projects get it only if it's positive.
// In all cases LTD is capped at zero.
//
for (i=0; i<gstate.projects.size(); i++) {
p = gstate.projects[i];
if (p->non_cpu_intensive) continue;
if (!p->resource_share) continue;
RSC_PROJECT_WORK_FETCH& w = project_state(p);
if (w.debt_eligible(p, *this)) {
w.long_term_debt += offset;
} else {
if (offset > 0) {
w.long_term_debt += offset;
}
}
if (w.long_term_debt > 0) w.long_term_debt = 0;
}
}
// update short-term debts for a resource.
//
void RSC_WORK_FETCH::update_short_term_debts() {
unsigned int i;
PROJECT* p;
int nprojects=0, nrprojects=0;
double share_frac;
double total_short_term_debt = 0;
double rrs = gstate.runnable_resource_share(rsc_type);
// for projects with no runnable jobs,
// STD decays by a factor of e every day
//
double decay_factor = exp(-secs_this_debt_interval/86400);
for (i=0; i<gstate.projects.size(); i++) {
double delta;
p = gstate.projects[i];
if (p->non_cpu_intensive) continue;
if (!p->resource_share) continue;
RSC_PROJECT_WORK_FETCH& rpwf = project_state(p);
nprojects++;
if (p->runnable(rsc_type)) {
nrprojects++;
share_frac = p->resource_share/rrs;
delta = share_frac*secs_this_debt_interval
- rpwf.secs_this_debt_interval;
delta /= ninstances;
if (log_flags.std_debug) {
msg_printf(p, MSG_INFO,
"[std] %s STD delta %.2f (%.2f*%.2f - %.2f)/%d",
rsc_name(rsc_type),
delta,
share_frac,
secs_this_debt_interval,
rpwf.secs_this_debt_interval,
ninstances
);
}
rpwf.short_term_debt += delta;
} else {
rpwf.short_term_debt *= decay_factor;
}
total_short_term_debt += rpwf.short_term_debt;
}
// normalize so mean is zero, and limit abs value to MAX_STD
//
if (nrprojects) {
double avg_short_term_debt = total_short_term_debt / nprojects;
for (i=0; i<gstate.projects.size(); i++) {
p = gstate.projects[i];
if (p->non_cpu_intensive) continue;
if (!p->resource_share) continue;
if (p->runnable(rsc_type)) {
RSC_PROJECT_WORK_FETCH& rpwf = project_state(p);
rpwf.short_term_debt -= avg_short_term_debt;
if (rpwf.short_term_debt > MAX_STD) {
rpwf.short_term_debt = MAX_STD;
}
if (rpwf.short_term_debt < -MAX_STD) {
rpwf.short_term_debt = -MAX_STD;
}
if (log_flags.std_debug) {
msg_printf(p, MSG_INFO,
"[std] %s STD %.2f",
rsc_name(rsc_type), rpwf.short_term_debt
);
}
}
}
}
}
//#endif
/////////////// PROJECT_WORK_FETCH ///////////////
bool PROJECT_WORK_FETCH::compute_can_fetch_work(PROJECT* p) {
@ -786,57 +497,6 @@ void WORK_FETCH::set_all_requests(PROJECT* p) {
}
}
//#ifndef USE_REC
// Compute an "overall long-term debt" for each project.
// This is a sum of per-resource terms, scaled by the relative speed of the resource.
// The term for a resource is its LTD plus an estimate of queued work.
//
void WORK_FETCH::set_overall_debts() {
unsigned int i;
PROJECT* p;
RESULT* rp;
APP_VERSION* avp;
for (i=0; i<gstate.projects.size(); i++) {
p = gstate.projects[i];
for (int j=0; j<coprocs.n_rsc; j++) {
p->rsc_pwf[j].queue_est = 0;
}
}
for (i=0; i<gstate.results.size(); i++) {
rp = gstate.results[i];
p = rp->project;
if (!rp->nearly_runnable()) continue;
if (p->non_cpu_intensive) continue;
double dt = rp->estimated_time_remaining();
avp = rp->avp;
p->rsc_pwf[0].queue_est += dt*avp->avg_ncpus;
int rt = avp->gpu_usage.rsc_type;
if (rt) {
p->rsc_pwf[rt].queue_est += dt*avp->gpu_usage.usage;
}
}
for (i=0; i<gstate.projects.size(); i++) {
p = gstate.projects[i];
double queue_debt = p->rsc_pwf[0].queue_est/gstate.ncpus;
p->pwf.overall_debt = p->rsc_pwf[0].long_term_debt - queue_debt;
for (int j=1; j<coprocs.n_rsc; j++) {
p->pwf.overall_debt += rsc_work_fetch[j].relative_speed*
(p->rsc_pwf[j].long_term_debt - p->rsc_pwf[j].queue_est/coprocs.coprocs[j].count);
}
}
}
void WORK_FETCH::zero_debts() {
for (unsigned i=0; i<gstate.projects.size(); i++) {
PROJECT* p = gstate.projects[i];
for (int j=0; j<coprocs.n_rsc; j++) {
p->rsc_pwf[j].zero_debt();
}
}
}
//#endif
void WORK_FETCH::print_state() {
msg_printf(0, MSG_INFO, "[work_fetch] ------- start work fetch state -------");
msg_printf(0, MSG_INFO, "[work_fetch] target work buffer: %.2f + %.2f sec",
@ -848,13 +508,7 @@ void WORK_FETCH::print_state() {
for (unsigned int i=0; i<gstate.projects.size(); i++) {
PROJECT* p = gstate.projects[i];
if (p->non_cpu_intensive) continue;
if (use_rec) {
msg_printf(p, MSG_INFO, "[work_fetch] REC %f", p->pwf.rec);
} else {
msg_printf(p, MSG_INFO, "[work_fetch] overall LTD %.2f",
p->pwf.overall_debt
);
}
}
msg_printf(0, MSG_INFO, "[work_fetch] ------- end work fetch state -------");
}
@ -939,15 +593,11 @@ PROJECT* WORK_FETCH::choose_project() {
rr_simulation();
compute_shares();
if (use_rec) {
project_priority_init();
for (unsigned int i=0; i<gstate.results.size(); i++) {
RESULT* rp = gstate.results[i];
adjust_rec_work_fetch(rp);
}
} else {
set_overall_debts();
}
if (use_hyst_fetch) {
if (gpus_usable) {
@ -1033,11 +683,6 @@ void WORK_FETCH::compute_shares() {
for (i=0; i<gstate.projects.size(); i++) {
p = gstate.projects[i];
if (p->non_cpu_intensive) continue;
for (int j=0; j<coprocs.n_rsc; j++) {
if (p->rsc_pwf[j].has_runnable_jobs) {
rsc_work_fetch[j].total_runnable_share += p->resource_share;
}
}
if (!p->pwf.can_fetch_work) continue;
for (int j=0; j<coprocs.n_rsc; j++) {
if (p->rsc_pwf[j].may_have_work) {
@ -1048,11 +693,6 @@ void WORK_FETCH::compute_shares() {
for (i=0; i<gstate.projects.size(); i++) {
p = gstate.projects[i];
if (p->non_cpu_intensive) continue;
for (int j=0; j<coprocs.n_rsc; j++) {
if (p->rsc_pwf[j].has_runnable_jobs) {
p->rsc_pwf[j].runnable_share = p->resource_share/rsc_work_fetch[j].total_runnable_share;
}
}
if (!p->pwf.can_fetch_work) continue;
for (int j=0; j<coprocs.n_rsc; j++) {
if (p->rsc_pwf[j].may_have_work) {
@ -1173,12 +813,6 @@ void WORK_FETCH::init() {
);
}
if (!use_rec) {
if (config.zero_debts) {
zero_debts();
}
}
// see what resources anon platform projects can use
//
unsigned int i, j;

View File

@ -23,9 +23,6 @@
#include <vector>
extern bool use_rec;
#define USE_REC
extern bool use_hyst_fetch;
#define RSC_TYPE_ANY -1
@ -44,12 +41,6 @@ struct RSC_PROJECT_WORK_FETCH {
// the following are persistent (saved in state file)
double backoff_time;
double backoff_interval;
//#ifndef USE_REC
double long_term_debt;
double short_term_debt;
double anticipated_debt;
// short-term debt, adjusted by scheduled jobs
//#endif
// the following used by debt accounting
double secs_this_debt_interval;
@ -62,13 +53,6 @@ struct RSC_PROJECT_WORK_FETCH {
bool anon_skip;
// set if this project is anonymous platform
// and it has no app version that uses this resource
// the following are used by rr_simulation()
//
double runnable_share;
// this project's share relative to projects that have
// nearly runnable jobs for this resource;
// determines processing rate for CPU
double fetchable_share;
// this project's share relative to projects from which
// we could probably get work for this resource;
@ -84,25 +68,9 @@ struct RSC_PROJECT_WORK_FETCH {
memset(this, 0, sizeof(*this));
}
//#ifndef USE_REC
// whether this project should accumulate debt for this resource
//
bool debt_eligible(PROJECT*, RSC_WORK_FETCH&);
// Set both the long-term and short-term debt to zero
// (anticipated_debt is not touched).
inline void zero_debt() {
    long_term_debt = short_term_debt = 0;
}
//#endif
// Zero the backoff state and all three debt values.
inline void reset() {
    backoff_time = backoff_interval = 0;
//#ifndef USE_REC
    long_term_debt = short_term_debt = anticipated_debt = 0;
//#endif
}
bool may_have_work;
@ -113,9 +81,6 @@ struct RSC_PROJECT_WORK_FETCH {
backoff_time = 0;
backoff_interval = 0;
}
//#ifndef USE_REC
bool overworked();
//#endif
};
// estimate the time a resource will be saturated
@ -186,8 +151,6 @@ struct RSC_WORK_FETCH {
double sim_nused;
double total_fetchable_share;
// total RS of projects from which we could fetch jobs for this device
double total_runnable_share;
// total RS of projects with runnable jobs for this device
double saturated_time;
// estimated time until resource is not saturated
// used to calculate work request
@ -224,10 +187,6 @@ struct RSC_WORK_FETCH {
PROJECT* choose_project(int);
void supplement(PROJECT*);
RSC_PROJECT_WORK_FETCH& project_state(PROJECT*);
//#ifndef USE_REC
void update_long_term_debts();
void update_short_term_debts();
//#endif
void print_state(const char*);
void clear_request();
void set_request(PROJECT*, bool allow_overworked);
@ -240,7 +199,6 @@ struct RSC_WORK_FETCH {
nidle_now = 0;
sim_nused = 0;
total_fetchable_share = 0;
total_runnable_share = 0;
saturated_time = 0;
deadline_missed_instances = 0;
}
@ -250,16 +208,12 @@ struct RSC_WORK_FETCH {
// per project state
//
struct PROJECT_WORK_FETCH {
//#ifdef USE_REC
double rec;
// recent estimated credit
double rec_time;
// when it was last updated
double rec_temp;
// temporary copy used during schedule_cpus()
//#else
double overall_debt;
//#endif
bool can_fetch_work;
bool compute_can_fetch_work(PROJECT*);
bool has_runnable_jobs;
@ -272,10 +226,6 @@ struct PROJECT_WORK_FETCH {
// global work fetch state
//
struct WORK_FETCH {
//#ifndef USE_REC
void set_overall_debts();
void zero_debts();
//#endif
PROJECT* choose_project();
// find a project to ask for work
PROJECT* non_cpu_intensive_project_needing_work();
@ -304,11 +254,9 @@ extern WORK_FETCH work_fetch;
extern void set_no_rsc_config();
//#ifdef USE_REC
void project_priority_init(bool init_rec_temp=true);
double project_priority(PROJECT*);
void adjust_rec_sched(RESULT*);
void adjust_rec_work_fetch(RESULT*);
//#endif
#endif