*** empty log message ***

svn path=/trunk/boinc/; revision=6081
David Anderson 2005-05-09 04:49:56 +00:00
parent e7b6066a77
commit 9cd02523b2
8 changed files with 120 additions and 37 deletions

checkin_notes

@@ -6339,3 +6339,11 @@ Janus 8 May 2005
     payment_success.php (new)
     payment_process.php (new)
+
+David  8 May 2005
+    - work fetch and CPU scheduling fixes (from John McLeod)
+
+    client/
+        client_state.C,h
+        client_types.C,h
+        cs_scheduler.C
+        scheduler_op.C,h

client/client_state.C

@@ -1319,7 +1319,7 @@ double CLIENT_STATE::total_resource_share() {
     double x = 0;
     for (i=0; i<projects.size(); i++) {
-        x += projects[i]->resource_share;
+        if (!projects[i]->non_cpu_intensive ) x += projects[i]->resource_share;
     }
     return x;
 }
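
For a sense of what the one-line change above does, here is a small standalone sketch (a made-up Proj struct and sample shares, not the real PROJECT/CLIENT_STATE types): excluding a non-CPU-intensive project from the total means it no longer dilutes the shares of the CPU-bound projects.

    // Sketch only: simplified stand-ins, not the real PROJECT/CLIENT_STATE types.
    #include <cstdio>
    #include <vector>

    struct Proj {
        double resource_share;
        bool non_cpu_intensive;
    };

    // Mirrors the patched total_resource_share(): skip non-CPU-intensive projects.
    static double total_resource_share(const std::vector<Proj>& projects) {
        double x = 0;
        for (unsigned int i = 0; i < projects.size(); i++) {
            if (!projects[i].non_cpu_intensive) x += projects[i].resource_share;
        }
        return x;
    }

    int main() {
        std::vector<Proj> projects;
        projects.push_back(Proj{100, false});  // CPU-bound project A
        projects.push_back(Proj{100, false});  // CPU-bound project B
        projects.push_back(Proj{50, true});    // non-CPU-intensive project C
        double trs = total_resource_share(projects);
        // Before this change: trs = 250, A's fraction = 0.40.
        // After this change:  trs = 200, A's fraction = 0.50.
        std::printf("trs=%.0f  A's fraction=%.2f\n", trs, projects[0].resource_share / trs);
        return 0;
    }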

client/client_state.h

@@ -47,8 +47,9 @@
 #define USER_RUN_REQUEST_NEVER 3
 
 #define WORK_FETCH_DONT_NEED 0
-#define WORK_FETCH_NEED 1
-#define WORK_FETCH_NEED_IMMEDIATELY 2
+#define WORK_FETCH_OK 1
+#define WORK_FETCH_NEED 2
+#define WORK_FETCH_NEED_IMMEDIATELY 3
 
 enum SUSPEND_REASON {
     SUSPEND_REASON_BATTERIES = 1,
@@ -273,7 +274,7 @@ private:
 public:
     double work_needed_secs();
     PROJECT* next_project_master_pending();
-    PROJECT* next_project_need_work(PROJECT*);
+    PROJECT* next_project_need_work(PROJECT* old, int urgency);
     int make_scheduler_request(PROJECT*, double);
     int handle_scheduler_reply(PROJECT*, char* scheduler_url, int& nresults);
     int compute_work_requests();
@@ -287,9 +288,9 @@ private:
     bool scheduler_rpc_poll(double);
     double ettprc(PROJECT*, int);
     double avg_proc_rate(PROJECT*);
     bool should_get_work();
     bool no_work_for_a_cpu();
     void set_cpu_scheduler_modes();
 
 // --------------- cs_statefile.C:
 public:
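
The renumbered WORK_FETCH_* constants form an ordered urgency ladder (don't need < ok < need < need immediately), which is what makes comparisons such as urgency <= WORK_FETCH_OK in cs_scheduler.C meaningful. A minimal sketch of that ordering follows; the urgency_name() helper is invented here for illustration and is not part of BOINC.

    // Sketch: the four urgency levels, ordered so numeric comparison == priority comparison.
    #include <cstdio>

    #define WORK_FETCH_DONT_NEED        0
    #define WORK_FETCH_OK               1
    #define WORK_FETCH_NEED             2
    #define WORK_FETCH_NEED_IMMEDIATELY 3

    // Invented helper (not in BOINC): label an urgency level for logging.
    static const char* urgency_name(int u) {
        switch (u) {
        case WORK_FETCH_DONT_NEED:        return "don't need";
        case WORK_FETCH_OK:               return "ok to fetch";
        case WORK_FETCH_NEED:             return "need";
        case WORK_FETCH_NEED_IMMEDIATELY: return "need immediately";
        default:                          return "unknown";
        }
    }

    int main() {
        // The scheduler code relies on the ordering, e.g. "pass on this project unless
        // either the client or the project is above the merely-OK level":
        int urgency = WORK_FETCH_OK;
        int project_urgency = WORK_FETCH_NEED;
        bool pass = (urgency <= WORK_FETCH_OK && project_urgency <= WORK_FETCH_OK);
        std::printf("client='%s' project='%s' -> pass=%d\n",
            urgency_name(urgency), urgency_name(project_urgency), (int)pass);
        return 0;
    }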

client/client_types.C

@@ -1129,6 +1129,8 @@ void RESULT::clear() {
     app = NULL;
     wup = NULL;
     project = NULL;
+    high_priority = false;
+    return_result_immediately = false;
 }
 
 // parse a <result> element from scheduling server.
@@ -1148,6 +1150,13 @@ int RESULT::parse_server(MIOFILE& in) {
             validate_time(report_deadline);
             continue;
         }
+        if (match_tag(buf, "<high_priority>")) {
+            high_priority = true;
+            return_result_immediately = true;
+        }
+        if (match_tag(buf, "<return_result_immediately>")) {
+            return_result_immediately = true;
+        }
         if (match_tag(buf, "<file_ref>")) {
             file_ref.parse(in);
             output_files.push_back(file_ref);
@@ -1257,6 +1266,8 @@ int RESULT::write(MIOFILE& out, bool to_server) {
     if (ready_to_report) out.printf(" <ready_to_report/>\n");
     if (suspended_via_gui) out.printf(" <suspended_via_gui/>\n");
     if (aborted_via_gui) out.printf(" <aborted_via_gui/>\n");
+    if (high_priority) out.printf(" <high_priority/>\n");
+    if (return_result_immediately) out.printf(" <return_result_immediately/>\n");
     out.printf(
         " <wu_name>%s</wu_name>\n"
         " <report_deadline>%f</report_deadline>\n",
@@ -1296,6 +1307,8 @@ int RESULT::write_gui(MIOFILE& out) {
     if (ready_to_report) out.printf(" <ready_to_report/>\n");
     if (suspended_via_gui) out.printf(" <suspended_via_gui/>\n");
     if (aborted_via_gui) out.printf(" <aborted_via_gui/>\n");
+    if (high_priority) out.printf(" <high_priority/>\n");
+    if (return_result_immediately) out.printf(" <return_result_immediately/>\n");
     ACTIVE_TASK* atp = gstate.active_tasks.lookup_result(this);
     if (atp) {
         atp->write(out);
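
To see the round trip these hunks implement, here is a simplified, self-contained sketch of the flag handling: the tags travel as empty XML elements, and <high_priority> alone implies return_result_immediately, as in the patched parse_server(). The string-scanning parser below is a stand-in, not BOINC's MIOFILE/match_tag machinery.

    // Simplified stand-in for the <result> flag round trip; not BOINC's MIOFILE/match_tag code.
    #include <cstdio>
    #include <string>

    struct ResultFlags {
        bool high_priority;
        bool return_result_immediately;
    };

    static ResultFlags parse_flags(const std::string& xml) {
        ResultFlags f = { false, false };
        if (xml.find("<high_priority/>") != std::string::npos) {
            f.high_priority = true;
            f.return_result_immediately = true;  // implied, as in the patched parse_server()
        }
        if (xml.find("<return_result_immediately/>") != std::string::npos) {
            f.return_result_immediately = true;
        }
        return f;
    }

    int main() {
        std::string from_server =
            "<result>\n"
            "    <name>example_wu_0</name>\n"
            "    <high_priority/>\n"
            "</result>\n";
        ResultFlags f = parse_flags(from_server);
        // Prints 1 1: <high_priority> alone turns on return_result_immediately too.
        std::printf("high_priority=%d return_result_immediately=%d\n",
            (int)f.high_priority, (int)f.return_result_immediately);
        return 0;
    }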

client/client_types.h

@@ -231,7 +231,7 @@ public:
     double work_done_this_period;   // how much CPU time has been devoted to this
                                     // project in the current period (secs)
     struct RESULT *next_runnable_result; // the next result to run for this project
 
     // the following used by work-fetch algorithm
     double long_term_debt;
         // how much CPU time we owe this project in the long term (secs)
@@ -365,6 +365,13 @@ struct RESULT {
     bool already_selected;
         // used to keep cpu scheduler from scheduling a result twice
         // transient; used only within schedule_cpus()
+    bool high_priority;  // this will override the normal CPU handler so that this WU will run to completion
+        // before any other normal work is run.  It will not override the panic mode scheduler.
+        // this can be set by the server supplying the WU.  This was strongly requested by
+        // Folding@Home.
+    bool return_result_immediately;  // this is what it says.  It can be set by the project supplying the WU.
+        // It will be set to true by the parsing code if high_priority is set, even
+        // if it is not set by the calling server.
     void clear();
     int parse_server(MIOFILE&);
     int parse_state(MIOFILE&);
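
The comments above state the intent: a high_priority result should run ahead of normal work, but must not override the deadline-panic scheduler. This commit only introduces the flags; the following is merely a sketch of the kind of ordering such a flag could impose, using a made-up Result type rather than the real schedule_cpus() logic.

    // Illustrative ordering only; the real client schedules RESULT/ACTIVE_TASK objects.
    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Result {
        const char* name;
        bool high_priority;   // requested by the supplying project
        double deadline;      // seconds from now; smaller = more urgent
    };

    int main() {
        std::vector<Result> runnable;
        runnable.push_back(Result{"normal_a", false,  7 * 86400.0});
        runnable.push_back(Result{"hp_b",     true,  30 * 86400.0});
        runnable.push_back(Result{"normal_c", false,  2 * 86400.0});

        bool panic_mode = false;  // when deadlines are at risk, earliest-deadline-first wins
        std::sort(runnable.begin(), runnable.end(),
            [&](const Result& x, const Result& y) {
                if (!panic_mode && x.high_priority != y.high_priority) {
                    return x.high_priority;      // high-priority work first, unless panicking
                }
                return x.deadline < y.deadline;  // otherwise earliest deadline first
            });

        for (size_t i = 0; i < runnable.size(); i++) {
            std::printf("%u: %s\n", (unsigned)i, runnable[i].name);
        }
        return 0;
    }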

client/cs_scheduler.C

@@ -75,7 +75,7 @@ const int SECONDS_BEFORE_REPORTING_MIN_RPC_TIME_AGAIN = 60*60;
 #define MAX_CPU_LOAD_FACTOR 0.8
 
 static int proj_min_results(PROJECT* p, int ncpus) {
-    return (int)(ceil(ncpus*p->resource_share/trs));
+    return p->non_cpu_intensive ? 1 : (int)(ceil(ncpus*p->resource_share/trs));
 }
 
 void PROJECT::set_min_rpc_time(double future_time) {
     if (future_time > min_rpc_time) {
@@ -144,7 +144,7 @@ PROJECT* CLIENT_STATE::next_project_sched_rpc_pending() {
 // 3) has master_url_fetch_pending == false
 // 4) has dont_request_more_work == false
 //
-PROJECT* CLIENT_STATE::next_project_need_work(PROJECT* old) {
+PROJECT* CLIENT_STATE::next_project_need_work(PROJECT* old, int urgency) {
     PROJECT *p, *p_prospect = NULL;
     double work_on_prospect;
     double now = dtime();
@@ -163,7 +163,18 @@ PROJECT* CLIENT_STATE::next_project_need_work(PROJECT* old) {
         if (p->suspended_via_gui) continue;
         if (p->dont_request_more_work) continue;
         if (p->long_term_debt < 0 && !cpu_idle) continue;
-        if (p->non_cpu_intensive) continue;
+        if (p->work_request_urgency == WORK_FETCH_DONT_NEED) continue;
+
+        // if we don't really need work, and we don't really need work from this project, pass.
+        if (urgency <= WORK_FETCH_OK && p->work_request_urgency <= WORK_FETCH_OK) continue;
+
+        // if there is a project for which a work request is OK and one that has a higher priority, take the one
+        // with the higher priority.
+        if (p_prospect && p->work_request_urgency == WORK_FETCH_OK &&
+            p_prospect->work_request_urgency > p->work_request_urgency) {
+            continue;
+        }
 
         double work_on_current = ettprc(p, 0);
         if (p_prospect
@@ -172,17 +183,13 @@ PROJECT* CLIENT_STATE::next_project_need_work(PROJECT* old) {
         ) {
             continue;
         }
-        if (p->work_request_urgency == WORK_FETCH_DONT_NEED
-            && (!cpu_idle || p->non_cpu_intensive)
-        ) {
-            continue;
-        }
-        if (found_old && p->work_request > 0) {
+        if (p->work_request > 0) {
             p_prospect = p;
             work_on_prospect = work_on_current;
         }
     }
+    if (p_prospect && !(p_prospect->work_request > 0.0)) p_prospect->work_request = 1.0;
     return p_prospect;
 }
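
A condensed sketch of the selection rule added in the two hunks above, run on stand-in data (it omits the ettprc()-based buffer comparison that the real loop also applies): a project is skipped when neither the client nor the project is above the OK level, and a merely-OK project never displaces a more urgent prospect.

    // Sketch of the project-selection rule on stand-in data; not the real PROJECT list walk,
    // and the ettprc()-based tie-break on buffered work is omitted.
    #include <cstdio>
    #include <vector>

    #define WORK_FETCH_DONT_NEED        0
    #define WORK_FETCH_OK               1
    #define WORK_FETCH_NEED             2
    #define WORK_FETCH_NEED_IMMEDIATELY 3

    struct Proj {
        const char* name;
        int work_request_urgency;
        double work_request;  // seconds of work to ask for
    };

    static const Proj* pick(const std::vector<Proj>& projects, int urgency) {
        const Proj* prospect = 0;
        for (unsigned int i = 0; i < projects.size(); i++) {
            const Proj* p = &projects[i];
            if (p->work_request_urgency == WORK_FETCH_DONT_NEED) continue;
            // neither the client as a whole nor this project really needs work: pass
            if (urgency <= WORK_FETCH_OK && p->work_request_urgency <= WORK_FETCH_OK) continue;
            // a merely-OK project never displaces a more urgent prospect
            if (prospect && p->work_request_urgency == WORK_FETCH_OK &&
                prospect->work_request_urgency > p->work_request_urgency) continue;
            if (p->work_request > 0) prospect = p;
        }
        return prospect;
    }

    int main() {
        std::vector<Proj> projects;
        projects.push_back(Proj{"A", WORK_FETCH_NEED, 1200});  // about to starve
        projects.push_back(Proj{"B", WORK_FETCH_OK,    600});  // could take work, doesn't need it
        const Proj* p = pick(projects, WORK_FETCH_NEED);
        std::printf("chosen: %s\n", p ? p->name : "none");     // A: B never displaces it
        return 0;
    }
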
@@ -405,7 +412,12 @@ double CLIENT_STATE::ettprc(PROJECT *p, int k) {
             --num_results_to_skip;
             continue;
         }
-        est += rp->estimated_cpu_time_remaining();
+        if (rp->project->non_cpu_intensive) {
+            // if it is a non_cpu intensive project, it needs only one at a time.
+            est = max(rp->estimated_cpu_time_remaining(), global_prefs.work_buf_min_days * SECONDS_PER_DAY);
+        } else {
+            est += rp->estimated_cpu_time_remaining();
+        }
     }
     double apr = avg_proc_rate(p);
     return est/apr;
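
A worked example of the ettprc() change above, with made-up numbers: for a non-CPU-intensive project the estimate is not summed over queued results; it is floored at the work-buffer period, so the project does not look starved while it still has a result in hand.

    // Worked example of the ettprc() change for a non-CPU-intensive project (made-up numbers).
    #include <algorithm>
    #include <cstdio>

    int main() {
        const double SECONDS_PER_DAY = 86400;
        double work_buf_min_days = 0.5;        // user preference: keep ~half a day of work queued
        double remaining[2] = { 1200, 300 };   // CPU seconds left on two queued results

        // CPU-intensive project: estimates accumulate over every queued result.
        double est_cpu = remaining[0] + remaining[1];  // 1500 s of work on hand

        // Non-CPU-intensive project: one result at a time is enough, so the estimate is
        // floored at the buffer period instead of summed -- the project won't report
        // itself as starving while a result is still in progress.
        double est_non_cpu = std::max(remaining[0], work_buf_min_days * SECONDS_PER_DAY);

        std::printf("cpu-intensive est = %.0f s, non-cpu-intensive est = %.0f s\n",
            est_cpu, est_non_cpu);  // 1500 s vs 43200 s
        return 0;
    }
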
@@ -418,10 +430,36 @@ double CLIENT_STATE::ettprc(PROJECT *p, int k) {
 //
 int CLIENT_STATE::compute_work_requests() {
     int urgency = WORK_FETCH_DONT_NEED;
+    int highest_project_urgency = WORK_FETCH_DONT_NEED;
     unsigned int i;
     double work_min_period = global_prefs.work_buf_min_days * SECONDS_PER_DAY;
     double now = dtime();
+    double global_work_need = work_needed_secs();
+
+    for (i = 0; i < projects.size(); ++i) {
+        projects[i]->work_request_urgency = WORK_FETCH_DONT_NEED;
+        projects[i]->work_request = 0;
+    }
+
+    if (!should_get_work()) {
+        return WORK_FETCH_DONT_NEED;
+    } else if (no_work_for_a_cpu()) {
+        urgency = WORK_FETCH_NEED_IMMEDIATELY;
+    } else if (global_work_need > 0) {
+        urgency = WORK_FETCH_NEED;
+    } else {
+        urgency = WORK_FETCH_OK;
+    }
+
+    double max_fetch = work_min_period;
+    // it is possible to have a work fetch policy of no new work and also have
+    // a CPU idle or not enough to fill the cache.  In this case, we get work, but in little tiny increments
+    // as we are already in trouble and we need to minimize the damage.
+    if (this->work_fetch_no_new_work) {
+        max_fetch = 1.0;
+    }
+
     trs = total_resource_share();
 
     // for each project, compute
@@ -435,10 +473,11 @@ int CLIENT_STATE::compute_work_requests() {
         p->work_request = 0;
         p->work_request_urgency = WORK_FETCH_DONT_NEED;
 
+        if (p->master_url_fetch_pending) continue;
         if (p->min_rpc_time >= now) continue;
         if (p->dont_request_more_work) continue;
         if (p->suspended_via_gui) continue;
-        if (p->long_term_debt < 0 && !no_work_for_a_cpu()) continue;
+        if ((p->long_term_debt < 0) && (urgency != WORK_FETCH_NEED_IMMEDIATELY)) continue;
 
         int min_results = proj_min_results(p, ncpus);
         double estimated_time_to_starvation = ettprc(p, min_results-1);
@@ -450,7 +489,6 @@ int CLIENT_STATE::compute_work_requests() {
 #if DEBUG_SCHED
                 msg_printf(p, MSG_INFO, "is starved");
 #endif
-                urgency = WORK_FETCH_NEED_IMMEDIATELY;
                 p->work_request_urgency = WORK_FETCH_NEED_IMMEDIATELY;
             } else {
 #if DEBUG_SCHED
@@ -458,11 +496,15 @@ int CLIENT_STATE::compute_work_requests() {
                     estimated_time_to_starvation
                 );
 #endif
-                urgency = max(WORK_FETCH_NEED, urgency);
+                p->work_request_urgency = WORK_FETCH_NEED;
+                urgency = WORK_FETCH_NEED;
             }
+        } else if (WORK_FETCH_OK < urgency) {
+            p->work_request_urgency = WORK_FETCH_OK;
+            p->work_request = global_work_need;
         }
 
+        highest_project_urgency = max(highest_project_urgency, p->work_request_urgency);
+
         // determine work requests for each project
         // NOTE: don't need to divide by active_frac etc.;
         // the scheduler does that (see sched/sched_send.C)
@@ -477,12 +519,6 @@ int CLIENT_STATE::compute_work_requests() {
 #endif
     }
 
-    if (urgency == WORK_FETCH_DONT_NEED) {
-        for (i=0; i<projects.size(); ++i) {
-            projects[i]->work_request = 0;
-        }
-    }
-
     return urgency;
 }
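
Taken together, the compute_work_requests() hunks above derive one global urgency level per pass before looking at individual projects. A condensed sketch of that decision; should_get_work(), no_work_for_a_cpu() and work_needed_secs() are replaced here by canned stand-ins so the example is self-contained.

    // Condensed sketch of the global-urgency decision; the three helpers are canned
    // stand-ins for CLIENT_STATE::should_get_work(), no_work_for_a_cpu() and work_needed_secs().
    #include <cstdio>

    #define WORK_FETCH_DONT_NEED        0
    #define WORK_FETCH_OK               1
    #define WORK_FETCH_NEED             2
    #define WORK_FETCH_NEED_IMMEDIATELY 3

    static bool should_get_work()    { return true; }   // fetching work is allowed at all
    static bool no_work_for_a_cpu()  { return false; }  // every CPU has something to run
    static double work_needed_secs() { return 900; }    // buffer is 900 s short of the target

    int main() {
        int urgency;
        if (!should_get_work()) {
            urgency = WORK_FETCH_DONT_NEED;         // policy says: don't ask anyone
        } else if (no_work_for_a_cpu()) {
            urgency = WORK_FETCH_NEED_IMMEDIATELY;  // an idle CPU: fetch right away
        } else if (work_needed_secs() > 0) {
            urgency = WORK_FETCH_NEED;              // buffer has dipped below the preference
        } else {
            urgency = WORK_FETCH_OK;                // topping up is allowed but not required
        }
        std::printf("urgency = %d\n", urgency);     // 2 (WORK_FETCH_NEED) for these inputs
        return 0;
    }
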
@@ -522,10 +558,10 @@ bool CLIENT_STATE::scheduler_rpc_poll(double now) {
                 "Insufficient work; requesting more"
             );
         }
-        scheduler_op->init_get_work(false);
+        scheduler_op->init_get_work(false, urgency);
         action = true;
     } else if ((p=next_project_master_pending())) {
-        scheduler_op->init_get_work(true);
+        scheduler_op->init_get_work(true, urgency);
         action = true;
     } else if ((p=next_project_sched_rpc_pending())) {
         scheduler_op->init_return_results(p);
@@ -1012,4 +1048,18 @@ void CLIENT_STATE::set_cpu_scheduler_modes() {
     cpu_earliest_deadline_first = use_earliest_deadline_first;
 }
 
+double CLIENT_STATE::work_needed_secs()
+{
+    double total_work = 0;
+    for( unsigned int i = 0; i < results.size(); ++i) {
+        if (results[i]->project->non_cpu_intensive) continue;
+        total_work += results[i]->estimated_cpu_time_remaining();
+    }
+    if (total_work > global_prefs.work_buf_min_days) {
+        return 0;
+    } else {
+        return global_prefs.work_buf_min_days - total_work;
+    }
+}
+
 const char *BOINC_RCSID_d35a4a7711 = "$Id$";

client/scheduler_op.C

@@ -80,13 +80,13 @@ bool SCHEDULER_OP::check_master_fetch_start() {
 // PRECONDITION: compute_work_requests() has been called
 // to fill in PROJECT::work_request
 //
-int SCHEDULER_OP::init_get_work(bool master_file_only) {
+int SCHEDULER_OP::init_get_work(bool master_file_only, int urgency) {
     int retval;
     char err_msg[256];
     double ns;
 
     must_get_work = true;
-    project = gstate.next_project_need_work(0);
+    project = gstate.next_project_need_work(0, urgency);
     if (project && !master_file_only) {
         ns = project->work_request;
         msg_printf(project, MSG_INFO, "Requesting %.2f seconds of work", ns);
@@ -209,6 +209,7 @@ void SCHEDULER_OP::backoff(PROJECT* p, const char *error_msg ) {
         p->nrpc_failures++;
     }
     set_min_rpc_time(p);
+    p->long_term_debt -= (p->min_rpc_time - dtime()) / gstate.global_prefs.max_projects_on_client;
 }
 
 // low-level routine to initiate an RPC
@@ -448,10 +449,13 @@ bool SCHEDULER_OP::poll() {
             backoff(project, "No schedulers responded");
             if (must_get_work) {
                 int urgency = gstate.compute_work_requests();
-                project = gstate.next_project_need_work(project);
-                if (project && urgency != WORK_FETCH_DONT_NEED) {
-                    retval = init_op_project(project->work_request);
-                } else {
+                if (urgency != WORK_FETCH_DONT_NEED) {
+                    project = gstate.next_project_need_work(project, urgency);
+                    if (project) {
+                        retval = init_op_project(project->work_request);
+                    } else {
+                        scheduler_op_done = true;
+                    }
                     scheduler_op_done = true;
                 }
             } else {
@@ -512,7 +516,7 @@ bool SCHEDULER_OP::poll() {
             if (must_get_work) {
                 int urgency = gstate.compute_work_requests();
                 if (urgency != WORK_FETCH_DONT_NEED) {
-                    project = gstate.next_project_need_work(project);
+                    project = gstate.next_project_need_work(project, urgency);
                     if (project) {
                         retval = init_op_project(project->work_request);
                     } else {
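
A worked example of the backoff() change above (stand-in numbers): each failed scheduler RPC now also costs the project a slice of long-term debt, scaled by the max_projects_on_client preference, so work fetch drifts toward projects that are actually reachable.

    // Worked example of the backoff() debt adjustment (made-up numbers).
    #include <cstdio>

    int main() {
        double now = 0;                        // stand-in for dtime()
        double min_rpc_time = now + 3600;      // project just backed off for one hour
        double max_projects_on_client = 10;    // preference used as the scaling divisor
        double long_term_debt = 0;

        // A project whose scheduler RPCs keep failing loses a slice of long-term debt,
        // nudging work fetch toward projects that are actually reachable.
        long_term_debt -= (min_rpc_time - now) / max_projects_on_client;

        std::printf("long_term_debt after backoff: %.0f s\n", long_term_debt);  // -360 s
        return 0;
    }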

client/scheduler_op.h

@@ -73,7 +73,7 @@ struct SCHEDULER_OP {
     SCHEDULER_OP(HTTP_OP_SET*);
     bool poll();
-    int init_get_work(bool master_file_only);
+    int init_get_work(bool master_file_only, int urgency);
     int init_return_results(PROJECT*);
     int init_op_project(double ns);
     int init_master_fetch();