*** empty log message ***

svn path=/trunk/boinc/; revision=5846
David Anderson 2005-04-13 18:38:55 +00:00
parent b78353d643
commit 534ead4118
13 changed files with 266 additions and 41 deletions

View File

@ -27123,3 +27123,15 @@ Rom 12 April 2005
BOINCTaskCtrl.cpp
lib/
gui_rpc_client.C
David 13 April 2005
client/
app_control.C
client_state.C,h
client_types.C,h
cs_apps.C
cs_scheduler.C
scheduler_op.C,h
lib/
boinc_win.h
prefs.C,h

View File

@ -673,6 +673,7 @@ int ACTIVE_TASK_SET::abort_project(PROJECT* project) {
task_iter++;
}
}
project->long_term_debt = 0;
return 0;
}

Binary file not shown.

View File

@ -147,6 +147,8 @@ private:
//
double cpu_sched_last_time;
double cpu_sched_work_done_this_period;
bool work_fetch_no_new_work;
bool cpu_crunch_nearest_first;
// --------------- client_state.C:
public:
@ -219,6 +221,7 @@ private:
int app_finished(ACTIVE_TASK&);
void assign_results_to_projects();
bool schedule_largest_debt_project(double expected_pay_off);
bool schedule_nearest_deadline_project(double expected_pay_off);
bool start_apps();
bool schedule_cpus(double);
bool handle_finished_apps(double);
@ -282,6 +285,9 @@ private:
bool scheduler_rpc_poll(double);
double ettprc(PROJECT*, int);
double avg_proc_rate(PROJECT*);
bool should_get_work();
bool no_work_for_a_cpu();
void set_cpu_scheduler_modes();
// --------------- cs_statefile.C:
public:

View File

@ -86,6 +86,7 @@ void PROJECT::init() {
anonymous_platform = false;
non_cpu_intensive = false;
debt = 0;
long_term_debt = 0;
send_file_list = false;
suspended_via_gui = false;
dont_request_more_work = false;
@ -176,6 +177,7 @@ int PROJECT::parse_state(MIOFILE& in) {
else if (match_tag(buf, "<deletion_policy_expire/>")) deletion_policy_expire = true;
#endif
else if (parse_double(buf, "<debt>", debt)) continue;
else if (parse_double(buf, "<long_term_debt>", long_term_debt)) continue;
else if (parse_double(buf, "<resource_share>", x)) continue; // not authoritative
else scope_messages.printf("PROJECT::parse_state(): unrecognized: %s\n", buf);
}
@ -221,6 +223,7 @@ int PROJECT::write_state(MIOFILE& out, bool gui_rpc) {
" <master_fetch_failures>%d</master_fetch_failures>\n"
" <min_rpc_time>%f</min_rpc_time>\n"
" <debt>%f</debt>\n"
" <long_term_debt>%f</long_term_debt>\n"
" <resource_share>%f</resource_share>\n"
"%s%s%s%s%s%s",
master_url,
@ -247,6 +250,7 @@ int PROJECT::write_state(MIOFILE& out, bool gui_rpc) {
master_fetch_failures,
min_rpc_time,
debt,
long_term_debt,
resource_share,
master_url_fetch_pending?" <master_url_fetch_pending/>\n":"",
sched_rpc_pending?" <sched_rpc_pending/>\n":"",
@ -314,6 +318,7 @@ void PROJECT::copy_state_fields(PROJECT& p) {
sched_rpc_pending = p.sched_rpc_pending;
safe_strcpy(code_sign_key, p.code_sign_key);
debt = p.debt;
long_term_debt = p.long_term_debt;
send_file_list = p.send_file_list;
non_cpu_intensive = p.non_cpu_intensive;
suspended_via_gui = p.suspended_via_gui;
@ -657,7 +662,7 @@ int FILE_INFO::write_gui(MIOFILE& out) {
" <nbytes>%f</nbytes>\n"
" <max_nbytes>%f</max_nbytes>\n"
" <status>%d</status>\n",
project->master_url,
project->project_name,
name,
nbytes,

View File

@ -232,6 +232,7 @@ public:
struct RESULT *next_runnable_result; // the next result to run for this project
// the following used by work-fetch algorithm
double long_term_debt; // how much CPU time we owe this project in the long term (secs)
double work_request;
// the unit is "normalized CPU seconds",
// i.e. the work should take 1 CPU on this host

View File

@ -347,6 +347,36 @@ bool CLIENT_STATE::schedule_largest_debt_project(double expected_pay_off) {
best_project->next_runnable_result = 0;
return true;
}
// Called when the CPU scheduler is in panic mode:
// schedule the runnable result with the earliest report deadline.
// Return true iff a task was scheduled.
//
bool CLIENT_STATE::schedule_nearest_deadline_project(double expected_pay_off) {
PROJECT *best_project = NULL;
RESULT *best_result = NULL;
double earliest_deadline;
bool first = true;
unsigned int i;
for (i=0; i < results.size(); ++i) {
RESULT *r = results[i];
if (RESULT_FILES_DOWNLOADED != r->state) continue;
if (r->project->non_cpu_intensive) continue;
if (r->already_selected) continue;
if (first || r->report_deadline < earliest_deadline) {
first = false;
best_project = r->project;
best_result = r;
earliest_deadline = r->report_deadline;
}
}
if (!best_result) return false;
schedule_result(best_result);
best_project->anticipated_debt -= expected_pay_off;
best_project->next_runnable_result = 0;
return true;
}
// Schedule active tasks to be run and preempted.
//
@ -399,6 +429,8 @@ bool CLIENT_STATE::schedule_cpus(double now) {
results[i]->already_selected = false;
}
set_cpu_scheduler_modes();
// do work accounting for active tasks
//
for (i=0; i<active_tasks.active_tasks.size(); i++) {
@ -428,17 +460,23 @@ bool CLIENT_STATE::schedule_cpus(double now) {
// reset temporary fields
//
first = true;
double total_long_term_debt = 0;
int count_cpu_intensive = 0;
for (i=0; i<projects.size(); i++) {
p = projects[i];
if (p->non_cpu_intensive) continue;
count_cpu_intensive++;
double debt_inc =
(p->resource_share/local_total_resource_share)
* cpu_sched_work_done_this_period
- p->work_done_this_period;
p->long_term_debt += debt_inc;
total_long_term_debt += p->long_term_debt;
if (!p->next_runnable_result) {
p->debt = 0;
p->anticipated_debt = 0;
} else {
p->debt +=
(p->resource_share/local_total_resource_share)
* cpu_sched_work_done_this_period
- p->work_done_this_period;
} else {
p->debt += debt_inc;
if (first) {
first = false;
min_debt = p->debt;
@ -452,6 +490,8 @@ bool CLIENT_STATE::schedule_cpus(double now) {
);
}
double avg_long_term_debt = total_long_term_debt / count_cpu_intensive;
// Normalize debts to zero
//
for (i=0; i<projects.size(); i++) {
@ -466,6 +506,7 @@ bool CLIENT_STATE::schedule_cpus(double now) {
//msg_printf(p, MSG_INFO, "debt %f", p->debt);
p->next_runnable_result = NULL;
}
p->long_term_debt -= avg_long_term_debt;
}
// schedule tasks for projects in order of decreasing anticipated debt
@ -477,7 +518,11 @@ bool CLIENT_STATE::schedule_cpus(double now) {
expected_pay_off = cpu_sched_work_done_this_period / ncpus;
for (j=0; j<ncpus; j++) {
assign_results_to_projects();
if (!schedule_largest_debt_project(expected_pay_off)) break;
if (cpu_crunch_nearest_first) {
if (!schedule_nearest_deadline_project(expected_pay_off)) break;
} else {
if (!schedule_largest_debt_project(expected_pay_off)) break;
}
}
// schedule non CPU intensive tasks
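
For reference, a minimal standalone sketch of the long-term-debt bookkeeping that schedule_cpus() performs above: each period a project's long_term_debt grows by its resource-share fraction of the work done minus the work the project actually received, and the debts are then shifted toward a zero average. The two projects, their shares, and the one-hour period below are invented for illustration.

#include <cstdio>

// Hypothetical two-project example; shares and numbers are invented.
struct Proj {
    const char* name;
    double resource_share;
    double work_done_this_period;  // CPU seconds this project actually got
    double long_term_debt;
};

int main() {
    Proj projects[2] = {
        {"A", 100, 3600, 0},   // got the whole hour
        {"B", 300,    0, 0}    // got nothing
    };
    double total_share = 400;   // sum of resource shares
    double work_done = 3600;    // CPU seconds done this period (one CPU, one hour)

    double total_debt = 0;
    for (int i = 0; i < 2; i++) {
        // fair share of the period's work, minus what the project received
        double debt_inc = (projects[i].resource_share / total_share) * work_done
                          - projects[i].work_done_this_period;
        projects[i].long_term_debt += debt_inc;
        total_debt += projects[i].long_term_debt;
    }
    // shift debts so they average zero, as schedule_cpus() now does
    double avg = total_debt / 2;
    for (int i = 0; i < 2; i++) {
        projects[i].long_term_debt -= avg;
        printf("%s: long_term_debt = %+.0f\n", projects[i].name, projects[i].long_term_debt);
    }
    // Prints A: -2700, B: +2700; B is "owed" CPU time, so the work-fetch
    // logic in cs_scheduler.C will prefer B when requesting work.
    return 0;
}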

View File

@ -33,6 +33,8 @@
#include <math.h>
#include <time.h>
#include <strings.h>
#include <map>
#include <set>
#endif
#include "crypt.h"
@ -68,13 +70,17 @@ const int SECONDS_BEFORE_REPORTING_MIN_RPC_TIME_AGAIN = 60*60;
//
#define REPORT_DEADLINE_CUSHION SECONDS_PER_DAY
// try to maintain no more than this load factor on the CPU.
//
#define MAX_CPU_LOAD_FACTOR 0.8
static int proj_min_results(PROJECT* p, int ncpus) {
return (int)(ceil(ncpus*p->resource_share/trs));
}
void PROJECT::set_min_rpc_time(double future_time) {
if (future_time > min_rpc_time) {
min_rpc_time = future_time;
}
min_report_min_rpc_time = 0;
}
@ -139,7 +145,7 @@ PROJECT* CLIENT_STATE::next_project_sched_rpc_pending() {
// 4) has dont_request_more_work == false
//
PROJECT* CLIENT_STATE::next_project_need_work(PROJECT *old) {
PROJECT *p;
PROJECT *p, *p_prospect = NULL;
double now = dtime();
unsigned int i;
bool found_old = (old == 0);
@ -153,11 +159,13 @@ PROJECT* CLIENT_STATE::next_project_need_work(PROJECT *old) {
if (p->waiting_until_min_rpc_time(now)) continue;
if (p->suspended_via_gui) continue;
if (p->dont_request_more_work) continue;
if (p->long_term_debt < 0 && !no_work_for_a_cpu()) continue;
if (p_prospect && p->long_term_debt < p_prospect->long_term_debt && !p->non_cpu_intensive) continue;
if (found_old && p->work_request > 0) {
return p;
p_prospect = p;
}
}
return 0;
return p_prospect;
}
// Write a scheduler request to a disk file
@ -194,22 +202,22 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p, double work_req) {
p->authenticator,
p->hostid,
p->rpc_seqno,
p->anonymous_platform?"anonymous":platform_name,
p->anonymous_platform?"anonymous":platform_name,
core_client_major_version,
core_client_minor_version,
work_req,
p->resource_share / trs,
ettprc(p, proj_min_results(p, ncpus)-1)
);
if (p->anonymous_platform) {
fprintf(f, " <app_versions>\n");
for (i=0; i<app_versions.size(); i++) {
APP_VERSION* avp = app_versions[i];
if (avp->project != p) continue;
avp->write(mf);
}
fprintf(f, " </app_versions>\n");
}
#if 0
anything_free(free);
fprintf(f, " <project_disk_free>%f</project_disk_free>\n", free);
@ -352,7 +360,7 @@ double CLIENT_STATE::avg_proc_rate(PROJECT *p) {
double running_frac = time_stats.on_frac * time_stats.active_frac;
if (running_frac < 0.1) running_frac = 0.1;
if (running_frac > 1) running_frac = 1;
return (p->resource_share / trs) * ncpus * running_frac;
return (p ? (p->resource_share / trs) : 1) * ncpus * running_frac;
}
// "estimated time to project result count"
@ -411,6 +419,7 @@ int CLIENT_STATE::compute_work_requests() {
if (p->min_rpc_time >= now) continue;
if (p->dont_request_more_work) continue;
if (p->suspended_via_gui) continue;
if (p->long_term_debt < 0 && !no_work_for_a_cpu()) continue;
int min_results = proj_min_results(p, ncpus);
double estimated_time_to_starvation = ettprc(p, min_results-1);
@ -471,7 +480,9 @@ bool CLIENT_STATE::scheduler_rpc_poll(double now) {
switch(scheduler_op->state) {
case SCHEDULER_OP_STATE_IDLE:
if (network_suspended) break;
urgency = compute_work_requests();
if (should_get_work()) {
urgency = compute_work_requests();
}
// highest priority is to report overdue results
//
@ -486,14 +497,11 @@ bool CLIENT_STATE::scheduler_rpc_poll(double now) {
global_prefs.work_buf_min_days
);
} else if (urgency == NEED_WORK_IMMEDIATELY) {
msg_printf(NULL, MSG_INFO,
"Insufficient work; requesting more"
);
}
scheduler_op->init_get_work();
}
scheduler_op->init_get_work(false);
action = true;
} else if ((p=next_project_master_pending())) {
scheduler_op->init_get_work();
scheduler_op->init_get_work(true);
action = true;
} else if ((p=next_project_sched_rpc_pending())) {
scheduler_op->init_return_results(p);
@ -559,7 +567,7 @@ int CLIENT_STATE::handle_scheduler_reply(
// see if we have a new venue from this project
//
if (strlen(sr.host_venue) && strcmp(project->host_venue, sr.host_venue)) {
safe_strcpy(project->host_venue, sr.host_venue);
msg_printf(project, MSG_INFO, "New host venue: %s", sr.host_venue);
update_project_prefs = true;
@ -596,10 +604,10 @@ int CLIENT_STATE::handle_scheduler_reply(
sr.global_prefs_xml
);
fclose(f);
update_global_prefs = true;
}
if (update_global_prefs) {
bool found_venue;
retval = global_prefs.parse_file(
GLOBAL_PREFS_FILE_NAME, project->host_venue, found_venue
@ -813,4 +821,142 @@ int CLIENT_STATE::handle_scheduler_reply(
return 0;
}
// Decide whether to ask for more work.
// Returns false if fetching more work could leave the client with too much queued.
bool CLIENT_STATE::should_get_work() {
// if there are fewer workunits available than CPUs, we need more work.
if (no_work_for_a_cpu()) return true;
double tot_cpu_time_remaining = 0;
for (unsigned int i = 0; i < results.size();++i) {
tot_cpu_time_remaining += results[i]->estimated_cpu_time_remaining();
}
if (tot_cpu_time_remaining < global_prefs.work_buf_min_days * SECONDS_PER_DAY) return true;
// if the CPU started this period overloaded, let it crunch for a while
// to work its way out of the overload before fetching more work.
if (!work_fetch_no_new_work) {
set_cpu_scheduler_modes();
}
bool ret = !work_fetch_no_new_work;
return ret;
}
// Returns true if there are fewer results than CPUs, i.e. a CPU would otherwise sit idle.
bool CLIENT_STATE::no_work_for_a_cpu() {
return (unsigned int)ncpus > results.size();
}
// Set the flags that drive the work-fetch and CPU-scheduler policies:
// work_fetch_no_new_work and cpu_crunch_nearest_first.
void CLIENT_STATE::set_cpu_scheduler_modes() {
std::map < double, RESULT * > results_by_deadline;
std::set < PROJECT * > projects_with_work;
// cheap sorting trick: std::map keeps its keys in sorted order, so iterating
// over results_by_deadline visits results by increasing deadline;
// inserts, lookups and deletes are all O(log N).
std::vector<RESULT*>::iterator it_u;
for (it_u = results.begin() ; it_u != results.end(); ++it_u) {
if (RESULT_COMPUTE_ERROR > (*it_u)->state && !(*it_u)->project->non_cpu_intensive) {
results_by_deadline[(*it_u)->report_deadline] = *it_u;
projects_with_work.insert((*it_u)->project);
}
}
bool should_not_fetch_work = false;
bool should_crunch_nearest = false;
double now;
double frac_booked;
std::vector <double> booked_to;
now = dtime();
frac_booked = 0;
for (int i = 0; i < ncpus; ++i) booked_to.push_back(now);
std::map<double, RESULT*>::iterator it;
double up_frac = avg_proc_rate(0);
for (it = results_by_deadline.begin(); it != results_by_deadline.end() && !should_not_fetch_work; ++it) {
RESULT *r = (*it).second;
if (RESULT_COMPUTE_ERROR > ((*it).second)->state) {
double lowest_book = booked_to[0];
int lowest_booked_cpu = 0;
for(int i = 1; i < ncpus; ++i)
{
if (booked_to[i] < lowest_book) {
lowest_book = booked_to[i];
lowest_booked_cpu = i;
}
}
booked_to[lowest_booked_cpu] += ((*it).second)->estimated_cpu_time_remaining();
// Are the deadlines too tight to meet reliably?
if (booked_to[lowest_booked_cpu] - now > (r->report_deadline - now) * MAX_CPU_LOAD_FACTOR * up_frac) {
should_not_fetch_work = true;
should_crunch_nearest = true;
if (!cpu_crunch_nearest_first || !work_fetch_no_new_work) {
msg_printf(NULL, MSG_INFO,
"Work fetch policy, CPU Scheduler policy - Overbooked.");
}
}
// Is the deadline soon?
if (r->report_deadline - now < 60 * 60 * 24) {
should_crunch_nearest = true;
if (!cpu_crunch_nearest_first) {
msg_printf(NULL, MSG_INFO,
"CPU Scheduler policy - Deadline < 1 day.");
}
}
// is there a deadline within twice the user's connect period? If so, crunch
// nearest-deadline-first so the result can be returned at the next connection if possible.
if (r->report_deadline - now < global_prefs.work_buf_min_days * SECONDS_PER_DAY * 2) {
should_crunch_nearest = true;
if (!cpu_crunch_nearest_first) {
msg_printf(NULL, MSG_INFO,
"CPU Scheduler policy - deadline < 2 * queue size.");
}
}
// accumulate how heavily booked we are relative to this result's deadline
frac_booked += r->estimated_cpu_time_remaining() / (r->report_deadline - now);
}
}
// Is it getting a little uncomfortable?
if (frac_booked > MAX_CPU_LOAD_FACTOR * up_frac * ncpus) {
should_not_fetch_work = true;
if (!work_fetch_no_new_work) {
msg_printf(NULL, MSG_INFO,
"Work fetch policy - uncomfortable.");
}
}
// check for too many projects that have work
if (projects_with_work.size() >= (unsigned int)global_prefs.max_projects_on_client) {
should_not_fetch_work = true;
if (!work_fetch_no_new_work) {
msg_printf(NULL, MSG_INFO,
"Work fetch policy - max projects exceeded.");
}
}
if (work_fetch_no_new_work && !should_not_fetch_work) { // display only when the policy changes, to avoid printing this once per second
msg_printf(NULL, MSG_INFO,
"Work fetch policy - work fetch now allowed.");
}
if (!work_fetch_no_new_work && should_not_fetch_work) { // display only when the policy changes, to avoid printing this once per second
msg_printf(NULL, MSG_INFO,
"Work fetch policy - no work fetch allowed.");
}
if (cpu_crunch_nearest_first && !should_crunch_nearest) { // display only when the policy changes, to avoid printing this once per second
msg_printf(NULL, MSG_INFO,
"CPU scheduler policy - crunch highest debt first (normal mode).");
}
if (!cpu_crunch_nearest_first && should_crunch_nearest) { // display only when the policy changes, to avoid printing this once per second
msg_printf(NULL, MSG_INFO,
"CPU scheduler policy - crunch earliest deadline first (panic mode).");
}
work_fetch_no_new_work = should_not_fetch_work;
cpu_crunch_nearest_first = should_crunch_nearest;
}
const char *BOINC_RCSID_d35a4a7711 = "$Id$";
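
For readers following the new set_cpu_scheduler_modes() logic, here is a self-contained sketch of its overbooking test under invented numbers: results are visited in deadline order (std::map keeps its keys sorted), each is booked onto the CPU that frees up first, and if a projected finish time exceeds the deadline scaled by MAX_CPU_LOAD_FACTOR and an availability factor, the client flags panic mode and stops fetching work. The result set, deadlines, and the up_frac value are assumptions for illustration only.

#include <cstdio>
#include <map>
#include <vector>

int main() {
    const double MAX_CPU_LOAD_FACTOR = 0.8;
    const double up_frac = 1.0;   // stands in for avg_proc_rate(0); assumed 1.0 here
    const int ncpus = 2;

    // deadline (seconds from now) -> estimated CPU seconds remaining;
    // std::map iterates in increasing key order, the "cheap sorting trick" above
    std::map<double, double> results_by_deadline;
    results_by_deadline[ 4 * 3600.0] = 3 * 3600.0;   // due in 4h, needs 3h
    results_by_deadline[ 6 * 3600.0] = 5 * 3600.0;   // due in 6h, needs 5h
    results_by_deadline[24 * 3600.0] = 4 * 3600.0;   // due in 24h, needs 4h

    std::vector<double> booked_to(ncpus, 0.0);       // when each CPU frees up (seconds from now)
    bool overbooked = false;

    for (std::map<double, double>::iterator it = results_by_deadline.begin();
         it != results_by_deadline.end(); ++it) {
        // give the result to the CPU that becomes free first
        int cpu = 0;
        for (int i = 1; i < ncpus; i++)
            if (booked_to[i] < booked_to[cpu]) cpu = i;
        booked_to[cpu] += it->second;

        // deadline too tight to meet reliably?
        if (booked_to[cpu] > it->first * MAX_CPU_LOAD_FACTOR * up_frac) {
            overbooked = true;
            printf("result due in %.0fh would finish after %.1fh of queued work: panic mode\n",
                   it->first / 3600, booked_to[cpu] / 3600);
        }
    }
    if (overbooked)
        printf("=> crunch earliest deadline first, stop fetching new work\n");
    return 0;
}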

View File

@ -80,14 +80,14 @@ bool SCHEDULER_OP::check_master_fetch_start() {
// PRECONDITION: compute_work_requests() has been called
// to fill in PROJECT::work_request
//
int SCHEDULER_OP::init_get_work() {
int SCHEDULER_OP::init_get_work(bool master_file_only) {
int retval;
char err_msg[256];
double ns;
must_get_work = true;
project = gstate.next_project_need_work(0);
if (project) {
if (project && !master_file_only) {
ns = project->work_request;
msg_printf(project, MSG_INFO, "Requesting %.2f seconds of work", ns);
retval = init_op_project(ns);
@ -614,14 +614,14 @@ int SCHEDULER_REPLY::parse(FILE* in, PROJECT* project) {
project->write_statistics_file();
return 0;
}
else if (parse_str(buf, "<project_name>", project->project_name, sizeof(project->project_name))) continue;
else if (parse_str(buf, "<user_name>", project->user_name, sizeof(project->user_name))) continue;
else if (parse_str(buf, "<user_name>", project->user_name, sizeof(project->user_name))) continue;
else if (parse_double(buf, "<user_total_credit>", project->user_total_credit)) continue;
else if (parse_double(buf, "<user_expavg_credit>", project->user_expavg_credit)) continue;
else if (parse_double(buf, "<user_create_time>", project->user_create_time)) continue;
else if (parse_str(buf, "<team_name>", project->team_name, sizeof(project->team_name))) continue;
else if (parse_str(buf, "<team_name>", project->team_name, sizeof(project->team_name))) continue;
else if (parse_int(buf, "<hostid>", hostid)) continue;
else if (parse_double(buf, "<host_total_credit>", project->host_total_credit)) continue;
else if (parse_double(buf, "<host_expavg_credit>", project->host_expavg_credit)) continue;

View File

@ -73,7 +73,7 @@ struct SCHEDULER_OP {
SCHEDULER_OP(HTTP_OP_SET*);
bool poll();
int init_get_work();
int init_get_work(bool master_file_only);
int init_return_results(PROJECT*);
int init_op_project(double ns);
int init_master_fetch();

View File

@ -115,6 +115,8 @@
#include <vector>
#include <deque>
#include <list>
#include <map>
#include <set>
#endif

View File

@ -51,6 +51,7 @@ void GLOBAL_PREFS::defaults() {
dont_verify_images = false;
work_buf_min_days = 0.1;
max_cpus = 1;
max_projects_on_client = 5;
cpu_scheduling_period_minutes = 60;
disk_interval = 60;
disk_max_used_gb = 1;
@ -175,6 +176,9 @@ int GLOBAL_PREFS::parse(FILE* in, const char* host_venue, bool& found_venue) {
} else if (parse_int(buf, "<max_cpus>", max_cpus)) {
if (max_cpus < 1) max_cpus = 1;
continue;
} else if (parse_int(buf, "<max_projects_on_client>", max_projects_on_client)) {
if (max_projects_on_client < 1) max_projects_on_client = 1;
continue;
} else if (parse_double(buf, "<disk_interval>", disk_interval)) {
if (disk_interval<0) disk_interval = 0;
continue;
@ -235,6 +239,7 @@ int GLOBAL_PREFS::write(FILE* f) {
"%s%s%s%s%s%s"
" <work_buf_min_days>%f</work_buf_min_days>\n"
" <max_cpus>%d</max_cpus>\n"
" <max_projects_on_client>%d</max_projects_on_client>\n"
" <cpu_scheduling_period_minutes>%f</cpu_scheduling_period_minutes>\n"
" <disk_interval>%f</disk_interval>\n"
" <disk_max_used_gb>%f</disk_max_used_gb>\n"
@ -258,6 +263,7 @@ int GLOBAL_PREFS::write(FILE* f) {
dont_verify_images?" <dont_verify_images/>\n":"",
work_buf_min_days,
max_cpus,
max_projects_on_client,
cpu_scheduling_period_minutes,
disk_interval,
disk_max_used_gb,

View File

@ -46,6 +46,7 @@ struct GLOBAL_PREFS {
bool dont_verify_images;
double work_buf_min_days;
int max_cpus;
int max_projects_on_client;
double cpu_scheduling_period_minutes;
double disk_interval;
double disk_max_used_gb;