startup sequence

svn path=/trunk/boinc/; revision=11000
This commit is contained in:
David Anderson 2006-08-24 20:33:46 +00:00
parent 3fb5cc6ca2
commit d079ddd589
13 changed files with 116 additions and 53 deletions

View File

@ -9294,3 +9294,36 @@ Kevin 24 Aug 2006
sg_SkinClass.cpp
David 24 Aug 2006
- Core client: change the sequence of events for first-time use,
with the goal of not running CPU benchmarks until user
has attached to project and started to run first result.
Sequence is:
1) manager requests get_project_config, polls until done
2) manager calls lookup_account or create_account, polls until done
3) manager calls project_attach.
core client sets CPU specs to default values (1 GFLOPS),
sets project work_request to 1 second,
does scheduler RPC
4) Manager poll finishes
5) core client starts CPU benchmarks
The above is enforced by:
- only run benchmarks if there's a nontentative project
- if RPC reason is INIT, set work_request to 1
- after successful RPC, set work_request to zero
- Core client: PROJECT::sched_rpc_pending is now an int
that encodes the reason we need to do a sched RPC.
The actual reason is now shown, not always "Requested by user".
client/
acct_mgr.C
client_state.C,h
client_types.C,h
cs_account.C
cs_benchmark.C
cs_cmdline.C
cs_scheduler.C
gui_rpc_server_ops.C
scheduler_op.C,h

View File

@ -354,7 +354,7 @@ void ACCT_MGR_OP::handle_reply(int http_op_retval) {
// initiate a scheduler RPC if requested by AMS
//
if (acct.update) {
pp->sched_rpc_pending = true;
pp->sched_rpc_pending = REASON_ACCT_MGR_REQ;
pp->min_rpc_time = 0;
}
}

View File

@ -415,7 +415,7 @@ bool CLIENT_STATE::poll_slow_events() {
now = dtime();
if (should_run_cpu_benchmarks() && !are_cpu_benchmarks_running() && projects.size() > 0) {
if (should_run_cpu_benchmarks() && !are_cpu_benchmarks_running() && have_nontentative_project()) {
run_cpu_benchmarks = false;
start_cpu_benchmarks();
}
@ -1410,4 +1410,13 @@ double calculate_exponential_backoff( int n, double MIN, double MAX) {
return rand_range(MIN, rmax);
}
// Return true if at least one entry in the project list
// does not have its "tentative" flag set.
// Used to hold off CPU benchmarks until such a project exists.
//
bool CLIENT_STATE::have_nontentative_project() {
    for (unsigned int idx = 0; idx < projects.size(); ++idx) {
        if (projects[idx]->tentative) continue;
        return true;
    }
    return false;
}
const char *BOINC_RCSID_e836980ee1 = "$Id$";

View File

@ -230,6 +230,7 @@ public:
int detach_project(PROJECT*);
int report_result_error(RESULT&, const char *format, ...);
int reset_project(PROJECT*);
bool have_nontentative_project();
bool no_gui_rpc;
private:
int link_app(PROJECT*, APP*);
@ -335,6 +336,8 @@ public:
bool cpu_benchmarks_poll();
void abort_cpu_benchmarks();
bool are_cpu_benchmarks_running();
bool cpu_benchmarks_done();
void cpu_benchmarks_set_defaults();
// --------------- cs_cmdline.C:
public:

View File

@ -75,7 +75,7 @@ void PROJECT::init() {
master_fetch_failures = 0;
min_rpc_time = 0;
master_url_fetch_pending = false;
sched_rpc_pending = false;
sched_rpc_pending = 0;
next_rpc_time = 0;
trickle_up_pending = false;
tentative = false;
@ -159,7 +159,7 @@ int PROJECT::parse_state(MIOFILE& in) {
continue;
}
else if (match_tag(buf, "<master_url_fetch_pending/>")) master_url_fetch_pending = true;
else if (match_tag(buf, "<sched_rpc_pending/>")) sched_rpc_pending = true;
else if (parse_int(buf, "<sched_rpc_pending>", sched_rpc_pending)) continue;
else if (parse_double(buf, "<next_rpc_time>", next_rpc_time)) continue;
else if (match_tag(buf, "<trickle_up_pending/>")) trickle_up_pending = true;
else if (match_tag(buf, "<send_file_list/>")) send_file_list = true;
@ -220,7 +220,8 @@ int PROJECT::write_state(MIOFILE& out, bool gui_rpc) {
" <long_term_debt>%f</long_term_debt>\n"
" <resource_share>%f</resource_share>\n"
" <duration_correction_factor>%f</duration_correction_factor>\n"
"%s%s%s%s%s%s%s%s%s%s",
" <sched_rpc_pending>%d</sched_rpc_pending>\n"
"%s%s%s%s%s%s%s%s%s",
master_url,
project_name,
symstore,
@ -245,8 +246,8 @@ int PROJECT::write_state(MIOFILE& out, bool gui_rpc) {
long_term_debt,
resource_share,
duration_correction_factor,
sched_rpc_pending,
master_url_fetch_pending?" <master_url_fetch_pending/>\n":"",
sched_rpc_pending?" <sched_rpc_pending/>\n":"",
trickle_up_pending?" <trickle_up_pending/>\n":"",
send_file_list?" <send_file_list/>\n":"",
non_cpu_intensive?" <non_cpu_intensive/>\n":"",

View File

@ -205,15 +205,16 @@ public:
// returns true if min_rpc_time > now
bool master_url_fetch_pending;
// need to fetch and parse the master URL
bool sched_rpc_pending;
int sched_rpc_pending;
// we need to do a scheduler RPC, for various possible reasons:
// user request, propagate host CPID, time-based, etc
// user request, propagate host CPID, time-based, etc.
// Reasons are enumerated in scheduler_op.h
double next_rpc_time;
// if nonzero, specifies a time when another scheduler RPC
// should be done (as requested by server)
bool trickle_up_pending; // have trickle up to send
bool tentative; // master URL and account ID not confirmed
bool tentative; // we haven't done a scheduler RPC to this project yet
// (still need to verify that its name isn't a dup)
bool anonymous_platform; // app_versions.xml file found in project dir;
// use those apps rather than getting from server
bool non_cpu_intensive;

View File

@ -239,7 +239,7 @@ int CLIENT_STATE::parse_account_files() {
// true until we read client_state.xml
//
project->master_url_fetch_pending = true;
project->sched_rpc_pending = true;
project->sched_rpc_pending = REASON_INIT;
retval = project->parse_account(f);
fclose(f);
if (retval) {
@ -326,13 +326,13 @@ int CLIENT_STATE::parse_statistics_files() {
fclose(f);
if (retval) {
msg_printf(NULL, MSG_ERROR,
"Couldn't parse statistics file %s", name.c_str()
"Couldn't parse %s", name.c_str()
);
} else {
project=lookup_project(temp->master_url);
if (project==NULL) {
msg_printf(NULL, MSG_ERROR,
"Project for statistics file %s not found - ignoring",
"Project for %s not found - ignoring",
name.c_str()
);
} else {
@ -454,7 +454,7 @@ int CLIENT_STATE::add_project(
retval = make_project_dir(*project);
if (retval) return retval;
projects.push_back(project);
project->sched_rpc_pending = true;
project->sched_rpc_pending = REASON_INIT;
set_client_state_dirty("Add project");
return 0;
}

View File

@ -178,10 +178,7 @@ void CLIENT_STATE::start_cpu_benchmarks() {
"CLIENT_STATE::cpu_benchmarks(): Skipping CPU benchmarks"
);
}
host_info.p_fpops = DEFAULT_FPOPS;
host_info.p_iops = DEFAULT_IOPS;
host_info.p_membw = DEFAULT_MEMBW;
host_info.m_cache = DEFAULT_CACHE;
cpu_benchmarks_set_defaults();
return;
}
@ -351,10 +348,7 @@ bool CLIENT_STATE::cpu_benchmarks_poll() {
"CPU benchmarks timed out, using default values"
);
abort_cpu_benchmarks();
host_info.p_fpops = DEFAULT_FPOPS;
host_info.p_iops = DEFAULT_IOPS;
host_info.p_membw = DEFAULT_MEMBW;
host_info.m_cache = DEFAULT_CACHE;
cpu_benchmarks_set_defaults();
benchmarks_running = false;
set_client_state_dirty("CPU benchmarks");
}
@ -373,10 +367,7 @@ bool CLIENT_STATE::cpu_benchmarks_poll() {
double old_p_fpops = host_info.p_fpops;
if (had_error) {
msg_printf(NULL, MSG_ERROR, "CPU benchmarks error");
host_info.p_fpops = DEFAULT_FPOPS;
host_info.p_iops = DEFAULT_IOPS;
host_info.p_membw = DEFAULT_MEMBW;
host_info.m_cache = DEFAULT_CACHE;
cpu_benchmarks_set_defaults();
} else {
host_info.p_fpops = 0;
host_info.p_iops = 0;
@ -423,6 +414,17 @@ bool CLIENT_STATE::cpu_benchmarks_poll() {
return false;
}
// Return true if host_info.p_calculated is nonzero.
// NOTE(review): p_calculated is presumably set when benchmarks
// complete - confirm against HOST_INFO.
//
bool CLIENT_STATE::cpu_benchmarks_done() {
    if (host_info.p_calculated == 0) return false;
    return true;
}
// Fill in the host's CPU/memory figures with the compiled-in defaults.
// Called when benchmarks are skipped, time out, or fail,
// and before a project-initialization scheduler RPC
// if benchmarks haven't been done yet.
//
void CLIENT_STATE::cpu_benchmarks_set_defaults() {
    // memory-related figures
    host_info.m_cache = DEFAULT_CACHE;
    host_info.p_membw = DEFAULT_MEMBW;

    // integer and floating-point speed
    host_info.p_iops = DEFAULT_IOPS;
    host_info.p_fpops = DEFAULT_FPOPS;
}
// return true if any CPU benchmark thread/process is running
//
bool CLIENT_STATE::are_cpu_benchmarks_running() {

View File

@ -273,7 +273,7 @@ void CLIENT_STATE::do_cmdline_actions() {
if (strlen(update_prefs_url)) {
PROJECT* project = lookup_project(update_prefs_url);
if (project) {
project->sched_rpc_pending = true;
project->sched_rpc_pending = REASON_USER_REQ;
} else {
msg_printf(NULL, MSG_ERROR, "project %s not found\n", update_prefs_url);
}

View File

@ -132,7 +132,7 @@ PROJECT* CLIENT_STATE::next_project_sched_rpc_pending() {
p = projects[i];
if (p->waiting_until_min_rpc_time()) continue;
if (p->next_rpc_time && p->next_rpc_time<now) {
p->sched_rpc_pending = true;
p->sched_rpc_pending = REASON_PROJECT_REQ;
p->next_rpc_time = 0;
}
//if (p->suspended_via_gui) continue;
@ -738,6 +738,8 @@ bool CLIENT_STATE::scheduler_rpc_poll() {
bool action=false;
static double last_time=0;
// check only every 5 sec, unless there's a tentative (new) project
//
if (!have_tentative_project && gstate.now - last_time < 5.0) return false;
last_time = gstate.now;
@ -748,11 +750,9 @@ bool CLIENT_STATE::scheduler_rpc_poll() {
break;
}
// contact project requested by user
//
p = next_project_sched_rpc_pending();
if (p) {
scheduler_op->init_op_project(p, REASON_USER_REQ);
scheduler_op->init_op_project(p, p->sched_rpc_pending);
action = true;
break;
}
@ -1166,12 +1166,12 @@ int CLIENT_STATE::handle_scheduler_reply(
if (sr.send_file_list) {
project->send_file_list = true;
}
project->sched_rpc_pending = false;
project->sched_rpc_pending = 0;
project->trickle_up_pending = false;
// handle delay request
//
if (sr.request_delay && !project->tentative) {
if (sr.request_delay) {
double x = gstate.now + sr.request_delay;
if (x > project->min_rpc_time) project->min_rpc_time = x;
} else {
@ -1245,7 +1245,7 @@ void CLIENT_STATE::generate_new_host_cpid() {
host_info.generate_host_cpid();
for (unsigned int i=0; i<projects.size(); i++) {
if (projects[i]->attached_via_acct_mgr) {
projects[i]->sched_rpc_pending = true;
projects[i]->sched_rpc_pending = REASON_ACCT_MGR_REQ;
projects[i]->min_rpc_time = now + 15;
}
}

View File

@ -229,7 +229,7 @@ static void handle_project_op(char* buf, MIOFILE& fout, const char* op) {
gstate.request_schedule_cpus("project detached by user");
gstate.request_work_fetch("project detached by user");
} else if (!strcmp(op, "update")) {
p->sched_rpc_pending = true;
p->sched_rpc_pending = REASON_USER_REQ;
p->min_rpc_time = 0;
gstate.request_work_fetch("project updated by user");
gstate.set_client_state_dirty("project updated by user");

View File

@ -99,7 +99,7 @@ int SCHEDULER_OP::init_get_work() {
// If there are multiple schedulers, start with a random one.
// User messages and backoff() are done at this level.
//
int SCHEDULER_OP::init_op_project(PROJECT* p, SCHEDULER_OP_REASON r) {
int SCHEDULER_OP::init_op_project(PROJECT* p, int r) {
int retval;
char err_msg[256];
@ -125,6 +125,13 @@ int SCHEDULER_OP::init_op_project(PROJECT* p, SCHEDULER_OP_REASON r) {
return retval;
}
if (reason == REASON_INIT) {
p->work_request = 1;
if (!gstate.cpu_benchmarks_done()) {
gstate.cpu_benchmarks_set_defaults();
}
}
url_index = 0;
retval = gstate.make_scheduler_request(p);
if (!retval) {
@ -213,6 +220,9 @@ int SCHEDULER_OP::start_rpc(PROJECT* p) {
case REASON_NEED_WORK: why = "To fetch work"; break;
case REASON_RESULTS_DUE: why = "To report completed tasks"; break;
case REASON_TRICKLE_UP: why = "To send trickle-up message"; break;
case REASON_ACCT_MGR_REQ: why = "Requested by account manager"; break;
case REASON_INIT: why = "Project initialization"; break;
case REASON_PROJECT_REQ: why = "Requested by project"; break;
default: why = "Unknown";
}
msg_printf(p, MSG_INFO, "Sending scheduler request: %s", why);
@ -407,7 +417,7 @@ bool SCHEDULER_OP::poll() {
} else {
// parse succeeded
//
msg_printf(cur_proj, MSG_INFO, "Scheduler list download succeeded");
msg_printf(cur_proj, MSG_INFO, "Master file download succeeded");
cur_proj->master_fetch_failures = 0;
changed = update_urls(cur_proj, urls);
@ -471,7 +481,7 @@ bool SCHEDULER_OP::poll() {
// if project suspended, don't retry failed RPC
//
if (cur_proj->suspended_via_gui) {
cur_proj->sched_rpc_pending = false;
cur_proj->sched_rpc_pending = 0;
}
}
} else {
@ -485,15 +495,16 @@ bool SCHEDULER_OP::poll() {
retval = cur_proj->write_account_file();
if (retval) {
cur_proj->attach_failed(ERR_ATTACH_FAIL_FILE_WRITE);
} else {
gstate.project_attach.error_num = 0;
msg_printf(cur_proj, MSG_INFO,
"Successfully attached to %s",
cur_proj->get_project_name()
);
}
} else {
gstate.project_attach.error_num = 0;
msg_printf(cur_proj, MSG_INFO,
"Successfully attached to %s",
cur_proj->get_project_name()
);
}
}
} else {
cur_proj->work_request = 0; // don't ask again right away
switch (retval) {
case 0:
// if we asked for work and didn't get any,

View File

@ -38,12 +38,15 @@
// reasons for making a scheduler RPC:
//
typedef enum {
REASON_USER_REQ,
REASON_RESULTS_DUE,
REASON_NEED_WORK,
REASON_TRICKLE_UP
} SCHEDULER_OP_REASON ;
// Reason codes are plain ints so they can be stored in
// PROJECT::sched_rpc_pending and passed to SCHEDULER_OP::init_op_project().
// Values start at 1: sched_rpc_pending is set to 0 in PROJECT::init()
// and after a scheduler reply is handled.
// The text after each code is the message shown by SCHEDULER_OP::start_rpc().
//
#define REASON_USER_REQ 1
    // "Requested by user"
#define REASON_RESULTS_DUE 2
    // "To report completed tasks"
#define REASON_NEED_WORK 3
    // "To fetch work"
#define REASON_TRICKLE_UP 4
    // "To send trickle-up message"
#define REASON_ACCT_MGR_REQ 5
    // "Requested by account manager"
#define REASON_INIT 6
    // "Project initialization"; init_op_project() sets work_request to 1
    // for this reason
#define REASON_PROJECT_REQ 7
    // "Requested by project"; set when the project's next_rpc_time has passed
// defaults related to scheduler RPC policy
// See client_state.h for definitions
@ -74,14 +77,14 @@ private:
public:
PROJECT* cur_proj; // project we're currently contacting
int state;
SCHEDULER_OP_REASON reason;
int reason;
double url_random; // used to randomize order
public:
SCHEDULER_OP(HTTP_OP_SET*);
bool poll();
int init_get_work();
int init_op_project(PROJECT*, SCHEDULER_OP_REASON);
int init_op_project(PROJECT*, int);
int init_master_fetch(PROJECT*);
bool check_master_fetch_start();
void backoff(PROJECT* p, const char *error_msg);