*** empty log message ***

svn path=/trunk/boinc/; revision=634
This commit is contained in:
Hamid Aghdaee 2002-11-18 23:15:32 +00:00
parent 98ebe8b117
commit 6693e1ceed
5 changed files with 61 additions and 25 deletions

View File

@ -50,6 +50,7 @@ PROJECT::PROJECT() {
code_sign_key = NULL;
nrpc_failures = 0;
min_rpc_time = 0;
master_fetch_failures = 0;
resource_debt = 0;
debt_order = 0;
master_url_fetch_pending = 0;

View File

@ -76,7 +76,7 @@ public:
// contact all scheduling servers
int min_rpc_time; // earliest time to contact any server
// of this project (or zero)
int master_fetch_failures;
// the following items are transient; not saved in state file
double resource_debt; // How much CPU time we owe this project
// (arbitrary scale)

View File

@ -83,7 +83,7 @@ void CLIENT_STATE::update_avg_cpu(PROJECT* p) {
}
// find a project that needs its master file parsed
//
//
PROJECT* CLIENT_STATE::next_project_master_pending() {
unsigned int i;
PROJECT* p;
@ -101,7 +101,8 @@ PROJECT* CLIENT_STATE::next_project_master_pending() {
// return the next project after "old", in debt order,
// that is eligible for a scheduler RPC
//
// Projects that have p->master_url_fetch_pending set are excluded;
// those are returned by the next_project_master_pending() routine instead.
PROJECT* CLIENT_STATE::next_project(PROJECT* old) {
PROJECT* p, *pbest;
int best = 999;

View File

@ -106,8 +106,21 @@ int SCHEDULER_OP::set_min_rpc_time(PROJECT* p) {
int n = p->nrpc_failures;
if (n > RETRY_CAP) n = RETRY_CAP;
for (i=0; i<n; i++) x *= 2;
p->min_rpc_time = time(0) + x;
// we've hit the limit on master_url fetches
if(project->master_fetch_failures >= MASTER_FETCH_RETRY_CAP)
{
if (log_flags.sched_op_debug) {
printf("we've hit the limit on master_url fetches\n");
}
p->min_rpc_time = time(0) + MASTER_FETCH_INTERVAL;
x = MASTER_FETCH_INTERVAL;
}
else
{
for (i=0; i<n; i++) x *= 2;
p->min_rpc_time = time(0) + x;
}
if (log_flags.sched_op_debug) {
printf(
"setting min RPC time for %s to %d seconds from now\n",
@ -119,15 +132,30 @@ int SCHEDULER_OP::set_min_rpc_time(PROJECT* p) {
// Back off on the scheduler and output an error msg if needed
//
int SCHEDULER_OP::backoff( PROJECT* p, char *error_msg ) {
p->nrpc_failures++;
set_min_rpc_time(p);
if (log_flags.sched_op_debug) {
printf(error_msg);
}
return 0;
if (log_flags.sched_op_debug) {
printf(error_msg);
}
if(project->master_fetch_failures >= MASTER_FETCH_RETRY_CAP)
{
project->master_url_fetch_pending = true;
set_min_rpc_time(p);
return 0;
}
// if nrpc_failures has reached MASTER_FETCH_PERIOD, set master_url_fetch_pending,
// reset min_rpc_time and nrpc_failures, and increment master_fetch_failures
if (project->nrpc_failures == MASTER_FETCH_PERIOD) {
project->master_url_fetch_pending = true;
project->min_rpc_time = 0;
project->nrpc_failures = 0;
project->master_fetch_failures++;
}
p->nrpc_failures++;
set_min_rpc_time(p);
return 0;
}
// low-level routine to initiate an RPC
@ -198,6 +226,11 @@ int SCHEDULER_OP::parse_master_file(vector<STRING256> &urls) {
if (log_flags.sched_op_debug) {
printf("Parsed master file; got %d scheduler URLs\n", (int)urls.size());
}
// return -1 if no URLs were found in the master file
if((int) urls.size() == 0)
return -1;
return 0;
}
@ -259,16 +292,17 @@ bool SCHEDULER_OP::poll() {
if (changed) {
project->min_rpc_time = 0;
project->nrpc_failures = 0;
project->master_fetch_failures = 0;
}
} else {
// master file parse failed. treat like RPC error
//
backoff(project, "Master file parse failed\n");
backoff(project, "Master file parse failed\n");
}
} else {
// fetch of master file failed. Treat like RPC error
//
backoff(project, "Master file fetch failed\n");
backoff(project, "Master file fetch failed\n");
}
project = gstate.next_project_master_pending();
if (project) {
@ -300,11 +334,7 @@ bool SCHEDULER_OP::poll() {
start_rpc();
} else {
backoff(project,"");
if ((project->nrpc_failures % MASTER_FETCH_PERIOD) == 0) {
project->master_url_fetch_pending = true;
project->min_rpc_time = 0;
project->nrpc_failures = 0;
}
if (must_get_work) {
project = gstate.next_project(project);
if (project) {
@ -331,6 +361,7 @@ bool SCHEDULER_OP::poll() {
);
}
project->nrpc_failures = 0;
project->min_rpc_time = 0;
gstate.handle_scheduler_reply(project, scheduler_url);
if (must_get_work) {
double x = gstate.work_needed_secs();
@ -356,8 +387,8 @@ bool SCHEDULER_OP::poll() {
}
}
if (scheduler_op_done) {
project = gstate.next_project_master_pending();
if (project) {
project = gstate.next_project_master_pending();
if (project) {
init_master_fetch(project);
} else {
state = SCHEDULER_OP_STATE_IDLE;

View File

@ -36,12 +36,15 @@
// constants related to scheduler RPC policy
#define MASTER_FETCH_PERIOD 10
#define MASTER_FETCH_PERIOD 3
// fetch and parse master URL if nrpc_failures is a multiple of this
#define RETRY_BASE_PERIOD 100
#define RETRY_BASE_PERIOD 1
// after failure, back off 2^nrpc_failures times this times random
#define RETRY_CAP 10
#define RETRY_CAP 3
// cap on nrpc_failures in the above formula
#define MASTER_FETCH_RETRY_CAP 3
// cap on master_fetch_failures: once it is reached we stop backing off
// exponentially and instead contact the master URL at the fixed
// interval below
#define MASTER_FETCH_INTERVAL 5
#define SCHEDULER_OP_STATE_IDLE 0
#define SCHEDULER_OP_STATE_GET_MASTER 1