mirror of https://github.com/BOINC/boinc.git
*** empty log message ***
svn path=/trunk/boinc/; revision=634
This commit is contained in:
parent
98ebe8b117
commit
6693e1ceed
|
@ -50,6 +50,7 @@ PROJECT::PROJECT() {
|
|||
code_sign_key = NULL;
|
||||
nrpc_failures = 0;
|
||||
min_rpc_time = 0;
|
||||
master_fetch_failures = 0;
|
||||
resource_debt = 0;
|
||||
debt_order = 0;
|
||||
master_url_fetch_pending = 0;
|
||||
|
|
|
@ -76,7 +76,7 @@ public:
|
|||
// contact all scheduling servers
|
||||
int min_rpc_time; // earliest time to contact any server
|
||||
// of this project (or zero)
|
||||
|
||||
int master_fetch_failures;
|
||||
// the following items are transient; not saved in state file
|
||||
double resource_debt; // How much CPU time we owe this project
|
||||
// (arbitrary scale)
|
||||
|
|
|
@ -83,7 +83,7 @@ void CLIENT_STATE::update_avg_cpu(PROJECT* p) {
|
|||
}
|
||||
|
||||
// find a project that needs its master file parsed
|
||||
//
|
||||
//
|
||||
PROJECT* CLIENT_STATE::next_project_master_pending() {
|
||||
unsigned int i;
|
||||
PROJECT* p;
|
||||
|
@ -101,7 +101,8 @@ PROJECT* CLIENT_STATE::next_project_master_pending() {
|
|||
|
||||
// return the next project after "old", in debt order,
|
||||
// that is eligible for a scheduler RPC
|
||||
//
|
||||
// It excludes projects that have (p->master_url_fetch_pending) set to true.
|
||||
// Such projects will be returned by next_project_master_pending routine.
|
||||
PROJECT* CLIENT_STATE::next_project(PROJECT* old) {
|
||||
PROJECT* p, *pbest;
|
||||
int best = 999;
|
||||
|
|
|
@ -106,8 +106,21 @@ int SCHEDULER_OP::set_min_rpc_time(PROJECT* p) {
|
|||
|
||||
int n = p->nrpc_failures;
|
||||
if (n > RETRY_CAP) n = RETRY_CAP;
|
||||
for (i=0; i<n; i++) x *= 2;
|
||||
p->min_rpc_time = time(0) + x;
|
||||
|
||||
// we've hit the limit on master_url fetches
|
||||
if(project->master_fetch_failures >= MASTER_FETCH_RETRY_CAP)
|
||||
{
|
||||
if (log_flags.sched_op_debug) {
|
||||
printf("we've hit the limit on master_url fetches\n");
|
||||
}
|
||||
p->min_rpc_time = time(0) + MASTER_FETCH_INTERVAL;
|
||||
x = MASTER_FETCH_INTERVAL;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i=0; i<n; i++) x *= 2;
|
||||
p->min_rpc_time = time(0) + x;
|
||||
}
|
||||
if (log_flags.sched_op_debug) {
|
||||
printf(
|
||||
"setting min RPC time for %s to %d seconds from now\n",
|
||||
|
@ -119,15 +132,30 @@ int SCHEDULER_OP::set_min_rpc_time(PROJECT* p) {
|
|||
|
||||
// Back off on the scheduler and output an error msg if needed
|
||||
//
|
||||
|
||||
int SCHEDULER_OP::backoff( PROJECT* p, char *error_msg ) {
|
||||
p->nrpc_failures++;
|
||||
set_min_rpc_time(p);
|
||||
|
||||
if (log_flags.sched_op_debug) {
|
||||
printf(error_msg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
if (log_flags.sched_op_debug) {
|
||||
printf(error_msg);
|
||||
}
|
||||
|
||||
if(project->master_fetch_failures >= MASTER_FETCH_RETRY_CAP)
|
||||
{
|
||||
project->master_url_fetch_pending = true;
|
||||
set_min_rpc_time(p);
|
||||
return 0;
|
||||
}
|
||||
// once nrpc_failures reaches MASTER_FETCH_PERIOD, mark the master URL for re-fetch, reset the RPC backoff state, and count one more master fetch failure
|
||||
if (project->nrpc_failures == MASTER_FETCH_PERIOD) {
|
||||
project->master_url_fetch_pending = true;
|
||||
project->min_rpc_time = 0;
|
||||
project->nrpc_failures = 0;
|
||||
project->master_fetch_failures++;
|
||||
}
|
||||
|
||||
p->nrpc_failures++;
|
||||
set_min_rpc_time(p);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// low-level routine to initiate an RPC
|
||||
|
@ -198,6 +226,11 @@ int SCHEDULER_OP::parse_master_file(vector<STRING256> &urls) {
|
|||
if (log_flags.sched_op_debug) {
|
||||
printf("Parsed master file; got %d scheduler URLs\n", (int)urls.size());
|
||||
}
|
||||
|
||||
// return an error if no scheduler URLs were found in the master file
|
||||
if((int) urls.size() == 0)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -259,16 +292,17 @@ bool SCHEDULER_OP::poll() {
|
|||
if (changed) {
|
||||
project->min_rpc_time = 0;
|
||||
project->nrpc_failures = 0;
|
||||
project->master_fetch_failures = 0;
|
||||
}
|
||||
} else {
|
||||
// master file parse failed. treat like RPC error
|
||||
//
|
||||
backoff(project, "Master file parse failed\n");
|
||||
backoff(project, "Master file parse failed\n");
|
||||
}
|
||||
} else {
|
||||
// fetch of master file failed. Treat like RPC error
|
||||
//
|
||||
backoff(project, "Master file fetch failed\n");
|
||||
backoff(project, "Master file fetch failed\n");
|
||||
}
|
||||
project = gstate.next_project_master_pending();
|
||||
if (project) {
|
||||
|
@ -300,11 +334,7 @@ bool SCHEDULER_OP::poll() {
|
|||
start_rpc();
|
||||
} else {
|
||||
backoff(project,"");
|
||||
if ((project->nrpc_failures % MASTER_FETCH_PERIOD) == 0) {
|
||||
project->master_url_fetch_pending = true;
|
||||
project->min_rpc_time = 0;
|
||||
project->nrpc_failures = 0;
|
||||
}
|
||||
|
||||
if (must_get_work) {
|
||||
project = gstate.next_project(project);
|
||||
if (project) {
|
||||
|
@ -331,6 +361,7 @@ bool SCHEDULER_OP::poll() {
|
|||
);
|
||||
}
|
||||
project->nrpc_failures = 0;
|
||||
project->min_rpc_time = 0;
|
||||
gstate.handle_scheduler_reply(project, scheduler_url);
|
||||
if (must_get_work) {
|
||||
double x = gstate.work_needed_secs();
|
||||
|
@ -356,8 +387,8 @@ bool SCHEDULER_OP::poll() {
|
|||
}
|
||||
}
|
||||
if (scheduler_op_done) {
|
||||
project = gstate.next_project_master_pending();
|
||||
if (project) {
|
||||
project = gstate.next_project_master_pending();
|
||||
if (project) {
|
||||
init_master_fetch(project);
|
||||
} else {
|
||||
state = SCHEDULER_OP_STATE_IDLE;
|
||||
|
|
|
@ -36,12 +36,15 @@
|
|||
|
||||
// constants related to scheduler RPC policy
|
||||
|
||||
#define MASTER_FETCH_PERIOD 10
|
||||
#define MASTER_FETCH_PERIOD 3
|
||||
// fetch and parse master URL if nrpc_failures is a multiple of this
|
||||
#define RETRY_BASE_PERIOD 100
|
||||
#define RETRY_BASE_PERIOD 1
|
||||
// after failure, back off 2^nrpc_failures times this times random
|
||||
#define RETRY_CAP 10
|
||||
#define RETRY_CAP 3
|
||||
// cap on nrpc_failures in the above formula
|
||||
#define MASTER_FETCH_RETRY_CAP 3
|
||||
// cap on master URL fetch failures; once it is reached we stop backing off exponentially and instead retry at the fixed interval below
|
||||
#define MASTER_FETCH_INTERVAL 5
|
||||
|
||||
#define SCHEDULER_OP_STATE_IDLE 0
|
||||
#define SCHEDULER_OP_STATE_GET_MASTER 1
|
||||
|
|
Loading…
Reference in New Issue