diff --git a/client/scheduler_op.C b/client/scheduler_op.C index a9819defa2..c89473d80a 100644 --- a/client/scheduler_op.C +++ b/client/scheduler_op.C @@ -313,13 +313,15 @@ bool SCHEDULER_OP::poll() { } else { // master file parse failed. treat like RPC error // + project->master_fetch_failures++; backoff(project, "Master file parse failed\n"); get_master_success = false; - err_url = project->master_url; + err_url = project->master_url; } } else { // fetch of master file failed. Treat like RPC error // + project->master_fetch_failures++; backoff(project, "Master file fetch failed\n"); get_master_success = false; err_url = project->master_url; @@ -327,6 +329,7 @@ bool SCHEDULER_OP::poll() { project = gstate.next_project_master_pending(); if (project) { if ((retval = init_master_fetch(project))) { + project->master_fetch_failures++; backoff(project, "Master file fetch failed\n"); get_master_success = false; err_url = project->master_url; diff --git a/client/scheduler_op.h b/client/scheduler_op.h index b06193c005..c932595bfc 100644 --- a/client/scheduler_op.h +++ b/client/scheduler_op.h @@ -44,12 +44,12 @@ // cap on nrpc_failures in the above formula #define MASTER_FETCH_RETRY_CAP 3 //cap on how many times we will contact master_url before moving into a state in which we will not exponentially backoff anymore but rather contact the master URL at the frequency below -#define MASTER_FETCH_INTERVAL 5 +#define MASTER_FETCH_INTERVAL (60*60*24*7*2) // 2 weeks //This is the Max on the time to wait after we've contacted the Master URL MASTER_FETCH_RETRY_CAP times. //The next two constants are used to bound RPC exponential waiting. -#define PERS_RETRY_DELAY_MIN 1 -#define PERS_RETRY_DELAY_MAX 30 +#define PERS_RETRY_DELAY_MIN 60 // 1 minute +#define PERS_RETRY_DELAY_MAX (60*60*4) // 4 hours #define SCHEDULER_OP_STATE_IDLE 0 #define SCHEDULER_OP_STATE_GET_MASTER 1 diff --git a/sched/handle_request.C b/sched/handle_request.C index db792dfd32..40ec87a065 100644 --- a/sched/handle_request.C +++ b/sched/handle_request.C @@ -337,13 +337,17 @@ int handle_results( result.received_time = time(0); result.client_state = rp->client_state; result.cpu_time = rp->cpu_time; + result.claimed_credit = result.cpu_time * host.credit_per_cpu_sec; + result.validate_state = VALIDATE_STATE_NEED_CHECK; if (result.client_state != CLIENT_DONE) { + + result.validate_state = VALIDATE_STATE_INVALID; //so we won't try to validate this result anymore result.server_state = RESULT_SERVER_STATE_ERROR; } else { result.server_state = RESULT_SERVER_STATE_DONE; } - result.claimed_credit = result.cpu_time * host.credit_per_cpu_sec; - result.validate_state = VALIDATE_STATE_NEED_CHECK; + + strncpy(result.stderr_out, rp->stderr_out, sizeof(result.stderr_out)); strncpy(result.xml_doc_out, rp->xml_doc_out, sizeof(result.xml_doc_out)); retval = db_result_update(result); diff --git a/test/test.inc b/test/test.inc index 42bcd0d56f..111a8da591 100644 --- a/test/test.inc +++ b/test/test.inc @@ -400,6 +400,10 @@ class Project { PassThru("cd $this->project_dir/cgi; ./feeder -asynch > feeder_out"); } + function result_retry($app){ + PassThru("cd $this->project_dir/cgi; ./result_retry -app $app->name -nerror 10 -ndet 10 -nredundancy 10 > result_retry_out"); + } + function start_result_retry($app){ PassThru("cd $this->project_dir/cgi; ./result_retry -app $app->name -nerror 10 -ndet 10 -nredundancy 10 -asynch > result_retry_out"); } @@ -421,13 +425,21 @@ class Project { function validate($app, $quorum) { PassThru("cd $this->project_dir/cgi; ./validate_test -one_pass -app $app->name -quorum $quorum > validate_out"); } - + + function start_file_delete(){ + PassThru("cd $this->project_dir/cgi; ./file_deleter -asynch > file_deleter_out"); + } + // do one pass of file_deleter // function file_delete() { PassThru("cd $this->project_dir/cgi; ./file_deleter -one_pass > file_deleter_out"); } - + + function start_assimilate($app) { + PassThru("cd $this->project_dir/cgi; ./assimilator -asynch -app $app->name > assimilator_out"); + } + // do one pass of assimilator // function assimilate($app) { diff --git a/test/test_upload_backoff.php b/test/test_upload_backoff.php index 43b0e26688..226af64317 100644 --- a/test/test_upload_backoff.php +++ b/test/test_upload_backoff.php @@ -39,7 +39,7 @@ $project->start_feeder(); //delete the download_dir immediately - $project->remove_file_upload_handler() + $project->remove_file_upload_handler(); $pid = $host->run_asynch("-exit_when_idle"); //reinstall download_dir after 100 seconds