*** empty log message ***

svn path=/trunk/boinc/; revision=10393
This commit is contained in:
David Anderson 2006-06-19 04:29:26 +00:00
parent 239013bb1d
commit 1fa2e3de0a
10 changed files with 61 additions and 152 deletions

View File

@ -6276,3 +6276,17 @@ Matt 17 June 2006
html/inc
forum.inc
email.inc
David 18 June 2006
- Don't parse ACTIVE_TASK::scheduler_state from the state file.
This isn't carried across runs of BOINC.
- Remove ACTIVE_TASK_SET::restart_tasks() and CLIENT_STATE::restart_tasks():
just let the CPU scheduler take care of starting tasks.
- If a file upload fails because the file is missing, don't retry,
and show an appropriate error message
client/
app.C,h
app_start.C
client_state.C,h
cs_apps.C

View File

@ -177,10 +177,9 @@ int ACTIVE_TASK::init(RESULT* rp) {
max_disk_usage = rp->wup->rsc_disk_bound;
max_mem_usage = rp->wup->rsc_memory_bound;
non_cpu_intensive = rp->project->non_cpu_intensive;
strcpy(process_control_queue.name, rp->name);
strcpy(graphics_request_queue.name, rp->name);
get_slot_dir(slot, slot_dir);
return 0;
}
@ -385,7 +384,6 @@ int ACTIVE_TASK::parse(MIOFILE& fin) {
strcpy(result_name, "");
strcpy(project_master_url, "");
scheduler_state = CPU_SCHED_SCHEDULED;
while (fin.fgets(buf, 256)) {
if (match_tag(buf, "</active_task>")) {
@ -452,7 +450,6 @@ int ACTIVE_TASK::parse(MIOFILE& fin) {
else if (parse_str(buf, "<project_master_url>", project_master_url, sizeof(project_master_url))) continue;
else if (parse_int(buf, "<app_version_num>", app_version_num)) continue;
else if (parse_int(buf, "<slot>", slot)) continue;
else if (parse_int(buf, "<scheduler_state>", scheduler_state)) continue;
else if (parse_double(buf, "<checkpoint_cpu_time>", checkpoint_cpu_time)) continue;
else if (parse_double(buf, "<fraction_done>", fraction_done)) continue;
else if (parse_double(buf, "<current_cpu_time>", current_cpu_time)) continue;
@ -627,4 +624,12 @@ void ACTIVE_TASK_SET::upload_notify_app(FILE_INFO* fip) {
}
}
void ACTIVE_TASK_SET::init() {
for (unsigned int i=0; i<active_tasks.size(); i++) {
ACTIVE_TASK* atp = active_tasks[i];
atp->init(atp->result);
atp->scheduler_state = CPU_SCHED_PREEMPTED;
}
}
const char *BOINC_RCSID_778b61195e = "$Id$";

View File

@ -216,11 +216,11 @@ public:
int remove(ACTIVE_TASK*);
ACTIVE_TASK* lookup_pid(int);
ACTIVE_TASK* lookup_result(RESULT*);
void init();
bool poll();
void suspend_all(bool leave_apps_in_memory=true);
void unsuspend_all();
bool is_task_executing();
int restart_tasks(int max_tasks);
void request_tasks_exit(PROJECT* p=0);
int wait_for_exit(double, PROJECT* p=0);
int exit_tasks(PROJECT* p=0);

View File

@ -672,77 +672,6 @@ int ACTIVE_TASK::resume_or_start() {
return 0;
}
// Restart active tasks without wiping and reinitializing slot directories.
// Called at init, with max_tasks = ncpus.
//
// Walks the active task list once.  For each task:
// - if its input files are not all available, an error is reported for
//   the result and the task is removed from the list and deleted;
// - if it was not in the CPU_SCHED_SCHEDULED state, or max_tasks tasks
//   have already been started, it is marked CPU_SCHED_PREEMPTED and left
//   in the list;
// - otherwise it is started; if the start fails, an error is reported
//   for the result and the task is removed from the list and deleted.
//
// max_tasks: upper bound on the number of tasks started by this call.
// Returns 0 in all cases; per-task failures are reported via
// gstate.report_result_error() rather than through the return value.
//
int ACTIVE_TASK_SET::restart_tasks(int max_tasks) {
vector<ACTIVE_TASK*>::iterator iter;
ACTIVE_TASK* atp;
RESULT* result;
int retval, num_tasks_started;
// NOTE(review): scope_messages is not referenced again in this function;
// presumably it is kept for its constructor/destructor effects - confirm.
SCOPE_MSG_LOG scope_messages(log_messages, CLIENT_MSG_LOG::DEBUG_TASK);
num_tasks_started = 0;
// The iterator is advanced explicitly on every path:
// erase() returns the next element, the other paths use iter++.
iter = active_tasks.begin();
while (iter != active_tasks.end()) {
atp = *iter;
result = atp->result;
atp->init(atp->result);
get_slot_dir(atp->slot, atp->slot_dir);
// Case 1: input files missing - fail the result and drop the task.
if (!gstate.input_files_available(result)) {
msg_printf(atp->wup->project, MSG_ERROR,
"Can't restart %s: missing files", atp->result->name
);
gstate.report_result_error(
*(atp->result),
"One or more missing files"
);
iter = active_tasks.erase(iter);
delete atp;
continue;
}
// Case 2: task was not scheduled, or the start quota is used up -
// keep it in the list but mark it preempted.
if (atp->scheduler_state != CPU_SCHED_SCHEDULED
|| num_tasks_started >= max_tasks
) {
msg_printf(atp->wup->project, MSG_INFO,
"Deferring task %s",
atp->result->name
);
atp->scheduler_state = CPU_SCHED_PREEMPTED;
iter++;
continue;
}
// Case 3: try to start the task.
msg_printf(atp->wup->project, MSG_INFO,
"Resuming task %s using %s version %d",
atp->result->name,
atp->app_version->app->name,
atp->app_version->version_num
);
// NOTE(review): the meaning of the 'false' argument is not visible here;
// presumably it indicates this is not a first-time start - confirm.
retval = atp->start(false);
if (retval) {
msg_printf(atp->wup->project, MSG_ERROR,
"Restart of task %s failed: %s\n",
atp->result->name, boincerror(retval)
);
gstate.report_result_error(
*(atp->result),
"Couldn't restart app: %d", retval
);
// Start failed: remove the task from the list and free it.
iter = active_tasks.erase(iter);
delete atp;
} else {
++num_tasks_started;
iter++;
}
}
return 0;
}
#if (defined(__APPLE__) && defined(__i386__))
union headeru {

View File

@ -260,8 +260,7 @@ int CLIENT_STATE::init() {
// set period start time and reschedule
//
must_schedule_cpus = true;
must_enforce_cpu_schedule = true;
request_schedule_cpus("Startup");
cpu_sched_last_time = now;
cpu_sched_last_check = now;
@ -271,6 +270,7 @@ int CLIENT_STATE::init() {
retval = make_project_dirs();
if (retval) return retval;
active_tasks.init();
active_tasks.report_overdue();
active_tasks.handle_upload_files();
@ -410,22 +410,6 @@ bool CLIENT_STATE::poll_slow_events() {
check_suspend_activities(suspend_reason);
#ifdef NEW_CPU_SCHED
cpu_scheduler.make_schedule();
#else
// Restart tasks on startup.
// Do this here (rather than CLIENT_STATE::init())
// so that if we do benchmark on startup,
// we don't immediately suspend apps
// (this fixes a CPDN problem where quitting the app
// right after start kills it)
//
if (!suspend_reason && !tasks_restarted) {
restart_tasks();
tasks_restarted = true;
}
#endif
// suspend or resume activities (but only if already did startup)
//
if (tasks_restarted) {
@ -507,6 +491,7 @@ bool CLIENT_STATE::poll_slow_events() {
if (!tasks_suspended) {
POLL_ACTION(possibly_schedule_cpus, possibly_schedule_cpus );
POLL_ACTION(enforce_schedule , enforce_schedule );
tasks_restarted = true;
}
#endif
if (!network_suspended) {

View File

@ -314,7 +314,6 @@ public:
// - an app fails to start (CS::schedule_cpus())
// - any project op is done via RPC (suspend/resume)
// - any result op is done via RPC (suspend/resume)
int restart_tasks();
int quit_activities();
void set_ncpus();
double estimate_cpu_time(WORKUNIT&);

View File

@ -360,33 +360,6 @@ bool CLIENT_STATE::possibly_schedule_cpus() {
cpu_sched_last_check = now;
}
#if 0 // THE FOLLOWING SHOULD NOT BE NECESSARY
// if the count of running tasks is not either ncpus
// or the count of runnable results a re-schedule is mandatory.
//
if (!must_schedule_cpus) {
int count_running_tasks = 0;
for (i=0; i<active_tasks.active_tasks.size(); i++) {
if (!active_tasks.active_tasks[i] || !active_tasks.active_tasks[i]->result) continue;
if (CPU_SCHED_SCHEDULED != active_tasks.active_tasks[i]->scheduler_state) continue;
if (active_tasks.active_tasks[i]->result->project->non_cpu_intensive) continue;
count_running_tasks++;
}
if (count_running_tasks != ncpus){
int count_runnable_results = 0;
for (i=0; i<results.size(); i++) {
if (!results[i]->runnable()) continue;
if (results[i]->project->non_cpu_intensive) continue;
count_runnable_results++;
}
if (count_running_tasks != count_runnable_results) {
must_schedule_cpus = true;
}
}
}
#endif
if (!must_schedule_cpus) return false;
must_schedule_cpus = false;
schedule_cpus();

View File

@ -234,9 +234,8 @@ int CLIENT_STATE::schedule_result(RESULT* rp) {
ACTIVE_TASK *atp = lookup_active_task_by_result(rp);
if (!atp) {
atp = new ACTIVE_TASK;
atp->init(rp);
atp->slot = active_tasks.get_free_slot();
get_slot_dir(atp->slot, atp->slot_dir);
atp->init(rp);
active_tasks.active_tasks.push_back(atp);
}
atp->next_scheduler_state = CPU_SCHED_SCHEDULED;
@ -295,14 +294,6 @@ double CLIENT_STATE::nearly_runnable_resource_share() {
return x;
}
// Called when the client is initialized.
// Asks the active task set to restart the tasks that were running
// at the last shutdown, starting at most ncpus of them.
// Returns whatever ACTIVE_TASK_SET::restart_tasks() returns.
//
int CLIENT_STATE::restart_tasks() {
    int retval = active_tasks.restart_tasks(ncpus);
    return retval;
}
void CLIENT_STATE::set_ncpus() {
if (ncpus == 0) ncpus = host_info.p_ncpus;

View File

@ -227,7 +227,15 @@ bool PERS_FILE_XFER::poll() {
is_upload?"upload":"download", fip->name
);
}
check_giveup("server rejected file");
try_next_url("server rejected file");
} else if (fxp->file_xfer_retval == ERR_NOT_FOUND) {
if (log_flags.file_xfer) {
msg_printf(
fip->project, MSG_INFO, "Permanently failed %s of %s",
is_upload?"upload":"download", fip->name
);
}
try_next_url("File not found on client");
} else {
if (log_flags.file_xfer) {
msg_printf(
@ -239,7 +247,7 @@ bool PERS_FILE_XFER::poll() {
handle_xfer_failure();
}
// fxp could have already been freed and zeroed by check_giveup
// fxp could have already been freed and zeroed by try_next_url
// so check before trying to remove
//
if (fxp) {
@ -252,6 +260,23 @@ bool PERS_FILE_XFER::poll() {
return false;
}
// Give up on this persistent file transfer.
// Removes the current FILE_XFER from gstate.file_xfers and deletes it,
// sets the file's status to the upload or download give-up code,
// marks the persistent transfer as done, logs an error message,
// and stores the reason in the file's error_msg.
//
// why: human-readable reason; logged and stored in fip->error_msg.
//
void PERS_FILE_XFER::xfer_failed(const char* why) {
    // Tear down the in-progress transfer; fxp is zeroed so that
    // later code can test it before touching the transfer again.
    gstate.file_xfers->remove(fxp);
    delete fxp;
    fxp = NULL;

    fip->status = is_upload ? ERR_GIVEUP_UPLOAD : ERR_GIVEUP_DOWNLOAD;
    pers_xfer_done = true;

    const char* direction = is_upload ? "upload" : "download";
    msg_printf(
        fip->project, MSG_ERROR, "Giving up on %s of %s: %s",
        direction, fip->name, why
    );
    fip->error_msg = why;
}
// A file transfer (to a particular server)
// has had a failure
// TODO ?? transient ? permanent? terminology??
@ -266,22 +291,9 @@ bool PERS_FILE_XFER::poll() {
// If there are more URLs to try, the file_xfer is restarted with these new
// urls until a good transfer is made or it completely gives up.
//
void PERS_FILE_XFER::check_giveup(const char* why) {
void PERS_FILE_XFER::try_next_url(const char* why) {
if (fip->get_next_url(fip->upload_when_present) == NULL) {
gstate.file_xfers->remove(fxp);
delete fxp;
fxp = NULL;
if (is_upload) {
fip->status = ERR_GIVEUP_UPLOAD;
} else {
fip->status = ERR_GIVEUP_DOWNLOAD;
}
pers_xfer_done = true;
msg_printf(
fip->project, MSG_ERROR, "Giving up on %s of %s: %s",
is_upload?"upload":"download", fip->name, why
);
fip->error_msg = why;
xfer_failed(why);
fip->delete_file();
} else {
if (is_upload) {
@ -315,7 +327,7 @@ void PERS_FILE_XFER::handle_xfer_failure() {
retry_or_backoff();
return;
} else {
check_giveup("file was not found on server");
try_next_url("file was not found on server");
return;
}
}
@ -323,7 +335,7 @@ void PERS_FILE_XFER::handle_xfer_failure() {
// See if it's time to give up on the persistent file xfer
//
if ((gstate.now - first_request_time) > gstate.file_xfer_giveup_period) {
check_giveup("too much elapsed time");
try_next_url("too much elapsed time");
} else {
retry_or_backoff();
}

View File

@ -111,7 +111,8 @@ public:
bool poll();
void handle_xfer_failure();
void retry_or_backoff();
void check_giveup(const char*);
void xfer_failed(const char*);
void try_next_url(const char*);
void abort();
int write(MIOFILE& fout);
int parse(MIOFILE& fin);