mirror of https://github.com/BOINC/boinc.git
*** empty log message ***
svn path=/trunk/boinc/; revision=10393
This commit is contained in:
parent
239013bb1d
commit
1fa2e3de0a
|
@ -6276,3 +6276,17 @@ Matt 17 June 2006
|
|||
html/inc
|
||||
forum.inc
|
||||
email.inc
|
||||
|
||||
David 18 June 2006
|
||||
- Don't parse ACTIVE_TASK::scheduler_state from the state file.
|
||||
This isn't carried across runs of BOINC.
|
||||
- Remove ACTIVE_TASK_SET::restart_tasks() and CLIENT_STATE::restart_tasks():
|
||||
just let the CPU scheduler take care of starting tasks.
|
||||
- If file upload fails because file is missing, don't retry,
|
||||
and show appropriate error message
|
||||
|
||||
client/
|
||||
app.C,h
|
||||
app_start.C
|
||||
client_state.C,h
|
||||
cs_apps.C
|
||||
|
|
13
client/app.C
13
client/app.C
|
@ -177,10 +177,9 @@ int ACTIVE_TASK::init(RESULT* rp) {
|
|||
max_disk_usage = rp->wup->rsc_disk_bound;
|
||||
max_mem_usage = rp->wup->rsc_memory_bound;
|
||||
non_cpu_intensive = rp->project->non_cpu_intensive;
|
||||
|
||||
strcpy(process_control_queue.name, rp->name);
|
||||
strcpy(graphics_request_queue.name, rp->name);
|
||||
|
||||
get_slot_dir(slot, slot_dir);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -385,7 +384,6 @@ int ACTIVE_TASK::parse(MIOFILE& fin) {
|
|||
|
||||
strcpy(result_name, "");
|
||||
strcpy(project_master_url, "");
|
||||
scheduler_state = CPU_SCHED_SCHEDULED;
|
||||
|
||||
while (fin.fgets(buf, 256)) {
|
||||
if (match_tag(buf, "</active_task>")) {
|
||||
|
@ -452,7 +450,6 @@ int ACTIVE_TASK::parse(MIOFILE& fin) {
|
|||
else if (parse_str(buf, "<project_master_url>", project_master_url, sizeof(project_master_url))) continue;
|
||||
else if (parse_int(buf, "<app_version_num>", app_version_num)) continue;
|
||||
else if (parse_int(buf, "<slot>", slot)) continue;
|
||||
else if (parse_int(buf, "<scheduler_state>", scheduler_state)) continue;
|
||||
else if (parse_double(buf, "<checkpoint_cpu_time>", checkpoint_cpu_time)) continue;
|
||||
else if (parse_double(buf, "<fraction_done>", fraction_done)) continue;
|
||||
else if (parse_double(buf, "<current_cpu_time>", current_cpu_time)) continue;
|
||||
|
@ -627,4 +624,12 @@ void ACTIVE_TASK_SET::upload_notify_app(FILE_INFO* fip) {
|
|||
}
|
||||
}
|
||||
|
||||
void ACTIVE_TASK_SET::init() {
|
||||
for (unsigned int i=0; i<active_tasks.size(); i++) {
|
||||
ACTIVE_TASK* atp = active_tasks[i];
|
||||
atp->init(atp->result);
|
||||
atp->scheduler_state = CPU_SCHED_PREEMPTED;
|
||||
}
|
||||
}
|
||||
|
||||
const char *BOINC_RCSID_778b61195e = "$Id$";
|
||||
|
|
|
@ -216,11 +216,11 @@ public:
|
|||
int remove(ACTIVE_TASK*);
|
||||
ACTIVE_TASK* lookup_pid(int);
|
||||
ACTIVE_TASK* lookup_result(RESULT*);
|
||||
void init();
|
||||
bool poll();
|
||||
void suspend_all(bool leave_apps_in_memory=true);
|
||||
void unsuspend_all();
|
||||
bool is_task_executing();
|
||||
int restart_tasks(int max_tasks);
|
||||
void request_tasks_exit(PROJECT* p=0);
|
||||
int wait_for_exit(double, PROJECT* p=0);
|
||||
int exit_tasks(PROJECT* p=0);
|
||||
|
|
|
@ -672,77 +672,6 @@ int ACTIVE_TASK::resume_or_start() {
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Restart active tasks without wiping and reinitializing slot directories
|
||||
// Called at init, with max_tasks = ncpus
|
||||
//
|
||||
int ACTIVE_TASK_SET::restart_tasks(int max_tasks) {
|
||||
vector<ACTIVE_TASK*>::iterator iter;
|
||||
ACTIVE_TASK* atp;
|
||||
RESULT* result;
|
||||
int retval, num_tasks_started;
|
||||
|
||||
SCOPE_MSG_LOG scope_messages(log_messages, CLIENT_MSG_LOG::DEBUG_TASK);
|
||||
|
||||
num_tasks_started = 0;
|
||||
iter = active_tasks.begin();
|
||||
while (iter != active_tasks.end()) {
|
||||
atp = *iter;
|
||||
result = atp->result;
|
||||
atp->init(atp->result);
|
||||
get_slot_dir(atp->slot, atp->slot_dir);
|
||||
if (!gstate.input_files_available(result)) {
|
||||
msg_printf(atp->wup->project, MSG_ERROR,
|
||||
"Can't restart %s: missing files", atp->result->name
|
||||
);
|
||||
gstate.report_result_error(
|
||||
*(atp->result),
|
||||
"One or more missing files"
|
||||
);
|
||||
iter = active_tasks.erase(iter);
|
||||
delete atp;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (atp->scheduler_state != CPU_SCHED_SCHEDULED
|
||||
|| num_tasks_started >= max_tasks
|
||||
) {
|
||||
msg_printf(atp->wup->project, MSG_INFO,
|
||||
"Deferring task %s",
|
||||
atp->result->name
|
||||
);
|
||||
|
||||
atp->scheduler_state = CPU_SCHED_PREEMPTED;
|
||||
iter++;
|
||||
continue;
|
||||
}
|
||||
|
||||
msg_printf(atp->wup->project, MSG_INFO,
|
||||
"Resuming task %s using %s version %d",
|
||||
atp->result->name,
|
||||
atp->app_version->app->name,
|
||||
atp->app_version->version_num
|
||||
);
|
||||
retval = atp->start(false);
|
||||
|
||||
if (retval) {
|
||||
msg_printf(atp->wup->project, MSG_ERROR,
|
||||
"Restart of task %s failed: %s\n",
|
||||
atp->result->name, boincerror(retval)
|
||||
);
|
||||
gstate.report_result_error(
|
||||
*(atp->result),
|
||||
"Couldn't restart app: %d", retval
|
||||
);
|
||||
iter = active_tasks.erase(iter);
|
||||
delete atp;
|
||||
} else {
|
||||
++num_tasks_started;
|
||||
iter++;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if (defined(__APPLE__) && defined(__i386__))
|
||||
|
||||
union headeru {
|
||||
|
|
|
@ -260,8 +260,7 @@ int CLIENT_STATE::init() {
|
|||
|
||||
// set period start time and reschedule
|
||||
//
|
||||
must_schedule_cpus = true;
|
||||
must_enforce_cpu_schedule = true;
|
||||
request_schedule_cpus("Startup");
|
||||
cpu_sched_last_time = now;
|
||||
cpu_sched_last_check = now;
|
||||
|
||||
|
@ -271,6 +270,7 @@ int CLIENT_STATE::init() {
|
|||
retval = make_project_dirs();
|
||||
if (retval) return retval;
|
||||
|
||||
active_tasks.init();
|
||||
active_tasks.report_overdue();
|
||||
active_tasks.handle_upload_files();
|
||||
|
||||
|
@ -410,22 +410,6 @@ bool CLIENT_STATE::poll_slow_events() {
|
|||
|
||||
check_suspend_activities(suspend_reason);
|
||||
|
||||
#ifdef NEW_CPU_SCHED
|
||||
cpu_scheduler.make_schedule();
|
||||
#else
|
||||
// Restart tasks on startup.
|
||||
// Do this here (rather than CLIENT_STATE::init())
|
||||
// so that if we do benchmark on startup,
|
||||
// we don't immediately suspend apps
|
||||
// (this fixes a CPDN problem where quitting the app
|
||||
// right after start kills it)
|
||||
//
|
||||
if (!suspend_reason && !tasks_restarted) {
|
||||
restart_tasks();
|
||||
tasks_restarted = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
// suspend or resume activities (but only if already did startup)
|
||||
//
|
||||
if (tasks_restarted) {
|
||||
|
@ -507,6 +491,7 @@ bool CLIENT_STATE::poll_slow_events() {
|
|||
if (!tasks_suspended) {
|
||||
POLL_ACTION(possibly_schedule_cpus, possibly_schedule_cpus );
|
||||
POLL_ACTION(enforce_schedule , enforce_schedule );
|
||||
tasks_restarted = true;
|
||||
}
|
||||
#endif
|
||||
if (!network_suspended) {
|
||||
|
|
|
@ -314,7 +314,6 @@ public:
|
|||
// - an app fails to start (CS::schedule_cpus())
|
||||
// - any project op is done via RPC (suspend/resume)
|
||||
// - any result op is done via RPC (suspend/resume)
|
||||
int restart_tasks();
|
||||
int quit_activities();
|
||||
void set_ncpus();
|
||||
double estimate_cpu_time(WORKUNIT&);
|
||||
|
|
|
@ -360,33 +360,6 @@ bool CLIENT_STATE::possibly_schedule_cpus() {
|
|||
cpu_sched_last_check = now;
|
||||
}
|
||||
|
||||
#if 0 // THE FOLLOWING SHOULD NOT BE NECESSARY
|
||||
|
||||
// if the count of running tasks is not either ncpus
|
||||
// or the count of runnable results a re-schedule is mandatory.
|
||||
//
|
||||
if (!must_schedule_cpus) {
|
||||
int count_running_tasks = 0;
|
||||
for (i=0; i<active_tasks.active_tasks.size(); i++) {
|
||||
if (!active_tasks.active_tasks[i] || !active_tasks.active_tasks[i]->result) continue;
|
||||
if (CPU_SCHED_SCHEDULED != active_tasks.active_tasks[i]->scheduler_state) continue;
|
||||
if (active_tasks.active_tasks[i]->result->project->non_cpu_intensive) continue;
|
||||
count_running_tasks++;
|
||||
}
|
||||
if (count_running_tasks != ncpus){
|
||||
int count_runnable_results = 0;
|
||||
for (i=0; i<results.size(); i++) {
|
||||
if (!results[i]->runnable()) continue;
|
||||
if (results[i]->project->non_cpu_intensive) continue;
|
||||
count_runnable_results++;
|
||||
}
|
||||
if (count_running_tasks != count_runnable_results) {
|
||||
must_schedule_cpus = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!must_schedule_cpus) return false;
|
||||
must_schedule_cpus = false;
|
||||
schedule_cpus();
|
||||
|
|
|
@ -234,9 +234,8 @@ int CLIENT_STATE::schedule_result(RESULT* rp) {
|
|||
ACTIVE_TASK *atp = lookup_active_task_by_result(rp);
|
||||
if (!atp) {
|
||||
atp = new ACTIVE_TASK;
|
||||
atp->init(rp);
|
||||
atp->slot = active_tasks.get_free_slot();
|
||||
get_slot_dir(atp->slot, atp->slot_dir);
|
||||
atp->init(rp);
|
||||
active_tasks.active_tasks.push_back(atp);
|
||||
}
|
||||
atp->next_scheduler_state = CPU_SCHED_SCHEDULED;
|
||||
|
@ -295,14 +294,6 @@ double CLIENT_STATE::nearly_runnable_resource_share() {
|
|||
return x;
|
||||
}
|
||||
|
||||
|
||||
// This is called when the client is initialized.
|
||||
// Try to restart any tasks that were running when we last shut down.
|
||||
//
|
||||
int CLIENT_STATE::restart_tasks() {
|
||||
return active_tasks.restart_tasks(ncpus);
|
||||
}
|
||||
|
||||
void CLIENT_STATE::set_ncpus() {
|
||||
if (ncpus == 0) ncpus = host_info.p_ncpus;
|
||||
|
||||
|
|
|
@ -227,7 +227,15 @@ bool PERS_FILE_XFER::poll() {
|
|||
is_upload?"upload":"download", fip->name
|
||||
);
|
||||
}
|
||||
check_giveup("server rejected file");
|
||||
try_next_url("server rejected file");
|
||||
} else if (fxp->file_xfer_retval == ERR_NOT_FOUND) {
|
||||
if (log_flags.file_xfer) {
|
||||
msg_printf(
|
||||
fip->project, MSG_INFO, "Permanently failed %s of %s",
|
||||
is_upload?"upload":"download", fip->name
|
||||
);
|
||||
}
|
||||
try_next_url("File not found on client");
|
||||
} else {
|
||||
if (log_flags.file_xfer) {
|
||||
msg_printf(
|
||||
|
@ -239,7 +247,7 @@ bool PERS_FILE_XFER::poll() {
|
|||
handle_xfer_failure();
|
||||
}
|
||||
|
||||
// fxp could have already been freed and zeroed by check_giveup
|
||||
// fxp could have already been freed and zeroed by try_next_url
|
||||
// so check before trying to remove
|
||||
//
|
||||
if (fxp) {
|
||||
|
@ -252,6 +260,23 @@ bool PERS_FILE_XFER::poll() {
|
|||
return false;
|
||||
}
|
||||
|
||||
void PERS_FILE_XFER::xfer_failed(const char* why) {
|
||||
gstate.file_xfers->remove(fxp);
|
||||
delete fxp;
|
||||
fxp = NULL;
|
||||
if (is_upload) {
|
||||
fip->status = ERR_GIVEUP_UPLOAD;
|
||||
} else {
|
||||
fip->status = ERR_GIVEUP_DOWNLOAD;
|
||||
}
|
||||
pers_xfer_done = true;
|
||||
msg_printf(
|
||||
fip->project, MSG_ERROR, "Giving up on %s of %s: %s",
|
||||
is_upload?"upload":"download", fip->name, why
|
||||
);
|
||||
fip->error_msg = why;
|
||||
}
|
||||
|
||||
// A file transfer (to a particular server)
|
||||
// has had a failure
|
||||
// TODO ?? transient ? permanent? terminology??
|
||||
|
@ -266,22 +291,9 @@ bool PERS_FILE_XFER::poll() {
|
|||
// If there are more URLs to try, the file_xfer is restarted with these new
|
||||
// urls until a good transfer is made or it completely gives up.
|
||||
//
|
||||
void PERS_FILE_XFER::check_giveup(const char* why) {
|
||||
void PERS_FILE_XFER::try_next_url(const char* why) {
|
||||
if (fip->get_next_url(fip->upload_when_present) == NULL) {
|
||||
gstate.file_xfers->remove(fxp);
|
||||
delete fxp;
|
||||
fxp = NULL;
|
||||
if (is_upload) {
|
||||
fip->status = ERR_GIVEUP_UPLOAD;
|
||||
} else {
|
||||
fip->status = ERR_GIVEUP_DOWNLOAD;
|
||||
}
|
||||
pers_xfer_done = true;
|
||||
msg_printf(
|
||||
fip->project, MSG_ERROR, "Giving up on %s of %s: %s",
|
||||
is_upload?"upload":"download", fip->name, why
|
||||
);
|
||||
fip->error_msg = why;
|
||||
xfer_failed(why);
|
||||
fip->delete_file();
|
||||
} else {
|
||||
if (is_upload) {
|
||||
|
@ -315,7 +327,7 @@ void PERS_FILE_XFER::handle_xfer_failure() {
|
|||
retry_or_backoff();
|
||||
return;
|
||||
} else {
|
||||
check_giveup("file was not found on server");
|
||||
try_next_url("file was not found on server");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -323,7 +335,7 @@ void PERS_FILE_XFER::handle_xfer_failure() {
|
|||
// See if it's time to give up on the persistent file xfer
|
||||
//
|
||||
if ((gstate.now - first_request_time) > gstate.file_xfer_giveup_period) {
|
||||
check_giveup("too much elapsed time");
|
||||
try_next_url("too much elapsed time");
|
||||
} else {
|
||||
retry_or_backoff();
|
||||
}
|
||||
|
|
|
@ -111,7 +111,8 @@ public:
|
|||
bool poll();
|
||||
void handle_xfer_failure();
|
||||
void retry_or_backoff();
|
||||
void check_giveup(const char*);
|
||||
void xfer_failed(const char*);
|
||||
void try_next_url(const char*);
|
||||
void abort();
|
||||
int write(MIOFILE& fout);
|
||||
int parse(MIOFILE& fin);
|
||||
|
|
Loading…
Reference in New Issue