mirror of https://github.com/BOINC/boinc.git
- client: we were assuming that if we ask a task to exit
and its main process exits, everything is OK. That's not necessarily the case - buggy apps may have subprocesses that the main process fails to kill. Solution: when we request a task to exit or abort, make a list of the descendants. When the main process exits, kill any remaining descendants. Also: we weren't checking for the ABORT_PENDING case in the process exit logic. This may explain the 5/15 second delay in detaching or resetting a project with running tasks. svn path=/trunk/boinc/; revision=23738
This commit is contained in:
parent
e456ede797
commit
c1bf16f7f3
|
@ -3563,3 +3563,22 @@ Charlie 16 Jun 2011
|
|||
installerv2/
|
||||
redist/
|
||||
all_projects_list.xml
|
||||
|
||||
David 16 Jun 2011
|
||||
- client: we were assuming that if we ask a task to exit
|
||||
and its main process exits, everything is OK.
|
||||
That's not necessarily the case - buggy apps may have
|
||||
subprocesses that the main process fails to kill.
|
||||
|
||||
Solution: when we request a task to exit or abort,
|
||||
make a list of the descendants.
|
||||
When the main process exits, kill any remaining descendants.
|
||||
|
||||
Also: we weren't checking for the ABORT_PENDING case
|
||||
in the process exit logic.
|
||||
This may explain the 5/15 second delay in detaching or
|
||||
resetting a project with running tasks
|
||||
|
||||
client/
|
||||
app.cpp,h
|
||||
app_control.cpp
|
||||
|
|
|
@ -163,7 +163,6 @@ int ACTIVE_TASK::preempt(int preempt_type) {
|
|||
result->name
|
||||
);
|
||||
}
|
||||
set_task_state(PROCESS_QUIT_PENDING, "preempt");
|
||||
retval = request_exit();
|
||||
} else {
|
||||
if (log_flags.cpu_sched) {
|
||||
|
|
43
client/app.h
43
client/app.h
|
@ -187,27 +187,50 @@ struct ACTIVE_TASK {
|
|||
// disk used by output files and temp files of this task
|
||||
void get_free_slot(RESULT*);
|
||||
int start(bool first_time); // start a process
|
||||
int request_exit();
|
||||
// ask the process to exit gracefully,
|
||||
// i.e. by sending a <quit> message
|
||||
int request_abort(); // send "abort" message
|
||||
bool process_exists();
|
||||
|
||||
// Termination stuff.
|
||||
// Terminology:
|
||||
// "kill": forcibly kill the main process and all its descendants.
|
||||
// (note: on Windows secure mode, we can't kill the descendants)
|
||||
// "request exit": send a request-exit message, and enumerate descendants.
|
||||
// If after 15 secs any processes remain, kill them
|
||||
// called from:
|
||||
// task preemption
|
||||
// project detach or reset
|
||||
// implementation:
|
||||
// sends msg, sets quit_time, state QUIT_PENDING;
|
||||
// get list of descendants
|
||||
// normal exit handled in handle_premature_exit()
|
||||
// timeout handled in ACTIVE_TASK_SET::poll()
|
||||
// "abort_task": like request exit,
|
||||
// but the app is supposed to write a stack trace to stderr
|
||||
// called from: rsc exceeded; got ack of running task;
|
||||
// intermediate upload failure
|
||||
// client exiting w/ abort_jobs_on_exit set
|
||||
//
|
||||
int request_exit();
|
||||
int request_abort();
|
||||
int kill_task(bool restart);
|
||||
// Kill process forcibly,
|
||||
// otherwise it ends with an error
|
||||
// Unix: send a SIGKILL signal, Windows: TerminateProcess()
|
||||
// if restart is true, arrange for resulted to get restarted;
|
||||
// if restart is true, arrange for result to get restarted;
|
||||
int abort_task(int exit_status, const char*);
|
||||
// can be called whether or not process exists
|
||||
|
||||
|
||||
// Implementation stuff related to termination
|
||||
//
|
||||
std::vector<int> descendants;
|
||||
bool process_exists();
|
||||
bool has_task_exited();
|
||||
// return true if this task has exited
|
||||
|
||||
int suspend();
|
||||
// tell a process to stop executing (but stay in mem)
|
||||
// Done by sending it a <suspend> message
|
||||
int unsuspend();
|
||||
// Undo a suspend: send a <resume> message
|
||||
int abort_task(int exit_status, const char*);
|
||||
// can be called whether or not process exists
|
||||
bool has_task_exited();
|
||||
// return true if this task has exited
|
||||
int preempt(int preempt_type);
|
||||
// preempt (via suspend or quit) a running task
|
||||
int resume_or_start(bool);
|
||||
|
|
|
@ -154,7 +154,10 @@ int ACTIVE_TASK::request_exit() {
|
|||
"<quit/>",
|
||||
app_client_shm.shm->process_control_request
|
||||
);
|
||||
set_task_state(PROCESS_QUIT_PENDING, "request_exit()");
|
||||
quit_time = gstate.now;
|
||||
descendants.clear();
|
||||
get_descendants(pid, descendants);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -183,6 +186,12 @@ static void kill_app_process(int pid) {
|
|||
#endif
|
||||
}
|
||||
|
||||
static inline void kill_processes(vector<int> pids) {
|
||||
for (unsigned int i=0; i<pids.size(); i++) {
|
||||
kill_app_process(pids[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Kill the task (and descendants) by OS-specific means.
|
||||
//
|
||||
int ACTIVE_TASK::kill_task(bool restart) {
|
||||
|
@ -200,9 +209,7 @@ int ACTIVE_TASK::kill_task(bool restart) {
|
|||
#endif
|
||||
get_descendants(pid, pids);
|
||||
pids.push_back(pid);
|
||||
for (unsigned int i=0; i<pids.size(); i++) {
|
||||
kill_app_process(pids[i]);
|
||||
}
|
||||
kill_processes(pids);
|
||||
cleanup_task();
|
||||
if (restart) {
|
||||
set_task_state(PROCESS_UNINITIALIZED, "kill_task");
|
||||
|
@ -279,14 +286,19 @@ static void clear_backoffs(ACTIVE_TASK* atp) {
|
|||
}
|
||||
}
|
||||
|
||||
// handle a task that exited prematurely (i.e. the job isn't done)
|
||||
// handle a task that exited prematurely (i.e. no finish file)
|
||||
//
|
||||
void ACTIVE_TASK::handle_premature_exit(bool& will_restart) {
|
||||
// if it exited because we sent it a quit message, don't count
|
||||
//
|
||||
if (task_state() == PROCESS_QUIT_PENDING) {
|
||||
switch (task_state()) {
|
||||
case PROCESS_QUIT_PENDING:
|
||||
set_task_state(PROCESS_UNINITIALIZED, "handle_premature_exit");
|
||||
will_restart = true;
|
||||
kill_processes(descendants);
|
||||
return;
|
||||
case PROCESS_ABORT_PENDING:
|
||||
set_task_state(PROCESS_UNINITIALIZED, "handle_premature_exit");
|
||||
will_restart = false;
|
||||
kill_processes(descendants);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -745,6 +757,8 @@ int ACTIVE_TASK::abort_task(int exit_status, const char* msg) {
|
|||
set_task_state(PROCESS_ABORT_PENDING, "abort_task");
|
||||
abort_time = gstate.now;
|
||||
request_abort();
|
||||
descendants.clear();
|
||||
get_descendants(pid, descendants);
|
||||
} else {
|
||||
set_task_state(PROCESS_ABORTED, "abort_task");
|
||||
}
|
||||
|
@ -831,7 +845,7 @@ void ACTIVE_TASK_SET::request_reread_app_info() {
|
|||
}
|
||||
|
||||
|
||||
// send quit signal to all tasks in the project
|
||||
// send quit message to all tasks in the project
|
||||
// (or all tasks, if proj==0).
|
||||
// If they don't exit in 5 seconds,
|
||||
// send them a kill signal and wait up to 5 more seconds to exit.
|
||||
|
|
Loading…
Reference in New Issue