- client: we were assuming that if we ask a task to exit
    and its main process exits, everything is OK.
    That's not necessarily the case - buggy apps may have
    subprocesses that the main process fails to kill.

    Solution: when we request a task to exit or abort,
    make a list of the descendants.
    When the main process exits, kill any remaining descendants.
    
    Also: we weren't checking for the ABORT_PENDING case
    in the process exit logic.
    This may explain the 5/15 second delay in detaching or
    resetting a project with running tasks


svn path=/trunk/boinc/; revision=23738
This commit is contained in:
David Anderson 2011-06-17 04:18:28 +00:00
parent e456ede797
commit c1bf16f7f3
4 changed files with 74 additions and 19 deletions

View File

@ -3563,3 +3563,22 @@ Charlie 16 Jun 2011
installerv2/
redist/
all_projects_list.xml
David 16 Jun 2011
- client: we were assuming that if we ask a task to exit
and its main process exits, everything is OK.
That's not necessarily the case - buggy apps may have
subprocesses that the main process fails to kill.
Solution: when we request a task to exit or abort,
make a list of the descendants.
When the main process exits, kill any remaining descendants.
Also: we weren't checking for the ABORT_PENDING case
in the process exit logic.
This may explain the 5/15 second delay in detaching or
resetting a project with running tasks
client/
app.cpp,h
app_control.cpp

View File

@ -163,7 +163,6 @@ int ACTIVE_TASK::preempt(int preempt_type) {
result->name
);
}
set_task_state(PROCESS_QUIT_PENDING, "preempt");
retval = request_exit();
} else {
if (log_flags.cpu_sched) {

View File

@ -187,27 +187,50 @@ struct ACTIVE_TASK {
// disk used by output files and temp files of this task
void get_free_slot(RESULT*);
int start(bool first_time); // start a process
int request_exit();
// ask the process to exit gracefully,
// i.e. by sending a <quit> message
int request_abort(); // send "abort" message
bool process_exists();
// Termination stuff.
// Terminology:
// "kill": forcibly kill the main process and all its descendants.
// (note: on Windows secure mode, we can't kill the descendants)
// "request exit": send a request-exit message, and enumerate descendants.
// If after 15 secs any processes remain, kill them
// called from:
// task preemption
// project detach or reset
// implementation:
// sends msg, sets quit_time, state QUIT_PENDING;
// get list of descendants
// normal exit handled in handle_premature_exit()
// timeout handled in ACTIVE_TASK_SET::poll()
// "abort_task": like request exit,
// but the app is supposed to write a stack trace to stderr
// called from: rsc exceeded; got ack of running task;
// intermediate upload failure
// client exiting w/ abort_jobs_on_exit set
//
int request_exit();
int request_abort();
int kill_task(bool restart);
// Kill process forcibly,
// otherwise it ends with an error
// Unix: send a SIGKILL signal, Windows: TerminateProcess()
// if restart is true, arrange for resulted to get restarted;
// if restart is true, arrange for result to get restarted;
int abort_task(int exit_status, const char*);
// can be called whether or not process exists
// Implementation stuff related to termination
//
std::vector<int> descendants;
bool process_exists();
bool has_task_exited();
// return true if this task has exited
int suspend();
// tell a process to stop executing (but stay in mem)
// Done by sending it a <suspend> message
int unsuspend();
// Undo a suspend: send a <resume> message
int abort_task(int exit_status, const char*);
// can be called whether or not process exists
bool has_task_exited();
// return true if this task has exited
int preempt(int preempt_type);
// preempt (via suspend or quit) a running task
int resume_or_start(bool);

View File

@ -154,7 +154,10 @@ int ACTIVE_TASK::request_exit() {
"<quit/>",
app_client_shm.shm->process_control_request
);
set_task_state(PROCESS_QUIT_PENDING, "request_exit()");
quit_time = gstate.now;
descendants.clear();
get_descendants(pid, descendants);
return 0;
}
@ -183,6 +186,12 @@ static void kill_app_process(int pid) {
#endif
}
static inline void kill_processes(vector<int> pids) {
for (unsigned int i=0; i<pids.size(); i++) {
kill_app_process(pids[i]);
}
}
// Kill the task (and descendants) by OS-specific means.
//
int ACTIVE_TASK::kill_task(bool restart) {
@ -200,9 +209,7 @@ int ACTIVE_TASK::kill_task(bool restart) {
#endif
get_descendants(pid, pids);
pids.push_back(pid);
for (unsigned int i=0; i<pids.size(); i++) {
kill_app_process(pids[i]);
}
kill_processes(pids);
cleanup_task();
if (restart) {
set_task_state(PROCESS_UNINITIALIZED, "kill_task");
@ -279,14 +286,19 @@ static void clear_backoffs(ACTIVE_TASK* atp) {
}
}
// handle a task that exited prematurely (i.e. the job isn't done)
// handle a task that exited prematurely (i.e. no finish file)
//
void ACTIVE_TASK::handle_premature_exit(bool& will_restart) {
// if it exited because we sent it a quit message, don't count
//
if (task_state() == PROCESS_QUIT_PENDING) {
switch (task_state()) {
case PROCESS_QUIT_PENDING:
set_task_state(PROCESS_UNINITIALIZED, "handle_premature_exit");
will_restart = true;
kill_processes(descendants);
return;
case PROCESS_ABORT_PENDING:
set_task_state(PROCESS_UNINITIALIZED, "handle_premature_exit");
will_restart = false;
kill_processes(descendants);
return;
}
@ -745,6 +757,8 @@ int ACTIVE_TASK::abort_task(int exit_status, const char* msg) {
set_task_state(PROCESS_ABORT_PENDING, "abort_task");
abort_time = gstate.now;
request_abort();
descendants.clear();
get_descendants(pid, descendants);
} else {
set_task_state(PROCESS_ABORTED, "abort_task");
}
@ -831,7 +845,7 @@ void ACTIVE_TASK_SET::request_reread_app_info() {
}
// send quit signal to all tasks in the project
// send quit message to all tasks in the project
// (or all tasks, if proj==0).
// If they don't exit in 5 seconds,
// send them a kill signal and wait up to 5 more seconds to exit.