diff --git a/checkin_notes b/checkin_notes index 663094da70..8a665eb852 100644 --- a/checkin_notes +++ b/checkin_notes @@ -3563,3 +3563,22 @@ Charlie 16 Jun 2011 installerv2/ redist/ all_projects_list.xml + +David 16 Jun 2011 + - client: we were assuming that if we ask a task to exit + and its main process exits, everything is OK. + That's not necessarily the case - buggy apps may have + subprocesses that the main process fails to kill. + + Solution: when we request a task to exit or abort, + make a list of the descendants. + When the main process exits, kill any remaining descendants. + + Also: we weren't checking for the ABORT_PENDING case + in the process exit logic. + This may explain the 5/15 second delay in detaching or + resetting a project with running tasks + + client/ + app.cpp,h + app_control.cpp diff --git a/client/app.cpp b/client/app.cpp index 7446c19d99..2274c91c77 100644 --- a/client/app.cpp +++ b/client/app.cpp @@ -163,7 +163,6 @@ int ACTIVE_TASK::preempt(int preempt_type) { result->name ); } - set_task_state(PROCESS_QUIT_PENDING, "preempt"); retval = request_exit(); } else { if (log_flags.cpu_sched) { diff --git a/client/app.h b/client/app.h index 07c8b687d8..cb246806ed 100644 --- a/client/app.h +++ b/client/app.h @@ -187,27 +187,50 @@ struct ACTIVE_TASK { // disk used by output files and temp files of this task void get_free_slot(RESULT*); int start(bool first_time); // start a process - int request_exit(); - // ask the process to exit gracefully, - // i.e. by sending a message - int request_abort(); // send "abort" message - bool process_exists(); + // Termination stuff. + // Terminology: + // "kill": forcibly kill the main process and all its descendants. + // (note: on Windows secure mode, we can't kill the descendants) + // "request exit": send a request-exit message, and enumerate descendants. + // If after 15 secs any processes remain, kill them + // called from: + // task preemption + // project detach or reset + // implementation: + // sends msg, sets quit_time, state QUIT_PENDING; + // get list of descendants + // normal exit handled in handle_premature_exit() + // timeout handled in ACTIVE_TASK_SET::poll() + // "abort_task": like request exit, + // but the app is supposed to write a stack trace to stderr + // called from: rsc exceeded; got ack of running task; + // intermediate upload failure + // client exiting w/ abort_jobs_on_exit set + // + int request_exit(); + int request_abort(); int kill_task(bool restart); // Kill process forcibly, // otherwise it ends with an error // Unix: send a SIGKILL signal, Windows: TerminateProcess() - // if restart is true, arrange for resulted to get restarted; + // if restart is true, arrange for result to get restarted; + int abort_task(int exit_status, const char*); + // can be called whether or not process exists + + + // Implementation stuff related to termination + // + std::vector descendants; + bool process_exists(); + bool has_task_exited(); + // return true if this task has exited int suspend(); // tell a process to stop executing (but stay in mem) // Done by sending it a message int unsuspend(); // Undo a suspend: send a message - int abort_task(int exit_status, const char*); - // can be called whether or not process exists - bool has_task_exited(); - // return true if this task has exited int preempt(int preempt_type); // preempt (via suspend or quit) a running task int resume_or_start(bool); diff --git a/client/app_control.cpp b/client/app_control.cpp index 1b81522f7a..e3767844a2 100644 --- a/client/app_control.cpp +++ b/client/app_control.cpp @@ -154,7 +154,10 @@ int ACTIVE_TASK::request_exit() { "", app_client_shm.shm->process_control_request ); + set_task_state(PROCESS_QUIT_PENDING, "request_exit()"); quit_time = gstate.now; + descendants.clear(); + get_descendants(pid, descendants); return 0; } @@ -183,6 +186,12 @@ static void kill_app_process(int pid) { #endif } +static inline void kill_processes(vector pids) { + for (unsigned int i=0; i