2003-07-01 20:37:09 +00:00
|
|
|
// The contents of this file are subject to the BOINC Public License
|
2002-04-30 22:22:54 +00:00
|
|
|
// Version 1.0 (the "License"); you may not use this file except in
|
|
|
|
// compliance with the License. You may obtain a copy of the License at
|
2003-07-01 20:37:09 +00:00
|
|
|
// http://boinc.berkeley.edu/license_1.0.txt
|
2003-07-02 02:02:18 +00:00
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
// Software distributed under the License is distributed on an "AS IS"
|
|
|
|
// basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
|
|
|
// License for the specific language governing rights and limitations
|
2003-07-02 02:02:18 +00:00
|
|
|
// under the License.
|
|
|
|
//
|
|
|
|
// The Original Code is the Berkeley Open Infrastructure for Network Computing.
|
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
// The Initial Developer of the Original Code is the SETI@home project.
|
2003-07-02 20:57:59 +00:00
|
|
|
// Portions created by the SETI@home project are Copyright (C) 2002
|
2003-07-02 02:02:18 +00:00
|
|
|
// University of California at Berkeley. All Rights Reserved.
|
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
// Contributor(s):
|
|
|
|
//
|
|
|
|
|
|
|
|
// Abstraction of a set of executing applications,
|
|
|
|
// connected to I/O files in various ways.
|
|
|
|
// Shouldn't depend on CLIENT_STATE.
|
|
|
|
|
2002-06-06 18:42:01 +00:00
|
|
|
#include "windows_cpp.h"
|
2002-07-11 01:09:53 +00:00
|
|
|
#include "error_numbers.h"
|
2002-06-06 18:42:01 +00:00
|
|
|
|
|
|
|
#ifdef _WIN32
|
2002-06-20 23:46:51 +00:00
|
|
|
#include <io.h>
|
2002-10-06 00:43:54 +00:00
|
|
|
#include <afxwin.h>
|
2002-07-05 19:20:00 +00:00
|
|
|
#endif
|
|
|
|
#if HAVE_UNISTD_H
|
2002-04-30 22:22:54 +00:00
|
|
|
#include <unistd.h>
|
2002-07-05 19:20:00 +00:00
|
|
|
#endif
|
|
|
|
#if HAVE_SYS_WAIT_H
|
2002-04-30 22:22:54 +00:00
|
|
|
#include <sys/wait.h>
|
2002-07-05 19:20:00 +00:00
|
|
|
#endif
|
|
|
|
#if HAVE_SYS_TIME_H
|
2002-05-22 15:52:26 +00:00
|
|
|
#include <sys/time.h>
|
2002-07-05 19:20:00 +00:00
|
|
|
#endif
|
|
|
|
#if HAVE_SYS_RESOURCE_H
|
2002-04-30 22:22:54 +00:00
|
|
|
#include <sys/resource.h>
|
2002-06-06 18:42:01 +00:00
|
|
|
#endif
|
2002-07-05 19:20:00 +00:00
|
|
|
#if HAVE_SYS_TYPES_H
|
2002-06-06 18:42:01 +00:00
|
|
|
#include <sys/types.h>
|
2002-07-05 19:20:00 +00:00
|
|
|
#endif
|
|
|
|
#if HAVE_SYS_SIGNAL_H
|
2002-07-01 18:16:31 +00:00
|
|
|
#include <sys/signal.h>
|
2002-07-05 19:20:00 +00:00
|
|
|
#endif
|
2003-03-20 19:16:28 +00:00
|
|
|
#if HAVE_SYS_IPC_H
|
|
|
|
#include <sys/ipc.h>
|
|
|
|
#endif
|
2002-07-05 19:20:00 +00:00
|
|
|
#if HAVE_FCNTL_H
|
2002-04-30 22:22:54 +00:00
|
|
|
#include <fcntl.h>
|
2002-07-05 19:20:00 +00:00
|
|
|
#endif
|
|
|
|
#if HAVE_SIGNAL_H
|
2002-04-30 22:22:54 +00:00
|
|
|
#include <signal.h>
|
2002-07-05 19:20:00 +00:00
|
|
|
#endif
|
2002-07-15 23:21:20 +00:00
|
|
|
|
|
|
|
#include <ctype.h>
|
2002-06-06 18:42:01 +00:00
|
|
|
#include <time.h>
|
2002-06-20 23:46:51 +00:00
|
|
|
#include <stdio.h>
|
2003-06-06 21:23:14 +00:00
|
|
|
#include <math.h>
|
2002-06-20 23:46:51 +00:00
|
|
|
#include <stdlib.h>
|
2002-04-30 22:22:54 +00:00
|
|
|
|
|
|
|
#include "client_state.h"
|
2002-07-15 23:21:20 +00:00
|
|
|
#include "client_types.h"
|
2002-04-30 22:22:54 +00:00
|
|
|
#include "filesys.h"
|
|
|
|
#include "file_names.h"
|
|
|
|
#include "parse.h"
|
2003-03-17 19:24:38 +00:00
|
|
|
#include "shmem.h"
|
2002-07-22 23:07:14 +00:00
|
|
|
#include "util.h"
|
2002-04-30 22:22:54 +00:00
|
|
|
|
|
|
|
#include "app.h"
|
2002-07-31 19:05:15 +00:00
|
|
|
|
2003-08-11 03:33:14 +00:00
|
|
|
// Priority value handed to setpriority(2) for a freshly forked application
// process (see ACTIVE_TASK::start(), "set idle process priority").
// NOTE(review): 19 is presumably the weakest "nice" level on the target
// systems - confirm against the platform's setpriority documentation.
|
|
|
|
static const int PROCESS_IDLE_PRIORITY = 19;
|
|
|
|
|
2002-07-15 23:21:20 +00:00
|
|
|
// Goes through an array of strings, and prints each string
|
|
|
|
//
|
2003-07-02 02:02:18 +00:00
|
|
|
static int debug_print_argv(char** argv) {
|
2002-04-30 22:22:54 +00:00
|
|
|
int i;
|
2002-07-15 23:21:20 +00:00
|
|
|
|
2003-07-02 02:02:18 +00:00
|
|
|
log_messages.printf(ClientMessages::DEBUG_TASK, "Arguments:");
|
|
|
|
++log_messages;
|
2002-04-30 22:22:54 +00:00
|
|
|
for (i=0; argv[i]; i++) {
|
2003-07-02 02:02:18 +00:00
|
|
|
log_messages.printf(ClientMessages::DEBUG_TASK,
|
|
|
|
"argv[%d]: %s\n", i, argv[i]);
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
2003-07-02 02:02:18 +00:00
|
|
|
--log_messages;
|
2002-07-15 23:21:20 +00:00
|
|
|
|
|
|
|
return 0;
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Build an ACTIVE_TASK that is not yet attached to any result.
// All bookkeeping starts out zeroed; init() binds the task to a result
// and start() launches the process.
//
ACTIVE_TASK::ACTIVE_TASK() {
    // nothing is attached yet
    app_version = NULL;
    wup = NULL;
    result = NULL;

    // process bookkeeping
    state = PROCESS_UNINITIALIZED;
    pid = 0;
    slot = 0;
    signal = 0;
    exit_status = 0;
    strcpy(slot_dir, "");

    // graphics: hidden by default, and the app has not acknowledged
    // any mode yet
    graphics_request_time = time(0);
    graphics_requested_mode = MODE_HIDE_GRAPHICS;
    graphics_mode_before_ss = MODE_HIDE_GRAPHICS;
    graphics_acked_mode = MODE_UNSUPPORTED;

    last_status_msg_time = 0;

    // progress estimation
    fraction_done = 0;
    last_frac_done = 0;
    last_frac_update = 0;
    frac_rate_of_change = 0;
    recent_change = 0;

    // CPU time and memory accounting
    starting_cpu_time = 0;
    checkpoint_cpu_time = 0;
    current_cpu_time = 0;
    working_set_size = 0;
}
|
|
|
|
|
|
|
|
// Bind this task to a result and derive its resource limits from the
// result's workunit.
//
// rp: the result to run; rp->wup and rp->wup->avp are dereferenced,
//     so both must be valid.
// Always returns 0.
//
int ACTIVE_TASK::init(RESULT* rp) {
    result = rp;
    wup = rp->wup;
    app_version = wup->avp;

    // The CPU time limit is the workunit's floating-point-operation bound
    // divided by this host's p_fpops figure.
    // NOTE(review): nothing here guards against p_fpops being zero.
    max_cpu_time = wup->rsc_fpops_bound / gstate.host_info.p_fpops;
    max_disk_usage = wup->rsc_disk_bound;
    max_mem_usage = wup->rsc_memory_bound;

    return 0;
}
|
|
|
|
|
2003-03-06 00:42:18 +00:00
|
|
|
// Start a task in a slot directory.
|
|
|
|
// This includes setting up soft links,
|
2002-08-05 00:29:34 +00:00
|
|
|
// passing preferences, and starting the process
|
|
|
|
//
|
2002-08-20 00:30:13 +00:00
|
|
|
// Current dir is top-level BOINC dir
|
2002-07-15 23:21:20 +00:00
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
int ACTIVE_TASK::start(bool first_time) {
|
2003-03-13 21:49:52 +00:00
|
|
|
char exec_name[256], file_path[256], link_path[256], buf[256], exec_path[256];
|
2002-04-30 22:22:54 +00:00
|
|
|
unsigned int i;
|
2002-05-17 22:33:57 +00:00
|
|
|
FILE_REF file_ref;
|
|
|
|
FILE_INFO* fip;
|
2002-10-24 08:25:42 +00:00
|
|
|
int retval;
|
2002-09-11 21:41:42 +00:00
|
|
|
char init_data_path[256], graphics_data_path[256], fd_init_path[256];
|
2002-08-24 00:41:25 +00:00
|
|
|
FILE *f;
|
2002-08-05 00:29:34 +00:00
|
|
|
APP_INIT_DATA aid;
|
2002-09-17 21:54:59 +00:00
|
|
|
GRAPHICS_INFO gi;
|
2002-08-05 00:29:34 +00:00
|
|
|
|
2003-07-02 02:02:18 +00:00
|
|
|
ScopeMessages scope_messages(log_messages, ClientMessages::DEBUG_TASK);
|
|
|
|
scope_messages.printf("ACTIVE_TASK::start(first_time=%d)\n", first_time);
|
|
|
|
|
2002-08-05 00:29:34 +00:00
|
|
|
if (first_time) {
|
|
|
|
checkpoint_cpu_time = 0;
|
|
|
|
}
|
|
|
|
current_cpu_time = checkpoint_cpu_time;
|
|
|
|
starting_cpu_time = checkpoint_cpu_time;
|
|
|
|
fraction_done = 0;
|
|
|
|
|
2002-11-18 21:20:54 +00:00
|
|
|
gi.xsize = 800;
|
|
|
|
gi.ysize = 600;
|
2002-09-11 21:41:42 +00:00
|
|
|
gi.refresh_period = 0.1;
|
2002-08-05 00:29:34 +00:00
|
|
|
|
|
|
|
memset(&aid, 0, sizeof(aid));
|
2002-08-22 20:19:18 +00:00
|
|
|
|
2003-09-27 17:53:43 +00:00
|
|
|
safe_strcpy(aid.app_name, wup->app->name);
|
2003-04-03 18:35:40 +00:00
|
|
|
safe_strcpy(aid.user_name, wup->project->user_name);
|
|
|
|
safe_strcpy(aid.team_name, wup->project->team_name);
|
2003-02-24 21:25:16 +00:00
|
|
|
if (wup->project->project_specific_prefs) {
|
2003-03-06 00:42:18 +00:00
|
|
|
extract_venue(
|
2003-02-24 21:25:16 +00:00
|
|
|
wup->project->project_specific_prefs,
|
2003-03-06 00:42:18 +00:00
|
|
|
gstate.host_venue,
|
|
|
|
aid.app_preferences
|
2003-02-24 21:25:16 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
aid.user_total_credit = wup->project->user_total_credit;
|
|
|
|
aid.user_expavg_credit = wup->project->user_expavg_credit;
|
|
|
|
aid.host_total_credit = wup->project->host_total_credit;
|
|
|
|
aid.host_expavg_credit = wup->project->host_expavg_credit;
|
2003-03-20 02:05:25 +00:00
|
|
|
aid.checkpoint_period = gstate.global_prefs.disk_interval;
|
2002-08-05 00:29:34 +00:00
|
|
|
aid.fraction_done_update_period = DEFAULT_FRACTION_DONE_UPDATE_PERIOD;
|
2003-03-17 19:24:38 +00:00
|
|
|
aid.shm_key = 0;
|
2002-08-05 00:29:34 +00:00
|
|
|
aid.wu_cpu_time = checkpoint_cpu_time;
|
|
|
|
|
2002-08-24 00:41:25 +00:00
|
|
|
sprintf(init_data_path, "%s%s%s", slot_dir, PATH_SEPARATOR, INIT_DATA_FILE);
|
|
|
|
f = fopen(init_data_path, "w");
|
|
|
|
if (!f) {
|
2003-10-02 18:51:38 +00:00
|
|
|
msg_printf(wup->project, MSG_ERROR,
|
|
|
|
"Failed to open core-to-app prefs file %s",
|
|
|
|
init_data_path
|
|
|
|
);
|
2002-07-15 23:21:20 +00:00
|
|
|
return ERR_FOPEN;
|
2002-06-10 22:59:15 +00:00
|
|
|
}
|
2003-05-07 23:42:17 +00:00
|
|
|
|
2003-05-09 20:33:57 +00:00
|
|
|
// make a unique key for core/app shared memory segment
|
|
|
|
//
|
2003-05-07 23:42:17 +00:00
|
|
|
#ifdef _WIN32
|
|
|
|
sprintf(aid.comm_obj_name, "boinc_%d", slot);
|
2003-06-06 17:43:45 +00:00
|
|
|
#elif HAVE_SYS_IPC_H
|
2003-04-01 03:18:12 +00:00
|
|
|
aid.shm_key = ftok(init_data_path, slot);
|
|
|
|
#else
|
2003-06-06 17:43:45 +00:00
|
|
|
#error shared memory key generation unimplemented
|
2003-03-17 19:24:38 +00:00
|
|
|
#endif
|
2003-05-07 23:42:17 +00:00
|
|
|
|
2003-03-08 23:48:05 +00:00
|
|
|
retval = write_init_data_file(f, aid);
|
|
|
|
if (retval) return retval;
|
2003-07-02 02:02:18 +00:00
|
|
|
|
2002-08-24 00:41:25 +00:00
|
|
|
fclose(f);
|
2002-06-08 00:55:25 +00:00
|
|
|
|
2002-09-11 21:41:42 +00:00
|
|
|
sprintf(graphics_data_path, "%s%s%s", slot_dir, PATH_SEPARATOR, GRAPHICS_DATA_FILE);
|
|
|
|
f = fopen(graphics_data_path, "w");
|
|
|
|
if (!f) {
|
2003-10-02 18:51:38 +00:00
|
|
|
msg_printf(wup->project, MSG_ERROR,
|
|
|
|
"Failed to open core-to-app graphics prefs file %s",
|
|
|
|
graphics_data_path
|
|
|
|
);
|
2002-09-11 21:41:42 +00:00
|
|
|
return ERR_FOPEN;
|
|
|
|
}
|
2002-09-17 21:54:59 +00:00
|
|
|
retval = write_graphics_file(f, &gi);
|
2002-09-11 21:41:42 +00:00
|
|
|
fclose(f);
|
|
|
|
|
2002-08-24 00:41:25 +00:00
|
|
|
sprintf(fd_init_path, "%s%s%s", slot_dir, PATH_SEPARATOR, FD_INIT_FILE);
|
|
|
|
f = fopen(fd_init_path, "w");
|
|
|
|
if (!f) {
|
2003-06-03 22:47:15 +00:00
|
|
|
msg_printf(wup->project, MSG_ERROR, "Failed to open init file %s", fd_init_path);
|
2002-07-15 23:21:20 +00:00
|
|
|
return ERR_FOPEN;
|
2002-06-20 23:46:51 +00:00
|
|
|
}
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2002-08-05 00:29:34 +00:00
|
|
|
// make soft links to the executable(s)
|
2002-06-20 23:46:51 +00:00
|
|
|
//
|
|
|
|
for (i=0; i<app_version->app_files.size(); i++) {
|
2003-05-20 00:03:39 +00:00
|
|
|
FILE_REF fref = app_version->app_files[i];
|
|
|
|
fip = fref.file_info;
|
2002-06-20 23:46:51 +00:00
|
|
|
get_pathname(fip, file_path);
|
2003-05-20 00:03:39 +00:00
|
|
|
if (fref.main_program) {
|
2003-04-03 18:35:40 +00:00
|
|
|
safe_strcpy(exec_name, fip->name);
|
|
|
|
safe_strcpy(exec_path, file_path);
|
2002-06-20 23:46:51 +00:00
|
|
|
}
|
|
|
|
if (first_time) {
|
2003-06-16 23:40:20 +00:00
|
|
|
sprintf(link_path, "%s%s%s", slot_dir, PATH_SEPARATOR, strlen(fref.open_name)?fref.open_name:fip->name);
|
|
|
|
sprintf(buf, "..%s..%s%s", PATH_SEPARATOR, PATH_SEPARATOR, file_path);
|
|
|
|
retval = boinc_link(buf, link_path);
|
2003-07-02 02:02:18 +00:00
|
|
|
scope_messages.printf("ACTIVE_TASK::start(): Linking %s to %s\n", file_path, link_path);
|
2002-06-20 23:46:51 +00:00
|
|
|
if (retval) {
|
2003-06-03 22:47:15 +00:00
|
|
|
msg_printf(wup->project, MSG_ERROR, "Can't link %s to %s", file_path, link_path);
|
2002-08-24 00:41:25 +00:00
|
|
|
fclose(f);
|
2002-06-20 23:46:51 +00:00
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// create symbolic links, and hook up descriptors, for input files
|
|
|
|
//
|
|
|
|
for (i=0; i<wup->input_files.size(); i++) {
|
|
|
|
file_ref = wup->input_files[i];
|
|
|
|
get_pathname(file_ref.file_info, file_path);
|
|
|
|
if (strlen(file_ref.open_name)) {
|
2002-05-17 22:33:57 +00:00
|
|
|
if (first_time) {
|
2002-09-25 18:33:26 +00:00
|
|
|
sprintf(link_path, "%s%s%s", slot_dir, PATH_SEPARATOR, file_ref.open_name);
|
2003-03-13 21:49:52 +00:00
|
|
|
sprintf(buf, "..%s..%s%s", PATH_SEPARATOR, PATH_SEPARATOR, file_path );
|
2003-07-02 02:02:18 +00:00
|
|
|
scope_messages.printf("ACTIVE_TASK::start(): link %s to %s\n", file_path, link_path);
|
2003-07-01 18:27:08 +00:00
|
|
|
if (file_ref.copy_file) {
|
2003-07-01 21:05:37 +00:00
|
|
|
retval = boinc_copy(file_path, link_path);
|
2003-07-01 18:27:08 +00:00
|
|
|
if (retval) {
|
|
|
|
msg_printf(wup->project, MSG_ERROR, "Can't copy %s to %s", file_path, link_path);
|
|
|
|
fclose(f);
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
retval = boinc_link(buf, link_path);
|
|
|
|
if (retval) {
|
|
|
|
msg_printf(wup->project, MSG_ERROR, "Can't link %s to %s", file_path, link_path);
|
|
|
|
fclose(f);
|
|
|
|
return retval;
|
|
|
|
}
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
|
|
|
}
|
2002-06-20 23:46:51 +00:00
|
|
|
} else {
|
2003-03-13 21:49:52 +00:00
|
|
|
sprintf(buf, "..%s..%s%s", PATH_SEPARATOR, PATH_SEPARATOR, file_path);
|
2003-09-27 17:53:43 +00:00
|
|
|
retval = write_fd_init_file(f, buf, file_ref.fd, true);
|
2003-03-08 23:48:05 +00:00
|
|
|
if (retval) return retval;
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
2002-06-20 23:46:51 +00:00
|
|
|
}
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2002-07-15 23:21:20 +00:00
|
|
|
// hook up the output files using BOINC soft links
|
2002-06-20 23:46:51 +00:00
|
|
|
//
|
|
|
|
for (i=0; i<result->output_files.size(); i++) {
|
|
|
|
file_ref = result->output_files[i];
|
|
|
|
get_pathname(file_ref.file_info, file_path);
|
|
|
|
if (strlen(file_ref.open_name)) {
|
|
|
|
if (first_time) {
|
2002-09-25 18:33:26 +00:00
|
|
|
sprintf(link_path, "%s%s%s", slot_dir, PATH_SEPARATOR, file_ref.open_name);
|
2003-03-13 21:49:52 +00:00
|
|
|
sprintf(buf, "..%s..%s%s", PATH_SEPARATOR, PATH_SEPARATOR, file_path );
|
2003-07-02 02:02:18 +00:00
|
|
|
scope_messages.printf("ACTIVE_TASK::start(): link %s to %s\n", file_path, link_path);
|
2003-03-13 21:49:52 +00:00
|
|
|
retval = boinc_link(buf, link_path);
|
2002-06-20 23:46:51 +00:00
|
|
|
if (retval) {
|
2003-06-03 22:47:15 +00:00
|
|
|
msg_printf(wup->project, MSG_ERROR, "Can't link %s to %s", file_path, link_path);
|
2002-08-24 00:41:25 +00:00
|
|
|
fclose(f);
|
2002-06-20 23:46:51 +00:00
|
|
|
return retval;
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
|
|
|
}
|
2002-06-20 23:46:51 +00:00
|
|
|
} else {
|
2003-03-13 21:49:52 +00:00
|
|
|
sprintf(buf, "..%s..%s%s", PATH_SEPARATOR, PATH_SEPARATOR, file_path);
|
2003-09-27 17:53:43 +00:00
|
|
|
retval = write_fd_init_file(f, buf, file_ref.fd, false);
|
2003-03-08 23:48:05 +00:00
|
|
|
if (retval) return retval;
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
2002-06-20 23:46:51 +00:00
|
|
|
}
|
|
|
|
|
2002-08-24 00:41:25 +00:00
|
|
|
fclose(f);
|
2002-06-20 23:46:51 +00:00
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
#ifdef _WIN32
|
2002-07-05 05:43:57 +00:00
|
|
|
PROCESS_INFORMATION process_info;
|
|
|
|
STARTUPINFO startup_info;
|
2002-08-24 00:41:25 +00:00
|
|
|
char slotdirpath[256];
|
2002-11-19 00:28:33 +00:00
|
|
|
char cmd_line[512];
|
2002-12-09 22:36:46 +00:00
|
|
|
int win_error;
|
2002-06-20 23:46:51 +00:00
|
|
|
|
2002-07-05 05:43:57 +00:00
|
|
|
memset( &process_info, 0, sizeof( process_info ) );
|
|
|
|
memset( &startup_info, 0, sizeof( startup_info ) );
|
|
|
|
startup_info.cb = sizeof(startup_info);
|
|
|
|
startup_info.lpReserved = NULL;
|
|
|
|
startup_info.lpDesktop = "";
|
2002-06-20 23:46:51 +00:00
|
|
|
|
2003-03-18 19:37:09 +00:00
|
|
|
sprintf(buf, "%s%s", QUIT_PREFIX, aid.comm_obj_name);
|
|
|
|
quitRequestEvent = CreateEvent(0, TRUE, FALSE, buf);
|
|
|
|
|
2003-05-09 20:33:57 +00:00
|
|
|
// create core/app share mem segment
|
|
|
|
//
|
2003-03-18 19:37:09 +00:00
|
|
|
sprintf(buf, "%s%s", SHM_PREFIX, aid.comm_obj_name);
|
2003-05-07 23:42:17 +00:00
|
|
|
shm_handle = create_shmem(buf, APP_CLIENT_SHMEM_SIZE,
|
2003-05-09 20:33:57 +00:00
|
|
|
(void **)&app_client_shm.shm
|
|
|
|
);
|
|
|
|
app_client_shm.reset_msgs();
|
2003-03-18 19:37:09 +00:00
|
|
|
|
2002-08-22 20:19:18 +00:00
|
|
|
// NOTE: in Windows, stderr is redirected within boinc_init();
|
2002-06-20 23:46:51 +00:00
|
|
|
|
2003-03-13 21:49:52 +00:00
|
|
|
sprintf(cmd_line, "%s %s", exec_path, wup->command_line);
|
2002-08-24 00:41:25 +00:00
|
|
|
full_path(slot_dir, slotdirpath);
|
2002-08-22 20:19:18 +00:00
|
|
|
if (!CreateProcess(exec_path,
|
2002-10-24 17:06:42 +00:00
|
|
|
cmd_line,
|
2002-08-22 20:19:18 +00:00
|
|
|
NULL,
|
|
|
|
NULL,
|
2002-06-20 23:46:51 +00:00
|
|
|
FALSE,
|
2002-11-05 21:16:00 +00:00
|
|
|
CREATE_NEW_PROCESS_GROUP|CREATE_NO_WINDOW|IDLE_PRIORITY_CLASS,
|
2002-06-20 23:46:51 +00:00
|
|
|
NULL,
|
2002-08-24 00:41:25 +00:00
|
|
|
slotdirpath,
|
2002-06-20 23:46:51 +00:00
|
|
|
&startup_info,
|
2002-08-22 20:19:18 +00:00
|
|
|
&process_info
|
|
|
|
)) {
|
2002-12-09 22:36:46 +00:00
|
|
|
win_error = GetLastError();
|
2002-11-19 00:28:33 +00:00
|
|
|
char *errorargs[] = {app_version->app_name,"","","",""};
|
|
|
|
LPVOID lpMsgBuf;
|
|
|
|
FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
|
2003-02-20 00:06:07 +00:00
|
|
|
NULL, win_error, 0, (LPTSTR)&lpMsgBuf, 0, errorargs
|
|
|
|
);
|
2002-11-19 00:28:33 +00:00
|
|
|
|
2003-06-02 22:23:06 +00:00
|
|
|
state = PROCESS_COULDNT_START;
|
|
|
|
result->active_task_state = PROCESS_COULDNT_START;
|
2003-02-20 00:06:07 +00:00
|
|
|
if (win_error) {
|
2003-03-13 21:49:52 +00:00
|
|
|
gstate.report_result_error(*result, win_error, (LPTSTR)&lpMsgBuf);
|
2002-11-19 00:28:33 +00:00
|
|
|
LocalFree(lpMsgBuf);
|
|
|
|
return -1;
|
|
|
|
}
|
2003-06-03 22:47:15 +00:00
|
|
|
msg_printf(wup->project, MSG_ERROR, "CreateProcess: %s", (LPCTSTR)lpMsgBuf);
|
2002-11-19 00:28:33 +00:00
|
|
|
LocalFree(lpMsgBuf);
|
2002-07-05 05:43:57 +00:00
|
|
|
}
|
2003-02-20 00:06:07 +00:00
|
|
|
pid = process_info.dwProcessId;
|
2002-07-05 05:43:57 +00:00
|
|
|
pid_handle = process_info.hProcess;
|
2002-11-19 00:28:33 +00:00
|
|
|
thread_handle = process_info.hThread;
|
2002-12-06 07:33:45 +00:00
|
|
|
#else
|
|
|
|
char* argv[100];
|
2003-07-02 02:02:18 +00:00
|
|
|
|
2003-05-07 23:42:17 +00:00
|
|
|
// Set up core/app shared memory seg
|
2003-03-17 19:24:38 +00:00
|
|
|
//
|
|
|
|
shm_key = aid.shm_key;
|
2003-05-08 18:11:05 +00:00
|
|
|
if (!create_shmem(
|
|
|
|
shm_key, APP_CLIENT_SHMEM_SIZE, (void**)&app_client_shm.shm)
|
|
|
|
) {
|
|
|
|
app_client_shm.reset_msgs();
|
2003-05-07 23:42:17 +00:00
|
|
|
}
|
2003-07-02 02:02:18 +00:00
|
|
|
|
2002-12-06 07:33:45 +00:00
|
|
|
pid = fork();
|
2003-08-11 03:33:14 +00:00
|
|
|
if (pid == -1) {
|
|
|
|
state = PROCESS_COULDNT_START;
|
|
|
|
result->active_task_state = PROCESS_COULDNT_START;
|
|
|
|
gstate.report_result_error(*result, -1, strerror(errno));
|
|
|
|
msg_printf(wup->project, MSG_ERROR, "fork(): %s", strerror(errno));
|
|
|
|
return -1;
|
|
|
|
}
|
2002-12-06 07:33:45 +00:00
|
|
|
if (pid == 0) {
|
|
|
|
// from here on we're running in a new process.
|
|
|
|
// If an error happens, exit nonzero so that the core client
|
|
|
|
// knows there was a problem.
|
2002-06-20 23:46:51 +00:00
|
|
|
|
2002-12-06 07:33:45 +00:00
|
|
|
// chdir() into the slot directory
|
|
|
|
//
|
|
|
|
retval = chdir(slot_dir);
|
|
|
|
if (retval) {
|
|
|
|
perror("chdir");
|
2003-08-01 22:58:40 +00:00
|
|
|
_exit(retval);
|
2002-12-06 07:33:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// hook up stderr to a specially-named file
|
|
|
|
//
|
|
|
|
freopen(STDERR_FILE, "a", stderr);
|
|
|
|
|
|
|
|
argv[0] = exec_name;
|
|
|
|
parse_command_line(wup->command_line, argv+1);
|
2003-07-02 02:02:18 +00:00
|
|
|
debug_print_argv(argv);
|
2003-06-03 23:27:34 +00:00
|
|
|
sprintf(buf, "..%s..%s%s", PATH_SEPARATOR, PATH_SEPARATOR, exec_path );
|
2003-03-13 21:49:52 +00:00
|
|
|
retval = execv(buf, argv);
|
2003-07-03 05:01:29 +00:00
|
|
|
msg_printf(wup->project, MSG_ERROR, "execv failed: %d\n", retval);
|
2002-12-06 07:33:45 +00:00
|
|
|
perror("execv");
|
2003-08-01 22:58:40 +00:00
|
|
|
_exit(1);
|
2002-12-06 07:33:45 +00:00
|
|
|
}
|
2003-07-02 02:02:18 +00:00
|
|
|
|
|
|
|
scope_messages.printf("ACTIVE_TASK::start(): forked process: pid %d\n", pid);
|
2003-08-11 03:33:14 +00:00
|
|
|
|
|
|
|
// set idle process priority
|
|
|
|
#ifdef HAVE_SETPRIORITY
|
|
|
|
if (setpriority(PRIO_PROCESS, pid, PROCESS_IDLE_PRIORITY)) {
|
|
|
|
perror("setpriority");
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
#endif
|
|
|
|
state = PROCESS_RUNNING;
|
2002-12-07 00:56:51 +00:00
|
|
|
result->active_task_state = PROCESS_RUNNING;
|
2002-04-30 22:22:54 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2003-05-22 20:47:56 +00:00
|
|
|
// Send a quit signal.
|
|
|
|
// Normally this is caught by the process, which can checkpoint
|
2002-07-15 23:21:20 +00:00
|
|
|
//
|
2002-12-06 07:33:45 +00:00
|
|
|
int ACTIVE_TASK::request_exit() {
|
2003-03-12 18:15:48 +00:00
|
|
|
#ifdef _WIN32
|
|
|
|
return !SetEvent(quitRequestEvent);
|
|
|
|
#else
|
|
|
|
return kill(pid, SIGQUIT);
|
|
|
|
#endif
|
2002-12-06 07:33:45 +00:00
|
|
|
}
|
2002-11-19 00:28:33 +00:00
|
|
|
|
2003-05-22 20:47:56 +00:00
|
|
|
// send a kill signal.
|
|
|
|
// This is not caught by the process
|
|
|
|
//
|
2002-12-06 07:33:45 +00:00
|
|
|
int ACTIVE_TASK::kill_task() {
|
2002-07-05 19:20:00 +00:00
|
|
|
#ifdef _WIN32
|
2003-03-12 18:15:48 +00:00
|
|
|
return !TerminateProcess(pid_handle, -1);
|
2002-12-06 07:33:45 +00:00
|
|
|
#else
|
|
|
|
return kill(pid, SIGKILL);
|
2002-07-05 19:20:00 +00:00
|
|
|
#endif
|
2002-07-01 18:16:31 +00:00
|
|
|
}
|
|
|
|
|
2003-09-06 03:34:03 +00:00
|
|
|
#if !defined(HAVE_WAIT4) && defined(HAVE_WAIT3)
#include <map>

// Exit status and resource usage of a child that was reaped by wait3()
// while the caller was waiting for a different child.
//
struct proc_info_t {
    int status;
    rusage r;
    proc_info_t() : status(0), r() {}
    proc_info_t(int s, const rusage &ru);
};

proc_info_t::proc_info_t(int s, const rusage &ru) : status(s), r(ru) {}

// Emulation of wait4() on top of wait3(), for systems that only have
// the latter.
//
// pid > 0:  wait for that particular child.  wait3() can't select a
//           child, so any other child it returns is stashed in a map
//           and handed out by a later call.
// pid <= 0: wait for any child (process groups are not emulated);
//           previously stashed children are delivered first.
// statusp, rusagep: receive the exit status / resource usage of the
//           returned child; either may be NULL.
// options:  passed through to wait3() (e.g. WNOHANG).
//
// Returns the pid of the reaped child, 0 if WNOHANG was given and no
// matching child has exited, or -1 on error (errno set by wait3()).
//
pid_t wait4(pid_t pid, int *statusp, int options, struct rusage *rusagep) {
    static std::map<pid_t,proc_info_t> proc_info;
    std::map<pid_t,proc_info_t>::iterator stashed;
    proc_info_t info;
    pid_t reaped;

    // First hand out a child that an earlier call already reaped.
    //
    if (pid > 0) {
        stashed = proc_info.find(pid);
    } else {
        stashed = proc_info.begin();
    }
    if (stashed != proc_info.end()) {
        reaped = stashed->first;
        if (statusp) *statusp = stashed->second.status;
        if (rusagep) *rusagep = stashed->second.r;
        proc_info.erase(stashed);
        return reaped;
    }

    // Keep reaping until we get the child we want, or wait3() has
    // nothing (more) to report.  With WNOHANG wait3() never blocks,
    // so the loop ends as soon as it returns 0.
    //
    for (;;) {
        reaped = wait3(&info.status, options, &info.r);
        if (pid > 0 && reaped > 0 && reaped != pid) {
            // somebody else's child: remember it for later
            proc_info[reaped] = info;
            continue;
        }
        if (reaped > 0) {
            if (statusp) *statusp = info.status;
            if (rusagep) *rusagep = info.r;
        }
        return reaped;
    }
}
#endif
|
|
|
|
|
2003-03-11 22:18:01 +00:00
|
|
|
bool ACTIVE_TASK::task_exited() {
|
|
|
|
#ifdef _WIN32
|
|
|
|
unsigned long exit_code;
|
|
|
|
if (GetExitCodeProcess(pid_handle, &exit_code)) {
|
|
|
|
if (exit_code != STILL_ACTIVE) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
int my_pid, stat;
|
2003-06-02 23:27:44 +00:00
|
|
|
struct rusage rs;
|
2003-03-11 22:18:01 +00:00
|
|
|
|
2003-06-02 23:27:44 +00:00
|
|
|
my_pid = wait4(pid, &stat, WNOHANG, &rs);
|
2003-03-11 22:18:01 +00:00
|
|
|
if (my_pid == pid) {
|
2003-06-02 23:27:44 +00:00
|
|
|
double x = rs.ru_utime.tv_sec + rs.ru_utime.tv_usec/1.e6;
|
|
|
|
result->final_cpu_time = current_cpu_time =
|
|
|
|
checkpoint_cpu_time = starting_cpu_time + x;
|
2003-03-11 22:18:01 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
#endif
|
2003-03-18 19:37:09 +00:00
|
|
|
return false;
|
2003-03-11 22:18:01 +00:00
|
|
|
}
|
|
|
|
|
2002-07-15 23:21:20 +00:00
|
|
|
// Inserts an active task into the ACTIVE_TASK_SET and starts it up
|
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
int ACTIVE_TASK_SET::insert(ACTIVE_TASK* atp) {
|
|
|
|
int retval;
|
2002-08-05 00:29:34 +00:00
|
|
|
|
2002-08-24 00:41:25 +00:00
|
|
|
get_slot_dir(atp->slot, atp->slot_dir);
|
|
|
|
clean_out_dir(atp->slot_dir);
|
2002-04-30 22:22:54 +00:00
|
|
|
retval = atp->start(true);
|
|
|
|
if (retval) return retval;
|
|
|
|
active_tasks.push_back(atp);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2003-05-08 18:16:58 +00:00
|
|
|
#if 0
// Delete every task in the set and leave the set empty.
// Exists only so that leak checkers don't report the tasks;
// currently compiled out.
//
void ACTIVE_TASK_SET::free_mem() {
    while (!active_tasks.empty()) {
        ACTIVE_TASK* doomed = active_tasks.front();
        active_tasks.erase(active_tasks.begin());
        delete doomed;
    }
}
#endif
|
2003-03-19 18:46:58 +00:00
|
|
|
|
2003-05-22 20:47:56 +00:00
|
|
|
// Do period checks on running apps:
|
|
|
|
// - get latest CPU time and % done info
|
|
|
|
// - check if any has exited, and clean up
|
|
|
|
// - see if any has exceeded its CPU or disk space limits, and abort
|
2002-04-30 22:22:54 +00:00
|
|
|
//
|
|
|
|
bool ACTIVE_TASK_SET::poll() {
|
2003-05-22 20:47:56 +00:00
|
|
|
bool action;
|
|
|
|
|
|
|
|
get_cpu_times();
|
|
|
|
action = check_app_exited();
|
|
|
|
if (action) return true;
|
2003-05-28 19:56:53 +00:00
|
|
|
action = check_rsc_limits_exceeded();
|
2003-05-22 20:47:56 +00:00
|
|
|
if (action) return true;
|
|
|
|
return false;
|
|
|
|
}
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2003-05-22 20:47:56 +00:00
|
|
|
bool ACTIVE_TASK_SET::check_app_exited() {
|
|
|
|
ACTIVE_TASK* atp;
|
2003-07-02 02:02:18 +00:00
|
|
|
|
|
|
|
ScopeMessages scope_messages(log_messages, ClientMessages::DEBUG_TASK);
|
|
|
|
|
2002-06-06 18:42:01 +00:00
|
|
|
#ifdef _WIN32
|
2002-07-05 05:43:57 +00:00
|
|
|
unsigned long exit_code;
|
2002-08-22 22:28:51 +00:00
|
|
|
bool found = false;
|
2002-06-06 18:42:01 +00:00
|
|
|
|
2002-12-07 04:14:52 +00:00
|
|
|
for (int i=0; i<active_tasks.size(); i++) {
|
2002-06-06 18:42:01 +00:00
|
|
|
atp = active_tasks[i];
|
2002-08-05 00:29:34 +00:00
|
|
|
if (GetExitCodeProcess(atp->pid_handle, &exit_code)) {
|
2002-11-20 22:57:43 +00:00
|
|
|
atp->result->final_cpu_time = atp->checkpoint_cpu_time;
|
2002-08-05 00:29:34 +00:00
|
|
|
if (exit_code != STILL_ACTIVE) {
|
2002-08-24 00:41:25 +00:00
|
|
|
found = true;
|
2002-12-06 07:33:45 +00:00
|
|
|
if (atp->state == PROCESS_ABORT_PENDING) {
|
|
|
|
atp->state = PROCESS_ABORTED;
|
2003-02-06 19:01:49 +00:00
|
|
|
atp->result->active_task_state = PROCESS_ABORTED;
|
2003-03-13 21:49:52 +00:00
|
|
|
gstate.report_result_error(
|
2003-08-02 00:02:11 +00:00
|
|
|
*(atp->result), 0, "process was aborted"
|
2003-02-20 00:06:07 +00:00
|
|
|
);
|
2002-12-06 07:33:45 +00:00
|
|
|
} else {
|
|
|
|
atp->state = PROCESS_EXITED;
|
|
|
|
atp->exit_status = exit_code;
|
|
|
|
atp->result->exit_status = atp->exit_status;
|
|
|
|
atp->result->active_task_state = PROCESS_EXITED;
|
2003-02-06 19:01:49 +00:00
|
|
|
//if a nonzero error code, then report it
|
2003-02-20 00:06:07 +00:00
|
|
|
if (exit_code) {
|
2003-03-13 21:49:52 +00:00
|
|
|
gstate.report_result_error(
|
2003-02-20 00:06:07 +00:00
|
|
|
*(atp->result), 0,
|
2003-08-06 22:55:35 +00:00
|
|
|
"process exited with a non-zero exit code"
|
2003-02-20 00:06:07 +00:00
|
|
|
);
|
2003-02-06 19:01:49 +00:00
|
|
|
}
|
2002-12-06 07:33:45 +00:00
|
|
|
}
|
2002-11-19 00:28:33 +00:00
|
|
|
CloseHandle(atp->pid_handle);
|
|
|
|
CloseHandle(atp->thread_handle);
|
2003-03-12 18:15:48 +00:00
|
|
|
CloseHandle(atp->quitRequestEvent);
|
2002-11-22 00:13:47 +00:00
|
|
|
atp->read_stderr_file();
|
|
|
|
clean_out_dir(atp->slot_dir);
|
2003-05-22 20:47:56 +00:00
|
|
|
|
|
|
|
// detach from shared mem. This will destroy shmem seg
|
|
|
|
// since we're the last attachment
|
|
|
|
//
|
|
|
|
if (atp->app_client_shm.shm) {
|
|
|
|
detach_shmem(atp->shm_handle, atp->app_client_shm.shm);
|
2003-05-23 17:45:05 +00:00
|
|
|
atp->app_client_shm.shm = NULL;
|
2003-05-22 20:47:56 +00:00
|
|
|
}
|
2002-07-05 05:43:57 +00:00
|
|
|
}
|
|
|
|
}
|
2002-06-06 18:42:01 +00:00
|
|
|
}
|
2002-12-06 07:33:45 +00:00
|
|
|
if (found) return true;
|
2002-12-05 19:13:06 +00:00
|
|
|
#else
|
2002-06-06 18:42:01 +00:00
|
|
|
int pid;
|
2002-08-09 23:16:37 +00:00
|
|
|
int stat;
|
2003-06-02 23:27:44 +00:00
|
|
|
struct rusage rs;
|
2002-06-06 18:42:01 +00:00
|
|
|
|
2003-06-02 23:27:44 +00:00
|
|
|
pid = wait4(0, &stat, WNOHANG, &rs);
|
2002-12-06 07:33:45 +00:00
|
|
|
if (pid > 0) {
|
2003-07-02 02:02:18 +00:00
|
|
|
scope_messages.printf("ACTIVE_TASK_SET::check_app_exited(): process %d is done\n", pid);
|
2002-12-06 07:33:45 +00:00
|
|
|
atp = lookup_pid(pid);
|
|
|
|
if (!atp) {
|
2003-07-03 05:01:29 +00:00
|
|
|
msg_printf(NULL, MSG_ERROR, "ACTIVE_TASK_SET::check_app_exited(): pid %d not found\n", pid);
|
2002-12-06 07:33:45 +00:00
|
|
|
return true;
|
|
|
|
}
|
2003-06-02 23:27:44 +00:00
|
|
|
double x = rs.ru_utime.tv_sec + rs.ru_utime.tv_usec/1.e6;
|
|
|
|
atp->result->final_cpu_time = atp->current_cpu_time =
|
|
|
|
atp->checkpoint_cpu_time = atp->starting_cpu_time + x;
|
2002-12-06 07:33:45 +00:00
|
|
|
if (atp->state == PROCESS_ABORT_PENDING) {
|
|
|
|
atp->state = PROCESS_ABORTED;
|
2003-03-17 19:24:38 +00:00
|
|
|
atp->result->active_task_state = PROCESS_ABORTED;
|
2003-03-13 21:49:52 +00:00
|
|
|
gstate.report_result_error(
|
2003-08-02 00:02:11 +00:00
|
|
|
*(atp->result), 0, "process was aborted"
|
2003-02-20 00:06:07 +00:00
|
|
|
);
|
2002-12-06 07:33:45 +00:00
|
|
|
} else {
|
|
|
|
if (WIFEXITED(stat)) {
|
|
|
|
atp->state = PROCESS_EXITED;
|
|
|
|
atp->exit_status = WEXITSTATUS(stat);
|
|
|
|
atp->result->exit_status = atp->exit_status;
|
2003-02-06 19:01:49 +00:00
|
|
|
atp->result->active_task_state = PROCESS_EXITED;
|
2003-03-13 21:49:52 +00:00
|
|
|
|
|
|
|
// If exit_status is nonzero, then we don't need to upload the
|
|
|
|
// output files
|
|
|
|
//
|
2003-02-06 19:01:49 +00:00
|
|
|
if(atp->exit_status) {
|
2003-03-13 21:49:52 +00:00
|
|
|
gstate.report_result_error(
|
2003-02-20 00:06:07 +00:00
|
|
|
*(atp->result), 0,
|
2003-08-06 22:55:35 +00:00
|
|
|
"process exited with a non-zero exit code"
|
2003-02-20 00:06:07 +00:00
|
|
|
);
|
2003-02-06 19:01:49 +00:00
|
|
|
}
|
2003-07-02 02:02:18 +00:00
|
|
|
scope_messages.printf("ACTIVE_TASK_SET::check_app_exited(): process exited: status %d\n", atp->exit_status);
|
2002-12-06 07:33:45 +00:00
|
|
|
} else if (WIFSIGNALED(stat)) {
|
|
|
|
atp->state = PROCESS_WAS_SIGNALED;
|
|
|
|
atp->signal = WTERMSIG(stat);
|
2002-12-07 00:56:51 +00:00
|
|
|
atp->result->signal = atp->signal;
|
2003-02-06 19:01:49 +00:00
|
|
|
atp->result->active_task_state = PROCESS_WAS_SIGNALED;
|
2003-03-13 21:49:52 +00:00
|
|
|
gstate.report_result_error(
|
2003-08-02 00:02:11 +00:00
|
|
|
*(atp->result), 0, "process was signaled"
|
2003-02-20 00:06:07 +00:00
|
|
|
);
|
2003-07-02 02:02:18 +00:00
|
|
|
scope_messages.printf("ACTIVE_TASK_SET::check_app_exited(): process was signaled: %d\n", atp->signal);
|
2002-12-06 07:33:45 +00:00
|
|
|
} else {
|
2003-02-06 19:01:49 +00:00
|
|
|
atp->state = PROCESS_EXIT_UNKNOWN;
|
2003-02-20 00:06:07 +00:00
|
|
|
atp->result->state = PROCESS_EXIT_UNKNOWN;
|
2002-12-06 07:33:45 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
atp->read_stderr_file();
|
|
|
|
clean_out_dir(atp->slot_dir);
|
|
|
|
|
2003-05-22 20:47:56 +00:00
|
|
|
// detach from and destroy share mem
|
|
|
|
//
|
|
|
|
if (atp->app_client_shm.shm) {
|
|
|
|
detach_shmem(atp->app_client_shm.shm);
|
2003-05-23 17:45:05 +00:00
|
|
|
atp->app_client_shm.shm = NULL;
|
2003-05-22 20:47:56 +00:00
|
|
|
}
|
|
|
|
destroy_shmem(atp->shm_key);
|
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
return true;
|
|
|
|
}
|
2002-06-06 18:42:01 +00:00
|
|
|
#endif
|
2003-05-22 20:47:56 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// if an app has exceeded its maximum CPU time, abort it
|
|
|
|
//
|
2003-05-28 19:56:53 +00:00
|
|
|
bool ACTIVE_TASK::check_max_cpu_exceeded() {
|
|
|
|
if (current_cpu_time > max_cpu_time) {
|
2003-06-03 22:47:15 +00:00
|
|
|
msg_printf(result->project, MSG_INFO,
|
2003-05-28 19:56:53 +00:00
|
|
|
"Aborting result %s: exceeded CPU time limit %f\n",
|
2003-06-03 22:47:15 +00:00
|
|
|
result->name, max_cpu_time);
|
2003-05-28 19:56:53 +00:00
|
|
|
abort();
|
|
|
|
return true;
|
2002-12-06 07:33:45 +00:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2002-11-22 00:13:47 +00:00
|
|
|
|
2003-05-22 20:47:56 +00:00
|
|
|
// if an app has exceeded its maximum disk usage, abort it
|
|
|
|
//
|
2003-05-28 19:56:53 +00:00
|
|
|
bool ACTIVE_TASK::check_max_disk_exceeded() {
|
2003-05-22 20:47:56 +00:00
|
|
|
double disk_usage;
|
|
|
|
int retval;
|
|
|
|
|
2003-05-28 19:56:53 +00:00
|
|
|
// don't do disk check too often
|
|
|
|
//
|
|
|
|
retval = current_disk_usage(disk_usage);
|
|
|
|
if (retval) {
|
2003-06-03 22:47:15 +00:00
|
|
|
msg_printf(0, MSG_ERROR, "Can't get application disk usage");
|
2003-05-28 19:56:53 +00:00
|
|
|
} else {
|
|
|
|
if (disk_usage > max_disk_usage) {
|
2003-07-03 05:01:29 +00:00
|
|
|
msg_printf(
|
|
|
|
result->project, MSG_INFO,
|
|
|
|
"Aborting result %s: exceeded disk limit %f\n",
|
|
|
|
result->name, max_disk_usage
|
|
|
|
);
|
2003-05-28 19:56:53 +00:00
|
|
|
abort();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2003-06-17 18:59:36 +00:00
|
|
|
#if 0
// (Currently compiled out.)
// Abort this task if working_set_size is above max_mem_usage,
// or above the max_memory_mbytes global preference.
// Returns true if the task was aborted, false otherwise.
//
bool ACTIVE_TASK::check_max_mem_exceeded() {
    // TODO: calculate working set size elsewhere
    bool over_task_limit = working_set_size > max_mem_usage;
    bool over_pref_limit =
        working_set_size/1048576 > gstate.global_prefs.max_memory_mbytes;

    if (!over_task_limit && !over_pref_limit) {
        return false;
    }

    msg_printf(
        result->project, MSG_INFO,
        "Aborting result %s: exceeded memory limit %f\n",
        result->name,
        min(max_mem_usage, gstate.global_prefs.max_memory_mbytes*1048576)
    );
    abort();
    return true;
}
#endif
|
2003-05-28 19:56:53 +00:00
|
|
|
|
|
|
|
// Check if any of the active tasks have exceeded their
|
|
|
|
// resource limits on disk, CPU time or memory
|
|
|
|
//
|
|
|
|
bool ACTIVE_TASK_SET::check_rsc_limits_exceeded() {
|
|
|
|
unsigned int j;
|
|
|
|
ACTIVE_TASK *atp;
|
|
|
|
static time_t last_disk_check_time = 0;
|
|
|
|
|
|
|
|
for (j=0;j<active_tasks.size();j++) {
|
|
|
|
atp = active_tasks[j];
|
|
|
|
if (atp->check_max_cpu_exceeded()) return true;
|
2003-06-17 18:59:36 +00:00
|
|
|
//else if (atp->check_max_mem_exceeded()) return true;
|
2003-05-28 19:56:53 +00:00
|
|
|
else if (time(0)>last_disk_check_time + gstate.global_prefs.disk_interval) {
|
|
|
|
last_disk_check_time = time(0);
|
|
|
|
if (atp->check_max_disk_exceeded()) return true;
|
2003-05-22 20:47:56 +00:00
|
|
|
}
|
|
|
|
}
|
2003-05-28 19:56:53 +00:00
|
|
|
|
2003-05-22 20:47:56 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// The application has done something wrong.
|
|
|
|
// May as well send it a kill signal.
|
2003-07-02 02:02:18 +00:00
|
|
|
//
|
2002-12-06 07:33:45 +00:00
|
|
|
int ACTIVE_TASK::abort() {
|
2003-09-02 03:38:04 +00:00
|
|
|
if (state == PROCESS_RUNNING) {
|
|
|
|
state = PROCESS_ABORT_PENDING;
|
|
|
|
result->active_task_state = PROCESS_ABORT_PENDING;
|
|
|
|
kill_task();
|
|
|
|
} else {
|
|
|
|
state = PROCESS_ABORTED;
|
|
|
|
}
|
|
|
|
return 0;
|
2002-11-22 00:13:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// check for the stderr file, copy to result record
|
|
|
|
//
|
|
|
|
bool ACTIVE_TASK::read_stderr_file() {
|
2003-06-27 23:13:15 +00:00
|
|
|
char stderr_file[MAX_BLOB_LEN];
|
2002-11-22 00:13:47 +00:00
|
|
|
char path[256];
|
|
|
|
int n;
|
|
|
|
|
|
|
|
sprintf(path, "%s%s%s", slot_dir, PATH_SEPARATOR, STDERR_FILE);
|
2002-04-30 22:22:54 +00:00
|
|
|
FILE* f = fopen(path, "r");
|
|
|
|
if (f) {
|
2003-07-21 12:42:41 +00:00
|
|
|
n = fread(stderr_file, 1, sizeof(stderr_file)-1, f);
|
2002-04-30 22:22:54 +00:00
|
|
|
fclose(f);
|
2003-07-21 12:42:41 +00:00
|
|
|
if (n < 0) return false;
|
|
|
|
stderr_file[n] = '\0';
|
2003-06-27 23:13:15 +00:00
|
|
|
result->stderr_out += "<stderr_txt>\n";
|
|
|
|
result->stderr_out += stderr_file;
|
|
|
|
result->stderr_out += "\n</stderr_txt>\n";
|
|
|
|
result->stderr_out = result->stderr_out.substr(0,MAX_BLOB_LEN-1);
|
2003-02-20 00:06:07 +00:00
|
|
|
return true;
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
2002-12-05 21:56:33 +00:00
|
|
|
return false;
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
|
|
|
|
2003-05-09 00:00:39 +00:00
|
|
|
void ACTIVE_TASK::request_graphics_mode(int mode) {
|
2003-05-09 20:33:57 +00:00
|
|
|
app_client_shm.send_graphics_mode_msg(CORE_APP_GFX_SEG, mode);
|
|
|
|
graphics_requested_mode = mode;
|
2003-05-07 23:42:17 +00:00
|
|
|
}
|
|
|
|
|
2003-05-09 00:00:39 +00:00
|
|
|
void ACTIVE_TASK::check_graphics_mode_ack() {
|
2003-05-09 20:33:57 +00:00
|
|
|
int mode;
|
|
|
|
if (app_client_shm.get_graphics_mode_msg(APP_CORE_GFX_SEG, mode)) {
|
|
|
|
graphics_acked_mode = mode;
|
|
|
|
if (mode != MODE_FULLSCREEN) {
|
|
|
|
graphics_mode_before_ss = mode;
|
|
|
|
}
|
|
|
|
}
|
2003-05-07 23:42:17 +00:00
|
|
|
}
|
|
|
|
|
2003-05-22 20:47:56 +00:00
|
|
|
// send quit signal to all tasks.
|
|
|
|
// If they don't exit in one second, send them a kill signal
|
|
|
|
// TODO: unsuspend active tasks so they have a chance to checkpoint
|
|
|
|
//
|
|
|
|
int ACTIVE_TASK_SET::exit_tasks() {
|
|
|
|
request_tasks_exit();
|
|
|
|
|
2003-06-27 21:15:25 +00:00
|
|
|
// Wait 5 seconds for them to exit normally; if they don't then kill them
|
2003-05-22 20:47:56 +00:00
|
|
|
//
|
2003-06-27 21:15:25 +00:00
|
|
|
if (wait_for_exit(5)) {
|
2003-05-22 20:47:56 +00:00
|
|
|
kill_tasks();
|
|
|
|
}
|
|
|
|
|
|
|
|
get_cpu_times();
|
2003-07-02 02:02:18 +00:00
|
|
|
|
2003-05-22 20:47:56 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Wait up to wait_time seconds for all processes to exit
|
2003-03-11 22:18:01 +00:00
|
|
|
//
|
|
|
|
int ACTIVE_TASK_SET::wait_for_exit(double wait_time) {
|
2003-03-13 21:49:52 +00:00
|
|
|
bool all_exited;
|
2003-03-17 19:24:38 +00:00
|
|
|
unsigned int i,n;
|
|
|
|
ACTIVE_TASK *atp;
|
2003-03-11 22:18:01 +00:00
|
|
|
|
2003-03-13 21:49:52 +00:00
|
|
|
for (i=0; i<10; i++) {
|
2003-03-11 22:18:01 +00:00
|
|
|
boinc_sleep(wait_time/10.0);
|
|
|
|
all_exited = true;
|
|
|
|
|
2003-03-13 21:49:52 +00:00
|
|
|
for (n=0; n<active_tasks.size(); n++) {
|
2003-03-11 22:18:01 +00:00
|
|
|
atp = active_tasks[n];
|
|
|
|
if (!atp->task_exited()) {
|
|
|
|
all_exited = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2003-03-17 19:24:38 +00:00
|
|
|
if (all_exited) return 0;
|
2003-03-11 22:18:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2002-07-15 23:21:20 +00:00
|
|
|
// Find the ACTIVE_TASK in the current set with the matching PID
|
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
ACTIVE_TASK* ACTIVE_TASK_SET::lookup_pid(int pid) {
|
|
|
|
unsigned int i;
|
|
|
|
ACTIVE_TASK* atp;
|
2002-08-05 00:29:34 +00:00
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
for (i=0; i<active_tasks.size(); i++) {
|
|
|
|
atp = active_tasks[i];
|
|
|
|
if (atp->pid == pid) return atp;
|
|
|
|
}
|
2002-07-15 23:21:20 +00:00
|
|
|
return NULL;
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
|
|
|
|
2003-08-12 20:28:55 +00:00
|
|
|
// Find the ACTIVE_TASK in the current set with the matching result
|
|
|
|
//
|
|
|
|
ACTIVE_TASK* ACTIVE_TASK_SET::lookup_result(RESULT* result) {
|
|
|
|
for (active_tasks_v::iterator i = active_tasks.begin();
|
|
|
|
i != active_tasks.end(); ++i)
|
|
|
|
{
|
|
|
|
ACTIVE_TASK* atp = *i;
|
|
|
|
if (atp->result == result) {
|
|
|
|
return atp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2002-12-06 07:33:45 +00:00
|
|
|
// suspend all currently running tasks
|
2002-07-15 23:21:20 +00:00
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
void ACTIVE_TASK_SET::suspend_all() {
|
|
|
|
unsigned int i;
|
|
|
|
ACTIVE_TASK* atp;
|
|
|
|
for (i=0; i<active_tasks.size(); i++) {
|
|
|
|
atp = active_tasks[i];
|
2003-03-13 21:49:52 +00:00
|
|
|
if (atp->suspend()) {
|
2003-06-03 22:47:15 +00:00
|
|
|
msg_printf(
|
2003-03-13 21:49:52 +00:00
|
|
|
atp->wup->project,
|
2003-06-03 22:47:15 +00:00
|
|
|
MSG_ERROR,
|
|
|
|
"ACTIVE_TASK_SET::suspend_all(): could not suspend active_task"
|
2003-03-13 21:49:52 +00:00
|
|
|
);
|
2003-02-06 19:01:49 +00:00
|
|
|
}
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2002-12-06 07:33:45 +00:00
|
|
|
// resume all currently running tasks
|
2002-07-15 23:21:20 +00:00
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
void ACTIVE_TASK_SET::unsuspend_all() {
|
|
|
|
unsigned int i;
|
|
|
|
ACTIVE_TASK* atp;
|
2003-02-06 19:01:49 +00:00
|
|
|
for (i=0; i<active_tasks.size(); i++) {
|
2002-04-30 22:22:54 +00:00
|
|
|
atp = active_tasks[i];
|
2003-03-13 21:49:52 +00:00
|
|
|
if (atp->unsuspend()) {
|
2003-06-03 22:47:15 +00:00
|
|
|
msg_printf(
|
2003-03-13 21:49:52 +00:00
|
|
|
atp->wup->project,
|
2003-06-03 22:47:15 +00:00
|
|
|
MSG_ERROR,
|
|
|
|
"ACTIVE_TASK_SET::unsuspend_all(): could not unsuspend active_task"
|
2003-03-13 21:49:52 +00:00
|
|
|
);
|
2003-02-06 19:01:49 +00:00
|
|
|
}
|
2003-07-02 02:02:18 +00:00
|
|
|
}
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
|
|
|
|
2002-12-06 07:33:45 +00:00
|
|
|
// initiate exit of all currently running tasks
|
2002-07-15 23:21:20 +00:00
|
|
|
//
|
2003-03-11 22:18:01 +00:00
|
|
|
void ACTIVE_TASK_SET::request_tasks_exit() {
|
2002-07-01 18:16:31 +00:00
|
|
|
unsigned int i;
|
2002-07-11 01:09:53 +00:00
|
|
|
ACTIVE_TASK *atp;
|
2002-07-01 18:16:31 +00:00
|
|
|
for (i=0; i<active_tasks.size(); i++) {
|
|
|
|
atp = active_tasks[i];
|
2003-02-06 19:01:49 +00:00
|
|
|
if(atp->request_exit()) {
|
2003-06-03 22:47:15 +00:00
|
|
|
msg_printf(atp->wup->project,
|
|
|
|
MSG_ERROR,
|
|
|
|
"ACTIVE_TASK_SET::exit_tasks(): could not request exit of active_task"
|
2003-03-13 21:49:52 +00:00
|
|
|
);
|
2003-02-06 19:01:49 +00:00
|
|
|
}
|
2002-07-01 18:16:31 +00:00
|
|
|
}
|
2002-10-07 06:32:51 +00:00
|
|
|
}
|
2002-10-06 00:43:54 +00:00
|
|
|
|
2003-03-11 22:18:01 +00:00
|
|
|
// Kills all currently running tasks without warning
|
|
|
|
//
|
|
|
|
void ACTIVE_TASK_SET::kill_tasks() {
|
|
|
|
unsigned int i;
|
|
|
|
ACTIVE_TASK *atp;
|
|
|
|
for (i=0; i<active_tasks.size(); i++) {
|
|
|
|
atp = active_tasks[i];
|
|
|
|
atp->kill_task();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2002-12-06 07:33:45 +00:00
|
|
|
// suspend a task
|
2002-07-15 23:21:20 +00:00
|
|
|
//
|
2002-12-06 07:33:45 +00:00
|
|
|
int ACTIVE_TASK::suspend() {
|
|
|
|
#ifdef _WIN32
|
2003-02-06 19:01:49 +00:00
|
|
|
SuspendThread( thread_handle );
|
2002-12-06 07:33:45 +00:00
|
|
|
#else
|
|
|
|
kill(pid, SIGSTOP);
|
|
|
|
#endif
|
|
|
|
return 0;
|
|
|
|
}
|
2002-06-06 18:42:01 +00:00
|
|
|
|
2002-12-06 07:33:45 +00:00
|
|
|
// resume a suspended task
|
|
|
|
//
|
|
|
|
int ACTIVE_TASK::unsuspend() {
|
|
|
|
#ifdef _WIN32
|
2003-02-06 19:01:49 +00:00
|
|
|
ResumeThread( thread_handle );
|
2002-06-06 18:42:01 +00:00
|
|
|
#else
|
2002-12-06 07:33:45 +00:00
|
|
|
kill(pid, SIGCONT);
|
2002-06-06 18:42:01 +00:00
|
|
|
#endif
|
2002-12-06 07:33:45 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2002-06-06 18:42:01 +00:00
|
|
|
|
2002-12-06 07:33:45 +00:00
|
|
|
// Remove an ACTIVE_TASK from the set.
|
|
|
|
// Do this only if you're sure that the process has exited.
|
2002-07-15 23:21:20 +00:00
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
int ACTIVE_TASK_SET::remove(ACTIVE_TASK* atp) {
|
|
|
|
vector<ACTIVE_TASK*>::iterator iter;
|
2002-08-05 00:29:34 +00:00
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
iter = active_tasks.begin();
|
|
|
|
while (iter != active_tasks.end()) {
|
|
|
|
if (*iter == atp) {
|
|
|
|
active_tasks.erase(iter);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
iter++;
|
|
|
|
}
|
2003-07-03 05:01:29 +00:00
|
|
|
msg_printf(NULL, MSG_ERROR, "ACTIVE_TASK_SET::remove(): not found\n");
|
2002-04-30 22:22:54 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2002-07-15 23:21:20 +00:00
|
|
|
// Restart active tasks without wiping and reinitializing slot directories
|
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
int ACTIVE_TASK_SET::restart_tasks() {
|
|
|
|
vector<ACTIVE_TASK*>::iterator iter;
|
|
|
|
ACTIVE_TASK* atp;
|
|
|
|
int retval;
|
|
|
|
|
2003-07-02 02:02:18 +00:00
|
|
|
ScopeMessages scope_messages(log_messages, ClientMessages::DEBUG_TASK);
|
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
iter = active_tasks.begin();
|
|
|
|
while (iter != active_tasks.end()) {
|
|
|
|
atp = *iter;
|
2002-12-11 22:40:33 +00:00
|
|
|
atp->init(atp->result);
|
2002-08-24 00:41:25 +00:00
|
|
|
get_slot_dir(atp->slot, atp->slot_dir);
|
2003-02-06 19:01:49 +00:00
|
|
|
atp->result->is_active = true;
|
2002-04-30 22:22:54 +00:00
|
|
|
retval = atp->start(false);
|
2003-07-29 23:26:32 +00:00
|
|
|
msg_printf(atp->wup->project, MSG_INFO,
|
|
|
|
"Restarting computation for result %s", atp->result->name
|
|
|
|
);
|
2002-04-30 22:22:54 +00:00
|
|
|
if (retval) {
|
2003-06-03 22:47:15 +00:00
|
|
|
msg_printf(atp->wup->project, MSG_ERROR, "ACTIVE_TASKS::restart_tasks(); restart failed: %d\n", retval);
|
2003-01-29 21:40:34 +00:00
|
|
|
atp->result->active_task_state = PROCESS_COULDNT_START;
|
2003-03-13 21:49:52 +00:00
|
|
|
gstate.report_result_error(
|
2003-02-20 00:06:07 +00:00
|
|
|
*(atp->result), retval,
|
2003-08-02 00:02:11 +00:00
|
|
|
"Couldn't restart the app for this result."
|
2003-02-20 00:06:07 +00:00
|
|
|
);
|
2002-04-30 22:22:54 +00:00
|
|
|
active_tasks.erase(iter);
|
|
|
|
} else {
|
|
|
|
iter++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2003-05-22 20:47:56 +00:00
|
|
|
// Get the task's CPU time from the operating system.
// On Windows: set both current_cpu_time and checkpoint_cpu_time to
// starting_cpu_time plus the process's kernel+user time, and return 0;
// return -1 if GetProcessTimes() fails.
// On other platforms nothing is done and -1 is returned.
//
int ACTIVE_TASK::get_cpu_time_via_os() {
#ifdef _WIN32
    FILETIME t_created, t_exited, t_kernel, t_user;

    // Get the elapsed CPU time
    if (GetProcessTimes(pid_handle, &t_created, &t_exited, &t_kernel, &t_user)) {
        ULARGE_INTEGER kernel_ticks, user_ticks;

        kernel_ticks.LowPart = t_kernel.dwLowDateTime;
        kernel_ticks.HighPart = t_kernel.dwHighDateTime;
        user_ticks.LowPart = t_user.dwLowDateTime;
        user_ticks.HighPart = t_user.dwHighDateTime;

        // Runtimes in 100 nanosecond units
        LONGLONG total_ticks = kernel_ticks.QuadPart + user_ticks.QuadPart;

        checkpoint_cpu_time = starting_cpu_time + total_ticks/1.e7;
        current_cpu_time = checkpoint_cpu_time;
        return 0;
    }
#else
    // On UNIX, we can't get CPU time before process has exited for some reason
#endif
    return -1;
}
|
|
|
|
|
2003-05-22 20:47:56 +00:00
|
|
|
// See if the app has generated a new fraction-done message in shared mem.
|
2002-11-19 21:26:34 +00:00
|
|
|
// If so read it and return true.
|
2002-07-15 23:21:20 +00:00
|
|
|
//
|
2003-05-22 20:47:56 +00:00
|
|
|
int ACTIVE_TASK::get_cpu_time_via_shmem(time_t now) {
|
2003-05-09 20:33:57 +00:00
|
|
|
char msg_buf[SHM_SEG_SIZE];
|
2003-05-08 18:11:05 +00:00
|
|
|
if (app_client_shm.get_msg(msg_buf, APP_CORE_WORKER_SEG)) {
|
2003-05-22 20:47:56 +00:00
|
|
|
last_status_msg_time = now;
|
2003-05-08 18:11:05 +00:00
|
|
|
fraction_done = current_cpu_time = checkpoint_cpu_time = 0.0;
|
|
|
|
parse_double(msg_buf, "<fraction_done>", fraction_done);
|
|
|
|
parse_double(msg_buf, "<current_cpu_time>", current_cpu_time);
|
|
|
|
parse_double(msg_buf, "<checkpoint_cpu_time>", checkpoint_cpu_time);
|
2003-05-28 19:56:53 +00:00
|
|
|
parse_double(msg_buf, "<working_set_size>", working_set_size);
|
2003-06-06 21:23:14 +00:00
|
|
|
|
2003-08-13 21:32:58 +00:00
|
|
|
if (last_frac_update == 0) {
|
|
|
|
last_frac_update = now;
|
|
|
|
last_frac_done = fraction_done;
|
|
|
|
recent_change = 0;
|
|
|
|
} else {
|
|
|
|
recent_change += (fraction_done - last_frac_done);
|
|
|
|
int tdiff = now-last_frac_update;
|
|
|
|
if (tdiff>0) {
|
|
|
|
double recent_frac_rate_of_change = max(0.0, recent_change) / tdiff;
|
|
|
|
if (frac_rate_of_change == 0) {
|
|
|
|
frac_rate_of_change = recent_frac_rate_of_change;
|
|
|
|
} else {
|
|
|
|
double x = exp(-1*log(2.0)/20.0);
|
|
|
|
frac_rate_of_change = frac_rate_of_change*x + recent_frac_rate_of_change*(1-x);
|
|
|
|
}
|
|
|
|
last_frac_update = now;
|
|
|
|
last_frac_done = fraction_done;
|
|
|
|
recent_change = 0;
|
|
|
|
}
|
|
|
|
}
|
2003-06-06 21:23:14 +00:00
|
|
|
|
2003-05-22 20:47:56 +00:00
|
|
|
return 0;
|
2002-08-05 00:29:34 +00:00
|
|
|
}
|
2003-05-22 20:47:56 +00:00
|
|
|
|
|
|
|
// if no message in 5 seconds, get the CPU time by system calls
|
|
|
|
//
|
|
|
|
if (last_status_msg_time+5 < now) {
|
|
|
|
last_status_msg_time = now;
|
|
|
|
return get_cpu_time_via_os();
|
2003-05-09 21:54:23 +00:00
|
|
|
}
|
2003-05-22 20:47:56 +00:00
|
|
|
return -1;
|
2003-03-17 19:24:38 +00:00
|
|
|
}
|
|
|
|
|
2003-05-22 20:47:56 +00:00
|
|
|
// get CPU times of active tasks
|
2003-03-17 19:24:38 +00:00
|
|
|
//
|
2003-05-22 20:47:56 +00:00
|
|
|
void ACTIVE_TASK_SET::get_cpu_times() {
|
2003-03-17 19:24:38 +00:00
|
|
|
unsigned int i;
|
|
|
|
ACTIVE_TASK *atp;
|
2003-05-22 20:47:56 +00:00
|
|
|
time_t now = time(0);
|
2003-03-17 19:24:38 +00:00
|
|
|
for (i=0; i<active_tasks.size(); i++) {
|
|
|
|
atp = active_tasks[i];
|
2003-05-22 20:47:56 +00:00
|
|
|
atp->get_cpu_time_via_shmem(now);
|
2003-03-17 19:24:38 +00:00
|
|
|
}
|
2002-06-21 18:31:32 +00:00
|
|
|
}
|
|
|
|
|
2002-12-06 07:33:45 +00:00
|
|
|
// Returns the estimated time to completion (in seconds) of this task,
|
|
|
|
// based on current reported CPU time and fraction done
|
2002-11-20 20:14:48 +00:00
|
|
|
//
|
|
|
|
double ACTIVE_TASK::est_time_to_completion() {
|
2003-08-13 20:00:19 +00:00
|
|
|
if (fraction_done <= 0 || fraction_done > 1 || frac_rate_of_change <= 0) {
|
2002-12-05 21:56:33 +00:00
|
|
|
return -1;
|
|
|
|
}
|
2003-08-13 21:32:07 +00:00
|
|
|
return (current_cpu_time / fraction_done) - current_cpu_time;
|
|
|
|
//return (1.0-fraction_done)/frac_rate_of_change;
|
2002-11-20 20:14:48 +00:00
|
|
|
}
|
|
|
|
|
2002-12-06 07:33:45 +00:00
|
|
|
// size of output files and files in slot dir
|
|
|
|
//
|
|
|
|
int ACTIVE_TASK::current_disk_usage(double& size) {
|
|
|
|
double x;
|
|
|
|
unsigned int i;
|
|
|
|
int retval;
|
|
|
|
FILE_INFO* fip;
|
|
|
|
char path[256];
|
|
|
|
|
|
|
|
retval = dir_size(slot_dir, size);
|
|
|
|
if (retval) return retval;
|
|
|
|
for (i=0; i<result->output_files.size(); i++) {
|
|
|
|
fip = result->output_files[i].file_info;
|
|
|
|
get_pathname(fip, path);
|
|
|
|
retval = file_size(path, x);
|
|
|
|
if (!retval) size += x;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2003-02-24 21:25:16 +00:00
|
|
|
// Get the next available free slot, or returns -1 if all slots are full
|
2002-08-26 22:14:06 +00:00
|
|
|
//
|
|
|
|
int ACTIVE_TASK_SET::get_free_slot(int total_slots) {
|
2002-09-22 23:27:14 +00:00
|
|
|
unsigned int i;
|
2003-02-24 21:25:16 +00:00
|
|
|
int j;
|
|
|
|
bool found;
|
2002-08-26 22:14:06 +00:00
|
|
|
|
2002-09-22 23:27:14 +00:00
|
|
|
if (active_tasks.size() >= (unsigned int)total_slots) {
|
2002-08-26 22:14:06 +00:00
|
|
|
return -1;
|
2002-09-22 23:27:14 +00:00
|
|
|
}
|
|
|
|
|
2003-02-24 21:25:16 +00:00
|
|
|
for (j=0; j<total_slots; j++) {
|
|
|
|
found = false;
|
|
|
|
for (i=0; i<active_tasks.size(); i++) {
|
|
|
|
if (active_tasks[i]->slot == j) {
|
|
|
|
found = true;
|
|
|
|
break;
|
|
|
|
}
|
2002-08-26 22:14:06 +00:00
|
|
|
}
|
2003-02-24 21:25:16 +00:00
|
|
|
if (!found) return j;
|
2002-08-26 22:14:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2002-07-15 23:21:20 +00:00
|
|
|
// Write XML data about this ACTIVE_TASK
|
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
int ACTIVE_TASK::write(FILE* fout) {
|
|
|
|
fprintf(fout,
|
|
|
|
"<active_task>\n"
|
2002-06-21 06:52:47 +00:00
|
|
|
" <project_master_url>%s</project_master_url>\n"
|
2002-04-30 22:22:54 +00:00
|
|
|
" <result_name>%s</result_name>\n"
|
|
|
|
" <app_version_num>%d</app_version_num>\n"
|
|
|
|
" <slot>%d</slot>\n"
|
2002-08-05 00:29:34 +00:00
|
|
|
" <checkpoint_cpu_time>%f</checkpoint_cpu_time>\n"
|
2002-04-30 22:22:54 +00:00
|
|
|
"</active_task>\n",
|
2002-06-21 06:52:47 +00:00
|
|
|
result->project->master_url,
|
2002-04-30 22:22:54 +00:00
|
|
|
result->name,
|
|
|
|
app_version->version_num,
|
|
|
|
slot,
|
2002-08-05 00:29:34 +00:00
|
|
|
checkpoint_cpu_time
|
2002-04-30 22:22:54 +00:00
|
|
|
);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2002-07-15 23:21:20 +00:00
|
|
|
// Parse XML information about an active task
|
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
int ACTIVE_TASK::parse(FILE* fin, CLIENT_STATE* cs) {
|
2002-06-21 06:52:47 +00:00
|
|
|
char buf[256], result_name[256], project_master_url[256];
|
2002-04-30 22:22:54 +00:00
|
|
|
int app_version_num=0;
|
|
|
|
PROJECT* project;
|
2002-08-05 00:29:34 +00:00
|
|
|
|
2003-03-19 01:15:46 +00:00
|
|
|
strcpy(result_name, "");
|
|
|
|
strcpy(project_master_url, "");
|
2002-04-30 22:22:54 +00:00
|
|
|
while (fgets(buf, 256, fin)) {
|
|
|
|
if (match_tag(buf, "</active_task>")) {
|
2002-06-21 06:52:47 +00:00
|
|
|
project = cs->lookup_project(project_master_url);
|
2002-04-30 22:22:54 +00:00
|
|
|
if (!project) {
|
2003-07-03 05:01:29 +00:00
|
|
|
msg_printf(
|
|
|
|
NULL, MSG_ERROR,
|
2002-04-30 22:22:54 +00:00
|
|
|
"ACTIVE_TASK::parse(): project not found: %s\n",
|
2002-06-21 06:52:47 +00:00
|
|
|
project_master_url
|
2002-04-30 22:22:54 +00:00
|
|
|
);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
result = cs->lookup_result(project, result_name);
|
|
|
|
if (!result) {
|
2003-07-03 05:01:29 +00:00
|
|
|
msg_printf(
|
|
|
|
NULL, MSG_ERROR, "ACTIVE_TASK::parse(): result not found\n"
|
|
|
|
);
|
2002-04-30 22:22:54 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
wup = result->wup;
|
|
|
|
app_version = cs->lookup_app_version(
|
|
|
|
result->app, app_version_num
|
|
|
|
);
|
|
|
|
if (!app_version) {
|
2003-07-03 05:01:29 +00:00
|
|
|
msg_printf(
|
|
|
|
NULL, MSG_ERROR,
|
|
|
|
"ACTIVE_TASK::parse(): app_version not found\n"
|
|
|
|
);
|
2002-04-30 22:22:54 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2002-09-22 23:27:14 +00:00
|
|
|
else if (parse_str(buf, "<result_name>", result_name, sizeof(result_name))) continue;
|
|
|
|
else if (parse_str(buf, "<project_master_url>", project_master_url, sizeof(project_master_url))) continue;
|
2002-04-30 22:22:54 +00:00
|
|
|
else if (parse_int(buf, "<app_version_num>", app_version_num)) continue;
|
|
|
|
else if (parse_int(buf, "<slot>", slot)) continue;
|
2002-08-24 00:41:25 +00:00
|
|
|
else if (parse_double(buf, "<checkpoint_cpu_time>", checkpoint_cpu_time)) continue;
|
2003-07-03 05:01:29 +00:00
|
|
|
else msg_printf(NULL, MSG_ERROR, "ACTIVE_TASK::parse(): unrecognized %s\n", buf);
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2002-07-15 23:21:20 +00:00
|
|
|
// Write XML information about this active task set
|
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
int ACTIVE_TASK_SET::write(FILE* fout) {
|
|
|
|
unsigned int i;
|
2003-03-08 23:48:05 +00:00
|
|
|
int retval;
|
2002-08-05 00:29:34 +00:00
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
fprintf(fout, "<active_task_set>\n");
|
|
|
|
for (i=0; i<active_tasks.size(); i++) {
|
2003-03-08 23:48:05 +00:00
|
|
|
retval = active_tasks[i]->write(fout);
|
|
|
|
if (retval) return retval;
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
|
|
|
fprintf(fout, "</active_task_set>\n");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2002-07-15 23:21:20 +00:00
|
|
|
// Parse XML information about an active task set
|
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
int ACTIVE_TASK_SET::parse(FILE* fin, CLIENT_STATE* cs) {
|
|
|
|
ACTIVE_TASK* atp;
|
|
|
|
char buf[256];
|
|
|
|
int retval;
|
2002-08-05 00:29:34 +00:00
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
while (fgets(buf, 256, fin)) {
|
|
|
|
if (match_tag(buf, "</active_task_set>")) return 0;
|
|
|
|
else if (match_tag(buf, "<active_task>")) {
|
|
|
|
atp = new ACTIVE_TASK;
|
|
|
|
retval = atp->parse(fin, cs);
|
|
|
|
if (!retval) active_tasks.push_back(atp);
|
|
|
|
else delete atp;
|
|
|
|
} else {
|
2003-07-03 05:01:29 +00:00
|
|
|
msg_printf(NULL, MSG_ERROR, "ACTIVE_TASK_SET::parse(): unrecognized %s\n", buf);
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2003-05-08 18:11:05 +00:00
|
|
|
|
|
|
|
// return an app with pre-ss mode WINDOW, if there is one
|
|
|
|
// else return an app with pre-ss mode HIDE, if there is one
|
|
|
|
// else return NULL
|
|
|
|
//
|
|
|
|
ACTIVE_TASK* ACTIVE_TASK_SET::get_graphics_capable_app() {
|
|
|
|
unsigned int i;
|
|
|
|
ACTIVE_TASK* atp;
|
|
|
|
|
|
|
|
for (i=0; i<active_tasks.size(); i++) {
|
|
|
|
atp = active_tasks[i];
|
|
|
|
if (atp->graphics_mode_before_ss == MODE_WINDOW) {
|
|
|
|
return atp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (i=0; i<active_tasks.size(); i++) {
|
|
|
|
atp = active_tasks[i];
|
|
|
|
if (atp->graphics_mode_before_ss == MODE_HIDE_GRAPHICS) {
|
|
|
|
return atp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
// return an app (if any) with given requested mode
|
|
|
|
//
|
|
|
|
ACTIVE_TASK* ACTIVE_TASK_SET::get_app_requested(int req_mode) {
|
|
|
|
unsigned int i;
|
|
|
|
ACTIVE_TASK* atp;
|
|
|
|
|
|
|
|
for (i=0; i<active_tasks.size(); i++) {
|
|
|
|
atp = active_tasks[i];
|
|
|
|
if (atp->graphics_requested_mode == req_mode) {
|
|
|
|
return atp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
void ACTIVE_TASK_SET::save_app_modes() {
|
|
|
|
unsigned int i;
|
|
|
|
ACTIVE_TASK* atp;
|
|
|
|
|
|
|
|
for (i=0; i<active_tasks.size(); i++) {
|
|
|
|
atp = active_tasks[i];
|
|
|
|
atp->graphics_mode_before_ss = atp->graphics_acked_mode;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void ACTIVE_TASK_SET::hide_apps() {
|
|
|
|
unsigned int i;
|
|
|
|
ACTIVE_TASK* atp;
|
|
|
|
|
|
|
|
for (i=0; i<active_tasks.size(); i++) {
|
|
|
|
atp = active_tasks[i];
|
2003-05-09 18:17:25 +00:00
|
|
|
atp->request_graphics_mode(MODE_HIDE_GRAPHICS);
|
2003-05-08 18:11:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void ACTIVE_TASK_SET::restore_apps() {
|
|
|
|
unsigned int i;
|
|
|
|
ACTIVE_TASK* atp;
|
|
|
|
|
|
|
|
for (i=0; i<active_tasks.size(); i++) {
|
|
|
|
atp = active_tasks[i];
|
|
|
|
if (atp->graphics_requested_mode != atp->graphics_mode_before_ss) {
|
2003-05-09 18:17:25 +00:00
|
|
|
atp->request_graphics_mode(atp->graphics_mode_before_ss);
|
2003-05-08 18:11:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2003-05-09 00:00:39 +00:00
|
|
|
|
|
|
|
// Call check_graphics_mode_ack() on every task in the set.
//
void ACTIVE_TASK_SET::check_graphics_mode_ack() {
    unsigned int k = 0;

    while (k < active_tasks.size()) {
        active_tasks[k]->check_graphics_mode_ack();
        ++k;
    }
}
|
2003-05-29 18:28:02 +00:00
|
|
|
|
|
|
|
bool ACTIVE_TASK::supports_graphics() {
|
|
|
|
return (graphics_acked_mode != MODE_UNSUPPORTED);
|
|
|
|
}
|