2005-01-20 23:22:22 +00:00
|
|
|
// Berkeley Open Infrastructure for Network Computing
|
|
|
|
// http://boinc.berkeley.edu
|
|
|
|
// Copyright (C) 2005 University of California
|
2003-08-21 23:44:57 +00:00
|
|
|
//
|
2005-01-20 23:22:22 +00:00
|
|
|
// This is free software; you can redistribute it and/or
|
|
|
|
// modify it under the terms of the GNU Lesser General Public
|
|
|
|
// License as published by the Free Software Foundation;
|
|
|
|
// either version 2.1 of the License, or (at your option) any later version.
|
2003-08-21 23:44:57 +00:00
|
|
|
//
|
2005-01-20 23:22:22 +00:00
|
|
|
// This software is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
2002-08-05 00:29:34 +00:00
|
|
|
//
|
2005-01-20 23:22:22 +00:00
|
|
|
// To view the GNU Lesser General Public License visit
|
|
|
|
// http://www.gnu.org/copyleft/lesser.html
|
|
|
|
// or write to the Free Software Foundation, Inc.,
|
|
|
|
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
2002-08-05 00:29:34 +00:00
|
|
|
|
2003-05-07 23:42:17 +00:00
|
|
|
// Code that's in the BOINC app library (but NOT in the core client)
|
2003-09-30 21:17:20 +00:00
|
|
|
// graphics-related code goes in graphics_api.C, not here
|
2003-05-07 23:42:17 +00:00
|
|
|
|
2004-06-16 21:34:31 +00:00
|
|
|
#ifdef _WIN32
|
|
|
|
#include "boinc_win.h"
|
2004-12-30 03:03:26 +00:00
|
|
|
#include "version.h"
|
2004-06-16 21:34:31 +00:00
|
|
|
#else
|
2004-12-14 20:28:13 +00:00
|
|
|
#include "config.h"
|
2004-06-16 12:01:49 +00:00
|
|
|
#include <cstdlib>
|
|
|
|
#include <cstdio>
|
|
|
|
#include <cstdarg>
|
2004-06-12 18:44:53 +00:00
|
|
|
#include <sys/types.h>
|
2004-02-06 09:34:00 +00:00
|
|
|
#ifdef HAVE_UNISTD_H
|
2002-08-05 00:29:34 +00:00
|
|
|
#include <unistd.h>
|
|
|
|
#endif
|
|
|
|
#ifdef HAVE_SYS_TIME_H
|
|
|
|
#include <sys/time.h>
|
|
|
|
#endif
|
2005-01-27 09:19:42 +00:00
|
|
|
#ifdef HAVE_PTHREAD
|
|
|
|
#include <pthread.h>
|
|
|
|
#include <sched.h>
|
|
|
|
#endif
|
2004-03-05 04:37:53 +00:00
|
|
|
using namespace std;
|
2004-06-16 21:34:31 +00:00
|
|
|
#endif
|
2004-03-05 04:37:53 +00:00
|
|
|
|
2004-03-24 07:39:24 +00:00
|
|
|
#include "diagnostics.h"
|
2002-08-05 00:29:34 +00:00
|
|
|
#include "parse.h"
|
2003-03-17 19:24:38 +00:00
|
|
|
#include "shmem.h"
|
2002-11-18 23:09:11 +00:00
|
|
|
#include "util.h"
|
2003-12-24 00:50:51 +00:00
|
|
|
#include "filesys.h"
|
2004-12-14 20:28:13 +00:00
|
|
|
#include "mem_usage.h"
|
2002-08-05 00:29:34 +00:00
|
|
|
#include "error_numbers.h"
|
2003-05-07 23:42:17 +00:00
|
|
|
#include "app_ipc.h"
|
2004-10-26 21:12:00 +00:00
|
|
|
|
2002-08-05 00:29:34 +00:00
|
|
|
#include "boinc_api.h"
|
|
|
|
|
2004-05-21 20:25:12 +00:00
|
|
|
// The BOINC API communicates CPU time and fraction done to the core client.
|
|
|
|
// Currently this is done using a timer.
|
|
|
|
// Remember that the processing of a result can be divided
|
|
|
|
// into multiple "episodes" (executions of the app),
|
|
|
|
// each of which resumes from the checkpointed state of the previous episode.
|
|
|
|
// Unless otherwise noted, "CPU time" refers to the sum over all episodes
|
|
|
|
// (not counting the part after the last checkpoint in an episode).
|
2004-02-06 09:34:00 +00:00
|
|
|
|
2004-12-13 19:14:54 +00:00
|
|
|
static APP_INIT_DATA aid;
|
2004-07-16 01:56:32 +00:00
|
|
|
|
2004-07-09 15:29:18 +00:00
|
|
|
APP_CLIENT_SHM *app_client_shm = 0;
|
2004-12-05 23:52:17 +00:00
|
|
|
static double timer_period = 1.0;
|
|
|
|
// period of API timer
|
|
|
|
// This determines the resolution of fraction done and CPU time reporting
|
|
|
|
// to the core client, and of checkpoint enabling.
|
|
|
|
// It doesn't influence graphics, so 1 sec is enough.
|
2004-07-09 15:29:18 +00:00
|
|
|
static double time_until_checkpoint;
|
2004-10-26 20:33:16 +00:00
|
|
|
// time until enable checkpoint
|
2004-07-09 15:29:18 +00:00
|
|
|
static double time_until_fraction_done_update;
|
2004-10-26 20:33:16 +00:00
|
|
|
// time until report fraction done to core client
|
2004-07-09 15:29:18 +00:00
|
|
|
static double fraction_done;
|
|
|
|
static double last_checkpoint_cpu_time;
|
|
|
|
static bool ready_to_checkpoint = false;
|
|
|
|
static double last_wu_cpu_time;
|
|
|
|
static bool standalone = false;
|
|
|
|
static double initial_wu_cpu_time;
|
|
|
|
static bool have_new_trickle_up = false;
|
|
|
|
static bool have_trickle_down = true;
|
2004-07-06 04:01:15 +00:00
|
|
|
// on first call, scan slot dir for msgs
|
2004-07-09 15:29:18 +00:00
|
|
|
static double heartbeat_giveup_time;
|
2004-12-05 23:52:17 +00:00
|
|
|
static bool heartbeat_active;
|
|
|
|
// if false, suppress heartbeat mechanism
|
2005-02-16 23:30:29 +00:00
|
|
|
#ifdef _WIN32
|
2004-12-05 23:52:17 +00:00
|
|
|
static int nrunning_ticks = 0;
|
2005-02-16 23:30:29 +00:00
|
|
|
#endif
|
2004-06-29 01:15:51 +00:00
|
|
|
|
2004-07-20 21:28:24 +00:00
|
|
|
#define HEARTBEAT_GIVEUP_PERIOD 30.0
|
2004-06-29 01:15:51 +00:00
|
|
|
// quit if no heartbeat from core in this #secs
|
2004-07-20 21:28:24 +00:00
|
|
|
#define HEARTBEAT_TIMEOUT_PERIOD 35.0
|
|
|
|
// quit if we cannot aquire slot resource in this #secs
|
2004-02-06 09:34:00 +00:00
|
|
|
|
|
|
|
#ifdef _WIN32
|
2004-08-03 09:50:24 +00:00
|
|
|
//HANDLE hErrorNotification;
|
|
|
|
//HANDLE hQuitRequest;
|
|
|
|
//HANDLE hSuspendRequest;
|
|
|
|
//HANDLE hResumeRequest;
|
|
|
|
static HANDLE hSharedMem;
|
2004-10-25 20:16:30 +00:00
|
|
|
HANDLE worker_thread_handle;
|
2005-01-28 01:58:11 +00:00
|
|
|
// used to suspend worker thread, and to measure its CPU time
|
2004-08-03 09:50:24 +00:00
|
|
|
static MMRESULT timer_id;
|
2004-02-06 09:34:00 +00:00
|
|
|
#endif
|
|
|
|
|
2004-06-16 12:07:01 +00:00
|
|
|
static int setup_shared_mem();
|
|
|
|
static int update_app_progress(double cpu_t, double cp_cpu_t, double ws_t);
|
2004-08-03 09:50:24 +00:00
|
|
|
static BOINC_OPTIONS options;
|
|
|
|
static BOINC_STATUS boinc_status;
|
|
|
|
|
2004-10-26 21:48:37 +00:00
|
|
|
// the following 2 functions are used when there's no graphics
|
2004-10-26 21:12:00 +00:00
|
|
|
//
|
|
|
|
int boinc_init() {
|
2004-12-13 19:14:54 +00:00
|
|
|
boinc_options_defaults(options);
|
2004-10-26 21:12:00 +00:00
|
|
|
return boinc_init_options(options);
|
|
|
|
}
|
2004-11-22 19:17:13 +00:00
|
|
|
|
2004-10-26 21:12:00 +00:00
|
|
|
// Initialize the API with caller-supplied options, then start the worker timer.
// Returns the first non-zero result of boinc_init_options_general(),
// otherwise the result of set_worker_timer().
//
int boinc_init_options(BOINC_OPTIONS& opt) {
    int init_status = boinc_init_options_general(opt);
    if (init_status != 0) {
        return init_status;
    }
    return set_worker_timer();
}
|
|
|
|
|
|
|
|
// the following can be called by either graphics or worker thread
|
|
|
|
//
|
|
|
|
int boinc_init_options_general(BOINC_OPTIONS& opt) {
|
2004-02-06 09:34:00 +00:00
|
|
|
int retval;
|
2004-08-03 09:50:24 +00:00
|
|
|
options = opt;
|
2004-02-06 09:34:00 +00:00
|
|
|
|
2004-08-03 09:50:24 +00:00
|
|
|
memset(&boinc_status, 0, sizeof(boinc_status));
|
|
|
|
|
|
|
|
if (options.main_program) {
|
2004-07-03 20:03:16 +00:00
|
|
|
// make sure we're the only app running in this slot
|
2004-06-29 04:34:55 +00:00
|
|
|
//
|
|
|
|
retval = lock_file(LOCKFILE);
|
2004-07-03 20:03:16 +00:00
|
|
|
if (retval) {
|
|
|
|
// give any previous occupant a chance to timeout and exit
|
|
|
|
//
|
2004-07-20 21:28:24 +00:00
|
|
|
boinc_sleep(HEARTBEAT_TIMEOUT_PERIOD);
|
2004-07-03 20:03:16 +00:00
|
|
|
retval = lock_file(LOCKFILE);
|
|
|
|
}
|
|
|
|
if (retval) {
|
|
|
|
fprintf(stderr, "Can't acquire lockfile - exiting\n");
|
2005-01-19 15:54:04 +00:00
|
|
|
boinc_exit(0); // not un-recoverable ==> status=0
|
2004-07-03 20:03:16 +00:00
|
|
|
}
|
2004-06-29 04:34:55 +00:00
|
|
|
}
|
|
|
|
|
2004-02-06 09:34:00 +00:00
|
|
|
retval = boinc_parse_init_data_file();
|
2004-05-21 20:25:12 +00:00
|
|
|
if (retval) {
|
|
|
|
standalone = true;
|
2004-06-03 04:25:44 +00:00
|
|
|
} else {
|
|
|
|
retval = setup_shared_mem();
|
|
|
|
if (retval) {
|
2004-07-08 17:59:46 +00:00
|
|
|
fprintf(stderr, "Can't set up shared mem: %d\n", retval);
|
2004-06-03 04:25:44 +00:00
|
|
|
standalone = true;
|
|
|
|
}
|
2004-05-21 20:25:12 +00:00
|
|
|
}
|
|
|
|
|
2004-02-06 09:34:00 +00:00
|
|
|
// copy the WU CPU time to a separate var,
|
|
|
|
// since we may reread the structure again later.
|
|
|
|
//
|
|
|
|
initial_wu_cpu_time = aid.wu_cpu_time;
|
|
|
|
|
2004-08-03 09:50:24 +00:00
|
|
|
// the following may not be needed, but do it anyway
|
|
|
|
//
|
2004-05-21 20:25:12 +00:00
|
|
|
fraction_done = -1;
|
2004-02-06 09:34:00 +00:00
|
|
|
time_until_checkpoint = aid.checkpoint_period;
|
|
|
|
last_checkpoint_cpu_time = aid.wu_cpu_time;
|
|
|
|
time_until_fraction_done_update = aid.fraction_done_update_period;
|
|
|
|
last_wu_cpu_time = aid.wu_cpu_time;
|
|
|
|
|
2004-06-29 01:15:51 +00:00
|
|
|
heartbeat_active = !standalone;
|
2004-07-08 03:38:52 +00:00
|
|
|
heartbeat_giveup_time = dtime() + HEARTBEAT_GIVEUP_PERIOD;
|
2004-06-29 01:15:51 +00:00
|
|
|
|
2004-08-03 09:50:24 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2004-02-06 09:34:00 +00:00
|
|
|
|
2004-08-03 09:50:24 +00:00
|
|
|
// Copy the current API status flags into the caller's structure.
// Always returns 0.
//
int boinc_get_status(BOINC_STATUS& s) {
    const BOINC_STATUS& current = boinc_status;
    s = current;
    return 0;
}
|
|
|
|
|
2004-08-03 09:50:24 +00:00
|
|
|
static void send_trickle_up_msg() {
|
|
|
|
if (have_new_trickle_up) {
|
|
|
|
if (app_client_shm->shm->trickle_up.send_msg("<have_new_trickle_up/>\n")) {
|
|
|
|
have_new_trickle_up = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2004-02-06 09:34:00 +00:00
|
|
|
|
2005-01-08 01:17:51 +00:00
|
|
|
|
|
|
|
// NOTE: a non-zero status tells a running client that we're exiting with
|
2005-01-19 15:54:04 +00:00
|
|
|
// an "unrecoverable error", which will be reported back to server.
|
|
|
|
// A zero exit-status will tell the client we've successfully finished the result.
|
2004-08-03 09:50:24 +00:00
|
|
|
int boinc_finish(int status) {
|
2005-01-08 01:17:51 +00:00
|
|
|
|
2004-08-03 09:50:24 +00:00
|
|
|
if (options.send_status_msgs) {
|
2004-12-05 23:52:17 +00:00
|
|
|
boinc_calling_thread_cpu_time(last_checkpoint_cpu_time);
|
2004-07-02 22:14:13 +00:00
|
|
|
last_checkpoint_cpu_time += aid.wu_cpu_time;
|
2004-12-05 23:52:17 +00:00
|
|
|
update_app_progress(last_checkpoint_cpu_time, last_checkpoint_cpu_time, 0);
|
2004-08-03 09:50:24 +00:00
|
|
|
}
|
|
|
|
if (options.handle_trickle_ups) {
|
|
|
|
send_trickle_up_msg();
|
|
|
|
}
|
2004-02-06 09:34:00 +00:00
|
|
|
#ifdef _WIN32
|
2004-08-03 09:50:24 +00:00
|
|
|
// Stop the timer
|
|
|
|
timeKillEvent(timer_id);
|
|
|
|
CloseHandle(worker_thread_handle);
|
2002-11-21 23:27:27 +00:00
|
|
|
#endif
|
2004-08-03 09:50:24 +00:00
|
|
|
if (options.main_program && status==0) {
|
|
|
|
FILE* f = fopen(BOINC_FINISH_CALLED_FILE, "w");
|
|
|
|
if (f) fclose(f);
|
|
|
|
}
|
|
|
|
if (options.send_status_msgs) {
|
2004-07-02 22:14:13 +00:00
|
|
|
aid.wu_cpu_time = last_checkpoint_cpu_time;
|
|
|
|
boinc_write_init_data_file();
|
2004-04-04 01:59:47 +00:00
|
|
|
}
|
2005-01-08 01:17:51 +00:00
|
|
|
|
2005-01-19 15:54:04 +00:00
|
|
|
// now remove lockfile+exit
|
|
|
|
boinc_exit(status);
|
|
|
|
|
|
|
|
return(0); // doh... we never get here
|
|
|
|
} // boinc_finish()
|
|
|
|
|
|
|
|
|
|
|
|
// exit a boinc-app
|
|
|
|
// this simply closes, then removes the app's lockfile and
|
|
|
|
// calls the appropriate exit-function
|
|
|
|
#if (!defined _WIN32) && (!defined HANDLE)
|
|
|
|
typedef int HANDLE;
|
|
|
|
#endif
|
|
|
|
extern HANDLE app_lockfile_handle;
|
|
|
|
|
|
|
|
void
|
|
|
|
boinc_exit (int status)
|
|
|
|
{
|
2005-01-08 07:55:59 +00:00
|
|
|
#ifdef _WIN32
|
2005-01-19 15:54:04 +00:00
|
|
|
if ( !CloseHandle ( app_lockfile_handle ) )
|
|
|
|
perror ( "Failed to close the application-lockfile.");
|
2005-01-08 07:55:59 +00:00
|
|
|
#else
|
2005-01-19 15:54:04 +00:00
|
|
|
if ( close ( app_lockfile_handle ) )
|
|
|
|
perror ( "Failed to close the application-lockfile " LOCKFILE);
|
2005-01-08 07:55:59 +00:00
|
|
|
#endif
|
2005-01-19 15:54:04 +00:00
|
|
|
// remove the lockfile
|
|
|
|
if ( boinc_delete_file (LOCKFILE) != 0)
|
|
|
|
perror ("boinc_finish(): failed to remove lockfile");
|
|
|
|
|
|
|
|
// on Mac, calling exit() can lead to infinite exit-atexit loops, while _exit() seems
|
|
|
|
// to behave nicely. This is not pretty but unless someone finds a cleaner solution,
|
|
|
|
// we handle the Mac-case separately .
|
|
|
|
#ifdef __APPLE_CC__
|
|
|
|
_exit(status);
|
|
|
|
#else
|
|
|
|
exit(status);
|
2005-01-08 01:17:51 +00:00
|
|
|
#endif
|
|
|
|
|
2005-01-19 15:54:04 +00:00
|
|
|
} // boinc_exit()
|
2004-02-06 09:34:00 +00:00
|
|
|
|
|
|
|
bool boinc_is_standalone() {
|
|
|
|
return standalone;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// parse the init data file.
|
|
|
|
// This is done at startup, and also if a "reread prefs" message is received
|
|
|
|
//
|
|
|
|
int boinc_parse_init_data_file() {
|
|
|
|
FILE* f;
|
|
|
|
int retval;
|
|
|
|
|
2004-10-14 22:01:05 +00:00
|
|
|
memset(&aid, 0, sizeof(aid));
|
|
|
|
safe_strncpy(aid.user_name, "Unknown user", sizeof(aid.user_name));
|
|
|
|
safe_strncpy(aid.team_name, "Unknown team", sizeof(aid.team_name));
|
|
|
|
aid.wu_cpu_time = 1000;
|
|
|
|
aid.user_total_credit = 1000;
|
|
|
|
aid.user_expavg_credit = 500;
|
|
|
|
aid.host_total_credit = 1000;
|
|
|
|
aid.host_expavg_credit = 500;
|
|
|
|
aid.checkpoint_period = DEFAULT_CHECKPOINT_PERIOD;
|
|
|
|
aid.fraction_done_update_period = DEFAULT_FRACTION_DONE_UPDATE_PERIOD;
|
|
|
|
|
2004-04-02 08:07:12 +00:00
|
|
|
if (!boinc_file_exists(INIT_DATA_FILE)) {
|
2004-10-14 22:01:05 +00:00
|
|
|
fprintf(stderr,
|
|
|
|
"Can't open init data file - running in standalone mode\n"
|
|
|
|
);
|
|
|
|
return ERR_FOPEN;
|
|
|
|
}
|
|
|
|
f = boinc_fopen(INIT_DATA_FILE, "r");
|
|
|
|
retval = parse_init_data_file(f, aid);
|
|
|
|
fclose(f);
|
|
|
|
if (retval) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"Can't parse init data file - running in standalone mode\n"
|
|
|
|
);
|
|
|
|
return retval;
|
2004-02-06 09:34:00 +00:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2004-05-24 04:03:34 +00:00
|
|
|
int boinc_write_init_data_file() {
|
|
|
|
FILE* f = boinc_fopen(INIT_DATA_FILE, "w");
|
|
|
|
if (!f) return ERR_FOPEN;
|
|
|
|
int retval = write_init_data_file(f, aid);
|
|
|
|
fclose(f);
|
|
|
|
return retval;
|
|
|
|
}
|
2002-10-28 22:30:37 +00:00
|
|
|
|
2004-12-09 01:07:01 +00:00
|
|
|
int boinc_report_app_status(
|
|
|
|
double cpu_time,
|
|
|
|
double checkpoint_cpu_time,
|
2005-02-16 23:30:29 +00:00
|
|
|
double _fraction_done
|
2004-12-09 01:07:01 +00:00
|
|
|
) {
|
2004-12-10 04:18:40 +00:00
|
|
|
char msg_buf[MSG_CHANNEL_SIZE];
|
2004-12-09 01:07:01 +00:00
|
|
|
sprintf(msg_buf,
|
|
|
|
"<current_cpu_time>%10.4f</current_cpu_time>\n"
|
|
|
|
"<checkpoint_cpu_time>%.15e</checkpoint_cpu_time>\n"
|
|
|
|
"<fraction_done>%2.8f</fraction_done>\n",
|
|
|
|
cpu_time,
|
|
|
|
checkpoint_cpu_time,
|
2005-02-16 23:30:29 +00:00
|
|
|
_fraction_done
|
2004-12-09 01:07:01 +00:00
|
|
|
);
|
|
|
|
app_client_shm->shm->app_status.send_msg(msg_buf);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2003-11-06 20:27:07 +00:00
|
|
|
// communicate to the core client (via shared mem)
|
|
|
|
// the current CPU time and fraction done
|
|
|
|
//
|
|
|
|
static int update_app_progress(
|
2005-02-16 23:30:29 +00:00
|
|
|
double cpu_t, double cp_cpu_t, double /*ws_t*/
|
2003-11-06 20:27:07 +00:00
|
|
|
) {
|
2004-07-08 03:38:52 +00:00
|
|
|
char msg_buf[MSG_CHANNEL_SIZE], buf[256];
|
2004-12-14 20:28:13 +00:00
|
|
|
double vm, rs;
|
2003-05-28 20:58:01 +00:00
|
|
|
|
2003-11-06 20:27:07 +00:00
|
|
|
if (!app_client_shm) return 0;
|
|
|
|
|
2004-07-26 22:23:16 +00:00
|
|
|
sprintf(msg_buf,
|
|
|
|
"<current_cpu_time>%10.4f</current_cpu_time>\n"
|
|
|
|
"<checkpoint_cpu_time>%.15e</checkpoint_cpu_time>\n",
|
|
|
|
cpu_t, cp_cpu_t
|
|
|
|
);
|
2004-05-21 20:25:12 +00:00
|
|
|
if (fraction_done >= 0) {
|
2004-05-24 04:03:34 +00:00
|
|
|
double range = aid.fraction_done_end - aid.fraction_done_start;
|
|
|
|
double fdone = aid.fraction_done_start + fraction_done*range;
|
|
|
|
sprintf(buf, "<fraction_done>%2.8f</fraction_done>\n", fdone);
|
2004-05-21 20:25:12 +00:00
|
|
|
strcat(msg_buf, buf);
|
|
|
|
}
|
2004-07-26 22:23:16 +00:00
|
|
|
if (!mem_usage(vm, rs)) {
|
2004-07-29 17:18:36 +00:00
|
|
|
sprintf(buf,
|
2004-12-14 20:28:13 +00:00
|
|
|
"<vm_bytes>%f</vm_bytes>\n"
|
|
|
|
"<rss_bytes>%flu</rss_bytes>\n",
|
2004-07-29 17:18:36 +00:00
|
|
|
vm, rs
|
|
|
|
);
|
|
|
|
strcat(msg_buf, buf);
|
2004-07-26 22:23:16 +00:00
|
|
|
}
|
2004-08-03 09:50:24 +00:00
|
|
|
app_client_shm->shm->app_status.send_msg(msg_buf);
|
2004-07-06 04:01:15 +00:00
|
|
|
return 0;
|
2002-08-05 00:29:34 +00:00
|
|
|
}
|
|
|
|
|
2002-08-09 23:34:11 +00:00
|
|
|
// Copy the file-level APP_INIT_DATA into the caller's structure.
// Always returns 0.
//
int boinc_get_init_data(APP_INIT_DATA& app_init_data) {
    const APP_INIT_DATA& current = aid;
    app_init_data = current;
    return 0;
}
|
|
|
|
|
2003-08-21 23:44:57 +00:00
|
|
|
|
2003-11-06 20:27:07 +00:00
|
|
|
// this can be called from the graphics thread
|
|
|
|
//
|
|
|
|
int boinc_wu_cpu_time(double& cpu_t) {
|
|
|
|
cpu_t = last_wu_cpu_time;
|
|
|
|
return 0;
|
|
|
|
}
|
2002-12-02 22:51:11 +00:00
|
|
|
|
2003-11-06 20:27:07 +00:00
|
|
|
#ifdef _WIN32
|
2002-08-05 00:29:34 +00:00
|
|
|
|
2004-12-05 23:52:17 +00:00
|
|
|
int boinc_worker_thread_cpu_time(double& cpu) {
|
|
|
|
if (boinc_thread_cpu_time(worker_thread_handle, cpu)) {
|
2004-12-06 00:08:04 +00:00
|
|
|
cpu = nrunning_ticks * timer_period; // for Win9x
|
2004-12-05 23:52:17 +00:00
|
|
|
}
|
2004-12-06 00:08:04 +00:00
|
|
|
return 0;
|
2002-08-05 00:29:34 +00:00
|
|
|
}
|
|
|
|
|
2003-11-06 20:27:07 +00:00
|
|
|
#else
|
2003-11-07 01:46:38 +00:00
|
|
|
|
2004-12-05 23:52:17 +00:00
|
|
|
int boinc_worker_thread_cpu_time(double& cpu) {
|
|
|
|
return boinc_calling_thread_cpu_time(cpu);
|
2002-08-05 00:29:34 +00:00
|
|
|
}
|
2004-05-24 17:51:08 +00:00
|
|
|
|
2003-11-06 20:27:07 +00:00
|
|
|
#endif // _WIN32
|
2002-08-05 00:29:34 +00:00
|
|
|
|
2004-08-03 09:50:24 +00:00
|
|
|
static void handle_heartbeat_msg() {
|
2004-07-08 03:38:52 +00:00
|
|
|
char buf[MSG_CHANNEL_SIZE];
|
|
|
|
if (app_client_shm->shm->heartbeat.get_msg(buf)) {
|
|
|
|
if (match_tag(buf, "<heartbeat/>")) {
|
|
|
|
heartbeat_giveup_time = dtime() + HEARTBEAT_GIVEUP_PERIOD;
|
|
|
|
}
|
|
|
|
if (match_tag(buf, "<enable_heartbeat/>")) {
|
|
|
|
heartbeat_active = true;
|
|
|
|
}
|
|
|
|
if (match_tag(buf, "<disable_heartbeat/>")) {
|
|
|
|
heartbeat_active = false;
|
|
|
|
}
|
2004-06-29 01:15:51 +00:00
|
|
|
}
|
2004-08-03 09:50:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void handle_trickle_down_msg() {
|
|
|
|
char buf[MSG_CHANNEL_SIZE];
|
2004-07-08 03:38:52 +00:00
|
|
|
if (app_client_shm->shm->trickle_down.get_msg(buf)) {
|
|
|
|
if (match_tag(buf, "<have_trickle_down/>")) {
|
|
|
|
have_trickle_down = true;
|
|
|
|
}
|
2004-06-29 01:15:51 +00:00
|
|
|
}
|
2004-08-03 09:50:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void handle_process_control_msg() {
|
|
|
|
char buf[MSG_CHANNEL_SIZE];
|
2004-07-08 03:38:52 +00:00
|
|
|
if (app_client_shm->shm->process_control_request.get_msg(buf)) {
|
|
|
|
if (match_tag(buf, "<suspend/>")) {
|
2004-12-05 23:52:17 +00:00
|
|
|
boinc_status.suspended = true;
|
2004-08-03 09:50:24 +00:00
|
|
|
if (options.direct_process_action) {
|
|
|
|
#ifdef _WIN32
|
|
|
|
SuspendThread(worker_thread_handle);
|
2004-07-08 03:38:52 +00:00
|
|
|
#else
|
2004-08-03 09:50:24 +00:00
|
|
|
while (1) {
|
|
|
|
if (app_client_shm->shm->process_control_request.get_msg(buf)) {
|
|
|
|
if (match_tag(buf, "<resume/>")) {
|
|
|
|
break;
|
|
|
|
}
|
2004-08-23 22:06:48 +00:00
|
|
|
if (match_tag(buf, "<quit/>")) {
|
2005-01-19 15:54:04 +00:00
|
|
|
boinc_exit(0); // NOTE: exit-status = 0 ==> recoverable exit!
|
2004-08-23 22:06:48 +00:00
|
|
|
}
|
2004-07-08 03:38:52 +00:00
|
|
|
}
|
2004-08-03 09:50:24 +00:00
|
|
|
boinc_sleep(1.0);
|
2004-07-08 03:38:52 +00:00
|
|
|
}
|
2004-08-03 09:50:24 +00:00
|
|
|
heartbeat_giveup_time = dtime() + HEARTBEAT_GIVEUP_PERIOD;
|
|
|
|
#endif
|
2004-07-08 03:38:52 +00:00
|
|
|
}
|
|
|
|
}
|
2004-08-03 09:50:24 +00:00
|
|
|
|
|
|
|
if (match_tag(buf, "<resume/>")) {
|
2004-12-05 23:52:17 +00:00
|
|
|
boinc_status.suspended = false;
|
2004-08-03 09:50:24 +00:00
|
|
|
if (options.direct_process_action) {
|
|
|
|
#ifdef _WIN32
|
|
|
|
ResumeThread(worker_thread_handle);
|
2004-07-08 03:38:52 +00:00
|
|
|
#endif
|
2004-08-03 09:50:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-07-08 03:38:52 +00:00
|
|
|
if (match_tag(buf, "<quit/>")) {
|
2004-12-05 23:52:17 +00:00
|
|
|
boinc_status.quit_request = true;
|
2004-08-03 09:50:24 +00:00
|
|
|
if (options.direct_process_action) {
|
2005-01-19 15:54:04 +00:00
|
|
|
boinc_exit(0); // NOTE: exit-status == 0!
|
2004-08-03 09:50:24 +00:00
|
|
|
}
|
2004-07-08 03:38:52 +00:00
|
|
|
}
|
2004-06-29 01:15:51 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2002-08-05 00:29:34 +00:00
|
|
|
#ifdef _WIN32
|
2004-10-26 20:33:16 +00:00
|
|
|
static void CALLBACK worker_timer(
|
|
|
|
UINT uTimerID, UINT uMsg, DWORD dwUser, DWORD dw1, DWORD dw2
|
|
|
|
) {
|
2002-08-05 00:29:34 +00:00
|
|
|
#else
|
2005-02-16 23:30:29 +00:00
|
|
|
static void worker_timer(int /*a*/) {
|
2002-08-05 00:29:34 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
if (!ready_to_checkpoint) {
|
|
|
|
time_until_checkpoint -= timer_period;
|
|
|
|
if (time_until_checkpoint <= 0) {
|
|
|
|
ready_to_checkpoint = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-07-09 15:29:18 +00:00
|
|
|
// handle messages from the core client
|
|
|
|
//
|
2004-06-29 01:15:51 +00:00
|
|
|
if (app_client_shm) {
|
2004-08-03 09:50:24 +00:00
|
|
|
if (options.check_heartbeat) {
|
|
|
|
handle_heartbeat_msg();
|
|
|
|
}
|
|
|
|
if (options.handle_trickle_downs) {
|
|
|
|
handle_trickle_down_msg();
|
|
|
|
}
|
|
|
|
if (options.handle_process_control) {
|
|
|
|
handle_process_control_msg();
|
|
|
|
}
|
2004-06-29 01:15:51 +00:00
|
|
|
}
|
|
|
|
|
2004-10-12 20:56:44 +00:00
|
|
|
// see if the core client has died, which means we need to die too
|
2004-07-09 15:29:18 +00:00
|
|
|
//
|
2004-08-03 09:50:24 +00:00
|
|
|
if (options.check_heartbeat && heartbeat_active) {
|
2004-10-12 20:56:44 +00:00
|
|
|
double now = dtime();
|
|
|
|
if (heartbeat_giveup_time < now) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"No heartbeat from core client for %f sec - exiting\n",
|
2004-10-13 00:21:50 +00:00
|
|
|
now - (heartbeat_giveup_time - HEARTBEAT_GIVEUP_PERIOD)
|
2004-10-12 20:56:44 +00:00
|
|
|
);
|
2004-08-03 09:50:24 +00:00
|
|
|
if (options.direct_process_action) {
|
2005-01-19 15:54:04 +00:00
|
|
|
boinc_exit(0); // NOTE: exit-status == 0! (recoverable error)
|
2004-08-03 09:50:24 +00:00
|
|
|
} else {
|
2005-01-19 15:54:04 +00:00
|
|
|
boinc_status.no_heartbeat = true;
|
2004-08-03 09:50:24 +00:00
|
|
|
}
|
2004-06-29 01:15:51 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-08-03 09:50:24 +00:00
|
|
|
if (options.send_status_msgs) {
|
|
|
|
time_until_fraction_done_update -= timer_period;
|
|
|
|
if (time_until_fraction_done_update <= 0) {
|
|
|
|
double cur_cpu;
|
2004-12-05 23:52:17 +00:00
|
|
|
boinc_worker_thread_cpu_time(cur_cpu);
|
2004-08-03 09:50:24 +00:00
|
|
|
last_wu_cpu_time = cur_cpu + initial_wu_cpu_time;
|
2004-12-05 23:52:17 +00:00
|
|
|
update_app_progress(last_wu_cpu_time, last_checkpoint_cpu_time, 0);
|
2004-08-03 09:50:24 +00:00
|
|
|
time_until_fraction_done_update = aid.fraction_done_update_period;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (options.handle_trickle_ups) {
|
|
|
|
send_trickle_up_msg();
|
2004-07-08 03:38:52 +00:00
|
|
|
}
|
2004-12-05 23:52:17 +00:00
|
|
|
#ifdef _WIN32
|
|
|
|
// poor man's CPU time accounting for Win9x
|
|
|
|
//
|
2004-12-06 00:08:04 +00:00
|
|
|
if (!boinc_status.suspended) {
|
2004-12-05 23:52:17 +00:00
|
|
|
nrunning_ticks++;
|
|
|
|
}
|
|
|
|
#endif
|
2002-08-05 00:29:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2005-01-28 01:58:11 +00:00
|
|
|
// set up a periodic timer interrupt for the worker thread.
|
|
|
|
// This is called only and always by the worker thread
|
|
|
|
//
|
2004-10-26 20:33:16 +00:00
|
|
|
int set_worker_timer() {
|
2002-08-05 00:29:34 +00:00
|
|
|
int retval=0;
|
2005-01-28 01:58:11 +00:00
|
|
|
|
2002-08-05 00:29:34 +00:00
|
|
|
#ifdef _WIN32
|
2005-01-28 01:58:11 +00:00
|
|
|
DuplicateHandle(
|
|
|
|
GetCurrentProcess(),
|
|
|
|
GetCurrentThread(),
|
|
|
|
GetCurrentProcess(),
|
|
|
|
&worker_thread_handle,
|
|
|
|
0,
|
|
|
|
FALSE,
|
|
|
|
DUPLICATE_SAME_ACCESS
|
|
|
|
);
|
2003-03-18 19:37:09 +00:00
|
|
|
|
2002-10-28 22:30:37 +00:00
|
|
|
// Use Windows multimedia timer, since it is more accurate
|
|
|
|
// than SetTimer and doesn't require an associated event loop
|
2004-03-26 18:32:57 +00:00
|
|
|
//
|
2002-11-14 00:49:24 +00:00
|
|
|
timer_id = timeSetEvent(
|
2004-10-26 20:33:16 +00:00
|
|
|
(int)(timer_period*1000), // uDelay
|
|
|
|
(int)(timer_period*1000), // uResolution
|
|
|
|
worker_timer, // lpTimeProc
|
2002-10-28 22:30:37 +00:00
|
|
|
NULL, // dwUser
|
|
|
|
TIME_PERIODIC // fuEvent
|
2003-09-05 21:26:21 +00:00
|
|
|
);
|
2005-01-28 01:58:11 +00:00
|
|
|
|
|
|
|
// lower our priority here
|
|
|
|
//
|
|
|
|
SetThreadPriority(worker_thread_handle, THREAD_PRIORITY_LOWEST);
|
2004-11-02 23:12:29 +00:00
|
|
|
#else
|
2002-08-05 00:29:34 +00:00
|
|
|
struct sigaction sa;
|
|
|
|
itimerval value;
|
2004-10-26 20:33:16 +00:00
|
|
|
sa.sa_handler = worker_timer;
|
2002-12-02 22:51:11 +00:00
|
|
|
sa.sa_flags = SA_RESTART;
|
|
|
|
retval = sigaction(SIGALRM, &sa, NULL);
|
|
|
|
if (retval) {
|
2004-10-26 20:33:16 +00:00
|
|
|
perror("boinc set_worker_timer() sigaction");
|
2002-12-02 22:51:11 +00:00
|
|
|
return retval;
|
|
|
|
}
|
2004-10-26 20:33:16 +00:00
|
|
|
value.it_value.tv_sec = (int)timer_period;
|
|
|
|
value.it_value.tv_usec = ((int)(timer_period*1000000))%1000000;
|
2002-08-05 00:29:34 +00:00
|
|
|
value.it_interval = value.it_value;
|
2002-10-19 05:22:02 +00:00
|
|
|
retval = setitimer(ITIMER_REAL, &value, NULL);
|
2002-12-02 22:51:11 +00:00
|
|
|
if (retval) {
|
2004-10-26 20:33:16 +00:00
|
|
|
perror("boinc set_worker_timer() setitimer");
|
2002-12-02 22:51:11 +00:00
|
|
|
}
|
2002-08-05 00:29:34 +00:00
|
|
|
#endif
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
2004-05-21 20:25:12 +00:00
|
|
|
static int setup_shared_mem() {
|
2003-08-21 23:44:57 +00:00
|
|
|
if (standalone) {
|
2004-02-09 05:11:05 +00:00
|
|
|
fprintf(stderr, "Standalone mode, so not using shared memory.\n");
|
2004-05-21 20:25:12 +00:00
|
|
|
return 0;
|
2003-08-21 23:44:57 +00:00
|
|
|
}
|
2004-06-16 12:07:01 +00:00
|
|
|
app_client_shm = new APP_CLIENT_SHM;
|
2003-03-17 19:24:38 +00:00
|
|
|
|
2003-03-18 19:37:09 +00:00
|
|
|
#ifdef _WIN32
|
|
|
|
char buf[256];
|
2004-08-31 21:20:31 +00:00
|
|
|
sprintf(buf, "%s%s", SHM_PREFIX, aid.shmem_seg_name);
|
2003-05-05 23:40:34 +00:00
|
|
|
hSharedMem = attach_shmem(buf, (void**)&app_client_shm->shm);
|
2003-09-05 21:26:21 +00:00
|
|
|
if (hSharedMem == NULL) {
|
2004-03-29 06:14:22 +00:00
|
|
|
delete app_client_shm;
|
2003-03-18 19:37:09 +00:00
|
|
|
app_client_shm = NULL;
|
2003-09-05 21:26:21 +00:00
|
|
|
}
|
2004-05-21 20:25:12 +00:00
|
|
|
#else
|
2004-09-01 20:12:50 +00:00
|
|
|
if (attach_shmem(aid.shmem_seg_name, (void**)&app_client_shm->shm)) {
|
2004-03-29 06:14:22 +00:00
|
|
|
delete app_client_shm;
|
2003-03-17 19:24:38 +00:00
|
|
|
app_client_shm = NULL;
|
|
|
|
}
|
|
|
|
#endif
|
2004-05-21 20:25:12 +00:00
|
|
|
if (app_client_shm == NULL) return -1;
|
|
|
|
return 0;
|
2003-03-17 19:24:38 +00:00
|
|
|
}
|
|
|
|
|
2004-07-06 04:01:15 +00:00
|
|
|
int boinc_send_trickle_up(char* variety, char* p) {
|
2004-08-03 09:50:24 +00:00
|
|
|
if (!options.handle_trickle_ups) return ERR_NO_OPTION;
|
2004-04-23 00:05:16 +00:00
|
|
|
FILE* f = boinc_fopen(TRICKLE_UP_FILENAME, "wb");
|
2004-01-04 06:48:40 +00:00
|
|
|
if (!f) return ERR_FOPEN;
|
2004-07-06 04:01:15 +00:00
|
|
|
fprintf(f, "<variety>%s</variety>\n", variety);
|
2004-01-04 06:48:40 +00:00
|
|
|
size_t n = fwrite(p, strlen(p), 1, f);
|
|
|
|
fclose(f);
|
|
|
|
if (n != 1) return ERR_WRITE;
|
2004-04-23 00:05:16 +00:00
|
|
|
have_new_trickle_up = true;
|
2004-01-04 06:48:40 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2003-11-06 20:27:07 +00:00
|
|
|
|
2004-12-22 15:02:03 +00:00
|
|
|
// logically this should be a bool. But it need to be an int to be
|
|
|
|
// compatible with C API.
|
|
|
|
int boinc_time_to_checkpoint() {
|
2003-11-06 20:27:07 +00:00
|
|
|
|
|
|
|
// If the application has received a quit request it should checkpoint
|
|
|
|
//
|
2005-01-19 15:54:04 +00:00
|
|
|
if (ready_to_checkpoint) {
|
|
|
|
return 1;
|
2003-11-06 20:27:07 +00:00
|
|
|
}
|
|
|
|
|
2004-12-22 15:02:03 +00:00
|
|
|
return 0;
|
2003-11-06 20:27:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int boinc_checkpoint_completed() {
|
2004-12-05 23:52:17 +00:00
|
|
|
double cur_cpu;
|
|
|
|
boinc_calling_thread_cpu_time(cur_cpu);
|
2003-11-06 20:27:07 +00:00
|
|
|
last_wu_cpu_time = cur_cpu + aid.wu_cpu_time;
|
|
|
|
last_checkpoint_cpu_time = last_wu_cpu_time;
|
2004-12-05 23:52:17 +00:00
|
|
|
update_app_progress(last_checkpoint_cpu_time, last_checkpoint_cpu_time, 0);
|
2003-11-06 20:27:07 +00:00
|
|
|
ready_to_checkpoint = false;
|
|
|
|
time_until_checkpoint = aid.checkpoint_period;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Record the app's current fraction done.
// The value is picked up the next time progress is reported
// to the core client.
// Always returns 0.
//
int boinc_fraction_done(double x) {
    const double reported = x;
    fraction_done = reported;
    return 0;
}
|
2004-07-02 05:06:30 +00:00
|
|
|
|
|
|
|
bool boinc_receive_trickle_down(char* buf, int len) {
|
|
|
|
std::string filename;
|
2004-07-06 04:01:15 +00:00
|
|
|
char path[256];
|
2004-08-03 09:50:24 +00:00
|
|
|
|
|
|
|
if (!options.handle_trickle_downs) return false;
|
|
|
|
|
2004-07-02 05:06:30 +00:00
|
|
|
if (have_trickle_down) {
|
2004-08-03 09:50:24 +00:00
|
|
|
relative_to_absolute("", path);
|
2004-07-06 04:01:15 +00:00
|
|
|
DirScanner dirscan(path);
|
|
|
|
fprintf(stderr, "starting scan of %s\n", path);
|
2004-07-02 05:06:30 +00:00
|
|
|
while (dirscan.scan(filename)) {
|
2004-07-06 04:01:15 +00:00
|
|
|
fprintf(stderr, "scan: %s\n", filename.c_str());
|
2004-07-02 05:06:30 +00:00
|
|
|
if (strstr(filename.c_str(), "trickle_down")) {
|
|
|
|
strncpy(buf, filename.c_str(), len);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
2004-10-22 05:46:25 +00:00
|
|
|
have_trickle_down = false;
|
2004-07-02 05:06:30 +00:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2005-01-02 18:29:53 +00:00
|
|
|
const char *BOINC_RCSID_0fa0410386 = "$Id$";
|