mirror of https://github.com/BOINC/boinc.git
1047 lines
30 KiB
C
1047 lines
30 KiB
C
// Berkeley Open Infrastructure for Network Computing
|
|
// http://boinc.berkeley.edu
|
|
// Copyright (C) 2005 University of California
|
|
//
|
|
// This is free software; you can redistribute it and/or
|
|
// modify it under the terms of the GNU Lesser General Public
|
|
// License as published by the Free Software Foundation;
|
|
// either version 2.1 of the License, or (at your option) any later version.
|
|
//
|
|
// This software is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
// See the GNU Lesser General Public License for more details.
|
|
//
|
|
// To view the GNU Lesser General Public License visit
|
|
// http://www.gnu.org/copyleft/lesser.html
|
|
// or write to the Free Software Foundation, Inc.,
|
|
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
// Code that's in the BOINC app library (but NOT in the core client)
|
|
// graphics-related code goes in graphics_api.C, not here
|
|
|
|
#if defined(_WIN32) && !defined(__STDWX_H__) && !defined(_BOINC_WIN_) && !defined(_AFX_STDAFX_H_)
|
|
#include "boinc_win.h"
|
|
#endif
|
|
|
|
#ifdef _WIN32
|
|
#include "version.h"
|
|
#else
|
|
#include "config.h"
|
|
#include <cstdlib>
|
|
#include <cstdio>
|
|
#include <cstdarg>
|
|
#include <sys/types.h>
|
|
#include <errno.h>
|
|
#include <unistd.h>
|
|
#include <sys/time.h>
|
|
#include <sys/resource.h>
|
|
#include <pthread.h>
|
|
#ifndef __EMX__
|
|
#include <sched.h>
|
|
#endif
|
|
using namespace std;
|
|
#endif
|
|
|
|
#include "diagnostics.h"
|
|
#include "parse.h"
|
|
#include "shmem.h"
|
|
#include "util.h"
|
|
#include "filesys.h"
|
|
#include "mem_usage.h"
|
|
#include "error_numbers.h"
|
|
#include "app_ipc.h"
|
|
|
|
#include "boinc_api.h"
|
|
|
|
#ifdef __APPLE__
|
|
#include "mac_backtrace.h"
|
|
#endif
|
|
|
|
// The BOINC API has various functions:
|
|
// - check heartbeat from core client, exit if none
|
|
// - handle trickle up/down messages
|
|
// - report CPU time and fraction done to the core client.
|
|
|
|
// Implementation notes:
|
|
// Unix: getting CPU time and suspend/resume have to be done
|
|
// in the worker thread, so we use a SIGALRM signal handler.
|
|
// However, many library functions and system calls
|
|
// are not "asynch signal safe": see, e.g.
|
|
// http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html#tag_02_04_03
|
|
// (e.g. sprintf() in a signal handler hangs Mac OS X)
|
|
// so we do as little as possible in the signal handler,
|
|
// and do the rest in a separate "timer thread".
|
|
|
|
// Terminology:
|
|
// The processing of a result can be divided
|
|
// into multiple "episodes" (executions of the app),
|
|
// each of which resumes from the checkpointed state of the previous episode.
|
|
// Unless otherwise noted, "CPU time" refers to the sum over all episodes
|
|
// (not counting the part after the last checkpoint in an episode).
|
|
|
|
// All variables that are accessed by two threads (i.e. worker and timer)
|
|
// MUST be declared volatile.
|
|
|
|
static APP_INIT_DATA aid;
|
|
static FILE_LOCK file_lock;
|
|
APP_CLIENT_SHM* app_client_shm = 0;
|
|
static volatile int time_until_checkpoint;
|
|
// time until enable checkpoint
|
|
static volatile int time_until_fraction_done_update;
|
|
// time until report fraction done to core client
|
|
static volatile double fraction_done;
|
|
static volatile double last_checkpoint_cpu_time;
|
|
static volatile bool ready_to_checkpoint = false;
|
|
static volatile bool in_critical_section;
|
|
static volatile double last_wu_cpu_time;
|
|
static volatile bool standalone = false;
|
|
static volatile double initial_wu_cpu_time;
|
|
static volatile bool have_new_trickle_up = false;
|
|
static volatile bool have_trickle_down = true;
|
|
// on first call, scan slot dir for msgs
|
|
static volatile int heartbeat_giveup_time;
|
|
// interrupt count value at which to give up on core client
|
|
static volatile bool heartbeat_active;
|
|
// if false, suppress heartbeat mechanism
|
|
#ifdef _WIN32
|
|
static volatile int nrunning_ticks = 0;
|
|
#endif
|
|
static volatile int interrupt_count = 0;
|
|
// number of timer interrupts
|
|
// used to measure elapsed time in a way that's
|
|
// not affected by user changing system clock,
|
|
// and that doesn't have big jumps around hibernation
|
|
static double fpops_per_cpu_sec = 0;
|
|
static double fpops_cumulative = 0;
|
|
static double intops_per_cpu_sec = 0;
|
|
static double intops_cumulative = 0;
|
|
static int want_network = 0;
|
|
static int have_network = 1;
|
|
bool g_sleep = false;
|
|
// simulate unresponsive app by setting to true (debugging)
|
|
static FUNC_PTR timer_callback = 0;
|
|
|
|
#define TIMER_PERIOD 1
    // Seconds between worker-thread timer interrupts.
    // Sets the resolution of fraction-done and CPU-time reporting
    // to the core client, and of checkpoint enabling.
    // Graphics don't depend on it, so 1 sec is enough.
#define HEARTBEAT_GIVEUP_PERIOD (30/TIMER_PERIOD)
    // number of interrupts without a heartbeat from the core client
    // after which we quit
#define HEARTBEAT_TIMEOUT_PERIOD 35
    // number of seconds after which we quit
    // if the slot resource cannot be acquired
|
|
|
|
#ifdef _WIN32
|
|
static HANDLE hSharedMem;
|
|
HANDLE worker_thread_handle;
|
|
// used to suspend worker thread, and to measure its CPU time
|
|
#else
|
|
static pthread_t timer_thread_handle;
|
|
static struct rusage worker_thread_ru;
|
|
#endif
|
|
|
|
static BOINC_OPTIONS options;
|
|
static volatile BOINC_STATUS boinc_status;
|
|
|
|
// State for intermediate file upload.
//
struct UPLOAD_FILE_STATUS {
    std::string name;
        // name with the status-file prefix removed
    int status;
        // value of the <status> element in the status file
};
static bool have_new_upload_file;
    // set by boinc_upload_file(), cleared once the core client is notified
static std::vector<UPLOAD_FILE_STATUS> upload_file_status;
    // filled by handle_upload_file_status(), searched by boinc_upload_status()
|
|
|
|
static int setup_shared_mem() {
|
|
if (standalone) {
|
|
fprintf(stderr, "Standalone mode, so not using shared memory.\n");
|
|
return 0;
|
|
}
|
|
app_client_shm = new APP_CLIENT_SHM;
|
|
|
|
#ifdef _WIN32
|
|
char buf[256];
|
|
sprintf(buf, "%s%s", SHM_PREFIX, aid.shmem_seg_name);
|
|
hSharedMem = attach_shmem(buf, (void**)&app_client_shm->shm);
|
|
if (hSharedMem == NULL) {
|
|
delete app_client_shm;
|
|
app_client_shm = NULL;
|
|
}
|
|
#else
|
|
#ifdef USE_FILE_MAPPED_SHMEM
|
|
if (create_shmem(
|
|
MMAPPED_FILE_NAME, sizeof(SHARED_MEM), (void**)&app_client_shm->shm
|
|
)) {
|
|
delete app_client_shm;
|
|
app_client_shm = NULL;
|
|
}
|
|
#else
|
|
if (attach_shmem(aid.shmem_seg_name, (void**)&app_client_shm->shm)) {
|
|
delete app_client_shm;
|
|
app_client_shm = NULL;
|
|
}
|
|
#endif // ! USE_FILE_MAPPED_SHMEM
|
|
#endif // ! _WIN32
|
|
if (app_client_shm == NULL) return -1;
|
|
return 0;
|
|
}
|
|
|
|
// Return CPU time of worker thread (and optionally others)
|
|
// This may be called from other threads
|
|
//
|
|
double boinc_worker_thread_cpu_time() {
|
|
static double last_cpu=0;
|
|
static time_t last_time=0;
|
|
time_t now = time(0);
|
|
double cpu, time_diff = (double)(now - last_time);
|
|
#ifdef _WIN32
|
|
int retval;
|
|
if (options.all_threads_cpu_time) {
|
|
retval = boinc_process_cpu_time(cpu);
|
|
} else {
|
|
retval = boinc_thread_cpu_time(worker_thread_handle, cpu);
|
|
}
|
|
if (retval) {
|
|
cpu = nrunning_ticks * TIMER_PERIOD; // for Win9x
|
|
}
|
|
#else
|
|
if (pthread_mutex_lock(&getrusage_mutex)) return last_cpu;
|
|
cpu = (double)worker_thread_ru.ru_utime.tv_sec
|
|
+ (((double)worker_thread_ru.ru_utime.tv_usec)/1000000.0);
|
|
cpu += (double)worker_thread_ru.ru_stime.tv_sec
|
|
+ (((double)worker_thread_ru.ru_stime.tv_usec)/1000000.0);
|
|
pthread_mutex_unlock(&getrusage_mutex);
|
|
#endif
|
|
double cpu_diff = cpu - last_cpu;
|
|
if (cpu_diff>(time_diff + 1)) {
|
|
// fprintf(stderr,"CPU time incrementing faster than real time. Correcting.\n");
|
|
cpu = last_cpu + time_diff;
|
|
}
|
|
if (time_diff) {
|
|
last_cpu = cpu;
|
|
last_time = now;
|
|
}
|
|
return cpu;
|
|
}
|
|
|
|
// communicate to the core client (via shared mem)
|
|
// the current CPU time and fraction done
|
|
//
|
|
static bool update_app_progress(double cpu_t, double cp_cpu_t) {
|
|
char msg_buf[MSG_CHANNEL_SIZE], buf[256];
|
|
|
|
if (standalone) return true;
|
|
|
|
sprintf(msg_buf,
|
|
"<current_cpu_time>%.15e</current_cpu_time>\n"
|
|
"<checkpoint_cpu_time>%.15e</checkpoint_cpu_time>\n",
|
|
cpu_t, cp_cpu_t
|
|
);
|
|
if (want_network) {
|
|
strcat(msg_buf, "<want_network>1</want_network>\n");
|
|
}
|
|
if (fraction_done >= 0) {
|
|
double range = aid.fraction_done_end - aid.fraction_done_start;
|
|
double fdone = aid.fraction_done_start + fraction_done*range;
|
|
sprintf(buf, "<fraction_done>%2.8f</fraction_done>\n", fdone);
|
|
strcat(msg_buf, buf);
|
|
}
|
|
if (fpops_per_cpu_sec) {
|
|
sprintf(buf, "<fpops_per_cpu_sec>%f</fpops_per_cpu_sec>\n", fpops_per_cpu_sec);
|
|
strcat(msg_buf, buf);
|
|
}
|
|
if (fpops_cumulative) {
|
|
sprintf(buf, "<fpops_cumulative>%f</fpops_cumulative>\n", fpops_cumulative);
|
|
strcat(msg_buf, buf);
|
|
}
|
|
if (intops_per_cpu_sec) {
|
|
sprintf(buf, "<intops_per_cpu_sec>%f</intops_per_cpu_sec>\n", intops_per_cpu_sec);
|
|
strcat(msg_buf, buf);
|
|
}
|
|
if (intops_cumulative) {
|
|
sprintf(buf, "<intops_cumulative>%f</intops_cumulative>\n", intops_cumulative);
|
|
strcat(msg_buf, buf);
|
|
}
|
|
return app_client_shm->shm->app_status.send_msg(msg_buf);
|
|
}
|
|
|
|
// the following 2 functions are used for apps without graphics
|
|
//
|
|
int boinc_init() {
|
|
int retval;
|
|
if (!diagnostics_is_initialized()) {
|
|
retval = boinc_init_diagnostics(BOINC_DIAG_DEFAULTS);
|
|
if (retval) return retval;
|
|
}
|
|
boinc_options_defaults(options);
|
|
return boinc_init_options(&options);
|
|
}
|
|
|
|
// Initialize the API with caller-supplied options (for apps without graphics):
// diagnostics (if not yet initialized), general initialization,
// then the worker timer. Stops at, and returns, the first nonzero error.
//
int boinc_init_options(BOINC_OPTIONS* opt) {
    int err = 0;
    if (!diagnostics_is_initialized()) {
        err = boinc_init_diagnostics(BOINC_DIAG_DEFAULTS);
    }
    if (!err) {
        err = boinc_init_options_general(*opt);
    }
    if (!err) {
        err = set_worker_timer();
    }
    return err;
}
|
|
|
|
// the following can be called by either graphics or worker thread
|
|
//
|
|
int boinc_init_options_general(BOINC_OPTIONS& opt) {
|
|
int retval;
|
|
options = opt;
|
|
|
|
boinc_status.no_heartbeat = false;
|
|
boinc_status.suspended = false;
|
|
boinc_status.quit_request = false;
|
|
boinc_status.abort_request = false;
|
|
|
|
if (options.main_program) {
|
|
// make sure we're the only app running in this slot
|
|
//
|
|
retval = file_lock.lock(LOCKFILE);
|
|
if (retval) {
|
|
// give any previous occupant a chance to timeout and exit
|
|
//
|
|
boinc_sleep(HEARTBEAT_TIMEOUT_PERIOD);
|
|
retval = file_lock.lock(LOCKFILE);
|
|
}
|
|
if (retval) {
|
|
fprintf(stderr, "Can't acquire lockfile - exiting\n");
|
|
boinc_exit(0); // not un-recoverable ==> status=0
|
|
}
|
|
}
|
|
|
|
retval = boinc_parse_init_data_file();
|
|
if (retval) {
|
|
standalone = true;
|
|
} else {
|
|
retval = setup_shared_mem();
|
|
if (retval) {
|
|
fprintf(stderr,
|
|
"Can't set up shared mem: %d\n"
|
|
"Will run in standalone mode.\n",
|
|
retval
|
|
);
|
|
standalone = true;
|
|
}
|
|
}
|
|
|
|
// copy the WU CPU time to a separate var,
|
|
// since we may reread the structure again later.
|
|
//
|
|
initial_wu_cpu_time = aid.wu_cpu_time;
|
|
|
|
// the following may not be needed, but do it anyway
|
|
//
|
|
fraction_done = -1;
|
|
time_until_checkpoint = (int)aid.checkpoint_period;
|
|
last_checkpoint_cpu_time = aid.wu_cpu_time;
|
|
time_until_fraction_done_update = (int)aid.fraction_done_update_period;
|
|
last_wu_cpu_time = aid.wu_cpu_time;
|
|
|
|
heartbeat_active = !standalone;
|
|
heartbeat_giveup_time = interrupt_count + HEARTBEAT_GIVEUP_PERIOD;
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Copy the current status flags and working-set figures into *s.
// Always returns 0.
//
int boinc_get_status(BOINC_STATUS *s) {
    BOINC_STATUS& out = *s;

    // field-by-field copy, since boinc_status is volatile
    out.no_heartbeat = boinc_status.no_heartbeat;
    out.suspended = boinc_status.suspended;
    out.quit_request = boinc_status.quit_request;
    out.reread_init_data_file = boinc_status.reread_init_data_file;
    out.abort_request = boinc_status.abort_request;
    out.working_set_size = boinc_status.working_set_size;
    out.max_working_set_size = boinc_status.max_working_set_size;
    return 0;
}
|
|
|
|
// if we have any new trickle-ups or file upload requests,
|
|
// send a message describing them
|
|
//
|
|
static void send_trickle_up_msg() {
|
|
char buf[MSG_CHANNEL_SIZE];
|
|
BOINCINFO("Sending Trickle Up Message");
|
|
strcpy(buf, "");
|
|
if (have_new_trickle_up) {
|
|
strcat(buf, "<have_new_trickle_up/>\n");
|
|
}
|
|
if (have_new_upload_file) {
|
|
strcat(buf, "<have_new_upload_file/>\n");
|
|
}
|
|
if (strlen(buf)) {
|
|
if (app_client_shm->shm->trickle_up.send_msg(buf)) {
|
|
have_new_trickle_up = false;
|
|
have_new_upload_file = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// NOTE: a non-zero status tells the core client that we're exiting with
|
|
// an "unrecoverable error", which will be reported back to server.
|
|
// A zero exit-status tells the client we've successfully finished the result.
|
|
//
|
|
// This function can be called from any thread. (Timer, Worker, and Graphics)
|
|
//
|
|
int boinc_finish(int status) {
|
|
if (options.send_status_msgs) {
|
|
double total_cpu;
|
|
total_cpu = boinc_worker_thread_cpu_time();
|
|
total_cpu += initial_wu_cpu_time;
|
|
fraction_done = 1;
|
|
|
|
// NOTE: the app_status slot may already contain a message.
|
|
// So retry a couple of times.
|
|
//
|
|
for (int i=0; i<3; i++) {
|
|
if (update_app_progress(total_cpu, total_cpu)) break;
|
|
boinc_sleep(1.0);
|
|
}
|
|
}
|
|
if (options.handle_trickle_ups) {
|
|
send_trickle_up_msg();
|
|
}
|
|
if (options.main_program && status==0) {
|
|
FILE* f = fopen(BOINC_FINISH_CALLED_FILE, "w");
|
|
if (f) fclose(f);
|
|
}
|
|
if (options.send_status_msgs) {
|
|
aid.wu_cpu_time = last_checkpoint_cpu_time;
|
|
boinc_write_init_data_file(aid);
|
|
}
|
|
|
|
boinc_exit(status);
|
|
|
|
return 0; // never reached
|
|
}
|
|
|
|
|
|
// unlock the lockfile and call the appropriate exit function
|
|
// This is called from the worker, timer, and graphics threads.
|
|
//
|
|
void boinc_exit(int status) {
|
|
// Unlock the lock file
|
|
//
|
|
file_lock.unlock(LOCKFILE);
|
|
|
|
// flush all the output buffers
|
|
//
|
|
fflush(NULL);
|
|
|
|
// Cleanup the diagnostics allocations and stuff. Dump any memory
|
|
// leaks if it is a debug build.
|
|
//
|
|
boinc_finish_diag();
|
|
|
|
// various platforms have various issues with shutting down
|
|
// a process while an unspecified number of threads are still
|
|
// executing or triggering endless exit()/atexit() loops. Use
|
|
// alternate methods to shutdown the application on those
|
|
// platforms.
|
|
BOINCINFO("Exit Status: %d", status);
|
|
#if defined(_WIN32)
|
|
// Halts all the threads and then cleans up.
|
|
TerminateProcess(GetCurrentProcess(), status);
|
|
#elif defined(__APPLE_CC__)
|
|
// stops endless exit()/atexit() loops.
|
|
_exit(status);
|
|
#else
|
|
// POSIX exit call.
|
|
exit(status);
|
|
#endif
|
|
}
|
|
|
|
int boinc_is_standalone() {
|
|
if (standalone) return 1;
|
|
return 0;
|
|
}
|
|
|
|
|
|
// parse the init data file.
|
|
// This is done at startup, and also if a "reread prefs" message is received
|
|
//
|
|
int boinc_parse_init_data_file() {
|
|
FILE* f;
|
|
int retval;
|
|
|
|
// in principle should free project_preferences here if it's nonzero
|
|
|
|
memset(&aid, 0, sizeof(aid));
|
|
strcpy(aid.user_name, "");
|
|
strcpy(aid.team_name, "");
|
|
aid.wu_cpu_time = 0;
|
|
aid.user_total_credit = 0;
|
|
aid.user_expavg_credit = 0;
|
|
aid.host_total_credit = 0;
|
|
aid.host_expavg_credit = 0;
|
|
aid.checkpoint_period = DEFAULT_CHECKPOINT_PERIOD;
|
|
aid.fraction_done_update_period = DEFAULT_FRACTION_DONE_UPDATE_PERIOD;
|
|
|
|
if (!boinc_file_exists(INIT_DATA_FILE)) {
|
|
fprintf(stderr,
|
|
"Can't open init data file - running in standalone mode\n"
|
|
);
|
|
return ERR_FOPEN;
|
|
}
|
|
f = boinc_fopen(INIT_DATA_FILE, "r");
|
|
retval = parse_init_data_file(f, aid);
|
|
fclose(f);
|
|
if (retval) {
|
|
fprintf(stderr,
|
|
"Can't parse init data file - running in standalone mode\n"
|
|
);
|
|
return retval;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// Write x to the init data file, replacing its previous contents.
// Returns ERR_FOPEN if the file can't be opened for writing,
// else the result of write_init_data_file().
//
int boinc_write_init_data_file(APP_INIT_DATA& x) {
    FILE* out = boinc_fopen(INIT_DATA_FILE, "w");
    if (out == NULL) {
        return ERR_FOPEN;
    }
    int write_err = write_init_data_file(out, x);
    fclose(out);
    return write_err;
}
|
|
|
|
int boinc_report_app_status(
|
|
double cpu_time,
|
|
double checkpoint_cpu_time,
|
|
double _fraction_done
|
|
) {
|
|
char msg_buf[MSG_CHANNEL_SIZE];
|
|
if (standalone) return 0;
|
|
|
|
sprintf(msg_buf,
|
|
"<current_cpu_time>%10.4f</current_cpu_time>\n"
|
|
"<checkpoint_cpu_time>%.15e</checkpoint_cpu_time>\n"
|
|
"<fraction_done>%2.8f</fraction_done>\n",
|
|
cpu_time,
|
|
checkpoint_cpu_time,
|
|
_fraction_done
|
|
);
|
|
app_client_shm->shm->app_status.send_msg(msg_buf);
|
|
return 0;
|
|
}
|
|
|
|
// Pointer flavor of boinc_get_init_data():
// copy the current init data into *app_init_data. Always returns 0.
//
int boinc_get_init_data_p(APP_INIT_DATA* app_init_data) {
    APP_INIT_DATA& dest = *app_init_data;
    dest = aid;
    return 0;
}
|
|
|
|
// Copy the current init data into the caller's structure.
// Always returns 0.
//
int boinc_get_init_data(APP_INIT_DATA& app_init_data) {
    app_init_data = aid;
    return 0;
}
|
|
|
|
|
|
// this can be called from the graphics thread
|
|
//
|
|
int boinc_wu_cpu_time(double& cpu_t) {
|
|
cpu_t = last_wu_cpu_time;
|
|
return 0;
|
|
}
|
|
|
|
// this can be called from the graphics thread
|
|
//
|
|
int suspend_activities() {
|
|
BOINCINFO("Received Suspend Message");
|
|
//fprintf(stderr, "suspending; %f %d\n", last_wu_cpu_time, options.direct_process_action);
|
|
#ifdef _WIN32
|
|
if (options.direct_process_action) {
|
|
// in Windows this is called from a separate "timer thread",
|
|
// and Windows lets us suspend the worker thread
|
|
//
|
|
SuspendThread(worker_thread_handle);
|
|
}
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
// this can be called from the graphics thread
|
|
//
|
|
int resume_activities() {
|
|
BOINCINFO("Received Resume Message");
|
|
#ifdef _WIN32
|
|
//fprintf(stderr, "resuming; %f %d\n", last_wu_cpu_time, options.direct_process_action);
|
|
if (options.direct_process_action) {
|
|
// in Windows this is called from a separate "timer thread",
|
|
// and Windows lets us resume the worker thread
|
|
//
|
|
ResumeThread(worker_thread_handle);
|
|
}
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
// this can be called from the graphics thread
|
|
//
|
|
int restore_activities() {
|
|
int retval;
|
|
if (boinc_status.suspended) {
|
|
retval = suspend_activities();
|
|
} else {
|
|
retval = resume_activities();
|
|
}
|
|
return retval;
|
|
}
|
|
|
|
static void handle_heartbeat_msg() {
|
|
char buf[MSG_CHANNEL_SIZE];
|
|
double dtemp;
|
|
|
|
if (app_client_shm->shm->heartbeat.get_msg(buf)) {
|
|
if (match_tag(buf, "<heartbeat/>")) {
|
|
heartbeat_giveup_time = interrupt_count + HEARTBEAT_GIVEUP_PERIOD;
|
|
}
|
|
if (match_tag(buf, "<enable_heartbeat/>")) {
|
|
BOINCINFO("Enabling heartbeat");
|
|
heartbeat_active = true;
|
|
}
|
|
if (match_tag(buf, "<disable_heartbeat/>")) {
|
|
BOINCINFO("Disabling heartbeat");
|
|
heartbeat_active = false;
|
|
}
|
|
if (parse_double(buf, "<wss>", dtemp)) {
|
|
boinc_status.working_set_size = dtemp;
|
|
}
|
|
if (parse_double(buf, "<max_wss>", dtemp)) {
|
|
boinc_status.max_working_set_size = dtemp;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void handle_upload_file_status() {
|
|
char path[256], buf[256], log_name[256], *p;
|
|
std::string filename;
|
|
int status;
|
|
|
|
relative_to_absolute("", path);
|
|
DirScanner dirscan(path);
|
|
while (dirscan.scan(filename)) {
|
|
strcpy(buf, filename.c_str());
|
|
if (strstr(buf, UPLOAD_FILE_STATUS_PREFIX) != buf) continue;
|
|
strcpy(log_name, buf+strlen(UPLOAD_FILE_STATUS_PREFIX));
|
|
FILE* f = boinc_fopen(filename.c_str(), "r");
|
|
if (!f) {
|
|
fprintf(stderr, "handle_file_upload_status: can't open %s\n", filename.c_str());
|
|
continue;
|
|
}
|
|
p = fgets(buf, 256, f);
|
|
fclose(f);
|
|
if (p && parse_int(buf, "<status>", status)) {
|
|
UPLOAD_FILE_STATUS uf;
|
|
uf.name = std::string(log_name);
|
|
uf.status = status;
|
|
upload_file_status.push_back(uf);
|
|
} else {
|
|
fprintf(stderr, "handle_upload_file_status: can't parse %s\n", buf);
|
|
}
|
|
}
|
|
}
|
|
|
|
// handle trickle and file upload messages
|
|
//
|
|
static void handle_trickle_down_msg() {
|
|
char buf[MSG_CHANNEL_SIZE];
|
|
if (app_client_shm->shm->trickle_down.get_msg(buf)) {
|
|
BOINCINFO("Received Trickle Down Message");
|
|
if (match_tag(buf, "<have_trickle_down/>")) {
|
|
have_trickle_down = true;
|
|
}
|
|
if (match_tag(buf, "<upload_file_status/>")) {
|
|
handle_upload_file_status();
|
|
}
|
|
}
|
|
}
|
|
|
|
static void handle_process_control_msg() {
|
|
char buf[MSG_CHANNEL_SIZE];
|
|
if (app_client_shm->shm->process_control_request.get_msg(buf)) {
|
|
//fprintf(stderr, "%f: got %s\n", dtime(), buf);
|
|
if (match_tag(buf, "<suspend/>")) {
|
|
boinc_status.suspended = true;
|
|
suspend_activities();
|
|
}
|
|
|
|
if (match_tag(buf, "<resume/>")) {
|
|
boinc_status.suspended = false;
|
|
resume_activities();
|
|
}
|
|
|
|
if (match_tag(buf, "<quit/>")) {
|
|
BOINCINFO("Received quit message");
|
|
boinc_status.quit_request = true;
|
|
if (options.direct_process_action) {
|
|
boinc_exit(0);
|
|
}
|
|
}
|
|
if (match_tag(buf, "<abort/>")) {
|
|
BOINCINFO("Received abort message");
|
|
boinc_status.abort_request = true;
|
|
if (options.direct_process_action) {
|
|
diagnostics_set_aborted_via_gui();
|
|
#if defined(_WIN32)
|
|
// Cause a controlled assert and dump the callstacks.
|
|
DebugBreak();
|
|
#elif defined(__APPLE__)
|
|
PrintBacktrace();
|
|
#endif
|
|
boinc_exit(EXIT_ABORTED_VIA_GUI);
|
|
}
|
|
}
|
|
if (match_tag(buf, "<reread_app_info/>")) {
|
|
boinc_status.reread_init_data_file = true;
|
|
}
|
|
if (match_tag(buf, "<network_available/>")) {
|
|
have_network = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
// once-a-second timer.
|
|
// Runs in a separate thread (not the worker thread)
|
|
//
|
|
static void timer_handler() {
|
|
if (g_sleep) return;
|
|
interrupt_count++;
|
|
if (!ready_to_checkpoint) {
|
|
time_until_checkpoint -= TIMER_PERIOD;
|
|
if (time_until_checkpoint <= 0) {
|
|
ready_to_checkpoint = true;
|
|
}
|
|
}
|
|
|
|
// handle messages from the core client
|
|
//
|
|
if (app_client_shm) {
|
|
if (options.check_heartbeat) {
|
|
handle_heartbeat_msg();
|
|
}
|
|
if (options.handle_trickle_downs) {
|
|
handle_trickle_down_msg();
|
|
}
|
|
if (!in_critical_section && options.handle_process_control) {
|
|
handle_process_control_msg();
|
|
}
|
|
}
|
|
|
|
// see if the core client has died, which means we need to die too
|
|
// (unless we're in a critical section)
|
|
//
|
|
if (!in_critical_section && options.check_heartbeat && heartbeat_active) {
|
|
if (heartbeat_giveup_time < interrupt_count) {
|
|
fprintf(stderr,
|
|
"No heartbeat from core client for %d sec - exiting\n",
|
|
interrupt_count - (heartbeat_giveup_time - HEARTBEAT_GIVEUP_PERIOD)
|
|
);
|
|
if (options.direct_process_action) {
|
|
boinc_exit(0);
|
|
} else {
|
|
boinc_status.no_heartbeat = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// don't bother reporting CPU time etc. if we're suspended
|
|
//
|
|
if (options.send_status_msgs && !boinc_status.suspended) {
|
|
time_until_fraction_done_update -= TIMER_PERIOD;
|
|
if (time_until_fraction_done_update <= 0) {
|
|
double cur_cpu;
|
|
cur_cpu = boinc_worker_thread_cpu_time();
|
|
last_wu_cpu_time = cur_cpu + initial_wu_cpu_time;
|
|
update_app_progress(last_wu_cpu_time, last_checkpoint_cpu_time);
|
|
time_until_fraction_done_update = (int)aid.fraction_done_update_period;
|
|
}
|
|
}
|
|
if (options.handle_trickle_ups) {
|
|
send_trickle_up_msg();
|
|
}
|
|
if (timer_callback) {
|
|
timer_callback();
|
|
}
|
|
}
|
|
|
|
#ifdef _WIN32
|
|
|
|
static HANDLE timer_quit_event;
|
|
|
|
// Windows timer thread: wakes every TIMER_PERIOD seconds,
// runs timer_handler(), and never exits.
//
UINT WINAPI timer_thread(void *) {
    diagnostics_set_thread_name("Timer");

    for (;;) {
        Sleep(TIMER_PERIOD*1000);
        timer_handler();

        // Win9x fallback for CPU time accounting:
        // count the ticks during which we weren't suspended
        //
        if (!boinc_status.suspended) {
            nrunning_ticks++;
        }
    }
    return 0;
}
|
|
|
|
#else
|
|
|
|
void* timer_thread(void*) {
|
|
block_sigalrm();
|
|
while(1) {
|
|
boinc_sleep(TIMER_PERIOD);
|
|
timer_handler();
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// This SIGALRM handler gets handled only by the worker thread.
|
|
// It gets CPU time and implements sleeping.
|
|
// It must call only signal-safe functions, and must not do FP math
|
|
//
|
|
void worker_signal_handler(int) {
|
|
if (!pthread_mutex_trylock(&getrusage_mutex)) {
|
|
getrusage(RUSAGE_SELF, &worker_thread_ru);
|
|
pthread_mutex_unlock(&getrusage_mutex);
|
|
}
|
|
if (options.direct_process_action) {
|
|
while (boinc_status.suspended && !in_critical_section) {
|
|
sleep(1); // don't use boinc_sleep() because it does FP math
|
|
}
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
// Called from the worker thread;
|
|
// create a thread to do timer-related things.
|
|
//
|
|
int set_worker_timer() {
|
|
int retval=0;
|
|
|
|
#ifdef _WIN32
|
|
|
|
// get the worker thread handle
|
|
//
|
|
DuplicateHandle(
|
|
GetCurrentProcess(),
|
|
GetCurrentThread(),
|
|
GetCurrentProcess(),
|
|
&worker_thread_handle,
|
|
0,
|
|
FALSE,
|
|
DUPLICATE_SAME_ACCESS
|
|
);
|
|
|
|
// Initialize the worker thread info for diagnostic purposes.
|
|
//
|
|
diagnostics_set_thread_name("Worker");
|
|
diagnostics_set_thread_worker();
|
|
|
|
// Create the timer thread
|
|
//
|
|
uintptr_t thread;
|
|
UINT uiThreadId;
|
|
thread = _beginthreadex(
|
|
NULL,
|
|
16384, // stack size
|
|
timer_thread,
|
|
0,
|
|
0,
|
|
&uiThreadId
|
|
);
|
|
|
|
if (!thread) {
|
|
fprintf(stderr, "set_worker_timer(): _beginthreadex() failed, errno %d\n", errno);
|
|
retval = errno;
|
|
return retval;
|
|
}
|
|
|
|
// lower our priority
|
|
//
|
|
SetThreadPriority(worker_thread_handle, THREAD_PRIORITY_IDLE);
|
|
|
|
#else
|
|
pthread_attr_t thread_attrs;
|
|
pthread_attr_init(&thread_attrs);
|
|
pthread_attr_setstacksize(&thread_attrs, 16384);
|
|
retval = pthread_create(&timer_thread_handle, &thread_attrs, timer_thread, NULL);
|
|
if (retval) {
|
|
fprintf(stderr, "set_worker_timer(): pthread_create(): %d", retval);
|
|
return retval;
|
|
}
|
|
|
|
// set up a periodic SIGALRM, to be hanled by the worker thread
|
|
//
|
|
struct sigaction sa;
|
|
itimerval value;
|
|
sa.sa_handler = worker_signal_handler;
|
|
sa.sa_flags = SA_RESTART;
|
|
sigemptyset(&sa.sa_mask);
|
|
retval = sigaction(SIGALRM, &sa, NULL);
|
|
if (retval) {
|
|
perror("boinc set_worker_timer() sigaction");
|
|
return retval;
|
|
}
|
|
value.it_value.tv_sec = TIMER_PERIOD;
|
|
value.it_value.tv_usec = 0;
|
|
value.it_interval = value.it_value;
|
|
retval = setitimer(ITIMER_REAL, &value, NULL);
|
|
if (retval) {
|
|
perror("boinc set_worker_timer() setitimer");
|
|
}
|
|
|
|
#endif
|
|
return retval;
|
|
}
|
|
|
|
int boinc_send_trickle_up(char* variety, char* p) {
|
|
if (!options.handle_trickle_ups) return ERR_NO_OPTION;
|
|
FILE* f = boinc_fopen(TRICKLE_UP_FILENAME, "wb");
|
|
if (!f) return ERR_FOPEN;
|
|
fprintf(f, "<variety>%s</variety>\n", variety);
|
|
size_t n = fwrite(p, strlen(p), 1, f);
|
|
fclose(f);
|
|
if (n != 1) return ERR_WRITE;
|
|
have_new_trickle_up = true;
|
|
return 0;
|
|
}
|
|
|
|
// logically this should be a bool.
|
|
// But it needs to be an int to be compatible with C
|
|
//
|
|
int boinc_time_to_checkpoint() {
|
|
if (ready_to_checkpoint) {
|
|
in_critical_section = true;
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int boinc_checkpoint_completed() {
|
|
double cur_cpu;
|
|
cur_cpu = boinc_worker_thread_cpu_time();
|
|
last_wu_cpu_time = cur_cpu + aid.wu_cpu_time;
|
|
last_checkpoint_cpu_time = last_wu_cpu_time;
|
|
if (options.send_status_msgs) {
|
|
update_app_progress(last_checkpoint_cpu_time, last_checkpoint_cpu_time);
|
|
}
|
|
time_until_checkpoint = (int)aid.checkpoint_period;
|
|
in_critical_section = false;
|
|
ready_to_checkpoint = false;
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Enter a critical section: while the flag is set, the timer handler
// skips process-control messages and the heartbeat-loss check.
//
void boinc_begin_critical_section() {
    in_critical_section = true;
}
|
|
|
|
// Leave the critical section entered by boinc_begin_critical_section().
//
void boinc_end_critical_section() {
    in_critical_section = false;
}
|
|
|
|
// Record the app's current fraction done; it is passed on to the
// core client with the next progress message. Always returns 0.
//
int boinc_fraction_done(double x) {
    fraction_done = x;
    return 0;
}
|
|
|
|
// for use by graphics code.
|
|
// Caller should check for values outside [0..1];
|
|
// that means undefined (no information available).
|
|
//
|
|
double boinc_get_fraction_done() {
|
|
return fraction_done;
|
|
}
|
|
|
|
int boinc_receive_trickle_down(char* buf, int len) {
|
|
std::string filename;
|
|
char path[256];
|
|
|
|
if (!options.handle_trickle_downs) return false;
|
|
|
|
if (have_trickle_down) {
|
|
relative_to_absolute("", path);
|
|
DirScanner dirscan(path);
|
|
while (dirscan.scan(filename)) {
|
|
fprintf(stderr, "scan: %s\n", filename.c_str());
|
|
if (strstr(filename.c_str(), "trickle_down")) {
|
|
strncpy(buf, filename.c_str(), len);
|
|
return true;
|
|
}
|
|
}
|
|
have_trickle_down = false;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
int boinc_upload_file(std::string& name) {
|
|
char buf[256];
|
|
std::string pname;
|
|
int retval;
|
|
|
|
retval = boinc_resolve_filename_s(name.c_str(), pname);
|
|
if (retval) return retval;
|
|
sprintf(buf, "%s%s", UPLOAD_FILE_REQ_PREFIX, name.c_str());
|
|
FILE* f = boinc_fopen(buf, "w");
|
|
if (!f) return ERR_FOPEN;
|
|
have_new_upload_file = true;
|
|
fclose(f);
|
|
return 0;
|
|
}
|
|
|
|
|
|
int boinc_upload_status(std::string& name) {
|
|
for (unsigned int i=0; i<upload_file_status.size(); i++) {
|
|
UPLOAD_FILE_STATUS& ufs = upload_file_status[i];
|
|
if (ufs.name == name) {
|
|
return ufs.status;
|
|
}
|
|
}
|
|
return ERR_NOT_FOUND;
|
|
}
|
|
|
|
// Record the app's floating-point (fp) and integer (i) op rates
// per CPU second; nonzero values are included in progress messages.
//
void boinc_ops_per_cpu_sec(double fp, double i) {
    fpops_per_cpu_sec = fp;
    intops_per_cpu_sec = i;
}
|
|
|
|
// Record the app's cumulative floating-point (fp) and integer (i)
// op counts; nonzero values are included in progress messages.
//
void boinc_ops_cumulative(double fp, double i) {
    fpops_cumulative = fp;
    intops_cumulative = i;
}
|
|
|
|
// The app needs a network connection: raise the request flag
// (sent to the core client in progress messages) and mark the network
// as unavailable until a <network_available/> message arrives.
//
void boinc_need_network() {
    want_network = 1;
    have_network = 0;
}
|
|
|
|
int boinc_network_poll() {
|
|
return have_network?0:1;
|
|
}
|
|
|
|
// The app is done with the network: withdraw the request flag.
//
void boinc_network_done() {
    want_network = 0;
}
|
|
|
|
#ifndef _WIN32
|
|
// Block SIGALRM in the calling thread,
// so that the worker thread will be forced to handle it.
//
void block_sigalrm() {
    sigset_t alarm_only;
    sigemptyset(&alarm_only);
    sigaddset(&alarm_only, SIGALRM);
    pthread_sigmask(SIG_BLOCK, &alarm_only, NULL);
}
|
|
#endif
|
|
|
|
// Register a function for timer_handler() to call at the end of
// every tick. Passing 0 disables the callback.
//
void boinc_register_timer_callback(FUNC_PTR p) {
    timer_callback = p;
}
|
|
|
|
const char *BOINC_RCSID_0fa0410386 = "$Id$";
|