API: add test harness for the runtime system

- If you run the client with --run_test_app,
  it runs "test_app" in the current directory and interacts with it
  (and does nothing else).
  It can suspend/resume it with arbitrary timing;
  this is controlled in run_test_app() (app_start.cpp).
- example app: add --critical_section option.
  This lets you test the runtime system for apps that do
  most of their work in a critical section (like GPU apps).
- Add some logging messages (conditioned by DEBUG_BOINC_API)
  to the runtime system.
- boinc_finish() waits for the timer thread to write final messages;
  make sure the timer thread doesn't do anything else
  (like suspend the worker thread) during this period.
This commit is contained in:
David Anderson 2013-07-04 16:00:10 -07:00
parent 18b6f26adb
commit 519a0bcbef
13 changed files with 190 additions and 48 deletions

View File

@ -149,11 +149,13 @@ static volatile int interrupt_count = 0;
static volatile int running_interrupt_count = 0;
// number of timer interrupts while not suspended.
// Used to compute elapsed time
static volatile bool finishing;
// used for worker/timer synch during boinc_finish();
static int want_network = 0;
static int have_network = 1;
static double bytes_sent = 0;
static double bytes_received = 0;
bool g_sleep = false;
bool boinc_disable_timer_thread = false;
// simulate unresponsive app by setting to true (debugging)
static FUNC_PTR timer_callback = 0;
char web_graphics_url[256];
@ -678,8 +680,9 @@ int boinc_finish(int status) {
"%s called boinc_finish\n",
boinc_msg_prefix(buf, sizeof(buf))
);
finishing = true;
boinc_sleep(2.0); // let the timer thread send final messages
g_sleep = true; // then disable it
boinc_disable_timer_thread = true; // then disable it
if (options.main_program && status==0) {
FILE* f = fopen(BOINC_FINISH_CALLED_FILE, "w");
@ -905,6 +908,13 @@ int boinc_wu_cpu_time(double& cpu_t) {
// Can be called from either timer or worker thread.
//
static int suspend_activities(bool called_from_worker) {
#ifdef DEBUG_BOINC_API
char log_buf[256];
fprintf(stderr, "%s suspend_activities() called from %s\n",
boinc_msg_prefix(log_buf, sizeof(log_buf)),
called_from_worker?"worker thread":"timer thread"
);
#endif
#ifdef _WIN32
static vector<int> pids;
if (options.multi_thread) {
@ -933,6 +943,12 @@ static int suspend_activities(bool called_from_worker) {
}
int resume_activities() {
#ifdef DEBUG_BOINC_API
char log_buf[256];
fprintf(stderr, "%s resume_activities()\n",
boinc_msg_prefix(log_buf, sizeof(log_buf))
);
#endif
#ifdef _WIN32
static vector<int> pids;
if (options.multi_thread) {
@ -1011,7 +1027,7 @@ static void handle_process_control_msg() {
if (app_client_shm->shm->process_control_request.get_msg(buf)) {
acquire_mutex();
#ifdef DEBUG_BOINC_API
char log_buf[256]
char log_buf[256];
fprintf(stderr, "%s got process control msg %s\n",
boinc_msg_prefix(log_buf, sizeof(log_buf)), buf
);
@ -1194,7 +1210,14 @@ static void graphics_cleanup() {
//
static void timer_handler() {
char buf[512];
if (g_sleep) return;
if (boinc_disable_timer_thread) return;
if (finishing) {
double cur_cpu = boinc_worker_thread_cpu_time();
last_wu_cpu_time = cur_cpu + initial_wu_cpu_time;
update_app_progress(last_wu_cpu_time, last_checkpoint_cpu_time);
boinc_disable_timer_thread = true;
return;
}
interrupt_count++;
if (!boinc_status.suspended) {
running_interrupt_count++;
@ -1203,7 +1226,7 @@ static void timer_handler() {
#ifdef DEBUG_BOINC_API
if (in_critical_section) {
fprintf(stderr,
"%s: timer_handler(): in critical section\n",
"%s timer_handler(): in critical section\n",
boinc_msg_prefix(buf, sizeof(buf))
);
}
@ -1458,10 +1481,24 @@ int boinc_checkpoint_completed() {
}
// Enter a critical section by bumping the in_critical_section counter.
// When DEBUG_BOINC_API is defined, the call is first logged to stderr.
//
void boinc_begin_critical_section() {
#ifdef DEBUG_BOINC_API
    char prefix[256];
    const char* tag = boinc_msg_prefix(prefix, sizeof(prefix));
    fprintf(stderr, "%s begin_critical_section\n", tag);
#endif
    ++in_critical_section;
}
void boinc_end_critical_section() {
#ifdef DEBUG_BOINC_API
char buf[256];
fprintf(stderr,
"%s end_critical_section\n",
boinc_msg_prefix(buf, sizeof(buf))
);
#endif
in_critical_section--;
if (in_critical_section < 0) {
in_critical_section = 0; // just in case

View File

@ -155,7 +155,7 @@ extern HANDLE worker_thread_handle;
#endif
extern int boinc_init_options_general(BOINC_OPTIONS& opt);
extern int start_timer_thread();
extern bool g_sleep;
extern bool boinc_disable_timer_thread;
inline void boinc_options_defaults(BOINC_OPTIONS& b) {
b.main_program = 1;

View File

@ -187,7 +187,7 @@ struct ACTIVE_TASK {
int current_disk_usage(double&);
// disk used by output files and temp files of this task
void get_free_slot(RESULT*);
int start(); // start a process
int start(bool test=false); // start a process
// Termination stuff.
// Terminology:
@ -315,4 +315,6 @@ extern double exclusive_gpu_app_running;
extern int gpu_suspend_reason;
extern double non_boinc_cpu_usage;
extern void run_test_app();
#endif

View File

@ -555,6 +555,11 @@ void ACTIVE_TASK::handle_exited_app(int stat) {
cleanup_task();
if (gstate.run_test_app) {
msg_printf(0, MSG_INFO, "test app finished - exiting");
exit(0);
}
if (!will_restart) {
copy_output_files();
int retval = read_stderr_file();

View File

@ -491,7 +491,9 @@ int ACTIVE_TASK::copy_output_files() {
// else
// ACTIVE_TASK::task_state is PROCESS_EXECUTING
//
int ACTIVE_TASK::start() {
// If "test" is set, we're doing the API test; just run "test_app".
//
int ACTIVE_TASK::start(bool test) {
char exec_name[256], file_path[MAXPATHLEN], buf[256], exec_path[MAXPATHLEN];
char cmdline[80000]; // 64KB plus some extra
unsigned int i;
@ -562,7 +564,12 @@ int ACTIVE_TASK::start() {
// set up applications files
//
strcpy(exec_name, "");
if (test) {
strcpy(exec_name, "test_app");
strcpy(exec_path, "test_app");
} else {
strcpy(exec_name, "");
}
for (i=0; i<app_version->app_files.size(); i++) {
fref = app_version->app_files[i];
fip = fref.file_info;
@ -1017,7 +1024,11 @@ int ACTIVE_TASK::start() {
}
#endif
}
sprintf(buf, "../../%s", exec_path);
if (test) {
strcpy(buf, exec_path);
} else {
sprintf(buf, "../../%s", exec_path);
}
if (g_use_sandbox) {
char switcher_path[MAXPATHLEN];
sprintf(switcher_path, "../../%s/%s",
@ -1041,7 +1052,7 @@ int ACTIVE_TASK::start() {
parse_command_line(cmdline, argv+1);
retval = execv(buf, argv);
}
msg_printf(wup->project, MSG_INTERNAL_ERROR,
fprintf(stderr,
"Process creation (%s) failed: %s, errno=%d\n",
buf, boincerror(retval), errno
);
@ -1063,6 +1074,9 @@ int ACTIVE_TASK::start() {
// go here on error; "buf" contains error message, "retval" is nonzero
//
error:
if (test) {
return retval;
}
// if something failed, it's possible that the executable was munged.
// Verify it to trigger another download.
@ -1142,7 +1156,8 @@ union headeru {
// Read the mach-o headers to determine the architectures
// supported by executable file.
// Returns 1 if application can run natively on i386 / x86_64 Macs, else returns 0.
// Returns 1 if application can run natively on i386 / x86_64 Macs,
// else returns 0.
//
int ACTIVE_TASK::is_native_i386_app(char* exec_path) {
FILE *f;
@ -1208,3 +1223,62 @@ int ACTIVE_TASK::is_native_i386_app(char* exec_path) {
return result;
}
#endif
// The following runs "test_app" and sends it various messages.
// Used for testing the runtime system.
//
void run_test_app() {
WORKUNIT wu;
PROJECT project;
APP app;
APP_VERSION av;
ACTIVE_TASK at;
ACTIVE_TASK_SET ats;
RESULT result;
gstate.run_test_app = true;
wu.project = &project;
wu.app = &app;
wu.command_line = string("--critical_section");
strcpy(app.name, "test app");
av.init();
av.avg_ncpus = 1;
strcpy(result.name, "test result");
result.avp = &av;
result.wup = &wu;
result.project = &project;
result.app = &app;
at.result = &result;
at.wup = &wu;
at.app_version = &av;
at.max_elapsed_time = 1e6;
at.max_disk_usage = 1e14;
at.max_mem_usage = 1e14;
strcpy(at.slot_dir, ".");
ats.active_tasks.push_back(&at);
log_flags.task_debug = true;
unlink("boinc_finish_called");
unlink("boinc_lockfile");
unlink("boinc_temporary_exit");
unlink("stderr.txt");
int retval = at.start(true);
if (retval) {
fprintf(stderr, "start() failed: %s\n", boincerror(retval));
}
while (1) {
gstate.now = dtime();
at.preempt(REMOVE_NEVER);
ats.poll();
usleep(100000);
at.unsuspend();
ats.poll();
usleep(200000);
}
}

View File

@ -105,6 +105,7 @@ CLIENT_STATE::CLIENT_STATE()
exit_after_app_start_secs = 0;
app_started = 0;
exit_before_upload = false;
run_test_app = false;
show_projects = false;
strcpy(detach_project_url, "");
strcpy(main_host_venue, "");

View File

@ -141,6 +141,8 @@ struct CLIENT_STATE {
char attach_project_auth[256];
bool exit_before_upload;
// exit when about to upload a file
bool run_test_app;
// API test mode
#ifndef _WIN32
gid_t boinc_project_gid;
#endif

View File

@ -725,11 +725,7 @@ int FILE_INFO::gunzip(char* md5_buf) {
return 0;
}
int APP_VERSION::parse(XML_PARSER& xp) {
FILE_REF file_ref;
double dtemp;
int rt;
void APP_VERSION::init() {
strcpy(app_name, "");
strcpy(api_version, "");
version_num = 0;
@ -749,7 +745,14 @@ int APP_VERSION::parse(XML_PARSER& xp) {
strcpy(missing_coproc_name, "");
dont_throttle = false;
needs_network = false;
}
int APP_VERSION::parse(XML_PARSER& xp) {
FILE_REF file_ref;
double dtemp;
int rt;
init();
while (!xp.get_tag()) {
if (xp.match_tag("/app_version")) {
rt = gpu_usage.rsc_type;

View File

@ -314,6 +314,7 @@ struct APP_VERSION {
APP_VERSION(){}
~APP_VERSION(){}
void init();
int parse(XML_PARSER&);
int write(MIOFILE&, bool write_file_info = true);
bool had_download_failure(int& failnum);

View File

@ -104,7 +104,7 @@ void log_message_error(const char* msg) {
snprintf(evt_msg, sizeof(evt_msg),
"%s %s\n"
"GLE: %s\n",
time_string, msg,
time_string, msg,
windows_format_error_string(GetLastError(), evt_msg, (sizeof(evt_msg)-((int)strlen(msg)+7)))
);
#else
@ -365,7 +365,7 @@ int boinc_main_loop() {
int main(int argc, char** argv) {
int retval = 0;
coprocs.set_path_to_client(argv[0]); // Used to launch the child process
for (int index = 1; index < argc; index++) {
@ -374,27 +374,36 @@ int main(int argc, char** argv) {
log_message_startup("BOINC is initializing...");
#if !defined(_WIN32) && !defined(__EMX__) && !defined(__APPLE__)
// from <unistd.h>:
// Detach from the controlling terminal and run in the background as system daemon.
// Detach from the controlling terminal and run in the background
// as system daemon.
// Don't change working directory to root ("/"), but redirect
// standard input, standard output and standard error to /dev/null.
//
retval = daemon(1, 0);
break;
#endif
}
// Some dual-GPU laptops (e.g., Macbook Pro) don't power down the more powerful GPU until
// all applications which used them exit. To save battery life, the client launches a
// second instance of the client as a child process to detect and get info about the GPUs.
// The child process writes the info to a temp file which our main client then reads.
if (strcmp(argv[index], "-detect_gpus") == 0 || strcmp(argv[index], "--detect_gpus") == 0) {
std::vector<std::string> warnings;
// Some dual-GPU laptops (e.g., Macbook Pro) don't power down
// the more powerful GPU until all applications which used them exit.
// To save battery life, the client launches a second instance
// of the client as a child process to detect and get info
// about the GPUs.
// The child process writes the info to a temp file which our main
// client then reads.
//
if (!strcmp(argv[index], "--detect_gpus")) {
vector<string> warnings;
coprocs.detect_gpus(warnings);
coprocs.write_coproc_info_file(warnings);
warnings.clear();
return 0;
}
if (!strcmp(argv[index], "--run_test_app")) {
run_test_app();
}
#ifdef _WIN32
// This bit of silliness is required to properly detach when run from within a command
// prompt under Win32. The root cause of the problem is that CMD.EXE does not return

View File

@ -74,17 +74,18 @@ bool early_crash = false;
bool early_sleep = false;
bool trickle_up = false;
bool trickle_down = false;
bool critical_section = false; // run most of the time in a critical section
double cpu_time = 20, comp_result;
// do a billion floating-point ops
// do about .5 seconds of computing
// (note: I needed to add an arg to this;
// otherwise the MS C++ compiler optimizes away
// all but the first call to it!)
//
static double do_a_giga_flop(int foo) {
static double do_some_computing(int foo) {
double x = 3.14159*foo;
int i;
for (i=0; i<500000000; i++) {
for (i=0; i<50000000; i++) {
x += 5.12313123;
x *= 0.5398394834;
}
@ -147,12 +148,24 @@ int main(int argc, char **argv) {
if (strstr(argv[i], "early_crash")) early_crash = true;
if (strstr(argv[i], "early_sleep")) early_sleep = true;
if (strstr(argv[i], "run_slow")) run_slow = true;
if (strstr(argv[i], "critical_section")) critical_section = true;
if (strstr(argv[i], "cpu_time")) {
cpu_time = atof(argv[++i]);
}
if (strstr(argv[i], "trickle_up")) trickle_up = true;
if (strstr(argv[i], "trickle_down")) trickle_down = true;
}
fprintf(stderr, "%s app started; CPU time %f, flags:%s%s%s%s%s%s%s\n",
boinc_msg_prefix(buf, sizeof(buf)),
cpu_time,
early_exit?" early_exit":"",
early_crash?" early_crash":"",
early_sleep?" early_sleep":"",
run_slow?" run_slow":"",
critical_section?" critical_section":"",
trickle_up?" trickle_up":"",
trickle_down?" trickle_down":""
);
retval = boinc_init();
if (retval) {
@ -241,7 +254,7 @@ int main(int argc, char **argv) {
boinc_crash();
}
if (early_sleep && i>30) {
g_sleep = true;
boinc_disable_timer_thread = true;
while (1) boinc_sleep(1);
}
@ -304,7 +317,13 @@ int main(int argc, char **argv) {
}
boinc_checkpoint_completed();
}
comp_result = do_a_giga_flop(i);
if (critical_section) {
boinc_begin_critical_section();
}
comp_result = do_some_computing(i);
if (critical_section) {
boinc_end_critical_section();
}
}
}
boinc_fraction_done(1);
@ -315,13 +334,15 @@ int main(int argc, char **argv) {
}
#ifdef _WIN32
int WINAPI WinMain(HINSTANCE hInst, HINSTANCE hPrevInst, LPSTR Args, int WinMode) {
int WINAPI WinMain(
HINSTANCE hInst, HINSTANCE hPrevInst, LPSTR Args, int WinMode
) {
LPSTR command_line;
char* argv[100];
int argc;
command_line = GetCommandLine();
argc = parse_command_line( command_line, argv );
argc = parse_command_line(command_line, argv);
return main(argc, argv);
}
#endif

View File

@ -41,7 +41,6 @@ using std::string;
bool run_slow = false;
bool early_exit = false;
bool early_crash = false;
bool early_sleep = false;
double cpu_time = 20, comp_result;
int main(int argc, char** argv) {
@ -58,7 +57,6 @@ int main(int argc, char** argv) {
for (i=0; i<argc; i++) {
if (!strcmp(argv[i], "-early_exit")) early_exit = true;
if (!strcmp(argv[i], "-early_crash")) early_crash = true;
if (!strcmp(argv[i], "-early_sleep")) early_sleep = true;
if (!strcmp(argv[i], "-run_slow")) run_slow = true;
if (!strcmp(argv[i], "-cpu_time")) {
cpu_time = atof(argv[++i]);
@ -167,10 +165,6 @@ int main(int argc, char** argv) {
if (early_crash && i>30) {
boinc_crash();
}
if (early_sleep && i>30) {
g_sleep = true;
while (1) boinc_sleep(1);
}
if (boinc_time_to_checkpoint()) {
//if (i==7) {

View File

@ -34,7 +34,6 @@
// -cpu_time N: use about N CPU seconds after copying files
// -early_exit: exit(10) after 30 iterations
// -early_crash: crash after 30 iterations
// -early_sleep: go into infinite sleep after 30 iterations
//
// See http://boinc.berkeley.edu/trac/wiki/GPUApp for any compiling issues.
// Original contributor: Tuan Le (tuanle86@berkeley.edu)
@ -56,7 +55,6 @@ int main(int argc, char * argv[]) {
for (i=0; i<argc; i++) {
if (!strcmp(argv[i], "-early_exit")) early_exit = true;
if (!strcmp(argv[i], "-early_crash")) early_crash = true;
if (!strcmp(argv[i], "-early_sleep")) early_sleep = true;
if (!strcmp(argv[i], "-run_slow")) run_slow = true;
if (!strcmp(argv[i], "-cpu_time")) {
cpu_time = atof(argv[++i]);
@ -182,11 +180,6 @@ int main(int argc, char * argv[]) {
boinc_crash();
}
if (early_sleep && i>30) {
g_sleep = true;
while (1) boinc_sleep(1);
}
if (boinc_time_to_checkpoint()) {
printf("Perform checkpointing at inversion # %d\n",i);
//we'll need to write the current matrix to the state file.