From 77ca32d7e2370ee61a2b61af17d1539a4f09c4a5 Mon Sep 17 00:00:00 2001 From: "Eric J. Korpela" Date: Thu, 11 May 2006 18:28:35 +0000 Subject: [PATCH] *** empty log message *** svn path=/trunk/boinc/; revision=10123 --- api/boinc_api.C | 85 +++++++++++++++++++++---- api/boinc_api.h | 4 ++ api/gutil.C | 11 ++++ checkin_notes | 33 ++++++++++ lib/diagnostics_win.C | 144 +++++++++++++++++++++++++++++++++++++++++- 5 files changed, 265 insertions(+), 12 deletions(-) diff --git a/api/boinc_api.C b/api/boinc_api.C index d031269272..178c03dd53 100644 --- a/api/boinc_api.C +++ b/api/boinc_api.C @@ -118,6 +118,7 @@ static int non_cpu_intensive = 0; static int want_network = 0; static int have_network = 1; + #define TIMER_PERIOD 1 // period of worker-thread timer interrupts. // This determines the resolution of fraction done and CPU time reporting @@ -135,6 +136,7 @@ HANDLE worker_thread_handle; static MMRESULT timer_id; #else static pthread_t timer_thread_handle; +static pthread_mutex_t timer_mutex=PTHREAD_MUTEX_INITIALIZER; static struct rusage worker_thread_ru; #endif @@ -189,10 +191,13 @@ static int boinc_worker_thread_cpu_time(double& cpu) { cpu = nrunning_ticks * TIMER_PERIOD; // for Win9x } #else - cpu = (double)worker_thread_ru.ru_utime.tv_sec + if (!pthread_mutex_lock(&timer_mutex)) { + cpu = (double)worker_thread_ru.ru_utime.tv_sec + (((double)worker_thread_ru.ru_utime.tv_usec)/1000000.0); - cpu += (double)worker_thread_ru.ru_stime.tv_sec + cpu += (double)worker_thread_ru.ru_stime.tv_sec + (((double)worker_thread_ru.ru_stime.tv_usec)/1000000.0); + pthread_mutex_unlock(&timer_mutex); + } #endif return 0; } @@ -381,7 +386,10 @@ int boinc_finish(int status) { } #ifdef _WIN32 // Stop the timer - timeKillEvent(timer_id); + if (timer_id) { + timeKillEvent(timer_id); + timer_id=0; + } CloseHandle(worker_thread_handle); #endif if (options.main_program && status==0) { @@ -403,6 +411,15 @@ int boinc_finish(int status) { // This is called from the worker, timer, and graphics 
threads. // void boinc_exit(int status) { + +#ifdef _WIN32 + // Free up the windows timer event, so Win98 doesn't run out. + if (timer_id) { + timeKillEvent(timer_id); + timer_id=0; + } +#endif + // Shutdown graphics thread if it is running // if (stop_graphics_thread_ptr) { @@ -654,6 +671,8 @@ static void handle_process_control_msg() { } } +static int timer_thread_created=1; + #ifdef _WIN32 static void CALLBACK worker_timer( UINT uTimerID, UINT uMsg, DWORD dwUser, DWORD dw1, DWORD dw2 @@ -665,7 +684,7 @@ static void worker_timer(int /*a*/) { #ifdef _WIN32 // Initialize the timer thread info for diagnostic // purposes. - diagnostics_set_thread_name("Timer"); + if (timer_thread_created) diagnostics_set_thread_name("Timer"); #endif interrupt_count++; @@ -729,6 +748,26 @@ static void worker_timer(int /*a*/) { #endif } + +void boinc_worker_timer() { + static time_t last_call=time(0); + // timer of last resort if timer thread initialization fails. + if (timer_thread_created) { + return; + } else { + int diff=time(0)-last_call; + while (diff>=TIMER_PERIOD) { + diff-=TIMER_PERIOD; + last_call+=TIMER_PERIOD; +#ifdef _WIN32 + worker_timer(0,0,0,0,0); +#else + worker_timer(0); +#endif + } + } +} + #ifndef _WIN32 void* timer_thread(void*) { block_sigalrm(); @@ -740,16 +779,28 @@ void* timer_thread(void*) { } void worker_signal_handler(int) { - getrusage(RUSAGE_SELF, &worker_thread_ru); +// getrusage can return an error, so try a few times if it returns an error. + if (!pthread_mutex_trylock(&timer_mutex)) { + int i=0; + while (getrusage(RUSAGE_SELF, &worker_thread_ru) && i<10) i++; + pthread_mutex_unlock(&timer_mutex); + } if (options.direct_process_action) { while (boinc_status.suspended) { sleep(1); // don't use boinc_sleep() because it does FP math } } } - #endif + + +// Allow apps to check the status of the timer thread and do something +// about it if the thread creation failed (i.e. 
WIN9X) +int boinc_timer_thread_active() { + return timer_thread_created; +} + // set up timer actitivies. // This is called only and always by the worker thread // @@ -774,16 +825,26 @@ int set_worker_timer() { diagnostics_set_thread_worker(); // Use Windows multimedia timer, since it is more accurate - // than SetTimer and doesn't require an associated event loop + // than SetTimer and doesn't require an associated event loop. + // Try more than once if it fails the first time. // - timer_id = timeSetEvent( + int i=0; + while ((timer_id = timeSetEvent( (int)(TIMER_PERIOD*1000), // uDelay (int)(TIMER_PERIOD*1000), // uResolution worker_timer, // lpTimeProc - NULL, // dwUser + 0, // dwUser TIME_PERIODIC // fuEvent - ); - + )==0) + && (i++ < 10) + ) ; /* do nothing */ + + if (i>10) { + fprintf(stderr, "set_worker_timer(): timeSetEvent() failed.\n"); + timer_thread_created=0; + } + timer_thread_created=1; + // lower our priority here // SetThreadPriority(worker_thread_handle, THREAD_PRIORITY_IDLE); @@ -791,7 +852,9 @@ int set_worker_timer() { retval = pthread_create(&timer_thread_handle, NULL, timer_thread, NULL); if (retval) { fprintf(stderr, "set_worker_timer(): pthread_create(): %d", retval); + timer_thread_created=0; } + timer_thread_created=1; struct sigaction sa; itimerval value; diff --git a/api/boinc_api.h b/api/boinc_api.h index 6f42ac1897..17fad11900 100755 --- a/api/boinc_api.h +++ b/api/boinc_api.h @@ -88,6 +88,10 @@ extern int boinc_receive_trickle_down(char* buf, int len); extern int boinc_init_options(BOINC_OPTIONS*); extern int boinc_get_status(BOINC_STATUS*); extern double boinc_get_fraction_done(); +// Allow check of timer thread creation success. +extern int boinc_timer_thread_active(); +// Allow apps to call the timer event if all else fails. 
+extern void boinc_worker_timer(); #ifdef __APPLE__ extern int setMacPList(void); diff --git a/api/gutil.C b/api/gutil.C index 91447e4d77..def54aa4c0 100755 --- a/api/gutil.C +++ b/api/gutil.C @@ -683,10 +683,21 @@ my_error_exit (j_common_ptr cinfo) longjmp(myerr->setjmp_buffer, 1); } +#if defined(HAVE_MALLOC_H) || defined(_WIN32) +#include <malloc.h> +#endif +#if defined(HAVE_ALLOCA_H) +#include <alloca.h> +#endif + tImageJPG *LoadJPG(const char *filename) { struct jpeg_decompress_struct cinfo; tImageJPG *pImageData = NULL; FILE *pFile; +#if defined(HAVE_ALLOCA) || defined(_WIN32) + alloca(16); // Force a frame pointer even when compiled with + // -fomit-frame-pointer +#endif if((pFile = boinc_fopen(filename, "rb")) == NULL) { fprintf(stderr,"Unable to load JPG File!"); diff --git a/checkin_notes b/checkin_notes index d6610686de..e56d2a7e0b 100755 --- a/checkin_notes +++ b/checkin_notes @@ -4598,3 +4598,36 @@ Rom 10 May 2006 lib/ diagnostics_win.C + +Eric K 11 May 2006 + - (pthreads) Put a mutex around the cpu time calculation in + boinc_worker_thread_cpu_time(). If called from multiple + threads, this would occasionally report the wrong CPU time, + which could cause applications to abort. + - (unix) Added check of return value from getrusage() and retry on + failure. + - (WIN32) Added check of return value from timeSetEvent() and retry + on failure. + - (WIN32) Added timeKillEvent() calls to the boinc_exit() function to + ensure that timers are freed properly. + - Added function boinc_timer_thread_active() that apps can + call to see if the timer thread was properly created. + - Added function boinc_worker_timer() that application worker threads + can call in their main loop. This function will communicate with the + core client if the timer thread was not created. + - Added a dummy call to alloca() in LoadJPG to prevent failure of + setjmp/longjmp in the case that the frame pointer is optimized away. 
+ This should be done on all functions where setjmp is called because + on some platforms longjmp will segfault if the frame pointer doesn't + exist. Some compilers aren't smart enough to know this. + - (WIN32) Added code to the windows exception handler to allow it to pass + exceptions to the standard C signal handlers in the case where the + signal handlers have been overridden. + + api/ + boinc_api.C + boinc_api.h + gutil.C + lib/ + diagnostics_win.C + diff --git a/lib/diagnostics_win.C b/lib/diagnostics_win.C index aa41336cd3..0e2107ffce 100644 --- a/lib/diagnostics_win.C +++ b/lib/diagnostics_win.C @@ -35,6 +35,8 @@ #include "util.h" + + // NtQuerySystemInformation typedef NTSTATUS (WINAPI *tNTQSI)( ULONG SystemInformationClass, @@ -1621,12 +1623,152 @@ DWORD WINAPI diagnostics_unhandled_exception_monitor(LPVOID lpParameter) { return 0; } +#include <signal.h> +#ifndef SIGRTMAX +#if defined(_SIGRTMAX) +#define SIGRTMAX _SIGRTMAX +#elif defined(NSIG) +#define SIGRTMAX (NSIG-1) +#else +#define SIGRTMAX 32 +#endif +#endif + +static int no_reset[SIGRTMAX+1]; + +void setup_no_reset() { + no_reset[SIGILL]=1; +#ifdef SIGTRAP + no_reset[SIGTRAP]=1; +#endif +#ifdef SIGPRIV + no_reset[SIGPRIV]=1; +#endif + no_reset[SIGINT]=1; +}; + +static int no_ignore[SIGRTMAX+1]; + +void setup_no_ignore() { +#ifdef SIGKILL + no_ignore[SIGKILL]=1; +#endif +#ifdef SIGSTOP + no_ignore[SIGSTOP]=1; +#endif + no_ignore[SIGSEGV]=1; +}; + +static int setup_arrays=0; + + + +LONG pass_to_signal_handler(int signum) { + void (*handler)(int); + + if (!setup_arrays) { + setup_arrays=1; + setup_no_ignore(); + setup_no_reset(); + } + handler=signal(signum,SIG_DFL); + // Are we using the default signal handler? + // If so return to the exception handler. + if (handler==SIG_DFL) { + return EXCEPTION_CONTINUE_SEARCH; + } + // Should we ignore this signal? + if (handler==SIG_IGN) { + signal(signum,handler); + // Are we allowed to? + if (!no_ignore[signum]) { + // Yes? Attempt to ignore the exception. 
+ return EXCEPTION_CONTINUE_EXECUTION; + } else { + return EXCEPTION_CONTINUE_SEARCH; + } + } +// Call our signal handler, this probably won't return... + handler(signum); +// if it does, reset the signal handler if appropriate. + if (no_reset[signum]) signal(signum,handler); +// try to continue execution + return EXCEPTION_CONTINUE_EXECUTION; +} + +#include <float.h> +extern "C" { +void __cdecl _fpreset(void); +} + +// Allow apps to install signal handlers for some exceptions that bypass +// the boinc diagnostics. This translates the Windows exceptions into +// standard signals. +LONG diagnostics_check_signal_handlers(PEXCEPTION_POINTERS pExPtrs) { + switch (pExPtrs->ExceptionRecord->ExceptionCode) { + case CONTROL_C_EXIT: + return pass_to_signal_handler(SIGINT); + case EXCEPTION_BREAKPOINT: + case EXCEPTION_SINGLE_STEP: +#ifdef SIGTRAP + return pass_to_signal_handler(SIGTRAP); +#else + break; +#endif + case EXCEPTION_FLT_DENORMAL_OPERAND: + case EXCEPTION_FLT_DIVIDE_BY_ZERO: + case EXCEPTION_FLT_INEXACT_RESULT: + case EXCEPTION_FLT_INVALID_OPERATION: + case EXCEPTION_FLT_OVERFLOW: + case EXCEPTION_FLT_UNDERFLOW: + { + LONG rv=pass_to_signal_handler(SIGFPE); + /* MS claims ignoring an FP signal + * results in an unknown FP state. + * Does an _fpreset() help? 
+ */ + if (rv != EXCEPTION_CONTINUE_SEARCH) + _fpreset(); + return rv; + } + case EXCEPTION_INT_DIVIDE_BY_ZERO: + case EXCEPTION_INT_OVERFLOW: + return pass_to_signal_handler(SIGFPE); + case EXCEPTION_PRIV_INSTRUCTION: +#ifdef SIGPRIV + return pass_to_signal_handler(SIGPRIV); + // nobreak +#endif + case EXCEPTION_ILLEGAL_INSTRUCTION: + return pass_to_signal_handler(SIGILL); + // nobreak + case EXCEPTION_DATATYPE_MISALIGNMENT: +#ifdef SIGBUS + return pass_to_signal_handler(SIGBUS); + // nobreak +#endif + case EXCEPTION_STACK_OVERFLOW: + case EXCEPTION_ACCESS_VIOLATION: + case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: + case EXCEPTION_IN_PAGE_ERROR: + return pass_to_signal_handler(SIGSEGV); + // nobreak + default: break; + } + return EXCEPTION_CONTINUE_SEARCH; +} + // Let the unhandled exception monitor take care of logging the exception data. // Store the exception pointers and then singal the exception monitor to start // partying on the data. LONG CALLBACK boinc_catch_signal(PEXCEPTION_POINTERS pExPtrs) { + // Check whether someone has installed a standard C signal handler to + // handle this exception. + if (diagnostics_check_signal_handlers(pExPtrs) == EXCEPTION_CONTINUE_EXECUTION) + return EXCEPTION_CONTINUE_EXECUTION; + // Store the exception record pointers. diagnostics_set_thread_exception_record(pExPtrs); @@ -1637,7 +1779,7 @@ LONG CALLBACK boinc_catch_signal(PEXCEPTION_POINTERS pExPtrs) { WaitForSingleObject(hExceptionMonitorHalt, INFINITE); // We won't make it to this point, but make the compiler happy anyway. - return 0; + return EXCEPTION_CONTINUE_SEARCH; }