From ac52f73953c265da5566cd700d012348baa37734 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Fri, 29 Jul 2022 12:42:42 -0700 Subject: [PATCH 1/3] client (Win and Linux): compute non-BOINC CPU usage in a more accurate way client: if suspend because of non-BOINC CPU usage, don't resume for at least 20 sec, since (even with the above) a one-time false negative is possible; consecutive false negatives are less likely. --- client/app.cpp | 30 +++++++++++++++++++++++++++--- client/cs_prefs.cpp | 16 ++++++++++++++-- lib/procinfo.cpp | 26 +++++++++++++++++++++++++- lib/procinfo.h | 6 ++++++ lib/procinfo_unix.cpp | 21 +++++++++++++++++++++ lib/procinfo_win.cpp | 13 +++++++++++++ 6 files changed, 106 insertions(+), 6 deletions(-) diff --git a/client/app.cpp b/client/app.cpp index 4111e1b959..db9bcecc74 100644 --- a/client/app.cpp +++ b/client/app.cpp @@ -359,8 +359,8 @@ void ACTIVE_TASK_SET::get_memory_usage() { unsigned int i; int retval; static bool first = true; - static double last_cpu_time; double diff=0; + bool using_vbox = false; if (!first) { diff = gstate.now - last_mem_time; @@ -412,6 +412,7 @@ void ACTIVE_TASK_SET::get_memory_usage() { } procinfo_app(pi, v, pm, atp->app_version->graphics_exec_file); if (atp->app_version->is_vm_app) { + using_vbox = true; // the memory of virtual machine apps is not reported correctly, // at least on Windows. Use the VM size instead. // @@ -485,13 +486,32 @@ void ACTIVE_TASK_SET::get_memory_usage() { } } - // get info on non-BOINC processes. 
+#if defined(__linux__) || defined(_WIN32) + // compute non_boinc_cpu_usage + // Improved version for systems where we can get total CPU (Win, Linux) + // + static double last_nbrc=0; + double nbrc = total_cpu_time() - boinc_related_cpu_time(pm, using_vbox); + double delta_nbrc = nbrc - last_nbrc; + if (delta_nbrc < 0) delta_nbrc = 0; + last_nbrc = nbrc; + if (!first) { + non_boinc_cpu_usage = delta_nbrc/(diff*gstate.host_info.p_ncpus); + //printf("non_boinc_cpu_usage %f\n", non_boinc_cpu_usage); + } +#else + // compute non_boinc_cpu_usage + // + // NOTE: this is flawed because it doesn't count short-lived processes + // correctly. Linux and Win use a better approach (see above). + // // mem usage info is not useful because most OSs don't // move idle processes out of RAM, so physical memory is always full. // Also (at least on Win) page faults are used for various things, // not all of them generate disk I/O, // so they're not useful for detecting paging/thrashing. // + static double last_cpu_time; PROCINFO pi; procinfo_non_boinc(pi, pm); if (log_flags.mem_usage_debug) { @@ -508,13 +528,17 @@ void ACTIVE_TASK_SET::get_memory_usage() { // processes might have exited in the last 10 sec, // causing this to be negative. 
if (non_boinc_cpu_usage < 0) non_boinc_cpu_usage = 0; + } + last_cpu_time = new_cpu_time; +#endif + + if (!first) { if (log_flags.mem_usage_debug) { msg_printf(NULL, MSG_INFO, "[mem_usage] non-BOINC CPU usage: %.2f%%", non_boinc_cpu_usage*100 ); } } - last_cpu_time = new_cpu_time; first = false; } diff --git a/client/cs_prefs.cpp b/client/cs_prefs.cpp index e673e0fd69..3ce83fed27 100644 --- a/client/cs_prefs.cpp +++ b/client/cs_prefs.cpp @@ -203,6 +203,8 @@ void CLIENT_STATE::get_disk_shares() { // and if it's zero set gpu_suspend_reason // int CLIENT_STATE::check_suspend_processing() { + static double last_cpu_usage_suspend=0; + if (benchmarks_running) { return SUSPEND_REASON_BENCHMARKS; } @@ -247,8 +249,18 @@ int CLIENT_STATE::check_suspend_processing() { if (now - exclusive_app_running < MEMORY_USAGE_PERIOD + EXCLUSIVE_APP_WAIT) { return SUSPEND_REASON_EXCLUSIVE_APP_RUNNING; } - if (global_prefs.suspend_cpu_usage && non_boinc_cpu_usage*100 > global_prefs.suspend_cpu_usage) { - return SUSPEND_REASON_CPU_USAGE; + + // if we suspended because of CPU usage, + // don't unsuspend for at least 2*MEMORY_USAGE_PERIOD + // + if (global_prefs.suspend_cpu_usage) { + if (now < last_cpu_usage_suspend+2*MEMORY_USAGE_PERIOD) { + return SUSPEND_REASON_CPU_USAGE; + } + if (non_boinc_cpu_usage*100 > global_prefs.suspend_cpu_usage) { + last_cpu_usage_suspend = now; + return SUSPEND_REASON_CPU_USAGE; + } } } diff --git a/lib/procinfo.cpp b/lib/procinfo.cpp index eb584656f2..8834723ebc 100644 --- a/lib/procinfo.cpp +++ b/lib/procinfo.cpp @@ -135,10 +135,34 @@ void procinfo_non_boinc(PROCINFO& procinfo, PROC_MAP& pm) { procinfo.working_set_size += p.working_set_size; } #if 0 - fprintf(stderr, "total non-boinc: %f %f\n", procinfo.user_time, procinfo.kernel_time); + fprintf(stderr, + "total non-boinc: %f %f\n", procinfo.user_time, procinfo.kernel_time + ); #endif } +// get CPU time of BOINC-related processes, low-priority processes, +// and (if we're using Vbox) the Vbox daemon. 
+// +double boinc_related_cpu_time(PROC_MAP& pm, bool using_vbox) { + double sum = 0; + PROC_MAP::iterator i; + for (i=pm.begin(); i!=pm.end(); ++i) { + PROCINFO& p = i->second; +#ifdef _WIN32 + if (p.id == 0) continue; // idle process +#endif + if ( + p.is_boinc_app + || p.is_low_priority + || (using_vbox && strstr(p.command, "VBoxSVC")) + ) { + sum += p.user_time; + } + } + return sum; +} + double process_tree_cpu_time(int pid) { PROC_MAP pm; PROCINFO procinfo; diff --git a/lib/procinfo.h b/lib/procinfo.h index 4d745e6dbf..0b08b89b3f 100644 --- a/lib/procinfo.h +++ b/lib/procinfo.h @@ -82,4 +82,10 @@ extern void procinfo_non_boinc(PROCINFO&, PROC_MAP&); extern double process_tree_cpu_time(int pid); // get the CPU time of the given process and its descendants +extern double total_cpu_time(); + // total CPU time, as reported by OS + +extern double boinc_related_cpu_time(PROC_MAP&, bool using_vbox); + // total CPU of current BOINC processes, low-priority processes, + // and (if using vbox) the Vbox daemon #endif diff --git a/lib/procinfo_unix.cpp b/lib/procinfo_unix.cpp index 3318174584..b620691a66 100644 --- a/lib/procinfo_unix.cpp +++ b/lib/procinfo_unix.cpp @@ -263,3 +263,24 @@ int procinfo_setup(PROC_MAP& pm) { find_children(pm); return 0; } + +// get total user-mode CPU time +// see https://www.baeldung.com/linux/get-cpu-usage +// +double total_cpu_time() { + char buf[1024]; + static FILE *f=NULL; + static double scale; + if (!f) { + f = fopen("/proc/stat", "r"); + long hz = sysconf(_SC_CLK_TCK); + scale = 1./hz; + } else { + fflush(f); + rewind(f); + } + if (!fgets(buf, 256, f)) return 0; + double user, nice; + sscanf(buf, "cpu %lf %lf", &user, &nice); + return (user+nice)*scale; +} diff --git a/lib/procinfo_win.cpp b/lib/procinfo_win.cpp index 7179382a16..c1fe19851f 100644 --- a/lib/procinfo_win.cpp +++ b/lib/procinfo_win.cpp @@ -149,3 +149,16 @@ int procinfo_setup(PROC_MAP& pm) { } return 0; } + +// get total CPU time +// see 
https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getsystemtimes +// +double total_cpu_time() { + FILETIME i, s, u; + GetSystemTimes(&i, &s, &u); + ULARGE_INTEGER x; + x.LowPart = u.dwLowDateTime; + x.HighPart = u.dwHighDateTime; + user = (double)x.QuadPart/1e7; + return user; +} From 8f6e800277aff8963258535c2827657e187ac4df Mon Sep 17 00:00:00 2001 From: davidpanderson Date: Fri, 29 Jul 2022 22:31:43 -0700 Subject: [PATCH 2/3] Win fix --- lib/procinfo_win.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/procinfo_win.cpp b/lib/procinfo_win.cpp index c1fe19851f..a86726cee4 100644 --- a/lib/procinfo_win.cpp +++ b/lib/procinfo_win.cpp @@ -159,6 +159,5 @@ double total_cpu_time() { ULARGE_INTEGER x; x.LowPart = u.dwLowDateTime; x.HighPart = u.dwHighDateTime; - user = (double)x.QuadPart/1e7; - return user; + return (double)x.QuadPart/1e7; } From 46d489cc9266bbeac33a6b13f881ecd2d28faf70 Mon Sep 17 00:00:00 2001 From: davidpanderson Date: Tue, 2 Aug 2022 13:13:03 -0700 Subject: [PATCH 3/3] add error checking for Linux total_cpu_time() --- lib/procinfo.h | 2 +- lib/procinfo_unix.cpp | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/lib/procinfo.h b/lib/procinfo.h index 0b08b89b3f..8112475d17 100644 --- a/lib/procinfo.h +++ b/lib/procinfo.h @@ -83,7 +83,7 @@ extern double process_tree_cpu_time(int pid); // get the CPU time of the given process and its descendants extern double total_cpu_time(); - // total CPU time, as reported by OS + // total user-mode CPU time, as reported by OS extern double boinc_related_cpu_time(PROC_MAP&, bool using_vbox); // total CPU of current BOINC processes, low-priority processes, diff --git a/lib/procinfo_unix.cpp b/lib/procinfo_unix.cpp index b620691a66..72cfc3e3e0 100644 --- a/lib/procinfo_unix.cpp +++ b/lib/procinfo_unix.cpp @@ -273,14 +273,25 @@ double total_cpu_time() { static double scale; if (!f) { f = fopen("/proc/stat", "r"); + if (!f) { + 
fprintf(stderr, "can't open /proc/stat\n"); + return 0; + } long hz = sysconf(_SC_CLK_TCK); scale = 1./hz; } else { fflush(f); rewind(f); } - if (!fgets(buf, 256, f)) return 0; + if (!fgets(buf, 256, f)) { + fprintf(stderr, "can't read /proc/stat\n"); + return 0; + } double user, nice; - sscanf(buf, "cpu %lf %lf", &user, &nice); + int n = sscanf(buf, "cpu %lf %lf", &user, &nice); + if (n != 2) { + fprintf(stderr, "can't parse /proc/stat: %s\n", buf); + return 0; + } return (user+nice)*scale; }