Merge pull request #4859 from BOINC/dpa_cpu_usage2

fix non-BOINC CPU usage limit on Win and Linux
This commit is contained in:
David Anderson 2022-08-02 13:53:31 -07:00 committed by GitHub
commit a036709578
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 116 additions and 6 deletions

View File

@ -359,8 +359,8 @@ void ACTIVE_TASK_SET::get_memory_usage() {
unsigned int i;
int retval;
static bool first = true;
static double last_cpu_time;
double diff=0;
bool using_vbox = false;
if (!first) {
diff = gstate.now - last_mem_time;
@ -412,6 +412,7 @@ void ACTIVE_TASK_SET::get_memory_usage() {
}
procinfo_app(pi, v, pm, atp->app_version->graphics_exec_file);
if (atp->app_version->is_vm_app) {
using_vbox = true;
// the memory of virtual machine apps is not reported correctly,
// at least on Windows. Use the VM size instead.
//
@ -485,13 +486,32 @@ void ACTIVE_TASK_SET::get_memory_usage() {
}
}
// get info on non-BOINC processes.
#if defined(__linux__) || defined(_WIN32)
// compute non_boinc_cpu_usage
// Improved version for systems where we can get total CPU (Win, Linux)
//
static double last_nbrc=0;
double nbrc = total_cpu_time() - boinc_related_cpu_time(pm, using_vbox);
double delta_nbrc = nbrc - last_nbrc;
if (delta_nbrc < 0) delta_nbrc = 0;
last_nbrc = nbrc;
if (!first) {
non_boinc_cpu_usage = delta_nbrc/(diff*gstate.host_info.p_ncpus);
//printf("non_boinc_cpu_usage %f\n", non_boinc_cpu_usage);
}
#else
// compute non_boinc_cpu_usage
//
// NOTE: this is flawed because it doesn't count short-lived processes
// correctly. Linux and Win use a better approach (see above).
//
// mem usage info is not useful because most OSs don't
// move idle processes out of RAM, so physical memory is always full.
// Also (at least on Win) page faults are used for various things,
// not all of them generate disk I/O,
// so they're not useful for detecting paging/thrashing.
//
static double last_cpu_time;
PROCINFO pi;
procinfo_non_boinc(pi, pm);
if (log_flags.mem_usage_debug) {
@ -508,13 +528,17 @@ void ACTIVE_TASK_SET::get_memory_usage() {
// processes might have exited in the last 10 sec,
// causing this to be negative.
if (non_boinc_cpu_usage < 0) non_boinc_cpu_usage = 0;
}
last_cpu_time = new_cpu_time;
#endif
if (!first) {
if (log_flags.mem_usage_debug) {
msg_printf(NULL, MSG_INFO,
"[mem_usage] non-BOINC CPU usage: %.2f%%", non_boinc_cpu_usage*100
);
}
}
last_cpu_time = new_cpu_time;
first = false;
}

View File

@ -203,6 +203,8 @@ void CLIENT_STATE::get_disk_shares() {
// and if it's zero set gpu_suspend_reason
//
int CLIENT_STATE::check_suspend_processing() {
static double last_cpu_usage_suspend=0;
if (benchmarks_running) {
return SUSPEND_REASON_BENCHMARKS;
}
@ -247,8 +249,18 @@ int CLIENT_STATE::check_suspend_processing() {
if (now - exclusive_app_running < MEMORY_USAGE_PERIOD + EXCLUSIVE_APP_WAIT) {
return SUSPEND_REASON_EXCLUSIVE_APP_RUNNING;
}
if (global_prefs.suspend_cpu_usage && non_boinc_cpu_usage*100 > global_prefs.suspend_cpu_usage) {
return SUSPEND_REASON_CPU_USAGE;
// if we suspended because of CPU usage,
// don't unsuspend for at least 2*MEMORY_USAGE_PERIOD
//
if (global_prefs.suspend_cpu_usage) {
if (now < last_cpu_usage_suspend+2*MEMORY_USAGE_PERIOD) {
return SUSPEND_REASON_CPU_USAGE;
}
if (non_boinc_cpu_usage*100 > global_prefs.suspend_cpu_usage) {
last_cpu_usage_suspend = now;
return SUSPEND_REASON_CPU_USAGE;
}
}
}

View File

@ -135,10 +135,34 @@ void procinfo_non_boinc(PROCINFO& procinfo, PROC_MAP& pm) {
procinfo.working_set_size += p.working_set_size;
}
#if 0
fprintf(stderr, "total non-boinc: %f %f\n", procinfo.user_time, procinfo.kernel_time);
fprintf(stderr,
"total non-boinc: %f %f\n", procinfo.user_time, procinfo.kernel_time
);
#endif
}
// Sum the user-mode CPU time of every process in pm that we
// attribute to BOINC rather than to the user:
// - processes flagged as BOINC apps
// - processes flagged as low priority
// - if using_vbox is set, processes whose command contains "VBoxSVC"
//   (the Vbox daemon)
// On Windows the process with ID 0 (the idle process) is skipped.
//
double boinc_related_cpu_time(PROC_MAP& pm, bool using_vbox) {
    double total = 0;
    for (PROC_MAP::iterator it = pm.begin(); it != pm.end(); ++it) {
        PROCINFO& proc = it->second;
#ifdef _WIN32
        // idle process
        if (proc.id == 0) continue;
#endif
        bool counted = proc.is_boinc_app || proc.is_low_priority;
        if (!counted && using_vbox) {
            if (strstr(proc.command, "VBoxSVC")) {
                counted = true;
            }
        }
        if (!counted) continue;
        total += proc.user_time;
    }
    return total;
}
double process_tree_cpu_time(int pid) {
PROC_MAP pm;
PROCINFO procinfo;

View File

@ -82,4 +82,10 @@ extern void procinfo_non_boinc(PROCINFO&, PROC_MAP&);
extern double process_tree_cpu_time(int pid);
// get the CPU time of the given process and its descendants
extern double total_cpu_time();
// total user-mode CPU time of the whole system, in seconds,
// as reported by the OS.
// (the Linux implementation also includes niced user time)
extern double boinc_related_cpu_time(PROC_MAP&, bool using_vbox);
// sum of the user-mode CPU time of the processes in the given map
// that are BOINC apps or low-priority processes, and
// (if using_vbox) processes whose command contains "VBoxSVC",
// i.e. the Vbox daemon
#endif

View File

@ -263,3 +263,35 @@ int procinfo_setup(PROC_MAP& pm) {
find_children(pm);
return 0;
}
// get total user-mode CPU time, in seconds
// see https://www.baeldung.com/linux/get-cpu-usage
//
// Reads the first line of /proc/stat ("cpu  user nice ...") and
// returns (user + nice) converted from clock ticks to seconds.
// /proc/stat is opened on the first call and kept open;
// later calls rewind and re-read it.
// Returns 0 (after writing a message to stderr) if the tick rate
// can't be determined or the file can't be opened, read or parsed.
//
double total_cpu_time() {
    static FILE *f=NULL;
    static double scale=0;
        // seconds per clock tick; set when f is first opened
    char buf[1024];

    if (!f) {
        // get the tick rate before opening the file, so that a failure
        // here doesn't leave f open with scale unset
        long hz = sysconf(_SC_CLK_TCK);
        if (hz <= 0) {
            fprintf(stderr, "can't get clock tick rate\n");
            return 0;
        }
        f = fopen("/proc/stat", "r");
        if (!f) {
            fprintf(stderr, "can't open /proc/stat\n");
            return 0;
        }
        scale = 1./hz;
    } else {
        // presumably the fflush() is there to drop data buffered
        // by the previous read, so we see current values after rewinding
        fflush(f);
        rewind(f);
    }
    // read up to the full buffer (was a hard-coded 256)
    if (!fgets(buf, sizeof(buf), f)) {
        fprintf(stderr, "can't read /proc/stat\n");
        return 0;
    }
    double user, nice;
    int n = sscanf(buf, "cpu %lf %lf", &user, &nice);
    if (n != 2) {
        fprintf(stderr, "can't parse /proc/stat: %s\n", buf);
        return 0;
    }
    return (user+nice)*scale;
}

View File

@ -149,3 +149,15 @@ int procinfo_setup(PROC_MAP& pm) {
}
return 0;
}
// get total user-mode CPU time, in seconds
// see https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getsystemtimes
//
// Returns 0 if GetSystemTimes() fails.
//
double total_cpu_time() {
    FILETIME idle, kernel, user;
    if (!GetSystemTimes(&idle, &kernel, &user)) {
        // the FILETIMEs were not filled in; don't read garbage from them
        return 0;
    }
    // combine the two 32-bit halves into one 64-bit count
    ULARGE_INTEGER x;
    x.LowPart = user.dwLowDateTime;
    x.HighPart = user.dwHighDateTime;
    // FILETIME counts 100-nanosecond intervals; convert to seconds
    return (double)x.QuadPart/1e7;
}