diff --git a/api/boinc_api.cpp b/api/boinc_api.cpp
index be7b345d3e..6b1709f6d0 100644
--- a/api/boinc_api.cpp
+++ b/api/boinc_api.cpp
@@ -1052,7 +1052,8 @@ int boinc_report_app_status_aux(
double _fraction_done,
int other_pid,
double _bytes_sent,
- double _bytes_received
+ double _bytes_received,
+ double wss
) {
char msg_buf[MSG_CHANNEL_SIZE], buf[1024];
if (standalone) return 0;
@@ -1081,6 +1082,10 @@ int boinc_report_app_status_aux(
sprintf(buf, "%d\n", ac_state);
strlcat(msg_buf, buf, sizeof(msg_buf));
}
+ if (wss) {
+ sprintf(buf, "%f\n", wss);
+ strlcat(msg_buf, buf, sizeof(msg_buf));
+ }
#ifdef MSGS_FROM_FILE
if (fout) {
fputs(msg_buf, fout);
@@ -1100,7 +1105,7 @@ int boinc_report_app_status(
double _fraction_done
){
return boinc_report_app_status_aux(
- cpu_time, checkpoint_cpu_time, _fraction_done, 0, 0, 0
+ cpu_time, checkpoint_cpu_time, _fraction_done, 0, 0, 0, 0
);
}
diff --git a/api/boinc_api.h b/api/boinc_api.h
index acb65bb845..758cf7e55b 100644
--- a/api/boinc_api.h
+++ b/api/boinc_api.h
@@ -138,7 +138,8 @@ extern int boinc_upload_status(std::string& name);
extern char* boinc_msg_prefix(char*, int);
extern int boinc_report_app_status_aux(
double cpu_time, double checkpoint_cpu_time, double _fraction_done,
- int other_pid, double bytes_sent, double bytes_received
+ int other_pid, double bytes_sent, double bytes_received,
+ double wss
);
extern int boinc_temporary_exit(
int delay, const char* reason=NULL, bool is_notice=false
diff --git a/client/app.cpp b/client/app.cpp
index eda742eb22..8fbfba2c77 100644
--- a/client/app.cpp
+++ b/client/app.cpp
@@ -110,6 +110,7 @@ ACTIVE_TASK::ACTIVE_TASK() {
peak_disk_usage = 0;
once_ran_edf = false;
+ wss_from_app = 0;
fraction_done = 0;
fraction_done_elapsed_time = 0;
first_fraction_done = 0;
@@ -420,6 +421,8 @@ void ACTIVE_TASK_SET::get_memory_usage() {
// at least on Windows. Use the VM size instead.
//
pi.working_set_size_smoothed = atp->wup->rsc_memory_bound;
+ } else if (atp->wss_from_app > 0) {
+ pi.working_set_size_smoothed = .5*(pi.working_set_size_smoothed + atp->wss_from_app);
} else {
pi.working_set_size_smoothed = .5*(pi.working_set_size_smoothed + pi.working_set_size);
}
diff --git a/client/app.h b/client/app.h
index d7b306bea2..72534512ef 100644
--- a/client/app.h
+++ b/client/app.h
@@ -53,12 +53,14 @@ typedef int PROCESS_ID;
// Represents a job in progress.
-// When an active task is created, it is assigned a "slot"
+// When a job is started, it is assigned a "slot"
// which determines the directory it runs in.
-// This doesn't change over the life of the active task;
-// thus the task can use the slot directory for temp files
+// This doesn't change over the life of the job;
+// so it can use the slot directory for temp files
// that BOINC doesn't know about.
+// If you add anything, initialize it in the constructor
+//
struct ACTIVE_TASK {
#ifdef _WIN32
HANDLE process_handle, shm_handle;
@@ -100,8 +102,12 @@ struct ACTIVE_TASK {
// most recent CPU time reported by app
bool once_ran_edf;
- // END OF ITEMS SAVED IN STATE FILE
+ // END OF ITEMS SAVED IN STATE FILES
+ double wss_from_app;
+ // work set size reported by the app
+ // (e.g. docker_wrapper does this).
+ // If nonzero, use this instead of procinfo data
double fraction_done;
// App's estimate of how much of the work unit is done.
// Passed from the application via an API call;
diff --git a/client/app_control.cpp b/client/app_control.cpp
index b03a9875db..14c628461c 100644
--- a/client/app_control.cpp
+++ b/client/app_control.cpp
@@ -1439,8 +1439,23 @@ bool ACTIVE_TASK::get_app_status_msg() {
}
}
}
- parse_double(msg_buf, "", current_cpu_time);
- parse_double(msg_buf, "", checkpoint_cpu_time);
+ if (parse_double(msg_buf, "", current_cpu_time)) {
+ if (current_cpu_time < 0) {
+ msg_printf(result->project, MSG_INFO,
+ "app reporting negative CPU: %f", current_cpu_time
+ );
+ current_cpu_time = 0;
+ }
+ }
+ if (parse_double(msg_buf, "", checkpoint_cpu_time)) {
+ if (checkpoint_cpu_time < 0) {
+ msg_printf(result->project, MSG_INFO,
+ "app reporting negative checkpoint CPU: %f", checkpoint_cpu_time
+ );
+ checkpoint_cpu_time = 0;
+ }
+ }
+ parse_double(msg_buf, "", wss_from_app);
parse_double(msg_buf, "", result->fpops_per_cpu_sec);
parse_double(msg_buf, "", result->fpops_cumulative);
parse_double(msg_buf, "", result->intops_per_cpu_sec);
@@ -1470,18 +1485,6 @@ bool ACTIVE_TASK::get_app_status_msg() {
if (parse_int(msg_buf, "", i)) {
sporadic_ac_state = (SPORADIC_AC_STATE)i;
}
- if (current_cpu_time < 0) {
- msg_printf(result->project, MSG_INFO,
- "app reporting negative CPU: %f", current_cpu_time
- );
- current_cpu_time = 0;
- }
- if (checkpoint_cpu_time < 0) {
- msg_printf(result->project, MSG_INFO,
- "app reporting negative checkpoint CPU: %f", checkpoint_cpu_time
- );
- checkpoint_cpu_time = 0;
- }
return true;
}
diff --git a/samples/docker_wrapper/docker_wrapper.cpp b/samples/docker_wrapper/docker_wrapper.cpp
index 70d9df0039..09458bc445 100644
--- a/samples/docker_wrapper/docker_wrapper.cpp
+++ b/samples/docker_wrapper/docker_wrapper.cpp
@@ -79,14 +79,16 @@ using std::string;
using std::vector;
#define POLL_PERIOD 1.0
+#define STATUS_PERIOD 10
+ // reports status this often
enum JOB_STATUS {JOB_IN_PROGRESS, JOB_SUCCESS, JOB_FAIL};
struct RSC_USAGE {
- double cpu_time;
+ double cpu_frac;
double wss;
void clear() {
- cpu_time = 0;
+ cpu_frac = 0;
wss = 0;
}
};
@@ -442,7 +444,10 @@ void poll_client_msgs() {
}
}
-JOB_STATUS poll_app(RSC_USAGE &ru) {
+// check whether job has exited
+// Note: on both Podman and Docker this takes significant CPU time
+// (like .03 sec) so do it infrequently (like 5 sec)
+JOB_STATUS poll_app() {
char cmd[1024];
vector out;
int retval;
@@ -461,6 +466,41 @@ JOB_STATUS poll_app(RSC_USAGE &ru) {
return JOB_FAIL;
}
+// get CPU and mem usage
+// This is also surprisingly slow
+int get_stats(RSC_USAGE &ru) {
+ char cmd[1024];
+ vector out;
+ int retval;
+ unsigned int n;
+
+ sprintf(cmd,
+ "%s stats --no-stream --format \"{{.CPUPerc}} {{.MemUsage}}\" %s",
+ cli_prog, container_name
+ );
+ retval = run_docker_command(cmd, out);
+ if (retval) return -1;
+ n = out.size();
+ if (n == 0) return -1;
+ const char *buf = out[n-1].c_str();
+ // output is like
+ // 0.00% 420KiB / 503.8GiB
+ double cpu_pct, mem;
+ char mem_unit;
+ n = sscanf(buf, "%lf%% %lf%c", &cpu_pct, &mem, &mem_unit);
+ if (n != 3) return -1;
+ switch (mem_unit) {
+ case 'G': mem *= GIGA; break;
+ case 'M': mem *= MEGA; break;
+ case 'K': mem *= KILO; break;
+ case 'B': break;
+ default: return -1;
+ }
+ ru.cpu_frac = cpu_pct/100.;
+ ru.wss = mem;
+ return 0;
+}
+
#ifdef _WIN32
// find a WSL distro with Docker and set up a command link to it
//
@@ -543,6 +583,14 @@ int main(int argc, char** argv) {
}
if (verbose) config.print();
+ if (sporadic) {
+ retval = boinc_sporadic_dir(".");
+ if (retval) {
+ fprintf(stderr, "can't create sporadic files\n");
+ boinc_finish(retval);
+ }
+ }
+
#ifdef _WIN32
retval = wsl_init();
if (retval) {
@@ -578,18 +626,35 @@ int main(int argc, char** argv) {
boinc_finish(1);
}
running = true;
- while (1) {
+ double cpu_time = 0;
+ for (int i=0; ; i++) {
poll_client_msgs();
- switch(poll_app(ru)) {
- case JOB_FAIL:
- cleanup();
- boinc_finish(1);
- break;
- case JOB_SUCCESS:
- copy_files_from_container();
- cleanup();
- boinc_finish(0);
- break;
+ if (i%STATUS_PERIOD == 0) {
+ switch(poll_app()) {
+ case JOB_FAIL:
+ cleanup();
+ boinc_finish(1);
+ break;
+ case JOB_SUCCESS:
+ copy_files_from_container();
+ cleanup();
+ boinc_finish(0);
+ break;
+ default:
+ break;
+ }
+ retval = get_stats(ru);
+ if (!retval) {
+ cpu_time += STATUS_PERIOD*ru.cpu_frac;
+ boinc_report_app_status_aux(
+ cpu_time,
+ 0, // checkpoint CPU time
+ 0, // frac done
+ 0, // other PID
+ 0,0, // bytes send/received
+ ru.wss
+ );
+ }
}
boinc_sleep(POLL_PERIOD);
}
diff --git a/samples/vboxwrapper/vbox_common.cpp b/samples/vboxwrapper/vbox_common.cpp
index 42664be020..cd42a816ef 100644
--- a/samples/vboxwrapper/vbox_common.cpp
+++ b/samples/vboxwrapper/vbox_common.cpp
@@ -316,7 +316,8 @@ void VBOX_VM::report_clean(
fraction_done,
vm_pid,
bytes_sent,
- bytes_received
+ bytes_received,
+ 0
);
}
diff --git a/samples/vboxwrapper/vboxwrapper.cpp b/samples/vboxwrapper/vboxwrapper.cpp
index 9f15ee68ae..75eec21aa6 100644
--- a/samples/vboxwrapper/vboxwrapper.cpp
+++ b/samples/vboxwrapper/vboxwrapper.cpp
@@ -889,7 +889,8 @@ int main(int argc, char** argv) {
fraction_done,
pVM->vm_pid,
bytes_sent,
- bytes_received
+ bytes_received,
+ 0
);
// Wait for up to 5 minutes for the VM to switch states.
@@ -1373,7 +1374,8 @@ int main(int argc, char** argv) {
fraction_done,
pVM->vm_pid,
bytes_sent,
- bytes_received
+ bytes_received,
+ 0
);
if (!retval) {