From 3321837b01844b3d1c876fc1586f9a8fd2804a37 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Fri, 25 Jan 2013 22:43:57 -0800 Subject: [PATCH] - wrapper: fix CPU time accounting on Unix --- samples/condor/curl.cpp | 15 ++++++++------ samples/wrapper/job.xml | 13 ++++++------- samples/wrapper/wrapper.cpp | 39 ++++++++++++++++++++++++++++++++++--- 3 files changed, 51 insertions(+), 16 deletions(-) diff --git a/samples/condor/curl.cpp b/samples/condor/curl.cpp index a0635cce1f..ea858ff52a 100644 --- a/samples/condor/curl.cpp +++ b/samples/condor/curl.cpp @@ -174,7 +174,9 @@ int upload_files ( int create_batch( const char* project_url, const char* authenticator, - SUBMIT_REQ& sr + const char* batch_name, + const char* app_name, + int& batch_id ) { char request[1024]; char url[1024]; @@ -187,8 +189,8 @@ int create_batch( " \n" "\n", authenticator, - sr.batch_name, - sr.app_name + batch_name, + app_name ); sprintf(url, "%ssubmit_rpc_handler.php", project_url); FILE* reply = tmpfile(); @@ -199,13 +201,14 @@ int create_batch( return retval; } char buf[256]; - sr.batch_id = 0; + batch_id = 0; fseek(reply, 0, SEEK_SET); while (fgets(buf, 256, reply)) { - if (parse_int(buf, "", sr.batch_id)) break; + printf("reply: %s", buf); + if (parse_int(buf, "", batch_id)) break; } fclose(reply); - if (sr.batch_id == 0) { + if (batch_id == 0) { return -1; } return 0; diff --git a/samples/wrapper/job.xml b/samples/wrapper/job.xml index fcac9f18c1..4bc0cdfa24 100644 --- a/samples/wrapper/job.xml +++ b/samples/wrapper/job.xml @@ -5,11 +5,10 @@ stdout 10 - - in.zip - - - foo.zip - out - + + worker + stdin + stdout + 10 + diff --git a/samples/wrapper/wrapper.cpp b/samples/wrapper/wrapper.cpp index fad7c29d6e..81644f06be 100644 --- a/samples/wrapper/wrapper.cpp +++ b/samples/wrapper/wrapper.cpp @@ -66,11 +66,14 @@ #include "regexp.h" +//#define DEBUG +#if 1 +#define debug_msg(x) +#else inline void debug_msg(const char* x) { -#if 0 fprintf(stderr, "%s\n", x); -#endif } +#endif #define JOB_FILENAME "job.xml" #define CHECKPOINT_FILENAME "wrapper_checkpoint.txt" @@ -116,10 +119,11 @@ struct TASK { HANDLE pid_handle; DWORD pid; HANDLE thread_handle; - struct _stat last_stat; // mod time of checkpoint file + struct _stat last_stat; // mod time of checkpoint file #else int pid; struct stat last_stat; + double start_rusage; // getrusage() CPU time at start of task #endif bool stat_first; @@ -699,6 +703,10 @@ int TASK::run(int argct, char** argvt) { FILE* stdin_file; FILE* stderr_file; + struct rusage ru; + getrusage(RUSAGE_CHILDREN, &ru); + start_rusage = (float)ru.ru_utime.tv_sec + ((float)ru.ru_utime.tv_usec)/1e+6; + pid = fork(); if (pid == -1) { perror("fork(): "); @@ -790,6 +798,12 @@ bool TASK::poll(int& status) { if (wpid) { getrusage(RUSAGE_CHILDREN, &ru); final_cpu_time = (float)ru.ru_utime.tv_sec + ((float)ru.ru_utime.tv_usec)/1e+6; + final_cpu_time -= start_rusage; +#ifdef DEBUG + printf("process exited; current CPU %f final CPU %f\n", + current_cpu_time, final_cpu_time + ); +#endif if (final_cpu_time < current_cpu_time) { final_cpu_time = current_cpu_time; } @@ -1005,6 +1019,13 @@ int main(int argc, char** argv) { if (counter%10 == 0) { cpu_time = task.cpu_time(); } +#ifdef DEBUG + printf("cpu time %f, checkpoint CPU time %f frac done %f\n", + task.starting_cpu + cpu_time, + checkpoint_cpu_time, + frac_done + delta + ); +#endif boinc_report_app_status( task.starting_cpu + cpu_time, checkpoint_cpu_time, @@ -1019,6 +1040,18 @@ int main(int argc, char** argv) { counter++; } checkpoint_cpu_time = task.starting_cpu + task.final_cpu_time; +#ifdef DEBUG + printf("cpu time %f, checkpoint CPU time %f frac done %f\n", + task.starting_cpu + task.final_cpu_time, + checkpoint_cpu_time, + frac_done + task.weight/total_weight + ); +#endif + boinc_report_app_status( + task.starting_cpu + task.final_cpu_time, + checkpoint_cpu_time, + frac_done + task.weight/total_weight + ); write_checkpoint(i+1, checkpoint_cpu_time); weight_completed += task.weight; }