From 753ae6e768af47f2584d6c7fb81e9a647507449f Mon Sep 17 00:00:00 2001 From: David Anderson Date: Wed, 17 Jun 2015 14:14:54 -0700 Subject: [PATCH] client: fix bug that caused delay in job cleanup If a job has an output file with the copy_file and optional flags set, and it doesn't create the file, then the call to boinc_rename() (to move it to the project dir) fails, and we back off and retry. Solution: in boinc_rename(), if the rename fails, check if the file exists, and if it doesn't then don't retry. Also: - when writing client messages, use the actual current time (dtime()) rather than client_state.now. - write log msgs when output file renames fail --- client/app_start.cpp | 30 +++++++++++++++++++----------- client/client_msgs.cpp | 7 ++++--- client/sandbox.cpp | 2 +- lib/filesys.cpp | 4 ++++ 4 files changed, 28 insertions(+), 15 deletions(-) diff --git a/client/app_start.cpp b/client/app_start.cpp index a3d888a071..42e0a464a5 100644 --- a/client/app_start.cpp +++ b/client/app_start.cpp @@ -478,21 +478,29 @@ int ACTIVE_TASK::copy_output_files() { ); sprintf(slotfile, "%s/%s", slot_dir, open_name); get_pathname(fip, projfile, sizeof(projfile)); -#if 1 - boinc_rename(slotfile, projfile); -#else int retval = boinc_rename(slotfile, projfile); - // this isn't a BOINC error. - // it just means the app didn't create an output file - // that it was supposed to. + // the rename fails if the output file isn't there. 
// if (retval) { - msg_printf(wup->project, MSG_INTERNAL_ERROR, - "Can't rename output file %s to %s: %s", - fip->name, projfile, boincerror(retval) - ); + if (retval == ERR_FILE_MISSING) { + if (log_flags.slot_debug) { + msg_printf(wup->project, MSG_INFO, + "[slot] output file %s missing, not copying", slotfile + ); + } + } else { + msg_printf(wup->project, MSG_INTERNAL_ERROR, + "Can't rename output file %s to %s: %s", + slotfile, projfile, boincerror(retval) + ); + } + } else { + if (log_flags.slot_debug) { + msg_printf(wup->project, MSG_INFO, + "[slot] renamed %s to %s", slotfile, projfile + ); + } } -#endif } return 0; } diff --git a/client/client_msgs.cpp b/client/client_msgs.cpp index 024898f470..55f75b255c 100644 --- a/client/client_msgs.cpp +++ b/client/client_msgs.cpp @@ -56,7 +56,8 @@ void show_message( ) { const char* x; char message[1024], event_msg[1024], evt_message[2048]; - char* time_string = time_to_string(gstate.now); + double t = dtime(); + char* time_string = time_to_string(t); // Cycle the log files if needed // @@ -84,7 +85,7 @@ void show_message( default: strlcpy(event_msg, message, sizeof(event_msg)); } - message_descs.insert(p, priority, (int)gstate.now, event_msg); + message_descs.insert(p, priority, (int)t, event_msg); // add a notice // @@ -105,7 +106,7 @@ void show_message( if (p) { safe_strcpy(n.project_name, p->get_project_name()); } - n.create_time = n.arrival_time = gstate.now; + n.create_time = n.arrival_time = t; safe_strcpy(n.category, (priority==MSG_USER_ALERT)?"client":"scheduler"); notices.append(n); } diff --git a/client/sandbox.cpp b/client/sandbox.cpp index 0b0d50f822..3165f6fac0 100644 --- a/client/sandbox.cpp +++ b/client/sandbox.cpp @@ -273,7 +273,7 @@ int delete_project_owned_file(const char* path, bool retry) { if (retval && retry) { if (log_flags.slot_debug) { msg_printf(0, MSG_INFO, - "delete of %s failed (%d); retrying", path, retval + "[slot] delete of %s failed (%d); retrying", path, retval ); } double start = 
dtime(); diff --git a/lib/filesys.cpp b/lib/filesys.cpp index 6bcd0a880a..833e72fb48 100644 --- a/lib/filesys.cpp +++ b/lib/filesys.cpp @@ -628,6 +628,10 @@ int boinc_rename(const char* old, const char* newf) { retval = boinc_rename_aux(old, newf); if (retval) { + // if the rename failed, and the file exists, + // retry a few times + // + if (!boinc_file_exists(old)) return ERR_FILE_MISSING; double start = dtime(); do { boinc_sleep(drand()*2); // avoid lockstep