mirror of https://github.com/BOINC/boinc.git
client: fix bug that caused delay in job cleanup
If a job has an output file with <copy_file> and <optional>, and the job doesn't create the file, then the call to boinc_rename() (to move it to the project dir) fails, and we back off and retry. Solution: in boinc_rename(), if the rename fails, check whether the file exists; if it doesn't, don't retry. Also: (1) when writing client messages, use the actual current time (dtime()) rather than client_state.now; (2) write log messages when output file renames fail.
This commit is contained in:
parent
35d9b15ed8
commit
753ae6e768
|
@ -478,21 +478,29 @@ int ACTIVE_TASK::copy_output_files() {
|
|||
);
|
||||
sprintf(slotfile, "%s/%s", slot_dir, open_name);
|
||||
get_pathname(fip, projfile, sizeof(projfile));
|
||||
#if 1
|
||||
boinc_rename(slotfile, projfile);
|
||||
#else
|
||||
int retval = boinc_rename(slotfile, projfile);
|
||||
// this isn't a BOINC error.
|
||||
// it just means the app didn't create an output file
|
||||
// that it was supposed to.
|
||||
// the rename fails if the output file isn't there.
|
||||
//
|
||||
if (retval) {
|
||||
msg_printf(wup->project, MSG_INTERNAL_ERROR,
|
||||
"Can't rename output file %s to %s: %s",
|
||||
fip->name, projfile, boincerror(retval)
|
||||
);
|
||||
if (retval == ERR_FILE_MISSING) {
|
||||
if (log_flags.slot_debug) {
|
||||
msg_printf(wup->project, MSG_INFO,
|
||||
"[slot] output file %s missing, not copying", slotfile
|
||||
);
|
||||
}
|
||||
} else {
|
||||
msg_printf(wup->project, MSG_INTERNAL_ERROR,
|
||||
"Can't rename output file %s to %s: %s",
|
||||
slotfile, projfile, boincerror(retval)
|
||||
);
|
||||
}
|
||||
} else {
|
||||
if (log_flags.slot_debug) {
|
||||
msg_printf(wup->project, MSG_INFO,
|
||||
"[slot] renamed %s to %s", slotfile, projfile
|
||||
);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -56,7 +56,8 @@ void show_message(
|
|||
) {
|
||||
const char* x;
|
||||
char message[1024], event_msg[1024], evt_message[2048];
|
||||
char* time_string = time_to_string(gstate.now);
|
||||
double t = dtime();
|
||||
char* time_string = time_to_string(t);
|
||||
|
||||
// Cycle the log files if needed
|
||||
//
|
||||
|
@ -84,7 +85,7 @@ void show_message(
|
|||
default:
|
||||
strlcpy(event_msg, message, sizeof(event_msg));
|
||||
}
|
||||
message_descs.insert(p, priority, (int)gstate.now, event_msg);
|
||||
message_descs.insert(p, priority, (int)t, event_msg);
|
||||
|
||||
// add a notice
|
||||
//
|
||||
|
@ -105,7 +106,7 @@ void show_message(
|
|||
if (p) {
|
||||
safe_strcpy(n.project_name, p->get_project_name());
|
||||
}
|
||||
n.create_time = n.arrival_time = gstate.now;
|
||||
n.create_time = n.arrival_time = t;
|
||||
safe_strcpy(n.category, (priority==MSG_USER_ALERT)?"client":"scheduler");
|
||||
notices.append(n);
|
||||
}
|
||||
|
|
|
@ -273,7 +273,7 @@ int delete_project_owned_file(const char* path, bool retry) {
|
|||
if (retval && retry) {
|
||||
if (log_flags.slot_debug) {
|
||||
msg_printf(0, MSG_INFO,
|
||||
"delete of %s failed (%d); retrying", path, retval
|
||||
"[slot] delete of %s failed (%d); retrying", path, retval
|
||||
);
|
||||
}
|
||||
double start = dtime();
|
||||
|
|
|
@ -628,6 +628,10 @@ int boinc_rename(const char* old, const char* newf) {
|
|||
|
||||
retval = boinc_rename_aux(old, newf);
|
||||
if (retval) {
|
||||
// if the rename failed, and the file exists,
|
||||
// retry a few times
|
||||
//
|
||||
if (!boinc_file_exists(old)) return ERR_FILE_MISSING;
|
||||
double start = dtime();
|
||||
do {
|
||||
boinc_sleep(drand()*2); // avoid lockstep
|
||||
|
|
Loading…
Reference in New Issue