From f74c293dd2e6b80a8e6011628e71a00547a91611 Mon Sep 17 00:00:00 2001
From: David Anderson
Date: Sun, 12 Mar 2006 06:20:28 +0000
Subject: [PATCH] scheduler fix

svn path=/trunk/boinc/; revision=9633
---
 checkin_notes          | 14 ++++++++++++++
 sched/handle_request.C | 33 +++++++++++++++++++++++++++++++--
 sched/transitioner.C   |  2 +-
 3 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/checkin_notes b/checkin_notes
index 1f96ac41ec..56f6568adf 100755
--- a/checkin_notes
+++ b/checkin_notes
@@ -2773,3 +2773,17 @@ Rom 11 Mar 2006
     api/
         boinc_api.C, .h
         windows_opengl.C
+
+David 11 Mar 2006
+    - Scheduler (see checkin of 9 Mar): When we use a host record
+        based on host CPID (presumably because user detached/reattached)
+        mark all in-progress results as over, client error.
+        This prevents them from being resent.
+        Avoid a situation where a result causes problems on a host,
+        user detaches/reattaches to clear things up,
+        and then the host gets sent the same result again.
+        (from Bruce Allen)
+
+    sched/
+        handle_request.C
+        transitioner.C
diff --git a/sched/handle_request.C b/sched/handle_request.C
index 46eff8a2e2..e6e0059018 100644
--- a/sched/handle_request.C
+++ b/sched/handle_request.C
@@ -108,7 +108,7 @@ void unlock_sched(SCHEDULER_REPLY& reply) {
     close(reply.lockfile_fd);
 }
 
-// find the user's most recent host with given host CPID
+// find the user's most recently-created host with given host CPID
 //
 bool find_host_by_cpid(DB_USER& user, char* host_cpid, DB_HOST& host) {
     char buf[256], buf2[256];
@@ -125,6 +125,33 @@ bool find_host_by_cpid(DB_USER& user, char* host_cpid, DB_HOST& host) {
     return false;
 }
 
+// Enumerate the given host's in-progress results; mark each one as over
+// (outcome: client error) and set its workunit's transition_time to now
+//
+void mark_results_aborted(DB_HOST& host) {
+    char buf[256], buf2[256];
+    DB_RESULT result;
+    sprintf(buf, "where hostid=%d and server_state=%d",
+        host.id,
+        RESULT_SERVER_STATE_IN_PROGRESS
+    );
+    while (!result.enumerate(buf)) {
+        sprintf(buf2,
+            "server_state=%d, outcome=%d",
+            RESULT_SERVER_STATE_OVER,
+            RESULT_OUTCOME_CLIENT_ERROR
+        );
+        result.update_field(buf2);
+
+        // and trigger WU transition
+        // (time() returns time_t; cast so the argument matches %ld)
+        DB_WORKUNIT wu;
+        wu.id = result.workunitid;
+        sprintf(buf2, "transition_time=%ld", (long)time(0));
+        wu.update_field(buf2);
+    }
+}
+
 // Based on the info in the request message,
 // look up the host and its user, and make sure the authenticator matches.
 // Some special cases:
@@ -262,10 +289,12 @@ lookup_user_and_make_new_host:
         // scan backwards through this user's hosts,
         // looking for one with the same host CPID.
         // If we find one, it means the user detached and reattached.
-        // Use the existing host record.
+        // Use the existing host record,
+        // and mark in-progress results as aborted.
         //
         if (strlen(sreq.host.host_cpid)) {
             if (find_host_by_cpid(user, sreq.host.host_cpid, host)) {
+                mark_results_aborted(host);
                 goto got_host;
             }
         }
diff --git a/sched/transitioner.C b/sched/transitioner.C
index 7204be7e61..40cfd815d4 100644
--- a/sched/transitioner.C
+++ b/sched/transitioner.C
@@ -66,7 +66,7 @@ int result_suffix(char* name) {
     return 0;
 }
 
-// The given result just timed out.
+// A result just timed out.
 // Update the host's avg_turnaround and max_results_day.
 //
 int penalize_host(int hostid, double delay_bound) {