scheduler fix

svn path=/trunk/boinc/; revision=9633
This commit is contained in:
David Anderson 2006-03-12 06:20:28 +00:00
parent 0137d9f38d
commit f74c293dd2
3 changed files with 46 additions and 3 deletions

View File

@ -2773,3 +2773,17 @@ Rom 11 Mar 2006
api/
boinc_api.C, .h
windows_opengl.C
David 11 Mar 2006
- Scheduler (see checkin of 9 Mar): When we reuse a host record
found by host CPID (presumably because the user detached/reattached),
mark all in-progress results as over, with outcome client error.
This prevents them from being resent.
It avoids a situation where a result causes problems on a host,
the user detaches/reattaches to clear things up,
and then the host gets sent the same result again.
(from Bruce Allen)
sched/
handle_request.C
transitioner.C

View File

@ -108,7 +108,7 @@ void unlock_sched(SCHEDULER_REPLY& reply) {
close(reply.lockfile_fd);
}
// find the user's most recent host with given host CPID
// find the user's most recently-created host with given host CPID
//
bool find_host_by_cpid(DB_USER& user, char* host_cpid, DB_HOST& host) {
char buf[256], buf2[256];
@ -125,6 +125,33 @@ bool find_host_by_cpid(DB_USER& user, char* host_cpid, DB_HOST& host) {
return false;
}
// Scan the in-progress results for the given host and mark each one
// as over, with outcome "client error".
// For each such result, also set the transition_time of its workunit
// to the current time, to trigger a WU transition.
//
// host: host record; host.id selects the results (hostid=<host.id>).
//
// The return values of update_field() are not checked (best effort).
//
void mark_results_aborted(DB_HOST& host) {
    char where_clause[256], result_update[256], wu_update[256];
    DB_RESULT result;

    snprintf(where_clause, sizeof(where_clause),
        "where hostid=%d and server_state=%d",
        host.id,
        RESULT_SERVER_STATE_IN_PROGRESS
    );

    // the update applied to each result is the same, so build it once
    //
    snprintf(result_update, sizeof(result_update),
        "server_state=%d, outcome=%d",
        RESULT_SERVER_STATE_OVER,
        RESULT_OUTCOME_CLIENT_ERROR
    );

    while (!result.enumerate(where_clause)) {
        result.update_field(result_update);

        // and trigger WU transition
        //
        DB_WORKUNIT wu;
        wu.id = result.workunitid;

        // time() returns time_t, which need not be an int;
        // cast so the argument matches the %d conversion
        //
        snprintf(wu_update, sizeof(wu_update),
            "transition_time=%d", (int)time(0)
        );
        wu.update_field(wu_update);
    }
}
// Based on the info in the request message,
// look up the host and its user, and make sure the authenticator matches.
// Some special cases:
@ -262,10 +289,12 @@ lookup_user_and_make_new_host:
// scan backwards through this user's hosts,
// looking for one with the same host CPID.
// If we find one, it means the user detached and reattached.
// Use the existing host record.
// Use the existing host record,
// and mark in-progress results as aborted.
//
if (strlen(sreq.host.host_cpid)) {
if (find_host_by_cpid(user, sreq.host.host_cpid, host)) {
mark_results_aborted(host);
goto got_host;
}
}

View File

@ -66,7 +66,7 @@ int result_suffix(char* name) {
return 0;
}
// The given result just timed out.
// A result just timed out.
// Update the host's avg_turnaround and max_results_day.
//
int penalize_host(int hostid, double delay_bound) {