Redid 'fix' to scheduler bug from 3 December 2004. In fact the

fix was incomplete. In order for it to work as intended, I would
have also had to modify SCHED_RESULT_ITEM::parse to copy the
additional needed fields into the in-memory structure.  But this
is fragile.  The next time some additional fields are added to the
result table, they would have to be incorporated here as well, to
ensure that SET followed by UPDATE is the identity operation.  So
I did a more graceful and robust fix.  Simply set result.id=0 for
those results that have already been received or which for other
reasons should not be modified in the database, then skip these
when updating.

svn path=/trunk/boinc/; revision=4763
This commit is contained in:
Bruce Allen 2004-12-05 21:53:32 +00:00
parent 3a2cc0dc44
commit 3983504893
3 changed files with 72 additions and 24 deletions

View File

@ -20682,4 +20682,25 @@ Janus 5 Dec 2004
- Also made forum_threads.php output HTML 4.01 transitional valid
html/user/
forum_threads.php
forum_threads.php
Bruce 5 Dec 2004
- Redid 'fix' to scheduler bug from 3 December 2004. In fact the
fix was incomplete. In order for it to work as intended, I would
have also had to modify SCHED_RESULT_ITEM::parse to copy the
additional needed fields into the in-memory structure. But this
is fragile. The next time some additional fields are added to the
result table, they would have to be incorporated here as well, to
ensure that SET followed by UPDATE is the identity operation. So
I did a more graceful and robust fix. Simply set result.id=0 for
those results that have already been received or which for other
reasons should not be modified in the database, then skip these
when updating.
db/
boinc_db.C [reverted to 1.96 + comments]
sched/
handle_result.C

View File

@ -1,4 +1,4 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
static volatile const char *BOINCrcsid="$Id$";
// The contents of this file are subject to the BOINC Public License
// Version 1.0 (the "License"); you may not use this file except in
@ -1027,6 +1027,9 @@ int DB_WORK_ITEM::enumerate(int limit, bool random_order) {
return 0;
}
// The items that appear here must agree with those that appear in the
// enumerate method just below!
void SCHED_RESULT_ITEM::parse(MYSQL_ROW& r) {
int i=0;
clear();
@ -1063,17 +1066,7 @@ int DB_SCHED_RESULT_ITEM_SET::enumerate() {
" server_state, "
" hostid, "
" userid, "
" received_time, "
" outcome, "
" client_state, "
" exit_status, "
" cpu_time, "
" xml_doc_out, "
" stderr_out, "
" validate_state, "
" claimed_credit, "
" app_version_num, "
" teamid "
" received_time "
"FROM "
" result "
"WHERE "
@ -1144,8 +1137,7 @@ int DB_SCHED_RESULT_ITEM_SET::update_result(SCHED_RESULT_ITEM& ri) {
" stderr_out='%s', "
" xml_doc_out='%s', "
" validate_state=%d, "
" teamid=%d, "
" userid=%d "
" teamid=%d " // Seems not to be used in handle_request.C
"WHERE "
" id=%d",
ri.hostid,
@ -1161,7 +1153,6 @@ int DB_SCHED_RESULT_ITEM_SET::update_result(SCHED_RESULT_ITEM& ri) {
ri.xml_doc_out,
ri.validate_state,
ri.teamid,
ri.userid,
ri.id
);
retval = db->do_query(query);

View File

@ -1,3 +1,4 @@
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
static volatile const char *BOINCrcsid="$Id$";
// The contents of this file are subject to the BOINC Public License
// Version 1.0 (the "License"); you may not use this file except in
@ -323,15 +324,18 @@ int handle_results(
unsigned int i;
int retval;
RESULT* rp;
if (sreq.results.size() == 0) return 0;
// read all results the user is reporting
//
for (i=0; i<sreq.results.size(); i++) {
result_handler.add_result(sreq.results[i].name);
}
// read all results with the same name from the database into
// memory
//
retval = result_handler.enumerate();
if (retval) {
log_messages.printf(
@ -341,7 +345,13 @@ int handle_results(
);
}
// loop over all results that came back from the host
//
for (i=0; i<sreq.results.size(); i++) {
// a pointer to the current result from the host
//
rp = &sreq.results[i];
// acknowledge the result even if we couldn't find it --
@ -349,6 +359,17 @@ int handle_results(
//
reply.result_acks.push_back(*rp);
// get the result with the same name that came back from the
// database and point srip to it. Quantities that MUST be
// read from the DB are those where srip appears as an rval.
// These are: id, name, server_state, received_time, hostid.
// // Quantities that must be WRITTEN to the DB are those for
// which srip appears as an lval. These are:
// hostid,
// teamid, received_time, client_state, cpu_time, exit_status,
// app_version_num, claimed_credit, server_state, stderr_out,
// xml_doc_out, outcome, validate_state
retval = result_handler.lookup_result(rp->name, &srip);
if (retval) {
log_messages.printf(
@ -356,6 +377,7 @@ int handle_results(
"[HOST#%d] [RESULT#? %s] can't find result\n",
reply.host.id, rp->name
);
// no need to skip when updating DB, since it's not there!
continue;
}
@ -364,12 +386,19 @@ int handle_results(
reply.host.id, srip->id, srip->name
);
// Comment -- In the sanity checks that follow, should we
// verify that the results validate_state is consistent with
// this being a newly arrived result? What happens if a
// workunit was canceled after a result was sent? When it
// gets back in, do we want to leave the validate state 'as
// is'? Probably yes, which is as the code currently behaves.
if (srip->server_state == RESULT_SERVER_STATE_UNSENT) {
log_messages.printf(
SCHED_MSG_LOG::CRITICAL,
"[HOST#%d] [RESULT#%d %s] got unexpected result: server state is %d\n",
reply.host.id, srip->id, srip->name, srip->server_state
);
srip->id=0; // mark to skip when updating DB
continue;
}
@ -379,6 +408,7 @@ int handle_results(
"[HOST#%d] [RESULT#%d %s] got result twice\n",
reply.host.id, srip->id, srip->name
);
srip->id=0; // mark to skip when updating DB
continue;
}
@ -397,6 +427,7 @@ int handle_results(
"[RESULT#%d %s] Can't lookup [HOST#%d]\n",
srip->id, srip->name, srip->hostid
);
srip->id=0; // mark to skip when updating DB
continue;
} else if (result_host.userid != reply.host.userid) {
log_messages.printf(
@ -404,6 +435,7 @@ int handle_results(
"[USER#%d] [HOST#%d] [RESULT#%d %s] Not even the same user; expected [USER#%d]\n",
reply.host.userid, reply.host.id, srip->id, srip->name, result_host.userid
);
srip->id=0; // mark to skip when updating DB
continue;
} else {
log_messages.printf(
@ -412,9 +444,11 @@ int handle_results(
reply.host.id, srip->id, srip->name, reply.host.userid
);
}
}
} // hostids do not match
// update the result record in DB
// modify the result record in the in-memory copy obtained
// from the DB earlier. If we found a problem above, we have
// continued and skipped this modify
//
srip->hostid = reply.host.id;
srip->teamid = reply.user.teamid;
@ -423,7 +457,7 @@ int handle_results(
srip->cpu_time = rp->cpu_time;
srip->exit_status = rp->exit_status;
srip->app_version_num = rp->app_version_num;
srip->claimed_credit = srip->cpu_time * reply.host.credit_per_cpu_sec;
srip->claimed_credit = rp->cpu_time * reply.host.credit_per_cpu_sec;
#if 1
log_messages.printf(SCHED_MSG_LOG::DEBUG,
"cpu %f cpcs %f, cc %f\n", srip->cpu_time, reply.host.credit_per_cpu_sec, srip->claimed_credit
@ -454,10 +488,11 @@ int handle_results(
srip->outcome = RESULT_OUTCOME_CLIENT_ERROR;
srip->validate_state = VALIDATE_STATE_INVALID;
}
}
} // end of loop over all incoming results
// update all the results we have
// update all the results we have kept in memory, by storing to
// database.
//
if (config.use_transactions) {
retval = boinc_db.start_transaction();
@ -470,6 +505,7 @@ int handle_results(
}
for (i=0; i<result_handler.results.size(); i++) {
// skip items that we previously marked to skip!
if (result_handler.results[i].id > 0) {
retval = result_handler.update_result(result_handler.results[i]);
if (retval) {