From 398350489357f1bc5922fccba548fa2298f50a6f Mon Sep 17 00:00:00 2001 From: Bruce Allen Date: Sun, 5 Dec 2004 21:53:32 +0000 Subject: [PATCH] Redid 'fix' to scheduler bug from 3 December 2004. In fact the fix was incomplete. In order for it to work as intended, I would have also had to modify SCHED_RESULT_ITEM::parse to copy the additional needed fields into the in-memory structure. But this is fragile. The next time some additional fields are added to the result table, they would have to be incorporated here as well, to ensure that SET followed by UPDATE is the identity operation. So I did a more graceful and robust fix. Simply set result.id=0 for those results that have already been received or which for other reasons should not be modified in the database, then skip these when updating. svn path=/trunk/boinc/; revision=4763 --- checkin_notes | 23 ++++++++++++++++++- db/boinc_db.C | 21 +++++------------ sched/handle_request.C | 52 +++++++++++++++++++++++++++++++++++------- 3 files changed, 72 insertions(+), 24 deletions(-) diff --git a/checkin_notes b/checkin_notes index 0c8a334ebe..ae4e1d293e 100755 --- a/checkin_notes +++ b/checkin_notes @@ -20682,4 +20682,25 @@ Janus 5 Dec 2004 - Also made forum_threads.php output HTML 4.01 transitional valid html/user/ - forum_threads.php \ No newline at end of file + forum_threads.php + +Bruce 5 Dec 2004 + + - Redid 'fix' to scheduler bug from 3 December 2004. In fact the + fix was incomplete. In order for it to work as intended, I would + have also had to modify SCHED_RESULT_ITEM::parse to copy the + additional needed fields into the in-memory structure. But this + is fragile. The next time some additional fields are added to the + result table, they would have to be incorporated here as well, to + ensure that SET followed by UPDATE is the identity operation. So + I did a more graceful and robust fix. Simply set result.id=0 for + those results that have already been received or which for other + reasons should not be modified in the database, then skip these + when updating. + + db/ + boinc_db.C [reverted to 1.96 + comments] + sched/ + handle_result.C + + diff --git a/db/boinc_db.C b/db/boinc_db.C index 264504ffb2..baeba589d3 100644 --- a/db/boinc_db.C +++ b/db/boinc_db.C @@ -1,4 +1,4 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ static volatile const char *BOINCrcsid="$Id$"; // The contents of this file are subject to the BOINC Public License // Version 1.0 (the "License"); you may not use this file except in @@ -1027,6 +1027,9 @@ int DB_WORK_ITEM::enumerate(int limit, bool random_order) { return 0; } + +// The items that appear here must agree with those that appear in the +// enumerate method just below! void SCHED_RESULT_ITEM::parse(MYSQL_ROW& r) { int i=0; clear(); @@ -1063,17 +1066,7 @@ int DB_SCHED_RESULT_ITEM_SET::enumerate() { " server_state, " " hostid, " " userid, " - " received_time, " - " outcome, " - " client_state, " - " exit_status, " - " cpu_time, " - " xml_doc_out, " - " stderr_out, " - " validate_state, " - " claimed_credit, " - " app_version_num, " - " teamid " + " received_time " "FROM " " result " "WHERE " @@ -1144,8 +1137,7 @@ int DB_SCHED_RESULT_ITEM_SET::update_result(SCHED_RESULT_ITEM& ri) { " stderr_out='%s', " " xml_doc_out='%s', " " validate_state=%d, " - " teamid=%d, " - " userid=%d " + " teamid=%d " // Seems not to be used in handle_request.C "WHERE " " id=%d", ri.hostid, @@ -1161,7 +1153,6 @@ int DB_SCHED_RESULT_ITEM_SET::update_result(SCHED_RESULT_ITEM& ri) { ri.xml_doc_out, ri.validate_state, ri.teamid, - ri.userid, ri.id ); retval = db->do_query(query); diff --git a/sched/handle_request.C b/sched/handle_request.C index 7978b18d4e..b858a60729 100644 --- a/sched/handle_request.C +++ b/sched/handle_request.C @@ -1,3 +1,4 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ static volatile const char *BOINCrcsid="$Id$"; // The contents of this file are subject to the BOINC Public License // Version 1.0 (the "License"); you may not use this file except in @@ -323,15 +324,18 @@ int handle_results( unsigned int i; int retval; RESULT* rp; - + if (sreq.results.size() == 0) return 0; - + // read all results the user is reporting // for (i=0; iname, &srip); if (retval) { log_messages.printf( @@ -356,6 +377,7 @@ int handle_results( "[HOST#%d] [RESULT#? %s] can't find result\n", reply.host.id, rp->name ); + // no need to skip when updating DB, since it's not there! continue; } @@ -364,12 +386,19 @@ int handle_results( reply.host.id, srip->id, srip->name ); + // Comment -- In the sanity checks that follow, should we + // verify that the results validate_state is consistent with + // this being a newly arrived result? What happens if a + // workunit was canceled after a result was sent? When it + // gets back in, do we want to leave the validate state 'as + // is'? Probably yes, which is as the code currently behaves. if (srip->server_state == RESULT_SERVER_STATE_UNSENT) { log_messages.printf( SCHED_MSG_LOG::CRITICAL, "[HOST#%d] [RESULT#%d %s] got unexpected result: server state is %d\n", reply.host.id, srip->id, srip->name, srip->server_state ); + srip->id=0; // mark to skip when updating DB continue; } @@ -379,6 +408,7 @@ int handle_results( "[HOST#%d] [RESULT#%d %s] got result twice\n", reply.host.id, srip->id, srip->name ); + srip->id=0; // mark to skip when updating DB continue; } @@ -397,6 +427,7 @@ int handle_results( "[RESULT#%d %s] Can't lookup [HOST#%d]\n", srip->id, srip->name, srip->hostid ); + srip->id=0; // mark to skip when updating DB continue; } else if (result_host.userid != reply.host.userid) { log_messages.printf( @@ -404,6 +435,7 @@ int handle_results( "[USER#%d] [HOST#%d] [RESULT#%d %s] Not even the same user; expected [USER#%d]\n", reply.host.userid, reply.host.id, srip->id, srip->name, result_host.userid ); + srip->id=0; // mark to skip when updating DB continue; } else { log_messages.printf( @@ -412,9 +444,11 @@ int handle_results( reply.host.id, srip->id, srip->name, reply.host.userid ); } - } + } // hostids do not match - // update the result record in DB + // modify the result record in the in-memory copy obtained + // from the DB earlier. If we found a problem above, we have + // continued and skipped this modify // srip->hostid = reply.host.id; srip->teamid = reply.user.teamid; @@ -423,7 +457,7 @@ int handle_results( srip->cpu_time = rp->cpu_time; srip->exit_status = rp->exit_status; srip->app_version_num = rp->app_version_num; - srip->claimed_credit = srip->cpu_time * reply.host.credit_per_cpu_sec; + srip->claimed_credit = rp->cpu_time * reply.host.credit_per_cpu_sec; #if 1 log_messages.printf(SCHED_MSG_LOG::DEBUG, "cpu %f cpcs %f, cc %f\n", srip->cpu_time, reply.host.credit_per_cpu_sec, srip->claimed_credit @@ -454,10 +488,11 @@ int handle_results( srip->outcome = RESULT_OUTCOME_CLIENT_ERROR; srip->validate_state = VALIDATE_STATE_INVALID; } - } + } // end of loop over all incoming results - // update all the results we have + // update all the results we have kept in memory, by storing to + // database. // if (config.use_transactions) { retval = boinc_db.start_transaction(); @@ -470,6 +505,7 @@ int handle_results( } for (i=0; i 0) { retval = result_handler.update_result(result_handler.results[i]); if (retval) {