diff --git a/checkin_notes b/checkin_notes index 2c31d9527b..88477097f6 100755 --- a/checkin_notes +++ b/checkin_notes @@ -26784,3 +26784,31 @@ David 6 April 2005 clientgui/ *.cpp + +David 6 April 2005 + - Attempt to fix the situation where: + 1) user merges hosts; hosts with lower IDs are folded + into host with maximal ID, then deleted + 2) If host's client_state.xml file still has one of the + lower IDs, then the next time it contacts the scheduler, + the host lookup fails and a new host record is created, + which defeats the purpose of the merge. + + Solution: + - When merging hosts, don't delete lower-ID records. + Instead, change them to "zombie" state, in which: + - userid is zero + - rpc_seqno is ID of new host record + - scheduler: if host is zombie, follow link to new host, + send back its ID to client + + db/ + boinc_db.h + html/ + inc/ + util.inc + user/ + host_edit_action.php + host_edit_form.php + sched/ + handle_request.C diff --git a/clientgui/MainDocument.cpp b/clientgui/MainDocument.cpp index 5111fb5dd4..3aa2e10e4d 100644 --- a/clientgui/MainDocument.cpp +++ b/clientgui/MainDocument.cpp @@ -1749,8 +1749,9 @@ wxInt32 CMainDocument::GetResourceProjectName(wxInt32 iIndex, wxString& strBuffe PROJECT* pStateProject = NULL; try { - if (!resource_status.projects.empty()) + if (!resource_status.projects.empty()) { pProject = resource_status.projects.at(iIndex); + } } catch (std::out_of_range e) { pProject = NULL; @@ -1760,17 +1761,17 @@ wxInt32 CMainDocument::GetResourceProjectName(wxInt32 iIndex, wxString& strBuffe pStateProject = state.lookup_project(pProject->master_url); if (NULL != pStateProject) { strBuffer = pStateProject->project_name.c_str(); - } - else + } else { ForceCacheUpdate(); + } } return 0; } -wxInt32 CMainDocument::GetResourceDiskspace(wxInt32 iIndex, float& fBuffer) -{ PROJECT* pProject = NULL; +wxInt32 CMainDocument::GetResourceDiskspace(wxInt32 iIndex, float& fBuffer) { + PROJECT* pProject = NULL; try { if 
(!resource_status.projects.empty()) diff --git a/db/boinc_db.h b/db/boinc_db.h index 06019d2612..9c1d8f242a 100755 --- a/db/boinc_db.h +++ b/db/boinc_db.h @@ -204,6 +204,9 @@ struct HOST { int id; int create_time; int userid; // ID of user running this host + // If the host is "zombied" during merging of duplicate hosts, + // this field is set to zero and rpc_seqno is used to + // store the ID of the new host (kludge, but what the heck) int rpc_seqno; // last seqno received from client int rpc_time; // time of last scheduler RPC double total_credit; diff --git a/html/inc/util.inc b/html/inc/util.inc index 51099a3c18..b44e65f6ac 100644 --- a/html/inc/util.inc +++ b/html/inc/util.inc @@ -220,13 +220,30 @@ function row3($x, $y, $z) { function row4($xx, $xy, $yx, $yy) { echo "$xx$xy" - . "$yx$yy\n"; + . "$yx$yy + "; } function rowify($string) { echo "$string"; } +function row($x) { + echo ""; + foreach ($x as $h) { + echo "$h"; + } + echo "\n"; +} + +function row_heading($x) { + echo ""; + foreach ($x as $h) { + echo "$h"; + } + echo "\n"; +} + function random_string() { return md5(uniqid(rand(), true)); } diff --git a/html/user/host_edit_action.php b/html/user/host_edit_action.php index 5a87f19bf3..e820efc887 100644 --- a/html/user/host_edit_action.php +++ b/html/user/host_edit_action.php @@ -18,34 +18,35 @@ function get_host($hostid, $user) { } function merge_hosts($old_host, $new_host) { - if ($old_host->id == $new_host->id) { - fail("same host"); - } - if (!hosts_compatible($old_host, $new_host)) { - fail("Can't merge hosts - they're incompatible"); - } + if ($old_host->id == $new_host->id) { + fail("same host"); + } + if (!hosts_compatible($old_host, $new_host)) { + fail("Can't merge hosts - they're incompatible"); + } - echo "
Merging $old_host->id into $new_host->id\n"; + echo "
Merging host $old_host->id into host $new_host->id\n"; - // update the database: - // - add credit from old to new host - // - change results to refer to new host - // - delete old host - // - $total_credit = $old_host->total_credit + $new_host->total_credit; - $recent_credit = $old_host->expavg_credit + $new_host->expavg_credit; - $result = mysql_query("update host set total_credit=$total_credit, expavg_credit=$recent_credit where id=$new_host->id"); - if (!$result) { - fail("Couldn't update credit of new host"); - } - $result = mysql_query("update result set hostid=$new_host->id where hostid=$old_host->id"); - if (!$result) { - fail("Couldn't update results"); - } - $result = mysql_query("delete from host where id=$old_host->id"); - if (!$result) { - fail("Couldn't delete record of computer"); - } + // update the database: + // - add credit from old to new host + // - change results to refer to new host + // - put old host in "zombie" state + // + $total_credit = $old_host->total_credit + $new_host->total_credit; + $recent_credit = $old_host->expavg_credit + $new_host->expavg_credit; + $result = mysql_query("update host set total_credit=$total_credit, expavg_credit=$recent_credit where id=$new_host->id"); + if (!$result) { + fail("Couldn't update credit of new computer"); + } + $result = mysql_query("update result set hostid=$new_host->id where hostid=$old_host->id"); + if (!$result) { + fail("Couldn't update results"); + } + $result = mysql_query("update host set total_credit=0, expavg_credit=0, userid=0, rpc_seqno=$new_host->id where id=$old_host->id"); + if (!$result) { + fail("Couldn't update old computer"); + } + echo "
Retired old computer $old_host->id\n"; } db_init(); diff --git a/html/user/host_edit_form.php b/html/user/host_edit_form.php index 651eae08f4..245038cf1f 100644 --- a/html/user/host_edit_form.php +++ b/html/user/host_edit_form.php @@ -22,12 +22,15 @@ echo "

- Check the computers that are the same as $host->domain_name (created $t): + Check the computers that are the same as $host->domain_name + (created $t, computer ID $host->id):

"; $result = mysql_query("select * from host where userid=$user->id"); $nhosts = 1; +start_table(); +row_heading(array("", "name", "created", "computer ID")); while ($host2 = mysql_fetch_object($result)) { if ($host->id == $host2->id) continue; //if ($host2->create_time > $host->create_time) continue; @@ -37,10 +40,16 @@ while ($host2 = mysql_fetch_object($result)) { if ($x == "") { $x = "[no hostname]"; } - echo "
id> $x (created $t)\n"; + row(array( + "id>", + $x, + "$t", + "$host2->id" + )); $nhosts++; if ($nhosts==500) break; } +end_table(); mysql_free_result($result); echo "
diff --git a/sched/handle_request.C b/sched/handle_request.C index 4d309d06ee..6f32fe2276 100644 --- a/sched/handle_request.C +++ b/sched/handle_request.C @@ -87,6 +87,19 @@ int authenticate_user(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) { if (sreq.hostid) { retval = host.lookup_id(sreq.hostid); + if (!retval && host.userid==0) { + // if host record is zombie, follow link to new host + // + retval = host.lookup_id(host.rpc_seqno); + if (!retval) { + reply.hostid = host.id; + log_messages.printf( + SCHED_MSG_LOG::NORMAL, + "[HOST#%d] forwarding to new host ID %d\n", + sreq.hostid, host.id + ); + } + } if (retval) { USER_MESSAGE um("Can't find host record", "low"); reply.insert_message(um); @@ -98,12 +111,13 @@ int authenticate_user(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) { sreq.hostid = 0; goto lookup_user_and_make_new_host; } + reply.host = host; log_messages.printf( - SCHED_MSG_LOG::DEBUG, - "Request [HOST#%d] Database [HOST#%d] Request [RPC#%d] Database [RPC#%d]\n", - sreq.hostid, host.id, sreq.rpc_seqno, host.rpc_seqno - ); + SCHED_MSG_LOG::DEBUG, + "Request [HOST#%d] Database [HOST#%d] Request [RPC#%d] Database [RPC#%d]\n", + sreq.hostid, host.id, sreq.rpc_seqno, host.rpc_seqno + ); strlcpy( user.authenticator, sreq.authenticator, @@ -443,10 +457,10 @@ int handle_results( // Comment -- In the sanity checks that follow, should we // verify that the results validate_state is consistent with // this being a newly arrived result? - // What happens if a workunit was canceled after a result was sent? - // When it gets back in, do we want to leave the validate state 'as is'? - // Probably yes, which is as the code currently behaves. - // + // What happens if a workunit was canceled after a result was sent? + // When it gets back in, do we want to leave the validate state 'as is'? + // Probably yes, which is as the code currently behaves. 
+ // if (srip->server_state == RESULT_SERVER_STATE_UNSENT) { log_messages.printf( SCHED_MSG_LOG::CRITICAL, @@ -1081,13 +1095,13 @@ int delete_file_from_host(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply) { } // pick a data file to delete. - // Do this deterministically so that we always tell host to delete the same file. - // But to prevent all hosts from removing 'the same' file, - // choose a file which depends upon the hostid. + // Do this deterministically so that we always tell host to delete the same file. + // But to prevent all hosts from removing 'the same' file, + // choose a file which depends upon the hostid. // // Assumption is that if nothing has changed on the host, - // the order in which it reports files is fixed. - // If this is false, we need to sort files into order by name! + // the order in which it reports files is fixed. + // If this is false, we need to sort files into order by name! // int j = sreply.host.id % nfiles; FILE_INFO& fi = sreq.file_infos[j]; @@ -1098,7 +1112,7 @@ int delete_file_from_host(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply) { ); // give host 4 hours to nuke the file and come back. - // This might in general be too soon, since host needs to complete any work + // This might in general be too soon, since host needs to complete any work // that depends upon this file, before it will be removed by core client. // sprintf(buf, "Removing file %s to free up disk space", fi.name); @@ -1203,10 +1217,10 @@ void handle_request( if (sreply.user.id==3) { USER_MESSAGE um("THIS IS A SHORT MESSAGE. \n AND ANOTHER", "high"); // USER_MESSAGE um("THIS IS A VERY LONG TEST MESSAGE. THIS IS A VERY LONG TEST MESSAGE. \n" - // "THIS IS A VERY LONG TEST MESSAGE. THIS IS A VERY LONG TEST MESSAGE.", "low"); + // "THIS IS A VERY LONG TEST MESSAGE. THIS IS A VERY LONG TEST MESSAGE.", "low"); sreply.insert_message(um); // USER_MESSAGE um2("THIS IS A VERY LONG TEST MESSAGE2. THIS IS A VERY LONG TEST MESSAGE. 
\n" - // "THIS IS A VERY LONG TEST MESSAGE. THIS IS A VERY LONG TEST MESSAGE.", "high"); + // "THIS IS A VERY LONG TEST MESSAGE. THIS IS A VERY LONG TEST MESSAGE.", "high"); // sreply.insert_message(um2); } #endif @@ -1239,7 +1253,7 @@ void handle_request( debug_sched(sreq, sreply, "../debug_sched"); } else if (max_allowable_disk(sreq)<0 || (sreply.wreq.insufficient_disk || sreply.wreq.disk_available<0)) { debug_sched(sreq, sreply, "../debug_sched"); - } + } #endif sreply.write(fout);