From 3f853a6beb31acee92bb9bf74b5113ab26a41e34 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Tue, 23 Sep 2003 23:19:41 +0000 Subject: [PATCH] *** empty log message *** svn path=/trunk/boinc/; revision=2353 --- checkin_notes | 18 ++++ client/Makefile.am | 1 + client/check_state.C | 169 +++++++++++++++++++++++++++++++ client/client_state.C | 229 ++++++++---------------------------------- client/client_types.C | 12 +-- client/client_types.h | 12 ++- client/cs_scheduler.C | 16 ++- doc/docutil.php | 3 + doc/index.html | 3 + 9 files changed, 263 insertions(+), 200 deletions(-) create mode 100644 client/check_state.C diff --git a/checkin_notes b/checkin_notes index cba468cbbe..e6e52a992f 100755 --- a/checkin_notes +++ b/checkin_notes @@ -6341,3 +6341,21 @@ Eric K 9/22/03 db/db_base.C +David Sept 23 2003 + - Don't reference-count WUs for results that are ready to report + (since don't need their input files anymore) + NOTE: this means that RESULT.wup may be zero. + Must check before dereferencing. 
+ - Changed "ready_to_ack" to "ready_to_report" + (Ack is server->client, not client->server) + - Changed "server_ack" to "got_server_ack" + - Moved CLIENT_STATE integrity-check code to a new file (check_state.C) + In general it would be nice to move stuff out of client_state.C; + It's a hodge-podge currently + + client/ + Makefile.am + client_state.h + client_types.C,h + cs_scheduler.C + check_state.C (new) diff --git a/client/Makefile.am b/client/Makefile.am index 42b136cf57..84257976f0 100644 --- a/client/Makefile.am +++ b/client/Makefile.am @@ -13,6 +13,7 @@ boinc_client_SOURCES = \ main.C \ account.C \ app.C \ + check_state.C \ client_state.C \ client_types.C \ cs_apps.C \ diff --git a/client/check_state.C b/client/check_state.C new file mode 100644 index 0000000000..a883e180e8 --- /dev/null +++ b/client/check_state.C @@ -0,0 +1,169 @@ +// The contents of this file are subject to the BOINC Public License +// Version 1.0 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://boinc.berkeley.edu/license_1.0.txt +// +// Software distributed under the License is distributed on an "AS IS" +// basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +// License for the specific language governing rights and limitations +// under the License. +// +// The Original Code is the Berkeley Open Infrastructure for Network Computing. +// +// The Initial Developer of the Original Code is the SETI@home project. +// Portions created by the SETI@home project are Copyright (C) 2002 +// University of California at Berkeley. All Rights Reserved. +// +// Contributor(s): +// + +// Functions to check the integrity of core client data structures. 
+// Not currently used, but might be handy if *0 type crashes occur + +#include "windows_cpp.h" + +#include "client_state.h" + +void CLIENT_STATE::check_project_pointer(PROJECT* p) { + unsigned int i; + for (i=0; ipers_file_xfers.size(); i++) { + if (p == pers_file_xfers->pers_file_xfers[i]) return; + } + assert(0); +} +void CLIENT_STATE::check_file_xfer_pointer(FILE_XFER* p) { + unsigned int i; + for (i=0; ifile_xfers.size(); i++) { + if (p == file_xfers->file_xfers[i]) return; + } + assert(0); +} + +void CLIENT_STATE::check_app(APP& p) { + check_project_pointer(p.project); +} + +void CLIENT_STATE::check_file_info(FILE_INFO& p) { + if (p.pers_file_xfer) check_pers_file_xfer_pointer(p.pers_file_xfer); + if (p.result) check_result_pointer(p.result); + check_project_pointer(p.project); +} + +void CLIENT_STATE::check_file_ref(FILE_REF& p) { + check_file_info_pointer(p.file_info); +} + +void CLIENT_STATE::check_app_version(APP_VERSION& p) { + unsigned int i; + check_app_pointer(p.app); + check_project_pointer(p.project); + for (i=0; ipers_file_xfers.size(); i++) { + check_pers_file_xfer(*pers_file_xfers->pers_file_xfers[i]); + } + for (i=0; ifile_xfers.size(); i++) { + check_file_xfer(*file_xfers->file_xfers[i]); + } +} diff --git a/client/client_state.C b/client/client_state.C index 32284e585b..a1c6f7b668 100644 --- a/client/client_state.C +++ b/client/client_state.C @@ -1228,6 +1228,7 @@ bool CLIENT_STATE::garbage_collect() { ScopeMessages scope_messages(log_messages, ClientMessages::DEBUG_STATE); // zero references counts on WUs, FILE_INFOs and APP_VERSIONs + for (i=0; iref_cnt = 0; @@ -1241,58 +1242,59 @@ bool CLIENT_STATE::garbage_collect() { avp->ref_cnt = 0; } - // delete RESULTs that have been finished and reported; - // reference-count files referred to by other results + // Scan through RESULTs. + // delete RESULTs that have been reported and acked. 
+ // Check for results whose WUs had download failures + // Check for results that had upload failures + // Reference-count output files + // Reference-count WUs referred to by results in progress // result_iter = results.begin(); while (result_iter != results.end()) { rp = *result_iter; - if (rp->server_ack) { + if (rp->got_server_ack) { scope_messages.printf("CLIENT_STATE::garbage_collect(): deleting result %s\n", rp->name); delete rp; result_iter = results.erase(result_iter); action = true; - } else { - // See if the files for this result's workunit had - // any errors (MD5, RSA, etc) + continue; + } + // See if the files for this result's workunit had + // any errors (download failure, MD5, RSA, etc) + // and we don't already have an error for this file + // + if (!rp->ready_to_report && rp->wup && rp->wup->had_failure(failnum)) { + rp->wup->get_file_errors(error_msgs); + report_result_error(*rp, 0, error_msgs.c_str()); + } + for (i=0; ioutput_files.size(); i++) { + // If one of the output files had an upload failure, + // mark the result as done and report the error. + // The result, workunits, and file infos + // will be cleaned up after the server is notified // - if (rp->wup->had_failure(failnum)) { - // If we don't already have an error for this file - if (!rp->ready_to_ack) { - // the wu corresponding to this result - // had an error downloading some input file(s). + if (rp->output_files[i].file_info->had_failure(failnum)) { + if (!rp->ready_to_report) { + // had an error uploading a file for this result // - rp->wup->get_file_errors(error_msgs); - report_result_error(*rp, 0, error_msgs.c_str()); - } - } - rp->wup->ref_cnt++; - for (i=0; ioutput_files.size(); i++) { - // If one of the file infos had a failure, - // mark the result as done and report the error. 
- // The result, workunits, and file infos - // will be cleaned up after the server is notified - // - if(rp->output_files[i].file_info->had_failure(failnum)) { - if (!rp->ready_to_ack) { - // had an error uploading a file for this result - // - switch(failnum) { - case ERR_FILE_TOO_BIG: - report_result_error(*rp, 0, "Output file exceeded size limit"); - break; - default: - report_result_error(*rp, 0, "Couldn't upload files or other output file error"); - } + switch(failnum) { + case ERR_FILE_TOO_BIG: + report_result_error(*rp, 0, "Output file exceeded size limit"); + break; + default: + report_result_error(*rp, 0, "Couldn't upload files or other output file error"); } } - rp->output_files[i].file_info->ref_cnt++; } - result_iter++; + rp->output_files[i].file_info->ref_cnt++; } + if (!rp->ready_to_report && rp->wup) { + rp->wup->ref_cnt++; + } + result_iter++; } - // delete WORKUNITs not referenced by any result; + // delete WORKUNITs not referenced by any in-progress result; // reference-count files and APP_VERSIONs referred to by other WUs // wu_iter = workunits.begin(); @@ -1391,7 +1393,7 @@ bool CLIENT_STATE::update_results() { // server. 
It will be deleted on the next // garbage collection, which we trigger by // setting action to true - if (rp->server_ack) + if (rp->got_server_ack) action = true; switch (rp->state) { @@ -1419,8 +1421,9 @@ bool CLIENT_STATE::update_results() { case RESULT_FILES_UPLOADING: // Once the computation has been done, check that the necessary // files have been uploaded before moving on + // if (rp->is_upload_done()) { - rp->ready_to_ack = true; + rp->ready_to_report = true; rp->state = RESULT_FILES_UPLOADED; action = true; } @@ -1624,11 +1627,11 @@ int CLIENT_STATE::report_result_error( // only do this once per result // - if (res.ready_to_ack) { + if (res.ready_to_report) { return 0; } - res.ready_to_ack = true; + res.ready_to_report = true; sprintf(buf, "Unrecoverable error for result %s (%s)", res.name, err_msg); scheduler_op->backoff(res.project, buf); @@ -1732,7 +1735,7 @@ int CLIENT_STATE::reset_project(PROJECT* project) { for (i=0; iproject == project) { - rp->server_ack = true; + rp->got_server_ack = true; } } @@ -1798,147 +1801,3 @@ int CLIENT_STATE::detach_project(PROJECT* project) { return 0; } - -void CLIENT_STATE::check_project_pointer(PROJECT* p) { - unsigned int i; - for (i=0; ipers_file_xfers.size(); i++) { - if (p == pers_file_xfers->pers_file_xfers[i]) return; - } - assert(0); -} -void CLIENT_STATE::check_file_xfer_pointer(FILE_XFER* p) { - unsigned int i; - for (i=0; ifile_xfers.size(); i++) { - if (p == file_xfers->file_xfers[i]) return; - } - assert(0); -} - -void CLIENT_STATE::check_app(APP& p) { - check_project_pointer(p.project); -} - -void CLIENT_STATE::check_file_info(FILE_INFO& p) { - if (p.pers_file_xfer) check_pers_file_xfer_pointer(p.pers_file_xfer); - if (p.result) check_result_pointer(p.result); - check_project_pointer(p.project); -} - -void CLIENT_STATE::check_file_ref(FILE_REF& p) { - check_file_info_pointer(p.file_info); -} - -void CLIENT_STATE::check_app_version(APP_VERSION& p) { - unsigned int i; - check_app_pointer(p.app); - 
check_project_pointer(p.project); - for (i=0; ipers_file_xfers.size(); i++) { - check_pers_file_xfer(*pers_file_xfers->pers_file_xfers[i]); - } - for (i=0; ifile_xfers.size(); i++) { - check_file_xfer(*file_xfers->file_xfers[i]); - } -} diff --git a/client/client_types.C b/client/client_types.C index e40e296e56..ea5af6038c 100644 --- a/client/client_types.C +++ b/client/client_types.C @@ -771,8 +771,8 @@ void RESULT::clear() { output_files.clear(); is_active = false; state = RESULT_NEW; - ready_to_ack = false; - server_ack = false; + ready_to_report = false; + got_server_ack = false; final_cpu_time = 0; exit_status = 0; active_task_state = 0; @@ -824,8 +824,8 @@ int RESULT::parse_state(FILE* in) { } else if (parse_double(buf, "", final_cpu_time)) continue; else if (parse_int(buf, "", exit_status)) continue; - else if (match_tag(buf, "")) server_ack = true; - else if (match_tag(buf, "")) ready_to_ack = true; + else if (match_tag(buf, "")) got_server_ack = true; + else if (match_tag(buf, "")) ready_to_report = true; else if (parse_int(buf, "", state)) continue; else if (match_tag(buf, "")) { while (fgets(buf, 256, in)) { @@ -861,8 +861,8 @@ int RESULT::write(FILE* out, bool to_server) { fprintf(out, "\n"); } if (!to_server) { - if (server_ack) fprintf(out, " \n"); - if (ready_to_ack) fprintf(out, " \n"); + if (got_server_ack) fprintf(out, " \n"); + if (ready_to_report) fprintf(out, " \n"); fprintf(out, " %s\n" " %d\n", diff --git a/client/client_types.h b/client/client_types.h index 602dd3566c..0b712afda9 100644 --- a/client/client_types.h +++ b/client/client_types.h @@ -229,11 +229,12 @@ struct RESULT { int report_deadline; vector output_files; bool is_active; // an app is currently running for this - bool ready_to_ack; // all the files have been uploaded or there - // was an error and we are ready to report this to - // the server - bool server_ack; // received the ack for the report of - // the status of the result from server + bool ready_to_report; + // we're 
ready to report this result to the server; + // either computation is done and all the files have been uploaded + // or there was an error + bool got_server_ack; + // we've received the ack for this result from the server double final_cpu_time; int state; // state of this result, see above int exit_status; // return value from the application @@ -244,6 +245,7 @@ struct RESULT { APP* app; WORKUNIT* wup; + // this may be NULL after result is finished PROJECT* project; void clear(); diff --git a/client/cs_scheduler.C b/client/cs_scheduler.C index 22f5d61ae0..8e94d10ffc 100644 --- a/client/cs_scheduler.C +++ b/client/cs_scheduler.C @@ -58,8 +58,12 @@ double CLIENT_STATE::current_work_buf_days() { for (i=0; istate >= RESULT_COMPUTE_DONE) continue; + if (!rp->wup) continue; + // TODO: subtract time already finished for WUs in progress + seconds_remaining += estimate_cpu_time(*rp->wup) * (1.0-get_percent_done(rp)); } return (seconds_remaining / SECONDS_PER_DAY); @@ -266,7 +270,7 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p, double work_req) { if (retval) return retval; for (i=0; iproject == p && rp->ready_to_ack) { + if (rp->project == p && rp->ready_to_report) { rp->write(f, true); } } @@ -287,14 +291,18 @@ PROJECT* CLIENT_STATE::find_project_with_overdue_results() { r = results[i]; // return the project for this result to report if: // - we're not backing off a scheduler request for its project - // - we're ready_to_ack (compute done; files uploaded) + // - we're ready_to_report (compute done; files uploaded) // - we're almost at the report_deadline (6 hours) + // + if (r->project->waiting_until_min_rpc_time(now)) continue; + // NOTE: early versions of scheduler (<2003/08/07) did not send // report_deadline (in which case it is 0) // 'return_results_immediately' is a debug flag that makes the client // ignore the report deadline when deciding when to report a result - if (r->ready_to_ack && + // + if (r->ready_to_report && (return_results_immediately || 
r->report_deadline <= now+SECONDS_BEFORE_REPORT_DEADLINE_TO_REPORT)) { @@ -560,7 +568,7 @@ int CLIENT_STATE::handle_scheduler_reply( RESULT* rp = lookup_result(project, sr.result_acks[i].name); scope_messages.printf("CLIENT_STATE::handle_scheduler_reply(): got ack for result %s\n", sr.result_acks[i].name); if (rp) { - rp->server_ack = true; + rp->got_server_ack = true; } else { sprintf(buf, "Got ack for result %s, can't find\n", sr.result_acks[i].name diff --git a/doc/docutil.php b/doc/docutil.php index e9b161894e..f32f6e9d4e 100644 --- a/doc/docutil.php +++ b/doc/docutil.php @@ -7,7 +7,10 @@ function last_mod() { function page_head($title) { $d = last_mod(); echo " + + $title + diff --git a/doc/index.html b/doc/index.html index b634feb3e4..252914864f 100644 --- a/doc/index.html +++ b/doc/index.html @@ -1,6 +1,9 @@ + +Berkeley Open Infrastructure for Network Computing (BOINC) +