*** empty log message ***

svn path=/trunk/boinc/; revision=2353
This commit is contained in:
David Anderson 2003-09-23 23:19:41 +00:00
parent 0a1a101649
commit 3f853a6beb
9 changed files with 263 additions and 200 deletions

View File

@ -6341,3 +6341,21 @@ Eric K 9/22/03
db/db_base.C
David Sept 23 2003
- Don't reference-count WUs for results that are ready to report
(since we don't need their input files anymore)
NOTE: this means that RESULT.wup may be zero.
Must check before dereferencing.
- Changed "ready_to_ack" to "ready_to_report"
(Ack is server->client, not client->server)
- Changed "server_ack" to "got_server_ack"
- Moved CLIENT_STATE integrity-check code to a new file (check_state.C)
In general it would be nice to move stuff out of client_state.C;
It's a hodge-podge currently
client/
Makefile.am
client_state.h
client_types.C,h
cs_scheduler.C
check_state.C (new)

View File

@ -13,6 +13,7 @@ boinc_client_SOURCES = \
main.C \
account.C \
app.C \
check_state.C \
client_state.C \
client_types.C \
cs_apps.C \

169
client/check_state.C Normal file
View File

@ -0,0 +1,169 @@
// The contents of this file are subject to the BOINC Public License
// Version 1.0 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://boinc.berkeley.edu/license_1.0.txt
//
// Software distributed under the License is distributed on an "AS IS"
// basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
// License for the specific language governing rights and limitations
// under the License.
//
// The Original Code is the Berkeley Open Infrastructure for Network Computing.
//
// The Initial Developer of the Original Code is the SETI@home project.
// Portions created by the SETI@home project are Copyright (C) 2002
// University of California at Berkeley. All Rights Reserved.
//
// Contributor(s):
//
// Functions to check the integrity of core client data structures.
// Not currently used, but might be handy if *0 type crashes occur
#include "windows_cpp.h"
#include "client_state.h"
// Sanity check: p must be one of the PROJECT pointers stored in "projects".
// Reaches assert(0) when no entry matches.
//
void CLIENT_STATE::check_project_pointer(PROJECT* p) {
    unsigned int k = 0;
    while (k < projects.size()) {
        if (projects[k] == p) {
            return;
        }
        k++;
    }
    assert(0);
}
// Sanity check: p must be one of the APP pointers stored in "apps".
// Reaches assert(0) when no entry matches.
//
void CLIENT_STATE::check_app_pointer(APP* p) {
    unsigned int k = 0;
    while (k < apps.size()) {
        if (apps[k] == p) {
            return;
        }
        k++;
    }
    assert(0);
}
// Sanity check: p must be one of the FILE_INFO pointers stored in
// "file_infos".  Reaches assert(0) when no entry matches.
//
void CLIENT_STATE::check_file_info_pointer(FILE_INFO* p) {
    unsigned int k = 0;
    while (k < file_infos.size()) {
        if (file_infos[k] == p) {
            return;
        }
        k++;
    }
    assert(0);
}
// Sanity check: p must be one of the APP_VERSION pointers stored in
// "app_versions".  Reaches assert(0) when no entry matches.
//
void CLIENT_STATE::check_app_version_pointer(APP_VERSION* p) {
    unsigned int k = 0;
    while (k < app_versions.size()) {
        if (app_versions[k] == p) {
            return;
        }
        k++;
    }
    assert(0);
}
// Sanity check: p must be one of the WORKUNIT pointers stored in
// "workunits".  Reaches assert(0) when no entry matches
// (which includes the case where p is zero and no entry is zero).
//
void CLIENT_STATE::check_workunit_pointer(WORKUNIT* p) {
    unsigned int k = 0;
    while (k < workunits.size()) {
        if (workunits[k] == p) {
            return;
        }
        k++;
    }
    assert(0);
}
// Sanity check: p must be one of the RESULT pointers stored in "results".
// Reaches assert(0) when no entry matches.
//
void CLIENT_STATE::check_result_pointer(RESULT* p) {
    unsigned int k = 0;
    while (k < results.size()) {
        if (results[k] == p) {
            return;
        }
        k++;
    }
    assert(0);
}
// Sanity check: p must be one of the PERS_FILE_XFER pointers stored in
// pers_file_xfers->pers_file_xfers.  Reaches assert(0) when no entry matches.
//
void CLIENT_STATE::check_pers_file_xfer_pointer(PERS_FILE_XFER* p) {
    unsigned int k = 0;
    while (k < pers_file_xfers->pers_file_xfers.size()) {
        if (pers_file_xfers->pers_file_xfers[k] == p) {
            return;
        }
        k++;
    }
    assert(0);
}
// Sanity check: p must be one of the FILE_XFER pointers stored in
// file_xfers->file_xfers.  Reaches assert(0) when no entry matches.
//
void CLIENT_STATE::check_file_xfer_pointer(FILE_XFER* p) {
    unsigned int k = 0;
    while (k < file_xfers->file_xfers.size()) {
        if (file_xfers->file_xfers[k] == p) {
            return;
        }
        k++;
    }
    assert(0);
}
// Check the pointers held by an APP:
// its project must be a known PROJECT.
//
void CLIENT_STATE::check_app(APP& p) {
    PROJECT* owner = p.project;
    check_project_pointer(owner);
}
// Check the pointers held by a FILE_INFO.
// pers_file_xfer and result are only checked when non-zero;
// project is always checked.
//
void CLIENT_STATE::check_file_info(FILE_INFO& p) {
    if (p.pers_file_xfer != 0) {
        check_pers_file_xfer_pointer(p.pers_file_xfer);
    }
    if (p.result != 0) {
        check_result_pointer(p.result);
    }
    check_project_pointer(p.project);
}
// Check the pointer held by a FILE_REF:
// its file_info must be a known FILE_INFO.
//
void CLIENT_STATE::check_file_ref(FILE_REF& p) {
    FILE_INFO* target = p.file_info;
    check_file_info_pointer(target);
}
// Check the pointers held by an APP_VERSION:
// its app, its project, and then each entry of app_files.
//
void CLIENT_STATE::check_app_version(APP_VERSION& p) {
    check_app_pointer(p.app);
    check_project_pointer(p.project);

    unsigned int k = 0;
    while (k < p.app_files.size()) {
        check_file_ref(p.app_files[k]);
        k++;
    }
}
// Check the pointers held by a WORKUNIT:
// each entry of input_files first, then project, app and avp.
//
void CLIENT_STATE::check_workunit(WORKUNIT& p) {
    unsigned int k = 0;
    while (k < p.input_files.size()) {
        check_file_ref(p.input_files[k]);
        k++;
    }

    check_project_pointer(p.project);
    check_app_pointer(p.app);
    check_app_version_pointer(p.avp);
}
// Check the pointers held by a RESULT:
// each entry of output_files, then app, wup (if set) and project.
//
// RESULT.wup may be zero once the result is finished
// (WUs are no longer reference-counted for results that are ready to report),
// so it is only checked when non-zero.
// Passing zero to check_workunit_pointer() would hit assert(0)
// even though the state is valid.
//
void CLIENT_STATE::check_result(RESULT& p) {
    unsigned int i;

    for (i=0; i<p.output_files.size(); i++) {
        check_file_ref(p.output_files[i]);
    }
    check_app_pointer(p.app);
    if (p.wup) check_workunit_pointer(p.wup);
    check_project_pointer(p.project);
}
// Check the pointers held by an ACTIVE_TASK:
// result, wup and app_version, in that order.
// NOTE(review): wup is checked unconditionally here; presumably an active
// task always has a workunit - confirm.
//
void CLIENT_STATE::check_active_task(ACTIVE_TASK& p) {
    RESULT* task_result = p.result;
    check_result_pointer(task_result);

    WORKUNIT* task_wu = p.wup;
    check_workunit_pointer(task_wu);

    APP_VERSION* task_version = p.app_version;
    check_app_version_pointer(task_version);
}
// Check the pointers held by a PERS_FILE_XFER:
// fxp must be a known FILE_XFER and fip a known FILE_INFO.
// NOTE(review): fxp is checked unconditionally; confirm it can never be
// zero for an entry in the list.
//
void CLIENT_STATE::check_pers_file_xfer(PERS_FILE_XFER& p) {
    FILE_XFER* xfer = p.fxp;
    check_file_xfer_pointer(xfer);

    FILE_INFO* info = p.fip;
    check_file_info_pointer(info);
}
// Check the pointer held by a FILE_XFER:
// fip must be a known FILE_INFO.
//
void CLIENT_STATE::check_file_xfer(FILE_XFER& p) {
    FILE_INFO* info = p.fip;
    check_file_info_pointer(info);
}
// Run the per-object checks over every list in the client state, in order:
// apps, file_infos, app_versions, workunits, results, active tasks,
// persistent file transfers, file transfers.
//
void CLIENT_STATE::check_all() {
    unsigned int k;

    k = 0;
    while (k < apps.size()) {
        check_app(*apps[k]);
        k++;
    }

    k = 0;
    while (k < file_infos.size()) {
        check_file_info(*file_infos[k]);
        k++;
    }

    k = 0;
    while (k < app_versions.size()) {
        check_app_version(*app_versions[k]);
        k++;
    }

    k = 0;
    while (k < workunits.size()) {
        check_workunit(*workunits[k]);
        k++;
    }

    k = 0;
    while (k < results.size()) {
        check_result(*results[k]);
        k++;
    }

    k = 0;
    while (k < active_tasks.active_tasks.size()) {
        check_active_task(*active_tasks.active_tasks[k]);
        k++;
    }

    k = 0;
    while (k < pers_file_xfers->pers_file_xfers.size()) {
        check_pers_file_xfer(*pers_file_xfers->pers_file_xfers[k]);
        k++;
    }

    k = 0;
    while (k < file_xfers->file_xfers.size()) {
        check_file_xfer(*file_xfers->file_xfers[k]);
        k++;
    }
}

View File

@ -1228,6 +1228,7 @@ bool CLIENT_STATE::garbage_collect() {
ScopeMessages scope_messages(log_messages, ClientMessages::DEBUG_STATE);
// zero references counts on WUs, FILE_INFOs and APP_VERSIONs
for (i=0; i<workunits.size(); i++) {
wup = workunits[i];
wup->ref_cnt = 0;
@ -1241,58 +1242,59 @@ bool CLIENT_STATE::garbage_collect() {
avp->ref_cnt = 0;
}
// delete RESULTs that have been finished and reported;
// reference-count files referred to by other results
// Scan through RESULTs.
// delete RESULTs that have been reported and acked.
// Check for results whose WUs had download failures
// Check for results that had upload failures
// Reference-count output files
// Reference-count WUs referred to by results in progress
//
result_iter = results.begin();
while (result_iter != results.end()) {
rp = *result_iter;
if (rp->server_ack) {
if (rp->got_server_ack) {
scope_messages.printf("CLIENT_STATE::garbage_collect(): deleting result %s\n", rp->name);
delete rp;
result_iter = results.erase(result_iter);
action = true;
} else {
// See if the files for this result's workunit had
// any errors (MD5, RSA, etc)
continue;
}
// See if the files for this result's workunit had
// any errors (download failure, MD5, RSA, etc)
// and we don't already have an error for this file
//
if (!rp->ready_to_report && rp->wup && rp->wup->had_failure(failnum)) {
rp->wup->get_file_errors(error_msgs);
report_result_error(*rp, 0, error_msgs.c_str());
}
for (i=0; i<rp->output_files.size(); i++) {
// If one of the output files had an upload failure,
// mark the result as done and report the error.
// The result, workunits, and file infos
// will be cleaned up after the server is notified
//
if (rp->wup->had_failure(failnum)) {
// If we don't already have an error for this file
if (!rp->ready_to_ack) {
// the wu corresponding to this result
// had an error downloading some input file(s).
if (rp->output_files[i].file_info->had_failure(failnum)) {
if (!rp->ready_to_report) {
// had an error uploading a file for this result
//
rp->wup->get_file_errors(error_msgs);
report_result_error(*rp, 0, error_msgs.c_str());
}
}
rp->wup->ref_cnt++;
for (i=0; i<rp->output_files.size(); i++) {
// If one of the file infos had a failure,
// mark the result as done and report the error.
// The result, workunits, and file infos
// will be cleaned up after the server is notified
//
if(rp->output_files[i].file_info->had_failure(failnum)) {
if (!rp->ready_to_ack) {
// had an error uploading a file for this result
//
switch(failnum) {
case ERR_FILE_TOO_BIG:
report_result_error(*rp, 0, "Output file exceeded size limit");
break;
default:
report_result_error(*rp, 0, "Couldn't upload files or other output file error");
}
switch(failnum) {
case ERR_FILE_TOO_BIG:
report_result_error(*rp, 0, "Output file exceeded size limit");
break;
default:
report_result_error(*rp, 0, "Couldn't upload files or other output file error");
}
}
rp->output_files[i].file_info->ref_cnt++;
}
result_iter++;
rp->output_files[i].file_info->ref_cnt++;
}
if (!rp->ready_to_report && rp->wup) {
rp->wup->ref_cnt++;
}
result_iter++;
}
// delete WORKUNITs not referenced by any result;
// delete WORKUNITs not referenced by any in-progress result;
// reference-count files and APP_VERSIONs referred to by other WUs
//
wu_iter = workunits.begin();
@ -1391,7 +1393,7 @@ bool CLIENT_STATE::update_results() {
// server. It will be deleted on the next
// garbage collection, which we trigger by
// setting action to true
if (rp->server_ack)
if (rp->got_server_ack)
action = true;
switch (rp->state) {
@ -1419,8 +1421,9 @@ bool CLIENT_STATE::update_results() {
case RESULT_FILES_UPLOADING:
// Once the computation has been done, check that the necessary
// files have been uploaded before moving on
//
if (rp->is_upload_done()) {
rp->ready_to_ack = true;
rp->ready_to_report = true;
rp->state = RESULT_FILES_UPLOADED;
action = true;
}
@ -1624,11 +1627,11 @@ int CLIENT_STATE::report_result_error(
// only do this once per result
//
if (res.ready_to_ack) {
if (res.ready_to_report) {
return 0;
}
res.ready_to_ack = true;
res.ready_to_report = true;
sprintf(buf, "Unrecoverable error for result %s (%s)", res.name, err_msg);
scheduler_op->backoff(res.project, buf);
@ -1732,7 +1735,7 @@ int CLIENT_STATE::reset_project(PROJECT* project) {
for (i=0; i<results.size(); i++) {
rp = results[i];
if (rp->project == project) {
rp->server_ack = true;
rp->got_server_ack = true;
}
}
@ -1798,147 +1801,3 @@ int CLIENT_STATE::detach_project(PROJECT* project) {
return 0;
}
// Sanity check: p must be one of the PROJECT pointers stored in "projects".
// Reaches assert(0) when no entry matches.
//
void CLIENT_STATE::check_project_pointer(PROJECT* p) {
    unsigned int k = 0;
    while (k < projects.size()) {
        if (projects[k] == p) {
            return;
        }
        k++;
    }
    assert(0);
}
// Sanity check: p must be one of the APP pointers stored in "apps".
// Reaches assert(0) when no entry matches.
//
void CLIENT_STATE::check_app_pointer(APP* p) {
    unsigned int k = 0;
    while (k < apps.size()) {
        if (apps[k] == p) {
            return;
        }
        k++;
    }
    assert(0);
}
// Sanity check: p must be one of the FILE_INFO pointers stored in
// "file_infos".  Reaches assert(0) when no entry matches.
//
void CLIENT_STATE::check_file_info_pointer(FILE_INFO* p) {
    unsigned int k = 0;
    while (k < file_infos.size()) {
        if (file_infos[k] == p) {
            return;
        }
        k++;
    }
    assert(0);
}
// Sanity check: p must be one of the APP_VERSION pointers stored in
// "app_versions".  Reaches assert(0) when no entry matches.
//
void CLIENT_STATE::check_app_version_pointer(APP_VERSION* p) {
    unsigned int k = 0;
    while (k < app_versions.size()) {
        if (app_versions[k] == p) {
            return;
        }
        k++;
    }
    assert(0);
}
// Sanity check: p must be one of the WORKUNIT pointers stored in
// "workunits".  Reaches assert(0) when no entry matches.
//
void CLIENT_STATE::check_workunit_pointer(WORKUNIT* p) {
    unsigned int k = 0;
    while (k < workunits.size()) {
        if (workunits[k] == p) {
            return;
        }
        k++;
    }
    assert(0);
}
// Sanity check: p must be one of the RESULT pointers stored in "results".
// Reaches assert(0) when no entry matches.
//
void CLIENT_STATE::check_result_pointer(RESULT* p) {
    unsigned int k = 0;
    while (k < results.size()) {
        if (results[k] == p) {
            return;
        }
        k++;
    }
    assert(0);
}
// Sanity check: p must be one of the PERS_FILE_XFER pointers stored in
// pers_file_xfers->pers_file_xfers.  Reaches assert(0) when no entry matches.
//
void CLIENT_STATE::check_pers_file_xfer_pointer(PERS_FILE_XFER* p) {
    unsigned int k = 0;
    while (k < pers_file_xfers->pers_file_xfers.size()) {
        if (pers_file_xfers->pers_file_xfers[k] == p) {
            return;
        }
        k++;
    }
    assert(0);
}
// Sanity check: p must be one of the FILE_XFER pointers stored in
// file_xfers->file_xfers.  Reaches assert(0) when no entry matches.
//
void CLIENT_STATE::check_file_xfer_pointer(FILE_XFER* p) {
    unsigned int k = 0;
    while (k < file_xfers->file_xfers.size()) {
        if (file_xfers->file_xfers[k] == p) {
            return;
        }
        k++;
    }
    assert(0);
}
// Check the pointers held by an APP:
// its project must be a known PROJECT.
//
void CLIENT_STATE::check_app(APP& p) {
    PROJECT* owner = p.project;
    check_project_pointer(owner);
}
// Check the pointers held by a FILE_INFO.
// pers_file_xfer and result are only checked when non-zero;
// project is always checked.
//
void CLIENT_STATE::check_file_info(FILE_INFO& p) {
    if (p.pers_file_xfer != 0) {
        check_pers_file_xfer_pointer(p.pers_file_xfer);
    }
    if (p.result != 0) {
        check_result_pointer(p.result);
    }
    check_project_pointer(p.project);
}
// Check the pointer held by a FILE_REF:
// its file_info must be a known FILE_INFO.
//
void CLIENT_STATE::check_file_ref(FILE_REF& p) {
    FILE_INFO* target = p.file_info;
    check_file_info_pointer(target);
}
// Check the pointers held by an APP_VERSION:
// its app, its project, and then each entry of app_files.
//
void CLIENT_STATE::check_app_version(APP_VERSION& p) {
    check_app_pointer(p.app);
    check_project_pointer(p.project);

    unsigned int k = 0;
    while (k < p.app_files.size()) {
        check_file_ref(p.app_files[k]);
        k++;
    }
}
// Check the pointers held by a WORKUNIT:
// each entry of input_files first, then project, app and avp.
//
void CLIENT_STATE::check_workunit(WORKUNIT& p) {
    unsigned int k = 0;
    while (k < p.input_files.size()) {
        check_file_ref(p.input_files[k]);
        k++;
    }

    check_project_pointer(p.project);
    check_app_pointer(p.app);
    check_app_version_pointer(p.avp);
}
// Check the pointers held by a RESULT:
// each entry of output_files, then app, wup (if set) and project.
//
// RESULT.wup may be zero once the result is finished,
// so it is only checked when non-zero.
// Passing zero to check_workunit_pointer() would hit assert(0)
// even though the state is valid.
//
void CLIENT_STATE::check_result(RESULT& p) {
    unsigned int i;

    for (i=0; i<p.output_files.size(); i++) {
        check_file_ref(p.output_files[i]);
    }
    check_app_pointer(p.app);
    if (p.wup) check_workunit_pointer(p.wup);
    check_project_pointer(p.project);
}
// Check the pointers held by an ACTIVE_TASK:
// result, wup and app_version, in that order.
//
void CLIENT_STATE::check_active_task(ACTIVE_TASK& p) {
    RESULT* task_result = p.result;
    check_result_pointer(task_result);

    WORKUNIT* task_wu = p.wup;
    check_workunit_pointer(task_wu);

    APP_VERSION* task_version = p.app_version;
    check_app_version_pointer(task_version);
}
// Check the pointers held by a PERS_FILE_XFER:
// fxp must be a known FILE_XFER and fip a known FILE_INFO.
//
void CLIENT_STATE::check_pers_file_xfer(PERS_FILE_XFER& p) {
    FILE_XFER* xfer = p.fxp;
    check_file_xfer_pointer(xfer);

    FILE_INFO* info = p.fip;
    check_file_info_pointer(info);
}
// Check the pointer held by a FILE_XFER:
// fip must be a known FILE_INFO.
//
void CLIENT_STATE::check_file_xfer(FILE_XFER& p) {
    FILE_INFO* info = p.fip;
    check_file_info_pointer(info);
}
// Run the per-object checks over every list in the client state, in order:
// apps, file_infos, app_versions, workunits, results, active tasks,
// persistent file transfers, file transfers.
//
void CLIENT_STATE::check_all() {
    unsigned int k;

    k = 0;
    while (k < apps.size()) {
        check_app(*apps[k]);
        k++;
    }

    k = 0;
    while (k < file_infos.size()) {
        check_file_info(*file_infos[k]);
        k++;
    }

    k = 0;
    while (k < app_versions.size()) {
        check_app_version(*app_versions[k]);
        k++;
    }

    k = 0;
    while (k < workunits.size()) {
        check_workunit(*workunits[k]);
        k++;
    }

    k = 0;
    while (k < results.size()) {
        check_result(*results[k]);
        k++;
    }

    k = 0;
    while (k < active_tasks.active_tasks.size()) {
        check_active_task(*active_tasks.active_tasks[k]);
        k++;
    }

    k = 0;
    while (k < pers_file_xfers->pers_file_xfers.size()) {
        check_pers_file_xfer(*pers_file_xfers->pers_file_xfers[k]);
        k++;
    }

    k = 0;
    while (k < file_xfers->file_xfers.size()) {
        check_file_xfer(*file_xfers->file_xfers[k]);
        k++;
    }
}

View File

@ -771,8 +771,8 @@ void RESULT::clear() {
output_files.clear();
is_active = false;
state = RESULT_NEW;
ready_to_ack = false;
server_ack = false;
ready_to_report = false;
got_server_ack = false;
final_cpu_time = 0;
exit_status = 0;
active_task_state = 0;
@ -824,8 +824,8 @@ int RESULT::parse_state(FILE* in) {
}
else if (parse_double(buf, "<final_cpu_time>", final_cpu_time)) continue;
else if (parse_int(buf, "<exit_status>", exit_status)) continue;
else if (match_tag(buf, "<server_ack/>")) server_ack = true;
else if (match_tag(buf, "<ready_to_ack/>")) ready_to_ack = true;
else if (match_tag(buf, "<got_server_ack/>")) got_server_ack = true;
else if (match_tag(buf, "<ready_to_report/>")) ready_to_report = true;
else if (parse_int(buf, "<state>", state)) continue;
else if (match_tag(buf, "<stderr_out>")) {
while (fgets(buf, 256, in)) {
@ -861,8 +861,8 @@ int RESULT::write(FILE* out, bool to_server) {
fprintf(out, "</stderr_out>\n");
}
if (!to_server) {
if (server_ack) fprintf(out, " <server_ack/>\n");
if (ready_to_ack) fprintf(out, " <ready_to_ack/>\n");
if (got_server_ack) fprintf(out, " <got_server_ack/>\n");
if (ready_to_report) fprintf(out, " <ready_to_report/>\n");
fprintf(out,
" <wu_name>%s</wu_name>\n"
" <report_deadline>%d</report_deadline>\n",

View File

@ -229,11 +229,12 @@ struct RESULT {
int report_deadline;
vector<FILE_REF> output_files;
bool is_active; // an app is currently running for this
bool ready_to_ack; // all the files have been uploaded or there
// was an error and we are ready to report this to
// the server
bool server_ack; // received the ack for the report of
// the status of the result from server
bool ready_to_report;
// we're ready to report this result to the server;
// either computation is done and all the files have been uploaded
// or there was an error
bool got_server_ack;
// we've received the ack for this result from the server
double final_cpu_time;
int state; // state of this result, see above
int exit_status; // return value from the application
@ -244,6 +245,7 @@ struct RESULT {
APP* app;
WORKUNIT* wup;
// this may be NULL after result is finished
PROJECT* project;
void clear();

View File

@ -58,8 +58,12 @@ double CLIENT_STATE::current_work_buf_days() {
for (i=0; i<results.size(); i++) {
rp = results[i];
// Don't count result if we've already computed it
//
if (rp->state >= RESULT_COMPUTE_DONE) continue;
if (!rp->wup) continue;
// TODO: subtract time already finished for WUs in progress
seconds_remaining += estimate_cpu_time(*rp->wup) * (1.0-get_percent_done(rp));
}
return (seconds_remaining / SECONDS_PER_DAY);
@ -266,7 +270,7 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p, double work_req) {
if (retval) return retval;
for (i=0; i<results.size(); i++) {
rp = results[i];
if (rp->project == p && rp->ready_to_ack) {
if (rp->project == p && rp->ready_to_report) {
rp->write(f, true);
}
}
@ -287,14 +291,18 @@ PROJECT* CLIENT_STATE::find_project_with_overdue_results() {
r = results[i];
// return the project for this result to report if:
// - we're not backing off a scheduler request for its project
// - we're ready_to_ack (compute done; files uploaded)
// - we're ready_to_report (compute done; files uploaded)
// - we're almost at the report_deadline (6 hours)
//
if (r->project->waiting_until_min_rpc_time(now)) continue;
// NOTE: early versions of scheduler (<2003/08/07) did not send
// report_deadline (in which case it is 0)
// 'return_results_immediately' is a debug flag that makes the client
// ignore the report deadline when deciding when to report a result
if (r->ready_to_ack &&
//
if (r->ready_to_report &&
(return_results_immediately ||
r->report_deadline <= now+SECONDS_BEFORE_REPORT_DEADLINE_TO_REPORT))
{
@ -560,7 +568,7 @@ int CLIENT_STATE::handle_scheduler_reply(
RESULT* rp = lookup_result(project, sr.result_acks[i].name);
scope_messages.printf("CLIENT_STATE::handle_scheduler_reply(): got ack for result %s\n", sr.result_acks[i].name);
if (rp) {
rp->server_ack = true;
rp->got_server_ack = true;
} else {
sprintf(buf, "Got ack for result %s, can't find\n",
sr.result_acks[i].name

View File

@ -7,7 +7,10 @@ function last_mod() {
function page_head($title) {
$d = last_mod();
echo "
<head>
<link rel='shortcut icon' href='iconsmall.ico'>
<title>$title</title>
</head>
<body bgcolor=ffffff>
<table width=100%>
<tr>

View File

@ -1,6 +1,9 @@
<head>
<link rel="shortcut icon" href="iconsmall.ico">
<title>Berkeley Open Infrastructure for Network Computing (BOINC)</title>
<meta name=description content="BOINC is a software platform for developing public-participation distributed computing projects">
<meta name=keywords content="distributed scientific computing supercomputing grid">
</head>
<body bgcolor=ffffff text=000088 link=000088 vlink=000088>
<table width=100% border=0 cellpadding=0 cellspacing=0>
<tr><td>