mirror of https://github.com/BOINC/boinc.git
*** empty log message ***
svn path=/trunk/boinc/; revision=4302
This commit is contained in:
parent
b02a64d34e
commit
81406ae66d
|
@ -18134,8 +18134,41 @@ David 6 Oct 2004
|
|||
lib/
|
||||
crypt_prog.C
|
||||
|
||||
David 6 Oct 2004
|
||||
- boinc_resolve_filename() fix bug where link file is empty
|
||||
(from Markku Degerholm)
|
||||
|
||||
lib/
|
||||
app_ipc.C
|
||||
|
||||
David 7 Oct 2004
|
||||
- return formatted msg in file_upload_handler
|
||||
|
||||
sched/
|
||||
file_upload_handler.C
|
||||
|
||||
David 7 Oct 2004
|
||||
- Housecleaning of result_report_error() and related code:
|
||||
- removed RESULT::active_task_state field
|
||||
- removed RESULT::signal field
|
||||
- removed "errnum" argument from result_report_error()
|
||||
- return XML text for upload errors
|
||||
- clean up logic of result_report_error()
|
||||
- changed FILE_INFO::had_failure() to optionally return error text
|
||||
- changed RESULT_COMPUTE_DONE to RESULT_COMPUTE_ERROR
|
||||
- renamed APP_VERSION::had_failure() to had_download_failure()
|
||||
(same for WORKUNIT)
|
||||
- schedule_cpus(): if any start or resume fails,
|
||||
set must_schedule_cpus so we'll try something else
|
||||
|
||||
client/
|
||||
app_control.C
|
||||
app_start.C
|
||||
client_state.C,h
|
||||
client_types.C,h
|
||||
cs_apps.C
|
||||
cs_scheduler.C
|
||||
cs_statefile.C
|
||||
lib/
|
||||
prefs.C
|
||||
result_state.h
|
||||
|
|
|
@ -199,7 +199,6 @@ bool ACTIVE_TASK::handle_exited_app(unsigned long exit_code) {
|
|||
result->final_cpu_time = checkpoint_cpu_time;
|
||||
if (state == PROCESS_ABORT_PENDING) {
|
||||
state = PROCESS_ABORTED;
|
||||
result->active_task_state = PROCESS_ABORTED;
|
||||
} else {
|
||||
state = PROCESS_EXITED;
|
||||
exit_status = exit_code;
|
||||
|
@ -207,7 +206,7 @@ bool ACTIVE_TASK::handle_exited_app(unsigned long exit_code) {
|
|||
if (exit_code) {
|
||||
char szError[1024];
|
||||
gstate.report_result_error(
|
||||
*result, 0,
|
||||
*result,
|
||||
"%s - exit code %d (0x%x)",
|
||||
windows_format_error_string(exit_code, szError, sizeof(szError)),
|
||||
exit_code, exit_code
|
||||
|
@ -220,19 +219,14 @@ bool ACTIVE_TASK::handle_exited_app(unsigned long exit_code) {
|
|||
return true;
|
||||
}
|
||||
if (!finish_file_present()) {
|
||||
#if 0
|
||||
state = PROCESS_IN_LIMBO;
|
||||
#else
|
||||
scheduler_state = CPU_SCHED_PREEMPTED;
|
||||
state = PROCESS_UNINITIALIZED;
|
||||
close_process_handles();
|
||||
#endif
|
||||
limbo_message(*this);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
result->exit_status = exit_status;
|
||||
result->active_task_state = PROCESS_EXITED;
|
||||
}
|
||||
|
||||
if (app_client_shm.shm) {
|
||||
|
@ -253,7 +247,6 @@ bool ACTIVE_TASK::handle_exited_app(int stat) {
|
|||
result->final_cpu_time = checkpoint_cpu_time;
|
||||
if (state == PROCESS_ABORT_PENDING) {
|
||||
state = PROCESS_ABORTED;
|
||||
result->active_task_state = PROCESS_ABORTED;
|
||||
} else {
|
||||
if (WIFEXITED(stat)) {
|
||||
state = PROCESS_EXITED;
|
||||
|
@ -261,7 +254,7 @@ bool ACTIVE_TASK::handle_exited_app(int stat) {
|
|||
|
||||
if (exit_status) {
|
||||
gstate.report_result_error(
|
||||
*result, 0,
|
||||
*result,
|
||||
"process exited with code %d (0x%x)",
|
||||
exit_status, exit_status
|
||||
);
|
||||
|
@ -298,17 +291,16 @@ bool ACTIVE_TASK::handle_exited_app(int stat) {
|
|||
}
|
||||
}
|
||||
result->exit_status = exit_status;
|
||||
result->active_task_state = PROCESS_EXITED;
|
||||
scope_messages.printf(
|
||||
"ACTIVE_TASK::handle_exited_app(): process exited: status %d\n",
|
||||
exit_status
|
||||
);
|
||||
} else if (WIFSIGNALED(stat)) {
|
||||
int signal = WTERMSIG(stat);
|
||||
int got_signal = WTERMSIG(stat);
|
||||
|
||||
// if the process was externally killed, allow it to restart.
|
||||
//
|
||||
switch(signal) {
|
||||
switch(got_signal) {
|
||||
case SIGHUP:
|
||||
case SIGINT:
|
||||
case SIGQUIT:
|
||||
|
@ -322,11 +314,9 @@ bool ACTIVE_TASK::handle_exited_app(int stat) {
|
|||
exit_status = stat;
|
||||
result->exit_status = exit_status;
|
||||
state = PROCESS_WAS_SIGNALED;
|
||||
signal = signal;
|
||||
result->signal = signal;
|
||||
result->active_task_state = PROCESS_WAS_SIGNALED;
|
||||
signal = got_signal;
|
||||
gstate.report_result_error(
|
||||
*result, 0, "process got signal %d", signal
|
||||
*result, "process got signal %d", signal
|
||||
);
|
||||
scope_messages.printf("ACTIVE_TASK::handle_exited_app(): process got signal %d\n", signal);
|
||||
} else {
|
||||
|
@ -547,12 +537,11 @@ bool ACTIVE_TASK_SET::check_rsc_limits_exceeded() {
|
|||
int ACTIVE_TASK::abort_task(char* msg) {
|
||||
if (state == PROCESS_EXECUTING || state == PROCESS_SUSPENDED) {
|
||||
state = PROCESS_ABORT_PENDING;
|
||||
result->active_task_state = PROCESS_ABORT_PENDING;
|
||||
kill_task();
|
||||
} else {
|
||||
state = PROCESS_ABORTED;
|
||||
}
|
||||
gstate.report_result_error(*result, ERR_RSC_LIMIT_EXCEEDED, msg);
|
||||
gstate.report_result_error(*result, msg);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -225,6 +225,8 @@ static int setup_file(
|
|||
//
|
||||
// Current dir is top-level BOINC dir
|
||||
//
|
||||
// postcondition: ACTIVE_TASK::state is set correctly
|
||||
//
|
||||
int ACTIVE_TASK::start(bool first_time) {
|
||||
char exec_name[256], file_path[256], buf[256], exec_path[256];
|
||||
unsigned int i;
|
||||
|
@ -364,8 +366,7 @@ int ACTIVE_TASK::start(bool first_time) {
|
|||
windows_error_string(szError, sizeof(szError));
|
||||
|
||||
state = PROCESS_COULDNT_START;
|
||||
result->active_task_state = PROCESS_COULDNT_START;
|
||||
gstate.report_result_error(*result, ERR_EXEC, "CreateProcess() failed - %s", szError);
|
||||
gstate.report_result_error(*result, "CreateProcess() failed - %s", szError);
|
||||
msg_printf(wup->project, MSG_ERROR, "CreateProcess() failed - %s", szError);
|
||||
return ERR_EXEC;
|
||||
}
|
||||
|
@ -393,9 +394,8 @@ int ACTIVE_TASK::start(bool first_time) {
|
|||
pid = fork();
|
||||
if (pid == -1) {
|
||||
state = PROCESS_COULDNT_START;
|
||||
result->active_task_state = PROCESS_COULDNT_START;
|
||||
gstate.report_result_error(*result, -1, "fork(): %s", strerror(errno));
|
||||
msg_printf(wup->project, MSG_ERROR, "fork(): %s", strerror(errno));
|
||||
gstate.report_result_error(*result, "fork() failed: %s", strerror(errno));
|
||||
msg_printf(wup->project, MSG_ERROR, "fork() failed: %s", strerror(errno));
|
||||
return ERR_FORK;
|
||||
}
|
||||
if (pid == 0) {
|
||||
|
@ -438,12 +438,11 @@ int ACTIVE_TASK::start(bool first_time) {
|
|||
|
||||
#endif
|
||||
state = PROCESS_EXECUTING;
|
||||
result->active_task_state = PROCESS_EXECUTING;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Resume the task if it was previously running
|
||||
// Otherwise, start it
|
||||
// Resume the task if it was previously running; otherwise start it
|
||||
// Postcondition: "state" is set correctly
|
||||
//
|
||||
int ACTIVE_TASK::resume_or_start() {
|
||||
char* str = "??";
|
||||
|
@ -462,7 +461,10 @@ int ACTIVE_TASK::resume_or_start() {
|
|||
retval = start(false);
|
||||
str = "Restarting";
|
||||
}
|
||||
if (retval) return retval;
|
||||
if (retval) {
|
||||
state = PROCESS_COULDNT_START;
|
||||
return retval;
|
||||
}
|
||||
break;
|
||||
case PROCESS_SUSPENDED:
|
||||
retval = unsuspend();
|
||||
|
@ -472,6 +474,7 @@ int ACTIVE_TASK::resume_or_start() {
|
|||
MSG_ERROR,
|
||||
"ACTIVE_TASK::resume_or_start(): could not unsuspend active_task"
|
||||
);
|
||||
state = PROCESS_COULDNT_START;
|
||||
return retval;
|
||||
}
|
||||
str = "Resuming";
|
||||
|
@ -515,9 +518,8 @@ int ACTIVE_TASK_SET::restart_tasks(int max_tasks) {
|
|||
get_slot_dir(atp->slot, atp->slot_dir);
|
||||
if (!gstate.input_files_available(result)) {
|
||||
msg_printf(atp->wup->project, MSG_ERROR, "ACTIVE_TASKS::restart_tasks(); missing files\n");
|
||||
atp->result->active_task_state = PROCESS_COULDNT_START;
|
||||
gstate.report_result_error(
|
||||
*(atp->result), ERR_FILE_MISSING,
|
||||
*(atp->result),
|
||||
"One or more missing files"
|
||||
);
|
||||
iter = active_tasks.erase(iter);
|
||||
|
@ -548,9 +550,8 @@ int ACTIVE_TASK_SET::restart_tasks(int max_tasks) {
|
|||
|
||||
if (retval) {
|
||||
msg_printf(atp->wup->project, MSG_ERROR, "ACTIVE_TASKS::restart_tasks(); restart failed: %d\n", retval);
|
||||
atp->result->active_task_state = PROCESS_COULDNT_START;
|
||||
gstate.report_result_error(
|
||||
*(atp->result), retval,
|
||||
*(atp->result),
|
||||
"Couldn't restart the app for this result: %d", retval
|
||||
);
|
||||
iter = active_tasks.erase(iter);
|
||||
|
|
|
@ -723,6 +723,7 @@ bool CLIENT_STATE::garbage_collect() {
|
|||
bool action = false, found;
|
||||
string error_msgs;
|
||||
PROJECT* project;
|
||||
char buf[1024];
|
||||
|
||||
SCOPE_MSG_LOG scope_messages(log_messages, CLIENT_MSG_LOG::DEBUG_STATE);
|
||||
|
||||
|
@ -769,43 +770,42 @@ bool CLIENT_STATE::garbage_collect() {
|
|||
}
|
||||
// See if the files for this result's workunit had
|
||||
// any errors (download failure, MD5, RSA, etc)
|
||||
// and we don't already have an error for this file
|
||||
// and we don't already have an error for this result
|
||||
//
|
||||
if (!rp->ready_to_report) {
|
||||
wup = rp->wup;
|
||||
if (wup->had_failure(failnum)) {
|
||||
if (wup->had_download_failure(failnum)) {
|
||||
wup->get_file_errors(error_msgs);
|
||||
report_result_error(
|
||||
*rp, 0, "WU download error: %s", error_msgs.c_str()
|
||||
*rp, "WU download error: %s", error_msgs.c_str()
|
||||
);
|
||||
} else if (wup->avp && wup->avp->had_failure(failnum)) {
|
||||
} else if (wup->avp && wup->avp->had_download_failure(failnum)) {
|
||||
wup->avp->get_file_errors(error_msgs);
|
||||
report_result_error(
|
||||
*rp, 0, "app_version download error: %s", error_msgs.c_str()
|
||||
*rp, "app_version download error: %s", error_msgs.c_str()
|
||||
);
|
||||
}
|
||||
}
|
||||
bool found_error = false;
|
||||
std::string error_str;
|
||||
for (i=0; i<rp->output_files.size(); i++) {
|
||||
// If one of the output files had an upload failure,
|
||||
// mark the result as done and report the error.
|
||||
// The result, workunits, and file infos
|
||||
// will be cleaned up after the server is notified
|
||||
//
|
||||
if (rp->output_files[i].file_info->had_failure(failnum)) {
|
||||
if (!rp->ready_to_report) {
|
||||
// had an error uploading a file for this result
|
||||
//
|
||||
switch (failnum) {
|
||||
case ERR_FILE_TOO_BIG:
|
||||
report_result_error(*rp, 0, "Output file exceeded size limit");
|
||||
break;
|
||||
default:
|
||||
report_result_error(*rp, 0, "Output file error: %d", failnum);
|
||||
}
|
||||
if (!rp->ready_to_report) {
|
||||
fip = rp->output_files[i].file_info;
|
||||
if (fip->had_failure(failnum, buf)) {
|
||||
found_error = true;
|
||||
error_str += buf;
|
||||
}
|
||||
}
|
||||
rp->output_files[i].file_info->ref_cnt++;
|
||||
}
|
||||
if (found_error) {
|
||||
report_result_error(*rp, error_str.c_str());
|
||||
}
|
||||
rp->wup->ref_cnt++;
|
||||
result_iter++;
|
||||
}
|
||||
|
@ -907,15 +907,11 @@ bool CLIENT_STATE::update_results() {
|
|||
vector<RESULT*>::iterator result_iter;
|
||||
bool action = false;
|
||||
|
||||
// delete RESULTs that have been finished and reported;
|
||||
// reference-count files referred to by other results
|
||||
//
|
||||
result_iter = results.begin();
|
||||
while (result_iter != results.end()) {
|
||||
rp = *result_iter;
|
||||
// The result has been acked by the scheduling server.
|
||||
// It will be deleted on the next garbage collection,
|
||||
// which we trigger by setting action to true
|
||||
if (rp->got_server_ack) {
|
||||
action = true;
|
||||
}
|
||||
|
@ -932,21 +928,7 @@ bool CLIENT_STATE::update_results() {
|
|||
action = true;
|
||||
}
|
||||
break;
|
||||
|
||||
// app_finished() transitions to either RESULT_COMPUTE_DONE or
|
||||
// RESULT_FILES_UPLOADING. RESULT_COMPUTE_DONE is a dead-end state
|
||||
// indicating we had an error at the end of computation.
|
||||
|
||||
// case RESULT_FILES_DOWNLOADED:
|
||||
// break;
|
||||
// case RESULT_COMPUTE_DONE:
|
||||
// rp->state = RESULT_FILES_UPLOADING;
|
||||
// action = true;
|
||||
// break;
|
||||
case RESULT_FILES_UPLOADING:
|
||||
// Once the computation has been done, check that the necessary
|
||||
// files have been uploaded before moving on
|
||||
//
|
||||
if (rp->is_upload_done()) {
|
||||
rp->ready_to_report = true;
|
||||
rp->state = RESULT_FILES_UPLOADED;
|
||||
|
@ -981,22 +963,12 @@ bool CLIENT_STATE::time_to_exit() {
|
|||
}
|
||||
|
||||
// Call this when a result has a nonrecoverable error.
|
||||
// Append a description of the error to the stderr_out field of the result.
|
||||
//
|
||||
// Go through the input and output files for this result
|
||||
// and generates error messages for upload/download failures.
|
||||
//
|
||||
// This function is called in the following situations:
|
||||
// 1. When the active_task could not start or restart,
|
||||
// in which case err_num is set to an OS-specific error_code.
|
||||
// and err_msg has an OS-supplied string.
|
||||
// 2. when we fail in downloading an input file or uploading an output file,
|
||||
// in which case err_num and err_msg are zero.
|
||||
// 3. When the active_task exits with a non_zero error code
|
||||
// or it gets signaled.
|
||||
// - back off on contacting the project's scheduler
|
||||
// (so we don't crash over and over)
|
||||
// - Append a description of the error to result.stderr_out
|
||||
//
|
||||
int CLIENT_STATE::report_result_error(
|
||||
RESULT& res, int err_num, const char* format, ...
|
||||
RESULT& res, const char* format, ...
|
||||
) {
|
||||
char buf[MAX_BLOB_LEN], err_msg[MAX_BLOB_LEN];
|
||||
unsigned int i;
|
||||
|
@ -1018,44 +990,39 @@ int CLIENT_STATE::report_result_error(
|
|||
sprintf(buf, "Unrecoverable error for result %s (%s)", res.name, err_msg);
|
||||
scheduler_op->backoff(res.project, buf);
|
||||
|
||||
sprintf(
|
||||
buf,
|
||||
"<message>%s\n</message>\n"
|
||||
"<active_task_state>%d</active_task_state>\n"
|
||||
"<signal>%d</signal>\n",
|
||||
err_msg,
|
||||
res.active_task_state,
|
||||
res.signal
|
||||
);
|
||||
sprintf( buf, "<message>%s\n</message>\n", err_msg);
|
||||
res.stderr_out.append(buf);
|
||||
|
||||
if ((res.state == RESULT_FILES_DOWNLOADED) && err_num) {
|
||||
sprintf(buf,"<couldnt_start>%d</couldnt_start>\n", err_num);
|
||||
res.stderr_out.append(buf);
|
||||
if (!res.exit_status) {
|
||||
res.exit_status = ERR_RESULT_START;
|
||||
}
|
||||
}
|
||||
|
||||
if (res.state == RESULT_NEW) {
|
||||
for (i=0;i<res.wup->input_files.size();i++) {
|
||||
if (res.wup->input_files[i].file_info->had_failure(failnum)) {
|
||||
sprintf(buf,
|
||||
"<download_error>\n"
|
||||
" <file_name>%s</file_name>\n"
|
||||
" <error_code>%d</error_code>\n"
|
||||
"</download_error>\n",
|
||||
res.wup->input_files[i].file_info->name, failnum
|
||||
);
|
||||
res.stderr_out.append(buf);
|
||||
}
|
||||
}
|
||||
switch(res.state) {
|
||||
case RESULT_NEW:
|
||||
case RESULT_FILES_DOWNLOADING:
|
||||
// called from:
|
||||
// CLIENT_STATE::garbage_collect()
|
||||
// if WU or app_version had a download failure
|
||||
//
|
||||
if (!res.exit_status) {
|
||||
res.exit_status = ERR_RESULT_DOWNLOAD;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
if (res.state == RESULT_COMPUTE_DONE) {
|
||||
case RESULT_FILES_DOWNLOADED:
|
||||
// called from:
|
||||
// ACTIVE_TASK::start (if couldn't start app)
|
||||
// ACTIVE_TASK::restart (if files missing)
|
||||
// ACTIVE_TASK_SET::restart_tasks (catch other error returns)
|
||||
// ACTIVE_TASK::handle_exited_app (on nonzero exit or signal)
|
||||
// ACTIVE_TASK::abort_task (if exceeded resource limit)
|
||||
// CLIENT_STATE::schedule_cpus (catch-all for resume/start errors)
|
||||
//
|
||||
if (!res.exit_status) {
|
||||
res.exit_status = ERR_RESULT_START;
|
||||
}
|
||||
break;
|
||||
|
||||
case RESULT_FILES_UPLOADING:
|
||||
// called from
|
||||
// CLIENT_STATE::garbage_collect() if result had an upload error
|
||||
//
|
||||
for (i=0; i<res.output_files.size(); i++) {
|
||||
if (res.output_files[i].file_info->had_failure(failnum)) {
|
||||
sprintf(buf,
|
||||
|
@ -1071,6 +1038,12 @@ int CLIENT_STATE::report_result_error(
|
|||
if (!res.exit_status) {
|
||||
res.exit_status = ERR_RESULT_UPLOAD;
|
||||
}
|
||||
break;
|
||||
case RESULT_COMPUTE_ERROR:
|
||||
break;
|
||||
case RESULT_FILES_UPLOADED:
|
||||
msg_printf(res.project, MSG_ERROR, "report_result_error() called unexpectedly");
|
||||
break;
|
||||
}
|
||||
|
||||
res.stderr_out = res.stderr_out.substr(0,MAX_BLOB_LEN-1);
|
||||
|
|
|
@ -165,8 +165,7 @@ public:
|
|||
APP_VERSION* lookup_app_version(APP*, int);
|
||||
ACTIVE_TASK* lookup_active_task_by_result(RESULT*);
|
||||
int detach_project(PROJECT*);
|
||||
int report_result_error(RESULT &res, int err_num, const char *format, ...);
|
||||
// flag a result as having an error
|
||||
int report_result_error(RESULT&, const char *format, ...);
|
||||
int reset_project(PROJECT*);
|
||||
private:
|
||||
int link_app(PROJECT*, APP*);
|
||||
|
|
|
@ -758,9 +758,21 @@ int FILE_INFO::merge_info(FILE_INFO& new_info) {
|
|||
// Returns true if the file had an unrecoverable error
|
||||
// (couldn't download, RSA/MD5 check failed, etc)
|
||||
//
|
||||
bool FILE_INFO::had_failure(int& failnum) {
|
||||
bool FILE_INFO::had_failure(int& failnum, char* buf) {
|
||||
if (status != FILE_NOT_PRESENT && status != FILE_PRESENT) {
|
||||
failnum = status;
|
||||
if (buf) {
|
||||
sprintf(buf,
|
||||
"<file_xfer_error>\n"
|
||||
" <file_name>%s</file_name>\n"
|
||||
" <error_code>%d</error_code>\n"
|
||||
" <error_message>%s</error_message>\n"
|
||||
"</file_xfer_error>\n",
|
||||
name,
|
||||
status,
|
||||
error_msg.c_str()
|
||||
);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -821,7 +833,7 @@ int APP_VERSION::write(MIOFILE& out) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
bool APP_VERSION::had_failure(int& failnum) {
|
||||
bool APP_VERSION::had_download_failure(int& failnum) {
|
||||
unsigned int i;
|
||||
|
||||
for (i=0; i<app_files.size();i++) {
|
||||
|
@ -833,14 +845,16 @@ bool APP_VERSION::had_failure(int& failnum) {
|
|||
}
|
||||
|
||||
void APP_VERSION::get_file_errors(string& str) {
|
||||
int x;
|
||||
int errnum;
|
||||
unsigned int i;
|
||||
FILE_INFO* fip;
|
||||
char buf[1024];
|
||||
|
||||
str = "couldn't get input files:\n";
|
||||
for (i=0; i<app_files.size();i++) {
|
||||
fip = app_files[i].file_info;
|
||||
if (fip->had_failure(x)) {
|
||||
str = str + fip->name + ": " + fip->error_msg + "\n";
|
||||
if (fip->had_failure(errnum, buf)) {
|
||||
str = str + buf;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -973,7 +987,7 @@ int WORKUNIT::write(MIOFILE& out) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
bool WORKUNIT::had_failure(int& failnum) {
|
||||
bool WORKUNIT::had_download_failure(int& failnum) {
|
||||
unsigned int i;
|
||||
|
||||
for (i=0;i<input_files.size();i++) {
|
||||
|
@ -988,11 +1002,13 @@ void WORKUNIT::get_file_errors(string& str) {
|
|||
int x;
|
||||
unsigned int i;
|
||||
FILE_INFO* fip;
|
||||
char buf[1024];
|
||||
|
||||
str = "couldn't get input files:\n";
|
||||
for (i=0;i<input_files.size();i++) {
|
||||
fip = input_files[i].file_info;
|
||||
if (fip->had_failure(x)) {
|
||||
str = str + fip->name + ": " + fip->error_msg + "\n";
|
||||
if (fip->had_failure(x, buf)) {
|
||||
str = str + buf;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1021,8 +1037,6 @@ void RESULT::clear() {
|
|||
got_server_ack = false;
|
||||
final_cpu_time = 0;
|
||||
exit_status = 0;
|
||||
active_task_state = 0;
|
||||
signal = 0;
|
||||
stderr_out = "";
|
||||
app = NULL;
|
||||
wup = NULL;
|
||||
|
@ -1188,7 +1202,6 @@ int RESULT::write_gui(MIOFILE& out) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
// this is called after the result state is RESULT_COMPUTE_DONE.
|
||||
// Returns true if the result's output files are all either
|
||||
// successfully uploaded or have unrecoverable errors
|
||||
//
|
||||
|
|
|
@ -105,7 +105,7 @@ public:
|
|||
char* get_next_url(bool);
|
||||
char* get_current_url(bool);
|
||||
bool is_correct_url_type(bool, STRING256);
|
||||
bool had_failure(int& failnum);
|
||||
bool had_failure(int& failnum, char* buf=0);
|
||||
bool verify_existing_file();
|
||||
int merge_info(FILE_INFO&);
|
||||
int verify_downloaded_file();
|
||||
|
@ -259,7 +259,7 @@ struct APP_VERSION {
|
|||
|
||||
int parse(MIOFILE&);
|
||||
int write(MIOFILE&);
|
||||
bool had_failure(int& failnum);
|
||||
bool had_download_failure(int& failnum);
|
||||
void get_file_errors(std::string&);
|
||||
void clear_errors();
|
||||
};
|
||||
|
@ -284,7 +284,7 @@ struct WORKUNIT {
|
|||
|
||||
int parse(MIOFILE&);
|
||||
int write(MIOFILE&);
|
||||
bool had_failure(int& failnum);
|
||||
bool had_download_failure(int& failnum);
|
||||
void get_file_errors(std::string&);
|
||||
};
|
||||
|
||||
|
@ -300,17 +300,13 @@ struct RESULT {
|
|||
bool got_server_ack;
|
||||
// we've received the ack for this result from the server
|
||||
double final_cpu_time;
|
||||
int state; // state of this result, see lib/result_state.h
|
||||
int state; // state of this result: see lib/result_state.h
|
||||
int exit_status; // return value from the application
|
||||
int signal; // the signal caught by the active_task,
|
||||
// defined only if active_task_state is PROCESS_SIGNALED
|
||||
int active_task_state; // the state of the active task corresponding to this result
|
||||
std::string stderr_out;
|
||||
// the concatenation of:
|
||||
//
|
||||
// - if report_result_error() is called for this result:
|
||||
// <message>x</message>
|
||||
// <active_task_state>x</active_task_state>
|
||||
// <exit_status>x</exit_status>
|
||||
// <signal>x</signal>
|
||||
// - if called in FILES_DOWNLOADED state:
|
||||
|
|
|
@ -121,11 +121,8 @@ int CLIENT_STATE::app_finished(ACTIVE_TASK& at) {
|
|||
}
|
||||
|
||||
if (had_error) {
|
||||
// dead-end state indicating we had an error at end of computation;
|
||||
// do not move to RESULT_FILES_UPLOADING
|
||||
rp->state = RESULT_COMPUTE_DONE;
|
||||
rp->state = RESULT_COMPUTE_ERROR;
|
||||
} else {
|
||||
// can now upload files.
|
||||
rp->state = RESULT_FILES_UPLOADING;
|
||||
}
|
||||
PROJECT* p = rp->project;
|
||||
|
@ -178,8 +175,9 @@ bool CLIENT_STATE::handle_finished_apps() {
|
|||
return action;
|
||||
}
|
||||
|
||||
// Returns true if all the input files for a result are available
|
||||
// locally, false otherwise
|
||||
// Returns true if all the input files for a result are present
|
||||
// (both WU and app version)
|
||||
// false otherwise
|
||||
//
|
||||
bool CLIENT_STATE::input_files_available(RESULT* rp) {
|
||||
WORKUNIT* wup = rp->wup;
|
||||
|
@ -308,14 +306,6 @@ bool CLIENT_STATE::schedule_largest_debt_project(double expected_pay_off) {
|
|||
PROJECT* p = projects[i];
|
||||
if (!p->next_runnable_result) continue;
|
||||
if (p->non_cpu_intensive) continue;
|
||||
if (!input_files_available(projects[i]->next_runnable_result)) {
|
||||
report_result_error(
|
||||
*(p->next_runnable_result), ERR_FILE_MISSING,
|
||||
"One or more missing files"
|
||||
);
|
||||
p->next_runnable_result = NULL;
|
||||
continue;
|
||||
}
|
||||
if (first || p->anticipated_debt > best_debt) {
|
||||
first = false;
|
||||
best_project = p;
|
||||
|
@ -488,12 +478,13 @@ bool CLIENT_STATE::schedule_cpus() {
|
|||
) {
|
||||
retval = atp->resume_or_start();
|
||||
if (retval) {
|
||||
atp->state = PROCESS_COULDNT_START;
|
||||
atp->result->active_task_state = PROCESS_COULDNT_START;
|
||||
report_result_error(
|
||||
*(atp->result), retval,
|
||||
"Couldn't start the app for this result: error %d", retval
|
||||
*(atp->result), "Couldn't start or resume: %d", retval
|
||||
);
|
||||
|
||||
// if we couldn't run something, reschedule
|
||||
//
|
||||
must_schedule_cpus = true;
|
||||
continue;
|
||||
}
|
||||
atp->scheduler_state = CPU_SCHED_SCHEDULED;
|
||||
|
|
|
@ -365,7 +365,7 @@ double CLIENT_STATE::ettprc(PROJECT *p, int k) {
|
|||
) {
|
||||
RESULT *rp = *iter;
|
||||
if (rp->project != p
|
||||
|| rp->state >= RESULT_COMPUTE_DONE
|
||||
|| rp->state > RESULT_FILES_DOWNLOADED
|
||||
|| rp->ready_to_report
|
||||
) continue;
|
||||
if (num_results_to_skip > 0) {
|
||||
|
|
|
@ -145,6 +145,7 @@ int CLIENT_STATE::parse_state_file() {
|
|||
file_infos.push_back(fip);
|
||||
// If the file had a failure before,
|
||||
// don't start another file transfer
|
||||
//
|
||||
if (fip->had_failure(failnum)) {
|
||||
if (fip->pers_file_xfer) {
|
||||
delete fip->pers_file_xfer;
|
||||
|
|
|
@ -27,6 +27,7 @@ show_name("Brian Boshes");
|
|||
show_name("Karl Chen");
|
||||
show_name("Pietro Cicotti");
|
||||
show_name("Seth Cooper");
|
||||
show_name("Markku Degerholm");
|
||||
show_name("James Drews");
|
||||
show_name("Michael Gary");
|
||||
show_name("Gary Gibson");
|
||||
|
|
|
@ -258,6 +258,9 @@ int boinc_resolve_filename(const char *virtual_name, char *physical_name, int le
|
|||
fp = boinc_fopen(virtual_name, "r");
|
||||
if (!fp) return ERR_FOPEN;
|
||||
|
||||
// must initialize buf since fgets() on an empty file won't do anything
|
||||
//
|
||||
buf[0] = 0;
|
||||
fgets(buf, 512, fp);
|
||||
fclose(fp);
|
||||
|
||||
|
|
|
@ -260,4 +260,5 @@ int GLOBAL_PREFS::write(FILE* f) {
|
|||
max_bytes_sec_down
|
||||
);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -25,17 +25,17 @@
|
|||
// (because of the >= comparison in current_work_buf_days())
|
||||
//
|
||||
#define RESULT_NEW 0
|
||||
// New result, files may still need to be downloaded
|
||||
// New result
|
||||
#define RESULT_FILES_DOWNLOADING 1
|
||||
// Input files for result are being downloaded
|
||||
// Input files for result (WU, app version) are being downloaded
|
||||
#define RESULT_FILES_DOWNLOADED 2
|
||||
// Files are downloaded, result can be computed
|
||||
#define RESULT_COMPUTE_DONE 3
|
||||
// Computation is done, if no error then files need to be uploaded
|
||||
// Files are downloaded, result can be (or is being) computed
|
||||
#define RESULT_COMPUTE_ERROR 3
|
||||
// computation failed; no file upload
|
||||
#define RESULT_FILES_UPLOADING 4
|
||||
// Output files for result are being uploaded
|
||||
#define RESULT_FILES_UPLOADED 5
|
||||
// Files are uploaded, notify scheduling server
|
||||
// Files are uploaded, notify scheduling server at some point
|
||||
|
||||
#endif
|
||||
|
||||
|
|
Loading…
Reference in New Issue