From 37248491e4ffd2007e6b2f6527fde8686d31031e Mon Sep 17 00:00:00 2001 From: David Anderson Date: Tue, 20 Jun 2006 19:34:42 +0000 Subject: [PATCH] *** empty log message *** svn path=/trunk/boinc/; revision=10414 --- checkin_notes | 13 +++++ client/app_control.C | 1 + client/app_start.C | 125 +++++++++++++++++++++++++----------------- client/client_state.C | 7 ++- client/client_state.h | 2 +- client/cs_apps.C | 15 +++-- 6 files changed, 104 insertions(+), 59 deletions(-) diff --git a/checkin_notes b/checkin_notes index 977ff02b97..267ff968f8 100755 --- a/checkin_notes +++ b/checkin_notes @@ -6404,3 +6404,16 @@ David 20 June 2006 sched_config.C,h server_types.h validate_util2.C + +David 20 June 2006 + - Debug "verify files on app start" feature + - ACTIVE_TASK::start(): call report_result_error() + on any error condition; this puts the result in + the correct state so that it won't get started again. + - CLIENT_STATE::input_files_available() returns int, not bool + + client/ + app_control.C + app_start.C + client_state.C,h + cs_apps.C diff --git a/client/app_control.C b/client/app_control.C index 95ca112977..774e74ce23 100644 --- a/client/app_control.C +++ b/client/app_control.C @@ -553,6 +553,7 @@ int ACTIVE_TASK::abort_task(int exit_status, const char* msg) { } result->exit_status = exit_status; gstate.report_result_error(*result, msg); + result->state = RESULT_ABORTED; return 0; } diff --git a/client/app_start.C b/client/app_start.C index 0e4d9dc5de..3a40c7d3d4 100644 --- a/client/app_start.C +++ b/client/app_start.C @@ -297,7 +297,12 @@ int ACTIVE_TASK::copy_output_files() { // // Current dir is top-level BOINC dir // -// postcondition: ACTIVE_TASK::task_state is set correctly +// postcondition: +// If any error occurs +// ACTIVE_TASK::task_state is PROCESS_COULDNT_START +// report_result_error() is called +// else +// ACTIVE_TASK::task_state is PROCESS_EXECUTING // int ACTIVE_TASK::start(bool first_time) { char exec_name[256], file_path[256], buf[256], 
exec_path[256]; @@ -305,13 +310,19 @@ int ACTIVE_TASK::start(bool first_time) { FILE_REF fref; FILE_INFO* fip; int retval; +#ifdef _WIN32 + std::string cmd_line; +#endif SCOPE_MSG_LOG scope_messages(log_messages, CLIENT_MSG_LOG::DEBUG_TASK); scope_messages.printf("ACTIVE_TASK::start(first_time=%d)\n", first_time); if (wup->project->verify_files_on_app_start) { retval = gstate.input_files_available(result, true); - if (retval) return retval; + if (retval) { + strcpy(buf, "Input file missing or invalid"); + goto error; + } } if (first_time) { @@ -326,10 +337,11 @@ int ACTIVE_TASK::start(bool first_time) { if (!app_client_shm.shm) { retval = get_shmem_seg_name(); if (retval) { - msg_printf(wup->project, MSG_ERROR, - "Can't get shared memory segment name: %s", boincerror(retval) + sprintf(buf, + "Can't get shared memory segment name: %s", + boincerror(retval) ); - return retval; + goto error; } } @@ -337,7 +349,10 @@ int ACTIVE_TASK::start(bool first_time) { // since the shmem name is part of the file // retval = write_app_init_file(); - if (retval) return retval; + if (retval) { + strcpy(buf, "Can't write init file"); + goto error; + } // set up applications files // @@ -348,16 +363,14 @@ int ACTIVE_TASK::start(bool first_time) { get_pathname(fip, file_path); if (fref.main_program) { if (is_image_file(fip->name)) { - msg_printf(wup->project, MSG_ERROR, - "Main program %s is an image file", fip->name - ); - return ERR_NO_SIGNATURE; + sprintf(buf, "Main program %s is an image file", fip->name); + retval = ERR_NO_SIGNATURE; + goto error; } if (!fip->executable && !wup->project->anonymous_platform) { - msg_printf(wup->project, MSG_ERROR, - "Main program %s is not executable", fip->name - ); - return ERR_NO_SIGNATURE; + sprintf(buf, "Main program %s is not executable", fip->name); + retval = ERR_NO_SIGNATURE; + goto error; } safe_strcpy(exec_name, fip->name); safe_strcpy(exec_path, file_path); @@ -367,14 +380,16 @@ int ACTIVE_TASK::start(bool first_time) { // if 
(first_time || wup->project->anonymous_platform) { retval = setup_file(wup, fip, fref, file_path, slot_dir); - if (retval) return retval; + if (retval) { + strcpy(buf, "Can't link input file"); + goto error; + } } } if (!strlen(exec_name)) { - msg_printf(wup->project, MSG_ERROR, - "No main program specified" - ); - return ERR_NOT_FOUND; + strcpy(buf, "No main program specified"); + retval = ERR_NOT_FOUND; + goto error; } // set up input, output files @@ -385,7 +400,10 @@ int ACTIVE_TASK::start(bool first_time) { fip = fref.file_info; get_pathname(fref.file_info, file_path); retval = setup_file(wup, fip, fref, file_path, slot_dir); - if (retval) return retval; + if (retval) { + strcpy(buf, "Can't link input file"); + goto error; + } } for (i=0; i<result->output_files.size(); i++) { fref = result->output_files[i]; @@ -393,7 +411,10 @@ int ACTIVE_TASK::start(bool first_time) { fip = fref.file_info; get_pathname(fref.file_info, file_path); retval = setup_file(wup, fip, fref, file_path, slot_dir); - if (retval) return retval; + if (retval) { + strcpy(buf, "Can't link output file"); + goto error; + } } } @@ -403,7 +424,6 @@ int ACTIVE_TASK::start(bool first_time) { PROCESS_INFORMATION process_info; STARTUPINFO startup_info; char slotdirpath[256]; - std::string cmd_line; char error_msg[1024]; memset(&process_info, 0, sizeof(process_info)); @@ -415,7 +435,11 @@ int ACTIVE_TASK::start(bool first_time) { if (!quitRequestEvent) { sprintf(buf, "%s%s", QUIT_PREFIX, shmem_seg_name); quitRequestEvent = CreateEvent(0, FALSE, FALSE, buf); - if (quitRequestEvent == NULL) return ERR_INVALID_EVENT; + if (quitRequestEvent == NULL) { + strcpy(buf, "Can't create event"); + retval = ERR_INVALID_EVENT; + goto error; + } } // create core/app share mem segment if needed @@ -425,7 +449,11 @@ int ACTIVE_TASK::start(bool first_time) { shm_handle = create_shmem(buf, sizeof(SHARED_MEM), (void **)&app_client_shm.shm, false ); - if (shm_handle == NULL) return ERR_SHMGET; + if (shm_handle == NULL) { + 
strcpy(buf, "Can't create shared memory"); + retval = ERR_SHMGET; + goto error; + } } app_client_shm.reset_msgs(); @@ -456,9 +484,9 @@ int ACTIVE_TASK::start(bool first_time) { boinc_sleep(drand()); } if (!success) { - task_state = PROCESS_COULDNT_START; - gstate.report_result_error(*result, "CreateProcess() failed - %s", error_msg); - return ERR_EXEC; + sprintf(buf, "CreateProcess() failed - %s", error_msg); + retval = ERR_EXEC; + goto error; } pid = process_info.dwProcessId; pid_handle = process_info.hProcess; @@ -476,11 +504,8 @@ int ACTIVE_TASK::start(bool first_time) { shmem_seg_name, sizeof(SHARED_MEM), (void**)&app_client_shm.shm ); if (retval) { - msg_printf( - wup->project, MSG_ERROR, - "Can't create shared memory: %s", boincerror(retval) - ); - return retval; + sprintf(buf, "Can't create shared memory: %s", boincerror(retval)); + goto error; } } app_client_shm.reset_msgs(); @@ -492,10 +517,8 @@ int ACTIVE_TASK::start(bool first_time) { // retval = chdir(slot_dir); if (retval) { - msg_printf(wup->project, MSG_ERROR, - "Can't change directory: %s", slot_dir, boincerror(retval) - ); - return retval; + sprintf(buf, "Can't change directory: %s", slot_dir, boincerror(retval)); + goto error; } // hook up stderr to a specially-named file @@ -510,11 +533,10 @@ int ACTIVE_TASK::start(bool first_time) { sprintf(buf, "../../%s", exec_path ); pid = spawnv(P_NOWAIT, buf, argv); if (pid == -1) { - msg_printf(wup->project, MSG_ERROR, - "Process creation failed: %s\n", buf, boincerror(retval) - ); + sprintf(buf, "Process creation failed: %s\n", buf, boincerror(retval)); chdir(current_dir); - return ERR_EXEC; + retval = ERR_EXEC; + goto error; } // restore current dir @@ -537,11 +559,8 @@ int ACTIVE_TASK::start(bool first_time) { (void**)&app_client_shm.shm ); if (retval) { - msg_printf( - wup->project, MSG_ERROR, - "Can't create shared memory: %s", boincerror(retval) - ); - return retval; + sprintf(buf, "Can't create shared memory: %s", boincerror(retval)); + goto error; 
} } app_client_shm.reset_msgs(); @@ -553,12 +572,9 @@ int ACTIVE_TASK::start(bool first_time) { pid = fork(); if (pid == -1) { - task_state = PROCESS_COULDNT_START; - gstate.report_result_error(*result, "fork() failed: %s", strerror(errno)); - msg_printf(wup->project, MSG_ERROR, - "Process creation failed: %s", strerror(errno) - ); - return ERR_FORK; + sprintf(buf, "fork() failed: %s", strerror(errno)); + retval = ERR_FORK; + goto error; } if (pid == 0) { // from here on we're running in a new process. @@ -614,6 +630,13 @@ int ACTIVE_TASK::start(bool first_time) { #endif task_state = PROCESS_EXECUTING; return 0; + + // go here on error; "buf" contains error message, "retval" is nonzero + // +error: + gstate.report_result_error(*result, buf); + task_state = PROCESS_COULDNT_START; + return retval; } // Resume the task if it was previously running; otherwise start it diff --git a/client/client_state.C b/client/client_state.C index 302d8e86a7..21f8ca445d 100644 --- a/client/client_state.C +++ b/client/client_state.C @@ -1012,6 +1012,7 @@ bool CLIENT_STATE::update_results() { vector::iterator result_iter; bool action = false; static double last_time=0; + int retval; if (gstate.now - last_time < 1.0) return false; last_time = gstate.now; @@ -1026,7 +1027,8 @@ bool CLIENT_STATE::update_results() { action = true; break; case RESULT_FILES_DOWNLOADING: - if (input_files_available(rp, false)) { + retval = input_files_available(rp, false); + if (!retval) { rp->state = RESULT_FILES_DOWNLOADED; if (rp->wup->avp->app_files.size()==0) { // if this is a file-transfer app, start the upload phase @@ -1089,6 +1091,8 @@ bool CLIENT_STATE::time_to_exit() { // - back off on contacting the project's scheduler // (so don't crash over and over) // - Append a description of the error to result.stderr_out +// - If result state is FILES_DOWNLOADED, change it to COMPUTE_ERROR +// so that we don't try to run it again. // int CLIENT_STATE::report_result_error(RESULT& res, const char* format, ...) 
{ char buf[4096], err_msg[4096]; @@ -1138,6 +1142,7 @@ int CLIENT_STATE::report_result_error(RESULT& res, const char* format, ...) { // ACTIVE_TASK::abort_task (if exceeded resource limit) // CLIENT_STATE::schedule_cpus (catch-all for resume/start errors) // + res.state = RESULT_COMPUTE_ERROR; if (!res.exit_status) { res.exit_status = ERR_RESULT_START; } diff --git a/client/client_state.h b/client/client_state.h index 97776a51b6..2712b59241 100644 --- a/client/client_state.h +++ b/client/client_state.h @@ -315,7 +315,7 @@ public: void set_ncpus(); double estimate_cpu_time(WORKUNIT&); double get_fraction_done(RESULT* result); - bool input_files_available(RESULT*, bool); + int input_files_available(RESULT*, bool); ACTIVE_TASK* get_next_graphics_capable_app(); int ncpus; private: diff --git a/client/cs_apps.C b/client/cs_apps.C index 3c1d61bdb7..8de78c9b18 100644 --- a/client/cs_apps.C +++ b/client/cs_apps.C @@ -201,34 +201,37 @@ bool CLIENT_STATE::handle_finished_apps() { // Called from ACTIVE_TASK::start() (with verify=true) // when project has verify_files_on_app_start set. 
// -bool CLIENT_STATE::input_files_available(RESULT* rp, bool verify) { +int CLIENT_STATE::input_files_available(RESULT* rp, bool verify) { WORKUNIT* wup = rp->wup; FILE_INFO* fip; unsigned int i; APP_VERSION* avp; FILE_REF fr; PROJECT* project = rp->project; + int retval; avp = wup->avp; for (i=0; i<avp->app_files.size(); i++) { fr = avp->app_files[i]; fip = fr.file_info; - if (fip->status != FILE_PRESENT) return false; + if (fip->status != FILE_PRESENT) return ERR_FILE_MISSING; // don't verify app files if using anonymous platform // if (!project->anonymous_platform) { - if (fip->verify_file(verify)) return false; + retval = fip->verify_file(verify); + if (retval) return retval; } } for (i=0; i<wup->input_files.size(); i++) { fip = wup->input_files[i].file_info; if (fip->generated_locally) continue; - if (fip->status != FILE_PRESENT) return false; - if (fip->verify_file(verify)) return false; + if (fip->status != FILE_PRESENT) return ERR_FILE_MISSING; + retval = fip->verify_file(verify); + if (retval) return retval; } - return true; + return 0; }