// This file is part of BOINC.
// http://boinc.berkeley.edu
// Copyright (C) 2008 University of California
//
// BOINC is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// BOINC is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.

// The "policy" part of task execution is here.
// The "mechanism" part is in app.C
//
|
2003-07-01 20:37:09 +00:00
|
|
|
|
2003-10-16 19:03:49 +00:00
|
|
|
#include "cpp.h"
|
2002-06-06 18:42:01 +00:00
|
|
|
|
2003-03-18 19:37:09 +00:00
|
|
|
#ifdef _WIN32
|
2004-06-16 23:16:08 +00:00
|
|
|
#include "boinc_win.h"
|
2010-05-11 19:10:29 +00:00
|
|
|
#else
|
2005-11-21 18:34:44 +00:00
|
|
|
#include "config.h"
|
2004-09-06 02:41:34 +00:00
|
|
|
#include <cassert>
|
2004-07-13 13:54:09 +00:00
|
|
|
#include <csignal>
|
2004-03-04 11:41:43 +00:00
|
|
|
#endif
|
2003-03-18 19:37:09 +00:00
|
|
|
|
2003-05-21 23:23:42 +00:00
|
|
|
#include "error_numbers.h"
|
|
|
|
#include "filesys.h"
|
2012-04-30 21:00:28 +00:00
|
|
|
#include "md5_file.h"
|
2003-03-17 23:35:00 +00:00
|
|
|
#include "shmem.h"
|
2012-04-30 21:00:28 +00:00
|
|
|
#include "util.h"
|
|
|
|
|
2004-04-08 08:15:23 +00:00
|
|
|
#include "client_msgs.h"
|
2002-04-30 22:22:54 +00:00
|
|
|
#include "client_state.h"
|
2012-04-30 21:00:28 +00:00
|
|
|
#include "file_names.h"
|
|
|
|
#include "log_flags.h"
|
|
|
|
#include "project.h"
|
|
|
|
#include "result.h"
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2004-06-30 18:17:21 +00:00
|
|
|
using std::vector;
|
|
|
|
|
2016-12-28 07:48:37 +00:00
|
|
|
// Clean up after finished apps.
|
|
|
|
// Called every second from the main polling loop.
|
2007-04-03 19:35:33 +00:00
|
|
|
//
|
|
|
|
bool CLIENT_STATE::handle_finished_apps() {
|
|
|
|
ACTIVE_TASK* atp;
|
|
|
|
bool action = false;
|
|
|
|
static double last_time = 0;
|
2013-03-15 03:43:29 +00:00
|
|
|
if (!clock_change && now - last_time < HANDLE_FINISHED_APPS_PERIOD) return false;
|
2007-05-10 16:23:10 +00:00
|
|
|
last_time = now;
|
2007-04-03 19:35:33 +00:00
|
|
|
|
2007-04-09 16:19:42 +00:00
|
|
|
vector<ACTIVE_TASK*>::iterator iter;
|
|
|
|
|
|
|
|
iter = active_tasks.active_tasks.begin();
|
|
|
|
while (iter != active_tasks.active_tasks.end()) {
|
|
|
|
atp = *iter;
|
2007-04-03 19:35:33 +00:00
|
|
|
switch (atp->task_state()) {
|
|
|
|
case PROCESS_EXITED:
|
|
|
|
case PROCESS_WAS_SIGNALED:
|
|
|
|
case PROCESS_EXIT_UNKNOWN:
|
|
|
|
case PROCESS_COULDNT_START:
|
|
|
|
case PROCESS_ABORTED:
|
|
|
|
if (log_flags.task) {
|
|
|
|
msg_printf(atp->wup->project, MSG_INFO,
|
|
|
|
"Computation for task %s finished", atp->result->name
|
|
|
|
);
|
|
|
|
}
|
|
|
|
app_finished(*atp);
|
2010-10-14 19:07:02 +00:00
|
|
|
if (!action) {
|
2013-03-24 18:22:01 +00:00
|
|
|
adjust_rec(); // update REC before erasing ACTIVE_TASK
|
2010-10-14 19:07:02 +00:00
|
|
|
}
|
2007-04-09 16:19:42 +00:00
|
|
|
iter = active_tasks.active_tasks.erase(iter);
|
2007-04-03 19:35:33 +00:00
|
|
|
delete atp;
|
|
|
|
set_client_state_dirty("handle_finished_apps");
|
|
|
|
|
|
|
|
// the following is critical; otherwise the result is
|
|
|
|
// still in the "scheduled" list and enforce_schedule()
|
|
|
|
// will try to run it again.
|
|
|
|
//
|
|
|
|
request_schedule_cpus("handle_finished_apps");
|
|
|
|
action = true;
|
2007-04-09 16:19:42 +00:00
|
|
|
break;
|
|
|
|
default:
|
2015-01-14 20:18:29 +00:00
|
|
|
++iter;
|
2007-04-03 19:35:33 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return action;
|
|
|
|
}
|
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
// Handle a task that has finished.
|
|
|
|
// Mark its output files as present, and delete scratch files.
|
|
|
|
// Don't delete input files because they might be shared with other WUs.
|
|
|
|
// Update state of result record.
|
|
|
|
//
|
|
|
|
int CLIENT_STATE::app_finished(ACTIVE_TASK& at) {
|
|
|
|
RESULT* rp = at.result;
|
2007-04-03 20:30:44 +00:00
|
|
|
bool had_error = false;
|
|
|
|
|
|
|
|
#ifndef SIM
|
2002-04-30 22:22:54 +00:00
|
|
|
FILE_INFO* fip;
|
|
|
|
unsigned int i;
|
2012-05-09 16:11:50 +00:00
|
|
|
char path[MAXPATHLEN];
|
2002-08-23 00:53:00 +00:00
|
|
|
int retval;
|
2003-05-21 23:23:42 +00:00
|
|
|
double size;
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2007-07-13 20:18:46 +00:00
|
|
|
// scan the output files, check if missing or too big.
|
|
|
|
// Don't bother doing this if result was aborted via GUI or by project
|
|
|
|
//
|
|
|
|
switch (rp->exit_status) {
|
2012-04-26 05:28:45 +00:00
|
|
|
case EXIT_ABORTED_VIA_GUI:
|
|
|
|
case EXIT_ABORTED_BY_PROJECT:
|
2007-07-13 20:18:46 +00:00
|
|
|
break;
|
|
|
|
default:
|
2005-01-21 23:26:36 +00:00
|
|
|
for (i=0; i<rp->output_files.size(); i++) {
|
2011-01-06 23:09:13 +00:00
|
|
|
FILE_REF& fref = rp->output_files[i];
|
2006-10-20 20:21:33 +00:00
|
|
|
fip = fref.file_info;
|
2005-04-28 23:19:58 +00:00
|
|
|
if (fip->uploaded) continue;
|
2007-03-13 19:33:27 +00:00
|
|
|
get_pathname(fip, path, sizeof(path));
|
2005-01-21 23:26:36 +00:00
|
|
|
retval = file_size(path, size);
|
|
|
|
if (retval) {
|
2011-01-06 23:09:13 +00:00
|
|
|
if (fref.optional) {
|
2011-07-20 19:12:10 +00:00
|
|
|
fip->upload_urls.clear();
|
2011-01-06 23:09:13 +00:00
|
|
|
continue;
|
|
|
|
}
|
2006-10-20 20:21:33 +00:00
|
|
|
|
2005-01-21 23:26:36 +00:00
|
|
|
// an output file is unexpectedly absent.
|
|
|
|
//
|
|
|
|
fip->status = retval;
|
|
|
|
had_error = true;
|
2007-02-05 03:37:17 +00:00
|
|
|
msg_printf(
|
|
|
|
rp->project, MSG_INFO,
|
|
|
|
"Output file %s for task %s absent",
|
|
|
|
fip->name, rp->name
|
|
|
|
);
|
2005-01-21 23:26:36 +00:00
|
|
|
} else if (size > fip->max_nbytes) {
|
|
|
|
// Note: this is only checked when the application finishes.
|
|
|
|
// The total disk space is checked while the application is running.
|
|
|
|
//
|
|
|
|
msg_printf(
|
|
|
|
rp->project, MSG_INFO,
|
2006-01-17 22:48:09 +00:00
|
|
|
"Output file %s for task %s exceeds size limit.",
|
2005-01-21 23:26:36 +00:00
|
|
|
fip->name, rp->name
|
|
|
|
);
|
|
|
|
msg_printf(
|
|
|
|
rp->project, MSG_INFO,
|
|
|
|
"File size: %f bytes. Limit: %f bytes",
|
|
|
|
size, fip->max_nbytes
|
|
|
|
);
|
|
|
|
|
|
|
|
fip->delete_file();
|
|
|
|
fip->status = ERR_FILE_TOO_BIG;
|
|
|
|
had_error = true;
|
2003-05-21 23:23:42 +00:00
|
|
|
} else {
|
2011-07-20 19:12:10 +00:00
|
|
|
if (!fip->uploadable() && !fip->sticky) {
|
2005-01-21 23:26:36 +00:00
|
|
|
fip->delete_file(); // sets status to NOT_PRESENT
|
2003-05-21 23:23:42 +00:00
|
|
|
} else {
|
2006-06-20 23:25:06 +00:00
|
|
|
retval = 0;
|
|
|
|
if (fip->gzip_when_done) {
|
|
|
|
retval = fip->gzip();
|
|
|
|
}
|
|
|
|
if (!retval) {
|
|
|
|
retval = md5_file(path, fip->md5_cksum, fip->nbytes);
|
|
|
|
}
|
2005-01-21 23:26:36 +00:00
|
|
|
if (retval) {
|
|
|
|
fip->status = retval;
|
|
|
|
had_error = true;
|
|
|
|
} else {
|
|
|
|
fip->status = FILE_PRESENT;
|
|
|
|
}
|
2003-05-21 23:23:42 +00:00
|
|
|
}
|
2002-11-19 22:57:05 +00:00
|
|
|
}
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
|
|
|
}
|
2007-04-03 19:35:33 +00:00
|
|
|
#endif
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2005-01-21 23:26:36 +00:00
|
|
|
if (rp->exit_status != 0) {
|
|
|
|
had_error = true;
|
|
|
|
}
|
|
|
|
|
2003-06-26 23:53:37 +00:00
|
|
|
if (had_error) {
|
2006-05-25 22:00:10 +00:00
|
|
|
switch (rp->exit_status) {
|
2012-04-26 05:28:45 +00:00
|
|
|
case EXIT_ABORTED_VIA_GUI:
|
|
|
|
case EXIT_ABORTED_BY_PROJECT:
|
2007-01-24 21:20:57 +00:00
|
|
|
rp->set_state(RESULT_ABORTED, "CS::app_finished");
|
2006-05-25 22:00:10 +00:00
|
|
|
break;
|
|
|
|
default:
|
2007-01-24 21:20:57 +00:00
|
|
|
rp->set_state(RESULT_COMPUTE_ERROR, "CS::app_finished");
|
2006-05-25 22:00:10 +00:00
|
|
|
}
|
2017-01-04 21:34:26 +00:00
|
|
|
rp->project->njobs_fail++;
|
2003-06-26 23:53:37 +00:00
|
|
|
} else {
|
2007-04-03 20:30:44 +00:00
|
|
|
#ifdef SIM
|
|
|
|
rp->set_state(RESULT_FILES_UPLOADED, "CS::app_finished");
|
2012-02-08 00:45:37 +00:00
|
|
|
rp->set_ready_to_report();
|
2007-04-03 20:30:44 +00:00
|
|
|
rp->completed_time = now;
|
|
|
|
#else
|
2007-01-24 21:20:57 +00:00
|
|
|
rp->set_state(RESULT_FILES_UPLOADING, "CS::app_finished");
|
2007-04-13 04:22:20 +00:00
|
|
|
rp->append_log_record();
|
2007-04-03 20:30:44 +00:00
|
|
|
#endif
|
2008-12-02 22:19:39 +00:00
|
|
|
rp->project->update_duration_correction_factor(&at);
|
2014-06-04 06:37:14 +00:00
|
|
|
rp->project->njobs_success++;
|
2003-06-26 23:53:37 +00:00
|
|
|
}
|
2004-06-30 01:10:22 +00:00
|
|
|
|
2013-03-24 18:22:01 +00:00
|
|
|
double elapsed_time = now - rec_interval_start;
|
2008-12-31 23:07:59 +00:00
|
|
|
work_fetch.accumulate_inst_sec(&at, elapsed_time);
|
2004-06-30 01:10:22 +00:00
|
|
|
|
2014-03-10 00:09:21 +00:00
|
|
|
rp->project->pwf.request_if_idle_and_uploading = true;
|
|
|
|
// set this to allow work fetch if idle instance,
|
|
|
|
// even before upload finishes
|
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-02-06 06:06:44 +00:00
|
|
|
// Returns zero iff all the input files for a result are present
|
2004-10-07 19:18:37 +00:00
|
|
|
// (both WU and app version)
|
2006-06-20 17:36:28 +00:00
|
|
|
// Called from CLIENT_STATE::update_results (with verify=false)
|
|
|
|
// to transition result from DOWNLOADING to DOWNLOADED.
|
|
|
|
// Called from ACTIVE_TASK::start() (with verify=true)
|
|
|
|
// when project has verify_files_on_app_start set.
|
2002-07-15 23:21:20 +00:00
|
|
|
//
|
2007-11-07 19:32:32 +00:00
|
|
|
// If fipp is nonzero, return a pointer to offending FILE_INFO on error
|
|
|
|
//
|
|
|
|
int CLIENT_STATE::input_files_available(
|
2012-02-07 17:54:09 +00:00
|
|
|
RESULT* rp, bool verify_contents, FILE_INFO** fipp
|
2007-11-07 19:32:32 +00:00
|
|
|
) {
|
2002-04-30 22:22:54 +00:00
|
|
|
WORKUNIT* wup = rp->wup;
|
|
|
|
FILE_INFO* fip;
|
2002-12-05 21:56:33 +00:00
|
|
|
unsigned int i;
|
2002-05-17 22:33:57 +00:00
|
|
|
APP_VERSION* avp;
|
2003-08-01 21:50:09 +00:00
|
|
|
FILE_REF fr;
|
2004-03-30 23:05:34 +00:00
|
|
|
PROJECT* project = rp->project;
|
2006-06-20 19:34:42 +00:00
|
|
|
int retval;
|
2004-03-30 23:05:34 +00:00
|
|
|
|
2007-05-03 17:14:30 +00:00
|
|
|
avp = rp->avp;
|
2002-05-17 22:33:57 +00:00
|
|
|
for (i=0; i<avp->app_files.size(); i++) {
|
2003-08-01 21:50:09 +00:00
|
|
|
fr = avp->app_files[i];
|
|
|
|
fip = fr.file_info;
|
2007-11-07 19:32:32 +00:00
|
|
|
if (fip->status != FILE_PRESENT) {
|
|
|
|
if (fipp) *fipp = fip;
|
|
|
|
return ERR_FILE_MISSING;
|
|
|
|
}
|
2004-03-30 23:05:34 +00:00
|
|
|
|
2006-06-20 17:36:28 +00:00
|
|
|
// don't verify app files if using anonymous platform
|
2004-03-30 23:05:34 +00:00
|
|
|
//
|
2012-02-07 17:54:09 +00:00
|
|
|
if (verify_contents && !project->anonymous_platform) {
|
2012-02-08 19:30:57 +00:00
|
|
|
retval = fip->verify_file(true, true, false);
|
2007-11-07 19:32:32 +00:00
|
|
|
if (retval) {
|
|
|
|
if (fipp) *fipp = fip;
|
|
|
|
return retval;
|
|
|
|
}
|
2004-03-30 23:05:34 +00:00
|
|
|
}
|
2002-05-17 22:33:57 +00:00
|
|
|
}
|
2002-04-30 22:22:54 +00:00
|
|
|
|
|
|
|
for (i=0; i<wup->input_files.size(); i++) {
|
|
|
|
fip = wup->input_files[i].file_info;
|
2007-11-07 19:32:32 +00:00
|
|
|
if (fip->status != FILE_PRESENT) {
|
2011-07-20 19:12:10 +00:00
|
|
|
if (wup->input_files[i].optional) continue;
|
2007-11-07 19:32:32 +00:00
|
|
|
if (fipp) *fipp = fip;
|
|
|
|
return ERR_FILE_MISSING;
|
2012-02-07 17:54:09 +00:00
|
|
|
}
|
|
|
|
if (verify_contents) {
|
2012-02-08 19:30:57 +00:00
|
|
|
retval = fip->verify_file(true, true, false);
|
2011-07-20 19:12:10 +00:00
|
|
|
if (retval) {
|
|
|
|
if (fipp) *fipp = fip;
|
|
|
|
return retval;
|
|
|
|
}
|
2007-11-07 19:32:32 +00:00
|
|
|
}
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
2006-06-20 19:34:42 +00:00
|
|
|
return 0;
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
|
|
|
|
2003-10-19 01:48:32 +00:00
|
|
|
// Force a value into the range [0, 1]:
// values below 0 (and NaN) become 0, values above 1 become 1,
// anything else is returned unchanged.
//
inline double force_fraction(double f) {
    // Written as a negated ">=" on purpose: every comparison with NaN
    // is false, so "f < 0" would let NaN fall through and be returned.
    //
    if (!(f >= 0)) return 0;
    if (f > 1) return 1;
    return f;
}
|
|
|
|
|
2004-04-07 06:51:42 +00:00
|
|
|
// Return the fraction done of the active task running the given result,
// passed through force_fraction().
// Returns 0 if there is no active task for the result.
//
double CLIENT_STATE::get_fraction_done(RESULT* result) {
    ACTIVE_TASK* task = active_tasks.lookup_result(result);
    if (!task) {
        return 0.0;
    }
    return force_fraction(task->fraction_done);
}
|
|
|
|
|
2007-05-03 17:14:30 +00:00
|
|
|
// Find latest version of app for given platform
|
|
|
|
// or -1 if can't find one
|
2003-12-24 21:50:41 +00:00
|
|
|
//
|
2007-05-03 17:14:30 +00:00
|
|
|
int CLIENT_STATE::latest_version(APP* app, char* platform) {
|
2003-10-19 01:48:32 +00:00
|
|
|
unsigned int i;
|
|
|
|
int best = -1;
|
2003-12-24 21:50:41 +00:00
|
|
|
|
2003-10-19 01:48:32 +00:00
|
|
|
for (i=0; i<app_versions.size(); i++) {
|
2007-05-03 17:14:30 +00:00
|
|
|
APP_VERSION* avp = app_versions[i];
|
|
|
|
if (avp->app != app) continue;
|
|
|
|
if (strcmp(platform, avp->platform)) continue;
|
2003-10-19 01:48:32 +00:00
|
|
|
if (avp->version_num < best) continue;
|
|
|
|
best = avp->version_num;
|
|
|
|
}
|
|
|
|
return best;
|
|
|
|
}
|
|
|
|
|
2007-02-21 22:27:35 +00:00
|
|
|
// Find the ACTIVE_TASK in the current set with the matching PID
|
|
|
|
//
|
|
|
|
ACTIVE_TASK* ACTIVE_TASK_SET::lookup_pid(int pid) {
|
|
|
|
unsigned int i;
|
|
|
|
ACTIVE_TASK* atp;
|
|
|
|
|
|
|
|
for (i=0; i<active_tasks.size(); i++) {
|
|
|
|
atp = active_tasks[i];
|
|
|
|
if (atp->pid == pid) return atp;
|
2006-07-10 00:46:07 +00:00
|
|
|
}
|
2007-02-21 22:27:35 +00:00
|
|
|
return NULL;
|
2006-07-10 00:46:07 +00:00
|
|
|
}
|
|
|
|
|
2007-02-21 22:27:35 +00:00
|
|
|
// Find the ACTIVE_TASK in the current set with the matching result
|
2007-02-21 16:26:51 +00:00
|
|
|
//
|
2007-02-21 22:27:35 +00:00
|
|
|
ACTIVE_TASK* ACTIVE_TASK_SET::lookup_result(RESULT* result) {
|
|
|
|
unsigned int i;
|
|
|
|
ACTIVE_TASK* atp;
|
|
|
|
|
|
|
|
for (i=0; i<active_tasks.size(); i++) {
|
|
|
|
atp = active_tasks[i];
|
|
|
|
if (atp->result == result) {
|
|
|
|
return atp;
|
2007-02-21 16:26:51 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|