2003-07-01 20:37:09 +00:00
|
|
|
// The contents of this file are subject to the BOINC Public License
|
2002-04-30 22:22:54 +00:00
|
|
|
// Version 1.0 (the "License"); you may not use this file except in
|
|
|
|
// compliance with the License. You may obtain a copy of the License at
|
2003-07-01 20:37:09 +00:00
|
|
|
// http://boinc.berkeley.edu/license_1.0.txt
|
2003-08-02 00:02:11 +00:00
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
// Software distributed under the License is distributed on an "AS IS"
|
|
|
|
// basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
|
|
|
// License for the specific language governing rights and limitations
|
2003-08-02 00:02:11 +00:00
|
|
|
// under the License.
|
|
|
|
//
|
|
|
|
// The Original Code is the Berkeley Open Infrastructure for Network Computing.
|
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
// The Initial Developer of the Original Code is the SETI@home project.
|
2003-07-01 20:37:09 +00:00
|
|
|
// Portions created by the SETI@home project are Copyright (C) 2002
|
2003-08-02 00:02:11 +00:00
|
|
|
// University of California at Berkeley. All Rights Reserved.
|
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
// Contributor(s):
|
|
|
|
//
|
2004-01-30 22:19:19 +00:00
|
|
|
// The "policy" part of task execution is here.
|
|
|
|
// The "mechanism" part is in app.C
|
2003-05-22 20:47:56 +00:00
|
|
|
//
|
2003-07-01 20:37:09 +00:00
|
|
|
|
2003-10-16 19:03:49 +00:00
|
|
|
#include "cpp.h"
|
2002-06-06 18:42:01 +00:00
|
|
|
|
2003-03-18 19:37:09 +00:00
|
|
|
#ifdef _WIN32
|
2004-06-16 23:16:08 +00:00
|
|
|
#include "boinc_win.h"
|
2003-03-18 19:37:09 +00:00
|
|
|
#endif
|
2004-03-04 11:41:43 +00:00
|
|
|
|
|
|
|
#ifndef _WIN32
|
2004-07-13 13:54:09 +00:00
|
|
|
#include <csignal>
|
2004-03-04 11:41:43 +00:00
|
|
|
#endif
|
2003-03-18 19:37:09 +00:00
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
#include "md5_file.h"
|
2004-04-07 07:21:35 +00:00
|
|
|
#include "util.h"
|
2003-05-21 23:23:42 +00:00
|
|
|
#include "error_numbers.h"
|
2002-04-30 22:22:54 +00:00
|
|
|
#include "file_names.h"
|
2003-05-21 23:23:42 +00:00
|
|
|
#include "filesys.h"
|
2003-03-17 23:35:00 +00:00
|
|
|
#include "shmem.h"
|
2003-07-02 02:02:18 +00:00
|
|
|
#include "log_flags.h"
|
2004-04-08 08:15:23 +00:00
|
|
|
#include "client_msgs.h"
|
2002-04-30 22:22:54 +00:00
|
|
|
#include "client_state.h"
|
|
|
|
|
2004-06-30 18:17:21 +00:00
|
|
|
using std::vector;
|
|
|
|
|
2002-08-22 21:29:58 +00:00
|
|
|
|
2004-05-05 17:48:39 +00:00
|
|
|
// Quit running applications, quit benchmarks,
|
|
|
|
// write the client_state.xml file
|
|
|
|
// (should we also terminate net_xfers here?)
|
2002-08-22 21:29:58 +00:00
|
|
|
//
|
2004-05-05 17:48:39 +00:00
|
|
|
int CLIENT_STATE::quit_activities() {
|
2002-08-22 21:29:58 +00:00
|
|
|
int retval;
|
2003-01-07 01:02:08 +00:00
|
|
|
|
2003-05-22 20:47:56 +00:00
|
|
|
retval = active_tasks.exit_tasks();
|
2002-08-22 21:29:58 +00:00
|
|
|
if (retval) {
|
2004-05-05 17:48:39 +00:00
|
|
|
msg_printf(NULL, MSG_ERROR, "CLIENT_STATE.quit_activities: exit_tasks failed\n");
|
2002-08-22 21:29:58 +00:00
|
|
|
}
|
2002-11-20 20:14:48 +00:00
|
|
|
retval = write_state_file();
|
2003-03-12 18:15:48 +00:00
|
|
|
if (retval) {
|
2004-05-05 17:48:39 +00:00
|
|
|
msg_printf(NULL, MSG_ERROR, "CLIENT_STATE.quit_activities: write_state_file failed\n");
|
2002-11-20 20:14:48 +00:00
|
|
|
}
|
2004-03-21 00:10:15 +00:00
|
|
|
abort_cpu_benchmarks();
|
2002-08-22 21:29:58 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
// Handle a task that has finished.
|
|
|
|
// Mark its output files as present, and delete scratch files.
|
|
|
|
// Don't delete input files because they might be shared with other WUs.
|
|
|
|
// Update state of result record.
|
|
|
|
//
|
|
|
|
int CLIENT_STATE::app_finished(ACTIVE_TASK& at) {
    RESULT* rp = at.result;
    char path[256];
    double size;

    // Any nonzero exit status other than a requested quit is an error.
    bool had_error = (at.exit_status != 0 && at.exit_status != ERR_QUIT_REQUEST);

    // Examine each output file of the result and record its final status.
    for (unsigned int k=0; k<rp->output_files.size(); k++) {
        FILE_INFO* fip = rp->output_files[k].file_info;
        get_pathname(fip, path);

        int retval = file_size(path, size);
        if (retval) {
            // an output file is unexpectedly absent.
            //
            fip->status = retval;
            had_error = true;
            continue;
        }

        if (size > fip->max_nbytes) {
            // Note: this is only checked when the application finishes.
            // The total disk space is checked while the application is running.
            //
            msg_printf(
                rp->project, MSG_INFO,
                "Output file %s for result %s exceeds size limit.",
                fip->name, rp->name
            );

            fip->delete_file();
            fip->status = ERR_FILE_TOO_BIG;
            had_error = true;
            continue;
        }

        if (!fip->upload_when_present && !fip->sticky) {
            // neither uploaded nor kept around: discard it
            fip->delete_file();     // sets status to NOT_PRESENT
            continue;
        }

        // The file is kept: compute its checksum and byte count.
        retval = md5_file(path, fip->md5_cksum, fip->nbytes);
        if (retval) {
            fip->status = retval;
            had_error = true;
        } else {
            fip->status = FILE_PRESENT;
        }
    }

    rp->is_active = false;
    if (!had_error) {
        // can now upload files.
        rp->state = RESULT_FILES_UPLOADING;
    } else {
        // dead-end state indicating we had an error at end of computation;
        // do not move to RESULT_FILES_UPLOADING
        rp->state = RESULT_COMPUTE_DONE;
    }

    // Fold this result's CPU time into the project's running average.
    PROJECT* proj = rp->project;
    update_average(
        dtime()-rp->final_cpu_time,     // KLUDGE - should be result start time
        rp->final_cpu_time,
        CPU_HALF_LIFE,
        proj->exp_avg_cpu,
        proj->exp_avg_mod_time
    );

    // Credit the CPU time used since the last scheduling pass to both
    // the project's and the client's per-period work counters.
    const double task_cpu_time = at.current_cpu_time - at.cpu_time_at_last_sched;
    at.result->project->work_done_this_period += task_cpu_time;
    cpu_sched_work_done_this_period += task_cpu_time;

    return 0;
}
|
|
|
|
|
2003-05-22 20:47:56 +00:00
|
|
|
// clean up after finished apps
|
2002-04-30 22:22:54 +00:00
|
|
|
//
|
2003-05-22 20:47:56 +00:00
|
|
|
bool CLIENT_STATE::handle_finished_apps() {
|
2002-04-30 22:22:54 +00:00
|
|
|
unsigned int i;
|
|
|
|
ACTIVE_TASK* atp;
|
|
|
|
bool action = false;
|
|
|
|
|
2004-04-08 08:15:23 +00:00
|
|
|
SCOPE_MSG_LOG scope_messages(log_messages, CLIENT_MSG_LOG::DEBUG_TASK);
|
2003-07-02 02:02:18 +00:00
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
for (i=0; i<active_tasks.active_tasks.size(); i++) {
|
|
|
|
atp = active_tasks.active_tasks[i];
|
2004-06-30 01:10:22 +00:00
|
|
|
if (atp->scheduler_state != CPU_SCHED_RUNNING) continue;
|
2003-05-22 20:47:56 +00:00
|
|
|
switch (atp->state) {
|
2004-08-07 00:30:34 +00:00
|
|
|
case PROCESS_UNINITIALIZED:
|
2003-05-22 20:47:56 +00:00
|
|
|
case PROCESS_RUNNING:
|
|
|
|
case PROCESS_ABORT_PENDING:
|
2004-04-08 18:12:01 +00:00
|
|
|
case PROCESS_IN_LIMBO:
|
2003-05-22 20:47:56 +00:00
|
|
|
break;
|
|
|
|
default:
|
2003-06-03 22:47:15 +00:00
|
|
|
msg_printf(atp->wup->project, MSG_INFO, "Computation for result %s finished", atp->wup->name);
|
2003-07-29 23:26:32 +00:00
|
|
|
scope_messages.printf(
|
|
|
|
"CLIENT_STATE::handle_finished_apps(): task finished; pid %d, status %d\n",
|
|
|
|
atp->pid, atp->exit_status
|
|
|
|
);
|
2002-04-30 22:22:54 +00:00
|
|
|
app_finished(*atp);
|
2003-03-19 18:46:58 +00:00
|
|
|
active_tasks.remove(atp);
|
2002-04-30 22:22:54 +00:00
|
|
|
delete atp;
|
2002-08-22 21:29:58 +00:00
|
|
|
set_client_state_dirty("handle_running_apps");
|
2002-04-30 22:22:54 +00:00
|
|
|
action = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return action;
|
|
|
|
}
|
|
|
|
|
2002-07-15 23:21:20 +00:00
|
|
|
// Returns true if all the input files for a result are available
|
|
|
|
// locally, false otherwise
|
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
bool CLIENT_STATE::input_files_available(RESULT* rp) {
    WORKUNIT* wup = rp->wup;
    PROJECT* project = rp->project;
    APP_VERSION* avp = wup->avp;
    unsigned int k;

    // Files belonging to the application version.
    for (k=0; k<avp->app_files.size(); k++) {
        FILE_INFO* app_fip = avp->app_files[k].file_info;
        if (app_fip->status != FILE_PRESENT) return false;

        // don't check file size for anonymous platform
        //
        if (!project->anonymous_platform && !app_fip->verify_existing_file()) {
            return false;
        }
    }

    // Input files of the workunit itself; these are always verified.
    for (k=0; k<wup->input_files.size(); k++) {
        FILE_INFO* in_fip = wup->input_files[k].file_info;
        if (in_fip->status != FILE_PRESENT) return false;
        if (!in_fip->verify_existing_file()) return false;
    }

    return true;
}
|
|
|
|
|
2004-06-30 01:10:22 +00:00
|
|
|
|
|
|
|
// Return true iff there are fewer running tasks than available CPUs
|
|
|
|
//
|
|
|
|
bool CLIENT_STATE::have_free_cpu() {
|
|
|
|
int num_running_tasks = 0;
|
|
|
|
for (unsigned int i=0; i<active_tasks.active_tasks.size(); ++i) {
|
|
|
|
if (active_tasks.active_tasks[i]->scheduler_state == CPU_SCHED_RUNNING) {
|
|
|
|
++num_running_tasks;
|
2003-08-07 21:45:58 +00:00
|
|
|
}
|
|
|
|
}
|
2004-06-30 01:10:22 +00:00
|
|
|
return num_running_tasks < ncpus;
|
2003-08-07 21:45:58 +00:00
|
|
|
}
|
|
|
|
|
2004-06-30 01:10:22 +00:00
|
|
|
// Choose the next runnable result for each project with this
|
|
|
|
// preference order:
|
|
|
|
// 1. results with active tasks that are running
|
|
|
|
// 2. results with active tasks that are preempted (but have a process)
|
|
|
|
// 3. results with active tasks that have no process
|
|
|
|
// 4. results with no active task
|
2002-04-30 22:22:54 +00:00
|
|
|
//
|
2004-06-30 01:10:22 +00:00
|
|
|
void CLIENT_STATE::assign_results_to_projects() {
|
|
|
|
|
2004-07-06 18:09:54 +00:00
|
|
|
// Before assigning a result to an active task, check if that result is a file xfer
|
|
|
|
// this will be appearent by the lack of files associated with the workunit's app
|
|
|
|
// Running this function will find these results and mark them as completed.
|
2004-07-13 00:13:29 +00:00
|
|
|
//
|
2004-07-06 18:09:11 +00:00
|
|
|
handle_file_xfer_apps();
|
2004-07-06 17:37:58 +00:00
|
|
|
|
2004-06-30 01:10:22 +00:00
|
|
|
for (unsigned int i=0; i<active_tasks.active_tasks.size(); ++i) {
|
|
|
|
ACTIVE_TASK *atp = active_tasks.active_tasks[i];
|
|
|
|
if (atp->result->already_selected) continue;
|
|
|
|
PROJECT *p = atp->wup->project;
|
|
|
|
if (p->next_runnable_result == NULL) {
|
|
|
|
p->next_runnable_result = atp->result;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// any next_runnable_result assigned so far should have an active task
|
|
|
|
ACTIVE_TASK *next_atp = lookup_active_task_by_result(p->next_runnable_result);
|
2004-06-30 07:21:52 +00:00
|
|
|
//assert(next_atp != NULL);
|
2004-06-30 01:10:22 +00:00
|
|
|
if ((next_atp->state == PROCESS_UNINITIALIZED
|
|
|
|
&& atp->state == PROCESS_RUNNING) ||
|
|
|
|
(next_atp->scheduler_state == CPU_SCHED_PREEMPTED
|
|
|
|
&& atp->state == CPU_SCHED_RUNNING)
|
|
|
|
){
|
|
|
|
p->next_runnable_result = atp->result;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Note: !results[i]->is_active is true for results with preempted
|
|
|
|
// active tasks, but all of those were already been considered in the
|
|
|
|
// previous loop.
|
|
|
|
// So p->next_runnable_result will not be NULL if there were any.
|
2004-07-13 00:13:29 +00:00
|
|
|
//
|
2004-06-30 01:10:22 +00:00
|
|
|
for (unsigned int i=0; i<results.size(); ++i) {
|
|
|
|
if (results[i]->already_selected) continue;
|
|
|
|
PROJECT *p = results[i]->wup->project;
|
|
|
|
if (p->next_runnable_result == NULL
|
|
|
|
&& !results[i]->is_active
|
|
|
|
&& results[i]->state == RESULT_FILES_DOWNLOADED
|
|
|
|
){
|
|
|
|
p->next_runnable_result = results[i];
|
|
|
|
}
|
|
|
|
}
|
2003-10-22 23:11:49 +00:00
|
|
|
|
2004-06-30 01:10:22 +00:00
|
|
|
// mark selected results, so CPU scheduler won't try to consider
|
|
|
|
// a result more than once (i.e. for running on another CPU)
|
|
|
|
//
|
|
|
|
for (unsigned int i=0; i<projects.size(); ++i) {
|
2004-07-06 20:26:04 +00:00
|
|
|
if (projects[i]->next_runnable_result != NULL) {
|
2004-06-30 01:10:22 +00:00
|
|
|
projects[i]->next_runnable_result->already_selected = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2003-01-07 22:49:42 +00:00
|
|
|
|
2004-06-30 01:10:22 +00:00
|
|
|
// Schedule an active task for the project with the largest anticipated debt
|
|
|
|
// among those that have a runnable result. Return true iff a task was
|
|
|
|
// scheduled.
|
|
|
|
//
|
|
|
|
bool CLIENT_STATE::schedule_largest_debt_project(double expected_pay_off) {
|
|
|
|
PROJECT *best_project = NULL;
|
2004-06-30 07:21:52 +00:00
|
|
|
double best_debt = 0.; // initial value doesn't matter
|
2004-06-30 01:10:22 +00:00
|
|
|
bool first = true;
|
|
|
|
|
|
|
|
for (unsigned int i=0; i < projects.size(); ++i) {
|
|
|
|
if (projects[i]->next_runnable_result == NULL) continue;
|
|
|
|
if (!input_files_available(projects[i]->next_runnable_result)) {
|
2003-08-07 21:45:58 +00:00
|
|
|
report_result_error(
|
2004-06-30 01:10:22 +00:00
|
|
|
*(projects[i]->next_runnable_result), ERR_FILE_MISSING,
|
|
|
|
"One or more missing files"
|
2003-10-30 20:00:30 +00:00
|
|
|
);
|
2004-06-30 01:10:22 +00:00
|
|
|
projects[i]->next_runnable_result = NULL;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (first || projects[i]->anticipated_debt > best_debt) {
|
|
|
|
first = false;
|
|
|
|
best_project = projects[i];
|
|
|
|
best_debt = best_project->anticipated_debt;
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
|
|
|
}
|
2004-06-30 01:10:22 +00:00
|
|
|
if (!best_project) return false;
|
|
|
|
|
|
|
|
ACTIVE_TASK *atp = lookup_active_task_by_result(best_project->next_runnable_result);
|
|
|
|
if (!atp) {
|
|
|
|
atp = new ACTIVE_TASK;
|
|
|
|
atp->init(best_project->next_runnable_result);
|
|
|
|
atp->slot = active_tasks.get_free_slot();
|
|
|
|
get_slot_dir(atp->slot, atp->slot_dir);
|
|
|
|
atp->result->is_active = true;
|
|
|
|
active_tasks.active_tasks.push_back(atp);
|
|
|
|
}
|
|
|
|
best_project->anticipated_debt -= expected_pay_off;
|
|
|
|
best_project->next_runnable_result = false;
|
|
|
|
atp->next_scheduler_state = CPU_SCHED_RUNNING;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Schedule active tasks to be run and preempted.
|
|
|
|
//
|
|
|
|
// This is called in the do_something() loop
|
|
|
|
// (with must_reschedule=false)
|
|
|
|
// and whenever all the files for a result finish downloading
|
|
|
|
// (with must_reschedule=true)
|
|
|
|
//
|
|
|
|
bool CLIENT_STATE::schedule_cpus(bool must_reschedule) {
|
|
|
|
double expected_pay_off;
|
|
|
|
vector<ACTIVE_TASK*>::iterator iter;
|
2004-07-13 00:13:29 +00:00
|
|
|
ACTIVE_TASK *atp;
|
|
|
|
PROJECT *p;
|
2004-07-02 18:49:39 +00:00
|
|
|
bool some_app_started = false;
|
2004-07-14 20:55:26 +00:00
|
|
|
double adjusted_total_resource_share;
|
2004-06-30 01:10:22 +00:00
|
|
|
int retval, elapsed_time;
|
2004-07-06 20:26:04 +00:00
|
|
|
double max_debt = SECONDS_PER_DAY * ncpus;
|
2004-08-06 23:17:59 +00:00
|
|
|
double vm_limit;
|
2004-06-30 01:10:22 +00:00
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
elapsed_time = time(NULL) - cpu_sched_last_time;
|
2004-07-01 22:14:48 +00:00
|
|
|
if ((elapsed_time < cpu_sched_period
|
|
|
|
&& !have_free_cpu()
|
|
|
|
&& !must_reschedule)
|
|
|
|
|| projects.size() < 1
|
2004-07-01 22:21:33 +00:00
|
|
|
|| results.size() < 1
|
2004-06-30 01:10:22 +00:00
|
|
|
) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2004-07-19 20:37:45 +00:00
|
|
|
// tell app doing screensaver (fullscreen) graphics to stop
|
|
|
|
ss_logic.reset();
|
2004-07-13 00:13:29 +00:00
|
|
|
|
2004-06-30 01:10:22 +00:00
|
|
|
// finish work accounting for active tasks, reset temporary fields
|
2004-07-13 00:13:29 +00:00
|
|
|
//
|
2004-06-30 01:10:22 +00:00
|
|
|
for (i=0; i < active_tasks.active_tasks.size(); ++i) {
|
2004-07-13 00:13:29 +00:00
|
|
|
atp = active_tasks.active_tasks[i];
|
2004-07-02 18:49:39 +00:00
|
|
|
if (atp->scheduler_state != CPU_SCHED_RUNNING) continue;
|
2004-06-30 01:10:22 +00:00
|
|
|
double task_cpu_time = atp->current_cpu_time - atp->cpu_time_at_last_sched;
|
|
|
|
atp->result->project->work_done_this_period += task_cpu_time;
|
|
|
|
cpu_sched_work_done_this_period += task_cpu_time;
|
|
|
|
atp->next_scheduler_state = CPU_SCHED_PREEMPTED;
|
|
|
|
}
|
|
|
|
|
2004-07-06 20:26:04 +00:00
|
|
|
// compute total resource share among projects with runnable results
|
|
|
|
//
|
|
|
|
assign_results_to_projects(); // do this to see which projects have work
|
2004-07-14 20:55:26 +00:00
|
|
|
adjusted_total_resource_share = 0;
|
2004-06-30 01:10:22 +00:00
|
|
|
for (i=0; i < projects.size(); ++i) {
|
2004-07-13 00:13:29 +00:00
|
|
|
p = projects[i];
|
2004-07-06 20:26:04 +00:00
|
|
|
if (p->next_runnable_result != NULL) {
|
2004-07-14 20:55:26 +00:00
|
|
|
adjusted_total_resource_share += projects[i]->resource_share;
|
2004-07-06 20:26:04 +00:00
|
|
|
}
|
2004-06-30 01:10:22 +00:00
|
|
|
}
|
2004-07-06 20:26:04 +00:00
|
|
|
|
|
|
|
// adjust project debts
|
|
|
|
// reset debts for projects with no runnable results
|
|
|
|
// reset temporary fields
|
2004-07-13 00:13:29 +00:00
|
|
|
//
|
2004-06-30 01:10:22 +00:00
|
|
|
for (i=0; i < projects.size(); ++i) {
|
2004-07-13 00:13:29 +00:00
|
|
|
p = projects[i];
|
2004-07-06 20:26:04 +00:00
|
|
|
if (p->next_runnable_result == NULL) {
|
|
|
|
p->debt = 0;
|
|
|
|
p->anticipated_debt = 0;
|
|
|
|
} else {
|
|
|
|
p->debt +=
|
2004-07-14 20:55:26 +00:00
|
|
|
(p->resource_share/adjusted_total_resource_share)
|
2004-07-06 20:26:04 +00:00
|
|
|
* cpu_sched_work_done_this_period
|
|
|
|
- p->work_done_this_period;
|
|
|
|
if (p->debt < -max_debt) {
|
|
|
|
p->debt = -max_debt;
|
|
|
|
} else if (p->debt > max_debt) {
|
|
|
|
p->debt = max_debt;
|
|
|
|
}
|
|
|
|
p->anticipated_debt = p->debt;
|
|
|
|
}
|
2004-06-30 01:10:22 +00:00
|
|
|
p->next_runnable_result = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
// schedule tasks for projects in order of decreasing anticipated debt
|
2004-07-13 00:13:29 +00:00
|
|
|
//
|
2004-06-30 01:10:22 +00:00
|
|
|
for (i=0; i<results.size(); ++i) {
|
|
|
|
results[i]->already_selected = false;
|
|
|
|
}
|
|
|
|
expected_pay_off = cpu_sched_work_done_this_period / ncpus;
|
|
|
|
for (int j=0; j<ncpus; ++j) {
|
|
|
|
assign_results_to_projects();
|
|
|
|
if (!schedule_largest_debt_project(expected_pay_off)) break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// preempt, start, and resume tasks
|
2004-07-13 00:13:29 +00:00
|
|
|
//
|
2004-08-06 23:17:59 +00:00
|
|
|
vm_limit = global_prefs.vm_max_used_pct / 100.0 * host_info.m_swap;
|
2004-06-30 01:10:22 +00:00
|
|
|
iter = active_tasks.active_tasks.begin();
|
|
|
|
while (iter != active_tasks.active_tasks.end()) {
|
2004-07-13 00:13:29 +00:00
|
|
|
atp = *iter;
|
2004-06-30 01:10:22 +00:00
|
|
|
if (atp->scheduler_state == CPU_SCHED_RUNNING
|
|
|
|
&& atp->next_scheduler_state == CPU_SCHED_PREEMPTED
|
|
|
|
) {
|
2004-08-06 23:17:59 +00:00
|
|
|
atp->preempt(active_tasks.vm_limit_exceeded(vm_limit));
|
2004-06-30 01:10:22 +00:00
|
|
|
iter++;
|
|
|
|
} else if (atp->scheduler_state != CPU_SCHED_RUNNING
|
|
|
|
&& atp->next_scheduler_state == CPU_SCHED_RUNNING
|
|
|
|
) {
|
|
|
|
if ((retval = atp->resume_or_start())) {
|
|
|
|
atp->state = PROCESS_COULDNT_START;
|
|
|
|
atp->result->active_task_state = PROCESS_COULDNT_START;
|
|
|
|
report_result_error(
|
|
|
|
*(atp->result), retval,
|
|
|
|
"Couldn't start the app for this result: error %d", retval
|
|
|
|
);
|
|
|
|
iter = active_tasks.active_tasks.erase(iter);
|
|
|
|
delete atp;
|
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
some_app_started = true;
|
|
|
|
iter++;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
iter++;
|
|
|
|
}
|
|
|
|
atp->cpu_time_at_last_sched = atp->current_cpu_time;
|
|
|
|
}
|
|
|
|
|
|
|
|
// reset work accounting
|
|
|
|
// doing this at the end of schedule_cpus() because
|
|
|
|
// work_done_this_period's can change as apps finish
|
2004-07-13 00:13:29 +00:00
|
|
|
//
|
2004-06-30 01:10:22 +00:00
|
|
|
for (i=0; i < projects.size(); ++i) {
|
2004-07-13 00:13:29 +00:00
|
|
|
p = projects[i];
|
|
|
|
p->work_done_this_period = 0;
|
2004-06-30 01:10:22 +00:00
|
|
|
}
|
|
|
|
cpu_sched_work_done_this_period = 0;
|
|
|
|
|
|
|
|
cpu_sched_last_time = time(0);
|
|
|
|
if (some_app_started) {
|
|
|
|
app_started = cpu_sched_last_time;
|
|
|
|
}
|
2004-07-14 18:28:07 +00:00
|
|
|
|
|
|
|
// debts and active_tasks can only change if some project had
|
2004-07-14 20:55:26 +00:00
|
|
|
// a runnable result (and thus if adjusted_total_resource_share
|
2004-07-14 18:28:07 +00:00
|
|
|
// is positive)
|
|
|
|
//
|
2004-07-14 20:55:26 +00:00
|
|
|
if (adjusted_total_resource_share > 0.0) {
|
2004-07-14 18:28:07 +00:00
|
|
|
set_client_state_dirty("schedule_cpus");
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
|
|
|
|
2002-07-15 23:21:20 +00:00
|
|
|
// This is called when the client is initialized.
|
2002-04-30 22:22:54 +00:00
|
|
|
// Try to restart any tasks that were running when we last shut down.
|
|
|
|
//
|
|
|
|
int CLIENT_STATE::restart_tasks() {
|
2004-06-30 01:10:22 +00:00
|
|
|
return active_tasks.restart_tasks(ncpus);
|
2002-04-30 22:22:54 +00:00
|
|
|
}
|
2003-10-19 01:48:32 +00:00
|
|
|
|
2004-06-30 01:10:22 +00:00
|
|
|
void CLIENT_STATE::set_ncpus() {
|
2003-10-19 01:48:32 +00:00
|
|
|
if (host_info.p_ncpus > 0) {
|
2004-06-30 01:10:22 +00:00
|
|
|
ncpus = host_info.p_ncpus;
|
2003-10-19 01:48:32 +00:00
|
|
|
} else {
|
2004-06-30 01:10:22 +00:00
|
|
|
ncpus = 1;
|
2003-10-19 01:48:32 +00:00
|
|
|
}
|
2004-06-30 01:10:22 +00:00
|
|
|
if (ncpus > global_prefs.max_cpus) ncpus = global_prefs.max_cpus;
|
2003-10-19 01:48:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// estimate how long a WU will take on this host
|
|
|
|
//
|
|
|
|
double CLIENT_STATE::estimate_cpu_time(WORKUNIT& wu) {
    // The workunit's estimated operation count divided by
    // host_info.p_fpops.
    return wu.rsc_fpops_est/host_info.p_fpops;
}
|
|
|
|
|
|
|
|
// Clamp f into the closed interval [0, 1].
// Values that are not greater than zero -- including NaN, for which every
// ordered comparison is false -- yield 0, so the result is always a
// usable fraction.
//
inline double force_fraction(double f) {
    if (!(f > 0)) return 0;     // negative, zero, or NaN
    if (f > 1) return 1;
    return f;
}
|
|
|
|
|
2004-04-07 06:51:42 +00:00
|
|
|
double CLIENT_STATE::get_fraction_done(RESULT* result) {
    // A result with no active task reports a fraction done of zero.
    ACTIVE_TASK* task = active_tasks.lookup_result(result);
    if (!task) {
        return 0.0;
    }
    // Pass the task's reported fraction through force_fraction().
    return force_fraction(task->fraction_done);
}
|
|
|
|
|
2003-12-24 21:50:41 +00:00
|
|
|
// Decide which app version to use for a WU.
|
|
|
|
//
|
|
|
|
int CLIENT_STATE::choose_version_num(char* app_name, SCHEDULER_REPLY& sr) {
|
2003-10-19 01:48:32 +00:00
|
|
|
unsigned int i;
|
|
|
|
int best = -1;
|
|
|
|
APP_VERSION* avp;
|
|
|
|
|
2003-12-24 21:50:41 +00:00
|
|
|
// First look in the scheduler reply
|
|
|
|
//
|
|
|
|
for (i=0; i<sr.app_versions.size(); i++) {
|
|
|
|
avp = &sr.app_versions[i];
|
|
|
|
if (!strcmp(app_name, avp->app_name)) {
|
|
|
|
return avp->version_num;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If not there, use the latest one in our state
|
|
|
|
//
|
2003-10-19 01:48:32 +00:00
|
|
|
for (i=0; i<app_versions.size(); i++) {
|
|
|
|
avp = app_versions[i];
|
|
|
|
if (strcmp(avp->app_name, app_name)) continue;
|
|
|
|
if (avp->version_num < best) continue;
|
|
|
|
best = avp->version_num;
|
|
|
|
}
|
|
|
|
if (best < 0) {
|
|
|
|
msg_printf(0, MSG_ERROR, "CLIENT_STATE::latest_version_num: no version\n");
|
|
|
|
}
|
|
|
|
return best;
|
|
|
|
}
|
|
|
|
|
2004-07-06 17:37:58 +00:00
|
|
|
// goes through the results and checks whether the associated app has no app files;
|
|
|
|
// then there is nothing to do, never start the app, close the result
|
2004-07-13 00:13:29 +00:00
|
|
|
//
|
2004-07-06 17:37:58 +00:00
|
|
|
void CLIENT_STATE::handle_file_xfer_apps() {
|
2004-08-12 10:13:01 +00:00
|
|
|
unsigned int i;
|
|
|
|
for (i=0; i<results.size(); i++) {
|
|
|
|
RESULT* rp = results[i];
|
|
|
|
if (rp->wup->avp->app_files.size() == 0 && rp->state == RESULT_FILES_DOWNLOADED) {
|
2004-07-06 18:31:56 +00:00
|
|
|
rp->state = RESULT_FILES_UPLOADING;
|
2004-08-05 21:11:06 +00:00
|
|
|
rp->reset_files();
|
2004-07-06 18:31:56 +00:00
|
|
|
}
|
|
|
|
}
|
2004-07-06 17:37:58 +00:00
|
|
|
}
|