2008-08-06 18:36:30 +00:00
|
|
|
// This file is part of BOINC.
|
2006-02-08 21:05:51 +00:00
|
|
|
// http://boinc.berkeley.edu
|
2008-08-06 18:36:30 +00:00
|
|
|
// Copyright (C) 2008 University of California
|
2006-02-08 21:05:51 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
2006-02-08 21:05:51 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
2006-02-08 21:05:51 +00:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
2006-02-08 21:05:51 +00:00
|
|
|
|
2006-08-22 20:58:57 +00:00
|
|
|
// CPU scheduling logic.
|
|
|
|
//
|
|
|
|
// Terminology:
|
|
|
|
//
|
|
|
|
// Episode
|
|
|
|
// The execution of a task is divided into "episodes".
|
|
|
|
// An episode starts when the application is executed,
|
|
|
|
// and ends when it exits or dies
|
|
|
|
// (e.g., because it's preempted and not left in memory,
|
|
|
|
// or the user quits BOINC, or the host is turned off).
|
|
|
|
// A task may checkpoint now and then.
|
|
|
|
// Each episode begins with the state of the last checkpoint.
|
|
|
|
//
|
|
|
|
// Debt interval
|
|
|
|
// The interval between consecutive executions of adjust_debts()
|
|
|
|
//
|
|
|
|
// Run interval
|
|
|
|
// If an app is running (not suspended), the interval
|
|
|
|
// during which it's been running.
|
|
|
|
|
2006-04-25 18:28:44 +00:00
|
|
|
#ifdef _WIN32
|
|
|
|
#include "boinc_win.h"
|
|
|
|
#endif
|
|
|
|
|
2008-02-27 23:26:38 +00:00
|
|
|
#include <string>
|
|
|
|
#include <cstring>
|
2008-04-01 20:46:41 +00:00
|
|
|
|
2007-02-21 16:26:51 +00:00
|
|
|
#include "str_util.h"
|
2007-02-21 22:27:35 +00:00
|
|
|
#include "util.h"
|
2007-06-25 19:05:29 +00:00
|
|
|
#include "error_numbers.h"
|
2008-04-01 20:46:41 +00:00
|
|
|
#include "coproc.h"
|
|
|
|
|
|
|
|
#include "client_msgs.h"
|
2006-06-15 23:15:27 +00:00
|
|
|
#include "log_flags.h"
|
2006-02-08 21:05:51 +00:00
|
|
|
|
2008-02-27 23:26:38 +00:00
|
|
|
#ifdef SIM
|
|
|
|
#include "sim.h"
|
|
|
|
#else
|
|
|
|
#include "client_state.h"
|
|
|
|
#endif
|
|
|
|
|
2006-02-08 21:05:51 +00:00
|
|
|
using std::vector;
|
|
|
|
|
2007-04-03 19:06:26 +00:00
|
|
|
#define MAX_STD (86400)
|
|
|
|
// maximum short-term debt
|
2006-02-08 21:05:51 +00:00
|
|
|
|
2007-04-03 19:06:26 +00:00
|
|
|
#define DEADLINE_CUSHION 0
|
|
|
|
// try to finish jobs this much in advance of their deadline
|
|
|
|
|
2009-04-01 23:22:17 +00:00
|
|
|
// Check whether this coprocessor set has enough free instances
// to satisfy every requirement listed in "needed".
// Returns false (logging if requested) on any shortfall.
//
bool COPROCS::sufficient_coprocs(
    COPROCS& needed, bool log_flag, const char* prefix
) {
    unsigned int j;
    for (j=0; j<needed.coprocs.size(); j++) {
        COPROC* req = needed.coprocs[j];
        COPROC* have = lookup(req->type);
        if (have == NULL) {
            // we don't have any coprocs of this type at all
            msg_printf(NULL, MSG_INTERNAL_ERROR,
                "Missing a %s coprocessor", req->type
            );
            return false;
        }
        bool enough = (have->used + req->count <= have->count);
        if (enough) continue;
        if (log_flag) {
            msg_printf(NULL, MSG_INFO,
                "[%s] rr_sim: insufficient coproc %s (%d + %d > %d)",
                prefix, have->type, have->used, req->count, have->count
            );
        }
        return false;
    }
    return true;
}
|
|
|
|
|
2009-02-26 21:36:41 +00:00
|
|
|
void COPROCS::reserve_coprocs(
|
2009-03-06 22:21:47 +00:00
|
|
|
COPROCS& needed, bool log_flag, const char* prefix
|
2009-02-26 21:36:41 +00:00
|
|
|
) {
|
2008-05-23 21:24:36 +00:00
|
|
|
for (unsigned int i=0; i<needed.coprocs.size(); i++) {
|
|
|
|
COPROC* cp = needed.coprocs[i];
|
|
|
|
COPROC* cp2 = lookup(cp->type);
|
2008-07-21 16:25:03 +00:00
|
|
|
if (!cp2) {
|
|
|
|
msg_printf(NULL, MSG_INTERNAL_ERROR,
|
|
|
|
"Coproc type %s not found", cp->type
|
|
|
|
);
|
|
|
|
continue;
|
|
|
|
}
|
2008-10-03 21:55:34 +00:00
|
|
|
if (log_flag) {
|
|
|
|
msg_printf(NULL, MSG_INFO,
|
|
|
|
"[%s] reserving %d of coproc %s", prefix, cp->count, cp2->type
|
|
|
|
);
|
|
|
|
}
|
2008-04-01 15:08:47 +00:00
|
|
|
cp2->used += cp->count;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-03-06 22:21:47 +00:00
|
|
|
#if 0
|
2009-02-26 21:36:41 +00:00
|
|
|
void COPROCS::free_coprocs(
|
2009-03-06 22:21:47 +00:00
|
|
|
COPROCS& needed, bool log_flag, const char* prefix
|
2009-02-26 21:36:41 +00:00
|
|
|
) {
|
2008-05-23 21:24:36 +00:00
|
|
|
for (unsigned int i=0; i<needed.coprocs.size(); i++) {
|
|
|
|
COPROC* cp = needed.coprocs[i];
|
|
|
|
COPROC* cp2 = lookup(cp->type);
|
2008-04-01 15:08:47 +00:00
|
|
|
if (!cp2) continue;
|
2008-10-03 21:55:34 +00:00
|
|
|
if (log_flag) {
|
|
|
|
msg_printf(NULL, MSG_INFO,
|
|
|
|
"[%s] freeing %d of coproc %s", prefix, cp->count, cp2->type
|
|
|
|
);
|
|
|
|
}
|
2008-04-01 15:08:47 +00:00
|
|
|
cp2->used -= cp->count;
|
|
|
|
}
|
|
|
|
}
|
2009-03-06 22:21:47 +00:00
|
|
|
#endif
|
2008-04-01 15:08:47 +00:00
|
|
|
|
2008-10-29 18:21:45 +00:00
|
|
|
// return true if the task has finished its time slice
|
|
|
|
// and has checkpointed in last 10 secs
|
|
|
|
//
|
|
|
|
static inline bool finished_time_slice(ACTIVE_TASK* atp) {
|
|
|
|
double time_running = gstate.now - atp->run_interval_start_wall_time;
|
|
|
|
bool running_beyond_sched_period = time_running >= gstate.global_prefs.cpu_scheduling_period();
|
|
|
|
double time_since_checkpoint = gstate.now - atp->checkpoint_wall_time;
|
|
|
|
bool checkpointed_recently = time_since_checkpoint < 10;
|
|
|
|
return (running_beyond_sched_period && checkpointed_recently);
|
|
|
|
}
|
|
|
|
|
2009-02-26 21:36:41 +00:00
|
|
|
// Choose a "best" runnable CPU job for each project
|
2006-02-08 21:05:51 +00:00
|
|
|
//
|
|
|
|
// Values are returned in project->next_runnable_result
|
|
|
|
// (skip projects for which this is already non-NULL)
|
|
|
|
//
|
|
|
|
// Don't choose results with already_selected == true;
|
|
|
|
// mark chosen results as already_selected.
|
|
|
|
//
|
|
|
|
// The preference order:
|
|
|
|
// 1. results with active tasks that are running
|
|
|
|
// 2. results with active tasks that are preempted (but have a process)
|
|
|
|
// 3. results with active tasks that have no process
|
|
|
|
// 4. results with no active task
|
|
|
|
//
|
2007-01-16 22:57:08 +00:00
|
|
|
// TODO: this is called in a loop over NCPUs, which is silly.
|
|
|
|
// Should call it once, and have it make an ordered list per project.
|
|
|
|
//
|
2006-02-08 21:05:51 +00:00
|
|
|
// Choose a "best" runnable CPU job for each project and store it in
// project->next_runnable_result (projects where that's already set are
// kept).  Coproc jobs are excluded.  Chosen results get
// already_selected = true so later scheduler passes skip them.
//
void CLIENT_STATE::assign_results_to_projects() {
    unsigned int i;
    RESULT* rp;
    PROJECT* project;

    // scan results with an ACTIVE_TASK
    //
    for (i=0; i<active_tasks.active_tasks.size(); i++) {
        ACTIVE_TASK *atp = active_tasks.active_tasks[i];
        if (!atp->runnable()) continue;
        rp = atp->result;
        if (rp->already_selected) continue;
        // this function picks CPU jobs only; coproc jobs are
        // handled by the coproc scan in schedule_cpus()
        if (rp->uses_coprocs()) continue;
        if (!rp->runnable()) continue;
        project = rp->project;
        if (!project->next_runnable_result) {
            project->next_runnable_result = rp;
            continue;
        }

        // see if this task is "better" than the one currently
        // selected for this project
        //
        ACTIVE_TASK *next_atp = lookup_active_task_by_result(
            project->next_runnable_result
        );

        // prefer a task whose process exists over an uninitialized one,
        // and a currently-scheduled task over a preempted one
        //
        if ((next_atp->task_state() == PROCESS_UNINITIALIZED && atp->process_exists())
            || (next_atp->scheduler_state == CPU_SCHED_PREEMPTED
            && atp->scheduler_state == CPU_SCHED_SCHEDULED)
        ) {
            project->next_runnable_result = atp->result;
        }
    }

    // Now consider results that don't have an active task
    // (these rank below any result with an active task)
    //
    for (i=0; i<results.size(); i++) {
        rp = results[i];
        if (rp->already_selected) continue;
        if (rp->uses_coprocs()) continue;
        if (lookup_active_task_by_result(rp)) continue;
        if (!rp->runnable()) continue;

        project = rp->project;
        if (project->next_runnable_result) continue;

        project->next_runnable_result = rp;
    }

    // mark selected results, so CPU scheduler won't try to consider
    // a result more than once
    //
    for (i=0; i<projects.size(); i++) {
        project = projects[i];
        if (project->next_runnable_result) {
            project->next_runnable_result->already_selected = true;
        }
    }
}
|
|
|
|
|
2006-06-21 17:34:55 +00:00
|
|
|
// Among projects with a "next runnable result",
|
|
|
|
// find the project P with the greatest anticipated debt,
|
|
|
|
// and return its next runnable result
|
2006-02-08 21:05:51 +00:00
|
|
|
//
|
2006-06-21 17:34:55 +00:00
|
|
|
// Among CPU-intensive projects that have a next_runnable_result,
// pick the one with the greatest anticipated debt; return (and clear)
// its next runnable result.  Returns NULL if no project qualifies.
//
RESULT* CLIENT_STATE::largest_debt_project_best_result() {
    PROJECT* winner = NULL;
    double winner_debt = -MAX_STD;

    for (unsigned int i=0; i<projects.size(); i++) {
        PROJECT* p = projects[i];
        if (!p->next_runnable_result) continue;
        if (p->non_cpu_intensive) continue;
        // first qualifying project always wins; after that,
        // strictly greater debt is needed to displace the leader
        if (winner == NULL || p->anticipated_debt > winner_debt) {
            winner = p;
            winner_debt = p->anticipated_debt;
        }
    }
    if (winner == NULL) return NULL;

    if (log_flags.cpu_sched_debug) {
        msg_printf(winner, MSG_INFO,
            "[cpu_sched_debug] highest debt: %f %s",
            winner->anticipated_debt,
            winner->next_runnable_result->name
        );
    }
    RESULT* chosen = winner->next_runnable_result;
    winner->next_runnable_result = 0;
    return chosen;
}
|
|
|
|
|
2009-04-10 19:01:38 +00:00
|
|
|
// return coproc jobs in FIFO order
|
|
|
|
//
|
|
|
|
RESULT* first_coproc_result() {
|
|
|
|
unsigned int i;
|
|
|
|
for (i=0; i<gstate.results.size(); i++) {
|
|
|
|
RESULT* rp = gstate.results[i];
|
|
|
|
if (!rp->runnable()) continue;
|
|
|
|
if (rp->project->non_cpu_intensive) continue;
|
|
|
|
if (rp->already_selected) continue;
|
|
|
|
if (!rp->uses_coprocs()) continue;
|
|
|
|
return rp;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return earliest-deadline result.
|
|
|
|
// if coproc_only:
|
|
|
|
// return only coproc jobs, and only if job is projected to miss deadline.
|
|
|
|
// otherwise:
|
|
|
|
// return only CPU jobs, and only from a project with deadlines_missed>0
|
2006-02-08 21:05:51 +00:00
|
|
|
//
|
2009-02-26 21:36:41 +00:00
|
|
|
// Return the earliest-deadline runnable result.
// if coproc_only:
//   consider only coproc jobs, and only those the round-robin
//   simulation projects to miss their deadline.
// otherwise:
//   consider only CPU jobs, and only from projects with
//   deadlines_missed_copy > 0 (or DCF > 90, treated as a miss).
// Returns NULL if no result qualifies.
//
RESULT* CLIENT_STATE::earliest_deadline_result(bool coproc_only) {
    RESULT *best_result = NULL;
    ACTIVE_TASK* best_atp = NULL;
    unsigned int i;

    for (i=0; i<results.size(); i++) {
        RESULT* rp = results[i];
        if (!rp->runnable()) continue;
        if (rp->project->non_cpu_intensive) continue;
        if (rp->already_selected) continue;
        if (coproc_only) {
            if (!rp->uses_coprocs()) continue;
            // only coproc jobs the rr simulation flagged as deadline misses
            if (!rp->rr_sim_misses_deadline) continue;
        } else {
            if (rp->uses_coprocs()) continue;
            // treat projects with DCF>90 as if they had deadline misses
            //
            if (!rp->project->cpu_pwf.deadlines_missed_copy && rp->project->duration_correction_factor < 90.0) continue;
        }

        // strictly earlier deadline always wins
        //
        bool new_best = false;
        if (best_result) {
            if (rp->report_deadline < best_result->report_deadline) {
                new_best = true;
            }
        } else {
            new_best = true;
        }
        if (new_best) {
            best_result = rp;
            best_atp = lookup_active_task_by_result(rp);
            continue;
        }
        // strictly later deadline: keep current best
        if (rp->report_deadline > best_result->report_deadline) {
            continue;
        }

        // If there's a tie, pick the job with the least remaining time
        // (but don't pick an unstarted job over one that's started)
        //
        ACTIVE_TASK* atp = lookup_active_task_by_result(rp);
        if (best_atp && !atp) continue;
        if (rp->estimated_time_remaining(false)
            < best_result->estimated_time_remaining(false)
            || (!best_atp && atp)
        ) {
            best_result = rp;
            best_atp = atp;
        }
    }
    if (!best_result) return NULL;

    if (log_flags.cpu_sched_debug) {
        msg_printf(best_result->project, MSG_INFO,
            "[cpu_sched_debug] earliest deadline: %f %s",
            best_result->report_deadline, best_result->name
        );
    }

    return best_result;
}
|
|
|
|
|
2007-03-15 19:08:22 +00:00
|
|
|
void CLIENT_STATE::reset_debt_accounting() {
|
|
|
|
unsigned int i;
|
|
|
|
for (i=0; i<projects.size(); i++) {
|
|
|
|
PROJECT* p = projects[i];
|
2008-12-31 23:07:59 +00:00
|
|
|
p->cpu_pwf.reset_debt_accounting();
|
2009-02-24 00:06:45 +00:00
|
|
|
if (coproc_cuda) {
|
|
|
|
p->cuda_pwf.reset_debt_accounting();
|
|
|
|
}
|
2007-03-15 19:08:22 +00:00
|
|
|
}
|
2008-12-31 23:07:59 +00:00
|
|
|
cpu_work_fetch.reset_debt_accounting();
|
2009-02-24 00:06:45 +00:00
|
|
|
if (coproc_cuda) {
|
|
|
|
cuda_work_fetch.reset_debt_accounting();
|
|
|
|
}
|
2007-03-15 19:08:22 +00:00
|
|
|
debt_interval_start = now;
|
|
|
|
}
|
|
|
|
|
2006-02-08 21:05:51 +00:00
|
|
|
// adjust project debts (short, long-term)
// Called periodically; accounts for the work done since
// debt_interval_start and then resets the interval.
//
void CLIENT_STATE::adjust_debts() {
    unsigned int i;
    double total_short_term_debt = 0;
    double rrs;
    int nprojects=0, nrprojects=0;
    PROJECT *p;
    double share_frac;
    double elapsed_time = now - debt_interval_start;

    // If the elapsed time is more than 2*DEBT_ADJUST_PERIOD
    // it must be because the host was suspended for a long time.
    // In this case, ignore the last period
    //
    if (elapsed_time > 2*DEBT_ADJUST_PERIOD || elapsed_time < 0) {
        if (log_flags.debt_debug) {
            // NOTE(review): message text says "sched enforce period" but the
            // value printed is DEBT_ADJUST_PERIOD - wording looks stale; confirm
            msg_printf(NULL, MSG_INFO,
                "[debt_debug] adjust_debt: elapsed time (%d) longer than sched enforce period(%d). Ignoring this period.",
                (int)elapsed_time, (int)DEBT_ADJUST_PERIOD
            );
        }
        reset_debt_accounting();
        return;
    }

    // skip small intervals
    // (no reset here: the short interval keeps accumulating
    // until it's long enough to account for)
    //
    if (elapsed_time < 1) {
        return;
    }

    // charge each scheduled CPU-intensive task for the interval's time
    //
    for (i=0; i<active_tasks.active_tasks.size(); i++) {
        ACTIVE_TASK* atp = active_tasks.active_tasks[i];
        if (atp->scheduler_state != CPU_SCHED_SCHEDULED) continue;
        p = atp->result->project;
        if (p->non_cpu_intensive) continue;
        work_fetch.accumulate_inst_sec(atp, elapsed_time);
    }

    // adjust long term debts
    //
    cpu_work_fetch.update_debts();
    if (coproc_cuda) {
        cuda_work_fetch.update_debts();
    }

    // adjust short term debts
    //
    rrs = runnable_resource_share();
    for (i=0; i<projects.size(); i++) {
        p = projects[i];
        if (p->non_cpu_intensive) continue;
        nprojects++;

        if (p->runnable()) {
            nrprojects++;
            share_frac = p->resource_share/rrs;
            // debt grows by the project's fair share of the interval's
            // CPU time, minus what it actually got
            p->short_term_debt += share_frac*cpu_work_fetch.secs_this_debt_interval
                - p->cpu_pwf.secs_this_debt_interval;
            total_short_term_debt += p->short_term_debt;
        } else {
            // non-runnable projects carry no short-term debt
            p->short_term_debt = 0;
            p->anticipated_debt = 0;
        }
    }

    // short-term debt:
    // normalize so mean is zero, and limit abs value at MAX_STD
    //
    if (nrprojects) {
        double avg_short_term_debt = total_short_term_debt / nrprojects;
        for (i=0; i<projects.size(); i++) {
            p = projects[i];
            if (p->non_cpu_intensive) continue;
            if (p->runnable()) {
                p->short_term_debt -= avg_short_term_debt;
                if (p->short_term_debt > MAX_STD) {
                    p->short_term_debt = MAX_STD;
                }
                if (p->short_term_debt < -MAX_STD) {
                    p->short_term_debt = -MAX_STD;
                }
            }
        }
    }

    reset_debt_accounting();
}
|
|
|
|
|
|
|
|
|
2006-06-16 18:52:25 +00:00
|
|
|
// Decide whether to run the CPU scheduler.
|
|
|
|
// This is called periodically.
|
2008-01-13 00:12:14 +00:00
|
|
|
// Scheduled tasks are placed in order of urgency for scheduling
|
|
|
|
// in the ordered_scheduled_results vector
|
2006-02-08 21:05:51 +00:00
|
|
|
//
|
2006-06-16 18:52:25 +00:00
|
|
|
bool CLIENT_STATE::possibly_schedule_cpus() {
|
2006-02-08 21:05:51 +00:00
|
|
|
double elapsed_time;
|
2006-06-19 16:21:35 +00:00
|
|
|
static double last_reschedule=0;
|
2006-06-15 23:15:27 +00:00
|
|
|
|
2006-02-08 21:05:51 +00:00
|
|
|
if (projects.size() == 0) return false;
|
|
|
|
if (results.size() == 0) return false;
|
|
|
|
|
|
|
|
// Reschedule every cpu_sched_period seconds,
|
|
|
|
// or if must_schedule_cpus is set
|
|
|
|
// (meaning a new result is available, or a CPU has been freed).
|
|
|
|
//
|
2007-03-15 19:08:22 +00:00
|
|
|
elapsed_time = now - last_reschedule;
|
2007-06-20 16:27:27 +00:00
|
|
|
if (elapsed_time >= global_prefs.cpu_scheduling_period()) {
|
2006-06-19 16:21:35 +00:00
|
|
|
request_schedule_cpus("Scheduling period elapsed.");
|
2006-06-15 23:15:27 +00:00
|
|
|
}
|
2006-02-08 21:05:51 +00:00
|
|
|
|
2006-06-15 23:15:27 +00:00
|
|
|
if (!must_schedule_cpus) return false;
|
2006-06-19 16:21:35 +00:00
|
|
|
last_reschedule = now;
|
2006-06-15 23:15:27 +00:00
|
|
|
must_schedule_cpus = false;
|
2006-06-16 18:52:25 +00:00
|
|
|
schedule_cpus();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2008-08-20 17:34:18 +00:00
|
|
|
// Running tally of the processing resources still available
// during one CPU-scheduling pass.
//
struct PROC_RESOURCES {
    int ncpus;          // number of usable CPUs (set by schedule_cpus())
    double ncpus_used;  // CPUs committed to jobs chosen so far
    double ram_left;    // remaining RAM budget
    COPROCS coprocs;    // working copy of coprocessor availability

    // should we stop scanning jobs?
    //
    inline bool stop_scan_cpu() {
        return ncpus_used >= ncpus;
    }

    inline bool stop_scan_coproc() {
        return coprocs.fully_used();
    }

    // should we consider scheduling this job?
    //
    bool can_schedule(RESULT* rp) {
        if (rp->uses_coprocs()) {
            // honor the "no GPU while user active" preference
            if (gstate.user_active && !gstate.global_prefs.run_gpu_if_user_active) {
                return false;
            }
            if (coprocs.sufficient_coprocs(
                rp->avp->coprocs, log_flags.cpu_sched_debug, "cpu_sched_debug")
            ) {
                return true;
            } else {
                if (log_flags.cpu_sched_debug) {
                    msg_printf(rp->project, MSG_INFO,
                        "[cpu_sched_debug] insufficient coprocessors for %s", rp->name
                    );
                }
                return false;
            }
        } else {
            // otherwise, only if CPUs are available
            //
            return (ncpus_used < ncpus);
        }
    }

    // we've decided to run this - update bookkeeping
    //
    void schedule(RESULT* rp) {
        coprocs.reserve_coprocs(
            rp->avp->coprocs, log_flags.cpu_sched_debug, "cpu_sched_debug"
        );
        ncpus_used += rp->avp->avg_ncpus;
    }
};
|
|
|
|
|
2008-09-25 01:04:53 +00:00
|
|
|
// Check whether the job can be run:
// - it will fit in RAM
// - we have enough shared-mem segments (old Mac problem)
// If so, update proc_rsc (RAM, CPU, coproc bookkeeping) and the
// project's anticipated debt, and return true.
//
static bool schedule_if_possible(
    RESULT* rp, ACTIVE_TASK* atp, PROC_RESOURCES& proc_rsc,
    double rrs, double expected_payoff, const char* description
) {
    if (atp) {
        // job already has a process: use its measured working set
        //
        // see if it fits in available RAM
        //
        if (atp->procinfo.working_set_size_smoothed > proc_rsc.ram_left) {
            if (log_flags.cpu_sched_debug) {
                msg_printf(rp->project, MSG_INFO,
                    "[cpu_sched_debug] %s working set too large: %.2fMB",
                    rp->name, atp->procinfo.working_set_size_smoothed/MEGA
                );
            }
            atp->too_large = true;
            return false;
        }
        atp->too_large = false;

        // if shared-mem attach recently failed, don't schedule
        // tasks that still lack a shared-mem segment
        //
        if (gstate.retry_shmem_time > gstate.now) {
            if (atp->app_client_shm.shm == NULL) {
                if (log_flags.cpu_sched_debug) {
                    msg_printf(rp->project, MSG_INFO,
                        "[cpu_sched_debug] waiting for shared mem: %s",
                        rp->name
                    );
                }
                atp->needs_shmem = true;
                return false;
            }
            atp->needs_shmem = false;
        }
        proc_rsc.ram_left -= atp->procinfo.working_set_size_smoothed;
    } else {
        // no process yet: use the app version's largest observed
        // working set as the RAM estimate
        //
        if (rp->avp->max_working_set_size > proc_rsc.ram_left) {
            if (log_flags.cpu_sched_debug) {
                msg_printf(rp->project, MSG_INFO,
                    "[cpu_sched_debug] %s projected working set too large: %.2fMB",
                    rp->name, rp->avp->max_working_set_size/MEGA
                );
            }
            return false;
        }
    }
    if (log_flags.cpu_sched_debug) {
        msg_printf(rp->project, MSG_INFO,
            "[cpu_sched_debug] scheduling %s (%s)", rp->name, description
        );
    }
    proc_rsc.schedule(rp);
    // charge the project's anticipated debt for the CPU time
    // this job is expected to get
    rp->project->anticipated_debt -= (rp->project->resource_share / rrs) * expected_payoff;
    return true;
}
|
|
|
|
|
2006-06-16 18:52:25 +00:00
|
|
|
// CPU scheduler - decide which results to run.
// output: sets ordered_scheduled_results (most urgent first).
// Phases: rr simulation; coproc jobs (EDF then FIFO);
// CPU jobs from deadline-miss projects (EDF); CPU jobs by debt.
//
void CLIENT_STATE::schedule_cpus() {
    RESULT* rp;
    PROJECT* p;
    double expected_payoff;
    unsigned int i;
    double rrs = runnable_resource_share();
    PROC_RESOURCES proc_rsc;
    ACTIVE_TASK* atp;
    bool can_run;

    proc_rsc.ncpus = ncpus;
    proc_rsc.ncpus_used = 0;
    proc_rsc.ram_left = available_ram();
    proc_rsc.coprocs.clone(coprocs, false);

    if (log_flags.cpu_sched_debug) {
        msg_printf(0, MSG_INFO, "[cpu_sched_debug] schedule_cpus(): start");
    }

    // do round-robin simulation to find what results miss deadline
    //
    rr_simulation();
    if (log_flags.cpu_sched_debug) {
        print_deadline_misses();
    }

    // set temporary variables
    //
    for (i=0; i<results.size(); i++) {
        rp = results[i];
        rp->already_selected = false;
        rp->edf_scheduled = false;
    }
    for (i=0; i<projects.size(); i++) {
        p = projects[i];
        p->next_runnable_result = NULL;
        p->anticipated_debt = p->short_term_debt;
        // working copy; decremented as EDF jobs are chosen below
        p->cpu_pwf.deadlines_missed_copy = p->cpu_pwf.deadlines_missed;
    }
    // compute per-app-version max working set, used to estimate
    // RAM needs of jobs that have no process yet
    //
    for (i=0; i<app_versions.size(); i++) {
        app_versions[i]->max_working_set_size = 0;
    }
    for (i=0; i<active_tasks.active_tasks.size(); i++) {
        atp = active_tasks.active_tasks[i];
        atp->too_large = false;
        double w = atp->procinfo.working_set_size_smoothed;
        APP_VERSION* avp = atp->app_version;
        if (w > avp->max_working_set_size) {
            avp->max_working_set_size = w;
        }
    }

    expected_payoff = global_prefs.cpu_scheduling_period();
    ordered_scheduled_results.clear();

    // choose missed-deadline coproc jobs in deadline order
    //
    while (!proc_rsc.stop_scan_coproc()) {
        rp = earliest_deadline_result(true);
        if (!rp) break;
        rp->already_selected = true;
        if (!proc_rsc.can_schedule(rp)) continue;
        atp = lookup_active_task_by_result(rp);
        can_run = schedule_if_possible(
            rp, atp, proc_rsc, rrs, expected_payoff,
            "coprocessor job, EDF"
        );
        if (!can_run) continue;
        ordered_scheduled_results.push_back(rp);
    }

    // then coproc jobs in FIFO order
    //
    while (!proc_rsc.stop_scan_coproc()) {
        rp = first_coproc_result();
        if (!rp) break;
        rp->already_selected = true;
        if (!proc_rsc.can_schedule(rp)) continue;
        atp = lookup_active_task_by_result(rp);
        can_run = schedule_if_possible(
            rp, atp, proc_rsc, rrs, expected_payoff,
            "coprocessor job, FIFO"
        );
        if (!can_run) continue;
        ordered_scheduled_results.push_back(rp);
    }

    // choose CPU jobs from projects with CPU deadline misses
    //
#ifdef SIM
    if (!cpu_sched_rr_only) {
#endif
    while (!proc_rsc.stop_scan_cpu()) {
        rp = earliest_deadline_result(false);
        if (!rp) break;
        rp->already_selected = true;
        if (!proc_rsc.can_schedule(rp)) continue;
        atp = lookup_active_task_by_result(rp);
        can_run = schedule_if_possible(
            rp, atp, proc_rsc, rrs, expected_payoff,
            "CPU job, EDF"
        );
        if (!can_run) continue;
        rp->project->cpu_pwf.deadlines_missed_copy--;
        rp->edf_scheduled = true;
        ordered_scheduled_results.push_back(rp);
    }
#ifdef SIM
    }
#endif

    // Next, choose CPU jobs from projects with large debt
    //
    while (!proc_rsc.stop_scan_cpu()) {
        assign_results_to_projects();
        rp = largest_debt_project_best_result();
        if (!rp) break;
        atp = lookup_active_task_by_result(rp);
        if (!proc_rsc.can_schedule(rp)) continue;
        can_run = schedule_if_possible(
            rp, atp, proc_rsc, rrs, expected_payoff,
            "CPU job, debt order"
        );
        if (!can_run) continue;
        ordered_scheduled_results.push_back(rp);
    }

    request_enforce_schedule("schedule_cpus");
}
|
|
|
|
|
2008-10-29 22:13:04 +00:00
|
|
|
// Is the given active task's result already on the scheduler's
// ordered to-run list?
//
static inline bool in_ordered_scheduled_results(ACTIVE_TASK* atp) {
    unsigned int n = (unsigned int)gstate.ordered_scheduled_results.size();
    for (unsigned int j=0; j<n; j++) {
        RESULT* rp = gstate.ordered_scheduled_results[j];
        if (rp == atp->result) {
            return true;
        }
    }
    return false;
}
|
|
|
|
|
2009-05-04 19:55:59 +00:00
|
|
|
// return true if r0 is more important to run than r1
|
2008-11-03 19:29:04 +00:00
|
|
|
//
|
2009-05-04 19:55:59 +00:00
|
|
|
// return true if r0 is more important to run than r1.
// Precedence: deadline-miss status, then unfinished-time-slice status,
// then scheduler sequence number; addresses break any remaining tie
// so the ordering is a strict weak ordering (required by std::sort).
//
static inline bool more_important(RESULT* r0, RESULT* r1) {
    bool m0 = r0->rr_sim_misses_deadline;
    bool m1 = r1->rr_sim_misses_deadline;
    if (m0 != m1) {
        // a deadline-missing job outranks one that isn't missing
        return m0;
    }
    bool u0 = r0->unfinished_time_slice;
    bool u1 = r1->unfinished_time_slice;
    if (u0 != u1) {
        // a job mid-time-slice outranks one that isn't
        return u0;
    }
    if (r0->seqno != r1->seqno) {
        // lower sequence number was scheduled earlier, hence more important
        return r0->seqno < r1->seqno;
    }
    // arbitrary but stable tiebreaker
    return r0 < r1;
}
|
|
|
|
|
2009-05-07 13:54:51 +00:00
|
|
|
// Log the given job list (one line per job, in list order).
// If "details" is set, also show each job's deadline-miss (MD)
// and unfinished-time-slice (UTS) flags.
//
// BUG FIX: the two branches were inverted — details==true printed the
// short form while details==false printed the MD/UTS fields, so the
// "final job list" (printed with details=true) lost its flags.
//
static void print_job_list(vector<RESULT*>& jobs, bool details) {
    for (unsigned int i=0; i<jobs.size(); i++) {
        RESULT* rp = jobs[i];
        if (details) {
            msg_printf(rp->project, MSG_INFO,
                "[cpu_sched_debug] %d: %s (MD: %s; UTS: %s)",
                i, rp->name,
                rp->rr_sim_misses_deadline?"yes":"no",
                rp->unfinished_time_slice?"yes":"no"
            );
        } else {
            msg_printf(rp->project, MSG_INFO,
                "[cpu_sched_debug] %d: %s", i, rp->name
            );
        }
    }
}
|
|
|
|
|
2009-05-04 19:55:59 +00:00
|
|
|
// find running jobs that haven't finished their time slice.
|
|
|
|
// Mark them as such, and add to list if not already there
|
2006-06-19 22:20:24 +00:00
|
|
|
//
|
2009-05-04 19:55:59 +00:00
|
|
|
// Scan active tasks for running CPU jobs that haven't used up their
// time slice.  Mark each such job's result, and append it to
// runnable_jobs (with top priority, seqno 0) if it isn't already on
// the ordered to-run list.
//
void CLIENT_STATE::append_unfinished_time_slice(
    vector<RESULT*> &runnable_jobs
) {
    for (unsigned int k=0; k<active_tasks.active_tasks.size(); k++) {
        ACTIVE_TASK* atp = active_tasks.active_tasks[k];
        RESULT* rp = atp->result;

        // consider only runnable, CPU-intensive, currently-scheduled,
        // CPU-only jobs that haven't finished their time slice
        //
        if (!rp->runnable()) continue;
        if (rp->project->non_cpu_intensive) continue;
        if (atp->scheduler_state != CPU_SCHED_SCHEDULED) continue;
        if (rp->uses_coprocs()) continue;
        if (finished_time_slice(atp)) continue;

        rp->unfinished_time_slice = true;
        if (!in_ordered_scheduled_results(atp)) {
            runnable_jobs.push_back(rp);
            rp->seqno = 0;
        }
    }
}
|
|
|
|
|
|
|
|
// Enforce the CPU schedule.
|
2006-06-21 17:34:55 +00:00
|
|
|
// Inputs:
|
|
|
|
// ordered_scheduled_results
|
|
|
|
// List of tasks that should (ideally) run, set by schedule_cpus().
|
2008-10-09 22:44:45 +00:00
|
|
|
// Most important tasks (e.g. early deadline) are first.
|
|
|
|
// The set of tasks that actually run may be different:
|
|
|
|
// - if a task hasn't checkpointed recently we avoid preempting it
|
|
|
|
// - we don't run tasks that would exceed working-set limits
|
2006-06-21 17:34:55 +00:00
|
|
|
// Details:
|
|
|
|
// Initially, each task's scheduler_state is PREEMPTED or SCHEDULED
|
|
|
|
// depending on whether or not it is running.
|
|
|
|
// This function sets each task's next_scheduler_state,
|
|
|
|
// and at the end it starts/resumes and preempts tasks
|
|
|
|
// based on scheduler_state and next_scheduler_state.
|
2006-06-17 16:26:29 +00:00
|
|
|
//
|
2006-06-15 23:15:27 +00:00
|
|
|
bool CLIENT_STATE::enforce_schedule() {
    unsigned int i;
    vector<ACTIVE_TASK*> preemptable_tasks;
        // NOTE(review): unused in this function; candidate for removal
    static double last_time = 0;
    int retval;
    double ncpus_used;
    ACTIVE_TASK* atp;

    // Do this when requested, and once a minute as a safety net
    //
    if (now - last_time > CPU_SCHED_ENFORCE_PERIOD) {
        must_enforce_cpu_schedule = true;
    }
    if (!must_enforce_cpu_schedule) return false;
    must_enforce_cpu_schedule = false;

    // NOTE: there's an assumption that debt is adjusted at
    // least as often as the CPU sched is enforced (see client_state.h).
    // If you remove the following, make changes accordingly
    //
    adjust_debts();
    last_time = now;
    bool action = false;   // set true if we actually preempt/start anything

    if (log_flags.cpu_sched_debug) {
        msg_printf(0, MSG_INFO, "[cpu_sched_debug] enforce_schedule(): start");
        msg_printf(0, MSG_INFO, "[cpu_sched_debug] preliminary job list:");
        print_job_list(ordered_scheduled_results, false);
    }

    // Set next_scheduler_state to preempt for all tasks;
    // tasks we decide to run are flipped back to SCHEDULED below.
    //
    for (i=0; i< active_tasks.active_tasks.size(); i++) {
        atp = active_tasks.active_tasks[i];
        atp->next_scheduler_state = CPU_SCHED_PREEMPTED;
    }

    // make a copy of the to-run list
    //
    vector<RESULT*>runnable_jobs;
    for (i=0; i<ordered_scheduled_results.size(); i++) {
        RESULT* rp = ordered_scheduled_results[i];
        rp->seqno = i;     // remember list position for more_important()
        rp->unfinished_time_slice = false;
        runnable_jobs.push_back(rp);
    }

    // append running jobs not done with time slice
    //
    append_unfinished_time_slice(runnable_jobs);

    // sort the result by decreasing importance
    //
    std::sort(
        runnable_jobs.begin(),
        runnable_jobs.end(),
        more_important
    );

    if (log_flags.cpu_sched_debug) {
        msg_printf(0, MSG_INFO, "[cpu_sched_debug] final job list:");
        print_job_list(runnable_jobs, true);
    }

    // memory budgets; decremented as jobs are scheduled
    //
    double ram_left = available_ram();
    double swap_left = (global_prefs.vm_max_used_frac)*host_info.m_swap;

    if (log_flags.mem_usage_debug) {
        msg_printf(0, MSG_INFO,
            "[mem_usage_debug] enforce: available RAM %.2fMB swap %.2fMB",
            ram_left/MEGA, swap_left/MEGA
        );
    }

    // schedule all non CPU intensive tasks
    // (they don't count against the CPU budget, but do use RAM/swap)
    //
    for (i=0; i<results.size(); i++) {
        RESULT* rp = results[i];
        if (rp->project->non_cpu_intensive && rp->runnable()) {
            atp = get_task(rp);
            atp->next_scheduler_state = CPU_SCHED_SCHEDULED;
            ram_left -= atp->procinfo.working_set_size_smoothed;
            swap_left -= atp->procinfo.swap_size;
        }
    }

    // Loop through the jobs we want to schedule,
    // in decreasing importance, until CPUs or RAM run out.
    //
    ncpus_used = 0;
    for (i=0; i<runnable_jobs.size(); i++) {
        RESULT* rp = runnable_jobs[i];
        // coproc jobs are allowed to exceed the CPU count
        //
        if (!rp->uses_coprocs() && (ncpus_used >= ncpus)) {
            if (log_flags.cpu_sched_debug) {
                msg_printf(rp->project, MSG_INFO,
                    "[cpu_sched_debug] all CPUs used, skipping %s", rp->name
                );
            }
            continue;
        }

        atp = lookup_active_task_by_result(rp);
        if (atp) {
            // skip (and later preempt-by-quit) jobs whose working set
            // doesn't fit in the remaining RAM budget
            //
            atp->too_large = false;
            if (atp->procinfo.working_set_size_smoothed > ram_left) {
                atp->too_large = true;
                if (log_flags.mem_usage_debug) {
                    msg_printf(rp->project, MSG_INFO,
                        "[mem_usage_debug] enforce: result %s can't run, too big %.2fMB > %.2fMB",
                        rp->name, atp->procinfo.working_set_size_smoothed/MEGA, ram_left/MEGA
                    );
                }
                continue;
            }
        }

        if (log_flags.cpu_sched_debug) {
            msg_printf(rp->project, MSG_INFO,
                "[cpu_sched_debug] scheduling %s", rp->name
            );
        }

        // We've decided to run this job; create an ACTIVE_TASK if needed.
        //
        if (!atp) {
            atp = get_task(rp);
        }
        ncpus_used += rp->avp->avg_ncpus;
        atp->next_scheduler_state = CPU_SCHED_SCHEDULED;
        ram_left -= atp->procinfo.working_set_size_smoothed;
    }

    // NOTE(review): the work-fetch request below only happens when the
    // cpu_sched_debug log flag is on — confirm this gating is intended.
    //
    if (log_flags.cpu_sched_debug && ncpus_used < ncpus) {
        msg_printf(0, MSG_INFO, "[cpu_sched_debug] using %f out of %d CPUs",
            ncpus_used, ncpus
        );
        if (ncpus_used < ncpus) {
            request_work_fetch("CPUs idle");
        }
    }

    bool check_swap = (host_info.m_swap != 0);
        // in case couldn't measure swap on this host

    // preempt tasks as needed, and note whether there are any coproc jobs
    // in QUIT_PENDING state (in which case we won't start new coproc jobs)
    //
    bool coproc_quit_pending = false;
    for (i=0; i<active_tasks.active_tasks.size(); i++) {
        atp = active_tasks.active_tasks[i];
        if (log_flags.cpu_sched_debug) {
            msg_printf(atp->result->project, MSG_INFO,
                "[cpu_sched_debug] %s sched state %d next %d task state %d",
                atp->result->name, atp->scheduler_state,
                atp->next_scheduler_state, atp->task_state()
            );
        }
        int preempt_type = REMOVE_MAYBE_SCHED;
        switch (atp->next_scheduler_state) {
        case CPU_SCHED_PREEMPTED:
            switch (atp->task_state()) {
            case PROCESS_EXECUTING:
                action = true;
                // force removal from memory (quit) if we're out of swap
                // or the job exceeded the RAM budget
                //
                if (check_swap && swap_left < 0) {
                    if (log_flags.mem_usage_debug) {
                        msg_printf(atp->result->project, MSG_INFO,
                            "[mem_usage_debug] out of swap space, will preempt by quit"
                        );
                    }
                    preempt_type = REMOVE_ALWAYS;
                }
                if (atp->too_large) {
                    if (log_flags.mem_usage_debug) {
                        msg_printf(atp->result->project, MSG_INFO,
                            "[mem_usage_debug] job using too much memory, will preempt by quit"
                        );
                    }
                    preempt_type = REMOVE_ALWAYS;
                }
                atp->preempt(preempt_type);
                break;
            case PROCESS_SUSPENDED:
                // Handle the case where user changes prefs from
                // "leave in memory" to "remove from memory";
                // need to quit suspended tasks.
                //
                if (atp->checkpoint_cpu_time && !global_prefs.leave_apps_in_memory) {
                    atp->preempt(REMOVE_ALWAYS);
                }
                break;
            }
            atp->scheduler_state = CPU_SCHED_PREEMPTED;
            break;
        }
        if (atp->result->uses_coprocs() && atp->task_state() == PROCESS_QUIT_PENDING) {
            coproc_quit_pending = true;
        }
    }

    // start/resume the tasks we decided to schedule
    //
    bool coproc_start_deferred = false;
    for (i=0; i<active_tasks.active_tasks.size(); i++) {
        atp = active_tasks.active_tasks[i];
        if (atp->next_scheduler_state != CPU_SCHED_SCHEDULED) continue;
        int ts = atp->task_state();
        if (ts == PROCESS_UNINITIALIZED || ts == PROCESS_SUSPENDED) {
            // If there's a quit pending for a coproc job,
            // don't start new ones since they may bomb out
            // on memory allocation. Instead, trigger a retry
            //
            if (atp->result->uses_coprocs() && coproc_quit_pending) {
                coproc_start_deferred = true;
                continue;
            }
            action = true;
            retval = atp->resume_or_start(
                atp->scheduler_state == CPU_SCHED_UNINITIALIZED
            );
            if ((retval == ERR_SHMGET) || (retval == ERR_SHMAT)) {
                // Assume no additional shared memory segs
                // will be available in the next 10 seconds
                // (run only tasks which are already attached to shared memory).
                //
                if (gstate.retry_shmem_time < gstate.now) {
                    request_schedule_cpus("no more shared memory");
                }
                gstate.retry_shmem_time = gstate.now + 10.0;
                continue;
            }
            if (retval) {
                report_result_error(
                    *(atp->result), "Couldn't start or resume: %d", retval
                );
                request_schedule_cpus("start failed");
                continue;
            }
            atp->run_interval_start_wall_time = now;
            app_started = now;
        }
        atp->scheduler_state = CPU_SCHED_SCHEDULED;
        swap_left -= atp->procinfo.swap_size;
    }
    if (action) {
        set_client_state_dirty("enforce_cpu_schedule");
    }
    if (log_flags.cpu_sched_debug) {
        msg_printf(0, MSG_INFO, "[cpu_sched_debug] enforce_schedule: end");
    }
    if (coproc_start_deferred) {
        if (log_flags.cpu_sched_debug) {
            msg_printf(0, MSG_INFO,
                "[cpu_sched_debug] coproc quit pending, deferring start"
            );
        }
        request_enforce_schedule("coproc quit retry");
    }
    return action;
}
|
|
|
|
|
2006-08-03 04:07:18 +00:00
|
|
|
// trigger CPU schedule enforcement.
|
|
|
|
// Called when a new schedule is computed,
|
|
|
|
// and when an app checkpoints.
|
|
|
|
//
|
|
|
|
void CLIENT_STATE::request_enforce_schedule(const char* where) {
|
|
|
|
if (log_flags.cpu_sched_debug) {
|
2006-09-07 20:39:25 +00:00
|
|
|
msg_printf(0, MSG_INFO, "[cpu_sched_debug] Request enforce CPU schedule: %s", where);
|
2006-08-03 04:07:18 +00:00
|
|
|
}
|
|
|
|
must_enforce_cpu_schedule = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// trigger CPU scheduling.
|
|
|
|
// Called when a result is completed,
|
|
|
|
// when new results become runnable,
|
|
|
|
// or when the user performs a UI interaction
|
|
|
|
// (e.g. suspending or resuming a project or result).
|
|
|
|
//
|
|
|
|
void CLIENT_STATE::request_schedule_cpus(const char* where) {
|
|
|
|
if (log_flags.cpu_sched_debug) {
|
2006-09-07 20:39:25 +00:00
|
|
|
msg_printf(0, MSG_INFO, "[cpu_sched_debug] Request CPU reschedule: %s", where);
|
2006-08-03 04:07:18 +00:00
|
|
|
}
|
|
|
|
must_schedule_cpus = true;
|
|
|
|
}
|
|
|
|
|
2007-02-21 22:27:35 +00:00
|
|
|
// Find the active task for a given result
|
|
|
|
//
|
|
|
|
// Find the active task (if any) for the given result;
// return NULL if there is none.
//
ACTIVE_TASK* CLIENT_STATE::lookup_active_task_by_result(RESULT* rep) {
    unsigned int n = (unsigned int)active_tasks.active_tasks.size();
    for (unsigned int j=0; j<n; j++) {
        ACTIVE_TASK* atp = active_tasks.active_tasks[j];
        if (atp->result == rep) return atp;
    }
    return NULL;
}
|
|
|
|
|
2009-02-26 17:12:55 +00:00
|
|
|
bool RESULT::not_started() {
|
|
|
|
if (computing_done()) return false;
|
|
|
|
if (gstate.lookup_active_task_by_result(this)) return false;
|
|
|
|
return true;
|
2007-02-21 22:27:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// find total resource shares of all projects
|
|
|
|
//
|
|
|
|
double CLIENT_STATE::total_resource_share() {
|
|
|
|
double x = 0;
|
|
|
|
for (unsigned int i=0; i<projects.size(); i++) {
|
|
|
|
if (!projects[i]->non_cpu_intensive ) {
|
|
|
|
x += projects[i]->resource_share;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return x;
|
|
|
|
}
|
|
|
|
|
|
|
|
// same, but only runnable projects (can use CPU right now)
|
|
|
|
//
|
|
|
|
double CLIENT_STATE::runnable_resource_share() {
|
|
|
|
double x = 0;
|
|
|
|
for (unsigned int i=0; i<projects.size(); i++) {
|
|
|
|
PROJECT* p = projects[i];
|
|
|
|
if (p->non_cpu_intensive) continue;
|
|
|
|
if (p->runnable()) {
|
|
|
|
x += p->resource_share;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return x;
|
|
|
|
}
|
|
|
|
|
|
|
|
// same, but potentially runnable (could ask for work right now)
|
|
|
|
//
|
|
|
|
double CLIENT_STATE::potentially_runnable_resource_share() {
|
|
|
|
double x = 0;
|
|
|
|
for (unsigned int i=0; i<projects.size(); i++) {
|
|
|
|
PROJECT* p = projects[i];
|
|
|
|
if (p->non_cpu_intensive) continue;
|
|
|
|
if (p->potentially_runnable()) {
|
|
|
|
x += p->resource_share;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return x;
|
|
|
|
}
|
|
|
|
|
|
|
|
// same, but nearly runnable (could be downloading work right now)
|
|
|
|
//
|
|
|
|
double CLIENT_STATE::nearly_runnable_resource_share() {
|
|
|
|
double x = 0;
|
|
|
|
for (unsigned int i=0; i<projects.size(); i++) {
|
|
|
|
PROJECT* p = projects[i];
|
|
|
|
if (p->non_cpu_intensive) continue;
|
|
|
|
if (p->nearly_runnable()) {
|
|
|
|
x += p->resource_share;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return x;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool ACTIVE_TASK::process_exists() {
|
|
|
|
switch (task_state()) {
|
|
|
|
case PROCESS_EXECUTING:
|
|
|
|
case PROCESS_SUSPENDED:
|
|
|
|
case PROCESS_ABORT_PENDING:
|
|
|
|
case PROCESS_QUIT_PENDING:
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// if there's not an active task for the result, make one
|
|
|
|
//
|
|
|
|
// Return the active task for the result,
// creating and registering one if it doesn't exist yet.
//
ACTIVE_TASK* CLIENT_STATE::get_task(RESULT* rp) {
    ACTIVE_TASK* atp = lookup_active_task_by_result(rp);
    if (atp) return atp;
    atp = new ACTIVE_TASK;
    atp->get_free_slot(rp);
    atp->init(rp);
    active_tasks.active_tasks.push_back(atp);
    return atp;
}
|
|
|
|
|
|
|
|
// Results must be complete early enough to report before the report deadline.
|
|
|
|
// Not all hosts are connected all of the time.
|
|
|
|
//
|
|
|
|
double RESULT::computation_deadline() {
|
|
|
|
return report_deadline - (
|
2007-04-03 19:06:26 +00:00
|
|
|
gstate.work_buf_min()
|
2007-02-21 22:27:35 +00:00
|
|
|
// Seconds that the host will not be connected to the Internet
|
2007-06-20 16:27:27 +00:00
|
|
|
+ gstate.global_prefs.cpu_scheduling_period()
|
2007-02-21 22:27:35 +00:00
|
|
|
// Seconds that the CPU may be busy with some other result
|
2007-04-03 19:06:26 +00:00
|
|
|
+ DEADLINE_CUSHION
|
2007-02-21 22:27:35 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2007-03-04 02:30:48 +00:00
|
|
|
static const char* result_state_name(int val) {
|
|
|
|
switch (val) {
|
|
|
|
case RESULT_NEW: return "NEW";
|
|
|
|
case RESULT_FILES_DOWNLOADING: return "FILES_DOWNLOADING";
|
|
|
|
case RESULT_FILES_DOWNLOADED: return "FILES_DOWNLOADED";
|
|
|
|
case RESULT_COMPUTE_ERROR: return "COMPUTE_ERROR";
|
|
|
|
case RESULT_FILES_UPLOADING: return "FILES_UPLOADING";
|
|
|
|
case RESULT_FILES_UPLOADED: return "FILES_UPLOADED";
|
2008-11-03 22:56:16 +00:00
|
|
|
case RESULT_ABORTED: return "ABORTED";
|
2007-03-04 02:30:48 +00:00
|
|
|
}
|
|
|
|
return "Unknown";
|
|
|
|
}
|
|
|
|
|
|
|
|
void RESULT::set_state(int val, const char* where) {
|
|
|
|
_state = val;
|
|
|
|
if (log_flags.task_debug) {
|
|
|
|
msg_printf(project, MSG_INFO,
|
|
|
|
"[task_debug] result state=%s for %s from %s",
|
|
|
|
result_state_name(val), name, where
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-04-03 17:16:39 +00:00
|
|
|
// called at startup (after get_host_info())
|
|
|
|
// and when general prefs have been parsed
|
|
|
|
//
|
|
|
|
void CLIENT_STATE::set_ncpus() {
    int ncpus_old = ncpus;   // to detect a change below

    // base CPU count: config override if given (>=0),
    // else the detected count, else assume 1
    //
    if (config.ncpus>=0) {
        ncpus = config.ncpus;
    } else if (host_info.p_ncpus>0) {
        ncpus = host_info.p_ncpus;
    } else {
        ncpus = 1;
    }

    // if config says no CPUs, honor it
    // (otherwise apply the user's CPU-limiting prefs)
    //
    if (ncpus) {
        if (global_prefs.max_ncpus_pct) {
            // percentage pref takes precedence over the absolute limit
            ncpus = (int)((ncpus * global_prefs.max_ncpus_pct)/100);
            if (ncpus == 0) ncpus = 1;   // pct pref never disables all CPUs
        } else if (global_prefs.max_ncpus && global_prefs.max_ncpus < ncpus) {
            ncpus = global_prefs.max_ncpus;
        }
    }

    // if the usable count changed after startup, re-benchmark
    // and redo scheduling and work fetch
    //
    if (initialized && ncpus != ncpus_old) {
        msg_printf(0, MSG_INFO,
            "Number of usable CPUs has changed from %d to %d.  Running benchmarks.",
            ncpus_old, ncpus
        );
        run_cpu_benchmarks = true;
        request_schedule_cpus("Number of usable CPUs has changed");
        request_work_fetch("Number of usable CPUs has changed");
        work_fetch.init();
    }
}
|
2007-02-21 22:27:35 +00:00
|
|
|
|
2007-04-03 19:35:33 +00:00
|
|
|
// The given result has just completed successfully.
|
|
|
|
// Update the correction factor used to predict
|
|
|
|
// completion time for this project's results
|
|
|
|
//
|
2008-12-02 22:19:39 +00:00
|
|
|
void PROJECT::update_duration_correction_factor(ACTIVE_TASK* atp) {
    RESULT* rp = atp->result;
#ifdef SIM
    // simulator hooks: optionally disable DCF or route to the
    // simulator's own statistics
    //
    if (dcf_dont_use) {
        duration_correction_factor = 1.0;
        return;
    }
    if (dcf_stats) {
        ((SIM_PROJECT*)this)->update_dcf_stats(rp);
        return;
    }
#endif
    // ratio of actual to estimated duration, without and with
    // the current correction factor applied
    //
    double raw_ratio = atp->elapsed_time/rp->estimated_duration_uncorrected();
    double adj_ratio = atp->elapsed_time/rp->estimated_duration(false);
    double old_dcf = duration_correction_factor;   // for the debug log below

    // it's OK to overestimate completion time,
    // but bad to underestimate it.
    // So make it easy for the factor to increase,
    // but decrease it with caution
    //
    if (adj_ratio > 1.1) {
        // job ran noticeably longer than (corrected) estimate:
        // jump straight to the observed ratio
        duration_correction_factor = raw_ratio;
    } else {
        // in particular, don't give much weight to results
        // that completed a lot earlier than expected
        //
        if (adj_ratio < 0.1) {
            duration_correction_factor = duration_correction_factor*0.99 + 0.01*raw_ratio;
        } else {
            duration_correction_factor = duration_correction_factor*0.9 + 0.1*raw_ratio;
        }
    }
    // limit to [.01 .. 100]
    //
    if (duration_correction_factor > 100) duration_correction_factor = 100;
    if (duration_correction_factor < 0.01) duration_correction_factor = 0.01;

    if (log_flags.dcf_debug) {
        msg_printf(this, MSG_INFO,
            "[dcf] DCF: %f->%f, raw_ratio %f, adj_ratio %f",
            old_dcf, duration_correction_factor, raw_ratio, adj_ratio
        );
    }
}
|
2007-04-03 19:06:26 +00:00
|
|
|
|
2006-06-16 18:52:25 +00:00
|
|
|
const char *BOINC_RCSID_e830ee1 = "$Id$";
|