2008-08-06 18:36:30 +00:00
|
|
|
// This file is part of BOINC.
|
2007-02-21 16:58:29 +00:00
|
|
|
// http://boinc.berkeley.edu
|
2008-08-06 18:36:30 +00:00
|
|
|
// Copyright (C) 2008 University of California
|
2007-02-21 16:58:29 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
2007-02-21 16:58:29 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
2007-02-21 16:58:29 +00:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
2007-02-21 16:58:29 +00:00
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
#include "client_types.h"
|
2007-02-21 16:58:29 +00:00
|
|
|
#include "client_msgs.h"
|
2007-02-24 14:55:59 +00:00
|
|
|
#ifdef SIM
|
|
|
|
#include "sim.h"
|
|
|
|
#else
|
2007-02-21 16:58:29 +00:00
|
|
|
#include "client_state.h"
|
2007-02-24 14:55:59 +00:00
|
|
|
#endif
|
2007-02-21 16:58:29 +00:00
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
#include "work_fetch.h"
|
|
|
|
|
2007-02-21 16:58:29 +00:00
|
|
|
using std::vector;
|
2008-12-31 23:07:59 +00:00
|
|
|
|
|
|
|
RSC_WORK_FETCH cuda_work_fetch;
|
|
|
|
RSC_WORK_FETCH cpu_work_fetch;
|
|
|
|
WORK_FETCH work_fetch;
|
|
|
|
|
2009-01-14 23:56:07 +00:00
|
|
|
static inline char* rsc_name(int t) {
|
2009-01-21 20:51:33 +00:00
|
|
|
switch (t) {
|
|
|
|
case RSC_TYPE_CPU: return "CPU";
|
|
|
|
case RSC_TYPE_CUDA: return "CUDA";
|
|
|
|
}
|
|
|
|
return "Unknown";
|
2009-01-14 23:56:07 +00:00
|
|
|
}
|
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
// Return project p's per-resource work-fetch record that corresponds
// to this object's resource type: the CUDA record when rsc_type is
// RSC_TYPE_CUDA, the CPU record for every other value.
//
RSC_PROJECT_WORK_FETCH& RSC_WORK_FETCH::project_state(PROJECT* p) {
    if (rsc_type == RSC_TYPE_CUDA) {
        return p->cuda_pwf;
    }
    return p->cpu_pwf;
}
|
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
// True if project p's backoff time for this resource
// is earlier than the current time (gstate.now).
//
bool RSC_WORK_FETCH::may_have_work(PROJECT* p) {
    return project_state(p).backoff_time < gstate.now;
}
|
|
|
|
|
2009-01-10 00:48:22 +00:00
|
|
|
bool RSC_PROJECT_WORK_FETCH::compute_may_have_work() {
|
2009-01-08 00:41:15 +00:00
|
|
|
return (backoff_time < gstate.now);
|
|
|
|
}
|
|
|
|
|
2009-01-10 00:48:22 +00:00
|
|
|
// Refresh the cached may_have_work flag from the current backoff state.
//
void RSC_PROJECT_WORK_FETCH::rr_init() {
    const bool not_backed_off = compute_may_have_work();
    may_have_work = not_backed_off;
}
|
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
void RSC_WORK_FETCH::rr_init() {
|
|
|
|
shortfall = 0;
|
|
|
|
nidle_now = 0;
|
2009-01-03 06:01:17 +00:00
|
|
|
total_fetchable_share = 0;
|
|
|
|
total_runnable_share = 0;
|
2008-12-31 23:07:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void WORK_FETCH::rr_init() {
|
|
|
|
cpu_work_fetch.rr_init();
|
|
|
|
if (coproc_cuda) {
|
|
|
|
cuda_work_fetch.rr_init();
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
2008-12-31 23:07:59 +00:00
|
|
|
estimated_delay = 0;
|
2009-01-03 06:01:17 +00:00
|
|
|
for (unsigned int i=0; i<gstate.projects.size(); i++) {
|
|
|
|
PROJECT* p = gstate.projects[i];
|
2009-01-08 00:41:15 +00:00
|
|
|
p->pwf.can_fetch_work = p->pwf.compute_can_fetch_work(p);
|
2009-01-10 00:48:22 +00:00
|
|
|
p->cpu_pwf.rr_init();
|
2009-01-03 06:01:17 +00:00
|
|
|
if (coproc_cuda) {
|
2009-01-10 00:48:22 +00:00
|
|
|
p->cuda_pwf.rr_init();
|
2009-01-03 06:01:17 +00:00
|
|
|
}
|
|
|
|
}
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
|
|
|
|
2009-01-08 00:41:15 +00:00
|
|
|
// Decide whether work may be fetched from project p at all
// (independent of resource type).
// Returns false if any one of the following holds; the conditions
// are evaluated in this order and evaluation stops at the first hit:
// - the project is non-CPU-intensive
// - the project is suspended via the GUI
// - a master URL fetch is pending
// - the project's min_rpc_time is still in the future
// - the project is flagged "don't request more work"
// - some download is stalled
// - some result is suspended
// - more than 2*ncpus results are uploading
//
bool PROJECT_WORK_FETCH::compute_can_fetch_work(PROJECT* p) {
    const bool blocked =
        p->non_cpu_intensive
        || p->suspended_via_gui
        || p->master_url_fetch_pending
        || (p->min_rpc_time > gstate.now)
        || p->dont_request_more_work
        || p->some_download_stalled()
        || p->some_result_suspended()
        || (p->nuploading_results > 2*gstate.ncpus);
    return !blocked;
}
|
|
|
|
|
2009-01-23 18:29:28 +00:00
|
|
|
// Reset both of project p's per-resource work-fetch records
// (CPU first, then CUDA).
//
void PROJECT_WORK_FETCH::reset(PROJECT* p) {
    RSC_PROJECT_WORK_FETCH& cpu = p->cpu_pwf;
    RSC_PROJECT_WORK_FETCH& cuda = p->cuda_pwf;
    cpu.reset();
    cuda.reset();
}
|
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
void RSC_WORK_FETCH::accumulate_shortfall(double d_time, double nused) {
|
|
|
|
double idle = ninstances - nused;
|
|
|
|
if (idle > 0) {
|
|
|
|
shortfall += idle*d_time;
|
|
|
|
}
|
|
|
|
}
|
2007-02-21 16:58:29 +00:00
|
|
|
|
2009-01-08 00:41:15 +00:00
|
|
|
// choose the best project to ask for work for this resource
|
|
|
|
//
|
2008-12-31 23:07:59 +00:00
|
|
|
PROJECT* RSC_WORK_FETCH::choose_project() {
|
|
|
|
PROJECT* pbest = NULL;
|
2007-02-21 16:58:29 +00:00
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
for (unsigned i=0; i<gstate.projects.size(); i++) {
|
|
|
|
PROJECT* p = gstate.projects[i];
|
2009-01-08 00:41:15 +00:00
|
|
|
if (!p->pwf.can_fetch_work) continue;
|
|
|
|
if (!project_state(p).may_have_work) continue;
|
2008-12-31 23:07:59 +00:00
|
|
|
if (pbest) {
|
2009-01-08 00:41:15 +00:00
|
|
|
if (p->deadlines_missed && !pbest->deadlines_missed) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (project_state(p).overworked() && !project_state(pbest).overworked()) {
|
|
|
|
continue;
|
|
|
|
}
|
2008-12-31 23:07:59 +00:00
|
|
|
if (pbest->pwf.overall_debt > p->pwf.overall_debt) {
|
2007-02-21 16:58:29 +00:00
|
|
|
continue;
|
|
|
|
}
|
2009-01-08 00:41:15 +00:00
|
|
|
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
2008-12-31 23:07:59 +00:00
|
|
|
pbest = p;
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
2008-12-31 23:07:59 +00:00
|
|
|
return pbest;
|
|
|
|
}
|
|
|
|
|
|
|
|
void WORK_FETCH::set_overall_debts() {
|
|
|
|
for (unsigned i=0; i<gstate.projects.size(); i++) {
|
|
|
|
PROJECT* p = gstate.projects[i];
|
|
|
|
p->pwf.overall_debt = p->cpu_pwf.debt;
|
|
|
|
if (coproc_cuda) {
|
|
|
|
p->pwf.overall_debt += cuda_work_fetch.speed*p->cuda_pwf.debt;
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
|
|
|
}
|
2008-12-31 23:07:59 +00:00
|
|
|
}
|
2007-02-21 16:58:29 +00:00
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
void RSC_WORK_FETCH::print_state(char* name) {
|
|
|
|
msg_printf(0, MSG_INFO,
|
2009-01-20 18:06:49 +00:00
|
|
|
"[wfd] %s: shortfall %.2f nidle %.2f fetchable RS %.2f runnable RS %.2f",
|
2008-12-31 23:07:59 +00:00
|
|
|
name,
|
|
|
|
shortfall, nidle_now,
|
2009-01-03 06:01:17 +00:00
|
|
|
total_fetchable_share, total_runnable_share
|
2008-12-31 23:07:59 +00:00
|
|
|
);
|
|
|
|
for (unsigned int i=0; i<gstate.projects.size(); i++) {
|
|
|
|
PROJECT* p = gstate.projects[i];
|
2009-01-21 20:51:33 +00:00
|
|
|
if (p->non_cpu_intensive) continue;
|
2008-12-31 23:07:59 +00:00
|
|
|
RSC_PROJECT_WORK_FETCH& pwf = project_state(p);
|
2009-01-26 18:01:30 +00:00
|
|
|
double bt = pwf.backoff_time>gstate.now?pwf.backoff_time-gstate.now:0;
|
2008-12-31 23:07:59 +00:00
|
|
|
msg_printf(p, MSG_INFO,
|
2009-01-26 18:01:30 +00:00
|
|
|
"[wfd] %s: runshare %.2f debt %.2f backoff dt %.2f int %.2f",
|
2009-01-03 06:01:17 +00:00
|
|
|
name,
|
2009-01-26 18:01:30 +00:00
|
|
|
pwf.runnable_share, pwf.debt, bt, pwf.backoff_interval
|
2008-12-31 23:07:59 +00:00
|
|
|
);
|
|
|
|
}
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
void WORK_FETCH::print_state() {
|
|
|
|
msg_printf(0, MSG_INFO, "[wfd] ------- start work fetch state -------");
|
|
|
|
cpu_work_fetch.print_state("CPU");
|
|
|
|
if (coproc_cuda) {
|
|
|
|
cuda_work_fetch.print_state("CUDA");
|
|
|
|
}
|
|
|
|
for (unsigned int i=0; i<gstate.projects.size(); i++) {
|
|
|
|
PROJECT* p = gstate.projects[i];
|
2009-01-21 20:51:33 +00:00
|
|
|
if (p->non_cpu_intensive) continue;
|
2008-12-31 23:07:59 +00:00
|
|
|
msg_printf(p, MSG_INFO, "[wfd] overall_debt %f", p->pwf.overall_debt);
|
|
|
|
}
|
|
|
|
msg_printf(0, MSG_INFO, "[wfd] ------- end work fetch state -------");
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
// Log the pending work request (seconds and instances, for CPU and
// for CUDA) as a message attributed to project p.
//
static void print_req(PROJECT* p) {
    const RSC_WORK_FETCH& cpu = cpu_work_fetch;
    const RSC_WORK_FETCH& cuda = cuda_work_fetch;
    msg_printf(p, MSG_INFO,
        "[wfd] request: CPU (%.2f sec, %d) CUDA (%.2f sec, %d)",
        cpu.req_secs, cpu.req_instances,
        cuda.req_secs, cuda.req_instances
    );
}
|
|
|
|
|
2009-01-03 06:01:17 +00:00
|
|
|
// Clear this resource's request: no seconds, no instances.
//
void RSC_WORK_FETCH::clear_request() {
    req_instances = 0;
    req_secs = 0;
}
|
|
|
|
|
|
|
|
void WORK_FETCH::clear_request() {
|
|
|
|
cpu_work_fetch.clear_request();
|
|
|
|
cuda_work_fetch.clear_request();
|
|
|
|
}
|
|
|
|
|
|
|
|
// we're going to contact this project; decide how much work to request
|
|
|
|
//
|
|
|
|
void WORK_FETCH::compute_work_request(PROJECT* p) {
|
|
|
|
// check if this is the project we'd ask for work anyway
|
|
|
|
//
|
|
|
|
PROJECT* pbest = choose_project();
|
|
|
|
if (p == pbest) return;
|
|
|
|
|
|
|
|
// if not, don't request any work
|
|
|
|
//
|
|
|
|
clear_request();
|
|
|
|
}
|
|
|
|
|
2009-01-21 20:28:00 +00:00
|
|
|
// see if there's a fetchable non-CPU-intensive project without work
|
|
|
|
//
|
|
|
|
PROJECT* WORK_FETCH::non_cpu_intensive_project_needing_work() {
|
2009-01-21 20:51:33 +00:00
|
|
|
for (unsigned int i=0; i<gstate.projects.size(); i++) {
|
|
|
|
PROJECT* p = gstate.projects[i];
|
|
|
|
if (!p->non_cpu_intensive) continue;
|
|
|
|
if (!p->can_request_work()) continue;
|
2009-01-23 18:29:28 +00:00
|
|
|
if (p->cpu_pwf.backoff_time > gstate.now) continue;
|
2009-01-21 20:51:33 +00:00
|
|
|
bool has_work = false;
|
|
|
|
for (unsigned int j=0; j<gstate.results.size(); j++) {
|
|
|
|
RESULT* rp = gstate.results[j];
|
2009-01-23 18:29:28 +00:00
|
|
|
if (rp->project != p) continue;
|
|
|
|
if (rp->state() <= RESULT_FILES_DOWNLOADED) {
|
2009-01-21 20:51:33 +00:00
|
|
|
has_work = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!has_work) {
|
|
|
|
clear_request();
|
|
|
|
cpu_work_fetch.req_secs = 1;
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
2009-01-21 20:28:00 +00:00
|
|
|
}
|
|
|
|
|
2009-01-03 06:01:17 +00:00
|
|
|
// choose a project to fetch work from,
|
|
|
|
// and set the request fields of resource objects
|
2007-02-21 16:58:29 +00:00
|
|
|
//
|
2008-12-31 23:07:59 +00:00
|
|
|
PROJECT* WORK_FETCH::choose_project() {
|
|
|
|
PROJECT* p = 0;
|
2009-01-08 00:41:15 +00:00
|
|
|
|
2009-01-21 20:51:33 +00:00
|
|
|
p = non_cpu_intensive_project_needing_work();
|
|
|
|
if (p) return p;
|
2009-01-21 20:28:00 +00:00
|
|
|
|
2009-01-08 00:41:15 +00:00
|
|
|
gstate.adjust_debts();
|
|
|
|
gstate.compute_nuploading_results();
|
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
gstate.rr_simulation();
|
|
|
|
set_overall_debts();
|
2009-01-03 06:01:17 +00:00
|
|
|
bool request_cpu = true;
|
|
|
|
bool request_cuda = (coproc_cuda != NULL);
|
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
// if a resource is currently idle, get work for it;
|
|
|
|
// give GPU priority over CPU
|
2007-02-21 16:58:29 +00:00
|
|
|
//
|
2008-12-31 23:07:59 +00:00
|
|
|
if (coproc_cuda && cuda_work_fetch.nidle_now) {
|
|
|
|
p = cuda_work_fetch.choose_project();
|
|
|
|
if (p) {
|
2009-01-03 06:01:17 +00:00
|
|
|
request_cpu = false;
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
2008-12-31 23:07:59 +00:00
|
|
|
}
|
|
|
|
if (!p && cpu_work_fetch.nidle_now) {
|
|
|
|
p = cpu_work_fetch.choose_project();
|
|
|
|
if (p) {
|
2009-01-03 06:01:17 +00:00
|
|
|
request_cuda = false;
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
|
|
|
}
|
2009-01-03 06:01:17 +00:00
|
|
|
|
|
|
|
// if a resource has a shortfall, get work for it.
|
|
|
|
//
|
2008-12-31 23:07:59 +00:00
|
|
|
if (!p && coproc_cuda && cuda_work_fetch.shortfall) {
|
|
|
|
p = cuda_work_fetch.choose_project();
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
2008-12-31 23:07:59 +00:00
|
|
|
if (!p && cpu_work_fetch.shortfall) {
|
|
|
|
p = cpu_work_fetch.choose_project();
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
2009-01-03 06:01:17 +00:00
|
|
|
|
|
|
|
// decide how much work to request for each resource
|
|
|
|
//
|
|
|
|
clear_request();
|
|
|
|
if (p) {
|
|
|
|
if (request_cpu) {
|
|
|
|
cpu_work_fetch.set_request(p);
|
|
|
|
}
|
|
|
|
if (request_cuda) {
|
|
|
|
cuda_work_fetch.set_request(p);
|
|
|
|
}
|
|
|
|
if (coproc_cuda) {
|
|
|
|
coproc_cuda->req_secs = cuda_work_fetch.req_secs;
|
|
|
|
coproc_cuda->req_instances = cuda_work_fetch.req_instances;
|
|
|
|
}
|
|
|
|
}
|
2007-02-21 16:58:29 +00:00
|
|
|
if (log_flags.work_fetch_debug) {
|
2008-12-31 23:07:59 +00:00
|
|
|
print_state();
|
|
|
|
if (p) {
|
|
|
|
print_req(p);
|
|
|
|
} else {
|
|
|
|
msg_printf(0, MSG_INFO, "No project chosen for work fetch");
|
2007-07-11 20:13:53 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
return p;
|
|
|
|
}
|
2007-02-21 16:58:29 +00:00
|
|
|
|
2009-01-03 06:01:17 +00:00
|
|
|
// Fill in this resource's request (seconds and instances) for
// project p.  Does nothing if the resource has no shortfall.
//
void RSC_WORK_FETCH::set_request(PROJECT* p) {
    if (!shortfall) return;

    RSC_PROJECT_WORK_FETCH& state = project_state(p);

    // A duration correction factor outside [0.02, 80] makes the
    // project's completion time estimates useless;
    // in that case ask for just 1 second.
    // Otherwise ask for the project's fetchable share of the work buffer.
    //
    const double dcf = p->duration_correction_factor;
    const bool dcf_unusable = (dcf < 0.02 || dcf > 80.0);
    req_secs = dcf_unusable ? 1 : gstate.work_buf_total()*state.fetchable_share;

    // the project's fetchable share of the idle instances, rounded up
    //
    req_instances = static_cast<int>(ceil(state.fetchable_share*nidle_now));
}
|
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
// Task atp has run for dt seconds; credit the instance-seconds it
// used to the current debt interval, both for its project and for
// the resource as a whole.
// CPU usage is dt*avg_ncpus; CUDA usage (accumulated only if a CUDA
// coprocessor exists) is dt*ncudas.
//
void WORK_FETCH::accumulate_inst_sec(ACTIVE_TASK* atp, double dt) {
    RESULT* rp = atp->result;
    APP_VERSION* avp = rp->avp;
    PROJECT* proj = rp->project;

    const double cpu_inst_secs = dt*avp->avg_ncpus;
    proj->cpu_pwf.secs_this_debt_interval += cpu_inst_secs;
    cpu_work_fetch.secs_this_debt_interval += cpu_inst_secs;

    if (!coproc_cuda) return;

    const double cuda_inst_secs = dt*avp->ncudas;
    proj->cuda_pwf.secs_this_debt_interval += cuda_inst_secs;
    cuda_work_fetch.secs_this_debt_interval += cuda_inst_secs;
}
|
2007-02-21 16:58:29 +00:00
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
void RSC_WORK_FETCH::update_debts() {
|
|
|
|
unsigned int i;
|
|
|
|
int nprojects = 0;
|
|
|
|
double ders = 0;
|
|
|
|
PROJECT* p;
|
|
|
|
|
|
|
|
for (i=0; i<gstate.projects.size(); i++) {
|
|
|
|
p = gstate.projects[i];
|
|
|
|
RSC_PROJECT_WORK_FETCH& w = project_state(p);
|
2009-01-21 20:51:33 +00:00
|
|
|
if (w.debt_eligible(p)) {
|
|
|
|
ders += p->resource_share;
|
|
|
|
}
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
2008-12-31 23:07:59 +00:00
|
|
|
double total_debt = 0;
|
|
|
|
for (i=0; i<gstate.projects.size(); i++) {
|
|
|
|
p = gstate.projects[i];
|
|
|
|
RSC_PROJECT_WORK_FETCH& w = project_state(p);
|
|
|
|
if (w.debt_eligible(p)) {
|
|
|
|
double share_frac = p->resource_share/ders;
|
2009-01-21 20:51:33 +00:00
|
|
|
double delta = share_frac*secs_this_debt_interval - w.secs_this_debt_interval;
|
2009-01-14 23:56:07 +00:00
|
|
|
w.debt += delta;
|
2009-01-21 20:51:33 +00:00
|
|
|
if (log_flags.debt_debug) {
|
|
|
|
msg_printf(p, MSG_INFO,
|
|
|
|
"[debt] %s debt %.2f delta %.2f share frac %.2f (%.2f/%.2f) secs %.2f rsc_secs %.2f",
|
|
|
|
rsc_name(rsc_type),
|
|
|
|
w.debt, delta, share_frac, p->resource_share, ders, secs_this_debt_interval,
|
|
|
|
w.secs_this_debt_interval
|
|
|
|
);
|
|
|
|
}
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
2008-12-31 23:07:59 +00:00
|
|
|
total_debt += w.debt;
|
|
|
|
nprojects++;
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
|
|
|
|
2009-01-14 23:56:07 +00:00
|
|
|
// normalize so mean is zero, and clamp
|
2007-02-21 16:58:29 +00:00
|
|
|
//
|
2008-12-31 23:07:59 +00:00
|
|
|
double avg_debt = total_debt / nprojects;
|
|
|
|
for (i=0; i<gstate.projects.size(); i++) {
|
|
|
|
p = gstate.projects[i];
|
2009-01-23 18:29:28 +00:00
|
|
|
if (p->non_cpu_intensive) continue;
|
2008-12-31 23:07:59 +00:00
|
|
|
RSC_PROJECT_WORK_FETCH& w = project_state(p);
|
2009-01-14 23:56:07 +00:00
|
|
|
w.debt -= avg_debt;
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
2008-12-31 23:07:59 +00:00
|
|
|
}
|
2007-02-21 16:58:29 +00:00
|
|
|
|
2009-01-03 06:01:17 +00:00
|
|
|
// find total and per-project resource shares for each resource
|
|
|
|
//
|
|
|
|
void WORK_FETCH::compute_shares() {
|
|
|
|
unsigned int i;
|
|
|
|
PROJECT* p;
|
|
|
|
for (i=0; i<gstate.projects.size(); i++) {
|
|
|
|
p = gstate.projects[i];
|
|
|
|
if (p->non_cpu_intensive) continue;
|
|
|
|
if (p->rr_sim_status.has_cpu_jobs) {
|
|
|
|
cpu_work_fetch.total_runnable_share += p->resource_share;
|
2009-01-21 20:51:33 +00:00
|
|
|
}
|
2009-01-03 06:01:17 +00:00
|
|
|
if (p->rr_sim_status.has_cuda_jobs) {
|
|
|
|
cuda_work_fetch.total_runnable_share += p->resource_share;
|
2009-01-21 20:51:33 +00:00
|
|
|
}
|
2009-01-08 00:41:15 +00:00
|
|
|
if (!p->pwf.can_fetch_work) continue;
|
2009-01-10 00:48:22 +00:00
|
|
|
if (p->cpu_pwf.may_have_work) {
|
2009-01-03 06:01:17 +00:00
|
|
|
cpu_work_fetch.total_fetchable_share += p->resource_share;
|
|
|
|
}
|
2009-01-10 00:48:22 +00:00
|
|
|
if (coproc_cuda && p->cuda_pwf.may_have_work) {
|
2009-01-21 20:51:33 +00:00
|
|
|
cuda_work_fetch.total_fetchable_share += p->resource_share;
|
2009-01-03 06:01:17 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
for (i=0; i<gstate.projects.size(); i++) {
|
|
|
|
p = gstate.projects[i];
|
|
|
|
if (p->non_cpu_intensive) continue;
|
|
|
|
if (p->rr_sim_status.has_cpu_jobs) {
|
|
|
|
p->cpu_pwf.runnable_share = p->resource_share/cpu_work_fetch.total_runnable_share;
|
2009-01-21 20:51:33 +00:00
|
|
|
}
|
2009-01-03 06:01:17 +00:00
|
|
|
if (p->rr_sim_status.has_cuda_jobs) {
|
|
|
|
p->cuda_pwf.runnable_share = p->resource_share/cuda_work_fetch.total_runnable_share;
|
2009-01-21 20:51:33 +00:00
|
|
|
}
|
2009-01-08 00:41:15 +00:00
|
|
|
if (!p->pwf.can_fetch_work) continue;
|
2009-01-10 00:48:22 +00:00
|
|
|
if (p->cpu_pwf.may_have_work) {
|
2009-01-03 06:01:17 +00:00
|
|
|
p->cpu_pwf.fetchable_share = p->resource_share/cpu_work_fetch.total_fetchable_share;
|
|
|
|
}
|
2009-01-10 00:48:22 +00:00
|
|
|
if (coproc_cuda && p->cuda_pwf.may_have_work) {
|
2009-01-03 06:01:17 +00:00
|
|
|
p->cuda_pwf.fetchable_share = p->resource_share/cuda_work_fetch.total_fetchable_share;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// should this project be accumulating debt for this resource?
|
|
|
|
//
|
2008-12-31 23:07:59 +00:00
|
|
|
bool RSC_PROJECT_WORK_FETCH::debt_eligible(PROJECT* p) {
|
2009-01-23 18:29:28 +00:00
|
|
|
if (p->non_cpu_intensive) return false;
|
2009-01-03 06:01:17 +00:00
|
|
|
if (backoff_time > gstate.now) return false;
|
2008-12-31 23:07:59 +00:00
|
|
|
if (p->suspended_via_gui) return false;
|
2009-01-21 20:51:33 +00:00
|
|
|
if (p->dont_request_more_work) return false;
|
2008-12-31 23:07:59 +00:00
|
|
|
return true;
|
|
|
|
}
|
2007-04-06 20:37:14 +00:00
|
|
|
|
2009-01-10 00:48:22 +00:00
|
|
|
void WORK_FETCH::write_request(FILE* f) {
|
2008-12-31 23:07:59 +00:00
|
|
|
fprintf(f,
|
2009-01-11 05:14:47 +00:00
|
|
|
" <work_req_seconds>%f</work_req_seconds>\n"
|
2009-01-03 06:01:17 +00:00
|
|
|
" <cpu_req_secs>%f</cpu_req_secs>\n"
|
2009-01-08 09:53:32 +00:00
|
|
|
" <cpu_req_instances>%d</cpu_req_instances>\n",
|
2009-01-03 06:01:17 +00:00
|
|
|
cpu_work_fetch.req_secs,
|
2009-01-11 05:14:47 +00:00
|
|
|
cpu_work_fetch.req_secs,
|
2009-01-03 06:01:17 +00:00
|
|
|
cpu_work_fetch.req_instances
|
2008-12-31 23:07:59 +00:00
|
|
|
);
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
|
|
|
|
2009-01-03 06:01:17 +00:00
|
|
|
// we just got a scheduler reply with the given jobs; update backoffs
|
2007-02-21 16:58:29 +00:00
|
|
|
//
|
2008-12-31 23:07:59 +00:00
|
|
|
void WORK_FETCH::handle_reply(PROJECT* p, vector<RESULT*> new_results) {
|
|
|
|
unsigned int i;
|
2009-01-03 06:01:17 +00:00
|
|
|
bool got_cpu = false, got_cuda = false;
|
2008-12-31 23:30:38 +00:00
|
|
|
|
2009-01-03 06:01:17 +00:00
|
|
|
// if didn't get any jobs, back off on requested resource types
|
|
|
|
//
|
|
|
|
if (!new_results.size()) {
|
|
|
|
if (cpu_work_fetch.req_secs) {
|
|
|
|
p->cpu_pwf.backoff(p, "CPU");
|
|
|
|
}
|
|
|
|
if (coproc_cuda && coproc_cuda->req_secs) {
|
|
|
|
p->cuda_pwf.backoff(p, "CUDA");
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
2008-12-31 23:30:38 +00:00
|
|
|
|
2009-01-03 06:01:17 +00:00
|
|
|
// if we did get jobs, clear backoff on resource types
|
|
|
|
//
|
2008-12-31 23:07:59 +00:00
|
|
|
for (i=0; i<new_results.size(); i++) {
|
|
|
|
RESULT* rp = new_results[i];
|
2009-01-03 06:01:17 +00:00
|
|
|
if (rp->avp->ncudas) got_cuda = true;
|
|
|
|
else got_cpu = true;
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
2009-01-03 06:01:17 +00:00
|
|
|
if (got_cpu) p->cpu_pwf.clear_backoff();
|
|
|
|
if (got_cuda) p->cuda_pwf.clear_backoff();
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
|
|
|
|
2009-01-10 00:48:22 +00:00
|
|
|
void WORK_FETCH::set_initial_work_request() {
|
2009-01-03 06:01:17 +00:00
|
|
|
cpu_work_fetch.req_secs = 1;
|
|
|
|
if (coproc_cuda) {
|
|
|
|
coproc_cuda->req_secs = 1;
|
|
|
|
}
|
2008-12-31 23:07:59 +00:00
|
|
|
}
|
|
|
|
|
2009-01-08 00:41:15 +00:00
|
|
|
// called once, at client startup
|
|
|
|
//
|
2008-12-31 23:07:59 +00:00
|
|
|
void WORK_FETCH::init() {
|
|
|
|
cpu_work_fetch.rsc_type = RSC_TYPE_CPU;
|
|
|
|
cpu_work_fetch.ninstances = gstate.ncpus;
|
|
|
|
|
|
|
|
if (coproc_cuda) {
|
|
|
|
cuda_work_fetch.rsc_type = RSC_TYPE_CUDA;
|
|
|
|
cuda_work_fetch.ninstances = coproc_cuda->count;
|
|
|
|
cuda_work_fetch.speed = coproc_cuda->flops_estimate()/gstate.host_info.p_fpops;
|
2007-02-21 16:58:29 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-01-03 06:01:17 +00:00
|
|
|
// Back off requests to project p for this resource.
// The interval starts at 60 and doubles on each call, capped at 86400
// (one minute and one day, if the unit is seconds as gstate.now
// suggests); backoff_time is set to now plus the interval.
// "name" labels the resource in the debug log message.
//
void RSC_PROJECT_WORK_FETCH::backoff(PROJECT* p, char* name) {
    if (!backoff_interval) {
        backoff_interval = 60;
    } else {
        backoff_interval *= 2;
        if (backoff_interval > 86400) {
            backoff_interval = 86400;
        }
    }
    backoff_time = gstate.now + backoff_interval;

    if (!log_flags.work_fetch_debug) return;
    msg_printf(p, MSG_INFO,
        "[wfd] backing off %s %f", name, backoff_interval
    );
}
|
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
////////////////////////
|
|
|
|
|
2007-02-21 16:58:29 +00:00
|
|
|
void CLIENT_STATE::compute_nuploading_results() {
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
for (i=0; i<projects.size(); i++) {
|
|
|
|
projects[i]->nuploading_results = 0;
|
|
|
|
}
|
|
|
|
for (i=0; i<results.size(); i++) {
|
|
|
|
RESULT* rp = results[i];
|
|
|
|
if (rp->state() == RESULT_FILES_UPLOADING) {
|
|
|
|
rp->project->nuploading_results++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-02-21 22:27:35 +00:00
|
|
|
bool PROJECT::runnable() {
|
|
|
|
if (suspended_via_gui) return false;
|
|
|
|
for (unsigned int i=0; i<gstate.results.size(); i++) {
|
|
|
|
RESULT* rp = gstate.results[i];
|
|
|
|
if (rp->project != this) continue;
|
|
|
|
if (rp->runnable()) return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool PROJECT::downloading() {
|
|
|
|
if (suspended_via_gui) return false;
|
|
|
|
for (unsigned int i=0; i<gstate.results.size(); i++) {
|
|
|
|
RESULT* rp = gstate.results[i];
|
|
|
|
if (rp->project != this) continue;
|
|
|
|
if (rp->downloading()) return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool PROJECT::some_result_suspended() {
|
|
|
|
unsigned int i;
|
|
|
|
for (i=0; i<gstate.results.size(); i++) {
|
|
|
|
RESULT *rp = gstate.results[i];
|
|
|
|
if (rp->project != this) continue;
|
|
|
|
if (rp->suspended_via_gui) return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
bool PROJECT::can_request_work() {
|
2007-02-21 22:27:35 +00:00
|
|
|
if (suspended_via_gui) return false;
|
|
|
|
if (master_url_fetch_pending) return false;
|
|
|
|
if (min_rpc_time > gstate.now) return false;
|
|
|
|
if (dont_request_more_work) return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool PROJECT::potentially_runnable() {
|
|
|
|
if (runnable()) return true;
|
2008-12-31 23:07:59 +00:00
|
|
|
if (can_request_work()) return true;
|
2007-02-21 22:27:35 +00:00
|
|
|
if (downloading()) return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool PROJECT::nearly_runnable() {
|
|
|
|
if (runnable()) return true;
|
|
|
|
if (downloading()) return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
bool RSC_PROJECT_WORK_FETCH::overworked() {
|
|
|
|
return debt < -gstate.global_prefs.cpu_scheduling_period();
|
2007-02-21 22:27:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bool RESULT::runnable() {
|
|
|
|
if (suspended_via_gui) return false;
|
|
|
|
if (project->suspended_via_gui) return false;
|
|
|
|
if (state() != RESULT_FILES_DOWNLOADED) return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool RESULT::nearly_runnable() {
|
|
|
|
return runnable() || downloading();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return true if the result is waiting for its files to download,
|
|
|
|
// and nothing prevents this from happening soon
|
|
|
|
//
|
|
|
|
bool RESULT::downloading() {
|
|
|
|
if (suspended_via_gui) return false;
|
|
|
|
if (project->suspended_via_gui) return false;
|
|
|
|
if (state() > RESULT_FILES_DOWNLOADING) return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2008-12-02 03:58:32 +00:00
|
|
|
double RESULT::estimated_duration_uncorrected() {
|
|
|
|
return wup->rsc_fpops_est/avp->flops;
|
2007-02-21 22:27:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// estimate how long a result will take on this host
|
|
|
|
//
|
2007-05-15 20:29:26 +00:00
|
|
|
#ifdef SIM
|
2008-12-02 03:58:32 +00:00
|
|
|
double RESULT::estimated_duration(bool for_work_fetch) {
|
2007-05-15 20:29:26 +00:00
|
|
|
SIM_PROJECT* spp = (SIM_PROJECT*)project;
|
|
|
|
if (dual_dcf && for_work_fetch && spp->completions_ratio_mean) {
|
2008-12-02 03:58:32 +00:00
|
|
|
return estimated_duration_uncorrected()*spp->completions_ratio_mean;
|
2007-05-15 20:29:26 +00:00
|
|
|
}
|
2008-12-02 03:58:32 +00:00
|
|
|
return estimated_duration_uncorrected()*project->duration_correction_factor;
|
2008-10-14 21:16:04 +00:00
|
|
|
}
|
|
|
|
#else
|
2008-12-02 03:58:32 +00:00
|
|
|
double RESULT::estimated_duration(bool) {
|
|
|
|
return estimated_duration_uncorrected()*project->duration_correction_factor;
|
2007-02-21 22:27:35 +00:00
|
|
|
}
|
2008-10-14 21:16:04 +00:00
|
|
|
#endif
|
2007-02-21 22:27:35 +00:00
|
|
|
|
2008-12-02 03:58:32 +00:00
|
|
|
double RESULT::estimated_time_remaining(bool for_work_fetch) {
|
2007-02-21 22:27:35 +00:00
|
|
|
if (computing_done()) return 0;
|
|
|
|
ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(this);
|
|
|
|
if (atp) {
|
2008-12-02 03:58:32 +00:00
|
|
|
return atp->est_time_to_completion(for_work_fetch);
|
2007-02-21 22:27:35 +00:00
|
|
|
}
|
2008-12-02 03:58:32 +00:00
|
|
|
return estimated_duration(for_work_fetch);
|
2007-02-21 22:27:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the estimated CPU time to completion (in seconds) of this task.
|
|
|
|
// Compute this as a weighted average of estimates based on
|
|
|
|
// 1) the workunit's flops count
|
|
|
|
// 2) the current reported CPU time and fraction done
|
|
|
|
//
|
2008-12-02 03:58:32 +00:00
|
|
|
double ACTIVE_TASK::est_time_to_completion(bool for_work_fetch) {
|
2007-02-21 22:27:35 +00:00
|
|
|
if (fraction_done >= 1) return 0;
|
2008-12-02 03:58:32 +00:00
|
|
|
double wu_est = result->estimated_duration(for_work_fetch);
|
2007-02-21 22:27:35 +00:00
|
|
|
if (fraction_done <= 0) return wu_est;
|
2008-12-02 17:48:29 +00:00
|
|
|
double frac_est = (elapsed_time / fraction_done) - elapsed_time;
|
2007-02-21 22:27:35 +00:00
|
|
|
double fraction_left = 1-fraction_done;
|
2009-01-03 06:01:17 +00:00
|
|
|
double wu_weight = fraction_left * fraction_left;
|
|
|
|
double fd_weight = 1 - wu_weight;
|
2008-12-02 22:19:39 +00:00
|
|
|
double x = fd_weight*frac_est + wu_weight*fraction_left*wu_est;
|
2007-05-08 01:55:28 +00:00
|
|
|
return x;
|
2007-02-21 22:27:35 +00:00
|
|
|
}
|
|
|
|
|
2008-12-31 23:07:59 +00:00
|
|
|
// the fraction of time a given CPU is working for BOINC
|
|
|
|
//
|
|
|
|
double CLIENT_STATE::overall_cpu_frac() {
|
|
|
|
double running_frac = time_stats.on_frac * time_stats.active_frac;
|
|
|
|
if (running_frac < 0.01) running_frac = 0.01;
|
|
|
|
if (running_frac > 1) running_frac = 1;
|
2009-01-03 06:01:17 +00:00
|
|
|
return running_frac;
|
2008-12-31 23:07:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// called when benchmarks change
|
|
|
|
//
|
|
|
|
void CLIENT_STATE::scale_duration_correction_factors(double factor) {
|
|
|
|
if (factor <= 0) return;
|
|
|
|
for (unsigned int i=0; i<projects.size(); i++) {
|
|
|
|
PROJECT* p = projects[i];
|
|
|
|
p->duration_correction_factor *= factor;
|
|
|
|
}
|
2009-01-03 06:01:17 +00:00
|
|
|
if (log_flags.cpu_sched_debug) {
|
|
|
|
msg_printf(NULL, MSG_INFO,
|
2008-12-31 23:07:59 +00:00
|
|
|
"[cpu_sched_debug] scaling duration correction factors by %f",
|
|
|
|
factor
|
|
|
|
);
|
2009-01-03 06:01:17 +00:00
|
|
|
}
|
2008-12-31 23:07:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Choose a new host CPID.
|
|
|
|
// If using account manager, do scheduler RPCs
|
|
|
|
// to all acct-mgr-attached projects to propagate the CPID
|
|
|
|
//
|
|
|
|
void CLIENT_STATE::generate_new_host_cpid() {
|
|
|
|
host_info.generate_host_cpid();
|
|
|
|
for (unsigned int i=0; i<projects.size(); i++) {
|
|
|
|
if (projects[i]->attached_via_acct_mgr) {
|
|
|
|
projects[i]->sched_rpc_pending = RPC_REASON_ACCT_MGR_REQ;
|
|
|
|
projects[i]->set_min_rpc_time(now + 15, "Sending new host CPID");
|
|
|
|
}
|
2007-02-21 22:27:35 +00:00
|
|
|
}
|
|
|
|
}
|
2007-02-21 16:58:29 +00:00
|
|
|
|