From 6241fff21fc1c3799b379caf8621e035a743847e Mon Sep 17 00:00:00 2001 From: David Anderson Date: Fri, 20 Feb 2009 21:44:39 +0000 Subject: [PATCH] - client: new work-fetch policy: 1) if an instance is idle, get work from highest-debt project, even if it's overworked. 2) if resource has a shortfall, get work from highest-debt non-overworked project 3) if there's a fetchable non-overworked project with no runnable jobs, get from from the highest-debt one. (each step is done first for GPU, then CPU) Clause 3) is new. It will cause the client to get jobs for as many projects as possible, even if there is no shortfall. This is necessary to make the notion of "overworked" meaningful (otherwise, any project with long jobs can become overworked). It also maintains as much variety as possible (like pre-6.6 clients). Also (small bug fix) if a project is overworked for resource R, request work for R only in case 1). svn path=/trunk/boinc/; revision=17327 --- checkin_notes | 22 +++++++++ client/work_fetch.cpp | 112 +++++++++++++++++++++++++++++++++--------- client/work_fetch.h | 2 +- 3 files changed, 112 insertions(+), 24 deletions(-) diff --git a/checkin_notes b/checkin_notes index f3363c3f0a..71f3622282 100644 --- a/checkin_notes +++ b/checkin_notes @@ -1787,3 +1787,25 @@ David 20 Feb 2009 cpu_sched.cpp sim.h work_fetch.cpp,h + +David 20 Feb 2009 + - client: new work-fetch policy: + 1) if an instance is idle, get work from highest-debt project, + even if it's overworked. + 2) if resource has a shortfall, get work from highest-debt + non-overworked project + 3) if there's a fetchable non-overworked project with no runnable jobs, + get from from the highest-debt one. + (each step is done first for GPU, then CPU) + Clause 3) is new. + It will cause the client to get jobs for as many projects as possible, + even if there is no shortfall. + This is necessary to make the notion of "overworked" meaningful + (otherwise, any project with long jobs can become overworked). + It also maintains as much variety as possible (like pre-6.6 clients). + + Also (small bug fix) if a project is overworked for resource R, + request work for R only in case 1). + + client/ + work_fetch.cpp,h diff --git a/client/work_fetch.cpp b/client/work_fetch.cpp index 98488c400c..c550757e54 100644 --- a/client/work_fetch.cpp +++ b/client/work_fetch.cpp @@ -140,17 +140,24 @@ bool RSC_PROJECT_WORK_FETCH::overworked() { return (debt < -x); } -// choose the best project to ask for work for this resource. -// If not urgent, don't choose an overworked project. +// Choose the best project to ask for work for this resource, +// given some constraints. // -PROJECT* RSC_WORK_FETCH::choose_project(bool urgent) { +PROJECT* RSC_WORK_FETCH::choose_project( + bool allow_overworked, // consider overworked projects + bool only_starved // consider only starved projects +) { PROJECT* pbest = NULL; for (unsigned i=0; ipwf.can_fetch_work) continue; if (!project_state(p).may_have_work) continue; - if (!urgent && project_state(p).overworked()) { + RSC_PROJECT_WORK_FETCH& rpwf = project_state(p); + if (!allow_overworked && rpwf.overworked()) { + continue; + } + if (only_starved && rpwf.has_runnable_jobs) { continue; } if (pbest) { @@ -247,7 +254,8 @@ static bool has_a_job(PROJECT* p) { return false; } -// we're going to contact this project; decide how much work to request +// we're going to contact this project reasons other than work fetch; +// decide if we should piggy-back a work fetch request. // void WORK_FETCH::compute_work_request(PROJECT* p) { clear_request(); @@ -259,7 +267,7 @@ void WORK_FETCH::compute_work_request(PROJECT* p) { return; } - // check if this is the project we'd ask for work anyway + // see if this is the project we'd ask for work anyway // PROJECT* pbest = choose_project(); if (p == pbest) return; @@ -298,43 +306,95 @@ PROJECT* WORK_FETCH::choose_project() { gstate.rr_simulation(); set_overall_debts(); - bool request_cpu = true; - bool request_cuda = (coproc_cuda != NULL); + bool cpu_emergency = false; + bool cuda_emergency = false; - // if a resource is currently idle, get work for it; - // give GPU priority over CPU + // If a resource is idle, it's an "emergency"; + // get work for it from the project with greatest LTD, + // even if it's overworked. + // Give GPU priority over CPU // if (coproc_cuda && cuda_work_fetch.nidle_now) { - p = cuda_work_fetch.choose_project(true); + p = cuda_work_fetch.choose_project(true, false); if (p) { - request_cpu = false; + cuda_emergency = true; + if (log_flags.work_fetch_debug) { + msg_printf(p, MSG_INFO, "chosen: CUDA idle instance"); + } } } if (!p && cpu_work_fetch.nidle_now) { - p = cpu_work_fetch.choose_project(true); + p = cpu_work_fetch.choose_project(true, false); if (p) { - request_cuda = false; + cpu_emergency = true; + if (log_flags.work_fetch_debug) { + msg_printf(p, MSG_INFO, "chosen: CPU idle instance"); + } } } - // if a resource has a shortfall, get work for it. + // If a resource has a shortfall, + // get work for it from the non-overworked project with greatest LTD. // if (!p && coproc_cuda && cuda_work_fetch.shortfall) { - p = cuda_work_fetch.choose_project(false); + p = cuda_work_fetch.choose_project(false, false); + if (p) { + if (log_flags.work_fetch_debug) { + msg_printf(p, MSG_INFO, "chosen: CUDA shortfall"); + } + } } if (!p && cpu_work_fetch.shortfall) { - p = cpu_work_fetch.choose_project(false); + p = cpu_work_fetch.choose_project(false, false); + if (p) { + if (log_flags.work_fetch_debug) { + msg_printf(p, MSG_INFO, "chosen: CPU shortfall"); + } + } + } + + // If any project is not overworked and has no runnable jobs, + // get work from the one with greatest LTD. + // + if (!p && coproc_cuda) { + p = cuda_work_fetch.choose_project(false, true); + if (p) { + if (log_flags.work_fetch_debug) { + msg_printf(p, MSG_INFO, "chosen: project has no CUDA jobs"); + } + } + } + if (!p) { + p = cpu_work_fetch.choose_project(false, true); + if (p) { + if (log_flags.work_fetch_debug) { + msg_printf(p, MSG_INFO, "chosen: project has no CPU jobs"); + } + } } // decide how much work to request for each resource // clear_request(); if (p) { - if (request_cpu) { + // in emergency cases, get work only for that resource + // + if (cpu_emergency) { cpu_work_fetch.set_request(p); - } - if (request_cuda) { + } else if (cuda_emergency) { cuda_work_fetch.set_request(p); + } else { + // in non-emergency cases, get work for any resource + // for which the project is not overworked + // + if (!cpu_work_fetch.project_state(p).overworked()) { + cpu_work_fetch.set_request(p); + } + if (coproc_cuda) { + if (!cuda_work_fetch.project_state(p).overworked()) { + cuda_work_fetch.set_request(p); + } + } } } if (coproc_cuda) { @@ -355,7 +415,6 @@ PROJECT* WORK_FETCH::choose_project() { } void RSC_WORK_FETCH::set_request(PROJECT* p) { - if (!shortfall) return; RSC_PROJECT_WORK_FETCH& w = project_state(p); if (p->duration_correction_factor < 0.02 || p->duration_correction_factor > 80.0) { @@ -364,10 +423,9 @@ void RSC_WORK_FETCH::set_request(PROJECT* p) { // req_secs = 1; } else { - // otherwise ask for the max of the project's share and the shortfall + // otherwise ask for the project's share // req_secs = gstate.work_buf_total()*w.fetchable_share; - if (req_secs > shortfall) req_secs = shortfall; } req_instances = (int)ceil(w.fetchable_share*nidle_now); } @@ -459,6 +517,7 @@ void RSC_WORK_FETCH::update_debts() { bool first = true; for (i=0; inon_cpu_intensive) continue; RSC_PROJECT_WORK_FETCH& w = project_state(p); if (w.debt_eligible(p, *this)) { double share_frac = p->resource_share/ders; @@ -484,6 +543,13 @@ void RSC_WORK_FETCH::update_debts() { max_debt = w.debt; } } + } else { + if (log_flags.debt_debug) { + msg_printf(p, MSG_INFO, + "[debt] %s ineligible; debt %.2f", + rsc_name(rsc_type), w.debt + ); + } } } diff --git a/client/work_fetch.h b/client/work_fetch.h index 2b25203d2b..e8826e0213 100644 --- a/client/work_fetch.h +++ b/client/work_fetch.h @@ -118,7 +118,7 @@ struct RSC_WORK_FETCH { void rr_init(); void accumulate_shortfall(double d_time); void update_estimated_delay(double dt); - PROJECT* choose_project(bool urgent); + PROJECT* choose_project(bool allow_overworked, bool only_starved); void accumulate_debt(); RSC_PROJECT_WORK_FETCH& project_state(PROJECT*); void update_debts();