From b076a947fcc3cc0b99a8591a240ee29322d7198a Mon Sep 17 00:00:00 2001 From: David Anderson Date: Sun, 9 Mar 2014 17:09:21 -0700 Subject: [PATCH] client: work fetch tweak to avoid starvation in a particular case My commit of Feb 7 caused work fetch to project P to be deferred for up to 5 min if an upload to P is active, even if some instances are idle. This was to deal with a case where the idleness was caused by a jobs-in-progress limit by P, and work requests lead to long backoff. However, this can cause instances to be idle unnecessarily. I changed things so that, if instances are idle, a work fetch can happen even during upload. But only one such fetch will be done. --- client/cs_apps.cpp | 4 ++++ client/cs_scheduler.cpp | 10 ++++++++-- client/work_fetch.cpp | 1 + client/work_fetch.h | 4 ++++ 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/client/cs_apps.cpp b/client/cs_apps.cpp index b107c53f65..88f0d4a3c8 100644 --- a/client/cs_apps.cpp +++ b/client/cs_apps.cpp @@ -205,6 +205,10 @@ int CLIENT_STATE::app_finished(ACTIVE_TASK& at) { double elapsed_time = now - rec_interval_start; work_fetch.accumulate_inst_sec(&at, elapsed_time); + rp->project->pwf.request_if_idle_and_uploading = true; + // set this to allow work fetch if idle instance, + // even before upload finishes + return 0; } diff --git a/client/cs_scheduler.cpp b/client/cs_scheduler.cpp index 69313350db..d016b14437 100644 --- a/client/cs_scheduler.cpp +++ b/client/cs_scheduler.cpp @@ -534,7 +534,13 @@ bool CLIENT_STATE::scheduler_rpc_poll() { p = work_fetch.choose_project(); if (p) { if (actively_uploading(p)) { - //if (!idle_request()) { + bool dont_request = true; + if (p->pwf.request_if_idle_and_uploading) { + if (idle_request()) { + dont_request = false; + } + } + if (dont_request) { if (log_flags.work_fetch_debug) { msg_printf(p, MSG_INFO, "[work_fetch] deferring work fetch; upload active" @@ -542,7 +548,7 @@ bool CLIENT_STATE::scheduler_rpc_poll() { } p->sched_rpc_pending = 0; return false; - //} + } } scheduler_op->init_op_project(p, RPC_REASON_NEED_WORK); return true; diff --git a/client/work_fetch.cpp b/client/work_fetch.cpp index fa21178ad2..08843dfa17 100644 --- a/client/work_fetch.cpp +++ b/client/work_fetch.cpp @@ -955,6 +955,7 @@ void WORK_FETCH::handle_reply( p->rsc_pwf[i].clear_backoff(); } } + p->pwf.request_if_idle_and_uploading = false; } // set up for initial RPC. diff --git a/client/work_fetch.h b/client/work_fetch.h index 75f7d09f29..b58a77193a 100644 --- a/client/work_fetch.h +++ b/client/work_fetch.h @@ -274,6 +274,10 @@ struct PROJECT_WORK_FETCH { int cant_fetch_work_reason; int compute_cant_fetch_work_reason(PROJECT*); int n_runnable_jobs; + bool request_if_idle_and_uploading; + // Set when a job finishes. + // If we're uploading but a resource is idle, make a work request. + // If this succeeds, clear the flag. PROJECT_WORK_FETCH() { memset(this, 0, sizeof(*this)); }