From 0b5bae4cc98660538b76842dea8b5cf4a16d06f6 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Sat, 20 Apr 2019 13:46:55 -0700 Subject: [PATCH] client: fix work fetch bug when max_concurrent used For projects P with MC restrictions, during RR simulation, we keep track of the max # of instances used by P, subject to the restrictions, and use that to calculate its "MC shortfall". Problem: if P doesn't have any jobs, the max # instances is zero, so MC shortfall is zero, so we erroneously don't request work for P. Solution: initialize max # of instances to the min of the restrictions; we'll always be able to use at least that many instances. --- client/app_config.cpp | 1 - client/work_fetch.cpp | 6 +++--- client/work_fetch.h | 2 +- lib/cc_config.cpp | 4 ++++ lib/cc_config.h | 3 +++ 5 files changed, 11 insertions(+), 5 deletions(-) diff --git a/client/app_config.cpp b/client/app_config.cpp index cfdb0047e1..0f57af9760 100644 --- a/client/app_config.cpp +++ b/client/app_config.cpp @@ -117,7 +117,6 @@ void max_concurrent_init() { // static void clear_app_config(PROJECT* p) { p->app_configs.clear(); - p->report_results_immediately = false; for (unsigned int i=0; iproject != p) continue; diff --git a/client/work_fetch.cpp b/client/work_fetch.cpp index 685fb319d7..0c2dc9f728 100644 --- a/client/work_fetch.cpp +++ b/client/work_fetch.cpp @@ -70,14 +70,14 @@ inline bool has_coproc_app(PROJECT* p, int rsc_type) { /////////////// RSC_PROJECT_WORK_FETCH /////////////// -void RSC_PROJECT_WORK_FETCH::rr_init() { +void RSC_PROJECT_WORK_FETCH::rr_init(PROJECT *p) { fetchable_share = 0; n_runnable_jobs = 0; sim_nused = 0; nused_total = 0; deadlines_missed = 0; mc_shortfall = 0; - max_nused = 0; + max_nused = p->app_configs.project_min_mc; } void RSC_PROJECT_WORK_FETCH::resource_backoff(PROJECT* p, const char* name) { @@ -415,7 +415,7 @@ void WORK_FETCH::rr_init() { PROJECT* p = gstate.projects[i]; p->pwf.rr_init(p); for (int j=0; jrsc_pwf[j].rr_init(); + p->rsc_pwf[j].rr_init(p); } } } diff --git a/client/work_fetch.h b/client/work_fetch.h index 57bb924f23..9dc6bc9f8c 100644 --- a/client/work_fetch.h +++ b/client/work_fetch.h @@ -151,7 +151,7 @@ struct RSC_PROJECT_WORK_FETCH { } RSC_REASON compute_rsc_project_reason(PROJECT*, int rsc_type); void resource_backoff(PROJECT*, const char*); - void rr_init(); + void rr_init(PROJECT*); void clear_backoff() { backoff_time = 0; backoff_interval = 0; diff --git a/lib/cc_config.cpp b/lib/cc_config.cpp index 37a397324b..2367b9ee51 100644 --- a/lib/cc_config.cpp +++ b/lib/cc_config.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #endif #include "common_defs.h" @@ -688,6 +689,7 @@ int CC_CONFIG::write(MIOFILE& out, LOG_FLAGS& log_flags) { // app_config.xml stuff bool have_max_concurrent = false; + // does any project have a max concurrent restriction? int APP_CONFIG::parse_gpu_versions( XML_PARSER& xp, MSG_VEC& mv, LOG_FLAGS& log_flags @@ -804,6 +806,7 @@ int APP_CONFIGS::parse(XML_PARSER& xp, MSG_VEC& mv, LOG_FLAGS& log_flags) { app_configs.push_back(ac); if (ac.max_concurrent) { project_has_mc = true; + project_min_mc = project_min_mc?std::min(project_min_mc, ac.max_concurrent):ac.max_concurrent; } continue; } @@ -819,6 +822,7 @@ int APP_CONFIGS::parse(XML_PARSER& xp, MSG_VEC& mv, LOG_FLAGS& log_flags) { have_max_concurrent = true; project_has_mc = true; project_max_concurrent = n; + project_min_mc = project_min_mc?std::min(project_min_mc, n):n; } continue; } diff --git a/lib/cc_config.h b/lib/cc_config.h index e153e65a2d..a39c414f18 100644 --- a/lib/cc_config.h +++ b/lib/cc_config.h @@ -242,6 +242,8 @@ struct APP_CONFIGS { int project_max_concurrent; bool project_has_mc; // have app- or project-level max concurrent restriction + int project_min_mc; + // the min of these restrictions bool report_results_immediately; int parse(XML_PARSER&, MSG_VEC&, LOG_FLAGS&); @@ -253,6 +255,7 @@ struct APP_CONFIGS { app_version_configs.clear(); project_max_concurrent = 0; project_has_mc = false; + project_min_mc = 0; report_results_immediately = false; } };