client: fix work fetch bug when max_concurrent used

For each project P with max_concurrent (MC) restrictions, the round-robin (RR)
simulation keeps track of the max # of instances used by P,
subject to those restrictions, and uses that to calculate P's "MC shortfall".

Problem: if P doesn't have any jobs, the max # of instances is zero,
so the MC shortfall is zero, so we erroneously don't request work for P.

Solution: initialize max # of instances to the min of the restrictions;
we'll always be able to use at least that many instances.
This commit is contained in:
David Anderson 2019-04-20 13:46:55 -07:00
parent 4dce65a9dd
commit 0b5bae4cc9
5 changed files with 11 additions and 5 deletions

View File

@ -117,7 +117,6 @@ void max_concurrent_init() {
//
static void clear_app_config(PROJECT* p) {
p->app_configs.clear();
p->report_results_immediately = false;
for (unsigned int i=0; i<gstate.apps.size(); i++) {
APP* app = gstate.apps[i];
if (app->project != p) continue;

View File

@ -70,14 +70,14 @@ inline bool has_coproc_app(PROJECT* p, int rsc_type) {
/////////////// RSC_PROJECT_WORK_FETCH ///////////////
void RSC_PROJECT_WORK_FETCH::rr_init() {
void RSC_PROJECT_WORK_FETCH::rr_init(PROJECT *p) {
fetchable_share = 0;
n_runnable_jobs = 0;
sim_nused = 0;
nused_total = 0;
deadlines_missed = 0;
mc_shortfall = 0;
max_nused = 0;
max_nused = p->app_configs.project_min_mc;
}
void RSC_PROJECT_WORK_FETCH::resource_backoff(PROJECT* p, const char* name) {
@ -415,7 +415,7 @@ void WORK_FETCH::rr_init() {
PROJECT* p = gstate.projects[i];
p->pwf.rr_init(p);
for (int j=0; j<coprocs.n_rsc; j++) {
p->rsc_pwf[j].rr_init();
p->rsc_pwf[j].rr_init(p);
}
}
}

View File

@ -151,7 +151,7 @@ struct RSC_PROJECT_WORK_FETCH {
}
RSC_REASON compute_rsc_project_reason(PROJECT*, int rsc_type);
void resource_backoff(PROJECT*, const char*);
void rr_init();
void rr_init(PROJECT*);
void clear_backoff() {
backoff_time = 0;
backoff_interval = 0;

View File

@ -24,6 +24,7 @@
#include <cstdio>
#include <cstring>
#include <unistd.h>
#include <algorithm>
#endif
#include "common_defs.h"
@ -688,6 +689,7 @@ int CC_CONFIG::write(MIOFILE& out, LOG_FLAGS& log_flags) {
// app_config.xml stuff
bool have_max_concurrent = false;
// does any project have a max concurrent restriction?
int APP_CONFIG::parse_gpu_versions(
XML_PARSER& xp, MSG_VEC& mv, LOG_FLAGS& log_flags
@ -804,6 +806,7 @@ int APP_CONFIGS::parse(XML_PARSER& xp, MSG_VEC& mv, LOG_FLAGS& log_flags) {
app_configs.push_back(ac);
if (ac.max_concurrent) {
project_has_mc = true;
project_min_mc = project_min_mc?std::min(project_min_mc, ac.max_concurrent):ac.max_concurrent;
}
continue;
}
@ -819,6 +822,7 @@ int APP_CONFIGS::parse(XML_PARSER& xp, MSG_VEC& mv, LOG_FLAGS& log_flags) {
have_max_concurrent = true;
project_has_mc = true;
project_max_concurrent = n;
project_min_mc = project_min_mc?std::min(project_min_mc, n):n;
}
continue;
}

View File

@ -242,6 +242,8 @@ struct APP_CONFIGS {
int project_max_concurrent;
bool project_has_mc;
// have app- or project-level max concurrent restriction
int project_min_mc;
// the min of these restrictions; 0 if there are none
bool report_results_immediately;
int parse(XML_PARSER&, MSG_VEC&, LOG_FLAGS&);
@ -253,6 +255,7 @@ struct APP_CONFIGS {
app_version_configs.clear();
project_max_concurrent = 0;
project_has_mc = false;
project_min_mc = 0;
report_results_immediately = false;
}
};