client: fix work fetch bug when max_concurrent used

For a project P with max_concurrent (MC) restrictions, the round-robin (RR) simulation keeps track of the maximum number of instances used by P, subject to those restrictions, and uses that value to calculate P's "MC shortfall". Problem: if P doesn't have any jobs, the maximum number of instances is zero, so the MC shortfall is zero, and we erroneously don't request work for P. Solution: initialize the maximum number of instances to the minimum of the restrictions; we'll always be able to use at least that many instances.
2019-04-20 13:46:55 -07:00 · 2019-04-20 13:46:55 -07:00 · 0b5bae4cc9
parent 4dce65a9dd
commit 0b5bae4cc9
5 changed files with 11 additions and 5 deletions
--- a/client/app_config.cpp
+++ b/client/app_config.cpp
@ -117,7 +117,6 @@ void max_concurrent_init() {
 //
 static void clear_app_config(PROJECT* p) {
    p->app_configs.clear();
-    p->report_results_immediately = false;
    for (unsigned int i=0; i<gstate.apps.size(); i++) {
        APP* app = gstate.apps[i];
        if (app->project != p) continue;
--- a/client/work_fetch.cpp
+++ b/client/work_fetch.cpp
@ -70,14 +70,14 @@ inline bool has_coproc_app(PROJECT* p, int rsc_type) {

 ///////////////  RSC_PROJECT_WORK_FETCH  ///////////////

-void RSC_PROJECT_WORK_FETCH::rr_init() {
+void RSC_PROJECT_WORK_FETCH::rr_init(PROJECT *p) {
    fetchable_share = 0;
    n_runnable_jobs = 0;
    sim_nused = 0;
    nused_total = 0;
    deadlines_missed = 0;
    mc_shortfall = 0;
-    max_nused = 0;
+    max_nused = p->app_configs.project_min_mc;
 }

 void RSC_PROJECT_WORK_FETCH::resource_backoff(PROJECT* p, const char* name) {
@ -415,7 +415,7 @@ void WORK_FETCH::rr_init() {
        PROJECT* p = gstate.projects[i];
        p->pwf.rr_init(p);
        for (int j=0; j<coprocs.n_rsc; j++) {
-            p->rsc_pwf[j].rr_init();
+            p->rsc_pwf[j].rr_init(p);
        }
    }
 }
--- a/client/work_fetch.h
+++ b/client/work_fetch.h
@ -151,7 +151,7 @@ struct RSC_PROJECT_WORK_FETCH {
    }
    RSC_REASON compute_rsc_project_reason(PROJECT*, int rsc_type);
    void resource_backoff(PROJECT*, const char*);
-    void rr_init();
+    void rr_init(PROJECT*);
    void clear_backoff() {
        backoff_time = 0;
        backoff_interval = 0;
--- a/lib/cc_config.cpp
+++ b/lib/cc_config.cpp
@ -24,6 +24,7 @@
 #include <cstdio>
 #include <cstring>
 #include <unistd.h>
+#include <algorithm>
 #endif

 #include "common_defs.h"
@ -688,6 +689,7 @@ int CC_CONFIG::write(MIOFILE& out, LOG_FLAGS& log_flags) {
 // app_config.xml stuff

 bool have_max_concurrent = false;
+    // does any project have a max concurrent restriction?

 int APP_CONFIG::parse_gpu_versions(
    XML_PARSER& xp, MSG_VEC& mv, LOG_FLAGS& log_flags
@ -804,6 +806,7 @@ int APP_CONFIGS::parse(XML_PARSER& xp, MSG_VEC& mv, LOG_FLAGS& log_flags) {
            app_configs.push_back(ac);
            if (ac.max_concurrent) {
                project_has_mc = true;
+                project_min_mc = project_min_mc?std::min(project_min_mc, ac.max_concurrent):ac.max_concurrent;
            }
            continue;
        }
@ -819,6 +822,7 @@ int APP_CONFIGS::parse(XML_PARSER& xp, MSG_VEC& mv, LOG_FLAGS& log_flags) {
                have_max_concurrent = true;
                project_has_mc = true;
                project_max_concurrent = n;
+                project_min_mc = project_min_mc?std::min(project_min_mc, n):n;
            }
            continue;
        }
--- a/lib/cc_config.h
+++ b/lib/cc_config.h
@ -242,6 +242,8 @@ struct APP_CONFIGS {
    int project_max_concurrent;
    bool project_has_mc;
        // have app- or project-level max concurrent restriction
+    int project_min_mc;
+        // the min of these restrictions
    bool report_results_immediately;

    int parse(XML_PARSER&, MSG_VEC&, LOG_FLAGS&);
@ -253,6 +255,7 @@ struct APP_CONFIGS {
        app_version_configs.clear();
        project_max_concurrent = 0;
        project_has_mc = false;
+        project_min_mc = 0;
        report_results_immediately = false;
    }
 };