From 0b5bae4cc98660538b76842dea8b5cf4a16d06f6 Mon Sep 17 00:00:00 2001
From: David Anderson <davea@ssl.berkeley.edu>
Date: Sat, 20 Apr 2019 13:46:55 -0700
Subject: [PATCH] client: fix work fetch bug when max_concurrent used

For a project P with max_concurrent (MC) restrictions,
during round-robin (RR) simulation we keep track of
the max # of instances used by P, subject to those restrictions,
and use that to calculate P's "MC shortfall".

Problem: if P doesn't have any jobs, its max # of instances is zero,
so its MC shortfall is zero, so we erroneously don't request work for P.

Solution: initialize P's max # of instances to the minimum of its
MC restrictions (app-level and project-level);
P will always be able to use at least that many instances.
---
 client/app_config.cpp | 1 -
 client/work_fetch.cpp | 6 +++---
 client/work_fetch.h   | 2 +-
 lib/cc_config.cpp     | 4 ++++
 lib/cc_config.h       | 3 +++
 5 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/client/app_config.cpp b/client/app_config.cpp
index cfdb0047e1..0f57af9760 100644
--- a/client/app_config.cpp
+++ b/client/app_config.cpp
@@ -117,7 +117,6 @@ void max_concurrent_init() {
 //
 static void clear_app_config(PROJECT* p) {
     p->app_configs.clear();
-    p->report_results_immediately = false;
     for (unsigned int i=0; i<gstate.apps.size(); i++) {
         APP* app = gstate.apps[i];
         if (app->project != p) continue;
diff --git a/client/work_fetch.cpp b/client/work_fetch.cpp
index 685fb319d7..0c2dc9f728 100644
--- a/client/work_fetch.cpp
+++ b/client/work_fetch.cpp
@@ -70,14 +70,14 @@ inline bool has_coproc_app(PROJECT* p, int rsc_type) {
 
 ///////////////  RSC_PROJECT_WORK_FETCH  ///////////////
 
-void RSC_PROJECT_WORK_FETCH::rr_init() {
+void RSC_PROJECT_WORK_FETCH::rr_init(PROJECT *p) {
     fetchable_share = 0;
     n_runnable_jobs = 0;
     sim_nused = 0;
     nused_total = 0;
     deadlines_missed = 0;
     mc_shortfall = 0;
-    max_nused = 0;
+    max_nused = p->app_configs.project_min_mc;
 }
 
 void RSC_PROJECT_WORK_FETCH::resource_backoff(PROJECT* p, const char* name) {
@@ -415,7 +415,7 @@ void WORK_FETCH::rr_init() {
         PROJECT* p = gstate.projects[i];
         p->pwf.rr_init(p);
         for (int j=0; j<coprocs.n_rsc; j++) {
-            p->rsc_pwf[j].rr_init();
+            p->rsc_pwf[j].rr_init(p);
         }
     }
 }
diff --git a/client/work_fetch.h b/client/work_fetch.h
index 57bb924f23..9dc6bc9f8c 100644
--- a/client/work_fetch.h
+++ b/client/work_fetch.h
@@ -151,7 +151,7 @@ struct RSC_PROJECT_WORK_FETCH {
     }
     RSC_REASON compute_rsc_project_reason(PROJECT*, int rsc_type);
     void resource_backoff(PROJECT*, const char*);
-    void rr_init();
+    void rr_init(PROJECT*);
     void clear_backoff() {
         backoff_time = 0;
         backoff_interval = 0;
diff --git a/lib/cc_config.cpp b/lib/cc_config.cpp
index 37a397324b..2367b9ee51 100644
--- a/lib/cc_config.cpp
+++ b/lib/cc_config.cpp
@@ -24,6 +24,7 @@
 #include <cstdio>
 #include <cstring>
 #include <unistd.h>
+#include <algorithm>
 #endif
 
 #include "common_defs.h"
@@ -688,6 +689,7 @@ int CC_CONFIG::write(MIOFILE& out, LOG_FLAGS& log_flags) {
 // app_config.xml stuff
 
 bool have_max_concurrent = false;
+    // does any project have a max concurrent restriction?
 
 int APP_CONFIG::parse_gpu_versions(
     XML_PARSER& xp, MSG_VEC& mv, LOG_FLAGS& log_flags
@@ -804,6 +806,7 @@ int APP_CONFIGS::parse(XML_PARSER& xp, MSG_VEC& mv, LOG_FLAGS& log_flags) {
             app_configs.push_back(ac);
             if (ac.max_concurrent) {
                 project_has_mc = true;
+                project_min_mc = project_min_mc?std::min(project_min_mc, ac.max_concurrent):ac.max_concurrent;
             }
             continue;
         }
@@ -819,6 +822,7 @@ int APP_CONFIGS::parse(XML_PARSER& xp, MSG_VEC& mv, LOG_FLAGS& log_flags) {
                 have_max_concurrent = true;
                 project_has_mc = true;
                 project_max_concurrent = n;
+                project_min_mc = project_min_mc?std::min(project_min_mc, n):n;
             }
             continue;
         }
diff --git a/lib/cc_config.h b/lib/cc_config.h
index e153e65a2d..a39c414f18 100644
--- a/lib/cc_config.h
+++ b/lib/cc_config.h
@@ -242,6 +242,8 @@ struct APP_CONFIGS {
     int project_max_concurrent;
     bool project_has_mc;
         // have app- or project-level max concurrent restriction
+    int project_min_mc;
+        // the min of these restrictions
     bool report_results_immediately;
 
     int parse(XML_PARSER&, MSG_VEC&, LOG_FLAGS&);
@@ -253,6 +255,7 @@ struct APP_CONFIGS {
         app_version_configs.clear();
         project_max_concurrent = 0;
         project_has_mc = false;
+        project_min_mc = 0;
         report_results_immediately = false;
     }
 };