From 5039207e2c1712c3fb626a1eb4f1564707c211e2 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Tue, 21 Oct 2008 23:16:07 +0000 Subject: [PATCH] - scheduler: add config flag. If set the "effective NCPUS" (which is used to scale daily_result_quota and max_wus_in_progress) is max'd with the # of CUDA GPUs. svn path=/trunk/boinc/; revision=16246 --- checkin_notes | 13 ++++++++++ sched/sched_array.cpp | 2 +- sched/sched_config.cpp | 1 + sched/sched_config.h | 1 + sched/sched_locality.cpp | 24 +++++++++--------- sched/sched_send.cpp | 53 +++++++++++++++++++++++++--------------- sched/sched_send.h | 1 + sched/server_types.h | 1 - 8 files changed, 62 insertions(+), 34 deletions(-) diff --git a/checkin_notes b/checkin_notes index 9fc472f201..762fd39f20 100644 --- a/checkin_notes +++ b/checkin_notes @@ -8511,3 +8511,16 @@ David 21 Oct 2008 - make "make distclean" work in all directories samples/*/Makefile + +David 21 Oct 2008 + - scheduler: add config flag. + If set the "effective NCPUS" (which is used to scale + daily_result_quota and max_wus_in_progress) + is max'd with the # of CUDA GPUs. 
+ + sched/ + sched_array.cpp + sched_config.cpp,h + sched_locality.cpp + sched_send.cpp,h + server_types.h diff --git a/sched/sched_array.cpp b/sched/sched_array.cpp index 120049a628..f7a05797a0 100644 --- a/sched/sched_array.cpp +++ b/sched/sched_array.cpp @@ -54,7 +54,7 @@ void scan_work_array(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) { rnd_off = rand() % ssp->max_wu_results; for (j=0; j<ssp->max_wu_results; j++) { i = (j+rnd_off) % ssp->max_wu_results; - if (!reply.work_needed()) break; + if (!work_needed(sreq, reply, false)) break; WU_RESULT& wu_result = ssp->wu_results[i]; diff --git a/sched/sched_config.cpp b/sched/sched_config.cpp index 344e67abf9..f0d4d3f1ee 100644 --- a/sched/sched_config.cpp +++ b/sched/sched_config.cpp @@ -184,6 +184,7 @@ int SCHED_CONFIG::parse(FILE* f) { if (xp.parse_bool(tag, "use_credit_multiplier", use_credit_multiplier)) continue; if (xp.parse_bool(tag, "multiple_clients_per_host", multiple_clients_per_host)) continue; if (xp.parse_bool(tag, "no_vista_sandbox", no_vista_sandbox)) continue; + if (xp.parse_bool(tag, "have_cuda_apps", have_cuda_apps)) continue; if (xp.parse_bool(tag, "debug_version_select", debug_version_select)) continue; diff --git a/sched/sched_config.h b/sched/sched_config.h index 384217f795..d43ba175d2 100644 --- a/sched/sched_config.h +++ b/sched/sched_config.h @@ -125,6 +125,7 @@ public: bool use_credit_multiplier; bool multiple_clients_per_host; bool no_vista_sandbox; + bool have_cuda_apps; // log flags // diff --git a/sched/sched_locality.cpp b/sched/sched_locality.cpp index f4943484cd..e050aeced7 100644 --- a/sched/sched_locality.cpp +++ b/sched/sched_locality.cpp @@ -488,7 +488,7 @@ static int send_results_for_file( nsent = 0; - if (!reply.work_needed(true)) { + if (!work_needed(sreq, reply, true)) { return 0; } @@ -519,7 +519,7 @@ static int send_results_for_file( for (i=0; i<100; i++) { // avoid infinite loop int query_retval; - if (!reply.work_needed(true)) break; + if (!work_needed(sreq, reply, true)) 
break; log_messages.printf(MSG_DEBUG, "in_send_results_for_file(%s, %d) prev_result.id=%d\n", filename, i, prev_result.id @@ -740,7 +740,7 @@ static int send_new_file_work_deterministic_seeded( if (retval==ERR_NO_APP_VERSION || retval==ERR_INSUFFICIENT_RESOURCE) return retval; - if (nsent>0 || !reply.work_needed(true)) break; + if (nsent>0 || !work_needed(sreq, reply, true)) break; // construct a name which is lexically greater than the name of any result // which uses this file. sprintf(min_resultname, "%s__~", filename); @@ -793,7 +793,7 @@ static int send_new_file_work_deterministic( // continue deterministic search at lexically first possible // filename, continue to randomly choosen one - if (!getfile_retval && reply.work_needed(true)) { + if (!getfile_retval && work_needed(sreq, reply, true)) { send_new_file_work_deterministic_seeded( sreq, reply, nsent, "", start_filename ); @@ -834,7 +834,7 @@ static int send_new_file_work( SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply ) { - while (reply.work_needed(true)) { + while (work_needed(sreq, reply, true)) { int retval_sow, retval_snfwws; double frac=((double)rand())/(double)RAND_MAX; int now = time(0); @@ -857,7 +857,7 @@ static int send_new_file_work( if (retval_sow==ERR_NO_APP_VERSION || retval_sow==ERR_INSUFFICIENT_RESOURCE) return retval_sow; - while (reply.work_needed(true) && retry<5) { + while (work_needed(sreq, reply, true) && retry<5) { log_messages.printf(MSG_DEBUG, "send_new_file_work(%d): try to send from working set\n", retry ); @@ -867,7 +867,7 @@ static int send_new_file_work( } - if (reply.work_needed(true)) { + if (work_needed(sreq, reply, true)) { log_messages.printf(MSG_DEBUG, "send_new_file_work(): try deterministic method\n" ); @@ -878,7 +878,7 @@ static int send_new_file_work( return 1; } } - } // while reply.work_needed(true) + } // while work_needed(sreq, reply, true) return 0; } @@ -898,7 +898,7 @@ static int send_old_work( DB_RESULT result; int now=time(0); - if 
(!reply.work_needed(true)) { + if (!work_needed(sreq, reply, true)) { return 0; } @@ -1067,7 +1067,7 @@ void send_work_locality( int k = (i+j)%nfiles; int retval_srff; - if (!reply.work_needed(true)) break; + if (!work_needed(sreq, reply, true)) break; FILE_INFO& fi = sreq.file_infos[k]; retval_srff=send_results_for_file( fi.name, nsent, sreq, reply, false @@ -1081,7 +1081,7 @@ void send_work_locality( // If the work was not sent for other (dynamic) reason such as insufficient // cpu, then DON'T delete the file. // - if (nsent == 0 && reply.work_needed(true) && config.file_deletion_strategy == 1) { + if (nsent == 0 && work_needed(sreq, reply, true) && config.file_deletion_strategy == 1) { reply.file_deletes.push_back(fi); log_messages.printf(MSG_DEBUG, "[HOST#%d]: delete file %s (not needed)\n", reply.host.id, fi.name @@ -1104,7 +1104,7 @@ void send_work_locality( // send new files if needed // - if (reply.work_needed(true)) { + if (work_needed(sreq, reply, true)) { send_new_file_work(sreq, reply); } } diff --git a/sched/sched_send.cpp b/sched/sched_send.cpp index 172896f8ba..9049beef83 100644 --- a/sched/sched_send.cpp +++ b/sched/sched_send.cpp @@ -77,10 +77,21 @@ const char* infeasible_string(int code) { const int MIN_SECONDS_TO_SEND = 0; const int MAX_SECONDS_TO_SEND = (28*SECONDS_IN_DAY); -inline int effective_ncpus(HOST& host) { - int ncpus = host.p_ncpus; +// return a number that +// - is the # of CPUs in EDF simulation +// - scales the daily result quota +// - scales max_wus_in_progress + +inline int effective_ncpus(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) { + int ncpus = reply.host.p_ncpus; if (ncpus > config.max_ncpus) ncpus = config.max_ncpus; if (ncpus < 1) ncpus = 1; + if (config.have_cuda_apps) { + COPROC* cp = sreq.coprocs.lookup("cuda"); + if (cp && cp->count > ncpus) { + ncpus = cp->count; + } + } return ncpus; } @@ -653,7 +664,7 @@ int wu_is_infeasible_fast( } IP_RESULT candidate("", wu.delay_bound, est_cpu); strcpy(candidate.name, 
wu.name); - if (check_candidate(candidate, effective_ncpus(reply.host), request.ip_results)) { + if (check_candidate(candidate, effective_ncpus(request, reply), request.ip_results)) { // it passed the feasibility test, // but don't add it the the workload yet; // wait until we commit to sending it @@ -833,47 +844,49 @@ void unlock_sema() { // and we haven't exceeded result per RPC limit, // and we haven't exceeded results per day limit // -bool SCHEDULER_REPLY::work_needed(bool locality_sched) { +bool work_needed( + SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply, bool locality_sched +) { if (locality_sched) { // if we've failed to send a result because of a transient condition, // return false to preserve invariant // - if (wreq.disk.insufficient || wreq.speed.insufficient || wreq.mem.insufficient || wreq.no_allowed_apps_available) { + if (reply.wreq.disk.insufficient || reply.wreq.speed.insufficient || reply.wreq.mem.insufficient || reply.wreq.no_allowed_apps_available) { return false; } } - if (wreq.seconds_to_fill <= 0) return false; - if (wreq.disk_available <= 0) { + if (reply.wreq.seconds_to_fill <= 0) return false; + if (reply.wreq.disk_available <= 0) { return false; } - if (wreq.nresults >= config.max_wus_to_send) return false; + if (reply.wreq.nresults >= config.max_wus_to_send) return false; - int ncpus = effective_ncpus(host); + int ncpus = effective_ncpus(sreq, reply); // host.max_results_day is between 1 and config.daily_result_quota inclusive // wreq.daily_result_quota is between ncpus // and ncpus*host.max_results_day inclusive // if (config.daily_result_quota) { - if (host.max_results_day == 0 || host.max_results_day>config.daily_result_quota) { - host.max_results_day = config.daily_result_quota; + if (reply.host.max_results_day == 0 || reply.host.max_results_day>config.daily_result_quota) { + reply.host.max_results_day = config.daily_result_quota; } - wreq.daily_result_quota = ncpus*host.max_results_day; - if (host.nresults_today >= 
wreq.daily_result_quota) { - wreq.daily_result_quota_exceeded = true; + reply.wreq.daily_result_quota = ncpus*reply.host.max_results_day; + if (reply.host.nresults_today >= reply.wreq.daily_result_quota) { + reply.wreq.daily_result_quota_exceeded = true; return false; } } if (config.max_wus_in_progress) { - if (wreq.nresults_on_host >= config.max_wus_in_progress*ncpus) { + if (reply.wreq.nresults_on_host >= config.max_wus_in_progress*ncpus) { if (config.debug_send) { log_messages.printf(MSG_DEBUG, "in-progress job limit exceeded; %d > %d*%d\n", - wreq.nresults_on_host, config.max_wus_in_progress, ncpus + reply.wreq.nresults_on_host, config.max_wus_in_progress, ncpus ); } - wreq.cache_size_exceeded = true; + reply.wreq.cache_size_exceeded = true; return false; } } @@ -1022,7 +1035,7 @@ int add_result_to_reply( result.bavp = bavp; reply.insert_result(result); reply.wreq.seconds_to_fill -= wu_seconds_filled; - request.estimated_delay += wu_seconds_filled/effective_ncpus(reply.host); + request.estimated_delay += wu_seconds_filled/effective_ncpus(request, reply); reply.wreq.nresults++; reply.wreq.nresults_on_host++; if (!resent_result) reply.host.nresults_today++; @@ -1407,7 +1420,7 @@ void send_work(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) { if (config.workload_sim && sreq.have_other_results_list) { init_ip_results( - sreq.global_prefs.work_buf_min(), effective_ncpus(reply.host), sreq.ip_results + sreq.global_prefs.work_buf_min(), effective_ncpus(sreq, reply), sreq.ip_results ); } @@ -1450,7 +1463,7 @@ struct JOB_SET { disk_usage = 0; disk_limit = reply.wreq.disk_available; max_jobs = config.max_wus_to_send; - int ncpus = effective_ncpus(reply.host), n; + int ncpus = effective_ncpus(sreq, reply), n; if (config.daily_result_quota) { if (reply.host.max_results_day == 0 || reply.host.max_results_day>config.daily_result_quota) { diff --git a/sched/sched_send.h b/sched/sched_send.h index 81182bfdf0..a136710061 100644 --- a/sched/sched_send.h +++ b/sched/sched_send.h 
@@ -62,3 +62,4 @@ extern void lock_sema(); extern void unlock_sema(); extern const char* infeasible_string(int); extern bool app_not_selected(WORKUNIT&, SCHEDULER_REQUEST&, SCHEDULER_REPLY&); +extern bool work_needed(SCHEDULER_REQUEST&, SCHEDULER_REPLY&, bool); diff --git a/sched/server_types.h b/sched/server_types.h index 85d2bbb555..9ed4cf8122 100644 --- a/sched/server_types.h +++ b/sched/server_types.h @@ -363,7 +363,6 @@ struct SCHEDULER_REPLY { void insert_workunit_unique(WORKUNIT&); void insert_result(RESULT&); void insert_message(USER_MESSAGE&); - bool work_needed(bool locality_sched=false); void set_delay(double); void got_good_result(); // adjust max_results_day void got_bad_result(); // adjust max_results_day