From c98a2415af2e10736af54c81065252b6a311f570 Mon Sep 17 00:00:00 2001
From: David Anderson <davea@ssl.berkeley.edu>
Date: Sun, 22 Oct 2006 00:42:44 +0000
Subject: [PATCH] *** empty log message ***

svn path=/trunk/boinc/; revision=11336
---
 checkin_notes          |  39 ++++++++++++
 db/boinc_db.C          |   5 +-
 db/boinc_db.h          |   1 +
 lib/parse.C            |  21 ++++---
 sched/assimilator.C    |   6 +-
 sched/feeder.C         |  12 ++--
 sched/sched_array.C    |  44 +++++++++----
 sched/sched_config.C   |   8 ++-
 sched/sched_config.h   |   4 ++
 sched/sched_locality.C |   2 +-
 sched/sched_send.C     | 136 ++++++++++++++++++++++++++++++++++-------
 sched/sched_send.h     |   4 +-
 sched/server_types.h   |  16 +++++
 13 files changed, 240 insertions(+), 58 deletions(-)
diff --git a/checkin_notes b/checkin_notes
index 4e1ae5c9b7..8b1d6a56cf 100755
--- a/checkin_notes
+++ b/checkin_notes
@@ -11331,3 +11331,42 @@ Charlie 21 Oct 2006
     mac_build/
         boinc.xcodeproj/
             project.pbxproj
+
+David  21 Oct 2006
+	- Server: apps can be labeled as "beta",
+		and jobs for them will be sent only to users
+		with "<allow_beta_work>1</allow_beta_work>" in their project preferences.
+		This lets you do application testing without
+		creating a separate project,
+		which has advantages both for you (1 project instead of 2)
+		and for testers (their testing gives them points
+		in your main project)
+	- Scheduler: refined the criteria for "reliable" hosts
+		to which old results are sent,
+		and added a param for reducing delay bound
+		when sending results to reliable hosts
+	- add -sleep_interval option to assimilator
+	- move -reliable flag from feeder to sched config (reliable_time, now in seconds rather than days)
+	- add to sched config:
+		reliable_min_avg_credit
+		reliable_min_avg_turnaround
+		reliable_time
+		reliable_reduced_delay_bound
+
+	(from Kevin Reed)
+
+	NOTES: updating your server software to this version or later requires
+		a small database change (see html/ops/db_update.php)
+
+	db/
+		boinc_db.C,h
+	lib/
+		parse.C
+	sched/
+		assimilator.C
+		feeder.C
+		sched_array.C
+		sched_config.C,h
+		sched_locality.C
+		sched_send.C,h
+		server_types.h
diff --git a/db/boinc_db.C b/db/boinc_db.C
index 9b8e817493..24e4da3b83 100644
--- a/db/boinc_db.C
+++ b/db/boinc_db.C
@@ -129,9 +129,9 @@ void DB_PLATFORM::db_parse(MYSQL_ROW &r) {
 void DB_APP::db_print(char* buf){
     sprintf(buf,
         "create_time=%d, name='%s', min_version=%d, "
-        "deprecated=%d, user_friendly_name='%s', homogeneous_redundancy=%d, weight=%f",
+        "deprecated=%d, user_friendly_name='%s', homogeneous_redundancy=%d, weight=%f, beta=%d",
         create_time, name, min_version,
-        deprecated?1:0, user_friendly_name, homogeneous_redundancy?1:0, weight
+        deprecated?1:0, user_friendly_name, homogeneous_redundancy?1:0, weight, beta
     );
 }
 
@@ -146,6 +146,7 @@ void DB_APP::db_parse(MYSQL_ROW &r) {
     strcpy2(user_friendly_name, r[i++]);
     homogeneous_redundancy = atoi(r[i++]);
     weight = atof(r[i++]);
+    beta = atoi(r[i++]);
 }
 
 void DB_APP_VERSION::db_print(char* buf){
diff --git a/db/boinc_db.h b/db/boinc_db.h
index dc872b7763..2855fb84e6 100755
--- a/db/boinc_db.h
+++ b/db/boinc_db.h
@@ -69,6 +69,7 @@ struct APP {
     bool homogeneous_redundancy;
     double weight;          // tells the feeder what fraction of results
                             // should come from this app
+    int beta;
 
     int write(FILE*);
     void clear();
diff --git a/lib/parse.C b/lib/parse.C
index 95c57a3c38..fb88a6d692 100644
--- a/lib/parse.C
+++ b/lib/parse.C
@@ -286,14 +286,19 @@ void extract_venue(char* in, char* venue_name, char* out) {
         q = strstr(out, "</venue");
         if (q) *q = 0;
     } else {
-        strcpy(out, in);
-        while (1) {
-            p = strstr(out, "<venue");
-            if (!p) break;
-            q = strstr(p, "</venue>\n");
-            if (!q) break;
-            strcpy(p, q+strlen("</venue>\n"));
-        }
+        q = in;
+        strcpy(out, "");
+       	while (1) {
+           	p = strstr(q, "<venue");
+           	if (!p) {
+       	        strcat(out, q);
+                break;
+            }
+           	strncat(out, q, p-q);
+           	q = strstr(p, "</venue>");
+           	if (!q) break;
+           	q += strlen("</venue>");
+       	}
     }
 }
 
diff --git a/sched/assimilator.C b/sched/assimilator.C
index 0b36b75f53..7c47b4f554 100644
--- a/sched/assimilator.C
+++ b/sched/assimilator.C
@@ -51,6 +51,8 @@ int wu_id_modulus=0, wu_id_remainder=0;
 
 #define SLEEP_INTERVAL 10
 
+int sleep_interval = SLEEP_INTERVAL;
+
 int one_pass_N_WU=0;
 
 // assimilate all WUs that need it
@@ -154,6 +156,8 @@ int main(int argc, char** argv) {
         } else if (!strcmp(argv[i], "-one_pass_N_WU")) {
             one_pass_N_WU = atoi(argv[++i]);
             one_pass = true;
+        } else if (!strcmp(argv[i], "-sleep_interval")) {
+            sleep_interval = atoi(argv[++i]);
         } else if (!strcmp(argv[i], "-one_pass")) {
             one_pass = true;
         } else if (!strcmp(argv[i], "-d")) {
@@ -214,7 +218,7 @@ int main(int argc, char** argv) {
     while (1) {
         if (!do_pass(app)) {
             if (one_pass) break;
-            sleep(SLEEP_INTERVAL);
+            sleep(sleep_interval);
         }
     }
 }
diff --git a/sched/feeder.C b/sched/feeder.C
index 5d727b5561..60ef070eac 100644
--- a/sched/feeder.C
+++ b/sched/feeder.C
@@ -32,7 +32,6 @@
 //  [ -purge_stale x ]    remove work items from the shared memory segment
 //                        that have been there for longer then x minutes
 //                        but haven't been assigned
-//  [ -reliable x ]       flag results for workunits older then x days as "need_reliable"
 //
 // Creates a shared memory segment containing DB info,
 // including the work array (results/workunits to send).
@@ -143,7 +142,6 @@ char select_clause[256];
 double sleep_interval = DEFAULT_SLEEP_INTERVAL;
 bool all_apps = false;
 int purge_stale_time = 0;
-int reliable_time = 0;
 
 void cleanup_shmem() {
     ssp->ready = false;
@@ -406,8 +404,8 @@ static void scan_work_array(
                 // workunits older then the specificed time as needing a reliable
                 // host
                 wu_result.need_reliable = 0;
-                if (reliable_time) {
-                	if ((wu_result.workunit.create_time + reliable_time*86400) <= time(0)) {
+                if (config.reliable_time) {
+                	if ((wu_result.workunit.create_time + config.reliable_time) <= time(0)) {
                 		wu_result.need_reliable = true;
                 	}
                 }
@@ -526,13 +524,11 @@ int main(int argc, char** argv) {
         } else if (!strcmp(argv[i], "-allapps")) {
             all_apps = true;
         } else if (!strcmp(argv[i], "-priority_order")) {
-            order_clause = "order by priority desc ";
+            order_clause = "order by result.priority desc ";
         } else if (!strcmp(argv[i], "-priority_order_create_time")) {
-            order_clause = "order by priority desc, workunit.create_time ";
+            order_clause = "order by result.priority desc, workunit.create_time ";
         } else if (!strcmp(argv[i], "-purge_stale")) {
             purge_stale_time = atoi(argv[++i])*60;
-        } else if (!strcmp(argv[i], "-reliable")) {
-            reliable_time = atoi(argv[++i]);
         } else if (!strcmp(argv[i], "-mod")) {
             int n = atoi(argv[++i]);
             int j = atoi(argv[++i]);
diff --git a/sched/sched_array.C b/sched/sched_array.C
index dbb57090c6..5e862698bd 100644
--- a/sched/sched_array.C
+++ b/sched/sched_array.C
@@ -79,24 +79,44 @@ void scan_work_array(
             continue;
         }
         
-        // If this is a reliable host and we are checking for results that
-        // need a reliable host, then continue if the result is a normal result
+        // If we are looking for beta results and result is not a beta result
+        // then move on
         //
-        if (reply.wreq.reliable_only && (!wu_result.need_reliable)) {
-        	continue;
+        APP* app = ss.lookup_app(wu_result.workunit.appid);
+        if (app == NULL) continue; // this should never happen
+        if (reply.wreq.beta_only) {
+            if (!app->beta) continue;
+
+            // Identify the job by its workunit id; every %d conversion
+            // in the format must have a matching argument.
+            //
+            log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG,
+                "[HOST#%d] beta work found.  [WU#%d]\n",
+                reply.host.id, wu_result.workunit.id
+            );
+        } else {
+            if (app->beta) continue;
+        }
         
+        // Beta apps don't use the reliable mechanism.  For other apps,
+        // a reliable-only pass skips results that don't need a reliable host,
+        // and every other pass skips results that do.
+        //
+        if (!app->beta) {
+        	if (reply.wreq.reliable_only && (!wu_result.need_reliable)) {
+        		continue;
+        	} else if (!reply.wreq.reliable_only && wu_result.need_reliable) {
+        		continue;
+        	}
+        }
+        
+        // If we are looking for infeasible results and the result is not infeasible
+        // then move on
+        //
         if (reply.wreq.infeasible_only && (wu_result.infeasible_count==0)) {
             continue;
         }
         
-        // Never send a result that needs a reliable host to one that 
-        // has not earned credit
-        //
-        if (wu_result.need_reliable && reply.host.total_credit == 0) {
-        	continue;
-        }
-
         // don't send if we're already sending a result for same WU
         //
         if (config.one_result_per_user_per_wu) {
@@ -108,7 +128,7 @@ void scan_work_array(
         // don't send if host can't handle it
         //
         wu = wu_result.workunit;
-        if (wu_is_infeasible(wu, sreq, reply)) {
+        if (wu_is_infeasible(wu, sreq, reply, ss)) {
            	log_messages.printf(
                	SCHED_MSG_LOG::MSG_DEBUG, "[HOST#%d] [WU#%d %s] WU is infeasible\n",
                	reply.host.id, wu.id, wu.name
diff --git a/sched/sched_config.C b/sched/sched_config.C
index f440463c9b..9af81e92e4 100644
--- a/sched/sched_config.C
+++ b/sched/sched_config.C
@@ -120,8 +120,12 @@ int SCHED_CONFIG::parse(FILE* f) {
         else if (xp.parse_bool(tag, "dont_delete_batches", dont_delete_batches)) continue;
         else if (xp.parse_int(tag, "sched_debug_level", sched_debug_level)) continue;
         else if (xp.parse_int(tag, "fuh_debug_level", fuh_debug_level)) continue;
-
-        // tags the scheduler doesn't care about
+        else if (xp.parse_int(tag, "reliable_min_avg_credit", reliable_min_avg_credit)) continue;
+        else if (xp.parse_int(tag, "reliable_min_avg_turnaround", reliable_min_avg_turnaround)) continue;
+        else if (xp.parse_int(tag, "reliable_time", reliable_time)) continue;
+        else if (xp.parse_double(tag, "reliable_reduced_delay_bound", reliable_reduced_delay_bound)) continue;
+		
+        // some tags that scheduler doesn't care about
         //
         else if (xp.parse_str(tag, "cgi_url", temp, sizeof(temp))) continue;
         else if (xp.parse_str(tag, "log_dir", temp, sizeof(temp))) continue;
diff --git a/sched/sched_config.h b/sched/sched_config.h
index 6bb49076dd..c0e5ed4cd1 100644
--- a/sched/sched_config.h
+++ b/sched/sched_config.h
@@ -76,6 +76,10 @@ public:
     bool dont_delete_batches;
     int sched_debug_level;
     int fuh_debug_level;
+    int reliable_time; // age of workunit before requiring reliable
+    int reliable_min_avg_credit;
+    int reliable_min_avg_turnaround;
+    double reliable_reduced_delay_bound;
 
     int parse(FILE*);
     int parse_file(const char* dir=".");
diff --git a/sched/sched_locality.C b/sched/sched_locality.C
index a7f96d16e5..ab5281a736 100644
--- a/sched/sched_locality.C
+++ b/sched/sched_locality.C
@@ -287,7 +287,7 @@ static int possibly_send_result(
     // why the WU is not feasible.  These are defined in sched_send.h.
     // INFEASIBLE_MEM, INFEASIBLE_DISK, INFEASIBLE_CPU.
     // 
-    if (wu_is_infeasible(wu, sreq, reply)) {
+    if (wu_is_infeasible(wu, sreq, reply, ss)) {
         return ERR_INSUFFICIENT_RESOURCE;
     }
 
diff --git a/sched/sched_send.C b/sched/sched_send.C
index 9df3f40864..843e871a28 100644
--- a/sched/sched_send.C
+++ b/sched/sched_send.C
@@ -248,25 +248,76 @@ static double estimate_wallclock_duration(
     return ewd;
 }
 
-// scan user's project prefs for elements of the form <app_id>N</app_id>,
-// indicating the apps they want to run.
+// Find or compute various details for the host.
+// These parameters affect how work is sent to the host
 //
-static int find_allowed_apps(
-    SCHEDULER_REPLY& reply, std::vector<int> *app_ids
-) {
+static int get_host_details(SCHEDULER_REPLY& reply) {
     char buf[8096];
    	std::string str;
    	extract_venue(reply.user.project_prefs, reply.host.venue, buf);
    	str = buf;
 	unsigned int pos = 0;
 	int temp_int;
+    USER_APP_DTL* app_dtl;
+
+    // scan user's project prefs for elements of the form <app_id>N</app_id>,
+    // indicating the apps they want to run.
+    // Each entry is heap-allocated here; send_work() deletes them when done.
+    //
 	while (parse_int(str.substr(pos,str.length()-pos).c_str(), "<app_id>", temp_int)) {
-		(*app_ids).push_back(temp_int);
+        app_dtl = new USER_APP_DTL();
+        app_dtl->appid = temp_int;
+        app_dtl->work_available=0;
+        reply.wreq.host_dtls.preferred_apps.push_back(app_dtl);
+
 		pos = str.find("<app_id>", pos) + 1;
 	}
+
+    // Beta work is allowed only if <allow_beta_work> is present and nonzero.
+    // parse_int() returns found/not-found and stores the number in temp_int
+    // (the loop above relies on this), so don't assign its result to temp_int.
+    //
+    if (!parse_int(buf, "<allow_beta_work>", temp_int)) temp_int = 0;
+    reply.wreq.host_dtls.allow_beta_work = temp_int;
+
+    // Decide whether or not this computer is a 'reliable' computer:
+    // per-CPU average credit must exceed reliable_min_avg_credit and
+    // average turnaround must be below reliable_min_avg_turnaround.
+    // A limit of 0 disables that test.  Hosts whose OS is neither
+    // Windows nor Linux get 25% slack on both limits.
+    //
+    double expavg_credit = reply.host.expavg_credit;
+    double expavg_time = reply.host.expavg_time;
+    double avg_turnaround = reply.host.avg_turnaround;
+    update_average(0, 0, CREDIT_HALF_LIFE, expavg_credit, expavg_time);
+    bool main_os = strstr(reply.host.os_name,"Windows")
+        || strstr(reply.host.os_name,"Linux");
+    double min_credit = config.reliable_min_avg_credit * (main_os ? 1.0 : .75);
+    double max_turnaround = config.reliable_min_avg_turnaround * (main_os ? 1.0 : 1.25);
+    reply.wreq.host_dtls.reliable = false;   // don't depend on earlier state
+    if ((config.reliable_min_avg_credit == 0 || (expavg_credit/reply.host.p_ncpus) > min_credit)
+        && (config.reliable_min_avg_turnaround == 0 || avg_turnaround < max_turnaround)
+    ){
+        reply.wreq.host_dtls.reliable = true;
+        log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL,
+            "[HOST#%d] is reliable (OS = %s) expavg_credit = %.0f avg_turnaround(hours) = %.0f \n",
+            reply.host.id, reply.host.os_name, expavg_credit, avg_turnaround/3600
+        );
+    }
 	return 0;
 }
 
+// Return the index in reply.wreq.host_dtls.preferred_apps of the entry
+// whose appid matches, or -1 if there is none.
+//
+int find_preferred_app_index(SCHEDULER_REPLY& reply, int appid) {
+    std::vector<USER_APP_DTL*>& apps = reply.wreq.host_dtls.preferred_apps;
+    for (unsigned int i=0; i<apps.size(); i++) {
+        if (apps[i]->appid == appid) return (int)i;
+    }
+    return -1;
+}
+
 // if the WU can't be executed on the host, return a bitmap of reasons why.
 // Reasons include:
 // 1) the host doesn't have enough memory;
@@ -279,7 +330,8 @@ static int find_allowed_apps(
 // In particular it doesn't enforce the one-result-per-user-per-wu rule
 //
 int wu_is_infeasible(
-    WORKUNIT& wu, SCHEDULER_REQUEST& request, SCHEDULER_REPLY& reply
+    WORKUNIT& wu, SCHEDULER_REQUEST& request, SCHEDULER_REPLY& reply,
+    SCHED_SHMEM& ss
 ) {
     int reason = 0;
     unsigned int i;
@@ -288,17 +340,25 @@ int wu_is_infeasible(
     // If they have then only send work for the allowed applications
     // TODO: call find_allowed_apps() only once, not once for each WU!!
     //
-    std::vector<int> app_ids;
-    find_allowed_apps(reply, &app_ids);
-    if (app_ids.size() > 0) {
-    	bool app_allowed = false;
-    	for(i=0; i<app_ids.size(); i++) {
-    		if (wu.appid==app_ids[i]) {
+    bool app_allowed = false;
+    if (reply.wreq.host_dtls.preferred_apps.size() > 0) {
+        for (i=0; i<reply.wreq.host_dtls.preferred_apps.size(); i++) {
+            log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG,
+                "Scanning preferred apps. index=%d, appid=%d, work_avail=%d\n",
+                i, reply.wreq.host_dtls.preferred_apps[i]->appid,
+                reply.wreq.host_dtls.preferred_apps[i]->work_available
+            );
+            if (wu.appid==reply.wreq.host_dtls.preferred_apps[i]->appid) {
+
     			app_allowed = true;
+                reply.wreq.host_dtls.preferred_apps[i]->work_available=1;
     			break;
     		}
     	}
-    	if (!app_allowed) {
+
+        // Only mark infeasible if we are looking at user preferred apps only
+        //
+        if (!app_allowed && !reply.wreq.beta_only) {
         	reply.wreq.no_allowed_apps_available = true;
     		reason |= INFEASIBLE_APP_SETTING;
 			log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG,
@@ -661,10 +721,21 @@ int add_result_to_reply(
     result.sent_time = time(0);
     int old_server_state = result.server_state;
 
+    // If the workunit needs reliable and is being sent to a reliable host,
+    // then shorten the delay bound by the percent specified
+    //
+    int delay_bound = wu.delay_bound;
+    if (config.reliable_time && reply.wreq.host_dtls.reliable && config.reliable_reduced_delay_bound > 0.01) {
+        if ((wu.create_time + config.reliable_time) <= time(0)) {
+            delay_bound = (int) (delay_bound * config.reliable_reduced_delay_bound);
+        }
+    }
+
+
     if (result.server_state != RESULT_SERVER_STATE_IN_PROGRESS) {
         // We are sending this result for the first time
         //
-        result.report_deadline = result.sent_time + wu.delay_bound;
+        result.report_deadline = result.sent_time + delay_bound;
         result.server_state = RESULT_SERVER_STATE_IN_PROGRESS;
     } else {
         // Result was ALREADY sent to this host but never arrived.
@@ -677,8 +748,8 @@ int add_result_to_reply(
         if (result.report_deadline < result.sent_time) {
             result.report_deadline = result.sent_time + 10;
         }
-        if (result.report_deadline > result.sent_time + wu.delay_bound) {
-            result.report_deadline = result.sent_time + wu.delay_bound;
+        if (result.report_deadline > result.sent_time + delay_bound) {
+            result.report_deadline = result.sent_time + delay_bound;
         }
 
         log_messages.printf(
@@ -767,6 +838,8 @@ int send_work(
     reply.wreq.core_client_version = sreq.core_client_major_version*100
         + sreq.core_client_minor_version;
     reply.wreq.nresults = 0;
+    get_host_details(reply); // parse project prefs for app details
+    reply.wreq.beta_only = false;
 
     log_messages.printf(
         SCHED_MSG_LOG::MSG_NORMAL,
@@ -800,16 +873,27 @@ int send_work(
     } else {
     	// give top priority to results that require a 'reliable host'
         //
-        double expavg_credit = reply.host.expavg_credit;
-        double expavg_time = reply.host.expavg_time;
-        update_average(0, 0, CREDIT_HALF_LIFE, expavg_credit, expavg_time);
-        if ((expavg_credit/reply.host.p_ncpus) > 70) {
+        if (reply.wreq.host_dtls.reliable) {
         	reply.wreq.reliable_only = true;
         	reply.wreq.infeasible_only = false;
-            log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG, "[HOST#%d] is reliable\n", reply.host.id);
         	scan_work_array(sreq, reply, platform, ss);
         }
     	reply.wreq.reliable_only = false;
+
+        // give 2nd priority to results that are for a beta app
+        // (projects should load beta work with care
+        // otherwise your users won't get production work done!)
+        //
+        if (reply.wreq.host_dtls.allow_beta_work) {
+            reply.wreq.beta_only=true;
+            log_messages.printf(
+                SCHED_MSG_LOG::MSG_DEBUG,
+                "[HOST#%d] will accept beta work.  Scanning for beta work.\n",
+                reply.host.id
+            );
+            scan_work_array(sreq, reply, platform, ss);
+        }
+        reply.wreq.beta_only=false;
     	
         // give next priority to results that were infeasible for some other host
         //
@@ -935,6 +1019,14 @@ int send_work(
             reply.set_delay(delay_time);
         }
     }
+
+    // free memory
+    //
+    for (int i=0; i<reply.wreq.host_dtls.preferred_apps.size(); i++) {
+        delete(reply.wreq.host_dtls.preferred_apps[i]);
+    }
+    reply.wreq.host_dtls.preferred_apps.clear();
+
     return 0;
 }
 
diff --git a/sched/sched_send.h b/sched/sched_send.h
index 77463001a0..480204bc51 100644
--- a/sched/sched_send.h
+++ b/sched/sched_send.h
@@ -49,8 +49,8 @@ extern bool app_core_compatible(WORK_REQ& wreq, APP_VERSION& av);
 #define INFEASIBLE_WORK_BUF 8
 #define INFEASIBLE_APP_SETTING 16
 
-extern int wu_is_infeasible(WORKUNIT&, SCHEDULER_REQUEST&, SCHEDULER_REPLY&);
-
+extern int wu_is_infeasible(WORKUNIT&, SCHEDULER_REQUEST&, SCHEDULER_REPLY&, SCHED_SHMEM&);
+ 
 extern double max_allowable_disk(SCHEDULER_REQUEST&, SCHEDULER_REPLY&);
 
 extern bool wu_already_in_reply(WORKUNIT& wu, SCHEDULER_REPLY& reply);
diff --git a/sched/server_types.h b/sched/server_types.h
index 2c7e2b0a0e..450b3d8f32 100644
--- a/sched/server_types.h
+++ b/sched/server_types.h
@@ -27,11 +27,27 @@
 #include "common_defs.h"
 #include "md5_file.h"
 
+struct USER_APP_DTL {   // one app named by <app_id> in the user's project prefs
+	int appid;   // app id parsed from the <app_id> element
+	int reason;   // not referenced by the scheduler code in this change -- TODO confirm intended use
+	int work_available;   // 0 when created; set to 1 once a workunit of this app is examined
+};
+
+// Details concerning a host's settings
+//
+struct HOST_DETAILS {
+	int allow_beta_work;   // filled from <allow_beta_work> in the project prefs
+	bool reliable;   // set when the host meets the reliable_min_avg_* config limits
+	std::vector<USER_APP_DTL*> preferred_apps;   // allocated with new per <app_id>; deleted at the end of send_work()
+};
+
 // summary of a client's request for work, and our response to it
 //
 struct WORK_REQ {
     bool infeasible_only;
     bool reliable_only;
+    bool beta_only;
+    HOST_DETAILS host_dtls;
     double seconds_to_fill;
 		// in "normalized CPU seconds" (see doc/work_req.php)
     double disk_available;