diff --git a/checkin_notes b/checkin_notes
index c45e0d4810..0f7da882e3 100755
--- a/checkin_notes
+++ b/checkin_notes
@@ -23737,3 +23737,38 @@ Bruce 2 Feb 2005
sched/
server_types.C
+
+Bruce 2 Feb 2005
+ - Improvements to the file deletion mechanism. Now try removing files if
+   no work was sent to the host and available space<0, OR if available space>0
+   but the work was infeasible because its disk bound requirements exceeded
+   the available space.
+ - Added a new config.xml boolean element called 'choose_download_url_by_timezone'.
+   This requires that the project provide a 2-column file in the project root
+   named 'download_servers'. An example is:
+      3600 http://einstein.aei.mpg.de
+      -21600 http://einstein.phys.uwm.edu
+   The first column is the server's offset from UTC in seconds, and the second
+   column is the URL of the download server. When enabled, the scheduler
+   replaces the download path for data and executables with a list of download
+   URLs, ordered by proximity to the host's timezone. The download path must
+   start with the BOINC default 'download/', and the different download servers
+   must have identical file paths under download/; in other words, they must be
+   mirrors of each other.
+ - Really exciting news, David: I have finally gotten emacs to obey your
+   indentations and formatting conventions, apart from doing this:
+       some_function(foo, bar
+                     );
+   instead of this:
+       some_function(foo, bar
+       );
+   Emacs experts, advice appreciated!
+
+    doc/
+ configuration.php
+ sched/
+ sched_config.h
+ sched_config.C
+ sched_send.C
+ handle_request.C
+
+
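For illustration, assuming a host whose timezone offset is +3600 (UTC+1) and the
example 'download_servers' file above, the scheduler would replace a file's single
download URL with a sorted list like

    <url>http://einstein.aei.mpg.de/download/123/some_file_name</url>
    <url>http://einstein.phys.uwm.edu/download/123/some_file_name</url>
    <url>http://einstein.aei.mpg.de/download/123/some_file_name</url>
    <url>http://einstein.phys.uwm.edu/download/123/some_file_name</url>

that is, the mirrors ordered by proximity to the host's timezone, followed by a
second copy of the list in the same order (the path '123/some_file_name' is made
up for this example).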
diff --git a/doc/configuration.php b/doc/configuration.php
index aa96fa0f59..0bb4479a23 100644
--- a/doc/configuration.php
+++ b/doc/configuration.php
@@ -34,16 +34,17 @@ htmlspecialchars("
http://A/URL
/path/to/directory
- [ ]
- [ ]
- [ ]
- [ N ]
- [ N ]
- [ N ]
- [ ]
- [ ]
- [ N ]
- [ N ]
+ [ ]
+ [ ]
+ [ N ]
+ [ N ]
+ [ N ]
+ [ ]
+ [ ]
+ [ N ]
+ [ N ]
../
@@ -194,7 +195,20 @@ list_item("min_core_client_version",
a version number less than this,
it returns an error message and doesn't do any other processing."
);
-
+list_item("choose_download_url_by_timezone",
+    "When the scheduler sends work to hosts, it replaces the download
+    URL appearing in the data and executable file descriptions with a
+    list of download URLs ordered by proximity to the host's timezone.
+    The project must provide a two-column file called 'download_servers'
+    in the project root directory; this lists the download servers that
+    may be inserted when work is sent to hosts. The first column is an
+    integer giving the server's offset from UTC in seconds. The second
+    column is the server URL, for example http://einstein.phys.uwm.edu.
+    The download servers must have identical file hierarchies and
+    contents, and the path to data files and executables must start
+    with '/download/...', as in
+    'http://einstein.phys.uwm.edu/download/123/some_file_name'."
+);
list_end();
// THE INFORMATION BELOW NEEDS TO BE ORGANIZED AND PUT INTO TABLES OR SOME OTHER LESS CRAMPED FORM
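For illustration, enabling the option presumably amounts to adding the boolean
element to config.xml (the element name comes from sched_config.C below):

    <config>
        ...
        <choose_download_url_by_timezone/>
        ...
    </config>

together with a project-root 'download_servers' file of the form described above,
e.g.:

    3600 http://einstein.aei.mpg.de
    -21600 http://einstein.phys.uwm.edu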
diff --git a/sched/handle_request.C b/sched/handle_request.C
index cbb4f5fa58..a8f8d9ad92 100644
--- a/sched/handle_request.C
+++ b/sched/handle_request.C
@@ -908,7 +908,7 @@ extern double watch_diskspace[3];
int delete_file_from_host(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply) {
int nfiles = (int)sreq.file_infos.size();
char buf[256];
-
+
if (!nfiles) {
log_messages.printf(
SCHED_MSG_LOG::CRITICAL,
@@ -949,11 +949,10 @@ int delete_file_from_host(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply) {
SCHED_MSG_LOG::DEBUG,
"[HOST#%d]: delete file %s (make space)\n", sreply.host.id, fi.name
);
- // give host an hour to nuke the file and come back. This might
+ // give host 4 hours to nuke the file and come back. This might
// in general be too soon, since host needs to complete any work
// that depends upon this file, before it will be removed by core client.
//
-
sprintf(buf, "Removing file %s to free up disk space", fi.name);
USER_MESSAGE um(buf, "low");
sreply.insert_message(um);
@@ -970,11 +969,11 @@ void debug_sched(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply, const char *t
}
sprintf(tmpfilename, "sched_reply_%06d_%06d", sreq.hostid, sreq.rpc_seqno);
- // use _XXXXXX if you want random filenames rather than deterministic
- // mkstemp(tmpfilename);
-
+ // use _XXXXXX if you want random filenames rather than
+ // deterministic mkstemp(tmpfilename);
+
fp=fopen(tmpfilename, "w");
-
+
if (!fp) {
log_messages.printf(
SCHED_MSG_LOG::CRITICAL,
@@ -982,15 +981,15 @@ void debug_sched(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply, const char *t
);
return;
}
-
+
log_messages.printf(
SCHED_MSG_LOG::DEBUG,
"Found %s, so writing %s\n", trigger, tmpfilename
);
-
+
sreply.write(fp);
fclose(fp);
-
+
sprintf(tmpfilename, "sched_request_%06d_%06d", sreq.hostid, sreq.rpc_seqno);
fp=fopen(tmpfilename, "w");
@@ -1040,21 +1039,26 @@ void handle_request(
sreply.insert_message(um);
sreply.nucleus_only = true;
}
-
+
// if we got no work, and we have no file space, delete some files
- if (sreply.results.size()==0 && max_allowable_disk(sreq)<0) {
- // try to delete a file to make more space. Also give some
- // hints to the user about what's going wrong (lack of disk
- // space).
- delete_file_from_host(sreq, sreply);
+ if (sreply.results.size()==0 && (sreply.wreq.insufficient_disk || sreply.wreq.disk_available<0)) {
+ // try to delete a file to make more space. Also give some
+ // hints to the user about what's going wrong (lack of disk
+ // space).
+ delete_file_from_host(sreq, sreply);
}
-
+
#if 1
- if (sreply.results.size()==0) {
+ // You can call debug_sched() for whatever situation is of
+ // interest to you. It won't do anything unless you create
+ // (touch) the file 'debug_sched' in the project root directory.
+ //
+ if (sreply.results.size()==0 && sreply.hostid && sreq.work_req_seconds>1.0)
+ debug_sched(sreq, sreply, "../debug_sched");
+ else if (max_allowable_disk(sreq)<0 || (sreply.wreq.insufficient_disk || sreply.wreq.disk_available<0))
debug_sched(sreq, sreply, "../debug_sched");
- }
#endif
-
+
sreply.write(fout);
}
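Usage note for the debug_sched() hook above: it only does anything if a file named
'debug_sched' exists in the project root (e.g. created with 'touch debug_sched');
when triggered it dumps the scheduler reply and request to files named
sched_reply_HOSTID_RPCSEQNO and sched_request_HOSTID_RPCSEQNO, with both numbers
zero-padded to six digits.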
diff --git a/sched/sched_config.C b/sched/sched_config.C
index b107842c52..f5aec5b5a8 100644
--- a/sched/sched_config.C
+++ b/sched/sched_config.C
@@ -90,6 +90,8 @@ int SCHED_CONFIG::parse(char* buf) {
     parse_int(buf, "<locality_scheduling_wait_period>", locality_scheduling_wait_period);
     parse_int(buf, "<locality_scheduling_send_timeout>", locality_scheduling_send_timeout);
     parse_int(buf, "<min_core_client_version>", min_core_client_version);
+ parse_bool(buf, "choose_download_url_by_timezone", choose_download_url_by_timezone);
+
if (match_tag(buf, "")) {
char hostname[256];
gethostname(hostname, 256);
diff --git a/sched/sched_config.h b/sched/sched_config.h
index 18c0267c57..acf85b0b9e 100644
--- a/sched/sched_config.h
+++ b/sched/sched_config.h
@@ -59,6 +59,7 @@ public:
int locality_scheduling_wait_period;
int locality_scheduling_send_timeout;
int min_core_client_version;
+ bool choose_download_url_by_timezone;
int parse(char*);
int parse_file(char* dir=".");
diff --git a/sched/sched_send.C b/sched/sched_send.C
index 1b3f76a1c8..18069899b1 100644
--- a/sched/sched_send.C
+++ b/sched/sched_send.C
@@ -23,6 +23,8 @@
#include
#include
#include
+#include
+
using namespace std;
#include "error_numbers.h"
@@ -34,6 +36,7 @@ using namespace std;
#include "sched_msgs.h"
#include "sched_send.h"
#include "sched_locality.h"
+#include "../lib/parse.h"
#ifdef _USING_FCGI_
#include "fcgi_stdio.h"
@@ -229,12 +232,186 @@ int insert_after(char* buffer, char* after, char* text) {
return 0;
}
+typedef struct urltag {
+ int zone;
+ char name[124];
+} URLTYPE;
+
+
+// these global variables are needed to pass information into the
+// compare function below.
+static int tzone=0;
+static int hostid=0;
+
+static int compare(const void *x, const void *y) {
+ URLTYPE *a=(URLTYPE *)x;
+ URLTYPE *b=(URLTYPE *)y;
+
+ char longname[512];
+
+    if (abs(tzone - (a->zone))<abs(tzone - (b->zone)))
+ return -1;
+
+ if (abs(tzone - (a->zone))>abs(tzone - (b->zone)))
+ return +1;
+
+ // In order to ensure uniform distribution, we hash paths that are
+ // equidistant from the host's timezone in a way that gives a
+ // unique ordering for each host but which is effectively random
+ // between hosts.
+ sprintf(longname, "%s%d", a->name, hostid);
+ std::string sa = md5_string((const unsigned char *)longname, strlen((const char *)longname));
+ sprintf(longname, "%s%d", b->name, hostid);
+ std::string sb = md5_string((const unsigned char *)longname, strlen((const char *)longname));
+ int xa = strtol(sa.substr(1, 7).c_str(), 0, 16);
+ int xb = strtol(sb.substr(1, 7).c_str(), 0, 16);
+
+    if (xa<xb)
+        return -1;
+    if (xa>xb)
+        return 1;
+
+ return 0;
+}
+
+// read the file ../download_servers (one line per server: timezone
+// offset in seconds, then URL) into a cached list of URLTYPE entries,
+// terminated by an entry with an empty name and sorted by distance
+// from the host's timezone.
+//
+static URLTYPE *cached=NULL;
+#define BLOCKSIZE 32
+URLTYPE* read_download_list(){
+ FILE *fp;
+ int count=0;
+ int i;
+
+ if (cached)
+ return cached;
+
+ if (!(fp=fopen("../download_servers", "r")))
+ return NULL;
+
+ // read in lines from file
+ while (1) {
+ // allocate memory in blocks
+ if ((count % BLOCKSIZE)==0) {
+ cached=(URLTYPE *)realloc(cached, (count+BLOCKSIZE)*sizeof(URLTYPE));
+ if (!cached)
+ return NULL;
+ }
+ // read timezone offset and URL from file, and store in cache
+ // list
+ if (2==fscanf(fp, "%d %s", &(cached[count].zone), cached[count].name))
+ count++;
+ else {
+ // provide a null terminator so we don't need to keep
+ // another global variable for count.
+ cached[count].name[0]='\0';
+ break;
+ }
+ }
+ fclose(fp);
+
+ if (!count) {
+ log_messages.printf(
+ SCHED_MSG_LOG::CRITICAL,
+ "file ../download_servers contained no valid entries!\n"
+ "format of this file is one or more lines containing:\n"
+            "TIMEZONE_OFFSET http://some.url.path\n"
+ );
+ free(cached);
+ return NULL;
+ }
+
+ // sort URLs by distance from host timezone. See compare() above
+ // for details.
+ qsort(cached, count, sizeof(URLTYPE), compare);
+
+ log_messages.printf(
+ SCHED_MSG_LOG::DEBUG, "Sorted list of URLs follows\n"
+ );
+    for (i=0; i<count; i++) {
+        log_messages.printf(
+            SCHED_MSG_LOG::DEBUG,
+            "zone=%+06d url=%s\n", cached[i].zone, cached[i].name
+        );
+    }
+    return cached;
+}
+
+// construct a list of <url> elements for the given file path, ordered
+// by proximity to the host's timezone. Returns the number of bytes
+// written to buffer, or <0 to indicate an error.
+//
+int make_download_list(char *buffer, char *path, int timezone) {
+    char *start=buffer;
+    int i;
+
+    // global variable used by the compare() function above
+    tzone=timezone;
+
+    URLTYPE *serverlist=read_download_list();
+    if (!serverlist) return -1;
+
+    // print the list of servers in sorted order
+    for (i=0; strlen(serverlist[i].name); i++)
+        start+=sprintf(start, "%s<url>%s/%s</url>", i?"\n    ":"", serverlist[i].name, path);
+
+ // make a second copy in the same order
+ for (i=0; strlen(serverlist[i].name); i++)
+        start+=sprintf(start, "%s<url>%s/%s</url>", "\n    ", serverlist[i].name, path);
+
+ return (start-buffer);
+}
+
+// returns zero on success, non-zero to indicate an error
+//
+int add_download_servers(char *old_xml, char *new_xml, int timezone) {
+ char *p, *q, *r;
+
+ p=r=old_xml;
+
+ // search for next URL to do surgery on
+    while ((q=strstr(p, "<url>"))) {
+ char *s;
+ char path[1024];
+ int len = q-p;
+
+ strncpy(new_xml, p, len);
+
+ new_xml += len;
+
+        // locate the next instance of </url>
+        if (!(r=strstr(q, "</url>")))
+            return 1;
+        r += strlen("</url>");
+
+ // parse out the URL
+        if (!parse_str(q, "<url>", path, 1024))
+ return 1;
+
+ // find start of 'download/'
+ if (!(s=strstr(path,"download/")))
+ return 1;
+
+ // insert new download list in place of the original one
+ len = make_download_list(new_xml, s, timezone);
+ if (len<0)
+ return 1;
+ new_xml += len;
+
+        // advance pointer to start looking for the next <url> tag.
+ p=r;
+ }
+
+ strcpy(new_xml, r);
+ return 0;
+}
+
// add elements to WU's xml_doc, in preparation for sending
// it to a client
//
int insert_wu_tags(WORKUNIT& wu, APP& app) {
char buf[LARGE_BLOB_SIZE];
-
+
sprintf(buf,
" %f\n"
" %f\n"
@@ -305,18 +482,40 @@ int add_wu_to_reply(
APP* app, APP_VERSION* avp
) {
int retval;
- WORKUNIT wu2;
-
+ WORKUNIT wu2, wu3;
+
// add the app, app_version, and workunit to the reply,
// but only if they aren't already there
//
if (avp) {
+ APP_VERSION av2=*avp, *avp2=&av2;
+
+ if (config.choose_download_url_by_timezone) {
+ // replace the download URL for apps with a list of
+ // multiple download servers.
+
+ // set these global variables, needed by the compare()
+ // function so that the download URL list can be sorted by
+ // timezone
+ tzone=reply.host.timezone;
+ hostid=reply.host.id;
+ retval = add_download_servers(avp->xml_doc, av2.xml_doc, reply.host.timezone);
+ if (retval) {
+ log_messages.printf(
+ SCHED_MSG_LOG::CRITICAL,
+ "add_download_servers(to APP version) failed\n"
+ );
+                // restore the original app_version
+ av2=*avp;
+ }
+ }
+
reply.insert_app_unique(*app);
- reply.insert_app_version_unique(*avp);
+ reply.insert_app_version_unique(*avp2);
log_messages.printf(
SCHED_MSG_LOG::DEBUG,
"[HOST#%d] Sending app_version %s %s %d\n",
- reply.host.id, app->name, platform.name, avp->version_num
+ reply.host.id, app->name, platform.name, avp2->version_num
);
}
@@ -328,7 +527,29 @@ int add_wu_to_reply(
log_messages.printf(SCHED_MSG_LOG::NORMAL, "insert_wu_tags failed\n");
return retval;
}
- reply.insert_workunit_unique(wu2);
+ wu3=wu2;
+ if (config.choose_download_url_by_timezone) {
+ // replace the download URL for WU files with a list of
+ // multiple download servers.
+
+ // set these global variables, needed by the compare()
+ // function so that the download URL list can be sorted by
+ // timezone
+ tzone=reply.host.timezone;
+ hostid=reply.host.id;
+
+ retval = add_download_servers(wu2.xml_doc, wu3.xml_doc, reply.host.timezone);
+ if (retval) {
+ log_messages.printf(
+ SCHED_MSG_LOG::CRITICAL,
+ "add_download_servers(to WU) failed\n"
+ );
+ // restore original WU!
+ wu3=wu2;
+ }
+ }
+
+ reply.insert_workunit_unique(wu3);
return 0;
}
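The ordering rule implemented by compare() in sched_send.C can be sketched in
isolation as follows. This is only an illustration: std::hash stands in for the
md5_string()-based tie-break used by the real scheduler, and the host timezone,
host ID, and file path below are made up for the example.

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <functional>
#include <string>
#include <vector>

struct Server {
    int zone;           // offset from UTC in seconds
    std::string url;    // download server URL
};

int main() {
    const int host_tz = 3600;    // example host at UTC+1
    const int host_id = 12345;   // example host ID; only used to break ties

    std::vector<Server> servers = {
        { 3600,   "http://einstein.aei.mpg.de"  },
        { -21600, "http://einstein.phys.uwm.edu" },
    };

    // sort by distance from the host's timezone; equidistant mirrors are
    // ordered by a per-host hash, so the choice is effectively random
    // across hosts but stable for any one host
    std::sort(servers.begin(), servers.end(),
        [&](const Server& a, const Server& b) {
            int da = std::abs(host_tz - a.zone);
            int db = std::abs(host_tz - b.zone);
            if (da != db) return da < db;
            std::hash<std::string> h;
            return h(a.url + std::to_string(host_id))
                 < h(b.url + std::to_string(host_id));
        }
    );

    // print the <url> list in the order the scheduler would emit it
    for (const Server& s : servers) {
        std::printf("<url>%s/download/example_file</url>\n", s.url.c_str());
    }
    return 0;
}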