Improvements to the file deletion mechanism. Now try removing files if
no work was sent to hosts, and available space < 0; OR if available space > 0
      but work was unfeasible because the disk bound requirements of the work
      exceeded the available space.
Added a new config.xml boolean element called 'choose_download_url_by_timezone'
      This requires that projects provide a 2-column file in the project root named
      'download_servers'.  An example is:
           3600   http://einstein.aei.mpg.de
         -21600   http://einstein.phys.uwm.edu
      The first column is offset from UTC in seconds, and the second column is the URL
      of the download server.  When enabled, the scheduler will replace the download
      path for data and executables by a list of download URLs, ordered by proximity
      to the host's timezone.  The download path must start with the
      BOINC default download/ and the different download servers must have identical
      file paths under download/, in other words they must be mirrored.

svn path=/trunk/boinc/; revision=5275
This commit is contained in:
Bruce Allen 2005-02-02 18:13:00 +00:00
parent 02d0ac6fd8
commit a27705fd59
6 changed files with 314 additions and 37 deletions

View File

@ -23737,3 +23737,38 @@ Bruce 2 Feb 2005
sched/
server_types.C
Bruce 2 Feb 2005
- Improvements to the file deletion mechanism. Now try removing files if
no work was sent to hosts, and available space<0 OR if available space>0
but work was unfeasible because the disk bound requirements of the work
exceeded the available space.
- Added a new config.xml boolean element called 'choose_download_url_by_timezone'
This requires that projects provide a 2-column file in the project root named
'download_servers'. An example is:
3600 http://einstein.aei.mpg.de
-21600 http://einstein.phys.uwm.edu
The first column is offset from UTC in seconds, and the second column is the URL
of the download server. When enabled, the scheduler will replace the download
path for data and executables by a list of download URLs, ordered by proximity
to the host's timezone. The download path must start with the
BOINC default download/ and the different download servers must have identical
file paths under download/, in other words they must be mirrored.
- Really exciting news, David: I have finally gotten emacs to obey your
indentations and formatting conventions, apart from doing this:
some_function(foo, bar
);
instead of this:
some_function(foo, bar
);
Emacs experts, advice appreciated!
download/
configuration.php
sched/
sched_config.h
sched_config.C
sched_send.C
handle_request.C

View File

@ -34,16 +34,17 @@ htmlspecialchars("
<stripchart_cgi_url> http://A/URL </stripchart_cgi_url>
<log_dir> /path/to/directory </log_dir>
[ <disable_account_creation/> ]
[ <show_results/> ]
[ <one_result_per_user_per_wu/> ]
[ <max_wus_to_send> N </max_wus_to_send> ]
[ <min_sendwork_interval> N </min_sendwork_interval> ]
[ <daily_result_quota> N </daily_result_quota> ]
[ <enforce_delay_bound/> ]
[ <locality_scheduling/> ]
[ <locality_scheduling_wait_period> N </locality_scheduling_wait_period> ]
[ <min_core_client_version> N </min_core_client_version> ]
[ <disable_account_creation/> ]
[ <show_results/> ]
[ <one_result_per_user_per_wu/> ]
[ <max_wus_to_send> N </max_wus_to_send> ]
[ <min_sendwork_interval> N </min_sendwork_interval> ]
[ <daily_result_quota> N </daily_result_quota> ]
[ <enforce_delay_bound/> ]
[ <locality_scheduling/> ]
[ <locality_scheduling_wait_period> N </locality_scheduling_wait_period> ]
[ <min_core_client_version> N </min_core_client_version> ]
[ <choose_download_url_by_timezone/> ]
<!-- optional; defaults as indicated: -->
<project_dir> ../ </project_dir> <!-- relative to location of 'start' -->
@ -194,7 +195,20 @@ list_item("min_core_client_version",
a version number less than this,
it returns an error message and doesn't do any other processing."
);
list_item("choose_download_url_by_timezone",
"When the scheduler sends work to hosts, it replaces the download
URL appearing in the data and executable file descriptions with
the download URL closest to the host's timezone. The project
must provide a two-column file called 'download_servers' in the
project root directory. This is a list of all download servers
that will be inserted when work is sent to hosts. The first column
is an integer listing the server's offset in seconds from UTC.
The second column is the server URL in the format such as
http://einstein.phys.uwm.edu. The download servers must
have identical file hierarchies and contents, and the path to
file and executables must start with '/download/...' as in
'http://einstein.phys.uwm.edu/download/123/some_file_name'."
);
list_end();
// THE INFORMATION BELOW NEEDS TO BE ORGANIZED AND PUT INTO TABLES OR SOME OTHER LESS CRAMPED FORM

View File

@ -908,7 +908,7 @@ extern double watch_diskspace[3];
int delete_file_from_host(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply) {
int nfiles = (int)sreq.file_infos.size();
char buf[256];
if (!nfiles) {
log_messages.printf(
SCHED_MSG_LOG::CRITICAL,
@ -949,11 +949,10 @@ int delete_file_from_host(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply) {
SCHED_MSG_LOG::DEBUG,
"[HOST#%d]: delete file %s (make space)\n", sreply.host.id, fi.name
);
// give host an hour to nuke the file and come back. This might
// give host 4 hours to nuke the file and come back. This might
// in general be too soon, since host needs to complete any work
// that depends upon this file, before it will be removed by core client.
//
sprintf(buf, "Removing file %s to free up disk space", fi.name);
USER_MESSAGE um(buf, "low");
sreply.insert_message(um);
@ -970,11 +969,11 @@ void debug_sched(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply, const char *t
}
sprintf(tmpfilename, "sched_reply_%06d_%06d", sreq.hostid, sreq.rpc_seqno);
// use _XXXXXX if you want random filenames rather than deterministic
// mkstemp(tmpfilename);
// use _XXXXXX if you want random filenames rather than
// deterministic mkstemp(tmpfilename);
fp=fopen(tmpfilename, "w");
if (!fp) {
log_messages.printf(
SCHED_MSG_LOG::CRITICAL,
@ -982,15 +981,15 @@ void debug_sched(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply, const char *t
);
return;
}
log_messages.printf(
SCHED_MSG_LOG::DEBUG,
"Found %s, so writing %s\n", trigger, tmpfilename
);
sreply.write(fp);
fclose(fp);
sprintf(tmpfilename, "sched_request_%06d_%06d", sreq.hostid, sreq.rpc_seqno);
fp=fopen(tmpfilename, "w");
@ -1040,21 +1039,26 @@ void handle_request(
sreply.insert_message(um);
sreply.nucleus_only = true;
}
// if we got no work, and we have no file space, delete some files
if (sreply.results.size()==0 && max_allowable_disk(sreq)<0) {
// try to delete a file to make more space. Also give some
// hints to the user about what's going wrong (lack of disk
// space).
delete_file_from_host(sreq, sreply);
if (sreply.results.size()==0 && (sreply.wreq.insufficient_disk || sreply.wreq.disk_available<0)) {
// try to delete a file to make more space. Also give some
// hints to the user about what's going wrong (lack of disk
// space).
delete_file_from_host(sreq, sreply);
}
#if 1
if (sreply.results.size()==0) {
// You can call debug_sched() for whatever situation is of
// interest to you. It won't do anything unless you create
// (touch) the file 'debug_sched' in the project root directory.
//
if (sreply.results.size()==0 && sreply.hostid && sreq.work_req_seconds>1.0)
debug_sched(sreq, sreply, "../debug_sched");
else if (max_allowable_disk(sreq)<0 || (sreply.wreq.insufficient_disk || sreply.wreq.disk_available<0))
debug_sched(sreq, sreply, "../debug_sched");
}
#endif
sreply.write(fout);
}

View File

@ -90,6 +90,8 @@ int SCHED_CONFIG::parse(char* buf) {
parse_int(buf, "<locality_scheduling_wait_period>", locality_scheduling_wait_period);
parse_int(buf, "<locality_scheduling_send_timeout>", locality_scheduling_send_timeout);
parse_int(buf, "<min_core_client_version>", min_core_client_version);
parse_bool(buf, "choose_download_url_by_timezone", choose_download_url_by_timezone);
if (match_tag(buf, "</config>")) {
char hostname[256];
gethostname(hostname, 256);

View File

@ -59,6 +59,7 @@ public:
int locality_scheduling_wait_period;
int locality_scheduling_send_timeout;
int min_core_client_version;
bool choose_download_url_by_timezone;
int parse(char*);
int parse_file(char* dir=".");

View File

@ -23,6 +23,8 @@
#include <string>
#include <ctime>
#include <cstdio>
#include <stdlib.h>
using namespace std;
#include "error_numbers.h"
@ -34,6 +36,7 @@ using namespace std;
#include "sched_msgs.h"
#include "sched_send.h"
#include "sched_locality.h"
#include "../lib/parse.h"
#ifdef _USING_FCGI_
#include "fcgi_stdio.h"
@ -229,12 +232,186 @@ int insert_after(char* buffer, char* after, char* text) {
return 0;
}
// One download server, as read from the project's 'download_servers'
// file: its offset from UTC in seconds, and its URL.
typedef struct urltag {
int zone;
char name[124];
} URLTYPE;
// these global variables are needed to pass information into the
// compare function below.
static int tzone=0;   // host's timezone offset from UTC, in seconds
static int hostid=0;  // host's DB id; used to randomize tie-breaking
static int compare(const void *x, const void *y) {
URLTYPE *a=(URLTYPE *)x;
URLTYPE *b=(URLTYPE *)y;
char longname[512];
if (abs(tzone - (a->zone))<abs(tzone - (b->zone)))
return -1;
if (abs(tzone - (a->zone))>abs(tzone - (b->zone)))
return +1;
// In order to ensure uniform distribution, we hash paths that are
// equidistant from the host's timezone in a way that gives a
// unique ordering for each host but which is effectively random
// between hosts.
sprintf(longname, "%s%d", a->name, hostid);
std::string sa = md5_string((const unsigned char *)longname, strlen((const char *)longname));
sprintf(longname, "%s%d", b->name, hostid);
std::string sb = md5_string((const unsigned char *)longname, strlen((const char *)longname));
int xa = strtol(sa.substr(1, 7).c_str(), 0, 16);
int xb = strtol(sb.substr(1, 7).c_str(), 0, 16);
if (xa<xb)
return -1;
if (xa>xb)
return 1;
return 0;
}
// Cached, timezone-sorted list of download servers, read once from the
// '../download_servers' file by read_download_list() below.  The list
// is terminated by an entry with an empty name.
//
static URLTYPE *cached=NULL;
// the cache array grows in chunks of this many entries
#define BLOCKSIZE 32
// Read the project's list of download servers from the file
// '../download_servers' and sort it by distance from the host's
// timezone (see compare() above).  The result is cached; subsequent
// calls return the cached list.
//
// File format: one or more lines of "TIMEZONE_OFFSET URL", e.g.
//     3600 http://einstein.aei.mpg.de
//
// Returns a pointer to the cached array (terminated by an entry whose
// name is the empty string), or NULL on error.
//
URLTYPE* read_download_list(){
    FILE *fp;
    int count=0;
    int i;

    if (cached)
        return cached;

    if (!(fp=fopen("../download_servers", "r")))
        return NULL;

    // read in lines from file
    while (1) {
        // allocate memory in blocks
        if ((count % BLOCKSIZE)==0) {
            URLTYPE *tmp=(URLTYPE *)realloc(cached, (count+BLOCKSIZE)*sizeof(URLTYPE));
            if (!tmp) {
                // don't leak the old block or the open file on
                // allocation failure
                free(cached);
                cached=NULL;
                fclose(fp);
                return NULL;
            }
            cached=tmp;
        }
        // read timezone offset and URL from file, and store in cache
        // list.  Bound the URL width (%123s) so an over-long line
        // can't overflow name[124].
        if (2==fscanf(fp, "%d %123s", &(cached[count].zone), cached[count].name))
            count++;
        else {
            // provide a null terminator so we don't need to keep
            // another global variable for count.
            cached[count].name[0]='\0';
            break;
        }
    }
    fclose(fp);

    if (!count) {
        log_messages.printf(
            SCHED_MSG_LOG::CRITICAL,
            "file ../download_servers contained no valid entries!\n"
            "format of this file is one or more lines containing:\n"
            "TIMEZONE_OFFSET http://some.url.path\n"
        );
        free(cached);
        // reset the cache so a later call re-reads the file instead
        // of returning a dangling pointer to freed memory
        cached=NULL;
        return NULL;
    }

    // sort URLs by distance from host timezone. See compare() above
    // for details.
    qsort(cached, count, sizeof(URLTYPE), compare);

    log_messages.printf(
        SCHED_MSG_LOG::DEBUG, "Sorted list of URLs follows\n"
    );
    for (i=0; i<count; i++) {
        log_messages.printf(
            SCHED_MSG_LOG::DEBUG, "zone=%d name=%s\n", cached[i].zone, cached[i].name
        );
    }
    return cached;
}
// return number of bytes written, or <0 to indicate an error
//
int make_download_list(char *buffer, char *path, int timezone) {
char *start=buffer;
int i;
// global variable used in the compare() function
tzone=timezone;
URLTYPE *serverlist=read_download_list();
if (!serverlist)
return -1;
// print list of servers in sorted order. Space is to format them
// nicely
for (i=0; strlen(serverlist[i].name); i++)
start+=sprintf(start, "%s<url>%s/%s</url>", i?"\n ":"", serverlist[i].name, path);
// make a second copy in the same order
for (i=0; strlen(serverlist[i].name); i++)
start+=sprintf(start, "%s<url>%s/%s</url>", "\n ", serverlist[i].name, path);
return (start-buffer);
}
// returns zero on success, non-zero to indicate an error
//
// Copy old_xml into new_xml, replacing each single <url>...</url>
// element with the full timezone-sorted list of download servers
// produced by make_download_list().  Each original URL must contain
// 'download/'; its path from that point on is reused for every
// mirror.  Caller must ensure new_xml is large enough; on error
// new_xml may be left partially written.
int add_download_servers(char *old_xml, char *new_xml, int timezone) {
char *p, *q, *r;
p=r=old_xml;
// search for next URL to do surgery on
while ((q=strstr(p, "<url>"))) {
char *s;
char path[1024];
int len = q-p;
// copy everything up to (but not including) the <url> tag verbatim
strncpy(new_xml, p, len);
new_xml += len;
// locate next instance of </url>
if (!(r=strstr(q, "</url>")))
return 1;
// r now points just past the element being replaced
r += strlen("</url>");
// parse out the URL
if (!parse_str(q, "<url>", path, 1024))
return 1;
// find start of 'download/'
if (!(s=strstr(path,"download/")))
return 1;
// insert new download list in place of the original one
len = make_download_list(new_xml, s, timezone);
if (len<0)
return 1;
new_xml += len;
// advance pointer to start looking for next <url> tag.
p=r;
}
// copy the tail after the last </url> (or the entire input if no
// <url> element was found)
strcpy(new_xml, r);
return 0;
}
// add elements to WU's xml_doc, in preparation for sending
// it to a client
//
int insert_wu_tags(WORKUNIT& wu, APP& app) {
char buf[LARGE_BLOB_SIZE];
sprintf(buf,
" <rsc_fpops_est>%f</rsc_fpops_est>\n"
" <rsc_fpops_bound>%f</rsc_fpops_bound>\n"
@ -305,18 +482,40 @@ int add_wu_to_reply(
APP* app, APP_VERSION* avp
) {
int retval;
WORKUNIT wu2;
WORKUNIT wu2, wu3;
// add the app, app_version, and workunit to the reply,
// but only if they aren't already there
//
if (avp) {
APP_VERSION av2=*avp, *avp2=&av2;
if (config.choose_download_url_by_timezone) {
// replace the download URL for apps with a list of
// multiple download servers.
// set these global variables, needed by the compare()
// function so that the download URL list can be sorted by
// timezone
tzone=reply.host.timezone;
hostid=reply.host.id;
retval = add_download_servers(avp->xml_doc, av2.xml_doc, reply.host.timezone);
if (retval) {
log_messages.printf(
SCHED_MSG_LOG::CRITICAL,
"add_download_servers(to APP version) failed\n"
);
// restore original WU!
av2=*avp;
}
}
reply.insert_app_unique(*app);
reply.insert_app_version_unique(*avp);
reply.insert_app_version_unique(*avp2);
log_messages.printf(
SCHED_MSG_LOG::DEBUG,
"[HOST#%d] Sending app_version %s %s %d\n",
reply.host.id, app->name, platform.name, avp->version_num
reply.host.id, app->name, platform.name, avp2->version_num
);
}
@ -328,7 +527,29 @@ int add_wu_to_reply(
log_messages.printf(SCHED_MSG_LOG::NORMAL, "insert_wu_tags failed\n");
return retval;
}
reply.insert_workunit_unique(wu2);
wu3=wu2;
if (config.choose_download_url_by_timezone) {
// replace the download URL for WU files with a list of
// multiple download servers.
// set these global variables, needed by the compare()
// function so that the download URL list can be sorted by
// timezone
tzone=reply.host.timezone;
hostid=reply.host.id;
retval = add_download_servers(wu2.xml_doc, wu3.xml_doc, reply.host.timezone);
if (retval) {
log_messages.printf(
SCHED_MSG_LOG::CRITICAL,
"add_download_servers(to WU) failed\n"
);
// restore original WU!
wu3=wu2;
}
}
reply.insert_workunit_unique(wu3);
return 0;
}