Made the caching of md5 info for source files a configuration option in

config.xml.  Use the boolean tag <cache_md5_info> to enable it.
This saves the work generation library from repeatedly recomputing the
md5 sums of your input data files.  Note that recomputing them means
reading the files from disk, which can be expensive if you have many
large files that you re-use. See check-in notes from 30/31 Dec 2004
for some details.

svn path=/trunk/boinc/; revision=5281
This commit is contained in:
Bruce Allen 2005-02-02 21:57:11 +00:00
parent 81b2c78ee1
commit 21e1990508
5 changed files with 39 additions and 10 deletions

View File

@ -23783,3 +23783,22 @@ Rom 2 Feb 2005
locale/
<all localization files for the client are now here>
Bruce 2 Feb 2005
- Made the caching of md5 info for source files a configuration option in
config.xml. Use the boolean tag <cache_md5_info> to enable it.
This prevents the work generation library from having to go back and
continuously regenerate the md5 sums of your input data files. Note
that reading these from disk can be expensive if you have many such files
that are large and that you re-use. See check-in notes from 30/31 Dec 2004
for some details.
doc/
configuration.php
sched/
sched_config.C
sched_config.h
tools/
backend_lib.C

View File

@ -45,6 +45,7 @@ htmlspecialchars("
[ <locality_scheduling_wait_period> N </locality_scheduling_wait_period> ]
[ <min_core_client_version> N </min_core_client_version> ]
[ <choose_download_url_by_timezone/> ]
[ <cache_md5_info/> ]
<!-- optional; defaults as indicated: -->
<project_dir> ../ </project_dir> <!-- relative to location of 'start' -->
@ -209,6 +210,13 @@ list_item("choose_download_url_by_timezone",
file and executables must start with '/download/...' as in
'http://einstein.phys.uwm.edu/download/123/some_file_name'."
);
list_item("cache_md5_info",
"When creating work, keep a record (in files called foo.md5) of the
file length and md5 sum of data files and executables. This can
greatly reduce the time needed to create work, if (1) these files
are re-used, and (2) there are many of these files, and (3) reading
the files from disk is time-consuming."
);
list_end();
// THE INFORMATION BELOW NEEDS TO BE ORGANIZED AND PUT INTO TABLES OR SOME OTHER LESS CRAMPED FORM

View File

@ -91,6 +91,7 @@ int SCHED_CONFIG::parse(char* buf) {
parse_int(buf, "<locality_scheduling_send_timeout>", locality_scheduling_send_timeout);
parse_int(buf, "<min_core_client_version>", min_core_client_version);
parse_bool(buf, "choose_download_url_by_timezone", choose_download_url_by_timezone);
parse_bool(buf, "cache_md5_info", cache_md5_info);
if (match_tag(buf, "</config>")) {
char hostname[256];

View File

@ -60,6 +60,7 @@ public:
int locality_scheduling_send_timeout;
int min_core_client_version;
bool choose_download_url_by_timezone;
bool cache_md5_info;
int parse(char*);
int parse_file(char* dir=".");

View File

@ -62,7 +62,8 @@ int read_filename(const char* path, char* buf, int len) {
return retval;
}
#ifdef BOINC_CACHE_MD5
// see checkin notes Dec 30 2004
//
static bool got_md5_info(
const char *path,
char *md5data,
@ -78,7 +79,7 @@ static bool got_md5_info(
struct stat md5stat, filestat;
bool retval=false;
char endline='\0';
sprintf(md5name, "%s.md5", path);
// get mod times for file
@ -113,6 +114,8 @@ static bool got_md5_info(
return retval;
}
// see checkin notes Dec 30 2004
//
static void write_md5_info(
const char *path,
const char *md5,
@ -142,7 +145,6 @@ static void write_md5_info(
return;
}
#endif // BOINC_CACHE_MD5
// process WU template
@ -193,20 +195,18 @@ static int process_wu_template(
);
boinc_copy(top_download_path,path);
}
#ifdef BOINC_CACHE_MD5
// see checkin notes Dec 30 2004
if (!got_md5_info(path, md5, &nbytes)) {
#endif
if (!config.cache_md5_info || !got_md5_info(path, md5, &nbytes)) {
retval = md5_file(path, md5, nbytes);
if (retval) {
fprintf(stderr, "process_wu_template: md5_file %d\n", retval);
return retval;
}
#ifdef BOINC_CACHE_MD5
else
else if (config.cache_md5_info) {
write_md5_info(path, md5, nbytes);
}
}
#endif
dir_hier_url(
infiles[file_number], config.download_url,