From 21e1990508f3b74e3232b2420cbf46acd33d4351 Mon Sep 17 00:00:00 2001 From: Bruce Allen Date: Wed, 2 Feb 2005 21:57:11 +0000 Subject: [PATCH] Made the caching of md5 info for source files a configuration option in config.xml. Use the boolean tag <cache_md5_info/> to enable it. This prevents the work generation library from having to go back and continuously regenerate the md5 sums of your input data files. Note that reading these from disk can be expensive if you have many such files that are large and that you re-use. See check-in notes from 30/31 Dec 2004 for some details. svn path=/trunk/boinc/; revision=5281 --- checkin_notes | 19 +++++++++++++++++++ doc/configuration.php | 8 ++++++++ sched/sched_config.C | 1 + sched/sched_config.h | 1 + tools/backend_lib.C | 20 ++++++++++---------- 5 files changed, 39 insertions(+), 10 deletions(-) diff --git a/checkin_notes b/checkin_notes index 9b646993eb..3fbac395e1 100755 --- a/checkin_notes +++ b/checkin_notes @@ -23783,3 +23783,22 @@ Rom 2 Feb 2005 locale/ +Bruce 2 Feb 2005 + + - Made the caching of md5 info for source files a configuration option in + config.xml. Use the boolean tag <cache_md5_info/> to enable it. + This prevents the work generation library from having to go back and + continuously regenerate the md5 sums of your input data files. Note + that reading these from disk can be expensive if you have many such files + that are large and that you re-use. See check-in notes from 30/31 Dec 2004 + for some details. + + doc/ + configuration.php + sched/ + sched_config.C + sched_config.h + tools/ + backend_lib.C + + diff --git a/doc/configuration.php b/doc/configuration.php index 0bb4479a23..c1b20adcf0 100644 --- a/doc/configuration.php +++ b/doc/configuration.php @@ -45,6 +45,7 @@ htmlspecialchars(" [ N ] [ N ] + [ ] ../ @@ -209,6 +210,13 @@ list_item("choose_download_url_by_timezone", file and executables must start with '/download/...' as in 'http://einstein.phys.uwm.edu/download/123/some_file_name'." 
); +list_item("cache_md5_info", + "When creating work, keep a record (in files called foo.md5) of the + file length and md5 sum of data files and executables. This can + greatly reduce the time needed to create work, if (1) these files + are re-used, and (2) there are many of these files, and (3) reading + the files from disk is time-consuming." +); list_end(); // THE INFORMATION BELOW NEEDS TO BE ORGANIZED AND PUT INTO TABLES OR SOME OTHER LESS CRAMPED FORM diff --git a/sched/sched_config.C b/sched/sched_config.C index f5aec5b5a8..772742e07c 100644 --- a/sched/sched_config.C +++ b/sched/sched_config.C @@ -91,6 +91,7 @@ int SCHED_CONFIG::parse(char* buf) { parse_int(buf, "", locality_scheduling_send_timeout); parse_int(buf, "", min_core_client_version); parse_bool(buf, "choose_download_url_by_timezone", choose_download_url_by_timezone); + parse_bool(buf, "cache_md5_info", cache_md5_info); if (match_tag(buf, "")) { char hostname[256]; diff --git a/sched/sched_config.h b/sched/sched_config.h index acf85b0b9e..f022e13f3e 100644 --- a/sched/sched_config.h +++ b/sched/sched_config.h @@ -60,6 +60,7 @@ public: int locality_scheduling_send_timeout; int min_core_client_version; bool choose_download_url_by_timezone; + bool cache_md5_info; int parse(char*); int parse_file(char* dir="."); diff --git a/tools/backend_lib.C b/tools/backend_lib.C index 2171629b9b..c543263999 100644 --- a/tools/backend_lib.C +++ b/tools/backend_lib.C @@ -62,7 +62,8 @@ int read_filename(const char* path, char* buf, int len) { return retval; } -#ifdef BOINC_CACHE_MD5 +// see checkin notes Dec 30 2004 +// static bool got_md5_info( const char *path, char *md5data, @@ -78,7 +79,7 @@ static bool got_md5_info( struct stat md5stat, filestat; bool retval=false; char endline='\0'; - + sprintf(md5name, "%s.md5", path); // get mod times for file @@ -113,6 +114,8 @@ static bool got_md5_info( return retval; } +// see checkin notes Dec 30 2004 +// static void write_md5_info( const char *path, const char *md5, @@ 
-142,7 +145,6 @@ static void write_md5_info( return; } -#endif // BOINC_CACHE_MD5 // process WU template @@ -193,20 +195,18 @@ static int process_wu_template( ); boinc_copy(top_download_path,path); } -#ifdef BOINC_CACHE_MD5 - // see checkin notes Dec 30 2004 - if (!got_md5_info(path, md5, &nbytes)) { -#endif + + if (!config.cache_md5_info || !got_md5_info(path, md5, &nbytes)) { + retval = md5_file(path, md5, nbytes); if (retval) { fprintf(stderr, "process_wu_template: md5_file %d\n", retval); return retval; } -#ifdef BOINC_CACHE_MD5 - else + else if (config.cache_md5_info) { write_md5_info(path, md5, nbytes); + } } -#endif dir_hier_url( infiles[file_number], config.download_url,