diff --git a/checkin_notes b/checkin_notes index 9b646993eb..3fbac395e1 100755 --- a/checkin_notes +++ b/checkin_notes @@ -23783,3 +23783,22 @@ Rom 2 Feb 2005 locale/ +Bruce 2 Feb 2005 + + - Made the caching of md5 info for source files a configuration option in + config.xml. Use the boolean tag <cache_md5_info> to enable it. + This prevents the work generation library from having to go back and + continuously regenerate the md5 sums of your input data files. Note + that reading these from disk can be expensive if you have many such files + that are large and that you re-use. See check-in notes from 30/31 Dec 2004 + for some details. + + doc/ + configuration.php + sched/ + sched_config.C + sched_config.h + tools/ + backend_lib.C + + diff --git a/doc/configuration.php b/doc/configuration.php index 0bb4479a23..c1b20adcf0 100644 --- a/doc/configuration.php +++ b/doc/configuration.php @@ -45,6 +45,7 @@ htmlspecialchars(" [ N ] [ N ] + [ ] ../ @@ -209,6 +210,13 @@ list_item("choose_download_url_by_timezone", file and executables must start with '/download/...' as in 'http://einstein.phys.uwm.edu/download/123/some_file_name'." ); +list_item("cache_md5_info", + "When creating work, keep a record (in files called foo.md5) of the + file length and md5 sum of data files and executables. This can + greatly reduce the time needed to create work, if (1) these files + are re-used, and (2) there are many of these files, and (3) reading + the files from disk is time-consuming." 
+); list_end(); // THE INFORMATION BELOW NEEDS TO BE ORGANIZED AND PUT INTO TABLES OR SOME OTHER LESS CRAMPED FORM diff --git a/sched/sched_config.C b/sched/sched_config.C index f5aec5b5a8..772742e07c 100644 --- a/sched/sched_config.C +++ b/sched/sched_config.C @@ -91,6 +91,7 @@ int SCHED_CONFIG::parse(char* buf) { parse_int(buf, "", locality_scheduling_send_timeout); parse_int(buf, "", min_core_client_version); parse_bool(buf, "choose_download_url_by_timezone", choose_download_url_by_timezone); + parse_bool(buf, "cache_md5_info", cache_md5_info); if (match_tag(buf, "")) { char hostname[256]; diff --git a/sched/sched_config.h b/sched/sched_config.h index acf85b0b9e..f022e13f3e 100644 --- a/sched/sched_config.h +++ b/sched/sched_config.h @@ -60,6 +60,7 @@ public: int locality_scheduling_send_timeout; int min_core_client_version; bool choose_download_url_by_timezone; + bool cache_md5_info; int parse(char*); int parse_file(char* dir="."); diff --git a/tools/backend_lib.C b/tools/backend_lib.C index 2171629b9b..c543263999 100644 --- a/tools/backend_lib.C +++ b/tools/backend_lib.C @@ -62,7 +62,8 @@ int read_filename(const char* path, char* buf, int len) { return retval; } -#ifdef BOINC_CACHE_MD5 +// see checkin notes Dec 30 2004 +// static bool got_md5_info( const char *path, char *md5data, @@ -78,7 +79,7 @@ static bool got_md5_info( struct stat md5stat, filestat; bool retval=false; char endline='\0'; - + sprintf(md5name, "%s.md5", path); // get mod times for file @@ -113,6 +114,8 @@ static bool got_md5_info( return retval; } +// see checkin notes Dec 30 2004 +// static void write_md5_info( const char *path, const char *md5, @@ -142,7 +145,6 @@ static void write_md5_info( return; } -#endif // BOINC_CACHE_MD5 // process WU template @@ -193,20 +195,18 @@ static int process_wu_template( ); boinc_copy(top_download_path,path); } -#ifdef BOINC_CACHE_MD5 - // see checkin notes Dec 30 2004 - if (!got_md5_info(path, md5, &nbytes)) { -#endif + + if (!config.cache_md5_info || 
!got_md5_info(path, md5, &nbytes)) { + retval = md5_file(path, md5, nbytes); if (retval) { fprintf(stderr, "process_wu_template: md5_file %d\n", retval); return retval; } -#ifdef BOINC_CACHE_MD5 - else + else if (config.cache_md5_info) { write_md5_info(path, md5, nbytes); + } } -#endif dir_hier_url( infiles[file_number], config.download_url,