From e009537167861477896703935148daaf2b2dc31f Mon Sep 17 00:00:00 2001 From: Bruce Allen Date: Thu, 30 Dec 2004 04:16:00 +0000 Subject: [PATCH] When creating large numbers of WU which have large input files, the cost of computing the md5 sums becomes expensive. Inserted a simple caching mechanism into the utility function process_wu_template(). For a given filename FILENAME, this looks for a file called FILENAME.md5, which contains the md5sum and length. If the file exists, these values are used. If not, such a file is created. Since (I think) David is planning to have a database for files, this mechanism will eventually be obsolete. Right now it is disabled by default. Use -DBOINC_CACHE_MD5 to enable it. svn path=/trunk/boinc/; revision=4965 --- checkin_notes | 16 ++++++++++++++++ tools/backend_lib.C | 40 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/checkin_notes b/checkin_notes index 20b6345b02..20b7ed9d5e 100755 --- a/checkin_notes +++ b/checkin_notes @@ -21995,3 +21995,19 @@ David 29 Dec 2004 api/ x_opengl.C + +Bruce 30 Dec 2004 + + - When creating large numbers of WU which have large input files, + the cost of computing the md5 sums becomes expensive. Inserted + a simple caching mechanism into the utility function + process_wu_template(). For a given filename FILENAME, this looks + for a file called FILENAME.md5, which contains the md5sum and + length. If the file exists, these values are used. If not, such + a file is created. Since (I think) David is planning to have a + database for files, this mechanism will eventually be obsolete. + Right now it is disabled by default. Use -DBOINC_CACHE_MD5 to + enable it. 
+ + tools/ + backend_lib.C diff --git a/tools/backend_lib.C b/tools/backend_lib.C index f9a7ecfaae..053879c4e5 100644 --- a/tools/backend_lib.C +++ b/tools/backend_lib.C @@ -109,11 +109,43 @@ static int process_wu_template( boinc_copy(top_download_path,path); } - retval = md5_file(path, md5, nbytes); - if (retval) { - fprintf(stderr, "process_wu_template: md5_file %d\n", retval); - return retval; + retval=0; +#ifdef BOINC_CACHE_MD5 + // see checkin-notes Dec 30 2004 + { + // look for file named FILENAME.md5 containing + // md5sum and length + FILE *fp; + char md5name[512]; + sprintf(md5name, "%s.md5", path); + if ((fp=fopen(md5name, "r"))) { + // found cached file info! + if (2==fscanf(fp, "%s %lf", md5, &nbytes)) + retval=1; // indicates sucess getting cached info + fclose(fp); + } } +#endif + if (!retval) { + retval = md5_file(path, md5, nbytes); + if (retval) { + fprintf(stderr, "process_wu_template: md5_file %d\n", retval); + return retval; + } +#ifdef BOINC_CACHE_MD5 + else { + // Write file FILENAME.md5 containing md5sum and length + FILE *fp; + char md5name[512]; + sprintf(md5name, "%s.md5", path); + if ((fp=fopen(md5name, "w"))) { + fprintf(fp,"%s %.15e\n", md5, nbytes); + fclose(fp); + } + } +#endif + } // (!retval) + dir_hier_url( infiles[file_number], config.download_url, config.uldl_dir_fanout, url