From 97229c0fcf3cd93215a6333806788522c79c3a87 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Wed, 26 Mar 2014 15:15:16 -0700 Subject: [PATCH] Server: enable gzipped input files, and fix bugs in MD5 caching The docs said that putting <gzip/> for a file in your input template would cause it to be transferred in gzip form. But most of the server-side implementation was missing. - in process_input_template(), parse <gzip/>, and add <gzipped_url> and <gzipped_nbytes> elements to the output. - stage_file was generating MD5 cache files containing only the MD5, but process_input_template() expected them to contain file size as well. Change stage_file to write both, and change process_input_template() to write an error message if it finds a bad MD5 file. - remove stuff from process_input_template() related to "generated_locally", a feature that doesn't exist anymore. --- tools/process_input_template.cpp | 69 +++++++++++++++++++++++--------- tools/stage_file | 7 +++- 2 files changed, 56 insertions(+), 20 deletions(-) diff --git a/tools/process_input_template.cpp b/tools/process_input_template.cpp index 92efac98ec..dd81de3ec5 100644 --- a/tools/process_input_template.cpp +++ b/tools/process_input_template.cpp @@ -77,9 +77,10 @@ static bool got_md5_info( int n = fscanf(fp, "%s %lf%c", md5data, nbytes, &endline); int c = fgetc(fp); fclose(fp); - if (n != 3) return false; - if (endline !='\n') return false; - if (c != EOF) return false; + if ((n != 3) || (endline !='\n') || (c != EOF)) { + fprintf(stderr, "bad MD5 cache file %s; remove it and stage file again\n", md5name); + return false; + } // if this is one of our cached md5 files, but it's OLDER than the // data file which it supposedly corresponds to, delete it. 
@@ -141,17 +142,19 @@ static int process_file_info( SCHED_CONFIG& config_loc ) { vector urls; - bool generated_locally = false; + bool gzip = false; int retval, file_number = -1; - double nbytes, nbytesdef = -1; + double nbytes, nbytesdef = -1, gzipped_nbytes; string md5str, urlstr, tmpstr; - char buf[BLOB_SIZE], path[MAXPATHLEN], top_download_path[MAXPATHLEN], md5[33], url[256]; + char buf[BLOB_SIZE], path[MAXPATHLEN], top_download_path[MAXPATHLEN]; + char gzip_path[MAXPATHLEN]; + char md5[33], url[256], gzipped_url[256], buf2[256]; out += "\n"; while (!xp.get_tag()) { if (xp.parse_int("number", file_number)) { continue; - } else if (xp.parse_bool("generated_locally", generated_locally)) { + } else if (xp.parse_bool("gzip", gzip)) { continue; } else if (xp.parse_string("url", urlstr)) { urls.push_back(urlstr); @@ -160,6 +163,8 @@ static int process_file_info( continue; } else if (xp.parse_double("nbytes", nbytesdef)) { continue; + } else if (xp.parse_double("gzipped_nbytes", gzipped_nbytes)) { + continue; } else if (xp.match_tag("/file_info")) { if (nbytesdef != -1 || md5str != "" || urlstr != "") { if (nbytesdef == -1 || md5str == "" || urlstr == "") { @@ -168,6 +173,10 @@ static int process_file_info( ); return ERR_XML_PARSE; } + if (gzip && !gzipped_nbytes) { + fprintf(stderr, "Must specify gzipped_nbytes\n"); + return ERR_XML_PARSE; + } } if (file_number < 0) { fprintf(stderr, "No file number found\n"); @@ -187,15 +196,8 @@ static int process_file_info( return ERR_XML_PARSE; } input_file_found[file_number] = true; - if (generated_locally) { - sprintf(buf, - " %s\n" - " \n" - "\n", - infiles[file_number] - ); - } else if (nbytesdef == -1) { - // here if nybtes was not supplied; stage the file + if (nbytesdef == -1) { + // here if nybtes was not supplied; stage the file if needed // dir_hier_path( infiles[file_number], config_loc.download_dir, @@ -230,14 +232,36 @@ static int process_file_info( infiles[file_number], config_loc.download_url, 
config_loc.uldl_dir_fanout, url ); + + if (gzip) { + sprintf(gzip_path, "%s.gz", path); + retval = file_size(gzip_path, gzipped_nbytes); + if (retval) { + fprintf(stderr, + "process_input_template: missing gzip file %s\n", + gzip_path + ); + return ERR_FILE_MISSING; + } + sprintf(gzipped_url, + " %s.gz\n" + " %.0f\n", + url, gzipped_nbytes + ); + } else { + strcpy(gzipped_url, ""); + } + sprintf(buf, " %s\n" " %s\n" + "%s" " %s\n" " %.0f\n" "\n", infiles[file_number], url, + gzipped_url, md5, nbytes ); @@ -248,18 +272,27 @@ static int process_file_info( urlstr = ""; for (unsigned int i=0; i\n"; + if (gzip) { + urlstr += " " + urls.at(i) + string(infiles[file_number]) + ".gz\n"; + } } sprintf(buf, " %s\n" "%s" " %s\n" - " %.0f\n" - "\n", + " %.0f\n", infiles[file_number], urlstr.c_str(), md5str.c_str(), nbytesdef ); + if (gzip) { + sprintf(buf2, " %.0f\n", + gzipped_nbytes + ); + strcat(buf, buf2); + } + strcat(buf, "\n"); } out += buf; break; diff --git a/tools/stage_file b/tools/stage_file index 19bf0453b8..bedb1d0e06 100755 --- a/tools/stage_file +++ b/tools/stage_file @@ -90,7 +90,9 @@ $md5 = md5_file($path); // if (file_exists($dl_path)) { if (file_exists($dl_md5_path)) { - $dl_md5 = trim(file_get_contents($dl_md5_path)); + $x = file_get_contents($dl_md5_path); + $x = explode(" ", $x); + $dl_md5 = $x[0]; } else { $dl_md5 = md5_file($dl_path); } @@ -115,7 +117,8 @@ Please use a different file name. // make MD5 file if needed // if (!file_exists($dl_md5_path)) { - file_put_contents($dl_md5_path, $md5); + $x = $md5." ".filesize($path)."\n"; + file_put_contents($dl_md5_path, $x); } // make gzipped version if needed