From f5c64abac07322fd644194a79d640c057e5fea9e Mon Sep 17 00:00:00 2001 From: David Anderson Date: Fri, 22 Jul 2016 16:10:23 -0700 Subject: [PATCH] Condor GAHP: add extensions to input file names The content-based file management system used by Condor GAHP used a file's MD5 hash as its physical name. This caused problems for Einstein@home because their app expected filenames with particular extensions. So I: - changed the docs and param names of the C++ interface to the file management system (query_files() and upload_files()) to clarify that the physical file names can include additional text besides the MD5 hash. - changed the BOINC GAHP to look for filename extensions, and append these to the physical filenames --- lib/remote_submit.cpp | 12 +++--- lib/remote_submit.h | 81 ++++++++++++++++------------------- samples/condor/boinc_gahp.cpp | 44 +++++++++++++------ 3 files changed, 74 insertions(+), 63 deletions(-) diff --git a/lib/remote_submit.cpp b/lib/remote_submit.cpp index e90fd0f5be..9c4efeb5cc 100644 --- a/lib/remote_submit.cpp +++ b/lib/remote_submit.cpp @@ -123,7 +123,7 @@ static int do_http_post( int query_files( const char* project_url, const char* authenticator, - vector &md5s, + vector &boinc_names, int batch_id, vector &absent_files, string& error_msg @@ -137,8 +137,8 @@ int query_files( sprintf(buf, "%d\n", batch_id); req_msg += string(buf); } - for (unsigned int i=0; i%s\n", md5s[i].c_str()); + for (unsigned int i=0; i%s\n", boinc_names[i].c_str()); req_msg += string(buf); } req_msg += "\n"; @@ -178,7 +178,7 @@ int upload_files ( const char* project_url, const char* authenticator, vector &paths, - vector &md5s, + vector &boinc_names, int batch_id, string &error_msg ) { @@ -190,8 +190,8 @@ int upload_files ( sprintf(buf, "%d\n", batch_id); req_msg += string(buf); } - for (unsigned int i=0; i%s\n", md5s[i].c_str()); + for (unsigned int i=0; i%s\n", boinc_names[i].c_str()); req_msg += string(buf); } req_msg += "\n"; diff --git a/lib/remote_submit.h 
b/lib/remote_submit.h index 498ff6a8e0..6e5c0a5758 100644 --- a/lib/remote_submit.h +++ b/lib/remote_submit.h @@ -28,10 +28,6 @@ #include "parse.h" -using std::string; -using std::vector; -using std::map; - // Input file modes. // Only LOCAL_STAGED and REMOTE are implemented now. // @@ -63,25 +59,20 @@ struct INFILE { struct JOB { char job_name[256]; - string cmdline_args; - vector infiles; -}; - -struct LOCAL_FILE { - char md5[64]; - double nbytes; + std::string cmdline_args; + std::vector infiles; }; struct JOB_STATUS { - string job_name; - string status; + std::string job_name; + std::string status; JOB_STATUS(){} }; struct QUERY_BATCH_SET_REPLY { double server_time; // server time at start of query - vector batch_sizes; // how many jobs in each of the queried batches - vector jobs; // the jobs, sequentially + std::vector batch_sizes; // how many jobs in each of the queried batches + std::vector jobs; // the jobs, sequentially }; struct OUTFILE { @@ -93,13 +84,13 @@ struct FETCH_OUTPUT_REQ { char job_name[256]; char dir[256]; bool fetch_all; - string stderr_filename; - vector file_descs; + std::string stderr_filename; + std::vector file_descs; }; struct TEMPLATE_DESC { - vector input_files; - vector output_files; + std::vector input_files; + std::vector output_files; int parse(XML_PARSER&); }; @@ -114,7 +105,7 @@ struct COMPLETED_JOB_DESC { int exit_status; double elapsed_time; double cpu_time; - string stderr_out; + std::string stderr_out; int parse(XML_PARSER&); }; @@ -125,19 +116,19 @@ struct COMPLETED_JOB_DESC { extern int query_files( const char* project_url, const char* authenticator, - vector &md5s, + std::vector &boinc_names, int batch_id, - vector &absent_files, - string& error_msg + std::vector &absent_files, + std::string& error_msg ); extern int upload_files ( const char* project_url, const char* authenticator, - vector &paths, - vector &md5s, + std::vector &paths, + std::vector &boinc_names, int batch_id, - string& error_msg + std::string& error_msg 
); extern int create_batch( @@ -147,7 +138,7 @@ extern int create_batch( const char* app_name, double expire_time, int &batch_id, - string& error_msg + std::string& error_msg ); extern int submit_jobs( @@ -155,17 +146,17 @@ extern int submit_jobs( const char* authenticator, char app_name[256], int batch_id, - vector jobs, - string& error_msg + std::vector jobs, + std::string& error_msg ); extern int estimate_batch( const char* project_url, const char* authenticator, char app_name[256], - vector jobs, + std::vector jobs, double& est_makespan, - string& error_msg + std::string& error_msg ); // Return the short status of the jobs in a given set of batches @@ -175,9 +166,9 @@ extern int query_batch_set( const char* project_url, const char* authenticator, double min_mod_time, - vector &batch_names, + std::vector &batch_names, QUERY_BATCH_SET_REPLY& reply, - string& error_msg + std::string& error_msg ); struct BATCH_STATUS { @@ -204,8 +195,8 @@ struct BATCH_STATUS { extern int query_batches( const char* project_url, const char* authenticator, - vector& batches, - string& error_msg + std::vector& batches, + std::string& error_msg ); struct JOB_STATE { @@ -227,8 +218,8 @@ extern int query_batch( const char* authenticator, int batch_id, const char batch_name[256], - vector& jobs, - string& error_msg + std::vector& jobs, + std::string& error_msg ); extern int get_output_file( @@ -237,14 +228,14 @@ extern int get_output_file( const char* job_name, int file_num, const char* dst_path, - string& error_msg + std::string& error_msg ); extern int abort_jobs( const char* project_url, const char* authenticator, - vector &job_names, - string& error_msg + std::vector &job_names, + std::string& error_msg ); extern int query_completed_job( @@ -252,7 +243,7 @@ extern int query_completed_job( const char* authenticator, const char* job_name, COMPLETED_JOB_DESC&, - string& error_msg + std::string& error_msg ); extern int get_templates( @@ -261,14 +252,14 @@ extern int get_templates( const 
char* app_name, // either this const char* job_name, // or this must be non-NULL TEMPLATE_DESC&, - string& error_msg + std::string& error_msg ); extern int retire_batch( const char* project_url, const char* authenticator, const char* batch_name, - string& error_msg + std::string& error_msg ); extern int set_expire_time( @@ -276,12 +267,12 @@ extern int set_expire_time( const char* authenticator, const char* batch_name, double expire_time, - string& error_msg + std::string& error_msg ); extern int ping_server( const char* project_url, - string& error_msg + std::string& error_msg ); #endif diff --git a/samples/condor/boinc_gahp.cpp b/samples/condor/boinc_gahp.cpp index fab6c6a8b7..f27cbc019e 100644 --- a/samples/condor/boinc_gahp.cpp +++ b/samples/condor/boinc_gahp.cpp @@ -68,6 +68,12 @@ struct SUBMIT_REQ { int batch_id; }; +struct LOCAL_FILE { + char boinc_name[256]; + // the MD5 followed by filename extension if any + double nbytes; +}; + // represents a command. // struct COMMAND { @@ -106,8 +112,31 @@ struct COMMAND { vector commands; -int compute_md5(string path, LOCAL_FILE& f) { - return md5_file(path.c_str(), f.md5, f.nbytes); +// Copy the extension of the last component of path (including the '.') +// to ext, or an empty string if there is none. +// +void filename_extension(const char* path, char* ext) { + const char* p = strrchr(path, '/'); + if (!p) p = path; + const char* q = strrchr(p, '.'); + if (q) { + strcpy(ext, q); + } else { + strcpy(ext, ""); + } +} + +// Compute the BOINC name (MD5 followed by filename extension if any) +// and size of the file at path, and store them in f. +// If md5_file() returns nonzero, return that value without setting f.boinc_name. +// +int compute_boinc_name(string path, LOCAL_FILE& f) { + char md5[64], ext[256]; + int retval = md5_file(path.c_str(), md5, f.nbytes); + if (retval) return retval; + filename_extension(path.c_str(), ext); + snprintf(f.boinc_name, sizeof(f.boinc_name), "%s%s", md5, ext); + return 0; } const char *escape_str(const string &str) { @@ -151,14 +171,14 @@ int process_input_files(SUBMIT_REQ& req, string& error_msg) { } } - // compute the MD5s of these files, - // and make a map from path to MD5 and size (LOCAL_FILE) + // compute the BOINC names (md5.ext) of these files, + // and make a map from path to BOINC name and size (LOCAL_FILE) // set::iterator iter = unique_paths.begin(); while (iter != 
unique_paths.end()) { string s = *iter; LOCAL_FILE lf; - retval = compute_md5(s, lf); + retval = compute_boinc_name(s, lf); if (retval) return retval; local_files.insert(std::pair(s, lf)); ++iter; @@ -168,18 +188,18 @@ int process_input_files(SUBMIT_REQ& req, string& error_msg) { // map::iterator map_iter; map_iter = local_files.begin(); - vector md5s, paths; + vector boinc_names, paths; vector absent_files; while (map_iter != local_files.end()) { LOCAL_FILE lf = map_iter->second; paths.push_back(map_iter->first); - md5s.push_back(lf.md5); + boinc_names.push_back(lf.boinc_name); ++map_iter; } retval = query_files( project_url, authenticator, - md5s, + boinc_names, req.batch_id, absent_files, error_msg @@ -188,17 +208,17 @@ int process_input_files(SUBMIT_REQ& req, string& error_msg) { // upload the missing files. // - vector upload_md5s, upload_paths; + vector upload_boinc_names, upload_paths; for (unsigned int i=0; i::iterator iter = local_files.find(infile.src_path); LOCAL_FILE& lf = iter->second; - sprintf(infile.physical_name, "jf_%s", lf.md5); + sprintf(infile.physical_name, "jf_%s", lf.boinc_name); } }