Condor GAHP: add extensions to input file names

The content-based file management system used by Condor GAHP
used a file's MD5 hash as its physical name.
This caused problems for Einstein@home because their app
expected filenames with particular extensions.
So I:

- changed the docs and param names of the C++ interface
  to the file management system (query_files() and upload_files())
  to clarify that the physical file names can include additional text
  besides the MD5 hash.
- changed the BOINC GAHP to look for filename extensions,
  and append these to the physical filenames.
This commit is contained in:
David Anderson 2016-07-22 16:10:23 -07:00
parent cd87e03c5e
commit f5c64abac0
3 changed files with 74 additions and 63 deletions

View File

@ -123,7 +123,7 @@ static int do_http_post(
int query_files(
const char* project_url,
const char* authenticator,
vector<string> &md5s,
vector<string> &boinc_names,
int batch_id,
vector<int> &absent_files,
string& error_msg
@ -137,8 +137,8 @@ int query_files(
sprintf(buf, "<batch_id>%d</batch_id>\n", batch_id);
req_msg += string(buf);
}
for (unsigned int i=0; i<md5s.size(); i++) {
sprintf(buf, " <md5>%s</md5>\n", md5s[i].c_str());
for (unsigned int i=0; i<boinc_names.size(); i++) {
sprintf(buf, " <md5>%s</md5>\n", boinc_names[i].c_str());
req_msg += string(buf);
}
req_msg += "</query_files>\n";
@ -178,7 +178,7 @@ int upload_files (
const char* project_url,
const char* authenticator,
vector<string> &paths,
vector<string> &md5s,
vector<string> &boinc_names,
int batch_id,
string &error_msg
) {
@ -190,8 +190,8 @@ int upload_files (
sprintf(buf, "<batch_id>%d</batch_id>\n", batch_id);
req_msg += string(buf);
}
for (unsigned int i=0; i<md5s.size(); i++) {
sprintf(buf, "<md5>%s</md5>\n", md5s[i].c_str());
for (unsigned int i=0; i<boinc_names.size(); i++) {
sprintf(buf, "<md5>%s</md5>\n", boinc_names[i].c_str());
req_msg += string(buf);
}
req_msg += "</upload_files>\n";

View File

@ -28,10 +28,6 @@
#include "parse.h"
using std::string;
using std::vector;
using std::map;
// Input file modes.
// Only LOCAL_STAGED and REMOTE are implemented now.
//
@ -63,25 +59,20 @@ struct INFILE {
struct JOB {
char job_name[256];
string cmdline_args;
vector<INFILE> infiles;
};
struct LOCAL_FILE {
char md5[64];
double nbytes;
std::string cmdline_args;
std::vector<INFILE> infiles;
};
struct JOB_STATUS {
string job_name;
string status;
std::string job_name;
std::string status;
JOB_STATUS(){}
};
struct QUERY_BATCH_SET_REPLY {
double server_time; // server time at start of query
vector<int> batch_sizes; // how many jobs in each of the queried batches
vector<JOB_STATUS> jobs; // the jobs, sequentially
std::vector<int> batch_sizes; // how many jobs in each of the queried batches
std::vector<JOB_STATUS> jobs; // the jobs, sequentially
};
struct OUTFILE {
@ -93,13 +84,13 @@ struct FETCH_OUTPUT_REQ {
char job_name[256];
char dir[256];
bool fetch_all;
string stderr_filename;
vector<OUTFILE> file_descs;
std::string stderr_filename;
std::vector<OUTFILE> file_descs;
};
struct TEMPLATE_DESC {
vector<string> input_files;
vector<string> output_files;
std::vector<std::string> input_files;
std::vector<std::string> output_files;
int parse(XML_PARSER&);
};
@ -114,7 +105,7 @@ struct COMPLETED_JOB_DESC {
int exit_status;
double elapsed_time;
double cpu_time;
string stderr_out;
std::string stderr_out;
int parse(XML_PARSER&);
};
@ -125,19 +116,19 @@ struct COMPLETED_JOB_DESC {
extern int query_files(
const char* project_url,
const char* authenticator,
vector<string> &md5s,
std::vector<std::string> &boinc_names,
int batch_id,
vector<int> &absent_files,
string& error_msg
std::vector<int> &absent_files,
std::string& error_msg
);
extern int upload_files (
const char* project_url,
const char* authenticator,
vector<string> &paths,
vector<string> &md5s,
std::vector<std::string> &paths,
std::vector<std::string> &boinc_names,
int batch_id,
string& error_msg
std::string& error_msg
);
extern int create_batch(
@ -147,7 +138,7 @@ extern int create_batch(
const char* app_name,
double expire_time,
int &batch_id,
string& error_msg
std::string& error_msg
);
extern int submit_jobs(
@ -155,17 +146,17 @@ extern int submit_jobs(
const char* authenticator,
char app_name[256],
int batch_id,
vector<JOB> jobs,
string& error_msg
std::vector<JOB> jobs,
std::string& error_msg
);
extern int estimate_batch(
const char* project_url,
const char* authenticator,
char app_name[256],
vector<JOB> jobs,
std::vector<JOB> jobs,
double& est_makespan,
string& error_msg
std::string& error_msg
);
// Return the short status of the jobs in a given set of batches
@ -175,9 +166,9 @@ extern int query_batch_set(
const char* project_url,
const char* authenticator,
double min_mod_time,
vector<string> &batch_names,
std::vector<std::string> &batch_names,
QUERY_BATCH_SET_REPLY& reply,
string& error_msg
std::string& error_msg
);
struct BATCH_STATUS {
@ -204,8 +195,8 @@ struct BATCH_STATUS {
extern int query_batches(
const char* project_url,
const char* authenticator,
vector<BATCH_STATUS>& batches,
string& error_msg
std::vector<BATCH_STATUS>& batches,
std::string& error_msg
);
struct JOB_STATE {
@ -227,8 +218,8 @@ extern int query_batch(
const char* authenticator,
int batch_id,
const char batch_name[256],
vector<JOB_STATE>& jobs,
string& error_msg
std::vector<JOB_STATE>& jobs,
std::string& error_msg
);
extern int get_output_file(
@ -237,14 +228,14 @@ extern int get_output_file(
const char* job_name,
int file_num,
const char* dst_path,
string& error_msg
std::string& error_msg
);
extern int abort_jobs(
const char* project_url,
const char* authenticator,
vector<string> &job_names,
string& error_msg
std::vector<std::string> &job_names,
std::string& error_msg
);
extern int query_completed_job(
@ -252,7 +243,7 @@ extern int query_completed_job(
const char* authenticator,
const char* job_name,
COMPLETED_JOB_DESC&,
string& error_msg
std::string& error_msg
);
extern int get_templates(
@ -261,14 +252,14 @@ extern int get_templates(
const char* app_name, // either this
const char* job_name, // or this must be non-NULL
TEMPLATE_DESC&,
string& error_msg
std::string& error_msg
);
extern int retire_batch(
const char* project_url,
const char* authenticator,
const char* batch_name,
string& error_msg
std::string& error_msg
);
extern int set_expire_time(
@ -276,12 +267,12 @@ extern int set_expire_time(
const char* authenticator,
const char* batch_name,
double expire_time,
string& error_msg
std::string& error_msg
);
extern int ping_server(
const char* project_url,
string& error_msg
std::string& error_msg
);
#endif

View File

@ -68,6 +68,12 @@ struct SUBMIT_REQ {
int batch_id;
};
// Info computed for a local input file: its BOINC name and its size.
//
struct LOCAL_FILE {
char boinc_name[256];
// the MD5 followed by filename extension if any
// (the extension includes its leading '.')
double nbytes;
// the file's size; filled in by md5_file()
};
// represents a command.
//
struct COMMAND {
@ -106,8 +112,22 @@ struct COMMAND {
vector<COMMAND*> commands;
int compute_md5(string path, LOCAL_FILE& f) {
return md5_file(path.c_str(), f.md5, f.nbytes);
// Copy the filename extension of path, including the leading '.', into ext.
// Only the part of path from the last '/' onward is examined,
// so a '.' in a directory name is not mistaken for an extension.
// If that part contains no '.', ext is set to the empty string.
// ext must be large enough for the extension plus the terminating NUL.
//
void filename_extension(const char* path, char* ext) {
    const char* last_slash = strrchr(path, '/');
    const char* component = last_slash ? last_slash : path;
    const char* dot = strrchr(component, '.');
    strcpy(ext, dot ? dot : "");
}
// Compute the BOINC name and size of the file at path.
// The BOINC name is the file's MD5 followed by its filename extension, if any;
// it is stored in f.boinc_name. f.nbytes is filled in by md5_file().
// Returns zero on success,
// the nonzero value returned by md5_file() if hashing failed,
// or -1 if the resulting name doesn't fit in f.boinc_name.
//
int compute_boinc_name(string path, LOCAL_FILE& f) {
    char md5[64], ext[256];

    // Don't return md5_file()'s result directly:
    // on success we still need to build f.boinc_name below.
    //
    int retval = md5_file(path.c_str(), md5, f.nbytes);
    if (retval) return retval;

    filename_extension(path.c_str(), ext);

    // bounded write: md5 plus a long extension could exceed boinc_name
    //
    int n = snprintf(f.boinc_name, sizeof(f.boinc_name), "%s%s", md5, ext);
    if (n < 0 || static_cast<size_t>(n) >= sizeof(f.boinc_name)) {
        return -1;
    }
    return 0;
}
const char *escape_str(const string &str) {
@ -151,14 +171,14 @@ int process_input_files(SUBMIT_REQ& req, string& error_msg) {
}
}
// compute the MD5s of these files,
// and make a map from path to MD5 and size (LOCAL_FILE)
// compute the BOINC names (md5.ext) of these files,
// and make a map from path to BOINC name and size (LOCAL_FILE)
//
set<string>::iterator iter = unique_paths.begin();
while (iter != unique_paths.end()) {
string s = *iter;
LOCAL_FILE lf;
retval = compute_md5(s, lf);
retval = compute_boinc_name(s, lf);
if (retval) return retval;
local_files.insert(std::pair<string, LOCAL_FILE>(s, lf));
++iter;
@ -168,18 +188,18 @@ int process_input_files(SUBMIT_REQ& req, string& error_msg) {
//
map<string, LOCAL_FILE>::iterator map_iter;
map_iter = local_files.begin();
vector<string> md5s, paths;
vector<string> boinc_names, paths;
vector<int> absent_files;
while (map_iter != local_files.end()) {
LOCAL_FILE lf = map_iter->second;
paths.push_back(map_iter->first);
md5s.push_back(lf.md5);
boinc_names.push_back(lf.boinc_name);
++map_iter;
}
retval = query_files(
project_url,
authenticator,
md5s,
boinc_names,
req.batch_id,
absent_files,
error_msg
@ -188,17 +208,17 @@ int process_input_files(SUBMIT_REQ& req, string& error_msg) {
// upload the missing files.
//
vector<string> upload_md5s, upload_paths;
vector<string> upload_boinc_names, upload_paths;
for (unsigned int i=0; i<absent_files.size(); i++) {
int j = absent_files[i];
upload_md5s.push_back(md5s[j]);
upload_boinc_names.push_back(boinc_names[j]);
upload_paths.push_back(paths[j]);
}
retval = upload_files(
project_url,
authenticator,
upload_paths,
upload_md5s,
upload_boinc_names,
req.batch_id,
error_msg
);
@ -212,7 +232,7 @@ int process_input_files(SUBMIT_REQ& req, string& error_msg) {
INFILE& infile = job.infiles[j];
map<string, LOCAL_FILE>::iterator iter = local_files.find(infile.src_path);
LOCAL_FILE& lf = iter->second;
sprintf(infile.physical_name, "jf_%s", lf.md5);
sprintf(infile.physical_name, "jf_%s", lf.boinc_name);
}
}