// This file is part of BOINC. // http://boinc.berkeley.edu // Copyright (C) 2011 University of California // // BOINC is free software; you can redistribute it and/or modify it // under the terms of the GNU Lesser General Public License // as published by the Free Software Foundation, // either version 3 of the License, or (at your option) any later version. // // BOINC is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // See the GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License // along with BOINC. If not, see . // process_input_template(): // fill in the workunit's XML document (wu.xml_doc) // by scanning the input template, macro-substituting the input files, // and putting in the command line element and additional XML #include #include #include #include #include #include "filesys.h" #include "md5_file.h" #include "str_replace.h" #include "sched_config.h" #include "sched_util.h" #include "process_input_template.h" using std::string; using std::vector; // look for file named FILENAME.md5 containing md5sum and length. // If found, and newer mod time than file, // read md5 sum and file length from it, and return true. // Else return false // // See checkin notes Dec 30 2004 // static bool got_md5_info( const char *path, char *md5data, double *nbytes ) { char md5name[512]; struct stat md5stat, filestat; char endline='\0'; sprintf(md5name, "%s.md5", path); // get mod times for file // if (stat(path, &filestat)) { return false; } // get mod time for md5 cache file // // coverity[toctou] if (stat(md5name, &md5stat)) { return false; } // if cached md5 newer, then open it #ifndef _USING_FCGI_ FILE *fp=fopen(md5name, "r"); #else FCGI_FILE *fp=FCGI::fopen(md5name, "r"); #endif if (!fp) { return false; } // read two quantities: md5 sum and length. // If we can't read these, or there is MORE stuff in the file, // it's not an md5 cache file // int n = fscanf(fp, "%s %lf%c", md5data, nbytes, &endline); int c = fgetc(fp); fclose(fp); if ((n != 3) || (endline !='\n') || (c != EOF)) { fprintf(stderr, "bad MD5 cache file %s; remove it and stage file again\n", md5name); return false; } // if this is one of our cached md5 files, but it's OLDER than the // data file which it supposedly corresponds to, delete it. // if (md5stat.st_mtime element for workunit XML doc, // based on the input template and list of variable files // // Inputs: // xp: parser for input template // var_infiles: list of files descs passed to create_work (i.e. var files) // // Outputs: // infiles: vector (appended to) of all input files // out: // // Actions: // as an input file for create_work. // out: append the element for the WU XML doc // If not a constant file: // increment nfiles_parsed // static int process_file_info( XML_PARSER& xp, SCHED_CONFIG& config_loc, vector &var_infiles, string& out, vector &infiles, int& n_var_infiles ) { vector urls; bool gzip = false; int retval, itemp; double nbytes, nbytesdef = -1, gzipped_nbytes; string md5str, urlstr, tmpstr; char physical_name[256]; char buf[BLOB_SIZE], path[MAXPATHLEN], top_download_path[MAXPATHLEN]; char gzip_path[MAXPATHLEN]; char md5[33], url[256], gzipped_url[256], buf2[256]; INFILE_DESC var_infile, infile; strcpy(physical_name, ""); out += "\n"; while (!xp.get_tag()) { if (xp.parse_bool("gzip", gzip)) { continue; } else if (xp.parse_str("physical_name", physical_name, sizeof(physical_name))) { continue; } else if (xp.parse_string("url", urlstr)) { urls.push_back(urlstr); continue; } else if (xp.parse_int("number", itemp)) { // ignore continue; } else if (xp.parse_string("md5_cksum", md5str)) { continue; } else if (xp.parse_double("nbytes", nbytesdef)) { continue; } else if (xp.parse_double("gzipped_nbytes", gzipped_nbytes)) { continue; } else if (xp.match_tag("/file_info")) { // there are four cases: // - normal file // - constant file // is given // - remote file, specified as create_work() arg // URL, size etc. are given in INFILE_DESC // - remote file, specified in template // URL, size etc. are given in template // if (!strlen(physical_name)) { if (n_var_infiles >= (int)var_infiles.size()) { fprintf(stderr, "Too few var input files given; need at least %d\n", n_var_infiles+1 ); return ERR_XML_PARSE; } var_infile = var_infiles[n_var_infiles]; } if (strlen(physical_name)) { // constant file case // dir_hier_path( physical_name, config_loc.download_dir, config_loc.uldl_dir_fanout, path, true ); if (!got_md5_info(path, md5, &nbytes)) { fprintf(stderr, "missing MD5 info file for %s\n", physical_name ); return ERR_FILE_MISSING; } dir_hier_url( physical_name, config_loc.download_url, config_loc.uldl_dir_fanout, url ); sprintf(buf, " %s\n" " %s\n" " %s\n" " %.0f\n", physical_name, url, md5, nbytes ); strcpy(infile.name, physical_name); strcpy(infile.md5, md5); infile.nbytes = nbytes; } else if (nbytesdef > 0) { // remote file specified in template // if (md5str == "" || urlstr == "") { fprintf(stderr, "All file properties must be defined " "if at least one is defined (url, md5_cksum, nbytes)\n" ); return ERR_XML_PARSE; } if (gzip && !gzipped_nbytes) { fprintf(stderr, "Must specify gzipped_nbytes\n"); return ERR_XML_PARSE; } urlstr = ""; for (unsigned int i=0; i\n"; if (gzip) { urlstr += " " + urls.at(i) + string(var_infile.name) + ".gz\n"; } } sprintf(buf, " %s\n" "%s" " %s\n" " %.0f\n", var_infile.name, urlstr.c_str(), md5str.c_str(), nbytesdef ); if (gzip) { sprintf(buf2, " %.0f\n", gzipped_nbytes ); strcat(buf, buf2); } strcpy(infile.name, var_infile.name); strcpy(infile.md5, md5str.c_str()); infile.nbytes = nbytesdef; } else if (var_infile.is_remote) { // remote file specified in create_work() arg // sprintf(buf2, "jf_%s", var_infile.md5); sprintf(buf, " %s\n" " %s\n" " %s\n" " %.0f\n", buf2, var_infile.url, var_infile.md5, var_infile.nbytes ); strcpy(infile.name, buf2); strcpy(infile.md5, var_infile.md5); infile.nbytes = var_infile.nbytes; } else { // normal case. we need to find file size and MD5; // stage the file if needed // dir_hier_path( var_infile.name, config_loc.download_dir, config_loc.uldl_dir_fanout, path, true ); // if file isn't found in hierarchy, // look for it at top level and copy // if (!boinc_file_exists(path)) { sprintf(top_download_path, "%s/%s",config_loc.download_dir, var_infile.name ); boinc_copy(top_download_path, path); printf("copy %s to %s\n", top_download_path, path); } if (!config_loc.cache_md5_info || !got_md5_info(path, md5, &nbytes)) { retval = md5_file(path, md5, nbytes); if (retval) { fprintf(stderr, "process_input_template: md5_file %s\n", boincerror(retval) ); return retval; } else if (config_loc.cache_md5_info) { write_md5_info(path, md5, nbytes); } } dir_hier_url( var_infile.name, config_loc.download_url, config_loc.uldl_dir_fanout, url ); if (gzip) { sprintf(gzip_path, "%s.gz", path); retval = file_size(gzip_path, gzipped_nbytes); if (retval) { fprintf(stderr, "process_input_template: missing gzip file %s\n", gzip_path ); return ERR_FILE_MISSING; } sprintf(gzipped_url, " %s.gz\n" " %.0f\n", url, gzipped_nbytes ); } else { strcpy(gzipped_url, ""); } sprintf(buf, " %s\n" " %s\n" "%s" " %s\n" " %.0f\n", var_infile.name, url, gzipped_url, md5, nbytes ); strcpy(infile.name, var_infile.name); strcpy(infile.md5, md5); infile.nbytes = nbytes; } infiles.push_back(infile); if (!strlen(physical_name)) { n_var_infiles++; } out += buf; out += "\n"; break; } else { // copy any other elements from input template to XML doc // retval = xp.copy_element(tmpstr); if (retval) return retval; out += tmpstr; out += "\n"; } } return 0; } // parse the elements, which includes file refs and job params // static int process_workunit( XML_PARSER& xp, WORKUNIT& wu, string& out, vector &infiles, const char* command_line, const char* additional_xml ) { char buf[256], open_name[256]; string tmpstr, cmdline; int retval, n_file_refs=0, itemp; out += "\n"; if (command_line) { //fprintf(stderr, "appending command line: %s\n", command_line); out += "\n"; out += command_line; out += "\n\n"; } while (!xp.get_tag()) { if (xp.match_tag("/workunit")) { if (additional_xml && strlen(additional_xml)) { out += additional_xml; out += "\n"; } out += ""; break; } else if (xp.match_tag("file_ref")) { out += "\n"; bool found_open_name = false, found_copy_file = false; if (n_file_refs >= (int)infiles.size()) { fprintf(stderr, "too many s\n"); return ERR_XML_PARSE; } INFILE_DESC& id = infiles[n_file_refs]; sprintf(buf, " %s\n", id.name); out += buf; while (!xp.get_tag()) { if (xp.parse_str("open_name", open_name, sizeof(open_name))) { sprintf(buf, " %s\n", open_name); out += buf; found_open_name = true; continue; } else if (xp.parse_int("file_number", itemp)) { } else if (xp.match_tag("copy_file/")) { out += " \n"; found_copy_file = true; continue; } else if (xp.match_tag("/file_ref")) { if (!found_open_name && !found_copy_file) { fprintf(stderr, "No open name found and copy_file not specified\n"); return ERR_XML_PARSE; } else if (!found_open_name && found_copy_file) { sprintf(buf, " %s\n", id.name); out += buf; } out += "\n"; break; } else if (xp.parse_string("file_name", tmpstr)) { fprintf(stderr, " ignored in element.\n"); continue; } else { retval = xp.copy_element(tmpstr); if (retval) return retval; out += tmpstr; out += "\n"; } } n_file_refs++; } else if (xp.parse_string("command_line", cmdline)) { if (command_line) { fprintf(stderr, "Can't specify command line twice\n"); return ERR_XML_PARSE; } out += "\n"; out += cmdline; out += "\n\n"; } else if (xp.parse_double("rsc_fpops_est", wu.rsc_fpops_est)) { continue; } else if (xp.parse_double("rsc_fpops_bound", wu.rsc_fpops_bound)) { continue; } else if (xp.parse_double("rsc_memory_bound", wu.rsc_memory_bound)) { continue; } else if (xp.parse_double("rsc_bandwidth_bound", wu.rsc_bandwidth_bound)) { continue; } else if (xp.parse_double("rsc_disk_bound", wu.rsc_disk_bound)) { continue; } else if (xp.parse_int("batch", wu.batch)) { continue; } else if (xp.parse_int("delay_bound", wu.delay_bound)){ continue; } else if (xp.parse_int("min_quorum", wu.min_quorum)) { continue; } else if (xp.parse_int("target_nresults", wu.target_nresults)) { continue; } else if (xp.parse_int("max_error_results", wu.max_error_results)) { continue; } else if (xp.parse_int("max_total_results", wu.max_total_results)) { continue; } else if (xp.parse_int("max_success_results", wu.max_success_results)) { continue; } else if (xp.parse_int("size_class", wu.size_class)) { continue; } else { retval = xp.copy_element(tmpstr); if (retval) return retval; out += tmpstr; out += "\n"; } } if (n_file_refs != (int)infiles.size()) { fprintf(stderr, "#file refs != #file infos\n"); return ERR_XML_PARSE; } return 0; } // fill in the workunit's XML document (wu.xml_doc) // by scanning the input template, macro-substituting the input files, // and putting in the command line element and additional XML // int process_input_template( WORKUNIT& wu, char* tmplate, vector &var_infiles, // files passed as args to create_work SCHED_CONFIG& config_loc, const char* command_line, const char* additional_xml ) { string out; int retval; bool found_workunit=false; vector infiles; // this gets filled in as we scan s int n_var_infiles=0; // number of non-constant infiles scanned // this is an index into var_infiles // "out" is the XML we're creating // out = ""; MIOFILE mf; XML_PARSER xp(&mf); mf.init_buf_read(tmplate); while (!xp.get_tag()) { if (!xp.is_tag) continue; if (xp.match_tag("input_template")) continue; if (xp.match_tag("/input_template")) continue; if (xp.match_tag("file_info")) { retval = process_file_info( xp, config_loc, var_infiles, out, infiles, n_var_infiles ); if (retval) return retval; } else if (xp.match_tag("workunit")) { found_workunit = true; retval = process_workunit( xp, wu, out, infiles, command_line, additional_xml ); if (retval) return retval; } } if (!found_workunit) { fprintf(stderr, "process_input_template: bad WU template - no \n"); return ERR_XML_PARSE; } if (n_var_infiles != (int)var_infiles.size()) { fprintf(stderr, "process_input_template: %d input files given, but template has %d\n", (int)var_infiles.size(), n_var_infiles ); return ERR_XML_PARSE; } if (out.size() > sizeof(wu.xml_doc)-1) { fprintf(stderr, "create_work: WU XML field is too long (%d bytes; max is %d)\n", (int)out.size(), (int)sizeof(wu.xml_doc)-1 ); return ERR_BUFFER_OVERFLOW; } //fprintf(stderr, "copying to xml_doc: %s\n", out.c_str()); safe_strcpy(wu.xml_doc, out.c_str()); return 0; } #ifdef TEST SCHED_CONFIG config_loc; int main(int, char**) { WORKUNIT wu; vector var_infiles; char* tmplate; const char* command_line = "sample command line"; const char* additional_xml; config_loc.parse_file(""); read_file_malloc("template_in", tmplate); INFILE_DESC id; #if 1 strcpy(id.name, "input"); var_infiles.push_back(id); #endif #if 0 strcpy(id.name, "input"); id.is_remote = true; strcpy(id.url, "http://blah.foo"); strcpy(id.md5, "sfslkjsdlfkj"); id.nbytes = 234; var_infiles.push_back(id); #endif int retval = process_input_template( wu, tmplate, var_infiles, config_loc, command_line, additional_xml ); if (retval) { printf("process_input_template returned %d\n", retval); } else { printf("xml_doc:\n%s\n", wu.xml_doc); } } #endif