diff --git a/checkin_notes b/checkin_notes
index e6bec394a9..e2ac6317f4 100755
--- a/checkin_notes
+++ b/checkin_notes
@@ -22038,3 +22038,30 @@ Bruce 31 Dec 2004
     ops/
         show_log.php
+
+David 1 Jan 2005
+    - Deprecated the bad hash function used for the
+      upload/download directory hierarchies.
+      It wasn't uniform.
+      Use MD5 instead.
+
+      Kept the old one around for the time being.
+      dir_hier_path() and dir_hier_url() take a new arg
+      saying whether to use new or old hash.
+      The file_deleter and validator try the new hash, then the old.
+
+    doc/
+        busy_work.php (new)
+        tools_work.php
+    lib/
+        util.C,h
+    sched/
+        file_deleter.C
+        file_upload_handler.C
+        make_work.C
+        validate_util.C
+        wu_check.C
+    tools/
+        backend_lib.C
+        dir_hier_move.C
+        dir_hier_path.C
diff --git a/doc/busy_work.php b/doc/busy_work.php
new file mode 100644
index 0000000000..3a50b652cc
--- /dev/null
+++ b/doc/busy_work.php
@@ -0,0 +1,21 @@
+
+The daemon program
+busy_work -wu_name name -cushion N
+
+creates an endless supply of work.
+Specifically, it creates copies of the given work unit
+as needed to maintain a supply of at least N unsent results.
+
+

+This is useful for testing purposes.
+
+";
+
+page_tail();
+?>
diff --git a/doc/tools_work.php b/doc/tools_work.php
index 4c44ccda25..dd30c1b85f 100644
--- a/doc/tools_work.php
+++ b/doc/tools_work.php
@@ -2,15 +2,43 @@
 require_once("docutil.php");
 page_head("Generating work");
 echo "
-Workunits and results can be created using either a utility program
-or a C++ function.
-

-Workunits and results are described by template files,
-with placeholders for their input and output files.
-

Workunit template files

+As described earlier, a workunit
+represents the inputs to a computation.
+The steps in creating a workunit are:
+
+Once this is done, BOINC takes over:
+it creates one or more results for the workunit,
+distributes them to client hosts,
+collects the output files,
+finds a canonical result,
+assimilates the canonical result,
+and deletes files.
+

-A WU template file has the form
+During the testing phase of a project,
+you can use the busy_work daemon
+to replicate a given workunit as needed to maintain
+a constant supply of work.
+This is useful while testing and debugging the application.
+
+
+

Workunit and result template files

+

+A workunit template file has the form

",htmlspecialchars("
 
     0
@@ -61,7 +89,7 @@ Within a <file_ref> element,
 <file_number>x</file_number> is replaced with an element
 giving the filename.
 
-

Result template files

+

A result template file has the form

", htmlspecialchars("
@@ -90,8 +118,27 @@ the ordinal number of the result (0, 1, ...).
 <UPLOAD_URL/> is replaced with the upload URL.
 
 

- -

Command-line interface

+ +

Placing input files in the download directory

+
+If you're using a flat download directory, just place input files in that directory.
+If you're using hierarchical upload/download directories,
+you must place each input file in the appropriate directory;
+the directory is determined by the file's name.
+To find this directory, call the C++ function
+
+dir_hier_path(
+
+If you're using scripts, you can invoke the program +
+
+ + +
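For orientation only (this block is not part of the patch), here is a minimal C++ sketch of how a project's work-creation code could find that directory using the new dir_hier_path() signature declared in lib/util.h later in this commit; the sched_config.h header name and the parsed SCHED_CONFIG global are assumptions based on the calls visible elsewhere in this diff.

// Illustrative sketch only; not part of this commit.
#include <cstdio>
#include "util.h"          // dir_hier_path(), per lib/util.h in this diff
#include "sched_config.h"  // SCHED_CONFIG (assumed header name)

extern SCHED_CONFIG config;    // assumed to be parsed from config.xml elsewhere

// Print where an input file belongs under the hierarchical
// download directory, using the new MD5-based hash (new_hash = true).
int show_input_file_location(const char* filename) {
    char path[256];
    int retval = dir_hier_path(
        filename, config.download_dir, config.uldl_dir_fanout,
        true,        // new_hash: use the MD5-based hash
        path, true   // create the fan-out subdirectory if needed
    );
    if (retval) return retval;
    printf("copy %s to %s\n", filename, path);
    return 0;
}

With fanout set to 0 this degenerates to root/filename, which matches the flat-directory case described above.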

Creating workunit records

+

+Workunits can be created using either a script
+(which invokes the create_work program)
+or a program (which calls the create_work() function).

The utility program is

@@ -126,7 +173,6 @@ If the -config_dir option is not used,
 the program must be run in the project root directory;
 it looks for config.xml there, and uses its contents.
 
-

C++ function interface

The C++ library (crypt.C, backend_lib.C,h) provides the functions:

@@ -166,17 +212,6 @@ delay_bound
 
All other fields should be zeroed. -
- -

Make_work

-

-The daemon program -

-make_work -wu_name name -cushion N
-
-can be used to create an endless supply of work.
-It will create copies of the given work unit
-as needed to maintain a supply of at least N unsent results.
 ";
 
 page_tail();
diff --git a/lib/util.C b/lib/util.C
index 1761b156a1..09ed4d2b9e 100755
--- a/lib/util.C
+++ b/lib/util.C
@@ -753,47 +753,74 @@ void update_average(
     avg_time = now;
 }
 
-int dir_hier_path(
-    const char* filename, const char* root, int fanout, char* result,
-    bool create
-) {
+static void filename_hash_old(const char* filename, int fanout, char* dir) {
     int sum=0;
-    char dir[256];
+    const char* p = filename;
+
+    while (*p) sum += *p++;
+    sum %= fanout;
+    sprintf(dir, "%x", sum);
+}
+
+static void filename_hash(const char* filename, int fanout, char* dir) {
+    std::string s = md5_string(filename, strlen(filename));
+    int x = strtol(s.substr(1, 7).c_str(), 0, 16);
+    sprintf(dir, "%x", x % fanout);
+}
+
+// given a filename, compute its path in a directory hierarchy.
+// If create is true, create the directory if needed.
+// NOTE: the first time around I used a bad hash function.
+// During the period of transition to the good hash function,
+// programs that look for files (validator, assimilator, file deleter)
+// will have to try both the old and new variants.
+// We can phase this out after everyone is caught up.
+//
+int dir_hier_path(
+    const char* filename, const char* root, int fanout, bool new_hash,
+    char* path, bool create
+) {
+    char dir[256], dirpath[256];
     int retval;
 
     if (fanout==0) {
-        sprintf(result, "%s/%s", root, filename);
+        sprintf(path, "%s/%s", root, filename);
         return 0;
     }
-    char* p = (char*)filename;
-    while (*p) sum += *p++;
-    sum %= fanout;
-    sprintf(dir, "%s/%x", root, sum);
+    if (new_hash) {
+        filename_hash(filename, fanout, dir);
+    } else {
+        filename_hash_old(filename, fanout, dir);
+    }
+
+    sprintf(dirpath, "%s/%s", root, dir);
     if (create) {
-        retval = boinc_mkdir(dir);
+        retval = boinc_mkdir(dirpath);
         if (retval && (retval != EEXIST)) {
             return ERR_MKDIR;
         }
     }
-    sprintf(result, "%s/%s", dir, filename);
+    sprintf(path, "%s/%s", dirpath, filename);
     return 0;
 }
 
 int dir_hier_url(
-    const char* filename, const char* root, int fanout, char* result
+    const char* filename, const char* root, int fanout, bool new_hash,
+    char* result
 ) {
-    int sum=0;
+    char dir[256];
 
     if (fanout==0) {
         sprintf(result, "%s/%s", root, filename);
         return 0;
     }
-    char* p = (char*)filename;
-    while (*p) sum += *p++;
-    sum %= fanout;
-    sprintf(result, "%s/%x/%s", root, sum, filename);
+    if (new_hash) {
+        filename_hash(filename, fanout, dir);
+    } else {
+        filename_hash_old(filename, fanout, dir);
+    }
+    sprintf(result, "%s/%s/%s", root, dir, filename);
     return 0;
 }
 
diff --git a/lib/util.h b/lib/util.h
index 0320f1d034..282db1def8 100755
--- a/lib/util.h
+++ b/lib/util.h
@@ -122,14 +122,15 @@ extern void update_average(double, double, double, double&, double&);
 // convert filename to path in a hierarchical directory system
 //
 extern int dir_hier_path(
-    const char* filename, const char* root, int fanout, char* result,
-    bool create=false
+    const char* filename, const char* root, int fanout, bool new_hash,
+    char* result, bool create=false
 );
 
 // convert filename to URL in a hierarchical directory system
 //
 extern int dir_hier_url(
-    const char* filename, const char* root, int fanout, char* result
+    const char* filename, const char* root, int fanout, bool new_hash,
+    char* result
 );
 
 extern int boinc_calling_thread_cpu_time(double&);
diff --git a/sched/file_deleter.C b/sched/file_deleter.C
index 0e07ad0dc8..dc2f44c67f 100644
--- a/sched/file_deleter.C
+++ b/sched/file_deleter.C
@@ -45,7 +45,7 @@ int wu_delete_files(WORKUNIT& wu) {
     char* p;
     char filename[256], pathname[256], buf[LARGE_BLOB_SIZE];
     bool no_delete=false;
-    int count_deleted = 0, retval;
+    int count_deleted = 0, retval, ret1, ret2;
 
     safe_strcpy(buf, wu.xml_doc);
 
@@ -60,11 +60,17 @@ int wu_delete_files(WORKUNIT& wu) {
             no_delete = true;
         } else if (match_tag(p, "")) {
             if (!no_delete) {
-                retval = dir_hier_path(
-                    filename, config.download_dir, config.uldl_dir_fanout,
+                // TODO: get rid of the old hash in about 3/2005
+                //
+                ret1 = dir_hier_path(
+                    filename, config.download_dir, config.uldl_dir_fanout, true,
                     pathname
                 );
-                if (retval) {
+                ret2 = dir_hier_path(
+                    filename, config.download_dir, config.uldl_dir_fanout, false,
+                    pathname
+                );
+                if (ret1 && ret2) {
-                    log_messages.printf(SCHED_MSG_LOG::CRITICAL, "[%s] dir_hier_path: %d\n", wu.name, retval);
+                    log_messages.printf(SCHED_MSG_LOG::CRITICAL, "[%s] dir_hier_path: %d\n", wu.name, ret1);
                 } else {
                     log_messages.printf(SCHED_MSG_LOG::NORMAL, "[%s] deleting download/%s\n", wu.name, filename);
@@ -87,7 +93,7 @@ int result_delete_files(RESULT& result) {
     char* p;
     char filename[256], pathname[256], buf[LARGE_BLOB_SIZE];
     bool no_delete=false;
-    int count_deleted = 0, retval;
+    int count_deleted = 0, retval, ret1, ret2;
 
     safe_strcpy(buf, result.xml_doc_in);
     p = strtok(buf,"\n");
@@ -100,14 +106,17 @@ int result_delete_files(RESULT& result) {
             no_delete = true;
         } else if (match_tag(p, "")) {
             if (!no_delete) {
-                retval = dir_hier_path(
-                    filename, config.upload_dir, config.uldl_dir_fanout,
+                ret1 = dir_hier_path(
+                    filename, config.upload_dir, config.uldl_dir_fanout, true,
                     pathname
                 );
-                if (retval) {
+                ret2 = dir_hier_path(
+                    filename, config.upload_dir, config.uldl_dir_fanout, false,
+                    pathname
+                );
+                if (ret1 && ret2) {
                     log_messages.printf(SCHED_MSG_LOG::CRITICAL,
-                        "[%s] dir_hier_path: %d\n",
-                        result.name, retval
+                        "[%s] dir_hier_path: %d\n", result.name, ret1
                     );
                 } else {
                     retval = unlink(pathname);
diff --git a/sched/file_upload_handler.C b/sched/file_upload_handler.C
index d8a48aa216..c73578a69d 100644
--- a/sched/file_upload_handler.C
+++ b/sched/file_upload_handler.C
@@ -279,7 +279,7 @@ int handle_file_upload(FILE* in, R_RSA_PUBLIC_KEY& key) {
     }
 
     retval = dir_hier_path(
-        file_info.name, config.upload_dir, config.uldl_dir_fanout,
+        file_info.name, config.upload_dir, config.uldl_dir_fanout, true,
         path, true
     );
     log_messages.printf(
@@ -326,7 +326,7 @@ int handle_get_file_size(char* file_name) {
     // TODO: check to ensure path doesn't point somewhere bad
     // Use 64-bit variant
     //
-    dir_hier_path(file_name, config.upload_dir, config.uldl_dir_fanout, path);
+    dir_hier_path(file_name, config.upload_dir, config.uldl_dir_fanout, true, path);
     retval = stat( path, &sbuf );
     if (retval && errno != ENOENT) {
         log_messages.printf(SCHED_MSG_LOG::DEBUG, "handle_get_file_size(): [%s] returning error\n", file_name);
diff --git a/sched/make_work.C b/sched/make_work.C
index ac7ed3f206..d3c4352789 100644
--- a/sched/make_work.C
+++ b/sched/make_work.C
@@ -177,7 +177,8 @@ void make_work() {
     p = strtok(buf, "\n");
     strcpy(file_name, "");
 
-    // make new copies of the WU's input files
+    // make new hard links to the WU's input files
+    // (don't actually copy files)
     //
     while (p) {
         if (parse_str(p, "", file_name, sizeof(file_name))) {
@@ -185,11 +186,11 @@ void make_work() {
                 new_file_name, "%s__%d_%d", file_name, start_time, seqno++
             );
             dir_hier_path(
-                file_name, config.download_dir, config.uldl_dir_fanout,
+                file_name, config.download_dir, config.uldl_dir_fanout, true,
                 pathname
             );
             dir_hier_path(
-                new_file_name, config.download_dir, config.uldl_dir_fanout,
+                new_file_name, config.download_dir, config.uldl_dir_fanout, true,
                 new_pathname, true
             );
             sprintf(command,"ln %s %s", pathname, new_pathname);
diff --git a/sched/validate_util.C b/sched/validate_util.C
index 466471d88d..c6d788b22f 100644
--- a/sched/validate_util.C
+++ b/sched/validate_util.C
@@ -46,7 +46,10 @@ int get_output_file_path(RESULT const& result, string& path_str) {
 
     flag = parse_str(result.xml_doc_out, "", buf, sizeof(buf));
     if (!flag) return ERR_XML_PARSE;
-    dir_hier_path(buf, config.upload_dir, config.uldl_dir_fanout, path);
+    dir_hier_path(buf, config.upload_dir, config.uldl_dir_fanout, true, path);
+    if (!boinc_file_exists(path)) {
+        dir_hier_path(buf, config.upload_dir, config.uldl_dir_fanout, false, path);
+    }
     path_str = path;
     return 0;
 }
diff --git a/sched/wu_check.C b/sched/wu_check.C
index 8752a2ca3e..175e6c88d1 100644
--- a/sched/wu_check.C
+++ b/sched/wu_check.C
@@ -43,7 +43,7 @@ int get_file_path(WORKUNIT& wu, char* path) {
     bool flag;
     flag = parse_str(wu.xml_doc, "", buf, sizeof(buf));
     if (!flag) return ERR_XML_PARSE;
-    dir_hier_path(buf, config.download_dir, config.uldl_dir_fanout, path);
+    dir_hier_path(buf, config.download_dir, config.uldl_dir_fanout, true, path);
     return 0;
 }
diff --git a/tools/backend_lib.C b/tools/backend_lib.C
index 3567ae9550..543614c969 100644
--- a/tools/backend_lib.C
+++ b/tools/backend_lib.C
@@ -183,7 +183,8 @@ static int process_wu_template(
         }
         dir_hier_path(
             infiles[file_number], config.download_dir,
-            config.uldl_dir_fanout, path, true
+            config.uldl_dir_fanout, true,
+            path, true
         );
         if (!boinc_file_exists(path)) {
             sprintf(top_download_path,
@@ -209,7 +210,8 @@ static int process_wu_template(
             dir_hier_url(
                 infiles[file_number], config.download_url,
-                config.uldl_dir_fanout, url
+                config.uldl_dir_fanout, true,
+                url
             );
             sprintf(buf,
                 "    %s\n"
@@ -393,18 +395,15 @@ int create_result(
 
 int check_files(char** infiles, int ninfiles, SCHED_CONFIG& config) {
     int i;
     char path[256];
-    FILE* f;
 
     for (i=0; i
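To tie the pieces together, here is a minimal sketch (not part of the patch) of the transition pattern the checkin note describes: try the MD5-based hash first, then fall back to the old one. It assumes only the dir_hier_path() signature from lib/util.h above and boinc_file_exists(), which appears in validate_util.C and backend_lib.C; the filesys.h header name is an assumption.

// Sketch only: look up a file that may live under either hash layout.
#include "util.h"      // dir_hier_path(), per lib/util.h in this diff
#include "filesys.h"   // boinc_file_exists() (assumed header name)

static int find_file_either_hash(
    const char* name, const char* root, int fanout, char* path
) {
    // Try the new MD5-based layout first.
    int retval = dir_hier_path(name, root, fanout, true, path);
    if (!retval && boinc_file_exists(path)) return 0;
    // Fall back to the old (non-uniform) hash during the transition.
    return dir_hier_path(name, root, fanout, false, path);
}

Once all files created under the old hash are gone (the TODO in file_deleter.C suggests around 3/2005), the fallback call can be dropped.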