replace bad filename hashing function

svn path=/trunk/boinc/; revision=4975
This commit is contained in:
David Anderson 2005-01-02 07:44:40 +00:00
parent 0704f0882d
commit 66950e2822
13 changed files with 195 additions and 72 deletions

View File

@ -22038,3 +22038,30 @@ Bruce 31 Dec 2004
ops/ ops/
show_log.php show_log.php
David 1 Jan 2005
- Deprecated the bad hash function used for the
upload/download directory hierarchies.
It wasn't uniform.
Use MD5 instead.
Kept the old one around for the time being.
dir_hier_path() and dir_hier_url() take a new arg
saying whether to use new or old hash.
The file_deleter and validator try the new hash, then the old.
doc/
busy_work.php (new)
tools_work.php
lib/
util.C,h
sched/
file_deleter.C
file_upload_handler.C
make_work.C
validate_util.C
wu_check.C
tools/
backend_lib.C
dir_hier_move.C
dir_hier_path.C

21
doc/busy_work.php Normal file
View File

@ -0,0 +1,21 @@
<?php
// Documentation page titled "Creating busy work".
// Emits a short HTML description of the busy_work daemon's command line
// (-wu_name, -cushion) between page_head() and page_tail().
// NOTE(review): page_head()/page_tail() are presumably defined in
// docutil.php -- confirm.
require_once("docutil.php");
page_head("Creating busy work");
echo "
<p>
The daemon program
<pre>
busy_work -wu_name name -cushion N
</pre>
creates an endless supply of work.
Specifically, it creates copies of the given work unit
as needed to maintain a supply of at least N unsent results.
<p>
This is useful for testing purposes.
";
page_tail();
?>

View File

@ -2,15 +2,43 @@
require_once("docutil.php"); require_once("docutil.php");
page_head("Generating work"); page_head("Generating work");
echo " echo "
Workunits and results can be created using either a utility program
or a C++ function.
<p>
Workunits and results are described by <b>template files</b>,
with placeholders for their input and output files.
<h3>Workunit template files</h3> As described earlier, a <a href=work.php>workunit</a>
represents the inputs to a computation.
The steps in creating a workunit are:
<ul>
<li> Write XML 'template files' that describe the workunit
and its corresponding results.
Generally the same templates will be used for
a large number of workunits.
<li> Create the workunit's input file(s)
and place them in the download directory.
<li> Call a BOINC function that creates a
database record for the workunit.
</ul>
Once this is done, BOINC takes over:
it creates one or more results for the workunit,
distributes them to client hosts,
collects the output files,
finds a canonical result,
assimilates the canonical result,
and deletes files.
<p> <p>
A WU template file has the form During the testing phase of a project,
you can use the <a href=busy_work.php>busy_work</a> daemon
to replicate a given workunit as needed to maintain
a constant supply of work.
This is useful while testing and debugging the application.
<h2>Workunit and result template files</h2>
<p>
A workunit template file has the form
<pre>",htmlspecialchars(" <pre>",htmlspecialchars("
<file_info> <file_info>
<number>0</number> <number>0</number>
@ -61,7 +89,7 @@ Within a &lt;file_ref> element,
&lt;file_number>x&lt;/file_number> is replaced with an element &lt;file_number>x&lt;/file_number> is replaced with an element
giving the filename. giving the filename.
</ul> </ul>
<h3>Result template files</h3>
<p> <p>
A result template file has the form A result template file has the form
<pre>", htmlspecialchars(" <pre>", htmlspecialchars("
@ -90,8 +118,27 @@ the ordinal number of the result (0, 1, ...).
&lt;UPLOAD_URL/> is replaced with the upload URL. &lt;UPLOAD_URL/> is replaced with the upload URL.
</ul> </ul>
<p> <p>
<a name=cmdline>
<h3>Command-line interface</h3> <h2>Placing input files in the download directory</h2>
If you're using a flat download directory, just place input files in that directory.
If you're using <a href=hier_dir.php>hierarchical upload/download directories</a>,
you must place each input file in the appropriate directory;
the directory is determined by the file's name.
To find this directory, call the C++ function
<pre>
dir_hier_path(
</pre>
If you're using scripts, you can invoke the program
<pre>
</pre>
<h2>Creating workunit records</h2>
<p>
Workunits can be created using either a script
(using the <code>create_work</code> program)
or a program (using the <code>create_work()</code> function).
<p> <p>
The utility program is The utility program is
<pre> <pre>
@ -126,7 +173,6 @@ If the -config_dir option is not used,
the program must be run in the project root directory; the program must be run in the project root directory;
it looks for <b>config.xml</b> there, and uses its contents. it looks for <b>config.xml</b> there, and uses its contents.
<h3>C++ function interface</h3>
<p> <p>
The C++ library (crypt.C, backend_lib.C,h) provides the functions: The C++ library (crypt.C, backend_lib.C,h) provides the functions:
<pre> <pre>
@ -166,17 +212,6 @@ delay_bound
</pre> </pre>
All other fields should be zeroed. All other fields should be zeroed.
<hr>
<a name=make_work>
<h3>Make_work</h3>
<p>
The daemon program
<pre>
make_work -wu_name name -cushion N
</pre>
can be used to create an endless supply of work.
It will create copies of the given work unit
as needed to maintain a supply of at least N unsent results.
"; ";
page_tail(); page_tail();

View File

@ -753,47 +753,74 @@ void update_average(
avg_time = now; avg_time = now;
} }
int dir_hier_path( static void filename_hash_old(const char* filename, int fanout, char* dir) {
const char* filename, const char* root, int fanout, char* result,
bool create
) {
int sum=0; int sum=0;
char dir[256];
while (*p) sum += *p++;
sum %= fanout;
sprintf(dir, "%x", sum);
}
// Compute the subdirectory name for a file in a fan-out directory hierarchy,
// using the MD5 digest of the filename.
//
// filename: NUL-terminated file name to hash
// fanout:   number of subdirectories; must be nonzero (used as a modulus)
// dir:      output buffer; receives the bucket number as a lowercase hex
//           string, the same format the old sum-of-characters hash writes
//
static void filename_hash(const char* filename, int fanout, char* dir) {
    // NOTE(review): the cast assumes md5_string() takes
    // (const unsigned char*, int) -- confirm against its declaration.
    std::string s = md5_string((const unsigned char*)filename, strlen(filename));

    // Seven hex digits (28 bits) of the digest string always fit in a
    // non-negative int, so the modulus below cannot go negative.
    int x = strtol(s.substr(1, 7).c_str(), 0, 16);

    // Write the result into dir; this function is void, so the bucket
    // must be returned through the output buffer, not a return value.
    sprintf(dir, "%x", x % fanout);
}
// given a filename, compute its path in a directory hierarchy
// If create is true, create the directory if needed
// NOTE: the first time around I used a bad hash function.
// During the period of transition to the good hash function,
// programs that look for files (validator, assimilator, file deleter)
// will have to try both the old and new variants.
// We can phase this out after everyone is caught up.
//
int dir_hier_path(
const char* filename, const char* root, int fanout, bool new_hash,
char* path, bool create
) {
char dir[256], dirpath[256];
int retval; int retval;
if (fanout==0) { if (fanout==0) {
sprintf(result, "%s/%s", root, filename); sprintf(path, "%s/%s", root, filename);
return 0; return 0;
} }
char* p = (char*)filename; if (new_hash) {
while (*p) sum += *p++; filename_hash(filename, fanout, dir);
sum %= fanout; } else {
sprintf(dir, "%s/%x", root, sum); filename_hash_old(filename, fanout, dir);
}
sprintf(dirpath, "%s/%s", root, dir);
if (create) { if (create) {
retval = boinc_mkdir(dir); retval = boinc_mkdir(dirpath);
if (retval && (retval != EEXIST)) { if (retval && (retval != EEXIST)) {
return ERR_MKDIR; return ERR_MKDIR;
} }
} }
sprintf(result, "%s/%s", dir, filename); sprintf(path, "%s/%s", dirpath, filename);
return 0; return 0;
} }
int dir_hier_url( int dir_hier_url(
const char* filename, const char* root, int fanout, char* result const char* filename, const char* root, int fanout, bool new_hash
char* result
) { ) {
int sum=0; char dir[256];
if (fanout==0) { if (fanout==0) {
sprintf(result, "%s/%s", root, filename); sprintf(result, "%s/%s", root, filename);
return 0; return 0;
} }
char* p = (char*)filename; if (new_hash) {
while (*p) sum += *p++; filename_hash(filename, fanout, dir);
sum %= fanout; } else {
sprintf(result, "%s/%x/%s", root, sum, filename); filename_hash_old(filename, fanout, dir);
}
sprintf(result, "%s/%s/%s", root, dir, filename);
return 0; return 0;
} }

View File

@ -122,14 +122,15 @@ extern void update_average(double, double, double, double&, double&);
// convert filename to path in a hierarchical directory system // convert filename to path in a hierarchical directory system
// //
extern int dir_hier_path( extern int dir_hier_path(
const char* filename, const char* root, int fanout, char* result, const char* filename, const char* root, int fanout, bool new_hash,
bool create=false char* result, bool create=false
); );
// convert filename to URL in a hierarchical directory system // convert filename to URL in a hierarchical directory system
// //
extern int dir_hier_url( extern int dir_hier_url(
const char* filename, const char* root, int fanout, char* result const char* filename, const char* root, int fanout, bool new_hash,
char* result
); );
extern int boinc_calling_thread_cpu_time(double&); extern int boinc_calling_thread_cpu_time(double&);

View File

@ -45,7 +45,7 @@ int wu_delete_files(WORKUNIT& wu) {
char* p; char* p;
char filename[256], pathname[256], buf[LARGE_BLOB_SIZE]; char filename[256], pathname[256], buf[LARGE_BLOB_SIZE];
bool no_delete=false; bool no_delete=false;
int count_deleted = 0, retval; int count_deleted = 0, retval, ret1, ret2;
safe_strcpy(buf, wu.xml_doc); safe_strcpy(buf, wu.xml_doc);
@ -60,11 +60,17 @@ int wu_delete_files(WORKUNIT& wu) {
no_delete = true; no_delete = true;
} else if (match_tag(p, "</file_info>")) { } else if (match_tag(p, "</file_info>")) {
if (!no_delete) { if (!no_delete) {
retval = dir_hier_path( // TODO: get rid of the old hash in about 3/2005
filename, config.download_dir, config.uldl_dir_fanout, //
ret1 = dir_hier_path(
filename, config.download_dir, config.uldl_dir_fanout, true,
pathname pathname
); );
if (retval) { ret2 = dir_hier_path(
filename, config.download_dir, config.uldl_dir_fanout, false,
pathname
);
if (ret1 && ret2) {
log_messages.printf(SCHED_MSG_LOG::CRITICAL, "[%s] dir_hier_path: %d\n", wu.name, retval); log_messages.printf(SCHED_MSG_LOG::CRITICAL, "[%s] dir_hier_path: %d\n", wu.name, retval);
} else { } else {
log_messages.printf(SCHED_MSG_LOG::NORMAL, "[%s] deleting download/%s\n", wu.name, filename); log_messages.printf(SCHED_MSG_LOG::NORMAL, "[%s] deleting download/%s\n", wu.name, filename);
@ -87,7 +93,7 @@ int result_delete_files(RESULT& result) {
char* p; char* p;
char filename[256], pathname[256], buf[LARGE_BLOB_SIZE]; char filename[256], pathname[256], buf[LARGE_BLOB_SIZE];
bool no_delete=false; bool no_delete=false;
int count_deleted = 0, retval; int count_deleted = 0, retval, ret1, ret2;
safe_strcpy(buf, result.xml_doc_in); safe_strcpy(buf, result.xml_doc_in);
p = strtok(buf,"\n"); p = strtok(buf,"\n");
@ -100,14 +106,17 @@ int result_delete_files(RESULT& result) {
no_delete = true; no_delete = true;
} else if (match_tag(p, "</file_info>")) { } else if (match_tag(p, "</file_info>")) {
if (!no_delete) { if (!no_delete) {
retval = dir_hier_path( ret1 = dir_hier_path(
filename, config.upload_dir, config.uldl_dir_fanout, filename, config.upload_dir, config.uldl_dir_fanout, true,
pathname pathname
); );
if (retval) { ret2 = dir_hier_path(
filename, config.upload_dir, config.uldl_dir_fanout, false,
pathname
);
if (ret1 && ret2) {
log_messages.printf(SCHED_MSG_LOG::CRITICAL, log_messages.printf(SCHED_MSG_LOG::CRITICAL,
"[%s] dir_hier_path: %d\n", "[%s] dir_hier_path: %d\n", result.name, retval
result.name, retval
); );
} else { } else {
retval = unlink(pathname); retval = unlink(pathname);

View File

@ -279,7 +279,7 @@ int handle_file_upload(FILE* in, R_RSA_PUBLIC_KEY& key) {
} }
retval = dir_hier_path( retval = dir_hier_path(
file_info.name, config.upload_dir, config.uldl_dir_fanout, file_info.name, config.upload_dir, config.uldl_dir_fanout, true,
path, true path, true
); );
log_messages.printf( log_messages.printf(
@ -326,7 +326,7 @@ int handle_get_file_size(char* file_name) {
// TODO: check to ensure path doesn't point somewhere bad // TODO: check to ensure path doesn't point somewhere bad
// Use 64-bit variant // Use 64-bit variant
// //
dir_hier_path(file_name, config.upload_dir, config.uldl_dir_fanout, path); dir_hier_path(file_name, config.upload_dir, config.uldl_dir_fanout, true, path);
retval = stat( path, &sbuf ); retval = stat( path, &sbuf );
if (retval && errno != ENOENT) { if (retval && errno != ENOENT) {
log_messages.printf(SCHED_MSG_LOG::DEBUG, "handle_get_file_size(): [%s] returning error\n", file_name); log_messages.printf(SCHED_MSG_LOG::DEBUG, "handle_get_file_size(): [%s] returning error\n", file_name);

View File

@ -177,7 +177,8 @@ void make_work() {
p = strtok(buf, "\n"); p = strtok(buf, "\n");
strcpy(file_name, ""); strcpy(file_name, "");
// make new copies of the WU's input files // make new hard links to the WU's input files
// (don't actually copy files)
// //
while (p) { while (p) {
if (parse_str(p, "<name>", file_name, sizeof(file_name))) { if (parse_str(p, "<name>", file_name, sizeof(file_name))) {
@ -185,11 +186,11 @@ void make_work() {
new_file_name, "%s__%d_%d", file_name, start_time, seqno++ new_file_name, "%s__%d_%d", file_name, start_time, seqno++
); );
dir_hier_path( dir_hier_path(
file_name, config.download_dir, config.uldl_dir_fanout, file_name, config.download_dir, config.uldl_dir_fanout, true,
pathname pathname
); );
dir_hier_path( dir_hier_path(
new_file_name, config.download_dir, config.uldl_dir_fanout, new_file_name, config.download_dir, config.uldl_dir_fanout, true,
new_pathname, true new_pathname, true
); );
sprintf(command,"ln %s %s", pathname, new_pathname); sprintf(command,"ln %s %s", pathname, new_pathname);

View File

@ -46,7 +46,10 @@ int get_output_file_path(RESULT const& result, string& path_str) {
flag = parse_str(result.xml_doc_out, "<name>", buf, sizeof(buf)); flag = parse_str(result.xml_doc_out, "<name>", buf, sizeof(buf));
if (!flag) return ERR_XML_PARSE; if (!flag) return ERR_XML_PARSE;
dir_hier_path(buf, config.upload_dir, config.uldl_dir_fanout, path); dir_hier_path(buf, config.upload_dir, config.uldl_dir_fanout, true, path);
if (!boinc_file_exists(path)) {
dir_hier_path(buf, config.upload_dir, config.uldl_dir_fanout, false, path);
}
path_str = path; path_str = path;
return 0; return 0;
} }

View File

@ -43,7 +43,7 @@ int get_file_path(WORKUNIT& wu, char* path) {
bool flag; bool flag;
flag = parse_str(wu.xml_doc, "<name>", buf, sizeof(buf)); flag = parse_str(wu.xml_doc, "<name>", buf, sizeof(buf));
if (!flag) return ERR_XML_PARSE; if (!flag) return ERR_XML_PARSE;
dir_hier_path(buf, config.download_dir, config.uldl_dir_fanout, path); dir_hier_path(buf, config.download_dir, config.uldl_dir_fanout, true, path);
return 0; return 0;
} }

View File

@ -183,7 +183,8 @@ static int process_wu_template(
} }
dir_hier_path( dir_hier_path(
infiles[file_number], config.download_dir, infiles[file_number], config.download_dir,
config.uldl_dir_fanout, path, true config.uldl_dir_fanout, true,
path, true
); );
if (!boinc_file_exists(path)) { if (!boinc_file_exists(path)) {
sprintf(top_download_path, sprintf(top_download_path,
@ -209,7 +210,8 @@ static int process_wu_template(
dir_hier_url( dir_hier_url(
infiles[file_number], config.download_url, infiles[file_number], config.download_url,
config.uldl_dir_fanout, url config.uldl_dir_fanout, true,
url
); );
sprintf(buf, sprintf(buf,
" <name>%s</name>\n" " <name>%s</name>\n"
@ -393,18 +395,15 @@ int create_result(
int check_files(char** infiles, int ninfiles, SCHED_CONFIG& config) { int check_files(char** infiles, int ninfiles, SCHED_CONFIG& config) {
int i; int i;
char path[256]; char path[256];
FILE* f;
for (i=0; i<ninfiles; i++) { for (i=0; i<ninfiles; i++) {
dir_hier_path( dir_hier_path(
infiles[i], config.download_dir, config.uldl_dir_fanout, path infiles[i], config.download_dir, config.uldl_dir_fanout, true, path
); );
f = fopen(path, "r"); if (!boinc_file_exists(path)) {
if (f) { return 1;
fclose(f); }
} else {
return 1;
}
} }
return 0; return 0;
} }

View File

@ -26,7 +26,7 @@ int main(int argc, char** argv) {
DirScanner scanner(src_dir); DirScanner scanner(src_dir);
while (scanner.scan(filename)) { while (scanner.scan(filename)) {
retval = dir_hier_path(filename.c_str(), dst_dir, fanout, dst_path, true); retval = dir_hier_path(filename.c_str(), dst_dir, fanout, true, dst_path, true);
if (retval) { if (retval) {
fprintf(stderr, "dir_hier_path: %d\n", retval); fprintf(stderr, "dir_hier_path: %d\n", retval);
exit(1); exit(1);

View File

@ -14,7 +14,7 @@ int main(int argc, char** argv) {
exit(1); exit(1);
} }
dir_hier_path(argv[1], "", config.uldl_dir_fanout, path); dir_hier_path(argv[1], "", config.uldl_dir_fanout, true, path);
printf("path: %s%s\n", config.download_dir, path); printf("path: %s%s\n", config.download_dir, path);
} }