replace bad filename hashing function

svn path=/trunk/boinc/; revision=4975
This commit is contained in:
David Anderson 2005-01-02 07:44:40 +00:00
parent 0704f0882d
commit 66950e2822
13 changed files with 195 additions and 72 deletions

View File

@ -22038,3 +22038,30 @@ Bruce 31 Dec 2004
ops/
show_log.php
David 1 Jan 2005
- Deprecated the bad hash function used for the
upload/download directory hierarchies.
It wasn't uniform.
Use MD5 instead.
Kept the old one around for the time being.
dir_hier_path() and dir_hier_url() take a new arg
saying whether to use new or old hash.
The file_deleter and validator try the new hash, then the old.
doc/
busy_work.php (new)
tools_work.php
lib/
util.C,h
sched/
file_deleter.C
file_upload_handler.C
make_work.C
validate_util.C
wu_check.C
tools/
backend_lib.C
dir_hier_move.C
dir_hier_path.C

21
doc/busy_work.php Normal file
View File

@ -0,0 +1,21 @@
<?php
// Documentation page describing the busy_work daemon.
// The page body is kept in a variable so the layout calls
// (head, body, tail) read as three consecutive steps.
require_once("docutil.php");

$page_body = "
<p>
The daemon program
<pre>
busy_work -wu_name name -cushion N
</pre>
creates an endless supply of work.
Specifically, it creates copies of the given work unit
as needed to maintain a supply of at least N unsent results.
<p>
This is useful for testing purposes.
";

page_head("Creating busy work");
echo $page_body;
page_tail();
?>

View File

@ -2,15 +2,43 @@
require_once("docutil.php");
page_head("Generating work");
echo "
Workunits and results can be created using either a utility program
or a C++ function.
<p>
Workunits and results are described by <b>template files</b>,
with placeholders for their input and output files.
<h3>Workunit template files</h3>
As described earlier, a <a href=work.php>workunit</a>
represents the inputs to a computation.
The steps in creating a workunit are:
<ul>
<li> Write XML 'template files' that describe the workunit
and its corresponding results.
Generally the same templates will be used for
a large number of workunits.
<li> Create the workunit's input file(s)
and place them in the download directory.
<li> Call a BOINC function that creates a
database record for the workunit.
</ul>
Once this is done, BOINC takes over:
it creates one or more results for the workunit,
distributes them to client hosts,
collects the output files,
finds a canonical result,
assimilates the canonical result,
and deletes files.
<p>
A WU template file has the form
During the testing phase of a project,
you can use the <a href=busy_work.php>busy_work</a> daemon
to replicate a given workunit as needed to maintain
a constant supply of work.
This is useful while testing and debugging the application.
<h2>Workunit and result template files</h2>
<p>
A workunit template file has the form
<pre>",htmlspecialchars("
<file_info>
<number>0</number>
@ -61,7 +89,7 @@ Within a &lt;file_ref> element,
&lt;file_number>x&lt;/file_number> is replaced with an element
giving the filename.
</ul>
<h3>Result template files</h3>
<p>
A result template file has the form
<pre>", htmlspecialchars("
@ -90,8 +118,27 @@ the ordinal number of the result (0, 1, ...).
&lt;UPLOAD_URL/> is replaced with the upload URL.
</ul>
<p>
<a name=cmdline>
<h3>Command-line interface</h3>
<h2>Placing input files in the download directory</h2>
If you're using a flat download directory, just place input files in that directory.
If you're using <a href=hier_dir.php>hierarchical upload/download directories</a>,
you must place each input file in the appropriate directory;
the directory is determined by the file's name.
To find this directory, call the C++ function
<pre>
dir_hier_path(
</pre>
If you're using scripts, you can invoke the program
<pre>
</pre>
<h2>Creating workunit records</h2>
<p>
Workunits can be created using either a script
(using the <code>create_work</code> program)
or a program (using the <code>create_work()</code> function).
<p>
The utility program is
<pre>
@ -126,7 +173,6 @@ If the -config_dir option is not used,
the program must be run in the project root directory;
it looks for <b>config.xml</b> there, and uses its contents.
<h3>C++ function interface</h3>
<p>
The C++ library (crypt.C, backend_lib.C,h) provides the functions:
<pre>
@ -166,17 +212,6 @@ delay_bound
</pre>
All other fields should be zeroed.
<hr>
<a name=make_work>
<h3>Make_work</h3>
<p>
The daemon program
<pre>
make_work -wu_name name -cushion N
</pre>
can be used to create an endless supply of work.
It will create copies of the given work unit
as needed to maintain a supply of at least N unsent results.
";
page_tail();

View File

@ -753,47 +753,74 @@ void update_average(
avg_time = now;
}
int dir_hier_path(
const char* filename, const char* root, int fanout, char* result,
bool create
) {
// Deprecated filename hash: the sum of the filename's characters,
// reduced modulo fanout, written to dir as a lowercase hex string.
// The distribution is not uniform; this is kept only so that files
// stored under the old scheme can still be located.
//
// filename: NUL-terminated file name to hash
// fanout:   number of subdirectories; must be nonzero (it is the divisor)
// dir:      output buffer receiving the hex directory name
//
// Characters are summed as plain char, exactly as the original hash did,
// so the resulting directory names match the existing on-disk layout.
//
static void filename_hash_old(const char* filename, int fanout, char* dir) {
    int sum=0;
    const char* p = filename;
    while (*p) sum += *p++;
    sum %= fanout;
    sprintf(dir, "%x", sum);
}
// MD5-based filename hash: take 7 hex digits of the filename's MD5 digest,
// reduce modulo fanout, and write the result to dir as a lowercase hex string.
//
// filename: NUL-terminated file name to hash
// fanout:   number of subdirectories; must be nonzero (it is the divisor)
// dir:      output buffer receiving the hex directory name
//
// Seven hex digits are at most 0x0fffffff, so the parsed value always
// fits in a non-negative int and the modulo result is non-negative.
//
// NOTE(review): the cast assumes md5_string() takes
// (const unsigned char*, int) -- confirm against its declaration.
//
static void filename_hash(const char* filename, int fanout, char* dir) {
    std::string s = md5_string((const unsigned char*)filename, strlen(filename));
    int x = strtol(s.substr(1, 7).c_str(), 0, 16);
    sprintf(dir, "%x", x % fanout);
}
// Given a filename, compute its path in a directory hierarchy.
// If create is true, create the directory if needed.
//
// filename: name of the file
// root:     top-level directory of the hierarchy
// fanout:   number of subdirectories; 0 means a flat directory,
//           in which case the path is simply root/filename
// new_hash: true to use the MD5-based hash, false to use the old hash
// path:     output buffer receiving root/<hash dir>/filename;
//           the caller must supply a buffer large enough for the result
// create:   if true, create root/<hash dir> when it doesn't exist
//
// Returns 0 on success, or ERR_MKDIR if the directory could not be
// created (an already-existing directory is not an error).
//
// NOTE: the first time around I used a bad hash function.
// During the period of transition to the good hash function,
// programs that look for files (validator, assimilator, file deleter)
// will have to try both the old and new variants.
// We can phase this out after everyone is caught up.
//
int dir_hier_path(
    const char* filename, const char* root, int fanout, bool new_hash,
    char* path, bool create
) {
    char dir[256], dirpath[256];
    int retval;

    if (fanout==0) {
        sprintf(path, "%s/%s", root, filename);
        return 0;
    }

    if (new_hash) {
        filename_hash(filename, fanout, dir);
    } else {
        filename_hash_old(filename, fanout, dir);
    }

    // create the full directory path, not just the hash component
    sprintf(dirpath, "%s/%s", root, dir);
    if (create) {
        retval = boinc_mkdir(dirpath);
        if (retval && (retval != EEXIST)) {
            return ERR_MKDIR;
        }
    }
    sprintf(path, "%s/%s", dirpath, filename);
    return 0;
}
// Given a filename, compute its URL in a directory hierarchy.
//
// filename: name of the file
// root:     base URL of the hierarchy
// fanout:   number of subdirectories; 0 means a flat directory,
//           in which case the URL is simply root/filename
// new_hash: true to use the MD5-based hash, false to use the old hash
// result:   output buffer receiving root/<hash dir>/filename;
//           the caller must supply a buffer large enough for the result
//
// Always returns 0.
//
int dir_hier_url(
    const char* filename, const char* root, int fanout, bool new_hash,
    char* result
) {
    char dir[256];

    if (fanout==0) {
        sprintf(result, "%s/%s", root, filename);
        return 0;
    }

    if (new_hash) {
        filename_hash(filename, fanout, dir);
    } else {
        filename_hash_old(filename, fanout, dir);
    }
    sprintf(result, "%s/%s/%s", root, dir, filename);
    return 0;
}

View File

@ -122,14 +122,15 @@ extern void update_average(double, double, double, double&, double&);
// convert filename to path in a hierarchical directory system
//
extern int dir_hier_path(
const char* filename, const char* root, int fanout, char* result,
bool create=false
const char* filename, const char* root, int fanout, bool new_hash,
char* result, bool create=false
);
// convert filename to URL in a hierarchical directory system
//
extern int dir_hier_url(
const char* filename, const char* root, int fanout, char* result
const char* filename, const char* root, int fanout, bool new_hash,
char* result
);
extern int boinc_calling_thread_cpu_time(double&);

View File

@ -45,7 +45,7 @@ int wu_delete_files(WORKUNIT& wu) {
char* p;
char filename[256], pathname[256], buf[LARGE_BLOB_SIZE];
bool no_delete=false;
int count_deleted = 0, retval;
int count_deleted = 0, retval, ret1, ret2;
safe_strcpy(buf, wu.xml_doc);
@ -60,11 +60,17 @@ int wu_delete_files(WORKUNIT& wu) {
no_delete = true;
} else if (match_tag(p, "</file_info>")) {
if (!no_delete) {
retval = dir_hier_path(
filename, config.download_dir, config.uldl_dir_fanout,
// TODO: get rid of the old hash in about 3/2005
//
ret1 = dir_hier_path(
filename, config.download_dir, config.uldl_dir_fanout, true,
pathname
);
if (retval) {
ret2 = dir_hier_path(
filename, config.download_dir, config.uldl_dir_fanout, false,
pathname
);
if (ret1 && ret2) {
log_messages.printf(SCHED_MSG_LOG::CRITICAL, "[%s] dir_hier_path: %d\n", wu.name, retval);
} else {
log_messages.printf(SCHED_MSG_LOG::NORMAL, "[%s] deleting download/%s\n", wu.name, filename);
@ -87,7 +93,7 @@ int result_delete_files(RESULT& result) {
char* p;
char filename[256], pathname[256], buf[LARGE_BLOB_SIZE];
bool no_delete=false;
int count_deleted = 0, retval;
int count_deleted = 0, retval, ret1, ret2;
safe_strcpy(buf, result.xml_doc_in);
p = strtok(buf,"\n");
@ -100,14 +106,17 @@ int result_delete_files(RESULT& result) {
no_delete = true;
} else if (match_tag(p, "</file_info>")) {
if (!no_delete) {
retval = dir_hier_path(
filename, config.upload_dir, config.uldl_dir_fanout,
ret1 = dir_hier_path(
filename, config.upload_dir, config.uldl_dir_fanout, true,
pathname
);
if (retval) {
ret2 = dir_hier_path(
filename, config.upload_dir, config.uldl_dir_fanout, false,
pathname
);
if (ret1 && ret2) {
log_messages.printf(SCHED_MSG_LOG::CRITICAL,
"[%s] dir_hier_path: %d\n",
result.name, retval
"[%s] dir_hier_path: %d\n", result.name, retval
);
} else {
retval = unlink(pathname);

View File

@ -279,7 +279,7 @@ int handle_file_upload(FILE* in, R_RSA_PUBLIC_KEY& key) {
}
retval = dir_hier_path(
file_info.name, config.upload_dir, config.uldl_dir_fanout,
file_info.name, config.upload_dir, config.uldl_dir_fanout, true,
path, true
);
log_messages.printf(
@ -326,7 +326,7 @@ int handle_get_file_size(char* file_name) {
// TODO: check to ensure path doesn't point somewhere bad
// Use 64-bit variant
//
dir_hier_path(file_name, config.upload_dir, config.uldl_dir_fanout, path);
dir_hier_path(file_name, config.upload_dir, config.uldl_dir_fanout, true, path);
retval = stat( path, &sbuf );
if (retval && errno != ENOENT) {
log_messages.printf(SCHED_MSG_LOG::DEBUG, "handle_get_file_size(): [%s] returning error\n", file_name);

View File

@ -177,7 +177,8 @@ void make_work() {
p = strtok(buf, "\n");
strcpy(file_name, "");
// make new copies of the WU's input files
// make new hard links to the WU's input files
// (don't actually copy files)
//
while (p) {
if (parse_str(p, "<name>", file_name, sizeof(file_name))) {
@ -185,11 +186,11 @@ void make_work() {
new_file_name, "%s__%d_%d", file_name, start_time, seqno++
);
dir_hier_path(
file_name, config.download_dir, config.uldl_dir_fanout,
file_name, config.download_dir, config.uldl_dir_fanout, true,
pathname
);
dir_hier_path(
new_file_name, config.download_dir, config.uldl_dir_fanout,
new_file_name, config.download_dir, config.uldl_dir_fanout, true,
new_pathname, true
);
sprintf(command,"ln %s %s", pathname, new_pathname);

View File

@ -46,7 +46,10 @@ int get_output_file_path(RESULT const& result, string& path_str) {
flag = parse_str(result.xml_doc_out, "<name>", buf, sizeof(buf));
if (!flag) return ERR_XML_PARSE;
dir_hier_path(buf, config.upload_dir, config.uldl_dir_fanout, path);
dir_hier_path(buf, config.upload_dir, config.uldl_dir_fanout, true, path);
if (!boinc_file_exists(path)) {
dir_hier_path(buf, config.upload_dir, config.uldl_dir_fanout, false, path);
}
path_str = path;
return 0;
}

View File

@ -43,7 +43,7 @@ int get_file_path(WORKUNIT& wu, char* path) {
bool flag;
flag = parse_str(wu.xml_doc, "<name>", buf, sizeof(buf));
if (!flag) return ERR_XML_PARSE;
dir_hier_path(buf, config.download_dir, config.uldl_dir_fanout, path);
dir_hier_path(buf, config.download_dir, config.uldl_dir_fanout, true, path);
return 0;
}

View File

@ -183,7 +183,8 @@ static int process_wu_template(
}
dir_hier_path(
infiles[file_number], config.download_dir,
config.uldl_dir_fanout, path, true
config.uldl_dir_fanout, true,
path, true
);
if (!boinc_file_exists(path)) {
sprintf(top_download_path,
@ -209,7 +210,8 @@ static int process_wu_template(
dir_hier_url(
infiles[file_number], config.download_url,
config.uldl_dir_fanout, url
config.uldl_dir_fanout, true,
url
);
sprintf(buf,
" <name>%s</name>\n"
@ -393,18 +395,15 @@ int create_result(
// Check that every input file is present in the download hierarchy.
//
// infiles:  array of input file names
// ninfiles: number of entries in infiles
// config:   supplies the download directory and the hierarchy fanout
//
// Returns 0 if all files exist, 1 as soon as one is missing.
// Only the new-hash location is checked.
//
int check_files(char** infiles, int ninfiles, SCHED_CONFIG& config) {
    int i;
    char path[256];

    for (i=0; i<ninfiles; i++) {
        dir_hier_path(
            infiles[i], config.download_dir, config.uldl_dir_fanout, true, path
        );
        if (!boinc_file_exists(path)) {
            return 1;
        }
    }
    return 0;
}

View File

@ -26,7 +26,7 @@ int main(int argc, char** argv) {
DirScanner scanner(src_dir);
while (scanner.scan(filename)) {
retval = dir_hier_path(filename.c_str(), dst_dir, fanout, dst_path, true);
retval = dir_hier_path(filename.c_str(), dst_dir, fanout, true, dst_path, true);
if (retval) {
fprintf(stderr, "dir_hier_path: %d\n", retval);
exit(1);

View File

@ -14,7 +14,7 @@ int main(int argc, char** argv) {
exit(1);
}
dir_hier_path(argv[1], "", config.uldl_dir_fanout, path);
dir_hier_path(argv[1], "", config.uldl_dir_fanout, true, path);
printf("path: %s%s\n", config.download_dir, path);
}