*** empty log message ***

svn path=/trunk/boinc/; revision=4908
This commit is contained in:
David Anderson 2004-12-20 20:47:25 +00:00
parent 8f80a3f73d
commit 70472e1617
16 changed files with 350 additions and 81 deletions

View File

@ -21621,7 +21621,7 @@ David 19 Dec 2004
ViewResources.cpp
ViewTransfers.cpp
David
David 20 Dec 2004
- User web:
don't call db_init() in profile.inc.
This should be done by top-level files, not .inc files
@ -21646,3 +21646,28 @@ David
edit_forum_preferences_action.php
profile_menu.php
view_profile.php
David 20 Dec 2004
- have assimilate_handler return an error code.
If nonzero, assimilator exits
(don't want to mark WUs as assimilated if temporary NFS failure, e.g.)
- improve docs on file management, added doc on file_deleter
- added doc on backend utility funcs
- improve docs on assimilate_handler()
- core client: if an output file is too big, print info on size, limit
client/
cs_apps.C
doc/
various
backend_util.php (new)
benchmark.php (new)
file_deleter.php (new)
sched/
assimilate_handler.h
assimilator.C
assimilator_placeholder.C
file_deleter.C
sample_dummy_assimilator.C
sched_util.C
validate_util.C

View File

@ -101,6 +101,11 @@ int CLIENT_STATE::app_finished(ACTIVE_TASK& at) {
"Output file %s for result %s exceeds size limit.",
fip->name, rp->name
);
msg_printf(
rp->project, MSG_INFO,
"File size: %f bytes. Limit: %f bytes",
size, fip->max_nbytes
);
fip->delete_file();
fip->status = ERR_FILE_TOO_BIG;

View File

@ -3,13 +3,43 @@ require_once("docutil.php");
page_head("Result assimilation");
echo "
Projects must create one assimilator program per application.
This is best done by linking the program <b>sched/assimilate.C</b>
This is done by linking the program <b>sched/assimilate.C</b>
with an application-specific function of the form
<pre>
int assimilate_handler(
WORKUNIT& wu, vector&lt;RESULT>& results, RESULT& canonical_result
);
</pre>
This is called when either
<ul>
<li> The workunit has a nonzero
<a href=work.php>error mask</a>
(indicating, for example, too many error results).
In this case the handler might write a message to a log
or send an email to the application developer.
<li> The workunit has a canonical result.
In this case wu.canonical_resultid will be nonzero,
and canonical_result will contain the canonical result.
Your handler might, for example, parse the canonical result's
output file and write its contents to a separate database.
</ul>
In both cases the 'results' vector will be populated with
all the workunit's results (including unsuccessful and unsent ones).
All files (both input and output) will generally be on disk.
<p>
It's possible that both conditions might hold.
<p>
If assimilate_handler() returns zero,
the workunit record will be marked as assimilated.
If assimilate_handler() returns nonzero,
the assimilator will print an error message and exit.
Typically you should do this in any error situation.
<p>
You can use BOINC's
<a href=backend_util.php>back-end utility functions</a>
to get file pathnames and open files.
";
page_tail();
?>

79
doc/backend_util.php Normal file
View File

@ -0,0 +1,79 @@
<?php
require_once("docutil.php");
page_head("Back-end utility functions");
echo "
The following functions can be used in your validator and assimilator code:
<hr>
<pre>
int get_output_file_path(RESULT const&, std::string&);
</pre>
Returns the path of a result's output file
(parses result.xml_doc_out and computes the file's position in the
<a href=hier_dir.php>hierarchical directory structure</a>).
<p>
Note: this function doesn't handle multiple output files
(if there are multiple files, it returns the path of the first one).
If your application has multiple output files, see below.
<hr>
<pre>
int try_fopen(char* path, FILE*& f, char* mode);
</pre>
Open a file, distinguishing between recoverable and nonrecoverable errors.
Returns zero on success.
Returns ERR_FOPEN if the directory is present but not the file
(this is considered a nonrecoverable error).
Returns ERR_OPENDIR if the directory is not there
(this is generally a recoverable error, like NFS mount failure).
<hr>
<pre>
double median_mean_credit(vector&lt;RESULT> const& results);
</pre>
Given a vector of N correct results, computes a canonical credit as follows:
<ul>
<li> if N==1, return that result's claimed credit
<li> if N==2, return min of claimed credits
<li> if N>2, toss out high and low claimed credit,
and return the average of the rest.
</ul>
<hr>
<h3>Multiple output files</h3>
If your application has multiple output files
you'll need to generalize get_output_file_path().
To do this you'll need to know the following:
<p>
The database field 'result.xml_doc_out'
describes a result's output files.
It has the form
<pre>
",htmlspecialchars("
<file_info>...</file_info>
[ ... ]
<result>
<name>foobar</name>
<wu_name>blah</wu_name>
<exit_status>blah</exit_status>
<file_ref>...</file_ref>
[ ... ]
</result>
"),"
</pre>
The components are:
<ul>
<li> The <b>&lt;name></b> element is the result name.
<li> The <b>&lt;wu_name></b> element is the workunit name.
<li> Each <b>&lt;file_ref></b> element is an association to an output file,
described by a corresponding <b>&lt;file_info></b> element.
</ul>
<p>
The XML document describing the sizes and checksums of the output
files is a list of <b>&lt;file_info></b> elements,
with the <b>nbytes</b> and <b>md5_cksum</b> fields present.
The project back end
must parse this field to find the locations and checksums of output files.
";
page_tail();
?>

80
doc/benchmark.php Normal file
View File

@ -0,0 +1,80 @@
<?php
require_once("docutil.php");
page_head("More about benchmarks");
echo "
<h2>How benchmarks are calculated</h2>
<p>
'Whetstone' is the name of the benchmark that is reported on your
[Show computers] web page as 'Measured floating point speed'.
Dhrystone is the name of the benchmark used for 'Measured integer speed'.
Floats can have fractional parts (like 1.48283 or 3.141592);
integers are whole numbers like 1, 2, 938283 or 2004.
Whetstone does 8 different groups of tests (repeatedly of course),
times how long they took to finish, and produces a number,
[ops performed]/[time].
These tests all use floating point math operations of the CPUs being tested.
Some of them are simple math (addition, multiplication, division)
while others compute trigonometric and exponential functions
(sine, cosine, tangent, exponent).
Dhrystone checks repeated integer operations
and several operating system file handling operations.
<p>
Neither of the tests really checks how well/fast a system can access memory,
and SETI@home (for example) accesses memory a lot.
<p>
Here is an example of memory introducing a delay:
A Pentium 4 CPU of any speed can calculate the sine of an angle in
approximately 170 ticks of its internal clock.
It could have performed 170 regular integer additions in this time.
<p>
But if it wanted to do an integer addition on a number somewhere out in memory
(say it was working on a table of numbers), the
CPU might have to wait as much as 260 ticks
for this memory integer to be delivered to the CPU.
So a badly timed integer+memory
operation would take far longer than a sine calculation.
<p>
This is where Celeron CPUs can really slow down.
Pentium has many features to predict when the CPU might be getting memory,
and begins getting it long before the CPU actually calculates with it.
Thus there is much less delay for most memory operations.
<h2>Why 'predicted time' can be wrong</h2>
Each WU delivered to your machine includes an estimated number of
floating point (FP) calculations.
BOINC divides this by the FP
benchmark number to estimate completion time.
SETI@home's estimate for its WUs is currently always 27.9 trillion
(American usage, i.e. 27.9 x 10^12);
however, the actual number of FP ops varies greatly, which is why
WUs take different amounts of time to finish.
<p>
SETI@home uses almost all single-precision floating point math,
while Whetstone is all double-precision math.
On Intel x86 processors
the speed difference in calculating single vs. double isn't very large.
<p>
SETI@home uses mostly add, sub, multiply and divide.
About 20% of its time is spent in trigonometry.
Almost all the time in Whetstone is used for trigonometry.
<p>
Memory access speed and trigonometry are the two major reasons that
the benchmark results and SETI@home processing speed don't match
up on many systems.
<br><br>
<i>Thanks to Ben Herndon for this writeup</i>
";
page_tail();
?>

View File

@ -101,6 +101,7 @@ How to generate tasks and handle the results.
<li> <a href=tools_work.php>Generating work</a>
<li> <a href=validate.php>Result validation</a>
<li> <a href=assimilate.php>Result assimilation</a>
<li> <a href=file_deleter.php>Server-side file deletion</a>
</ul>
<font size=+1><b>
@ -128,7 +129,7 @@ Using disk space on participant hosts
<li> <a href=get_file_list.php>Uploading file lists</a>
<li> <a href=get_file.php>Uploading files</a>
<li> <a href=send_file.php>Downloading files</a>
<li> <a href=delete_file.php>Deleting files</a>
<li> <a href=delete_file.php>Deleting files on client hosts</a>
</ul>
<font size=+1><b>

42
doc/file_deleter.php Normal file
View File

@ -0,0 +1,42 @@
<?php
require_once("docutil.php");
page_head("Server-side file deletion");
echo "
Files are deleted from the data server's upload and download directories
by the <b>file_deleter</b> daemon.
Typically you don't need to customize this.
The default file deletion policy is:
<ul>
<li> A workunit's input files are deleted when
all results are 'over' (reported or timed out)
and the workunit is assimilated.
<li> A result's output files are deleted
after the workunit is assimilated.
The canonical result is handled differently,
since its output files may be needed to validate
results that are reported after assimilation;
hence its files are deleted only when all results are over,
and all successful results have been validated.
</ul>
<p>
In some cases you may not want files to be deleted.
There are two ways to accomplish this:
<ul>
<li> Run the file_deleter daemon with
the -preserve_wu_files command-line option
(to preserve all input files)
and/or the -preserve_result_files command-line option
(to preserve all output files).
<li> Include &lt;no_delete/>
in the <a href=files.php>&lt;file_info></a> element for a file in a
<a href=tools_work.php>workunit or result template</a>.
This lets you suppress deletion on a file-by-file basis.
</ul>
In either case you may need to implement your
own scheme for deleting files,
to avoid overflowing data server storage.
";
page_tail();
?>

View File

@ -5,11 +5,14 @@ echo "
<h3>Files and data servers</h3>
<p>
The BOINC storage model is based on <b>files</b>.
The inputs and outputs of applications,
and the application executables, are files.
Examples of files:
<ul>
<li> The inputs and outputs of applications;
<li> Application executables, libraries, etc.
</ul>
<p>
The BOINC core client transfers files to and from <b>data servers</b>
operated by the project, using HTTP.
The BOINC core client transfers files to and from project-operated
<b>data servers</b> using HTTP.
<p>
A file is described by an XML element of the form
".html_text("
@ -76,16 +79,16 @@ list_item("signature_required",
);
list_item("no_delete",
"If present for an input (workunit) file,
indicates that the file should NOT be removed from the download/
directory when the workunit is completed. You should use this
if a particular input file or files are used by more than one
workunit, or will be used by future, unqueued workunits."
indicates that the file should NOT be removed from the data server's
download directory when the workunit is completed.
Use this if a particular input file or files are used by more than one
workunit, or will be used by future workunits."
);
list_item("no_delete",
"If present for an output (result) file,
indicates that the file should NOT be removed from the upload/
indicates that the file should NOT be removed from the data server's upload
directory when the corresponding workunit is completed.
Use with caution - this may cause your upload/ directory to overflow."
Use with caution - this may cause your upload directory to overflow."
);
list_item("report_on_rpc",
"Include a description of this file in scheduler RPC requests,
@ -94,12 +97,11 @@ list_item("report_on_rpc",
);
list_end();
echo "
These attributes allow the specification of various types of files: for
example, input or output files that are retained for use as input to
later computations.
<p>
Once a file is created (on a data server or a participant host) it
is immutable.
is <b>immutable</b>.
This means that all replicas of that file are assumed to be identical.
<h3>File references</h3>
<p>
Files may be associated with <a href=work.php>workunits</a>,
@ -123,5 +125,29 @@ list_item("main_program", "Used for files
associated with application versions.
It indicates that this file is the application's main program.");
list_end();
echo "
<h3>File management</h3>
<p>
BOINC's default behavior is to delete files
when they aren't needed any more.
Specifically:
<ul>
<li> On the client, input files are deleted when no workunit refers to them,
and output files are deleted when no result refers to them.
Application-version files are deleted when they are referenced
only from superseded application versions.
<li> On the client, the 'sticky' flag overrides the above mechanisms
and suppresses the deletion of the file.
The file may be deleted by an explicit
<a href=delete_file.php>server request</a>.
The file may also be deleted at any time by the core client
in order to honor limits on disk-space usage.
<li> On the server, the <a href=file_deleter.php>file deleter daemon</a>
deletes input and output files that are no longer needed.
This can be suppressed using the 'no_delete' flag,
or using command-line options to the file deleter.
</ul>
";
page_tail();
?>

View File

@ -20,6 +20,7 @@ int check_set(vector<RESULT> results, DB_WORKUNIT& wu, int& canonicalid, double&
"</pre>
<ul>
<li><b>check_set()</b> takes a set of results (all with outcome=SUCCESS).
It reads and compares their output files.
If there is a quorum of matching results,
it selects one of them as the canonical result, returning its ID.
In this case it also returns the credit to
@ -31,8 +32,11 @@ If, when an output file for a result has a nonrecoverable error
or the file is present but has invalid contents),
then it must set the result's outcome (in memory, not database)
to VALIDATE_ERROR.
Note: the function try_fopen() (in lib/util.C) can be used
to distinguish recoverable and nonrecoverable file-open errors.
<p>
Note: use BOINC's
<a href=backend_util.php>back-end utility functions</a>
to get file pathnames
and to distinguish recoverable and nonrecoverable file-open errors.
<li>
If a canonical result is found, check_set() must set the
validate_state field of each non-ERROR result
@ -75,7 +79,7 @@ This tells the validator to write an error message and exit.
</ul>
<p>
Neither function should delete files.
Neither function should delete files or access the BOINC database.
<p>
A more detailed description is <a href=validate_logic.txt>here</a>.
<p>
@ -90,46 +94,7 @@ and regards results as equivalent only if they agree byte for byte.
regards any two results as equivalent if their CPU time
exceeds a given minimum.
</ul>
<p>
<b>validate_util.C</b> contains support functions for both of the above.
<hr>
<b>NOTE: the above code assumes that each result
has a single output file.
Revisions will be needed to handle multiple output files.
To do this you will need to know the following:
</b>
<p>
The database field 'result.xml_doc_out'
describes a result's output files.
It has the form
<pre>
",htmlspecialchars("
<file_info>...</file_info>
[ ... ]
<result>
<name>foobar</name>
<wu_name>blah</wu_name>
<exit_status>blah</exit_status>
<file_ref>...</file_ref>
[ ... ]
</result>
"),"
</pre>
The components are:
<ul>
<li> The <b>&lt;name></b> element is the result name.
<li> The <b>&lt;wu_name></b> element is the workunit name.
<li> Each <b>&lt;file_ref></b> element is an association to an output file,
described by a corresponding <b>&lt;file_info></b> element.
</ul>
<p>
The XML document describing the sizes and checksums of the output
files is a list of <b>&lt;file_info></b> elements,
with the <b>nbytes</b> and <b>md5_cksum</b> fields present.
The project back end
must parse this field to find the locations and checksums of output files.
";
page_tail();
?>

View File

@ -1,4 +1,4 @@
#include <vector>
#include "boinc_db.h"
extern void assimilate_handler(WORKUNIT&, std::vector<RESULT>&, RESULT&);
extern int assimilate_handler(WORKUNIT&, std::vector<RESULT>&, RESULT&);

View File

@ -84,7 +84,12 @@ bool do_pass(APP& app) {
}
}
assimilate_handler(wu, results, canonical_result);
retval = assimilate_handler(wu, results, canonical_result);
if (retval) {
log_messages.printf(SCHED_MSG_LOG::CRITICAL,
"[%s] handler returned error %d; exiting\n", wu.name, retval
);
}
if (update_db) {
sprintf(

View File

@ -35,7 +35,7 @@
using std::vector;
void assimilate_handler(
int assimilate_handler(
WORKUNIT& wu, vector<RESULT>& results, RESULT& canonical_result
) {
SCOPE_MSG_LOG scope_messages(log_messages, SCHED_MSG_LOG::NORMAL);
@ -61,6 +61,7 @@ void assimilate_handler(
if (wu.error_mask&WU_ERROR_TOO_MANY_SUCCESS_RESULTS) {
log_messages.printf(SCHED_MSG_LOG::CRITICAL, "[%s] Error: too many success results\n", wu.name);
}
return 0;
}
#ifdef __GNUC__

View File

@ -63,7 +63,7 @@ int wu_delete_files(WORKUNIT& wu) {
retval = dir_hier_path(
filename, config.download_dir, config.uldl_dir_fanout,
pathname
);
);
if (retval) {
log_messages.printf(SCHED_MSG_LOG::CRITICAL, "[%s] dir_hier_path: %d\n", wu.name, retval);
} else {
@ -147,8 +147,9 @@ bool do_pass() {
while (!wu.enumerate(buf)) {
did_something = true;
if (!preserve_wu_files)
if (!preserve_wu_files) {
wu_delete_files(wu);
}
wu.file_delete_state = FILE_DELETE_DONE;
sprintf(buf, "file_delete_state=%d", wu.file_delete_state);
retval= wu.update_field(buf);
@ -163,8 +164,9 @@ bool do_pass() {
sprintf(buf, "where file_delete_state=%d limit 1000", FILE_DELETE_READY);
while (!result.enumerate(buf)) {
did_something = true;
if (!preserve_result_files)
if (!preserve_result_files) {
result_delete_files(result);
}
result.file_delete_state = FILE_DELETE_DONE;
sprintf(buf, "file_delete_state=%d", result.file_delete_state);
retval= result.update_field(buf);
@ -190,18 +192,22 @@ int main(int argc, char** argv) {
} else if (!strcmp(argv[i], "-one_pass")) {
one_pass = true;
} else if (!strcmp(argv[i], "-preserve_wu_files")) {
// This option is primarily for testing. If enabled, the
// file_deleter will function 'normally' and will update
// the database, but will not actually delete the workunit
// input files. It's equivalent to setting <no_delete/>
// [undocumented] for all workunit input files.
// This option is primarily for testing.
// If enabled, the file_deleter will function 'normally'
// and will update the database,
// but will not actually delete the workunit input files.
// It's equivalent to setting <no_delete/>
// for all workunit input files.
//
preserve_wu_files = true;
} else if (!strcmp(argv[i], "-preserve_result_files")) {
// This option is primarily for testing. If enabled, the
// file_deleter will function 'normally' and will update
// the database but will not actually delete the result
// output files. It's equivalent to setting <no_delete/>
// [undocumented] for all result output files.
// This option is primarily for testing.
// If enabled, the file_deleter will function 'normally'
// and will update the database,
// but will not actually delete the result output files.
// It's equivalent to setting <no_delete/>
// for all result output files.
//
preserve_result_files = true;
} else if (!strcmp(argv[i], "-d")) {
log_messages.set_debug_level(atoi(argv[++i]));

View File

@ -28,7 +28,7 @@
using std::vector;
void assimilate_handler(
int assimilate_handler(
WORKUNIT& wu, vector<RESULT>& results, RESULT& canonical_result
) {
SCOPE_MSG_LOG scope_messages(log_messages, SCHED_MSG_LOG::NORMAL);
@ -54,6 +54,7 @@ void assimilate_handler(
if (wu.error_mask&WU_ERROR_TOO_MANY_SUCCESS_RESULTS) {
log_messages.printf(SCHED_MSG_LOG::CRITICAL, "[%s] Error: too many success results\n", wu.name);
}
return 0;
}
#ifdef __GNUC__

View File

@ -93,7 +93,7 @@ int try_fopen(char* path, FILE*& f, char* mode) {
f = fopen(path, mode);
if (!f) {
memset(dirpath, '\0', sizeof(dirpath));
memset(dirpath, '\0', sizeof(dirpath));
p = strrchr(path, '/');
if (p) {
strncpy(dirpath, path, (int)(p-path));

View File

@ -142,7 +142,8 @@ int generic_check_set(
log_messages.printf(
SCHED_MSG_LOG::CRITICAL,
"generic_check_set: init_result([RESULT#%d %s]) failed\n",
results[i].id, results[i].name);
results[i].id, results[i].name
);
goto cleanup;
}
}
@ -161,7 +162,8 @@ int generic_check_set(
log_messages.printf(
SCHED_MSG_LOG::CRITICAL,
"generic_check_set: check_pair_with_data([RESULT#%d %s], [RESULT#%d %s]) failed\n",
results[i].id, results[i].name, results[j].id, results[j].name);
results[i].id, results[i].name, results[j].id, results[j].name
);
} else if (match) {
++neq;
matches[j] = true;
@ -197,7 +199,8 @@ int generic_check_set_majority(
return generic_check_set(
results, canonicalid, credit,
init_result_f, check_pair_with_data_f, cleanup_result_f,
results.size() / 2);
results.size() / 2
);
}
int generic_check_pair(
@ -227,7 +230,7 @@ int generic_check_pair(
SCHED_MSG_LOG::CRITICAL,
"[RESULT#%d %s] [RESULT#%d %s] Couldn't initialize result 2\n",
r1.id, r1.name, r2.id, r2.name
);
);
cleanup_result_f(r1, data1);
return retval;
}