*** empty log message ***

svn path=/trunk/boinc/; revision=4908
2004-12-20 20:47:25 +00:00 · 2004-12-20 20:47:25 +00:00 · 70472e1617
parent 8f80a3f73d
commit 70472e1617
16 changed files with 350 additions and 81 deletions
--- a/27
+++ b/27
@ -21621,7 +21621,7 @@ David  19 Dec 2004
        ViewResources.cpp
        ViewTransfers.cpp

-David
+David  20 Dec 2004
    - User web:
        don't call db_init() in profile.inc.
            This should be done by top-level files, not .inc files
@ -21646,3 +21646,28 @@ David
            edit_forum_preferences_action.php
            profile_menu.php
            view_profile.php
+
+David  20 Dec 2004
+    - have assimilate_handler return an error code.
+        If nonzero,  assimilator exits
+        (don't want to mark WUs as assimilated if temporary NFS failure, e.g.)
+    - improve docs on file management, added doc on file_deleter
+    - added doc on backend utility funcs
+    - improve docs on assimilate_handler()
+    - core client: if an output file is too big, print info on size, limit
+
+    client/
+        cs_apps.C
+    doc/
+        various
+        backend_util.php (new)
+        benchmark.php (new)
+        file_deleter.php (new)
+    sched/
+        assimilate_handler.h
+        assimilator.C
+        assimilator_placeholder.C
+        file_deleter.C
+        sample_dummy_assimilator.C
+        sched_util.C
+        validate_util.C
--- a/client/cs_apps.C
+++ b/client/cs_apps.C
@ -101,6 +101,11 @@ int CLIENT_STATE::app_finished(ACTIVE_TASK& at) {
                "Output file %s for result %s exceeds size limit.",
                fip->name, rp->name
            );
+            msg_printf(
+                rp->project, MSG_INFO,
+                "File size: %f bytes.  Limit: %f bytes",
+                size, fip->max_nbytes
+            );

            fip->delete_file();
            fip->status = ERR_FILE_TOO_BIG;
--- a/doc/assimilate.php
+++ b/doc/assimilate.php
@ -3,13 +3,43 @@ require_once("docutil.php");
 page_head("Result assimilation");
 echo "
 Projects must create one assimilator program per application.
-This is best done by linking the program <b>sched/assimilate.C</b>
+This is done by linking the program <b>sched/assimilator.C</b>
 with an application-specific function of the form
 <pre>
 int assimilate_handler(
    WORKUNIT& wu, vector&lt;RESULT>& results, RESULT& canononical_result
 );
 </pre>
+
+This is called when either
+<ul>
+<li> The workunit has a nonzero
+<a href=work.php>error mask</a>
+(indicating, for example, too many error results).
+In this case the handler might write a message to a log
+or send an email to the application developer.
+<li> The workunit has a canonical result.
+In this case wu.canonical_resultid will be nonzero
+and canonical_result will contain the canonical result.
+Your handler might, for example, parse the canonical result's
+output file and write its contents to a separate database.
+</ul>
+In both cases the 'results' vector will be populated with
+all the workunit's results (including unsuccessful and unsent ones).
+All files (both input and output) will generally be on disk.
+<p>
+It's possible that both conditions might hold.
+<p>
+If assimilate_handler() returns zero,
+the workunit record will be marked as assimilated.
+If assimilate_handler() returns nonzero,
+the assimilator will print an error message and exit.
+Typically you should do this in any error situation.
+<p>
+You can use BOINC's
+<a href=backend_util.php>back-end utility functions</a>
+to get file pathnames and open files.
+
 ";
 page_tail();
 ?>
--- a/doc/backend_util.php
+++ b/doc/backend_util.php
@ -0,0 +1,79 @@
+<?php
+require_once("docutil.php");
+page_head("Back-end utility functions");
+echo "
+The following functions can be used in your validator and assimilator code:
+<hr>
+
+<pre>
+int get_output_file_path(RESULT const&, std::string&);
+</pre>
+Returns the path of a result's output file
+(parses result.xml_doc_out and computes the file's position in the
+ <a href=hier_dir.php>hierarchical directory structure</a>).
+
+<p>
+Note: this function doesn't handle multiple output files
+(if there are multiple files, it returns the path of the first one).
+If your application has multiple output files, see below.
+<hr>
+<pre>
+int try_fopen(char* path, FILE*& f, char* mode);
+</pre>
+Open a file, distinguishing between recoverable and nonrecoverable errors.
+Returns zero on success.
+Returns ERR_FOPEN if the directory is present but not the file
+(this is considered a nonrecoverable error).
+Returns ERR_OPENDIR if the directory is not there
+(this is generally a recoverable error, like NFS mount failure).
+<hr>
+<pre>
+double median_mean_credit(vector&lt;RESULT> const& results);
+</pre>
+Given a vector of N correct results, computes a canonical credit as follows:
+<ul>
+<li> if N==1, return that result's claimed credit
+<li> if N==2, return min of claimed credits
+<li> if N>2, toss out high and low claimed credit,
+and return the average of the rest.
+</ul>
+<hr>
+<h3>Multiple output files</h3>
+If your application has multiple output files
+you'll need to generalize get_output_file_path().
+To do this you'll need to know the following:
+
+
+<p>
+The database field 'result.xml_doc_out'
+describes a result's output files.
+It has the form
+<pre>
+",htmlspecialchars("
+<file_info>...</file_info>
+[ ... ]
+<result>
+    <name>foobar</name>
+    <wu_name>blah</wu_name>
+    <exit_status>blah</exit_status>
+    <file_ref>...</file_ref>
+    [ ... ]
+</result>
+"),"
+</pre>
+The components are:
+<ul>
+<li> The <b>&lt;name></b> element is the result name.
+<li> The <b>&lt;wu_name></b> element is the workunit name.
+<li> Each <b>&lt;file_ref></b> element is an association to an output file,
+described by a corresponding <b>&lt;file_info></b> element.
+</ul>
+<p>
+The XML document describing the sizes and checksums of the output
+files is a list of <b>&lt;file_info></b> elements,
+with the <b>nbytes</b> and <b>md5_cksum</b> fields present.
+The project back end
+must parse this field to find the locations and checksums of output files.
+";
+page_tail();
+?>
--- a/doc/benchmark.php
+++ b/doc/benchmark.php
@ -0,0 +1,80 @@
+<?php
+require_once("docutil.php");
+page_head("More about benchmarks");
+echo "
+<h2>How benchmarks are calculated</h2>
+
+<p>
+'Whetstone' is the name of the benchmark that is reported on your
+[Show computers] web page as 'Measured floating point speed'.
+Dhrystone is the name of the benchmark used for 'Measured integer speed'.
+Floats can have fractional parts (like 1.48283 or 3.141592);
+integers are whole numbers like 1, 2, 938283 or 2004.
+
+Whetstone does 8 different groups of tests (repeatedly of course),
+times how long they took to finish, and produces a number,
+[ops performed]/[time].
+These tests all use floating point math operations of the CPUs being tested.
+Some of them are simple math (addition, multiplication, division)
+while others compute trigonometric and exponential functions
+(sine, cosine, tangent, exponent).
+
+
+Dhrystone checks repeated integer operations
+and several operating system file handling operations.
+
+<p>
+Neither of the tests really checks how well/fast a system can access memory,
+and SETI@home (for example) accesses memory a lot.
+
+<p>
+Here is an example of memory introducing a delay:
+A Pentium 4 CPU of any speed can calculate the sine of an angle in
+approximately 170 ticks of its internal clock.
+It could have performed 170 regular integer additions in this time.
+
+<p>
+But if it wanted to do an integer addition on a number somewhere out in memory
+(say it was working on a table of numbers), the
+CPU might have to wait as much as 260 ticks
+for this memory integer to be delivered to the CPU.
+So a badly timed integer+memory
+operation would take far longer than a sine calculation.
+
+<p>
+This is where Celeron CPUs can really slow down.
+Pentium has many features to predict when the CPU might be getting memory,
+and begins getting it long before the CPU actually calculates with it.
+Thus there is much less delay for most memory operations.
+
+<h2>Why 'predicted time' can be wrong</h2>
+Each WU delivered to your machine includes an estimated number of
+floating point (FP) calculations.
+BOINC divides this by the FP
+benchmark number to estimate completion time.
+SETI@home's estimate for its WUs is currently always 27.9 trillion (American);
+however, the actual number of FP ops varies greatly, which is why
+WUs take different amounts of time to finish.
+
+
+<p>
+SETI@home uses almost all single-precision floating point math,
+while Whetstone is all double-precision math.
+On Intel x86 processors
+the speed difference in calculating single vs. double isn't very large.
+
+<p>
+SETI@home uses mostly add, sub, multiply and divide.
+About 20% of its time is spent in trigonometry.
+Almost all the time in Whetstone is used for trigonometry.
+
+<p>
+Memory access speed and trigonometry are the two major reasons that
+the benchmark results and SETI@home processing speed don't match
+up on many systems.
+
+<br><br>
+<i>Thanks to Ben Herndon for this writeup</i>
+";
+page_tail();
+?>
--- a/doc/create_project.php
+++ b/doc/create_project.php
@ -101,6 +101,7 @@ How to generate tasks and handle the results.
 <li> <a href=tools_work.php>Generating work</a>
 <li> <a href=validate.php>Result validation</a>
 <li> <a href=assimilate.php>Result assimilation</a>
+<li> <a href=file_deleter.php>Server-side file deletion</a>
 </ul>

 <font size=+1><b>
@ -128,7 +129,7 @@ Using disk space on participant hosts
 <li> <a href=get_file_list.php>Uploading file lists</a>
 <li> <a href=get_file.php>Uploading files</a>
 <li> <a href=send_file.php>Downloading files</a>
-<li> <a href=delete_file.php>Deleting files</a>
+<li> <a href=delete_file.php>Deleting files on client hosts</a>
 </ul>

 <font size=+1><b>
--- a/doc/file_deleter.php
+++ b/doc/file_deleter.php
@ -0,0 +1,42 @@
+<?php
+require_once("docutil.php");
+page_head("Server-side file deletion");
+echo "
+Files are deleted from the data server's upload and download directories
+by the <b>file_deleter</b> daemon.
+Typically you don't need to customize this.
+The default file deletion policy is:
+<ul>
+<li> A workunit's input files are deleted when
+all results are 'over' (reported or timed out)
+and the workunit is assimilated.
+<li> A result's output files are deleted
+after the workunit is assimilated.
+The canonical result is handled differently,
+since its output files may be needed to validate
+results that are reported after assimilation;
+hence its files are deleted only when all results are over,
+and all successful results have been validated.
+</ul>
+
+<p>
+In some cases you may not want files to be deleted.
+There are two ways to accomplish this:
+<ul>
+<li> Run the file_deleter daemon with
+the -preserve_wu_files command-line option
+(to preserve all input files)
+and/or the -preserve_result_files command-line option
+(to preserve all output files).
+<li> Include &lt;no_delete/>
+in the <a href=files.php>&lt;file_info></a> element for a file in a
+<a href=tools_work.php>workunit or result template</a>.
+This lets you suppress deletion on a file-by-file basis.
+
+</ul>
+In either case you may need to implement your
+own scheme for deleting files,
+to avoid overflowing data server storage.
+";
+page_tail();
+?>
--- a/doc/files.php
+++ b/doc/files.php
@ -5,11 +5,14 @@ echo "
 <h3>Files and data servers</h3> 
 <p>
 The BOINC storage model is based on <b>files</b>.
-The inputs and outputs of applications,
-and the application executables, are files.
+Examples of files:
+<ul>
+<li> The inputs and outputs of applications;
+<li> Application executables, libraries, etc.
+</ul>
 <p>
-The BOINC core client transfers files to and from <b>data servers</b>
-operated by the project, using HTTP.
+The BOINC core client transfers files to and from project-operated
+<b>data servers</b> using HTTP.
 <p>
 A file is described by an XML element of the form 
 ".html_text("
@ -76,16 +79,16 @@ list_item("signature_required",
 );
 list_item("no_delete",
    "If present for an input (workunit) file,
-    indicates that the file should NOT be removed from the download/
-    directory when the workunit is completed.  You should use this
-    if a particular input file or files are used by more than one
-    workunit, or will be used by future, unqueued workunits."
+    indicates that the file should NOT be removed from the data server's
+    download directory when the workunit is completed.
+    Use this if a particular input file or files are used by more than one
+    workunit, or will be used by future workunits."
 );
 list_item("no_delete",
    "If present for an output (result) file,
-    indicates that the file should NOT be removed from the upload/
+    indicates that the file should NOT be removed from the data server's upload
    directory when the corresponding workunit is completed.
-    Use with caution - this may cause your upload/ directory to overflow."
+    Use with caution - this may cause your upload directory to overflow."
 );
 list_item("report_on_rpc",
    "Include a description of this file in scheduler RPC requests,
@ -94,12 +97,11 @@ list_item("report_on_rpc",
 );
 list_end();
 echo "
-These attributes allow the specification of various types of files: for
-example, input or output files that are retained for use as input to
-later computations.
 <p>
 Once a file is created (on a data server or a participant host) it
-is immutable.
+is <b>immutable</b>.
+This means that all replicas of that file are assumed to be identical.
+
 <h3>File references</h3> 
 <p>
 Files may be associated with <a href=work.php>workunits</a>,
@ -123,5 +125,29 @@ list_item("main_program", "Used for files
 associated with application versions.
 It indicates that this file is the application's main program.");
 list_end();
+
+echo "
+<h3>File management</h3>
+<p>
+BOINC's default behavior is to delete files
+once they are no longer needed.
+Specifically:
+<ul>
+<li> On the client, input files are deleted when no workunit refers to them,
+and output files are deleted when no result refers to them.
+Application-version files are deleted when they are referenced
+only from superseded application versions.
+<li> On the client, the 'sticky' flag overrides the above mechanisms
+and suppresses the deletion of the file.
+The file may be deleted by an explicit
+<a href=delete_file.php>server request</a>.
+The file may also be deleted at any time by the core client
+in order to honor limits on disk-space usage.
+<li> On the server, the <a href=file_deleter.php>file deleter daemon</a>
+deletes input and output files that are no longer needed.
+This can be suppressed using the 'no_delete' flag,
+or using command-line options to the file deleter.
+</ul>
+";
 page_tail();
 ?>
--- a/doc/validate.php
+++ b/doc/validate.php
@ -20,6 +20,7 @@ int check_set(vector<RESULT> results, DB_WORKUNIT& wu, int& canonicalid, double&
 "</pre>
 <ul>
 <li><b>check_set()</b> takes a set of results (all with outcome=SUCCESS).
+It reads and compares their output files.
 If there is a quorum of matching results,
 it selects one of them as the canonical result, returning its ID.
 In this case it also returns the credit to
@ -31,8 +32,11 @@ If, when an output file for a result has a nonrecoverable error
 or the file is present but has invalid contents),
 then it must set the result's outcome (in memory, not database)
 to VALIDATE_ERROR.
-Note: the function try_fopen() (in lib/util.C) can be used
-to distinguish recoverable and nonrecoverable file-open errors.
+<p>
+Note: use BOINC's
+<a href=backend_util.php>back-end utility functions</a>
+to get file pathnames
+and to distinguish recoverable and nonrecoverable file-open errors.
 <li>
 If a canonical result is found, check_set() must set the
 validate_state field of each non-ERROR result
@ -75,7 +79,7 @@ This tells the validator to write an error message and exit.
 </ul>

 <p>
-Neither function should delete files.
+Neither function should delete files or access the BOINC database.
 <p>
 A more detailed description is <a href=validate_logic.txt>here</a>.
 <p>
@ -90,46 +94,7 @@ and regards results as equivalent only if they agree byte for byte.
 regards any two results as equivalent if their CPU time
 exceeds a given minimum.
 </ul>
-<p>
-<b>validate_util.C</b> contains support functions for both of the above.

-<hr>
-<b>NOTE: the above code assumes that each result
-has a single output file.
-Revisions will be needed to handle multiple output files.
-To do this you will need to know the following:
-</b>
-
-<p>
-The database field 'result.xml_doc_out'
-describes a result's output files.
-It has the form
-<pre>
-",htmlspecialchars("
-<file_info>...</file_info>
-[ ... ]
-<result>
-    <name>foobar</name>
-    <wu_name>blah</wu_name>
-    <exit_status>blah</exit_status>
-    <file_ref>...</file_ref>
-    [ ... ]
-</result>
-"),"
-</pre>
-The components are:
-<ul>
-<li> The <b>&lt;name></b> element is the result name.
-<li> The <b>&lt;wu_name></b> element is the workunit name.
-<li> Each <b>&lt;file_ref></b> element is an association to an output file,
-described by a corresponding <b>&lt;file_info></b> element.
-</ul>
-<p>
-The XML document describing the sizes and checksums of the output
-files is a list of <b>&lt;file_info></b> elements,
-with the <b>nbytes</b> and <b>md5_cksum</b> fields present.
-The project back end
-must parse this field to find the locations and checksums of output files.
 ";
 page_tail();
 ?>
--- a/sched/assimilate_handler.h
+++ b/sched/assimilate_handler.h
@ -1,4 +1,4 @@
 #include <vector>
 #include "boinc_db.h"

-extern void assimilate_handler(WORKUNIT&, std::vector<RESULT>&, RESULT&);
+extern int assimilate_handler(WORKUNIT&, std::vector<RESULT>&, RESULT&);
--- a/sched/assimilator.C
+++ b/sched/assimilator.C
@ -84,7 +84,12 @@ bool do_pass(APP& app) {
            }
        }

-        assimilate_handler(wu, results, canonical_result);
+        retval = assimilate_handler(wu, results, canonical_result);
+        if (retval) {
+            log_messages.printf(SCHED_MSG_LOG::CRITICAL,
+                "[%s] handler returned error %d; exiting\n", wu.name, retval
+            );
+        }

        if (update_db) {
            sprintf(
--- a/sched/assimilator_placeholder.C
+++ b/sched/assimilator_placeholder.C
@ -35,7 +35,7 @@

 using std::vector;

-void assimilate_handler(
+int assimilate_handler(
    WORKUNIT& wu, vector<RESULT>& results, RESULT& canonical_result
 ) {
    SCOPE_MSG_LOG scope_messages(log_messages, SCHED_MSG_LOG::NORMAL);
@ -61,6 +61,7 @@ void assimilate_handler(
    if (wu.error_mask&WU_ERROR_TOO_MANY_SUCCESS_RESULTS) {
        log_messages.printf(SCHED_MSG_LOG::CRITICAL, "[%s] Error: too many success results\n", wu.name);
    }
+    return 0;
 }

 #ifdef __GNUC__
--- a/sched/file_deleter.C
+++ b/sched/file_deleter.C
@ -63,7 +63,7 @@ int wu_delete_files(WORKUNIT& wu) {
                retval = dir_hier_path(
                    filename, config.download_dir, config.uldl_dir_fanout,
                    pathname
-                    );
+                );
                if (retval) {
                    log_messages.printf(SCHED_MSG_LOG::CRITICAL, "[%s] dir_hier_path: %d\n", wu.name, retval);
                } else {
@ -147,8 +147,9 @@ bool do_pass() {
    while (!wu.enumerate(buf)) {
        did_something = true;
        
-        if (!preserve_wu_files)
+        if (!preserve_wu_files) {
            wu_delete_files(wu);
+        }
        wu.file_delete_state = FILE_DELETE_DONE;
        sprintf(buf, "file_delete_state=%d", wu.file_delete_state);
        retval= wu.update_field(buf);
@ -163,8 +164,9 @@ bool do_pass() {
    sprintf(buf, "where file_delete_state=%d limit 1000", FILE_DELETE_READY);
    while (!result.enumerate(buf)) {
        did_something = true;
-        if (!preserve_result_files)
+        if (!preserve_result_files) {
            result_delete_files(result);
+        }
        result.file_delete_state = FILE_DELETE_DONE;
        sprintf(buf, "file_delete_state=%d", result.file_delete_state); 
        retval= result.update_field(buf);
@ -190,18 +192,22 @@ int main(int argc, char** argv) {
        } else if (!strcmp(argv[i], "-one_pass")) {
            one_pass = true;
        } else if (!strcmp(argv[i], "-preserve_wu_files")) {
-            // This option is primarily for testing.  If enabled, the
-            // file_deleter will function 'normally' and will update
-            // the database, but will not actually delete the workunit
-            // input files.  It's equivalent to setting <no_delete/>
-            // [undocumented] for all workunit input files.
+            // This option is primarily for testing.
+            // If enabled, the file_deleter will function 'normally'
+            // and will update the database,
+            // but will not actually delete the workunit input files.
+            // It's equivalent to setting <no_delete/>
+            // for all workunit input files.
+            //
            preserve_wu_files = true;
        } else if (!strcmp(argv[i], "-preserve_result_files")) {
-            // This option is primarily for testing.  If enabled, the
-            // file_deleter will function 'normally' and will update
-            // the database but will not actually delete the result
-            // output files. It's equivalent to setting <no_delete/>
-            // [undocumented] for all result output files.
+            // This option is primarily for testing.
+            // If enabled, the file_deleter will function 'normally'
+            // and will update the database,
+            // but will not actually delete the result output files.
+            // It's equivalent to setting <no_delete/>
+            // for all result output files.
+            //
            preserve_result_files = true;
        } else if (!strcmp(argv[i], "-d")) {
            log_messages.set_debug_level(atoi(argv[++i]));
--- a/sched/sample_dummy_assimilator.C
+++ b/sched/sample_dummy_assimilator.C
@ -28,7 +28,7 @@

 using std::vector;

-void assimilate_handler(
+int assimilate_handler(
    WORKUNIT& wu, vector<RESULT>& results, RESULT& canonical_result
 ) {
    SCOPE_MSG_LOG scope_messages(log_messages, SCHED_MSG_LOG::NORMAL);
@ -54,6 +54,7 @@ void assimilate_handler(
    if (wu.error_mask&WU_ERROR_TOO_MANY_SUCCESS_RESULTS) {
        log_messages.printf(SCHED_MSG_LOG::CRITICAL, "[%s] Error: too many success results\n", wu.name);
    }
+    return 0;
 }

 #ifdef __GNUC__
--- a/sched/sched_util.C
+++ b/sched/sched_util.C
@ -93,7 +93,7 @@ int try_fopen(char* path, FILE*& f, char* mode) {

    f = fopen(path, mode);
    if (!f) {
-	memset(dirpath, '\0', sizeof(dirpath));
+        memset(dirpath, '\0', sizeof(dirpath));
        p = strrchr(path, '/');
        if (p) {
            strncpy(dirpath, path, (int)(p-path));
--- a/sched/validate_util.C
+++ b/sched/validate_util.C
@ -142,7 +142,8 @@ int generic_check_set(
            log_messages.printf(
                SCHED_MSG_LOG::CRITICAL,
                "generic_check_set: init_result([RESULT#%d %s]) failed\n",
-                results[i].id, results[i].name);
+                results[i].id, results[i].name
+            );
            goto cleanup;
        }
    }
@ -161,7 +162,8 @@ int generic_check_set(
                log_messages.printf(
                    SCHED_MSG_LOG::CRITICAL,
                    "generic_check_set: check_pair_with_data([RESULT#%d %s], [RESULT#%d %s]) failed\n",
-                    results[i].id, results[i].name, results[j].id, results[j].name);
+                    results[i].id, results[i].name, results[j].id, results[j].name
+                );
            } else if (match) {
                ++neq;
                matches[j] = true;
@ -197,7 +199,8 @@ int generic_check_set_majority(
    return generic_check_set(
        results, canonicalid, credit,
        init_result_f, check_pair_with_data_f, cleanup_result_f,
-        results.size() / 2);
+        results.size() / 2
+    );
 }

 int generic_check_pair(
@ -227,7 +230,7 @@ int generic_check_pair(
            SCHED_MSG_LOG::CRITICAL,
            "[RESULT#%d %s] [RESULT#%d %s] Couldn't initialize result 2\n",
            r1.id, r1.name, r2.id, r2.name
-            );
+        );
        cleanup_result_f(r1, data1);
        return retval;
    }