acct mgr fix

svn path=/trunk/boinc/; revision=9587
2006-03-02 07:17:18 +00:00 · 2006-03-02 07:17:18 +00:00 · 5d07c6378e
parent 776e976c6d
commit 5d07c6378e
12 changed files with 230 additions and 97 deletions
--- a/9
+++ b/9
@ -2478,3 +2478,12 @@ Rom    1 Mar 2006
 Rom    1 Mar 2006 (HEAD)
    - Tag for 5.3.23 release, all platforms
      boinc_core_release_5_3_23
+
+David  1 Mar 2006
+    - Core client: don't lose <send_gui_rpc_info/> tag from acct mgr URL file
+        when making an acct mgr RPC
+    - Core client: make RAM size measurement work on HPUX > 4GB
+
+    client/
+        acct_mgr.C
+        hostinfo_unix.C
--- a/client/acct_mgr.C
+++ b/client/acct_mgr.C
@ -95,10 +95,10 @@ int ACCT_MGR_OP::do_rpc(
        gstate.core_client_release,
        run_mode_name[gstate.user_run_request]
    );
-    if (strlen(ami.previous_host_cpid)) {
+    if (strlen(gstate.acct_mgr_info.previous_host_cpid)) {
        fprintf(f,
            "   <previous_host_cpid>%s</previous_host_cpid>\n",
-            ami.previous_host_cpid
+            gstate.acct_mgr_info.previous_host_cpid
        );
    }
    if (gstate.acct_mgr_info.send_gui_rpc_info) {
@ -284,7 +284,8 @@ void ACCT_MGR_OP::handle_reply(int http_op_retval) {
    }

    if (sig_ok) {
-        gstate.acct_mgr_info = ami;
+        strcpy(gstate.acct_mgr_info.acct_mgr_name, ami.acct_mgr_name);
+        strcpy(gstate.acct_mgr_info.signing_key, ami.signing_key);

        // attach to new projects
        //
@ -318,7 +319,7 @@ void ACCT_MGR_OP::handle_reply(int http_op_retval) {
        }
    }

-    strcpy(ami.previous_host_cpid, gstate.host_info.host_cpid);
+    strcpy(gstate.acct_mgr_info.previous_host_cpid, gstate.host_info.host_cpid);
    if (repeat_sec) {
        gstate.acct_mgr_info.next_rpc_time = gstate.now + repeat_sec;
    } else {
@ -340,7 +341,7 @@ int ACCT_MGR_INFO::write_info() {
                acct_mgr_name,
                acct_mgr_url
            );
-            if (send_gui_rpc_info) fprintf(p,"   <send_gui_rpc_info/>\n");
+            if (send_gui_rpc_info) fprintf(p,"    <send_gui_rpc_info/>\n");
            if (strlen(signing_key)) {
                fprintf(p, 
                    "    <signing_key>\n%s</signing_key>\n",
@ -405,9 +406,7 @@ int ACCT_MGR_INFO::init() {
        if (match_tag(buf, "</acct_mgr>")) break;
        else if (parse_str(buf, "<name>", acct_mgr_name, 256)) continue;
        else if (parse_str(buf, "<url>", acct_mgr_url, 256)) continue;
-        else if (match_tag(buf, "<send_gui_rpc_info/>")) {
-            send_gui_rpc_info = true;
-        }
+        else if (parse_bool(buf, "send_gui_rpc_info", send_gui_rpc_info)) continue;
        else if (match_tag(buf, "<signing_key>")) {
            retval = copy_element_contents(
                p,
--- a/client/hostinfo_unix.C
+++ b/client/hostinfo_unix.C
@ -467,7 +467,7 @@ int HOST_INFO::get_host_info() {
 #elif defined(_HPUX_SOURCE)
    struct pst_static pst; 
    pstat_getstatic(&pst, sizeof(pst), (size_t)1, 0);
-    m_nbytes = (double)pst.physical_memory * (double)pst.page_size;
+    m_nbytes = (long double)pst.physical_memory * (long double)pst.page_size;
 #elif defined(__osf__)
    // Tru64 UNIX.
    // 2005-12-26 SMS.
--- a/doc/boinc_news.inc
+++ b/doc/boinc_news.inc
@ -1,6 +1,14 @@
 <?

 $project_news = array(
+array("February 24, 2006",
+    "A significant step has been made towards integrating
+    BOINC and Grid software:
+    beginning with Condor version 6.7.17,
+    Condor can be configured to run BOINC when it has no other work to perform.
+    This capability is described in <a href=http://www.cs.wisc.edu/condor/manual/v6.7/3_13Setting_Up.html#SECTION004138000000000000000>the Condor manual</a>.
+    "
+),
 array("February 22, 2006",
    "BOINC announces its
    <a href=mac_intel.php>support for the Macintosh/Intel platform</a>.
--- a/doc/contact.php
+++ b/doc/contact.php
@ -83,8 +83,8 @@ Each area is managed by an 'owner'.
 </tr>
 <tr>
    <td>Wiki-based documentation</td>
+    <td>Christopher Malton and Rytis Slatkevicius</td>
    <td>Paul Buck</td>
-    <td><br></td>
 </tr>
 <tr>
    <td>Windows installer and screensaver</td>
--- a/doc/email_lists.php
+++ b/doc/email_lists.php
@ -19,7 +19,7 @@ Help for SETI@home is available on the
 <a href=http://setiathome.berkeley.edu/forum_help_desk.php>SETI@home message boards</a>
 and help for BOINC is available at the
 <a href=dev/>BOINC message boards</a> or the
-<a href=http://boinc-doc.net/boinc-wiki/>BOINC Wiki</a>.
+<a href=http://boinc-wiki.ath.cx/>BOINC Wiki</a>.

 </font></b>
 ";
--- a/doc/index.php
+++ b/doc/index.php
@ -30,7 +30,7 @@ resources.
 <td valign=top>
 <table width=100% border=0 cellspacing=0 cellpadding=8>
  <tr><td bgcolor=$light_blue>
-    <h2>&nbsp;Donate computing power</h2>
+    <h2>&nbsp;Participate</h2>
  </td></tr>
  <tr><td>
    <p>
@ -63,18 +63,18 @@ resources.
    </ul>
    <p>
    You can participate in more than one project,
-    and you control the fraction of your computing power
-    that goes to each project.
+    and you control how much of your computing power
+    goes to each project.
    If you participate in several projects,
    your computer will be kept busy even when one project has no work.
    <p>
    To participate:
-    <ul>
+    <ol>
    <li> <a href=projects.php>Select projects</a>
    <li> <a href=download.php>Download</a> and run BOINC software
    <li> Enter the project's URL (visit the project's web site,
    and copy the URL from your browser's address field).
-    </ul>
+    </ol>
    <center>
    <a href=participate.php><b>More info</a> 
    | <a href=download.php><b>Download</a>
@ -86,12 +86,20 @@ resources.
  </td></tr>
  <tr><td bgcolor=$light_blue><h2>Create a volunteer computing project</h2></td></tr>
  <tr><td>
-    Scientists with computationally-intensive tasks
-    may be able to use BOINC.
+    If you are a scientist with a computationally-intensive task,
+    you may be able to use BOINC.
    A BOINC project with a single Linux server
    can provide computing power equivalent
    to a cluster with tens of thousands of CPUs.
-    <p><a href=create_project.php>More info</a> 
+    <p>
+    If you lack the resources (manpower, server capacity,
+    or network bandwidth) to operate a BOINC project directly,
+    organizations such as World Community Grid may be able
+    to assist you.
+    Please <a href=contact.php>contact us</a> for information.
+    <center>
+    <p><b><a href=create_project.php>More info</a></b>
+    </center>
  </td></tr>
  <tr><td bgcolor=$light_blue><h2>Software</h2></td></tr>
  <tr><td>
--- a/doc/links.php
+++ b/doc/links.php
@ -9,6 +9,7 @@ function show_link($language, $name, $url) {

 function language($lang, $sites) {
    echo "<tr><td bgcolor=eeeeee valign=top>$lang</td><td>\n";
+    shuffle($sites);
    foreach ($sites as $s) {
        echo "$s<br>\n";
    }
@ -27,7 +28,7 @@ echo "
 User-editable
 sites with information and documentation about BOINC.
 <ul>
-<li> <a href=http://boinc-doc.net/boinc-wiki/>The Unofficial BOINC Wiki</a> (in English)
+<li> <a href=http://boinc-wiki.ath.cx/>The Unofficial BOINC Wiki</a> (in English)
 <li> <a href=http://www.seti-argentina.com.ar/instrucciones-boinc-manager>BOINC Argentina</a> (in Spanish)
 <li> <a href=http://faq.boinc.de/>Deutsche BOINC FAQ</a> (in German)
 <li> <a href=http://www.boincfrance.org/wakka.php?wiki=BienVenue>BOINCFrance.org</a> (in French)
@ -77,6 +78,7 @@ language("Dutch", array(
    site("http://www.boinc.be", "www.boinc.be"),
 ));
 language("English", array(
+    site("http://sirans-boincnews.com/", "Siran's BOINC Projects News Site"),
    site("http://www.ukboincteam.org.uk/", "UK BOINC Team"),
    site("http://symbion.madnezz.com/", "Symbion"),
    site("http://scotlandsseti.blogspot.com/", "Megacruncher's Blog"),
--- a/doc/participate.php
+++ b/doc/participate.php
@ -27,6 +27,7 @@ echo "
    <br>Run BOINC on uncommon platforms,
        or inspect the source code before you run it.
 <li> <a href=acct_mgrs.php>Account managers</a>
+<li> <a href=http://www.cs.wisc.edu/condor/manual/v6.7/3_13Setting_Up.html#SECTION004138000000000000000>BOINC as a Condor backfill job</a>
 </ul>
 <p>
 <h3>Web sites</h3>
--- a/doc/projects.php
+++ b/doc/projects.php
@ -16,7 +16,7 @@ have no control over the creation of BOINC-based projects,
 and in general do not endorse them.
 The BOINC web sites lists some, but not all, projects.
 A more complete list is on the
-<a href=http://boinc-doc.net/boinc-wiki/index.php?title=Catalog_of_BOINC_Powered_Projects>BOINC Wiki</a>.
+<a href=http://boinc-wiki.ath.cx/index.php?title=Catalog_of_BOINC_Powered_Projects>BOINC Wiki</a>.

 <p>
 When you participate in a project,
--- a/doc/sched.php
+++ b/doc/sched.php
@ -80,43 +80,54 @@ This screws up the scheduler.


 <h3>Result states</h3>
-A result is <b>runnable</b> if
+R is <b>runnable</b> if
 <ul>
-<li> Neither it nor its project is suspended, and
-<li> its files have been downloaded, and
-<li> it hasn't finished computing
+<li> Neither R nor R.project is suspended, and
+<li> R's files have been downloaded, and
+<li> R hasn't finished computing
 </ul>
-A result is <b>runnable soon</b> if
-<ul>
-<li> Neither it nor its project is suspended, and
-<li> it hasn't finished computing
-</ul>
-

 <h3>Project states</h3>
-A project is <b>runnable</b> if
+P is <b>runnable</b> if
 <ul>
-<li> it has at least one runnable result
+<li> P has at least one runnable result
+(this implies that P is not suspended).
 </ul>

-A project is <b>downloading</b> if
+P is <b>downloading</b> if
 <ul>
-<li> It's not suspended, and
-<li> it has at least one result whose files are being downloaded
+<li> P is not suspended, and
+<li> P has at least one result whose files are being downloaded
+and none of the downloads is deferred.
 </ul>

-A project is <b>contactable</b> if
+P is <b>fetchable</b>
+(i.e. the work-fetch policy allows work to be fetched from it) if
 <ul>
-<li> It's not suspended, and
-<li> its master file has already been fetched, and
-<li> it's not deferred (i.e. its minimum RPC time is in the past), and
-<li> it's no-new-work flag is not set
+<li> P is not suspended, and
+<li> P is not deferred (i.e. its minimum RPC time is in the past), and
+<li> P's no-new-work flag is not set, and
+<li> P is not overworked (see definition below), and
+<li> a fetch of P's master file is not pending
 </ul>

-A project is <b>potentially runnable</b> if
+P is <b>latency-limited</b> if
 <ul>
-<li> It's either runnable, downloading, or contactable.
+<li> The client's last scheduler RPC to P returned
+a 'no work because of deadlines' flag, and
+<li> the RPC reply's delay request has not yet elapsed.
 </ul>
+This means that P has work available,
+but didn't send any because the work's deadlines couldn't be met
+given the existing work queue.
+<p>
+P is <b>potentially runnable</b> if
+<ul>
+<li> P is either runnable, downloading, fetchable, overworked,
+or latency-limited.
+</ul>
+This means that, to the best of the client's knowledge,
+it could do work for P if it wanted to.

 <h3>Debt</h3>
 Intuitively, a project's 'debt' is how much work is owed to it,
@ -134,20 +145,21 @@ In each case, the debt is recalculated periodically as follows:
   <li> P.debt += W - P.wall_cpu_time (i.e. what P should have gotten
           minus what it got).
    </ul>
-<li> P.debt is normalized (e.g. so that the mean or minimum is zero).
+<li> P.debt is normalized so that the mean or minimum is zero.
 </ul>


 <b>Short-term debt</b> is used by the CPU scheduler.
 It is adjusted over the set of runnable projects.
 It is normalized so that minimum short-term debt is zero,
-and maximum short-term debt is no greater than 86400 (i.e. one day).
+and maximum short-term debt is no greater than 86,400 (i.e. one day).

 <p>
 <b>Long-term debt</b> is used by the work-fetch policy.
 It is defined for all projects,
 and adjusted over the set of potentially runnable projects.
-It is normalized so that average long-term debt is zero.
+It is normalized so that average long-term debt,
+over all projects, is zero.

 <h2>CPU scheduling policy</h2>

@ -157,8 +169,9 @@ for results that are in danger of missing their deadline,
 and weighted round-robin among other projects if additional CPUs exist.
 This allows the client to meet deadlines that would otherwise be missed,
 while honoring resource shares over the long term.
-The scheduler uses the following data, which are obtained
-by a simulation of round-robin scheduling applied to the current work queue:
+The scheduler uses the following data,
+which are obtained by doing a simulation of round-robin scheduling
+applied to the current work queue:
 <ul>
 <li> deadline_missed(R): whether result R would miss
 its deadline with round-robin scheduling.
@ -168,23 +181,21 @@ be missed with round-robin scheduling.
 </ul>
 The scheduling policy is:
 <ol>
-<li> Find the project P for which deadlines_missed(P)>0,
-and whose earliest deadline is earliest.
-<li> Schedule deadline_missed(P)
-results of P in order of increasing deadline,
-with preference to those already running.
-<li> If there are more CPUs, and other projects for which
-deadlines_missed(P)>0, go to 1.
-<li> If all CPUs are scheduled, stop;
-otherwise continue to the next step, considering
-only projects with deadlines_missed==0.
+<li> Find the result R (not scheduled yet)
+for which deadline_missed(R) is true
+and whose deadline is earliest.
+Tiebreaker: give preference to a result that is already running.
+<li> If such an R exists, schedule R.
+<li> If there are more CPUs, and unscheduled results R
+for which deadline_missed(R) is true, go to 1.
+<li> If all CPUs are scheduled, stop.
 <li> Set the 'anticipated debt' of each project to its short-term debt
 <li> Find the project P with the greatest anticipated debt,
 select one of P's runnable results
 (picking one that is already running, if possible)
 and schedule that result.
 <li> Decrement P's anticipated debt by the 'expected payoff'
-    (the total wall CPU in the last period divided by NCPUS).
+(the scheduling period divided by NCPUS).
 <li> Repeat steps 6 and 7 for additional CPUs
 </ol>

@ -196,20 +207,50 @@ or when the user performs a UI interaction
 (e.g. suspending or resuming a project or result).

 <p>
-The CPU scheduler produces a list of results to run,
-but they are not necessarily run immediately;
-the enforcement of the schedule is done asynchronously.
-A currently
+The CPU scheduler decides which results should run,
+but it doesn't enforce this decision
+(by preempting, resuming and starting applications).
+This enforcement is done by a separate function,
+which runs periodically, and is also called by
+the CPU scheduler at its conclusion.
+The following rules apply to application preemption:
+<ul>
+<li> If the 'leave in memory' preference is not set,
+an application scheduled for preemption is allowed to run for
+up to sched_interval/2 additional seconds, or until it checkpoints.
+<li>
+The above does not apply to an application being preempted
+to run a result R for which deadline_missed(R) is true.
+<li> If an application has never checkpointed,
+it is always left in memory on preemption.
+</ul>
+

 <h2>Work-fetch policy</h2>

+<p>
+When a result runs in EDF mode,
+its project may get more than its share of CPU time.
+The work-fetch policy is responsible for
+ensuring that this doesn't happen repeatedly.
+It does this by suppressing work fetch for the project.
+<p>
+A project P is <b>overworked</b> if
+<ul>
+<li> P.long_term_debt < -sched_period
+</ul>
+<p>
+This condition occurs if P's results run in EDF mode
+(and in extreme cases, when a project with large negative LTD
+is detached).
+
 <p>
 The work-fetch policy uses the functions
 <pre>
-prrs(project P)
+frs(project P)
 </pre>
 <blockquote>
-P's fractional resource share among potentially runnable projects.
+P's fractional resource share among fetchable projects.
 </blockquote>

 <pre>
@ -219,41 +260,43 @@ min_results(project P)
 The minimum number of runnable results needed to
 maintain P's resource share on this machine: namely,
 <br>
-ceil(ncpus*prrs(P))
+ceil(ncpus*frs(P))
 </blockquote>
+
 <pre>
 time_until_work_done(project P)
 </pre>
 <blockquote>
 The estimated wall time until the number of
 uncompleted results for this project will reach min_results(P)-1,
-assuming round-robin scheduling among
-the current potentially runnable projects.
+assuming round-robin scheduling among currently fetchable projects.
+</blockquote>
+
+<pre>
+time_until_free_cpu()
+</pre>
+<blockquote>
+The estimated wall time until there is a free CPU,
+assuming round-robin scheduling among currently fetchable projects.
 </blockquote>
 <p>
-The work-fetch policy function is called every 5 seconds
+The work-fetch policy function is called every few minutes
 (or as needed) by the scheduler RPC polling function.
+It sets the variable <b>work_request_size(P)</b> for each project P,
+which is the number of seconds of work to request
+if we do a scheduler RPC to P.
+This is computed as follows:
+<pre>
+    if P is suspended, deferred, overworked, or no-new-work
+        return 0
+    if time_until_work_done(P) > min_queue or CPU scheduler scheduled all CPUs by EDF
+        if time_until_free_cpu() < min_queue
+            return 1
+        else
+            return 0
+    y = estimated wall time of P's queued work
+    return max(1, (min_queue*ncpus*frs(P)) - y)
 </pre>
-It sets the following variable for each project P:
-<p>
-    <b>work_request_size(P)</b>:
-    the number of seconds of work to request if we do a scheduler RPC to P.
-    This is
-    <ul>
-    <li>
-    0 if P is suspended, deferred, or no-new-work
-    <li>
-    0 if time_until_work_done(P) > min_queue
-    <li>
-    0 if CPU scheduler scheduled all CPUs by EDF
-    <li>
-    0 if P.long_term_debt < -(CPU sched interval)
-        (project has been using more than its share of CPU;
-         e.g. repeatedly doing short-deadline results).
-    <li>
-    otherwise:
-    (min_queue*ncpus*prrs(P)) - (estimated wall time of queued work)
-    </ul>

 <p>
 The scheduler RPC mechanism may select a project to contact
@ -267,16 +310,66 @@ P.long_term_debt - time_until_work_done(P) is greatest
 </pre>
 and gets work from that project.
 <hr>
-Scheduler request must include:
-whether sporadic network connection (period);
-whether in EDF mode.
-Resource fractions?
+<h2>Scheduler work-send policy</h2>
 <p>
-Scheduler reply should include (it no work sent)
-if reason is because we're overloaded.
+NOTE: the following has not been implemented,
+and is independent of the above policies.
 <p>
-If last response was 'no work', don't accumulate LTD.
+The scheduler should avoid sending results whose
+deadlines are likely to be missed,
+or which are likely to cause existing results to miss their deadlines.
+This will be accomplished as follows:
+<ul>
+<li>
+Scheduler requests include the connection period,
+a list of queued results (with estimated time remaining and deadline),
+and project resource fractions.
+<li>
+The scheduler won't send results whose deadlines are less than
+now + min_queue.
+<li>
+The scheduler does an EDF simulation of the initial workload
+to see what results meet their deadline.
+For each result R being considered for sending,
+the scheduler does an EDF simulation.
+If R meets its deadline,
+all results that missed their deadlines do so by
+no more than they did previously,
+and all results that originally met their deadline still do,
+R is sent.
+<li>
+If the scheduler has work but doesn't send any because of deadline misses,
+it returns a 'no work because of deadlines' flag.
+If the last RPC to a project returned this flag,
+the project is marked as latency-limited and accumulates LTD.
+</ul>
+<hr>
+<h2>Describing scenarios</h2>
+<p>
+We encourage the use of the following notation for
+describing scheduling scenarios
+(times are given in hours):
+<p>
+P(C, D, R)
+<p>
+This describes a project with
+<ul>
+<li> C = CPU time per task
+<li> D = delay bound
+<li> R = fractional resource share
+</ul>

+A scenario is described by a list of projects,
+plus the following optional parameters:
+<ul>
+<li> NCPUS: number of CPUs (default 1)
+<li> min_queue
+</ul>
+
+Hence a typical scenario description is:
+<pre>
+(P1(1000, 2000, .5), P2(1, 10, .5), NCPUS=4)
+</pre>
 ";
 page_tail();
 ?>
--- a/doc/test_matrix.php
+++ b/doc/test_matrix.php
@ -129,7 +129,7 @@ everyone else should connect to that process.
    (log out and in, and check the Message tab).
 <li> Verify that users other than
    admin and installing users
-    can run the Manager and connect to the running core client.
+    can't run the Manager and connect to the running core client.
 </ul>
 <h3>Tests for modem-connected computers</h3>
 <ul>
@ -211,6 +211,19 @@ run screensaver
 <li> Try to connect to core client
 with bad password, from host not on list, etc.

+</ul>
+
+<h3>Account manager functions</h3>
+<ul>
+<li> Attach to and detach from an account manager
+such as GridRepublic or BOINCStats.
+<li> Verify that the core client periodically synchronizes with
+the account manager.
+<li> Verify that the core client won't let you detach from
+a project that you attached via the account manager.
+<li> NOTE: If you find bugs in the account manager itself,
+report them directly to the account manager admins,
+not to BOINC Alpha.
 </ul>
 ";
 ?>