statefile atomic write

svn path=/trunk/boinc/; revision=9465
2006-02-14 21:35:26 +00:00 · 2006-02-14 21:35:26 +00:00 · ce89af8702
parent 15044d5d06
commit ce89af8702
5 changed files with 82 additions and 102 deletions
--- a/7
+++ b/7
@ -1806,3 +1806,10 @@ David  14 Feb 2006
 	client/
 		acct_setup.C,h
 		gui_http.C,h
 David  14 Feb 2006
    - core client: on Windows, use commit mode ("wc") for state file
        (from Walt Gribben)
    client
        cs_statefile.C
--- a/client/cs_statefile.C
+++ b/client/cs_statefile.C
@ -349,7 +349,11 @@ int CLIENT_STATE::write_state_file() {
    SCOPE_MSG_LOG scope_messages(log_messages, CLIENT_MSG_LOG::DEBUG_STATE);
    scope_messages.printf("CLIENT_STATE::write_state_file(): Writing state file\n");
 #ifdef _WIN32
    retval = mf.open(STATE_FILE_NEXT, "wc");
 #else
    retval = mf.open(STATE_FILE_NEXT, "w");
 #endif
    if (retval) {
        msg_printf(0, MSG_ERROR,
            "Can't open temporary state file: %s %s",
--- a/doc/boinc_news.inc
+++ b/doc/boinc_news.inc
@ -1,6 +1,25 @@
 <?
 $project_news = array(
 array("February 14, 2006",
    "BOINC and Climateprediction.net have joined forces with the BBC
    to launch a new experiment -- a full simulation of climate change
    from 1920 to 2080.
    The experiment is described in
    a BBC television documentary <b>Meltdown</b>
    (BBC-4, February 20th, for UK BOINCers).
    Simplified client software is available
    at <a href=http://bbc.co.uk/climatechange>bbc.co.uk/climatechange</a>.
    Experienced BOINCers can attach to http://bbc.cpdn.org
    (check out the new graphics!).
    Be warned that a 160-year climate simulation is a long workunit even
    by Climateprediction.net's extravagant standards.
    But if you have a fast machine and are prepared to give
    us first call on your CPU, just for the next couple of months,
    then you could see your results in a second BBC television programme
    scheduled for May.
    "
 ),
 array("February 8, 2006",
    "The <a href=http://szdg.lpds.sztaki.hu/szdg/>SZTAKI Desktop Grid</a>
    is now in production.
--- a/doc/index.php
+++ b/doc/index.php
@ -37,28 +37,29 @@ resources.
    BOINC lets you donate computing power
    to scientific research projects such as:
    <ul>
-    <li> <a href=http://climateprediction.net>Climateprediction.net</a>:
+    <li> <a href=http://climateprediction.net>Climateprediction.net</a>
-        study climate change
+        and <a href=http://bbc.cpdn.org>BBC Climate Change Experiment</a>:
        study climate change.
    <li> <a href=http://einstein.phys.uwm.edu/>Einstein@home</a>:
-        search for gravitational signals emitted by pulsars
+        search for gravitational signals emitted by pulsars.
    <li> <a href=http://lhcathome.cern.ch/>LHC@home</a>:
        improve the design of the CERN LHC particle accelerator.
    <li> <a href=http://predictor.scripps.edu>Predictor@home</a>:
-        investigate protein-related diseases
+        investigate protein-related diseases.
    <li> <a href=http://boinc.bakerlab.org/rosetta/>Rosetta@home</a>:
-        help researchers develop cures for human diseases
+        help researchers develop cures for human diseases.
    <li> <a href=http://setiathome.berkeley.edu/>SETI@home</a>:
-        Look for radio evidence of extraterrestrial life
+        Look for radio evidence of extraterrestrial life.
    <li> <a href=http://boinc.bio.wzw.tum.de/boincsimap/>SIMAP</a>:
        calculate protein similarity data for use by many biological
        research projects.
    <li> <a href=http://szdg.lpds.sztaki.hu/szdg/>SZTAKI Desktop Grid</a>:
-        search for generalized binary number systems.
+        search for generalized binary number systems.
    <li> <a href=http://www.worldcommunitygrid.org/>World Community Grid</a>:
-        advance our knowledge of human disease. (Requires 5.2.1 or greater)
+        advance our knowledge of human disease (requires 5.2.1 or greater).
    <li> <a href=http://www.cellcomputing.net/>Cell Computing</a>:
        biomedical research
-        (Japanese; requires nonstandard client software)
+        (Japanese; requires nonstandard client software).
    </ul>
    <p>
    You can participate in more than one project,
--- a/doc/sched.php
+++ b/doc/sched.php
@ -198,115 +198,64 @@ applied to the current work queue.
 <h2>Work-fetch policy</h2>
 <p>
-The work-fetch policy is defined in terms of a basic function
+The work-fetch policy uses the functions
 <pre>
-time_until_work_done(project, N, subset_resource_share)
+prrs(project P)
    // estimate wall time until the number of uncompleted results
    // for this project will reach N,
    // given the total resource share for a set of competing projects
 </pre>
 <blockquote>
 P's fractional resource share among potentially runnable projects.
 </blockquote>
 <pre>
 min_results(project P)
 </pre>
 <blockquote>
 The minimum number of runnable results needed to
 maintain P's resource share on this machine: namely,
 <br>
 ceil(ncpus*prrs(P))
 </blockquote>
 <pre>
 time_until_work_done(project P)
 </pre>
 <blockquote>
 The estimated wall time until the number of
 uncompleted results for this project will reach min_results(P)-1,
 assuming round-robin scheduling among
 the current potentially runnable projects.
 </blockquote>
 <p>
 The work-fetch policy function is called every 5 seconds
 (or as needed) by the scheduler RPC polling function.
 </pre>
-It sets the following variables:
+It sets the following variable for each project P:
-<ul>
+<p>
-<li> <b>global urgency</b>: one of
+    <b>work_request_size(P)</b>:
    the number of seconds of work to request if we do a scheduler RPC to P.
    This is
    <ul>
-    <li><b>DONT_NEED</b>: CPU scheduler is in EDF mode,
+    <li>
-        or fetching additional work would make it so.
+    0 if P is suspended, deferred, or no-new-work
-    <li><b>OK</b>: we have enough work, but it's OK to get more
+    <li>
-    <li><b>NEED</b>: a CPU will be idle within min_queue
+    0 if time_until_work_done(P) > min_queue
-    <li><b>NEED_IMMEDIATELY</b>: a CPU is idle.
+    <li>
    0 if CPU scheduler is in EDF mode and no CPU is idle
    <li>
    otherwise:
    (min_queue*ncpus*prrs(P)) - (estimated wall time of queued work)
    </ul>
 <li> For each project P
    <br>
    N = ncpus*(relative resource share)
    <br>
    prrs = potentially runnable resource share
    <br>
    X = time_until_work_done(P, N-1, prrs)
    <ul>
    <li><b>project urgency</b>
        <ul>
        <li><b>DONT_NEED</b>: P is suspended or deferred or no-new-work
        <li><b>OK</b>: X > min_queue
        <li><b>NEED</b>: X > 0
        <li><b>NEED_IMMEDIATELY</b>: X == 0
        </ul>
    <li> <b>work request size</b>
    (the number of seconds of work to request,
    if we do a scheduler RPC to this project).
    </ul>
 </ul>
 <p>
 The scheduler RPC mechanism may select a project to contact
 because of a user request, an outstanding trickle-up message,
 or a result that is overdue for reporting.
 If it does so, it will also request work from that project.
-
+Otherwise, the RPC mechanism chooses the project P for which
 <p>
 Otherwise, the RPC mechanism calls the following function and
 gets work from that project, if any.
 <pre>
-next_project_need_work()
+P.work_request_size>0 and
-    if global_urgency == DONT_NEED return null
+P.long_term_debt - time_until_work_done(P) is greatest
    Pbest = null;
    for each project P
        if P.urgency != DONT_NEED and P.work_request_size > 0
        if P.urgency == OK and global_urgency == OK
            continue
        P.score = P.long_term_debt - time_until_work_done(P, 0, prrs)
        if Pbest
            if P.score > Pbest.score
                Pbest = P
        else
            Pbest = P
    return Pbest
 </pre>
 <p>
 The work-fetch policy function is as follows:
 <pre>
 // compute global urgency
 x = delay until number of runnable results will be &lt; ncpus
 if x == 0
    global_urgency = NEED_IMMEDIATELY
 else
    if CPU scheduling mode is EDF
        global_urgency = DONT_NEED
    else
        P = project with greatest long-term debt
        suppose we got work from P
        if round-robin would then miss a deadline
            global_urgency = DONT_NEED
        else
            if x &lt; min_queue
                global_urgency = NEED
            else
                global_urgency = OK
 // compute per-project urgencies and work request sizes
 if global_urgency != DONT_NEED
    for each project P
        N = ncpus*(fractional potentially runnable resource_share)
            (i.e. number of results we need on hand to
            keep enough CPUs busy to maintain resource share)
        x = time until # of runnable results for P will fall below N
        if x == 0
            P.urgency = NEED_IMMEDIATELY
        else if x &lt; min_queue
            P.urgency = NEED
            P.work_request_size = min_queue - x
        else if global_urgency > OK
            P.urgency = OK
            P.work_request_size = 1
        else
            P.urgency = DONT_NEED
 </pre>
 and gets work from that project.
 ";
 page_tail();