diff --git a/checkin_notes b/checkin_notes index b7c469551f..451cc741a3 100755 --- a/checkin_notes +++ b/checkin_notes @@ -14172,3 +14172,11 @@ Karl 2004-06-27 sched/ main.C +David 27 June 2004 + - add a -mod N I option to the transitioner; + causes the transitioner to handle only WUs for which ID % N == I. + Can use this to parallelize the transitioner, + either on a single multi-CPU machine or across multiple machines + + sched/ + transitioner.C diff --git a/doc/anonymous_platform.php b/doc/anonymous_platform.php index b30b89055f..0b7906418a 100644 --- a/doc/anonymous_platform.php +++ b/doc/anonymous_platform.php @@ -42,7 +42,7 @@ Here's how it works:

CPU scheduling

-

CPU scheduling aims to achieve the following goals -(decreasing priority):

+

The CPU scheduling policy aims to achieve the following goals +(in decreasing priority):

  1. -Maximize CPU utilization +Maximize CPU utilization.
  2. -Respect the resource share allocation for each project. -A project's resource share represents how much computing resources -(CPU time, network bandwith, storage space) a user wants to allocate -to the project relative to the resources allocated to all of the other -projects in which he is participating. The client should respect this -allocation to be faithful to the user. In the case of CPU time, the -result computation scheduling should achieve the expected time shares -over a reasonable time period. +Enforce resource shares. +The ratio of CPU time allocated to projects that have work, +in a typical period of a day or two, +should be approximately the same as the ratio of +the user-specified resource shares. +If a project has no work for some period, +it does not accumulate a 'debt' of work.
  3. Satisfy result deadlines if possible.
  4. -Given a 'minimum variety' parameter MV (seconds), -reschedule CPUs at least once every MV seconds. -The motivation for this goal stems from the potential -orders-of-magnitude differences in expected completion time for -results from different projects. Some projects will have results that -complete in hours, while other projects may have results that take -months to complete. A scheduler that runs result computations to -completion before starting a new computation will keep projects with -short-running result computations stuck behind projects with -long-running result computations. A participant in multiple projects -will expect to see his computer work on each of these projects in a -reasonable time period, not just the project with the long-running -result computations. +Reschedule CPUs periodically. +This goal stems from the large differences in duration of +results from different projects. +Participants in multiple projects +will expect to see their computers do work on each of these projects in a +reasonable time period.
  5. -Minimize mean time to completion for results. -This means that the number of active result computations for a project should be minimized. +Minimize mean time to completion. For example, it's better to have one result from -project P complete in time T than to have two results from project P +a project complete in time T than to have two results simultaneously complete in time 2T.

A result is 'active' if there is a slot directory for it. -A consequence of result preemption is that there can -be more active results than CPUs. +There can be more active results than CPUs.

Debt

@@ -145,7 +136,7 @@ ready-to-compute result.
 data structures:
 ACTIVE_TASK:
-    double cpu_at_last_schedule_point
+    double period_start_cpu_time
     double current_cpu_time
     scheduler_state:
         PREEMPTED
@@ -155,7 +146,7 @@ PROJECT:
     double work_done_this_period    // temp
     double debt
     double anticipated_debt // temp
-    bool has_runnable_result
+    RESULT next_runnable_result
 
 schedule_cpus():
 
@@ -164,7 +155,7 @@ foreach project P
 
 total_work_done_this_period = 0
 foreach task T that is RUNNING:
-    x = current_cpu_time - T.cpu_at_last_schedule_point
+    x = T.current_cpu_time - T.period_start_cpu_time
     T.project.work_done_this_period += x
     total_work_done_this_period += x
 
@@ -186,7 +177,7 @@ do num_cpus times:
     if none:
         break
     if (some T in P is RUNNING):
-        t.next_scheduler_state = RUNNING
+        T.next_scheduler_state = RUNNING
         P.anticipated_debt -= expected_pay_off
         continue
     if (some T in P is PREEMPTED):
@@ -194,6 +185,7 @@ do num_cpus times:
         P.anticipated_debt -= expected_pay_off
         continue
     if (some R in results is for P, not active, and ready to run):
+        Choose R with the earliest deadline
         T = new ACTIVE_TASK for R
         T.next_scheduler_state = RUNNING
         P.anticipated_debt -= expected_pay_off
@@ -205,7 +197,7 @@ foreach task T
         suspend (or kill)
 
 foreach task T
-    T.cpu_at_last_schedule_point = current_cpu_time
+    T.period_start_cpu_time = T.current_cpu_time
 

Work fetch policy

@@ -215,11 +207,19 @@ The work fetch policy has the following goal:

When to get work

diff --git a/doc/index.php b/doc/index.php index f47bfef094..789a9b78c6 100644 --- a/doc/index.php +++ b/doc/index.php @@ -59,6 +59,7 @@ Help debug and enhance BOINC software. BOINC is supported by the National Science Foundation +through award SCI/0221529.
diff --git a/doc/links.php b/doc/links.php index b05d347a47..a21c2f42e9 100644 --- a/doc/links.php +++ b/doc/links.php @@ -30,11 +30,12 @@ list_item("French",
boinc-quebec.org (Canadian)" ); list_item("German", - "www.boinc.de" -); -list_item("Italian", - "boinc.homeunix.org" + "www.boinc.de +
www.boinc-forum.de" ); +//list_item("Italian", +// "boinc.homeunix.org" +//); list_item("Japanese", "translation by Komori Hitoshi" ); diff --git a/doc/proxy_server.php b/doc/proxy_server.php index 7862cf2103..ba8471ae51 100644 --- a/doc/proxy_server.php +++ b/doc/proxy_server.php @@ -2,16 +2,32 @@ require_once("docutil.php"); page_head("Proxy servers"); echo " -The original SETI@home benefited from the development -of 'Proxy servers' such as SETIQueue. +SETI@home Classic benefited from 'proxy servers' such as SETIQueue. These proxy servers buffer work units and results between participant computers and the main SETI@home server. They provide a smooth supply of work even when the main server is down, -and they make it possible to run SETI@home on computers +and they make it possible to run SETI@home Classic on computers not connected directly to the Internet.

-Things are trickier in BOINC. -Unlike SETI@home, with its 'one size fits all' work units, +These programs won't work with BOINC (see below), +but some of their functions can be performed by other means: +

+ +

Why won't SETIQueue work with BOINC?

+

+Unlike SETI@home Classic, with its 'one size fits all' work units, BOINC allows work units that have extreme requirements (memory, disk, CPU) and makes sure they're sent only to hosts that can handle them. @@ -19,7 +35,7 @@ In BOINC, a client communicates directly with the server, telling the server about its hardware (memory size, CPU speed etc.) and the server chooses work for it accordingly. Furthermore, BOINC has separate scheduling and data servers -(in SETI@home, a single server played both roles). +(in SETI@home Classic, a single server played both roles).

So a BOINC proxy would have to replicate much of the functionality of the BOINC core client @@ -27,10 +43,6 @@ of the functionality of the BOINC core client and the BOINC scheduling server (since it would have to implement the work-distribution policy). This is possible but it would be a lot of work. -

-BOINC has mechanisms - such as work buffering -and the ability to participate in multiple projects - -that reduce the importance of proxy servers. "; page_tail(); ?> diff --git a/sched/transitioner.C b/sched/transitioner.C index eacd8e6bc7..032df2127d 100644 --- a/sched/transitioner.C +++ b/sched/transitioner.C @@ -26,6 +26,7 @@ // [ -asynch ] be asynchronous // [ -one_pass ] do one pass, then exit // [ -d x ] debug level x +// [ -mod n i ] process only WUs with (id mod n) == i using namespace std; @@ -44,9 +45,13 @@ using namespace std; #define LOCKFILE "transitioner.out" #define PIDFILE "transitioner.pid" +#define SELECT_LIMIT 100 + int startup_time; SCHED_CONFIG config; R_RSA_PRIVATE_KEY key; +int mod_n, mod_i; +bool do_mod = false; void handle_wu(DB_WORKUNIT& wu) { vector results; @@ -355,7 +360,11 @@ bool do_pass() { check_stop_daemons(); // loop over WUs that are due to be checked // - sprintf(buf, "where transition_time<%d order by transition_time limit 5000", (int)time(0)); + if (do_mod) { + sprintf(buf, "where transition_time<%d and (mod(id, %d)==%d) order by transition_time limit %d", (int)time(0), mod_n, mod_i, SELECT_LIMIT); + } else { + sprintf(buf, "where transition_time<%d order by transition_time limit %d", (int)time(0), SELECT_LIMIT); + } while (!wu.enumerate(buf)) { did_something = true; handle_wu(wu); @@ -396,6 +405,10 @@ int main(int argc, char** argv) { one_pass = true; } else if (!strcmp(argv[i], "-d")) { log_messages.set_debug_level(atoi(argv[++i])); + } else if (!strcmp(argv[i], "-mod")) { + mod_n = atoi(argv[++i]); + mod_i = atoi(argv[++i]); + do_mod = true; } }