diff --git a/checkin_notes b/checkin_notes index 27310f7afa..6822ec4463 100755 --- a/checkin_notes +++ b/checkin_notes @@ -1708,3 +1708,12 @@ Charlie 8 Feb 2006 mac_build/ boinc.xcodeproj/ project.pbxproj + +David 10 Feb 2005 + - cosmetic + + client/ + client_msgs.h + client_types.h + cpu_sched.C + cs_scheduler.C diff --git a/client/client_msgs.h b/client/client_msgs.h index 11e98a8d62..d761e7901f 100644 --- a/client/client_msgs.h +++ b/client/client_msgs.h @@ -91,6 +91,7 @@ public: DEBUG_POLL, // show what polls are responding DEBUG_GUIRPC, DEBUG_SCHED_CPU, + DEBUG_WORK_FETCH, DEBUG_SCRSAVE }; CLIENT_MSG_LOG(): MSG_LOG(stdout) {} diff --git a/client/client_types.h b/client/client_types.h index 502830fa18..25641e2b90 100644 --- a/client/client_types.h +++ b/client/client_types.h @@ -254,6 +254,7 @@ public: std::vectoractive; std::vectorpending; double rrsim_proc_rate; + // fraction of each CPU this project will get in RR simulation void set_rrsim_proc_rate(double per_cpu_proc_rate, double rrs); // "debt" is how much CPU time we owe this project relative to others @@ -284,7 +285,7 @@ public: // X seconds of wall-clock time to complete, // taking into account // 1) other projects and resource share; - // 2) on_frac and active_frac + // 2) on_frac, active_frac, and cpu_effiency // see doc/work_req.php int work_request_urgency; diff --git a/client/cpu_sched.C b/client/cpu_sched.C index 1eb0903b09..8234dfa795 100644 --- a/client/cpu_sched.C +++ b/client/cpu_sched.C @@ -422,34 +422,6 @@ void CLIENT_STATE::enforce_schedule() { } } -// set the project's rrsim_proc_rate: -// the fraction of each CPU that it will get in round-robin mode -// -void PROJECT::set_rrsim_proc_rate(double per_cpu_proc_rate, double rrs) { - int nactive = (int)active.size(); - if (nactive == 0) return; - double x; - if (rrs) { - x = resource_share/rrs; - } else { - x = 1; // TODO - fix - } - - // if this project has fewer active results than CPUs, - // scale up its share to reflect this - // - if 
(nactive < gstate.ncpus) { - x *= ((double)gstate.ncpus)/nactive; - } - - // But its rate on a given CPU can't exceed the CPU speed - // - if (x>1) { - x = 1; - } - rrsim_proc_rate = x*per_cpu_proc_rate*CPU_PESSIMISM_FACTOR; -} - // return true if we don't have enough runnable tasks to keep all CPUs busy // bool CLIENT_STATE::no_work_for_a_cpu() { @@ -465,7 +437,39 @@ bool CLIENT_STATE::no_work_for_a_cpu() { return ncpus > count; } -// return true if round-robin scheduling will miss a deadline +// Set the project's rrsim_proc_rate: +// the fraction of each CPU that it will get in round-robin mode. +// Precondition: the project's "active" array is populated +// +void PROJECT::set_rrsim_proc_rate(double per_cpu_proc_rate, double rrs) { + int nactive = (int)active.size(); + if (nactive == 0) return; + double x; + + if (rrs) { + x = resource_share/rrs; + } else { + x = 1; // pathological case; maybe should be 1/# runnable projects + } + + // if this project has fewer active results than CPUs, + // scale up its share to reflect this + // + if (nactive < gstate.ncpus) { + x *= ((double)gstate.ncpus)/nactive; + } + + // But its rate on a given CPU can't exceed 1 + // + if (x>1) { + x = 1; + } + rrsim_proc_rate = x*per_cpu_proc_rate*CPU_PESSIMISM_FACTOR; +} + +// return true if round-robin scheduling will miss a deadline. 
+// per_cpu_proc_rate is the expected number of CPU seconds per wall second +// on each CPU; rrs is the resource share of runnable projects // bool CLIENT_STATE::rr_misses_deadline(double per_cpu_proc_rate, double rrs) { PROJECT* p, *pbest; @@ -486,7 +490,6 @@ bool CLIENT_STATE::rr_misses_deadline(double per_cpu_proc_rate, double rrs) { p->pending.clear(); } - for (i=0; iaborted_via_gui) continue; @@ -596,8 +599,10 @@ bool CLIENT_STATE::rr_misses_deadline(double per_cpu_proc_rate, double rrs) { // void CLIENT_STATE::set_scheduler_mode() { bool use_earliest_deadline_first = false; - double total_proc_rate = avg_proc_rate(); - double per_cpu_proc_rate = total_proc_rate/ncpus; + double per_cpu_proc_rate = avg_proc_rate()/ncpus; + // how many CPU seconds per wall second we get on each CPU, + // taking into account on_frac, active_frac, and cpu_efficiency + double rrs = runnable_resource_share(); if (rr_misses_deadline(per_cpu_proc_rate, rrs)) { diff --git a/client/cs_scheduler.C b/client/cs_scheduler.C index 201e6aae5e..84d1fc62de 100644 --- a/client/cs_scheduler.C +++ b/client/cs_scheduler.C @@ -147,7 +147,7 @@ PROJECT* CLIENT_STATE::next_project_trickle_up_pending() { // Return the best project to fetch work from, NULL if none // -// Basically, pick the one with largest long term debt - amount of current work +// Pick the one with largest (long term debt - amount of current work) // // PRECONDITIONS: // - work_request_urgency and work_request set for all projects @@ -160,7 +160,7 @@ PROJECT* CLIENT_STATE::next_project_need_work() { unsigned int i; double prrs = potentially_runnable_resource_share(); - for (i=0; iwork_request_urgency == WORK_FETCH_DONT_NEED) continue; if (p->work_request == 0) continue; @@ -502,11 +502,11 @@ double CLIENT_STATE::time_until_work_done( } } -// Compute: -// - work_request and work_request_urgency for all projects. +// Top-level function for work fetch policy. 
+// Outputs: // - overall_work_fetch_urgency -// -// Only set non-zero work requests for projects that are contactable +// - for each contactable project: +// - work_request and work_request_urgency // int CLIENT_STATE::compute_work_requests() { unsigned int i; @@ -517,7 +517,7 @@ int CLIENT_STATE::compute_work_requests() { SCOPE_MSG_LOG scope_messages(log_messages, CLIENT_MSG_LOG::DEBUG_SCHED_CPU); overall_work_fetch_urgency = WORK_FETCH_DONT_NEED; - for (i = 0; i < projects.size(); ++i) { + for (i=0; i< projects.size(); i++) { projects[i]->work_request_urgency = WORK_FETCH_DONT_NEED; projects[i]->work_request = 0; } @@ -556,7 +556,7 @@ int CLIENT_STATE::compute_work_requests() { // Then estimate how long it's going to be until we have fewer // than this # of results remaining. // - for (i=0; iwork_request = 0; @@ -1034,14 +1034,17 @@ int CLIENT_STATE::handle_scheduler_reply( } bool CLIENT_STATE::should_get_work() { - // if there are fewer runnable results then CPUS, we need more work. + // if there are fewer runnable results than CPUS, we need more work. // if (no_work_for_a_cpu()) return true; double tot_cpu_time_remaining = 0; - for (unsigned int i = 0; i < results.size();++i) { + for (unsigned int i=0; iestimated_cpu_time_remaining(); } + + // ????? shouldn't we scale by ncpus? by avg_proc_rate()?? + // if (tot_cpu_time_remaining < global_prefs.work_buf_min_days*SECONDS_PER_DAY) { return true; } @@ -1096,7 +1099,7 @@ void CLIENT_STATE::set_work_fetch_mode() { double CLIENT_STATE::work_needed_secs() { double total_work = 0; - for( unsigned int i = 0; i < results.size(); ++i) { + for(unsigned int i=0; iproject->non_cpu_intensive) continue; total_work += results[i]->estimated_cpu_time_remaining(); } diff --git a/doc/boinc_news.inc b/doc/boinc_news.inc index 2b14865096..78a18a7eea 100644 --- a/doc/boinc_news.inc +++ b/doc/boinc_news.inc @@ -1,6 +1,14 @@ SZTAKI Desktop Grid + is now in production. 
+ The project, based at the MTA-SZTAKI Laboratory of Parallel + and Distributed Systems in Budapest, Hungary, is attempting + to find all the generalized binary number systems + (bases are matrices and digits are vectors) up to dimension 11." +), array("February 6, 2006", "The Similarity Matrix of Proteins (SIMAP) project is now in production. diff --git a/doc/index.php b/doc/index.php index a14da0ed7b..5ef476450c 100644 --- a/doc/index.php +++ b/doc/index.php @@ -37,29 +37,26 @@ resources. BOINC lets you donate computing power to scientific research projects such as: diff --git a/doc/sched.php b/doc/sched.php index ecc28f2470..01787f0d27 100644 --- a/doc/sched.php +++ b/doc/sched.php @@ -186,76 +186,127 @@ when the end of the user-specified scheduling period is reached, when new results become runnable, or when the user performs a UI interaction (e.g. suspending or resuming a project or result). - - -

The work-fetch policy

- -

-X is the estimated wall time by which the number of -runnable results will fall below NCPUS. -

-min_queue is the user's network-connection period general preference. -

-work_fetch_OK is a flag set by the mode selection algorithm (see below). -

-The work-fetch policy maintains an 'overall urgency': +It does the following:

    -
  • -NEED_IMMEDIATELY: - there is at least one idle CPU -
  • -NEED: - X < min_queue -
  • -OK: - X > min_queue, work_fetch_OK is true -
  • -DONT_NEED: - work_fetch_OK is false -
- -

-In addition, the work-fetch policy maintains a per-project work-fetch mode: -

-R(P) = fractional resource share of P -

-X(P) = estimated wall time when number of runnable results for P -will fall below NCPUS*R(P) -

    -
  • -NEED_IMMEDIATELY: - no results of P are runnable soon. -
  • -NEED: - X(P) < min_queue * R(P) -
  • -OK: - X(P) > min_queue * R(P), - and P is not suspended or deferred or no-new-work -
  • -DONT_NEED: - P is suspended or deferred or no-new-work -
- -

- -

Mode selection

-

-Work_fetch_OK is set to false if either -

    -
  • The sum of all RTFs is > 0.8 -
  • The CRTF of any result is > 0.8 -
- -EDF mode is used if either -
    -
  • The CRTF of any result is > 0.8 -
  • The deadline of any result is earlier than one day from now -
  • The deadline of any result is less than -2 * min_queue from now. +
  • Do a simulation of round-robin scheduling +applied to the current work queue. +
  • If all results meet their deadlines, + use round-robin; otherwise, use EDF.
+

Work-fetch policy

+ +

+The work-fetch policy is defined in terms of a basic function +

+time_until_work_done(project, N, subset_resource_share)
+    // estimate wall time until the number of uncompleted results
+    // for this project will reach N,
+    // given the total resource share for a set of competing projects
+
+

+The work-fetch policy function is called every 5 seconds +(or as needed) by the scheduler RPC polling function. + +It sets the following variables: +

    +
  • global urgency: one of +
      +
    • DONT_NEED: CPU scheduler is in EDF mode, + or fetching additional work would make it so. +
    • OK: we have enough work, but it's OK to get more +
    • NEED: a CPU will be idle within min_queue +
    • NEED_IMMEDIATELY: a CPU is idle. +
    +
  • For each project P +
    + N = ncpus*(relative resource share) +
    + prrs = potentially runnable resource share +
    + X = time_until_work_done(P, N-1, prrs) +
      +
    • project urgency +
        +
      • DONT_NEED: P is suspended or deferred or no-new-work +
      • OK: X > min_queue +
      • NEED: X > 0 +
      • NEED_IMMEDIATELY: X == 0 +
      +
    • work request size + (the number of seconds of work to request, + if we do a scheduler RPC to this project). +
    +
+ +

+The scheduler RPC mechanism may select a project to contact +because of a user request, an outstanding trickle-up message, +or a result that is overdue for reporting. +If it does so, it will also request work from that project. + +

+Otherwise, the RPC mechanism calls the following function and +gets work from that project, if any. +

+next_project_need_work()
+    if global_urgency == DONT_NEED return null
+    Pbest = null;
+    for each project P
+        if P.urgency == DONT_NEED or P.work_request_size == 0 continue
+        if P.urgency == OK and global_urgency == OK
+            continue
+        P.score = P.long_term_debt - time_until_work_done(P, 0, prrs)
+        if Pbest
+            if P.score > Pbest.score
+                Pbest = P
+        else
+            Pbest = P
+    return Pbest
+
+ +

+The work-fetch policy function is as follows: +

+// compute global urgency
+
+x = delay until number of runnable results will be < ncpus
+if x == 0
+    global_urgency = NEED_IMMEDIATELY
+else
+    if CPU scheduling mode is EDF
+        global_urgency = DONT_NEED
+    else
+        P = project with greatest long-term debt
+        suppose we got work from P
+        if round-robin would then miss a deadline
+            global_urgency = DONT_NEED
+        else
+            if x < min_queue
+                global_urgency = NEED
+            else
+                global_urgency = OK
+    
+// compute per-project urgencies and work request sizes
+
+if global_urgency != DONT_NEED
+    for each project P
+        N = ncpus*(fractional potentially runnable resource_share)
+            (i.e. number of results we need on hand to
+            keep enough CPUs busy to maintain resource share)
+        x = time until # of runnable results for P will fall below N
+        if x == 0
+            P.urgency = NEED_IMMEDIATELY
+        else if x < min_queue
+            P.urgency = NEED
+            P.work_request_size = min_queue - x
+        else if global_urgency > OK
+            P.urgency = OK
+            P.work_request_size = 1
+        else
+            P.urgency = DONT_NEED
+
"; page_tail(); diff --git a/doc/server_status.php b/doc/server_status.php new file mode 100644 index 0000000000..032766a84a --- /dev/null +++ b/doc/server_status.php @@ -0,0 +1,53 @@ +URL/server_status.php. +These are generally updated every 10 minutes or so - +do not poll more often than that. +These can be used to make web sites showing +the server status of multiple BOINC projects. +

+"; +echo html_text(" + + 1128535206 + + + jocelyn + BOINC database + running + + + castelli + master science database + running + + + klaatu + data-driven web pages + disabled + + + galileo + feeder + not running + + + + 614830 + 1208959 + 8 + 2 + 4 + 15 + 0.00083333334 + +"); + +page_tail(); + +?>