From 289db963aba21839779e4251ecf607b543a656dc Mon Sep 17 00:00:00 2001
From: David Anderson <davea@ssl.berkeley.edu>
Date: Thu, 9 Jun 2005 21:37:34 +0000
Subject: [PATCH] *** empty log message ***

svn path=/trunk/boinc/; revision=6325
---
 checkin_notes         |  15 +++
 client/client_state.C |   2 +-
 client/client_state.h |   4 +-
 client/client_types.C |   2 +-
 client/client_types.h |   6 +-
 client/cs_apps.C      |  44 +++++---
 client/cs_cmdline.C   |   2 +-
 doc/sched.php         | 254 ++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 304 insertions(+), 25 deletions(-)
 create mode 100644 doc/sched.php

diff --git a/checkin_notes b/checkin_notes
index 8d7550bfd4..bb0a4d6d17 100755
--- a/checkin_notes
+++ b/checkin_notes
@@ -7596,3 +7596,18 @@ David  8 June 2005
         cs_apps.C
         cs_benchmark.C
         net_stats.C
+
+David  9 June 2005
+    - Changed variable names from "work_done" to "wall_cpu_time";
+        it's important to emphasize that debt accounting
+        is done on the basis of how long a process is running,
+        NOT how much CPU time it actually or reportedly got.
+    - don't count non-CPU-intensive apps in calculation of
+        total_wall_cpu_time_this_period
+    - fixed crash if you run core client with -attach_project X
+
+    client/
+        client_state.C,h
+        client_types.C,h
+        cs_apps.C
+        cs_cmdline.C
diff --git a/client/client_state.C b/client/client_state.C
index 33ae6eeb05..20e2c4e1a5 100644
--- a/client/client_state.C
+++ b/client/client_state.C
@@ -115,7 +115,7 @@ CLIENT_STATE::CLIENT_STATE() {
     cpu_earliest_deadline_first = false;
 
     cpu_sched_last_time = 0;
-    cpu_sched_work_done_this_period = 0;
+    total_wall_cpu_time_this_period = 0;
     must_schedule_cpus = true;
 }
 
diff --git a/client/client_state.h b/client/client_state.h
index 02367a7187..1bb185040f 100644
--- a/client/client_state.h
+++ b/client/client_state.h
@@ -156,8 +156,8 @@ private:
     // CPU sched state
     //
     double cpu_sched_last_time;
-    double cpu_sched_work_done_this_period;
-        // CPU time since last schedule_cpus()
+    double total_wall_cpu_time_this_period;
+        // "wall CPU time" accumulated since last schedule_cpus()
 	bool work_fetch_no_new_work;
 	bool cpu_earliest_deadline_first;
 
diff --git a/client/client_types.C b/client/client_types.C
index 168c19207d..531a4857fa 100644
--- a/client/client_types.C
+++ b/client/client_types.C
@@ -93,7 +93,7 @@ void PROJECT::init() {
     strcpy(code_sign_key, "");
     user_files.clear();
     anticipated_debt = 0;
-    work_done_this_period = 0;
+    wall_cpu_time_this_period = 0;
     next_runnable_result = NULL;
     work_request = 0;
     work_request_urgency = WORK_FETCH_DONT_NEED;
diff --git a/client/client_types.h b/client/client_types.h
index b171f67321..c091053f8f 100644
--- a/client/client_types.h
+++ b/client/client_types.h
@@ -248,9 +248,9 @@ public:
 
     double anticipated_debt;
         // expected debt by the end of the preemption period
-    double work_done_this_period;
-        // how much CPU time has been devoted to this
-        // project in the current period (secs)
+    double wall_cpu_time_this_period;
+        // how much "wall CPU time" has been devoted to this
+        // project in the current scheduling period (secs)
     struct RESULT *next_runnable_result;
         // the next result to run for this project
 
diff --git a/client/cs_apps.C b/client/cs_apps.C
index 0b56bdea9d..7f663a9fe4 100644
--- a/client/cs_apps.C
+++ b/client/cs_apps.C
@@ -83,7 +83,6 @@ int CLIENT_STATE::app_finished(ACTIVE_TASK& at) {
     char path[256];
     int retval;
     double size;
-    double task_cpu_time;
 
     bool had_error = false;
 
@@ -157,9 +156,9 @@ int CLIENT_STATE::app_finished(ACTIVE_TASK& at) {
         );
     }
 
-    task_cpu_time = at.current_cpu_time - at.cpu_time_at_last_sched;
-    at.result->project->work_done_this_period += task_cpu_time;
-    cpu_sched_work_done_this_period += task_cpu_time;
+    double wall_cpu_time = now - cpu_sched_last_time;
+    at.result->project->wall_cpu_time_this_period += wall_cpu_time;
+    total_wall_cpu_time_this_period += wall_cpu_time;
 
     return 0;
 }
@@ -394,18 +393,29 @@ void CLIENT_STATE::adjust_debts() {
     int count_cpu_intensive = 0;
     PROJECT *p;
     double min_short_term_debt=0, share_frac;
-    double elapsed_time = gstate.now - cpu_sched_last_time;
+    double wall_cpu_time = gstate.now - cpu_sched_last_time;
 
     SCOPE_MSG_LOG scope_messages(log_messages, CLIENT_MSG_LOG::DEBUG_TASK);
 
-    // total up work done since last CPU reschedule
+    // Total up total and per-project "wall CPU" since last CPU reschedule.
+    // "Wall CPU" is the wall time during which a task was
+    // runnable (at the OS level).
+    //
+    // We use wall CPU for debt calculation
+    // (instead of reported actual CPU) for two reasons:
+    // 1) the process might have paged a lot, so the actual CPU
+    //    may be a lot less than wall CPU
+    // 2) BOINC relies on apps to report their CPU time.
+    //    Sometimes there are bugs and apps report zero CPU.
+    //    It's safer not to trust them.
     //
     for (i=0; i<active_tasks.active_tasks.size(); i++) {
         ACTIVE_TASK* atp = active_tasks.active_tasks[i];
         if (atp->scheduler_state != CPU_SCHED_SCHEDULED) continue;
-        double task_cpu_time = elapsed_time;
-        atp->result->project->work_done_this_period += task_cpu_time;
-        cpu_sched_work_done_this_period += task_cpu_time;
+        if (atp->result->project->non_cpu_intensive) continue;
+
+        atp->result->project->wall_cpu_time_this_period += wall_cpu_time;
+        total_wall_cpu_time_this_period += wall_cpu_time;
     }
 
     // find total resource shares of runnable and potentially runnable projects
@@ -429,8 +439,8 @@ void CLIENT_STATE::adjust_debts() {
         //
         if (p->potentially_runnable()) {
             share_frac = p->resource_share/potentially_runnable_resource_share;
-            p->long_term_debt += share_frac*cpu_sched_work_done_this_period
-                - p->work_done_this_period
+            p->long_term_debt += share_frac*total_wall_cpu_time_this_period
+                - p->wall_cpu_time_this_period
             ;
         }
         total_long_term_debt += p->long_term_debt;
@@ -442,8 +452,8 @@ void CLIENT_STATE::adjust_debts() {
             p->anticipated_debt = 0;
         } else {
             share_frac = p->resource_share/runnable_resource_share;
-            p->short_term_debt += share_frac*cpu_sched_work_done_this_period
-                - p->work_done_this_period
+            p->short_term_debt += share_frac*total_wall_cpu_time_this_period
+                - p->wall_cpu_time_this_period
             ;
             if (first) {
                 first = false;
@@ -536,7 +546,7 @@ bool CLIENT_STATE::schedule_cpus() {
         atp = active_tasks.active_tasks[i];
         atp->next_scheduler_state = CPU_SCHED_PREEMPTED;
     }
-    expected_pay_off = cpu_sched_work_done_this_period / ncpus;
+    expected_pay_off = total_wall_cpu_time_this_period / ncpus;
     for (j=0; j<ncpus; j++) {
         if (cpu_earliest_deadline_first) {
             if (!schedule_earliest_deadline_result(expected_pay_off)) break;
@@ -590,13 +600,13 @@ bool CLIENT_STATE::schedule_cpus() {
 
     // reset work accounting
     // doing this at the end of schedule_cpus() because
-    // work_done_this_period's can change as apps finish
+    // wall_cpu_time_this_period's can change as apps finish
     //
     for (i=0; i<projects.size(); i++) {
         p = projects[i];
-        p->work_done_this_period = 0;
+        p->wall_cpu_time_this_period = 0;
     }
-    cpu_sched_work_done_this_period = 0;
+    total_wall_cpu_time_this_period = 0;
 
     set_client_state_dirty("schedule_cpus");
     return true;
diff --git a/client/cs_cmdline.C b/client/cs_cmdline.C
index f026a01a19..57e06d93bf 100644
--- a/client/cs_cmdline.C
+++ b/client/cs_cmdline.C
@@ -146,7 +146,7 @@ void CLIENT_STATE::parse_cmdline(int argc, char** argv) {
         } else if (ARG(run_cpu_benchmarks)) {
             run_cpu_benchmarks = true;
         } else if (ARG(attach_project)) {
-            if (i > argc-2) {
+            if (i >= argc-2) {
                 show_options = true;
             } else {
                 strcpy(attach_project_url, argv[++i]);
diff --git a/doc/sched.php b/doc/sched.php
new file mode 100644
index 0000000000..e30ecaf4f6
--- /dev/null
+++ b/doc/sched.php
@@ -0,0 +1,254 @@
+<?php
+require_once("docutil.php");
+page_head("Client scheduling");
+echo "
+
+This document describes two related parts of the BOINC core client
+(version 4.36 and later):
+<p>
+<b>CPU scheduling policy</b>:
+Of the set of results that are runnable (see below),
+which ones to execute?
+(On a machine with N CPUs, BOINC will try to execute N results at once).
+
+<p>
+<b>Work-fetch policy</b>:
+When should the core client ask a project for more work,
+which project should it ask,
+and how much work should it ask for?
+
+<p>
+The goals of the CPU scheduler and work-fetch policies are
+(in descending priority):
+<ul>
+<li> Results should be completed and reported by their deadline
+(results reported after their deadline
+may not have any value to the project and may not be granted credit).
+<li> Project resource shares should be honored over the long term;
+<li> If a computer is attached to multiple projects,
+    execution should rotate among projects on a frequent basis.
+</ul>
+The policies are designed to accommodate all scenarios,
+including those with computers that are slow or are attached
+to a large number of projects.
+
+<p>
+In previous versions of BOINC,
+the core client attempted to maintain at least one result
+for each attached project,
+and would do weighted round-robin CPU scheduling among all projects.
+In some scenarios (any combination of slow computer,
+lots of projects, and tight deadlines) a computer could
+miss the deadlines of all its results.
+The new policies solve this problem as follows:
+<ul>
+<li>
+Work fetch is limited to ensure that deadlines can be met.
+A computer attached to 10 projects might
+have work for only a few (perhaps only one) at a given time.
+<li>
+If deadlines are threatened,
+the CPU scheduling policy switches to a mode
+(earliest deadline first) that optimizes the likelihood
+of meeting deadlines, at the expense of variety.
+</ul>
+
+
+
+<h2>Concepts and terms</h2>
+
+<h3>Wall CPU time</h3>
+A result's <b>wall CPU time</b> is the amount of wall-clock time
+its process has been runnable at the OS level.
+The actual CPU time may be much less than this,
+e.g. if the process does a lot of paging,
+or if other (non-BOINC) processing jobs run at the same time.
+<p>
+BOINC uses wall CPU time as the measure of how much resource
+has been given to each project.
+Why not use actual CPU time instead?
+<ul>
+<li> Wall CPU time is more fair in the case of paging apps.
+<li> The measurement of actual CPU time depends on apps to
+report it correctly.
+Sometimes apps have bugs that cause them to always report zero.
+This distorts the scheduler's debt accounting.
+</ul>
+
+
+
+<h3>Result states</h3>
+A result is <b>runnable</b> if
+<ul>
+<li> Neither it nor its project is suspended, and
+<li> its files have been downloaded, and
+<li> it hasn't finished computing
+</ul>
+A result is <b>runnable soon</b> if
+<ul>
+<li> Neither it nor its project is suspended, and
+<li> it hasn't finished computing
+</ul>
+
+
+<h3>Project states</h3>
+A project is <b>runnable</b> if
+<ul>
+<li> It's not suspended, and
+<li> it has at least one runnable result
+</ul>
+
+A project is <b>downloading</b> if
+<ul>
+<li> It's not suspended, and
+<li> it has at least one result whose files are being downloaded
+</ul>
+
+A project is <b>contactable</b> if
+<ul>
+<li> It's not suspended, and
+<li> its master file has already been fetched, and
+<li> it's not deferred (i.e. its minimum RPC time is in the past), and
+<li> its no-new-work flag is not set
+</ul>
+
+A project is <b>potentially runnable</b> if
+<ul>
+<li> It's either runnable, downloading, or contactable.
+</ul>
+
+<h3>Debt</h3>
+Intuitively, a project's 'debt' is how much work is owed to it,
+relative to other projects.
+BOINC uses two types of debt;
+each is defined relative to a set S of projects.
+In each case, the debt is recalculated periodically as follows:
+<ul>
+<li> A = the wall CPU time used by projects in S during this period
+<li> R = sum of resource shares of projects in S
+<li> For each project P in S:
+   <ul>
+   <li> F = P.resource_share / R (i.e., P's fractional resource share)
+   <li> W = A*F (i.e., how much wall CPU time P should have gotten)
+   <li> P.debt += W - P.wall_cpu_time (i.e. what P should have gotten
+           minus what it got).
+    </ul>
+<li> P.debt is normalized (e.g. so that the mean or minimum is zero).
+</ul>
+
+
+<b>Short-term debt</b> is used by the CPU scheduler.
+It is adjusted over the set of runnable projects.
+It is normalized so that minimum short-term debt is zero,
+and maximum short-term debt is no greater than 86400 (i.e. one day).
+
+<p>
+<b>Long-term debt</b> is used by the work-fetch policy.
+It is adjusted over the set of potentially runnable projects.
+It is normalized so that average long-term debt is zero.
+
+
+<h2>The CPU scheduling policy</h2>
+<p>
+The CPU scheduler has two modes, <b>normal</b> and <b>panic</b>.
+In normal mode, the CPU scheduler runs the project(s)
+with the greatest short-term debt.
+Specifically:
+<ol>
+<li> Set the 'anticipated debt' of each project to its short-term debt
+<li> Find the project P with the greatest anticipated debt,
+    select one of P's runnable results
+    (picking one that is already running, if possible)
+    and schedule that result.
+<li> Decrement P's anticipated debt by the 'expected payoff'
+    (the total wall CPU in the last period divided by #CPUs).
+<li> Repeat steps 2 and 3 for additional CPUs
+</ol>
+Over the long term, this results in a round-robin policy,
+weighted by resource shares.
+
+<p>
+In panic mode, the CPU scheduler
+schedules the runnable results with the earliest deadlines.
+This allows the client to meet deadlines that would otherwise be missed.
+
+
+<p>
+The CPU scheduler runs when a result is completed,
+when the end of the user-specified scheduling period is reached,
+when new results become runnable,
+or when the user performs a UI interaction
+(e.g. suspending or resuming a project or result).
+
+
+<h2>The work-fetch policy</h2>
+
+<p>
+X is the estimated wall time by which the number of
+runnable results will fall below #CPUs.
+<p>
+min_queue is the user's network-connection period general preference.
+<p>
+work_fetch_OK is a flag set by the mode selection algorithm (see below).
+<p>
+The work-fetch policy maintains an 'overall urgency':
+<ul>
+<li>
+<b>NEED_IMMEDIATELY</b>:
+    there is at least one idle CPU
+<li>
+<b>NEED</b>:
+    X &lt; min_queue
+<li>
+<b>OK</b>:
+    X &gt; min_queue, and work_fetch_OK is true
+<li>
+<b>DONT_NEED</b>:
+    work_fetch_OK is false
+</ul>
+
+<p>
+In addition, the work-fetch policy maintains a per-project work-fetch mode:
+<p>
+R(P) = fractional resource share of P
+<p>
+X(P) = estimated wall time when number of runnable results for P
+will fall below #CPUs*R(P)
+<ul>
+<li>
+<b>NEED_IMMEDIATELY</b>:
+    no results of P are runnable soon.
+<li>
+<b>NEED</b>:
+    X(P) &lt; min_queue * R(P)
+<li>
+<b>OK</b>:
+    X(P) &gt; min_queue * R(P),
+    and P is not suspended or deferred or no-new-work
+<li>
+<b>DONT_NEED</b>:
+    P is suspended or deferred or no-new-work
+</ul>
+
+<p>
+
+<h2>Mode selection</h2>
+<p>
+Sort the work units by deadline, earliest first.
+If at any point in this list, the sum of the remaining 
+processing time is greater than 0.8 * up_frac * time to deadline,
+the CPU queue is overloaded.
+In that case no work is requested, and the CPU scheduler
+switches to earliest-deadline-first (panic) mode.
+
+<p>
+For each work unit, compute the ratio of its remaining processing time
+to its time to deadline, and sum these ratios.
+If the sum is greater than 0.8 * up_frac, the CPU queue is fully loaded.
+In that case no work is fetched.
+
+
+
+";
+page_tail();
+?>