*** empty log message ***

svn path=/trunk/boinc/; revision=9445
David Anderson 2006-02-11 03:00:37 +00:00
parent 6bc732f191
commit f433b73f6b
9 changed files with 257 additions and 129 deletions

View File

@ -1708,3 +1708,12 @@ Charlie 8 Feb 2006
mac_build/
boinc.xcodeproj/
project.pbxproj
David 10 Feb 2006
- cosmetic
client/
client_msgs.h
client_types.h
cpu_sched.C
cs_scheduler.C

View File

@ -91,6 +91,7 @@ public:
DEBUG_POLL, // show what polls are responding
DEBUG_GUIRPC,
DEBUG_SCHED_CPU,
DEBUG_WORK_FETCH,
DEBUG_SCRSAVE
};
CLIENT_MSG_LOG(): MSG_LOG(stdout) {}

View File

@ -254,6 +254,7 @@ public:
std::vector<RESULT*>active;
std::vector<RESULT*>pending;
double rrsim_proc_rate;
// fraction of each CPU this project will get in RR simulation
void set_rrsim_proc_rate(double per_cpu_proc_rate, double rrs);
// "debt" is how much CPU time we owe this project relative to others
@ -284,7 +285,7 @@ public:
// X seconds of wall-clock time to complete,
// taking into account
// 1) other projects and resource share;
// 2) on_frac and active_frac
// 2) on_frac, active_frac, and cpu_efficiency
// see doc/work_req.php
int work_request_urgency;

View File

@ -422,34 +422,6 @@ void CLIENT_STATE::enforce_schedule() {
}
}
// set the project's rrsim_proc_rate:
// the fraction of each CPU that it will get in round-robin mode
//
void PROJECT::set_rrsim_proc_rate(double per_cpu_proc_rate, double rrs) {
int nactive = (int)active.size();
if (nactive == 0) return;
double x;
if (rrs) {
x = resource_share/rrs;
} else {
x = 1; // TODO - fix
}
// if this project has fewer active results than CPUs,
// scale up its share to reflect this
//
if (nactive < gstate.ncpus) {
x *= ((double)gstate.ncpus)/nactive;
}
// But its rate on a given CPU can't exceed the CPU speed
//
if (x>1) {
x = 1;
}
rrsim_proc_rate = x*per_cpu_proc_rate*CPU_PESSIMISM_FACTOR;
}
// return true if we don't have enough runnable tasks to keep all CPUs busy
//
bool CLIENT_STATE::no_work_for_a_cpu() {
@ -465,7 +437,39 @@ bool CLIENT_STATE::no_work_for_a_cpu() {
return ncpus > count;
}
// return true if round-robin scheduling will miss a deadline
// Set the project's rrsim_proc_rate:
// the fraction of each CPU that it will get in round-robin mode.
// Precondition: the project's "active" array is populated
//
void PROJECT::set_rrsim_proc_rate(double per_cpu_proc_rate, double rrs) {
int nactive = (int)active.size();
if (nactive == 0) return;
double x;
if (rrs) {
x = resource_share/rrs;
} else {
x = 1; // pathological case; maybe should be 1/# runnable projects
}
// if this project has fewer active results than CPUs,
// scale up its share to reflect this
//
if (nactive < gstate.ncpus) {
x *= ((double)gstate.ncpus)/nactive;
}
// But its rate on a given CPU can't exceed 1
//
if (x>1) {
x = 1;
}
rrsim_proc_rate = x*per_cpu_proc_rate*CPU_PESSIMISM_FACTOR;
}
// return true if round-robin scheduling will miss a deadline.
// per_cpu_proc_rate is the expected number of CPU seconds per wall second
// on each CPU; rrs is the resource share of runnable projects
//
bool CLIENT_STATE::rr_misses_deadline(double per_cpu_proc_rate, double rrs) {
PROJECT* p, *pbest;
@ -486,7 +490,6 @@ bool CLIENT_STATE::rr_misses_deadline(double per_cpu_proc_rate, double rrs) {
p->pending.clear();
}
for (i=0; i<results.size(); i++) {
rp = results[i];
if (rp->aborted_via_gui) continue;
@ -596,8 +599,10 @@ bool CLIENT_STATE::rr_misses_deadline(double per_cpu_proc_rate, double rrs) {
//
void CLIENT_STATE::set_scheduler_mode() {
bool use_earliest_deadline_first = false;
double total_proc_rate = avg_proc_rate();
double per_cpu_proc_rate = total_proc_rate/ncpus;
double per_cpu_proc_rate = avg_proc_rate()/ncpus;
// how many CPU seconds per wall second we get on each CPU,
// taking into account on_frac, active_frac, and cpu_efficiency
double rrs = runnable_resource_share();
if (rr_misses_deadline(per_cpu_proc_rate, rrs)) {
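For orientation, here is a simplified, standalone sketch of the kind of round-robin simulation that rr_misses_deadline() drives with these rates. The SimResult type and the flat result list are stand-ins invented for this example; the actual client also maintains per-project pending queues and recomputes rates as the simulation proceeds.
// Simplified, standalone sketch of the kind of simulation rr_misses_deadline()
// performs (hypothetical types; not the client's actual implementation).
#include <vector>
#include <cstdio>

struct SimResult {
    double cpu_remaining;   // estimated CPU seconds left
    double deadline;        // report deadline, seconds from now
    double proc_rate;       // owning project's rrsim_proc_rate (CPU sec per wall sec)
};

// true if some result would finish after its deadline when each result
// progresses at its project's simulated per-CPU rate
bool rr_sim_misses_deadline_sketch(std::vector<SimResult> results) {
    double now = 0;
    while (!results.empty()) {
        // find the result that finishes soonest
        size_t best = 0;
        double dt = results[0].cpu_remaining / results[0].proc_rate;
        for (size_t i = 1; i < results.size(); i++) {
            double t = results[i].cpu_remaining / results[i].proc_rate;
            if (t < dt) { dt = t; best = i; }
        }
        now += dt;
        if (now > results[best].deadline) return true;    // deadline miss
        for (size_t i = 0; i < results.size(); i++) {      // advance the others
            if (i != best) results[i].cpu_remaining -= dt * results[i].proc_rate;
        }
        results.erase(results.begin() + best);
    }
    return false;
}

int main() {
    // two results from a project that gets half a CPU in the simulation
    std::vector<SimResult> r = {
        {3600, 4000, 0.5},    // 1h of CPU left, due in ~1.1h of wall time: misses
        {3600, 90000, 0.5},
    };
    printf("misses deadline: %d\n", (int)rr_sim_misses_deadline_sketch(r));
}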

View File

@ -147,7 +147,7 @@ PROJECT* CLIENT_STATE::next_project_trickle_up_pending() {
// Return the best project to fetch work from, NULL if none
//
// Basically, pick the one with largest long term debt - amount of current work
// Pick the one with largest (long term debt - amount of current work)
//
// PRECONDITIONS:
// - work_request_urgency and work_request set for all projects
@ -160,7 +160,7 @@ PROJECT* CLIENT_STATE::next_project_need_work() {
unsigned int i;
double prrs = potentially_runnable_resource_share();
for (i=0; i<projects.size(); ++i) {
for (i=0; i<projects.size(); i++) {
p = projects[i];
if (p->work_request_urgency == WORK_FETCH_DONT_NEED) continue;
if (p->work_request == 0) continue;
@ -502,11 +502,11 @@ double CLIENT_STATE::time_until_work_done(
}
}
// Compute:
// - work_request and work_request_urgency for all projects.
// Top-level function for work fetch policy.
// Outputs:
// - overall_work_fetch_urgency
//
// Only set non-zero work requests for projects that are contactable
// - for each contactable project:
// - work_request and work_request_urgency
//
int CLIENT_STATE::compute_work_requests() {
unsigned int i;
@ -517,7 +517,7 @@ int CLIENT_STATE::compute_work_requests() {
SCOPE_MSG_LOG scope_messages(log_messages, CLIENT_MSG_LOG::DEBUG_SCHED_CPU);
overall_work_fetch_urgency = WORK_FETCH_DONT_NEED;
for (i = 0; i < projects.size(); ++i) {
for (i=0; i< projects.size(); i++) {
projects[i]->work_request_urgency = WORK_FETCH_DONT_NEED;
projects[i]->work_request = 0;
}
@ -556,7 +556,7 @@ int CLIENT_STATE::compute_work_requests() {
// Then estimate how long it's going to be until we have fewer
// than this # of results remaining.
//
for (i=0; i<projects.size(); ++i) {
for (i=0; i<projects.size(); i++) {
PROJECT *p = projects[i];
p->work_request = 0;
@ -1034,14 +1034,17 @@ int CLIENT_STATE::handle_scheduler_reply(
}
bool CLIENT_STATE::should_get_work() {
// if there are fewer runnable results then CPUS, we need more work.
// if there are fewer runnable results than CPUS, we need more work.
//
if (no_work_for_a_cpu()) return true;
double tot_cpu_time_remaining = 0;
for (unsigned int i = 0; i < results.size();++i) {
for (unsigned int i=0; i<results.size(); i++) {
tot_cpu_time_remaining += results[i]->estimated_cpu_time_remaining();
}
// ????? shouldn't we scale by ncpus? by avg_proc_rate()??
//
if (tot_cpu_time_remaining < global_prefs.work_buf_min_days*SECONDS_PER_DAY) {
return true;
}
@ -1096,7 +1099,7 @@ void CLIENT_STATE::set_work_fetch_mode() {
double CLIENT_STATE::work_needed_secs() {
double total_work = 0;
for( unsigned int i = 0; i < results.size(); ++i) {
for(unsigned int i=0; i<results.size(); i++) {
if (results[i]->project->non_cpu_intensive) continue;
total_work += results[i]->estimated_cpu_time_remaining();
}
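The "????? shouldn't we scale" question left in should_get_work() above can be made concrete with a small hypothetical comparison. Nothing below is client code; avg_proc_rate is assumed to mean total CPU seconds per wall second summed over all CPUs, as in cpu_sched.C.
// Hypothetical illustration of the scaling question in should_get_work():
// the same queue looks "big enough" unscaled but too small once converted
// from CPU seconds to wall time on this host.
#include <cstdio>

int main() {
    const double SECONDS_PER_DAY = 86400;
    double tot_cpu_time_remaining = 3 * SECONDS_PER_DAY;  // 3 CPU-days queued
    double work_buf_min_days = 1.0;                       // user preference
    double avg_proc_rate = 4 * 0.9;   // 4 CPUs, each giving 0.9 CPU sec per wall sec

    // current check: 3 CPU-days vs. a 1-day buffer -> no work needed
    bool unscaled = tot_cpu_time_remaining < work_buf_min_days * SECONDS_PER_DAY;

    // scaled check: 3 CPU-days drain in ~0.83 wall-days here -> work needed
    bool scaled = tot_cpu_time_remaining / avg_proc_rate
                  < work_buf_min_days * SECONDS_PER_DAY;

    printf("need work: unscaled=%d scaled=%d\n", (int)unscaled, (int)scaled);
}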

View File

@ -1,6 +1,14 @@
<?
$project_news = array(
array("February 8, 2006",
"The <a href=http://szdg.lpds.sztaki.hu/szdg/>SZTAKI Desktop Grid</a>
is now in production.
The project, based at the MTA-SZTAKI Laboratory of Parallel
and Distributed Systems in Budapest, Hungary, is attempting
to find all the generalized binary number systems
(bases are matrices and digits are vectors) up to dimension 11."
),
array("February 6, 2006",
"The <a href=http://boinc.bio.wzw.tum.de/boincsimap/>Similarity
Matrix of Proteins (SIMAP)</a> project is now in production.

View File

@ -37,29 +37,26 @@ resources.
BOINC lets you donate computing power
to scientific research projects such as:
<ul>
<li>
<a href=http://climateprediction.net>Climateprediction.net</a>:
study climate change
<li>
<a href=http://einstein.phys.uwm.edu/>Einstein@home</a>:
search for gravitational signals emitted by pulsars
<li>
<a href=http://lhcathome.cern.ch/>LHC@home</a>:
improve the design of the CERN LHC particle accelerator
<li>
<a href=http://predictor.scripps.edu>Predictor@home</a>:
investigate protein-related diseases
<li>
<a href=http://boinc.bakerlab.org/rosetta/>Rosetta@home<a>:
help researchers develop cures for human diseases
<li><a href=http://setiathome.berkeley.edu/>SETI@home</a>:
Look for radio evidence of extraterrestrial life
<li><a href=http://boinc.bio.wzw.tum.de/boincsimap/>SIMAP</a>:
<li> <a href=http://climateprediction.net>Climateprediction.net</a>:
study climate change
<li> <a href=http://einstein.phys.uwm.edu/>Einstein@home</a>:
search for gravitational signals emitted by pulsars
<li> <a href=http://lhcathome.cern.ch/>LHC@home</a>:
improve the design of the CERN LHC particle accelerator
<li> <a href=http://predictor.scripps.edu>Predictor@home</a>:
investigate protein-related diseases
<li> <a href=http://boinc.bakerlab.org/rosetta/>Rosetta@home</a>:
help researchers develop cures for human diseases
<li> <a href=http://setiathome.berkeley.edu/>SETI@home</a>:
look for radio evidence of extraterrestrial life
<li> <a href=http://boinc.bio.wzw.tum.de/boincsimap/>SIMAP</a>:
calculate protein similarity data for use by many biological
research projects.
<li> <a href=http://szdg.lpds.sztaki.hu/szdg/>SZTAKI Desktop Grid</a>:
search for generalized binary number systems.
<li> <a href=http://www.worldcommunitygrid.org/>World Community Grid</a>:
advance our knowledge of human disease. (Requires 5.2.1 or greater)
<li><a href=http://www.cellcomputing.net/>Cell Computing</a>
<li> <a href=http://www.cellcomputing.net/>Cell Computing</a>
biomedical research
(Japanese; requires nonstandard client software)
</ul>

View File

@ -186,76 +186,127 @@ when the end of the user-specified scheduling period is reached,
when new results become runnable,
or when the user performs a UI interaction
(e.g. suspending or resuming a project or result).
<h2>The work-fetch policy</h2>
<p>
X is the estimated wall time by which the number of
runnable results will fall below NCPUS.
<p>
min_queue is the user's network-connection period general preference.
<p>
work_fetch_OK is a flag set by the mode selection algorithm (see below).
<p>
The work-fetch policy maintains an 'overall urgency':
It does the following:
<ul>
<li>
<b>NEED_IMMEDIATELY</b>:
there is at least one idle CPU
<li>
<b>NEED</b>:
X &lt; min_queue
<li>
<b>OK</b>:
X &gt; min_queue, work_fetch_OK is true
<li>
<b>DONT_NEED</b>:
work_fetch_OK is false
</ul>
<p>
In addition, the work-fetch policy maintains a per-project work-fetch mode:
<p>
R(P) = fractional resource share of P
<p>
X(P) = estimated wall time when number of runnable results for P
will fall below NCPUS*R(P)
<ul>
<li>
<b>NEED_IMMEDIATELY</b>:
no results of P are runnable soon.
<li>
<b>NEED</b>:
X(P) &lt; min_queue * R(P)
<li>
<b>OK</b>:
X(P) > min_queue * R(P),
and P is not suspended or deferred or no-new-work
<li>
<b>DONT_NEED</b>:
P is suspended or deferred or no-new-work
</ul>
<p>
<h2>Mode selection</h2>
<p>
Work_fetch_OK is set to false if either
<ul>
<li> The sum of all RTFs is > 0.8
<li> The CRTF of any result is > 0.8
</ul>
EDF mode is used if either
<ul>
<li> The CRTF of any result is > 0.8
<li> The deadline of any result is earlier than one day from now
<li> The deadline of any result is less than
2 * min_queue from now.
<li> Do a simulation of round-robin scheduling
applied to the current work queue.
<li> If all results meet their deadlines,
use round-robin; otherwise, use EDF.
</ul>
<h2>Work-fetch policy</h2>
<p>
The work-fetch policy is defined in terms of a basic function
<pre>
time_until_work_done(project, N, subset_resource_share)
// estimate wall time until the number of uncompleted results
// for this project will reach N,
// given the total resource share for a set of competing projects
</pre>
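<p>
As an illustration only (not the actual client code), one simple way to compute
such an estimate is sketched below; it assumes the project's uncompleted results
are processed one at a time, in queue order, at the project's fractional share
of the host's total processing rate:
<pre>
// Illustrative sketch only -- not the client's implementation.
double time_until_work_done_sketch(
    const double* cpu_remaining,    // remaining CPU seconds of each uncompleted result
    int num_uncompleted,
    int n,                          // stop when this many results remain
    double resource_share,          // this project's resource share
    double subset_resource_share,   // total share of the competing set of projects
    double total_proc_rate          // CPU seconds per wall second, summed over CPUs
) {
    int must_finish = num_uncompleted - n;
    if (must_finish <= 0) return 0;
    double rate = total_proc_rate * resource_share / subset_resource_share;
    double cpu_secs = 0;
    for (int i = 0; i < must_finish; i++) {
        cpu_secs += cpu_remaining[i];
    }
    return cpu_secs / rate;
}
</pre>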
<p>
The work-fetch policy function is called every 5 seconds
(or as needed) by the scheduler RPC polling function.
It sets the following variables:
<ul>
<li> <b>global urgency</b>: one of
<ul>
<li><b>DONT_NEED</b>: CPU scheduler is in EDF mode,
or fetching additional work would make it so.
<li><b>OK</b>: we have enough work, but it's OK to get more
<li><b>NEED</b>: a CPU will be idle within min_queue
<li><b>NEED_IMMEDIATELY</b>: a CPU is idle.
</ul>
<li> For each project P
<br>
N = ncpus*(relative resource share)
<br>
prrs = potentially runnable resource share
<br>
X = time_until_work_done(P, N-1, prrs)
<ul>
<li><b>project urgency</b>
<ul>
<li><b>DONT_NEED</b>: P is suspended or deferred or no-new-work
<li><b>OK</b>: X > min_queue
<li><b>NEED</b>: X > 0
<li><b>NEED_IMMEDIATELY</b>: X == 0
</ul>
<li> <b>work request size</b>
(the number of seconds of work to request,
if we do a scheduler RPC to this project).
</ul>
</ul>
<p>
The scheduler RPC mechanism may select a project to contact
because of a user request, an outstanding trickle-up message,
or a result that is overdue for reporting.
If it does so, it will also request work from that project.
<p>
Otherwise, the RPC mechanism calls the following function and
gets work from that project, if any.
<pre>
next_project_need_work()
if global_urgency == DONT_NEED return null
Pbest = null;
for each project P
if P.urgency != DONT_NEED and P.work_request_size > 0
if P.urgency == OK and global_urgency == OK
continue
P.score = P.long_term_debt - time_until_work_done(P, 0, prrs)
if Pbest
if P.score > Pbest.score
Pbest = P
else
Pbest = P
return Pbest
</pre>
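<p>
The same selection logic as a compilable C++ sketch; ProjectInfo and its fields
are hypothetical stand-ins for the client's data structures, with
time_until_work_done(P, 0, prrs) assumed to be precomputed into time_until_done:
<pre>
// Hypothetical rendering of the pseudocode above, not the client's actual API.
enum { DONT_NEED, OK, NEED, NEED_IMMEDIATELY };

struct ProjectInfo {
    int urgency;
    double work_request_size;
    double long_term_debt;
    double time_until_done;   // time_until_work_done(P, 0, prrs), precomputed
    double score;
};

ProjectInfo* next_project_need_work_sketch(
    ProjectInfo* projects, int nprojects, int global_urgency
) {
    if (global_urgency == DONT_NEED) return 0;
    ProjectInfo* pbest = 0;
    for (int i = 0; i < nprojects; i++) {
        ProjectInfo* p = projects + i;
        if (p->urgency == DONT_NEED) continue;
        if (p->work_request_size <= 0) continue;
        if (p->urgency == OK && global_urgency == OK) continue;
        p->score = p->long_term_debt - p->time_until_done;
        if (!pbest || p->score > pbest->score) pbest = p;
    }
    return pbest;
}
</pre>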
<p>
The work-fetch policy function is as follows:
<pre>
// compute global urgency
x = delay until number of runnable results will be < ncpus
if x == 0
global_urgency = NEED_IMMEDIATELY
else
if CPU scheduling mode is EDF
global_urgency = DONT_NEED
else
P = project with greatest long-term debt
suppose we got work from P
if round-robin would then miss a deadline
global_urgency = DONT_NEED
else
if x &lt; min_queue
global_urgency = NEED
else
global_urgency = OK
// compute per-project urgencies and work request sizes
if global_urgency != DONT_NEED
for each project P
N = ncpus * (fractional potentially runnable resource_share)
(i.e. number of results we need on hand to
keep enough CPUs busy to maintain resource share)
x = time until # of runnable results for P will fall below N
if x == 0
P.urgency = NEED_IMMEDIATELY
else if x < min_queue
P.urgency = NEED
P.work_request_size = min_queue - x
else if global_urgency > OK
P.urgency = OK
P.work_request_size = 1
else
P.urgency = DONT_NEED
</pre>
";
page_tail();

doc/server_status.php Normal file
View File

@ -0,0 +1,53 @@
<?php
require_once("docutil.php");
page_head("Server status XML export");
echo "
BOINC-based projects offer the following XML export
at <code>URL/server_status.php</code>.
These are generally updated every 10 minutes or so -
do not poll more often than that.
These can be used to make web sites showing
the server status of multiple BOINC projects.
<p>
";
echo html_text("
<server_status>
<update_time>1128535206</update_time>
<daemon_status>
<daemon>
<host>jocelyn</host>
<command>BOINC database</command>
<status>running</status>
</daemon>
<daemon>
<host>castelli</host>
<command>master science database</command>
<status>running</status>
</daemon>
<daemon>
<host>klaatu</host>
<command>data-driven web pages</command>
<status>disabled</status>
</daemon>
<daemon>
<host>galileo</host>
<command>feeder</command>
<status>not running</status>
</daemon>
</daemon_status>
<database_file_states>
<results_ready_to_send>614830</results_ready_to_send>
<results_in_progress>1208959</results_in_progress>
<workunits_waiting_for_validation>8</workunits_waiting_for_validation>
<workunits_waiting_for_assimilation>2</workunits_waiting_for_assimilation>
<workunits_waiting_for_deletion>4</workunits_waiting_for_deletion>
<results_waiting_for_deletion>15</results_waiting_for_deletion>
<transitioner_backlog_hours>0.00083333334</transitioner_backlog_hours>
</database_file_states>
</server_status>");
page_tail();
?>
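For someone building such a multi-project status page, here is a minimal sketch of pulling fields out of this export. The parsing below is naive string matching, chosen only to keep the example self-contained; a real consumer should fetch URL/server_status.php over HTTP and use a proper XML parser (both are omitted here).
// Naive illustration of extracting fields from the server_status.php export.
#include <cstdio>
#include <string>

// return the text between <tag> and </tag>, or "" if absent
std::string xml_element(const std::string& xml, const std::string& tag) {
    std::string open = "<" + tag + ">", close = "</" + tag + ">";
    size_t a = xml.find(open);
    if (a == std::string::npos) return "";
    a += open.size();
    size_t b = xml.find(close, a);
    if (b == std::string::npos) return "";
    return xml.substr(a, b - a);
}

int main() {
    // in practice this string would be the body fetched from URL/server_status.php
    std::string xml =
        "<server_status>"
        "<update_time>1128535206</update_time>"
        "<database_file_states>"
        "<results_ready_to_send>614830</results_ready_to_send>"
        "</database_file_states>"
        "</server_status>";
    printf("updated at %s, %s results ready to send\n",
        xml_element(xml, "update_time").c_str(),
        xml_element(xml, "results_ready_to_send").c_str());
    return 0;
}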