mirror of https://github.com/BOINC/boinc.git
*** empty log message ***
svn path=/trunk/boinc/; revision=9445
This commit is contained in:
parent
6bc732f191
commit
f433b73f6b
|
@ -1708,3 +1708,12 @@ Charlie 8 Feb 2006
|
|||
mac_build/
|
||||
boinc.xcodeproj/
|
||||
project.pbxproj
|
||||
|
||||
David 10 Feb 2006
|
||||
- cosmetic
|
||||
|
||||
client/
|
||||
client_msgs.h
|
||||
client_types.h
|
||||
cpu_sched.C
|
||||
cs_scheduler.C
|
||||
|
|
|
@ -91,6 +91,7 @@ public:
|
|||
DEBUG_POLL, // show what polls are responding
|
||||
DEBUG_GUIRPC,
|
||||
DEBUG_SCHED_CPU,
|
||||
DEBUG_WORK_FETCH,
|
||||
DEBUG_SCRSAVE
|
||||
};
|
||||
CLIENT_MSG_LOG(): MSG_LOG(stdout) {}
|
||||
|
|
|
@ -254,6 +254,7 @@ public:
|
|||
std::vector<RESULT*>active;
|
||||
std::vector<RESULT*>pending;
|
||||
double rrsim_proc_rate;
|
||||
// fraction of each CPU this project will get in RR simulation
|
||||
void set_rrsim_proc_rate(double per_cpu_proc_rate, double rrs);
|
||||
|
||||
// "debt" is how much CPU time we owe this project relative to others
|
||||
|
@ -284,7 +285,7 @@ public:
|
|||
// X seconds of wall-clock time to complete,
|
||||
// taking into account
|
||||
// 1) other projects and resource share;
|
||||
// 2) on_frac and active_frac
|
||||
// 2) on_frac, active_frac, and cpu_efficiency
|
||||
// see doc/work_req.php
|
||||
int work_request_urgency;
|
||||
|
||||
|
|
|
@ -422,34 +422,6 @@ void CLIENT_STATE::enforce_schedule() {
|
|||
}
|
||||
}
|
||||
|
||||
// set the project's rrsim_proc_rate:
|
||||
// the fraction of each CPU that it will get in round-robin mode
|
||||
//
|
||||
void PROJECT::set_rrsim_proc_rate(double per_cpu_proc_rate, double rrs) {
|
||||
int nactive = (int)active.size();
|
||||
if (nactive == 0) return;
|
||||
double x;
|
||||
if (rrs) {
|
||||
x = resource_share/rrs;
|
||||
} else {
|
||||
x = 1; // TODO - fix
|
||||
}
|
||||
|
||||
// if this project has fewer active results than CPUs,
|
||||
// scale up its share to reflect this
|
||||
//
|
||||
if (nactive < gstate.ncpus) {
|
||||
x *= ((double)gstate.ncpus)/nactive;
|
||||
}
|
||||
|
||||
// But its rate on a given CPU can't exceed the CPU speed
|
||||
//
|
||||
if (x>1) {
|
||||
x = 1;
|
||||
}
|
||||
rrsim_proc_rate = x*per_cpu_proc_rate*CPU_PESSIMISM_FACTOR;
|
||||
}
|
||||
|
||||
// return true if we don't have enough runnable tasks to keep all CPUs busy
|
||||
//
|
||||
bool CLIENT_STATE::no_work_for_a_cpu() {
|
||||
|
@ -465,7 +437,39 @@ bool CLIENT_STATE::no_work_for_a_cpu() {
|
|||
return ncpus > count;
|
||||
}
|
||||
|
||||
// return true if round-robin scheduling will miss a deadline
|
||||
// Set the project's rrsim_proc_rate:
|
||||
// the fraction of each CPU that it will get in round-robin mode
// (at most 1), multiplied by per_cpu_proc_rate and CPU_PESSIMISM_FACTOR.
// per_cpu_proc_rate: expected CPU seconds per wall second on each CPU.
// rrs: total resource share of the runnable projects.
|
||||
// Precondition: the project's "active" array is populated.
// If "active" is empty, rrsim_proc_rate is left unchanged.
|
||||
//
|
||||
void PROJECT::set_rrsim_proc_rate(double per_cpu_proc_rate, double rrs) {
|
||||
int nactive = (int)active.size();
|
||||
if (nactive == 0) return; // nothing active: keep the previous rrsim_proc_rate
|
||||
double x; // this project's fraction of each CPU
|
||||

||||
if (rrs) {
|
||||
x = resource_share/rrs; // share relative to the runnable total
|
||||
} else {
|
||||
x = 1; // pathological case (rrs == 0, division avoided); maybe should be 1/# runnable projects
|
||||
}
|
||||

||||
// if this project has fewer active results than CPUs,
|
||||
// scale up its share by ncpus/nactive to reflect this
|
||||
//
|
||||
if (nactive < gstate.ncpus) {
|
||||
x *= ((double)gstate.ncpus)/nactive;
|
||||
}
|
||||

||||
// But its rate on a given CPU can't exceed 1, so clamp the fraction
|
||||
//
|
||||
if (x>1) {
|
||||
x = 1;
|
||||
}
|
||||
// convert the CPU fraction into a rate, scaled by CPU_PESSIMISM_FACTOR
rrsim_proc_rate = x*per_cpu_proc_rate*CPU_PESSIMISM_FACTOR;
|
||||
}
|
||||
|
||||
// return true if round-robin scheduling will miss a deadline.
|
||||
// per_cpu_proc_rate is the expected number of CPU seconds per wall second
|
||||
// on each CPU; rrs is the resource share of runnable projects
|
||||
//
|
||||
bool CLIENT_STATE::rr_misses_deadline(double per_cpu_proc_rate, double rrs) {
|
||||
PROJECT* p, *pbest;
|
||||
|
@ -486,7 +490,6 @@ bool CLIENT_STATE::rr_misses_deadline(double per_cpu_proc_rate, double rrs) {
|
|||
p->pending.clear();
|
||||
}
|
||||
|
||||
|
||||
for (i=0; i<results.size(); i++) {
|
||||
rp = results[i];
|
||||
if (rp->aborted_via_gui) continue;
|
||||
|
@ -596,8 +599,10 @@ bool CLIENT_STATE::rr_misses_deadline(double per_cpu_proc_rate, double rrs) {
|
|||
//
|
||||
void CLIENT_STATE::set_scheduler_mode() {
|
||||
bool use_earliest_deadline_first = false;
|
||||
double total_proc_rate = avg_proc_rate();
|
||||
double per_cpu_proc_rate = total_proc_rate/ncpus;
|
||||
double per_cpu_proc_rate = avg_proc_rate()/ncpus;
|
||||
// how many CPU seconds per wall second we get on each CPU,
|
||||
// taking into account on_frac, active_frac, and cpu_efficiency
|
||||
|
||||
double rrs = runnable_resource_share();
|
||||
|
||||
if (rr_misses_deadline(per_cpu_proc_rate, rrs)) {
|
||||
|
|
|
@ -147,7 +147,7 @@ PROJECT* CLIENT_STATE::next_project_trickle_up_pending() {
|
|||
|
||||
// Return the best project to fetch work from, NULL if none
|
||||
//
|
||||
// Basically, pick the one with largest long term debt - amount of current work
|
||||
// Pick the one with largest (long term debt - amount of current work)
|
||||
//
|
||||
// PRECONDITIONS:
|
||||
// - work_request_urgency and work_request set for all projects
|
||||
|
@ -160,7 +160,7 @@ PROJECT* CLIENT_STATE::next_project_need_work() {
|
|||
unsigned int i;
|
||||
double prrs = potentially_runnable_resource_share();
|
||||
|
||||
for (i=0; i<projects.size(); ++i) {
|
||||
for (i=0; i<projects.size(); i++) {
|
||||
p = projects[i];
|
||||
if (p->work_request_urgency == WORK_FETCH_DONT_NEED) continue;
|
||||
if (p->work_request == 0) continue;
|
||||
|
@ -502,11 +502,11 @@ double CLIENT_STATE::time_until_work_done(
|
|||
}
|
||||
}
|
||||
|
||||
// Compute:
|
||||
// - work_request and work_request_urgency for all projects.
|
||||
// Top-level function for work fetch policy.
|
||||
// Outputs:
|
||||
// - overall_work_fetch_urgency
|
||||
//
|
||||
// Only set non-zero work requests for projects that are contactable
|
||||
// - for each contactable project:
|
||||
// - work_request and work_request_urgency
|
||||
//
|
||||
int CLIENT_STATE::compute_work_requests() {
|
||||
unsigned int i;
|
||||
|
@ -517,7 +517,7 @@ int CLIENT_STATE::compute_work_requests() {
|
|||
SCOPE_MSG_LOG scope_messages(log_messages, CLIENT_MSG_LOG::DEBUG_SCHED_CPU);
|
||||
|
||||
overall_work_fetch_urgency = WORK_FETCH_DONT_NEED;
|
||||
for (i = 0; i < projects.size(); ++i) {
|
||||
for (i=0; i< projects.size(); i++) {
|
||||
projects[i]->work_request_urgency = WORK_FETCH_DONT_NEED;
|
||||
projects[i]->work_request = 0;
|
||||
}
|
||||
|
@ -556,7 +556,7 @@ int CLIENT_STATE::compute_work_requests() {
|
|||
// Then estimate how long it's going to be until we have fewer
|
||||
// than this # of results remaining.
|
||||
//
|
||||
for (i=0; i<projects.size(); ++i) {
|
||||
for (i=0; i<projects.size(); i++) {
|
||||
PROJECT *p = projects[i];
|
||||
|
||||
p->work_request = 0;
|
||||
|
@ -1034,14 +1034,17 @@ int CLIENT_STATE::handle_scheduler_reply(
|
|||
}
|
||||
|
||||
bool CLIENT_STATE::should_get_work() {
|
||||
// if there are fewer runnable results then CPUS, we need more work.
|
||||
// if there are fewer runnable results than CPUS, we need more work.
|
||||
//
|
||||
if (no_work_for_a_cpu()) return true;
|
||||
|
||||
double tot_cpu_time_remaining = 0;
|
||||
for (unsigned int i = 0; i < results.size();++i) {
|
||||
for (unsigned int i=0; i<results.size(); i++) {
|
||||
tot_cpu_time_remaining += results[i]->estimated_cpu_time_remaining();
|
||||
}
|
||||
|
||||
// TODO: shouldn't this be scaled by ncpus? and by avg_proc_rate()?
|
||||
//
|
||||
if (tot_cpu_time_remaining < global_prefs.work_buf_min_days*SECONDS_PER_DAY) {
|
||||
return true;
|
||||
}
|
||||
|
@ -1096,7 +1099,7 @@ void CLIENT_STATE::set_work_fetch_mode() {
|
|||
|
||||
double CLIENT_STATE::work_needed_secs() {
|
||||
double total_work = 0;
|
||||
for( unsigned int i = 0; i < results.size(); ++i) {
|
||||
for(unsigned int i=0; i<results.size(); i++) {
|
||||
if (results[i]->project->non_cpu_intensive) continue;
|
||||
total_work += results[i]->estimated_cpu_time_remaining();
|
||||
}
|
||||
|
|
|
@ -1,6 +1,14 @@
|
|||
<?
|
||||
|
||||
$project_news = array(
|
||||
array("February 8, 2006",
|
||||
"The <a href=http://szdg.lpds.sztaki.hu/szdg/>SZTAKI Desktop Grid</a>
|
||||
is now in production.
|
||||
The project, based at the MTA-SZTAKI Laboratory of Parallel
|
||||
and Distributed Systems in Budapest, Hungary, is attempting
|
||||
to find all the generalized binary number systems
|
||||
(bases are matrices and digits are vectors) up to dimension 11."
|
||||
),
|
||||
array("February 6, 2006",
|
||||
"The <a href=http://boinc.bio.wzw.tum.de/boincsimap/>Similarity
|
||||
Matrix of Proteins (SIMAP)</a> project is now in production.
|
||||
|
|
|
@ -37,29 +37,26 @@ resources.
|
|||
BOINC lets you donate computing power
|
||||
to scientific research projects such as:
|
||||
<ul>
|
||||
<li>
|
||||
<a href=http://climateprediction.net>Climateprediction.net</a>:
|
||||
study climate change
|
||||
<li>
|
||||
<a href=http://einstein.phys.uwm.edu/>Einstein@home</a>:
|
||||
search for gravitational signals emitted by pulsars
|
||||
<li>
|
||||
<a href=http://lhcathome.cern.ch/>LHC@home</a>:
|
||||
improve the design of the CERN LHC particle accelerator
|
||||
<li>
|
||||
<a href=http://predictor.scripps.edu>Predictor@home</a>:
|
||||
investigate protein-related diseases
|
||||
<li>
|
||||
<a href=http://boinc.bakerlab.org/rosetta/>Rosetta@home</a>:
|
||||
help researchers develop cures for human diseases
|
||||
<li><a href=http://setiathome.berkeley.edu/>SETI@home</a>:
|
||||
Look for radio evidence of extraterrestrial life
|
||||
<li><a href=http://boinc.bio.wzw.tum.de/boincsimap/>SIMAP</a>:
|
||||
<li> <a href=http://climateprediction.net>Climateprediction.net</a>:
|
||||
study climate change
|
||||
<li> <a href=http://einstein.phys.uwm.edu/>Einstein@home</a>:
|
||||
search for gravitational signals emitted by pulsars
|
||||
<li> <a href=http://lhcathome.cern.ch/>LHC@home</a>:
|
||||
improve the design of the CERN LHC particle accelerator
|
||||
<li> <a href=http://predictor.scripps.edu>Predictor@home</a>:
|
||||
investigate protein-related diseases
|
||||
<li> <a href=http://boinc.bakerlab.org/rosetta/>Rosetta@home</a>:
|
||||
help researchers develop cures for human diseases
|
||||
<li> <a href=http://setiathome.berkeley.edu/>SETI@home</a>:
|
||||
Look for radio evidence of extraterrestrial life
|
||||
<li> <a href=http://boinc.bio.wzw.tum.de/boincsimap/>SIMAP</a>:
|
||||
calculate protein similarity data for use by many biological
|
||||
research projects.
|
||||
<li> <a href=http://szdg.lpds.sztaki.hu/szdg/>SZTAKI Desktop Grid</a>:
|
||||
search for generalized binary number systems.
|
||||
<li> <a href=http://www.worldcommunitygrid.org/>World Community Grid</a>:
|
||||
advance our knowledge of human disease. (Requires 5.2.1 or greater)
|
||||
<li><a href=http://www.cellcomputing.net/>Cell Computing</a>
|
||||
<li> <a href=http://www.cellcomputing.net/>Cell Computing</a>
|
||||
biomedical research
|
||||
(Japanese; requires nonstandard client software)
|
||||
</ul>
|
||||
|
|
183
doc/sched.php
183
doc/sched.php
|
@ -186,76 +186,127 @@ when the end of the user-specified scheduling period is reached,
|
|||
when new results become runnable,
|
||||
or when the user performs a UI interaction
|
||||
(e.g. suspending or resuming a project or result).
|
||||
|
||||
|
||||
<h2>The work-fetch policy</h2>
|
||||
|
||||
<p>
|
||||
X is the estimated wall time by which the number of
|
||||
runnable results will fall below NCPUS.
|
||||
<p>
|
||||
min_queue is the user's network-connection period general preference.
|
||||
<p>
|
||||
work_fetch_OK is a flag set by the mode selection algorithm (see below).
|
||||
<p>
|
||||
The work-fetch policy maintains an 'overall urgency':
|
||||
It does the following:
|
||||
<ul>
|
||||
<li>
|
||||
<b>NEED_IMMEDIATELY</b>:
|
||||
there is at least one idle CPU
|
||||
<li>
|
||||
<b>NEED</b>:
|
||||
X < min_queue
|
||||
<li>
|
||||
<b>OK</b>:
|
||||
X > min_queue, work_fetch_OK is true
|
||||
<li>
|
||||
<b>DONT_NEED</b>:
|
||||
work_fetch_OK is false
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
In addition, the work-fetch policy maintains a per-project work-fetch mode:
|
||||
<p>
|
||||
R(P) = fractional resource share of P
|
||||
<p>
|
||||
X(P) = estimated wall time when number of runnable results for P
|
||||
will fall below NCPUS*R(P)
|
||||
<ul>
|
||||
<li>
|
||||
<b>NEED_IMMEDIATELY</b>:
|
||||
no results of P are runnable soon.
|
||||
<li>
|
||||
<b>NEED</b>:
|
||||
X(P) < min_queue * R(P)
|
||||
<li>
|
||||
<b>OK</b>:
|
||||
X(P) > min_queue * R(P),
|
||||
and P is not suspended or deferred or no-new-work
|
||||
<li>
|
||||
<b>DONT_NEED</b>:
|
||||
P is suspended or deferred or no-new-work
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
|
||||
<h2>Mode selection</h2>
|
||||
<p>
|
||||
Work_fetch_OK is set to false if either
|
||||
<ul>
|
||||
<li> The sum of all RTFs is > 0.8
|
||||
<li> The CRTF of any result is > 0.8
|
||||
</ul>
|
||||
|
||||
EDF mode is used if either
|
||||
<ul>
|
||||
<li> The CRTF of any result is > 0.8
|
||||
<li> The deadline of any result is earlier than one day from now
|
||||
<li> The deadline of any result is less than
|
||||
2 * min_queue from now.
|
||||
<li> Do a simulation of round-robin scheduling
|
||||
applied to the current work queue.
|
||||
<li> If all results meet their deadlines,
|
||||
use round-robin; otherwise, use EDF.
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Work-fetch policy</h2>
|
||||
|
||||
<p>
|
||||
The work-fetch policy is defined in terms of a basic function
|
||||
<pre>
|
||||
time_until_work_done(project, N, subset_resource_share)
|
||||
// estimate wall time until the number of uncompleted results
|
||||
// for this project will reach N,
|
||||
// given the total resource share for a set of competing projects
|
||||
</pre>
|
||||
<p>
|
||||
The work-fetch policy function is called every 5 seconds
|
||||
(or as needed) by the scheduler RPC polling function.
|
||||
</pre>
|
||||
It sets the following variables:
|
||||
<ul>
|
||||
<li> <b>global urgency</b>: one of
|
||||
<ul>
|
||||
<li><b>DONT_NEED</b>: CPU scheduler is in EDF mode,
|
||||
or fetching additional work would make it so.
|
||||
<li><b>OK</b>: we have enough work, but it's OK to get more
|
||||
<li><b>NEED</b>: a CPU will be idle within min_queue
|
||||
<li><b>NEED_IMMEDIATELY</b>: a CPU is idle.
|
||||
</ul>
|
||||
<li> For each project P
|
||||
<br>
|
||||
N = ncpus*(relative resource share)
|
||||
<br>
|
||||
prrs = potentially runnable resource share
|
||||
<br>
|
||||
X = time_until_work_done(P, N-1, prrs)
|
||||
<ul>
|
||||
<li><b>project urgency</b>
|
||||
<ul>
|
||||
<li><b>DONT_NEED</b>: P is suspended or deferred or no-new-work
|
||||
<li><b>OK</b>: X > min_queue
|
||||
<li><b>NEED</b>: X > 0
|
||||
<li><b>NEED_IMMEDIATELY</b>: X == 0
|
||||
</ul>
|
||||
<li> <b>work request size</b>
|
||||
(the number of seconds of work to request,
|
||||
if we do a scheduler RPC to this project).
|
||||
</ul>
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
The scheduler RPC mechanism may select a project to contact
|
||||
because of a user request, an outstanding trickle-up message,
|
||||
or a result that is overdue for reporting.
|
||||
If it does so, it will also request work from that project.
|
||||
|
||||
<p>
|
||||
Otherwise, the RPC mechanism calls the following function and
|
||||
gets work from that project, if any.
|
||||
<pre>
|
||||
next_project_need_work()
|
||||
if global_urgency == DONT_NEED return null
|
||||
Pbest = null;
|
||||
for each project P
|
||||
if P.urgency != DONT_NEED and P.work_request_size > 0
|
||||
if P.urgency == OK and global_urgency == OK
|
||||
continue
|
||||
P.score = P.long_term_debt - time_until_work_done(P, 0, prrs)
|
||||
if Pbest
|
||||
if P.score > Pbest.score
|
||||
Pbest = P
|
||||
else
|
||||
Pbest = P
|
||||
return Pbest
|
||||
</pre>
|
||||
|
||||
<p>
|
||||
The work-fetch policy function is as follows:
|
||||
<pre>
|
||||
// compute global urgency
|
||||
|
||||
x = delay until number of runnable results will be < ncpus
|
||||
if x == 0
|
||||
global_urgency = NEED_IMMEDIATELY
|
||||
else
|
||||
if CPU scheduling mode is EDF
|
||||
global_urgency = DONT_NEED
|
||||
else
|
||||
P = project with greatest long-term debt
|
||||
suppose we got work from P
|
||||
if round-robin would then miss a deadline
|
||||
global_urgency = DONT_NEED
|
||||
else
|
||||
if x < min_queue
|
||||
global_urgency = NEED
|
||||
else
|
||||
global_urgency = OK
|
||||
|
||||
// compute per-project urgencies and work request sizes
|
||||
|
||||
if global_urgency != DONT_NEED
|
||||
for each project P
|
||||
N = ncpus*(fractional potentially runnable resource_share)
|
||||
(i.e. number of results we need on hand to
|
||||
keep enough CPUs busy to maintain resource share)
|
||||
x = time until # of runnable results for P will fall below N
|
||||
if x == 0
|
||||
P.urgency = NEED_IMMEDIATELY
|
||||
else if x < min_queue
|
||||
P.urgency = NEED
|
||||
P.work_request_size = min_queue - x
|
||||
else if global_urgency > OK
|
||||
P.urgency = OK
|
||||
P.work_request_size = 1
|
||||
else
|
||||
P.urgency = DONT_NEED
|
||||
</pre>
|
||||
|
||||
";
|
||||
page_tail();
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
<?php
|
||||
require_once("docutil.php");
|
||||
|
||||
page_head("Server status XML export");
|
||||
|
||||
echo "
|
||||
BOINC-based projects offer the following XML export
|
||||
at <code>URL/server_status.php</code>.
|
||||
These are generally updated every 10 minutes or so -
|
||||
do not poll more often than that.
|
||||
These can be used to make web sites showing
|
||||
the server status of multiple BOINC projects.
|
||||
<p>
|
||||
";
|
||||
echo html_text("
|
||||
<server_status>
|
||||
<update_time>1128535206</update_time>
|
||||
<daemon_status>
|
||||
<daemon>
|
||||
<host>jocelyn</host>
|
||||
<command>BOINC database</command>
|
||||
<status>running</status>
|
||||
</daemon>
|
||||
<daemon>
|
||||
<host>castelli</host>
|
||||
<command>master science database</command>
|
||||
<status>running</status>
|
||||
</daemon>
|
||||
<daemon>
|
||||
<host>klaatu</host>
|
||||
<command>data-driven web pages</command>
|
||||
<status>disabled</status>
|
||||
</daemon>
|
||||
<daemon>
|
||||
<host>galileo</host>
|
||||
<command>feeder</command>
|
||||
<status>not running</status>
|
||||
</daemon>
|
||||
</daemon_status>
|
||||
<database_file_states>
|
||||
<results_ready_to_send>614830</results_ready_to_send>
|
||||
<results_in_progress>1208959</results_in_progress>
|
||||
<workunits_waiting_for_validation>8</workunits_waiting_for_validation>
|
||||
<workunits_waiting_for_assimilation>2</workunits_waiting_for_assimilation>
|
||||
<workunits_waiting_for_deletion>4</workunits_waiting_for_deletion>
|
||||
<results_waiting_for_deletion>15</results_waiting_for_deletion>
|
||||
<transitioner_backlog_hours>0.00083333334</transitioner_backlog_hours>
|
||||
</database_file_states>
|
||||
</server_status>");
|
||||
|
||||
page_tail();
|
||||
|
||||
?>
|
Loading…
Reference in New Issue