- scheduler: add <workload_sim> config option.

If set, the scheduler will use EDF simulation,
    together with the in-progress workload reported by the client,
    to avoid sending results that
    1) will miss their deadline, or
    2) will cause an in-progress result that would otherwise meet its deadline to miss it, or
    3) will make an in-progress result miss its deadline
        by more than is already predicted.
    If this option is not set, or if the client request doesn't
    include a workload description (i.e. the client is old)
    use the existing approach, which assumes there's no workload.
    NOTE: this is experimental.  Production projects should not use it.
- EDF sim: write debug stuff to stderr instead of stdout
- Account manager:
    - if an account is detach_when_done, set dont_request_more_work
    - check dont_request_more_work even for first-time projects
- update_uotd: generate a file for use by Google gadget
- user_links(): use full URLs (so can use in Google gadget)

client/
    acct_mgr.C
    work_fetch.C
html/
    inc/
        uotd.inc
        util.inc
    user/
        uotd_gadget.php (new)
sched/
    Makefile.am
    edf_sim.C
    sched_config.C,h
    sched_resend.C
    sched_send.C,h
    server_types.C,h

svn path=/trunk/boinc/; revision=12639
This commit is contained in:
David Anderson 2007-05-10 21:50:52 +00:00
parent a3c22a1bf7
commit a37403a673
15 changed files with 180 additions and 45 deletions

View File

@ -4739,3 +4739,39 @@ David 10 May 2007
win_build/
sim.vcproj
David 10 May 2007
- scheduler: add <workload_sim> config option.
If set, the scheduler will use EDF simulation,
together with the in-progress workload reported by the client,
to avoid sending results that
1) will miss their deadline, or
2) will cause an in-progress result that would otherwise meet its deadline to miss it, or
3) will make an in-progress result miss its deadline
by more than is already predicted.
If this option is not set, or if the client request doesn't
include a workload description (i.e. the client is old)
use the existing approach, which assumes there's no workload.
NOTE: this is experimental. Production projects should not use it.
- EDF sim: write debug stuff to stderr instead of stdout
- Account manager:
- if an account is detach_when_done, set dont_request_more_work
- check dont_request_more_work even for first-time projects
- update_uotd: generate a file for use by Google gadget
- user_links(): use full URLs (so can use in Google gadget)
client/
acct_mgr.C
work_fetch.C
html/
inc/
uotd.inc
util.inc
user/
uotd_gadget.php (new)
sched/
Makefile.am
edf_sim.C
sched_config.C,h
sched_resend.C
sched_send.C,h
server_types.C,h

View File

@ -411,6 +411,9 @@ void ACCT_MGR_OP::handle_reply(int http_op_retval) {
}
if (acct.detach_when_done.present) {
pp->detach_when_done = acct.detach_when_done.value;
if (pp->detach_when_done) {
pp->dont_request_more_work = true;
}
}
// initiate a scheduler RPC if requested by AMS
@ -448,6 +451,9 @@ void ACCT_MGR_OP::handle_reply(int http_op_retval) {
gstate.add_project(
acct.url.c_str(), acct.authenticator.c_str(), "", true
);
if (acct.dont_request_more_work.present) {
pp->dont_request_more_work = acct.dont_request_more_work.value;
}
}
}
}

View File

@ -147,7 +147,7 @@ PROJECT* CLIENT_STATE::next_project_sched_rpc_pending() {
p->sched_rpc_pending = RPC_REASON_PROJECT_REQ;
p->next_rpc_time = 0;
}
//if (p->suspended_via_gui) continue;
// if (p->suspended_via_gui) continue;
// do the RPC even if suspended.
// This is critical for acct mgrs, to propagate new host CPIDs
//

View File

@ -7,7 +7,8 @@ define('UOTD_THRESHOLD', 7);
function uotd_thumbnail($profile, $user) {
if ($profile->has_picture) {
return "<a href=view_profile.php?userid=$user->id><img border=0 vspace=4 hspace=8 align=left src=" . IMAGE_URL . $user->id . "_sm.jpg></a>";
$ub = URL_BASE;
return "<a href=$ub/view_profile.php?userid=$user->id><img border=0 vspace=4 hspace=8 align=left src=$ub".IMAGE_URL.$user->id."_sm.jpg></a>";
} else {
return "";
}
@ -51,7 +52,7 @@ function select_uotd() {
if ($assigned['mday'] == $now['mday']) {
$user = lookup_user_id($current_uotd->userid);
echo "Already have UOTD for today\n";
generate_uotd_gadget($profile, $user);
generate_uotd_gadget($current_uotd, $user);
exit();
}
}
@ -94,8 +95,6 @@ function select_uotd() {
$profile = mysql_fetch_object($result);
$user = lookup_user_id($profile->userid);
generate_uotd_gadget($profile, $user);
$sql = "UPDATE profile SET uotd_time = ".time()." WHERE userid=$user->id";
mysql_query($sql);
@ -106,6 +105,8 @@ function select_uotd() {
Your profile will be featured on the " . PROJECT . " website for the next 24 hours."
);
echo "Chose user $user->id as UOTD\n";
$profile->uotd_time = time();
generate_uotd_gadget($profile, $user);
}
// This is the default policy for choosing the UOTD on any BOINC project.
@ -153,7 +154,9 @@ function generate_uotd_gadget($profile, $user) {
$x = "<font size='2'>\n";
$gadget = PROFILE_PATH."uotd_gadget.html";
if( $h = fopen($gadget, "w") ){
if( (time()-$profile->uotd_time) <= 86400+3600 ) { // allow for slop
$age = time()-$profile->uotd_time;
echo "age: $age";
if($age <= 86400+3600) { // allow for slop
$x .= uotd_thumbnail($profile, $user);
$x .= user_links($user);
$x .= "&nbsp;&nbsp;".

View File

@ -512,9 +512,10 @@ function sched_stopped() {
}
function user_links($user) {
$x = '<a href="show_user.php?userid='.$user->id.'">'.$user->name.'</a>';
$ub = URL_BASE;
$x = '<a href="'.$ub.'show_user.php?userid='.$user->id.'">'.$user->name.'</a>';
if ($user->has_profile) {
$x .= ' <a href="view_profile.php?userid='.$user->id.'"><img border="0" src="img/head_20.png" alt="User profile image"></a>';
$x .= ' <a href="'.$ub.'view_profile.php?userid='.$user->id.'"><img border="0" src="'.$ub.'img/head_20.png" alt="User profile image"></a>';
}
# Does this project accept donations?
# If so, do you want to have a link next to user name as it appears on the web site?

23
html/user/uotd_gadget.php Normal file
View File

@ -0,0 +1,23 @@
<?php
// Emits an XML gadget module description for this project's User of the Day.
// The module's content is referenced by URL: uotd_gadget.html under
// user_profile/ at URL_BASE.
// NOTE(review): xml_header() and the PROJECT, SYS_ADMIN_EMAIL,
// COPYRIGHT_HOLDER and URL_BASE constants are presumably supplied by the
// two includes below - confirm.
require_once("../inc/xml.inc");
require_once("../project/project.inc");
xml_header();
$project_name = PROJECT;
$content_url = URL_BASE."user_profile/uotd_gadget.html";
// The empty first and last entries reproduce the newline that precedes
// <Module> and the one that follows </Module> in the emitted text.
$module_lines = array(
'',
'<Module>',
'<ModulePrefs',
'title="'.$project_name.' User of the Day"',
'author="BOINC project"',
'author_email="'.SYS_ADMIN_EMAIL.'"',
'author_affiliation="'.COPYRIGHT_HOLDER.'"',
'author_location="Unknown"',
// the description attribute deliberately spans two output lines
"description=\"Shows today's User of the Day for the BOINC-based distributed",
'computing project '.$project_name.'"',
'height="100"',
'/>',
'<Content type="url" href="'.$content_url.'" />',
'</Module>',
'',
);
echo implode("\n", $module_lines);
?>

View File

@ -62,6 +62,7 @@ EXTRA_DIST = \
cgi_SOURCES = \
edf_sim.C \
handle_request.C \
main.C \
sched_array.C \

View File

@ -25,16 +25,21 @@
using std::vector;
//#define TEST
#ifdef TEST
#define INFO0 printf
#define INFO1 printf
#define INFO2 printf
#define DEBUG
#ifdef DEBUG
#define INFO0
#define INFO1
#define INFO2
#else
#define INFO0 //
#define INFO1 //
#define INFO2 //
#endif
// 0 shows initial workload and candidate decisions
// 1 shows function calls and results of sim: make/miss deadline
// 2 shows every step of sim
bool lessthan_deadline(const IP_RESULT& p1, const IP_RESULT& p2) {
if (p1.computation_deadline < p2.computation_deadline) return true;
return false;
@ -48,7 +53,7 @@ void mark_edf_misses (int ncpus, vector<IP_RESULT>& ip_results){
double booked_to[128];
int j;
INFO1("mark_edf_misses\n");
INFO1 fprintf(stderr, "mark_edf_misses\n");
// keeps track of when each cpu is next free
//
@ -77,18 +82,18 @@ void mark_edf_misses (int ncpus, vector<IP_RESULT>& ip_results){
}
booked_to[lowest_booked_cpu] += r.cpu_time_remaining;
INFO2(" running %s on cpu %d; finishes at %f\n",
INFO2 fprintf(stderr, " running %s on cpu %d; finishes at %f\n",
r.name, lowest_booked_cpu, booked_to[lowest_booked_cpu]
);
if (booked_to[lowest_booked_cpu] > r.computation_deadline) {
r.misses_deadline = true;
r.estimated_completion_time = booked_to[lowest_booked_cpu];
INFO1(" %s misses_deadline; est completion %f\n",
INFO1 fprintf(stderr, " %s misses_deadline; est completion %f\n",
r.name, booked_to[lowest_booked_cpu]
);
} else {
r.misses_deadline = false;
INFO1(" %s makes deadline; est completion %f\n",
INFO1 fprintf(stderr, " %s makes deadline; est completion %f\n",
r.name, booked_to[lowest_booked_cpu]
);
// if result doesn't miss its deadline,
@ -106,9 +111,14 @@ void init_ip_results(
vector<IP_RESULT>& ip_results
){
unsigned int i;
INFO0 fprintf(stderr, "init_ip_results; work_buf_min %f ncpus %d:\n", work_buf_min, ncpus);
for (i=0; i<ip_results.size(); i++) {
IP_RESULT& r = ip_results[i];
r.computation_deadline = r.report_deadline - work_buf_min;
INFO0 fprintf(stderr, " %s: deadline %.2f cpu %.2f\n",
r.name, r.computation_deadline, r.cpu_time_remaining
);
}
// run edf simulation to determine whether any results miss their deadline
@ -179,7 +189,7 @@ bool check_candidate (
double booked_to[128]; // keeps track of when each cpu is free
int j;
INFO0 ("check_candidate %s: dl %f cpu %f\n",
INFO0 fprintf(stderr, "check_candidate %s: dl %f cpu %f\n",
candidate.name, candidate.computation_deadline,
candidate.cpu_time_remaining
);
@ -209,7 +219,7 @@ bool check_candidate (
}
}
booked_to[lowest_booked_cpu] += r.cpu_time_remaining;
INFO2(" running %s on cpu %d; finishes at %f\n",
INFO2 fprintf(stderr, " running %s on cpu %d; finishes at %f\n",
r.name, lowest_booked_cpu, booked_to[lowest_booked_cpu]
);
@ -219,7 +229,7 @@ bool check_candidate (
if (booked_to[lowest_booked_cpu] > r.computation_deadline
&& !r.misses_deadline
) {
INFO1 (" %s now misses deadline: %f\n",
INFO0 fprintf(stderr, " cand. fails; %s now misses deadline: %f\n",
r.name, booked_to[lowest_booked_cpu]
);
return false;
@ -230,10 +240,11 @@ bool check_candidate (
if (r.misses_deadline
&& booked_to[lowest_booked_cpu] > r.estimated_completion_time
){
INFO0 (" %s: late result to be returned even later\n", r.name);
INFO1 fprintf(stderr, " cand. fails; late result %s to be returned even later\n", r.name);
return false;
}
}
INFO1 fprintf(stderr, " cand. succeeds\n");
return true;
}

View File

@ -130,6 +130,7 @@ int SCHED_CONFIG::parse(FILE* f) {
else if (xp.parse_int(tag, "grace_period_hours", grace_period_hours)) continue;
else if (xp.parse_int(tag, "delete_delay_hours", delete_delay_hours)) continue;
else if (xp.parse_bool(tag, "workload_sim", workload_sim)) continue;
// some tags that scheduler doesn't care about
//

View File

@ -78,12 +78,24 @@ public:
int sched_debug_level;
int fuh_debug_level;
int reliable_time; // age of workunit before requiring reliable
int reliable_min_avg_credit; // min average credit for a host to be declared reliable
int reliable_max_avg_turnaround; // max average turnaround for a host to be declared reliable
int reliable_priority_on_over; // additional results generated after at least one result is over will have their priority boosted by this amount
int reliable_priority_on_over_except_error; // additional results generated after at least one result is over (unless their is an error) will have their priority boosted by this amount
int reliable_on_priority; // results with a priority equal or greater than this value will be sent to reliable hosts
double reliable_reduced_delay_bound; // Reduce the delay bounds for reliable hosts by this percent
int reliable_min_avg_credit;
// min average credit for a host to be declared reliable
int reliable_max_avg_turnaround;
// max average turnaround for a host to be declared reliable
int reliable_priority_on_over;
// additional results generated after at least one result
// is over will have their priority boosted by this amount
int reliable_priority_on_over_except_error;
// additional results generated after at least one result is over
// (unless there is an error) will have their priority boosted
// by this amount
int reliable_on_priority;
// results with a priority equal or greater than this value
// will be sent to reliable hosts
double reliable_reduced_delay_bound;
// Reduce the delay bounds for reliable hosts by this percent
bool workload_sim;
// Do workload simulation in deciding whether to send a result
int parse(FILE*);
int parse_file(const char* dir=".");

View File

@ -17,8 +17,14 @@
// or write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
// scheduler code related to sending work
// scheduler code related to sending "lost" work
// (i.e. results we sent to the host, but which they're not reporting)
//
// TODO:
// - make sure result is still needed (no canonical result yet)
// - don't send if project has been reset since first send;
// this result may have been the cause of reset
// (need to pass last reset time from client)
#include "config.h"
#include "error_numbers.h"
@ -40,11 +46,9 @@
#define FCGI_ToFILE(x) (x)
#endif
// returns zero if result still feasible.
// result may hve been given a new report time.
// Returns nonzero if result is no longer feasible
// (not enough time to compute it on host).
// In this case result is unchanged.
// Assign a new deadline for the result;
// if it's not likely to complete by this time, return nonzero.
// TODO: EXPLAIN THE FORMULA FOR NEW DEADLINE
//
static int possibly_give_result_new_deadline(
DB_RESULT& result, WORKUNIT& wu, SCHEDULER_REPLY& reply

View File

@ -410,9 +410,27 @@ int wu_is_infeasible(
int reason = 0;
check_app_filter(wu, request, reply, reason);
if (reason) return reason;
check_memory(wu, request, reply, reason);
if (reason) return reason;
check_disk(wu, request, reply, reason);
check_deadline(wu, request, reply, reason);
if (reason) return reason;
if (config.workload_sim && request.have_other_results_list) {
double est_cpu = estimate_cpu_duration(wu, reply);
IP_RESULT candidate("", wu.delay_bound, est_cpu);
strcpy(candidate.name, wu.name);
if (check_candidate(candidate, reply.host.p_ncpus, request.ip_results)) {
// it passed the feasibility test,
// but don't add it to the workload yet;
// wait until we commit to sending it
} else {
reply.wreq.insufficient_speed = true;
reason |= INFEASIBLE_WORKLOAD;
}
} else {
check_deadline(wu, request, reply, reason);
}
return reason;
}
@ -799,6 +817,14 @@ int add_result_to_reply(
request.estimated_delay += wu_seconds_filled/reply.host.p_ncpus;
reply.wreq.nresults++;
if (!resent_result) reply.host.nresults_today++;
// add this result to workload for simulation
//
if (config.workload_sim && request.have_other_results_list) {
double est_cpu = estimate_cpu_duration(wu, reply);
IP_RESULT ipr ("", time(0)+wu.delay_bound, est_cpu);
request.ip_results.push_back(ipr);
}
return 0;
}
@ -837,15 +863,11 @@ int send_work(
reply.wreq.seconds_to_fill = MIN_SECONDS_TO_SEND;
}
// TODO: add code to send results that were sent earlier but not reported.
// Cautions (from John McLeod):
// - make sure the result is still needed
// - don't send if the project has been reset since first send,
// since result may have been cause of the reset
// (need to pass reset time?)
// - make sure can complete by deadline
// - don't send if project is suspended or "no more work" on client
// (need to pass these)
if (config.workload_sim && sreq.have_other_results_list) {
init_ip_results(
sreq.global_prefs.work_buf_min(), reply.host.p_ncpus, sreq.ip_results
);
}
if (config.locality_scheduling) {
reply.wreq.infeasible_only = false;

View File

@ -48,6 +48,7 @@ extern bool app_core_compatible(WORK_REQ& wreq, APP_VERSION& av);
#define INFEASIBLE_CPU 4
#define INFEASIBLE_WORK_BUF 8
#define INFEASIBLE_APP_SETTING 16
#define INFEASIBLE_WORKLOAD 32
extern int wu_is_infeasible(WORKUNIT&, SCHEDULER_REQUEST&, SCHEDULER_REPLY&);

View File

@ -92,10 +92,12 @@ int IP_RESULT::parse(FILE* f) {
report_deadline = 0;
cpu_time_remaining = 0;
strcpy(name, "");
while (fgets(buf, sizeof(buf), f)) {
if (match_tag(buf, "</ip_result>")) {
return 0;
}
if (parse_str(buf, "<name>", name, sizeof(name))) continue;
if (parse_double(buf, "<report_deadline>", report_deadline)) continue;
if (parse_double(buf, "<cpu_time_remaining>", cpu_time_remaining)) continue;
}
@ -257,12 +259,18 @@ int SCHEDULER_REQUEST::parse(FILE* fin) {
continue;
} else if (match_tag(buf, "<in_progress_results>")) {
have_ip_results_list = true;
int i = 0;
double now = time(0);
while (fgets(buf, sizeof(buf), fin)) {
if (match_tag(buf, "</in_progress_results>")) break;
if (match_tag(buf, "<ip_result>")) {
IP_RESULT ir;
retval = ir.parse(fin);
if (!retval) {
if (!strlen(ir.name)) {
sprintf(ir.name, "ip%d", i++);
}
ir.report_deadline -= now;
ip_results.push_back(ir);
}
}
@ -812,6 +820,10 @@ int RESULT::parse_from_client(FILE* fin) {
safe_strcat(stderr_out, buf);
}
continue;
} else if (match_tag(buf, "<platform>")) {
continue;
} else if (match_tag(buf, "<version_num>")) {
continue;
} else {
log_messages.printf(
SCHED_MSG_LOG::MSG_NORMAL,
@ -859,6 +871,8 @@ int HOST::parse(FILE* fin) {
// fields reported by 5.5+ clients, not currently used
//
else if (match_tag(buf, "<p_features>")) continue;
#if 0
else if (match_tag(buf, "<p_capabilities>")) continue;
else if (match_tag(buf, "<accelerators>")) continue;
@ -869,8 +883,7 @@ int HOST::parse(FILE* fin) {
else if (match_tag(buf, "<cache_l1>")) continue;
else if (match_tag(buf, "<cache_l2>")) continue;
else if (match_tag(buf, "<cache_l3>")) continue;
#endif
else {
log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL,
"HOST::parse(): unrecognized: %s\n", buf

View File

@ -108,6 +108,7 @@ struct GLOBAL_PREFS {
void parse(const char* buf, const char* venue);
void defaults();
inline double work_buf_min() {return work_buf_min_days*86400;}
};
struct GUI_URLS {