- scheduler: add max_wus_in_progress option.

Limits total # of in-progress results per host
    (independently of #CPUs)

sched/
    sched_config.C,h
    sched_resend.C
    sched_send.C
    server_types.h


svn path=/trunk/boinc/; revision=12661
This commit is contained in:
David Anderson 2007-05-14 15:21:38 +00:00
parent 5f7ed5b8fc
commit 01f4851323
7 changed files with 55 additions and 0 deletions

View File

@ -4888,3 +4888,14 @@ Rom 12 May 2007
boinc_ss.rc
clientgui/
BOINCGUIApp.rc
David 14 May 2007
- scheduler: add max_wus_in_progress option.
Limits total # of in-progress results per host
(independently of #CPUs)
sched/
sched_config.C,h
sched_resend.C
sched_send.C
server_types.h

View File

@ -52,6 +52,9 @@
// client asked for work but we didn't send any,
// because of a reason not easily changed
// (like wrong kind of computer)
#define DELAY_NO_WORK_CACHE (20*60)
// Delay, in seconds (20 minutes), requested of a client that
// asked for work but was sent none because its host already has
// at least max_wus_in_progress results in progress.
// The value is parenthesized so the macro expands as a single
// operand inside larger expressions (e.g. x / DELAY_NO_WORK_CACHE).
extern SCHED_CONFIG config;
extern GUI_URLS gui_urls;

View File

@ -92,6 +92,7 @@ int SCHED_CONFIG::parse(FILE* f) {
else if (xp.parse_bool(tag, "ignore_delay_bound", ignore_delay_bound)) continue;
else if (xp.parse_int(tag, "min_sendwork_interval", min_sendwork_interval)) continue;
else if (xp.parse_int(tag, "max_wus_to_send", max_wus_to_send)) continue;
else if (xp.parse_int(tag, "max_wus_in_progress", max_wus_in_progress)) continue;
else if (xp.parse_int(tag, "daily_result_quota", daily_result_quota)) continue;
else if (xp.parse_int(tag, "uldl_dir_fanout", uldl_dir_fanout)) continue;
else if (xp.parse_int(tag, "locality_scheduling_wait_period", locality_scheduling_wait_period)) continue;

View File

@ -47,6 +47,7 @@ public:
bool msg_to_host;
int min_sendwork_interval;
int max_wus_to_send;
int max_wus_in_progress;
bool non_cpu_intensive;
bool verify_files_on_app_start;
bool homogeneous_redundancy;

View File

@ -99,6 +99,7 @@ bool resend_lost_work(
bool did_any = false;
int num_to_resend=0;
int num_resent=0;
int num_on_host=0;
APP* app;
APP_VERSION* avp;
int retval;
@ -118,6 +119,7 @@ bool resend_lost_work(
);
while (!result.enumerate(buf)) {
bool found = false;
num_on_host++;
for (i=0; i<sreq.other_results.size(); i++) {
OTHER_RESULT& orp = sreq.other_results[i];
if (!strcmp(orp.name.c_str(), result.name)) {
@ -221,6 +223,13 @@ bool resend_lost_work(
did_any = true;
}
}
reply.wreq.nresults_on_host = num_on_host;
log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG,
"[HOST#%d] %d results in progress, set for later checking\n",
reply.host.id, num_on_host
);
if (num_to_resend) {
log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG,
"[HOST#%d] %d lost results, resent %d\n", reply.host.id, num_to_resend, num_resent

View File

@ -677,6 +677,19 @@ bool SCHEDULER_REPLY::work_needed(bool locality_sched) {
return false;
}
}
if (config.max_wus_in_progress) {
int limit = config.max_wus_in_progress;
if (wreq.nresults_on_host >= limit) {
log_messages.printf(
SCHED_MSG_LOG::MSG_DEBUG,
"cache limit exceeded; %d > %d\n",
wreq.nresults_on_host, config.max_wus_in_progress
);
wreq.cache_size_exceeded=true;
return false;
}
}
return true;
}
@ -818,6 +831,7 @@ int add_result_to_reply(
reply.wreq.seconds_to_fill -= wu_seconds_filled;
request.estimated_delay += wu_seconds_filled/reply.host.p_ncpus;
reply.wreq.nresults++;
reply.wreq.nresults_on_host++;
if (!resent_result) reply.host.nresults_today++;
// add this result to workload for simulation
@ -1021,6 +1035,20 @@ int send_work(
(int)(3600*(double)rand()/(double)RAND_MAX);
reply.set_delay(delay_time);
}
if (reply.wreq.cache_size_exceeded) {
char helpful[256];
sprintf(helpful, "(reached per-host limit of %d tasks)",
config.max_wus_in_progress
);
USER_MESSAGE um(helpful, "high");
reply.insert_message(um);
reply.set_delay(DELAY_NO_WORK_CACHE);
log_messages.printf(
SCHED_MSG_LOG::MSG_NORMAL,
"host %d already has %d result(s) on cache\n",
reply.host.id, reply.wreq.nresults_on_host
);
}
}
return 0;

View File

@ -70,6 +70,8 @@ struct WORK_REQ {
bool outdated_core;
bool daily_result_quota_exceeded;
int daily_result_quota; // for this machine: number of cpus * daily_quota/cpu
bool cache_size_exceeded;
int nresults_on_host;
void update_for_result(double seconds_filled);
};