From 01f48513233163ed102462109ac7def9f98f1109 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Mon, 14 May 2007 15:21:38 +0000 Subject: [PATCH] - scheduler: add max_wus_in_progress option. Limits total # of in-progress results per host (independently of #CPUs) sched/ sched_config.C,h sched_resend.C sched_send.C server_types.h svn path=/trunk/boinc/; revision=12661 --- checkin_notes | 11 +++++++++++ sched/main.h | 3 +++ sched/sched_config.C | 1 + sched/sched_config.h | 1 + sched/sched_resend.C | 9 +++++++++ sched/sched_send.C | 28 ++++++++++++++++++++++++++++ sched/server_types.h | 2 ++ 7 files changed, 55 insertions(+) diff --git a/checkin_notes b/checkin_notes index de6a147b0b..68f3e76f6a 100755 --- a/checkin_notes +++ b/checkin_notes @@ -4888,3 +4888,14 @@ Rom 12 May 2007 boinc_ss.rc clientgui/ BOINCGUIApp.rc + +David 14 May 2007 + - scheduler: add max_wus_in_progress option. + Limits total # of in-progress results per host + (independently of #CPUs) + + sched/ + sched_config.C,h + sched_resend.C + sched_send.C + server_types.h diff --git a/sched/main.h b/sched/main.h index 3b6b823aef..4f14ddf093 100644 --- a/sched/main.h +++ b/sched/main.h @@ -52,6 +52,9 @@ // client asked for work but we didn't send any, // because of a reason not easily changed // (like wrong kind of computer) +#define DELAY_NO_WORK_CACHE 20*60 + // client asked for work but we didn't send any, + // because user had too many results in cache. extern SCHED_CONFIG config; extern GUI_URLS gui_urls; diff --git a/sched/sched_config.C b/sched/sched_config.C index 3a51e4074d..92e5928a9f 100644 --- a/sched/sched_config.C +++ b/sched/sched_config.C @@ -92,6 +92,7 @@ int SCHED_CONFIG::parse(FILE* f) { else if (xp.parse_bool(tag, "ignore_delay_bound", ignore_delay_bound)) continue; else if (xp.parse_int(tag, "min_sendwork_interval", min_sendwork_interval)) continue; else if (xp.parse_int(tag, "max_wus_to_send", max_wus_to_send)) continue; + else if (xp.parse_int(tag, "max_wus_in_progress", max_wus_in_progress)) continue; else if (xp.parse_int(tag, "daily_result_quota", daily_result_quota)) continue; else if (xp.parse_int(tag, "uldl_dir_fanout", uldl_dir_fanout)) continue; else if (xp.parse_int(tag, "locality_scheduling_wait_period", locality_scheduling_wait_period)) continue; diff --git a/sched/sched_config.h b/sched/sched_config.h index a63bbe439c..c3ed57c4f8 100644 --- a/sched/sched_config.h +++ b/sched/sched_config.h @@ -47,6 +47,7 @@ public: bool msg_to_host; int min_sendwork_interval; int max_wus_to_send; + int max_wus_in_progress; bool non_cpu_intensive; bool verify_files_on_app_start; bool homogeneous_redundancy; diff --git a/sched/sched_resend.C b/sched/sched_resend.C index 2267bd9d29..c52ee35336 100644 --- a/sched/sched_resend.C +++ b/sched/sched_resend.C @@ -99,6 +99,7 @@ bool resend_lost_work( bool did_any = false; int num_to_resend=0; int num_resent=0; + int num_on_host=0; APP* app; APP_VERSION* avp; int retval; @@ -118,6 +119,7 @@ bool resend_lost_work( ); while (!result.enumerate(buf)) { bool found = false; + num_on_host++; for (i=0; i= limit) { + log_messages.printf( + SCHED_MSG_LOG::MSG_DEBUG, + "cache limit exceeded; %d > %d\n", + wreq.nresults_on_host, config.max_wus_in_progress + ); + wreq.cache_size_exceeded=true; + return false; + } + } return true; } @@ -818,6 +831,7 @@ int add_result_to_reply( reply.wreq.seconds_to_fill -= wu_seconds_filled; request.estimated_delay += wu_seconds_filled/reply.host.p_ncpus; reply.wreq.nresults++; + reply.wreq.nresults_on_host++; if (!resent_result) reply.host.nresults_today++; // add this result to workload for simulation @@ -1021,6 +1035,20 @@ int send_work( (int)(3600*(double)rand()/(double)RAND_MAX); reply.set_delay(delay_time); } + if (reply.wreq.cache_size_exceeded) { + char helpful[256]; + sprintf(helpful, "(reached per-host limit of %d tasks)", + config.max_wus_in_progress + ); + USER_MESSAGE um(helpful, "high"); + reply.insert_message(um); + reply.set_delay(DELAY_NO_WORK_CACHE); + log_messages.printf( + SCHED_MSG_LOG::MSG_NORMAL, + "host %d already has %d result(s) on cache\n", + reply.host.id, reply.wreq.nresults_on_host + ); + } } return 0; diff --git a/sched/server_types.h b/sched/server_types.h index 92ab61ab02..65bcb41272 100644 --- a/sched/server_types.h +++ b/sched/server_types.h @@ -70,6 +70,8 @@ struct WORK_REQ { bool outdated_core; bool daily_result_quota_exceeded; int daily_result_quota; // for this machine: number of cpus * daily_quota/cpu + bool cache_size_exceeded; + int nresults_on_host; void update_for_result(double seconds_filled); };