From b152c3f45308e9498fd5d1a783023ef2d4bc3add Mon Sep 17 00:00:00 2001 From: David Anderson Date: Sat, 26 Feb 2005 00:24:37 +0000 Subject: [PATCH] *** empty log message *** svn path=/trunk/boinc/; revision=5530 --- checkin_notes | 29 +++++++++++++++++++++++++++++ db/boinc_db.C | 5 +++-- db/boinc_db.h | 2 ++ db/schema.sql | 1 + html/ops/db_update.php | 3 +++ sched/handle_request.C | 4 +++- sched/sched_send.C | 25 ++++++++++++++++++++----- sched/server_types.h | 2 ++ sched/transitioner.C | 11 +++++++++-- 9 files changed, 72 insertions(+), 10 deletions(-) diff --git a/checkin_notes b/checkin_notes index 7183bc9f90..6a32d2e3b4 100755 --- a/checkin_notes +++ b/checkin_notes @@ -25306,3 +25306,32 @@ David 25 Feb 2005 app.C,h app_control.C cs_apps.C + +David 25 Feb 2005 + - Scheduler: added a mechanism to dynamically adjust the + max # of results sent per day on a per-host basis, + so that "bad hosts" (those that always return either errors or nothing) + are eventually cut back to 1 result per day. + + Added "max_results_day" field to host table. + Initialized to config.daily_result_quota. + When host returns an error result, or a result times out, + decrement max_results_day (but not below 1). + When the host returns a success result, + double max_results_day (but not above config.daily_result_quota). + + Idea is from Bruce Allen + + NOTE TO PROJECTS: you must update your database + (see html/ops/db_update.php) prior to using this on your server. 
+ + db/ + boinc_db.C,h + schema.sql + html/ops/ + db_update.php + sched/ + handle_request.C + sched_send.C + server_types.h + transitioner.C diff --git a/db/boinc_db.C b/db/boinc_db.C index 61eb86bcca..551a213500 100644 --- a/db/boinc_db.C +++ b/db/boinc_db.C @@ -359,7 +359,7 @@ void DB_HOST::db_print(char* buf){ "credit_per_cpu_sec=%.15e, " "venue='%s', nresults_today=%d, " "avg_turnaround=%f, " - "host_cpid='%s', external_ip_addr='%s' ", + "host_cpid='%s', external_ip_addr='%s', max_results_day=%d ", create_time, userid, rpc_seqno, rpc_time, total_credit, expavg_credit, expavg_time, @@ -376,7 +376,7 @@ void DB_HOST::db_print(char* buf){ credit_per_cpu_sec, venue, nresults_today, avg_turnaround, - host_cpid, external_ip_addr + host_cpid, external_ip_addr, max_results_day ); UNESCAPE(domain_name); UNESCAPE(serialnum); @@ -431,6 +431,7 @@ void DB_HOST::db_parse(MYSQL_ROW &r) { avg_turnaround = atof(r[i++]); strcpy2(host_cpid, r[i++]); strcpy2(external_ip_addr, r[i++]); + max_results_day = atoi(r[i++]); } void DB_WORKUNIT::db_print(char* buf){ diff --git a/db/boinc_db.h b/db/boinc_db.h index 92d1190428..763a824133 100755 --- a/db/boinc_db.h +++ b/db/boinc_db.h @@ -263,6 +263,8 @@ struct HOST { double avg_turnaround; // recent average result turnaround time char host_cpid[256]; // host cross-project ID char external_ip_addr[256]; // IP address seen by scheduler + int max_results_day; // maximum # of results to send per day per CPU + // this is dynamically adjusted to limit work sent to bad hosts int parse(FILE*); int parse_time_stats(FILE*); diff --git a/db/schema.sql b/db/schema.sql index b1d269d06a..0555b69840 100644 --- a/db/schema.sql +++ b/db/schema.sql @@ -169,6 +169,7 @@ create table host ( avg_turnaround double not null, host_cpid varchar(254), external_ip_addr varchar(254), + max_results_day integer not null, primary key (id) ) type=InnoDB; diff --git a/html/ops/db_update.php b/html/ops/db_update.php index 8ebef7b02f..640a9b9b57 100644 --- 
a/html/ops/db_update.php +++ b/html/ops/db_update.php @@ -241,6 +241,9 @@ function update_1_20a_2005() { mysql_query("alter table host add external_ip_addr varchar(254)"); } +function update_2_25_2005() { + mysql_query("alter table host add max_results_day integer not null"); + //update_10_25_2004(); ?> diff --git a/sched/handle_request.C b/sched/handle_request.C index 2be735f877..6a01f2f72b 100644 --- a/sched/handle_request.C +++ b/sched/handle_request.C @@ -417,7 +417,7 @@ int handle_results( // database and point srip to it. Quantities that MUST be // read from the DB are those where srip appears as an rval. // These are: id, name, server_state, received_time, hostid. - // // Quantities that must be WRITTEN to the DB are those for + // Quantities that must be WRITTEN to the DB are those for // which srip appears as an lval. These are: // hostid, // teamid, received_time, client_state, cpu_time, exit_status, @@ -535,6 +535,7 @@ int handle_results( "[RESULT#%d %s]: setting outcome SUCCESS\n", srip->id, srip->name ); + reply.got_good_result(); } else { log_messages.printf(SCHED_MSG_LOG::DEBUG, "[RESULT#%d %s]: client_state %d exit_status %d; setting outcome ERROR\n", @@ -542,6 +543,7 @@ int handle_results( ); srip->outcome = RESULT_OUTCOME_CLIENT_ERROR; srip->validate_state = VALIDATE_STATE_INVALID; + reply.got_bad_result(); } } // end of loop over all incoming results diff --git a/sched/sched_send.C b/sched/sched_send.C index 45242deb05..6ec2d8d00e 100644 --- a/sched/sched_send.C +++ b/sched/sched_send.C @@ -534,13 +534,15 @@ bool SCHEDULER_REPLY::work_needed(bool locality_sched) { } if (wreq.nresults >= config.max_wus_to_send) return false; if (config.daily_result_quota) { + if (host.max_results_day == 0) { + host.max_results_day = config.daily_result_quota; + } + // scale daily quota by #CPUs, up to a limit of 4 // - if (host.p_ncpus<4) { - wreq.daily_result_quota = host.p_ncpus*config.daily_result_quota; - } else { - wreq.daily_result_quota = 
4*config.daily_result_quota; - } + int ncpus = host.p_ncpus; + if (ncpus > 4) ncpus = 4; + wreq.daily_result_quota = ncpus*host.max_results_day; if (host.nresults_today >= wreq.daily_result_quota) { wreq.daily_result_quota_exceeded = true; return false; @@ -549,6 +551,19 @@ bool SCHEDULER_REPLY::work_needed(bool locality_sched) { return true; } +void SCHEDULER_REPLY::got_good_result() { + host.max_results_day *= 2; + if (host.max_results_day > config.daily_result_quota) { + host.max_results_day = config.daily_result_quota; + } +} + +void SCHEDULER_REPLY::got_bad_result() { + host.max_results_day -= 1; + if (host.max_results_day < 1) { + host.max_results_day = 1; + } +} int add_result_to_reply( DB_RESULT& result, WORKUNIT& wu, SCHEDULER_REQUEST& request, diff --git a/sched/server_types.h b/sched/server_types.h index 0005af60fc..5d32d93222 100644 --- a/sched/server_types.h +++ b/sched/server_types.h @@ -186,6 +186,8 @@ struct SCHEDULER_REPLY { void insert_message(USER_MESSAGE&); bool work_needed(bool locality_sched=false); void set_delay(double); + void got_good_result(); // adjust max_results_day + void got_bad_result(); // adjust max_results_day }; #endif diff --git a/sched/transitioner.C b/sched/transitioner.C index 5e3cff318d..a5e546231e 100644 --- a/sched/transitioner.C +++ b/sched/transitioner.C @@ -62,7 +62,7 @@ int result_suffix(char* name) { } // The given result just timed out. -// Update the host's avg_turnaround. +// Update the host's avg_turnaround and max_results_day. 
// int penalize_host(int hostid, double delay_bound) { DB_HOST host; @@ -70,7 +70,14 @@ int penalize_host(int hostid, double delay_bound) { int retval = host.lookup_id(hostid); if (retval) return retval; compute_avg_turnaround(host, delay_bound); - sprintf(buf, "avg_turnaround=%f", host.avg_turnaround); + host.max_results_day -= 1; + if (host.max_results_day < 1) { + host.max_results_day = 1; + } + sprintf(buf, + "avg_turnaround=%f, max_results_day=%d", + host.avg_turnaround, host.max_results_day + ); return host.update_field(buf); }