diff --git a/checkin_notes b/checkin_notes index 643a9d8728..e06113d909 100755 --- a/checkin_notes +++ b/checkin_notes @@ -12220,3 +12220,39 @@ Rytis 4 Nov 2006 forum_email.inc project.sample/ project.inc + +David 4 Nov 2006 + - Fix systemic bug where clean-install client requests work + before doing benchmarks, and uses default benchmarks, + which are extremely low (1e7 flops). + The scheduler says "you're not fast enough" and + tells it to back off for an extremely long time + (20% of its estimated_delay, as calculated based on the low benchmarks). + Fixed this by: + 1) client: change default benchmarks to 1e9 (1 GFLOPS) + 2) server: don't request backoff in this case + - This led to an examination of server-requested backoff. + In some cases the server was requesting ridiculous backoff. + E.g. if a client didn't have enough available RAM + (a function of both actual RAM and RAM-related prefs) + it would always request a 48-hour backoff. + The naive user changes their prefs, and then waits for 47.9 hours. + I think it's better to err on the side of not requesting backoff, + and relying on the client's exponential backoff + in situations where it asks for work and doesn't get any. + + Also, the backoff values were hardwired throughout the + scheduler source code - bad programming practice. + So, I collected all the backoff values as #defines in main.h. + Mostly these are the same, but for the case where + we didn't send work because of CPU/disk/mem inadequacy, + the server now doesn't request additional backoff + (i.e., rely on the client's backoff mechanism) + + client/ + cs_benchmark.C + sched/ + handle_request.C + main.h + sched_locality.C + sched_send.C diff --git a/client/cs_benchmark.C b/client/cs_benchmark.C index ea67cc1096..f52cf99703 100644 --- a/client/cs_benchmark.C +++ b/client/cs_benchmark.C @@ -69,9 +69,9 @@ // defaults in case benchmarks fail or time out. 
// better to err on the low side so hosts don't get too much work -#define DEFAULT_FPOPS 1e7 -#define DEFAULT_IOPS 1e7 -#define DEFAULT_MEMBW 1e8 +#define DEFAULT_FPOPS 1e9 +#define DEFAULT_IOPS 1e9 +#define DEFAULT_MEMBW 1e9 #define DEFAULT_CACHE 1e6 #define FP_START 2 diff --git a/doc/addons.php b/doc/addons.php index 7b253237ca..b4c8630eed 100644 --- a/doc/addons.php +++ b/doc/addons.php @@ -49,7 +49,7 @@ function show_group($name, $list) { if (strstr($file, 'http://')) { $url = $file; } else { - $url = "addons/$file"; + $url = "http://boinc.berkeley.edu/addons/$file"; } echo " $itemname @@ -70,7 +70,7 @@ array('BoincLogX_Setup_v1.51.exe', 'BoincLogX', '1.51', 'BoincLogX creates detai array('BoincSpy_455.zip', 'BoincSpy', '1.0 (RC1)', 'Views BOINC project statistics'), array('boincview.zip', 'BoincView', '1.2.5', 'Advanced BOINC manager for networks'), array('setup.exe', 'CPDNSpy', '', 'Stats/Benchmarkprogram. Only for CPDN!!!'), -array('sahuserstats.xpi', 'SAH User Statistics', '', 'An externsion for Firefox (may work under Mozilla) that will display your stats for SAH'), +array('sahuserstats.xpi', 'SAH User Statistics', '', 'An extension for Firefox (may work under Mozilla) that will display your stats for SAH'), array('SETI_at_BOINCWatch_1.10.15.exe', 'SETI@BOINCWatch', '1.10.15', 'A SETI@Home/BOINC client watcher! User statistics powered by BOINCStats'), array('SETIatBOINCWatch(.NET2.0).exe', 'SETI@BOINCWatch(.NET 2.0)', '3.0.24 BETA', 'A SETI@Home/BOINC client watcher! User statistics powered by BOINCStats. But now built with the .NET environment.'), array('SETIatBOINCWatch(.NET).exe', 'SETI@BOINCWatch(.NET)', '2.0.24 BETA', 'A SETI@Home/BOINC client watcher! User statistics powered by BOINCStats. 
But now built with the .NET environment.'), diff --git a/sched/handle_request.C b/sched/handle_request.C index 1835a5799a..aa1ccffab6 100644 --- a/sched/handle_request.C +++ b/sched/handle_request.C @@ -230,7 +230,7 @@ int authenticate_user(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) { "high" ); reply.insert_message(um); - reply.set_delay(3600); + reply.set_delay(DELAY_MISSING_KEY); reply.nucleus_only = true; log_messages.printf( SCHED_MSG_LOG::MSG_CRITICAL, @@ -286,7 +286,7 @@ lookup_user_and_make_new_host: "low" ); reply.insert_message(um); - reply.set_delay(3600); + reply.set_delay(DELAY_MISSING_KEY); log_messages.printf( SCHED_MSG_LOG::MSG_CRITICAL, "[HOST#] Bad authenticator '%s'\n", @@ -1026,10 +1026,12 @@ bool unacceptable_os( "Unacceptable OS %s %s\n", sreq.host.os_name, sreq.host.os_version ); - USER_MESSAGE um("Project only supports MacOS Darwin versions 7.X and above", - "low"); + USER_MESSAGE um( + "Project only supports MacOS Darwin versions 7.X and above", + "low" + ); reply.insert_message(um); - reply.set_delay(3600*24); + reply.set_delay(DELAY_UNACCEPTABLE_OS); return true; } return false; @@ -1069,7 +1071,7 @@ bool wrong_core_client_version( if (wrong_version) { USER_MESSAGE um(msg, "low"); reply.insert_message(um); - reply.set_delay(3600*24); + reply.set_delay(DELAY_BAD_CLIENT_VERSION); return true; } return false; @@ -1175,7 +1177,7 @@ void process_request( ) { USER_MESSAGE um("No work available", "low"); reply.insert_message(um); - reply.set_delay(3600); + reply.set_delay(DELAY_NO_WORK_SKIP); if (!config.msg_to_host) { log_messages.printf( SCHED_MSG_LOG::MSG_NORMAL, "No work - skipping DB access\n" @@ -1270,7 +1272,7 @@ void process_request( SCHED_MSG_LOG::MSG_CRITICAL, "[HOST#%d] platform '%s' not found\n", reply.host.id, sreq.platform_name ); - reply.set_delay(3600*24); + reply.set_delay(DELAY_PLATFORM_UNSUPPORTED); goto leave; } diff --git a/sched/main.h b/sched/main.h index 17c10e8585..3bd182ae6a 100644 --- a/sched/main.h +++ 
b/sched/main.h @@ -22,6 +22,36 @@ #include "synch.h" #include "server_types.h" +// various delay params. +// Any of these could be moved into SCHED_CONFIG, if projects need control. + +#define DELAY_MISSING_KEY 3600 + // account key missing or invalid +#define DELAY_UNACCEPTABLE_OS (3600*24) + // Darwin 5.x or 6.x (E@h only) +#define DELAY_BAD_CLIENT_VERSION (3600*24) + // client version < config.min_core_client_version +#define DELAY_NO_WORK_SKIP 0 + // no work, config.nowork_skip is set + // Rely on the client's exponential backoff in this case +#define DELAY_PLATFORM_UNSUPPORTED (3600*24) + // platform not in our DB +#define DELAY_DISK_SPACE 3600 + // too little disk space or prefs (locality scheduling) +#define DELAY_DELETE_FILE (3600*4) + // wait for client to delete a file (locality scheduling) +#define DELAY_ANONYMOUS (3600*4) + // anonymous platform client doesn't have version +#define DELAY_NO_WORK_TEMP 0 + // client asked for work but we didn't send any, + // because of a reason that could be fixed by user + // (e.g. 
prefs, or run BOINC more) + // Rely on the client's exponential backoff in this case +#define DELAY_NO_WORK_PERM (3600*24) + // client asked for work but we didn't send any, + // because of a reason not easily changed + // (like wrong kind of computer) + extern SCHED_CONFIG config; extern GUI_URLS gui_urls; extern PROJECT_FILES project_files; diff --git a/sched/sched_locality.C b/sched/sched_locality.C index ab5281a736..9c723c3ec5 100644 --- a/sched/sched_locality.C +++ b/sched/sched_locality.C @@ -87,7 +87,7 @@ int delete_file_from_host(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply) { } USER_MESSAGE um(buf, "high"); sreply.insert_message(um); - sreply.set_delay(24*3600); + sreply.set_delay(DELAY_DISK_SPACE); return 1; } @@ -116,7 +116,7 @@ int delete_file_from_host(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply) { sprintf(buf, "BOINC will delete file %s when no longer needed", fi.name); USER_MESSAGE um(buf, "low"); sreply.insert_message(um); - sreply.set_delay(4*3600); + sreply.set_delay(DELAY_DELETE_FILE); return 0; } @@ -307,7 +307,7 @@ static int possibly_send_result( sprintf(help_msg_buf, "To get more %s work, finish current work, stop BOINC, remove app_info.xml file, and restart.", config.long_name); USER_MESSAGE um(help_msg_buf, "high"); reply.insert_message(um); - reply.set_delay(4*3600); + reply.set_delay(DELAY_ANONYMOUS); } if (retval) return ERR_NO_APP_VERSION; diff --git a/sched/sched_send.C b/sched/sched_send.C index ba9f97dfc8..5b207c79c3 100644 --- a/sched/sched_send.C +++ b/sched/sched_send.C @@ -408,7 +408,7 @@ int wu_is_infeasible( } reply.wreq.insufficient_mem = true; reason |= INFEASIBLE_MEM; - reply.set_delay(24*3600); + reply.set_delay(DELAY_NO_WORK_TEMP); } if (wu.rsc_disk_bound > reply.wreq.disk_available) { @@ -427,7 +427,6 @@ int wu_is_infeasible( "[WU#%d %s] needs %d seconds on [HOST#%d]; delay_bound is %d (request.estimated_delay is %f)\n", wu.id, wu.name, (int)ewd, reply.host.id, wu.delay_bound, request.estimated_delay ); - 
reply.set_delay(0.2*request.estimated_delay); reply.wreq.insufficient_speed = true; reason |= INFEASIBLE_CPU; } @@ -910,13 +909,13 @@ int send_work( ); if (reply.wreq.nresults == 0) { - reply.set_delay(3600); + reply.set_delay(DELAY_NO_WORK_TEMP); USER_MESSAGE um2("No work sent", "high"); reply.insert_message(um2); if (reply.wreq.no_app_version) { USER_MESSAGE um("(there was work for other platforms)", "high"); reply.insert_message(um); - reply.set_delay(3600*24); + reply.set_delay(DELAY_NO_WORK_PERM); } if (reply.wreq.no_allowed_apps_available) { USER_MESSAGE um( @@ -937,7 +936,6 @@ int send_work( "(there was work but your computer doesn't have enough memory)", "high" ); - reply.set_delay(24*3600); reply.insert_message(um); } if (reply.wreq.insufficient_speed) { @@ -979,7 +977,7 @@ int send_work( "high" ); reply.insert_message(um); - reply.set_delay(3600*24); + reply.set_delay(DELAY_NO_WORK_PERM); log_messages.printf( SCHED_MSG_LOG::MSG_NORMAL, "Not sending work because core client is outdated\n"