mirror of https://github.com/BOINC/boinc.git
*** empty log message ***
svn path=/trunk/boinc/; revision=11472
This commit is contained in:
parent
cc0eae7bc6
commit
60e224aa18
|
@ -12220,3 +12220,39 @@ Rytis 4 Nov 2006
|
|||
forum_email.inc
|
||||
project.sample/
|
||||
project.inc
|
||||
|
||||
David 4 Nov 2006
|
||||
- Fix systemic bug where clean-install client requests work
|
||||
before doing benchmarks, and uses default benchmarks,
|
||||
which are extremely low (1e7 flops).
|
||||
The scheduler says "you're not fast enough" and
|
||||
tells it to back off for an extremely long time
|
||||
(20% of its estimated_delay, as calculated based on the low benchmarks).
|
||||
Fixed this by:
|
||||
1) client: change default benchmarks to 1e9 (1 GFLOPS)
|
||||
2) server: don't request backoff in this case
|
||||
- This led to an examination of server-requested backoff.
|
||||
In some cases the server was requesting ridiculous backoff.
|
||||
E.g. if a client didn't have enough available RAM
|
||||
(a function of both actual RAM and RAM-related prefs)
|
||||
it would always request a 48-hour backoff.
|
||||
The naive user changes their prefs, and then waits for 47.9 hours.
|
||||
I think it's better to err on the side of not requesting backoff,
|
||||
and relying on the client's exponential backoff
|
||||
in situations where it asks for work and doesn't get any.
|
||||
|
||||
Also, the backoff values were hardwired throughout the
|
||||
scheduler source code - bad programming practice.
|
||||
So, I collected all the backoff values as #defines in main.h.
|
||||
Mostly these are the same as the old hardwired values, but for the case where
|
||||
we didn't send work because of CPU/disk/mem inadequacy,
|
||||
the server now doesn't request additional backoff
|
||||
(i.e., rely on the client's backoff mechanism)
|
||||
|
||||
client/
|
||||
cs_benchmark.C
|
||||
sched/
|
||||
handle_request.C
|
||||
main.h
|
||||
sched_locality.C
|
||||
sched_send.C
|
||||
|
|
|
@ -69,9 +69,9 @@
|
|||
// defaults in case benchmarks fail or time out.
|
||||
// better to err on the low side so hosts don't get too much work
|
||||
|
||||
#define DEFAULT_FPOPS 1e7
|
||||
#define DEFAULT_IOPS 1e7
|
||||
#define DEFAULT_MEMBW 1e8
|
||||
#define DEFAULT_FPOPS 1e9
|
||||
#define DEFAULT_IOPS 1e9
|
||||
#define DEFAULT_MEMBW 1e9
|
||||
#define DEFAULT_CACHE 1e6
|
||||
|
||||
#define FP_START 2
|
||||
|
|
|
@ -49,7 +49,7 @@ function show_group($name, $list) {
|
|||
if (strstr($file, 'http://')) {
|
||||
$url = $file;
|
||||
} else {
|
||||
$url = "addons/$file";
|
||||
$url = "http://boinc.berkeley.edu/addons/$file";
|
||||
}
|
||||
echo "
|
||||
<tr><td><a href=$url>$itemname</a></td>
|
||||
|
@ -70,7 +70,7 @@ array('BoincLogX_Setup_v1.51.exe', 'BoincLogX', '1.51', 'BoincLogX creates detai
|
|||
array('BoincSpy_455.zip', 'BoincSpy', '1.0 (RC1)', 'Views BOINC project statistics'),
|
||||
array('boincview.zip', 'BoincView', '1.2.5', 'Advanced BOINC manager for networks'),
|
||||
array('setup.exe', 'CPDNSpy', '', 'Stats/Benchmarkprogram. Only for CPDN!!!'),
|
||||
array('sahuserstats.xpi', 'SAH User Statistics', '', 'An externsion for Firefox (may work under Mozilla) that will display your stats for SAH'),
|
||||
array('sahuserstats.xpi', 'SAH User Statistics', '', 'An extension for Firefox (may work under Mozilla) that will display your stats for SAH'),
|
||||
array('SETI_at_BOINCWatch_1.10.15.exe', 'SETI@BOINCWatch', '1.10.15', 'A SETI@Home/BOINC client watcher! User statistics powered by BOINCStats'),
|
||||
array('SETIatBOINCWatch(.NET2.0).exe', 'SETI@BOINCWatch(.NET 2.0)', '3.0.24 BETA', 'A SETI@Home/BOINC client watcher! User statistics powered by BOINCStats. But now built with the .NET environment.'),
|
||||
array('SETIatBOINCWatch(.NET).exe', 'SETI@BOINCWatch(.NET)', '2.0.24 BETA', 'A SETI@Home/BOINC client watcher! User statistics powered by BOINCStats. But now built with the .NET environment.'),
|
||||
|
|
|
@ -230,7 +230,7 @@ int authenticate_user(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
|
|||
"high"
|
||||
);
|
||||
reply.insert_message(um);
|
||||
reply.set_delay(3600);
|
||||
reply.set_delay(DELAY_MISSING_KEY);
|
||||
reply.nucleus_only = true;
|
||||
log_messages.printf(
|
||||
SCHED_MSG_LOG::MSG_CRITICAL,
|
||||
|
@ -286,7 +286,7 @@ lookup_user_and_make_new_host:
|
|||
"low"
|
||||
);
|
||||
reply.insert_message(um);
|
||||
reply.set_delay(3600);
|
||||
reply.set_delay(DELAY_MISSING_KEY);
|
||||
log_messages.printf(
|
||||
SCHED_MSG_LOG::MSG_CRITICAL,
|
||||
"[HOST#<none>] Bad authenticator '%s'\n",
|
||||
|
@ -1026,10 +1026,12 @@ bool unacceptable_os(
|
|||
"Unacceptable OS %s %s\n",
|
||||
sreq.host.os_name, sreq.host.os_version
|
||||
);
|
||||
USER_MESSAGE um("Project only supports MacOS Darwin versions 7.X and above",
|
||||
"low");
|
||||
USER_MESSAGE um(
|
||||
"Project only supports MacOS Darwin versions 7.X and above",
|
||||
"low"
|
||||
);
|
||||
reply.insert_message(um);
|
||||
reply.set_delay(3600*24);
|
||||
reply.set_delay(DELAY_UNACCEPTABLE_OS);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -1069,7 +1071,7 @@ bool wrong_core_client_version(
|
|||
if (wrong_version) {
|
||||
USER_MESSAGE um(msg, "low");
|
||||
reply.insert_message(um);
|
||||
reply.set_delay(3600*24);
|
||||
reply.set_delay(DELAY_BAD_CLIENT_VERSION);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -1175,7 +1177,7 @@ void process_request(
|
|||
) {
|
||||
USER_MESSAGE um("No work available", "low");
|
||||
reply.insert_message(um);
|
||||
reply.set_delay(3600);
|
||||
reply.set_delay(DELAY_NO_WORK_SKIP);
|
||||
if (!config.msg_to_host) {
|
||||
log_messages.printf(
|
||||
SCHED_MSG_LOG::MSG_NORMAL, "No work - skipping DB access\n"
|
||||
|
@ -1270,7 +1272,7 @@ void process_request(
|
|||
SCHED_MSG_LOG::MSG_CRITICAL, "[HOST#%d] platform '%s' not found\n",
|
||||
reply.host.id, sreq.platform_name
|
||||
);
|
||||
reply.set_delay(3600*24);
|
||||
reply.set_delay(DELAY_PLATFORM_UNSUPPORTED);
|
||||
goto leave;
|
||||
}
|
||||
|
||||
|
|
30
sched/main.h
30
sched/main.h
|
@ -22,6 +22,36 @@
|
|||
#include "synch.h"
|
||||
#include "server_types.h"
|
||||
|
||||
// various delay params.
|
||||
// Any of these could be moved into SCHED_CONFIG, if projects need control.
|
||||
|
||||
#define DELAY_MISSING_KEY 3600
|
||||
// account key missing or invalid
|
||||
// Replacement list is parenthesized so the product stays intact
// when the macro appears inside a larger expression (e.g. x / DELAY_...).
#define DELAY_UNACCEPTABLE_OS (3600*24)
|
||||
// Darwin 5.x or 6.x (Einstein@Home only)
|
||||
// Replacement list is parenthesized so the product stays intact
// when the macro appears inside a larger expression.
#define DELAY_BAD_CLIENT_VERSION (3600*24)
|
||||
// client version < config.min_core_client_version
|
||||
#define DELAY_NO_WORK_SKIP 0
|
||||
// no work, config.nowork_skip is set
|
||||
// Rely on the client's exponential backoff in this case
|
||||
// Replacement list is parenthesized so the product stays intact
// when the macro appears inside a larger expression.
#define DELAY_PLATFORM_UNSUPPORTED (3600*24)
|
||||
// platform not in our DB
|
||||
#define DELAY_DISK_SPACE 3600
|
||||
// too little disk space or prefs (locality scheduling)
|
||||
// Replacement list is parenthesized so the product stays intact
// when the macro appears inside a larger expression.
#define DELAY_DELETE_FILE (3600*4)
|
||||
// wait for client to delete a file (locality scheduling)
|
||||
// Replacement list is parenthesized so the product stays intact
// when the macro appears inside a larger expression.
#define DELAY_ANONYMOUS (3600*4)
|
||||
// anonymous platform client doesn't have version
|
||||
#define DELAY_NO_WORK_TEMP 0
|
||||
// client asked for work but we didn't send any,
|
||||
// because of a reason that could be fixed by user
|
||||
// (e.g. prefs, or run BOINC more)
|
||||
// Rely on the client's exponential backoff in this case
|
||||
// Replacement list is parenthesized so the product stays intact
// when the macro appears inside a larger expression.
#define DELAY_NO_WORK_PERM (3600*24)
|
||||
// client asked for work but we didn't send any,
|
||||
// because of a reason not easily changed
|
||||
// (like wrong kind of computer)
|
||||
|
||||
extern SCHED_CONFIG config;
|
||||
extern GUI_URLS gui_urls;
|
||||
extern PROJECT_FILES project_files;
|
||||
|
|
|
@ -87,7 +87,7 @@ int delete_file_from_host(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply) {
|
|||
}
|
||||
USER_MESSAGE um(buf, "high");
|
||||
sreply.insert_message(um);
|
||||
sreply.set_delay(24*3600);
|
||||
sreply.set_delay(DELAY_DISK_SPACE);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -116,7 +116,7 @@ int delete_file_from_host(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply) {
|
|||
sprintf(buf, "BOINC will delete file %s when no longer needed", fi.name);
|
||||
USER_MESSAGE um(buf, "low");
|
||||
sreply.insert_message(um);
|
||||
sreply.set_delay(4*3600);
|
||||
sreply.set_delay(DELAY_DELETE_FILE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -307,7 +307,7 @@ static int possibly_send_result(
|
|||
sprintf(help_msg_buf, "To get more %s work, finish current work, stop BOINC, remove app_info.xml file, and restart.", config.long_name);
|
||||
USER_MESSAGE um(help_msg_buf, "high");
|
||||
reply.insert_message(um);
|
||||
reply.set_delay(4*3600);
|
||||
reply.set_delay(DELAY_ANONYMOUS);
|
||||
}
|
||||
|
||||
if (retval) return ERR_NO_APP_VERSION;
|
||||
|
|
|
@ -408,7 +408,7 @@ int wu_is_infeasible(
|
|||
}
|
||||
reply.wreq.insufficient_mem = true;
|
||||
reason |= INFEASIBLE_MEM;
|
||||
reply.set_delay(24*3600);
|
||||
reply.set_delay(DELAY_NO_WORK_TEMP);
|
||||
}
|
||||
|
||||
if (wu.rsc_disk_bound > reply.wreq.disk_available) {
|
||||
|
@ -427,7 +427,6 @@ int wu_is_infeasible(
|
|||
"[WU#%d %s] needs %d seconds on [HOST#%d]; delay_bound is %d (request.estimated_delay is %f)\n",
|
||||
wu.id, wu.name, (int)ewd, reply.host.id, wu.delay_bound, request.estimated_delay
|
||||
);
|
||||
reply.set_delay(0.2*request.estimated_delay);
|
||||
reply.wreq.insufficient_speed = true;
|
||||
reason |= INFEASIBLE_CPU;
|
||||
}
|
||||
|
@ -910,13 +909,13 @@ int send_work(
|
|||
);
|
||||
|
||||
if (reply.wreq.nresults == 0) {
|
||||
reply.set_delay(3600);
|
||||
reply.set_delay(DELAY_NO_WORK_TEMP);
|
||||
USER_MESSAGE um2("No work sent", "high");
|
||||
reply.insert_message(um2);
|
||||
if (reply.wreq.no_app_version) {
|
||||
USER_MESSAGE um("(there was work for other platforms)", "high");
|
||||
reply.insert_message(um);
|
||||
reply.set_delay(3600*24);
|
||||
reply.set_delay(DELAY_NO_WORK_PERM);
|
||||
}
|
||||
if (reply.wreq.no_allowed_apps_available) {
|
||||
USER_MESSAGE um(
|
||||
|
@ -937,7 +936,6 @@ int send_work(
|
|||
"(there was work but your computer doesn't have enough memory)",
|
||||
"high"
|
||||
);
|
||||
reply.set_delay(24*3600);
|
||||
reply.insert_message(um);
|
||||
}
|
||||
if (reply.wreq.insufficient_speed) {
|
||||
|
@ -979,7 +977,7 @@ int send_work(
|
|||
"high"
|
||||
);
|
||||
reply.insert_message(um);
|
||||
reply.set_delay(3600*24);
|
||||
reply.set_delay(DELAY_NO_WORK_PERM);
|
||||
log_messages.printf(
|
||||
SCHED_MSG_LOG::MSG_NORMAL,
|
||||
"Not sending work because core client is outdated\n"
|
||||
|
|
Loading…
Reference in New Issue