scheduler: do a better job of accepting results from host machines

which may modify an EXISTING & OVER result in the database.

file_upload_handler: check for a 'stop_upload' trigger file in the
project root directory.  If it exists, return ERR_TRANSIENT to hosts that
attempt uploads.  This is the equivalent of 'stop_sched' for the
scheduler.  (Eric, you may want to check that this is FCGI-compatible.)

svn path=/trunk/boinc/; revision=10227
This commit is contained in:
Bruce Allen 2006-05-31 20:39:32 +00:00
parent 210e22e537
commit 490d4858bf
5 changed files with 88 additions and 15 deletions

View File

@ -5257,3 +5257,22 @@ David 31 May 2006
boinc_db.C
sched/
sched_config.C
Bruce 31 May 2006
- scheduler: do a better job of accepting results from host machines
which may modify an EXISTING & OVER result in the database.
- file_upload_handler: check for a 'stop_upload' trigger file in the
project root directory. If it exists, return ERR_TRANSIENT to hosts that
attempt uploads. This is the equivalent of 'stop_sched' for the
scheduler. (Eric, you may want to check that this is FCGI-compatible.)
db/
boinc_db.C
sched/
handle_request.C
file_upload_handler.C
sched_config.C

View File

@ -1308,6 +1308,7 @@ void SCHED_RESULT_ITEM::parse(MYSQL_ROW& r) {
userid = atoi(r[i++]);
sent_time = atoi(r[i++]);
received_time = atoi(r[i++]);
validate_state = atoi(r[i++]);
}
int DB_SCHED_RESULT_ITEM_SET::add_result(char* result_name) {
@ -1336,7 +1337,8 @@ int DB_SCHED_RESULT_ITEM_SET::enumerate() {
" hostid, "
" userid, "
" sent_time, "
" received_time "
" received_time, "
" validate_state "
"FROM "
" result "
"WHERE "

View File

@ -37,6 +37,7 @@
#include "parse.h"
#include "util.h"
#include "error_numbers.h"
#include "filesys.h"
#include "sched_config.h"
#include "sched_util.h"
@ -582,6 +583,11 @@ int main() {
log_messages.pid = getpid();
log_messages.set_debug_level(DEBUG_LEVEL);
if (boinc_file_exists("../stop_upload")) {
return_error(ERR_TRANSIENT, "Maintenance underway: file uploads are temporarily disabled.");
exit(1);
}
retval = config.parse_file("..");
if (retval) {
exit(1);

View File

@ -568,7 +568,7 @@ int handle_results(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
// read results from database into "result_handler".
// Quantities that must be read from the DB are those
// where srip (see below) appears as an rval.
// These are: id, name, server_state, received_time, hostid.
// These are: id, name, server_state, received_time, hostid, validate_state.
// Quantities that must be written to the DB are those for
// which srip appears as an lval. These are:
// hostid, teamid, received_time, client_state, cpu_time, exit_status,
@ -616,18 +616,64 @@ int handle_results(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
// If one of them fails, set srip->id = 0,
// which suppresses the DB update later on
//
if (srip->server_state == RESULT_SERVER_STATE_OVER && srip->outcome != RESULT_OUTCOME_NO_REPLY) {
log_messages.printf(
SCHED_MSG_LOG::MSG_CRITICAL,
"[HOST#%d] [RESULT#%d %s] result already over\n",
reply.host.id, srip->id, srip->name
);
// perhaps also send a message to the user saying
// that this result was already over?
srip->id = 0;
reply.result_acks.push_back(std::string(rp->name));
continue;
}
// If result is ALREADY over, do we replace it??
//
if (srip->server_state == RESULT_SERVER_STATE_OVER) {
char *dont_replace_result = NULL;
switch (srip->outcome) {
case RESULT_OUTCOME_INIT:
// should never happen!
dont_replace_result = "server shows no record of having sent this work";
break;
case RESULT_OUTCOME_SUCCESS:
// don't replace a successful result!
dont_replace_result = "successful result already reported for this result";
break;
case RESULT_OUTCOME_COULDNT_SEND:
// should never happen!
dont_replace_result = "server records show that this work was not sent (couldn't send)";
break;
case RESULT_OUTCOME_CLIENT_ERROR:
// result was previously cancelled on server side.
// keep this new, real result ONLY if validator has
// not already been invoked.
if (srip->validate_state != VALIDATE_STATE_INIT) {
dont_replace_result = "previous result reported as error, or canceled on server";
}
break;
case RESULT_OUTCOME_NO_REPLY:
// result is late in arriving, but keep it anyhow
break;
case RESULT_OUTCOME_DIDNT_NEED:
// should never happen
dont_replace_result = "server records show that this work was not sent (not needed)";
break;
case RESULT_OUTCOME_VALIDATE_ERROR:
// we already passed through the validator, so
// don't keep the new result
dont_replace_result = "server records show that an invalid result was already returned";
break;
default:
dont_replace_result = "server logic bug; please alert BOINC developers";
break;
}
if (dont_replace_result) {
char buf[256];
log_messages.printf(
SCHED_MSG_LOG::MSG_CRITICAL,
"[HOST#%d] [RESULT#%d %s] result already over [outcome=%d validate_state=%d]\n",
reply.host.id, srip->id, srip->name, srip->outcome, srip->validate_state
);
sprintf(buf, "Completed result %s refused: %s", srip->name, dont_replace_result);
USER_MESSAGE um(buf, "high");
reply.insert_message(um);
srip->id = 0;
reply.result_acks.push_back(std::string(rp->name));
continue;
}
}
if (srip->server_state == RESULT_SERVER_STATE_UNSENT) {
log_messages.printf(
SCHED_MSG_LOG::MSG_CRITICAL,

View File

@ -185,12 +185,12 @@ int SCHED_CONFIG::parse(char* buf) {
#endif
int SCHED_CONFIG::parse_file(const char* dir) {
char* p;
char path[256];
int retval;
sprintf(path, "%s/%s", dir, CONFIG_FILE);
#if 0
char* p;
retval = read_file_malloc(path, p);
if (retval) return retval;
retval = parse(p);