2005-01-20 23:22:22 +00:00
|
|
|
// Berkeley Open Infrastructure for Network Computing
|
|
|
|
// http://boinc.berkeley.edu
|
|
|
|
// Copyright (C) 2005 University of California
|
2004-07-13 12:55:22 +00:00
|
|
|
//
|
2005-01-20 23:22:22 +00:00
|
|
|
// This is free software; you can redistribute it and/or
|
|
|
|
// modify it under the terms of the GNU Lesser General Public
|
|
|
|
// License as published by the Free Software Foundation;
|
|
|
|
// either version 2.1 of the License, or (at your option) any later version.
|
2004-07-13 12:55:22 +00:00
|
|
|
//
|
2005-01-20 23:22:22 +00:00
|
|
|
// This software is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
2004-07-13 12:55:22 +00:00
|
|
|
//
|
2005-01-20 23:22:22 +00:00
|
|
|
// To view the GNU Lesser General Public License visit
|
|
|
|
// http://www.gnu.org/copyleft/lesser.html
|
|
|
|
// or write to the Free Software Foundation, Inc.,
|
|
|
|
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
2004-07-13 12:55:22 +00:00
|
|
|
|
2005-02-22 20:12:31 +00:00
|
|
|
// validator - check and validate results, and grant credit
|
2004-07-13 12:55:22 +00:00
|
|
|
// -app appname
|
|
|
|
// [-d debug_level]
|
2005-02-22 20:12:31 +00:00
|
|
|
// [-one_pass_N_WU N] // Validate only N WU in one pass, then exit
|
|
|
|
// [-one_pass] // make one pass through WU table, then exit
|
|
|
|
// [-asynch] // fork, run in separate process
|
|
|
|
// [-mod n i] // process only WUs with (id mod n) == i
|
2007-04-13 04:22:20 +00:00
|
|
|
// [-max_granted_credit X] // limit maximum granted credit to X
|
|
|
|
// [-max_claimed_credit Y] // invalid if claims more than Y
|
|
|
|
// [-grant_claimed_credit] // just grant whatever is claimed
// [-sleep_interval n] // sleep n seconds after a scan that found nothing to do
|
2004-07-13 12:55:22 +00:00
|
|
|
//
|
|
|
|
// This program must be linked with two project-specific functions:
|
2004-09-09 21:52:20 +00:00
|
|
|
// check_set() and check_pair().
|
|
|
|
// See doc/validate.php for a description.
|
2004-07-13 12:55:22 +00:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
2005-11-21 18:34:44 +00:00
|
|
|
#include "config.h"
|
2004-07-13 12:55:22 +00:00
|
|
|
#include <unistd.h>
|
2004-07-13 13:54:09 +00:00
|
|
|
#include <cmath>
|
2004-07-13 12:55:22 +00:00
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include "boinc_db.h"
|
|
|
|
#include "util.h"
|
2004-09-09 21:52:20 +00:00
|
|
|
#include "error_numbers.h"
|
2004-07-13 12:55:22 +00:00
|
|
|
#include "sched_config.h"
|
|
|
|
#include "sched_util.h"
|
|
|
|
#include "sched_msgs.h"
|
2007-01-12 17:42:29 +00:00
|
|
|
#include "validate_util.h"
|
2004-07-13 12:55:22 +00:00
|
|
|
|
|
|
|
// Lock-file and PID-file names.
// NOTE(review): neither macro is referenced in the visible part of this file.
#define LOCKFILE "validate.out"
#define PIDFILE "validate.pid"

// Upper bound passed to validator.enumerate() per call
// (replaced by N when -one_pass_N_WU N is given).
#define SELECT_LIMIT 1000

// Default value of sleep_interval.
#define SLEEP_PERIOD 5

// Argument to sleep() in main_loop() after a scan that found nothing to do;
// overridden by -sleep_interval.
int sleep_interval = SLEEP_PERIOD;
|
|
|
|
|
2005-09-10 06:09:55 +00:00
|
|
|
// What handle_wu() does to a workunit's transition_time before writing
// the workunit back to the database.
//
typedef enum {
    NEVER,          // set transition_time to INT_MAX
    DELAYED,        // pull transition_time in to at most 6 hours from now
    IMMEDIATE,      // set transition_time to the current time
    NO_CHANGE       // leave transition_time as it is
} TRANSITION_TIME;
|
|
|
|
|
2004-09-09 21:52:20 +00:00
|
|
|
extern int check_set(
|
2004-10-08 22:41:33 +00:00
|
|
|
vector<RESULT>&, WORKUNIT& wu, int& canonical, double& credit,
|
2004-09-09 21:52:20 +00:00
|
|
|
bool& retry
|
|
|
|
);
|
|
|
|
extern int check_pair(
|
2004-09-09 22:25:10 +00:00
|
|
|
RESULT & new_result, RESULT const& canonical_result, bool& retry
|
2004-09-09 21:52:20 +00:00
|
|
|
);
|
2004-07-13 12:55:22 +00:00
|
|
|
|
|
|
|
// Project configuration, filled in by config.parse_file() in main().
SCHED_CONFIG config;

// Name of the application to validate (-app); required.
char app_name[256];

// -mod n i: process only WUs with (id mod n) == i.
// Both values are passed to validator.enumerate().
int wu_id_modulus=0;
int wu_id_remainder=0;

// -one_pass_N_WU N: if nonzero, used instead of SELECT_LIMIT as the
// limit passed to validator.enumerate(); also turns on one_pass.
int one_pass_N_WU=0;

// If set, main_loop() exits after the first scan that finds nothing to do.
bool one_pass = false;

// -max_granted_credit X: if nonzero, granted credit is capped at X.
double max_granted_credit = 0;

// -max_claimed_credit Y.
// NOTE(review): set by main() but not read anywhere in the visible part of
// this file; presumably used by the project-specific code - confirm.
double max_claimed_credit = 0;

// -grant_claimed_credit: grant each valid result its own claimed credit
// instead of the credit chosen for the workunit.
bool grant_claimed_credit = false;
|
2005-02-22 20:12:31 +00:00
|
|
|
|
2004-07-13 12:55:22 +00:00
|
|
|
|
2006-12-29 22:16:08 +00:00
|
|
|
// here when a result has been validated and its granted_credit as been set.
|
2004-07-13 12:55:22 +00:00
|
|
|
// grant credit to host, user and team
|
|
|
|
//
|
2006-12-29 22:16:08 +00:00
|
|
|
int grant_credit(RESULT& result) {
|
2004-07-13 12:55:22 +00:00
|
|
|
DB_USER user;
|
|
|
|
DB_HOST host;
|
|
|
|
DB_TEAM team;
|
|
|
|
int retval;
|
2004-10-04 23:59:51 +00:00
|
|
|
char buf[256];
|
2004-07-13 12:55:22 +00:00
|
|
|
|
|
|
|
retval = host.lookup_id(result.hostid);
|
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_CRITICAL,
|
2004-07-13 12:55:22 +00:00
|
|
|
"[RESULT#%d] lookup of host %d failed %d\n",
|
|
|
|
result.id, result.hostid, retval
|
|
|
|
);
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
retval = user.lookup_id(host.userid);
|
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_CRITICAL,
|
2004-07-13 12:55:22 +00:00
|
|
|
"[RESULT#%d] lookup of user %d failed %d\n",
|
|
|
|
result.id, host.userid, retval
|
|
|
|
);
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
2007-01-12 17:42:29 +00:00
|
|
|
update_average(
|
|
|
|
result.sent_time, result.granted_credit, CREDIT_HALF_LIFE,
|
|
|
|
user.expavg_credit, user.expavg_time
|
|
|
|
);
|
2004-10-04 23:59:51 +00:00
|
|
|
sprintf(
|
2006-06-19 22:20:24 +00:00
|
|
|
buf, "total_credit=total_credit+%f, expavg_credit=%f, expavg_time=%f",
|
2006-12-29 22:16:08 +00:00
|
|
|
result.granted_credit, user.expavg_credit, user.expavg_time
|
2004-10-04 23:59:51 +00:00
|
|
|
);
|
|
|
|
retval = user.update_field(buf);
|
2004-07-13 12:55:22 +00:00
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_CRITICAL,
|
2004-07-13 12:55:22 +00:00
|
|
|
"[RESULT#%d] update of user %d failed %d\n",
|
|
|
|
result.id, host.userid, retval
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2006-12-29 22:16:08 +00:00
|
|
|
update_average(
|
|
|
|
result.sent_time, result.granted_credit, CREDIT_HALF_LIFE,
|
|
|
|
host.expavg_credit, host.expavg_time
|
|
|
|
);
|
2004-12-14 21:31:57 +00:00
|
|
|
|
|
|
|
double turnaround = result.received_time - result.sent_time;
|
2005-02-23 00:11:59 +00:00
|
|
|
compute_avg_turnaround(host, turnaround);
|
2006-09-11 11:41:54 +00:00
|
|
|
|
2007-01-12 17:42:29 +00:00
|
|
|
// compute new credit per CPU time
|
|
|
|
//
|
|
|
|
retval = update_credit_per_cpu_sec(
|
|
|
|
result.granted_credit, result.cpu_time, host.credit_per_cpu_sec
|
|
|
|
);
|
|
|
|
if (retval) {
|
2006-09-11 11:41:54 +00:00
|
|
|
log_messages.printf(
|
|
|
|
SCHED_MSG_LOG::MSG_CRITICAL,
|
|
|
|
"[RESULT#%d][HOST#%d] claimed too much credit (%f) in too little CPU time (%f)\n",
|
2006-12-29 22:16:08 +00:00
|
|
|
result.id, result.hostid, result.granted_credit, result.cpu_time
|
2006-09-11 11:41:54 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2004-10-04 23:59:51 +00:00
|
|
|
sprintf(
|
2004-12-14 21:31:57 +00:00
|
|
|
buf,
|
2006-09-11 11:41:54 +00:00
|
|
|
"total_credit=total_credit+%f, expavg_credit=%f, expavg_time=%f, avg_turnaround=%f, credit_per_cpu_sec=%f",
|
2006-12-29 22:16:08 +00:00
|
|
|
result.granted_credit, host.expavg_credit, host.expavg_time, host.avg_turnaround, host.credit_per_cpu_sec
|
2004-10-04 23:59:51 +00:00
|
|
|
);
|
|
|
|
retval = host.update_field(buf);
|
2004-07-13 12:55:22 +00:00
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_CRITICAL,
|
2004-07-13 12:55:22 +00:00
|
|
|
"[RESULT#%d] update of host %d failed %d\n",
|
|
|
|
result.id, result.hostid, retval
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (user.teamid) {
|
|
|
|
retval = team.lookup_id(user.teamid);
|
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_CRITICAL,
|
2004-07-13 12:55:22 +00:00
|
|
|
"[RESULT#%d] lookup of team %d failed %d\n",
|
|
|
|
result.id, user.teamid, retval
|
|
|
|
);
|
|
|
|
return retval;
|
|
|
|
}
|
2006-12-29 22:16:08 +00:00
|
|
|
update_average(result.sent_time, result.granted_credit, CREDIT_HALF_LIFE, team.expavg_credit, team.expavg_time);
|
2004-10-04 23:59:51 +00:00
|
|
|
sprintf(
|
2006-06-19 22:20:24 +00:00
|
|
|
buf, "total_credit=total_credit+%f, expavg_credit=%f, expavg_time=%f",
|
2006-12-29 22:16:08 +00:00
|
|
|
result.granted_credit, team.expavg_credit, team.expavg_time
|
2004-10-04 23:59:51 +00:00
|
|
|
);
|
|
|
|
retval = team.update_field(buf);
|
2004-07-13 12:55:22 +00:00
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_CRITICAL,
|
2004-07-13 12:55:22 +00:00
|
|
|
"[RESULT#%d] update of team %d failed %d\n",
|
|
|
|
result.id, team.id, retval
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2006-11-27 01:07:00 +00:00
|
|
|
// Return zero iff we resolved the WU
|
|
|
|
//
|
|
|
|
int handle_wu(
|
2004-10-08 23:07:59 +00:00
|
|
|
DB_VALIDATOR_ITEM_SET& validator, std::vector<VALIDATOR_ITEM>& items
|
|
|
|
) {
|
2005-09-10 06:09:55 +00:00
|
|
|
int canonical_result_index = -1;
|
2004-09-09 21:52:20 +00:00
|
|
|
bool update_result, retry;
|
2005-09-10 06:09:55 +00:00
|
|
|
TRANSITION_TIME transition_time = NO_CHANGE;
|
|
|
|
int retval = 0, canonicalid = 0, x;
|
2004-10-08 22:41:33 +00:00
|
|
|
double credit = 0;
|
2004-07-13 12:55:22 +00:00
|
|
|
unsigned int i;
|
2004-10-08 22:41:33 +00:00
|
|
|
|
2004-10-10 03:16:30 +00:00
|
|
|
WORKUNIT& wu = items[0].wu;
|
2004-10-08 23:07:59 +00:00
|
|
|
|
2004-10-08 23:59:44 +00:00
|
|
|
if (wu.canonical_resultid) {
|
2004-07-13 12:55:22 +00:00
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_NORMAL,
|
2004-10-08 22:41:33 +00:00
|
|
|
"[WU#%d %s] handle_wu(): Already has canonical result %d\n",
|
2004-10-08 23:59:44 +00:00
|
|
|
wu.id, wu.name, wu.canonical_resultid
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
|
|
|
++log_messages;
|
|
|
|
|
|
|
|
// Here if WU already has a canonical result.
|
|
|
|
// Get unchecked results and see if they match the canonical result
|
|
|
|
//
|
2004-10-08 22:41:33 +00:00
|
|
|
for (i=0; i<items.size(); i++) {
|
2004-10-10 03:16:30 +00:00
|
|
|
RESULT& result = items[i].res;
|
|
|
|
|
2004-09-09 21:52:20 +00:00
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_NORMAL,
|
2004-10-08 22:41:33 +00:00
|
|
|
"[WU#%d %s] handle_wu(): Analyzing result %d\n",
|
2004-10-10 03:16:30 +00:00
|
|
|
wu.id, wu.name, result.id
|
2004-10-08 22:41:33 +00:00
|
|
|
);
|
2004-10-10 03:16:30 +00:00
|
|
|
if (result.id == wu.canonical_resultid) {
|
|
|
|
canonical_result_index = i;
|
|
|
|
}
|
2004-10-08 22:41:33 +00:00
|
|
|
}
|
2004-11-15 01:58:30 +00:00
|
|
|
if (canonical_result_index == -1) {
|
2004-07-13 12:55:22 +00:00
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_CRITICAL,
|
2004-11-15 01:58:30 +00:00
|
|
|
"[WU#%d %s] Can't find canonical result %d\n",
|
2004-10-08 23:59:44 +00:00
|
|
|
wu.id, wu.name, wu.canonical_resultid
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
2006-11-27 01:07:00 +00:00
|
|
|
return 0;
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
|
|
|
|
2004-10-10 03:16:30 +00:00
|
|
|
RESULT& canonical_result = items[canonical_result_index].res;
|
|
|
|
|
2004-07-13 12:55:22 +00:00
|
|
|
// scan this WU's results, and check the unchecked ones
|
2004-10-10 03:16:30 +00:00
|
|
|
//
|
2004-10-08 22:41:33 +00:00
|
|
|
for (i=0; i<items.size(); i++) {
|
2004-10-10 03:16:30 +00:00
|
|
|
RESULT& result = items[i].res;
|
2004-10-08 22:41:33 +00:00
|
|
|
|
2004-12-14 00:57:03 +00:00
|
|
|
if (result.server_state != RESULT_SERVER_STATE_OVER) continue;
|
|
|
|
if (result.outcome != RESULT_OUTCOME_SUCCESS) continue;
|
|
|
|
switch (result.validate_state) {
|
|
|
|
case VALIDATE_STATE_INIT:
|
|
|
|
case VALIDATE_STATE_INCONCLUSIVE:
|
|
|
|
break;
|
|
|
|
default:
|
2004-10-08 22:41:33 +00:00
|
|
|
continue;
|
2004-10-10 03:16:30 +00:00
|
|
|
}
|
2004-10-08 22:41:33 +00:00
|
|
|
|
2006-11-27 01:07:00 +00:00
|
|
|
check_pair(result, canonical_result, retry);
|
2005-09-10 06:09:55 +00:00
|
|
|
if (retry) transition_time = DELAYED;
|
2004-09-09 21:52:20 +00:00
|
|
|
update_result = false;
|
2004-09-10 20:33:05 +00:00
|
|
|
|
2004-10-13 21:02:43 +00:00
|
|
|
if (result.outcome == RESULT_OUTCOME_VALIDATE_ERROR) {
|
2004-09-10 20:33:05 +00:00
|
|
|
update_result = true;
|
|
|
|
}
|
2004-11-15 01:58:30 +00:00
|
|
|
|
2004-12-14 00:57:03 +00:00
|
|
|
// this might be last result, so let validator
|
|
|
|
// trigger file delete etc. if needed
|
2004-11-15 01:58:30 +00:00
|
|
|
//
|
2005-09-10 06:09:55 +00:00
|
|
|
transition_time = IMMEDIATE;
|
2004-11-15 01:58:30 +00:00
|
|
|
|
2004-09-09 21:52:20 +00:00
|
|
|
switch (result.validate_state) {
|
|
|
|
case VALIDATE_STATE_VALID:
|
|
|
|
update_result = true;
|
2006-11-28 03:27:13 +00:00
|
|
|
result.granted_credit = grant_claimed_credit ? result.claimed_credit : wu.canonical_credit;
|
|
|
|
if (max_granted_credit && result.granted_credit > max_granted_credit) {
|
|
|
|
result.granted_credit = max_granted_credit;
|
|
|
|
}
|
2004-09-09 21:52:20 +00:00
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_NORMAL,
|
2004-09-09 21:52:20 +00:00
|
|
|
"[RESULT#%d %s] pair_check() matched: setting result to valid; credit %f\n",
|
|
|
|
result.id, result.name, result.granted_credit
|
|
|
|
);
|
2006-12-29 22:16:08 +00:00
|
|
|
retval = grant_credit(result);
|
2004-09-09 21:52:20 +00:00
|
|
|
if (retval) {
|
2004-07-13 12:55:22 +00:00
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_NORMAL,
|
2004-09-09 21:52:20 +00:00
|
|
|
"[RESULT#%d %s] Can't grant credit: %d\n",
|
|
|
|
result.id, result.name, retval
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
|
|
|
}
|
2004-09-09 21:52:20 +00:00
|
|
|
break;
|
|
|
|
case VALIDATE_STATE_INVALID:
|
|
|
|
update_result = true;
|
2004-07-13 12:55:22 +00:00
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_NORMAL,
|
2004-09-09 21:52:20 +00:00
|
|
|
"[RESULT#%d %s] pair_check() didn't match: setting result to invalid\n",
|
|
|
|
result.id, result.name
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
2004-09-09 21:52:20 +00:00
|
|
|
}
|
|
|
|
if (update_result) {
|
2004-11-15 01:58:30 +00:00
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_NORMAL,
|
2004-11-12 23:36:24 +00:00
|
|
|
"[RESULT#%d %s] granted_credit %f\n",
|
2004-10-10 03:16:30 +00:00
|
|
|
result.id, result.name, result.granted_credit
|
2004-10-04 23:59:51 +00:00
|
|
|
);
|
|
|
|
|
2004-10-08 22:41:33 +00:00
|
|
|
retval = validator.update_result(result);
|
2004-09-09 21:52:20 +00:00
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_CRITICAL,
|
2004-09-09 21:52:20 +00:00
|
|
|
"[RESULT#%d %s] Can't update result: %d\n",
|
|
|
|
result.id, result.name, retval
|
|
|
|
);
|
|
|
|
}
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
vector<RESULT> results;
|
2004-12-10 22:04:20 +00:00
|
|
|
int nsuccess_results;
|
2004-07-13 12:55:22 +00:00
|
|
|
|
|
|
|
// Here if WU doesn't have a canonical result yet.
|
|
|
|
// Try to get one
|
|
|
|
|
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_NORMAL,
|
2004-10-10 03:16:30 +00:00
|
|
|
"[WU#%d %s] handle_wu(): No canonical result yet\n",
|
|
|
|
wu.id, wu.name
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
|
|
|
++log_messages;
|
|
|
|
|
2004-12-10 22:04:20 +00:00
|
|
|
// make a vector of only successful results
|
2004-11-15 01:58:30 +00:00
|
|
|
//
|
2004-10-08 22:41:33 +00:00
|
|
|
for (i=0; i<items.size(); i++) {
|
2004-10-10 03:16:30 +00:00
|
|
|
RESULT& result = items[i].res;
|
2004-10-08 22:41:33 +00:00
|
|
|
|
2004-12-10 22:04:20 +00:00
|
|
|
if ((result.server_state == RESULT_SERVER_STATE_OVER) &&
|
2004-11-15 01:58:30 +00:00
|
|
|
(result.outcome == RESULT_OUTCOME_SUCCESS)
|
2004-10-10 03:16:30 +00:00
|
|
|
) {
|
2004-11-15 01:58:30 +00:00
|
|
|
results.push_back(result);
|
2004-10-10 03:16:30 +00:00
|
|
|
}
|
2004-10-08 22:41:33 +00:00
|
|
|
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
2004-10-08 22:41:33 +00:00
|
|
|
|
2004-07-13 12:55:22 +00:00
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_DEBUG, "[WU#%d %s] Found %d successful results\n",
|
2004-10-10 03:16:30 +00:00
|
|
|
wu.id, wu.name, (int)results.size()
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
2004-10-10 03:16:30 +00:00
|
|
|
if (results.size() >= (unsigned int)wu.min_quorum) {
|
2004-07-13 12:55:22 +00:00
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_DEBUG,
|
2004-10-10 03:16:30 +00:00
|
|
|
"[WU#%d %s] Enough for quorum, checking set.\n",
|
|
|
|
wu.id, wu.name
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
2004-10-08 22:41:33 +00:00
|
|
|
|
2004-09-09 21:52:20 +00:00
|
|
|
retval = check_set(results, wu, canonicalid, credit, retry);
|
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_CRITICAL,
|
2004-10-08 22:41:33 +00:00
|
|
|
"[WU#%d %s] check_set returned %d, exiting\n",
|
2004-10-10 03:16:30 +00:00
|
|
|
wu.id, wu.name, retval
|
2004-09-09 21:52:20 +00:00
|
|
|
);
|
2006-11-27 01:07:00 +00:00
|
|
|
return retval;
|
2004-09-09 21:52:20 +00:00
|
|
|
}
|
2005-09-10 06:09:55 +00:00
|
|
|
if (retry) transition_time = DELAYED;
|
2004-09-10 20:33:05 +00:00
|
|
|
|
2004-12-10 22:04:20 +00:00
|
|
|
// scan results.
|
|
|
|
// update as needed, and count the # of results
|
|
|
|
// that are still outcome=SUCCESS
|
|
|
|
// (some may have changed to VALIDATE_ERROR)
|
2004-09-10 20:33:05 +00:00
|
|
|
//
|
2004-12-10 22:04:20 +00:00
|
|
|
nsuccess_results = 0;
|
2004-09-10 20:33:05 +00:00
|
|
|
for (i=0; i<results.size(); i++) {
|
2004-11-15 01:58:30 +00:00
|
|
|
update_result = false;
|
2004-10-10 03:16:30 +00:00
|
|
|
RESULT& result = results[i];
|
2004-10-13 21:02:43 +00:00
|
|
|
if (result.outcome == RESULT_OUTCOME_VALIDATE_ERROR) {
|
2005-09-10 06:09:55 +00:00
|
|
|
transition_time = IMMEDIATE;
|
2004-11-15 01:58:30 +00:00
|
|
|
update_result = true;
|
2004-12-10 22:04:20 +00:00
|
|
|
} else {
|
|
|
|
nsuccess_results++;
|
2004-11-15 01:58:30 +00:00
|
|
|
}
|
|
|
|
|
2004-12-14 00:57:03 +00:00
|
|
|
switch (result.validate_state) {
|
|
|
|
case VALIDATE_STATE_VALID:
|
|
|
|
// grant credit for valid results
|
|
|
|
//
|
2004-11-15 01:58:30 +00:00
|
|
|
update_result = true;
|
2006-12-29 22:16:08 +00:00
|
|
|
result.granted_credit = grant_claimed_credit ? result.claimed_credit : credit;
|
2006-11-28 03:27:13 +00:00
|
|
|
if (max_granted_credit && result.granted_credit > max_granted_credit) {
|
|
|
|
result.granted_credit = max_granted_credit;
|
|
|
|
}
|
2006-12-29 22:16:08 +00:00
|
|
|
retval = grant_credit(result);
|
2004-11-15 01:58:30 +00:00
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_DEBUG,
|
2004-11-15 01:58:30 +00:00
|
|
|
"[RESULT#%d %s] grant_credit() failed: %d\n",
|
|
|
|
result.id, result.name, retval
|
|
|
|
);
|
|
|
|
}
|
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_NORMAL,
|
2004-11-15 01:58:30 +00:00
|
|
|
"[RESULT#%d %s] Granted %f credit to valid result [HOST#%d]\n",
|
|
|
|
result.id, result.name, result.granted_credit, result.hostid
|
|
|
|
);
|
2004-12-14 00:57:03 +00:00
|
|
|
break;
|
|
|
|
case VALIDATE_STATE_INVALID:
|
|
|
|
update_result = true;
|
|
|
|
break;
|
|
|
|
case VALIDATE_STATE_INIT:
|
|
|
|
result.validate_state = VALIDATE_STATE_INCONCLUSIVE;
|
|
|
|
update_result = true;
|
|
|
|
break;
|
2004-11-15 01:58:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (update_result) {
|
2004-10-08 22:41:33 +00:00
|
|
|
retval = validator.update_result(result);
|
2004-09-10 20:33:05 +00:00
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_CRITICAL,
|
2004-09-10 20:33:05 +00:00
|
|
|
"[RESULT#%d %s] result.update() failed: %d\n",
|
|
|
|
result.id, result.name, retval
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2004-11-15 01:58:30 +00:00
|
|
|
|
2004-09-09 21:52:20 +00:00
|
|
|
if (canonicalid) {
|
2005-09-10 06:09:55 +00:00
|
|
|
// if we found a canonical result,
|
|
|
|
// trigger the assimilator, but do NOT trigger
|
|
|
|
// the transitioner - doing so creates a race condition
|
|
|
|
//
|
|
|
|
transition_time = NEVER;
|
2004-07-13 12:55:22 +00:00
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_DEBUG,
|
2004-07-13 12:55:22 +00:00
|
|
|
"[WU#%d %s] Found a canonical result: id=%d\n",
|
2004-10-10 03:16:30 +00:00
|
|
|
wu.id, wu.name, canonicalid
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
2004-10-10 03:16:30 +00:00
|
|
|
wu.canonical_resultid = canonicalid;
|
|
|
|
wu.canonical_credit = credit;
|
|
|
|
wu.assimilate_state = ASSIMILATE_READY;
|
2004-07-13 12:55:22 +00:00
|
|
|
|
2004-09-09 21:52:20 +00:00
|
|
|
// If found a canonical result, don't send any unsent results
|
2004-10-10 03:16:30 +00:00
|
|
|
//
|
2004-10-08 22:41:33 +00:00
|
|
|
for (i=0; i<items.size(); i++) {
|
2004-10-10 03:16:30 +00:00
|
|
|
RESULT& result = items[i].res;
|
2004-10-08 22:41:33 +00:00
|
|
|
|
2004-11-15 01:58:30 +00:00
|
|
|
if (result.server_state != RESULT_SERVER_STATE_UNSENT) {
|
2004-10-08 22:41:33 +00:00
|
|
|
continue;
|
2004-10-10 03:16:30 +00:00
|
|
|
}
|
2004-10-08 22:41:33 +00:00
|
|
|
|
2004-07-13 12:55:22 +00:00
|
|
|
result.server_state = RESULT_SERVER_STATE_OVER;
|
|
|
|
result.outcome = RESULT_OUTCOME_DIDNT_NEED;
|
2004-10-08 22:41:33 +00:00
|
|
|
retval = validator.update_result(result);
|
2004-07-13 12:55:22 +00:00
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_CRITICAL,
|
2004-07-13 12:55:22 +00:00
|
|
|
"[RESULT#%d %s] result.update() failed: %d\n",
|
|
|
|
result.id, result.name, retval
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
2004-12-10 22:04:20 +00:00
|
|
|
// here if no consensus.
|
|
|
|
|
2004-12-01 05:46:04 +00:00
|
|
|
// check if #success results is too large
|
2004-07-13 12:55:22 +00:00
|
|
|
//
|
2004-12-10 22:04:20 +00:00
|
|
|
if (nsuccess_results > wu.max_success_results) {
|
2004-10-10 03:16:30 +00:00
|
|
|
wu.error_mask |= WU_ERROR_TOO_MANY_SUCCESS_RESULTS;
|
2005-09-10 06:09:55 +00:00
|
|
|
transition_time = IMMEDIATE;
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
2004-12-10 22:04:20 +00:00
|
|
|
|
|
|
|
// if #success results == than target_nresults,
|
|
|
|
// we need more results, so bump target_nresults
|
|
|
|
// NOTE: nsuccess_results should never be > target_nresults,
|
|
|
|
// but accommodate that if it should happen
|
2004-12-01 05:46:04 +00:00
|
|
|
//
|
2004-12-10 22:04:20 +00:00
|
|
|
if (nsuccess_results >= wu.target_nresults) {
|
|
|
|
wu.target_nresults = nsuccess_results+1;
|
2005-09-10 06:09:55 +00:00
|
|
|
transition_time = IMMEDIATE;
|
2004-12-01 05:46:04 +00:00
|
|
|
}
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
--log_messages;
|
|
|
|
|
2005-09-10 06:09:55 +00:00
|
|
|
switch (transition_time) {
|
|
|
|
case IMMEDIATE:
|
2004-10-10 03:16:30 +00:00
|
|
|
wu.transition_time = time(0);
|
2005-09-10 06:09:55 +00:00
|
|
|
break;
|
|
|
|
case DELAYED:
|
|
|
|
x = time(0) + 6*3600;
|
2004-10-10 03:16:30 +00:00
|
|
|
if (x < wu.transition_time) wu.transition_time = x;
|
2005-09-10 06:09:55 +00:00
|
|
|
break;
|
|
|
|
case NEVER:
|
|
|
|
wu.transition_time = INT_MAX;
|
2005-10-23 07:19:03 +00:00
|
|
|
break;
|
|
|
|
case NO_CHANGE:
|
|
|
|
break;
|
2004-09-09 21:52:20 +00:00
|
|
|
}
|
2004-07-13 12:55:22 +00:00
|
|
|
|
2004-11-12 23:36:24 +00:00
|
|
|
wu.need_validate = 0;
|
2004-10-13 21:02:43 +00:00
|
|
|
|
2004-10-08 22:41:33 +00:00
|
|
|
retval = validator.update_workunit(wu);
|
2004-07-13 12:55:22 +00:00
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(
|
2005-09-26 23:28:48 +00:00
|
|
|
SCHED_MSG_LOG::MSG_CRITICAL,
|
2004-10-10 03:16:30 +00:00
|
|
|
"[WU#%d %s] update_workunit() failed: %d; exiting\n",
|
|
|
|
wu.id, wu.name, retval
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
2006-11-27 01:07:00 +00:00
|
|
|
return retval;
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
2006-11-27 01:07:00 +00:00
|
|
|
return 0;
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// make one pass through the workunits with need_validate set.
|
|
|
|
// return true if there were any
|
|
|
|
//
|
|
|
|
bool do_validate_scan(APP& app) {
|
2004-10-08 22:41:33 +00:00
|
|
|
DB_VALIDATOR_ITEM_SET validator;
|
|
|
|
std::vector<VALIDATOR_ITEM> items;
|
2004-07-13 12:55:22 +00:00
|
|
|
bool found=false;
|
2005-02-22 20:12:31 +00:00
|
|
|
int retval;
|
2004-07-13 12:55:22 +00:00
|
|
|
|
2004-10-08 22:41:33 +00:00
|
|
|
// loop over entries that need to be checked
|
|
|
|
//
|
2005-02-22 20:12:31 +00:00
|
|
|
while (1) {
|
|
|
|
retval = validator.enumerate(
|
|
|
|
app.id, one_pass_N_WU?one_pass_N_WU:SELECT_LIMIT,
|
|
|
|
wu_id_modulus, wu_id_remainder,
|
|
|
|
items
|
|
|
|
);
|
|
|
|
if (retval) break;
|
2006-11-27 01:07:00 +00:00
|
|
|
retval = handle_wu(validator, items);
|
|
|
|
if (!retval) found = true;
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
|
|
|
return found;
|
|
|
|
}
|
|
|
|
|
2005-07-17 19:52:44 +00:00
|
|
|
int main_loop() {
|
2004-07-13 12:55:22 +00:00
|
|
|
int retval;
|
|
|
|
DB_APP app;
|
|
|
|
bool did_something;
|
|
|
|
char buf[256];
|
|
|
|
|
|
|
|
retval = boinc_db.open(config.db_name, config.db_host, config.db_user, config.db_passwd);
|
|
|
|
if (retval) {
|
2005-09-26 23:28:48 +00:00
|
|
|
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "boinc_db.open failed: %d\n", retval);
|
2004-07-13 12:55:22 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
sprintf(buf, "where name='%s'", app_name);
|
|
|
|
retval = app.lookup(buf);
|
|
|
|
if (retval) {
|
2005-09-26 23:28:48 +00:00
|
|
|
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "can't find app %s\n", app_name);
|
2004-07-13 12:55:22 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
check_stop_daemons();
|
|
|
|
did_something = do_validate_scan(app);
|
|
|
|
if (!did_something) {
|
2005-07-17 19:52:44 +00:00
|
|
|
if (one_pass) break;
|
2006-07-11 21:49:20 +00:00
|
|
|
sleep(sleep_interval);
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2004-11-18 13:44:58 +00:00
|
|
|
// For use by user routines check_set() and check_pair() that link to
// this code.
// Set from the -d command-line option.
int boinc_validator_debuglevel=0;
|
2004-07-13 12:55:22 +00:00
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
int i, retval;
|
2005-07-17 19:52:44 +00:00
|
|
|
bool asynch = false;
|
2004-07-13 12:55:22 +00:00
|
|
|
|
2005-03-19 16:03:21 +00:00
|
|
|
#if 0
|
|
|
|
int mypid=getpid();
|
|
|
|
char debugcmd[512];
|
|
|
|
sprintf(debugcmd, "ddd %s %d &", argv[0], mypid);
|
|
|
|
system(debugcmd);
|
|
|
|
sleep(30);
|
|
|
|
#endif
|
|
|
|
|
2007-01-15 00:19:37 +00:00
|
|
|
const char *usage =
|
|
|
|
"\nUsage: %s -app <app-name> [OPTIONS]\n"
|
|
|
|
"Start validator for application <app-name>\n\n"
|
|
|
|
"Optional arguments:\n"
|
|
|
|
" -one_pass_N_WU N Validate at most N WUs, then exit\n"
|
|
|
|
" -one_pass Make one pass through WU table, then exit\n"
|
|
|
|
" -mod n i Process only WUs with (id mod n) == i\n"
|
|
|
|
" -max_claimed_credit X If a result claims more credit than this, mark it as invalid\n"
|
|
|
|
" -max_granted_credit X Grant no more than this amount of credit to a result\n"
|
|
|
|
" -grant_claimed_credit Grant the claimed credit, regardless of what other results for this workunit claimed\n"
|
|
|
|
" -asynch fork, run in separate process\n"
|
|
|
|
" -sleep_interval n Set sleep-interval to n\n"
|
2007-01-15 00:23:37 +00:00
|
|
|
" -d level Set debug-level\n\n";
|
2007-01-15 00:19:37 +00:00
|
|
|
|
|
|
|
if ( (argc > 1) && ( !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help") ) ) {
|
|
|
|
printf (usage, argv[0] );
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2004-07-13 12:55:22 +00:00
|
|
|
check_stop_daemons();
|
|
|
|
|
|
|
|
for (i=1; i<argc; i++) {
|
|
|
|
if (!strcmp(argv[i], "-asynch")) {
|
|
|
|
asynch = true;
|
2005-01-03 17:18:32 +00:00
|
|
|
} else if (!strcmp(argv[i], "-one_pass_N_WU")) {
|
|
|
|
one_pass_N_WU = atoi(argv[++i]);
|
|
|
|
one_pass = true;
|
2006-07-11 21:49:20 +00:00
|
|
|
} else if (!strcmp(argv[i], "-sleep_interval")) {
|
|
|
|
sleep_interval = atoi(argv[++i]);
|
2004-07-13 12:55:22 +00:00
|
|
|
} else if (!strcmp(argv[i], "-one_pass")) {
|
|
|
|
one_pass = true;
|
|
|
|
} else if (!strcmp(argv[i], "-app")) {
|
|
|
|
strcpy(app_name, argv[++i]);
|
|
|
|
} else if (!strcmp(argv[i], "-d")) {
|
2004-12-14 00:57:03 +00:00
|
|
|
boinc_validator_debuglevel=atoi(argv[++i]);
|
2004-11-18 13:44:58 +00:00
|
|
|
log_messages.set_debug_level(boinc_validator_debuglevel);
|
2005-02-22 20:12:31 +00:00
|
|
|
} else if (!strcmp(argv[i], "-mod")) {
|
|
|
|
wu_id_modulus = atoi(argv[++i]);
|
|
|
|
wu_id_remainder = atoi(argv[++i]);
|
2006-11-28 03:27:13 +00:00
|
|
|
} else if (!strcmp(argv[i], "-max_granted_credit")) {
|
|
|
|
max_granted_credit = atof(argv[++i]);
|
|
|
|
} else if (!strcmp(argv[i], "-max_claimed_credit")) {
|
|
|
|
max_claimed_credit = atof(argv[++i]);
|
|
|
|
} else if (!strcmp(argv[i], "-grant_claimed_credit")) {
|
|
|
|
grant_claimed_credit = true;
|
2004-07-13 12:55:22 +00:00
|
|
|
} else {
|
2007-01-15 00:19:37 +00:00
|
|
|
fprintf(stderr, "Invalid option '%s'\nTry `%s --help` for more information\n", argv[i], argv[0]);
|
2005-09-26 23:28:48 +00:00
|
|
|
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "unrecognized arg: %s\n", argv[i]);
|
2004-12-14 00:57:03 +00:00
|
|
|
exit(1);
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-01-15 00:19:37 +00:00
|
|
|
// -app is required
|
|
|
|
if ( app_name[0] == 0 ) {
|
|
|
|
fprintf (stderr, "\nERROR: use '-app' to specify the application to run the validator for.\n");
|
|
|
|
printf (usage, argv[0] );
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2004-07-13 12:55:22 +00:00
|
|
|
retval = config.parse_file("..");
|
|
|
|
if (retval) {
|
2005-09-26 23:28:48 +00:00
|
|
|
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
|
2004-07-13 12:55:22 +00:00
|
|
|
"Can't parse config file: %d\n", retval
|
|
|
|
);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (asynch) {
|
|
|
|
if (fork()) {
|
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-09-26 23:28:48 +00:00
|
|
|
log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL, "Starting validator\n");
|
2005-02-22 20:12:31 +00:00
|
|
|
if (wu_id_modulus) {
|
2005-09-26 23:28:48 +00:00
|
|
|
log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL,
|
2005-02-22 20:12:31 +00:00
|
|
|
"Modulus %d, remainder %d\n", wu_id_modulus, wu_id_remainder
|
|
|
|
);
|
|
|
|
}
|
2004-07-13 12:55:22 +00:00
|
|
|
|
|
|
|
install_stop_signal_handler();
|
|
|
|
|
2005-07-17 19:52:44 +00:00
|
|
|
main_loop();
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
2004-12-08 00:40:19 +00:00
|
|
|
|
2005-01-02 18:29:53 +00:00
|
|
|
// Version-control identification string ("$Id$" keyword placeholder).
const char* BOINC_RCSID_634dbda0b9 = "$Id$";
|