2008-08-06 18:36:30 +00:00
|
|
|
// This file is part of BOINC.
|
2005-01-20 23:22:22 +00:00
|
|
|
// http://boinc.berkeley.edu
|
2008-08-06 18:36:30 +00:00
|
|
|
// Copyright (C) 2008 University of California
|
2004-07-13 12:55:22 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
2004-07-13 12:55:22 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
2005-01-20 23:22:22 +00:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
2004-07-13 12:55:22 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
2004-07-13 12:55:22 +00:00
|
|
|
|
2005-02-22 20:12:31 +00:00
|
|
|
// validator - check and validate results, and grant credit
|
2012-04-10 00:32:35 +00:00
|
|
|
//
|
|
|
|
// Must be linked with two functions
|
|
|
|
// check_set(): find a canonical result from a set of results
|
|
|
|
// check_pair(): compare a result with a canonical result
|
|
|
|
//
|
|
|
|
// We recommend that you use the versions of these in validate_util2.cpp,
|
|
|
|
// in which case you have to supply 3 simpler functions
|
|
|
|
// init_result()
|
|
|
|
// compare_results()
|
|
|
|
// cleanup_result()
|
|
|
|
|
2010-04-05 18:59:16 +00:00
|
|
|
// --app appname
|
2011-09-13 21:01:42 +00:00
|
|
|
// [-d N] [--debug_level N] log verbosity (1=least, 4=most)
|
|
|
|
// [--one_pass_N_WU N] Validate only N WU in one pass, then exit
|
|
|
|
// [--one_pass] make one pass through WU table, then exit
|
|
|
|
// [--mod n i] process only WUs with (id mod n) == i
|
|
|
|
// [--max_granted_credit X] limit maximum granted credit to X
|
|
|
|
// [--update_credited_job] add userid/wuid pair to credited_job table
|
|
|
|
//
|
|
|
|
// credit options. The default is to grant credit using an
|
|
|
|
// adaptive scheme that provides device neutrality
|
|
|
|
//
|
|
|
|
// [--no_credit] don't grant credit
|
|
|
|
// Use this, e.g., if using trickles for credit
|
|
|
|
// [--credit_from_wu] get credit from WU XML
|
|
|
|
// [--credit_from_runtime X] grant credit based on runtime,
|
|
|
|
// assuming single-CPU app.
|
|
|
|
// X is the max runtime.
|
2004-07-13 12:55:22 +00:00
|
|
|
|
2005-11-21 18:34:44 +00:00
|
|
|
#include "config.h"
|
2004-07-13 12:55:22 +00:00
|
|
|
#include <unistd.h>
|
2007-12-21 21:09:40 +00:00
|
|
|
#include <climits>
|
2004-07-13 13:54:09 +00:00
|
|
|
#include <cmath>
|
2004-07-13 12:55:22 +00:00
|
|
|
#include <vector>
|
2008-02-27 23:26:38 +00:00
|
|
|
#include <cstdlib>
|
|
|
|
#include <string>
|
2008-06-01 03:43:47 +00:00
|
|
|
#include <signal.h>
|
2004-07-13 12:55:22 +00:00
|
|
|
|
|
|
|
#include "boinc_db.h"
|
|
|
|
#include "util.h"
|
2007-05-11 16:30:13 +00:00
|
|
|
#include "str_util.h"
|
2004-09-09 21:52:20 +00:00
|
|
|
#include "error_numbers.h"
|
2010-07-16 07:15:57 +00:00
|
|
|
#include "svn_version.h"
|
2011-09-13 21:01:42 +00:00
|
|
|
#include "common_defs.h"
|
2007-05-11 16:30:13 +00:00
|
|
|
|
2009-08-13 03:35:26 +00:00
|
|
|
#include "credit.h"
|
2004-07-13 12:55:22 +00:00
|
|
|
#include "sched_config.h"
|
|
|
|
#include "sched_util.h"
|
|
|
|
#include "sched_msgs.h"
|
2008-08-21 20:58:32 +00:00
|
|
|
#include "validator.h"
|
2007-01-12 17:42:29 +00:00
|
|
|
#include "validate_util.h"
|
2010-03-29 22:28:20 +00:00
|
|
|
#include "validate_util2.h"
|
2008-09-22 17:52:41 +00:00
|
|
|
#ifdef GCL_SIMULATOR
|
|
|
|
#include "gcl_simulator.h"
|
|
|
|
#endif
|
2004-07-13 12:55:22 +00:00
|
|
|
|
|
|
|
// File names used by this daemon.
// NOTE(review): their uses are outside this part of the file; judging by
// the names, one is a lock file and the other holds the process ID.
#define LOCKFILE "validate.out"
#define PIDFILE "validate.pid"

// NOTE(review): presumably the maximum number of rows fetched per DB query;
// confirm against the enumeration code.
#define SELECT_LIMIT    1000

// Default value of sleep_interval (units presumably seconds - confirm).
#define SLEEP_PERIOD    5

// Runtime-adjustable copy of SLEEP_PERIOD.
int sleep_interval = SLEEP_PERIOD;
|
|
|
|
|
2005-09-10 06:09:55 +00:00
|
|
|
// How handle_wu() wants the workunit's transition time to be changed.
// (How each value is turned into an actual time is decided outside
// the part of handle_wu() that sets it.)
enum TRANSITION_TIME {
    NEVER,
        // NOTE(review): not set in the code visible here; confirm usage
    DELAYED,
        // chosen when check_pair() asks for a retry, so that
        // the workunit is looked at again later
    IMMEDIATE,
        // chosen after a result was checked, so the transitioner
        // can act on the workunit right away
    NO_CHANGE
        // initial value: leave the transition time alone
};
|
|
|
|
|
2004-07-13 12:55:22 +00:00
|
|
|
char app_name[256];
|
2008-06-04 23:04:12 +00:00
|
|
|
DB_APP app;
|
2005-02-22 20:12:31 +00:00
|
|
|
int wu_id_modulus=0;
|
|
|
|
int wu_id_remainder=0;
|
|
|
|
int one_pass_N_WU=0;
|
2005-07-17 19:52:44 +00:00
|
|
|
bool one_pass = false;
|
2012-06-29 22:24:07 +00:00
|
|
|
double max_granted_credit = 200 * 1000 * 365;
|
|
|
|
// limit credit to 1 TeraFLOP-year
|
2007-05-02 23:17:52 +00:00
|
|
|
bool update_credited_job = false;
|
2007-05-04 17:59:50 +00:00
|
|
|
bool credit_from_wu = false;
|
2011-09-13 21:01:42 +00:00
|
|
|
bool credit_from_runtime = false;
|
|
|
|
double max_runtime = 0;
|
2011-09-13 05:23:10 +00:00
|
|
|
bool no_credit = false;
|
2011-09-13 21:01:42 +00:00
|
|
|
|
2008-08-21 20:58:32 +00:00
|
|
|
WORKUNIT* g_wup;
|
2010-03-29 22:28:20 +00:00
|
|
|
vector<DB_APP_VERSION> app_versions;
|
|
|
|
// cache of app_versions; used by v2 credit system
|
2008-06-01 03:43:47 +00:00
|
|
|
|
2008-06-04 23:04:12 +00:00
|
|
|
// Return true if this workunit asks for exactly one result
// (wu.target_nresults == 1) although the application record (global `app`)
// asks for more than one.
//
bool is_unreplicated(WORKUNIT& wu) {
    if (wu.target_nresults != 1) {
        return false;
    }
    return app.target_nresults > 1;
}
|
2005-02-22 20:12:31 +00:00
|
|
|
|
2010-03-29 22:28:20 +00:00
|
|
|
// Here when a result has been validated.
|
2010-04-21 19:33:20 +00:00
|
|
|
// - update consecutive_valid
|
2010-03-29 22:28:20 +00:00
|
|
|
// - udpdate turnaround stats
|
|
|
|
// - insert credited_job record if needed
|
2004-07-13 12:55:22 +00:00
|
|
|
//
|
2012-06-29 22:24:07 +00:00
|
|
|
int is_valid(
|
|
|
|
DB_HOST& host, RESULT& result, WORKUNIT& wu, DB_HOST_APP_VERSION& hav
|
|
|
|
) {
|
2007-05-02 23:17:52 +00:00
|
|
|
DB_CREDITED_JOB credited_job;
|
2004-07-13 12:55:22 +00:00
|
|
|
int retval;
|
|
|
|
|
2004-12-14 21:31:57 +00:00
|
|
|
double turnaround = result.received_time - result.sent_time;
|
2005-02-23 00:11:59 +00:00
|
|
|
compute_avg_turnaround(host, turnaround);
|
2006-09-11 11:41:54 +00:00
|
|
|
|
- back end: change "daily result quota" mechanism.
Old: config.xml specifies an initial daily quota (say, 100).
Each host_app_version starts out with this quota.
On the return of a SUCCESS result,
the quota is doubled, up to the initial value.
On the return of an error result, or a timeout,
the quota is decremented down to 1.
Problem:
Doesn't accommodate hosts that can do more than 100 jobs/day.
New: similar, but
- on validation of a job, daily quota is incremented.
- on invalidation of a job, daily quota is decremented.
- on return of an error result, or a timeout,
daily quota is min'd with initial quota, then decremented.
Notes:
- This allows a host to have an unboundedly large quota
as long as it continues to return more valid
than invalid results.
- Even with this change, hosts that return SUCCESS but
invalid results will continue to get the initial daily quota.
It would be desirable to reduce their quota to 1.
svn path=/trunk/boinc/; revision=21675
2010-06-02 00:11:01 +00:00
|
|
|
// increment daily quota
|
|
|
|
//
|
|
|
|
hav.max_jobs_per_day++;
|
|
|
|
|
2010-04-21 19:33:20 +00:00
|
|
|
// increment consecutive_valid, but only if unreplicated
|
2010-03-29 22:28:20 +00:00
|
|
|
//
|
2008-06-04 23:04:12 +00:00
|
|
|
if (!is_unreplicated(wu)) {
|
2010-04-21 19:33:20 +00:00
|
|
|
hav.consecutive_valid++;
|
2010-03-29 22:28:20 +00:00
|
|
|
log_messages.printf(MSG_DEBUG,
|
2010-04-21 19:33:20 +00:00
|
|
|
"[HAV#%d] consecutive valid now %d\n",
|
|
|
|
hav.app_version_id, hav.consecutive_valid
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2007-05-02 23:17:52 +00:00
|
|
|
if (update_credited_job) {
|
2009-08-13 03:35:26 +00:00
|
|
|
credited_job.userid = host.userid;
|
2007-05-02 23:17:52 +00:00
|
|
|
credited_job.workunitid = long(wu.opaque);
|
|
|
|
retval = credited_job.insert();
|
2007-05-02 18:51:51 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2010-11-08 17:51:57 +00:00
|
|
|
"[RESULT#%d] Warning: credited_job insert failed (userid: %d workunit: %f err: %s)\n",
|
|
|
|
result.id, host.userid, wu.opaque, boincerror(retval)
|
2007-05-02 18:51:51 +00:00
|
|
|
);
|
|
|
|
} else {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_DEBUG,
|
2007-06-20 22:34:06 +00:00
|
|
|
"[RESULT#%d %s] added credited_job record [WU#%d OPAQUE#%f USER#%d]\n",
|
2009-08-13 03:35:26 +00:00
|
|
|
result.id, result.name, wu.id, wu.opaque, host.userid
|
2007-05-04 17:59:50 +00:00
|
|
|
);
|
2007-05-02 18:51:51 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-07-13 12:55:22 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-04-21 19:33:20 +00:00
|
|
|
// Bookkeeping for a result that has just been found invalid:
// - reset the consecutive-valid count to zero
// - take one job off the daily quota, but only while the quota
//   is above config.daily_result_quota
//
static inline void is_invalid(DB_HOST_APP_VERSION& hav) {
    hav.consecutive_valid = 0;

    bool above_initial_quota =
        hav.max_jobs_per_day > config.daily_result_quota;
    if (!above_initial_quota) {
        return;
    }
    hav.max_jobs_per_day--;
}
|
|
|
|
|
2010-03-29 22:28:20 +00:00
|
|
|
// handle a workunit which has new results
|
2006-11-27 01:07:00 +00:00
|
|
|
//
|
|
|
|
int handle_wu(
|
2004-10-08 23:07:59 +00:00
|
|
|
DB_VALIDATOR_ITEM_SET& validator, std::vector<VALIDATOR_ITEM>& items
|
2009-08-10 04:22:02 +00:00
|
|
|
) {
|
2005-09-10 06:09:55 +00:00
|
|
|
int canonical_result_index = -1;
|
2004-09-09 21:52:20 +00:00
|
|
|
bool update_result, retry;
|
2005-09-10 06:09:55 +00:00
|
|
|
TRANSITION_TIME transition_time = NO_CHANGE;
|
|
|
|
int retval = 0, canonicalid = 0, x;
|
2011-09-13 21:01:42 +00:00
|
|
|
double credit = 0;
|
2004-07-13 12:55:22 +00:00
|
|
|
unsigned int i;
|
2004-10-08 22:41:33 +00:00
|
|
|
|
2004-10-10 03:16:30 +00:00
|
|
|
WORKUNIT& wu = items[0].wu;
|
2008-08-21 20:58:32 +00:00
|
|
|
g_wup = &wu;
|
2004-10-08 23:07:59 +00:00
|
|
|
|
2004-10-08 23:59:44 +00:00
|
|
|
if (wu.canonical_resultid) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2008-05-29 20:11:43 +00:00
|
|
|
"[WU#%d %s] Already has canonical result %d\n",
|
2004-10-08 23:59:44 +00:00
|
|
|
wu.id, wu.name, wu.canonical_resultid
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
|
|
|
++log_messages;
|
|
|
|
|
|
|
|
// Here if WU already has a canonical result.
|
|
|
|
// Get unchecked results and see if they match the canonical result
|
|
|
|
//
|
2004-10-08 22:41:33 +00:00
|
|
|
for (i=0; i<items.size(); i++) {
|
2004-10-10 03:16:30 +00:00
|
|
|
RESULT& result = items[i].res;
|
|
|
|
|
|
|
|
if (result.id == wu.canonical_resultid) {
|
2009-08-10 04:22:02 +00:00
|
|
|
canonical_result_index = i;
|
2004-10-10 03:16:30 +00:00
|
|
|
}
|
2004-10-08 22:41:33 +00:00
|
|
|
}
|
2004-11-15 01:58:30 +00:00
|
|
|
if (canonical_result_index == -1) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2004-11-15 01:58:30 +00:00
|
|
|
"[WU#%d %s] Can't find canonical result %d\n",
|
2004-10-08 23:59:44 +00:00
|
|
|
wu.id, wu.name, wu.canonical_resultid
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
2006-11-27 01:07:00 +00:00
|
|
|
return 0;
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
|
|
|
|
2004-10-10 03:16:30 +00:00
|
|
|
RESULT& canonical_result = items[canonical_result_index].res;
|
|
|
|
|
2004-07-13 12:55:22 +00:00
|
|
|
// scan this WU's results, and check the unchecked ones
|
2004-10-10 03:16:30 +00:00
|
|
|
//
|
2004-10-08 22:41:33 +00:00
|
|
|
for (i=0; i<items.size(); i++) {
|
2004-10-10 03:16:30 +00:00
|
|
|
RESULT& result = items[i].res;
|
2004-10-08 22:41:33 +00:00
|
|
|
|
2004-12-14 00:57:03 +00:00
|
|
|
if (result.server_state != RESULT_SERVER_STATE_OVER) continue;
|
|
|
|
if (result.outcome != RESULT_OUTCOME_SUCCESS) continue;
|
|
|
|
switch (result.validate_state) {
|
|
|
|
case VALIDATE_STATE_INIT:
|
|
|
|
case VALIDATE_STATE_INCONCLUSIVE:
|
|
|
|
break;
|
|
|
|
default:
|
2004-10-08 22:41:33 +00:00
|
|
|
continue;
|
2004-10-10 03:16:30 +00:00
|
|
|
}
|
2008-05-29 20:11:43 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[WU#%d] handle_wu(): testing result %d\n",
|
|
|
|
wu.id, result.id
|
|
|
|
);
|
2004-10-08 22:41:33 +00:00
|
|
|
|
2006-11-27 01:07:00 +00:00
|
|
|
check_pair(result, canonical_result, retry);
|
2011-04-25 18:27:03 +00:00
|
|
|
if (retry) {
|
|
|
|
// this usually means an NFS mount has failed;
|
|
|
|
// arrange to try again later.
|
|
|
|
//
|
|
|
|
transition_time = DELAYED;
|
|
|
|
goto leave;
|
|
|
|
}
|
2004-09-09 21:52:20 +00:00
|
|
|
update_result = false;
|
2004-09-10 20:33:05 +00:00
|
|
|
|
2004-10-13 21:02:43 +00:00
|
|
|
if (result.outcome == RESULT_OUTCOME_VALIDATE_ERROR) {
|
2004-09-10 20:33:05 +00:00
|
|
|
update_result = true;
|
|
|
|
}
|
2004-11-15 01:58:30 +00:00
|
|
|
|
2010-03-29 22:28:20 +00:00
|
|
|
// this might be last result, so let transitioner
|
2004-12-14 00:57:03 +00:00
|
|
|
// trigger file delete etc. if needed
|
2004-11-15 01:58:30 +00:00
|
|
|
//
|
2005-09-10 06:09:55 +00:00
|
|
|
transition_time = IMMEDIATE;
|
2004-11-15 01:58:30 +00:00
|
|
|
|
2010-03-29 22:28:20 +00:00
|
|
|
DB_HOST host;
|
|
|
|
retval = host.lookup_id(result.hostid);
|
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2010-11-08 17:51:57 +00:00
|
|
|
"[RESULT#%d] lookup of host %d failed: %s\n",
|
|
|
|
result.id, result.hostid, boincerror(retval)
|
2010-03-29 22:28:20 +00:00
|
|
|
);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
HOST host_initial = host;
|
|
|
|
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
bool update_hav = false;
|
|
|
|
DB_HOST_APP_VERSION hav;
|
|
|
|
retval = hav_lookup(hav, result.hostid,
|
|
|
|
generalized_app_version_id(result.app_version_id, result.appid)
|
|
|
|
);
|
|
|
|
if (retval) {
|
2012-10-17 17:37:51 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
|
|
|
"[RESULT#%d %s] hav_lookup returned %d\n",
|
|
|
|
result.id, result.name, retval
|
|
|
|
);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
hav.host_id = 0;
|
|
|
|
}
|
|
|
|
DB_HOST_APP_VERSION hav_orig = hav;
|
|
|
|
vector<DB_HOST_APP_VERSION> havv;
|
|
|
|
havv.push_back(hav);
|
|
|
|
|
2010-03-29 22:28:20 +00:00
|
|
|
vector<RESULT> rv;
|
2004-09-09 21:52:20 +00:00
|
|
|
switch (result.validate_state) {
|
|
|
|
case VALIDATE_STATE_VALID:
|
|
|
|
update_result = true;
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
update_hav = true;
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2010-03-29 22:28:20 +00:00
|
|
|
"[RESULT#%d %s] pair_check() matched: setting result to valid\n",
|
|
|
|
result.id, result.name
|
2004-09-09 21:52:20 +00:00
|
|
|
);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
retval = is_valid(host, result, wu, havv[0]);
|
2004-09-09 21:52:20 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2010-11-08 17:51:57 +00:00
|
|
|
"[RESULT#%d %s] is_valid() error: %s\n",
|
|
|
|
result.id, result.name, boincerror(retval)
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
|
|
|
}
|
2010-03-29 22:28:20 +00:00
|
|
|
// do credit computation, but grant credit of canonical result
|
|
|
|
//
|
|
|
|
rv.push_back(result);
|
2010-04-05 18:59:16 +00:00
|
|
|
assign_credit_set(
|
2011-09-13 05:23:10 +00:00
|
|
|
wu, rv, app, app_versions, havv,
|
|
|
|
max_granted_credit, credit
|
2010-04-05 18:59:16 +00:00
|
|
|
);
|
2011-09-13 05:23:10 +00:00
|
|
|
if (!no_credit) {
|
|
|
|
result.granted_credit = canonical_result.granted_credit;
|
|
|
|
grant_credit(host, result.sent_time, result.granted_credit);
|
|
|
|
}
|
2004-09-09 21:52:20 +00:00
|
|
|
break;
|
|
|
|
case VALIDATE_STATE_INVALID:
|
|
|
|
update_result = true;
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
update_hav = true;
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2004-09-09 21:52:20 +00:00
|
|
|
"[RESULT#%d %s] pair_check() didn't match: setting result to invalid\n",
|
|
|
|
result.id, result.name
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
2010-04-21 19:33:20 +00:00
|
|
|
is_invalid(havv[0]);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
}
|
|
|
|
if (hav.host_id && update_hav) {
|
2013-04-03 00:23:37 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[HOST#%d AV#%d] [outlier=%d] Updating HAV in db. pfc.n=%f->%f\n",
|
|
|
|
havv[0].host_id, havv[0].app_version_id, result.runtime_outlier, hav_orig.pfc.n,havv[0].pfc.n);
|
2013-03-26 22:24:45 +00:00
|
|
|
retval=havv[0].update_validator(hav_orig);
|
2013-04-03 00:23:37 +00:00
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2013-03-26 22:24:45 +00:00
|
|
|
"[HOST#%d AV%d] hav.update_validator() failed: %s\n",
|
|
|
|
hav.host_id, hav.app_version_id, boincerror(retval)
|
|
|
|
);
|
2013-04-03 00:23:37 +00:00
|
|
|
}
|
2004-09-09 21:52:20 +00:00
|
|
|
}
|
2010-03-29 22:28:20 +00:00
|
|
|
host.update_diff_validator(host_initial);
|
2004-09-09 21:52:20 +00:00
|
|
|
if (update_result) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-08-10 04:22:02 +00:00
|
|
|
"[RESULT#%d %s] granted_credit %f\n",
|
2004-10-10 03:16:30 +00:00
|
|
|
result.id, result.name, result.granted_credit
|
2004-10-04 23:59:51 +00:00
|
|
|
);
|
|
|
|
|
2004-10-08 22:41:33 +00:00
|
|
|
retval = validator.update_result(result);
|
2004-09-09 21:52:20 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2010-11-08 17:51:57 +00:00
|
|
|
"[RESULT#%d %s] Can't update result: %s\n",
|
|
|
|
result.id, result.name, boincerror(retval)
|
2004-09-09 21:52:20 +00:00
|
|
|
);
|
|
|
|
}
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Here if WU doesn't have a canonical result yet.
|
|
|
|
// Try to get one
|
|
|
|
|
2013-03-14 20:07:43 +00:00
|
|
|
vector<RESULT> viable_results;
|
|
|
|
vector<DB_HOST_APP_VERSION> host_app_versions, host_app_versions_orig;
|
|
|
|
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2004-10-10 03:16:30 +00:00
|
|
|
"[WU#%d %s] handle_wu(): No canonical result yet\n",
|
|
|
|
wu.id, wu.name
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
|
|
|
++log_messages;
|
|
|
|
|
2013-03-14 20:07:43 +00:00
|
|
|
// make a vector of the "viable" (i.e. possibly canonical) results,
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
// and a parallel vector of host_app_versions
|
2004-11-15 01:58:30 +00:00
|
|
|
//
|
2004-10-08 22:41:33 +00:00
|
|
|
for (i=0; i<items.size(); i++) {
|
2004-10-10 03:16:30 +00:00
|
|
|
RESULT& result = items[i].res;
|
2004-10-08 22:41:33 +00:00
|
|
|
|
2013-03-14 20:07:43 +00:00
|
|
|
if (result.server_state != RESULT_SERVER_STATE_OVER) continue;
|
|
|
|
if (result.outcome != RESULT_OUTCOME_SUCCESS) continue;
|
|
|
|
if (result.validate_state == VALIDATE_STATE_INVALID) continue;
|
|
|
|
|
|
|
|
viable_results.push_back(result);
|
|
|
|
DB_HOST_APP_VERSION hav;
|
|
|
|
retval = hav_lookup(hav, result.hostid,
|
|
|
|
generalized_app_version_id(result.app_version_id, result.appid)
|
|
|
|
);
|
|
|
|
if (retval) {
|
|
|
|
hav.host_id=0; // flag that it's missing
|
2004-10-10 03:16:30 +00:00
|
|
|
}
|
2013-03-14 20:07:43 +00:00
|
|
|
host_app_versions.push_back(hav);
|
|
|
|
host_app_versions_orig.push_back(hav);
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
2004-10-08 22:41:33 +00:00
|
|
|
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_DEBUG,
|
2013-03-14 20:07:43 +00:00
|
|
|
"[WU#%d %s] Found %d viable results\n",
|
|
|
|
wu.id, wu.name, (int)viable_results.size()
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
2013-03-14 20:07:43 +00:00
|
|
|
if (viable_results.size() >= (unsigned int)wu.min_quorum) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_DEBUG,
|
2004-10-10 03:16:30 +00:00
|
|
|
"[WU#%d %s] Enough for quorum, checking set.\n",
|
|
|
|
wu.id, wu.name
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
2009-08-10 04:22:02 +00:00
|
|
|
|
2010-03-29 22:28:20 +00:00
|
|
|
double dummy;
|
2013-03-14 20:07:43 +00:00
|
|
|
retval = check_set(viable_results, wu, canonicalid, dummy, retry);
|
2004-09-09 21:52:20 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2012-10-15 18:47:55 +00:00
|
|
|
"[WU#%d %s] check_set() error: %s\n",
|
2010-11-08 17:51:57 +00:00
|
|
|
wu.id, wu.name, boincerror(retval)
|
2004-09-09 21:52:20 +00:00
|
|
|
);
|
2006-11-27 01:07:00 +00:00
|
|
|
return retval;
|
2004-09-09 21:52:20 +00:00
|
|
|
}
|
2005-09-10 06:09:55 +00:00
|
|
|
if (retry) transition_time = DELAYED;
|
2004-09-10 20:33:05 +00:00
|
|
|
|
2011-09-13 21:01:42 +00:00
|
|
|
// if we found a canonical instance, decide on credit
|
|
|
|
//
|
|
|
|
if (canonicalid) {
|
2011-09-16 16:43:15 +00:00
|
|
|
// always do the credit calculation, to update statistics,
|
|
|
|
// even if we're granting credit a different way
|
2011-08-25 22:12:48 +00:00
|
|
|
//
|
|
|
|
retval = assign_credit_set(
|
2013-03-14 20:07:43 +00:00
|
|
|
wu, viable_results, app, app_versions, host_app_versions,
|
2011-08-25 22:12:48 +00:00
|
|
|
max_granted_credit, credit
|
|
|
|
);
|
2008-08-07 22:50:05 +00:00
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2011-09-13 21:01:42 +00:00
|
|
|
"[WU#%d %s] assign_credit_set(): %s\n",
|
|
|
|
wu.id, wu.name, boincerror(retval)
|
2008-08-07 22:50:05 +00:00
|
|
|
);
|
2011-09-13 21:01:42 +00:00
|
|
|
transition_time = DELAYED;
|
|
|
|
goto leave;
|
2008-08-07 22:50:05 +00:00
|
|
|
}
|
2011-09-13 21:01:42 +00:00
|
|
|
|
|
|
|
if (credit_from_wu) {
|
2013-03-14 20:07:43 +00:00
|
|
|
retval = get_credit_from_wu(wu, viable_results, credit);
|
2010-11-03 22:06:56 +00:00
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2011-09-13 21:01:42 +00:00
|
|
|
"[WU#%d %s] get_credit_from_wu(): credit not specified in WU\n",
|
|
|
|
wu.id, wu.name
|
2010-11-03 22:06:56 +00:00
|
|
|
);
|
2011-09-13 21:01:42 +00:00
|
|
|
credit = 0;
|
2010-11-03 22:06:56 +00:00
|
|
|
}
|
2011-09-13 21:01:42 +00:00
|
|
|
} else if (credit_from_runtime) {
|
|
|
|
credit = 0;
|
2013-03-14 20:07:43 +00:00
|
|
|
for (i=0; i<viable_results.size(); i++) {
|
|
|
|
RESULT& result = viable_results[i];
|
2011-09-13 21:01:42 +00:00
|
|
|
if (result.id == canonicalid) {
|
|
|
|
DB_HOST host;
|
|
|
|
retval = host.lookup_id(result.hostid);
|
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(MSG_CRITICAL,
|
|
|
|
"[WU#%d %s] host %d lookup failed\n",
|
|
|
|
wu.id, wu.name, result.hostid
|
|
|
|
);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
double runtime = result.elapsed_time;
|
|
|
|
if (runtime <=0 || runtime > max_runtime) {
|
|
|
|
runtime = max_runtime;
|
|
|
|
}
|
2012-01-06 22:22:02 +00:00
|
|
|
credit = result.flops_estimate * runtime * COBBLESTONE_SCALE;
|
2012-01-08 01:28:39 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[WU#%d][RESULT#%d] credit_from_runtime %.2f = %.0fs * %.2fGFLOPS\n",
|
|
|
|
wu.id, result.id,
|
|
|
|
credit, runtime, result.flops_estimate/1e9
|
|
|
|
);
|
2011-09-13 21:01:42 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (no_credit) {
|
|
|
|
credit = 0;
|
|
|
|
}
|
|
|
|
if (max_granted_credit && credit>max_granted_credit) {
|
|
|
|
credit = max_granted_credit;
|
2010-04-05 20:03:54 +00:00
|
|
|
}
|
2010-03-29 22:28:20 +00:00
|
|
|
}
|
|
|
|
|
2013-03-14 20:07:43 +00:00
|
|
|
// scan the viable results.
|
|
|
|
// update as needed,
|
|
|
|
// and count the # of results that are still viable
|
|
|
|
// (some may now have outcome VALIDATE_ERROR,
|
|
|
|
// or validate_state INVALID)
|
2004-09-10 20:33:05 +00:00
|
|
|
//
|
2013-03-14 20:07:43 +00:00
|
|
|
int n_viable_results = 0;
|
|
|
|
for (i=0; i<viable_results.size(); i++) {
|
|
|
|
RESULT& result = viable_results[i];
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
DB_HOST_APP_VERSION& hav = host_app_versions[i];
|
|
|
|
DB_HOST_APP_VERSION& hav_orig = host_app_versions_orig[i];
|
|
|
|
|
|
|
|
update_result = false;
|
|
|
|
bool update_host = false;
|
2013-03-14 20:07:43 +00:00
|
|
|
|
|
|
|
if (result.outcome != RESULT_OUTCOME_SUCCESS
|
|
|
|
|| result.validate_state == VALIDATE_STATE_INVALID
|
|
|
|
) {
|
2005-09-10 06:09:55 +00:00
|
|
|
transition_time = IMMEDIATE;
|
2004-11-15 01:58:30 +00:00
|
|
|
update_result = true;
|
2004-12-10 22:04:20 +00:00
|
|
|
} else {
|
2013-03-14 20:07:43 +00:00
|
|
|
n_viable_results++;
|
2004-11-15 01:58:30 +00:00
|
|
|
}
|
|
|
|
|
2010-03-29 22:28:20 +00:00
|
|
|
DB_HOST host;
|
|
|
|
HOST host_initial;
|
2004-12-14 00:57:03 +00:00
|
|
|
switch (result.validate_state) {
|
|
|
|
case VALIDATE_STATE_VALID:
|
2010-03-29 22:28:20 +00:00
|
|
|
case VALIDATE_STATE_INVALID:
|
|
|
|
retval = host.lookup_id(result.hostid);
|
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2010-11-08 17:51:57 +00:00
|
|
|
"[RESULT#%d] lookup of host %d: %s\n",
|
|
|
|
result.id, result.hostid, boincerror(retval)
|
2010-03-29 22:28:20 +00:00
|
|
|
);
|
|
|
|
continue;
|
2006-11-28 03:27:13 +00:00
|
|
|
}
|
2010-03-29 22:28:20 +00:00
|
|
|
host_initial = host;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (result.validate_state) {
|
|
|
|
case VALIDATE_STATE_VALID:
|
|
|
|
update_result = true;
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
update_host = true;
|
|
|
|
retval = is_valid(host, result, wu, host_app_versions[i]);
|
2004-11-15 01:58:30 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_DEBUG,
|
2010-11-08 17:51:57 +00:00
|
|
|
"[RESULT#%d %s] is_valid() failed: %s\n",
|
|
|
|
result.id, result.name, boincerror(retval)
|
2004-11-15 01:58:30 +00:00
|
|
|
);
|
|
|
|
}
|
2011-09-13 05:23:10 +00:00
|
|
|
if (!no_credit) {
|
|
|
|
result.granted_credit = credit;
|
|
|
|
grant_credit(host, result.sent_time, credit);
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[RESULT#%d %s] Valid; granted %f credit [HOST#%d]\n",
|
|
|
|
result.id, result.name, result.granted_credit,
|
|
|
|
result.hostid
|
|
|
|
);
|
2011-08-30 22:28:52 +00:00
|
|
|
}
|
2004-12-14 00:57:03 +00:00
|
|
|
break;
|
|
|
|
case VALIDATE_STATE_INVALID:
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
update_result = true;
|
|
|
|
update_host = true;
|
2008-05-29 21:54:18 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[RESULT#%d %s] Invalid [HOST#%d]\n",
|
|
|
|
result.id, result.name, result.hostid
|
|
|
|
);
|
2010-04-21 19:33:20 +00:00
|
|
|
is_invalid(host_app_versions[i]);
|
2004-12-14 00:57:03 +00:00
|
|
|
break;
|
|
|
|
case VALIDATE_STATE_INIT:
|
2008-05-29 21:54:18 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[RESULT#%d %s] Inconclusive [HOST#%d]\n",
|
|
|
|
result.id, result.name, result.hostid
|
|
|
|
);
|
2004-12-14 00:57:03 +00:00
|
|
|
result.validate_state = VALIDATE_STATE_INCONCLUSIVE;
|
|
|
|
update_result = true;
|
|
|
|
break;
|
2004-11-15 01:58:30 +00:00
|
|
|
}
|
|
|
|
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
if (hav.host_id) {
|
2013-04-03 00:23:37 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[HOST#%d AV#%d] [outlier=%d] Updating HAV in db. pfc.n=%f->%f\n",
|
|
|
|
hav.host_id, hav.app_version_id, result.runtime_outlier, hav_orig.pfc.n,hav.pfc.n);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
retval = hav.update_validator(hav_orig);
|
2013-04-03 00:23:37 +00:00
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2013-03-26 22:24:45 +00:00
|
|
|
"[HOST#%d AV%d] hav.update_validator() failed: %s\n",
|
|
|
|
hav.host_id, hav.app_version_id, boincerror(retval)
|
|
|
|
);
|
2013-04-03 00:23:37 +00:00
|
|
|
}
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
}
|
|
|
|
if (update_host) {
|
|
|
|
retval = host.update_diff_validator(host_initial);
|
|
|
|
}
|
2004-11-15 01:58:30 +00:00
|
|
|
if (update_result) {
|
2004-10-08 22:41:33 +00:00
|
|
|
retval = validator.update_result(result);
|
2004-09-10 20:33:05 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2010-11-08 17:51:57 +00:00
|
|
|
"[RESULT#%d %s] result.update() failed: %s\n",
|
|
|
|
result.id, result.name, boincerror(retval)
|
2004-09-10 20:33:05 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2004-11-15 01:58:30 +00:00
|
|
|
|
2004-09-09 21:52:20 +00:00
|
|
|
if (canonicalid) {
|
2005-09-10 06:09:55 +00:00
|
|
|
// if we found a canonical result,
|
|
|
|
// trigger the assimilator, but do NOT trigger
|
|
|
|
// the transitioner - doing so creates a race condition
|
|
|
|
//
|
|
|
|
transition_time = NEVER;
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_DEBUG,
|
2004-07-13 12:55:22 +00:00
|
|
|
"[WU#%d %s] Found a canonical result: id=%d\n",
|
2004-10-10 03:16:30 +00:00
|
|
|
wu.id, wu.name, canonicalid
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
2004-10-10 03:16:30 +00:00
|
|
|
wu.canonical_resultid = canonicalid;
|
|
|
|
wu.canonical_credit = credit;
|
|
|
|
wu.assimilate_state = ASSIMILATE_READY;
|
2004-07-13 12:55:22 +00:00
|
|
|
|
2010-03-29 22:28:20 +00:00
|
|
|
// don't need to send any more results
|
2004-10-10 03:16:30 +00:00
|
|
|
//
|
2004-10-08 22:41:33 +00:00
|
|
|
for (i=0; i<items.size(); i++) {
|
2004-10-10 03:16:30 +00:00
|
|
|
RESULT& result = items[i].res;
|
2004-10-08 22:41:33 +00:00
|
|
|
|
2004-11-15 01:58:30 +00:00
|
|
|
if (result.server_state != RESULT_SERVER_STATE_UNSENT) {
|
2004-10-08 22:41:33 +00:00
|
|
|
continue;
|
2004-10-10 03:16:30 +00:00
|
|
|
}
|
2004-10-08 22:41:33 +00:00
|
|
|
|
2004-07-13 12:55:22 +00:00
|
|
|
result.server_state = RESULT_SERVER_STATE_OVER;
|
|
|
|
result.outcome = RESULT_OUTCOME_DIDNT_NEED;
|
2004-10-08 22:41:33 +00:00
|
|
|
retval = validator.update_result(result);
|
2004-07-13 12:55:22 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2010-11-08 17:51:57 +00:00
|
|
|
"[RESULT#%d %s] result.update() failed: %s\n",
|
|
|
|
result.id, result.name, boincerror(retval)
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
2004-12-10 22:04:20 +00:00
|
|
|
// here if no consensus.
|
|
|
|
|
2013-03-14 20:07:43 +00:00
|
|
|
// check if #viable results is too large
|
2004-07-13 12:55:22 +00:00
|
|
|
//
|
2013-03-14 20:07:43 +00:00
|
|
|
if (n_viable_results > wu.max_success_results) {
|
2004-10-10 03:16:30 +00:00
|
|
|
wu.error_mask |= WU_ERROR_TOO_MANY_SUCCESS_RESULTS;
|
2005-09-10 06:09:55 +00:00
|
|
|
transition_time = IMMEDIATE;
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
2004-12-10 22:04:20 +00:00
|
|
|
|
2013-03-14 20:07:43 +00:00
|
|
|
// if #viable results >= target_nresults,
|
2004-12-10 22:04:20 +00:00
|
|
|
// we need more results, so bump target_nresults
|
2013-03-14 20:07:43 +00:00
|
|
|
// NOTE: n_viable_results should never be > target_nresults,
|
2004-12-10 22:04:20 +00:00
|
|
|
// but accommodate that if it should happen
|
2004-12-01 05:46:04 +00:00
|
|
|
//
|
2013-03-14 20:07:43 +00:00
|
|
|
if (n_viable_results >= wu.target_nresults) {
|
|
|
|
wu.target_nresults = n_viable_results+1;
|
2005-09-10 06:09:55 +00:00
|
|
|
transition_time = IMMEDIATE;
|
2004-12-01 05:46:04 +00:00
|
|
|
}
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-09-28 20:17:09 +00:00
|
|
|
leave:
|
2004-07-13 12:55:22 +00:00
|
|
|
--log_messages;
|
|
|
|
|
2005-09-10 06:09:55 +00:00
|
|
|
switch (transition_time) {
|
|
|
|
case IMMEDIATE:
|
2004-10-10 03:16:30 +00:00
|
|
|
wu.transition_time = time(0);
|
2005-09-10 06:09:55 +00:00
|
|
|
break;
|
|
|
|
case DELAYED:
|
|
|
|
x = time(0) + 6*3600;
|
2004-10-10 03:16:30 +00:00
|
|
|
if (x < wu.transition_time) wu.transition_time = x;
|
2005-09-10 06:09:55 +00:00
|
|
|
break;
|
|
|
|
case NEVER:
|
|
|
|
wu.transition_time = INT_MAX;
|
2005-10-23 07:19:03 +00:00
|
|
|
break;
|
|
|
|
case NO_CHANGE:
|
|
|
|
break;
|
2004-09-09 21:52:20 +00:00
|
|
|
}
|
2004-07-13 12:55:22 +00:00
|
|
|
|
2004-11-12 23:36:24 +00:00
|
|
|
wu.need_validate = 0;
|
2004-10-13 21:02:43 +00:00
|
|
|
|
2004-10-08 22:41:33 +00:00
|
|
|
retval = validator.update_workunit(wu);
|
2004-07-13 12:55:22 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2012-10-15 18:47:55 +00:00
|
|
|
"[WU#%d %s] update_workunit() failed: %s\n",
|
2010-11-08 17:51:57 +00:00
|
|
|
wu.id, wu.name, boincerror(retval)
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
2006-11-27 01:07:00 +00:00
|
|
|
return retval;
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
2006-11-27 01:07:00 +00:00
|
|
|
return 0;
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// make one pass through the workunits with need_validate set.
|
|
|
|
// return true if there were any
|
|
|
|
//
|
2008-06-26 21:20:07 +00:00
|
|
|
bool do_validate_scan() {
|
2004-10-08 22:41:33 +00:00
|
|
|
DB_VALIDATOR_ITEM_SET validator;
|
|
|
|
std::vector<VALIDATOR_ITEM> items;
|
2004-07-13 12:55:22 +00:00
|
|
|
bool found=false;
|
2009-04-27 23:51:46 +00:00
|
|
|
int retval, i=0;
|
2004-07-13 12:55:22 +00:00
|
|
|
|
2004-10-08 22:41:33 +00:00
|
|
|
// loop over entries that need to be checked
|
|
|
|
//
|
2005-02-22 20:12:31 +00:00
|
|
|
while (1) {
|
|
|
|
retval = validator.enumerate(
|
2009-04-27 23:51:46 +00:00
|
|
|
app.id, SELECT_LIMIT, wu_id_modulus, wu_id_remainder, items
|
2005-02-22 20:12:31 +00:00
|
|
|
);
|
2008-03-27 18:25:29 +00:00
|
|
|
if (retval) {
|
|
|
|
if (retval != ERR_DB_NOT_FOUND) {
|
|
|
|
log_messages.printf(MSG_DEBUG,
|
|
|
|
"DB connection lost, exiting\n"
|
|
|
|
);
|
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2006-11-27 01:07:00 +00:00
|
|
|
retval = handle_wu(validator, items);
|
|
|
|
if (!retval) found = true;
|
2009-04-27 23:51:46 +00:00
|
|
|
if (++i == one_pass_N_WU) break;
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
|
|
|
return found;
|
|
|
|
}
|
|
|
|
|
2005-07-17 19:52:44 +00:00
|
|
|
int main_loop() {
|
2004-07-13 12:55:22 +00:00
|
|
|
int retval;
|
|
|
|
bool did_something;
|
|
|
|
char buf[256];
|
|
|
|
|
|
|
|
sprintf(buf, "where name='%s'", app_name);
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
check_stop_daemons();
|
2010-03-29 22:28:20 +00:00
|
|
|
|
|
|
|
// look up app within the loop,
|
|
|
|
// in case its min_avg_pfc has been changed by the feeder
|
|
|
|
//
|
|
|
|
retval = app.lookup(buf);
|
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(MSG_CRITICAL, "can't find app %s\n", app_name);
|
|
|
|
exit(1);
|
|
|
|
}
|
2008-06-26 21:20:07 +00:00
|
|
|
did_something = do_validate_scan();
|
2004-07-13 12:55:22 +00:00
|
|
|
if (!did_something) {
|
2010-03-29 22:28:20 +00:00
|
|
|
write_modified_app_versions(app_versions);
|
2005-07-17 19:52:44 +00:00
|
|
|
if (one_pass) break;
|
2008-09-22 17:52:41 +00:00
|
|
|
#ifdef GCL_SIMULATOR
|
2009-08-10 04:22:02 +00:00
|
|
|
char nameforsim[64];
|
|
|
|
sprintf(nameforsim, "validator%i", app.id);
|
|
|
|
continue_simulation(nameforsim);
|
|
|
|
signal(SIGUSR2, simulator_signal_handler);
|
|
|
|
pause();
|
2008-09-22 17:52:41 +00:00
|
|
|
#else
|
2012-05-23 18:11:59 +00:00
|
|
|
daemon_sleep(sleep_interval);
|
2008-09-22 17:52:41 +00:00
|
|
|
#endif
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-04-10 00:32:35 +00:00
|
|
|
// Global made available to the project-supplied routines
// check_set() and check_pair().
// NOTE(review): presumably set from the -d / --debug_level option - confirm.
//
int debug_level = 0;
|
2004-07-13 12:55:22 +00:00
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
int i, retval;
|
|
|
|
|
2007-01-15 00:19:37 +00:00
|
|
|
const char *usage =
|
2011-05-05 14:56:32 +00:00
|
|
|
"\nUsage: %s --app <app-name> [OPTIONS]\n"
|
2007-01-15 00:19:37 +00:00
|
|
|
"Start validator for application <app-name>\n\n"
|
|
|
|
"Optional arguments:\n"
|
2010-04-05 18:59:16 +00:00
|
|
|
" --one_pass_N_WU N Validate at most N WUs, then exit\n"
|
|
|
|
" --one_pass Make one pass through WU table, then exit\n"
|
|
|
|
" --mod n i Process only WUs with (id mod n) == i\n"
|
|
|
|
" --max_granted_credit X Grant no more than this amount of credit to a result\n"
|
|
|
|
" --update_credited_job Add record to credited_job table after granting credit\n"
|
|
|
|
" --credit_from_wu Credit is specified in WU XML\n"
|
2012-01-08 01:28:39 +00:00
|
|
|
" --credit_from_runtime X Grant credit based on runtime (max X seconds)and estimated FLOPS\n"
|
2011-09-13 05:23:10 +00:00
|
|
|
" --no_credit Don't grant credit\n"
|
2010-04-05 18:59:16 +00:00
|
|
|
" --sleep_interval n Set sleep-interval to n\n"
|
2010-07-16 07:15:57 +00:00
|
|
|
" -d n, --debug_level n Set log verbosity level, 1-4\n"
|
|
|
|
" -h | --help Show this\n"
|
|
|
|
" -v | --version Show version information\n";
|
2007-01-15 00:19:37 +00:00
|
|
|
|
2012-02-27 11:54:02 +00:00
|
|
|
if (argc > 1) {
|
|
|
|
if (is_arg(argv[1], "h") || is_arg(argv[1], "help")) {
|
2010-04-05 18:59:16 +00:00
|
|
|
printf (usage, argv[0] );
|
2010-07-16 07:15:57 +00:00
|
|
|
exit(0);
|
2012-02-27 11:54:02 +00:00
|
|
|
} else if (is_arg(argv[1], "v") || is_arg(argv[1], "version")) {
|
|
|
|
printf("%s\n", SVN_VERSION);
|
|
|
|
exit(0);
|
|
|
|
}
|
2007-01-15 00:19:37 +00:00
|
|
|
}
|
|
|
|
|
2004-07-13 12:55:22 +00:00
|
|
|
check_stop_daemons();
|
|
|
|
|
|
|
|
for (i=1; i<argc; i++) {
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
if (is_arg(argv[i], "one_pass_N_WU")) {
|
2005-01-03 17:18:32 +00:00
|
|
|
one_pass_N_WU = atoi(argv[++i]);
|
|
|
|
one_pass = true;
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "sleep_interval")) {
|
2006-07-11 21:49:20 +00:00
|
|
|
sleep_interval = atoi(argv[++i]);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "one_pass")) {
|
2004-07-13 12:55:22 +00:00
|
|
|
one_pass = true;
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "app")) {
|
2004-07-13 12:55:22 +00:00
|
|
|
strcpy(app_name, argv[++i]);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "d") || is_arg(argv[i], "debug_level")) {
|
2010-04-05 18:59:16 +00:00
|
|
|
debug_level = atoi(argv[++i]);
|
|
|
|
log_messages.set_debug_level(debug_level);
|
|
|
|
if (debug_level == 4) g_print_queries = true;
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "mod")) {
|
2005-02-22 20:12:31 +00:00
|
|
|
wu_id_modulus = atoi(argv[++i]);
|
|
|
|
wu_id_remainder = atoi(argv[++i]);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "max_granted_credit")) {
|
2006-11-28 03:27:13 +00:00
|
|
|
max_granted_credit = atof(argv[++i]);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "update_credited_job")) {
|
2007-05-04 17:59:50 +00:00
|
|
|
update_credited_job = true;
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "credit_from_wu")) {
|
2007-05-04 17:59:50 +00:00
|
|
|
credit_from_wu = true;
|
2011-09-13 21:01:42 +00:00
|
|
|
} else if (is_arg(argv[i], "credit_from_runtime")) {
|
|
|
|
credit_from_runtime = true;
|
|
|
|
max_runtime = atof(argv[++i]);
|
2011-09-13 05:23:10 +00:00
|
|
|
} else if (is_arg(argv[i], "no_credit")) {
|
|
|
|
no_credit = true;
|
2004-07-13 12:55:22 +00:00
|
|
|
} else {
|
2009-08-10 04:22:02 +00:00
|
|
|
fprintf(stderr,
|
|
|
|
"Invalid option '%s'\nTry `%s --help` for more information\n",
|
|
|
|
argv[i], argv[0]
|
|
|
|
);
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "unrecognized arg: %s\n", argv[i]);
|
2004-12-14 00:57:03 +00:00
|
|
|
exit(1);
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-03-13 23:35:13 +00:00
|
|
|
if (app_name[0] == 0) {
|
2010-04-05 18:59:16 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
|
|
|
"must use '--app' to specify an application\n"
|
2009-08-10 04:22:02 +00:00
|
|
|
);
|
|
|
|
printf (usage, argv[0] );
|
|
|
|
exit(1);
|
2007-01-15 00:19:37 +00:00
|
|
|
}
|
|
|
|
|
2009-05-07 13:54:51 +00:00
|
|
|
retval = config.parse_file();
|
2004-07-13 12:55:22 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2009-05-07 13:54:51 +00:00
|
|
|
"Can't parse config.xml: %s\n", boincerror(retval)
|
2004-07-13 12:55:22 +00:00
|
|
|
);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2011-09-13 21:01:42 +00:00
|
|
|
retval = boinc_db.open(
|
|
|
|
config.db_name, config.db_host, config.db_user, config.db_passwd
|
|
|
|
);
|
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(MSG_CRITICAL,
|
|
|
|
"boinc_db.open failed: %s\n", boincerror(retval)
|
|
|
|
);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2007-04-24 23:21:42 +00:00
|
|
|
"Starting validator, debug level %d\n", log_messages.debug_level
|
|
|
|
);
|
2011-09-13 21:01:42 +00:00
|
|
|
|
|
|
|
if (credit_from_runtime) {
|
2012-01-06 22:22:02 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"using credit from runtime, max runtime: %f\n", max_runtime
|
|
|
|
);
|
2011-09-13 21:01:42 +00:00
|
|
|
}
|
|
|
|
|
2005-02-22 20:12:31 +00:00
|
|
|
if (wu_id_modulus) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2005-02-22 20:12:31 +00:00
|
|
|
"Modulus %d, remainder %d\n", wu_id_modulus, wu_id_remainder
|
|
|
|
);
|
|
|
|
}
|
2004-07-13 12:55:22 +00:00
|
|
|
|
|
|
|
install_stop_signal_handler();
|
|
|
|
|
2005-07-17 19:52:44 +00:00
|
|
|
main_loop();
|
2004-07-13 12:55:22 +00:00
|
|
|
}
|
2004-12-08 00:40:19 +00:00
|
|
|
|
2005-01-02 18:29:53 +00:00
|
|
|
// Source-identification string for this file
// (presumably "$Id$" is filled in by version-control keyword expansion - confirm)
const char* BOINC_RCSID_634dbda0b9 = "$Id$";
|