2008-08-06 18:36:30 +00:00
|
|
|
// This file is part of BOINC.
|
2005-01-20 23:22:22 +00:00
|
|
|
// http://boinc.berkeley.edu
|
2008-08-06 18:36:30 +00:00
|
|
|
// Copyright (C) 2008 University of California
|
2003-07-08 21:30:47 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
2003-07-08 21:30:47 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
2005-01-20 23:22:22 +00:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
2003-01-07 08:11:16 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
2003-01-07 08:11:16 +00:00
|
|
|
|
2004-09-09 23:45:45 +00:00
|
|
|
// This is a framework for an assimilator.
|
2008-07-25 22:13:41 +00:00
|
|
|
// You need to link this with an (application-specific) function
|
2004-09-09 23:45:45 +00:00
|
|
|
// assimilate_handler()
|
|
|
|
// in order to make a complete program.
|
2004-05-25 19:20:45 +00:00
|
|
|
//
|
|
|
|
|
2005-11-21 18:34:44 +00:00
|
|
|
#include "config.h"
|
2004-07-13 13:54:09 +00:00
|
|
|
#include <cstring>
|
|
|
|
#include <cstdlib>
|
2002-12-17 19:00:43 +00:00
|
|
|
#include <unistd.h>
|
2004-07-13 13:54:09 +00:00
|
|
|
#include <ctime>
|
2003-01-23 08:07:48 +00:00
|
|
|
#include <vector>
|
2002-12-17 19:00:43 +00:00
|
|
|
|
2003-04-07 19:06:00 +00:00
|
|
|
#include "boinc_db.h"
|
2002-12-17 19:00:43 +00:00
|
|
|
#include "parse.h"
|
2003-02-10 19:51:32 +00:00
|
|
|
#include "util.h"
|
2008-03-27 18:25:29 +00:00
|
|
|
#include "error_numbers.h"
|
2007-05-11 16:30:13 +00:00
|
|
|
#include "str_util.h"
|
2010-07-16 07:15:57 +00:00
|
|
|
#include "svn_version.h"
|
2007-05-11 16:30:13 +00:00
|
|
|
|
2003-08-15 00:45:25 +00:00
|
|
|
#include "sched_config.h"
|
2003-03-08 00:09:40 +00:00
|
|
|
#include "sched_util.h"
|
2004-04-08 08:15:23 +00:00
|
|
|
#include "sched_msgs.h"
|
2003-01-23 08:07:48 +00:00
|
|
|
#include "assimilate_handler.h"
|
2002-12-17 19:00:43 +00:00
|
|
|
|
2004-06-30 18:17:21 +00:00
|
|
|
using std::vector;
|
|
|
|
|
2003-06-20 01:31:03 +00:00
|
|
|
// File-name constants. Neither is referenced in the visible part of
// this file.
#define LOCKFILE "assimilator.out"
#define PIDFILE "assimilator.pid"

// Default value of sleep_interval, in seconds.
#define SLEEP_INTERVAL 10

// Settings below are filled in from the command line by main().
// They have external linkage, so other translation units
// (presumably the application-specific handler) can read them.

// false if --dont_update_db was given:
// do_pass() then doesn't write the new assimilate_state to the DB.
bool update_db = true;

// true if --noinsert was given. Not read in this file.
bool noinsert = false;

// --mod N R: if the modulus is nonzero,
// only workunits with (id % modulus) == remainder are enumerated.
int wu_id_modulus = 0;
int wu_id_remainder = 0;

// --sleep_interval X: seconds to sleep after a pass that did nothing.
int sleep_interval = SLEEP_INTERVAL;

// --one_pass_N_WU N: if nonzero, the row limit of the workunit query
// (otherwise the limit is 1000).
int one_pass_N_WU = 0;

// Copies of main()'s argc and argv.
int g_argc;
char** g_argv;

// Values of --results_prefix and --transcripts_prefix (NULL if absent).
// They point into argv. Not read in this file.
char* results_prefix = NULL;
char* transcripts_prefix = NULL;
|
2005-01-03 17:18:32 +00:00
|
|
|
|
2008-10-03 19:31:56 +00:00
|
|
|
// Write a short description of the program and the list of
// command-line options to stderr, then end the process.
//
// argv: the program's argument vector; only argv[0] is used
//       (it is substituted into the "usage:" line).
//
// Does not return: it always calls exit(0),
// including when it is called because of a bad command line.
//
void usage(char** argv) {
    static const char* const description =
        "This program is an 'assimilator'; it handles completed jobs.\n"
        "Normally it is run as a daemon from config.xml.\n"
        "See: http://boinc.berkeley.edu/trac/wiki/BackendPrograms\n\n";

    static const char* const option_list =
        " Options:\n"
        " --app name Process jobs for the given application\n"
        " [--sleep_interval X] Sleep X seconds if no jobs to process (default 10)\n"
        " [--mod N R] Process jobs with mod(ID, N) == R\n"
        " [--one_pass] Do one DB enumeration, then exit\n"
        " [--one_pass_N_WU N] Process at most N jobs\n"
        " [-d | --debug_level N] Set verbosity level (1 to 4)\n"
        " [--dont_update_db] Don't update DB (for testing)\n"
        " [--noinsert] Don't insert records in app-specific DB\n"
        " [-h | --help] Show this\n"
        " [-v | --version] Show version information\n";

    const char* program = argv[0];

    fprintf(stderr, "%s", description);
    fprintf(stderr, "usage: %s [options]\n%s", program, option_list);
    exit(0);
}
|
|
|
|
|
2003-01-23 08:07:48 +00:00
|
|
|
// assimilate all WUs that need it
|
2010-02-18 17:58:05 +00:00
|
|
|
// return nonzero (true) if did anything
|
2002-12-17 19:00:43 +00:00
|
|
|
//
|
2003-01-23 08:07:48 +00:00
|
|
|
bool do_pass(APP& app) {
|
2003-06-04 17:21:26 +00:00
|
|
|
DB_WORKUNIT wu;
|
|
|
|
DB_RESULT canonical_result, result;
|
2003-08-15 20:35:44 +00:00
|
|
|
bool did_something = false;
|
2004-05-13 18:18:22 +00:00
|
|
|
char buf[256];
|
2005-07-26 22:14:55 +00:00
|
|
|
char mod_clause[256];
|
2004-10-04 23:59:51 +00:00
|
|
|
int retval;
|
2004-11-30 00:08:11 +00:00
|
|
|
int num_assimilated=0;
|
2004-06-24 21:00:13 +00:00
|
|
|
|
2004-05-03 19:30:01 +00:00
|
|
|
check_stop_daemons();
|
2003-03-08 00:09:40 +00:00
|
|
|
|
2005-07-26 22:14:55 +00:00
|
|
|
if (wu_id_modulus) {
|
2007-09-27 15:08:40 +00:00
|
|
|
sprintf(mod_clause, " and workunit.id %% %d = %d ",
|
2005-07-26 22:14:55 +00:00
|
|
|
wu_id_modulus, wu_id_remainder
|
|
|
|
);
|
|
|
|
} else {
|
|
|
|
strcpy(mod_clause, "");
|
|
|
|
}
|
|
|
|
|
2005-07-17 19:52:44 +00:00
|
|
|
sprintf(buf,
|
2005-07-26 22:14:55 +00:00
|
|
|
"where appid=%d and assimilate_state=%d %s limit %d",
|
|
|
|
app.id, ASSIMILATE_READY, mod_clause,
|
2005-07-17 19:52:44 +00:00
|
|
|
one_pass_N_WU ? one_pass_N_WU : 1000
|
|
|
|
);
|
2008-03-27 18:25:29 +00:00
|
|
|
while (1) {
|
|
|
|
retval = wu.enumerate(buf);
|
|
|
|
if (retval) {
|
|
|
|
if (retval != ERR_DB_NOT_FOUND) {
|
|
|
|
log_messages.printf(MSG_DEBUG,
|
|
|
|
"DB connection lost, exiting\n"
|
|
|
|
);
|
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2003-08-15 20:35:44 +00:00
|
|
|
vector<RESULT> results; // must be inside while()!
|
2004-11-26 23:33:11 +00:00
|
|
|
|
2004-11-29 22:26:34 +00:00
|
|
|
// for testing purposes, pretend we did nothing
|
|
|
|
//
|
|
|
|
if (update_db) {
|
|
|
|
did_something = true;
|
|
|
|
}
|
2003-01-07 22:49:42 +00:00
|
|
|
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_DEBUG,
|
2008-07-25 22:13:41 +00:00
|
|
|
"[%s] assimilating WU %d; state=%d\n", wu.name, wu.id, wu.assimilate_state
|
2003-08-12 20:58:24 +00:00
|
|
|
);
|
2003-01-07 22:49:42 +00:00
|
|
|
|
2003-06-04 17:21:26 +00:00
|
|
|
sprintf(buf, "where workunitid=%d", wu.id);
|
2007-10-23 17:11:56 +00:00
|
|
|
canonical_result.clear();
|
|
|
|
bool found = false;
|
2012-02-14 18:50:47 +00:00
|
|
|
while (1) {
|
|
|
|
retval = result.enumerate(buf);
|
|
|
|
if (retval) {
|
|
|
|
if (retval != ERR_DB_NOT_FOUND) {
|
|
|
|
log_messages.printf(MSG_DEBUG,
|
|
|
|
"DB connection lost, exiting\n"
|
|
|
|
);
|
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2003-01-23 08:07:48 +00:00
|
|
|
results.push_back(result);
|
|
|
|
if (result.id == wu.canonical_resultid) {
|
|
|
|
canonical_result = result;
|
2007-10-23 17:11:56 +00:00
|
|
|
found = true;
|
2003-01-23 08:07:48 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-10-23 17:11:56 +00:00
|
|
|
// If no canonical result found and WU had no other errors,
|
|
|
|
// something is wrong, e.g. result records got deleted prematurely.
|
|
|
|
// This is probably unrecoverable, so mark the WU as having
|
|
|
|
// an assimilation error and keep going.
|
|
|
|
//
|
|
|
|
if (!found && !wu.error_mask) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2007-10-23 17:11:56 +00:00
|
|
|
"[%s] no canonical result\n", wu.name
|
|
|
|
);
|
|
|
|
wu.error_mask = WU_ERROR_NO_CANONICAL_RESULT;
|
|
|
|
sprintf(buf, "error_mask=%d", wu.error_mask);
|
|
|
|
wu.update_field(buf);
|
|
|
|
}
|
|
|
|
|
2004-12-20 20:47:25 +00:00
|
|
|
retval = assimilate_handler(wu, results, canonical_result);
|
2008-03-07 21:13:01 +00:00
|
|
|
if (retval && retval != DEFER_ASSIMILATION) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2010-11-08 17:51:57 +00:00
|
|
|
"[%s] handler error: %s; exiting\n", wu.name, boincerror(retval)
|
2004-12-20 20:47:25 +00:00
|
|
|
);
|
2005-03-30 05:15:54 +00:00
|
|
|
exit(retval);
|
2004-12-20 20:47:25 +00:00
|
|
|
}
|
2003-01-23 08:07:48 +00:00
|
|
|
|
2004-11-29 22:26:34 +00:00
|
|
|
if (update_db) {
|
2008-03-07 21:13:01 +00:00
|
|
|
// Defer assimilation until next result is returned
|
|
|
|
int assimilate_state = ASSIMILATE_DONE;
|
2008-07-25 22:13:41 +00:00
|
|
|
if (retval == DEFER_ASSIMILATION) {
|
2008-03-07 21:13:01 +00:00
|
|
|
assimilate_state = ASSIMILATE_INIT;
|
|
|
|
}
|
2004-11-29 22:26:34 +00:00
|
|
|
sprintf(
|
2011-01-07 20:23:22 +00:00
|
|
|
buf, "assimilate_state=%d, transition_time=%d",
|
2008-03-07 21:13:01 +00:00
|
|
|
assimilate_state, (int)time(0)
|
2004-11-29 22:26:34 +00:00
|
|
|
);
|
|
|
|
retval = wu.update_field(buf);
|
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2010-11-08 17:51:57 +00:00
|
|
|
"[%s] update failed: %s\n", wu.name, boincerror(retval)
|
2004-11-29 22:26:34 +00:00
|
|
|
);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
2004-11-30 00:50:46 +00:00
|
|
|
|
|
|
|
num_assimilated++;
|
|
|
|
|
2002-12-17 19:00:43 +00:00
|
|
|
}
|
2004-11-30 00:50:46 +00:00
|
|
|
|
2004-11-15 23:46:50 +00:00
|
|
|
if (did_something) {
|
|
|
|
boinc_db.commit_transaction();
|
|
|
|
}
|
2004-11-30 00:08:11 +00:00
|
|
|
|
2004-12-14 00:57:03 +00:00
|
|
|
if (num_assimilated) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2004-11-30 00:08:11 +00:00
|
|
|
"Assimilated %d workunits.\n", num_assimilated
|
2004-12-14 00:57:03 +00:00
|
|
|
);
|
|
|
|
}
|
2004-11-30 00:08:11 +00:00
|
|
|
|
2002-12-17 19:00:43 +00:00
|
|
|
return did_something;
|
|
|
|
}
|
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
int retval;
|
2007-04-18 20:49:58 +00:00
|
|
|
bool one_pass = false;
|
2003-06-04 17:21:26 +00:00
|
|
|
DB_APP app;
|
2002-12-17 19:00:43 +00:00
|
|
|
int i;
|
2003-01-07 01:02:08 +00:00
|
|
|
char buf[256];
|
2002-12-17 19:00:43 +00:00
|
|
|
|
2008-10-03 19:31:56 +00:00
|
|
|
strcpy(app.name, "");
|
2004-05-03 19:30:01 +00:00
|
|
|
check_stop_daemons();
|
2008-07-25 22:13:41 +00:00
|
|
|
g_argc = argc;
|
|
|
|
g_argv = argv;
|
2002-12-17 19:00:43 +00:00
|
|
|
for (i=1; i<argc; i++) {
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
if (is_arg(argv[i], "one_pass_N_WU")) {
|
2005-01-03 17:18:32 +00:00
|
|
|
one_pass_N_WU = atoi(argv[++i]);
|
|
|
|
one_pass = true;
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "sleep_interval")) {
|
2006-10-22 00:42:44 +00:00
|
|
|
sleep_interval = atoi(argv[++i]);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "one_pass")) {
|
2002-12-17 19:00:43 +00:00
|
|
|
one_pass = true;
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "d") || is_arg(argv[i], "debug_level")) {
|
2010-04-05 21:59:33 +00:00
|
|
|
int dl = atoi(argv[++i]);
|
|
|
|
log_messages.set_debug_level(dl);
|
|
|
|
if (dl ==4) g_print_queries = true;
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "app")) {
|
2002-12-18 01:34:51 +00:00
|
|
|
strcpy(app.name, argv[++i]);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "dont_update_db")) {
|
2004-12-14 00:57:03 +00:00
|
|
|
// This option is for testing your assimilator. When set,
|
|
|
|
// it ensures that the assimilator does not actually modify
|
|
|
|
// the assimilate_state of the workunits, so you can run
|
|
|
|
// your assimilator over and over again without affecting
|
|
|
|
// your project.
|
2004-11-29 22:26:34 +00:00
|
|
|
update_db = false;
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "noinsert")) {
|
2011-01-07 20:23:22 +00:00
|
|
|
// This option is also for testing and is used to
|
2004-12-14 00:57:03 +00:00
|
|
|
// prevent the inserting of results into the *backend*
|
|
|
|
// (as opposed to the boinc) DB.
|
2004-11-30 00:28:19 +00:00
|
|
|
noinsert = true;
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "mod")) {
|
2005-07-26 22:14:55 +00:00
|
|
|
wu_id_modulus = atoi(argv[++i]);
|
|
|
|
wu_id_remainder = atoi(argv[++i]);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "help") || is_arg(argv[i], "h")) {
|
2008-10-03 19:31:56 +00:00
|
|
|
usage(argv);
|
2010-07-16 07:15:57 +00:00
|
|
|
} else if (is_arg(argv[i], "v") || is_arg(argv[i], "version")) {
|
|
|
|
printf("%s\n", SVN_VERSION);
|
|
|
|
exit(0);
|
2011-03-18 08:20:11 +00:00
|
|
|
} else if (is_arg(argv[i], "results_prefix")) {
|
|
|
|
results_prefix=argv[++i];
|
|
|
|
} else if (is_arg(argv[i], "transcripts_prefix")) {
|
|
|
|
transcripts_prefix=argv[++i];
|
2002-12-17 19:00:43 +00:00
|
|
|
} else {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "Unrecognized arg: %s\n", argv[i]);
|
2008-10-03 19:31:56 +00:00
|
|
|
usage(argv);
|
2002-12-17 19:00:43 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-10-03 19:31:56 +00:00
|
|
|
if (!strlen(app.name)) {
|
|
|
|
usage(argv);
|
|
|
|
}
|
|
|
|
|
2005-07-26 22:14:55 +00:00
|
|
|
if (wu_id_modulus) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_DEBUG,
|
2007-09-27 15:08:40 +00:00
|
|
|
"Using mod'ed WU enumeration. modulus = %d remainder = %d\n",
|
|
|
|
wu_id_modulus, wu_id_remainder
|
|
|
|
);
|
2005-07-26 22:14:55 +00:00
|
|
|
}
|
|
|
|
|
2009-05-07 13:54:51 +00:00
|
|
|
retval = config.parse_file();
|
2002-12-17 19:00:43 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2009-05-07 13:54:51 +00:00
|
|
|
"Can't parse config.xml: %s\n", boincerror(retval)
|
2007-05-11 16:30:13 +00:00
|
|
|
);
|
2002-12-17 19:00:43 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL, "Starting\n");
|
2003-07-01 00:39:54 +00:00
|
|
|
|
2004-01-15 23:53:13 +00:00
|
|
|
retval = boinc_db.open(config.db_name, config.db_host, config.db_user, config.db_passwd);
|
2002-12-18 01:34:51 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "Can't open DB\n");
|
2002-12-18 01:34:51 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
2003-06-04 17:21:26 +00:00
|
|
|
sprintf(buf, "where name='%s'", app.name);
|
|
|
|
retval = app.lookup(buf);
|
2002-12-18 01:34:51 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "Can't find app\n");
|
2002-12-18 01:34:51 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
2003-12-31 23:09:21 +00:00
|
|
|
install_stop_signal_handler();
|
2010-02-18 17:58:05 +00:00
|
|
|
do {
|
2011-01-07 20:23:22 +00:00
|
|
|
if (!do_pass(app)) {
|
|
|
|
if (!one_pass) {
|
|
|
|
sleep(sleep_interval);
|
|
|
|
}
|
2002-12-17 19:00:43 +00:00
|
|
|
}
|
2010-02-18 17:58:05 +00:00
|
|
|
} while (!one_pass);
|
2002-12-17 19:00:43 +00:00
|
|
|
}
|
2004-12-06 22:41:19 +00:00
|
|
|
|
2004-12-08 00:40:19 +00:00
|
|
|
|
2005-01-02 18:29:53 +00:00
|
|
|
// Revision-control identification string for this file.
// "$Id$" looks like an unexpanded RCS/SVN keyword
// (presumably filled in by the version-control system on checkout).
const char *BOINC_RCSID_7841370789 = "$Id$";
|