2008-08-06 18:36:30 +00:00
|
|
|
// This file is part of BOINC.
|
2005-01-20 23:22:22 +00:00
|
|
|
// http://boinc.berkeley.edu
|
2008-08-06 18:36:30 +00:00
|
|
|
// Copyright (C) 2008 University of California
|
2003-07-02 20:57:59 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
2003-07-02 20:57:59 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
2005-01-20 23:22:22 +00:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
2003-07-01 20:37:09 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
2002-12-03 18:57:40 +00:00
|
|
|
|
2002-10-18 16:52:28 +00:00
|
|
|
// make_work
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
// --wu_name name
|
|
|
|
// [ --wu_name name2 ... ]
|
|
|
|
// [ --cushion n ] // make work if fewer than N unsent results
|
|
|
|
// [ --max_wus n ] // don't make work if more than N total WUs
|
|
|
|
// [ --one_pass ] // quit after one pass
|
2002-10-14 23:10:12 +00:00
|
|
|
//
|
2003-12-06 01:49:30 +00:00
|
|
|
// Create WU and result records as needed to maintain a pool of work
|
|
|
|
// (for testing purposes).
|
2002-10-18 16:52:28 +00:00
|
|
|
// Clones the WU of the given name.
|
2002-10-09 04:56:41 +00:00
|
|
|
//
|
|
|
|
|
2005-11-21 18:34:44 +00:00
|
|
|
#include "config.h"
|
2012-05-09 16:11:50 +00:00
|
|
|
#include <sys/param.h>
|
2004-07-13 13:54:09 +00:00
|
|
|
#include <cstdio>
|
|
|
|
#include <cstdlib>
|
|
|
|
#include <cstring>
|
2005-07-17 19:52:44 +00:00
|
|
|
#include <errno.h>
|
2002-10-09 04:56:41 +00:00
|
|
|
#include <unistd.h>
|
2004-07-13 13:54:09 +00:00
|
|
|
#include <ctime>
|
2006-06-08 05:54:40 +00:00
|
|
|
#include <vector>
|
|
|
|
#include <string>
|
|
|
|
|
|
|
|
using std::vector;
|
|
|
|
using std::string;
|
2002-10-09 04:56:41 +00:00
|
|
|
|
2003-04-07 19:06:00 +00:00
|
|
|
#include "boinc_db.h"
|
2002-10-14 23:10:12 +00:00
|
|
|
#include "crypt.h"
|
2003-02-10 19:51:32 +00:00
|
|
|
#include "util.h"
|
2002-10-14 23:10:12 +00:00
|
|
|
#include "backend_lib.h"
|
2003-08-15 00:45:25 +00:00
|
|
|
#include "sched_config.h"
|
2002-12-20 02:12:27 +00:00
|
|
|
#include "parse.h"
|
2003-03-08 00:09:40 +00:00
|
|
|
#include "sched_util.h"
|
2004-04-08 08:15:23 +00:00
|
|
|
#include "sched_msgs.h"
|
2013-06-07 00:31:46 +00:00
|
|
|
#include "str_replace.h"
|
2009-05-07 13:54:51 +00:00
|
|
|
#include "str_util.h"
|
2009-09-17 17:56:59 +00:00
|
|
|
#include "svn_version.h"
|
2002-10-09 04:56:41 +00:00
|
|
|
|
2003-02-10 19:51:32 +00:00
|
|
|
#define LOCKFILE "make_work.out"
|
2003-06-20 01:31:03 +00:00
|
|
|
#define PIDFILE "make_work.pid"
|
2002-10-09 04:56:41 +00:00
|
|
|
|
2003-08-16 01:02:49 +00:00
|
|
|
// --max_wus: don't make work if there are more than this many total WUs.
// 0 (the default) disables the check.
int max_wus = 0;
|
2003-09-02 21:16:55 +00:00
|
|
|
// --cushion: make work if there are fewer than this many unsent results.
int cushion = 300;
|
2005-07-17 19:52:44 +00:00
|
|
|
// --one_pass: quit after one pass instead of looping.
bool one_pass = false;
|
2003-08-15 23:54:50 +00:00
|
|
|
|
2003-01-02 23:12:05 +00:00
|
|
|
// edit a WU XML doc, replacing one filename by another
|
|
|
|
// (should appear twice, within <file_info> and <file_ref>)
|
2006-11-09 00:20:14 +00:00
|
|
|
// Don't patch the URL; we'll download the same file
|
2003-01-02 23:12:05 +00:00
|
|
|
//
|
2006-11-09 00:20:14 +00:00
|
|
|
void replace_file_name(char* xml_doc, char* filename, char* new_filename) {
|
2008-03-31 16:19:45 +00:00
|
|
|
char buf[BLOB_SIZE], temp[256];
|
2003-01-02 23:12:05 +00:00
|
|
|
char * p;
|
2003-06-19 22:55:50 +00:00
|
|
|
|
2013-06-04 05:42:53 +00:00
|
|
|
safe_strcpy(buf, xml_doc);
|
2003-01-02 23:12:05 +00:00
|
|
|
p = strtok(buf,"\n");
|
|
|
|
while (p) {
|
|
|
|
if (parse_str(p, "<name>", temp, sizeof(temp))) {
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
if (!strcmp(filename, temp)) {
|
2004-09-13 18:05:54 +00:00
|
|
|
replace_element_contents(
|
|
|
|
xml_doc + (p - buf),"<name>","</name>", new_filename
|
|
|
|
);
|
2003-01-02 23:12:05 +00:00
|
|
|
}
|
|
|
|
} else if (parse_str(p, "<file_name>", temp, sizeof(temp))) {
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
if (!strcmp(filename, temp)) {
|
2004-09-13 18:05:54 +00:00
|
|
|
replace_element_contents(
|
|
|
|
xml_doc+(p-buf), "<file_name>","</file_name>", new_filename
|
|
|
|
);
|
2003-01-02 23:12:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
p = strtok(0, "\n");
|
2002-12-20 02:12:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-11-09 00:20:14 +00:00
|
|
|
// Insert a clone of original_wu into the database.
//
// original_wu   the WU to clone; on return its id field has been
//               overwritten with the database id of the new WU
// starting_xml  the XML document to start from
// start_time    used, with per-process sequence numbers,
//               to build unique file and WU names
//
// Exits the process if the insert fails.
//
void make_new_wu(DB_WORKUNIT& original_wu, char* starting_xml, int start_time) {
    char file_name[256], buf[BLOB_SIZE], new_file_name[256];
    char new_buf[BLOB_SIZE];
    char* save = NULL;
    int retval;
    bool renamed = false;
    DB_WORKUNIT wu = original_wu;
    static int file_seqno = 0, wu_seqno = 0;

    // buf is tokenized (and so destroyed) to find the file names;
    // new_buf accumulates the renames
    //
    safe_strcpy(buf, starting_xml);
    safe_strcpy(new_buf, starting_xml);
    file_name[0] = 0;

    // make new names for the WU's input files,
    // so clients will download them.
    // (don't actually copy files; URL stays the same)
    //
    // strtok_r() rather than strtok(): replace_file_name() does its own
    // tokenizing, which would otherwise end this scan after the first file.
    //
    for (char* p = strtok_r(buf, "\n", &save); p; p = strtok_r(NULL, "\n", &save)) {
        if (!parse_str(p, "<name>", file_name, sizeof(file_name))) continue;
        snprintf(
            new_file_name, sizeof(new_file_name),
            "%s__%d_%d", file_name, start_time, file_seqno++
        );
        replace_file_name(new_buf, file_name, new_file_name);
        renamed = true;
    }
    if (renamed) {
        safe_strcpy(wu.xml_doc, new_buf);
    }

    // set various fields for new WU (all others are copied)
    //
    wu.id = 0;
    wu.create_time = time(0);

    // the name of the new WU cannot include the original WU name,
    // because the original one probably contains "nodelete",
    // but we want the copy to be eligible for file deletion
    //
    sprintf(wu.name, "wu_%d_%d", start_time, wu_seqno++);
    wu.need_validate = false;
    wu.canonical_resultid = 0;
    wu.canonical_credit = 0;
    wu.hr_class = 0;
    wu.transition_time = time(0);
    wu.error_mask = 0;
    wu.file_delete_state = FILE_DELETE_INIT;
    wu.assimilate_state = ASSIMILATE_INIT;
    retval = wu.insert();
    if (retval) {
        log_messages.printf(MSG_CRITICAL,
            "Failed to created WU: %s; exiting\n", boincerror(retval)
        );
        exit(retval);
    }

    // hand the new WU's id back to the caller through original_wu
    //
    original_wu.id = boinc_db.insert_id();
    log_messages.printf(MSG_DEBUG,
        "Created %s, clone of %s\n", wu.name, original_wu.name
    );
}
|
|
|
|
|
2008-03-10 17:03:15 +00:00
|
|
|
// wait for the transitioner to create a result for the given WU.
|
|
|
|
// This keeps us from getting infinitely far ahead of the transitioner
|
|
|
|
// (e.g. if the transitioner isn't running)
|
|
|
|
//
|
|
|
|
void wait_for_results(int wu_id) {
|
|
|
|
DB_RESULT result;
|
|
|
|
int count, retval;
|
|
|
|
char buf[256];
|
|
|
|
|
|
|
|
sprintf(buf, "where workunitid=%d", wu_id);
|
|
|
|
while (1) {
|
|
|
|
retval = result.count(count, buf);
|
2010-11-08 17:51:57 +00:00
|
|
|
log_messages.printf(MSG_DEBUG, "result.count for %d returned %d, error: %s\n",
|
|
|
|
wu_id, count, boincerror(retval)
|
2008-03-27 18:25:29 +00:00
|
|
|
);
|
2008-03-10 17:03:15 +00:00
|
|
|
if (retval) {
|
2010-11-08 17:51:57 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "result.count: %s\n", boincerror(retval));
|
2008-03-10 17:03:15 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
if (count > 0) return;
|
2012-05-23 18:11:59 +00:00
|
|
|
daemon_sleep(10);
|
2008-10-04 23:44:24 +00:00
|
|
|
check_stop_daemons();
|
2008-03-10 17:03:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-06-08 05:54:40 +00:00
|
|
|
void make_work(vector<string> &wu_names) {
|
2003-08-16 01:02:49 +00:00
|
|
|
int retval, start_time=time(0);
|
2012-05-09 16:11:50 +00:00
|
|
|
char keypath[MAXPATHLEN];
|
2008-03-31 16:19:45 +00:00
|
|
|
char buf[BLOB_SIZE];
|
2002-10-14 23:10:12 +00:00
|
|
|
R_RSA_PRIVATE_KEY key;
|
2006-06-08 05:54:40 +00:00
|
|
|
int nwu_names = wu_names.size();
|
2013-05-20 20:00:22 +00:00
|
|
|
DB_WORKUNIT *wus = new DB_WORKUNIT[nwu_names]();
|
2006-06-08 05:54:40 +00:00
|
|
|
int i;
|
|
|
|
static int index=0;
|
2003-06-19 22:55:50 +00:00
|
|
|
|
2009-05-07 13:54:51 +00:00
|
|
|
retval = config.parse_file();
|
2002-10-09 04:56:41 +00:00
|
|
|
if (retval) {
|
2009-05-07 13:54:51 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "Can't parse config.xml: %s\n", boincerror(retval));
|
2002-10-09 04:56:41 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2004-01-15 23:53:13 +00:00
|
|
|
retval = boinc_db.open(config.db_name, config.db_host, config.db_user, config.db_passwd);
|
2002-10-09 04:56:41 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "can't open db\n");
|
2002-10-09 04:56:41 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2006-06-08 05:54:40 +00:00
|
|
|
for (i=0; i<nwu_names; i++) {
|
|
|
|
DB_WORKUNIT& wu = wus[i];
|
|
|
|
sprintf(buf, "where name='%s'", wu_names[i].c_str());
|
|
|
|
retval = wu.lookup(buf);
|
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2006-06-08 05:54:40 +00:00
|
|
|
"can't find wu %s\n", wu_names[i].c_str()
|
|
|
|
);
|
|
|
|
exit(1);
|
|
|
|
}
|
2002-10-14 23:10:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
sprintf(keypath, "%s/upload_private", config.key_dir);
|
|
|
|
retval = read_key_file(keypath, key);
|
2002-10-09 04:56:41 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "can't read key\n");
|
2002-10-09 04:56:41 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
2003-06-19 22:55:50 +00:00
|
|
|
|
2003-01-07 01:02:08 +00:00
|
|
|
while (1) {
|
2004-05-03 19:30:01 +00:00
|
|
|
check_stop_daemons();
|
2007-04-18 20:49:58 +00:00
|
|
|
int unsent_results;
|
2003-06-20 01:31:03 +00:00
|
|
|
|
2007-06-20 16:27:27 +00:00
|
|
|
retval = count_unsent_results(unsent_results, wus[0].appid);
|
2007-04-18 20:49:58 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2007-04-18 20:49:58 +00:00
|
|
|
"can't get result count\n"
|
|
|
|
);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
int total_wus=0;
|
|
|
|
if (max_wus) {
|
2007-06-20 16:27:27 +00:00
|
|
|
retval = count_workunits(total_wus, "");
|
2007-04-18 20:49:58 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2007-04-18 20:49:58 +00:00
|
|
|
"can't get wu count\n"
|
|
|
|
);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
2004-12-06 22:41:19 +00:00
|
|
|
log_messages.printf(
|
2008-02-21 21:00:58 +00:00
|
|
|
MSG_DEBUG, "unsent: %d cushion: %d\n",
|
2004-12-06 22:41:19 +00:00
|
|
|
unsent_results, cushion
|
|
|
|
);
|
2003-08-16 01:02:49 +00:00
|
|
|
if (unsent_results > cushion) {
|
2012-05-23 18:11:59 +00:00
|
|
|
daemon_sleep(10);
|
2002-10-09 04:56:41 +00:00
|
|
|
continue;
|
|
|
|
}
|
2002-10-14 23:10:12 +00:00
|
|
|
|
2006-06-08 05:54:40 +00:00
|
|
|
int results_needed = cushion - unsent_results;
|
2005-02-23 19:44:59 +00:00
|
|
|
|
2008-03-10 17:03:15 +00:00
|
|
|
int new_wu_id = 0;
|
2006-06-08 05:54:40 +00:00
|
|
|
while (1) {
|
|
|
|
DB_WORKUNIT& wu = wus[index++];
|
|
|
|
if (index == nwu_names) index=0;
|
|
|
|
if (max_wus && total_wus >= max_wus) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2006-06-08 05:54:40 +00:00
|
|
|
"Reached max_wus = %d\n", max_wus
|
|
|
|
);
|
|
|
|
exit(0);
|
2007-04-18 20:49:58 +00:00
|
|
|
total_wus++;
|
2006-06-08 05:54:40 +00:00
|
|
|
}
|
2006-11-09 00:20:14 +00:00
|
|
|
make_new_wu(wu, wu.xml_doc, start_time);
|
2008-03-10 17:03:15 +00:00
|
|
|
new_wu_id = wu.id;
|
2006-06-08 05:54:40 +00:00
|
|
|
results_needed -= wu.target_nresults;
|
|
|
|
if (results_needed <= 0) break;
|
2002-10-18 16:52:28 +00:00
|
|
|
}
|
2003-11-08 00:23:06 +00:00
|
|
|
|
2005-07-17 19:52:44 +00:00
|
|
|
if (one_pass) break;
|
2008-03-10 17:03:15 +00:00
|
|
|
|
|
|
|
wait_for_results(new_wu_id);
|
2002-10-09 04:56:41 +00:00
|
|
|
}
|
2013-05-20 20:00:22 +00:00
|
|
|
delete[] wus;
|
2002-10-09 04:56:41 +00:00
|
|
|
}
|
|
|
|
|
2009-09-17 17:56:59 +00:00
|
|
|
// Write the help text to stderr.
// name is the program name shown in the "Usage:" line.
//
void usage(char *name) {
    // description
    fputs(
        "Create WU and result records as needed to maintain a pool of work\n"
        "(for testing purposes).\n"
        "Clones the WU of the given name.\n\n",
        stderr
    );

    // synopsis (the only part that depends on name)
    fprintf(stderr, "Usage: %s [OPTION]...\n\n", name);

    // option list
    fputs(
        "Options:\n"
        " --wu_name name the name for the WU\n"
        " (can be repeated)\n"
        " [ --cushion N ] make work if fewer than N unsent results\n"
        " [ --max_wus n ] don't make work if more than N total WUs\n"
        " [ --one_pass ] quit after one pass\n"
        " [ --d X ] set debug level to X.\n"
        " [ -h | --help ] shows this help text.\n"
        " [ -v | --version ] shows version information\n",
        stderr
    );
}
|
|
|
|
|
2002-10-09 04:56:41 +00:00
|
|
|
int main(int argc, char** argv) {
|
|
|
|
int i;
|
2006-06-08 05:54:40 +00:00
|
|
|
vector<string> wu_names;
|
2002-10-09 04:56:41 +00:00
|
|
|
|
|
|
|
for (i=1; i<argc; i++) {
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
if (is_arg(argv[i], "cushion")) {
|
|
|
|
if (!argv[++i]) {
|
2009-09-17 17:56:59 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "%s requires an argument\n\n", argv[--i]);
|
|
|
|
usage(argv[0]);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
cushion = atoi(argv[i]);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "d")) {
|
|
|
|
if (!argv[++i]) {
|
2009-09-17 17:56:59 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "%s requires an argument\n\n", argv[--i]);
|
|
|
|
usage(argv[0]);
|
|
|
|
exit(1);
|
|
|
|
}
|
2010-04-05 21:59:33 +00:00
|
|
|
int dl = atoi(argv[i]);
|
|
|
|
log_messages.set_debug_level(dl);
|
|
|
|
if (dl == 4) g_print_queries = true;
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override the corresponding values in config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "wu_name")) {
|
|
|
|
if (!argv[++i]) {
|
2009-09-17 17:56:59 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "%s requires an argument\n\n", argv[--i]);
|
|
|
|
usage(argv[0]);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
wu_names.push_back(string(argv[i]));
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override the corresponding values in config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "max_wus")) {
|
|
|
|
if (!argv[++i]) {
|
2009-09-17 17:56:59 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "%s requires an argument\n\n", argv[--i]);
|
|
|
|
usage(argv[0]);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
max_wus = atoi(argv[i]);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override the corresponding values in config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "one_pass")) {
|
2005-07-17 19:52:44 +00:00
|
|
|
one_pass = true;
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override the corresponding values in config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[1], "h") || is_arg(argv[1], "help")) {
|
2009-09-17 17:56:59 +00:00
|
|
|
usage(argv[0]);
|
|
|
|
exit(0);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override the corresponding values in config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[1], "v") || is_arg(argv[1], "version")) {
|
2009-09-17 17:56:59 +00:00
|
|
|
printf("%s\n", SVN_VERSION);
|
|
|
|
exit(0);
|
2005-02-23 19:44:59 +00:00
|
|
|
} else {
|
2009-09-17 17:56:59 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "unknown command line argument: %s\n\n", argv[i]);
|
|
|
|
usage(argv[0]);
|
|
|
|
exit(1);
|
2002-10-09 04:56:41 +00:00
|
|
|
}
|
|
|
|
}
|
2005-07-17 19:52:44 +00:00
|
|
|
check_stop_daemons();
|
2002-10-09 04:56:41 +00:00
|
|
|
|
2006-06-08 05:54:40 +00:00
|
|
|
if (!wu_names.size()) {
|
|
|
|
fprintf(stderr, "Must supply at least one WU name\n");
|
|
|
|
exit(1);
|
2002-10-14 23:10:12 +00:00
|
|
|
|
2006-06-08 05:54:40 +00:00
|
|
|
}
|
2003-02-27 19:29:48 +00:00
|
|
|
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2004-04-13 21:31:55 +00:00
|
|
|
"Starting: cushion %d, max_wus %d\n",
|
|
|
|
cushion, max_wus
|
2003-09-02 21:16:55 +00:00
|
|
|
);
|
2003-12-31 23:09:21 +00:00
|
|
|
install_stop_signal_handler();
|
2003-04-01 07:13:43 +00:00
|
|
|
|
2003-04-01 03:28:37 +00:00
|
|
|
srand48(getpid() + time(0));
|
2006-06-08 05:54:40 +00:00
|
|
|
make_work(wu_names);
|
2002-10-09 04:56:41 +00:00
|
|
|
}
|
2004-12-08 00:40:19 +00:00
|
|
|
|
2005-01-02 18:29:53 +00:00
|
|
|
// Revision-identification string for this source file.
// NOTE(review): "$Id$" looks like a version-control keyword placeholder that
// is expanded on checkout when keyword substitution is enabled -- confirm.
const char *BOINC_RCSID_d24265dc7f = "$Id$";
|