2008-08-06 18:36:30 +00:00
|
|
|
// This file is part of BOINC.
|
2007-06-20 22:34:06 +00:00
|
|
|
// http://boinc.berkeley.edu
|
2008-08-06 18:36:30 +00:00
|
|
|
// Copyright (C) 2008 University of California
|
2007-06-20 22:34:06 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
2007-06-20 22:34:06 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
2007-06-20 22:34:06 +00:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
2007-06-20 22:34:06 +00:00
|
|
|
|
|
|
|
// Census - create a file saying (for each HR type)
|
|
|
|
// how much RAC each HR class is getting.
|
|
|
|
// This info is used by the feeder to decide how many shared-memory slots
|
|
|
|
// to devote to each HR class.
|
|
|
|
|
2009-02-26 00:23:23 +00:00
|
|
|
#include <cstdio>
|
2007-06-20 22:34:06 +00:00
|
|
|
|
|
|
|
#include "boinc_db.h"
|
|
|
|
#include "str_util.h"
|
|
|
|
#include "sched_config.h"
|
|
|
|
#include "sched_util.h"
|
|
|
|
#include "sched_msgs.h"
|
2007-06-22 03:46:42 +00:00
|
|
|
#include "hr_info.h"
|
2009-09-17 17:56:59 +00:00
|
|
|
#include "svn_version.h"
|
2007-06-20 22:34:06 +00:00
|
|
|
|
2009-09-17 17:56:59 +00:00
|
|
|
void usage(char *name) {
|
2007-09-27 15:08:40 +00:00
|
|
|
fprintf(stderr,
|
2008-10-03 19:31:56 +00:00
|
|
|
"This program scans the 'host' DB table and creates two files:\n\n"
|
|
|
|
"%s: how much RAC each HR class is getting\n"
|
|
|
|
" (needed if you use homogeneous redundancy).\n"
|
|
|
|
"%s: statistics of host performance\n"
|
|
|
|
" (needed if you use the 'job_size_matching' scheduling option).\n\n"
|
|
|
|
"This should be run as a periodic task (about once a day) from config.xml.\n"
|
|
|
|
"For more info, see http://boinc.berkeley.edu/trac/wiki/HomogeneousRedundancy\n\n"
|
2009-09-17 17:56:59 +00:00
|
|
|
"Usage: %s [OPTION]...\n\n"
|
|
|
|
"Options:\n"
|
|
|
|
" -h --help shows this help text.\n"
|
|
|
|
" -v --version shows version information.\n",
|
|
|
|
HR_INFO_FILENAME, PERF_INFO_FILENAME, name
|
2007-09-27 15:08:40 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
2007-06-20 22:34:06 +00:00
|
|
|
HR_INFO hri;
|
|
|
|
int retval;
|
|
|
|
|
2009-09-17 17:56:59 +00:00
|
|
|
for (int i=1; i<argc; i++) {
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
if (is_arg(argv[i], "help") || is_arg(argv[i], "h")) {
|
2009-09-17 17:56:59 +00:00
|
|
|
usage(argv[0]);
|
|
|
|
exit(0);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "version") || is_arg(argv[i], "v")) {
|
2009-09-17 17:56:59 +00:00
|
|
|
printf("%s\n", SVN_VERSION);
|
|
|
|
exit(0);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else {
|
2009-09-17 17:56:59 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
|
|
|
"unknown command line argument: %s\n\n", argv[i]
|
|
|
|
);
|
|
|
|
usage(argv[0]);
|
|
|
|
exit(1);
|
2007-09-27 15:08:40 +00:00
|
|
|
}
|
|
|
|
}
|
2007-06-20 22:34:06 +00:00
|
|
|
check_stop_daemons();
|
2009-05-07 13:54:51 +00:00
|
|
|
retval = config.parse_file();
|
2007-06-20 22:34:06 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2009-05-07 13:54:51 +00:00
|
|
|
"Can't parse config.xml: %s\n", boincerror(retval)
|
2007-06-20 22:34:06 +00:00
|
|
|
);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
retval = boinc_db.open(
|
|
|
|
config.db_name, config.db_host, config.db_user, config.db_passwd
|
|
|
|
);
|
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "Can't open DB\n");
|
2007-06-20 22:34:06 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
2008-09-16 23:06:37 +00:00
|
|
|
log_messages.printf(MSG_NORMAL, "Starting\n");
|
2007-06-20 22:34:06 +00:00
|
|
|
boinc_db.set_isolation_level(READ_UNCOMMITTED);
|
2007-06-22 03:46:42 +00:00
|
|
|
hri.init();
|
2007-06-20 22:34:06 +00:00
|
|
|
hri.scan_db();
|
2007-06-22 03:46:42 +00:00
|
|
|
hri.write_file();
|
2008-05-06 19:53:49 +00:00
|
|
|
hri.perf_info.write_file();
|
2008-09-16 23:06:37 +00:00
|
|
|
log_messages.printf(MSG_NORMAL, "Finished\n");
|
2007-06-20 22:34:06 +00:00
|
|
|
}
|