2008-08-06 18:36:30 +00:00
|
|
|
// This file is part of BOINC.
|
2007-04-18 20:49:58 +00:00
|
|
|
// http://boinc.berkeley.edu
|
2008-08-06 18:36:30 +00:00
|
|
|
// Copyright (C) 2008 University of California
|
2007-04-18 20:49:58 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
2007-04-18 20:49:58 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
2007-04-18 20:49:58 +00:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
2007-04-18 20:49:58 +00:00
|
|
|
|
2013-04-20 20:49:51 +00:00
|
|
|
// sample_work_generator: example BOINC work generator.
|
|
|
|
//
|
|
|
|
// --app name app name (default example_app)
|
|
|
|
// --in_template_file input template file (default example_app_in)
|
|
|
|
// --out_template_file output template file (default example_app_out)
|
|
|
|
// -d N log verbosity level (0..4)
|
|
|
|
// --help show usage
|
|
|
|
// --version show version
|
2007-04-18 20:49:58 +00:00
|
|
|
//
|
|
|
|
// - Runs as a daemon, and creates an unbounded supply of work.
|
2013-04-20 20:49:51 +00:00
|
|
|
// It attempts to maintain a "cushion" of CUSHION (currently 10) unsent job instances
|
|
|
|
// for the given app.
|
2007-04-18 20:49:58 +00:00
|
|
|
// (your app may not work this way; e.g. you might create work in batches)
|
|
|
|
// - Creates a new input file for each job;
|
|
|
|
// the file (and the workunit names) contain a timestamp
|
2011-10-26 23:23:01 +00:00
|
|
|
// and sequence number, so they're unique.
|
2013-04-20 20:49:51 +00:00
|
|
|
//
|
|
|
|
// This is an example - customize for your needs
|
2007-04-18 20:49:58 +00:00
|
|
|
|
2012-05-09 16:11:50 +00:00
|
|
|
#include <sys/param.h>
|
2007-12-10 22:13:48 +00:00
|
|
|
#include <unistd.h>
|
2008-02-27 23:26:38 +00:00
|
|
|
#include <cstdlib>
|
|
|
|
#include <string>
|
|
|
|
#include <cstring>
|
2007-12-10 22:13:48 +00:00
|
|
|
|
2013-08-23 00:01:45 +00:00
|
|
|
#include "backend_lib.h"
|
2007-04-18 20:49:58 +00:00
|
|
|
#include "boinc_db.h"
|
|
|
|
#include "error_numbers.h"
|
2013-08-23 00:01:45 +00:00
|
|
|
#include "filesys.h"
|
2007-04-18 20:49:58 +00:00
|
|
|
#include "parse.h"
|
2013-06-07 00:31:46 +00:00
|
|
|
#include "str_replace.h"
|
2013-08-23 00:01:45 +00:00
|
|
|
#include "str_util.h"
|
2009-09-17 17:56:59 +00:00
|
|
|
#include "svn_version.h"
|
2013-08-23 00:01:45 +00:00
|
|
|
#include "util.h"
|
2007-04-18 20:49:58 +00:00
|
|
|
|
|
|
|
#include "sched_config.h"
|
|
|
|
#include "sched_util.h"
|
2007-04-20 17:14:08 +00:00
|
|
|
#include "sched_msgs.h"
|
2007-04-18 20:49:58 +00:00
|
|
|
|
2010-11-10 22:54:56 +00:00
|
|
|
#define CUSHION 10
|
2007-04-18 20:49:58 +00:00
|
|
|
// maintain at least this many unsent results
|
2010-11-10 22:54:56 +00:00
|
|
|
#define REPLICATION_FACTOR 1
|
2013-04-20 20:49:51 +00:00
|
|
|
// number of instances of each job
|
2007-04-18 20:49:58 +00:00
|
|
|
|
2011-07-01 02:12:11 +00:00
|
|
|
const char* app_name = "example_app"; // application name; main() overrides it from --app
|
2012-07-22 03:45:47 +00:00
|
|
|
const char* in_template_file = "example_app_in"; // input template file name; main() reads templates/<this> (override: --in_template_file)
|
|
|
|
const char* out_template_file = "example_app_out"; // output template file name; make_job() passes templates/<this> to create_work() (override: --out_template_file)
|
2010-11-10 00:10:32 +00:00
|
|
|
|
|
|
|
char* in_template; // contents of the input template, filled by read_file_malloc() in main()
|
2007-04-18 20:49:58 +00:00
|
|
|
DB_APP app; // DB record of the application, looked up by name in main(); app.id is used for new jobs
|
|
|
|
int start_time; // set to time(0) in main(); embedded in every job name
|
|
|
|
int seqno; // per-run job counter, incremented by make_job(); embedded in every job name
|
|
|
|
|
|
|
|
// create one new job
|
|
|
|
//
|
|
|
|
int make_job() {
|
|
|
|
DB_WORKUNIT wu;
|
2012-05-09 16:11:50 +00:00
|
|
|
char name[256], path[MAXPATHLEN];
|
2007-04-18 20:49:58 +00:00
|
|
|
const char* infiles[1];
|
2007-04-19 22:11:25 +00:00
|
|
|
int retval;
|
2007-04-18 20:49:58 +00:00
|
|
|
|
|
|
|
// make a unique name (for the job and its input file)
|
|
|
|
//
|
2010-11-10 00:10:32 +00:00
|
|
|
sprintf(name, "%s_%d_%d", app_name, start_time, seqno++);
|
2007-04-18 20:49:58 +00:00
|
|
|
|
|
|
|
// Create the input file.
|
|
|
|
// Put it at the right place in the download dir hierarchy
|
|
|
|
//
|
2007-04-19 22:11:25 +00:00
|
|
|
retval = config.download_path(name, path);
|
|
|
|
if (retval) return retval;
|
2007-04-18 20:49:58 +00:00
|
|
|
FILE* f = fopen(path, "w");
|
|
|
|
if (!f) return ERR_FOPEN;
|
|
|
|
fprintf(f, "This is the input file for job %s", name);
|
|
|
|
fclose(f);
|
|
|
|
|
|
|
|
// Fill in the job parameters
|
|
|
|
//
|
|
|
|
wu.clear();
|
|
|
|
wu.appid = app.id;
|
2013-06-04 05:42:53 +00:00
|
|
|
safe_strcpy(wu.name, name);
|
2007-04-23 16:14:47 +00:00
|
|
|
wu.rsc_fpops_est = 1e12;
|
|
|
|
wu.rsc_fpops_bound = 1e14;
|
|
|
|
wu.rsc_memory_bound = 1e8;
|
|
|
|
wu.rsc_disk_bound = 1e8;
|
|
|
|
wu.delay_bound = 86400;
|
|
|
|
wu.min_quorum = REPLICATION_FACTOR;
|
2007-04-18 20:49:58 +00:00
|
|
|
wu.target_nresults = REPLICATION_FACTOR;
|
2007-04-23 16:14:47 +00:00
|
|
|
wu.max_error_results = REPLICATION_FACTOR*4;
|
|
|
|
wu.max_total_results = REPLICATION_FACTOR*8;
|
|
|
|
wu.max_success_results = REPLICATION_FACTOR*4;
|
2007-04-19 22:11:25 +00:00
|
|
|
infiles[0] = name;
|
2007-04-18 20:49:58 +00:00
|
|
|
|
|
|
|
// Register the job with BOINC
|
|
|
|
//
|
2010-11-10 00:10:32 +00:00
|
|
|
sprintf(path, "templates/%s", out_template_file);
|
2007-04-18 20:49:58 +00:00
|
|
|
return create_work(
|
|
|
|
wu,
|
2010-11-10 00:10:32 +00:00
|
|
|
in_template,
|
|
|
|
path,
|
|
|
|
config.project_path(path),
|
2007-04-18 20:49:58 +00:00
|
|
|
infiles,
|
|
|
|
1,
|
|
|
|
config
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
void main_loop() {
|
|
|
|
int retval;
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
check_stop_daemons();
|
|
|
|
int n;
|
2013-04-20 20:49:51 +00:00
|
|
|
retval = count_unsent_results(n, app.id);
|
2011-10-11 17:41:10 +00:00
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(MSG_CRITICAL,
|
|
|
|
"count_unsent_jobs() failed: %s\n", boincerror(retval)
|
|
|
|
);
|
|
|
|
exit(retval);
|
|
|
|
}
|
2007-04-18 20:49:58 +00:00
|
|
|
if (n > CUSHION) {
|
2012-05-23 18:11:59 +00:00
|
|
|
daemon_sleep(10);
|
2007-04-18 20:49:58 +00:00
|
|
|
} else {
|
2007-04-23 16:14:47 +00:00
|
|
|
int njobs = (CUSHION-n)/REPLICATION_FACTOR;
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_DEBUG,
|
2007-04-19 22:11:25 +00:00
|
|
|
"Making %d jobs\n", njobs
|
|
|
|
);
|
2007-04-18 20:49:58 +00:00
|
|
|
for (int i=0; i<njobs; i++) {
|
|
|
|
retval = make_job();
|
2007-04-19 22:11:25 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2010-11-08 17:51:57 +00:00
|
|
|
"can't make job: %s\n", boincerror(retval)
|
2007-04-19 22:11:25 +00:00
|
|
|
);
|
2007-04-20 17:14:08 +00:00
|
|
|
exit(retval);
|
2007-04-19 22:11:25 +00:00
|
|
|
}
|
2007-04-18 20:49:58 +00:00
|
|
|
}
|
2013-12-15 00:36:18 +00:00
|
|
|
// Wait for the transitioner to create instances
|
|
|
|
// of the jobs we just created.
|
|
|
|
// Otherwise we'll create too many jobs.
|
|
|
|
//
|
|
|
|
double now = dtime();
|
|
|
|
while (1) {
|
|
|
|
daemon_sleep(5);
|
|
|
|
double x;
|
|
|
|
retval = min_transition_time(x);
|
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(MSG_CRITICAL,
|
|
|
|
"min_transition_time failed: %s\n", boincerror(retval)
|
|
|
|
);
|
|
|
|
exit(retval);
|
|
|
|
}
|
|
|
|
if (x > now) break;
|
|
|
|
}
|
2007-04-18 20:49:58 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-09-17 17:56:59 +00:00
|
|
|
void usage(char *name) {
|
|
|
|
fprintf(stderr, "This is an example BOINC work generator.\n"
|
|
|
|
"This work generator has the following properties\n"
|
|
|
|
"(you may need to change some or all of these):\n"
|
|
|
|
" It attempts to maintain a \"cushion\" of 100 unsent job instances.\n"
|
|
|
|
" (your app may not work this way; e.g. you might create work in batches)\n"
|
2010-11-10 00:10:32 +00:00
|
|
|
"- Creates work for the application \"example_app\".\n"
|
2009-09-17 17:56:59 +00:00
|
|
|
"- Creates a new input file for each job;\n"
|
|
|
|
" the file (and the workunit names) contain a timestamp\n"
|
|
|
|
" and sequence number, so that they're unique.\n\n"
|
|
|
|
"Usage: %s [OPTION]...\n\n"
|
|
|
|
"Options:\n"
|
2010-11-10 00:10:32 +00:00
|
|
|
" [ --app X Application name (default: example_app)\n"
|
|
|
|
" [ --in_template_file Input template (default: example_app_in)\n"
|
|
|
|
" [ --out_template_file Output template (default: example_app_out)\n"
|
|
|
|
" [ -d X ] Sets debug level to X.\n"
|
|
|
|
" [ -h | --help ] Shows this help text.\n"
|
|
|
|
" [ -v | --version ] Shows version information.\n",
|
2009-09-17 17:56:59 +00:00
|
|
|
name
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2007-04-20 17:14:08 +00:00
|
|
|
int main(int argc, char** argv) {
|
|
|
|
int i, retval;
|
2010-11-10 00:10:32 +00:00
|
|
|
char buf[256];
|
2007-04-20 17:14:08 +00:00
|
|
|
|
|
|
|
for (i=1; i<argc; i++) {
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
if (is_arg(argv[i], "d")) {
|
2010-04-05 21:59:33 +00:00
|
|
|
if (!argv[++i]) {
|
2009-09-17 17:56:59 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "%s requires an argument\n\n", argv[--i]);
|
|
|
|
usage(argv[0]);
|
|
|
|
exit(1);
|
|
|
|
}
|
2010-04-05 21:59:33 +00:00
|
|
|
int dl = atoi(argv[i]);
|
|
|
|
log_messages.set_debug_level(dl);
|
|
|
|
if (dl == 4) g_print_queries = true;
|
2010-11-10 00:10:32 +00:00
|
|
|
} else if (!strcmp(argv[i], "--app")) {
|
|
|
|
app_name = argv[++i];
|
|
|
|
} else if (!strcmp(argv[i], "--in_template_file")) {
|
|
|
|
in_template_file = argv[++i];
|
|
|
|
} else if (!strcmp(argv[i], "--out_template_file")) {
|
|
|
|
out_template_file = argv[++i];
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "h") || is_arg(argv[i], "help")) {
|
2009-09-17 17:56:59 +00:00
|
|
|
usage(argv[0]);
|
|
|
|
exit(0);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
} else if (is_arg(argv[i], "v") || is_arg(argv[i], "version")) {
|
2009-09-17 17:56:59 +00:00
|
|
|
printf("%s\n", SVN_VERSION);
|
|
|
|
exit(0);
|
2007-04-20 17:14:08 +00:00
|
|
|
} else {
|
2009-09-17 17:56:59 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "unknown command line argument: %s\n\n", argv[i]);
|
|
|
|
usage(argv[0]);
|
|
|
|
exit(1);
|
2007-04-20 17:14:08 +00:00
|
|
|
}
|
|
|
|
}
|
2007-04-18 20:49:58 +00:00
|
|
|
|
2009-05-07 13:54:51 +00:00
|
|
|
retval = config.parse_file();
|
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2009-05-07 13:54:51 +00:00
|
|
|
"Can't parse config.xml: %s\n", boincerror(retval)
|
2007-04-18 20:49:58 +00:00
|
|
|
);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
retval = boinc_db.open(
|
|
|
|
config.db_name, config.db_host, config.db_user, config.db_passwd
|
|
|
|
);
|
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "can't open db\n");
|
2007-04-18 20:49:58 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
2010-11-10 00:10:32 +00:00
|
|
|
|
|
|
|
sprintf(buf, "where name='%s'", app_name);
|
|
|
|
if (app.lookup(buf)) {
|
|
|
|
log_messages.printf(MSG_CRITICAL, "can't find app %s\n", app_name);
|
2007-04-18 20:49:58 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
2010-11-10 00:10:32 +00:00
|
|
|
|
|
|
|
sprintf(buf, "templates/%s", in_template_file);
|
|
|
|
if (read_file_malloc(config.project_path(buf), in_template)) {
|
|
|
|
log_messages.printf(MSG_CRITICAL, "can't read input template %s\n", buf);
|
2007-04-18 20:49:58 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
start_time = time(0);
|
|
|
|
seqno = 0;
|
|
|
|
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL, "Starting\n");
|
2007-04-19 22:11:25 +00:00
|
|
|
|
2007-04-18 20:49:58 +00:00
|
|
|
main_loop();
|
|
|
|
}
|