// boinc/sched/sched_driver.cpp

// This file is part of BOINC.
// http://boinc.berkeley.edu
// Copyright (C) 2008 University of California
//
// BOINC is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// BOINC is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
// This program generates a stream of scheduler requests;
// it acts as a "driver" for the scheduler when used as:
// sched_driver | cgi --batch --mark_jobs_done
//
// This was written to test the homogeneous redundancy features
// of the feeder and scheduler,
// but it could be used for a variety of other purposes.
//
// Usage: sched_driver --nrequests N --reqs_per_second X
//
// Each request asks for a uniformly-distributed random amount of work.
// The OS and CPU info is taken from successive lines of the file
// host_descs.txt; each line holds three tab-separated fields:
// os_name <TAB> p_vendor <TAB> p_model
// Generate this file with a SQL query, trimming off the start and end.
// Notes:
// 1) Use sample_trivial_validator and sample_dummy_assimilator
// 2) Edit the following to something in your DB
#define AUTHENTICATOR "49bcae97f1788385b0f41123acdf5694"
// authenticator of a user record
#define HOSTID "7"
// ID of a host belonging to that user
#include <cstdio>
#include <vector>
#include <cmath>
#include <cstring>
#include "util.h"
#include "str_replace.h"
#include "str_util.h"
#include "svn_version.h"
using std::vector;
// OS and CPU identification strings for one host.
// Fixed-size character buffers; one HOST_DESC is filled per input line.
//
struct HOST_DESC {
    char os_name[256];      // operating system name
    char p_vendor[256];     // processor vendor
    char p_model[256];      // processor model
};
// Host descriptions loaded by read_hosts();
// make_request() picks one per request, indexed modulo the list size.
vector<HOST_DESC> host_descs;

// Lower and upper bounds of the value produced by req_time(),
// which is sent as <work_req_seconds>.
// Both are 1, so req_time() returns 1 unless these are edited.
double min_time = 1.0;
double max_time = 1.0;
// Load host descriptions from the file "host_descs.txt"
// into the global host_descs, replacing any previous contents.
//
// Each line must hold three fields separated by tabs:
// os_name, p_vendor and p_model, in that order.
//
// If the file can't be opened, a line has fewer than three fields,
// or the file yields no entries at all,
// print a message to stderr and exit with status 1.
//
void read_hosts() {
    char line[256], scratch[256];

    host_descs.clear();
    FILE* f = fopen("host_descs.txt", "r");
    if (!f) {
        fprintf(stderr, "no input file\n");
        exit(1);
    }
    while (fgets(line, sizeof(line), f)) {
        HOST_DESC hd;

        // strtok() overwrites delimiters in its input, so tokenize a copy;
        // the untouched line is needed for the error message below.
        //
        safe_strcpy(scratch, line);
        char* p1 = strtok(scratch, "\t\n");
        char* p2 = strtok(0, "\t\n");
        char* p3 = strtok(0, "\t\n");
        if (!p1 || !p2 || !p3) {
            fprintf(stderr, "bad line: %s\n", line);
            exit(1);
        }
        safe_strcpy(hd.os_name, p1);
        safe_strcpy(hd.p_vendor, p2);
        safe_strcpy(hd.p_model, p3);
        host_descs.push_back(hd);
    }
    fclose(f);

    // make_request() indexes host_descs modulo its size,
    // so an empty list would mean a division by zero there.
    //
    if (host_descs.empty()) {
        fprintf(stderr, "no host descriptions in host_descs.txt\n");
        exit(1);
    }
}
// Return the amount of work to request:
// min_time if the two bounds are equal,
// otherwise min_time plus drand() times the width of [min_time, max_time].
// drand() is called only when the bounds differ.
//
inline double req_time() {
    if (max_time != min_time) {
        const double width = max_time - min_time;
        return min_time + drand()*width;
    }
    return min_time;
}
// Return -mean*log(1-u), where u is one value obtained from drand().
// NOTE(review): presumably drand() is uniform on [0,1), which would make
// the result exponentially distributed with the given mean - confirm.
// If drand() can return exactly 1, the result is infinite.
//
inline double exponential(double mean) {
    const double u = drand();
    return -mean*log(1-u);
}
void make_request(int i) {
HOST_DESC& hd = host_descs[i%host_descs.size()];
printf(
"<scheduler_request>\n"
" <authenticator>%s</authenticator>\n"
" <hostid>%s</hostid>\n"
" <work_req_seconds>%f</work_req_seconds>\n"
" <platform_name>windows_intelx86</platform_name>\n"
" <host_info>\n"
" <os_name>%s</os_name>\n"
" <p_vendor>%s</p_vendor>\n"
" <p_model>%s</p_model>\n"
" <p_fops>1e9</p_fops>\n"
" <m_nbytes>1e9</m_nbytes>\n"
" <d_total>1e11</d_total>\n"
" <d_free>1e11</d_free>\n"
" </host_info>\n"
"</scheduler_request>\n",
AUTHENTICATOR,
HOSTID,
req_time(),
hd.os_name,
hd.p_vendor,
hd.p_model
);
}
// Print the help text to stderr.
//
// name: the program name (callers pass argv[0]); it is substituted into
//       the example pipeline and into the "Usage:" line.
//
void usage(char *name) {
    fprintf(stderr,
        "This program generates a stream of scheduler requests;\n"
        "it acts as a \"driver\" for the scheduler when used as:\n"
        "%s | cgi --batch --mark_jobs_done\n\n"
        "This was written to test the homogeneous redundancy features\n"
        "of the feeder and scheduler, and to measure server performance,\n"
        "but it could be used for other purposes.\n"
        "\n"
        "Each request asks for a uniformly-distributed random amount of work.\n"
        "The OS and CPU info is taken from the successive lines of a file of the form\n"
        "| os_name | p_vendor | p_model |\n"
        "You can generate this file with a SQL query, trimming off the start and end.\n"
        "\n"
        "Notes:\n"
        "1) Use sample_trivial_validator and sample_dummy_assimilator\n"
        "\n"
        "Usage: %s [OPTION]...\n"
        "\n"
        "Options: \n"
        " --nrequests N Sets the total number of requests to N\n"
        " --reqs_per_second X Sets the number of requests per second to X\n"
        " [ -h | --help ] Show this help text.\n"
        " [ -v | --version ] Show version information\n",
        name, name
    );
}
int main(int argc, char** argv) {
int i, nrequests = 1;
double reqs_per_second = 1;
for (i=1; i<argc; i++) {
if (!strcmp(argv[i], "--nrequests")) {
- server: change the following from per-host to per-(host, app version): - daily quota mechanism - reliable mechanism (accelerated retries) - "trusted" mechanism (adaptive replication) - scheduler: enforce host scale probation only for apps with host_scale_check set. - validator: do scale probation on invalid results (need this in addition to error and timeout cases) - feeder: update app version scales every 10 min, not 10 sec - back-end apps: support --foo as well as -foo for options Notes: - If you have, say, cuda, cuda23 and cuda_fermi plan classes, a host will have separate quotas for each one. That means it could error out on 100 jobs for cuda_fermi, and when its quota goes to zero, error out on 100 jobs for cuda23, etc. This is intentional; there may be cases where one version works but not the others. - host.error_rate and host.max_results_day are deprecated TODO: - the values in the app table for limits on jobs in progress etc. should override rather than config.xml. Implementation notes: scheduler: process_request(): read all host_app_versions for host at start; Compute "reliable" and "trusted" for each one. write modified records at end get_app_version(): add "reliable_only" arg; if set, use only reliable versions skip over-quota versions Multi-pass scheduling: if have at least one reliable version, do a pass for jobs that need reliable, and use only reliable versions. Then clear best_app_versions cache. Score-based scheduling: for need-reliable jobs, it will pick the fastest version, then give a score bonus if that version happens to be reliable. When get back a successful result from client: increase daily quota When get back an error result from client: impose scale probation decrease daily quota if not aborted Validator: when handling a WU, create a vector of HOST_APP_VERSION parallel to vector of RESULT. Pass it to assign_credit_set(). 
Make copies of originals so we can update only modified ones update HOST_APP_VERSION error rates Transitioner: decrease quota on timeout svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
if (!argv[++i]) {
fprintf(stderr, "%s requires an argument\n\n", argv[--i]);
usage(argv[0]);
exit(1);
}
nrequests = atoi(argv[i]);
if (nrequests < 0) nrequests = 0;
if (nrequests > 10000000) nrequests = 10000000;
}
else if (!strcmp(argv[i], "--reqs_per_second")) {
- server: change the following from per-host to per-(host, app version): - daily quota mechanism - reliable mechanism (accelerated retries) - "trusted" mechanism (adaptive replication) - scheduler: enforce host scale probation only for apps with host_scale_check set. - validator: do scale probation on invalid results (need this in addition to error and timeout cases) - feeder: update app version scales every 10 min, not 10 sec - back-end apps: support --foo as well as -foo for options Notes: - If you have, say, cuda, cuda23 and cuda_fermi plan classes, a host will have separate quotas for each one. That means it could error out on 100 jobs for cuda_fermi, and when its quota goes to zero, error out on 100 jobs for cuda23, etc. This is intentional; there may be cases where one version works but not the others. - host.error_rate and host.max_results_day are deprecated TODO: - the values in the app table for limits on jobs in progress etc. should override rather than config.xml. Implementation notes: scheduler: process_request(): read all host_app_versions for host at start; Compute "reliable" and "trusted" for each one. write modified records at end get_app_version(): add "reliable_only" arg; if set, use only reliable versions skip over-quota versions Multi-pass scheduling: if have at least one reliable version, do a pass for jobs that need reliable, and use only reliable versions. Then clear best_app_versions cache. Score-based scheduling: for need-reliable jobs, it will pick the fastest version, then give a score bonus if that version happens to be reliable. When get back a successful result from client: increase daily quota When get back an error result from client: impose scale probation decrease daily quota if not aborted Validator: when handling a WU, create a vector of HOST_APP_VERSION parallel to vector of RESULT. Pass it to assign_credit_set(). 
Make copies of originals so we can update only modified ones update HOST_APP_VERSION error rates Transitioner: decrease quota on timeout svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
if (!argv[++i]) {
fprintf(stderr, "%s requires an argument\n\n", argv[--i]);
usage(argv[0]);
exit(1);
}
reqs_per_second = atof(argv[i]);
}
- server: change the following from per-host to per-(host, app version): - daily quota mechanism - reliable mechanism (accelerated retries) - "trusted" mechanism (adaptive replication) - scheduler: enforce host scale probation only for apps with host_scale_check set. - validator: do scale probation on invalid results (need this in addition to error and timeout cases) - feeder: update app version scales every 10 min, not 10 sec - back-end apps: support --foo as well as -foo for options Notes: - If you have, say, cuda, cuda23 and cuda_fermi plan classes, a host will have separate quotas for each one. That means it could error out on 100 jobs for cuda_fermi, and when its quota goes to zero, error out on 100 jobs for cuda23, etc. This is intentional; there may be cases where one version works but not the others. - host.error_rate and host.max_results_day are deprecated TODO: - the values in the app table for limits on jobs in progress etc. should override rather than config.xml. Implementation notes: scheduler: process_request(): read all host_app_versions for host at start; Compute "reliable" and "trusted" for each one. write modified records at end get_app_version(): add "reliable_only" arg; if set, use only reliable versions skip over-quota versions Multi-pass scheduling: if have at least one reliable version, do a pass for jobs that need reliable, and use only reliable versions. Then clear best_app_versions cache. Score-based scheduling: for need-reliable jobs, it will pick the fastest version, then give a score bonus if that version happens to be reliable. When get back a successful result from client: increase daily quota When get back an error result from client: impose scale probation decrease daily quota if not aborted Validator: when handling a WU, create a vector of HOST_APP_VERSION parallel to vector of RESULT. Pass it to assign_credit_set(). 
Make copies of originals so we can update only modified ones update HOST_APP_VERSION error rates Transitioner: decrease quota on timeout svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
else if (!strcmp(argv[i], "-h") || !strcmp(argv[i], "--help")) {
usage(argv[0]);
exit(0);
}
- server: change the following from per-host to per-(host, app version): - daily quota mechanism - reliable mechanism (accelerated retries) - "trusted" mechanism (adaptive replication) - scheduler: enforce host scale probation only for apps with host_scale_check set. - validator: do scale probation on invalid results (need this in addition to error and timeout cases) - feeder: update app version scales every 10 min, not 10 sec - back-end apps: support --foo as well as -foo for options Notes: - If you have, say, cuda, cuda23 and cuda_fermi plan classes, a host will have separate quotas for each one. That means it could error out on 100 jobs for cuda_fermi, and when its quota goes to zero, error out on 100 jobs for cuda23, etc. This is intentional; there may be cases where one version works but not the others. - host.error_rate and host.max_results_day are deprecated TODO: - the values in the app table for limits on jobs in progress etc. should override rather than config.xml. Implementation notes: scheduler: process_request(): read all host_app_versions for host at start; Compute "reliable" and "trusted" for each one. write modified records at end get_app_version(): add "reliable_only" arg; if set, use only reliable versions skip over-quota versions Multi-pass scheduling: if have at least one reliable version, do a pass for jobs that need reliable, and use only reliable versions. Then clear best_app_versions cache. Score-based scheduling: for need-reliable jobs, it will pick the fastest version, then give a score bonus if that version happens to be reliable. When get back a successful result from client: increase daily quota When get back an error result from client: impose scale probation decrease daily quota if not aborted Validator: when handling a WU, create a vector of HOST_APP_VERSION parallel to vector of RESULT. Pass it to assign_credit_set(). 
Make copies of originals so we can update only modified ones update HOST_APP_VERSION error rates Transitioner: decrease quota on timeout svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--version")) {
printf("%s\n", SVN_VERSION);
exit(0);
} else {
fprintf(stderr, "unknown command line argument: %s\n\n", argv[i]);
usage(argv[0]);
exit(1);
}
}
read_hosts();
double t1, t2, x;
for (i=0; i<nrequests; i++) {
t1 = dtime();
make_request(i);
t2 = dtime();
x = exponential(1./reqs_per_second);
if (t2 - t1 < x) {
boinc_sleep(x - (t2-t1));
}
}
}