2008-08-06 18:36:30 +00:00
|
|
|
// This file is part of BOINC.
|
2005-01-20 23:22:22 +00:00
|
|
|
// http://boinc.berkeley.edu
|
2008-08-06 18:36:30 +00:00
|
|
|
// Copyright (C) 2008 University of California
|
2005-01-20 23:22:22 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
2005-01-20 23:22:22 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
2005-01-20 23:22:22 +00:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
2003-03-08 00:09:40 +00:00
|
|
|
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
// general back-end utility functions (not scheduler-specific)
|
|
|
|
|
2003-07-02 02:02:18 +00:00
|
|
|
#ifndef SCHED_UTIL_H
|
|
|
|
#define SCHED_UTIL_H
|
2003-03-08 00:09:40 +00:00
|
|
|
|
2005-02-23 00:11:59 +00:00
|
|
|
#include "boinc_db.h"
|
2008-09-22 17:52:41 +00:00
|
|
|
#include "util.h"
|
2005-02-23 00:11:59 +00:00
|
|
|
|
2003-03-08 00:09:40 +00:00
|
|
|
// "average credit" uses an exponential decay so that recent
|
|
|
|
// activity is weighted more heavily.
|
2004-04-07 06:51:42 +00:00
|
|
|
// CREDIT_HALF_LIFE is the "half-life" period:
|
2003-08-12 20:58:24 +00:00
|
|
|
// the average decreases by 1/2 if idle for this period.
|
|
|
|
//
|
2003-03-08 00:09:40 +00:00
|
|
|
#define SECONDS_IN_DAY (3600*24)
|
2004-04-07 06:51:42 +00:00
|
|
|
#define CREDIT_HALF_LIFE (SECONDS_IN_DAY*7)
|
2003-03-08 00:09:40 +00:00
|
|
|
|
2003-06-20 01:31:03 +00:00
|
|
|
extern void write_pid_file(const char* filename);
|
2003-06-11 23:36:40 +00:00
|
|
|
extern void set_debug_level(int);
|
2004-05-03 19:30:01 +00:00
|
|
|
extern void check_stop_daemons();
|
|
|
|
extern bool check_stop_sched();
|
2003-12-31 23:09:21 +00:00
|
|
|
extern void install_stop_signal_handler();
|
2005-02-16 23:17:43 +00:00
|
|
|
extern int try_fopen(const char* path, FILE*& f, const char* mode);
|
|
|
|
extern void get_log_path(char*, const char*);
|
2003-03-08 00:09:40 +00:00
|
|
|
|
2005-01-08 06:54:03 +00:00
|
|
|
// convert filename to path in a hierarchical directory system
|
|
|
|
//
|
|
|
|
extern int dir_hier_path(
|
2005-09-23 21:09:00 +00:00
|
|
|
const char* filename, const char* root, int fanout,
|
2011-01-07 20:23:22 +00:00
|
|
|
char* result, bool create=false
|
2005-01-08 06:54:03 +00:00
|
|
|
);
|
|
|
|
|
|
|
|
// convert filename to URL in a hierarchical directory system
|
|
|
|
//
|
|
|
|
extern int dir_hier_url(
|
2005-09-23 21:09:00 +00:00
|
|
|
const char* filename, const char* root, int fanout,
|
2011-01-07 20:23:22 +00:00
|
|
|
char* result
|
2005-01-08 06:54:03 +00:00
|
|
|
);
|
|
|
|
|
2005-02-23 00:11:59 +00:00
|
|
|
extern void compute_avg_turnaround(HOST& host, double turnaround);
|
|
|
|
|
2012-01-09 17:35:48 +00:00
|
|
|
// Container for four host floating-point-performance statistics.
// NOTE(review): the field names suggest these are statistics of host
// FLOPS (mean, standard deviation, 50th and 95th percentile);
// confirm the units and the population against the definition of
// get_from_db(), which is not in this header.
//
struct PERF_INFO {
    double host_fpops_mean;
    double host_fpops_stddev;
    double host_fpops_50_percentile;
    double host_fpops_95_percentile;

    // Declared here, defined elsewhere.
    // NOTE(review): presumably fills the fields above from the database
    // and returns zero on success / nonzero on error - confirm.
    //
    int get_from_db();
};
|
|
|
|
|
2005-05-17 21:08:48 +00:00
|
|
|
// returns zero if we get lock on file with file descriptor fd.
|
|
|
|
// returns < 0 if error
|
|
|
|
// returns PID > 0 if another process has lock
|
|
|
|
//
|
|
|
|
extern int mylockf(int fd);
|
|
|
|
|
2007-06-20 16:27:27 +00:00
|
|
|
extern int count_workunits(int&, const char* query);
|
2008-02-05 20:16:57 +00:00
|
|
|
extern int count_unsent_results(int&, int appid);
|
2008-09-22 17:52:41 +00:00
|
|
|
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
// Return a value for host_app_version.app_version_id.
// If the app version is anonymous platform (signalled by avid < 0),
// make a "pseudo ID" that combines the app ID and the resource type:
//     appid*1000000 - avid
// so appid occupies the millions and up, and -avid the low six digits.
// Otherwise just use the app_version ID unchanged.
//
// NOTE(review): presumably -avid encodes the resource type and is
// always well below 1000000, so pseudo IDs of different apps cannot
// collide - confirm against the callers.
// NOTE(review): the multiplication is plain int arithmetic and is not
// range-checked; with a 32-bit int it overflows for appid above ~2147.
//
inline int generalized_app_version_id(int avid, int appid) {
    if (avid < 0) {
        return appid*1000000 - avid;
    }
    return avid;
}
|
|
|
|
|
|
|
|
// return true if x is -y or --y (for argv processing)
|
|
|
|
//
|
|
|
|
extern bool is_arg(const char*, const char*);
|
|
|
|
|
2011-12-01 18:44:19 +00:00
|
|
|
extern bool app_plan_uses_gpu(const char* plan_class);
|
|
|
|
|
2012-01-30 22:39:13 +00:00
|
|
|
extern int restrict_wu_to_user(DB_WORKUNIT& wu, int userid);
|
|
|
|
|
2008-09-22 17:52:41 +00:00
|
|
|
#ifdef GCL_SIMULATOR
|
|
|
|
extern void simulator_signal_handler(int signum);
|
|
|
|
extern void continue_simulation(const char *daemonname);
|
|
|
|
#endif
|
|
|
|
|
2008-02-05 20:16:57 +00:00
|
|
|
#endif
|