2008-08-06 18:36:30 +00:00
|
|
|
// This file is part of BOINC.
|
2005-01-20 23:22:22 +00:00
|
|
|
// http://boinc.berkeley.edu
|
2008-08-06 18:36:30 +00:00
|
|
|
// Copyright (C) 2008 University of California
|
2003-08-15 22:39:56 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
2003-08-15 22:39:56 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
2005-01-20 23:22:22 +00:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
2002-04-30 22:22:54 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2013-04-09 18:10:50 +00:00
|
|
|
#ifndef _SCHED_TYPES_
|
|
|
|
#define _SCHED_TYPES_
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2004-07-13 13:54:09 +00:00
|
|
|
#include <cstdio>
|
2002-04-30 22:22:54 +00:00
|
|
|
#include <vector>
|
|
|
|
|
2003-04-07 19:06:00 +00:00
|
|
|
#include "boinc_db.h"
|
2006-09-01 21:06:30 +00:00
|
|
|
#include "common_defs.h"
|
2004-05-03 02:18:35 +00:00
|
|
|
#include "md5_file.h"
|
2008-03-10 21:59:27 +00:00
|
|
|
#include "coproc.h"
|
2002-12-09 23:06:17 +00:00
|
|
|
|
2007-05-10 16:23:10 +00:00
|
|
|
#include "edf_sim.h"
|
|
|
|
|
2007-05-09 17:45:18 +00:00
|
|
|
// For projects that support work filtering by app,
// this records an app for which the user will accept work.
//
struct APP_INFO {
    int appid;
    int work_available;
};
|
|
|
|
|
2007-08-16 17:33:41 +00:00
|
|
|
// Represents a resource (disk etc.) that the client may not have enough of.
//
// NOTE: there is no constructor; set_insufficient() treats needed == 0
// as "nothing recorded yet", so the struct must be zeroed before use.
//
struct RESOURCE {
    bool insufficient;
    double needed;      // the min extra amount needed

    // Flag the resource as insufficient and record x as the amount needed,
    // keeping the smallest amount reported so far.
    //
    void set_insufficient(double x) {
        insufficient = true;
        // zero means no amount has been recorded yet
        if (needed == 0 || x < needed) {
            needed = x;
        }
    }
};
|
|
|
|
|
2012-09-12 22:31:23 +00:00
|
|
|
// A message for the volunteer.
//
struct USER_MESSAGE {
    std::string message;
    std::string priority;

    // m: message text, p: priority (constructor is defined elsewhere)
    USER_MESSAGE(const char* m, const char* p);
};
|
|
|
|
|
2008-03-18 21:22:44 +00:00
|
|
|
struct HOST_USAGE {
|
2012-06-22 07:35:54 +00:00
|
|
|
int proc_type;
|
|
|
|
double gpu_usage;
|
2009-12-11 22:45:59 +00:00
|
|
|
double gpu_ram;
|
2008-03-27 18:25:29 +00:00
|
|
|
double avg_ncpus;
|
|
|
|
double max_ncpus;
|
2010-04-10 05:49:51 +00:00
|
|
|
double projected_flops;
|
|
|
|
// the scheduler's best estimate of wu.rsc_fpops_est/elapsed_time.
|
|
|
|
// Taken from host_app_version elapsed time statistics if available,
|
|
|
|
// else on estimate provided by app_plan()
|
|
|
|
double peak_flops;
|
2012-09-12 22:31:23 +00:00
|
|
|
// stored in result.flops_estimate, and used for credit calculations
|
2008-03-27 18:25:29 +00:00
|
|
|
char cmdline[256];
|
|
|
|
|
2008-07-14 22:32:20 +00:00
|
|
|
HOST_USAGE() {
|
2012-06-22 07:35:54 +00:00
|
|
|
proc_type = PROC_TYPE_CPU;
|
|
|
|
gpu_usage = 0;
|
2009-12-11 22:45:59 +00:00
|
|
|
gpu_ram = 0;
|
2008-07-14 22:32:20 +00:00
|
|
|
avg_ncpus = 1;
|
|
|
|
max_ncpus = 1;
|
2010-04-10 05:49:51 +00:00
|
|
|
projected_flops = 0;
|
|
|
|
peak_flops = 0;
|
2008-07-14 22:32:20 +00:00
|
|
|
strcpy(cmdline, "");
|
|
|
|
}
|
2012-01-08 01:28:39 +00:00
|
|
|
void sequential_app(double flops) {
|
2012-06-22 07:35:54 +00:00
|
|
|
proc_type = PROC_TYPE_CPU;
|
|
|
|
gpu_usage = 0;
|
2009-12-11 22:45:59 +00:00
|
|
|
gpu_ram = 0;
|
2008-03-27 18:25:29 +00:00
|
|
|
avg_ncpus = 1;
|
|
|
|
max_ncpus = 1;
|
2012-01-08 01:28:39 +00:00
|
|
|
if (flops <= 0) flops = 1e9;
|
|
|
|
projected_flops = flops;
|
|
|
|
peak_flops = flops;
|
2008-03-27 18:25:29 +00:00
|
|
|
strcpy(cmdline, "");
|
|
|
|
}
|
2010-12-13 22:58:15 +00:00
|
|
|
inline bool is_sequential_app() {
|
2012-06-22 07:35:54 +00:00
|
|
|
if (proc_type != PROC_TYPE_CPU) return false;
|
2010-12-13 22:58:15 +00:00
|
|
|
if (avg_ncpus != 1) return false;
|
|
|
|
return true;
|
|
|
|
}
|
2010-04-08 23:14:47 +00:00
|
|
|
inline int resource_type() {
|
2012-06-22 07:35:54 +00:00
|
|
|
switch (proc_type) {
|
2012-06-25 23:09:45 +00:00
|
|
|
case PROC_TYPE_NVIDIA_GPU: return ANON_PLATFORM_NVIDIA;
|
|
|
|
case PROC_TYPE_AMD_GPU: return ANON_PLATFORM_ATI;
|
|
|
|
case PROC_TYPE_INTEL_GPU: return ANON_PLATFORM_INTEL;
|
2012-06-22 07:35:54 +00:00
|
|
|
default: return ANON_PLATFORM_CPU;
|
2010-04-08 23:14:47 +00:00
|
|
|
}
|
|
|
|
}
|
2010-06-01 23:41:07 +00:00
|
|
|
inline bool uses_gpu() {
|
2012-06-22 07:35:54 +00:00
|
|
|
return (proc_type != PROC_TYPE_CPU);
|
2010-06-01 23:41:07 +00:00
|
|
|
}
|
2008-03-18 21:22:44 +00:00
|
|
|
};
|
|
|
|
|
2012-08-27 17:00:43 +00:00
|
|
|
// a description of a sticky file on host, or a job input file
|
2004-09-10 00:41:48 +00:00
|
|
|
//
|
|
|
|
struct FILE_INFO {
|
|
|
|
char name[256];
|
2012-07-07 19:44:48 +00:00
|
|
|
double nbytes;
|
|
|
|
int status;
|
2012-08-27 17:00:43 +00:00
|
|
|
bool sticky;
|
2008-03-07 21:13:01 +00:00
|
|
|
|
2011-08-10 17:11:08 +00:00
|
|
|
int parse(XML_PARSER&);
|
2004-09-10 00:41:48 +00:00
|
|
|
};
|
|
|
|
|
2004-06-24 21:00:13 +00:00
|
|
|
struct MSG_FROM_HOST_DESC {
|
2004-07-06 04:10:51 +00:00
|
|
|
char variety[256];
|
2004-09-10 00:41:48 +00:00
|
|
|
std::string msg_text;
|
2011-08-10 17:11:08 +00:00
|
|
|
int parse(XML_PARSER&);
|
2004-01-08 00:27:59 +00:00
|
|
|
};
|
|
|
|
|
2004-01-26 19:29:39 +00:00
|
|
|
// an app version from an anonymous-platform client
|
2010-06-01 23:41:07 +00:00
|
|
|
// (starting with 6.11, ALL clients send these)
|
2004-01-26 19:29:39 +00:00
|
|
|
//
|
|
|
|
struct CLIENT_APP_VERSION {
|
|
|
|
char app_name[256];
|
2009-03-05 17:54:39 +00:00
|
|
|
char platform[256];
|
2004-01-26 19:29:39 +00:00
|
|
|
int version_num;
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
char plan_class[256];
|
|
|
|
HOST_USAGE host_usage;
|
2010-04-08 23:14:47 +00:00
|
|
|
double rsc_fpops_scale;
|
|
|
|
// multiply wu.rsc_fpops_est and rsc_fpops_limit
|
|
|
|
// by this amount when send to client,
|
|
|
|
// to reflect the discrepancy between how fast the client
|
|
|
|
// thinks the app is versus how fast we think it is
|
|
|
|
APP* app;
|
2010-07-23 17:43:20 +00:00
|
|
|
// if NULL, this record is a place-holder,
|
|
|
|
// used to preserve array indices
|
2004-01-26 19:29:39 +00:00
|
|
|
|
2011-08-10 17:11:08 +00:00
|
|
|
int parse(XML_PARSER&);
|
2004-01-26 19:29:39 +00:00
|
|
|
};
|
|
|
|
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps were CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
it will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
// keep track of the best app_version for each app for this host
|
|
|
|
//
|
|
|
|
struct BEST_APP_VERSION {
|
|
|
|
int appid;
|
2010-10-01 19:54:09 +00:00
|
|
|
bool for_64b_jobs;
|
|
|
|
// maintain this separately for jobs that need > 2GB RAM,
|
|
|
|
// in which case we can't use 32-bit apps
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
|
|
|
|
bool present;
|
2010-04-08 18:27:27 +00:00
|
|
|
// false means there's no usable version for this app
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
|
|
|
|
CLIENT_APP_VERSION* cavp;
|
2010-04-08 18:27:27 +00:00
|
|
|
// populated if anonymous platform
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
|
|
|
|
APP_VERSION* avp;
|
2010-04-08 18:27:27 +00:00
|
|
|
// populated otherwise
|
2009-07-29 17:29:56 +00:00
|
|
|
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
HOST_USAGE host_usage;
|
2010-04-08 18:27:27 +00:00
|
|
|
// populated in either case
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
bool reliable;
|
|
|
|
bool trusted;
|
|
|
|
|
|
|
|
DB_HOST_APP_VERSION* host_app_version();
|
|
|
|
// get the HOST_APP_VERSION, if any
|
2011-01-07 20:23:22 +00:00
|
|
|
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
BEST_APP_VERSION() {
|
|
|
|
present = false;
|
|
|
|
cavp = NULL;
|
|
|
|
avp = NULL;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2011-06-06 03:40:42 +00:00
|
|
|
struct SCHED_DB_RESULT : DB_RESULT {
|
|
|
|
// the following used by the scheduler, but not stored in the DB
|
|
|
|
//
|
|
|
|
char wu_name[256];
|
|
|
|
int units; // used for granting credit by # of units processed
|
2011-08-10 17:11:08 +00:00
|
|
|
int parse_from_client(XML_PARSER&);
|
2011-06-06 03:40:42 +00:00
|
|
|
char platform_name[256];
|
|
|
|
BEST_APP_VERSION bav;
|
|
|
|
|
|
|
|
int write_to_client(FILE*);
|
|
|
|
};
|
|
|
|
|
2004-03-23 03:59:32 +00:00
|
|
|
// Subset of global prefs used by scheduler.
//
struct GLOBAL_PREFS {
    double mod_time;
    double disk_max_used_gb;
    double disk_max_used_pct;
    double disk_min_free_gb;
    double work_buf_min_days;
    double ram_max_used_busy_frac;
    double ram_max_used_idle_frac;
    double max_ncpus_pct;

    void parse(const char* buf, const char* venue);
    void defaults();

    // work_buf_min_days converted to seconds (86400 seconds per day)
    //
    double work_buf_min() const {
        return work_buf_min_days*86400;
    }
};
|
|
|
|
|
2004-09-13 18:05:54 +00:00
|
|
|
struct GUI_URLS {
|
|
|
|
char* text;
|
|
|
|
void init();
|
2013-06-04 05:42:53 +00:00
|
|
|
void get_gui_urls(USER& user, HOST& host, TEAM& team, char*, int len);
|
2004-09-13 18:05:54 +00:00
|
|
|
};
|
|
|
|
|
2006-06-26 22:58:24 +00:00
|
|
|
// Holds project-files text.
// NOTE(review): presumably init() loads "text" -- confirm against
// the implementation.
//
struct PROJECT_FILES {
    char* text;

    void init();
};
|
|
|
|
|
2007-04-30 23:35:39 +00:00
|
|
|
// Represents a result from this project that the client has.
|
|
|
|
// The request message has a list of these.
|
|
|
|
// The reply message may include a list of those to be aborted
|
|
|
|
// or aborted if not started
|
|
|
|
//
|
2005-07-28 09:00:19 +00:00
|
|
|
struct OTHER_RESULT {
|
2009-06-01 22:15:14 +00:00
|
|
|
char name[256];
|
2010-06-01 23:41:07 +00:00
|
|
|
int app_version; // index into CLIENT_APP_VERSION array
|
2009-06-01 22:15:14 +00:00
|
|
|
char plan_class[64];
|
|
|
|
bool have_plan_class;
|
2007-04-05 17:02:01 +00:00
|
|
|
bool abort;
|
|
|
|
bool abort_if_not_started;
|
2008-02-26 17:24:29 +00:00
|
|
|
int reason; // see codes below
|
2005-07-28 09:00:19 +00:00
|
|
|
|
2011-08-10 17:11:08 +00:00
|
|
|
int parse(XML_PARSER&);
|
2005-07-28 09:00:19 +00:00
|
|
|
};
|
|
|
|
|
2008-02-26 17:24:29 +00:00
|
|
|
// values for OTHER_RESULT::reason
//
#define ABORT_REASON_NOT_FOUND      1
#define ABORT_REASON_WU_CANCELLED   2
#define ABORT_REASON_ASSIMILATED    3
#define ABORT_REASON_TIMED_OUT      4
|
|
|
|
|
2007-04-19 22:45:57 +00:00
|
|
|
struct CLIENT_PLATFORM {
|
|
|
|
char name[256];
|
2011-08-10 17:11:08 +00:00
|
|
|
int parse(XML_PARSER&);
|
2007-04-19 22:45:57 +00:00
|
|
|
};
|
|
|
|
|
2007-04-30 21:19:24 +00:00
|
|
|
struct PLATFORM_LIST {
|
|
|
|
std::vector<PLATFORM*> list;
|
|
|
|
};
|
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
struct SCHEDULER_REQUEST {
|
|
|
|
char authenticator[256];
|
2007-04-19 22:45:57 +00:00
|
|
|
CLIENT_PLATFORM platform;
|
|
|
|
std::vector<CLIENT_PLATFORM> alt_platforms;
|
2008-02-21 00:47:50 +00:00
|
|
|
PLATFORM_LIST platforms;
|
2004-04-30 18:26:20 +00:00
|
|
|
char cross_project_id[256];
|
2002-04-30 22:22:54 +00:00
|
|
|
int hostid; // zero if first RPC
|
2002-12-02 04:29:40 +00:00
|
|
|
int core_client_major_version;
|
|
|
|
int core_client_minor_version;
|
2005-08-31 00:18:36 +00:00
|
|
|
int core_client_release;
|
2010-01-13 17:28:59 +00:00
|
|
|
int core_client_version; // 10000*major + 100*minor + release
|
2002-04-30 22:22:54 +00:00
|
|
|
int rpc_seqno;
|
2004-12-06 22:41:19 +00:00
|
|
|
double work_req_seconds;
|
2011-01-07 20:23:22 +00:00
|
|
|
// in "normalized CPU seconds" (see work_req.php)
|
2009-01-10 00:43:33 +00:00
|
|
|
double cpu_req_secs;
|
|
|
|
double cpu_req_instances;
|
2004-07-06 21:51:49 +00:00
|
|
|
double resource_share_fraction;
|
2005-10-04 21:44:58 +00:00
|
|
|
// this project's fraction of total resource share
|
|
|
|
double rrs_fraction;
|
|
|
|
// ... of runnable resource share
|
|
|
|
double prrs_fraction;
|
|
|
|
// ... of potentially runnable resource share
|
2009-01-30 21:25:24 +00:00
|
|
|
double cpu_estimated_delay;
|
|
|
|
// currently queued jobs saturate the CPU for this long;
|
|
|
|
// used for crude deadline check
|
2005-06-29 04:17:28 +00:00
|
|
|
double duration_correction_factor;
|
2012-10-17 19:21:22 +00:00
|
|
|
double uptime;
|
|
|
|
double previous_uptime;
|
2008-03-31 16:19:45 +00:00
|
|
|
char global_prefs_xml[BLOB_SIZE];
|
|
|
|
char working_global_prefs_xml[BLOB_SIZE];
|
2004-05-13 18:18:22 +00:00
|
|
|
char code_sign_key[4096];
|
2004-08-14 00:37:38 +00:00
|
|
|
|
2004-09-10 00:41:48 +00:00
|
|
|
std::vector<CLIENT_APP_VERSION> client_app_versions;
|
2011-10-03 23:43:53 +00:00
|
|
|
|
2004-03-23 03:59:32 +00:00
|
|
|
GLOBAL_PREFS global_prefs;
|
2004-05-03 02:18:35 +00:00
|
|
|
char global_prefs_source_email_hash[MD5_LEN];
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2004-05-13 04:48:19 +00:00
|
|
|
HOST host; // request message is parsed into here.
|
|
|
|
// does NOT contain the full host record.
|
2008-03-10 21:59:27 +00:00
|
|
|
COPROCS coprocs;
|
2011-06-06 03:40:42 +00:00
|
|
|
std::vector<SCHED_DB_RESULT> results;
|
2007-07-05 04:18:48 +00:00
|
|
|
// completed results being reported
|
2012-06-05 03:48:05 +00:00
|
|
|
bool results_truncated;
|
|
|
|
// set if (to limit memory usage) we capped this size of "results"
|
|
|
|
// In this case, don't resend lost results
|
|
|
|
// since we don't know what was lost.
|
2011-07-19 20:52:41 +00:00
|
|
|
std::vector<RESULT> file_xfer_results;
|
2004-09-10 00:41:48 +00:00
|
|
|
std::vector<MSG_FROM_HOST_DESC> msgs_from_host;
|
2007-07-06 16:37:00 +00:00
|
|
|
std::vector<FILE_INFO> file_infos;
|
2012-02-16 16:52:07 +00:00
|
|
|
// sticky files reported by host
|
|
|
|
|
|
|
|
// temps used by locality scheduling:
|
2007-07-06 16:37:00 +00:00
|
|
|
std::vector<FILE_INFO> file_delete_candidates;
|
2012-02-16 16:52:07 +00:00
|
|
|
// deletion candidates
|
2007-07-06 16:37:00 +00:00
|
|
|
std::vector<FILE_INFO> files_not_needed;
|
2012-02-16 16:52:07 +00:00
|
|
|
// files no longer needed
|
|
|
|
|
2005-07-28 09:00:19 +00:00
|
|
|
std::vector<OTHER_RESULT> other_results;
|
2007-07-05 04:18:48 +00:00
|
|
|
// in-progress results from this project
|
2005-07-28 09:00:19 +00:00
|
|
|
std::vector<IP_RESULT> ip_results;
|
2007-07-05 04:18:48 +00:00
|
|
|
// in-progress results from all projects
|
2005-07-28 10:13:30 +00:00
|
|
|
bool have_other_results_list;
|
|
|
|
bool have_ip_results_list;
|
2008-01-13 00:12:14 +00:00
|
|
|
bool have_time_stats_log;
|
2008-04-02 19:05:08 +00:00
|
|
|
bool client_cap_plan_class;
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
int sandbox;
|
|
|
|
// whether client uses account-based sandbox. -1 = don't know
|
- scheduler and client: fix the "allow multiple clients" feature.
This feature lets you run the BOINC client as a job on grid systems
that handle only 1-CPU jobs;
it disables various mechanisms that prevent multiple clients per host
(which is normally a bad thing).
Old:
- Run the client with a --allow_multiple_clients flag.
This tells it not to use a mutex that prevents
multiple clients per host.
- Run the project with the <multiple_clients_per_host> config flag.
This suppresses two mechanisms:
- (avoid duplicate host records)
on a scheduler request with no host ID,
looks for a host with same domain name, OS type,
and mem size, and assumes the request is from that host
- (job retry)
If we get a request that doesn't have a host ID
but does have a host CPID,
mark its in-progress results as over
NOTE: I CAN'T REMEMBER WHY WE SUPPRESS THIS;
MARK S, DO YOU REMEMBER?
Problem:
if the grid clients attach to a project that
doesn't use <multiple_clients_per_host>, bad things happen.
E.g., if there are several requests at about the same time,
most of them will fail with
"another RPC already in progress" errors.
If a project does include this flag,
it loses protection from duplicate host records.
New:
- If the client is run with --allow_multiple_clients flag,
it passes a <allow_multiple_clients> element
in scheduler requests.
- The scheduler skips the duplicate-host check on
requests that include this flag.
- There is no more <multiple_clients_per_host> scheduler option.
Note: if a project using the old mechanism upgrades to this change,
it will need to use new clients for its grid deployment.
svn path=/trunk/boinc/; revision=21839
2010-06-29 16:37:28 +00:00
|
|
|
int allow_multiple_clients;
|
|
|
|
// whether client allows multiple clients per host, -1 don't know
|
2009-08-18 20:44:12 +00:00
|
|
|
bool using_weak_auth;
|
|
|
|
// Request uses weak authenticator.
|
|
|
|
// Don't modify user prefs or CPID
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
int last_rpc_dayofyear;
|
|
|
|
int current_rpc_dayofyear;
|
2011-11-14 06:27:36 +00:00
|
|
|
std::string client_opaque;
|
2009-08-18 20:44:12 +00:00
|
|
|
|
2011-04-01 21:21:11 +00:00
|
|
|
SCHEDULER_REQUEST(){};
|
|
|
|
~SCHEDULER_REQUEST(){};
|
2011-08-10 17:11:08 +00:00
|
|
|
const char* parse(XML_PARSER&);
|
2005-01-31 16:10:49 +00:00
|
|
|
int write(FILE*); // write request info to file: not complete
|
2002-04-30 22:22:54 +00:00
|
|
|
};
|
|
|
|
|
2005-04-18 17:54:03 +00:00
|
|
|
// Disk-space limits; keeps track of the bottleneck disk preference.
// NOTE(review): the units and exact meaning of each field are not visible
// in this header; the per-field notes below are inferred from the names
// only -- confirm against the code that fills them in.
|
|
|
|
//
|
|
|
|
struct DISK_LIMITS {
|
|
|
|
double max_used;    // presumably tied to a "max disk space used" preference -- TODO confirm
|
|
|
|
double max_frac;    // presumably tied to a "max fraction of disk used" preference -- TODO confirm
|
|
|
|
double min_free;    // presumably tied to a "min free disk space" preference -- TODO confirm
|
|
|
|
};
|
|
|
|
|
2010-05-21 21:49:54 +00:00
|
|
|
// summary of a client's request for work, and our response to it
|
|
|
|
// Note: this is zeroed out in SCHEDULER_REPLY constructor
|
|
|
|
//
|
2014-01-09 06:00:13 +00:00
|
|
|
struct WORK_REQ_BASE {
|
2010-05-21 21:49:54 +00:00
|
|
|
bool anonymous_platform;
|
|
|
|
|
2011-10-03 23:43:53 +00:00
|
|
|
// the following defined if anonymous platform
|
|
|
|
//
|
2012-06-25 23:09:45 +00:00
|
|
|
bool client_has_apps_for_proc_type[NPROC_TYPES];
|
2011-10-03 23:43:53 +00:00
|
|
|
|
2010-05-21 21:49:54 +00:00
|
|
|
// Flags used by old-style scheduling,
|
|
|
|
// while making multiple passes through the work array
|
2012-06-25 23:09:45 +00:00
|
|
|
//
|
2010-05-21 21:49:54 +00:00
|
|
|
bool infeasible_only;
|
|
|
|
bool reliable_only;
|
|
|
|
bool user_apps_only;
|
|
|
|
bool beta_only;
|
2012-08-27 17:00:43 +00:00
|
|
|
bool locality_sched_lite;
|
|
|
|
// for LSL apps, send only jobs where client has > 0 files
|
2010-05-21 21:49:54 +00:00
|
|
|
|
2010-08-11 22:02:41 +00:00
|
|
|
bool resend_lost_results;
|
|
|
|
// this is set if the request is reporting a result
|
|
|
|
// that was previously reported.
|
|
|
|
// This is evidence that the earlier reply was not received
|
|
|
|
// by the client. It may have contained results,
|
|
|
|
// so check and resend just in case.
|
|
|
|
|
2010-05-21 21:49:54 +00:00
|
|
|
// user preferences
|
2012-06-25 23:09:45 +00:00
|
|
|
//
|
2012-06-22 07:35:54 +00:00
|
|
|
bool dont_use_proc_type[NPROC_TYPES];
|
2011-01-07 20:23:22 +00:00
|
|
|
bool allow_non_preferred_apps;
|
|
|
|
bool allow_beta_work;
|
2010-05-21 21:49:54 +00:00
|
|
|
|
|
|
|
bool has_reliable_version;
|
|
|
|
// whether the host has a reliable app version
|
|
|
|
|
|
|
|
int effective_ncpus;
|
2013-12-03 23:54:56 +00:00
|
|
|
// # of usable CPUs on host, taking prefs into account
|
2010-05-21 21:49:54 +00:00
|
|
|
int effective_ngpus;
|
|
|
|
|
|
|
|
// 6.7+ clients send separate requests for different resource types:
|
|
|
|
//
|
2012-06-22 07:35:54 +00:00
|
|
|
double req_secs[NPROC_TYPES];
|
|
|
|
// instance-seconds requested
|
|
|
|
double req_instances[NPROC_TYPES];
|
|
|
|
// number of idle instances, use if possible
|
|
|
|
inline bool need_proc_type(int t) {
|
|
|
|
return (req_secs[t]>0) || (req_instances[t]>0);
|
2010-05-21 21:49:54 +00:00
|
|
|
}
|
2014-03-08 19:17:16 +00:00
|
|
|
inline void clear_req(int proc_type) {
|
|
|
|
req_secs[proc_type] = 0;
|
|
|
|
req_instances[proc_type] = 0;
|
2010-05-21 21:49:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// older clients send send a single number, the requested duration of jobs
|
|
|
|
//
|
|
|
|
double seconds_to_fill;
|
|
|
|
|
2010-08-11 22:02:41 +00:00
|
|
|
// true if new-type request, which has resource-specific requests
|
2010-05-21 21:49:54 +00:00
|
|
|
//
|
|
|
|
bool rsc_spec_request;
|
|
|
|
|
|
|
|
double disk_available;
|
|
|
|
double ram, usable_ram;
|
2014-03-07 05:23:02 +00:00
|
|
|
double cpu_available_frac;
|
|
|
|
double gpu_available_frac;
|
2010-05-21 21:49:54 +00:00
|
|
|
int njobs_sent;
|
|
|
|
|
|
|
|
// The following keep track of the "easiest" job that was rejected
|
|
|
|
// by EDF simulation.
|
|
|
|
// Any jobs harder than this can be rejected without doing the simulation.
|
|
|
|
//
|
|
|
|
double edf_reject_min_cpu;
|
|
|
|
int edf_reject_max_delay_bound;
|
|
|
|
bool have_edf_reject;
|
|
|
|
void edf_reject(double cpu, int delay_bound) {
|
|
|
|
if (have_edf_reject) {
|
|
|
|
if (cpu < edf_reject_min_cpu) edf_reject_min_cpu = cpu;
|
|
|
|
if (delay_bound> edf_reject_max_delay_bound) edf_reject_max_delay_bound = delay_bound;
|
|
|
|
} else {
|
|
|
|
edf_reject_min_cpu = cpu;
|
|
|
|
edf_reject_max_delay_bound = delay_bound;
|
|
|
|
have_edf_reject = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
bool edf_reject_test(double cpu, int delay_bound) {
|
|
|
|
if (!have_edf_reject) return false;
|
|
|
|
if (cpu < edf_reject_min_cpu) return false;
|
|
|
|
if (delay_bound > edf_reject_max_delay_bound) return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
RESOURCE disk;
|
|
|
|
RESOURCE mem;
|
|
|
|
RESOURCE speed;
|
|
|
|
RESOURCE bandwidth;
|
|
|
|
|
|
|
|
// various reasons for not sending jobs (used to explain why)
|
|
|
|
//
|
|
|
|
bool no_allowed_apps_available;
|
|
|
|
bool hr_reject_temp;
|
|
|
|
bool hr_reject_perm;
|
|
|
|
bool outdated_client;
|
|
|
|
bool max_jobs_on_host_exceeded;
|
2014-03-08 19:17:16 +00:00
|
|
|
bool max_jobs_on_host_proc_type_exceeded[NPROC_TYPES];
|
2010-05-21 21:49:54 +00:00
|
|
|
bool no_jobs_available; // project has no work right now
|
|
|
|
int max_jobs_per_rpc;
|
|
|
|
void get_job_limits();
|
|
|
|
|
2014-03-08 19:17:16 +00:00
|
|
|
bool max_jobs_exceeded() {
|
|
|
|
if (max_jobs_on_host_exceeded) return true;
|
|
|
|
for (int i=0; i<NPROC_TYPES; i++) {
|
|
|
|
if (max_jobs_on_host_proc_type_exceeded[i]) return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2014-01-09 06:00:13 +00:00
|
|
|
void clear() {
|
|
|
|
memset(this, 0, sizeof(WORK_REQ_BASE));
|
|
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
// Work request state that needs containers: adds std::vector members
// on top of WORK_REQ_BASE.
// WORK_REQ_BASE::clear() writes only sizeof(WORK_REQ_BASE) bytes,
// so the vectors declared here are not affected by it.
struct WORK_REQ : public WORK_REQ_BASE {
|
|
|
|
std::vector<APP_INFO> preferred_apps;
|
|
|
|
std::vector<USER_MESSAGE> no_work_messages;
|
|
|
|
// holds raw pointers; ownership is not visible in this header
std::vector<BEST_APP_VERSION*> best_app_versions;
|
|
|
|
std::vector<DB_HOST_APP_VERSION> host_app_versions;
|
|
|
|
// NOTE(review): presumably a snapshot of host_app_versions as read,
// kept so only modified records are written back -- confirm against callers
std::vector<DB_HOST_APP_VERSION> host_app_versions_orig;
|
|
|
|
|
|
|
|
// declared here; the definition is outside this header
void add_no_work_message(const char*);
|
|
|
|
|
2011-06-06 03:40:42 +00:00
|
|
|
~WORK_REQ() {}
|
2010-05-21 21:49:54 +00:00
|
|
|
};
|
|
|
|
|
2002-09-22 23:27:14 +00:00
|
|
|
// State of the reply to a scheduler request.
// The struct has std::vector members, so it is not a plain-data type.
// NOTE: if any field requires initialization,
|
|
|
|
// you must do it in the constructor.  Nothing is zeroed by default.
|
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
struct SCHEDULER_REPLY {
|
2005-01-31 23:20:49 +00:00
|
|
|
WORK_REQ wreq;
|
2005-04-18 17:54:03 +00:00
|
|
|
DISK_LIMITS disk_limits;
|
2005-02-16 23:17:43 +00:00
|
|
|
double request_delay; // don't request again until this time elapses
|
2005-01-31 22:19:03 +00:00
|
|
|
std::vector<USER_MESSAGE> messages;
|
2004-10-01 22:00:39 +00:00
|
|
|
int hostid;
|
|
|
|
// nonzero only if a new host record was created.
|
|
|
|
// this tells client to reset rpc_seqno
|
2005-05-17 05:55:24 +00:00
|
|
|
int lockfile_fd; // file descriptor of lockfile, or -1 if no lock.
|
2006-10-06 18:52:50 +00:00
|
|
|
bool send_global_prefs;
|
2002-12-02 04:29:40 +00:00
|
|
|
bool nucleus_only; // send only message
|
2002-05-29 23:25:21 +00:00
|
|
|
USER user;
|
2004-05-03 02:18:35 +00:00
|
|
|
char email_hash[MD5_LEN]; // NOTE(review): presumably an MD5 hash of the user's email -- confirm
|
2004-05-13 04:48:19 +00:00
|
|
|
HOST host; // after validation, contains full host rec
|
2003-02-24 21:31:36 +00:00
|
|
|
TEAM team;
|
2004-09-10 00:41:48 +00:00
|
|
|
std::vector<APP> apps;
|
|
|
|
std::vector<APP_VERSION> app_versions;
|
|
|
|
std::vector<WORKUNIT>wus;
|
2011-06-06 03:40:42 +00:00
|
|
|
std::vector<SCHED_DB_RESULT>results;
|
2005-06-22 06:02:59 +00:00
|
|
|
std::vector<std::string>result_acks;
|
2007-04-05 17:02:01 +00:00
|
|
|
std::vector<std::string>result_aborts;
|
|
|
|
std::vector<std::string>result_abort_if_not_starteds;
|
2004-09-10 00:41:48 +00:00
|
|
|
std::vector<MSG_TO_HOST>msgs_to_host;
|
|
|
|
std::vector<FILE_INFO>file_deletes;
|
2012-02-29 01:11:28 +00:00
|
|
|
std::vector<std::string> file_transfer_requests;
|
2004-05-13 18:18:22 +00:00
|
|
|
char code_sign_key[4096]; // fixed-size buffer
|
|
|
|
char code_sign_key_signature[4096]; // fixed-size buffer
|
2004-06-24 21:00:13 +00:00
|
|
|
bool send_msg_ack;
|
2008-03-13 23:35:13 +00:00
|
|
|
bool project_is_down;
|
2011-12-17 22:11:26 +00:00
|
|
|
std::vector<APP_VERSION>old_app_versions;
|
|
|
|
// superseded app versions that we consider using because of
|
|
|
|
// homogeneous app version.
|
2002-04-30 22:22:54 +00:00
|
|
|
|
|
|
|
|
// The member functions below are only declared here;
// their definitions are outside this header.
SCHEDULER_REPLY();
|
2012-03-23 16:25:19 +00:00
|
|
|
~SCHEDULER_REPLY(){};
|
2008-10-01 22:07:35 +00:00
|
|
|
int write(FILE*, SCHEDULER_REQUEST&);
|
2002-04-30 22:22:54 +00:00
|
|
|
void insert_app_unique(APP&);
|
|
|
|
void insert_app_version_unique(APP_VERSION&);
|
|
|
|
void insert_workunit_unique(WORKUNIT&);
|
2011-06-06 03:40:42 +00:00
|
|
|
void insert_result(SCHED_DB_RESULT&);
|
2009-08-21 19:14:15 +00:00
|
|
|
void insert_message(const char* msg, const char* prio);
|
|
|
|
void insert_message(USER_MESSAGE&);
|
2005-02-16 23:17:43 +00:00
|
|
|
void set_delay(double);
|
2002-04-30 22:22:54 +00:00
|
|
|
};
|
|
|
|
|
2008-12-15 21:14:32 +00:00
|
|
|
extern SCHEDULER_REQUEST* g_request;
|
|
|
|
extern SCHEDULER_REPLY* g_reply;
|
2008-12-16 16:29:54 +00:00
|
|
|
extern WORK_REQ* g_wreq;
|
2012-01-09 17:35:48 +00:00
|
|
|
extern double capped_host_fpops();
|
2008-12-15 21:14:32 +00:00
|
|
|
|
2010-04-01 22:51:19 +00:00
|
|
|
// Convenience wrapper: forwards m to add_no_work_message()
// on the global work request g_wreq.
// g_wreq is dereferenced without a null check.
//
static inline void add_no_work_message(const char* m) {
|
2009-08-21 20:38:39 +00:00
|
|
|
g_wreq->add_no_work_message(m);
|
|
|
|
}
|
|
|
|
|
2010-01-12 21:53:40 +00:00
|
|
|
extern void get_weak_auth(USER&, char*);
|
|
|
|
extern void get_rss_auth(USER&, char*);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
extern void read_host_app_versions();
|
|
|
|
extern DB_HOST_APP_VERSION* get_host_app_version(int gavid);
|
|
|
|
extern void write_host_app_versions();
|
|
|
|
|
|
|
|
extern DB_HOST_APP_VERSION* gavid_to_havp(int gavid);
|
|
|
|
extern DB_HOST_APP_VERSION* quota_exceeded_version();
|
2010-01-12 21:53:40 +00:00
|
|
|
|
2010-11-30 19:36:07 +00:00
|
|
|
// Return true if the platform name contains the substring "64"
// (for example "x86_64-pc-linux-gnu").
// A NULL name is reported as not 64-bit instead of being passed to
// strstr(), which requires a valid string pointer.
//
inline bool is_64b_platform(const char* name) {
    return name != NULL && strstr(name, "64") != NULL;
}
|
|
|
|
|
2014-03-07 05:23:02 +00:00
|
|
|
extern double available_frac(BEST_APP_VERSION&);
|
2002-04-30 22:22:54 +00:00
|
|
|
#endif
|