mirror of https://github.com/BOINC/boinc.git
590 lines
17 KiB
C++
590 lines
17 KiB
C++
// This file is part of BOINC.
|
|
// http://boinc.berkeley.edu
|
|
// Copyright (C) 2008 University of California
|
|
//
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
// under the terms of the GNU Lesser General Public License
|
|
// as published by the Free Software Foundation,
|
|
// either version 3 of the License, or (at your option) any later version.
|
|
//
|
|
// BOINC is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
// See the GNU Lesser General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
#ifndef BOINC_SCHED_TYPES_H
|
|
#define BOINC_SCHED_TYPES_H
|
|
|
|
#include <cstdio>
|
|
#include <vector>
|
|
|
|
#include "boinc_db.h"
|
|
#include "common_defs.h"
|
|
#include "md5_file.h"
|
|
#include "coproc.h"
|
|
#include "keyword.h"
|
|
|
|
#include "edf_sim.h"
|
|
|
|
// Used by projects that let volunteers filter work by application:
// each APP_INFO names one app the user is willing to accept work from.
//
struct APP_INFO {
    int appid;
        // ID of the selected app
    int work_available;
        // NOTE(review): presumably a flag set when work for this app
        // was found; confirm against the code that sets it
};
|
|
|
|
// A host resource (disk etc.) of which the client may have too little.
// There is no constructor; fields hold whatever the enclosing object
// was initialized to (WORK_REQ_BASE::clear() zeroes them).
//
struct RESOURCE {
    bool insufficient;
    double needed;
        // the min extra amount needed; zero means "nothing recorded yet"

    // Flag the resource as insufficient and remember the smallest
    // shortfall reported so far.
    // A stored value of zero is treated as unset and simply overwritten.
    //
    inline void set_insufficient(double x) {
        insufficient = true;
        if (!needed || x < needed) {
            needed = x;
        }
    }
};
|
|
|
|
// Text to be shown to the volunteer, together with its priority.
//
struct USER_MESSAGE {
    std::string message;
        // the message text
    std::string priority;
        // priority label supplied by the caller

    // m: message text; p: priority. Defined outside this header.
    USER_MESSAGE(const char* m, const char* p);
};
|
|
|
|
struct HOST_USAGE {
|
|
int proc_type;
|
|
double gpu_usage;
|
|
double gpu_ram;
|
|
double avg_ncpus;
|
|
double mem_usage;
|
|
// mem usage if specified by the plan class
|
|
// (overrides wu.rsc_memory_bound)
|
|
double projected_flops;
|
|
// the scheduler's best estimate of wu.rsc_fpops_est/elapsed_time.
|
|
// Taken from host_app_version elapsed time statistics if available,
|
|
// else on estimate provided by app_plan()
|
|
double peak_flops;
|
|
// stored in result.flops_estimate, and used for credit calculations
|
|
char cmdline[256];
|
|
char custom_coproc_type[256];
|
|
// if we're using a custom GPU type, its name
|
|
// TODO: get rid of PROC_TYPE_*, and this
|
|
|
|
HOST_USAGE() {
|
|
proc_type = PROC_TYPE_CPU;
|
|
gpu_usage = 0;
|
|
gpu_ram = 0;
|
|
avg_ncpus = 1;
|
|
mem_usage = 0;
|
|
projected_flops = 0;
|
|
peak_flops = 0;
|
|
strcpy(cmdline, "");
|
|
strcpy(custom_coproc_type, "");
|
|
}
|
|
void sequential_app(double flops) {
|
|
proc_type = PROC_TYPE_CPU;
|
|
gpu_usage = 0;
|
|
gpu_ram = 0;
|
|
avg_ncpus = 1;
|
|
mem_usage = 0;
|
|
if (flops <= 0) flops = 1e9;
|
|
projected_flops = flops;
|
|
peak_flops = flops;
|
|
strcpy(cmdline, "");
|
|
}
|
|
inline bool is_sequential_app() {
|
|
if (proc_type != PROC_TYPE_CPU) return false;
|
|
if (avg_ncpus != 1) return false;
|
|
return true;
|
|
}
|
|
inline int resource_type() {
|
|
switch (proc_type) {
|
|
case PROC_TYPE_NVIDIA_GPU: return ANON_PLATFORM_NVIDIA;
|
|
case PROC_TYPE_AMD_GPU: return ANON_PLATFORM_ATI;
|
|
case PROC_TYPE_INTEL_GPU: return ANON_PLATFORM_INTEL;
|
|
default: return ANON_PLATFORM_CPU;
|
|
}
|
|
}
|
|
inline bool uses_gpu() {
|
|
return (proc_type != PROC_TYPE_CPU);
|
|
}
|
|
};
|
|
|
|
// a description of a sticky file on host, or a job input file
|
|
//
|
|
struct FILE_INFO {
|
|
char name[256];
|
|
double nbytes;
|
|
int status;
|
|
bool sticky;
|
|
|
|
int parse(XML_PARSER&);
|
|
};
|
|
|
|
struct MSG_FROM_HOST_DESC {
|
|
char variety[256];
|
|
std::string msg_text;
|
|
int parse(XML_PARSER&);
|
|
};
|
|
|
|
// an app version from an anonymous-platform client
|
|
// (starting with 6.11, ALL clients send these)
|
|
//
|
|
struct CLIENT_APP_VERSION {
|
|
char app_name[256];
|
|
char platform[256];
|
|
int version_num;
|
|
char plan_class[256];
|
|
HOST_USAGE host_usage;
|
|
double rsc_fpops_scale;
|
|
// multiply wu.rsc_fpops_est and rsc_fpops_limit
|
|
// by this amount when send to client,
|
|
// to reflect the discrepancy between how fast the client
|
|
// thinks the app is versus how fast we think it is
|
|
APP* app;
|
|
// if NULL, this record is a place-holder,
|
|
// used to preserve array indices
|
|
|
|
int parse(XML_PARSER&);
|
|
};
|
|
|
|
// keep track of the best app_version for each app for this host
|
|
//
|
|
struct BEST_APP_VERSION {
|
|
DB_ID_TYPE appid;
|
|
bool for_64b_jobs;
|
|
// maintain this separately for jobs that need > 2GB RAM,
|
|
// in which case we can't use 32-bit apps
|
|
|
|
bool present;
|
|
// false means there's no usable version for this app
|
|
|
|
CLIENT_APP_VERSION* cavp;
|
|
// populated if anonymous platform
|
|
|
|
APP_VERSION* avp;
|
|
// populated otherwise
|
|
|
|
HOST_USAGE host_usage;
|
|
// populated in either case
|
|
|
|
bool reliable;
|
|
bool trusted;
|
|
|
|
DB_HOST_APP_VERSION* host_app_version();
|
|
// get the HOST_APP_VERSION, if any
|
|
|
|
BEST_APP_VERSION() {
|
|
appid = 0;
|
|
for_64b_jobs = false;
|
|
present = false;
|
|
cavp = NULL;
|
|
avp = NULL;
|
|
reliable = false;
|
|
trusted = false;
|
|
}
|
|
};
|
|
|
|
struct SCHED_DB_RESULT : DB_RESULT {
|
|
// the following used by the scheduler, but not stored in the DB
|
|
//
|
|
char wu_name[256];
|
|
int units; // used for granting credit by # of units processed
|
|
int parse_from_client(XML_PARSER&);
|
|
char platform_name[256];
|
|
BEST_APP_VERSION bav;
|
|
|
|
int write_to_client(FILE*);
|
|
};
|
|
|
|
// The part of the global preferences that the scheduler uses.
//
struct GLOBAL_PREFS {
    double mod_time;
    double disk_max_used_gb;
    double disk_max_used_pct;
    double disk_min_free_gb;
    double work_buf_min_days;
    double ram_max_used_busy_frac;
    double ram_max_used_idle_frac;
    double max_ncpus_pct;

    // parse() and defaults() are defined outside this header.
    void parse(const char* buf, const char* venue);
    void defaults();

    // work_buf_min_days converted from days to seconds
    //
    inline double work_buf_min() {
        const double seconds_per_day = 86400;
        return work_buf_min_days * seconds_per_day;
    }
};
|
|
|
|
struct GUI_URLS {
|
|
char* text;
|
|
void init();
|
|
void get_gui_urls(USER& user, HOST& host, TEAM& team, char*, int len);
|
|
};
|
|
|
|
// Holds the project-files text.
// NOTE(review): ownership and lifetime of "text" are not visible here.
//
struct PROJECT_FILES {
    char* text;

    void init();
};
|
|
|
|
// Represents a result from this project that the client has.
|
|
// The request message has a list of these.
|
|
// The reply message may include a list of those to be aborted
|
|
// or aborted if not started
|
|
//
|
|
struct OTHER_RESULT {
|
|
char name[256];
|
|
int app_version; // index into CLIENT_APP_VERSION array
|
|
char plan_class[64];
|
|
bool have_plan_class;
|
|
bool abort;
|
|
bool abort_if_not_started;
|
|
int reason; // see codes below
|
|
|
|
int parse(XML_PARSER&);
|
|
};
|
|
|
|
// Values for OTHER_RESULT::reason
//
#define ABORT_REASON_NOT_FOUND      1
#define ABORT_REASON_WU_CANCELLED   2
#define ABORT_REASON_ASSIMILATED    3
#define ABORT_REASON_TIMED_OUT      4
|
|
|
|
struct CLIENT_PLATFORM {
|
|
char name[256];
|
|
int parse(XML_PARSER&);
|
|
};
|
|
|
|
struct PLATFORM_LIST {
|
|
std::vector<PLATFORM*> list;
|
|
};
|
|
|
|
struct SCHEDULER_REQUEST {
|
|
char authenticator[256];
|
|
CLIENT_PLATFORM platform;
|
|
std::vector<CLIENT_PLATFORM> alt_platforms;
|
|
PLATFORM_LIST platforms;
|
|
char cross_project_id[256];
|
|
DB_ID_TYPE hostid; // zero if first RPC
|
|
int core_client_major_version;
|
|
int core_client_minor_version;
|
|
int core_client_release;
|
|
int core_client_version; // 10000*major + 100*minor + release
|
|
int rpc_seqno;
|
|
double work_req_seconds;
|
|
// in "normalized CPU seconds" (see work_req.php)
|
|
double cpu_req_secs;
|
|
double cpu_req_instances;
|
|
double resource_share_fraction;
|
|
// this project's fraction of total resource share
|
|
double rrs_fraction;
|
|
// ... of runnable resource share
|
|
double prrs_fraction;
|
|
// ... of potentially runnable resource share
|
|
double cpu_estimated_delay;
|
|
// currently queued jobs saturate the CPU for this long;
|
|
// used for crude deadline check
|
|
double duration_correction_factor;
|
|
double uptime;
|
|
double previous_uptime;
|
|
char global_prefs_xml[BLOB_SIZE];
|
|
char working_global_prefs_xml[BLOB_SIZE];
|
|
char code_sign_key[4096];
|
|
bool dont_send_work;
|
|
char client_brand[256];
|
|
// as specified in client_brand.txt config file on client
|
|
|
|
std::vector<CLIENT_APP_VERSION> client_app_versions;
|
|
|
|
GLOBAL_PREFS global_prefs;
|
|
char global_prefs_source_email_hash[MD5_LEN];
|
|
|
|
HOST host; // request message is parsed into here.
|
|
// does NOT contain the full host record.
|
|
COPROCS coprocs;
|
|
std::vector<SCHED_DB_RESULT> results;
|
|
// completed results being reported
|
|
bool results_truncated;
|
|
// set if (to limit memory usage) we capped this size of "results"
|
|
// In this case, don't resend lost results
|
|
// since we don't know what was lost.
|
|
std::vector<RESULT> file_xfer_results;
|
|
std::vector<MSG_FROM_HOST_DESC> msgs_from_host;
|
|
std::vector<FILE_INFO> file_infos;
|
|
// sticky files reported by host
|
|
|
|
// temps used by locality scheduling:
|
|
std::vector<FILE_INFO> file_delete_candidates;
|
|
// deletion candidates
|
|
std::vector<FILE_INFO> files_not_needed;
|
|
// files no longer needed
|
|
|
|
std::vector<OTHER_RESULT> other_results;
|
|
// in-progress results from this project
|
|
std::vector<IP_RESULT> ip_results;
|
|
// in-progress results from all projects
|
|
bool have_other_results_list;
|
|
bool have_ip_results_list;
|
|
bool have_time_stats_log;
|
|
bool client_cap_plan_class;
|
|
int sandbox;
|
|
// whether client uses account-based sandbox. -1 = don't know
|
|
int allow_multiple_clients;
|
|
// whether client allows multiple clients per host, -1 don't know
|
|
bool using_weak_auth;
|
|
// Request uses weak authenticator.
|
|
// Don't modify user prefs or CPID
|
|
int last_rpc_dayofyear;
|
|
int current_rpc_dayofyear;
|
|
USER_KEYWORDS user_keywords;
|
|
|
|
SCHEDULER_REQUEST();
|
|
~SCHEDULER_REQUEST(){};
|
|
void clear();
|
|
const char* parse(XML_PARSER&);
|
|
int write(FILE*); // write request info to file: not complete
|
|
};
|
|
|
|
// Records the disk preference limits, so the one acting as
// the bottleneck can be identified.
//
struct DISK_LIMITS {
    double max_used;
    double max_frac;
    double min_free;
};
|
|
|
|
// parsed version of project prefs that relate to scheduling
|
|
//
|
|
struct PROJECT_PREFS {
|
|
std::vector<APP_INFO> selected_apps;
|
|
bool dont_use_proc_type[NPROC_TYPES];
|
|
bool allow_non_preferred_apps;
|
|
bool allow_beta_work;
|
|
int max_jobs_in_progress;
|
|
int max_cpus;
|
|
|
|
void parse();
|
|
|
|
PROJECT_PREFS() {
|
|
memset(&dont_use_proc_type, 0, sizeof(dont_use_proc_type));
|
|
allow_non_preferred_apps = false;
|
|
allow_beta_work = false;
|
|
max_jobs_in_progress = 0;
|
|
max_cpus = 0;
|
|
}
|
|
};
|
|
|
|
// summary of a client's request for work, and our response to it
|
|
// Note: this is zeroed out in SCHEDULER_REPLY constructor,
|
|
// so don't put any vectors here
|
|
//
|
|
struct WORK_REQ_BASE {
|
|
bool anonymous_platform;
|
|
|
|
// the following defined if anonymous platform
|
|
//
|
|
bool client_has_apps_for_proc_type[NPROC_TYPES];
|
|
|
|
// Flags used by old-style scheduling,
|
|
// while making multiple passes through the work array
|
|
//
|
|
bool infeasible_only;
|
|
bool reliable_only;
|
|
bool user_apps_only;
|
|
bool beta_only;
|
|
bool locality_sched_lite;
|
|
// for LSL apps, send only jobs where client has > 0 files
|
|
|
|
bool resend_lost_results;
|
|
// this is set if the request is reporting a result
|
|
// that was previously reported.
|
|
// This is evidence that the earlier reply was not received
|
|
// by the client. It may have contained results,
|
|
// so check and resend just in case.
|
|
|
|
bool has_reliable_version;
|
|
// whether the host has a reliable app version
|
|
|
|
int effective_ncpus;
|
|
// # of usable CPUs on host, taking prefs into account
|
|
int effective_ngpus;
|
|
|
|
// 6.7+ clients send separate requests for different resource types:
|
|
//
|
|
double req_secs[NPROC_TYPES];
|
|
// instance-seconds requested
|
|
double req_instances[NPROC_TYPES];
|
|
// number of idle instances, use if possible
|
|
inline void clear_req(int proc_type) {
|
|
req_secs[proc_type] = 0;
|
|
req_instances[proc_type] = 0;
|
|
}
|
|
|
|
// older clients send send a single number, the requested duration of jobs
|
|
//
|
|
double seconds_to_fill;
|
|
|
|
// true if new-type request, which has resource-specific requests
|
|
//
|
|
bool rsc_spec_request;
|
|
|
|
inline bool need_proc_type(int t) {
|
|
if (rsc_spec_request) {
|
|
return (req_secs[t]>0) || (req_instances[t]>0);
|
|
}
|
|
return seconds_to_fill > 0;
|
|
}
|
|
|
|
double disk_available;
|
|
double ram, usable_ram;
|
|
double cpu_available_frac;
|
|
double gpu_available_frac;
|
|
int njobs_sent;
|
|
|
|
// The following keep track of the "easiest" job that was rejected
|
|
// by EDF simulation.
|
|
// Any jobs harder than this can be rejected without doing the simulation.
|
|
//
|
|
double edf_reject_min_cpu;
|
|
int edf_reject_max_delay_bound;
|
|
bool have_edf_reject;
|
|
void edf_reject(double cpu, int delay_bound) {
|
|
if (have_edf_reject) {
|
|
if (cpu < edf_reject_min_cpu) edf_reject_min_cpu = cpu;
|
|
if (delay_bound> edf_reject_max_delay_bound) edf_reject_max_delay_bound = delay_bound;
|
|
} else {
|
|
edf_reject_min_cpu = cpu;
|
|
edf_reject_max_delay_bound = delay_bound;
|
|
have_edf_reject = true;
|
|
}
|
|
}
|
|
bool edf_reject_test(double cpu, int delay_bound) {
|
|
if (!have_edf_reject) return false;
|
|
if (cpu < edf_reject_min_cpu) return false;
|
|
if (delay_bound > edf_reject_max_delay_bound) return false;
|
|
return true;
|
|
}
|
|
|
|
RESOURCE disk;
|
|
RESOURCE mem;
|
|
RESOURCE speed;
|
|
RESOURCE bandwidth;
|
|
|
|
// various reasons for not sending jobs (used to explain why)
|
|
//
|
|
bool no_allowed_apps_available;
|
|
bool hr_reject_temp;
|
|
bool hr_reject_perm;
|
|
bool outdated_client;
|
|
bool max_jobs_on_host_exceeded;
|
|
bool max_jobs_on_host_proc_type_exceeded[NPROC_TYPES];
|
|
bool no_jobs_available; // project has no work right now
|
|
int max_jobs_per_rpc;
|
|
|
|
bool max_jobs_exceeded() {
|
|
if (max_jobs_on_host_exceeded) return true;
|
|
for (int i=0; i<NPROC_TYPES; i++) {
|
|
if (max_jobs_on_host_proc_type_exceeded[i]) return true;
|
|
}
|
|
return false;
|
|
}
|
|
void clear() {
|
|
memset(this, 0, sizeof(WORK_REQ_BASE));
|
|
}
|
|
|
|
};
|
|
|
|
struct WORK_REQ : public WORK_REQ_BASE {
|
|
PROJECT_PREFS project_prefs;
|
|
std::vector<USER_MESSAGE> no_work_messages;
|
|
std::vector<BEST_APP_VERSION*> best_app_versions;
|
|
std::vector<DB_HOST_APP_VERSION> host_app_versions;
|
|
std::vector<DB_HOST_APP_VERSION> host_app_versions_orig;
|
|
|
|
void get_job_limits();
|
|
void add_no_work_message(const char*);
|
|
|
|
~WORK_REQ() {}
|
|
};
|
|
|
|
// NOTE: if any field requires initialization,
|
|
// you must do it in the constructor. Nothing is zeroed by default.
|
|
//
|
|
struct SCHEDULER_REPLY {
|
|
WORK_REQ wreq;
|
|
DISK_LIMITS disk_limits;
|
|
double request_delay; // don't request again until this time elapses
|
|
std::vector<USER_MESSAGE> messages;
|
|
DB_ID_TYPE hostid;
|
|
// nonzero only if a new host record was created.
|
|
// this tells client to reset rpc_seqno
|
|
int lockfile_fd; // file descriptor of lockfile, or -1 if no lock.
|
|
bool send_global_prefs;
|
|
bool nucleus_only; // send only message
|
|
USER user;
|
|
char email_hash[MD5_LEN];
|
|
HOST host; // after validation, contains full host rec
|
|
TEAM team;
|
|
std::vector<APP> apps;
|
|
std::vector<APP_VERSION> app_versions;
|
|
std::vector<WORKUNIT>wus;
|
|
std::vector<SCHED_DB_RESULT>results;
|
|
std::vector<std::string>result_acks;
|
|
std::vector<std::string>result_aborts;
|
|
std::vector<std::string>result_abort_if_not_starteds;
|
|
std::vector<MSG_TO_HOST>msgs_to_host;
|
|
std::vector<FILE_INFO>file_deletes;
|
|
std::vector<std::string> file_transfer_requests;
|
|
char code_sign_key[4096];
|
|
char code_sign_key_signature[4096];
|
|
bool send_msg_ack;
|
|
bool project_is_down;
|
|
std::vector<APP_VERSION>old_app_versions;
|
|
// superceded app versions that we consider using because of
|
|
// homogeneous app version.
|
|
|
|
SCHEDULER_REPLY();
|
|
~SCHEDULER_REPLY(){};
|
|
int write(FILE*, SCHEDULER_REQUEST&);
|
|
void insert_app_unique(APP&);
|
|
void insert_app_version_unique(APP_VERSION&);
|
|
void insert_workunit_unique(WORKUNIT&);
|
|
void insert_result(SCHED_DB_RESULT&);
|
|
void insert_message(const char* msg, const char* prio);
|
|
void insert_message(USER_MESSAGE&);
|
|
void set_delay(double);
|
|
};
|
|
|
|
extern SCHEDULER_REQUEST* g_request;
|
|
extern SCHEDULER_REPLY* g_reply;
|
|
extern WORK_REQ* g_wreq;
|
|
extern double capped_host_fpops();
|
|
|
|
static inline void add_no_work_message(const char* m) {
|
|
g_wreq->add_no_work_message(m);
|
|
}
|
|
|
|
extern void get_weak_auth(USER&, char*);
|
|
extern void get_rss_auth(USER&, char*);
|
|
extern void read_host_app_versions();
|
|
extern DB_HOST_APP_VERSION* get_host_app_version(DB_ID_TYPE gavid);
|
|
extern void write_host_app_versions();
|
|
|
|
extern DB_HOST_APP_VERSION* gavid_to_havp(DB_ID_TYPE gavid);
|
|
extern DB_HOST_APP_VERSION* quota_exceeded_version();
|
|
|
|
// Does the platform name denote a 64-bit platform?
// The test is simply whether the name contains the substring "64".
// name: NUL-terminated platform name; NULL is treated as "not 64-bit"
// rather than being passed to strstr().
//
inline bool is_64b_platform(const char* name) {
    if (!name) return false;
    return (strstr(name, "64") != NULL);
}
|
|
|
|
// Defined elsewhere in the scheduler.
// NOTE(review): presumably the availability fraction of the resource
// used by the given app version; confirm against the definition.
double available_frac(BEST_APP_VERSION&);
|
|
|
|
#endif
|