boinc/sched/sched_types.h

591 lines
17 KiB
C++

// This file is part of BOINC.
// http://boinc.berkeley.edu
// Copyright (C) 2008 University of California
//
// BOINC is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// BOINC is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
#ifndef BOINC_SCHED_TYPES_H
#define BOINC_SCHED_TYPES_H
#include <cstdio>
#include <vector>
#include "boinc_db.h"
#include "common_defs.h"
#include "md5_file.h"
#include "coproc.h"
#include "keyword.h"
#include "edf_sim.h"
// For projects that let users filter work by application:
// one application for which the user is willing to accept work.
//
struct APP_INFO {
    int appid;
        // ID of the application
    int work_available;
        // NOTE(review): presumably nonzero if work for this app exists;
        // confirm against the code that sets it
};
// A resource (disk etc.) of which the client may have too little.
// There is no constructor; the fields must be zeroed by the owner
// before set_insufficient() is used.
//
struct RESOURCE {
    bool insufficient;
    double needed;
        // the min extra amount needed; zero means "nothing recorded yet"

    // Record that the resource falls short by x.
    // Keeps the smallest shortfall seen so far.
    //
    inline void set_insufficient(double x) {
        insufficient = true;
        // take x if no amount is stored yet, or if x is below the stored one
        if (!needed || x < needed) {
            needed = x;
        }
    }
};
// A message to be shown to the volunteer.
//
struct USER_MESSAGE {
    std::string message;
        // the message text
    std::string priority;
        // priority label of the message
        // (the set of accepted values is not visible in this file)

    // m: message text, p: priority; defined outside this header
    //
    USER_MESSAGE(const char* m, const char* p);
};
// Processor usage and speed estimates for an app version.
// Embedded in CLIENT_APP_VERSION and BEST_APP_VERSION.
//
struct HOST_USAGE {
    int proc_type;
        // one of the PROC_TYPE_* constants
    double gpu_usage;
    double gpu_ram;
    double avg_ncpus;
    double mem_usage;
        // mem usage if specified by the plan class
        // (overrides wu.rsc_memory_bound)
    double projected_flops;
        // the scheduler's best estimate of wu.rsc_fpops_est/elapsed_time.
        // Taken from host_app_version elapsed time statistics if available,
        // else on estimate provided by app_plan()
    double peak_flops;
        // stored in result.flops_estimate, and used for credit calculations
    char cmdline[256];
    char custom_coproc_type[256];
        // if we're using a custom GPU type, its name
        // TODO: get rid of PROC_TYPE_*, and this

    // Default: a single-CPU app with no GPU use, zero flops estimates
    // and empty strings.
    //
    HOST_USAGE() {
        proc_type = PROC_TYPE_CPU;
        gpu_usage = 0;
        gpu_ram = 0;
        avg_ncpus = 1;
        mem_usage = 0;
        projected_flops = 0;
        peak_flops = 0;
        strcpy(cmdline, "");
        strcpy(custom_coproc_type, "");
    }

    // Reset to a single-CPU, non-GPU app running at the given speed.
    // A non-positive flops value is replaced by 1e9.
    // NOTE(review): custom_coproc_type is not cleared here, unlike in the
    // constructor; confirm that this is intended.
    //
    void sequential_app(double flops) {
        proc_type = PROC_TYPE_CPU;
        gpu_usage = 0;
        gpu_ram = 0;
        avg_ncpus = 1;
        mem_usage = 0;
        if (flops <= 0) flops = 1e9;
        projected_flops = flops;
        peak_flops = flops;
        strcpy(cmdline, "");
    }

    // True if this is a CPU app that uses exactly one CPU.
    //
    inline bool is_sequential_app() const {
        return proc_type == PROC_TYPE_CPU && avg_ncpus == 1;
    }

    // Map proc_type to the corresponding ANON_PLATFORM_* constant;
    // anything other than the three listed GPU types maps to
    // ANON_PLATFORM_CPU.
    //
    inline int resource_type() const {
        switch (proc_type) {
        case PROC_TYPE_NVIDIA_GPU: return ANON_PLATFORM_NVIDIA;
        case PROC_TYPE_AMD_GPU: return ANON_PLATFORM_ATI;
        case PROC_TYPE_INTEL_GPU: return ANON_PLATFORM_INTEL;
        default: return ANON_PLATFORM_CPU;
        }
    }

    // True if the processor type is anything other than CPU.
    //
    inline bool uses_gpu() const {
        return proc_type != PROC_TYPE_CPU;
    }
};
// Describes either a sticky file present on the host
// or an input file of a job.
//
struct FILE_INFO {
    char name[256];
        // file name
    double nbytes;
        // file size in bytes
    int status;
    bool sticky;

    // Fill in this record from XML; defined outside this header.
    //
    int parse(XML_PARSER&);
};
// A message sent by the host
// (SCHEDULER_REQUEST keeps a vector of these in msgs_from_host).
//
struct MSG_FROM_HOST_DESC {
    char variety[256];
    std::string msg_text;
        // body of the message

    // Fill in this record from XML; defined outside this header.
    //
    int parse(XML_PARSER&);
};
// An app version reported by an anonymous-platform client.
// (From 6.11 on, ALL clients send these.)
//
struct CLIENT_APP_VERSION {
    char app_name[256];
    char platform[256];
    int version_num;
    char plan_class[256];
    HOST_USAGE host_usage;
    double rsc_fpops_scale;
        // factor applied to wu.rsc_fpops_est and rsc_fpops_limit
        // when they are sent to the client.
        // It accounts for the difference between the speed the client
        // assumes for the app and the speed we assume.
    APP* app;
        // NULL means this entry is only a place-holder
        // that keeps array indices stable

    // Fill in this record from XML; defined outside this header.
    //
    int parse(XML_PARSER&);
};
// keep track of the best app_version for each app for this host
//
struct BEST_APP_VERSION {
DB_ID_TYPE appid;
bool for_64b_jobs;
// maintain this separately for jobs that need > 2GB RAM,
// in which case we can't use 32-bit apps
bool present;
// false means there's no usable version for this app
CLIENT_APP_VERSION* cavp;
// populated if anonymous platform
APP_VERSION* avp;
// populated otherwise
HOST_USAGE host_usage;
// populated in either case
bool reliable;
bool trusted;
DB_HOST_APP_VERSION* host_app_version();
// get the HOST_APP_VERSION, if any
BEST_APP_VERSION() {
appid = 0;
for_64b_jobs = false;
present = false;
cavp = NULL;
avp = NULL;
reliable = false;
trusted = false;
}
};
// A DB_RESULT extended with fields that the scheduler uses
// but that are not stored in the DB.
//
struct SCHED_DB_RESULT : public DB_RESULT {
    char wu_name[256];
    int units;
        // used for granting credit by # of units processed

    // Fill in this record from XML sent by the client;
    // defined outside this header.
    //
    int parse_from_client(XML_PARSER&);

    char platform_name[256];
    BEST_APP_VERSION bav;

    // Write this result to the given file; defined outside this header.
    //
    int write_to_client(FILE*);
};
// The subset of global preferences that the scheduler uses.
//
struct GLOBAL_PREFS {
    double mod_time;
    double disk_max_used_gb;
    double disk_max_used_pct;
    double disk_min_free_gb;
    double work_buf_min_days;
    double ram_max_used_busy_frac;
    double ram_max_used_idle_frac;
    double max_ncpus_pct;

    // Parse preferences from buf for the given venue;
    // defined outside this header.
    //
    void parse(const char* buf, const char* venue);

    // Set default values; defined outside this header.
    //
    void defaults();

    // Minimum work buffer in seconds (work_buf_min_days * 86400).
    // Const so it can be called through a const GLOBAL_PREFS.
    //
    inline double work_buf_min() const {
        return work_buf_min_days*86400;
    }
};
// Holds GUI URL text.
// NOTE(review): who allocates and frees "text" is not visible here;
// see the definition of init().
//
struct GUI_URLS {
    char* text;

    // Defined outside this header.
    //
    void init();

    // Produce GUI URLs for the given user, host and team.
    // NOTE(review): presumably the result goes into the char buffer
    // of size len; confirm in the definition.
    //
    void get_gui_urls(USER& user, HOST& host, TEAM& team, char*, int len);
};
// Holds project files text.
// NOTE(review): who allocates and frees "text" is not visible here;
// see the definition of init().
//
struct PROJECT_FILES {
    char* text;

    // Defined outside this header.
    //
    void init();
};
// A result from this project that the client currently has.
// The request message carries a list of these;
// the reply message may list those that should be aborted,
// or aborted if not yet started.
//
struct OTHER_RESULT {
    char name[256];
    int app_version;
        // index into CLIENT_APP_VERSION array
    char plan_class[64];
    bool have_plan_class;
    bool abort;
    bool abort_if_not_started;
    int reason;
        // one of the ABORT_REASON_* codes

    // Fill in this record from XML; defined outside this header.
    //
    int parse(XML_PARSER&);
};
// Codes stored in OTHER_RESULT::reason
//
#define ABORT_REASON_NOT_FOUND 1
#define ABORT_REASON_WU_CANCELLED 2
#define ABORT_REASON_ASSIMILATED 3
#define ABORT_REASON_TIMED_OUT 4
// A platform name sent by the client
// (SCHEDULER_REQUEST has a primary one plus a list of alternates).
//
struct CLIENT_PLATFORM {
    char name[256];

    // Fill in this record from XML; defined outside this header.
    //
    int parse(XML_PARSER&);
};
// A list of pointers to PLATFORM records.
// NOTE(review): ownership of the pointed-to records is not visible here.
//
struct PLATFORM_LIST {
    std::vector<PLATFORM*> list;
};
// A scheduler request message from a client, in parsed form.
//
struct SCHEDULER_REQUEST {
    char authenticator[256];
    CLIENT_PLATFORM platform;
    std::vector<CLIENT_PLATFORM> alt_platforms;
    PLATFORM_LIST platforms;
    char cross_project_id[256];
    DB_ID_TYPE hostid;
        // zero on the first RPC
    int core_client_major_version;
    int core_client_minor_version;
    int core_client_release;
    int core_client_version;
        // 10000*major + 100*minor + release
    int rpc_seqno;
    double work_req_seconds;
        // in "normalized CPU seconds" (see work_req.php)
    double cpu_req_secs;
    double cpu_req_instances;
    double resource_share_fraction;
        // this project's fraction of total resource share
    double rrs_fraction;
        // ... of runnable resource share
    double prrs_fraction;
        // ... of potentially runnable resource share
    double cpu_estimated_delay;
        // how long the currently queued jobs keep the CPU saturated;
        // used for a crude deadline check
    double duration_correction_factor;
    double uptime;
    double previous_uptime;
    char global_prefs_xml[BLOB_SIZE];
    char working_global_prefs_xml[BLOB_SIZE];
    char code_sign_key[4096];
    bool dont_send_work;
    char client_brand[256];
        // as given in the client_brand.txt config file on the client
    std::vector<CLIENT_APP_VERSION> client_app_versions;
    GLOBAL_PREFS global_prefs;
    char global_prefs_source_email_hash[MD5_LEN];
    HOST host;
        // the request message is parsed into this.
        // It does NOT hold the full host record.
    COPROCS coprocs;
    std::vector<SCHED_DB_RESULT> results;
        // completed results being reported
    bool results_truncated;
        // set if the size of "results" was capped to limit memory usage.
        // In that case lost results are not resent,
        // since we don't know what was lost.
    std::vector<RESULT> file_xfer_results;
    std::vector<MSG_FROM_HOST_DESC> msgs_from_host;
    std::vector<FILE_INFO> file_infos;
        // sticky files reported by host

    // temps used by locality scheduling:
    //
    std::vector<FILE_INFO> file_delete_candidates;
        // deletion candidates
    std::vector<FILE_INFO> files_not_needed;
        // files no longer needed

    std::vector<OTHER_RESULT> other_results;
        // in-progress results from this project
    std::vector<IP_RESULT> ip_results;
        // in-progress results from all projects
    bool have_other_results_list;
    bool have_ip_results_list;
    bool have_time_stats_log;
    bool client_cap_plan_class;
    int sandbox;
        // whether client uses account-based sandbox. -1 = don't know
    int allow_multiple_clients;
        // whether client allows multiple clients per host, -1 don't know
    bool using_weak_auth;
        // the request uses a weak authenticator.
        // Don't modify user prefs or CPID
    int last_rpc_dayofyear;
    int current_rpc_dayofyear;
    USER_KEYWORDS user_keywords;
    int user_id;

    SCHEDULER_REQUEST();
    ~SCHEDULER_REQUEST() {}

    // The functions below are defined outside this header.
    //
    void clear();
    const char* parse(XML_PARSER&);
    int write(FILE*);
        // write request info to file: not complete
};
// Used to track which disk preference is the bottleneck.
//
struct DISK_LIMITS {
    double max_used;
    double max_frac;
    double min_free;
};
// The scheduling-related project preferences, in parsed form.
//
struct PROJECT_PREFS {
    std::vector<APP_INFO> selected_apps;
    bool dont_use_proc_type[NPROC_TYPES];
        // one flag per processor type
    bool allow_non_preferred_apps;
    bool allow_beta_work;
    int max_jobs_in_progress;
    int max_cpus;

    // Defined outside this header.
    //
    void parse();

    // Start with no selected apps, every flag false and both limits zero.
    //
    PROJECT_PREFS()
        : allow_non_preferred_apps(false),
          allow_beta_work(false),
          max_jobs_in_progress(0),
          max_cpus(0)
    {
        for (int i=0; i<NPROC_TYPES; i++) {
            dont_use_proc_type[i] = false;
        }
    }
};
// Summary of a client's request for work, and our response to it.
// Note: this is zeroed out in SCHEDULER_REPLY constructor
// (see clear(), which uses memset),
// so don't put any vectors or other non-trivial members here.
//
struct WORK_REQ_BASE {
    bool anonymous_platform;

    // the following defined if anonymous platform
    //
    bool client_has_apps_for_proc_type[NPROC_TYPES];

    // Flags used by old-style scheduling,
    // while making multiple passes through the work array
    //
    bool infeasible_only;
    bool reliable_only;
    bool user_apps_only;
    bool beta_only;

    bool locality_sched_lite;
        // for LSL apps, send only jobs where client has > 0 files
    bool resend_lost_results;
        // this is set if the request is reporting a result
        // that was previously reported.
        // This is evidence that the earlier reply was not received
        // by the client. It may have contained results,
        // so check and resend just in case.
    bool has_reliable_version;
        // whether the host has a reliable app version
    int effective_ncpus;
        // # of usable CPUs on host, taking prefs into account
    int effective_ngpus;

    // 6.7+ clients send separate requests for different resource types:
    //
    double req_secs[NPROC_TYPES];
        // instance-seconds requested
    double req_instances[NPROC_TYPES];
        // number of idle instances, use if possible

    // Zero the request for the given processor type.
    // proc_type is used as an array index without a range check.
    //
    inline void clear_req(int proc_type) {
        req_secs[proc_type] = 0;
        req_instances[proc_type] = 0;
    }

    // older clients send a single number, the requested duration of jobs
    //
    double seconds_to_fill;

    // true if new-type request, which has resource-specific requests
    //
    bool rsc_spec_request;

    // True if work is requested for processor type t.
    // For resource-specific requests this checks the per-type seconds
    // and instances; otherwise it checks seconds_to_fill,
    // regardless of t.
    //
    inline bool need_proc_type(int t) const {
        if (rsc_spec_request) {
            return (req_secs[t]>0) || (req_instances[t]>0);
        }
        return seconds_to_fill > 0;
    }

    double disk_available;
    double ram;
    double usable_ram;
    double cpu_available_frac;
    double gpu_available_frac;
    int njobs_sent;

    // The following keep track of the "easiest" job that was rejected
    // by EDF simulation.
    // Any jobs harder than this can be rejected without doing the simulation.
    //
    double edf_reject_min_cpu;
    int edf_reject_max_delay_bound;
    bool have_edf_reject;

    // Record a job rejected by EDF simulation:
    // keep the smallest cpu and the largest delay bound seen so far.
    //
    void edf_reject(double cpu, int delay_bound) {
        if (have_edf_reject) {
            if (cpu < edf_reject_min_cpu) edf_reject_min_cpu = cpu;
            if (delay_bound > edf_reject_max_delay_bound) edf_reject_max_delay_bound = delay_bound;
        } else {
            edf_reject_min_cpu = cpu;
            edf_reject_max_delay_bound = delay_bound;
            have_edf_reject = true;
        }
    }

    // True if a rejection has been recorded and this job needs at least
    // the recorded cpu and has at most the recorded delay bound.
    //
    bool edf_reject_test(double cpu, int delay_bound) const {
        if (!have_edf_reject) return false;
        if (cpu < edf_reject_min_cpu) return false;
        if (delay_bound > edf_reject_max_delay_bound) return false;
        return true;
    }

    RESOURCE disk;
    RESOURCE mem;
    RESOURCE speed;
    RESOURCE bandwidth;

    // various reasons for not sending jobs (used to explain why)
    //
    bool no_allowed_apps_available;
    bool hr_reject_temp;
    bool hr_reject_perm;
    bool outdated_client;
    bool max_jobs_on_host_exceeded;
    bool max_jobs_on_host_proc_type_exceeded[NPROC_TYPES];
    bool no_jobs_available;
        // project has no work right now
    int max_jobs_per_rpc;

    // True if the per-host limit, or the limit for any processor type,
    // has been exceeded.
    //
    bool max_jobs_exceeded() const {
        if (max_jobs_on_host_exceeded) return true;
        for (int i=0; i<NPROC_TYPES; i++) {
            if (max_jobs_on_host_proc_type_exceeded[i]) return true;
        }
        return false;
    }

    // Zero every field of this base struct.
    // Only sizeof(WORK_REQ_BASE) bytes are cleared,
    // so members added by derived structs are not touched.
    //
    void clear() {
        memset(this, 0, sizeof(WORK_REQ_BASE));
    }
};
// WORK_REQ_BASE plus the members that can't be zeroed with memset
// (vectors and PROJECT_PREFS).
//
struct WORK_REQ : public WORK_REQ_BASE {
    PROJECT_PREFS project_prefs;
    std::vector<USER_MESSAGE> no_work_messages;
    std::vector<BEST_APP_VERSION*> best_app_versions;
        // NOTE(review): ownership of the pointed-to records
        // is not visible here
    std::vector<DB_HOST_APP_VERSION> host_app_versions;
    std::vector<DB_HOST_APP_VERSION> host_app_versions_orig;

    // The functions below are defined outside this header.
    //
    void get_job_limits();
    void add_no_work_message(const char*);

    ~WORK_REQ() {}
};
// The reply to a scheduler request.
//
// NOTE: nothing is zeroed by default.
// Any field that needs initialization must be set in the constructor.
//
struct SCHEDULER_REPLY {
    WORK_REQ wreq;
    DISK_LIMITS disk_limits;
    double request_delay;
        // don't request again until this time elapses
    std::vector<USER_MESSAGE> messages;
    DB_ID_TYPE hostid;
        // nonzero only if a new host record was created.
        // this tells client to reset rpc_seqno
    int lockfile_fd;
        // file descriptor of lockfile, or -1 if no lock.
    bool send_global_prefs;
    bool nucleus_only;
        // send only message
    USER user;
    char email_hash[MD5_LEN];
    HOST host;
        // after validation, contains full host rec
    TEAM team;
    std::vector<APP> apps;
    std::vector<APP_VERSION> app_versions;
    std::vector<WORKUNIT> wus;
    std::vector<SCHED_DB_RESULT> results;
    std::vector<std::string> result_acks;
    std::vector<std::string> result_aborts;
    std::vector<std::string> result_abort_if_not_starteds;
    std::vector<MSG_TO_HOST> msgs_to_host;
    std::vector<FILE_INFO> file_deletes;
    std::vector<std::string> file_transfer_requests;
    char code_sign_key[4096];
    char code_sign_key_signature[4096];
    bool send_msg_ack;
    bool project_is_down;
    std::vector<APP_VERSION> old_app_versions;
        // superseded app versions that we consider using because of
        // homogeneous app version.

    SCHEDULER_REPLY();
    ~SCHEDULER_REPLY() {}

    // The functions below are defined outside this header.
    //
    int write(FILE*, SCHEDULER_REQUEST&);
    void insert_app_unique(APP&);
    void insert_app_version_unique(APP_VERSION&);
    void insert_workunit_unique(WORKUNIT&);
    void insert_result(SCHED_DB_RESULT&);
    void insert_message(const char* msg, const char* prio);
    void insert_message(USER_MESSAGE&);
    void set_delay(double);
};
// Global scheduler state and helpers, defined outside this header.
// NOTE(review): presumably these point at the request, reply and
// work request of the RPC being handled; confirm where they are set.
//
extern SCHEDULER_REQUEST* g_request;
extern SCHEDULER_REPLY* g_reply;
extern WORK_REQ* g_wreq;
extern double capped_host_fpops();
static inline void add_no_work_message(const char* m) {
g_wreq->add_no_work_message(m);
}
// Authenticator helpers, defined outside this header.
// NOTE(review): presumably the result is written to the char buffer;
// the required buffer size is not visible here.
//
extern void get_weak_auth(USER&, char*);
extern void get_rss_auth(USER&, char*);

// Access to the host's DB_HOST_APP_VERSION records,
// defined outside this header.
// "gavid" is the ID used to look up a record.
//
extern void read_host_app_versions();
extern DB_HOST_APP_VERSION* get_host_app_version(DB_ID_TYPE gavid);
extern void write_host_app_versions();
extern DB_HOST_APP_VERSION* gavid_to_havp(DB_ID_TYPE gavid);
extern DB_HOST_APP_VERSION* quota_exceeded_version();
// Return true if the platform name contains the substring "64".
// A NULL name is treated as "not 64-bit" rather than being passed
// to strstr(), which would be undefined behavior.
//
inline bool is_64b_platform(const char* name) {
    if (!name) return false;
    return strstr(name, "64") != NULL;
}
// Defined outside this header.
// NOTE(review): presumably the fraction of time the processor used by
// the given app version is available; confirm in the definition.
//
extern double available_frac(BEST_APP_VERSION&);
#endif