mirror of https://github.com/BOINC/boinc.git
386 lines
12 KiB
C++
386 lines
12 KiB
C++
// This file is part of BOINC.
|
|
// http://boinc.berkeley.edu
|
|
// Copyright (C) 2008 University of California
|
|
//
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
// under the terms of the GNU Lesser General Public License
|
|
// as published by the Free Software Foundation,
|
|
// either version 3 of the License, or (at your option) any later version.
|
|
//
|
|
// BOINC is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
// See the GNU Lesser General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
// Work fetch logic for CPU, GPU, and other processing resources.
|
|
// See http://boinc.berkeley.edu/trac/wiki/GpuWorkFetch
|
|
|
|
#ifndef BOINC_WORK_FETCH_H
|
|
#define BOINC_WORK_FETCH_H
|
|
|
|
#include <vector>
|
|
#include <deque>
|
|
|
|
// Resource type codes (passed around as "int rsc_type").
// The negative value is parenthesized so that the macro always expands
// as a single operand, whatever expression it appears in.
#define RSC_TYPE_ANY (-1)
#define RSC_TYPE_CPU 0
|
|
|
|
// Reasons for not fetching work from a project.
// PROJECT_REASON_NONE (zero) means there is no such reason.
// Values are spelled out so the numbering is visible at a glance.
//
typedef enum {
    PROJECT_REASON_NONE                     = 0,
    PROJECT_REASON_NON_CPU_INTENSIVE        = 1,
    PROJECT_REASON_SUSPENDED_VIA_GUI        = 2,
    PROJECT_REASON_MASTER_URL_FETCH_PENDING = 3,
    PROJECT_REASON_MIN_RPC_TIME             = 4,
    PROJECT_REASON_DONT_REQUEST_MORE_WORK   = 5,
    PROJECT_REASON_DOWNLOAD_STALLED         = 6,
    PROJECT_REASON_RESULT_SUSPENDED         = 7,
    PROJECT_REASON_TOO_MANY_UPLOADS         = 8,
    PROJECT_REASON_NOT_HIGHEST_PRIORITY     = 9,
    PROJECT_REASON_DONT_NEED                = 10,
    PROJECT_REASON_TOO_MANY_RUNNABLE        = 11,
    PROJECT_REASON_MAX_CONCURRENT           = 12
} PROJECT_REASON;
|
|
|
|
// When the project-level reason is DONT_NEED,
// this gives the reason for an individual resource.
// RSC_REASON_NONE (zero) means there is no such reason.
//
typedef enum {
    RSC_REASON_NONE             = 0,
    RSC_REASON_GPUS_NOT_USABLE  = 1,
    RSC_REASON_PREFS            = 2,
    RSC_REASON_CONFIG           = 3,
    RSC_REASON_NO_APPS          = 4,
    RSC_REASON_AMS              = 5,
    RSC_REASON_ZERO_SHARE       = 6,
    RSC_REASON_BUFFER_FULL      = 7,
    RSC_REASON_NOT_HIGHEST_PRIO = 8,
    RSC_REASON_BACKED_OFF       = 9,
    RSC_REASON_DEFER_SCHED      = 10
} RSC_REASON;
|
|
|
|
// forward declarations (alphabetical)
struct ACTIVE_TASK;
struct APP_VERSION;
struct PROJECT;
struct RESULT;
struct RSC_WORK_FETCH;
struct SCHEDULER_REPLY;

// Bitmap type with one bit per coprocessor instance;
// should be at least MAX_COPROC_INSTANCES (64) bits wide.
typedef long long COPROC_INSTANCE_BITMAP;
|
|
|
|
// state per (resource, project) pair
|
|
//
|
|
struct RSC_PROJECT_WORK_FETCH {
|
|
// the following are persistent (saved in state file)
|
|
double backoff_time;
|
|
double backoff_interval;
|
|
|
|
// the following used by REC accounting
|
|
double secs_this_rec_interval;
|
|
|
|
double queue_est;
|
|
// an estimate of instance-secs of queued work;
|
|
bool anonymous_platform_no_apps;
|
|
// set if this project is anonymous platform
|
|
// and it has no app version that uses this resource
|
|
double fetchable_share;
|
|
// this project's share relative to projects from which
|
|
// we could probably get work for this resource;
|
|
// determines how many instances this project deserves
|
|
int n_runnable_jobs;
|
|
double sim_nused;
|
|
// # of instances used at this point in the simulation
|
|
// Used for GPU exclusion logic
|
|
double nused_total; // sum of instances over all runnable jobs
|
|
int ncoprocs_excluded;
|
|
// number of excluded instances
|
|
COPROC_INSTANCE_BITMAP non_excluded_instances;
|
|
// bitmap of non-excluded instances
|
|
// (i.e. instances this project's jobs can run on)
|
|
int deadlines_missed;
|
|
int deadlines_missed_copy;
|
|
// copy of the above used during schedule_cpus()
|
|
std::deque<RESULT*> pending;
|
|
// temp during RR_SIM::simulate(); jobs running or waiting to run
|
|
std::deque<RESULT*>::iterator pending_iter;
|
|
// temp during RR_SIM::pick_jobs_to_run()
|
|
bool has_deferred_job;
|
|
// This project has a coproc job of the given type for which
|
|
// the job is deferred because of a temporary_exit() call.
|
|
// Don't fetch more jobs of this type; they might have same problem
|
|
RSC_REASON rsc_project_reason;
|
|
// If zero, it's OK to ask this project for this type of work.
|
|
// If nonzero, the reason why it's not OK
|
|
|
|
// stuff for max concurrent logic
|
|
//
|
|
double max_nused;
|
|
// max # instances used so far in simulation.
|
|
double mc_shortfall;
|
|
// project's shortfall for this resources, given MC limits
|
|
|
|
RSC_PROJECT_WORK_FETCH() {
|
|
backoff_time = 0;
|
|
backoff_interval = 0;
|
|
secs_this_rec_interval = 0;
|
|
queue_est = 0;
|
|
anonymous_platform_no_apps = false;
|
|
fetchable_share = 0;
|
|
n_runnable_jobs = 0;
|
|
sim_nused = 0;
|
|
nused_total = 0;
|
|
ncoprocs_excluded = 0;
|
|
non_excluded_instances = 0;
|
|
deadlines_missed = 0;
|
|
deadlines_missed_copy = 0;
|
|
pending.clear();
|
|
has_deferred_job = false;
|
|
rsc_project_reason = RSC_REASON_NONE;
|
|
max_nused = 0.0;
|
|
mc_shortfall = 0.0;
|
|
}
|
|
|
|
inline void reset() {
|
|
backoff_time = 0;
|
|
backoff_interval = 0;
|
|
}
|
|
|
|
inline void reset_rec_accounting() {
|
|
secs_this_rec_interval = 0;
|
|
}
|
|
RSC_REASON compute_rsc_project_reason(PROJECT*, int rsc_type);
|
|
void resource_backoff(PROJECT*, const char*);
|
|
void rr_init(PROJECT*);
|
|
void clear_backoff() {
|
|
backoff_time = 0;
|
|
backoff_interval = 0;
|
|
}
|
|
};
|
|
|
|
// Estimate the time a resource will be saturated
// with high-priority jobs.
// Keeps one accumulated busy time per instance of the resource.
//
struct BUSY_TIME_ESTIMATOR {
    std::vector<double> busy_time;
        // busy_time[i] is the busy time accumulated for instance i
    int ninstances;
        // number of instances tracked; set by init()

    // Start with zero instances, so that reset(), update() and
    // get_busy_time() are well-defined even if init() was never called
    // (previously ninstances was indeterminate until init()).
    //
    BUSY_TIME_ESTIMATOR() : ninstances(0) {}

    // zero the busy time of every instance
    //
    inline void reset() {
        for (int i=0; i<ninstances; i++) {
            busy_time[i] = 0;
        }
    }

    // Track n instances, all initially idle.
    // A negative n is treated as zero; passing it through to resize()
    // would convert it to an enormous unsigned size.
    //
    inline void init(int n) {
        if (n < 0) n = 0;
        ninstances = n;
        busy_time.resize(n);
        reset();
    }

    // called for each high-priority job.
    // Find the least-busy instance, and put this job
    // on that and following instances
    // (wrapping around to instance 0 after the last one).
    // dur: busy time added to each instance the job is put on
    // nused: number of instances the job uses;
    //     jobs using less than one instance are ignored
    //
    inline void update(double dur, double nused) {
        if (ninstances==0) return;
        if (nused < 1) return;

        // lowest-index instance with the smallest busy time
        int ibest = 0;
        double best = busy_time[0];
        for (int i=1; i<ninstances; i++) {
            if (busy_time[i] < best) {
                best = busy_time[i];
                ibest = i;
            }
        }

        int inused = (int) nused;     // ignore fractional usage
        for (int i=0; i<inused; i++) {
            int j = (ibest + i) % ninstances;
            busy_time[j] += dur;
        }
    }

    // the overall busy time is the busy time of
    // the least busy instance
    // (zero if there are no instances)
    //
    inline double get_busy_time() const {
        double best = 0;
        for (int i=0; i<ninstances; i++) {
            if (!i || busy_time[i] < best) {
                best = busy_time[i];
            }
        }
        return best;
    }
};
|
|
|
|
// per-resource state
|
|
//
|
|
struct RSC_WORK_FETCH {
|
|
int rsc_type;
|
|
int ninstances;
|
|
double relative_speed; // total FLOPS relative to CPU total FLOPS
|
|
bool has_exclusions;
|
|
|
|
// the following used/set by rr_simulation():
|
|
//
|
|
double shortfall;
|
|
// seconds of idle instances between now and now+work_buf_total()
|
|
double nidle_now;
|
|
// # idle instances now (at the beginning of RR sim)
|
|
double sim_nused;
|
|
// # instance used at this point in RR sim
|
|
COPROC_INSTANCE_BITMAP sim_used_instances;
|
|
// bitmap of instances used in simulation,
|
|
// taking into account GPU exclusions
|
|
COPROC_INSTANCE_BITMAP sim_excluded_instances;
|
|
// bitmap of instances not used (i.e. starved because of exclusion)
|
|
double total_fetchable_share;
|
|
// total RS of projects from which we could fetch jobs for this device
|
|
double saturated_time;
|
|
// estimated time until resource is not saturated
|
|
// used to calculate work request
|
|
double deadline_missed_instances;
|
|
// instance count for jobs that miss deadline
|
|
BUSY_TIME_ESTIMATOR busy_time_estimator;
|
|
RSC_REASON dont_fetch_reason;
|
|
#ifdef SIM
|
|
double estimated_delay;
|
|
#endif
|
|
// the following specify the work request for this resource
|
|
//
|
|
double req_secs;
|
|
double req_instances;
|
|
// REC accounting
|
|
double secs_this_rec_interval;
|
|
// temp in choose_project()
|
|
PROJECT* found_project; // a project able to ask for this work
|
|
|
|
void init(int t, int n, double sp) {
|
|
rsc_type = t;
|
|
ninstances = n;
|
|
relative_speed = sp;
|
|
busy_time_estimator.init(n);
|
|
}
|
|
void rr_init();
|
|
void update_stats(double sim_now, double dt, double buf_end);
|
|
void update_busy_time(double dur, double nused);
|
|
void supplement(PROJECT*);
|
|
RSC_PROJECT_WORK_FETCH& project_state(PROJECT*);
|
|
void print_state(const char*);
|
|
void clear_request();
|
|
void set_request(PROJECT*);
|
|
void copy_request(COPROC&);
|
|
void set_request_excluded(PROJECT*);
|
|
bool may_have_work(PROJECT*);
|
|
int cant_fetch(PROJECT*);
|
|
bool backed_off(PROJECT*);
|
|
bool uses_starved_excluded_instances(PROJECT*);
|
|
inline void reset_rec_accounting() {
|
|
this->secs_this_rec_interval = 0;
|
|
}
|
|
RSC_WORK_FETCH() {
|
|
rsc_type = 0;
|
|
ninstances = 0;
|
|
relative_speed = 0;
|
|
has_exclusions = false;
|
|
shortfall = 0;
|
|
nidle_now = 0;
|
|
sim_nused = 0;
|
|
sim_used_instances = 0;
|
|
sim_excluded_instances = 0;
|
|
total_fetchable_share = 0;
|
|
saturated_time = 0;
|
|
deadline_missed_instances = 0;
|
|
busy_time_estimator.init(0);
|
|
dont_fetch_reason = RSC_REASON_NONE;
|
|
#ifdef SIM
|
|
estimated_delay = 0.0;
|
|
#endif
|
|
req_secs = 0.0;
|
|
req_instances = 0.0;
|
|
secs_this_rec_interval = 0.0;
|
|
found_project = NULL;
|
|
}
|
|
};
|
|
|
|
|
|
// per project state
|
|
//
|
|
struct PROJECT_WORK_FETCH {
|
|
double rec;
|
|
// recent estimated credit
|
|
double rec_time;
|
|
// when it was last updated
|
|
double rec_temp;
|
|
// temporary copy used during schedule_cpus() and work fetch
|
|
double rec_temp_save;
|
|
// temporary used during RR simulation
|
|
PROJECT_REASON project_reason;
|
|
// if nonzero, reason which we can't fetch work from this project
|
|
int n_runnable_jobs;
|
|
// set by RR simulation
|
|
bool at_max_concurrent_limit;
|
|
bool request_if_idle_and_uploading;
|
|
// Set when a job finishes.
|
|
// If we're uploading but a resource is idle, make a work request.
|
|
// If this succeeds, clear the flag.
|
|
|
|
PROJECT_WORK_FETCH(int) {}
|
|
void clear() {
|
|
static const PROJECT_WORK_FETCH x(0);
|
|
*this = x;
|
|
}
|
|
PROJECT_WORK_FETCH() {
|
|
clear();
|
|
}
|
|
void reset(PROJECT*);
|
|
void rr_init(PROJECT*);
|
|
void print_state(PROJECT*);
|
|
};
|
|
|
|
// global work fetch state
|
|
//
|
|
struct WORK_FETCH {
|
|
std::vector<PROJECT*> projects_sorted;
|
|
|
|
// projects in decreasing priority order
|
|
void setup();
|
|
PROJECT* choose_project();
|
|
// Find a project to ask for work.
|
|
PROJECT* non_cpu_intensive_project_needing_work();
|
|
void piggyback_work_request(PROJECT*);
|
|
// we're going to contact this project anyway;
|
|
// piggyback a work request if appropriate.
|
|
void accumulate_inst_sec(ACTIVE_TASK*, double dt);
|
|
void write_request(FILE*, PROJECT*);
|
|
void handle_reply(
|
|
PROJECT*, SCHEDULER_REPLY*, std::vector<RESULT*>new_results
|
|
);
|
|
void set_initial_work_request(PROJECT*);
|
|
void set_all_requests(PROJECT*);
|
|
void set_all_requests_hyst(PROJECT*, int rsc_type);
|
|
void print_state();
|
|
void init();
|
|
void rr_init();
|
|
void clear_request();
|
|
void compute_shares();
|
|
void clear_backoffs(APP_VERSION&);
|
|
void request_string(char*, int);
|
|
bool requested_work();
|
|
void copy_requests();
|
|
};
|
|
|
|
extern RSC_WORK_FETCH rsc_work_fetch[MAX_RSC];
|
|
extern WORK_FETCH work_fetch;
|
|
|
|
extern void project_priority_init(bool for_work_fetch);
|
|
extern double project_priority(PROJECT*);
|
|
extern void adjust_rec_sched(RESULT*);
|
|
extern void adjust_rec_work_fetch(RESULT*);
|
|
|
|
extern double total_peak_flops();
|
|
extern const char* project_reason_string(PROJECT* p, char* buf, int len);
|
|
extern const char* rsc_reason_string(RSC_REASON);
|
|
|
|
#endif
|