boinc/client/coproc_sched.cpp

474 lines
14 KiB
C++

// This file is part of BOINC.
// http://boinc.berkeley.edu
// Copyright (C) 2014 University of California
//
// BOINC is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// BOINC is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
#include <vector>
#include "client_msgs.h"
#include "client_state.h"
#include "client_types.h"
#include "coproc.h"
#include "result.h"
#include "coproc_sched.h"
using std::vector;
#if 0
#define COPROC_DEBUG(x) x
#else
#define COPROC_DEBUG(X)
#endif
////////// Coprocessor scheduling ////////////////
//
// theory of operation:
//
// Jobs can use one or more integral instances, or a fractional instance
//
// RESULT::coproc_indices
// for a running job, the coprocessor instances it's using
// COPROC::pending_usage[]: for each instance, its usage by running jobs
// Note: "running" includes jobs suspended due to CPU throttling.
// That's the only kind of suspended GPU job.
// COPROC::usage[]: for each instance, its usage
//
// enforce_run_list() calls assign_coprocs(),
// which assigns coproc instances to scheduled jobs,
// and prunes jobs for which we can't make an assignment
// (the job list is in order of decreasing priority)
//
// assign_coprocs():
//     clear usage and pending_usage of all instances
//     for each running/suspended job J
//         increment pending_usage for the instances assigned to J
//     for each scheduled job J
//         if J is running
//             if J's assignment fits
//                 confirm assignment: dec pending_usage, inc usage
//             else
//                 prune J
//         else
//             if J.usage is fractional
//                 look for an instance that's already fractionally assigned
//                 if that fails, look for a free instance
//                 if that fails, prune J
//             else
//                 if there are enough instances with usage=0
//                     assign instances with pending_usage = usage = 0
//                         (avoid preempting running jobs)
//                     if need more, assign instances with usage = 0
//                 else
//                     prune J
//
// Decide whether task rp may run on instance i of coprocessor cp.
// Returns false when either
// - the instance is excluded for the task's app (GPU exclusions), or
// - the task's app version is an OpenCL one and the instance
//   has no OpenCL support (relevant if use_all_gpus set);
// returns true otherwise.
//
static inline bool can_use_gpu(RESULT* rp, COPROC* cp, int i) {
    const bool excluded = gpu_excluded(rp->app, *cp, i);
    if (excluded) {
        COPROC_DEBUG(msg_printf(rp->project, MSG_INFO, "GPU %d is excluded for %s", i, rp->name));
        return false;
    }
    // the per-instance OpenCL flag only matters for OpenCL app versions
    if (rp->avp->is_opencl() && !cp->instance_has_opencl[i]) {
        COPROC_DEBUG(msg_printf(rp->project, MSG_INFO, "GPU %d can't do OpenCL for %s", i, rp->name));
        return false;
    }
    return true;
}
// Charge a job's coprocessor usage to the pending_usage of the
// instances listed in rp->coproc_indices.
//
// rp: the job whose current assignment is being recorded
// usage: the job's coprocessor usage; if < 1 that fraction is charged
//     to a single instance, otherwise 1 is charged to each index visited
// cp: the coprocessor type whose pending_usage[] is updated
//
static inline void increment_pending_usage(
    RESULT* rp, double usage, COPROC* cp
) {
    double x = (usage<1)?usage:1;
    for (int i=0; i<usage; i++) {
        int j = rp->coproc_indices[i];
        cp->pending_usage[j] += x;
        if (log_flags.coproc_debug) {
            // log the device number j, not the slot i within the job:
            // pending_usage is indexed by device, and the other
            // [coproc] messages in this file report the device too
            msg_printf(rp->project, MSG_INFO,
                "[coproc] %s instance %d; %f pending for %s", cp->type, j, x, rp->name
            );
            if (cp->pending_usage[j] > 1) {
                msg_printf(rp->project, MSG_INFO,
                    "[coproc] huh? %s %d %s pending usage > 1",
                    cp->type, j, rp->name
                );
            }
        }
    }
}
// Check whether a currently-running job can keep the instances
// recorded in rp->coproc_indices, given the usage already confirmed
// in cp->usage[].
// Returns false (and logs, if coproc_debug is on) as soon as adding
// this job's share to an instance would push its usage above 1.
// defer_sched is always set to false here.
//
// Note: available RAM is deliberately not checked.
// It may not be known (e.g. NVIDIA) and in any case,
// if the app is still running, it has enough RAM
//
static inline bool current_assignment_ok(
    RESULT* rp, double usage, COPROC* cp, bool& defer_sched
) {
    defer_sched = false;

    // per-instance share: the fraction itself, or a whole instance
    double share = 1;
    if (usage < 1) {
        share = usage;
    }

    for (int slot = 0; slot < usage; ++slot) {
        const int dev = rp->coproc_indices[slot];
        if (cp->usage[dev] + share > 1) {
            if (log_flags.coproc_debug) {
                msg_printf(rp->project, MSG_INFO,
                    "[coproc] %s %f instance of device %d already assigned to task %s",
                    cp->type, share, dev, rp->name
                );
            }
            return false;
        }
    }
    return true;
}
// Make a running job's existing assignment definitive:
// for each instance in rp->coproc_indices, move the job's share
// from cp->pending_usage[] to cp->usage[].
// With DEFER_ON_GPU_AVAIL_RAM, the job's gpu_ram is also subtracted
// from the instance's available_ram_temp.
//
static inline void confirm_current_assignment(
    RESULT* rp, double usage, COPROC* cp
) {
    // per-instance share: the fraction itself, or a whole instance
    double share = 1;
    if (usage < 1) {
        share = usage;
    }

    for (int slot = 0; slot < usage; ++slot) {
        const int dev = rp->coproc_indices[slot];
        cp->usage[dev] += share;
        cp->pending_usage[dev] -= share;
        if (log_flags.coproc_debug) {
            msg_printf(rp->project, MSG_INFO,
                "[coproc] %s instance %d: confirming %f instance for %s",
                cp->type, dev, share, rp->name
            );
        }
#if DEFER_ON_GPU_AVAIL_RAM
        cp->available_ram_temp[dev] -= rp->avp->gpu_ram;
#endif
    }
}
// Find an instance for a job that needs only a fraction of a coprocessor.
//
// Pass 1 prefers an instance that already has some usage or pending usage
// and still has room for this job's fraction.
// Pass 2 falls back to the first usable instance with zero usage.
// On success the instance is stored in rp->coproc_indices[0],
// its usage is increased by the fraction, and true is returned.
//
// With DEFER_ON_GPU_AVAIL_RAM, instances lacking enough available RAM
// are skipped and defer_sched is set to true; otherwise defer_sched
// stays false.
//
static inline bool get_fractional_assignment(
    RESULT* rp, double usage, COPROC* cp, bool& defer_sched
) {
    defer_sched = false;

    // pass 1: share an instance that is already partly in use
    //
    for (int dev = 0; dev < cp->count; ++dev) {
        if (!can_use_gpu(rp, cp, dev)) {
            continue;
        }
        const bool partly_used = cp->usage[dev] || cp->pending_usage[dev];
        if (!partly_used) {
            continue;
        }
        if (!(cp->usage[dev] + cp->pending_usage[dev] + usage <= 1)) {
            continue;
        }
#if DEFER_ON_GPU_AVAIL_RAM
        if (rp->avp->gpu_ram > cp->available_ram_temp[dev]) {
            defer_sched = true;
            continue;
        }
        cp->available_ram_temp[dev] -= rp->avp->gpu_ram;
#endif
        rp->coproc_indices[0] = dev;
        cp->usage[dev] += usage;
        if (log_flags.coproc_debug) {
            msg_printf(rp->project, MSG_INFO,
                "[coproc] Assigning %f of %s instance %d to %s",
                usage, cp->type, dev, rp->name
            );
        }
        return true;
    }

    // pass 2: take an instance with no confirmed usage
    //
    for (int dev = 0; dev < cp->count; ++dev) {
        if (!can_use_gpu(rp, cp, dev)) {
            continue;
        }
        if (cp->usage[dev]) {
            continue;
        }
#if DEFER_ON_GPU_AVAIL_RAM
        if (rp->avp->gpu_ram > cp->available_ram_temp[dev]) {
            defer_sched = true;
            continue;
        }
        cp->available_ram_temp[dev] -= rp->avp->gpu_ram;
#endif
        rp->coproc_indices[0] = dev;
        cp->usage[dev] += usage;
        if (log_flags.coproc_debug) {
            msg_printf(rp->project, MSG_INFO,
                "[coproc] Assigning %f of %s free instance %d to %s",
                usage, cp->type, dev, rp->name
            );
        }
        return true;
    }

    // nothing suitable
    if (log_flags.coproc_debug) {
        msg_printf(rp->project, MSG_INFO,
            "[coproc] Insufficient %s for %s: need %f",
            cp->type, rp->name, usage
        );
    }
    return false;
}
// Assign whole coprocessor instances to a job that is not currently running.
//
// rp: the job; on success the chosen instances are stored in
//     rp->coproc_indices[0..needed-1]
// usage: number of instances requested; a non-integral value is rounded up,
//     matching the "i < usage" loops applied to running jobs in this file
// cp: the coprocessor type to allocate from
// defer_sched: set to true if (with DEFER_ON_GPU_AVAIL_RAM) an otherwise
//     free instance was skipped for lack of available RAM; false otherwise
//
// Returns true if enough instances were assigned, else false.
// Instances with neither usage nor pending usage are taken first
// (to avoid preempting running jobs); instances that only have
// pending usage are used if more are needed.
//
static inline bool get_integer_assignment(
    RESULT* rp, double usage, COPROC* cp, bool& defer_sched
) {
    int i;
    defer_sched = false;

    // Number of instances to assign: usage rounded up to a whole number.
    // Counting against an int avoids comparing the counter with a double,
    // which never matches when usage is not integral and used to make
    // this function mark every free instance as used and then fail.
    //
    int needed = (int)usage;
    if (needed < usage) {
        needed++;
    }

    // make sure we have enough free instances
    //
    int nfree = 0;
    for (i=0; i<cp->count; i++) {
        if (!can_use_gpu(rp, cp, i)) {
            continue;
        }
        if (!cp->usage[i]) {
#if DEFER_ON_GPU_AVAIL_RAM
            if (rp->avp->gpu_ram > cp->available_ram_temp[i]) {
                defer_sched = true;
                if (log_flags.coproc_debug) {
                    msg_printf(rp->project, MSG_INFO,
                        "[coproc] task %s needs %.0fMB RAM, %s GPU %d has %.0fMB available",
                        rp->name, rp->avp->gpu_ram/MEGA, cp->type, i, cp->available_ram_temp[i]/MEGA
                    );
                }
                continue;
            }
#endif
            nfree++;
        }
    }
    if (nfree < needed) {
        if (log_flags.coproc_debug) {
            msg_printf(rp->project, MSG_INFO,
                "[coproc] Insufficient %s for %s; need %d, available %d",
                cp->type, rp->name, needed, nfree
            );
            if (defer_sched) {
                msg_printf(rp->project, MSG_INFO,
                    "[coproc] some instances lack available memory"
                );
            }
        }
        return false;
    }

    int n = 0;

    // assign non-pending instances first
    //
    for (i=0; i<cp->count; i++) {
        if (!can_use_gpu(rp, cp, i)) {
            continue;
        }
        if (!cp->usage[i]
            && !cp->pending_usage[i]
#if DEFER_ON_GPU_AVAIL_RAM
            && (rp->avp->gpu_ram <= cp->available_ram_temp[i])
#endif
        ) {
            cp->usage[i] = 1;
#if DEFER_ON_GPU_AVAIL_RAM
            cp->available_ram_temp[i] -= rp->avp->gpu_ram;
#endif
            rp->coproc_indices[n++] = i;
            if (log_flags.coproc_debug) {
                msg_printf(rp->project, MSG_INFO,
                    "[coproc] Assigning %s instance %d to %s",
                    cp->type, i, rp->name
                );
            }
            if (n >= needed) return true;
        }
    }

    // if needed, assign pending instances
    //
    for (i=0; i<cp->count; i++) {
        if (!can_use_gpu(rp, cp, i)) {
            continue;
        }
        if (!cp->usage[i]
#if DEFER_ON_GPU_AVAIL_RAM
            && (rp->avp->gpu_ram <= cp->available_ram_temp[i])
#endif
        ) {
            cp->usage[i] = 1;
#if DEFER_ON_GPU_AVAIL_RAM
            cp->available_ram_temp[i] -= rp->avp->gpu_ram;
#endif
            rp->coproc_indices[n++] = i;
            if (log_flags.coproc_debug) {
                msg_printf(rp->project, MSG_INFO,
                    "[coproc] Assigning %s pending instance %d to %s",
                    cp->type, i, rp->name
                );
            }
            if (n >= needed) return true;
        }
    }

    // shouldn't get here: the free-instance count above said we had enough
    if (log_flags.coproc_debug) {
        msg_printf(rp->project, MSG_INFO,
            "[coproc] huh??? ran out of %s instances for %s",
            cp->type, rp->name
        );
    }
    return false;
}
// Record that job rp could not be scheduled for lack of GPU RAM:
// - if the job's gpu_usage.rsc_type is nonzero, set the project's
//   rsc_defer_sched flag for that resource type
// - set the job's schedule_backoff to 300 seconds from now
// - request a CPU reschedule
//
static inline void mark_as_defer_sched(RESULT* rp) {
    const int rsc_type = rp->avp->gpu_usage.rsc_type;
    if (rsc_type != 0) {
        rp->project->rsc_defer_sched[rsc_type] = true;
    }

    // try again in 5 minutes
    rp->schedule_backoff = gstate.now + 300;
    gstate.request_schedule_cpus("insufficient GPU RAM");
}
#if DEFER_ON_GPU_AVAIL_RAM
// Initialize the per-instance available_ram_temp[] of the coprocessor
// whose resource index is rsc_index(name): every slot is set to
// cp.available_ram.
// Does nothing if rsc_index(name) is not positive.
//
static void copy_available_ram(COPROC& cp, const char* name) {
    const int rsc = rsc_index(name);
    if (rsc <= 0) {
        return;
    }
    COPROC* target = &coprocs.coprocs[rsc];
    for (int inst = 0; inst < MAX_COPROC_INSTANCES; ++inst) {
        target->available_ram_temp[inst] = cp.available_ram;
    }
}
#endif
// Assign coprocessor instances to the jobs in the list,
// and remove from the list the jobs for which no assignment is possible.
//
// Steps:
// 1) clear usage of all coprocessors (and, with DEFER_ON_GPU_AVAIL_RAM,
//    reset the per-instance available-RAM scratch values)
// 2) for each GPU job whose task is currently running,
//    add its current assignment to pending_usage
// 3) for each GPU job, in list order:
//    - running: keep its assignment if it still fits, else erase the job
//    - not running: look for a fractional or integer assignment
//      depending on its usage, erasing the job on failure
//    A job erased with defer_sched set is passed to mark_as_defer_sched().
// Jobs that use no GPU are left untouched.
//
void assign_coprocs(vector<RESULT*>& jobs) {
    unsigned int i;

    coprocs.clear_usage();
#if DEFER_ON_GPU_AVAIL_RAM
    if (coprocs.have_nvidia()) {
        copy_available_ram(coprocs.nvidia, GPU_TYPE_NVIDIA);
    }
    if (coprocs.have_ati()) {
        copy_available_ram(coprocs.ati, GPU_TYPE_ATI);
    }
    if (coprocs.have_intel()) {
        copy_available_ram(coprocs.intel_gpu, GPU_TYPE_INTEL);
    }
#endif

    // fill in pending usage
    //
    for (i = 0; i < jobs.size(); i++) {
        RESULT* rp = jobs[i];
        APP_VERSION* avp = rp->avp;
        const int rt = avp->gpu_usage.rsc_type;
        if (!rt) {
            continue;
        }
        double usage = avp->gpu_usage.usage;
        COPROC* cp = &coprocs.coprocs[rt];
        ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(rp);
        if (atp && atp->is_gpu_task_running()) {
            increment_pending_usage(rp, usage, cp);
        }
    }

    // confirm or make assignments; prune jobs that can't get one
    //
    vector<RESULT*>::iterator job_iter = jobs.begin();
    while (job_iter != jobs.end()) {
        RESULT* rp = *job_iter;
        APP_VERSION* avp = rp->avp;
        const int rt = avp->gpu_usage.rsc_type;
        if (!rt) {
            job_iter++;
            continue;
        }
        double usage = avp->gpu_usage.usage;
        COPROC* cp = &coprocs.coprocs[rt];
        ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(rp);

        bool defer_sched;
        bool assigned;
        if (atp && atp->is_gpu_task_running()) {
            assigned = current_assignment_ok(rp, usage, cp, defer_sched);
            if (assigned) {
                confirm_current_assignment(rp, usage, cp);
            }
        } else if (usage < 1) {
            assigned = get_fractional_assignment(rp, usage, cp, defer_sched);
        } else {
            assigned = get_integer_assignment(rp, usage, cp, defer_sched);
        }

        if (assigned) {
            job_iter++;
            continue;
        }
        if (defer_sched) {
            mark_as_defer_sched(rp);
        }
        job_iter = jobs.erase(job_iter);
    }

#if 0
    // enforce "don't use GPUs while active" pref in NVIDIA case;
    // it applies only to GPUs running a graphics app
    //
    if (gstate.host_info.coprocs.nvidia.count && gstate.user_active && !gstate.global_prefs.run_gpu_if_user_active) {
        job_iter = jobs.begin();
        while (job_iter != jobs.end()) {
            RESULT* rp = *job_iter;
            if (!rp->avp->ncudas) {
                job_iter++;
                continue;
            }
            ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(rp);
            bool some_gpu_busy = false;
            for (i=0; i<rp->avp->ncudas; i++) {
                int dev = atp->coproc_indices[i];
                if (gstate.host_info.coprocs.cuda.running_graphics_app[dev]) {
                    some_gpu_busy = true;
                    break;
                }
            }
            if (some_gpu_busy) {
                job_iter = jobs.erase(job_iter);
            } else {
                job_iter++;
            }
        }
    }
#endif
}