// This file is part of BOINC.
// http://boinc.berkeley.edu
// Copyright (C) 2008 University of California
//
// BOINC is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// BOINC is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
//

// This file contains functions that can be customized to
// implement project-specific scheduling policies.
// The functions are:
//
// wu_is_infeasible_custom()
//      Decide whether host can run a job using a particular app version.
//      In addition it can:
//      - set the app version's resource usage and/or FLOPS rate estimate
//        (by assigning to bav.host_usage)
//      - modify command-line args
//        (by assigning to bav.host_usage.cmdline)
//      - set the job's FLOPS count
//        (by assigning to wu.rsc_fpops_est)
//
// app_plan()
//      Decide whether host can use an app version,
//      and if so what resources it will use
//
// app_plan_uses_gpu():
//      Which plan classes use GPUs
//
// JOB::get_score():
//      Determine the value of sending a particular job to a host
//      (used only by "matchmaker" scheduling)
//
// WARNING: if you modify this file, you must prevent it from
// being overwritten the next time you update BOINC source code.
// You can either:
// 1) write-protect this file, or
// 2) put this in a differently-named file and change the Makefile.am
//    (and write-protect that)
// In either case, put your version under source-code control, e.g. SVN

#include <string>

using std::string;

#include "str_util.h"
#include "util.h"

#include "sched_config.h"
#include "sched_main.h"
#include "sched_msgs.h"
#include "sched_send.h"
#include "sched_score.h"
#include "sched_shmem.h"
#include "sched_version.h"
#include "sched_customize.h"
bool wu_is_infeasible_custom(WORKUNIT& wu, APP& app, BEST_APP_VERSION& bav) {
#if 0
    // example: if WU name contains "_v1", don't use CUDA app
    // Note: this is slightly suboptimal.
    // If the host is able to accept both GPU and CPU jobs,
    // we'll skip this job rather than send it for the CPU.
    // Fixing this would require a big architectural change.
    //
    if (strstr(wu.name, "_v1") && bav.host_usage.ncudas) {
        return true;
    }
#endif
#if 0
    // example: for CUDA app, wu.batch is the minimum number of processors.
    // Don't send if #procs is less than this.
    //
    if (!strcmp(app.name, "foobar") && bav.host_usage.ncudas) {
        int n = g_request->coproc_cuda->prop.multiProcessorCount;
        if (n < wu.batch) {
            return true;
        }
    }
#endif
#if 0
    // example: if CUDA app and WU name contains ".vlar", don't send
    //
    if (bav.host_usage.ncudas) {
        if (strstr(wu.name, ".vlar")) {
            return true;
        }
    }
#endif
    return false;
}

// Suppose we have a computation that uses two devices alternately.
// The devices have speeds s1 and s2.
// The fraction of work done on device 1 is frac.
//
// This function returns:
// 1) the overall speed
// 2) the utilization of device 1, which is always in (0, 1).
//
static inline void coproc_perf(
    double s1, double s2, double frac,
    double& speed, double& u1
) {
    double y = (frac*s2 + (1-frac)*s1);
    speed = s1*s2/y;
        // time per unit of work is frac/s1 + (1-frac)/s2 = y/(s1*s2),
        // so the overall speed is the reciprocal, s1*s2/y
    u1 = frac*s2/y;
        // fraction of total time spent on device 1:
        // (frac/s1) / (y/(s1*s2)) = frac*s2/y
}
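
// Worked example (hypothetical numbers, for illustration only):
// with a 5 GFLOPS CPU (s1), a 50 GFLOPS GPU (s2), and frac = .01,
// y = .01*50 + .99*5 = 5.45, so speed = 5*50/5.45 ~= 45.9 GFLOPS
// and u1 = .01*50/5.45 ~= .092, i.e. the CPU is busy about 9% of the time.
// The GPU plan classes below rely on this calculation: the *_check()
// functions pass cpu_frac values like .01 as frac, and the u1 output
// is stored in hu.avg_ncpus.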

// the following is for an app that can use anywhere from 1 to 64 threads
//
static inline bool app_plan_mt(
    SCHEDULER_REQUEST& sreq, HOST_USAGE& hu
) {
    double ncpus = g_wreq->effective_ncpus;
        // number of usable CPUs, taking user prefs into account
    int nthreads = (int)ncpus;
    if (nthreads > 64) nthreads = 64;
    hu.avg_ncpus = nthreads;
    hu.max_ncpus = nthreads;
    sprintf(hu.cmdline, "--nthreads %d", nthreads);
    hu.projected_flops = sreq.host.p_fpops*hu.avg_ncpus*.99;
        // the .99 ensures that on uniprocessors a sequential app
        // will be used in preference to this
    hu.peak_flops = sreq.host.p_fpops*hu.avg_ncpus;
    if (config.debug_version_select) {
        log_messages.printf(MSG_NORMAL,
            "[version] Multi-thread app projected %.2fGS\n",
            hu.projected_flops/1e9
        );
    }
    return true;
}
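
// For example (hypothetical numbers): on a host where the user allows
// 8 CPUs and p_fpops is 3e9, app_plan_mt() above sets hu.cmdline to
// "--nthreads 8", projects 8*3e9*.99 = 23.76 GFLOPS,
// and reports a peak of 24 GFLOPS.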

GPU_REQUIREMENTS ati_requirements;

static bool ati_check(COPROC_ATI& c, HOST_USAGE& hu,
    int min_driver_version,
    bool need_amd_libs,
    double min_ram,
    double ndevs,       // # of GPUs used; can be fractional
    double cpu_frac,    // fraction of FLOPS performed by CPU
    double flops_scale
) {
    ati_requirements.update(min_driver_version, min_ram);

    if (need_amd_libs) {
        if (!c.amdrt_detected) {
            return false;
        }
    } else {
        if (!c.atirt_detected) {
            return false;
        }
    }
    if (c.version_num < min_driver_version) {
        return false;
    }
    if (c.available_ram < min_ram) {
        return false;
    }

    hu.gpu_ram = min_ram;
    hu.natis = ndevs;

    coproc_perf(
        g_request->host.p_fpops,
        flops_scale * hu.natis*c.peak_flops,
        cpu_frac,
        hu.projected_flops,
        hu.avg_ncpus
    );
    hu.peak_flops = hu.natis*c.peak_flops + hu.avg_ncpus*g_request->host.p_fpops;
    hu.max_ncpus = hu.avg_ncpus;
    return true;
}

#define ATI_MIN_RAM 250*MEGA

static inline bool app_plan_ati(
    SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu
) {
    COPROC_ATI& c = sreq.coprocs.ati;
    if (!c.count) {
        return false;
    }

    if (!strcmp(plan_class, "ati")) {
        if (!ati_check(c, hu,
            1000000,
            true,
            ATI_MIN_RAM,
            1,
            .01,
            .20
        )) {
            return false;
        }
    }

    if (!strcmp(plan_class, "ati13amd")) {
        if (!ati_check(c, hu,
            1003000,
            true,
            ATI_MIN_RAM,
            1, .01,
            .21
        )) {
            return false;
        }
    }

    if (!strcmp(plan_class, "ati13ati")) {
        if (!ati_check(c, hu,
            1003186,
            false,
            ATI_MIN_RAM,
            1, .01,
            .22
        )) {
            return false;
        }
    }

    if (!strcmp(plan_class, "ati14")) {
        if (!ati_check(c, hu,
            1004000,
            false,
            ATI_MIN_RAM,
            1, .01,
            .23
        )) {
            return false;
        }
    }

    if (config.debug_version_select) {
        log_messages.printf(MSG_NORMAL,
            "[version] %s ATI app projected %.2fG peak %.2fG %.3f CPUs\n",
            plan_class,
            hu.projected_flops/1e9,
            hu.peak_flops/1e9,
            hu.avg_ncpus
        );
    }
    return true;
}
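
// Since ati_check()'s ndevs argument can be fractional, a project could
// (hypothetically) run two jobs per GPU by passing .5 instead of 1 in one
// of the clauses above, e.g.:
//
//     if (!strcmp(plan_class, "ati14_half")) {   // hypothetical class name
//         if (!ati_check(c, hu, 1004000, false, ATI_MIN_RAM, .5, .01, .23)) {
//             return false;
//         }
//     }
//
// This sketch is illustrative only; the numbers mirror the "ati14" clause.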

GPU_REQUIREMENTS cuda_requirements;

#define CUDA_MIN_DRIVER_VERSION 17700
#define CUDA23_MIN_CUDA_VERSION 2030
#define CUDA23_MIN_DRIVER_VERSION 19038
#define CUDA3_MIN_CUDA_VERSION 3000
#define CUDA3_MIN_DRIVER_VERSION 19500
#define CUDA_OPENCL_MIN_DRIVER_VERSION 19713

static bool cuda_check(COPROC_NVIDIA& c, HOST_USAGE& hu,
    int min_cc, int max_cc,
    int min_cuda_version, int min_driver_version,
    double min_ram,
    double ndevs,       // # of GPUs used; can be fractional
    double cpu_frac,    // fraction of FLOPS performed by CPU
    double flops_scale
) {
    int cc = c.prop.major*100 + c.prop.minor;
    if (cc < min_cc) return false;
    if (max_cc && cc >= max_cc) return false;

    cuda_requirements.update(min_driver_version, min_ram);

    // Old BOINC clients report display driver version;
    // newer ones report CUDA RT version.
    // Some Linux clients report neither.
    //
    if (!c.cuda_version && !c.display_driver_version) {
        return false;
    }
    if (c.cuda_version) {
        if (min_cuda_version && (c.cuda_version < min_cuda_version)) {
            return false;
        }
    }
    if (c.display_driver_version) {
        if (min_driver_version && (c.display_driver_version < min_driver_version)) {
            return false;
        }
    }
    if (c.available_ram < min_ram) {
        return false;
    }

    hu.gpu_ram = min_ram;
    hu.ncudas = ndevs;

    coproc_perf(
        g_request->host.p_fpops,
        flops_scale * hu.ncudas*c.peak_flops,
        cpu_frac,
        hu.projected_flops,
        hu.avg_ncpus
    );
    hu.peak_flops = hu.ncudas*c.peak_flops + hu.avg_ncpus*g_request->host.p_fpops;
    hu.max_ncpus = hu.avg_ncpus;
    return true;
}

// the following is for an app that uses an NVIDIA GPU
//
static inline bool app_plan_cuda(
    SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu
) {
    COPROC_NVIDIA& c = sreq.coprocs.nvidia;
    if (!c.count) {
        return false;
    }

    // Macs require client 6.10.28 or later
    //
    if (strstr(sreq.host.os_name, "Darwin") && (sreq.core_client_version < 61028)) {
        return false;
    }

    // for CUDA 2.3, we need to check the CUDA RT version.
    // Old BOINC clients report display driver version;
    // newer ones report CUDA RT version
    //
    if (!strcmp(plan_class, "cuda_fermi")) {
        if (!cuda_check(c, hu,
            200, 0,
            CUDA3_MIN_CUDA_VERSION, CUDA3_MIN_DRIVER_VERSION,
            384*MEGA,
            1,
            .01,
            .22
        )) {
            return false;
        }
    } else if (!strcmp(plan_class, "cuda23")) {
        if (!cuda_check(c, hu,
            100,
            200,    // change to zero if app is compiled to byte code
            CUDA23_MIN_CUDA_VERSION, CUDA23_MIN_DRIVER_VERSION,
            384*MEGA,
            1,
            .01,
            .21
        )) {
            return false;
        }
    } else if (!strcmp(plan_class, "cuda")) {
        if (!cuda_check(c, hu,
            100,
            200,    // change to zero if app is compiled to byte code
            0, CUDA_MIN_DRIVER_VERSION,
            254*MEGA,
            1,
            .01,
            .20
        )) {
            return false;
        }
    } else {
        log_messages.printf(MSG_CRITICAL,
            "UNKNOWN PLAN CLASS %s\n", plan_class
        );
        return false;
    }

    if (config.debug_version_select) {
        log_messages.printf(MSG_NORMAL,
            "[version] %s app projected %.2fG peak %.2fG %.3f CPUs\n",
            plan_class,
            hu.projected_flops/1e9,
            hu.peak_flops/1e9,
            hu.avg_ncpus
        );
    }
    return true;
}

// The following is for a non-CPU-intensive application.
// Say that we'll use 1% of a CPU.
// This will cause the client (6.7+) to run it at non-idle priority
//
static inline bool app_plan_nci(
    SCHEDULER_REQUEST& sreq, HOST_USAGE& hu
) {
    hu.avg_ncpus = .01;
    hu.max_ncpus = .01;
    hu.projected_flops = sreq.host.p_fpops*1.01;
        // The *1.01 is needed to ensure that we'll send this app
        // version rather than a non-plan-class one
    hu.peak_flops = sreq.host.p_fpops*.01;
    return true;
}

// the following is for an app version that requires a processor with SSE3,
// and will run 10% faster than the non-SSE3 version
//
static inline bool app_plan_sse3(
    SCHEDULER_REQUEST& sreq, HOST_USAGE& hu
) {
    downcase_string(sreq.host.p_features);
    if (!strstr(sreq.host.p_features, "sse3")) {
        // Pre-6.x clients report CPU features in p_model
        //
        if (!strstr(sreq.host.p_model, "sse3")) {
            //add_no_work_message("Your CPU lacks SSE3");
            return false;
        }
    }
    hu.avg_ncpus = 1;
    hu.max_ncpus = 1;
    hu.projected_flops = 1.1*sreq.host.p_fpops;
    hu.peak_flops = sreq.host.p_fpops;
    return true;
}

static inline bool opencl_check(
    COPROC& cp, HOST_USAGE& hu,
    int min_opencl_device_version,
    double min_global_mem_size,
    double ndevs,
    double cpu_frac,
    double flops_scale
) {
    if (cp.opencl_prop.opencl_device_version_int < min_opencl_device_version) {
        return false;
    }
    if (cp.opencl_prop.global_mem_size < min_global_mem_size) {
        return false;
    }

    hu.gpu_ram = min_global_mem_size;
    if (!strcmp(cp.type, "NVIDIA")) {
        hu.ncudas = ndevs;
    } else if (!strcmp(cp.type, "ATI")) {
        hu.natis = ndevs;
    }

    coproc_perf(
        g_request->host.p_fpops,
        flops_scale * ndevs * cp.peak_flops,
        cpu_frac,
        hu.projected_flops,
        hu.avg_ncpus
    );
    hu.peak_flops = ndevs*cp.peak_flops + hu.avg_ncpus*g_request->host.p_fpops;
    hu.max_ncpus = hu.avg_ncpus;
    return true;
}

static inline bool app_plan_opencl(
    SCHEDULER_REQUEST& sreq, const char* plan_class, HOST_USAGE& hu
) {
    if (strstr(plan_class, "nvidia")) {
        COPROC_NVIDIA& c = sreq.coprocs.nvidia;
        if (!c.count) return false;
        if (!c.have_opencl) return false;
        if (!strcmp(plan_class, "opencl_nvidia_101")) {
            return opencl_check(
                c, hu,
                101,
                256*MEGA,
                1,
                .1,
                .2
            );
        } else {
            log_messages.printf(MSG_CRITICAL,
                "Unknown plan class: %s\n", plan_class
            );
            return false;
        }
    } else if (strstr(plan_class, "ati")) {
        COPROC_ATI& c = sreq.coprocs.ati;
        if (!c.count) return false;
        if (!c.have_opencl) return false;
        if (!strcmp(plan_class, "opencl_ati_101")) {
            return opencl_check(
                c, hu,
                101,
                256*MEGA,
                1,
                .1,
                .2
            );
        } else {
            log_messages.printf(MSG_CRITICAL,
                "Unknown plan class: %s\n", plan_class
            );
            return false;
        }
    } else {
        log_messages.printf(MSG_CRITICAL,
            "Unknown plan class: %s\n", plan_class
        );
        return false;
    }
}

static inline bool app_plan_vbox(
    SCHEDULER_REQUEST& sreq, HOST_USAGE& hu, bool is_64bit
) {
    // make sure they have VirtualBox
    //
    if (strlen(sreq.host.virtualbox_version) == 0) return false;
    int n, maj, min, rel;
    n = sscanf(sreq.host.virtualbox_version, "%d.%d.%d", &maj, &min, &rel);
    if (n != 3) return false;
    if (maj < 3) return false;
    if (maj == 3 && min < 2) return false;

    // only send the version for host's primary platform.
    // A Win64 host can't run a 32-bit VM app:
    // it will look in the 32-bit half of the registry and fail
    //
    PLATFORM* p = g_request->platforms.list[0];
    if (is_64bit != is_64b_platform(p->name)) {
        return false;
    }

    hu.avg_ncpus = 1;
    hu.max_ncpus = 1;
    hu.projected_flops = 1.1*sreq.host.p_fpops;
    hu.peak_flops = sreq.host.p_fpops;
    return true;
}

// app planning function.
// See http://boinc.berkeley.edu/trac/wiki/AppPlan
//
bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
    if (!strcmp(plan_class, "mt")) {
        return app_plan_mt(sreq, hu);
    } else if (strstr(plan_class, "opencl")) {
        // check "opencl" before "ati", since "opencl_ati_101" contains "ati"
        return app_plan_opencl(sreq, plan_class, hu);
    } else if (strstr(plan_class, "ati")) {
        return app_plan_ati(sreq, plan_class, hu);
    } else if (strstr(plan_class, "cuda")) {
        return app_plan_cuda(sreq, plan_class, hu);
    } else if (!strcmp(plan_class, "nci")) {
        return app_plan_nci(sreq, hu);
    } else if (!strcmp(plan_class, "sse3")) {
        return app_plan_sse3(sreq, hu);
    } else if (!strcmp(plan_class, "vbox32")) {
        return app_plan_vbox(sreq, hu, false);
    } else if (!strcmp(plan_class, "vbox64")) {
        return app_plan_vbox(sreq, hu, true);
    }
    log_messages.printf(MSG_CRITICAL,
        "Unknown plan class: %s\n", plan_class
    );
    return false;
}
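
#if 0
// Hypothetical sketch of a project-specific plan class, modeled on
// app_plan_sse3() above.  The "avx" class name and the 20% speedup factor
// are illustrative assumptions, not part of stock BOINC.
static inline bool app_plan_avx(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu) {
    downcase_string(sreq.host.p_features);
    if (!strstr(sreq.host.p_features, "avx")) return false;
    hu.avg_ncpus = 1;
    hu.max_ncpus = 1;
    hu.projected_flops = 1.2*sreq.host.p_fpops;
    hu.peak_flops = sreq.host.p_fpops;
    return true;
}
// ...and the matching dispatch clause to add in app_plan():
//    } else if (!strcmp(plan_class, "avx")) {
//        return app_plan_avx(sreq, hu);
#endif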

// the following is used to enforce limits on in-progress jobs
// for GPUs and CPUs (see handle_request.cpp)
//
bool app_plan_uses_gpu(const char* plan_class) {
    if (strstr(plan_class, "cuda")) {
        return true;
    }
    if (strstr(plan_class, "nvidia")) {
        return true;
    }
    if (strstr(plan_class, "ati")) {
        return true;
    }
    return false;
}

// compute a "score" for sending this job to this host.
// Return false if the WU is infeasible.
// Otherwise set est_time and disk_usage.
//
bool JOB::get_score() {
    WORKUNIT wu;
    int retval;

    WU_RESULT& wu_result = ssp->wu_results[index];
    wu = wu_result.workunit;
    app = ssp->lookup_app(wu.appid);

    score = 0;

    // Find the best app version to use.
    //
    bavp = get_app_version(wu, true, false);
    if (!bavp) return false;

    retval = wu_is_infeasible_fast(
        wu, wu_result.res_server_state, wu_result.res_priority,
        wu_result.res_report_deadline,
        *app, *bavp
    );
    if (retval) {
        if (config.debug_send) {
            log_messages.printf(MSG_NORMAL,
                "[send] [HOST#%d] [WU#%d %s] WU is infeasible: %s\n",
                g_reply->host.id, wu.id, wu.name, infeasible_string(retval)
            );
        }
        return false;
    }

    score = 1;

#if 0
    // example: for CUDA app, wu.batch is the minimum number of processors.
    // add min/actual to score
    // (this favors sending jobs that need lots of procs to GPUs that have them)
    // IF YOU USE THIS, USE THE PART IN wu_is_infeasible_custom() ALSO
    //
    if (!strcmp(app->name, "foobar") && bavp->host_usage.ncudas) {
        int n = g_request->coproc_cuda->prop.multiProcessorCount;
        score += ((double)wu.batch)/n;
    }
#endif

    // check if user has selected apps,
    // and send beta work to beta users
    //
    if (app->beta && !config.distinct_beta_apps) {
        if (g_wreq->allow_beta_work) {
            score += 1;
        } else {
            return false;
        }
    } else {
        if (app_not_selected(wu)) {
            if (!g_wreq->allow_non_preferred_apps) {
                return false;
            } else {
                // Allow work to be sent, but it will not get a bump in its score
            }
        } else {
            score += 1;
        }
    }

    // if job needs to get done fast, send to fast/reliable host
    //
    if (bavp->reliable && (wu_result.need_reliable)) {
        score += 1;
    }

    // if job already committed to an HR class,
    // try to send to host in that class
    //
    if (wu_result.infeasible_count) {
        score += 1;
    }

    // Favor jobs that will run fast
    //
    score += bavp->host_usage.projected_flops/1e9;

    // match large jobs to fast hosts
    //
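    // host_stdev is how many standard deviations this host's speed is
    // above (or below) the mean host speed; wu_result.fpops_size is
    // (presumably) the job's size expressed on the same normalized scale,
    // so the squared difference penalizes mismatched host/job pairs.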
    if (config.job_size_matching) {
        double host_stdev = (g_reply->host.p_fpops - ssp->perf_info.host_fpops_mean)/ssp->perf_info.host_fpops_stdev;
        double diff = host_stdev - wu_result.fpops_size;
        score -= diff*diff;
    }

    // TODO: If user has selected some apps but will accept jobs from others,
    // try to send them jobs from the selected apps
    //

    est_time = estimate_duration(wu, *bavp);
    disk_usage = wu.rsc_disk_bound;
    return true;
}

void handle_file_xfer_results() {
    for (unsigned int i=0; i<g_request->file_xfer_results.size(); i++) {
        RESULT& r = g_request->file_xfer_results[i];
        log_messages.printf(MSG_NORMAL,
            "completed file xfer %s\n", r.name
        );
        g_reply->result_acks.push_back(string(r.name));
    }
}