mirror of https://github.com/BOINC/boinc.git
- client/scheduler: in COPROCS, instead of having a vector of
pointers to dynamically allocated COPROC-derived objects, just have the objects themselves. Dynamic allocation should be avoided at all costs. svn path=/trunk/boinc/; revision=21564
This commit is contained in:
parent
5a8142a23c
commit
40eebe00af
|
@ -3701,3 +3701,30 @@ David 18 May 2010
|
|||
|
||||
client/
|
||||
cpu_sched.cpp
|
||||
|
||||
David 18 May 2010
|
||||
- client/scheduler: in COPROCS, instead of having a vector of
|
||||
pointers to dynamically allocated COPROC-derived objects,
|
||||
just have the objects themselves.
|
||||
Dynamic allocation should be avoided at all costs.
|
||||
|
||||
client/
|
||||
app_start.cpp
|
||||
client_state.cpp,h
|
||||
client_types.cpp
|
||||
coproc_detect.cpp
|
||||
cpu_sched.cpp
|
||||
cs_scheduler.cpp
|
||||
cs_statefile.cpp
|
||||
rr_sim.cpp
|
||||
scheduler_op.cpp
|
||||
wim.h
|
||||
work_fetch.cpp
|
||||
lib/
|
||||
coproc.cpp,h
|
||||
hostinfo.cpp,h
|
||||
sched/
|
||||
handle_request.cpp
|
||||
sched_customize.cpp
|
||||
sched_send.cpp
|
||||
sched_types.cpp,h
|
||||
|
|
|
@ -119,7 +119,10 @@ static void debug_print_argv(char** argv) {
|
|||
static void coproc_cmdline(
|
||||
int rsc_type, RESULT* rp, double ninstances, char* cmdline
|
||||
) {
|
||||
COPROC* coproc = (rsc_type==RSC_TYPE_CUDA)?(COPROC*)coproc_cuda:(COPROC*)coproc_ati;
|
||||
COPROC* coproc = (rsc_type==RSC_TYPE_CUDA)
|
||||
?(COPROC*)&gstate.host_info.coprocs.cuda
|
||||
:(COPROC*)&gstate.host_info.coprocs.ati
|
||||
;
|
||||
for (int j=0; j<ninstances; j++) {
|
||||
int k = rp->coproc_indices[j];
|
||||
// sanity check
|
||||
|
|
|
@ -62,8 +62,6 @@
|
|||
using std::max;
|
||||
|
||||
CLIENT_STATE gstate;
|
||||
COPROC_CUDA* coproc_cuda;
|
||||
COPROC_ATI* coproc_ati;
|
||||
|
||||
CLIENT_STATE::CLIENT_STATE():
|
||||
lookup_website_op(&gui_http),
|
||||
|
@ -266,23 +264,21 @@ int CLIENT_STATE::init() {
|
|||
msg_printf(NULL, MSG_INFO, warnings[i].c_str());
|
||||
}
|
||||
}
|
||||
if (host_info.coprocs.coprocs.size() == 0) {
|
||||
if (host_info.coprocs.none() ) {
|
||||
msg_printf(NULL, MSG_INFO, "No usable GPUs found");
|
||||
}
|
||||
#if 0
|
||||
msg_printf(NULL, MSG_INFO, "Faking an NVIDIA GPU");
|
||||
coproc_cuda = fake_cuda(host_info.coprocs, 256*MEGA, 2);
|
||||
coproc_cuda->available_ram_fake[0] = 256*MEGA;
|
||||
coproc_cuda->available_ram_fake[1] = 192*MEGA;
|
||||
host_info.coprocs.cuda.fake(256*MEGA, 2);
|
||||
host_info.coprocs.cuda.available_ram_fake[0] = 256*MEGA;
|
||||
host_info.coprocs.cuda.available_ram_fake[1] = 192*MEGA;
|
||||
#endif
|
||||
#if 0
|
||||
msg_printf(NULL, MSG_INFO, "Faking an ATI GPU");
|
||||
coproc_ati = fake_ati(host_info.coprocs, 512*MEGA, 2);
|
||||
coproc_ati->available_ram_fake[0] = 256*MEGA;
|
||||
coproc_ati->available_ram_fake[1] = 192*MEGA;
|
||||
host_info.coprocs.ati.fake(512*MEGA, 2);
|
||||
host_info.coprocs.ati.available_ram_fake[0] = 256*MEGA;
|
||||
host_info.coprocs.ati.available_ram_fake[1] = 192*MEGA;
|
||||
#endif
|
||||
coproc_cuda = (COPROC_CUDA*)host_info.coprocs.lookup("CUDA");
|
||||
coproc_ati = (COPROC_ATI*)host_info.coprocs.lookup("ATI");
|
||||
}
|
||||
|
||||
// check for app_info.xml file in project dirs.
|
||||
|
@ -597,8 +593,8 @@ bool CLIENT_STATE::poll_slow_events() {
|
|||
// NVIDIA provides an interface for finding if a GPU is
|
||||
// running a graphics app. ATI doesn't as far as I know
|
||||
//
|
||||
if (coproc_cuda && user_active && !global_prefs.run_gpu_if_user_active) {
|
||||
if (coproc_cuda->check_running_graphics_app()) {
|
||||
if (host_info.have_cuda() && user_active && !global_prefs.run_gpu_if_user_active) {
|
||||
if (host_info.coprocs.cuda.check_running_graphics_app()) {
|
||||
request_schedule_cpus("GPU state change");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -488,8 +488,6 @@ public:
|
|||
|
||||
extern CLIENT_STATE gstate;
|
||||
|
||||
extern COPROC_CUDA* coproc_cuda;
|
||||
extern COPROC_ATI* coproc_ati;
|
||||
extern bool gpus_usable;
|
||||
|
||||
// return a random double in the range [MIN,min(e^n,MAX))
|
||||
|
|
|
@ -1264,10 +1264,10 @@ void APP_VERSION::get_file_errors(string& str) {
|
|||
}
|
||||
|
||||
bool APP_VERSION::missing_coproc() {
|
||||
if (ncudas && !coproc_cuda) {
|
||||
if (ncudas && gstate.host_info.coprocs.cuda.count==0) {
|
||||
return true;
|
||||
}
|
||||
if (natis && !coproc_ati) {
|
||||
if (natis && gstate.host_info.coprocs.ati.count==0) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -1790,9 +1790,9 @@ int RESULT::write_gui(MIOFILE& out) {
|
|||
char buf[256];
|
||||
strcpy(buf, "");
|
||||
if (atp && atp->task_state() == PROCESS_EXECUTING) {
|
||||
if (avp->ncudas && coproc_cuda->count>1) {
|
||||
if (avp->ncudas && gstate.host_info.coprocs.cuda.count>1) {
|
||||
sprintf(buf, " (device %d)", coproc_indices[0]);
|
||||
} else if (avp->natis && coproc_ati->count>1) {
|
||||
} else if (avp->natis && gstate.host_info.coprocs.ati.count>1) {
|
||||
sprintf(buf, " (device %d)", coproc_indices[0]);
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -78,10 +78,6 @@ struct PROC_RESOURCES {
|
|||
double ram_left;
|
||||
COPROCS coprocs;
|
||||
|
||||
~PROC_RESOURCES() {
|
||||
coprocs.delete_coprocs();
|
||||
}
|
||||
|
||||
// should we stop scanning jobs?
|
||||
//
|
||||
inline bool stop_scan_cpu() {
|
||||
|
@ -130,14 +126,14 @@ struct PROC_RESOURCES {
|
|||
COPROC* cp2;
|
||||
if (av.ncudas) {
|
||||
x = av.ncudas;
|
||||
cp2 = coprocs.lookup("CUDA");
|
||||
cp2 = &gstate.host_info.coprocs.cuda;
|
||||
} else if (av.natis) {
|
||||
x = av.natis;
|
||||
cp2 = coprocs.lookup("ATI");
|
||||
cp2 = &gstate.host_info.coprocs.ati;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
if (!cp2) {
|
||||
if (!cp2->count) {
|
||||
msg_printf(NULL, MSG_INTERNAL_ERROR,
|
||||
"Missing a %s coprocessor", cp2->type
|
||||
);
|
||||
|
@ -162,10 +158,10 @@ struct PROC_RESOURCES {
|
|||
COPROC* cp2;
|
||||
if (av.ncudas) {
|
||||
x = av.ncudas;
|
||||
cp2 = coprocs.lookup("CUDA");
|
||||
cp2 = &gstate.host_info.coprocs.cuda;
|
||||
} else if (av.natis) {
|
||||
x = av.natis;
|
||||
cp2 = coprocs.lookup("ATI");
|
||||
cp2 = &gstate.host_info.coprocs.ati;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
@ -492,18 +488,18 @@ void CLIENT_STATE::reset_debt_accounting() {
|
|||
for (i=0; i<projects.size(); i++) {
|
||||
PROJECT* p = projects[i];
|
||||
p->cpu_pwf.reset_debt_accounting();
|
||||
if (coproc_cuda) {
|
||||
if (host_info.have_cuda()) {
|
||||
p->cuda_pwf.reset_debt_accounting();
|
||||
}
|
||||
if (coproc_ati) {
|
||||
if (host_info.have_ati()) {
|
||||
p->ati_pwf.reset_debt_accounting();
|
||||
}
|
||||
}
|
||||
cpu_work_fetch.reset_debt_accounting();
|
||||
if (coproc_cuda) {
|
||||
if (host_info.have_cuda()) {
|
||||
cuda_work_fetch.reset_debt_accounting();
|
||||
}
|
||||
if (coproc_ati) {
|
||||
if (host_info.have_ati()) {
|
||||
ati_work_fetch.reset_debt_accounting();
|
||||
}
|
||||
debt_interval_start = now;
|
||||
|
@ -548,11 +544,11 @@ void CLIENT_STATE::adjust_debts() {
|
|||
|
||||
cpu_work_fetch.update_long_term_debts();
|
||||
cpu_work_fetch.update_short_term_debts();
|
||||
if (coproc_cuda) {
|
||||
if (host_info.have_cuda()) {
|
||||
cuda_work_fetch.update_long_term_debts();
|
||||
cuda_work_fetch.update_short_term_debts();
|
||||
}
|
||||
if (coproc_ati) {
|
||||
if (host_info.have_ati()) {
|
||||
ati_work_fetch.update_long_term_debts();
|
||||
ati_work_fetch.update_short_term_debts();
|
||||
}
|
||||
|
@ -1194,16 +1190,16 @@ static inline void assign_coprocs(vector<RESULT*>& jobs) {
|
|||
|
||||
gstate.host_info.coprocs.clear_usage();
|
||||
#ifndef SIM
|
||||
if (coproc_cuda) {
|
||||
coproc_cuda->get_available_ram();
|
||||
if (gstate.host_info.have_cuda()) {
|
||||
gstate.host_info.coprocs.cuda.get_available_ram();
|
||||
if (log_flags.coproc_debug) {
|
||||
coproc_cuda->print_available_ram();
|
||||
gstate.host_info.coprocs.cuda.print_available_ram();
|
||||
}
|
||||
}
|
||||
if (coproc_ati) {
|
||||
coproc_ati->get_available_ram();
|
||||
if (gstate.host_info.have_ati()) {
|
||||
gstate.host_info.coprocs.ati.get_available_ram();
|
||||
if (log_flags.coproc_debug) {
|
||||
coproc_ati->print_available_ram();
|
||||
gstate.host_info.coprocs.ati.print_available_ram();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -1215,10 +1211,10 @@ static inline void assign_coprocs(vector<RESULT*>& jobs) {
|
|||
APP_VERSION* avp = rp->avp;
|
||||
if (avp->ncudas) {
|
||||
usage = avp->ncudas;
|
||||
cp = coproc_cuda;
|
||||
cp = &gstate.host_info.coprocs.cuda;
|
||||
} else if (avp->natis) {
|
||||
usage = avp->natis;
|
||||
cp = coproc_ati;
|
||||
cp = &gstate.host_info.coprocs.ati;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
@ -1235,10 +1231,10 @@ static inline void assign_coprocs(vector<RESULT*>& jobs) {
|
|||
APP_VERSION* avp = rp->avp;
|
||||
if (avp->ncudas) {
|
||||
usage = avp->ncudas;
|
||||
cp = coproc_cuda;
|
||||
cp = &gstate.host_info.coprocs.cuda;
|
||||
} else if (avp->natis) {
|
||||
usage = avp->natis;
|
||||
cp = coproc_ati;
|
||||
cp = &gstate.host_info.coprocs.ati;
|
||||
} else {
|
||||
job_iter++;
|
||||
continue;
|
||||
|
|
|
@ -220,18 +220,18 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p) {
|
|||
|
||||
// copy request values from RSC_WORK_FETCH to COPROC
|
||||
//
|
||||
if (coproc_cuda) {
|
||||
coproc_cuda->req_secs = cuda_work_fetch.req_secs;
|
||||
coproc_cuda->req_instances = cuda_work_fetch.req_instances;
|
||||
coproc_cuda->estimated_delay = cuda_work_fetch.req_secs?cuda_work_fetch.busy_time_estimator.get_busy_time():0;
|
||||
if (host_info.have_cuda()) {
|
||||
host_info.coprocs.cuda.req_secs = cuda_work_fetch.req_secs;
|
||||
host_info.coprocs.cuda.req_instances = cuda_work_fetch.req_instances;
|
||||
host_info.coprocs.cuda.estimated_delay = cuda_work_fetch.req_secs?cuda_work_fetch.busy_time_estimator.get_busy_time():0;
|
||||
}
|
||||
if (coproc_ati) {
|
||||
coproc_ati->req_secs = ati_work_fetch.req_secs;
|
||||
coproc_ati->req_instances = ati_work_fetch.req_instances;
|
||||
coproc_ati->estimated_delay = ati_work_fetch.req_secs?ati_work_fetch.busy_time_estimator.get_busy_time():0;
|
||||
if (host_info.have_ati()) {
|
||||
host_info.coprocs.ati.req_secs = ati_work_fetch.req_secs;
|
||||
host_info.coprocs.ati.req_instances = ati_work_fetch.req_instances;
|
||||
host_info.coprocs.ati.estimated_delay = ati_work_fetch.req_secs?ati_work_fetch.busy_time_estimator.get_busy_time():0;
|
||||
}
|
||||
|
||||
if (host_info.coprocs.coprocs.size()) {
|
||||
if (!host_info.coprocs.none()) {
|
||||
host_info.coprocs.write_xml(mf);
|
||||
}
|
||||
|
||||
|
@ -823,8 +823,8 @@ int CLIENT_STATE::handle_scheduler_reply(PROJECT* project, char* scheduler_url)
|
|||
);
|
||||
if (!rp->avp) {
|
||||
msg_printf(project, MSG_INTERNAL_ERROR,
|
||||
"No application found for task: %s %d %s; discarding",
|
||||
rp->platform, rp->version_num, rp->plan_class
|
||||
"No app version found for app %s platform %s ver %d class%s; discarding %s",
|
||||
rp->wup->app->name, rp->platform, rp->version_num, rp->plan_class, rp->name
|
||||
);
|
||||
delete rp;
|
||||
continue;
|
||||
|
@ -861,13 +861,13 @@ int CLIENT_STATE::handle_scheduler_reply(PROJECT* project, char* scheduler_url)
|
|||
"[sched_op] estimated total CPU task duration: %.0f seconds",
|
||||
est_cpu_duration
|
||||
);
|
||||
if (coproc_cuda) {
|
||||
if (host_info.have_cuda()) {
|
||||
msg_printf(project, MSG_INFO,
|
||||
"[sched_op] estimated total NVIDIA GPU task duration: %.0f seconds",
|
||||
est_cuda_duration
|
||||
);
|
||||
}
|
||||
if (coproc_ati) {
|
||||
if (host_info.have_ati()) {
|
||||
msg_printf(project, MSG_INFO,
|
||||
"[sched_op] estimated total ATI GPU task duration: %.0f seconds",
|
||||
est_ati_duration
|
||||
|
|
|
@ -913,8 +913,8 @@ int CLIENT_STATE::write_state_gui(MIOFILE& f) {
|
|||
core_client_version.minor,
|
||||
core_client_version.release,
|
||||
executing_as_daemon?1:0,
|
||||
coproc_cuda?1:0,
|
||||
coproc_ati?1:0
|
||||
host_info.have_cuda()?1:0,
|
||||
host_info.have_ati()?1:0
|
||||
);
|
||||
for (i=0; i<platforms.size(); i++) {
|
||||
f.printf(
|
||||
|
|
|
@ -1,490 +1,490 @@
|
|||
// This file is part of BOINC.
|
||||
// http://boinc.berkeley.edu
|
||||
// Copyright (C) 2008 University of California
|
||||
//
|
||||
// BOINC is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU Lesser General Public License
|
||||
// as published by the Free Software Foundation,
|
||||
// either version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// BOINC is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
// See the GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// Simulate the processing of the current workload
|
||||
// (include jobs that are downloading)
|
||||
// with weighted round-robin (WRR) scheduling.
|
||||
//
|
||||
// For efficiency, we simulate an approximation of WRR.
|
||||
// We don't model time-slicing.
|
||||
// Instead we use a continuous model where, at a given point,
|
||||
// each project has a set of running jobs that uses at most all CPUs.
|
||||
// These jobs are assumed to run at a rate proportionate to their avg_ncpus,
|
||||
// and each project gets total CPU proportionate to its RRS.
|
||||
//
|
||||
// For coprocessors, we saturate the resource;
|
||||
// i.e. with 2 GPUs, we'd let a 1-GPU app and a 2-GPU app run together.
|
||||
// Otherwise, there'd be the possibility of computing
|
||||
// a nonzero shortfall inappropriately.
|
||||
//
|
||||
// Outputs are changes to global state:
|
||||
// - deadline misses (per-project count, per-result flag)
|
||||
// Deadline misses are not counted for tasks
|
||||
// that are too large to run in RAM right now.
|
||||
// - resource shortfalls (per-project and total)
|
||||
// - counts of resources idle now
|
||||
//
|
||||
|
||||
#include "cpp.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "boinc_win.h"
|
||||
#else
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "client_state.h"
|
||||
#include "coproc.h"
|
||||
#include "client_msgs.h"
|
||||
|
||||
// Write a short description of the resources used by rp's app version
// into buf: the CPU count, followed by the NVIDIA or ATI GPU count
// if the app version uses one.
// buf is filled with sprintf(), so the caller must supply enough room
// (the caller in this file passes a 256-byte buffer).
//
inline void rsc_string(RESULT* rp, char* buf) {
    APP_VERSION* version = rp->avp;
    if (version->ncudas) {
        sprintf(buf, "%.2f CPU + %.2f NV", version->avg_ncpus, version->ncudas);
        return;
    }
    if (version->natis) {
        sprintf(buf, "%.2f CPU + %.2f ATI", version->avg_ncpus, version->natis);
        return;
    }
    sprintf(buf, "%.2f CPU", version->avg_ncpus);
}
|
||||
|
||||
// this is here (rather than rr_sim.h) because its inline functions
|
||||
// refer to RESULT
|
||||
//
|
||||
struct RR_SIM_STATUS {
|
||||
std::vector<RESULT*> active;
|
||||
double active_ncpus;
|
||||
double active_cudas;
|
||||
double active_atis;
|
||||
|
||||
inline void activate(RESULT* rp, double when) {
|
||||
PROJECT* p = rp->project;
|
||||
if (log_flags.rr_simulation) {
|
||||
char buf[256];
|
||||
rsc_string(rp, buf);
|
||||
msg_printf(p, MSG_INFO,
|
||||
"[rr_sim] %.2f: starting %s (%s)",
|
||||
when, rp->name, buf
|
||||
);
|
||||
}
|
||||
active.push_back(rp);
|
||||
cpu_work_fetch.sim_nused += rp->avp->avg_ncpus;
|
||||
cuda_work_fetch.sim_nused += rp->avp->ncudas;
|
||||
ati_work_fetch.sim_nused += rp->avp->natis;
|
||||
}
|
||||
// remove *rpbest from active set,
|
||||
// and adjust FLOPS left for other results
|
||||
//
|
||||
inline void remove_active(RESULT* rpbest) {
|
||||
vector<RESULT*>::iterator it = active.begin();
|
||||
while (it != active.end()) {
|
||||
RESULT* rp = *it;
|
||||
if (rp == rpbest) {
|
||||
it = active.erase(it);
|
||||
} else {
|
||||
rp->rrsim_flops_left -= rp->rrsim_flops*rpbest->rrsim_finish_delay;
|
||||
|
||||
// can be slightly less than 0 due to roundoff
|
||||
//
|
||||
if (rp->rrsim_flops_left < -1) {
|
||||
msg_printf(rp->project, MSG_INTERNAL_ERROR,
|
||||
"%s: negative FLOPs left %f", rp->name, rp->rrsim_flops_left
|
||||
);
|
||||
}
|
||||
if (rp->rrsim_flops_left < 0) {
|
||||
rp->rrsim_flops_left = 0;
|
||||
}
|
||||
it++;
|
||||
}
|
||||
}
|
||||
cpu_work_fetch.sim_nused -= rpbest->avp->avg_ncpus;
|
||||
cuda_work_fetch.sim_nused -= rpbest->avp->ncudas;
|
||||
ati_work_fetch.sim_nused -= rpbest->avp->natis;
|
||||
}
|
||||
|
||||
RR_SIM_STATUS() {
|
||||
active_ncpus = 0;
|
||||
active_cudas = 0;
|
||||
active_atis = 0;
|
||||
}
|
||||
~RR_SIM_STATUS() {}
|
||||
};
|
||||
|
||||
// Add rp to this status object's active list and charge its CPU/GPU usage
// to the sim_nused counters of rp's own project.
//
void RR_SIM_PROJECT_STATUS::activate(RESULT* rp) {
    PROJECT* owner = rp->project;
    APP_VERSION* version = rp->avp;

    active.push_back(rp);
    owner->cpu_pwf.sim_nused += version->avg_ncpus;
    owner->cuda_pwf.sim_nused += version->ncudas;
    owner->ati_pwf.sim_nused += version->natis;
}
|
||||
|
||||
// Drop every occurrence of rp from this status object's active list,
// then release rp's CPU/GPU usage from the sim_nused counters
// of rp's own project (whether or not rp was found in the list).
//
void RR_SIM_PROJECT_STATUS::remove_active(RESULT* rp) {
    for (std::vector<RESULT*>::iterator pos = active.begin(); pos != active.end(); ) {
        if (*pos == rp) {
            pos = active.erase(pos);
        } else {
            ++pos;
        }
    }

    PROJECT* owner = rp->project;
    APP_VERSION* version = rp->avp;
    owner->cpu_pwf.sim_nused -= version->avg_ncpus;
    owner->cuda_pwf.sim_nused -= version->ncudas;
    owner->ati_pwf.sim_nused -= version->natis;
}
|
||||
|
||||
// estimate the rate (FLOPS) that this job will get long-term
|
||||
// with weighted round-robin scheduling
|
||||
//
|
||||
// Store the estimated processing rate of rp in rp->rrsim_flops.
// Coprocessor jobs use the app version's FLOPS estimate directly;
// CPU jobs are scaled down when their project is using more CPUs
// than exist, or more than its runnable share.
// Both cases are scaled by gstate.overall_cpu_frac().
//
void set_rrsim_flops(RESULT* rp) {
    // For coproc jobs, use app version estimate
    //
    if (rp->uses_coprocs()) {
        rp->rrsim_flops = rp->avp->flops * gstate.overall_cpu_frac();
        return;
    }

    PROJECT* proj = rp->project;

    // For CPU jobs, estimate how many CPU seconds per second this job
    // would get running with other jobs of this project,
    // ignoring other factors
    //
    double cpu_scale = 1;
    if (proj->cpu_pwf.sim_nused > gstate.ncpus) {
        cpu_scale = gstate.ncpus/proj->cpu_pwf.sim_nused;
    }
    double rate_in_project = cpu_scale*rp->avp->avg_ncpus;

    // if the project's total CPU usage is more than its share, scale
    //
    double share_cpus = proj->cpu_pwf.runnable_share*gstate.ncpus;
    if (share_cpus == 0) {
        // deal with projects w/ resource share = 0
        share_cpus = gstate.ncpus;
    }
    double rate_in_share = rate_in_project;
    if (proj->cpu_pwf.sim_nused > share_cpus) {
        rate_in_share *= (share_cpus / proj->cpu_pwf.sim_nused);
    }

    // scale by overall CPU availability
    //
    double rate_available = rate_in_share * gstate.overall_cpu_frac();

    rp->rrsim_flops = rate_available * rp->avp->flops;
#if 0
    if (log_flags.rr_simulation) {
        msg_printf(proj, MSG_INFO,
            "[rr_sim] set_rrsim_flops: %.2fG (r1 %.4f r2 %.4f r3 %.4f)",
            rp->rrsim_flops/1e9, rate_in_project, rate_in_share, rate_available
        );
    }
#endif
}
|
||||
|
||||
void CLIENT_STATE::print_deadline_misses() {
|
||||
unsigned int i;
|
||||
RESULT* rp;
|
||||
PROJECT* p;
|
||||
for (i=0; i<results.size(); i++){
|
||||
rp = results[i];
|
||||
if (rp->rr_sim_misses_deadline) {
|
||||
msg_printf(rp->project, MSG_INFO,
|
||||
"[cpu_sched] Result %s projected to miss deadline.",
|
||||
rp->name
|
||||
);
|
||||
}
|
||||
}
|
||||
for (i=0; i<projects.size(); i++) {
|
||||
p = projects[i];
|
||||
if (p->cpu_pwf.deadlines_missed) {
|
||||
msg_printf(p, MSG_INFO,
|
||||
"[cpu_sched] Project has %d projected CPU deadline misses",
|
||||
p->cpu_pwf.deadlines_missed
|
||||
);
|
||||
}
|
||||
if (p->cuda_pwf.deadlines_missed) {
|
||||
msg_printf(p, MSG_INFO,
|
||||
"[cpu_sched] Project has %d projected NVIDIA GPU deadline misses",
|
||||
p->cuda_pwf.deadlines_missed
|
||||
);
|
||||
}
|
||||
if (p->ati_pwf.deadlines_missed) {
|
||||
msg_printf(p, MSG_INFO,
|
||||
"[cpu_sched] Project has %d projected ATI GPU deadline misses",
|
||||
p->ati_pwf.deadlines_missed
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
// compute a per-app-version "temporary DCF" based on the elapsed time
|
||||
// and fraction done of running jobs
|
||||
//
|
||||
void compute_temp_dcf() {
|
||||
unsigned int i;
|
||||
for (i=0; i<gstate.app_versions.size(); i++) {
|
||||
gstate.app_versions[i]->temp_dcf = 1;
|
||||
}
|
||||
for (i=0; i<gstate.active_tasks.active_tasks.size(); i++) {
|
||||
ACTIVE_TASK* atp = gstate.active_tasks.active_tasks[i];
|
||||
double x = atp->est_dur(false) / atp->result->estimated_duration(false);
|
||||
APP_VERSION* avp = atp->result->avp;
|
||||
if (x < avp->temp_dcf) {
|
||||
avp->temp_dcf = x;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void CLIENT_STATE::rr_simulation() {
|
||||
PROJECT* p, *pbest;
|
||||
RESULT* rp, *rpbest;
|
||||
RR_SIM_STATUS sim_status;
|
||||
unsigned int i;
|
||||
|
||||
double ar = available_ram();
|
||||
|
||||
work_fetch.rr_init();
|
||||
//compute_temp_dcf();
|
||||
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[rr_sim] rr_sim start: work_buf_total %.2f on_frac %.3f active_frac %.3f",
|
||||
work_buf_total(), time_stats.on_frac, time_stats.active_frac
|
||||
);
|
||||
}
|
||||
|
||||
for (i=0; i<projects.size(); i++) {
|
||||
p = projects[i];
|
||||
if (p->non_cpu_intensive) continue;
|
||||
p->rr_sim_status.clear();
|
||||
}
|
||||
|
||||
// Decide what jobs to include in the simulation,
|
||||
// and pick the ones that are initially running.
|
||||
// NOTE: "results" is sorted by increasing arrival time
|
||||
//
|
||||
for (i=0; i<results.size(); i++) {
|
||||
rp = results[i];
|
||||
rp->rr_sim_misses_deadline = false;
|
||||
if (!rp->nearly_runnable()) continue;
|
||||
if (rp->some_download_stalled()) continue;
|
||||
if (rp->project->non_cpu_intensive) continue;
|
||||
rp->rrsim_flops_left = rp->estimated_flops_remaining();
|
||||
|
||||
//if (rp->rrsim_flops_left <= 0) continue;
|
||||
// job may have fraction_done=1 but not be done;
|
||||
// if it's past its deadline, we need to mark it as such
|
||||
|
||||
p = rp->project;
|
||||
p->pwf.has_runnable_jobs = true;
|
||||
p->cpu_pwf.nused_total += rp->avp->avg_ncpus;
|
||||
if (rp->uses_cuda() && coproc_cuda) {
|
||||
p->cuda_pwf.nused_total += rp->avp->ncudas;
|
||||
p->cuda_pwf.has_runnable_jobs = true;
|
||||
if (cuda_work_fetch.sim_nused < coproc_cuda->count) {
|
||||
sim_status.activate(rp, 0);
|
||||
p->rr_sim_status.activate(rp);
|
||||
} else {
|
||||
cuda_work_fetch.pending.push_back(rp);
|
||||
}
|
||||
} else if (rp->uses_ati() && coproc_ati) {
|
||||
p->ati_pwf.nused_total += rp->avp->natis;
|
||||
p->ati_pwf.has_runnable_jobs = true;
|
||||
if (ati_work_fetch.sim_nused < coproc_ati->count) {
|
||||
sim_status.activate(rp, 0);
|
||||
p->rr_sim_status.activate(rp);
|
||||
} else {
|
||||
ati_work_fetch.pending.push_back(rp);
|
||||
}
|
||||
} else {
|
||||
p->cpu_pwf.has_runnable_jobs = true;
|
||||
if (p->cpu_pwf.sim_nused < ncpus) {
|
||||
sim_status.activate(rp, 0);
|
||||
p->rr_sim_status.activate(rp);
|
||||
} else {
|
||||
p->rr_sim_status.add_pending(rp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// note the number of idle instances
|
||||
//
|
||||
cpu_work_fetch.nidle_now = ncpus - cpu_work_fetch.sim_nused;
|
||||
if (cpu_work_fetch.nidle_now < 0) cpu_work_fetch.nidle_now = 0;
|
||||
if (coproc_cuda) {
|
||||
cuda_work_fetch.nidle_now = coproc_cuda->count - cuda_work_fetch.sim_nused;
|
||||
if (cuda_work_fetch.nidle_now < 0) cuda_work_fetch.nidle_now = 0;
|
||||
}
|
||||
if (coproc_ati) {
|
||||
ati_work_fetch.nidle_now = coproc_ati->count - ati_work_fetch.sim_nused;
|
||||
if (ati_work_fetch.nidle_now < 0) ati_work_fetch.nidle_now = 0;
|
||||
}
|
||||
|
||||
work_fetch.compute_shares();
|
||||
|
||||
// Simulation loop. Keep going until all work done
|
||||
//
|
||||
double buf_end = now + work_buf_total();
|
||||
double sim_now = now;
|
||||
while (sim_status.active.size()) {
|
||||
|
||||
// compute finish times and see which result finishes first
|
||||
//
|
||||
rpbest = NULL;
|
||||
for (i=0; i<sim_status.active.size(); i++) {
|
||||
rp = sim_status.active[i];
|
||||
set_rrsim_flops(rp);
|
||||
//rp->rrsim_finish_delay = rp->avp->temp_dcf*rp->rrsim_flops_left/rp->rrsim_flops;
|
||||
rp->rrsim_finish_delay = rp->rrsim_flops_left/rp->rrsim_flops;
|
||||
if (!rpbest || rp->rrsim_finish_delay < rpbest->rrsim_finish_delay) {
|
||||
rpbest = rp;
|
||||
}
|
||||
}
|
||||
|
||||
pbest = rpbest->project;
|
||||
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"[rr_sim] %.2f: %s finishes after %.2f (%.2fG/%.2fG)",
|
||||
sim_now - now,
|
||||
rpbest->name, rpbest->rrsim_finish_delay,
|
||||
rpbest->rrsim_flops_left/1e9, rpbest->rrsim_flops/1e9
|
||||
);
|
||||
}
|
||||
|
||||
// "rpbest" is first result to finish. Does it miss its deadline?
|
||||
//
|
||||
double diff = (sim_now + rpbest->rrsim_finish_delay) - rpbest->computation_deadline();
|
||||
if (diff > 0) {
|
||||
ACTIVE_TASK* atp = lookup_active_task_by_result(rpbest);
|
||||
if (atp && atp->procinfo.working_set_size_smoothed > ar) {
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"[rr_sim] %s misses deadline but too large to run",
|
||||
rpbest->name
|
||||
);
|
||||
}
|
||||
} else {
|
||||
rpbest->rr_sim_misses_deadline = true;
|
||||
if (rpbest->uses_cuda()) {
|
||||
pbest->cuda_pwf.deadlines_missed++;
|
||||
cuda_work_fetch.deadline_missed_instances += rpbest->avp->ncudas;
|
||||
} else if (rpbest->uses_ati()) {
|
||||
pbest->ati_pwf.deadlines_missed++;
|
||||
ati_work_fetch.deadline_missed_instances += rpbest->avp->natis;
|
||||
} else {
|
||||
pbest->cpu_pwf.deadlines_missed++;
|
||||
cpu_work_fetch.deadline_missed_instances += rpbest->avp->avg_ncpus;
|
||||
}
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"[rr_sim] %s misses deadline by %.2f",
|
||||
rpbest->name, diff
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// update saturated time
|
||||
//
|
||||
double end_time = sim_now + rpbest->rrsim_finish_delay;
|
||||
double x = end_time - gstate.now;
|
||||
cpu_work_fetch.update_saturated_time(x);
|
||||
if (coproc_cuda) {
|
||||
cuda_work_fetch.update_saturated_time(x);
|
||||
}
|
||||
if (coproc_ati) {
|
||||
ati_work_fetch.update_saturated_time(x);
|
||||
}
|
||||
|
||||
// update busy time
|
||||
//
|
||||
if (rpbest->rr_sim_misses_deadline) {
|
||||
double dur = rpbest->estimated_time_remaining(false) / gstate.overall_cpu_frac();
|
||||
cpu_work_fetch.update_busy_time(dur, rpbest->avp->avg_ncpus);
|
||||
if (rpbest->uses_cuda()) {
|
||||
cuda_work_fetch.update_busy_time(dur, rpbest->avp->ncudas);
|
||||
}
|
||||
if (rpbest->uses_ati()) {
|
||||
ati_work_fetch.update_busy_time(dur, rpbest->avp->natis);
|
||||
}
|
||||
}
|
||||
|
||||
// increment resource shortfalls
|
||||
//
|
||||
if (sim_now < buf_end) {
|
||||
if (end_time > buf_end) end_time = buf_end;
|
||||
double d_time = end_time - sim_now;
|
||||
|
||||
cpu_work_fetch.accumulate_shortfall(d_time);
|
||||
|
||||
if (coproc_cuda) {
|
||||
cuda_work_fetch.accumulate_shortfall(d_time);
|
||||
}
|
||||
if (coproc_ati) {
|
||||
ati_work_fetch.accumulate_shortfall(d_time);
|
||||
}
|
||||
}
|
||||
|
||||
sim_status.remove_active(rpbest);
|
||||
pbest->rr_sim_status.remove_active(rpbest);
|
||||
|
||||
sim_now += rpbest->rrsim_finish_delay;
|
||||
|
||||
// start new jobs; may need to start more than one
|
||||
// if this job used multiple resource instances
|
||||
//
|
||||
if (rpbest->uses_cuda()) {
|
||||
while (1) {
|
||||
if (cuda_work_fetch.sim_nused >= coproc_cuda->count) break;
|
||||
if (!cuda_work_fetch.pending.size()) break;
|
||||
RESULT* rp = cuda_work_fetch.pending[0];
|
||||
cuda_work_fetch.pending.erase(cuda_work_fetch.pending.begin());
|
||||
sim_status.activate(rp, sim_now-now);
|
||||
pbest->rr_sim_status.activate(rp);
|
||||
}
|
||||
} else if (rpbest->uses_ati()) {
|
||||
while (1) {
|
||||
if (ati_work_fetch.sim_nused >= coproc_ati->count) break;
|
||||
if (!ati_work_fetch.pending.size()) break;
|
||||
RESULT* rp = ati_work_fetch.pending[0];
|
||||
ati_work_fetch.pending.erase(ati_work_fetch.pending.begin());
|
||||
sim_status.activate(rp, sim_now-now);
|
||||
pbest->rr_sim_status.activate(rp);
|
||||
}
|
||||
} else {
|
||||
while (1) {
|
||||
if (pbest->cpu_pwf.sim_nused >= ncpus) break;
|
||||
RESULT* rp = pbest->rr_sim_status.get_pending();
|
||||
if (!rp) break;
|
||||
sim_status.activate(rp, sim_now-now);
|
||||
pbest->rr_sim_status.activate(rp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if simulation ends before end of buffer, take the tail into account
|
||||
//
|
||||
if (sim_now < buf_end) {
|
||||
double d_time = buf_end - sim_now;
|
||||
cpu_work_fetch.accumulate_shortfall(d_time);
|
||||
if (coproc_cuda) {
|
||||
cuda_work_fetch.accumulate_shortfall(d_time);
|
||||
}
|
||||
if (coproc_ati) {
|
||||
ati_work_fetch.accumulate_shortfall(d_time);
|
||||
}
|
||||
}
|
||||
}
|
||||
// This file is part of BOINC.
|
||||
// http://boinc.berkeley.edu
|
||||
// Copyright (C) 2008 University of California
|
||||
//
|
||||
// BOINC is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU Lesser General Public License
|
||||
// as published by the Free Software Foundation,
|
||||
// either version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// BOINC is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
// See the GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// Simulate the processing of the current workload
|
||||
// (include jobs that are downloading)
|
||||
// with weighted round-robin (WRR) scheduling.
|
||||
//
|
||||
// For efficiency, we simulate an approximation of WRR.
|
||||
// We don't model time-slicing.
|
||||
// Instead we use a continuous model where, at a given point,
|
||||
// each project has a set of running jobs that uses at most all CPUs.
|
||||
// These jobs are assumed to run at a rate proportionate to their avg_ncpus,
|
||||
// and each project gets total CPU proportionate to its RRS.
|
||||
//
|
||||
// For coprocessors, we saturate the resource;
|
||||
// i.e. with 2 GPUs, we'd let a 1-GPU app and a 2-GPU app run together.
|
||||
// Otherwise, there'd be the possibility of computing
|
||||
// a nonzero shortfall inappropriately.
|
||||
//
|
||||
// Outputs are changes to global state:
|
||||
// - deadline misses (per-project count, per-result flag)
|
||||
// Deadline misses are not counted for tasks
|
||||
// that are too large to run in RAM right now.
|
||||
// - resource shortfalls (per-project and total)
|
||||
// - counts of resources idle now
|
||||
//
|
||||
|
||||
#include "cpp.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "boinc_win.h"
|
||||
#else
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "client_state.h"
|
||||
#include "coproc.h"
|
||||
#include "client_msgs.h"
|
||||
|
||||
// Write a short description of the resources used by rp's app version
// into buf: the CPU count, followed by the NVIDIA or ATI GPU count
// if the app version uses one.
// buf is filled with sprintf(), so the caller must supply enough room
// (the caller in this file passes a 256-byte buffer).
//
inline void rsc_string(RESULT* rp, char* buf) {
    APP_VERSION* version = rp->avp;
    if (version->ncudas) {
        sprintf(buf, "%.2f CPU + %.2f NV", version->avg_ncpus, version->ncudas);
        return;
    }
    if (version->natis) {
        sprintf(buf, "%.2f CPU + %.2f ATI", version->avg_ncpus, version->natis);
        return;
    }
    sprintf(buf, "%.2f CPU", version->avg_ncpus);
}
|
||||
|
||||
// this is here (rather than rr_sim.h) because its inline functions
|
||||
// refer to RESULT
|
||||
//
|
||||
struct RR_SIM_STATUS {
|
||||
std::vector<RESULT*> active;
|
||||
double active_ncpus;
|
||||
double active_cudas;
|
||||
double active_atis;
|
||||
|
||||
inline void activate(RESULT* rp, double when) {
|
||||
PROJECT* p = rp->project;
|
||||
if (log_flags.rr_simulation) {
|
||||
char buf[256];
|
||||
rsc_string(rp, buf);
|
||||
msg_printf(p, MSG_INFO,
|
||||
"[rr_sim] %.2f: starting %s (%s)",
|
||||
when, rp->name, buf
|
||||
);
|
||||
}
|
||||
active.push_back(rp);
|
||||
cpu_work_fetch.sim_nused += rp->avp->avg_ncpus;
|
||||
cuda_work_fetch.sim_nused += rp->avp->ncudas;
|
||||
ati_work_fetch.sim_nused += rp->avp->natis;
|
||||
}
|
||||
// remove *rpbest from active set,
|
||||
// and adjust FLOPS left for other results
|
||||
//
|
||||
inline void remove_active(RESULT* rpbest) {
|
||||
vector<RESULT*>::iterator it = active.begin();
|
||||
while (it != active.end()) {
|
||||
RESULT* rp = *it;
|
||||
if (rp == rpbest) {
|
||||
it = active.erase(it);
|
||||
} else {
|
||||
rp->rrsim_flops_left -= rp->rrsim_flops*rpbest->rrsim_finish_delay;
|
||||
|
||||
// can be slightly less than 0 due to roundoff
|
||||
//
|
||||
if (rp->rrsim_flops_left < -1) {
|
||||
msg_printf(rp->project, MSG_INTERNAL_ERROR,
|
||||
"%s: negative FLOPs left %f", rp->name, rp->rrsim_flops_left
|
||||
);
|
||||
}
|
||||
if (rp->rrsim_flops_left < 0) {
|
||||
rp->rrsim_flops_left = 0;
|
||||
}
|
||||
it++;
|
||||
}
|
||||
}
|
||||
cpu_work_fetch.sim_nused -= rpbest->avp->avg_ncpus;
|
||||
cuda_work_fetch.sim_nused -= rpbest->avp->ncudas;
|
||||
ati_work_fetch.sim_nused -= rpbest->avp->natis;
|
||||
}
|
||||
|
||||
RR_SIM_STATUS() {
|
||||
active_ncpus = 0;
|
||||
active_cudas = 0;
|
||||
active_atis = 0;
|
||||
}
|
||||
~RR_SIM_STATUS() {}
|
||||
};
|
||||
|
||||
// Add rp to this status object's active list, and add its resource
// usage to the sim_nused counters of the project that owns rp.
//
void RR_SIM_PROJECT_STATUS::activate(RESULT* rp) {
    PROJECT* owner = rp->project;
    APP_VERSION* version = rp->avp;

    active.push_back(rp);
    owner->cpu_pwf.sim_nused += version->avg_ncpus;
    owner->cuda_pwf.sim_nused += version->ncudas;
    owner->ati_pwf.sim_nused += version->natis;
}
|
||||
|
||||
// Remove every occurrence of rp from this status object's active list,
// and subtract its resource usage from the sim_nused counters
// of the project that owns rp.
//
void RR_SIM_PROJECT_STATUS::remove_active(RESULT* rp) {
    for (std::vector<RESULT*>::iterator pos = active.begin(); pos != active.end(); ) {
        if (*pos != rp) {
            ++pos;
            continue;
        }
        pos = active.erase(pos);
    }

    PROJECT* owner = rp->project;
    APP_VERSION* version = rp->avp;
    owner->cpu_pwf.sim_nused -= version->avg_ncpus;
    owner->cuda_pwf.sim_nused -= version->ncudas;
    owner->ati_pwf.sim_nused -= version->natis;
}
|
||||
|
||||
// estimate the rate (FLOPS) that this job will get long-term
|
||||
// with weighted round-robin scheduling
|
||||
//
|
||||
void set_rrsim_flops(RESULT* rp) {
|
||||
// For coproc jobs, use app version estimate
|
||||
//
|
||||
if (rp->uses_coprocs()) {
|
||||
rp->rrsim_flops = rp->avp->flops * gstate.overall_cpu_frac();
|
||||
return;
|
||||
}
|
||||
PROJECT* p = rp->project;
|
||||
|
||||
// For CPU jobs, estimate how many CPU seconds per second this job would get
|
||||
// running with other jobs of this project, ignoring other factors
|
||||
//
|
||||
double x = 1;
|
||||
if (p->cpu_pwf.sim_nused > gstate.ncpus) {
|
||||
x = gstate.ncpus/p->cpu_pwf.sim_nused;
|
||||
}
|
||||
double r1 = x*rp->avp->avg_ncpus;
|
||||
|
||||
// if the project's total CPU usage is more than its share, scale
|
||||
//
|
||||
double share_cpus = p->cpu_pwf.runnable_share*gstate.ncpus;
|
||||
if (!share_cpus) share_cpus = gstate.ncpus;
|
||||
// deal with projects w/ resource share = 0
|
||||
double r2 = r1;
|
||||
if (p->cpu_pwf.sim_nused > share_cpus) {
|
||||
r2 *= (share_cpus / p->cpu_pwf.sim_nused);
|
||||
}
|
||||
|
||||
// scale by overall CPU availability
|
||||
//
|
||||
double r3 = r2 * gstate.overall_cpu_frac();
|
||||
|
||||
rp->rrsim_flops = r3 * rp->avp->flops;
|
||||
#if 0
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(p, MSG_INFO,
|
||||
"[rr_sim] set_rrsim_flops: %.2fG (r1 %.4f r2 %.4f r3 %.4f)",
|
||||
rp->rrsim_flops/1e9, r1, r2, r3
|
||||
);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void CLIENT_STATE::print_deadline_misses() {
|
||||
unsigned int i;
|
||||
RESULT* rp;
|
||||
PROJECT* p;
|
||||
for (i=0; i<results.size(); i++){
|
||||
rp = results[i];
|
||||
if (rp->rr_sim_misses_deadline) {
|
||||
msg_printf(rp->project, MSG_INFO,
|
||||
"[cpu_sched] Result %s projected to miss deadline.",
|
||||
rp->name
|
||||
);
|
||||
}
|
||||
}
|
||||
for (i=0; i<projects.size(); i++) {
|
||||
p = projects[i];
|
||||
if (p->cpu_pwf.deadlines_missed) {
|
||||
msg_printf(p, MSG_INFO,
|
||||
"[cpu_sched] Project has %d projected CPU deadline misses",
|
||||
p->cpu_pwf.deadlines_missed
|
||||
);
|
||||
}
|
||||
if (p->cuda_pwf.deadlines_missed) {
|
||||
msg_printf(p, MSG_INFO,
|
||||
"[cpu_sched] Project has %d projected NVIDIA GPU deadline misses",
|
||||
p->cuda_pwf.deadlines_missed
|
||||
);
|
||||
}
|
||||
if (p->ati_pwf.deadlines_missed) {
|
||||
msg_printf(p, MSG_INFO,
|
||||
"[cpu_sched] Project has %d projected ATI GPU deadline misses",
|
||||
p->ati_pwf.deadlines_missed
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
// (currently compiled out)
// Compute a per-app-version "temporary DCF" from the elapsed time
// and fraction done of running jobs:
// start every app version at 1, then lower it to the smallest ratio of
// active-task duration estimate to result duration estimate.
//
void compute_temp_dcf() {
    for (unsigned int j=0; j<gstate.app_versions.size(); j++) {
        gstate.app_versions[j]->temp_dcf = 1;
    }
    for (unsigned int k=0; k<gstate.active_tasks.active_tasks.size(); k++) {
        ACTIVE_TASK* task = gstate.active_tasks.active_tasks[k];
        double ratio = task->est_dur(false) / task->result->estimated_duration(false);
        APP_VERSION* version = task->result->avp;
        if (ratio < version->temp_dcf) version->temp_dcf = ratio;
    }
}
#endif
|
||||
|
||||
void CLIENT_STATE::rr_simulation() {
|
||||
PROJECT* p, *pbest;
|
||||
RESULT* rp, *rpbest;
|
||||
RR_SIM_STATUS sim_status;
|
||||
unsigned int i;
|
||||
|
||||
double ar = available_ram();
|
||||
|
||||
work_fetch.rr_init();
|
||||
//compute_temp_dcf();
|
||||
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[rr_sim] rr_sim start: work_buf_total %.2f on_frac %.3f active_frac %.3f",
|
||||
work_buf_total(), time_stats.on_frac, time_stats.active_frac
|
||||
);
|
||||
}
|
||||
|
||||
for (i=0; i<projects.size(); i++) {
|
||||
p = projects[i];
|
||||
if (p->non_cpu_intensive) continue;
|
||||
p->rr_sim_status.clear();
|
||||
}
|
||||
|
||||
// Decide what jobs to include in the simulation,
|
||||
// and pick the ones that are initially running.
|
||||
// NOTE: "results" is sorted by increasing arrival time
|
||||
//
|
||||
for (i=0; i<results.size(); i++) {
|
||||
rp = results[i];
|
||||
rp->rr_sim_misses_deadline = false;
|
||||
if (!rp->nearly_runnable()) continue;
|
||||
if (rp->some_download_stalled()) continue;
|
||||
if (rp->project->non_cpu_intensive) continue;
|
||||
rp->rrsim_flops_left = rp->estimated_flops_remaining();
|
||||
|
||||
//if (rp->rrsim_flops_left <= 0) continue;
|
||||
// job may have fraction_done=1 but not be done;
|
||||
// if it's past its deadline, we need to mark it as such
|
||||
|
||||
p = rp->project;
|
||||
p->pwf.has_runnable_jobs = true;
|
||||
p->cpu_pwf.nused_total += rp->avp->avg_ncpus;
|
||||
if (rp->uses_cuda() && host_info.have_cuda()) {
|
||||
p->cuda_pwf.nused_total += rp->avp->ncudas;
|
||||
p->cuda_pwf.has_runnable_jobs = true;
|
||||
if (cuda_work_fetch.sim_nused < host_info.coprocs.cuda.count) {
|
||||
sim_status.activate(rp, 0);
|
||||
p->rr_sim_status.activate(rp);
|
||||
} else {
|
||||
cuda_work_fetch.pending.push_back(rp);
|
||||
}
|
||||
} else if (rp->uses_ati() && host_info.have_ati()) {
|
||||
p->ati_pwf.nused_total += rp->avp->natis;
|
||||
p->ati_pwf.has_runnable_jobs = true;
|
||||
if (ati_work_fetch.sim_nused < host_info.coprocs.ati.count) {
|
||||
sim_status.activate(rp, 0);
|
||||
p->rr_sim_status.activate(rp);
|
||||
} else {
|
||||
ati_work_fetch.pending.push_back(rp);
|
||||
}
|
||||
} else {
|
||||
p->cpu_pwf.has_runnable_jobs = true;
|
||||
if (p->cpu_pwf.sim_nused < ncpus) {
|
||||
sim_status.activate(rp, 0);
|
||||
p->rr_sim_status.activate(rp);
|
||||
} else {
|
||||
p->rr_sim_status.add_pending(rp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// note the number of idle instances
|
||||
//
|
||||
cpu_work_fetch.nidle_now = ncpus - cpu_work_fetch.sim_nused;
|
||||
if (cpu_work_fetch.nidle_now < 0) cpu_work_fetch.nidle_now = 0;
|
||||
if (host_info.have_cuda()) {
|
||||
cuda_work_fetch.nidle_now = host_info.coprocs.cuda.count - cuda_work_fetch.sim_nused;
|
||||
if (cuda_work_fetch.nidle_now < 0) cuda_work_fetch.nidle_now = 0;
|
||||
}
|
||||
if (host_info.have_ati()) {
|
||||
ati_work_fetch.nidle_now = host_info.coprocs.ati.count - ati_work_fetch.sim_nused;
|
||||
if (ati_work_fetch.nidle_now < 0) ati_work_fetch.nidle_now = 0;
|
||||
}
|
||||
|
||||
work_fetch.compute_shares();
|
||||
|
||||
// Simulation loop. Keep going until all work done
|
||||
//
|
||||
double buf_end = now + work_buf_total();
|
||||
double sim_now = now;
|
||||
while (sim_status.active.size()) {
|
||||
|
||||
// compute finish times and see which result finishes first
|
||||
//
|
||||
rpbest = NULL;
|
||||
for (i=0; i<sim_status.active.size(); i++) {
|
||||
rp = sim_status.active[i];
|
||||
set_rrsim_flops(rp);
|
||||
//rp->rrsim_finish_delay = rp->avp->temp_dcf*rp->rrsim_flops_left/rp->rrsim_flops;
|
||||
rp->rrsim_finish_delay = rp->rrsim_flops_left/rp->rrsim_flops;
|
||||
if (!rpbest || rp->rrsim_finish_delay < rpbest->rrsim_finish_delay) {
|
||||
rpbest = rp;
|
||||
}
|
||||
}
|
||||
|
||||
pbest = rpbest->project;
|
||||
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"[rr_sim] %.2f: %s finishes after %.2f (%.2fG/%.2fG)",
|
||||
sim_now - now,
|
||||
rpbest->name, rpbest->rrsim_finish_delay,
|
||||
rpbest->rrsim_flops_left/1e9, rpbest->rrsim_flops/1e9
|
||||
);
|
||||
}
|
||||
|
||||
// "rpbest" is first result to finish. Does it miss its deadline?
|
||||
//
|
||||
double diff = (sim_now + rpbest->rrsim_finish_delay) - rpbest->computation_deadline();
|
||||
if (diff > 0) {
|
||||
ACTIVE_TASK* atp = lookup_active_task_by_result(rpbest);
|
||||
if (atp && atp->procinfo.working_set_size_smoothed > ar) {
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"[rr_sim] %s misses deadline but too large to run",
|
||||
rpbest->name
|
||||
);
|
||||
}
|
||||
} else {
|
||||
rpbest->rr_sim_misses_deadline = true;
|
||||
if (rpbest->uses_cuda()) {
|
||||
pbest->cuda_pwf.deadlines_missed++;
|
||||
cuda_work_fetch.deadline_missed_instances += rpbest->avp->ncudas;
|
||||
} else if (rpbest->uses_ati()) {
|
||||
pbest->ati_pwf.deadlines_missed++;
|
||||
ati_work_fetch.deadline_missed_instances += rpbest->avp->natis;
|
||||
} else {
|
||||
pbest->cpu_pwf.deadlines_missed++;
|
||||
cpu_work_fetch.deadline_missed_instances += rpbest->avp->avg_ncpus;
|
||||
}
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"[rr_sim] %s misses deadline by %.2f",
|
||||
rpbest->name, diff
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// update saturated time
|
||||
//
|
||||
double end_time = sim_now + rpbest->rrsim_finish_delay;
|
||||
double x = end_time - gstate.now;
|
||||
cpu_work_fetch.update_saturated_time(x);
|
||||
if (host_info.have_cuda()) {
|
||||
cuda_work_fetch.update_saturated_time(x);
|
||||
}
|
||||
if (host_info.have_ati()) {
|
||||
ati_work_fetch.update_saturated_time(x);
|
||||
}
|
||||
|
||||
// update busy time
|
||||
//
|
||||
if (rpbest->rr_sim_misses_deadline) {
|
||||
double dur = rpbest->estimated_time_remaining(false) / gstate.overall_cpu_frac();
|
||||
cpu_work_fetch.update_busy_time(dur, rpbest->avp->avg_ncpus);
|
||||
if (rpbest->uses_cuda()) {
|
||||
cuda_work_fetch.update_busy_time(dur, rpbest->avp->ncudas);
|
||||
}
|
||||
if (rpbest->uses_ati()) {
|
||||
ati_work_fetch.update_busy_time(dur, rpbest->avp->natis);
|
||||
}
|
||||
}
|
||||
|
||||
// increment resource shortfalls
|
||||
//
|
||||
if (sim_now < buf_end) {
|
||||
if (end_time > buf_end) end_time = buf_end;
|
||||
double d_time = end_time - sim_now;
|
||||
|
||||
cpu_work_fetch.accumulate_shortfall(d_time);
|
||||
|
||||
if (host_info.have_cuda()) {
|
||||
cuda_work_fetch.accumulate_shortfall(d_time);
|
||||
}
|
||||
if (host_info.have_ati()) {
|
||||
ati_work_fetch.accumulate_shortfall(d_time);
|
||||
}
|
||||
}
|
||||
|
||||
sim_status.remove_active(rpbest);
|
||||
pbest->rr_sim_status.remove_active(rpbest);
|
||||
|
||||
sim_now += rpbest->rrsim_finish_delay;
|
||||
|
||||
// start new jobs; may need to start more than one
|
||||
// if this job used multiple resource instances
|
||||
//
|
||||
if (rpbest->uses_cuda()) {
|
||||
while (1) {
|
||||
if (cuda_work_fetch.sim_nused >= host_info.coprocs.cuda.count) break;
|
||||
if (!cuda_work_fetch.pending.size()) break;
|
||||
RESULT* rp = cuda_work_fetch.pending[0];
|
||||
cuda_work_fetch.pending.erase(cuda_work_fetch.pending.begin());
|
||||
sim_status.activate(rp, sim_now-now);
|
||||
pbest->rr_sim_status.activate(rp);
|
||||
}
|
||||
} else if (rpbest->uses_ati()) {
|
||||
while (1) {
|
||||
if (ati_work_fetch.sim_nused >= host_info.coprocs.ati.count) break;
|
||||
if (!ati_work_fetch.pending.size()) break;
|
||||
RESULT* rp = ati_work_fetch.pending[0];
|
||||
ati_work_fetch.pending.erase(ati_work_fetch.pending.begin());
|
||||
sim_status.activate(rp, sim_now-now);
|
||||
pbest->rr_sim_status.activate(rp);
|
||||
}
|
||||
} else {
|
||||
while (1) {
|
||||
if (pbest->cpu_pwf.sim_nused >= ncpus) break;
|
||||
RESULT* rp = pbest->rr_sim_status.get_pending();
|
||||
if (!rp) break;
|
||||
sim_status.activate(rp, sim_now-now);
|
||||
pbest->rr_sim_status.activate(rp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if simulation ends before end of buffer, take the tail into account
|
||||
//
|
||||
if (sim_now < buf_end) {
|
||||
double d_time = buf_end - sim_now;
|
||||
cpu_work_fetch.accumulate_shortfall(d_time);
|
||||
if (host_info.have_cuda()) {
|
||||
cuda_work_fetch.accumulate_shortfall(d_time);
|
||||
}
|
||||
if (host_info.have_ati()) {
|
||||
ati_work_fetch.accumulate_shortfall(d_time);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -219,7 +219,7 @@ int SCHEDULER_OP::start_rpc(PROJECT* p) {
|
|||
);
|
||||
double gpu_req = cuda_work_fetch.req_secs + ati_work_fetch.req_secs;
|
||||
if (cpu_work_fetch.req_secs || gpu_req) {
|
||||
if (coproc_cuda||coproc_ati) {
|
||||
if (gstate.host_info.have_cuda()||gstate.host_info.have_ati()) {
|
||||
if (cpu_work_fetch.req_secs && gpu_req) {
|
||||
sprintf(buf, " for CPU and GPU");
|
||||
} else if (cpu_work_fetch.req_secs) {
|
||||
|
@ -254,13 +254,13 @@ int SCHEDULER_OP::start_rpc(PROJECT* p) {
|
|||
"[sched_op] CPU work request: %.2f seconds; %.2f CPUs",
|
||||
cpu_work_fetch.req_secs, cpu_work_fetch.req_instances
|
||||
);
|
||||
if (coproc_cuda) {
|
||||
if (gstate.host_info.have_cuda()) {
|
||||
msg_printf(p, MSG_INFO,
|
||||
"[sched_op] NVIDIA GPU work request: %.2f seconds; %.2f GPUs",
|
||||
cuda_work_fetch.req_secs, cuda_work_fetch.req_instances
|
||||
);
|
||||
}
|
||||
if (coproc_ati) {
|
||||
if (gstate.host_info.have_ati()) {
|
||||
msg_printf(p, MSG_INFO,
|
||||
"[sched_op] ATI GPU work request: %.2f seconds; %.2f GPUs",
|
||||
ati_work_fetch.req_secs, ati_work_fetch.req_instances
|
||||
|
|
|
@ -295,8 +295,6 @@ public:
|
|||
};
|
||||
|
||||
extern CLIENT_STATE gstate;
|
||||
extern COPROC_CUDA* coproc_cuda;
|
||||
extern COPROC_ATI* coproc_ati;
|
||||
extern NET_STATUS net_status;
|
||||
extern FILE* logfile;
|
||||
extern bool user_active;
|
||||
|
|
|
@ -662,10 +662,10 @@ void WORK_FETCH::rr_init() {
|
|||
p->pwf.can_fetch_work = p->pwf.compute_can_fetch_work(p);
|
||||
p->pwf.has_runnable_jobs = false;
|
||||
p->cpu_pwf.rr_init(p, RSC_TYPE_CPU);
|
||||
if (coproc_cuda) {
|
||||
if (gstate.host_info.have_cuda()) {
|
||||
p->cuda_pwf.rr_init(p, RSC_TYPE_CUDA);
|
||||
}
|
||||
if (coproc_ati) {
|
||||
if (gstate.host_info.have_ati()) {
|
||||
p->ati_pwf.rr_init(p, RSC_TYPE_ATI);
|
||||
}
|
||||
}
|
||||
|
@ -673,10 +673,10 @@ void WORK_FETCH::rr_init() {
|
|||
|
||||
void WORK_FETCH::set_all_requests(PROJECT* p) {
|
||||
cpu_work_fetch.set_request(p, false);
|
||||
if (coproc_cuda && gpus_usable) {
|
||||
if (gstate.host_info.have_cuda() && gpus_usable) {
|
||||
cuda_work_fetch.set_request(p, false);
|
||||
}
|
||||
if (coproc_ati && gpus_usable) {
|
||||
if (gstate.host_info.have_ati() && gpus_usable) {
|
||||
ati_work_fetch.set_request(p, false);
|
||||
}
|
||||
}
|
||||
|
@ -712,13 +712,13 @@ void WORK_FETCH::set_overall_debts() {
|
|||
p = gstate.projects[i];
|
||||
double queue_debt = p->cpu_pwf.queue_est/gstate.ncpus;
|
||||
p->pwf.overall_debt = p->cpu_pwf.long_term_debt - queue_debt;
|
||||
if (coproc_cuda) {
|
||||
if (gstate.host_info.have_cuda()) {
|
||||
p->pwf.overall_debt += cuda_work_fetch.relative_speed*
|
||||
(p->cuda_pwf.long_term_debt - p->cuda_pwf.queue_est/coproc_cuda->count);
|
||||
(p->cuda_pwf.long_term_debt - p->cuda_pwf.queue_est/gstate.host_info.coprocs.cuda.count);
|
||||
}
|
||||
if (coproc_ati) {
|
||||
if (gstate.host_info.have_ati()) {
|
||||
p->pwf.overall_debt += ati_work_fetch.relative_speed*
|
||||
(p->ati_pwf.long_term_debt - p->ati_pwf.queue_est/coproc_ati->count);
|
||||
(p->ati_pwf.long_term_debt - p->ati_pwf.queue_est/gstate.host_info.coprocs.ati.count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -738,10 +738,10 @@ void WORK_FETCH::print_state() {
|
|||
gstate.work_buf_min(), gstate.work_buf_additional()
|
||||
);
|
||||
cpu_work_fetch.print_state("CPU");
|
||||
if (coproc_cuda) {
|
||||
if (gstate.host_info.have_cuda()) {
|
||||
cuda_work_fetch.print_state("NVIDIA GPU");
|
||||
}
|
||||
if (coproc_ati) {
|
||||
if (gstate.host_info.have_ati()) {
|
||||
ati_work_fetch.print_state("ATI GPU");
|
||||
}
|
||||
for (unsigned int i=0; i<gstate.projects.size(); i++) {
|
||||
|
@ -833,8 +833,8 @@ PROJECT* WORK_FETCH::choose_project() {
|
|||
gstate.rr_simulation();
|
||||
set_overall_debts();
|
||||
|
||||
bool cuda_usable = coproc_cuda && gpus_usable;
|
||||
bool ati_usable = coproc_ati && gpus_usable;
|
||||
bool cuda_usable = gstate.host_info.have_cuda() && gpus_usable;
|
||||
bool ati_usable = gstate.host_info.have_ati() && gpus_usable;
|
||||
|
||||
if (cuda_usable) {
|
||||
p = cuda_work_fetch.choose_project(FETCH_IF_IDLE_INSTANCE);
|
||||
|
@ -894,12 +894,12 @@ void WORK_FETCH::accumulate_inst_sec(ACTIVE_TASK* atp, double dt) {
|
|||
double x = dt*avp->avg_ncpus;
|
||||
p->cpu_pwf.secs_this_debt_interval += x;
|
||||
cpu_work_fetch.secs_this_debt_interval += x;
|
||||
if (coproc_cuda) {
|
||||
if (gstate.host_info.have_cuda()) {
|
||||
x = dt*avp->ncudas;
|
||||
p->cuda_pwf.secs_this_debt_interval += x;
|
||||
cuda_work_fetch.secs_this_debt_interval += x;
|
||||
}
|
||||
if (coproc_ati) {
|
||||
if (gstate.host_info.have_ati()) {
|
||||
x = dt*avp->natis;
|
||||
p->ati_pwf.secs_this_debt_interval += x;
|
||||
ati_work_fetch.secs_this_debt_interval += x;
|
||||
|
@ -927,10 +927,10 @@ void WORK_FETCH::compute_shares() {
|
|||
if (p->cpu_pwf.may_have_work) {
|
||||
cpu_work_fetch.total_fetchable_share += p->resource_share;
|
||||
}
|
||||
if (coproc_cuda && p->cuda_pwf.may_have_work) {
|
||||
if (gstate.host_info.have_cuda() && p->cuda_pwf.may_have_work) {
|
||||
cuda_work_fetch.total_fetchable_share += p->resource_share;
|
||||
}
|
||||
if (coproc_ati && p->ati_pwf.may_have_work) {
|
||||
if (gstate.host_info.have_ati() && p->ati_pwf.may_have_work) {
|
||||
ati_work_fetch.total_fetchable_share += p->resource_share;
|
||||
}
|
||||
}
|
||||
|
@ -950,10 +950,10 @@ void WORK_FETCH::compute_shares() {
|
|||
if (p->cpu_pwf.may_have_work) {
|
||||
p->cpu_pwf.fetchable_share = cpu_work_fetch.total_fetchable_share?p->resource_share/cpu_work_fetch.total_fetchable_share:1;
|
||||
}
|
||||
if (coproc_cuda && p->cuda_pwf.may_have_work) {
|
||||
if (gstate.host_info.have_cuda() && p->cuda_pwf.may_have_work) {
|
||||
p->cuda_pwf.fetchable_share = cuda_work_fetch.total_fetchable_share?p->resource_share/cuda_work_fetch.total_fetchable_share:1;
|
||||
}
|
||||
if (coproc_ati && p->ati_pwf.may_have_work) {
|
||||
if (gstate.host_info.have_ati() && p->ati_pwf.may_have_work) {
|
||||
p->ati_pwf.fetchable_share = ati_work_fetch.total_fetchable_share?p->resource_share/ati_work_fetch.total_fetchable_share:1;
|
||||
}
|
||||
}
|
||||
|
@ -996,13 +996,13 @@ void WORK_FETCH::write_request(FILE* f, PROJECT* p) {
|
|||
work_req,
|
||||
cpu_work_fetch.req_secs, cpu_work_fetch.req_instances
|
||||
);
|
||||
if (coproc_cuda) {
|
||||
if (gstate.host_info.have_cuda()) {
|
||||
sprintf(buf2, " NVIDIA GPU (%.2f sec, %.2f)",
|
||||
cuda_work_fetch.req_secs, cuda_work_fetch.req_instances
|
||||
);
|
||||
strcat(buf, buf2);
|
||||
}
|
||||
if (coproc_ati) {
|
||||
if (gstate.host_info.have_ati()) {
|
||||
sprintf(buf2, " ATI GPU (%.2f sec, %.2f)",
|
||||
ati_work_fetch.req_secs, ati_work_fetch.req_instances
|
||||
);
|
||||
|
@ -1041,10 +1041,10 @@ void WORK_FETCH::handle_reply(
|
|||
if (cpu_work_fetch.req_secs && !srp->cpu_backoff) {
|
||||
p->cpu_pwf.backoff(p, "CPU");
|
||||
}
|
||||
if (coproc_cuda && coproc_cuda->req_secs && !srp->cuda_backoff) {
|
||||
if (gstate.host_info.have_cuda() && gstate.host_info.coprocs.cuda.req_secs && !srp->cuda_backoff) {
|
||||
p->cuda_pwf.backoff(p, "NVIDIA GPU");
|
||||
}
|
||||
if (coproc_ati && coproc_ati->req_secs && !srp->ati_backoff) {
|
||||
if (gstate.host_info.have_ati() && gstate.host_info.coprocs.ati.req_secs && !srp->ati_backoff) {
|
||||
p->ati_pwf.backoff(p, "ATI GPU");
|
||||
}
|
||||
}
|
||||
|
@ -1072,12 +1072,12 @@ void WORK_FETCH::set_initial_work_request() {
|
|||
cpu_work_fetch.req_secs = 1;
|
||||
cpu_work_fetch.req_instances = 0;
|
||||
cpu_work_fetch.busy_time_estimator.reset();
|
||||
if (coproc_cuda) {
|
||||
if (gstate.host_info.have_cuda()) {
|
||||
cuda_work_fetch.req_secs = 1;
|
||||
cuda_work_fetch.req_instances = 0;
|
||||
cuda_work_fetch.busy_time_estimator.reset();
|
||||
}
|
||||
if (coproc_ati) {
|
||||
if (gstate.host_info.have_ati()) {
|
||||
ati_work_fetch.req_secs = 1;
|
||||
ati_work_fetch.req_instances = 0;
|
||||
ati_work_fetch.busy_time_estimator.reset();
|
||||
|
@ -1092,17 +1092,17 @@ void WORK_FETCH::init() {
|
|||
|
||||
// use 20% as a rough estimate of GPU efficiency
|
||||
|
||||
if (coproc_cuda) {
|
||||
if (gstate.host_info.have_cuda()) {
|
||||
cuda_work_fetch.init(
|
||||
RSC_TYPE_CUDA, coproc_cuda->count,
|
||||
coproc_cuda->count*0.2*coproc_cuda->peak_flops()/cpu_flops
|
||||
RSC_TYPE_CUDA, gstate.host_info.coprocs.cuda.count,
|
||||
gstate.host_info.coprocs.cuda.count*0.2*gstate.host_info.coprocs.cuda.peak_flops()/cpu_flops
|
||||
);
|
||||
}
|
||||
if (coproc_ati) {
|
||||
if (gstate.host_info.have_ati()) {
|
||||
ati_work_fetch.init(
|
||||
RSC_TYPE_ATI,
|
||||
coproc_ati->count,
|
||||
coproc_ati->count*0.2*coproc_ati->peak_flops()/cpu_flops
|
||||
gstate.host_info.coprocs.ati.count,
|
||||
gstate.host_info.coprocs.ati.count*0.2*gstate.host_info.coprocs.ati.peak_flops()/cpu_flops
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -1149,11 +1149,11 @@ void CLIENT_STATE::compute_nuploading_results() {
|
|||
}
|
||||
}
|
||||
int n = gstate.ncpus;
|
||||
if (coproc_cuda && coproc_cuda->count > n) {
|
||||
n = coproc_cuda->count;
|
||||
if (gstate.host_info.have_cuda() && gstate.host_info.coprocs.cuda.count > n) {
|
||||
n = gstate.host_info.coprocs.cuda.count;
|
||||
}
|
||||
if (coproc_ati && coproc_ati->count > n) {
|
||||
n = coproc_ati->count;
|
||||
if (gstate.host_info.have_ati() && gstate.host_info.coprocs.ati.count > n) {
|
||||
n = gstate.host_info.coprocs.ati.count;
|
||||
}
|
||||
n *= 2;
|
||||
for (i=0; i<projects.size(); i++) {
|
||||
|
|
959
lib/coproc.cpp
959
lib/coproc.cpp
|
@ -1,485 +1,474 @@
|
|||
// This file is part of BOINC.
|
||||
// http://boinc.berkeley.edu
|
||||
// Copyright (C) 2007 University of California
|
||||
//
|
||||
// BOINC is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU Lesser General Public License
|
||||
// as published by the Free Software Foundation,
|
||||
// either version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// BOINC is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
// See the GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#if defined(_WIN32) && !defined(__STDWX_H__)
|
||||
#include "boinc_win.h"
|
||||
#elif defined(_WIN32) && defined(__STDWX_H__)
|
||||
#include "stdwx.h"
|
||||
#else
|
||||
#ifdef _USING_FCGI_
|
||||
#include "boinc_fcgi.h"
|
||||
#else
|
||||
#include <cstdio>
|
||||
#endif
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "win_util.h"
|
||||
#endif
|
||||
|
||||
#include "error_numbers.h"
|
||||
#include "filesys.h"
|
||||
#include "parse.h"
|
||||
#include "str_util.h"
|
||||
|
||||
#include "coproc.h"
|
||||
|
||||
#ifndef _USING_FCGI_
|
||||
using std::perror;
|
||||
#endif
|
||||
|
||||
#ifndef _USING_FCGI_
|
||||
// Write the generic description of this coprocessor
// (its type and instance count) as a <coproc> element.
//
void COPROC::write_xml(MIOFILE& f) {
    f.printf(
        "<coproc>\n"
        " <type>%s</type>\n"
        " <count>%d</count>\n"
        "</coproc>\n",
        type,
        count
    );
}
|
||||
#endif
|
||||
|
||||
// Parse the body of a <coproc> element (type and count).
// Returns 0 when the closing tag is reached and a type was given;
// returns ERR_XML_PARSE if the type is empty
// or input ends before the closing tag.
//
int COPROC_REQ::parse(MIOFILE& fin) {
    char line[1024];

    type[0] = 0;
    count = 0;
    while (fin.fgets(line, sizeof(line))) {
        if (!match_tag(line, "</coproc>")) {
            if (parse_str(line, "<type>", type, sizeof(type))) continue;
            if (parse_double(line, "<count>", count)) continue;
            continue;
        }
        // end of element: a type is mandatory
        return strlen(type) ? 0 : ERR_XML_PARSE;
    }
    return ERR_XML_PARSE;
}
|
||||
|
||||
// Parse the body of a <coproc> element into this object.
// All fields handled here are reset first.
// Returns 0 when the closing tag is reached and a type was given;
// returns ERR_XML_PARSE if the type is empty
// or input ends before the closing tag.
//
int COPROC::parse(MIOFILE& fin) {
    char line[1024];

    type[0] = 0;
    count = 0;
    used = 0;
    req_secs = 0;
    estimated_delay = 0;
    req_instances = 0;

    while (fin.fgets(line, sizeof(line))) {
        if (!match_tag(line, "</coproc>")) {
            if (parse_str(line, "<type>", type, sizeof(type))) continue;
            if (parse_int(line, "<count>", count)) continue;
            if (parse_double(line, "<req_secs>", req_secs)) continue;
            if (parse_double(line, "<req_instances>", req_instances)) continue;
            if (parse_double(line, "<estimated_delay>", estimated_delay)) continue;
            continue;
        }
        // end of element: a type is mandatory
        return strlen(type) ? 0 : ERR_XML_PARSE;
    }
    return ERR_XML_PARSE;
}
|
||||
|
||||
void COPROCS::summary_string(char* buf, int len) {
|
||||
char bigbuf[8192], buf2[1024];
|
||||
|
||||
strcpy(bigbuf, "");
|
||||
for (unsigned int i=0; i<coprocs.size(); i++) {
|
||||
COPROC* cp = coprocs[i];
|
||||
if (!strcmp(cp->type, "CUDA")) {
|
||||
COPROC_CUDA* cp2 = (COPROC_CUDA*) cp;
|
||||
int mem = (int)(cp2->prop.dtotalGlobalMem/MEGA);
|
||||
sprintf(buf2, "[CUDA|%s|%d|%dMB|%d]",
|
||||
cp2->prop.name, cp2->count, mem, cp2->display_driver_version
|
||||
);
|
||||
strcat(bigbuf, buf2);
|
||||
} else if (!strcmp(cp->type, "ATI")){
|
||||
COPROC_ATI* cp2 =(COPROC_ATI*) cp;
|
||||
sprintf(buf2,"[CAL|%s|%d|%dMB|%s]",
|
||||
cp2->name, cp2->count, cp2->attribs.localRAM, cp2->version
|
||||
);
|
||||
strcat(bigbuf,buf2);
|
||||
}
|
||||
}
|
||||
bigbuf[len-1] = 0;
|
||||
strcpy(buf, bigbuf);
|
||||
}
|
||||
|
||||
// Parse the body of a <coprocs> element.
// Each <coproc_cuda> or <coproc_ati> child that parses successfully is
// appended to "coprocs"; children that fail to parse are discarded.
// Returns 0 when the closing tag is reached,
// ERR_XML_PARSE if input ends first.
//
int COPROCS::parse(MIOFILE& fin) {
    char buf[1024];

    while (fin.fgets(buf, sizeof(buf))) {
        if (match_tag(buf, "</coprocs>")) {
            return 0;
        }
        if (strstr(buf, "<coproc_cuda>")) {
            COPROC_CUDA* cc = new COPROC_CUDA;
            int retval = cc->parse(fin);
            if (!retval) {
                coprocs.push_back(cc);
            } else {
                // not stored anywhere, so free it here
                delete cc;
            }
        }
        if (strstr(buf, "<coproc_ati>")) {
            COPROC_ATI* cc = new COPROC_ATI;
            int retval = cc->parse(fin);
            if (!retval) {
                coprocs.push_back(cc);
            } else {
                // not stored anywhere, so free it here
                delete cc;
            }
        }
    }
    return ERR_XML_PARSE;
}
|
||||
|
||||
// Write a <coprocs> element containing the XML of every coprocessor.
// Does nothing when built with _USING_FCGI_.
//
void COPROCS::write_xml(MIOFILE& mf) {
#ifndef _USING_FCGI_
    mf.printf(" <coprocs>\n");
    unsigned k = 0;
    while (k < coprocs.size()) {
        coprocs[k]->write_xml(mf);
        k++;
    }
    mf.printf(" </coprocs>\n");
#endif
}
|
||||
|
||||
// Return the first coprocessor whose type string equals "type",
// or NULL if there is none.
//
COPROC* COPROCS::lookup(const char* type) {
    unsigned int idx = 0;
    while (idx < coprocs.size()) {
        COPROC* candidate = coprocs[idx];
        if (strcmp(type, candidate->type) == 0) {
            return candidate;
        }
        idx++;
    }
    return NULL;
}
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#endif
|
||||
|
||||
void COPROC_CUDA::description(char* buf) {
|
||||
char vers[256];
|
||||
if (display_driver_version) {
|
||||
sprintf(vers, "%d", display_driver_version);
|
||||
} else {
|
||||
strcpy(vers, "unknown");
|
||||
}
|
||||
sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0f GFLOPS peak)",
|
||||
prop.name, vers, cuda_version, prop.major, prop.minor,
|
||||
prop.totalGlobalMem/(1024.*1024.), peak_flops()/1e9
|
||||
);
|
||||
}
|
||||
|
||||
#ifndef _USING_FCGI_
|
||||
// Write the full description of this GPU (work-request fields,
// versions and device properties) as a <coproc_cuda> element.
//
void COPROC_CUDA::write_xml(MIOFILE& f) {
    // these properties are printed with %u, so convert them up front
    //
    unsigned int total_global_mem = (unsigned int)prop.totalGlobalMem;
    unsigned int shared_mem_per_block = (unsigned int)prop.sharedMemPerBlock;
    unsigned int mem_pitch = (unsigned int)prop.memPitch;
    unsigned int total_const_mem = (unsigned int)prop.totalConstMem;
    unsigned int texture_alignment = (unsigned int)prop.textureAlignment;

    f.printf(
        "<coproc_cuda>\n"
        " <count>%d</count>\n"
        " <name>%s</name>\n"
        " <req_secs>%f</req_secs>\n"
        " <req_instances>%f</req_instances>\n"
        " <estimated_delay>%f</estimated_delay>\n"
        " <drvVersion>%d</drvVersion>\n"
        " <cudaVersion>%d</cudaVersion>\n"
        " <totalGlobalMem>%u</totalGlobalMem>\n"
        " <sharedMemPerBlock>%u</sharedMemPerBlock>\n"
        " <regsPerBlock>%d</regsPerBlock>\n"
        " <warpSize>%d</warpSize>\n"
        " <memPitch>%u</memPitch>\n"
        " <maxThreadsPerBlock>%d</maxThreadsPerBlock>\n"
        " <maxThreadsDim>%d %d %d</maxThreadsDim>\n"
        " <maxGridSize>%d %d %d</maxGridSize>\n"
        " <totalConstMem>%u</totalConstMem>\n"
        " <major>%d</major>\n"
        " <minor>%d</minor>\n"
        " <clockRate>%d</clockRate>\n"
        " <textureAlignment>%u</textureAlignment>\n"
        " <deviceOverlap>%d</deviceOverlap>\n"
        " <multiProcessorCount>%d</multiProcessorCount>\n"
        "</coproc_cuda>\n",
        count,
        prop.name,
        req_secs,
        req_instances,
        estimated_delay,
        display_driver_version,
        cuda_version,
        total_global_mem,
        shared_mem_per_block,
        prop.regsPerBlock,
        prop.warpSize,
        mem_pitch,
        prop.maxThreadsPerBlock,
        prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2],
        prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2],
        total_const_mem,
        prop.major,
        prop.minor,
        prop.clockRate,
        texture_alignment,
        prop.deviceOverlap,
        prop.multiProcessorCount
    );
}
|
||||
#endif
|
||||
|
||||
// Reset the work-request fields, the version numbers and all device
// properties to their "empty" values.
// Called by parse() before reading an element.
//
void COPROC_CUDA::clear() {
    count = 0;
    used = 0;
    req_secs = 0;
    req_instances = 0;
    estimated_delay = -1;   // mark as absent
    cuda_version = 0;
    display_driver_version = 0;
    strcpy(prop.name, "");
    prop.totalGlobalMem = 0;
    // parse() stores <totalGlobalMem> here and summary_string() reads it,
    // so it must not be left uninitialized when the tag is missing
    //
    prop.dtotalGlobalMem = 0;
    prop.sharedMemPerBlock = 0;
    prop.regsPerBlock = 0;
    prop.warpSize = 0;
    prop.memPitch = 0;
    prop.maxThreadsPerBlock = 0;
    prop.maxThreadsDim[0] = 0;
    prop.maxThreadsDim[1] = 0;
    prop.maxThreadsDim[2] = 0;
    prop.maxGridSize[0] = 0;
    prop.maxGridSize[1] = 0;
    prop.maxGridSize[2] = 0;
    prop.clockRate = 0;
    prop.totalConstMem = 0;
    prop.major = 0;
    prop.minor = 0;
    prop.textureAlignment = 0;
    prop.deviceOverlap = 0;
    prop.multiProcessorCount = 0;
}
|
||||
|
||||
// Parse the body of a <coproc_cuda> element, one tag per line.
// All fields are cleared first; unrecognized lines are ignored.
// Returns 0 when </coproc_cuda> is seen,
// ERR_XML_PARSE if the input ends before that.
//
int COPROC_CUDA::parse(MIOFILE& fin) {
    char buf[1024], buf2[256];
    int n;

    clear();
    while (fin.fgets(buf, sizeof(buf))) {
        if (strstr(buf, "</coproc_cuda>")) {
            return 0;
        }
        if (parse_int(buf, "<count>", count)) continue;
        if (parse_double(buf, "<req_secs>", req_secs)) continue;
        if (parse_double(buf, "<req_instances>", req_instances)) continue;
        if (parse_double(buf, "<estimated_delay>", estimated_delay)) continue;
        if (parse_str(buf, "<name>", prop.name, sizeof(prop.name))) continue;
        if (parse_int(buf, "<drvVersion>", display_driver_version)) continue;
        if (parse_int(buf, "<cudaVersion>", cuda_version)) continue;
        if (parse_double(buf, "<totalGlobalMem>", prop.dtotalGlobalMem)) continue;

        // The next four members were previously passed to parse_int()
        // through an (int&) cast, i.e. their storage was written as if
        // it were an int.
        // NOTE(review): the cast suggests they are not declared int;
        // if they are wider, only part of the member was written.
        // Parse into a real int and assign instead.
        // The (unsigned int) conversion matches how write_xml() emits them.
        //
        if (parse_int(buf, "<sharedMemPerBlock>", n)) {
            prop.sharedMemPerBlock = (unsigned int)n;
            continue;
        }
        if (parse_int(buf, "<regsPerBlock>", prop.regsPerBlock)) continue;
        if (parse_int(buf, "<warpSize>", prop.warpSize)) continue;
        if (parse_int(buf, "<memPitch>", n)) {
            prop.memPitch = (unsigned int)n;
            continue;
        }
        if (parse_int(buf, "<maxThreadsPerBlock>", prop.maxThreadsPerBlock)) continue;
        if (parse_str(buf, "<maxThreadsDim>", buf2, sizeof(buf2))) {
            // can't use sscanf here (FCGI)
            // The value is up to three integers separated by single spaces;
            // missing ones keep the zero set by clear().
            //
            prop.maxThreadsDim[0] = atoi(buf2);
            char* p = strchr(buf2, ' ');
            if (p) {
                p++;
                prop.maxThreadsDim[1] = atoi(p);
                p = strchr(p, ' ');
                if (p) {
                    p++;
                    prop.maxThreadsDim[2] = atoi(p);
                }
            }
            continue;
        }
        if (parse_str(buf, "<maxGridSize>", buf2, sizeof(buf2))) {
            // same layout as <maxThreadsDim>
            prop.maxGridSize[0] = atoi(buf2);
            char* p = strchr(buf2, ' ');
            if (p) {
                p++;
                prop.maxGridSize[1] = atoi(p);
                p = strchr(p, ' ');
                if (p) {
                    p++;
                    prop.maxGridSize[2] = atoi(p);
                }
            }
            continue;
        }
        if (parse_int(buf, "<clockRate>", prop.clockRate)) continue;
        if (parse_int(buf, "<totalConstMem>", n)) {
            prop.totalConstMem = (unsigned int)n;
            continue;
        }
        if (parse_int(buf, "<major>", prop.major)) continue;
        if (parse_int(buf, "<minor>", prop.minor)) continue;
        if (parse_int(buf, "<textureAlignment>", n)) {
            prop.textureAlignment = (unsigned int)n;
            continue;
        }
        if (parse_int(buf, "<deviceOverlap>", prop.deviceOverlap)) continue;
        if (parse_int(buf, "<multiProcessorCount>", prop.multiProcessorCount)) continue;
    }
    return ERR_XML_PARSE;
}
|
||||
|
||||
////////////////// ATI STARTS HERE /////////////////
|
||||
|
||||
#ifndef _USING_FCGI_
|
||||
// Write this object as a <coproc_ati> element:
// count/name/request state, the `attribs` fields, the `info` limits,
// the CAL version string, and the two runtime-detected flags
// (each emitted only when set).
//
void COPROC_ATI::write_xml(MIOFILE& f) {
    f.printf("<coproc_ati>\n");

    // identity and work-request state
    f.printf(
        " <count>%d</count>\n"
        " <name>%s</name>\n"
        " <req_secs>%f</req_secs>\n"
        " <req_instances>%f</req_instances>\n"
        " <estimated_delay>%f</estimated_delay>\n",
        count, name, req_secs, req_instances, estimated_delay
    );

    // device attributes
    f.printf(
        " <target>%d</target>\n"
        " <localRAM>%d</localRAM>\n"
        " <uncachedRemoteRAM>%d</uncachedRemoteRAM>\n"
        " <cachedRemoteRAM>%d</cachedRemoteRAM>\n"
        " <engineClock>%u</engineClock>\n"
        " <memoryClock>%d</memoryClock>\n"
        " <wavefrontSize>%d</wavefrontSize>\n"
        " <numberOfSIMD>%d</numberOfSIMD>\n"
        " <doublePrecision>%d</doublePrecision>\n"
        " <pitch_alignment>%d</pitch_alignment>\n"
        " <surface_alignment>%d</surface_alignment>\n",
        attribs.target,
        attribs.localRAM,
        attribs.uncachedRemoteRAM,
        attribs.cachedRemoteRAM,
        attribs.engineClock,
        attribs.memoryClock,
        attribs.wavefrontSize,
        attribs.numberOfSIMD,
        attribs.doublePrecision,
        attribs.pitch_alignment,
        attribs.surface_alignment
    );

    // resource limits and CAL version
    f.printf(
        " <maxResource1DWidth>%d</maxResource1DWidth>\n"
        " <maxResource2DWidth>%d</maxResource2DWidth>\n"
        " <maxResource2DHeight>%d</maxResource2DHeight>\n"
        " <CALVersion>%s</CALVersion>\n",
        info.maxResource1DWidth,
        info.maxResource2DWidth,
        info.maxResource2DHeight,
        version
    );

    if (atirt_detected) f.printf(" <atirt_detected/>\n");
    if (amdrt_detected) f.printf(" <amdrt_detected/>\n");

    f.printf("</coproc_ati>\n");
}
|
||||
#endif
|
||||
|
||||
// Reset this object to the "no ATI device" state:
// zeroed device structs, empty strings, no runtime detected,
// and estimated_delay = -1.
//
void COPROC_ATI::clear() {
    // device description
    memset(&attribs, 0, sizeof(attribs));
    memset(&info, 0, sizeof(info));
    name[0] = 0;
    version[0] = 0;
    atirt_detected = false;
    amdrt_detected = false;

    // counts and work-request state
    count = 0;
    used = 0;
    req_secs = 0;
    req_instances = 0;
    estimated_delay = -1;
}
|
||||
|
||||
// Parse the body of a <coproc_ati> element, one tag per line.
// All fields are cleared first; unrecognized lines are ignored.
// Returns 0 when </coproc_ati> is seen,
// ERR_XML_PARSE if the input ends before that.
//
int COPROC_ATI::parse(MIOFILE& fin) {
    char line[1024];
    int val;

    clear();

    while (fin.fgets(line, sizeof(line))) {
        if (strstr(line, "</coproc_ati>")) return 0;

        // fields parsed straight into members
        if (parse_int(line, "<count>", count)) continue;
        if (parse_str(line, "<name>", name, sizeof(name))) continue;
        if (parse_double(line, "<req_secs>", req_secs)) continue;
        if (parse_double(line, "<req_instances>", req_instances)) continue;
        if (parse_double(line, "<estimated_delay>", estimated_delay)) continue;

        // `attribs` fields: parsed as int, then converted on assignment
        if (parse_int(line, "<target>", val)) { attribs.target = (CALtarget)val; continue; }
        if (parse_int(line, "<localRAM>", val)) { attribs.localRAM = val; continue; }
        if (parse_int(line, "<uncachedRemoteRAM>", val)) { attribs.uncachedRemoteRAM = val; continue; }
        if (parse_int(line, "<cachedRemoteRAM>", val)) { attribs.cachedRemoteRAM = val; continue; }
        if (parse_int(line, "<engineClock>", val)) { attribs.engineClock = val; continue; }
        if (parse_int(line, "<memoryClock>", val)) { attribs.memoryClock = val; continue; }
        if (parse_int(line, "<wavefrontSize>", val)) { attribs.wavefrontSize = val; continue; }
        if (parse_int(line, "<numberOfSIMD>", val)) { attribs.numberOfSIMD = val; continue; }
        if (parse_int(line, "<doublePrecision>", val)) {
            // any nonzero value counts as true
            attribs.doublePrecision = val ? CAL_TRUE : CAL_FALSE;
            continue;
        }
        if (parse_int(line, "<pitch_alignment>", val)) { attribs.pitch_alignment = val; continue; }
        if (parse_int(line, "<surface_alignment>", val)) { attribs.surface_alignment = val; continue; }

        // `info` fields
        if (parse_int(line, "<maxResource1DWidth>", val)) { info.maxResource1DWidth = val; continue; }
        if (parse_int(line, "<maxResource2DWidth>", val)) { info.maxResource2DWidth = val; continue; }
        if (parse_int(line, "<maxResource2DHeight>", val)) { info.maxResource2DHeight = val; continue; }

        // flags and version string
        if (parse_bool(line, "amdrt_detected", amdrt_detected)) continue;
        if (parse_bool(line, "atirt_detected", atirt_detected)) continue;
        if (parse_str(line, "<CALVersion>", version, sizeof(version))) continue;
    }
    return ERR_XML_PARSE;
}
|
||||
|
||||
void COPROC_ATI::description(char* buf) {
|
||||
sprintf(buf, "%s (CAL version %s, %.0fMB, %.0f GFLOPS peak)",
|
||||
name, version, attribs.localRAM/1024.*1024., peak_flops()/1.e9
|
||||
);
|
||||
}
|
||||
// This file is part of BOINC.
|
||||
// http://boinc.berkeley.edu
|
||||
// Copyright (C) 2007 University of California
|
||||
//
|
||||
// BOINC is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU Lesser General Public License
|
||||
// as published by the Free Software Foundation,
|
||||
// either version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// BOINC is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
// See the GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#if defined(_WIN32) && !defined(__STDWX_H__)
|
||||
#include "boinc_win.h"
|
||||
#elif defined(_WIN32) && defined(__STDWX_H__)
|
||||
#include "stdwx.h"
|
||||
#else
|
||||
#ifdef _USING_FCGI_
|
||||
#include "boinc_fcgi.h"
|
||||
#else
|
||||
#include <cstdio>
|
||||
#endif
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "win_util.h"
|
||||
#endif
|
||||
|
||||
#include "error_numbers.h"
|
||||
#include "filesys.h"
|
||||
#include "parse.h"
|
||||
#include "str_util.h"
|
||||
|
||||
#include "coproc.h"
|
||||
|
||||
#ifndef _USING_FCGI_
|
||||
using std::perror;
|
||||
#endif
|
||||
|
||||
#ifndef _USING_FCGI_
|
||||
// Write the generic form of a coprocessor:
// a <coproc> element holding only its type and count.
//
void COPROC::write_xml(MIOFILE& f) {
    f.printf("<coproc>\n");
    f.printf(" <type>%s</type>\n", type);
    f.printf(" <count>%d</count>\n", count);
    f.printf("</coproc>\n");
}
|
||||
#endif
|
||||
|
||||
// Parse the body of a <coproc> element into type and count.
// Returns 0 at </coproc> if a non-empty <type> was seen;
// ERR_XML_PARSE if the type is empty or the input ends first.
//
int COPROC_REQ::parse(MIOFILE& fin) {
    char line[1024];

    type[0] = 0;
    count = 0;
    while (fin.fgets(line, sizeof(line))) {
        if (match_tag(line, "</coproc>")) {
            // an element without a type is not usable
            return type[0] ? 0 : ERR_XML_PARSE;
        }
        if (parse_str(line, "<type>", type, sizeof(type))) continue;
        if (parse_double(line, "<count>", count)) continue;
    }
    return ERR_XML_PARSE;
}
|
||||
|
||||
// Parse the body of a generic <coproc> element.
// Resets type, count, used and the request fields first
// (estimated_delay is reset to 0 here).
// Returns 0 at </coproc> if a non-empty <type> was seen;
// ERR_XML_PARSE if the type is empty or the input ends first.
//
int COPROC::parse(MIOFILE& fin) {
    char line[1024];

    type[0] = 0;
    count = 0;
    used = 0;
    req_secs = 0;
    req_instances = 0;
    estimated_delay = 0;

    while (fin.fgets(line, sizeof(line))) {
        if (match_tag(line, "</coproc>")) {
            // an element without a type is not usable
            return type[0] ? 0 : ERR_XML_PARSE;
        }
        if (parse_str(line, "<type>", type, sizeof(type))) continue;
        if (parse_int(line, "<count>", count)) continue;
        if (parse_double(line, "<req_secs>", req_secs)) continue;
        if (parse_double(line, "<req_instances>", req_instances)) continue;
        if (parse_double(line, "<estimated_delay>", estimated_delay)) continue;
    }
    return ERR_XML_PARSE;
}
|
||||
|
||||
void COPROCS::summary_string(char* buf, int len) {
|
||||
char bigbuf[8192], buf2[1024];
|
||||
|
||||
strcpy(bigbuf, "");
|
||||
if (cuda.count) {
|
||||
int mem = (int)(cuda.prop.dtotalGlobalMem/MEGA);
|
||||
sprintf(buf2, "[CUDA|%s|%d|%dMB|%d]",
|
||||
cuda.prop.name, cuda.count, mem, cuda.display_driver_version
|
||||
);
|
||||
strcat(bigbuf, buf2);
|
||||
}
|
||||
if (ati.count) {
|
||||
sprintf(buf2,"[CAL|%s|%d|%dMB|%s]",
|
||||
ati.name, ati.count, ati.attribs.localRAM, ati.version
|
||||
);
|
||||
strcat(bigbuf,buf2);
|
||||
}
|
||||
bigbuf[len-1] = 0;
|
||||
strcpy(buf, bigbuf);
|
||||
}
|
||||
|
||||
// Parse the body of a <coprocs> element.
// Each <coproc_cuda> / <coproc_ati> child is handed to the matching
// member's parse(); if that fails, the member is cleared.
// Returns 0 at </coprocs>, ERR_XML_PARSE if the input ends first.
//
int COPROCS::parse(MIOFILE& fin) {
    char line[1024];

    while (fin.fgets(line, sizeof(line))) {
        if (match_tag(line, "</coprocs>")) return 0;

        // a child that fails to parse is treated as absent
        if (strstr(line, "<coproc_cuda>")) {
            if (cuda.parse(fin)) cuda.clear();
        }
        if (strstr(line, "<coproc_ati>")) {
            if (ati.parse(fin)) ati.clear();
        }
    }
    return ERR_XML_PARSE;
}
|
||||
|
||||
// Write a <coprocs> element containing each coprocessor type
// whose count is nonzero.
// Does nothing when built with _USING_FCGI_.
//
void COPROCS::write_xml(MIOFILE& mf) {
#ifndef _USING_FCGI_
    mf.printf(" <coprocs>\n");
    if (cuda.count) cuda.write_xml(mf);
    if (ati.count) ati.write_xml(mf);
    mf.printf(" </coprocs>\n");
#endif
}
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#endif
|
||||
|
||||
void COPROC_CUDA::description(char* buf) {
|
||||
char vers[256];
|
||||
if (display_driver_version) {
|
||||
sprintf(vers, "%d", display_driver_version);
|
||||
} else {
|
||||
strcpy(vers, "unknown");
|
||||
}
|
||||
sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0f GFLOPS peak)",
|
||||
prop.name, vers, cuda_version, prop.major, prop.minor,
|
||||
prop.totalGlobalMem/(1024.*1024.), peak_flops()/1e9
|
||||
);
|
||||
}
|
||||
|
||||
#ifndef _USING_FCGI_
|
||||
// Write this object as a <coproc_cuda> element:
// count/name/request state, driver and CUDA versions,
// then the device properties in `prop`.
// Properties cast to (unsigned int) are written with %u.
//
void COPROC_CUDA::write_xml(MIOFILE& f) {
    // identity, work-request state, versions
    f.printf(
        "<coproc_cuda>\n"
        " <count>%d</count>\n"
        " <name>%s</name>\n"
        " <req_secs>%f</req_secs>\n"
        " <req_instances>%f</req_instances>\n"
        " <estimated_delay>%f</estimated_delay>\n"
        " <drvVersion>%d</drvVersion>\n"
        " <cudaVersion>%d</cudaVersion>\n",
        count,
        prop.name,
        req_secs,
        req_instances,
        estimated_delay,
        display_driver_version,
        cuda_version
    );

    // memory sizes and thread/grid limits
    f.printf(
        " <totalGlobalMem>%u</totalGlobalMem>\n"
        " <sharedMemPerBlock>%u</sharedMemPerBlock>\n"
        " <regsPerBlock>%d</regsPerBlock>\n"
        " <warpSize>%d</warpSize>\n"
        " <memPitch>%u</memPitch>\n"
        " <maxThreadsPerBlock>%d</maxThreadsPerBlock>\n"
        " <maxThreadsDim>%d %d %d</maxThreadsDim>\n"
        " <maxGridSize>%d %d %d</maxGridSize>\n",
        (unsigned int)prop.totalGlobalMem,
        (unsigned int)prop.sharedMemPerBlock,
        prop.regsPerBlock,
        prop.warpSize,
        (unsigned int)prop.memPitch,
        prop.maxThreadsPerBlock,
        prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2],
        prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]
    );

    // remaining properties and closing tag
    f.printf(
        " <totalConstMem>%u</totalConstMem>\n"
        " <major>%d</major>\n"
        " <minor>%d</minor>\n"
        " <clockRate>%d</clockRate>\n"
        " <textureAlignment>%u</textureAlignment>\n"
        " <deviceOverlap>%d</deviceOverlap>\n"
        " <multiProcessorCount>%d</multiProcessorCount>\n"
        "</coproc_cuda>\n",
        (unsigned int)prop.totalConstMem,
        prop.major,
        prop.minor,
        prop.clockRate,
        (unsigned int)prop.textureAlignment,
        prop.deviceOverlap,
        prop.multiProcessorCount
    );
}
|
||||
#endif
|
||||
|
||||
// Reset every field to the "no CUDA device" state.
// parse() calls this first, so a record that omits a tag
// leaves the corresponding field zeroed.
//
void COPROC_CUDA::clear() {
    // generic coprocessor state
    count = 0;
    used = 0;
    req_secs = 0;
    req_instances = 0;
    estimated_delay = -1;   // mark as absent

    // version info
    cuda_version = 0;
    display_driver_version = 0;

    // device properties
    strcpy(prop.name, "");
    prop.totalGlobalMem = 0;
    // parse() stores <totalGlobalMem> in dtotalGlobalMem and
    // COPROCS::summary_string() reads it back.
    // Reset it as well; otherwise a value from an earlier parse
    // survives a clear().
    //
    prop.dtotalGlobalMem = 0;
    prop.sharedMemPerBlock = 0;
    prop.regsPerBlock = 0;
    prop.warpSize = 0;
    prop.memPitch = 0;
    prop.maxThreadsPerBlock = 0;
    prop.maxThreadsDim[0] = 0;
    prop.maxThreadsDim[1] = 0;
    prop.maxThreadsDim[2] = 0;
    prop.maxGridSize[0] = 0;
    prop.maxGridSize[1] = 0;
    prop.maxGridSize[2] = 0;
    prop.clockRate = 0;
    prop.totalConstMem = 0;
    prop.major = 0;
    prop.minor = 0;
    prop.textureAlignment = 0;
    prop.deviceOverlap = 0;
    prop.multiProcessorCount = 0;
}
|
||||
|
||||
// Parse the body of a <coproc_cuda> element, one tag per line.
// All fields are cleared first; unrecognized lines are ignored.
// Returns 0 when </coproc_cuda> is seen,
// ERR_XML_PARSE if the input ends before that.
//
int COPROC_CUDA::parse(MIOFILE& fin) {
    char buf[1024], buf2[256];
    int n;

    clear();
    while (fin.fgets(buf, sizeof(buf))) {
        if (strstr(buf, "</coproc_cuda>")) {
            return 0;
        }
        if (parse_int(buf, "<count>", count)) continue;
        if (parse_double(buf, "<req_secs>", req_secs)) continue;
        if (parse_double(buf, "<req_instances>", req_instances)) continue;
        if (parse_double(buf, "<estimated_delay>", estimated_delay)) continue;
        if (parse_str(buf, "<name>", prop.name, sizeof(prop.name))) continue;
        if (parse_int(buf, "<drvVersion>", display_driver_version)) continue;
        if (parse_int(buf, "<cudaVersion>", cuda_version)) continue;
        if (parse_double(buf, "<totalGlobalMem>", prop.dtotalGlobalMem)) continue;

        // The next four members were previously passed to parse_int()
        // through an (int&) cast, i.e. their storage was written as if
        // it were an int.
        // NOTE(review): the cast suggests they are not declared int;
        // if they are wider, only part of the member was written.
        // Parse into a real int and assign instead.
        // The (unsigned int) conversion matches how write_xml() emits them.
        //
        if (parse_int(buf, "<sharedMemPerBlock>", n)) {
            prop.sharedMemPerBlock = (unsigned int)n;
            continue;
        }
        if (parse_int(buf, "<regsPerBlock>", prop.regsPerBlock)) continue;
        if (parse_int(buf, "<warpSize>", prop.warpSize)) continue;
        if (parse_int(buf, "<memPitch>", n)) {
            prop.memPitch = (unsigned int)n;
            continue;
        }
        if (parse_int(buf, "<maxThreadsPerBlock>", prop.maxThreadsPerBlock)) continue;
        if (parse_str(buf, "<maxThreadsDim>", buf2, sizeof(buf2))) {
            // can't use sscanf here (FCGI)
            // The value is up to three integers separated by single spaces;
            // missing ones keep the zero set by clear().
            //
            prop.maxThreadsDim[0] = atoi(buf2);
            char* p = strchr(buf2, ' ');
            if (p) {
                p++;
                prop.maxThreadsDim[1] = atoi(p);
                p = strchr(p, ' ');
                if (p) {
                    p++;
                    prop.maxThreadsDim[2] = atoi(p);
                }
            }
            continue;
        }
        if (parse_str(buf, "<maxGridSize>", buf2, sizeof(buf2))) {
            // same layout as <maxThreadsDim>
            prop.maxGridSize[0] = atoi(buf2);
            char* p = strchr(buf2, ' ');
            if (p) {
                p++;
                prop.maxGridSize[1] = atoi(p);
                p = strchr(p, ' ');
                if (p) {
                    p++;
                    prop.maxGridSize[2] = atoi(p);
                }
            }
            continue;
        }
        if (parse_int(buf, "<clockRate>", prop.clockRate)) continue;
        if (parse_int(buf, "<totalConstMem>", n)) {
            prop.totalConstMem = (unsigned int)n;
            continue;
        }
        if (parse_int(buf, "<major>", prop.major)) continue;
        if (parse_int(buf, "<minor>", prop.minor)) continue;
        if (parse_int(buf, "<textureAlignment>", n)) {
            prop.textureAlignment = (unsigned int)n;
            continue;
        }
        if (parse_int(buf, "<deviceOverlap>", prop.deviceOverlap)) continue;
        if (parse_int(buf, "<multiProcessorCount>", prop.multiProcessorCount)) continue;
    }
    return ERR_XML_PARSE;
}
|
||||
|
||||
////////////////// ATI STARTS HERE /////////////////
|
||||
|
||||
#ifndef _USING_FCGI_
|
||||
// Write this object as a <coproc_ati> element:
// count/name/request state, the `attribs` fields, the `info` limits,
// the CAL version string, and the two runtime-detected flags
// (each emitted only when set).
//
void COPROC_ATI::write_xml(MIOFILE& f) {
    f.printf("<coproc_ati>\n");

    // identity and work-request state
    f.printf(
        " <count>%d</count>\n"
        " <name>%s</name>\n"
        " <req_secs>%f</req_secs>\n"
        " <req_instances>%f</req_instances>\n"
        " <estimated_delay>%f</estimated_delay>\n",
        count, name, req_secs, req_instances, estimated_delay
    );

    // device attributes
    f.printf(
        " <target>%d</target>\n"
        " <localRAM>%d</localRAM>\n"
        " <uncachedRemoteRAM>%d</uncachedRemoteRAM>\n"
        " <cachedRemoteRAM>%d</cachedRemoteRAM>\n"
        " <engineClock>%u</engineClock>\n"
        " <memoryClock>%d</memoryClock>\n"
        " <wavefrontSize>%d</wavefrontSize>\n"
        " <numberOfSIMD>%d</numberOfSIMD>\n"
        " <doublePrecision>%d</doublePrecision>\n"
        " <pitch_alignment>%d</pitch_alignment>\n"
        " <surface_alignment>%d</surface_alignment>\n",
        attribs.target,
        attribs.localRAM,
        attribs.uncachedRemoteRAM,
        attribs.cachedRemoteRAM,
        attribs.engineClock,
        attribs.memoryClock,
        attribs.wavefrontSize,
        attribs.numberOfSIMD,
        attribs.doublePrecision,
        attribs.pitch_alignment,
        attribs.surface_alignment
    );

    // resource limits and CAL version
    f.printf(
        " <maxResource1DWidth>%d</maxResource1DWidth>\n"
        " <maxResource2DWidth>%d</maxResource2DWidth>\n"
        " <maxResource2DHeight>%d</maxResource2DHeight>\n"
        " <CALVersion>%s</CALVersion>\n",
        info.maxResource1DWidth,
        info.maxResource2DWidth,
        info.maxResource2DHeight,
        version
    );

    if (atirt_detected) f.printf(" <atirt_detected/>\n");
    if (amdrt_detected) f.printf(" <amdrt_detected/>\n");

    f.printf("</coproc_ati>\n");
}
|
||||
#endif
|
||||
|
||||
// Reset this object to the "no ATI device" state:
// zeroed device structs, empty strings, no runtime detected,
// and estimated_delay = -1.
//
void COPROC_ATI::clear() {
    // device description
    memset(&attribs, 0, sizeof(attribs));
    memset(&info, 0, sizeof(info));
    name[0] = 0;
    version[0] = 0;
    atirt_detected = false;
    amdrt_detected = false;

    // counts and work-request state
    count = 0;
    used = 0;
    req_secs = 0;
    req_instances = 0;
    estimated_delay = -1;
}
|
||||
|
||||
// Parse the body of a <coproc_ati> element, one tag per line.
// All fields are cleared first; unrecognized lines are ignored.
// Returns 0 when </coproc_ati> is seen,
// ERR_XML_PARSE if the input ends before that.
//
int COPROC_ATI::parse(MIOFILE& fin) {
    char line[1024];
    int val;

    clear();

    while (fin.fgets(line, sizeof(line))) {
        if (strstr(line, "</coproc_ati>")) return 0;

        // fields parsed straight into members
        if (parse_int(line, "<count>", count)) continue;
        if (parse_str(line, "<name>", name, sizeof(name))) continue;
        if (parse_double(line, "<req_secs>", req_secs)) continue;
        if (parse_double(line, "<req_instances>", req_instances)) continue;
        if (parse_double(line, "<estimated_delay>", estimated_delay)) continue;

        // `attribs` fields: parsed as int, then converted on assignment
        if (parse_int(line, "<target>", val)) { attribs.target = (CALtarget)val; continue; }
        if (parse_int(line, "<localRAM>", val)) { attribs.localRAM = val; continue; }
        if (parse_int(line, "<uncachedRemoteRAM>", val)) { attribs.uncachedRemoteRAM = val; continue; }
        if (parse_int(line, "<cachedRemoteRAM>", val)) { attribs.cachedRemoteRAM = val; continue; }
        if (parse_int(line, "<engineClock>", val)) { attribs.engineClock = val; continue; }
        if (parse_int(line, "<memoryClock>", val)) { attribs.memoryClock = val; continue; }
        if (parse_int(line, "<wavefrontSize>", val)) { attribs.wavefrontSize = val; continue; }
        if (parse_int(line, "<numberOfSIMD>", val)) { attribs.numberOfSIMD = val; continue; }
        if (parse_int(line, "<doublePrecision>", val)) {
            // any nonzero value counts as true
            attribs.doublePrecision = val ? CAL_TRUE : CAL_FALSE;
            continue;
        }
        if (parse_int(line, "<pitch_alignment>", val)) { attribs.pitch_alignment = val; continue; }
        if (parse_int(line, "<surface_alignment>", val)) { attribs.surface_alignment = val; continue; }

        // `info` fields
        if (parse_int(line, "<maxResource1DWidth>", val)) { info.maxResource1DWidth = val; continue; }
        if (parse_int(line, "<maxResource2DWidth>", val)) { info.maxResource2DWidth = val; continue; }
        if (parse_int(line, "<maxResource2DHeight>", val)) { info.maxResource2DHeight = val; continue; }

        // flags and version string
        if (parse_bool(line, "amdrt_detected", amdrt_detected)) continue;
        if (parse_bool(line, "atirt_detected", atirt_detected)) continue;
        if (parse_str(line, "<CALVersion>", version, sizeof(version))) continue;
    }
    return ERR_XML_PARSE;
}
|
||||
|
||||
void COPROC_ATI::description(char* buf) {
|
||||
sprintf(buf, "%s (CAL version %s, %.0fMB, %.0f GFLOPS peak)",
|
||||
name, version, attribs.localRAM/1024.*1024., peak_flops()/1.e9
|
||||
);
|
||||
}
|
||||
|
|
115
lib/coproc.h
115
lib/coproc.h
|
@ -144,6 +144,12 @@ struct COPROC {
|
|||
available_ram_unknown[i] = true;
|
||||
}
|
||||
}
|
||||
inline void clear_usage() {
|
||||
for (int i=0; i<count; i++) {
|
||||
usage[i] = 0;
|
||||
pending_usage[i] = 0;
|
||||
}
|
||||
}
|
||||
COPROC(const char* t){
|
||||
clear();
|
||||
strcpy(type, t);
|
||||
|
@ -156,59 +162,6 @@ struct COPROC {
|
|||
void print_available_ram();
|
||||
};
|
||||
|
||||
struct COPROCS {
|
||||
std::vector<COPROC*> coprocs; // not deleted in destructor
|
||||
// so any structure that includes this needs to do it manually
|
||||
|
||||
COPROCS(){}
|
||||
~COPROCS(){} // don't delete coprocs; else crash in APP_INIT_DATA logic
|
||||
void write_xml(MIOFILE& out);
|
||||
void get(
|
||||
bool use_all, std::vector<std::string> &descs,
|
||||
std::vector<std::string> &warnings,
|
||||
std::vector<int>& ignore_cuda_dev,
|
||||
std::vector<int>& ignore_ati_dev
|
||||
);
|
||||
int parse(MIOFILE&);
|
||||
void summary_string(char*, int);
|
||||
COPROC* lookup(const char*);
|
||||
bool fully_used() {
|
||||
for (unsigned int i=0; i<coprocs.size(); i++) {
|
||||
COPROC* cp = coprocs[i];
|
||||
if (cp->used < cp->count) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Copy a coproc set, possibly setting usage to zero.
|
||||
// used in round-robin simulator and CPU scheduler,
|
||||
// to avoid messing w/ master copy
|
||||
//
|
||||
void clone(COPROCS& c, bool copy_used) {
|
||||
for (unsigned int i=0; i<c.coprocs.size(); i++) {
|
||||
COPROC* cp = c.coprocs[i];
|
||||
COPROC* cp2 = new COPROC(cp->type);
|
||||
cp2->count = cp->count;
|
||||
if (copy_used) cp2->used = cp->used;
|
||||
coprocs.push_back(cp2);
|
||||
}
|
||||
}
|
||||
inline void clear_usage() {
|
||||
for (unsigned int i=0; i<coprocs.size(); i++) {
|
||||
COPROC* cp = coprocs[i];
|
||||
for (int j=0; j<cp->count; j++) {
|
||||
cp->usage[j] = 0;
|
||||
cp->pending_usage[j] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
inline void delete_coprocs() {
|
||||
for (unsigned int i=0; i<coprocs.size(); i++) {
|
||||
delete coprocs[i];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// the following copied from /usr/local/cuda/include/driver_types.h
|
||||
//
|
||||
struct cudaDeviceProp {
|
||||
|
@ -243,8 +196,8 @@ struct COPROC_CUDA : public COPROC {
|
|||
#endif
|
||||
COPROC_CUDA(): COPROC("CUDA"){}
|
||||
virtual ~COPROC_CUDA(){}
|
||||
static void get(
|
||||
COPROCS&, bool use_all,
|
||||
void get(
|
||||
bool use_all,
|
||||
std::vector<std::string>&, std::vector<std::string>&,
|
||||
std::vector<int>& ignore_devs
|
||||
);
|
||||
|
@ -268,6 +221,8 @@ struct COPROC_CUDA : public COPROC {
|
|||
void get_available_ram();
|
||||
|
||||
bool check_running_graphics_app();
|
||||
void fake(double, int);
|
||||
|
||||
};
|
||||
|
||||
enum CUdevice_attribute_enum {
|
||||
|
@ -305,7 +260,7 @@ struct COPROC_ATI : public COPROC {
|
|||
#endif
|
||||
COPROC_ATI(): COPROC("ATI"){}
|
||||
virtual ~COPROC_ATI(){}
|
||||
static void get(COPROCS&,
|
||||
void get(
|
||||
std::vector<std::string>&, std::vector<std::string>&,
|
||||
std::vector<int>& ignore_devs
|
||||
);
|
||||
|
@ -318,9 +273,53 @@ struct COPROC_ATI : public COPROC {
|
|||
return x?x:5e10;
|
||||
}
|
||||
void get_available_ram();
|
||||
void fake(double, int);
|
||||
};
|
||||
|
||||
extern COPROC_CUDA* fake_cuda(COPROCS&, double, int);
|
||||
extern COPROC_ATI* fake_ati(COPROCS&, double, int);
|
||||
struct COPROCS {
|
||||
COPROC_CUDA cuda;
|
||||
COPROC_ATI ati;
|
||||
|
||||
COPROCS(){}
|
||||
~COPROCS(){} // don't delete coprocs; else crash in APP_INIT_DATA logic
|
||||
void write_xml(MIOFILE& out);
|
||||
void get(
|
||||
bool use_all, std::vector<std::string> &descs,
|
||||
std::vector<std::string> &warnings,
|
||||
std::vector<int>& ignore_cuda_dev,
|
||||
std::vector<int>& ignore_ati_dev
|
||||
);
|
||||
int parse(MIOFILE&);
|
||||
void summary_string(char*, int);
|
||||
bool fully_used() {
|
||||
if (cuda.used < cuda.count) return false;
|
||||
if (ati.used < ati.count) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Copy a coproc set, possibly setting usage to zero.
|
||||
// used in round-robin simulator and CPU scheduler,
|
||||
// to avoid messing w/ master copy
|
||||
//
|
||||
void clone(COPROCS& c, bool copy_used) {
|
||||
c.cuda = cuda;
|
||||
c.ati = ati;
|
||||
if (!copy_used) {
|
||||
c.cuda.used = 0;
|
||||
c.ati.used = 0;
|
||||
}
|
||||
}
|
||||
inline void clear() {
|
||||
cuda.count = 0;
|
||||
ati.count = 0;
|
||||
}
|
||||
inline void clear_usage() {
|
||||
cuda.clear_usage();
|
||||
ati.clear_usage();
|
||||
}
|
||||
inline bool none() {
|
||||
return (cuda.count==0) && (ati.count==0);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -64,6 +64,8 @@ void HOST_INFO::clear_host_info() {
|
|||
|
||||
strcpy(os_name, "");
|
||||
strcpy(os_version, "");
|
||||
|
||||
coprocs.clear();
|
||||
}
|
||||
|
||||
int HOST_INFO::parse(MIOFILE& in, bool benchmarks_only) {
|
||||
|
|
|
@ -78,6 +78,12 @@ public:
|
|||
void clear_host_info();
|
||||
void make_random_string(const char* salt, char* out);
|
||||
void generate_host_cpid();
|
||||
inline bool have_cuda() {
|
||||
return (coprocs.cuda.count > 0);
|
||||
}
|
||||
inline bool have_ati() {
|
||||
return (coprocs.ati.count > 0);
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef __APPLE__
|
||||
|
|
|
@ -1056,8 +1056,8 @@ bool bad_install_type() {
|
|||
static inline bool requesting_work() {
|
||||
if (g_request->work_req_seconds > 0) return true;
|
||||
if (g_request->cpu_req_secs > 0) return true;
|
||||
if (g_request->coproc_cuda && g_request->coproc_cuda->req_secs) return true;
|
||||
if (g_request->coproc_ati && g_request->coproc_ati->req_secs) return true;
|
||||
if (g_request->coprocs.cuda.count && g_request->coprocs.cuda.req_secs) return true;
|
||||
if (g_request->coprocs.ati.count && g_request->coprocs.ati.req_secs) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -138,8 +138,8 @@ static inline bool app_plan_ati(
|
|||
SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu
|
||||
) {
|
||||
char buf[256];
|
||||
COPROC_ATI* cp = (COPROC_ATI*)sreq.coprocs.lookup("ATI");
|
||||
if (!cp) {
|
||||
COPROC_ATI* cp = &sreq.coprocs.ati;
|
||||
if (!cp->count) {
|
||||
if (config.debug_version_select) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[version] Host lacks ATI GPU for plan class ati\n"
|
||||
|
@ -320,8 +320,8 @@ static inline bool app_plan_cuda(
|
|||
SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu
|
||||
) {
|
||||
char buf[256];
|
||||
COPROC_CUDA* cp = (COPROC_CUDA*)sreq.coprocs.lookup("CUDA");
|
||||
if (!cp) {
|
||||
COPROC_CUDA* cp = &sreq.coprocs.cuda;
|
||||
if (!cp->count) {
|
||||
if (config.debug_version_select) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[version] Host lacks CUDA coprocessor for plan class %s\n",
|
||||
|
|
|
@ -101,18 +101,8 @@ void WORK_REQ::get_job_limits() {
|
|||
if (n < 1) n = 1;
|
||||
effective_ncpus = n;
|
||||
|
||||
n = 0;
|
||||
COPROC* cp = g_request->coprocs.lookup("CUDA");
|
||||
if (cp) {
|
||||
n = cp->count;
|
||||
if (n > MAX_GPUS) n = MAX_GPUS;
|
||||
}
|
||||
cp = g_request->coprocs.lookup("ATI");
|
||||
if (cp) {
|
||||
if (cp->count <= MAX_GPUS && cp->count > n) {
|
||||
n = cp->count;
|
||||
}
|
||||
}
|
||||
n = g_request->coprocs.cuda.count + g_request->coprocs.ati.count;
|
||||
if (n > MAX_GPUS) n = MAX_GPUS;
|
||||
effective_ngpus = n;
|
||||
|
||||
int mult = effective_ncpus + config.gpu_multiplier * effective_ngpus;
|
||||
|
@ -581,9 +571,9 @@ static inline bool hard_app(APP& app) {
|
|||
|
||||
static inline double get_estimated_delay(BEST_APP_VERSION& bav) {
|
||||
if (bav.host_usage.ncudas) {
|
||||
return g_request->coproc_cuda->estimated_delay;
|
||||
return g_request->coprocs.cuda.estimated_delay;
|
||||
} else if (bav.host_usage.natis) {
|
||||
return g_request->coproc_ati->estimated_delay;
|
||||
return g_request->coprocs.ati.estimated_delay;
|
||||
} else {
|
||||
return g_request->cpu_estimated_delay;
|
||||
}
|
||||
|
@ -591,9 +581,9 @@ static inline double get_estimated_delay(BEST_APP_VERSION& bav) {
|
|||
|
||||
static inline void update_estimated_delay(BEST_APP_VERSION& bav, double dt) {
|
||||
if (bav.host_usage.ncudas) {
|
||||
g_request->coproc_cuda->estimated_delay += dt;
|
||||
g_request->coprocs.cuda.estimated_delay += dt;
|
||||
} else if (bav.host_usage.natis) {
|
||||
g_request->coproc_ati->estimated_delay += dt;
|
||||
g_request->coprocs.ati.estimated_delay += dt;
|
||||
} else {
|
||||
g_request->cpu_estimated_delay += dt;
|
||||
}
|
||||
|
@ -1475,18 +1465,18 @@ void send_work_setup() {
|
|||
g_wreq->cpu_req_instances = g_request->cpu_req_instances;
|
||||
g_wreq->anonymous_platform = anonymous(g_request->platforms.list[0]);
|
||||
|
||||
if (g_request->coproc_cuda) {
|
||||
g_wreq->cuda_req_secs = clamp_req_sec(g_request->coproc_cuda->req_secs);
|
||||
g_wreq->cuda_req_instances = g_request->coproc_cuda->req_instances;
|
||||
if (g_request->coproc_cuda->estimated_delay < 0) {
|
||||
g_request->coproc_cuda->estimated_delay = g_request->cpu_estimated_delay;
|
||||
if (g_request->coprocs.cuda.count) {
|
||||
g_wreq->cuda_req_secs = clamp_req_sec(g_request->coprocs.cuda.req_secs);
|
||||
g_wreq->cuda_req_instances = g_request->coprocs.cuda.req_instances;
|
||||
if (g_request->coprocs.cuda.estimated_delay < 0) {
|
||||
g_request->coprocs.cuda.estimated_delay = g_request->cpu_estimated_delay;
|
||||
}
|
||||
}
|
||||
if (g_request->coproc_ati) {
|
||||
g_wreq->ati_req_secs = clamp_req_sec(g_request->coproc_ati->req_secs);
|
||||
g_wreq->ati_req_instances = g_request->coproc_ati->req_instances;
|
||||
if (g_request->coproc_ati->estimated_delay < 0) {
|
||||
g_request->coproc_ati->estimated_delay = g_request->cpu_estimated_delay;
|
||||
if (g_request->coprocs.ati.count) {
|
||||
g_wreq->ati_req_secs = clamp_req_sec(g_request->coprocs.ati.req_secs);
|
||||
g_wreq->ati_req_instances = g_request->coprocs.ati.req_instances;
|
||||
if (g_request->coprocs.ati.estimated_delay < 0) {
|
||||
g_request->coprocs.ati.estimated_delay = g_request->cpu_estimated_delay;
|
||||
}
|
||||
}
|
||||
if (g_wreq->cpu_req_secs || g_wreq->cuda_req_secs || g_wreq->ati_req_secs) {
|
||||
|
@ -1508,18 +1498,18 @@ void send_work_setup() {
|
|||
g_wreq->cpu_req_secs, g_wreq->cpu_req_instances,
|
||||
g_request->cpu_estimated_delay
|
||||
);
|
||||
if (g_request->coproc_cuda) {
|
||||
if (g_request->coprocs.cuda.count) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[send] CUDA: req %.2f sec, %.2f instances; est delay %.2f\n",
|
||||
g_wreq->cuda_req_secs, g_wreq->cuda_req_instances,
|
||||
g_request->coproc_cuda->estimated_delay
|
||||
g_request->coprocs.cuda.estimated_delay
|
||||
);
|
||||
}
|
||||
if (g_request->coproc_ati) {
|
||||
if (g_request->coprocs.ati.count) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[send] ATI: req %.2f sec, %.2f instances; est delay %.2f\n",
|
||||
g_wreq->ati_req_secs, g_wreq->ati_req_instances,
|
||||
g_request->coproc_ati->estimated_delay
|
||||
g_request->coprocs.ati.estimated_delay
|
||||
);
|
||||
}
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
|
|
|
@ -198,8 +198,7 @@ const char* SCHEDULER_REQUEST::parse(FILE* fin) {
|
|||
have_time_stats_log = false;
|
||||
client_cap_plan_class = false;
|
||||
sandbox = -1;
|
||||
coproc_cuda = 0;
|
||||
coproc_ati = 0;
|
||||
coprocs.clear();
|
||||
|
||||
fgets(buf, sizeof(buf), fin);
|
||||
if (!match_tag(buf, "<scheduler_request>")) return "no start tag";
|
||||
|
@ -367,8 +366,6 @@ const char* SCHEDULER_REQUEST::parse(FILE* fin) {
|
|||
MIOFILE mf;
|
||||
mf.init_file(fin);
|
||||
coprocs.parse(mf);
|
||||
coproc_cuda = (COPROC_CUDA*)coprocs.lookup("CUDA");
|
||||
coproc_ati = (COPROC_ATI*)coprocs.lookup("ATI");
|
||||
continue;
|
||||
}
|
||||
if (parse_bool(buf, "client_cap_plan_class", client_cap_plan_class)) continue;
|
||||
|
|
|
@ -402,8 +402,6 @@ struct SCHEDULER_REQUEST {
|
|||
HOST host; // request message is parsed into here.
|
||||
// does NOT contain the full host record.
|
||||
COPROCS coprocs;
|
||||
COPROC_CUDA* coproc_cuda;
|
||||
COPROC_ATI* coproc_ati;
|
||||
std::vector<RESULT> results;
|
||||
// completed results being reported
|
||||
std::vector<MSG_FROM_HOST_DESC> msgs_from_host;
|
||||
|
|
Loading…
Reference in New Issue