diff --git a/checkin_notes b/checkin_notes
index c33dc35789..59d2a48dd2 100644
--- a/checkin_notes
+++ b/checkin_notes
@@ -2371,3 +2371,15 @@ David 20 Apr 2011
util.inc
user/
login_form.php
+
+David 20 Apr 2011
+ - client: if an app version has a missing GPU,
+ we still need to keep track of the usage
+ and write it to the state file.
+ - get client simulator working again
+
+ client/
+ client_types.cpp,h
+ sim.cpp
+ tools/
+ backend_lib.cpp
diff --git a/client/client_types.cpp b/client/client_types.cpp
index dba6b9794c..fb32058fd8 100644
--- a/client/client_types.cpp
+++ b/client/client_types.cpp
@@ -1334,6 +1334,7 @@ int APP_VERSION::parse(MIOFILE& in) {
gpu_usage.usage = cp.count;
} else {
missing_coproc = true;
+ missing_coproc_usage = cp.count;
strcpy(missing_coproc_name, cp.type);
}
} else {
@@ -1394,6 +1395,16 @@ int APP_VERSION::write(MIOFILE& out, bool write_file_info) {
gpu_usage.usage
);
}
+ if (missing_coproc) {
+ out.printf(
+ " \n"
+ " %s\n"
+ " %f\n"
+ " \n",
+ missing_coproc_name,
+ missing_coproc_usage
+ );
+ }
if (gpu_ram) {
out.printf(
" %f\n",
diff --git a/client/client_types.h b/client/client_types.h
index 725ebb15bb..b7ee38fbfe 100644
--- a/client/client_types.h
+++ b/client/client_types.h
@@ -542,6 +542,7 @@ struct APP_VERSION {
// so that we don't run a long sequence of jobs,
// each of which turns out not to fit in available RAM
bool missing_coproc;
+ double missing_coproc_usage;
char missing_coproc_name[256];
int index; // temp var for make_scheduler_request()
diff --git a/client/sim.cpp b/client/sim.cpp
index f82a225b22..7701409dd7 100644
--- a/client/sim.cpp
+++ b/client/sim.cpp
@@ -78,8 +78,8 @@
#define SCHED_RETRY_DELAY_MIN 60 // 1 minute
#define SCHED_RETRY_DELAY_MAX (60*60*4) // 4 hours
-const char* infile_prefix = "";
-const char* outfile_prefix = "";
+const char* infile_prefix = ".";
+const char* outfile_prefix = ".";
#define TIMELINE_FNAME "timeline.html"
#define LOG_FNAME "log.txt"
@@ -133,20 +133,20 @@ void usage(char* prog) {
//
double app_peak_flops(APP_VERSION* avp, double cpu_scale) {
double x = avp->avg_ncpus*cpu_scale;
- if (avp->ncudas) {
- x += avp->ncudas * cuda_work_fetch.relative_speed;
- }
- if (avp->natis) {
- x += avp->natis * ati_work_fetch.relative_speed;
+ int rt = avp->gpu_usage.rsc_type;
+ if (rt) {
+ x += avp->gpu_usage.usage * rsc_work_fetch[rt].relative_speed;
}
x *= gstate.host_info.p_fpops;
return x;
}
double gpu_peak_flops() {
- double cuda = gstate.host_info.coprocs.cuda.count * cuda_work_fetch.relative_speed * gstate.host_info.p_fpops;
- double ati = gstate.host_info.coprocs.ati.count * ati_work_fetch.relative_speed * gstate.host_info.p_fpops;
- return cuda + ati;
+ double x = 0;
+ for (int i=1; i& apps) {
bool app_version_needs_work(APP_VERSION* avp) {
if (avp->dont_use) return false;
- if (avp->ncudas) {
- return (cuda_work_fetch.req_secs>0 || cuda_work_fetch.req_instances>0);
+ int rt = avp->gpu_usage.rsc_type;
+ if (rt) {
+ return (rsc_work_fetch[rt].req_secs>0 || rsc_work_fetch[rt].req_instances>0);
}
- if (avp->natis) {
- return (ati_work_fetch.req_secs>0 || ati_work_fetch.req_instances>0);
- }
- return (cpu_work_fetch.req_secs>0 || cpu_work_fetch.req_instances>0);
+ return (rsc_work_fetch[0].req_secs>0 || rsc_work_fetch[0].req_instances>0);
}
bool has_app_version_needing_work(APP* app) {
@@ -326,19 +324,16 @@ void decrement_request(RESULT* rp) {
APP_VERSION* avp = rp->avp;
double est_runtime = rp->wup->rsc_fpops_est/avp->flops;
est_runtime /= (gstate.time_stats.on_frac*gstate.time_stats.active_frac);
- decrement_request_rsc(cpu_work_fetch, avp->avg_ncpus, est_runtime);
- decrement_request_rsc(cuda_work_fetch, avp->ncudas, est_runtime);
- decrement_request_rsc(ati_work_fetch, avp->natis, est_runtime);
+ decrement_request_rsc(rsc_work_fetch[0], avp->avg_ncpus, est_runtime);
+ int rt = avp->gpu_usage.rsc_type;
+ if (rt) {
+ decrement_request_rsc(rsc_work_fetch[rt], avp->gpu_usage.usage, est_runtime);
+ }
}
double get_estimated_delay(RESULT* rp) {
- if (rp->avp->ncudas) {
- return cuda_work_fetch.estimated_delay;
- } else if (rp->avp->natis) {
- return ati_work_fetch.estimated_delay;
- } else {
- return cpu_work_fetch.estimated_delay;
- }
+ int rt = rp->avp->gpu_usage.rsc_type;
+ return rsc_work_fetch[rt].estimated_delay;
}
// simulate trying to do an RPC;
@@ -352,14 +347,21 @@ bool CLIENT_STATE::simulate_rpc(PROJECT* p) {
// save request params for WORK_FETCH::handle_reply
//
- double save_cpu_req_secs = cpu_work_fetch.req_secs;
- host_info.coprocs.cuda.req_secs = cuda_work_fetch.req_secs;
- host_info.coprocs.ati.req_secs = ati_work_fetch.req_secs;
+ double save_cpu_req_secs = rsc_work_fetch[0].req_secs;
+ for (int i=1; inrpc_failures = 0;
p->sched_rpc_pending = false;
@@ -800,13 +802,9 @@ void show_resource(int rsc_type) {
if (rsc_type!=RSC_TYPE_CPU && rp->resource_type() != rsc_type) continue;
if (atp->task_state() != PROCESS_EXECUTING) continue;
PROJECT* p = rp->project;
- double ninst;
- if (rsc_type == RSC_TYPE_CUDA) {
- ninst = rp->avp->ncudas;
- } else if (rsc_type == RSC_TYPE_ATI) {
- ninst = rp->avp->natis;
- } else {
- ninst = rp->avp->avg_ncpus;
+ double ninst=0;
+ if (rsc_type == rp->avp->gpu_usage.rsc_type) {
+ ninst = rp->avp->gpu_usage.usage;
}
fprintf(html_out, "%.2f: %s%s: %.2fG
",
@@ -856,11 +854,11 @@ void html_start() {
fprintf(html_out,
"CPU Job name and estimated time left color denotes project * means EDF mode | ", WIDTH2
);
- if (gstate.host_info.have_cuda()) {
+ if (coprocs.have_nvidia()) {
fprintf(html_out, "NVIDIA GPU | ", WIDTH2);
nproc_types++;
}
- if (gstate.host_info.have_ati()) {
+ if (coprocs.have_ati()) {
fprintf(html_out, "ATI GPU | ", WIDTH2);
nproc_types++;
}
@@ -885,26 +883,17 @@ void html_rec() {
if (active) {
show_resource(RSC_TYPE_CPU);
if (gpu_active) {
- if (gstate.host_info.have_cuda()) {
- show_resource(RSC_TYPE_CUDA);
- }
- if (gstate.host_info.have_ati()) {
- show_resource(RSC_TYPE_ATI);
+ for (int i=1; iOFF", WIDTH2);
- }
- if (gstate.host_info.have_ati()) {
+ for (int i=1; iOFF", WIDTH2);
}
}
} else {
fprintf(html_out, "OFF | ", WIDTH2);
- if (gstate.host_info.have_cuda()) {
- fprintf(html_out, "OFF | ", WIDTH2);
- }
- if (gstate.host_info.have_ati()) {
+ for (int i=1; iOFF", WIDTH2);
}
}
@@ -990,19 +979,13 @@ void write_debts() {
PROJECT* p = gstate.projects[i];
fprintf(debt_file, "%f %f %f ",
p->pwf.overall_debt,
- p->cpu_pwf.long_term_debt,
- p->cpu_pwf.short_term_debt
+ p->rsc_pwf[0].long_term_debt,
+ p->rsc_pwf[0].short_term_debt
);
- if (gstate.host_info.have_cuda()) {
+ for (int j=1; jcuda_pwf.long_term_debt,
- p->cuda_pwf.short_term_debt
- );
- }
- if (gstate.host_info.have_ati()) {
- fprintf(debt_file, "%f %f",
- p->ati_pwf.long_term_debt,
- p->ati_pwf.short_term_debt
+ p->rsc_pwf[j].long_term_debt,
+ p->rsc_pwf[j].short_term_debt
);
}
}
@@ -1038,16 +1021,16 @@ void make_graph(const char* title, const char* fname, int field, int nfields) {
}
void debt_graphs() {
- int nfields = 3 + (gstate.host_info.have_cuda()?2:0) + (gstate.host_info.have_ati()?2:0);
+ int nfields = 3 + (coprocs.have_nvidia()?2:0) + (coprocs.have_ati()?2:0);
make_graph("Overall debt", "debt_overall", 0, nfields);
make_graph("CPU LTD", "debt_cpu_ltd", 1, nfields);
make_graph("CPU STD", "debt_cpu_std", 2, nfields);
- if (gstate.host_info.have_cuda()) {
+ if (coprocs.have_nvidia()) {
make_graph("NVIDIA LTD", "debt_nvidia_ltd", 3, nfields);
make_graph("NVIDIA STD", "debt_nvidia_std", 4, nfields);
}
- if (gstate.host_info.have_ati()) {
- int off = gstate.host_info.have_cuda()?2:0;
+ if (coprocs.have_ati()) {
+ int off = coprocs.have_nvidia()?2:0;
make_graph("ATI LTD", "debt_ati_ltd", 3+off, nfields);
make_graph("ATI STD", "debt_ati_std", 4+off, nfields);
}
@@ -1091,11 +1074,12 @@ void simulate() {
"hardware\n %d CPUs, %fG\n",
gstate.host_info.p_ncpus, gstate.host_info.p_fpops/1e9
);
- if (gstate.host_info.have_cuda()) {
+ for (int i=1; iapp != app) continue;
fprintf(summary_file,
- " app version %s %d (%s): ncpus %.2f ncuda %.2f nati %.2f flops %.0fG\n",
+ " app version %s %d (%s): ncpus %.2f rsc %d usage %.2f flops %.0fG\n",
avp->app_name, avp->version_num, avp->plan_class,
- avp->avg_ncpus, avp->ncudas, avp->natis,
+ avp->avg_ncpus, avp->gpu_usage.rsc_type, avp->gpu_usage.usage,
avp->flops/1e9
);
}
@@ -1263,9 +1247,9 @@ void clear_backoff() {
unsigned int i;
for (i=0; icpu_pwf.reset();
- p->cuda_pwf.reset();
- p->ati_pwf.reset();
+        for (int j=0; j<coprocs.n_rsc_types; j++) {
+            p->rsc_pwf[j].reset();
+ }
p->min_rpc_time = 0;
}
}
@@ -1280,20 +1264,15 @@ void cull_projects() {
for (i=0; idont_request_more_work = true;
- p->no_cpu_apps = true;
- p->no_cuda_apps = true;
- p->no_ati_apps = true;
+        for (int j=0; j<coprocs.n_rsc_types; j++) {
+            p->no_rsc_apps[j] = true;
+ }
}
for (i=0; iapp->ignore) continue;
- if (avp->ncudas) {
- avp->project->no_cuda_apps = false;
- } else if (avp->natis) {
- avp->project->no_ati_apps = false;
- } else {
- avp->project->no_cpu_apps = false;
- }
+ int rt = avp->gpu_usage.rsc_type;
+ avp->project->no_rsc_apps[rt] = false;
}
for (i=0; iproject_name,
- p->no_cpu_apps?" no CPU apps":"",
- p->no_cuda_apps?" no nvidia apps":"",
- p->no_ati_apps?" no ATI apps":""
- );
+ fprintf(summary_file, "%s: ", p->project_name);
+        for (int j=0; j<coprocs.n_rsc_types; j++) {
+            if (p->no_rsc_apps[j]) {
+ fprintf(summary_file, " no %s apps", coprocs.coprocs[j].type);
+ }
+ }
}
}
diff --git a/tools/backend_lib.cpp b/tools/backend_lib.cpp
index 98bcb4f232..4b7225770b 100644
--- a/tools/backend_lib.cpp
+++ b/tools/backend_lib.cpp
@@ -583,7 +583,7 @@ int create_work(
wu, wu_template, infiles, ninfiles, config_loc, command_line, additional_xml
);
if (retval) {
- fprintf(stderr, "process_wu_template: %d\n", retval);
+ fprintf(stderr, "process_wu_template(): %d\n", retval);
return retval;
}
@@ -591,7 +591,10 @@ int create_work(
result_template_filepath, _result_template, sizeof(_result_template)
);
if (retval) {
- fprintf(stderr, "create_work: can't read result template file %s\n", result_template_filepath);
+ fprintf(stderr,
+ "create_work: can't read result template file %s\n",
+ result_template_filepath
+ );
return retval;
}