- SCHED: Fixed problem that prevented proper driver version checking in cuda and
  nvidia plan classes in plan_class_spec.xml
- SCHED: Scheduler was not using properly estimated performance when assigning
  work.  It was using theoretical performance to choose version and actual
  performance to determine how long it would take.  I've changed that to start
  with theoretical performance and converge to actual performance as
  host_app_version pfc_n increases.
- SCHED: Added some additional app version selection debugging output.
This commit is contained in:
Eric J Korpela 2013-05-19 11:08:36 -07:00
parent 7ce699e6c3
commit 6c76ddd45c
3 changed files with 25 additions and 2 deletions

View File

@ -369,6 +369,9 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu) {
}
return false;
}
// in analogy to ATI/AMD
driver_version=cp.display_driver_version;
if (min_gpu_ram_mb) {
gpu_requirements[PROC_TYPE_NVIDIA_GPU].update(0, min_gpu_ram_mb * MEGA);
@ -603,6 +606,7 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu) {
}
hu.max_ncpus = hu.avg_ncpus;
#if 0
if (config.debug_version_select) {
log_messages.printf(MSG_NORMAL,
"[version] plan_class_spec: host_flops: %e, \tscale: %.2f, \tprojected_flops: %e, \tpeak_flops: %e\n",
@ -610,6 +614,7 @@ bool PLAN_CLASS_SPEC::check(SCHEDULER_REQUEST& sreq, HOST_USAGE& hu) {
hu.peak_flops
);
}
#endif
return true;

View File

@ -91,7 +91,7 @@ int SCHED_CONFIG::parse(FILE* f) {
strcpy(httpd_user, "apache");
max_ncpus = MAX_NCPUS;
scheduler_log_buffer = 32768;
version_select_random_factor = .1;
version_select_random_factor = 1.;
if (!xp.parse_start("boinc")) return ERR_XML_PARSE;
if (!xp.parse_start("config")) return ERR_XML_PARSE;

View File

@ -796,11 +796,22 @@ BEST_APP_VERSION* get_app_version(
double r = 1;
long n=1;
if (havp) {
// slowly move from raw calc to measured performance as number
// of results increases
n=std::max((long)havp->pfc.n,(long)n);
}
double old_projected_flops=host_usage.projected_flops;
estimate_flops(host_usage, av);
host_usage.projected_flops=(host_usage.projected_flops*(n-1)+old_projected_flops)/n;
}
if (config.version_select_random_factor) {
r += config.version_select_random_factor*rand_normal()/n;
}
if (config.debug_version_select && bavp && bavp->avp) {
log_messages.printf(MSG_NORMAL,
"[version] Comparing AV#%d (%.2f GFLOP) against AV#%d (%.2f GFLOP)\n",
av.id,host_usage.projected_flops/1e+9,bavp->avp->id,bavp->host_usage.projected_flops/1e+9
);
}
if (r*host_usage.projected_flops > bavp->host_usage.projected_flops) {
if (config.debug_version_select && (host_usage.projected_flops <= bavp->host_usage.projected_flops)) {
log_messages.printf(MSG_NORMAL,
@ -813,6 +824,13 @@ BEST_APP_VERSION* get_app_version(
bavp->avp = &av;
bavp->reliable = app_version_is_reliable(av.id);
bavp->trusted = app_version_is_trusted(av.id);
if (config.debug_version_select) {
log_messages.printf(MSG_NORMAL,
"[version] Best app version is now AV%d (%.2f GFLOP)\n",
bavp->avp->id, bavp->host_usage.projected_flops/1e+9
);
}
}
} // loop over app versions