mirror of https://github.com/BOINC/boinc.git
scheduler: add <rte_no_stats> config flag to estimate job runtime without stats
The scheduler estimates job runtime based on statistics of past jobs for this (host, app version) pair. This doesn't work well if the distribution of runtimes is very wide, as may be the case for universal apps. If this flag is set, runtime estimation is based solely on the CPU/GPU peak FLOPS and the job's FLOPs estimate.
This commit is contained in:
parent
28cc28b4a0
commit
306605e6d8
|
@ -299,6 +299,7 @@ int SCHED_CONFIG::parse(FILE* f) {
|
||||||
if (xp.parse_double("maintenance_delay", maintenance_delay)) continue;
|
if (xp.parse_double("maintenance_delay", maintenance_delay)) continue;
|
||||||
if (xp.parse_bool("credit_by_app", credit_by_app)) continue;
|
if (xp.parse_bool("credit_by_app", credit_by_app)) continue;
|
||||||
if (xp.parse_bool("keyword_sched", keyword_sched)) continue;
|
if (xp.parse_bool("keyword_sched", keyword_sched)) continue;
|
||||||
|
if (xp.parse_bool("rte_no_stats", rte_no_stats)) continue;
|
||||||
|
|
||||||
//////////// SCHEDULER LOG FLAGS /////////
|
//////////// SCHEDULER LOG FLAGS /////////
|
||||||
|
|
||||||
|
|
|
@ -182,6 +182,8 @@ struct SCHED_CONFIG {
|
||||||
// store per-app credit info in credit_user and credit_team
|
// store per-app credit info in credit_user and credit_team
|
||||||
bool keyword_sched;
|
bool keyword_sched;
|
||||||
// score jobs based on keywords
|
// score jobs based on keywords
|
||||||
|
bool rte_no_stats;
|
||||||
|
// don't use statistics in job runtime estimation
|
||||||
|
|
||||||
// time intervals
|
// time intervals
|
||||||
double maintenance_delay;
|
double maintenance_delay;
|
||||||
|
|
|
@ -344,9 +344,32 @@ void estimate_flops_anon_platform() {
|
||||||
// This prevents jobs from aborting with "time limit exceeded"
|
// This prevents jobs from aborting with "time limit exceeded"
|
||||||
// even if the estimate supplied by the plan class function is way off
|
// even if the estimate supplied by the plan class function is way off
|
||||||
//
|
//
|
||||||
|
|
||||||
|
#define RTE_HAV_STATS 1
|
||||||
|
#define RTE_AV_STATS 2
|
||||||
|
#define RTE_NO_STATS 3
|
||||||
|
|
||||||
void estimate_flops(HOST_USAGE& hu, APP_VERSION& av) {
|
void estimate_flops(HOST_USAGE& hu, APP_VERSION& av) {
|
||||||
DB_HOST_APP_VERSION* havp = gavid_to_havp(av.id);
|
int mode;
|
||||||
if (havp && havp->et.n > MIN_HOST_SAMPLES) {
|
DB_HOST_APP_VERSION* havp = NULL;
|
||||||
|
|
||||||
|
if (config.rte_no_stats) {
|
||||||
|
mode = RTE_NO_STATS;
|
||||||
|
} else {
|
||||||
|
havp = gavid_to_havp(av.id);
|
||||||
|
if (havp && havp->et.n > MIN_HOST_SAMPLES) {
|
||||||
|
mode = RTE_HAV_STATS;
|
||||||
|
} else {
|
||||||
|
if (av.pfc.n > MIN_VERSION_SAMPLES) {
|
||||||
|
mode = RTE_AV_STATS;
|
||||||
|
} else {
|
||||||
|
mode = RTE_NO_STATS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (mode) {
|
||||||
|
case RTE_HAV_STATS:
|
||||||
double new_flops;
|
double new_flops;
|
||||||
if (config.estimate_flops_from_hav_pfc) {
|
if (config.estimate_flops_from_hav_pfc) {
|
||||||
new_flops = hu.peak_flops / (havp->pfc.get_avg()+1e-18);
|
new_flops = hu.peak_flops / (havp->pfc.get_avg()+1e-18);
|
||||||
|
@ -387,24 +410,25 @@ void estimate_flops(HOST_USAGE& hu, APP_VERSION& av) {
|
||||||
1e-9/havp->et.get_avg()
|
1e-9/havp->et.get_avg()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else {
|
break;
|
||||||
if (av.pfc.n > MIN_VERSION_SAMPLES) {
|
case RTE_AV_STATS:
|
||||||
hu.projected_flops = hu.peak_flops/av.pfc.get_avg();
|
hu.projected_flops = hu.peak_flops/av.pfc.get_avg();
|
||||||
if (config.debug_version_select) {
|
if (config.debug_version_select) {
|
||||||
log_messages.printf(MSG_NORMAL,
|
log_messages.printf(MSG_NORMAL,
|
||||||
"[version] [AV#%lu] (%s) adjusting projected flops based on PFC avg: %.2fG\n",
|
"[version] [AV#%lu] (%s) adjusting projected flops based on PFC avg: %.2fG\n",
|
||||||
av.id, av.plan_class, hu.projected_flops/1e9
|
av.id, av.plan_class, hu.projected_flops/1e9
|
||||||
);
|
);
|
||||||
}
|
|
||||||
} else {
|
|
||||||
hu.projected_flops = g_reply->host.p_fpops * (hu.avg_ncpus + GPU_CPU_RATIO*hu.gpu_usage);
|
|
||||||
if (config.debug_version_select) {
|
|
||||||
log_messages.printf(MSG_NORMAL,
|
|
||||||
"[version] [AV#%lu] (%s) using conservative projected flops: %.2fG\n",
|
|
||||||
av.id, av.plan_class, hu.projected_flops/1e9
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
case RTE_NO_STATS:
|
||||||
|
hu.projected_flops = g_reply->host.p_fpops * (hu.avg_ncpus + GPU_CPU_RATIO*hu.gpu_usage);
|
||||||
|
if (config.debug_version_select) {
|
||||||
|
log_messages.printf(MSG_NORMAL,
|
||||||
|
"[version] [AV#%lu] (%s) using conservative projected flops: %.2fG\n",
|
||||||
|
av.id, av.plan_class, hu.projected_flops/1e9
|
||||||
|
);
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue