Client: don't start jobs that will exceed RAM

Fixes #5641
David Anderson 2024-06-06 02:52:12 -07:00
parent 423f9f08d2
commit 5e0d0db47c
1 changed file with 18 additions and 11 deletions


@@ -1309,26 +1309,33 @@ bool CLIENT_STATE::enforce_run_list(vector<RESULT*>& run_list) {
         // until it reaches deadline pressure.
         // So we'll go with 1).
-        // skip jobs whose working set is too large to fit in available RAM
+        // skip jobs whose 'expected working set size' (EWSS)
+        // is too large to fit in available RAM.
+        // The EWSS is the max of
+        // 1) the recent average WSS of the job
+        // 2) if it hasn't run yet,
+        //    the max WSS of other jobs of this app version
+        // 3) the WU's rsc_memory_bound
+        // This handles the case of CPDN, where jobs run with
+        // small WSS for a while and then get big.
+        //
-        double wss = 0;
+        double ewss = 0;
         if (atp) {
             atp->too_large = false;
-            wss = atp->procinfo.working_set_size_smoothed;
+            ewss = atp->procinfo.working_set_size_smoothed;
         } else {
-            wss = rp->avp->max_working_set_size;
+            ewss = rp->avp->max_working_set_size;
         }
-        if (wss == 0) {
-            wss = rp->wup->rsc_memory_bound;
-        }
-        if (wss > ram_left) {
+        ewss = std::max(ewss, rp->wup->rsc_memory_bound);
+        if (ewss > ram_left) {
             if (atp) {
                 atp->too_large = true;
             }
             if (log_flags.cpu_sched_debug || log_flags.mem_usage_debug) {
                 msg_printf(rp->project, MSG_INFO,
-                    "[cpu_sched_debug] can't run %s: WS too big %.2fMB > %.2fMB",
-                    rp->name, wss/MEGA, ram_left/MEGA
+                    "[cpu_sched_debug] skipping %s: estimated WSS (%.2fMB) exceeds RAM left (%.2fMB)",
+                    rp->name, ewss/MEGA, ram_left/MEGA
                 );
             }
             continue;
@@ -1357,7 +1364,7 @@ bool CLIENT_STATE::enforce_run_list(vector<RESULT*>& run_list) {
         ncpus_used += rp->avp->avg_ncpus;
         atp->next_scheduler_state = CPU_SCHED_SCHEDULED;
-        ram_left -= wss;
+        ram_left -= ewss;
         if (have_max_concurrent) {
             max_concurrent_inc(rp);
         }
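
For context outside the client source, here is a minimal standalone sketch of the EWSS rule the new comment describes: use the job's smoothed working set if it has run, otherwise the app version's largest observed working set, and never estimate below the workunit's rsc_memory_bound. The struct and function names (JobEstimate, expected_wss) and the local MEGA constant are illustrative stand-ins, not the client's actual types.

#include <algorithm>
#include <cstdio>

// Illustrative stand-ins for the client's per-job fields
// (hypothetical names; only the selection logic mirrors the diff).
struct JobEstimate {
    bool has_run;                 // job has an active task (atp != NULL)
    double smoothed_wss;          // recent average working set size
    double app_version_max_wss;   // max WSS of other jobs of this app version
    double rsc_memory_bound;      // workunit's declared memory bound
};

// Expected working set size (EWSS): recent average if the job has run,
// otherwise the app version's max; floored by rsc_memory_bound.
double expected_wss(const JobEstimate& j) {
    double ewss = j.has_run ? j.smoothed_wss : j.app_version_max_wss;
    return std::max(ewss, j.rsc_memory_bound);
}

int main() {
    const double MEGA = 1e6;
    double ram_left = 2000 * MEGA;

    // CPDN-like job: small working set so far, but declared to grow to 2.5 GB.
    JobEstimate job{true, 300 * MEGA, 0, 2500 * MEGA};

    double ewss = expected_wss(job);
    if (ewss > ram_left) {
        std::printf("skip: EWSS %.2fMB exceeds RAM left %.2fMB\n",
            ewss / MEGA, ram_left / MEGA);
    } else {
        ram_left -= ewss;   // reserve the estimate, as the scheduler does
    }
    return 0;
}

Taking the max with rsc_memory_bound is what holds back CPDN-style jobs whose memory use starts small but is declared large, even when their current working set would fit in the remaining RAM.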