From a327a0e249bdeec4131062f6d9db9f35d5b36bdd Mon Sep 17 00:00:00 2001
From: David Anderson
Date: Sun, 10 May 2020 19:27:10 -0700
Subject: [PATCH] client: avoid CPU starvation when GPU computing is suspended

Suppose we have 1 CPU and have (only) a GPU job that takes 1 CPU.
If GPU computing is suspended we won't fetch GPU work,
because the RR simulation doesn't take "suspended" into account.

Solution: in the RR simulation, if GPU computing is suspended,
act like GPU jobs don't use any CPU.

Seems like this should work though there may be unintended consequences.
---
 client/rr_sim.cpp | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/client/rr_sim.cpp b/client/rr_sim.cpp
index 9c3a932478..7ffd55f8b0 100644
--- a/client/rr_sim.cpp
+++ b/client/rr_sim.cpp
@@ -97,10 +97,17 @@ struct RR_SIM {
     inline void activate(RESULT* rp) {
         PROJECT* p = rp->project;
         active.push_back(rp);
-        rsc_work_fetch[0].sim_nused += rp->avp->avg_ncpus;
-        p->rsc_pwf[0].sim_nused += rp->avp->avg_ncpus;
-        int rt = rp->avp->gpu_usage.rsc_type;
+        int rt = rp->avp->gpu_usage.rsc_type;
+
+        // if this is a GPU app and GPU computing is suspended,
+        // don't count its CPU usage.
+        // That way we'll fetch more CPU work if needed.
+        //
+        if (!rt || !gpu_suspend_reason) {
+            rsc_work_fetch[0].sim_nused += rp->avp->avg_ncpus;
+            p->rsc_pwf[0].sim_nused += rp->avp->avg_ncpus;
+        }
+
         if (rt) {
             rsc_work_fetch[rt].sim_nused += rp->avp->gpu_usage.usage;
             p->rsc_pwf[rt].sim_nused += rp->avp->gpu_usage.usage;