mirror of https://github.com/BOINC/boinc.git
Client: fix job scheduling bug that could starve CPUs
Job scheduling has 2 phases:
- make_run_list(): build a sorted list of runnable jobs
- enforce_run_list(): go through the list and run jobs

The run list in general contains more jobs than can actually be run.
This is intentional: there are lots of reasons why enforce_run_list()
might not be able to run a particular job, and we don't know these
during make_run_list(), so we need to give enforce_run_list() a surplus
of choices.

The problem: make_run_list() was accounting the RAM usage of jobs in the
list, and stopping when this exceeded physical RAM. This led to a
situation where we added a bunch of GPU jobs to the list - more than
could actually be run - and this caused too few CPU jobs to be put in
the list. Oddly, the comment at the start of cpu_sched.cpp said that RAM
usage was ignored by make_run_list(); this was not the case.

Anyway, I removed RAM accounting from make_run_list().
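To make the two-phase structure concrete, here is a minimal sketch of the idea. It is not the actual client code: the Job struct, the priority field, and the 2*ncpus cutoff are illustrative stand-ins for BOINC's real data structures and heuristics. The point is that make_run_list() deliberately over-fills the list (and does no RAM accounting), while enforce_run_list() makes the final run/no-run decisions:

// Illustrative sketch only -- simplified stand-ins, not BOINC's real types.
#include <algorithm>
#include <vector>

struct Job {
    double avg_ncpus;   // CPUs this job would use
    double priority;    // higher = more important
};

// Phase 1: build an ordered run list with a surplus of candidates.
// Note: no RAM accounting here; per-job obstacles are left to phase 2.
std::vector<Job> make_run_list(std::vector<Job> runnable, double ncpus) {
    std::sort(runnable.begin(), runnable.end(),
        [](const Job& a, const Job& b) { return a.priority > b.priority; }
    );
    std::vector<Job> run_list;
    double ncpus_listed = 0;
    for (const Job& j : runnable) {
        run_list.push_back(j);
        ncpus_listed += j.avg_ncpus;
        // stop only once we clearly have more candidates than can run
        if (ncpus_listed >= 2 * ncpus) break;
    }
    return run_list;
}

// Phase 2: scan the list in order, running jobs until the CPUs are committed.
// The real enforce_run_list() also checks GPU assignment, memory, backoff, etc.
std::vector<Job> enforce_run_list(const std::vector<Job>& run_list, double ncpus) {
    std::vector<Job> running;
    double ncpus_used = 0;
    for (const Job& j : run_list) {
        if (ncpus_used >= ncpus) break;
        running.push_back(j);
        ncpus_used += j.avg_ncpus;
    }
    return running;
}

Because the list-building phase stops on a generous CPU-count bound rather than on RAM, a burst of GPU jobs can no longer crowd CPU jobs out of the list; the per-job reasons a job might not run are checked only in the enforcement phase, as described above.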
parent 2a84d20eae
commit 17e44af601
client/cpu_sched.cpp

@@ -17,7 +17,7 @@
 // CPU scheduling logic.
 //
-// - create an ordered "run list" (schedule_cpus).
+// - create an ordered "run list" (make_run_list()).
 // The ordering is roughly as follows:
 // - GPU jobs first, then CPU jobs
 // - for a given resource, jobs in deadline danger first
@@ -39,8 +39,8 @@
 // - sort the list according to "more_important()"
 // - shuffle the list to avoid starving multi-thread jobs
 //
-// - scan through the resulting list,
-//   running the jobs and preempting other jobs.
+// - scan through the resulting list, running the jobs and preempting
+//   other jobs (enforce_run_list).
 // Don't run a job if
 // - its GPUs can't be assigned (possible if need >1 GPU)
 // - it's a multi-thread job, and CPU usage would be #CPUs+1 or more
@@ -99,7 +99,6 @@ struct PROC_RESOURCES {
     double ncpus_used_st;   // #CPUs of GPU or single-thread jobs
     double ncpus_used_mt;   // #CPUs of multi-thread jobs
     COPROCS pr_coprocs;
-    double ram_left;

     void init() {
         ncpus = gstate.ncpus;
@@ -107,7 +106,6 @@ struct PROC_RESOURCES {
         ncpus_used_mt = 0;
         pr_coprocs.clone(coprocs, false);
         pr_coprocs.clear_usage();
-        ram_left = gstate.available_ram();
         if (have_max_concurrent) {
             max_concurrent_init();
         }
@@ -116,7 +114,11 @@ struct PROC_RESOURCES {
     // should we stop scanning jobs?
     //
     inline bool stop_scan_cpu() {
-        return ncpus_used_st >= ncpus;
+        if (ncpus_used_st >= ncpus) return true;
+        if (ncpus_used_mt >= 2*ncpus) return true;
+            // kind of arbitrary, but need to have some limit
+            // in case there are only MT jobs, and lots of them
+        return false;
     }

     inline bool stop_scan_coproc(int rsc_type) {
@@ -131,7 +133,6 @@ struct PROC_RESOURCES {
     // (i.e add it to the runnable list; not actually run it)
     //
     bool can_schedule(RESULT* rp, ACTIVE_TASK* atp) {
-        double wss;
         if (max_concurrent_exceeded(rp)) return false;
         if (atp) {
             // don't schedule if something's pending
@@ -154,11 +155,7 @@ struct PROC_RESOURCES {
                 }
                 atp->needs_shmem = false;
             }
-            wss = atp->procinfo.working_set_size_smoothed;
-        } else {
-            wss = rp->avp->max_working_set_size;
         }
-        if (wss > ram_left) return false;
         if (rp->schedule_backoff > gstate.now) return false;
         if (rp->uses_coprocs()) {
             if (gpu_suspend_reason) return false;
@@ -211,13 +208,6 @@
         } else {
             ncpus_used_st += rp->avp->avg_ncpus;
         }
-        double wss;
-        if (atp) {
-            wss = atp->procinfo.working_set_size_smoothed;
-        } else {
-            wss = rp->avp->max_working_set_size;
-        }
-        ram_left -= wss;

         adjust_rec_sched(rp);
         max_concurrent_inc(rp);
@@ -1093,7 +1083,7 @@ void CLIENT_STATE::append_unfinished_time_slice(vector<RESULT*> &run_list) {
 // That's the only kind of suspended GPU job.
 // CORPOC::usage[]: for each instance, its usage
 //
-// enforce_schedule() calls assign_coprocs(),
+// enforce_run_list() calls assign_coprocs(),
 // which assigns coproc instances to scheduled jobs,
 // and prunes jobs for which we can't make an assignment
 // (the job list is in order of decreasing priority)
@@ -1536,7 +1526,7 @@ bool CLIENT_STATE::enforce_run_list(vector<RESULT*>& run_list) {
 #endif

     if (log_flags.cpu_sched_debug) {
-        msg_printf(0, MSG_INFO, "[cpu_sched_debug] enforce_schedule(): start");
+        msg_printf(0, MSG_INFO, "[cpu_sched_debug] enforce_run_list(): start");
         msg_printf(0, MSG_INFO, "[cpu_sched_debug] preliminary job list:");
         print_job_list(run_list);
     }
@@ -1901,7 +1891,7 @@ bool CLIENT_STATE::enforce_run_list(vector<RESULT*>& run_list) {
         set_client_state_dirty("enforce_cpu_schedule");
     }
     if (log_flags.cpu_sched_debug) {
-        msg_printf(0, MSG_INFO, "[cpu_sched_debug] enforce_schedule: end");
+        msg_printf(0, MSG_INFO, "[cpu_sched_debug] enforce_run_list: end");
     }
     if (coproc_start_deferred) {
         if (log_flags.cpu_sched_debug) {