mirror of https://github.com/BOINC/boinc.git
- client: fix problems with job scheduling policy.
Old: job scheduling has 2 phases. In the first phase (schedule_cpus()) we make a list of jobs, with deadline-miss and high-STD jobs first. Keep track of the RAM used, and skip jobs that would exceed available RAM. Stop scanning when the # of CPUs used by jobs in the list exceeds the # of actual CPUs. In the 2nd phase (enforce_schedule()), we add currently running jobs (which may be in the middle of a time slice) to the list, and reorder to give priority to such jobs, and possibly also to multi-thread jobs. We then run and/or preempt jobs, keeping track of RAM used. Problems: - suppose we add an EDF 1-CPU job to the list, then a MT job. We'll stop at that point because #CPUs is exceeded. But enforce_schedule() won't run the MT job, and CPUs will be idle. - Because the list may be reordered, skipping jobs based on RAM is not correct, and may cause deadlines to be missed. New: - when making the job list, keep track of #CPUs used by MT jobs and non-MT jobs separately. Stop the scan only if the non-MT count exceeds #CPUs. This ensures that we have enough jobs to use all the CPUs, even if the MT jobs can't be run for whatever reason. - don't skip jobs because of RAM usage - skip MT jobs if the MT CPU count is at least #CPUs Notes: - ignoring RAM usage in phase 1 can cause idleness in some cases, e.g. suppose there are 4 GB of RAM and the list has jobs that use 3 GB, but there are also some jobs that use 1 GB. I'm not sure how to fix this. - Maybe the 2-phase approach is not a good idea. We did it this way for efficiency, so that we don't have to recompute the job list each time a job checkpoints. But this is probably not a concern, and I like the idea of a simpler approach, e.g. reducing the policy to a single comparison function. svn path=/trunk/boinc/; revision=22615
This commit is contained in:
parent
4e10f8bb88
commit
2d6d69ac86
|
@ -7713,3 +7713,52 @@ David 01 Nov 2010
|
|||
|
||||
client/
|
||||
cpu_sched.cpp
|
||||
|
||||
David 01 Nov 2010
|
||||
- client: fix problems with job scheduling policy.
|
||||
Old: job scheduling has 2 phases.
|
||||
In the first phase (schedule_cpus()) we make a list of jobs,
|
||||
with deadline-miss and high-STD jobs first.
|
||||
Keep track of the RAM used,
|
||||
and skip jobs that would exceed available RAM.
|
||||
Stop scanning when the # of CPUs used by jobs in the list
|
||||
exceeds the # of actual CPUs.
|
||||
|
||||
In the 2nd phase (enforce_schedule()), we add currently running jobs
|
||||
(which may be in the middle of a time slice) to the list,
|
||||
and reorder to give priority to such jobs,
|
||||
and possibly also to multi-thread jobs.
|
||||
We then run and/or preempt jobs, keeping track of RAM used.
|
||||
|
||||
Problems:
|
||||
- suppose we add an EDF 1-CPU job to the list, then a MT job.
|
||||
We'll stop at that point because #CPUs is exceeded.
|
||||
But enforce_schedule() won't run the MT job,
|
||||
and CPUs will be idle.
|
||||
- Because the list may be reordered, skipping jobs based
|
||||
on RAM is not correct, and may cause deadlines to be missed.
|
||||
|
||||
New:
|
||||
- when making the job list, keep track of #CPUs used
|
||||
by MT jobs and non-MT jobs separately.
|
||||
Stop the scan only if the non-MT count exceeds #CPUs.
|
||||
This ensures that we have enough jobs to use all the CPUs,
|
||||
even if the MT jobs can't be run for whatever reason.
|
||||
- don't skip jobs because of RAM usage
|
||||
- skip MT jobs if the MT CPU count is at least #CPUs
|
||||
|
||||
Notes:
|
||||
- ignoring RAM usage in phase 1 can cause idleness in some cases,
|
||||
e.g. suppose there are 4 GB of RAM and the list has
|
||||
jobs that use 3 GB, but there are also some jobs that use 1 GB.
|
||||
I'm not sure how to fix this.
|
||||
- Maybe the 2-phase approach is not a good idea.
|
||||
We did it this way for efficiency,
|
||||
so that we don't have to recompute the job list
|
||||
each time a job checkpoints.
|
||||
But this is probably not a concern,
|
||||
and I like the idea of a simpler approach,
|
||||
e.g. reducing the policy to a single comparison function.
|
||||
|
||||
client/
|
||||
cpu_sched.cpp
|
||||
|
|
|
@ -71,14 +71,14 @@ using std::list;
|
|||
//
|
||||
struct PROC_RESOURCES {
|
||||
int ncpus;
|
||||
double ncpus_used;
|
||||
double ram_left;
|
||||
double ncpus_used_st; // #CPUs of GPU or single-thread jobs
|
||||
double ncpus_used_mt; // #CPUs of multi-thread jobs
|
||||
COPROCS coprocs;
|
||||
|
||||
// should we stop scanning jobs?
|
||||
//
|
||||
inline bool stop_scan_cpu() {
|
||||
return ncpus_used >= ncpus;
|
||||
return ncpus_used_st >= ncpus;
|
||||
}
|
||||
|
||||
inline bool stop_scan_coproc(int rsc_type) {
|
||||
|
@ -94,9 +94,7 @@ struct PROC_RESOURCES {
|
|||
if (rp->schedule_backoff > gstate.now) return false;
|
||||
if (rp->uses_coprocs()) {
|
||||
if (gpu_suspend_reason) return false;
|
||||
if (sufficient_coprocs(
|
||||
*rp->avp, log_flags.cpu_sched_debug)
|
||||
) {
|
||||
if (sufficient_coprocs(*rp->avp, log_flags.cpu_sched_debug)) {
|
||||
return true;
|
||||
} else {
|
||||
if (log_flags.cpu_sched_debug) {
|
||||
|
@ -106,10 +104,10 @@ struct PROC_RESOURCES {
|
|||
}
|
||||
return false;
|
||||
}
|
||||
} else if (rp->avp->avg_ncpus > 1) {
|
||||
return (ncpus_used_mt + rp->avp->avg_ncpus < ncpus);
|
||||
} else {
|
||||
// otherwise, only if CPUs are available
|
||||
//
|
||||
return (ncpus_used < ncpus);
|
||||
return (ncpus_used_st < ncpus);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -119,7 +117,13 @@ struct PROC_RESOURCES {
|
|||
reserve_coprocs(
|
||||
*rp->avp, log_flags.cpu_sched_debug, "cpu_sched_debug"
|
||||
);
|
||||
ncpus_used += rp->avp->avg_ncpus;
|
||||
if (rp->uses_coprocs()) {
|
||||
ncpus_used_st += rp->avp->avg_ncpus;
|
||||
} else if (rp->avp->avg_ncpus > 1) {
|
||||
ncpus_used_mt += rp->avp->avg_ncpus;
|
||||
} else {
|
||||
ncpus_used_st += rp->avp->avg_ncpus;
|
||||
}
|
||||
}
|
||||
|
||||
bool sufficient_coprocs(APP_VERSION& av, bool log_flag) {
|
||||
|
@ -409,7 +413,9 @@ RESULT* first_coproc_result(int rsc_type) {
|
|||
return best;
|
||||
}
|
||||
|
||||
// Return earliest-deadline result for given resource type.
|
||||
// Return earliest-deadline result for given resource type;
|
||||
// return only results projected to miss their deadline,
|
||||
// or from projects with extreme DCF
|
||||
//
|
||||
static RESULT* earliest_deadline_result(int rsc_type) {
|
||||
RESULT *best_result = NULL;
|
||||
|
@ -624,7 +630,6 @@ bool CLIENT_STATE::possibly_schedule_cpus() {
|
|||
}
|
||||
|
||||
// Check whether the job can be run:
|
||||
// - it will fit in RAM
|
||||
// - we have enough shared-mem segments (old Mac problem)
|
||||
// If so, update proc_rsc and anticipated debts, and return true
|
||||
//
|
||||
|
@ -633,20 +638,6 @@ static bool schedule_if_possible(
|
|||
const char* description
|
||||
) {
|
||||
if (atp) {
|
||||
// see if it fits in available RAM
|
||||
//
|
||||
if (atp->procinfo.working_set_size_smoothed > proc_rsc.ram_left) {
|
||||
if (log_flags.cpu_sched_debug) {
|
||||
msg_printf(rp->project, MSG_INFO,
|
||||
"[cpu_sched] %s working set too large: %.2fMB",
|
||||
rp->name, atp->procinfo.working_set_size_smoothed/MEGA
|
||||
);
|
||||
}
|
||||
atp->too_large = true;
|
||||
return false;
|
||||
}
|
||||
atp->too_large = false;
|
||||
|
||||
if (gstate.retry_shmem_time > gstate.now) {
|
||||
if (atp->app_client_shm.shm == NULL) {
|
||||
if (log_flags.cpu_sched_debug) {
|
||||
|
@ -660,17 +651,6 @@ static bool schedule_if_possible(
|
|||
}
|
||||
atp->needs_shmem = false;
|
||||
}
|
||||
proc_rsc.ram_left -= atp->procinfo.working_set_size_smoothed;
|
||||
} else {
|
||||
if (rp->avp->max_working_set_size > proc_rsc.ram_left) {
|
||||
if (log_flags.cpu_sched_debug) {
|
||||
msg_printf(rp->project, MSG_INFO,
|
||||
"[cpu_sched] %s projected working set too large: %.2fMB",
|
||||
rp->name, rp->avp->max_working_set_size/MEGA
|
||||
);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (log_flags.cpu_sched_debug) {
|
||||
|
@ -761,8 +741,8 @@ void CLIENT_STATE::schedule_cpus() {
|
|||
bool can_run;
|
||||
|
||||
proc_rsc.ncpus = ncpus;
|
||||
proc_rsc.ncpus_used = 0;
|
||||
proc_rsc.ram_left = available_ram();
|
||||
proc_rsc.ncpus_used_st = 0;
|
||||
proc_rsc.ncpus_used_mt = 0;
|
||||
proc_rsc.coprocs.clone(host_info.coprocs, false);
|
||||
|
||||
if (log_flags.cpu_sched_debug) {
|
||||
|
@ -819,9 +799,17 @@ void CLIENT_STATE::schedule_cpus() {
|
|||
|
||||
ordered_scheduled_results.clear();
|
||||
|
||||
// first, add GPU jobs
|
||||
|
||||
add_coproc_jobs(RSC_TYPE_CUDA, proc_rsc);
|
||||
add_coproc_jobs(RSC_TYPE_ATI, proc_rsc);
|
||||
|
||||
// then add CPU jobs.
|
||||
// Note: the jobs that actually get run are not necessarily
|
||||
// an initial segment of this list;
|
||||
// e.g. a multithread job may not get run because it has
|
||||
// a high-priority single-thread job ahead of it.
|
||||
|
||||
// choose CPU jobs from projects with CPU deadline misses
|
||||
//
|
||||
#ifdef SIM
|
||||
|
@ -874,6 +862,9 @@ static inline bool in_ordered_scheduled_results(ACTIVE_TASK* atp) {
|
|||
// if find a MT job J, and X < ncpus, move J before all non-MT jobs
|
||||
// But don't promote a MT job ahead of a job in EDF
|
||||
//
|
||||
// This is needed because there may always be a 1-CPU job
|
||||
// in the middle of its time-slice, and MT jobs could starve.
|
||||
//
|
||||
static void promote_multi_thread_jobs(vector<RESULT*>& runnable_jobs) {
|
||||
double cpus_used = 0;
|
||||
vector<RESULT*>::iterator first_non_mt = runnable_jobs.end();
|
||||
|
|
Loading…
Reference in New Issue