mirror of https://github.com/BOINC/boinc.git
client: scheduling and work fetch tweaks for GPU exclusion cases
Scheduling: if a resource has exclusions, put all of its jobs in the run list; otherwise we might fail to have a job for a GPU instance and starve it. Work fetch: allow work fetch from zero-share projects if the resource has instances that are idle because of GPU exclusions.
This commit is contained in:
parent
02003bc2da
commit
8d009ce3b3
|
@ -170,23 +170,19 @@ struct PROC_RESOURCES {
|
||||||
void schedule(RESULT* rp, ACTIVE_TASK* atp, const char* description) {
|
void schedule(RESULT* rp, ACTIVE_TASK* atp, const char* description) {
|
||||||
if (log_flags.cpu_sched_debug) {
|
if (log_flags.cpu_sched_debug) {
|
||||||
msg_printf(rp->project, MSG_INFO,
|
msg_printf(rp->project, MSG_INFO,
|
||||||
"[cpu_sched_debug] scheduling %s (%s) (prio %f)",
|
"[cpu_sched_debug] add to run list: %s (%s) (prio %f)",
|
||||||
rp->name, description,
|
rp->name, description,
|
||||||
rp->project->sched_priority
|
rp->project->sched_priority
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
if (rp->uses_coprocs()) {
|
if (rp->uses_coprocs()) {
|
||||||
// if this job is currently running,
|
// if the resource type has exclusions, don't reserve instances.
|
||||||
// and the resource type has exclusions,
|
// It means that the run list will include all jobs
|
||||||
// don't reserve instances;
|
// for that resource type.
|
||||||
// This allows more jobs in the run list
|
// Inefficient, but necessary to avoid starvation cases.
|
||||||
// and avoids a starvation case
|
|
||||||
//
|
//
|
||||||
int rt = rp->avp->gpu_usage.rsc_type;
|
int rt = rp->avp->gpu_usage.rsc_type;
|
||||||
bool dont_reserve =
|
bool dont_reserve = rsc_work_fetch[rt].has_exclusions;
|
||||||
rsc_work_fetch[rt].has_exclusions
|
|
||||||
&& atp != NULL
|
|
||||||
&& atp->is_gpu_task_running();
|
|
||||||
if (!dont_reserve) {
|
if (!dont_reserve) {
|
||||||
reserve_coprocs(*rp);
|
reserve_coprocs(*rp);
|
||||||
}
|
}
|
||||||
|
|
|
@ -270,7 +270,9 @@ void RSC_WORK_FETCH::print_state(const char* name) {
|
||||||
shortfall, nidle_now, saturated_time,
|
shortfall, nidle_now, saturated_time,
|
||||||
busy_time_estimator.get_busy_time()
|
busy_time_estimator.get_busy_time()
|
||||||
);
|
);
|
||||||
//msg_printf(0, MSG_INFO, "[work_fetch] sim used inst %d sim excl inst %d", sim_used_instances, sim_excluded_instances);
|
msg_printf(0, MSG_INFO, "[work_fetch] sim used inst %d sim excl inst %d",
|
||||||
|
sim_used_instances, sim_excluded_instances
|
||||||
|
);
|
||||||
for (unsigned int i=0; i<gstate.projects.size(); i++) {
|
for (unsigned int i=0; i<gstate.projects.size(); i++) {
|
||||||
char buf[256];
|
char buf[256];
|
||||||
PROJECT* p = gstate.projects[i];
|
PROJECT* p = gstate.projects[i];
|
||||||
|
@ -603,7 +605,7 @@ int RSC_WORK_FETCH::cant_fetch(PROJECT *p) {
|
||||||
// if project has zero resource share,
|
// if project has zero resource share,
|
||||||
// only fetch work if a device is idle
|
// only fetch work if a device is idle
|
||||||
//
|
//
|
||||||
if (p->resource_share == 0 && nidle_now == 0) {
|
if (p->resource_share == 0 && nidle_now == 0 && sim_excluded_instances==0) {
|
||||||
WF_DEBUG(msg_printf(p, MSG_INFO, "skip: zero share");)
|
WF_DEBUG(msg_printf(p, MSG_INFO, "skip: zero share");)
|
||||||
return DONT_FETCH_ZERO_SHARE;
|
return DONT_FETCH_ZERO_SHARE;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue