scheduler: fix bug that caused no jobs to be sent

This commit is contained in:
David Anderson 2014-03-12 15:31:12 -07:00
parent 2ea79d5d5f
commit c2fd2b33e0
4 changed files with 28 additions and 14 deletions

View File

@ -228,8 +228,18 @@ bool send_jobs(int assign_type) {
// OK, send the job
//
if (config.debug_send) {
log_messages.printf(MSG_NORMAL,
"sending targeted job: %s\n", wu.name
);
}
retval = send_assigned_job(asg);
if (retval) continue;
if (retval) {
log_messages.printf(MSG_NORMAL,
"failed to send targeted job: %s\n", boincerror(retval)
);
continue;
}
sent_something = true;
@ -251,6 +261,9 @@ bool send_jobs(int assign_type) {
//
bool send_targeted_jobs() {
bool sent_something = false;
if (config.debug_send) {
log_messages.printf(MSG_NORMAL, "checking for targeted jobs\n");
}
sent_something |= send_jobs(ASSIGN_USER);
sent_something |= send_jobs(ASSIGN_HOST);
sent_something |= send_jobs(ASSIGN_TEAM);

View File

@ -83,13 +83,13 @@ struct JOB_LIMIT {
}
}
// Return true if a limit has been reached for the given processor type:
// either the `total` member reports exceeded(), or the entry of
// proc_type_limits selected by proc_type does.
// proc_type is used directly as an index into proc_type_limits;
// no range check is done here.
// NOTE(review): the two signature lines below look like the old
// (_exceeded) and new (exceeded) names from a diff whose +/- markers
// were stripped - confirm which one is live.
//
inline bool _exceeded(int proc_type) {
inline bool exceeded(int proc_type) {
// check the combined limit first, then the per-processor-type one
if (total.exceeded()) return true;
if (proc_type_limits[proc_type].exceeded()) return true;
return false;
}
// Record one job of the given processor type:
// calls register_job() on the `total` member and on the entry of
// proc_type_limits selected by proc_type.
// proc_type is used directly as an index; no range check is done here.
// NOTE(review): the two signature lines below look like the old
// (_register_job) and new (register_job) names from a diff whose
// +/- markers were stripped - confirm which one is live.
//
inline void _register_job(int proc_type) {
inline void register_job(int proc_type) {
total.register_job();
proc_type_limits[proc_type].register_job();
}
@ -131,23 +131,23 @@ struct JOB_LIMITS {
return NULL;
}
// Return true if a job limit has been reached for the given
// app and processor type.
// project_limits is always checked.
// If app is non-NULL and lookup_app(app->name) finds an entry,
// that entry is checked as well.
// A NULL app, or an app with no entry, means only project_limits applies.
// NOTE(review): each adjacent pair of _exceeded/exceeded lines below
// looks like the old and new version of one line from a diff whose
// +/- markers were stripped - confirm which one is live.
//
inline bool _exceeded(APP* app, int proc_type) {
if (project_limits._exceeded(proc_type)) return true;
inline bool exceeded(APP* app, int proc_type) {
if (project_limits.exceeded(proc_type)) return true;
if (app) {
// jlp is NULL when lookup_app() finds nothing for this app name
JOB_LIMIT* jlp = lookup_app(app->name);
if (jlp) {
if (jlp->_exceeded(proc_type)) return true;
if (jlp->exceeded(proc_type)) return true;
}
}
return false;
}
// Record one job of the given app and processor type.
// The job is always registered with project_limits.
// If app is non-NULL and lookup_app(app->name) finds an entry,
// the job is registered with that entry too.
// NOTE(review): each adjacent pair of _register_job/register_job lines
// below looks like the old and new version of one line from a diff whose
// +/- markers were stripped - confirm which one is live.
//
inline void _register_job(APP* app, int proc_type) {
project_limits._register_job(proc_type);
inline void register_job(APP* app, int proc_type) {
project_limits.register_job(proc_type);
if (app) {
// jlp is NULL when lookup_app() finds nothing for this app name
JOB_LIMIT* jlp = lookup_app(app->name);
if (jlp) {
jlp->_register_job(proc_type);
jlp->register_job(proc_type);
}
}
}

View File

@ -144,6 +144,7 @@ void WORK_REQ_BASE::get_job_limits() {
if (n < 1) n = 1;
if (n > MAX_CPUS) n = MAX_CPUS;
ninstances[PROC_TYPE_CPU] = n;
effective_ncpus = n;
effective_ngpus = 0;
for (i=1; i<g_request->coprocs.n_rsc; i++) {
@ -788,7 +789,7 @@ bool work_needed(bool locality_sched) {
for (int i=0; i<NPROC_TYPES; i++) {
if (!have_apps(i)) continue;
if (config.max_jobs_in_progress._exceeded(NULL, i)) {
if (config.max_jobs_in_progress.exceeded(NULL, i)) {
if (config.debug_quota) {
log_messages.printf(MSG_NORMAL,
"[quota] reached limit on %s jobs in progress\n",
@ -1005,7 +1006,7 @@ int add_result_to_reply(
}
update_estimated_delay(*bavp, est_dur);
g_wreq->njobs_sent++;
config.max_jobs_in_progress._register_job(app, bavp->host_usage.proc_type);
config.max_jobs_in_progress.register_job(app, bavp->host_usage.proc_type);
if (!resent_result) {
DB_HOST_APP_VERSION* havp = bavp->host_app_version();
if (havp) {
@ -1430,7 +1431,7 @@ void send_work_setup() {
proc_type = plan_class_to_proc_type(r.plan_class);
}
}
config.max_jobs_in_progress._register_job(app, proc_type);
config.max_jobs_in_progress.register_job(app, proc_type);
}
// print details of request to log

View File

@ -583,7 +583,7 @@ BEST_APP_VERSION* get_app_version(
// if we're at the jobs-in-progress limit for this
// app and resource type, fall through and find another version
//
if (config.max_jobs_in_progress._exceeded(
if (config.max_jobs_in_progress.exceeded(
app, bavp->host_usage.proc_type
)) {
if (config.debug_version_select) {
@ -742,7 +742,7 @@ BEST_APP_VERSION* get_app_version(
// skip versions for which we're at the jobs-in-progress limit
//
if (config.max_jobs_in_progress._exceeded(app, host_usage.proc_type)) {
if (config.max_jobs_in_progress.exceeded(app, host_usage.proc_type)) {
if (config.debug_version_select) {
log_messages.printf(MSG_NORMAL,
"[version] [AV#%d] jobs in progress limit exceeded\n",