mirror of https://github.com/BOINC/boinc.git
- scheduler: fix a problem with job resend.
When we first send a job, we pick an app version, then call wu_is_infeasible_fast() to see if the host is able to run the job with that app version. In addition to checking disk space, etc., this calls wu_is_infeasible_custom() to do project-specific checks (e.g., for SETI@home: don't use GPUs for VLAR jobs). However, when we resend a job, we pick an app version (possibly different from the original one) and send the job without any checking. So, for example, we might send a VLAR job to a GPU, or send a job to a host with insufficient disk space (because free space has changed since the original send). Solution: call wu_is_infeasible_fast() before resending a job, and if it returns true, mark the job as done and don't resend it. svn path=/trunk/boinc/; revision=23098
This commit is contained in:
parent
00f7a7778b
commit
3b05dc6203
|
@ -1087,3 +1087,26 @@ David 23 Feb 2011
|
|||
cs_notice.cpp
|
||||
pers_file_xfer.cpp
|
||||
work_fetch.cpp
|
||||
|
||||
David 24 Feb 2011
|
||||
- scheduler: fix a problem with job resend.
|
||||
When we first send a job, we pick an app version,
|
||||
then call wu_is_infeasible_fast()
|
||||
to see if the host is able to run the job with that app version.
|
||||
In addition to checking disk space etc.
|
||||
this calls wu_is_infeasible_custom() to do project-specific checks
|
||||
(e.g. for SETI@home: don't use GPUs for VLAR jobs).
|
||||
|
||||
However, when we resend a job, we pick an app version
|
||||
(possibly different from the original one)
|
||||
and send the job without any checking.
|
||||
So, for example, we might send a VLAR job to a GPU,
|
||||
or send a job to a host with insufficient disk space
|
||||
(because free space has changed since original send).
|
||||
|
||||
Solution: call wu_is_infeasible_fast() before resending a job,
|
||||
and if it returns true, mark the job as done and don't resend it.
|
||||
|
||||
sched/
|
||||
sched_send.cpp
|
||||
sched_resend.cpp
|
||||
|
|
|
@ -826,6 +826,7 @@ static void promote_once_ran_edf() {
|
|||
);
|
||||
}
|
||||
rp->rr_sim_misses_deadline = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (gstate.now - atp->last_deadline_miss_time < gstate.global_prefs.cpu_scheduling_period()) {
|
||||
|
@ -833,7 +834,7 @@ static void promote_once_ran_edf() {
|
|||
RESULT* rp = atp->result;
|
||||
PROJECT* p = rp->project;
|
||||
msg_printf(p, MSG_INFO,
|
||||
"[cpu_sched] trashing prevention: mark %s as deadline miss",
|
||||
"[cpu_sched] thrashing prevention: mark %s as deadline miss",
|
||||
rp->name
|
||||
);
|
||||
}
|
||||
|
|
|
@ -20,9 +20,10 @@ require_once("../inc/boinc_db.inc");
|
|||
require_once("../inc/util.inc");
|
||||
require_once("../inc/host.inc");
|
||||
|
||||
check_get_args(array("hostid"));
|
||||
check_get_args(array("hostid", "ttok", "tnow"));
|
||||
|
||||
$user = get_logged_in_user();
|
||||
check_tokens($user->authenticator);
|
||||
|
||||
$hostid = get_int("hostid");
|
||||
$host = BoincHost::lookup_id($hostid);
|
||||
|
|
|
@ -148,9 +148,9 @@ bool resend_lost_work() {
|
|||
continue;
|
||||
}
|
||||
|
||||
APP* app = ssp->lookup_app(wu.appid);
|
||||
bavp = get_app_version(wu, false, false);
|
||||
if (!bavp) {
|
||||
APP* app = ssp->lookup_app(wu.appid);
|
||||
log_messages.printf(MSG_CRITICAL,
|
||||
"[HOST#%d] can't resend [RESULT#%d]: no app version for %s\n",
|
||||
g_reply->host.id, result.id, app->name
|
||||
|
@ -166,9 +166,13 @@ bool resend_lost_work() {
|
|||
// so that the transitioner does 'the right thing'.
|
||||
//
|
||||
if (
|
||||
wu.error_mask ||
|
||||
wu.canonical_resultid ||
|
||||
possibly_give_result_new_deadline(result, wu, *bavp)
|
||||
wu.error_mask
|
||||
|| wu.canonical_resultid
|
||||
|| wu_is_infeasible_fast(
|
||||
wu, result.server_state, result.priority, result.report_deadline,
|
||||
*app, *bavp
|
||||
)
|
||||
|| possibly_give_result_new_deadline(result, wu, *bavp)
|
||||
) {
|
||||
if (config.debug_resend) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
|
|
|
@ -577,6 +577,7 @@ static inline void update_estimated_delay(BEST_APP_VERSION& bav, double dt) {
|
|||
// Actually, return two: optimistic (lower) and pessimistic (higher).
|
||||
// If the deadline check with the optimistic bound fails,
|
||||
// try the pessimistic bound.
|
||||
// TODO: clean up this mess
|
||||
//
|
||||
static void get_delay_bound_range(
|
||||
WORKUNIT& wu,
|
||||
|
|
Loading…
Reference in New Issue