- scheduler: when resending jobs:
    - don't use devices for which work is not being requested
    - obey wu_is_infeasible_custom()
        (e.g. don't send SETI@home VLAR jobs to GPUs)
- scheduler: add <debug_array_detail> log flag for slot-level messages
- admin web: show and allow control of app.beta
This commit is contained in:
David Anderson 2012-11-07 23:43:43 -08:00 committed by Oliver Bock
parent 471e3b229e
commit 01c0a9a4b0
6 changed files with 129 additions and 57 deletions

View File

@ -6674,3 +6674,18 @@ David 7 Nov 2012
- a bunch of skin files had execute permissions (??). Clear them.
clientgui/skins/Charity Engine/graphic/*
David 7 Nov 2012
- scheduler: when resending jobs:
    - don't use devices for which work is not being requested
    - obey wu_is_infeasible_custom()
        (e.g. don't send SETI@home VLAR jobs to GPUs)
- scheduler: add <debug_array_detail> log flag for slot-level messages
- admin web: show and allow control of app.beta
html/ops/
manage_apps.php
sched/
sched_array.cpp
sched_config.cpp,h
sched_resend.cpp

View File

@ -78,6 +78,13 @@ function do_updates() {
if ($new_v != $old_v ) {
$app->update("non_cpu_intensive=$new_v");
}
$field = "beta_".$id;
$new_v = (post_str($field, true)=='on') ? 1 : 0;
$old_v = $app->beta;
if ($new_v != $old_v ) {
$app->update("beta=$new_v");
}
}
// Adding a new application
@ -127,7 +134,8 @@ function show_form($updated) {
"homogeneous redundancy type<br><a href=http://boinc.berkeley.edu/trac/wiki/HomogeneousRedundancy><span class=note>details</span></a>",
"homogeneous app version?<br><a href=http://boinc.berkeley.edu/trac/wiki/HomogeneousAppVersion><span class=note>details</span></a>",
"deprecated?",
"Non-CPU-intensive?"
"Non-CPU-intensive?",
"Beta?"
);
$total_weight = mysql_query('SELECT SUM(weight) AS total_weight FROM app WHERE deprecated=0');
@ -189,6 +197,13 @@ function show_form($updated) {
<input name='$field' type='checkbox' $v></TD>
";
$field = "beta_".$id;
$v = '';
if ($item->beta) $v = ' CHECKED ';
echo " <TD align='center'>
<input name='$field' type='checkbox' $v></TD>
";
echo "</tr> ";
}
mysql_free_result($result);

View File

@ -57,8 +57,10 @@ static bool quick_check(
//
if (g_wreq->beta_only) {
if (!app->beta) {
if (config.debug_array) {
log_messages.printf(MSG_NORMAL, "[array] not beta\n");
if (config.debug_array_detail) {
log_messages.printf(MSG_NORMAL,
"[array_detail] job is not from beta app; skipping\n"
);
}
return false;
}
@ -70,8 +72,10 @@ static bool quick_check(
}
} else {
if (app->beta) {
if (config.debug_array) {
log_messages.printf(MSG_NORMAL, "[array] is beta\n");
if (config.debug_array_detail) {
log_messages.printf(MSG_NORMAL,
"[array_detail] job is from beta app; skipping\n"
);
}
return false;
}
@ -83,13 +87,17 @@ static bool quick_check(
//
if (!app->beta) {
if (g_wreq->reliable_only && (!wu_result.need_reliable)) {
if (config.debug_array) {
log_messages.printf(MSG_NORMAL, "[array] don't need reliable\n");
if (config.debug_array_detail) {
log_messages.printf(MSG_NORMAL,
"[array_detail] job doesn't need reliable host; skipping\n"
);
}
return false;
} else if (!g_wreq->reliable_only && wu_result.need_reliable) {
if (config.debug_array) {
log_messages.printf(MSG_NORMAL, "[array] need reliable\n");
if (config.debug_array_detail) {
log_messages.printf(MSG_NORMAL,
"[array_detail] job needs reliable host; skipping\n"
);
}
return false;
}
@ -99,8 +107,10 @@ static bool quick_check(
// and the result is not infeasible
//
if (g_wreq->infeasible_only && (wu_result.infeasible_count==0)) {
if (config.debug_array) {
log_messages.printf(MSG_NORMAL, "[array] not infeasible\n");
if (config.debug_array_detail) {
log_messages.printf(MSG_NORMAL,
"[array_detail] job is not infeasible; skipping\n"
);
}
return false;
}
@ -137,9 +147,9 @@ static bool quick_check(
//
bavp = get_app_version(wu, true, g_wreq->reliable_only);
if (!bavp) {
if (config.debug_array) {
if (config.debug_array_detail) {
log_messages.printf(MSG_NORMAL,
"[array] No app version\n"
"[array_detail] No app version for job; skipping\n"
);
}
return false;
@ -154,14 +164,12 @@ static bool quick_check(
) {
if (app_not_selected(wu)) {
g_wreq->no_allowed_apps_available = true;
#if 1
if (config.debug_array) {
if (config.debug_array_detail) {
log_messages.printf(MSG_NORMAL,
"[array] [USER#%d] [WU#%d] user doesn't want work for app %s\n",
"[array_detail] [USER#%d] [WU#%d] user doesn't want work for app %s\n",
g_reply->user.id, wu.id, app->name
);
}
#endif
return false;
}
}
@ -183,8 +191,10 @@ static bool quick_check(
);
}
last_retval = retval;
if (config.debug_array) {
log_messages.printf(MSG_NORMAL, "[array] infeasible\n");
if (config.debug_array_detail) {
log_messages.printf(MSG_NORMAL,
"[array_detail] is_infeasible_fast() failed; skipping\n"
);
}
return false;
}
@ -376,13 +386,11 @@ static bool scan_work_array() {
WU_RESULT& wu_result = ssp->wu_results[i];
#if 0
if (config.debug_array) {
if (config.debug_array_detail) {
log_messages.printf(MSG_NORMAL,
"[array] scanning slot %d\n", i
"[array_detail] scanning slot %d\n", i
);
}
#endif
if (wu_result.state != WR_STATE_PRESENT && wu_result.state != g_pid) {
continue;
@ -408,9 +416,9 @@ static bool scan_work_array() {
// This may modify wu.rsc_fpops_est
//
if (!quick_check(wu_result, wu, bavp, app, last_retval)) {
if (config.debug_array) {
if (config.debug_array_detail) {
log_messages.printf(MSG_NORMAL,
"[array] slot %d failed quick check\n", i
"[array_detail] slot %d failed quick check\n", i
);
}
continue;
@ -502,7 +510,7 @@ void send_work_old() {
} else {
if (config.debug_array) {
log_messages.printf(MSG_NORMAL,
"[array] host has no reliable app versions; skipping\n"
"[array] host has no reliable app versions; skipping scan\n"
);
}
}

View File

@ -297,6 +297,7 @@ int SCHED_CONFIG::parse(FILE* f) {
//////////// SCHEDULER LOG FLAGS /////////
if (xp.parse_bool("debug_array", debug_array)) continue;
if (xp.parse_bool("debug_array_detail", debug_array_detail)) continue;
if (xp.parse_bool("debug_assignment", debug_assignment)) continue;
if (xp.parse_bool("debug_credit", debug_credit)) continue;
if (xp.parse_bool("debug_edf_sim_detail", debug_edf_sim_detail)) continue;

View File

@ -180,6 +180,7 @@ struct SCHED_CONFIG {
// scheduler log flags
//
bool debug_array; // debug job-cache scheduling
bool debug_array_detail; // show slot-level info
bool debug_assignment;
bool debug_credit;
bool debug_edf_sim_detail; // show details of EDF sim

View File

@ -34,6 +34,7 @@
#include "sched_main.h"
#include "sched_config.h"
#include "sched_customize.h"
#include "sched_locality.h"
#include "sched_msgs.h"
#include "sched_send.h"
@ -140,50 +141,81 @@ bool resend_lost_work() {
}
DB_WORKUNIT wu;
bool cant_resend = false;
bool can_resend = true;
retval = wu.lookup_id(result.workunitid);
if (retval) {
log_messages.printf(MSG_CRITICAL,
"[HOST#%d] WU not found for [RESULT#%d]\n",
"[HOST#%d] can't resend - WU not found for [RESULT#%d]\n",
g_reply->host.id, result.id
);
cant_resend = true;
} else {
can_resend = false;
}
if (can_resend) {
app = ssp->lookup_app(wu.appid);
bavp = get_app_version(wu, false, false);
bavp = get_app_version(wu, true, false);
if (!bavp) {
log_messages.printf(MSG_CRITICAL,
"[HOST#%d] can't resend [RESULT#%d]: no app version for %s\n",
g_reply->host.id, result.id, app->name
);
cant_resend = true;
if (config.debug_resend) {
log_messages.printf(MSG_NORMAL,
"[HOST#%d] can't resend [RESULT#%d]: no app version for %s\n",
g_reply->host.id, result.id, app->name
);
}
can_resend = false;
}
}
// If error occurred,
// or time is too close to the deadline,
// or we already have a canonical result,
// or WU error flag is set,
// then don't resend this result.
// Instead make it time out right away
// so that the transitioner does 'the right thing'.
//
if (
cant_resend
|| wu.error_mask
|| wu.canonical_resultid
|| wu_is_infeasible_fast(
wu, result.server_state, result.priority, result.report_deadline,
*app, *bavp
)
|| possibly_give_result_new_deadline(result, wu, *bavp)
) {
if (can_resend && wu.error_mask) {
if (config.debug_resend) {
log_messages.printf(MSG_NORMAL,
"[resend] [HOST#%d][RESULT#%d] not needed or too close to deadline, expiring\n",
g_reply->host.id, result.id
"[resend] skipping [RESULT#%d]: WU error mask %d\n",
result.id, wu.error_mask
);
}
can_resend = false;
}
if (can_resend && wu.canonical_resultid) {
if (config.debug_resend) {
log_messages.printf(MSG_NORMAL,
"[resend] skipping [RESULT#%d]: already have canonical result\n",
result.id
);
}
can_resend = false;
}
if (can_resend && wu_is_infeasible_fast(
wu, result.server_state, result.priority, result.report_deadline,
*app, *bavp
)) {
if (config.debug_resend) {
log_messages.printf(MSG_NORMAL,
"[resend] skipping [RESULT#%d]: feasibility check failed\n",
result.id
);
}
can_resend = false;
}
if (can_resend && possibly_give_result_new_deadline(result, wu, *bavp)) {
if (config.debug_resend) {
log_messages.printf(MSG_NORMAL,
"[resend] skipping [RESULT#%d]: deadline assignment failed\n",
result.id
);
}
can_resend = false;
}
if (can_resend && wu_is_infeasible_custom(wu, *app, *bavp)) {
if (config.debug_resend) {
log_messages.printf(MSG_NORMAL,
"[resend] skipping [RESULT#%d]: custom feasibility check failed\n",
result.id
);
}
can_resend = false;
}
// If we can't resend this job for any of the above reasons,
// make it time out so that the transitioner does the right thing.
//
if (!can_resend) {
result.report_deadline = time(0)-1;
retval = result.mark_as_sent(result.server_state, config.report_grace_period);
if (retval) {