mirror of https://github.com/BOINC/boinc.git
- scheduler: when resending jobs:
    - don't use devices for which work is not being requested
    - obey wu_is_infeasible_custom() (e.g. don't send SETI@home VLAR jobs to GPUs)
- scheduler: add <debug_array_detail> log flag for slot-level messages
- admin web: show and allow control of app.beta
This commit is contained in:
parent
471e3b229e
commit
01c0a9a4b0
|
@ -6674,3 +6674,18 @@ David 7 Nov 2012
|
|||
- a bunch of skin files had execute permissions (??). Clear them.
|
||||
|
||||
clientgui/skins/Charity Engine/graphic/*
|
||||
|
||||
David 7 Nov 2012
|
||||
- scheduler: when resending jobs:
|
||||
- don't use devices for which work is not being requested
|
||||
- obey wu_is_infeasible_custom()
|
||||
(e.g. don't send SETI@home VLAR jobs to GPUs)
|
||||
- scheduler: add <debug_array_detail> log flag for slot-level messages
|
||||
- admin web: show and allow control of app.beta
|
||||
|
||||
html/ops/
|
||||
manage_apps.php
|
||||
sched/
|
||||
sched_array.cpp
|
||||
sched_config.cpp,h
|
||||
sched_resend.cpp
|
||||
|
|
|
@ -78,6 +78,13 @@ function do_updates() {
|
|||
if ($new_v != $old_v ) {
|
||||
$app->update("non_cpu_intensive=$new_v");
|
||||
}
|
||||
|
||||
$field = "beta_".$id;
|
||||
$new_v = (post_str($field, true)=='on') ? 1 : 0;
|
||||
$old_v = $app->beta;
|
||||
if ($new_v != $old_v ) {
|
||||
$app->update("beta=$new_v");
|
||||
}
|
||||
}
|
||||
|
||||
// Adding a new application
|
||||
|
@ -127,7 +134,8 @@ function show_form($updated) {
|
|||
"homogeneous redundancy type<br><a href=http://boinc.berkeley.edu/trac/wiki/HomogeneousRedundancy><span class=note>details</span></a>",
|
||||
"homogeneous app version?<br><a href=http://boinc.berkeley.edu/trac/wiki/HomogeneousAppVersion><span class=note>details</span></a>",
|
||||
"deprecated?",
|
||||
"Non-CPU-intensive?"
|
||||
"Non-CPU-intensive?",
|
||||
"Beta?"
|
||||
);
|
||||
|
||||
$total_weight = mysql_query('SELECT SUM(weight) AS total_weight FROM app WHERE deprecated=0');
|
||||
|
@ -189,6 +197,13 @@ function show_form($updated) {
|
|||
<input name='$field' type='checkbox' $v></TD>
|
||||
";
|
||||
|
||||
$field = "beta_".$id;
|
||||
$v = '';
|
||||
if ($item->beta) $v = ' CHECKED ';
|
||||
echo " <TD align='center'>
|
||||
<input name='$field' type='checkbox' $v></TD>
|
||||
";
|
||||
|
||||
echo "</tr> ";
|
||||
}
|
||||
mysql_free_result($result);
|
||||
|
|
|
@ -57,8 +57,10 @@ static bool quick_check(
|
|||
//
|
||||
if (g_wreq->beta_only) {
|
||||
if (!app->beta) {
|
||||
if (config.debug_array) {
|
||||
log_messages.printf(MSG_NORMAL, "[array] not beta\n");
|
||||
if (config.debug_array_detail) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[array_detail] job is not from beta app; skipping\n"
|
||||
);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -70,8 +72,10 @@ static bool quick_check(
|
|||
}
|
||||
} else {
|
||||
if (app->beta) {
|
||||
if (config.debug_array) {
|
||||
log_messages.printf(MSG_NORMAL, "[array] is beta\n");
|
||||
if (config.debug_array_detail) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[array_detail] job is from beta app; skipping\n"
|
||||
);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -83,13 +87,17 @@ static bool quick_check(
|
|||
//
|
||||
if (!app->beta) {
|
||||
if (g_wreq->reliable_only && (!wu_result.need_reliable)) {
|
||||
if (config.debug_array) {
|
||||
log_messages.printf(MSG_NORMAL, "[array] don't need reliable\n");
|
||||
if (config.debug_array_detail) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[array_detail] job doesn't need reliable host; skipping\n"
|
||||
);
|
||||
}
|
||||
return false;
|
||||
} else if (!g_wreq->reliable_only && wu_result.need_reliable) {
|
||||
if (config.debug_array) {
|
||||
log_messages.printf(MSG_NORMAL, "[array] need reliable\n");
|
||||
if (config.debug_array_detail) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[array_detail] job needs reliable host; skipping\n"
|
||||
);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -99,8 +107,10 @@ static bool quick_check(
|
|||
// and the result is not infeasible
|
||||
//
|
||||
if (g_wreq->infeasible_only && (wu_result.infeasible_count==0)) {
|
||||
if (config.debug_array) {
|
||||
log_messages.printf(MSG_NORMAL, "[array] not infeasible\n");
|
||||
if (config.debug_array_detail) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[array_detail] job is not infeasible; skipping\n"
|
||||
);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -137,9 +147,9 @@ static bool quick_check(
|
|||
//
|
||||
bavp = get_app_version(wu, true, g_wreq->reliable_only);
|
||||
if (!bavp) {
|
||||
if (config.debug_array) {
|
||||
if (config.debug_array_detail) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[array] No app version\n"
|
||||
"[array_detail] No app version for job; skipping\n"
|
||||
);
|
||||
}
|
||||
return false;
|
||||
|
@ -154,14 +164,12 @@ static bool quick_check(
|
|||
) {
|
||||
if (app_not_selected(wu)) {
|
||||
g_wreq->no_allowed_apps_available = true;
|
||||
#if 1
|
||||
if (config.debug_array) {
|
||||
if (config.debug_array_detail) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[array] [USER#%d] [WU#%d] user doesn't want work for app %s\n",
|
||||
"[array_detail] [USER#%d] [WU#%d] user doesn't want work for app %s\n",
|
||||
g_reply->user.id, wu.id, app->name
|
||||
);
|
||||
}
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -183,8 +191,10 @@ static bool quick_check(
|
|||
);
|
||||
}
|
||||
last_retval = retval;
|
||||
if (config.debug_array) {
|
||||
log_messages.printf(MSG_NORMAL, "[array] infeasible\n");
|
||||
if (config.debug_array_detail) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[array_detail] is_infeasible_fast() failed; skipping\n"
|
||||
);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -376,13 +386,11 @@ static bool scan_work_array() {
|
|||
|
||||
WU_RESULT& wu_result = ssp->wu_results[i];
|
||||
|
||||
#if 0
|
||||
if (config.debug_array) {
|
||||
if (config.debug_array_detail) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[array] scanning slot %d\n", i
|
||||
"[array_detail] scanning slot %d\n", i
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (wu_result.state != WR_STATE_PRESENT && wu_result.state != g_pid) {
|
||||
continue;
|
||||
|
@ -408,9 +416,9 @@ static bool scan_work_array() {
|
|||
// This may modify wu.rsc_fpops_est
|
||||
//
|
||||
if (!quick_check(wu_result, wu, bavp, app, last_retval)) {
|
||||
if (config.debug_array) {
|
||||
if (config.debug_array_detail) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[array] slot %d failed quick check\n", i
|
||||
"[array_detail] slot %d failed quick check\n", i
|
||||
);
|
||||
}
|
||||
continue;
|
||||
|
@ -502,7 +510,7 @@ void send_work_old() {
|
|||
} else {
|
||||
if (config.debug_array) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[array] host has no reliable app versions; skipping\n"
|
||||
"[array] host has no reliable app versions; skipping scan\n"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -297,6 +297,7 @@ int SCHED_CONFIG::parse(FILE* f) {
|
|||
//////////// SCHEDULER LOG FLAGS /////////
|
||||
|
||||
if (xp.parse_bool("debug_array", debug_array)) continue;
|
||||
if (xp.parse_bool("debug_array_detail", debug_array_detail)) continue;
|
||||
if (xp.parse_bool("debug_assignment", debug_assignment)) continue;
|
||||
if (xp.parse_bool("debug_credit", debug_credit)) continue;
|
||||
if (xp.parse_bool("debug_edf_sim_detail", debug_edf_sim_detail)) continue;
|
||||
|
|
|
@ -180,6 +180,7 @@ struct SCHED_CONFIG {
|
|||
// scheduler log flags
|
||||
//
|
||||
bool debug_array; // debug job-cache scheduling
|
||||
bool debug_array_detail; // show slot-level info
|
||||
bool debug_assignment;
|
||||
bool debug_credit;
|
||||
bool debug_edf_sim_detail; // show details of EDF sim
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
|
||||
#include "sched_main.h"
|
||||
#include "sched_config.h"
|
||||
#include "sched_customize.h"
|
||||
#include "sched_locality.h"
|
||||
#include "sched_msgs.h"
|
||||
#include "sched_send.h"
|
||||
|
@ -140,50 +141,81 @@ bool resend_lost_work() {
|
|||
}
|
||||
|
||||
DB_WORKUNIT wu;
|
||||
bool cant_resend = false;
|
||||
bool can_resend = true;
|
||||
retval = wu.lookup_id(result.workunitid);
|
||||
if (retval) {
|
||||
log_messages.printf(MSG_CRITICAL,
|
||||
"[HOST#%d] WU not found for [RESULT#%d]\n",
|
||||
"[HOST#%d] can't resend - WU not found for [RESULT#%d]\n",
|
||||
g_reply->host.id, result.id
|
||||
);
|
||||
cant_resend = true;
|
||||
} else {
|
||||
can_resend = false;
|
||||
}
|
||||
if (can_resend) {
|
||||
app = ssp->lookup_app(wu.appid);
|
||||
bavp = get_app_version(wu, false, false);
|
||||
bavp = get_app_version(wu, true, false);
|
||||
if (!bavp) {
|
||||
log_messages.printf(MSG_CRITICAL,
|
||||
"[HOST#%d] can't resend [RESULT#%d]: no app version for %s\n",
|
||||
g_reply->host.id, result.id, app->name
|
||||
);
|
||||
cant_resend = true;
|
||||
if (config.debug_resend) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[HOST#%d] can't resend [RESULT#%d]: no app version for %s\n",
|
||||
g_reply->host.id, result.id, app->name
|
||||
);
|
||||
}
|
||||
can_resend = false;
|
||||
}
|
||||
}
|
||||
|
||||
// If error occurred,
|
||||
// or time is too close to the deadline,
|
||||
// or we already have a canonical result,
|
||||
// or WU error flag is set,
|
||||
// then don't resend this result.
|
||||
// Instead make it time out right away
|
||||
// so that the transitioner does 'the right thing'.
|
||||
//
|
||||
if (
|
||||
cant_resend
|
||||
|| wu.error_mask
|
||||
|| wu.canonical_resultid
|
||||
|| wu_is_infeasible_fast(
|
||||
wu, result.server_state, result.priority, result.report_deadline,
|
||||
*app, *bavp
|
||||
)
|
||||
|| possibly_give_result_new_deadline(result, wu, *bavp)
|
||||
) {
|
||||
if (can_resend && wu.error_mask) {
|
||||
if (config.debug_resend) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[resend] [HOST#%d][RESULT#%d] not needed or too close to deadline, expiring\n",
|
||||
g_reply->host.id, result.id
|
||||
"[resend] skipping [RESULT#%d]: WU error mask %d\n",
|
||||
result.id, wu.error_mask
|
||||
);
|
||||
}
|
||||
can_resend = false;
|
||||
}
|
||||
if (can_resend && wu.canonical_resultid) {
|
||||
if (config.debug_resend) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[resend] skipping [RESULT#%d]: already have canonical result\n",
|
||||
result.id
|
||||
);
|
||||
}
|
||||
can_resend = false;
|
||||
}
|
||||
if (can_resend && wu_is_infeasible_fast(
|
||||
wu, result.server_state, result.priority, result.report_deadline,
|
||||
*app, *bavp
|
||||
)) {
|
||||
if (config.debug_resend) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[resend] skipping [RESULT#%d]: feasibility check failed\n",
|
||||
result.id
|
||||
);
|
||||
}
|
||||
can_resend = false;
|
||||
}
|
||||
if (can_resend && possibly_give_result_new_deadline(result, wu, *bavp)) {
|
||||
if (config.debug_resend) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[resend] skipping [RESULT#%d]: deadline assignment failed\n",
|
||||
result.id
|
||||
);
|
||||
}
|
||||
can_resend = false;
|
||||
}
|
||||
if (can_resend && wu_is_infeasible_custom(wu, *app, *bavp)) {
|
||||
if (config.debug_resend) {
|
||||
log_messages.printf(MSG_NORMAL,
|
||||
"[resend] skipping [RESULT#%d]: custom feasibility check failed\n",
|
||||
result.id
|
||||
);
|
||||
}
|
||||
can_resend = false;
|
||||
}
|
||||
|
||||
// If we can't resend this job for any of the above reasons,
|
||||
// make it time out so that the transitioner does the right thing.
|
||||
//
|
||||
if (!can_resend) {
|
||||
result.report_deadline = time(0)-1;
|
||||
retval = result.mark_as_sent(result.server_state, config.report_grace_period);
|
||||
if (retval) {
|
||||
|
|
Loading…
Reference in New Issue