*** empty log message ***

svn path=/trunk/boinc/; revision=4813
This commit is contained in:
David Anderson 2004-12-10 22:04:20 +00:00
parent 5ca7258cf6
commit 01d9b49925
3 changed files with 48 additions and 14 deletions

View File

@ -21068,3 +21068,24 @@ David 10 Dec 2004
and with the shared library in the non-monolithic case)
graphics_lib.C,h (new)
(link this with the main program in the non-monolithic case)
David 10 Dec 2004
- Fixed a bug in the validator that could cause WUs to
get stuck in a state where they have no canonical result,
no results in progress, and no transition scheduled.
1) After calling check_set(), count the results that still have
outcome == SUCCESS, and store the count in "nsuccess_results"
(this may have changed, since check_set() can change
the outcome to VALIDATE_ERROR).
2) If check_set() fails to find a canonical result,
always schedule an immediate transition.
3) If nsuccess_results >= target_nresults,
set target_nresults to nsuccess_results+1.
This ensures that the transitioner will make a new result,
even in pathological cases (e.g. nsuccess_results > target_nresults)
sched/
transitioner.C

View File

@ -32,17 +32,18 @@ If, when an output file for a result has a nonrecoverable error
then it must set the result's outcome (in memory, not database)
to VALIDATE_ERROR.
Note: the function try_fopen() (in lib/util.C) can be used
to detect recoverable/nonrecoverable errors.
to distinguish recoverable and nonrecoverable file-open errors.
<li>
If a canonical result is found, check_set() must set the
validate_state field of each non-ERROR result to either VALID or INVALID.
validate_state field of each non-ERROR result
(in memory, not database) to either VALID or INVALID.
<li>
If a recoverable error occurs while reading output files
(e.g. a directory wasn't visible due to NFS mount failure)
then check_set() should return retry=true.
This tells the validator to arrange for this WU to be
examined again in a few hours.
processed again in a few hours.
<li>
check_set() should return nonzero if a major error occurs.
This tells the validator to write an error message and exit.

View File

@ -272,6 +272,7 @@ void handle_wu(
}
} else {
vector<RESULT> results;
int nsuccess_results;
// Here if WU doesn't have a canonical result yet.
// Try to get one
@ -283,13 +284,12 @@ void handle_wu(
);
++log_messages;
// make a vector of only successful, unvalidated results
// make a vector of only successful results
//
for (i=0; i<items.size(); i++) {
RESULT& result = items[i].res;
if ((result.validate_state == VALIDATE_STATE_INIT) &&
(result.server_state == RESULT_SERVER_STATE_OVER) &&
if ((result.server_state == RESULT_SERVER_STATE_OVER) &&
(result.outcome == RESULT_OUTCOME_SUCCESS)
) {
results.push_back(result);
@ -319,14 +319,20 @@ void handle_wu(
}
if (retry) need_delayed_transition = true;
// update results as needed
// scan results.
// update as needed, and count the # of results
// that are still outcome=SUCCESS
// (some may have changed to VALIDATE_ERROR)
//
nsuccess_results = 0;
for (i=0; i<results.size(); i++) {
update_result = false;
RESULT& result = results[i];
if (result.outcome == RESULT_OUTCOME_VALIDATE_ERROR) {
need_immediate_transition = true;
update_result = true;
} else {
nsuccess_results++;
}
// grant credit for valid results
@ -396,18 +402,24 @@ void handle_wu(
}
}
} else {
// here if no consensus
// here if no consensus.
// Trigger a transition to make more results if needed
//
need_immediate_transition = true;
// check if #success results is too large
//
if ((int)results.size() > wu.max_success_results) {
if (nsuccess_results > wu.max_success_results) {
wu.error_mask |= WU_ERROR_TOO_MANY_SUCCESS_RESULTS;
need_immediate_transition = true;
}
// if #success results is target_nresults, bump it up
// if #success results >= target_nresults,
// we need more results, so bump target_nresults
// NOTE: nsuccess_results should never be > target_nresults,
// but accommodate that if it should happen
//
if ((int)results.size() == wu.target_nresults) {
wu.target_nresults++;
need_immediate_transition = true;
if (nsuccess_results >= wu.target_nresults) {
wu.target_nresults = nsuccess_results+1;
}
}
}