mirror of https://github.com/BOINC/boinc.git
*** empty log message ***
svn path=/trunk/boinc/; revision=4813
This commit is contained in:
parent
5ca7258cf6
commit
01d9b49925
|
@ -21068,3 +21068,24 @@ David 10 Dec 2004
|
|||
and with the shared library in the non-monolithic case)
|
||||
graphics_lib.C,h (new)
|
||||
(link this with the main program in the non-monolithic case)
|
||||
|
||||
David 10 Dec 2004
|
||||
- Fixed a bug in the validator that could cause WUs to
|
||||
get stuck in a state where they have no canonical result,
|
||||
no results in progress, and no transition scheduled.
|
||||
|
||||
1) After calling check_set(), count the number "nsuccess_results"
|
||||
of results that still have outcome == SUCCESS
|
||||
(this may have changed, since check_set() can change
|
||||
the outcome to VALIDATE_ERROR).
|
||||
|
||||
2) If check_set() fails to find a canonical result,
|
||||
always schedule an immediate transition.
|
||||
|
||||
3) If nsuccess_results >= target_nresults,
|
||||
set target_nresults to nsuccess_results+1.
|
||||
This ensures that the transitioner will make a new result,
|
||||
even in pathological cases (e.g. nsuccess_results > target_nresults).
|
||||
|
||||
sched/
|
||||
transitioner.C
|
||||
|
|
|
@ -32,17 +32,18 @@ If, when an output file for a result has a nonrecoverable error
|
|||
then it must set the result's outcome (in memory, not database)
|
||||
to VALIDATE_ERROR.
|
||||
Note: the function try_fopen() (in lib/util.C) can be used
|
||||
to detect recoverable/nonrecoverable errors.
|
||||
to distinguish recoverable and nonrecoverable file-open errors.
|
||||
<li>
|
||||
If a canonical result is found, check_set() must set the
|
||||
validate_state field of each non-ERROR result to either VALID or INVALID.
|
||||
validate_state field of each non-ERROR result
|
||||
(in memory, not database) to either VALID or INVALID.
|
||||
|
||||
<li>
|
||||
If a recoverable error occurs while reading output files
|
||||
(e.g. a directory wasn't visible due to NFS mount failure)
|
||||
then check_set() should return retry=true.
|
||||
This tells the validator to arrange for this WU to be
|
||||
examined again in a few hours.
|
||||
processed again in a few hours.
|
||||
<li>
|
||||
check_set() should return nonzero if a major error occurs.
|
||||
This tells the validator to write an error message and exit.
|
||||
|
|
|
@ -272,6 +272,7 @@ void handle_wu(
|
|||
}
|
||||
} else {
|
||||
vector<RESULT> results;
|
||||
int nsuccess_results;
|
||||
|
||||
// Here if WU doesn't have a canonical result yet.
|
||||
// Try to get one
|
||||
|
@ -283,13 +284,12 @@ void handle_wu(
|
|||
);
|
||||
++log_messages;
|
||||
|
||||
// make a vector of only successful, unvalidated results
|
||||
// make a vector of only successful results
|
||||
//
|
||||
for (i=0; i<items.size(); i++) {
|
||||
RESULT& result = items[i].res;
|
||||
|
||||
if ((result.validate_state == VALIDATE_STATE_INIT) &&
|
||||
(result.server_state == RESULT_SERVER_STATE_OVER) &&
|
||||
if ((result.server_state == RESULT_SERVER_STATE_OVER) &&
|
||||
(result.outcome == RESULT_OUTCOME_SUCCESS)
|
||||
) {
|
||||
results.push_back(result);
|
||||
|
@ -319,14 +319,20 @@ void handle_wu(
|
|||
}
|
||||
if (retry) need_delayed_transition = true;
|
||||
|
||||
// update results as needed
|
||||
// scan results.
|
||||
// update as needed, and count the # of results
|
||||
// that are still outcome=SUCCESS
|
||||
// (some may have changed to VALIDATE_ERROR)
|
||||
//
|
||||
nsuccess_results = 0;
|
||||
for (i=0; i<results.size(); i++) {
|
||||
update_result = false;
|
||||
RESULT& result = results[i];
|
||||
if (result.outcome == RESULT_OUTCOME_VALIDATE_ERROR) {
|
||||
need_immediate_transition = true;
|
||||
update_result = true;
|
||||
} else {
|
||||
nsuccess_results++;
|
||||
}
|
||||
|
||||
// grant credit for valid results
|
||||
|
@ -396,18 +402,24 @@ void handle_wu(
|
|||
}
|
||||
}
|
||||
} else {
|
||||
// here if no consensus
|
||||
// here if no consensus.
|
||||
// Trigger a transition to make more results if needed
|
||||
//
|
||||
need_immediate_transition = true;
|
||||
|
||||
// check if #success results is too large
|
||||
//
|
||||
if ((int)results.size() > wu.max_success_results) {
|
||||
if (nsuccess_results > wu.max_success_results) {
|
||||
wu.error_mask |= WU_ERROR_TOO_MANY_SUCCESS_RESULTS;
|
||||
need_immediate_transition = true;
|
||||
}
|
||||
// if #success results is target_nresults, bump it up
|
||||
|
||||
// if #success results >= target_nresults,
|
||||
// we need more results, so bump target_nresults
|
||||
// NOTE: nsuccess_results should never be > target_nresults,
|
||||
// but accommodate that if it should happen
|
||||
//
|
||||
if ((int)results.size() == wu.target_nresults) {
|
||||
wu.target_nresults++;
|
||||
need_immediate_transition = true;
|
||||
if (nsuccess_results >= wu.target_nresults) {
|
||||
wu.target_nresults = nsuccess_results+1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue