From 01d9b49925de5066b961194be37093c5913bc6ad Mon Sep 17 00:00:00 2001 From: David Anderson Date: Fri, 10 Dec 2004 22:04:20 +0000 Subject: [PATCH] *** empty log message *** svn path=/trunk/boinc/; revision=4813 --- checkin_notes | 21 +++++++++++++++++++++ doc/validate.php | 7 ++++--- sched/validator.C | 34 +++++++++++++++++++++++----------- 3 files changed, 48 insertions(+), 14 deletions(-) diff --git a/checkin_notes b/checkin_notes index 7d7636b07e..329b7df3d4 100755 --- a/checkin_notes +++ b/checkin_notes @@ -21068,3 +21068,24 @@ David 10 Dec 2004 and with the shared library in the non-monolithic case) graphics_lib.C,h (new) (link this with the main program in the non-monolithic case) + +David 10 Dec 2004 + - Fixed a bug in the validator that could cause WUs to + get stuck in a state where they have no canonical result, + no results in progress, and no transition scheduled. + + 1) After calling check_set(), count the number "nsuccess_results" + of results that still have outcome == SUCCESS + (this may have changed, since check_set() can change + the outcome to VALIDATE_ERROR). + + 2) If check_set() fails to find a canonical result, + always schedule an immediate transition. + + 3) If nsuccess_results >= target_nresults, + set target_nresults to nsuccess_results+1. + This ensures that the transitioner will make a new result, + even in pathological cases (e.g. nsuccess_results > target_nresults) + + sched/ + transitioner.C diff --git a/doc/validate.php b/doc/validate.php index fe47b6a1eb..e257d900b4 100644 --- a/doc/validate.php +++ b/doc/validate.php @@ -32,17 +32,18 @@ If, when an output file for a result has a nonrecoverable error then it must set the result's outcome (in memory, not database) to VALIDATE_ERROR. Note: the function try_fopen() (in lib/util.C) can be used -to detect recoverable/nonrecoverable errors. +to distinguish recoverable and nonrecoverable file-open errors.
  • If a canonical result is found, check_set() must set the -validate_state field of each non-ERROR result to either VALID or INVALID. +validate_state field of each non-ERROR result +(in memory, not database) to either VALID or INVALID.
  • If a recoverable error occurs while reading output files (e.g. a directory wasn't visible due to NFS mount failure) then check_set() should return retry=true. This tells the validator to arrange for this WU to be -examined again in a few hours. +processed again in a few hours.
  • check_set() should return nonzero if a major error occurs. This tells the validator to write an error message and exit. diff --git a/sched/validator.C b/sched/validator.C index 5f2685abd1..433364c3b7 100644 --- a/sched/validator.C +++ b/sched/validator.C @@ -272,6 +272,7 @@ void handle_wu( } } else { vector results; + int nsuccess_results; // Here if WU doesn't have a canonical result yet. // Try to get one @@ -283,13 +284,12 @@ void handle_wu( ); ++log_messages; - // make a vector of only successful, unvalidated results + // make a vector of only successful results // for (i=0; i wu.max_success_results) { + if (nsuccess_results > wu.max_success_results) { wu.error_mask |= WU_ERROR_TOO_MANY_SUCCESS_RESULTS; - need_immediate_transition = true; } - // if #success results is target_nresults, bump it up + + // if #success results == target_nresults, + // we need more results, so bump target_nresults + // NOTE: nsuccess_results should never be > target_nresults, + // but accommodate that if it should happen // - if ((int)results.size() == wu.target_nresults) { - wu.target_nresults++; - need_immediate_transition = true; + if (nsuccess_results >= wu.target_nresults) { + wu.target_nresults = nsuccess_results+1; } } }