diff --git a/checkin_notes b/checkin_notes
index 7d7636b07e..329b7df3d4 100755
--- a/checkin_notes
+++ b/checkin_notes
@@ -21068,3 +21068,24 @@ David  10 Dec 2004
              and with the shared library in the non-monolithic case)
         graphics_lib.C,h (new)
             (link this with the main program in the non-monolithic case)
+
+David  10 Dec 2004
+    - Fixed a bug in the validator that could cause WUs to
+        get stuck in a state where they have no canonical result,
+        no results in progress, and no transition scheduled.
+
+        1) After calling check_set(), count the number of results
+        (nsuccess_results) that still have outcome == SUCCESS
+        (this may have changed, since check_set() can change
+        the outcome to VALIDATE_ERROR).
+
+        2) If check_set() fails to find a canonical result,
+        always schedule an immediate transition.
+
+        3) If nsuccess_results >= target_nresults,
+        set target_nresults to nsuccess_results+1.
+        This ensures that the transitioner will make a new result,
+        even in pathological cases (e.g. nsuccess_results > target_nresults).
+
+    sched/
+        validator.C
diff --git a/doc/validate.php b/doc/validate.php
index fe47b6a1eb..e257d900b4 100644
--- a/doc/validate.php
+++ b/doc/validate.php
@@ -32,17 +32,18 @@ If, when an output file for a result has a nonrecoverable error
 then it must set the result's outcome (in memory, not database)
 to VALIDATE_ERROR.
 Note: the function try_fopen() (in lib/util.C) can be used
-to detect recoverable/nonrecoverable errors.
+to distinguish recoverable and nonrecoverable file-open errors.
 <li>
 If a canonical result is found, check_set() must set the
-validate_state field of each non-ERROR result to either VALID or INVALID.
+validate_state field of each non-ERROR result
+(in memory, not database) to either VALID or INVALID.
 
 <li>
 If a recoverable error occurs while reading output files
 (e.g. a directory wasn't visible due to NFS mount failure)
 then check_set() should return retry=true.
 This tells the validator to arrange for this WU to be
-examined again in a few hours.
+processed again in a few hours.
 <li>
 check_set() should return nonzero if a major error occurs.
 This tells the validator to write an error message and exit.
diff --git a/sched/validator.C b/sched/validator.C
index 5f2685abd1..433364c3b7 100644
--- a/sched/validator.C
+++ b/sched/validator.C
@@ -272,6 +272,7 @@ void handle_wu(
         }
     } else {
         vector<RESULT> results;
+        int nsuccess_results;
 
         // Here if WU doesn't have a canonical result yet.
         // Try to get one
@@ -283,13 +284,12 @@ void handle_wu(
         );
         ++log_messages;
 
-        // make a vector of only successful, unvalidated results
+        // make a vector of only successful results
         //
         for (i=0; i<items.size(); i++) {
             RESULT& result = items[i].res;
 
-            if ((result.validate_state == VALIDATE_STATE_INIT) &&
-                (result.server_state == RESULT_SERVER_STATE_OVER) &&
+            if ((result.server_state == RESULT_SERVER_STATE_OVER) &&
                 (result.outcome == RESULT_OUTCOME_SUCCESS)
             ) {
                 results.push_back(result);
@@ -319,14 +319,20 @@ void handle_wu(
             }
             if (retry) need_delayed_transition = true;
 
-            // update results as needed
+            // scan results.
+            // update as needed, and count the # of results
+            // that are still outcome=SUCCESS
+            // (some may have changed to VALIDATE_ERROR)
             //
+            nsuccess_results = 0;
             for (i=0; i<results.size(); i++) {
                 update_result = false;
                 RESULT& result = results[i];
                 if (result.outcome == RESULT_OUTCOME_VALIDATE_ERROR) {
                     need_immediate_transition = true;
                     update_result = true;
+                } else {
+                    nsuccess_results++;
                 }
 
                 // grant credit for valid results
@@ -396,18 +402,24 @@ void handle_wu(
                     }
                 }
             } else {
-                // here if no consensus
+                // here if no consensus.
+                // Trigger a transition to make more results if needed
+                //
+                need_immediate_transition = true;
+
                 // check if #success results is too large
                 //
-                if ((int)results.size() > wu.max_success_results) {
+                if (nsuccess_results > wu.max_success_results) {
                     wu.error_mask |= WU_ERROR_TOO_MANY_SUCCESS_RESULTS;
-                    need_immediate_transition = true;
                 }
-                // if #success results is target_nresults, bump it up
+
+                // if #success results >= target_nresults,
+                // we need more results, so bump target_nresults
+                // NOTE: nsuccess_results should never be > target_nresults,
+                // but accommodate that if it should happen
                 //
-                if ((int)results.size() == wu.target_nresults) {
-                    wu.target_nresults++;
-                    need_immediate_transition = true;
+                if (nsuccess_results >= wu.target_nresults) {
+                    wu.target_nresults = nsuccess_results+1;
                 }
             }
         }