validator: fix potential infinite loop

This commit is contained in:
David Anderson 2024-09-19 20:04:42 -07:00
parent 29af35cd72
commit 266c050895
2 changed files with 20 additions and 10 deletions

View File

@ -104,7 +104,11 @@ void validate_handler_usage() {
); );
} }
// see validate_util2.h for return values // run script to check a result
// if script exits with VAL_RESULT_TRANSIENT_ERROR, return that;
// the WU will be validated again after a delay.
//
// any other nonzero return means the result is not valid
// //
int init_result(RESULT& result, void*&) { int init_result(RESULT& result, void*&) {
if (init_script.empty()) { if (init_script.empty()) {
@ -144,10 +148,13 @@ int init_result(RESULT& result, void*&) {
int s = WEXITSTATUS(retval); int s = WEXITSTATUS(retval);
if (!s) return 0; if (!s) return 0;
if (s == VAL_RESULT_TRANSIENT_ERROR) { if (s == VAL_RESULT_TRANSIENT_ERROR) {
log_messages.printf(MSG_NORMAL,
"init script return transient error"
);
return VAL_RESULT_TRANSIENT_ERROR; return VAL_RESULT_TRANSIENT_ERROR;
} }
log_messages.printf(MSG_CRITICAL, log_messages.printf(MSG_NORMAL,
"init script %s failed: %d\n", cmd, s "init script %s returned: %d\n", cmd, s
); );
return -1; return -1;
} }

View File

@ -59,6 +59,8 @@ using std::vector;
// (i.e. there was a broken NFS mount). // (i.e. there was a broken NFS mount).
// Should call this again after a while. // Should call this again after a while.
// //
// Always returns zero; a transient failure is reported by setting retry, not via the return value.
//
int check_set( int check_set(
vector<RESULT>& results, WORKUNIT& wu, vector<RESULT>& results, WORKUNIT& wu,
DB_ID_TYPE& canonicalid, double&, bool& retry DB_ID_TYPE& canonicalid, double&, bool& retry
@ -74,6 +76,9 @@ int check_set(
had_error.resize(n); had_error.resize(n);
// Initialize results // Initialize results
// For each one we potentially allocate data,
// so always exit via "goto cleanup"
// to free this memory.
for (i=0; i<n; i++) { for (i=0; i<n; i++) {
data[i] = NULL; data[i] = NULL;
@ -134,21 +139,22 @@ int check_set(
if (good_results < wu.min_quorum) goto cleanup; if (good_results < wu.min_quorum) goto cleanup;
// Compare results // for each result, count how many it matches (including itself)
// If this is at least min_valid, it's the canonical result
for (i=0; i<n; i++) { for (i=0; i<n; i++) {
if (had_error[i]) continue; if (had_error[i]) continue;
vector<bool> matches; vector<bool> matches;
matches.resize(n); matches.resize(n);
neq = 0; neq = 0;
for (j=0; j!=n; j++) { for (j=0; j<n; j++) {
if (had_error[j]) continue; if (had_error[j]) continue;
bool match = false;
if (i == j) { if (i == j) {
++neq; ++neq;
matches[j] = true; matches[j] = true;
continue; continue;
} }
bool match = false;
retval = compare_results( retval = compare_results(
results[i], data[i], results[j], data[j], match results[i], data[i], results[j], data[j], match
); );
@ -156,7 +162,6 @@ int check_set(
case ERR_OPENDIR: case ERR_OPENDIR:
case VAL_RESULT_TRANSIENT_ERROR: case VAL_RESULT_TRANSIENT_ERROR:
retry = true; retry = true;
retval = 0;
goto cleanup; goto cleanup;
case 0: case 0:
if (match) { if (match) {
@ -169,7 +174,6 @@ int check_set(
"check_set(): compare_results([RESULT#%lu %s], [RESULT#%lu %s]) failed\n", "check_set(): compare_results([RESULT#%lu %s], [RESULT#%lu %s]) failed\n",
results[i].id, results[i].name, results[j].id, results[j].name results[i].id, results[i].name, results[j].id, results[j].name
); );
goto cleanup;
} }
} }
if (neq >= min_valid) { if (neq >= min_valid) {
@ -185,12 +189,11 @@ int check_set(
} }
} }
retval = 0;
cleanup: cleanup:
for (i=0; i<n; i++) { for (i=0; i<n; i++) {
cleanup_result(results[i], data[i]); cleanup_result(results[i], data[i]);
} }
return retval; return 0;
} }
// a straggler instance has arrived after the WU is already validated. // a straggler instance has arrived after the WU is already validated.