2008-08-06 18:36:30 +00:00
|
|
|
// This file is part of BOINC.
|
2006-06-09 23:17:05 +00:00
|
|
|
// http://boinc.berkeley.edu
|
2008-08-06 18:36:30 +00:00
|
|
|
// Copyright (C) 2008 University of California
|
2006-06-09 23:17:05 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
2006-06-09 23:17:05 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
2006-06-09 23:17:05 +00:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
2006-06-09 23:17:05 +00:00
|
|
|
|
|
|
|
// Simple validator framework:
|
2008-09-17 23:35:16 +00:00
|
|
|
// Lets you create a custom validator by supplying three functions.
|
|
|
|
// See http://boinc.berkeley.edu/trac/wiki/ValidationSimple
|
2006-06-09 23:17:05 +00:00
|
|
|
//
|
|
|
|
|
|
|
|
#include "config.h"
|
|
|
|
#include <vector>
|
2008-02-27 23:26:38 +00:00
|
|
|
#include <cstdlib>
|
|
|
|
#include <string>
|
2006-06-09 23:17:05 +00:00
|
|
|
|
2013-06-19 17:15:08 +00:00
|
|
|
|
2006-06-09 23:17:05 +00:00
|
|
|
#include "boinc_db.h"
|
2006-06-20 17:36:28 +00:00
|
|
|
#include "error_numbers.h"
|
2006-06-09 23:17:05 +00:00
|
|
|
|
2006-06-10 12:55:23 +00:00
|
|
|
#include "sched_config.h"
|
2006-06-09 23:17:05 +00:00
|
|
|
#include "sched_msgs.h"
|
|
|
|
|
2006-06-10 21:29:51 +00:00
|
|
|
#include "validator.h"
|
2006-06-09 23:17:05 +00:00
|
|
|
#include "validate_util.h"
|
|
|
|
#include "validate_util2.h"
|
|
|
|
|
|
|
|
using std::vector;
|
|
|
|
|
2013-05-20 20:01:10 +00:00
|
|
|
// Given a set of results:
|
|
|
|
// 1) call init_result() for each one;
|
|
|
|
// this detects results with bad or missing output files
|
|
|
|
// 2) if # of good results is >= wu.min_quorum,
|
|
|
|
// check for a canonical result,
|
|
|
|
// i.e. a set of at least min_quorum/2+1 results for which
|
|
|
|
// that are equivalent according to check_pair().
|
2008-09-17 23:35:16 +00:00
|
|
|
//
|
2013-05-20 20:01:10 +00:00
|
|
|
// input invariants:
|
2006-06-20 17:36:28 +00:00
|
|
|
// for each result:
|
|
|
|
// result.outcome == SUCCESS
|
|
|
|
// result.validate_state == INIT
|
2008-09-17 23:35:16 +00:00
|
|
|
//
|
2013-05-20 20:01:10 +00:00
|
|
|
// Outputs:
|
|
|
|
// canonicalid: the ID of canonical result, if any
|
|
|
|
// result.outcome, result.validate_state
|
|
|
|
// modified; caller must update DB
|
|
|
|
// retry: set to true if some result had a transient failure
|
|
|
|
// (i.e. there was a broken NFS mount).
|
|
|
|
// Should call this again after a while.
|
|
|
|
//
|
2006-06-09 23:17:05 +00:00
|
|
|
int check_set(
|
2006-06-20 17:36:28 +00:00
|
|
|
vector<RESULT>& results, WORKUNIT& wu,
|
2015-07-28 23:19:31 +00:00
|
|
|
DB_ID_TYPE& canonicalid, double&, bool& retry
|
2006-06-09 23:17:05 +00:00
|
|
|
) {
|
|
|
|
vector<void*> data;
|
2006-06-20 17:36:28 +00:00
|
|
|
vector<bool> had_error;
|
|
|
|
int i, j, neq = 0, n, retval;
|
2006-06-09 23:17:05 +00:00
|
|
|
int min_valid = wu.min_quorum/2+1;
|
|
|
|
|
|
|
|
retry = false;
|
|
|
|
n = results.size();
|
|
|
|
data.resize(n);
|
2006-06-20 17:36:28 +00:00
|
|
|
had_error.resize(n);
|
2006-06-09 23:17:05 +00:00
|
|
|
|
2006-06-20 17:36:28 +00:00
|
|
|
// Initialize results
|
2006-06-09 23:17:05 +00:00
|
|
|
|
2006-06-20 17:36:28 +00:00
|
|
|
for (i=0; i<n; i++) {
|
|
|
|
data[i] = NULL;
|
|
|
|
had_error[i] = false;
|
|
|
|
}
|
|
|
|
int good_results = 0;
|
2016-08-25 11:03:08 +00:00
|
|
|
int suspicious_results = 0;
|
2006-06-20 17:36:28 +00:00
|
|
|
for (i=0; i<n; i++) {
|
|
|
|
retval = init_result(results[i], data[i]);
|
|
|
|
if (retval == ERR_OPENDIR) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2015-07-23 17:11:08 +00:00
|
|
|
"check_set: init_result([RESULT#%lu %s]) transient failure\n",
|
2006-06-09 23:17:05 +00:00
|
|
|
results[i].id, results[i].name
|
|
|
|
);
|
2013-05-20 20:01:10 +00:00
|
|
|
retry = true;
|
2006-06-20 17:36:28 +00:00
|
|
|
had_error[i] = true;
|
2016-08-25 11:03:08 +00:00
|
|
|
} else if (retval == VAL_RESULT_SUSPICIOUS) {
|
|
|
|
suspicious_results++;
|
2006-06-20 17:36:28 +00:00
|
|
|
} else if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2015-07-23 17:11:08 +00:00
|
|
|
"check_set: init_result([RESULT#%lu %s]) failed: %s\n",
|
2010-11-08 17:51:57 +00:00
|
|
|
results[i].id, results[i].name, boincerror(retval)
|
2006-06-20 17:36:28 +00:00
|
|
|
);
|
|
|
|
results[i].outcome = RESULT_OUTCOME_VALIDATE_ERROR;
|
|
|
|
results[i].validate_state = VALIDATE_STATE_INVALID;
|
2006-11-26 03:17:44 +00:00
|
|
|
had_error[i] = true;
|
2006-06-20 17:36:28 +00:00
|
|
|
} else {
|
|
|
|
good_results++;
|
2006-06-09 23:17:05 +00:00
|
|
|
}
|
|
|
|
}
|
2016-08-25 11:03:08 +00:00
|
|
|
|
|
|
|
// don't count a single "suspicious" result as "good",
|
|
|
|
// but do if there are more results to compare it with
|
|
|
|
//
|
|
|
|
if (suspicious_results > 1 || good_results > 0) {
|
|
|
|
good_results += suspicious_results;
|
|
|
|
}
|
|
|
|
|
2006-06-20 17:36:28 +00:00
|
|
|
if (good_results < wu.min_quorum) goto cleanup;
|
2006-06-09 23:17:05 +00:00
|
|
|
|
2006-06-20 17:36:28 +00:00
|
|
|
// Compare results
|
2006-06-09 23:17:05 +00:00
|
|
|
|
2006-06-20 17:36:28 +00:00
|
|
|
for (i=0; i<n; i++) {
|
|
|
|
if (had_error[i]) continue;
|
2006-06-09 23:17:05 +00:00
|
|
|
vector<bool> matches;
|
|
|
|
matches.resize(n);
|
|
|
|
neq = 0;
|
|
|
|
for (j=0; j!=n; j++) {
|
2006-06-20 17:36:28 +00:00
|
|
|
if (had_error[j]) continue;
|
2006-06-09 23:17:05 +00:00
|
|
|
bool match = false;
|
|
|
|
if (i == j) {
|
|
|
|
++neq;
|
|
|
|
matches[j] = true;
|
|
|
|
} else if (compare_results(results[i], data[i], results[j], data[j], match)) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2015-07-23 17:11:08 +00:00
|
|
|
"generic_check_set: check_pair_with_data([RESULT#%lu %s], [RESULT#%lu %s]) failed\n",
|
2006-06-09 23:17:05 +00:00
|
|
|
results[i].id, results[i].name, results[j].id, results[j].name
|
|
|
|
);
|
|
|
|
} else if (match) {
|
|
|
|
++neq;
|
|
|
|
matches[j] = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (neq >= min_valid) {
|
|
|
|
|
|
|
|
// set validate state for each result
|
|
|
|
//
|
|
|
|
for (j=0; j!=n; j++) {
|
2006-06-20 17:36:28 +00:00
|
|
|
if (had_error[j]) continue;
|
2011-09-13 21:01:42 +00:00
|
|
|
results[j].validate_state = matches[j] ? VALIDATE_STATE_VALID : VALIDATE_STATE_INVALID;
|
2006-06-09 23:17:05 +00:00
|
|
|
}
|
|
|
|
canonicalid = results[i].id;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
|
2006-06-20 17:36:28 +00:00
|
|
|
for (i=0; i<n; i++) {
|
2006-06-09 23:17:05 +00:00
|
|
|
cleanup_result(results[i], data[i]);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2006-11-27 01:07:00 +00:00
|
|
|
// r1 is the new result; r2 is canonical result
|
|
|
|
//
|
2008-03-13 23:35:13 +00:00
|
|
|
void check_pair(RESULT& r1, RESULT& r2, bool& retry) {
|
2006-06-09 23:17:05 +00:00
|
|
|
void* data1;
|
|
|
|
void* data2;
|
|
|
|
int retval;
|
|
|
|
bool match;
|
|
|
|
|
|
|
|
retry = false;
|
|
|
|
retval = init_result(r1, data1);
|
2006-11-27 01:07:00 +00:00
|
|
|
if (retval == ERR_OPENDIR) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2015-07-23 17:11:08 +00:00
|
|
|
"check_pair: init_result([RESULT#%lu %s]) transient failure 1\n",
|
2006-11-27 01:07:00 +00:00
|
|
|
r1.id, r1.name
|
|
|
|
);
|
|
|
|
retry = true;
|
|
|
|
return;
|
|
|
|
} else if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2015-07-23 17:11:08 +00:00
|
|
|
"check_pair: init_result([RESULT#%lu %s]) perm failure 1\n",
|
2006-11-27 01:07:00 +00:00
|
|
|
r1.id, r1.name
|
2006-06-09 23:17:05 +00:00
|
|
|
);
|
2006-11-27 01:07:00 +00:00
|
|
|
r1.outcome = RESULT_OUTCOME_VALIDATE_ERROR;
|
|
|
|
r1.validate_state = VALIDATE_STATE_INVALID;
|
|
|
|
return;
|
2006-06-09 23:17:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
retval = init_result(r2, data2);
|
2006-11-27 01:07:00 +00:00
|
|
|
if (retval == ERR_OPENDIR) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2015-07-23 17:11:08 +00:00
|
|
|
"check_pair: init_result([RESULT#%lu %s]) transient failure 2\n",
|
2006-11-27 01:07:00 +00:00
|
|
|
r2.id, r2.name
|
2006-06-09 23:17:05 +00:00
|
|
|
);
|
|
|
|
cleanup_result(r1, data1);
|
2006-11-27 01:07:00 +00:00
|
|
|
retry = true;
|
|
|
|
return;
|
|
|
|
} else if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2015-07-23 17:11:08 +00:00
|
|
|
"check_pair: init_result([RESULT#%lu %s]) perm failure2\n",
|
2006-11-27 01:07:00 +00:00
|
|
|
r2.id, r2.name
|
|
|
|
);
|
|
|
|
cleanup_result(r1, data1);
|
|
|
|
r1.outcome = RESULT_OUTCOME_VALIDATE_ERROR;
|
|
|
|
r1.validate_state = VALIDATE_STATE_INVALID;
|
|
|
|
return;
|
2006-06-09 23:17:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
retval = compare_results(r1, data1, r2, data2, match);
|
2011-09-13 21:01:42 +00:00
|
|
|
r1.validate_state = match?VALIDATE_STATE_VALID:VALIDATE_STATE_INVALID;
|
2006-06-09 23:17:05 +00:00
|
|
|
cleanup_result(r1, data1);
|
|
|
|
cleanup_result(r2, data2);
|
|
|
|
}
|