2005-01-20 23:22:22 +00:00
|
|
|
// Berkeley Open Infrastructure for Network Computing
|
|
|
|
// http://boinc.berkeley.edu
|
|
|
|
// Copyright (C) 2005 University of California
|
2003-08-14 00:02:15 +00:00
|
|
|
//
|
2005-01-20 23:22:22 +00:00
|
|
|
// This is free software; you can redistribute it and/or
|
|
|
|
// modify it under the terms of the GNU Lesser General Public
|
|
|
|
// License as published by the Free Software Foundation;
|
|
|
|
// either version 2.1 of the License, or (at your option) any later version.
|
2003-08-14 00:02:15 +00:00
|
|
|
//
|
2005-01-20 23:22:22 +00:00
|
|
|
// This software is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
2003-08-14 00:02:15 +00:00
|
|
|
//
|
2005-01-20 23:22:22 +00:00
|
|
|
// To view the GNU Lesser General Public License visit
|
|
|
|
// http://www.gnu.org/copyleft/lesser.html
|
|
|
|
// or write to the Free Software Foundation, Inc.,
|
|
|
|
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
2003-08-14 00:02:15 +00:00
|
|
|
|
2003-09-20 17:38:13 +00:00
|
|
|
// Code to facilitate writing validators.
|
|
|
|
// Can be used as the basis for a validator that accepts everything
|
|
|
|
// (see validate_trivial.C),
|
|
|
|
// or that requires strict or fuzzy equality.
|
|
|
|
|
2003-10-21 04:06:55 +00:00
|
|
|
#include <cassert>
|
2003-09-20 17:38:13 +00:00
|
|
|
|
2003-10-21 04:06:55 +00:00
|
|
|
#include "error_numbers.h"
|
2003-08-14 00:02:15 +00:00
|
|
|
#include "parse.h"
|
2004-08-06 11:42:41 +00:00
|
|
|
#include "util.h"
|
2005-01-02 08:01:27 +00:00
|
|
|
#include "filesys.h"
|
2004-04-08 08:15:23 +00:00
|
|
|
|
|
|
|
#include "sched_util.h"
|
|
|
|
#include "sched_config.h"
|
|
|
|
#include "sched_msgs.h"
|
2003-10-21 04:06:55 +00:00
|
|
|
#include "validate_util.h"
|
2003-08-14 00:02:15 +00:00
|
|
|
|
2004-06-30 18:17:21 +00:00
|
|
|
using std::vector;
|
|
|
|
using std::string;
|
|
|
|
|
2003-09-02 21:16:55 +00:00
|
|
|
extern SCHED_CONFIG config;
|
2003-08-14 00:02:15 +00:00
|
|
|
|
|
|
|
// get the name of a result's (first) output file
|
|
|
|
//
|
2004-08-06 11:42:41 +00:00
|
|
|
int get_output_file_path(RESULT const& result, string& path_str) {
|
|
|
|
char buf[256], path[256];
|
2003-08-14 00:02:15 +00:00
|
|
|
bool flag;
|
|
|
|
|
2004-11-16 18:22:09 +00:00
|
|
|
flag = parse_str(result.xml_doc_out, "<name>", buf, sizeof(buf));
|
2003-10-21 04:06:55 +00:00
|
|
|
if (!flag) return ERR_XML_PARSE;
|
2005-01-02 07:44:40 +00:00
|
|
|
dir_hier_path(buf, config.upload_dir, config.uldl_dir_fanout, true, path);
|
|
|
|
if (!boinc_file_exists(path)) {
|
|
|
|
dir_hier_path(buf, config.upload_dir, config.uldl_dir_fanout, false, path);
|
|
|
|
}
|
2004-08-06 11:42:41 +00:00
|
|
|
path_str = path;
|
2003-08-14 00:02:15 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2005-03-07 23:15:53 +00:00
|
|
|
#define CREDIT_EPSILON .001
|
|
|
|
|
|
|
|
// If we have N correct results with nonzero claimed credit,
|
|
|
|
// compute a canonical credit as follows:
|
|
|
|
// - if N==0 (all claimed credits are infinitesmal), return CREDIT_EPSILON
|
|
|
|
// - if N==1, return that credit
|
|
|
|
// - if N==2, return min
|
|
|
|
// - if N>2, toss out min and max, return average of rest
|
2003-08-14 00:02:15 +00:00
|
|
|
//
|
2005-03-07 23:15:53 +00:00
|
|
|
double median_mean_credit(vector<RESULT>& results) {
|
|
|
|
int ilow=-1, ihigh=-1;
|
2003-08-14 00:02:15 +00:00
|
|
|
double credit_low = 0, credit_high = 0;
|
2005-03-07 23:15:53 +00:00
|
|
|
int nvalid = 0;
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
for (i=0; results.size(); i++) {
|
|
|
|
RESULT& result = results[i];
|
|
|
|
if (result.validate_state != VALIDATE_STATE_VALID) continue;
|
|
|
|
if (result.claimed_credit < CREDIT_EPSILON) continue;
|
|
|
|
if (ilow < 0) {
|
|
|
|
ilow = ihigh = i;
|
|
|
|
credit_low = credit_high = result.claimed_credit;
|
2003-08-14 00:02:15 +00:00
|
|
|
} else {
|
2005-03-07 23:15:53 +00:00
|
|
|
if (result.claimed_credit < credit_low) {
|
|
|
|
ilow = i;
|
|
|
|
credit_low = result.claimed_credit;
|
2003-08-14 00:02:15 +00:00
|
|
|
}
|
2005-03-07 23:15:53 +00:00
|
|
|
if (result.claimed_credit > credit_high) {
|
|
|
|
ihigh = i;
|
|
|
|
credit_high = result.claimed_credit;
|
2003-08-14 00:02:15 +00:00
|
|
|
}
|
|
|
|
}
|
2005-03-07 23:15:53 +00:00
|
|
|
nvalid++;
|
2003-08-14 00:02:15 +00:00
|
|
|
}
|
|
|
|
|
2005-03-07 23:15:53 +00:00
|
|
|
switch(nvalid) {
|
|
|
|
case 0:
|
|
|
|
return CREDIT_EPSILON;
|
|
|
|
case 1:
|
|
|
|
case 2:
|
2003-08-14 00:02:15 +00:00
|
|
|
return credit_low;
|
2005-03-07 23:15:53 +00:00
|
|
|
default:
|
2003-08-14 00:02:15 +00:00
|
|
|
double sum = 0;
|
2005-03-07 23:15:53 +00:00
|
|
|
for (i=0; results.size(); i++) {
|
|
|
|
if (i == ilow) continue;
|
|
|
|
if (i == ihigh) continue;
|
|
|
|
RESULT& result = results[i];
|
|
|
|
if (result.validate_state != VALIDATE_STATE_VALID) continue;
|
2003-08-14 00:02:15 +00:00
|
|
|
|
2005-03-07 23:15:53 +00:00
|
|
|
sum += result.claimed_credit;
|
2003-08-14 00:02:15 +00:00
|
|
|
}
|
2005-03-07 23:15:53 +00:00
|
|
|
return sum/(nvalid-2);
|
2003-08-14 00:02:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-07-13 12:23:47 +00:00
|
|
|
// Generic validation function that compares each result to each other one and
|
|
|
|
// sees if there MIN_VALID results match. The comparison function is similar
|
|
|
|
// to check_pair but takes an additional data parameter.
|
2003-08-14 00:02:15 +00:00
|
|
|
//
|
|
|
|
// This function takes 3 call-back functions, each of which accept a void*
|
|
|
|
// and should return !=0 on error:
|
|
|
|
//
|
|
|
|
// 1. init_result - initialize all results - for example, call
|
|
|
|
// read_file_string and compute an MD5. Return a void*
|
|
|
|
// 2. check_pair_with_data - same as check_pair but with extra data from
|
|
|
|
// init_result
|
2003-08-18 18:49:04 +00:00
|
|
|
// 3. cleanup_result - deallocate anything created by init_result. Should
|
|
|
|
// do nothing with NULL data
|
2003-08-14 00:02:15 +00:00
|
|
|
//
|
|
|
|
// see validate_test.C example usage.
|
|
|
|
//
|
2004-07-13 12:23:47 +00:00
|
|
|
int generic_check_set(
|
2003-08-26 18:33:21 +00:00
|
|
|
vector<RESULT>& results, int& canonicalid, double& credit,
|
|
|
|
init_result_f init_result_f,
|
|
|
|
check_pair_with_data_f check_pair_with_data_f,
|
2004-07-13 12:23:47 +00:00
|
|
|
cleanup_result_f cleanup_result_f,
|
2004-07-13 13:54:09 +00:00
|
|
|
size_t min_valid)
|
2004-07-13 12:23:47 +00:00
|
|
|
{
|
2003-08-14 00:02:15 +00:00
|
|
|
assert (!results.empty());
|
|
|
|
|
|
|
|
vector<void*> data;
|
|
|
|
vector<RESULT>::size_type i, j, neq = 0, n = results.size();
|
|
|
|
data.resize(n);
|
|
|
|
|
|
|
|
// 1. INITIALIZE DATA
|
2003-08-26 18:33:21 +00:00
|
|
|
for (i = 0; i != n; ++i) {
|
2003-08-14 00:02:15 +00:00
|
|
|
if (init_result_f(results[i], data[i])) {
|
|
|
|
log_messages.printf(
|
2004-04-08 08:15:23 +00:00
|
|
|
SCHED_MSG_LOG::CRITICAL,
|
2004-07-13 12:23:47 +00:00
|
|
|
"generic_check_set: init_result([RESULT#%d %s]) failed\n",
|
2004-12-20 20:47:25 +00:00
|
|
|
results[i].id, results[i].name
|
|
|
|
);
|
2003-08-14 00:02:15 +00:00
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// 2. COMPARE
|
2003-08-26 18:33:21 +00:00
|
|
|
for (i = 0; i != n; ++i) {
|
2003-08-14 00:02:15 +00:00
|
|
|
vector<bool> matches;
|
|
|
|
matches.resize(n);
|
|
|
|
neq = 0;
|
|
|
|
for (j = 0; j != n; ++j) {
|
|
|
|
bool match = false;
|
|
|
|
if (i == j) {
|
|
|
|
++neq;
|
|
|
|
matches[j] = true;
|
|
|
|
} else if (check_pair_with_data_f(results[i], data[i], results[j], data[j], match)) {
|
|
|
|
log_messages.printf(
|
2004-04-08 08:15:23 +00:00
|
|
|
SCHED_MSG_LOG::CRITICAL,
|
2004-07-13 12:23:47 +00:00
|
|
|
"generic_check_set: check_pair_with_data([RESULT#%d %s], [RESULT#%d %s]) failed\n",
|
2004-12-20 20:47:25 +00:00
|
|
|
results[i].id, results[i].name, results[j].id, results[j].name
|
|
|
|
);
|
2003-08-14 00:02:15 +00:00
|
|
|
} else if (match) {
|
|
|
|
++neq;
|
|
|
|
matches[j] = true;
|
|
|
|
}
|
|
|
|
}
|
2005-01-08 06:38:21 +00:00
|
|
|
if (neq >= min_valid) {
|
2003-08-14 00:02:15 +00:00
|
|
|
// set validate state for each result
|
|
|
|
for (j = 0; j != n; ++j) {
|
|
|
|
results[j].validate_state = matches[j] ? VALIDATE_STATE_VALID : VALIDATE_STATE_INVALID;
|
|
|
|
}
|
|
|
|
canonicalid = results[i].id;
|
|
|
|
credit = median_mean_credit(results);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
// 3. CLEANUP
|
|
|
|
for (i = 0; i != n; ++i) {
|
|
|
|
cleanup_result_f(results[i], data[i]);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2004-07-13 12:23:47 +00:00
|
|
|
// similar to generic_check_set, but require a strict majority of results
|
|
|
|
// (N_results / 2) to be valid
|
|
|
|
int generic_check_set_majority(
|
|
|
|
vector<RESULT>& results, int& canonicalid, double& credit,
|
|
|
|
init_result_f init_result_f,
|
|
|
|
check_pair_with_data_f check_pair_with_data_f,
|
|
|
|
cleanup_result_f cleanup_result_f)
|
|
|
|
{
|
2004-07-13 13:54:09 +00:00
|
|
|
return generic_check_set(
|
|
|
|
results, canonicalid, credit,
|
|
|
|
init_result_f, check_pair_with_data_f, cleanup_result_f,
|
2004-12-20 20:47:25 +00:00
|
|
|
results.size() / 2
|
|
|
|
);
|
2004-07-13 12:23:47 +00:00
|
|
|
}
|
|
|
|
|
2003-08-26 18:33:21 +00:00
|
|
|
int generic_check_pair(
|
2004-09-09 22:25:10 +00:00
|
|
|
RESULT & r1, RESULT const& r2,
|
2003-08-26 18:33:21 +00:00
|
|
|
init_result_f init_result_f,
|
|
|
|
check_pair_with_data_f check_pair_with_data_f,
|
|
|
|
cleanup_result_f cleanup_result_f
|
|
|
|
) {
|
2003-08-14 00:02:15 +00:00
|
|
|
void* data1;
|
|
|
|
void* data2;
|
|
|
|
int retval;
|
2004-09-09 22:25:10 +00:00
|
|
|
bool match;
|
2003-08-14 00:02:15 +00:00
|
|
|
|
|
|
|
retval = init_result_f(r1, data1);
|
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(
|
2004-04-08 08:15:23 +00:00
|
|
|
SCHED_MSG_LOG::CRITICAL,
|
2003-08-14 00:02:15 +00:00
|
|
|
"[RESULT#%d %s] [RESULT#%d %s] Couldn't initialize result 1\n",
|
|
|
|
r1.id, r1.name, r2.id, r2.name
|
|
|
|
);
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
|
|
|
retval = init_result_f(r2, data2);
|
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(
|
2004-04-08 08:15:23 +00:00
|
|
|
SCHED_MSG_LOG::CRITICAL,
|
2003-08-14 00:02:15 +00:00
|
|
|
"[RESULT#%d %s] [RESULT#%d %s] Couldn't initialize result 2\n",
|
|
|
|
r1.id, r1.name, r2.id, r2.name
|
2004-12-20 20:47:25 +00:00
|
|
|
);
|
2003-08-14 00:02:15 +00:00
|
|
|
cleanup_result_f(r1, data1);
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
|
|
|
retval = check_pair_with_data_f(r1, data1, r2, data2, match);
|
2004-09-09 22:25:10 +00:00
|
|
|
r1.validate_state = match?VALIDATE_STATE_VALID:VALIDATE_STATE_INVALID;
|
2003-08-14 00:02:15 +00:00
|
|
|
|
|
|
|
cleanup_result_f(r1, data1);
|
|
|
|
cleanup_result_f(r2, data2);
|
|
|
|
|
|
|
|
return retval;
|
|
|
|
}
|
2004-12-08 00:40:19 +00:00
|
|
|
|
2005-01-02 18:29:53 +00:00
|
|
|
const char *BOINC_RCSID_07049e8a0e = "$Id$";
|