diff --git a/checkin_notes b/checkin_notes index d60f01a9f0..dc93edfc87 100755 --- a/checkin_notes +++ b/checkin_notes @@ -5714,3 +5714,38 @@ David 9 June 2006 client/ hostinfo_unix.C + +David 9 June 2006 + - First pass on reforming the validator framework. + OLD: the documents made the task of creating a custom validator + seem impossibly daunting. + NEW: Defined two frameworks. + - A "simple framework" lets you create a custom validator + by supplying three simple functions: + 1) parse a result + 2) compare two parsed results + 3) free a parsed result + This is based on the "generic_check_set()" code that + Karl Chen wrote for the sample bitwise validator; + I simplified this, got rid of the function pointers, + and moved it to a separate file (validate_util2.C,h) + - An "advanced framework" requires you to supply + check_set() and check_pair() functions. + This sounds simple, but actually is not; see + http://boinc.berkeley.edu/validate_logic.txt + Note: the check_set() function defined by the simple framework + doesn't do everything the above spec says it should; + I'll get back to that later. + - added lookup_group() and associated error codes (for sandbox) + + lib/ + error_numbers.h + util.C,h + sched/ + Makefile.am + handle_request.C + sample_bitwise_validator.C + sample_trivial_validator.C + validate_util.C,h + validate_util2.C,h (new) + validator_placeholder.C diff --git a/doc/validate.php b/doc/validate.php index f95ce6d0c9..89ac81fe90 100644 --- a/doc/validate.php +++ b/doc/validate.php @@ -31,11 +31,11 @@ If you are using BOINC for 'desktop grid' computing then you can use the 'sample trivial validator' (see below).
  • Otherwise, you'll need to develop a custom validator for your application. -BOINC supplies a high-level validator framework +BOINC supplies a simple validator framework in which you plug in three short application-specific functions. -This is sufficient for more projects. +This is sufficient for most projects. If you need more control over the validation process, -you can use BOINC's low-level validator framework. +you can use BOINC's advanced validator framework.

    diff --git a/sched/Makefile.am b/sched/Makefile.am index 073b027ade..b35e18d489 100644 --- a/sched/Makefile.am +++ b/sched/Makefile.am @@ -103,13 +103,13 @@ show_shmem_DEPENDENCIES = $(LIB_SCHED) file_deleter_SOURCES = file_deleter.C file_deleter_DEPENDENCIES = $(LIB_SCHED) -sample_bitwise_validator_SOURCES = validator.C sample_bitwise_validator.C validate_util.C validate_util.h +sample_bitwise_validator_SOURCES = validator.C sample_bitwise_validator.C validate_util.C validate_util.h validate_util2.C sample_bitwise_validator_DEPENDENCIES = $(LIB_SCHED) -sample_trivial_validator_SOURCES = validator.C sample_trivial_validator.C validate_util.C validate_util.h +sample_trivial_validator_SOURCES = validator.C sample_trivial_validator.C validate_util.C validate_util.h validate_util2.C sample_trivial_validator_DEPENDENCIES = $(LIB_SCHED) -validator_placeholder_SOURCES = validator.C validator_placeholder.C validate_util.C validate_util.h +validator_placeholder_SOURCES = validator.C validator_placeholder.C validate_util.C validate_util.h validate_util2.C validator_placeholder_DEPENDENCIES = $(LIB_SCHED) sample_dummy_assimilator_SOURCES = assimilator.C sample_dummy_assimilator.C validate_util.C validate_util.h diff --git a/sched/handle_request.C b/sched/handle_request.C index 4155ec9b7a..24e37cfff9 100644 --- a/sched/handle_request.C +++ b/sched/handle_request.C @@ -619,7 +619,7 @@ int handle_results(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) { // else ignore it // if (srip->server_state == RESULT_SERVER_STATE_OVER) { - char *dont_replace_result = NULL; + const char *dont_replace_result = NULL; switch (srip->outcome) { case RESULT_OUTCOME_INIT: // should never happen! 
diff --git a/sched/sample_bitwise_validator.C b/sched/sample_bitwise_validator.C index fa352c3030..90da5daf21 100644 --- a/sched/sample_bitwise_validator.C +++ b/sched/sample_bitwise_validator.C @@ -39,6 +39,7 @@ public: const string filedata; FILE_CACHE(string const& filedata0) : filedata(filedata0) {} + ~FILE_CACHE(){} string const md5sum() const { if (_md5sum.empty()) { @@ -48,13 +49,13 @@ public: } }; -bool operator ==(FILE_CACHE const& f1, FILE_CACHE const& f2) { +bool files_match(FILE_CACHE const& f1, FILE_CACHE const& f2) { return (f1.md5sum() == f2.md5sum() && f1.filedata == f2.filedata); } // read file into memory // -int init_result_read_file(RESULT const & result, void*& data) { +int init_result(RESULT const & result, void*& data) { int retval; string path; @@ -82,7 +83,7 @@ int init_result_read_file(RESULT const & result, void*& data) { return 0; } -int check_pair_initialized_identical( +int compare_results( RESULT & /*r1*/, void* data1, RESULT const& /*r2*/, void* data2, bool& match @@ -90,40 +91,13 @@ int check_pair_initialized_identical( FILE_CACHE const* f1 = (FILE_CACHE*) data1; FILE_CACHE const* f2 = (FILE_CACHE*) data2; - match = (*f1 == *f2); + match = files_match(*f1, *f2); return 0; } -int cleanup_result_string(RESULT const& /*result*/, void* data) { +int cleanup_result(RESULT const& /*result*/, void* data) { delete (FILE_CACHE*) data; return 0; } -// See if there's a strict majority under equality. 
-// -int check_set( - vector& results, WORKUNIT& wu, int& canonicalid, double& credit, - bool& retry -) { - retry = false; - return generic_check_set( - results, canonicalid, credit, - init_result_read_file, - check_pair_initialized_identical, - cleanup_result_string, - wu.min_quorum/2+1 - ); -} - -int check_pair(RESULT & r1, RESULT const& r2, bool& retry) { - retry = false; - int retval = generic_check_pair( - r1, r2, - init_result_read_file, - check_pair_initialized_identical, - cleanup_result_string - ); - return retval; -} - const char *BOINC_RCSID_7ab2b7189c = "$Id$"; diff --git a/sched/sample_trivial_validator.C b/sched/sample_trivial_validator.C index 7ba3991d33..7a1465beb2 100644 --- a/sched/sample_trivial_validator.C +++ b/sched/sample_trivial_validator.C @@ -27,11 +27,11 @@ using std::vector; static const double MIN_CPU_TIME = 0; -int init_result_trivial(RESULT const& /*result*/, void*& /*data*/) { +int init_result(RESULT const& /*result*/, void*& /*data*/) { return 0; } -int check_pair_initialized_trivial( +int compare_results( RESULT & r1, void* /*data1*/, RESULT const& r2, void* /*data2*/, bool& match @@ -40,35 +40,8 @@ int check_pair_initialized_trivial( return 0; } -int cleanup_result_trivial(RESULT const&, void*) { +int cleanup_result(RESULT const&, void*) { return 0; } -int check_set( - vector& results, WORKUNIT&, int& canonicalid, double& credit, - bool& retry -) { - retry = false; - return generic_check_set( - results, canonicalid, credit, - init_result_trivial, - check_pair_initialized_trivial, - cleanup_result_trivial, - 1 - ); -} - -int check_pair(RESULT & r1, RESULT const& r2, bool& retry) { - bool match; - retry = false; - int retval = check_pair_initialized_trivial( - r1, NULL, - r2, NULL, - match - ); - r1.validate_state = match?VALIDATE_STATE_VALID:VALIDATE_STATE_INVALID; - return retval; -} - - const char *BOINC_RCSID_f3a7a34795 = "$Id$"; diff --git a/sched/validate_util.C b/sched/validate_util.C index 2d3a098e18..9171dc18a4 100644 --- 
a/sched/validate_util.C +++ b/sched/validate_util.C @@ -23,7 +23,6 @@ // or that requires strict or fuzzy equality. #include "config.h" -#include #include "error_numbers.h" #include "parse.h" @@ -33,13 +32,12 @@ #include "sched_util.h" #include "sched_config.h" #include "sched_msgs.h" +#include "validator.h" #include "validate_util.h" using std::vector; using std::string; -extern SCHED_CONFIG config; - // get the name of a result's (first) output file // int get_output_file_path(RESULT const& result, string& path_str) { @@ -109,140 +107,4 @@ double median_mean_credit(vector& results) { } } -// Generic validation function that compares each result to each other one and -// sees if MIN_VALID results match. -// The comparison function is similar to check_pair -// but takes an additional data parameter. -// -// This function takes 3 call-back functions, each of which accept a void* -// and should return !=0 on error: -// -// 1. init_result - initialize all results - for example, call -// read_file_string and compute an MD5. Return a void* -// 2. check_pair_with_data - same as check_pair but with extra data from -// init_result -// 3. cleanup_result - deallocate anything created by init_result. -// Should do nothing with NULL data -// -// see validate_test.C example usage. -// -int generic_check_set( - vector& results, int& canonicalid, double& credit, - init_result_f init_result_f, - check_pair_with_data_f check_pair_with_data_f, - cleanup_result_f cleanup_result_f, - int min_valid -) { - vector data; - int i, j, neq = 0, n; - - n = results.size(); - data.resize(n); - - // 1. INITIALIZE DATA - - for (i=0; i!=n; i++) { - if (init_result_f(results[i], data[i])) { - log_messages.printf( - SCHED_MSG_LOG::MSG_CRITICAL, - "generic_check_set: init_result([RESULT#%d %s]) failed\n", - results[i].id, results[i].name - ); - goto cleanup; - } - } - - // 2. 
COMPARE - - for (i=0; i!=n; i++) { - vector matches; - matches.resize(n); - neq = 0; - for (j=0; j!=n; j++) { - bool match = false; - if (i == j) { - ++neq; - matches[j] = true; - } else if (check_pair_with_data_f(results[i], data[i], results[j], data[j], match)) { - log_messages.printf( - SCHED_MSG_LOG::MSG_CRITICAL, - "generic_check_set: check_pair_with_data([RESULT#%d %s], [RESULT#%d %s]) failed\n", - results[i].id, results[i].name, results[j].id, results[j].name - ); - } else if (match) { - ++neq; - matches[j] = true; - } - } - if (neq >= min_valid) { - - // set validate state for each result - // - for (j=0; j!=n; j++) { - if (config.max_claimed_credit && results[j].claimed_credit > config.max_claimed_credit) { - results[j].validate_state = VALIDATE_STATE_INVALID; - } else { - results[j].validate_state = matches[j] ? VALIDATE_STATE_VALID : VALIDATE_STATE_INVALID; - } - } - canonicalid = results[i].id; - credit = median_mean_credit(results); - break; - } - } - -cleanup: - - // 3. CLEANUP - - for (i=0; i!=n; i++) { - cleanup_result_f(results[i], data[i]); - } - return 0; -} - -int generic_check_pair( - RESULT & r1, RESULT const& r2, - init_result_f init_result_f, - check_pair_with_data_f check_pair_with_data_f, - cleanup_result_f cleanup_result_f -) { - void* data1; - void* data2; - int retval; - bool match; - - retval = init_result_f(r1, data1); - if (retval) { - log_messages.printf( - SCHED_MSG_LOG::MSG_CRITICAL, - "[RESULT#%d %s] [RESULT#%d %s] Couldn't initialize result 1\n", - r1.id, r1.name, r2.id, r2.name - ); - return retval; - } - - retval = init_result_f(r2, data2); - if (retval) { - log_messages.printf( - SCHED_MSG_LOG::MSG_CRITICAL, - "[RESULT#%d %s] [RESULT#%d %s] Couldn't initialize result 2\n", - r1.id, r1.name, r2.id, r2.name - ); - cleanup_result_f(r1, data1); - return retval; - } - - retval = check_pair_with_data_f(r1, data1, r2, data2, match); - if (config.max_claimed_credit && r1.claimed_credit > config.max_claimed_credit) { - 
r1.validate_state = VALIDATE_STATE_INVALID; - } else { - r1.validate_state = match?VALIDATE_STATE_VALID:VALIDATE_STATE_INVALID; - } - cleanup_result_f(r1, data1); - cleanup_result_f(r2, data2); - - return retval; -} - const char *BOINC_RCSID_07049e8a0e = "$Id$"; diff --git a/sched/validate_util.h b/sched/validate_util.h index b5f2f5ca37..f63617fb92 100644 --- a/sched/validate_util.h +++ b/sched/validate_util.h @@ -24,25 +24,7 @@ #include #include -typedef int (*init_result_f)(RESULT const&, void*&); -typedef int (*check_pair_with_data_f)(RESULT &, void*, RESULT const&, void*, bool&); -typedef int (*cleanup_result_f)(RESULT const&, void*); extern int get_output_file_path(RESULT const& result, std::string& path); extern double median_mean_credit(std::vector& results); -extern int generic_check_set( - std::vector& results, int& canonicalid, double& credit, - init_result_f init_result_f, - check_pair_with_data_f check_pair_with_data_f, - cleanup_result_f cleanup_result_f, - int min_valid -); - -extern int generic_check_pair( - RESULT & r1, RESULT const& r2, - init_result_f init_result_f, - check_pair_with_data_f check_pair_with_data_f, - cleanup_result_f cleanup_result_f -); - #endif diff --git a/sched/validate_util2.C b/sched/validate_util2.C new file mode 100644 index 0000000000..c5c85a8be6 --- /dev/null +++ b/sched/validate_util2.C @@ -0,0 +1,150 @@ +// Berkeley Open Infrastructure for Network Computing +// http://boinc.berkeley.edu +// Copyright (C) 2005 University of California +// +// This is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; +// either version 2.1 of the License, or (at your option) any later version. +// +// This software is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+// See the GNU Lesser General Public License for more details. +// +// To view the GNU Lesser General Public License visit +// http://www.gnu.org/copyleft/lesser.html +// or write to the Free Software Foundation, Inc., +// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +// Simple validator framework: +// Lets you create a custom validator by supplying three simple functions. +// See http://boinc.berkeley.edu/validate_simple.php +// + +#include "config.h" +#include + +#include "boinc_db.h" + +#include "sched_msgs.h" +#include "validator.h" + +#include "validate_util.h" +#include "validate_util2.h" + +using std::vector; + +int check_set( + vector& results, WORKUNIT& wu, int& canonicalid, double& credit, + bool& retry +) { + vector data; + int i, j, neq = 0, n; + int min_valid = wu.min_quorum/2+1; + + retry = false; + n = results.size(); + data.resize(n); + + // 1. INITIALIZE DATA + + for (i=0; i!=n; i++) { + if (init_result(results[i], data[i])) { + log_messages.printf( + SCHED_MSG_LOG::MSG_CRITICAL, + "generic_check_set: init_result([RESULT#%d %s]) failed\n", + results[i].id, results[i].name + ); + goto cleanup; + } + } + + // 2. COMPARE + + for (i=0; i!=n; i++) { + vector matches; + matches.resize(n); + neq = 0; + for (j=0; j!=n; j++) { + bool match = false; + if (i == j) { + ++neq; + matches[j] = true; + } else if (compare_results(results[i], data[i], results[j], data[j], match)) { + log_messages.printf( + SCHED_MSG_LOG::MSG_CRITICAL, + "generic_check_set: check_pair_with_data([RESULT#%d %s], [RESULT#%d %s]) failed\n", + results[i].id, results[i].name, results[j].id, results[j].name + ); + } else if (match) { + ++neq; + matches[j] = true; + } + } + if (neq >= min_valid) { + + // set validate state for each result + // + for (j=0; j!=n; j++) { + if (config.max_claimed_credit && results[j].claimed_credit > config.max_claimed_credit) { + results[j].validate_state = VALIDATE_STATE_INVALID; + } else { + results[j].validate_state = matches[j] ? 
VALIDATE_STATE_VALID : VALIDATE_STATE_INVALID; + } + } + canonicalid = results[i].id; + credit = median_mean_credit(results); + break; + } + } + +cleanup: + + // 3. CLEANUP + + for (i=0; i!=n; i++) { + cleanup_result(results[i], data[i]); + } + return 0; +} + +int check_pair(RESULT & r1, RESULT const& r2, bool& retry) { + void* data1; + void* data2; + int retval; + bool match; + + retry = false; + retval = init_result(r1, data1); + if (retval) { + log_messages.printf( + SCHED_MSG_LOG::MSG_CRITICAL, + "[RESULT#%d %s] [RESULT#%d %s] Couldn't initialize result 1\n", + r1.id, r1.name, r2.id, r2.name + ); + return retval; + } + + retval = init_result(r2, data2); + if (retval) { + log_messages.printf( + SCHED_MSG_LOG::MSG_CRITICAL, + "[RESULT#%d %s] [RESULT#%d %s] Couldn't initialize result 2\n", + r1.id, r1.name, r2.id, r2.name + ); + cleanup_result(r1, data1); + return retval; + } + + retval = compare_results(r1, data1, r2, data2, match); + if (config.max_claimed_credit && r1.claimed_credit > config.max_claimed_credit) { + r1.validate_state = VALIDATE_STATE_INVALID; + } else { + r1.validate_state = match?VALIDATE_STATE_VALID:VALIDATE_STATE_INVALID; + } + cleanup_result(r1, data1); + cleanup_result(r2, data2); + + return retval; +} diff --git a/sched/validate_util2.h b/sched/validate_util2.h new file mode 100644 index 0000000000..b80489c07c --- /dev/null +++ b/sched/validate_util2.h @@ -0,0 +1,8 @@ +#ifndef _VALIDATE_UTIL2_ +#define _VALIDATE_UTIL2_ + +extern int init_result(RESULT const&, void*&); +extern int compare_results(RESULT &, void*, RESULT const&, void*, bool&); +extern int cleanup_result(RESULT const&, void*); + +#endif diff --git a/sched/validator_placeholder.C b/sched/validator_placeholder.C index dae3cc79f5..90da5daf21 100644 --- a/sched/validator_placeholder.C +++ b/sched/validator_placeholder.C @@ -17,58 +17,87 @@ // or write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// A sample validator that grants 
credit to any result whose CPU time is above -// a certain minimum +// A sample validator that grants credit if the majority of results are +// bitwise identical. +// This is useful only if either +// 1) your application does no floating-point math, or +// 2) you use homogeneous redundancy #include "config.h" +#include "util.h" +#include "sched_util.h" +#include "sched_msgs.h" #include "validate_util.h" +#include "md5_file.h" +using std::string; using std::vector; -static const double MIN_CPU_TIME = 0; +class FILE_CACHE { + mutable string _md5sum; +public: + const string filedata; -int init_result_trivial(RESULT const& /*result*/, void*& /*data*/) { + FILE_CACHE(string const& filedata0) : filedata(filedata0) {} + ~FILE_CACHE(){} + + string const md5sum() const { + if (_md5sum.empty()) { + _md5sum = md5_string(filedata); + } + return _md5sum; + } +}; + +bool files_match(FILE_CACHE const& f1, FILE_CACHE const& f2) { + return (f1.md5sum() == f2.md5sum() && f1.filedata == f2.filedata); +} + +// read file into memory +// +int init_result(RESULT const & result, void*& data) { + int retval; + string path; + + retval = get_output_file_path(result, path); + if (retval) { + log_messages.printf( + SCHED_MSG_LOG::MSG_CRITICAL, + "[RESULT#%d %s] check_set: can't get output filename\n", + result.id, result.name + ); + return retval; + } + + string filedata; + retval = read_file_string(path.c_str(), filedata); + if (retval) { + log_messages.printf( + SCHED_MSG_LOG::MSG_CRITICAL, + "[RESULT#%d %s] Couldn't open %s\n", + result.id, result.name, path.c_str() + ); + return retval; + } + data = (void*) new FILE_CACHE(filedata); return 0; } -int check_pair_initialized_trivial( - RESULT & r1, void* /*data1*/, - RESULT const& r2, void* /*data2*/, +int compare_results( + RESULT & /*r1*/, void* data1, + RESULT const& /*r2*/, void* data2, bool& match ) { - match = (r1.cpu_time >= MIN_CPU_TIME && r2.cpu_time >= MIN_CPU_TIME); + FILE_CACHE const* f1 = (FILE_CACHE*) data1; + FILE_CACHE const* 
f2 = (FILE_CACHE*) data2; + + match = files_match(*f1, *f2); return 0; } -int cleanup_result_trivial(RESULT const&, void*) { +int cleanup_result(RESULT const& /*result*/, void* data) { + delete (FILE_CACHE*) data; return 0; } -int check_set( - vector& results, WORKUNIT&, int& canonicalid, double& credit, - bool& retry -) { - retry = false; - return generic_check_set( - results, canonicalid, credit, - init_result_trivial, - check_pair_initialized_trivial, - cleanup_result_trivial, - 1 - ); -} - -int check_pair(RESULT & r1, RESULT const& r2, bool& retry) { - bool match; - retry = false; - int retval = check_pair_initialized_trivial( - r1, NULL, - r2, NULL, - match - ); - r1.validate_state = match?VALIDATE_STATE_VALID:VALIDATE_STATE_INVALID; - return retval; -} - - -const char *BOINC_RCSID_01a414c729 = "$Id$"; +const char *BOINC_RCSID_7ab2b7189c = "$Id$";