// The contents of this file are subject to the BOINC Public License // Version 1.0 (the "License"); you may not use this file except in // compliance with the License. You may obtain a copy of the License at // http://boinc.berkeley.edu/license_1.0.txt // // Software distributed under the License is distributed on an "AS IS" // basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the // License for the specific language governing rights and limitations // under the License. // // The Original Code is the Berkeley Open Infrastructure for Network Computing. // // The Initial Developer of the Original Code is the SETI@home project. // Portions created by the SETI@home project are Copyright (C) 2002 // University of California at Berkeley. All Rights Reserved. // // Contributor(s): // // // validator - check and validate new results, and grant credit // -app appname // [-d debug_level] // [-one_pass] // make one pass through WU table, then exit // [-asynch] // fork, run in separate process // // This program must be linked with two project-specific functions: // check_set() and check_pair(). // See doc/validate.php for a description. using namespace std; #include #include #include #include "boinc_db.h" #include "util.h" #include "error_numbers.h" #include "sched_config.h" #include "sched_util.h" #include "sched_msgs.h" #define LOCKFILE "validate.out" #define PIDFILE "validate.pid" extern int check_set( vector&, DB_WORKUNIT& wu, int& canonical, double& credit, bool& retry ); extern int check_pair( RESULT & new_result, RESULT const& canonical_result, bool& retry ); SCHED_CONFIG config; char app_name[256]; // here when a result has been validated; // grant credit to host, user and team // int grant_credit(DB_RESULT& result, double credit) { DB_USER user; DB_HOST host; DB_TEAM team; int retval; char buf[256]; retval = host.lookup_id(result.hostid); if (retval) { log_messages.printf( SCHED_MSG_LOG::CRITICAL, "[RESULT#%d] lookup of host %d failed %d\n", result.id, result.hostid, retval ); return retval; } retval = user.lookup_id(host.userid); if (retval) { log_messages.printf( SCHED_MSG_LOG::CRITICAL, "[RESULT#%d] lookup of user %d failed %d\n", result.id, host.userid, retval ); return retval; } user.total_credit += credit; update_average(result.sent_time, credit, CREDIT_HALF_LIFE, user.expavg_credit, user.expavg_time); sprintf( buf, "total_credit=%f, expavg_credit=%f, expavg_time=%f", user.total_credit, user.expavg_credit, user.expavg_time ); retval = user.update_field(buf); if (retval) { log_messages.printf( SCHED_MSG_LOG::CRITICAL, "[RESULT#%d] update of user %d failed %d\n", result.id, host.userid, retval ); } host.total_credit += credit; update_average(result.sent_time, credit, CREDIT_HALF_LIFE, host.expavg_credit, host.expavg_time); sprintf( buf, "total_credit=%f, expavg_credit=%f, expavg_time=%f", host.total_credit, host.expavg_credit, host.expavg_time ); retval = host.update_field(buf); if (retval) { log_messages.printf( SCHED_MSG_LOG::CRITICAL, "[RESULT#%d] update of host %d failed %d\n", result.id, result.hostid, retval ); } if (user.teamid) { retval = team.lookup_id(user.teamid); if (retval) { log_messages.printf( SCHED_MSG_LOG::CRITICAL, "[RESULT#%d] lookup of team %d failed %d\n", result.id, user.teamid, retval ); return retval; } team.total_credit += credit; update_average(result.sent_time, credit, CREDIT_HALF_LIFE, team.expavg_credit, team.expavg_time); sprintf( buf, "total_credit=%f, expavg_credit=%f, expavg_time=%f", team.total_credit, team.expavg_credit, team.expavg_time ); retval = team.update_field(buf); if (retval) { log_messages.printf( SCHED_MSG_LOG::CRITICAL, "[RESULT#%d] update of team %d failed %d\n", result.id, team.id, retval ); } } return 0; } void handle_wu(DB_WORKUNIT& wu) { DB_RESULT result, canonical_result; bool update_result, retry; bool canonical_result_missing = false; bool need_immediate_transition = false, need_delayed_transition = false; int retval, canonicalid = 0; double credit; unsigned int i; char buf[256]; if (wu.canonical_resultid) { log_messages.printf( SCHED_MSG_LOG::NORMAL, "[WU#%d %s] handle_wu(): Already has canonical result\n", wu.id, wu.name ); ++log_messages; // Here if WU already has a canonical result. // Get unchecked results and see if they match the canonical result // retval = canonical_result.lookup_id(wu.canonical_resultid); if (retval == ERR_DB_NOT_FOUND) { log_messages.printf( SCHED_MSG_LOG::CRITICAL, "[WU#%d %s] Canonical result not in DB %d", wu.id, wu.name, retval ); canonical_result_missing = true; } else if (retval) { log_messages.printf( SCHED_MSG_LOG::CRITICAL, "[WU#%d %s] Can't read canonical result %d; exiting", wu.id, wu.name, retval ); exit(retval); } // scan this WU's results, and check the unchecked ones // TODO: do we have an index on these fields? // maybe better just to enum on workunitid // sprintf(buf, "where workunitid=%d and validate_state=%d and server_state=%d and outcome=%d", wu.id, VALIDATE_STATE_INIT, RESULT_SERVER_STATE_OVER, RESULT_OUTCOME_SUCCESS ); while (!result.enumerate(buf)) { need_immediate_transition = true; retval = check_pair( result, canonical_result, retry ); if (retval) { log_messages.printf( SCHED_MSG_LOG::DEBUG, "[RESULT#%d %s]: pair_check() failed for result: %d\n", result.id, result.name, retval ); exit(retval); } if (retry) need_delayed_transition = true; update_result = false; // if result had nonrecoverable error, make sure it gets updated // if (result.outcome != RESULT_OUTCOME_SUCCESS) { update_result = true; } switch (result.validate_state) { case VALIDATE_STATE_VALID: update_result = true; result.granted_credit = wu.canonical_credit; log_messages.printf( SCHED_MSG_LOG::NORMAL, "[RESULT#%d %s] pair_check() matched: setting result to valid; credit %f\n", result.id, result.name, result.granted_credit ); retval = grant_credit(result, result.granted_credit); if (retval) { log_messages.printf( SCHED_MSG_LOG::NORMAL, "[RESULT#%d %s] Can't grant credit: %d\n", result.id, result.name, retval ); } break; case VALIDATE_STATE_INVALID: update_result = true; log_messages.printf( SCHED_MSG_LOG::NORMAL, "[RESULT#%d %s] pair_check() didn't match: setting result to invalid\n", result.id, result.name ); } if (update_result) { sprintf( buf, "validate_state=%d, granted_credit=%f", result.validate_state, result.granted_credit ); log_messages.printf( SCHED_MSG_LOG::NORMAL, "[RESULT#%d %s] granted_credit %f", result.id, result.name, result.granted_credit ); retval = result.update_field(buf); if (retval) { log_messages.printf( SCHED_MSG_LOG::CRITICAL, "[RESULT#%d %s] Can't update result: %d\n", result.id, result.name, retval ); } } } } else { vector results; // Here if WU doesn't have a canonical result yet. // Try to get one log_messages.printf( SCHED_MSG_LOG::NORMAL, "[WU#%d %s] handle_wu(): No canonical result yet\n", wu.id, wu.name ); ++log_messages; // sprintf(buf, "where workunitid=%d", wu.id); // TODO: do we have an index on these fields? // maybe better to enum on workunitid // while (!result.enumerate(buf)) { // if (result.server_state == RESULT_SERVER_STATE_OVER // && result.outcome == RESULT_OUTCOME_SUCCESS // ) { sprintf(buf, "where workunitid=%d and validate_state=%d and server_state=%d and outcome=%d", wu.id, VALIDATE_STATE_INIT, RESULT_SERVER_STATE_OVER, RESULT_OUTCOME_SUCCESS ); while (!result.enumerate(buf)) { results.push_back(result); } log_messages.printf( SCHED_MSG_LOG::DEBUG, "[WU#%d %s] Found %d successful results\n", wu.id, wu.name, (int)results.size() ); if (results.size() >= (unsigned int)wu.min_quorum) { log_messages.printf( SCHED_MSG_LOG::DEBUG, "[WU#%d %s] Enough for quorum, checking set.\n", wu.id, wu.name ); retval = check_set(results, wu, canonicalid, credit, retry); if (retval) { log_messages.printf( SCHED_MSG_LOG::CRITICAL, "[WU#%d %s] check_set returned %d, exiting", wu.id, wu.name, retval ); exit(retval); } if (retry) need_delayed_transition = true; // See if any results had nonrecoverable errors // for (i=0; i wu.max_success_results) { wu.error_mask |= WU_ERROR_TOO_MANY_SUCCESS_RESULTS; need_immediate_transition = true; } } } } --log_messages; if (need_immediate_transition) { wu.transition_time = time(0); } else if (need_delayed_transition) { int x = time(0) + 6*3600; if (x < wu.transition_time) wu.transition_time = x; } // clear WU.need_validate // wu.need_validate = 0; sprintf(buf, "need_validate=%d, transition_time=%d, canonical_resultid=%d, canonical_credit=%f, assimilate_state=%d", wu.need_validate, wu.transition_time, wu.canonical_resultid, wu.canonical_credit, wu.assimilate_state ); retval = wu.update_field(buf); if (retval) { log_messages.printf( SCHED_MSG_LOG::CRITICAL, "[WU#%d %s] wu.update() failed: %d; exiting\n", wu.id, wu.name, retval ); exit(1); } } // make one pass through the workunits with need_validate set. // return true if there were any // bool do_validate_scan(APP& app) { DB_WORKUNIT wu; char buf[256]; bool found=false; sprintf(buf, "where appid=%d and need_validate > 0 limit 1000", app.id); while (!wu.enumerate(buf)) { handle_wu(wu); found = true; } return found; } int main_loop(bool one_pass) { int retval; DB_APP app; bool did_something; char buf[256]; retval = boinc_db.open(config.db_name, config.db_host, config.db_user, config.db_passwd); if (retval) { log_messages.printf(SCHED_MSG_LOG::CRITICAL, "boinc_db.open failed: %d\n", retval); exit(1); } sprintf(buf, "where name='%s'", app_name); retval = app.lookup(buf); if (retval) { log_messages.printf(SCHED_MSG_LOG::CRITICAL, "can't find app %s\n", app_name); exit(1); } while (1) { check_stop_daemons(); did_something = do_validate_scan(app); if (one_pass) break; if (!did_something) { sleep(5); } } return 0; } int main(int argc, char** argv) { int i, retval; bool asynch = false, one_pass = false; check_stop_daemons(); for (i=1; i