// The contents of this file are subject to the BOINC Public License // Version 1.0 (the "License"); you may not use this file except in // compliance with the License. You may obtain a copy of the License at // http://boinc.berkeley.edu/license_1.0.txt // // Software distributed under the License is distributed on an "AS IS" // basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the // License for the specific language governing rights and limitations // under the License. // // The Original Code is the Berkeley Open Infrastructure for Network Computing. // // The Initial Developer of the Original Code is the SETI@home project. // Portions created by the SETI@home project are Copyright (C) 2002 // University of California at Berkeley. All Rights Reserved. // // Contributor(s): // // transitioner - handle transitions in the state of a WU // - a result has become DONE (via timeout or client reply) // - the WU error mask is set (e.g. by validater) // - assimilation is finished // // cmdline: // [ -asynch ] be asynchronous // [ -one_pass ] do one pass, then exit // [ -d x ] debug level x using namespace std; #include #include #include #include #include "boinc_db.h" #include "util.h" #include "backend_lib.h" #include "sched_config.h" #include "sched_util.h" #define LOCKFILE "transitioner.out" #define PIDFILE "transitioner.pid" int startup_time; SCHED_CONFIG config; R_RSA_PRIVATE_KEY key; void handle_wu(DB_WORKUNIT& wu) { vector results; DB_RESULT* p_canonical_result = NULL; int nerrors, retval, ninprogress, nsuccess; int nunsent, ncouldnt_send, nover; char suffix[256], result_template[MAX_BLOB_SIZE]; time_t now = time(0), x; bool all_over, have_result_to_validate, do_delete; { char buf[256]; // scan the results for the WU // DB_RESULT result; sprintf(buf, "where workunitid=%d", wu.id); while (!result.enumerate(buf)) { results.push_back(result); } } ScopeMessages scope_messages(log_messages, SchedMessages::NORMAL); // count up the number of results in various states, // and check for timed-out results // nunsent = 0; ninprogress = 0; nover = 0; nerrors = 0; nsuccess = 0; ncouldnt_send = 0; have_result_to_validate = false; for (unsigned int i=0; iOVER; outcome:NO_REPLY\n", wu.id, wu.name, result.id, result.name, result.report_deadline, (int)now ); result.server_state = RESULT_SERVER_STATE_OVER; result.outcome = RESULT_OUTCOME_NO_REPLY; retval = result.update(); if (retval) { log_messages.printf( SchedMessages::CRITICAL, "[WU#%d %s] [RESULT#%d %s] result.update() == %d\n", wu.id, wu.name, result.id, result.name, retval ); } nover++; } else { ninprogress++; } break; case RESULT_SERVER_STATE_OVER: nover++; switch (result.outcome) { case RESULT_OUTCOME_COULDNT_SEND: log_messages.printf( SchedMessages::NORMAL, "[WU#%d %s] [RESULT#%d %s] result couldn't be sent\n", wu.id, wu.name, result.id, result.name ); ncouldnt_send++; break; case RESULT_OUTCOME_SUCCESS: if (result.validate_state == VALIDATE_STATE_INIT) { have_result_to_validate = true; } nsuccess++; break; case RESULT_OUTCOME_CLIENT_ERROR: nerrors++; break; } break; } } log_messages.printf( SchedMessages::DEBUG, "[WU#%d %s] %d results: unsent %d, in_progress %d, over %d (success %d, error %d, couldnt_send %d)\n", wu.id, wu.name, (int)results.size(), nunsent, ninprogress, nover, nsuccess, nerrors, ncouldnt_send ); // trigger validation if we have a quorum // and some result hasn't been validated // if (nsuccess >= wu.min_quorum && have_result_to_validate) { wu.need_validate = true; log_messages.printf( SchedMessages::NORMAL, "[WU#%d %s] need_validate:=>true [nsuccess=%d >= min_quorum=%d]\n", wu.id, wu.name, nsuccess, wu.min_quorum ); } // check for WU error conditions // NOTE: check on max # of success results is done in validater // if (ncouldnt_send > 0) { wu.error_mask |= WU_ERROR_COULDNT_SEND_RESULT; } if (nerrors > wu.max_error_results) { log_messages.printf( SchedMessages::NORMAL, "[WU#%d %s] WU has too many errors (%d errors for %d results)\n", wu.id, wu.name, nerrors, (int)results.size() ); wu.error_mask |= WU_ERROR_TOO_MANY_ERROR_RESULTS; } if ((int)results.size() > wu.max_total_results) { log_messages.printf( SchedMessages::NORMAL, "[WU#%d %s] WU has too many total results (%d)\n", wu.id, wu.name, (int)results.size() ); wu.error_mask |= WU_ERROR_TOO_MANY_TOTAL_RESULTS; } // if this WU had an error, don't send any unsent results, // and trigger assimilation if needed // if (wu.error_mask) { for (unsigned int i=0; iOVER; outcome:=>DIDNT_NEED\n", wu.id, wu.name, result.id, result.name ); result.server_state = RESULT_SERVER_STATE_OVER; result.outcome = RESULT_OUTCOME_DIDNT_NEED; retval = result.update(); if (retval) { log_messages.printf( SchedMessages::CRITICAL, "[WU#%d %s] [RESULT#%d %s] result.update() == %d\n", wu.id, wu.name, result.id, result.name, retval ); } } } if (wu.assimilate_state == ASSIMILATE_INIT) { wu.assimilate_state = ASSIMILATE_READY; log_messages.printf( SchedMessages::NORMAL, "[WU#%d %s] error_mask:%d assimilate_state:INIT=>READY\n", wu.id, wu.name, wu.error_mask ); } } else if (wu.assimilate_state == ASSIMILATE_INIT) { // If no error, generate new results if needed. // NOTE!! `n' must be a SIGNED integer! int n = wu.target_nresults - nunsent - ninprogress - nsuccess; if (n > 0) { log_messages.printf( SchedMessages::NORMAL, "[WU#%d %s] Generating %d more results (%d target - %d unsent - %d in progress - %d success)\n", wu.id, wu.name, n, wu.target_nresults, nunsent, ninprogress, nsuccess ); for (int i=0; iREADY\n", wu.id, wu.name ); } // output of error results can be deleted immediately; // output of success results can be deleted if validated // for (unsigned int i=0; iREADY\n", wu.id, wu.name, result.id, result.name ); result.file_delete_state = FILE_DELETE_READY; retval = result.update(); if (retval) { log_messages.printf( SchedMessages::CRITICAL, "[WU#%d %s] [RESULT#%d %s] result.update() == %d\n", wu.id, wu.name, result.id, result.name, retval ); } } } } wu.transition_time = INT_MAX; for (unsigned int i=0; i