mirror of https://github.com/BOINC/boinc.git
- scheduler: in send_result_abort(), distinguish between
  the enumeration ending versus it failing.
  This fixes a bug where lots of results would be incorrectly aborted
  if there's a database problem.
  This fix only matters if you use the <send_result_abort> config flag.
  (from Kevin Reed)
- validator: add -credit_from_wu option.
  This gets credit from the workunit XML (which must have a <credit> flag).
  This lets you use credit-from-WU with the standard validators
  (sample_bitwise and sample_trivial)

sched/
    handle_request.C
    validator.C
    validate_util.h

svn path=/trunk/boinc/; revision=12558
This commit is contained in:
parent
bcc72e46be
commit
4c76f95a34
|
@ -4383,3 +4383,21 @@ Rom 3 May 2007
|
||||||
Rom 3 May 2007 (HEAD)
|
Rom 3 May 2007 (HEAD)
|
||||||
- Tag for 5.9.7 release, all platforms
|
- Tag for 5.9.7 release, all platforms
|
||||||
boinc_core_release_5_9_7
|
boinc_core_release_5_9_7
|
||||||
|
|
||||||
|
David 4 May 2007
|
||||||
|
- scheduler: in send_result_abort(), distinguish between
|
||||||
|
the enumeration ending versus it failing.
|
||||||
|
This fixes a bug where lots of results would be
|
||||||
|
incorrectly aborted if there's a database problem.
|
||||||
|
This fix only matters if you use the <send_result_abort> config flag.
|
||||||
|
(from Kevin Reed)
|
||||||
|
- validator: add -credit_from_wu option.
|
||||||
|
This gets credit from the workunit XML
|
||||||
|
(which must have a <credit> flag).
|
||||||
|
This lets you use credit-from-WU with the standard validators
|
||||||
|
(sample_bitwise and sample_trivial)
|
||||||
|
|
||||||
|
sched/
|
||||||
|
handle_request.C
|
||||||
|
validator.C
|
||||||
|
validate_util.h
|
||||||
|
|
|
@ -486,16 +486,20 @@ static int update_host_record(HOST& initial_host, HOST& xhost, USER& user) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Figure out which of the results the user currently has
|
||||||
|
// should be aborted outright, or aborted if not started yet
|
||||||
|
//
|
||||||
int send_result_abort(
|
int send_result_abort(
|
||||||
SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply, SCHED_SHMEM& ss
|
SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply, SCHED_SHMEM& ss
|
||||||
) {
|
) {
|
||||||
int aborts_sent = 0;
|
int aborts_sent = 0;
|
||||||
|
int retval = 0;
|
||||||
DB_IN_PROGRESS_RESULT result;
|
DB_IN_PROGRESS_RESULT result;
|
||||||
std::string result_names;
|
std::string result_names;
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
|
||||||
if (sreq.other_results.size() == 0) {
|
if (sreq.other_results.size() == 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// initially mark all results for abort and build list of results to query
|
// initially mark all results for abort and build list of results to query
|
||||||
|
@ -504,7 +508,7 @@ int send_result_abort(
|
||||||
OTHER_RESULT& orp=sreq.other_results[i];
|
OTHER_RESULT& orp=sreq.other_results[i];
|
||||||
orp.abort = true;
|
orp.abort = true;
|
||||||
orp.abort_if_not_started = false;
|
orp.abort_if_not_started = false;
|
||||||
if ( i > 0 ) result_names.append(", ");
|
if (i > 0) result_names.append(", ");
|
||||||
result_names.append("'");
|
result_names.append("'");
|
||||||
result_names.append(orp.name);
|
result_names.append(orp.name);
|
||||||
result_names.append("'");
|
result_names.append("'");
|
||||||
|
@ -516,54 +520,60 @@ int send_result_abort(
|
||||||
|
|
||||||
// query the db for the results and set the appropriate flag
|
// query the db for the results and set the appropriate flag
|
||||||
//
|
//
|
||||||
while (!result.enumerate(reply.host.id, result_names.c_str())) {
|
while (!(retval = result.enumerate(reply.host.id, result_names.c_str()))) {
|
||||||
for (i=0; i<sreq.other_results.size(); i++) {
|
for (i=0; i<sreq.other_results.size(); i++) {
|
||||||
OTHER_RESULT& orp = sreq.other_results[i];
|
OTHER_RESULT& orp = sreq.other_results[i];
|
||||||
if (!strcmp(orp.name.c_str(), result.result_name)) {
|
if (!strcmp(orp.name.c_str(), result.result_name)) {
|
||||||
if ( result.error_mask&WU_ERROR_CANCELLED ) {
|
if ( result.error_mask&WU_ERROR_CANCELLED ) {
|
||||||
// do nothing, it should be aborted
|
// do nothing, it should be aborted
|
||||||
} else if ( result.assimilate_state == ASSIMILATE_DONE ) {
|
} else if ( result.assimilate_state == ASSIMILATE_DONE ) {
|
||||||
// only send abort if not started
|
// only send abort if not started
|
||||||
orp.abort = false;
|
orp.abort = false;
|
||||||
orp.abort_if_not_started = true;
|
orp.abort_if_not_started = true;
|
||||||
} else if ( result.server_state == RESULT_SERVER_STATE_OVER && result.outcome == RESULT_OUTCOME_NO_REPLY ) {
|
} else if ( result.server_state == RESULT_SERVER_STATE_OVER && result.outcome == RESULT_OUTCOME_NO_REPLY ) {
|
||||||
// the result is late so abort it if it hasn't been started
|
// the result is late so abort it if it hasn't been started
|
||||||
orp.abort=false;
|
orp.abort=false;
|
||||||
orp.abort_if_not_started = true;
|
orp.abort_if_not_started = true;
|
||||||
} else {
|
} else {
|
||||||
// all is good with the result - let it process
|
// all is good with the result - let it process
|
||||||
orp.abort=false;
|
orp.abort=false;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If enumeration returned an error, don't send any aborts
|
||||||
|
//
|
||||||
|
if (retval && (retval != ERR_DB_NOT_FOUND)) {
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
// loop through the results and send the appropriate message (if any)
|
// loop through the results and send the appropriate message (if any)
|
||||||
//
|
//
|
||||||
for (i=0; i<sreq.other_results.size(); i++) {
|
for (i=0; i<sreq.other_results.size(); i++) {
|
||||||
OTHER_RESULT& orp = sreq.other_results[i];
|
OTHER_RESULT& orp = sreq.other_results[i];
|
||||||
if (orp.abort) {
|
if (orp.abort) {
|
||||||
reply.result_aborts.push_back(orp.name);
|
reply.result_aborts.push_back(orp.name);
|
||||||
log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL,
|
log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL,
|
||||||
"[HOST#%d]: Send result_abort for result %s\n",
|
"[HOST#%d]: Send result_abort for result %s\n",
|
||||||
reply.host.id, orp.name.c_str()
|
reply.host.id, orp.name.c_str()
|
||||||
);
|
);
|
||||||
// send user message
|
// send user message
|
||||||
char buf[256];
|
char buf[256];
|
||||||
sprintf(buf, "Result %s is no longer usable\n", orp.name.c_str());
|
sprintf(buf, "Result %s is no longer usable\n", orp.name.c_str());
|
||||||
USER_MESSAGE um(buf, "high");
|
USER_MESSAGE um(buf, "high");
|
||||||
reply.insert_message(um);
|
reply.insert_message(um);
|
||||||
} else if (orp.abort_if_not_started) {
|
} else if (orp.abort_if_not_started) {
|
||||||
reply.result_abort_if_not_starteds.push_back(orp.name);
|
reply.result_abort_if_not_starteds.push_back(orp.name);
|
||||||
log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL,
|
log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL,
|
||||||
"[HOST#%d]: Send result_abort_if_unstarted for result %s\n",
|
"[HOST#%d]: Send result_abort_if_unstarted for result %s\n",
|
||||||
reply.host.id, orp.name.c_str()
|
reply.host.id, orp.name.c_str()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return aborts_sent;
|
return aborts_sent;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 1) Decide which global prefs to use for sched decisions: either
|
// 1) Decide which global prefs to use for sched decisions: either
|
||||||
|
|
|
@ -28,6 +28,7 @@
|
||||||
extern int get_output_file_path(RESULT const& result, std::string& path);
|
extern int get_output_file_path(RESULT const& result, std::string& path);
|
||||||
extern int get_output_file_paths(RESULT const& result, std::vector<std::string>&);
|
extern int get_output_file_paths(RESULT const& result, std::vector<std::string>&);
|
||||||
extern double median_mean_credit(WORKUNIT&, std::vector<RESULT>& results);
|
extern double median_mean_credit(WORKUNIT&, std::vector<RESULT>& results);
|
||||||
|
extern double get_credit_from_wu(WORKUNIT&, std::vector<RESULT>& results);
|
||||||
extern int update_credit_per_cpu_sec(
|
extern int update_credit_per_cpu_sec(
|
||||||
double credit, double cpu_time, double& credit_per_cpu_sec
|
double credit, double cpu_time, double& credit_per_cpu_sec
|
||||||
);
|
);
|
||||||
|
|
|
@ -20,13 +20,14 @@
|
||||||
// validator - check and validate results, and grant credit
|
// validator - check and validate results, and grant credit
|
||||||
// -app appname
|
// -app appname
|
||||||
// [-d debug_level]
|
// [-d debug_level]
|
||||||
// [-one_pass_N_WU N] // Validate only N WU in one pass, then exit
|
// [-one_pass_N_WU N] // Validate only N WU in one pass, then exit
|
||||||
// [-one_pass] // make one pass through WU table, then exit
|
// [-one_pass] // make one pass through WU table, then exit
|
||||||
// [-mod n i] // process only WUs with (id mod n) == i
|
// [-mod n i] // process only WUs with (id mod n) == i
|
||||||
// [-max_granted_credit X] // limit maximum granted credit to X
|
// [-max_granted_credit X] // limit maximum granted credit to X
|
||||||
// [-max_claimed_credit Y] // invalid if claims more than Y
|
// [-max_claimed_credit Y] // invalid if claims more than Y
|
||||||
// [-grant_claimed_credit] // just grant whatever is claimed
|
// [-grant_claimed_credit] // just grant whatever is claimed
|
||||||
// [-update_credited_job] // add userid/wuid pair to credited_job table
|
// [-update_credited_job] // add userid/wuid pair to credited_job table
|
||||||
|
// [-credit_from_wu] // get credit from WU XML
|
||||||
//
|
//
|
||||||
// This program must be linked with two project-specific functions:
|
// This program must be linked with two project-specific functions:
|
||||||
// check_set() and check_pair().
|
// check_set() and check_pair().
|
||||||
|
@ -80,6 +81,7 @@ double max_granted_credit = 0;
|
||||||
double max_claimed_credit = 0;
|
double max_claimed_credit = 0;
|
||||||
bool grant_claimed_credit = false;
|
bool grant_claimed_credit = false;
|
||||||
bool update_credited_job = false;
|
bool update_credited_job = false;
|
||||||
|
bool credit_from_wu = false;
|
||||||
|
|
||||||
void update_error_rate(DB_HOST& host, bool valid) {
|
void update_error_rate(DB_HOST& host, bool valid) {
|
||||||
if (host.error_rate > 1) host.error_rate = 1;
|
if (host.error_rate > 1) host.error_rate = 1;
|
||||||
|
@ -210,16 +212,16 @@ int is_valid(RESULT& result, WORKUNIT& wu) {
|
||||||
retval = credited_job.insert();
|
retval = credited_job.insert();
|
||||||
if (retval) {
|
if (retval) {
|
||||||
log_messages.printf(
|
log_messages.printf(
|
||||||
SCHED_MSG_LOG::MSG_NORMAL,
|
SCHED_MSG_LOG::MSG_CRITICAL,
|
||||||
"[RESULT#%d] Warning: credited_job insert failed (userid: %d workunit: %d err: %d)\n",
|
"[RESULT#%d] Warning: credited_job insert failed (userid: %d workunit: %d err: %d)\n",
|
||||||
result.id, user.id, long(wu.opaque), retval
|
result.id, user.id, long(wu.opaque), retval
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
log_messages.printf(
|
log_messages.printf(
|
||||||
SCHED_MSG_LOG::MSG_DEBUG,
|
SCHED_MSG_LOG::MSG_DEBUG,
|
||||||
"[RESULT#%d %s] Granted contribution to valid result [WU#%d OPAQUE#%d USER#%d]\n",
|
"[RESULT#%d %s] added credited_job record [WU#%d OPAQUE#%d USER#%d]\n",
|
||||||
result.id, result.name, wu.id, long(wu.opaque), user.id
|
result.id, result.name, wu.id, long(wu.opaque), user.id
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -431,6 +433,10 @@ int handle_wu(
|
||||||
}
|
}
|
||||||
if (retry) transition_time = DELAYED;
|
if (retry) transition_time = DELAYED;
|
||||||
|
|
||||||
|
if (credit_from_wu) {
|
||||||
|
credit = get_credit_from_wu(wu, results);
|
||||||
|
}
|
||||||
|
|
||||||
// scan results.
|
// scan results.
|
||||||
// update as needed, and count the # of results
|
// update as needed, and count the # of results
|
||||||
// that are still outcome=SUCCESS
|
// that are still outcome=SUCCESS
|
||||||
|
@ -653,15 +659,16 @@ int main(int argc, char** argv) {
|
||||||
"\nUsage: %s -app <app-name> [OPTIONS]\n"
|
"\nUsage: %s -app <app-name> [OPTIONS]\n"
|
||||||
"Start validator for application <app-name>\n\n"
|
"Start validator for application <app-name>\n\n"
|
||||||
"Optional arguments:\n"
|
"Optional arguments:\n"
|
||||||
" -one_pass_N_WU N Validate at most N WUs, then exit\n"
|
" -one_pass_N_WU N Validate at most N WUs, then exit\n"
|
||||||
" -one_pass Make one pass through WU table, then exit\n"
|
" -one_pass Make one pass through WU table, then exit\n"
|
||||||
" -mod n i Process only WUs with (id mod n) == i\n"
|
" -mod n i Process only WUs with (id mod n) == i\n"
|
||||||
" -max_claimed_credit X If a result claims more credit than this, mark it as invalid\n"
|
" -max_claimed_credit X If a result claims more credit than this, mark it as invalid\n"
|
||||||
" -max_granted_credit X Grant no more than this amount of credit to a result\n"
|
" -max_granted_credit X Grant no more than this amount of credit to a result\n"
|
||||||
" -grant_claimed_credit Grant the claimed credit, regardless of what other results for this workunit claimed\n"
|
" -grant_claimed_credit Grant the claimed credit, regardless of what other results for this workunit claimed\n"
|
||||||
" -update_credited_job Add userid/wuid pair to credited_job after granting credit\n"
|
" -update_credited_job Add record to credited_job table after granting credit\n"
|
||||||
" -sleep_interval n Set sleep-interval to n\n"
|
" -credit_from_wu Credit is specified in WU XML\n"
|
||||||
" -d level Set debug-level\n\n";
|
" -sleep_interval n Set sleep-interval to n\n"
|
||||||
|
" -d level Set debug-level\n\n";
|
||||||
|
|
||||||
if ( (argc > 1) && ( !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help") ) ) {
|
if ( (argc > 1) && ( !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help") ) ) {
|
||||||
printf (usage, argv[0] );
|
printf (usage, argv[0] );
|
||||||
|
@ -694,7 +701,9 @@ int main(int argc, char** argv) {
|
||||||
} else if (!strcmp(argv[i], "-grant_claimed_credit")) {
|
} else if (!strcmp(argv[i], "-grant_claimed_credit")) {
|
||||||
grant_claimed_credit = true;
|
grant_claimed_credit = true;
|
||||||
} else if (!strcmp(argv[i], "-update_credited_job")) {
|
} else if (!strcmp(argv[i], "-update_credited_job")) {
|
||||||
update_credited_job= true;
|
update_credited_job = true;
|
||||||
|
} else if (!strcmp(argv[i], "-credit_from_wu")) {
|
||||||
|
credit_from_wu = true;
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "Invalid option '%s'\nTry `%s --help` for more information\n", argv[i], argv[0]);
|
fprintf(stderr, "Invalid option '%s'\nTry `%s --help` for more information\n", argv[i], argv[0]);
|
||||||
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "unrecognized arg: %s\n", argv[i]);
|
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "unrecognized arg: %s\n", argv[i]);
|
||||||
|
|
Loading…
Reference in New Issue