- validator: if job FLOPs estimates are accurate,
    PFC values should be around 1.
    If they differ from 1 by a factor of more than 1e4, ignore them
    and write an error message to the validator log
- validator: if get_pfc() fails because an app version is
    missing from the DB (i.e. the project deleted it)
    keep going so we don't reprocess the WU forever


svn path=/trunk/boinc/; revision=23837
This commit is contained in:
David Anderson 2011-07-12 20:44:28 +00:00
parent e521a112a9
commit f44c9910e7
2 changed files with 37 additions and 8 deletions

View File

@ -4066,3 +4066,15 @@ David 12 July 2011
app_details.php app_details.php
inc/ inc/
boinc_db.inc boinc_db.inc
David 12 July 2011
- validator: if job FLOPs estimates are accurate,
PFC values should be around 1.
If they differ from 1 by a factor of > 1e4, ignore them,
and put an error message into the validator log
- validator: if get_pfc() fails because an app version is
missing from the DB (i.e. the project deleted it)
keep going so we don't reprocess the WU forever
sched/
credit.cpp

View File

@ -337,6 +337,17 @@ inline double wu_estimated_credit(WORKUNIT& wu, DB_APP& app) {
return wu_estimated_pfc(wu, app)*COBBLESTONE_SCALE; return wu_estimated_pfc(wu, app)*COBBLESTONE_SCALE;
} }
// Sanity-check a PFC ratio before it is folded into any statistics.
//
// x   - the ratio to check; the callers visible here pass
//       raw_pfc / wu.rsc_fpops_est
// wu  - workunit the ratio came from (only its name is read, for the log)
// app - application of that workunit (only its name is read, for the log)
//
// Returns true when x lies in [1e-4, 1e4].
// Otherwise writes a MSG_CRITICAL line naming the workunit and app,
// and returns false so the caller can skip the sample.
//
inline bool is_pfc_sane(double x, WORKUNIT& wu, DB_APP& app) {
    // Expressed as a negated in-range test rather than
    // (x > 1e4 || x < 1e-4): every ordered comparison with NaN is false,
    // so the latter form would report NaN (e.g. from 0/0) as sane.
    // This form rejects NaN and +/-inf and is otherwise equivalent.
    if (!(x >= 1e-4 && x <= 1e4)) {
        log_messages.printf(MSG_CRITICAL,
            "Bad FLOP ratio (%f): check workunit.rsc_fpops_est for %s (app %s)\n",
            x, wu.name, app.name
        );
        return false;
    }
    return true;
}
// Compute or estimate "claimed peak FLOP count". // Compute or estimate "claimed peak FLOP count".
// Possibly update host_app_version records and write to DB. // Possibly update host_app_version records and write to DB.
// Possibly update app_version records in memory and let caller write to DB, // Possibly update app_version records in memory and let caller write to DB,
@ -638,7 +649,10 @@ int get_pfc(
raw_pfc, wu.rsc_fpops_est raw_pfc, wu.rsc_fpops_est
); );
} }
avp->pfc_samples.push_back(raw_pfc/wu.rsc_fpops_est); double x = raw_pfc / wu.rsc_fpops_est;
if (is_pfc_sane(x, wu, app)) {
avp->pfc_samples.push_back(x);
}
} }
if (config.debug_credit) { if (config.debug_credit) {
@ -649,11 +663,11 @@ int get_pfc(
(r.received_time - r.sent_time) (r.received_time - r.sent_time)
); );
} }
hav.pfc.update( double x = raw_pfc / wu.rsc_fpops_est;
raw_pfc / wu.rsc_fpops_est, if (is_pfc_sane(x, wu, app)) {
HAV_AVG_THRESH, HAV_AVG_WEIGHT, HAV_AVG_LIMIT hav.pfc.update(x, HAV_AVG_THRESH, HAV_AVG_WEIGHT, HAV_AVG_LIMIT);
); }
hav.et.update_var( hav.et.update_var(
r.elapsed_time / wu.rsc_fpops_est, r.elapsed_time / wu.rsc_fpops_est,
HAV_AVG_THRESH, HAV_AVG_WEIGHT, HAV_AVG_LIMIT HAV_AVG_THRESH, HAV_AVG_WEIGHT, HAV_AVG_LIMIT
@ -727,7 +741,7 @@ int assign_credit_set(
log_messages.printf(MSG_CRITICAL, log_messages.printf(MSG_CRITICAL,
"get_pfc() error: %s\n", boincerror(retval) "get_pfc() error: %s\n", boincerror(retval)
); );
return retval; continue;
} else { } else {
if (config.debug_credit) { if (config.debug_credit) {
log_messages.printf(MSG_NORMAL, log_messages.printf(MSG_NORMAL,
@ -766,9 +780,12 @@ int assign_credit_set(
double x; double x;
if (normal.size()) { if (normal.size()) {
x = low_average(normal); x = low_average(normal);
} else { } else if (approx.size()) {
x = vec_min(approx); x = vec_min(approx);
} else {
x = 0;
} }
x *= COBBLESTONE_SCALE; x *= COBBLESTONE_SCALE;
if (config.debug_credit) { if (config.debug_credit) {
log_messages.printf(MSG_NORMAL, log_messages.printf(MSG_NORMAL,