- sched: 2 bug fixes in credit.cpp

- If all results for a workunit were PFC_MODE_INVALID, a NaN pfc could be
          used, causing database update errors.  Solved by using
          wu_estimated_pfc() as pfc in that case.
        - Sanity check was comparing raw_pfc directly to rsc_fpops_bound.  That
          was causing problems for GPUs with high performance estimates.  Fixed by
          including the app_version scale factor in the check.  I thought I had
          already committed this...
        - Removed a few lines of commented out experimental code accidentally
          committed earlier.
        - Committed to git repository on 8/24


svn path=/trunk/boinc/; revision=26144
This commit is contained in:
Eric J. Korpela 2012-10-02 15:20:13 +00:00
parent be540c8318
commit 33962b77e1
2 changed files with 29 additions and 8 deletions

View File

@ -6064,6 +6064,21 @@ David 23 Sept 2012
lib/
prefs.cpp
Eric K 24 Sept 2012 (Date committed to git repository)
- sched: 2 bug fixes in credit.cpp
- If all results for a workunit were PFC_MODE_INVALID, a NaN pfc could be
used, causing database update errors. Solved by using
wu_estimated_pfc() as pfc in that case.
- Sanity check was comparing raw_pfc directly to rsc_fpops_bound. That
was causing problems for GPUs with high performance estimates. Fixed by
including the app_version scale factor in the check. I thought I had
already committed this...
- Removed a few lines of commented out experimental code accidentally
committed earlier.
sched/
credit.cpp
David 30 Sept 2012
- API: if we get a resume message from the client while in a critical section,
clear the suspend_request flag.

View File

@ -518,16 +518,20 @@ int get_pfc(
r.flops_estimate/1e9
);
}
// get app version
avp = av_lookup(r.app_version_id, app_versions);
// Sanity check
//
if (raw_pfc > wu.rsc_fpops_bound) {
// If an app version scale exists, use it. Otherwise assume 1.
double tmp_scale = (avp && (r.app_version_id>1) && avp->pfc_scale) ? (avp->pfc_scale) : 1.0;
if (raw_pfc*tmp_scale > wu.rsc_fpops_bound) {
char query[256], clause[256];
pfc = wu_estimated_pfc(wu, app);
if (config.debug_credit) {
log_messages.printf(MSG_NORMAL,
"[credit] [RESULT#%d] sanity check failed: %.2f>%.2f, return %.2f\n",
r.id, raw_pfc*COBBLESTONE_SCALE,
r.id, raw_pfc*tmp_scale*COBBLESTONE_SCALE,
wu.rsc_fpops_bound*COBBLESTONE_SCALE, pfc*COBBLESTONE_SCALE
);
}
@ -727,10 +731,6 @@ int get_pfc(
(r.received_time - r.sent_time),
HAV_AVG_THRESH, HAV_AVG_WEIGHT, HAV_AVG_LIMIT
);
// if ((r.elapsed_time > 0) && (r.cpu_time > 0)) {
// hav.rt.update(r.elapsed_time,HAV_AVG_THRESH,HAV_AVG_WEIGHT,HAV_AVG_LIMIT);
// hav.cpu.update(r.cpu_time,HAV_AVG_THRESH,HAV_AVG_WEIGHT,HAV_AVG_LIMIT);
// }
}
// keep track of credit per app version
@ -872,7 +872,13 @@ int assign_credit_set(
approx.push_back(normal[0]);
// fall through
case 0:
x = pegged_average(approx,wu_estimated_pfc(wu, app));
if (approx.size()) {
x = pegged_average(approx,wu_estimated_pfc(wu, app));
} else {
// there were only PFC_MODE_INVALID results, so
// we guess
x = wu_estimated_pfc(wu, app);
}
break;
default:
x = pegged_average(normal,wu_estimated_pfc(wu, app));