- scheduler: fix bug that tried to open plan class spec file
    on each request.
- client: when showing how much work a scheduler request returned,
    scale by availability (as is done to show the amount of the request)
- client: in account manager request, <not_started_dur> and
    <in_progress_dur> are in wall time, not run time
    (i.e. scale them by availability)

Note: there's some confusion in the code between runtime and wall time,
    where in general wall time = runtime / availability.
    New convention: let's use "runtime" for run time,
    and "duration" for wall time.

svn path=/trunk/boinc/; revision=25597
This commit is contained in:
David Anderson 2012-04-25 04:10:29 +00:00
parent 6041cb40f9
commit 9d25481174
9 changed files with 59 additions and 23 deletions

View File

@ -3486,3 +3486,27 @@ David 24 Apr 2012
tools/
update_versions
David 25 Apr 2012
- scheduler: fix bug that tried to open plan class spec file
on each request.
- client: when showing how much work a scheduler request returned,
scale by availability (as is done to show the amount of the request)
- client: in account manager request, <not_started_dur> and
<in_progress_dur> are in wall time, not run time
(i.e. scale them by availability)
Note: there's some confusion in the code between runtime and wall time,
where in general wall time = runtime / availability.
New convention: let's use "runtime" for the former,
and "duration" for the latter.
sched/
sched_customize.cpp
client/
client_types.cpp,h
time_stats.h
work_fetch.cpp
rr_sim.cpp
cs_scheduler.cpp
cpu_sched.cpp

View File

@ -623,7 +623,8 @@ void PROJECT::get_task_durs(double& not_started_dur, double& in_progress_dur) {
for (unsigned int i=0; i<gstate.results.size(); i++) {
RESULT* rp = gstate.results[i];
if (rp->project != this) continue;
double d = rp->estimated_time_remaining();
double d = rp->estimated_runtime_remaining();
d /= gstate.time_stats.availability_frac(rp->avp->gpu_usage.rsc_type);
if (rp->is_not_started()) {
not_started_dur += d;
} else {
@ -2053,7 +2054,7 @@ int RESULT::write_gui(MIOFILE& out) {
state(),
report_deadline,
received_time,
estimated_time_remaining()
estimated_runtime_remaining()
);
if (got_server_ack) out.printf(" <got_server_ack/>\n");
if (ready_to_report) out.printf(" <ready_to_report/>\n");
@ -2240,7 +2241,7 @@ void RESULT::append_log_record() {
FILE* f = fopen(filename, "ab");
if (!f) return;
fprintf(f, "%.0f ue %f ct %f fe %.0f nm %s et %f\n",
gstate.now, estimated_duration_uncorrected(), final_cpu_time,
gstate.now, estimated_runtime_uncorrected(), final_cpu_time,
wup->rsc_fpops_est, name, final_elapsed_time
);
fclose(f);

View File

@ -751,14 +751,14 @@ struct RESULT {
// stuff related to CPU scheduling
bool is_not_started();
double estimated_duration();
double estimated_duration_uncorrected();
double estimated_time_remaining();
double estimated_runtime();
double estimated_runtime_uncorrected();
double estimated_runtime_remaining();
inline double estimated_flops_remaining() {
#ifdef SIM
return sim_flops_left;
#else
return estimated_time_remaining()*avp->flops;
return estimated_runtime_remaining()*avp->flops;
#endif
}

View File

@ -500,7 +500,7 @@ static RESULT* earliest_deadline_result(int rsc_type) {
//
ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(rp);
if (best_atp && !atp) continue;
if (rp->estimated_time_remaining() < best_result->estimated_time_remaining()
if (rp->estimated_runtime_remaining() < best_result->estimated_runtime_remaining()
|| (!best_atp && atp)
) {
best_result = rp;
@ -1992,8 +1992,8 @@ void CLIENT_STATE::set_ncpus() {
void PROJECT::update_duration_correction_factor(ACTIVE_TASK* atp) {
if (dont_use_dcf) return;
RESULT* rp = atp->result;
double raw_ratio = atp->elapsed_time/rp->estimated_duration_uncorrected();
double adj_ratio = atp->elapsed_time/rp->estimated_duration();
double raw_ratio = atp->elapsed_time/rp->estimated_runtime_uncorrected();
double adj_ratio = atp->elapsed_time/rp->estimated_runtime();
double old_dcf = duration_correction_factor;
// it's OK to overestimate completion time,

View File

@ -334,7 +334,7 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p) {
fprintf(f, "<in_progress_results>\n");
for (i=0; i<results.size(); i++) {
rp = results[i];
double x = rp->estimated_time_remaining();
double x = rp->estimated_runtime_remaining();
if (x == 0) continue;
strcpy(buf, "");
int rt = rp->avp->gpu_usage.rsc_type;
@ -836,9 +836,9 @@ int CLIENT_STATE::handle_scheduler_reply(
wup->clear_errors();
workunits.push_back(wup);
}
double est_rsc_duration[MAX_RSC];
double est_rsc_runtime[MAX_RSC];
for (int j=0; j<coprocs.n_rsc; j++) {
est_rsc_duration[j] = 0;
est_rsc_runtime[j] = 0;
}
for (i=0; i<sr.results.size(); i++) {
if (lookup_result(project, sr.results[i].name)) {
@ -882,11 +882,11 @@ int CLIENT_STATE::handle_scheduler_reply(
rp->set_state(RESULT_NEW, "handle_scheduler_reply");
int rt = rp->avp->gpu_usage.rsc_type;
if (rt > 0) {
est_rsc_duration[rt] += rp->estimated_duration();
est_rsc_runtime[rt] += rp->estimated_runtime();
gpus_usable = true;
// trigger a check of whether GPU is actually usable
} else {
est_rsc_duration[0] += rp->estimated_duration();
est_rsc_runtime[0] += rp->estimated_runtime();
}
}
rp->wup->version_num = rp->version_num;
@ -901,7 +901,8 @@ int CLIENT_STATE::handle_scheduler_reply(
for (int j=0; j<coprocs.n_rsc; j++) {
msg_printf(project, MSG_INFO,
"[sched_op] estimated total %s task duration: %.0f seconds",
rsc_name(j), est_rsc_duration[j]
rsc_name(j),
est_rsc_runtime[j]/time_stats.availability_frac(j)
);
}
}

View File

@ -389,7 +389,7 @@ void RR_SIM::simulate() {
// update busy time of relevant processor types
//
double frac = rpbest->uses_coprocs()?gstate.overall_gpu_frac():gstate.overall_cpu_frac();
double dur = rpbest->estimated_time_remaining() / frac;
double dur = rpbest->estimated_runtime_remaining() / frac;
rsc_work_fetch[0].update_busy_time(dur, rpbest->avp->avg_ncpus);
int rt = rpbest->avp->gpu_usage.rsc_type;
if (rt) {

View File

@ -49,6 +49,15 @@ public:
int write(MIOFILE&, bool to_server);
int parse(XML_PARSER&);
double availability_frac(int rsc_type) {
double x;
if (rsc_type == 0) {
x = on_frac*active_frac;
} else {
x = on_frac*gpu_active_frac;
}
return x>0?x:1;
}
void log_append(const char*, double);
void log_append_net(int);
void trim_stats_log();

View File

@ -1026,21 +1026,21 @@ bool RESULT::downloading() {
return true;
}
double RESULT::estimated_duration_uncorrected() {
double RESULT::estimated_runtime_uncorrected() {
return wup->rsc_fpops_est/avp->flops;
}
// estimate how long a result will take on this host
//
double RESULT::estimated_duration() {
double x = estimated_duration_uncorrected();
double RESULT::estimated_runtime() {
double x = estimated_runtime_uncorrected();
if (!project->dont_use_dcf) {
x *= project->duration_correction_factor;
}
return x;
}
double RESULT::estimated_time_remaining() {
double RESULT::estimated_runtime_remaining() {
if (computing_done()) return 0;
ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(this);
if (atp) {
@ -1050,7 +1050,7 @@ double RESULT::estimated_time_remaining() {
return atp->est_dur() - atp->elapsed_time;
#endif
}
return estimated_duration();
return estimated_runtime();
}
// Returns the estimated total elapsed time of this task.
@ -1060,7 +1060,7 @@ double RESULT::estimated_time_remaining() {
//
double ACTIVE_TASK::est_dur() {
if (fraction_done >= 1) return elapsed_time;
double wu_est = result->estimated_duration();
double wu_est = result->estimated_runtime();
if (fraction_done <= 0) return wu_est;
if (wu_est < elapsed_time) wu_est = elapsed_time;
double frac_est = fraction_done_elapsed_time / fraction_done;

View File

@ -604,6 +604,7 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
"[version] Couldn't open plan class spec file '%s'\n", buf
);
}
have_plan_class_spec = false;
} else if (retval) {
log_messages.printf(MSG_CRITICAL,
"Error parsing plan class spec file '%s'\n", buf