diff --git a/checkin_notes b/checkin_notes index 51840db92d..c94b44e381 100644 --- a/checkin_notes +++ b/checkin_notes @@ -3486,3 +3486,27 @@ David 24 Apr 2012 tools/ update_versions + +David 25 Apr 2012 + - scheduler: fix bug that tried to open plan class spec file + on each request. + - client: when showing how much work a scheduler request returned, + scale by availability (as is done to show the amount of the request) + - client in account manager request, and + are in wall time, not run time + (i.e. scale them by availability) + + Note: there's some confusion in the code between runtime and wall time, + where in general wall time = runtime / availability. + New convention: let's use "runtime" for the former, + and "duration" for the latter. + + sched/ + sched_customize.cpp + client/ + client_types.cpp,h + time_stats.h + work_fetch.cpp + rr_sim.cpp + cs_scheduler.cpp + cpu_sched.cpp diff --git a/client/client_types.cpp b/client/client_types.cpp index 1a62df03bf..298fe1c9a6 100644 --- a/client/client_types.cpp +++ b/client/client_types.cpp @@ -623,7 +623,8 @@ void PROJECT::get_task_durs(double& not_started_dur, double& in_progress_dur) { for (unsigned int i=0; iproject != this) continue; - double d = rp->estimated_time_remaining(); + double d = rp->estimated_runtime_remaining(); + d /= gstate.time_stats.availability_frac(rp->avp->gpu_usage.rsc_type); if (rp->is_not_started()) { not_started_dur += d; } else { @@ -2053,7 +2054,7 @@ int RESULT::write_gui(MIOFILE& out) { state(), report_deadline, received_time, - estimated_time_remaining() + estimated_runtime_remaining() ); if (got_server_ack) out.printf(" \n"); if (ready_to_report) out.printf(" \n"); @@ -2240,7 +2241,7 @@ void RESULT::append_log_record() { FILE* f = fopen(filename, "ab"); if (!f) return; fprintf(f, "%.0f ue %f ct %f fe %.0f nm %s et %f\n", - gstate.now, estimated_duration_uncorrected(), final_cpu_time, + gstate.now, estimated_runtime_uncorrected(), final_cpu_time, wup->rsc_fpops_est, name, final_elapsed_time ); fclose(f); diff --git a/client/client_types.h b/client/client_types.h index 3b064a3f35..d465bfed27 100644 --- a/client/client_types.h +++ b/client/client_types.h @@ -751,14 +751,14 @@ struct RESULT { // stuff related to CPU scheduling bool is_not_started(); - double estimated_duration(); - double estimated_duration_uncorrected(); - double estimated_time_remaining(); + double estimated_runtime(); + double estimated_runtime_uncorrected(); + double estimated_runtime_remaining(); inline double estimated_flops_remaining() { #ifdef SIM return sim_flops_left; #else - return estimated_time_remaining()*avp->flops; + return estimated_runtime_remaining()*avp->flops; #endif } diff --git a/client/cpu_sched.cpp b/client/cpu_sched.cpp index f3c06f3e11..801c2c8b63 100644 --- a/client/cpu_sched.cpp +++ b/client/cpu_sched.cpp @@ -500,7 +500,7 @@ static RESULT* earliest_deadline_result(int rsc_type) { // ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(rp); if (best_atp && !atp) continue; - if (rp->estimated_time_remaining() < best_result->estimated_time_remaining() + if (rp->estimated_runtime_remaining() < best_result->estimated_runtime_remaining() || (!best_atp && atp) ) { best_result = rp; @@ -1992,8 +1992,8 @@ void CLIENT_STATE::set_ncpus() { void PROJECT::update_duration_correction_factor(ACTIVE_TASK* atp) { if (dont_use_dcf) return; RESULT* rp = atp->result; - double raw_ratio = atp->elapsed_time/rp->estimated_duration_uncorrected(); - double adj_ratio = atp->elapsed_time/rp->estimated_duration(); + double raw_ratio = atp->elapsed_time/rp->estimated_runtime_uncorrected(); + double adj_ratio = atp->elapsed_time/rp->estimated_runtime(); double old_dcf = duration_correction_factor; // it's OK to overestimate completion time, diff --git a/client/cs_scheduler.cpp b/client/cs_scheduler.cpp index 6e9b3b421f..7147ef21dd 100644 --- a/client/cs_scheduler.cpp +++ b/client/cs_scheduler.cpp @@ -334,7 +334,7 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p) { fprintf(f, "\n"); for (i=0; iestimated_time_remaining(); + double x = rp->estimated_runtime_remaining(); if (x == 0) continue; strcpy(buf, ""); int rt = rp->avp->gpu_usage.rsc_type; @@ -836,9 +836,9 @@ int CLIENT_STATE::handle_scheduler_reply( wup->clear_errors(); workunits.push_back(wup); } - double est_rsc_duration[MAX_RSC]; + double est_rsc_runtime[MAX_RSC]; for (int j=0; jset_state(RESULT_NEW, "handle_scheduler_reply"); int rt = rp->avp->gpu_usage.rsc_type; if (rt > 0) { - est_rsc_duration[rt] += rp->estimated_duration(); + est_rsc_runtime[rt] += rp->estimated_runtime(); gpus_usable = true; // trigger a check of whether GPU is actually usable } else { - est_rsc_duration[0] += rp->estimated_duration(); + est_rsc_runtime[0] += rp->estimated_runtime(); } } rp->wup->version_num = rp->version_num; @@ -901,7 +901,8 @@ int CLIENT_STATE::handle_scheduler_reply( for (int j=0; juses_coprocs()?gstate.overall_gpu_frac():gstate.overall_cpu_frac(); - double dur = rpbest->estimated_time_remaining() / frac; + double dur = rpbest->estimated_runtime_remaining() / frac; rsc_work_fetch[0].update_busy_time(dur, rpbest->avp->avg_ncpus); int rt = rpbest->avp->gpu_usage.rsc_type; if (rt) { diff --git a/client/time_stats.h b/client/time_stats.h index 74f80271d2..1d86127c5e 100644 --- a/client/time_stats.h +++ b/client/time_stats.h @@ -49,6 +49,15 @@ public: int write(MIOFILE&, bool to_server); int parse(XML_PARSER&); + double availability_frac(int rsc_type) { + double x; + if (rsc_type == 0) { + x = on_frac*active_frac; + } else { + x = on_frac*gpu_active_frac; + } + return x>0?x:1; + } void log_append(const char*, double); void log_append_net(int); void trim_stats_log(); diff --git a/client/work_fetch.cpp b/client/work_fetch.cpp index a97f71b370..5c1d42cda7 100644 --- a/client/work_fetch.cpp +++ b/client/work_fetch.cpp @@ -1026,21 +1026,21 @@ bool RESULT::downloading() { return true; } -double RESULT::estimated_duration_uncorrected() { +double RESULT::estimated_runtime_uncorrected() { return wup->rsc_fpops_est/avp->flops; } // estimate how long a result will take on this host // -double RESULT::estimated_duration() { - double x = estimated_duration_uncorrected(); +double RESULT::estimated_runtime() { + double x = estimated_runtime_uncorrected(); if (!project->dont_use_dcf) { x *= project->duration_correction_factor; } return x; } -double RESULT::estimated_time_remaining() { +double RESULT::estimated_runtime_remaining() { if (computing_done()) return 0; ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(this); if (atp) { @@ -1050,7 +1050,7 @@ double RESULT::estimated_time_remaining() { return atp->est_dur() - atp->elapsed_time; #endif } - return estimated_duration(); + return estimated_runtime(); } // Returns the estimated total elapsed time of this task. @@ -1060,7 +1060,7 @@ double RESULT::estimated_time_remaining() { // double ACTIVE_TASK::est_dur() { if (fraction_done >= 1) return elapsed_time; - double wu_est = result->estimated_duration(); + double wu_est = result->estimated_runtime(); if (fraction_done <= 0) return wu_est; if (wu_est < elapsed_time) wu_est = elapsed_time; double frac_est = fraction_done_elapsed_time / fraction_done; diff --git a/sched/sched_customize.cpp b/sched/sched_customize.cpp index e90037fdd6..b1c6edd329 100644 --- a/sched/sched_customize.cpp +++ b/sched/sched_customize.cpp @@ -604,6 +604,7 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) { "[version] Couldn't open plan class spec file '%s'\n", buf ); } + have_plan_class_spec = false; } else if (retval) { log_messages.printf(MSG_CRITICAL, "Error parsing plan class spec file '%s'\n", buf