- client: 2nd try on my last checkin.

We need to estimate 2 different delays for each resource type:
    1) "saturated time": the time the resource will be fully utilized
        (new name for the old "estimated delay").
        This is used to compute work requests.
    2) "busy time": the time a new job would have to wait
        to start using this resource.
        This is passed to the scheduler and used for a crude deadline check.
        Note: this is ill-defined; a single number doesn't suffice.
        But as a very rough estimate, I'll use the sum of
            (J.duration * J.ninstances)/ninstances
        over all jobs that miss their deadline under RR sim.

svn path=/trunk/boinc/; revision=18629
This commit is contained in:
David Anderson 2009-07-17 18:29:10 +00:00
parent 34bdf155e8
commit 5753153909
5 changed files with 64 additions and 30 deletions

View File

@ -6364,3 +6364,22 @@ Rom 17 July 2009
/
configure.ac
Makefile.am
David 17 July 2009
- client: 2nd try on my last checkin.
We need to estimate 2 different delays for each resource type:
1) "saturated time": the time the resource will be fully utilized
(new name for the old "estimated delay").
This is used to compute work requests.
2) "busy time": the time a new job would have to wait
to start using this resource.
This is passed to the scheduler and used for a crude deadline check.
Note: this is ill-defined; a single number doesn't suffice.
But as a very rough estimate, I'll use the sum of
(J.duration * J.ninstances)/ninstances
over all jobs that miss their deadline under RR sim.
client/
work_fetch.cpp,h
rr_sim.cpp
cs_scheduler.cpp

View File

@ -231,7 +231,7 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p) {
if (coproc_cuda) {
coproc_cuda->req_secs = cuda_work_fetch.req_secs;
coproc_cuda->req_instances = cuda_work_fetch.req_instances;
coproc_cuda->estimated_delay = cuda_work_fetch.req_secs?cuda_work_fetch.estimated_delay:0;
coproc_cuda->estimated_delay = cuda_work_fetch.req_secs?cuda_work_fetch.busy_time:0;
}
if (coprocs.coprocs.size()) {

View File

@ -128,14 +128,15 @@ void RR_SIM_PROJECT_STATUS::remove_active(RESULT* rp) {
// with weighted round-robin scheduling
//
void set_rrsim_flops(RESULT* rp) {
// if it's a coproc job, use app version estimate
// For coproc jobs, use app version estimate
//
if (rp->uses_coprocs()) {
rp->rrsim_flops = rp->avp->flops * gstate.overall_cpu_frac();
return;
}
PROJECT* p = rp->project;
// first, estimate how many CPU seconds per second this job would get
// For CPU jobs, estimate how many CPU seconds per second this job would get
// running with other jobs of this project, ignoring other factors
//
double x = 1;
@ -298,10 +299,8 @@ void CLIENT_STATE::rr_simulation() {
// "rpbest" is first result to finish. Does it miss its deadline?
//
bool misses_deadline = false;
double diff = (sim_now + rpbest->rrsim_finish_delay) - rpbest->computation_deadline();
if (diff > 0) {
misses_deadline = true;
ACTIVE_TASK* atp = lookup_active_task_by_result(rpbest);
if (atp && atp->procinfo.working_set_size_smoothed > ar) {
if (log_flags.rr_simulation) {
@ -328,11 +327,23 @@ void CLIENT_STATE::rr_simulation() {
}
}
// update saturated time
//
double end_time = sim_now + rpbest->rrsim_finish_delay;
double x = end_time - gstate.now;
cpu_work_fetch.update_estimated_delay(x, misses_deadline);
cpu_work_fetch.update_saturated_time(x);
if (coproc_cuda) {
cuda_work_fetch.update_estimated_delay(x, misses_deadline);
cuda_work_fetch.update_saturated_time(x);
}
// update busy time
//
if (rpbest->rr_sim_misses_deadline) {
double dur = rpbest->estimated_time_remaining(false) / gstate.overall_cpu_frac();
cpu_work_fetch.update_busy_time(dur, rpbest->avp->avg_ncpus);
if (rpbest->uses_cuda()) {
cuda_work_fetch.update_busy_time(dur, rpbest->avp->ncudas);
}
}
// increment resource shortfalls

View File

@ -82,7 +82,8 @@ void RSC_WORK_FETCH::rr_init() {
total_fetchable_share = 0;
total_runnable_share = 0;
deadline_missed_instances = 0;
estimated_delay = 0;
saturated_time = 0;
busy_time = 0;
pending.clear();
}
@ -131,19 +132,15 @@ void RSC_WORK_FETCH::accumulate_shortfall(double d_time) {
#endif
}
// "estimated delay" is the interval for which we expect the
// resource to be saturated and have an EDF job.
//
void RSC_WORK_FETCH::update_estimated_delay(double dt, bool misses_deadline) {
void RSC_WORK_FETCH::update_saturated_time(double dt) {
double idle = ninstances - sim_nused;
if (misses_deadline && idle < 1e-6) {
estimated_delay = dt;
if (idle < 1e-6) {
saturated_time = dt;
}
#if 0
msg_printf(0, MSG_INFO, "est delay (%s): used %e instances %d dt %f est delay %f",
rsc_name(rsc_type), sim_nused, ninstances, dt, estimated_delay
);
#endif
}
// Accumulate this resource's "busy time": the rough estimate of how long
// a new job would wait before starting on this resource.
// Called (from RR simulation) for each job that misses its deadline;
// adds the job's instance-seconds (duration * instances used),
// normalized by the total number of instances of this resource.
// dur:   estimated remaining run time of the deadline-missing job
// nused: number of instances of this resource the job uses
void RSC_WORK_FETCH::update_busy_time(double dur, double nused) {
	busy_time += (dur*nused)/ninstances;
}
// see if the project's debt is beyond what would normally happen;
@ -184,10 +181,10 @@ PROJECT* RSC_WORK_FETCH::choose_project(int criterion) {
if (nidle_now == 0) return NULL;
break;
case FETCH_IF_MAJOR_SHORTFALL:
if (estimated_delay > gstate.work_buf_min()) return NULL;
if (saturated_time > gstate.work_buf_min()) return NULL;
break;
case FETCH_IF_MINOR_SHORTFALL:
if (estimated_delay > gstate.work_buf_total()) return NULL;
if (saturated_time > gstate.work_buf_total()) return NULL;
break;
case FETCH_IF_PROJECT_STARVED:
if (deadline_missed_instances >= ninstances) return NULL;
@ -300,9 +297,9 @@ void WORK_FETCH::zero_debts() {
void RSC_WORK_FETCH::print_state(const char* name) {
msg_printf(0, MSG_INFO,
"[wfd] %s: shortfall %.2f nidle %.2f est. delay %.2f RS fetchable %.2f runnable %.2f",
"[wfd] %s: shortfall %.2f nidle %.2f saturated %.2f busy %.2f RS fetchable %.2f runnable %.2f",
name,
shortfall, nidle_now, estimated_delay,
shortfall, nidle_now, saturated_time, busy_time,
total_fetchable_share, total_runnable_share
);
for (unsigned int i=0; i<gstate.projects.size(); i++) {
@ -682,7 +679,7 @@ void WORK_FETCH::write_request(FILE* f) {
cpu_work_fetch.req_secs,
cpu_work_fetch.req_secs,
cpu_work_fetch.req_instances,
cpu_work_fetch.req_secs?cpu_work_fetch.estimated_delay:0
cpu_work_fetch.req_secs?cpu_work_fetch.busy_time:0
);
}
@ -726,11 +723,11 @@ void WORK_FETCH::handle_reply(PROJECT* p, vector<RESULT*> new_results) {
void WORK_FETCH::set_initial_work_request() {
cpu_work_fetch.req_secs = 1;
cpu_work_fetch.req_instances = 0;
cpu_work_fetch.estimated_delay = 0;
cpu_work_fetch.busy_time = 0;
if (coproc_cuda) {
cuda_work_fetch.req_secs = 1;
cuda_work_fetch.req_instances = 0;
cuda_work_fetch.estimated_delay = 0;
cuda_work_fetch.busy_time = 0;
}
}

View File

@ -101,9 +101,15 @@ struct RSC_WORK_FETCH {
// total RS of projects from which we could fetch jobs for this device
double total_runnable_share;
// total RS of projects with runnable jobs for this device
double estimated_delay;
// estimated time until resource is not saturated or has no EDF jobs.
// Passed to scheduler for crude deadline check
double saturated_time;
// estimated time until resource is not saturated
// used to calculate work request
double busy_time;
// estimated time until a new job would start;
// passed to scheduler for crude deadline check.
// This can't be estimated with any kind of precision.
// Instead we calculate it as the sum of instance-secs
used by missed-deadline jobs, divided by # instances
double deadline_missed_instances;
// instance count for jobs that miss deadline
std::vector<RESULT*> pending;
@ -121,7 +127,8 @@ struct RSC_WORK_FETCH {
void rr_init();
void accumulate_shortfall(double d_time);
void update_estimated_delay(double dt, bool misses_deadline);
void update_saturated_time(double dt);
void update_busy_time(double dur, double nused);
PROJECT* choose_project(int);
void accumulate_debt();
RSC_PROJECT_WORK_FETCH& project_state(PROJECT*);