- client: 2nd try on my last checkin.

We need to estimate 2 different delays for each resource type:
    1) "saturated time": the time the resource will be fully utilized
        (new name for the old "estimated delay").
        This is used to compute work requests.
    2) "busy time": the time a new job would have to wait
        to start using this resource.
        This is passed to the scheduler and used for a crude deadline check.
        Note: this is ill-defined; a single number doesn't suffice.
        But as a very rough estimate, I'll use the sum of
            (J.duration * J.ninstances)/ninstances
        over all jobs that miss their deadline under RR sim.

svn path=/trunk/boinc/; revision=18629
This commit is contained in:
David Anderson 2009-07-17 18:29:10 +00:00
parent 34bdf155e8
commit 5753153909
5 changed files with 64 additions and 30 deletions

View File

@ -6364,3 +6364,22 @@ Rom 17 July 2009
/
configure.ac
Makefile.am
David 17 July 2009
- client: 2nd try on my last checkin.
We need to estimate 2 different delays for each resource type:
1) "saturated time": the time the resource will be fully utilized
(new name for the old "estimated delay").
This is used to compute work requests.
2) "busy time": the time a new job would have to wait
to start using this resource.
This is passed to the scheduler and used for a crude deadline check.
Note: this is ill-defined; a single number doesn't suffice.
But as a very rough estimate, I'll use the sum of
(J.duration * J.ninstances)/ninstances
over all jobs that miss their deadline under RR sim.
client/
work_fetch.cpp,h
rr_sim.cpp
cs_scheduler.cpp

View File

@ -231,7 +231,7 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p) {
if (coproc_cuda) {
coproc_cuda->req_secs = cuda_work_fetch.req_secs;
coproc_cuda->req_instances = cuda_work_fetch.req_instances;
coproc_cuda->estimated_delay = cuda_work_fetch.req_secs?cuda_work_fetch.estimated_delay:0;
coproc_cuda->estimated_delay = cuda_work_fetch.req_secs?cuda_work_fetch.busy_time:0;
}
if (coprocs.coprocs.size()) {

View File

@ -128,14 +128,15 @@ void RR_SIM_PROJECT_STATUS::remove_active(RESULT* rp) {
// with weighted round-robin scheduling
//
void set_rrsim_flops(RESULT* rp) {
// if it's a coproc job, use app version estimate
// For coproc jobs, use app version estimate
//
if (rp->uses_coprocs()) {
rp->rrsim_flops = rp->avp->flops * gstate.overall_cpu_frac();
return;
}
PROJECT* p = rp->project;
// first, estimate how many CPU seconds per second this job would get
// For CPU jobs, estimate how many CPU seconds per second this job would get
// running with other jobs of this project, ignoring other factors
//
double x = 1;
@ -298,10 +299,8 @@ void CLIENT_STATE::rr_simulation() {
// "rpbest" is first result to finish. Does it miss its deadline?
//
bool misses_deadline = false;
double diff = (sim_now + rpbest->rrsim_finish_delay) - rpbest->computation_deadline();
if (diff > 0) {
misses_deadline = true;
ACTIVE_TASK* atp = lookup_active_task_by_result(rpbest);
if (atp && atp->procinfo.working_set_size_smoothed > ar) {
if (log_flags.rr_simulation) {
@ -328,11 +327,23 @@ void CLIENT_STATE::rr_simulation() {
}
}
// update saturated time
//
double end_time = sim_now + rpbest->rrsim_finish_delay;
double x = end_time - gstate.now;
cpu_work_fetch.update_estimated_delay(x, misses_deadline);
cpu_work_fetch.update_saturated_time(x);
if (coproc_cuda) {
cuda_work_fetch.update_estimated_delay(x, misses_deadline);
cuda_work_fetch.update_saturated_time(x);
}
// update busy time
//
if (rpbest->rr_sim_misses_deadline) {
double dur = rpbest->estimated_time_remaining(false) / gstate.overall_cpu_frac();
cpu_work_fetch.update_busy_time(dur, rpbest->avp->avg_ncpus);
if (rpbest->uses_cuda()) {
cuda_work_fetch.update_busy_time(dur, rpbest->avp->ncudas);
}
}
// increment resource shortfalls

View File

@ -82,7 +82,8 @@ void RSC_WORK_FETCH::rr_init() {
total_fetchable_share = 0;
total_runnable_share = 0;
deadline_missed_instances = 0;
estimated_delay = 0;
saturated_time = 0;
busy_time = 0;
pending.clear();
}
@ -131,19 +132,15 @@ void RSC_WORK_FETCH::accumulate_shortfall(double d_time) {
#endif
}
// "estimated delay" is the interval for which we expect the
// resource to be saturated and have an EDF job.
//
void RSC_WORK_FETCH::update_estimated_delay(double dt, bool misses_deadline) {
void RSC_WORK_FETCH::update_saturated_time(double dt) {
double idle = ninstances - sim_nused;
if (misses_deadline && idle < 1e-6) {
estimated_delay = dt;
if (idle < 1e-6) {
saturated_time = dt;
}
#if 0
msg_printf(0, MSG_INFO, "est delay (%s): used %e instances %d dt %f est delay %f",
rsc_name(rsc_type), sim_nused, ninstances, dt, estimated_delay
);
#endif
}
// Accumulate this resource's "busy time": the rough estimate of how long
// a new job would wait before starting on this resource.
// Called (from RR simulation) for each job that misses its deadline;
// adds the job's instance-seconds (duration * instances used),
// normalized by the total number of instances of this resource.
// dur:   estimated remaining run time of the deadline-missing job
// nused: number of instances of this resource the job uses
void RSC_WORK_FETCH::update_busy_time(double dur, double nused) {
	busy_time += (dur*nused)/ninstances;
}
// see if the project's debt is beyond what would normally happen;
@ -184,10 +181,10 @@ PROJECT* RSC_WORK_FETCH::choose_project(int criterion) {
if (nidle_now == 0) return NULL;
break;
case FETCH_IF_MAJOR_SHORTFALL:
if (estimated_delay > gstate.work_buf_min()) return NULL;
if (saturated_time > gstate.work_buf_min()) return NULL;
break;
case FETCH_IF_MINOR_SHORTFALL:
if (estimated_delay > gstate.work_buf_total()) return NULL;
if (saturated_time > gstate.work_buf_total()) return NULL;
break;
case FETCH_IF_PROJECT_STARVED:
if (deadline_missed_instances >= ninstances) return NULL;
@ -300,9 +297,9 @@ void WORK_FETCH::zero_debts() {
void RSC_WORK_FETCH::print_state(const char* name) {
msg_printf(0, MSG_INFO,
"[wfd] %s: shortfall %.2f nidle %.2f est. delay %.2f RS fetchable %.2f runnable %.2f",
"[wfd] %s: shortfall %.2f nidle %.2f saturated %.2f busy %.2f RS fetchable %.2f runnable %.2f",
name,
shortfall, nidle_now, estimated_delay,
shortfall, nidle_now, saturated_time, busy_time,
total_fetchable_share, total_runnable_share
);
for (unsigned int i=0; i<gstate.projects.size(); i++) {
@ -682,7 +679,7 @@ void WORK_FETCH::write_request(FILE* f) {
cpu_work_fetch.req_secs,
cpu_work_fetch.req_secs,
cpu_work_fetch.req_instances,
cpu_work_fetch.req_secs?cpu_work_fetch.estimated_delay:0
cpu_work_fetch.req_secs?cpu_work_fetch.busy_time:0
);
}
@ -726,11 +723,11 @@ void WORK_FETCH::handle_reply(PROJECT* p, vector<RESULT*> new_results) {
void WORK_FETCH::set_initial_work_request() {
cpu_work_fetch.req_secs = 1;
cpu_work_fetch.req_instances = 0;
cpu_work_fetch.estimated_delay = 0;
cpu_work_fetch.busy_time = 0;
if (coproc_cuda) {
cuda_work_fetch.req_secs = 1;
cuda_work_fetch.req_instances = 0;
cuda_work_fetch.estimated_delay = 0;
cuda_work_fetch.busy_time = 0;
}
}

View File

@ -101,9 +101,15 @@ struct RSC_WORK_FETCH {
// total RS of projects from which we could fetch jobs for this device
double total_runnable_share;
// total RS of projects with runnable jobs for this device
double estimated_delay;
// estimated time until resource is not saturated or has no EDF jobs.
// Passed to scheduler for crude deadline check
double saturated_time;
// estimated time until resource is not saturated
// used to calculate work request
double busy_time;
// estimated time until a new job would start;
// passed to scheduler for crude deadline check.
// This can't be estimated with any kind of precision.
// Instead we calculate it as the sum of instance-secs
used by missed-deadline jobs, divided by # instances
double deadline_missed_instances;
// instance count for jobs that miss deadline
std::vector<RESULT*> pending;
@ -121,7 +127,8 @@ struct RSC_WORK_FETCH {
void rr_init();
void accumulate_shortfall(double d_time);
void update_estimated_delay(double dt, bool misses_deadline);
void update_saturated_time(double dt);
void update_busy_time(double dur, double nused);
PROJECT* choose_project(int);
void accumulate_debt();
RSC_PROJECT_WORK_FETCH& project_state(PROJECT*);