// This file is part of BOINC.
// http://boinc.berkeley.edu
// Copyright (C) 2008 University of California
//
// BOINC is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// BOINC is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with BOINC.  If not, see <http://www.gnu.org/licenses/>.

// Simulate the processing of the current workload
// (including jobs that are downloading)
// with weighted round-robin (WRR) scheduling.
//
// For efficiency, we simulate an approximation of WRR.
// We don't model time-slicing.
// Instead we use a continuous model where, at a given point,
// each project has a set of running jobs that uses at most all CPUs.
// These jobs are assumed to run at a rate proportionate to their avg_ncpus,
// and each project gets total CPU proportionate to its RRS.
//
// For coprocessors, we saturate the resource;
// i.e. with 2 GPUs, we'd let a 1-GPU app and a 2-GPU app run together.
// Otherwise, there'd be the possibility of computing
// a nonzero shortfall inappropriately.
//
// Outputs are changes to global state:
// - deadline misses (per-project count, per-result flag)
//   Deadline misses are not counted for tasks
//   that are too large to run in RAM right now.
// - resource shortfalls (per-project and total)
// - counts of resources idle now
//

// NOTE(review): in the copy of this file under review, a number of text
// spans are visibly missing: template argument lists (e.g. the element type
// of std::vector) and most loop headers of the form "for (i=0; i...".
// Each such place is marked with NOTE(review) below and the code is left
// exactly as found.  Restore those spans from the upstream file before
// attempting to build.

#include "cpp.h"

#ifdef _WIN32
#include "boinc_win.h"
#else
#include "config.h"
#endif

#include "client_state.h"
#include "coproc.h"
#include "client_msgs.h"

// Write a short description of the resources used by rp's app version
// into buf: "<n> CPU", plus "+ <usage> <resource name>" when the app
// version's gpu_usage.rsc_type is nonzero.
// buf is written with sprintf(), so no length check is done here;
// the caller must supply a large enough buffer
// (the caller in this file passes a 256-byte array).
//
inline void rsc_string(RESULT* rp, char* buf) {
    APP_VERSION* avp = rp->avp;
    if (avp->gpu_usage.rsc_type) {
        sprintf(buf, "%.2f CPU + %.2f %s",
            avp->avg_ncpus, avp->gpu_usage.usage,
            rsc_name(avp->gpu_usage.rsc_type)
        );
    } else {
        sprintf(buf, "%.2f CPU", avp->avg_ncpus);
    }
}

// this is here (rather than rr_sim.h) because its inline functions
// refer to RESULT
//
// Holds the results that are currently "active" in the simulation,
// and keeps the global rsc_work_fetch[].sim_nused counters in step
// as results are activated and removed.
//
struct RR_SIM_STATUS {
    // NOTE(review): the template argument list is missing here;
    // the elements are used as RESULT* below (push_back(rp), RESULT* rp = *it)
    std::vector active;
    double active_rsc[MAX_RSC];

    // Add rp to the active set.
    // "when" is used only for the log message.
    // Adds rp's CPU usage to rsc_work_fetch[0].sim_nused and,
    // if the app version uses a coprocessor (nonzero rsc_type),
    // adds its usage to that resource's sim_nused.
    //
    inline void activate(RESULT* rp, double when) {
        PROJECT* p = rp->project;
        if (log_flags.rr_simulation) {
            char buf[256];
            rsc_string(rp, buf);
            msg_printf(p, MSG_INFO,
                "[rr_sim] %.2f: starting %s (%s)",
                when, rp->name, buf
            );
        }
        active.push_back(rp);
        rsc_work_fetch[0].sim_nused += rp->avp->avg_ncpus;
        int rt = rp->avp->gpu_usage.rsc_type;
        if (rt) {
            rsc_work_fetch[rt].sim_nused += rp->avp->gpu_usage.usage;
        }
    }

    // remove *rpbest from active set,
    // and adjust FLOPS left for other results
    //
    // Each remaining result is charged rrsim_flops * rpbest->rrsim_finish_delay,
    // i.e. the work it does during the interval in which rpbest finishes.
    // The resource counters are decremented unconditionally,
    // whether or not rpbest was found in the active set.
    //
    inline void remove_active(RESULT* rpbest) {
        // NOTE(review): template argument list (and possibly the std::
        // qualifier) is missing from this iterator type
        vector::iterator it = active.begin();
        while (it != active.end()) {
            RESULT* rp = *it;
            if (rp == rpbest) {
                // erase() returns the iterator to the next element
                it = active.erase(it);
            } else {
                rp->rrsim_flops_left -= rp->rrsim_flops*rpbest->rrsim_finish_delay;

                // can be slightly less than 0 due to roundoff
                //
                if (rp->rrsim_flops_left < -1) {
                    msg_printf(rp->project, MSG_INTERNAL_ERROR,
                        "%s: negative FLOPs left %f", rp->name, rp->rrsim_flops_left
                    );
                }
                // clamp so later code never sees a negative amount of work
                if (rp->rrsim_flops_left < 0) {
                    rp->rrsim_flops_left = 0;
                }
                it++;
            }
        }
        rsc_work_fetch[0].sim_nused -= rpbest->avp->avg_ncpus;
        int rt = rpbest->avp->gpu_usage.rsc_type;
        if (rt) {
            rsc_work_fetch[rt].sim_nused -= rpbest->avp->gpu_usage.usage;
        }
    }

    // NOTE(review): the text from here to the closing "};" is incomplete:
    // every loop header has lost its condition, increment and the start
    // of its body, and it cannot be told from what remains whether all of
    // these statements belong to the constructor.
    // What survives assigns resource_share to rr_sim_cpu_share,
    // accumulates a sum, substitutes 1 for a zero sum,
    // and divides rr_sim_cpu_share by the sum
    // (presumably normalizing the shares - confirm against upstream).
    //
    RR_SIM_STATUS() {
        for (int i=0; irr_sim_cpu_share = 0;
        }
        for (i=0; iproject;
            p->rr_sim_cpu_share = p->resource_share;
        }
        double sum=0;
        for (i=0; irr_sim_cpu_share;
        }
        if (!sum) sum=1;
        for (i=0; irr_sim_cpu_share /= sum;
        }
    }
};

// Per-project counterpart of RR_SIM_STATUS::activate():
// append rp to this status object's active list and add its CPU
// (and, if any, coprocessor) usage to the sim_nused counters of
// rp->project.
// Note that the counters updated are those of rp->project,
// not necessarily those of the project owning this status object.
//
void RR_SIM_PROJECT_STATUS::activate(RESULT* rp) {
    active.push_back(rp);
    rp->project->rsc_pwf[0].sim_nused += rp->avp->avg_ncpus;
    int rt = rp->avp->gpu_usage.rsc_type;
    if (rt) {
        rp->project->rsc_pwf[rt].sim_nused += rp->avp->gpu_usage.usage;
    }
}

// Remove every occurrence of rp from this status object's active list,
// and subtract its usage from the sim_nused counters of rp->project.
// The subtraction is done whether or not rp was found in the list.
//
void RR_SIM_PROJECT_STATUS::remove_active(RESULT* rp) {
    // NOTE(review): template argument list is missing from this iterator type
    std::vector::iterator it = active.begin();
    while (it != active.end()) {
        if (*it == rp) {
            it = active.erase(it);
        } else {
            it++;
        }
    }
    rp->project->rsc_pwf[0].sim_nused -= rp->avp->avg_ncpus;
    int rt = rp->avp->gpu_usage.rsc_type;
    if (rt) {
        rp->project->rsc_pwf[rt].sim_nused -= rp->avp->gpu_usage.usage;
    }
}

// estimate the rate (FLOPS) that this job will get long-term
// with weighted round-robin scheduling
//
// The result is stored in rp->rrsim_flops.
//
void set_rrsim_flops(RESULT* rp) {
    // For coproc jobs, use app version estimate
    //
    if (rp->uses_coprocs()) {
        rp->rrsim_flops = rp->avp->flops * gstate.overall_gpu_frac();
        return;
    }
    PROJECT* p = rp->project;

    // For CPU jobs, estimate how many CPU seconds per second this job would get
    // running with other jobs of this project, ignoring other factors
    //
    double x = 1;
    if (p->rsc_pwf[0].sim_nused > gstate.ncpus) {
        x = gstate.ncpus/p->rsc_pwf[0].sim_nused;
    }
    double r1 = x*rp->avp->avg_ncpus;

    // if the project's total CPU usage is more than its share, scale
    //
    double share_cpus = p->rr_sim_cpu_share*gstate.ncpus;
    if (!share_cpus) share_cpus = gstate.ncpus;
        // deal with projects w/ resource share = 0
    double r2 = r1;
    if (p->rsc_pwf[0].sim_nused > share_cpus) {
        r2 *= (share_cpus / p->rsc_pwf[0].sim_nused);
    }

    // scale by overall CPU availability
    //
    double r3 = r2 * gstate.overall_cpu_frac();

    rp->rrsim_flops = r3 * rp->avp->flops;
#if 0
    if (log_flags.rr_simulation) {
        msg_printf(p, MSG_INFO,
            "[rr_sim] set_rrsim_flops: %.2fG (r1 %.4f r2 %.4f r3 %.4f)",
            rp->rrsim_flops/1e9, r1, r2, r3
        );
    }
#endif
}

// Log the outcome of the simulation:
// one message per result flagged rr_sim_misses_deadline,
// and one message per project/resource pair with a nonzero
// deadlines_missed count.
//
void CLIENT_STATE::print_deadline_misses() {
    unsigned int i;
    RESULT* rp;
    PROJECT* p;
    // NOTE(review): loop header and the start of the "if" are missing;
    // presumably this iterates over the results - confirm against upstream
    for (i=0; irr_sim_misses_deadline) {
            msg_printf(rp->project, MSG_INFO,
                "[cpu_sched] Result %s projected to miss deadline.",
                rp->name
            );
        }
    }
    // NOTE(review): the outer loop header, the inner loop over j
    // and the start of the "if" are missing here
    for (i=0; irsc_pwf[j].deadlines_missed) {
                msg_printf(p, MSG_INFO,
                    "[cpu_sched] Project has %d projected %s deadline misses",
                    p->rsc_pwf[j].deadlines_missed,
                    rsc_name(j)
                );
            }
        }
    }
}

// Run the round-robin simulation described at the top of this file.
// Visible steps: reset per-project simulation state, choose the results
// to simulate and the initially running ones, then repeatedly take the
// active result that finishes first, check it against its deadline,
// update busy time and shortfalls, and start pending jobs in its place.
//
// NOTE(review): this function is damaged in several places (marked below)
// and its end lies beyond the part of the file that is visible here.
//
void CLIENT_STATE::rr_simulation() {
    PROJECT* p, *pbest;
    RESULT* rp, *rpbest;
    RR_SIM_STATUS sim_status;
    unsigned int u;

    // compared below with a task's smoothed working set size
    // to decide whether a projected deadline miss is counted
    double ar = available_ram();

    work_fetch.rr_init();

    if (log_flags.rr_simulation) {
        msg_printf(0, MSG_INFO,
            "[rr_sim] start: work_buf min %.0f additional %.0f total %.0f on_frac %.3f active_frac %.3f",
            work_buf_min(), work_buf_additional(), work_buf_total(),
            time_stats.on_frac, time_stats.active_frac
        );
    }

    // NOTE(review): loop header is missing;
    // what remains skips non-CPU-intensive projects
    // and clears the rr_sim_status of the others
    for (u=0; unon_cpu_intensive) continue;
        p->rr_sim_status.clear();
    }

    // Decide what jobs to include in the simulation,
    // and pick the ones that are initially running.
    // NOTE: "results" is sorted by increasing arrival time
    //
    // NOTE(review): loop header is missing here as well
    for (u=0; urr_sim_misses_deadline = false;
        if (!rp->nearly_runnable()) continue;
        if (rp->some_download_stalled()) continue;
        if (rp->project->non_cpu_intensive) continue;
        rp->rrsim_flops_left = rp->estimated_flops_remaining();

        //if (rp->rrsim_flops_left <= 0) continue;
            // job may have fraction_done=1 but not be done;
            // if it's past its deadline, we need to mark it as such

        p = rp->project;
        p->pwf.has_runnable_jobs = true;
        p->rsc_pwf[0].nused_total += rp->avp->avg_ncpus;
        int rt = rp->avp->gpu_usage.rsc_type;
        if (rt) {
            // coprocessor job: start it if the resource still has room
            // (global count), else queue it on the per-resource pending list
            p->rsc_pwf[rt].nused_total += rp->avp->gpu_usage.usage;
            p->rsc_pwf[rt].has_runnable_jobs = true;
            if (rsc_work_fetch[rt].sim_nused < coprocs.coprocs[rt].count) {
                sim_status.activate(rp, 0);
                p->rr_sim_status.activate(rp);
            } else {
                rsc_work_fetch[rt].pending.push_back(rp);
            }
        } else {
            // CPU job: start it if this project is using fewer than ncpus
            // CPUs, else queue it on the project's own pending list
            p->rsc_pwf[0].has_runnable_jobs = true;
            if (p->rsc_pwf[0].sim_nused < ncpus) {
                sim_status.activate(rp, 0);
                p->rr_sim_status.activate(rp);
            } else {
                p->rr_sim_status.add_pending(rp);
            }
        }
    }

    // note the number of idle instances
    //
    rsc_work_fetch[0].nidle_now = ncpus - rsc_work_fetch[0].sim_nused;
    if (rsc_work_fetch[0].nidle_now < 0) rsc_work_fetch[0].nidle_now = 0;
    // NOTE(review): a large span is missing after "for (int i=1; i".
    // The declarations of sim_now and buf_end, the start of the main
    // simulation loop and the start of the loop that selects rpbest
    // are not visible.  The first surviving statement below assigns
    // through an identifier fused with the loop variable; it resembles
    // an alternative form of the following line (using temp_dcf) -
    // confirm against upstream whether it was live code.
    for (int i=1; irrsim_finish_delay = rp->avp->temp_dcf*rp->rrsim_flops_left/rp->rrsim_flops;
            rp->rrsim_finish_delay = rp->rrsim_flops_left/rp->rrsim_flops;
            // keep the result with the smallest finish delay
            if (!rpbest || rp->rrsim_finish_delay < rpbest->rrsim_finish_delay) {
                rpbest = rp;
            }
        }

        pbest = rpbest->project;

        if (log_flags.rr_simulation) {
            msg_printf(pbest, MSG_INFO,
                "[rr_sim] %.2f: %s finishes after %.2f (%.2fG/%.2fG)",
                sim_now - now,
                rpbest->name, rpbest->rrsim_finish_delay,
                rpbest->rrsim_flops_left/1e9, rpbest->rrsim_flops/1e9
            );
        }

        // "rpbest" is first result to finish.  Does it miss its deadline?
        //
        double diff = (sim_now + rpbest->rrsim_finish_delay) - rpbest->computation_deadline();
        if (diff > 0) {
            ACTIVE_TASK* atp = lookup_active_task_by_result(rpbest);
            if (atp) {
                atp->last_deadline_miss_time = now;
            }
            // a task whose smoothed working set exceeds available RAM
            // is logged but not counted as a deadline miss
            if (atp && atp->procinfo.working_set_size_smoothed > ar) {
                if (log_flags.rr_simulation) {
                    msg_printf(pbest, MSG_INFO,
                        "[rr_sim] %s misses deadline but too large to run",
                        rpbest->name
                    );
                }
            } else {
                rpbest->rr_sim_misses_deadline = true;
                int rt = rpbest->avp->gpu_usage.rsc_type;
                if (rt) {
                    pbest->rsc_pwf[rt].deadlines_missed++;
                    rsc_work_fetch[rt].deadline_missed_instances += rpbest->avp->gpu_usage.usage;
                } else {
                    pbest->rsc_pwf[0].deadlines_missed++;
                    rsc_work_fetch[0].deadline_missed_instances += rpbest->avp->avg_ncpus;
                }
                if (log_flags.rr_simulation) {
                    msg_printf(pbest, MSG_INFO,
                        "[rr_sim] %s misses deadline by %.2f",
                        rpbest->name, diff
                    );
                }
            }
        }

        // update saturated time
        //
        double end_time = sim_now + rpbest->rrsim_finish_delay;
        double x = end_time - gstate.now;
        // NOTE(review): the loop over resources that presumably uses x,
        // and the start of the following "if", are missing here
        for (int i=0; irr_sim_misses_deadline) {
            // convert remaining run time to elapsed time using the
            // availability fraction of the resource type the job uses
            double frac = rpbest->uses_coprocs()?gstate.overall_gpu_frac():gstate.overall_cpu_frac();
            double dur = rpbest->estimated_time_remaining() / frac;
            rsc_work_fetch[0].update_busy_time(dur, rpbest->avp->avg_ncpus);
            int rt = rpbest->avp->gpu_usage.rsc_type;
            if (rt) {
                rsc_work_fetch[rt].update_busy_time(dur, rpbest->avp->gpu_usage.usage);
            }
        }

        // increment resource shortfalls
        //
        if (sim_now < buf_end) {
            // only the part of the interval inside the buffer counts
            if (end_time > buf_end) end_time = buf_end;
            double d_time = end_time - sim_now;

            // NOTE(review): the per-resource shortfall loop, the closing
            // braces of this "if", and the start of the statement that
            // removes rpbest from the active sets are missing here
            for (int i=0; irr_sim_status.remove_active(rpbest);

        sim_now += rpbest->rrsim_finish_delay;

        // start new jobs; may need to start more than one
        // if this job used multiple resource instances
        //
        int rt = rpbest->avp->gpu_usage.rsc_type;
        if (rt) {
            while (1) {
                if (rsc_work_fetch[rt].sim_nused >= coprocs.coprocs[rt].count) break;
                if (!rsc_work_fetch[rt].pending.size()) break;
                rp = rsc_work_fetch[rt].pending[0];
                rsc_work_fetch[rt].pending.erase(rsc_work_fetch[rt].pending.begin());
                sim_status.activate(rp, sim_now-now);
                // NOTE(review): rp comes from the per-resource pending list,
                // and nothing visible here guarantees it belongs to pbest.
                // RR_SIM_PROJECT_STATUS::activate() updates the counters of
                // rp->project but appends rp to pbest's active list -
                // confirm that this is intended.
                pbest->rr_sim_status.activate(rp);
            }
        } else {
            while (1) {
                if (pbest->rsc_pwf[0].sim_nused >= ncpus) break;
                rp = pbest->rr_sim_status.get_pending();
                if (!rp) break;
                sim_status.activate(rp, sim_now-now);
                pbest->rr_sim_status.activate(rp);
            }
        }
    }

    // if simulation ends before end of buffer, take the tail into account
    //
    if (sim_now < buf_end) {
        double d_time = buf_end - sim_now;
        // NOTE(review): the visible text ends in the middle of this loop header
        for (int i=0; i