diff --git a/client/app_config.cpp b/client/app_config.cpp index ab36fd881b..cfdb0047e1 100644 --- a/client/app_config.cpp +++ b/client/app_config.cpp @@ -99,6 +99,8 @@ int APP_CONFIGS::config_app_versions(PROJECT* p, bool show_warnings) { return 0; } +// clear app- and project-level counters to enforce max concurrent limits +// void max_concurrent_init() { for (unsigned int i=0; iapp_n_concurrent = 0; diff --git a/client/rr_sim.cpp b/client/rr_sim.cpp index 69ede13962..cdbb19271c 100644 --- a/client/rr_sim.cpp +++ b/client/rr_sim.cpp @@ -99,6 +99,7 @@ struct RR_SIM { active.push_back(rp); rsc_work_fetch[0].sim_nused += rp->avp->avg_ncpus; p->rsc_pwf[0].sim_nused += rp->avp->avg_ncpus; + int rt = rp->avp->gpu_usage.rsc_type; if (rt) { rsc_work_fetch[rt].sim_nused += rp->avp->gpu_usage.usage; @@ -118,7 +119,15 @@ struct RR_SIM { #endif } } - max_concurrent_inc(rp); + if (have_max_concurrent) { + max_concurrent_inc(rp); + if (p->rsc_pwf[0].sim_nused > p->rsc_pwf[0].max_nused) { + p->rsc_pwf[0].max_nused = p->rsc_pwf[0].sim_nused; + } + if (rt && p->rsc_pwf[rt].sim_nused > p->rsc_pwf[rt].max_nused) { // also record the peak for the job's GPU resource type + p->rsc_pwf[rt].max_nused = p->rsc_pwf[rt].sim_nused; + } + } } void init_pending_lists(); @@ -236,6 +245,7 @@ void RR_SIM::pick_jobs_to_run(double reltime) { for (unsigned int i=0; ipwf.rec_temp_save = p->pwf.rec_temp; + p->pwf.at_max_concurrent_limit = false; } rsc_work_fetch[0].sim_nused = 0; @@ -254,11 +264,11 @@ void RR_SIM::pick_jobs_to_run(double reltime) { // for (unsigned int i=0; ipwf.at_max_concurrent_limit) continue; RSC_PROJECT_WORK_FETCH& rsc_pwf = p->rsc_pwf[rt]; if (rsc_pwf.pending.size() ==0) continue; rsc_pwf.pending_iter = rsc_pwf.pending.begin(); rsc_pwf.sim_nused = 0; + rsc_pwf.max_nused = 0; p->pwf.rec_temp = p->pwf.rec; p->compute_sched_priority(); project_heap.push_back(p); @@ -418,6 +428,30 @@ static void handle_missed_deadline(RESULT* rpbest, double diff, double ar) { } } +// update "MC shortfall" for projects with max concurrent restrictions +// 
+static void mc_update_stats(double sim_now, double dt, double buf_end) { + for (unsigned int i=0; iapp_configs.project_has_mc) continue; + for (int rt=0; rtrsc_pwf[rt]; + RSC_WORK_FETCH& rwf = rsc_work_fetch[rt]; + double x = rsc_pwf.max_nused - rsc_pwf.sim_nused; + x = std::min(x, rwf.ninstances - rwf.sim_nused); + if (x > 1e-6 && sim_now < buf_end) { + double dt2; + if (sim_now + dt > buf_end) { + dt2 = buf_end - sim_now; + } else { + dt2 = dt; + } + rsc_pwf.mc_shortfall += x*dt2; + } + } + } +} + // do a round_robin simulation, // for either CPU scheduling (to find deadline misses) // or work fetch (do compute idleness and shortfall) @@ -557,9 +591,14 @@ void RR_SIM::simulate() { } } + // update shortfall and saturated time for each resource + // for (int i=0; i 1e-6 && sim_now < buf_end) { @@ -222,6 +226,15 @@ void RSC_WORK_FETCH::set_request(PROJECT* p) { } RSC_PROJECT_WORK_FETCH& w = project_state(p); double non_excl_inst = ninstances - w.ncoprocs_excluded; + + // if this project has max concurrent, + // use the project-specific "MC shortfall" instead of global shortfall + // + if (p->app_configs.project_has_mc) { + RSC_PROJECT_WORK_FETCH& rsc_pwf = p->rsc_pwf[rsc_type]; + shortfall = rsc_pwf.mc_shortfall; + } + if (shortfall) { if (wacky_dcf(p)) { // if project's DCF is too big or small, diff --git a/client/work_fetch.h b/client/work_fetch.h index 012fb0ed12..57bb924f23 100644 --- a/client/work_fetch.h +++ b/client/work_fetch.h @@ -93,6 +93,7 @@ struct RSC_PROJECT_WORK_FETCH { int n_runnable_jobs; double sim_nused; // # of instances used at this point in the simulation + // Used for GPU exclusion logic double nused_total; // sum of instances over all runnable jobs int ncoprocs_excluded; // number of excluded instances @@ -114,6 +115,13 @@ struct RSC_PROJECT_WORK_FETCH { // If zero, it's OK to ask this project for this type of work. 
// If nonzero, the reason why it's not OK + // stuff for max concurrent logic + // + double max_nused; + // max # instances used so far in simulation. + double mc_shortfall; + // project's shortfall for this resource, given MC limits + RSC_PROJECT_WORK_FETCH() { backoff_time = 0; backoff_interval = 0; @@ -218,6 +226,7 @@ struct RSC_WORK_FETCH { double nidle_now; // # idle instances now (at the beginning of RR sim) double sim_nused; + // # instances used at this point in RR sim COPROC_INSTANCE_BITMAP sim_used_instances; // bitmap of instances used in simulation, // taking into account GPU exclusions diff --git a/lib/cc_config.cpp b/lib/cc_config.cpp index 9de2e8f1bc..37a397324b 100644 --- a/lib/cc_config.cpp +++ b/lib/cc_config.cpp @@ -729,7 +729,9 @@ int APP_CONFIG::parse(XML_PARSER& xp, MSG_VEC& mv, LOG_FLAGS& log_flags) { if (xp.match_tag("/app")) return 0; if (xp.parse_str("name", name, 256)) continue; if (xp.parse_int("max_concurrent", max_concurrent)) { - if (max_concurrent) have_max_concurrent = true; + if (max_concurrent) { + have_max_concurrent = true; + } continue; } if (xp.match_tag("gpu_versions")) { @@ -800,6 +802,9 @@ int APP_CONFIGS::parse(XML_PARSER& xp, MSG_VEC& mv, LOG_FLAGS& log_flags) { int retval = ac.parse(xp, mv, log_flags); if (retval) return retval; app_configs.push_back(ac); + if (ac.max_concurrent) { + project_has_mc = true; + } continue; } if (xp.match_tag("app_version")) { @@ -812,6 +817,7 @@ int APP_CONFIGS::parse(XML_PARSER& xp, MSG_VEC& mv, LOG_FLAGS& log_flags) { if (xp.parse_int("project_max_concurrent", n)) { if (n >= 0) { have_max_concurrent = true; + project_has_mc = true; project_max_concurrent = n; } continue; diff --git a/lib/cc_config.h b/lib/cc_config.h index 3f49f8dcb0..e153e65a2d 100644 --- a/lib/cc_config.h +++ b/lib/cc_config.h @@ -240,6 +240,8 @@ struct APP_CONFIGS { std::vector app_configs; std::vector app_version_configs; int project_max_concurrent; + bool project_has_mc; + // have app- or project-level max concurrent 
restriction bool report_results_immediately; int parse(XML_PARSER&, MSG_VEC&, LOG_FLAGS&); @@ -250,6 +252,7 @@ struct APP_CONFIGS { app_configs.clear(); app_version_configs.clear(); project_max_concurrent = 0; + project_has_mc = false; report_results_immediately = false; } };