// This file is part of BOINC. // http://boinc.berkeley.edu // Copyright (C) 2008 University of California // // BOINC is free software; you can redistribute it and/or modify it // under the terms of the GNU Lesser General Public License // as published by the Free Software Foundation, // either version 3 of the License, or (at your option) any later version. // // BOINC is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // See the GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License // along with BOINC. If not, see <http://www.gnu.org/licenses/>. // BOINC client simulator. // // usage: directory options // // [--infile_prefix dir/] // Prefix of input filenames; default is blank. // Input files are: // client_state.xml // global_prefs.xml // global_prefs_override.xml // [--outfile_prefix X] // Prefix of output filenames; default is blank. // Output files are: // index.html (lists other files) // timeline.html // log.txt // results.dat (simulation results, machine-readable) // results.txt (simulation results, human-readable) // inputs.txt (sim parameters) // summary.txt (summary of inputs; detailed outputs) // rec.png // // Simulation params: // [--existing_jobs_only] // If set, simulate the specific set of jobs in the state file. // Otherwise simulate an infinite stream of jobs // modeled after those found in the state file. 
// [--duration x] // simulation duration (default 86400) // [--delta x] // delta = simulation time step (default 10) // // Policy options: // [--server_uses_workload] // simulate use of EDF sim by scheduler // [--cpu_sched_rr_only] // use only RR scheduling // [--rec_half_life X] // half-life of recent est credit #include #include "error_numbers.h" #include "str_util.h" #include "util.h" #include "log_flags.h" #include "filesys.h" #include "../sched/edf_sim.h" #include "client_msgs.h" #include "client_state.h" #include "project.h" #include "result.h" #include "scheduler_op.h" #include "sim.h" #define SCHED_RETRY_DELAY_MIN 60 // 1 minute #define SCHED_RETRY_DELAY_MAX (60*60*4) // 4 hours const char* infile_prefix = "./"; const char* outfile_prefix = "./"; #define TIMELINE_FNAME "timeline.html" #define LOG_FNAME "log.txt" #define INPUTS_FNAME "inputs.txt" #define RESULTS_DAT_FNAME "results.dat" #define RESULTS_TXT_FNAME "results.txt" #define SUMMARY_FNAME "summary.txt" #define REC_FNAME "rec.dat" bool user_active; double duration = 86400, delta = 60; FILE* logfile; FILE* html_out; FILE* rec_file; FILE* index_file; FILE* summary_file; char log_filename[256]; string html_msg; double active_time = 0; double gpu_active_time = 0; bool server_uses_workload = false; bool cpu_sched_rr_only = false; bool existing_jobs_only = false; RANDOM_PROCESS on_proc; RANDOM_PROCESS active_proc; RANDOM_PROCESS gpu_active_proc; RANDOM_PROCESS connected_proc; bool on; bool active; bool gpu_active; bool connected; extern double rec_adjust_period; SIM_RESULTS sim_results; int njobs; void usage(char* prog) { fprintf(stderr, "usage: %s\n" "[--infile_prefix F]\n" "[--outfile_prefix F]\n" "[--existing_jobs_only]\n" "[--duration X]\n" "[--delta X]\n" "[--server_uses_workload]\n" "[--cpu_sched_rr_only]\n" "[--rec_half_life X]\n", prog ); exit(1); } // peak flops of an app version // double app_peak_flops(APP_VERSION* avp, double cpu_scale) { double x = avp->avg_ncpus*cpu_scale; int rt = 
avp->gpu_usage.rsc_type; if (rt) { x += avp->gpu_usage.usage * rsc_work_fetch[rt].relative_speed; } x *= gstate.host_info.p_fpops; return x; } double gpu_peak_flops() { double x = 0; for (int i=1; iprint_results(f, sim_results); } } APP* choose_app(vector& apps) { double x = drand(); double sum = 0; unsigned int i; for (i=0; iweight; } for (i=0; iweight/sum; if (x <= 0) { return app; } } return apps.back(); } bool app_version_needs_work(APP_VERSION* avp) { if (avp->dont_use) return false; int rt = avp->gpu_usage.rsc_type; if (rt) { return (rsc_work_fetch[rt].req_secs>0 || rsc_work_fetch[rt].req_instances>0); } return (rsc_work_fetch[0].req_secs>0 || rsc_work_fetch[0].req_instances>0); } bool has_app_version_needing_work(APP* app) { for (unsigned int i=0; iapp != app) continue; if (app_version_needs_work(avp)) return true; } return false; } // choose a version for this app for which we need work // APP_VERSION* choose_app_version(APP* app) { APP_VERSION* best_avp = NULL; for (unsigned int i=0; iapp != app) continue; if (!app_version_needs_work(avp)) continue; if (!best_avp) { best_avp = avp; } else if (avp->flops > best_avp->flops) { best_avp = avp; } } return best_avp; } // generate a job; pick a random app for this project, // and pick a FLOP count from its distribution // void make_job( PROJECT* p, WORKUNIT* wup, RESULT* rp, vectorapp_list ) { APP* app = choose_app(app_list); APP_VERSION* avp = choose_app_version(app); rp->clear(); rp->avp = avp; rp->app = app; if (!rp->avp) { fprintf(stderr, "ERROR - NO APP VERSION\n"); exit(1); } rp->project = p; rp->wup = wup; sprintf(rp->name, "%s_%d", p->project_name, p->result_index++); wup->project = p; wup->rsc_fpops_est = app->fpops_est; rp->sim_flops_left = rp->wup->rsc_fpops_est; strcpy(wup->name, rp->name); strcpy(wup->app_name, app->name); wup->app = app; double ops = app->fpops.sample(); if (ops < 0) ops = 0; wup->rsc_fpops_est = ops; rp->report_deadline = gstate.now + app->latency_bound; } // process 
ready-to-report results // void CLIENT_STATE::handle_completed_results(PROJECT* p) { char buf[256]; vector::iterator result_iter; result_iter = results.begin(); while (result_iter != results.end()) { RESULT* rp = *result_iter; if (rp->project == p && rp->ready_to_report) { if (gstate.now > rp->report_deadline) { sprintf(buf, "result %s reported; " "MISSED DEADLINE by %f
\n", rp->name, gstate.now - rp->report_deadline ); } else { sprintf(buf, "result %s reported; " "MADE DEADLINE
\n", rp->name ); } PROJECT* spp = rp->project; if (gstate.now > rp->report_deadline) { sim_results.flops_wasted += rp->peak_flop_count; sim_results.nresults_missed_deadline++; spp->project_results.nresults_missed_deadline++; spp->project_results.flops_wasted += rp->peak_flop_count; } else { sim_results.nresults_met_deadline++; spp->project_results.nresults_met_deadline++; } html_msg += buf; delete rp; result_iter = results.erase(result_iter); } else { result_iter++; } } } // convert results in progress to IP_RESULTs, // and get an initial schedule for them // void CLIENT_STATE::get_workload(vector& ip_results) { for (unsigned int i=0; iestimated_runtime_remaining(); if (x == 0) continue; IP_RESULT ipr(rp->name, rp->report_deadline-now, x); ip_results.push_back(ipr); } //init_ip_results(work_buf_min(), ncpus, ip_results); init_ip_results(0, ncpus, ip_results); } void get_apps_needing_work(PROJECT* p, vector& apps) { apps.clear(); for (unsigned int i=0; iproject != p) continue; if (app->ignore) continue; if (!has_app_version_needing_work(app)) continue; apps.push_back(app); } } void decrement_request_rsc( RSC_WORK_FETCH& rwf, double ninstances, double est_runtime ) { rwf.req_secs -= est_runtime * ninstances; rwf.req_instances -= ninstances; rwf.estimated_delay += est_runtime*(ninstances/rwf.ninstances); } void decrement_request(RESULT* rp) { APP_VERSION* avp = rp->avp; double est_runtime = rp->wup->rsc_fpops_est/avp->flops; est_runtime /= (gstate.time_stats.on_frac*gstate.time_stats.active_frac); decrement_request_rsc(rsc_work_fetch[0], avp->avg_ncpus, est_runtime); int rt = avp->gpu_usage.rsc_type; if (rt) { decrement_request_rsc(rsc_work_fetch[rt], avp->gpu_usage.usage, est_runtime); } } double get_estimated_delay(RESULT* rp) { int rt = rp->avp->gpu_usage.rsc_type; return rsc_work_fetch[rt].estimated_delay; } // simulate trying to do an RPC; // return true if we actually did one // bool CLIENT_STATE::simulate_rpc(PROJECT* p) { char buf[256], buf2[256]; vector 
ip_results; int infeasible_count = 0; vector new_results; bool avail; if (p->last_rpc_time) { double delta = now - p->last_rpc_time; avail = p->available.sample(delta); } else { avail = p->available.sample(0); } p->last_rpc_time = now; if (!avail) { sprintf(buf, "RPC to %s skipped - project down
", p->project_name); html_msg += buf; msg_printf(p, MSG_INFO, "RPC skipped: project down"); gstate.scheduler_op->project_rpc_backoff(p, "project down"); p->master_url_fetch_pending = false; return false; } // save request params for WORK_FETCH::handle_reply // double save_cpu_req_secs = rsc_work_fetch[0].req_secs; for (int i=1; idont_use = false; } work_fetch.request_string(buf2); sprintf(buf, "RPC to %s: %s
", p->project_name, buf2); html_msg += buf; msg_printf(p, MSG_INFO, "RPC: %s", buf2); handle_completed_results(p); if (server_uses_workload) { get_workload(ip_results); } bool sent_something = false; while (!existing_jobs_only) { vector apps; get_apps_needing_work(p, apps); if (apps.empty()) break; RESULT* rp = new RESULT; WORKUNIT* wup = new WORKUNIT; make_job(p, wup, rp, apps); double et = wup->rsc_fpops_est / rp->avp->flops; if (server_uses_workload) { IP_RESULT c(rp->name, rp->report_deadline-now, et); if (check_candidate(c, ncpus, ip_results)) { ip_results.push_back(c); } else { msg_printf(p, MSG_INFO, "job for %s misses deadline sim\n", rp->app->name); APP_VERSION* avp = rp->avp; delete rp; delete wup; avp->dont_use = true; continue; } } else { double est_delay = get_estimated_delay(rp); if (est_delay + et > wup->app->latency_bound) { msg_printf(p, MSG_INFO, "job for %s misses deadline approx: del %f + et %f > %f\n", rp->app->name, est_delay, et, wup->app->latency_bound ); APP_VERSION* avp = rp->avp; delete rp; delete wup; avp->dont_use = true; continue; } } sent_something = true; rp->set_state(RESULT_FILES_DOWNLOADED, "simulate_rpc"); results.push_back(rp); new_results.push_back(rp); #if 0 sprintf(buf, "got job %s: CPU time %.2f, deadline %s
", rp->name, rp->final_cpu_time, time_to_string(rp->report_deadline) ); html_msg += buf; #endif decrement_request(rp); } njobs += new_results.size(); msg_printf(0, MSG_INFO, "Got %d tasks", new_results.size()); sprintf(buf, "got %d tasks
", new_results.size()); html_msg += buf; SCHEDULER_REPLY sr; rsc_work_fetch[0].req_secs = save_cpu_req_secs; work_fetch.handle_reply(p, &sr, new_results); p->nrpc_failures = 0; p->sched_rpc_pending = 0; p->min_rpc_time = now + 900; if (sent_something) { request_schedule_cpus("simulate_rpc"); request_work_fetch("simulate_rpc"); } sim_results.nrpcs++; return true; } void PROJECT::backoff() { nrpc_failures++; double backoff = calculate_exponential_backoff( nrpc_failures, SCHED_RETRY_DELAY_MIN, SCHED_RETRY_DELAY_MAX ); min_rpc_time = gstate.now + backoff; } bool CLIENT_STATE::scheduler_rpc_poll() { PROJECT *p; bool action = false; static double last_time=0; static double last_work_fetch_time = 0; double elapsed_time; // check only every 5 sec // if (now - last_time < SCHEDULER_RPC_POLL_PERIOD) { #if 0 msg_printf(NULL, MSG_INFO, "RPC poll: not time %f - %f < %f", now, last_time, SCHEDULER_RPC_POLL_PERIOD ); #endif return false; } last_time = now; //msg_printf(NULL, MSG_INFO, "RPC poll start"); while (1) { #if 0 p = next_project_sched_rpc_pending(); if (p) { work_fetch.piggyback_work_request(p); action = simulate_rpc(p); break; } #endif p = find_project_with_overdue_results(false); if (p) { msg_printf(p, MSG_INFO, "doing RPC to report results"); p->sched_rpc_pending = RPC_REASON_RESULTS_DUE; work_fetch.piggyback_work_request(p); action = simulate_rpc(p); break; } // should we check work fetch? 
Do this at most once/minute if (must_check_work_fetch) { last_work_fetch_time = 0; } elapsed_time = now - last_work_fetch_time; if (elapsed_time < WORK_FETCH_PERIOD) { return false; } must_check_work_fetch = false; last_work_fetch_time = now; p = work_fetch.choose_project(); if (p) { msg_printf(p, MSG_INFO, "doing RPC to get work"); action = simulate_rpc(p); break; } break; } #if 0 if (action) { msg_printf(p, MSG_INFO, "RPC poll: did an RPC"); } else { msg_printf(0, MSG_INFO, "RPC poll: didn't do an RPC"); } #endif return action; } bool ACTIVE_TASK_SET::poll() { unsigned int i; char buf[256]; bool action = false; static double last_time = START_TIME; double diff = gstate.now - last_time; if (diff < 1.0) return false; last_time = gstate.now; if (diff > delta) { diff = 0; } PROJECT* p; for (i=0; iidle = true; } // we do two kinds of FLOPs accounting: // 1) actual FLOPS (for job completion) // 2) peak FLOPS (for total and per-project resource usage) // // CPU may be overcommitted, in which case we compute // a "cpu_scale" factor that is < 1. // GPUs are never overcommitted. 
// // actual FLOPS is based on app_version.flops, scaled by cpu_scale for CPU jobs // peak FLOPS is based on device peak FLOPS, // with CPU component scaled by cpu_scale for all jobs // get CPU usage by GPU and CPU jobs // double cpu_usage_cpu=0; double cpu_usage_gpu=0; for (i=0; itask_state() != PROCESS_EXECUTING) continue; RESULT* rp = atp->result; if (rp->uses_coprocs()) { if (gpu_active) { cpu_usage_gpu += rp->avp->avg_ncpus; } } else { cpu_usage_cpu += rp->avp->avg_ncpus; } } double cpu_usage = cpu_usage_cpu + cpu_usage_gpu; // if CPU is overcommitted, compute cpu_scale // double cpu_scale = 1; if (cpu_usage > gstate.ncpus) { cpu_scale = (gstate.ncpus - cpu_usage_gpu) / (cpu_usage - cpu_usage_gpu); } double used = 0; for (i=0; itask_state() != PROCESS_EXECUTING) continue; RESULT* rp = atp->result; if (!gpu_active && rp->uses_coprocs()) { continue; } atp->elapsed_time += diff; double flops = rp->avp->flops; if (!rp->uses_coprocs()) { flops *= cpu_scale; } rp->sim_flops_left -= diff*flops; atp->fraction_done = 1 - rp->sim_flops_left / rp->wup->rsc_fpops_est; atp->checkpoint_wall_time = gstate.now; if (rp->sim_flops_left <= 0) { atp->set_task_state(PROCESS_EXITED, "poll"); rp->exit_status = 0; rp->ready_to_report = true; gstate.request_schedule_cpus("job finished"); gstate.request_work_fetch("job finished"); sprintf(buf, "result %s finished
", rp->name); html_msg += buf; action = true; } double pf = diff * app_peak_flops(rp->avp, cpu_scale); rp->project->project_results.flops_used += pf; rp->peak_flop_count += pf; sim_results.flops_used += pf; used += pf; rp->project->idle = false; } for (i=0; iidle) { p->idle_time += diff; p->idle_time_sumsq += diff*(p->idle_time*p->idle_time); } else { p->idle_time = 0; } } active_time += diff; if (gpu_active) { gpu_active_time += diff; } return action; } // return the fraction of flops that was spent in violation of shares // i.e., if a project got X and it should have got Y, // add up |X-Y| over all projects, and divide by total flops // double CLIENT_STATE::share_violation() { unsigned int i; double tot = 0, trs=0; for (i=0; iproject_results.flops_used; trs += p->resource_share; } double sum = 0; for (i=0; iproject_results.flops_used; double rs = p->resource_share/trs; double rt = tot*rs; sum += fabs(t - rt); } return sum/tot; } // "monotony" is defined as follows: // for each project P, maintain R(P), the time since P last ran, // let S(P) be the RMS of R(P). // Let X = mean(S(P))/(sched_interval*nprojects) // (the *nprojects reflects the fact that in the limit of nprojects, // each one waits for a time to run proportional to nprojects) // X varies from zero (no monotony) to infinity. // X is one in the case of round-robin on 1 CPU. 
// Let monotony = 1-1/(x+1) // double CLIENT_STATE::monotony() { double sum = 0; double schedint = global_prefs.cpu_scheduling_period(); unsigned int i; for (i=0; iidle_time_sumsq/active_time; double s = sqrt(avg_ss); sum += s; } int n = (int)projects.size(); double x = sum/(n*schedint*n); double m = 1-(1/(x+1)); //printf("sum: %f; x: %f m: %f\n", sum, x, m); return m; } // the CPU totals are there; compute the other fields // void SIM_RESULTS::compute_figures_of_merit() { double flops_total = cpu_peak_flops()*active_time + gpu_peak_flops()*gpu_active_time; double flops_idle = flops_total - flops_used; if (flops_idle<0) flops_idle=0; wasted_frac = flops_wasted/flops_total; idle_frac = flops_idle/flops_total; share_violation = gstate.share_violation(); monotony = gstate.monotony(); } void SIM_RESULTS::print(FILE* f, bool human_readable) { double r = njobs?((double)nrpcs)/(njobs*2):0; if (human_readable) { fprintf(f, "wasted fraction %f\n" "Idle fraction %f\n" "Share violation %f\n" "Monotony %f\n" "RPCs per job %f\n", wasted_frac, idle_frac, share_violation, monotony, r ); } else { fprintf(f, "wf %f if %f sv %f m %f r %f\n", wasted_frac, idle_frac, share_violation, monotony, r ); } } void SIM_RESULTS::parse(FILE* f) { fscanf(f, "wasted_frac %lf idle_frac %lf share_violation %lf monotony %lf", &wasted_frac, &idle_frac, &share_violation, &monotony ); } void SIM_RESULTS::add(SIM_RESULTS& r) { wasted_frac += r.wasted_frac; idle_frac += r.idle_frac; share_violation += r.share_violation; monotony += r.monotony; } void SIM_RESULTS::divide(int n) { wasted_frac /= n; idle_frac /= n; share_violation /= n; monotony /= n; } void SIM_RESULTS::clear() { memset(this, 0, sizeof(*this)); } void PROJECT::print_results(FILE* f, SIM_RESULTS& sr) { double t = project_results.flops_used; double gt = sr.flops_used; fprintf(f, "%s: share %.2f total flops %.2fG (%.2f%%)\n" " used %.2fG wasted %.2fG\n" " deadlines: met %d missed %d\n", project_name, resource_share, t/1e9, (t/gt)*100, 
project_results.flops_used/1e9, project_results.flops_wasted/1e9, project_results.nresults_met_deadline, project_results.nresults_missed_deadline ); } const char* colors[] = { "#000088", "#008800", "#880000", "#880088", "#888800", "#008888", "#0000aa", "#00aa00", "#aa0000", "#aa00aa", "#aaaa00", "#00aaaa", "#8800aa", "#aa0088", "#88aa00", "#aa8800", "#00aa88", "#0088aa", }; #define NCOLORS 18 #define WIDTH1 100 #define WIDTH2 400 void show_project_colors() { fprintf(html_out, "\n" " \n" ); for (unsigned int i=0; i\n", colors[p->index%NCOLORS], p->project_name, p->resource_share ); } fprintf(html_out, "
ProjectResource share
%s%.0f
\n"); } void job_count(PROJECT* p, int rsc_type, int& in_progress, int& done) { in_progress = done = 0; unsigned int i; for (i=0; iproject != p) continue; if (rp->resource_type() != rsc_type) continue; if (rp->state() < RESULT_FILES_UPLOADED) { in_progress++; } else { done++; } } } void show_resource(int rsc_type) { unsigned int i; char buf[256]; fprintf(html_out, "", WIDTH2); bool found = false; for (i=0; iresult; if (atp->task_state() != PROCESS_EXECUTING) continue; double ninst=0; if (rsc_type) { if (rp->avp->gpu_usage.rsc_type != rsc_type) continue; ninst = rp->avp->gpu_usage.usage; } else { ninst = rp->avp->avg_ncpus; } PROJECT* p = rp->project; if (!found) { found = true; fprintf(html_out, "\n" "%s\n", rsc_type?"":"" ); } if (rsc_type) { sprintf(buf, "", rp->coproc_indices[0]); } else { strcpy(buf, ""); } fprintf(html_out, "%s\n", ninst, colors[p->index%NCOLORS], rp->edf_scheduled?"*":"", rp->name, rp->sim_flops_left/1e9, buf ); } if (found) { fprintf(html_out, "
#devsJob name (* = high priority)GFLOPs left
GPU%d
%.2f%s%s%.0f
\n"); } else { fprintf(html_out, "IDLE\n"); } fprintf(html_out, "\n" ); found = false; for (i=0; i%s\n", colors[p->index%NCOLORS], p->project_name, in_progress, done, p->pwf.rec ); found = true; } } //if (!found) fprintf(html_out, " ---\n"); fprintf(html_out, "
ProjectIn progressdoneREC
%d%d%.3f
"); } int nproc_types = 1; void html_start() { char buf[256]; sprintf(buf, "%s%s", outfile_prefix, TIMELINE_FNAME); html_out = fopen(buf, "w"); if (!html_out) { fprintf(stderr, "can't open %s for writing\n", buf); exit(1); } setbuf(html_out, 0); fprintf(index_file, "
Timeline\n", TIMELINE_FNAME); fprintf(html_out, "\n" "

BOINC client emulator results

\n" ); show_project_colors(); fprintf(html_out, "\n", WIDTH1 ); fprintf(html_out, "", WIDTH2 ); if (coprocs.have_nvidia()) { fprintf(html_out, "", WIDTH2); nproc_types++; } if (coprocs.have_ati()) { fprintf(html_out, "", WIDTH2); nproc_types++; } fprintf(html_out, "
TimeCPUNVIDIA GPUATI GPU
\n"); } void html_rec() { if (html_msg.size()) { fprintf(html_out, "", WIDTH1, sim_time_string(gstate.now) ); fprintf(html_out, "
%s%s
\n", nproc_types*WIDTH2, html_msg.c_str() ); html_msg = ""; } fprintf(html_out, "", WIDTH1, sim_time_string(gstate.now)); if (active) { show_resource(0); if (gpu_active) { for (int i=1; iOFF", WIDTH2); } } } else { fprintf(html_out, "", WIDTH2); for (int i=1; iOFF", WIDTH2); } } fprintf(html_out, "
%sOFF
\n"); } void html_end() { fprintf(html_out, "
\n");
    sim_results.compute_figures_of_merit();
    sim_results.print(html_out);
    print_project_results(html_out);
    fprintf(html_out, "
\n"); fclose(html_out); } void set_initial_rec() { unsigned int i; double sum=0; double x = cpu_peak_flops() + gpu_peak_flops(); for (i=0; iresource_share; } for (i=0; ipwf.rec = 86400*x*(p->resource_share/sum)/1e9; } } static bool compare_names(PROJECT* p1, PROJECT* p2) { return (strcmp(p1->project_name, p2->project_name) < 0); } void write_recs() { fprintf(rec_file, "%f ", gstate.now); std::sort( gstate.projects.begin(), gstate.projects.end(), compare_names ); for (unsigned int i=0; ipwf.rec); } fprintf(rec_file, "\n"); } void make_graph(const char* title, const char* fname, int field) { char gp_fname[256], cmd[256], png_fname[256]; sprintf(gp_fname, "%s%s.gp", outfile_prefix, fname); FILE* f = fopen(gp_fname, "w"); fprintf(f, "set terminal png small size 1024, 768\n" "set title \"%s\"\n" "set yrange[0:]\n" "plot ", title ); for (unsigned int i=0; iproject_name, (i==gstate.projects.size()-1)?"\n":", \\\n" ); } fclose(f); sprintf(png_fname, "%s%s.png", outfile_prefix, fname); sprintf(cmd, "gnuplot < %s > %s", gp_fname, png_fname); fprintf(index_file, "
Graph of %s\n", fname, title); system(cmd); } static void write_inputs() { char buf[256]; sprintf(buf, "%s/%s", outfile_prefix, INPUTS_FNAME); FILE* f = fopen(buf, "w"); fprintf(f, "Existing jobs only: %s\n" "Round-robin only: %s\n" "scheduler EDF sim: %s\n", existing_jobs_only?"yes":"no", cpu_sched_rr_only?"yes":"no", server_uses_workload?"yes":"no" ); fprintf(f, "REC half-life: %f\n", config.rec_half_life ); fprintf(f, "Simulation duration: %f\nTime step: %f\n", duration, delta ); fclose(f); } void simulate() { bool action; double start = START_TIME; gstate.now = start; html_start(); fprintf(summary_file, "Hardware summary\n %d CPUs, %.1f GFLOPS\n", gstate.host_info.p_ncpus, gstate.host_info.p_fpops/1e9 ); for (int i=1; iname, timediff_format(rp->sim_flops_left/rp->avp->flops).c_str(), timediff_format(rp->report_deadline - START_TIME).c_str() ); } fprintf(summary_file, "Simulation parameters\n" " time step %f, duration %f\n" "-------------------\n", delta, duration ); write_inputs(); while (1) { on = on_proc.sample(delta); if (on) { active = active_proc.sample(delta); if (active) { gpu_active = gpu_active_proc.sample(delta); } else { gpu_active = false; } connected = connected_proc.sample(delta); } else { active = gpu_active = connected = false; } // do accounting for the period that just ended, // even if we're now in an "off" state. 
// // need both of the following, else crash // action |= gstate.active_tasks.poll(); action |= gstate.handle_finished_apps(); if (on) { while (1) { action = false; action |= gstate.schedule_cpus(); if (connected) { action |= gstate.scheduler_rpc_poll(); // this deletes completed results } action |= gstate.active_tasks.poll(); action |= gstate.handle_finished_apps(); gpu_suspend_reason = gpu_active?0:1; //msg_printf(0, MSG_INFO, action?"did action":"did no action"); if (!action) break; } } //msg_printf(0, MSG_INFO, "took time step"); for (unsigned int i=0; itask_state() == PROCESS_EXECUTING) { atp->elapsed_time += delta; } } html_rec(); write_recs(); gstate.now += delta; if (gstate.now > start + duration) break; } html_end(); } void show_app(APP* app) { fprintf(summary_file, " app %s\n" " job params: fpops_est %.0fG fpops mean %.0fG std_dev %.0fG\n" " latency %.2f weight %.2f", app->name, app->fpops_est/1e9, app->fpops.mean/1e9, app->fpops.std_dev/1e9, app->latency_bound, app->weight ); if (app->max_concurrent) { fprintf(summary_file, " max_concurrent %d\n", app->max_concurrent); } else { fprintf(summary_file, "\n"); } for (unsigned int i=0; iapp != app) continue; if (avp->gpu_usage.rsc_type) { fprintf(summary_file, " app version %d (%s)\n" " %.2f CPUs, %.2f %s GPUs, %.0f GFLOPS\n", avp->version_num, avp->plan_class, avp->avg_ncpus, avp->gpu_usage.usage, rsc_name(avp->gpu_usage.rsc_type), avp->flops/1e9 ); } else { fprintf(summary_file, " app version %d (%s)\n" " %.2f CPUs, %.0f GFLOPS\n", avp->version_num, avp->plan_class, avp->avg_ncpus, avp->flops/1e9 ); } } } // get application params, // and set "ignore" for apps that have no versions or no params. 
// // App params can be specified in 2 ways: // - the presence of a WU and result for that app // - app.latency_bound and app.fpops_est are populated // void get_app_params() { APP* app; unsigned int i, j; for (i=0; iapp; double latency_bound = rp->report_deadline - rp->received_time; if (!app->latency_bound) { app->latency_bound = latency_bound; } rp->received_time = START_TIME; rp->report_deadline = START_TIME + latency_bound; rp->sim_flops_left = rp->wup->rsc_fpops_est; } for (i=0; iapp; if (!app->fpops_est) { app->fpops_est = wup->rsc_fpops_est; } } for (i=0; iignore = true; } for (i=0; imissing_coproc) continue; avp->app->ignore = false; } fprintf(summary_file, "Applications and version\n"); for (j=0; jproject_name); for (i=0; iproject != p) continue; if (app->ignore) { fprintf(summary_file, " app %s: ignoring - no usable app versions\n", app->name ); continue; } if (app->non_cpu_intensive) { fprintf(summary_file, " app %s: ignoring - non CPU intensive\n", app->name ); app->ignore = true; continue; } // if missing app params, fill in defaults // if (!app->fpops_est) { app->fpops_est = 3600e9; } if (!app->latency_bound) { app->latency_bound = 864000; } if (!app->fpops_est || !app->latency_bound) { app->ignore = true; fprintf(summary_file, " app %s: ignoring - no job parameters (see below)\n", app->name ); } else if (app->ignore) { fprintf(summary_file, " app %s: ignoring - no app versions\n", app->name ); } else { if (!app->fpops.mean) { app->fpops.mean = app->fpops_est; } if (!app->weight) { app->weight = 1; } show_app(app); } } } fprintf(summary_file, "\n" "Note: an app's job parameters are taken from a job for that app.\n" " They can also be specified by adding tags to client_state.xml.\n" " See http://boinc.berkeley.edu/trac/wiki/ClientSim.\n" "\n" ); } // zero backoffs and REC // void clear_backoff() { unsigned int i; for (i=0; irsc_pwf[j].reset(); } p->min_rpc_time = 0; } } // remove apps with no app versions, // then projects with no apps // void 
cull_projects() { unsigned int i; PROJECT* p; for (i=0; ino_apps = true; } for (i=0; iapp->ignore) continue; int rt = avp->gpu_usage.rsc_type; } for (i=0; iignore) { app->project->no_apps = false; } } for (i=0; ino_apps) { fprintf(summary_file, "%s: Removing from simulation - no apps\n", p->project_name ); p->ignore = true; } else if (p->non_cpu_intensive) { fprintf(summary_file, "%s: Removing from simulation - non CPU intensive\n", p->project_name ); p->ignore = true; } } // remove results and active tasks of projects we're culling // vector::iterator ati = gstate.active_tasks.active_tasks.begin(); while (ati != gstate.active_tasks.active_tasks.end()) { ACTIVE_TASK* atp = *ati; if (atp->wup->project->ignore) { ati = gstate.active_tasks.active_tasks.erase(ati); } else { ati++; } } vector::iterator ri = gstate.results.begin(); while (ri != gstate.results.end()) { RESULT* rp = *ri; if (rp->project->ignore) { ri = gstate.results.erase(ri); } else { ri++; } } vector::iterator iter = gstate.projects.begin(); while (iter != gstate.projects.end()) { p = *iter; if (p->ignore) { iter = gstate.projects.erase(iter); } else { iter++; } } } void do_client_simulation() { char buf[256], buf2[256]; int retval; FILE* f; sprintf(buf, "%s%s", infile_prefix, CONFIG_FILE); config.defaults(); read_config_file(true, buf); log_flags.init(); sprintf(buf, "%s%s", outfile_prefix, "log_flags.xml"); f = fopen(buf, "r"); if (f) { MIOFILE mf; mf.init_file(f); XML_PARSER xp(&mf); xp.get_tag(); // skip open tag log_flags.parse(xp); fclose(f); } gstate.add_platform("client simulator"); sprintf(buf, "%s%s", infile_prefix, STATE_FILE_NAME); if (!boinc_file_exists(buf)) { fprintf(stderr, "No client state file\n"); exit(1); } retval = gstate.parse_state_file_aux(buf); if (retval) { fprintf(stderr, "state file parse error %d\n", retval); exit(1); } // if tasks have pending transfers, mark as completed // for (unsigned int i=0; istate() < RESULT_FILES_DOWNLOADED) { rp->set_state(RESULT_FILES_DOWNLOADED, 
"init"); } else if (rp->state() == RESULT_FILES_UPLOADING) { rp->set_state(RESULT_FILES_UPLOADED, "init"); } } config.show(); log_flags.show(); sprintf(buf, "%s%s", infile_prefix, GLOBAL_PREFS_FILE_NAME); sprintf(buf2, "%s%s", infile_prefix, GLOBAL_PREFS_OVERRIDE_FILE); gstate.read_global_prefs(buf, buf2); fprintf(index_file, "

Output files

\n" "Summary\n" "
Log file\n", SUMMARY_FNAME, LOG_FNAME ); // fill in GPU device nums // for (int i=0; iindex = j++; } clear_backoff(); gstate.log_show_projects(); gstate.set_ncpus(); work_fetch.init(); //set_initial_rec(); rec_adjust_period = delta; gstate.request_work_fetch("init"); simulate(); sim_results.compute_figures_of_merit(); sprintf(buf, "%s%s", outfile_prefix, RESULTS_DAT_FNAME); f = fopen(buf, "w"); sim_results.print(f); fclose(f); sprintf(buf, "%s%s", outfile_prefix, RESULTS_TXT_FNAME); f = fopen(buf, "w"); sim_results.print(f, true); fclose(f); fprintf(summary_file, "Simulation done.\n" "-------------------------\n" "Figures of merit:\n" ); sim_results.print(summary_file, true); double cpu_time; boinc_calling_thread_cpu_time(cpu_time); fprintf(summary_file, "-------------------------\n" "Simulator CPU time: %f secs\n" "-------------------------\n" "Peak FLOPS: CPU %.2fG GPU %.2fG\n", cpu_time, cpu_peak_flops()/1e9, gpu_peak_flops()/1e9 ); print_project_results(summary_file); fclose(rec_file); make_graph("REC", "rec", 0); } char* next_arg(int argc, char** argv, int& i) { if (i >= argc) { fprintf(stderr, "Missing command-line argument\n"); usage(argv[0]); } return argv[i++]; } int main(int argc, char** argv) { int i, retval; char buf[256]; sim_results.clear(); for (i=1; i