// credit_test
//
// Simulate the new credit system for the N most recent jobs
// in project's database, and give a comparison of new and old systems.
// Doesn't modify anything.
//
// You must first run html/ops/credit_test.php to create a data file
//
// Files used by main(): job records are read from "credit_test_data";
// one line per accumulated job is written to "credit_test_unsorted";
// a human-readable trace goes to stdout and a progress count to stderr.
//
// NOTE(review): this copy of the file is damaged. Wherever a '<' was
// directly followed by a letter, the text up to the next '>' is missing
// (presumably stripped as markup by whatever produced this copy).
// The damaged spans are kept verbatim below and flagged individually;
// restore them from the original file before building.
//

#include    // NOTE(review): header name missing in this copy
#include    // NOTE(review): header name missing in this copy

#include "sched_config.h"
#include "sched_customize.h"
#include "boinc_db.h"

#define MAX_JOBS 100000
    // main() stops reading jobs once this many records have been read
#define COBBLESTONE_SCALE 100/86400e9
    // factor applied to a PFC value to obtain credit.
    // Not parenthesized: it expands textually, and every use in this file
    // has the form x*COBBLESTONE_SCALE, i.e. x*100/86400e9.
#define PRINT_AV_PERIOD 100
    // main() calls print_avs() each time the job count is a multiple of this
#define SCALE_AV_PERIOD 20
    // main() calls update_av_scales() each time the job count
    // is a multiple of this
#define MIN_HOST_SAMPLES 10
    // don't use host scaling unless have this many samples for host
#define MIN_VERSION_SAMPLES 100
    // don't update a version's scale unless it has this many samples,
    // and don't accumulate stats until this occurs

// Per-(host, app version) running averages.
// get_pfc() feeds "pfc" with pfc/wu.rsc_fpops_est and "et" with
// elapsed_time/wu.rsc_fpops_est (or cpu_time/wu.rsc_fpops_est for jobs
// that have no elapsed time).
//
struct HOST_APP_VERSION {
    int host_id;
    int app_version_id;     // 0 unknown, -1 anon platform
    int app_id;             // if unknown or anon platform
    AVERAGE pfc;
    AVERAGE et;
};

// jobs whose old claimed credit is below this are skipped by main();
// overridden by the first command-line argument
double min_credit = 0;

// NOTE(review): the element types of the four vectors below are missing
// in this copy (see the note at the top of the file).
// read_db() fills app_versions, apps and platforms.
vector app_versions;
vector apps;
vector host_app_versions;
vector platforms;

bool accumulate_stats = false;
    // set to true when we have PFC averages for
    // both a GPU and a CPU version
    // (in the surviving text it is set in update_av_scales(), next to
    // each scale_versions() call; main() only adds a job to the totals
    // once it is true)

// Load apps, non-deprecated app versions (highest id first) and platforms
// from the database into the global vectors.
// Each app version starts with a freshly initialized PFC average
// and a scale factor of 1.
//
void read_db() {
    DB_APP app;
    DB_APP_VERSION av;

    while (!app.enumerate("")) {
        apps.push_back(app);
    }
    while (!av.enumerate("where deprecated=0 order by id desc")) {
        av.pfc.init(50000, .005, 10);
        av.pfc_scale_factor = 1;
        //if (strstr(av.plan_class, "cuda")) {
        //    av.pfc_scale_factor = 0.15;
        //}
        app_versions.push_back(av);
    }
    DB_PLATFORM platform;
    while (!platform.enumerate("")) {
        platforms.push_back(platform);
    }
}

// NOTE(review): DAMAGED SPAN. Only the opening of lookup_platform() and
// the tail of a loop that prints app versions survive; the text between
// "i" and "name" in the for statement below is missing.
// lookup_av(), lookup_app(), lookup_host_app_version(), print_average()
// and print_avs() are called elsewhere in this file but not defined in
// the surviving text; presumably their definitions were in the lost
// span - TODO restore from the original.
//
PLATFORM* lookup_platform(int id) {
    unsigned int i;
    for (i=0; iname, av.plan_class, av.pfc_scale_factor
    );
    print_average(av.pfc);
    printf("\n");
    }
    printf("-------------\n");
}

// Look up host "id" into h; print a message and exit(1) if the lookup
// returns nonzero.
// Not called anywhere in the surviving text of this file.
//
void lookup_host(DB_HOST& h, int id) {
    int retval = h.lookup_id(id);
    if (retval) {
        printf("can't find host %d\n", id);
        exit(1);
    }
}

// Accumulates PFC statistics over a set of app versions.
//
struct RSC_INFO {
    double pfc_sum;     // sum of (average * sample count) over counted versions
    double pfc_n;       // total sample count over counted versions
    int nvers_thresh;
        // # app versions w/ lots of samples
    int nvers_total;    // # app versions passed to update()

    RSC_INFO() {
        pfc_sum = 0;
        pfc_n = 0;
        nvers_thresh = 0;
        nvers_total = 0;
    }
    // Count a version; its samples contribute to the average only if
    // it has more than MIN_VERSION_SAMPLES of them.
    //
    void update(APP_VERSION& av) {
        nvers_total++;
        if (av.pfc.n > MIN_VERSION_SAMPLES) {
            nvers_thresh++;
            pfc_sum += av.pfc.get_avg() * av.pfc.n;
            pfc_n += av.pfc.n;
        }
    }
    // Sample-weighted average of the counted versions.
    // Divides by pfc_n with no guard, so it is only meaningful once
    // nvers_thresh is nonzero; the surviving callers check
    // nvers_thresh > 1 first.
    //
    double avg() {
        return pfc_sum/pfc_n;
    }
};

// Called with an app and an average PFC; stores the average
// in app.min_avg_pfc.
//
// NOTE(review): DAMAGED SPAN. The loop condition, the start of the loop
// body and the beginning of the first printf are missing (text between
// "j" and "name" below). From the surviving printf, the lost part
// presumably computed av.pfc_scale_factor - TODO restore from the original.
//
void scale_versions(APP& app, double avg) {
    for (unsigned int j=0; jname, av.plan_class
    );
    printf(" n: %f avg PFC: %f new scale: %f\n",
        av.pfc.n, av.pfc.get_avg(), av.pfc_scale_factor
    );
    }
    app.min_avg_pfc = avg;
}

// update app version scale factors,
// and find the min average PFC for each app
//
// NOTE(review): DAMAGED SPAN. Everything between "i" and " 1)" in the
// for statement below is missing, i.e. most of the per-app logic.
// What survives: one branch calls scale_versions() with cpu_info.avg(),
// and the outer else branch calls it with gpu_info.avg() when
// gpu_info.nvers_thresh > 1; both set accumulate_stats.
// TODO restore from the original.
//
void update_av_scales() {
    unsigned int i, j;
    printf("----- updating scales --------\n");
    for (i=0; i 1) {
                    scale_versions(app, cpu_info.avg());
                    accumulate_stats = true;
                }
            }
        } else {
            if (gpu_info.nvers_thresh > 1) {
                scale_versions(app, gpu_info.avg());
                accumulate_stats = true;
            }
        }
    }
    printf("-------------\n");
}

// Compute or estimate normalized peak FLOP count (PFC),
// and update data structures.
// Return true if the PFC was computed in the "normal" way,
// i.e. not anon platform, and reflects version scaling
//
// r:   the job; reads appid, hostid, app_version_id, elapsed_time,
//      flops_estimate and cpu_time
// wu:  only rsc_fpops_est is read; it is used as a divisor with no
//      zero check
// pfc: output. The caller multiplies it by COBBLESTONE_SCALE
//      to get the new claimed credit.
//
// Three cases:
// - elapsed time present, app_version_id < 0 (anon platform):
//   estimate from the app's min_avg_pfc; returns false
// - elapsed time present, otherwise: pfc = elapsed_time*flops_estimate,
//   then version and host normalization; returns true only once the
//   version has more than MIN_VERSION_SAMPLES samples
// - no elapsed time (old client): estimate from the app's min_avg_pfc;
//   returns false
//
bool get_pfc(RESULT& r, WORKUNIT& wu, double& pfc) {
    APP_VERSION* avp = NULL;
    DB_HOST host;       // NOTE(review): unused in this function
    int rsc_type;       // NOTE(review): unused in this function
    APP& app = lookup_app(r.appid);
    HOST_APP_VERSION& hav = lookup_host_app_version(
        r.hostid, r.app_version_id
    );

    if (r.elapsed_time) {
        // new client
        hav.et.update(r.elapsed_time/wu.rsc_fpops_est);
        if (r.app_version_id < 0) {
            // anon platform
            //
            // NOTE(review): unlike the old-client branch below, this does
            // not multiply min_avg_pfc by wu.rsc_fpops_est, so the two
            // estimates are on different scales - confirm which is intended
            pfc = app.min_avg_pfc;
            if (hav.et.n > MIN_HOST_SAMPLES) {
                // scale by this job's normalized elapsed time
                // relative to the host's average
                pfc *= (r.elapsed_time/wu.rsc_fpops_est)/hav.et.get_avg();
            }
            printf(" skipping: anon platform\n");
            return false;
        } else {
            pfc = (r.elapsed_time * r.flops_estimate);
            // NOTE(review): the lookup_av() result is dereferenced below
            // without a null check; lookup_av() is not visible in this
            // copy, so confirm it cannot return NULL
            avp = lookup_av(r.app_version_id);
            printf(" sec: %.0f GFLOPS: %.0f PFC: %.0fG raw credit: %.2f\n",
                r.elapsed_time, r.flops_estimate/1e9,
                pfc/1e9, pfc*COBBLESTONE_SCALE
            );
        }
    } else {
        // old client
        //
        hav.et.update(r.cpu_time/wu.rsc_fpops_est);
        pfc = app.min_avg_pfc*wu.rsc_fpops_est;
        if (hav.et.n > MIN_HOST_SAMPLES) {
            // NOTE(review): r.elapsed_time is zero in this branch, so s is
            // 0/avg and pfc is scaled to zero. The average above is fed
            // with r.cpu_time/wu.rsc_fpops_est; presumably that quantity
            // was meant as the numerator - confirm
            double s = r.elapsed_time/hav.et.get_avg();
            pfc *= s;
            printf(" old client: scaling by %f (%f/%f)\n",
                s, r.elapsed_time, hav.et.get_avg()
            );
        } else {
            printf(" old client: not scaling\n");
        }
        return false;
    }

    // only the "new client, known app version" case reaches this point

    avp->pfc.update(pfc/wu.rsc_fpops_est);

    // version normalization

    double vnpfc = pfc * avp->pfc_scale_factor;

    // NOTE(review): p is dereferenced below without a null check
    PLATFORM* p = lookup_platform(avp->platformid);
    printf(" updated version PFC: %f\n", pfc/wu.rsc_fpops_est);
    printf(" version scale (%s %s): %f\n",
        p->name, avp->plan_class, avp->pfc_scale_factor
    );

    // host normalization

    hav.pfc.update(pfc/wu.rsc_fpops_est);

    double host_scale = 1;

    // scale by (version average / host average), capped at 1,
    // once both averages have enough samples
    if (hav.pfc.n > MIN_HOST_SAMPLES && avp->pfc.n > MIN_VERSION_SAMPLES) {
        host_scale = avp->pfc.get_avg()/hav.pfc.get_avg();
        if (host_scale > 1) host_scale = 1;
        printf(" host scale: %f (%f/%f)\n",
            host_scale, avp->pfc.get_avg(), hav.pfc.get_avg()
        );
    }
    pfc = vnpfc * host_scale;

    return avp->pfc.n > MIN_VERSION_SAMPLES;
}

// Usage: credit_test [min_credit]
//
// Parses the project config, opens the database, loads apps, app versions
// and platforms, then processes the records in "credit_test_data".
// Each record has 10 whitespace-separated fields: result id,
// workunit id, app id, host id, old claimed credit, app version id,
// elapsed time, flops estimate, CPU time, and the workunit's rsc_fpops_est.
// Reading stops at the first record that doesn't parse as 10 fields,
// at end of file, or after MAX_JOBS records.
//
// For each job with old credit >= min_credit the new credit is computed
// with get_pfc(). Jobs computed the "normal" way are, once
// accumulate_stats is set, added to the old/new totals and written to
// "credit_test_unsorted" as "workunitid resultid new_credit old_credit".
//
int main(int argc, char** argv) {
    RESULT r;
    WORKUNIT wu;
    int retval;
    int appid=0;        // NOTE(review): unused

    // NOTE(review): the fopen() result is not checked before the
    // fprintf()/fclose() calls below
    FILE* f = fopen("credit_test_unsorted", "w");

    if (argc > 1) {
        min_credit = atof(argv[1]);
    }

    retval = config.parse_file();
    if (retval) {printf("no config: %d\n", retval); exit(1);}
    retval = boinc_db.open(
        config.db_name, config.db_host, config.db_user, config.db_passwd
    );
    if (retval) {printf("no db\n"); exit(1);}

    read_db();

    int n=0, nstats=0;      // records read; jobs added to the totals
    double total_old_credit = 0;
    double total_new_credit = 0;
    // NOTE(review): the fopen() result is not checked before feof()/fscanf(),
    // so a missing data file is not reported; "in" is also never closed
    FILE* in = fopen("credit_test_data", "r");

    printf("min credit: %f\n", min_credit);
    while (!feof(in)) {
        int c = fscanf(in, "%d %d %d %d %lf %d %lf %lf %lf %lf",
            &r.id, &r.workunitid, &r.appid, &r.hostid,
            &r.claimed_credit, &r.app_version_id, &r.elapsed_time,
            &r.flops_estimate, &r.cpu_time, &wu.rsc_fpops_est
        );
        if (c != 10) break;     // short or malformed record: stop reading

        printf("%d) result %d WU %d host %d old credit %f\n",
            n, r.id, r.workunitid, r.hostid, r.claimed_credit
        );
        n++;

        // n was already incremented, and "continue" bypasses the periodic
        // scale update / print / progress checks at the bottom of the loop
        if (r.claimed_credit < min_credit) {
            printf(" skipping: small credit\n");
            continue;
        }

        double pfc;
        bool normal = get_pfc(r, wu, pfc);
        double new_claimed_credit = pfc * COBBLESTONE_SCALE;
        if (normal) {
            printf(" new credit %.2f old credit %.2f\n",
                new_claimed_credit, r.claimed_credit
            );
            if (accumulate_stats) {
                total_old_credit += r.claimed_credit;
                total_new_credit += new_claimed_credit;
                nstats++;
                fprintf(f, "%d %d %.2f %.2f\n",
                    r.workunitid, r.id, new_claimed_credit, r.claimed_credit
                );
            } else {
                printf(" not accumulated\n");
            }
        } else {
            printf(" new credit (average): %f\n", new_claimed_credit);
        }

        if (n%SCALE_AV_PERIOD ==0) {
            update_av_scales();
        }
        if (n%PRINT_AV_PERIOD ==0) {
            print_avs();
        }
        if (n%1000 == 0) {
            // progress indicator
            fprintf(stderr, "%d\n", n);
        }
        if (n >= MAX_JOBS) break;
    }
    fclose(f);

    // nothing was accumulated, so there are no averages to report
    // (the jobs are read from the data file, although the message says DB)
    if (nstats == 0) {
        printf("Insufficient jobs were read from DB\n");
        exit(0);
    }

    print_avs();

    // NOTE(review): the ratio divides by total_old_credit, which is zero
    // if every accumulated job had zero old credit
    // (possible when min_credit <= 0)
    printf("Average credit: old %.2f new %.2f (ratio %.2f)\n",
        total_old_credit/nstats, total_new_credit/nstats,
        total_new_credit/total_old_credit
    );
    //printf("Variance claimed to grant old credit: %f\n", sqrt(variance_old/nstats));
    //printf("Variance claimed to grant old credit: %f\n", sqrt(variance_old/nstats));
}