// credit_test [--app N] // // Simulate the new credit system for the N most recent jobs // in project's database, and give a comparison of new and old systems. // Doesn't modify anything. // // --app N: restrict to jobs from app with ID N #include #include #include "sched_config.h" #include "boinc_db.h" #define NJOBS 100000 // scan this many jobs #define MAX_CLAIMED_CREDIT 1e3 // Ignore jobs whose claimed (old) credit is greater than this. // Rejects jobs with garbage values. #define MIN_CLAIMED_CREDIT 50.0 // Ignore jobs whose claimed (old) credit is less than this. // Small jobs are noisy. #define COBBLESTONE_SCALE 100/86400e9 // FLOPS to cobblestones #define PRINT_AV_PERIOD 100 #define SCALE_AV_PERIOD 20 #define MIN_HOST_SCALE_SAMPLES 10 // don't use host scaling unless have this many samples for host #define MIN_SCALE_SAMPLES 100 // don't update a version's scale unless it has this many samples, // and don't accumulate stats until this occurs #define RSC_TYPE_CPU -1 #define RSC_TYPE_CUDA -2 #define RSC_TYPE_ATI -3 // guess (by looking at stderr_out) which type of app processed this job. // This is needed for jobs from pre-6.10 clients, // where the client doesn't report this. // THIS IS PROJECT-SPECIFIC: YOU'LL NEED TO EDIT THIS // int get_rsc_type(RESULT& r) { if (strstr(r.stderr_out, "CUDA")) return RSC_TYPE_CUDA; if (strstr(r.stderr_out, "ATI")) return RSC_TYPE_ATI; return RSC_TYPE_CPU; } inline const char* rsc_type_name(int t) { switch (t) { case RSC_TYPE_CPU: return "CPU"; case RSC_TYPE_CUDA: return "NVIDIA"; case RSC_TYPE_ATI: return "ATI"; } } struct HOST_APP_VERSION { int host_id; int app_version_id; // -1 means anon platform AVERAGE vnpfc; AVERAGE et; }; vector app_versions; vector apps; vector host_app_versions; vector platforms; int windows_platformid; int linux_platformid; int mac_platformid; bool accumulate_stats = false; void read_db() { DB_APP app; DB_APP_VERSION av; while (!app.enumerate("where deprecated=0")) { app.vnpfc.clear(); apps.push_back(app); } while (!av.enumerate("where deprecated=0 order by id desc")) { av.pfc.clear(); av.pfc_scale_factor = 1; //if (strstr(av.plan_class, "cuda")) { // av.pfc_scale_factor = 0.15; //} app_versions.push_back(av); } DB_PLATFORM platform; while (!platform.enumerate("")) { platforms.push_back(platform); if (!strcmp(platform.name, "windows_intelx86")) { windows_platformid = platform.id; } if (!strcmp(platform.name, "i686-pc-linux-gnu")) { linux_platformid = platform.id; } if (!strcmp(platform.name, "i686-apple-darwin")) { mac_platformid = platform.id; } } } PLATFORM* lookup_platform(int id) { unsigned int i; for (i=0; iname, av.plan_class, av.pfc_scale_factor ); print_average(av.pfc); printf("\n"); } printf("-------------\n"); } void lookup_host(DB_HOST& h, int id) { int retval = h.lookup_id(id); if (retval) { printf("can't find host %d\n", id); exit(1); } } // update app version scale factors // void update_av_scales() { unsigned int i, j; printf("----- updating scales --------\n"); for (i=0; iname, av.plan_class ); printf(" n: %0.f avg PFC: %0.fG new scale: %f\n", av.pfc.n, av.pfc.get_mean()/1e9, av.pfc_scale_factor ); } accumulate_stats = true; } printf("-------------\n"); } int main(int argc, char** argv) { DB_RESULT r; char clause[256], subclause[256]; int retval; int appid=0; FILE* f = fopen("credit_test_unsorted", "w"); if (argc >= 3 && !strcmp(argv[1], "--app")) { appid = atoi(argv[2]); } retval = config.parse_file(); if (retval) {printf("no config: %d\n", retval); exit(1);} //strcpy(config.db_host, "jocelyn"); retval = boinc_db.open( config.db_name, config.db_host, config.db_user, config.db_passwd ); if (retval) {printf("no db\n"); exit(1);} read_db(); strcpy(subclause, ""); if (appid) { sprintf(subclause, "and appid=%d", appid); } sprintf(clause, "where server_state=%d and outcome=%d and claimed_credit<%f and claimed_credit>%f %s order by id desc limit %d", RESULT_SERVER_STATE_OVER, RESULT_OUTCOME_SUCCESS, MAX_CLAIMED_CREDIT, MIN_CLAIMED_CREDIT, subclause, NJOBS ); int n=0, nstats=0, rsc_type; double total_old_credit = 0; double total_new_credit = 0; printf("DB query: select * from result %s\n", clause); while (!r.enumerate(clause)) { printf("%d) result %d WU %d host %d\n", n, r.id, r.workunitid, r.hostid); // Compute or estimate peak FLOP count (PFC). // This is done as follows: // if new client (reports elapsed time etc.) // if anonymous platform // user may not have set flops_estimate correctly. // So, if it looks like CUDA app (from stderr) // use the CUDA average PFC (but don't update the CUDA avg) // Otherwise use CPU speed // else // use ET*flops_est // else // if it looks like CUDA app, use CUDA avg // else use CPU // double pfc; APP_VERSION* avp = NULL; DB_HOST host; if (r.elapsed_time && r.flops_estimate && r.app_version_id) { // new client if (r.app_version_id < 0) { // user is using anon platform app. // Don't trust the FLOPS estimate. // If it's a CPU app, use benchmarks*time. // Otherwise use mean PFC for (resource type, app) // rsc_type = get_rsc_type(r); printf(" anonymous platform, rsc type %s\n", rsc_type_name(rsc_type) ); if (rsc_type == RSC_TYPE_CPU) { lookup_host(host, r.hostid); pfc = host.p_fpops * r.elapsed_time; } else { avp = lookup_av_anon(r.appid, rsc_type); if (!avp) { printf(" no version for resource type %s; skipping\n", rsc_type_name(rsc_type) ); continue; } if (avp->pfc.n < 10) { printf( " app version %d has too few samples %f; skipping\n", avp->id, avp->pfc.n ); continue; } pfc = avp->pfc.get_mean(); printf(" using mean PFC: %.0fG\n", pfc/1e9); } printf(" PFC: %.0fG raw credit: %.2f\n", pfc/1e9, pfc*COBBLESTONE_SCALE ); } else { pfc = r.elapsed_time * r.flops_estimate; avp = lookup_av(r.app_version_id); printf(" sec: %.0f GFLOPS: %.0f PFC: %.0fG raw credit: %.2f\n", r.elapsed_time, r.flops_estimate/1e9, pfc/1e9, pfc*COBBLESTONE_SCALE ); avp->pfc.update(pfc); } } else { // old client rsc_type = get_rsc_type(r); if (rsc_type != RSC_TYPE_CPU) { // ignore GPU jobs since old client doesn't report elapsed time printf(" old client, GPU app: skipping\n"); continue; } printf(" (old client)\n"); lookup_host(host, r.hostid); avp = lookup_av_old(r.appid, host); if (!avp) continue; r.elapsed_time = r.cpu_time; r.flops_estimate = host.p_fpops; pfc = r.elapsed_time * r.flops_estimate; printf(" sec: %.0f GFLOPS: %.0f PFC: %.0fG raw credit: %.2f\n", r.elapsed_time, r.flops_estimate/1e9, pfc/1e9, pfc*COBBLESTONE_SCALE ); avp->pfc.update(pfc); } APP& app = lookup_app(r.appid); double vnpfc = pfc; if (avp) { vnpfc *= avp->pfc_scale_factor; PLATFORM* p = lookup_platform(avp->platformid); printf(" version scale (%s %s): %f\n", p->name, avp->plan_class, avp->pfc_scale_factor ); } // host normalization HOST_APP_VERSION& hav = lookup_host_app_version( r.hostid, avp?avp->id:-1 ); double host_scale = 1; // only apply it if have at MIN_HOST_SCALE_SAMPLES if (hav.vnpfc.n >= MIN_HOST_SCALE_SAMPLES) { host_scale = app.vnpfc.get_mean()/hav.vnpfc.get_mean(); // if (host_scale > 1) host_scale = 1; printf(" host scale: %f\n", host_scale); } double claimed_flops = vnpfc * host_scale; double new_claimed_credit = claimed_flops * COBBLESTONE_SCALE; app.vnpfc.update(vnpfc); hav.vnpfc.update(vnpfc); printf(" new credit %.2f old credit %.2f\n", new_claimed_credit, r.claimed_credit ); if (accumulate_stats) { total_old_credit += r.claimed_credit; total_new_credit += new_claimed_credit; nstats++; fprintf(f, "%d %d %.2f %.2f\n", r.workunitid, r.id, new_claimed_credit, r.claimed_credit); } n++; if (n%SCALE_AV_PERIOD ==0) { update_av_scales(); } if (n%PRINT_AV_PERIOD ==0) { print_avs(); } if (n%1000 == 0) { fprintf(stderr, "%d\n", n); } } fclose(f); if (nstats == 0) { printf("Insufficient jobs were read from DB\n"); exit(0); } print_avs(); printf("Average credit: old %.2f new %.2f (ratio %.2f)\n", total_old_credit/nstats, total_new_credit/nstats, total_new_credit/total_old_credit ); //printf("Variance claimed to grant old credit: %f\n", sqrt(variance_old/nstats)); //printf("Variance claimed to grant old credit: %f\n", sqrt(variance_old/nstats)); }