From 4554fa5ce3fdf88d45ea67b277a46812ebff4349 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Fri, 28 Mar 2008 18:00:27 +0000 Subject: [PATCH] - server and client: in server->client reply messages and in the client itself, move app-planning info from RESULT to APP_VERSION. This was necessary to allow anonymous platform info (app_info.xml) to specify avg_ncpus, etc. e.g., if someone wants to write a multithread version of SETI@home, or a GPU/CUDA version, they can run it using the anonymous platform mechanism and it will be scheduled correctly. If a server sends an existing APP_VERSION but with different app-planning info, the client will accept and use the new info. svn path=/trunk/boinc/; revision=14978 --- checkin_notes | 25 ++++++++++++++++++ client/app_start.C | 7 +++-- client/client_types.C | 46 +++++++++++++++----------------- client/client_types.h | 11 ++++---- client/cpu_sched.C | 16 ++++++------ client/cs_scheduler.C | 10 +++++++ db/boinc_db.h | 7 +++-- sched/sched_plan.C | 23 +++++++++++++++- sched/sched_send.C | 2 +- sched/server_types.C | 61 +++++++++++++++++++++---------------------- 10 files changed, 133 insertions(+), 75 deletions(-) diff --git a/checkin_notes b/checkin_notes index dd3bc00dcf..94f7d0c4e6 100644 --- a/checkin_notes +++ b/checkin_notes @@ -2759,3 +2759,28 @@ Charlie Mar 28 2008 BOINCGUIApp.cpp mac/ SetupSecurity.cpp + +David Mar 28 2008 + - server and client: + in server->client reply messages and in the client itself, + move app-planning info from RESULT to APP_VERSION. + This was necessary to allow anonymous platform info (app_info.xml) + to specify avg_ncpus, etc. + e.g., if someone wants to write a multithread version of SETI@home, + or a GPU/CUDA version, + they can run it using the anonymous platform mechanism + and it will be scheduled correctly. + + If a server sends an existing APP_VERSION but with different + app-planning info, the client will accept and use the new info. + + client/ + app_start.C + client_types.C,h + cpu_sched.C + db/ + boinc_db.h + sched/ + sched_send.C + sched_plan.C + server_types.C diff --git a/client/app_start.C b/client/app_start.C index fca77bea48..36c3a85454 100644 --- a/client/app_start.C +++ b/client/app_start.C @@ -507,6 +507,9 @@ int ACTIVE_TASK::start(bool first_time) { // NOTE: in Windows, stderr is redirected in boinc_init_diagnostics(); cmd_line = exec_path + std::string(" ") + wup->command_line; + if (strlen(app_version->cmdline)) { + cmd_line += std::string(" ") + app_version->cmdline; + } relative_to_absolute(slot_dir, slotdirpath); bool success = false; @@ -760,9 +763,9 @@ int ACTIVE_TASK::start(bool first_time) { #endif char cmdline[8192]; strcpy(cmdline, wup->command_line.c_str()); - if (strlen(result->cmdline)) { + if (strlen(app_version->cmdline)) { strcat(cmdline, " "); - strcat(cmdline, result->cmdline); + strcat(cmdline, app_version->cmdline); } sprintf(buf, "../../%s", exec_path ); if (g_use_sandbox) { diff --git a/client/client_types.C b/client/client_types.C index 68ce8f1973..c4b53f7376 100644 --- a/client/client_types.C +++ b/client/client_types.C @@ -1080,8 +1080,12 @@ int APP_VERSION::parse(MIOFILE& in) { version_num = 0; strcpy(platform, ""); strcpy(plan_class, ""); + strcpy(cmdline, ""); + avg_ncpus = 1; + max_ncpus = 1; app = NULL; project = NULL; + flops = gstate.host_info.p_fpops; while (in.fgets(buf, 256)) { if (match_tag(buf, "")) return 0; if (parse_str(buf, "", app_name, sizeof(app_name))) continue; @@ -1094,6 +1098,10 @@ int APP_VERSION::parse(MIOFILE& in) { if (parse_str(buf, "", api_version, sizeof(api_version))) continue; if (parse_str(buf, "", platform, sizeof(platform))) continue; if (parse_str(buf, "", plan_class, sizeof(plan_class))) continue; + if (parse_double(buf, "", avg_ncpus)) continue; + if (parse_double(buf, "", max_ncpus)) continue; + if (parse_double(buf, "", flops)) continue; + if (parse_str(buf, "", cmdline, sizeof(cmdline))) continue; if (log_flags.unparsed_xml) { msg_printf(0, MSG_INFO, "[unparsed_xml] APP_VERSION::parse(): unrecognized: %s\n", buf @@ -1111,10 +1119,16 @@ int APP_VERSION::write(MIOFILE& out) { "\n" " %s\n" " %d\n" - " %s\n", + " %s\n" + " %f\n" + " %f\n" + " %f\n", app_name, version_num, - platform + platform, + avg_ncpus, + max_ncpus, + flops ); if (strlen(plan_class)) { out.printf(" %s\n", plan_class); @@ -1122,6 +1136,9 @@ int APP_VERSION::write(MIOFILE& out) { if (strlen(api_version)) { out.printf(" %s\n", api_version); } + if (strlen(cmdline)) { + out.printf(" %s\n", cmdline); + } for (i=0; i element from scheduling server. @@ -1431,10 +1444,6 @@ int RESULT::parse_server(MIOFILE& in) { if (parse_str(buf, "", platform, sizeof(platform))) continue; if (parse_str(buf, "", plan_class, sizeof(plan_class))) continue; if (parse_int(buf, "", version_num)) continue; - if (parse_double(buf, "", avg_ncpus)) continue; - if (parse_double(buf, "", max_ncpus)) continue; - if (parse_double(buf, "", flops)) continue; - if (parse_str(buf, "", cmdline, sizeof(cmdline))) continue; if (match_tag(buf, "")) { file_ref.parse(in); output_files.push_back(file_ref); @@ -1498,10 +1507,6 @@ int RESULT::parse_state(MIOFILE& in) { if (parse_str(buf, "", platform, sizeof(platform))) continue; if (parse_str(buf, "", plan_class, sizeof(plan_class))) continue; if (parse_int(buf, "", version_num)) continue; - if (parse_double(buf, "", avg_ncpus)) continue; - if (parse_double(buf, "", max_ncpus)) continue; - if (parse_double(buf, "", flops)) continue; - if (parse_str(buf, "", cmdline, sizeof(cmdline))) continue; if (log_flags.unparsed_xml) { msg_printf(0, MSG_INFO, "[unparsed_xml] RESULT::parse(): unrecognized: %s\n", buf @@ -1523,26 +1528,17 @@ int RESULT::write(MIOFILE& out, bool to_server) { " %d\n" " %d\n" " %s\n" - " %d\n" - " %f\n" - " %f\n" - " %f\n", + " %d\n", name, final_cpu_time, exit_status, state(), platform, - version_num, - avg_ncpus, - max_ncpus, - flops + version_num ); if (strlen(plan_class)) { out.printf(" %s\n", plan_class); } - if (strlen(cmdline)) { - out.printf(" %s\n", cmdline); - } if (fpops_per_cpu_sec) { out.printf(" %f\n", fpops_per_cpu_sec); } diff --git a/client/client_types.h b/client/client_types.h index ac128aa0f6..3148613791 100644 --- a/client/client_types.h +++ b/client/client_types.h @@ -407,6 +407,12 @@ struct APP_VERSION { char platform[256]; char plan_class[64]; char api_version[16]; + double avg_ncpus; + double max_ncpus; + double flops; + char cmdline[256]; // additional cmdline args + COPROCS coprocs; + APP* app; PROJECT* project; std::vector app_files; @@ -456,12 +462,7 @@ struct RESULT { int version_num; // identifies the app used char plan_class[64]; char platform[256]; - char cmdline[256]; // additional cmdline args APP_VERSION* avp; - double avg_ncpus; - double max_ncpus; - double flops; - COPROCS coprocs; std::vector output_files; bool ready_to_report; // we're ready to report this result to the server; diff --git a/client/cpu_sched.C b/client/cpu_sched.C index 9cb539a149..297663e2f8 100644 --- a/client/cpu_sched.C +++ b/client/cpu_sched.C @@ -546,7 +546,7 @@ void CLIENT_STATE::schedule_cpus() { } ram_left -= atp->procinfo.working_set_size_smoothed; } - ncpus_used += rp->avg_ncpus; + ncpus_used += rp->avp->avg_ncpus; rp->project->anticipated_debt -= (rp->project->resource_share / rrs) * expected_pay_off; rp->project->deadlines_missed--; @@ -595,7 +595,7 @@ void CLIENT_STATE::schedule_cpus() { } ram_left -= atp->procinfo.working_set_size_smoothed; } - ncpus_used += rp->avg_ncpus; + ncpus_used += rp->avp->avg_ncpus; double xx = (rp->project->resource_share / rrs) * expected_pay_off; rp->project->anticipated_debt -= xx; if (log_flags.cpu_sched_debug) { @@ -623,7 +623,7 @@ void CLIENT_STATE::make_running_task_heap( if (!atp->result->runnable()) continue; if (atp->scheduler_state != CPU_SCHED_SCHEDULED) continue; running_tasks.push_back(atp); - ncpus_used += atp->result->avg_ncpus; + ncpus_used += atp->app_version->avg_ncpus; } std::make_heap( @@ -702,7 +702,7 @@ bool CLIENT_STATE::enforce_schedule() { while (ncpus_used > ncpus) { atp = running_tasks[0]; atp->next_scheduler_state = CPU_SCHED_PREEMPTED; - ncpus_used -= atp->result->avg_ncpus; + ncpus_used -= atp->app_version->avg_ncpus; std::pop_heap( running_tasks.begin(), running_tasks.end(), @@ -762,7 +762,7 @@ bool CLIENT_STATE::enforce_schedule() { if (atp->procinfo.working_set_size_smoothed > ram_left) { atp->next_scheduler_state = CPU_SCHED_PREEMPTED; atp->too_large = true; - ncpus_used -= atp->result->avg_ncpus; + ncpus_used -= atp->app_version->avg_ncpus; if (log_flags.mem_usage_debug) { msg_printf(rp->project, MSG_INFO, "[mem_usage_debug] enforce: result %s can't continue, too big %.2fMB > %.2fMB", @@ -818,7 +818,7 @@ bool CLIENT_STATE::enforce_schedule() { rp->project->deadlines_missed--; } atp->next_scheduler_state = CPU_SCHED_PREEMPTED; - ncpus_used -= atp->result->avg_ncpus; + ncpus_used -= atp->app_version->avg_ncpus; std::pop_heap( running_tasks.begin(), running_tasks.end(), @@ -846,7 +846,7 @@ bool CLIENT_STATE::enforce_schedule() { if (run_task) { atp = get_task(rp); atp->next_scheduler_state = CPU_SCHED_SCHEDULED; - ncpus_used += rp->avg_ncpus; + ncpus_used += atp->app_version->avg_ncpus; ram_left -= atp->procinfo.working_set_size_smoothed; } } @@ -1476,7 +1476,7 @@ void CLIENT_STATE::set_ncpus() { if (config.ncpus>0) { ncpus = config.ncpus; } else if (host_info.p_ncpus>0) { - ncpus = (host_info.p_ncpus * global_prefs.max_ncpus_pct)/100; + ncpus = (int)((host_info.p_ncpus * global_prefs.max_ncpus_pct)/100); if (ncpus == 0) ncpus = 1; } else { ncpus = 1; diff --git a/client/cs_scheduler.C b/client/cs_scheduler.C index 42a0f8870b..5f898b36e2 100644 --- a/client/cs_scheduler.C +++ b/client/cs_scheduler.C @@ -664,6 +664,16 @@ int CLIENT_STATE::handle_scheduler_reply( app, avpp.platform, avpp.version_num, avpp.plan_class ); if (avp) { + // update performance-related info; + // generally this shouldn't change, + // but if it does it's better to use the new stuff + // + avp->avg_ncpus = avpp.avg_ncpus; + avp->max_ncpus = avpp.max_ncpus; + avp->flops = avpp.flops; + strcpy(avp->cmdline, avpp.cmdline); + avp->coprocs = avpp.coprocs; + // if we had download failures, clear them // avp->clear_errors(); diff --git a/db/boinc_db.h b/db/boinc_db.h index 39978b0b78..7a58513b69 100644 --- a/db/boinc_db.h +++ b/db/boinc_db.h @@ -44,6 +44,7 @@ extern DB_CONN boinc_db; // Dummy name for file xfers #define FILE_MOVER "move_file" +struct BEST_APP_VERSION; // A compilation target, i.e. a architecture/OS combination. // The core client will be given only applications with the same platform @@ -107,6 +108,10 @@ struct APP_VERSION { bool deprecated; char plan_class[256]; + // the following used by scheduler, not in DB + // + BEST_APP_VERSION* bavp; + int write(FILE*); void clear(); }; @@ -439,8 +444,6 @@ struct CREDITED_JOB { #define ASSIGN_USER 2 #define ASSIGN_TEAM 3 -struct BEST_APP_VERSION; - struct RESULT { int id; int create_time; diff --git a/sched/sched_plan.C b/sched/sched_plan.C index 97c3ec4120..f18e8ac28e 100644 --- a/sched/sched_plan.C +++ b/sched/sched_plan.C @@ -53,7 +53,8 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) { // the following is for an app that can use anywhere // from 1 to 64 threads, can control this exactly, // and whose speedup is .95N - // (so a sequential app will be used if one is available) + // (so on a uniprocessor, we'll use a sequential app + // if one is available) // int ncpus, nthreads; bool bounded; @@ -66,6 +67,26 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) { sprintf(hu.cmdline, "--nthreads %d", nthreads); hu.flops = 0.95*sreq.host.p_fpops*nthreads; return true; + } else if (!strcmp(plan_class, "cuda")) { + // the following is for an app that uses a CUDA GPU + // and some CPU also, and gets 50 GFLOPS total + // + for (unsigned int i=0; i 1) x = 1; + hu.avg_ncpus = x; + hu.max_ncpus = x; + hu.flops = 5e11; + return true; + } + } + return false; } log_messages.printf(MSG_CRITICAL, "Unknown plan class: %s\n", plan_class diff --git a/sched/sched_send.C b/sched/sched_send.C index ad7cce86e8..d3810470ae 100644 --- a/sched/sched_send.C +++ b/sched/sched_send.C @@ -723,6 +723,7 @@ int add_wu_to_reply( } reply.insert_app_unique(*app); + av2.bavp = bavp; reply.insert_app_version_unique(*avp2); log_messages.printf(MSG_DEBUG, "[HOST#%d] Sending app_version %s %d %d\n", @@ -993,7 +994,6 @@ int add_result_to_reply( ); return retval; } - result.bavp = bavp; reply.insert_result(result); reply.wreq.seconds_to_fill -= wu_seconds_filled; request.estimated_delay += wu_seconds_filled/effective_ncpus(reply.host); diff --git a/sched/server_types.C b/sched/server_types.C index 9d1cf44bfb..743a6bffec 100644 --- a/sched/server_types.C +++ b/sched/server_types.C @@ -832,6 +832,8 @@ int APP::write(FILE* fout) { int APP_VERSION::write(FILE* fout) { char buf[LARGE_BLOB_SIZE]; + unsigned int i; + strcpy(buf, xml_doc); char* p = strstr(buf, ""); if (!p) { @@ -845,6 +847,31 @@ int APP_VERSION::write(FILE* fout) { if (strlen(plan_class)) { fprintf(fout, " %s\n", plan_class); } + fprintf(fout, + " %f\n" + " %f\n" + " %f\n", + bavp->host_usage.avg_ncpus, + bavp->host_usage.max_ncpus, + bavp->host_usage.flops + ); + if (strlen(bavp->host_usage.cmdline)) { + fprintf(fout, + " %s\n", + bavp->host_usage.cmdline + ); + } + for (i=0; ihost_usage.coprocs.coprocs.size(); i++) { + COPROC& cp = bavp->host_usage.coprocs.coprocs[i]; + fprintf(fout, + " \n" + " %s\n" + " %d\n" + " \n", + cp.name, + cp.count + ); + } fputs("\n", fout); return 0; } @@ -868,38 +895,10 @@ int RESULT::write_to_client(FILE* fout) { PLATFORM* pp = ssp->lookup_platform_id(avp->platformid); fprintf(fout, " %s\n" - " %d\n", - pp->name, avp->version_num + " %d\n" + " %s\n", + pp->name, avp->version_num, avp->plan_class ); - if (strlen(avp->plan_class)) { - fprintf(fout, - " %s\n" - " %f\n" - " %f\n" - " %f\n", - avp->plan_class, - bavp->host_usage.avg_ncpus, - bavp->host_usage.max_ncpus, - bavp->host_usage.flops - ); - if (strlen(bavp->host_usage.cmdline)) { - fprintf(fout, - " %s\n", - bavp->host_usage.cmdline - ); - } - for (i=0; ihost_usage.coprocs.coprocs.size(); i++) { - COPROC& cp = bavp->host_usage.coprocs.coprocs[i]; - fprintf(fout, - " \n" - " %s\n" - " %d\n" - " \n", - cp.name, - cp.count - ); - } - } } fputs("\n", fout); return 0;