- server and client:

in server->client reply messages and in the client itself,
    move app-planning info from RESULT to APP_VERSION.
    This was necessary to allow anonymous platform info (app_info.xml)
    to specify avg_ncpus, etc.
    e.g., if someone wants to write a multithreaded version of SETI@home,
    or a GPU/CUDA version,
    they can run it using the anonymous platform mechanism
    and it will be scheduled correctly.

    If a server sends an existing APP_VERSION but with different
    app-planning info, the client will accept and use the new info.

svn path=/trunk/boinc/; revision=14978
This commit is contained in:
David Anderson 2008-03-28 18:00:27 +00:00
parent 38c5fa8108
commit 4554fa5ce3
10 changed files with 133 additions and 75 deletions

View File

@ -2759,3 +2759,28 @@ Charlie Mar 28 2008
BOINCGUIApp.cpp
mac/
SetupSecurity.cpp
David Mar 28 2008
- server and client:
in server->client reply messages and in the client itself,
move app-planning info from RESULT to APP_VERSION.
This was necessary to allow anonymous platform info (app_info.xml)
to specify avg_ncpus, etc.
e.g., if someone wants to write a multithreaded version of SETI@home,
or a GPU/CUDA version,
they can run it using the anonymous platform mechanism
and it will be scheduled correctly.
If a server sends an existing APP_VERSION but with different
app-planning info, the client will accept and use the new info.
client/
app_start.C
client_types.C,h
cpu_sched.C
db/
boinc_db.h
sched/
sched_send.C
sched_plan.C
server_types.C

View File

@ -507,6 +507,9 @@ int ACTIVE_TASK::start(bool first_time) {
// NOTE: in Windows, stderr is redirected in boinc_init_diagnostics();
cmd_line = exec_path + std::string(" ") + wup->command_line;
if (strlen(app_version->cmdline)) {
cmd_line += std::string(" ") + app_version->cmdline;
}
relative_to_absolute(slot_dir, slotdirpath);
bool success = false;
@ -760,9 +763,9 @@ int ACTIVE_TASK::start(bool first_time) {
#endif
char cmdline[8192];
strcpy(cmdline, wup->command_line.c_str());
if (strlen(result->cmdline)) {
if (strlen(app_version->cmdline)) {
strcat(cmdline, " ");
strcat(cmdline, result->cmdline);
strcat(cmdline, app_version->cmdline);
}
sprintf(buf, "../../%s", exec_path );
if (g_use_sandbox) {

View File

@ -1080,8 +1080,12 @@ int APP_VERSION::parse(MIOFILE& in) {
version_num = 0;
strcpy(platform, "");
strcpy(plan_class, "");
strcpy(cmdline, "");
avg_ncpus = 1;
max_ncpus = 1;
app = NULL;
project = NULL;
flops = gstate.host_info.p_fpops;
while (in.fgets(buf, 256)) {
if (match_tag(buf, "</app_version>")) return 0;
if (parse_str(buf, "<app_name>", app_name, sizeof(app_name))) continue;
@ -1094,6 +1098,10 @@ int APP_VERSION::parse(MIOFILE& in) {
if (parse_str(buf, "<api_version>", api_version, sizeof(api_version))) continue;
if (parse_str(buf, "<platform>", platform, sizeof(platform))) continue;
if (parse_str(buf, "<plan_class>", plan_class, sizeof(plan_class))) continue;
if (parse_double(buf, "<avg_ncpus>", avg_ncpus)) continue;
if (parse_double(buf, "<max_ncpus>", max_ncpus)) continue;
if (parse_double(buf, "<flops>", flops)) continue;
if (parse_str(buf, "<cmdline>", cmdline, sizeof(cmdline))) continue;
if (log_flags.unparsed_xml) {
msg_printf(0, MSG_INFO,
"[unparsed_xml] APP_VERSION::parse(): unrecognized: %s\n", buf
@ -1111,10 +1119,16 @@ int APP_VERSION::write(MIOFILE& out) {
"<app_version>\n"
" <app_name>%s</app_name>\n"
" <version_num>%d</version_num>\n"
" <platform>%s</platform>\n",
" <platform>%s</platform>\n"
" <avg_ncpus>%f</avg_ncpus>\n"
" <max_ncpus>%f</max_ncpus>\n"
" <flops>%f</flops>\n",
app_name,
version_num,
platform
platform,
avg_ncpus,
max_ncpus,
flops
);
if (strlen(plan_class)) {
out.printf(" <plan_class>%s</plan_class>\n", plan_class);
@ -1122,6 +1136,9 @@ int APP_VERSION::write(MIOFILE& out) {
if (strlen(api_version)) {
out.printf(" <api_version>%s</api_version>\n", api_version);
}
if (strlen(cmdline)) {
out.printf(" <cmdline>%s</cmdline>\n", cmdline);
}
for (i=0; i<app_files.size(); i++) {
retval = app_files[i].write(out);
if (retval) return retval;
@ -1410,10 +1427,6 @@ void RESULT::clear() {
version_num = 0;
strcpy(platform, "");
strcpy(plan_class, "");
strcpy(cmdline, "");
avg_ncpus = 1;
max_ncpus = 1;
flops = gstate.host_info.p_fpops;
}
// parse a <result> element from scheduling server.
@ -1431,10 +1444,6 @@ int RESULT::parse_server(MIOFILE& in) {
if (parse_str(buf, "<platform>", platform, sizeof(platform))) continue;
if (parse_str(buf, "<plan_class>", plan_class, sizeof(plan_class))) continue;
if (parse_int(buf, "<version_num>", version_num)) continue;
if (parse_double(buf, "<avg_ncpus>", avg_ncpus)) continue;
if (parse_double(buf, "<max_ncpus>", max_ncpus)) continue;
if (parse_double(buf, "<flops>", flops)) continue;
if (parse_str(buf, "<cmdline>", cmdline, sizeof(cmdline))) continue;
if (match_tag(buf, "<file_ref>")) {
file_ref.parse(in);
output_files.push_back(file_ref);
@ -1498,10 +1507,6 @@ int RESULT::parse_state(MIOFILE& in) {
if (parse_str(buf, "<platform>", platform, sizeof(platform))) continue;
if (parse_str(buf, "<plan_class>", plan_class, sizeof(plan_class))) continue;
if (parse_int(buf, "<version_num>", version_num)) continue;
if (parse_double(buf, "<avg_ncpus>", avg_ncpus)) continue;
if (parse_double(buf, "<max_ncpus>", max_ncpus)) continue;
if (parse_double(buf, "<flops>", flops)) continue;
if (parse_str(buf, "<cmdline>", cmdline, sizeof(cmdline))) continue;
if (log_flags.unparsed_xml) {
msg_printf(0, MSG_INFO,
"[unparsed_xml] RESULT::parse(): unrecognized: %s\n", buf
@ -1523,26 +1528,17 @@ int RESULT::write(MIOFILE& out, bool to_server) {
" <exit_status>%d</exit_status>\n"
" <state>%d</state>\n"
" <platform>%s</platform>\n"
" <version_num>%d</version_num>\n"
" <avg_ncpus>%f</avg_ncpus>\n"
" <max_ncpus>%f</max_ncpus>\n"
" <flops>%f</flops>\n",
" <version_num>%d</version_num>\n",
name,
final_cpu_time,
exit_status,
state(),
platform,
version_num,
avg_ncpus,
max_ncpus,
flops
version_num
);
if (strlen(plan_class)) {
out.printf(" <plan_class>%s</plan_class>\n", plan_class);
}
if (strlen(cmdline)) {
out.printf(" <cmdline>%s</cmdline>\n", cmdline);
}
if (fpops_per_cpu_sec) {
out.printf(" <fpops_per_cpu_sec>%f</fpops_per_cpu_sec>\n", fpops_per_cpu_sec);
}

View File

@ -407,6 +407,12 @@ struct APP_VERSION {
char platform[256];
char plan_class[64];
char api_version[16];
double avg_ncpus;
double max_ncpus;
double flops;
char cmdline[256]; // additional cmdline args
COPROCS coprocs;
APP* app;
PROJECT* project;
std::vector<FILE_REF> app_files;
@ -456,12 +462,7 @@ struct RESULT {
int version_num; // identifies the app used
char plan_class[64];
char platform[256];
char cmdline[256]; // additional cmdline args
APP_VERSION* avp;
double avg_ncpus;
double max_ncpus;
double flops;
COPROCS coprocs;
std::vector<FILE_REF> output_files;
bool ready_to_report;
// we're ready to report this result to the server;

View File

@ -546,7 +546,7 @@ void CLIENT_STATE::schedule_cpus() {
}
ram_left -= atp->procinfo.working_set_size_smoothed;
}
ncpus_used += rp->avg_ncpus;
ncpus_used += rp->avp->avg_ncpus;
rp->project->anticipated_debt -= (rp->project->resource_share / rrs) * expected_pay_off;
rp->project->deadlines_missed--;
@ -595,7 +595,7 @@ void CLIENT_STATE::schedule_cpus() {
}
ram_left -= atp->procinfo.working_set_size_smoothed;
}
ncpus_used += rp->avg_ncpus;
ncpus_used += rp->avp->avg_ncpus;
double xx = (rp->project->resource_share / rrs) * expected_pay_off;
rp->project->anticipated_debt -= xx;
if (log_flags.cpu_sched_debug) {
@ -623,7 +623,7 @@ void CLIENT_STATE::make_running_task_heap(
if (!atp->result->runnable()) continue;
if (atp->scheduler_state != CPU_SCHED_SCHEDULED) continue;
running_tasks.push_back(atp);
ncpus_used += atp->result->avg_ncpus;
ncpus_used += atp->app_version->avg_ncpus;
}
std::make_heap(
@ -702,7 +702,7 @@ bool CLIENT_STATE::enforce_schedule() {
while (ncpus_used > ncpus) {
atp = running_tasks[0];
atp->next_scheduler_state = CPU_SCHED_PREEMPTED;
ncpus_used -= atp->result->avg_ncpus;
ncpus_used -= atp->app_version->avg_ncpus;
std::pop_heap(
running_tasks.begin(),
running_tasks.end(),
@ -762,7 +762,7 @@ bool CLIENT_STATE::enforce_schedule() {
if (atp->procinfo.working_set_size_smoothed > ram_left) {
atp->next_scheduler_state = CPU_SCHED_PREEMPTED;
atp->too_large = true;
ncpus_used -= atp->result->avg_ncpus;
ncpus_used -= atp->app_version->avg_ncpus;
if (log_flags.mem_usage_debug) {
msg_printf(rp->project, MSG_INFO,
"[mem_usage_debug] enforce: result %s can't continue, too big %.2fMB > %.2fMB",
@ -818,7 +818,7 @@ bool CLIENT_STATE::enforce_schedule() {
rp->project->deadlines_missed--;
}
atp->next_scheduler_state = CPU_SCHED_PREEMPTED;
ncpus_used -= atp->result->avg_ncpus;
ncpus_used -= atp->app_version->avg_ncpus;
std::pop_heap(
running_tasks.begin(),
running_tasks.end(),
@ -846,7 +846,7 @@ bool CLIENT_STATE::enforce_schedule() {
if (run_task) {
atp = get_task(rp);
atp->next_scheduler_state = CPU_SCHED_SCHEDULED;
ncpus_used += rp->avg_ncpus;
ncpus_used += atp->app_version->avg_ncpus;
ram_left -= atp->procinfo.working_set_size_smoothed;
}
}
@ -1476,7 +1476,7 @@ void CLIENT_STATE::set_ncpus() {
if (config.ncpus>0) {
ncpus = config.ncpus;
} else if (host_info.p_ncpus>0) {
ncpus = (host_info.p_ncpus * global_prefs.max_ncpus_pct)/100;
ncpus = (int)((host_info.p_ncpus * global_prefs.max_ncpus_pct)/100);
if (ncpus == 0) ncpus = 1;
} else {
ncpus = 1;

View File

@ -664,6 +664,16 @@ int CLIENT_STATE::handle_scheduler_reply(
app, avpp.platform, avpp.version_num, avpp.plan_class
);
if (avp) {
// update performance-related info;
// generally this shouldn't change,
// but if it does it's better to use the new stuff
//
avp->avg_ncpus = avpp.avg_ncpus;
avp->max_ncpus = avpp.max_ncpus;
avp->flops = avpp.flops;
strcpy(avp->cmdline, avpp.cmdline);
avp->coprocs = avpp.coprocs;
// if we had download failures, clear them
//
avp->clear_errors();

View File

@ -44,6 +44,7 @@ extern DB_CONN boinc_db;
// Dummy name for file xfers
#define FILE_MOVER "move_file"
struct BEST_APP_VERSION;
// A compilation target, i.e. an architecture/OS combination.
// The core client will be given only applications with the same platform
@ -107,6 +108,10 @@ struct APP_VERSION {
bool deprecated;
char plan_class[256];
// the following used by scheduler, not in DB
//
BEST_APP_VERSION* bavp;
int write(FILE*);
void clear();
};
@ -439,8 +444,6 @@ struct CREDITED_JOB {
#define ASSIGN_USER 2
#define ASSIGN_TEAM 3
struct BEST_APP_VERSION;
struct RESULT {
int id;
int create_time;

View File

@ -53,7 +53,8 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
// the following is for an app that can use anywhere
// from 1 to 64 threads, can control this exactly,
// and whose speedup is .95N
// (so a sequential app will be used if one is available)
// (so on a uniprocessor, we'll use a sequential app
// if one is available)
//
int ncpus, nthreads;
bool bounded;
@ -66,6 +67,26 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
sprintf(hu.cmdline, "--nthreads %d", nthreads);
hu.flops = 0.95*sreq.host.p_fpops*nthreads;
return true;
} else if (!strcmp(plan_class, "cuda")) {
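// the following is for an app that uses a CUDA GPU
// and some CPU also, and gets 50 GFLOPS total
// NOTE(review): hu.flops below is set to 5e11, i.e. 500 GFLOPS,
// which disagrees with the 50 GFLOPS stated here - confirm which is intended
//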
for (unsigned int i=0; i<sreq.coprocs.coprocs.size(); i++) {
COPROC cp = sreq.coprocs.coprocs[i];
if (!strcmp(cp.name, "CUDA")) {
COPROC cu;
strcpy(cu.name, cp.name);
cu.count = 1;
hu.coprocs.coprocs.push_back(cu);
double x = 1e9/sreq.host.p_fpops;
if (x > 1) x = 1;
hu.avg_ncpus = x;
hu.max_ncpus = x;
hu.flops = 5e11;
return true;
}
}
return false;
}
log_messages.printf(MSG_CRITICAL,
"Unknown plan class: %s\n", plan_class

View File

@ -723,6 +723,7 @@ int add_wu_to_reply(
}
reply.insert_app_unique(*app);
av2.bavp = bavp;
reply.insert_app_version_unique(*avp2);
log_messages.printf(MSG_DEBUG,
"[HOST#%d] Sending app_version %s %d %d\n",
@ -993,7 +994,6 @@ int add_result_to_reply(
);
return retval;
}
result.bavp = bavp;
reply.insert_result(result);
reply.wreq.seconds_to_fill -= wu_seconds_filled;
request.estimated_delay += wu_seconds_filled/effective_ncpus(reply.host);

View File

@ -832,6 +832,8 @@ int APP::write(FILE* fout) {
int APP_VERSION::write(FILE* fout) {
char buf[LARGE_BLOB_SIZE];
unsigned int i;
strcpy(buf, xml_doc);
char* p = strstr(buf, "</app_version>");
if (!p) {
@ -845,6 +847,31 @@ int APP_VERSION::write(FILE* fout) {
if (strlen(plan_class)) {
fprintf(fout, " <plan_class>%s</plan_class>\n", plan_class);
}
fprintf(fout,
" <avg_ncpus>%f</avg_ncpus>\n"
" <max_ncpus>%f</max_ncpus>\n"
" <flops>%f</flops>\n",
bavp->host_usage.avg_ncpus,
bavp->host_usage.max_ncpus,
bavp->host_usage.flops
);
if (strlen(bavp->host_usage.cmdline)) {
fprintf(fout,
" <cmdline>%s</cmdline>\n",
bavp->host_usage.cmdline
);
}
for (i=0; i<bavp->host_usage.coprocs.coprocs.size(); i++) {
COPROC& cp = bavp->host_usage.coprocs.coprocs[i];
fprintf(fout,
" <coproc>\n"
" <name>%s</name>\n"
" <count>%d</count>\n"
" </coproc>\n",
cp.name,
cp.count
);
}
fputs("</app_version>\n", fout);
return 0;
}
@ -868,38 +895,10 @@ int RESULT::write_to_client(FILE* fout) {
PLATFORM* pp = ssp->lookup_platform_id(avp->platformid);
fprintf(fout,
" <platform>%s</platform>\n"
" <version_num>%d</version_num>\n",
pp->name, avp->version_num
" <version_num>%d</version_num>\n"
" <plan_class>%s</plan_class>\n",
pp->name, avp->version_num, avp->plan_class
);
if (strlen(avp->plan_class)) {
fprintf(fout,
" <plan_class>%s</plan_class>\n"
" <avg_ncpus>%f</avg_ncpus>\n"
" <max_ncpus>%f</max_ncpus>\n"
" <flops>%f</flops>\n",
avp->plan_class,
bavp->host_usage.avg_ncpus,
bavp->host_usage.max_ncpus,
bavp->host_usage.flops
);
if (strlen(bavp->host_usage.cmdline)) {
fprintf(fout,
" <cmdline>%s</cmdline>\n",
bavp->host_usage.cmdline
);
}
for (i=0; i<bavp->host_usage.coprocs.coprocs.size(); i++) {
COPROC& cp = bavp->host_usage.coprocs.coprocs[i];
fprintf(fout,
" <coproc>\n"
" <name>%s</name>\n"
" <count>%d</count>\n"
" </coproc>\n",
cp.name,
cp.count
);
}
}
}
fputs("</result>\n", fout);
return 0;