diff --git a/api/boinc_api.C b/api/boinc_api.C
index 562aadffa2..b2ea2ad696 100644
--- a/api/boinc_api.C
+++ b/api/boinc_api.C
@@ -202,18 +202,13 @@ static int setup_shared_mem() {
return 0;
}
-// Return CPU time of worker thread (and optionally others)
-// This may be called from any thread
+// Return CPU time of process.
//
double boinc_worker_thread_cpu_time() {
double cpu;
#ifdef _WIN32
int retval;
- if (options.all_threads_cpu_time) {
- retval = boinc_process_cpu_time(cpu);
- } else {
- retval = boinc_thread_cpu_time(worker_thread_handle, cpu);
- }
+ retval = boinc_process_cpu_time(cpu);
if (retval) {
cpu = nrunning_ticks * TIMER_PERIOD; // for Win9x
}
diff --git a/api/boinc_api.h b/api/boinc_api.h
index 9af1fe3334..7408500a40 100644
--- a/api/boinc_api.h
+++ b/api/boinc_api.h
@@ -50,9 +50,6 @@ typedef struct BOINC_OPTIONS {
// if heartbeat fail, or get process control msg, take
// direction action (exit, suspend, resume).
// Otherwise just set flag in BOINC status
- int all_threads_cpu_time;
- // count the CPU time of all threads
- // (for apps that have multiple worker threads)
int worker_thread_stack_size;
// if nonzero, the worker thread stack size limit
int backwards_compatible_graphics;
@@ -148,7 +145,6 @@ inline void boinc_options_defaults(BOINC_OPTIONS& b) {
b.handle_process_control = 1;
b.send_status_msgs = 1;
b.direct_process_action = 1;
- b.all_threads_cpu_time = 0;
b.worker_thread_stack_size = 0;
b.backwards_compatible_graphics = 1;
}
diff --git a/checkin_notes b/checkin_notes
index e93c0b1f67..9d2b7cc812 100644
--- a/checkin_notes
+++ b/checkin_notes
@@ -2556,13 +2556,15 @@ David Mar 19 2008
sched_send.C
Charlie Mar 20 2008
- - Client: fix a compiler warning which indicated a real logic error (variable
- used uninitialized).
- - Mac: More work on backtrace code: run atos utility via a bidirectional pipe
- instead of backtrace_symbols_fd() API to get better symbols in backtrace.
- Set visibility of all variables in Client and Manager back to hidden (as
- before) to reduce size of executables, since atos utility can use hidden
- symbols. This also involves return to previous wxWidgets build script.
+ - Client: fix a compiler warning which indicated a real logic error
+ (variable used uninitialized).
+ - Mac: More work on backtrace code: run atos utility via a bidirectional
+ pipe instead of backtrace_symbols_fd() API to get better symbols
+ in backtrace.
+ Set visibility of all variables in Client and Manager back to hidden
+ (as before) to reduce size of executables,
+ since atos utility can use hidden symbols.
+ This also involves return to previous wxWidgets build script.
client/
app_start.C
@@ -2641,3 +2643,65 @@ Rom Mar 27 2008
boinccas.dll
boinccas95.dll
+David Mar 27 2008
+ Changes for multithread app support:
+
+ - update_versions: use __ (not :) as separator for plan class
+ - client: add plan_class to APP_VERSION;
+ an app version is now identified by platform/version/plan_class
+ - client CPU scheduler: don't assume apps use 1 CPU
+ - client: add avg_ncpus, max_ncpus, flops, cmdline to RESULT
+ - scheduler: implement app planning scheme
+
+ Other changes:
+
+ - client: if symlink() fails, make an XML soft link instead
+ (for Unix running off a FAT32 FS)
+ - client: don't accept nonpositive resource share from AMS
+ - daemons and DB: check for error returns from enumerations,
+ and exit if so. Thus, if the MySQL server goes down,
+ all the daemons will soon exit.
+ The cron script will restart them every 5 min,
+ so when the DB server comes back up so will the project.
+ - web: show empty max CPU % as ---
+ - API: get rid of all_threads_cpu_time option (always the case now)
+
+ api/
+ boinc_api.C,h
+ client/
+ acct_mgr.C
+ app.C,h
+ app_start.C
+ client_state.C,h
+ client_types.C,h
+ cpu_sched.C
+ cs_scheduler.C
+ cs_statefile.C
+ configure.ac
+ db/
+ boinc_db.C,h
+ html/inc/
+ countries.inc
+ prefs.inc
+ lib/
+ app_ipc.C,h
+ error_numbers.h
+ shmem.C
+ sched/
+ assimilator.C
+ db_purge.C
+ feeder.C
+ file_deleter.C
+ make_work.C
+ message_handler.C
+ sched_array.C
+ sched_assign.C
+ sched_locality.C
+ sched_plan.C,h
+ sched_resend.C
+ sched_send.C,h
+ server_types.C,h
+ validator.C
+ tools/
+ update_versions
+ version.h
diff --git a/client/acct_mgr.C b/client/acct_mgr.C
index 0341c0a1dc..9b13f97c8f 100644
--- a/client/acct_mgr.C
+++ b/client/acct_mgr.C
@@ -234,7 +234,13 @@ int AM_ACCOUNT::parse(XML_PARSER& xp) {
continue;
}
if (xp.parse_double(tag, "resource_share", dtemp)) {
- resource_share.set(dtemp);
+ if (dtemp > 0) {
+ resource_share.set(dtemp);
+ } else {
+ msg_printf(NULL, MSG_INFO,
+ "Resource share out of range: %f", dtemp
+ );
+ }
continue;
}
if (log_flags.unparsed_xml) {
diff --git a/client/app.C b/client/app.C
index 780ce25079..93dbf51da6 100644
--- a/client/app.C
+++ b/client/app.C
@@ -112,7 +112,6 @@ ACTIVE_TASK::ACTIVE_TASK() {
too_large = false;
needs_shmem = false;
want_network = 0;
- nthreads = 1;
memset(&procinfo, 0, sizeof(procinfo));
#ifdef _WIN32
pid_handle = 0;
@@ -554,7 +553,8 @@ int ACTIVE_TASK::parse(MIOFILE& fin) {
wup = result->wup;
app_version = gstate.lookup_app_version(
- result->app, result->platform, result->version_num
+ result->app, result->platform, result->version_num,
+ result->plan_class
);
if (!app_version) {
msg_printf(
diff --git a/client/app.h b/client/app.h
index fdc9d7224f..9ef5e7fc72 100644
--- a/client/app.h
+++ b/client/app.h
@@ -109,8 +109,6 @@ public:
double abort_time;
// when we sent an abort message to this app
// kill it 5 seconds later if it doesn't exit
- int nthreads;
- // current # of threads in app (assumed to be 1 by default)
APP_CLIENT_SHM app_client_shm; // core/app shared mem
MSG_QUEUE graphics_request_queue;
MSG_QUEUE process_control_queue;
diff --git a/client/app_start.C b/client/app_start.C
index 61d73b21a8..fca77bea48 100644
--- a/client/app_start.C
+++ b/client/app_start.C
@@ -211,6 +211,19 @@ int ACTIVE_TASK::write_app_init_file() {
return retval;
}
+static int make_soft_link(PROJECT* project, char* link_path, char* rel_file_path) { // create a regular file at link_path that records rel_file_path; returns 0 or ERR_FOPEN
+ FILE *fp = boinc_fopen(link_path, "w");
+ if (!fp) { // could not open link_path: log an internal error against the project and bail out
+ msg_printf(project, MSG_INTERNAL_ERROR,
+ "Can't create link file %s", link_path
+ );
+ return ERR_FOPEN;
+ }
+ fprintf(fp, "%s\n", rel_file_path); // NOTE(review): the results of fprintf/fclose are not checked, so a failed write is still reported as success
+ fclose(fp);
+ return 0; // file was opened and the write was attempted
+}
+
// set up a file reference, given a slot dir and project dir.
// This means:
// 1) copy the file to slot dir, if reference is by copy
@@ -251,23 +264,17 @@ static int setup_file(
}
#ifdef _WIN32
- FILE *fp = boinc_fopen(link_path, "w");
- if (!fp) {
- msg_printf(project, MSG_INTERNAL_ERROR,
- "Can't open link file %s", link_path
- );
- return ERR_FOPEN;
- }
- fprintf(fp, "%s\n", rel_file_path);
- fclose(fp);
+ retval = make_soft_link(project, link_path, rel_file_path);
+ if (retval) return retval;
#else
retval = symlink(rel_file_path, link_path);
if (retval) {
- msg_printf(project, MSG_INTERNAL_ERROR,
- "Can't symlink %s to %s: %d", rel_file_path, link_path, retval
- );
- perror("symlink");
- return ERR_SYMLINK;
+ // A Unix system can't make symlinks if the filesystem is FAT32
+ // (e.g. external USB disk).
+ // Try making a soft link instead.
+ //
+ retval = make_soft_link(project, link_path, rel_file_path);
+ if (retval) return retval;
}
#endif
#ifdef SANDBOX
@@ -615,6 +622,10 @@ int ACTIVE_TASK::start(bool first_time) {
argv[0] = exec_name;
char cmdline[8192];
strcpy(cmdline, wup->command_line.c_str());
+ if (strlen(result->cmdline)) {
+ strcat(cmdline, " ");
+ strcat(cmdline, result->cmdline);
+ }
parse_command_line(cmdline, argv+1);
if (log_flags.task_debug) {
debug_print_argv(argv);
@@ -749,6 +760,10 @@ int ACTIVE_TASK::start(bool first_time) {
#endif
char cmdline[8192];
strcpy(cmdline, wup->command_line.c_str());
+ if (strlen(result->cmdline)) {
+ strcat(cmdline, " ");
+ strcat(cmdline, result->cmdline);
+ }
sprintf(buf, "../../%s", exec_path );
if (g_use_sandbox) {
char switcher_path[100];
diff --git a/client/client_state.C b/client/client_state.C
index 98b4dd3f64..c4074a58c1 100644
--- a/client/client_state.C
+++ b/client/client_state.C
@@ -665,7 +665,7 @@ WORKUNIT* CLIENT_STATE::lookup_workunit(PROJECT* p, const char* name) {
}
APP_VERSION* CLIENT_STATE::lookup_app_version(
- APP* app, char* platform, int version_num
+ APP* app, char* platform, int version_num, char* plan_class
) {
for (unsigned int i=0; iplatform, platform)) continue;
+ if (strcmp(avp->plan_class, plan_class)) continue;
return avp;
}
return 0;
@@ -722,10 +723,10 @@ int CLIENT_STATE::link_app_version(PROJECT* p, APP_VERSION* avp) {
}
avp->app = app;
- if (lookup_app_version(app, avp->platform, avp->version_num)) {
+ if (lookup_app_version(app, avp->platform, avp->version_num, avp->plan_class)) {
msg_printf(p, MSG_INTERNAL_ERROR,
- "State file error: duplicate app version: %s %s %d",
- avp->app_name, avp->platform, avp->version_num
+ "State file error: duplicate app version: %s %s %d %s",
+ avp->app_name, avp->platform, avp->version_num, avp->plan_class
);
return ERR_NOT_UNIQUE;
}
diff --git a/client/client_state.h b/client/client_state.h
index f0ed17f829..791d6035e2 100644
--- a/client/client_state.h
+++ b/client/client_state.h
@@ -222,7 +222,9 @@ public:
FILE_INFO* lookup_file_info(PROJECT*, const char* name);
RESULT* lookup_result(PROJECT*, const char*);
WORKUNIT* lookup_workunit(PROJECT*, const char*);
- APP_VERSION* lookup_app_version(APP*, char* platform, int ver);
+ APP_VERSION* lookup_app_version(
+ APP*, char* platform, int ver, char* plan_class
+ );
int detach_project(PROJECT*);
int report_result_error(RESULT&, const char *format, ...);
int reset_project(PROJECT*, bool detaching);
@@ -262,7 +264,7 @@ private:
bool enforce_schedule();
bool no_work_for_a_cpu();
void rr_simulation();
- void make_running_task_heap(vector&);
+ void make_running_task_heap(vector&, double&);
void print_deadline_misses();
public:
double retry_shmem_time;
diff --git a/client/client_types.C b/client/client_types.C
index 36738801d3..68ce8f1973 100644
--- a/client/client_types.C
+++ b/client/client_types.C
@@ -1079,6 +1079,7 @@ int APP_VERSION::parse(MIOFILE& in) {
strcpy(api_version, "");
version_num = 0;
strcpy(platform, "");
+ strcpy(plan_class, "");
app = NULL;
project = NULL;
while (in.fgets(buf, 256)) {
@@ -1092,6 +1093,7 @@ int APP_VERSION::parse(MIOFILE& in) {
if (parse_int(buf, "", version_num)) continue;
if (parse_str(buf, "", api_version, sizeof(api_version))) continue;
if (parse_str(buf, "", platform, sizeof(platform))) continue;
+ if (parse_str(buf, "", plan_class, sizeof(plan_class))) continue;
if (log_flags.unparsed_xml) {
msg_printf(0, MSG_INFO,
"[unparsed_xml] APP_VERSION::parse(): unrecognized: %s\n", buf
@@ -1114,6 +1116,9 @@ int APP_VERSION::write(MIOFILE& out) {
version_num,
platform
);
+ if (strlen(plan_class)) {
+ out.printf(" %s\n", plan_class);
+ }
if (strlen(api_version)) {
out.printf(" %s\n", api_version);
}
@@ -1404,6 +1409,11 @@ void RESULT::clear() {
project = NULL;
version_num = 0;
strcpy(platform, "");
+ strcpy(plan_class, "");
+ strcpy(cmdline, "");
+ avg_ncpus = 1;
+ max_ncpus = 1;
+ flops = gstate.host_info.p_fpops;
}
// parse a element from scheduling server.
@@ -1419,7 +1429,12 @@ int RESULT::parse_server(MIOFILE& in) {
if (parse_str(buf, "", wu_name, sizeof(wu_name))) continue;
if (parse_double(buf, "", report_deadline)) continue;
if (parse_str(buf, "", platform, sizeof(platform))) continue;
+ if (parse_str(buf, "", plan_class, sizeof(plan_class))) continue;
if (parse_int(buf, "", version_num)) continue;
+ if (parse_double(buf, "", avg_ncpus)) continue;
+ if (parse_double(buf, "", max_ncpus)) continue;
+ if (parse_double(buf, "", flops)) continue;
+ if (parse_str(buf, "", cmdline, sizeof(cmdline))) continue;
if (match_tag(buf, "")) {
file_ref.parse(in);
output_files.push_back(file_ref);
@@ -1481,7 +1496,12 @@ int RESULT::parse_state(MIOFILE& in) {
if (parse_double(buf, "", intops_per_cpu_sec)) continue;
if (parse_double(buf, "", intops_cumulative)) continue;
if (parse_str(buf, "", platform, sizeof(platform))) continue;
+ if (parse_str(buf, "", plan_class, sizeof(plan_class))) continue;
if (parse_int(buf, "", version_num)) continue;
+ if (parse_double(buf, "", avg_ncpus)) continue;
+ if (parse_double(buf, "", max_ncpus)) continue;
+ if (parse_double(buf, "", flops)) continue;
+ if (parse_str(buf, "", cmdline, sizeof(cmdline))) continue;
if (log_flags.unparsed_xml) {
msg_printf(0, MSG_INFO,
"[unparsed_xml] RESULT::parse(): unrecognized: %s\n", buf
@@ -1503,14 +1523,26 @@ int RESULT::write(MIOFILE& out, bool to_server) {
" %d\n"
" %d\n"
" %s\n"
- " %d\n",
+ " %d\n"
+ " %f\n"
+ " %f\n"
+ " %f\n",
name,
final_cpu_time,
exit_status,
state(),
platform,
- version_num
+ version_num,
+ avg_ncpus,
+ max_ncpus,
+ flops
);
+ if (strlen(plan_class)) {
+ out.printf(" %s\n", plan_class);
+ }
+ if (strlen(cmdline)) {
+ out.printf(" %s\n", cmdline);
+ }
if (fpops_per_cpu_sec) {
out.printf(" %f\n", fpops_per_cpu_sec);
}
diff --git a/client/client_types.h b/client/client_types.h
index 1ece334e55..ac128aa0f6 100644
--- a/client/client_types.h
+++ b/client/client_types.h
@@ -34,6 +34,7 @@
#include "md5_file.h"
#include "hostinfo.h"
+#include "coproc.h"
#include "miofile.h"
#define P_LOW 1
@@ -404,6 +405,7 @@ struct APP_VERSION {
char app_name[256];
int version_num;
char platform[256];
+ char plan_class[64];
char api_version[16];
APP* app;
PROJECT* project;
@@ -452,8 +454,14 @@ struct RESULT {
char wu_name[256];
double report_deadline;
int version_num; // identifies the app used
+ char plan_class[64];
char platform[256];
+ char cmdline[256]; // additional cmdline args
APP_VERSION* avp;
+ double avg_ncpus;
+ double max_ncpus;
+ double flops;
+ COPROCS coprocs;
std::vector output_files;
bool ready_to_report;
// we're ready to report this result to the server;
diff --git a/client/cpu_sched.C b/client/cpu_sched.C
index e70ca26a3a..e532653deb 100644
--- a/client/cpu_sched.C
+++ b/client/cpu_sched.C
@@ -513,9 +513,8 @@ void CLIENT_STATE::schedule_cpus() {
#ifdef SIM
if (!cpu_sched_rr_only) {
#endif
- int ncpus_used = 0;
- //while (ncpus_used < ncpus) {
- while ((int)ordered_scheduled_results.size() < ncpus) {
+ double ncpus_used = 0;
+ while (ncpus_used < ncpus) {
rp = earliest_deadline_result();
if (!rp) break;
rp->already_selected = true;
@@ -545,12 +544,8 @@ void CLIENT_STATE::schedule_cpus() {
atp->needs_shmem = false;
}
ram_left -= atp->procinfo.working_set_size_smoothed;
- ncpus_used += atp->nthreads;
- } else {
- // if we haven't run the app yet, assume it has one thread
- //
- ncpus_used++;
}
+ ncpus_used += rp->avg_ncpus;
rp->project->anticipated_debt -= (rp->project->resource_share / rrs) * expected_pay_off;
rp->project->deadlines_missed--;
@@ -569,7 +564,7 @@ void CLIENT_STATE::schedule_cpus() {
// Next, choose results from projects with large debt
//
- while ((int)ordered_scheduled_results.size() < ncpus) {
+ while (ncpus_used < ncpus) {
assign_results_to_projects();
rp = largest_debt_project_best_result();
if (!rp) break;
@@ -599,6 +594,7 @@ void CLIENT_STATE::schedule_cpus() {
}
ram_left -= atp->procinfo.working_set_size_smoothed;
}
+ ncpus_used += rp->avg_ncpus;
double xx = (rp->project->resource_share / rrs) * expected_pay_off;
rp->project->anticipated_debt -= xx;
if (log_flags.cpu_sched_debug) {
@@ -614,17 +610,19 @@ void CLIENT_STATE::schedule_cpus() {
// make a list of preemptable tasks, ordered by their preemptability.
//
void CLIENT_STATE::make_running_task_heap(
- vector &running_tasks
+ vector &running_tasks, double& ncpus_used
) {
unsigned int i;
ACTIVE_TASK* atp;
+ ncpus_used = 0;
for (i=0; iresult->project->non_cpu_intensive) continue;
if (!atp->result->runnable()) continue;
if (atp->scheduler_state != CPU_SCHED_SCHEDULED) continue;
running_tasks.push_back(atp);
+ ncpus_used += atp->result->avg_ncpus;
}
std::make_heap(
@@ -655,6 +653,7 @@ bool CLIENT_STATE::enforce_schedule() {
vector running_tasks;
static double last_time = 0;
int retval;
+ double ncpus_used;
// Do this when requested, and once a minute as a safety net
//
@@ -694,13 +693,15 @@ bool CLIENT_STATE::enforce_schedule() {
// make heap of currently running tasks, ordered by preemptibility
//
- make_running_task_heap(running_tasks);
+ make_running_task_heap(running_tasks, ncpus_used);
// if there are more running tasks than ncpus,
// then mark the extras for preemption
//
- while (running_tasks.size() > (unsigned int)ncpus) {
- running_tasks[0]->next_scheduler_state = CPU_SCHED_PREEMPTED;
+ while (ncpus_used > ncpus) {
+ atp = running_tasks[0];
+ atp->next_scheduler_state = CPU_SCHED_PREEMPTED;
+ ncpus_used -= atp->result->avg_ncpus;
std::pop_heap(
running_tasks.begin(),
running_tasks.end(),
@@ -718,11 +719,6 @@ bool CLIENT_STATE::enforce_schedule() {
);
}
- // keep track of how many tasks we plan on running
- // (i.e. have next_scheduler_state = SCHEDULED)
- //
- int nrunning = (int)running_tasks.size();
-
// Loop through the scheduled results
//
for (i=0; iprocinfo.working_set_size_smoothed > ram_left) {
atp->next_scheduler_state = CPU_SCHED_PREEMPTED;
atp->too_large = true;
- nrunning--;
+ ncpus_used -= atp->result->avg_ncpus;
if (log_flags.mem_usage_debug) {
msg_printf(rp->project, MSG_INFO,
"[mem_usage_debug] enforce: result %s can't continue, too big %.2fMB > %.2fMB",
@@ -801,7 +797,7 @@ bool CLIENT_STATE::enforce_schedule() {
// Preempt something if needed (and possible).
//
bool run_task = false;
- bool need_to_preempt = (nrunning==ncpus) && running_tasks.size();
+ bool need_to_preempt = (ncpus_used >= ncpus) && running_tasks.size();
// the 2nd half of the above is redundant
if (need_to_preempt) {
// examine the most preemptable task.
@@ -821,7 +817,7 @@ bool CLIENT_STATE::enforce_schedule() {
rp->project->deadlines_missed--;
}
atp->next_scheduler_state = CPU_SCHED_PREEMPTED;
- nrunning--;
+ ncpus_used -= atp->result->avg_ncpus;
std::pop_heap(
running_tasks.begin(),
running_tasks.end(),
@@ -849,14 +845,14 @@ bool CLIENT_STATE::enforce_schedule() {
if (run_task) {
atp = get_task(rp);
atp->next_scheduler_state = CPU_SCHED_SCHEDULED;
- nrunning++;
+ ncpus_used += rp->avg_ncpus;
ram_left -= atp->procinfo.working_set_size_smoothed;
}
}
if (log_flags.cpu_sched_debug) {
msg_printf(0, MSG_INFO,
- "[cpu_sched_debug] finished preempt loop, nrunning %d",
- nrunning
+ "[cpu_sched_debug] finished preempt loop, ncpus_used %f",
+ ncpus_used
);
}
@@ -879,16 +875,13 @@ bool CLIENT_STATE::enforce_schedule() {
}
}
- if (log_flags.cpu_sched_debug && nrunning < ncpus) {
- msg_printf(0, MSG_INFO, "[cpu_sched_debug] Some CPUs idle (%d<%d)",
- nrunning, ncpus
- );
- request_work_fetch("CPUs idle");
- }
- if (log_flags.cpu_sched_debug && nrunning > ncpus) {
- msg_printf(0, MSG_INFO, "[cpu_sched_debug] Too many tasks started (%d>%d)",
- nrunning, ncpus
+ if (log_flags.cpu_sched_debug && ncpus_used < ncpus) {
+ msg_printf(0, MSG_INFO, "[cpu_sched_debug] using %f out of %d CPUs",
+ ncpus_used, ncpus
);
+ if (ncpus_used < ncpus) {
+ request_work_fetch("CPUs idle");
+ }
}
// schedule new non CPU intensive tasks
diff --git a/client/cs_scheduler.C b/client/cs_scheduler.C
index 6f418995c2..42a0f8870b 100644
--- a/client/cs_scheduler.C
+++ b/client/cs_scheduler.C
@@ -660,7 +660,9 @@ int CLIENT_STATE::handle_scheduler_reply(
}
}
APP* app = lookup_app(project, avpp.app_name);
- APP_VERSION* avp = lookup_app_version(app, avpp.platform, avpp.version_num);
+ APP_VERSION* avp = lookup_app_version(
+ app, avpp.platform, avpp.version_num, avpp.plan_class
+ );
if (avp) {
// if we had download failures, clear them
//
@@ -713,7 +715,9 @@ int CLIENT_STATE::handle_scheduler_reply(
strcpy(rp->platform, get_primary_platform());
rp->version_num = latest_version(rp->wup->app, rp->platform);
}
- rp->avp = lookup_app_version(rp->wup->app, rp->platform, rp->version_num);
+ rp->avp = lookup_app_version(
+ rp->wup->app, rp->platform, rp->version_num, rp->plan_class
+ );
if (!rp->avp) {
msg_printf(project, MSG_INTERNAL_ERROR,
"No app version for result: %s %d",
diff --git a/client/cs_statefile.C b/client/cs_statefile.C
index f5ceccad02..2f9b9c11af 100644
--- a/client/cs_statefile.C
+++ b/client/cs_statefile.C
@@ -323,11 +323,13 @@ int CLIENT_STATE::parse_state_file() {
strcpy(rp->platform, get_primary_platform());
rp->version_num = latest_version(rp->wup->app, rp->platform);
}
- rp->avp = lookup_app_version(rp->wup->app, rp->platform, rp->version_num);
+ rp->avp = lookup_app_version(
+ rp->wup->app, rp->platform, rp->version_num, rp->plan_class
+ );
if (!rp->avp) {
msg_printf(project, MSG_INTERNAL_ERROR,
- "No app version for result: %s %d",
- rp->platform, rp->version_num
+ "No app version for result: %s %d %s",
+ rp->platform, rp->version_num, rp->plan_class
);
delete rp;
continue;
diff --git a/configure.ac b/configure.ac
index fb176e8dba..46b9772278 100644
--- a/configure.ac
+++ b/configure.ac
@@ -9,7 +9,7 @@ dnl not sure exactly what the minimum version is (but 2.13 wont work)
AC_PREREQ(2.57)
dnl Set the BOINC version here. You can also use the set-version script.
-AC_INIT(BOINC, 6.1.10)
+AC_INIT(BOINC, 6.1.11)
AC_ARG_ENABLE(debug,
AS_HELP_STRING([--enable-debug],
diff --git a/db/boinc_db.C b/db/boinc_db.C
index f12e9e6965..be1430527e 100644
--- a/db/boinc_db.C
+++ b/db/boinc_db.C
@@ -1024,7 +1024,7 @@ int DB_TRANSITIONER_ITEM_SET::enumerate(
mysql_free_result(cursor.rp);
cursor.active = false;
retval = mysql_errno(db->mysql);
- if (retval) return retval;
+ if (retval) return ERR_DB_CONN_LOST;
return ERR_DB_NOT_FOUND;
}
last_item.parse(row);
@@ -1227,7 +1227,7 @@ int DB_VALIDATOR_ITEM_SET::enumerate(
mysql_free_result(cursor.rp);
cursor.active = false;
retval = mysql_errno(db->mysql);
- if (retval) return retval;
+ if (retval) return ERR_DB_CONN_LOST;
return ERR_DB_NOT_FOUND;
}
last_item.parse(row);
@@ -1368,7 +1368,7 @@ int DB_WORK_ITEM::enumerate(
mysql_free_result(cursor.rp);
cursor.active = false;
retval = mysql_errno(db->mysql);
- if (retval) return retval;
+ if (retval) return ERR_DB_CONN_LOST;
return ERR_DB_NOT_FOUND;
} else {
parse(row);
@@ -1415,7 +1415,7 @@ int DB_WORK_ITEM::enumerate_all(
mysql_free_result(cursor.rp);
cursor.active = false;
retval = mysql_errno(db->mysql);
- if (retval) return retval;
+ if (retval) return ERR_DB_CONN_LOST;
return ERR_DB_NOT_FOUND;
} else {
parse(row);
@@ -1463,7 +1463,7 @@ int DB_IN_PROGRESS_RESULT::enumerate(int hostid, const char* result_names) {
mysql_free_result(cursor.rp);
cursor.active = false;
retval = mysql_errno(db->mysql);
- if (retval) return retval;
+ if (retval) return ERR_DB_CONN_LOST;
return ERR_DB_NOT_FOUND;
} else {
parse(row);
diff --git a/db/boinc_db.h b/db/boinc_db.h
index 3398af1b16..39978b0b78 100644
--- a/db/boinc_db.h
+++ b/db/boinc_db.h
@@ -439,6 +439,8 @@ struct CREDITED_JOB {
#define ASSIGN_USER 2
#define ASSIGN_TEAM 3
+struct BEST_APP_VERSION;
+
struct RESULT {
int id;
int create_time;
@@ -484,7 +486,7 @@ struct RESULT {
int units; // used for granting credit by # of units processed
int parse_from_client(FILE*);
char platform_name[256];
- int version_num;
+ BEST_APP_VERSION* bavp;
void clear();
int write_to_client(FILE*);
};
diff --git a/doc/boinc_news.php b/doc/boinc_news.php
index 4f582f517f..ec6e490128 100644
--- a/doc/boinc_news.php
+++ b/doc/boinc_news.php
@@ -1,6 +1,10 @@
$project_news = array(
+array("Mar 20, 2008",
+ "Watch an excellent talk by CERN's Francois Grey,
+ From distributed computing to distributed thinking."
+),
array("Mar 5, 2008",
"Read Volunteer Computing and the Search for Big Answers, an article about BOINC and volunteer computing on LinuxInsider.com."
),
diff --git a/html/inc/countries.inc b/html/inc/countries.inc
index 618379addf..eb82c1b9fd 100644
--- a/html/inc/countries.inc
+++ b/html/inc/countries.inc
@@ -252,7 +252,6 @@ function print_country_select($selected_country="None") {
if ($selected_country=="None" and $geoip_country!=""){
$selected_country=$geoip_country;
}
- echo "selected: $selected_country\n";
$numCountries = count($countries);
for ($i=0; $i<$numCountries; $i++) {
diff --git a/html/inc/prefs.inc b/html/inc/prefs.inc
index e67b66e820..3588c87fa9 100644
--- a/html/inc/prefs.inc
+++ b/html/inc/prefs.inc
@@ -716,8 +716,10 @@ function prefs_show_global($prefs) {
row2(START_END_DESC, $x);
row2(LEAVE_APPS_IN_MEMORY_DESC, $prefs->leave_apps_in_memory?"yes":"no");
row2(CPU_SCHEDULING_DESC, "$prefs->cpu_scheduling_period_minutes minutes");
- row2(MAX_CPUS_DESC, "$prefs->max_cpus ".MAX_CPUS_DESC2);
- row2(MAX_NCPUS_PCT_DESC, "$prefs->max_ncpus_pct ".MAX_NCPUS_PCT_DESC2);
+ $x = $prefs->max_cpus?$prefs->max_cpus:'---';
+ row2(MAX_CPUS_DESC, "$x ".MAX_CPUS_DESC2);
+ $x = $prefs->max_ncpus_pct?$prefs->max_ncpus_pct:'---';
+ row2(MAX_NCPUS_PCT_DESC, "$x ".MAX_NCPUS_PCT_DESC2);
row2(USE_AT_MOST2, "$prefs->cpu_usage_limit ".CPU_USAGE_LIMIT_DESC2);
row1(DISK_LIMIT_DESC);
row2(USE_AT_MOST, "$prefs->disk_max_used_gb GB disk space");
@@ -1289,7 +1291,7 @@ function prefs_global_parse_form(&$prefs) {
if (!verify_numeric($start_hour, 0)) $error->start_hour = true;
if (!verify_numeric($end_hour, 0)) $error->end_hour = true;
if (!verify_numeric($cpu_scheduling_period_minutes, 1)) $error->cpu_scheduling_period_minutes = true;;
- if (!verify_numeric($max_cpus, 1)) $error->max_cpus = true;
+ if (!verify_numeric($max_cpus, 0)) $error->max_cpus = true;
if (!verify_numeric($max_ncpus_pct, 0, 100)) $error->max_ncpus_pct = true;
if (!verify_numeric($cpu_usage_limit, 0, 100)) $error->cpu_usage_limit = true;
if (!verify_numeric($disk_max_used_gb, 0)) $error->disk_max_used_gb = true;
diff --git a/lib/app_ipc.C b/lib/app_ipc.C
index a4a0f16013..3fe22a55b6 100644
--- a/lib/app_ipc.C
+++ b/lib/app_ipc.C
@@ -228,7 +228,6 @@ int parse_init_data_file(FILE* f, APP_INIT_DATA& ai) {
if (xp.parse_str(tag, "project_dir", ai.project_dir, sizeof(ai.project_dir))) continue;
if (xp.parse_str(tag, "boinc_dir", ai.boinc_dir, sizeof(ai.boinc_dir))) continue;
if (xp.parse_str(tag, "authenticator", ai.authenticator, sizeof(ai.authenticator))) continue;
- if (xp.parse_str(tag, "opaque", ai.opaque, sizeof(ai.opaque))) continue;
if (xp.parse_str(tag, "wu_name", ai.wu_name, sizeof(ai.wu_name))) continue;
#ifdef _WIN32
if (xp.parse_str(tag, "comm_obj_name", ai.shmem_seg_name, sizeof(ai.shmem_seg_name))) continue;
@@ -250,7 +249,6 @@ int parse_init_data_file(FILE* f, APP_INIT_DATA& ai) {
if (xp.parse_double(tag, "fraction_done_update_period", ai.fraction_done_update_period)) continue;
if (xp.parse_double(tag, "fraction_done_start", ai.fraction_done_start)) continue;
if (xp.parse_double(tag, "fraction_done_end", ai.fraction_done_end)) continue;
- if (xp.parse_int(tag, "ncpus_available", ai.ncpus_available)) continue;
xp.skip_unexpected(tag, true, "parse_init_data_file");
}
fprintf(stderr, "parse_init_data_file: no end tag\n");
diff --git a/lib/app_ipc.h b/lib/app_ipc.h
index d79d96d24e..d4d24b297d 100644
--- a/lib/app_ipc.h
+++ b/lib/app_ipc.h
@@ -68,10 +68,8 @@ struct SHARED_MEM {
//
//
//
- //
MSG_CHANNEL process_control_reply;
// app->core
- //
MSG_CHANNEL graphics_request;
// core->app
// request a graphics mode:
@@ -166,7 +164,6 @@ struct APP_INIT_DATA {
char boinc_dir[256];
char wu_name[256];
char authenticator[256];
- char opaque[256];
int slot;
double user_total_credit;
double user_expavg_credit;
@@ -195,7 +192,6 @@ struct APP_INIT_DATA {
SHMEM_SEG_NAME shmem_seg_name;
double wu_cpu_time; // cpu time from previous episodes
double fraction_done_update_period;
- int ncpus_available;
APP_INIT_DATA();
APP_INIT_DATA(const APP_INIT_DATA&); // copy constructor
diff --git a/lib/error_numbers.h b/lib/error_numbers.h
index 93503f3421..1175d6c9f7 100644
--- a/lib/error_numbers.h
+++ b/lib/error_numbers.h
@@ -186,6 +186,7 @@
#define ERR_RMDIR -227
#define ERR_CHILD_FAILED -228
#define ERR_SYMLINK -229
+#define ERR_DB_CONN_LOST -230
// PLEASE: add a text description of your error to
// the text description function boincerror() in str_util.C.
diff --git a/lib/shmem.C b/lib/shmem.C
index 6103da1c82..abdc28d79a 100644
--- a/lib/shmem.C
+++ b/lib/shmem.C
@@ -268,8 +268,9 @@ int detach_shmem(void* p) {
}
#else
-// V6 mmap() shared memory for Unix/Linux/Mac
+// V6 mmap() shared memory for Unix/Linux/Mac
+//
int create_shmem_mmap(char *path, size_t size, void** pp) {
int fd, retval;
struct stat sbuf;
@@ -301,7 +302,6 @@ int create_shmem_mmap(char *path, size_t size, void** pp) {
*pp = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0);
- // Now close the file. The kernel doesnŐt use our file descriptor.
close(fd);
if (*pp == MAP_FAILED) {
@@ -331,7 +331,6 @@ int attach_shmem_mmap(char *path, void** pp) {
*pp = mmap(NULL, sbuf.st_size, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0);
- // Now close the file. The kernel doesnŐt use our file descriptor.
close(fd);
if (*pp == MAP_FAILED) {
@@ -349,6 +348,7 @@ int detach_shmem_mmap(void* p, size_t size) {
// Compatibility routines for Unix/Linux/Mac V5 applications
+//
int create_shmem(key_t key, int size, gid_t gid, void** pp) {
int id;
@@ -402,6 +402,7 @@ int create_shmem(key_t key, int size, gid_t gid, void** pp) {
// prevents any more processes from attaching (by clearing
// the key in the shared memory structure), so BOINC does it
// only after we are completey done with the segment.
+//
int destroy_shmem(key_t key){
struct shmid_ds buf;
int id, retval;
@@ -427,7 +428,7 @@ int attach_shmem(key_t key, void** pp){
id = shmget(key, 0, 0);
if (id < 0) {
- perror("shmget");
+ perror("shmget in attach_shmem");
return ERR_SHMGET;
}
p = shmat(id, 0, 0);
diff --git a/sched/assimilator.C b/sched/assimilator.C
index 2a7e0a1408..281e649a75 100644
--- a/sched/assimilator.C
+++ b/sched/assimilator.C
@@ -33,6 +33,7 @@
#include "boinc_db.h"
#include "parse.h"
#include "util.h"
+#include "error_numbers.h"
#include "str_util.h"
#include "sched_config.h"
@@ -83,7 +84,17 @@ bool do_pass(APP& app) {
app.id, ASSIMILATE_READY, mod_clause,
one_pass_N_WU ? one_pass_N_WU : 1000
);
- while (!wu.enumerate(buf)) {
+ while (1) {
+ retval = wu.enumerate(buf);
+ if (retval) {
+ if (retval != ERR_DB_NOT_FOUND) {
+ log_messages.printf(MSG_DEBUG,
+ "DB connection lost, exiting\n"
+ );
+ exit(0);
+ }
+ break;
+ }
vector results; // must be inside while()!
// for testing purposes, pretend we did nothing
diff --git a/sched/db_purge.C b/sched/db_purge.C
index 805f3f10f2..6876d60263 100644
--- a/sched/db_purge.C
+++ b/sched/db_purge.C
@@ -475,7 +475,17 @@ bool do_pass() {
}
int n=0;
- while (!wu.enumerate(buf)) {
+ while (1) {
+ retval = wu.enumerate(buf);
+ if (retval) {
+ if (retval != ERR_DB_NOT_FOUND) {
+ log_messages.printf(MSG_DEBUG,
+ "DB connection lost, exiting\n"
+ );
+ exit(0);
+ }
+ break;
+ }
if (strstr(wu.name, "nodelete")) continue;
did_something = true;
diff --git a/sched/feeder.C b/sched/feeder.C
index c8cbdd9ce4..53b2b7bdaf 100644
--- a/sched/feeder.C
+++ b/sched/feeder.C
@@ -241,6 +241,16 @@ static bool get_job_from_db(
retval = wi.enumerate(enum_size, select_clause, order_clause);
}
if (retval) {
+ if (retval != ERR_DB_NOT_FOUND) {
+ // If DB server dies, exit;
+ // so /start (run from crontab) will restart us eventually.
+ //
+ log_messages.printf(MSG_CRITICAL,
+ "DB connection lost, exiting\n"
+ );
+ exit(0);
+ }
+
// we've reach the end of the result set
//
switch (enum_phase) {
diff --git a/sched/file_deleter.C b/sched/file_deleter.C
index 815ce113ff..57710fa70b 100644
--- a/sched/file_deleter.C
+++ b/sched/file_deleter.C
@@ -272,7 +272,17 @@ bool do_pass(bool retry_error) {
clause, WUS_PER_ENUM
);
- while (!wu.enumerate(buf)) {
+ while (1) {
+ retval = wu.enumerate(buf);
+ if (retval) {
+ if (retval != ERR_DB_NOT_FOUND) {
+ log_messages.printf(MSG_DEBUG,
+ "DB connection lost, exiting\n"
+ );
+ exit(0);
+ }
+ break;
+ }
did_something = true;
retval = 0;
diff --git a/sched/make_work.C b/sched/make_work.C
index 25d9bf5221..c289019324 100644
--- a/sched/make_work.C
+++ b/sched/make_work.C
@@ -139,7 +139,7 @@ void make_new_wu(DB_WORKUNIT& original_wu, char* starting_xml, int start_time) {
);
exit(retval);
}
- wu.id = boinc_db.insert_id();
+ original_wu.id = boinc_db.insert_id();
log_messages.printf(MSG_DEBUG,
"Created %s, clone of %s\n", wu.name, original_wu.name
);
@@ -157,6 +157,9 @@ void wait_for_results(int wu_id) {
sprintf(buf, "where workunitid=%d", wu_id);
while (1) {
retval = result.count(count, buf);
+ log_messages.printf(MSG_DEBUG, "result.count for %d returned %d, %d\n",
+ wu_id, count, retval
+ );
if (retval) {
log_messages.printf(MSG_CRITICAL, "result.count: %d\n", retval);
exit(1);
diff --git a/sched/message_handler.C b/sched/message_handler.C
index 1e21bb5d54..806cbe07bd 100644
--- a/sched/message_handler.C
+++ b/sched/message_handler.C
@@ -36,6 +36,7 @@ using namespace std;
#include "boinc_db.h"
#include "util.h"
+#include "error_numbers.h"
#include "str_util.h"
#include "sched_config.h"
@@ -76,7 +77,17 @@ bool do_message_scan() {
int retval;
sprintf(buf, "where handled=0");
- while (!mfh.enumerate(buf)) {
+ while (1) {
+ retval = mfh.enumerate(buf);
+ if (retval) {
+ if (retval != ERR_DB_NOT_FOUND) {
+ log_messages.printf(MSG_DEBUG,
+ "DB connection lost, exiting\n"
+ );
+ exit(0);
+ }
+ break;
+ }
retval = handle_message(mfh);
if (!retval) {
mfh.handled = true;
diff --git a/sched/sched_array.C b/sched/sched_array.C
index caa24e4790..a2a287487f 100644
--- a/sched/sched_array.C
+++ b/sched/sched_array.C
@@ -124,8 +124,9 @@ void scan_work_array(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
// Find the app and best app_version for this host.
//
- found = get_app_version(sreq, reply, wu, app, avp);
- if (!found) {
+ BEST_APP_VERSION* bavp;
+ bavp = get_app_version(sreq, reply, wu);
+ if (!bavp) {
continue;
}
@@ -241,9 +242,7 @@ void scan_work_array(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
goto done;
}
- retval = add_result_to_reply(
- result, wu, sreq, reply, app, avp
- );
+ retval = add_result_to_reply(result, wu, sreq, reply, bavp);
// add_result_to_reply() fails only in fairly pathological cases -
// e.g. we couldn't update the DB record or modify XML fields.
diff --git a/sched/sched_assign.C b/sched/sched_assign.C
index 797411b540..49b623dafd 100644
--- a/sched/sched_assign.C
+++ b/sched/sched_assign.C
@@ -41,7 +41,7 @@ static int send_assigned_job(
static int seqno=0;
static R_RSA_PRIVATE_KEY key;
APP* app;
- APP_VERSION* avp;
+ BEST_APP_VERSION* bavp;
if (first) {
first = false;
@@ -60,16 +60,9 @@ static int send_assigned_job(
);
return retval;
}
- app = ssp->lookup_app(wu.appid);
- if (!app) {
- log_messages.printf(MSG_CRITICAL,
- "app %d for assigned WU %d not found\n",
- wu.appid, wu.id
- );
- return ERR_NOT_FOUND;
- }
- bool found = get_app_version(request, reply, wu, app, avp);
- if (!found) {
+
+ bavp = get_app_version(request, reply, wu);
+ if (!bavp) {
log_messages.printf(MSG_CRITICAL,
"App version for assigned WU not found\n"
);
@@ -88,7 +81,7 @@ static int send_assigned_job(
int result_id = boinc_db.insert_id();
DB_RESULT result;
retval = result.lookup_id(result_id);
- add_result_to_reply(result, wu, request, reply, app, avp);
+ add_result_to_reply(result, wu, request, reply, bavp);
// if this is a one-job assignment, fill in assignment.resultid
// so that it doesn't get sent again
diff --git a/sched/sched_locality.C b/sched/sched_locality.C
index 62916a717d..8d050d61a2 100644
--- a/sched/sched_locality.C
+++ b/sched/sched_locality.C
@@ -276,15 +276,14 @@ static int possibly_send_result(
DB_RESULT result2;
int retval, count;
char buf[256];
- APP* app;
- APP_VERSION* avp;
+ BEST_APP_VERSION* bavp;
retval = wu.lookup_id(result.workunitid);
if (retval) return ERR_DB_NOT_FOUND;
- bool found = get_app_version(sreq, reply, wu, app, avp);
+ bavp = get_app_version(sreq, reply, wu);
- if (!found && anonymous(sreq.platforms.list[0])) {
+ if (!bavp && anonymous(sreq.platforms.list[0])) {
char help_msg_buf[512];
sprintf(help_msg_buf,
"To get more %s work, finish current work, stop BOINC, remove app_info.xml file, and restart.",
@@ -295,12 +294,13 @@ static int possibly_send_result(
reply.set_delay(DELAY_ANONYMOUS);
}
- if (!found) return ERR_NO_APP_VERSION;
+ if (!bavp) return ERR_NO_APP_VERSION;
// wu_is_infeasible() returns the reason why the WU is not feasible;
// INFEASIBLE_MEM, INFEASIBLE_DISK, INFEASIBLE_CPU.
// see sched_send.h.
//
+ APP* app = ssp->lookup_app(wu.appid);
if (wu_is_infeasible(wu, sreq, reply, *app)) {
return ERR_INSUFFICIENT_RESOURCE;
}
@@ -312,7 +312,7 @@ static int possibly_send_result(
if (count > 0) return ERR_WU_USER_RULE;
}
- return add_result_to_reply(result, wu, sreq, reply, app, avp);
+ return add_result_to_reply(result, wu, sreq, reply, bavp);
}
// returns true if the work generator can not make more work for this
diff --git a/sched/sched_plan.C b/sched/sched_plan.C
index 78748af40b..97c3ec4120 100644
--- a/sched/sched_plan.C
+++ b/sched/sched_plan.C
@@ -27,8 +27,48 @@
// (you need to prevent that from being overwritten too)
// In either case, put your version under source-code control, e.g. SVN
+#include "sched_msgs.h"
#include "sched_plan.h"
-bool app_plan(HOST& host, char* plan_class, HOST_USAGE& hu) {
+// Return (in ncpus) the number of usable CPUs on the requesting host,
+// taking the max_ncpus_pct global preference into account.
+// bounded is set to true if that preference limits the count
+// (i.e. it is nonzero and less than 100), and to false otherwise.
+//
+// The result is never less than 1: truncating the percentage
+// (e.g. 50% of a 1-CPU host) must not produce zero usable CPUs,
+// since app_plan() would then ask for "--nthreads 0" and estimate 0 FLOPS.
+//
+static void get_ncpus(SCHEDULER_REQUEST& sreq, int& ncpus, bool& bounded) {
+    ncpus = sreq.host.p_ncpus;
+    bounded = false;
+    if (sreq.global_prefs.max_ncpus_pct && sreq.global_prefs.max_ncpus_pct < 100) {
+        bounded = true;
+        ncpus = (int)((ncpus*sreq.global_prefs.max_ncpus_pct)/100.);
+    }
+    if (ncpus < 1) ncpus = 1;
+}
+
+// Decide whether an app version with the given plan class can be used
+// for the host making this request.
+// If so, fill in hu (avg/max CPU count, command line, FLOPS estimate)
+// and return true.
+// Return false for clients older than 6.1.11 and for unknown plan classes
+// (the latter is also logged as a critical error).
+//
+bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
+    // clients before 6.1.11 don't understand plan_class;
+    // fold major.minor.release into one integer for the comparison
+    //
+    int client_version = sreq.core_client_major_version*10000
+        + sreq.core_client_minor_version*100
+        + sreq.core_client_release;
+    if (client_version < 60111) {
+        return false;
+    }
+
+    bool is_multithread = (strcmp(plan_class, "mt") == 0);
+    if (is_multithread) {
+        // "mt" is for an app that can use anywhere from 1 to 64 threads,
+        // can control this exactly, and whose speedup is .95N
+        // (so a sequential app will be used if one is available)
+        //
+        int usable_cpus;
+        bool prefs_limited;     // filled in by get_ncpus(); not used here
+        get_ncpus(sreq, usable_cpus, prefs_limited);
+
+        int nthreads = (usable_cpus > 64) ? 64 : usable_cpus;
+        hu.avg_ncpus = nthreads;
+        hu.max_ncpus = nthreads;
+        sprintf(hu.cmdline, "--nthreads %d", nthreads);
+        hu.flops = 0.95*sreq.host.p_fpops*nthreads;
+        return true;
+    }
+
+    log_messages.printf(MSG_CRITICAL,
+        "Unknown plan class: %s\n", plan_class
+    );
return false;
}
diff --git a/sched/sched_plan.h b/sched/sched_plan.h
index a252965cc1..fc21503b25 100644
--- a/sched/sched_plan.h
+++ b/sched/sched_plan.h
@@ -20,4 +20,4 @@
#include "boinc_db.h"
#include "server_types.h"
-extern bool app_plan(HOST&, char* plan_class, HOST_USAGE&);
+extern bool app_plan(SCHEDULER_REQUEST&, char* plan_class, HOST_USAGE&);
diff --git a/sched/sched_resend.C b/sched/sched_resend.C
index 311ab97f82..bc9b0f052f 100644
--- a/sched/sched_resend.C
+++ b/sched/sched_resend.C
@@ -105,8 +105,7 @@ bool resend_lost_work(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
bool did_any = false;
int num_eligible_to_resend=0;
int num_resent=0;
- APP* app;
- APP_VERSION* avp;
+ BEST_APP_VERSION* bavp;
int retval;
sprintf(buf, " where hostid=%d and server_state=%d ",
@@ -139,8 +138,8 @@ bool resend_lost_work(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
continue;
}
- found = get_app_version(sreq, reply, wu, app, avp);
- if (!found) {
+ bavp = get_app_version(sreq, reply, wu);
+ if (!bavp) {
log_messages.printf(MSG_CRITICAL,
"[HOST#%d] no app version [RESULT#%d]\n",
reply.host.id, result.id
@@ -186,9 +185,7 @@ bool resend_lost_work(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
USER_MESSAGE um(warning_msg, "high");
reply.insert_message(um);
} else {
- retval = add_result_to_reply(
- result, wu, sreq, reply, app, avp
- );
+ retval = add_result_to_reply(result, wu, sreq, reply, bavp);
if (retval) {
log_messages.printf(MSG_CRITICAL,
"[HOST#%d] failed to send [RESULT#%d]\n",
diff --git a/sched/sched_send.C b/sched/sched_send.C
index 7a91763fc8..49bdf00652 100644
--- a/sched/sched_send.C
+++ b/sched/sched_send.C
@@ -109,57 +109,56 @@ bool SCHEDULER_REQUEST::has_version(APP& app) {
return false;
}
-// return the APP and the best APP_VERSION for the given host.
-// return false if none
+// return BEST_APP_VERSION for the given host, or NULL if none
//
//
-bool get_app_version(
- SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply,
- WORKUNIT& wu, APP* &app, APP_VERSION* &avp
+BEST_APP_VERSION* get_app_version(
+ SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply, WORKUNIT& wu
) {
bool found;
double flops;
unsigned int i;
int j;
+ BEST_APP_VERSION* bavp;
- app = ssp->lookup_app(wu.appid);
- if (!app) {
- log_messages.printf(MSG_CRITICAL, "Can't find APP#%d\n", wu.appid);
- return false;
+ //
+ // see if app is already in memoized array
+ //
+ for (i=0; iappid == wu.appid) {
+ if (!bavp->avp) return NULL;
+ return bavp;
+ }
}
+ APP* app = ssp->lookup_app(wu.appid);
+ if (!app) {
+ log_messages.printf(MSG_CRITICAL, "app not found: %d\n", wu.appid);
+ return NULL;
+ }
+
+ bavp = new BEST_APP_VERSION;
+ bavp->appid = wu.appid;
if (anonymous(sreq.platforms.list[0])) {
found = sreq.has_version(*app);
if (!found) {
log_messages.printf(MSG_DEBUG, "Didn't find anonymous app\n");
- return false;
+ bavp->avp = 0;
+ } else {
+ bavp->avp = (APP_VERSION*)1; // arbitrary nonzero value
}
- avp = NULL;
- return true;
+ reply.wreq.best_app_versions.push_back(bavp);
+ return bavp;
}
- // see if app is already in memoized array
- //
- for (i=0; ihost_usage.flops = 0;
+ bavp->avp = NULL;
for (i=0; inapp_versions; j++) {
@@ -172,51 +171,45 @@ bool get_app_version(
continue;
}
if (strlen(av.plan_class)) {
- if (app_plan(reply.host, av.plan_class, host_usage)) {
- flops = host_usage.flops;
- } else {
- flops = 0;
+ if (!app_plan(sreq, av.plan_class, host_usage)) {
+ continue;
}
} else {
- flops = reply.host.p_fpops;
+ host_usage.init_seq(reply.host.p_fpops);
}
- if (flops > bav.host_usage.flops) {
- bav.host_usage.flops = flops;
- bav.avp = &av;
+ if (host_usage.flops > bavp->host_usage.flops) {
+ bavp->host_usage = host_usage;
+ bavp->avp = &av;
}
}
}
- if (bav.avp) {
- reply.wreq.best_app_versions.push_back(bav);
- avp = bav.avp;
+ reply.wreq.best_app_versions.push_back(bavp);
+ if (bavp->avp) {
if (config.debug_version_select) {
log_messages.printf(MSG_DEBUG,
"Best version of app %s is %d (%f FLOPS)\n",
- app->name, avp->id, bav.host_usage.flops
+ app->name, bavp->avp->id, bavp->host_usage.flops
);
}
- return true;
- }
-
- // here if no app version exists
- //
- reply.wreq.best_app_versions.push_back(bav);
-
- if (config.debug_version_select) {
- log_messages.printf(MSG_DEBUG,
- "no app version available: APP#%d PLATFORM#%d min_version %d\n",
- app->id, sreq.platforms.list[0]->id, app->min_version
+ } else {
+ // here if no app version exists
+ //
+ if (config.debug_version_select) {
+ log_messages.printf(MSG_DEBUG,
+ "no app version available: APP#%d PLATFORM#%d min_version %d\n",
+ app->id, sreq.platforms.list[0]->id, app->min_version
+ );
+ }
+ char message[256];
+ sprintf(message,
+ "%s is not available for your type of computer.",
+ app->user_friendly_name
);
+ USER_MESSAGE um(message, "high");
+ reply.wreq.insert_no_work_message(um);
+ reply.wreq.no_app_version = true;
}
- char message[256];
- sprintf(message,
- "%s is not available for your type of computer.",
- app->user_friendly_name
- );
- USER_MESSAGE um(message, "high");
- reply.wreq.insert_no_work_message(um);
- reply.wreq.no_app_version = true;
- return false;
+ return bavp;
}
static char* find_user_friendly_name(int appid) {
@@ -711,11 +704,14 @@ bool app_core_compatible(WORK_REQ& wreq, APP_VERSION& av) {
// Add the app and app_version to the reply also.
//
int add_wu_to_reply(
- WORKUNIT& wu, SCHEDULER_REPLY& reply, APP* app, APP_VERSION* avp
+ WORKUNIT& wu, SCHEDULER_REPLY& reply, APP* app, BEST_APP_VERSION* bavp
) {
int retval;
WORKUNIT wu2, wu3;
+ APP_VERSION* avp = bavp->avp;
+ if (avp == (APP_VERSION*)1) avp = NULL;
+
// add the app, app_version, and workunit to the reply,
// but only if they aren't already there
//
@@ -878,13 +874,14 @@ void SCHEDULER_REPLY::got_bad_result() {
int add_result_to_reply(
DB_RESULT& result, WORKUNIT& wu, SCHEDULER_REQUEST& request,
- SCHEDULER_REPLY& reply, APP* app, APP_VERSION* avp
+ SCHEDULER_REPLY& reply, BEST_APP_VERSION* bavp
) {
int retval;
double wu_seconds_filled;
bool resent_result = false;
+ APP* app = ssp->lookup_app(wu.appid);
- retval = add_wu_to_reply(wu, reply, app, avp);
+ retval = add_wu_to_reply(wu, reply, app, bavp);
if (retval) return retval;
// in the scheduling locality case,
@@ -996,11 +993,7 @@ int add_result_to_reply(
);
return retval;
}
- if (avp) {
- PLATFORM* pp = ssp->lookup_platform_id(avp->platformid);
- strcpy(result.platform_name, pp->name);
- result.version_num = avp->version_num;
- }
+ result.bavp = bavp;
reply.insert_result(result);
reply.wreq.seconds_to_fill -= wu_seconds_filled;
request.estimated_delay += wu_seconds_filled/effective_ncpus(reply.host);
diff --git a/sched/sched_send.h b/sched/sched_send.h
index d59a34803c..a5027c2684 100644
--- a/sched/sched_send.h
+++ b/sched/sched_send.h
@@ -21,13 +21,13 @@ extern void send_work(SCHEDULER_REQUEST&, SCHEDULER_REPLY&);
extern int add_result_to_reply(
DB_RESULT& result, WORKUNIT& wu, SCHEDULER_REQUEST&, SCHEDULER_REPLY&,
- APP* app, APP_VERSION* avp
+ BEST_APP_VERSION* bavp
);
extern bool anonymous(PLATFORM*);
-extern bool get_app_version(
- SCHEDULER_REQUEST&, SCHEDULER_REPLY&, WORKUNIT&, APP*&, APP_VERSION*&
+extern BEST_APP_VERSION* get_app_version(
+ SCHEDULER_REQUEST&, SCHEDULER_REPLY&, WORKUNIT&
);
extern bool app_core_compatible(WORK_REQ& wreq, APP_VERSION& av);
diff --git a/sched/server_types.C b/sched/server_types.C
index 0fc0e5d574..9d1cf44bfb 100644
--- a/sched/server_types.C
+++ b/sched/server_types.C
@@ -831,7 +831,7 @@ int APP::write(FILE* fout) {
}
int APP_VERSION::write(FILE* fout) {
- char buf[LARGE_BLOB_SIZE], buf2[256];
+ char buf[LARGE_BLOB_SIZE];
strcpy(buf, xml_doc);
char* p = strstr(buf, "");
if (!p) {
@@ -841,14 +841,18 @@ int APP_VERSION::write(FILE* fout) {
*p = 0;
fputs(buf, fout);
PLATFORM* pp = ssp->lookup_platform_id(platformid);
- sprintf(buf2, " %s\n", pp->name);
- fputs(buf2, fout);
+ fprintf(fout, " %s\n", pp->name);
+ if (strlen(plan_class)) {
+ fprintf(fout, " %s\n", plan_class);
+ }
fputs("\n", fout);
return 0;
}
int RESULT::write_to_client(FILE* fout) {
- char buf[LARGE_BLOB_SIZE], buf2[256];
+ char buf[LARGE_BLOB_SIZE];
+ unsigned int i;
+
strcpy(buf, xml_doc_in);
char* p = strstr(buf, "");
if (!p) {
@@ -858,15 +862,44 @@ int RESULT::write_to_client(FILE* fout) {
*p = 0;
fputs(buf, fout);
- // platform name will be null in anonymous case; don't send
- //
- if (strlen(platform_name)) {
- sprintf(buf2,
+ APP_VERSION* avp = bavp->avp;
+ if (avp == (APP_VERSION*)1) avp = NULL;
+ if (avp) {
+ PLATFORM* pp = ssp->lookup_platform_id(avp->platformid);
+ fprintf(fout,
" %s\n"
" %d\n",
- platform_name, version_num
+ pp->name, avp->version_num
);
- fputs(buf2, fout);
+ if (strlen(avp->plan_class)) {
+ fprintf(fout,
+ " %s\n"
+ " %f\n"
+ " %f\n"
+ " %f\n",
+ avp->plan_class,
+ bavp->host_usage.avg_ncpus,
+ bavp->host_usage.max_ncpus,
+ bavp->host_usage.flops
+ );
+ if (strlen(bavp->host_usage.cmdline)) {
+ fprintf(fout,
+ " %s\n",
+ bavp->host_usage.cmdline
+ );
+ }
+ for (i=0; ihost_usage.coprocs.coprocs.size(); i++) {
+ COPROC& cp = bavp->host_usage.coprocs.coprocs[i];
+ fprintf(fout,
+ " \n"
+ " %s\n"
+ " %d\n"
+ " \n",
+ cp.name,
+ cp.count
+ );
+ }
+ }
}
fputs("\n", fout);
return 0;
@@ -1062,6 +1095,7 @@ void GLOBAL_PREFS::parse(const char* buf, const char* venue) {
if (parse_double(buf2, "", dtemp)) {
ram_max_used_idle_frac = dtemp/100.;
}
+ parse_double(buf2, "", max_ncpus_pct);
}
void GLOBAL_PREFS::defaults() {
diff --git a/sched/server_types.h b/sched/server_types.h
index 345e8f66f7..23e8725b13 100644
--- a/sched/server_types.h
+++ b/sched/server_types.h
@@ -73,9 +73,18 @@ struct USER_MESSAGE {
struct HOST_USAGE {
COPROCS coprocs;
- double ncpus;
+ double avg_ncpus; // presumably the average number of CPUs used; app_plan() sets it to the thread count
+ double max_ncpus; // presumably the maximum number of CPUs used; app_plan() sets it to the thread count
double flops;
- char opaque[256];
+ char cmdline[256]; // command-line text for the app; app_plan() writes "--nthreads N" here
+
+ void init_seq(double x) { // set usage for a sequential app: one CPU, no coprocs, flops = x
+ coprocs.coprocs.clear(); // start with no coprocessor entries
+ avg_ncpus = 1;
+ max_ncpus = 1;
+ flops = x;
+ strcpy(cmdline, ""); // empty command line
+ }
};
// keep track of the best app_version for each app for this host
@@ -107,7 +116,7 @@ struct WORK_REQ {
RESOURCE bandwidth;
std::vector no_work_messages;
- std::vector best_app_versions;
+ std::vector best_app_versions;
bool no_allowed_apps_available;
bool excessive_work_buf;
@@ -163,6 +172,7 @@ struct GLOBAL_PREFS {
double work_buf_min_days;
double ram_max_used_busy_frac;
double ram_max_used_idle_frac;
+ double max_ncpus_pct;
void parse(const char* buf, const char* venue);
void defaults();
diff --git a/sched/validator.C b/sched/validator.C
index 42b5cda66c..66b91752e1 100644
--- a/sched/validator.C
+++ b/sched/validator.C
@@ -586,7 +586,15 @@ bool do_validate_scan(APP& app) {
wu_id_modulus, wu_id_remainder,
items
);
- if (retval) break;
+ if (retval) {
+ if (retval != ERR_DB_NOT_FOUND) {
+ log_messages.printf(MSG_DEBUG,
+ "DB connection lost, exiting\n"
+ );
+ exit(0);
+ }
+ break;
+ }
retval = handle_wu(validator, items);
if (!retval) found = true;
}
diff --git a/tools/update_versions b/tools/update_versions
index 070fb9cdd1..13229388c0 100755
--- a/tools/update_versions
+++ b/tools/update_versions
@@ -68,7 +68,7 @@ def add_files(
assert(exec_files[0])
version_major, version_minor, platform_name, plan_class = match.groups()
if plan_class:
- plan_class = plan_class[1:] # drop leading :
+ plan_class = plan_class[2:] # drop leading __
version_num = int(version_major) * 100 + int(version_minor)
file_base = os.path.basename(exec_files[0])
@@ -118,7 +118,7 @@ def add_files(
def re_match_exec_filename(filepath):
file = os.path.basename(filepath)
- return re.match('[^.]+_([0-9]+)[.]([0-9]+)_([^.]+?(?:[0-9][0-9.]*[0-9])?)([:][^.]+)?(?:[.]gz|[.]exe|[.]sit|[.]msi)?$', file)
+ return re.match('[^.]+_([0-9]+)[.]([0-9]+)_([^.]+?(?:[0-9][0-9.]*[0-9])?)(__[^.]+)?(?:[.]gz|[.]exe|[.]sit|[.]msi)?$', file)
def find_versions(app, dir):
"""Find application versions in DIR.
diff --git a/version.h b/version.h
index a2dd6ad10f..52d5a93f55 100644
--- a/version.h
+++ b/version.h
@@ -10,10 +10,10 @@
#define BOINC_MINOR_VERSION 1
/* Release part of BOINC version number */
-#define BOINC_RELEASE 10
+#define BOINC_RELEASE 11
/* String representation of BOINC version number */
-#define BOINC_VERSION_STRING "6.1.10"
+#define BOINC_VERSION_STRING "6.1.11"
#if (defined(_WIN32) || defined(__APPLE__))
/* Name of package */
@@ -26,13 +26,13 @@
#define PACKAGE_NAME "BOINC"
/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "BOINC 6.1.10"
+#define PACKAGE_STRING "BOINC 6.1.11"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "boinc"
/* Define to the version of this package. */
-#define PACKAGE_VERSION "6.1.10"
+#define PACKAGE_VERSION "6.1.11"
#endif /* #if (defined(_WIN32) || defined(__APPLE__)) */