diff --git a/api/boinc_api.C b/api/boinc_api.C index 562aadffa2..b2ea2ad696 100644 --- a/api/boinc_api.C +++ b/api/boinc_api.C @@ -202,18 +202,13 @@ static int setup_shared_mem() { return 0; } -// Return CPU time of worker thread (and optionally others) -// This may be called from any thread +// Return CPU time of process. // double boinc_worker_thread_cpu_time() { double cpu; #ifdef _WIN32 int retval; - if (options.all_threads_cpu_time) { - retval = boinc_process_cpu_time(cpu); - } else { - retval = boinc_thread_cpu_time(worker_thread_handle, cpu); - } + retval = boinc_process_cpu_time(cpu); if (retval) { cpu = nrunning_ticks * TIMER_PERIOD; // for Win9x } diff --git a/api/boinc_api.h b/api/boinc_api.h index 9af1fe3334..7408500a40 100644 --- a/api/boinc_api.h +++ b/api/boinc_api.h @@ -50,9 +50,6 @@ typedef struct BOINC_OPTIONS { // if heartbeat fail, or get process control msg, take // direction action (exit, suspend, resume). // Otherwise just set flag in BOINC status - int all_threads_cpu_time; - // count the CPU time of all threads - // (for apps that have multiple worker threads) int worker_thread_stack_size; // if nonzero, the worker thread stack size limit int backwards_compatible_graphics; @@ -148,7 +145,6 @@ inline void boinc_options_defaults(BOINC_OPTIONS& b) { b.handle_process_control = 1; b.send_status_msgs = 1; b.direct_process_action = 1; - b.all_threads_cpu_time = 0; b.worker_thread_stack_size = 0; b.backwards_compatible_graphics = 1; } diff --git a/checkin_notes b/checkin_notes index e93c0b1f67..9d2b7cc812 100644 --- a/checkin_notes +++ b/checkin_notes @@ -2556,13 +2556,15 @@ David Mar 19 2008 sched_send.C Charlie Mar 20 2008 - - Client: fix a compiler warning which indicated a real logic error (variable - used uninitialized). - - Mac: More work on backtrace code: run atos utility via a bidirectional pipe - instead of backtrace_symbols_fd() API to get better symbols in backtrace. 
- Set visibility of all variables in Client and Manager back to hidden (as - before) to reduce size of executables, since atos utility can use hidden - symbols. This also involves return to previous wxWidgets build script. + - Client: fix a compiler warning which indicated a real logic error + (variable used uninitialized). + - Mac: More work on backtrace code: run atos utility via a bidirectional + pipe instead of backtrace_symbols_fd() API to get better symbols + in backtrace. + Set visibility of all variables in Client and Manager back to hidden + (as before) to reduce size of executables, + since atos utility can use hidden symbols. + This also involves return to previous wxWidgets build script. client/ app_start.C @@ -2641,3 +2643,65 @@ Rom Mar 27 2008 boinccas.dll boinccas95.dll +David Mar 27 2008 + Changes for multithread app support: + + - update_versions: use __ (not :) as separator for plan class + - client: add plan_class to APP_VERSION; + an app version is now identified by platform/version/plan_class + - client CPU scheduler: don't assume apps use 1 CPU + - client: add avg_ncpus, max_ncpus, flops, cmdline to RESULT + - scheduler: implement app planning scheme + + Other changes: + + - client: if symlink() fails, make an XML soft link instead + (for Unix running off a FAT32 FS) + - client: don't accept nonpositive resource share from AMS + - daemons and DB: check for error returns from enumerations, + and exit if so. Thus, if the MySQL server goes down, + all the daemons will soon exit. + The cron script will restart them every 5 min, + so when the DB server comes back up so will the project. 
+ - web: show empty max CPU % as --- + - API: get rid of all_threads_cpu_time option (always the case now) + + api/ + boinc_api.C,h + client/ + acct_mgr.C + app.C,h + app_start.C + client_state.C,h + client_types.C,h + cpu_sched.C + cs_scheduler.C + cs_statefile.C + configure.ac + db/ + boinc_db.C,h + html/inc/ + countries.inc + prefs.inc + lib/ + app_ipc.C,h + error_numbers.h + shmem.C + sched/ + assimilator.C + db_purge.C + feeder.C + file_deleter.C + make_work.C + message_handler.C + sched_array.C + sched_assign.C + sched_locality.C + sched_plan.C,h + sched_resend.C + sched_send.C,h + server_types.C,h + validator.C + tools/ + update_versions + version.h diff --git a/client/acct_mgr.C b/client/acct_mgr.C index 0341c0a1dc..9b13f97c8f 100644 --- a/client/acct_mgr.C +++ b/client/acct_mgr.C @@ -234,7 +234,13 @@ int AM_ACCOUNT::parse(XML_PARSER& xp) { continue; } if (xp.parse_double(tag, "resource_share", dtemp)) { - resource_share.set(dtemp); + if (dtemp > 0) { + resource_share.set(dtemp); + } else { + msg_printf(NULL, MSG_INFO, + "Resource share out of range: %f", dtemp + ); + } continue; } if (log_flags.unparsed_xml) { diff --git a/client/app.C b/client/app.C index 780ce25079..93dbf51da6 100644 --- a/client/app.C +++ b/client/app.C @@ -112,7 +112,6 @@ ACTIVE_TASK::ACTIVE_TASK() { too_large = false; needs_shmem = false; want_network = 0; - nthreads = 1; memset(&procinfo, 0, sizeof(procinfo)); #ifdef _WIN32 pid_handle = 0; @@ -554,7 +553,8 @@ int ACTIVE_TASK::parse(MIOFILE& fin) { wup = result->wup; app_version = gstate.lookup_app_version( - result->app, result->platform, result->version_num + result->app, result->platform, result->version_num, + result->plan_class ); if (!app_version) { msg_printf( diff --git a/client/app.h b/client/app.h index fdc9d7224f..9ef5e7fc72 100644 --- a/client/app.h +++ b/client/app.h @@ -109,8 +109,6 @@ public: double abort_time; // when we sent an abort message to this app // kill it 5 seconds later if it doesn't exit - int nthreads; - 
// current # of threads in app (assumed to be 1 by default) APP_CLIENT_SHM app_client_shm; // core/app shared mem MSG_QUEUE graphics_request_queue; MSG_QUEUE process_control_queue; diff --git a/client/app_start.C b/client/app_start.C index 61d73b21a8..fca77bea48 100644 --- a/client/app_start.C +++ b/client/app_start.C @@ -211,6 +211,19 @@ int ACTIVE_TASK::write_app_init_file() { return retval; } +static int make_soft_link(PROJECT* project, char* link_path, char* rel_file_path) { + FILE *fp = boinc_fopen(link_path, "w"); + if (!fp) { + msg_printf(project, MSG_INTERNAL_ERROR, + "Can't create link file %s", link_path + ); + return ERR_FOPEN; + } + fprintf(fp, "%s\n", rel_file_path); + fclose(fp); + return 0; +} + // set up a file reference, given a slot dir and project dir. // This means: // 1) copy the file to slot dir, if reference is by copy @@ -251,23 +264,17 @@ static int setup_file( } #ifdef _WIN32 - FILE *fp = boinc_fopen(link_path, "w"); - if (!fp) { - msg_printf(project, MSG_INTERNAL_ERROR, - "Can't open link file %s", link_path - ); - return ERR_FOPEN; - } - fprintf(fp, "%s\n", rel_file_path); - fclose(fp); + retval = make_soft_link(project, link_path, rel_file_path); + if (retval) return retval; #else retval = symlink(rel_file_path, link_path); if (retval) { - msg_printf(project, MSG_INTERNAL_ERROR, - "Can't symlink %s to %s: %d", rel_file_path, link_path, retval - ); - perror("symlink"); - return ERR_SYMLINK; + // A Unix system can't make symlinks if the filesystem is FAT32 + // (e.g. external USB disk). + // Try making a soft link instead. 
+ // + retval = make_soft_link(project, link_path, rel_file_path); + if (retval) return retval; } #endif #ifdef SANDBOX @@ -615,6 +622,10 @@ int ACTIVE_TASK::start(bool first_time) { argv[0] = exec_name; char cmdline[8192]; strcpy(cmdline, wup->command_line.c_str()); + if (strlen(result->cmdline)) { + strcat(cmdline, " "); + strcat(cmdline, result->cmdline); + } parse_command_line(cmdline, argv+1); if (log_flags.task_debug) { debug_print_argv(argv); @@ -749,6 +760,10 @@ int ACTIVE_TASK::start(bool first_time) { #endif char cmdline[8192]; strcpy(cmdline, wup->command_line.c_str()); + if (strlen(result->cmdline)) { + strcat(cmdline, " "); + strcat(cmdline, result->cmdline); + } sprintf(buf, "../../%s", exec_path ); if (g_use_sandbox) { char switcher_path[100]; diff --git a/client/client_state.C b/client/client_state.C index 98b4dd3f64..c4074a58c1 100644 --- a/client/client_state.C +++ b/client/client_state.C @@ -665,7 +665,7 @@ WORKUNIT* CLIENT_STATE::lookup_workunit(PROJECT* p, const char* name) { } APP_VERSION* CLIENT_STATE::lookup_app_version( - APP* app, char* platform, int version_num + APP* app, char* platform, int version_num, char* plan_class ) { for (unsigned int i=0; iplatform, platform)) continue; + if (strcmp(avp->plan_class, plan_class)) continue; return avp; } return 0; @@ -722,10 +723,10 @@ int CLIENT_STATE::link_app_version(PROJECT* p, APP_VERSION* avp) { } avp->app = app; - if (lookup_app_version(app, avp->platform, avp->version_num)) { + if (lookup_app_version(app, avp->platform, avp->version_num, avp->plan_class)) { msg_printf(p, MSG_INTERNAL_ERROR, - "State file error: duplicate app version: %s %s %d", - avp->app_name, avp->platform, avp->version_num + "State file error: duplicate app version: %s %s %d %s", + avp->app_name, avp->platform, avp->version_num, avp->plan_class ); return ERR_NOT_UNIQUE; } diff --git a/client/client_state.h b/client/client_state.h index f0ed17f829..791d6035e2 100644 --- a/client/client_state.h +++ b/client/client_state.h 
@@ -222,7 +222,9 @@ public: FILE_INFO* lookup_file_info(PROJECT*, const char* name); RESULT* lookup_result(PROJECT*, const char*); WORKUNIT* lookup_workunit(PROJECT*, const char*); - APP_VERSION* lookup_app_version(APP*, char* platform, int ver); + APP_VERSION* lookup_app_version( + APP*, char* platform, int ver, char* plan_class + ); int detach_project(PROJECT*); int report_result_error(RESULT&, const char *format, ...); int reset_project(PROJECT*, bool detaching); @@ -262,7 +264,7 @@ private: bool enforce_schedule(); bool no_work_for_a_cpu(); void rr_simulation(); - void make_running_task_heap(vector&); + void make_running_task_heap(vector&, double&); void print_deadline_misses(); public: double retry_shmem_time; diff --git a/client/client_types.C b/client/client_types.C index 36738801d3..68ce8f1973 100644 --- a/client/client_types.C +++ b/client/client_types.C @@ -1079,6 +1079,7 @@ int APP_VERSION::parse(MIOFILE& in) { strcpy(api_version, ""); version_num = 0; strcpy(platform, ""); + strcpy(plan_class, ""); app = NULL; project = NULL; while (in.fgets(buf, 256)) { @@ -1092,6 +1093,7 @@ int APP_VERSION::parse(MIOFILE& in) { if (parse_int(buf, "", version_num)) continue; if (parse_str(buf, "", api_version, sizeof(api_version))) continue; if (parse_str(buf, "", platform, sizeof(platform))) continue; + if (parse_str(buf, "", plan_class, sizeof(plan_class))) continue; if (log_flags.unparsed_xml) { msg_printf(0, MSG_INFO, "[unparsed_xml] APP_VERSION::parse(): unrecognized: %s\n", buf @@ -1114,6 +1116,9 @@ int APP_VERSION::write(MIOFILE& out) { version_num, platform ); + if (strlen(plan_class)) { + out.printf(" %s\n", plan_class); + } if (strlen(api_version)) { out.printf(" %s\n", api_version); } @@ -1404,6 +1409,11 @@ void RESULT::clear() { project = NULL; version_num = 0; strcpy(platform, ""); + strcpy(plan_class, ""); + strcpy(cmdline, ""); + avg_ncpus = 1; + max_ncpus = 1; + flops = gstate.host_info.p_fpops; } // parse a element from scheduling server. 
@@ -1419,7 +1429,12 @@ int RESULT::parse_server(MIOFILE& in) { if (parse_str(buf, "", wu_name, sizeof(wu_name))) continue; if (parse_double(buf, "", report_deadline)) continue; if (parse_str(buf, "", platform, sizeof(platform))) continue; + if (parse_str(buf, "", plan_class, sizeof(plan_class))) continue; if (parse_int(buf, "", version_num)) continue; + if (parse_double(buf, "", avg_ncpus)) continue; + if (parse_double(buf, "", max_ncpus)) continue; + if (parse_double(buf, "", flops)) continue; + if (parse_str(buf, "", cmdline, sizeof(cmdline))) continue; if (match_tag(buf, "")) { file_ref.parse(in); output_files.push_back(file_ref); @@ -1481,7 +1496,12 @@ int RESULT::parse_state(MIOFILE& in) { if (parse_double(buf, "", intops_per_cpu_sec)) continue; if (parse_double(buf, "", intops_cumulative)) continue; if (parse_str(buf, "", platform, sizeof(platform))) continue; + if (parse_str(buf, "", plan_class, sizeof(plan_class))) continue; if (parse_int(buf, "", version_num)) continue; + if (parse_double(buf, "", avg_ncpus)) continue; + if (parse_double(buf, "", max_ncpus)) continue; + if (parse_double(buf, "", flops)) continue; + if (parse_str(buf, "", cmdline, sizeof(cmdline))) continue; if (log_flags.unparsed_xml) { msg_printf(0, MSG_INFO, "[unparsed_xml] RESULT::parse(): unrecognized: %s\n", buf @@ -1503,14 +1523,26 @@ int RESULT::write(MIOFILE& out, bool to_server) { " %d\n" " %d\n" " %s\n" - " %d\n", + " %d\n" + " %f\n" + " %f\n" + " %f\n", name, final_cpu_time, exit_status, state(), platform, - version_num + version_num, + avg_ncpus, + max_ncpus, + flops ); + if (strlen(plan_class)) { + out.printf(" %s\n", plan_class); + } + if (strlen(cmdline)) { + out.printf(" %s\n", cmdline); + } if (fpops_per_cpu_sec) { out.printf(" %f\n", fpops_per_cpu_sec); } diff --git a/client/client_types.h b/client/client_types.h index 1ece334e55..ac128aa0f6 100644 --- a/client/client_types.h +++ b/client/client_types.h @@ -34,6 +34,7 @@ #include "md5_file.h" #include "hostinfo.h" 
+#include "coproc.h" #include "miofile.h" #define P_LOW 1 @@ -404,6 +405,7 @@ struct APP_VERSION { char app_name[256]; int version_num; char platform[256]; + char plan_class[64]; char api_version[16]; APP* app; PROJECT* project; @@ -452,8 +454,14 @@ struct RESULT { char wu_name[256]; double report_deadline; int version_num; // identifies the app used + char plan_class[64]; char platform[256]; + char cmdline[256]; // additional cmdline args APP_VERSION* avp; + double avg_ncpus; + double max_ncpus; + double flops; + COPROCS coprocs; std::vector output_files; bool ready_to_report; // we're ready to report this result to the server; diff --git a/client/cpu_sched.C b/client/cpu_sched.C index e70ca26a3a..e532653deb 100644 --- a/client/cpu_sched.C +++ b/client/cpu_sched.C @@ -513,9 +513,8 @@ void CLIENT_STATE::schedule_cpus() { #ifdef SIM if (!cpu_sched_rr_only) { #endif - int ncpus_used = 0; - //while (ncpus_used < ncpus) { - while ((int)ordered_scheduled_results.size() < ncpus) { + double ncpus_used = 0; + while (ncpus_used < ncpus) { rp = earliest_deadline_result(); if (!rp) break; rp->already_selected = true; @@ -545,12 +544,8 @@ void CLIENT_STATE::schedule_cpus() { atp->needs_shmem = false; } ram_left -= atp->procinfo.working_set_size_smoothed; - ncpus_used += atp->nthreads; - } else { - // if we haven't run the app yet, assume it has one thread - // - ncpus_used++; } + ncpus_used += rp->avg_ncpus; rp->project->anticipated_debt -= (rp->project->resource_share / rrs) * expected_pay_off; rp->project->deadlines_missed--; @@ -569,7 +564,7 @@ void CLIENT_STATE::schedule_cpus() { // Next, choose results from projects with large debt // - while ((int)ordered_scheduled_results.size() < ncpus) { + while (ncpus_used < ncpus) { assign_results_to_projects(); rp = largest_debt_project_best_result(); if (!rp) break; @@ -599,6 +594,7 @@ void CLIENT_STATE::schedule_cpus() { } ram_left -= atp->procinfo.working_set_size_smoothed; } + ncpus_used += rp->avg_ncpus; double xx = 
(rp->project->resource_share / rrs) * expected_pay_off; rp->project->anticipated_debt -= xx; if (log_flags.cpu_sched_debug) { @@ -614,17 +610,19 @@ void CLIENT_STATE::schedule_cpus() { // make a list of preemptable tasks, ordered by their preemptability. // void CLIENT_STATE::make_running_task_heap( - vector &running_tasks + vector &running_tasks, double& ncpus_used ) { unsigned int i; ACTIVE_TASK* atp; + ncpus_used = 0; for (i=0; iresult->project->non_cpu_intensive) continue; if (!atp->result->runnable()) continue; if (atp->scheduler_state != CPU_SCHED_SCHEDULED) continue; running_tasks.push_back(atp); + ncpus_used += atp->result->avg_ncpus; } std::make_heap( @@ -655,6 +653,7 @@ bool CLIENT_STATE::enforce_schedule() { vector running_tasks; static double last_time = 0; int retval; + double ncpus_used; // Do this when requested, and once a minute as a safety net // @@ -694,13 +693,15 @@ bool CLIENT_STATE::enforce_schedule() { // make heap of currently running tasks, ordered by preemptibility // - make_running_task_heap(running_tasks); + make_running_task_heap(running_tasks, ncpus_used); // if there are more running tasks than ncpus, // then mark the extras for preemption // - while (running_tasks.size() > (unsigned int)ncpus) { - running_tasks[0]->next_scheduler_state = CPU_SCHED_PREEMPTED; + while (ncpus_used > ncpus) { + atp = running_tasks[0]; + atp->next_scheduler_state = CPU_SCHED_PREEMPTED; + ncpus_used -= atp->result->avg_ncpus; std::pop_heap( running_tasks.begin(), running_tasks.end(), @@ -718,11 +719,6 @@ bool CLIENT_STATE::enforce_schedule() { ); } - // keep track of how many tasks we plan on running - // (i.e. 
have next_scheduler_state = SCHEDULED) - // - int nrunning = (int)running_tasks.size(); - // Loop through the scheduled results // for (i=0; iprocinfo.working_set_size_smoothed > ram_left) { atp->next_scheduler_state = CPU_SCHED_PREEMPTED; atp->too_large = true; - nrunning--; + ncpus_used -= atp->result->avg_ncpus; if (log_flags.mem_usage_debug) { msg_printf(rp->project, MSG_INFO, "[mem_usage_debug] enforce: result %s can't continue, too big %.2fMB > %.2fMB", @@ -801,7 +797,7 @@ bool CLIENT_STATE::enforce_schedule() { // Preempt something if needed (and possible). // bool run_task = false; - bool need_to_preempt = (nrunning==ncpus) && running_tasks.size(); + bool need_to_preempt = (ncpus_used >= ncpus) && running_tasks.size(); // the 2nd half of the above is redundant if (need_to_preempt) { // examine the most preemptable task. @@ -821,7 +817,7 @@ bool CLIENT_STATE::enforce_schedule() { rp->project->deadlines_missed--; } atp->next_scheduler_state = CPU_SCHED_PREEMPTED; - nrunning--; + ncpus_used -= atp->result->avg_ncpus; std::pop_heap( running_tasks.begin(), running_tasks.end(), @@ -849,14 +845,14 @@ bool CLIENT_STATE::enforce_schedule() { if (run_task) { atp = get_task(rp); atp->next_scheduler_state = CPU_SCHED_SCHEDULED; - nrunning++; + ncpus_used += rp->avg_ncpus; ram_left -= atp->procinfo.working_set_size_smoothed; } } if (log_flags.cpu_sched_debug) { msg_printf(0, MSG_INFO, - "[cpu_sched_debug] finished preempt loop, nrunning %d", - nrunning + "[cpu_sched_debug] finished preempt loop, ncpus_used %f", + ncpus_used ); } @@ -879,16 +875,13 @@ bool CLIENT_STATE::enforce_schedule() { } } - if (log_flags.cpu_sched_debug && nrunning < ncpus) { - msg_printf(0, MSG_INFO, "[cpu_sched_debug] Some CPUs idle (%d<%d)", - nrunning, ncpus - ); - request_work_fetch("CPUs idle"); - } - if (log_flags.cpu_sched_debug && nrunning > ncpus) { - msg_printf(0, MSG_INFO, "[cpu_sched_debug] Too many tasks started (%d>%d)", - nrunning, ncpus + if (log_flags.cpu_sched_debug && ncpus_used 
< ncpus) { + msg_printf(0, MSG_INFO, "[cpu_sched_debug] using %f out of %d CPUs", + ncpus_used, ncpus ); + if (ncpus_used < ncpus) { + request_work_fetch("CPUs idle"); + } } // schedule new non CPU intensive tasks diff --git a/client/cs_scheduler.C b/client/cs_scheduler.C index 6f418995c2..42a0f8870b 100644 --- a/client/cs_scheduler.C +++ b/client/cs_scheduler.C @@ -660,7 +660,9 @@ int CLIENT_STATE::handle_scheduler_reply( } } APP* app = lookup_app(project, avpp.app_name); - APP_VERSION* avp = lookup_app_version(app, avpp.platform, avpp.version_num); + APP_VERSION* avp = lookup_app_version( + app, avpp.platform, avpp.version_num, avpp.plan_class + ); if (avp) { // if we had download failures, clear them // @@ -713,7 +715,9 @@ int CLIENT_STATE::handle_scheduler_reply( strcpy(rp->platform, get_primary_platform()); rp->version_num = latest_version(rp->wup->app, rp->platform); } - rp->avp = lookup_app_version(rp->wup->app, rp->platform, rp->version_num); + rp->avp = lookup_app_version( + rp->wup->app, rp->platform, rp->version_num, rp->plan_class + ); if (!rp->avp) { msg_printf(project, MSG_INTERNAL_ERROR, "No app version for result: %s %d", diff --git a/client/cs_statefile.C b/client/cs_statefile.C index f5ceccad02..2f9b9c11af 100644 --- a/client/cs_statefile.C +++ b/client/cs_statefile.C @@ -323,11 +323,13 @@ int CLIENT_STATE::parse_state_file() { strcpy(rp->platform, get_primary_platform()); rp->version_num = latest_version(rp->wup->app, rp->platform); } - rp->avp = lookup_app_version(rp->wup->app, rp->platform, rp->version_num); + rp->avp = lookup_app_version( + rp->wup->app, rp->platform, rp->version_num, rp->plan_class + ); if (!rp->avp) { msg_printf(project, MSG_INTERNAL_ERROR, - "No app version for result: %s %d", - rp->platform, rp->version_num + "No app version for result: %s %d %s", + rp->platform, rp->version_num, rp->plan_class ); delete rp; continue; diff --git a/configure.ac b/configure.ac index fb176e8dba..46b9772278 100644 --- a/configure.ac +++ 
b/configure.ac @@ -9,7 +9,7 @@ dnl not sure exactly what the minimum version is (but 2.13 wont work) AC_PREREQ(2.57) dnl Set the BOINC version here. You can also use the set-version script. -AC_INIT(BOINC, 6.1.10) +AC_INIT(BOINC, 6.1.11) AC_ARG_ENABLE(debug, AS_HELP_STRING([--enable-debug], diff --git a/db/boinc_db.C b/db/boinc_db.C index f12e9e6965..be1430527e 100644 --- a/db/boinc_db.C +++ b/db/boinc_db.C @@ -1024,7 +1024,7 @@ int DB_TRANSITIONER_ITEM_SET::enumerate( mysql_free_result(cursor.rp); cursor.active = false; retval = mysql_errno(db->mysql); - if (retval) return retval; + if (retval) return ERR_DB_CONN_LOST; return ERR_DB_NOT_FOUND; } last_item.parse(row); @@ -1227,7 +1227,7 @@ int DB_VALIDATOR_ITEM_SET::enumerate( mysql_free_result(cursor.rp); cursor.active = false; retval = mysql_errno(db->mysql); - if (retval) return retval; + if (retval) return ERR_DB_CONN_LOST; return ERR_DB_NOT_FOUND; } last_item.parse(row); @@ -1368,7 +1368,7 @@ int DB_WORK_ITEM::enumerate( mysql_free_result(cursor.rp); cursor.active = false; retval = mysql_errno(db->mysql); - if (retval) return retval; + if (retval) return ERR_DB_CONN_LOST; return ERR_DB_NOT_FOUND; } else { parse(row); @@ -1415,7 +1415,7 @@ int DB_WORK_ITEM::enumerate_all( mysql_free_result(cursor.rp); cursor.active = false; retval = mysql_errno(db->mysql); - if (retval) return retval; + if (retval) return ERR_DB_CONN_LOST; return ERR_DB_NOT_FOUND; } else { parse(row); @@ -1463,7 +1463,7 @@ int DB_IN_PROGRESS_RESULT::enumerate(int hostid, const char* result_names) { mysql_free_result(cursor.rp); cursor.active = false; retval = mysql_errno(db->mysql); - if (retval) return retval; + if (retval) return ERR_DB_CONN_LOST; return ERR_DB_NOT_FOUND; } else { parse(row); diff --git a/db/boinc_db.h b/db/boinc_db.h index 3398af1b16..39978b0b78 100644 --- a/db/boinc_db.h +++ b/db/boinc_db.h @@ -439,6 +439,8 @@ struct CREDITED_JOB { #define ASSIGN_USER 2 #define ASSIGN_TEAM 3 +struct BEST_APP_VERSION; + struct RESULT { int 
id; int create_time; @@ -484,7 +486,7 @@ struct RESULT { int units; // used for granting credit by # of units processed int parse_from_client(FILE*); char platform_name[256]; - int version_num; + BEST_APP_VERSION* bavp; void clear(); int write_to_client(FILE*); }; diff --git a/doc/boinc_news.php b/doc/boinc_news.php index 4f582f517f..ec6e490128 100644 --- a/doc/boinc_news.php +++ b/doc/boinc_news.php @@ -1,6 +1,10 @@ From distributed computing to distributed thinking." +), array("Mar 5, 2008", "Read Volunteer Computing and the Search for Big Answers, an article about BOINC and volunteer computing on LinuxInsider.com." ), diff --git a/html/inc/countries.inc b/html/inc/countries.inc index 618379addf..eb82c1b9fd 100644 --- a/html/inc/countries.inc +++ b/html/inc/countries.inc @@ -252,7 +252,6 @@ function print_country_select($selected_country="None") { if ($selected_country=="None" and $geoip_country!=""){ $selected_country=$geoip_country; } - echo "selected: $selected_country\n"; $numCountries = count($countries); for ($i=0; $i<$numCountries; $i++) { diff --git a/html/inc/prefs.inc b/html/inc/prefs.inc index e67b66e820..3588c87fa9 100644 --- a/html/inc/prefs.inc +++ b/html/inc/prefs.inc @@ -716,8 +716,10 @@ function prefs_show_global($prefs) { row2(START_END_DESC, $x); row2(LEAVE_APPS_IN_MEMORY_DESC, $prefs->leave_apps_in_memory?"yes":"no"); row2(CPU_SCHEDULING_DESC, "$prefs->cpu_scheduling_period_minutes minutes"); - row2(MAX_CPUS_DESC, "$prefs->max_cpus ".MAX_CPUS_DESC2); - row2(MAX_NCPUS_PCT_DESC, "$prefs->max_ncpus_pct ".MAX_NCPUS_PCT_DESC2); + $x = $prefs->max_cpus?$prefs->max_cpus:'---'; + row2(MAX_CPUS_DESC, "$x ".MAX_CPUS_DESC2); + $x = $prefs->max_ncpus_pct?$prefs->max_ncpus_pct:'---'; + row2(MAX_NCPUS_PCT_DESC, "$x ".MAX_NCPUS_PCT_DESC2); row2(USE_AT_MOST2, "$prefs->cpu_usage_limit ".CPU_USAGE_LIMIT_DESC2); row1(DISK_LIMIT_DESC); row2(USE_AT_MOST, "$prefs->disk_max_used_gb GB disk space"); @@ -1289,7 +1291,7 @@ function prefs_global_parse_form(&$prefs) { if 
(!verify_numeric($start_hour, 0)) $error->start_hour = true; if (!verify_numeric($end_hour, 0)) $error->end_hour = true; if (!verify_numeric($cpu_scheduling_period_minutes, 1)) $error->cpu_scheduling_period_minutes = true;; - if (!verify_numeric($max_cpus, 1)) $error->max_cpus = true; + if (!verify_numeric($max_cpus, 0)) $error->max_cpus = true; if (!verify_numeric($max_ncpus_pct, 0, 100)) $error->max_ncpus_pct = true; if (!verify_numeric($cpu_usage_limit, 0, 100)) $error->cpu_usage_limit = true; if (!verify_numeric($disk_max_used_gb, 0)) $error->disk_max_used_gb = true; diff --git a/lib/app_ipc.C b/lib/app_ipc.C index a4a0f16013..3fe22a55b6 100644 --- a/lib/app_ipc.C +++ b/lib/app_ipc.C @@ -228,7 +228,6 @@ int parse_init_data_file(FILE* f, APP_INIT_DATA& ai) { if (xp.parse_str(tag, "project_dir", ai.project_dir, sizeof(ai.project_dir))) continue; if (xp.parse_str(tag, "boinc_dir", ai.boinc_dir, sizeof(ai.boinc_dir))) continue; if (xp.parse_str(tag, "authenticator", ai.authenticator, sizeof(ai.authenticator))) continue; - if (xp.parse_str(tag, "opaque", ai.opaque, sizeof(ai.opaque))) continue; if (xp.parse_str(tag, "wu_name", ai.wu_name, sizeof(ai.wu_name))) continue; #ifdef _WIN32 if (xp.parse_str(tag, "comm_obj_name", ai.shmem_seg_name, sizeof(ai.shmem_seg_name))) continue; @@ -250,7 +249,6 @@ int parse_init_data_file(FILE* f, APP_INIT_DATA& ai) { if (xp.parse_double(tag, "fraction_done_update_period", ai.fraction_done_update_period)) continue; if (xp.parse_double(tag, "fraction_done_start", ai.fraction_done_start)) continue; if (xp.parse_double(tag, "fraction_done_end", ai.fraction_done_end)) continue; - if (xp.parse_int(tag, "ncpus_available", ai.ncpus_available)) continue; xp.skip_unexpected(tag, true, "parse_init_data_file"); } fprintf(stderr, "parse_init_data_file: no end tag\n"); diff --git a/lib/app_ipc.h b/lib/app_ipc.h index d79d96d24e..d4d24b297d 100644 --- a/lib/app_ipc.h +++ b/lib/app_ipc.h @@ -68,10 +68,8 @@ struct SHARED_MEM { // // // - // 
MSG_CHANNEL process_control_reply; // app->core - // MSG_CHANNEL graphics_request; // core->app // request a graphics mode: @@ -166,7 +164,6 @@ struct APP_INIT_DATA { char boinc_dir[256]; char wu_name[256]; char authenticator[256]; - char opaque[256]; int slot; double user_total_credit; double user_expavg_credit; @@ -195,7 +192,6 @@ struct APP_INIT_DATA { SHMEM_SEG_NAME shmem_seg_name; double wu_cpu_time; // cpu time from previous episodes double fraction_done_update_period; - int ncpus_available; APP_INIT_DATA(); APP_INIT_DATA(const APP_INIT_DATA&); // copy constructor diff --git a/lib/error_numbers.h b/lib/error_numbers.h index 93503f3421..1175d6c9f7 100644 --- a/lib/error_numbers.h +++ b/lib/error_numbers.h @@ -186,6 +186,7 @@ #define ERR_RMDIR -227 #define ERR_CHILD_FAILED -228 #define ERR_SYMLINK -229 +#define ERR_DB_CONN_LOST -230 // PLEASE: add a text description of your error to // the text description function boincerror() in str_util.C. diff --git a/lib/shmem.C b/lib/shmem.C index 6103da1c82..abdc28d79a 100644 --- a/lib/shmem.C +++ b/lib/shmem.C @@ -268,8 +268,9 @@ int detach_shmem(void* p) { } #else -// V6 mmap() shared memory for Unix/Linux/Mac +// V6 mmap() shared memory for Unix/Linux/Mac +// int create_shmem_mmap(char *path, size_t size, void** pp) { int fd, retval; struct stat sbuf; @@ -301,7 +302,6 @@ int create_shmem_mmap(char *path, size_t size, void** pp) { *pp = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0); - // Now close the file. The kernel doesnŐt use our file descriptor. close(fd); if (*pp == MAP_FAILED) { @@ -331,7 +331,6 @@ int attach_shmem_mmap(char *path, void** pp) { *pp = mmap(NULL, sbuf.st_size, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0); - // Now close the file. The kernel doesnŐt use our file descriptor. 
close(fd); if (*pp == MAP_FAILED) { @@ -349,6 +348,7 @@ int detach_shmem_mmap(void* p, size_t size) { // Compatibility routines for Unix/Linux/Mac V5 applications +// int create_shmem(key_t key, int size, gid_t gid, void** pp) { int id; @@ -402,6 +402,7 @@ int create_shmem(key_t key, int size, gid_t gid, void** pp) { // prevents any more processes from attaching (by clearing // the key in the shared memory structure), so BOINC does it // only after we are completey done with the segment. +// int destroy_shmem(key_t key){ struct shmid_ds buf; int id, retval; @@ -427,7 +428,7 @@ int attach_shmem(key_t key, void** pp){ id = shmget(key, 0, 0); if (id < 0) { - perror("shmget"); + perror("shmget in attach_shmem"); return ERR_SHMGET; } p = shmat(id, 0, 0); diff --git a/sched/assimilator.C b/sched/assimilator.C index 2a7e0a1408..281e649a75 100644 --- a/sched/assimilator.C +++ b/sched/assimilator.C @@ -33,6 +33,7 @@ #include "boinc_db.h" #include "parse.h" #include "util.h" +#include "error_numbers.h" #include "str_util.h" #include "sched_config.h" @@ -83,7 +84,17 @@ bool do_pass(APP& app) { app.id, ASSIMILATE_READY, mod_clause, one_pass_N_WU ? one_pass_N_WU : 1000 ); - while (!wu.enumerate(buf)) { + while (1) { + retval = wu.enumerate(buf); + if (retval) { + if (retval != ERR_DB_NOT_FOUND) { + log_messages.printf(MSG_DEBUG, + "DB connection lost, exiting\n" + ); + exit(0); + } + break; + } vector results; // must be inside while()! 
// for testing purposes, pretend we did nothing diff --git a/sched/db_purge.C b/sched/db_purge.C index 805f3f10f2..6876d60263 100644 --- a/sched/db_purge.C +++ b/sched/db_purge.C @@ -475,7 +475,17 @@ bool do_pass() { } int n=0; - while (!wu.enumerate(buf)) { + while (1) { + retval = wu.enumerate(buf); + if (retval) { + if (retval != ERR_DB_NOT_FOUND) { + log_messages.printf(MSG_DEBUG, + "DB connection lost, exiting\n" + ); + exit(0); + } + break; + } if (strstr(wu.name, "nodelete")) continue; did_something = true; diff --git a/sched/feeder.C b/sched/feeder.C index c8cbdd9ce4..53b2b7bdaf 100644 --- a/sched/feeder.C +++ b/sched/feeder.C @@ -241,6 +241,16 @@ static bool get_job_from_db( retval = wi.enumerate(enum_size, select_clause, order_clause); } if (retval) { + if (retval != ERR_DB_NOT_FOUND) { + // If DB server dies, exit; + // so /start (run from crontab) will restart us eventually. + // + log_messages.printf(MSG_CRITICAL, + "DB connection lost, exiting\n" + ); + exit(0); + } + // we've reach the end of the result set // switch (enum_phase) { diff --git a/sched/file_deleter.C b/sched/file_deleter.C index 815ce113ff..57710fa70b 100644 --- a/sched/file_deleter.C +++ b/sched/file_deleter.C @@ -272,7 +272,17 @@ bool do_pass(bool retry_error) { clause, WUS_PER_ENUM ); - while (!wu.enumerate(buf)) { + while (1) { + retval = wu.enumerate(buf); + if (retval) { + if (retval != ERR_DB_NOT_FOUND) { + log_messages.printf(MSG_DEBUG, + "DB connection lost, exiting\n" + ); + exit(0); + } + break; + } did_something = true; retval = 0; diff --git a/sched/make_work.C b/sched/make_work.C index 25d9bf5221..c289019324 100644 --- a/sched/make_work.C +++ b/sched/make_work.C @@ -139,7 +139,7 @@ void make_new_wu(DB_WORKUNIT& original_wu, char* starting_xml, int start_time) { ); exit(retval); } - wu.id = boinc_db.insert_id(); + original_wu.id = boinc_db.insert_id(); log_messages.printf(MSG_DEBUG, "Created %s, clone of %s\n", wu.name, original_wu.name ); @@ -157,6 +157,9 @@ void 
wait_for_results(int wu_id) { sprintf(buf, "where workunitid=%d", wu_id); while (1) { retval = result.count(count, buf); + log_messages.printf(MSG_DEBUG, "result.count for %d returned %d, %d\n", + wu_id, count, retval + ); if (retval) { log_messages.printf(MSG_CRITICAL, "result.count: %d\n", retval); exit(1); diff --git a/sched/message_handler.C b/sched/message_handler.C index 1e21bb5d54..806cbe07bd 100644 --- a/sched/message_handler.C +++ b/sched/message_handler.C @@ -36,6 +36,7 @@ using namespace std; #include "boinc_db.h" #include "util.h" +#include "error_numbers.h" #include "str_util.h" #include "sched_config.h" @@ -76,7 +77,17 @@ bool do_message_scan() { int retval; sprintf(buf, "where handled=0"); - while (!mfh.enumerate(buf)) { + while (1) { + retval = mfh.enumerate(buf); + if (retval) { + if (retval != ERR_DB_NOT_FOUND) { + log_messages.printf(MSG_DEBUG, + "DB connection lost, exiting\n" + ); + exit(0); + } + break; + } retval = handle_message(mfh); if (!retval) { mfh.handled = true; diff --git a/sched/sched_array.C b/sched/sched_array.C index caa24e4790..a2a287487f 100644 --- a/sched/sched_array.C +++ b/sched/sched_array.C @@ -124,8 +124,9 @@ void scan_work_array(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) { // Find the app and best app_version for this host. // - found = get_app_version(sreq, reply, wu, app, avp); - if (!found) { + BEST_APP_VERSION* bavp; + bavp = get_app_version(sreq, reply, wu); + if (!bavp) { continue; } @@ -241,9 +242,7 @@ void scan_work_array(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) { goto done; } - retval = add_result_to_reply( - result, wu, sreq, reply, app, avp - ); + retval = add_result_to_reply(result, wu, sreq, reply, bavp); // add_result_to_reply() fails only in fairly pathological cases - // e.g. we couldn't update the DB record or modify XML fields. 
diff --git a/sched/sched_assign.C b/sched/sched_assign.C index 797411b540..49b623dafd 100644 --- a/sched/sched_assign.C +++ b/sched/sched_assign.C @@ -41,7 +41,7 @@ static int send_assigned_job( static int seqno=0; static R_RSA_PRIVATE_KEY key; APP* app; - APP_VERSION* avp; + BEST_APP_VERSION* bavp; if (first) { first = false; @@ -60,16 +60,9 @@ static int send_assigned_job( ); return retval; } - app = ssp->lookup_app(wu.appid); - if (!app) { - log_messages.printf(MSG_CRITICAL, - "app %d for assigned WU %d not found\n", - wu.appid, wu.id - ); - return ERR_NOT_FOUND; - } - bool found = get_app_version(request, reply, wu, app, avp); - if (!found) { + + bavp = get_app_version(request, reply, wu); + if (!bavp) { log_messages.printf(MSG_CRITICAL, "App version for assigned WU not found\n" ); @@ -88,7 +81,7 @@ static int send_assigned_job( int result_id = boinc_db.insert_id(); DB_RESULT result; retval = result.lookup_id(result_id); - add_result_to_reply(result, wu, request, reply, app, avp); + add_result_to_reply(result, wu, request, reply, bavp); // if this is a one-job assignment, fill in assignment.resultid // so that it doesn't get sent again diff --git a/sched/sched_locality.C b/sched/sched_locality.C index 62916a717d..8d050d61a2 100644 --- a/sched/sched_locality.C +++ b/sched/sched_locality.C @@ -276,15 +276,14 @@ static int possibly_send_result( DB_RESULT result2; int retval, count; char buf[256]; - APP* app; - APP_VERSION* avp; + BEST_APP_VERSION* bavp; retval = wu.lookup_id(result.workunitid); if (retval) return ERR_DB_NOT_FOUND; - bool found = get_app_version(sreq, reply, wu, app, avp); + bavp = get_app_version(sreq, reply, wu); - if (!found && anonymous(sreq.platforms.list[0])) { + if (!bavp && anonymous(sreq.platforms.list[0])) { char help_msg_buf[512]; sprintf(help_msg_buf, "To get more %s work, finish current work, stop BOINC, remove app_info.xml file, and restart.", @@ -295,12 +294,13 @@ static int possibly_send_result( reply.set_delay(DELAY_ANONYMOUS); } - 
if (!found) return ERR_NO_APP_VERSION; + if (!bavp) return ERR_NO_APP_VERSION; // wu_is_infeasible() returns the reason why the WU is not feasible; // INFEASIBLE_MEM, INFEASIBLE_DISK, INFEASIBLE_CPU. // see sched_send.h. // + APP* app = ssp->lookup_app(wu.appid); if (wu_is_infeasible(wu, sreq, reply, *app)) { return ERR_INSUFFICIENT_RESOURCE; } @@ -312,7 +312,7 @@ static int possibly_send_result( if (count > 0) return ERR_WU_USER_RULE; } - return add_result_to_reply(result, wu, sreq, reply, app, avp); + return add_result_to_reply(result, wu, sreq, reply, bavp); } // returns true if the work generator can not make more work for this diff --git a/sched/sched_plan.C b/sched/sched_plan.C index 78748af40b..97c3ec4120 100644 --- a/sched/sched_plan.C +++ b/sched/sched_plan.C @@ -27,8 +27,48 @@ // (you need to prevent that from being overwritten too) // In either case, put your version under source-code control, e.g. SVN +#include "sched_msgs.h" #include "sched_plan.h" -bool app_plan(HOST& host, char* plan_class, HOST_USAGE& hu) { +// return the number of usable CPUs, taking prefs into account. +// If prefs limit apply, set bounded to true. 
+// +static void get_ncpus(SCHEDULER_REQUEST& sreq, int& ncpus, bool& bounded) { + ncpus = sreq.host.p_ncpus; + bounded = false; + if (sreq.global_prefs.max_ncpus_pct && sreq.global_prefs.max_ncpus_pct < 100) { + bounded = true; + ncpus = (int)((ncpus*sreq.global_prefs.max_ncpus_pct)/100.); + } +} + +bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) { + // clients before 6.1.11 don't understand plan_class + // + int v = sreq.core_client_major_version*10000 + + sreq.core_client_minor_version*100 + + sreq.core_client_release; + if (v < 60111) return false; + if (!strcmp(plan_class, "mt")) { + // the following is for an app that can use anywhere + // from 1 to 64 threads, can control this exactly, + // and whose speedup is .95N + // (so a sequential app will be used if one is available) + // + int ncpus, nthreads; + bool bounded; + + get_ncpus(sreq, ncpus, bounded); + nthreads = ncpus; + if (nthreads > 64) nthreads = 64; + hu.avg_ncpus = nthreads; + hu.max_ncpus = nthreads; + sprintf(hu.cmdline, "--nthreads %d", nthreads); + hu.flops = 0.95*sreq.host.p_fpops*nthreads; + return true; + } + log_messages.printf(MSG_CRITICAL, + "Unknown plan class: %s\n", plan_class + ); return false; } diff --git a/sched/sched_plan.h b/sched/sched_plan.h index a252965cc1..fc21503b25 100644 --- a/sched/sched_plan.h +++ b/sched/sched_plan.h @@ -20,4 +20,4 @@ #include "boinc_db.h" #include "server_types.h" -extern bool app_plan(HOST&, char* plan_class, HOST_USAGE&); +extern bool app_plan(SCHEDULER_REQUEST&, char* plan_class, HOST_USAGE&); diff --git a/sched/sched_resend.C b/sched/sched_resend.C index 311ab97f82..bc9b0f052f 100644 --- a/sched/sched_resend.C +++ b/sched/sched_resend.C @@ -105,8 +105,7 @@ bool resend_lost_work(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) { bool did_any = false; int num_eligible_to_resend=0; int num_resent=0; - APP* app; - APP_VERSION* avp; + BEST_APP_VERSION* bavp; int retval; sprintf(buf, " where hostid=%d and server_state=%d ", @@ 
-139,8 +138,8 @@ bool resend_lost_work(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) { continue; } - found = get_app_version(sreq, reply, wu, app, avp); - if (!found) { + bavp = get_app_version(sreq, reply, wu); + if (!bavp) { log_messages.printf(MSG_CRITICAL, "[HOST#%d] no app version [RESULT#%d]\n", reply.host.id, result.id @@ -186,9 +185,7 @@ bool resend_lost_work(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) { USER_MESSAGE um(warning_msg, "high"); reply.insert_message(um); } else { - retval = add_result_to_reply( - result, wu, sreq, reply, app, avp - ); + retval = add_result_to_reply(result, wu, sreq, reply, bavp); if (retval) { log_messages.printf(MSG_CRITICAL, "[HOST#%d] failed to send [RESULT#%d]\n", diff --git a/sched/sched_send.C b/sched/sched_send.C index 7a91763fc8..49bdf00652 100644 --- a/sched/sched_send.C +++ b/sched/sched_send.C @@ -109,57 +109,56 @@ bool SCHEDULER_REQUEST::has_version(APP& app) { return false; } -// return the APP and the best APP_VERSION for the given host. 
-// return false if none +// return BEST_APP_VERSION for the given host, or NULL if none // // -bool get_app_version( - SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply, - WORKUNIT& wu, APP* &app, APP_VERSION* &avp +BEST_APP_VERSION* get_app_version( + SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply, WORKUNIT& wu ) { bool found; double flops; unsigned int i; int j; + BEST_APP_VERSION* bavp; - app = ssp->lookup_app(wu.appid); - if (!app) { - log_messages.printf(MSG_CRITICAL, "Can't find APP#%d\n", wu.appid); - return false; + // + // see if app is already in memoized array + // + for (i=0; iappid == wu.appid) { + if (!bavp->avp) return NULL; + return bavp; + } } + APP* app = ssp->lookup_app(wu.appid); + if (!app) { + log_messages.printf(MSG_CRITICAL, "app not found: %d\n", wu.appid); + return NULL; + } + + bavp = new BEST_APP_VERSION; + bavp->appid = wu.appid; if (anonymous(sreq.platforms.list[0])) { found = sreq.has_version(*app); if (!found) { log_messages.printf(MSG_DEBUG, "Didn't find anonymous app\n"); - return false; + bavp->avp = 0; + } else { + bavp->avp = (APP_VERSION*)1; // arbitrary nonzero value } - avp = NULL; - return true; + reply.wreq.best_app_versions.push_back(bavp); + return bavp; } - // see if app is already in memoized array - // - for (i=0; ihost_usage.flops = 0; + bavp->avp = NULL; for (i=0; inapp_versions; j++) { @@ -172,51 +171,45 @@ bool get_app_version( continue; } if (strlen(av.plan_class)) { - if (app_plan(reply.host, av.plan_class, host_usage)) { - flops = host_usage.flops; - } else { - flops = 0; + if (!app_plan(sreq, av.plan_class, host_usage)) { + continue; } } else { - flops = reply.host.p_fpops; + host_usage.init_seq(reply.host.p_fpops); } - if (flops > bav.host_usage.flops) { - bav.host_usage.flops = flops; - bav.avp = &av; + if (host_usage.flops > bavp->host_usage.flops) { + bavp->host_usage = host_usage; + bavp->avp = &av; } } } - if (bav.avp) { - reply.wreq.best_app_versions.push_back(bav); - avp = bav.avp; + 
reply.wreq.best_app_versions.push_back(bavp); + if (bavp->avp) { if (config.debug_version_select) { log_messages.printf(MSG_DEBUG, "Best version of app %s is %d (%f FLOPS)\n", - app->name, avp->id, bav.host_usage.flops + app->name, bavp->avp->id, bavp->host_usage.flops ); } - return true; - } - - // here if no app version exists - // - reply.wreq.best_app_versions.push_back(bav); - - if (config.debug_version_select) { - log_messages.printf(MSG_DEBUG, - "no app version available: APP#%d PLATFORM#%d min_version %d\n", - app->id, sreq.platforms.list[0]->id, app->min_version + } else { + // here if no app version exists + // + if (config.debug_version_select) { + log_messages.printf(MSG_DEBUG, + "no app version available: APP#%d PLATFORM#%d min_version %d\n", + app->id, sreq.platforms.list[0]->id, app->min_version + ); + } + char message[256]; + sprintf(message, + "%s is not available for your type of computer.", + app->user_friendly_name ); + USER_MESSAGE um(message, "high"); + reply.wreq.insert_no_work_message(um); + reply.wreq.no_app_version = true; } - char message[256]; - sprintf(message, - "%s is not available for your type of computer.", - app->user_friendly_name - ); - USER_MESSAGE um(message, "high"); - reply.wreq.insert_no_work_message(um); - reply.wreq.no_app_version = true; - return false; + return bavp; } static char* find_user_friendly_name(int appid) { @@ -711,11 +704,14 @@ bool app_core_compatible(WORK_REQ& wreq, APP_VERSION& av) { // Add the app and app_version to the reply also. 
// int add_wu_to_reply( - WORKUNIT& wu, SCHEDULER_REPLY& reply, APP* app, APP_VERSION* avp + WORKUNIT& wu, SCHEDULER_REPLY& reply, APP* app, BEST_APP_VERSION* bavp ) { int retval; WORKUNIT wu2, wu3; + APP_VERSION* avp = bavp->avp; + if (avp == (APP_VERSION*)1) avp = NULL; + // add the app, app_version, and workunit to the reply, // but only if they aren't already there // @@ -878,13 +874,14 @@ void SCHEDULER_REPLY::got_bad_result() { int add_result_to_reply( DB_RESULT& result, WORKUNIT& wu, SCHEDULER_REQUEST& request, - SCHEDULER_REPLY& reply, APP* app, APP_VERSION* avp + SCHEDULER_REPLY& reply, BEST_APP_VERSION* bavp ) { int retval; double wu_seconds_filled; bool resent_result = false; + APP* app = ssp->lookup_app(wu.appid); - retval = add_wu_to_reply(wu, reply, app, avp); + retval = add_wu_to_reply(wu, reply, app, bavp); if (retval) return retval; // in the scheduling locality case, @@ -996,11 +993,7 @@ int add_result_to_reply( ); return retval; } - if (avp) { - PLATFORM* pp = ssp->lookup_platform_id(avp->platformid); - strcpy(result.platform_name, pp->name); - result.version_num = avp->version_num; - } + result.bavp = bavp; reply.insert_result(result); reply.wreq.seconds_to_fill -= wu_seconds_filled; request.estimated_delay += wu_seconds_filled/effective_ncpus(reply.host); diff --git a/sched/sched_send.h b/sched/sched_send.h index d59a34803c..a5027c2684 100644 --- a/sched/sched_send.h +++ b/sched/sched_send.h @@ -21,13 +21,13 @@ extern void send_work(SCHEDULER_REQUEST&, SCHEDULER_REPLY&); extern int add_result_to_reply( DB_RESULT& result, WORKUNIT& wu, SCHEDULER_REQUEST&, SCHEDULER_REPLY&, - APP* app, APP_VERSION* avp + BEST_APP_VERSION* bavp ); extern bool anonymous(PLATFORM*); -extern bool get_app_version( - SCHEDULER_REQUEST&, SCHEDULER_REPLY&, WORKUNIT&, APP*&, APP_VERSION*& +extern BEST_APP_VERSION* get_app_version( + SCHEDULER_REQUEST&, SCHEDULER_REPLY&, WORKUNIT& ); extern bool app_core_compatible(WORK_REQ& wreq, APP_VERSION& av); diff --git 
a/sched/server_types.C b/sched/server_types.C index 0fc0e5d574..9d1cf44bfb 100644 --- a/sched/server_types.C +++ b/sched/server_types.C @@ -831,7 +831,7 @@ int APP::write(FILE* fout) { } int APP_VERSION::write(FILE* fout) { - char buf[LARGE_BLOB_SIZE], buf2[256]; + char buf[LARGE_BLOB_SIZE]; strcpy(buf, xml_doc); char* p = strstr(buf, ""); if (!p) { @@ -841,14 +841,18 @@ int APP_VERSION::write(FILE* fout) { *p = 0; fputs(buf, fout); PLATFORM* pp = ssp->lookup_platform_id(platformid); - sprintf(buf2, " %s\n", pp->name); - fputs(buf2, fout); + fprintf(fout, " %s\n", pp->name); + if (strlen(plan_class)) { + fprintf(fout, " %s\n", plan_class); + } fputs("\n", fout); return 0; } int RESULT::write_to_client(FILE* fout) { - char buf[LARGE_BLOB_SIZE], buf2[256]; + char buf[LARGE_BLOB_SIZE]; + unsigned int i; + strcpy(buf, xml_doc_in); char* p = strstr(buf, ""); if (!p) { @@ -858,15 +862,44 @@ int RESULT::write_to_client(FILE* fout) { *p = 0; fputs(buf, fout); - // platform name will be null in anonymous case; don't send - // - if (strlen(platform_name)) { - sprintf(buf2, + APP_VERSION* avp = bavp->avp; + if (avp == (APP_VERSION*)1) avp = NULL; + if (avp) { + PLATFORM* pp = ssp->lookup_platform_id(avp->platformid); + fprintf(fout, " %s\n" " %d\n", - platform_name, version_num + pp->name, avp->version_num ); - fputs(buf2, fout); + if (strlen(avp->plan_class)) { + fprintf(fout, + " %s\n" + " %f\n" + " %f\n" + " %f\n", + avp->plan_class, + bavp->host_usage.avg_ncpus, + bavp->host_usage.max_ncpus, + bavp->host_usage.flops + ); + if (strlen(bavp->host_usage.cmdline)) { + fprintf(fout, + " %s\n", + bavp->host_usage.cmdline + ); + } + for (i=0; ihost_usage.coprocs.coprocs.size(); i++) { + COPROC& cp = bavp->host_usage.coprocs.coprocs[i]; + fprintf(fout, + " \n" + " %s\n" + " %d\n" + " \n", + cp.name, + cp.count + ); + } + } } fputs("\n", fout); return 0; @@ -1062,6 +1095,7 @@ void GLOBAL_PREFS::parse(const char* buf, const char* venue) { if (parse_double(buf2, "", dtemp)) { 
ram_max_used_idle_frac = dtemp/100.; } + parse_double(buf2, "", max_ncpus_pct); } void GLOBAL_PREFS::defaults() { diff --git a/sched/server_types.h b/sched/server_types.h index 345e8f66f7..23e8725b13 100644 --- a/sched/server_types.h +++ b/sched/server_types.h @@ -73,9 +73,18 @@ struct USER_MESSAGE { struct HOST_USAGE { COPROCS coprocs; - double ncpus; + double avg_ncpus; + double max_ncpus; double flops; - char opaque[256]; + char cmdline[256]; + + void init_seq(double x) { + coprocs.coprocs.clear(); + avg_ncpus = 1; + max_ncpus = 1; + flops = x; + strcpy(cmdline, ""); + } }; // keep track of the best app_version for each app for this host @@ -107,7 +116,7 @@ struct WORK_REQ { RESOURCE bandwidth; std::vector no_work_messages; - std::vector best_app_versions; + std::vector best_app_versions; bool no_allowed_apps_available; bool excessive_work_buf; @@ -163,6 +172,7 @@ struct GLOBAL_PREFS { double work_buf_min_days; double ram_max_used_busy_frac; double ram_max_used_idle_frac; + double max_ncpus_pct; void parse(const char* buf, const char* venue); void defaults(); diff --git a/sched/validator.C b/sched/validator.C index 42b5cda66c..66b91752e1 100644 --- a/sched/validator.C +++ b/sched/validator.C @@ -586,7 +586,15 @@ bool do_validate_scan(APP& app) { wu_id_modulus, wu_id_remainder, items ); - if (retval) break; + if (retval) { + if (retval != ERR_DB_NOT_FOUND) { + log_messages.printf(MSG_DEBUG, + "DB connection lost, exiting\n" + ); + exit(0); + } + break; + } retval = handle_wu(validator, items); if (!retval) found = true; } diff --git a/tools/update_versions b/tools/update_versions index 070fb9cdd1..13229388c0 100755 --- a/tools/update_versions +++ b/tools/update_versions @@ -68,7 +68,7 @@ def add_files( assert(exec_files[0]) version_major, version_minor, platform_name, plan_class = match.groups() if plan_class: - plan_class = plan_class[1:] # drop leading : + plan_class = plan_class[2:] # drop leading __ version_num = int(version_major) * 100 + int(version_minor) 
file_base = os.path.basename(exec_files[0]) @@ -118,7 +118,7 @@ def add_files( def re_match_exec_filename(filepath): file = os.path.basename(filepath) - return re.match('[^.]+_([0-9]+)[.]([0-9]+)_([^.]+?(?:[0-9][0-9.]*[0-9])?)([:][^.]+)?(?:[.]gz|[.]exe|[.]sit|[.]msi)?$', file) + return re.match('[^.]+_([0-9]+)[.]([0-9]+)_([^.]+?(?:[0-9][0-9.]*[0-9])?)(__[^.]+)?(?:[.]gz|[.]exe|[.]sit|[.]msi)?$', file) def find_versions(app, dir): """Find application versions in DIR. diff --git a/version.h b/version.h index a2dd6ad10f..52d5a93f55 100644 --- a/version.h +++ b/version.h @@ -10,10 +10,10 @@ #define BOINC_MINOR_VERSION 1 /* Release part of BOINC version number */ -#define BOINC_RELEASE 10 +#define BOINC_RELEASE 11 /* String representation of BOINC version number */ -#define BOINC_VERSION_STRING "6.1.10" +#define BOINC_VERSION_STRING "6.1.11" #if (defined(_WIN32) || defined(__APPLE__)) /* Name of package */ @@ -26,13 +26,13 @@ #define PACKAGE_NAME "BOINC" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "BOINC 6.1.10" +#define PACKAGE_STRING "BOINC 6.1.11" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "boinc" /* Define to the version of this package. */ -#define PACKAGE_VERSION "6.1.10" +#define PACKAGE_VERSION "6.1.11" #endif /* #if (defined(_WIN32) || defined(__APPLE__)) */