Changes for multithread app support:

- update_versions: use __ (not :) as separator for plan class - client: add plan_class to APP_VERSION; an app version is now identified by platform/version/plan_class - client CPU scheduler: don't assume apps use 1 CPU - client: add avg_ncpus, max_ncpus, flops, cmdline to RESULT - scheduler: implement app planning scheme Other changes: - client: if symlink() fails, make an XML soft link instead (for Unix running off a FAT32 FS) - client: don't accept nonpositive resource share from AMS - daemons and DB: check for error returns from enumerations, and exit if so. Thus, if the MySQL server goes down, all the daemons will soon exit. The cron script will restart them every 5 min, so when the DB server comes back up so will the project. - web: show empty max CPU % as --- - API: get rid of all_threads_cpu_time option (always the case now) svn path=/trunk/boinc/; revision=14966
2008-03-27 18:25:29 +00:00 · 2008-03-27 18:25:29 +00:00 · 13400c9516
parent 0a180fdfbc
commit 13400c9516
43 changed files with 481 additions and 233 deletions
--- a/api/boinc_api.C
+++ b/api/boinc_api.C
@ -202,18 +202,13 @@ static int setup_shared_mem() {
    return 0;
 }

-// Return CPU time of worker thread (and optionally others)
-// This may be called from any thread
+// Return CPU time of process.
 //
 double boinc_worker_thread_cpu_time() {
    double cpu;
 #ifdef _WIN32
    int retval;
-    if (options.all_threads_cpu_time) {
-        retval = boinc_process_cpu_time(cpu);
-    } else {
-        retval = boinc_thread_cpu_time(worker_thread_handle, cpu);
-    }
+    retval = boinc_process_cpu_time(cpu);
    if (retval) {
        cpu = nrunning_ticks * TIMER_PERIOD;   // for Win9x
    }
--- a/api/boinc_api.h
+++ b/api/boinc_api.h
@ -50,9 +50,6 @@ typedef struct BOINC_OPTIONS {
        // if heartbeat fail, or get process control msg, take
        // direction action (exit, suspend, resume).
        // Otherwise just set flag in BOINC status
-    int all_threads_cpu_time;
-        // count the CPU time of all threads
-        // (for apps that have multiple worker threads)
    int worker_thread_stack_size;
        // if nonzero, the worker thread stack size limit
    int backwards_compatible_graphics;
@ -148,7 +145,6 @@ inline void boinc_options_defaults(BOINC_OPTIONS& b) {
    b.handle_process_control = 1;
    b.send_status_msgs = 1;
    b.direct_process_action = 1;
-    b.all_threads_cpu_time = 0;
    b.worker_thread_stack_size = 0;
    b.backwards_compatible_graphics = 1;
 }
--- a/78
+++ b/78
@ -2556,13 +2556,15 @@ David  Mar 19 2008
        sched_send.C

 Charlie Mar 20 2008
-    - Client: fix  a compiler warning which indicated a real logic error (variable
-        used uninitialized).
-    - Mac: More work on backtrace code: run atos utility via a bidirectional pipe 
-        instead of backtrace_symbols_fd() API to get better symbols in backtrace.  
-        Set visibility of all variables in Client and Manager back to hidden (as 
-        before) to reduce size of executables, since atos utility can use hidden 
-        symbols.  This also involves return to previous wxWidgets build script.
+    - Client: fix  a compiler warning which indicated a real logic error
+        (variable used uninitialized).
+    - Mac: More work on backtrace code: run atos utility via a bidirectional
+        pipe instead of backtrace_symbols_fd() API to get better symbols
+        in backtrace.  
+        Set visibility of all variables in Client and Manager back to hidden
+        (as before) to reduce size of executables,
+        since atos utility can use hidden symbols.
+        This also involves return to previous wxWidgets build script.
       
    client/
        app_start.C
@ -2641,3 +2643,65 @@ Rom    Mar 27 2008
        boinccas.dll
        boinccas95.dll

+David  Mar 27 2008
+    Changes for multithread app support:
+
+    - update_versions: use __ (not :) as separator for plan class
+    - client: add plan_class to APP_VERSION;
+        an app version is now identified by platform/version/plan_class
+    - client CPU scheduler: don't assume apps use 1 CPU
+    - client: add avg_ncpus, max_ncpus, flops, cmdline to RESULT
+    - scheduler: implement app planning scheme
+
+    Other changes:
+
+    - client: if symlink() fails, make an XML soft link instead
+        (for Unix running off a FAT32 FS)
+    - client: don't accept nonpositive resource share from AMS
+    - daemons and DB: check for error returns from enumerations,
+        and exit if so.  Thus, if the MySQL server goes down,
+        all the daemons will soon exit.
+        The cron script will restart them every 5 min,
+        so when the DB server comes back up so will the project.
+    - web: show empty max CPU % as ---
+    - API: get rid of all_threads_cpu_time option (always the case now)
+
+    api/
+        boinc_api.C,h
+    client/
+        acct_mgr.C
+        app.C,h
+        app_start.C
+        client_state.C,h
+        client_types.C,h
+        cpu_sched.C
+        cs_scheduler.C
+        cs_statefile.C
+    configure.ac
+    db/
+        boinc_db.C,h
+    html/inc/
+        countries.inc
+        prefs.inc
+    lib/
+        app_ipc.C,h
+        error_numbers.h
+        shmem.C
+    sched/
+        assimilator.C
+        db_purge.C
+        feeder.C
+        file_deleter.C
+        make_work.C
+        message_handler.C
+        sched_array.C
+        sched_assign.C
+        sched_locality.C
+        sched_plan.C,h
+        sched_resend.C
+        sched_send.C,h
+        server_types.C,h
+        validator.C
+    tools/
+        update_versions
+    version.h
--- a/client/acct_mgr.C
+++ b/client/acct_mgr.C
@ -234,7 +234,13 @@ int AM_ACCOUNT::parse(XML_PARSER& xp) {
            continue;
        }
        if (xp.parse_double(tag, "resource_share", dtemp)) {
-            resource_share.set(dtemp);
+            if (dtemp > 0) {
+                resource_share.set(dtemp);
+            } else {
+                msg_printf(NULL, MSG_INFO,
+                    "Resource share out of range: %f", dtemp
+                );
+            }
            continue;
        }
        if (log_flags.unparsed_xml) {
--- a/client/app.C
+++ b/client/app.C
@ -112,7 +112,6 @@ ACTIVE_TASK::ACTIVE_TASK() {
    too_large = false;
    needs_shmem = false;
    want_network = 0;
-    nthreads = 1;
    memset(&procinfo, 0, sizeof(procinfo));
 #ifdef _WIN32
    pid_handle = 0;
@ -554,7 +553,8 @@ int ACTIVE_TASK::parse(MIOFILE& fin) {

            wup = result->wup;
            app_version = gstate.lookup_app_version(
-                result->app, result->platform, result->version_num
+                result->app, result->platform, result->version_num,
+                result->plan_class
            );
            if (!app_version) {
                msg_printf(
--- a/client/app.h
+++ b/client/app.h
@ -109,8 +109,6 @@ public:
    double abort_time;
        // when we sent an abort message to this app
        // kill it 5 seconds later if it doesn't exit
-    int nthreads;
-        // current # of threads in app (assumed to be 1 by default)
    APP_CLIENT_SHM app_client_shm;        // core/app shared mem
    MSG_QUEUE graphics_request_queue;
    MSG_QUEUE process_control_queue;
--- a/client/app_start.C
+++ b/client/app_start.C
@ -211,6 +211,19 @@ int ACTIVE_TASK::write_app_init_file() {
    return retval;
 }

+static int make_soft_link(PROJECT* project, char* link_path, char* rel_file_path) {
+    FILE *fp = boinc_fopen(link_path, "w");
+    if (!fp) {
+        msg_printf(project, MSG_INTERNAL_ERROR,
+            "Can't create link file %s", link_path
+        );
+        return ERR_FOPEN;
+    }
+    fprintf(fp, "<soft_link>%s</soft_link>\n", rel_file_path);
+    fclose(fp);
+    return 0;
+}
+
 // set up a file reference, given a slot dir and project dir.
 // This means:
 // 1) copy the file to slot dir, if reference is by copy
@ -251,23 +264,17 @@ static int setup_file(
    }

 #ifdef _WIN32
-    FILE *fp = boinc_fopen(link_path, "w");
-    if (!fp) {
-        msg_printf(project, MSG_INTERNAL_ERROR,
-            "Can't open link file %s", link_path
-        );
-        return ERR_FOPEN;
-    }
-    fprintf(fp, "<soft_link>%s</soft_link>\n", rel_file_path);
-    fclose(fp);
+    retval = make_soft_link(project, link_path, rel_file_path);
+    if (retval) return retval;
 #else
    retval = symlink(rel_file_path, link_path);
    if (retval) {
-        msg_printf(project, MSG_INTERNAL_ERROR,
-            "Can't symlink %s to %s: %d", rel_file_path, link_path, retval
-        );
-        perror("symlink");
-        return ERR_SYMLINK;
+        // A Unix system can't make symlinks if the filesystem is FAT32
+        // (e.g. external USB disk).
+        // Try making a soft link instead.
+        //
+        retval = make_soft_link(project, link_path, rel_file_path);
+        if (retval) return retval;
    }
 #endif
 #ifdef SANDBOX
@ -615,6 +622,10 @@ int ACTIVE_TASK::start(bool first_time) {
    argv[0] = exec_name;
    char cmdline[8192];
    strcpy(cmdline, wup->command_line.c_str());
+    if (strlen(result->cmdline)) {
+        strcat(cmdline, " ");
+        strcat(cmdline, result->cmdline);
+    }
    parse_command_line(cmdline, argv+1);
    if (log_flags.task_debug) {
        debug_print_argv(argv);
@ -749,6 +760,10 @@ int ACTIVE_TASK::start(bool first_time) {
 #endif
        char cmdline[8192];
        strcpy(cmdline, wup->command_line.c_str());
+        if (strlen(result->cmdline)) {
+            strcat(cmdline, " ");
+            strcat(cmdline, result->cmdline);
+        }
        sprintf(buf, "../../%s", exec_path );
        if (g_use_sandbox) {
            char switcher_path[100];
--- a/client/client_state.C
+++ b/client/client_state.C
@ -665,7 +665,7 @@ WORKUNIT* CLIENT_STATE::lookup_workunit(PROJECT* p, const char* name) {
 }

 APP_VERSION* CLIENT_STATE::lookup_app_version(
-    APP* app, char* platform, int version_num
+    APP* app, char* platform, int version_num, char* plan_class
 ) {
    for (unsigned int i=0; i<app_versions.size(); i++) {
        APP_VERSION* avp = app_versions[i];
@ -675,6 +675,7 @@ APP_VERSION* CLIENT_STATE::lookup_app_version(
            return avp;
        }
        if (strcmp(avp->platform, platform)) continue;
+        if (strcmp(avp->plan_class, plan_class)) continue;
        return avp;
    }
    return 0;
@ -722,10 +723,10 @@ int CLIENT_STATE::link_app_version(PROJECT* p, APP_VERSION* avp) {
    }
    avp->app = app;

-    if (lookup_app_version(app, avp->platform, avp->version_num)) {
+    if (lookup_app_version(app, avp->platform, avp->version_num, avp->plan_class)) {
        msg_printf(p, MSG_INTERNAL_ERROR,
-            "State file error: duplicate app version: %s %s %d",
-            avp->app_name, avp->platform, avp->version_num
+            "State file error: duplicate app version: %s %s %d %s",
+            avp->app_name, avp->platform, avp->version_num, avp->plan_class
        );
        return ERR_NOT_UNIQUE;
    }
--- a/client/client_state.h
+++ b/client/client_state.h
@ -222,7 +222,9 @@ public:
    FILE_INFO* lookup_file_info(PROJECT*, const char* name);
    RESULT* lookup_result(PROJECT*, const char*);
    WORKUNIT* lookup_workunit(PROJECT*, const char*);
-    APP_VERSION* lookup_app_version(APP*, char* platform, int ver);
+    APP_VERSION* lookup_app_version(
+        APP*, char* platform, int ver, char* plan_class
+    );
    int detach_project(PROJECT*);
    int report_result_error(RESULT&, const char *format, ...);
    int reset_project(PROJECT*, bool detaching);
@ -262,7 +264,7 @@ private:
    bool enforce_schedule();
    bool no_work_for_a_cpu();
    void rr_simulation();
-    void make_running_task_heap(vector<ACTIVE_TASK*>&);
+    void make_running_task_heap(vector<ACTIVE_TASK*>&, double&);
    void print_deadline_misses();
 public:
    double retry_shmem_time;
--- a/client/client_types.C
+++ b/client/client_types.C
@ -1079,6 +1079,7 @@ int APP_VERSION::parse(MIOFILE& in) {
    strcpy(api_version, "");
    version_num = 0;
    strcpy(platform, "");
+    strcpy(plan_class, "");
    app = NULL;
    project = NULL;
    while (in.fgets(buf, 256)) {
@ -1092,6 +1093,7 @@ int APP_VERSION::parse(MIOFILE& in) {
        if (parse_int(buf, "<version_num>", version_num)) continue;
        if (parse_str(buf, "<api_version>", api_version, sizeof(api_version))) continue;
        if (parse_str(buf, "<platform>", platform, sizeof(platform))) continue;
+        if (parse_str(buf, "<plan_class>", plan_class, sizeof(plan_class))) continue;
        if (log_flags.unparsed_xml) {
            msg_printf(0, MSG_INFO,
                "[unparsed_xml] APP_VERSION::parse(): unrecognized: %s\n", buf
@ -1114,6 +1116,9 @@ int APP_VERSION::write(MIOFILE& out) {
        version_num,
        platform
    );
+    if (strlen(plan_class)) {
+        out.printf("    <plan_class>%s</plan_class>\n", plan_class);
+    }
    if (strlen(api_version)) {
        out.printf("    <api_version>%s</api_version>\n", api_version);
    }
@ -1404,6 +1409,11 @@ void RESULT::clear() {
    project = NULL;
    version_num = 0;
    strcpy(platform, "");
+    strcpy(plan_class, "");
+    strcpy(cmdline, "");
+    avg_ncpus = 1;
+    max_ncpus = 1;
+    flops = gstate.host_info.p_fpops;
 }

 // parse a <result> element from scheduling server.
@ -1419,7 +1429,12 @@ int RESULT::parse_server(MIOFILE& in) {
        if (parse_str(buf, "<wu_name>", wu_name, sizeof(wu_name))) continue;
        if (parse_double(buf, "<report_deadline>", report_deadline)) continue;
        if (parse_str(buf, "<platform>", platform, sizeof(platform))) continue;
+        if (parse_str(buf, "<plan_class>", plan_class, sizeof(plan_class))) continue;
        if (parse_int(buf, "<version_num>", version_num)) continue;
+        if (parse_double(buf, "<avg_ncpus>", avg_ncpus)) continue;
+        if (parse_double(buf, "<max_ncpus>", max_ncpus)) continue;
+        if (parse_double(buf, "<flops>", flops)) continue;
+        if (parse_str(buf, "<cmdline>", cmdline, sizeof(cmdline))) continue;
        if (match_tag(buf, "<file_ref>")) {
            file_ref.parse(in);
            output_files.push_back(file_ref);
@ -1481,7 +1496,12 @@ int RESULT::parse_state(MIOFILE& in) {
        if (parse_double(buf, "<intops_per_cpu_sec>", intops_per_cpu_sec)) continue;
        if (parse_double(buf, "<intops_cumulative>", intops_cumulative)) continue;
        if (parse_str(buf, "<platform>", platform, sizeof(platform))) continue;
+        if (parse_str(buf, "<plan_class>", plan_class, sizeof(plan_class))) continue;
        if (parse_int(buf, "<version_num>", version_num)) continue;
+        if (parse_double(buf, "<avg_ncpus>", avg_ncpus)) continue;
+        if (parse_double(buf, "<max_ncpus>", max_ncpus)) continue;
+        if (parse_double(buf, "<flops>", flops)) continue;
+        if (parse_str(buf, "<cmdline>", cmdline, sizeof(cmdline))) continue;
        if (log_flags.unparsed_xml) {
            msg_printf(0, MSG_INFO,
                "[unparsed_xml] RESULT::parse(): unrecognized: %s\n", buf
@ -1503,14 +1523,26 @@ int RESULT::write(MIOFILE& out, bool to_server) {
        "    <exit_status>%d</exit_status>\n"
        "    <state>%d</state>\n"
        "    <platform>%s</platform>\n"
-        "    <version_num>%d</version_num>\n",
+        "    <version_num>%d</version_num>\n"
+        "    <avg_ncpus>%f</avg_ncpus>\n"
+        "    <max_ncpus>%f</max_ncpus>\n"
+        "    <flops>%f</flops>\n",
        name,
        final_cpu_time,
        exit_status,
        state(),
        platform,
-        version_num
+        version_num,
+        avg_ncpus,
+        max_ncpus,
+        flops
    );
+    if (strlen(plan_class)) {
+        out.printf("    <plan_class>%s</plan_class>\n", plan_class);
+    }
+    if (strlen(cmdline)) {
+        out.printf("    <cmdline>%s</cmdline>\n", cmdline);
+    }
    if (fpops_per_cpu_sec) {
        out.printf("    <fpops_per_cpu_sec>%f</fpops_per_cpu_sec>\n", fpops_per_cpu_sec);
    }
--- a/client/client_types.h
+++ b/client/client_types.h
@ -34,6 +34,7 @@

 #include "md5_file.h"
 #include "hostinfo.h"
+#include "coproc.h"
 #include "miofile.h"

 #define P_LOW 1
@ -404,6 +405,7 @@ struct APP_VERSION {
    char app_name[256];
    int version_num;
    char platform[256];
+    char plan_class[64];
    char api_version[16];
    APP* app;
    PROJECT* project;
@ -452,8 +454,14 @@ struct RESULT {
    char wu_name[256];
    double report_deadline;
    int version_num;        // identifies the app used
+    char plan_class[64];
    char platform[256];
+    char cmdline[256];      // additional cmdline args
    APP_VERSION* avp;
+    double avg_ncpus;
+    double max_ncpus;
+    double flops;
+    COPROCS coprocs;
    std::vector<FILE_REF> output_files;
    bool ready_to_report;
        // we're ready to report this result to the server;
--- a/client/cpu_sched.C
+++ b/client/cpu_sched.C
@ -513,9 +513,8 @@ void CLIENT_STATE::schedule_cpus() {
 #ifdef SIM
    if (!cpu_sched_rr_only) {
 #endif
-    int ncpus_used = 0;
-    //while (ncpus_used < ncpus) {
-    while ((int)ordered_scheduled_results.size() < ncpus) {
+    double ncpus_used = 0;
+    while (ncpus_used < ncpus) {
        rp = earliest_deadline_result();
        if (!rp) break;
        rp->already_selected = true;
@ -545,12 +544,8 @@ void CLIENT_STATE::schedule_cpus() {
                atp->needs_shmem = false;
            }
 			ram_left -= atp->procinfo.working_set_size_smoothed;
-            ncpus_used += atp->nthreads;
-		} else {
-            // if we haven't run the app yet, assume it has one thread
-            //
-            ncpus_used++;
        }
+        ncpus_used += rp->avg_ncpus;

        rp->project->anticipated_debt -= (rp->project->resource_share / rrs) * expected_pay_off;
        rp->project->deadlines_missed--;
@ -569,7 +564,7 @@ void CLIENT_STATE::schedule_cpus() {

    // Next, choose results from projects with large debt
    //
-    while ((int)ordered_scheduled_results.size() < ncpus) {
+    while (ncpus_used < ncpus) {
        assign_results_to_projects();
        rp = largest_debt_project_best_result();
        if (!rp) break;
@ -599,6 +594,7 @@ void CLIENT_STATE::schedule_cpus() {
            }
 			ram_left -= atp->procinfo.working_set_size_smoothed;
 		}
+        ncpus_used += rp->avg_ncpus;
        double xx = (rp->project->resource_share / rrs) * expected_pay_off;
        rp->project->anticipated_debt -= xx;
        if (log_flags.cpu_sched_debug) {
@ -614,17 +610,19 @@ void CLIENT_STATE::schedule_cpus() {
 // make a list of preemptable tasks, ordered by their preemptability.
 //
 void CLIENT_STATE::make_running_task_heap(
-    vector<ACTIVE_TASK*> &running_tasks
+    vector<ACTIVE_TASK*> &running_tasks, double& ncpus_used
 ) {
    unsigned int i;
    ACTIVE_TASK* atp;

+    ncpus_used = 0;
    for (i=0; i<active_tasks.active_tasks.size(); i++) {
        atp = active_tasks.active_tasks[i];
        if (atp->result->project->non_cpu_intensive) continue;
        if (!atp->result->runnable()) continue;
        if (atp->scheduler_state != CPU_SCHED_SCHEDULED) continue;
        running_tasks.push_back(atp);
+        ncpus_used += atp->result->avg_ncpus;
    }

    std::make_heap(
@ -655,6 +653,7 @@ bool CLIENT_STATE::enforce_schedule() {
    vector<ACTIVE_TASK*> running_tasks;
 	static double last_time = 0;
    int retval;
+    double ncpus_used;

    // Do this when requested, and once a minute as a safety net
    //
@ -694,13 +693,15 @@ bool CLIENT_STATE::enforce_schedule() {

    // make heap of currently running tasks, ordered by preemptibility
    //
-    make_running_task_heap(running_tasks);
+    make_running_task_heap(running_tasks, ncpus_used);

    // if there are more running tasks than ncpus,
    // then mark the extras for preemption 
    //
-    while (running_tasks.size() > (unsigned int)ncpus) {
-        running_tasks[0]->next_scheduler_state = CPU_SCHED_PREEMPTED;
+    while (ncpus_used > ncpus) {
+        atp = running_tasks[0];
+        atp->next_scheduler_state = CPU_SCHED_PREEMPTED;
+        ncpus_used -= atp->result->avg_ncpus;
        std::pop_heap(
            running_tasks.begin(),
            running_tasks.end(),
@ -718,11 +719,6 @@ bool CLIENT_STATE::enforce_schedule() {
        );
    }

-    // keep track of how many tasks we plan on running
-    // (i.e. have next_scheduler_state = SCHEDULED)
-    //
-    int nrunning = (int)running_tasks.size();
-
    // Loop through the scheduled results
    //
    for (i=0; i<ordered_scheduled_results.size(); i++) {
@ -765,7 +761,7 @@ bool CLIENT_STATE::enforce_schedule() {
            if (atp->procinfo.working_set_size_smoothed > ram_left) {
                atp->next_scheduler_state = CPU_SCHED_PREEMPTED;
                atp->too_large = true;
-                nrunning--;
+                ncpus_used -= atp->result->avg_ncpus;
                if (log_flags.mem_usage_debug) {
                    msg_printf(rp->project, MSG_INFO,
                        "[mem_usage_debug] enforce: result %s can't continue, too big %.2fMB > %.2fMB",
@ -801,7 +797,7 @@ bool CLIENT_STATE::enforce_schedule() {
        // Preempt something if needed (and possible).
        //
        bool run_task = false;
-        bool need_to_preempt = (nrunning==ncpus) && running_tasks.size();
+        bool need_to_preempt = (ncpus_used >= ncpus) && running_tasks.size();
            // the 2nd half of the above is redundant
        if (need_to_preempt) {
            // examine the most preemptable task.
@ -821,7 +817,7 @@ bool CLIENT_STATE::enforce_schedule() {
                    rp->project->deadlines_missed--;
                }
                atp->next_scheduler_state = CPU_SCHED_PREEMPTED;
-                nrunning--;
+                ncpus_used -= atp->result->avg_ncpus;
                std::pop_heap(
                    running_tasks.begin(),
                    running_tasks.end(),
@ -849,14 +845,14 @@ bool CLIENT_STATE::enforce_schedule() {
        if (run_task) {
            atp = get_task(rp);
            atp->next_scheduler_state = CPU_SCHED_SCHEDULED;
-            nrunning++;
+            ncpus_used += rp->avg_ncpus;
            ram_left -= atp->procinfo.working_set_size_smoothed;
        }
    }
    if (log_flags.cpu_sched_debug) {
        msg_printf(0, MSG_INFO,
-            "[cpu_sched_debug] finished preempt loop, nrunning %d",
-            nrunning
+            "[cpu_sched_debug] finished preempt loop, ncpus_used %f",
+            ncpus_used
        );
    }

@ -879,16 +875,13 @@ bool CLIENT_STATE::enforce_schedule() {
        }
    }

-    if (log_flags.cpu_sched_debug && nrunning < ncpus) {
-        msg_printf(0, MSG_INFO, "[cpu_sched_debug] Some CPUs idle (%d<%d)",
-            nrunning, ncpus
-        );
-		request_work_fetch("CPUs idle");
-    }
-    if (log_flags.cpu_sched_debug && nrunning > ncpus) {
-        msg_printf(0, MSG_INFO, "[cpu_sched_debug] Too many tasks started (%d>%d)",
-            nrunning, ncpus
+    if (log_flags.cpu_sched_debug && ncpus_used < ncpus) {
+        msg_printf(0, MSG_INFO, "[cpu_sched_debug] using %f out of %d CPUs",
+            ncpus_used, ncpus
        );
+        if (ncpus_used < ncpus) {
+            request_work_fetch("CPUs idle");
+        }
    }

    // schedule new non CPU intensive tasks
--- a/client/cs_scheduler.C
+++ b/client/cs_scheduler.C
@ -660,7 +660,9 @@ int CLIENT_STATE::handle_scheduler_reply(
            }
        }
        APP* app = lookup_app(project, avpp.app_name);
-        APP_VERSION* avp = lookup_app_version(app, avpp.platform, avpp.version_num);
+        APP_VERSION* avp = lookup_app_version(
+            app, avpp.platform, avpp.version_num, avpp.plan_class
+        );
        if (avp) {
            // if we had download failures, clear them
            //
@ -713,7 +715,9 @@ int CLIENT_STATE::handle_scheduler_reply(
            strcpy(rp->platform, get_primary_platform());
            rp->version_num = latest_version(rp->wup->app, rp->platform);
        }
-        rp->avp = lookup_app_version(rp->wup->app, rp->platform, rp->version_num);
+        rp->avp = lookup_app_version(
+            rp->wup->app, rp->platform, rp->version_num, rp->plan_class
+        );
        if (!rp->avp) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "No app version for result: %s %d",
--- a/client/cs_statefile.C
+++ b/client/cs_statefile.C
@ -323,11 +323,13 @@ int CLIENT_STATE::parse_state_file() {
                strcpy(rp->platform, get_primary_platform());
                rp->version_num = latest_version(rp->wup->app, rp->platform);
            }
-            rp->avp = lookup_app_version(rp->wup->app, rp->platform, rp->version_num);
+            rp->avp = lookup_app_version(
+                rp->wup->app, rp->platform, rp->version_num, rp->plan_class
+            );
            if (!rp->avp) {
                msg_printf(project, MSG_INTERNAL_ERROR,
-                    "No app version for result: %s %d",
-                    rp->platform, rp->version_num
+                    "No app version for result: %s %d %s",
+                    rp->platform, rp->version_num, rp->plan_class
                );
                delete rp;
                continue;
--- a/configure.ac
+++ b/configure.ac
@ -9,7 +9,7 @@ dnl not sure exactly what the minimum version is (but 2.13 wont work)
 AC_PREREQ(2.57)

 dnl Set the BOINC version here.  You can also use the set-version script.
-AC_INIT(BOINC, 6.1.10)
+AC_INIT(BOINC, 6.1.11)

 AC_ARG_ENABLE(debug, 
    AS_HELP_STRING([--enable-debug],
--- a/db/boinc_db.C
+++ b/db/boinc_db.C
@ -1024,7 +1024,7 @@ int DB_TRANSITIONER_ITEM_SET::enumerate(
            mysql_free_result(cursor.rp);
            cursor.active = false;
            retval = mysql_errno(db->mysql);
-            if (retval) return retval;
+            if (retval) return ERR_DB_CONN_LOST;
            return ERR_DB_NOT_FOUND;
        }
        last_item.parse(row);
@ -1227,7 +1227,7 @@ int DB_VALIDATOR_ITEM_SET::enumerate(
            mysql_free_result(cursor.rp);
            cursor.active = false;
            retval = mysql_errno(db->mysql);
-            if (retval) return retval;
+            if (retval) return ERR_DB_CONN_LOST;
            return ERR_DB_NOT_FOUND;
        }
        last_item.parse(row);
@ -1368,7 +1368,7 @@ int DB_WORK_ITEM::enumerate(
        mysql_free_result(cursor.rp);
        cursor.active = false;
        retval = mysql_errno(db->mysql);
-        if (retval) return retval;
+        if (retval) return ERR_DB_CONN_LOST;
        return ERR_DB_NOT_FOUND;
    } else {
        parse(row);
@ -1415,7 +1415,7 @@ int DB_WORK_ITEM::enumerate_all(
        mysql_free_result(cursor.rp);
        cursor.active = false;
        retval = mysql_errno(db->mysql);
-        if (retval) return retval;
+        if (retval) return ERR_DB_CONN_LOST;
        return ERR_DB_NOT_FOUND;
    } else {
        parse(row);
@ -1463,7 +1463,7 @@ int DB_IN_PROGRESS_RESULT::enumerate(int hostid, const char* result_names) {
        mysql_free_result(cursor.rp);
        cursor.active = false;
        retval = mysql_errno(db->mysql);
-        if (retval) return retval;
+        if (retval) return ERR_DB_CONN_LOST;
        return ERR_DB_NOT_FOUND;
    } else {
        parse(row);
--- a/db/boinc_db.h
+++ b/db/boinc_db.h
@ -439,6 +439,8 @@ struct CREDITED_JOB {
 #define ASSIGN_USER     2
 #define ASSIGN_TEAM     3

+struct BEST_APP_VERSION;
+
 struct RESULT {
    int id;
    int create_time;
@ -484,7 +486,7 @@ struct RESULT {
    int units;      // used for granting credit by # of units processed
    int parse_from_client(FILE*);
    char platform_name[256];
-    int version_num;
+    BEST_APP_VERSION* bavp;
    void clear();
    int write_to_client(FILE*);
 };
--- a/doc/boinc_news.php
+++ b/doc/boinc_news.php
@ -1,6 +1,10 @@
 <?

 $project_news = array(
+array("Mar 20, 2008",
+    "Watch an excellent talk by CERN's Francois Grey,
+    <a href=http://www.liftconference.com/distributed-computing-distributed-thinking>From distributed computing to distributed thinking</a>."
+),
 array("Mar 5, 2008",
    "Read <a href=http://www.linuxinsider.com/story/Volunteer-Computing-and-the-Search-for-Big-Answers-61943.html>Volunteer Computing and the Search for Big Answers</a>, an article about BOINC and volunteer computing  on LinuxInsider.com."
 ),
--- a/html/inc/countries.inc
+++ b/html/inc/countries.inc
@ -252,7 +252,6 @@ function print_country_select($selected_country="None") {
    if ($selected_country=="None" and $geoip_country!=""){
        $selected_country=$geoip_country;
    }
-    echo "selected: $selected_country\n";

    $numCountries = count($countries);
    for ($i=0; $i<$numCountries; $i++) {
--- a/html/inc/prefs.inc
+++ b/html/inc/prefs.inc
@ -716,8 +716,10 @@ function prefs_show_global($prefs) {
    row2(START_END_DESC, $x);
    row2(LEAVE_APPS_IN_MEMORY_DESC, $prefs->leave_apps_in_memory?"yes":"no");
    row2(CPU_SCHEDULING_DESC, "$prefs->cpu_scheduling_period_minutes minutes");
-    row2(MAX_CPUS_DESC, "$prefs->max_cpus ".MAX_CPUS_DESC2);
-    row2(MAX_NCPUS_PCT_DESC, "$prefs->max_ncpus_pct ".MAX_NCPUS_PCT_DESC2);
+    $x = $prefs->max_cpus?$prefs->max_cpus:'---';
+    row2(MAX_CPUS_DESC, "$x ".MAX_CPUS_DESC2);
+    $x = $prefs->max_ncpus_pct?$prefs->max_ncpus_pct:'---';
+    row2(MAX_NCPUS_PCT_DESC, "$x ".MAX_NCPUS_PCT_DESC2);
    row2(USE_AT_MOST2, "$prefs->cpu_usage_limit ".CPU_USAGE_LIMIT_DESC2);
    row1(DISK_LIMIT_DESC);
    row2(USE_AT_MOST, "$prefs->disk_max_used_gb GB disk space");
@ -1289,7 +1291,7 @@ function prefs_global_parse_form(&$prefs) {
    if (!verify_numeric($start_hour, 0)) $error->start_hour = true;
    if (!verify_numeric($end_hour, 0)) $error->end_hour = true;
    if (!verify_numeric($cpu_scheduling_period_minutes, 1)) $error->cpu_scheduling_period_minutes = true;;
-    if (!verify_numeric($max_cpus, 1)) $error->max_cpus = true;
+    if (!verify_numeric($max_cpus, 0)) $error->max_cpus = true;
    if (!verify_numeric($max_ncpus_pct, 0, 100)) $error->max_ncpus_pct = true;
    if (!verify_numeric($cpu_usage_limit, 0, 100)) $error->cpu_usage_limit = true;
    if (!verify_numeric($disk_max_used_gb, 0)) $error->disk_max_used_gb = true;
--- a/lib/app_ipc.C
+++ b/lib/app_ipc.C
@ -228,7 +228,6 @@ int parse_init_data_file(FILE* f, APP_INIT_DATA& ai) {
        if (xp.parse_str(tag, "project_dir", ai.project_dir, sizeof(ai.project_dir))) continue;
        if (xp.parse_str(tag, "boinc_dir", ai.boinc_dir, sizeof(ai.boinc_dir))) continue;
        if (xp.parse_str(tag, "authenticator", ai.authenticator, sizeof(ai.authenticator))) continue;
-        if (xp.parse_str(tag, "opaque", ai.opaque, sizeof(ai.opaque))) continue;
        if (xp.parse_str(tag, "wu_name", ai.wu_name, sizeof(ai.wu_name))) continue;
 #ifdef _WIN32
        if (xp.parse_str(tag, "comm_obj_name", ai.shmem_seg_name, sizeof(ai.shmem_seg_name))) continue;
@ -250,7 +249,6 @@ int parse_init_data_file(FILE* f, APP_INIT_DATA& ai) {
        if (xp.parse_double(tag, "fraction_done_update_period", ai.fraction_done_update_period)) continue;
        if (xp.parse_double(tag, "fraction_done_start", ai.fraction_done_start)) continue;
        if (xp.parse_double(tag, "fraction_done_end", ai.fraction_done_end)) continue;
-        if (xp.parse_int(tag, "ncpus_available", ai.ncpus_available)) continue;
        xp.skip_unexpected(tag, true, "parse_init_data_file");
    }
    fprintf(stderr, "parse_init_data_file: no end tag\n");
--- a/lib/app_ipc.h
+++ b/lib/app_ipc.h
@ -68,10 +68,8 @@ struct SHARED_MEM {
        // <quit/>
        // <suspend/>
        // <resume/>
-        // <ncpus_available>
    MSG_CHANNEL process_control_reply;
        // app->core
-        // <nthreads>
    MSG_CHANNEL graphics_request;
        // core->app
        // request a graphics mode:
@ -166,7 +164,6 @@ struct APP_INIT_DATA {
    char boinc_dir[256];
    char wu_name[256];
    char authenticator[256];
-    char opaque[256];
    int slot;
    double user_total_credit;
    double user_expavg_credit;
@ -195,7 +192,6 @@ struct APP_INIT_DATA {
    SHMEM_SEG_NAME shmem_seg_name;
    double wu_cpu_time;       // cpu time from previous episodes
    double fraction_done_update_period;
-    int ncpus_available;

    APP_INIT_DATA();
    APP_INIT_DATA(const APP_INIT_DATA&);  // copy constructor
--- a/lib/error_numbers.h
+++ b/lib/error_numbers.h
@ -186,6 +186,7 @@
 #define ERR_RMDIR           -227
 #define ERR_CHILD_FAILED    -228
 #define ERR_SYMLINK         -229
+#define ERR_DB_CONN_LOST    -230

 // PLEASE: add a text description of your error to 
 // the text description function boincerror() in str_util.C.
--- a/lib/shmem.C
+++ b/lib/shmem.C
@ -268,8 +268,9 @@ int detach_shmem(void* p) {
 }

 #else
-// V6 mmap() shared memory for Unix/Linux/Mac

+// V6 mmap() shared memory for Unix/Linux/Mac
+//
 int create_shmem_mmap(char *path, size_t size, void** pp) {
    int fd, retval;
    struct stat sbuf;
@ -301,7 +302,6 @@ int create_shmem_mmap(char *path, size_t size, void** pp) {

    *pp = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0);
    
-    // Now close the file. The kernel doesn't use our file descriptor.
    close(fd);

    if (*pp == MAP_FAILED) {
@ -331,7 +331,6 @@ int attach_shmem_mmap(char *path, void** pp) {

    *pp = mmap(NULL, sbuf.st_size, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0);
    
-    // Now close the file. The kernel doesn't use our file descriptor.
    close(fd);

    if (*pp == MAP_FAILED) {
@ -349,6 +348,7 @@ int detach_shmem_mmap(void* p, size_t size) {


 // Compatibility routines for Unix/Linux/Mac V5 applications 
+//
 int create_shmem(key_t key, int size, gid_t gid, void** pp) {
    int id;
    
@ -402,6 +402,7 @@ int create_shmem(key_t key, int size, gid_t gid, void** pp) {
 // prevents any more processes from attaching (by clearing 
 // the key in the shared memory structure), so BOINC does it 
 // only after we are completely done with the segment.
+//
 int destroy_shmem(key_t key){
    struct shmid_ds buf;
    int id, retval;
@ -427,7 +428,7 @@ int attach_shmem(key_t key, void** pp){

    id = shmget(key, 0, 0);
    if (id < 0) {
-        perror("shmget");
+        perror("shmget in attach_shmem");
        return ERR_SHMGET;
    }
    p = shmat(id, 0, 0);
--- a/sched/assimilator.C
+++ b/sched/assimilator.C
@ -33,6 +33,7 @@
 #include "boinc_db.h"
 #include "parse.h"
 #include "util.h"
+#include "error_numbers.h"
 #include "str_util.h"

 #include "sched_config.h"
@ -83,7 +84,17 @@ bool do_pass(APP& app) {
        app.id, ASSIMILATE_READY, mod_clause,
        one_pass_N_WU ? one_pass_N_WU : 1000
    );
-    while (!wu.enumerate(buf)) {
+    while (1) {
+        retval = wu.enumerate(buf);
+        if (retval) {
+            if (retval != ERR_DB_NOT_FOUND) {
+                log_messages.printf(MSG_DEBUG,
+                    "DB connection lost, exiting\n"
+                );
+                exit(0);
+            }
+            break;
+        }
        vector<RESULT> results;     // must be inside while()!

        // for testing purposes, pretend we did nothing
--- a/sched/db_purge.C
+++ b/sched/db_purge.C
@ -475,7 +475,17 @@ bool do_pass() {
    }

    int n=0;
-    while (!wu.enumerate(buf)) {
+    while (1) {
+        retval = wu.enumerate(buf);
+        if (retval) {
+            if (retval != ERR_DB_NOT_FOUND) {
+                log_messages.printf(MSG_DEBUG,
+                    "DB connection lost, exiting\n"
+                );
+                exit(0);
+            }
+            break;
+        }
        if (strstr(wu.name, "nodelete")) continue;
        did_something = true;
        
--- a/sched/feeder.C
+++ b/sched/feeder.C
@ -241,6 +241,16 @@ static bool get_job_from_db(
            retval = wi.enumerate(enum_size, select_clause, order_clause);
        }
    	if (retval) {
+            if (retval != ERR_DB_NOT_FOUND) {
+                // If DB server dies, exit;
+                // so /start (run from crontab) will restart us eventually.
+                //
+                log_messages.printf(MSG_CRITICAL,
+                    "DB connection lost, exiting\n"
+                );
+                exit(0);
+            }
+
            // we've reached the end of the result set
            //
            switch (enum_phase) {
--- a/sched/file_deleter.C
+++ b/sched/file_deleter.C
@ -272,7 +272,17 @@ bool do_pass(bool retry_error) {
        clause, WUS_PER_ENUM
    );

-    while (!wu.enumerate(buf)) {
+    while (1) {
+        retval = wu.enumerate(buf);
+        if (retval) {
+            if (retval != ERR_DB_NOT_FOUND) {
+                log_messages.printf(MSG_DEBUG,
+                    "DB connection lost, exiting\n"
+                );
+                exit(0);
+            }
+            break;
+        }
        did_something = true;

        retval = 0;
--- a/sched/make_work.C
+++ b/sched/make_work.C
@ -139,7 +139,7 @@ void make_new_wu(DB_WORKUNIT& original_wu, char* starting_xml, int start_time) {
        );
        exit(retval);
    }
-    wu.id = boinc_db.insert_id();
+    original_wu.id = boinc_db.insert_id();
    log_messages.printf(MSG_DEBUG,
        "Created %s, clone of %s\n", wu.name, original_wu.name
    );
@ -157,6 +157,9 @@ void wait_for_results(int wu_id) {
    sprintf(buf, "where workunitid=%d", wu_id);
    while (1) {
        retval = result.count(count, buf);
+        log_messages.printf(MSG_DEBUG, "result.count for %d returned %d, %d\n",
+            wu_id, count, retval
+        );
        if (retval) {
            log_messages.printf(MSG_CRITICAL, "result.count: %d\n", retval);
            exit(1);
--- a/sched/message_handler.C
+++ b/sched/message_handler.C
@ -36,6 +36,7 @@ using namespace std;

 #include "boinc_db.h"
 #include "util.h"
+#include "error_numbers.h"
 #include "str_util.h"

 #include "sched_config.h"
@ -76,7 +77,17 @@ bool do_message_scan() {
    int retval;

    sprintf(buf, "where handled=0");
-    while (!mfh.enumerate(buf)) {
+    while (1) {
+        retval = mfh.enumerate(buf);
+        if (retval) {
+            if (retval != ERR_DB_NOT_FOUND) {
+                log_messages.printf(MSG_DEBUG,
+                    "DB connection lost, exiting\n"
+                );
+                exit(0);
+            }
+            break;
+        }
        retval = handle_message(mfh);
        if (!retval) {
            mfh.handled = true;
--- a/sched/sched_array.C
+++ b/sched/sched_array.C
@ -124,8 +124,9 @@ void scan_work_array(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {

        // Find the app and best app_version for this host.
        //
-        found = get_app_version(sreq, reply, wu, app, avp);
-        if (!found) {
+        BEST_APP_VERSION* bavp;
+        bavp = get_app_version(sreq, reply, wu);
+        if (!bavp) {
            continue;
        }

@ -241,9 +242,7 @@ void scan_work_array(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
            goto done;
        }

-        retval = add_result_to_reply(
-            result, wu, sreq, reply, app, avp
-        );
+        retval = add_result_to_reply(result, wu, sreq, reply, bavp);

        // add_result_to_reply() fails only in fairly pathological cases -
        // e.g. we couldn't update the DB record or modify XML fields.
--- a/sched/sched_assign.C
+++ b/sched/sched_assign.C
@ -41,7 +41,7 @@ static int send_assigned_job(
    static int seqno=0;
    static R_RSA_PRIVATE_KEY key;
    APP* app;
-    APP_VERSION* avp;
+    BEST_APP_VERSION* bavp;
                                 
    if (first) {
        first = false;
@ -60,16 +60,9 @@ static int send_assigned_job(
        );
        return retval;
    }
-    app = ssp->lookup_app(wu.appid);
-    if (!app) {
-        log_messages.printf(MSG_CRITICAL,
-            "app %d for assigned WU %d not found\n",
-            wu.appid, wu.id
-        );
-        return ERR_NOT_FOUND;
-    }
-    bool found = get_app_version(request, reply, wu, app, avp);
-    if (!found) {
+
+    bavp = get_app_version(request, reply, wu);
+    if (!bavp) {
        log_messages.printf(MSG_CRITICAL,
            "App version for assigned WU not found\n"
        );
@ -88,7 +81,7 @@ static int send_assigned_job(
    int result_id = boinc_db.insert_id();
    DB_RESULT result;
    retval = result.lookup_id(result_id);
-    add_result_to_reply(result, wu, request, reply, app, avp);
+    add_result_to_reply(result, wu, request, reply, bavp);

    // if this is a one-job assignment, fill in assignment.resultid
    // so that it doesn't get sent again
--- a/sched/sched_locality.C
+++ b/sched/sched_locality.C
@ -276,15 +276,14 @@ static int possibly_send_result(
    DB_RESULT result2;
    int retval, count;
    char buf[256];
-    APP* app;
-    APP_VERSION* avp;
+    BEST_APP_VERSION* bavp;

    retval = wu.lookup_id(result.workunitid);
    if (retval) return ERR_DB_NOT_FOUND;

-    bool found = get_app_version(sreq, reply, wu, app, avp);
+    bavp = get_app_version(sreq, reply, wu);

-    if (!found && anonymous(sreq.platforms.list[0])) {
+    if (!bavp && anonymous(sreq.platforms.list[0])) {
        char help_msg_buf[512];
        sprintf(help_msg_buf,
            "To get more %s work, finish current work, stop BOINC, remove app_info.xml file, and restart.",
@ -295,12 +294,13 @@ static int possibly_send_result(
        reply.set_delay(DELAY_ANONYMOUS);
    }

-    if (!found) return ERR_NO_APP_VERSION;
+    if (!bavp) return ERR_NO_APP_VERSION;

    // wu_is_infeasible() returns the reason why the WU is not feasible;
    // INFEASIBLE_MEM, INFEASIBLE_DISK, INFEASIBLE_CPU.
    // see sched_send.h.
    // 
+    APP* app = ssp->lookup_app(wu.appid);
    if (wu_is_infeasible(wu, sreq, reply, *app)) {
        return ERR_INSUFFICIENT_RESOURCE;
    }
@ -312,7 +312,7 @@ static int possibly_send_result(
        if (count > 0) return ERR_WU_USER_RULE;
    }

-    return add_result_to_reply(result, wu, sreq, reply, app, avp);
+    return add_result_to_reply(result, wu, sreq, reply, bavp);
 }

 // returns true if the work generator can not make more work for this
--- a/sched/sched_plan.C
+++ b/sched/sched_plan.C
@ -27,8 +27,48 @@
 //    (you need to prevent that from being overwritten too)
 // In either case, put your version under source-code control, e.g. SVN

+#include "sched_msgs.h"
 #include "sched_plan.h"

-bool app_plan(HOST& host, char* plan_class, HOST_USAGE& hu) {
+// return the number of usable CPUs, taking prefs into account.
+// If a prefs limit applies, set bounded to true.
+//
+static void get_ncpus(SCHEDULER_REQUEST& sreq, int& ncpus, bool& bounded) {
+    ncpus = sreq.host.p_ncpus;
+    bounded = false;
+    if (sreq.global_prefs.max_ncpus_pct && sreq.global_prefs.max_ncpus_pct < 100) {
+        bounded = true;
+        ncpus = (int)((ncpus*sreq.global_prefs.max_ncpus_pct)/100.);
+    }
+}
+
+bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
+    // clients before 6.1.11 don't understand plan_class
+    //
+    int v = sreq.core_client_major_version*10000
+        + sreq.core_client_minor_version*100
+        + sreq.core_client_release;
+    if (v < 60111) return false;
+    if (!strcmp(plan_class, "mt")) {
+        // the following is for an app that can use anywhere
+        // from 1 to 64 threads, can control this exactly,
+        // and whose speedup is .95N
+        // (so a sequential app will be used if one is available)
+        //
+        int ncpus, nthreads;
+        bool bounded;
+
+        get_ncpus(sreq, ncpus, bounded);
+        nthreads = ncpus;
+        if (nthreads > 64) nthreads = 64;
+        hu.avg_ncpus = nthreads;
+        hu.max_ncpus = nthreads;
+        sprintf(hu.cmdline, "--nthreads %d", nthreads);
+        hu.flops = 0.95*sreq.host.p_fpops*nthreads;
+        return true;
+    }
+    log_messages.printf(MSG_CRITICAL,
+        "Unknown plan class: %s\n", plan_class
+    );
    return false;
 }
--- a/sched/sched_plan.h
+++ b/sched/sched_plan.h
@ -20,4 +20,4 @@
 #include "boinc_db.h"
 #include "server_types.h"

-extern bool app_plan(HOST&, char* plan_class, HOST_USAGE&);
+extern bool app_plan(SCHEDULER_REQUEST&, char* plan_class, HOST_USAGE&);
--- a/sched/sched_resend.C
+++ b/sched/sched_resend.C
@ -105,8 +105,7 @@ bool resend_lost_work(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
    bool did_any = false;
    int num_eligible_to_resend=0;
    int num_resent=0;
-    APP* app;
-    APP_VERSION* avp;
+    BEST_APP_VERSION* bavp;
    int retval;

    sprintf(buf, " where hostid=%d and server_state=%d ",
@ -139,8 +138,8 @@ bool resend_lost_work(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
            continue;
        }

-        found = get_app_version(sreq, reply, wu, app, avp);
-        if (!found) {
+        bavp = get_app_version(sreq, reply, wu);
+        if (!bavp) {
            log_messages.printf(MSG_CRITICAL,
                "[HOST#%d] no app version [RESULT#%d]\n",
                reply.host.id, result.id
@ -186,9 +185,7 @@ bool resend_lost_work(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
            USER_MESSAGE um(warning_msg, "high");
            reply.insert_message(um);
        } else {
-            retval = add_result_to_reply(
-                result, wu, sreq, reply, app, avp
-            );
+            retval = add_result_to_reply(result, wu, sreq, reply, bavp);
            if (retval) {
                log_messages.printf(MSG_CRITICAL,
                    "[HOST#%d] failed to send [RESULT#%d]\n",
--- a/sched/sched_send.C
+++ b/sched/sched_send.C
@ -109,57 +109,56 @@ bool SCHEDULER_REQUEST::has_version(APP& app) {
    return false;
 }

-// return the APP and the best APP_VERSION for the given host.
-// return false if none
+// return BEST_APP_VERSION for the given host, or NULL if none
 //
 //
-bool get_app_version(
-    SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply,
-    WORKUNIT& wu, APP* &app, APP_VERSION* &avp
+BEST_APP_VERSION* get_app_version(
+    SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply, WORKUNIT& wu
 ) {
    bool found;
    double flops;
    unsigned int i;
    int j;
+    BEST_APP_VERSION* bavp;

-    app = ssp->lookup_app(wu.appid);
-    if (!app) {
-        log_messages.printf(MSG_CRITICAL, "Can't find APP#%d\n", wu.appid);
-        return false;
+    //
+    // see if app is already in memoized array
+    //
+    for (i=0; i<reply.wreq.best_app_versions.size(); i++) {
+        bavp = reply.wreq.best_app_versions[i];
+        if (bavp->appid == wu.appid) {
+            if (!bavp->avp) return NULL;
+            return bavp;
+        }
    }

+    APP* app = ssp->lookup_app(wu.appid);
+    if (!app) {
+        log_messages.printf(MSG_CRITICAL, "app not found: %d\n", wu.appid);
+        return NULL;
+    }
+
+    bavp = new BEST_APP_VERSION;
+    bavp->appid = wu.appid;
    if (anonymous(sreq.platforms.list[0])) {
        found = sreq.has_version(*app);
        if (!found) {
            log_messages.printf(MSG_DEBUG, "Didn't find anonymous app\n");
-            return false;
+            bavp->avp = 0;
+        } else {
+            bavp->avp = (APP_VERSION*)1;    // arbitrary nonzero value
        }
-        avp = NULL;
-        return true;
+        reply.wreq.best_app_versions.push_back(bavp);
+        return bavp;
    }

-    // see if app is already in memoized array
-    //
-    for (i=0; i<reply.wreq.best_app_versions.size(); i++) {
-        BEST_APP_VERSION& bav = reply.wreq.best_app_versions[i];
-        if (bav.appid == wu.appid) {
-            if (bav.avp) {
-                avp = bav.avp;
-                return true;
-            } else {
-                return false;
-            }
-        }
-    }

    // go through the client's platforms.
    // Scan the app versions for each platform.
    // Find the one with highest expected FLOPS
    //
-    BEST_APP_VERSION bav;
-    bav.appid = wu.appid;
-    bav.host_usage.flops = 0;
-    bav.avp = NULL;
+    bavp->host_usage.flops = 0;
+    bavp->avp = NULL;
    for (i=0; i<sreq.platforms.list.size(); i++) {
        PLATFORM* p = sreq.platforms.list[i];
        for (j=0; j<ssp->napp_versions; j++) {
@ -172,51 +171,45 @@ bool get_app_version(
                continue;
            }
            if (strlen(av.plan_class)) {
-                if (app_plan(reply.host, av.plan_class, host_usage)) {
-                    flops = host_usage.flops;
-                } else {
-                    flops = 0;
+                if (!app_plan(sreq, av.plan_class, host_usage)) {
+                    continue;
                }
            } else {
-                flops = reply.host.p_fpops;
+                host_usage.init_seq(reply.host.p_fpops);
            }
-            if (flops > bav.host_usage.flops) {
-                bav.host_usage.flops = flops;
-                bav.avp = &av;
+            if (host_usage.flops > bavp->host_usage.flops) {
+                bavp->host_usage = host_usage;
+                bavp->avp = &av;
            }
        }
    }
-    if (bav.avp) {
-        reply.wreq.best_app_versions.push_back(bav);
-        avp = bav.avp;
+    reply.wreq.best_app_versions.push_back(bavp);
+    if (bavp->avp) {
        if (config.debug_version_select) {
            log_messages.printf(MSG_DEBUG,
                "Best version of app %s is %d (%f FLOPS)\n",
-                app->name, avp->id, bav.host_usage.flops
+                app->name, bavp->avp->id, bavp->host_usage.flops
            );
        }
-        return true;
-    }
-
-    // here if no app version exists
-    //
-    reply.wreq.best_app_versions.push_back(bav);
-
-    if (config.debug_version_select) {
-        log_messages.printf(MSG_DEBUG,
-            "no app version available: APP#%d PLATFORM#%d min_version %d\n",
-            app->id, sreq.platforms.list[0]->id, app->min_version
+    } else {
+        // here if no app version exists
+        //
+        if (config.debug_version_select) {
+            log_messages.printf(MSG_DEBUG,
+                "no app version available: APP#%d PLATFORM#%d min_version %d\n",
+                app->id, sreq.platforms.list[0]->id, app->min_version
+            );
+        }
+        char message[256];
+        sprintf(message,
+            "%s is not available for your type of computer.",
+            app->user_friendly_name
        );
+        USER_MESSAGE um(message, "high");
+        reply.wreq.insert_no_work_message(um);
+        reply.wreq.no_app_version = true;
    }
-    char message[256];
-    sprintf(message,
-        "%s is not available for your type of computer.",
-        app->user_friendly_name
-    );
-    USER_MESSAGE um(message, "high");
-    reply.wreq.insert_no_work_message(um);
-    reply.wreq.no_app_version = true;
-    return false;
+    return bavp;
 }

 static char* find_user_friendly_name(int appid) {
@ -711,11 +704,14 @@ bool app_core_compatible(WORK_REQ& wreq, APP_VERSION& av) {
 // Add the app and app_version to the reply also.
 //
 int add_wu_to_reply(
-    WORKUNIT& wu, SCHEDULER_REPLY& reply, APP* app, APP_VERSION* avp
+    WORKUNIT& wu, SCHEDULER_REPLY& reply, APP* app, BEST_APP_VERSION* bavp
 ) {
    int retval;
    WORKUNIT wu2, wu3;
    
+    APP_VERSION* avp = bavp->avp;
+    if (avp == (APP_VERSION*)1) avp = NULL;
+
    // add the app, app_version, and workunit to the reply,
    // but only if they aren't already there
    //
@ -878,13 +874,14 @@ void SCHEDULER_REPLY::got_bad_result() {

 int add_result_to_reply(
    DB_RESULT& result, WORKUNIT& wu, SCHEDULER_REQUEST& request,
-    SCHEDULER_REPLY& reply, APP* app, APP_VERSION* avp
+    SCHEDULER_REPLY& reply, BEST_APP_VERSION* bavp
 ) {
    int retval;
    double wu_seconds_filled;
    bool resent_result = false;
+    APP* app = ssp->lookup_app(wu.appid);

-    retval = add_wu_to_reply(wu, reply, app, avp);
+    retval = add_wu_to_reply(wu, reply, app, bavp);
    if (retval) return retval;

    // in the scheduling locality case,
@ -996,11 +993,7 @@ int add_result_to_reply(
        );
        return retval;
    }
-    if (avp) {
-        PLATFORM* pp = ssp->lookup_platform_id(avp->platformid);
-        strcpy(result.platform_name, pp->name);
-        result.version_num = avp->version_num;
-    }
+    result.bavp = bavp;
    reply.insert_result(result);
    reply.wreq.seconds_to_fill -= wu_seconds_filled;
    request.estimated_delay += wu_seconds_filled/effective_ncpus(reply.host);
--- a/sched/sched_send.h
+++ b/sched/sched_send.h
@ -21,13 +21,13 @@ extern void send_work(SCHEDULER_REQUEST&, SCHEDULER_REPLY&);

 extern int add_result_to_reply(
    DB_RESULT& result, WORKUNIT& wu, SCHEDULER_REQUEST&, SCHEDULER_REPLY&,
-    APP* app, APP_VERSION* avp
+    BEST_APP_VERSION* bavp
 );

 extern bool anonymous(PLATFORM*);

-extern bool get_app_version(
-    SCHEDULER_REQUEST&, SCHEDULER_REPLY&, WORKUNIT&, APP*&, APP_VERSION*&
+extern BEST_APP_VERSION* get_app_version(
+    SCHEDULER_REQUEST&, SCHEDULER_REPLY&, WORKUNIT&
 );

 extern bool app_core_compatible(WORK_REQ& wreq, APP_VERSION& av);
--- a/sched/server_types.C
+++ b/sched/server_types.C
@ -831,7 +831,7 @@ int APP::write(FILE* fout) {
 }

 int APP_VERSION::write(FILE* fout) {
-    char buf[LARGE_BLOB_SIZE], buf2[256];
+    char buf[LARGE_BLOB_SIZE];
    strcpy(buf, xml_doc);
    char* p = strstr(buf, "</app_version>");
    if (!p) {
@ -841,14 +841,18 @@ int APP_VERSION::write(FILE* fout) {
    *p = 0;
    fputs(buf, fout);
    PLATFORM* pp = ssp->lookup_platform_id(platformid);
-    sprintf(buf2, "    <platform>%s</platform>\n", pp->name);
-    fputs(buf2, fout);
+    fprintf(fout, "    <platform>%s</platform>\n", pp->name);
+    if (strlen(plan_class)) {
+        fprintf(fout, "    <plan_class>%s</plan_class>\n", plan_class);
+    }
    fputs("</app_version>\n", fout);
    return 0;
 }

 int RESULT::write_to_client(FILE* fout) {
-    char buf[LARGE_BLOB_SIZE], buf2[256];
+    char buf[LARGE_BLOB_SIZE];
+    unsigned int i;
+
    strcpy(buf, xml_doc_in);
    char* p = strstr(buf, "</result>");
    if (!p) {
@ -858,15 +862,44 @@ int RESULT::write_to_client(FILE* fout) {
    *p = 0;
    fputs(buf, fout);

-    // platform name will be null in anonymous case; don't send
-    //
-    if (strlen(platform_name)) {
-        sprintf(buf2,
+    APP_VERSION* avp = bavp->avp;
+    if (avp == (APP_VERSION*)1) avp = NULL;
+    if (avp) {
+        PLATFORM* pp = ssp->lookup_platform_id(avp->platformid);
+        fprintf(fout,
            "    <platform>%s</platform>\n"
            "    <version_num>%d</version_num>\n",
-            platform_name, version_num
+            pp->name, avp->version_num
        );
-        fputs(buf2, fout);
+        if (strlen(avp->plan_class)) {
+            fprintf(fout,
+                "    <plan_class>%s</plan_class>\n"
+                "    <avg_ncpus>%f</avg_ncpus>\n"
+                "    <max_ncpus>%f</max_ncpus>\n"
+                "    <flops>%f</flops>\n",
+                avp->plan_class,
+                bavp->host_usage.avg_ncpus,
+                bavp->host_usage.max_ncpus,
+                bavp->host_usage.flops
+            );
+            if (strlen(bavp->host_usage.cmdline)) {
+                fprintf(fout,
+                    "    <cmdline>%s</cmdline>\n",
+                    bavp->host_usage.cmdline
+                );
+            }
+            for (i=0; i<bavp->host_usage.coprocs.coprocs.size(); i++) {
+                COPROC& cp = bavp->host_usage.coprocs.coprocs[i];
+                fprintf(fout,
+                    "    <coproc>\n"
+                    "        <name>%s</name>\n"
+                    "        <count>%d</count>\n"
+                    "    </coproc>\n",
+                    cp.name,
+                    cp.count
+                );
+            }
+        }
    }
    fputs("</result>\n", fout);
    return 0;
@ -1062,6 +1095,7 @@ void GLOBAL_PREFS::parse(const char* buf, const char* venue) {
    if (parse_double(buf2, "<ram_max_used_idle_pct>", dtemp)) {
        ram_max_used_idle_frac = dtemp/100.;
    }
+    parse_double(buf2, "<max_ncpus_pct>", max_ncpus_pct);
 }

 void GLOBAL_PREFS::defaults() {
--- a/sched/server_types.h
+++ b/sched/server_types.h
@ -73,9 +73,18 @@ struct USER_MESSAGE {

 struct HOST_USAGE {
    COPROCS coprocs;
-    double ncpus;
+    double avg_ncpus;
+    double max_ncpus;
    double flops;
-    char opaque[256];
+    char cmdline[256];
+
+    void init_seq(double x) {
+        coprocs.coprocs.clear();
+        avg_ncpus = 1;
+        max_ncpus = 1;
+        flops = x;
+        strcpy(cmdline, "");
+    }
 };

 // keep track of the best app_version for each app for this host
@ -107,7 +116,7 @@ struct WORK_REQ {
    RESOURCE bandwidth;

    std::vector<USER_MESSAGE> no_work_messages;
-    std::vector<BEST_APP_VERSION> best_app_versions;
+    std::vector<BEST_APP_VERSION*> best_app_versions;

    bool no_allowed_apps_available;
    bool excessive_work_buf;
@ -163,6 +172,7 @@ struct GLOBAL_PREFS {
    double work_buf_min_days;
    double ram_max_used_busy_frac;
    double ram_max_used_idle_frac;
+    double max_ncpus_pct;

    void parse(const char* buf, const char* venue);
    void defaults();
--- a/sched/validator.C
+++ b/sched/validator.C
@ -586,7 +586,15 @@ bool do_validate_scan(APP& app) {
            wu_id_modulus, wu_id_remainder,
            items
        );
-        if (retval) break;
+        if (retval) {
+            if (retval != ERR_DB_NOT_FOUND) {
+                log_messages.printf(MSG_DEBUG,
+                    "DB connection lost, exiting\n"
+                );
+                exit(0);
+            }
+            break;
+        }
        retval = handle_wu(validator, items);
        if (!retval) found = true;
    }
--- a/tools/update_versions
+++ b/tools/update_versions
@ -68,7 +68,7 @@ def add_files(
    assert(exec_files[0])
    version_major, version_minor, platform_name, plan_class = match.groups()
    if plan_class:
-        plan_class = plan_class[1:]      # drop leading :
+        plan_class = plan_class[2:]      # drop leading __
    version_num = int(version_major) * 100 + int(version_minor)

    file_base = os.path.basename(exec_files[0])
@ -118,7 +118,7 @@ def add_files(

 def re_match_exec_filename(filepath):
    file = os.path.basename(filepath)
-    return re.match('[^.]+_([0-9]+)[.]([0-9]+)_([^.]+?(?:[0-9][0-9.]*[0-9])?)([:][^.]+)?(?:[.]gz|[.]exe|[.]sit|[.]msi)?$', file)
+    return re.match('[^.]+_([0-9]+)[.]([0-9]+)_([^.]+?(?:[0-9][0-9.]*[0-9])?)(__[^.]+)?(?:[.]gz|[.]exe|[.]sit|[.]msi)?$', file)

 def find_versions(app, dir):
    """Find application versions in DIR.
--- a/version.h
+++ b/version.h
@ -10,10 +10,10 @@
 #define BOINC_MINOR_VERSION 1

 /* Release part of BOINC version number */
-#define BOINC_RELEASE 10
+#define BOINC_RELEASE 11

 /* String representation of BOINC version number */
-#define BOINC_VERSION_STRING "6.1.10"
+#define BOINC_VERSION_STRING "6.1.11"

 #if (defined(_WIN32) || defined(__APPLE__))
 /* Name of package */
@ -26,13 +26,13 @@
 #define PACKAGE_NAME "BOINC"

 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "BOINC 6.1.10"
+#define PACKAGE_STRING "BOINC 6.1.11"

 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "boinc"

 /* Define to the version of this package. */
-#define PACKAGE_VERSION "6.1.10"
+#define PACKAGE_VERSION "6.1.11"

 #endif /* #if (defined(_WIN32) || defined(__APPLE__)) */