- scheduler: app_plan() no longer has to guess how efficiently
  an app version will run on a particular host.
- scheduler: fix memory leak: BEST_APP_VERSIONs weren't being freed

svn path=/trunk/boinc/; revision=21148
2010-04-08 18:27:27 +00:00 · 2010-04-08 18:27:27 +00:00 · 85e06afe4b
parent 1cfd7e85a8
commit 85e06afe4b
6 changed files with 99 additions and 111 deletions
--- a/11
+++ b/11
@ -2650,3 +2650,14 @@ Rom    8 Apr 2010
    win_build/installerv2/redist/Windows/x64/
        boinccas.dll
        boinccas95.dll
+
+David  7 Apr 2010
+    - scheduler: app_plan() no longer has to guess how efficiently
+        an app version will run on a particular host.
+    - scheduler: fix memory leak: BEST_APP_VERSIONs weren't being freed
+
+    sched/
+        sched_customize.cpp
+        sched_types.cpp
+        sched_version.cpp
+        sched_send.cpp
--- a/doc/index.php
+++ b/doc/index.php
@ -213,6 +213,7 @@ function language_form() {
        ."<option value=de>Deutsch (German)"
        ."<option value=en>English"
        ."<option value=es>Espa&ntilde;ol (Spanish)"
+        ."<option value=fi>Suomi (Finnish)"
        ."<option value=fr>Fran&ccedil;ais (French)"
        ."<option value=el>Ελληνικά (Greek)"
        ."<option value=hu>Magyar (Hungarian)"
--- a/sched/sched_customize.cpp
+++ b/sched/sched_customize.cpp
@ -99,24 +99,19 @@ bool wu_is_infeasible_custom(WORKUNIT& wu, APP& app, BEST_APP_VERSION& bav) {
 bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
    char buf[256];
    if (!strcmp(plan_class, "mt")) {
-        // the following is for an app that:
-        // - can use from 1 to 64 threads, and can control this exactly
-        // - if it uses N threads, will use .65N cores on average
-        // (hence on a uniprocessor we'll use a sequential app
-        // if one is available)
+        // the following is for an app that
+        // can use anywhere from 1 to 64 threads
        //
        double ncpus = g_wreq->effective_ncpus;
            // number of usable CPUs, taking user prefs into account
-        int nthreads = (int)(ncpus/.65);
-        if (!nthreads) {
-            add_no_work_message("Your computer has too few CPUs");
-            return false;
-        }
+        int nthreads = (int)ncpus;
        if (nthreads > 64) nthreads = 64;
-        hu.avg_ncpus = nthreads*.65;
+        hu.avg_ncpus = nthreads;
        hu.max_ncpus = nthreads;
        sprintf(hu.cmdline, "--nthreads %d", nthreads);
-        hu.flops = sreq.host.p_fpops*hu.avg_ncpus;
+        hu.flops = sreq.host.p_fpops*hu.avg_ncpus*.99;
+            // the .99 ensures that on uniprocessors a sequential app
+            // will be used in preference to this
        if (config.debug_version_select) {
            log_messages.printf(MSG_NORMAL,
                "[version] %s Multi-thread app estimate %.2f GFLOPS\n",
@ -246,35 +241,22 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
        }
        hu.gpu_ram = 200*MEGA;

-        double cpu_frac;    // the fraction of the app's FLOPS that are
-                            // performed by the CPU
-                            // (GPU is assumed to be idle then)
-        double gpu_effic;   // when the app is using the GPU,
-                            // fraction of GPU's peak FLOPS it gets
-
-#if 1
-        // the following for an app that runs 99% on the GPU
-        cpu_frac = .01;
-        gpu_effic = .25;
-#endif
-
-#if 0
-        // the following for SETI@home Astropulse
-        cpu_frac = .75;
-        gpu_effic = .25;
-#endif
-
-        double p = sreq.host.p_fpops;
-        double g = cp->peak_flops()/5;
-        hu.flops = p*g/(cpu_frac*g + (1-cpu_frac)*p);
-
-        double x = (cpu_frac*g)/(cpu_frac*g + (1-cpu_frac)*p);
-        hu.avg_ncpus = x;
-        hu.max_ncpus = x;
-
        hu.natis = 1;
        //hu.natis = .5;    // you can use a fractional GPU if you want

+        double cpu_frac;    // the fraction of the app's FLOPS that are
+                            // performed by the CPU
+
+        cpu_frac = .01;     // an app that runs 99% on the GPU
+        //cpu_frac = .75;   // for SETI@home Astropulse
+
+        double p = sreq.host.p_fpops;
+        double c = hu.natis*cp->peak_flops();
+        double x = (c*cpu_frac)/sreq.host.p_fpops;
+        hu.avg_ncpus = x;
+        hu.max_ncpus = x;
+        hu.flops = c*(1+cpu_frac);
+
        // determine priority among variants of ATI
        //   1. ati14
        //   2. ati13ati
@ -410,34 +392,23 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
        }
        hu.gpu_ram = min_ram - 16*MEGA;

-        double cpu_frac;    // the fraction of the app's FLOPS that are
-                            // performed by the CPU
-                            // (GPU is assumed to be idle then)
-        double gpu_effic;   // when the app is using the GPU,
-                            // fraction of GPU's peak FLOPS it gets
-
-#if 1
-        // the following for an app that runs 99% on the GPU
-        cpu_frac = .01;
-        gpu_effic = .25;
-#endif
-
-#if 0
-        cpu_frac = .75;
-        gpu_effic = .25;
-#endif
-
-        double p = sreq.host.p_fpops;
-        double g = cp->peak_flops()/5;
-        hu.flops = p*g/(cpu_frac*g + (1-cpu_frac)*p);
-
-        double x = (cpu_frac*g)/(cpu_frac*g + (1-cpu_frac)*p);
-        hu.avg_ncpus = x;
-        hu.max_ncpus = x;

        hu.ncudas = 1;
        //hu.ncudas = .5;    // you can use a fractional GPU if you want

+        double cpu_frac;    // the fraction of the app's FLOPS that are
+                            // performed by the CPU
+
+        cpu_frac = .01;     // an app that runs 99% on the GPU
+        //cpu_frac = .75;   // for SETI@home Astropulse
+
+        double p = sreq.host.p_fpops;
+        double c = hu.natis*cp->peak_flops();
+        double x = (c*cpu_frac)/sreq.host.p_fpops;
+        hu.avg_ncpus = x;
+        hu.max_ncpus = x;
+        hu.flops = c*(1+cpu_frac);
+
        if (!strcmp(plan_class, "cuda23")) {
            hu.flops *= 1.01;
        } else if (!strcmp(plan_class, "cuda_fermi")) {
--- a/sched/sched_send.cpp
+++ b/sched/sched_send.cpp
@ -85,7 +85,10 @@ const double MAX_REQ_SECS = (28*SECONDS_IN_DAY);
 const int MAX_GPUS = 8;
    // don't believe clients who claim they have more GPUs than this

-// get limits on #jobs per day and per RPC, on in progress
+// get limits on:
+// # jobs per day
+// # jobs per RPC
+// # jobs in progress
 //
 void WORK_REQ::get_job_limits() {
    int n;
@ -312,8 +315,7 @@ static inline void get_dcf() {
 // estimate the amount of real time to complete this WU,
 // taking into account active_frac etc.
 // Note: don't factor in resource_share_fraction.
-// The core client no longer necessarily does round-robin
-// across all projects.
+// The core client doesn't necessarily round-robin across all projects.
 //
 double estimate_duration(WORKUNIT& wu, BEST_APP_VERSION& bav) {
    double edu = estimate_duration_unscaled(wu, bav);
@ -780,7 +782,7 @@ int wu_is_infeasible_fast(

 // insert "text" right after "after" in the given buffer
 //
-int insert_after(char* buffer, const char* after, const char* text) {
+static int insert_after(char* buffer, const char* after, const char* text) {
    char* p;
    char temp[BLOB_SIZE];

@ -807,7 +809,7 @@ int insert_after(char* buffer, const char* after, const char* text) {
 // add elements to WU's xml_doc,
 // in preparation for sending it to a client
 //
-int insert_wu_tags(WORKUNIT& wu, APP& app) {
+static int insert_wu_tags(WORKUNIT& wu, APP& app) {
    char buf[BLOB_SIZE];

    sprintf(buf,
@ -827,10 +829,9 @@ int insert_wu_tags(WORKUNIT& wu, APP& app) {
    return insert_after(wu.xml_doc, "<workunit>\n", buf);
 }

-// add the given workunit to a reply.
-// Add the app and app_version to the reply also.
+// Add the given workunit, app, and app version to a reply.
 //
-int add_wu_to_reply(
+static int add_wu_to_reply(
    WORKUNIT& wu, SCHEDULER_REPLY& reply, APP* app, BEST_APP_VERSION* bavp
 ) {
    int retval;
@ -861,7 +862,7 @@ int add_wu_to_reply(
        }
    }

-    // add time estimate to reply
+    // modify the WU's xml_doc; add <name>, <rsc_*> etc.
    //
    wu2 = wu;       // make copy since we're going to modify its XML field
    retval = insert_wu_tags(wu2, *app);
@ -881,7 +882,9 @@ int add_wu_to_reply(
    return 0;
 }

-int insert_name_tags(RESULT& result, WORKUNIT const& wu) {
+// add <name> tags to result's xml_doc_in
+//
+static int insert_name_tags(RESULT& result, WORKUNIT const& wu) {
    char buf[256];
    int retval;

@ -894,7 +897,7 @@ int insert_name_tags(RESULT& result, WORKUNIT const& wu) {
    return 0;
 }

-int insert_deadline_tag(RESULT& result) {
+static int insert_deadline_tag(RESULT& result) {
    char buf[256];
    sprintf(buf, "<report_deadline>%d</report_deadline>\n", result.report_deadline);
    int retval = insert_after(result.xml_doc_in, "<result>\n", buf);
@ -1586,7 +1589,7 @@ void send_work() {
                    "[assign] [HOST#%d] sent assigned jobs\n", g_reply->host.id
                );
            }
-            return;
+            goto done;
        }
    }

@ -1638,6 +1641,7 @@ void send_work() {
        send_work_old();
    }

+done:
    retval = update_host_scale_times(ssp, g_reply->results, g_reply->host.id);
    if (retval) {
        log_messages.printf(MSG_CRITICAL,
--- a/sched/sched_types.h
+++ b/sched/sched_types.h
@ -193,7 +193,8 @@ struct WORK_REQ {
    RESOURCE bandwidth;

    std::vector<USER_MESSAGE> no_work_messages;
-    std::vector<BEST_APP_VERSION*> best_app_versions;
+    std::vector<BEST_APP_VERSION> best_app_versions;
+    std::vector<DB_HOST_APP_VERSION> host_app_versions;

    // various reasons for not sending jobs (used to explain why)
    //
@ -266,15 +267,16 @@ struct BEST_APP_VERSION {
    int appid;

    bool present;
+        // false means there's no usable version for this app

-    // populated if anonymous platform:
    CLIENT_APP_VERSION* cavp;
+        // populated if anonymous platform

-    // populated otherwise:
    APP_VERSION* avp;
+        // populated otherwise

-    // populated in either case:
    HOST_USAGE host_usage;
+        // populated in either case

    BEST_APP_VERSION() {
        present = false;
--- a/sched/sched_version.cpp
+++ b/sched/sched_version.cpp
@ -123,24 +123,23 @@ BEST_APP_VERSION* get_app_version(WORKUNIT& wu, bool check_req) {
    bool found;
    unsigned int i;
    int retval, j;
-    BEST_APP_VERSION* bavp;
    char message[256], buf[256];

    // see if app is already in memoized array
    //
-    std::vector<BEST_APP_VERSION*>::iterator bavi;
+    std::vector<BEST_APP_VERSION>::iterator bavi;
    bavi = g_wreq->best_app_versions.begin();
    while (bavi != g_wreq->best_app_versions.end()) {
-        bavp = *bavi;
-        if (bavp->appid == wu.appid) {
-            if (!bavp->present) return NULL;
+        BEST_APP_VERSION& bav = *bavi;
+        if (bav.appid == wu.appid) {
+            if (!bav.present) return NULL;

            // if we previously chose a CUDA app but don't need more CUDA work,
            // delete record, fall through, and find another version
            //
            if (check_req
                && g_wreq->rsc_spec_request
-                && bavp->host_usage.ncudas > 0
+                && bav.host_usage.ncudas > 0
                && !g_wreq->need_cuda()
            ) {
                if (config.debug_version_select) {
@ -155,7 +154,7 @@ BEST_APP_VERSION* get_app_version(WORKUNIT& wu, bool check_req) {
            // same, ATI
            if (check_req
                && g_wreq->rsc_spec_request
-                && bavp->host_usage.natis > 0
+                && bav.host_usage.natis > 0
                && !g_wreq->need_ati()
            ) {
                if (config.debug_version_select) {
@ -171,8 +170,8 @@ BEST_APP_VERSION* get_app_version(WORKUNIT& wu, bool check_req) {
            //
            if (check_req
                && g_wreq->rsc_spec_request
-                && !bavp->host_usage.ncudas
-                && !bavp->host_usage.natis
+                && !bav.host_usage.ncudas
+                && !bav.host_usage.natis
                && !g_wreq->need_cpu()
            ) {
                if (config.debug_version_select) {
@ -184,7 +183,7 @@ BEST_APP_VERSION* get_app_version(WORKUNIT& wu, bool check_req) {
                break;
            }

-            return bavp;
+            return &bav;
        }
        bavi++;
    }
@ -197,44 +196,44 @@ BEST_APP_VERSION* get_app_version(WORKUNIT& wu, bool check_req) {
        return NULL;
    }

-    bavp = new BEST_APP_VERSION;
-    bavp->appid = wu.appid;
+    BEST_APP_VERSION bav;
+    bav.appid = wu.appid;
    if (g_wreq->anonymous_platform) {
        CLIENT_APP_VERSION* cavp = get_app_version_anonymous(*app);
        if (!cavp) {
-            bavp->present = false;
+            bav.present = false;
        } else {
-            bavp->present = true;
+            bav.present = true;
            if (config.debug_version_select) {
                log_messages.printf(MSG_NORMAL,
                    "[version] Found anonymous platform app for %s: plan class %s\n",
                    app->name, cavp->plan_class
                );
            }
-            bavp->host_usage = cavp->host_usage;
+            bav.host_usage = cavp->host_usage;

            // if client didn't tell us about the app version,
            // assume it uses 1 CPU
            //
-            if (bavp->host_usage.flops == 0) {
-                bavp->host_usage.flops = g_reply->host.p_fpops;
+            if (bav.host_usage.flops == 0) {
+                bav.host_usage.flops = g_reply->host.p_fpops;
            }
-            if (bavp->host_usage.avg_ncpus == 0 && bavp->host_usage.ncudas == 0 && bavp->host_usage.natis == 0) {
-                bavp->host_usage.avg_ncpus = 1;
+            if (bav.host_usage.avg_ncpus == 0 && bav.host_usage.ncudas == 0 && bav.host_usage.natis == 0) {
+                bav.host_usage.avg_ncpus = 1;
            }
-            bavp->cavp = cavp;
+            bav.cavp = cavp;
        }
-        g_wreq->best_app_versions.push_back(bavp);
-        if (!bavp->present) return NULL;
-        return bavp;
+        g_wreq->best_app_versions.push_back(bav);
+        if (!bav.present) return NULL;
+        return &(g_wreq->best_app_versions.back());
    }

    // Go through the client's platforms.
    // Scan the app versions for each platform.
    // Find the one with highest expected FLOPS
    //
-    bavp->host_usage.flops = 0;
-    bavp->avp = NULL;
+    bav.host_usage.flops = 0;
+    bav.avp = NULL;
    bool no_version_for_platform = true;
    for (i=0; i<g_request->platforms.list.size(); i++) {
        PLATFORM* p = g_request->platforms.list[i];
@ -305,21 +304,21 @@ BEST_APP_VERSION* get_app_version(WORKUNIT& wu, bool check_req) {

            // pick the fastest version
            //
-            if (host_usage.flops > bavp->host_usage.flops) {
-                bavp->host_usage = host_usage;
-                bavp->avp = &av;
+            if (host_usage.flops > bav.host_usage.flops) {
+                bav.host_usage = host_usage;
+                bav.avp = &av;
            }
        }
    }
-    g_wreq->best_app_versions.push_back(bavp);
-    if (bavp->avp) {
+    g_wreq->best_app_versions.push_back(bav);
+    if (bav.avp) {
        if (config.debug_version_select) {
            log_messages.printf(MSG_NORMAL,
                "[version] Best version of app %s is ID %d (%.2f GFLOPS)\n",
-                app->name, bavp->avp->id, bavp->host_usage.flops/1e9
+                app->name, bav.avp->id, bav.host_usage.flops/1e9
            );
        }
-        bavp->present = true;
+        bav.present = true;
    } else {
        // Here if there's no app version we can use.
        //
@ -341,6 +340,6 @@ BEST_APP_VERSION* get_app_version(WORKUNIT& wu, bool check_req) {
        }
        return NULL;
    }
-    return bavp;
+    return &(g_wreq->best_app_versions.back());
 }