*** empty log message ***

svn path=/trunk/boinc/; revision=6343
David Anderson 2005-06-14 20:31:15 +00:00
parent 917a5a4b5e
commit 28cc1fb011
14 changed files with 167 additions and 65 deletions

View File

@ -7801,3 +7801,48 @@ David 13 June 2005
client/
client_state.C
David 14 June 2005
- core client: use "potentially runnable resource share"
instead of "total resource share" in several places:
- estimating how long a project's work will take
- how many CPUs a project should occupy
- the "resource share fraction" reported to schedulers
The idea: if a project is not potentially runnable
(e.g. because it's suspended by the user)
it's not going to get CPU time,
so it shouldn't be included in resource share calculations.
(suggested by Jim Harris)
- rename CLIENT_STATE::ettprc() to time_until_work_done();
add a "subset resource share" parameter
- get rid of the global variable "trs" (total resource share)
- make proj_min_results() a member of CLIENT_STATE,
remove the "ncpus" param, and add a "subset resource share" param
- CLIENT_STATE::avg_proc_rate() now computes processing rate
across all projects
- Multiply by TIME_STATS::cpu_efficiency in avg_proc_rate()
- add functions runnable_resource_share() and
potentially_runnable_resource_share() to CLIENT_STATE
- set_cpu_scheduler_modes(): remove "* ncpus" from
if (frac_booked > MAX_CPU_LOAD_FACTOR * up_frac * ncpus)
(John: is this right?)
Note: in some cases (e.g. if the work-fetch policy has
decided to get work for only 1 or 2 projects),
the set of projects used in resource share calculations
should be smaller than the set of potentially runnable projects.
It's not clear how to do this.
(A minimal standalone sketch of the new share calculation
follows the file list below.)
client/
client_state.C,h
cs_apps.C
cs_data.C
cs_scheduler.C
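A minimal standalone sketch of the new calculation (the pared-down Project struct
and the numbers are illustrative assumptions, not the client's actual data
structures; the real code is in the CLIENT_STATE diffs below):

#include <cstdio>
#include <vector>

// Illustrative stand-in for the client's PROJECT struct; only the
// fields needed for the share calculation are shown.
struct Project {
    double resource_share;
    bool suspended;    // e.g. suspended by the user via the GUI
    bool potentially_runnable() const { return !suspended; }
};

// Sum the resource shares of potentially runnable projects only.
double potentially_runnable_resource_share(const std::vector<Project>& projects) {
    double x = 0;
    for (unsigned int i = 0; i < projects.size(); i++) {
        if (projects[i].potentially_runnable()) {
            x += projects[i].resource_share;
        }
    }
    return x;
}

int main() {
    Project a = {100, false};   // active
    Project b = {100, true};    // suspended by the user
    Project c = {200, false};   // active
    std::vector<Project> projects;
    projects.push_back(a);
    projects.push_back(b);
    projects.push_back(c);
    double prrs = potentially_runnable_resource_share(projects);
    // Project b is excluded, so project a's reported share fraction
    // is 100/300, not 100/400 as it would be with total resource share.
    printf("share fraction for project a: %.3f\n", a.resource_share / prrs);
    return 0;
}

Running this prints 0.333: the suspended project no longer dilutes the shares
of the projects that can actually use the CPU.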
David 14 June 2005
- code to get phys/virt memsize, NCPUs, OS name on HPUX
(from Lars Bausch)
client/
hostinfo_unix.C

View File

@ -1308,18 +1308,6 @@ int CLIENT_STATE::detach_project(PROJECT* project) {
return 0;
}
double CLIENT_STATE::total_resource_share() {
unsigned int i;
double x = 0;
for (i=0; i<projects.size(); i++) {
if (!projects[i]->non_cpu_intensive ) {
x += projects[i]->resource_share;
}
}
return x;
}
int CLIENT_STATE::version() {
return core_client_major_version*100 + core_client_minor_version;
}

View File

@ -198,6 +198,8 @@ private:
bool garbage_collect_always();
bool update_results();
double total_resource_share();
double runnable_resource_share();
double potentially_runnable_resource_share();
// --------------- cs_account.C:
public:
@ -300,10 +302,11 @@ private:
PROJECT* next_project_sched_rpc_pending();
//bool some_project_rpc_ok();
bool scheduler_rpc_poll();
double ettprc(PROJECT*, int);
double avg_proc_rate(PROJECT*);
double time_until_work_done(PROJECT*, int, double);
double avg_proc_rate();
bool should_get_work();
bool no_work_for_a_cpu();
int proj_min_results(PROJECT*, double);
void set_cpu_scheduler_modes();
// --------------- cs_statefile.C:

View File

@ -383,14 +383,51 @@ bool CLIENT_STATE::schedule_earliest_deadline_result(double expected_pay_off) {
return true;
}
// find total resource shares of all projects
//
double CLIENT_STATE::total_resource_share() {
double x = 0;
for (unsigned int i=0; i<projects.size(); i++) {
if (!projects[i]->non_cpu_intensive ) {
x += projects[i]->resource_share;
}
}
return x;
}
// same, but only runnable projects (can use CPU right now)
//
double CLIENT_STATE::runnable_resource_share() {
double x = 0;
for (unsigned int i=0; i<projects.size(); i++) {
PROJECT* p = projects[i];
if (p->runnable()) {
x += p->resource_share;
}
}
return x;
}
// same, but potentially runnable (could ask for work right now)
//
double CLIENT_STATE::potentially_runnable_resource_share() {
double x = 0;
for (unsigned int i=0; i<projects.size(); i++) {
PROJECT* p = projects[i];
if (p->potentially_runnable()) {
x += p->resource_share;
}
}
return x;
}
// adjust project debts (short, long-term)
//
void CLIENT_STATE::adjust_debts() {
unsigned int i;
bool first = true;
double total_long_term_debt = 0;
double potentially_runnable_resource_share = 0;
double runnable_resource_share = 0;
double prrs, rrs;
int count_cpu_intensive = 0;
PROJECT *p;
double min_short_term_debt=0, share_frac;
@ -424,17 +461,8 @@ void CLIENT_STATE::adjust_debts() {
total_wall_cpu_time_this_period, total_cpu_time_this_period
);
// find total resource shares of runnable and potentially runnable projects
//
for (i=0; i<projects.size(); ++i) {
p = projects[i];
if (p->runnable()) {
runnable_resource_share += p->resource_share;
}
if (p->potentially_runnable()) {
potentially_runnable_resource_share += p->resource_share;
}
}
rrs = runnable_resource_share();
prrs = potentially_runnable_resource_share();
for (i=0; i<projects.size(); i++) {
p = projects[i];
@ -444,7 +472,7 @@ void CLIENT_STATE::adjust_debts() {
// adjust long-term debts
//
if (p->potentially_runnable()) {
share_frac = p->resource_share/potentially_runnable_resource_share;
share_frac = p->resource_share/prrs;
p->long_term_debt += share_frac*total_wall_cpu_time_this_period
- p->wall_cpu_time_this_period
;
@ -457,7 +485,7 @@ void CLIENT_STATE::adjust_debts() {
p->short_term_debt = 0;
p->anticipated_debt = 0;
} else {
share_frac = p->resource_share/runnable_resource_share;
share_frac = p->resource_share/rrs;
p->short_term_debt += share_frac*total_wall_cpu_time_this_period
- p->wall_cpu_time_this_period
;

View File

@ -469,12 +469,7 @@ double CLIENT_STATE::offender(PROJECT* p) {
// from all other projects
//
double CLIENT_STATE::compute_resource_share(PROJECT *p) {
double total_resource_share = 0;
for (unsigned int i=0; i<projects.size(); ++i) {
total_resource_share += projects[i]->resource_share;
}
return p->resource_share/total_resource_share;
return p->resource_share/total_resource_share();
}
// Computes the size of the allowed disk share in number of bytes.

View File

@ -53,7 +53,6 @@ using std::max;
using std::vector;
using std::string;
static double trs;
// quantities like avg CPU time decay by a factor of e every week
//
#define EXP_DECAY_RATE (1./(SECONDS_PER_DAY*7))
@ -71,9 +70,16 @@ const int SECONDS_BEFORE_REPORTING_MIN_RPC_TIME_AGAIN = 60*60;
//
#define MAX_CPU_LOAD_FACTOR 0.8
static int proj_min_results(PROJECT* p, int ncpus) {
return p->non_cpu_intensive ? 1 : (int)(ceil(ncpus*p->resource_share/trs));
// how many CPUs should this project occupy on average,
// based on its resource share relative to a given set
//
int CLIENT_STATE::proj_min_results(PROJECT* p, double subset_resource_share) {
if (p->non_cpu_intensive) {
return 1;
}
return (int)(ceil(ncpus*p->resource_share/subset_resource_share));
}
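A worked instance of the formula above, with invented numbers
(a 4-CPU host and a 100-out-of-300 resource share):

#include <cmath>
#include <cstdio>

int main() {
    int ncpus = 4;                        // assumed 4-CPU host
    double resource_share = 100;          // this project's share
    double subset_resource_share = 300;   // share of the competing set
    // ceil(4 * 100/300) = ceil(1.33...) = 2
    int min_results = (int)ceil(ncpus * resource_share / subset_resource_share);
    printf("proj_min_results would be %d\n", min_results);
    return 0;
}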
void PROJECT::set_min_rpc_time(double future_time) {
if (future_time > min_rpc_time) {
min_rpc_time = future_time;
@ -146,6 +152,7 @@ PROJECT* CLIENT_STATE::next_project_need_work() {
PROJECT *p, *p_prospect = NULL;
double work_on_prospect=0;
unsigned int i;
double prrs = potentially_runnable_resource_share();
for (i=0; i<projects.size(); ++i) {
p = projects[i];
@ -162,7 +169,7 @@ PROJECT* CLIENT_STATE::next_project_need_work() {
}
}
double work_on_current = ettprc(p, 0);
double work_on_current = time_until_work_done (p, 0, prrs);
if (p_prospect) {
if (p->work_request_urgency == WORK_FETCH_OK &&
p_prospect->work_request_urgency > WORK_FETCH_OK
@ -202,7 +209,7 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p) {
double free, possible;
#endif
trs = total_resource_share();
double prrs = potentially_runnable_resource_share();
if (!f) return ERR_FOPEN;
mf.init_file(f);
@ -224,8 +231,8 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p) {
core_client_major_version,
core_client_minor_version,
p->work_request,
p->resource_share / trs,
ettprc(p, proj_min_results(p, ncpus)-1)
p->resource_share / prrs,
time_until_work_done(p, proj_min_results(p, prrs)-1, prrs)
);
if (p->anonymous_platform) {
fprintf(f, " <app_versions>\n");
@ -410,21 +417,23 @@ bool CLIENT_STATE::some_project_rpc_ok() {
#endif
// return the expected number of CPU seconds completed by the client
// for project p in a second of wall-clock time.
// in a second of wall-clock time.
// May be > 1 on a multiprocessor.
//
double CLIENT_STATE::avg_proc_rate(PROJECT *p) {
double CLIENT_STATE::avg_proc_rate() {
double running_frac = time_stats.on_frac * time_stats.active_frac;
if (running_frac < 0.1) running_frac = 0.1;
if (running_frac > 1) running_frac = 1;
return (p ? (p->resource_share / trs) : 1) * ncpus * running_frac;
return ncpus*running_frac*time_stats.cpu_efficiency;
}
// "estimated time to project result count"
// return the estimated wall-clock time until the
// number of results for project p will reach k
// estimate wall-clock time until the number of uncompleted results
// for project p will reach k,
// given the total resource share of a set of competing projects
//
double CLIENT_STATE::ettprc(PROJECT *p, int k) {
double CLIENT_STATE::time_until_work_done(
PROJECT *p, int k, double subset_resource_share
) {
int num_results_to_skip = k;
double est = 0;
@ -453,7 +462,7 @@ double CLIENT_STATE::ettprc(PROJECT *p, int k) {
est += rp->estimated_cpu_time_remaining();
}
}
double apr = avg_proc_rate(p);
double apr = avg_proc_rate()*p->resource_share/subset_resource_share;
return est/apr;
}
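Combining avg_proc_rate() with the share scaling above, the returned estimate is
roughly est / (ncpus * running_frac * cpu_efficiency * resource_share / subset_resource_share).
A worked example with invented numbers:

#include <cstdio>

int main() {
    // All inputs below are invented for illustration.
    double est = 36000;              // remaining CPU seconds after skipping k results
    int ncpus = 2;
    double running_frac = 0.8;       // on_frac * active_frac, clamped to [0.1, 1]
    double cpu_efficiency = 0.9;     // TIME_STATS::cpu_efficiency
    double resource_share = 100, subset_resource_share = 300;

    double apr = ncpus * running_frac * cpu_efficiency;        // whole-client rate
    double project_rate = apr * resource_share / subset_resource_share;
    // 36000 / (2 * 0.8 * 0.9 * 1/3) = 36000 / 0.48 = 75000 s, about 21 hours
    printf("time until work done: %.0f seconds\n", est / project_rate);
    return 0;
}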
@ -467,6 +476,7 @@ int CLIENT_STATE::compute_work_requests() {
unsigned int i;
double work_min_period = global_prefs.work_buf_min_days * SECONDS_PER_DAY;
double global_work_need = work_needed_secs();
double prrs;
SCOPE_MSG_LOG scope_messages(log_messages, CLIENT_MSG_LOG::DEBUG_SCHED_CPU);
@ -502,7 +512,7 @@ int CLIENT_STATE::compute_work_requests() {
max_fetch = 1.0;
}
trs = total_resource_share();
prrs = potentially_runnable_resource_share();
// for each project, compute
// min_results = min # of results for project needed by CPU scheduling,
@ -521,8 +531,8 @@ int CLIENT_STATE::compute_work_requests() {
//
if ((p->long_term_debt < -global_prefs.cpu_scheduling_period_minutes * 60) && (overall_work_fetch_urgency != WORK_FETCH_NEED_IMMEDIATELY)) continue;
int min_results = proj_min_results(p, ncpus);
double estimated_time_to_starvation = ettprc(p, min_results-1);
int min_results = proj_min_results(p, prrs);
double estimated_time_to_starvation = time_until_work_done(p, min_results-1, prrs);
// determine project urgency
//
@ -994,7 +1004,7 @@ void CLIENT_STATE::set_cpu_scheduler_modes() {
double frac_booked = 0;
std::vector <double> booked_to;
std::map<double, RESULT*>::iterator it;
double up_frac = avg_proc_rate(0);
double apr = avg_proc_rate();
SCOPE_MSG_LOG scope_messages(log_messages, CLIENT_MSG_LOG::DEBUG_SCHED_CPU);
@ -1035,7 +1045,7 @@ void CLIENT_STATE::set_cpu_scheduler_modes() {
// Are the deadlines too tight to meet reliably?
//
if (booked_to[lowest_booked_cpu] - gstate.now > (rp->report_deadline - gstate.now) * MAX_CPU_LOAD_FACTOR * (up_frac / ncpus)) {
if (booked_to[lowest_booked_cpu] - gstate.now > (rp->report_deadline - gstate.now) * MAX_CPU_LOAD_FACTOR * (apr / ncpus)) {
should_not_fetch_work = true;
use_earliest_deadline_first = true;
scope_messages.printf(
@ -1063,7 +1073,7 @@ void CLIENT_STATE::set_cpu_scheduler_modes() {
frac_booked += rp->estimated_cpu_time_remaining() / (rp->report_deadline - gstate.now);
}
if (frac_booked > MAX_CPU_LOAD_FACTOR * up_frac * ncpus) {
if (frac_booked > MAX_CPU_LOAD_FACTOR * apr) {
should_not_fetch_work = true;
scope_messages.printf(
"CLIENT_STATE::compute_work_requests(): Nearly overcommitted.\n"
@ -1105,7 +1115,7 @@ double CLIENT_STATE::work_needed_secs() {
if (results[i]->project->non_cpu_intensive) continue;
total_work += results[i]->estimated_cpu_time_remaining();
}
double x = global_prefs.work_buf_min_days*SECONDS_PER_DAY*avg_proc_rate(0) - total_work;
double x = global_prefs.work_buf_min_days*SECONDS_PER_DAY*avg_proc_rate() - total_work;
if (x < 0) {
return 0;
}

View File

@ -85,6 +85,10 @@ extern "C" {
NXEventHandle gEventHandle;
#endif // __APPLE__
#ifdef _HPUX_SOURCE
#include <sys/pstat.h>
#endif
// functions to get name/addr of local host
// Converts a int ip address to a string representation (i.e. "66.218.71.198")
@ -388,6 +392,10 @@ int HOST_INFO::get_host_info() {
mib[1] = HW_NCPU;
len = sizeof(p_ncpus);
sysctl(mib, 2, &p_ncpus, &len, NULL, 0);
#elif defined(_HPUX_SOURCE)
struct pst_dynamic psd;
pstat_getdynamic ( &psd, sizeof ( psd ), (size_t)1, 0 );
p_ncpus = psd.psd_proc_cnt;
#else
#error Need to specify a sysconf() define to obtain number of processors
#endif
@ -409,6 +417,10 @@ int HOST_INFO::get_host_info() {
len = sizeof(mem_size);
sysctl(mib, 2, &mem_size, &len, NULL, 0); // Mac OS X
m_nbytes = mem_size;
#elif defined(_HPUX_SOURCE)
struct pst_static pst;
pstat_getstatic(&pst, sizeof(pst), (size_t)1, 0);
m_nbytes = (pst.physical_memory * pst.page_size);
#else
#error Need to specify a sysconf() define to obtain memory size
#endif
@ -455,6 +467,10 @@ int HOST_INFO::get_host_info() {
if (!sysctl(mib, 2, &vm_info, &len, NULL, 0)) {
m_swap = 1024. * getpagesize() * (double) vm_info.t_vm;
}
#elif defined(_HPUX_SOURCE)
struct pst_vminfo vminfo;
pstat_getvminfo(&vminfo, sizeof(vminfo), (size_t)1, 0);
m_swap = (vminfo.psv_swapspc_max * pst.page_size);
#else
#endif
@ -467,6 +483,7 @@ int HOST_INFO::get_host_info() {
#ifdef SI_PLATFORM
sysinfo(SI_PLATFORM, p_vendor, sizeof(p_vendor));
#endif
#ifdef SI_ISALIST
sysinfo(SI_ISALIST, p_model, sizeof(p_model));
for (unsigned int i=0; i<sizeof(p_model); i++) {
@ -490,6 +507,10 @@ int HOST_INFO::get_host_info() {
uname(&u);
safe_strcpy(os_name, u.sysname);
safe_strcpy(os_version, u.release);
#ifdef _HPUX_SOURCE
safe_strcpy(p_model, u.machine);
safe_strcpy(p_vendor, "Hewlett-Packard");
#endif
#elif defined(HAVE_SYS_SYSCTL_H) && defined(CTL_KERN) && defined(KERN_OSTYPE) && defined(KERN_OSRELEASE)
mib[0] = CTL_KERN;
mib[1] = KERN_OSTYPE;

View File

@ -214,6 +214,7 @@ list_item("input", "account_key");
list_item("output",
html_text("<am_get_info_reply>
<success/>
<id>ID</id>
<name>NAME</name>
<country>COUNTRY</country>
<postal_code>POSTAL_CODE</postal_code>

View File

@ -45,6 +45,7 @@ show_name("Bruce Allen");
show_name("David Anderson");
show_name("Noaa Avital");
show_name("Don Bashford");
show_name("Lars Bausch");
show_name("Brian Boshes");
show_name("Jens Breitbart");
show_name("Tim Brown");

View File

@ -10,8 +10,8 @@ This data can be imported and used to produce
web sites that show statistics and leaderboards
for one or more BOINC projects.
Examples of such sites are listed at
<a href=http://setiweb.ssl.berkeley.edu/sah/stats.php>
http://setiweb.ssl.berkeley.edu/sah/stats.php</a>.
<a href=http://setiathome.berkeley.edu/stats.php>
http://setiathome.berkeley.edu/stats.php</a>.
<p>
Statistics data is exported in XML-format files.
@ -19,9 +19,12 @@ XML schemas for these files are
<a href=BOINCSchemas/>here</a>,
and a graphical representation is
<a href=BOINCSchemas/documentation/>here</a>.
NOTE: these are both somewhat out of date.
Just look at the actual files - they're pretty self-explanatory.
<p>
These files are contained in a download directory,
linked to from the project's web site.
linked to from the project's web site
(generally X/stats/, where X is the project URL).
A project can decide what data to export,
and how it is divided into files.
This is described by a file <b>db_dump.xml</b> of the following form:

View File

@ -77,6 +77,7 @@ language("French", array(
site("http://boinc-quebec.org", "boinc-quebec.org")
));
language("German", array(
site("http://www.crunching-family.com/", "Crunching Family"),
site("http://www.boinc-halle-saale.de.vu", "BOINC@Halle/Saale"),
site("http://www.boincfun.tk/", "BOINCfun"),
site("http://www.setigermany.de", "SETI.Germany"),

View File

@ -154,7 +154,8 @@ to the download directory hierarchy.
Workunits can be created using either a script
(using the <code>create_work</code> program)
or a program (using the <code>create_work()</code> function).
The input files must already be in the download directory.
The input files must already be in the download hierarchy.
<p>
The utility program is
<pre>

View File

@ -25,10 +25,14 @@ results of the same workunit,
in an attempt to obtain unearned credit
or have erroneous results accepted as correct.
</ul>
Work distribution is constrained by a number of rules
(a condensed sketch of these checks follows the list):
<ul>
<li> A result is sent only if an application version
is available for the host's platform.
If the application's min_version field is nonzero,
the version number must be at least this value.
<li>
A result is not sent if its disk or memory requirements
are not met by the host.
@ -53,9 +57,6 @@ A result is not sent if
<a href=homogeneous_redundancy.php>homogeneous redundancy</a>
is enabled and another result of the same workunit
has been sent to a different type of host.
<li>
No results are sent if the core client has a different
major version than the scheduling server.
</ul>
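A condensed, compilable sketch of the first few checks
(the struct and field names here are illustrative assumptions,
not the scheduler's actual source):
<pre>
// Illustrative only; not the actual scheduler code.
struct AppVersion { int version_num; };
struct App        { int min_version; };
struct Workunit   { double rsc_disk_bound, rsc_memory_bound; };
struct Host       { double d_free, m_nbytes; };

bool can_send(App& app, AppVersion* avp, Workunit& wu, Host& host) {
    if (!avp) return false;     // no app version for the host's platform
    if (app.min_version && avp->version_num < app.min_version) return false;
    if (host.d_free < wu.rsc_disk_bound) return false;      // disk requirement
    if (host.m_nbytes < wu.rsc_memory_bound) return false;  // memory requirement
    // deadline and homogeneous redundancy checks are not shown
    return true;
}
</pre>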
In general, the BOINC scheduler responds to a work request

View File

@ -188,12 +188,16 @@ static int process_wu_template(
config.uldl_dir_fanout, true,
path, true
);
// if file isn't found in hierarchy,
// look for it at top level and copy
//
if (!boinc_file_exists(path)) {
sprintf(top_download_path,
"%s/%s",config.download_dir,
infiles[file_number]
);
boinc_copy(top_download_path,path);
boinc_copy(top_download_path, path);
}
if (!config.cache_md5_info || !got_md5_info(path, md5, &nbytes)) {