*** empty log message ***

svn path=/trunk/boinc/; revision=6081
2005-05-09 04:49:56 +00:00 · 2005-05-09 04:49:56 +00:00 · 9cd02523b2
parent e7b6066a77
commit 9cd02523b2
8 changed files with 120 additions and 37 deletions
--- a/8
+++ b/8
@ -6339,3 +6339,11 @@ Janus  8 May 2005
            payment_success.php (new)
            payment_process.php (new)

+David  8 May 2005
+    - work fetch and CPU scheduling fixes (from John McLeod)
+
+    client/
+        client_state.C,h
+        client_types.C,h
+        cs_scheduler.C
+        scheduler_op.C,h
--- a/client/client_state.C
+++ b/client/client_state.C
@ -1319,7 +1319,7 @@ double CLIENT_STATE::total_resource_share() {

    double x = 0;
    for (i=0; i<projects.size(); i++) {
-        x += projects[i]->resource_share;
+        if (!projects[i]->non_cpu_intensive ) x += projects[i]->resource_share;
    }
    return x;
 }
--- a/client/client_state.h
+++ b/client/client_state.h
@ -47,8 +47,9 @@
 #define USER_RUN_REQUEST_NEVER      3

 #define WORK_FETCH_DONT_NEED 0
-#define WORK_FETCH_NEED      1
-#define WORK_FETCH_NEED_IMMEDIATELY 2
+#define WORK_FETCH_OK        1   // fetching is permitted but the work buffer is not short; levels are compared numerically, keep them ordered
+#define WORK_FETCH_NEED      2   // queued work is below the minimum work buffer
+#define WORK_FETCH_NEED_IMMEDIATELY 3   // a CPU has no work, or a project is starved

 enum SUSPEND_REASON {
    SUSPEND_REASON_BATTERIES = 1,
@ -273,7 +274,7 @@ private:
 public:
    double work_needed_secs();
    PROJECT* next_project_master_pending();
-    PROJECT* next_project_need_work(PROJECT*);
+    PROJECT* next_project_need_work(PROJECT* old, int urgency);
    int make_scheduler_request(PROJECT*, double);
    int handle_scheduler_reply(PROJECT*, char* scheduler_url, int& nresults);
    int compute_work_requests();
--- a/client/client_types.C
+++ b/client/client_types.C
@ -1129,6 +1129,8 @@ void RESULT::clear() {
    app = NULL;
    wup = NULL;
    project = NULL;
+    high_priority = false;
+    return_result_immediately = false;
 }

 // parse a <result> element from scheduling server.
@ -1148,6 +1150,13 @@ int RESULT::parse_server(MIOFILE& in) {
            validate_time(report_deadline);
            continue;
        }
+        if (match_tag(buf, "<high_priority>")) {
+            high_priority = true;
+            return_result_immediately = true;
+        }
+        if (match_tag(buf, "<return_result_immediately>")) {
+            return_result_immediately = true;
+        }
        if (match_tag(buf, "<file_ref>")) {
            file_ref.parse(in);
            output_files.push_back(file_ref);
@ -1257,6 +1266,8 @@ int RESULT::write(MIOFILE& out, bool to_server) {
        if (ready_to_report) out.printf("    <ready_to_report/>\n");
        if (suspended_via_gui) out.printf("    <suspended_via_gui/>\n");
        if (aborted_via_gui) out.printf("    <aborted_via_gui/>\n");
+        if (high_priority) out.printf("    <high_priority/>\n");
+        if (return_result_immediately) out.printf("    <return_result_immediately/>\n");
        out.printf(
            "    <wu_name>%s</wu_name>\n"
            "    <report_deadline>%f</report_deadline>\n",
@ -1296,6 +1307,8 @@ int RESULT::write_gui(MIOFILE& out) {
    if (ready_to_report) out.printf("    <ready_to_report/>\n");
    if (suspended_via_gui) out.printf("    <suspended_via_gui/>\n");
    if (aborted_via_gui) out.printf("    <aborted_via_gui/>\n");
+    if (high_priority) out.printf("    <high_priority/>\n");
+    if (return_result_immediately) out.printf("    <return_result_immediately/>\n");
    ACTIVE_TASK* atp = gstate.active_tasks.lookup_result(this);
    if (atp) {
        atp->write(out);
--- a/client/client_types.h
+++ b/client/client_types.h
@ -365,6 +365,13 @@ struct RESULT {
    bool already_selected;
        // used to keep cpu scheduler from scheduling a result twice
        // transient; used only within schedule_cpus()
+    bool high_priority; // this will override the normal CPU handler so that this WU will run to completion
+                        // before any other normal work is run.  It will not override the panic mode scheduler.
+                        // this can be set by the server supplying the WU.  This was strongly requested by
+                        // Folding@Home.
+    bool return_result_immediately; // this is what it says.  It can be set by the project supplying the WU.
+                                    // It will be set to true by the parsing code if high_priority is set, even
+                                    // if it is not set by the calling server.
    void clear();
    int parse_server(MIOFILE&);
    int parse_state(MIOFILE&);
--- a/client/cs_scheduler.C
+++ b/client/cs_scheduler.C
@ -75,7 +75,7 @@ const int SECONDS_BEFORE_REPORTING_MIN_RPC_TIME_AGAIN = 60*60;
 #define MAX_CPU_LOAD_FACTOR 0.8

 static int proj_min_results(PROJECT* p, int ncpus) {
-    return (int)(ceil(ncpus*p->resource_share/trs));
+    return p->non_cpu_intensive ? 1 : (int)(ceil(ncpus*p->resource_share/trs));  // non-CPU-intensive: one result; else resource-share fraction of ncpus, rounded up
 }
 void PROJECT::set_min_rpc_time(double future_time) {
    if (future_time > min_rpc_time) {
@ -144,7 +144,7 @@ PROJECT* CLIENT_STATE::next_project_sched_rpc_pending() {
 // 3) has master_url_fetch_pending == false
 // 4) has dont_request_more_work == false
 //
-PROJECT* CLIENT_STATE::next_project_need_work(PROJECT* old) {
+PROJECT* CLIENT_STATE::next_project_need_work(PROJECT* old, int urgency) {
    PROJECT *p, *p_prospect = NULL;
    double work_on_prospect;
    double now = dtime();
@ -163,7 +163,18 @@ PROJECT* CLIENT_STATE::next_project_need_work(PROJECT* old) {
        if (p->suspended_via_gui) continue;
        if (p->dont_request_more_work) continue;
        if (p->long_term_debt < 0 && !cpu_idle) continue;
-        if (p->non_cpu_intensive) continue;
+        if (p->work_request_urgency == WORK_FETCH_DONT_NEED) continue;
+
+        // if we don't really need work, and we don't really need work from this project, pass.
+        if (urgency <= WORK_FETCH_OK && p->work_request_urgency <= WORK_FETCH_OK) continue;
+
+        // if there is a project for which a work request is OK and one that has a higher priority, take the one
+        // with the higher priority.
+        if (p_prospect && p->work_request_urgency == WORK_FETCH_OK && 
+            p_prospect->work_request_urgency > p->work_request_urgency) {
+        
+            continue;
+        }

        double work_on_current = ettprc(p, 0);
        if (p_prospect
@ -172,17 +183,13 @@ PROJECT* CLIENT_STATE::next_project_need_work(PROJECT* old) {
        ) {
            continue;
        }
-        if (p->work_request_urgency == WORK_FETCH_DONT_NEED
-            && (!cpu_idle || p->non_cpu_intensive)
-        ) {
-            continue;
-        }

-        if (found_old && p->work_request > 0) {
+        if (p->work_request > 0) {
            p_prospect = p;
            work_on_prospect = work_on_current;
        }
    }
+    if (p_prospect && !(p_prospect->work_request > 0.0)) p_prospect->work_request = 1.0;
    return p_prospect;
 }

@ -405,8 +412,13 @@ double CLIENT_STATE::ettprc(PROJECT *p, int k) {
            --num_results_to_skip;
            continue;
        }
+        if (rp->project->non_cpu_intensive) {
+            // if it is a non_cpu intensive project, it needs only one at a time.
+            est = max(rp->estimated_cpu_time_remaining(), global_prefs.work_buf_min_days * SECONDS_PER_DAY);  
+        } else {
            est += rp->estimated_cpu_time_remaining();
        }
+    }
    double apr = avg_proc_rate(p);
    return est/apr;
 }
@ -418,9 +430,35 @@ double CLIENT_STATE::ettprc(PROJECT *p, int k) {
 //
 int CLIENT_STATE::compute_work_requests() {
    int urgency = WORK_FETCH_DONT_NEED;
+    int highest_project_urgency = WORK_FETCH_DONT_NEED;
    unsigned int i;
    double work_min_period = global_prefs.work_buf_min_days * SECONDS_PER_DAY;
    double now = dtime();
+    double global_work_need = work_needed_secs();
+    
+    for (i = 0; i < projects.size(); ++i) {
+        projects[i]->work_request_urgency = WORK_FETCH_DONT_NEED;
+        projects[i]->work_request = 0;
+    }
+
+
+    if (!should_get_work()) {
+        return WORK_FETCH_DONT_NEED;
+    } else if (no_work_for_a_cpu()) {
+        urgency = WORK_FETCH_NEED_IMMEDIATELY;
+    } else if (global_work_need > 0) {
+        urgency = WORK_FETCH_NEED;
+    } else {
+        urgency = WORK_FETCH_OK;
+    }
+
+    double max_fetch = work_min_period;
+        // it is possible to have a work fetch policy of no new work and also have 
+        // a CPU idle or not enough to fill the cache.  In this case, we get work, but in little tiny increments
+        // as we are already in trouble and we need to minimize the damage.
+    if (this->work_fetch_no_new_work) {
+        max_fetch = 1.0;
+    }

    trs = total_resource_share();

@ -435,10 +473,11 @@ int CLIENT_STATE::compute_work_requests() {

        p->work_request = 0;
        p->work_request_urgency = WORK_FETCH_DONT_NEED;
+        if (p->master_url_fetch_pending) continue;
        if (p->min_rpc_time >= now) continue;
        if (p->dont_request_more_work) continue;
        if (p->suspended_via_gui) continue;
-        if (p->long_term_debt < 0 && !no_work_for_a_cpu()) continue;
+        if ((p->long_term_debt < 0) && (urgency != WORK_FETCH_NEED_IMMEDIATELY)) continue;

        int min_results = proj_min_results(p, ncpus);
        double estimated_time_to_starvation = ettprc(p, min_results-1);
@ -450,7 +489,6 @@ int CLIENT_STATE::compute_work_requests() {
 #if DEBUG_SCHED
                msg_printf(p, MSG_INFO, "is starved");
 #endif
-                urgency = WORK_FETCH_NEED_IMMEDIATELY;
                p->work_request_urgency = WORK_FETCH_NEED_IMMEDIATELY;
            } else {
 #if DEBUG_SCHED
@ -458,11 +496,15 @@ int CLIENT_STATE::compute_work_requests() {
                    estimated_time_to_starvation
                );
 #endif
-                urgency = max(WORK_FETCH_NEED, urgency);
-                urgency = WORK_FETCH_NEED;
+                p->work_request_urgency = WORK_FETCH_NEED;
            }
+        } else if (WORK_FETCH_OK < urgency) {
+            p->work_request_urgency = WORK_FETCH_OK;
+            p->work_request = global_work_need;
        }

+        highest_project_urgency = max(highest_project_urgency, p->work_request_urgency);
+
        // determine work requests for each project
        // NOTE: don't need to divide by active_frac etc.;
        // the scheduler does that (see sched/sched_send.C)
@ -477,12 +519,6 @@ int CLIENT_STATE::compute_work_requests() {
 #endif
    }

-    if (urgency == WORK_FETCH_DONT_NEED) {
-        for (i=0; i<projects.size(); ++i) {
-            projects[i]->work_request = 0;
-        }
-    }
-
    return urgency;
 }

@ -522,10 +558,10 @@ bool CLIENT_STATE::scheduler_rpc_poll(double now) {
                    "Insufficient work; requesting more"
                );
            }
-            scheduler_op->init_get_work(false);
+            scheduler_op->init_get_work(false, urgency);
            action = true;
        } else if ((p=next_project_master_pending())) {
-            scheduler_op->init_get_work(true);
+            scheduler_op->init_get_work(true, urgency);
            action = true;
        } else if ((p=next_project_sched_rpc_pending())) {
            scheduler_op->init_return_results(p);
@ -1012,4 +1048,18 @@ void CLIENT_STATE::set_cpu_scheduler_modes() {
    cpu_earliest_deadline_first = use_earliest_deadline_first;
 }

+// Seconds of additional work needed to fill the minimum work buffer.
+// global_prefs.work_buf_min_days is in days and is converted to seconds before
+// comparing with the queued CPU time; non-CPU-intensive projects are ignored.
+double CLIENT_STATE::work_needed_secs() {
+    const double work_min_period = global_prefs.work_buf_min_days * SECONDS_PER_DAY;
+    double total_work = 0;
+    for (unsigned int i = 0; i < results.size(); ++i) {
+        if (results[i]->project->non_cpu_intensive) continue;
+        total_work += results[i]->estimated_cpu_time_remaining();
+    }
+    // Returns 0 when the queue already covers the buffer, else the shortfall.
+    return (total_work > work_min_period) ? 0 : work_min_period - total_work;
+}
+
 const char *BOINC_RCSID_d35a4a7711 = "$Id$";
--- a/client/scheduler_op.C
+++ b/client/scheduler_op.C
@ -80,13 +80,13 @@ bool SCHEDULER_OP::check_master_fetch_start() {
 // PRECONDITION: compute_work_requests() has been called
 // to fill in PROJECT::work_request
 //
-int SCHEDULER_OP::init_get_work(bool master_file_only) {
+int SCHEDULER_OP::init_get_work(bool master_file_only, int urgency) {
    int retval;
    char err_msg[256];
    double ns;

    must_get_work = true;
-    project = gstate.next_project_need_work(0);
+    project = gstate.next_project_need_work(0, urgency);
    if (project && !master_file_only) {
        ns = project->work_request;
        msg_printf(project, MSG_INFO, "Requesting %.2f seconds of work", ns);
@ -209,6 +209,7 @@ void SCHEDULER_OP::backoff(PROJECT* p, const char *error_msg ) {
        p->nrpc_failures++;
    }
    set_min_rpc_time(p);
+    p->long_term_debt -= (p->min_rpc_time - dtime()) / gstate.global_prefs.max_projects_on_client;
 }

 // low-level routine to initiate an RPC
@ -448,12 +449,15 @@ bool SCHEDULER_OP::poll() {
                    backoff(project, "No schedulers responded");
                    if (must_get_work) {
                        int urgency = gstate.compute_work_requests();
-                        project = gstate.next_project_need_work(project);
-                        if (project && urgency != WORK_FETCH_DONT_NEED) {
+                        if (urgency != WORK_FETCH_DONT_NEED) {
+                            project = gstate.next_project_need_work(project, urgency);
+                            if (project) {
                                retval = init_op_project(project->work_request);
                            } else {
                                scheduler_op_done = true;
                            }
+                            scheduler_op_done = true;
+                        }
                    } else {
                        scheduler_op_done = true;
                    }
@ -512,7 +516,7 @@ bool SCHEDULER_OP::poll() {
                if (must_get_work) {
                    int urgency = gstate.compute_work_requests();
                    if (urgency != WORK_FETCH_DONT_NEED) {
-                        project = gstate.next_project_need_work(project);
+                        project = gstate.next_project_need_work(project, urgency);
                        if (project) {
                            retval = init_op_project(project->work_request);
                        } else {
--- a/client/scheduler_op.h
+++ b/client/scheduler_op.h
@ -73,7 +73,7 @@ struct SCHEDULER_OP {

    SCHEDULER_OP(HTTP_OP_SET*);
    bool poll();
-    int init_get_work(bool master_file_only);
+    int init_get_work(bool master_file_only, int urgency);
    int init_return_results(PROJECT*);
    int init_op_project(double ns);
    int init_master_fetch();