diff --git a/checkin_notes b/checkin_notes index c852ecc9ad..ac8d85927a 100755 --- a/checkin_notes +++ b/checkin_notes @@ -14049,3 +14049,35 @@ David 21 June 2004 html/inc/ result.inc + +David 22 June 2004 + - client: handle a "down for maintenance" RPC reply as a failure, + i.e. do a backoff on the project. + This should hopefully fix a problem where if + a project is down, and a client has a result to report, + it will do an RPC every second, creating an overload on the server. + Do this by adding a new optional element + to scheduler replies. + - If get a from scheduler, + don't let that override the current min_rpc_time; + instead, take the max of the two. + - Change the implementation of trickle messages. + Instead of using dedicated trickle_up and trickle_down DB tables, + use more general msg_from_host and msg_to_host tables. + These will be used also for data management, + i.e. to send upoad or download requests to specific hosts. + + client/ + cs_scheduler.C + scheduler_op.C,h + db/ + boinc_db.C,h + schema.sql + lib/ + error_numbers.h + sched/ + assimilator.C + handle_request.C + main.C + server_types.C,h + trickle_handler.C diff --git a/client/cs_scheduler.C b/client/cs_scheduler.C index 5f8a7a124d..9a3e24c483 100644 --- a/client/cs_scheduler.C +++ b/client/cs_scheduler.C @@ -489,14 +489,23 @@ int CLIENT_STATE::handle_scheduler_reply( project->user_total_credit = sr.user_total_credit; project->user_expavg_credit = sr.user_expavg_credit; project->user_create_time = sr.user_create_time; + + if (sr.request_delay) { + time_t x = time(0) + sr.request_delay; + if (x > project->min_rpc_time) project->min_rpc_time = x; + } + if (strlen(sr.message)) { sprintf(buf, "Message from server: %s", sr.message); int prio = (!strcmp(sr.message_priority, "high"))?MSG_ERROR:MSG_INFO; show_message(project, buf, prio); } - if (sr.request_delay) { - project->min_rpc_time = time(0) + sr.request_delay; + // if project is down, return error (so that we back off) + // and don't do 
anything else + // + if (sr.project_is_down) { + return ERR_PROJECT_DOWN; } project->host_total_credit = sr.host_total_credit; diff --git a/client/scheduler_op.C b/client/scheduler_op.C index c86433cfef..e3727b6503 100644 --- a/client/scheduler_op.C +++ b/client/scheduler_op.C @@ -477,9 +477,8 @@ bool SCHEDULER_OP::poll() { project->write_account_file(); } } else { - if (retval) { - backoff(project, "Can't parse scheduler reply"); - } else { + switch (retval) { + case 0: // if we asked for work and didn't get any, // back off this project // @@ -489,6 +488,13 @@ bool SCHEDULER_OP::poll() { project->nrpc_failures = 0; project->min_rpc_time = 0; } + break; + case ERR_PROJECT_DOWN: + backoff(project, "Project is down"); + break; + default: + backoff(project, "Can't parse scheduler reply"); + break; } } @@ -576,6 +582,7 @@ int SCHEDULER_REPLY::parse(FILE* in, PROJECT* project) { code_sign_key = 0; code_sign_key_signature = 0; trickle_up_ack = false; + project_is_down = false; p = fgets(buf, 256, in); if (!p) { @@ -670,6 +677,8 @@ int SCHEDULER_REPLY::parse(FILE* in, PROJECT* project) { continue; } else if (match_tag(buf, "")) { trickle_up_ack = true; + } else if (match_tag(buf, "")) { + project_is_down = true; } else if (parse_str(buf, "", project->email_hash, sizeof(project->email_hash))) { continue; } else if (parse_str(buf, "", project->cross_project_id, sizeof(project->cross_project_id))) { diff --git a/client/scheduler_op.h b/client/scheduler_op.h index 731ab8b0e2..34e9b2c4d3 100644 --- a/client/scheduler_op.h +++ b/client/scheduler_op.h @@ -112,6 +112,7 @@ struct SCHEDULER_REPLY { char* code_sign_key; char* code_sign_key_signature; bool trickle_up_ack; + bool project_is_down; SCHEDULER_REPLY(); ~SCHEDULER_REPLY(); diff --git a/db/boinc_db.C b/db/boinc_db.C index 146ba79403..fffe285b48 100644 --- a/db/boinc_db.C +++ b/db/boinc_db.C @@ -59,8 +59,8 @@ void TEAM::clear() {memset(this, 0, sizeof(*this));} void HOST::clear() {memset(this, 0, sizeof(*this));} void 
RESULT::clear() {memset(this, 0, sizeof(*this));} void WORKUNIT::clear() {memset(this, 0, sizeof(*this));} -void TRICKLE_UP::clear() {memset(this, 0, sizeof(*this));} -void TRICKLE_DOWN::clear() {memset(this, 0, sizeof(*this));} +void MSG_FROM_HOST::clear() {memset(this, 0, sizeof(*this));} +void MSG_TO_HOST::clear() {memset(this, 0, sizeof(*this));} DB_PLATFORM::DB_PLATFORM() : DB_BASE(boinc_db, "platform"){} DB_CORE_VERSION::DB_CORE_VERSION() : DB_BASE(boinc_db, "core_version"){} @@ -71,8 +71,8 @@ DB_TEAM::DB_TEAM() : DB_BASE(boinc_db, "team"){} DB_HOST::DB_HOST() : DB_BASE(boinc_db, "host"){} DB_WORKUNIT::DB_WORKUNIT() : DB_BASE(boinc_db, "workunit"){} DB_RESULT::DB_RESULT() : DB_BASE(boinc_db, "result"){} -DB_TRICKLE_UP::DB_TRICKLE_UP() : DB_BASE(boinc_db, "trickle_up"){} -DB_TRICKLE_DOWN::DB_TRICKLE_DOWN() : DB_BASE(boinc_db, "trickle_down"){} +DB_MSG_FROM_HOST::DB_MSG_FROM_HOST() : DB_BASE(boinc_db, "msg_from_host"){} +DB_MSG_TO_HOST::DB_MSG_TO_HOST() : DB_BASE(boinc_db, "msg_to_host"){} int DB_PLATFORM::get_id() {return id;} int DB_CORE_VERSION::get_id() {return id;} @@ -83,8 +83,8 @@ int DB_TEAM::get_id() {return id;} int DB_HOST::get_id() {return id;} int DB_WORKUNIT::get_id() {return id;} int DB_RESULT::get_id() {return id;} -int DB_TRICKLE_UP::get_id() {return id;} -int DB_TRICKLE_DOWN::get_id() {return id;} +int DB_MSG_FROM_HOST::get_id() {return id;} +int DB_MSG_TO_HOST::get_id() {return id;} void DB_PLATFORM::db_print(char* buf){ sprintf(buf, @@ -533,52 +533,51 @@ int DB_RESULT::insert() { return DB_BASE::insert(); } -void DB_TRICKLE_UP::db_print(char* buf) { +void DB_MSG_FROM_HOST::db_print(char* buf) { ESCAPE(xml); sprintf(buf, "create_time=%d, send_time=%d, " - "resultid=%d, appid=%d, hostid=%d, " + "hostid=%d, variety=%d, " "handled=%d, xml='%s'", create_time, send_time, - resultid, appid, hostid, + hostid, variety, handled, xml ); UNESCAPE(xml); } -void DB_TRICKLE_UP::db_parse(MYSQL_ROW& r) { +void DB_MSG_FROM_HOST::db_parse(MYSQL_ROW& r) { int 
i=0; clear(); id = atol(r[i++]); create_time = atol(r[i++]); send_time = atol(r[i++]); - resultid = atol(r[i++]); - appid = atol(r[i++]); hostid = atol(r[i++]); + variety = atol(r[i++]); handled = atoi(r[i++]); strcpy2(xml, r[i++]); } -void DB_TRICKLE_DOWN::db_print(char* buf) { +void DB_MSG_TO_HOST::db_print(char* buf) { ESCAPE(xml); sprintf(buf, "create_time=%d, " - "resultid=%d, hostid=%d, " + "hostid=%d, variety=%d, " "handled=%d, xml='%s'", create_time, - resultid, hostid, + hostid, variety, handled, xml ); UNESCAPE(xml); } -void DB_TRICKLE_DOWN::db_parse(MYSQL_ROW& r) { +void DB_MSG_TO_HOST::db_parse(MYSQL_ROW& r) { int i=0; clear(); id = atol(r[i++]); create_time = atol(r[i++]); - resultid = atol(r[i++]); hostid = atol(r[i++]); + variety = atol(r[i++]); handled = atol(r[i++]); strcpy2(xml, r[i++]); } diff --git a/db/boinc_db.h b/db/boinc_db.h index 0003c9ea39..c8cb76eaf2 100755 --- a/db/boinc_db.h +++ b/db/boinc_db.h @@ -406,23 +406,22 @@ struct RESULT { void clear(); }; -struct TRICKLE_UP { +struct MSG_FROM_HOST { int id; int create_time; int send_time; // when API call was made - int resultid; - int appid; int hostid; + int variety; // project-defined; generally app ID bool handled; // trickle handler has processed this char xml[LARGE_BLOB_SIZE]; void clear(); }; -struct TRICKLE_DOWN { +struct MSG_TO_HOST { int id; int create_time; - int resultid; int hostid; + int variety; // project-defined; generally app ID bool handled; // scheduler has sent this char xml[LARGE_BLOB_SIZE]; void clear(); @@ -522,17 +521,17 @@ public: void operator=(WORKUNIT& w) {WORKUNIT::operator=(w);} }; -class DB_TRICKLE_UP : public DB_BASE, public TRICKLE_UP { +class DB_MSG_FROM_HOST : public DB_BASE, public MSG_FROM_HOST { public: - DB_TRICKLE_UP(); + DB_MSG_FROM_HOST(); int get_id(); void db_print(char*); void db_parse(MYSQL_ROW &row); }; -class DB_TRICKLE_DOWN : public DB_BASE, public TRICKLE_DOWN { +class DB_MSG_TO_HOST : public DB_BASE, public MSG_TO_HOST { public: - 
DB_TRICKLE_DOWN(); + DB_MSG_TO_HOST(); int get_id(); void db_print(char*); void db_parse(MYSQL_ROW &row); diff --git a/db/schema.sql b/db/schema.sql index 6100b45168..77af16c1f1 100644 --- a/db/schema.sql +++ b/db/schema.sql @@ -220,23 +220,22 @@ create table result ( primary key (id) ); -create table trickle_up ( +create table msg_from_host ( id integer not null auto_increment, create_time integer not null, send_time integer not null, - resultid integer not null, - appid integer not null, hostid integer not null, + variety integer not null, handled smallint not null, xml text, primary key (id) ); -create table trickle_down ( +create table msg_to_host ( id integer not null auto_increment, create_time integer not null, - resultid integer not null, hostid integer not null, + variety integer not null, handled smallint not null, xml text, primary key (id) diff --git a/doc/client_sched.php b/doc/client_sched.php index 972df46f01..190d1c9c2a 100644 --- a/doc/client_sched.php +++ b/doc/client_sched.php @@ -2,28 +2,42 @@ require_once("docutil.php"); page_head("Result scheduling"); echo " -

Goals and motivation

-

The BOINC client result computation scheduling aims to achieve the -following goals:

+This document describes BOINC's policies for the following: + +
    +
  • CPU scheduling policy: what result to run when. +
  • Work fetch policy: when to contact scheduling servers, +and which one(s) to contact. +
+ +

CPU scheduling

+ +

CPU scheduling aims to achieve the following goals +(decreasing priority):

    -
  1. Efficiently and effectively use system resources.
    This -is clearly desirable.
  2. +
  3. +Maximize CPU utilization -
  4. Maintain a sense of minimum variety of projects for which -results are computed in a given amount of time.
    A user -participating in multiple projects can get bored seeing his computer -work only on one project for a long time.
  5. +
  6. +Respect the resource share allocation for each project. +A project's resource share represents how much computing resources +(CPU time, network bandwidth, storage space) a user wants to allocate +to the project relative to the resources allocated to all of the other +projects in which he is participating. The client should respect this +allocation to be faithful to the user. In the case of CPU time, the +result computation scheduling should achieve the expected time shares +over a reasonable time period. -
  7. Respect the resource share allocation for each -project.
    The user specifies the resource shares and thus -expects them to be honored.
  8. +
  9. +Satisfy result deadlines if possible. -
- -

The motivation for the second goal stems from the potential +

  • +Given a 'minimum variety' parameter MV (seconds), +reschedule CPUs at least once every MV seconds. +The motivation for this goal stems from the potential orders-of-magnitude differences in expected completion time for results from different projects. Some projects will have results that complete in hours, while other projects may have results that take @@ -33,334 +47,272 @@ short-running result computations stuck behind projects with long-running result computations. A participant in multiple projects will expect to see his computer work on each of these projects in a reasonable time period, not just the project with the long-running -result computations.

    +result computations. -

    A project's resource share represents how much computing resources -(CPU time, network bandwith, storage space) a user wants to allocate -to the project relative to the resources allocated to all of the other -projects in which he is participating. The client should respect this -allocation to be faithful to the user. In the case of CPU time, the -result computation scheduling should achieve the expected time shares -over a reasonable time period.

    - -

    At the same time, the scheduler RPC policy needs to complement the -result scheduling. We have the following goals for this policy:

    - -
      - -
    1. Have enough work to keep all CPUs busy
    2. - -
    3. Have enough work to provide for minimum variety of -projects
    4. - -
    5. Respect work_buf_min and work_buf_max
    6. - -
    - -

    BOINC client result computation scheduling

    - -

    We address the goals using result preemption. After a given time -period, the client decides on a new set of projects for which results -will be computed in the next time period. This decision will consider -the projects' resource shares by tracking the debt owed to a project. -The debt to a project accrues according to the project's resource -share, and is paid off when CPU time is devoted to the project.

    - -

    A consequence of result preemption is that projects can have -multiple active result computations at a given time. For example, -consider a two processor system participating in two projects, A and -B, with resource shares 75% and 25%, respectively. Ideally, one -processor will run a result computation for A, while the other -processor will switch between running result computations for A and B. -Thus, A will have two active result computations. This consequence -implies a desirable property of the result preemption scheme: that -the number of active result computations for a project be -minimized. For example, it's better to have one result from +

  • +Minimize mean time to completion for results. +This means that the number of active result computations for a project should be minimized. +For example, it's better to have one result from project P complete in time T than to have two results from project P -simultaneously complete in time 2T. Maintaining more active result -computations than necessary increases the mean-time-to-completion if -the client switches between these active result computations.

    +simultaneously complete in time 2T. -

    We will attempt to minimize the number of active result + + +

    +A result is 'active' if there is a slot directory for it. +A consequence of result preemption is that there can +be more active results than CPUs. + + +

    Debt

    + +

    +The notion of 'debt' is used to respect the resource share allocation +for each project. +The debt to a project represents the amount of work +(in CPU time) we owe it. +Debt is decreased when CPU time is devoted to a project. +We increase the debt to a project according to the +total amount of work done in a time period scaled by the project's +resource share. + +

    +For example, consider a system participating in two projects, A and B, +with resource shares 75% and 25%, respectively. +Suppose in some time period, the system devotes 25 minutes of CPU time to project A +and 15 minutes of CPU time to project B. +We decrease the debt to A by 25 minutes and increase it by 30 minutes (75% of 25 + 15). +So the debt increases overall. +This makes sense because we expected to devote a +larger percentage of the system resources to project A than it +actually got. +

    +The choice of projects for which to start result computations +can simply follow the debt ordering of the projects. +The algorithm computes the 'anticipated debt' to a project +(the debt we expect to owe after the time period expires) +as it chooses result computations to run. + +

    A sketch of the CPU scheduling algorithm

    + +

    +This algorithm is run: +

      +
    • Whenever a CPU is free +
    • Whenever a new result arrives (via scheduler RPC) +
    • Whenever it hasn't run for MV seconds +
    + +

    +We will attempt to minimize the number of active result computations for a project by dynamically choosing results to compute -from a global pool. When we allocate CPU time to project, we will -choose results to compute intelligently: choose already running tasks -first, then preempted tasks, and only choose to start a new result -computation in the last resort. This will not guarantee the above -property, but we hope it will be close to achieving it.

    - -

    A sketch of the result preemption algorithm

    - -

    The algorithm requires that a time period length be defined (e.g. -one hour). The result preemption algorithm is run at the beginning of -each period. It proceeds as follows:

    +from a global pool. +When we allocate CPU time to a project, +we will choose already running tasks first, +then preempted tasks, and only choose to start a new result +computation as a last resort. +This will not guarantee the above +property, but we hope it will be close to achieving it.
      -
    1. Pay off debts to projects according to the amount of work done for -the projects in the last period.
    2. +
    3. Decrease debts to projects according to the amount of work done for +the projects in the last period. -
    4. Accrue debts to projects according to the projects' resource -shares.
    5. +
    6. Increase debts to projects according to the projects' resource shares. -
    7. Let the expected future debt for each project be initialized to -its actual debt.
    8. +
    9. Let the anticipated debt for each project be initialized to +its current debt. -
    10. Repeat until we decide on a result to compute for each -processor:
    11. +
    12. Repeat until we decide on a result to compute for each processor:
        -
      1. Choose the project that has the largest expected future debt and a -ready-to-compute result.
      2. +
      3. Choose the project that has the largest anticipated debt and a +ready-to-compute result. -
      4. Decrease the expected future debt of the project by the amount we -expect to pay off, and return the project back into consideration for -running on another processor.
      5. +
      6. Decrease the anticipated debt of the project by the expected amount of CPU time.
      -
    13. Preempt the current result computations with the new ones.
    14. +
    15. Preempt current result computations, and start new ones.
    -

    Because result computations may finish before the time period -expires, we need to account for such a gap in a project's debt -payment. So, we need to also keep track of the amount of work done -during the current time period for each project as results finish. -This accounting should be reset for each time period.

    - -

    Finally, the starting of new result computations in the middle of a -time period needs to use this accounting instead of the expected -future debts that were estimated at the beginning of the time period. -Otherwise, it will be similar to the decision of choosing which tasks -to run at the beginning of a time period.

    -

    Pseudocode

    -

    We'll initialize total_work_done_this_period to -num_cpus * period_length.

    -
    -preempt_apps(): // called after every period_length
    +data structures:
    +ACTIVE_TASK:
    +    double cpu_at_last_schedule_point
    +    double current_cpu_time
    +    scheduler_state:
    +        PREEMPTED
    +        RUNNING
    +    next_scheduler_state    // temp
    +PROJECT:
    +    double work_done_this_period    // temp
    +    double debt
    +    double anticipated_debt // temp
    +    bool has_runnable_result
     
    -// finish accounting
    -foreach T in running_tasks:
    -    T.project.work_done_this_period += T.work_done_this_period
    -    total_work_done_this_period += T.work_done_this_period
    +schedule_cpus():
    +
    +foreach project P
    +    P.work_done_this_period = 0
    +
    +total_work_done_this_period = 0
    +foreach task T that is RUNNING:
    +    x = T.current_cpu_time - T.cpu_at_last_schedule_point
    +    T.project.work_done_this_period += x
    +    total_work_done_this_period += x
     
    -// pay off and accrue debts
     foreach P in projects:
         P.debt += P.resource_share * total_work_done_this_period
                 - P.work_done_this_period
     
    -// make preemption decisions
     expected_pay_off = total_work_done_this_period / num_cpus
    +
     foreach P in projects:
    -    P.expected_future_debt = P.debt
    -to_preempt.addAll(running_tasks) // assume we'll preempt everything at first
    -to_run = ()
    +    P.anticipated_debt = P.debt
    +
    +foreach task T
    +    T.next_scheduler_state = PREEMPTED
    +
     do num_cpus times:
    -    found = false
    -    do projects.size times:
    -        // choose the project with the largest expected future debt
    -        P = argmax { P.expected_future_debt } over all P in projects
    -        if (some T in to_preempt is for P):
    -            // P has a task that ran last period, so just don't preempt it
    -            to_preempt.remove(T)
    -            T.expected_pay_off = expected_pay_off
    -            found = true
    -            break
    -        if (some T in preempted_tasks is for P):
    -            // P has a task that was preempted
    -            preempted_tasks.remove(T)
    -            to_run.add(T)
    -            T.expected_pay_off = expected_pay_off
    -            found = true
    -            break
    -        if (some R in results is for P, not active, and ready to run):
    -            T = new ACTIVE_TASK for R
    -            to_run.add(T)
    -            T.expected_pay_off = expected_pay_off
    -            found = true
    -            break
    -        remove P from consideration in the argmax
    -    if found:
    -        P.expected_future_debt -= expected_pay_off
    -    else:
    +    // choose the project with the largest anticipated debt
    +    P = argmax { P.anticipated_debt } over all P in projects with runnable result
    +    if none:
             break
    -suspend tasks in to_preempt (reset T.expected_pay_off for each T in to_preempt)
    -run or unsuspend tasks in to_run (and put in running_tasks)
    +    if (some T in P is RUNNING):
    +        T.next_scheduler_state = RUNNING
    +        P.anticipated_debt -= expected_pay_off
    +        continue
    +    if (some T in P is PREEMPTED):
    +        T.next_scheduler_state = RUNNING
    +        P.anticipated_debt -= expected_pay_off
    +        continue
    +    if (some R in results is for P, not active, and ready to run):
    +        T = new ACTIVE_TASK for R
    +        T.next_scheduler_state = RUNNING
    +        P.anticipated_debt -= expected_pay_off
     
    -// reset accounting
    -foreach P in projects:
    -    P.work_done_this_period = 0
    -total_work_done_this_period = 0
    -
    -----------
    -
    -start_apps(): // called at each iteration of the BOINC main loop
    -
    -foreach P in projects:
    -    // expected_future_debt should account for any tasks that finished
    -    // and for tasks that are still running
    -    P.expected_future_debt = P.debt - P.work_done_this_period
    -foreach T in running_tasks:
    -    T.project.expected_future_debt -= T.expected_pay_off
    -
    -to_run = ()
    -while running_tasks.size < num_cpus:
    -    do projects.size times:
    -        // choose the project with the largest expected future debt
    -        P = argmax { P.expected_future_debt } over all P in projects
    -        if (some T in preempted_tasks is for P):
    -            // P has a task that was preempted
    -            preempted_tasks.remove(T)
    -            to_run.add(T)
    -            T.expected_pay_off = fraction_of_period_left * expected_pay_off
    -            found = true
    -            break
    -        if (some R in results is for P, not active, and ready to run):
    -            T = new ACTIVE_TASK for R
    -            to_run.add(T)
    -            T.expected_pay_off = fraction_of_period_left * expected_pay_off
    -            found = true
    -            break
    -        remove P from consideration in the argmax
    -    if found:
    -        P.expected_future_debt -= fraction_of_period_left * expected_pay_off
    -    else:
    -        break
    -run or unsuspend tasks in to_run
    -
    -----------
    -
    -handle_finished_apps(): // called at each iteration of the BOINC main loop
    -
    -foreach T in running_tasks:
    -    if T finished:
    -        // do some accounting
    -        T.project.work_done_this_period += T.work_done_this_period
    -        total_work_done_this_period += T.work_done_this_period
    -        do other clean up stuff
    +foreach task T
    +    if scheduler_state == PREEMPTED and next_scheduler_state == RUNNING
    +        unsuspend or run
    +    if scheduler_state == RUNNING and next_scheduler_state == PREEMPTED
    +        suspend (or kill)
     
    +foreach task T
    +    T.cpu_at_last_schedule_point = T.current_cpu_time
     
    -

    Debt computation

    +

    Work fetch policy

    -

    The notion of debt is used to respect the resource share allocation -for each project. The debt to a project represents the amount of work -(in CPU time) we owe to a project. Debt is paid off when CPU time is -devoted to a project. We accrue the debt to a project according to the -total amount of work done in a time period scaled by the project's -resource share.

    +

    +The work fetch policy has the following goal: -

    For example, consider a system participating in two projects, A and -B, with resource shares 75% and 25%, respectively. Suppose in some -time period, the system devotes 25 minutes of CPU time to project A -and 15 minutes of CPU time to project B. We decrease the debt to A by -20 minutes and accrue it by 30 minutes (75% of 25 + 15). So the debt -increases overall. This makes sense because we expected to devote a -larger percentage of the system resources to project A than it -actually got.

    +
      +
    • +Given a 'connection frequency' parameter 1/T (1/days), have enough +work for each project to meet CPU scheduling needs for T days. +The client should expect to contact scheduling servers only every T +days. +So, it should try to maintain between T and 2T days worth of work. +
    -

    The choosing of projects for which to start result computations at -the beginning of each time period can simply follow the debt ordering -of the projects. The algorithm computes the expected future debt to a -project (the debt we expect to owe after the time period expires) as -it chooses result computations to run.

    +

    When to get work

    -
    expected future debt = debt - expected pay off * number of -tasks to run this period
    +

    +The CPU scheduler needs a minimum number of results from a project +in order to respect the project's resource share. +We effectively have too little work when the number of results for a +project is less than this minimum number. -

    However, choosing projects to run in the middle of a time period is -a little different. The preemption algorithm expected each of the -tasks it started to last for the entire time period. However, when a -task finishes in the middle of a time period, the expected future debt -to the respective project is an overestimate. We thus change the -expected future debt to reflect what has taken place: it is the debt -owed to the project at the beginning of the time period, minus the -amount of work that has already been done this time period, and minus -the amount of work we expect to complete by the end of the time -period. When projects have results chosen to run, we decrease the -expected future debt by the amount of work we expect to be done for -the project in the remainder of the time period.

    +
    +min_results(P) = ceil(ncpus * P.resource_share) +
    -
    expected future debt = debt - (work completed + expected -pay off of tasks already running this period + expected pay off * -fraction of period left * number of new tasks for this -period)
    +

    +The client can estimate the amount of time that will elapse until we +have too little work for a project. +When this length of time is less than T, it is time to get more work. -

    Scheduler RPC policy

    - -

    The client should get more work when either of the following are -true:

    +

    A sketch of the work fetch algorithm

    +

    +This algorithm determines if a project needs more work. If a project +does need work, then the amount of work it needs is computed. +It is called whenever the client can make a scheduler RPC. +

      - -
    1. The client will have no work in at most work_buf_min days.
    2. - -
    3. The client will not have enough work for a project to get its fair -share of computation time (according to its resource share)
    4. - -
    5. The client will have fewer than num_cpus tasks
    6. - +
    7. +For each project +
        +
      1. +If the number of results for the project is too few +
          +
        1. +Set the project's work request to 2T +
        2. +Return NEED WORK IMMEDIATELY +
        +
      2. +For all but the top (min_results - 1) results with the longest +expected time to completion: +
          +
        1. +Sum the expected completion time of the result scaled by the work rate +and the project's resource share +
        +
      3. +If the sum S is less than T +
          +
        1. Set the project's work request to 2T - S +
        2. Return NEED WORK +
        +
      4. +Else, set the project's work request to 0 and return DON'T NEED WORK +
    -

    Ignoring the second case can cause long running result computations -to monopolize the CPU, even with result preemption. For example, -suppose a project has work units that finish on the order of months. -Then, when work_buf_min is on the order of days, the client will never -think it is out of work. However, projects with shorter result -computations may run out of work. So, even with preemption, we cannot -have minimum variety.

    +

    +The mechanism for actually getting work checks if a project has a +non-zero work request and if so, makes the scheduler RPC call to +request the work. -

    need_to_get_work()

    - -

    The second case (running out of work for one project) is addressed -by capping the amount of work counted for a project. We cap it by the -total amount of work that can be done in min_work_buf_secs, scaled by -the project's resource share. Thus, the client will get more work when -any one project has too little work.

    - -

    The case of having fewer results than CPUs is addressed by -\"packing\" results into CPU \"bins\".

    +

    Pseudocode

    -need_to_get_work():
    +data structures:
    +PROJECT:
    +    double work_request_days
     
    -    num_cpus_busy = 0
    -    total_work_secs = 0
    -    work_secs_for_one_cpu = 0
    -    foreach P in projects:
    -        P.avail_work_secs = 0
    +check_work_needed(Project P):
     
    -    sort results in order of decreasing estimated_cpu_time
    +if num_results(P) < min_results(P):
    +    P.work_request_days = 2T
    +    return NEED_WORK_IMMEDIATELY
     
    -    // pack results into CPU bins
    -    foreach R in results:
    -        result_work_secs = estimated_cpu_time(R)
    -        work_secs_for_one_cpu += result_work_secs
    -        R.project.avail_work_secs += result_work_secs
    -        if work_secs_for_one_cpu >= min_work_buf_secs
    -            work_secs_for_one_cpu = 0
    -            num_cpus_busy += 1
    +top_results = top (min_results(P) - 1) results of P by expected
    +completion time
    +
    +work_remaining = 0
    +foreach result R of P that is not in top_results:
    +    work_remaining += R.expected_completion_time
    +work_remaining *= P.resource_share * active_frac / ncpus
    +
    +if work_remaining < T:
    +    P.work_request_days = 2T - work_remaining / seconds_per_day
    +    return NEED_WORK
    +else:
    +    P.work_request_days = 0
    +    return DONT_NEED_WORK
     
    -    // count total amount of work, but cap amount any one project contributes
    -    // to this total
    -    foreach P in projects:
    -        total_work_secs += min { P.avail_work_secs,
    -                            P.resource_share * min_work_buf_secs * num_cpus }
     
    -    return (num_cpus_busy < num_cpus)
    -        || (total_work_secs < min_work_secs * num_cpus)
     
    -

    XXX it will be useful to know what caused this predicate to return -true, so maybe it should be split into separate predicates.

    - -

    XXX also need to factor in if we are able to currently contact a -project (according to min_rpc_time).

    "; page_tail(); ?> diff --git a/doc/credit.php b/doc/credit.php index 2b92fdc954..9a6862655c 100644 --- a/doc/credit.php +++ b/doc/credit.php @@ -97,6 +97,8 @@ void update_average( avg_time = now; } +#define CREDIT_HALF_LIFE (SECONDS_IN_DAY*7) + "),"
    diff --git a/doc/db_dump.php b/doc/db_dump.php index 560da9dd56..32f8e56a03 100644 --- a/doc/db_dump.php +++ b/doc/db_dump.php @@ -4,62 +4,94 @@ page_head("Downloadable statistics data"); echo "

    -BOINC projects can export data describing teams, users and hosts. -This data is exported in downloadable XML files, -and can be summarized and represented as Web pages. -Some examples are listed at +BOINC projects can export statistics data +describing teams, users and hosts. +This data can be imported and used to produce +web sites that show statistics and leaderboards +for one or more BOINC projects. +Examples of such sites are listed at http://setiboinc.ssl.berkeley.edu/ap/stats.php.

    -The data is presented in several 'views': -teams ordered by credit, teams ordered by ID, etc. -Each view is available in two forms: -

      -
    • As a single file. -
    • -Divided into a number of files, -each containing a limited number of records. -This lets you get a single record or range of records efficiently. -For views that are ordered by ID, -each file contains a fixed-size segment of the ID range, -not a fixed number of records. -If the database ID allocation has gaps, -files will have fewer than this number of records. -
    +Statistics data is exported in XML-format files. +These files are contained in a download directory, +linked to from the project's web site. +A project can decide what data to export, +and how it is divided into files. +This is described by a file db_dump.xml of the following form: +"; +echo html_text(" + + + x
    + x + x + + n + + x + + ... +
    + ... +
    +"); +echo " +An 'enumeration' is a listing of a particular table. +The fields are: +"; +list_start(); +list_item("table", "'user', 'host' or 'team'"); +list_item("filename", "The base filename."); +list_item("sort", "The sorting criterion: + 'total_credit', 'expavg_credit', or 'id'. + 'id' is the default." +); +list_end(); +echo +"An 'output' is a file or set of files containing an enumeration. +The fields are:"; +list_start(); +list_item("recs_per_file", + "If present, the listing is divided into multiple files + with the given number of records per file. + The file names have the form xxx_N, + where xxx is the base filename. + For views that are ordered by ID, + each file contains a fixed-size segment of the ID range, + not a fixed number of records. + If the database ID allocation has gaps, + files will have fewer than this number of records. +

    + If zero or absent, + the listing is written to a single file." +); +list_item("detail", + "If present, records are 'detailed': + user records include a list of hosts, + and team records include a list of users." +); +list_end(); +echo"

    -The entries in a given file are in either 'summary' or 'detail' form. -For example, the summary of a team gives its ID, name, and credit, -while the detailed form also contains a list of its members. -

    -The files are as follows: - -

    -tables.xml -

    -For each entity type (team, user, and host) this gives -

      -
    • the total number of records -
    • the number of records per file for summary files -
    • the number of records per file for detail files -
    +The download directory contains the following files: +"; +list_start(); +list_item("tables.xml", +"This gives the total number of records +for each entity type (team, user, and host). It also includes the UNIX time when the files were last generated, and a list of the project's applications, with counts of results in various states.
    For example: -
    ",
    +
    ".
     htmlspecialchars("
         1046220857
         127
    -    1000
    -    100
         14
    -    1000
    -    100
         681
    -    1000
    -    100
         
             
                 setiathome
    @@ -70,41 +102,32 @@ htmlspecialchars("
             ...
         
     
    -"),
    -"
    -Other files: -"; -list_start(); -list_heading("File name", "Contents"); -list_item( -"team_total_credit.xml
    team_total_credit_N.xml", -"Team summaries, ordered by decreasing total credit. -The first file is the complete list; -the remaining files (for N = 0, 1, ...) is the list -in limited-size chunks." +"). +"
    "); +list_item("core_versions.xml", +"A list of versions of the core client in the project's database" ); -list_item("team_expavg_credit.xml
    team_expavg_credit_N.xml", -"Team summaries, ordered by decreasing recent-average credit."); -list_item("team_id.xml
    team_id_N.xml", -"Team details, ordered by increasing ID."); -list_item("user_total_credit.xml
    user_total_credit_N.xml", -"User summaries, ordered by decreasing total credit."); -list_item("user_expavg_credit.xml
    user_expavg_credit_N.xml", -"User summaries, ordered by decreasing recent-average credit."); -list_item("user_id.xml, user_id_N.xml", -"User details, ordered by increasing ID."); -list_item("host_total_credit.xml
    host_total_credit_N.xml", -"Host summaries, ordered by decreasing total credit."); -list_item("host_expavg_credit.xml
    host_expavg_credit_N.xml", -"Host summaries, ordered by decreasing recent-average credit."); -list_item("host_id.xml
    host_id_N.xml", -"Host details, ordered by increasing ID."); -list_item("core_versions.xml", "A list of version of the core client - in the project's database"); list_end(); echo "

    -The format of the various XML elements is as follows: +The format of the various XML elements +in the output files is as follows. +Notes: +

      +
    • +<cpid> ('cross-project identifier') +is a unique identifier across multiple projects. +Accounts with the same email address on different projects +will have the same cross-project identifier +(as long as at least one computer is attached to both accounts). +
    • +All 'expavg_credit' values were computed at some point +in the past (given by 'expavg_time'). +To compute their current values, +they must be scaled according to the formula given +here. +
    +

    Team summary @@ -115,6 +138,7 @@ htmlspecialchars(" Broadband Reports Team Starfire 153402.872429 503030.483254 + 1087542007.701900 14 "), @@ -128,6 +152,7 @@ htmlspecialchars(" Broadband Reports Team Starfire 153402.872429 503030.483254 + 1087542007.701900 14 0 %3Ca%20href%3D%27http%3A%2F%2Fbroadbandreports%2Ecom%2Fforum%2Fseti%2 @@ -139,6 +164,7 @@ g%27%3E John Keck 42698.813543 117348.653646 + 1087542007.701900 5 @@ -146,6 +172,7 @@ g%27%3E Liontaur 46389.595430 122936.372641 + 1087542007.701900 5 @@ -159,18 +186,13 @@ htmlspecialchars(" John Keck 42698.813543 117348.653646 + 1087542007.701900 283472938743489759837498347 [ 5 ] [ ] "), "

    -Note: <cpid> ('cross-project identifier') -is a unique identifier across multiple projects. -Accounts with the same email address on different projects -will have the same cross-project identifier -(as long as at least one computer is attached to both accounts). -

    User detail

    ",
     htmlspecialchars("
    @@ -179,6 +201,7 @@ htmlspecialchars("
      Eric Heien
      4897.904591
      9820.631754
    + 1087542007.701900
      United States
      1046220857
      [ 14 ]
    @@ -187,6 +210,7 @@ htmlspecialchars("
         27
         0.000000
         0.000000
    +    1087542007.701900
         
         
         Darwin
    @@ -196,6 +220,7 @@ htmlspecialchars("
         266
         0.000000
         0.000000
    +    1087542007.701900
         GenuineIntel
         Intel(R)
         Linux
    @@ -211,6 +236,7 @@ htmlspecialchars("
       266
       0.000000
       0.000000
    +  1087542007.701900
       GenuineIntel
       Intel(R)
       Linux
    @@ -226,6 +252,7 @@ htmlspecialchars("
       3
       0.000000
       0.000000
    +  1087542007.701900
       GenuineIntel
       Pentium
       Windows XP
    diff --git a/doc/white.css b/doc/white.css
    index 11230e8c7d..6736d524a1 100644
    --- a/doc/white.css
    +++ b/doc/white.css
    @@ -25,7 +25,7 @@ a:active {
     
     body , table , input , select {
     	font-family: "Trebuchet MS", Verdana, Arial, Sans Serif;
    -#font-size: small;
    +    font-size: small;
     }
     
     body {
    diff --git a/lib/error_numbers.h b/lib/error_numbers.h
    index 1350875572..f890ce3116 100755
    --- a/lib/error_numbers.h
    +++ b/lib/error_numbers.h
    @@ -131,3 +131,4 @@
     #define ERR_BIND            -180
     #define ERR_LISTEN          -181
     #define ERR_TIMEOUT         -182
    +#define ERR_PROJECT_DOWN    -183
    diff --git a/sched/assimilator.C b/sched/assimilator.C
    index bdcc95f566..72a54c2742 100644
    --- a/sched/assimilator.C
    +++ b/sched/assimilator.C
    @@ -50,7 +50,6 @@ bool do_pass(APP& app) {
         DB_RESULT canonical_result, result;
         bool did_something = false;
         char buf[256];
    -    int retval;
     
         check_stop_daemons();
     
    diff --git a/sched/handle_request.C b/sched/handle_request.C
    index 038870c8aa..1cc720b541 100644
    --- a/sched/handle_request.C
    +++ b/sched/handle_request.C
    @@ -547,7 +547,7 @@ inline static const char* get_remote_addr() {
     void handle_trickle_ups(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
         unsigned int i;
         DB_RESULT result;
    -    DB_TRICKLE_UP tup;
    +    DB_MSG_FROM_HOST mfh;
         int retval;
         char buf[256];
     
    @@ -577,15 +577,18 @@ void handle_trickle_ups(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
                 );
                 continue;
             }
    -        tup.clear();
    -        tup.create_time = time(0);
    -        tup.send_time = td.send_time;
    -        tup.resultid = result.id;
    -        tup.appid = result.appid;
    -        tup.hostid = reply.host.id;
    -        tup.handled = false;
    -        safe_strcpy(tup.xml, td.trickle_text.c_str());
    -        retval = tup.insert();
    +        mfh.clear();
    +        mfh.create_time = time(0);
    +        mfh.send_time = td.send_time;
    +        mfh.variety = result.appid;
    +        mfh.hostid = reply.host.id;
    +        mfh.handled = false;
    +        sprintf(buf, "%s\n", td.result_name);
    +        string foobar;
    +        foobar = buf;
    +        foobar += td.trickle_text;
    +        safe_strcpy(mfh.xml, foobar.c_str());
    +        retval = mfh.insert();
             if (retval) {
                 log_messages.printf(SCHED_MSG_LOG::CRITICAL,
                     "[HOST#%d] trickle insert failed: %d\n", 
    @@ -596,14 +599,14 @@ void handle_trickle_ups(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
     }
     
     void handle_trickle_downs(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
    -    DB_TRICKLE_DOWN td;
    +    DB_MSG_TO_HOST mth;
         char buf[256];
     
         sprintf(buf, "where hostid = %d", reply.host.id);
    -    while (!td.enumerate(buf)) {
    -        reply.trickle_downs.push_back(td);
    -        td.handled = true;
    -        td.update();
    +    while (!mth.enumerate(buf)) {
    +        reply.msgs_to_host.push_back(mth);
    +        mth.handled = true;
    +        mth.update();
         }
     }
     
    diff --git a/sched/main.C b/sched/main.C
    index 0e52efefc0..54a693aa8c 100644
    --- a/sched/main.C
    +++ b/sched/main.C
    @@ -74,6 +74,7 @@ void send_message(char* msg, int delay) {
             "\n"
             "    %s\n"
             "    %d\n"
    +        "    \n"
             "\n",
             msg, delay
         );
    diff --git a/sched/server_types.C b/sched/server_types.C
    index b176c51d51..3b3e286f34 100644
    --- a/sched/server_types.C
    +++ b/sched/server_types.C
    @@ -185,7 +185,6 @@ SCHEDULER_REPLY::~SCHEDULER_REPLY() {
     int SCHEDULER_REPLY::write(FILE* fout) {
         unsigned int i, j;
         string u1, u2, t1, t2;
    -    int retval;
     
         fprintf(fout,
             "\n"
    @@ -317,25 +316,9 @@ int SCHEDULER_REPLY::write(FILE* fout) {
         if (send_trickle_up_ack) {
             fputs("\n", fout);
         }
    -    for (i=0; i\n"
    -            "    %s\n"
    -            "    %d\n"
    -            "    \n"
    -            "%s\n"
    -            "    \n"
    -            "\n",
    -            result.name,
    -            td.create_time,
    -            td.xml
    -        );
    +    for (i=0; i\n");
    diff --git a/sched/server_types.h b/sched/server_types.h
    index 90ea7b0030..b3cf0ba221 100644
    --- a/sched/server_types.h
    +++ b/sched/server_types.h
    @@ -104,7 +104,7 @@ struct SCHEDULER_REPLY {
         vectorwus;
         vectorresults;
         vectorresult_acks;
    -    vectortrickle_downs;
    +    vectormsgs_to_host;
         char code_sign_key[4096];
         char code_sign_key_signature[4096];
         bool send_trickle_up_ack;
    diff --git a/sched/trickle_handler.C b/sched/trickle_handler.C
    index 0cd79dcba9..74d2eb9615 100644
    --- a/sched/trickle_handler.C
    +++ b/sched/trickle_handler.C
    @@ -44,23 +44,23 @@ using namespace std;
     SCHED_CONFIG config;
     char app_name[256];
     
    -extern int handle_trickle(TRICKLE_UP&);
    +extern int handle_trickle(MSG_FROM_HOST&);
     
    -int handle_trickle(TRICKLE_UP& tup) {
    +int handle_trickle(MSG_FROM_HOST& mfh) {
         int retval;
     
         printf(
    -        "got trickle-up \n%s\nfor result %d\n",
    -        tup.xml, tup.resultid
    +        "got trickle-up \n%s\n\n",
    +        mfh.xml
         );
    -    DB_TRICKLE_DOWN tdown;
    -    tdown.clear();
    -    tdown.create_time = time(0);
    -    tdown.resultid = tup.resultid;
    -    tdown.hostid = tup.hostid;
    -    tdown.handled = false;
    -    strcpy(tdown.xml, tup.xml);
    -    retval = tdown.insert();
    +    DB_MSG_TO_HOST mth;
    +    mth.clear();
    +    mth.create_time = time(0);
    +    mth.hostid = mfh.hostid;
    +    mth.variety = mfh.variety;
    +    mth.handled = false;
    +    strcpy(mth.xml, mfh.xml);
    +    retval = mth.insert();
         if (retval) {
             printf("insert failed %d\n", retval);
         }
    @@ -71,17 +71,17 @@ int handle_trickle(TRICKLE_UP& tup) {
     // return true if there were any
     //
     bool do_trickle_scan(APP& app) {
    -    DB_TRICKLE_UP tup;
    +    DB_MSG_FROM_HOST mfh;
         char buf[256];
         bool found=false;
         int retval;
     
    -    sprintf(buf, "where appid=%d and handled=0", app.id);
    -    while (!tup.enumerate(buf)) {
    -        retval = handle_trickle(tup);
    +    sprintf(buf, "where variety=%d and handled=0", app.id);
    +    while (!mfh.enumerate(buf)) {
    +        retval = handle_trickle(mfh);
             if (!retval) {
    -            tup.handled = true;
    -            tup.update();
    +            mfh.handled = true;
    +            mfh.update();
             }
             found = true;
         }