mirror of https://github.com/BOINC/boinc.git
- client: move round-robin simulation to its own file
- web: check for profile existence before trying to show it - file deleter: add some debugging msgs svn path=/trunk/boinc/; revision=16338
This commit is contained in:
parent
c521f35792
commit
2d1d47de15
|
@ -8793,3 +8793,22 @@ Charlie 28 Oct 2008
|
|||
|
||||
clientgui/
|
||||
BOINCBaseView.cpp
|
||||
|
||||
David 28 Oct 2008
|
||||
- client: move round-robin simulation to its own file
|
||||
- web: check for profile existence before trying to show it
|
||||
- file deleter: add some debugging msgs
|
||||
|
||||
client/
|
||||
rr_sim.cpp,h (new)
|
||||
Makefile.am
|
||||
client_state.h
|
||||
client_types.h
|
||||
cpu_sched.cpp
|
||||
html/
|
||||
inc/
|
||||
profile.inc
|
||||
user/
|
||||
view_profile.php
|
||||
sched/
|
||||
file_deleter.cpp
|
||||
|
|
|
@ -54,6 +54,7 @@ boinc_client_SOURCES = \
|
|||
main.cpp \
|
||||
net_stats.cpp \
|
||||
pers_file_xfer.cpp \
|
||||
rr_sim.cpp \
|
||||
sandbox.cpp \
|
||||
scheduler_op.cpp \
|
||||
time_stats.cpp \
|
||||
|
|
|
@ -182,12 +182,12 @@ private:
|
|||
/// when the most recent app was started
|
||||
double app_started;
|
||||
|
||||
// --------------- acct_mgr.C:
|
||||
// --------------- acct_mgr.cpp:
|
||||
public:
|
||||
ACCT_MGR_OP acct_mgr_op;
|
||||
ACCT_MGR_INFO acct_mgr_info;
|
||||
|
||||
// --------------- acct_setup.C:
|
||||
// --------------- acct_setup.cpp:
|
||||
public:
|
||||
PROJECT_INIT project_init;
|
||||
PROJECT_ATTACH project_attach;
|
||||
|
@ -200,11 +200,11 @@ public:
|
|||
double all_projects_list_check_time;
|
||||
string newer_version;
|
||||
|
||||
// --------------- auto_update.C:
|
||||
// --------------- auto_update.cpp:
|
||||
public:
|
||||
AUTO_UPDATE auto_update;
|
||||
|
||||
// --------------- client_state.C:
|
||||
// --------------- client_state.cpp:
|
||||
public:
|
||||
CLIENT_STATE();
|
||||
void show_host_info();
|
||||
|
@ -242,7 +242,7 @@ private:
|
|||
int nresults_for_project(PROJECT*);
|
||||
void check_clock_reset();
|
||||
|
||||
// --------------- cpu_sched.C:
|
||||
// --------------- cpu_sched.cpp:
|
||||
private:
|
||||
double debt_interval_start;
|
||||
double total_wall_cpu_time_this_debt_interval;
|
||||
|
@ -263,7 +263,6 @@ private:
|
|||
void schedule_cpus();
|
||||
bool enforce_schedule();
|
||||
bool no_work_for_a_cpu();
|
||||
void rr_simulation();
|
||||
void make_preemptable_task_list(vector<ACTIVE_TASK*>&, double&);
|
||||
void print_deadline_misses();
|
||||
public:
|
||||
|
@ -295,7 +294,7 @@ public:
|
|||
/// - any result op is done via RPC (suspend/resume)
|
||||
void request_schedule_cpus(const char*);
|
||||
|
||||
// --------------- cs_account.C:
|
||||
// --------------- cs_account.cpp:
|
||||
public:
|
||||
int add_project(
|
||||
const char* master_url, const char* authenticator,
|
||||
|
@ -308,7 +307,7 @@ private:
|
|||
int parse_statistics_files();
|
||||
// should be moved to a new file, but this will do for testing
|
||||
|
||||
// --------------- cs_apps.C:
|
||||
// --------------- cs_apps.cpp:
|
||||
private:
|
||||
double total_resource_share();
|
||||
double potentially_runnable_resource_share();
|
||||
|
@ -344,7 +343,7 @@ private:
|
|||
public:
|
||||
ACTIVE_TASK* get_task(RESULT*);
|
||||
|
||||
// --------------- cs_benchmark.C:
|
||||
// --------------- cs_benchmark.cpp:
|
||||
public:
|
||||
bool should_run_cpu_benchmarks();
|
||||
void start_cpu_benchmarks();
|
||||
|
@ -355,13 +354,13 @@ public:
|
|||
void cpu_benchmarks_set_defaults();
|
||||
void print_benchmark_results();
|
||||
|
||||
// --------------- cs_cmdline.C:
|
||||
// --------------- cs_cmdline.cpp:
|
||||
public:
|
||||
void parse_cmdline(int argc, char** argv);
|
||||
void parse_env_vars();
|
||||
void do_cmdline_actions();
|
||||
|
||||
// --------------- cs_files.C:
|
||||
// --------------- cs_files.cpp:
|
||||
public:
|
||||
void check_file_existence();
|
||||
bool start_new_file_xfer(PERS_FILE_XFER&);
|
||||
|
@ -369,7 +368,7 @@ private:
|
|||
int make_project_dirs();
|
||||
bool handle_pers_file_xfers();
|
||||
|
||||
// --------------- cs_platforms.C:
|
||||
// --------------- cs_platforms.cpp:
|
||||
public:
|
||||
const char* get_primary_platform();
|
||||
private:
|
||||
|
@ -378,7 +377,7 @@ private:
|
|||
void write_platforms(PROJECT*, MIOFILE&);
|
||||
bool is_supported_platform(const char*);
|
||||
|
||||
// --------------- cs_prefs.C:
|
||||
// --------------- cs_prefs.cpp:
|
||||
public:
|
||||
int project_disk_usage(PROJECT*, double&);
|
||||
/// returns the total disk usage of BOINC on this host
|
||||
|
@ -400,7 +399,7 @@ private:
|
|||
PROJECT* global_prefs_source_project();
|
||||
void show_global_prefs_source(bool);
|
||||
|
||||
// --------------- cs_scheduler.C:
|
||||
// --------------- cs_scheduler.cpp:
|
||||
public:
|
||||
int make_scheduler_request(PROJECT*);
|
||||
int handle_scheduler_reply(PROJECT*, char* scheduler_url, int& nresults);
|
||||
|
@ -413,7 +412,7 @@ private:
|
|||
double avg_proc_rate();
|
||||
bool should_get_work();
|
||||
|
||||
// --------------- cs_statefile.C:
|
||||
// --------------- cs_statefile.cpp:
|
||||
public:
|
||||
void set_client_state_dirty(const char*);
|
||||
int parse_state_file();
|
||||
|
@ -426,14 +425,14 @@ public:
|
|||
int write_file_transfers_gui(MIOFILE&);
|
||||
int write_tasks_gui(MIOFILE&);
|
||||
|
||||
// --------------- cs_trickle.C:
|
||||
// --------------- cs_trickle.cpp:
|
||||
private:
|
||||
int read_trickle_files(PROJECT*, FILE*);
|
||||
int remove_trickle_files(PROJECT*);
|
||||
public:
|
||||
int handle_trickle_down(PROJECT*, FILE*);
|
||||
|
||||
// --------------- check_state.C:
|
||||
// --------------- check_state.cpp:
|
||||
// stuff related to data-structure integrity checking
|
||||
//
|
||||
public:
|
||||
|
@ -459,7 +458,10 @@ public:
|
|||
void check_all();
|
||||
void free_mem();
|
||||
|
||||
// --------------- work_fetch.C:
|
||||
// --------------- rr_sim.cpp:
|
||||
void rr_simulation();
|
||||
|
||||
// --------------- work_fetch.cpp:
|
||||
public:
|
||||
int proj_min_results(PROJECT*, double);
|
||||
void check_project_timeout();
|
||||
|
@ -492,4 +494,8 @@ extern void print_suspend_tasks_message(int);
|
|||
// for up to POLL_INTERVAL seconds before calling poll_slow_events()
|
||||
// to call the polling functions
|
||||
|
||||
#define CPU_PESSIMISM_FACTOR 0.9
|
||||
// assume actual CPU utilization will be this multiple
|
||||
// of what we've actually measured recently
|
||||
|
||||
#endif
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include "hostinfo.h"
|
||||
#include "coproc.h"
|
||||
#include "miofile.h"
|
||||
#include "rr_sim.h"
|
||||
|
||||
#define P_LOW 1
|
||||
#define P_MEDIUM 3
|
||||
|
@ -158,57 +159,6 @@ struct DAILY_STATS {
|
|||
};
|
||||
bool operator < (const DAILY_STATS&, const DAILY_STATS&);
|
||||
|
||||
struct RR_SIM_PROJECT_STATUS {
|
||||
/// jobs currently running (in simulation)
|
||||
std::vector<RESULT*>active;
|
||||
/// jobs runnable but not running yet
|
||||
std::vector<RESULT*>pending;
|
||||
int deadlines_missed;
|
||||
/// fraction of each CPU this project will get
|
||||
/// set in CLIENT_STATE::rr_misses_deadline();
|
||||
double proc_rate;
|
||||
double cpu_shortfall;
|
||||
|
||||
inline void clear() {
|
||||
active.clear();
|
||||
pending.clear();
|
||||
deadlines_missed = 0;
|
||||
proc_rate = 0;
|
||||
cpu_shortfall = 0;
|
||||
}
|
||||
inline void activate(RESULT* rp) {
|
||||
active.push_back(rp);
|
||||
}
|
||||
inline void add_pending(RESULT* rp) {
|
||||
pending.push_back(rp);
|
||||
}
|
||||
inline bool none_active() {
|
||||
return !active.size();
|
||||
}
|
||||
inline bool can_run(RESULT*, int ncpus) {
|
||||
return (int)active.size() < ncpus;
|
||||
}
|
||||
inline void remove_active(RESULT* r) {
|
||||
std::vector<RESULT*>::iterator it = active.begin();
|
||||
while (it != active.end()) {
|
||||
if (*it == r) {
|
||||
it = active.erase(it);
|
||||
} else {
|
||||
it++;
|
||||
}
|
||||
}
|
||||
}
|
||||
inline RESULT* get_pending() {
|
||||
if (!pending.size()) return NULL;
|
||||
RESULT* rp = pending[0];
|
||||
pending.erase(pending.begin());
|
||||
return rp;
|
||||
}
|
||||
inline int cpus_used() {
|
||||
return (int) active.size();
|
||||
}
|
||||
};
|
||||
|
||||
class PROJECT {
|
||||
public:
|
||||
// the following items come from the account file
|
||||
|
|
|
@ -61,10 +61,6 @@ using std::vector;
|
|||
#define MAX_STD (86400)
|
||||
// maximum short-term debt
|
||||
|
||||
#define CPU_PESSIMISM_FACTOR 0.9
|
||||
// assume actual CPU utilization will be this multiple
|
||||
// of what we've actually measured recently
|
||||
|
||||
#define DEADLINE_CUSHION 0
|
||||
// try to finish jobs this much in advance of their deadline
|
||||
|
||||
|
@ -1166,296 +1162,6 @@ void PROJECT::set_rrsim_proc_rate(double rrs) {
|
|||
}
|
||||
}
|
||||
|
||||
struct RR_SIM_STATUS {
|
||||
vector<RESULT*> active;
|
||||
COPROCS coprocs;
|
||||
|
||||
inline bool can_run(RESULT* rp) {
|
||||
return coprocs.sufficient_coprocs(
|
||||
rp->avp->coprocs, log_flags.rr_simulation, "rr_simulation"
|
||||
);
|
||||
}
|
||||
inline void activate(RESULT* rp) {
|
||||
coprocs.reserve_coprocs(
|
||||
rp->avp->coprocs, rp, log_flags.rr_simulation, "rr_simulation"
|
||||
);
|
||||
active.push_back(rp);
|
||||
}
|
||||
// remove *rpbest from active set,
|
||||
// and adjust CPU time left for other results
|
||||
//
|
||||
inline void remove_active(RESULT* rpbest) {
|
||||
coprocs.free_coprocs(rpbest->avp->coprocs, rpbest, log_flags.rr_simulation, "rr_simulation");
|
||||
vector<RESULT*>::iterator it = active.begin();
|
||||
while (it != active.end()) {
|
||||
RESULT* rp = *it;
|
||||
if (rp == rpbest) {
|
||||
it = active.erase(it);
|
||||
} else {
|
||||
rp->rrsim_cpu_left -= rp->project->rr_sim_status.proc_rate*rpbest->rrsim_finish_delay;
|
||||
it++;
|
||||
}
|
||||
}
|
||||
}
|
||||
inline int nactive() {
|
||||
return (int) active.size();
|
||||
}
|
||||
~RR_SIM_STATUS() {
|
||||
coprocs.delete_coprocs();
|
||||
}
|
||||
};
|
||||
|
||||
// Do a simulation of the current workload
|
||||
// with weighted round-robin (WRR) scheduling.
|
||||
// Include jobs that are downloading.
|
||||
//
|
||||
// For efficiency, we simulate a crude approximation of WRR.
|
||||
// We don't model time-slicing.
|
||||
// Instead we use a continuous model where, at a given point,
|
||||
// each project has a set of running jobs that uses all CPUs
|
||||
// (and obeys coprocessor limits).
|
||||
// These jobs are assumed to run at a rate proportionate to their avg_ncpus,
|
||||
// and each project gets CPU proportionate to its RRS.
|
||||
//
|
||||
// Outputs are changes to global state:
|
||||
// For each project p:
|
||||
// p->rr_sim_deadlines_missed
|
||||
// p->cpu_shortfall
|
||||
// For each result r:
|
||||
// r->rr_sim_misses_deadline
|
||||
// r->last_rr_sim_missed_deadline
|
||||
// gstate.cpu_shortfall
|
||||
//
|
||||
// Deadline misses are not counted for tasks
|
||||
// that are too large to run in RAM right now.
|
||||
//
|
||||
void CLIENT_STATE::rr_simulation() {
|
||||
double rrs = nearly_runnable_resource_share();
|
||||
double trs = total_resource_share();
|
||||
PROJECT* p, *pbest;
|
||||
RESULT* rp, *rpbest;
|
||||
RR_SIM_STATUS sim_status;
|
||||
unsigned int i;
|
||||
|
||||
sim_status.coprocs.clone(coprocs, false);
|
||||
double ar = available_ram();
|
||||
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[rr_sim] rr_sim start: work_buf_total %f rrs %f trs %f ncpus %d",
|
||||
work_buf_total(), rrs, trs, ncpus
|
||||
);
|
||||
}
|
||||
|
||||
for (i=0; i<projects.size(); i++) {
|
||||
p = projects[i];
|
||||
p->rr_sim_status.clear();
|
||||
}
|
||||
|
||||
// Decide what jobs to include in the simulation,
|
||||
// and pick the ones that are initially running
|
||||
//
|
||||
for (i=0; i<results.size(); i++) {
|
||||
rp = results[i];
|
||||
if (!rp->nearly_runnable()) continue;
|
||||
if (rp->some_download_stalled()) continue;
|
||||
if (rp->project->non_cpu_intensive) continue;
|
||||
rp->rrsim_cpu_left = rp->estimated_cpu_time_remaining(false);
|
||||
p = rp->project;
|
||||
if (p->rr_sim_status.can_run(rp, gstate.ncpus) && sim_status.can_run(rp)) {
|
||||
sim_status.activate(rp);
|
||||
p->rr_sim_status.activate(rp);
|
||||
} else {
|
||||
p->rr_sim_status.add_pending(rp);
|
||||
}
|
||||
rp->last_rr_sim_missed_deadline = rp->rr_sim_misses_deadline;
|
||||
rp->rr_sim_misses_deadline = false;
|
||||
}
|
||||
|
||||
for (i=0; i<projects.size(); i++) {
|
||||
p = projects[i];
|
||||
p->set_rrsim_proc_rate(rrs);
|
||||
// if there are no results for a project,
|
||||
// the shortfall is its entire share.
|
||||
//
|
||||
if (p->rr_sim_status.none_active()) {
|
||||
double rsf = trs ? p->resource_share/trs : 1;
|
||||
p->rr_sim_status.cpu_shortfall = work_buf_total() * overall_cpu_frac() * ncpus * rsf;
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(p, MSG_INFO,
|
||||
"[rr_sim] no results; shortfall %f wbt %f ocf %f rsf %f",
|
||||
p->rr_sim_status.cpu_shortfall, work_buf_total(), overall_cpu_frac(), rsf
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
double buf_end = now + work_buf_total();
|
||||
|
||||
// Simulation loop. Keep going until work done
|
||||
//
|
||||
double sim_now = now;
|
||||
cpu_shortfall = 0;
|
||||
while (sim_status.nactive()) {
|
||||
|
||||
// compute finish times and see which result finishes first
|
||||
//
|
||||
rpbest = NULL;
|
||||
for (i=0; i<sim_status.active.size(); i++) {
|
||||
rp = sim_status.active[i];
|
||||
p = rp->project;
|
||||
rp->rrsim_finish_delay = rp->rrsim_cpu_left/p->rr_sim_status.proc_rate;
|
||||
if (!rpbest || rp->rrsim_finish_delay < rpbest->rrsim_finish_delay) {
|
||||
rpbest = rp;
|
||||
}
|
||||
}
|
||||
|
||||
pbest = rpbest->project;
|
||||
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"[rr_sim] result %s finishes after %f (%f/%f)",
|
||||
rpbest->name, rpbest->rrsim_finish_delay,
|
||||
rpbest->rrsim_cpu_left, pbest->rr_sim_status.proc_rate
|
||||
);
|
||||
}
|
||||
|
||||
// "rpbest" is first result to finish. Does it miss its deadline?
|
||||
//
|
||||
double diff = sim_now + rpbest->rrsim_finish_delay - ((rpbest->computation_deadline()-now)*CPU_PESSIMISM_FACTOR + now);
|
||||
if (diff > 0) {
|
||||
ACTIVE_TASK* atp = lookup_active_task_by_result(rpbest);
|
||||
if (atp && atp->procinfo.working_set_size_smoothed > ar) {
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"[rr_sim] result %s misses deadline but too large to run",
|
||||
rpbest->name
|
||||
);
|
||||
}
|
||||
} else {
|
||||
rpbest->rr_sim_misses_deadline = true;
|
||||
pbest->rr_sim_status.deadlines_missed++;
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"[rr_sim] result %s misses deadline by %f",
|
||||
rpbest->name, diff
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int last_active_size = sim_status.nactive();
|
||||
int last_proj_active_size = pbest->rr_sim_status.cpus_used();
|
||||
|
||||
sim_status.remove_active(rpbest);
|
||||
|
||||
pbest->rr_sim_status.remove_active(rpbest);
|
||||
|
||||
// If project has more results, add one or more to active set.
|
||||
//
|
||||
while (1) {
|
||||
rp = pbest->rr_sim_status.get_pending();
|
||||
if (!rp) break;
|
||||
if (pbest->rr_sim_status.can_run(rp, gstate.ncpus) && sim_status.can_run(rp)) {
|
||||
sim_status.activate(rp);
|
||||
pbest->rr_sim_status.activate(rp);
|
||||
} else {
|
||||
pbest->rr_sim_status.add_pending(rp);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If all work done for a project, subtract that project's share
|
||||
// and recompute processing rates
|
||||
//
|
||||
if (pbest->rr_sim_status.none_active()) {
|
||||
rrs -= pbest->resource_share;
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"[rr_sim] decr rrs by %f, new value %f",
|
||||
pbest->resource_share, rrs
|
||||
);
|
||||
}
|
||||
for (i=0; i<projects.size(); i++) {
|
||||
p = projects[i];
|
||||
p->set_rrsim_proc_rate(rrs);
|
||||
}
|
||||
}
|
||||
|
||||
// increment CPU shortfalls if necessary
|
||||
//
|
||||
if (sim_now < buf_end) {
|
||||
double end_time = sim_now + rpbest->rrsim_finish_delay;
|
||||
if (end_time > buf_end) end_time = buf_end;
|
||||
double d_time = end_time - sim_now;
|
||||
int nidle_cpus = ncpus - last_active_size;
|
||||
if (nidle_cpus<0) nidle_cpus = 0;
|
||||
if (nidle_cpus > 0) cpu_shortfall += d_time*nidle_cpus;
|
||||
|
||||
double rsf = trs?pbest->resource_share/trs:1;
|
||||
double proj_cpu_share = ncpus*rsf;
|
||||
|
||||
if (last_proj_active_size < proj_cpu_share) {
|
||||
pbest->rr_sim_status.cpu_shortfall += d_time*(proj_cpu_share - last_proj_active_size);
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"[rr_sim] new shortfall %f d_time %f proj_cpu_share %f lpas %d",
|
||||
pbest->rr_sim_status.cpu_shortfall, d_time, proj_cpu_share, last_proj_active_size
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (end_time < buf_end) {
|
||||
d_time = buf_end - end_time;
|
||||
// if this is the last result for this project, account for the tail
|
||||
if (pbest->rr_sim_status.none_active()) {
|
||||
pbest->rr_sim_status.cpu_shortfall += d_time * proj_cpu_share;
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO, "[rr_sim] proj out of work; shortfall %f d %f pcs %f",
|
||||
pbest->rr_sim_status.cpu_shortfall, d_time, proj_cpu_share
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[rr_sim] total: idle cpus %d, last active %d, active %d, shortfall %f",
|
||||
nidle_cpus, last_active_size, sim_status.nactive(),
|
||||
cpu_shortfall
|
||||
|
||||
);
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[rr_sim] proj %s: last active %d, active %d, shortfall %f",
|
||||
pbest->get_project_name(), last_proj_active_size,
|
||||
pbest->rr_sim_status.cpus_used(),
|
||||
pbest->rr_sim_status.cpu_shortfall
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
sim_now += rpbest->rrsim_finish_delay;
|
||||
}
|
||||
|
||||
if (sim_now < buf_end) {
|
||||
cpu_shortfall += (buf_end - sim_now) * ncpus;
|
||||
}
|
||||
|
||||
if (log_flags.rr_simulation) {
|
||||
for (i=0; i<projects.size(); i++) {
|
||||
p = projects[i];
|
||||
if (p->rr_sim_status.cpu_shortfall) {
|
||||
msg_printf(p, MSG_INFO,
|
||||
"[rr_sim] shortfall %f\n", p->rr_sim_status.cpu_shortfall
|
||||
);
|
||||
}
|
||||
}
|
||||
msg_printf(NULL, MSG_INFO,
|
||||
"[rr_sim] done; total shortfall %f\n",
|
||||
cpu_shortfall
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// trigger CPU schedule enforcement.
|
||||
// Called when a new schedule is computed,
|
||||
// and when an app checkpoints.
|
||||
|
|
|
@ -0,0 +1,310 @@
|
|||
// This file is part of BOINC.
|
||||
// http://boinc.berkeley.edu
|
||||
// Copyright (C) 2008 University of California
|
||||
//
|
||||
// BOINC is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU Lesser General Public License
|
||||
// as published by the Free Software Foundation,
|
||||
// either version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// BOINC is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
// See the GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#include "client_state.h"
|
||||
#include "client_msgs.h"
|
||||
|
||||
struct RR_SIM_STATUS {
|
||||
std::vector<RESULT*> active;
|
||||
COPROCS coprocs;
|
||||
|
||||
inline bool can_run(RESULT* rp) {
|
||||
return coprocs.sufficient_coprocs(
|
||||
rp->avp->coprocs, log_flags.rr_simulation, "rr_simulation"
|
||||
);
|
||||
}
|
||||
inline void activate(RESULT* rp) {
|
||||
coprocs.reserve_coprocs(
|
||||
rp->avp->coprocs, rp, log_flags.rr_simulation, "rr_simulation"
|
||||
);
|
||||
active.push_back(rp);
|
||||
}
|
||||
// remove *rpbest from active set,
|
||||
// and adjust CPU time left for other results
|
||||
//
|
||||
inline void remove_active(RESULT* rpbest) {
|
||||
coprocs.free_coprocs(rpbest->avp->coprocs, rpbest, log_flags.rr_simulation, "rr_simulation");
|
||||
vector<RESULT*>::iterator it = active.begin();
|
||||
while (it != active.end()) {
|
||||
RESULT* rp = *it;
|
||||
if (rp == rpbest) {
|
||||
it = active.erase(it);
|
||||
} else {
|
||||
rp->rrsim_cpu_left -= rp->project->rr_sim_status.proc_rate*rpbest->rrsim_finish_delay;
|
||||
it++;
|
||||
}
|
||||
}
|
||||
}
|
||||
inline int nactive() {
|
||||
return (int) active.size();
|
||||
}
|
||||
~RR_SIM_STATUS() {
|
||||
coprocs.delete_coprocs();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Do a simulation of the current workload
|
||||
// with weighted round-robin (WRR) scheduling.
|
||||
// Include jobs that are downloading.
|
||||
//
|
||||
// For efficiency, we simulate a crude approximation of WRR.
|
||||
// We don't model time-slicing.
|
||||
// Instead we use a continuous model where, at a given point,
|
||||
// each project has a set of running jobs that uses all CPUs
|
||||
// (and obeys coprocessor limits).
|
||||
// These jobs are assumed to run at a rate proportionate to their avg_ncpus,
|
||||
// and each project gets CPU proportionate to its RRS.
|
||||
//
|
||||
// Outputs are changes to global state:
|
||||
// For each project p:
|
||||
// p->rr_sim_deadlines_missed
|
||||
// p->cpu_shortfall
|
||||
// For each result r:
|
||||
// r->rr_sim_misses_deadline
|
||||
// r->last_rr_sim_missed_deadline
|
||||
// gstate.cpu_shortfall
|
||||
//
|
||||
// Deadline misses are not counted for tasks
|
||||
// that are too large to run in RAM right now.
|
||||
//
|
||||
void CLIENT_STATE::rr_simulation() {
|
||||
double rrs = nearly_runnable_resource_share();
|
||||
double trs = total_resource_share();
|
||||
PROJECT* p, *pbest;
|
||||
RESULT* rp, *rpbest;
|
||||
RR_SIM_STATUS sim_status;
|
||||
unsigned int i;
|
||||
|
||||
sim_status.coprocs.clone(coprocs, false);
|
||||
double ar = available_ram();
|
||||
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[rr_sim] rr_sim start: work_buf_total %f rrs %f trs %f ncpus %d",
|
||||
work_buf_total(), rrs, trs, ncpus
|
||||
);
|
||||
}
|
||||
|
||||
for (i=0; i<projects.size(); i++) {
|
||||
p = projects[i];
|
||||
p->rr_sim_status.clear();
|
||||
}
|
||||
|
||||
// Decide what jobs to include in the simulation,
|
||||
// and pick the ones that are initially running
|
||||
//
|
||||
for (i=0; i<results.size(); i++) {
|
||||
rp = results[i];
|
||||
if (!rp->nearly_runnable()) continue;
|
||||
if (rp->some_download_stalled()) continue;
|
||||
if (rp->project->non_cpu_intensive) continue;
|
||||
rp->rrsim_cpu_left = rp->estimated_cpu_time_remaining(false);
|
||||
p = rp->project;
|
||||
if (p->rr_sim_status.can_run(rp, gstate.ncpus) && sim_status.can_run(rp)) {
|
||||
sim_status.activate(rp);
|
||||
p->rr_sim_status.activate(rp);
|
||||
} else {
|
||||
p->rr_sim_status.add_pending(rp);
|
||||
}
|
||||
rp->last_rr_sim_missed_deadline = rp->rr_sim_misses_deadline;
|
||||
rp->rr_sim_misses_deadline = false;
|
||||
}
|
||||
|
||||
for (i=0; i<projects.size(); i++) {
|
||||
p = projects[i];
|
||||
p->set_rrsim_proc_rate(rrs);
|
||||
// if there are no results for a project,
|
||||
// the shortfall is its entire share.
|
||||
//
|
||||
if (p->rr_sim_status.none_active()) {
|
||||
double rsf = trs ? p->resource_share/trs : 1;
|
||||
p->rr_sim_status.cpu_shortfall = work_buf_total() * overall_cpu_frac() * ncpus * rsf;
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(p, MSG_INFO,
|
||||
"[rr_sim] no results; shortfall %f wbt %f ocf %f rsf %f",
|
||||
p->rr_sim_status.cpu_shortfall, work_buf_total(), overall_cpu_frac(), rsf
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
double buf_end = now + work_buf_total();
|
||||
|
||||
// Simulation loop. Keep going until work done
|
||||
//
|
||||
double sim_now = now;
|
||||
cpu_shortfall = 0;
|
||||
while (sim_status.nactive()) {
|
||||
|
||||
// compute finish times and see which result finishes first
|
||||
//
|
||||
rpbest = NULL;
|
||||
for (i=0; i<sim_status.active.size(); i++) {
|
||||
rp = sim_status.active[i];
|
||||
p = rp->project;
|
||||
rp->rrsim_finish_delay = rp->rrsim_cpu_left/p->rr_sim_status.proc_rate;
|
||||
if (!rpbest || rp->rrsim_finish_delay < rpbest->rrsim_finish_delay) {
|
||||
rpbest = rp;
|
||||
}
|
||||
}
|
||||
|
||||
pbest = rpbest->project;
|
||||
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"[rr_sim] result %s finishes after %f (%f/%f)",
|
||||
rpbest->name, rpbest->rrsim_finish_delay,
|
||||
rpbest->rrsim_cpu_left, pbest->rr_sim_status.proc_rate
|
||||
);
|
||||
}
|
||||
|
||||
// "rpbest" is first result to finish. Does it miss its deadline?
|
||||
//
|
||||
double diff = sim_now + rpbest->rrsim_finish_delay - ((rpbest->computation_deadline()-now)*CPU_PESSIMISM_FACTOR + now);
|
||||
if (diff > 0) {
|
||||
ACTIVE_TASK* atp = lookup_active_task_by_result(rpbest);
|
||||
if (atp && atp->procinfo.working_set_size_smoothed > ar) {
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"[rr_sim] result %s misses deadline but too large to run",
|
||||
rpbest->name
|
||||
);
|
||||
}
|
||||
} else {
|
||||
rpbest->rr_sim_misses_deadline = true;
|
||||
pbest->rr_sim_status.deadlines_missed++;
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"[rr_sim] result %s misses deadline by %f",
|
||||
rpbest->name, diff
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int last_active_size = sim_status.nactive();
|
||||
int last_proj_active_size = pbest->rr_sim_status.cpus_used();
|
||||
|
||||
sim_status.remove_active(rpbest);
|
||||
|
||||
pbest->rr_sim_status.remove_active(rpbest);
|
||||
|
||||
// If project has more results, add one or more to active set.
|
||||
//
|
||||
while (1) {
|
||||
rp = pbest->rr_sim_status.get_pending();
|
||||
if (!rp) break;
|
||||
if (pbest->rr_sim_status.can_run(rp, gstate.ncpus) && sim_status.can_run(rp)) {
|
||||
sim_status.activate(rp);
|
||||
pbest->rr_sim_status.activate(rp);
|
||||
} else {
|
||||
pbest->rr_sim_status.add_pending(rp);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If all work done for a project, subtract that project's share
|
||||
// and recompute processing rates
|
||||
//
|
||||
if (pbest->rr_sim_status.none_active()) {
|
||||
rrs -= pbest->resource_share;
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"[rr_sim] decr rrs by %f, new value %f",
|
||||
pbest->resource_share, rrs
|
||||
);
|
||||
}
|
||||
for (i=0; i<projects.size(); i++) {
|
||||
p = projects[i];
|
||||
p->set_rrsim_proc_rate(rrs);
|
||||
}
|
||||
}
|
||||
|
||||
// increment CPU shortfalls if necessary
|
||||
//
|
||||
if (sim_now < buf_end) {
|
||||
double end_time = sim_now + rpbest->rrsim_finish_delay;
|
||||
if (end_time > buf_end) end_time = buf_end;
|
||||
double d_time = end_time - sim_now;
|
||||
int nidle_cpus = ncpus - last_active_size;
|
||||
if (nidle_cpus<0) nidle_cpus = 0;
|
||||
if (nidle_cpus > 0) cpu_shortfall += d_time*nidle_cpus;
|
||||
|
||||
double rsf = trs?pbest->resource_share/trs:1;
|
||||
double proj_cpu_share = ncpus*rsf;
|
||||
|
||||
if (last_proj_active_size < proj_cpu_share) {
|
||||
pbest->rr_sim_status.cpu_shortfall += d_time*(proj_cpu_share - last_proj_active_size);
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO,
|
||||
"[rr_sim] new shortfall %f d_time %f proj_cpu_share %f lpas %d",
|
||||
pbest->rr_sim_status.cpu_shortfall, d_time, proj_cpu_share, last_proj_active_size
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (end_time < buf_end) {
|
||||
d_time = buf_end - end_time;
|
||||
// if this is the last result for this project, account for the tail
|
||||
if (pbest->rr_sim_status.none_active()) {
|
||||
pbest->rr_sim_status.cpu_shortfall += d_time * proj_cpu_share;
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(pbest, MSG_INFO, "[rr_sim] proj out of work; shortfall %f d %f pcs %f",
|
||||
pbest->rr_sim_status.cpu_shortfall, d_time, proj_cpu_share
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (log_flags.rr_simulation) {
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[rr_sim] total: idle cpus %d, last active %d, active %d, shortfall %f",
|
||||
nidle_cpus, last_active_size, sim_status.nactive(),
|
||||
cpu_shortfall
|
||||
|
||||
);
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[rr_sim] proj %s: last active %d, active %d, shortfall %f",
|
||||
pbest->get_project_name(), last_proj_active_size,
|
||||
pbest->rr_sim_status.cpus_used(),
|
||||
pbest->rr_sim_status.cpu_shortfall
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
sim_now += rpbest->rrsim_finish_delay;
|
||||
}
|
||||
|
||||
if (sim_now < buf_end) {
|
||||
cpu_shortfall += (buf_end - sim_now) * ncpus;
|
||||
}
|
||||
|
||||
if (log_flags.rr_simulation) {
|
||||
for (i=0; i<projects.size(); i++) {
|
||||
p = projects[i];
|
||||
if (p->rr_sim_status.cpu_shortfall) {
|
||||
msg_printf(p, MSG_INFO,
|
||||
"[rr_sim] shortfall %f\n", p->rr_sim_status.cpu_shortfall
|
||||
);
|
||||
}
|
||||
}
|
||||
msg_printf(NULL, MSG_INFO,
|
||||
"[rr_sim] done; total shortfall %f\n",
|
||||
cpu_shortfall
|
||||
);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,76 @@
|
|||
// This file is part of BOINC.
|
||||
// http://boinc.berkeley.edu
|
||||
// Copyright (C) 2008 University of California
|
||||
//
|
||||
// BOINC is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU Lesser General Public License
|
||||
// as published by the Free Software Foundation,
|
||||
// either version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// BOINC is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
// See the GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef __RR_SIM__
|
||||
#define __RR_SIM__
|
||||
|
||||
#include "client_types.h"
|
||||
|
||||
struct RESULT;
|
||||
|
||||
struct RR_SIM_PROJECT_STATUS {
|
||||
/// jobs currently running (in simulation)
|
||||
std::vector<RESULT*>active;
|
||||
/// jobs runnable but not running yet
|
||||
std::vector<RESULT*>pending;
|
||||
int deadlines_missed;
|
||||
/// fraction of each CPU this project will get
|
||||
/// set in CLIENT_STATE::rr_misses_deadline();
|
||||
double proc_rate;
|
||||
double cpu_shortfall;
|
||||
|
||||
inline void clear() {
|
||||
active.clear();
|
||||
pending.clear();
|
||||
deadlines_missed = 0;
|
||||
proc_rate = 0;
|
||||
cpu_shortfall = 0;
|
||||
}
|
||||
inline void activate(RESULT* rp) {
|
||||
active.push_back(rp);
|
||||
}
|
||||
inline void add_pending(RESULT* rp) {
|
||||
pending.push_back(rp);
|
||||
}
|
||||
inline bool none_active() {
|
||||
return !active.size();
|
||||
}
|
||||
inline bool can_run(RESULT*, int ncpus) {
|
||||
return (int)active.size() < ncpus;
|
||||
}
|
||||
inline void remove_active(RESULT* r) {
|
||||
std::vector<RESULT*>::iterator it = active.begin();
|
||||
while (it != active.end()) {
|
||||
if (*it == r) {
|
||||
it = active.erase(it);
|
||||
} else {
|
||||
it++;
|
||||
}
|
||||
}
|
||||
}
|
||||
inline RESULT* get_pending() {
|
||||
if (!pending.size()) return NULL;
|
||||
RESULT* rp = pending[0];
|
||||
pending.erase(pending.begin());
|
||||
return rp;
|
||||
}
|
||||
inline int cpus_used() {
|
||||
return (int) active.size();
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
|
@ -190,6 +190,11 @@ function get_profile_summary($profile) {
|
|||
//
|
||||
function show_profile($user, $logged_in_user, $screen_mode = false) {
|
||||
BoincForumPrefs::lookup($user);
|
||||
$profile = BoincProfile::lookup("userid = $user->id");
|
||||
if (!$profile) {
|
||||
error_page("No user profile exists for that user ID.");
|
||||
$user->update("has_profile = 0");
|
||||
}
|
||||
|
||||
$is_logged_in = isset($logged_in_user);
|
||||
|
||||
|
@ -204,10 +209,6 @@ function show_profile($user, $logged_in_user, $screen_mode = false) {
|
|||
if (is_banished($user)) {
|
||||
error_page("User is banished");
|
||||
}
|
||||
$profile = get_profile($user->id);
|
||||
if (!$profile) {
|
||||
error_page("No user profile exists for that user ID.");
|
||||
}
|
||||
|
||||
if ($can_edit) {
|
||||
row1("<a href=create_profile.php>Edit your profile</a>");
|
||||
|
|
|
@ -23,6 +23,9 @@ $user = BoincUser::lookup_id($userid);
|
|||
if (!$user) {
|
||||
error_page("No such user");
|
||||
}
|
||||
if (!$user->has_profile) {
|
||||
error_page("No profile");
|
||||
}
|
||||
|
||||
$logged_in_user = get_logged_in_user(false);
|
||||
$caching = false;
|
||||
|
|
|
@ -66,7 +66,7 @@
|
|||
|
||||
// how often to retry errors
|
||||
//
|
||||
#define ERROR_INTERVAL 86400
|
||||
#define ERROR_INTERVAL 3600
|
||||
|
||||
#include "config.h"
|
||||
#include <list>
|
||||
|
@ -361,6 +361,9 @@ std::list<FILE_RECORD> files_to_delete;
|
|||
int delete_antique_files() {
|
||||
int nfiles=0;
|
||||
|
||||
log_messages.printf(MSG_DEBUG,
|
||||
"delete_antique_files(): start (%d files)\n", files_to_delete.size()
|
||||
);
|
||||
while (!files_to_delete.empty()) {
|
||||
char timestamp[128];
|
||||
char pathname[1024];
|
||||
|
@ -378,7 +381,7 @@ int delete_antique_files() {
|
|||
"get_file_path(%s) failed: %d\n",
|
||||
fr.name.c_str(), retval
|
||||
);
|
||||
return -1;
|
||||
return retval;
|
||||
}
|
||||
|
||||
strcpy(timestamp, time_to_string(fr.date_modified));
|
||||
|
@ -392,13 +395,16 @@ int delete_antique_files() {
|
|||
"unlink(%s) failed: %s\n",
|
||||
pathname, strerror(save_error)
|
||||
);
|
||||
return -1;
|
||||
return retval;
|
||||
} else {
|
||||
nfiles++;
|
||||
files_to_delete.pop_front();
|
||||
}
|
||||
}
|
||||
return nfiles;
|
||||
log_messages.printf(MSG_DEBUG,
|
||||
"delete_antique_files(): done, deleted %d files\n", nfiles
|
||||
);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -550,7 +556,7 @@ void do_antique_pass() {
|
|||
}
|
||||
|
||||
retval = delete_antique_files();
|
||||
if (retval < 0) {
|
||||
if (retval) {
|
||||
log_messages.printf(MSG_CRITICAL,
|
||||
"Problem 2 [%d] in antique file deletion: turning OFF -delete_antiques switch\n", retval
|
||||
);
|
||||
|
|
Loading…
Reference in New Issue