- client: change the way project disk share is computed.

- Allow projects to report "desired disk usage" (DDU).
        If the client learns that a project wants disk space,
        it can shrink the allocation to other projects.
    - Base share computation on DDU rather than disk usage.
    - Introduce the notion of "disk resource share".
        This is defined (somewhat arbitrarily) as resource share
        plus 1/10 of the largest resource share.
        This is intended to ensure that even zero-share projects
        get enough disk space to store app versions and data files;
        otherwise they wouldn't be able to compute.
- server: use host.d_boinc_max (which wasn't being used)
    to store the d_project_share reported by the client.
- volunteer storage: change the way hosts are allocated to chunks.
    Allow hosts to store several chunks of the same file, if needed


svn path=/trunk/boinc/; revision=26052
This commit is contained in:
David Anderson 2012-08-22 04:02:52 +00:00
parent 446bc4ca28
commit e79d3ea4c8
11 changed files with 204 additions and 48 deletions

View File

@ -5647,3 +5647,33 @@ David 20 Aug 2012
vda/
vdad.cpp
vda_lib2.cpp
David 21 Aug 2012
- client: change the way project disk share is computed.
- Allow projects to report "desired disk usage" (DDU).
If the client learns that a project wants disk space,
it can shrink the allocation to other projects.
- Base share computation on DDU rather than disk usage.
- Introduce the notion of "disk resource share".
This is defined (somewhat arbitrarily) as resource share
plus 1/10 of the largest resource share.
This is intended to ensure that even zero-share projects
get enough disk space to store app versions and data files;
otherwise they wouldn't be able to compute.
- server: use host.d_boinc_max (which wasn't being used)
to store the d_project_share reported by the client.
- volunteer storage: change the way hosts are allocated to chunks.
Allow hosts to store several chunks of the same file, if needed
vda/
vda_lib2.cpp
vda_lib.h
client/
project.cpp,h
client_state.h
cs_prefs.cpp
scheduler_op.cpp
db/
boinc_db_types.h
sched/
sched_types.h

View File

@ -379,6 +379,7 @@ struct CLIENT_STATE {
// disk usage not counting projects
// computed by get_disk_usages()
double total_disk_usage;
// client plus projects
int get_disk_usages();
void get_disk_shares();
double allowed_disk_usage(double boinc_total);

View File

@ -114,37 +114,68 @@ int CLIENT_STATE::get_disk_usages() {
return 0;
}
// populate PROJECT::disk_share for all projects
// populate PROJECT::disk_share for all projects,
// i.e. the max space we should allocate to the project.
// This is calculated as follows:
// - each project has a "disk_resource_share" (DRS)
// This is the resource share plus .1*(max resource share).
// This ensures that backup projects get some disk.
// - each project has a "desired_disk_usage" (DDU),
// which is either its current usage
// or an amount sent from the scheduler.
// - each project has a "quota": (available space)*(drs/total_drs).
// - a project is "greedy" if DDU > quota.
// - if a project is non-greedy, share = quota
// - X = available space - space used by non-greedy projects
// - if a project is greedy, share = quota
// + X*drs/(total drs of greedy projects)
//
void CLIENT_STATE::get_disk_shares() {
PROJECT* p;
unsigned int i;
double rss = 0;
// compute disk resource shares
//
double trs = 0;
double max_rs = 0;
for (i=0; i<projects.size(); i++) {
p = projects[i];
rss += p->resource_share;
p->disk_share = p->disk_usage;
p->ddu = std::max(p->disk_usage, p->desired_disk_usage);
double rs = p->resource_share;
trs += rs;
if (rs > max_rs) max_rs = rs;
}
if (trs) {
max_rs /= 10;
for (i=0; i<projects.size(); i++) {
p = projects[i];
p->disk_resource_share = p->resource_share + max_rs;
}
} else {
for (i=0; i<projects.size(); i++) {
p = projects[i];
p->disk_resource_share = 1;
}
}
if (!rss) return;
// a project is "greedy" if it's using more than its share of disk
// Compute:
// greedy_drs: total disk resource share of greedy projects
// non_greedy_ddu: total desired disk usage of non-greedy projects
//
double greedy_rs = 0;
double non_greedy_usage = 0;
double greedy_drs = 0;
double non_greedy_ddu = 0;
double allowed = allowed_disk_usage(total_disk_usage);
for (i=0; i<projects.size(); i++) {
p = projects[i];
double rs = p->resource_share/rss;
if (p->disk_usage > allowed*rs) {
greedy_rs += p->resource_share;
p->disk_quota = allowed*p->disk_resource_share/trs;
if (p->ddu > p->disk_quota) {
greedy_drs += p->disk_resource_share;
} else {
non_greedy_usage += p->disk_usage;
non_greedy_ddu += p->ddu;
}
}
if (!greedy_rs) greedy_rs = 1; // handle projects w/ zero resource share
double greedy_allowed = allowed - non_greedy_usage;
double greedy_allowed = allowed - non_greedy_ddu;
if (log_flags.disk_usage_debug) {
msg_printf(0, MSG_INFO,
"[disk_usage] allowed %.2fMB used %.2fMB",
@ -153,9 +184,11 @@ void CLIENT_STATE::get_disk_shares() {
}
for (i=0; i<projects.size(); i++) {
p = projects[i];
double rs = p->resource_share/rss;
if (p->disk_usage > allowed*rs) {
p->disk_share = greedy_allowed*p->resource_share/greedy_rs;
double rs = p->disk_resource_share/trs;
if (p->ddu > allowed*rs) {
p->disk_share = greedy_allowed*p->disk_resource_share/greedy_drs;
} else {
p->disk_share = p->disk_quota;
}
if (log_flags.disk_usage_debug) {
msg_printf(p, MSG_INFO,
@ -650,4 +683,3 @@ double CLIENT_STATE::max_available_ram() {
global_prefs.ram_max_used_busy_frac, global_prefs.ram_max_used_idle_frac
);
}

View File

@ -37,6 +37,7 @@ void PROJECT::init() {
project_specific_prefs = "";
gui_urls = "";
resource_share = 100;
desired_disk_usage = 0;
for (int i=0; i<MAX_RSC; i++) {
no_rsc_pref[i] = false;
no_rsc_config[i] = false;
@ -292,6 +293,7 @@ int PROJECT::parse_state(XML_PARSER& xp) {
trickle_up_ops.push_back(new TRICKLE_UP_OP(stemp));
continue;
}
if (xp.parse_double("desired_disk_usage", desired_disk_usage)) continue;
if (log_flags.unparsed_xml) {
msg_printf(0, MSG_INFO,
"[unparsed_xml] PROJECT::parse_state(): unrecognized: %s",
@ -343,6 +345,7 @@ int PROJECT::write_state(MIOFILE& out, bool gui_rpc) {
" <rec_time>%f</rec_time>\n"
" <resource_share>%f</resource_share>\n"
" <desired_disk_usage>%f</desired_disk_usage>\n"
" <duration_correction_factor>%f</duration_correction_factor>\n"
" <sched_rpc_pending>%d</sched_rpc_pending>\n"
" <send_time_stats_log>%d</send_time_stats_log>\n"
@ -374,6 +377,7 @@ int PROJECT::write_state(MIOFILE& out, bool gui_rpc) {
pwf.rec,
pwf.rec_time,
resource_share,
desired_disk_usage,
duration_correction_factor,
sched_rpc_pending,
send_time_stats_log,
@ -529,6 +533,7 @@ void PROJECT::copy_state_fields(PROJECT& p) {
if (ams_resource_share >= 0) {
resource_share = ams_resource_share;
}
desired_disk_usage = p.desired_disk_usage;
use_symlinks = p.use_symlinks;
}

View File

@ -39,7 +39,15 @@ struct PROJECT : PROJ_AM {
double resource_share;
// project's resource share relative to other projects.
double resource_share_frac;
// fraction of RS of non-suspended, compute-intensive projects
// temp; fraction of RS of non-suspended, compute-intensive projects
double disk_resource_share;
// temp in get_disk_shares()
double desired_disk_usage;
// reported by project
double ddu;
// temp in get_disk_shares()
double disk_quota;
// temp in get_disk_shares()
// the following are from the user's project prefs
//

View File

@ -899,6 +899,8 @@ int SCHEDULER_REPLY::parse(FILE* in, PROJECT* project) {
continue;
} else if (xp.parse_int("teamid", project->teamid)) {
continue;
} else if (xp.parse_double("desired_disk_usage", project->desired_disk_usage)) {
continue;
} else {
if (log_flags.unparsed_xml) {
msg_printf(project, MSG_INFO,

View File

@ -287,11 +287,10 @@ struct HOST {
double d_boinc_used_project;
// amount being used for this project
// The following item is not used.
// It's redundant (server can compute based on other params and prefs)
//
double d_boinc_max; // max disk space that BOINC is allowed to use,
// reflecting user preferences
double d_boinc_max;
// This field has been repurposed.
// It's now used to store the project's share of available disk space
// (reported by recent clients as <d_project_share>).
double n_bwup; // Average upload bandwidth, bytes/sec
double n_bwdown; // Average download bandwidth, bytes/sec
// The above are derived from actual
@ -324,7 +323,6 @@ struct HOST {
char p_features[1024];
char virtualbox_version[256];
bool p_vm_extensions_disabled;
double d_project_share; // this project's share of available disk space
int parse(XML_PARSER&);
int parse_time_stats(XML_PARSER&);

View File

@ -1260,7 +1260,7 @@ int HOST::parse_disk_usage(XML_PARSER& xp) {
if (xp.match_tag("/disk_usage")) return 0;
if (xp.parse_double("d_boinc_used_total", d_boinc_used_total)) continue;
if (xp.parse_double("d_boinc_used_project", d_boinc_used_project)) continue;
if (xp.parse_double("d_project_share", d_project_share)) continue;
if (xp.parse_double("d_project_share", d_boinc_max)) continue;
log_messages.printf(MSG_NORMAL,
"HOST::parse_disk_usage(): unrecognized: %s\n",
xp.parsed_tag

View File

@ -345,17 +345,17 @@ static int process_chunks_missing_on_client(CHUNK_LIST& chunks) {
// remove some chunks and mark vda_files for update
//
static int enforce_quota(CHUNK_LIST& chunks) {
if (!g_request->host.d_project_share) return 0;
if (!g_request->host.d_boinc_max) return 0;
double x = g_request->host.d_boinc_used_project;
if (config.debug_vda) {
log_messages.printf(MSG_NORMAL,
"[vda] share: %f used: %f\n",
g_request->host.d_project_share, x
g_request->host.d_boinc_max, x
);
}
CHUNK_LIST::iterator it = chunks.begin();
while (x > g_request->host.d_project_share && it != chunks.end()) {
while (x > g_request->host.d_boinc_max && it != chunks.end()) {
DB_VDA_CHUNK_HOST& ch = it->second;
if (!ch.found) continue;
FILE_INFO fi;

View File

@ -43,11 +43,6 @@ struct VDA_FILE_AUX : VDA_FILE {
POLICY policy;
META_CHUNK* meta_chunk;
VDA_FILE_AUX(){
meta_chunk = NULL;
}
VDA_FILE_AUX(DB_VDA_FILE f) : VDA_FILE(f){}
// the following for the simulator
//
double accounting_start_time;
@ -63,14 +58,29 @@ struct VDA_FILE_AUX : VDA_FILE {
inline bool collecting_stats() {
return (pending_init_downloads == 0);
}
VDA_FILE_AUX(){
meta_chunk = NULL;
}
// the following for vdad
//
std::vector<int> available_hosts;
// list of IDs of hosts with no chunks of this file
DB_HOST enum_host;
char enum_query[256];
int max_chunks;
int last_id;
bool enum_active;
bool found_this_scan;
bool found_any_this_scan;
bool found_any_this_enum;
int init();
int get_state();
int choose_host();
VDA_FILE_AUX(DB_VDA_FILE f) : VDA_FILE(f) {
max_chunks = 0;
enum_active = false;
}
};
#define PRESENT 0

View File

@ -570,23 +570,92 @@ int VDA_FILE_AUX::get_state() {
}
// Pick a host to send a chunk of this file to.
// The host must:
// 1) be alive (recent RPC time)
// 2) not have any chunks of this file
//
// We maintain a cache of such hosts
// We want to pick the host that has the fewest chunks
// of this file already (preferably zero).
// The policy is:
//
// - scan the cache, removing hosts that are no longer alive;
// return if find a live host
// - pick a random starting point in host ID space,
// and enumerate 100 live hosts; wrap around if needed.
// Return one and put the rest in cache
// - maintain a threshold "max_chunks".
// - enumerate all hosts that are alive
// - if find a host H w/ at most max_chunks of this file,
// set max_chunks to nchunks(H) and return H
// - if scan all hosts w/o finding one, increment max_chunks and start over
//
int VDA_FILE_AUX::choose_host() {
int retval;
DB_HOST host;
char buf[256];
// terminology:
// "enum" is the result of one DB query (typically 100 hosts)
// "scan" is a set of enums covering the entire host table
//
while (1) {
if (!enum_active) {
sprintf(enum_query, "where %s and id > %d order by id limit 100",
host_alive_clause(), last_id
);
enum_active = true;
found_any_this_enum = false;
if (last_id == 0) {
found_this_scan = false;
found_any_this_scan = false;
}
}
retval = enum_host.enumerate(enum_query);
if (retval == ERR_DB_NOT_FOUND) {
// we've finished an enum
//
enum_active = false;
if (found_any_this_enum) {
// if we found anything in this enum, continue the scan
continue;
}
// we've finished a scan
//
last_id = 0;
if (!found_any_this_scan) {
log_messages.printf(MSG_CRITICAL,
"choose_host(): no live hosts\n"
);
return 0;
}
if (!found_this_scan) {
max_chunks++;
log_messages.printf(MSG_NORMAL,
"choose_host(): completed scan, new max_chunks %d\n",
max_chunks
);
continue;
}
}
if (retval) {
// a DB error occurred
enum_active = false;
return 0;
}
found_any_this_enum = true;
found_any_this_scan = true;
last_id = enum_host.id;
// we have a live host.
// see whether it satisfies max_chunks
//
DB_VDA_CHUNK_HOST ch;
int count;
sprintf(buf, "where vda_file_id=%d and host_id=%d", id, enum_host.id);
retval = ch.count(count, buf);
if (retval) {
log_messages.printf(MSG_CRITICAL, "ch.count failed\n");
return 0;
}
if (count <= max_chunks) {
found_this_scan = true;
max_chunks = count;
return enum_host.id;
}
}
#if 0
// replenish cache if needed
//
if (!available_hosts.size()) {
@ -659,5 +728,6 @@ int VDA_FILE_AUX::choose_host() {
log_messages.printf(MSG_CRITICAL, "No hosts available\n");
return 0;
#endif
}