diff --git a/checkin_notes b/checkin_notes index cd1a8dfc56..5133ef430f 100644 --- a/checkin_notes +++ b/checkin_notes @@ -8531,3 +8531,12 @@ Charlie 17 Nov 2011 lib/ cl_boinc.h parse.h + +David 18 Nov 2011 + - storage simulator work + + lib/ + unix_util.cpp + ssim/ + des.h + ssim.cpp diff --git a/doc/index.php b/doc/index.php index cf37a87f71..a8711daa98 100644 --- a/doc/index.php +++ b/doc/index.php @@ -75,6 +75,8 @@ function show_news_items() { function show_participate() { echo " + +
".tra("Volunteer")."
@@ -247,6 +249,7 @@ echo " + BOINC diff --git a/lib/unix_util.cpp b/lib/unix_util.cpp index 1d9176a6a5..3f657cc3b3 100644 --- a/lib/unix_util.cpp +++ b/lib/unix_util.cpp @@ -53,19 +53,19 @@ int setenv(const char *name, const char *value, int overwrite) { if (strlen(buf)<(strlen(name)+strlen(value)+1)) { // no. See if we originally allocated this string. std::vector::iterator i=envstrings.begin(); - for (;i!=envstrings.end();i++) { + for (;i!=envstrings.end();i++) { if (*i == buf) break; - } - if (i!=envstrings.end()) { + } + if (i!=envstrings.end()) { // we allocated this string. Reallocate it. buf=(char *)realloc(buf,strlen(name)+strlen(value)+2); *i=buf; - } else { + } else { // someone else allocated the string. Allocate new memory. buf=(char *)malloc(strlen(name)+strlen(value)+2); if (buf) envstrings.push_back(buf); } - } + } } if (!buf) { errno=ENOMEM; diff --git a/ssim/des.h b/ssim/des.h index adbb7c73bc..1ce14ed9d3 100644 --- a/ssim/des.h +++ b/ssim/des.h @@ -14,6 +14,7 @@ bool compare(EVENT* e1, EVENT* e2) { struct SIMULATOR { vector events; + double now; void insert(EVENT* e) { events.push_back(e); push_heap(events.begin(), events.end(), compare); @@ -23,7 +24,8 @@ struct SIMULATOR { EVENT* e = events.front(); pop_heap(events.begin(), events.end(), compare); events.pop_back(); - if (e->t > dur) break; + now = e->t; + if (now > dur) break; e->handle(); } } diff --git a/ssim/ssim.cpp b/ssim/ssim.cpp index 7f6a4daaf1..adf4a1fb9b 100644 --- a/ssim/ssim.cpp +++ b/ssim/ssim.cpp @@ -1,4 +1,82 @@ +// This file is part of BOINC. +// http://boinc.berkeley.edu +// Copyright (C) 2011 University of California +// +// BOINC is free software; you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License +// as published by the Free Software Foundation, +// either version 3 of the License, or (at your option) any later version. 
+// +// BOINC is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with BOINC. If not, see http://www.gnu.org/licenses/. + // ssim - simulator for distributed storage +// +// Simulates the storage of files on a dynamic set of hosts. +// +// The model of the host population is: +// - host arrival is a Poisson process +// - host lifetime is exponentially distributed +// - the time needed to upload or download n bytes of data +// to/from a host is +// MAX_TRANSFER_DELAY*U() + n/XFER_BYTES_SEC +// where MAX_TRANSFER_DELAY and XFER_BYTES_SEC (UPLOAD_BYTES_SEC or +// DOWNLOAD_BYTES_SEC) are parameters and U() is a uniform random variable + +#define HOSTS_PER_DAY 10. +#define HOST_LIFE_MEAN 100.*86400 +#define MAX_TRANSFER_DELAY 86400 +#define UPLOAD_BYTES_SEC 1e6 +#define DOWNLOAD_BYTES_SEC 5e6 + +// We simulate policies based on coding and replication. +// +// Coding means that data is divided into M = N+K units, +// of which any N are sufficient to reconstruct the data. +// +// The units in an encoding can themselves be encoded. +// In general we model C levels of encoding. +// +// The bottom-level data units ("chunks") are stored on hosts, +// with R-fold replication + +#define ENCODING_N 10 +#define ENCODING_K 5 +#define ENCODING_M 15 +#define ENCODING_LEVELS 1 +#define REPLICATION_LEVEL 2 + +// When we need to reconstruct an encoded unit on the server, +// we try to upload N_UPLOAD subunits, +// where N <= N_UPLOAD <= M + +#define N_UPLOAD 12 + +// Terminology: +// +// A chunk may or may not be "present_on_server". +// An encoded data unit is "present_on_server" if at least N +// of its subunits are present_on_server (recursive definition). + +// A chunk is "recoverable" if it is present on at least 1 host. +// An encoded data unit is "recoverable" if at least N +// of its subunits are recoverable. 
+ +// A chunk is "uploading" if at least one of its instances +// is being uploaded to the server. +// An encoded data unit is "uploading" if at least +// 1 of its subunits is uploading, +// and at least N of its subunits are either present_on_server or uploading + +// The scheduling policy can be briefly described as: +// 1) distribute chunks to hosts when possible, up to the replication level +// Put at most 1 chunk of a file on a given host. +// 2) if a data unit becomes unrecoverable, +// upload its parent unit, reconstruct the data, then do 1) #include #include @@ -15,8 +93,6 @@ using std::set; // similar, meta-packets per file #define META_N 10 -#define HOSTS_PER_DAY 10. -#define HOST_LIFE_MEAN 100.*86400 SIMULATOR sim; @@ -30,21 +106,26 @@ double ran_exp(double mean) { return (drand() + .5)*mean; } +struct CHUNK; +struct META_CHUNK; +struct DFILE; struct HOST; -set hosts; +struct CHUNK_ON_HOST : public EVENT { + HOST* host; + CHUNK* chunk; + bool present_on_host; + bool transfer_in_progress; // upload if present_on_host, else download + virtual void handle(); +}; struct HOST : public EVENT { - double upload_bytes_sec; - double download_bytes_sec; - virtual void handle() { - // the host has departed - // - set::iterator i = hosts.find(this); - hosts.erase(i); - } + set chunks; + virtual void handle(); }; +set hosts; + struct HOST_ARRIVAL : public EVENT { virtual void handle() { HOST* h = new HOST; @@ -64,71 +145,177 @@ struct REPORT_STATS : public EVENT { } }; -// a packet is associated with at most one host +// base class for chunks and meta-chunks // -struct PACKET { - DFILE* dfile; - META_PACKET* meta_packet; - enum {DOWNLOADING, PRESENT, UPLOADING} state; - HOST* host; - bool present; // present on server - virtual void handle() { - // transfer has finished - // - } - - void assign() { - set::iterator i = dfile.unused_hosts.front(); - HOST* h = *i; - dfile.unused_hosts.erase(i); - double t = now + 1/h->bw_down; - } - - // This packet has been lost. 
- // If it's present on server, assign it to a host. - // Otherwise reassemble the meta-packet - // - void lost() { - if (present_on_server) { - assign(); - } else { - meta_packet->reassemble(); - } - } +struct DATA_UNIT { + virtual bool recoverable(){}; + // can be reconstructed w/o reconstructing parent, + // assuming that current downloads succeed + virtual void start_upload(){}; + virtual void assign(){}; + bool present_on_server; + bool is_uploading; }; -struct META_PACKET { - vector packets; - int npackets_present; +struct CHUNK : DATA_UNIT { + set hosts; + META_CHUNK* parent; + double size; - // we need to reassemble this meta-packet on the server - // - void reassemble() { + CHUNK(META_CHUNK* mc, double s) { + parent = mc; + size = s; } - void reassembly_complete() { + virtual void assign(); + void host_failed(CHUNK_ON_HOST* p); + void upload_complete(); +}; + +struct META_CHUNK : DATA_UNIT { + vector children; + META_CHUNK* parent; + int n_children_present; + DFILE* dfile; + bool uploading; + + META_CHUNK(DFILE* d, META_CHUNK* par, double size, int encoding_level) { + dfile = d; + parent = par; + if (encoding_level) { + for (int j=0; jrecoverable()) { + n++; + } + } + } + + // a child has become unrecoverable. + // reconstruct this data unit if we still can. 
+ // + void child_unrecoverable() { + if (n_recoverable_children() >= ENCODING_N) { + for (int i=0; irecoverable()) { + c->start_upload(); + } + } + } + } + + virtual void assign() { + for (unsigned int i=0; iassign(); + } + } + + void child_upload_complete() { + } + + void upload_complete() { } }; struct DFILE : EVENT { - vector meta_packets; + META_CHUNK* meta_chunk; + double size; set unused_hosts; // hosts that don't have any packets of this file - int nmeta_packets_present; + + // the creation of a file + // virtual void handle() { - for (int i=0; inpackets_present = K; - meta_packets.push_back(mp); - for (int j=0; jpresent = true; - mp->packets.push_back(p); - } - } + meta_chunk = new META_CHUNK(this, NULL, size, ENCODING_LEVELS); + meta_chunk->assign(); } }; +//////////////////// method defs //////////////////// + +// transfer has finished +// +void CHUNK_ON_HOST::handle() { + transfer_in_progress = false; + if (present_on_host) { + // it was an upload + chunk->upload_complete(); // create new replicas if needed + } else { + present_on_host = true; + } +} + +// the host has departed +// +void HOST:: handle() { + set::iterator i = hosts.find(this); + hosts.erase(i); + + set::iterator p; + for (p = chunks.begin(); p != chunks.end(); p++) { + CHUNK_ON_HOST* c = *p; + c->chunk->host_failed(c); + delete c; + } +} + +void CHUNK::host_failed(CHUNK_ON_HOST* p) { + set::iterator i = hosts.find(p); + hosts.erase(i); + if (present_on_server) { + // if data is on server, make a new replica + // + assign(); + } else if (!hosts.empty()) { + // if there's another replica, start upload of 1st instance + // NOTE: all instances are inherently present_on_host + // + CHUNK_ON_HOST *c = *(hosts.begin()); + c->transfer_in_progress = true; + c->t = sim.now + size/UPLOAD_BYTES_SEC; + sim.insert(c); + } else { + parent->child_unrecoverable(); + } +} + +void CHUNK::upload_complete() { + assign(); + if (parent->uploading) { + parent->child_upload_complete(); + } +} + +void 
CHUNK::assign() { + while (hosts.size() < REPLICATION_LEVEL) { + set::iterator i = parent->dfile->unused_hosts.begin(); + HOST* h = *i; + parent->dfile->unused_hosts.erase(i); + CHUNK_ON_HOST *c = new CHUNK_ON_HOST(); + c->host = h; + c->chunk = this; + c->t = sim.now + size/DOWNLOAD_BYTES_SEC; + sim.insert(c); + } +} + set dfiles; int main() {