diff --git a/checkin_notes b/checkin_notes
index cd1a8dfc56..5133ef430f 100644
--- a/checkin_notes
+++ b/checkin_notes
@@ -8531,3 +8531,12 @@ Charlie 17 Nov 2011
lib/
cl_boinc.h
parse.h
+
+David 18 Nov 2011
+ - storage simulator work
+
+ lib/
+ unix_util.cpp
+ ssim/
+ des.h
+ ssim.cpp
diff --git a/doc/index.php b/doc/index.php
index cf37a87f71..a8711daa98 100644
--- a/doc/index.php
+++ b/doc/index.php
@@ -75,6 +75,8 @@ function show_news_items() {
function show_participate() {
echo "
+
+
".tra("Volunteer")."
@@ -247,6 +249,7 @@ echo "
+
BOINC
diff --git a/lib/unix_util.cpp b/lib/unix_util.cpp
index 1d9176a6a5..3f657cc3b3 100644
--- a/lib/unix_util.cpp
+++ b/lib/unix_util.cpp
@@ -53,19 +53,19 @@ int setenv(const char *name, const char *value, int overwrite) {
if (strlen(buf)<(strlen(name)+strlen(value)+1)) {
// no. See if we originally allocated this string.
std::vector::iterator i=envstrings.begin();
- for (;i!=envstrings.end();i++) {
+ for (;i!=envstrings.end();i++) {
if (*i == buf) break;
- }
- if (i!=envstrings.end()) {
+ }
+ if (i!=envstrings.end()) {
// we allocated this string. Reallocate it.
buf=(char *)realloc(buf,strlen(name)+strlen(value)+2);
*i=buf;
- } else {
+ } else {
// someone else allocated the string. Allocate new memory.
buf=(char *)malloc(strlen(name)+strlen(value)+2);
if (buf) envstrings.push_back(buf);
}
- }
+ }
}
if (!buf) {
errno=ENOMEM;
diff --git a/ssim/des.h b/ssim/des.h
index adbb7c73bc..1ce14ed9d3 100644
--- a/ssim/des.h
+++ b/ssim/des.h
@@ -14,6 +14,7 @@ bool compare(EVENT* e1, EVENT* e2) {
struct SIMULATOR {
vector events;
+ double now;
void insert(EVENT* e) {
events.push_back(e);
push_heap(events.begin(), events.end(), compare);
@@ -23,7 +24,8 @@ struct SIMULATOR {
EVENT* e = events.front();
pop_heap(events.begin(), events.end(), compare);
events.pop_back();
- if (e->t > dur) break;
+ now = e->t;
+ if (now > dur) break;
e->handle();
}
}
diff --git a/ssim/ssim.cpp b/ssim/ssim.cpp
index 7f6a4daaf1..adf4a1fb9b 100644
--- a/ssim/ssim.cpp
+++ b/ssim/ssim.cpp
@@ -1,4 +1,82 @@
+// This file is part of BOINC.
+// http://boinc.berkeley.edu
+// Copyright (C) 2011 University of California
+//
+// BOINC is free software; you can redistribute it and/or modify it
+// under the terms of the GNU Lesser General Public License
+// as published by the Free Software Foundation,
+// either version 3 of the License, or (at your option) any later version.
+//
+// BOINC is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// See the GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with BOINC.  If not, see <http://www.gnu.org/licenses/>.
+
// ssim - simulator for distributed storage
+//
+// Simulates the storage of files on a dynamic set of hosts.
+//
+// The model of the host population is:
+// - host arrival is a Poisson process
+// - host lifetime is exponentially distributed
+// - the time needed to upload or download n bytes of data
+// to/from a host is
+// MTD*U() + n/XFER_BYTES_SEC
+// where MTD is MAX_TRANSFER_DELAY, XFER_BYTES_SEC is UPLOAD_BYTES_SEC or DOWNLOAD_BYTES_SEC, and U() is a uniform random var
+
+#define HOSTS_PER_DAY 10.
+#define HOST_LIFE_MEAN 100.*86400
+#define MAX_TRANSFER_DELAY 86400
+#define UPLOAD_BYTES_SEC 1e6
+#define DOWNLOAD_BYTES_SEC 5e6
+
+// We simulate policies based on coding and replication.
+//
+// Coding means that data is divided into M = N+K units,
+// of which any N are sufficient to reconstruct the data.
+//
+// The units in an encoding can themselves be encoded.
+// In general we model C levels of encoding.
+//
+// The bottom-level data units ("chunks") are stored on hosts,
+// with R-fold replication
+
+#define ENCODING_N 10
+#define ENCODING_K 5
+#define ENCODING_M 15
+#define ENCODING_LEVELS 1
+#define REPLICATION_LEVEL 2
+
+// When we need to reconstruct an encoded unit on the server,
+// we try to upload N_UPLOAD subunits,
+// where N <= N_UPLOAD <= M
+
+#define N_UPLOAD 12
+
+// Terminology:
+//
+// A chunk may or may not be "present_on_server".
+// An encoded data unit is "present_on_server" if at least N
+// of its subunits are present_on_server (recursive definition).
+
+// A chunk is "recoverable" if it is present on at least 1 host.
+// An encoded data unit is "recoverable" if at least N
+// of its subunits are recoverable.
+
+// A chunk is "uploading" if at least one of its instances
+// is being uploaded to the server.
+// An encoded data unit is "uploading" if at least
+// 1 of its subunits is uploading,
+// and at least N of its subunits are either present_on_server or uploading
+
+// The scheduling policy can be briefly described as:
+// 1) distribute chunks to hosts when possible, up to the replication level
+// Put at most 1 chunk of a file on a given host.
+// 2) if a data unit becomes unrecoverable,
+// upload its parent unit, reconstruct the data, then do 1)
#include
#include
@@ -15,8 +93,6 @@ using std::set;
// similar, meta-packets per file
#define META_N 10
-#define HOSTS_PER_DAY 10.
-#define HOST_LIFE_MEAN 100.*86400
SIMULATOR sim;
@@ -30,21 +106,26 @@ double ran_exp(double mean) {
return (drand() + .5)*mean;
}
+struct CHUNK;
+struct META_CHUNK;
+struct DFILE;
struct HOST;
-set hosts;
+struct CHUNK_ON_HOST : public EVENT { // one instance of a chunk on one host; as an EVENT it fires when its transfer finishes
+ HOST* host; // host this instance is placed on (set in CHUNK::assign)
+ CHUNK* chunk; // chunk this is an instance of (set in CHUNK::assign)
+ bool present_on_host; // set to true in handle() when a download completes
+ bool transfer_in_progress; // upload if present_on_host, else download; cleared in handle()
+ virtual void handle(); // transfer-finished handler, defined under "method defs"
+};
struct HOST : public EVENT {
- double upload_bytes_sec;
- double download_bytes_sec;
- virtual void handle() {
- // the host has departed
- //
- set::iterator i = hosts.find(this);
- hosts.erase(i);
- }
+ set chunks; // CHUNK_ON_HOST instances on this host; HOST::handle() fails and deletes each one
+ virtual void handle(); // host-departure handler, defined under "method defs"
};
+set hosts;
+
struct HOST_ARRIVAL : public EVENT {
virtual void handle() {
HOST* h = new HOST;
@@ -64,71 +145,177 @@ struct REPORT_STATS : public EVENT {
}
};
-// a packet is associated with at most one host
+// base class for chunks and meta-chunks
//
-struct PACKET {
- DFILE* dfile;
- META_PACKET* meta_packet;
- enum {DOWNLOADING, PRESENT, UPLOADING} state;
- HOST* host;
- bool present; // present on server
- virtual void handle() {
- // transfer has finished
- //
- }
-
- void assign() {
- set::iterator i = dfile.unused_hosts.front();
- HOST* h = *i;
- dfile.unused_hosts.erase(i);
- double t = now + 1/h->bw_down;
- }
-
- // This packet has been lost.
- // If it's present on server, assign it to a host.
- // Otherwise reassemble the meta-packet
- //
- void lost() {
- if (present_on_server) {
- assign();
- } else {
- meta_packet->reassemble();
- }
- }
+struct DATA_UNIT {
+    // True if this unit can be reconstructed w/o reconstructing its parent,
+    // assuming that current downloads succeed.
+    // The base version reports "not recoverable": the previous empty body
+    // fell off the end of a non-void function, which is undefined behavior.
+    // NOTE(review): confirm false is the intended answer for subclasses
+    // that don't override this.
+    virtual bool recoverable() { return false; }
+
+    // hooks with empty default bodies; subclasses override as needed
+    virtual void start_upload() {}
+    virtual void assign() {}
+
+    // NOTE(review): neither flag is initialized here; code that reads them
+    // (e.g. CHUNK::host_failed reads present_on_server) relies on them
+    // being set elsewhere - confirm
+    bool present_on_server;
+    bool is_uploading;
};
-struct META_PACKET {
- vector packets;
- int npackets_present;
+struct CHUNK : DATA_UNIT { // bottom-level data unit, replicated on hosts
+ set hosts; // this chunk's CHUNK_ON_HOST instances; assign() compares its size to REPLICATION_LEVEL
+ META_CHUNK* parent; // enclosing meta-chunk; told about unrecoverable children and completed uploads
+ double size; // divided by UPLOAD_BYTES_SEC / DOWNLOAD_BYTES_SEC to get a transfer time
- // we need to reassemble this meta-packet on the server
- //
- void reassemble() {
+ CHUNK(META_CHUNK* mc, double s) { // stores parent and size only; inherited DATA_UNIT flags are not set here
+ parent = mc;
+ size = s;
}
- void reassembly_complete() {
+ virtual void assign(); // create instances on hosts that have no chunk of this file
+ void host_failed(CHUNK_ON_HOST* p); // called from HOST::handle() for each instance on a departed host
+ void upload_complete(); // called from CHUNK_ON_HOST::handle() when an upload finishes
+};
+
+struct META_CHUNK : DATA_UNIT {
+ vector children;
+ META_CHUNK* parent;
+ int n_children_present;
+ DFILE* dfile;
+ bool uploading;
+
+ META_CHUNK(DFILE* d, META_CHUNK* par, double size, int encoding_level) {
+ dfile = d;
+ parent = par;
+ if (encoding_level) {
+ for (int j=0; jrecoverable()) {
+ n++;
+ }
+ }
+ }
+
+ // a child has become unrecoverable.
+ // reconstruct this data unit if we still can.
+ //
+ void child_unrecoverable() {
+ if (n_recoverable_children() >= ENCODING_N) {
+ for (int i=0; irecoverable()) {
+ c->start_upload();
+ }
+ }
+ }
+ }
+
+ virtual void assign() {
+ for (unsigned int i=0; iassign();
+ }
+ }
+
+ void child_upload_complete() {
+ }
+
+ void upload_complete() {
}
};
struct DFILE : EVENT {
- vector meta_packets;
+ META_CHUNK* meta_chunk;
+ double size;
set unused_hosts;
// hosts that don't have any packets of this file
- int nmeta_packets_present;
+
+ // the creation of a file
+ //
virtual void handle() {
- for (int i=0; inpackets_present = K;
- meta_packets.push_back(mp);
- for (int j=0; jpresent = true;
- mp->packets.push_back(p);
- }
- }
+ meta_chunk = new META_CHUNK(this, NULL, size, ENCODING_LEVELS);
+ meta_chunk->assign();
}
};
+//////////////////// method defs ////////////////////
+
+// Event handler: the transfer for this chunk instance has finished.
+// Which direction it was is inferred from present_on_host.
+//
+void CHUNK_ON_HOST::handle() {
+    transfer_in_progress = false;
+    if (!present_on_host) {
+        // it was a download; the host now holds the chunk
+        present_on_host = true;
+        return;
+    }
+    // it was an upload
+    chunk->upload_complete();   // create new replicas if needed
+}
+
+// Event handler: the host has departed.
+// Removes the host from the global host set, reports the failure to the
+// chunk behind every instance stored on it, and frees those instances.
+//
+void HOST::handle() {
+    // erase by key: a no-op if this host isn't in the set, whereas erasing
+    // the end() iterator returned by a failed find() is undefined behavior
+    hosts.erase(this);
+
+    set<CHUNK_ON_HOST*>::iterator p;
+    for (p = chunks.begin(); p != chunks.end(); ++p) {
+        CHUNK_ON_HOST* c = *p;
+        c->chunk->host_failed(c);
+        // NOTE(review): if a transfer was in progress, c may still be queued
+        // in sim.events; nothing here dequeues it before the delete - confirm
+        delete c;
+    }
+    // every pointer in the set was just deleted; don't keep them around
+    chunks.clear();
+}
+
+// The host holding instance p of this chunk has failed.
+// Forget the instance, then either re-replicate from the server,
+// start uploading from a surviving instance, or tell the parent
+// that this chunk is unrecoverable.
+//
+void CHUNK::host_failed(CHUNK_ON_HOST* p) {
+    // erase by key: harmless if p was never registered, whereas erasing
+    // the end() iterator returned by a failed find() is undefined behavior
+    hosts.erase(p);
+
+    if (present_on_server) {
+        // if data is on server, make a new replica
+        //
+        assign();
+        return;
+    }
+    if (hosts.empty()) {
+        // no instance left anywhere
+        parent->child_unrecoverable();
+        return;
+    }
+    // there's another replica: start upload of 1st instance
+    // NOTE: all instances are inherently present_on_host
+    //
+    CHUNK_ON_HOST* c = *(hosts.begin());
+    c->transfer_in_progress = true;
+    c->t = sim.now + size/UPLOAD_BYTES_SEC;
+    sim.insert(c);
+}
+
+// An upload of this chunk has finished:
+// top up the replicas, then notify the parent if it is uploading.
+//
+void CHUNK::upload_complete() {
+    assign();
+    if (!parent->uploading) return;
+    parent->child_upload_complete();
+}
+
+// Create instances of this chunk on hosts that hold no chunk of this file,
+// until the chunk has REPLICATION_LEVEL instances or no unused host is left.
+// Each new instance is queued as an event at the time its download finishes.
+//
+void CHUNK::assign() {
+    while (hosts.size() < REPLICATION_LEVEL) {
+        // stop when the file has no unused host left;
+        // dereferencing begin() of an empty set is undefined behavior
+        if (parent->dfile->unused_hosts.empty()) break;
+
+        set<HOST*>::iterator i = parent->dfile->unused_hosts.begin();
+        HOST* h = *i;
+        parent->dfile->unused_hosts.erase(i);
+
+        CHUNK_ON_HOST* c = new CHUNK_ON_HOST();
+        c->host = h;
+        c->chunk = this;
+        c->present_on_host = false;         // becomes true in CHUNK_ON_HOST::handle()
+        c->transfer_in_progress = true;     // download starts now
+
+        // register the instance on both sides; without this hosts.size()
+        // never grows (the loop can't terminate normally) and
+        // HOST::handle() / host_failed() never see the instance
+        hosts.insert(c);
+        h->chunks.insert(c);
+
+        c->t = sim.now + size/DOWNLOAD_BYTES_SEC;
+        sim.insert(c);
+    }
+}
+
set dfiles;
int main() {