mirror of https://github.com/BOINC/boinc.git
- file upload handler: add an #ifdef for disabling locking of files
while writing to them. It's not clear to me that this locking is beneficial, and it may be causing filesystem problems at WCG - volunteer storage stuff svn path=/trunk/boinc/; revision=26021
This commit is contained in:
parent
5cc5c8b5b8
commit
0d42a4aa5c
|
@ -5549,3 +5549,23 @@ Charlie 14 Aug 2012
|
|||
PostInstall.cpp
|
||||
release_boinc.sh
|
||||
Uninstall.cpp
|
||||
|
||||
David 15 Aug 2012
|
||||
- file upload handler: add an #ifdef for disabling locking of files
|
||||
while writing to them.
|
||||
It's not clear to me that this locking is beneficial,
|
||||
and it may be causing filesystem problems at WCG
|
||||
- volunteer storage stuff
|
||||
|
||||
vda/
|
||||
vda_lib.cpp,h
|
||||
vda.cpp
|
||||
ssim.cpp
|
||||
vdad.cpp
|
||||
vda_lib2.cpp
|
||||
db/
|
||||
boinc_db_types.h
|
||||
boinc_db.cpp
|
||||
schema_vda.sql
|
||||
sched/
|
||||
file_upload_handler.cpp
|
||||
|
|
|
@ -2350,7 +2350,8 @@ void DB_VDA_FILE::db_print(char* buf){
|
|||
"chunk_size=%f, "
|
||||
"need_update=%d, "
|
||||
"initialized=%d, "
|
||||
"retrieving=%d",
|
||||
// every field except the last must end in ", " so that the adjacent
// string literals concatenate into a well-formed field list
"retrieving=%d, "
|
||||
"retrieved=%d",
|
||||
create_time,
|
||||
dir,
|
||||
file_name,
|
||||
|
@ -2358,7 +2359,8 @@ void DB_VDA_FILE::db_print(char* buf){
|
|||
chunk_size,
|
||||
need_update?1:0,
|
||||
initialized?1:0,
|
||||
retrieving?1:0
|
||||
retrieving?1:0,
|
||||
retrieved?1:0
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -2374,6 +2376,7 @@ void DB_VDA_FILE::db_parse(MYSQL_ROW &r) {
|
|||
need_update = (atoi(r[i++]) != 0);
|
||||
initialized = (atoi(r[i++]) != 0);
|
||||
retrieving = (atoi(r[i++]) != 0);
|
||||
retrieved = (atoi(r[i++]) != 0);
|
||||
}
|
||||
|
||||
void DB_VDA_CHUNK_HOST::db_print(char* buf) {
|
||||
|
|
|
@ -685,6 +685,7 @@ struct VDA_FILE {
|
|||
bool need_update;
|
||||
bool initialized;
|
||||
bool retrieving;
|
||||
bool retrieved;
|
||||
void clear();
|
||||
};
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ create table vda_file (
|
|||
need_update tinyint not null default 0,
|
||||
initialized tinyint not null default 0,
|
||||
retrieving tinyint not null default 0,
|
||||
retrieved tinyint not null default 0,
|
||||
primary key(id)
|
||||
) engine = InnoDB;
|
||||
|
||||
|
|
|
@ -50,6 +50,10 @@
|
|||
|
||||
#include "sched_msgs.h"
|
||||
|
||||
#define LOCK_FILES
|
||||
// comment this out to not lock files
|
||||
// this may avoid filesystem hangs
|
||||
|
||||
#define ERR_TRANSIENT true
|
||||
#define ERR_PERMANENT false
|
||||
|
||||
|
@ -127,6 +131,7 @@ int copy_socket_to_file(FILE* in, char* path, double offset, double nbytes) {
|
|||
);
|
||||
}
|
||||
|
||||
#ifdef LOCK_FILES
|
||||
// Put an advisory lock on the file.
|
||||
// This will prevent OTHER instances of file_upload_handler
|
||||
// from being able to write to the file.
|
||||
|
@ -142,6 +147,7 @@ int copy_socket_to_file(FILE* in, char* path, double offset, double nbytes) {
|
|||
close(fd);
|
||||
return return_error(ERR_TRANSIENT, "can't lock file %s\n", path);
|
||||
}
|
||||
#endif
|
||||
|
||||
// check that file length corresponds to offset
|
||||
// TODO: use a 64-bit variant
|
||||
|
|
|
@ -238,6 +238,7 @@ struct SIM_FILE : VDA_FILE_AUX, EVENT {
|
|||
meta_chunk->reconstruct_and_cleanup();
|
||||
printf("recovery_action():\n");
|
||||
meta_chunk->recovery_action(sim.now);
|
||||
meta_chunk->compute_min_failures();
|
||||
fault_tolerance.sample(
|
||||
meta_chunk->min_failures-1, collecting_stats(), sim.now
|
||||
);
|
||||
|
@ -467,6 +468,14 @@ void CHUNK::download_complete() {
|
|||
sfp->recover();
|
||||
}
|
||||
|
||||
// Stub implementation: starts no uploads and always returns 0.
int CHUNK::upload_all() { return 0; }
|
||||
|
||||
// Stub implementation: starts no uploads and always returns 0.
int META_CHUNK::upload_all() { return 0; }
|
||||
|
||||
int META_CHUNK::encode(bool) {
|
||||
printf("%s: encoding metachunk %s\n", now_str(), name);
|
||||
return 0;
|
||||
|
|
|
@ -179,7 +179,7 @@ int handle_retrieve(const char* name) {
|
|||
sprintf(buf, "where file_name='%s'", name);
|
||||
int retval = vf.lookup(buf);
|
||||
if (retval) return retval;
|
||||
retval = vf.update_field("retrieving=1");
|
||||
retval = vf.update_field("retrieving=1, need_update=1");
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
@ -203,7 +203,10 @@ int handle_status(const char* name) {
|
|||
return retval;
|
||||
}
|
||||
printf("status for file %s:", vf.file_name);
|
||||
vf.meta_chunk->recovery_plan();
|
||||
vf.meta_chunk->compute_min_failures();
|
||||
vf.meta_chunk->print_status(0);
|
||||
printf("fault tolerance level: %d\n", vf.meta_chunk->min_failures-1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -128,7 +128,7 @@ META_CHUNK::META_CHUNK(
|
|||
// and whether to delete chunk data currently on server.
|
||||
// Also compute min_failures
|
||||
//
|
||||
int META_CHUNK::recovery_plan() {
|
||||
void META_CHUNK::recovery_plan() {
|
||||
vector<DATA_UNIT*> recoverable;
|
||||
vector<DATA_UNIT*> present;
|
||||
|
||||
|
@ -146,8 +146,7 @@ int META_CHUNK::recovery_plan() {
|
|||
c->in_recovery_set = false;
|
||||
c->data_needed = false;
|
||||
c->data_now_present = false;
|
||||
int retval = c->recovery_plan();
|
||||
if (retval) return retval;
|
||||
c->recovery_plan();
|
||||
switch (c->status) {
|
||||
case PRESENT:
|
||||
present.push_back(c);
|
||||
|
@ -191,7 +190,6 @@ int META_CHUNK::recovery_plan() {
|
|||
} else {
|
||||
status = UNRECOVERABLE;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int META_CHUNK::recovery_action(double now) {
|
||||
|
@ -230,10 +228,19 @@ int META_CHUNK::recovery_action(double now) {
|
|||
retval = c->recovery_action(now);
|
||||
if (retval) return retval;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Compute min_failures: the smallest # of host failures
|
||||
// that would make this unit unrecoverable.
|
||||
//
|
||||
int META_CHUNK::compute_min_failures() {
|
||||
unsigned int i;
|
||||
for (i=0; i<children.size(); i++) {
|
||||
DATA_UNIT* c = children[i];
|
||||
c->compute_min_failures();
|
||||
}
|
||||
|
||||
// Compute min_failures: the smallest # of host failures
|
||||
// that would make this unit unrecoverable.
|
||||
//
|
||||
// Because of recovery action,
|
||||
// some of our children may have changed status and fault tolerance,
|
||||
// so ours may have changed too.
|
||||
|
@ -440,12 +447,11 @@ int META_CHUNK::expand() {
|
|||
|
||||
///////////////// CHUNK ///////////////////////
|
||||
|
||||
int CHUNK::recovery_plan() {
|
||||
void CHUNK::recovery_plan() {
|
||||
keep_present = false;
|
||||
if (present_on_server) {
|
||||
status = PRESENT;
|
||||
cost = 0;
|
||||
min_failures = INT_MAX;
|
||||
} else if (hosts.size() > 0) {
|
||||
// if file is not present on server, assume that it's present
|
||||
// on all hosts (otherwise we wouldn't have downloaded it).
|
||||
|
@ -455,14 +461,28 @@ int CHUNK::recovery_plan() {
|
|||
if ((int)(hosts.size()) < parent->dfile->policy.replication) {
|
||||
data_needed = true;
|
||||
}
|
||||
min_failures = hosts.size();
|
||||
} else {
|
||||
status = UNRECOVERABLE;
|
||||
min_failures = 0;
|
||||
}
|
||||
#ifdef DEBUG_RECOVERY
|
||||
printf(" chunk %s: status %s\n", name, status_str(status));
|
||||
#endif
|
||||
}
|
||||
|
||||
int CHUNK::compute_min_failures() {
|
||||
if (present_on_server) {
|
||||
min_failures = INT_MAX;
|
||||
return 0;
|
||||
}
|
||||
int nreplicas = 0;
|
||||
set<VDA_CHUNK_HOST*>::iterator i;
|
||||
for (i=hosts.begin(); i!=hosts.end(); i++) {
|
||||
VDA_CHUNK_HOST* ch = *i;
|
||||
if (ch->present_on_host) {
|
||||
nreplicas++;
|
||||
}
|
||||
}
|
||||
min_failures = nreplicas;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -85,8 +85,10 @@ struct VDA_FILE_AUX : VDA_FILE {
|
|||
// base class for chunks and meta-chunks
|
||||
//
|
||||
struct DATA_UNIT {
|
||||
virtual int recovery_plan(){return 0;};
|
||||
virtual void recovery_plan(){};
|
||||
virtual int recovery_action(double){return 0;};
|
||||
virtual int compute_min_failures(){return 0;};
|
||||
virtual int upload_all(){return 0;};
|
||||
|
||||
char name[64];
|
||||
char dir[1024];
|
||||
|
@ -139,8 +141,10 @@ struct META_CHUNK : DATA_UNIT {
|
|||
int init(const char* dir, POLICY&, int level);
|
||||
int get_state(const char* dir, POLICY&, int level);
|
||||
|
||||
virtual int recovery_plan();
|
||||
virtual void recovery_plan();
|
||||
virtual int recovery_action(double);
|
||||
virtual int compute_min_failures();
|
||||
virtual int upload_all();
|
||||
|
||||
int decide_reconstruct();
|
||||
int reconstruct_and_cleanup();
|
||||
|
@ -154,6 +158,7 @@ struct META_CHUNK : DATA_UNIT {
|
|||
}
|
||||
int decode();
|
||||
int encode(bool first);
|
||||
int reconstruct();
|
||||
|
||||
// used by vda
|
||||
void print_status(int indent_level);
|
||||
|
@ -173,14 +178,19 @@ struct CHUNK : DATA_UNIT {
|
|||
void upload_complete();
|
||||
void download_complete();
|
||||
int assign();
|
||||
virtual int recovery_plan();
|
||||
virtual void recovery_plan();
|
||||
virtual int recovery_action(double);
|
||||
virtual int compute_min_failures();
|
||||
virtual int upload_all();
|
||||
bool need_more_replicas() {
|
||||
return ((int)hosts.size() < parent->dfile->policy.replication);
|
||||
}
|
||||
|
||||
// used by vda
|
||||
void print_status(int indent_level);
|
||||
|
||||
// used by vdad
|
||||
int start_upload_from_host(VDA_CHUNK_HOST&);
|
||||
};
|
||||
|
||||
// names
|
||||
|
|
|
@ -272,6 +272,45 @@ int META_CHUNK::decode() {
|
|||
return 0;
|
||||
}
|
||||
|
||||
// the meta-chunk is present, and we're retrieving the file
|
||||
//
|
||||
int META_CHUNK::reconstruct() {
|
||||
unsigned int i;
|
||||
|
||||
// reconstruct enough children that we can reconstruct ourself
|
||||
//
|
||||
if (!bottom_level) {
|
||||
int n = 0;
|
||||
for (i=0; i<children.size(); i++) {
|
||||
META_CHUNK* cp = (META_CHUNK*)children[i];
|
||||
if (cp->status == PRESENT) {
|
||||
cp->reconstruct();
|
||||
n++;
|
||||
if (n == coding.n) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
decode();
|
||||
|
||||
// then delete childrens' files
|
||||
//
|
||||
for (i=0; i<children.size(); i++) {
|
||||
children[i]->delete_file();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// We're retrieving the file.
|
||||
// Start all possible uploads.
|
||||
//
|
||||
int META_CHUNK::upload_all() {
|
||||
unsigned int i;
|
||||
for (i=0; i<children.size(); i++) {
|
||||
children[i]->upload_all();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
///////////////// CHUNK ///////////////////////
|
||||
|
||||
CHUNK::CHUNK(META_CHUNK* mc, double s, int index) {
|
||||
|
@ -324,18 +363,7 @@ int CHUNK::assign() {
|
|||
return 0;
|
||||
}
|
||||
|
||||
int CHUNK::start_upload() {
|
||||
// if no upload of this chunk is in progress, start one.
|
||||
// NOTE: all instances are inherently present_on_host,
|
||||
// since this is only called if chunk is not present on server
|
||||
//
|
||||
VDA_CHUNK_HOST* chp;
|
||||
set<VDA_CHUNK_HOST*>::iterator i;
|
||||
for (i=hosts.begin(); i!=hosts.end(); i++) {
|
||||
chp = *i;
|
||||
if (chp->transfer_in_progress) return 0;
|
||||
}
|
||||
chp = *(hosts.begin());
|
||||
int CHUNK::start_upload_from_host(VDA_CHUNK_HOST& ch) {
|
||||
DB_VDA_CHUNK_HOST dch;
|
||||
char set_clause[256], where_clause[256];
|
||||
sprintf(set_clause,
|
||||
|
@ -344,12 +372,46 @@ int CHUNK::start_upload() {
|
|||
);
|
||||
sprintf(where_clause,
|
||||
"where vda_file_id=%d and host_id=%d and name='%s'",
|
||||
chp->vda_file_id,
|
||||
chp->host_id,
|
||||
ch.vda_file_id,
|
||||
ch.host_id,
|
||||
name
|
||||
);
|
||||
int retval = dch.update_fields_noid(set_clause, where_clause);
|
||||
return retval;
|
||||
return dch.update_fields_noid(set_clause, where_clause);
|
||||
}
|
||||
|
||||
// if no upload of this chunk is in progress, start one.
|
||||
// NOTES:
|
||||
// - all instances are inherently present_on_host,
|
||||
// since this is only called if chunk is not present on server
|
||||
// - we arbitrarily pick the first host in the list.
|
||||
// Could randomize this or use other criteria.
|
||||
//
|
||||
int CHUNK::start_upload() {
|
||||
VDA_CHUNK_HOST* chp;
|
||||
set<VDA_CHUNK_HOST*>::iterator i;
|
||||
for (i=hosts.begin(); i!=hosts.end(); i++) {
|
||||
chp = *i;
|
||||
if (chp->transfer_in_progress) return 0;
|
||||
}
|
||||
chp = *(hosts.begin());
|
||||
return start_upload_from_host(*chp);
|
||||
}
|
||||
|
||||
// Start uploads of all instances.
|
||||
// Used when retrieving the file.
|
||||
//
|
||||
int CHUNK::upload_all() {
|
||||
if (present_on_server) return 0;
|
||||
|
||||
VDA_CHUNK_HOST* chp;
|
||||
set<VDA_CHUNK_HOST*>::iterator i;
|
||||
for (i=hosts.begin(); i!=hosts.end(); i++) {
|
||||
chp = *i;
|
||||
if (chp->transfer_in_progress) continue;
|
||||
int retval = start_upload_from_host(*chp);
|
||||
if (retval) return retval;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
///////////////// VDA_FILE_AUX ///////////////////////
|
||||
|
|
33
vda/vdad.cpp
33
vda/vdad.cpp
|
@ -78,11 +78,35 @@ int handle_file(VDA_FILE_AUX& vf, DB_VDA_FILE& dvf) {
|
|||
dvf.update_field(buf);
|
||||
}
|
||||
log_messages.printf(MSG_NORMAL, "Recovery plan:\n");
|
||||
retval = vf.meta_chunk->recovery_plan();
|
||||
if (retval) {
|
||||
log_messages.printf(MSG_CRITICAL, "vf.recovery_plan failed %d\n", retval);
|
||||
return retval;
|
||||
vf.meta_chunk->recovery_plan();
|
||||
|
||||
// see if we're retrieving this file
|
||||
//
|
||||
if (vf.retrieving) {
|
||||
if (vf.retrieved) return 0;
|
||||
switch (vf.meta_chunk->status) {
|
||||
case PRESENT:
|
||||
// we have enough chunks to reconstruct it - do so
|
||||
//
|
||||
vf.meta_chunk->reconstruct();
|
||||
break;
|
||||
case RECOVERABLE:
|
||||
// otherwise start all possible uploads
|
||||
//
|
||||
vf.meta_chunk->upload_all();
|
||||
break;
|
||||
case UNRECOVERABLE:
|
||||
// if it looks like we can't recover the file, print a msg
|
||||
//
|
||||
log_messages.printf(MSG_CRITICAL,
|
||||
"Can't retrieve %s: unrecoverable\n",
|
||||
vf.file_name
|
||||
);
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
retval = vf.meta_chunk->decide_reconstruct();
|
||||
if (retval) {
|
||||
log_messages.printf(MSG_CRITICAL, "vf.decide_reconstruct failed %d\n", retval);
|
||||
|
@ -99,6 +123,7 @@ int handle_file(VDA_FILE_AUX& vf, DB_VDA_FILE& dvf) {
|
|||
log_messages.printf(MSG_CRITICAL, "vf.recovery_action failed %d\n", retval);
|
||||
return retval;
|
||||
}
|
||||
vf.meta_chunk->compute_min_failures();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue