2012-01-20 20:21:29 +00:00
|
|
|
// This file is part of BOINC.
|
|
|
|
// http://boinc.berkeley.edu
|
|
|
|
// Copyright (C) 2012 University of California
|
|
|
|
//
|
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
|
|
|
//
|
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
2012-02-15 04:40:50 +00:00
|
|
|
// vdad - volunteer data archival daemon
|
|
|
|
//
|
|
|
|
// Enumerates files needing updating from the DB.
|
|
|
|
// Creates the corresponding tree of META_CHUNKs, CHUNKs,
|
|
|
|
// and VDA_CHUNK_HOSTs.
|
|
|
|
// Calls the recovery routines to initiate transfers,
|
|
|
|
// update the DB, etc.
|
|
|
|
|
2012-07-02 18:51:02 +00:00
|
|
|
#include <set>
|
2012-01-25 03:35:50 +00:00
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <sys/types.h>
|
2012-07-02 18:51:02 +00:00
|
|
|
#include <unistd.h>
|
2012-02-15 04:40:50 +00:00
|
|
|
#include <vector>
|
2012-01-25 03:35:50 +00:00
|
|
|
|
2012-01-23 05:03:52 +00:00
|
|
|
#include "boinc_db.h"
|
2012-02-21 20:55:09 +00:00
|
|
|
#include "sched_config.h"
|
2012-02-29 07:22:59 +00:00
|
|
|
#include "sched_util.h"
|
2012-01-20 20:21:29 +00:00
|
|
|
|
2012-02-15 04:40:50 +00:00
|
|
|
#include "error_numbers.h"
|
2012-01-23 08:04:41 +00:00
|
|
|
#include "util.h"
|
2012-02-15 04:40:50 +00:00
|
|
|
#include "filesys.h"
|
2012-01-23 08:04:41 +00:00
|
|
|
|
2012-01-24 18:23:18 +00:00
|
|
|
#include "vda_lib.h"
|
|
|
|
|
2012-07-02 18:51:02 +00:00
|
|
|
using std::vector;
|
|
|
|
using std::set;
|
|
|
|
|
2012-06-14 17:14:52 +00:00
|
|
|
// Write msg to stdout exactly as given; no newline is appended.
//
void show_msg(char* msg) {
    fprintf(stdout, "%s", msg);
}
|
|
|
|
|
2012-02-24 03:09:56 +00:00
|
|
|
// Process one VDA file.
//
// vf:  in-memory description of the file (directory, name, policy,
//      meta-chunk tree)
// dvf: the file's DB record; updated here when the file becomes
//      initialized or retrieved
//
// Steps:
// - parse the policy file <vf.dir>/boinc_meta.txt
// - call vf.get_state() if the file is already initialized,
//   else vf.init() and record initialized=1 and the chunk size in the DB
// - compute the recovery plan
// - if the file is being retrieved, act on the top-level meta-chunk status
//   (reconstruct / start uploads / report unrecoverable) and return
// - otherwise run decide_reconstruct(), reconstruct_and_cleanup(),
//   recovery_action() and compute_min_failures()
//
// Returns 0 on success, else a nonzero error code.
// A failed reconstruct() or truncate during retrieval is logged
// but is not returned as an error.
//
int handle_file(VDA_FILE_AUX& vf, DB_VDA_FILE& dvf) {
    int retval;
    int n;
    char buf[1024];

    log_messages.printf(MSG_NORMAL, "processing file %s\n", vf.file_name);

    // read the policy file
    //
    n = snprintf(buf, sizeof(buf), "%s/boinc_meta.txt", vf.dir);
    if (n < 0 || n >= static_cast<int>(sizeof(buf))) {
        // don't try to parse a truncated path
        //
        log_messages.printf(MSG_CRITICAL,
            "policy file path too long for %s\n", vf.file_name
        );
        return -1;
    }
    retval = vf.policy.parse(buf);
    if (retval) {
        log_messages.printf(MSG_CRITICAL, "Can't parse policy file %s\n", buf);
        return retval;
    }

    if (vf.initialized) {
        log_messages.printf(MSG_NORMAL, "Getting state\n");
        retval = vf.get_state();
        if (retval) {
            log_messages.printf(MSG_CRITICAL, "vf.get_state failed %d\n", retval);
            return retval;
        }
    } else {
        log_messages.printf(MSG_NORMAL, "Initializing\n");
        retval = vf.init();
        if (retval) {
            log_messages.printf(MSG_CRITICAL, "vf.init failed %d\n", retval);
            return retval;
        }
        snprintf(buf, sizeof(buf),
            "initialized=1, chunk_size=%.0f", vf.policy.chunk_size()
        );
        // if this update is lost, the DB still says "not initialized";
        // report it rather than continuing silently
        //
        retval = dvf.update_field(buf);
        if (retval) {
            log_messages.printf(MSG_CRITICAL,
                "dvf.update_field failed %d\n", retval
            );
            return retval;
        }
    }

    log_messages.printf(MSG_NORMAL, "Recovery plan:\n");
    vf.meta_chunk->recovery_plan();

    // see if we're retrieving this file
    //
    if (vf.retrieving) {
        if (vf.retrieved) return 0;
        switch (vf.meta_chunk->status) {
        case PRESENT:
            // we have enough chunks to reconstruct it - do so
            //
            retval = vf.meta_chunk->reconstruct();
            if (retval) {
                log_messages.printf(MSG_CRITICAL,
                    "reconstruct of %s failed: %d\n", vf.file_name, retval
                );
                break;
            }
            log_messages.printf(MSG_NORMAL,
                "retrieval of %s completed successfully\n", vf.file_name
            );

            // Decoding produces a file with unused space at the end.
            // Remove this space.
            //
            // NOTE(review): the file being truncated and the --reference
            // file are the same path, so this looks like a no-op;
            // confirm which file was meant to supply the size.
            //
            n = snprintf(buf, sizeof(buf),
                "truncate %s/%s --reference %s/%s",
                vf.dir, vf.file_name, vf.dir, vf.file_name
            );
            if (n < 0 || n >= static_cast<int>(sizeof(buf))) {
                // never hand a cut-off command line to the shell
                //
                log_messages.printf(MSG_CRITICAL,
                    "Can't truncate %s: command too long\n", vf.file_name
                );
            } else {
                retval = system(buf);
                if (retval) {
                    log_messages.printf(MSG_CRITICAL,
                        "Can't truncate %s: %d\n", vf.file_name, retval
                    );
                }
            }

            dvf.retrieved = true;
            retval = dvf.update();
            if (retval) {
                log_messages.printf(MSG_CRITICAL,
                    "dvf.update failed %d\n", retval
                );
                return retval;
            }
            break;
        case RECOVERABLE:
            // otherwise start all possible uploads
            //
            vf.meta_chunk->upload_all();
            break;
        case UNRECOVERABLE:
            // if it looks like we can't recover the file, print a msg
            //
            log_messages.printf(MSG_CRITICAL,
                "Can't retrieve %s: unrecoverable\n",
                vf.file_name
            );
            break;
        }
        return 0;
    }

    retval = vf.meta_chunk->decide_reconstruct();
    if (retval) {
        log_messages.printf(MSG_CRITICAL, "vf.decide_reconstruct failed %d\n", retval);
        return retval;
    }
    retval = vf.meta_chunk->reconstruct_and_cleanup();
    if (retval) {
        log_messages.printf(MSG_CRITICAL,
            "vf.reconstruct_and_cleanup failed %d\n", retval
        );
        return retval;
    }
    log_messages.printf(MSG_NORMAL, "Recovery action:\n");
    retval = vf.meta_chunk->recovery_action(dtime());
    if (retval) {
        log_messages.printf(MSG_CRITICAL, "vf.recovery_action failed %d\n", retval);
        return retval;
    }
    vf.meta_chunk->compute_min_failures();
    return 0;
}
|
|
|
|
|
2012-02-13 08:41:48 +00:00
|
|
|
// handle files
|
|
|
|
//
|
2012-01-23 05:03:52 +00:00
|
|
|
bool scan_files() {
|
|
|
|
DB_VDA_FILE vf;
|
|
|
|
bool found = false;
|
2012-01-25 03:35:50 +00:00
|
|
|
int retval;
|
2012-01-23 05:03:52 +00:00
|
|
|
|
2012-02-22 19:51:09 +00:00
|
|
|
while (1) {
|
|
|
|
retval = vf.enumerate("where need_update<>0");
|
|
|
|
if (retval == ERR_DB_NOT_FOUND) break;
|
|
|
|
if (retval) {
|
2012-02-24 03:09:56 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "VDA_FILE enumerate failed\n");
|
2012-02-22 19:51:09 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
2012-02-15 04:40:50 +00:00
|
|
|
VDA_FILE_AUX vfa(vf);
|
2012-01-23 05:03:52 +00:00
|
|
|
found = true;
|
2012-02-24 03:09:56 +00:00
|
|
|
retval = handle_file(vfa, vf);
|
2012-01-25 03:35:50 +00:00
|
|
|
if (retval) {
|
2012-02-24 03:09:56 +00:00
|
|
|
log_messages.printf(
|
|
|
|
MSG_CRITICAL, "handle_file() failed: %d\n", retval
|
|
|
|
);
|
2012-02-21 20:55:09 +00:00
|
|
|
exit(1);
|
2012-01-25 03:35:50 +00:00
|
|
|
} else {
|
2012-07-25 21:41:32 +00:00
|
|
|
retval = vf.update_field("need_update=0");
|
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(
|
|
|
|
MSG_CRITICAL, "update_field() failed: %d\n", retval
|
|
|
|
);
|
|
|
|
exit(1);
|
|
|
|
}
|
2012-01-25 03:35:50 +00:00
|
|
|
}
|
2012-01-23 05:03:52 +00:00
|
|
|
}
|
|
|
|
return found;
|
|
|
|
}
|
|
|
|
|
2012-02-22 19:51:09 +00:00
|
|
|
// this host is declared dead; deal with the loss of data
|
|
|
|
//
|
2012-08-24 19:06:41 +00:00
|
|
|
int handle_dead_host(DB_HOST& h) {
|
2012-02-22 19:51:09 +00:00
|
|
|
DB_VDA_CHUNK_HOST ch;
|
|
|
|
char buf[256];
|
|
|
|
int retval;
|
|
|
|
|
2015-07-23 17:11:08 +00:00
|
|
|
log_messages.printf(MSG_NORMAL, "processing dead host %lu\n", h.id);
|
2012-02-24 03:09:56 +00:00
|
|
|
|
2015-07-23 17:11:08 +00:00
|
|
|
sprintf(buf, "where host_id=%lu", h.id);
|
2012-02-22 19:51:09 +00:00
|
|
|
while (1) {
|
|
|
|
retval = ch.enumerate(buf);
|
|
|
|
if (retval == ERR_DB_NOT_FOUND) break;
|
|
|
|
if (retval) return retval;
|
2012-08-28 06:33:53 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2015-07-23 17:11:08 +00:00
|
|
|
" updating file %lu\n", ch.vda_file_id
|
2012-08-28 06:33:53 +00:00
|
|
|
);
|
2012-02-22 19:51:09 +00:00
|
|
|
DB_VDA_FILE vf;
|
|
|
|
retval = vf.lookup_id(ch.vda_file_id);
|
2012-02-24 03:09:56 +00:00
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2015-07-23 17:11:08 +00:00
|
|
|
" file lookup failed %lu\n", ch.vda_file_id
|
2012-02-24 03:09:56 +00:00
|
|
|
);
|
|
|
|
return retval;
|
|
|
|
}
|
2012-02-22 19:51:09 +00:00
|
|
|
retval = vf.update_field("need_update=1");
|
2012-02-24 03:09:56 +00:00
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2015-07-23 17:11:08 +00:00
|
|
|
" file update failed %lu\n", ch.vda_file_id
|
2012-02-24 03:09:56 +00:00
|
|
|
);
|
|
|
|
return retval;
|
|
|
|
}
|
2012-02-22 19:51:09 +00:00
|
|
|
}
|
|
|
|
return 0;
|
2012-01-23 05:03:52 +00:00
|
|
|
}
|
|
|
|
|
2012-08-24 19:06:41 +00:00
|
|
|
// identify and process dead (i.e. timed-out) hosts
|
2012-02-13 08:41:48 +00:00
|
|
|
//
|
2012-02-22 19:51:09 +00:00
|
|
|
bool scan_hosts() {
|
|
|
|
DB_HOST h;
|
2012-01-23 08:04:41 +00:00
|
|
|
char buf[256];
|
2012-02-22 19:51:09 +00:00
|
|
|
int retval;
|
2012-01-23 08:04:41 +00:00
|
|
|
bool found = false;
|
2012-01-20 20:21:29 +00:00
|
|
|
|
2012-08-24 19:06:41 +00:00
|
|
|
sprintf(buf,
|
|
|
|
"where cpu_efficiency=0 and rpc_time < %f",
|
|
|
|
dtime() - config.vda_host_timeout
|
|
|
|
);
|
2012-02-22 19:51:09 +00:00
|
|
|
while (1) {
|
|
|
|
retval = h.enumerate(buf);
|
|
|
|
if (retval == ERR_DB_NOT_FOUND) break;
|
|
|
|
if (retval) {
|
2012-02-24 03:09:56 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "host.enumerate() failed\n");
|
2012-02-22 19:51:09 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
2012-01-23 08:04:41 +00:00
|
|
|
found = true;
|
2012-08-24 19:06:41 +00:00
|
|
|
retval = handle_dead_host(h);
|
2012-02-22 19:51:09 +00:00
|
|
|
if (retval) {
|
2012-02-24 03:09:56 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "handle_host() failed: %d\n", retval);
|
2012-02-22 19:51:09 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
2012-08-24 19:06:41 +00:00
|
|
|
retval = h.update_field("cpu_efficiency=1");
|
2012-02-24 03:09:56 +00:00
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(MSG_CRITICAL, "h.update_field() failed: %d\n", retval);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2012-01-23 05:03:52 +00:00
|
|
|
}
|
2012-01-23 08:04:41 +00:00
|
|
|
return found;
|
2012-01-23 05:03:52 +00:00
|
|
|
}
|
2012-01-20 20:21:29 +00:00
|
|
|
|
2012-01-23 05:03:52 +00:00
|
|
|
int main(int argc, char** argv) {
|
2012-08-07 22:49:40 +00:00
|
|
|
setbuf(stdout, 0);
|
2012-02-24 03:09:56 +00:00
|
|
|
for (int i=1; i<argc; i++) {
|
|
|
|
if (!strcmp(argv[i], "-d") || !strcmp(argv[i], "--debug_level")) {
|
|
|
|
int dl = atoi(argv[++i]);
|
|
|
|
log_messages.set_debug_level(dl);
|
|
|
|
if (dl == 4) g_print_queries = true;
|
|
|
|
}
|
|
|
|
}
|
2012-02-21 20:55:09 +00:00
|
|
|
int retval = config.parse_file();
|
|
|
|
if (retval) {
|
2012-02-24 03:09:56 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "can't parse config file\n");
|
2012-02-21 20:55:09 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
2012-08-24 19:06:41 +00:00
|
|
|
if (!config.enable_vda) {
|
|
|
|
log_messages.printf(MSG_CRITICAL, "VDA not enabled\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
if (config.vda_host_timeout == 0) {
|
|
|
|
log_messages.printf(MSG_CRITICAL, "Must specify VDA host timeout\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
2012-02-21 20:55:09 +00:00
|
|
|
retval = boinc_db.open(
|
|
|
|
config.db_name, config.db_host, config.db_user, config.db_passwd
|
|
|
|
);
|
|
|
|
if (retval) {
|
2012-02-24 03:09:56 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "can't open DB\n");
|
2012-02-21 20:55:09 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
#if 0
|
2012-02-18 23:19:06 +00:00
|
|
|
VDA_FILE_AUX vf;
|
|
|
|
memset(&vf, 0, sizeof(vf));
|
2012-01-25 03:35:50 +00:00
|
|
|
strcpy(vf.dir, "/mydisks/b/users/boincadm/vda_test");
|
|
|
|
strcpy(vf.name, "file.ext");
|
2012-02-15 04:40:50 +00:00
|
|
|
handle_file(vf);
|
2012-01-25 03:35:50 +00:00
|
|
|
exit(0);
|
2012-02-15 04:40:50 +00:00
|
|
|
#endif
|
2012-01-23 05:03:52 +00:00
|
|
|
while(1) {
|
2012-08-24 19:06:41 +00:00
|
|
|
bool action = scan_hosts();
|
|
|
|
action |= scan_files();
|
2012-01-23 05:03:52 +00:00
|
|
|
if (!action) boinc_sleep(5.);
|
|
|
|
}
|
|
|
|
}
|