mirror of https://github.com/BOINC/boinc.git
618 lines
18 KiB
C
618 lines
18 KiB
C
// Berkeley Open Infrastructure for Network Computing
|
|
// http://boinc.berkeley.edu
|
|
// Copyright (C) 2005 University of California
|
|
//
|
|
// This is free software; you can redistribute it and/or
|
|
// modify it under the terms of the GNU Lesser General Public
|
|
// License as published by the Free Software Foundation;
|
|
// either version 2.1 of the License, or (at your option) any later version.
|
|
//
|
|
// This software is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
// See the GNU Lesser General Public License for more details.
|
|
//
|
|
// To view the GNU Lesser General Public License visit
|
|
// http://www.gnu.org/copyleft/lesser.html
|
|
// or write to the Free Software Foundation, Inc.,
|
|
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
// db_purge options
|
|
//
|
|
// purge workunit and result records that are no longer needed.
|
|
// Specifically, purges WUs for which file_delete_state=DONE;
|
|
// this occurs only when it has been assimilated
|
|
// and all results have server_state=OVER.
|
|
// Purging a WU means writing it and all its results
|
|
// to XML-format archive files, then deleting it and its results from the DB.
|
|
//
|
|
// The XML files have names of the form
|
|
// wu_archive_TIME and result_archive_TIME
|
|
// where TIME is the time it was created.
|
|
// In addition there are index files associating each WU and result ID
|
|
// with the timestamp of the file it's in.
|
|
//
|
|
// Options:
|
|
//
|
|
// -min_age_days n purge WUs with mod_time at least N days in the past
|
|
// -max n purge at most N WUs
|
|
// -one_pass purge a few (~1000) WUs, then exit
|
|
// default: keep scanning indefinitely
|
|
// -max_wu_per_file n write at most N WUs to an archive file
|
|
// The file is then closed and another file is opened.
|
|
// This can be used to get a series of small files
|
|
// instead of one huge file.
|
|
// -zip
|
|
// compress output files using zip. If used with
|
|
// -max_wu_per_file then the files get compressed after
|
|
// being closed. In any case the files are compressed
|
|
// when db_purge exits on a signal.
|
|
// -gzip
|
|
// compress output files using gzip. If used with
|
|
// -max_wu_per_file then the files get compressed after
|
|
// being closed. In any case the files are compressed
|
|
// when db_purge exits on a signal.
|
|
|
|
#include <cstdio>
|
|
#include <unistd.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/types.h>
|
|
#include <cstring>
|
|
#include <cstdlib>
|
|
#include <ctime>
|
|
#include <string>
|
|
#include <time.h>
|
|
#include <errno.h>
|
|
|
|
using namespace std;
|
|
|
|
#include "boinc_db.h"
|
|
#include "util.h"
|
|
#include "parse.h"
|
|
#include "sched_config.h"
|
|
#include "sched_util.h"
|
|
#include "sched_msgs.h"
|
|
|
|
#include "error_numbers.h"
|
|
|
|
// Base names of the archive and index files.
// open_archive() builds the full path from one of these,
// the current timestamp and the compression suffix.
#define WU_FILENAME_PREFIX              "wu_archive"
#define RESULT_FILENAME_PREFIX          "result_archive"
#define WU_INDEX_FILENAME_PREFIX        "wu_index"
#define RESULT_INDEX_FILENAME_PREFIX    "result_index"

// Row limit appended to each workunit query issued by do_pass().
#define DB_QUERY_LIMIT                  1000
|
|
|
|
// Project configuration; main() fills it in with config.parse_file("..")
// and uses it for the database connection parameters.
SCHED_CONFIG config;
|
|
// Streams for the currently open archive files (workunits, results)
// and their index files.  NULL means "not open".
FILE *wu_stream = NULL, *re_stream = NULL;
FILE *wu_index_stream = NULL, *re_index_stream = NULL;

// Timestamp used in the names of the currently open archive files;
// refreshed by open_all_archives().
int time_int = 0;

// Value of the -min_age_days option; zero disables the age filter.
int min_age_days = 0;
|
|
|
|
// Running total of workunits purged so far.
// Used when the total number of workunits to purge is limited.
int purged_workunits = 0;

// Maximum number of workunits to purge; zero means no limit.
// Every result belonging to a purged workunit is purged too,
// so this also bounds the number of purged results.
//
int max_number_workunits_to_purge = 0;
|
|
|
|
// Archive compression modes; selected on the command line (-gzip / -zip).
#define COMPRESSION_NONE    0
#define COMPRESSION_GZIP    1
#define COMPRESSION_ZIP     2

// File-name suffix for each compression mode.
// Subscripts MUST be in agreement with the COMPRESSION_* defines above.
// The entries are string literals, hence pointers to const char.
const char *suffix[3] = {"", ".gz", ".zip"};

// Active compression mode; default is no compression.
int compression_type = COMPRESSION_NONE;
|
|
|
|
// Value of the -max_wu_per_file option: once this many workunits have
// been written, the archive files are closed and new ones are opened.
// Zero means no per-file limit.
int max_wu_per_file = 0;

// Number of workunits written to the currently open archive files.
int wu_stored_in_file = 0;
|
|
|
|
bool time_to_quit() {
|
|
if (max_number_workunits_to_purge) {
|
|
if (purged_workunits >= max_number_workunits_to_purge) return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// this opens the archive. Only subtle thing is that if the user has
|
|
// asked for compression, then we popen(2) a pipe to gzip or zip.
|
|
// This does 'in place' compression.
|
|
//
|
|
void open_archive(char* filename_prefix, FILE*& f){
|
|
char path[256];
|
|
char command[512];
|
|
|
|
// append appropriate suffix for file type
|
|
sprintf(path, "../archives/%s_%d.xml", filename_prefix, time_int);
|
|
strcat(path, suffix[compression_type]);
|
|
|
|
// and construct appropriate command if needed
|
|
if (compression_type==COMPRESSION_GZIP) {
|
|
sprintf(command, "gzip - > %s", path);
|
|
}
|
|
|
|
if (compression_type==COMPRESSION_ZIP) {
|
|
sprintf(command, "zip %s -", path);
|
|
}
|
|
|
|
log_messages.printf(SCHED_MSG_LOG::NORMAL, "Opening archive %s\n", path);
|
|
|
|
// in the case with no compression, just open the file, else open
|
|
// a pipe to the compression executable.
|
|
//
|
|
if (compression_type==COMPRESSION_NONE) {
|
|
if (!(f = fopen( path,"w"))) {
|
|
log_messages.printf(
|
|
SCHED_MSG_LOG::CRITICAL,"Can't open archive file %s %s\n",
|
|
path, errno?strerror(errno):""
|
|
);
|
|
exit(3);
|
|
}
|
|
} else if (!(f = popen(command,"w"))) {
|
|
log_messages.printf(
|
|
SCHED_MSG_LOG::CRITICAL,"Can't open pipe %s %s\n",
|
|
command, errno?strerror(errno):""
|
|
);
|
|
exit(4);
|
|
}
|
|
|
|
// set buffering to line buffered, since we are outputing XML on a
|
|
// line-by-line basis.
|
|
//
|
|
setlinebuf(f);
|
|
|
|
return;
|
|
}
|
|
|
|
void close_archive(char *filename, FILE*& fp){
|
|
char path[256];
|
|
|
|
// Set file pointer to NULL after closing file to indicate that it's closed.
|
|
//
|
|
if (!fp) return;
|
|
|
|
// In case of errors, carry on anyway. This is deliberate, not lazy
|
|
//
|
|
if (compression_type==COMPRESSION_NONE) {
|
|
fclose(fp);
|
|
} else {
|
|
pclose(fp);
|
|
}
|
|
|
|
fp=NULL;
|
|
|
|
// append appropriate file type
|
|
sprintf(path, "../archives/%s_%d.xml", filename, time_int);
|
|
strcat(path, suffix[compression_type]);
|
|
|
|
log_messages.printf(SCHED_MSG_LOG::NORMAL,
|
|
"Closed archive file %s containing records of %d workunits\n",
|
|
path, wu_stored_in_file
|
|
);
|
|
|
|
return;
|
|
}
|
|
|
|
// opens the various archive files. Guarantees that the timestamp
|
|
// does not equal the previous timestamp
|
|
//
|
|
void open_all_archives() {
|
|
int old_time=time_int;
|
|
|
|
// make sure we get a NEW value of the file timestamp!
|
|
while (old_time == (time_int = (int)time(0))) {
|
|
sleep(1);
|
|
}
|
|
|
|
// open all the archives.
|
|
open_archive(WU_FILENAME_PREFIX, wu_stream);
|
|
open_archive(RESULT_FILENAME_PREFIX, re_stream);
|
|
open_archive(RESULT_INDEX_FILENAME_PREFIX, re_index_stream);
|
|
open_archive(WU_INDEX_FILENAME_PREFIX, wu_index_stream);
|
|
|
|
return;
|
|
}
|
|
|
|
// closes (and optionally compresses) the archive files. Clears file
|
|
// pointers to indicate that files are not open.
|
|
//
|
|
void close_all_archives() {
|
|
close_archive(WU_FILENAME_PREFIX, wu_stream);
|
|
close_archive(RESULT_FILENAME_PREFIX, re_stream);
|
|
close_archive(RESULT_INDEX_FILENAME_PREFIX, re_index_stream);
|
|
close_archive(WU_INDEX_FILENAME_PREFIX, wu_index_stream);
|
|
log_messages.printf(SCHED_MSG_LOG::NORMAL,
|
|
"Closed archive files with %d workunits\n",
|
|
wu_stored_in_file
|
|
);
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
// The exit handler always calls this at the end to be sure that the
|
|
// database is closed cleanly.
|
|
void close_db_exit_handler() {
|
|
boinc_db.close();
|
|
return;
|
|
}
|
|
|
|
// Write one result record, as a <result_archive> XML element, to the
// result archive (re_stream), and a "<result id> <file timestamp>" line
// to the result index (re_index_stream).
//
// Returns 0 on success and -1 (any nonzero value means failure to the
// caller) if a write fails.  The caller deletes the record from the
// database after a successful return, so a failed write must not be
// reported as success.
//
int archive_result(DB_RESULT& result) {
    // only stderr_out is XML-escaped before being embedded in the archive
    string raw_stderr, escaped_stderr;
    raw_stderr = result.stderr_out;
    xml_escape(raw_stderr, escaped_stderr);

    // each write is attempted only if all previous ones succeeded
    bool ok = fprintf(re_stream,
        "<result_archive>\n"
        " <id>%d</id>\n",
        result.id
    ) >= 0;

    ok = ok && fprintf(
        re_stream,
        " <create_time>%d</create_time>\n"
        " <workunitid>%d</workunitid>\n"
        " <server_state>%d</server_state>\n"
        " <outcome>%d</outcome>\n"
        " <client_state>%d</client_state>\n"
        " <hostid>%d</hostid>\n"
        " <userid>%d</userid>\n"
        " <report_deadline>%d</report_deadline>\n"
        " <sent_time>%d</sent_time>\n"
        " <received_time>%d</received_time>\n"
        " <name>%s</name>\n"
        " <cpu_time>%.15e</cpu_time>\n"
        " <xml_doc_in>%s</xml_doc_in>\n"
        " <xml_doc_out>%s</xml_doc_out>\n"
        " <stderr_out>%s</stderr_out>\n"
        " <batch>%d</batch>\n"
        " <file_delete_state>%d</file_delete_state>\n"
        " <validate_state>%d</validate_state>\n"
        " <claimed_credit>%.15e</claimed_credit>\n"
        " <granted_credit>%.15e</granted_credit>\n"
        " <opaque>%f</opaque>\n"
        " <random>%d</random>\n"
        " <app_version_num>%d</app_version_num>\n"
        " <appid>%d</appid>\n"
        " <exit_status>%d</exit_status>\n"
        " <teamid>%d</teamid>\n"
        " <priority>%d</priority>\n"
        " <mod_time>%s</mod_time>\n",
        result.create_time,
        result.workunitid,
        result.server_state,
        result.outcome,
        result.client_state,
        result.hostid,
        result.userid,
        result.report_deadline,
        result.sent_time,
        result.received_time,
        result.name,
        result.cpu_time,
        result.xml_doc_in,
        result.xml_doc_out,
        escaped_stderr.c_str(),
        result.batch,
        result.file_delete_state,
        result.validate_state,
        result.claimed_credit,
        result.granted_credit,
        result.opaque,
        result.random,
        result.app_version_num,
        result.appid,
        result.exit_status,
        result.teamid,
        result.priority,
        result.mod_time
    ) >= 0;

    ok = ok && fprintf(re_stream,
        "</result_archive>\n"
    ) >= 0;

    // index entry: which archive file (by timestamp) holds this result
    ok = ok && fprintf(re_index_stream,
        "%d %d\n",
        result.id, time_int
    ) >= 0;

    return ok ? 0 : -1;
}
|
|
|
|
// Write one workunit record, as a <workunit_archive> XML element, to the
// workunit archive (wu_stream), and a "<workunit id> <file timestamp>"
// line to the workunit index (wu_index_stream).
//
// Returns 0 on success and -1 (any nonzero value means failure to the
// caller) if a write fails.  The caller deletes the workunit from the
// database after a successful return, so a failed write must not be
// reported as success.
//
int archive_wu(DB_WORKUNIT& wu) {
    // each write is attempted only if all previous ones succeeded
    bool ok = fprintf(wu_stream,
        "<workunit_archive>\n"
        " <id>%d</id>\n",
        wu.id
    ) >= 0;

    ok = ok && fprintf(wu_stream,
        " <create_time>%d</create_time>\n"
        " <appid>%d</appid>\n"
        " <name>%s</name>\n"
        " <xml_doc>%s</xml_doc>\n"
        " <batch>%d</batch>\n"
        " <rsc_fpops_est>%.15e</rsc_fpops_est>\n"
        " <rsc_fpops_bound>%.15e</rsc_fpops_bound>\n"
        " <rsc_memory_bound>%.15e</rsc_memory_bound>\n"
        " <rsc_disk_bound>%.15e</rsc_disk_bound>\n"
        " <need_validate>%d</need_validate>\n"
        " <canonical_resultid>%d</canonical_resultid>\n"
        " <canonical_credit>%.15e</canonical_credit>\n"
        " <transition_time>%d</transition_time>\n"
        " <delay_bound>%d</delay_bound>\n"
        " <error_mask>%d</error_mask>\n"
        " <file_delete_state>%d</file_delete_state>\n"
        " <assimilate_state>%d</assimilate_state>\n"
        " <hr_class>%d</hr_class>\n"
        " <opaque>%f</opaque>\n"
        " <min_quorum>%d</min_quorum>\n"
        " <target_nresults>%d</target_nresults>\n"
        " <max_error_results>%d</max_error_results>\n"
        " <max_total_results>%d</max_total_results>\n"
        " <max_success_results>%d</max_success_results>\n"
        " <result_template_file>%s</result_template_file>\n"
        " <priority>%d</priority>\n"
        " <mod_time>%s</mod_time>\n",
        wu.create_time,
        wu.appid,
        wu.name,
        wu.xml_doc,
        wu.batch,
        wu.rsc_fpops_est,
        wu.rsc_fpops_bound,
        wu.rsc_memory_bound,
        wu.rsc_disk_bound,
        wu.need_validate,
        wu.canonical_resultid,
        wu.canonical_credit,
        wu.transition_time,
        wu.delay_bound,
        wu.error_mask,
        wu.file_delete_state,
        wu.assimilate_state,
        wu.hr_class,
        wu.opaque,
        wu.min_quorum,
        wu.target_nresults,
        wu.max_error_results,
        wu.max_total_results,
        wu.max_success_results,
        wu.result_template_file,
        wu.priority,
        wu.mod_time
    ) >= 0;

    ok = ok && fprintf(wu_stream,
        "</workunit_archive>\n"
    ) >= 0;

    // index entry: which archive file (by timestamp) holds this workunit
    ok = ok && fprintf(wu_index_stream,
        "%d %d\n",
        wu.id, time_int
    ) >= 0;

    return ok ? 0 : -1;
}
|
|
|
|
// Archive, then delete from the database, every result whose workunitid
// matches wu.id.
//
// number_results is set to the number of results archived and deleted.
// Returns 0 on success; otherwise the first nonzero value returned by
// archive_result() or delete_from_db(), at which point processing stops.
//
int purge_and_archive_results(DB_WORKUNIT& wu, int& number_results) {
    DB_RESULT result;
    char clause[256];

    number_results = 0;
    sprintf(clause, "where workunitid=%d", wu.id);

    for (;;) {
        // a nonzero return from enumerate() ends the loop
        if (result.enumerate(clause)) break;

        // write the record to the archive before removing it from the DB
        int rc = archive_result(result);
        if (rc) return rc;
        log_messages.printf(
            SCHED_MSG_LOG::DEBUG,
            "Archived result [%d] to a file\n", result.id
        );

        rc = result.delete_from_db();
        if (rc) return rc;
        log_messages.printf(
            SCHED_MSG_LOG::DEBUG,
            "Purged result [%d] from database\n", result.id
        );

        ++number_results;
    }
    return 0;
}
|
|
|
|
// return true if did anything
|
|
//
|
|
bool do_pass() {
|
|
int retval= 0;
|
|
|
|
// The number of workunits/results purged in a single pass of do_pass().
|
|
// Since do_pass() may be invoked multiple times,
|
|
// corresponding global variables store global totals.
|
|
//
|
|
int do_pass_purged_workunits = 0;
|
|
int do_pass_purged_results = 0;
|
|
|
|
// check to see if we got a stop signal.
|
|
// Note that if we do catch a stop signal here,
|
|
// we call an exit handler that closes [and optionally compresses] files
|
|
// before returning to the OS.
|
|
//
|
|
check_stop_daemons();
|
|
|
|
bool did_something = false;
|
|
DB_WORKUNIT wu;
|
|
char buf[256];
|
|
|
|
if (min_age_days) {
|
|
char timestamp[15];
|
|
mysql_timestamp(dtime()-min_age_days*86400, timestamp);
|
|
sprintf(buf,
|
|
"where file_delete_state=%d and mod_time<'%s' limit %d",
|
|
FILE_DELETE_DONE, timestamp, DB_QUERY_LIMIT
|
|
);
|
|
} else {
|
|
sprintf(buf,
|
|
"where file_delete_state=%d limit %d",
|
|
FILE_DELETE_DONE, DB_QUERY_LIMIT
|
|
);
|
|
}
|
|
|
|
int n=0;
|
|
while (!wu.enumerate(buf)) {
|
|
if (strstr(wu.name, "nodelete")) continue;
|
|
did_something = true;
|
|
|
|
// if archives have not already been opened, then open them.
|
|
if (!wu_stream) {
|
|
open_all_archives();
|
|
}
|
|
|
|
retval = purge_and_archive_results(wu, n);
|
|
do_pass_purged_results += n;
|
|
|
|
retval= archive_wu(wu);
|
|
if (retval) {
|
|
log_messages.printf(SCHED_MSG_LOG::CRITICAL,
|
|
"Failed to write to XML file workunit:%d\n", wu.id
|
|
);
|
|
exit(5);
|
|
}
|
|
log_messages.printf(SCHED_MSG_LOG::DEBUG,"Archived workunit [%d] to a file\n", wu.id);
|
|
|
|
//purge workunit from DB
|
|
retval= wu.delete_from_db();
|
|
if (retval) {
|
|
log_messages.printf(SCHED_MSG_LOG::CRITICAL,"Can't delete workunit [%d] from database:%d\n", wu.id, retval);
|
|
exit(6);
|
|
}
|
|
log_messages.printf(SCHED_MSG_LOG::DEBUG,"Purged workunit [%d] from database\n", wu.id);
|
|
|
|
purged_workunits++;
|
|
do_pass_purged_workunits++;
|
|
wu_stored_in_file++;
|
|
|
|
// flush the various output files.
|
|
fflush(NULL);
|
|
|
|
// if file has got max # of workunits, close and compress it.
|
|
// This sets file pointers to NULL
|
|
if (max_wu_per_file && wu_stored_in_file>=max_wu_per_file) {
|
|
close_all_archives();
|
|
wu_stored_in_file=0;
|
|
}
|
|
|
|
if (time_to_quit()) {
|
|
break;
|
|
}
|
|
|
|
}
|
|
|
|
log_messages.printf(SCHED_MSG_LOG::NORMAL,
|
|
"Archived %d workunits and %d results\n",
|
|
do_pass_purged_workunits,do_pass_purged_results
|
|
);
|
|
|
|
if (wu_stored_in_file>0) {
|
|
log_messages.printf(SCHED_MSG_LOG::DEBUG,
|
|
"Currently open archive files contain %d workunits\n",
|
|
wu_stored_in_file);
|
|
}
|
|
|
|
return did_something;
|
|
}
|
|
|
|
int main(int argc, char** argv) {
|
|
int retval;
|
|
bool asynch = false, one_pass = false;
|
|
int i;
|
|
|
|
check_stop_daemons();
|
|
for (i=1; i<argc; i++) {
|
|
if (!strcmp(argv[i], "-asynch")) {
|
|
asynch = true;
|
|
} else if (!strcmp(argv[i], "-one_pass")) {
|
|
one_pass = true;
|
|
} else if (!strcmp(argv[i], "-d")) {
|
|
log_messages.set_debug_level(atoi(argv[++i]));
|
|
} else if (!strcmp(argv[i], "-min_age_days")) {
|
|
min_age_days = atoi(argv[++i]);
|
|
} else if (!strcmp(argv[i], "-max")) {
|
|
max_number_workunits_to_purge= atoi(argv[++i]);
|
|
} else if (!strcmp(argv[i], "-zip")) {
|
|
compression_type=COMPRESSION_ZIP;
|
|
} else if (!strcmp(argv[i], "-gzip")) {
|
|
compression_type=COMPRESSION_GZIP;
|
|
} else if (!strcmp(argv[i], "-max_wu_per_file")) {
|
|
max_wu_per_file = atoi(argv[++i]);
|
|
} else {
|
|
log_messages.printf(SCHED_MSG_LOG::CRITICAL,
|
|
"Unrecognized arg: %s\n",
|
|
argv[i]
|
|
);
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
retval = config.parse_file("..");
|
|
if (retval) {
|
|
log_messages.printf(SCHED_MSG_LOG::CRITICAL,
|
|
"Can't parse config file\n"
|
|
);
|
|
exit(1);
|
|
}
|
|
|
|
if (asynch) {
|
|
if (fork()) {
|
|
exit(0);
|
|
}
|
|
}
|
|
|
|
log_messages.printf(SCHED_MSG_LOG::NORMAL, "Starting\n");
|
|
|
|
retval = boinc_db.open(
|
|
config.db_name, config.db_host, config.db_user, config.db_passwd
|
|
);
|
|
if (retval) {
|
|
log_messages.printf(SCHED_MSG_LOG::CRITICAL, "Can't open DB\n");
|
|
exit(2);
|
|
}
|
|
install_stop_signal_handler();
|
|
mkdir("../archives", 0777);
|
|
|
|
// on exit, either via the check_stop_daemons signal handler, or
|
|
// through a regular call to exit, these functions will be called
|
|
// in the opposite order of registration.
|
|
//
|
|
atexit(close_db_exit_handler);
|
|
atexit(close_all_archives);
|
|
|
|
if (one_pass) {
|
|
do_pass();
|
|
} else {
|
|
while (1) {
|
|
if (time_to_quit()) {
|
|
break;
|
|
}
|
|
if (!do_pass()) {
|
|
sleep(10);
|
|
}
|
|
}
|
|
}
|
|
|
|
// files and database are closed by exit handler
|
|
exit(0);
|
|
}
|
|
|
|
// Identification string embedded in the binary; "$Id$" is presumably a
// revision-control keyword placeholder -- confirm against the build setup.
const char *BOINC_RCSID_0c1c4336f1 = "$Id$";
|