// Berkeley Open Infrastructure for Network Computing
// http://boinc.berkeley.edu
// Copyright (C) 2005 University of California
//
// This is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation;
// either version 2.1 of the License, or (at your option) any later version.
//
// This software is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// To view the GNU Lesser General Public License visit
// http://www.gnu.org/copyleft/lesser.html
// or write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// file_deleter: deletes files that are no longer needed.
//
// default operation:
// 1) enumerate N WUs and M results (N,M compile params)
//    that are ready to file-delete, and try to delete their files
// 2) if the enums didn't yield anything, sleep for K seconds
// 3) repeat from 1)
// 4) every ERROR_INTERVAL seconds (24 hours),
//    enumerate everything in state FILE_DELETE_ERROR
//    and try to delete it.
// 5) after ANTIQUE_DELAY seconds (1 hour), and every
//    ANTIQUE_INTERVAL seconds (24 hours) thereafter,
//    scan for and delete all files in the upload directory
//    that are older than any WU in the database,
//    and were created at least one month ago.
//    This deletes files uploaded by hosts after the WU was deleted.
//
// options:
//
// -d N
//      set debug output level (1/2/3)
// -mod M R
//      handle only WUs and results with ID mod M == R
// -one_pass
//      instead of sleeping in 2), exit
// -dont_retry_error
//      don't do 4)
// -dont_delete_antiques
//      don't do 5)
// -preserve_result_files
//      update the DB, but don't delete output files.
//      For debugging.
// -preserve_wu_files
//      update the DB, but don't delete input files.
//      For debugging.
// -dont_delete_batches // don't delete anything with positive batch number // enum sizes. RESULT_PER_ENUM is three times larger on the // assumption of 3-fold average redundancy. // This balances the rate at which input and output files are deleted // #define WUS_PER_ENUM 500 #define RESULTS_PER_ENUM 1500 // how long to wait until delete antiques, and how often to do it // #define ANTIQUE_DELAY 3600 #define ANTIQUE_INTERVAL 86400 // how often to retry errors // #define ERROR_INTERVAL 86400 #include "config.h" #include #include #include #include #include #include #include #include #include #ifdef HAVE_STRINGS_H #include #endif #include "boinc_db.h" #include "parse.h" #include "error_numbers.h" #include "util.h" #include "str_util.h" #include "filesys.h" #include "strings.h" #include "sched_config.h" #include "sched_util.h" #include "sched_msgs.h" using namespace std; #define LOCKFILE "file_deleter.out" #define PIDFILE "file_deleter.pid" #define SLEEP_INTERVAL 5 #define RESULTS_PER_WU 4 // an estimate of redundancy SCHED_CONFIG config; int id_modulus=0, id_remainder=0; bool dont_retry_errors = false; bool dont_delete_antiques = false; bool dont_delete_batches = false; // Given a filename, find its full path in the upload directory hierarchy // Return an error if file isn't there. 
// int get_file_path( const char *filename, char* upload_dir, int fanout, char* path ) { dir_hier_path(filename, upload_dir, fanout, path); if (boinc_file_exists(path)) { return 0; } return ERR_NOT_FOUND; } int wu_delete_files(WORKUNIT& wu) { char* p; char filename[256], pathname[256], buf[LARGE_BLOB_SIZE]; bool no_delete=false; int count_deleted = 0, retval, mthd_retval = 0; if (strstr(wu.name, "nodelete")) return 0; safe_strcpy(buf, wu.xml_doc); p = strtok(buf, "\n"); strcpy(filename, ""); while (p) { if (parse_str(p, "", filename, sizeof(filename))) { } else if (match_tag(p, "")) { no_delete = false; strcpy(filename, ""); } else if (match_tag(p, "")) { no_delete = true; } else if (match_tag(p, "")) { if (!no_delete) { retval = get_file_path(filename, config.download_dir, config.uldl_dir_fanout, pathname ); if (retval) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "[WU#%d] get_file_path: %s: %d\n", wu.id, filename, retval ); } else { log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL, "[WU#%d] deleting %s\n", wu.id, filename ); retval = unlink(pathname); if (retval) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "[WU#%d] unlink %s failed: %d\n", wu.id, filename, retval ); mthd_retval = ERR_UNLINK; } else { count_deleted++; } } } } p = strtok(0, "\n"); } log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG, "[WU#%d] deleted %d file(s)\n", wu.id, count_deleted ); return mthd_retval; } int result_delete_files(RESULT& result) { char* p; char filename[256], pathname[256], buf[LARGE_BLOB_SIZE]; bool no_delete=false; int count_deleted = 0, retval, mthd_retval = 0; safe_strcpy(buf, result.xml_doc_in); p = strtok(buf,"\n"); while (p) { if (parse_str(p, "", filename, sizeof(filename))) { } else if (match_tag(p, "")) { no_delete = false; strcpy(filename, ""); } else if (match_tag(p, "")) { no_delete = true; } else if (match_tag(p, "")) { if (!no_delete) { retval = get_file_path( filename, config.upload_dir, config.uldl_dir_fanout, pathname ); if (retval) { // the fact that no 
result files were found is a critical // error if this was a successful result, but is to be // expected if the result outcome was failure, since in // that case there may well be no output file produced. // int debug_or_crit; if (RESULT_OUTCOME_SUCCESS == result.outcome) { debug_or_crit=SCHED_MSG_LOG::MSG_CRITICAL; } else { debug_or_crit=SCHED_MSG_LOG::MSG_DEBUG; } log_messages.printf(debug_or_crit, "[RESULT#%d] outcome=%d client_state=%d No file %s to delete\n", result.id, result.outcome, result.client_state, filename ); } else { retval = unlink(pathname); if (retval) { mthd_retval = ERR_UNLINK; log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "[RESULT#%d] unlink %s returned %d %s\n", result.id, pathname, retval, (retval && errno)?strerror(errno):"" ); } else { count_deleted++; log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL, "[RESULT#%d] unlinked %s\n", result.id, pathname ); } } } } p = strtok(0, "\n"); } log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG, "[RESULT#%d] deleted %d file(s)\n", result.id, count_deleted ); return mthd_retval; } // set by corresponding command line arguments. 
static bool preserve_wu_files=false; static bool preserve_result_files=false; // return nonzero if did anything // bool do_pass(bool retry_error) { DB_WORKUNIT wu; DB_RESULT result; bool did_something = false; char buf[256]; char clause[256]; int retval; check_stop_daemons(); strcpy(clause, ""); if (id_modulus) { sprintf(clause, " and id %% %d = %d ", id_modulus, id_remainder); } if (dont_delete_batches) { strcat(clause, " and batch <= 0 "); } sprintf(buf, "where file_delete_state=%d %s limit %d", retry_error?FILE_DELETE_ERROR:FILE_DELETE_READY, clause, WUS_PER_ENUM ); while (!wu.enumerate(buf)) { did_something = true; retval = 0; if (!preserve_wu_files) { retval = wu_delete_files(wu); } if (retval) { wu.file_delete_state = FILE_DELETE_ERROR; log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "[WU#%d] update failed: %d\n", wu.id, retval ); } else { wu.file_delete_state = FILE_DELETE_DONE; } sprintf(buf, "file_delete_state=%d", wu.file_delete_state); retval= wu.update_field(buf); } sprintf(buf, "where file_delete_state=%d %s limit %d", retry_error?FILE_DELETE_ERROR:FILE_DELETE_READY, clause, RESULTS_PER_ENUM ); while (!result.enumerate(buf)) { did_something = true; retval = 0; if (!preserve_result_files) { retval = result_delete_files(result); } if (retval) { result.file_delete_state = FILE_DELETE_ERROR; log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "[RESULT#%d] update failed: %d\n", result.id, retval ); } else { result.file_delete_state = FILE_DELETE_DONE; } sprintf(buf, "file_delete_state=%d", result.file_delete_state); retval= result.update_field(buf); } return did_something; } struct FILE_RECORD { string name; int date_modified; }; bool operator == (const FILE_RECORD& fr1, const FILE_RECORD& fr2) { return (fr1.date_modified == fr2.date_modified && fr1.name == fr2.name); } bool operator < (const FILE_RECORD& fr1, const FILE_RECORD& fr2) { if (fr1.date_modified < fr2.date_modified) return true; if (fr1.date_modified > fr2.date_modified) return false; if (fr1.name 
< fr2.name) return true; return false; } // list of antique files to delete, // sorted by mod time (primary key) and name(secondary key) // std::list files_to_delete; // delete files in antique files list, and empty the list. // Returns number of files deleted, or negative for error. // int delete_antique_files() { int nfiles=0; while (!files_to_delete.empty()) { char timestamp[128]; char pathname[1024]; int retval; FILE_RECORD fr = files_to_delete.front(); check_stop_daemons(); retval = get_file_path( fr.name.c_str(), config.upload_dir, config.uldl_dir_fanout, pathname ); if (retval) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "get_file_path(%s) failed: %d\n", fr.name.c_str(), retval ); return -1; } strcpy(timestamp, time_to_string(fr.date_modified)); log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG, "deleting [antique %s] %s\n", timestamp, pathname ); if (unlink(pathname)) { int save_error=errno; log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "unlink(%s) failed: %s\n", pathname, strerror(save_error) ); return -1; } else { nfiles++; files_to_delete.pop_front(); } } return nfiles; } // construct a list "file_to_delete" of old files. // Return number of files added to list, or negative for error. // int add_antiques_to_list(int days) { char command[256]; char single_line[1024]; FILE *fp; int dirlen=strlen(config.upload_dir); struct passwd *apache_info=getpwnam("apache"); int del_time=time(0)-86400*days; int nfiles=0; if (!apache_info) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "no user named 'apache' found!\n" ); return -1; } sprintf(command, "find %s -type f -mtime +%d -follow", config.upload_dir, days); // Now execute the command, read output on a stream. We could use // find to also exec a 'delete' command. But we want to log all // file names into the log, and do lots of sanity checking, so // this way is better. 
// if (!(fp=popen(command, "r"))) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "command %s failed\n", command ); return -2; } while (fgets(single_line, 1024, fp)) { char pathname[1024]; char *fname_at_end=NULL; int nchars=strlen(single_line); struct stat statbuf; const char *err=NULL; FILE_RECORD fr; // We can interrupt this at any point. // pclose() is called when process exits. check_stop_daemons(); // Do serious sanity checking on the path before // adding the file!! // if (!err && nchars > 1022) err="line too long"; if (!err && nchars < dirlen + 1) err="path shorter than upload directory name"; if (!err && single_line[nchars-1] != '\n') err="no newline terminator in line"; if (!err && strncmp(config.upload_dir, single_line, dirlen)) err="upload directory not in path"; if (!err && single_line[dirlen] != '/') err="no slash separator in path"; if (!err) single_line[nchars-1]='\0'; if (!err && stat(single_line, &statbuf)) err="stat failed"; if (!err && statbuf.st_mtime > del_time) err="file too recent"; if (!err && apache_info->pw_uid != statbuf.st_uid) err="file not owned by httpd user"; if (!err && !(fname_at_end=rindex(single_line+dirlen, '/'))) err="no trailing filename"; if (!err) fname_at_end++; if (!err && !strlen(fname_at_end)) err="trailing filename too short"; // skip NFS file system markers of form .nfs* // if (!err && !strncmp(fname_at_end, ".nfs", 4)) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "Ignoring antique (stale) NFS lockfile %s\n", single_line ); continue; } if (!err && get_file_path(fname_at_end, config.upload_dir, config.uldl_dir_fanout, pathname)) err="get_file_path() failed"; if (!err && strcmp(pathname, single_line)) err="file in wrong hierarchical upload subdirectory"; if (err) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "Can't list %s for deletion: %s\n", single_line, err ); // This file deleting business is SERIOUS. Give up at the // first sign of ANYTHING amiss. 
// pclose(fp); return -3; } // insert this file onto the list fr.date_modified = statbuf.st_mtime; fr.name = fname_at_end; files_to_delete.push_back(fr); nfiles++; } // while (fgets(single_line, 1024, fp)) { pclose(fp); log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG, "Found %d antique files to delete\n", nfiles ); files_to_delete.sort(); files_to_delete.unique(); return nfiles; } // returns number of files found & added, or negative for error. // int find_antique_files() { char buf[256]; DB_WORKUNIT wu; check_stop_daemons(); // Find the oldest workunit. We could add // "where file_delete_state!=FILE_DELETE_DONE" to the query, // but this might create some race condition // with the 'regular' file delete mechanism, // so better to do it like this. // sprintf(buf, "order by create_time limit 1"); if (!wu.enumerate(buf)) { // Don't ever delete files younger than a month. // int days = 1 + (time(0) - wu.create_time)/86400; if (days<31) days=31; return add_antiques_to_list(days); } return 0; } void do_antique_pass() { int retval; // If any problems appear in deleting antique files // immediately DISABLE this feature. 
// retval = find_antique_files(); if (retval < 0) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "Problem 1 [%d] in antique file deletion: turning OFF -delete_antiques switch\n", retval ); dont_delete_antiques = true; return; } retval = delete_antique_files(); if (retval < 0) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "Problem 2 [%d] in antique file deletion: turning OFF -delete_antiques switch\n", retval ); dont_delete_antiques = true; } } int main(int argc, char** argv) { int retval; bool one_pass = false, retry_error = false, delete_antiques = false; int i; check_stop_daemons(); for (i=1; i next_antique_time)) { log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG, "Doing antique deletion pass\n" ); do_antique_pass(); next_antique_time = dtime() + ANTIQUE_INTERVAL; } if (!dont_retry_errors && !retry_errors_now && (dtime() > next_error_time)) { retry_errors_now = true; log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG, "starting retry of previous errors\n" ); } } } const char *BOINC_RCSID_bd0d4938a6 = "$Id$";