// Berkeley Open Infrastructure for Network Computing
// http://boinc.berkeley.edu
// Copyright (C) 2005 University of California
//
// This is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation;
// either version 2.1 of the License, or (at your option) any later version.
//
// This software is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// To view the GNU Lesser General Public License visit
// http://www.gnu.org/copyleft/lesser.html
// or write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

// db_dump: dump database views in XML format
//
// usage: db_dump [-d n] -dump_spec file
//   -d   debug level (1,2,3)
//
// dump_spec file:
// <boinc_db_dump_spec>
//   <output_dir>x</output_dir>
//   <final_output_dir>x</final_output_dir>
//   [ <archive_dir>X</archive_dir> ]
//   <enumeration>
//     <table>user</table>

// x // x x = id, total_credit, expavg_credit // // [n] // [] // [x ] x = zip or gzip // // ... // // ... // // output_dir is temp directory (usually ../html/stats_tmp) // final_out_dir is what to rename this to when done (usually ../html/stats) // (this is to avoid exporting incomplete stats) // archive_dir: if present, when done, move old final_out_dir // to archive_dir/stats_DATE // Otherwise rename old final_out_dir to final_out_dir_DATE // Note: this program is way more configurable than it needs to be. // All projects export stats in the same format, // as described in the default db_dump_spec.xml that is created for you. #include "config.h" #include #include #include #include #include #include #include #include #include "boinc_db.h" #include "filesys.h" #include "util.h" #include "str_util.h" #include "error_numbers.h" #include "md5_file.h" #include "parse.h" #include "sched_config.h" #include "sched_util.h" #include "sched_msgs.h" using std::string; using std::vector; #define LOCKFILE "db_dump.out" #define COMPRESSION_NONE 0 #define COMPRESSION_GZIP 1 #define COMPRESSION_ZIP 2 #define SORT_NONE 0 #define SORT_ID 1 #define SORT_TOTAL_CREDIT 2 #define SORT_EXPAVG_CREDIT 3 #define TABLE_USER 0 #define TABLE_TEAM 1 #define TABLE_HOST 2 // must match the above const char* table_name[3] = {"user", "team", "host"}; const char* tag_name[3] = {"users", "teams", "hosts"}; int nusers, nhosts, nteams; double total_credit; struct OUTPUT { int recs_per_file; bool detail; int compression; class ZFILE* zfile; class NUMBERED_ZFILE* nzfile; int parse(FILE*); }; struct ENUMERATION { int table; int sort; char filename[256]; vector outputs; int parse(FILE*); int make_it_happen(char*); }; struct DUMP_SPEC { char output_dir[256]; char final_output_dir[256]; char archive_dir[256]; vector enumerations; int parse(FILE*); }; int OUTPUT::parse(FILE* in) { char buf[256], buf2[256]; recs_per_file = 0; detail = false; compression = COMPRESSION_NONE; zfile = 0; nzfile = 0; while 
(fgets(buf, 256, in)) { if (match_tag(buf, "")) return 0; if (parse_int(buf, "", recs_per_file)) continue; if (match_tag(buf, "")) { detail = true; continue; } if (parse_str(buf, "", buf2, sizeof(buf2))) { if (!strcmp(buf2, "gzip")) { compression = COMPRESSION_GZIP; } else if (!strcmp(buf2, "zip")) { compression = COMPRESSION_ZIP; } else { log_messages.printf( SCHED_MSG_LOG::MSG_CRITICAL, "unrecognized compression type: %s", buf ); } continue; } log_messages.printf( SCHED_MSG_LOG::MSG_CRITICAL, "OUTPUT::parse: unrecognized: %s", buf ); } return ERR_XML_PARSE; } int ENUMERATION::parse(FILE* in) { char buf[256], buf2[256]; int retval, i; table = -1; sort = SORT_NONE; strcpy(filename, ""); while (fgets(buf, 256, in)) { if (match_tag(buf, "")) { if (table == -1) return ERR_XML_PARSE; if (sort == -1) return ERR_XML_PARSE; if (!strlen(filename)) return ERR_XML_PARSE; return 0; } if (match_tag(buf, "")) { OUTPUT output; retval = output.parse(in); if (!retval) outputs.push_back(output); } if (parse_str(buf, "", filename, sizeof(filename))) { continue; } if (parse_str(buf, "", buf2, sizeof(buf2))) { for (i=0; i<3; i++) { if (!strcmp(buf2, table_name[i])) { table = i; break; } } } if (parse_str(buf, "", buf2, sizeof(buf2))) { if (!strcmp(buf2, "id")) sort = SORT_ID; if (!strcmp(buf2, "total_credit")) sort = SORT_TOTAL_CREDIT; if (!strcmp(buf2, "expavg_credit")) sort = SORT_EXPAVG_CREDIT; } } return ERR_XML_PARSE; } int DUMP_SPEC::parse(FILE* in) { char buf[256]; int retval; strcpy(output_dir, ""); strcpy(final_output_dir, ""); strcpy(archive_dir, ""); while (fgets(buf, 256, in)) { if (match_tag(buf, "")) { if (!strlen(output_dir)) return ERR_XML_PARSE; if (!strlen(final_output_dir)) return ERR_XML_PARSE; return 0; } if (match_tag(buf, "")) { ENUMERATION e; retval = e.parse(in); if (!retval) enumerations.push_back(e); } if (parse_str(buf, "\n<%s>\n", tag.c_str() ); safe_strcpy(current_path, filename); } void open_num(const char* filename, int filenum) { char buf[256]; 
sprintf(buf, "%s_%d", filename, filenum); open(buf); } void close() { char buf[256]; int retval; if (f) { fprintf(f, "\n", tag.c_str()); fclose(f); switch(compression) { case COMPRESSION_ZIP: sprintf(buf, "zip -q %s", current_path); retval = system(buf); if (retval) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "%s failed: %d\n", buf, retval ); exit(retval); } break; case COMPRESSION_GZIP: sprintf(buf, "gzip -fq %s", current_path); retval = system(buf); if (retval) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "%s failed: %d\n", buf, retval ); exit(retval); } break; } f = 0; } } }; // class that automatically opens a new file every N IDs // class NUMBERED_ZFILE : public ZFILE { const char* filename_base; int nids_per_file; int last_filenum; public: NUMBERED_ZFILE(string tag_, int comp, const char* fb, int nids_per_file_) : ZFILE(tag_, comp), filename_base(fb), nids_per_file(nids_per_file_), last_filenum(-1) {} void set_id(int); }; void NUMBERED_ZFILE::set_id(int id) { int filenum = id/nids_per_file; if (!f || (filenum != last_filenum)) { open_num(filename_base, filenum); last_filenum = filenum; } } void write_host(HOST& host, FILE* f, bool detail) { int retval; char p_vendor[2048], p_model[2048], os_name[2048], os_version[2048]; xml_escape(host.p_vendor, p_vendor); xml_escape(host.p_model, p_model); xml_escape(host.os_name, os_name); xml_escape(host.os_version, os_version); fprintf(f, "\n" " %d\n", host.id ); if (detail) { DB_USER user; retval = user.lookup_id(host.userid); if (retval) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "user lookup of user %d for host %d: %d\n", host.userid, host.id, retval ); } else { if (user.show_hosts) { fprintf(f, " %d\n", host.userid ); } } } fprintf(f, " %f\n" " %f\n" " %f\n" " %s\n" " %s\n" " %s\n" " %s\n", host.total_credit, host.expavg_credit, host.expavg_time, p_vendor, p_model, os_name, os_version ); if (detail) { fprintf(f, " %d\n" " %d\n" " %d\n" " %d\n" " %f\n" " %f\n" " %f\n" " %f\n" " %f\n" " %f\n" " 
%f\n" " %f\n" " %f\n" " %f\n" " %f\n" " %f\n" " %s\n", host.create_time, host.rpc_time, host.timezone, host.p_ncpus, host.p_fpops, host.p_iops, host.p_membw, host.m_nbytes, host.m_cache, host.m_swap, host.d_total, host.d_free, host.n_bwup, host.n_bwdown, host.avg_turnaround, host.credit_per_cpu_sec, host.host_cpid ); } fprintf(f, "\n" ); } void write_user(USER& user, FILE* f, bool /*detail*/) { char buf[1024]; char cpid[MD5_LEN]; char name[2048], url[2048]; xml_escape(user.name, name); xml_escape(user.url, url); safe_strcpy(buf, user.cross_project_id); safe_strcat(buf, user.email_addr); md5_block((unsigned char*)buf, strlen(buf), cpid); fprintf(f, "\n" " %d\n" " %s\n" " %s\n" " %d\n" " %f\n" " %f\n" " %f\n" " %s\n", user.id, name, user.country, user.create_time, user.total_credit, user.expavg_credit, user.expavg_time, cpid ); if (strlen(user.url)) { fprintf(f, " %s\n", url ); } if (user.teamid) { fprintf(f, " %d\n", user.teamid ); } if (user.has_profile) { fprintf(f, " \n" ); } #if 0 if (detail && user.show_hosts) { DB_HOST host; sprintf(buf, "where userid=%d", user.id); while (1) { retval = host.enumerate(buf) if (retval) break; if (host.total_credit > 0) { write_host(host, f, false); } } if (retval != ERR_DB_NOT_FOUND) { boinc_db.print_error("host enum: %d", retval); exit(retval); } } #endif fprintf(f, "\n" ); } void write_team(TEAM& team, FILE* f, bool detail) { DB_USER user; char buf[256]; char name[2048]; char url[2048], name_html[2048]; int retval; char description[8192]; // this should be plenty of room for xml escaping a 1024 string xml_escape(team.name, name); fprintf(f, "\n" " %d\n" " %d\n" " %s\n" " %d\n" " %f\n" " %f\n" " %f\n", team.id, team.type, name, team.userid, team.total_credit, team.expavg_credit, team.expavg_time ); // show founder name since that user might not be active // retval = user.lookup_id(team.userid); if (!retval) { char fname[2048]; xml_escape(user.name, fname); fprintf(f, " %s\n", fname ); } fprintf(f, " %d\n", team.create_time ); 
if (strlen(team.url)) { xml_escape(team.url, url); fprintf(f, " %s\n", url ); } if (strlen(team.name_html)) { xml_escape(team.name_html, name_html); fprintf(f, "%s\n", name_html ); } if (strlen(team.description)) { team.description[1024] = 0; // truncate xml_escape(team.description, description); fprintf(f, "%s\n", description ); } fprintf(f, " %s\n", team.country ); if (detail) { sprintf(buf, "where teamid=%d", team.id); while (1) { retval = user.enumerate(buf); if (retval) break; write_user(user, f, false); } if (retval != ERR_DB_NOT_FOUND) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "user enum: %d", retval ); exit(retval); } } fprintf(f, "\n" ); } int print_app(FILE* f, APP& app) { fprintf(f, " \n"); fprintf(f, " %s\n", app.user_friendly_name); #if 0 DB_RESULT result; char buf[256]; int n, retval; // can't do this stuff because MySQL/InnoDB can't do counts efficiently // sprintf(buf, "where appid=%d and server_state=%d", app.id, RESULT_SERVER_STATE_UNSENT); retval = result.count(n, buf); if (!retval) { fprintf(f, " %d\n", n); } sprintf(buf, "where appid=%d and server_state=%d", app.id, RESULT_SERVER_STATE_IN_PROGRESS); retval = result.count(n, buf); if (!retval) { fprintf(f, " %d\n", n); } sprintf(buf, "where appid=%d and server_state=%d", app.id, RESULT_SERVER_STATE_OVER); retval = result.count(n, buf); if (!retval) { fprintf(f, " %d\n", n); } #endif fprintf(f, " \n"); return 0; } int print_apps(FILE* f) { DB_APP app; fprintf(f, " \n"); while (!app.enumerate()) { print_app(f, app); } fprintf(f, " \n"); return 0; } int tables_file(char* dir) { char buf[256]; ZFILE f("tables", false); sprintf(buf, "%s/tables.xml", dir); f.open(buf); fprintf(f.f, " %d\n", (int)time(0) ); if (nusers) fprintf(f.f, " %d\n", nusers); if (nteams) fprintf(f.f, " %d\n", nteams); if (nhosts) fprintf(f.f, " %d\n", nhosts); if (total_credit) fprintf(f.f, " %lf\n", total_credit); print_apps(f.f); f.close(); return 0; } int ENUMERATION::make_it_happen(char* output_dir) { unsigned int 
i; int n, retval; DB_USER user; DB_TEAM team; DB_HOST host; char clause[256]; char path[256]; sprintf(path, "%s/%s", output_dir, filename); for (i=0; iopen(path); } } switch(sort) { case SORT_NONE: strcpy(clause, "where total_credit > 0"); break; case SORT_ID: strcpy(clause, "where total_credit > 0 order by id"); break; case SORT_TOTAL_CREDIT: strcpy(clause, "where total_credit > 0 order by total_credit desc"); break; case SORT_EXPAVG_CREDIT: strcpy(clause, "where total_credit > 0 order by expavg_credit desc"); break; } switch(table) { case TABLE_USER: n = 0; while (1) { retval = user.enumerate(clause, true); if (retval) break; nusers++; total_credit += user.total_credit; for (i=0; iset_id(n++); } if (out.zfile) { write_user(user, out.zfile->f, out.detail); } else { write_user(user, out.nzfile->f, out.detail); } } } if (retval != ERR_DB_NOT_FOUND) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "user enum: %d", retval ); exit(retval); } break; case TABLE_HOST: n = 0; while(1) { retval = host.enumerate(clause); if (retval) break; if (!host.userid) continue; nhosts++; for (i=0; iset_id(n++); } if (out.zfile) { write_host(host, out.zfile->f, out.detail); } else { write_host(host, out.nzfile->f, out.detail); } } } if (retval != ERR_DB_NOT_FOUND) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "host enum: %d", retval ); exit(retval); } break; case TABLE_TEAM: n = 0; while(1) { retval = team.enumerate(clause); if (retval) break; nteams++; for (i=0; iset_id(n++); } if (out.zfile) { write_team(team, out.zfile->f, out.detail); } else { write_team(team, out.nzfile->f, out.detail); } } } if (retval != ERR_DB_NOT_FOUND) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "team enum: %d", retval ); exit(retval); } break; } for (i=0; iclose(); if (out.nzfile) out.nzfile->close(); } return 0; } void show_help() { fprintf(stderr, "This program generates XML files containing project statistics.\n" "It should be run once a day as a periodic task in config.xml.\n" "For more 
info, see http://boinc.berkeley.edu/trac/wiki/DbDump\n" ); } int main(int argc, char** argv) { SCHED_CONFIG config; int retval, i; DUMP_SPEC spec; char* db_host = 0; char spec_filename[256], buf[256]; FILE_LOCK file_lock; check_stop_daemons(); setbuf(stderr, 0); log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL, "db_dump starting\n"); strcpy(spec_filename, ""); for (i=1; itm_year, tmp->tm_mon+1, tmp->tm_mday, tmp->tm_hour, tmp->tm_min, tmp->tm_sec ); retval = system(buf); if (retval) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "Can't rename old stats\n"); exit(1); } sprintf(buf, "mv %s %s", spec.output_dir, spec.final_output_dir); retval = system(buf); if (retval) { log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "Can't rename new stats\n"); exit(1); } log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL, "db_dump finished\n"); } const char *BOINC_RCSID_500089bde6 = "$Id$";