From fec574f4e8aec411a2ed1c7ba4c066d47ac37e26 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Thu, 10 Apr 2014 23:53:19 -0700 Subject: [PATCH] create_work: increase the efficiency of bulk job creation The job submission RPC handler (PHP) originally ran the create_work program once per job. This took about 1.5 minutes to create 1000 jobs. Recently I changed this so that create_work only is run once; it does one SQL insert per job. Disappointingly, this was only slightly faster: 1 min per 1000 jobs. This commit changes create_work to create multiple jobs per SQL insert (as many as will fit in a 1 MB query, which is the default limit). This speeds things up by a factor of 100: 1000 jobs in 0.5 sec. --- db/boinc_db.cpp | 43 ++++++++++++++++++++++++++++++++++++++ db/boinc_db.h | 1 + tools/backend_lib.cpp | 7 +++++-- tools/backend_lib.h | 3 ++- tools/create_work.cpp | 48 +++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 97 insertions(+), 5 deletions(-) diff --git a/db/boinc_db.cpp b/db/boinc_db.cpp index ff653e5db0..8c78338abd 100644 --- a/db/boinc_db.cpp +++ b/db/boinc_db.cpp @@ -884,6 +884,49 @@ void DB_WORKUNIT::db_print(char* buf){ ); } +void DB_WORKUNIT::db_print_values(char* buf) { + sprintf(buf, + "(0, %d, %d, " + "'%s', '%s', %d, " + "%f, %f, " + "%f, %f, " + "%d, " + "%u, %f, " + "%d, %d, " + "%d, %d, %d, " + "%d, %f, " + "%d, %d, %d, " + "%d, %d, " + "'%s', NOW(), " + "%d, " + "%f, " + "%d, " + "%d, " + "%d, " + "%d)", + create_time, appid, + name, xml_doc, batch, + rsc_fpops_est, rsc_fpops_bound, rsc_memory_bound, rsc_disk_bound, + need_validate, + canonical_resultid, canonical_credit, + transition_time, delay_bound, + error_mask, file_delete_state, assimilate_state, + hr_class, opaque, + min_quorum, + target_nresults, + max_error_results, + max_total_results, + max_success_results, + result_template_file, + priority, + rsc_bandwidth_bound, + fileset_id, + app_version_id, + transitioner_flags, + size_class + ); +} + void 
DB_WORKUNIT::db_parse(MYSQL_ROW &r) { int i=0; clear(); diff --git a/db/boinc_db.h b/db/boinc_db.h index 4c6555903d..bb9ba61868 100644 --- a/db/boinc_db.h +++ b/db/boinc_db.h @@ -194,6 +194,7 @@ public: DB_WORKUNIT(DB_CONN* p=0); int get_id(); void db_print(char*); + void db_print_values(char*); void db_parse(MYSQL_ROW &row); void operator=(WORKUNIT& w) {WORKUNIT::operator=(w);} }; diff --git a/tools/backend_lib.cpp b/tools/backend_lib.cpp index 69c4b47f91..97fdb4d353 100644 --- a/tools/backend_lib.cpp +++ b/tools/backend_lib.cpp @@ -231,7 +231,8 @@ int create_work( int ninfiles, SCHED_CONFIG& config_loc, const char* command_line, - const char* additional_xml + const char* additional_xml, + char* query_string ) { int retval; char _result_template[BLOB_SIZE]; @@ -321,7 +322,9 @@ int create_work( } else { wu.transition_time = time(0); } - if (wu.id) { + if (query_string) { + wu.db_print_values(query_string); + } else if (wu.id) { retval = wu.update(); if (retval) { fprintf(stderr, diff --git a/tools/backend_lib.h b/tools/backend_lib.h index e90f156ba3..97df682a00 100644 --- a/tools/backend_lib.h +++ b/tools/backend_lib.h @@ -68,7 +68,8 @@ extern int create_work( int ninfiles, SCHED_CONFIG&, const char* command_line = NULL, - const char* additional_xml = NULL + const char* additional_xml = NULL, + char* query_string = 0 ); extern int stage_file(const char*, bool); diff --git a/tools/create_work.cpp b/tools/create_work.cpp index 518677c126..407ee0a976 100644 --- a/tools/create_work.cpp +++ b/tools/create_work.cpp @@ -39,6 +39,8 @@ #include "backend_lib.h" +using std::string; + void usage() { fprintf(stderr, "usage: create_work [options] infile1 infile2 ...\n" @@ -292,6 +294,7 @@ int main(int argc, char** argv) { ); exit(1); } + boinc_db.set_isolation_level(READ_UNCOMMITTED); sprintf(buf, "where name='%s'", app.name); retval = app.lookup(buf); if (retval) { @@ -314,8 +317,10 @@ int main(int argc, char** argv) { strcpy(jd.result_template_path, "./"); 
strcat(jd.result_template_path, jd.result_template_file); if (use_stdin) { + string values; + DB_WORKUNIT wu; int _argc; - char* _argv[100]; + char* _argv[100], value_buf[MAX_QUERY_LEN]; for (int j=0; ; j++) { char* p = fgets(buf, sizeof(buf), stdin); if (p == NULL) break; @@ -326,7 +331,46 @@ if (!strlen(jd2.wu.name)) { sprintf(jd2.wu.name, "%s_%d", jd.wu.name, j); } - jd2.create(); + create_work( + jd2.wu, + jd2.wu_template, + jd2.result_template_file, + jd2.result_template_path, + const_cast<const char**>(jd2.infiles), + jd2.ninfiles, + config, + jd2.command_line, + jd2.additional_xml, + value_buf + ); + if (values.size()) { + values += ","; + values += value_buf; + } else { + values = value_buf; + } + // MySQL can handle queries of at least 1 MB + // + int n = strlen(value_buf); + if (values.size() + 2*n > 1000000) { + retval = wu.insert_batch(values); + if (retval) { + fprintf(stderr, + "wu.insert_batch() failed: %d\n", retval + ); + exit(1); + } + values.clear(); + } + } + if (values.size()) { + retval = wu.insert_batch(values); + if (retval) { + fprintf(stderr, + "wu.insert_batch() failed: %d\n", retval + ); + exit(1); + } } } else { jd.create();