create_work: increase the efficiency of bulk job creation

The job submission RPC handler (PHP) originally ran the
create_work program once per job.
This took about 1.5 minutes to create 1000 jobs.
Recently I changed this so that create_work is run only once;
it does one SQL insert per job.
Disappointingly, this was only slightly faster: 1 min per 1000 jobs.

This commit changes create_work to create multiple jobs per SQL insert
(as many as will fit in a 1 MB query, the default MySQL query-size limit).
This speeds things up by a factor of 100: 1000 jobs in 0.5 sec.
Author: David Anderson
Date: 2014-04-10 23:53:19 -07:00
Commit: fec574f4e8 (parent bb4f4194d0)
5 changed files with 97 additions and 5 deletions
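To make the batching concrete, here is a minimal sketch of the pattern, assuming a simplified tuple format and a placeholder flush_batch() helper (an illustration, not the committed code): each job contributes one parenthesized VALUES tuple, the tuples are joined with commas, and the batch is flushed as a single multi-row INSERT before the accumulated query approaches the 1 MB limit.

#include <cstdio>
#include <cstring>
#include <string>

// Stay under MySQL's default max_allowed_packet (~1 MB).
static const std::size_t QUERY_LIMIT = 1000000;

// Stand-in for issuing the query; the real code goes through the BOINC DB layer.
static void flush_batch(const std::string& values) {
    if (values.empty()) return;
    std::string query = "insert into workunit values " + values;
    std::printf("one INSERT of %zu bytes\n", query.size());
}

int main() {
    std::string values;
    for (int i = 0; i < 100000; i++) {
        char tuple[256];
        // One "(...)" tuple per job, analogous to DB_WORKUNIT::db_print_values().
        std::snprintf(tuple, sizeof(tuple), "(0, %d, 'job_%d', 0.0)", 1000 + i, i);
        if (!values.empty()) values += ",";
        values += tuple;
        // Flush with headroom before the next tuple would push past the limit.
        if (values.size() + 2 * std::strlen(tuple) > QUERY_LIMIT) {
            flush_batch(values);
            values.clear();
        }
    }
    flush_batch(values);  // final partial batch
    return 0;
}

With tuples of a few hundred bytes this yields batches of a thousand or more jobs per INSERT, which is where the roughly 100x speedup over one statement per job comes from.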

@@ -884,6 +884,49 @@ void DB_WORKUNIT::db_print(char* buf){
);
}
void DB_WORKUNIT::db_print_values(char* buf) {
sprintf(buf,
"(0, %d, %d, "
"'%s', '%s', %d, "
"%f, %f, "
"%f, %f, "
"%d, "
"%u, %f, "
"%d, %d, "
"%d, %d, %d, "
"%d, %f, "
"%d, %d, %d, "
"%d, %d, "
"'%s', NOW(), "
"%d, "
"%f, "
"%d, "
"%d, "
"%d, "
"%d)",
create_time, appid,
name, xml_doc, batch,
rsc_fpops_est, rsc_fpops_bound, rsc_memory_bound, rsc_disk_bound,
need_validate,
canonical_resultid, canonical_credit,
transition_time, delay_bound,
error_mask, file_delete_state, assimilate_state,
hr_class, opaque,
min_quorum,
target_nresults,
max_error_results,
max_total_results,
max_success_results,
result_template_file,
priority,
rsc_bandwidth_bound,
fileset_id,
app_version_id,
transitioner_flags,
size_class
);
}
void DB_WORKUNIT::db_parse(MYSQL_ROW &r) {
int i=0;
clear();
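db_print_values() writes a single parenthesized VALUES tuple for one workunit row (the leading 0 in the id column lets MySQL assign the auto-increment value). DB_WORKUNIT::insert_batch(), which the new create_work path uses below, is not shown in this excerpt; presumably it wraps the comma-joined tuples in one statement, roughly like this sketch (an assumption about its shape, not the actual implementation):

#include <cstdio>
#include <string>

// Hypothetical shape of insert_batch(): all accumulated tuples go out in one
// multi-row INSERT, i.e. a single round trip instead of one per job.
static std::string make_batch_insert(const std::string& values) {
    return "insert into workunit values " + values;
}

int main() {
    // Each element is one "(...)" tuple as written by db_print_values();
    // the field values here are abbreviated placeholders.
    std::string values = "(0, 1397190799, 12, 'job_0', '<xml/>', 0)";
    values += ",(0, 1397190799, 12, 'job_1', '<xml/>', 0)";
    std::printf("%s\n", make_batch_insert(values).c_str());
    return 0;
}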

@@ -194,6 +194,7 @@ public:
DB_WORKUNIT(DB_CONN* p=0);
int get_id();
void db_print(char*);
void db_print_values(char*);
void db_parse(MYSQL_ROW &row);
void operator=(WORKUNIT& w) {WORKUNIT::operator=(w);}
};

@@ -231,7 +231,8 @@ int create_work(
int ninfiles,
SCHED_CONFIG& config_loc,
const char* command_line,
const char* additional_xml
const char* additional_xml,
char* query_string
) {
int retval;
char _result_template[BLOB_SIZE];
@@ -321,7 +322,9 @@ int create_work(
} else {
wu.transition_time = time(0);
}
if (wu.id) {
if (query_string) {
wu.db_print_values(query_string);
} else if (wu.id) {
retval = wu.update();
if (retval) {
fprintf(stderr,

@@ -68,7 +68,8 @@ extern int create_work(
int ninfiles,
SCHED_CONFIG&,
const char* command_line = NULL,
const char* additional_xml = NULL
const char* additional_xml = NULL,
char* query_string = 0
);
extern int stage_file(const char*, bool);

@@ -39,6 +39,8 @@
#include "backend_lib.h"
using std::string;
void usage() {
fprintf(stderr,
"usage: create_work [options] infile1 infile2 ...\n"
@@ -292,6 +294,7 @@ int main(int argc, char** argv) {
);
exit(1);
}
boinc_db.set_isolation_level(READ_UNCOMMITTED);
sprintf(buf, "where name='%s'", app.name);
retval = app.lookup(buf);
if (retval) {
@@ -314,8 +317,10 @@ int main(int argc, char** argv) {
strcpy(jd.result_template_path, "./");
strcat(jd.result_template_path, jd.result_template_file);
if (use_stdin) {
string values;
DB_WORKUNIT wu;
int _argc;
char* _argv[100];
char* _argv[100], value_buf[MAX_QUERY_LEN];
for (int j=0; ; j++) {
char* p = fgets(buf, sizeof(buf), stdin);
if (p == NULL) break;
@@ -326,7 +331,46 @@ int main(int argc, char** argv) {
if (!strlen(jd2.wu.name)) {
sprintf(jd2.wu.name, "%s_%d", jd.wu.name, j);
}
jd2.create();
create_work(
jd2.wu,
jd2.wu_template,
jd2.result_template_file,
jd2.result_template_path,
const_cast<const char **>(jd2.infiles),
jd2.ninfiles,
config,
jd2.command_line,
jd2.additional_xml,
value_buf
);
if (values.size()) {
values += ",";
values += value_buf;
} else {
values = value_buf;
}
// MySQL can handle queries of at least 1 MB
//
int n = strlen(value_buf);
if (values.size() + 2*n > 1000000) {
retval = wu.insert_batch(values);
if (retval) {
fprintf(stderr,
"wu.insert_batch() failed: %d\n", retval
);
exit(1);
}
values.clear();
}
}
if (values.size()) {
retval = wu.insert_batch(values);
if (retval) {
fprintf(stderr,
"wu.insert_batch() failed: %d\n", retval
);
exit(1);
}
}
} else {
jd.create();
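A note on the flush test in the batching loop above: the check values.size() + 2*n > 1000000 runs after the current tuple has been appended, so the 2*n headroom covers roughly one more tuple plus whatever fixed prefix insert_batch() prepends (assuming tuples of similar size). That keeps every query handed to insert_batch() under the 1 MB default. A small self-contained check of the arithmetic, with an assumed tuple size of 600 bytes and an assumed 28-byte "insert into workunit values " prefix (illustrative figures, not measured):

#include <cstdio>

int main() {
    const std::size_t limit = 1000000;  // cutoff used in the loop above
    const std::size_t n = 600;          // assumed size of one tuple plus comma
    const std::size_t prefix = 28;      // assumed length of the INSERT prefix
    std::size_t values_len = 0, max_query = 0;
    for (int job = 0; job < 100000; job++) {
        values_len += n;                     // tuple appended to the batch
        if (values_len + 2 * n > limit) {    // same test as in the loop above
            if (prefix + values_len > max_query) max_query = prefix + values_len;
            values_len = 0;                  // batch flushed via insert_batch()
        }
    }
    std::printf("largest query: %zu bytes (limit %zu)\n", max_query, limit);
    return 0;
}

With these figures the largest query is 999,028 bytes, just under the limit, so each batch fits in a single default-sized MySQL packet.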