From 2a2c9c4ad8fbd8cabbb41e845e942b4c0570ee67 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Tue, 17 Sep 2013 13:35:55 -0700 Subject: [PATCH] remote job submission: add notion of "expire time" for batches (for Condor) - Batches now have optional "expire time". If this time passes and the batch is not retired, abort and retire it. - Add script "expire_batches" which enforces the above. Run it as a periodic task. - Add a web RPC for setting the expire time of a batch (it can be changed multiple times) - Add a C++ interface for this RPC - Add a BOINC_SET_LEASE command to the BOINC GAHP ("lease" is Condor term for expire time) --- db/boinc_db_types.h | 3 ++ db/schema.sql | 4 +-- html/inc/common_defs.inc | 16 +++++++++++ html/inc/submit_db.inc | 6 ++++ html/inc/submit_util.inc | 12 +++++++- html/inc/util_ops.inc | 5 +++- html/ops/db_update.php | 5 ++++ html/ops/delete_job_files | 0 html/user/submit_rpc_handler.php | 18 +++++++++++- lib/remote_submit.cpp | 47 +++++++++++++++++++++++++++++++- lib/remote_submit.h | 9 ++++++ samples/condor/boinc_gahp.cpp | 34 +++++++++++++++++++++-- 12 files changed, 151 insertions(+), 8 deletions(-) mode change 100644 => 100755 html/ops/delete_job_files diff --git a/db/boinc_db_types.h b/db/boinc_db_types.h index 37af35ad52..9ca17825df 100644 --- a/db/boinc_db_types.h +++ b/db/boinc_db_types.h @@ -633,6 +633,9 @@ struct BATCH { // project-assigned char description[256]; // project-assigned + double expire_time; + // if nonzero, retire the batch after this time + // Condor calls this the batch's "lease". }; // values of batch.state diff --git a/db/schema.sql b/db/schema.sql index bcb00bb541..063f47a9bc 100644 --- a/db/schema.sql +++ b/db/schema.sql @@ -290,7 +290,6 @@ create table result ( primary key (id) ) engine=InnoDB; --- see boinc_db.h for doc create table batch ( id serial primary key, user_id integer not null, @@ -309,7 +308,8 @@ create table batch ( name varchar(255) not null, app_id integer not null, project_state integer not null, - description varchar(255) not null + description varchar(255) not null, + expire_time double not null ) engine = InnoDB; -- permissions for job submission diff --git a/html/inc/common_defs.inc b/html/inc/common_defs.inc index e1f366061e..8bdbb1d382 100644 --- a/html/inc/common_defs.inc +++ b/html/inc/common_defs.inc @@ -52,6 +52,22 @@ define('VALIDATE_STATE_NO_CHECK', 3); define('VALIDATE_STATE_INCONCLUSIVE',4); define('VALIDATE_STATE_TOO_LATE', 5); +define('WU_ERROR_COULDNT_SEND_RESULT', 1); +define('WU_ERROR_TOO_MANY_ERROR_RESULTS', 2); +define('WU_ERROR_TOO_MANY_SUCCESS_RESULTS', 4); +define('WU_ERROR_TOO_MANY_TOTAL_RESULTS', 8); +define('WU_ERROR_CANCELLED', 16); +define('WU_ERROR_NO_CANONICAL_RESULT', 32); + +define('FILE_DELETE_INIT', 0); +define('FILE_DELETE_READY', 1); +define('FILE_DELETE_DONE', 2); +define('FILE_DELETE_ERROR', 3); + +define('ASSIMILATE_INIT', 0); +define('ASSIMILATE_READY', 1); +define('ASSIMILATE_DONE', 2); + // from lib/common_defs.h // define('RESULT_NEW', 0); diff --git a/html/inc/submit_db.inc b/html/inc/submit_db.inc index b91ac84e8b..c890a58736 100644 --- a/html/inc/submit_db.inc +++ b/html/inc/submit_db.inc @@ -137,6 +137,12 @@ class BoincBatchFileAssoc { "job_file_id=$this->job_file_id and batch_id=$this->batch_id" ); } + static function delete_batch($batch_id) { + $db = BoincDb::get(); + $db->delete_aux('batch_file_assoc', + "batch_id=$batch_id" + ); + } } ?> diff --git a/html/inc/submit_util.inc b/html/inc/submit_util.inc index 20069f7ead..5baf95e02a 100644 --- a/html/inc/submit_util.inc +++ b/html/inc/submit_util.inc @@ -123,15 +123,25 @@ function abort_batch($batch) { return 0; } +// mark WUs as assimilated; this lets them be purged +// function retire_batch($batch) { $wus = BoincWorkunit::enum("batch=$batch->id"); $now = time(); foreach ($wus as $wu) { - $wu->update("assimilate_state=2, transition_time=$now"); + $wu->update( + "assimilate_state=".ASSIMILATE_DONE.", transition_time=$now" + ); } $batch->update("state=".BATCH_STATE_RETIRED); } +function expire_batch($batch) { + abort_batch($batch); + retire_batch($batch); + $batch->update("state=".BATCH_STATE_EXPIRED); +} + function batch_state_string($state) { switch ($state) { case BATCH_STATE_INIT: return "new"; diff --git a/html/inc/util_ops.inc b/html/inc/util_ops.inc index 811e0507f4..9ae27a6cb1 100644 --- a/html/inc/util_ops.inc +++ b/html/inc/util_ops.inc @@ -30,8 +30,11 @@ define("TABLE2", ""); function admin_page_head($title) { $stylesheet = URL_BASE.STYLESHEET; + echo ""; echo "$title - "; + + + "; if (defined('ADMIN_STYLESHEET')) { echo ""; } diff --git a/html/ops/db_update.php b/html/ops/db_update.php index 38430d6b4e..8aafa6f86d 100644 --- a/html/ops/db_update.php +++ b/html/ops/db_update.php @@ -858,6 +858,10 @@ function update_9_10_2013() { do_query("alter table result change mod_time mod_time timestamp default current_timestamp on update current_timestamp"); } +function update_9_17_2013() { + do_query("alter table batch add expire_time double not null"); +} + // Updates are done automatically if you use "upgrade". // // If you need to do updates manually, @@ -893,6 +897,7 @@ $db_updates = array ( array(27001, "update_4_26_2013"), array(27002, "update_5_23_2013"), array(27003, "update_9_10_2013"), + array(27004, "update_9_17_2013"), ); ?> diff --git a/html/ops/delete_job_files b/html/ops/delete_job_files old mode 100644 new mode 100755 diff --git a/html/user/submit_rpc_handler.php b/html/user/submit_rpc_handler.php index fa55d1a26b..9d0918223d 100644 --- a/html/user/submit_rpc_handler.php +++ b/html/user/submit_rpc_handler.php @@ -255,8 +255,9 @@ function create_batch($r) { list($user, $user_submit) = authenticate_user($r, $app); $now = time(); $batch_name = (string)($r->batch->batch_name); + $expire_time = (double)($r->expire_time); $batch_id = BoincBatch::insert( - "(user_id, create_time, name, app_id, state) values ($user->id, $now, '$batch_name', $app->id, ".BATCH_STATE_INIT.")" + "(user_id, create_time, name, app_id, state, expire_time) values ($user->id, $now, '$batch_name', $app->id, ".BATCH_STATE_INIT.", $expire_time)" ); if (!$batch_id) { xml_error(-1, "BOINC server: Can't create batch: ".mysql_error()); @@ -270,6 +271,7 @@ function print_batch_params($batch) { echo " $batch->id$batch->create_time + $batch->expire_time$batch->est_completion_time$batch->njobs$batch->fraction_done @@ -515,6 +517,20 @@ function handle_retire_batch($r) { echo "1"; } +function handle_set_expire_time($r) { + list($user, $user_submit) = authenticate_user($r, null); + $batch = get_batch($r); + if ($batch->user_id != $user->id) { + xml_error(-1, "not owner"); + } + $expire_time = (double)($r->expire_time); + if ($batch->update("expire_time=$expire_time")) { + echo "1"; + } else { + xml_error(-1, "update failed"); + } +} + function get_templates($r) { $app_name = (string)($r->app_name); if ($app_name) { diff --git a/lib/remote_submit.cpp b/lib/remote_submit.cpp index 2c54a439df..0891e47d0b 100644 --- a/lib/remote_submit.cpp +++ b/lib/remote_submit.cpp @@ -226,6 +226,7 @@ int create_batch( const char* authenticator, const char* batch_name, const char* app_name, + double expire_time, int& batch_id, string& error_msg ) { @@ -237,11 +238,13 @@ int create_batch( " \n" " %s\n" " %s\n" + " %f\n" " \n" "\n", authenticator, batch_name, - app_name + app_name, + expire_time ); sprintf(url, "%ssubmit_rpc_handler.php", project_url); FILE* reply = tmpfile(); @@ -651,6 +654,48 @@ int retire_batch( return retval; } +int set_expire_time( + const char* project_url, + const char* authenticator, + const char* batch_name, + double expire_time, + string &error_msg +) { + string request; + char url[1024], buf[256]; + request = "\n"; + sprintf(buf, "%s\n", authenticator); + request += string(buf); + sprintf(buf, "%s\n", batch_name); + request += string(buf); + sprintf(buf, "%f\n", expire_time); + request += "\n"; + sprintf(url, "%ssubmit_rpc_handler.php", project_url); + FILE* reply = tmpfile(); + vector x; + int retval = do_http_post(url, request.c_str(), reply, x); + if (retval) { + fclose(reply); + return retval; + } + retval = -1; + error_msg = ""; + fseek(reply, 0, SEEK_SET); + while (fgets(buf, 256, reply)) { +#ifdef SHOW_REPLY + printf("set_expire_time reply: %s", buf); +#endif + if (parse_int(buf, "", retval)) continue; + if (parse_str(buf, "", error_msg)) continue; + if (strstr(buf, "success")) { + retval = 0; + continue; + } + } + fclose(reply); + return retval; +} + int ping_server( const char* project_url, string &error_msg diff --git a/lib/remote_submit.h b/lib/remote_submit.h index c2365b10c9..3e3c797d99 100644 --- a/lib/remote_submit.h +++ b/lib/remote_submit.h @@ -127,6 +127,7 @@ extern int create_batch( const char* authenticator, const char* batch_name, const char* app_name, + double expire_time, int &batch_id, string& error_msg ); @@ -186,6 +187,14 @@ extern int retire_batch( string& error_msg ); +extern int set_expire_time( + const char* project_url, + const char* authenticator, + const char* batch_name, + double expire_time, + string& error_msg +); + extern int ping_server( const char* project_url, string& error_msg diff --git a/samples/condor/boinc_gahp.cpp b/samples/condor/boinc_gahp.cpp index f17b8fb8bd..58749bbad8 100644 --- a/samples/condor/boinc_gahp.cpp +++ b/samples/condor/boinc_gahp.cpp @@ -75,6 +75,7 @@ struct COMMAND { vector abort_job_names; vector batch_names; char batch_name[256]; + double lease_end_time; COMMAND(char* _in) { in = _in; @@ -90,6 +91,7 @@ struct COMMAND { int parse_fetch_output(char*); int parse_abort_jobs(char*); int parse_retire_batch(char*); + int parse_set_lease(char*); }; vector commands; @@ -239,8 +241,10 @@ void handle_submit(COMMAND& c) { c.out = strdup(s.c_str()); return; } + double expire_time = time(0) + 3600; retval = create_batch( - project_url, authenticator, req.batch_name, req.app_name, req.batch_id, error_msg + project_url, authenticator, req.batch_name, req.app_name, expire_time, + req.batch_id, error_msg ); if (retval) { sprintf(buf, "error\\ creating\\ batch:\\ %d\\ ", retval); @@ -529,6 +533,28 @@ void handle_retire_batch(COMMAND& c) { c.out = strdup(s.c_str()); } +int COMMAND::parse_set_lease(char* p) { + strcpy(batch_name, strtok_r(NULL, " ", &p)); + lease_end_time = atof(strtok_r(NULL, " ", &p)); + return 0; +} + +void handle_set_lease(COMMAND& c) { + string error_msg; + int retval = set_expire_time( + project_url, authenticator, c.batch_name, c.lease_end_time, error_msg + ); + string s; + char buf[256]; + if (retval) { + sprintf(buf, "set_lease()\\ returned\\ %d\\ ", retval); + s = string(buf) + escape_str(error_msg); + } else { + s = "NULL"; + } + c.out = strdup(s.c_str()); +} + void handle_ping(COMMAND& c) { string error_msg, s; char buf[256]; @@ -554,6 +580,8 @@ void* handle_command_aux(void* q) { handle_abort_jobs(c); } else if (!strcasecmp(c.cmd, "BOINC_RETIRE_BATCH")) { handle_retire_batch(c); + } else if (!strcasecmp(c.cmd, "BOINC_SET_LEASE")) { + handle_set_lease(c); } else if (!strcasecmp(c.cmd, "BOINC_PING")) { handle_ping(c); } else { @@ -589,6 +617,8 @@ int COMMAND::parse_command() { retval = parse_abort_jobs(p); } else if (!strcasecmp(cmd, "BOINC_RETIRE_BATCH")) { retval = parse_retire_batch(p); + } else if (!strcasecmp(cmd, "BOINC_SET_LEASE")) { + retval = parse_set_lease(p); } else if (!strcasecmp(cmd, "BOINC_PING")) { retval = 0; } else { @@ -624,7 +654,7 @@ int handle_command(char* p) { if (!strcasecmp(cmd, "VERSION")) { print_version(false); } else if (!strcasecmp(cmd, "COMMANDS")) { - BPRINTF("S ASYNC_MODE_OFF ASYNC_MODE_ON BOINC_ABORT_JOBS BOINC_FETCH_OUTPUT BOINC_PING BOINC_QUERY_BATCHES BOINC_RETIRE_BATCH BOINC_SELECT_PROJECT BOINC_SUBMIT COMMANDS QUIT RESULTS VERSION\n"); + BPRINTF("S ASYNC_MODE_OFF ASYNC_MODE_ON BOINC_ABORT_JOBS BOINC_FETCH_OUTPUT BOINC_PING BOINC_QUERY_BATCHES BOINC_RETIRE_BATCH BOINC_SELECT_PROJECT BOINC_SET_LEASE BOINC_SUBMIT COMMANDS QUIT RESULTS VERSION\n"); } else if (!strcasecmp(cmd, "RESPONSE_PREFIX")) { flockfile(stdout); BPRINTF("S\n");