remote job submission: add notion of "expire time" for batches (for Condor)

- Batches now have optional "expire time".
  If this time passes and the batch is not retired, abort and retire it.
- Add script "expire_batches" which enforces the above.
  Run it as a periodic task.
- Add a web RPC for setting the expire time of a batch
  (it can be changed multiple times)
- Add a C++ interface for this RPC
- Add a BOINC_SET_LEASE command to the BOINC GAHP
  ("lease" is Condor term for expire time)
This commit is contained in:
David Anderson 2013-09-17 13:35:55 -07:00
parent f4cfc8a624
commit 2a2c9c4ad8
12 changed files with 151 additions and 8 deletions

View File

@ -633,6 +633,9 @@ struct BATCH {
// project-assigned
char description[256];
// project-assigned
double expire_time;
// if nonzero, retire the batch after this time
// Condor calls this the batch's "lease".
};
// values of batch.state

View File

@ -290,7 +290,6 @@ create table result (
primary key (id)
) engine=InnoDB;
-- see boinc_db.h for doc
create table batch (
id serial primary key,
user_id integer not null,
@ -309,7 +308,8 @@ create table batch (
name varchar(255) not null,
app_id integer not null,
project_state integer not null,
description varchar(255) not null
description varchar(255) not null,
expire_time double not null
) engine = InnoDB;
-- permissions for job submission

View File

@ -52,6 +52,22 @@ define('VALIDATE_STATE_NO_CHECK', 3);
define('VALIDATE_STATE_INCONCLUSIVE',4);
define('VALIDATE_STATE_TOO_LATE', 5);
// workunit error flags.
// The values are distinct powers of 2,
// so presumably they are combined as a bitmask - TODO confirm
//
define('WU_ERROR_COULDNT_SEND_RESULT', 1);
define('WU_ERROR_TOO_MANY_ERROR_RESULTS', 2);
define('WU_ERROR_TOO_MANY_SUCCESS_RESULTS', 4);
define('WU_ERROR_TOO_MANY_TOTAL_RESULTS', 8);
define('WU_ERROR_CANCELLED', 16);
define('WU_ERROR_NO_CANONICAL_RESULT', 32);
// file deletion states
//
define('FILE_DELETE_INIT', 0);
define('FILE_DELETE_READY', 1);
define('FILE_DELETE_DONE', 2);
define('FILE_DELETE_ERROR', 3);
// assimilation states
// (presumably the values of workunit.assimilate_state - TODO confirm)
//
define('ASSIMILATE_INIT', 0);
define('ASSIMILATE_READY', 1);
define('ASSIMILATE_DONE', 2);
// from lib/common_defs.h
//
define('RESULT_NEW', 0);

View File

@ -137,6 +137,12 @@ class BoincBatchFileAssoc {
"job_file_id=$this->job_file_id and batch_id=$this->batch_id"
);
}
// Delete the batch_file_assoc records whose batch_id is $batch_id.
//
static function delete_batch($batch_id) {
    $where = "batch_id=$batch_id";
    BoincDb::get()->delete_aux('batch_file_assoc', $where);
}
}
?>

View File

@ -123,15 +123,25 @@ function abort_batch($batch) {
return 0;
}
// Retire a batch:
// mark its WUs as assimilated (this lets them be purged)
// and set the batch's state to BATCH_STATE_RETIRED.
//
function retire_batch($batch) {
    $wus = BoincWorkunit::enum("batch=$batch->id");
    $now = time();
    foreach ($wus as $wu) {
        // one update per WU is enough;
        // use the named constant rather than a literal 2
        $wu->update(
            "assimilate_state=".ASSIMILATE_DONE.", transition_time=$now"
        );
    }
    $batch->update("state=".BATCH_STATE_RETIRED);
}
// Expire a batch: abort it, retire it,
// then record its state as BATCH_STATE_EXPIRED.
// The state update is done last so that it is the one that sticks.
//
function expire_batch($batch) {
    abort_batch($batch);
    retire_batch($batch);
    $clause = "state=".BATCH_STATE_EXPIRED;
    $batch->update($clause);
}
function batch_state_string($state) {
switch ($state) {
case BATCH_STATE_INIT: return "new";

View File

@ -30,8 +30,11 @@ define("TABLE2", "<table width=\"580\">");
function admin_page_head($title) {
$stylesheet = URL_BASE.STYLESHEET;
echo "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">";
echo "<html><head><title>$title</title>
<link rel=stylesheet type=\"text/css\" href=\"".URL_BASE."main.css\" media=\"all\" />";
<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\" />
<link rel=stylesheet type=\"text/css\" href=\"".URL_BASE."main.css\" media=\"all\" />
";
if (defined('ADMIN_STYLESHEET')) {
echo "<link rel=\"stylesheet\" type=\"text/css\" href=\"".URL_BASE.ADMIN_STYLESHEET."\" media=\"all\" />";
}

View File

@ -858,6 +858,10 @@ function update_9_10_2013() {
do_query("alter table result change mod_time mod_time timestamp default current_timestamp on update current_timestamp");
}
// Schema update: add the expire_time column to the batch table.
//
function update_9_17_2013() {
    $query = "alter table batch add expire_time double not null";
    do_query($query);
}
// Updates are done automatically if you use "upgrade".
//
// If you need to do updates manually,
@ -893,6 +897,7 @@ $db_updates = array (
array(27001, "update_4_26_2013"),
array(27002, "update_5_23_2013"),
array(27003, "update_9_10_2013"),
array(27004, "update_9_17_2013"),
);
?>

0
html/ops/delete_job_files Normal file → Executable file
View File

View File

@ -255,8 +255,9 @@ function create_batch($r) {
list($user, $user_submit) = authenticate_user($r, $app);
$now = time();
$batch_name = (string)($r->batch->batch_name);
$expire_time = (double)($r->expire_time);
$batch_id = BoincBatch::insert(
"(user_id, create_time, name, app_id, state) values ($user->id, $now, '$batch_name', $app->id, ".BATCH_STATE_INIT.")"
"(user_id, create_time, name, app_id, state, expire_time) values ($user->id, $now, '$batch_name', $app->id, ".BATCH_STATE_INIT.", $expire_time)"
);
if (!$batch_id) {
xml_error(-1, "BOINC server: Can't create batch: ".mysql_error());
@ -270,6 +271,7 @@ function print_batch_params($batch) {
echo "
<id>$batch->id</id>
<create_time>$batch->create_time</create_time>
<expire_time>$batch->expire_time</expire_time>
<est_completion_time>$batch->est_completion_time</est_completion_time>
<njobs>$batch->njobs</njobs>
<fraction_done>$batch->fraction_done</fraction_done>
@ -515,6 +517,20 @@ function handle_retire_batch($r) {
echo "<success>1</success>";
}
// RPC handler: set the expire time of a batch.
// The request must come from the user who owns the batch.
// Replies with <success>1</success> if the DB update succeeds,
// otherwise with an XML error.
//
function handle_set_expire_time($r) {
    list($user, $user_submit) = authenticate_user($r, null);
    $batch = get_batch($r);
    if ($user->id != $batch->user_id) {
        xml_error(-1, "not owner");
    }
    $new_time = (double)($r->expire_time);
    $updated = $batch->update("expire_time=$new_time");
    if (!$updated) {
        xml_error(-1, "update failed");
        return;
    }
    echo "<success>1</success>";
}
function get_templates($r) {
$app_name = (string)($r->app_name);
if ($app_name) {

View File

@ -226,6 +226,7 @@ int create_batch(
const char* authenticator,
const char* batch_name,
const char* app_name,
double expire_time,
int& batch_id,
string& error_msg
) {
@ -237,11 +238,13 @@ int create_batch(
" <batch>\n"
" <batch_name>%s</batch_name>\n"
" <app_name>%s</app_name>\n"
" <expire_time>%f</expire_time>\n"
" </batch>\n"
"</create_batch>\n",
authenticator,
batch_name,
app_name
app_name,
expire_time
);
sprintf(url, "%ssubmit_rpc_handler.php", project_url);
FILE* reply = tmpfile();
@ -651,6 +654,48 @@ int retire_batch(
return retval;
}
// Set the expire time of a batch using the "set_expire_time" web RPC.
//
// project_url: "submit_rpc_handler.php" is appended to this as is,
//      so it must end with a slash
// authenticator: sent in the request's <authenticator> element
// batch_name: sent in the request's <batch_name> element
// expire_time: sent in the request's <expire_time> element
// error_msg: set to the contents of <error_msg> in the reply, if any
//
// Returns:
//      0 if the reply contains "success"
//      the return value of do_http_post() if the post failed
//      the <error_num> value from the reply if there is one
//      -1 otherwise (including failure to create the reply temp file)
//
int set_expire_time(
    const char* project_url,
    const char* authenticator,
    const char* batch_name,
    double expire_time,
    string &error_msg
) {
    char url[1024], buf[256];

    // Build the request by string concatenation, so that a long
    // authenticator or batch name can't overflow a fixed-size buffer.
    //
    string request = "<set_expire_time>\n";
    request += "<authenticator>";
    request += authenticator;
    request += "</authenticator>\n";
    request += "<batch_name>";
    request += batch_name;
    request += "</batch_name>\n";
    snprintf(buf, sizeof(buf), "<expire_time>%f</expire_time>\n", expire_time);
    request += buf;
        // this element was previously formatted but never added
        // to the request, so the server never saw the new expire time
    request += "</set_expire_time>\n";

    snprintf(url, sizeof(url), "%ssubmit_rpc_handler.php", project_url);

    FILE* reply = tmpfile();
    if (!reply) {
        error_msg = "can't create temp file for RPC reply";
        return -1;
    }
    vector<string> x;
    int retval = do_http_post(url, request.c_str(), reply, x);
    if (retval) {
        fclose(reply);
        return retval;
    }

    // assume failure unless the reply says otherwise
    //
    retval = -1;
    error_msg = "";
    fseek(reply, 0, SEEK_SET);
    while (fgets(buf, sizeof(buf), reply)) {
#ifdef SHOW_REPLY
        printf("set_expire_time reply: %s", buf);
#endif
        if (parse_int(buf, "<error_num>", retval)) continue;
        if (parse_str(buf, "<error_msg>", error_msg)) continue;
        if (strstr(buf, "success")) {
            retval = 0;
            continue;
        }
    }
    fclose(reply);
    return retval;
}
int ping_server(
const char* project_url,
string &error_msg

View File

@ -127,6 +127,7 @@ extern int create_batch(
const char* authenticator,
const char* batch_name,
const char* app_name,
double expire_time,
int &batch_id,
string& error_msg
);
@ -186,6 +187,14 @@ extern int retire_batch(
string& error_msg
);
// Set the expire time of the batch with the given name,
// via the set_expire_time web RPC.
// Returns zero on success;
// on failure returns nonzero and may set error_msg.
//
extern int set_expire_time(
const char* project_url,
const char* authenticator,
const char* batch_name,
double expire_time,
string& error_msg
);
extern int ping_server(
const char* project_url,
string& error_msg

View File

@ -75,6 +75,7 @@ struct COMMAND {
vector<string> abort_job_names;
vector<string> batch_names;
char batch_name[256];
double lease_end_time;
COMMAND(char* _in) {
in = _in;
@ -90,6 +91,7 @@ struct COMMAND {
int parse_fetch_output(char*);
int parse_abort_jobs(char*);
int parse_retire_batch(char*);
int parse_set_lease(char*);
};
vector<COMMAND*> commands;
@ -239,8 +241,10 @@ void handle_submit(COMMAND& c) {
c.out = strdup(s.c_str());
return;
}
double expire_time = time(0) + 3600;
retval = create_batch(
project_url, authenticator, req.batch_name, req.app_name, req.batch_id, error_msg
project_url, authenticator, req.batch_name, req.app_name, expire_time,
req.batch_id, error_msg
);
if (retval) {
sprintf(buf, "error\\ creating\\ batch:\\ %d\\ ", retval);
@ -529,6 +533,28 @@ void handle_retire_batch(COMMAND& c) {
c.out = strdup(s.c_str());
}
// Parse the arguments of a BOINC_SET_LEASE command:
//      <batch name> <lease end time>
// p is the strtok_r() save pointer for the command line;
// the next two space-separated tokens are consumed.
// On success, fills in batch_name and lease_end_time and returns 0.
// Returns -1 if an argument is missing, the batch name doesn't fit
// in batch_name, or the lease end time doesn't start with a number.
// NOTE(review): -1 is assumed to be an acceptable error code
// for parse_command() - confirm against the other parse_*() functions
//
int COMMAND::parse_set_lease(char* p) {
    // strtok_r() returns NULL when there are no more tokens;
    // don't pass that to strcpy() or strtod()
    //
    char* name = strtok_r(NULL, " ", &p);
    if (!name) return -1;
    char* lease = strtok_r(NULL, " ", &p);
    if (!lease) return -1;

    if (strlen(name) >= sizeof(batch_name)) return -1;
    strcpy(batch_name, name);

    char* end;
    double t = strtod(lease, &end);
    if (end == lease) return -1;
    lease_end_time = t;
    return 0;
}
// Handle a BOINC_SET_LEASE command:
// set the batch's expire time to the requested lease end time.
// c.out is set to a malloc'd result string:
// "NULL" on success, otherwise an escaped error description.
//
void handle_set_lease(COMMAND& c) {
    string error_msg;
    int retval = set_expire_time(
        project_url, authenticator, c.batch_name, c.lease_end_time, error_msg
    );
    if (!retval) {
        c.out = strdup("NULL");
        return;
    }
    char prefix[256];
    sprintf(prefix, "set_lease()\\ returned\\ %d\\ ", retval);
    string result = string(prefix) + escape_str(error_msg);
    c.out = strdup(result.c_str());
}
void handle_ping(COMMAND& c) {
string error_msg, s;
char buf[256];
@ -554,6 +580,8 @@ void* handle_command_aux(void* q) {
handle_abort_jobs(c);
} else if (!strcasecmp(c.cmd, "BOINC_RETIRE_BATCH")) {
handle_retire_batch(c);
} else if (!strcasecmp(c.cmd, "BOINC_SET_LEASE")) {
handle_set_lease(c);
} else if (!strcasecmp(c.cmd, "BOINC_PING")) {
handle_ping(c);
} else {
@ -589,6 +617,8 @@ int COMMAND::parse_command() {
retval = parse_abort_jobs(p);
} else if (!strcasecmp(cmd, "BOINC_RETIRE_BATCH")) {
retval = parse_retire_batch(p);
} else if (!strcasecmp(cmd, "BOINC_SET_LEASE")) {
retval = parse_set_lease(p);
} else if (!strcasecmp(cmd, "BOINC_PING")) {
retval = 0;
} else {
@ -624,7 +654,7 @@ int handle_command(char* p) {
if (!strcasecmp(cmd, "VERSION")) {
print_version(false);
} else if (!strcasecmp(cmd, "COMMANDS")) {
BPRINTF("S ASYNC_MODE_OFF ASYNC_MODE_ON BOINC_ABORT_JOBS BOINC_FETCH_OUTPUT BOINC_PING BOINC_QUERY_BATCHES BOINC_RETIRE_BATCH BOINC_SELECT_PROJECT BOINC_SUBMIT COMMANDS QUIT RESULTS VERSION\n");
BPRINTF("S ASYNC_MODE_OFF ASYNC_MODE_ON BOINC_ABORT_JOBS BOINC_FETCH_OUTPUT BOINC_PING BOINC_QUERY_BATCHES BOINC_RETIRE_BATCH BOINC_SELECT_PROJECT BOINC_SET_LEASE BOINC_SUBMIT COMMANDS QUIT RESULTS VERSION\n");
} else if (!strcasecmp(cmd, "RESPONSE_PREFIX")) {
flockfile(stdout);
BPRINTF("S\n");