diff --git a/db/constraints.sql b/db/constraints.sql index 948de8fe37..e32daffb21 100644 --- a/db/constraints.sql +++ b/db/constraints.sql @@ -131,4 +131,4 @@ alter table assignment add index asgn_target(target_type, target_id); alter table job_file - add index md5(md5); + add unique jf_md5(md5); diff --git a/html/user/job_file.php b/html/user/job_file.php new file mode 100644 index 0000000000..9e01ad5d17 --- /dev/null +++ b/html/user/job_file.php @@ -0,0 +1,196 @@ +. + +// Web RPCs for managing job input files on the server. +// +// Issues: +// +// 1) how are files named? +// Their name is a function of their MD5. +// This eliminates issues related to file immutability +// +// 2) how do we keep track of the files? +// In the MySQL database, in a table called job_file. +// Each row describes a file currently on the server. +// In addition, we maintain a table batch_file_assoc to record +// that a file is used by a particular batch. +// (Note: the association could be at the job level instead, +// but this way is more efficient if all the jobs in a batch use +// a particular file.) +// +// 3) how do we clean up unused files? +// A daemon (job_file_deleter) deletes files for which +// - the delete date (if given) is in the past, and +// - there are no associations to active batches +// +// 4) what are the RPC operations? 
+// query_files +// in: +// authenticator +// list of MD5s +// batch ID (optional) +// new delete time (optional) +// out: +// error message, +// or list of files (indices in the MD5 list) not present on server +// action: for each MD5 in the input list: +// if present on server +// update delete time +// create batch/file association +// add MD5 to output list +// upload_files +// in: +// authenticator +// delete time (optional) +// batch ID (optional) +// list of MD5s +// files (as multipart attachments) +// out: +// error message, or success +// action: +// for each file in list +// move to project download dir w/ appropriate name +// create job_file record +// create batch_file_assoc record if needed + +error_reporting(E_ALL); +ini_set('display_errors', true); +ini_set('display_startup_errors', true); + +require_once("../inc/boinc_db.inc"); +require_once("../inc/dir_hier.inc"); +require_once("../inc/xml.inc"); +require_once("../inc/submit_util.inc"); + +// the physical name of a file is jf_(md5). 
+// Prepend the jf_ to make the source of the file clear
+//
+function job_file_name($md5) {
+    return "jf_$md5";
+}
+
+// Return true if $md5 is exactly 32 hex digits.
+// MD5s come straight from the client's request and are pasted into
+// SQL text and into file names below, so anything else is rejected
+// before it is used.
+//
+function job_file_md5_is_valid($md5) {
+    return preg_match('/\A[0-9a-fA-F]{32}\z/', $md5) === 1;
+}
+
+// query_files
+// in: request element with a list of md5 children,
+//     and optional delete_time and batch_id
+// out: the indices (positions in the request's MD5 list) of the files
+//      that are not in the download directory, one per line
+// action: for each MD5
+//     - if a job_file record exists with an earlier delete time,
+//       move its delete time forward
+//     - if the file is on disk, create the job_file record if missing
+//     - otherwise delete any stale record and report the index
+//
+// NOTE(review): the error paths rely on xml_error() not returning,
+// as the original checks already did - confirm.
+//
+function query_files($r) {
+    list($user, $user_submit) = authenticate_user($r, null);
+    $absent_files = array();
+    $now = time();
+    $delete_time = (int)$r->delete_time;
+    // This used to be read from delete_time (copy/paste slip).
+    // NOTE(review): assumes the request element is named batch_id -
+    // confirm against the client.  It is not used below yet: the
+    // batch/file association is not created by this function.
+    $batch_id = (int)$r->batch_id;
+    // NOTE(review): the tag name passed to parse_config() is empty here -
+    // confirm which config element holds the directory fan-out.
+    $fanout = parse_config(get_config(), "");
+    $i = 0;
+    foreach ($r->md5 as $f) {
+        $md5 = (string)$f;
+        if (!job_file_md5_is_valid($md5)) {
+            xml_error(-1, "invalid MD5 in request");
+        }
+        // NOTE(review): looks like debugging output - confirm the client
+        // tolerates it before relying on the reply format.
+        echo "processing $md5\n";
+        $fname = job_file_name($md5);
+        $path = dir_hier_path($fname, "../../download", $fanout);
+
+        // if the job_file record is there,
+        // update the delete time first to avoid race condition
+        // with job file deleter
+        //
+        $job_file = BoincJobFile::lookup_md5($md5);
+        if ($job_file && $job_file->delete_time < $delete_time) {
+            // call update() on the looked-up record, not statically
+            $retval = $job_file->update("delete_time=$delete_time");
+            // NOTE(review): a truthy return is treated as failure here,
+            // unchanged from the original - confirm update()'s convention.
+            if ($retval) {
+                xml_error(-1, "job_file::update() failed");
+            }
+        }
+        if (file_exists($path)) {
+            // create the DB record if needed
+            //
+            if (!$job_file) {
+                BoincJobFile::insert(
+                    "(md5, create_time, delete_time) values ('$md5', $now, $delete_time)"
+                );
+            }
+        } else {
+            // the file is gone: drop the stale record and report it
+            if ($job_file) {
+                $job_file->delete();
+            }
+            $absent_files[] = $i;
+        }
+        $i++;
+    }
+    echo "\n";
+    foreach ($absent_files as $i) {
+        echo "$i\n";
+    }
+    echo "\n";
+}
+
+// upload_files
+// in: list of MD5s, and the files themselves as multipart attachment
+//     (the i-th MD5 goes with the form field named file_i)
+// out: error code
+// action: for each MD5, move the matching upload into the download
+//     hierarchy under its jf_(md5) name, then create its job_file record.
+//     The record is only created once the file has actually been moved.
+//
+// NOTE(review): the error paths rely on xml_error() not returning,
+// as the original checks already did - confirm.
+//
+function upload_files($r) {
+    list($user, $user_submit) = authenticate_user($r, null);
+    // NOTE(review): the tag name passed to parse_config() is empty here -
+    // confirm which config element holds the directory fan-out.
+    $fanout = parse_config(get_config(), "");
+    $delete_time = (int)$r->delete_time;
+    // print_r($_FILES) was dropped here:
+    // it echoed the server's temp file paths into the reply
+    $i = 0;
+    foreach ($r->md5 as $f) {
+        $md5 = (string)$f;
+        if (!job_file_md5_is_valid($md5)) {
+            xml_error(-1, "invalid MD5 in request");
+        }
+        $name = "file_$i";
+        // the request may list more MD5s than there are attachments
+        if (!isset($_FILES[$name]['tmp_name']) || !is_string($_FILES[$name]['tmp_name'])) {
+            xml_error(-1, "$name is missing from the upload");
+        }
+        $tmp_name = $_FILES[$name]['tmp_name'];
+        if (!is_uploaded_file($tmp_name)) {
+            xml_error(-1, "$tmp_name is not an uploaded file");
+        }
+        $fname = job_file_name($md5);
+        $path = dir_hier_path($fname, "../../download", $fanout);
+        // check the move: otherwise a record would be created
+        // for a file that never reached the download directory
+        if (!move_uploaded_file($tmp_name, $path)) {
+            xml_error(-1, "can't move $name to the download directory");
+        }
+        $now = time();
+        $id = BoincJobFile::insert(
+            "(md5, create_time, delete_time) values ('$md5', $now, $delete_time)"
+        );
+        if (!$id) {
+            xml_error(-1, "BoincJobFile::insert() failed");
+        }
+        $i++;
+    }
+    echo "\n";
+}
+
+if (0) {
+$r = simplexml_load_string("\n0\n 80bf244b43fb5d39541ea7011883b7e0\n a6037b05afb05f36e6a85a7c5138cbc1\n\n ");
+submit_batch($r);
+exit;
+}
+if (0) {
+    $r = simplexml_load_string("\n157f96a018b0b2f2b466e2ce3c7f54db\n1\n80bf244b43fb5d39541ea7011883b7e0\na6037b05afb05f36e6a85a7c5138cbc1\n");
+    upload_files($r);
+    exit;
+}
+
+xml_header();
+// a missing or non-string "request" field is handled like unparsable XML
+// instead of raising a notice or type error
+$request = (isset($_POST['request']) && is_string($_POST['request'])) ? $_POST['request'] : '';
+$r = ($request !== '') ? simplexml_load_string($request) : false;
+if (!$r) {
+    xml_error(-1, "can't parse request message");
+}
+
+// the name of the request's root element selects the operation
+switch($r->getName()) {
+case 'query_files':
+    query_files($r);
+    break;
+case 'upload_files':
+    // NOTE(review): looks like debugging output - confirm before removing
+    echo "foo\n";
+    upload_files($r);
+    break;
+default:
+    xml_error(-1, "no such action");
+}
+
+?>
diff --git a/samples/condor/curl.cpp b/samples/condor/curl.cpp index 1725afcaeb..a0635cce1f 100644 --- a/samples/condor/curl.cpp +++ b/samples/condor/curl.cpp @@ -138,11 +138,17 @@ int upload_files ( ) { char buf[1024]; string req_msg = "\n"; + sprintf(buf, "%s\n", authenticator); + req_msg += string(buf); + if (batch_id) { + sprintf(buf, "%d\n", batch_id); + req_msg += string(buf); + } for (unsigned int i=0; i%s\n", md5s[i].c_str()); req_msg += string(buf); } - req_msg = "\n"; + req_msg += "\n"; FILE* reply = tmpfile(); char url[256]; sprintf(url, "%sjob_file.php", project_url); @@ -151,8 +157,10 @@ int upload_files ( fclose(reply); return retval; } + fseek(reply, 0, SEEK_SET); bool success = false; while (fgets(buf, 256, reply)) { + printf("reply: %s", buf); if (strstr(buf, "success")) { success = true; break;