- remote file management stuff for Condor

This commit is contained in:
David Anderson 2013-01-25 11:30:50 -08:00 committed by Oliver Bock
parent a46a5926ae
commit 2a73dc0e01
3 changed files with 206 additions and 2 deletions

View File

@ -131,4 +131,4 @@ alter table assignment
add index asgn_target(target_type, target_id);
alter table job_file
add index md5(md5);
add unique jf_md5(md5);

196
html/user/job_file.php Normal file
View File

@ -0,0 +1,196 @@
<?php
// This file is part of BOINC.
// http://boinc.berkeley.edu
// Copyright (C) 2013 University of California
//
// BOINC is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// BOINC is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
// Web RPCs for managing job input files on the server.
//
// Issues:
//
// 1) how are files named?
// Their name is a function of their MD5.
// This eliminates issues related to file immutability
//
// 2) how do we keep track of the files?
// In the MySQL database, in a table called job_file.
// Each row describes a file currently on the server.
// In addition, we maintain a table batch_file_assoc to record
// that a file is used by a particular batch.
// (Note: the association could be at the job level instead.
// but this way is more efficient if all the jobs in a batch use
// a particular file.)
//
// 3) how do we clean up unused files?
// A daemon (job_file_deleter) deletes files for which
// - the delete date (if given) is in the past, and
// - there are no associations to active batches
//
// 4) what are the RPC operations?
// query_files
// in:
// authenticator
// list of MD5s
// batch ID (optional)
// new delete time (optional)
// out:
// error message,
// or list of files (indices in the MD5 list) not present on server
// action: for each MD5 in the input list:
//     if present on server
//         update delete time
//         create batch/file association
//     else
//         add its index (position in the MD5 list) to output list
// upload_files
// in:
// authenticator
// delete time (optional)
// batch ID (optional)
// list of MD5s
// files (as multipart attachments)
// out:
// error message, or success
// action:
// for each file in list
// move to project download dir w/ appropriate name
// create job_file record
// create batch_file_assoc record if needed
// Report every class of PHP diagnostic and print it into the response,
// including errors raised during PHP startup.
// NOTE(review): with display_errors on, any notice or warning is written
// into the XML reply that the client parses; presumably this is a
// debugging aid - confirm before deploying.
//
error_reporting(E_ALL);
ini_set('display_errors', true);
ini_set('display_startup_errors', true);
require_once("../inc/boinc_db.inc");
require_once("../inc/dir_hier.inc");
require_once("../inc/xml.inc");
require_once("../inc/submit_util.inc");
// Map an MD5 to the physical name of the corresponding job file.
// The name is the MD5 with a "jf_" prefix;
// the prefix makes it obvious where the file came from.
//
function job_file_name($md5) {
    $prefix = "jf_";
    return $prefix . $md5;
}
// query_files RPC handler:
// report which of the files named in the request are absent from the server.
//
// in: $r, the parsed request (SimpleXMLElement) containing
//      - zero or more <md5> elements
//      - <delete_time> (optional; cast to int, so 0 if missing)
//      - <batch_id> (optional; cast to int, so 0 if missing)
//      It is also passed to authenticate_user().
// out: an <absent_files> element holding one <file>i</file> per file
//      that is not in the download hierarchy,
//      where i is the 0-based position of its MD5 in the request.
// side effects on job_file records:
//      - an existing record's delete_time is raised to the requested value
//      - a record is created for a file that is on disk but has none
//      - the record of a file that is missing from disk is deleted
//
// Errors are reported with xml_error(); the code in this file never
// continues usefully after calling it, so it presumably ends the request
// (confirm in xml.inc).
//
function query_files($r) {
    list($user, $user_submit) = authenticate_user($r, null);
    $absent_files = array();
    $now = time();
    $delete_time = (int)$r->delete_time;

    // TODO: $batch_id is parsed but not used yet;
    // the batch/file association described in the header comment
    // is not created here.
    //
    $batch_id = (int)$r->batch_id;
    $fanout = parse_config(get_config(), "<uldl_dir_fanout>");
    $i = 0;
    foreach ($r->md5 as $f) {
        $md5 = (string)$f;

        // The MD5 comes from the client and ends up in a file name
        // and in an SQL statement, so accept nothing but 32 hex digits.
        //
        if (!preg_match('/^[0-9a-fA-F]{32}$/', $md5)) {
            xml_error(-1, "invalid MD5 at index $i");
            return;
        }
        $fname = job_file_name($md5);
        $path = dir_hier_path($fname, "../../download", $fanout);

        // if the job_file record is there,
        // update the delete time first to avoid race condition
        // with job file deleter
        //
        $job_file = BoincJobFile::lookup_md5($md5);
        if ($job_file && $job_file->delete_time < $delete_time) {
            // update() is called on the record itself, like delete() below
            //
            $retval = $job_file->update("delete_time=$delete_time");
            // NOTE(review): this treats a truthy return as failure;
            // confirm the return convention of update() in boinc_db.inc.
            //
            if ($retval) {
                xml_error(-1, "job_file::update() failed");
            }
        }
        if (file_exists($path)) {
            // create the DB record if needed
            //
            if (!$job_file) {
                BoincJobFile::insert(
                    "(md5, create_time, delete_time) values ('$md5', $now, $delete_time)"
                );
            }
        } else {
            // the file is gone: drop the stale record (if any)
            // and tell the client to upload it
            //
            if ($job_file) {
                $job_file->delete();
            }
            $absent_files[] = $i;
        }
        $i++;
    }
    echo "<absent_files>\n";
    foreach ($absent_files as $i) {
        echo "<file>$i</file>\n";
    }
    echo "</absent_files>\n";
}
// upload_files RPC handler:
// store uploaded files in the download hierarchy and record them in the DB.
//
// in: $r, the parsed request (SimpleXMLElement) containing
//      - zero or more <md5> elements
//      - <delete_time> (optional; cast to int, so 0 if missing)
//      It is also passed to authenticate_user().
//      The i'th MD5 (0-based) belongs to the multipart attachment
//      whose form field is named file_i.
// out: <success/>, or an error reported with xml_error().
//      The code in this file never continues usefully after xml_error(),
//      so it presumably ends the request (confirm in xml.inc).
// action: for each MD5
//      move the attachment to its name (see job_file_name())
//      in the download hierarchy
//      create a job_file record
//
// NOTE(review): the content of an uploaded file is not compared with
// the MD5 the client claims for it.
//
function upload_files($r) {
    list($user, $user_submit) = authenticate_user($r, null);
    $fanout = parse_config(get_config(), "<uldl_dir_fanout>");
    $delete_time = (int)$r->delete_time;
    $i = 0;
    foreach ($r->md5 as $f) {
        $md5 = (string)$f;

        // The MD5 comes from the client and ends up in a file name
        // and in an SQL statement, so accept nothing but 32 hex digits.
        //
        if (!preg_match('/^[0-9a-fA-F]{32}$/', $md5)) {
            xml_error(-1, "invalid MD5 at index $i");
            return;
        }
        $name = "file_$i";

        // the attachment may be missing, or may have been sent as an array
        //
        if (!isset($_FILES[$name]['tmp_name']) || !is_string($_FILES[$name]['tmp_name'])) {
            xml_error(-1, "no uploaded file named $name");
            return;
        }
        $tmp_name = $_FILES[$name]['tmp_name'];
        if (!is_uploaded_file($tmp_name)) {
            xml_error(-1, "$tmp_name is not an uploaded file");
        }
        $fname = job_file_name($md5);
        $path = dir_hier_path($fname, "../../download", $fanout);

        // move_uploaded_file() is the call intended for uploads,
        // and its result tells us whether the file actually got there
        //
        if (!move_uploaded_file($tmp_name, $path)) {
            xml_error(-1, "can't store uploaded file $name");
            return;
        }
        $now = time();
        $id = BoincJobFile::insert(
            "(md5, create_time, delete_time) values ('$md5', $now, $delete_time)"
        );
        if (!$id) {
            xml_error(-1, "BoincJobFile::insert() failed");
        }
        $i++;
    }
    echo "<success/>\n";
}
// Manual test stubs, disabled by the constant-false conditions.
// Change a 0 to 1 to run the corresponding handler on a canned request
// instead of the POSTed one.

// query_files test
// NOTE(review): this canned request has no <authenticator> element.
//
if (0) {
    $r = simplexml_load_string("<query_files>\n<batch_id>0</batch_id>\n <md5>80bf244b43fb5d39541ea7011883b7e0</md5>\n <md5>a6037b05afb05f36e6a85a7c5138cbc1</md5>\n</query_files>\n ");
    // the request is a query_files message, so hand it to that handler
    //
    query_files($r);
    exit;
}

// upload_files test
//
if (0) {
    $r = simplexml_load_string("<upload_files>\n<authenticator>157f96a018b0b2f2b466e2ce3c7f54db</authenticator>\n<batch_id>1</batch_id>\n<md5>80bf244b43fb5d39541ea7011883b7e0</md5>\n<md5>a6037b05afb05f36e6a85a7c5138cbc1</md5>\n</upload_files>");
    upload_files($r);
    exit;
}
// Entry point: the request is an XML document in the POST variable
// 'request'; the name of its root element selects the operation.
//
xml_header();

// a request without the 'request' variable can't be parsed either;
// check first so that no PHP notice ends up in the reply
//
if (!isset($_POST['request'])) {
    xml_error(-1, "can't parse request message");
}
$r = simplexml_load_string($_POST['request']);
if (!$r) {
    xml_error(-1, "can't parse request message");
}
switch ($r->getName()) {
    case 'query_files':
        query_files($r);
        break;
    case 'upload_files':
        upload_files($r);
        break;
    default:
        xml_error(-1, "no such action");
}
?>

View File

@ -138,11 +138,17 @@ int upload_files (
) {
char buf[1024];
string req_msg = "<upload_files>\n";
sprintf(buf, "<authenticator>%s</authenticator>\n", authenticator);
req_msg += string(buf);
if (batch_id) {
sprintf(buf, "<batch_id>%d</batch_id>\n", batch_id);
req_msg += string(buf);
}
for (unsigned int i=0; i<md5s.size(); i++) {
sprintf(buf, "<md5>%s</md5>\n", md5s[i].c_str());
req_msg += string(buf);
}
req_msg = "</upload_files>\n";
req_msg += "</upload_files>\n";
FILE* reply = tmpfile();
char url[256];
sprintf(url, "%sjob_file.php", project_url);
@ -151,8 +157,10 @@ int upload_files (
fclose(reply);
return retval;
}
fseek(reply, 0, SEEK_SET);
bool success = false;
while (fgets(buf, 256, reply)) {
printf("reply: %s", buf);
if (strstr(buf, "success")) {
success = true;
break;