web-based remote job submission: improve interface for getting output files

Problem: the old interface for getting output files (get_output.php)
downloaded files with their physical name,
which is a cryptic string with no extension.
If the output file is, for example, a .zip, you don't see this.
You also don't see the logical name of the file.

Solution: give downloaded files better names.
For example, if a result has a single output file
with logical name 'output.zip',
download it as resultname__output.zip
The same applies to results with multiple output files,
to workunits, to single files from a result, and to batches.

Fixes #5262

This can be used as a web API, or directly from web pages (submit.php).
This replaces the old interface,
but I'm leaving the old one in place in case anyone is using it.

Add utility functions for
- getting the full path of an output file
- getting the logical names of a result's output files
This commit is contained in:
David Anderson 2023-06-07 14:59:09 -07:00
parent c520ed6c60
commit 3c51640b94
5 changed files with 221 additions and 13 deletions

2
gl
View File

@ -1,6 +1,6 @@
// This file is part of BOINC.
// http://boinc.berkeley.edu
// Copyright (C) 2021 University of California
// Copyright (C) 2023 University of California
//
// BOINC is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License

View File

@ -36,6 +36,16 @@ function dir_hier_path($filename, $root, $fanout) {
return "$dirpath/$filename";
}
// Return the path of a file in the upload directory hierarchy.
// The upload dir and fanout are read from the project config on the
// first call and cached for later calls.
//
function upload_path($filename) {
    static $upload_dir=null;
    // the fanout must be static too: otherwise it is undefined
    // on every call after the first (the config is only parsed once)
    static $uldl_dir_fanout=null;
    if (!$upload_dir) {
        $config = get_config();
        $upload_dir = parse_config($config, '<upload_dir>');
        $uldl_dir_fanout = parse_config($config, '<uldl_dir_fanout>');
    }
    return dir_hier_path($filename, $upload_dir, $uldl_dir_fanout);
}
function dir_hier_url($filename, $base, $fanout) {
$dir = filename_hash($filename, $fanout);
return "$base/$dir/$filename";

View File

@ -149,7 +149,7 @@ function get_batch_params($batch, $wus) {
}
function get_outfile_names($result) {
$names = array();
$names = [];
$xml = "<a>".$result->xml_doc_out."</a>";
$r = simplexml_load_string($xml);
if (!$r) return $names;
@ -159,6 +159,17 @@ function get_outfile_names($result) {
return $names;
}
// Return the list of <open_name> values of the <file_ref> elements
// inside the <result> element of $result->xml_doc_in,
// in document order.
// Returns an empty list if the XML can't be parsed.
//
function get_outfile_log_names($result) {
    $log_names = [];
    // xml_doc_in is wrapped in a dummy root element before parsing
    $doc = simplexml_load_string(sprintf('<a>%s</a>', $result->xml_doc_in));
    if ($doc) {
        foreach ($doc->result->file_ref as $file_ref) {
            $log_names[] = (string)($file_ref->open_name);
        }
    }
    return $log_names;
}
function get_outfile_paths($result) {
$fanout = parse_config(get_config(), "<uldl_dir_fanout>");
$upload_dir = parse_config(get_config(), "<upload_dir>");

182
html/user/get_output2.php Normal file
View File

@ -0,0 +1,182 @@
<?php
// This file is part of BOINC.
// http://boinc.berkeley.edu
// Copyright (C) 2023 University of California
//
// BOINC is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// BOINC is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
// web API for fetching output files.
// This is an updated version of get_output.php;
// I didn't want to change that on the (unlikely) chance
// that someone is using it.
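// args:
// cmd: batch, workunit, result, or file
// auth: user authenticator (used only if the user is not logged in)
// batch_id, wu_id, result_id: as needed
// file_num: index of the output file to fetch (cmd=file, with result_id)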
//
// action (see https://github.com/BOINC/boinc/issues/5262):
// result: if 1 output file, return it as resultname__logicalname
// (that way you know what it is, and it has the right extension)
// else return a zip file as resultname.zip,
// containing the output files with their logical names
// workunit:
// as above, for canonical instance
// batch:
// as above for each workunit; return as batch_(batchid).zip
//
// In the result and workunit cases, they must be part of a batch
// (so that we know who the submitter is)
require_once("../inc/boinc_db.inc");
require_once("../inc/util.inc");
require_once("../inc/dir_hier.inc");
require_once("../inc/submit_util.inc");
// Check that $auth is the authenticator of the user who owns $batch.
// Terminates the request (die) if the batch owner can't be found
// or if the authenticator doesn't match.
//
function check_auth($auth, $batch) {
    $user = BoincUser::lookup_id($batch->user_id);
    if (!$user) die("no batch owner for $batch->id");
    // hash_equals() compares in constant time and never treats
    // numeric-looking strings as equal the way != can
    if (!hash_equals((string)$user->authenticator, (string)$auth)) {
        die('bad auth');
    }
}
// Send the output file(s) of $result to the client.
//
// $result: the result whose output files are wanted
// $batch: the batch the result belongs to (its id names the temp dir)
// $file_num: if not null, send only the output file with that index
//
// - if $file_num is given, send that file as resultname__logicalname
// - else if the result has one output file, send it the same way
// - else make a temp dir submit/batchid/resultname containing symlinks
//   (named by logical name) to the output files,
//   zip it, and send submit/batchid/resultname.zip
//
function do_result_aux($result, $batch, $file_num=null) {
    $phys_names = get_outfile_names($result);
    $log_names = get_outfile_log_names($result);
    $nfiles = count($phys_names);

    if ($file_num !== null) {
        if ($file_num < 0 || $file_num >= $nfiles) {
            die("bad file_num $file_num");
        }
    } else if ($nfiles == 1) {
        $file_num = 0;
    }
    if ($file_num !== null) {
        // fall back to the physical name if there's no logical name
        $log_name = isset($log_names[$file_num])
            ? $log_names[$file_num] : $phys_names[$file_num];
        do_download(
            upload_path($phys_names[$file_num]),
            sprintf("%s__%s", $result->name, $log_name)
        );
        // return explicitly so that we never go on to send
        // a second response after the file
        return;
    }
    if ($nfiles == 0) die("no output files for $result->name");

    // make a temp dir in submit/batchid;
    // make symlinks there, and zip it
    $dir_path = "submit/$batch->id/$result->name";
    $zip_path = "$dir_path.zip";
    // names come from the DB: quote them before handing them to the shell.
    // Remove any old zip too; otherwise zip would update it in place.
    system(sprintf('rm -rf %s %s',
        escapeshellarg($dir_path), escapeshellarg($zip_path)
    ));
    if (!mkdir($dir_path, 0777, true)) die("can't create $dir_path");
    for ($i=0; $i<$nfiles; $i++) {
        $log_name = isset($log_names[$i]) ? $log_names[$i] : $phys_names[$i];
        // basename() keeps the link inside the temp dir
        symlink(upload_path($phys_names[$i]), "$dir_path/".basename($log_name));
    }
    // zip must be told what to archive; '.' puts the files
    // (symlinks are followed by default) at the top level of the archive
    $cmd = sprintf('cd %s && zip -q -r %s .',
        escapeshellarg($dir_path),
        escapeshellarg("../$result->name.zip")
    );
    system($cmd);
    do_download($zip_path);
}
// Handle a request for the output files of a result
// (or, if $file_num is not null, for one of its output files).
// The result's workunit must be part of a batch,
// and $auth must be the authenticator of the batch's owner;
// otherwise the request is terminated with an error message.
//
function do_result($result_id, $auth, $file_num=null) {
    $result = BoincResult::lookup_id($result_id);
    if (!$result) die("no result $result_id");
    $workunit = BoincWorkunit::lookup_id($result->workunitid);
    if (!$workunit) die("no workunit for $result_id");
    $batch = BoincBatch::lookup_id($workunit->batch);
    if (!$batch) die("no batch for $result_id");
    check_auth($auth, $batch);
    // pass $file_num on; without it a single-file request
    // would be treated as a request for all files
    do_result_aux($result, $batch, $file_num);
}
// Handle a request for the output files of a workunit:
// send the output files of its canonical result.
// The workunit must be part of a batch,
// and $auth must be the authenticator of the batch's owner;
// otherwise the request is terminated with an error message.
//
function do_wu($wu_id, $auth) {
    $workunit = BoincWorkunit::lookup_id($wu_id);
    if (!$workunit) die("no workunit $wu_id");
    $batch = BoincBatch::lookup_id($workunit->batch);
    if (!$batch) die("no batch for workunit $wu_id");
    // only the batch owner may fetch the files
    check_auth($auth, $batch);
    $result = BoincResult::lookup_id($workunit->canonical_resultid);
    if (!$result) die("no canonical result for workunit $wu_id");
    do_result_aux($result, $batch);
}
// Handle a request for the output files of a batch.
// $auth must be the authenticator of the batch's owner.
//
// make a temp dir submit/batchid/batch_batchid
// for each workunit in the batch that has a canonical result,
// put symlinks to its output file (or a dir of its output files) there.
// send the zipped temp dir as batch_batchid.zip
//
function do_batch($batch_id, $auth) {
    $batch = BoincBatch::lookup_id($batch_id);
    if (!$batch) die("no batch $batch_id");
    // only the batch owner may fetch the files
    check_auth($auth, $batch);

    $dir_path = sprintf('submit/%d/batch_%d', $batch_id, $batch_id);
    $zip_path = "$dir_path.zip";
    // remove any old zip too; otherwise zip would update it in place
    system(sprintf('rm -rf %s %s',
        escapeshellarg($dir_path), escapeshellarg($zip_path)
    ));
    if (!mkdir($dir_path, 0777, true)) die("can't create $dir_path");

    $wus = BoincWorkunit::enum(
        sprintf('batch=%d and canonical_resultid<>0', $batch_id)
    );
    foreach ($wus as $wu) {
        $result = BoincResult::lookup_id($wu->canonical_resultid);
        if (!$result) continue;
        $phys_names = get_outfile_names($result);
        $log_names = get_outfile_log_names($result);
        $nfiles = count($phys_names);

        // one output file: link it as resultname__logicalname;
        // otherwise link the files by logical name in a dir named
        // after the result
        if ($nfiles == 1) {
            $prefix = sprintf('%s/%s__', $dir_path, $result->name);
        } else {
            $result_dir = sprintf('%s/%s', $dir_path, $result->name);
            mkdir($result_dir);
            $prefix = "$result_dir/";
        }
        for ($i=0; $i<$nfiles; $i++) {
            // fall back to the physical name if there's no logical name;
            // basename() keeps the link inside the temp dir
            $log_name = isset($log_names[$i]) ? $log_names[$i] : $phys_names[$i];
            // symlink() instead of 'ln -s': no shell, so no quoting issues
            symlink(upload_path($phys_names[$i]), $prefix.basename($log_name));
        }
    }

    // '.' rather than '*': works even if the dir is empty.
    // zip follows symlinks by default, so the archive gets the file contents
    $cmd = sprintf('cd %s && zip -q -r %s .',
        escapeshellarg($dir_path),
        escapeshellarg(sprintf('../batch_%d.zip', $batch_id))
    );
    system($cmd);
    do_download($zip_path);
    // todo: clean up
}
// Request dispatch.
// If get_logged_in_user() returns a user, use that user's authenticator;
// otherwise use the 'auth' argument.
//
$cmd = get_str('cmd');
$user = get_logged_in_user(false);
$auth = $user ? $user->authenticator : get_str('auth');

if ($cmd == 'result') {
    do_result(get_int('result_id'), $auth);
} elseif ($cmd == 'workunit') {
    do_wu(get_int('wu_id'), $auth);
} elseif ($cmd == 'batch') {
    do_batch(get_int('batch_id'), $auth);
} elseif ($cmd == 'file') {
    // a single output file of a result, selected by file_num
    do_result(get_int('result_id'), $auth, get_int('file_num'));
} else {
    die("bad cmd\n");
}
?>

View File

@ -244,7 +244,7 @@ function handle_main($user) {
echo "<li>$app->user_friendly_name<br>
<a href=submit.php?action=admin&app_id=$app->id>Batches</a>
&middot;
<a href=manage_app.php?app_id=$app->id&action=app_version_form>Versions</a>
<a href=manage_app.php?app_id=$app->id>Manage</a>
";
}
} else {
@ -381,7 +381,7 @@ function handle_query_batch($user) {
row2("GFLOP/hours, actual", number_format(credit_to_gflop_hours($batch->credit_canonical), 2));
row2("Output File Size", size_string(batch_output_file_size($batch->id)));
end_table();
$url = boinc_get_output_files_url($user, $batch_id);
$url = "get_output2.php?cmd=batch&batch_id=$batch->id";
show_button($url, "Get zipped output files");
switch ($batch->state) {
case BATCH_STATE_IN_PROGRESS:
@ -414,11 +414,10 @@ function handle_query_batch($user) {
);
foreach($wus as $wu) {
$resultid = $wu->canonical_resultid;
$durl = boinc_get_wu_output_files_url($user,$wu->id);
if ($resultid) {
$x = "<a href=result.php?resultid=$resultid>$resultid</a>";
$y = '<font color="green">completed</font>';
$text = "<a href=$durl> Download Result Files</a>";
$text = "<a href=get_output2.php?cmd=workunit&wu_id=$wu->id>Download output files</a>";
} else {
$x = "---";
$text = "---";
@ -497,16 +496,22 @@ function handle_query_job($user) {
";
$i = 0;
if ($result->server_state == 5) {
$names = get_outfile_names($result);
$i = 0;
foreach ($names as $name) {
$url = boinc_get_output_file_url($user, $result, $i++);
$path = dir_hier_path($name, $upload_dir, $fanout);
$phys_names = get_outfile_names($result);
$log_names = get_outfile_log_names($result);
for ($i=0; $i<count($phys_names); $i++) {
$url = sprintf(
'get_output2.php?cmd=result&result_id=%d&file_num=%d',
$result->id, $i
);
$path = dir_hier_path($phys_names[$i], $upload_dir, $fanout);
$s = stat($path);
$size = $s['size'];
echo "<a href=$url>$name </a> (".number_format($size)." bytes)<br/>";
echo sprintf('<a href=%s>%s</a> (%s bytes)<br/>',
$url,
$log_names[$i],
number_format($size)
);
}
$i++;
}
echo "</td></tr>\n";
}