From 3c51640b94183d724cc1b605981b5c5031c54617 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Wed, 7 Jun 2023 14:59:09 -0700 Subject: [PATCH] web-based remote job submission: improve interface for getting output files Problem: the old interface for getting output files (get_output.php) downloaded files with their physical name, which is a cryptic string with no extension. If the output file is, for example, a .zip, you don't see this. You also don't see the logical name of the file. Solution: give downloaded files better names. For example, if a result has a single output file with logical name 'output.zip', download it as resultname__output.zip Similar for multiple output files per result, for workunits, for single files from a result, and for batches. Fixes #5262 This can be used as a web API, or directly from web pages (submit.php) This replaces the old interface, but I'm leaving it there in case anyone is using it. Add utility functions for - getting the full path of an output file - getting the logical names of a result's output files --- gl | 2 +- html/inc/dir_hier.inc | 10 +++ html/inc/submit_util.inc | 13 ++- html/user/get_output2.php | 182 ++++++++++++++++++++++++++++++++++++++ html/user/submit.php | 27 +++--- 5 files changed, 221 insertions(+), 13 deletions(-) create mode 100644 html/user/get_output2.php diff --git a/gl b/gl index 506aed9d45..d7cda381c9 100644 --- a/gl +++ b/gl @@ -1,6 +1,6 @@ // This file is part of BOINC. 
// http://boinc.berkeley.edu -// Copyright (C) 2021 University of California +// Copyright (C) 2023 University of California // // BOINC is free software; you can redistribute it and/or modify it // under the terms of the GNU Lesser General Public License diff --git a/html/inc/dir_hier.inc b/html/inc/dir_hier.inc index 671f8ed952..ea898bab91 100644 --- a/html/inc/dir_hier.inc +++ b/html/inc/dir_hier.inc @@ -36,6 +36,16 @@ function dir_hier_path($filename, $root, $fanout) { return "$dirpath/$filename"; } +function upload_path($filename) { + static $upload_dir=null; + if (!$upload_dir) { + $config = get_config(); + $upload_dir = parse_config($config, '<upload_dir>'); + $uldl_dir_fanout = parse_config($config, '<uldl_dir_fanout>'); + } + return dir_hier_path($filename, $upload_dir, $uldl_dir_fanout); +} + function dir_hier_url($filename, $base, $fanout) { $dir = filename_hash($filename, $fanout); return "$base/$dir/$filename"; diff --git a/html/inc/submit_util.inc b/html/inc/submit_util.inc index 281bede8d1..7a49f96964 100644 --- a/html/inc/submit_util.inc +++ b/html/inc/submit_util.inc @@ -149,7 +149,7 @@ function get_batch_params($batch, $wus) { } function get_outfile_names($result) { - $names = array(); + $names = []; $xml = "<a>".$result->xml_doc_out."</a>"; $r = simplexml_load_string($xml); if (!$r) return $names; @@ -159,6 +159,17 @@ function get_outfile_names($result) { return $names; } +function get_outfile_log_names($result) { + $names = []; + $xml = "<a>".$result->xml_doc_in."</a>"; + $r = simplexml_load_string($xml); + if (!$r) return $names; + foreach ($r->result->file_ref as $fr) { + $names[] = (string)($fr->open_name); + } + return $names; +} + function get_outfile_paths($result) { $fanout = parse_config(get_config(), "<uldl_dir_fanout>"); $upload_dir = parse_config(get_config(), "<upload_dir>"); diff --git a/html/user/get_output2.php b/html/user/get_output2.php new file mode 100644 index 0000000000..a7f5a1e885 --- /dev/null +++ b/html/user/get_output2.php @@ -0,0 +1,182 @@ +. +// web API for fetching output files.
+// This is an updated version of get_output.php; +// I didn't want to change that on the (unlikely) chance +// that someone is using it. + +// args: +// cmd: batch, workunit, or result +// auth: user authenticator +// batch_id, wu_id, result_id: as needed +// +// action (see https://github.com/BOINC/boinc/issues/5262): +// result: if 1 output file, return it as resultname__logicalname +// (that way you know what it is, and it has the right extension) +// else return a zip file as resultname.zip, +// containing the output files with their logical names +// workunit: +// as above, for canonical instance +// batch: +// as above for each workunit; return as batch_(batchid).zip +// +// In the result and workunit cases, they must be part of a batch +// (so that we know who the submitter is) + +require_once("../inc/boinc_db.inc"); +require_once("../inc/util.inc"); +require_once("../inc/dir_hier.inc"); +require_once("../inc/submit_util.inc"); + +function check_auth($auth, $batch) { + $user = BoincUser::lookup_id($batch->user_id); + if (!$user) die("no batch owner for $batch->id"); + if ($user->authenticator != $auth) die('bad auth'); +} + +function do_result_aux($result, $batch, $file_num=null) { + $phys_names = get_outfile_names($result); + $log_names = get_outfile_log_names($result); + if ($file_num !== null) { + $path = upload_path($phys_names[$file_num]); + do_download($path, + sprintf("%s__%s", $result->name, $log_names[$file_num]) + ); + } + if (count($phys_names) == 1) { + $path = upload_path($phys_names[0]); + do_download($path, + sprintf("%s__%s", $result->name, $log_names[0]) + ); + } else { + // make a temp dir in submit/batchid; + // make symlinks there, and zip it + $dir_path = "submit/$batch->id/$result->name"; + system("rm -r $dir_path"); + mkdir($dir_path); + for ($i=0; $iid, + $result->name + ); + system($cmd); + do_download("$dir_path.zip"); + } +} + +function do_result($result_id, $auth, $file_num=null) { + $result = BoincResult::lookup_id($result_id); 
+ if (!$result) die("no result $result_id"); + $workunit = BoincWorkunit::lookup_id($result->workunitid); + if (!$workunit) die("no workunit for $result_id"); + $batch = BoincBatch::lookup_id($workunit->batch); + if (!$batch) die("no batch for $result_id"); + check_auth($auth, $batch); + do_result_aux($result, $batch); +} + +function do_wu($wu_id, $auth) { + $workunit = BoincWorkunit::lookup_id($wu_id); + if (!$workunit) die("no workunit for $result_id"); + $batch = BoincBatch::lookup_id($workunit->batch); + if (!$batch) die("no batch for $result_id"); + $result = BoincResult::lookup_id($workunit->canonical_resultid); + do_result_aux($result, $batch); +} + +// make a temp dir in submit/batchid +// for each workunit in batch, +// put symlinks to its output file (or a dir of its output files) there. +// send the zipped temp dir. +// +function do_batch($batch_id, $auth) { + $batch = BoincBatch::lookup_id($batch_id); + if (!$batch) die("no batch $batch_id"); + $dir_path = sprintf('submit/%d/batch_%d', $batch_id, $batch_id); + system("rm -r $dir_path"); + mkdir($dir_path); + + $wus = BoincWorkunit::enum("batch=$batch_id and canonical_resultid<>0"); + foreach ($wus as $wu) { + $result = BoincResult::lookup_id($wu->canonical_resultid); + $phys_names = get_outfile_names($result); + $log_names = get_outfile_log_names($result); + if (count($phys_names) == 1) { + $cmd = sprintf('ln -s %s %s/%s__%s', + upload_path($phys_names[0]), + $dir_path, + $result->name, + $log_names[0] + ); + system($cmd); + } else { + mkdir(sprintf('%s/%s', $dir_path, $result->name)); + for ($i=0; $iname, + $log_names[$i] + ); + system($cmd); + } + } + } + $cmd = sprintf('cd submit/%d/batch_%d; zip -q -r ../batch_%d.zip *', + $batch_id, + $batch_id, + $batch_id + ); + system($cmd); + do_download(sprintf('submit/%d/batch_%d.zip', $batch_id, $batch_id)); + // todo: clean up +} + +$cmd = get_str('cmd'); +$user = get_logged_in_user(false); +if ($user) { + $auth = $user->authenticator; +} else { + $auth = 
get_str('auth'); +} +switch ($cmd) { +case 'result': + do_result(get_int('result_id'), $auth); + break; +case 'workunit': + do_wu(get_int('wu_id'), $auth); + break; +case 'batch': + do_batch(get_int('batch_id'), $auth); + break; +case 'file': + do_result(get_int('result_id'), $auth, get_int('file_num')); + break; +default: + die("bad cmd\n"); +} +?> diff --git a/html/user/submit.php b/html/user/submit.php index 1ea17f4c26..1d2f855778 100644 --- a/html/user/submit.php +++ b/html/user/submit.php @@ -244,7 +244,7 @@ function handle_main($user) { echo "
  • $app->user_friendly_name
    id>Batches · - id&action=app_version_form>Versions + id>Manage "; } } else { @@ -381,7 +381,7 @@ function handle_query_batch($user) { row2("GFLOP/hours, actual", number_format(credit_to_gflop_hours($batch->credit_canonical), 2)); row2("Output File Size", size_string(batch_output_file_size($batch->id))); end_table(); - $url = boinc_get_output_files_url($user, $batch_id); + $url = "get_output2.php?cmd=batch&batch_id=$batch->id"; show_button($url, "Get zipped output files"); switch ($batch->state) { case BATCH_STATE_IN_PROGRESS: @@ -414,11 +414,10 @@ function handle_query_batch($user) { ); foreach($wus as $wu) { $resultid = $wu->canonical_resultid; - $durl = boinc_get_wu_output_files_url($user,$wu->id); if ($resultid) { $x = "$resultid"; $y = 'completed'; - $text = " Download Result Files"; + $text = "id>Download output files"; } else { $x = "---"; $text = "---"; @@ -497,16 +496,22 @@ function handle_query_job($user) { "; $i = 0; if ($result->server_state == 5) { - $names = get_outfile_names($result); - $i = 0; - foreach ($names as $name) { - $url = boinc_get_output_file_url($user, $result, $i++); - $path = dir_hier_path($name, $upload_dir, $fanout); + $phys_names = get_outfile_names($result); + $log_names = get_outfile_log_names($result); + for ($i=0; $iid, $i + ); + $path = dir_hier_path($phys_names[$i], $upload_dir, $fanout); $s = stat($path); $size = $s['size']; - echo "$name (".number_format($size)." bytes)
    "; + echo sprintf('%s (%s bytes)
    ', + $url, + $log_names[$i], + number_format($size) + ); } - $i++; } echo "\n"; }