From 234234569b76fc127507941e0585a1e8d233b0dd Mon Sep 17 00:00:00 2001 From: David Anderson Date: Fri, 4 Jul 2008 19:41:58 +0000 Subject: [PATCH] - Improvements to single-job submission: 1) the assimilator creates a job_summary_WUID file saying - what host did the job - error status if any - info about missing output files 2) all output files are now optional. 3) the assimilator deletes temp files (templates, job dir file) svn path=/trunk/boinc/; revision=15551 --- checkin_notes | 16 ++++++++++ client/app_start.C | 3 +- sched/single_job_assimilator.C | 55 ++++++++++++++++++++++++++-------- tools/boinc_submit | 11 ++++--- 4 files changed, 67 insertions(+), 18 deletions(-) diff --git a/checkin_notes b/checkin_notes index 3c8ed05c9c..7b83923639 100644 --- a/checkin_notes +++ b/checkin_notes @@ -5459,3 +5459,19 @@ David 3 July 2008 api/ boinc_api.C + +David 4 July 2008 + - Improvements to single-job submission: + 1) the assimilator creates a job_summary_WUID file saying + - what host did the job + - error status if any + - info about missing output files + 2) all output files are now optional. + 3) the assimilator deletes temp files (templates, job dir file) + + client/ + app_start.C + sched/ + single_job_assimilator.C + tools/ + boinc_submit diff --git a/client/app_start.C b/client/app_start.C index 8c4f8596a4..5ec460e62b 100644 --- a/client/app_start.C +++ b/client/app_start.C @@ -239,8 +239,7 @@ static int make_soft_link(PROJECT* project, char* link_path, char* rel_file_path // set up a file reference, given a slot dir and project dir. // This means: // 1) copy the file to slot dir, if reference is by copy -// 2) (Unix) make a symbolic link -// 3) (Windows) make a +// 2) else make a soft link // static int setup_file( PROJECT* project, FILE_INFO* fip, FILE_REF& fref, diff --git a/sched/single_job_assimilator.C b/sched/single_job_assimilator.C index a4ccad7010..9556908efd 100644 --- a/sched/single_job_assimilator.C +++ b/sched/single_job_assimilator.C @@ -42,17 +42,33 @@ int assimilate_handler( WORKUNIT& wu, vector& /*results*/, RESULT& canonical_result ) { int retval; - char buf[1024], filename[256], job_dir[256]; + char buf[1024], filename[256], job_dir[256], job_dir_file[256]; unsigned int i; + // delete the template files + // + sprintf(buf, "../templates/sj_wu_template_%d", wu.id); + unlink(buf); + sprintf(buf, "../templates/sj_result_template_%d", wu.id); + unlink(buf); + + // read and delete the job directory file + // sprintf(filename, "sj_%d", wu.id); - dir_hier_path(filename, config.upload_dir, config.uldl_dir_fanout, buf); - FILE* f = fopen(buf, "r"); + dir_hier_path( + filename, config.upload_dir, config.uldl_dir_fanout, job_dir_file + ); + FILE* f = fopen(job_dir_file, "r"); if (!f) { log_messages.printf(MSG_CRITICAL, "Can't open job file %s\n", buf); return 0; } fgets(buf, 1024, f); + fclose(f); + unlink(job_dir_file); + + // parse the job directory file + // char* p = strstr(buf, ""); if (!p) { log_messages.printf(MSG_CRITICAL, "garbage in job file: %s\n", buf); @@ -65,7 +81,21 @@ int assimilate_handler( return 0; } *p = 0; + + // Create a job summary file + // + sprintf(filename, "%s/job_summary_%d", job_dir, wu.id); + f = fopen(filename, "w"); + + // If job was successful, copy the output files + // if (wu.canonical_resultid) { + fprintf(f, + "Job was completed by host %d.\n" + "CPU time: %f seconds\n", + canonical_result.hostid, + canonical_result.cpu_time + ); vector output_files; char copy_path[256]; get_output_file_infos(canonical_result, output_files); @@ -75,26 +105,27 @@ int assimilate_handler( string logical_name; retval = get_logical_name(canonical_result, fi.path, logical_name); if (retval) { - log_messages.printf(MSG_CRITICAL, + fprintf(f, "Couldn't get logical name for %s: %d\n", fi.path.c_str(), retval ); - return retval; + continue; } sprintf(copy_path, "%s/%s", job_dir, logical_name.c_str()); retval = boinc_copy(fi.path.c_str() , copy_path); if (retval) { - log_messages.printf(MSG_CRITICAL, - "couldn't copy file %s to %s\n", - fi.path.c_str(), copy_path + fprintf(f, + "Output file %s not present.\n", logical_name.c_str() ); - return retval; + continue; } } } else { - sprintf(buf, "%s/error_msg", job_dir); - f = fopen(buf, "w"); - fprintf(f, "Error: 0x%x\n", wu.error_mask); + fprintf(f, + "The job was not successfully completed.\n" + "Error: 0x%x\n", wu.error_mask + ); } + fclose(f); return 0; } diff --git a/tools/boinc_submit b/tools/boinc_submit index f0aebd01dd..22183d0c17 100755 --- a/tools/boinc_submit +++ b/tools/boinc_submit @@ -6,18 +6,19 @@ // Implementation notes: // - The jobs use the app "single_job_PLATFORM". // This app has a single app_version containing the wrapper for that platform -// - the executable is part of the WU, and has the sticky bit set, +// - the executable is part of the WU, has the sticky bit set, // and has a signature // - The logical and physical name of the executable // (as stored in the download directory) is "program_platform_cksum" // where cksum is the last 8 chars of the MD5 -// - The physical name of the job file is job_WUID.xml +// - The physical name of the job file is sj_WUID.xml // - The physical names of the input/output files are name_WUID // - a file containing the job directory is stored in // sj_WUID in the upload hierarchy -// - a workunit template sj_WUID is created in templates/ +// - workunit and result templates sj_wu_template_WUID +// and sj_result_template_WUID are created in templates/ // - the single_job_assimilator copies the output files to the job dir, -// and cleans up the sj_WUID and WU template files +// and cleans up the sj_WUID and template files ini_set('error_reporting', E_ALL); @@ -285,6 +286,7 @@ function make_result_template() { " $outfile + "); @@ -296,6 +298,7 @@ function make_result_template() { " $stdout_file + "); }