- Improvements to single-job submission:

    1) the assimilator creates a job_summary_WUID file saying
        - what host did the job
        - error status if any
        - info about missing output files
    2) all output files are now optional
    3) the assimilator deletes temp files (templates, job dir file)

svn path=/trunk/boinc/; revision=15551
This commit is contained in:
David Anderson 2008-07-04 19:41:58 +00:00
parent a5a7332b6e
commit 234234569b
4 changed files with 67 additions and 18 deletions

View File

@ -5459,3 +5459,19 @@ David 3 July 2008
api/
boinc_api.C
David 4 July 2008
- Improvements to single-job submission:
1) the assimilator creates a job_summary_WUID file saying
- what host did the job
- error status if any
- info about missing output files
2) all output files are now optional.
3) the assimilator deletes temp files (templates, job dir file)
client/
app_start.C
sched/
single_job_assimilator.C
tools/
boinc_submit

View File

@ -239,8 +239,7 @@ static int make_soft_link(PROJECT* project, char* link_path, char* rel_file_path
// set up a file reference, given a slot dir and project dir.
// This means:
// 1) copy the file to slot dir, if reference is by copy
// 2) (Unix) make a symbolic link
// 3) (Windows) make a
// 2) else make a soft link
//
static int setup_file(
PROJECT* project, FILE_INFO* fip, FILE_REF& fref,

View File

@ -42,17 +42,33 @@ int assimilate_handler(
WORKUNIT& wu, vector<RESULT>& /*results*/, RESULT& canonical_result
) {
int retval;
char buf[1024], filename[256], job_dir[256];
char buf[1024], filename[256], job_dir[256], job_dir_file[256];
unsigned int i;
// delete the template files
//
sprintf(buf, "../templates/sj_wu_template_%d", wu.id);
unlink(buf);
sprintf(buf, "../templates/sj_result_template_%d", wu.id);
unlink(buf);
// read and delete the job directory file
//
sprintf(filename, "sj_%d", wu.id);
dir_hier_path(filename, config.upload_dir, config.uldl_dir_fanout, buf);
FILE* f = fopen(buf, "r");
dir_hier_path(
filename, config.upload_dir, config.uldl_dir_fanout, job_dir_file
);
FILE* f = fopen(job_dir_file, "r");
if (!f) {
log_messages.printf(MSG_CRITICAL, "Can't open job file %s\n", buf);
return 0;
}
fgets(buf, 1024, f);
fclose(f);
unlink(job_dir_file);
// parse the job directory file
//
char* p = strstr(buf, "<job_dir>");
if (!p) {
log_messages.printf(MSG_CRITICAL, "garbage in job file: %s\n", buf);
@ -65,7 +81,21 @@ int assimilate_handler(
return 0;
}
*p = 0;
// Create a job summary file
//
sprintf(filename, "%s/job_summary_%d", job_dir, wu.id);
f = fopen(filename, "w");
// If job was successful, copy the output files
//
if (wu.canonical_resultid) {
fprintf(f,
"Job was completed by host %d.\n"
"CPU time: %f seconds\n",
canonical_result.hostid,
canonical_result.cpu_time
);
vector<FILE_INFO> output_files;
char copy_path[256];
get_output_file_infos(canonical_result, output_files);
@ -75,26 +105,27 @@ int assimilate_handler(
string logical_name;
retval = get_logical_name(canonical_result, fi.path, logical_name);
if (retval) {
log_messages.printf(MSG_CRITICAL,
fprintf(f,
"Couldn't get logical name for %s: %d\n",
fi.path.c_str(), retval
);
return retval;
continue;
}
sprintf(copy_path, "%s/%s", job_dir, logical_name.c_str());
retval = boinc_copy(fi.path.c_str() , copy_path);
if (retval) {
log_messages.printf(MSG_CRITICAL,
"couldn't copy file %s to %s\n",
fi.path.c_str(), copy_path
fprintf(f,
"Output file %s not present.\n", logical_name.c_str()
);
return retval;
continue;
}
}
} else {
sprintf(buf, "%s/error_msg", job_dir);
f = fopen(buf, "w");
fprintf(f, "Error: 0x%x\n", wu.error_mask);
fprintf(f,
"The job was not successfully completed.\n"
"Error: 0x%x\n", wu.error_mask
);
}
fclose(f);
return 0;
}

View File

@ -6,18 +6,19 @@
// Implementation notes:
// - The jobs use the app "single_job_PLATFORM".
// This app has a single app_version containing the wrapper for that platform
// - the executable is part of the WU, and has the sticky bit set,
// - the executable is part of the WU, has the sticky bit set,
// and has a signature
// - The logical and physical name of the executable
// (as stored in the download directory) is "program_platform_cksum"
// where cksum is the last 8 chars of the MD5
// - The physical name of the job file is job_WUID.xml
// - The physical name of the job file is sj_WUID.xml
// - The physical names of the input/output files are name_WUID
// - a file containing the job directory is stored in
// sj_WUID in the upload hierarchy
// - a workunit template sj_WUID is created in templates/
// - workunit and result templates sj_wu_template_WUID
// and sj_result_template_WUID are created in templates/
// - the single_job_assimilator copies the output files to the job dir,
// and cleans up the sj_WUID and WU template files
// and cleans up the sj_WUID and template files
ini_set('error_reporting', E_ALL);
@ -285,6 +286,7 @@ function make_result_template() {
" <file_ref>
<file_name><OUTFILE_$i/></file_name>
<open_name>$outfile</open_name>
<optional/>
<copy_file/>
</file_ref>
");
@ -296,6 +298,7 @@ function make_result_template() {
" <file_ref>
<file_name><OUTFILE_$i/></file_name>
<open_name>$stdout_file</open_name>
<optional/>
</file_ref>
");
}