- Improvements to single-job submission:

    1) the assimilator creates a job_summary_WUID file reporting
        - which host ran the job
        - the error status, if any
        - info about any missing output files
    2) all output files are now optional.
    3) the assimilator deletes temp files (templates, job dir file)

svn path=/trunk/boinc/; revision=15551
This commit is contained in:
David Anderson 2008-07-04 19:41:58 +00:00
parent a5a7332b6e
commit 234234569b
4 changed files with 67 additions and 18 deletions

View File

@ -5459,3 +5459,19 @@ David 3 July 2008
api/ api/
boinc_api.C boinc_api.C
David 4 July 2008
- Improvements to single-job submission:
1) the assimilator creates a job_summary_WUID file saying
- what host did the job
- error status if any
- info about missing output files
2) all output files are now optional.
3) the assimilator deletes temp files (templates, job dir file)
client/
app_start.C
sched/
single_job_assimilator.C
tools/
boinc_submit

View File

@ -239,8 +239,7 @@ static int make_soft_link(PROJECT* project, char* link_path, char* rel_file_path
// set up a file reference, given a slot dir and project dir. // set up a file reference, given a slot dir and project dir.
// This means: // This means:
// 1) copy the file to slot dir, if reference is by copy // 1) copy the file to slot dir, if reference is by copy
// 2) (Unix) make a symbolic link // 2) else make a soft link
// 3) (Windows) make a
// //
static int setup_file( static int setup_file(
PROJECT* project, FILE_INFO* fip, FILE_REF& fref, PROJECT* project, FILE_INFO* fip, FILE_REF& fref,

View File

@ -42,17 +42,33 @@ int assimilate_handler(
WORKUNIT& wu, vector<RESULT>& /*results*/, RESULT& canonical_result WORKUNIT& wu, vector<RESULT>& /*results*/, RESULT& canonical_result
) { ) {
int retval; int retval;
char buf[1024], filename[256], job_dir[256]; char buf[1024], filename[256], job_dir[256], job_dir_file[256];
unsigned int i; unsigned int i;
// delete the template files
//
sprintf(buf, "../templates/sj_wu_template_%d", wu.id);
unlink(buf);
sprintf(buf, "../templates/sj_result_template_%d", wu.id);
unlink(buf);
// read and delete the job directory file
//
sprintf(filename, "sj_%d", wu.id); sprintf(filename, "sj_%d", wu.id);
dir_hier_path(filename, config.upload_dir, config.uldl_dir_fanout, buf); dir_hier_path(
FILE* f = fopen(buf, "r"); filename, config.upload_dir, config.uldl_dir_fanout, job_dir_file
);
FILE* f = fopen(job_dir_file, "r");
if (!f) { if (!f) {
log_messages.printf(MSG_CRITICAL, "Can't open job file %s\n", buf); log_messages.printf(MSG_CRITICAL, "Can't open job file %s\n", buf);
return 0; return 0;
} }
fgets(buf, 1024, f); fgets(buf, 1024, f);
fclose(f);
unlink(job_dir_file);
// parse the job directory file
//
char* p = strstr(buf, "<job_dir>"); char* p = strstr(buf, "<job_dir>");
if (!p) { if (!p) {
log_messages.printf(MSG_CRITICAL, "garbage in job file: %s\n", buf); log_messages.printf(MSG_CRITICAL, "garbage in job file: %s\n", buf);
@ -65,7 +81,21 @@ int assimilate_handler(
return 0; return 0;
} }
*p = 0; *p = 0;
// Create a job summary file
//
sprintf(filename, "%s/job_summary_%d", job_dir, wu.id);
f = fopen(filename, "w");
// If job was successful, copy the output files
//
if (wu.canonical_resultid) { if (wu.canonical_resultid) {
fprintf(f,
"Job was completed by host %d.\n"
"CPU time: %f seconds\n",
canonical_result.hostid,
canonical_result.cpu_time
);
vector<FILE_INFO> output_files; vector<FILE_INFO> output_files;
char copy_path[256]; char copy_path[256];
get_output_file_infos(canonical_result, output_files); get_output_file_infos(canonical_result, output_files);
@ -75,26 +105,27 @@ int assimilate_handler(
string logical_name; string logical_name;
retval = get_logical_name(canonical_result, fi.path, logical_name); retval = get_logical_name(canonical_result, fi.path, logical_name);
if (retval) { if (retval) {
log_messages.printf(MSG_CRITICAL, fprintf(f,
"Couldn't get logical name for %s: %d\n", "Couldn't get logical name for %s: %d\n",
fi.path.c_str(), retval fi.path.c_str(), retval
); );
return retval; continue;
} }
sprintf(copy_path, "%s/%s", job_dir, logical_name.c_str()); sprintf(copy_path, "%s/%s", job_dir, logical_name.c_str());
retval = boinc_copy(fi.path.c_str() , copy_path); retval = boinc_copy(fi.path.c_str() , copy_path);
if (retval) { if (retval) {
log_messages.printf(MSG_CRITICAL, fprintf(f,
"couldn't copy file %s to %s\n", "Output file %s not present.\n", logical_name.c_str()
fi.path.c_str(), copy_path
); );
return retval; continue;
} }
} }
} else { } else {
sprintf(buf, "%s/error_msg", job_dir); fprintf(f,
f = fopen(buf, "w"); "The job was not successfully completed.\n"
fprintf(f, "Error: 0x%x\n", wu.error_mask); "Error: 0x%x\n", wu.error_mask
);
} }
fclose(f);
return 0; return 0;
} }

View File

@ -6,18 +6,19 @@
// Implementation notes: // Implementation notes:
// - The jobs use the app "single_job_PLATFORM". // - The jobs use the app "single_job_PLATFORM".
// This app has a single app_version containing the wrapper for that platform // This app has a single app_version containing the wrapper for that platform
// - the executable is part of the WU, and has the sticky bit set, // - the executable is part of the WU, has the sticky bit set,
// and has a signature // and has a signature
// - The logical and physical name of the executable // - The logical and physical name of the executable
// (as stored in the download directory) is "program_platform_cksum" // (as stored in the download directory) is "program_platform_cksum"
// where cksum is the last 8 chars of the MD5 // where cksum is the last 8 chars of the MD5
// - The physical name of the job file is job_WUID.xml // - The physical name of the job file is sj_WUID.xml
// - The physical names of the input/output files are name_WUID // - The physical names of the input/output files are name_WUID
// - a file containing the job directory is stored in // - a file containing the job directory is stored in
// sj_WUID in the upload hierarchy // sj_WUID in the upload hierarchy
// - a workunit template sj_WUID is created in templates/ // - workunit and result templates sj_wu_template_WUID
// and sj_result_template_WUID are created in templates/
// - the single_job_assimilator copies the output files to the job dir, // - the single_job_assimilator copies the output files to the job dir,
// and cleans up the sj_WUID and WU template files // and cleans up the sj_WUID and template files
ini_set('error_reporting', E_ALL); ini_set('error_reporting', E_ALL);
@ -285,6 +286,7 @@ function make_result_template() {
" <file_ref> " <file_ref>
<file_name><OUTFILE_$i/></file_name> <file_name><OUTFILE_$i/></file_name>
<open_name>$outfile</open_name> <open_name>$outfile</open_name>
<optional/>
<copy_file/> <copy_file/>
</file_ref> </file_ref>
"); ");
@ -296,6 +298,7 @@ function make_result_template() {
" <file_ref> " <file_ref>
<file_name><OUTFILE_$i/></file_name> <file_name><OUTFILE_$i/></file_name>
<open_name>$stdout_file</open_name> <open_name>$stdout_file</open_name>
<optional/>
</file_ref> </file_ref>
"); ");
} }