diff --git a/checkin_notes b/checkin_notes
index 3676618b34..589bac4b7c 100755
--- a/checkin_notes
+++ b/checkin_notes
@@ -2276,3 +2276,45 @@ David Nov 5 2002
Makefile.in
make_work.C
result_retry.C
+
+David Nov 7 2002
+ - Finished and did basic testing of result retry mechanism.
+ It now correctly generates new results with unique names,
+ unique filenames, and upload signatures.
+ TODO: make a test script
+ - added DB field workunit.delay_bound:
+ determines deadlines of result, determines retry check period
+ Mandatory argument to create_work.
+ - Added DB indices for all enumeration functions
+
+ client/
+ client_types.C
+ db/
+ constraints.sql
+ db.h
+ db_mysql.C
+ schema.sql
+ doc/
+ result.html
+ tools_work.html
+ work.html
+ html_ops/
+ db.inc
+ sched/
+ Makefile.in
+ feeder.C
+ handle_request.C
+ result_retry.C
+ test/
+ 1sec_wu
+ concat_wu
+ sah_result
+ sah_wu
+ test.inc
+ test_uc.php
+ uc_wu
+ ucs_wu
+ tools/
+ backend_lib.C,h
+ create_work.C
+ process_result_template.C
diff --git a/client/client_types.C b/client/client_types.C
index c8ba0122af..62343d9422 100644
--- a/client/client_types.C
+++ b/client/client_types.C
@@ -368,7 +368,7 @@ int FILE_INFO::write(FILE* out, bool to_server) {
pers_file_xfer->write(out);
}
if (!to_server) {
- if (signed_xml) {
+ if (signed_xml && xml_signature) {
fprintf(out, "\n%s\n", signed_xml);
}
if (xml_signature) {
diff --git a/db/constraints.sql b/db/constraints.sql
index 5d1f5601b1..6b43c35dee 100644
--- a/db/constraints.sql
+++ b/db/constraints.sql
@@ -11,14 +11,18 @@ alter table app_version
alter table user
add unique(email_addr);
+alter table user
+ add index ind_tid (teamid);
alter table team
add unique(name);
alter table workunit
add unique(name);
+create index wu_val on workunit(appid, need_validate);
+create index wu_retry on workunit(appid, retry_check_time);
alter table result
add unique(name);
-
+create index res_wuid on result(workunitid);
create index ind_res_st on result(state);
diff --git a/db/db.h b/db/db.h
index 29bf5f9077..fa1cde7f6b 100644
--- a/db/db.h
+++ b/db/db.h
@@ -197,12 +197,9 @@ struct HOST {
struct WORKUNIT {
int id;
- unsigned int create_time; // time of record creation
- unsigned int check_time; // when to check for result retury, or zero
- int appid; // ID of APP record tied to this workunit
- int previous_wuid;
- bool has_successor;
- char name[256]; // Name of the workunit
+ unsigned int create_time;
+ int appid; // associated app
+ char name[256];
char xml_doc[MAX_BLOB_SIZE];
int batch;
double rsc_fpops; // estimated # of FP operations
@@ -214,7 +211,8 @@ struct WORKUNIT {
int canonical_resultid; // ID of canonical result, or zero
double canonical_credit; // credit that all correct results get
double retry_check_time; // when to check for result retry
- int state; // see above
+ int state; // see values above
+ int delay_bound; // determines result deadline, retry check time
// the following not used in the DB
char app_name[256];
@@ -303,7 +301,6 @@ extern int db_workunit_new(WORKUNIT& p);
extern int db_workunit(int id, WORKUNIT&);
extern int db_workunit_update(WORKUNIT& p);
extern int db_workunit_lookup_name(WORKUNIT&);
-//extern int db_workunit_enum_dynamic_to_send(WORKUNIT&, int);
extern int db_workunit_enum_app_need_validate(WORKUNIT&);
extern int db_workunit_enum_retry_check_time(WORKUNIT&);
@@ -311,7 +308,7 @@ extern int db_result_new(RESULT& p);
extern int db_result(int id, RESULT&);
extern int db_result_update(RESULT& p);
extern int db_result_lookup_name(RESULT& p);
-extern int db_result_enum_to_send(RESULT&, int);
+extern int db_result_enum_state(RESULT&, int);
extern int db_result_enum_wuid(RESULT&);
extern int db_result_count_state(int state, int&);
diff --git a/db/db_mysql.C b/db/db_mysql.C
index ea2791d88a..0c82429ed4 100644
--- a/db/db_mysql.C
+++ b/db/db_mysql.C
@@ -190,18 +190,18 @@ void struct_to_str(void* vp, char* q, int type) {
case TYPE_WORKUNIT:
wup = (WORKUNIT*)vp;
sprintf(q,
- "id=%d, create_time=%d, appid=%d, previous_wuid=%d, "
- "has_successor=%d, name='%s', xml_doc='%s', batch=%d, "
+ "id=%d, create_time=%d, appid=%d, "
+ "name='%s', xml_doc='%s', batch=%d, "
"rsc_fpops=%f, rsc_iops=%f, rsc_memory=%f, rsc_disk=%f, "
"need_validate=%d, "
"canonical_resultid=%d, canonical_credit=%f, "
- "retry_check_time=%f, state=%d",
- wup->id, wup->create_time, wup->appid, wup->previous_wuid,
- wup->has_successor?1:0, wup->name, wup->xml_doc, wup->batch,
+ "retry_check_time=%f, delay_bound=%d, state=%d",
+ wup->id, wup->create_time, wup->appid,
+ wup->name, wup->xml_doc, wup->batch,
wup->rsc_fpops, wup->rsc_iops, wup->rsc_memory, wup->rsc_disk,
wup->need_validate,
wup->canonical_resultid, wup->canonical_credit,
- wup->retry_check_time, wup->state
+ wup->retry_check_time, wup->delay_bound, wup->state
);
break;
case TYPE_RESULT:
@@ -350,8 +350,6 @@ void row_to_struct(MYSQL_ROW& r, void* vp, int type) {
wup->id = atoi(r[i++]);
wup->create_time = atoi(r[i++]);
wup->appid = atoi(r[i++]);
- wup->previous_wuid = atoi(r[i++]);
- wup->has_successor = (atoi(r[i++])!=0);
strcpy2(wup->name, r[i++]);
strcpy2(wup->xml_doc, r[i++]);
wup->batch = atoi(r[i++]);
@@ -363,6 +361,7 @@ void row_to_struct(MYSQL_ROW& r, void* vp, int type) {
wup->canonical_resultid = atoi(r[i++]);
wup->canonical_credit = atof(r[i++]);
wup->retry_check_time = atof(r[i++]);
+ wup->delay_bound = atoi(r[i++]);
wup->state = atoi(r[i++]);
break;
case TYPE_RESULT:
@@ -583,7 +582,10 @@ int db_workunit_enum_retry_check_time(WORKUNIT& p) {
char buf[256];
if (!e.active) {
- sprintf(buf, "where retry_check_time > 0 and retry_check_time < %f", p.retry_check_time);
+ sprintf(buf,
+ "where appid=%d and retry_check_time > 0 and retry_check_time < %f",
+ p.appid, p.retry_check_time
+ );
}
return db_enum(e, &p, TYPE_WORKUNIT, buf);
}
@@ -610,11 +612,11 @@ int db_result_lookup_name(RESULT& p) {
return db_lookup(&p, TYPE_RESULT, buf);
}
-int db_result_enum_to_send(RESULT& p, int limit) {
+int db_result_enum_state(RESULT& p, int limit) {
static ENUM e;
char buf[256];
- if (!e.active) sprintf(buf, "where state=%d", RESULT_STATE_UNSENT);
+ if (!e.active) sprintf(buf, "where state=%d", p.state);
return db_enum(e, &p, TYPE_RESULT, buf, limit);
}
diff --git a/db/schema.sql b/db/schema.sql
index 06440a3e54..8027e69c3e 100644
--- a/db/schema.sql
+++ b/db/schema.sql
@@ -116,8 +116,6 @@ create table workunit (
id integer not null auto_increment,
create_time integer not null,
appid integer not null,
- previous_wuid integer not null,
- has_successor smallint not null,
name varchar(254) not null,
xml_doc blob,
batch integer not null,
@@ -129,6 +127,7 @@ create table workunit (
canonical_resultid integer not null,
canonical_credit double not null,
retry_check_time double not null,
+ delay_bound integer not null,
state integer not null,
primary key (id)
);
diff --git a/doc/result.html b/doc/result.html
index aa2a70bf22..91c768311c 100644
--- a/doc/result.html
+++ b/doc/result.html
@@ -9,20 +9,14 @@ The attributes of a result include:
- The name of the result (unique across all results in the project).
-
- The name of the associated workunit.
+
- The associated workunit.
- The time when the completed result should be reported to a
scheduling server.
This is assigned by the project, and is used by
clients to prioritize operations and to initiate scheduler RPCs.
There is no guarantee that the result will actually be reported by this time.
- An XML document listing the names of its output files; see below.
-
- An XML document giving the sizes and checksums of its output
-files (filled in after the result is completed).
-
- The stderr output of the result.
-
- The host that computed the result.
-
- The times when the result was dispatched and received.
-
- The exit status of the application.
-
- The reported CPU time.
+
- The time when the result was dispatched.
- Its state. Values include:
- Inactive (not ready to dispatch)
@@ -34,49 +28,41 @@ files (filled in after the result is completed).
+The following attributes are defined after the result is completed:
+
+- An XML document giving the sizes and checksums of its output
+files (filled in after the result is completed).
+
- The stderr output of the result.
+
- The host that was sent the result.
+
+ - The time when the result was received.
+
- The exit status of the application.
+
- The reported CPU time.
+
+
Results are normally created using the
create_work utility.
The XML document listing the output files has the form:
-<file_info>...</file_info>
+<file_info>...</file_info>
[ ... ]
-<result>
- <name>foobar</name>
- <wu_name>blah</wu_name>
- <exit_status>blah</exit_status>
- <file_ref>...</file_ref>
+<result>
+ <name>foobar</name>
+ <wu_name>blah</wu_name>
+ <exit_status>blah</exit_status>
+ <file_ref>...</file_ref>
[ ... ]
-</result>
+</result>
The components are:
-- The <name> element is the result name.
-
- The <wu_name> element is the workunit name.
-
- Each <file_ref> element is an association to an
-output file, described by a corresponding <file_info> element.
+
- The <name> element is the result name.
+
- The <wu_name> element is the workunit name.
+
- Each <file_ref> element is an association to an
+output file, described by a corresponding <file_info> element.
The XML document describing the sizes and checksums of the output
-files is just a list of <file_info> elements, with the
+files is just a list of <file_info> elements, with the
nbytes and md5_cksum fields present.
The project back end
must parse this field to find the locations and checksums of output files.
-
-Several results may be associated with a single workunit.
-Results
-may be generated in either of two ways (selected as part of the application):
-
-- Advance generation of results.
-One or more result records are stored in the database
-when the workunit is produced.
-The scheduling server dispatches each result to a single participant host.
-When all result records have been dispatched,
-participants hosts are "turned away".
-
- On-demand generation of results.
-The application specifies a "result template",
-which has place-holder tokens for the output filenames.
-The scheduling server, in response to a host request,
-generates a new result record and sends the result template.
-The host generates unique output filenames,
-and returns them along when it the computation is done.
-
diff --git a/doc/tools_work.html b/doc/tools_work.html
index db888ec706..5a83547919 100644
--- a/doc/tools_work.html
+++ b/doc/tools_work.html
@@ -1,77 +1,72 @@
-Generating work
+Generating work
-Generating work
+Generating work
Workunits and results can be created using either a utility program
-or a C++ function.
+or a C++ function.
-The program
+The utility program is
create_work
- -appname name
- -wu_name name
- -wu_template file
- -result_template file
- -nresults n
- infile_1 ... infile_m
-
- -appname specifies the name of the application
-
-wu_name gives the name of the workunit.
+ -appname name // application name
+ -wu_name name // workunit name
+ -wu_template filename // WU template filename
+ -result_template filename // result template filename
+ -nresults n // # of results to create
+ -db_name x // database name
+ -db_passwd x // database password
+ -upload_url x // URL for output file upload
+ -download_url x // base URL for input file download
+ -download_dir x // where to move input files
+ -rsc_fpops x // est. # floating-point ops
+ -rsc_iops x // est. # integer ops
+ -rsc_memory x // est. RAM working set size, bytes
+ -rsc_disk x // est. disk space required
+ -keyfile x // path of upload private key
+ -delay_bound x // delay bound for result completion
+ infile_1 ... infile_m // input files
+
--wu_template gives the filename of a template for the
-workunit XML document.
-This template is macro-substituted as follows:
+The workunit template file is macro-substituted as follows:
-
-<INFILE_n/> is replaced with the name of the nth input
+<INFILE_n/> is replaced with the name of the nth input
file.
-
-<MD5_n/> is replaced with the MD5 checksum of the nth
+<MD5_n/> is replaced with the MD5 checksum of the nth
input file.
-
-<WU_NAME/> is replaced with the workunit name.
+<DOWNLOAD_URL/> is replaced with the download URL.
--result_template gives the filename of a template for the
-result XML document.
-This template is macro-substituted as follows:
+The result file template is macro-substituted as follows:
-
-<OUTFILE_n> is replaced with a string of the form
+<OUTFILE_n> is replaced with a string of the form
"wuname_resultnum_n" where wuname is the workunit name and resultnum is
the ordinal number of the result (0, 1, ...).
-
-<WU_NAME> is replaced with the workunit name.
-
-
-<RESULT_NAME> is replaced with a string of the form
-"wuname_resultnum".
+<UPLOAD_URL> is replaced with the upload URL.
--nresults gives the number of results to be created.
-
-infile_1 ... infile_m are the names of the input files.
-The C++ library (backend_lib.C,h) provides the function:
+The C++ library (backend_lib.C,h) provides the function:
int create_work(
- int appid,
- char* wu_name,
+ WORKUNIT&,
char* wu_template,
- char* result_template,
+ char* result_template_filename,
int nresults,
char* infile_dir,
char** infiles,
int ninfiles
+ R_RSA_PRIVATE_KEY& key,
+ char* upload_url,
+ char* download_url
);
-
+
This creates a workunit and one or more results.
-The arguments are
-the same as for the utility program, except that the application is
-identified by its database ID.
-
-"infile_dir" is the path of the directory containing the input
-files.
-
-"infiles" is an array of names of the input files, of length "ninfiles".
+The arguments are similar to those of the utility program;
+some of the information is passed in the WORKUNIT structure.
diff --git a/doc/work.html b/doc/work.html
index e33ce2ccd7..19af2c64df 100644
--- a/doc/work.html
+++ b/doc/work.html
@@ -7,15 +7,17 @@ Workunits are maintained in the workunit table in the BOINC DB.
The attributes of a workunit include:
-- Its application.
- Its name (unique across all workunits in the project).
+
- Its application.
- An XML document describing its input files and other parameters
(see below).
- The estimated resource requirements of the work unit
-(computation, memory, disk space, network traffic).
-
- Counts of how many times this workunit should be dispatched, how
-many times it has been dispatched, how many results have been returned,
-and how many failures have occurred.
+(computation, memory, disk space).
+
- A delay bound: upper bound on how long
+an instance of this work unit should take to complete.
+This limits what hosts the workunit is sent to,
+and it's used to assign result deadlines and
+times for retrying results.
The inputs to a workunit are described by an XML document of the form
@@ -23,9 +25,7 @@ The inputs to a workunit are described by an XML document of the form
[ <file_info>...</file_info> ]
[ ... ]
<workunit>
- <name>foobar</name>
<app_name>blah</app_name>
- [ <version_num>1</version_num> ]
[ <command_line>-flags xyz</command_line> ]
[ <env_vars>name=val&name=val</env_vars> ]
[ <file_ref>...</file_ref> ]
@@ -34,10 +34,6 @@ The inputs to a workunit are described by an XML document of the form
The components are:
-- The <name> element is the name of the workunit.
-
- The <app_name> element is the name of the
-application.
-
- The <version_num> element is ???.
- The <command_line> element, if present, is the
command-line arguments to be passed to the main program.
- The <env_vars> element, if present, is a list of
@@ -48,7 +44,7 @@ described by a <file_info> element.
A workunit is associated with an application, not with a particular
-version or set of versions???.
+version or range of versions.
If the format of your input data changes in
a way that is incompatible with older versions,
you must create a new application.
diff --git a/html/ops/db.inc b/html/ops/db.inc
index 7de6bfda6e..0185eff4aa 100644
--- a/html/ops/db.inc
+++ b/html/ops/db.inc
@@ -113,6 +113,16 @@ function show_host($host) {
}
+function retry_state_str($s) { // human-readable label for a workunit state code (called with $wu->state)
+ switch($s) {
+ case 0: return "Default";
+ case 1: return "Send failed"; // presumably WU_STATE_SEND_FAIL -- TODO confirm value in db.h
+ case 2: return "Too many errors (may have bug)"; // presumably WU_STATE_TOO_MANY_ERRORS -- TODO confirm
+ case 3: return "Too many done (may be nondeterministic)"; // presumably WU_STATE_TOO_MANY_DONE -- TODO confirm
+ }
+ return "unknown"; // any code not listed above
+}
+
function show_workunit($wu,$show_xml_doc) {
start_table();
row("Created", time_str($wu->create_time));
@@ -129,7 +139,10 @@ function show_workunit($wu,$show_xml_doc) {
row("Need validate?", $wu->need_validate?"yes":"no");
row("Canonical resultid", $wu->canonical_resultid);
row("Canonical credit", $wu->canonical_credit);
- row("","id>Show Associated Results");
+ row("Retry check time", time_str($wu->retry_check_time));
+ row("Retry state", retry_state_str($wu->state));
+ row("Delay bound", $wu->delay_bound);
+ row("","id>Show associated results");
end_table();
echo "
";
}
@@ -143,6 +156,17 @@ function res_state_string($s) {
case 5: return "Timeout";
case 6: return "Error";
}
+ return "unknown";
+}
+
+function validate_state_str($s) { // human-readable label for a result's validate_state code
+ switch($s) {
+ case 0: return "Initial"; // presumably VALIDATE_STATE_INITIAL -- TODO confirm value in db.h
+ case 1: return "Need check";
+ case 2: return "Valid";
+ case 3: return "Invalid";
+ }
+ return "unknown"; // any code not listed above
}
function show_result($result,$show_xml_docs,$show_stderr,$show_times) {
@@ -155,6 +179,7 @@ function show_result($result,$show_xml_docs,$show_stderr,$show_times) {
row("Name", $result->name);
row("Workunit", "workunitid>" . wu_name_by_id($result->workunitid) . "" );
row("state", res_state_string($result->state));
+ row("Deadline", time_str($result->report_deadline));
row("Host ID", "hostid>" . host_name_by_id($result->hostid) . "");
row("Exit Status", $result->exit_status);
row("CPU time", $result->cpu_time);
@@ -166,7 +191,10 @@ function show_result($result,$show_xml_docs,$show_stderr,$show_times) {
row("stderr out", "
".htmlspecialchars($result->stderr_out)."
");
}
row("batch", $result->batch);
- row("validated", $result->validated);
+ row("project state", $result->project_state);
+ row("Validate state", validate_state_str($result->validate_state));
+ row("claimed credit", $result->claimed_credit);
+ row("granted credit", $result->granted_credit);
end_table();
echo "";
}
diff --git a/sched/Makefile.in b/sched/Makefile.in
index 07aa60d2f8..2e0e5d0446 100644
--- a/sched/Makefile.in
+++ b/sched/Makefile.in
@@ -16,7 +16,7 @@ CC = g++ $(CFLAGS)
CLIBS = @LIBS@
-PROGS = cgi feeder show_shmem file_upload_handler validate_test make_work
+PROGS = cgi feeder show_shmem file_upload_handler validate_test make_work result_retry
all: $(PROGS)
@@ -81,8 +81,16 @@ MAKE_WORK_OBJS = \
RESULT_RETRY_OBJS = \
result_retry.o \
+ config.o \
../db/db_mysql.o \
- ../db/mysql_util.o
+ ../db/mysql_util.o \
+ ../lib/parse.o \
+ ../lib/md5_file.o \
+ ../lib/md5.o \
+ ../lib/crypt.o \
+ ../tools/backend_lib.o \
+ ../tools/process_result_template.o \
+ ../RSAEuro/source/rsaeuro.a
FCGI_OBJS = \
handle_request.fcgi.o \
@@ -137,6 +145,9 @@ validate_test: $(VALIDATE_OBJS)
make_work: $(MAKE_WORK_OBJS)
$(CC) $(MAKE_WORK_OBJS) $(MYSQL_LIBS) $(CLIBS) -o make_work
+result_retry: $(RESULT_RETRY_OBJS)
+ $(CC) $(RESULT_RETRY_OBJS) $(MYSQL_LIBS) $(CLIBS) -o result_retry
+
fcgi: $(FCGI_OBJS)
$(CC) $(FCGI_OBJS) $(MYSQL_LIBS) $(CLIBS) $(FCGI_LIBS) \
-o fcgi
diff --git a/sched/feeder.C b/sched/feeder.C
index 73b6c26f5c..8c69e270c7 100644
--- a/sched/feeder.C
+++ b/sched/feeder.C
@@ -116,7 +116,8 @@ void feeder_loop(SCHED_SHMEM* ssp) {
restarted_enum = false;
for (i=0; inwu_results; i++) {
if (!ssp->wu_results[i].present) {
- retval = db_result_enum_to_send(result, RESULTS_PER_ENUM);
+ result.state = RESULT_STATE_UNSENT;
+ retval = db_result_enum_state(result, RESULTS_PER_ENUM);
if (retval) {
// if we already restarted the enum on this pass,
@@ -130,7 +131,8 @@ void feeder_loop(SCHED_SHMEM* ssp) {
// restart the enumeration
//
restarted_enum = true;
- retval = db_result_enum_to_send(result, RESULTS_PER_ENUM);
+ result.state = RESULT_STATE_UNSENT;
+ retval = db_result_enum_state(result, RESULTS_PER_ENUM);
printf("feeder: restarting enumeration: %d\n", retval);
if (retval) {
printf("feeder: enumeration returned nothing\n");
diff --git a/sched/handle_request.C b/sched/handle_request.C
index 84412caa29..dd1d979739 100644
--- a/sched/handle_request.C
+++ b/sched/handle_request.C
@@ -73,9 +73,10 @@ int insert_after(char* buffer, char* after, char* text) {
}
// add elements in xml_doc:
-// WU name, and estimation of how many seconds it will take
+// WU name, app name,
+// and estimate of how many seconds it will take on this host
//
-int insert_wu_tags(WORKUNIT& wu, double seconds) {
+int insert_wu_tags(WORKUNIT& wu, double seconds, APP& app) {
char buf[256];
int retval;
@@ -85,7 +86,10 @@ int insert_wu_tags(WORKUNIT& wu, double seconds) {
);
retval = insert_after(wu.xml_doc, "\n", buf);
if (retval) return retval;
- sprintf(buf, " %s\n", wu.name);
+ sprintf(buf,
+ " %s\n %s\n",
+ wu.name, app.name
+ );
return insert_after(wu.xml_doc, "\n", buf);
}
@@ -116,7 +120,7 @@ int add_wu_to_reply(
// add time estimate to reply
//
wu2 = wu; // make copy since we're going to modify its XML field
- retval = insert_wu_tags(wu2, seconds_to_complete);
+ retval = insert_wu_tags(wu2, seconds_to_complete, *app);
if (retval) return retval;
reply.insert_workunit_unique(wu2);
return 0;
@@ -382,7 +386,7 @@ int send_work(
) {
int i, retval, nresults = 0, seconds_to_fill;
WORKUNIT wu;
- RESULT result;
+ RESULT result, result_copy;
#if 0
APP* app;
char prefix [256];
@@ -415,13 +419,21 @@ int send_work(
);
if (retval) continue;
- fprintf(stderr, "sending result name %s, id %d\n", result.name, result.id);
+ fprintf(stderr,
+ "sending result name %s, id %d\n",
+ result.name, result.id
+ );
+
+ // copy the result so we don't overwrite its XML fields
+ //
+ result_copy = result;
- retval = insert_name_tags(result, wu);
+ retval = insert_name_tags(result_copy, wu);
if (retval) {
fprintf(stderr, "send_work: can't insert name tags\n");
}
- reply.insert_result(result);
+ reply.insert_result(result_copy);
+
seconds_to_fill -= (int)estimate_duration(wu, reply.host);
result.state = RESULT_STATE_IN_PROGRESS;
diff --git a/sched/result_retry.C b/sched/result_retry.C
index 9c1c5799b0..f89dd91e0d 100644
--- a/sched/result_retry.C
+++ b/sched/result_retry.C
@@ -1,21 +1,27 @@
// result_retry - create new results to make up for lost ones
//
// result_retry
-// [ -dwu n ]
-// [ -dresult n ]
-// [ -nerror n ]
-// [ -ndet n ]
-// [ -nredundancy n ]
+// -app appname
+// [ -nerror n ] if get this many errors, bail on WU
+// [ -ndet n ] if get this many results w/o consensus, bail
+// [ -nredundancy n ] try to get at least this many done results
+// [ -asynch ] be asynchronous
#include
+#include
#include
#include "db.h"
+#include "backend_lib.h"
+#include "config.h"
int max_errors = 999;
int max_done = 999;
-int nredundancy = 999;
+int nredundancy = 0;
int startup_time;
+CONFIG config;
+R_RSA_PRIVATE_KEY key;
+char app_name[256];
// The scheme for generating unique output filenames is as follows.
// If the original filename is of the form x__y,
@@ -31,62 +37,116 @@ void make_unique_name(char* name) {
char buf[256], *p;
static int seqno;
- sprintf(buf, "%d_%d", startup_time, seqno);
+ sprintf(buf, "%d_%d", startup_time, seqno++);
p = strstr(name, "__");
if (p) {
strcpy(p+2, buf);
} else {
+ strcat(name, "__");
strcat(name, buf);
}
}
-// convert a result's XML document to generate new output filenames
-// Look for ... elements and convert the name;
+// convert a result's XML document to generate new output filenames.
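+// The input has the form
+// <file_info>
+// <name>xxx</name>
+// ...
+// </file_info>
+// ...
+// <result>
+// <file_ref>
+// <file_name>xxx</file_name>
+// ...
+// </file_ref>
+// ...
+// </result>
+//
+// Look for <name>...</name> elements within <file_info>
+// and make a unique name based on it;
+// apply the same conversion to the <file_name> element later on.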
//
-int assign_new_names(char* in, char* out) {
- char *p = in, *n1, *n2;
- char name[256], buf[MAX_BLOB_SIZE];
+// TODO: this is ad-hoc. Would be nice to use some generic
+// XML parsing routines, or XSLT or something.
+//
+int assign_new_names(char* in) {
+ char *p = in, *n1, *n2, *r;
+ char name[256], newname[256], element[256], buf[MAX_BLOB_SIZE];
int len;
+ // notice where the <result> is so we don't try to convert
+ // the result name
+ //
+ r = strstr(in, "");
+
while (1) {
n1 = strstr(p, "");
if (!n1) break;
+
+ if (n1 > r) break; // don't go past
+
n1 += strlen("");
n2 = strstr(p, "");
if (!n2) {
- fprintf(stderr, "malformed XML:\n%s", in);
+ fprintf(stderr, "assign_new_names(): malformed XML:\n%s", in);
return 1;
}
len = n2 - n1;
memcpy(name, n1, len);
name[len] = 0;
- make_unique_name(name);
+ strcpy(newname, name);
+ make_unique_name(newname);
strcpy(buf, n2);
- strcpy(n1, name);
+ strcpy(n1, newname);
strcat(n1, buf);
+
+ // replace the name in the <file_name> element
+ //
+ sprintf(element, "%s", name);
+ n2 = strstr(n1, element);
+ if (!n2) {
+ fprintf(stderr, "assign_new_names(): no :\n%s", in);
+ return 1;
+ }
+ strcpy(buf, n2+strlen(element));
+ sprintf(element, "%s", newname);
+ strcpy(n2, element);
+ strcat(n2, buf);
p = n1;
}
return 0;
}
-void main_loop() {
+bool do_pass(APP& app) {
WORKUNIT wu;
RESULT result;
- int nerrors, ndone;
+ int nerrors, ndone, retval;
unsigned int i, n;
+ bool did_something = false;
wu.retry_check_time = time(0);
+ wu.appid = app.id;
// loop over WUs that are due to be checked
//
- while (db_workunit_enum_retry_check_time(wu)) {
+ while (!db_workunit_enum_retry_check_time(wu)) {
vector results;
+ did_something = true;
+
+ // if this WU has a canonical result, we're done
+ // (this normally doesn't happen since the retry check time
+ // is zeroed when canonical result found, but just in case).
+ //
+ if (wu.canonical_resultid) {
+ wu.retry_check_time = 0;
+ goto update_wu;
+ }
+
// enumerate all the results for the WU
//
result.workunitid = wu.id;
- while (db_result_enum_wuid(result)) {
+ while (!db_result_enum_wuid(result)) {
results.push_back(result);
}
@@ -100,8 +160,8 @@ void main_loop() {
if (result.state == RESULT_STATE_UNSENT) {
fprintf(stderr, "WU %s has unsent result\n", wu.name);
wu.state = WU_STATE_SEND_FAIL;
- db_workunit_update(wu);
- goto next_wu;
+ wu.retry_check_time = 0;
+ goto update_wu;
}
if (result.state == RESULT_STATE_ERROR) {
nerrors++;
@@ -116,38 +176,96 @@ void main_loop() {
if (nerrors > max_errors) {
fprintf(stderr, "WU %s has too many errors\n", wu.name);
wu.state = WU_STATE_TOO_MANY_ERRORS;
- db_workunit_update(wu);
- goto next_wu;
+ wu.retry_check_time = 0;
+ goto update_wu;
}
if (ndone > max_done) {
fprintf(stderr, "WU %s has too many answers\n", wu.name);
wu.state = WU_STATE_TOO_MANY_DONE;
- db_workunit_update(wu);
- goto next_wu;
+ wu.retry_check_time = 0;
+ goto update_wu;
}
// Generate new results if needed.
// Munge the XML of an existing result
// to create unique new output filenames.
//
- n = nredundancy - ndone;
- for (i=0; i ndone) {
+ n = nredundancy - ndone;
+ for (i=0; i
- 1sec
diff --git a/test/concat_wu b/test/concat_wu
index 2db68e077b..39f2005d54 100644
--- a/test/concat_wu
+++ b/test/concat_wu
@@ -9,7 +9,6 @@
- concat
in1
diff --git a/test/sah_result b/test/sah_result
index fb78b1d779..166a7ecd11 100644
--- a/test/sah_result
+++ b/test/sah_result
@@ -6,8 +6,8 @@
100000
-
+
outfile.sah
-
+
diff --git a/test/sah_wu b/test/sah_wu
index efea48a109..be2af74fb0 100644
--- a/test/sah_wu
+++ b/test/sah_wu
@@ -4,7 +4,6 @@
- setiathome-3.06
work_unit.sah
diff --git a/test/test.inc b/test/test.inc
index d742466229..0123f8b226 100644
--- a/test/test.inc
+++ b/test/test.inc
@@ -54,6 +54,7 @@ function db_query($query) {
function run_tool($cmd) {
$tool_dir = get_env_var("BOINC_SRC_DIR")."/tools/";
$cmd = $tool_dir.$cmd;
+ //echo $cmd;
PassThru($cmd);
}
@@ -205,6 +206,7 @@ class Project {
PassThru("cp $source_dir/sched/file_upload_handler $this->project_dir/cgi/");
PassThru("cp $source_dir/sched/make_work $this->project_dir/cgi/");
PassThru("cp $source_dir/sched/feeder $this->project_dir/cgi/");
+ PassThru("cp $source_dir/sched/result_retry $this->project_dir/cgi/");
PassThru("cp $source_dir/sched/validate_test $this->project_dir/cgi/");
$f = fopen("$this->project_dir/cgi/config.xml", "w");
fputs($f, "\n");
@@ -435,6 +437,7 @@ class Work {
var $rsc_iops;
var $rsc_fpops;
var $rsc_disk;
+ var $delay_bound;
function Work($app) {
$this->app = $app;
@@ -442,6 +445,7 @@ class Work {
$this->rcs_iops = 180000000000;
$this->rcs_fpops = 100000000000;
$this->rcs_disk = 1000000;
+ $this->delay_bound = 1000;
}
function install($project) {
@@ -450,7 +454,7 @@ class Work {
$x = $this->input_files[$i];
PassThru("cp $x $project->project_dir/download");
}
- $cmd = "create_work -db_name $project->db_name -download_dir $project->project_dir/download -upload_url $project->upload_url -download_url $project->download_url/ -keyfile $project->key_dir/upload_private -appname $app->name -rsc_iops $this->rcs_iops -rsc_fpops $this->rsc_fpops -rsc_disk $this->rsc_disk -wu_template $this->wu_template -result_template $this->result_template -nresults $this->nresults -wu_name $this->wu_template";
+ $cmd = "create_work -db_name $project->db_name -download_dir $project->project_dir/download -upload_url $project->upload_url -download_url $project->download_url/ -keyfile $project->key_dir/upload_private -appname $app->name -rsc_iops $this->rcs_iops -rsc_fpops $this->rsc_fpops -rsc_disk $this->rsc_disk -wu_template $this->wu_template -result_template $this->result_template -nresults $this->nresults -wu_name $this->wu_template -delay_bound $this->delay_bound";
for ($i=0; $iinput_files); $i++) {
$x = $this->input_files[$i];
$cmd = $cmd." ".$x;
diff --git a/test/test_uc.php b/test/test_uc.php
index 7665758b4a..0357ade844 100644
--- a/test/test_uc.php
+++ b/test/test_uc.php
@@ -26,6 +26,7 @@
$work->wu_template = "uc_wu";
$work->result_template = "uc_result";
$work->nresults = 2;
+ $work->delay_bound = 10;
array_push($work->input_files, "input");
$work->install($project);
diff --git a/test/uc_wu b/test/uc_wu
index 73e050eca7..6daf5e0145 100644
--- a/test/uc_wu
+++ b/test/uc_wu
@@ -4,7 +4,6 @@
- upper_case
in
diff --git a/test/ucs_wu b/test/ucs_wu
index e40ebe575e..63a0a1f20c 100644
--- a/test/ucs_wu
+++ b/test/ucs_wu
@@ -4,7 +4,6 @@
- upper_case
in
diff --git a/tools/backend_lib.C b/tools/backend_lib.C
index 4cefe31b49..cea226bbbf 100644
--- a/tools/backend_lib.C
+++ b/tools/backend_lib.C
@@ -140,6 +140,29 @@ static int process_wu_template(
return 0;
}
+// Reset the time-varying fields of a result to their initial (unsent) state
+// and bind it to wu; the report deadline is set to now + wu.delay_bound.
+// Used to clone existing results, so every other RESULT field is left untouched.
+//
+void initialize_result(RESULT& result, WORKUNIT& wu) {
+ result.id = 0;
+ result.create_time = time(0);
+ result.workunitid = wu.id;
+ result.state = RESULT_STATE_UNSENT;
+ result.hostid = 0; // not yet assigned to any host
+ result.report_deadline = time(0) + wu.delay_bound; // delay_bound is added to the current time(0) value
+ result.sent_time = 0;
+ result.received_time = 0;
+ result.exit_status = 0;
+ result.cpu_time = 0;
+ strcpy(result.xml_doc_out, ""); // clear any output description carried over from the source result
+ strcpy(result.stderr_out, "");
+ result.project_state = 0;
+ result.validate_state = VALIDATE_STATE_INITIAL;
+ result.claimed_credit = 0;
+ result.granted_credit = 0;
+}
+
// Create a new result for the given WU.
//
int create_result(
@@ -153,18 +176,13 @@ int create_result(
int retval;
memset(&r, 0, sizeof(r));
- r.report_deadline = time(0) + 1000;
- // TODO: pass this in
- r.create_time = time(0);
- r.workunitid = wu.id;
- r.state = RESULT_STATE_UNSENT;
- r.validate_state = VALIDATE_STATE_INITIAL;
+ initialize_result(r, wu);
sprintf(r.name, "%s_%s", wu.name, result_name_suffix);
sprintf(base_outfile_name, "%s_", r.name);
strcpy(result_template_copy, result_template);
retval = process_result_template(
- result_template,
+ result_template_copy,
key,
base_outfile_name,
upload_url, download_url
@@ -216,9 +234,13 @@ int create_work(
}
for (i=0; i