diff --git a/checkin_notes b/checkin_notes
index fc85246a0e..3676618b34 100755
--- a/checkin_notes
+++ b/checkin_notes
@@ -2263,3 +2263,16 @@ Eric October 29, 2002
mac_carbon_dsp.c,h
mac_carbon_gl.c,h
+David Nov 5 2002
+ - Continued work on result retry mechanism:
+ - new fields "retry_check_time" and "state" in workunit table
+ - logic for creating new output filenames
+ - needs a bit more work
+ db/
+ db.h
+ db_mysql.C
+ schema.sql
+ sched/
+ Makefile.in
+ make_work.C
+ result_retry.C
diff --git a/db/db.h b/db/db.h
index 044a8835de..29bf5f9077 100644
--- a/db/db.h
+++ b/db/db.h
@@ -188,6 +188,13 @@ struct HOST {
int parse_net_stats(FILE*);
};
+#define WU_STATE_SEND_FAIL 1
+ // failed to send results for this WU
+#define WU_STATE_TOO_MANY_ERRORS 2
+ // too many errors; may have bug
+#define WU_STATE_TOO_MANY_DONE 3
+ // too many results without consensus; may be nondeterministic
+
struct WORKUNIT {
int id;
unsigned int create_time; // time of record creation
@@ -206,6 +213,8 @@ struct WORKUNIT {
// VALIDATE_STATE_NEED_CHECK state
int canonical_resultid; // ID of canonical result, or zero
double canonical_credit; // credit that all correct results get
+ double retry_check_time; // when to check for result retry
+ int state; // see above
// the following not used in the DB
char app_name[256];
@@ -296,6 +305,7 @@ extern int db_workunit_update(WORKUNIT& p);
extern int db_workunit_lookup_name(WORKUNIT&);
//extern int db_workunit_enum_dynamic_to_send(WORKUNIT&, int);
extern int db_workunit_enum_app_need_validate(WORKUNIT&);
+extern int db_workunit_enum_retry_check_time(WORKUNIT&);
extern int db_result_new(RESULT& p);
extern int db_result(int id, RESULT&);
diff --git a/db/db_mysql.C b/db/db_mysql.C
index c7ec071a18..ea2791d88a 100644
--- a/db/db_mysql.C
+++ b/db/db_mysql.C
@@ -194,12 +194,14 @@ void struct_to_str(void* vp, char* q, int type) {
"has_successor=%d, name='%s', xml_doc='%s', batch=%d, "
"rsc_fpops=%f, rsc_iops=%f, rsc_memory=%f, rsc_disk=%f, "
"need_validate=%d, "
- "canonical_resultid=%d, canonical_credit=%f",
+ "canonical_resultid=%d, canonical_credit=%f, "
+ "retry_check_time=%f, state=%d",
wup->id, wup->create_time, wup->appid, wup->previous_wuid,
wup->has_successor?1:0, wup->name, wup->xml_doc, wup->batch,
wup->rsc_fpops, wup->rsc_iops, wup->rsc_memory, wup->rsc_disk,
wup->need_validate,
- wup->canonical_resultid, wup->canonical_credit
+ wup->canonical_resultid, wup->canonical_credit,
+ wup->retry_check_time, wup->state
);
break;
case TYPE_RESULT:
@@ -360,6 +362,8 @@ void row_to_struct(MYSQL_ROW& r, void* vp, int type) {
wup->need_validate = atoi(r[i++]);
wup->canonical_resultid = atoi(r[i++]);
wup->canonical_credit = atof(r[i++]);
+ wup->retry_check_time = atof(r[i++]);
+ wup->state = atoi(r[i++]);
break;
case TYPE_RESULT:
rp = (RESULT*)vp;
@@ -574,6 +578,16 @@ int db_workunit_enum_app_need_validate(WORKUNIT& p) {
return db_enum(e, &p, TYPE_WORKUNIT, buf);
}
+// Enumerate workunits with 0 < retry_check_time < p.retry_check_time; the clause is built only when the enumeration is not active.
+int db_workunit_enum_retry_check_time(WORKUNIT& p) {
+    static ENUM e;
+    char buf[256];  // where-clause; snprintf below keeps a huge %f value from overrunning it
+    if (!e.active) {
+        snprintf(buf, sizeof(buf), "where retry_check_time > 0 and retry_check_time < %f", p.retry_check_time);
+    }
+    return db_enum(e, &p, TYPE_WORKUNIT, buf);
+}
+
////////// RESULT /////////
diff --git a/db/schema.sql b/db/schema.sql
index a2e83d889a..06440a3e54 100644
--- a/db/schema.sql
+++ b/db/schema.sql
@@ -128,6 +128,8 @@ create table workunit (
need_validate smallint not null,
canonical_resultid integer not null,
canonical_credit double not null,
+ retry_check_time double not null,
+ state integer not null,
primary key (id)
);
diff --git a/doc/backend.html b/doc/backend.html
index 64df7e42f9..864205b4ec 100644
--- a/doc/backend.html
+++ b/doc/backend.html
@@ -4,7 +4,7 @@
The back end of a BOINC project consists
of a number of components that together are responsible
-for generating work and absorbing the results.
+for generating work and handling results.
Some of these components are supplied by BOINC;
other parts are project- or application-specific:
@@ -14,47 +14,68 @@ other parts are project- or application-specific:
BOINC-supplied part |
project-supplied part |
-
-Work generator: generates work units and their input files,
-and generates one or more results per work unit.
+ |
+Work generator: generates work units, work sequences, results,
+and the corresponding input files.
|
-
+ |
Functions and programs that handle the details of
-creating workunit and result database records.
+creating workunit, work sequence, and result database records.
|
-
+ |
Programs or scripts that generate input files,
install them on data servers,
and call the BOINC functions.
|
-Result retry generator:
-generates additional results if some of those initially sent are lost. |
-A program, result_retry, that does the job. |
-Some parameters used by result_retry. |
+Result retry generator:
+generates additional results if some of those initially sent are lost,
+not completed, or completed erroneously. |
+A program, result_retry, that does the job. |
+Some parameters used by result_retry. |
-Result validation and accounting:
+ | Result validation and accounting:
compare redundant results; select a canonical result
representing the correct output,
and a canonical credit granted to users and hosts
that return the correct output. |
-A program, validate, that contains the
+ | A program, validate, that contains the
basic logic for validation. |
-An application-specific function, linked with validate,
+ | An application-specific function, linked with validate,
that compares sets of redundant results. |
-Result processing: parse output files of
-completed results, and take appropriate action
-(record results in a database, and/or generate more work) |
-Function for enumerating unprocessed results. |
-The rest. |
+Work sequence relocater:
+detects work sequences whose hosts have failed,
+and relocates them to other hosts. |
+A program seq_relocate |
+Some parameters used by seq_relocate |
-Garbage collector: delete input and output files
+ | Work sequence validation and accounting:
+Similar to result validation, but for work sequences.
+ |
+A program seq_validate |
+An application-specific function,
+linked with seq_validate, that compares sets of redundant results. |
+
+
+Result processing: parse output files of
+completed results, and take appropriate action
+(record results in a database, and/or generate more work) |
+Function for enumerating unprocessed results.
+Functions and programs that handle the details of
+creating workunit, work sequence, and result database records.
+ |
+
+The rest.
+ |
+
+
+Garbage collector: delete input and output files
when they are no longer needed. |
-A program, garbage_collect, that does the job. |
-None. |
+A program, garbage_collect, that does the job. |
+None. |
diff --git a/sched/Makefile.in b/sched/Makefile.in
index cd71d616eb..07aa60d2f8 100644
--- a/sched/Makefile.in
+++ b/sched/Makefile.in
@@ -79,6 +79,11 @@ MAKE_WORK_OBJS = \
../lib/crypt.o \
../RSAEuro/source/rsaeuro.a
+RESULT_RETRY_OBJS = \
+ result_retry.o \
+ ../db/db_mysql.o \
+ ../db/mysql_util.o
+
FCGI_OBJS = \
handle_request.fcgi.o \
main.fcgi.o \
diff --git a/sched/make_work.C b/sched/make_work.C
index 378b290eb6..750b171738 100644
--- a/sched/make_work.C
+++ b/sched/make_work.C
@@ -6,7 +6,8 @@
// [ -redundancy n ]
// [ -cushion n ]
//
-// Create WU and result records as needed to maintain a pool of work.
+// Create WU and result records as needed to maintain a pool of work
+// (for testing purposes).
// Makes a new WU for every "redundancy" results.
// Clones the WU of the given name.
//
diff --git a/sched/result_retry.C b/sched/result_retry.C
index 243a2fe1ce..9c1c5799b0 100644
--- a/sched/result_retry.C
+++ b/sched/result_retry.C
@@ -7,29 +7,93 @@
// [ -ndet n ]
// [ -nredundancy n ]
+#include
+#include
+
+#include "db.h"
+
+int max_errors = 999;
+int max_done = 999;
+int nredundancy = 999;
+int startup_time;
+
+// The scheme for generating unique output filenames is as follows.
+// If the original filename is of the form x__y,
+// then y is replaced with a string of the form time_seqno,
+// where "time" is when this program started up and "seqno" is a
+// counter that advances on every call, so no two calls produce the same name.
+// NOTE: if you ever need to start up multiple copies of this,
+// you'll need to add a PID in there somewhere.
+// If the original filename doesn't have __, add a string
+// of the form __time_seqno.  "name" is rewritten in place; the caller
+// must supply a buffer with room for the added suffix.
+void make_unique_name(char* name) {
+    char buf[256], *p;
+    static int seqno;
+
+    sprintf(buf, "__%d_%d", startup_time, seqno++);
+    p = strstr(name, "__");
+    if (p) {
+        strcpy(p, buf);
+    } else {
+        strcat(name, buf);
+    }
+}
+
+// Rewrite, in place, the result XML document "in" to use new output filenames:
+// each name between the tag literals searched for below goes through make_unique_name().
+// (Any other element that repeats the name is not updated by this function.)
+// Returns 0 on success, or 1 if an opening tag has no closing tag.
+int assign_new_names(char* in, char* out) { // NOTE(review): "out" is not used below
+ char *p = in, *n1, *n2;
+ char name[256], buf[MAX_BLOB_SIZE];
+ int len;
+
+ while (1) {
+ n1 = strstr(p, ""); // NOTE(review): tag literal is empty in this copy; markup presumably stripped - confirm
+ if (!n1) break;
+ n1 += strlen(""); // step past the opening tag
+ n2 = strstr(p, ""); // NOTE(review): closing-tag search starts at p, not n1
+ if (!n2) {
+ fprintf(stderr, "malformed XML:\n%s", in);
+ return 1;
+ }
+ len = n2 - n1;
+ memcpy(name, n1, len); // NOTE(review): len is not checked against sizeof(name)
+ name[len] = 0;
+ make_unique_name(name);
+ strcpy(buf, n2); // save everything from the closing tag onward
+ strcpy(n1, name); // overwrite the old name with the new one
+ strcat(n1, buf); // re-append the saved tail
+ p = n1; // continue scanning from the rewritten name
+ }
+ return 0;
+}
+
void main_loop() {
WORKUNIT wu;
RESULT result;
int nerrors, ndone;
+ unsigned int i, n;
wu.retry_check_time = time(0);
// loop over WUs that are due to be checked
//
- while (db_workunit_enum_check_time(wu)) {
+ while (db_workunit_enum_retry_check_time(wu)) {
vector results;
// enumerate all the results for the WU
//
result.workunitid = wu.id;
- while (db_result_enum_workunitid(result)) {
+ while (db_result_enum_wuid(result)) {
results.push_back(result);
}
nerrors = 0;
ndone = 0;
for (i=0; i max_done) {
fprintf(stderr, "WU %s has too many answers\n", wu.name);
wu.state = WU_STATE_TOO_MANY_DONE;
db_workunit_update(wu);
- go next_wu;
+ goto next_wu;
}
- // generate new results if needed
+ // Generate new results if needed.
+ // Munge the XML of an existing result
+ // to create unique new output filenames.
//
n = nredundancy - ndone;
for (i=0; i