From 783239db37dd7f980c7fdc8af4a088e83cecc8ea Mon Sep 17 00:00:00 2001 From: David Anderson Date: Thu, 7 Nov 2002 19:31:34 +0000 Subject: [PATCH] result retry svn path=/trunk/boinc/; revision=584 --- checkin_notes | 42 +++++++ client/client_types.C | 2 +- db/constraints.sql | 6 +- db/db.h | 15 +-- db/db_mysql.C | 24 ++-- db/schema.sql | 3 +- doc/result.html | 64 ++++------- doc/tools_work.html | 81 +++++++------ doc/work.html | 20 ++-- html/ops/db.inc | 32 +++++- sched/Makefile.in | 15 ++- sched/feeder.C | 6 +- sched/handle_request.C | 28 +++-- sched/result_retry.C | 194 +++++++++++++++++++++++++------- test/1sec_wu | 1 - test/concat_wu | 1 - test/sah_result | 4 +- test/sah_wu | 1 - test/test.inc | 6 +- test/test_uc.php | 1 + test/uc_wu | 1 - test/ucs_wu | 1 - tools/backend_lib.C | 38 +++++-- tools/backend_lib.h | 4 + tools/create_work.C | 22 +++- tools/process_result_template.C | 9 +- 26 files changed, 424 insertions(+), 197 deletions(-) diff --git a/checkin_notes b/checkin_notes index 3676618b34..589bac4b7c 100755 --- a/checkin_notes +++ b/checkin_notes @@ -2276,3 +2276,45 @@ David Nov 5 2002 Makefile.in make_work.C result_retry.C + +David Nov 7 2002 + - Finished and did basic testing of result retry mechanism. + It now correctly generates new results with unique names, + unique filenames, and upload signatures. + TODO: make a test script + - added DB field workunit.delay_bound: + determines deadlines of result, determines retry check period + Mandatory argument to create_work. 
+ - Added DB indices for all enumeration functions + + client/ + client_types.C + db/ + constraints.sql + db.h + db_mysql.C + schema.sql + doc/ + result.html + tools_work.html + work.html + html_ops/ + db.inc + sched/ + Makefile.in + feeder.C + handle_request.C + result_retry.C + test/ + 1sec_wu + concat_wu + sah_result + sah_wu + test.inc + test_uc.php + uc_wu + ucs_wu + tools/ + backend_lib.Ch + create_work.C + process_result_template.C diff --git a/client/client_types.C b/client/client_types.C index c8ba0122af..62343d9422 100644 --- a/client/client_types.C +++ b/client/client_types.C @@ -368,7 +368,7 @@ int FILE_INFO::write(FILE* out, bool to_server) { pers_file_xfer->write(out); } if (!to_server) { - if (signed_xml) { + if (signed_xml && xml_signature) { fprintf(out, "\n%s\n", signed_xml); } if (xml_signature) { diff --git a/db/constraints.sql b/db/constraints.sql index 5d1f5601b1..6b43c35dee 100644 --- a/db/constraints.sql +++ b/db/constraints.sql @@ -11,14 +11,18 @@ alter table app_version alter table user add unique(email_addr); +alter table user + add index ind_tid (teamid); alter table team add unique(name); alter table workunit add unique(name); +create index wu_val on workunit(appid, need_validate); +create index wu_retry on workunit(appid, retry_check_time); alter table result add unique(name); - +create index res_wuid on result(workunitid); create index ind_res_st on result(state); diff --git a/db/db.h b/db/db.h index 29bf5f9077..fa1cde7f6b 100644 --- a/db/db.h +++ b/db/db.h @@ -197,12 +197,9 @@ struct HOST { struct WORKUNIT { int id; - unsigned int create_time; // time of record creation - unsigned int check_time; // when to check for result retury, or zero - int appid; // ID of APP record tied to this workunit - int previous_wuid; - bool has_successor; - char name[256]; // Name of the workunit + unsigned int create_time; + int appid; // associated app + char name[256]; char xml_doc[MAX_BLOB_SIZE]; int batch; double rsc_fpops; // estimated # of FP 
operations @@ -214,7 +211,8 @@ struct WORKUNIT { int canonical_resultid; // ID of canonical result, or zero double canonical_credit; // credit that all correct results get double retry_check_time; // when to check for result retry - int state; // see above + int state; // see values above + int delay_bound; // determines result deadline, retry check time // the following not used in the DB char app_name[256]; @@ -303,7 +301,6 @@ extern int db_workunit_new(WORKUNIT& p); extern int db_workunit(int id, WORKUNIT&); extern int db_workunit_update(WORKUNIT& p); extern int db_workunit_lookup_name(WORKUNIT&); -//extern int db_workunit_enum_dynamic_to_send(WORKUNIT&, int); extern int db_workunit_enum_app_need_validate(WORKUNIT&); extern int db_workunit_enum_retry_check_time(WORKUNIT&); @@ -311,7 +308,7 @@ extern int db_result_new(RESULT& p); extern int db_result(int id, RESULT&); extern int db_result_update(RESULT& p); extern int db_result_lookup_name(RESULT& p); -extern int db_result_enum_to_send(RESULT&, int); +extern int db_result_enum_state(RESULT&, int); extern int db_result_enum_wuid(RESULT&); extern int db_result_count_state(int state, int&); diff --git a/db/db_mysql.C b/db/db_mysql.C index ea2791d88a..0c82429ed4 100644 --- a/db/db_mysql.C +++ b/db/db_mysql.C @@ -190,18 +190,18 @@ void struct_to_str(void* vp, char* q, int type) { case TYPE_WORKUNIT: wup = (WORKUNIT*)vp; sprintf(q, - "id=%d, create_time=%d, appid=%d, previous_wuid=%d, " - "has_successor=%d, name='%s', xml_doc='%s', batch=%d, " + "id=%d, create_time=%d, appid=%d, " + "name='%s', xml_doc='%s', batch=%d, " "rsc_fpops=%f, rsc_iops=%f, rsc_memory=%f, rsc_disk=%f, " "need_validate=%d, " "canonical_resultid=%d, canonical_credit=%f, " - "retry_check_time=%f, state=%d", - wup->id, wup->create_time, wup->appid, wup->previous_wuid, - wup->has_successor?1:0, wup->name, wup->xml_doc, wup->batch, + "retry_check_time=%f, delay_bound=%d, state=%d", + wup->id, wup->create_time, wup->appid, + wup->name, wup->xml_doc, 
wup->batch, wup->rsc_fpops, wup->rsc_iops, wup->rsc_memory, wup->rsc_disk, wup->need_validate, wup->canonical_resultid, wup->canonical_credit, - wup->retry_check_time, wup->state + wup->retry_check_time, wup->delay_bound, wup->state ); break; case TYPE_RESULT: @@ -350,8 +350,6 @@ void row_to_struct(MYSQL_ROW& r, void* vp, int type) { wup->id = atoi(r[i++]); wup->create_time = atoi(r[i++]); wup->appid = atoi(r[i++]); - wup->previous_wuid = atoi(r[i++]); - wup->has_successor = (atoi(r[i++])!=0); strcpy2(wup->name, r[i++]); strcpy2(wup->xml_doc, r[i++]); wup->batch = atoi(r[i++]); @@ -363,6 +361,7 @@ void row_to_struct(MYSQL_ROW& r, void* vp, int type) { wup->canonical_resultid = atoi(r[i++]); wup->canonical_credit = atof(r[i++]); wup->retry_check_time = atof(r[i++]); + wup->delay_bound = atoi(r[i++]); wup->state = atoi(r[i++]); break; case TYPE_RESULT: @@ -583,7 +582,10 @@ int db_workunit_enum_retry_check_time(WORKUNIT& p) { char buf[256]; if (!e.active) { - sprintf(buf, "where retry_check_time > 0 and retry_check_time < %f", p.retry_check_time); + sprintf(buf, + "where appid=%d and retry_check_time > 0 and retry_check_time < %f", + p.appid, p.retry_check_time + ); } return db_enum(e, &p, TYPE_WORKUNIT, buf); } @@ -610,11 +612,11 @@ int db_result_lookup_name(RESULT& p) { return db_lookup(&p, TYPE_RESULT, buf); } -int db_result_enum_to_send(RESULT& p, int limit) { +int db_result_enum_state(RESULT& p, int limit) { static ENUM e; char buf[256]; - if (!e.active) sprintf(buf, "where state=%d", RESULT_STATE_UNSENT); + if (!e.active) sprintf(buf, "where state=%d", p.state); return db_enum(e, &p, TYPE_RESULT, buf, limit); } diff --git a/db/schema.sql b/db/schema.sql index 06440a3e54..8027e69c3e 100644 --- a/db/schema.sql +++ b/db/schema.sql @@ -116,8 +116,6 @@ create table workunit ( id integer not null auto_increment, create_time integer not null, appid integer not null, - previous_wuid integer not null, - has_successor smallint not null, name varchar(254) not null, xml_doc 
blob, batch integer not null, @@ -129,6 +127,7 @@ create table workunit ( canonical_resultid integer not null, canonical_credit double not null, retry_check_time double not null, + delay_bound integer not null, state integer not null, primary key (id) ); diff --git a/doc/result.html b/doc/result.html index aa2a70bf22..91c768311c 100644 --- a/doc/result.html +++ b/doc/result.html @@ -9,20 +9,14 @@ The attributes of a result include:

+The following attributes are defined after the result is completed: +

+

Results are normally created using the create_work utility.

The XML document listing the output files has the form:

-<file_info>...</file_info>
+<file_info>...</file_info>
 [ ... ]
-<result>
-    <name>foobar</name>
-    <wu_name>blah</wu_name>
-    <exit_status>blah</exit_status>
-    <file_ref>...</file_ref>
+<result>
+    <name>foobar</name>
+    <wu_name>blah</wu_name>
+    <exit_status>blah</exit_status>
+    <file_ref>...</file_ref>
     [ ... ]
-</result>
+</result>
 
The components are:

The XML document describing the sizes and checksums of the output -files is just a list of <file_info> elements, with the +files is just a list of <file_info> elements, with the nbytes and md5_cksum fields present. The project back end must parse this field to find the locations and checksums of output files. -

-Several results may be associated with a single workunit. -Results -may be generated in either of two ways (selected as part of the application): -

diff --git a/doc/tools_work.html b/doc/tools_work.html index db888ec706..5a83547919 100644 --- a/doc/tools_work.html +++ b/doc/tools_work.html @@ -1,77 +1,72 @@ -Generating work +Generating work -

Generating work

+

Generating work

Workunits and results can be created using either a utility program -or a C++ function. +or a C++ function.

-The program +The utility program is

 create_work
-    -appname name
-    -wu_name name
-    -wu_template file
-    -result_template file
-    -nresults n
-    infile_1 ... infile_m
-
-

-appname specifies the name of the application -

-wu_name gives the name of the workunit. + -appname name // application name + -wu_name name // workunit name + -wu_template filename // WU template filename + -result_template filename // result template filename + -nresults n // # of results to create + -db_name x // database name + -db_passwd x // database password + -upload_url x // URL for output file upload + -download_url x // base URL for input file download + -download_dir x // where to move input files + -rsc_fpops x // est. # floating-point ops + -rsc_iops x // est. # integer ops + -rsc_memory x // est. RAM working set size, bytes + -rsc_disk x // est. disk space required + -keyfile x // path of upload private key + -delay_bound x // delay bound for result completion + infile_1 ... infile_m // input files +

--wu_template gives the filename of a template for the -workunit XML document. -This template is macro-substituted as follows: +The workunit template file is macro-substituted as follows:

--result_template gives the filename of a template for the -result XML document. -This template is macro-substituted as follows: +The result file template is macro-substituted as follows:

--nresults gives the number of results to be created. -

-infile_1 ... infile_m are the names of the input files.


-The C++ library (backend_lib.C,h) provides the function: +The C++ library (backend_lib.C,h) provides the function:

 int create_work(
-    int appid,
-    char* wu_name,
+    WORKUNIT&,
     char* wu_template,
-    char* result_template,
+    char* result_template_filename,
     int nresults,
     char* infile_dir,
     char** infiles,
-    int ninfiles
+    int ninfiles,
+    R_RSA_PRIVATE_KEY& key,
+    char* upload_url,
+    char* download_url
 );
-
+

This creates a workunit and one or more results. -The arguments are -the same as for the utility program, except that the application is -identified by its database ID. -

-"infile_dir" is the path of the directory containing the input -files. -

-"infiles" is an array of names of the input files, of length "ninfiles". +The arguments are similar to those of the utility program; +some of the information is passed in the WORKUNIT structure. diff --git a/doc/work.html b/doc/work.html index e33ce2ccd7..19af2c64df 100644 --- a/doc/work.html +++ b/doc/work.html @@ -7,15 +7,17 @@ Workunits are maintained in the workunit table in the BOINC DB. The attributes of a workunit include:

The inputs to a workunit are described by an XML document of the form @@ -23,9 +25,7 @@ The inputs to a workunit are described by an XML document of the form [ <file_info>...</file_info> ] [ ... ] <workunit> - <name>foobar</name> <app_name>blah</app_name> - [ <version_num>1</version_num> ] [ <command_line>-flags xyz</command_line> ] [ <env_vars>name=val&name=val</env_vars> ] [ <file_ref>...</file_ref> ] @@ -34,10 +34,6 @@ The inputs to a workunit are described by an XML document of the form The components are:

A workunit is associated with an application, not with a particular -version or set of versions???. +version or range of versions. If the format of your input data changes in a way that is incompatible with older versions, you must create a new application. diff --git a/html/ops/db.inc b/html/ops/db.inc index 7de6bfda6e..0185eff4aa 100644 --- a/html/ops/db.inc +++ b/html/ops/db.inc @@ -113,6 +113,16 @@ function show_host($host) { } +function retry_state_str($s) { + switch($s) { + case 0: return "Default"; + case 1: return "Send failed"; + case 2: return "Too many errors (may have bug)"; + case 3: return "Too many done (may be nondeterministic)"; + } + return "unknown"; +} + function show_workunit($wu,$show_xml_doc) { start_table(); row("Created", time_str($wu->create_time)); @@ -129,7 +139,10 @@ function show_workunit($wu,$show_xml_doc) { row("Need validate?", $wu->need_validate?"yes":"no"); row("Canonical resultid", $wu->canonical_resultid); row("Canonical credit", $wu->canonical_credit); - row("","id>Show Associated Results"); + row("Retry check time", time_str($wu->retry_check_time)); + row("Retry state", retry_state_str($wu->state)); + row("Delay bound", $wu->delay_bound); + row("","id>Show associated results"); end_table(); echo "

"; } @@ -143,6 +156,17 @@ function res_state_string($s) { case 5: return "Timeout"; case 6: return "Error"; } + return "unknown"; +} + +function validate_state_str($s) { + switch($s) { + case 0: return "Initial"; + case 1: return "Need check"; + case 2: return "Valid"; + case 3: return "Invalid"; + } + return "unknown"; } function show_result($result,$show_xml_docs,$show_stderr,$show_times) { @@ -155,6 +179,7 @@ function show_result($result,$show_xml_docs,$show_stderr,$show_times) { row("Name", $result->name); row("Workunit", "workunitid>" . wu_name_by_id($result->workunitid) . "" ); row("state", res_state_string($result->state)); + row("Deadline", time_str($result->report_deadline)); row("Host ID", "hostid>" . host_name_by_id($result->hostid) . ""); row("Exit Status", $result->exit_status); row("CPU time", $result->cpu_time); @@ -166,7 +191,10 @@ function show_result($result,$show_xml_docs,$show_stderr,$show_times) { row("stderr out", "

".htmlspecialchars($result->stderr_out)."
"); } row("batch", $result->batch); - row("validated", $result->validated); + row("project state", $result->project_state); + row("Validate state", validate_state_str($result->validate_state)); + row("claimed credit", $result->claimed_credit); + row("granted credit", $result->granted_credit); end_table(); echo "

"; } diff --git a/sched/Makefile.in b/sched/Makefile.in index 07aa60d2f8..2e0e5d0446 100644 --- a/sched/Makefile.in +++ b/sched/Makefile.in @@ -16,7 +16,7 @@ CC = g++ $(CFLAGS) CLIBS = @LIBS@ -PROGS = cgi feeder show_shmem file_upload_handler validate_test make_work +PROGS = cgi feeder show_shmem file_upload_handler validate_test make_work result_retry all: $(PROGS) @@ -81,8 +81,16 @@ MAKE_WORK_OBJS = \ RESULT_RETRY_OBJS = \ result_retry.o \ + config.o \ ../db/db_mysql.o \ - ../db/mysql_util.o + ../db/mysql_util.o \ + ../lib/parse.o \ + ../lib/md5_file.o \ + ../lib/md5.o \ + ../lib/crypt.o \ + ../tools/backend_lib.o \ + ../tools/process_result_template.o \ + ../RSAEuro/source/rsaeuro.a FCGI_OBJS = \ handle_request.fcgi.o \ @@ -137,6 +145,9 @@ validate_test: $(VALIDATE_OBJS) make_work: $(MAKE_WORK_OBJS) $(CC) $(MAKE_WORK_OBJS) $(MYSQL_LIBS) $(CLIBS) -o make_work +result_retry: $(RESULT_RETRY_OBJS) + $(CC) $(RESULT_RETRY_OBJS) $(MYSQL_LIBS) $(CLIBS) -o result_retry + fcgi: $(FCGI_OBJS) $(CC) $(FCGI_OBJS) $(MYSQL_LIBS) $(CLIBS) $(FCGI_LIBS) \ -o fcgi diff --git a/sched/feeder.C b/sched/feeder.C index 73b6c26f5c..8c69e270c7 100644 --- a/sched/feeder.C +++ b/sched/feeder.C @@ -116,7 +116,8 @@ void feeder_loop(SCHED_SHMEM* ssp) { restarted_enum = false; for (i=0; inwu_results; i++) { if (!ssp->wu_results[i].present) { - retval = db_result_enum_to_send(result, RESULTS_PER_ENUM); + result.state = RESULT_STATE_UNSENT; + retval = db_result_enum_state(result, RESULTS_PER_ENUM); if (retval) { // if we already restarted the enum on this pass, @@ -130,7 +131,8 @@ void feeder_loop(SCHED_SHMEM* ssp) { // restart the enumeration // restarted_enum = true; - retval = db_result_enum_to_send(result, RESULTS_PER_ENUM); + result.state = RESULT_STATE_UNSENT; + retval = db_result_enum_state(result, RESULTS_PER_ENUM); printf("feeder: restarting enumeration: %d\n", retval); if (retval) { printf("feeder: enumeration returned nothing\n"); diff --git a/sched/handle_request.C 
b/sched/handle_request.C index 84412caa29..dd1d979739 100644 --- a/sched/handle_request.C +++ b/sched/handle_request.C @@ -73,9 +73,10 @@ int insert_after(char* buffer, char* after, char* text) { } // add elements in xml_doc: -// WU name, and estimation of how many seconds it will take +// WU name, app name, +// and estimate of how many seconds it will take on this host // -int insert_wu_tags(WORKUNIT& wu, double seconds) { +int insert_wu_tags(WORKUNIT& wu, double seconds, APP& app) { char buf[256]; int retval; @@ -85,7 +86,10 @@ int insert_wu_tags(WORKUNIT& wu, double seconds) { ); retval = insert_after(wu.xml_doc, "\n", buf); if (retval) return retval; - sprintf(buf, " %s\n", wu.name); + sprintf(buf, + " %s\n %s\n", + wu.name, app.name + ); return insert_after(wu.xml_doc, "\n", buf); } @@ -116,7 +120,7 @@ int add_wu_to_reply( // add time estimate to reply // wu2 = wu; // make copy since we're going to modify its XML field - retval = insert_wu_tags(wu2, seconds_to_complete); + retval = insert_wu_tags(wu2, seconds_to_complete, *app); if (retval) return retval; reply.insert_workunit_unique(wu2); return 0; @@ -382,7 +386,7 @@ int send_work( ) { int i, retval, nresults = 0, seconds_to_fill; WORKUNIT wu; - RESULT result; + RESULT result, result_copy; #if 0 APP* app; char prefix [256]; @@ -415,13 +419,21 @@ int send_work( ); if (retval) continue; - fprintf(stderr, "sending result name %s, id %d\n", result.name, result.id); + fprintf(stderr, + "sending result name %s, id %d\n", + result.name, result.id + ); + + // copy the result so we don't overwrite its XML fields + // + result_copy = result; - retval = insert_name_tags(result, wu); + retval = insert_name_tags(result_copy, wu); if (retval) { fprintf(stderr, "send_work: can't insert name tags\n"); } - reply.insert_result(result); + reply.insert_result(result_copy); + seconds_to_fill -= (int)estimate_duration(wu, reply.host); result.state = RESULT_STATE_IN_PROGRESS; diff --git a/sched/result_retry.C 
b/sched/result_retry.C index 9c1c5799b0..f89dd91e0d 100644 --- a/sched/result_retry.C +++ b/sched/result_retry.C @@ -1,21 +1,27 @@ // result_retry - create new results to make up for lost ones // // result_retry -// [ -dwu n ] -// [ -dresult n ] -// [ -nerror n ] -// [ -ndet n ] -// [ -nredundancy n ] +// -app appname +// [ -nerror n ] if get this many errors, bail on WU +// [ -ndet n ] if get this results w/o consensus, bail +// [ -nredundancy n ] try to get at least this many done results +// [ -asynch ] be asynchronous #include +#include #include #include "db.h" +#include "backend_lib.h" +#include "config.h" int max_errors = 999; int max_done = 999; -int nredundancy = 999; +int nredundancy = 0; int startup_time; +CONFIG config; +R_RSA_PRIVATE_KEY key; +char app_name[256]; // The scheme for generating unique output filenames is as follows. // If the original filename is of the form x__y, @@ -31,62 +37,116 @@ void make_unique_name(char* name) { char buf[256], *p; static int seqno; - sprintf(buf, "%d_%d", startup_time, seqno); + sprintf(buf, "%d_%d", startup_time, seqno++); p = strstr(name, "__"); if (p) { strcpy(p+2, buf); } else { + strcat(name, "__"); strcat(name, buf); } } -// convert a result's XML document to generate new output filenames -// Look for ... elements and convert the name; +// convert a result's XML document to generate new output filenames. +// The input has the form +// +// xxx +// ... +// +// ... +// +// +// xxx +// ... +// +// ... +// +// +// Look for ... elements within +// and make a unique name based on it; // apply the same conversion to the element later on. // -int assign_new_names(char* in, char* out) { - char *p = in, *n1, *n2; - char name[256], buf[MAX_BLOB_SIZE]; +// TODO: this is ad-hoc. Would be nice to use some generic +// XML parsing routines, or XSLT or something. 
+// +int assign_new_names(char* in) { + char *p = in, *n1, *n2, *r; + char name[256], newname[256], element[256], buf[MAX_BLOB_SIZE]; int len; + // notice where the is so we don't try to convert + // the result name + // + r = strstr(in, ""); + while (1) { n1 = strstr(p, ""); if (!n1) break; + + if (n1 > r) break; // don't go past + n1 += strlen(""); n2 = strstr(p, ""); if (!n2) { - fprintf(stderr, "malformed XML:\n%s", in); + fprintf(stderr, "assign_new_names(): malformed XML:\n%s", in); return 1; } len = n2 - n1; memcpy(name, n1, len); name[len] = 0; - make_unique_name(name); + strcpy(newname, name); + make_unique_name(newname); strcpy(buf, n2); - strcpy(n1, name); + strcpy(n1, newname); strcat(n1, buf); + + // replace the name in the element + // + sprintf(element, "%s", name); + n2 = strstr(n1, element); + if (!n2) { + fprintf(stderr, "assign_new_names(): no :\n%s", in); + return 1; + } + strcpy(buf, n2+strlen(element)); + sprintf(element, "%s", newname); + strcpy(n2, element); + strcat(n2, buf); p = n1; } return 0; } -void main_loop() { +bool do_pass(APP& app) { WORKUNIT wu; RESULT result; - int nerrors, ndone; + int nerrors, ndone, retval; unsigned int i, n; + bool did_something = false; wu.retry_check_time = time(0); + wu.appid = app.id; // loop over WUs that are due to be checked // - while (db_workunit_enum_retry_check_time(wu)) { + while (!db_workunit_enum_retry_check_time(wu)) { vector results; + did_something = true; + + // if this WU has a canonical result, we're done + // (this normally doesn't happen since the retry check time + // is zeroed when canonical result found, but just in case). 
+ // + if (wu.canonical_resultid) { + wu.retry_check_time = 0; + goto update_wu; + } + // enumerate all the results for the WU // result.workunitid = wu.id; - while (db_result_enum_wuid(result)) { + while (!db_result_enum_wuid(result)) { results.push_back(result); } @@ -100,8 +160,8 @@ void main_loop() { if (result.state == RESULT_STATE_UNSENT) { fprintf(stderr, "WU %s has unsent result\n", wu.name); wu.state = WU_STATE_SEND_FAIL; - db_workunit_update(wu); - goto next_wu; + wu.retry_check_time = 0; + goto update_wu; } if (result.state == RESULT_STATE_ERROR) { nerrors++; @@ -116,38 +176,96 @@ void main_loop() { if (nerrors > max_errors) { fprintf(stderr, "WU %s has too many errors\n", wu.name); wu.state = WU_STATE_TOO_MANY_ERRORS; - db_workunit_update(wu); - goto next_wu; + wu.retry_check_time = 0; + goto update_wu; } if (ndone > max_done) { fprintf(stderr, "WU %s has too many answers\n", wu.name); wu.state = WU_STATE_TOO_MANY_DONE; - db_workunit_update(wu); - goto next_wu; + wu.retry_check_time = 0; + goto update_wu; } // Generate new results if needed. // Munge the XML of an existing result // to create unique new output filenames. 
// - n = nredundancy - ndone; - for (i=0; i ndone) { + n = nredundancy - ndone; + for (i=0; i - 1sec diff --git a/test/concat_wu b/test/concat_wu index 2db68e077b..39f2005d54 100644 --- a/test/concat_wu +++ b/test/concat_wu @@ -9,7 +9,6 @@ - concat in1 diff --git a/test/sah_result b/test/sah_result index fb78b1d779..166a7ecd11 100644 --- a/test/sah_result +++ b/test/sah_result @@ -6,8 +6,8 @@ 100000 - + outfile.sah - + diff --git a/test/sah_wu b/test/sah_wu index efea48a109..be2af74fb0 100644 --- a/test/sah_wu +++ b/test/sah_wu @@ -4,7 +4,6 @@ - setiathome-3.06 work_unit.sah diff --git a/test/test.inc b/test/test.inc index d742466229..0123f8b226 100644 --- a/test/test.inc +++ b/test/test.inc @@ -54,6 +54,7 @@ function db_query($query) { function run_tool($cmd) { $tool_dir = get_env_var("BOINC_SRC_DIR")."/tools/"; $cmd = $tool_dir.$cmd; + //echo $cmd; PassThru($cmd); } @@ -205,6 +206,7 @@ class Project { PassThru("cp $source_dir/sched/file_upload_handler $this->project_dir/cgi/"); PassThru("cp $source_dir/sched/make_work $this->project_dir/cgi/"); PassThru("cp $source_dir/sched/feeder $this->project_dir/cgi/"); + PassThru("cp $source_dir/sched/result_retry $this->project_dir/cgi/"); PassThru("cp $source_dir/sched/validate_test $this->project_dir/cgi/"); $f = fopen("$this->project_dir/cgi/config.xml", "w"); fputs($f, "\n"); @@ -435,6 +437,7 @@ class Work { var $rsc_iops; var $rsc_fpops; var $rsc_disk; + var $delay_bound; function Work($app) { $this->app = $app; @@ -442,6 +445,7 @@ class Work { $this->rcs_iops = 180000000000; $this->rcs_fpops = 100000000000; $this->rcs_disk = 1000000; + $this->delay_bound = 1000; } function install($project) { @@ -450,7 +454,7 @@ class Work { $x = $this->input_files[$i]; PassThru("cp $x $project->project_dir/download"); } - $cmd = "create_work -db_name $project->db_name -download_dir $project->project_dir/download -upload_url $project->upload_url -download_url $project->download_url/ -keyfile $project->key_dir/upload_private -appname 
$app->name -rsc_iops $this->rcs_iops -rsc_fpops $this->rsc_fpops -rsc_disk $this->rsc_disk -wu_template $this->wu_template -result_template $this->result_template -nresults $this->nresults -wu_name $this->wu_template"; + $cmd = "create_work -db_name $project->db_name -download_dir $project->project_dir/download -upload_url $project->upload_url -download_url $project->download_url/ -keyfile $project->key_dir/upload_private -appname $app->name -rsc_iops $this->rcs_iops -rsc_fpops $this->rsc_fpops -rsc_disk $this->rsc_disk -wu_template $this->wu_template -result_template $this->result_template -nresults $this->nresults -wu_name $this->wu_template -delay_bound $this->delay_bound"; for ($i=0; $iinput_files); $i++) { $x = $this->input_files[$i]; $cmd = $cmd." ".$x; diff --git a/test/test_uc.php b/test/test_uc.php index 7665758b4a..0357ade844 100644 --- a/test/test_uc.php +++ b/test/test_uc.php @@ -26,6 +26,7 @@ $work->wu_template = "uc_wu"; $work->result_template = "uc_result"; $work->nresults = 2; + $work->delay_bound = 10; array_push($work->input_files, "input"); $work->install($project); diff --git a/test/uc_wu b/test/uc_wu index 73e050eca7..6daf5e0145 100644 --- a/test/uc_wu +++ b/test/uc_wu @@ -4,7 +4,6 @@ - upper_case in diff --git a/test/ucs_wu b/test/ucs_wu index e40ebe575e..63a0a1f20c 100644 --- a/test/ucs_wu +++ b/test/ucs_wu @@ -4,7 +4,6 @@ - upper_case in diff --git a/tools/backend_lib.C b/tools/backend_lib.C index 4cefe31b49..cea226bbbf 100644 --- a/tools/backend_lib.C +++ b/tools/backend_lib.C @@ -140,6 +140,29 @@ static int process_wu_template( return 0; } +// Set the time-varying fields of a result to their initial state. 
+// This is used to create clones of existing results, +// so set only the time-varying fields +// +void initialize_result(RESULT& result, WORKUNIT& wu) { + result.id = 0; + result.create_time = time(0); + result.workunitid = wu.id; + result.state = RESULT_STATE_UNSENT; + result.hostid = 0; + result.report_deadline = time(0) + wu.delay_bound; + result.sent_time = 0; + result.received_time = 0; + result.exit_status = 0; + result.cpu_time = 0; + strcpy(result.xml_doc_out, ""); + strcpy(result.stderr_out, ""); + result.project_state = 0; + result.validate_state = VALIDATE_STATE_INITIAL; + result.claimed_credit = 0; + result.granted_credit = 0; +} + // Create a new result for the given WU. // int create_result( @@ -153,18 +176,13 @@ int create_result( int retval; memset(&r, 0, sizeof(r)); - r.report_deadline = time(0) + 1000; - // TODO: pass this in - r.create_time = time(0); - r.workunitid = wu.id; - r.state = RESULT_STATE_UNSENT; - r.validate_state = VALIDATE_STATE_INITIAL; + initialize_result(r, wu); sprintf(r.name, "%s_%s", wu.name, result_name_suffix); sprintf(base_outfile_name, "%s_", r.name); strcpy(result_template_copy, result_template); retval = process_result_template( - result_template, + result_template_copy, key, base_outfile_name, upload_url, download_url @@ -216,9 +234,13 @@ int create_work( } for (i=0; i