mirror of https://github.com/BOINC/boinc.git
parent
646718a2b8
commit
d3942cc197
|
@ -2263,3 +2263,16 @@ Eric October 29, 2002
|
|||
mac_carbon_dsp.c,h
|
||||
mac_carbon_gl.c,h
|
||||
|
||||
David Nov 5 2002
|
||||
- Continued work on result retry mechanism:
|
||||
- new fields "retry_check_time" and "state" in workunit table
|
||||
- logic for creating new output filenames
|
||||
- needs a bit more work
|
||||
db/
|
||||
db.h
|
||||
db_mysql.C
|
||||
schema.sql
|
||||
sched/
|
||||
Makefile.in
|
||||
make_work.C
|
||||
result_retry.C
|
||||
|
|
10
db/db.h
10
db/db.h
|
@ -188,6 +188,13 @@ struct HOST {
|
|||
int parse_net_stats(FILE*);
|
||||
};
|
||||
|
||||
// Values for the "state" field of WORKUNIT.
#define WU_STATE_SEND_FAIL 1
|
||||
// failed to send results for this WU
|
||||
#define WU_STATE_TOO_MANY_ERRORS 2
|
||||
// too many results ended in error; the WU may have a bug
|
||||
#define WU_STATE_TOO_MANY_DONE 3
|
||||
// too many results completed without consensus; the WU may be nondeterministic
|
||||
|
||||
struct WORKUNIT {
|
||||
int id;
|
||||
unsigned int create_time; // time of record creation
|
||||
|
@ -206,6 +213,8 @@ struct WORKUNIT {
|
|||
// VALIDATE_STATE_NEED_CHECK state
|
||||
int canonical_resultid; // ID of canonical result, or zero
|
||||
double canonical_credit; // credit that all correct results get
|
||||
double retry_check_time; // when to check for result retry
|
||||
int state; // see above
|
||||
|
||||
// the following not used in the DB
|
||||
char app_name[256];
|
||||
|
@ -296,6 +305,7 @@ extern int db_workunit_update(WORKUNIT& p);
|
|||
extern int db_workunit_lookup_name(WORKUNIT&);
|
||||
//extern int db_workunit_enum_dynamic_to_send(WORKUNIT&, int);
|
||||
extern int db_workunit_enum_app_need_validate(WORKUNIT&);
|
||||
extern int db_workunit_enum_retry_check_time(WORKUNIT&);
|
||||
|
||||
extern int db_result_new(RESULT& p);
|
||||
extern int db_result(int id, RESULT&);
|
||||
|
|
|
@ -194,12 +194,14 @@ void struct_to_str(void* vp, char* q, int type) {
|
|||
"has_successor=%d, name='%s', xml_doc='%s', batch=%d, "
|
||||
"rsc_fpops=%f, rsc_iops=%f, rsc_memory=%f, rsc_disk=%f, "
|
||||
"need_validate=%d, "
|
||||
"canonical_resultid=%d, canonical_credit=%f",
|
||||
"canonical_resultid=%d, canonical_credit=%f, "
|
||||
"retry_check_time=%f, state=%d",
|
||||
wup->id, wup->create_time, wup->appid, wup->previous_wuid,
|
||||
wup->has_successor?1:0, wup->name, wup->xml_doc, wup->batch,
|
||||
wup->rsc_fpops, wup->rsc_iops, wup->rsc_memory, wup->rsc_disk,
|
||||
wup->need_validate,
|
||||
wup->canonical_resultid, wup->canonical_credit
|
||||
wup->canonical_resultid, wup->canonical_credit,
|
||||
wup->retry_check_time, wup->state
|
||||
);
|
||||
break;
|
||||
case TYPE_RESULT:
|
||||
|
@ -360,6 +362,8 @@ void row_to_struct(MYSQL_ROW& r, void* vp, int type) {
|
|||
wup->need_validate = atoi(r[i++]);
|
||||
wup->canonical_resultid = atoi(r[i++]);
|
||||
wup->canonical_credit = atof(r[i++]);
|
||||
wup->retry_check_time = atof(r[i++]);
|
||||
wup->state = atoi(r[i++]);
|
||||
break;
|
||||
case TYPE_RESULT:
|
||||
rp = (RESULT*)vp;
|
||||
|
@ -574,6 +578,16 @@ int db_workunit_enum_app_need_validate(WORKUNIT& p) {
|
|||
return db_enum(e, &p, TYPE_WORKUNIT, buf);
|
||||
}
|
||||
|
||||
// Enumerate workunits whose retry_check_time is positive and earlier than
// the value passed in p.retry_check_time.
//
// The WHERE clause is built only when the enumeration is not already active;
// on later calls an empty clause is handed to db_enum.
// The enumeration state is a function-level static, so all callers share
// one enumeration.
// The return value is whatever db_enum returns.
//
int db_workunit_enum_retry_check_time(WORKUNIT& p) {
    static ENUM e;
    // Zero-initialized so db_enum never sees indeterminate bytes.
    char buf[256] = "";

    if (!e.active) {
        // "%f" of a very large double can exceed 300 characters;
        // snprintf keeps the write inside buf.
        snprintf(
            buf, sizeof(buf),
            "where retry_check_time > 0 and retry_check_time < %f",
            p.retry_check_time
        );
    }
    return db_enum(e, &p, TYPE_WORKUNIT, buf);
}
|
||||
|
||||
|
||||
////////// RESULT /////////
|
||||
|
||||
|
|
|
@ -128,6 +128,8 @@ create table workunit (
|
|||
need_validate smallint not null,
|
||||
canonical_resultid integer not null,
|
||||
canonical_credit double not null,
|
||||
retry_check_time double not null,
|
||||
state integer not null,
|
||||
primary key (id)
|
||||
);
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
<p>
|
||||
The <b>back end</b> of a BOINC project consists
|
||||
of a number of components that together are responsible
|
||||
for generating work and absorbing the results.
|
||||
for generating work and handling results.
|
||||
Some of these components are supplied by BOINC;
|
||||
other parts are project- or application-specific:
|
||||
|
||||
|
@ -14,47 +14,68 @@ other parts are project- or application-specific:
|
|||
<th>BOINC-supplied part</th>
|
||||
<th>project-supplied part</th>
|
||||
<tr>
|
||||
<td>
|
||||
<b>Work generator</b>: generates work units and their input files,
|
||||
and generates one or more results per work unit.
|
||||
<td valign=top>
|
||||
<b>Work generator</b>: generates work units, work sequences, results,
|
||||
and the corresponding input files.
|
||||
</td>
|
||||
<td>
|
||||
<td valign=top>
|
||||
Functions and programs that handle the details of
|
||||
creating workunit and result database records.
|
||||
creating workunit, work sequence, and result database records.
|
||||
</td>
|
||||
<td>
|
||||
<td valign=top>
|
||||
Programs or scripts that generate input files,
|
||||
install them on data servers,
|
||||
and call the BOINC functions.
|
||||
</td></tr>
|
||||
<tr>
|
||||
<td><b>Result retry generator</b>:
|
||||
generates additional results if some of those initially sent are lost.</td>
|
||||
<td>A program, <b>result_retry</b>, that does the job.</td>
|
||||
<td>Some parameters used by result_retry.</td>
|
||||
<td valign=top><b>Result retry generator</b>:
|
||||
generates additional results if some of those initially sent are lost,
|
||||
not completed, or completed erroneously.</td>
|
||||
<td valign=top>A program, <b>result_retry</b>, that does the job.</td>
|
||||
<td valign=top>Some parameters used by result_retry.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>Result validation and accounting</b>:
|
||||
<td valign=top><b>Result validation and accounting</b>:
|
||||
compare redundant results; select a <b>canonical result</b>
|
||||
representing the correct output,
|
||||
and a <b>canonical credit</b> granted to users and hosts
|
||||
that return the correct output.</td>
|
||||
<td>A program, <b>validate</b>, that contains the
|
||||
<td valign=top>A program, <b>validate</b>, that contains the
|
||||
basic logic for validation.</td>
|
||||
<td>An application-specific function, linked with <b>validate</b>,
|
||||
<td valign=top>An application-specific function, linked with <b>validate</b>,
|
||||
that compares sets of redundant results.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>Result processing</b>: parse output files of
|
||||
completed results, and take appropriate action
|
||||
(record results in a database, and/or generate more work)</td>
|
||||
<td>Function for enumerating unprocessed results.</td>
|
||||
<td>The rest.</td>
|
||||
<td valign=top><b>Work sequence relocater</b>:
|
||||
detects work sequences whose hosts have failed,
|
||||
and relocates them to other hosts.</td>
|
||||
<td valign=top>A program <b>seq_relocate</b></td>
|
||||
<td valign=top>Some parameters used by seq_relocate</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>Garbage collector</b>: delete input and output files
|
||||
<td valign=top><b>Work sequence validation and accounting</b>:
|
||||
Similar to result validation, but for work sequences.
|
||||
</td>
|
||||
<td valign=top>A program <b>seq_validate</b></td>
|
||||
<td valign=top>An application-specific function,
|
||||
linked with <b>seq_validate</b>, that compares sets of redundant results.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign=top><b>Result processing</b>: parse output files of
|
||||
completed results, and take appropriate action
|
||||
(record results in a database, and/or generate more work)</td>
|
||||
<td valign=top>Function for enumerating unprocessed results.
|
||||
Functions and programs that handle the details of
|
||||
creating workunit, work sequence, and result database records.
|
||||
</td>
|
||||
<td valign=top>
|
||||
The rest.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign=top><b>Garbage collector</b>: delete input and output files
|
||||
when they are no longer needed.</td>
|
||||
<td>A program, <b>garbage_collect</b>, that does the job.</td>
|
||||
<td>None.</td>
|
||||
<td valign=top>A program, <b>garbage_collect</b>, that does the job.</td>
|
||||
<td valign=top>None.</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
|
|
@ -79,6 +79,11 @@ MAKE_WORK_OBJS = \
|
|||
../lib/crypt.o \
|
||||
../RSAEuro/source/rsaeuro.a
|
||||
|
||||
RESULT_RETRY_OBJS = \
|
||||
result_retry.o \
|
||||
../db/db_mysql.o \
|
||||
../db/mysql_util.o
|
||||
|
||||
FCGI_OBJS = \
|
||||
handle_request.fcgi.o \
|
||||
main.fcgi.o \
|
||||
|
|
|
@ -6,7 +6,8 @@
|
|||
// [ -redundancy n ]
|
||||
// [ -cushion n ]
|
||||
//
|
||||
// Create WU and result records as needed to maintain a pool of work.
|
||||
// Create WU and result records as needed to maintain a pool of work
|
||||
// (for testing purposes).
|
||||
// Makes a new WU for every "redundancy" results.
|
||||
// Clones the WU of the given name.
|
||||
//
|
||||
|
|
|
@ -7,29 +7,93 @@
|
|||
// [ -ndet n ]
|
||||
// [ -nredundancy n ]
|
||||
|
||||
#include <vector>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "db.h"
|
||||
|
||||
// Tunables, overridden from the command line in main().
int max_errors = 999;       // set by -nerror
int max_done = 999;         // set by -ndet
int nredundancy = 999;      // set by -nredundancy
int startup_time;           // set once in main(), used in generated names

// The scheme for generating unique output filenames is as follows.
// If the original filename is of the form x__y,
// then y is replaced with a string of the form time_seqno,
// where "time" is when this program started up
// and "seqno" is a counter that increases by one on every call.
// NOTE: if you ever need to start up multiple copies of this,
// you'll need to add a PID in there somewhere.
//
// If the original filename doesn't have __, add a string
// of the form __time_seqno
//
// "name" is rewritten in place; the caller must supply a buffer with
// room for the suffix (at most 25 characters plus the terminating NUL).
//
void make_unique_name(char* name) {
    char suffix[64], *p;
    static int seqno;

    // seqno is bumped on every call so that two names generated
    // within the same run never collide.
    snprintf(suffix, sizeof(suffix), "%d_%d", startup_time, seqno++);
    p = strstr(name, "__");
    if (p) {
        // keep "x__", replace everything after it
        strcpy(p+2, suffix);
    } else {
        strcat(name, "__");
        strcat(name, suffix);
    }
}
|
||||
|
||||
// convert a result's XML document to generate new output filenames
|
||||
// Look for <name>...</name> elements and convert the name;
|
||||
// apply the same conversion to the <file_name> element later on.
|
||||
//
|
||||
int assign_new_names(char* in, char* out) {
|
||||
char *p = in, *n1, *n2;
|
||||
char name[256], buf[MAX_BLOB_SIZE];
|
||||
int len;
|
||||
|
||||
while (1) {
|
||||
n1 = strstr(p, "<name>");
|
||||
if (!n1) break;
|
||||
n1 += strlen("<name>");
|
||||
n2 = strstr(p, "</name>");
|
||||
if (!n2) {
|
||||
fprintf(stderr, "malformed XML:\n%s", in);
|
||||
return 1;
|
||||
}
|
||||
len = n2 - n1;
|
||||
memcpy(name, n1, len);
|
||||
name[len] = 0;
|
||||
make_unique_name(name);
|
||||
strcpy(buf, n2);
|
||||
strcpy(n1, name);
|
||||
strcat(n1, buf);
|
||||
p = n1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main_loop() {
|
||||
WORKUNIT wu;
|
||||
RESULT result;
|
||||
int nerrors, ndone;
|
||||
unsigned int i, n;
|
||||
|
||||
wu.retry_check_time = time(0);
|
||||
|
||||
// loop over WUs that are due to be checked
|
||||
//
|
||||
while (db_workunit_enum_check_time(wu)) {
|
||||
while (db_workunit_enum_retry_check_time(wu)) {
|
||||
vector<RESULT> results;
|
||||
|
||||
// enumerate all the results for the WU
|
||||
//
|
||||
result.workunitid = wu.id;
|
||||
while (db_result_enum_workunitid(result)) {
|
||||
while (db_result_enum_wuid(result)) {
|
||||
results.push_back(result);
|
||||
}
|
||||
|
||||
nerrors = 0;
|
||||
ndone = 0;
|
||||
for (i=0; i<results.size(); i++) {
|
||||
result = result[i];
|
||||
result = results[i];
|
||||
|
||||
// if any result is unsent, give up on the WU
|
||||
//
|
||||
|
@ -53,16 +117,18 @@ void main_loop() {
|
|||
fprintf(stderr, "WU %s has too many errors\n", wu.name);
|
||||
wu.state = WU_STATE_TOO_MANY_ERRORS;
|
||||
db_workunit_update(wu);
|
||||
go next_wu;
|
||||
goto next_wu;
|
||||
}
|
||||
if (ndone > max_done) {
|
||||
fprintf(stderr, "WU %s has too many answers\n", wu.name);
|
||||
wu.state = WU_STATE_TOO_MANY_DONE;
|
||||
db_workunit_update(wu);
|
||||
go next_wu;
|
||||
goto next_wu;
|
||||
}
|
||||
|
||||
// generate new results if needed
|
||||
// Generate new results if needed.
|
||||
// Munge the XML of an existing result
|
||||
// to create unique new output filenames.
|
||||
//
|
||||
n = nredundancy - ndone;
|
||||
for (i=0; i<n; i++) {
|
||||
|
@ -76,4 +142,30 @@ next_wu:
|
|||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
int i;
|
||||
bool asynch = false;
|
||||
|
||||
startup_time = time(0);
|
||||
for (i=1; i<argc; i++) {
|
||||
if (!strcmp(argv[i], "-nerror")) {
|
||||
max_errors = atoi(argv[++i]);
|
||||
} else if (!strcmp(argv[i], "-ndet")) {
|
||||
max_done = atoi(argv[++i]);
|
||||
} else if (!strcmp(argv[i], "-asynch")) {
|
||||
asynch = true;
|
||||
} else if (!strcmp(argv[i], "-nredundancy")) {
|
||||
nredundancy = atoi(argv[++i]);;
|
||||
}
|
||||
}
|
||||
if (asynch) {
|
||||
if (fork()==0) {
|
||||
while(1) {
|
||||
main_loop();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
while (1) {
|
||||
main_loop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue