<?php

// Documentation page describing workunit and result state transitions.
// page_head(), list_start(), list_item(), list_end() and page_tail() are not
// defined in this file; presumably they are provided by docutil.php.
require_once("docutil.php");

// Page title.
page_head("Workunit and result state transitions");

// Introductory paragraph and the heading of the workunit section.
// The string is raw HTML and is emitted verbatim.
echo "

<p>
The processing of workunits and results
can be described in terms of transitions of their state variables.

<h3>Workunit state variables</h3>
<p>
Workunit parameters are described <a href=work.php>here</a>.

<p>
Workunit state variables are as follows:
";
// Open the list of workunit state variables.
// NOTE(review): list_start()/list_item() live outside this file; presumably
// list_item(name, description) renders one name/description entry - confirm.
list_start();

// Workunit field canonical_resultid: initial value and who sets it.
list_item(
    "canonical_resultid",
    "The ID of the canonical result for this workunit, or zero.
<ul>
<li> Initially zero
<li> Set by the validator (by check_set())
</ul>
"
);

// Workunit field transition_time: every event that (re)schedules the next
// state-transition check for the workunit. The description is raw HTML.
list_item("transition_time",
    "The next time to check for state transitions for this WU.
<ul>
<li>Initially now.
<li>Set to now by scheduler when get a result for this WU.
<li>Set to min(current value, now + delay_bound) by scheduler
when send a result for this WU
<li>Set to min(x.sent_time + wu.delay_bound) over IN_PROGRESS results x
by transitioner when done handling this WU
<li>Set to now by validator if it finds canonical result,
or if there is already a canonical result
and some other results have validate_state = INIT,
or if there is no consensus and the number of successful results
is > wu.max_success_results
</ul>
"
);
// Workunit field file_delete_state: when input files become eligible for
// deletion, including the db_purge caveat. The description is raw HTML.
list_item("file_delete_state",
    "Indicates whether input files should be deleted.
<ul>
<li>Initially INIT
<li>Set to READY by transitioner when all results have server_state=OVER
and wu.assimilate_state=DONE
Note: db_purge purges a WU and all its results when
file_delete_state=DONE;
therefore it is critical that it only be set to DONE
if all results have server_state=OVER.
<li>Set to DONE by file_deleter when it has attempted to delete files.
</ul>
"
);

// Workunit field assimilate_state: INIT -> READY -> DONE and which program
// performs each step. The description is raw HTML.
list_item("assimilate_state",
    "Indicates whether the workunit should be assimilated.
<ul>
<li> Initially INIT
<li> Set to READY by transitioner if wu.assimilate_state=INIT
and WU has error condition
<li> Set to READY by validator when find canonical result
and wu.assimilate_state=INIT
<li> Set to DONE by assimilator when done
</ul>
"
);

// Workunit field need_validate: flag raised by the transitioner and cleared
// by the validator. The description is raw HTML.
list_item("need_validate",
    "Indicates that the workunit has a result that needs validation.
<ul>
<li> Initially FALSE
<li> Set to TRUE by transitioner if the number of success results
is at least wu.min_quorum and there is a success result
not validated yet
<li> Set to FALSE by validator
</ul>
"
);

// Workunit field error_mask: the error bits and which program sets each one.
// The description is raw HTML.
list_item("error_mask",
    "A bit mask for error conditions.
<ul>
<li> Initially zero
<li> Transitioner sets COULDNT_SEND_RESULT if some result couldn't be sent.
<li> Transitioner sets TOO_MANY_RESULTS if too many error results
<li> Transitioner sets TOO_MANY_TOTAL_RESULTS if too many total results
<li> Validator sets TOO_MANY_SUCCESS_RESULTS if no consensus
and too many success results
</ul>
"
);
// Close the list of workunit state variables (list_end() is defined outside
// this file).
list_end();

// Workunit invariants, notes on input-file deletion, and the heading that
// introduces the result state variables. Raw HTML, emitted verbatim.
// The heading is plural because several result variables are listed below it.
echo "

Workunit invariants:
<ul>
<li> eventually either canonical_resultid or error_mask is set
<li> eventually transition_time = infinity
<li> Each WU is assimilated exactly once
</ul>

<p>
Notes on deletion of input files:
<ul>
<li> Input files are eventually deleted,
but only when all results have state=OVER
(so that clients don't get download failures)
and the WU has been assimilated
(in case the project wants to examine input files in error cases).
</ul>

<h3>Result state variables</h3>
Result state variables are listed in the following table:
";


// Open the list of result state variables.
// NOTE(review): list_start()/list_item() live outside this file; presumably
// list_item(name, description) renders one name/description entry - confirm.
list_start();

// Result field report_deadline: the time after which the server gives up on
// the result. The description is raw HTML.
list_item("report_deadline",
    "Give up on result (and possibly delete input files)
if don't get reply by this time.
<ul>
<li> Set by scheduler to now + wu.delay_bound when send result
</ul>
"
);
// Result field server_state: UNSENT -> IN_PROGRESS -> OVER and every event
// that moves a result to OVER. The description is raw HTML.
list_item("server_state",
    "Values: UNSENT, IN_PROGRESS, OVER
<ul>
<li> Initially UNSENT
<li> Set by scheduler to IN_PROGRESS when send result
<li> Set by scheduler to OVER when result is reported
in request message from client.
<li> Set by scheduler to OVER when it thinks
host has detached project.
<li> Set by transitioner to OVER if now > result.report_deadline
<li> Set by transitioner to OVER if WU has error condition
and result.server_state=UNSENT
<li> Set by validator to OVER if WU has canonical result
and result.server_state=UNSENT
</ul>
"
);
// Result field outcome: the possible values and which program assigns each.
// The description is raw HTML.
// The transitioner's NO_REPLY rule is written as "now > report_deadline":
// a result is timed out once its deadline has passed, which is the same
// condition the server_state entry gives for the transitioner setting OVER.
list_item("outcome",
    "Values: SUCCESS, COULDNT_SEND, CLIENT_ERROR, NO_REPLY, DIDNT_NEED,
VALIDATE_ERROR, CLIENT_DETACHED.
<br>Defined iff result.server_state=OVER
<ul>
<li> Set by scheduler to SUCCESS if get reply and no client error
<li> Set by scheduler to CLIENT_ERROR if get reply and client error
<li> Set by scheduler to NO_REPLY if it thinks host has detached project.
<li> Set by transitioner to NO_REPLY if server_state=IN_PROGRESS
and now > report_deadline
<li> Set by transitioner to DIDNT_NEED if WU has error condition
and result.server_state=UNSENT
<li> Set by validator to DIDNT_NEED if WU has canonical result
and result.server_state=UNSENT
<li> Set by validator to VALIDATE_ERROR if outcome was initially
SUCCESS, but the validator had a permanent error reading a result file,
or a file had a syntax error.
Prevents the validator from trying again.
<li> Set by scheduler to CLIENT_DETACHED if it gets a request
indicating that the client detached, then reattached
</ul>
"
);
// Result field client_state: the client-side stage at which an error
// occurred. The description is raw HTML.
list_item("client_state",
    "Records the client state (DOWNLOADING, DOWNLOADED,
COMPUTE_ERROR, UPLOADING, UPLOADED, ABORTED)
where an error occurred.
Defined if outcome is CLIENT_ERROR.
"
);

// Result field file_delete_state: conditions under which the transitioner
// marks a result's output files as ready for deletion.
// The description is raw HTML; "!=" is used for both inequality tests so the
// text contains no "<>" sequence that could be read as markup.
list_item("file_delete_state",
    "
<ul>
<li> Initially INIT
<li> Set by transitioner to READY if this is the canonical result,
and file_delete_state=INIT,
and wu.assimilate_state=DONE,
and all the results have server_state=OVER,
and all the results with outcome=SUCCESS have validate_state!=INIT
<li> Set by transitioner to READY if wu.assimilate_state=DONE
and result.outcome=CLIENT_ERROR
or result.validate_state!=INIT
</ul>
"
);

// Result field validate_state: the possible values and which program assigns
// each. The description is raw HTML.
list_item("validate_state",
    "
Defined iff result.outcome=SUCCESS
<ul>
<li> Initially INIT
<li> Set by validator to VALID if outcome=SUCCESS and matches canonical result
<li> Set by validator to INVALID if outcome=SUCCESS and doesn't match canonical result
<li> Set by transitioner to NO_CHECK if the WU had an error;
this avoids showing claimed credit as 'pending'.
<li> Set by validator to ERROR if outcome=SUCCESS and
had a permanent error trying to read an output file,
or an output file had a syntax error.
<li> Set by validator to INCONCLUSIVE if check_set()
didn't find a consensus in a set of results containing this one.
<li> Set by scheduler to TOO_LATE if the result was reported
after the canonical result's files were deleted.

</ul>
"
);
// Close the list of result state variables (list_end() is defined outside
// this file).
list_end();

// Result invariants, notes on output-file deletion, and an open design
// question about very late uploads. Raw HTML, emitted verbatim.
echo "

<p>
Result invariants:
<ul>
<li> Eventually server_state = OVER.
<li> Output files are eventually deleted.
</ul>
Notes on deletion of output files:
<ul>
<li> Non-canonical results can be deleted as soon as the WU is assimilated.
<li> Canonical results can be deleted only when all results have server_state=OVER and all success results are validated.
<li> If a result reply arrives after its timeout,
the output files can be immediately deleted.

</ul>
How do we delete output files that arrive REALLY late?
(e.g. uploaded after all results have timed out, and never reported)?
Possible answer:
let X = create time of oldest unassimilated WU.
Any output files created before X can be deleted.
";
// Emit the page footer (page_tail() is defined outside this file).
// The closing PHP tag is deliberately omitted: in a PHP-only file, anything
// after a closing tag would be sent to the browser as page output.
page_tail();