file upload permanent errors

svn path=/trunk/boinc/; revision=1159
This commit is contained in:
David Anderson 2003-05-06 21:43:26 +00:00
parent 77f8686412
commit 20984fa488
12 changed files with 352 additions and 81 deletions

View File

@ -4049,3 +4049,37 @@ David Apr 7 2003
*.C
tools/
*.C
Erik May 6 2003
- Added a notion of "transient" and "permanent" errors on file uploads.
A permanent error is returned by the file upload handler if
- the request is malformed
- filename includes ".."
- the signature is bad
- the file size is larger than allowed
PERS_FILE_XFER checks for this return from FILE_XFER
and gives up immediately.
- FILE_XFER now always parses the server response (not just for file size)
- Added some missing error checks to FILE_XFER
- Punted on the timezone-related ifdefs:
added hardwired ifdefs for linux and unix.
Need to revisit this
- in PHP test library, add a provision for multiple files per application
client/
file_xfer.C
hostinfo_unix.C
pers_file_xfer.C,h
doc/
client.html
index.html
old_news.html (new)
result.fig
setiathome.jpg
upload.html
lib/
error_numbers.h
sched/
file_upload_handler.C
test/
test.inc

View File

@ -23,6 +23,7 @@
#include "file_names.h"
#include "filesys.h"
#include "log_flags.h"
#include "message.h"
#include "file_xfer.h"
#include "parse.h"
#include "error_numbers.h"
@ -108,16 +109,24 @@ int FILE_XFER::init_upload(FILE_INFO& file_info) {
// Parse the server response in req1
//
int FILE_XFER::parse_server_response(double &nbytes) {
int status = 0;
int status = ERR_UPLOAD_TRANSIENT, x;
char buf[256];
nbytes = -1;
parse_double(req1, "<file_size>", nbytes);
parse_int(req1, "<status>", status);
if (parse_int(req1, "<status>", x)) {
switch (x) {
case -1: status = ERR_UPLOAD_PERMANENT; break;
case 0: status = 0; break;
case 1: status = ERR_UPLOAD_TRANSIENT; break;
default: status = ERR_UPLOAD_TRANSIENT; break;
}
} else {
status = ERR_UPLOAD_TRANSIENT;
}
// TODO: show error message to user
//
if (parse_str(req1, "<message>", buf, sizeof(buf))) {
fprintf(stderr, "%s\n", buf);
show_message(fip->project, buf, MSG_ERROR);
}
return status;
@ -177,28 +186,29 @@ bool FILE_XFER_SET::poll() {
printf("http op done; retval %d\n", fxp->http_op_retval);
}
if (fxp->http_op_retval == 0) {
fxp->file_xfer_retval = fxp->parse_server_response(
fxp->fip->upload_offset
);
// If this was a file size query, restart the transfer
// using the remote file size information
//
if (fxp->file_size_query) {
retval = fxp->parse_server_response(fxp->fip->upload_offset);
if (retval) {
if (fxp->file_xfer_retval) {
printf("ERROR: file upload returned %d\n", retval);
fxp->fip->upload_offset = -1;
fxp->file_xfer_retval = retval;
} else {
remove(fxp);
i--;
// Restart the upload, using the newly obtained upload_offset
retval = fxp->init_upload(*fxp->fip);
fxp->file_xfer_retval = fxp->init_upload(*fxp->fip);
if (!retval) {
retval = insert(fxp);
if (!retval) {
if (!fxp->file_xfer_retval) {
fxp->file_xfer_retval = insert(fxp);
if (!fxp->file_xfer_retval) {
fxp->file_xfer_done = false;
fxp->file_xfer_retval = 0;
fxp->http_op_retval = 0;
}
}

View File

@ -102,9 +102,9 @@ int get_timezone( void ) {
cur_time = time(NULL);
time_data = localtime( &cur_time );
return time_data->tm_gmtoff;
#elif defined(__timezone)
#elif defined(linux)
return __timezone;
#elif defined(timezone)
#elif defined(unix)
return timezone;
#else
#error timezone
@ -178,14 +178,12 @@ void get_host_disk_info( double &total_space, double &free_space ) {
// General function to get all relevant host information
//
int get_host_info(HOST_INFO& host) {
get_host_disk_info( host.d_total, host.d_free );
#if HAVE_SYS_SYSCTL_H
int mib[2], mem_size;
size_t len;
#endif
get_host_disk_info( host.d_total, host.d_free );
#if defined(HAVE_SYS_SYSCTL_H) && defined(CTL_HW) && defined(HW_MACHINE) && defined(HW_MODEL)
// Get machine
mib[0] = CTL_HW;
mib[1] = HW_MACHINE;
@ -197,8 +195,6 @@ int get_host_info(HOST_INFO& host) {
mib[1] = HW_MODEL;
len = sizeof(host.p_model);
sysctl(mib, 2, &host.p_model, &len, NULL, 0);
#else
#error Need to specify a method to obtain vendor/model
#endif
#ifdef linux

View File

@ -211,6 +211,8 @@ bool PERS_FILE_XFER::poll(time_t now) {
}
xfer_done = true;
}
} else if (fxp->file_xfer_retval == ERR_UPLOAD_PERMANENT) {
giveup();
} else {
handle_xfer_failure(now);
}
@ -226,10 +228,24 @@ bool PERS_FILE_XFER::poll(time_t now) {
return false;
}
// Abandon this persistent file transfer:
// set the file's status to the upload or download "give up" error code,
// mark the transfer as done, and pass an error message to show_message().
//
void PERS_FILE_XFER::giveup() {
    char buf[256];

    if (is_upload) {
        fip->status = ERR_GIVEUP_UPLOAD;
    } else {
        fip->status = ERR_GIVEUP_DOWNLOAD;
    }
    xfer_done = true;

    // Bound the file name with a precision so the formatted message
    // always fits in buf: 31 chars of fixed text + at most 200 chars of name
    // + 2 + at most 11 chars for the %d value + NUL is less than 256.
    // An unbounded %s could overrun the buffer for a long file name.
    //
    sprintf(buf,
        "Giving up on file transfer for %.200s: %d",
        fip->name, fip->status
    );
    show_message(fip->project, buf, MSG_ERROR);
}
// Handle a transfer failure
//
void PERS_FILE_XFER::handle_xfer_failure(time_t cur_time) {
char buf[256];
// If it was a bad range request, delete the file and start over
//
@ -242,17 +258,7 @@ void PERS_FILE_XFER::handle_xfer_failure(time_t cur_time) {
// See if it's time to give up on the persistent file xfer
//
if ((cur_time - first_request_time) > gstate.file_xfer_giveup_period) {
if (is_upload) {
fip->status = ERR_GIVEUP_UPLOAD;
} else {
fip->status = ERR_GIVEUP_DOWNLOAD;
}
xfer_done = true;
sprintf(buf,
"Giving up on file transfer for %s: %d",
fip->name, fip->status
);
show_message(fip->project, buf, MSG_ERROR);
giveup();
}
}

View File

@ -31,11 +31,12 @@
// using an exponential backoff policy to avoid flooding servers.
// For upload, try to upload the file to the first URL;
// if that fails try the others.
// if that gets a transient failure, try the others.
#define PERS_RETRY_DELAY_MIN 60 // 1 minute
#define PERS_RETRY_DELAY_MAX (60*60*4) // 4 hours
#define PERS_GIVEUP (60*60*24*7*2) // 2 weeks
// give up on xfer if this time elapses since last byte xferred
#ifdef DEBUG
#define PERS_RETRY_DELAY_MIN 1
@ -43,8 +44,6 @@
#define PERS_GIVEUP 30
#endif
// give up on xfer if this time elapses since last byte xferred
class PERS_FILE_XFER {
int nretry; // # of retries so far
int first_request_time; // UNIX time of first file request
@ -62,6 +61,7 @@ public:
bool poll(time_t now);
void handle_xfer_failure(time_t cur_time);
void retry_or_backoff(time_t cur_time);
void giveup();
int write(FILE* fout);
int parse(FILE* fin);
bool start_xfer();

View File

@ -3,21 +3,124 @@
<h2>Running the client</h2>
<p>
<hr>
<h3>The Windows GUI client</h3>
<h3>BOINC for Windows</h3>
<p>
TO BE WRITTEN
<h4>Language files</h4>
The text strings (menu names, etc.) in the Windows GUI client are stored in
The <b>BOINC work manager</b> program controls
the use of your computer's disk, network, and processor resources.
It is normally started at boot time.
It is represented by an icon in the system tray.
Double-click on the icon to open the work manager window.
Right-click on the icon to:
<ul>
<li> <b>Suspend</b>: this stops current work.
<li> <b>Resume</b>: this resumes work.
<li> <b>Exit</b>: this exits the work manager and all running BOINC applications.
No further work will take place until you run the work manager again.
</ul>
<p>
When the icon is flashing,
there is an unread error message.
To view it, open the work manager window and go to the Messages tab.
<p>
The work manager window has several tabs:
<ul>
<li> <b>Projects</b>:
Shows the projects in which you're participating.
Right-click on a project name to:
<ul>
<li> Visit its web site.
<li> Update preferences.
This will connect to the project
and get the newest version of your preferences.
(Note: BOINC preferences are managed using the web,
rather than in the application.
This makes it easy to manage preferences for a number of computers.)
<li> Detach from the project.
Your computer will stop working for the project.
<li> Clear project state.
This stops the project's current work, if any,
and starts from scratch.
Use this if BOINC has become stuck for some reason.
</ul>
<li> <b>Work</b>:
Shows the work units currently on your computer.
Each work unit is either
<ul>
<li>Downloading: input files are being downloaded.
<li>Ready: waiting to run.
An estimate of the total CPU time is shown.
<li>In progress: currently running.
Elapsed CPU time and estimated percent done are shown.
<li>Uploading: output files are being uploaded.
<li>Done: waiting to notify the scheduling server.
</ul>
Right-click on a work unit to:
<ul>
<li> Open a window showing application graphics for the work unit.
</ul>
<li> <b>Transfers</b>:
Shows file transfers (uploads and downloads).
These may be ready to start, in progress, or completed.
<li> <b>Messages</b>:
Shows status and error messages.
Messages can be sorted by project or time.
You can <a href=messages.html>control what messages are shown</a>.
Messages are also written to a file "messages.txt".
<li> <b>Disk</b>:
This shows how much disk space is available for use by BOINC,
and how much is currently being used by each project.
</ul>
The work manager's menu items are as follows:
<ul>
<li> <b>File</b>
<ul>
<li> <b>Clear Messages</b>: clear the message window and file.
<li> <b>Clear Inactive</b>: clear entries in the Transfers and Work window
that are completed.
<li> <b>Suspend</b>: this stops current work.
<li> <b>Resume</b>: this resumes work.
<li> <b>Close</b>: close the work manager window.
This does not exit the work manager.
To do this, use the Exit command on the system tray icon menu.
</ul>
<li> <b>Settings</b>
<ul>
<li> <b>Attach to Project</b>:
Enroll this computer in a project.
You must have already created an account with the project.
You will be asked to enter the project's URL and your account ID.
<li> <b>Detach from Project</b>:
Stop using this computer for a project.
<li> <b>Proxy Server</b>:
If you connect to the web through an HTTP or SOCKS proxy,
enter its address and port here.
</ul>
<li> <b>Help</b>
<li> <b>Help</b>: show this web page.
<li> <b>About</b>: show work manager version number.
</ul>
<li> <b>Help</b>
</ul>
<p>
Menu names and other text in the work manager are stored in
a file called <i>language.ini</i>.
The release uses American English.
Other languages are available
<a href=http://216.198.119.31/BOINC/language_ini/language.htm>here</a>
(thanks to Robi Buechler and other volunteers for this).
<hr>
<h3>The command-line client</h3>
<p>
Command-line options:
The <b>BOINC screensaver</b> can be selected using the Display Properties dialog.
The BOINC screensaver draws graphics from a running application,
if any is available.
Otherwise it draws the BOINC logo bouncing around the screen.
<hr>
<h3>The BOINC command-line client</h3>
<p>
The command line client has several options:
<dl>
<dt> -add_new_project
<dd> Enroll in a new project.

View File

@ -69,6 +69,23 @@ is hosted at </font>
<center>
<h3>Status and news</h3>
</center>
<b>April 29, 2003</b>
<br>
We have created a second project, allowing beta testers to
experiment with dividing their resources between multiple projects.
<br><br>
<b>April 23, 2003</b>
<br>
We have resumed the BOINC beta test.
<br><br>
<b>April 9, 2003</b>
<br>
Due to a legal issue,
we are suspending the BOINC beta test,
and we have requested that BOINC source code
no longer be available at Sourceforge.net.
We hope to resolve this issue soon.
<br><br>
<b>March 31, 2003</b>
<br>
We are preparing a BOINC-based version of SETI@home.
@ -93,31 +110,5 @@ Participants can have separate preferences
(such as network and disk usage limits)
for computers at home, work, and school.
<br><br>
<b>February 25, 2003</b>
<br>
Participants can limit their upstream and downstream network bandwidth.
<br><br>
<b>February 22, 2003</b>
<br>
Participants can control the parameters (color, transparency,
timing) of the Astropulse graphics.
<br><br>
<b>February 19, 2003</b>
New feature: secure, verified email address update.
<br><br>
<b>January 29, 2003</b>
<br>
A <a href=http://setiathome.berkeley.edu/~eheien/ap_ss.jpg>screenshot</a> of the
BOINC client running AstroPulse, our first test application.
<br><br>
<b>December 10, 2002</b>
<br>
We have started a beta test of BOINC using
the Astropulse application.
Many bugs have been found and fixed.
<br><br>
<b>August 24, 2002</b>
<br>BOINC is under development.
The basic features are working on UNIX platforms.
We plan to release the first public application of BOINC later this year.
<a href=old_news.html>Archived news</a>
</td> </tr></table>

27
doc/old_news.html Normal file
View File

@ -0,0 +1,27 @@
<b>February 25, 2003</b>
<br>
Participants can limit their upstream and downstream network bandwidth.
<br><br>
<b>February 22, 2003</b>
<br>
Participants can control the parameters (color, transparency,
timing) of the Astropulse graphics.
<br><br>
<b>February 19, 2003</b>
New feature: secure, verified email address update.
<br><br>
<b>January 29, 2003</b>
<br>
A <a href=http://setiathome.berkeley.edu/~eheien/ap_ss.jpg>screenshot</a> of the
BOINC client running AstroPulse, our first test application.
<br><br>
<b>December 10, 2002</b>
<br>
We have started a beta test of BOINC using
the Astropulse application.
Many bugs have been found and fixed.
<br><br>
<b>August 24, 2002</b>
<br>BOINC is under development.
The basic features are working on UNIX platforms.
We plan to release the first public application of BOINC later this year.

37
doc/result.fig Normal file
View File

@ -0,0 +1,37 @@
#FIG 2.1
80 2
6 244 79 534 364
6 279 149 344 179
2 2 0 1 -1 0 0 0 0.000 0 0 0
344 179 344 149 279 149 279 179 344 179 9999 9999
4 0 0 12 0 -1 0 0.00000 4 16 48 289 169 UNSENT
-6
6 264 234 359 264
2 2 0 1 -1 0 0 0 0.000 0 0 0
359 264 359 234 264 234 264 264 359 264 9999 9999
4 0 0 12 0 -1 0 0.00000 4 16 81 274 254 IN_PROGRESS
-6
6 284 334 334 364
2 2 0 1 -1 0 0 0 0.000 0 0 0
334 364 334 334 284 334 284 364 334 364 9999 9999
4 0 0 12 0 -1 0 0.00000 4 16 33 289 354 OVER
-6
2 1 0 1 -1 0 0 0 0.000 -1 1 0
0 0 1.000 4.000 8.000
309 79 309 149 9999 9999
2 1 0 1 -1 0 0 0 0.000 -1 1 0
0 0 1.000 4.000 8.000
309 179 309 234 9999 9999
2 1 0 1 -1 0 0 0 0.000 -1 1 0
0 0 1.000 4.000 8.000
309 264 309 334 9999 9999
2 1 0 1 -1 0 0 0 0.000 -1 1 0
0 0 1.000 4.000 8.000
279 164 244 164 244 349 284 349 9999 9999
4 0 0 12 0 -1 0 0.00000 4 16 26 324 109 initial
4 0 0 12 0 -1 0 0.00000 4 16 143 329 309 scheduler gets reply from host
4 0 0 12 0 -1 0 0.00000 4 16 203 329 328 or now > report_deadline in timeout_check
4 0 0 12 0 -1 0 0.00000 4 16 124 319 204 scheduler sends this result
-6
4 0 0 12 0 -1 0 0.00000 4 16 152 84 269 or timeout_check: WU has error
4 0 0 12 0 -1 0 0.00000 4 16 193 44 249 validate: got canonical result for this WU

BIN
doc/setiathome.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 285 KiB

View File

@ -1,3 +1,4 @@
<body bgcolor=ffffff>
<title>Data server protocol</title>
<h2>Data server protocol</h2>
@ -41,12 +42,17 @@ The reply message has the form:
| &lt;file_size>nbytes&lt;/file_size> ]
&lt;/data_server_reply>
</pre>
The &lt;file_size> element is omitted in error cases;
nbytes is -1 if the file doesn't exist.
There are no security mechanisms.
TODO: should distinguish between permanent and transient errors
(i.e. whether should retry or give up).
Status is
<ul>
<li> 0 on success.
Nbytes is 0 if the file doesn't exist.
<li> 1 on transient error.
The client should try another data server, or try this one later.
<li> -1 on permanent error.
The client should give up on the result.
</ul>
In the error cases, the &lt;file_size> element is omitted
and the &lt;message> element gives an explanation.
<p>
<b>2) Upload file</b>
<p>
@ -65,7 +71,7 @@ Request message format:
&lt;nbytes>x&lt;/nbytes>
&lt;offset>x&lt;/offset>
&lt;data>
... (nbytes bytes of data; may be binary)
... (nbytes bytes of data; may include non-ASCII data)
&lt;/data>
</pre>
<p>
@ -80,9 +86,19 @@ Reply message format:
<pre>
&lt;data_server_reply>
&lt;status>x&lt;/status>
&lt;message>x&lt;/message>
&lt;message>text&lt;/message>
&lt;/data_server_reply>
</pre>
The status is zero if the operation succeeded.
TODO: should distinguish between permanent and transient errors
(i.e. whether should retry or give up).
Status is
<ul>
<li> 0 on success.
<li> 1 on transient error.
The client should try another data server, or try this one later.
<li> -1 on permanent error.
The client should give up on the result.
</ul>
In the error cases, the &lt;message> element gives an explanation.
<p>
TODO: if there's an error in the header
(bad signature, or file is too large)
the client should learn this without actually uploading the file.

51
todo
View File

@ -21,11 +21,39 @@ BUGS (arranged from high to low priority)
run_minimized
run_on_startup
hangup_if_dialed
- trim leading/trailing spaces from account ID (Win GUI)
- astropulse wanted to display screensaver graphics when it wasn't supposed to,
causing the screen to flicker wildly and uncontrollably until ap.exe was killed
This happened on 98 and 2000. recreate using Display Properties, preview
- I entered in a wrong URL - there was no obvious feedback that it wasn't correct.
Messages showed up in the messages tab, but I was looking at the progress tab.
Shouldn't the client expect something from the server? If it doesn't get it,
especially on logging in for the first time, you should get an obvious warning.
- when i quit a project, I have to exit and restart the client, which is ugly.
- after quitting a project, the project name still showed up in gray in the projects
list - I could right click on it to "relogin" (which did nothing) or "quit project"
which I thought I already did.
- consider warning message during windows (and perhaps other platforms)
install that checks to see if the BOINC directory already exists,
and if so, should the user overwrite it? or upgrade it?
- Screensaver on win98 shows up as "boinc_ss" in the screensaver list. Should be "BOINC"
- When previewing the screensaver (on Win98) I had the AP graphics already on the
desktop - the screensaver just shows a bouncing "B" icon. Should that happen?
- After running all night (on Win98) I shook the mouse to wake up the blank screen, and
all I saw was that the top half of the screen was solid gray, and the bottom half showed the
bottom half of the astropulse graphics. They weren't moving. The computer was frozen.
I had to ctrl-alt-del to restart.
-----------------------
HIGH-PRIORITY (should do for beta test)
-----------------------
On "add project" the core client should immediately attempt to
get project master page and verify user account.
If failure, let user retype URL/ID
test cpu-counting on red hat
Delete files if needed to honor disk usage constraint
should include per-result constraints (e.g. giant stderr files)
inform user if files deleted
@ -38,6 +66,7 @@ implement server watchdogs
-----------------------
THINGS TO TEST (preferably with test scripts)
-----------------------
time-of-day limitation
Limit frequency of disk writes
make sure it actually works
- Test suspend/resume functionality on Windows/UNIX
@ -56,6 +85,28 @@ Limit frequency of disk writes
MEDIUM-PRIORITY (should do before public release)
-----------------------
add an RPC to verify an account ID (returns DB ID for user)
needed for multi-project stats sites
implement a "clear project" command (GUI and cmdline)
implement a "fetch prefs" command (regular RPC w/o work request)
all RPCs should return a "user-specific project URL"
to be used in GUI (might refer to user page)
screensaver should say (in text below logo) if
- core client isn't running
- core client running, but in no-work period
- in work period, but no app w/ graphics to run
when you create a team you should join that team
in GUI, project name should hyperlink to a project-specified URL
(typically user page for that project)
preference flag for confirm before accepting executable file
abort result if any file exceeds max_nbytes
per-WU limits