mirror of https://github.com/BOINC/boinc.git
*** empty log message ***
svn path=/trunk/boinc/; revision=10258
This commit is contained in:
parent
50e72f72f9
commit
923075d005
|
@ -5468,3 +5468,28 @@ Charlie 6 June 2006
|
|||
mac_build/
|
||||
boinc.xcodeproj/
|
||||
project.pbxproj
|
||||
|
||||
David 6 June 2006
|
||||
- scheduler: if we think a host has detached
|
||||
(because of zero host ID but existing CPID)
|
||||
marks its in-progress results as NO_REPLY, not CLIENT_ERROR
|
||||
- scheduler: remove special handling for reported results
|
||||
that are OVER/CLIENT_ERROR
|
||||
(with above change, this shouldn't happen)
|
||||
- scheduler: if we can't attach to shared memory,
|
||||
return "Project encountered internal error" to client,
|
||||
not misleading "Project is temporarily shut down for maintenance"
|
||||
- shmem-related functions: call perror() on error
|
||||
- make_project: <show_results>, not <show_result>
|
||||
|
||||
db/
|
||||
boinc_db.h
|
||||
html/ops/
|
||||
watchdog.php
|
||||
lib/
|
||||
shmem.C
|
||||
py/Boinc/
|
||||
setup_project.py
|
||||
sched/
|
||||
handle_request.C
|
||||
main.C
|
||||
|
|
|
@ -362,10 +362,8 @@ struct WORKUNIT {
|
|||
// especially for a project already running -
|
||||
// the database will become inconsistent
|
||||
|
||||
#define RESULT_SERVER_STATE_INACTIVE 1
|
||||
//#define RESULT_SERVER_STATE_INACTIVE 1
|
||||
#define RESULT_SERVER_STATE_UNSENT 2
|
||||
#define RESULT_SERVER_STATE_UNSENT_SEQ 3
|
||||
// unsent, part of a work sequence
|
||||
#define RESULT_SERVER_STATE_IN_PROGRESS 4
|
||||
#define RESULT_SERVER_STATE_OVER 5
|
||||
// we received a reply, timed out, or decided not to send.
|
||||
|
|
|
@ -23,8 +23,16 @@ echo "
|
|||
<li><a href=\"#Symbol Stores\">Symbol Stores</a>
|
||||
<ul>
|
||||
<li><a href=\"#SymIntroduction\">Introduction</a>
|
||||
<li><a href=\"#SymRequirements\">Requirements</a>
|
||||
<li><a href=\"#SymProject\">Project Symbol Store</a>
|
||||
<li><a href=\"#SymAdd\">Adding symbols to the symbol store</a>
|
||||
<li><a href=\"#SymUpload\">Uploading symbols to the symbol store</a>
|
||||
</ul>
|
||||
<li><a href=\"#Common Issues\">Common Issues</a>
|
||||
<ul>
|
||||
<li><a href=\"#CommonIntroduction\">Introduction</a>
|
||||
<li><a href=\"#Common0xc0000096\">Privileged Instruction (0xc0000096)</a>
|
||||
</ul>
|
||||
</ul>
|
||||
|
||||
<h3><a name=\"Anatomy of a Windows stack trace\">Anatomy of a Windows stack trace</a></h3>
|
||||
|
@ -94,7 +102,7 @@ callstack which you can use to diagnose problems.
|
|||
<p>
|
||||
Export symbols usually only appear on DLLs since DLLs
|
||||
can export function pointers via the export table.
|
||||
When you see this in the module list you'll only see functions which
|
||||
When you see this in the module list you'll only see functions which
|
||||
are listed in the export table in the callstack.
|
||||
<p>
|
||||
No symbols means that the runtime debugger could not determine a way to give you any
|
||||
|
@ -283,11 +291,136 @@ This feature will probably be removed in the future.
|
|||
<h3><a name=\"Symbol Stores\">Symbol Stores</a></h3>
|
||||
<h4><a name=\"SymIntroduction\">Introduction</a></h4>
|
||||
<p>
|
||||
|
||||
In order to obtain useful diagnostic information in the event of an application crash,
|
||||
it is necessary to dump a callstack and any other relevant information about what was
|
||||
going on at the time of the crash. Symbols are only needed during a crash event,
|
||||
therefore they are stripped from most applications to cut down on the binary size and
|
||||
bandwidth requirements to deploy a new release.
|
||||
<p>
|
||||
Without symbols, callstacks tend to be nothing more than a list of function pointers
|
||||
in memory. A developer has to load the un-stripped executable in memory using the
|
||||
same operating system and similar processor to jump to that memory address in order
|
||||
to determine the function name and parameters. This is very labor intensive and
|
||||
generally not a very fun job.
|
||||
<p>
|
||||
Microsoft created a technology called a 'Symbol Store' to use with their debugger
|
||||
technology which allows Windows debuggers to locate and download compressed symbol
|
||||
files to diagnose problems and convert function pointers into human readable text.
|
||||
This greatly speeds up the process of diagnosing and fixing bugs.
|
||||
<p>
|
||||
With the BOINC Runtime Debugger for Windows framework a project can publish their
|
||||
symbol files and only have to distribute the application to each of the BOINC
|
||||
clients. When a crash event occurs the runtime framework will download the symbol
|
||||
file from the symbol store and then proceed to dump as much diagnostic information
|
||||
as possible to help projects diagnose the failure.
|
||||
<p>
|
||||
<h4><a name=\"SymRequirements\">Requirements</a></h4>
|
||||
<p>
|
||||
You'll need the latest stable release of the
|
||||
<a href='http://www.microsoft.com/whdc/devtools/debugging/default.mspx'>
|
||||
Debugging Tools for Windows.
|
||||
</a>
|
||||
<p>
|
||||
Verify that your executable is set up to generate PDB debugging symbols for a release
|
||||
build.
|
||||
<p>
|
||||
Verify that the advanced linker option to generate a checksum is enabled for a release
|
||||
build.
|
||||
<p>
|
||||
You'll need to explicitly name both your EXE and PDB before compilation since
|
||||
the debugger bases the name of the PDB file off of information that is stored in the
|
||||
executable header.
|
||||
<p>
|
||||
<h4><a name=\"SymProject\">Project Symbol Store</a></h4>
|
||||
<p>
|
||||
Specifying a project wide symbol store is as easy as adding the symstore element
|
||||
to your config.xml file for the project.
|
||||
<p>
|
||||
Below is an XML shred with an example symstore element.
|
||||
<p>
|
||||
". html_text("
|
||||
<boinc>
|
||||
<config>
|
||||
<symstore>http://sample.example.com/symstore</symstore>
|
||||
</config>
|
||||
</boinc>
|
||||
")."
|
||||
<p>
|
||||
<h4><a name=\"SymAdd\">Adding symbols to the symbol store</a></h4>
|
||||
<p>
|
||||
<a href='http://msdn.microsoft.com/library/default.asp?url=/library/en-us/debug/base/using_symstore.asp'>Symstore</a>
|
||||
is a utility to manage symbol stores. You'll want to create a local symbol store on
|
||||
your Windows build machine in which you'll initially add new symbol files with each
|
||||
revision of your application.
|
||||
<p>
|
||||
Symstore will compress the symbol file and then copy it into your local symbol store.
|
||||
<p>
|
||||
Below is an example command which you can run from the Windows command line or
|
||||
cygwin command line.
|
||||
<p>
|
||||
"; block_start(); echo "
|
||||
symstore.exe add /l /f c:\SampleSrc\*.pdb /s c:\symstore /compress /t \"Sample\" /v \"5.02\" /o /c \"Application Release\"
|
||||
"; block_end(); echo "
|
||||
<p>
|
||||
<h4><a name=\"SymUpload\">Uploading symbols to the symbol store</a></h4>
|
||||
<p>
|
||||
Most projects tend to use scp to copy files between Windows machines and their project
|
||||
server.
|
||||
<p>
|
||||
The example below copies the entire symstore to the target location. After the copy
|
||||
operation you can delete all the subdirectories except '000Admin' to save time uploading
|
||||
for future application symbols.
|
||||
<p>
|
||||
"; block_start(); echo "
|
||||
pscp.exe -r -C -batch c:\symstore sample@project.example.com:projects/sample/html/user/symstore
|
||||
"; block_end(); echo "
|
||||
<p>
|
||||
<h3><a name=\"Common Issues\">Common Issues</a></h3>
|
||||
<h4><a name=\"CommonIntroduction\">Introduction</a></h4>
|
||||
<p>
|
||||
<p>
|
||||
<h4><a name=\"Common0xc0000096\">Privileged Instruction (0xc0000096)</a></h4>
|
||||
"; block_start(); echo "
|
||||
- Unhandled Exception Record -
|
||||
Reason: Privileged Instruction (0xc0000096) at address 0x008E9808
|
||||
|
||||
- Registers -
|
||||
eax=00000400 ebx=00000000 ecx=00002922 edx=00b0c650 esi=01e1f7ec edi=027e2abc
|
||||
eip=008e9808 esp=01e1f778 ebp=ffffffff
|
||||
cs=001b ss=0023 ds=0023 es=0023 fs=003b gs=0000 efl=00010202
|
||||
|
||||
- Callstack -
|
||||
ChildEBP RetAddr Args to Child
|
||||
01e1f7b4 008ea16b 3f4bcaf9 3f827d51 01e1f7ec 01e1f7fc rosetta_beta_5.19_windows_intel!spherical+0x1 (rosetta++\structure.cc:1436)
|
||||
01e1f83c 008ec11f 00b38180 00000003 00000009 01e1f974 rosetta_beta_5.19_windows_intel!HSpair_score+0x0 (rosetta++\structure.cc:367)
|
||||
01e1f854 008b6d18 00b38058 00b3805c 00b38180 00b381b0 rosetta_beta_5.19_windows_intel!evaluate_ss+0x6 (rosetta++\structure.cc:102)
|
||||
01e1f974 00937bf6 a8af5c9d 0001c3f9 00001473 00000100 rosetta_beta_5.19_windows_intel!scorefxn+0x25 (rosetta++\score.cc:190)
|
||||
01e1f9b0 005e435f 00000009 008b7960 0001c3f9 a8af5cd5 rosetta_beta_5.19_windows_intel!main_frag_trial+0x4 (rosetta++\torsion_bbmove_trials.cc:446)
|
||||
01e1fb74 006f1c01 a8af5e9d 3030302e 3c303030 00000000 rosetta_beta_5.19_windows_intel!fold_abinitio+0xc (rosetta++\fold_abinitio.cc:270)
|
||||
01e1ffb0 006363c0 7c80b50b 00000000 3030302e 3c303030 rosetta_beta_5.19_windows_intel!main_rosetta+0x5 (rosetta++\main.cc:343)
|
||||
01e1ffb4 7c80b50b 00000000 3030302e 3c303030 00000000 rosetta_beta_5.19_windows_intel!foobar+0x0 (boinc\api\graphics_impl.c:75)
|
||||
01e1ffec 00000000 006363b0 00000000 00000000 00000000 kernel32!_BaseThreadStart@8+0x0 (boinc\api\graphics_impl.c:75)
|
||||
"; block_end(); echo "
|
||||
<p>
|
||||
In this example it appears the processor took exception to the fact that a user mode
|
||||
process attempted to push a kernel mode address onto the stack without first switching
|
||||
to kernel mode.
|
||||
<p>
|
||||
Look at the EBP register, 'ffffffff' when converted into a signed int is equal to '-1'
|
||||
and when converted to an unsigned int it is just under 4GB. On Windows anything above 2GB
|
||||
is considered a kernel mode address. If the Windows machine supports PAE and the /3GB
|
||||
boot option is specified in BOOT.INI then kernel addresses will start at 3GB instead.
|
||||
<p>
|
||||
What has probably happened here is that a function is about to be called and a 'push EBP'
|
||||
instruction was executed to push a new address onto the stack; the CPU threw the exception
|
||||
since the address was outside user mode land. EBP should have had a similar progression
|
||||
as the ChildEBP values of all the other stack frames.
|
||||
<p>
|
||||
If EBP had some random kernel mode address it would be pretty easy to dismiss this as
|
||||
a CPU overheating. 'ffffffff' raises the question: is the stack being overwritten by an
|
||||
error result from another function?
|
||||
<p>
|
||||
Investigation of this issue is still ongoing.
|
||||
";
|
||||
|
||||
page_tail();
|
||||
|
|
|
@ -133,8 +133,11 @@ list_item("server_state",
|
|||
<ul>
|
||||
<li> Initially UNSENT
|
||||
<li> Set by scheduler to IN_PROGRESS when send result
|
||||
<li> Set by scheduler to OVER when get reply from client
|
||||
<li> Set by transitioner to OVER if now > result.report_deadline
|
||||
<li> Set by scheduler to OVER when result is reported
|
||||
in request message from client.
|
||||
<li> Set by scheduler to OVER when it thinks
|
||||
host has detached project.
|
||||
<li> Set by transitioner to OVER if now > result.report_deadline
|
||||
<li> Set by transitioner to OVER if WU has error condition
|
||||
and result.server_state=UNSENT
|
||||
<li> Set by validator to OVER if WU has canonical result
|
||||
|
@ -143,22 +146,29 @@ list_item("server_state",
|
|||
"
|
||||
);
|
||||
list_item("outcome",
|
||||
"Values: SUCCESS, COULDNT_SEND, CLIENT_ERROR, NO_REPLY, DIDNT_NEED.
|
||||
"Values: SUCCESS, COULDNT_SEND, CLIENT_ERROR, NO_REPLY, DIDNT_NEED,
|
||||
VALIDATE_ERROR.
|
||||
<br>Defined iff result.server_state=OVER
|
||||
<ul>
|
||||
<li> Set by scheduler to SUCCESS if get reply and no client error
|
||||
<li> Set by scheduler to CLIENT_ERROR if get reply and client error
|
||||
<li> Set by scheduler to NO_REPLY if it thinks host has detached project.
|
||||
<li> Set by transitioner to NO_REPLY if server_state=IN_PROGRESS
|
||||
and now<report_deadline
|
||||
and now > report_deadline
|
||||
<li> Set by transitioner to DIDNT_NEED if WU has error condition
|
||||
and result.server_state=UNSENT
|
||||
<li> Set by validator to DIDNT_NEED if WU has canonical result
|
||||
and result.server_state=UNSENT
|
||||
<li> Set by validator to VALIDATE_ERROR if outcome was initially
|
||||
SUCCESS, but the validator had a permanent error reading a result file,
|
||||
or a file had a syntax error.
|
||||
Prevents the validator from trying again.
|
||||
</ul>
|
||||
"
|
||||
);
|
||||
list_item("client_state",
|
||||
"Records the client state (upload, process, or download)
|
||||
"Records the client state (DOWNLOADING, DOWNLOADED,
|
||||
COMPUTE_ERROR, UPLOADING, UPLOADED, ABORTED)
|
||||
where an error occurred.
|
||||
Defined if outcome is CLIENT_ERROR.
|
||||
"
|
||||
|
@ -172,7 +182,7 @@ list_item("file_delete_state",
|
|||
and file_delete_state=INIT,
|
||||
and wu.assimilate_state=DONE,
|
||||
and all the results have server_state=OVER,
|
||||
and all all the results with outcome=SUCCESS have validate_state<>INIT
|
||||
and all the results with outcome=SUCCESS have validate_state<>INIT
|
||||
<li> Set by transitioner to READY if wu.assimilate_state=DONE
|
||||
and result.outcome=CLIENT_ERROR
|
||||
or result.validate_state!=INIT
|
||||
|
@ -187,11 +197,16 @@ list_item("validate_state",
|
|||
<li> Initially INIT
|
||||
<li> Set by validator to VALID if outcome=SUCCESS and matches canonical result
|
||||
<li> Set by validator to INVALID if outcome=SUCCESS and doesn't match canonical result
|
||||
<li> Set by transitioner to NO_CHECK if the WU had an error;
|
||||
this avoids showing claimed credit as 'pending'.
|
||||
<li> Set by validator to ERROR if outcome=SUCCESS and
|
||||
had a permanent error trying to read an output file,
|
||||
or an output file had a syntax error.
|
||||
<li> Set by validator to INCONCLUSIVE if check_set()
|
||||
didn't find a consensus in a set of results containing this one.
|
||||
<li> Set by scheduler to TOO_LATE if the result was reported
|
||||
after the canonical result's files were deleted.
|
||||
|
||||
</ul>
|
||||
"
|
||||
);
|
||||
|
|
|
@ -146,7 +146,6 @@ foreach ($projects as $p) {
|
|||
echo "
|
||||
</ul>
|
||||
<p>
|
||||
BOINC is an open-source software platform for volunteer computing.
|
||||
You can participate in several projects, ensuring that
|
||||
your computer will be kept busy even when one project has no work.
|
||||
<p>
|
||||
|
|
|
@ -296,14 +296,14 @@ Let X be the set of scheduled results that are not currently running,
|
|||
let Y be the set of running results that are not scheduled,
|
||||
and let T be the time the scheduler last ran.
|
||||
The enforcement policy is as follows:
|
||||
<ul>
|
||||
<ol>
|
||||
<li> If deadline_missed(R) for some R in X,
|
||||
then preempt a result in Y, and run R
|
||||
(preempt the result with the least CPU wall time since checkpoint).
|
||||
Repeat as needed.
|
||||
<li> If there is a result R in Y that checkpointed more recently than T,
|
||||
then preempt R and run a result in X.
|
||||
</ul>
|
||||
</ol>
|
||||
|
||||
|
||||
|
||||
|
@ -362,6 +362,10 @@ if total_shortfall > 0
|
|||
and are proportional to P.resource_share
|
||||
</pre>
|
||||
|
||||
<p>
|
||||
For non-CPU-intensive projects,
|
||||
P.work_request_size is set to 1 if P has no nearly-runnable result,
|
||||
otherwise 0.
|
||||
<p>
|
||||
The scheduler RPC mechanism may select a project to contact
|
||||
because of a user request, an outstanding trickle-up message,
|
||||
|
|
|
@ -15,8 +15,7 @@
|
|||
|
||||
function sound_alarm($x) {
|
||||
//echo "alarm: $x\n";
|
||||
mail("davea@ssl.berkeley.edu", "BOINC problem", $x);
|
||||
mail("eheien@ssl.berkeley.edu", "BOINC problem", $x);
|
||||
mail(SYS_ADMIN_EMAIL, "BOINC problem", $x);
|
||||
}
|
||||
|
||||
function check_log_file($file, $last_time) {
|
||||
|
|
|
@ -166,6 +166,7 @@ int create_shmem(key_t key, int size, void** pp) {
|
|||
id = shmget(key, size, IPC_CREAT|SHM_R|SHM_W);
|
||||
}
|
||||
if (id < 0) {
|
||||
perror("shmget");
|
||||
return ERR_SHMGET;
|
||||
}
|
||||
return attach_shmem(key, pp);
|
||||
|
@ -179,9 +180,13 @@ int destroy_shmem(key_t key){
|
|||
id = shmget(key, 0, 0);
|
||||
if (id < 0) return 0; // assume it doesn't exist
|
||||
retval = shmctl(id, IPC_STAT, &buf);
|
||||
if (retval) return ERR_SHMCTL;
|
||||
if (retval) {
|
||||
perror("shmctl STAT");
|
||||
return ERR_SHMCTL;
|
||||
}
|
||||
retval = shmctl(id, IPC_RMID, 0);
|
||||
if (retval) {
|
||||
perror("shmctl RMID");
|
||||
return ERR_SHMCTL;
|
||||
}
|
||||
return 0;
|
||||
|
@ -193,10 +198,12 @@ int attach_shmem(key_t key, void** pp){
|
|||
|
||||
id = shmget(key, 0, 0);
|
||||
if (id < 0) {
|
||||
perror("shmget");
|
||||
return ERR_SHMGET;
|
||||
}
|
||||
p = shmat(id, 0, 0);
|
||||
if ((long)p == -1) {
|
||||
perror("shmat");
|
||||
return ERR_SHMAT;
|
||||
}
|
||||
*pp = p;
|
||||
|
|
|
@ -385,7 +385,7 @@ class Project:
|
|||
config.max_wus_to_send = 50
|
||||
config.daily_result_quota = 500
|
||||
config.disable_account_creation = 1
|
||||
config.show_result = 1
|
||||
config.show_results = 1
|
||||
|
||||
config.master_url = master_url or os.path.join(options.html_url , self.short_name , '')
|
||||
config.download_url = os.path.join(config.master_url, 'download')
|
||||
|
|
|
@ -125,10 +125,15 @@ static bool find_host_by_cpid(DB_USER& user, char* host_cpid, DB_HOST& host) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// scan in-progress results for the given host,
|
||||
// and mark them as done, client error
|
||||
// Called when there's evidence that the host has detached.
|
||||
// Mark in-progress results for the given host
|
||||
// as server state OVER, outcome NO_REPLY.
|
||||
// This serves two purposes:
|
||||
// 1) make sure we don't resend these results to the host
|
||||
// (they may be the reason the user detached)
|
||||
// 2) trigger the generation of new results for these WUs
|
||||
//
|
||||
static void mark_results_aborted(DB_HOST& host) {
|
||||
static void mark_results_over(DB_HOST& host) {
|
||||
char buf[256], buf2[256];
|
||||
DB_RESULT result;
|
||||
sprintf(buf, "where hostid=%d and server_state=%d",
|
||||
|
@ -139,7 +144,7 @@ static void mark_results_aborted(DB_HOST& host) {
|
|||
sprintf(buf2,
|
||||
"server_state=%d, outcome=%d",
|
||||
RESULT_SERVER_STATE_OVER,
|
||||
RESULT_OUTCOME_CLIENT_ERROR
|
||||
RESULT_OUTCOME_NO_REPLY
|
||||
);
|
||||
result.update_field(buf2);
|
||||
|
||||
|
@ -305,7 +310,7 @@ lookup_user_and_make_new_host:
|
|||
"[HOST#%d] [USER#%d] User has another host with same CPID.\n",
|
||||
host.id, host.userid
|
||||
);
|
||||
mark_results_aborted(host);
|
||||
mark_results_over(host);
|
||||
goto got_host;
|
||||
}
|
||||
}
|
||||
|
@ -617,7 +622,9 @@ int handle_results(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
|
|||
// which suppresses the DB update later on
|
||||
//
|
||||
|
||||
// If result is ALREADY over, do we replace it??
|
||||
// If result has server_state OVER
|
||||
// if outcome NO_REPLY accept it (it's just late).
|
||||
// else ignore it
|
||||
//
|
||||
if (srip->server_state == RESULT_SERVER_STATE_OVER) {
|
||||
char *dont_replace_result = NULL;
|
||||
|
@ -635,14 +642,8 @@ int handle_results(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
|
|||
dont_replace_result = "this work could NOT be sent";
|
||||
break;
|
||||
case RESULT_OUTCOME_CLIENT_ERROR:
|
||||
// result was previously cancelled on server side.
|
||||
// keep this new, real result ONLY if validator has
|
||||
// not already been invoked.
|
||||
if (srip->validate_state != VALIDATE_STATE_INIT) {
|
||||
dont_replace_result = "result ALREADY reported as error, or canceled on server";
|
||||
} else if (srip->file_delete_state != FILE_DELETE_INIT) {
|
||||
dont_replace_result = "result ALREADY reported as error or canceled on server, and deleted";
|
||||
}
|
||||
// should never happen!
|
||||
dont_replace_result = "result ALREADY reported as error";
|
||||
break;
|
||||
case RESULT_OUTCOME_NO_REPLY:
|
||||
// result is late in arriving, but keep it anyhow
|
||||
|
|
41
sched/main.C
41
sched/main.C
|
@ -76,7 +76,7 @@ GUI_URLS gui_urls;
|
|||
key_t sema_key;
|
||||
int g_pid;
|
||||
static bool db_opened=false;
|
||||
bool project_stopped = false;
|
||||
bool shmem_failed = false;
|
||||
|
||||
void send_message(const char* msg, int delay, bool send_header) {
|
||||
if (send_header) {
|
||||
|
@ -198,9 +198,10 @@ SCHED_SHMEM* attach_to_feeder_shmem() {
|
|||
retval = attach_shmem(config.shmem_key, &p);
|
||||
if (retval || p==0) {
|
||||
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
|
||||
"Can't attach shmem (feeder not running?)\n"
|
||||
"Can't attach shmem: %d (feeder not running?)\n",
|
||||
retval
|
||||
);
|
||||
project_stopped = true;
|
||||
shmem_failed = true;
|
||||
} else {
|
||||
ssp = (SCHED_SHMEM*)p;
|
||||
retval = ssp->verify();
|
||||
|
@ -214,12 +215,16 @@ SCHED_SHMEM* attach_to_feeder_shmem() {
|
|||
|
||||
for (i=0; i<10; i++) {
|
||||
if (ssp->ready) break;
|
||||
log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG, "waiting for ready flag\n");
|
||||
log_messages.printf(SCHED_MSG_LOG::MSG_DEBUG,
|
||||
"waiting for ready flag\n"
|
||||
);
|
||||
sleep(1);
|
||||
}
|
||||
if (!ssp->ready) {
|
||||
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "feeder doesn't seem to be running\n");
|
||||
send_message("Server has software problem", 3600, true);
|
||||
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
|
||||
"feeder doesn't seem to be running\n"
|
||||
);
|
||||
send_message("Project encountered internal error: feeder not running", 3600, true);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
@ -286,7 +291,9 @@ int main(int argc, char** argv) {
|
|||
|
||||
retval = config.parse_file("..");
|
||||
if (retval) {
|
||||
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "Can't parse config file\n");
|
||||
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
|
||||
"Can't parse config file\n"
|
||||
);
|
||||
send_message("Server can't parse configuration file", 3600, true);
|
||||
exit(0);
|
||||
}
|
||||
|
@ -311,8 +318,8 @@ int main(int argc, char** argv) {
|
|||
while(FCGI_Accept() >= 0) {
|
||||
counter++;
|
||||
#endif
|
||||
if (project_stopped) {
|
||||
send_message("Project is temporarily shut down for maintenance", 3600, true);
|
||||
if (shmem_failed) {
|
||||
send_message("Project encountered internal error: shared memory", 3600, true);
|
||||
goto done;
|
||||
}
|
||||
log_request_info(length);
|
||||
|
@ -330,7 +337,9 @@ int main(int argc, char** argv) {
|
|||
sprintf(reply_path, "%s%d_%u", REPLY_FILE_PREFIX, g_pid, counter);
|
||||
fout = fopen(req_path, "w");
|
||||
if (!fout) {
|
||||
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "can't write request file\n");
|
||||
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
|
||||
"can't write request file\n"
|
||||
);
|
||||
exit(1);
|
||||
}
|
||||
copy_stream(stdin, fout);
|
||||
|
@ -345,12 +354,16 @@ int main(int argc, char** argv) {
|
|||
|
||||
fin = fopen(req_path, "r");
|
||||
if (!fin) {
|
||||
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "can't read request file\n");
|
||||
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
|
||||
"can't read request file\n"
|
||||
);
|
||||
exit(1);
|
||||
}
|
||||
fout = fopen(reply_path, "w");
|
||||
if (!fout) {
|
||||
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "can't write reply file\n");
|
||||
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
|
||||
"can't write reply file\n"
|
||||
);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
@ -359,7 +372,9 @@ int main(int argc, char** argv) {
|
|||
fclose(fout);
|
||||
fin = fopen(reply_path, "r");
|
||||
if (!fin) {
|
||||
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL, "can't read reply file\n");
|
||||
log_messages.printf(SCHED_MSG_LOG::MSG_CRITICAL,
|
||||
"can't read reply file\n"
|
||||
);
|
||||
exit(1);
|
||||
}
|
||||
copy_stream(fin, stdout);
|
||||
|
|
Loading…
Reference in New Issue