scheduler: Another feature for debugging the scheduler.

Previously (little known) the scheduler could be hacked to preserve
  the sched_request.xml and sched_reply.xml in own directories
  (you had to modify the initial value of use_files in sched_main.cpp).
  This feature could now be switched on and off on the fly just by
  changing the project config.
  When there is an (existing) directory configured as
  <debug_req_reply_dir>, each scheduler instance will write three
  files in there: PID_C_sched.log, PID_C_sched_request.xml and (if all
  goes well) PID_C_sched_reply.xml. PID is the process id of this
  scheduler instance, C is an internal counter within the process if
  FCGI is used. The sched.log will contain nothing else than the
  pid and the IP address of the client. This should allow for
  identifying the scheduler instance responsible for a given
  apache error log message ("premature end of script headers") when
  a scheduler crashed. sched_request.xml (obviously) is the scheduler
  request, and if the scheduler doesn't crash in between, there will
  also be the reply to the client kept in sched_reply.xml
  Remove the <debug_req_reply_dir> tag from the project config
  to turn this feature off.

svn path=/trunk/boinc/; revision=25349
This commit is contained in:
Bernd Machenschalk 2012-02-27 13:12:24 +00:00
parent 3fa88ac1e3
commit 9cb28dd25c
4 changed files with 68 additions and 16 deletions

View File

@ -2262,3 +2262,30 @@ Bernd 27 Feb 2012
sched_config.h
sched_config.cpp
sched_main.cpp
Bernd 27 Feb 2012
- scheduler: Another feature for debugging the scheduler.
Previously (little known) the scheduler could be hacked to preserve
the sched_request.xml and sched_reply.xml in own directories
(you had to modify the initial value of use_files in sched_main.cpp).
This feature could now be switched on and off on the fly just by
changing the project config.
When there is an (existing) directory configured as
<debug_req_reply_dir>, each scheduler instance will write three
files in there: PID_C_sched.log, PID_C_sched_request.xml and (if all
goes well) PID_C_sched_reply.xml. PID is the process id of this
scheduler instance, C is an internal counter within the process if
FCGI is used. The sched.log will contain nothing else than the
pid and the IP address of the client. This should allow for
identifying the scheduler instance responsible for a given
apache error log message ("premature end of script headers") when
a scheduler crashed. sched_request.xml (obviously) is the scheduler
request, and if the scheduler doesn't crash in between, there will
also be the reply to the client kept in sched_reply.xml
Remove the <debug_req_reply_dir> tag from the project config
to turn this feature off.
sched/
sched_config.h
sched_config.cpp
sched_main.cpp

View File

@ -320,6 +320,8 @@ int SCHED_CONFIG::parse(FILE* f) {
if (xp.parse_bool("debug_user_messages", debug_user_messages)) continue;
if (xp.parse_bool("debug_version_select", debug_version_select)) continue;
if (xp.parse_str("debug_req_reply_dir", debug_req_reply_dir, sizeof(debug_req_reply_dir))) continue;
// don't complain about unparsed XML;
// there are lots of tags the scheduler doesn't know about

View File

@ -190,6 +190,8 @@ struct SCHED_CONFIG {
bool debug_user_messages;
bool debug_version_select;
char debug_req_reply_dir[256]; // keep sched_request and sched_reply
// in files in this directory
int parse(FILE*);
int parse_aux(FILE*);
int parse_file(const char *dir = 0);

View File

@ -17,7 +17,6 @@
// The BOINC scheduling server.
// Note: use_files is a compile setting that records everything in files.
// Also, You can call debug_sched() for whatever situation is of
// interest to you. It won't do anything unless you create
// (touch) the file 'debug_sched' in the project root directory.
@ -69,10 +68,6 @@
#define DEBUG_LEVEL 999
#define MAX_FCGI_COUNT 20
#define REQ_FILE_PREFIX "boinc_req/"
#define REPLY_FILE_PREFIX "boinc_reply/"
bool use_files = false; // use disk files for req/reply msgs (for debugging)
GUI_URLS gui_urls;
PROJECT_FILES project_files;
key_t sema_key;
@ -348,6 +343,11 @@ void attach_to_feeder_shmem() {
}
}
// Look up the REMOTE_ADDR environment variable and hand back its value.
// When the variable is not set, the placeholder "?.?.?.?" is returned
// instead, so callers always get a printable, non-null string.
inline static const char* get_remote_addr() {
    const char* addr = getenv("REMOTE_ADDR");
    if (!addr) {
        return "?.?.?.?";
    }
    return addr;
}
int main(int argc, char** argv) {
#ifndef _USING_FCGI_
FILE* fin, *fout;
@ -355,7 +355,7 @@ int main(int argc, char** argv) {
FCGI_FILE *fin, *fout;
#endif
int i, retval;
char req_path[256], reply_path[256], path[256];
char req_path[256], reply_path[256], log_path[256], path[256];
unsigned int counter=0;
char* code_sign_key;
int length=-1;
@ -503,7 +503,7 @@ int main(int argc, char** argv) {
goto done;
}
if (use_files) {
if (strlen(config.debug_req_reply_dir)) {
struct stat statbuf;
// the code below is convoluted because,
// instead of going from stdin to stdout directly,
@ -513,8 +513,25 @@ int main(int argc, char** argv) {
// NOTE: to use this, you must create group-writeable dirs
// boinc_req and boinc_reply in the project dir
//
sprintf(req_path, "%s%d_%u", config.project_path(REQ_FILE_PREFIX), g_pid, counter);
sprintf(reply_path, "%s%d_%u", config.project_path(REPLY_FILE_PREFIX), g_pid, counter);
sprintf(req_path, "%s/%d_%u_sched_request.xml", config.debug_req_reply_dir, g_pid, counter);
sprintf(reply_path, "%s/%d_%u_sched_reply.xml", config.debug_req_reply_dir, g_pid, counter);
// keep an own 'log' per PID in case general logging fails
// this allows associating at least the scheduler request with the client
// IP address (as shown in httpd error log) in case of a crash
sprintf(log_path, "%s/%d_%u_sched.log", config.debug_req_reply_dir, g_pid, counter);
#ifndef _USING_FCGI_
fout = fopen(log_path, "a");
#else
fout = FCGI::fopen(log_path,"a");
#endif
fprintf(fout, "PID: %d Client IP: %s\n", g_pid, get_remote_addr());
fclose(fout);
log_messages.printf(MSG_DEBUG,
"keeping sched_request in %s, sched_reply in %s, custom log in %s\n",
req_path, reply_path, log_path
);
#ifndef _USING_FCGI_
fout = fopen(req_path, "w");
#else
@ -575,13 +592,17 @@ int main(int argc, char** argv) {
}
copy_stream(fin, stdout);
fclose(fin);
#ifdef EINSTEIN_AT_HOME
if (getenv("CONTENT_LENGTH")) unlink(req_path);
if (getenv("CONTENT_LENGTH")) unlink(reply_path);
#else
// unlink(req_path);
// unlink(reply_path);
#endif
// if not contacted from a client, don't keep the log files
/* not sure what led to the assumption of a client setting
CONTENT_LENGTH, but it's wrong at least on our current
project / Apache / Client configuration. Commented out.
if (getenv("CONTENT_LENGTH")) {
unlink(req_path);
unlink(reply_path);
}
*/
#ifndef _USING_FCGI_
} else if (batch) {
while (!feof(stdin)) {