diff --git a/checkin_notes b/checkin_notes index 0dc77ef2f1..38ef26c279 100644 --- a/checkin_notes +++ b/checkin_notes @@ -2262,3 +2262,30 @@ Bernd 27 Feb 2012 sched_config.h sched_config.cpp sched_main.cpp + +Bernd 27 Feb 2012 + - scheduler: Another feature for debugging the scheduler. + Previously (little known) the scheduler could be hacked to preserve + the sched_request.xml and sched_reply.xml in their own directories + (you had to modify the initial value of use_files in sched_main.cpp). + This feature can now be switched on and off on the fly just by + changing the project config. + When there is an (existing) directory configured as + <debug_req_reply_dir>, each scheduler instance will write three + files in there: PID_C_sched.log, PID_C_sched_request.xml and (if all + goes well) PID_C_sched_reply.xml. PID is the process id of this + scheduler instance, C is an internal counter within the process if + FCGI is used. The sched.log will contain nothing but the + pid and the IP address of the client. This should allow for + identifying the scheduler instance responsible for a given + apache error log message ("premature end of script headers") when + a scheduler has crashed. sched_request.xml (obviously) is the scheduler + request, and if the scheduler doesn't crash in between, there will + also be the reply to the client kept in sched_reply.xml. + Remove the <debug_req_reply_dir> tag from the project config + to turn this feature off.
+ + sched/ + sched_config.h + sched_config.cpp + sched_main.cpp diff --git a/sched/sched_config.cpp b/sched/sched_config.cpp index 869251969a..bfc40492ed 100644 --- a/sched/sched_config.cpp +++ b/sched/sched_config.cpp @@ -320,6 +320,8 @@ int SCHED_CONFIG::parse(FILE* f) { if (xp.parse_bool("debug_user_messages", debug_user_messages)) continue; if (xp.parse_bool("debug_version_select", debug_version_select)) continue; + if (xp.parse_str("debug_req_reply_dir", debug_req_reply_dir, sizeof(debug_req_reply_dir))) continue; + // don't complain about unparsed XML; // there are lots of tags the scheduler doesn't know about diff --git a/sched/sched_config.h b/sched/sched_config.h index 08568dddb8..3fe05c83b9 100644 --- a/sched/sched_config.h +++ b/sched/sched_config.h @@ -190,6 +190,8 @@ struct SCHED_CONFIG { bool debug_user_messages; bool debug_version_select; + char debug_req_reply_dir[256]; // keep sched_request and sched_reply + // in files in this directory (an empty string leaves the feature off) int parse(FILE*); int parse_aux(FILE*); int parse_file(const char *dir = 0); diff --git a/sched/sched_main.cpp b/sched/sched_main.cpp index 829a441744..efdca4ce65 100644 --- a/sched/sched_main.cpp +++ b/sched/sched_main.cpp @@ -17,7 +17,6 @@ // The BOINC scheduling server. -// Note: use_files is a compile setting that records everything in files. // Also, You can call debug_sched() for whatever situation is of // interest to you. It won't do anything unless you create // (touch) the file 'debug_sched' in the project root directory. @@ -69,10 +68,6 @@ #define DEBUG_LEVEL 999 #define MAX_FCGI_COUNT 20 -#define REQ_FILE_PREFIX "boinc_req/" -#define REPLY_FILE_PREFIX "boinc_reply/" -bool use_files = false; // use disk files for req/reply msgs (for debugging) - GUI_URLS gui_urls; PROJECT_FILES project_files; key_t sema_key; @@ -348,6 +343,11 @@ void attach_to_feeder_shmem() { } } +inline static const char* get_remote_addr() { // returns the REMOTE_ADDR environment variable, or a placeholder string when it is unset + const char * r = getenv("REMOTE_ADDR"); + return r ? 
r : "?.?.?.?"; +} + int main(int argc, char** argv) { #ifndef _USING_FCGI_ FILE* fin, *fout; @@ -355,7 +355,7 @@ int main(int argc, char** argv) { FCGI_FILE *fin, *fout; #endif int i, retval; - char req_path[256], reply_path[256], path[256]; + char req_path[256], reply_path[256], log_path[256], path[256]; unsigned int counter=0; char* code_sign_key; int length=-1; @@ -503,7 +503,7 @@ int main(int argc, char** argv) { goto done; } - if (use_files) { + if (strlen(config.debug_req_reply_dir)) { struct stat statbuf; // the code below is convoluted because, // instead of going from stdin to stdout directly, @@ -513,8 +513,25 @@ int main(int argc, char** argv) { // NOTE: to use this, you must create group-writeable dirs // boinc_req and boinc_reply in the project dir // - sprintf(req_path, "%s%d_%u", config.project_path(REQ_FILE_PREFIX), g_pid, counter); - sprintf(reply_path, "%s%d_%u", config.project_path(REPLY_FILE_PREFIX), g_pid, counter); + sprintf(req_path, "%s/%d_%u_sched_request.xml", config.debug_req_reply_dir, g_pid, counter); + sprintf(reply_path, "%s/%d_%u_sched_reply.xml", config.debug_req_reply_dir, g_pid, counter); + + // keep an own 'log' per PID in case general logging fails + // this allows to associate at least the scheduler request with the client + // IP address (as shown in httpd error log) in case of a crash -- NOTE(review): the fopen() result below is passed to fprintf()/fclose() without a NULL check, and the sprintf() calls into the 256-byte path buffers are unbounded while debug_req_reply_dir is itself 256 bytes -- confirm the configured directory always exists, is writable and is short enough + sprintf(log_path, "%s/%d_%u_sched.log", config.debug_req_reply_dir, g_pid, counter); +#ifndef _USING_FCGI_ + fout = fopen(log_path, "a"); +#else + fout = FCGI::fopen(log_path,"a"); +#endif + fprintf(fout, "PID: %d Client IP: %s\n", g_pid, get_remote_addr()); + fclose(fout); + + log_messages.printf(MSG_DEBUG, + "keeping sched_request in %s, sched_reply in %s, custom log in %s\n", + req_path, reply_path, log_path + ); #ifndef _USING_FCGI_ fout = fopen(req_path, "w"); #else @@ -575,13 +592,17 @@ int main(int argc, char** argv) { } copy_stream(fin, stdout); fclose(fin); -#ifdef EINSTEIN_AT_HOME - if (getenv("CONTENT_LENGTH")) unlink(req_path); - if 
(getenv("CONTENT_LENGTH")) unlink(reply_path); -#else - // unlink(req_path); - // unlink(reply_path); -#endif + + // if not contacted from a client, don't keep the log files + /* not sure what led to the assumption of a client setting + CONTENT_LENGTH, but it's wrong at least on our current + project / Apache / Client configuration. Commented out. + if (getenv("CONTENT_LENGTH")) { + unlink(req_path); + unlink(reply_path); + } + */ + #ifndef _USING_FCGI_ } else if (batch) { while (!feof(stdin)) {