2008-08-06 18:36:30 +00:00
|
|
|
// This file is part of BOINC.
|
2005-01-20 23:22:22 +00:00
|
|
|
// http://boinc.berkeley.edu
|
2008-08-06 18:36:30 +00:00
|
|
|
// Copyright (C) 2008 University of California
|
2003-07-09 23:54:45 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
2003-07-09 23:54:45 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
2005-01-20 23:22:22 +00:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
2003-03-08 00:09:40 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
2003-03-08 00:09:40 +00:00
|
|
|
|
2003-03-08 00:18:33 +00:00
|
|
|
|
2005-11-21 18:34:44 +00:00
|
|
|
#include "config.h"
|
2004-07-13 13:54:09 +00:00
|
|
|
#include <cstdlib>
|
|
|
|
#include <csignal>
|
2005-01-08 06:54:03 +00:00
|
|
|
#include <cerrno>
|
2003-06-20 21:33:49 +00:00
|
|
|
#include <unistd.h>
|
2004-12-06 22:41:19 +00:00
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <sys/types.h>
|
2005-05-17 21:08:48 +00:00
|
|
|
#include <fcntl.h>
|
2003-03-08 00:09:40 +00:00
|
|
|
|
2004-05-03 19:30:01 +00:00
|
|
|
#include "filesys.h"
|
2005-01-08 06:54:03 +00:00
|
|
|
#include "md5_file.h"
|
2004-09-13 18:05:54 +00:00
|
|
|
#include "error_numbers.h"
|
2005-01-08 06:54:03 +00:00
|
|
|
|
2004-04-08 08:15:23 +00:00
|
|
|
#include "sched_msgs.h"
|
2003-03-08 00:09:40 +00:00
|
|
|
#include "sched_util.h"
|
2009-05-07 13:54:51 +00:00
|
|
|
#include "sched_config.h"
|
2005-10-14 04:41:53 +00:00
|
|
|
#include "util.h"
|
2003-03-08 00:09:40 +00:00
|
|
|
|
2004-04-30 23:18:56 +00:00
|
|
|
#ifdef _USING_FCGI_
|
2008-09-09 19:10:42 +00:00
|
|
|
#include "boinc_fcgi.h"
|
2004-04-30 23:18:56 +00:00
|
|
|
#endif
|
|
|
|
|
2009-05-07 13:54:51 +00:00
|
|
|
// Name of the trigger file whose presence makes daemons quit
// (see check_stop_daemons()).
// NOTE: this must be the same as in the "start" script.
const char* STOP_DAEMONS_FILENAME = "stop_daemons";

// Name of the trigger file checked by check_stop_sched().
// NOTE: this must be the same as in the "start" script.
const char* STOP_SCHED_FILENAME = "stop_sched";

// Signal for which install_stop_signal_handler() registers a handler.
// NOTE: this must be the same as in the "start" script.
const int STOP_SIGNAL = SIGHUP;
|
2003-07-09 23:54:45 +00:00
|
|
|
|
2003-07-13 01:10:24 +00:00
|
|
|
void write_pid_file(const char* filename) {
|
2008-09-09 19:10:42 +00:00
|
|
|
#ifndef _USING_FCGI_
|
2003-06-20 01:31:03 +00:00
|
|
|
FILE* fpid = fopen(filename, "w");
|
2008-09-09 19:10:42 +00:00
|
|
|
#else
|
|
|
|
FCGI_FILE* fpid = FCGI::fopen(filename,"w");
|
|
|
|
#endif
|
|
|
|
|
2003-06-20 01:31:03 +00:00
|
|
|
if (!fpid) {
|
2008-08-19 03:00:17 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "Couldn't write pid file\n");
|
2003-06-20 01:31:03 +00:00
|
|
|
return;
|
|
|
|
}
|
2003-08-12 20:58:24 +00:00
|
|
|
fprintf(fpid, "%d\n", (int)getpid());
|
2003-06-20 01:31:03 +00:00
|
|
|
fclose(fpid);
|
|
|
|
}
|
|
|
|
|
2005-01-09 07:48:33 +00:00
|
|
|
// caught_stop_signal will be set to true if STOP_SIGNAL (normally SIGHUP)
// is caught.
|
2003-12-31 23:09:21 +00:00
|
|
|
// Flag raised by stop_signal_handler(); polled by check_stop_daemons().
bool caught_stop_signal = false;

// Signal handler: prints a notice to stderr and raises caught_stop_signal.
// The signal number is ignored.
// NOTE(review): fprintf() is not on the POSIX list of async-signal-safe
// functions, and the flag is a plain bool rather than
// volatile sig_atomic_t -- left as-is here because changing the flag's type
// would affect declarations outside this block; worth revisiting.
//
static void stop_signal_handler(int /*signum*/) {
    fprintf(stderr, "GOT STOP SIGNAL\n");
    caught_stop_signal = true;
}
|
|
|
|
|
2003-12-31 23:09:21 +00:00
|
|
|
// Register stop_signal_handler() as the handler for STOP_SIGNAL.
// The return value of signal() is not checked.
//
void install_stop_signal_handler() {
    signal(
        STOP_SIGNAL,
        stop_signal_handler
    );
}
|
|
|
|
|
2004-05-03 19:30:01 +00:00
|
|
|
void check_stop_daemons() {
|
2003-12-31 23:09:21 +00:00
|
|
|
if (caught_stop_signal) {
|
2008-08-19 03:00:17 +00:00
|
|
|
log_messages.printf(MSG_NORMAL, "Quitting due to SIGHUP\n");
|
2003-06-20 01:31:03 +00:00
|
|
|
exit(0);
|
|
|
|
}
|
2009-05-07 13:54:51 +00:00
|
|
|
const char *stop_file = config.project_path(STOP_DAEMONS_FILENAME);
|
|
|
|
if (boinc_file_exists(stop_file)) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"Quitting because trigger file '%s' is present\n",
|
2009-05-07 13:54:51 +00:00
|
|
|
stop_file
|
2008-02-21 21:00:58 +00:00
|
|
|
);
|
2003-06-20 01:31:03 +00:00
|
|
|
exit(0);
|
|
|
|
}
|
2003-03-08 00:09:40 +00:00
|
|
|
}
|
|
|
|
|
2004-05-03 19:30:01 +00:00
|
|
|
bool check_stop_sched() {
|
2009-05-07 13:54:51 +00:00
|
|
|
return boinc_file_exists(config.project_path(STOP_SCHED_FILENAME));
|
2003-12-10 00:54:17 +00:00
|
|
|
}
|
2004-09-13 18:05:54 +00:00
|
|
|
|
|
|
|
// try to open a file.
|
|
|
|
// On failure:
|
|
|
|
// return ERR_FOPEN if the dir is there but not file
|
|
|
|
// (this is generally a nonrecoverable failure)
|
|
|
|
// return ERR_OPENDIR if dir is not there.
|
|
|
|
// (this is generally a recoverable error,
|
|
|
|
// like NFS mount failure, that may go away later)
|
|
|
|
//
|
2008-09-09 19:10:42 +00:00
|
|
|
#ifndef _USING_FCGI_
|
2005-02-16 23:17:43 +00:00
|
|
|
int try_fopen(const char* path, FILE*& f, const char* mode) {
|
2008-09-09 19:10:42 +00:00
|
|
|
#else
|
|
|
|
int try_fopen(const char* path, FCGI_FILE*& f, const char *mode) {
|
|
|
|
#endif
|
2009-03-04 22:12:16 +00:00
|
|
|
const char* p;
|
2004-09-13 18:05:54 +00:00
|
|
|
DIR* d;
|
|
|
|
char dirpath[256];
|
|
|
|
|
2008-09-09 19:10:42 +00:00
|
|
|
#ifndef _USING_FCGI_
|
2004-09-13 18:05:54 +00:00
|
|
|
f = fopen(path, mode);
|
2008-09-09 19:10:42 +00:00
|
|
|
#else
|
|
|
|
f = FCGI::fopen(path, mode);
|
|
|
|
#endif
|
|
|
|
|
2004-09-13 18:05:54 +00:00
|
|
|
if (!f) {
|
2004-12-20 20:47:25 +00:00
|
|
|
memset(dirpath, '\0', sizeof(dirpath));
|
2004-09-13 18:05:54 +00:00
|
|
|
p = strrchr(path, '/');
|
|
|
|
if (p) {
|
|
|
|
strncpy(dirpath, path, (int)(p-path));
|
|
|
|
} else {
|
|
|
|
strcpy(dirpath, ".");
|
|
|
|
}
|
|
|
|
if ((d = opendir(dirpath)) == NULL) {
|
|
|
|
return ERR_OPENDIR;
|
|
|
|
} else {
|
|
|
|
closedir(d);
|
|
|
|
return ERR_FOPEN;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2004-12-06 22:41:19 +00:00
|
|
|
|
2005-02-16 23:17:43 +00:00
|
|
|
void get_log_path(char* p, const char* filename) {
|
2005-04-01 22:30:08 +00:00
|
|
|
char host[256];
|
2009-05-07 13:54:51 +00:00
|
|
|
const char *dir;
|
|
|
|
|
2005-04-01 22:30:08 +00:00
|
|
|
gethostname(host, 256);
|
|
|
|
char* q = strchr(host, '.');
|
2004-12-06 22:41:19 +00:00
|
|
|
if (q) *q=0;
|
2009-05-07 13:54:51 +00:00
|
|
|
dir = config.project_path("log_%s", host);
|
2005-04-01 22:30:08 +00:00
|
|
|
sprintf(p, "%s/%s", dir, filename);
|
2007-06-26 21:14:58 +00:00
|
|
|
mode_t old_mask = umask(0);
|
2008-04-02 20:04:06 +00:00
|
|
|
mkdir(dir, 01770);
|
2007-06-26 21:14:58 +00:00
|
|
|
// make log_x directory sticky and group-rwx
|
|
|
|
// so that whatever apache puts there will be owned by us
|
|
|
|
umask(old_mask);
|
2004-12-06 22:41:19 +00:00
|
|
|
}
|
2004-12-08 00:40:19 +00:00
|
|
|
|
2005-01-08 06:54:03 +00:00
|
|
|
static void filename_hash(const char* filename, int fanout, char* dir) {
|
2011-01-07 20:23:22 +00:00
|
|
|
std::string s = md5_string((const unsigned char*)filename, strlen(filename));
|
|
|
|
int x = strtol(s.substr(1, 7).c_str(), 0, 16);
|
2005-01-08 06:54:03 +00:00
|
|
|
sprintf(dir, "%x", x % fanout);
|
|
|
|
}
|
|
|
|
|
|
|
|
// given a filename, compute its path in a directory hierarchy
|
|
|
|
// If create is true, create the directory if needed
|
|
|
|
//
|
|
|
|
int dir_hier_path(
|
2005-09-23 21:09:00 +00:00
|
|
|
const char* filename, const char* root, int fanout,
|
2011-01-07 20:23:22 +00:00
|
|
|
char* path, bool create
|
2005-01-08 06:54:03 +00:00
|
|
|
) {
|
|
|
|
char dir[256], dirpath[256];
|
|
|
|
int retval;
|
|
|
|
|
|
|
|
if (fanout==0) {
|
|
|
|
sprintf(path, "%s/%s", root, filename);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2005-09-23 21:09:00 +00:00
|
|
|
filename_hash(filename, fanout, dir);
|
2005-01-08 06:54:03 +00:00
|
|
|
|
|
|
|
sprintf(dirpath, "%s/%s", root, dir);
|
|
|
|
if (create) {
|
|
|
|
retval = boinc_mkdir(dirpath);
|
2006-06-14 20:17:35 +00:00
|
|
|
if (retval && (errno != EEXIST)) {
|
2010-11-08 17:51:57 +00:00
|
|
|
fprintf(stderr, "boinc_mkdir(%s): %s: errno %d\n",
|
|
|
|
dirpath, boincerror(retval), errno
|
|
|
|
);
|
2005-01-08 06:54:03 +00:00
|
|
|
return ERR_MKDIR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
sprintf(path, "%s/%s", dirpath, filename);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-04-18 20:49:58 +00:00
|
|
|
// same, but the output is a URL (used by tools/backend_lib.C)
|
|
|
|
//
|
2005-01-08 06:54:03 +00:00
|
|
|
int dir_hier_url(
|
2005-09-23 21:09:00 +00:00
|
|
|
const char* filename, const char* root, int fanout,
|
2011-01-07 20:23:22 +00:00
|
|
|
char* result
|
2005-01-08 06:54:03 +00:00
|
|
|
) {
|
|
|
|
char dir[256];
|
|
|
|
|
|
|
|
if (fanout==0) {
|
|
|
|
sprintf(result, "%s/%s", root, filename);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2005-09-23 21:09:00 +00:00
|
|
|
filename_hash(filename, fanout, dir);
|
2005-01-08 06:54:03 +00:00
|
|
|
sprintf(result, "%s/%s/%s", root, dir, filename);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2005-02-23 00:11:59 +00:00
|
|
|
// Fold a new turnaround sample into host.avg_turnaround.
// If the current average is zero, the sample becomes the average;
// otherwise the new average is 0.7*old + 0.3*sample.
//
void compute_avg_turnaround(HOST& host, double turnaround) {
    double updated = turnaround;
    if (host.avg_turnaround != 0) {
        updated = .7*host.avg_turnaround + .3*turnaround;
    }
    host.avg_turnaround = updated;
}
|
|
|
|
|
2005-06-03 21:39:38 +00:00
|
|
|
// Request an exclusive (write) lock on the whole file with the given fd.
// The request is non-blocking (F_SETLK).
// Returns:
//   0 if we get lock
//   PID (>0) if another process has lock
//   -1 if error
//
int mylockf(int fd) {
    struct flock fl;

    // zero everything (in particular l_pid) so that no stale stack
    // contents can be mistaken for a lock holder's PID below
    //
    memset(&fl, 0, sizeof(fl));
    fl.l_type = F_WRLCK;
    fl.l_whence = SEEK_SET;
    fl.l_start = 0;
    fl.l_len = 0;       // 0 means "to end of file"
    if (fcntl(fd, F_SETLK, &fl) != -1) return 0;

    // if lock failed, find out why
    //
    errno = 0;
    if (fcntl(fd, F_GETLK, &fl) == -1) {
        // couldn't even query (e.g. bad descriptor)
        return -1;
    }

    // F_GETLK reports F_UNLCK if nothing conflicts any more;
    // only trust l_pid when a conflicting lock was actually described
    //
    if (fl.l_type != F_UNLCK && fl.l_pid > 0) return fl.l_pid;
    return -1;
}
|
|
|
|
|
2007-04-18 20:49:58 +00:00
|
|
|
int count_results(char* query, int& n) {
|
|
|
|
DB_RESULT result;
|
|
|
|
int retval = result.count(n, query);
|
|
|
|
if (retval) return retval;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int count_workunits(int& n, const char* query) {
|
|
|
|
DB_WORKUNIT workunit;
|
2007-10-12 19:28:30 +00:00
|
|
|
int retval = workunit.count(n, query);
|
2007-04-18 20:49:58 +00:00
|
|
|
if (retval) return retval;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-06-20 16:27:27 +00:00
|
|
|
int count_unsent_results(int& n, int appid) {
|
2007-04-18 20:49:58 +00:00
|
|
|
char buf[256];
|
2007-06-20 16:27:27 +00:00
|
|
|
if (appid) {
|
|
|
|
sprintf(buf, "where server_state=%d and appid=%d ",
|
|
|
|
RESULT_SERVER_STATE_UNSENT, appid
|
|
|
|
);
|
|
|
|
} else {
|
|
|
|
sprintf(buf, "where server_state=%d", RESULT_SERVER_STATE_UNSENT);
|
|
|
|
}
|
2007-04-18 20:49:58 +00:00
|
|
|
return count_results(buf, n);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
// Return true if command-line argument x is option y,
// written with either one or two leading dashes
// (i.e. x is "-<y>" or "--<y>").
// The comparison is done in place, so y may be of any length.
//
bool is_arg(const char* x, const char* y) {
    if (x[0] != '-') return false;

    // single-dash form: "-<y>"
    const char* after_dash = x + 1;
    if (!strcmp(after_dash, y)) return true;

    // double-dash form: "--<y>"
    if (after_dash[0] != '-') return false;
    return !strcmp(after_dash + 1, y);
}
|
|
|
|
|
2008-09-22 17:52:41 +00:00
|
|
|
#ifdef GCL_SIMULATOR
|
|
|
|
|
2011-01-07 20:23:22 +00:00
|
|
|
void simulator_signal_handler(int signum) {
|
2008-09-22 17:52:41 +00:00
|
|
|
FILE *fsim;
|
|
|
|
char currenttime[64];
|
2009-05-07 13:54:51 +00:00
|
|
|
fsim = fopen(config.project_path("simulator/sim_time.txt"),"r");
|
2008-09-22 17:52:41 +00:00
|
|
|
if(fsim){
|
2008-09-30 18:21:41 +00:00
|
|
|
fscanf(fsim,"%s", currenttime);
|
2011-01-07 20:23:22 +00:00
|
|
|
simtime = atof(currenttime);
|
2008-09-22 17:52:41 +00:00
|
|
|
fclose(fsim);
|
|
|
|
}
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2008-09-22 17:52:41 +00:00
|
|
|
"Invoked by the simulator at time %.0f... \n", simtime
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
int itime() {
|
|
|
|
return (int) simtime;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create (or truncate) the file "sim_<daemonname>.txt" under
// config.project_path("simulator/"), guarded by a temporary
// "sim_<daemonname>.lok" file:
// the .lok file is created first, then the .txt file,
// and finally the .lok file is removed.
// If the .lok file can't be created, the .txt file is not touched.
// NOTE(review): presumably the simulator watches these files -- confirm.
//
void continue_simulation(const char *daemonname){
    // Paths are assembled in std::strings rather than by appending to the
    // buffer returned by project_path() and formatting into 64-byte arrays;
    // this avoids writing through a const pointer, overflowing on long
    // project paths, and treating the path as a printf format string.
    //
    const std::string prefix =
        std::string(config.project_path("simulator/")) + "sim_" + daemonname;
    const std::string daemonfile = prefix + ".txt";
    const std::string daemonfilelok = prefix + ".lok";

    FILE *fsimlok = fopen(daemonfilelok.c_str(), "w");
    if (fsimlok) {
        fclose(fsimlok);
        FILE *fsim = fopen(daemonfile.c_str(), "w");
        if (fsim) {
            fclose(fsim);
        }
    }
    remove(daemonfilelok.c_str());
}
|
2008-09-30 18:21:41 +00:00
|
|
|
|
2008-09-22 17:52:41 +00:00
|
|
|
#endif
|
|
|
|
|
2005-01-02 18:29:53 +00:00
|
|
|
// Version-control ID string ("$Id$" keyword placeholder).
const char* BOINC_RCSID_affa6ef1e4 = "$Id$";
|