*** empty log message ***

svn path=/trunk/boinc/; revision=6182
This commit is contained in:
David Anderson 2005-05-17 21:08:48 +00:00
parent 3a055d9360
commit ece2ec37db
7 changed files with 122 additions and 113 deletions

View File

@ -6699,3 +6699,18 @@ Bruce 17 May 2005
inc/
forum.inc
David 17 May 2005
- (from Bruce Allen) fix bug in scheduler in case where
host has been merged,
and results are reported with old (zombie) host ID.
Results in the DB have already been updated to new hostid;
compare with that ID, not old ID
- move mylockf() to sched_util.C
(eliminate duplicate definition in scheduler/file upload handler)
- move delete_file_from_host() to sched_locality.C
sched/
file_upload_handler.C
handle_request.C
sched_locality.C,h
sched_util.C,h

View File

@ -131,25 +131,6 @@ int return_success(const char* text) {
return 0;
}
// Lock entire file for writing (non-blocking, via fcntl() record locks).
//
// fd: open file descriptor to lock
//
// returns zero on success, else PID of process currently holding lock, or -1
// if something else is wrong, for example a bad file descriptor
//
int mylockf(int fd) {
    // zero-initialize so that no field (in particular l_pid)
    // is ever read while indeterminate
    struct flock fl = {};
    fl.l_type = F_WRLCK;
    fl.l_whence = SEEK_SET;
    fl.l_start = 0;
    fl.l_len = 0;       // zero length means "through end of file"

    if (fcntl(fd, F_SETLK, &fl) != -1) return 0;

    // if lock failed, find out why.
    // F_SETLK leaves fl untouched, so it still describes our request
    // and can be passed to F_GETLK as the query.
    //
    errno = 0;
    if (fcntl(fd, F_GETLK, &fl) == -1) {
        // the query itself failed (e.g. bad descriptor): no holder to report
        return -1;
    }

    // F_GETLK reports F_UNLCK, and fills in nothing else,
    // when no conflicting lock exists (e.g. the holder released it
    // between our two calls).  l_pid is not meaningful in that case.
    //
    if (fl.l_type == F_UNLCK) return -1;

    if (fl.l_pid > 0) return fl.l_pid;
    return -1;
}
#define BLOCK_SIZE 16382
double bytes_left=-1;

View File

@ -38,38 +38,21 @@ using namespace std;
#include "error_numbers.h"
#include "parse.h"
#include "util.h"
#include "filesys.h"
#include "main.h"
#include "server_types.h"
#include "sched_util.h"
#include "main.h"
#include "handle_request.h"
#include "sched_msgs.h"
#include "sched_send.h"
#include "sched_config.h"
#include "../lib/filesys.h"
#include "sched_locality.h"
#ifdef _USING_FCGI_
#include "fcgi_stdio.h"
#endif
// Try to get a write lock on the whole file with file descriptor fd.
// Does not block.
//
// returns zero if we get lock on file with file descriptor fd.
// returns < 0 if error
// returns PID > 0 if another process has lock
//
int mylockf(int fd) {
    // start from an all-zero struct: l_pid must not hold stack garbage
    // if the kernel does not fill it in
    struct flock fl = {};
    fl.l_type = F_WRLCK;
    fl.l_whence = SEEK_SET;
    fl.l_start = 0;
    fl.l_len = 0;       // 0 == lock to end of file

    if (fcntl(fd, F_SETLK, &fl) != -1) return 0;

    // if lock failed, find out why
    // (fl is unchanged by the failed F_SETLK, so reuse it as the query)
    //
    errno = 0;
    if (fcntl(fd, F_GETLK, &fl) == -1) {
        // could not even query the lock, e.g. fd is invalid
        return -1;
    }

    // no conflicting lock was found (holder may have just released it);
    // in that case only l_type is updated and l_pid says nothing
    //
    if (fl.l_type == F_UNLCK) return -1;

    if (fl.l_pid > 0) return fl.l_pid;
    return -1;
}
// use advisory locking to establish a lock to run a scheduler
// instance for this host. Return values same as mylockf().
//
@ -119,9 +102,14 @@ int unmunge_email_addr(DB_USER& user) {
return 0;
}
// Look up the host and its user, and make sure the authenticator matches.
// If no host ID is supplied, or if RPC seqno mismatch,
// create a new host record and return its ID
// Based on the info in the request message,
// look up the host and its user, and make sure the authenticator matches.
// Some special cases:
// 1) If no host ID is supplied, or if RPC seqno mismatch,
// create a new host record
// 2) If the host record specified by sreq.hostid is a "zombie"
// (i.e. it was merged with another host via the web site)
// then follow links to find the proper host
//
// POSTCONDITION:
// If this returns zero, then:
@ -456,9 +444,7 @@ int handle_global_prefs(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
// New handle completed results
//
int handle_results(
SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply
) {
int handle_results(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply) {
DB_SCHED_RESULT_ITEM_SET result_handler;
SCHED_RESULT_ITEM* srip;
unsigned int i;
@ -548,7 +534,7 @@ int handle_results(
continue;
}
if (srip->hostid != sreq.hostid) {
if (srip->hostid != reply.host.id) {
log_messages.printf(
SCHED_MSG_LOG::CRITICAL,
"[HOST#%d] [RESULT#%d %s] got result from wrong host; expected [HOST#%d]\n",
@ -627,7 +613,7 @@ int handle_results(
srip->validate_state = VALIDATE_STATE_INVALID;
reply.got_bad_result();
}
} // end of loop over all incoming results
} // loop over all incoming results
// update all the results we have kept in memory, by storing to database.
@ -1056,10 +1042,6 @@ void process_request(
goto leave;
}
#if 0
notify_if_newer_core_version(sreq, reply, *platform, ss);
#endif
handle_global_prefs(sreq, reply);
#if 0
@ -1108,67 +1090,9 @@ leave:
}
}
// Pick one of the files reported by the host and tell the host to delete it,
// or, if the host reported no files, tell the user to free disk space.
// In both cases a user message is added to the reply and a request delay
// is set (24 hours if nothing can be deleted, 4 hours otherwise).
//
// returns zero if there is a file we can delete.
//
int delete_file_from_host(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply) {
    int nfiles = (int)sreq.file_infos.size();
    char buf[256];

    if (!nfiles) {
        log_messages.printf(
            SCHED_MSG_LOG::CRITICAL,
            "[HOST#%d]: no disk space but no files we can delete!\n", sreply.host.id
        );

        // Compute the shortfall BEFORE looking at disk_limits.
        // NOTE(review): max_allowable_disk() takes sreply by non-const
        // reference and may update sreply.disk_limits; keep this order.
        //
        double mb_needed = fabs(max_allowable_disk(sreq, sreply))/1.e6;

        const char* hint = "";
        if (sreply.disk_limits.max_used != 0.0) {
            hint = "Review preferences for maximum disk space used.";
        } else if (sreply.disk_limits.max_frac != 0.0) {
            hint = "Review preferences for maximum disk percentage used.";
        } else if (sreply.disk_limits.min_free != 0.0) {
            hint = "Review preferences for minimum disk free space allowed.";
        }

        // bounded formatting: a huge value can no longer overrun buf
        // (the message is truncated instead)
        //
        snprintf(buf, sizeof(buf),
            "No disk space (you must free %.1f MB before BOINC gets space). %s",
            mb_needed, hint
        );
        USER_MESSAGE um(buf, "high");
        sreply.insert_message(um);
        sreply.set_delay(24*3600);
        return 1;
    }

    // pick a data file to delete.
    // Do this deterministically so that we always tell host to delete the same file.
    // But to prevent all hosts from removing 'the same' file,
    // choose a file which depends upon the hostid.
    //
    // Assumption is that if nothing has changed on the host,
    // the order in which it reports files is fixed.
    // If this is false, we need to sort files into order by name!
    //
    int j = sreply.host.id % nfiles;
    FILE_INFO& fi = sreq.file_infos[j];
    sreply.file_deletes.push_back(fi);
    log_messages.printf(
        SCHED_MSG_LOG::DEBUG,
        "[HOST#%d]: delete file %s (make space)\n", sreply.host.id, fi.name
    );

    // give host 4 hours to nuke the file and come back.
    // This might in general be too soon, since host needs to complete any work
    // that depends upon this file, before it will be removed by core client.
    //
    // fi.name comes from the client's request, so bound the copy:
    // a long name is truncated rather than written past the end of buf.
    //
    snprintf(buf, sizeof(buf), "Removing file %s to free up disk space", fi.name);
    USER_MESSAGE um(buf, "low");
    sreply.insert_message(um);
    sreply.set_delay(4*3600);
    return 0;
}
void debug_sched(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply, const char *trigger) {
void debug_sched(
SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply, const char *trigger
) {
char tmpfilename[256];
FILE *fp;
@ -1272,6 +1196,7 @@ void handle_request(
#endif
// if we got no work, and we have no file space, delete some files
//
if (sreply.results.size()==0 && (sreply.wreq.insufficient_disk || sreply.wreq.disk_available<0)) {
// try to delete a file to make more space.
// Also give some hints to the user about what's going wrong

View File

@ -40,6 +40,65 @@
#define VERBOSE_DEBUG
// Free up space on a host that has none left: choose one of the files
// the host reported and add it to the reply's list of files to delete.
// If the host reported no files at all, send the user a message
// explaining how much space must be freed instead.
// A request delay is set either way (24 hours when nothing can be
// deleted, 4 hours when a file was chosen).
//
// returns zero if there is a file we can delete.
//
int delete_file_from_host(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply) {
    int nfiles = (int)sreq.file_infos.size();
    char buf[256];

    if (!nfiles) {
        log_messages.printf(
            SCHED_MSG_LOG::CRITICAL,
            "[HOST#%d]: no disk space but no files we can delete!\n", sreply.host.id
        );

        // Evaluate the shortfall first, then read disk_limits.
        // NOTE(review): max_allowable_disk() receives sreply by non-const
        // reference and may fill in sreply.disk_limits; do not reorder.
        //
        double mb_needed = fabs(max_allowable_disk(sreq, sreply))/1.e6;

        const char* hint = "";
        if (sreply.disk_limits.max_used != 0.0) {
            hint = "Review preferences for maximum disk space used.";
        } else if (sreply.disk_limits.max_frac != 0.0) {
            hint = "Review preferences for maximum disk percentage used.";
        } else if (sreply.disk_limits.min_free != 0.0) {
            hint = "Review preferences for minimum disk free space allowed.";
        }

        // snprintf() never writes more than sizeof(buf) bytes,
        // so an extreme value truncates the message instead of
        // overflowing the buffer
        //
        snprintf(buf, sizeof(buf),
            "No disk space (you must free %.1f MB before BOINC gets space). %s",
            mb_needed, hint
        );
        USER_MESSAGE um(buf, "high");
        sreply.insert_message(um);
        sreply.set_delay(24*3600);
        return 1;
    }

    // pick a data file to delete.
    // Do this deterministically so that we always tell host
    // to delete the same file.
    // But to prevent all hosts from removing 'the same' file,
    // choose a file which depends upon the hostid.
    //
    // Assumption is that if nothing has changed on the host,
    // the order in which it reports files is fixed.
    // If this is false, we need to sort files into order by name!
    //
    int j = sreply.host.id % nfiles;
    FILE_INFO& fi = sreq.file_infos[j];
    sreply.file_deletes.push_back(fi);
    log_messages.printf(
        SCHED_MSG_LOG::DEBUG,
        "[HOST#%d]: delete file %s (make space)\n", sreply.host.id, fi.name
    );

    // give host 4 hours to nuke the file and come back.
    // This might in general be too soon, since host needs to complete any work
    // that depends upon this file, before it will be removed by core client.
    //
    // The file name originates in the client's request; format it with a
    // length limit so an over-long name cannot write past the end of buf.
    //
    snprintf(buf, sizeof(buf), "Removing file %s to free up disk space", fi.name);
    USER_MESSAGE um(buf, "low");
    sreply.insert_message(um);
    sreply.set_delay(4*3600);
    return 0;
}
// returns true if the host already has the file, or if the file is
// included with a previous result being sent to this host.
//

View File

@ -21,7 +21,10 @@ extern void send_work_locality(
SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply, PLATFORM& platform,
SCHED_SHMEM& ss
);
extern int decrement_disk_space_locality(
WORKUNIT& wu, SCHEDULER_REQUEST& request,
SCHEDULER_REPLY& reply
);
extern int delete_file_from_host(SCHEDULER_REQUEST&, SCHEDULER_REPLY&);

View File

@ -25,6 +25,7 @@ using namespace std;
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include "filesys.h"
#include "md5_file.h"
@ -233,4 +234,23 @@ int elapsed_time() {
return (int)(time(0)-execution_time);
}
// Attempt (without blocking) to take an exclusive write lock
// covering the entire file open on descriptor fd.
//
// returns zero if we get lock on file with file descriptor fd.
// returns < 0 if error
// returns PID > 0 if another process has lock
//
int mylockf(int fd) {
    // value-initialize: every field, including l_pid, starts at zero,
    // so nothing indeterminate can be returned below
    struct flock fl = {};
    fl.l_type = F_WRLCK;
    fl.l_whence = SEEK_SET;
    fl.l_start = 0;
    fl.l_len = 0;       // length 0 extends the lock to end of file

    if (fcntl(fd, F_SETLK, &fl) != -1) return 0;

    // if lock failed, find out why.
    // The failed F_SETLK did not modify fl, so it doubles as the
    // lock description that F_GETLK tests against.
    //
    errno = 0;
    if (fcntl(fd, F_GETLK, &fl) == -1) {
        // query failed too (for instance fd is not a valid descriptor)
        return -1;
    }

    // F_UNLCK here means nobody holds a conflicting lock any more
    // (it was dropped after our F_SETLK attempt); l_pid is not set
    // by the kernel in that case, so don't interpret it.
    //
    if (fl.l_type == F_UNLCK) return -1;

    if (fl.l_pid > 0) return fl.l_pid;
    return -1;
}
const char *BOINC_RCSID_affa6ef1e4 = "$Id$";

View File

@ -61,5 +61,11 @@ extern void compute_avg_turnaround(HOST& host, double turnaround);
// used to track execution time of cgi scripts
extern int elapsed_time();
// returns zero if we get lock on file with file descriptor fd.
// returns < 0 if error
// returns PID > 0 if another process has lock
//
extern int mylockf(int fd);
#endif