More of the same, for reviewing by David

svn path=/trunk/boinc/; revision=5251
This commit is contained in:
Bruce Allen 2005-01-31 19:34:43 +00:00
parent 7e65212ed3
commit 8b6402cf21
2 changed files with 89 additions and 32 deletions

View File

@ -887,40 +887,73 @@ leave:
}
}
void delete_file_from_host(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply) {
extern double max_allowable_disk(SCHEDULER_REQUEST& req);
extern double watch_diskspace[3];
// returns zero if there is a file we can delete. Return non-zero if
// no file to delete.
//
int delete_file_from_host(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply) {
int nfiles = (int)sreq.file_infos.size();
char helpful_hint[256];
if (!nfiles) {
log_messages.printf(
SCHED_MSG_LOG::CRITICAL,
"[HOST#%d]: no disk space but no files we can delete!\n", sreply.host.id
);
sprintf(helpful_hint,
"\nNo disk space (BOINC needs %.1f MB more)\n", max_allowable_disk(sreq)/1.e6);
strcat(sreply.message, helpful_hint);
if (watch_diskspace[0] != 0.0) {
strcat(sreply.message,
"Review preferences for maximum disk space used\n");
}
else if (watch_diskspace[1] != 0.0) {
strcat(sreply.message,
"Review preferences for maximum disk percentage used\n");
}
else if (watch_diskspace[2] != 0.0) {
strcat(sreply.message,
"Review preferences for minimum disk free space allowed\n");
}
strcpy(sreply.message_priority, "high");
sreply.request_delay = 24*3600;
return 1;
}
else {
// pick a data file to delete. Do this deterministically
// so that we always tell host to delete the same file. But to prevent
// all hosts from removing 'the same' file, we choose a file which depends
// upon the hostid.
//
// Assumption is that if nothing has changed on the host, the order in
// which it reports files is fixed. If this is false, we need to sort
// files into order by name!
//
int j = sreply.host.id % nfiles;
FILE_INFO& fi = sreq.file_infos[j];
sreply.file_deletes.push_back(fi);
log_messages.printf(
SCHED_MSG_LOG::DEBUG,
"[HOST#%d]: delete file %s (make space)\n", sreply.host.id, fi.name
);
// give host an hour to nuke the file and come back. This might
// in general be too soon, since host needs to complete any work
// that depends upon this file, before it will be removed by core client.
//
sreply.request_delay = 3600;
}
return;
}
// pick a data file to delete. Do this deterministically
// so that we always tell host to delete the same file. But to prevent
// all hosts from removing 'the same' file, we choose a file which depends
// upon the hostid.
//
// Assumption is that if nothing has changed on the host, the order in
// which it reports files is fixed. If this is false, we need to sort
// files into order by name!
//
int j = sreply.host.id % nfiles;
FILE_INFO& fi = sreq.file_infos[j];
sreply.file_deletes.push_back(fi);
log_messages.printf(
SCHED_MSG_LOG::DEBUG,
"[HOST#%d]: delete file %s (make space)\n", sreply.host.id, fi.name
);
// give host an hour to nuke the file and come back. This might
// in general be too soon, since host needs to complete any work
// that depends upon this file, before it will be removed by core client.
//
strcat(sreply.message, "\nRemoving file ");
strcat(sreply.message, fi.name);
strcat(sreply.message, " to free up disk space\n");
strcpy(sreply.message_priority, "low");
sreply.request_delay = 4*3600;
return 0;
}
void debug_sched(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply, const char *trigger) {
char tmpfilename[256];
@ -970,8 +1003,6 @@ void debug_sched(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply, const char *t
return;
}
extern double max_allowable_disk(SCHEDULER_REQUEST& req);
void handle_request(
FILE* fin, FILE* fout, SCHED_SHMEM& ss, char* code_sign_key
) {
@ -1001,11 +1032,18 @@ void handle_request(
}
// if we got no work, and we have no file space, delete some files
if (sreq.results.size()==0 && max_allowable_disk(sreq)<0) {
delete_file_from_host(sreq, sreply);
if (sreply.results.size()==0 && max_allowable_disk(sreq)<0) {
// try to delete a file to make more space. Also give some
// hints to the user about what's going wrong (lack of disk
// space).
delete_file_from_host(sreq, sreply);
}
debug_sched(sreq, sreply, "../debug_sched");
#if 1
if (sreply.results.size()==0)
debug_sched(sreq, sreply, "../debug_sched");
#endif
sreply.write(fout);
}

View File

@ -60,6 +60,14 @@ bool SCHEDULER_REQUEST::has_version(APP& app) {
return false;
}
// This is an ugly way to keep track of *why* a particular host didn't
// get its work request satisfied. Unfortunately I don't see a clean
// way of doing this without global vars. David, Rom?
//
// Initialized to zero, since it's static memory.
double watch_diskspace[3];
// compute the max additional disk usage we can impose on the host
//
double max_allowable_disk(SCHEDULER_REQUEST& req) {
@ -88,6 +96,15 @@ double max_allowable_disk(SCHEDULER_REQUEST& req) {
x3 = host.d_free - prefs.disk_min_free_gb*1e9; // may be negative
x = min(x1, min(x2, x3));
// keep track of which bound is the most stringent
if (x==x1)
watch_diskspace[0]=x;
else if (x==x2)
watch_diskspace[1]=x;
else
watch_diskspace[2]=x;
if (x < 0) {
log_messages.printf(
SCHED_MSG_LOG::NORMAL,
@ -747,6 +764,8 @@ int send_work(
if (config.locality_scheduling) {
wreq.infeasible_only = false;
send_work_locality(sreq, reply, platform, wreq, ss);
if (wreq.disk_available < 0)
wreq.insufficient_disk = true;
} else {
// give priority to results that were infeasible for some other host
//