From 8b6402cf21984e3cbc838c1913a8ed205c33ac6a Mon Sep 17 00:00:00 2001
From: Bruce Allen
Date: Mon, 31 Jan 2005 19:34:43 +0000
Subject: [PATCH] More of the same, for reviewing by David

svn path=/trunk/boinc/; revision=5251
---
 sched/handle_request.C | 102 ++++++++++++++++++++++++++++-------------
 sched/sched_send.C     |  19 ++++++++
 2 files changed, 89 insertions(+), 32 deletions(-)

diff --git a/sched/handle_request.C b/sched/handle_request.C
index 6a009ab913..f3896cec5d 100644
--- a/sched/handle_request.C
+++ b/sched/handle_request.C
@@ -887,40 +887,73 @@ leave:
     }
 }
 
-void delete_file_from_host(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply) {
+
+
+extern double max_allowable_disk(SCHEDULER_REQUEST& req);
+extern double watch_diskspace[3];
+
+// Returns zero if there is a file we can delete (it is queued in
+// sreply.file_deletes). Returns non-zero if the host reported no files; the
+// reply then carries a high-priority disk-space hint and a 24 hour delay.
+int delete_file_from_host(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply) {
     int nfiles = (int)sreq.file_infos.size();
-
+    char helpful_hint[256];
+
     if (!nfiles) {
         log_messages.printf(
             SCHED_MSG_LOG::CRITICAL,
             "[HOST#%d]: no disk space but no files we can delete!\n", sreply.host.id
         );
+        // handle_request() calls us only when max_allowable_disk() < 0, so negate it to show the shortfall
+        snprintf(helpful_hint, sizeof(helpful_hint),
+            "\nNo disk space (BOINC needs %.1f MB more)\n", -max_allowable_disk(sreq)/1.e6);
+        strcat(sreply.message, helpful_hint);
+
+        if (watch_diskspace[0] != 0.0) {
+            strcat(sreply.message,
+                "Review preferences for maximum disk space used\n");
+        }
+        else if (watch_diskspace[1] != 0.0) {
+            strcat(sreply.message,
+                "Review preferences for maximum disk percentage used\n");
+        }
+        else if (watch_diskspace[2] != 0.0) {
+            strcat(sreply.message,
+                "Review preferences for minimum disk free space allowed\n");
+        }
+        strcpy(sreply.message_priority, "high");
+        sreply.request_delay = 24*3600;
+        return 1;
     }
-    else {
-        // pick a data file to delete. Do this deterministically
-        // so that we always tell host to delete the same file. But to prevent
-        // all hosts from removing 'the same' file, we choose a file which depends
-        // upon the hostid.
-        //
-        // Assumption is that if nothing has changed on the host, the order in
-        // which it reports files is fixed. If this is false, we need to sort
-        // files into order by name!
-        //
-        int j = sreply.host.id % nfiles;
-        FILE_INFO& fi = sreq.file_infos[j];
-        sreply.file_deletes.push_back(fi);
-        log_messages.printf(
-            SCHED_MSG_LOG::DEBUG,
-            "[HOST#%d]: delete file %s (make space)\n", sreply.host.id, fi.name
-        );
-        // give host an hour to nuke the file and come back. This might
-        // in general be too soon, since host needs to complete any work
-        // that depends upon this file, before it will be removed by core client.
-        //
-        sreply.request_delay = 3600;
-    }
-    return;
-}
+
+    // pick a data file to delete. Do this deterministically
+    // so that we always tell host to delete the same file. But to prevent
+    // all hosts from removing 'the same' file, we choose a file which depends
+    // upon the hostid.
+    //
+    // Assumption is that if nothing has changed on the host, the order in
+    // which it reports files is fixed. If this is false, we need to sort
+    // files into order by name!
+    //
+    int j = sreply.host.id % nfiles;
+    FILE_INFO& fi = sreq.file_infos[j];
+    sreply.file_deletes.push_back(fi);
+    log_messages.printf(
+        SCHED_MSG_LOG::DEBUG,
+        "[HOST#%d]: delete file %s (make space)\n", sreply.host.id, fi.name
+    );
+    // give host four hours to nuke the file and come back. This might
+    // in general be too soon, since host needs to complete any work
+    // that depends upon this file, before it will be removed by core client.
+    //
+
+    strcat(sreply.message, "\nRemoving file ");
+    strcat(sreply.message, fi.name);
+    strcat(sreply.message, " to free up disk space\n");
+    strcpy(sreply.message_priority, "low");
+    sreply.request_delay = 4*3600;
+    return 0;
+}
 
 void debug_sched(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply, const char *trigger) {
     char tmpfilename[256];
@@ -970,8 +1003,6 @@ void debug_sched(SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& sreply, const char *t
     return;
 }
 
-extern double max_allowable_disk(SCHEDULER_REQUEST& req);
-
 void handle_request(
     FILE* fin, FILE* fout, SCHED_SHMEM& ss, char* code_sign_key
 ) {
@@ -1001,11 +1032,18 @@ void handle_request(
     }
 
     // if we got no work, and we have no file space, delete some files
-    if (sreq.results.size()==0 && max_allowable_disk(sreq)<0) {
-        delete_file_from_host(sreq, sreply);
+    if (sreply.results.size()==0 && max_allowable_disk(sreq)<0) {
+        // try to delete a file to make more space. Also give some
+        // hints to the user about what's going wrong (lack of disk
+        // space).
+        delete_file_from_host(sreq, sreply);
     }
 
-    debug_sched(sreq, sreply, "../debug_sched");
+#if 1
+    if (sreply.results.size()==0)
+        debug_sched(sreq, sreply, "../debug_sched");
+#endif
+
 
     sreply.write(fout);
 }
diff --git a/sched/sched_send.C b/sched/sched_send.C
index b9fb931985..d774704ab4 100644
--- a/sched/sched_send.C
+++ b/sched/sched_send.C
@@ -60,6 +60,14 @@ bool SCHEDULER_REQUEST::has_version(APP& app) {
     return false;
 }
 
+// This is an ugly way to keep track of *why* a particular host didn't
+// get its work request satisfied. Unfortunately I don't see a clean
+// way of doing this without global vars. David, Rom?
+// Slots: [0] max disk space used, [1] max disk percentage used, [2] min free space.
+
+// Initialized to zero, since it's static memory.
+double watch_diskspace[3];
+
 // compute the max additional disk usage we can impose on the host
 //
 double max_allowable_disk(SCHEDULER_REQUEST& req) {
@@ -88,6 +96,15 @@ double max_allowable_disk(SCHEDULER_REQUEST& req) {
     x3 = host.d_free - prefs.disk_min_free_gb*1e9; // may be negative
 
     x = min(x1, min(x2, x3));
+    // record only the most stringent bound (slots set by an earlier call are cleared first)
+    watch_diskspace[0] = watch_diskspace[1] = watch_diskspace[2] = 0.0;
+    if (x==x1)
+        watch_diskspace[0]=x;
+    else if (x==x2)
+        watch_diskspace[1]=x;
+    else
+        watch_diskspace[2]=x;
+
     if (x < 0) {
         log_messages.printf(
             SCHED_MSG_LOG::NORMAL,
@@ -747,6 +764,8 @@ int send_work(
     if (config.locality_scheduling) {
         wreq.infeasible_only = false;
         send_work_locality(sreq, reply, platform, wreq, ss);
+        if (wreq.disk_available < 0)
+            wreq.insufficient_disk = true;
     } else {
         // give priority to results that were infeasible for some other host
         //