diff --git a/checkin_notes b/checkin_notes index dfb1e6e55d..ade729fd57 100755 --- a/checkin_notes +++ b/checkin_notes @@ -23802,14 +23802,42 @@ Bruce 2 Feb 2005 backend_lib.C Rom 2 Feb 2005 - - Bug Fix: Adjust the manager so that it'll set it's current directory to that - of it's installed location. This will fix the bug of the log files ending + - Bug Fix: Adjust the manager so that it'll set its current directory + to that of its installed location. + This will fix the bug of the log files ending up where the installer is executed from. - - Bug Fix: Remove the message cache from the message view in the manager. This - should clear up a crashing problem that seems to strike the Win9x platform. + - Bug Fix: Remove the message cache from the message view in the manager. + This should clear up a crashing problem that seems to + strike the Win9x platform. clientgui/ BOINCGUIApp.cpp ViewMessages.cpp, .h +David 2 Feb 2005 + - Scheduler: make things more uniform between locality + and non-locality (work array) scheduling. + Both cases use the following functions: + 1) call wu_is_feasible() to see if the host has enough + disk, memory, and CPU speed to handle a result. + (added this to locality scheduling) + 2) call add_result_to_reply() when you decide to send a result. + This updates database and SCHED_REPLY. + - If using locality scheduling, add_result_to_reply() doesn't + decrement wreq.disk_available, + since several results may involve the same file. + BRUCE: we need to decrement disk_available in + the locality scheduling code. + - add disk space check to wu_is_feasible() + (and removed it from scan_work_array()) + - simplified the args of wu_is_feasible() + - work_needed(): if wreq.disk_available is <= zero, + set the wreq.insufficient_disk flag and return false. 
+ NOTE: this and wu_is_feasible() are now the only places + where disk space is checked + lib/ + error_numbers.h + sched/ + sched_locality.C + sched_send.C,h diff --git a/doc/links.php b/doc/links.php index 6c711bca50..1eb83be8d9 100644 --- a/doc/links.php +++ b/doc/links.php @@ -31,7 +31,7 @@ echo " "; language("Chinese", array( - site("http://www.equn.com/boinchina", "www.equn.com/boinchina") + site("http://boinc.equn.com/", "boinc.equn.com") )); language("Czech", array( site("http://www.boinc.cz/", "www.boinc.cz") diff --git a/lib/error_numbers.h b/lib/error_numbers.h index 540bfe211f..85688292ab 100755 --- a/lib/error_numbers.h +++ b/lib/error_numbers.h @@ -143,3 +143,4 @@ #define ERR_NO_APP_VERSION -195 #define ERR_WU_USER_RULE -196 #define ERR_ABORTED_VIA_GUI -197 +#define ERR_INSUFFICIENT_RESOURCE -198 diff --git a/sched/sched_locality.C b/sched/sched_locality.C index 68b64fa712..b7fabd7917 100644 --- a/sched/sched_locality.C +++ b/sched/sched_locality.C @@ -79,6 +79,7 @@ static int get_app_version( // Try to send the client this result // This can fail because: +// - result needs more disk/mem/speed than host has // - already sent a result for this WU // - no app_version available // @@ -94,9 +95,14 @@ static int possibly_send_result( APP* app; APP_VERSION* avp; + retval = wu.lookup_id(result.workunitid); + if (retval) return retval; + + if (!wu_is_feasible(wu, sreq, reply)) { + return ERR_INSUFFICIENT_RESOURCE; + } + if (config.one_result_per_user_per_wu) { - retval = wu.lookup_id(result.workunitid); - if (retval) return retval; sprintf(buf, "where userid=%d and workunitid=%d", reply.user.id, wu.id); retval = result2.count(count, buf); if (retval) return retval; diff --git a/sched/sched_send.C b/sched/sched_send.C index 11fca65097..019c423394 100644 --- a/sched/sched_send.C +++ b/sched/sched_send.C @@ -173,36 +173,49 @@ static double estimate_wallclock_duration( ; } -// return false if the WU can't be executed on the host -// because of insufficient 
memory, CPU speed, or resource share +// return false if the WU can't be executed on the host because either +// 1) the host doesn't have enough memory; +// 2) the host doesn't have enough disk space; +// 3) based on CPU speed, resource share and estimated delay, +// the host probably won't get the result done within the delay bound // -static bool wu_is_feasible( - WORKUNIT& wu, HOST& host, WORK_REQ& wreq, - double resource_share_fraction, double estimated_delay +// NOTE: This is a "fast" check; no DB access allowed. +// In particular it doesn't enforce the one-result-per-user-per-wu rule +// +bool wu_is_feasible( + WORKUNIT& wu, SCHEDULER_REQUEST& request, SCHEDULER_REPLY& reply ) { - double m_nbytes = host.m_nbytes; + double m_nbytes = reply.host.m_nbytes; if (m_nbytes < MIN_POSSIBLE_RAM) m_nbytes = MIN_POSSIBLE_RAM; + if (wu.rsc_disk_bound > reply.wreq.disk_available) { + reply.wreq.insufficient_disk = true; + return false; + } + if (wu.rsc_memory_bound > m_nbytes) { log_messages.printf( SCHED_MSG_LOG::DEBUG, "[WU#%d %s] needs %f mem; [HOST#%d] has %f\n", - wu.id, wu.name, wu.rsc_memory_bound, host.id, m_nbytes + wu.id, wu.name, wu.rsc_memory_bound, reply.host.id, m_nbytes ); - wreq.insufficient_mem = true; + reply.wreq.insufficient_mem = true; return false; } if (config.enforce_delay_bound) { - double wu_wallclock_time = - estimate_wallclock_duration(wu, host, resource_share_fraction); - double host_remaining_time = estimated_delay; + double wu_wallclock_time = estimate_wallclock_duration( + wu, reply.host, request.resource_share_fraction + ); + double host_remaining_time = request.estimated_delay; if (host_remaining_time + wu_wallclock_time > wu.delay_bound) { log_messages.printf( - SCHED_MSG_LOG::DEBUG, "[WU#%d %s] needs requires %d seconds on [HOST#%d]; delay_bound is %d\n", - wu.id, wu.name, (int)wu_wallclock_time, host.id, wu.delay_bound + SCHED_MSG_LOG::DEBUG, + "[WU#%d %s] needs %d seconds on [HOST#%d]; delay_bound is %d\n", + wu.id, wu.name, 
(int)wu_wallclock_time, reply.host.id, + wu.delay_bound ); - wreq.insufficient_speed = true; + reply.wreq.insufficient_speed = true; return false; } } @@ -687,7 +700,10 @@ void unlock_sema() { bool SCHEDULER_REPLY::work_needed() { if (wreq.seconds_to_fill <= 0) return false; - if (wreq.disk_available <= 0) return false; + if (wreq.disk_available <= 0) { + wreq.insufficient_disk = true; + return false; + } if (wreq.nresults >= config.max_wus_to_send) return false; if (config.daily_result_quota) { if (host.nresults_today >= config.daily_result_quota) { @@ -708,7 +724,12 @@ int add_result_to_reply( retval = add_wu_to_reply(wu, reply, platform, app, avp); if (retval) return retval; - reply.wreq.disk_available -= wu.rsc_disk_bound; + // If using locality scheduling, there are probably many + // results that use the same file, so don't decrement available space + // + if (!config.locality_scheduling) { + reply.wreq.disk_available -= wu.rsc_disk_bound; + } // update the result in DB // @@ -773,8 +794,6 @@ static void scan_work_array( APP_VERSION* avp; bool found; - if (reply.wreq.disk_available < 0) reply.wreq.insufficient_disk = true; - lock_sema(); rnd_off = rand() % ss.nwu_results; @@ -796,12 +815,6 @@ static void scan_work_array( continue; } - if (wu_result.workunit.rsc_disk_bound > reply.wreq.disk_available) { - reply.wreq.insufficient_disk = true; - wu_result.infeasible_count++; - continue; - } - // don't send if we're already sending a result for same WU // if (config.one_result_per_user_per_wu) { @@ -813,10 +826,7 @@ static void scan_work_array( // don't send if host can't handle it // wu = wu_result.workunit; - if (!wu_is_feasible( - wu, reply.host, reply.wreq, sreq.resource_share_fraction, - sreq.estimated_delay - )) { + if (!wu_is_feasible(wu, sreq, reply)) { log_messages.printf( SCHED_MSG_LOG::DEBUG, "[HOST#%d] [WU#%d %s] WU is infeasible\n", reply.host.id, wu.id, wu.name @@ -986,9 +996,6 @@ int send_work( if (config.locality_scheduling) { 
reply.wreq.infeasible_only = false; send_work_locality(sreq, reply, platform, ss); - if (reply.wreq.disk_available < 0) { - reply.wreq.insufficient_disk = true; - } } else { // give priority to results that were infeasible for some other host // diff --git a/sched/sched_send.h b/sched/sched_send.h index e485202a0b..6642b99f3d 100644 --- a/sched/sched_send.h +++ b/sched/sched_send.h @@ -34,3 +34,5 @@ extern bool find_app_version( ); extern bool app_core_compatible(WORK_REQ& wreq, APP_VERSION& av); + +extern bool wu_is_feasible(WORKUNIT&, SCHEDULER_REQUEST&, SCHEDULER_REPLY&);