From 5d76e13277e27b48779d1e7332a5d74026a5e75b Mon Sep 17 00:00:00 2001 From: David Anderson Date: Wed, 26 Oct 2011 16:51:10 +0000 Subject: [PATCH] - scheduler: tweaks to last night's checkin. In the inner loop of scan_work_array() there are two WORKUNITs: - the one that's part of wu_result (in the shared-mem array) - a temp copy. quick_check() may modify this in host-specific ways (e.g., adjusting rsc_fpops_est or delay_bound). This is the one we pass to add_result_to_reply(). When we reread hr_class and app_version_id from the DB, update both structs. svn path=/trunk/boinc/; revision=24493 --- checkin_notes | 15 +++++++++++ sched/sched_array.cpp | 58 +++++++++++++++++++++++++++---------------- sched/sched_send.cpp | 4 ++- 3 files changed, 54 insertions(+), 23 deletions(-) diff --git a/checkin_notes b/checkin_notes index 7fbe144b25..1516651a43 100644 --- a/checkin_notes +++ b/checkin_notes @@ -7848,3 +7848,18 @@ Charlie 26 Oct 2011 postInstall PostInstall.cpp release_boinc.sh + +David 26 Oct 2011 + - scheduler: tweaks to last night's checkin. + In the inner loop of scan_work_array() there are two WORKUNITs: + - the one that's part of wu_result (in the shared-mem array) + - a temp copy. + quick_check() may modify this in host-specific ways + (e.g., adjusting rsc_fpops_est or delay_bound). + This is the one we pass to add_result_to_reply(). + When we reread hr_class and app_version_id from the DB, + update both structs. + + sched/ + sched_array.cpp + sched_send.cpp diff --git a/sched/sched_array.cpp b/sched/sched_array.cpp index f30ef237e1..8cf97dadd1 100644 --- a/sched/sched_array.cpp +++ b/sched/sched_array.cpp @@ -42,7 +42,10 @@ // if any check fails, return false // static bool quick_check( - WU_RESULT& wu_result, WORKUNIT& wu, BEST_APP_VERSION* &bavp, + WU_RESULT& wu_result, + WORKUNIT& wu, // a mutable copy of wu_result.workunit. + // We may modify its delay_bound and rsc_fpops_est + BEST_APP_VERSION* &bavp, APP* &app, int& last_retval ) { int retval; @@ -128,7 +131,8 @@ static bool quick_check( } } - // don't send job if host can't handle it + // Check whether we can send this job. + // This may modify wu.delay_bound and wu.rsc_fpops_est // retval = wu_is_infeasible_fast( wu, @@ -152,22 +156,25 @@ static bool quick_check( return true; } -// do slow checks (ones that require DB access) -// return true if OK to send +// Do checks that require DB access for whether we can send this job, +// and return true if OK to send // static bool slow_check( - WU_RESULT& wu_result, WORKUNIT& wu, APP* app, BEST_APP_VERSION* bavp + WU_RESULT& wu_result, // the job cache entry. + // We may refresh its hr_class and app_version_id fields. + APP* app, + BEST_APP_VERSION* bavp // the app version to be used ) { int n, retval; DB_RESULT result; char buf[256]; + WORKUNIT& wu = wu_result.workunit; // Don't send if we've already sent a result of this WU to this user. // if (config.one_result_per_user_per_wu) { sprintf(buf, - "where workunitid=%d and userid=%d", - wu_result.workunit.id, g_reply->user.id + "where workunitid=%d and userid=%d", wu.id, g_reply->user.id ); retval = result.count(n, buf); if (retval) { @@ -180,21 +187,18 @@ static bool slow_check( if (config.debug_send) { log_messages.printf(MSG_NORMAL, "[send] [USER#%d] already has %d result(s) for [WU#%d]\n", - g_reply->user.id, n, wu_result.workunit.id + g_reply->user.id, n, wu.id ); } return false; } } } else if (config.one_result_per_host_per_wu) { - // Don't send if we've already sent a result - // of this WU to this host. - // We only have to check this - // if we don't send one result per user. + // Don't send if we've already sent a result of this WU to this host. + // We only have to check this if we don't send one result per user. // sprintf(buf, - "where workunitid=%d and hostid=%d", - wu_result.workunit.id, g_reply->host.id + "where workunitid=%d and hostid=%d", wu.id, g_reply->host.id ); retval = result.count(n, buf); if (retval) { @@ -207,7 +211,7 @@ static bool slow_check( if (config.debug_send) { log_messages.printf(MSG_NORMAL, "[send] [HOST#%d] already has %d result(s) for [WU#%d]\n", - g_reply->host.id, n, wu_result.workunit.id + g_reply->host.id, n, wu.id ); } return false; @@ -220,7 +224,7 @@ static bool slow_check( // if (app_hr_type(*app) || app->homogeneous_app_version) { DB_WORKUNIT db_wu; - db_wu.id = wu_result.workunit.id; + db_wu.id = wu.id; int vals[2]; retval = db_wu.get_field_ints("hr_class, app_version_id", 2, vals); if (retval) { @@ -230,8 +234,8 @@ static bool slow_check( return false; } if (app_hr_type(*app)) { - wu_result.workunit.hr_class = vals[0]; - if (already_sent_to_different_hr_class( wu_result.workunit, *app)) { + wu.hr_class = vals[0]; + if (already_sent_to_different_hr_class(wu, *app)) { if (config.debug_send) { log_messages.printf(MSG_NORMAL, "[send] [HOST#%d] [WU#%d %s] is assigned to different HR class\n", @@ -248,7 +252,7 @@ static bool slow_check( } if (app->homogeneous_app_version) { int wu_avid = vals[1]; - wu_result.workunit.app_version_id = wu_avid; + wu.app_version_id = wu_avid; if (wu_avid && wu_avid != bavp->avp->id) { if (config.debug_send) { log_messages.printf(MSG_NORMAL, @@ -320,9 +324,14 @@ static bool scan_work_array() { i = (j+rnd_off) % ssp->max_wu_results; WU_RESULT& wu_result = ssp->wu_results[i]; + + // make a copy of the WORKUNIT part, + // which we can modify without affecting the cache + // WORKUNIT wu = wu_result.workunit; - // do fast (non-DB) checks + // do fast (non-DB) checks. + // This may modify wu.rsc_fpops_est // if (!quick_check(wu_result, wu, bavp, app, last_retval)) { continue; @@ -339,13 +348,17 @@ static bool scan_work_array() { wu_result.state = g_pid; unlock_sema(); - if (!slow_check(wu_result, wu, app, bavp)) { + if (!slow_check(wu_result, app, bavp)) { // if we couldn't send the result to this host, // set its state back to PRESENT // wu_result.state = WR_STATE_PRESENT; } else { - result.id = wu_result.resultid; + // slow_check() refreshes fields of wu_result.workunit; + // update our copy too + // + wu.hr_class = wu_result.workunit.hr_class; + wu.app_version_id = wu_result.workunit.app_version_id; // mark slot as empty AFTER we've copied out of it // (since otherwise feeder might overwrite it) @@ -356,6 +369,7 @@ static bool scan_work_array() { // TODO: from here to end of add_result_to_reply() // (which updates the DB record) should be a transaction // + result.id = wu_result.resultid; if (result_still_sendable(result, wu)) { retval = add_result_to_reply(result, wu, bavp, false); diff --git a/sched/sched_send.cpp b/sched/sched_send.cpp index 857535164f..b0170f9646 100644 --- a/sched/sched_send.cpp +++ b/sched/sched_send.cpp @@ -1133,7 +1133,9 @@ inline static int get_app_version_id(BEST_APP_VERSION* bavp) { } int add_result_to_reply( - SCHED_DB_RESULT& result, WORKUNIT& wu, BEST_APP_VERSION* bavp, + SCHED_DB_RESULT& result, + WORKUNIT& wu, + BEST_APP_VERSION* bavp, bool locality_scheduling ) { int retval;