diff --git a/checkin_notes b/checkin_notes index 8495c96eb9..b43c59274d 100755 --- a/checkin_notes +++ b/checkin_notes @@ -9153,3 +9153,13 @@ David 4 Oct 2007 prefs.C sched/ file_upload_handler.C + +David 4 Oct 2007 + - scheduler: bug fix in HR code: + in wu_is_infeasible(), check whether host type is unknown + before seeing if WU is already committed to different type + + sched/ + sched_array.C + sched_locality.C + sched_send.C,h diff --git a/sched/sched_array.C b/sched/sched_array.C index 75add705bf..8c3c33dfe9 100644 --- a/sched/sched_array.C +++ b/sched/sched_array.C @@ -113,7 +113,7 @@ void scan_work_array( // don't send if host can't handle it // wu = wu_result.workunit; - retval = wu_is_infeasible(wu, sreq, reply, app); + retval = wu_is_infeasible(wu, sreq, reply, *app); if (retval) { log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[HOST#%d] [WU#%d %s] WU is infeasible: %d\n", @@ -224,7 +224,7 @@ void scan_work_array( // This ensures that jobs already assigned to a platform // are processed first. // - wu_result.infeasible_count++; + wu_result.infeasible_count++; goto dont_send; } } diff --git a/sched/sched_locality.C b/sched/sched_locality.C index 82bab758b6..e0f596f16a 100644 --- a/sched/sched_locality.C +++ b/sched/sched_locality.C @@ -306,7 +306,7 @@ static int possibly_send_result( // INFEASIBLE_MEM, INFEASIBLE_DISK, INFEASIBLE_CPU. // see sched_send.h. 
// - if (wu_is_infeasible(wu, sreq, reply, app)) { + if (wu_is_infeasible(wu, sreq, reply, *app)) { return ERR_INSUFFICIENT_RESOURCE; } diff --git a/sched/sched_send.C b/sched/sched_send.C index 7970bdd922..ef3de01628 100644 --- a/sched/sched_send.C +++ b/sched/sched_send.C @@ -422,19 +422,26 @@ static inline int check_deadline( // Should move a few other checks from sched_array.C // int wu_is_infeasible( - WORKUNIT& wu, SCHEDULER_REQUEST& request, SCHEDULER_REPLY& reply, - APP* app + WORKUNIT& wu, SCHEDULER_REQUEST& request, SCHEDULER_REPLY& reply, APP& app ) { int retval; // homogeneous redundancy, quick check // - if (config.homogeneous_redundancy || app->homogeneous_redundancy) { - if (already_sent_to_different_platform_quick(request, wu, *app)) { + if (app_hr_type(app)) { + if (hr_unknown_platform_type(reply.host, app_hr_type(app))) { + log_messages.printf( + SCHED_MSG_LOG::MSG_DEBUG, + "[HOST#%d] [WU#%d %s] host is of unknown class in HR type %d\n", + reply.host.id, wu.id, wu.name, app_hr_type(app) + ); + return INFEASIBLE_HR; + } + if (already_sent_to_different_platform_quick(request, wu, app)) { log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[HOST#%d] [WU#%d %s] failed quick HR check: WU is class %d, host is class %d\n", - reply.host.id, wu.id, wu.name, wu.hr_class, hr_class(request.host, app_hr_type(*app)) + reply.host.id, wu.id, wu.name, wu.hr_class, hr_class(request.host, app_hr_type(app)) ); return INFEASIBLE_HR; } diff --git a/sched/sched_send.h b/sched/sched_send.h index 2e5ffa726b..8ebf82a274 100644 --- a/sched/sched_send.h +++ b/sched/sched_send.h @@ -53,7 +53,7 @@ extern bool app_core_compatible(WORK_REQ& wreq, APP_VERSION& av); #define INFEASIBLE_HR 8 extern int wu_is_infeasible( - WORKUNIT&, SCHEDULER_REQUEST&, SCHEDULER_REPLY&, APP* + WORKUNIT&, SCHEDULER_REQUEST&, SCHEDULER_REPLY&, APP& ); extern double max_allowable_disk(SCHEDULER_REQUEST&, SCHEDULER_REPLY&);