diff --git a/checkin_notes b/checkin_notes index 11773c4e6a..263f48c193 100755 --- a/checkin_notes +++ b/checkin_notes @@ -24387,3 +24387,16 @@ Eric K. 9 Feb 2005 Makefile.am lib/ std_fixes.h + +David 9 Feb 2005 + - locality scheduling: move check for transient infeasibility + to SCHEDULER_REPLY::work_needed() for uniformity + - scheduler: if request has different CPID, accept it + whether or not it's greater than current one + - file deleter: show full path in error messages + + sched/ + handle_request.C + sched_locality.C + sched_send.C + server_types.h diff --git a/sched/file_deleter.C b/sched/file_deleter.C index 2dd157bfeb..6eaead0a79 100644 --- a/sched/file_deleter.C +++ b/sched/file_deleter.C @@ -134,7 +134,8 @@ int result_delete_files(RESULT& result) { ++count_deleted; log_messages.printf(SCHED_MSG_LOG::NORMAL, "[%s] unlinked %s; retval %d %s\n", - result.name, filename, retval, (retval && errno)?strerror(errno):"" + result.name, pathname, retval, + (retval && errno)?strerror(errno):"" ); } } diff --git a/sched/handle_request.C b/sched/handle_request.C index 2c8fd2d805..5f7847aa1a 100644 --- a/sched/handle_request.C +++ b/sched/handle_request.C @@ -198,10 +198,10 @@ make_new_host: reply.email_hash ); - // see if new cross-project ID (i.e., greater than the one we have) + // see if new cross-project ID // if (strlen(sreq.cross_project_id)) { - if (strcmp(sreq.cross_project_id, reply.user.cross_project_id) > 0) { + if (strcmp(sreq.cross_project_id, reply.user.cross_project_id)) { strcpy(reply.user.cross_project_id, sreq.cross_project_id); reply.update_user_record = true; } diff --git a/sched/sched_locality.C b/sched/sched_locality.C index 7b6762398b..d37db8f843 100644 --- a/sched/sched_locality.C +++ b/sched/sched_locality.C @@ -278,15 +278,8 @@ static int send_results_for_file( for (i=0; i<100; i++) { // avoid infinite loop int query_retval; - if (!reply.work_needed()) break; + if (!reply.work_needed(true)) break; - // if we've failed to send a result because of a transient condition, - // leave loop to preserve invariant - // - if (reply.wreq.insufficient_disk || reply.wreq.insufficient_speed) { - break; - } - log_messages.printf(SCHED_MSG_LOG::DEBUG, "in_send_results_for_file(%s, %d) prev_result.id=%d\n", filename, i, prev_result.id ); @@ -519,7 +512,7 @@ static int send_new_file_work_deterministic( // continue deterministic search at lexically first possible // filename, continue to randomly choosen one - if (!getfile_retval && reply.work_needed()) { + if (!getfile_retval && reply.work_needed(true)) { send_new_file_work_deterministic_seeded(sreq, reply, platform, ss, nsent, "", start_filename); if (nsent) { return 0; @@ -562,7 +555,7 @@ static int send_new_file_work( SCHED_SHMEM& ss ) { - while (reply.work_needed()) { + while (reply.work_needed(true)) { int random_time=6*3600+rand()%(6*3600); // send work that's been hanging around the queue for more than 6 @@ -571,14 +564,14 @@ static int send_new_file_work( "send_new_file_work() trying to send results created > %.1f hours ago\n", ((double)random_time)/3600.0); send_old_work(sreq, reply, platform, ss, random_time); - if (reply.work_needed()) { + if (reply.work_needed(true)) { log_messages.printf(SCHED_MSG_LOG::DEBUG, "send_new_file_work() trying to send from working set\n" ); send_new_file_work_working_set(sreq, reply, platform, ss); } - if (reply.work_needed()) { + if (reply.work_needed(true)) { log_messages.printf(SCHED_MSG_LOG::DEBUG, "send_new_file_work() trying deterministic method\n" ); @@ -674,7 +667,7 @@ void send_work_locality( // for (i=0; i<(int)sreq.file_infos.size(); i++) { k = (i+j)%nfiles; - if (!reply.work_needed()) break; + if (!reply.work_needed(true)) break; FILE_INFO& fi = sreq.file_infos[k]; send_results_for_file( fi.name, nsent, sreq, reply, platform, ss, false @@ -693,7 +686,7 @@ void send_work_locality( // send new files if needed // - if (reply.work_needed()) { + if (reply.work_needed(true)) { send_new_file_work(sreq, reply, platform, ss); } } diff --git a/sched/sched_send.C b/sched/sched_send.C index 9127bed855..ab009878b8 100644 --- a/sched/sched_send.C +++ b/sched/sched_send.C @@ -748,7 +748,15 @@ void unlock_sema() { // and we haven't exceeded result per RPC limit, // and we haven't exceeded results per day limit // -bool SCHEDULER_REPLY::work_needed() { +bool SCHEDULER_REPLY::work_needed(bool locality_sched) { + if (locality_sched) { + // if we've failed to send a result because of a transient condition, + // return false to preserve invariant + // + if (wreq.insufficient_disk || wreq.insufficient_speed) { + return false; + } + } if (wreq.seconds_to_fill <= 0) return false; if (wreq.disk_available <= 0) { wreq.insufficient_disk = true; diff --git a/sched/server_types.h b/sched/server_types.h index 375bba0964..08c3bb318d 100644 --- a/sched/server_types.h +++ b/sched/server_types.h @@ -183,7 +183,7 @@ struct SCHEDULER_REPLY { void insert_workunit_unique(WORKUNIT&); void insert_result(RESULT&); void insert_message(USER_MESSAGE&); - bool work_needed(); + bool work_needed(bool locality_sched=false); }; #endif