diff --git a/sched/handle_request.C b/sched/handle_request.C index 40d37da165..1b528a009e 100644 --- a/sched/handle_request.C +++ b/sched/handle_request.C @@ -690,6 +690,11 @@ static void scan_work_array( continue; } + // XXX FIXME: result should be re-read from database here and checked + // that server_state is still UNSENT -- if no longer `UNSENT' + // (perhaps because the WU had an unrecoverable error), then skip it. + // Should handle_request or feeder take care of removing it? + result = wu_result.result; retval = add_wu_to_reply(wu, reply, platform, ss); diff --git a/sched/transitioner.C b/sched/transitioner.C index 47ae08a8f2..98ab89d9ea 100644 --- a/sched/transitioner.C +++ b/sched/transitioner.C @@ -94,13 +94,20 @@ void handle_wu(DB_WORKUNIT& wu) { if (result.report_deadline < now) { log_messages.printf( SchedMessages::NORMAL, - "[WU#%d %s] [RESULT#%d %s] result timed out (%d < %d)\n", + "[WU#%d %s] [RESULT#%d %s] result timed out (%d < %d) server_state:IN_PROGRESS=>OVER; outcome:NO_REPLY\n", wu.id, wu.name, result.id, result.name, result.report_deadline, (int)now ); result.server_state = RESULT_SERVER_STATE_OVER; result.outcome = RESULT_OUTCOME_NO_REPLY; - result.update(); + retval = result.update(); + if (retval) { + log_messages.printf( + SchedMessages::CRITICAL, + "[WU#%d %s] [RESULT#%d %s] result.update() == %d\n", + wu.id, wu.name, result.id, result.name, retval + ); + } nover++; } else { ninprogress++; @@ -169,9 +176,21 @@ void handle_wu(DB_WORKUNIT& wu) { for (unsigned int i=0; iOVER; outcome:=>DIDNT_NEED\n", + wu.id, wu.name, result.id, result.name + ); result.server_state = RESULT_SERVER_STATE_OVER; result.outcome = RESULT_OUTCOME_DIDNT_NEED; - result.update(); + retval = result.update(); + if (retval) { + log_messages.printf( + SchedMessages::CRITICAL, + "[WU#%d %s] [RESULT#%d %s] result.update() == %d\n", + wu.id, wu.name, result.id, result.name, retval + ); + } } } if (wu.assimilate_state == ASSIMILATE_INIT) { @@ -232,7 +251,7 @@ void handle_wu(DB_WORKUNIT& wu) { wu.file_delete_state = FILE_DELETE_READY; log_messages.printf( SchedMessages::DEBUG, - "[WU#%d %s] ASSIMILATE_DONE => setting FILE_DELETE_READY\n", + "[WU#%d %s] ASSIMILATE_DONE: file_delete_state:=>READY\n", wu.id, wu.name ); } @@ -258,8 +277,20 @@ void handle_wu(DB_WORKUNIT& wu) { break; } if (do_delete && result.file_delete_state == FILE_DELETE_INIT) { + log_messages.printf( + SchedMessages::NORMAL, + "[WU#%d %s] [RESULT#%d %s] file_delete_state:=>READY\n", + wu.id, wu.name, result.id, result.name + ); result.file_delete_state = FILE_DELETE_READY; - result.update(); + retval = result.update(); + if (retval) { + log_messages.printf( + SchedMessages::CRITICAL, + "[WU#%d %s] [RESULT#%d %s] result.update() == %d\n", + wu.id, wu.name, result.id, result.name, retval + ); + } } } } @@ -278,7 +309,7 @@ void handle_wu(DB_WORKUNIT& wu) { if (retval) { log_messages.printf( SchedMessages::CRITICAL, - "[WU#%d %s] workunit.update() %d\n", wu.id, wu.name, retval + "[WU#%d %s] workunit.update() == %d\n", wu.id, wu.name, retval ); } } diff --git a/tools/update_versions b/tools/update_versions index bd1d5ce558..c73d6710fd 100755 --- a/tools/update_versions +++ b/tools/update_versions @@ -69,7 +69,7 @@ def find_versions(app, dir): platform = platforms[0] if app: - existing_versions = database.AppVersions.find(version_num = version_num) + existing_versions = database.AppVersions.find(app=app, platform=platform, version_num=version_num) if existing_versions: print " Skipping existing %s %3d: %s" %(app.name, version_num, file) continue