From 5e862ac4950b77ce4bd8fe54abfd101267a91f34 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Mon, 12 Oct 2009 16:28:17 +0000 Subject: [PATCH] - client: on startup, if a coproc needed by a job is missing, set a "coproc_missing" flag rather than aborting the job. If the user removes a GPU board while there's a large queue of GPU jobs, they'll stay queued (until their deadline passes). Note: this doesn't fix the situation where the user connects via Remote Desktop while GPU jobs are running or queued. We should check for Remote Desktop every minute or so, and stop GPU jobs. svn path=/trunk/boinc/; revision=19287 --- checkin_notes | 23 ++++++++++++++++++++++- client/client_state.cpp | 4 ++-- client/client_types.cpp | 2 ++ client/client_types.h | 7 +++---- client/cs_statefile.cpp | 4 ++-- client/work_fetch.cpp | 1 + clientgui/ViewWork.cpp | 10 ++++------ lib/gui_rpc_client.h | 1 + lib/gui_rpc_client_ops.cpp | 2 ++ 9 files changed, 39 insertions(+), 15 deletions(-) diff --git a/checkin_notes b/checkin_notes index 62d93b3e6a..e32c8fd014 100644 --- a/checkin_notes +++ b/checkin_notes @@ -8499,7 +8499,7 @@ Rom 7 Oct 2009 ViewTransfersGrid.cpp, .h (deleted) ViewWorkGrid.cpp, .h (deleted) -Bernd 7 Oct 2009 +Bernd 7 Oct 2009 - lib: some changes to help building the WIN32 library with MinGW/gcc enable by defining MINGW_WIN32 macro, nothing should change if this is not set @@ -8524,3 +8524,24 @@ Charlie 7 Oct 2009 clientgui/ ViewWork.cpp + +David 12 Oct 2009 + - client: on startup, if a coproc needed by a job is missing, + set a "coproc_missing" flag rather than aborting the job. + If the user removes a GPU board while there's a large queue of GPU jobs, + they'll stay queued (until their deadline passes). + + Note: this doesn't fix the situation where the user connects via + Remote Desktop while GPU jobs are running or queued. + We should check for Remote Desktop every minute or so, and stop GPU jobs. 
+ + client/ + client_state.cpp + client_types.cpp,h + cs_statefile.cpp + work_fetch.cpp + clientgui/ + ViewWork.cpp + lib/ + gui_rpc_client.h + gui_rpc_client_ops.cpp diff --git a/client/client_state.cpp b/client/client_state.cpp index a91c474602..e290af7ec7 100644 --- a/client/client_state.cpp +++ b/client/client_state.cpp @@ -277,8 +277,8 @@ int CLIENT_STATE::init() { fake_cuda(coprocs, 2); msg_printf(NULL, MSG_INFO, "Faking an NVIDIA GPU"); #endif -#if 0 - fake_ati(coprocs, 1); +#if 1 + fake_ati(coprocs, 2); msg_printf(NULL, MSG_INFO, "Faking an ATI GPU"); #endif coproc_cuda = (COPROC_CUDA*)coprocs.lookup("CUDA"); diff --git a/client/client_types.cpp b/client/client_types.cpp index bae44941ac..acc276e02e 100644 --- a/client/client_types.cpp +++ b/client/client_types.cpp @@ -1512,6 +1512,7 @@ void RESULT::clear() { strcpy(platform, ""); strcpy(plan_class, ""); strcpy(resources, ""); + coproc_missing = false; } // parse a element from scheduling server. @@ -1742,6 +1743,7 @@ int RESULT::write_gui(MIOFILE& out) { if (suspended_via_gui) out.printf(" \n"); if (project->suspended_via_gui) out.printf(" \n"); if (edf_scheduled) out.printf(" \n"); + if (coproc_missing) out.printf(" \n"); ACTIVE_TASK* atp = gstate.active_tasks.lookup_result(this); if (atp) { atp->write_gui(out); diff --git a/client/client_types.h b/client/client_types.h index 049ba8e9c4..71d42d098b 100644 --- a/client/client_types.h +++ b/client/client_types.h @@ -40,10 +40,6 @@ #include "rr_sim.h" #include "work_fetch.h" -#define P_LOW 1 -#define P_MEDIUM 3 -#define P_HIGH 5 - #define MAX_FILE_INFO_LEN 4096 #define MAX_SIGNATURE_LEN 4096 #define MAX_KEY_LEN 4096 @@ -531,6 +527,9 @@ struct RESULT { /// - X, where X is the app's stderr output std::string stderr_out; bool suspended_via_gui; + bool coproc_missing; + // a coproc needed by this job is missing + // (e.g. because user removed their GPU board). 
APP* app; /// this may be NULL after result is finished diff --git a/client/cs_statefile.cpp b/client/cs_statefile.cpp index a222c3b903..b8073e5d5d 100644 --- a/client/cs_statefile.cpp +++ b/client/cs_statefile.cpp @@ -354,9 +354,9 @@ int CLIENT_STATE::parse_state_file() { } if (rp->avp->missing_coproc()) { msg_printf(project, MSG_INFO, - "Missing coprocessor for task %s; aborting", rp->name + "Missing coprocessor for task %s", rp->name ); - rp->abort_inactive(ERR_MISSING_COPROC); + rp->coproc_missing = true; } rp->wup->version_num = rp->version_num; results.push_back(rp); diff --git a/client/work_fetch.cpp b/client/work_fetch.cpp index 0f0856af7a..62250f37cf 100644 --- a/client/work_fetch.cpp +++ b/client/work_fetch.cpp @@ -1017,6 +1017,7 @@ bool RESULT::runnable() { if (suspended_via_gui) return false; if (project->suspended_via_gui) return false; if (state() != RESULT_FILES_DOWNLOADED) return false; + if (coproc_missing) return false; return true; } diff --git a/clientgui/ViewWork.cpp b/clientgui/ViewWork.cpp index a7a78778fd..33153c1f04 100644 --- a/clientgui/ViewWork.cpp +++ b/clientgui/ViewWork.cpp @@ -1102,13 +1102,11 @@ void CViewWork::GetDocStatus(wxInt32 item, wxString& strBuffer) const { if (retval || !result) { return; } -#if 0 - if (result->active_task) { - char buf[256]; - sprintf(buf, "<%d> ", result->slot); - strBuffer += wxString(buf, wxConvUTF8); + + if (result->coproc_missing) { + strBuffer += _("GPU missing, "); } -#endif + int throttled = status.task_suspend_reason & SUSPEND_REASON_CPU_USAGE_LIMIT; switch(result->state) { case RESULT_NEW: diff --git a/lib/gui_rpc_client.h b/lib/gui_rpc_client.h index e677a8198e..067feff746 100644 --- a/lib/gui_rpc_client.h +++ b/lib/gui_rpc_client.h @@ -235,6 +235,7 @@ public: std::string stderr_out; bool suspended_via_gui; bool project_suspended_via_gui; + bool coproc_missing; // the following defined if active bool active_task; diff --git a/lib/gui_rpc_client_ops.cpp b/lib/gui_rpc_client_ops.cpp index 
e6dcd9f42a..6923e87373 100644 --- a/lib/gui_rpc_client_ops.cpp +++ b/lib/gui_rpc_client_ops.cpp @@ -439,6 +439,7 @@ int RESULT::parse(MIOFILE& in) { if (parse_bool(buf, "got_server_ack", got_server_ack)) continue; if (parse_bool(buf, "suspended_via_gui", suspended_via_gui)) continue; if (parse_bool(buf, "project_suspended_via_gui", project_suspended_via_gui)) continue; + if (parse_bool(buf, "coproc_missing", coproc_missing)) continue; if (match_tag(buf, "")) { active_task = true; continue; @@ -496,6 +497,7 @@ void RESULT::clear() { stderr_out.clear(); suspended_via_gui = false; project_suspended_via_gui = false; + coproc_missing = false; active_task = false; active_task_state = 0;