- client: on startup, if a coproc needed by a job is missing,

set a "coproc_missing" flag rather than aborting the job.
        If the user removes a GPU board while there's a large queue of GPU jobs,
        they'll stay queued (until their deadline passes).

        Note: this doesn't fix the situation where the user connects via
        Remote Desktop while GPU jobs are running or queued.
        We should check for Remote Desktop every minute or so, and stop GPU jobs.

svn path=/trunk/boinc/; revision=19287
This commit is contained in:
David Anderson 2009-10-12 16:28:17 +00:00
parent 11911cd6b4
commit 5e862ac495
9 changed files with 39 additions and 15 deletions

View File

@ -8499,7 +8499,7 @@ Rom 7 Oct 2009
ViewTransfersGrid.cpp, .h (deleted)
ViewWorkGrid.cpp, .h (deleted)
Bernd 7 Oct 2009
Bernd 7 Oct 2009
- lib: some changes to help building the WIN32 library with MinGW/gcc
enable by defining MINGW_WIN32 macro, nothing should change if
this is not set
@ -8524,3 +8524,24 @@ Charlie 7 Oct 2009
clientgui/
ViewWork.cpp
David 12 Oct 2009
- client: on startup, if a coproc needed by a job is missing,
set a "coproc_missing" flag rather than aborting the job.
If the user removes a GPU board while there's a large queue of GPU jobs,
they'll stay queued (until their deadline passes).
Note: this doesn't fix the situation where the user connects via
Remote Desktop while GPU jobs are running or queued.
We should check for Remote Desktop every minute or so, and stop GPU jobs.
client/
client_state.cpp
client_types.cpp,h
cs_statefile.cpp
work_fetch.cpp
clientgui/
ViewWork.cpp
lib/
gui_rpc_client.h
gui_rpc_client_ops.cpp

View File

@ -277,8 +277,8 @@ int CLIENT_STATE::init() {
fake_cuda(coprocs, 2);
msg_printf(NULL, MSG_INFO, "Faking an NVIDIA GPU");
#endif
#if 0
fake_ati(coprocs, 1);
#if 1
fake_ati(coprocs, 2);
msg_printf(NULL, MSG_INFO, "Faking an ATI GPU");
#endif
coproc_cuda = (COPROC_CUDA*)coprocs.lookup("CUDA");

View File

@ -1512,6 +1512,7 @@ void RESULT::clear() {
strcpy(platform, "");
strcpy(plan_class, "");
strcpy(resources, "");
coproc_missing = false;
}
// parse a <result> element from scheduling server.
@ -1742,6 +1743,7 @@ int RESULT::write_gui(MIOFILE& out) {
if (suspended_via_gui) out.printf(" <suspended_via_gui/>\n");
if (project->suspended_via_gui) out.printf(" <project_suspended_via_gui/>\n");
if (edf_scheduled) out.printf(" <edf_scheduled/>\n");
if (coproc_missing) out.printf(" <coproc_missing/>\n");
ACTIVE_TASK* atp = gstate.active_tasks.lookup_result(this);
if (atp) {
atp->write_gui(out);

View File

@ -40,10 +40,6 @@
#include "rr_sim.h"
#include "work_fetch.h"
#define P_LOW 1
#define P_MEDIUM 3
#define P_HIGH 5
#define MAX_FILE_INFO_LEN 4096
#define MAX_SIGNATURE_LEN 4096
#define MAX_KEY_LEN 4096
@ -531,6 +527,9 @@ struct RESULT {
/// - <stderr_txt>X</stderr_txt>, where X is the app's stderr output
std::string stderr_out;
bool suspended_via_gui;
bool coproc_missing;
// a coproc needed by this job is missing
// (e.g. because user removed their GPU board).
APP* app;
/// this may be NULL after result is finished

View File

@ -354,9 +354,9 @@ int CLIENT_STATE::parse_state_file() {
}
if (rp->avp->missing_coproc()) {
msg_printf(project, MSG_INFO,
"Missing coprocessor for task %s; aborting", rp->name
"Missing coprocessor for task %s", rp->name
);
rp->abort_inactive(ERR_MISSING_COPROC);
rp->coproc_missing = true;
}
rp->wup->version_num = rp->version_num;
results.push_back(rp);

View File

@ -1017,6 +1017,7 @@ bool RESULT::runnable() {
if (suspended_via_gui) return false;
if (project->suspended_via_gui) return false;
if (state() != RESULT_FILES_DOWNLOADED) return false;
if (coproc_missing) return false;
return true;
}

View File

@ -1102,13 +1102,11 @@ void CViewWork::GetDocStatus(wxInt32 item, wxString& strBuffer) const {
if (retval || !result) {
return;
}
#if 0
if (result->active_task) {
char buf[256];
sprintf(buf, "<%d> ", result->slot);
strBuffer += wxString(buf, wxConvUTF8);
if (result->coproc_missing) {
strBuffer += _("GPU missing, ");
}
#endif
int throttled = status.task_suspend_reason & SUSPEND_REASON_CPU_USAGE_LIMIT;
switch(result->state) {
case RESULT_NEW:

View File

@ -235,6 +235,7 @@ public:
std::string stderr_out;
bool suspended_via_gui;
bool project_suspended_via_gui;
bool coproc_missing;
// the following defined if active
bool active_task;

View File

@ -439,6 +439,7 @@ int RESULT::parse(MIOFILE& in) {
if (parse_bool(buf, "got_server_ack", got_server_ack)) continue;
if (parse_bool(buf, "suspended_via_gui", suspended_via_gui)) continue;
if (parse_bool(buf, "project_suspended_via_gui", project_suspended_via_gui)) continue;
if (parse_bool(buf, "coproc_missing", coproc_missing)) continue;
if (match_tag(buf, "<active_task>")) {
active_task = true;
continue;
@ -496,6 +497,7 @@ void RESULT::clear() {
stderr_out.clear();
suspended_via_gui = false;
project_suspended_via_gui = false;
coproc_missing = false;
active_task = false;
active_task_state = 0;