mirror of https://github.com/BOINC/boinc.git
- client: better behavior if a GPU goes away:
1) If an APP_VERSION is missing a coprocessor, don't delete it and its files. (If the coprocessor returns, we won't need to re-download.) 2) If a RESULT uses an app version that is missing a coprocessor, abort it (rather than deleting it). The client will report the result on the next scheduler RPC, and the server will make a new instance. svn path=/trunk/boinc/; revision=19235
This commit is contained in:
parent
fca2cb8016
commit
833f417ae5
|
@ -8350,3 +8350,20 @@ David 2 Oct 2009
|
|||
lib/
|
||||
Makefile.am
|
||||
coproc.cpp,h
|
||||
|
||||
David 2 Oct 2009
|
||||
- client: better behavior if a GPU goes away:
|
||||
1) if an APP_VERSION is missing a coprocessor,
|
||||
don't delete it and its files.
|
||||
(If the coprocessor returns, we won't need to re-download)
|
||||
2) if a RESULT uses an app version that is missing a coprocessor,
|
||||
abort it (rather than deleting it).
|
||||
The client will report the result on the next scheduler RPC,
|
||||
and the server will make a new instance.
|
||||
|
||||
client/
|
||||
client_types.cpp
|
||||
cs_scheduler.cpp
|
||||
cs_statefile.cpp
|
||||
lib/
|
||||
error_numbers.h
|
||||
|
|
|
@ -1249,15 +1249,9 @@ void APP_VERSION::get_file_errors(string& str) {
|
|||
|
||||
bool APP_VERSION::missing_coproc() {
|
||||
if (ncudas && !coproc_cuda) {
|
||||
msg_printf(project, MSG_INTERNAL_ERROR,
|
||||
"Application uses missing NVIDIA GPU"
|
||||
);
|
||||
return true;
|
||||
}
|
||||
if (natis && !coproc_ati) {
|
||||
msg_printf(project, MSG_INTERNAL_ERROR,
|
||||
"Application uses missing ATI GPU"
|
||||
);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
|
|
@ -731,11 +731,17 @@ int CLIENT_STATE::handle_scheduler_reply(PROJECT* project, char* scheduler_url)
|
|||
}
|
||||
if (avpp.missing_coproc()) {
|
||||
msg_printf(project, MSG_INTERNAL_ERROR,
|
||||
"App version uses non-existent coprocessor; ignoring"
|
||||
"App version uses non-existent %s GPU",
|
||||
avpp.ncudas?"NVIDIA":"ATI"
|
||||
);
|
||||
}
|
||||
APP* app = lookup_app(project, avpp.app_name);
|
||||
if (!app) {
|
||||
msg_printf(project, MSG_INTERNAL_ERROR,
|
||||
"Missing app %s", avpp.app_name
|
||||
);
|
||||
continue;
|
||||
}
|
||||
APP* app = lookup_app(project, avpp.app_name);
|
||||
APP_VERSION* avp = lookup_app_version(
|
||||
app, avpp.platform, avpp.version_num, avpp.plan_class
|
||||
);
|
||||
|
@ -817,18 +823,26 @@ int CLIENT_STATE::handle_scheduler_reply(PROJECT* project, char* scheduler_url)
|
|||
delete rp;
|
||||
continue;
|
||||
}
|
||||
if (rp->avp->missing_coproc()) {
|
||||
msg_printf(project, MSG_INTERNAL_ERROR,
|
||||
"Missing coprocessor for task %s; aborting", rp->name
|
||||
);
|
||||
rp->abort_inactive(ERR_MISSING_COPROC);
|
||||
continue;
|
||||
} else {
|
||||
rp->set_state(RESULT_NEW, "handle_scheduler_reply");
|
||||
if (rp->avp->ncudas) {
|
||||
est_cuda_duration += rp->estimated_duration(false);
|
||||
} else if (rp->avp->natis) {
|
||||
est_ati_duration += rp->estimated_duration(false);
|
||||
} else {
|
||||
est_cpu_duration += rp->estimated_duration(false);
|
||||
}
|
||||
}
|
||||
rp->wup->version_num = rp->version_num;
|
||||
rp->received_time = now;
|
||||
results.push_back(rp);
|
||||
new_results.push_back(rp);
|
||||
rp->set_state(RESULT_NEW, "handle_scheduler_reply");
|
||||
if (rp->avp->ncudas) {
|
||||
est_cuda_duration += rp->estimated_duration(false);
|
||||
} else if (rp->avp->natis) {
|
||||
est_ati_duration += rp->estimated_duration(false);
|
||||
} else {
|
||||
est_cpu_duration += rp->estimated_duration(false);
|
||||
}
|
||||
results.push_back(rp);
|
||||
}
|
||||
if (log_flags.sched_op_debug) {
|
||||
if (sr.results.size()) {
|
||||
|
|
|
@ -269,8 +269,10 @@ int CLIENT_STATE::parse_state_file() {
|
|||
}
|
||||
}
|
||||
if (avp->missing_coproc()) {
|
||||
delete avp;
|
||||
continue;
|
||||
msg_printf(project, MSG_INTERNAL_ERROR,
|
||||
"Application uses missing %s GPU",
|
||||
avp->ncudas?"NVIDIA":"ATI"
|
||||
);
|
||||
}
|
||||
retval = link_app_version(project, avp);
|
||||
if (retval) {
|
||||
|
@ -350,6 +352,12 @@ int CLIENT_STATE::parse_state_file() {
|
|||
delete rp;
|
||||
continue;
|
||||
}
|
||||
if (rp->avp->missing_coproc()) {
|
||||
msg_printf(project, MSG_INTERNAL_ERROR,
|
||||
"Missing coprocessor for task %s; aborting", rp->name
|
||||
);
|
||||
rp->abort_inactive(ERR_MISSING_COPROC);
|
||||
}
|
||||
rp->wup->version_num = rp->version_num;
|
||||
results.push_back(rp);
|
||||
continue;
|
||||
|
@ -800,10 +808,6 @@ int CLIENT_STATE::parse_app_info(PROJECT* p, FILE* in) {
|
|||
if (strlen(avp->platform) == 0) {
|
||||
strcpy(avp->platform, get_primary_platform());
|
||||
}
|
||||
if (avp->missing_coproc()) {
|
||||
delete avp;
|
||||
continue;
|
||||
}
|
||||
if (link_app_version(p, avp)) {
|
||||
delete avp;
|
||||
continue;
|
||||
|
|
|
@ -112,12 +112,12 @@
|
|||
#define ERR_ASYNCSELECT -157
|
||||
#define ERR_BAD_RESULT_STATE -158
|
||||
#define ERR_DB_CANT_INIT -159
|
||||
#define ERR_NOT_UNIQUE -160
|
||||
// state files had redundant entries
|
||||
#define ERR_NOT_FOUND -161
|
||||
// inconsistent client state
|
||||
#define ERR_NO_EXIT_STATUS -162
|
||||
// exit_status not found in scheduler request
|
||||
#define ERR_NOT_UNIQUE -160
|
||||
// state files had redundant entries
|
||||
#define ERR_NOT_FOUND -161
|
||||
// inconsistent client state
|
||||
#define ERR_NO_EXIT_STATUS -162
|
||||
// exit_status not found in scheduler request
|
||||
#define ERR_FILE_MISSING -163
|
||||
#define ERR_SEMGET -165
|
||||
#define ERR_SEMCTL -166
|
||||
|
@ -188,6 +188,7 @@
|
|||
#define ERR_CRYPTO -231
|
||||
#define ERR_ABORTED_ON_EXIT -232
|
||||
#define ERR_UNSTARTED_LATE -233
|
||||
#define ERR_MISSING_COPROC -234
|
||||
|
||||
// PLEASE: add a text description of your error to
|
||||
// the text description function boincerror() in str_util.cpp.
|
||||
|
|
Loading…
Reference in New Issue