mirror of https://github.com/BOINC/boinc.git
- client: add a new mechanism for assigning coproc instances to tasks,
and passing them the corresponding --device N cmdline args. This fixes a bug introduced in 17402 (Feb 26) that broke the --device feature, presumably causing problems on systems with multiple GPUs. svn path=/trunk/boinc/; revision=17549
This commit is contained in:
parent
2152c5b19d
commit
e1b94a1e53
|
@ -2857,3 +2857,16 @@ Rom 6 Mar 2009
|
|||
|
||||
clientgui/
|
||||
AdvancedFrame.cpp
|
||||
|
||||
David 6 Mar 2009
|
||||
- client: add a new mechanism for assigning coproc instances to tasks,
|
||||
and passing them the corresponding --device N cmdline args.
|
||||
This fixes a bug introduced in 17402 (Feb 26)
|
||||
that broke the --device feature,
|
||||
presumably causing problems on systems with multiple GPUs.
|
||||
|
||||
client/
|
||||
app_start.cpp
|
||||
client_types.h
|
||||
lib/
|
||||
coproc.h
|
||||
|
|
|
@ -106,20 +106,66 @@ static void debug_print_argv(char** argv) {
|
|||
}
|
||||
#endif
|
||||
|
||||
// for apps that use CUDA coprocessors, append "--device x" to the command line
|
||||
// for apps that use coprocessors, reserve the instances,
|
||||
// and append "--device x" to the command line
|
||||
//
|
||||
static void cuda_cmdline(ACTIVE_TASK* atp, char* cmdline) {
|
||||
#if 0
|
||||
// TODO: do this another way
|
||||
char buf[256];
|
||||
if (!coproc_cuda) return;
|
||||
for (int i=0; i<MAX_COPROC_INSTANCES; i++) {
|
||||
if (coproc_cuda->owner[i] == atp) {
|
||||
sprintf(buf, " --device %d", i);
|
||||
strcat(cmdline, buf);
|
||||
static void coproc_cmdline(
|
||||
COPROC* coproc, ACTIVE_TASK* atp, int ninstances, char* cmdline
|
||||
) {
|
||||
unsigned int i;
|
||||
int j, k;
|
||||
vector<ACTIVE_TASK*> tasks_using_coproc;
|
||||
|
||||
// make a list of the executing tasks (other than this) using this coproc
|
||||
//
|
||||
for (i=0; i<gstate.active_tasks.active_tasks.size(); i++) {
|
||||
ACTIVE_TASK* p = gstate.active_tasks.active_tasks[i];
|
||||
if (p == atp) continue;
|
||||
if (p->task_state() != PROCESS_EXECUTING) continue;
|
||||
if (p->app_version->coprocs.lookup(coproc->type)) {
|
||||
tasks_using_coproc.push_back(p);
|
||||
}
|
||||
}
|
||||
|
||||
// scan the coproc's owner array,
|
||||
// clearing any entries not in the above list
|
||||
//
|
||||
for (j=0; j<coproc->count; j++) {
|
||||
if (coproc->owner[j]) {
|
||||
bool found = false;
|
||||
for (k=0; k<tasks_using_coproc.size(); k++) {
|
||||
if (coproc->owner[j] == tasks_using_coproc[k]) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
coproc->owner[j] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// reserve instances for this job
|
||||
//
|
||||
char buf[256];
|
||||
k = 0;
|
||||
for (j=0; j<ninstances; j++) {
|
||||
while (1) {
|
||||
if (k == coproc->count) {
|
||||
msg_printf(atp->result->project, MSG_INTERNAL_ERROR,
|
||||
"Can't find free %s", coproc->type
|
||||
);
|
||||
return;
|
||||
}
|
||||
if (coproc->owner[k] == NULL) {
|
||||
sprintf(buf, " --device %d", k);
|
||||
strcat(cmdline, buf);
|
||||
coproc->owner[k++] = atp;
|
||||
break;
|
||||
}
|
||||
k++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Make a unique key for core/app shared memory segment.
|
||||
|
@ -527,7 +573,9 @@ int ACTIVE_TASK::start(bool first_time) {
|
|||
sprintf(cmdline, "%s %s %s",
|
||||
exec_path, wup->command_line.c_str(), app_version->cmdline
|
||||
);
|
||||
cuda_cmdline(this, cmdline);
|
||||
if (coproc_cuda && app_version->ncudas) {
|
||||
coproc_cmdline(coproc_cuda, this, app_version->ncudas, cmdline);
|
||||
}
|
||||
|
||||
relative_to_absolute(slot_dir, slotdirpath);
|
||||
bool success = false;
|
||||
|
@ -819,7 +867,9 @@ int ACTIVE_TASK::start(bool first_time) {
|
|||
}
|
||||
#endif
|
||||
sprintf(cmdline, "%s %s", wup->command_line.c_str(), app_version->cmdline);
|
||||
cuda_cmdline(this, cmdline);
|
||||
if (coproc_cuda && app_version->ncudas) {
|
||||
coproc_cmdline(coproc_cuda, this, app_version->ncudas, cmdline);
|
||||
}
|
||||
sprintf(buf, "../../%s", exec_path );
|
||||
if (g_use_sandbox) {
|
||||
char switcher_path[100];
|
||||
|
|
|
@ -412,7 +412,7 @@ struct APP_VERSION {
|
|||
char api_version[16];
|
||||
double avg_ncpus;
|
||||
double max_ncpus;
|
||||
double ncudas;
|
||||
int ncudas;
|
||||
double flops;
|
||||
/// additional cmdline args
|
||||
char cmdline[256];
|
||||
|
|
|
@ -28,6 +28,8 @@
|
|||
|
||||
#include "miofile.h"
|
||||
|
||||
#define MAX_COPROC_INSTANCES 64
|
||||
|
||||
struct COPROC {
|
||||
char type[256]; // must be unique
|
||||
int count; // how many are present
|
||||
|
@ -39,6 +41,11 @@ struct COPROC {
|
|||
int req_instances; // requesting enough jobs to use this many instances
|
||||
double estimated_delay; // resource will be saturated for this long
|
||||
|
||||
// Used in client to keep track of which tasks are using which instances
|
||||
// The pointers point to ACTIVE_TASK
|
||||
//
|
||||
void* owner[MAX_COPROC_INSTANCES];
|
||||
|
||||
#ifndef _USING_FCGI_
|
||||
virtual void write_xml(MIOFILE&);
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue