- client: add a new mechanism for assigning coproc instances to tasks,

and passing them the corresponding --device N cmdline args.
    This fixes a bug introduced in 17402 (Feb 26)
    that broke the --device feature,
    presumably causing problems on systems with multiple GPUs.

svn path=/trunk/boinc/; revision=17549
This commit is contained in:
David Anderson 2009-03-06 23:10:45 +00:00
parent 2152c5b19d
commit e1b94a1e53
4 changed files with 84 additions and 14 deletions

View File

@@ -2857,3 +2857,16 @@ Rom 6 Mar 2009
clientgui/
AdvancedFrame.cpp
David 6 Mar 2009
- client: add a new mechanism for assigning coproc instances to tasks,
and passing them the corresponding --device N cmdline args.
This fixes a bug introduced in 17402 (Feb 26)
that broke the --device feature,
presumably causing problems on systems with multiple GPUs.
client/
app_start.cpp
client_types.h
lib/
coproc.h

View File

@@ -106,20 +106,66 @@ static void debug_print_argv(char** argv) {
}
#endif
// for apps that use CUDA coprocessors, append "--device x" to the command line
// for apps that use coprocessors, reserve the instances,
// and append "--device x" to the command line
//
static void cuda_cmdline(ACTIVE_TASK* atp, char* cmdline) {
#if 0
// NOTE(review): the statements from here down to the first
// sprintf/strcat pair look like the removed body of the old
// cuda_cmdline(), shown without diff markers; its replacement,
// coproc_cmdline(), follows. Confirm against the real file.
// TODO: do this another way
char buf[256];
if (!coproc_cuda) return;
for (int i=0; i<MAX_COPROC_INSTANCES; i++) {
if (coproc_cuda->owner[i] == atp) {
sprintf(buf, " --device %d", i);
strcat(cmdline, buf);
// Reserve instances of a coprocessor for a task that is being started,
// and append one " --device N" argument per reserved instance
// to the task's command line.
//
// coproc: coprocessor whose owner[] table is read and updated
// atp: the task being started; it is recorded as owner of each reserved instance
// ninstances: number of instances to reserve
// cmdline: command line string, appended to in place with strcat()
//     NOTE(review): no length check is done here; presumably the caller's
//     buffer has room for ninstances " --device N" strings - confirm.
//
// If fewer than ninstances free entries are found, an internal-error
// message is logged and the function returns early; instances reserved
// earlier in the same call stay assigned to atp and their
// " --device N" arguments stay in cmdline.
//
static void coproc_cmdline(
COPROC* coproc, ACTIVE_TASK* atp, int ninstances, char* cmdline
) {
unsigned int i;
int j, k;
vector<ACTIVE_TASK*> tasks_using_coproc;
// make a list of the executing tasks (other than this) using this coproc
//
for (i=0; i<gstate.active_tasks.active_tasks.size(); i++) {
ACTIVE_TASK* p = gstate.active_tasks.active_tasks[i];
// atp itself is left out of the list, so any owner[] entries it
// already holds are cleared below and then reassigned
if (p == atp) continue;
if (p->task_state() != PROCESS_EXECUTING) continue;
// presumably lookup() returns non-NULL when the task's app version
// uses a coproc of this type - verify against COPROCS::lookup
if (p->app_version->coprocs.lookup(coproc->type)) {
tasks_using_coproc.push_back(p);
}
}
// scan the coproc's owner array,
// clearing any entries not in the above list
//
for (j=0; j<coproc->count; j++) {
if (coproc->owner[j]) {
bool found = false;
// NOTE(review): k is int while size() is unsigned,
// so this is a signed/unsigned comparison
for (k=0; k<tasks_using_coproc.size(); k++) {
if (coproc->owner[j] == tasks_using_coproc[k]) {
found = true;
break;
}
}
if (!found) {
coproc->owner[j] = NULL;
}
}
}
// reserve instances for this job
//
char buf[256];
// k is not reset between instances: each search resumes just past
// the entry reserved for the previous instance
k = 0;
for (j=0; j<ninstances; j++) {
while (1) {
// ran past the last instance without finding a free entry
if (k == coproc->count) {
msg_printf(atp->result->project, MSG_INTERNAL_ERROR,
"Can't find free %s", coproc->type
);
return;
}
if (coproc->owner[k] == NULL) {
// the index in owner[] is the device number passed to the app
sprintf(buf, " --device %d", k);
strcat(cmdline, buf);
coproc->owner[k++] = atp;
break;
}
k++;
}
}
// NOTE(review): this #endif pairs with the "#if 0" near the top of this
// span and presumably belongs to the removed cuda_cmdline() - confirm.
#endif
}
// Make a unique key for core/app shared memory segment.
@@ -527,7 +573,9 @@ int ACTIVE_TASK::start(bool first_time) {
sprintf(cmdline, "%s %s %s",
exec_path, wup->command_line.c_str(), app_version->cmdline
);
cuda_cmdline(this, cmdline);
if (coproc_cuda && app_version->ncudas) {
coproc_cmdline(coproc_cuda, this, app_version->ncudas, cmdline);
}
relative_to_absolute(slot_dir, slotdirpath);
bool success = false;
@@ -819,7 +867,9 @@ int ACTIVE_TASK::start(bool first_time) {
}
#endif
sprintf(cmdline, "%s %s", wup->command_line.c_str(), app_version->cmdline);
cuda_cmdline(this, cmdline);
if (coproc_cuda && app_version->ncudas) {
coproc_cmdline(coproc_cuda, this, app_version->ncudas, cmdline);
}
sprintf(buf, "../../%s", exec_path );
if (g_use_sandbox) {
char switcher_path[100];

View File

@@ -412,7 +412,7 @@ struct APP_VERSION {
char api_version[16];
double avg_ncpus;
double max_ncpus;
double ncudas;
int ncudas;
double flops;
/// additional cmdline args
char cmdline[256];

View File

@@ -28,6 +28,8 @@
#include "miofile.h"
#define MAX_COPROC_INSTANCES 64
struct COPROC {
char type[256]; // must be unique
int count; // how many are present
@@ -39,6 +41,11 @@ struct COPROC {
int req_instances; // requesting enough jobs to use this many instances
double estimated_delay; // resource will be saturated for this long
// Used in client to keep track of which tasks are using which instances
// The pointers point to ACTIVE_TASK
//
void* owner[MAX_COPROC_INSTANCES];
#ifndef _USING_FCGI_
virtual void write_xml(MIOFILE&);
#endif