API: Fix a bug in boinc_get_opencl_ids() which incorrectly returned CL_INVALID_DEVICE on some systems. This would happen on hosts with multiple GPUs if:

* the host has GPUs from two different vendors with different OpenCL platforms (e.g., NVIDIA and AMD), and
* the second platform queried has more GPUs than the first one, and
* the requested GPU is a higher-numbered one in the second platform.
This commit is contained in:
Charlie Fenton 2014-09-22 04:57:23 -07:00
parent c2a0421074
commit 328d87be26
3 changed files with 97 additions and 37 deletions

View File

@ -41,6 +41,8 @@
#include "boinc_opencl.h"
static int compareBOINCVersionTo(int toMajor, int toMinor, int toRelease);
// A few complicating factors:
// Windows & Linux have a separate OpenCL platform for each vendor
// (NVIDIA, AMD, Intel).
@ -95,7 +97,7 @@ int boinc_get_opencl_ids_aux(
int device_num_for_type = -1;
int device_index;
if ((!type) || (!strlen(type))) return CL_DEVICE_NOT_FOUND;
if ((!type) || (!strlen(type))) return CL_INVALID_DEVICE_TYPE;
retval = clGetPlatformIDs(MAX_OPENCL_PLATFORMS, platforms, &num_platforms);
if (num_platforms == 0) return CL_DEVICE_NOT_FOUND;
@ -110,25 +112,23 @@ int boinc_get_opencl_ids_aux(
// Use gpu_opencl_dev_index if available
if (opencl_device_index >= 0) {
if (opencl_device_index >= (int)num_devices) {
fprintf(stderr, "Invalid OpenCL GPU index: %d \n", opencl_device_index);
return CL_INVALID_DEVICE;
}
if (opencl_device_index < (int)num_devices) {
device_id = devices[opencl_device_index];
retval = get_vendor(device_id, vendor, sizeof(vendor));
if (retval != CL_SUCCESS) continue;
device_id = devices[opencl_device_index];
retval = get_vendor(device_id, vendor, sizeof(vendor));
if (retval != CL_SUCCESS) continue;
if (!strcmp(vendor, type)) {
*device = device_id;
*platform = platforms[platform_index];
return 0;
if (!strcmp(vendor, type)) {
*device = device_id;
*platform = platforms[platform_index];
return 0;
}
}
continue;
}
// Older versions of init_data.xml don't have gpu_opencl_dev_index field
// Older versions of init_data.xml don't have the gpu_opencl_dev_index
// field so use the value of gpu_device_num.
// NOTE: This may return the wrong device on older versions of BOINC if
// OpenCL does not recognize all GPU models detected by CUDA or CAL
for (device_index=0; device_index<(int)num_devices; ++device_index) {
@ -147,6 +147,9 @@ int boinc_get_opencl_ids_aux(
}
}
fprintf(stderr, "GPU not found: type=%s, opencl_device_index=%d, device_num=%d\n",
type, opencl_device_index, device_num);
return CL_DEVICE_NOT_FOUND;
}
@ -157,14 +160,15 @@ int boinc_get_opencl_ids_aux(
// type: may be PROC_TYPE_NVIDIA_GPU, PROC_TYPE_AMD_GPU or PROC_TYPE_INTEL_GPU
// (it may also be 0, but then it will fail on older clients.)
//
// The argc, argv and type arguments are ignored for 7.0.12 or later clients.
// The argc, argv and type arguments are ignored for 6.13.3 or later clients.
//
// returns
// - 0 if success
// - ERR_FOPEN if init_data.xml missing
// - ERR_XML_PARSE if can't parse init_data.xml
// - CL_INVALID_DEVICE_TYPE if unable to get gpu_type information
// - ERR_NOT_FOUND if unable to get opencl_device_index or gpu device_num
// - CL_INVALID_DEVICE if unable to get opencl_device_index or gpu device_num
// - CL_DEVICE_NOT_FOUND if the requested device was not found
// - an OpenCL error number if OpenCL error
//
int boinc_get_opencl_ids(
@ -203,24 +207,39 @@ int boinc_get_opencl_ids(
}
if (aid.gpu_opencl_dev_index < 0) {
// Older versions of init_data.xml don't have gpu_opencl_dev_index field
//
if (compareBOINCVersionTo(7,0,12) >= 0) {
// gpu_opencl_dev_index was added in BOINC version 7.0.12.
// A gpu_opencl_dev_index value of -1 in version 7.0.12 or later
// means BOINC client did not assign an OpenCL GPU to this task.
fprintf(stderr, "Illegal value for gpu_opencl_dev_index: %d in BOINC Client %d.%d.%d\n",
aid.gpu_opencl_dev_index, aid.major_version, aid.minor_version, aid.release);
return CL_INVALID_DEVICE;
}
// Older versions of init_data.xml don't have the gpu_opencl_dev_index
// field so use the value of gpu_device_num if available.
gpu_device_num = aid.gpu_device_num;
if (gpu_device_num < 0) {
// Even older versions of init_data.xml don't have gpu_device_num field
for (i=0; i<argc-1; i++) {
if ((!strcmp(argv[i], "--device")) || (!strcmp(argv[i], "-device"))) {
gpu_device_num = atoi(argv[i+1]);
break;
if (compareBOINCVersionTo(6,13,3) < 0) {
// gpu_device_num and gpu_type fields were added in BOINC version 6.13.3.
// Very old versions of init_data.xml don't have gpu_device_num field
// but instead pass the device number as a command-line argument.
for (i=0; i<argc-1; i++) {
if ((!strcmp(argv[i], "--device")) || (!strcmp(argv[i], "-device"))) {
gpu_device_num = atoi(argv[i+1]);
break;
}
}
}
if (gpu_device_num < 0) {
// BOINC client apparently did not assign a GPU to this task.
fprintf(stderr, "Illegal value for gpu_device_num: %d in BOINC Client %d.%d.%d\n",
aid.gpu_device_num, aid.major_version, aid.minor_version, aid.release);
return CL_INVALID_DEVICE;
}
}
}
if ((aid.gpu_opencl_dev_index < 0) && (gpu_device_num < 0)) {
fprintf(stderr, "GPU device # not found in %s\n", INIT_DATA_FILE);
return ERR_NOT_FOUND;
}
} // End if (aid.gpu_opencl_dev_index < 0)
retval = boinc_get_opencl_ids_aux(
gpu_type, aid.gpu_opencl_dev_index, gpu_device_num, device, platform
@ -236,7 +255,9 @@ int boinc_get_opencl_ids(
// - 0 if success
// - ERR_FOPEN if init_data.xml missing
// - ERR_XML_PARSE if can't parse init_data.xml
// - ERR_NOT_FOUND if missing <gpu_type> or <gpu_device_num> fields
// - CL_INVALID_DEVICE_TYPE if unable to get gpu_type information
// - CL_INVALID_DEVICE if unable to get opencl_device_index or gpu device_num
// - CL_DEVICE_NOT_FOUND if the requested device was not found
// - an OpenCL error number if OpenCL error
//
int boinc_get_opencl_ids(cl_device_id* device, cl_platform_id* platform) {
@ -249,12 +270,29 @@ int boinc_get_opencl_ids(cl_device_id* device, cl_platform_id* platform) {
if (!strlen(aid.gpu_type)) {
fprintf(stderr, "GPU type not found in %s\n", INIT_DATA_FILE);
return ERR_NOT_FOUND;
return CL_INVALID_DEVICE_TYPE;
}
if ((aid.gpu_opencl_dev_index < 0) && (aid.gpu_device_num < 0)) {
fprintf(stderr, "GPU device # not found in %s\n", INIT_DATA_FILE);
return ERR_NOT_FOUND;
if (aid.gpu_opencl_dev_index < 0) {
if (compareBOINCVersionTo(7,0,12) >= 0) {
// gpu_opencl_dev_index was added in BOINC version 7.0.12.
// A gpu_opencl_dev_index value of -1 in version 7.0.12 or
// later means BOINC did not assign an OpenCL GPU to this task.
fprintf(stderr, "Illegal value for gpu_opencl_dev_index: %d in BOINC Client %d.%d.%d\n",
aid.gpu_opencl_dev_index, aid.major_version, aid.minor_version, aid.release);
return CL_INVALID_DEVICE;
}
if (aid.gpu_device_num < 0) {
if (compareBOINCVersionTo(6,13,3) >= 0) {
// gpu_device_num and gpu_type fields were added in BOINC version 6.13.3.
// A gpu_device_num value of -1 in version 6.13.3 or later means
// BOINC did not assign a GPU to this task.
fprintf(stderr, "Illegal value for gpu_device_num: %d in BOINC Client %d.%d.%d\n",
aid.gpu_device_num, aid.major_version, aid.minor_version, aid.release);
return CL_INVALID_DEVICE;
}
}
}
retval = boinc_get_opencl_ids_aux(
@ -263,3 +301,20 @@ int boinc_get_opencl_ids(cl_device_id* device, cl_platform_id* platform) {
return retval;
}
static int compareBOINCVersionTo(int toMajor, int toMinor, int toRelease) {
APP_INIT_DATA aid;
boinc_get_init_data(aid);
if (aid.major_version < toMajor) return -1;
if (aid.major_version > toMajor) return 1;
if (aid.minor_version < toMinor) return -1;
if (aid.minor_version > toMinor) return 1;
if (aid.release < toRelease) return -1;
if (aid.release > toRelease) return 1;
return 0;
}

View File

@ -281,9 +281,14 @@ void APP_INIT_DATA::clear() {
fraction_done_start = 0;
fraction_done_end = 0;
checkpoint_period = 0;
// gpu_type is an empty string for client versions before 6.13.3 without this
// field or (on newer clients) if BOINC did not assign an OpenCL GPU to task.
strcpy(gpu_type, "");
// gpu_device_num < 0 for client versions before 6.13.3 without this field
// or (on newer clients) if BOINC did not assign any GPU to task.
gpu_device_num = -1;
// -1 means an older version without gpu_opencl_dev_index field
// gpu_opencl_dev_index < 0 for client versions before 7.0.12 without this
// field or (on newer clients) if BOINC did not assign an OpenCL GPU to task.
gpu_opencl_dev_index = -1;
gpu_usage = 0;
ncpus = 0;

View File

@ -63,7 +63,7 @@ struct OPENCL_DEVICE_PROP {
double peak_flops; // temp used in scan process
COPROC_USAGE is_used; // temp used in scan process
double opencl_available_ram; // temp used in scan process
int opencl_device_index; // zero-based device number within this COPROC type
int opencl_device_index; // zero-based device number within this OpenCL platform
void write_xml(MIOFILE&, const char* tag, bool temp_file=false);
int parse(XML_PARSER&, const char* end_tag);