mirror of https://github.com/BOINC/boinc.git
client: apparently CUDA does not always report GPUs in order of ascending PCI slot number, but Apple's OpenCL does. We must take this into account when correlating NVIDIA OpenCL GPUs with CUDA GPUs.
Since NVIDIA provides drivers for both OpenCL and CUDA on Windows and Linux, I am assuming (until shown otherwise) that they will report GPUs in the same order.
This commit is contained in:
parent
92ed94f154
commit
a6e276b93e
|
@ -113,7 +113,7 @@ static bool is_intel(char* vendor) {
|
|||
|
||||
// If "loose", tolerate small diff
|
||||
//
|
||||
int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) {
|
||||
static int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) {
|
||||
if (c1.opencl_device_version_int > c2.opencl_device_version_int) return 1;
|
||||
if (c1.opencl_device_version_int < c2.opencl_device_version_int) return -1;
|
||||
if (loose) {
|
||||
|
@ -128,6 +128,18 @@ int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
#ifdef __APPLE__
|
||||
// Ordering predicate over indexes into the nvidia_gpus vector: returns true
// when the GPU at NVIDIA_GPU_Index1 has a lower PCI bus_id than the GPU at
// NVIDIA_GPU_Index2. It is passed to std::stable_sort to order the CUDA
// device indexes by PCI bus id, so indexes with equal bus_id keep their
// original relative order.
//
// Only the upper bound of each index is validated; an out-of-range index
// makes the predicate return false.
// NOTE(review): negative indexes are not guarded here -- presumably callers
// only pass 0..nvidia_gpus.size()-1; confirm.
//
static bool compare_pci_slots(int NVIDIA_GPU_Index1, int NVIDIA_GPU_Index2) {
|
||||
if (NVIDIA_GPU_Index1 >= (int)nvidia_gpus.size()) return false; // Should never happen
|
||||
if (NVIDIA_GPU_Index2 >= (int)nvidia_gpus.size()) return false; // Should never happen
|
||||
// Compare by pci_info.bus_id alone; no other PCI field is consulted.
return (
|
||||
nvidia_gpus[NVIDIA_GPU_Index1].pci_info.bus_id <
|
||||
nvidia_gpus[NVIDIA_GPU_Index2].pci_info.bus_id
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// OpenCL interfaces are documented here:
|
||||
// http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/ and
|
||||
// http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/
|
||||
|
@ -147,6 +159,8 @@ void COPROCS::get_opencl(
|
|||
int current_CAL_index;
|
||||
int min_CAL_target;
|
||||
int num_CAL_devices = (int)ati_gpus.size();
|
||||
vector<int>devnums_pci_slot_sort;
|
||||
vector<OPENCL_DEVICE_PROP>::iterator it;
|
||||
|
||||
#ifdef _WIN32
|
||||
opencl_lib = LoadLibrary("OpenCL.dll");
|
||||
|
@ -199,6 +213,19 @@ void COPROCS::get_opencl(
|
|||
return;
|
||||
}
|
||||
|
||||
if (nvidia_gpus.size()) {
|
||||
for (int i=0; i<(int)nvidia_gpus.size(); ++i) {
|
||||
devnums_pci_slot_sort.push_back(i);
|
||||
|
||||
}
|
||||
#ifdef __APPLE__
|
||||
std::stable_sort(devnums_pci_slot_sort.begin(),
|
||||
devnums_pci_slot_sort.end(),
|
||||
compare_pci_slots
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
for (platform_index=0; platform_index<num_platforms; ++platform_index) {
|
||||
ciErrNum = (*__clGetPlatformInfo)(
|
||||
platforms[platform_index], CL_PLATFORM_VERSION,
|
||||
|
@ -308,8 +335,12 @@ void COPROCS::get_opencl(
|
|||
if (is_NVIDIA(prop.vendor)) {
|
||||
if (nvidia.have_cuda) {
|
||||
// Mac OpenCL does not recognize all NVIDIA GPUs returned by
|
||||
// CUDA but we assume that OpenCL and CUDA return devices in
|
||||
// the same order and with identical model name strings
|
||||
// CUDA but we assume that OpenCL and CUDA return devices
|
||||
// with identical model name strings and that OpenCL returns
|
||||
// devices in order of ascending PCI slot.
|
||||
//
|
||||
// On other systems, assume OpenCL and CUDA return devices
|
||||
// in the same order.
|
||||
//
|
||||
while (1) {
|
||||
if (current_CUDA_index >= (int)(nvidia_gpus.size())) {
|
||||
|
@ -320,7 +351,9 @@ void COPROCS::get_opencl(
|
|||
warnings.push_back(buf);
|
||||
return; // Should never happen
|
||||
}
|
||||
if (!strcmp(prop.name, nvidia_gpus[current_CUDA_index].prop.name)) {
|
||||
if (!strcmp(prop.name,
|
||||
nvidia_gpus[devnums_pci_slot_sort[current_CUDA_index]].prop.name)
|
||||
) {
|
||||
break; // We have a match
|
||||
}
|
||||
// This CUDA GPU is not recognized by OpenCL,
|
||||
|
@ -328,7 +361,7 @@ void COPROCS::get_opencl(
|
|||
//
|
||||
++current_CUDA_index;
|
||||
}
|
||||
prop.device_num = current_CUDA_index;
|
||||
prop.device_num = devnums_pci_slot_sort[current_CUDA_index];
|
||||
} else {
|
||||
prop.device_num = (int)(nvidia_opencls.size());
|
||||
}
|
||||
|
@ -343,14 +376,19 @@ void COPROCS::get_opencl(
|
|||
prop.peak_flops = c.peak_flops;
|
||||
}
|
||||
if (nvidia_gpus.size()) {
|
||||
// Assumes OpenCL and CUDA return the devices
|
||||
// in the same order
|
||||
// Assumes OpenCL device_num and CUDA device_num now match
|
||||
//
|
||||
prop.opencl_available_ram = nvidia_gpus[prop.device_num].available_ram;
|
||||
} else {
|
||||
prop.opencl_available_ram = prop.global_mem_size;
|
||||
}
|
||||
nvidia_opencls.push_back(prop);
|
||||
|
||||
// Build nvidia_opencls vector in device_num order
|
||||
for (it=nvidia_opencls.begin(); it<nvidia_opencls.end(); it++) {
|
||||
if (it->device_num > prop.device_num) break;
|
||||
}
|
||||
nvidia_opencls.insert(it, prop);
|
||||
|
||||
++current_CUDA_index;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue