client: apparently CUDA does not always report GPUs in order of ascending PCI slot number, but Apple's OpenCL does. We must take this into account when correlating NVIDIA OpenCL GPUs with CUDA GPUs.

Since NVIDIA provides drivers for both OpenCL and CUDA on Windows and Linux, I am assuming (until shown otherwise) that they will report GPUs in the same order.
This commit is contained in:
Charlie Fenton 2013-06-28 02:37:57 -07:00
parent 92ed94f154
commit a6e276b93e
1 changed files with 46 additions and 8 deletions

View File

@ -113,7 +113,7 @@ static bool is_intel(char* vendor) {
// If "loose", tolerate small diff
//
int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) {
static int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) {
if (c1.opencl_device_version_int > c2.opencl_device_version_int) return 1;
if (c1.opencl_device_version_int < c2.opencl_device_version_int) return -1;
if (loose) {
@ -128,6 +128,18 @@ int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) {
return 0;
}
#ifdef __APPLE__
static bool compare_pci_slots(int NVIDIA_GPU_Index1, int NVIDIA_GPU_Index2) {
if (NVIDIA_GPU_Index1 >= (int)nvidia_gpus.size()) return false; // Should never happen
if (NVIDIA_GPU_Index2 >= (int)nvidia_gpus.size()) return false; // Should never happen
return (
nvidia_gpus[NVIDIA_GPU_Index1].pci_info.bus_id <
nvidia_gpus[NVIDIA_GPU_Index2].pci_info.bus_id
);
}
#endif
// OpenCL interfaces are documented here:
// http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/ and
// http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/
@ -147,6 +159,8 @@ void COPROCS::get_opencl(
int current_CAL_index;
int min_CAL_target;
int num_CAL_devices = (int)ati_gpus.size();
vector<int>devnums_pci_slot_sort;
vector<OPENCL_DEVICE_PROP>::iterator it;
#ifdef _WIN32
opencl_lib = LoadLibrary("OpenCL.dll");
@ -199,6 +213,19 @@ void COPROCS::get_opencl(
return;
}
if (nvidia_gpus.size()) {
for (int i=0; i<(int)nvidia_gpus.size(); ++i) {
devnums_pci_slot_sort.push_back(i);
}
#ifdef __APPLE__
std::stable_sort(devnums_pci_slot_sort.begin(),
devnums_pci_slot_sort.end(),
compare_pci_slots
);
#endif
}
for (platform_index=0; platform_index<num_platforms; ++platform_index) {
ciErrNum = (*__clGetPlatformInfo)(
platforms[platform_index], CL_PLATFORM_VERSION,
@ -308,8 +335,12 @@ void COPROCS::get_opencl(
if (is_NVIDIA(prop.vendor)) {
if (nvidia.have_cuda) {
// Mac OpenCL does not recognize all NVIDIA GPUs returned by
// CUDA but we assume that OpenCL and CUDA return devices in
// the same order and with identical model name strings
// CUDA but we assume that OpenCL and CUDA return devices
// with identical model name strings and that OpenCL returns
// devices in order of ascending PCI slot.
//
// On other systems, assume OpenCL and CUDA return devices
// in the same order.
//
while (1) {
if (current_CUDA_index >= (int)(nvidia_gpus.size())) {
@ -320,7 +351,9 @@ void COPROCS::get_opencl(
warnings.push_back(buf);
return; // Should never happen
}
if (!strcmp(prop.name, nvidia_gpus[current_CUDA_index].prop.name)) {
if (!strcmp(prop.name,
nvidia_gpus[devnums_pci_slot_sort[current_CUDA_index]].prop.name)
) {
break; // We have a match
}
// This CUDA GPU is not recognized by OpenCL,
@ -328,7 +361,7 @@ void COPROCS::get_opencl(
//
++current_CUDA_index;
}
prop.device_num = current_CUDA_index;
prop.device_num = devnums_pci_slot_sort[current_CUDA_index];
} else {
prop.device_num = (int)(nvidia_opencls.size());
}
@ -343,14 +376,19 @@ void COPROCS::get_opencl(
prop.peak_flops = c.peak_flops;
}
if (nvidia_gpus.size()) {
// Assumes OpenCL and CUDA return the devices
// in the same order
// Assumes OpenCL device_num and CUDA device_num now match
//
prop.opencl_available_ram = nvidia_gpus[prop.device_num].available_ram;
} else {
prop.opencl_available_ram = prop.global_mem_size;
}
nvidia_opencls.push_back(prop);
// Build nvidia_opencls vector in device_num order
for (it=nvidia_opencls.begin(); it<nvidia_opencls.end(); it++) {
if (it->device_num > prop.device_num) break;
}
nvidia_opencls.insert(it, prop);
++current_CUDA_index;
}