From a6e276b93e07f3efab957c0aa815f105487277b6 Mon Sep 17 00:00:00 2001 From: Charlie Fenton Date: Fri, 28 Jun 2013 02:37:57 -0700 Subject: [PATCH] client: apparently CUDA does not always report GPUs in order of ascending PCI slot number, but Apple's OpenCL does. We must take this into account when correlating NVIDIA OpenCL GPUs with CUDA GPUs. Since NVIDIA provides drivers for both OpenCL and CUDA on Windows and Linux, I am assuming (until shown otherwise) that they will report GPUs in the same order. --- client/gpu_opencl.cpp | 54 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/client/gpu_opencl.cpp b/client/gpu_opencl.cpp index 47c50170ea..894ff7ec9f 100644 --- a/client/gpu_opencl.cpp +++ b/client/gpu_opencl.cpp @@ -113,7 +113,7 @@ static bool is_intel(char* vendor) { // If "loose", tolerate small diff // -int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) { +static int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) { if (c1.opencl_device_version_int > c2.opencl_device_version_int) return 1; if (c1.opencl_device_version_int < c2.opencl_device_version_int) return -1; if (loose) { @@ -128,6 +128,18 @@ int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) { return 0; } +#ifdef __APPLE__ +static bool compare_pci_slots(int NVIDIA_GPU_Index1, int NVIDIA_GPU_Index2) { + if (NVIDIA_GPU_Index1 >= (int)nvidia_gpus.size()) return false; // Should never happen + if (NVIDIA_GPU_Index2 >= (int)nvidia_gpus.size()) return false; // Should never happen + return ( + nvidia_gpus[NVIDIA_GPU_Index1].pci_info.bus_id < + nvidia_gpus[NVIDIA_GPU_Index2].pci_info.bus_id + ); +} +#endif + + // OpenCL interfaces are documented here: // http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/ and // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/ @@ -147,6 +159,8 @@ void COPROCS::get_opencl( int current_CAL_index; int min_CAL_target; int 
num_CAL_devices = (int)ati_gpus.size(); + vectordevnums_pci_slot_sort; + vector::iterator it; #ifdef _WIN32 opencl_lib = LoadLibrary("OpenCL.dll"); @@ -199,6 +213,19 @@ void COPROCS::get_opencl( return; } + if (nvidia_gpus.size()) { + for (int i=0; i<(int)nvidia_gpus.size(); ++i) { + devnums_pci_slot_sort.push_back(i); + + } +#ifdef __APPLE__ + std::stable_sort(devnums_pci_slot_sort.begin(), + devnums_pci_slot_sort.end(), + compare_pci_slots + ); +#endif + } + for (platform_index=0; platform_index= (int)(nvidia_gpus.size())) { @@ -320,7 +351,9 @@ void COPROCS::get_opencl( warnings.push_back(buf); return; // Should never happen } - if (!strcmp(prop.name, nvidia_gpus[current_CUDA_index].prop.name)) { + if (!strcmp(prop.name, + nvidia_gpus[devnums_pci_slot_sort[current_CUDA_index]].prop.name) + ) { break; // We have a match } // This CUDA GPU is not recognized by OpenCL, @@ -328,7 +361,7 @@ void COPROCS::get_opencl( // ++current_CUDA_index; } - prop.device_num = current_CUDA_index; + prop.device_num = devnums_pci_slot_sort[current_CUDA_index]; } else { prop.device_num = (int)(nvidia_opencls.size()); } @@ -343,14 +376,19 @@ void COPROCS::get_opencl( prop.peak_flops = c.peak_flops; } if (nvidia_gpus.size()) { - // Assumes OpenCL and CUDA return the devices - // in the same order + // Assumes OpenCL device_num and CUDA device_num now match // prop.opencl_available_ram = nvidia_gpus[prop.device_num].available_ram; } else { prop.opencl_available_ram = prop.global_mem_size; } - nvidia_opencls.push_back(prop); + + // Build nvidia_opencls vector in device_num order + for (it=nvidia_opencls.begin(); itdevice_num > prop.device_num) break; + } + nvidia_opencls.insert(it, prop); + ++current_CUDA_index; }