Merge branch 'master' of ssh://boinc.berkeley.edu/boinc-v2

2013-06-28 13:01:57 -04:00 · 2013-06-28 13:01:57 -04:00 · 7f27f2c4a2
parent d574235ee4 a6e276b93e
commit 7f27f2c4a2
1 changed files with 46 additions and 8 deletions
--- a/client/gpu_opencl.cpp
+++ b/client/gpu_opencl.cpp
@ -113,7 +113,7 @@ static bool is_intel(char* vendor) {
 // If "loose", tolerate small diff
 //
-int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) {
+static int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) {
    if (c1.opencl_device_version_int > c2.opencl_device_version_int) return 1;
    if (c1.opencl_device_version_int < c2.opencl_device_version_int) return -1;
    if (loose) {
@ -128,6 +128,18 @@ int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) {
    return 0;
 }
 #ifdef __APPLE__
 // Ordering predicate over indices into nvidia_gpus: returns true when the
 // GPU at NVIDIA_GPU_Index1 has a smaller pci_info.bus_id than the GPU at
 // NVIDIA_GPU_Index2.  If either index is at or past the end of nvidia_gpus
 // the predicate returns false.
 //
 static bool compare_pci_slots(int NVIDIA_GPU_Index1, int NVIDIA_GPU_Index2) {
    const int gpu_count = (int)nvidia_gpus.size();
    if (NVIDIA_GPU_Index1 >= gpu_count || NVIDIA_GPU_Index2 >= gpu_count) {
        return false;   // Should never happen
    }
    return nvidia_gpus[NVIDIA_GPU_Index1].pci_info.bus_id
        < nvidia_gpus[NVIDIA_GPU_Index2].pci_info.bus_id;
 }
 #endif
 // OpenCL interfaces are documented here:
 // http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/ and
 // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/
@ -147,6 +159,8 @@ void COPROCS::get_opencl(
    int current_CAL_index;
    int min_CAL_target;
    int num_CAL_devices = (int)ati_gpus.size();
    vector<int>devnums_pci_slot_sort;
    vector<OPENCL_DEVICE_PROP>::iterator it;
 #ifdef _WIN32
    opencl_lib = LoadLibrary("OpenCL.dll");
@ -199,6 +213,19 @@ void COPROCS::get_opencl(
        return;
    }
    if (nvidia_gpus.size()) {
        for (int i=0; i<(int)nvidia_gpus.size(); ++i) {
            devnums_pci_slot_sort.push_back(i);
        }
 #ifdef __APPLE__
        std::stable_sort(devnums_pci_slot_sort.begin(),
                        devnums_pci_slot_sort.end(),
                        compare_pci_slots
        );
 #endif
    }
    for (platform_index=0; platform_index<num_platforms; ++platform_index) {
        ciErrNum = (*__clGetPlatformInfo)(
            platforms[platform_index], CL_PLATFORM_VERSION,
@ -308,8 +335,12 @@ void COPROCS::get_opencl(
            if (is_NVIDIA(prop.vendor)) {
                if (nvidia.have_cuda) {
                    // Mac OpenCL does not recognize all NVIDIA GPUs returned by
-                    // CUDA but we assume that OpenCL and CUDA return devices in
+                    // CUDA but we assume that OpenCL and CUDA return devices 
-                    // the same order and with identical model name strings
+                    // with identical model name strings and that OpenCL returns
                    // devices in order of ascending PCI slot.
                    //
                    // On other systems, assume OpenCL and CUDA return devices 
                    // in the same order.
                    //
                    while (1) {
                        if (current_CUDA_index >= (int)(nvidia_gpus.size())) {
@ -320,7 +351,9 @@ void COPROCS::get_opencl(
                            warnings.push_back(buf);
                            return; // Should never happen
                        }
-                        if (!strcmp(prop.name, nvidia_gpus[current_CUDA_index].prop.name)) {
+                        if (!strcmp(prop.name,
                            nvidia_gpus[devnums_pci_slot_sort[current_CUDA_index]].prop.name)
                            ) {
                            break;  // We have a match
                        }
                        // This CUDA GPU is not recognized by OpenCL,
@ -328,7 +361,7 @@ void COPROCS::get_opencl(
                        //
                        ++current_CUDA_index;
                    }
-                    prop.device_num = current_CUDA_index;
+                    prop.device_num = devnums_pci_slot_sort[current_CUDA_index];
                } else {
                    prop.device_num = (int)(nvidia_opencls.size());
                }
@ -343,14 +376,19 @@ void COPROCS::get_opencl(
                    prop.peak_flops = c.peak_flops;
                }
                if (nvidia_gpus.size()) {
-                    // Assumes OpenCL and CUDA return the devices
+                    // Assumes OpenCL device_num and CUDA device_num now match
                    // in the same order
                    //
                    prop.opencl_available_ram = nvidia_gpus[prop.device_num].available_ram;
                } else {
                    prop.opencl_available_ram = prop.global_mem_size;
                }
-                nvidia_opencls.push_back(prop);
+                
                // Build nvidia_opencls vector in device_num order
                for (it=nvidia_opencls.begin(); it<nvidia_opencls.end(); it++) {
                    if (it->device_num > prop.device_num) break;
                }
                nvidia_opencls.insert(it, prop);
                ++current_CUDA_index;
            }