mirror of https://github.com/BOINC/boinc.git
1406 lines
54 KiB
C++
1406 lines
54 KiB
C++
// This file is part of BOINC.
|
|
// http://boinc.berkeley.edu
|
|
// Copyright (C) 2012 University of California
|
|
//
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
// under the terms of the GNU Lesser General Public License
|
|
// as published by the Free Software Foundation,
|
|
// either version 3 of the License, or (at your option) any later version.
|
|
//
|
|
// BOINC is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
// See the GNU Lesser General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
// Detection of GPUs using OpenCL
|
|
|
|
#define TEST_OTHER_COPROC_LOGIC 0
|
|
|
|
#ifdef _WIN32
|
|
#include "boinc_win.h"
|
|
#else
|
|
#ifdef __APPLE__
|
|
// Suppress obsolete warning when building for OS 10.3.9
|
|
#define DLOPEN_NO_WARN
|
|
#include <mach-o/dyld.h>
|
|
#endif
|
|
#include "config.h"
|
|
#include <dlfcn.h>
|
|
#endif
|
|
|
|
#include <vector>
|
|
#include <string>
|
|
|
|
using std::vector;
|
|
using std::string;
|
|
|
|
#include "coproc.h"
|
|
#include "str_replace.h"
|
|
#include "util.h"
|
|
|
|
#include "client_msgs.h"
|
|
#include "client_state.h"
|
|
#include "gpu_detect.h"
|
|
|
|
#ifdef _WIN32
|
|
|
|
HMODULE opencl_lib = NULL;
|
|
|
|
typedef cl_int (__stdcall *CL_PLATFORMIDS) (cl_uint, cl_platform_id*, cl_uint*);
|
|
typedef cl_int (__stdcall *CL_PLATFORMINFO) (cl_platform_id, cl_platform_info, size_t, void*, size_t*);
|
|
typedef cl_int (__stdcall *CL_DEVICEIDS)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*);
|
|
typedef cl_int (__stdcall *CL_INFO) (cl_device_id, cl_device_info, size_t, void*, size_t*);
|
|
|
|
CL_PLATFORMIDS p_clGetPlatformIDs = NULL;
|
|
CL_PLATFORMINFO p_clGetPlatformInfo = NULL;
|
|
CL_DEVICEIDS p_clGetDeviceIDs = NULL;
|
|
CL_INFO p_clGetDeviceInfo = NULL;
|
|
|
|
#else
|
|
|
|
void* opencl_lib = NULL;
|
|
|
|
cl_int (*p_clGetPlatformIDs)(
|
|
cl_uint, // num_entries,
|
|
cl_platform_id*, // platforms
|
|
cl_uint * // num_platforms
|
|
);
|
|
cl_int (*p_clGetPlatformInfo)(
|
|
cl_platform_id, // platform
|
|
cl_platform_info, // param_name
|
|
size_t, // param_value_size
|
|
void*, // param_value
|
|
size_t* // param_value_size_ret
|
|
);
|
|
cl_int (*p_clGetDeviceIDs)(
|
|
cl_platform_id, // platform
|
|
cl_device_type, // device_type
|
|
cl_uint, // num_entries
|
|
cl_device_id*, // devices
|
|
cl_uint* // num_devices
|
|
);
|
|
cl_int (*p_clGetDeviceInfo)(
|
|
cl_device_id, // device
|
|
cl_device_info, // param_name
|
|
size_t, // param_value_size
|
|
void*, // param_value
|
|
size_t* // param_value_size_ret
|
|
);
|
|
|
|
#endif
|
|
|
|
// Return true if the vendor string contains any of the (case-sensitive)
// substrings used to recognize ATI / AMD.
//
static bool is_AMD(char *vendor) {
    static const char* const amd_markers[] = {
        "ATI",
        "AMD",
        "Advanced Micro Devices, Inc."
    };
    const size_t num_markers = sizeof(amd_markers) / sizeof(amd_markers[0]);

    for (size_t k = 0; k < num_markers; ++k) {
        if (strstr(vendor, amd_markers[k]) != NULL) {
            return true;
        }
    }
    return false;
}
|
|
|
|
// Return true if the vendor string contains "NVIDIA" (case-sensitive).
//
static bool is_NVIDIA(char* vendor) {
    return strstr(vendor, "NVIDIA") != NULL;
}
|
|
|
|
// Return true if the vendor string contains "intel", ignoring case.
//
static bool is_intel(char* vendor) {
    return strcasestr(vendor, "intel") != NULL;
}
|
|
|
|
// If "loose", tolerate small diff
|
|
//
|
|
// Three-way comparison of two OpenCL devices.
// Returns 1 if c1 ranks above c2, -1 if it ranks below, 0 if they tie.
//
// The OpenCL device version is compared first.
// With "loose" set, only the global memory size is then considered,
// and c1 must be more than 1.4x (or less than 0.7x) the size of c2
// to count as different.
// Otherwise global memory size and then peak FLOPS are compared exactly.
//
static int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) {
    int result = 0;

    if (c1.opencl_device_version_int > c2.opencl_device_version_int) {
        result = 1;
    } else if (c1.opencl_device_version_int < c2.opencl_device_version_int) {
        result = -1;
    } else if (loose) {
        // Tolerate small differences in memory size; ignore peak FLOPS
        if (c1.global_mem_size > 1.4*c2.global_mem_size) {
            result = 1;
        } else if (c1.global_mem_size < .7*c2.global_mem_size) {
            result = -1;
        }
    } else if (c1.global_mem_size > c2.global_mem_size) {
        result = 1;
    } else if (c1.global_mem_size < c2.global_mem_size) {
        result = -1;
    } else if (c1.peak_flops > c2.peak_flops) {
        result = 1;
    } else if (c1.peak_flops < c2.peak_flops) {
        result = -1;
    }

    return result;
}
|
|
|
|
#ifdef __APPLE__
|
|
static bool compare_pci_slots(int NVIDIA_GPU_Index1, int NVIDIA_GPU_Index2) {
|
|
if (NVIDIA_GPU_Index1 >= (int)nvidia_gpus.size()) return false; // Should never happen
|
|
if (NVIDIA_GPU_Index2 >= (int)nvidia_gpus.size()) return false; // Should never happen
|
|
return (
|
|
nvidia_gpus[NVIDIA_GPU_Index1].pci_info.bus_id <
|
|
nvidia_gpus[NVIDIA_GPU_Index2].pci_info.bus_id
|
|
);
|
|
}
|
|
|
|
|
|
// Test OS version number on all versions of OS X without using deprecated Gestalt
|
|
// compareOSVersionTo(x, y) returns:
|
|
// -1 if the OS version we are running on is less than x.y
|
|
// 0 if the OS version we are running on is equal to x.y
|
|
// +1 if the OS version we are running on is lgreater than x.y
|
|
int compareOSVersionTo(int toMajor, int toMinor) {
|
|
static SInt32 major = -1;
|
|
static SInt32 minor = -1;
|
|
|
|
if (major < 0) {
|
|
char vers[100], *p1 = NULL;
|
|
FILE *f;
|
|
vers[0] = '\0';
|
|
f = popen("sw_vers -productVersion", "r");
|
|
if (f) {
|
|
fscanf(f, "%s", vers);
|
|
pclose(f);
|
|
}
|
|
if (vers[0] == '\0') {
|
|
fprintf(stderr, "popen(\"sw_vers -productVersion\" failed\n");
|
|
fflush(stderr);
|
|
return 0;
|
|
}
|
|
// Extract the major system version number
|
|
major = atoi(vers);
|
|
// Extract the minor system version number
|
|
p1 = strchr(vers, '.');
|
|
minor = atoi(p1+1);
|
|
}
|
|
|
|
if (major < toMajor) return -1;
|
|
if (major > toMajor) return 1;
|
|
// if (major == toMajor) compare minor version numbers
|
|
if (minor < toMinor) return -1;
|
|
if (minor > toMinor) return 1;
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
|
|
// OpenCL interfaces are documented here:
|
|
// http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/ and
|
|
// http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/
|
|
|
|
// Detect OpenCL devices (CPUs, GPUs and accelerators) on all OpenCL platforms.
//
// Loads the OpenCL library at run time, enumerates the platforms and their
// devices, and appends a record for each usable device to cpu_opencls,
// nvidia_opencls, ati_opencls, intel_gpu_opencls (and intel_gpus) or
// other_opencls, depending on the device type and vendor.
// NVIDIA and ATI/AMD devices are matched against the GPUs already listed in
// nvidia_gpus (CUDA) and ati_gpus (CAL) where those are available.
//
// warnings: problems found along the way are appended here as text.
//
// Does nothing if cc_config.no_opencl is set.
// The OpenCL library is unloaded again before returning
// (except on the early returns taken when it could not be loaded).
//
void COPROCS::get_opencl(
    vector<string>& warnings
) {
    cl_int ciErrNum;
    cl_platform_id platforms[MAX_OPENCL_PLATFORMS];
    cl_uint num_platforms = 0, platform_index, num_devices = 0, device_index;
    cl_device_id devices[MAX_COPROC_INSTANCES];
    char platform_version[256];
    char platform_vendor[256];
    char buf[256];
    OPENCL_DEVICE_PROP prop;
    int current_CUDA_index;
    int current_CAL_index;
    int min_CAL_target;
    int num_CAL_devices = (int)ati_gpus.size();
    vector<int>devnums_pci_slot_sort;
    vector<OPENCL_DEVICE_PROP>::iterator it;
    int max_other_coprocs = MAX_RSC-1;  // coprocs[0] is reserved for CPU
    string s;

    if (cc_config.no_opencl) {
        return;
    }

#ifdef _WIN32
    opencl_lib = LoadLibrary("OpenCL.dll");
    if (!opencl_lib) {
        warnings.push_back("No OpenCL library found");
        return;
    }

    p_clGetPlatformIDs = (CL_PLATFORMIDS)GetProcAddress( opencl_lib, "clGetPlatformIDs" );
    p_clGetPlatformInfo = (CL_PLATFORMINFO)GetProcAddress( opencl_lib, "clGetPlatformInfo" );
    p_clGetDeviceIDs = (CL_DEVICEIDS)GetProcAddress( opencl_lib, "clGetDeviceIDs" );
    p_clGetDeviceInfo = (CL_INFO)GetProcAddress( opencl_lib, "clGetDeviceInfo" );
#else
#ifdef __APPLE__
    opencl_lib = dlopen("/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL", RTLD_NOW);
#else
    opencl_lib = dlopen("libOpenCL.so", RTLD_NOW);
    if (!opencl_lib) {
        opencl_lib = dlopen("libOpenCL.so.1", RTLD_NOW);
    }
#endif
    if (!opencl_lib) {
        // The dlerror() text can include a long path, so the write
        // into buf must be bounded.
        const char* dl_msg = dlerror();
        snprintf(buf, sizeof(buf), "OpenCL: %s", dl_msg ? dl_msg : "unknown error");
        warnings.push_back(buf);
        return;
    }
    p_clGetPlatformIDs = (cl_int(*)(cl_uint, cl_platform_id*, cl_uint*)) dlsym( opencl_lib, "clGetPlatformIDs" );
    p_clGetPlatformInfo = (cl_int(*)(cl_platform_id, cl_platform_info, size_t, void*, size_t*)) dlsym( opencl_lib, "clGetPlatformInfo" );
    p_clGetDeviceIDs = (cl_int(*)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*)) dlsym( opencl_lib, "clGetDeviceIDs" );
    p_clGetDeviceInfo = (cl_int(*)(cl_device_id, cl_device_info, size_t, void*, size_t*)) dlsym( opencl_lib, "clGetDeviceInfo" );
#endif

    if (!p_clGetPlatformIDs) {
        warnings.push_back("clGetPlatformIDs() missing from OpenCL library");
        goto leave;
    }
    if (!p_clGetPlatformInfo) {
        warnings.push_back("clGetPlatformInfo() missing from OpenCL library");
        goto leave;
    }
    if (!p_clGetDeviceIDs) {
        warnings.push_back("clGetDeviceIDs() missing from OpenCL library");
        goto leave;
    }
    if (!p_clGetDeviceInfo) {
        warnings.push_back("clGetDeviceInfo() missing from OpenCL library");
        goto leave;
    }

    ciErrNum = (*p_clGetPlatformIDs)(MAX_OPENCL_PLATFORMS, platforms, &num_platforms);
    if ((ciErrNum != CL_SUCCESS) || (num_platforms == 0)) {
        warnings.push_back("clGetPlatformIDs() failed to return any OpenCL platforms");
        goto leave;
    }
    // num_platforms is the number of platforms available, which may be
    // more than the number of entries actually written to platforms[].
    if (num_platforms > (cl_uint)MAX_OPENCL_PLATFORMS) {
        num_platforms = (cl_uint)MAX_OPENCL_PLATFORMS;
    }

    if (nvidia_gpus.size()) {
        for (int i=0; i<(int)nvidia_gpus.size(); ++i) {
            devnums_pci_slot_sort.push_back(i);
        }
#ifdef __APPLE__
        std::stable_sort(
            devnums_pci_slot_sort.begin(),
            devnums_pci_slot_sort.end(),
            compare_pci_slots
        );
#endif
    }

    for (platform_index=0; platform_index<num_platforms; ++platform_index) {
        ciErrNum = (*p_clGetPlatformInfo)(
            platforms[platform_index], CL_PLATFORM_VERSION,
            sizeof(platform_version), &platform_version, NULL
        );
        if (ciErrNum != CL_SUCCESS) {
            snprintf(buf, sizeof(buf),
                "Couldn't get PLATFORM_VERSION for platform #%u; error %d",
                platform_index, ciErrNum
            );
            warnings.push_back(buf);
            continue;
        }

        ciErrNum = (*p_clGetPlatformInfo)(
            platforms[platform_index], CL_PLATFORM_VENDOR,
            sizeof(platform_vendor), &platform_vendor, NULL
        );
        if (ciErrNum != CL_SUCCESS) {
            snprintf(buf, sizeof(buf),
                "Couldn't get PLATFORM_VENDOR for platform #%u; error %d",
                platform_index, ciErrNum
            );
            warnings.push_back(buf);
            // Processing continues without a vendor name; make sure the
            // buffer holds a valid (empty) string before it is read below.
            platform_vendor[0] = 0;
        }

        //////////// CPU //////////////

        // Start from a known value in case the call fails
        // without storing a device count.
        num_devices = 0;
        ciErrNum = (*p_clGetDeviceIDs)(
            platforms[platform_index], (CL_DEVICE_TYPE_CPU),
            MAX_COPROC_INSTANCES, devices, &num_devices
        );

        if ((ciErrNum != CL_SUCCESS) && (num_devices != 0)) {
            num_devices = 0;                 // No devices
            if (ciErrNum != CL_DEVICE_NOT_FOUND) {
                snprintf(buf, sizeof(buf),
                    "Couldn't get CPU Device IDs for platform #%u: error %d",
                    platform_index, ciErrNum
                );
                warnings.push_back(buf);
            }
        }
        // num_devices is the number of matching devices available, which
        // may be more than the number of entries written to devices[].
        if (num_devices > (cl_uint)MAX_COPROC_INSTANCES) {
            num_devices = (cl_uint)MAX_COPROC_INSTANCES;
        }

        for (device_index=0; device_index<num_devices; ++device_index) {
            prop.clear();
            prop.device_id = devices[device_index];
            strlcpy(
                prop.opencl_platform_version, platform_version,
                sizeof(prop.opencl_platform_version)
            );

            ciErrNum = get_opencl_info(prop, device_index, warnings);
            if (ciErrNum != CL_SUCCESS) continue;

            prop.is_used = COPROC_UNUSED;
            prop.get_device_version_int();

            OPENCL_CPU_PROP c;
            strlcpy(c.platform_vendor, platform_vendor, sizeof(c.platform_vendor));
            c.opencl_prop = prop;
            cpu_opencls.push_back(c);
        }

        //////////// GPUs and Accelerators //////////////

        num_devices = 0;
        ciErrNum = (*p_clGetDeviceIDs)(
            platforms[platform_index],
            (CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR),
            MAX_COPROC_INSTANCES, devices, &num_devices
        );

        if (ciErrNum == CL_DEVICE_NOT_FOUND) continue;  // No devices
        if (num_devices == 0) continue;                 // No devices

        if (ciErrNum != CL_SUCCESS) {
            snprintf(buf, sizeof(buf),
                "Couldn't get Device IDs for platform #%u: error %d",
                platform_index, ciErrNum
            );
            warnings.push_back(buf);
            continue;
        }
        // See the note on the CPU query above
        if (num_devices > (cl_uint)MAX_COPROC_INSTANCES) {
            num_devices = (cl_uint)MAX_COPROC_INSTANCES;
        }

        // Mac OpenCL does not recognize all NVIDIA GPUs returned by CUDA
        // Fortunately, CUDA and OpenCL return the same GPU model name on
        // the Mac, so we can use this to match OpenCL devices with CUDA.
        //
        current_CUDA_index = 0;

        // ATI/AMD OpenCL does not always recognize all GPUs returned by CAL.
        // This is complicated for several reasons:
        //  * CAL returns only an enum (CALtargetEnum) for the GPU's family,
        //    not specific model information.
        //  * OpenCL return only the GPU family name
        //  * Which GPUs support OpenCL varies with different versions of the
        //    AMD Catalyst drivers.
        //
        // To deal with this, we make some (probably imperfect) assumptions:
        //  * AMD drivers eliminate OpenCL support for older GPU families first.
        //  * Lower values of CALtargetEnum represent older GPU families.
        //  * All ATI/AMD GPUs reported by OpenCL are also reported by CAL (on
        //    systems where CAL is available) though the converse may not be true.
        //
        current_CAL_index = 0;
        min_CAL_target = 0;
        if (is_AMD(platform_vendor) && (num_CAL_devices > 0)) {
            // Raise min_CAL_target until the number of CAL GPUs at or above
            // it equals the number of OpenCL devices on this platform.
            while (1) {
                int numToMatch = 0;
                for (int i=0; i<num_CAL_devices; ++i) {
                    if ((int)ati_gpus[i].attribs.target >= min_CAL_target) {
                        ++numToMatch;
                    }
                }
                if (numToMatch == (int)num_devices) break;
                if (numToMatch < (int)num_devices) {
                    warnings.push_back(
                        "Could not match ATI OpenCL and CAL GPUs: ignoring CAL."
                    );
                    // If we can't match ATI OpenCL and CAL GPUs, ignore CAL
                    // and keep OpenCL because AMD has deprecated CAL.
                    ati_gpus.clear();
                    ati.have_cal = false;
                    num_CAL_devices = 0;
                    break;
                }
                ++min_CAL_target;
            }
        }

        for (device_index=0; device_index<num_devices; ++device_index) {
            prop.clear();
            prop.device_id = devices[device_index];
            strlcpy(
                prop.opencl_platform_version, platform_version,
                sizeof(prop.opencl_platform_version)
            );

            //TODO: Should we store the platform(s) for each GPU found?
            //TODO: Must we check if multiple platforms found the same GPU and merge the records?
            ciErrNum = get_opencl_info(prop, device_index, warnings);
            if (ciErrNum != CL_SUCCESS) continue;

            // TODO: Eliminate this, or improve it
#if TEST_OTHER_COPROC_LOGIC
            if (is_NVIDIA(prop.vendor)) {
                safe_strcpy(prop.vendor, "FAKE VENDOR X");
            } else if (is_AMD(prop.vendor)) {
                safe_strcpy(prop.vendor, "FAKE VENDOR Y");
            } else {
                safe_strcpy(prop.vendor, "FAKE VENDOR Z");
            }
#endif

            prop.is_used = COPROC_UNUSED;
            prop.get_device_version_int();

            //////////// NVIDIA //////////////
            if (is_NVIDIA(prop.vendor)) {
                bool cuda_match_found = false;
                if (nvidia.have_cuda) {
                    // Mac OpenCL does not recognize all NVIDIA GPUs returned by
                    // CUDA but we assume that OpenCL and CUDA return devices
                    // with identical model name strings and that OpenCL returns
                    // devices in order of ascending PCI slot.
                    //
                    // On other systems, assume OpenCL and CUDA return devices
                    // in the same order.
                    //
                    int saved_CUDA_index = current_CUDA_index;

                    while (1) {
                        if (current_CUDA_index >= (int)(nvidia_gpus.size())) {
                            snprintf(buf, sizeof(buf),
                                "OpenCL NVIDIA index #%u does not match any CUDA device",
                                device_index
                            );
                            warnings.push_back(buf);
                            // Newer versions of CUDA driver don't support older NVIDIA GPUs
                            if (nvidia.cuda_version >= 6050) {
                                prop.device_num = (int)(nvidia_opencls.size());
                                current_CUDA_index = saved_CUDA_index;
                                prop.warn_bad_cuda = true;
                                break;
                            } else {
                                // Older CUDA drivers should report all NVIDIA GPUs reported by OpenCL
                                goto leave;     // Should never happen
                            }
                        }
                        if (!strcmp(prop.name,
                            nvidia_gpus[devnums_pci_slot_sort[current_CUDA_index]].prop.name)
                        ) {
                            cuda_match_found = true;
                            prop.device_num = devnums_pci_slot_sort[current_CUDA_index];
                            break;  // We have a match
                        }
                        // This CUDA GPU is not recognized by OpenCL,
                        // so try the next
                        //
                        ++current_CUDA_index;
                    }
                } else {
                    prop.device_num = (int)(nvidia_opencls.size());
                }
                prop.opencl_device_index = device_index;

                if (cuda_match_found) {
                    prop.peak_flops = nvidia_gpus[prop.device_num].peak_flops;
                } else {
                    COPROC_NVIDIA c;
                    c.opencl_prop = prop;
                    c.set_peak_flops();
                    if (c.bad_gpu_peak_flops("NVIDIA OpenCL", s)) {
                        warnings.push_back(s);
                    }
                    prop.peak_flops = c.peak_flops;
                }
                if (cuda_match_found) {
                    // Assumes OpenCL device_num and CUDA device_num now match
                    //
                    prop.opencl_available_ram = nvidia_gpus[prop.device_num].available_ram;
                } else {
                    prop.opencl_available_ram = prop.global_mem_size;
                }

                // Build nvidia_opencls vector in device_num order
                for (it=nvidia_opencls.begin(); it != nvidia_opencls.end(); ++it) {
                    if (it->device_num > prop.device_num) break;
                }
                nvidia_opencls.insert(it, prop);

                if (cuda_match_found) ++current_CUDA_index;
            }

            //////////// AMD / ATI //////////////
            else if (is_AMD(prop.vendor)) {
                prop.opencl_device_index = device_index;

                if (ati.have_cal) {
                    // AMD OpenCL does not recognize all AMD GPUs returned by
                    // CAL but we assume that OpenCL and CAL return devices in
                    // the same order.  See additional comments earlier in
                    // this source file for more details.
                    //
                    while (1) {
                        if (current_CAL_index >= num_CAL_devices) {
                            snprintf(buf, sizeof(buf),
                                "OpenCL ATI device #%u does not match any CAL device",
                                device_index
                            );
                            warnings.push_back(buf);
                            goto leave;     // Should never happen
                        }
                        if ((int)ati_gpus[current_CAL_index].attribs.target >= min_CAL_target) {
                            break;  // We have a match
                        }
                        // This CAL GPU is not recognized by OpenCL,
                        // so try the next
                        //
                        ++current_CAL_index;
                    }
                    prop.device_num = current_CAL_index++;

                    // Always use GPU model name from CAL if
                    // available for ATI / AMD GPUs because
                    // (we believe) it is more user-friendly.
                    //
                    safe_strcpy(prop.name, ati_gpus[prop.device_num].name);

                    // Work around a bug in OpenCL which returns only
                    // 1/2 of total global RAM size: use the value from CAL.
                    // This bug applies only to ATI GPUs, not to NVIDIA
                    // See also further workaround code for Macs.
                    //
                    prop.global_mem_size = ati_gpus[prop.device_num].attribs.localRAM * MEGA;
                    prop.peak_flops = ati_gpus[prop.device_num].peak_flops;
                } else {            // ! ati.have_cal
                    prop.device_num = (int)(ati_opencls.size());
                    COPROC_ATI c;
                    c.opencl_prop = prop;
                    c.set_peak_flops();
                    if (c.bad_gpu_peak_flops("AMD OpenCL", s)) {
                        warnings.push_back(s);
                    }
                    prop.peak_flops = c.peak_flops;
                }

                if (ati_gpus.size()) {
                    prop.opencl_available_ram = ati_gpus[prop.device_num].available_ram;
                } else {
                    prop.opencl_available_ram = prop.global_mem_size;
                }
                ati_opencls.push_back(prop);
            }

            //////////// INTEL GPU //////////////
            else if (is_intel(prop.vendor)) {
                prop.device_num = (int)(intel_gpu_opencls.size());
                prop.opencl_device_index = device_index;

                COPROC_INTEL c;
                c.opencl_prop = prop;
                c.is_used = COPROC_UNUSED;
                c.available_ram = prop.global_mem_size;
                safe_strcpy(c.name, prop.name);
                safe_strcpy(c.version, prop.opencl_driver_version);

                c.set_peak_flops();
                if (c.bad_gpu_peak_flops("Intel OpenCL", s)) {
                    warnings.push_back(s);
                }
                prop.peak_flops = c.peak_flops;
                prop.opencl_available_ram = prop.global_mem_size;

                intel_gpu_opencls.push_back(prop);

                // At present Intel GPUs only support OpenCL
                // and do not have a native GPGPU framework,
                // so treat each detected Intel OpenCL GPU device as
                // a native device.
                //
                intel_gpus.push_back(c);
            } else {
                //////////// OTHER GPU OR ACCELERATOR //////////////
                // Put each coprocessor instance into a separate other_opencls element

                // opencl_device_index is passed to project apps via init_data.xml
                // to differentiate among OpenCL devices from the same vendor.  It is
                // used by boinc_get_opencl_ids() to select the correct OpenCL device.
                int opencl_device_index = 0;
                for (unsigned int coproc_index=0; coproc_index<other_opencls.size(); coproc_index++) {
                    if (!strcmp(other_opencls[coproc_index].vendor, prop.vendor)) {
                        opencl_device_index++;  // Another OpenCL device from same vendor
                    }
                }

                prop.device_num = 0;    // Each vector entry has only one device
                prop.opencl_device_index = opencl_device_index;
                prop.opencl_available_ram = prop.global_mem_size;
                prop.is_used = COPROC_USED;

                // TODO: is there a better way to estimate peak_flops?
                //
                prop.peak_flops = 0;
                if (prop.max_compute_units) {
                    double freq = ((double)prop.max_clock_frequency) * MEGA;
                    prop.peak_flops = ((double)prop.max_compute_units) * freq;
                }
                if (prop.peak_flops <= 0 || prop.peak_flops > GPU_MAX_PEAK_FLOPS) {
                    char buf2[256];
                    snprintf(buf2, sizeof(buf2),
                        "OpenCL generic: bad peak FLOPS; Max units %u, max freq %u MHz",
                        prop.max_compute_units, prop.max_clock_frequency
                    );
                    warnings.push_back(buf2);
                    prop.peak_flops = GPU_DEFAULT_PEAK_FLOPS;
                }

                other_opencls.push_back(prop);
            }
        }
    }

    // Neither nvidia.count, ati.count nor intel_gpu.count have been set yet,
    // so we can't test have_nvidia(), have_ati() or have_intel_gpu() here.
    //
    if ((nvidia_opencls.size() > 0) || nvidia.have_cuda) max_other_coprocs--;
    if ((ati_opencls.size() > 0) || ati.have_cal) max_other_coprocs--;
    if (intel_gpu_opencls.size() > 0) max_other_coprocs--;
    if ((int)other_opencls.size() > max_other_coprocs) {
        warnings.push_back("Too many OpenCL device types found");
    }


#ifdef __APPLE__
    // Work around a bug in OpenCL which returns only
    // 1/2 of total global RAM size.
    // This bug applies only to ATI GPUs, not to NVIDIA
    // This has already been fixed on latest Catalyst
    // drivers, but Mac does not use Catalyst drivers.
    if (ati_opencls.size() > 0) {
        // This problem seems to be fixed in OS 10.7
        if (compareOSVersionTo(10, 7) < 0) {
            opencl_get_ati_mem_size_from_opengl(warnings);
        }
    }
#endif

    if ((nvidia_opencls.size() == 0) &&
        (ati_opencls.size() == 0) &&
        (intel_gpu_opencls.size() == 0) &&
        (cpu_opencls.size() == 0) &&
        (other_opencls.size() == 0)
    ) {
        warnings.push_back(
            "OpenCL library present but no OpenCL-capable devices found"
        );
    }
leave:
#ifdef _WIN32
    if (opencl_lib) FreeLibrary(opencl_lib);
#else
    if (opencl_lib) dlclose(opencl_lib);
#endif
}
|
|
|
|
void COPROCS::correlate_opencl(
|
|
bool use_all,
|
|
IGNORE_GPU_INSTANCE& ignore_gpu_instance
|
|
) {
|
|
if (nvidia_opencls.size() > 0) {
|
|
if (nvidia.have_cuda) { // If CUDA already found the "best" NVIDIA GPU
|
|
nvidia.merge_opencl(
|
|
nvidia_opencls, ignore_gpu_instance[PROC_TYPE_NVIDIA_GPU]
|
|
);
|
|
} else {
|
|
nvidia.find_best_opencls(
|
|
use_all, nvidia_opencls, ignore_gpu_instance[PROC_TYPE_NVIDIA_GPU]
|
|
);
|
|
nvidia.prop.totalGlobalMem = nvidia.opencl_prop.global_mem_size;
|
|
nvidia.available_ram = nvidia.opencl_prop.global_mem_size;
|
|
nvidia.prop.clockRate = nvidia.opencl_prop.max_clock_frequency * 1000;
|
|
safe_strcpy(nvidia.prop.name, nvidia.opencl_prop.name);
|
|
}
|
|
}
|
|
|
|
if (ati_opencls.size() > 0) {
|
|
if (ati.have_cal) { // If CAL already found the "best" CAL GPU
|
|
ati.merge_opencl(ati_opencls, ignore_gpu_instance[PROC_TYPE_AMD_GPU]);
|
|
} else {
|
|
ati.find_best_opencls(use_all, ati_opencls, ignore_gpu_instance[PROC_TYPE_AMD_GPU]);
|
|
ati.attribs.localRAM = ati.opencl_prop.global_mem_size/MEGA;
|
|
ati.available_ram = ati.opencl_prop.global_mem_size;
|
|
ati.attribs.engineClock = ati.opencl_prop.max_clock_frequency;
|
|
safe_strcpy(ati.name, ati.opencl_prop.name);
|
|
}
|
|
}
|
|
|
|
if (intel_gpu_opencls.size() > 0) {
|
|
intel_gpu.find_best_opencls(use_all, intel_gpu_opencls, ignore_gpu_instance[PROC_TYPE_INTEL_GPU]);
|
|
intel_gpu.available_ram = intel_gpu.opencl_prop.global_mem_size;
|
|
safe_strcpy(intel_gpu.name, intel_gpu.opencl_prop.name);
|
|
}
|
|
}
|
|
|
|
cl_int COPROCS::get_opencl_info(
|
|
OPENCL_DEVICE_PROP& prop,
|
|
cl_uint device_index,
|
|
vector<string>&warnings
|
|
) {
|
|
cl_int ciErrNum;
|
|
char buf[256];
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_NAME, sizeof(prop.name), prop.name, NULL);
|
|
if ((ciErrNum != CL_SUCCESS) || (prop.name[0] == 0)) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get name for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_VENDOR, sizeof(prop.vendor), prop.vendor, NULL);
|
|
if ((ciErrNum != CL_SUCCESS) || (prop.vendor[0] == 0)) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get vendor for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_VENDOR_ID, sizeof(prop.vendor_id), &prop.vendor_id, NULL);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get vendor ID for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_AVAILABLE, sizeof(prop.available), &prop.available, NULL);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get availability for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(
|
|
prop.device_id, CL_DEVICE_HALF_FP_CONFIG,
|
|
sizeof(prop.half_fp_config), &prop.half_fp_config, NULL
|
|
);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
if ((ciErrNum == CL_INVALID_VALUE) || (ciErrNum == CL_INVALID_OPERATION)) {
|
|
prop.half_fp_config = 0; // Not supported by OpenCL 1.0
|
|
} else {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get half-precision floating point capabilities for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(
|
|
prop.device_id, CL_DEVICE_SINGLE_FP_CONFIG,
|
|
sizeof(prop.single_fp_config), &prop.single_fp_config, NULL
|
|
);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get single-precision floating point capabilities for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(
|
|
prop.device_id, CL_DEVICE_DOUBLE_FP_CONFIG,
|
|
sizeof(prop.double_fp_config), &prop.double_fp_config, NULL
|
|
);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
if ((ciErrNum == CL_INVALID_VALUE) || (ciErrNum == CL_INVALID_OPERATION)) {
|
|
prop.double_fp_config = 0; // Not supported by OpenCL 1.0
|
|
} else {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get double-precision floating point capabilities for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(
|
|
prop.device_id, CL_DEVICE_ENDIAN_LITTLE, sizeof(prop.endian_little),
|
|
&prop.endian_little, NULL
|
|
);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get little or big endian for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(
|
|
prop.device_id, CL_DEVICE_EXECUTION_CAPABILITIES,
|
|
sizeof(prop.execution_capabilities), &prop.execution_capabilities, NULL
|
|
);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get execution capabilities for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(
|
|
prop.device_id, CL_DEVICE_EXTENSIONS, sizeof(prop.extensions),
|
|
prop.extensions, NULL
|
|
);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get device extensions for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(
|
|
prop.device_id, CL_DEVICE_GLOBAL_MEM_SIZE,
|
|
sizeof(prop.global_mem_size), &prop.global_mem_size, NULL
|
|
);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get global memory size for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(
|
|
prop.device_id, CL_DEVICE_LOCAL_MEM_SIZE,
|
|
sizeof(prop.local_mem_size), &prop.local_mem_size, NULL
|
|
);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get local memory size for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(
|
|
prop.device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY,
|
|
sizeof(prop.max_clock_frequency), &prop.max_clock_frequency, NULL
|
|
);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get max clock frequency for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(
|
|
prop.device_id, CL_DEVICE_MAX_COMPUTE_UNITS,
|
|
sizeof(prop.max_compute_units), &prop.max_compute_units, NULL
|
|
);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get max compute units for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_VERSION, sizeof(prop.opencl_device_version), prop.opencl_device_version, NULL);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get OpenCL version supported by device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DRIVER_VERSION, sizeof(prop.opencl_driver_version), prop.opencl_driver_version, NULL);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get OpenCL driver version for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
// Nvidia Specific Extensions
|
|
if (strstr(prop.extensions, "cl_nv_device_attribute_query") != NULL) {
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof(prop.nv_compute_capability_major), &prop.nv_compute_capability_major, NULL);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof(prop.nv_compute_capability_minor), &prop.nv_compute_capability_minor, NULL);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
}
|
|
|
|
// AMD Specific Extensions
|
|
if (strstr(prop.extensions, "cl_amd_device_attribute_query") != NULL) {
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_BOARD_NAME_AMD, sizeof(buf), buf, NULL);
|
|
if (strlen(buf) && ciErrNum == CL_SUCCESS) {
|
|
safe_strcpy(prop.name, buf);
|
|
} else if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get AMD Board Name for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, sizeof(prop.amd_simd_per_compute_unit), &prop.amd_simd_per_compute_unit, NULL);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_SIMD_WIDTH_AMD, sizeof(prop.amd_simd_width), &prop.amd_simd_width, NULL);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get CL_DEVICE_SIMD_WIDTH_AMD for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(prop.amd_simd_instruction_width), &prop.amd_simd_instruction_width, NULL);
|
|
if (ciErrNum != CL_SUCCESS) {
|
|
snprintf(buf, sizeof(buf),
|
|
"clGetDeviceInfo failed to get CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD for device %d",
|
|
(int)device_index
|
|
);
|
|
warnings.push_back(buf);
|
|
return ciErrNum;
|
|
}
|
|
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
// This is called for ATI GPUs with CAL or NVIDIA GPUs with CUDA, to merge
|
|
// the OpenCL info into the CAL or CUDA data for the "best" CAL or CUDA GPU.
|
|
// This assumes that, for each GPU, we have previously correlated its CAL
|
|
// or CUDA device_num with its opencl_device_index.
|
|
//
|
|
void COPROC::merge_opencl(
|
|
vector<OPENCL_DEVICE_PROP> &opencls,
|
|
vector<int>& ignore_dev
|
|
) {
|
|
unsigned int i, j;
|
|
|
|
for (i=0; i<opencls.size(); i++) {
|
|
opencls[i].is_used = COPROC_UNUSED;
|
|
|
|
if (in_vector(opencls[i].device_num, ignore_dev)) {
|
|
opencls[i].is_used = COPROC_IGNORED;
|
|
continue;
|
|
}
|
|
if (device_num == opencls[i].device_num) {
|
|
opencl_prop = opencls[i];
|
|
opencl_device_ids[0] = opencls[i].device_id;
|
|
have_opencl = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
opencl_device_count = 0;
|
|
|
|
// Fill in info for other GPUs which CAL or CUDA found equivalent to best
|
|
//
|
|
for (i=0; i<(unsigned int)count; ++i) {
|
|
for (j=0; j<opencls.size(); j++) {
|
|
if (device_nums[i] == opencls[j].device_num) {
|
|
opencls[j].is_used = COPROC_USED;
|
|
opencl_device_indexes[opencl_device_count] = opencls[j].opencl_device_index;
|
|
opencl_device_ids[opencl_device_count++] = opencls[j].device_id;
|
|
instance_has_opencl[i] = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// This is called for ATI GPUs without CAL or NVIDIA GPUs without CUDA
|
|
//
|
|
void COPROC::find_best_opencls(
|
|
bool use_all,
|
|
vector<OPENCL_DEVICE_PROP> &opencls,
|
|
vector<int>& ignore_dev
|
|
) {
|
|
unsigned int i;
|
|
|
|
// identify the most capable ATI, NVIDIA or Intel OpenCL GPU
|
|
//
|
|
bool first = true;
|
|
for (i=0; i<opencls.size(); i++) {
|
|
if (in_vector(opencls[i].device_num, ignore_dev)) {
|
|
opencls[i].is_used = COPROC_IGNORED;
|
|
continue;
|
|
}
|
|
bool is_best = false;
|
|
if (first) {
|
|
is_best = true;
|
|
first = false;
|
|
} else if (opencl_compare(opencls[i], opencl_prop, false) > 0) {
|
|
is_best = true;
|
|
}
|
|
if (is_best) {
|
|
// fill in what info we have
|
|
opencl_prop = opencls[i];
|
|
device_num = opencls[i].device_num;
|
|
peak_flops = opencls[i].peak_flops;
|
|
have_opencl = true;
|
|
}
|
|
}
|
|
|
|
// see which other instances are equivalent, and set the count,
|
|
// device_nums, opencl_device_count and opencl_device_ids fields
|
|
//
|
|
count = 0;
|
|
opencl_device_count = 0;
|
|
for (i=0; i<opencls.size(); i++) {
|
|
if (in_vector(opencls[i].device_num, ignore_dev)) {
|
|
opencls[i].is_used = COPROC_IGNORED;
|
|
continue;
|
|
}
|
|
if (use_all || !opencl_compare(opencls[i], opencl_prop, true)) {
|
|
instance_has_opencl[count] = true;
|
|
device_nums[count++] = opencls[i].device_num;
|
|
opencl_device_indexes[opencl_device_count] = opencls[i].opencl_device_index;
|
|
opencl_device_ids[opencl_device_count++] = opencls[i].device_id;
|
|
opencls[i].is_used = COPROC_USED;
|
|
}
|
|
}
|
|
}
|
|
|
|
void fake_opencl_gpu(char* type) {
|
|
OPENCL_DEVICE_PROP op;
|
|
op.clear();
|
|
strcpy(op.name, type);
|
|
strcpy(op.vendor, "ARM");
|
|
op.vendor_id = 102760464;
|
|
op.available = 1;
|
|
op.half_fp_config = 63;
|
|
op.single_fp_config = 63;
|
|
op.double_fp_config = 63;
|
|
op.endian_little = 1;
|
|
op.execution_capabilities = 1;
|
|
strcpy(op.extensions, "cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_byte_addressable_store cl_khr_3d_image_writes cl_khr_fp64 cl_khr_int64_base_atomics cl_khr_int64_extended_atomics cl_khr_fp16 cl_khr_gl_sharing cl_khr_icd cl_khr_egl_event cl_khr_egl_image cl_khr_image2d_from_buffer cl_arm_core_id cl_arm_printf cl_arm_thread_limit_hint cl_arm_non_uniform_work_group_size cl_arm_import_memory");
|
|
op.global_mem_size = 2086998016;
|
|
op.local_mem_size = 32768;
|
|
op.max_clock_frequency = 600;
|
|
op.max_compute_units = 2;
|
|
strcpy(op.opencl_platform_version, "OpenCL 1.2 v1.r14p0-01rel0.0fe2d25ca074016740f8ab3fb451b151");
|
|
strcpy(op.opencl_device_version, "OpenCL 1.2 v1.r14p0-01rel0.0fe2d25ca074016740f8ab3fb451b151");
|
|
strcpy(op.opencl_driver_version, "1.2");
|
|
op.is_used = COPROC_USED;
|
|
other_opencls.push_back(op);
|
|
}
|
|
|
|
#ifdef __APPLE__
|
|
// OpenCL returns incorrect total RAM size for some
|
|
// ATI GPUs so we get that info from OpenGL on Macs
|
|
|
|
#include <OpenGL/OpenGL.h>
|
|
#include <OpenGL/gl.h>
|
|
#include <OpenGL/glu.h>
|
|
#include <Carbon/Carbon.h>
|
|
#include <IOKit/graphics/IOGraphicsLib.h>
|
|
|
|
static io_service_t IOServicePortFromCGDisplayID(CGDirectDisplayID displayID);
|
|
|
|
// Replace the memory sizes recorded for ATI/AMD OpenCL GPUs
// (global_mem_size and opencl_available_ram of each ati_opencls entry)
// with the video memory size that OpenGL (CGL) reports for the
// corresponding renderer.
//
// Renderers are visited in CGL order; each OpenCL-capable renderer whose
// GL vendor string satisfies is_AMD() is paired with the next entry of
// ati_opencls.  When log_flags.coproc_debug is set, the model name found
// through IOKit (or, failing that, the GL renderer string) is compared
// with the OpenCL device name and a mismatch is reported.
//
// warnings - receives diagnostic messages; nothing is returned.
//
// The CGL context that is current on entry is restored before returning.
//
void COPROCS::opencl_get_ati_mem_size_from_opengl(vector<string>& warnings) {
    CGLRendererInfoObj info = NULL;
    long i, j;
    GLint numRenderers = 0, rv = 0, deviceVRAM = 0, rendererID = 0;
    cl_ulong deviceMemSize;
    CGLError theErr2 = kCGLNoError;
    CGLContextObj curr_ctx = CGLGetCurrentContext ();   // save current CGL context
    int ati_gpu_index = 0;
    GLint rendererIDs[32];
    CFDataRef modelName[32];
    char opencl_name[256], iokit_name[256], buf[256];
    char *p;

    // Always start from a known state so that the lookup and the final
    // release loop never see indeterminate values, whatever the debug flag.
    for (i=0; i<32; ++i) {
        rendererIDs[i] = 0;
        modelName[i] = NULL;
    }

    if (log_flags.coproc_debug) {
        // For each possible display (one bit of the display mask each),
        // remember the renderer ID of its OpenCL-capable renderer and the
        // "model" property found in the I/O registry for that display.
        for (i=0; i<32; ++i) {
            // Shift an unsigned value: (int)1 << 31 would overflow.
            CGOpenGLDisplayMask myMask = (CGOpenGLDisplayMask)1 << i;
            CGDirectDisplayID displayID = CGOpenGLDisplayMaskToDisplayID(myMask);

            info = NULL;
            theErr2 = CGLQueryRendererInfo(myMask, &info, &numRenderers);
            if (theErr2 != kCGLNoError) {
                continue;
            }

            if (displayID != kCGNullDirectDisplay) {
                // Get the I/O Kit service port for the display
                // (CGDisplayIOServicePort is deprecated in OS 10.9)
                io_registry_entry_t dspPort = IOServicePortFromCGDisplayID(displayID);

                for (j = 0; j < numRenderers; j++) {
                    // find accelerated renderer (assume only one)
                    rv = 0;
                    CGLDescribeRenderer (info, j, kCGLRPAcceleratedCompute, &rv);
                    if (true == rv) {     // if openCL-capable
                        // what is the renderer ID
                        CGLDescribeRenderer (info, j, kCGLRPRendererID, &rendererIDs[i]);
                        modelName[i] = (CFDataRef)IORegistryEntrySearchCFProperty(
                            dspPort,
                            kIOServicePlane, CFSTR("model"), kCFAllocatorDefault,
                            kIORegistryIterateRecursively | kIORegistryIterateParents
                        );
                    }
                    if (modelName[i] != NULL) break;
                }

                // The service port is owned by us; release it once the
                // registry search is done.
                if (dspPort) {
                    IOObjectRelease(dspPort);
                }
            }

            // Each successful query allocates a renderer info object.
            if (info) {
                CGLDestroyRendererInfo (info);
            }
        }
    }   // End if (log_flags.coproc_debug) {

    info = NULL;
    theErr2 = CGLQueryRendererInfo( 0xffffffff, &info, &numRenderers);
    if (theErr2 == kCGLNoError) {
        CGLDescribeRenderer (info, 0, kCGLRPRendererCount, &numRenderers);
        for (i = 0; i < numRenderers; i++) {
            if (ati_gpu_index >= (int)ati_opencls.size()) {
                break;
            }

            rv = 0;
            CGLDescribeRenderer (info, i, kCGLRPAcceleratedCompute, &rv);
            if (true == rv) {     // if openCL-capable
                // what is the renderer ID
                CGLDescribeRenderer (info, i, kCGLRPRendererID, &rendererID);
                // what is the VRAM?
                CGLError notAvail = CGLDescribeRenderer (info, i, kCGLRPVideoMemoryMegabytes, &deviceVRAM);
                if (notAvail == kCGLNoError) {
                    deviceMemSize = ((cl_ulong)deviceVRAM) * (1024L*1024L);
                } else {    // kCGLRPVideoMemoryMegabytes is not available before OS 10.7
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
                    // kCGLRPVideoMemory=120 is deprecated in OS 10.7 and may not be
                    // defined in later SDKs, so use a literal value here instead
                    // CGLDescribeRenderer (info, i, kCGLRPVideoMemory, &deviceVRAM);
                    CGLDescribeRenderer (info, i, (CGLRendererProperty)120, &deviceVRAM);
                    deviceMemSize = deviceVRAM;
#pragma clang diagnostic pop
                }

                // build context and context specific info
                CGLPixelFormatAttribute attribs[] = {
                    kCGLPFARendererID,
                    (CGLPixelFormatAttribute)rendererID,
                    kCGLPFAAllowOfflineRenderers,
                    (CGLPixelFormatAttribute)0
                };
                CGLPixelFormatObj pixelFormat = NULL;
                GLint numPixelFormats = 0;
                CGLContextObj cglContext = NULL;

                CGLChoosePixelFormat (attribs, &pixelFormat, &numPixelFormats);
                if (pixelFormat) {
                    CGLCreateContext(pixelFormat, NULL, &cglContext);
                    CGLDestroyPixelFormat (pixelFormat);
                    if (cglContext) {
                        // glGetString() reports on the current context
                        CGLSetCurrentContext (cglContext);

                        // get vendor string from renderer
                        // (glGetString returns NULL on error)
                        const GLubyte * strVend = glGetString (GL_VENDOR);
                        if ((strVend != NULL) && is_AMD((char *)strVend)) {
                            ati_opencls[ati_gpu_index].global_mem_size = deviceMemSize;
                            ati_opencls[ati_gpu_index].opencl_available_ram = deviceMemSize;

                            if (log_flags.coproc_debug) {
                                // For some GPUs, one API returns "ATI" but the other API returns
                                // "AMD" in the model name, so we normalize both to "AMD"
                                strlcpy(opencl_name, ati_opencls[ati_gpu_index].name, sizeof(opencl_name));
                                if ((p = strstr(opencl_name, "ATI")) != NULL) {
                                    *++p='M';
                                    *++p='D';
                                }

                                // Look for the display whose renderer matches this one
                                for (j=0; j<32; j++) {
                                    if ((rendererID == rendererIDs[j]) && (modelName[j] != NULL)) {
                                        break;
                                    }
                                }
                                if (j < 32) {
                                    // NOTE(review): assumes the "model" property is CFData
                                    // holding a NUL-terminated string - confirm.
                                    strlcpy(iokit_name, (char *)CFDataGetBytePtr(modelName[j]), sizeof(iokit_name));
                                    if ((p = strstr(iokit_name, "ATI")) != NULL) {
                                        *++p='M';
                                        *++p='D';
                                    }
                                    if (strcmp(iokit_name, opencl_name)) {
                                        snprintf(buf, sizeof(buf),
                                            "opencl_get_ati_mem_size_from_opengl model name mismatch: %s vs %s\n",
                                            ati_opencls[ati_gpu_index].name, (char *)CFDataGetBytePtr(modelName[j])
                                        );
                                        warnings.push_back(buf);
                                    }
                                } else {
                                    // Could not get model name from IOKit, so use renderer name
                                    const GLubyte * strRend = glGetString (GL_RENDERER);
                                    if (strRend != NULL) {
                                        strlcpy(iokit_name, (char *)strRend, sizeof(iokit_name));
                                        if ((p = strstr(iokit_name, "ATI")) != NULL) {
                                            *++p='M';
                                            *++p='D';
                                        }
                                    }

                                    if ((strRend == NULL) ||
                                        (!strstr(iokit_name, opencl_name))) {
                                        // Never hand a NULL pointer to %s
                                        snprintf(buf, sizeof(buf),
                                            "opencl_get_ati_mem_size_from_opengl model name to renderer mismatch: %s vs %s\n",
                                            strRend ? (const char *)strRend : "(null)",
                                            ati_opencls[ati_gpu_index].name
                                        );
                                        warnings.push_back(buf);
                                    }
                                }
                            }   // End if (log_flags.coproc_debug) {

                            ati_gpu_index++;
                        }   // End if ATI / AMD GPU

                        CGLDestroyContext (cglContext);
                    } else {
                        warnings.push_back(
                            "opencl_get_ati_mem_size_from_opengl failed to create context\n"
                        );
                    }
                } else {
                    warnings.push_back(
                        "opencl_get_ati_mem_size_from_opengl failed to create PixelFormat\n"
                    );
                }
            }       // End if kCGLRPAcceleratedCompute attribute
        }       // End loop: for (i = 0; i < numRenderers; i++)
        CGLDestroyRendererInfo (info);
    }

    // Entries are non-NULL only if the debug pass above filled them in
    for (j=0; j<32; j++) {
        if (modelName[j] != NULL) {
            CFRelease(modelName[j]);
        }
    }
    CGLSetCurrentContext (curr_ctx);    // restore current CGL context
}
|
|
|
|
|
|
|
|
// The following replaces CGDisplayIOServicePort which is deprecated in OS 10.9
|
|
//
|
|
//========================================================================
|
|
// GLFW 3.1 OS X - www.glfw.org
|
|
//------------------------------------------------------------------------
|
|
// Copyright (c) 2002-2006 Marcus Geelnard
|
|
// Copyright (c) 2006-2010 Camilla Berglund <elmindreda@elmindreda.org>
|
|
//
|
|
// This software is provided 'as-is', without any express or implied
|
|
// warranty. In no event will the authors be held liable for any damages
|
|
// arising from the use of this software.
|
|
//
|
|
// Permission is granted to anyone to use this software for any purpose,
|
|
// including commercial applications, and to alter it and redistribute it
|
|
// freely, subject to the following restrictions:
|
|
//
|
|
// 1. The origin of this software must not be misrepresented; you must not
|
|
// claim that you wrote the original software. If you use this software
|
|
// in a product, an acknowledgment in the product documentation would
|
|
// be appreciated but is not required.
|
|
//
|
|
// 2. Altered source versions must be plainly marked as such, and must not
|
|
// be misrepresented as being the original software.
|
|
//
|
|
// 3. This notice may not be removed or altered from any source
|
|
// distribution.
|
|
//
|
|
//========================================================================
|
|
|
|
// Returns the io_service_t corresponding to a CG display ID, or 0 on failure.
|
|
// The io_service_t should be released with IOObjectRelease when not needed.
|
|
//
|
|
|
|
static io_service_t IOServicePortFromCGDisplayID(CGDirectDisplayID displayID)
|
|
{
|
|
io_iterator_t iter;
|
|
io_service_t serv, servicePort = 0;
|
|
|
|
CFMutableDictionaryRef matching = IOServiceMatching("IODisplayConnect");
|
|
|
|
// releases matching for us
|
|
kern_return_t err = IOServiceGetMatchingServices(kIOMasterPortDefault,
|
|
matching,
|
|
&iter);
|
|
if (err)
|
|
return 0;
|
|
|
|
while ((serv = IOIteratorNext(iter)) != 0)
|
|
{
|
|
CFDictionaryRef info;
|
|
CFIndex vendorID, productID, serialNumber;
|
|
CFNumberRef vendorIDRef, productIDRef, serialNumberRef;
|
|
Boolean success;
|
|
|
|
info = IODisplayCreateInfoDictionary(serv,
|
|
kIODisplayOnlyPreferredName);
|
|
|
|
vendorIDRef = (CFNumberRef)CFDictionaryGetValue(info,
|
|
CFSTR(kDisplayVendorID));
|
|
productIDRef = (CFNumberRef)CFDictionaryGetValue(info,
|
|
CFSTR(kDisplayProductID));
|
|
serialNumberRef = (CFNumberRef)CFDictionaryGetValue(info,
|
|
CFSTR(kDisplaySerialNumber));
|
|
|
|
success = CFNumberGetValue(vendorIDRef, kCFNumberCFIndexType,
|
|
&vendorID);
|
|
success &= CFNumberGetValue(productIDRef, kCFNumberCFIndexType,
|
|
&productID);
|
|
success &= CFNumberGetValue(serialNumberRef, kCFNumberCFIndexType,
|
|
&serialNumber);
|
|
|
|
if (!success)
|
|
{
|
|
CFRelease(info);
|
|
continue;
|
|
}
|
|
// If the vendor and product id along with the serial don't match
|
|
// then we are not looking at the correct monitor.
|
|
// NOTE: The serial number is important in cases where two monitors
|
|
// are the exact same.
|
|
if (CGDisplayVendorNumber(displayID) != vendorID ||
|
|
CGDisplayModelNumber(displayID) != productID ||
|
|
CGDisplaySerialNumber(displayID) != serialNumber)
|
|
{
|
|
CFRelease(info);
|
|
continue;
|
|
}
|
|
|
|
// The VendorID, Product ID, and the Serial Number all Match Up!
|
|
// Therefore we have found the appropriate display io_service
|
|
servicePort = serv;
|
|
CFRelease(info);
|
|
break;
|
|
}
|
|
|
|
IOObjectRelease(iter);
|
|
return servicePort;
|
|
}
|
|
#endif// __APPLE__
|