mirror of https://github.com/BOINC/boinc.git
- client: compare OpenCL-only devices the same as other devices
- code cleanup
svn path=/trunk/boinc/; revision=24354
This commit is contained in:
parent
279c3a2b37
commit
dd3b628748
|
@ -6983,3 +6983,14 @@ David 7 Oct 2011
|
|||
|
||||
sched/
|
||||
handle_request.cpp
|
||||
|
||||
David 7 Oct 2011
|
||||
- client: compare OpenCL-only devices the same as other devices
|
||||
- code cleanup
|
||||
|
||||
lib/
|
||||
coproc.cpp,h
|
||||
client/
|
||||
coproc_detect.cpp
|
||||
sched/
|
||||
sched_customize.cpp
|
||||
|
|
|
@ -156,39 +156,18 @@ cl_int (*__clGetDeviceInfo)(cl_device_id /* device */,
|
|||
|
||||
// If "loose", tolerate small diff
|
||||
//
|
||||
int nvidia_opencl_compare(OPENCL_DEVICE_PROP& opencl_prop, COPROC_NVIDIA& coproc_nvidia, bool loose) {
|
||||
double peak_flops1, peak_flops2;
|
||||
|
||||
peak_flops1 = coproc_nvidia.get_peak_flops(opencl_prop);
|
||||
peak_flops2 = coproc_nvidia.get_peak_flops(coproc_nvidia.opencl_prop);
|
||||
int opencl_compare(COPROC& c1, COPROC& c2, bool loose) {
|
||||
if (c1.opencl_prop.opencl_device_version_int > c2.opencl_prop.opencl_device_version_int) return 1;
|
||||
if (c1.opencl_prop.opencl_device_version_int < c2.opencl_prop.opencl_device_version_int) return -1;
|
||||
if (loose) {
|
||||
if (peak_flops1 > (peak_flops2 * 1.1)) return 1;
|
||||
if ((peak_flops1 * 1.1) < (peak_flops2)) return -1;
|
||||
if (c1.opencl_prop.global_mem_size > 1.4*c2.opencl_prop.global_mem_size) return 1;
|
||||
if (c1.opencl_prop.global_mem_size < .7*c2.opencl_prop.global_mem_size) return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (peak_flops1 > peak_flops2) return 1;
|
||||
if (peak_flops1 < peak_flops2) return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int ati_opencl_compare(OPENCL_DEVICE_PROP& opencl_prop, COPROC_ATI& coproc_ati, bool loose) {
|
||||
|
||||
double peak_flops1, peak_flops2;
|
||||
|
||||
peak_flops1 = coproc_ati.get_peak_flops(opencl_prop);
|
||||
peak_flops2 = coproc_ati.get_peak_flops(coproc_ati.opencl_prop);
|
||||
if (loose) {
|
||||
if (peak_flops1 > (peak_flops2 * 1.1)) return 1;
|
||||
if ((peak_flops1 * 1.1) < (peak_flops2)) return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (peak_flops1 > peak_flops2) return 1;
|
||||
if (peak_flops1 < peak_flops2) return -1;
|
||||
|
||||
if (c1.opencl_prop.global_mem_size > c2.opencl_prop.global_mem_size) return 1;
|
||||
if (c1.opencl_prop.global_mem_size < c2.opencl_prop.global_mem_size) return -1;
|
||||
if (c1.peak_flops > c2.peak_flops) return 1;
|
||||
if (c1.peak_flops < c2.peak_flops) return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -208,8 +187,8 @@ void COPROCS::get_opencl(bool use_all,
|
|||
cl_device_id devices[MAX_COPROC_INSTANCES];
|
||||
char platform_version[64];
|
||||
OPENCL_DEVICE_PROP prop;
|
||||
vector<OPENCL_DEVICE_PROP> nvidia_opencls;
|
||||
vector<OPENCL_DEVICE_PROP> ati_opencls;
|
||||
vector<COPROC_NVIDIA> nvidia_opencls;
|
||||
vector<COPROC_ATI> ati_opencls;
|
||||
unsigned int i;
|
||||
int j;
|
||||
char buf[256];
|
||||
|
@ -293,19 +272,24 @@ void COPROCS::get_opencl(bool use_all,
|
|||
//TODO: Should we store the platform(s) for each GPU found?
|
||||
//TODO: Must we check if multiple platforms found the same GPU and merge the records?
|
||||
|
||||
ciErrNum = get_opencl_info(prop, device_index, descs, warnings);
|
||||
ciErrNum = get_opencl_info(prop, device_index, warnings);
|
||||
if (ciErrNum != CL_SUCCESS) break;
|
||||
|
||||
prop.get_device_version_int();
|
||||
if (strstr(prop.vendor, GPU_TYPE_NVIDIA)) {
|
||||
prop.device_num = (int)(nvidia_opencls.size());
|
||||
nvidia_opencls.push_back(prop);
|
||||
COPROC_NVIDIA c;
|
||||
c.opencl_prop = prop;
|
||||
nvidia_opencls.push_back(c);
|
||||
}
|
||||
if ((strstr(prop.vendor, GPU_TYPE_ATI)) ||
|
||||
(strstr(prop.vendor, "AMD")) ||
|
||||
(strstr(prop.vendor, "Advanced Micro Devices, Inc."))
|
||||
) {
|
||||
prop.device_num = (int)(ati_opencls.size());
|
||||
ati_opencls.push_back(prop);
|
||||
COPROC_ATI c;
|
||||
c.opencl_prop = prop;
|
||||
ati_opencls.push_back(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -318,16 +302,19 @@ void COPROCS::get_opencl(bool use_all,
|
|||
if (nvidia.have_cuda) { // If CUDA already found the "best" NVIDIA GPU
|
||||
for (i=0; i<nvidia_opencls.size(); i++) {
|
||||
if (in_vector(nvidia_opencls[i].device_num, ignore_nvidia_dev)) continue;
|
||||
if (nvidia.matches(nvidia_opencls[i])) {
|
||||
nvidia.opencl_prop = nvidia_opencls[i];
|
||||
nvidia.opencl_device_ids[0] = nvidia_opencls[i].device_id;
|
||||
if (nvidia.matches(nvidia_opencls[i].opencl_prop)) {
|
||||
nvidia.opencl_prop = nvidia_opencls[i].opencl_prop;
|
||||
nvidia.opencl_device_ids[0] = nvidia_opencls[i].opencl_prop.device_id;
|
||||
nvidia.have_opencl = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
//TODO: This assumes OpenCL and NVIDIA return the same device with the same index
|
||||
|
||||
//TODO: This assumes OpenCL and NVIDIA return the same device
|
||||
// with the same index
|
||||
//
|
||||
for (i=0; i<(unsigned int)nvidia.count; ++i) {
|
||||
nvidia.opencl_device_ids[i] = nvidia_opencls[nvidia.device_nums[i]].device_id;
|
||||
nvidia.opencl_device_ids[i] = nvidia_opencls[nvidia.device_nums[i]].opencl_prop.device_id;
|
||||
}
|
||||
nvidia.opencl_device_count = nvidia.count;
|
||||
} else {
|
||||
|
@ -339,27 +326,30 @@ void COPROCS::get_opencl(bool use_all,
|
|||
if (log_flags.coproc_debug) {
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[coproc_debug] COPROC_NVIDIA [no CUDA]: nvidia_opencls[%d].global_mem_size = %llu; nvidia_opencls[%d].local_mem_size = %llu",
|
||||
i, (unsigned long long)nvidia_opencls[i].global_mem_size,
|
||||
i, (unsigned long long)nvidia_opencls[i].local_mem_size);
|
||||
i, (unsigned long long)nvidia_opencls[i].opencl_prop.global_mem_size,
|
||||
i, (unsigned long long)nvidia_opencls[i].opencl_prop.local_mem_size
|
||||
);
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[coproc_debug] COPROC_NVIDIA [no CUDA]: nvidia_opencls[%d].global_mem_size = %llu; nvidia_opencls[%d].local_mem_size = %llu",
|
||||
i, (unsigned long long)nvidia_opencls[i].global_mem_size,
|
||||
i, (unsigned long long)nvidia_opencls[i].local_mem_size);
|
||||
i, (unsigned long long)nvidia_opencls[i].opencl_prop.global_mem_size,
|
||||
i, (unsigned long long)nvidia_opencls[i].opencl_prop.local_mem_size
|
||||
);
|
||||
}
|
||||
if (in_vector(nvidia_opencls[i].device_num, ignore_nvidia_dev)) continue;
|
||||
if (in_vector(nvidia_opencls[i].opencl_prop.device_num, ignore_nvidia_dev)) continue;
|
||||
bool is_best = false;
|
||||
if (first) {
|
||||
is_best = true;
|
||||
first = false;
|
||||
} else if (nvidia_opencl_compare(nvidia_opencls[i], nvidia, false) > 0) {
|
||||
} else if (opencl_compare(nvidia_opencls[i], nvidia, false) > 0) {
|
||||
is_best = true;
|
||||
}
|
||||
if (is_best) {
|
||||
nvidia.opencl_prop = nvidia_opencls[i]; // fill in what info we have
|
||||
strcpy(nvidia.prop.name, nvidia_opencls[i].name);
|
||||
nvidia.prop.totalGlobalMem = nvidia_opencls[i].global_mem_size;
|
||||
nvidia.prop.clockRate = nvidia_opencls[i].max_clock_frequency * 1000;
|
||||
nvidia.device_num = nvidia_opencls[i].device_num;
|
||||
// fill in what info we have
|
||||
nvidia.opencl_prop = nvidia_opencls[i].opencl_prop;
|
||||
strcpy(nvidia.prop.name, nvidia_opencls[i].opencl_prop.name);
|
||||
nvidia.prop.totalGlobalMem = nvidia_opencls[i].opencl_prop.global_mem_size;
|
||||
nvidia.prop.clockRate = nvidia_opencls[i].opencl_prop.max_clock_frequency * 1000;
|
||||
nvidia.device_num = nvidia_opencls[i].opencl_prop.device_num;
|
||||
nvidia.have_opencl = true;
|
||||
nvidia.set_peak_flops();
|
||||
}
|
||||
|
@ -371,10 +361,10 @@ void COPROCS::get_opencl(bool use_all,
|
|||
nvidia.count = 0;
|
||||
nvidia.opencl_device_count = 0;
|
||||
for (i=0; i<nvidia_opencls.size(); i++) {
|
||||
if (!in_vector(nvidia_opencls[i].device_num, ignore_nvidia_dev)) {
|
||||
if (use_all || !nvidia_opencl_compare(nvidia_opencls[i], nvidia, true)) {
|
||||
nvidia.device_nums[nvidia.count++] = nvidia_opencls[i].device_num;
|
||||
nvidia.opencl_device_ids[nvidia.opencl_device_count++] = nvidia_opencls[i].device_id;
|
||||
if (!in_vector(nvidia_opencls[i].opencl_prop.device_num, ignore_nvidia_dev)) {
|
||||
if (use_all || !opencl_compare(nvidia_opencls[i], nvidia, true)) {
|
||||
nvidia.device_nums[nvidia.count++] = nvidia_opencls[i].opencl_prop.device_num;
|
||||
nvidia.opencl_device_ids[nvidia.opencl_device_count++] = nvidia_opencls[i].opencl_prop.device_id;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -383,13 +373,13 @@ void COPROCS::get_opencl(bool use_all,
|
|||
// Create descriptions for OpenCL NVIDIA GPUs
|
||||
for (i=0; i<nvidia_opencls.size(); i++) {
|
||||
char buf2[256];
|
||||
opencl_description(nvidia_opencls[i], buf);
|
||||
opencl_description(nvidia_opencls[i].opencl_prop, buf);
|
||||
if (in_vector(nvidia_opencls[i].device_num, ignore_nvidia_dev)) {
|
||||
sprintf(buf2, "OpenCL: NVIDIA GPU %d (ignored by config): %s", nvidia_opencls[i].device_num, buf);
|
||||
} else {
|
||||
used = false;
|
||||
for (j=0; j<nvidia.opencl_device_count; j++) {
|
||||
if (nvidia_opencls[i].device_id == nvidia.opencl_device_ids[j]) {
|
||||
if (nvidia_opencls[i].opencl_prop.device_id == nvidia.opencl_device_ids[j]) {
|
||||
used = true;
|
||||
break;
|
||||
}
|
||||
|
@ -405,10 +395,10 @@ void COPROCS::get_opencl(bool use_all,
|
|||
|
||||
if (ati.have_cal) { // If CAL already found the "best" CAL GPU
|
||||
for (i=0; i<ati_opencls.size(); i++) {
|
||||
if (in_vector(ati_opencls[i].device_num, ignore_ati_dev)) continue;
|
||||
if (ati.matches(ati_opencls[i])) {
|
||||
ati.opencl_prop = ati_opencls[i];
|
||||
ati.opencl_device_ids[0] = ati_opencls[i].device_id;
|
||||
if (in_vector(ati_opencls[i].opencl_prop.device_num, ignore_ati_dev)) continue;
|
||||
if (ati.matches(ati_opencls[i].opencl_prop)) {
|
||||
ati.opencl_prop = ati_opencls[i].opencl_prop;
|
||||
ati.opencl_device_ids[0] = ati_opencls[i].opencl_prop.device_id;
|
||||
ati.have_opencl = true;
|
||||
|
||||
// Work around a bug in OpenCL which returns only
|
||||
|
@ -420,7 +410,7 @@ void COPROCS::get_opencl(bool use_all,
|
|||
}
|
||||
//TODO: This assumes OpenCL and CAL return the same device with the same index
|
||||
for (i=0; i<(unsigned int)ati.count; ++i) {
|
||||
ati.opencl_device_ids[i] = ati_opencls[ati.device_nums[i]].device_id;
|
||||
ati.opencl_device_ids[i] = ati_opencls[ati.device_nums[i]].opencl_prop.device_id;
|
||||
}
|
||||
ati.opencl_device_count = ati.count;
|
||||
} else {
|
||||
|
@ -432,35 +422,35 @@ void COPROCS::get_opencl(bool use_all,
|
|||
// 1/2 of total global RAM size.
|
||||
// This bug applies only to ATI GPUs, not to NVIDIA
|
||||
// Assume this will be fixed in openCL 1.2.
|
||||
if ((!strstr("1.0", ati_opencls[i].opencl_platform_version)) ||
|
||||
(!strstr("1.1", ati_opencls[i].opencl_platform_version))) {
|
||||
ati_opencls[i].global_mem_size *= 2;
|
||||
if ((!strstr("1.0", ati_opencls[i].opencl_prop.opencl_platform_version)) ||
|
||||
(!strstr("1.1", ati_opencls[i].opencl_prop.opencl_platform_version))) {
|
||||
ati_opencls[i].opencl_prop.global_mem_size *= 2;
|
||||
}
|
||||
|
||||
//TODO: Temporary code for testing
|
||||
if (log_flags.coproc_debug) {
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[coproc_debug] COPROC_ATI [no CAL]: ati_opencls[%d].name = '%s'; ati_opencls[%d].device_id = %p, ati_opencls[%d].device_num = %d",
|
||||
i, ati_opencls[i].name, i, ati_opencls[i].device_id, i, ati_opencls[i].device_num);
|
||||
"[coproc_debug] COPROC_ATI [no CAL]: ati_opencls[%d].opencl_prop.name = '%s'; ati_opencls[%d].opencl_prop.device_id = %p, ati_opencls[%d].opencl_prop.device_num = %d",
|
||||
i, ati_opencls[i].opencl_prop.name, i, ati_opencls[i].opencl_prop.device_id, i, ati_opencls[i].opencl_prop.device_num);
|
||||
msg_printf(0, MSG_INFO,
|
||||
"[coproc_debug] COPROC_ATI [no CAL]: ati_opencls[%d].global_mem_size = %llu; ati_opencls[%d].local_mem_size = %llu",
|
||||
i, (unsigned long long)ati_opencls[i].global_mem_size,
|
||||
i, (unsigned long long)ati_opencls[i].local_mem_size);
|
||||
"[coproc_debug] COPROC_ATI [no CAL]: ati_opencls[%d].opencl_prop.global_mem_size = %llu; ati_opencls[%d].opencl_prop.local_mem_size = %llu",
|
||||
i, (unsigned long long)ati_opencls[i].opencl_prop.global_mem_size,
|
||||
i, (unsigned long long)ati_opencls[i].opencl_prop.local_mem_size);
|
||||
}
|
||||
if (in_vector(ati_opencls[i].device_num, ignore_ati_dev)) continue;
|
||||
if (in_vector(ati_opencls[i].opencl_prop.device_num, ignore_ati_dev)) continue;
|
||||
bool is_best = false;
|
||||
if (first) {
|
||||
is_best = true;
|
||||
first = false;
|
||||
} else if (ati_opencl_compare(ati_opencls[i], ati, false) > 0) {
|
||||
} else if (opencl_compare(ati_opencls[i], ati, false) > 0) {
|
||||
is_best = true;
|
||||
}
|
||||
if (is_best) {
|
||||
ati.opencl_prop = ati_opencls[i]; // fill in what info we have
|
||||
strcpy(ati.name, ati_opencls[i].name);
|
||||
ati.attribs.localRAM = ati_opencls[i].local_mem_size;
|
||||
ati.attribs.engineClock = ati_opencls[i].max_clock_frequency;
|
||||
ati.device_num = ati_opencls[i].device_num;
|
||||
ati.opencl_prop = ati_opencls[i].opencl_prop; // fill in what info we have
|
||||
strcpy(ati.name, ati_opencls[i].opencl_prop.name);
|
||||
ati.attribs.localRAM = ati_opencls[i].opencl_prop.local_mem_size;
|
||||
ati.attribs.engineClock = ati_opencls[i].opencl_prop.max_clock_frequency;
|
||||
ati.device_num = ati_opencls[i].opencl_prop.device_num;
|
||||
ati.set_peak_flops();
|
||||
ati.have_opencl = true;
|
||||
}
|
||||
|
@ -473,9 +463,9 @@ void COPROCS::get_opencl(bool use_all,
|
|||
ati.opencl_device_count = 0;
|
||||
for (i=0; i<ati_opencls.size(); i++) {
|
||||
if (!in_vector(ati_opencls[i].device_num, ignore_ati_dev)) {
|
||||
if (use_all || !ati_opencl_compare(ati_opencls[i], ati, true)) {
|
||||
if (use_all || !opencl_compare(ati_opencls[i], ati, true)) {
|
||||
ati.device_nums[ati.count++] = ati_opencls[i].device_num;
|
||||
ati.opencl_device_ids[ati.opencl_device_count++] = ati_opencls[i].device_id;
|
||||
ati.opencl_device_ids[ati.opencl_device_count++] = ati_opencls[i].opencl_prop.device_id;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -484,13 +474,13 @@ void COPROCS::get_opencl(bool use_all,
|
|||
// Create descriptions for OpenCL ATI GPUs
|
||||
for (i=0; i<(unsigned int)ati.opencl_device_count; i++) {
|
||||
char buf2[256];
|
||||
opencl_description(ati_opencls[i], buf);
|
||||
opencl_description(ati_opencls[i].opencl_prop, buf);
|
||||
if (in_vector(ati_opencls[i].device_num, ignore_ati_dev)) {
|
||||
sprintf(buf2, "OpenCL: ATI GPU %d (ignored by config): %s", ati_opencls[i].device_num, buf);
|
||||
} else {
|
||||
used = false;
|
||||
for (j=0; j<ati.opencl_device_count; j++) {
|
||||
if (ati_opencls[i].device_id == ati.opencl_device_ids[j]) {
|
||||
if (ati_opencls[i].opencl_prop.device_id == ati.opencl_device_ids[j]) {
|
||||
used = true;
|
||||
break;
|
||||
}
|
||||
|
@ -510,7 +500,6 @@ void COPROCS::get_opencl(bool use_all,
|
|||
cl_int COPROCS::get_opencl_info(
|
||||
OPENCL_DEVICE_PROP& prop,
|
||||
cl_uint device_index,
|
||||
vector<string>& descs,
|
||||
vector<string>&warnings
|
||||
) {
|
||||
cl_int ciErrNum;
|
||||
|
|
|
@ -171,6 +171,7 @@ int COPROC::parse_opencl(XML_PARSER& xp) {
|
|||
|
||||
while (!xp.get_tag()) {
|
||||
if (xp.match_tag("/coproc_opencl")) {
|
||||
opencl_prop.get_device_version_int();
|
||||
return 0;
|
||||
}
|
||||
if (xp.parse_str("name", opencl_prop.name, sizeof(opencl_prop.name))) continue;
|
||||
|
@ -244,6 +245,18 @@ int COPROC::parse_opencl(XML_PARSER& xp) {
|
|||
return ERR_XML_PARSE;
|
||||
}
|
||||
|
||||
int OPENCL_DEVICE_PROP::get_device_version_int() {
|
||||
int maj, min;
|
||||
int n = sscanf(
|
||||
opencl_device_version, "OpenCL %d.%d", &maj, &min
|
||||
);
|
||||
if (n != 2) {
|
||||
return ERR_NOT_FOUND;
|
||||
}
|
||||
opencl_device_version_int = 100*maj + min;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void COPROCS::opencl_description(OPENCL_DEVICE_PROP& prop, char* buf) {
|
||||
char s[256];
|
||||
int n;
|
||||
|
@ -515,15 +528,6 @@ int COPROC_NVIDIA::parse(XML_PARSER& xp) {
|
|||
return ERR_XML_PARSE;
|
||||
}
|
||||
|
||||
double COPROC_NVIDIA::get_peak_flops(OPENCL_DEVICE_PROP& prop) {
|
||||
double x=0;
|
||||
// OpenCL doesn't give us compute capability.
|
||||
// assume cores_per_proc is 8 and flops_per_clock is 2
|
||||
//
|
||||
x = prop.max_compute_units * 8 * 2 * prop.max_clock_frequency * 1e6;
|
||||
return x;
|
||||
}
|
||||
|
||||
void COPROC_NVIDIA::set_peak_flops() {
|
||||
double x=0;
|
||||
if (have_cuda) {
|
||||
|
@ -551,7 +555,7 @@ void COPROC_NVIDIA::set_peak_flops() {
|
|||
// OpenCL doesn't give us compute capability.
|
||||
// assume cores_per_proc is 8 and flops_per_clock is 2
|
||||
//
|
||||
x = get_peak_flops(opencl_prop);
|
||||
x = opencl_prop.max_compute_units * 8 * 2 * opencl_prop.max_clock_frequency * 1e6;
|
||||
}
|
||||
peak_flops = (x>0)?x:5e10;
|
||||
}
|
||||
|
@ -742,19 +746,6 @@ void COPROC_ATI::description(char* buf) {
|
|||
);
|
||||
}
|
||||
|
||||
double COPROC_ATI::get_peak_flops(OPENCL_DEVICE_PROP& prop) {
|
||||
double x = 0;
|
||||
// OpenCL gives us only:
|
||||
// - max_compute_units
|
||||
// (which I'll assume is the same as attribs.numberOfSIMD)
|
||||
// - max_clock_frequency (which I'll assume is the same as engineClock)
|
||||
// It doesn't give wavefrontSize, which can be 16/32/64.
|
||||
// So let's be conservative and use 16
|
||||
//
|
||||
x = prop.max_compute_units * 16 * 5 * prop.max_clock_frequency * 1e6;
|
||||
return x;
|
||||
}
|
||||
|
||||
void COPROC_ATI::set_peak_flops() {
|
||||
double x = 0;
|
||||
if (have_cal) {
|
||||
|
@ -768,7 +759,7 @@ void COPROC_ATI::set_peak_flops() {
|
|||
// It doesn't give wavefrontSize, which can be 16/32/64.
|
||||
// So let's be conservative and use 16
|
||||
//
|
||||
x = get_peak_flops(opencl_prop);
|
||||
x = opencl_prop.max_compute_units * 16 * 5 * opencl_prop.max_clock_frequency * 1e6;
|
||||
}
|
||||
peak_flops = (x>0)?x:5e10;
|
||||
}
|
||||
|
|
15
lib/coproc.h
15
lib/coproc.h
|
@ -114,11 +114,15 @@ struct OPENCL_DEVICE_PROP {
|
|||
cl_ulong local_mem_size;
|
||||
cl_uint max_clock_frequency; // in MHz
|
||||
cl_uint max_compute_units;
|
||||
char opencl_platform_version[64]; // Version of OpenCL platform for this device
|
||||
char opencl_platform_version[64]; // Version of OpenCL supported
|
||||
// by the device's platform
|
||||
char opencl_device_version[64]; // OpenCL version supported by device;
|
||||
// example: "OpenCL 1.1 beta"
|
||||
int opencl_device_version_int; // same, encoded as e.g. 101
|
||||
int get_device_version_int(); // call this to encode
|
||||
char opencl_driver_version[32]; // For example: "CLH 1.0"
|
||||
int device_num; // temp used in scan process
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
@ -260,12 +264,11 @@ struct COPROC_NVIDIA : public COPROC {
|
|||
std::vector<std::string>&, std::vector<std::string>&,
|
||||
std::vector<int>& ignore_devs
|
||||
);
|
||||
void description(char*);
|
||||
void description(char*);
|
||||
void clear();
|
||||
int parse(XML_PARSER&);
|
||||
void get_available_ram();
|
||||
double get_peak_flops(OPENCL_DEVICE_PROP& prop);
|
||||
void set_peak_flops();
|
||||
void set_peak_flops();
|
||||
bool check_running_graphics_app();
|
||||
bool matches(OPENCL_DEVICE_PROP& OpenCLprop);
|
||||
void fake(int driver_version, double ram, double avail_ram, int count);
|
||||
|
@ -296,8 +299,7 @@ struct COPROC_ATI : public COPROC {
|
|||
int parse(XML_PARSER&);
|
||||
void get_available_ram();
|
||||
bool matches(OPENCL_DEVICE_PROP& OpenCLprop);
|
||||
double get_peak_flops(OPENCL_DEVICE_PROP& prop);
|
||||
void set_peak_flops();
|
||||
void set_peak_flops();
|
||||
void fake(double ram, double avail_ram, int);
|
||||
};
|
||||
|
||||
|
@ -323,7 +325,6 @@ struct COPROCS {
|
|||
cl_int get_opencl_info(
|
||||
OPENCL_DEVICE_PROP& prop,
|
||||
cl_uint device_index,
|
||||
std::vector<std::string>& descs,
|
||||
std::vector<std::string>& warnings
|
||||
);
|
||||
void opencl_description(OPENCL_DEVICE_PROP& prop, char* buf);
|
||||
|
|
|
@ -430,19 +430,7 @@ static inline bool opencl_check(
|
|||
double cpu_frac,
|
||||
double flops_scale
|
||||
) {
|
||||
int device_version, maj, min;
|
||||
int n = sscanf(
|
||||
cp.opencl_prop.opencl_device_version, "OpenCL %d.%d", &maj, &min
|
||||
);
|
||||
if (n != 2) {
|
||||
log_messages.printf(MSG_CRITICAL,
|
||||
"can't parse device version: %s\n",
|
||||
cp.opencl_prop.opencl_device_version
|
||||
);
|
||||
return false;
|
||||
}
|
||||
device_version = 100*maj + min;
|
||||
if (device_version < min_opencl_device_version) {
|
||||
if (cp.opencl_prop.opencl_device_version_int < min_opencl_device_version) {
|
||||
return false;
|
||||
}
|
||||
if (cp.opencl_prop.global_mem_size < min_global_mem_size) {
|
||||
|
|
Loading…
Reference in New Issue