- client: compare OpenCL-only devices the same as other devices

- code cleanup


svn path=/trunk/boinc/; revision=24354
This commit is contained in:
David Anderson 2011-10-08 06:33:39 +00:00
parent 279c3a2b37
commit dd3b628748
5 changed files with 106 additions and 126 deletions

View File

@ -6983,3 +6983,14 @@ David 7 Oct 2011
sched/
handle_request.cpp
David 7 Oct 2011
- client: compare OpenCL-only devices the same as other devices
- code cleanup
lib/
coproc.cpp,h
client/
coproc_detect.cpp
sched/
sched_customize.cpp

View File

@ -156,39 +156,18 @@ cl_int (*__clGetDeviceInfo)(cl_device_id /* device */,
// If "loose", tolerate small diff
//
int nvidia_opencl_compare(OPENCL_DEVICE_PROP& opencl_prop, COPROC_NVIDIA& coproc_nvidia, bool loose) {
double peak_flops1, peak_flops2;
peak_flops1 = coproc_nvidia.get_peak_flops(opencl_prop);
peak_flops2 = coproc_nvidia.get_peak_flops(coproc_nvidia.opencl_prop);
int opencl_compare(COPROC& c1, COPROC& c2, bool loose) {
if (c1.opencl_prop.opencl_device_version_int > c2.opencl_prop.opencl_device_version_int) return 1;
if (c1.opencl_prop.opencl_device_version_int < c2.opencl_prop.opencl_device_version_int) return -1;
if (loose) {
if (peak_flops1 > (peak_flops2 * 1.1)) return 1;
if ((peak_flops1 * 1.1) < (peak_flops2)) return -1;
if (c1.opencl_prop.global_mem_size > 1.4*c2.opencl_prop.global_mem_size) return 1;
if (c1.opencl_prop.global_mem_size < .7*c2.opencl_prop.global_mem_size) return -1;
return 0;
}
if (peak_flops1 > peak_flops2) return 1;
if (peak_flops1 < peak_flops2) return -1;
return 0;
}
int ati_opencl_compare(OPENCL_DEVICE_PROP& opencl_prop, COPROC_ATI& coproc_ati, bool loose) {
double peak_flops1, peak_flops2;
peak_flops1 = coproc_ati.get_peak_flops(opencl_prop);
peak_flops2 = coproc_ati.get_peak_flops(coproc_ati.opencl_prop);
if (loose) {
if (peak_flops1 > (peak_flops2 * 1.1)) return 1;
if ((peak_flops1 * 1.1) < (peak_flops2)) return -1;
return 0;
}
if (peak_flops1 > peak_flops2) return 1;
if (peak_flops1 < peak_flops2) return -1;
if (c1.opencl_prop.global_mem_size > c2.opencl_prop.global_mem_size) return 1;
if (c1.opencl_prop.global_mem_size < c2.opencl_prop.global_mem_size) return -1;
if (c1.peak_flops > c2.peak_flops) return 1;
if (c1.peak_flops < c2.peak_flops) return -1;
return 0;
}
@ -208,8 +187,8 @@ void COPROCS::get_opencl(bool use_all,
cl_device_id devices[MAX_COPROC_INSTANCES];
char platform_version[64];
OPENCL_DEVICE_PROP prop;
vector<OPENCL_DEVICE_PROP> nvidia_opencls;
vector<OPENCL_DEVICE_PROP> ati_opencls;
vector<COPROC_NVIDIA> nvidia_opencls;
vector<COPROC_ATI> ati_opencls;
unsigned int i;
int j;
char buf[256];
@ -293,19 +272,24 @@ void COPROCS::get_opencl(bool use_all,
//TODO: Should we store the platform(s) for each GPU found?
//TODO: Must we check if multiple platforms found the same GPU and merge the records?
ciErrNum = get_opencl_info(prop, device_index, descs, warnings);
ciErrNum = get_opencl_info(prop, device_index, warnings);
if (ciErrNum != CL_SUCCESS) break;
prop.get_device_version_int();
if (strstr(prop.vendor, GPU_TYPE_NVIDIA)) {
prop.device_num = (int)(nvidia_opencls.size());
nvidia_opencls.push_back(prop);
COPROC_NVIDIA c;
c.opencl_prop = prop;
nvidia_opencls.push_back(c);
}
if ((strstr(prop.vendor, GPU_TYPE_ATI)) ||
(strstr(prop.vendor, "AMD")) ||
(strstr(prop.vendor, "Advanced Micro Devices, Inc."))
) {
prop.device_num = (int)(ati_opencls.size());
ati_opencls.push_back(prop);
COPROC_ATI c;
c.opencl_prop = prop;
ati_opencls.push_back(c);
}
}
}
@ -318,16 +302,19 @@ void COPROCS::get_opencl(bool use_all,
if (nvidia.have_cuda) { // If CUDA already found the "best" NVIDIA GPU
for (i=0; i<nvidia_opencls.size(); i++) {
if (in_vector(nvidia_opencls[i].device_num, ignore_nvidia_dev)) continue;
if (nvidia.matches(nvidia_opencls[i])) {
nvidia.opencl_prop = nvidia_opencls[i];
nvidia.opencl_device_ids[0] = nvidia_opencls[i].device_id;
if (nvidia.matches(nvidia_opencls[i].opencl_prop)) {
nvidia.opencl_prop = nvidia_opencls[i].opencl_prop;
nvidia.opencl_device_ids[0] = nvidia_opencls[i].opencl_prop.device_id;
nvidia.have_opencl = true;
break;
}
}
//TODO: This assumes OpenCL and NVIDIA return the same device with the same index
//TODO: This assumes OpenCL and NVIDIA return the same device
// with the same index
//
for (i=0; i<(unsigned int)nvidia.count; ++i) {
nvidia.opencl_device_ids[i] = nvidia_opencls[nvidia.device_nums[i]].device_id;
nvidia.opencl_device_ids[i] = nvidia_opencls[nvidia.device_nums[i]].opencl_prop.device_id;
}
nvidia.opencl_device_count = nvidia.count;
} else {
@ -339,27 +326,30 @@ void COPROCS::get_opencl(bool use_all,
if (log_flags.coproc_debug) {
msg_printf(0, MSG_INFO,
"[coproc_debug] COPROC_NVIDIA [no CUDA]: nvidia_opencls[%d].global_mem_size = %llu; nvidia_opencls[%d].local_mem_size = %llu",
i, (unsigned long long)nvidia_opencls[i].global_mem_size,
i, (unsigned long long)nvidia_opencls[i].local_mem_size);
i, (unsigned long long)nvidia_opencls[i].opencl_prop.global_mem_size,
i, (unsigned long long)nvidia_opencls[i].opencl_prop.local_mem_size
);
msg_printf(0, MSG_INFO,
"[coproc_debug] COPROC_NVIDIA [no CUDA]: nvidia_opencls[%d].global_mem_size = %llu; nvidia_opencls[%d].local_mem_size = %llu",
i, (unsigned long long)nvidia_opencls[i].global_mem_size,
i, (unsigned long long)nvidia_opencls[i].local_mem_size);
i, (unsigned long long)nvidia_opencls[i].opencl_prop.global_mem_size,
i, (unsigned long long)nvidia_opencls[i].opencl_prop.local_mem_size
);
}
if (in_vector(nvidia_opencls[i].device_num, ignore_nvidia_dev)) continue;
if (in_vector(nvidia_opencls[i].opencl_prop.device_num, ignore_nvidia_dev)) continue;
bool is_best = false;
if (first) {
is_best = true;
first = false;
} else if (nvidia_opencl_compare(nvidia_opencls[i], nvidia, false) > 0) {
} else if (opencl_compare(nvidia_opencls[i], nvidia, false) > 0) {
is_best = true;
}
if (is_best) {
nvidia.opencl_prop = nvidia_opencls[i]; // fill in what info we have
strcpy(nvidia.prop.name, nvidia_opencls[i].name);
nvidia.prop.totalGlobalMem = nvidia_opencls[i].global_mem_size;
nvidia.prop.clockRate = nvidia_opencls[i].max_clock_frequency * 1000;
nvidia.device_num = nvidia_opencls[i].device_num;
// fill in what info we have
nvidia.opencl_prop = nvidia_opencls[i].opencl_prop;
strcpy(nvidia.prop.name, nvidia_opencls[i].opencl_prop.name);
nvidia.prop.totalGlobalMem = nvidia_opencls[i].opencl_prop.global_mem_size;
nvidia.prop.clockRate = nvidia_opencls[i].opencl_prop.max_clock_frequency * 1000;
nvidia.device_num = nvidia_opencls[i].opencl_prop.device_num;
nvidia.have_opencl = true;
nvidia.set_peak_flops();
}
@ -371,10 +361,10 @@ void COPROCS::get_opencl(bool use_all,
nvidia.count = 0;
nvidia.opencl_device_count = 0;
for (i=0; i<nvidia_opencls.size(); i++) {
if (!in_vector(nvidia_opencls[i].device_num, ignore_nvidia_dev)) {
if (use_all || !nvidia_opencl_compare(nvidia_opencls[i], nvidia, true)) {
nvidia.device_nums[nvidia.count++] = nvidia_opencls[i].device_num;
nvidia.opencl_device_ids[nvidia.opencl_device_count++] = nvidia_opencls[i].device_id;
if (!in_vector(nvidia_opencls[i].opencl_prop.device_num, ignore_nvidia_dev)) {
if (use_all || !opencl_compare(nvidia_opencls[i], nvidia, true)) {
nvidia.device_nums[nvidia.count++] = nvidia_opencls[i].opencl_prop.device_num;
nvidia.opencl_device_ids[nvidia.opencl_device_count++] = nvidia_opencls[i].opencl_prop.device_id;
}
}
}
@ -383,13 +373,13 @@ void COPROCS::get_opencl(bool use_all,
// Create descriptions for OpenCL NVIDIA GPUs
for (i=0; i<nvidia_opencls.size(); i++) {
char buf2[256];
opencl_description(nvidia_opencls[i], buf);
opencl_description(nvidia_opencls[i].opencl_prop, buf);
if (in_vector(nvidia_opencls[i].device_num, ignore_nvidia_dev)) {
sprintf(buf2, "OpenCL: NVIDIA GPU %d (ignored by config): %s", nvidia_opencls[i].device_num, buf);
} else {
used = false;
for (j=0; j<nvidia.opencl_device_count; j++) {
if (nvidia_opencls[i].device_id == nvidia.opencl_device_ids[j]) {
if (nvidia_opencls[i].opencl_prop.device_id == nvidia.opencl_device_ids[j]) {
used = true;
break;
}
@ -405,10 +395,10 @@ void COPROCS::get_opencl(bool use_all,
if (ati.have_cal) { // If CAL already found the "best" CAL GPU
for (i=0; i<ati_opencls.size(); i++) {
if (in_vector(ati_opencls[i].device_num, ignore_ati_dev)) continue;
if (ati.matches(ati_opencls[i])) {
ati.opencl_prop = ati_opencls[i];
ati.opencl_device_ids[0] = ati_opencls[i].device_id;
if (in_vector(ati_opencls[i].opencl_prop.device_num, ignore_ati_dev)) continue;
if (ati.matches(ati_opencls[i].opencl_prop)) {
ati.opencl_prop = ati_opencls[i].opencl_prop;
ati.opencl_device_ids[0] = ati_opencls[i].opencl_prop.device_id;
ati.have_opencl = true;
// Work around a bug in OpenCL which returns only
@ -420,7 +410,7 @@ void COPROCS::get_opencl(bool use_all,
}
//TODO: This assumes OpenCL and CAL return the same device with the same index
for (i=0; i<(unsigned int)ati.count; ++i) {
ati.opencl_device_ids[i] = ati_opencls[ati.device_nums[i]].device_id;
ati.opencl_device_ids[i] = ati_opencls[ati.device_nums[i]].opencl_prop.device_id;
}
ati.opencl_device_count = ati.count;
} else {
@ -432,35 +422,35 @@ void COPROCS::get_opencl(bool use_all,
// 1/2 of total global RAM size.
// This bug applies only to ATI GPUs, not to NVIDIA
// Assume this will be fixed in openCL 1.2.
if ((!strstr("1.0", ati_opencls[i].opencl_platform_version)) ||
(!strstr("1.1", ati_opencls[i].opencl_platform_version))) {
ati_opencls[i].global_mem_size *= 2;
if ((!strstr("1.0", ati_opencls[i].opencl_prop.opencl_platform_version)) ||
(!strstr("1.1", ati_opencls[i].opencl_prop.opencl_platform_version))) {
ati_opencls[i].opencl_prop.global_mem_size *= 2;
}
//TODO: Temporary code for testing
if (log_flags.coproc_debug) {
msg_printf(0, MSG_INFO,
"[coproc_debug] COPROC_ATI [no CAL]: ati_opencls[%d].name = '%s'; ati_opencls[%d].device_id = %p, ati_opencls[%d].device_num = %d",
i, ati_opencls[i].name, i, ati_opencls[i].device_id, i, ati_opencls[i].device_num);
"[coproc_debug] COPROC_ATI [no CAL]: ati_opencls[%d].opencl_prop.name = '%s'; ati_opencls[%d].opencl_prop.device_id = %p, ati_opencls[%d].opencl_prop.device_num = %d",
i, ati_opencls[i].opencl_prop.name, i, ati_opencls[i].opencl_prop.device_id, i, ati_opencls[i].opencl_prop.device_num);
msg_printf(0, MSG_INFO,
"[coproc_debug] COPROC_ATI [no CAL]: ati_opencls[%d].global_mem_size = %llu; ati_opencls[%d].local_mem_size = %llu",
i, (unsigned long long)ati_opencls[i].global_mem_size,
i, (unsigned long long)ati_opencls[i].local_mem_size);
"[coproc_debug] COPROC_ATI [no CAL]: ati_opencls[%d].opencl_prop.global_mem_size = %llu; ati_opencls[%d].opencl_prop.local_mem_size = %llu",
i, (unsigned long long)ati_opencls[i].opencl_prop.global_mem_size,
i, (unsigned long long)ati_opencls[i].opencl_prop.local_mem_size);
}
if (in_vector(ati_opencls[i].device_num, ignore_ati_dev)) continue;
if (in_vector(ati_opencls[i].opencl_prop.device_num, ignore_ati_dev)) continue;
bool is_best = false;
if (first) {
is_best = true;
first = false;
} else if (ati_opencl_compare(ati_opencls[i], ati, false) > 0) {
} else if (opencl_compare(ati_opencls[i], ati, false) > 0) {
is_best = true;
}
if (is_best) {
ati.opencl_prop = ati_opencls[i]; // fill in what info we have
strcpy(ati.name, ati_opencls[i].name);
ati.attribs.localRAM = ati_opencls[i].local_mem_size;
ati.attribs.engineClock = ati_opencls[i].max_clock_frequency;
ati.device_num = ati_opencls[i].device_num;
ati.opencl_prop = ati_opencls[i].opencl_prop; // fill in what info we have
strcpy(ati.name, ati_opencls[i].opencl_prop.name);
ati.attribs.localRAM = ati_opencls[i].opencl_prop.local_mem_size;
ati.attribs.engineClock = ati_opencls[i].opencl_prop.max_clock_frequency;
ati.device_num = ati_opencls[i].opencl_prop.device_num;
ati.set_peak_flops();
ati.have_opencl = true;
}
@ -473,9 +463,9 @@ void COPROCS::get_opencl(bool use_all,
ati.opencl_device_count = 0;
for (i=0; i<ati_opencls.size(); i++) {
if (!in_vector(ati_opencls[i].device_num, ignore_ati_dev)) {
if (use_all || !ati_opencl_compare(ati_opencls[i], ati, true)) {
if (use_all || !opencl_compare(ati_opencls[i], ati, true)) {
ati.device_nums[ati.count++] = ati_opencls[i].device_num;
ati.opencl_device_ids[ati.opencl_device_count++] = ati_opencls[i].device_id;
ati.opencl_device_ids[ati.opencl_device_count++] = ati_opencls[i].opencl_prop.device_id;
}
}
}
@ -484,13 +474,13 @@ void COPROCS::get_opencl(bool use_all,
// Create descriptions for OpenCL ATI GPUs
for (i=0; i<(unsigned int)ati.opencl_device_count; i++) {
char buf2[256];
opencl_description(ati_opencls[i], buf);
opencl_description(ati_opencls[i].opencl_prop, buf);
if (in_vector(ati_opencls[i].device_num, ignore_ati_dev)) {
sprintf(buf2, "OpenCL: ATI GPU %d (ignored by config): %s", ati_opencls[i].device_num, buf);
} else {
used = false;
for (j=0; j<ati.opencl_device_count; j++) {
if (ati_opencls[i].device_id == ati.opencl_device_ids[j]) {
if (ati_opencls[i].opencl_prop.device_id == ati.opencl_device_ids[j]) {
used = true;
break;
}
@ -510,7 +500,6 @@ void COPROCS::get_opencl(bool use_all,
cl_int COPROCS::get_opencl_info(
OPENCL_DEVICE_PROP& prop,
cl_uint device_index,
vector<string>& descs,
vector<string>&warnings
) {
cl_int ciErrNum;

View File

@ -171,6 +171,7 @@ int COPROC::parse_opencl(XML_PARSER& xp) {
while (!xp.get_tag()) {
if (xp.match_tag("/coproc_opencl")) {
opencl_prop.get_device_version_int();
return 0;
}
if (xp.parse_str("name", opencl_prop.name, sizeof(opencl_prop.name))) continue;
@ -244,6 +245,18 @@ int COPROC::parse_opencl(XML_PARSER& xp) {
return ERR_XML_PARSE;
}
// Parse opencl_device_version (e.g. "OpenCL 1.1 beta") and store it
// in opencl_device_version_int, encoded as 100*major + minor (e.g. 101).
// Returns 0 on success.
// If the string doesn't start with "OpenCL <major>.<minor>",
// returns ERR_NOT_FOUND and sets opencl_device_version_int to 0,
// so that callers that ignore the return value never compare against
// an uninitialized value or one left over from a previously-scanned device.
//
int OPENCL_DEVICE_PROP::get_device_version_int() {
    int maj = 0, min = 0;
    int n = sscanf(
        opencl_device_version, "OpenCL %d.%d", &maj, &min
    );
    if (n != 2) {
        // unknown version: treat as lower than any real version
        opencl_device_version_int = 0;
        return ERR_NOT_FOUND;
    }
    opencl_device_version_int = 100*maj + min;
    return 0;
}
void COPROCS::opencl_description(OPENCL_DEVICE_PROP& prop, char* buf) {
char s[256];
int n;
@ -515,15 +528,6 @@ int COPROC_NVIDIA::parse(XML_PARSER& xp) {
return ERR_XML_PARSE;
}
// Estimate the peak FLOPS of an NVIDIA GPU from OpenCL properties alone.
// OpenCL doesn't report compute capability, so assume
// 8 cores per compute unit and 2 flops per clock.
// The clock frequency is multiplied by 1e6 to scale it up.
//
double COPROC_NVIDIA::get_peak_flops(OPENCL_DEVICE_PROP& prop) {
    const int cores_per_proc = 8;
    const int flops_per_clock = 2;
    const double flops_per_mhz =
        prop.max_compute_units * cores_per_proc * flops_per_clock * prop.max_clock_frequency;
    return flops_per_mhz * 1e6;
}
void COPROC_NVIDIA::set_peak_flops() {
double x=0;
if (have_cuda) {
@ -551,7 +555,7 @@ void COPROC_NVIDIA::set_peak_flops() {
// OpenCL doesn't give us compute capability.
// assume cores_per_proc is 8 and flops_per_clock is 2
//
x = get_peak_flops(opencl_prop);
x = opencl_prop.max_compute_units * 8 * 2 * opencl_prop.max_clock_frequency * 1e6;
}
peak_flops = (x>0)?x:5e10;
}
@ -742,19 +746,6 @@ void COPROC_ATI::description(char* buf) {
);
}
// Estimate the peak FLOPS of an ATI GPU from OpenCL properties alone.
// OpenCL reports only:
// - max_compute_units (assumed equivalent to attribs.numberOfSIMD)
// - max_clock_frequency (assumed equivalent to engineClock)
// It doesn't report wavefrontSize, which can be 16/32/64,
// so the conservative value 16 is used.
//
double COPROC_ATI::get_peak_flops(OPENCL_DEVICE_PROP& prop) {
    const int assumed_wavefront_size = 16;
    // NOTE(review): the factor 5 is presumably flops per lane per clock -- confirm
    const double flops_per_mhz =
        prop.max_compute_units * assumed_wavefront_size * 5 * prop.max_clock_frequency;
    return flops_per_mhz * 1e6;
}
void COPROC_ATI::set_peak_flops() {
double x = 0;
if (have_cal) {
@ -768,7 +759,7 @@ void COPROC_ATI::set_peak_flops() {
// It doesn't give wavefrontSize, which can be 16/32/64.
// So let's be conservative and use 16
//
x = get_peak_flops(opencl_prop);
x = opencl_prop.max_compute_units * 16 * 5 * opencl_prop.max_clock_frequency * 1e6;
}
peak_flops = (x>0)?x:5e10;
}

View File

@ -114,11 +114,15 @@ struct OPENCL_DEVICE_PROP {
cl_ulong local_mem_size;
cl_uint max_clock_frequency; // in MHz
cl_uint max_compute_units;
char opencl_platform_version[64]; // Version of OpenCL platform for this device
char opencl_platform_version[64]; // Version of OpenCL supported
// by the device's platform
char opencl_device_version[64]; // OpenCL version supported by device;
// example: "OpenCL 1.1 beta"
int opencl_device_version_int; // same, encoded as e.g. 101
int get_device_version_int(); // call this to encode
char opencl_driver_version[32]; // For example: "CLH 1.0"
int device_num; // temp used in scan process
};
@ -260,12 +264,11 @@ struct COPROC_NVIDIA : public COPROC {
std::vector<std::string>&, std::vector<std::string>&,
std::vector<int>& ignore_devs
);
void description(char*);
void description(char*);
void clear();
int parse(XML_PARSER&);
void get_available_ram();
double get_peak_flops(OPENCL_DEVICE_PROP& prop);
void set_peak_flops();
void set_peak_flops();
bool check_running_graphics_app();
bool matches(OPENCL_DEVICE_PROP& OpenCLprop);
void fake(int driver_version, double ram, double avail_ram, int count);
@ -296,8 +299,7 @@ struct COPROC_ATI : public COPROC {
int parse(XML_PARSER&);
void get_available_ram();
bool matches(OPENCL_DEVICE_PROP& OpenCLprop);
double get_peak_flops(OPENCL_DEVICE_PROP& prop);
void set_peak_flops();
void set_peak_flops();
void fake(double ram, double avail_ram, int);
};
@ -323,7 +325,6 @@ struct COPROCS {
cl_int get_opencl_info(
OPENCL_DEVICE_PROP& prop,
cl_uint device_index,
std::vector<std::string>& descs,
std::vector<std::string>& warnings
);
void opencl_description(OPENCL_DEVICE_PROP& prop, char* buf);

View File

@ -430,19 +430,7 @@ static inline bool opencl_check(
double cpu_frac,
double flops_scale
) {
int device_version, maj, min;
int n = sscanf(
cp.opencl_prop.opencl_device_version, "OpenCL %d.%d", &maj, &min
);
if (n != 2) {
log_messages.printf(MSG_CRITICAL,
"can't parse device version: %s\n",
cp.opencl_prop.opencl_device_version
);
return false;
}
device_version = 100*maj + min;
if (device_version < min_opencl_device_version) {
if (cp.opencl_prop.opencl_device_version_int < min_opencl_device_version) {
return false;
}
if (cp.opencl_prop.global_mem_size < min_global_mem_size) {