From e456d3c8318de8bd46c8452e3c62f60ac571a201 Mon Sep 17 00:00:00 2001 From: Charlie Fenton Date: Mon, 10 Oct 2011 11:50:00 +0000 Subject: [PATCH] code cleanup svn path=/trunk/boinc/; revision=24361 --- client/coproc_detect.cpp | 405 ++++++++++++--------------------------- lib/coproc.cpp | 27 ++- lib/coproc.h | 31 ++- 3 files changed, 167 insertions(+), 296 deletions(-) diff --git a/client/coproc_detect.cpp b/client/coproc_detect.cpp index 8f95197f72..9659b748dc 100644 --- a/client/coproc_detect.cpp +++ b/client/coproc_detect.cpp @@ -156,16 +156,16 @@ cl_int (*__clGetDeviceInfo)(cl_device_id /* device */, // If "loose", tolerate small diff // -int opencl_compare(COPROC& c1, COPROC& c2, bool loose) { - if (c1.opencl_prop.opencl_device_version_int > c2.opencl_prop.opencl_device_version_int) return 1; - if (c1.opencl_prop.opencl_device_version_int < c2.opencl_prop.opencl_device_version_int) return -1; +int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) { + if (c1.opencl_device_version_int > c2.opencl_device_version_int) return 1; + if (c1.opencl_device_version_int < c2.opencl_device_version_int) return -1; if (loose) { - if (c1.opencl_prop.global_mem_size > 1.4*c2.opencl_prop.global_mem_size) return 1; - if (c1.opencl_prop.global_mem_size < .7*c2.opencl_prop.global_mem_size) return -1; + if (c1.global_mem_size > 1.4*c2.global_mem_size) return 1; + if (c1.global_mem_size < .7*c2.global_mem_size) return -1; return 0; } - if (c1.opencl_prop.global_mem_size > c2.opencl_prop.global_mem_size) return 1; - if (c1.opencl_prop.global_mem_size < c2.opencl_prop.global_mem_size) return -1; + if (c1.global_mem_size > c2.global_mem_size) return 1; + if (c1.global_mem_size < c2.global_mem_size) return -1; if (c1.peak_flops > c2.peak_flops) return 1; if (c1.peak_flops < c2.peak_flops) return -1; return 0; @@ -175,9 +175,10 @@ int opencl_compare(COPROC& c1, COPROC& c2, bool loose) { // http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/ and // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/ -void COPROCS::get_opencl(bool use_all, +void COPROCS::get_opencl( + bool use_all, vector& descs, - vector&warnings, + vector& warnings, vector& ignore_nvidia_dev, vector& ignore_ati_dev ) { @@ -187,12 +188,12 @@ void COPROCS::get_opencl(bool use_all, cl_device_id devices[MAX_COPROC_INSTANCES]; char platform_version[64]; OPENCL_DEVICE_PROP prop; - vector nvidia_opencls; - vector ati_opencls; + vector nvidia_opencls; + vector ati_opencls; + COPROC_NVIDIA nvidia_temp; + COPROC_ATI ati_temp; unsigned int i; - int j; char buf[256]; - bool used; #ifdef _WIN32 opencl_lib = LoadLibrary("OpenCL.dll"); @@ -275,23 +276,34 @@ void COPROCS::get_opencl(bool use_all, ciErrNum = get_opencl_info(prop, device_index, warnings); if (ciErrNum != CL_SUCCESS) break; + prop.is_used = COPROC_UNUSED; prop.get_device_version_int(); if (strstr(prop.vendor, GPU_TYPE_NVIDIA)) { prop.device_num = (int)(nvidia_opencls.size()); COPROC_NVIDIA c; c.opencl_prop = prop; c.set_peak_flops(); - nvidia_opencls.push_back(c); + prop.peak_flops = c.peak_flops; + nvidia_opencls.push_back(prop); } if ((strstr(prop.vendor, GPU_TYPE_ATI)) || (strstr(prop.vendor, "AMD")) || (strstr(prop.vendor, "Advanced Micro Devices, Inc.")) ) { prop.device_num = (int)(ati_opencls.size()); + // Work around a bug in OpenCL which returns only + // 1/2 of total global RAM size. + // This bug applies only to ATI GPUs, not to NVIDIA + // Assume this will be fixed in openCL 1.2. + if ((!strstr("1.0", prop.opencl_platform_version)) || + (!strstr("1.1", prop.opencl_platform_version))) { + prop.global_mem_size *= 2; + } COPROC_ATI c; c.opencl_prop = prop; c.set_peak_flops(); - ati_opencls.push_back(c); + prop.peak_flops = c.peak_flops; + ati_opencls.push_back(prop); } } } @@ -300,200 +312,37 @@ void COPROCS::get_opencl(bool use_all, warnings.push_back("OpenCL library present but no OpenCL-capable GPUs found"); return; } - + if (nvidia.have_cuda) { // If CUDA already found the "best" NVIDIA GPU - for (i=0; i 0) { - is_best = true; - } - if (is_best) { - // fill in what info we have - nvidia.opencl_prop = nvidia_opencls[i].opencl_prop; - strcpy(nvidia.prop.name, nvidia_opencls[i].opencl_prop.name); - nvidia.prop.totalGlobalMem = nvidia_opencls[i].opencl_prop.global_mem_size; - nvidia.prop.clockRate = nvidia_opencls[i].opencl_prop.max_clock_frequency * 1000; - nvidia.device_num = nvidia_opencls[i].opencl_prop.device_num; - nvidia.have_opencl = true; - nvidia.set_peak_flops(); - } - } + find_best_opencls(use_all, nvidia, nvidia_opencls, ignore_nvidia_dev); + nvidia.prop.totalGlobalMem = nvidia.opencl_prop.global_mem_size; + nvidia.prop.clockRate = nvidia.opencl_prop.max_clock_frequency * 1000; + } - // see which other instances are equivalent, and set the count, - // device_nums, opencl_device_count and opencl_device_ids fields - // - nvidia.count = 0; - nvidia.opencl_device_count = 0; - for (i=0; i 0) { - is_best = true; - } - if (is_best) { - ati.opencl_prop = ati_opencls[i].opencl_prop; // fill in what info we have - strcpy(ati.name, ati_opencls[i].opencl_prop.name); - ati.attribs.localRAM = ati_opencls[i].opencl_prop.local_mem_size; - ati.attribs.engineClock = ati_opencls[i].opencl_prop.max_clock_frequency; - ati.device_num = ati_opencls[i].opencl_prop.device_num; - ati.set_peak_flops(); - ati.have_opencl = true; - } - } - - // see which other instances are equivalent, and set the count, - // device_nums, opencl_device_count and opencl_device_ids fields - // - ati.count = 0; - ati.opencl_device_count = 0; - for (i=0; i &opencls, + vector& ignore_dev +) { + unsigned int i; + + for (i=0; i &opencls, + vector& ignore_dev +) { + unsigned int i; + + // identify the most capable ATI or NVIDIA OpenCL GPU + // + bool first = true; + for (i=0; i 0) { + is_best = true; + } + if (is_best) { + // fill in what info we have + best.opencl_prop = opencls[i]; + best.device_num = opencls[i].device_num; + best.peak_flops = opencls[i].peak_flops; + best.have_opencl = true; + } + } + + // see which other instances are equivalent, and set the count, + // device_nums, opencl_device_count and opencl_device_ids fields + // + best.count = 0; + best.opencl_device_count = 0; + for (i=0; i 0) && (s[n] == ' ')) s[n] = '\0'; - sprintf(buf, "%s (driver version %s, device version %s, %.0fMB)", - name, opencl_driver_version, s, global_mem_size/MEGA + strlcpy(s1, opencl_device_version, sizeof(s1)); + n = strlen(s1) - 1; + if ((n > 0) && (s1[n] == ' ')) s1[n] = '\0'; + sprintf(s2, "%s (driver version %s, device version %s, %.0fMB)", + name, opencl_driver_version, s1, global_mem_size/MEGA ); + + switch(is_used) { + case COPROC_IGNORED: + sprintf(buf, "OpenCL: %s GPU %d (ignored by config): %s", type, device_num, s2); + break; + case COPROC_USED: + sprintf(buf, "OpenCL: %s GPU %d: %s", type, device_num, s2); + break; + case COPROC_UNUSED: + default: + sprintf(buf, "OpenCL: %s GPU %d (not used): %s", type, device_num, s2); + break; + } } void COPROCS::summary_string(char* buf, int len) { diff --git a/lib/coproc.h b/lib/coproc.h index 3ca07481c6..a33f6e94bf 100644 --- a/lib/coproc.h +++ b/lib/coproc.h @@ -85,6 +85,13 @@ #define GPU_TYPE_NVIDIA "NVIDIA" #define GPU_TYPE_ATI "ATI" +enum COPROC_USAGE { + COPROC_IGNORED, + COPROC_UNUSED, + COPROC_USED +}; + + // represents a requirement for a coproc. // This is a parsed version of the elements in an // (used in client only) @@ -122,13 +129,14 @@ struct OPENCL_DEVICE_PROP { int get_device_version_int(); // call this to encode char opencl_driver_version[32]; // For example: "CLH 1.0" int device_num; // temp used in scan process + double peak_flops; // temp used in scan process + COPROC_USAGE is_used; // temp used in scan process #ifndef _USING_FCGI_ void write_xml(MIOFILE&); #endif int parse(XML_PARSER&); - void description(char*); - +void description(char* buf, char* type); }; @@ -274,7 +282,6 @@ struct COPROC_NVIDIA : public COPROC { void get_available_ram(); void set_peak_flops(); bool check_running_graphics_app(); - bool matches(OPENCL_DEVICE_PROP& OpenCLprop); void fake(int driver_version, double ram, double avail_ram, int count); }; @@ -302,7 +309,6 @@ struct COPROC_ATI : public COPROC { void clear(); int parse(XML_PARSER&); void get_available_ram(); - bool matches(OPENCL_DEVICE_PROP& OpenCLprop); void set_peak_flops(); void fake(double ram, double avail_ram, int); }; @@ -315,13 +321,15 @@ struct COPROCS { void write_xml(MIOFILE& out, bool include_request); void get( - bool use_all, std::vector &descs, + bool use_all, + std::vector &descs, std::vector &warnings, std::vector& ignore_nvidia_dev, std::vector& ignore_ati_dev ); void get_opencl( - bool use_all, std::vector& descs, + bool use_all, + std::vector& descs, std::vector &warnings, std::vector& ignore_nvidia_dev, std::vector& ignore_ati_dev @@ -331,6 +339,17 @@ struct COPROCS { cl_uint device_index, std::vector& warnings ); + void merge_opencl_into_best( + COPROC &best, + std::vector &opencls, + std::vector& ignore_dev + ); + void find_best_opencls( + bool use_all, + COPROC &best, + std::vector &opencls, + std::vector& ignore_dev + ); int parse(XML_PARSER&); void summary_string(char* buf, int len);