From b9981d7a56255512aaf184f6e93c29643fece255 Mon Sep 17 00:00:00 2001 From: Rom Walton Date: Thu, 7 Jan 2016 01:01:51 -0500 Subject: [PATCH] client: Properly calculate the peak FLOPS of new AMD GPU(s) that only support OpenCL. Use AMD's vendor specific extension if it is available to calculate the total number of shaders and determine the peak FLOP rate from that. My new GPU I got for Christmas was only reporting 30% of its peak FLOP rate and does not support CAL. --- client/gpu_opencl.cpp | 40 ++++++++++++++++++++++++++++++++++++++++ lib/coproc.cpp | 8 ++++++++ lib/opencl_boinc.cpp | 24 ++++++++++++++++++++++++ lib/opencl_boinc.h | 13 +++++++++++++ 4 files changed, 85 insertions(+) diff --git a/client/gpu_opencl.cpp b/client/gpu_opencl.cpp index 3aeeaa2f49..113998d130 100644 --- a/client/gpu_opencl.cpp +++ b/client/gpu_opencl.cpp @@ -877,6 +877,46 @@ cl_int COPROCS::get_opencl_info( return ciErrNum; } + ciErrNum = (*__clGetDeviceInfo)(prop.device_id, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, sizeof(prop.amd_simd_per_compute_unit), &prop.amd_simd_per_compute_unit, NULL); + if (ciErrNum != CL_SUCCESS) { + snprintf(buf, sizeof(buf), + "clGetDeviceInfo failed to get CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD for device %d", + (int)device_index + ); + warnings.push_back(buf); + return ciErrNum; + } + + ciErrNum = (*__clGetDeviceInfo)(prop.device_id, CL_DEVICE_SIMD_WIDTH_AMD, sizeof(prop.amd_simd_width), &prop.amd_simd_width, NULL); + if (ciErrNum != CL_SUCCESS) { + snprintf(buf, sizeof(buf), + "clGetDeviceInfo failed to get CL_DEVICE_SIMD_WIDTH_AMD for device %d", + (int)device_index + ); + warnings.push_back(buf); + return ciErrNum; + } + + ciErrNum = (*__clGetDeviceInfo)(prop.device_id, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(prop.amd_simd_instruction_width), &prop.amd_simd_instruction_width, NULL); + if (ciErrNum != CL_SUCCESS) { + snprintf(buf, sizeof(buf), + "clGetDeviceInfo failed to get CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD for device %d", + (int)device_index + ); + 
warnings.push_back(buf); + return ciErrNum; + } + + ciErrNum = (*__clGetDeviceInfo)(prop.device_id, CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof(prop.amd_wavefront_width), &prop.amd_wavefront_width, NULL); + if (ciErrNum != CL_SUCCESS) { + snprintf(buf, sizeof(buf), + "clGetDeviceInfo failed to get CL_DEVICE_WAVEFRONT_WIDTH_AMD for device %d", + (int)device_index + ); + warnings.push_back(buf); + return ciErrNum; + } + } return CL_SUCCESS; diff --git a/lib/coproc.cpp b/lib/coproc.cpp index 239b4a4fd0..5e4605d035 100644 --- a/lib/coproc.cpp +++ b/lib/coproc.cpp @@ -773,6 +773,14 @@ void COPROC_ATI::set_peak_flops() { if (attribs.numberOfSIMD) { x = attribs.numberOfSIMD * attribs.wavefrontSize * 5 * attribs.engineClock * 1.e6; // clock is in MHz + } else if (opencl_prop.amd_simd_per_compute_unit) { + // OpenCL w/ cl_amd_device_attribute_query extension + // Per: https://en.wikipedia.org/wiki/List_of_AMD_graphics_processing_units + // + // Single precision performance is calculated as two times the number of shaders multiplied by the base core clock speed. 
+ // + // clock is in MHz + x = opencl_prop.max_compute_units * opencl_prop.amd_simd_per_compute_unit * opencl_prop.amd_simd_width * 2 * (opencl_prop.max_clock_frequency * 1.e6); } else if (opencl_prop.max_compute_units) { // OpenCL gives us only: // - max_compute_units diff --git a/lib/opencl_boinc.cpp b/lib/opencl_boinc.cpp index 4efd3764cd..df60b404b2 100644 --- a/lib/opencl_boinc.cpp +++ b/lib/opencl_boinc.cpp @@ -60,6 +60,10 @@ void OPENCL_DEVICE_PROP::write_xml(MIOFILE& f, const char* tag, bool temp_file) " <local_mem_size>%llu</local_mem_size>\n" " <max_clock_frequency>%lu</max_clock_frequency>\n" " <max_compute_units>%lu</max_compute_units>\n" + " <amd_simd_per_compute_unit>%lu</amd_simd_per_compute_unit>\n" + " <amd_simd_width>%lu</amd_simd_width>\n" + " <amd_simd_instruction_width>%lu</amd_simd_instruction_width>\n" + " <amd_wavefront_width>%lu</amd_wavefront_width>\n" " <opencl_platform_version>%s</opencl_platform_version>\n" " <opencl_device_version>%s</opencl_device_version>\n" " <opencl_driver_version>%s</opencl_driver_version>\n", @@ -78,6 +82,10 @@ void OPENCL_DEVICE_PROP::write_xml(MIOFILE& f, const char* tag, bool temp_file) local_mem_size, (unsigned long)max_clock_frequency, (unsigned long)max_compute_units, + (unsigned long)amd_simd_per_compute_unit, + (unsigned long)amd_simd_width, + (unsigned long)amd_simd_instruction_width, + (unsigned long)amd_wavefront_width, opencl_platform_version, opencl_device_version, opencl_driver_version @@ -161,6 +169,22 @@ int OPENCL_DEVICE_PROP::parse(XML_PARSER& xp, const char* end_tag) { max_compute_units = n; continue; } + if (xp.parse_int("amd_simd_per_compute_unit", n)) { + amd_simd_per_compute_unit = n; + continue; + } + if (xp.parse_int("amd_simd_width", n)) { + amd_simd_width = n; + continue; + } + if (xp.parse_int("amd_simd_instruction_width", n)) { + amd_simd_instruction_width = n; + continue; + } + if (xp.parse_int("amd_wavefront_width", n)) { + amd_wavefront_width = n; + continue; + } if (xp.parse_str("opencl_platform_version", opencl_platform_version, sizeof(opencl_platform_version) diff --git a/lib/opencl_boinc.h b/lib/opencl_boinc.h index aaf490e83d..73430ecf2e 100644 --- a/lib/opencl_boinc.h +++ b/lib/opencl_boinc.h @@ -50,6 +50,19 @@ struct OPENCL_DEVICE_PROP { cl_ulong local_mem_size; cl_uint max_clock_frequency; // in MHz cl_uint max_compute_units; + + // + // cl_nv_device_attribute_query + // + + // + // cl_amd_device_attribute_query 
+ // + cl_uint amd_simd_per_compute_unit; + cl_uint amd_simd_width; + cl_uint amd_simd_instruction_width; + cl_uint amd_wavefront_width; + char opencl_platform_version[64]; // Version of OpenCL supported // the device's platform char opencl_device_version[64]; // OpenCL version supported by device;