client: begin adding support for OpenCL devices other than AMD/ATI, NVIDIA or Intel

2014-07-16 04:33:26 -07:00 · 2014-07-16 04:33:26 -07:00 · b37cf4cd9a
parent ac1b78b1fe
commit b37cf4cd9a
5 changed files with 169 additions and 21 deletions
--- a/client/gpu_detect.cpp
+++ b/client/gpu_detect.cpp
@ -60,8 +60,12 @@ vector<COPROC_INTEL> intel_gpus;
 vector<OPENCL_DEVICE_PROP> ati_opencls;
 vector<OPENCL_DEVICE_PROP> nvidia_opencls;
 vector<OPENCL_DEVICE_PROP> intel_gpu_opencls;
+vector<OPENCL_DEVICE_PROP> other_opencls[MAX_RSC];
 vector<OPENCL_CPU_PROP> cpu_opencls;

+// Number of OpenCL coproc vendors other than AMD, NVIDIA or Intel
+int num_other_opencl_types;
+
 static char* client_path;
    // argv[0] from the command used to run client.
    // May be absolute or relative.
@ -100,6 +104,8 @@ void COPROCS::get(


 void COPROCS::detect_gpus(std::vector<std::string> &warnings) {
+    num_other_opencl_types = 0;
+    
 #ifdef _WIN32
    try {
        nvidia.get(warnings);
@ -163,7 +169,7 @@ void COPROCS::correlate_gpus(
    std::vector<std::string> &descs,
    IGNORE_GPU_INSTANCE &ignore_gpu_instance
 ) {
-    unsigned int i;
+    unsigned int i, j;
    char buf[256], buf2[256];

    nvidia.correlate(use_all, ignore_gpu_instance[PROC_TYPE_NVIDIA_GPU]);
@ -251,6 +257,17 @@ void COPROCS::correlate_gpus(
        descs.push_back(string(buf));
    }

+    // Create descriptions for other OpenCL GPUs
+    //
+    for (j=0; j<num_other_opencl_types; j++) {
+        strcpy(coprocs[n_rsc++].type, proc_type_name_xml(PROC_TYPE_OTHER_COPROC));
+        snprintf(buf2, sizeof(buf2), "device type %c number", 'A'+j);
+        for (i=0; i<other_opencls[j].size(); i++) {
+            other_opencls[j][i].description(buf, sizeof(buf), buf2);
+            descs.push_back(string(buf));
+        }
+    }
+    
    // Create descriptions for OpenCL CPUs
    //
    for (i=0; i<cpu_opencls.size(); i++) {
@ -264,8 +281,10 @@ void COPROCS::correlate_gpus(
    ati_opencls.clear();
    nvidia_opencls.clear();
    intel_gpu_opencls.clear();
+    for (j=0; j<MAX_RSC; j++) {
+        other_opencls[j].clear();
+    }
    cpu_opencls.clear();
-
 }

 // Some dual-GPU laptops (e.g., Macbook Pro) don't 
@ -285,7 +304,7 @@ void COPROCS::set_path_to_client(char *path) {

 int COPROCS::write_coproc_info_file(vector<string> &warnings) {
    MIOFILE mf;
-    unsigned int i, temp;
+    unsigned int i, j, temp;
    FILE* f;
    
    f = boinc_fopen(COPROC_INFO_FILENAME, "wb");
@ -316,6 +335,11 @@ int COPROCS::write_coproc_info_file(vector<string> &warnings) {
    for (i=0; i<intel_gpu_opencls.size(); ++i) {
        intel_gpu_opencls[i].write_xml(mf, "intel_gpu_opencl", true);
    }
+    for (j=0; j<num_other_opencl_types; j++) {
+        for (i=0; i<other_opencls[j].size(); i++) {
+            other_opencls[j][i].write_xml(mf, "other_opencl", true);
+        }
+    }
    for (i=0; i<cpu_opencls.size(); i++) {
        cpu_opencls[i].write_xml(mf);
    }
@ -340,6 +364,7 @@ int COPROCS::read_coproc_info_file(vector<string> &warnings) {
    OPENCL_DEVICE_PROP ati_opencl;
    OPENCL_DEVICE_PROP nvidia_opencl;
    OPENCL_DEVICE_PROP intel_gpu_opencl;
+    OPENCL_DEVICE_PROP other_opencl;
    OPENCL_CPU_PROP cpu_opencl;

    ati_gpus.clear();
@ -348,7 +373,11 @@ int COPROCS::read_coproc_info_file(vector<string> &warnings) {
    ati_opencls.clear();
    nvidia_opencls.clear();
    intel_gpu_opencls.clear();
+    for (int j=0; j<MAX_RSC; j++) {
+        other_opencls[j].clear();
+    }
    cpu_opencls.clear();
+    num_other_opencl_types = 0;

    f = boinc_fopen(COPROC_INFO_FILENAME, "r");
    if (!f) return ERR_FOPEN;
@ -434,6 +463,39 @@ int COPROCS::read_coproc_info_file(vector<string> &warnings) {
            continue;
        }

+        if (xp.match_tag("other_opencl")) {
+            memset(&other_opencl, 0, sizeof(other_opencl));
+            retval = other_opencl.parse(xp, "/other_opencl");
+            if (retval) {
+                memset(&other_opencl, 0, sizeof(other_opencl));
+            } else {
+                other_opencl.is_used = COPROC_IGNORED;
+                
+                int vendor_index;
+                // Put all coprocessors from same vendor in same other_opencls vector
+                for (vendor_index=0; vendor_index<num_other_opencl_types; vendor_index++) {
+                    if (other_opencls[vendor_index].size() == 0) {
+                        continue;       // Should never happen
+                    }
+                    if (other_opencls[vendor_index][0].vendor_id == other_opencl.vendor_id) {
+                        break;  // This vector contains coproc(s) from same vendor
+                    }
+                }
+                
+                if (vendor_index >= MAX_RSC) {
+                    // Too many OpenCL device vendors found (should never happen here)
+                    continue;   // Discard this coprocessor's info
+                }
+
+                if (vendor_index >= num_other_opencl_types) {
+                    num_other_opencl_types = vendor_index + 1;
+                }
+                
+                other_opencls[vendor_index].push_back(other_opencl);
+            }
+            continue;
+        }
+
        if (xp.match_tag("opencl_cpu_prop")) {
            memset(&cpu_opencl, 0, sizeof(cpu_opencl));
            retval = cpu_opencl.parse(xp);
@ -528,7 +590,11 @@ int COPROCS::launch_child_process_to_detect_gpus() {
        client_path,
        argc,
        argv, 
+#ifdef _DEBUG
+        1,
+#else
        0,
+#endif
        prog
    );

--- a/client/gpu_detect.h
+++ b/client/gpu_detect.h
@ -21,5 +21,8 @@ extern vector<COPROC_INTEL> intel_gpus;
 extern vector<OPENCL_DEVICE_PROP> nvidia_opencls;
 extern vector<OPENCL_DEVICE_PROP> ati_opencls;
 extern vector<OPENCL_DEVICE_PROP> intel_gpu_opencls;
+extern vector<OPENCL_DEVICE_PROP> other_opencls[MAX_RSC];
 extern vector<OPENCL_CPU_PROP> cpu_opencls;

+extern int num_other_opencl_types;
+
--- a/client/gpu_opencl.cpp
+++ b/client/gpu_opencl.cpp
@ -17,6 +17,9 @@

 // Detection of GPUs using OpenCL

+// TODO: Eliminate this, or improve it
+#define TEST_OTHER_COPROC_LOGIC 0
+
 #ifdef _WIN32
 #include "boinc_win.h"
 #ifdef _MSC_VER
@ -289,10 +292,11 @@ void COPROCS::get_opencl(
            cpu_opencls.push_back(c);
        }

-        //////////// GPUs //////////////
+        //////////// GPUs and Accelerators //////////////
        
        ciErrNum = (*__clGetDeviceIDs)(
-            platforms[platform_index], (CL_DEVICE_TYPE_GPU),
+            platforms[platform_index],
+            (CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR),
            MAX_COPROC_INSTANCES, devices, &num_devices
        );

@ -367,6 +371,11 @@ void COPROCS::get_opencl(
            ciErrNum = get_opencl_info(prop, device_index, warnings);
            if (ciErrNum != CL_SUCCESS) continue;

+// TODO: Eliminate this, or improve it
+#if TEST_OTHER_COPROC_LOGIC
+            safe_strcpy(prop.vendor, "Some Other GPU");
+#endif
+
            prop.is_used = COPROC_UNUSED;
            prop.get_device_version_int();

@ -432,7 +441,7 @@ void COPROCS::get_opencl(
            }
            
            //////////// AMD / ATI //////////////
-            if (is_AMD(prop.vendor)) {
+            else if (is_AMD(prop.vendor)) {
                prop.opencl_device_index = device_index;

                if (ati.have_cal) {
@ -490,8 +499,7 @@ void COPROCS::get_opencl(
            }

            //////////// INTEL GPU //////////////
-            //
-            if (is_intel(prop.vendor)) {
+            else if (is_intel(prop.vendor)) {
                prop.device_num = (int)(intel_gpu_opencls.size());
                prop.opencl_device_index = device_index;

@ -515,6 +523,43 @@ void COPROCS::get_opencl(
                //
                intel_gpus.push_back(c);
            }
+
+            //////////// OTHER GPU OR ACCELERATOR //////////////
+            else {
+                int vendor_index;
+                // Put all coprocessors from same vendor in same other_opencls vector
+                for (vendor_index=0; vendor_index<num_other_opencl_types; vendor_index++) {
+                    if (other_opencls[vendor_index].size() == 0) {
+                        continue;       // Should never happen
+                    }
+                    if (other_opencls[vendor_index][0].vendor_id == prop.vendor_id) {
+                        break;  // This vector contains coproc(s) from same vendor
+                    }
+                }
+                
+                if (vendor_index >= MAX_RSC) {
+                    warnings.push_back("Too many OpenCL device vendors found");
+                    continue;   // Discard this coprocessor's info
+                }
+
+                if (vendor_index >= num_other_opencl_types) {
+                    num_other_opencl_types = vendor_index + 1;
+                }
+                
+                prop.device_num = (int)(other_opencls[vendor_index].size());
+                prop.opencl_device_index = device_index;
+                prop.opencl_available_ram = prop.global_mem_size;
+
+                // TODO: this is a temporary placeholder.  How do we want to
+                // calculate / estimate peak_flops for future new coprocessors?
+                prop.peak_flops = 0;
+                if (prop.max_compute_units) {
+                    prop.peak_flops = prop.max_compute_units * 8 * prop.max_clock_frequency * 1e6;
+                }
+                if (prop.peak_flops <= 0) prop.peak_flops = 45e9;
+
+                other_opencls[vendor_index].push_back(prop);
+            }
        }
    }

@ -532,10 +577,12 @@ void COPROCS::get_opencl(

    if ((nvidia_opencls.size() == 0) &&
        (ati_opencls.size() == 0) &&
-        (intel_gpu_opencls.size() == 0)
+        (intel_gpu_opencls.size() == 0) &&
+        (cpu_opencls.size() == 0) &&
+        (num_other_opencl_types == 0)
    ) {
        warnings.push_back(
-            "OpenCL library present but no OpenCL-capable GPUs found"
+            "OpenCL library present but no OpenCL-capable devices found"
        );
    }
 }
@ -578,7 +625,16 @@ void COPROCS::correlate_opencl(
        safe_strcpy(intel_gpu.name, intel_gpu.opencl_prop.name);
    }
    
-// TODO: Add code to allow adding other GPU vendors
+    // TODO: implement cc_config ignore vectors for other (future) OpenCL coprocessors
+    std::vector<int> ignoreNone;
+    ignoreNone.clear();
+    for (int vendor_index=0; vendor_index<num_other_opencl_types; vendor_index++) {
+        if (other_opencls[vendor_index].size() > 0) {
+            // NOTE: coprocs[0] is reserved for CPU, so vendor N uses coprocs[N+1] throughout
+            coprocs[vendor_index+1].find_best_opencls(use_all, other_opencls[vendor_index], ignoreNone);
+            coprocs[vendor_index+1].available_ram = coprocs[vendor_index+1].opencl_prop.global_mem_size;
+        }
+    }
 }

 cl_int COPROCS::get_opencl_info(
@ -839,7 +895,7 @@ void COPROC::find_best_opencls(
 ) {
    unsigned int i;

-    // identify the most capable ATI or NVIDIA OpenCL GPU
+    // identify the most capable OpenCL GPU of this "type" (vendor)
    //
    bool first = true;
    for (i=0; i<opencls.size(); i++) {
--- a/lib/coproc.cpp
+++ b/lib/coproc.cpp
@ -102,14 +102,23 @@ void PCI_INFO::write(MIOFILE& f) {
    );
 }

-void COPROC::write_xml(MIOFILE& f) {
+void COPROC::write_xml(MIOFILE& f, bool scheduler_rpc) {
    f.printf(
        "<coproc>\n"
        "   <type>%s</type>\n"
-        "   <count>%d</count>\n"
-        "</coproc>\n",
+        "   <count>%d</count>\n",
        type, count
    );
+    
+    if (scheduler_rpc) {
+        write_request(f);
+    }
+
+    if (have_opencl) {
+        opencl_prop.write_xml(f, "coproc_opencl");
+    }
+    
+    f.printf("</coproc>\n");
 }

 void COPROC::write_request(MIOFILE& f) {
@ -243,6 +252,15 @@ void COPROCS::write_xml(MIOFILE& mf, bool scheduler_rpc) {
    if (intel_gpu.count) {
        intel_gpu.write_xml(mf, scheduler_rpc);
    }
+    
+    for (int i=1; i<n_rsc; i++) {
+       if (!strcmp("CUDA", coprocs[i].type)) continue;
+       if (!strcmp(GPU_TYPE_NVIDIA, coprocs[i].type)) continue;
+       if (!strcmp(GPU_TYPE_ATI, coprocs[i].type)) continue;
+       if (!strcmp(GPU_TYPE_INTEL, coprocs[i].type)) continue;
+       coprocs[i].write_xml(mf, scheduler_rpc);
+    }
+    
    mf.printf("    </coprocs>\n");
 #endif
 }
@ -895,6 +913,7 @@ const char* proc_type_name_xml(int pt) {
    case PROC_TYPE_NVIDIA_GPU: return "NVIDIA";
    case PROC_TYPE_AMD_GPU: return "ATI";
    case PROC_TYPE_INTEL_GPU: return "intel_gpu";
+    case PROC_TYPE_OTHER_COPROC: return "OTHER_COPROC";
    }
    return "unknown";
 }
@ -905,6 +924,7 @@ const char* proc_type_name(int pt) {
    case PROC_TYPE_NVIDIA_GPU: return "NVIDIA GPU";
    case PROC_TYPE_AMD_GPU: return "AMD/ATI GPU";
    case PROC_TYPE_INTEL_GPU: return "Intel GPU";
+    case PROC_TYPE_OTHER_COPROC: return "OTHER COPROC";
    }
    return "unknown";
 }
@ -914,5 +934,6 @@ int coproc_type_name_to_num(const char* name) {
    if (!strcmp(name, "NVIDIA")) return PROC_TYPE_NVIDIA_GPU;
    if (!strcmp(name, "ATI")) return PROC_TYPE_AMD_GPU;
    if (!strcmp(name, "intel_gpu")) return PROC_TYPE_INTEL_GPU;
+    if (!strcmp(name, "OTHER_COPROC")) return PROC_TYPE_OTHER_COPROC;
    return 0;
 }
--- a/lib/coproc.h
+++ b/lib/coproc.h
@ -88,11 +88,12 @@

 // arguments to proc_type_name() and proc_type_name_xml().
 //
-#define PROC_TYPE_CPU        0
-#define PROC_TYPE_NVIDIA_GPU 1
-#define PROC_TYPE_AMD_GPU    2
-#define PROC_TYPE_INTEL_GPU  3
-#define NPROC_TYPES          4
+#define PROC_TYPE_CPU           0
+#define PROC_TYPE_NVIDIA_GPU    1
+#define PROC_TYPE_AMD_GPU       2
+#define PROC_TYPE_INTEL_GPU     3
+#define PROC_TYPE_OTHER_COPROC  4
+#define NPROC_TYPES             5

 extern const char* proc_type_name(int);
    // user-readable name
@ -104,6 +105,7 @@ extern int coproc_type_name_to_num(const char* name);
 #define GPU_TYPE_NVIDIA proc_type_name_xml(PROC_TYPE_NVIDIA_GPU)
 #define GPU_TYPE_ATI proc_type_name_xml(PROC_TYPE_AMD_GPU)
 #define GPU_TYPE_INTEL proc_type_name_xml(PROC_TYPE_INTEL_GPU)
+#define COPROC_TYPE_OTHER proc_type_name_xml(PROC_TYPE_OTHER_COPROC)

 // represents a requirement for a coproc.
 // This is a parsed version of the <coproc> elements in an <app_version>
@ -184,7 +186,7 @@ struct COPROC {
    OPENCL_DEVICE_PROP opencl_prop;

 #ifndef _USING_FCGI_
-    void write_xml(MIOFILE&);
+    void write_xml(MIOFILE&, bool scheduler_rpc=false);
    void write_request(MIOFILE&);
 #endif
    int parse(XML_PARSER&);