From b37cf4cd9a56ab312af22ebcee5b422f779ff2ba Mon Sep 17 00:00:00 2001
From: Charlie Fenton <charlief@example.com>
Date: Wed, 16 Jul 2014 04:33:26 -0700
Subject: [PATCH] client: begin adding support for OpenCL devices other than
 AMD/ATI, NVIDIA or Intel

---
 client/gpu_detect.cpp | 72 +++++++++++++++++++++++++++++++++++++++--
 client/gpu_detect.h   |  3 ++
 client/gpu_opencl.cpp | 74 +++++++++++++++++++++++++++++++++++++------
 lib/coproc.cpp        | 27 ++++++++++++++--
 lib/coproc.h          | 14 ++++----
 5 files changed, 169 insertions(+), 21 deletions(-)
diff --git a/client/gpu_detect.cpp b/client/gpu_detect.cpp
index d604d9792a..bdb43a52ca 100644
--- a/client/gpu_detect.cpp
+++ b/client/gpu_detect.cpp
@@ -60,8 +60,12 @@ vector<COPROC_INTEL> intel_gpus;
 vector<OPENCL_DEVICE_PROP> ati_opencls;
 vector<OPENCL_DEVICE_PROP> nvidia_opencls;
 vector<OPENCL_DEVICE_PROP> intel_gpu_opencls;
+vector<OPENCL_DEVICE_PROP> other_opencls[MAX_RSC];
 vector<OPENCL_CPU_PROP> cpu_opencls;
 
+// Number of OpenCL coproc vendors other than AMD, NVIDIA or Intel
+int num_other_opencl_types;
+
 static char* client_path;
     // argv[0] from the command used to run client.
     // May be absolute or relative.
@@ -100,6 +104,8 @@ void COPROCS::get(
 
 
 void COPROCS::detect_gpus(std::vector<std::string> &warnings) {
+    num_other_opencl_types = 0;
+    
 #ifdef _WIN32
     try {
         nvidia.get(warnings);
@@ -163,7 +169,7 @@ void COPROCS::correlate_gpus(
     std::vector<std::string> &descs,
     IGNORE_GPU_INSTANCE &ignore_gpu_instance
 ) {
-    unsigned int i;
+    unsigned int i, j;
     char buf[256], buf2[256];
 
     nvidia.correlate(use_all, ignore_gpu_instance[PROC_TYPE_NVIDIA_GPU]);
@@ -251,6 +257,17 @@ void COPROCS::correlate_gpus(
         descs.push_back(string(buf));
     }
 
+    // Create descriptions for OpenCL devices from other vendors; each vendor takes the next coprocs[] slot.
+    // NOTE(review): n_rsc is incremented with no visible bounds check - confirm coprocs[] cannot overflow.
+    for (j=0; j<num_other_opencl_types; j++) {
+        strcpy(coprocs[n_rsc++].type, proc_type_name_xml(PROC_TYPE_OTHER_COPROC));  // same type name for every vendor
+        snprintf(buf2, sizeof(buf2), "device type %c number", 'A'+j);   // vendors are lettered A, B, C, ...
+        for (i=0; i<other_opencls[j].size(); i++) {
+            other_opencls[j][i].description(buf, sizeof(buf), buf2);
+            descs.push_back(string(buf));   // one entry per device
+        }
+    }
+    
     // Create descriptions for OpenCL CPUs
     //
     for (i=0; i<cpu_opencls.size(); i++) {
@@ -264,8 +281,10 @@ void COPROCS::correlate_gpus(
     ati_opencls.clear();
     nvidia_opencls.clear();
     intel_gpu_opencls.clear();
+    for (j=0; j<MAX_RSC; j++) {
+        other_opencls[j].clear();
+    }
     cpu_opencls.clear();
-
 }
 
 // Some dual-GPU laptops (e.g., Macbook Pro) don't 
@@ -285,7 +304,7 @@ void COPROCS::set_path_to_client(char *path) {
 
 int COPROCS::write_coproc_info_file(vector<string> &warnings) {
     MIOFILE mf;
-    unsigned int i, temp;
+    unsigned int i, j, temp;
     FILE* f;
     
     f = boinc_fopen(COPROC_INFO_FILENAME, "wb");
@@ -316,6 +335,11 @@ int COPROCS::write_coproc_info_file(vector<string> &warnings) {
     for (i=0; i<intel_gpu_opencls.size(); ++i) {
         intel_gpu_opencls[i].write_xml(mf, "intel_gpu_opencl", true);
     }
+    for (j=0; j<num_other_opencl_types; j++) {
+        for (i=0; i<other_opencls[j].size(); i++) {
+            other_opencls[j][i].write_xml(mf, "other_opencl", true);
+        }
+    }
     for (i=0; i<cpu_opencls.size(); i++) {
         cpu_opencls[i].write_xml(mf);
     }
@@ -340,6 +364,7 @@ int COPROCS::read_coproc_info_file(vector<string> &warnings) {
     OPENCL_DEVICE_PROP ati_opencl;
     OPENCL_DEVICE_PROP nvidia_opencl;
     OPENCL_DEVICE_PROP intel_gpu_opencl;
+    OPENCL_DEVICE_PROP other_opencl;
     OPENCL_CPU_PROP cpu_opencl;
 
     ati_gpus.clear();
@@ -348,7 +373,11 @@ int COPROCS::read_coproc_info_file(vector<string> &warnings) {
     ati_opencls.clear();
     nvidia_opencls.clear();
     intel_gpu_opencls.clear();
+    for (int j=0; j<MAX_RSC; j++) {
+        other_opencls[j].clear();
+    }
     cpu_opencls.clear();
+    num_other_opencl_types = 0;
 
     f = boinc_fopen(COPROC_INFO_FILENAME, "r");
     if (!f) return ERR_FOPEN;
@@ -434,6 +463,39 @@ int COPROCS::read_coproc_info_file(vector<string> &warnings) {
             continue;
         }
 
+        if (xp.match_tag("other_opencl")) {    // tag written for each other_opencls entry
+            memset(&other_opencl, 0, sizeof(other_opencl));
+            retval = other_opencl.parse(xp, "/other_opencl");
+            if (retval) {
+                memset(&other_opencl, 0, sizeof(other_opencl));     // parse failed: wipe the partial record
+            } else {
+                other_opencl.is_used = COPROC_IGNORED;
+                
+                int vendor_index;
+                // Put all coprocessors from same vendor in same other_opencls vector (matched on vendor_id of its first entry)
+                for (vendor_index=0; vendor_index<num_other_opencl_types; vendor_index++) {
+                    if (other_opencls[vendor_index].size() == 0) {
+                        continue;       // Should never happen
+                    }
+                    if (other_opencls[vendor_index][0].vendor_id == other_opencl.vendor_id) {
+                        break;  // This vector contains coproc(s) from same vendor
+                    }
+                }
+                
+                if (vendor_index >= MAX_RSC) {
+                    // Too many OpenCL device vendors found (should never happen here)
+                    continue;   // Discard this coprocessor's info
+                }
+
+                if (vendor_index >= num_other_opencl_types) {   // no match found: this device opens a new vendor slot
+                    num_other_opencl_types = vendor_index + 1;
+                }
+                
+                other_opencls[vendor_index].push_back(other_opencl);
+            }
+            continue;
+        }
+
         if (xp.match_tag("opencl_cpu_prop")) {
             memset(&cpu_opencl, 0, sizeof(cpu_opencl));
             retval = cpu_opencl.parse(xp);
@@ -528,7 +590,11 @@ int COPROCS::launch_child_process_to_detect_gpus() {
         client_path,
         argc,
         argv, 
+#ifdef _DEBUG
+        1,
+#else
         0,
+#endif
         prog
     );
 
diff --git a/client/gpu_detect.h b/client/gpu_detect.h
index 366a1bc3da..ff554d10b4 100644
--- a/client/gpu_detect.h
+++ b/client/gpu_detect.h
@@ -21,5 +21,8 @@ extern vector<COPROC_INTEL> intel_gpus;
 extern vector<OPENCL_DEVICE_PROP> nvidia_opencls;
 extern vector<OPENCL_DEVICE_PROP> ati_opencls;
 extern vector<OPENCL_DEVICE_PROP> intel_gpu_opencls;
+extern vector<OPENCL_DEVICE_PROP> other_opencls[MAX_RSC];
 extern vector<OPENCL_CPU_PROP> cpu_opencls;
 
+extern int num_other_opencl_types;
+
diff --git a/client/gpu_opencl.cpp b/client/gpu_opencl.cpp
index 3902b3a75c..f660d9199a 100644
--- a/client/gpu_opencl.cpp
+++ b/client/gpu_opencl.cpp
@@ -17,6 +17,9 @@
 
 // Detection of GPUs using OpenCL
 
+// TODO: Eliminate this, or improve it
+#define TEST_OTHER_COPROC_LOGIC 0
+
 #ifdef _WIN32
 #include "boinc_win.h"
 #ifdef _MSC_VER
@@ -289,10 +292,11 @@ void COPROCS::get_opencl(
             cpu_opencls.push_back(c);
         }
 
-        //////////// GPUs //////////////
+        //////////// GPUs and Accelerators //////////////
         
         ciErrNum = (*__clGetDeviceIDs)(
-            platforms[platform_index], (CL_DEVICE_TYPE_GPU),
+            platforms[platform_index],
+            (CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR),
             MAX_COPROC_INSTANCES, devices, &num_devices
         );
 
@@ -367,6 +371,11 @@ void COPROCS::get_opencl(
             ciErrNum = get_opencl_info(prop, device_index, warnings);
             if (ciErrNum != CL_SUCCESS) continue;
 
+// TODO: Eliminate this, or improve it
+#if TEST_OTHER_COPROC_LOGIC
+            safe_strcpy(prop.vendor, "Some Other GPU");
+#endif
+
             prop.is_used = COPROC_UNUSED;
             prop.get_device_version_int();
 
@@ -432,7 +441,7 @@ void COPROCS::get_opencl(
             }
             
             //////////// AMD / ATI //////////////
-            if (is_AMD(prop.vendor)) {
+            else if (is_AMD(prop.vendor)) {
                 prop.opencl_device_index = device_index;
 
                 if (ati.have_cal) {
@@ -490,8 +499,7 @@ void COPROCS::get_opencl(
             }
 
             //////////// INTEL GPU //////////////
-            //
-            if (is_intel(prop.vendor)) {
+            else if (is_intel(prop.vendor)) {
                 prop.device_num = (int)(intel_gpu_opencls.size());
                 prop.opencl_device_index = device_index;
 
@@ -515,6 +523,43 @@ void COPROCS::get_opencl(
                 //
                 intel_gpus.push_back(c);
             }
+
+            //////////// OTHER GPU OR ACCELERATOR //////////////
+            else {      // vendor was not matched by any of the vendor branches above
+                int vendor_index;
+                // Put all coprocessors from same vendor in same other_opencls vector (matched on vendor_id of its first entry)
+                for (vendor_index=0; vendor_index<num_other_opencl_types; vendor_index++) {
+                    if (other_opencls[vendor_index].size() == 0) {
+                        continue;       // Should never happen
+                    }
+                    if (other_opencls[vendor_index][0].vendor_id == prop.vendor_id) {
+                        break;  // This vector contains coproc(s) from same vendor
+                    }
+                }
+                
+                if (vendor_index >= MAX_RSC) {
+                    warnings.push_back("Too many OpenCL device vendors found");
+                    continue;   // Discard this coprocessor's info
+                }
+
+                if (vendor_index >= num_other_opencl_types) {   // no match found: this device opens a new vendor slot
+                    num_other_opencl_types = vendor_index + 1;
+                }
+                
+                prop.device_num = (int)(other_opencls[vendor_index].size());    // index of this device within its vendor's vector
+                prop.opencl_device_index = device_index;
+                prop.opencl_available_ram = prop.global_mem_size;
+
+                // TODO: this is a temporary place holder.  How do we want to
+                // calculate / estimate peak_flops for future new coprocessors?
+                prop.peak_flops = 0;
+                if (prop.max_compute_units) {
+                    prop.peak_flops = prop.max_compute_units * 8 * prop.max_clock_frequency * 1e6;  // presumably clock is in MHz - TODO confirm
+                }
+                if (prop.peak_flops <= 0) prop.peak_flops = 45e9;   // fallback value when no estimate could be computed
+
+                other_opencls[vendor_index].push_back(prop);
+            }
         }
     }
 
@@ -532,10 +577,12 @@ void COPROCS::get_opencl(
 
     if ((nvidia_opencls.size() == 0) &&
         (ati_opencls.size() == 0) &&
-        (intel_gpu_opencls.size() == 0)
+        (intel_gpu_opencls.size() == 0) &&
+        (cpu_opencls.size() == 0) &&
+        (num_other_opencl_types == 0)
     ) {
         warnings.push_back(
-            "OpenCL library present but no OpenCL-capable GPUs found"
+            "OpenCL library present but no OpenCL-capable devices found"
         );
     }
 }
@@ -578,7 +625,16 @@ void COPROCS::correlate_opencl(
         safe_strcpy(intel_gpu.name, intel_gpu.opencl_prop.name);
     }
     
-// TODO: Add code to allow adding other GPU vendors
+    // Pick the best device(s) for each "other" OpenCL vendor and record its available RAM.
+    // TODO: implement cc_config ignore vectors for other (future) OpenCL coprocessors
+    std::vector<int> ignoreNone;    // default-constructed empty: no device is ignored
+    for (int vendor_index=0; vendor_index<num_other_opencl_types; vendor_index++) {
+        if (other_opencls[vendor_index].size() > 0) {
+            // NOTE: coprocs[0] is reserved for CPU, so vendor N uses coprocs[N+1] for both statements below
+            coprocs[vendor_index+1].find_best_opencls(use_all, other_opencls[vendor_index], ignoreNone);
+            coprocs[vendor_index+1].available_ram = coprocs[vendor_index+1].opencl_prop.global_mem_size;
+        }
+    }
 }
 
 cl_int COPROCS::get_opencl_info(
@@ -839,7 +895,7 @@ void COPROC::find_best_opencls(
 ) {
     unsigned int i;
 
-    // identify the most capable ATI or NVIDIA OpenCL GPU
+    // identify the most capable OpenCL GPU of this "type" (vendor)
     //
     bool first = true;
     for (i=0; i<opencls.size(); i++) {
diff --git a/lib/coproc.cpp b/lib/coproc.cpp
index 42bfd8ee65..80cebc61d5 100644
--- a/lib/coproc.cpp
+++ b/lib/coproc.cpp
@@ -102,14 +102,23 @@ void PCI_INFO::write(MIOFILE& f) {
     );
 }
 
-void COPROC::write_xml(MIOFILE& f) {
+void COPROC::write_xml(MIOFILE& f, bool scheduler_rpc) {   // writes this coproc as a <coproc> element to f
     f.printf(
         "<coproc>\n"
         "   <type>%s</type>\n"
-        "   <count>%d</count>\n"
-        "</coproc>\n",
+        "   <count>%d</count>\n",
         type, count
     );
+    // When writing for a scheduler RPC, also emit whatever write_request() writes.
+    if (scheduler_rpc) {
+        write_request(f);
+    }
+    // When have_opencl is set, nest the OpenCL properties under the "coproc_opencl" tag.
+    if (have_opencl) {
+        opencl_prop.write_xml(f, "coproc_opencl");
+    }
+    // The closing tag is written last so the optional parts above stay inside <coproc>.
+    f.printf("</coproc>\n");
 }
 
 void COPROC::write_request(MIOFILE& f) {
@@ -243,6 +252,15 @@ void COPROCS::write_xml(MIOFILE& mf, bool scheduler_rpc) {
     if (intel_gpu.count) {
         intel_gpu.write_xml(mf, scheduler_rpc);
     }
+    
+    for (int i=1; i<n_rsc; i++) {
+       if (!strcmp("CUDA", coprocs[i].type)) continue;
+       if (!strcmp(GPU_TYPE_NVIDIA, coprocs[i].type)) continue;
+       if (!strcmp(GPU_TYPE_ATI, coprocs[i].type)) continue;
+       if (!strcmp(GPU_TYPE_INTEL, coprocs[i].type)) continue;
+       coprocs[i].write_xml(mf, scheduler_rpc);
+    }
+    
     mf.printf("    </coprocs>\n");
 #endif
 }
@@ -895,6 +913,7 @@ const char* proc_type_name_xml(int pt) {
     case PROC_TYPE_NVIDIA_GPU: return "NVIDIA";
     case PROC_TYPE_AMD_GPU: return "ATI";
     case PROC_TYPE_INTEL_GPU: return "intel_gpu";
+    case PROC_TYPE_OTHER_COPROC: return "OTHER_COPROC";
     }
     return "unknown";
 }
@@ -905,6 +924,7 @@ const char* proc_type_name(int pt) {
     case PROC_TYPE_NVIDIA_GPU: return "NVIDIA GPU";
     case PROC_TYPE_AMD_GPU: return "AMD/ATI GPU";
     case PROC_TYPE_INTEL_GPU: return "Intel GPU";
+    case PROC_TYPE_OTHER_COPROC: return "OTHER COPROC";
     }
     return "unknown";
 }
@@ -914,5 +934,6 @@ int coproc_type_name_to_num(const char* name) {
     if (!strcmp(name, "NVIDIA")) return PROC_TYPE_NVIDIA_GPU;
     if (!strcmp(name, "ATI")) return PROC_TYPE_AMD_GPU;
     if (!strcmp(name, "intel_gpu")) return PROC_TYPE_INTEL_GPU;
+    if (!strcmp(name, "OTHER_COPROC")) return PROC_TYPE_OTHER_COPROC;
     return 0;
 }
diff --git a/lib/coproc.h b/lib/coproc.h
index 792b99f452..b4e68795ba 100644
--- a/lib/coproc.h
+++ b/lib/coproc.h
@@ -88,11 +88,12 @@
 
 // arguments to proc_type_name() and proc_type_name_xml().
 //
-#define PROC_TYPE_CPU        0
-#define PROC_TYPE_NVIDIA_GPU 1
-#define PROC_TYPE_AMD_GPU    2
-#define PROC_TYPE_INTEL_GPU  3
-#define NPROC_TYPES          4
+#define PROC_TYPE_CPU           0
+#define PROC_TYPE_NVIDIA_GPU    1
+#define PROC_TYPE_AMD_GPU       2
+#define PROC_TYPE_INTEL_GPU     3
+#define PROC_TYPE_OTHER_COPROC  4
+#define NPROC_TYPES             5
 
 extern const char* proc_type_name(int);
     // user-readable name
@@ -104,6 +105,7 @@ extern int coproc_type_name_to_num(const char* name);
 #define GPU_TYPE_NVIDIA proc_type_name_xml(PROC_TYPE_NVIDIA_GPU)
 #define GPU_TYPE_ATI proc_type_name_xml(PROC_TYPE_AMD_GPU)
 #define GPU_TYPE_INTEL proc_type_name_xml(PROC_TYPE_INTEL_GPU)
+#define COPROC_TYPE_OTHER proc_type_name_xml(PROC_TYPE_OTHER_COPROC)
 
 // represents a requirement for a coproc.
 // This is a parsed version of the <coproc> elements in an <app_version>
@@ -184,7 +186,7 @@ struct COPROC {
     OPENCL_DEVICE_PROP opencl_prop;
 
 #ifndef _USING_FCGI_
-    void write_xml(MIOFILE&);
+    void write_xml(MIOFILE&, bool scheduler_rpc=false);
     void write_request(MIOFILE&);
 #endif
     int parse(XML_PARSER&);