From 2039e67638980e38331cbfca0a30cbbf16150e23 Mon Sep 17 00:00:00 2001
From: David Anderson <davea@ssl.berkeley.edu>
Date: Fri, 28 Aug 2009 22:55:04 +0000
Subject: [PATCH] - client: NVIDIA offers an API which tells you whether a GPU 
    is running a graphics application.     Change the semantics of the "don't
 use GPU while computer in use" pref     to "don't use a GPU that's running a
 graphics app while     computer is in use".     This will increase GPU
 utilization on multi-GPU systems.

svn path=/trunk/boinc/; revision=18942
---
 checkin_notes           |  16 +++++
 client/client_state.cpp |   7 +++
 client/cpu_sched.cpp    |  34 +++++++++-
 lib/coproc.cpp          | 136 +++++++++++++++++++++++++---------------
 lib/coproc.h            |   8 +++
 sched/file_deleter.cpp  |   1 -
 6 files changed, 148 insertions(+), 54 deletions(-)
diff --git a/checkin_notes b/checkin_notes
index 7a37020cbd..888b4e0455 100644
--- a/checkin_notes
+++ b/checkin_notes
@@ -7293,3 +7293,19 @@ David  28 Aug 2009
 
     client/
         work_fetch.cpp
+
+David  28 Aug 2009
+    - client: NVIDIA offers an API which tells you whether a GPU
+        is running a graphics application.
+        Change the semantics of the "don't use GPU while computer in use" pref
+        to "don't use a GPU that's running a graphics app while
+        computer is in use".
+        This will increase GPU utilization on multi-GPU systems.
+
+    client/
+        client_state.cpp
+        cpu_sched.cpp
+    sched/
+        file_deleter.cpp
+    lib/
+        coproc.cpp,h
diff --git a/client/client_state.cpp b/client/client_state.cpp
index 451ee53d05..78bda4d0c4 100644
--- a/client/client_state.cpp
+++ b/client/client_state.cpp
@@ -559,6 +559,13 @@ bool CLIENT_STATE::poll_slow_events() {
     if (user_active != old_user_active) {
         request_schedule_cpus("Idle state change");
     }
+
+    if (coproc_cuda && user_active && !global_prefs.run_gpu_if_user_active) {
+        if (coproc_cuda->check_running_graphics_app()) {
+            request_schedule_cpus("GPU state change");
+        }
+    }
+
 #ifdef __APPLE__
     // Mac screensaver launches client if not already running.
     // OS X quits screensaver when energy saver puts display to sleep,
diff --git a/client/cpu_sched.cpp b/client/cpu_sched.cpp
index 602ed83120..ff61097f81 100644
--- a/client/cpu_sched.cpp
+++ b/client/cpu_sched.cpp
@@ -88,7 +88,12 @@ struct PROC_RESOURCES {
     bool can_schedule(RESULT* rp) {
         if (rp->uses_coprocs()) {
             if (gstate.user_active && !gstate.global_prefs.run_gpu_if_user_active) {
-                return false;
+                if (rp->avp->natis) {
+                    return false;
+                }
+                // if it's NVIDIA, defer deciding because
+                // some GPUs may not be running user apps
+                //
             }
             if (sufficient_coprocs(
                 *rp->avp, log_flags.cpu_sched_debug, "cpu_sched_debug")
@@ -984,6 +989,33 @@ static inline void assign_coprocs(vector<RESULT*> jobs) {
             }
         }
     }
+
+    // Enforce the "don't use GPU while computer in use" pref in the NVIDIA case:
+    // drop CUDA jobs whose assigned GPUs include one flagged as running a graphics app.
+    if (coproc_cuda && gstate.user_active && !gstate.global_prefs.run_gpu_if_user_active) {
+        job_iter = jobs.begin();
+        while (job_iter != jobs.end()) {
+            RESULT* rp = *job_iter;
+            if (!rp->avp->ncudas) {     // job uses no CUDA devices: keep it
+                job_iter++;
+                continue;
+            }
+            ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(rp);    // NOTE(review): dereferenced below without a NULL check -- confirm the lookup cannot fail here
+            bool some_gpu_busy = false;
+            for (i=0; i<rp->avp->ncudas; i++) {
+                int dev = atp->coproc_indices[i];   // index into coproc_cuda->running_graphics_app
+                if (coproc_cuda->running_graphics_app[dev]) {
+                    some_gpu_busy = true;
+                    break;
+                }
+            }
+            if (some_gpu_busy) {
+                job_iter = jobs.erase(job_iter);    // NOTE(review): hunk header shows `jobs` taken by value and the function ends right after this block; if so this erase never reaches the caller -- confirm
+            } else {
+                job_iter++;
+            }
+        }
+    }
 }
 
 // Enforce the CPU schedule.
diff --git a/lib/coproc.cpp b/lib/coproc.cpp
index 2128ffc58e..4deb17dc4e 100644
--- a/lib/coproc.cpp
+++ b/lib/coproc.cpp
@@ -194,6 +194,42 @@ int cuda_compare(COPROC_CUDA& c1, COPROC_CUDA& c2, bool loose) {
     return 0;
 }
 
+#ifdef _WIN32   // CUDA driver entry points, at file scope so code outside COPROC_CUDA::get() can call them
+typedef int (__stdcall *PCGDC)(int *count);
+typedef int (__stdcall *PCGDP)(struct cudaDeviceProp *prop, int device);    // its only pointer below is commented out
+typedef int (__stdcall *PCGDV)(int* version);
+typedef int (__stdcall *PCGDI)(int);
+typedef int (__stdcall *PCGDG)(int*, int);
+typedef int (__stdcall *PCGDA)(int*, int, int);
+typedef int (__stdcall *PCGDN)(char*, int, int);
+typedef int (__stdcall *PCGDM)(unsigned int*, int);
+typedef int (__stdcall *PCGDCC)(int*, int*, int);
+
+PCGDC __cuDeviceGetCount = NULL;    // assigned via GetProcAddress on nvcuda.dll in COPROC_CUDA::get()
+//PCGDP __cuDeviceGetProperties = NULL;
+PCGDV __cuDriverGetVersion = NULL;
+PCGDI __cuInit = NULL;
+PCGDG __cuDeviceGet = NULL;             // called by check_running_graphics_app()
+PCGDA __cuDeviceGetAttribute = NULL;    // called by check_running_graphics_app()
+PCGDN __cuDeviceGetName = NULL;
+PCGDM __cuDeviceTotalMem = NULL;
+PCGDCC __cuDeviceComputeCapability = NULL;
+#else   // non-Windows: plain function pointers plus the library handle that COPROC_CUDA::get() fills with dlopen()
+void* cudalib;
+int (*__cuInit)(int);
+int (*__cuDeviceGetCount)(int*);
+//int (*__cuDeviceGetProperties)(cudaDeviceProp*, int);
+int (*__cuDriverGetVersion)(int*);
+int (*__cuDeviceGet)(int*, int);                // called by check_running_graphics_app()
+int (*__cuDeviceGetAttribute)(int*, int, int);  // called by check_running_graphics_app()
+int (*__cuDeviceGetName)(char*, int, int);
+int (*__cuDeviceTotalMem)(unsigned int*, int);
+int (*__cuDeviceComputeCapability)(int*, int*, int);
+#endif
+
+// NVIDIA interfaces are documented here:
+// http://developer.download.nvidia.com/compute/cuda/2_3/toolkit/docs/online/index.html
+
 void COPROC_CUDA::get(
     COPROCS& coprocs, vector<string>& strings,
     bool use_all    // if false, use only those equivalent to most capable
@@ -202,33 +238,11 @@ void COPROC_CUDA::get(
     char buf[256];
 
 #ifdef _WIN32
-
-    typedef int (__stdcall *PCGDC)(int *count);
-    typedef int (__stdcall *PCGDP)(struct cudaDeviceProp *prop, int device);
-    typedef int (__stdcall *PCGDV)(int* version);
-    typedef int (__stdcall *PCGDI)(int);
-    typedef int (__stdcall *PCGDG)(int*, int);
-    typedef int (__stdcall *PCGDA)(int*, int, int);
-    typedef int (__stdcall *PCGDN)(char*, int, int);
-    typedef int (__stdcall *PCGDM)(unsigned int*, int);
-    typedef int (__stdcall *PCGDCC)(int*, int*, int);
-
-    PCGDC __cuDeviceGetCount = NULL;
-    //PCGDP __cuDeviceGetProperties = NULL;
-    PCGDV __cuDriverGetVersion = NULL;
-    PCGDI __cuInit = NULL;
-    PCGDG __cuDeviceGet = NULL;
-    PCGDA __cuDeviceGetAttribute = NULL;
-    PCGDN __cuDeviceGetName = NULL;
-    PCGDM __cuDeviceTotalMem = NULL;
-    PCGDCC __cuDeviceComputeCapability = NULL;
-
     HMODULE cudalib = LoadLibrary("nvcuda.dll");
     if (!cudalib) {
         strings.push_back("Can't load library nvcuda.dll");
         return;
     }
-
     __cuDeviceGetCount = (PCGDC)GetProcAddress(cudalib, "cuDeviceGetCount");
     //__cuDeviceGetProperties = (PCGDP)GetProcAddress(cudalib, "cuDeviceGetProperties");
     __cuDriverGetVersion = (PCGDV)GetProcAddress(cudalib, "cuDriverGetVersion" );
@@ -255,16 +269,6 @@ void COPROC_CUDA::get(
     }
 #endif
 #else
-    void* cudalib;
-    int (*__cuInit)(int);
-    int (*__cuDeviceGetCount)(int*);
-    //int (*__cuDeviceGetProperties)(cudaDeviceProp*, int);
-    int (*__cuDriverGetVersion)(int*);
-    int (*__cuDeviceGet)(int*, int);
-    int (*__cuDeviceGetAttribute)(int*, int, int);
-    int (*__cuDeviceGetName)(char*, int, int);
-    int (*__cuDeviceTotalMem)(unsigned int*, int);
-    int (*__cuDeviceComputeCapability)(int*, int*, int);
 
 #ifdef __APPLE__
     cudalib = dlopen("/usr/local/cuda/lib/libcuda.dylib", RTLD_NOW);
@@ -613,6 +617,54 @@ int COPROC_CUDA::parse(FILE* fin) {
     return ERR_XML_PARSE;
 }
 
+// Check whether each GPU is running a graphics app.  A GPU is assumed to be
+// running one unless the driver reports that it has no kernel execution
+// timeout; a failed driver query leaves the assumption in place.
+// Updates running_graphics_app[] and returns true if any entry changed.
+//
+bool COPROC_CUDA::check_running_graphics_app() {
+    bool change = false;
+    for (int j=0; j<count; j++) {
+        bool new_val = true;
+        int device, kernel_timeout;
+        int retval = (*__cuDeviceGet)(&device, j);
+        if (!retval) {
+            retval = (*__cuDeviceGetAttribute)(&kernel_timeout, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, device);
+            if (!retval && !kernel_timeout) {
+                new_val = false;
+            }
+        }
+        if (new_val != running_graphics_app[j]) change = true;
+        running_graphics_app[j] = new_val;
+    }
+    return change;  // previously missing: callers received an indeterminate value
+}
+
+////////////////// ATI STARTS HERE /////////////////
+
+#ifdef _WIN32   // ATI CAL entry points, moved to file scope alongside the CUDA ones
+typedef int (__stdcall *PCGDC)(CALuint *numDevices);    // NOTE(review): PCGDC is already typedef'd above as int(__stdcall*)(int*); a conflicting redefinition unless CALuint is int -- confirm
+typedef int (__stdcall *ATTRIBS) (CALdeviceattribs *attribs, CALuint ordinal);
+typedef int (__stdcall *INFO) (CALdeviceinfo *info, CALuint ordinal);
+typedef int (__stdcall *VER) (CALuint *cal_major, CALuint *cal_minor, CALuint *cal_imp);
+typedef int (__stdcall *PCGDI)(void);   // NOTE(review): PCGDI is already typedef'd above as int(__stdcall*)(int); this file-scope redefinition has a different signature
+typedef int (__stdcall *CLOSE)(void);
+
+PCGDI	__calInit = NULL;
+VER		__calGetVersion = NULL;
+PCGDC	__calDeviceGetCount = NULL;
+ATTRIBS __calDeviceGetAttribs = NULL;
+INFO    __calDeviceGetInfo = NULL;
+CLOSE	__calShutdown = NULL;   // assigned via GetProcAddress(callib, "calShutdown") in COPROC_ATI::get()
+#else   // non-Windows: plain function pointers with the same signatures
+int (*__calInit)();
+int (*__calGetVersion)(CALuint*, CALuint*, CALuint*);
+int (*__calDeviceGetCount)(CALuint*);
+int (*__calDeviceGetAttribs)(CALdeviceattribs*, CALuint);
+int (*__calDeviceGetInfo)(CALdeviceinfo*, CALuint);
+int (*__calShutdown)();
+#endif
+
 void COPROC_ATI::get(COPROCS& coprocs, vector<string>& strings) {
     CALuint numDevices, cal_major, cal_minor, cal_imp;
     CALdevice device;
@@ -626,20 +678,6 @@ void COPROC_ATI::get(COPROCS& coprocs, vector<string>& strings) {
     numDevices =0;
 
 #ifdef _WIN32
-    typedef int (__stdcall *PCGDC)(CALuint *numDevices);
-    typedef int (__stdcall *ATTRIBS) (CALdeviceattribs *attribs, CALuint ordinal);
-    typedef int (__stdcall *INFO) (CALdeviceinfo *info, CALuint ordinal);
-    typedef int (__stdcall *VER) (CALuint *cal_major, CALuint *cal_minor, CALuint *cal_imp);
-    typedef int (__stdcall *PCGDI)(void);
-    typedef int (__stdcall *CLOSE)(void);
-
-    PCGDI	__calInit = NULL;
-    VER		__calGetVersion = NULL;
-    PCGDC	__calDeviceGetCount = NULL;
-    ATTRIBS __calDeviceGetAttribs = NULL;
-    INFO    __calDeviceGetInfo = NULL;
-    CLOSE	__calShutdown = NULL;
-
 #if defined _M_X64
     // TRY CAL 1.4 first driver > 9.2
     HINSTANCE callib = LoadLibrary("aticalrt64.dll");
@@ -665,12 +703,6 @@ void COPROC_ATI::get(COPROCS& coprocs, vector<string>& strings) {
     __calShutdown = (CLOSE)GetProcAddress(callib, "calShutdown" );
 #else
     void* callib;
-    int (*__calInit)();
-    int (*__calGetVersion)(CALuint*, CALuint*, CALuint*);
-    int (*__calDeviceGetCount)(CALuint*);
-    int (*__calDeviceGetAttribs)(CALdeviceattribs*, CALuint);
-    int (*__calDeviceGetInfo)(CALdeviceinfo*, CALuint);
-    int (*__calShutdown)();
 
     callib = dlopen("libaticalrt.so", RTLD_NOW);
     if (!callib) {
diff --git a/lib/coproc.h b/lib/coproc.h
index a193918095..9586777d82 100644
--- a/lib/coproc.h
+++ b/lib/coproc.h
@@ -114,6 +114,8 @@ struct COPROC {
     //
     int device_nums[MAX_COPROC_INSTANCES];
     int device_num;     // temp used in scan process
+    bool running_graphics_app[MAX_COPROC_INSTANCES];
+        // is this GPU running a graphics app (NVIDIA only)
 
 #ifndef _USING_FCGI_
     virtual void write_xml(MIOFILE&);
@@ -126,6 +128,10 @@ struct COPROC {
         req_secs = 0;
         req_instances = 0;
         estimated_delay = 0;
+        for (int i=0; i<MAX_COPROC_INSTANCES; i++) {
+            device_nums[i] = 0;
+            running_graphics_app[i] = true;
+        }
     }
     COPROC(const char* t){
         clear();
@@ -242,6 +248,8 @@ struct COPROC_CUDA : public COPROC {
         double x = (prop.clockRate * prop.multiProcessorCount)*5e10/(14*1.25e6);
         return x?x:5e10;
     }
+
+    bool check_running_graphics_app();
 };
 
 void fake_cuda(COPROCS&, int);
diff --git a/sched/file_deleter.cpp b/sched/file_deleter.cpp
index 658d39e9f4..d9df846d9c 100644
--- a/sched/file_deleter.cpp
+++ b/sched/file_deleter.cpp
@@ -62,7 +62,6 @@
 #include "sched_util.h"
 #include "sched_msgs.h"
 
-
 #define LOCKFILE "file_deleter.out"
 #define PIDFILE  "file_deleter.pid"