From 40eebe00af840bd5eedb65c8cda6264a4d0eddda Mon Sep 17 00:00:00 2001 From: David Anderson Date: Tue, 18 May 2010 19:22:34 +0000 Subject: [PATCH] - client/scheduler: in COPROCS, instead of having a vector of pointers to dynamically allocated COPROC-derived objects, just have the objects themselves. Dynamic allocation should be avoided at all costs. svn path=/trunk/boinc/; revision=21564 --- checkin_notes | 27 + client/app_start.cpp | 5 +- client/client_state.cpp | 22 +- client/client_state.h | 2 - client/client_types.cpp | 8 +- client/coproc_detect.cpp | 1821 ++++++++++++++++++------------------- client/cpu_sched.cpp | 46 +- client/cs_scheduler.cpp | 26 +- client/cs_statefile.cpp | 4 +- client/rr_sim.cpp | 980 ++++++++++---------- client/scheduler_op.cpp | 6 +- client/sim.h | 2 - client/work_fetch.cpp | 68 +- lib/coproc.cpp | 959 ++++++++++--------- lib/coproc.h | 115 ++- lib/hostinfo.cpp | 2 + lib/hostinfo.h | 6 + sched/handle_request.cpp | 4 +- sched/sched_customize.cpp | 8 +- sched/sched_send.cpp | 50 +- sched/sched_types.cpp | 5 +- sched/sched_types.h | 2 - 22 files changed, 2078 insertions(+), 2090 deletions(-) diff --git a/checkin_notes b/checkin_notes index 3951bbaf24..8acfe8b5b0 100644 --- a/checkin_notes +++ b/checkin_notes @@ -3701,3 +3701,30 @@ David 18 May 2010 client/ cpu_sched.cpp + +David 18 May 2010 + - client/scheduler: in COPROCS, instead of having a vector of + pointers to dynamically allocated COPROC-derived objects, + just have the objects themselves. + Dynamic allocation should be avoided at all costs. 
+ + client/ + app_start.cpp + client_state.cpp,h + client_types.cpp + coproc_detect.cpp + cpu_sched.cpp + cs_scheduler.cpp + cs_statefile.cpp + rr_sim.cpp + scheduler_op.cpp + wim.h + work_fetch.cpp + lib/ + coproc.cpp,h + hostinfo.cpp,h + sched/ + handle_request.cpp + sched_customize.cpp + sched_send.cpp + sched_types.cpp,h diff --git a/client/app_start.cpp b/client/app_start.cpp index ed1e65bbc2..ca997ab5fc 100644 --- a/client/app_start.cpp +++ b/client/app_start.cpp @@ -119,7 +119,10 @@ static void debug_print_argv(char** argv) { static void coproc_cmdline( int rsc_type, RESULT* rp, double ninstances, char* cmdline ) { - COPROC* coproc = (rsc_type==RSC_TYPE_CUDA)?(COPROC*)coproc_cuda:(COPROC*)coproc_ati; + COPROC* coproc = (rsc_type==RSC_TYPE_CUDA) + ?(COPROC*)&gstate.host_info.coprocs.cuda + :(COPROC*)&gstate.host_info.coprocs.ati + ; for (int j=0; jcoproc_indices[j]; // sanity check diff --git a/client/client_state.cpp b/client/client_state.cpp index a53bacfa58..5be0083c93 100644 --- a/client/client_state.cpp +++ b/client/client_state.cpp @@ -62,8 +62,6 @@ using std::max; CLIENT_STATE gstate; -COPROC_CUDA* coproc_cuda; -COPROC_ATI* coproc_ati; CLIENT_STATE::CLIENT_STATE(): lookup_website_op(&gui_http), @@ -266,23 +264,21 @@ int CLIENT_STATE::init() { msg_printf(NULL, MSG_INFO, warnings[i].c_str()); } } - if (host_info.coprocs.coprocs.size() == 0) { + if (host_info.coprocs.none() ) { msg_printf(NULL, MSG_INFO, "No usable GPUs found"); } #if 0 msg_printf(NULL, MSG_INFO, "Faking an NVIDIA GPU"); - coproc_cuda = fake_cuda(host_info.coprocs, 256*MEGA, 2); - coproc_cuda->available_ram_fake[0] = 256*MEGA; - coproc_cuda->available_ram_fake[1] = 192*MEGA; + host_info.coprocs.cuda.fake(256*MEGA, 2); + host_info.coprocs.cuda.available_ram_fake[0] = 256*MEGA; + host_info.coprocs.cuda.available_ram_fake[1] = 192*MEGA; #endif #if 0 msg_printf(NULL, MSG_INFO, "Faking an ATI GPU"); - coproc_ati = fake_ati(host_info.coprocs, 512*MEGA, 2); - coproc_ati->available_ram_fake[0] = 
256*MEGA; - coproc_ati->available_ram_fake[1] = 192*MEGA; + host_info.coprocs.ati.fake(512*MEGA, 2); + host_info.coprocs.ati.available_ram_fake[0] = 256*MEGA; + host_info.coprocs.ati.available_ram_fake[1] = 192*MEGA; #endif - coproc_cuda = (COPROC_CUDA*)host_info.coprocs.lookup("CUDA"); - coproc_ati = (COPROC_ATI*)host_info.coprocs.lookup("ATI"); } // check for app_info.xml file in project dirs. @@ -597,8 +593,8 @@ bool CLIENT_STATE::poll_slow_events() { // NVIDIA provides an interface for finding if a GPU is // running a graphics app. ATI doesn't as far as I know // - if (coproc_cuda && user_active && !global_prefs.run_gpu_if_user_active) { - if (coproc_cuda->check_running_graphics_app()) { + if (host_info.have_cuda() && user_active && !global_prefs.run_gpu_if_user_active) { + if (host_info.coprocs.cuda.check_running_graphics_app()) { request_schedule_cpus("GPU state change"); } } diff --git a/client/client_state.h b/client/client_state.h index 1383b84a62..882fda6073 100644 --- a/client/client_state.h +++ b/client/client_state.h @@ -488,8 +488,6 @@ public: extern CLIENT_STATE gstate; -extern COPROC_CUDA* coproc_cuda; -extern COPROC_ATI* coproc_ati; extern bool gpus_usable; // return a random double in the range [MIN,min(e^n,MAX)) diff --git a/client/client_types.cpp b/client/client_types.cpp index dbcb27d934..097f961864 100644 --- a/client/client_types.cpp +++ b/client/client_types.cpp @@ -1264,10 +1264,10 @@ void APP_VERSION::get_file_errors(string& str) { } bool APP_VERSION::missing_coproc() { - if (ncudas && !coproc_cuda) { + if (ncudas && gstate.host_info.coprocs.cuda.count==0) { return true; } - if (natis && !coproc_ati) { + if (natis && gstate.host_info.coprocs.ati.count==0) { return true; } return false; @@ -1790,9 +1790,9 @@ int RESULT::write_gui(MIOFILE& out) { char buf[256]; strcpy(buf, ""); if (atp && atp->task_state() == PROCESS_EXECUTING) { - if (avp->ncudas && coproc_cuda->count>1) { + if (avp->ncudas && gstate.host_info.coprocs.cuda.count>1) { 
sprintf(buf, " (device %d)", coproc_indices[0]); - } else if (avp->natis && coproc_ati->count>1) { + } else if (avp->natis && gstate.host_info.coprocs.ati.count>1) { sprintf(buf, " (device %d)", coproc_indices[0]); } } diff --git a/client/coproc_detect.cpp b/client/coproc_detect.cpp index 4e4b5147a7..8849d01796 100644 --- a/client/coproc_detect.cpp +++ b/client/coproc_detect.cpp @@ -1,916 +1,905 @@ -// This file is part of BOINC. -// http://boinc.berkeley.edu -// Copyright (C) 2009 University of California -// -// BOINC is free software; you can redistribute it and/or modify it -// under the terms of the GNU Lesser General Public License -// as published by the Free Software Foundation, -// either version 3 of the License, or (at your option) any later version. -// -// BOINC is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -// See the GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with BOINC. If not, see . - - -// client-specific GPU code. 
Mostly GPU detection - -#include "cpp.h" - -#ifdef _WIN32 -#include "boinc_win.h" -#ifndef SIM -#include -#endif -#else -#ifdef __APPLE__ -// Suppress obsolete warning when building for OS 10.3.9 -#define DLOPEN_NO_WARN -#endif -#include "config.h" -#include -#include -#include -#endif - -#include "coproc.h" -#include "str_util.h" -#include "util.h" - -#include "client_state.h" -#include "client_msgs.h" - -using std::string; -using std::vector; - -//#define MEASURE_AVAILABLE_RAM - -static bool in_vector(int n, vector& v) { - for (unsigned int i=0; i&descs, vector&warnings, - vector& ignore_cuda_dev, - vector& ignore_ati_dev -) { - -#ifdef _WIN32 - COPROC_CUDA::get(*this, use_all, descs, warnings, ignore_cuda_dev); - COPROC_ATI::get(*this, descs, warnings, ignore_ati_dev); -#else - void (*old_sig)(int) = signal(SIGSEGV, segv_handler); - if (setjmp(resume)) { - warnings.push_back("Caught SIGSEGV in NVIDIA GPU detection"); - } else { - COPROC_CUDA::get(*this, use_all, descs, warnings, ignore_cuda_dev); - } -#ifndef __APPLE__ // ATI does not yet support CAL on Macs - if (setjmp(resume)) { - warnings.push_back("Caught SIGSEGV in ATI GPU detection"); - } else { - COPROC_ATI::get(*this, descs, warnings, ignore_ati_dev); - } -#endif - signal(SIGSEGV, old_sig); -#endif -} - -// return 1/-1/0 if device 1 is more/less/same capable than device 2. 
-// If "loose", ignore FLOPS and tolerate small memory diff -// -int cuda_compare(COPROC_CUDA& c1, COPROC_CUDA& c2, bool loose) { - if (c1.prop.major > c2.prop.major) return 1; - if (c1.prop.major < c2.prop.major) return -1; - if (c1.prop.minor > c2.prop.minor) return 1; - if (c1.prop.minor < c2.prop.minor) return -1; - if (c1.cuda_version > c2.cuda_version) return 1; - if (c1.cuda_version < c2.cuda_version) return -1; - if (loose) { - if (c1.prop.totalGlobalMem > 1.4*c2.prop.totalGlobalMem) return 1; - if (c1.prop.totalGlobalMem < .7* c2.prop.totalGlobalMem) return -1; - return 0; - } - if (c1.prop.totalGlobalMem > c2.prop.totalGlobalMem) return 1; - if (c1.prop.totalGlobalMem < c2.prop.totalGlobalMem) return -1; - double s1 = c1.peak_flops(); - double s2 = c2.peak_flops(); - if (s1 > s2) return 1; - if (s1 < s2) return -1; - return 0; -} - -#ifdef _WIN32 -typedef int (__stdcall *CUDA_GDC)(int *count); -typedef int (__stdcall *CUDA_GDV)(int* version); -typedef int (__stdcall *CUDA_GDI)(int); -typedef int (__stdcall *CUDA_GDG)(int*, int); -typedef int (__stdcall *CUDA_GDA)(int*, int, int); -typedef int (__stdcall *CUDA_GDN)(char*, int, int); -typedef int (__stdcall *CUDA_GDM)(unsigned int*, int); -typedef int (__stdcall *CUDA_GDCC)(int*, int*, int); -typedef int (__stdcall *CUDA_CC)(unsigned int*, unsigned int, unsigned int); -typedef int (__stdcall *CUDA_CD)(unsigned int); -typedef int (__stdcall *CUDA_MA)(unsigned int*, unsigned int); -typedef int (__stdcall *CUDA_MF)(unsigned int); -typedef int (__stdcall *CUDA_MGI)(unsigned int*, unsigned int*); - -CUDA_GDC __cuDeviceGetCount = NULL; -CUDA_GDV __cuDriverGetVersion = NULL; -CUDA_GDI __cuInit = NULL; -CUDA_GDG __cuDeviceGet = NULL; -CUDA_GDA __cuDeviceGetAttribute = NULL; -CUDA_GDN __cuDeviceGetName = NULL; -CUDA_GDM __cuDeviceTotalMem = NULL; -CUDA_GDCC __cuDeviceComputeCapability = NULL; -CUDA_CC __cuCtxCreate = NULL; -CUDA_CD __cuCtxDestroy = NULL; -CUDA_MA __cuMemAlloc = NULL; -CUDA_MF __cuMemFree = NULL; 
-CUDA_MGI __cuMemGetInfo = NULL; -#else -void* cudalib; -int (*__cuInit)(int); -int (*__cuDeviceGetCount)(int*); -int (*__cuDriverGetVersion)(int*); -int (*__cuDeviceGet)(int*, int); -int (*__cuDeviceGetAttribute)(int*, int, int); -int (*__cuDeviceGetName)(char*, int, int); -int (*__cuDeviceTotalMem)(unsigned int*, int); -int (*__cuDeviceComputeCapability)(int*, int*, int); -int (*__cuCtxCreate)(unsigned int*, unsigned int, unsigned int); -int (*__cuCtxDestroy)(unsigned int); -int (*__cuMemAlloc)(unsigned int*, unsigned int); -int (*__cuMemFree)(unsigned int); -int (*__cuMemGetInfo)(unsigned int*, unsigned int*); -#endif - -// NVIDIA interfaces are documented here: -// http://developer.download.nvidia.com/compute/cuda/2_3/toolkit/docs/online/index.html - -void COPROC_CUDA::get( - COPROCS& coprocs, - bool use_all, // if false, use only those equivalent to most capable - vector& descs, - vector& warnings, - vector& ignore_devs -) { - int count, retval; - char buf[256]; - -#ifdef _WIN32 - HMODULE cudalib = LoadLibrary("nvcuda.dll"); - if (!cudalib) { - warnings.push_back("No NVIDIA library found"); - return; - } - __cuDeviceGetCount = (CUDA_GDC)GetProcAddress( cudalib, "cuDeviceGetCount" ); - __cuDriverGetVersion = (CUDA_GDV)GetProcAddress( cudalib, "cuDriverGetVersion" ); - __cuInit = (CUDA_GDI)GetProcAddress( cudalib, "cuInit" ); - __cuDeviceGet = (CUDA_GDG)GetProcAddress( cudalib, "cuDeviceGet" ); - __cuDeviceGetAttribute = (CUDA_GDA)GetProcAddress( cudalib, "cuDeviceGetAttribute" ); - __cuDeviceGetName = (CUDA_GDN)GetProcAddress( cudalib, "cuDeviceGetName" ); - __cuDeviceTotalMem = (CUDA_GDM)GetProcAddress( cudalib, "cuDeviceTotalMem" ); - __cuDeviceComputeCapability = (CUDA_GDCC)GetProcAddress( cudalib, "cuDeviceComputeCapability" ); - __cuCtxCreate = (CUDA_CC)GetProcAddress( cudalib, "cuCtxCreate" ); - __cuCtxDestroy = (CUDA_CD)GetProcAddress( cudalib, "cuCtxDestroy" ); - __cuMemAlloc = (CUDA_MA)GetProcAddress( cudalib, "cuMemAlloc" ); - __cuMemFree = 
(CUDA_MF)GetProcAddress( cudalib, "cuMemFree" ); - __cuMemGetInfo = (CUDA_MGI)GetProcAddress( cudalib, "cuMemGetInfo" ); - -#ifndef SIM - NvAPI_Status nvapiStatus; - NvDisplayHandle hDisplay; - NV_DISPLAY_DRIVER_VERSION Version; - memset(&Version, 0, sizeof(Version)); - Version.version = NV_DISPLAY_DRIVER_VERSION_VER; - - NvAPI_Initialize(); - for (int i=0; ; i++) { - nvapiStatus = NvAPI_EnumNvidiaDisplayHandle(i, &hDisplay); - if (nvapiStatus != NVAPI_OK) break; - nvapiStatus = NvAPI_GetDisplayDriverVersion(hDisplay, &Version); - if (nvapiStatus == NVAPI_OK) break; - } -#endif -#else - -#ifdef __APPLE__ - cudalib = dlopen("/usr/local/cuda/lib/libcuda.dylib", RTLD_NOW); -#else - cudalib = dlopen("libcuda.so", RTLD_NOW); -#endif - if (!cudalib) { - warnings.push_back("No NVIDIA library found"); - return; - } - __cuDeviceGetCount = (int(*)(int*)) dlsym(cudalib, "cuDeviceGetCount"); - __cuDriverGetVersion = (int(*)(int*)) dlsym( cudalib, "cuDriverGetVersion" ); - __cuInit = (int(*)(int)) dlsym( cudalib, "cuInit" ); - __cuDeviceGet = (int(*)(int*, int)) dlsym( cudalib, "cuDeviceGet" ); - __cuDeviceGetAttribute = (int(*)(int*, int, int)) dlsym( cudalib, "cuDeviceGetAttribute" ); - __cuDeviceGetName = (int(*)(char*, int, int)) dlsym( cudalib, "cuDeviceGetName" ); - __cuDeviceTotalMem = (int(*)(unsigned int*, int)) dlsym( cudalib, "cuDeviceTotalMem" ); - __cuDeviceComputeCapability = (int(*)(int*, int*, int)) dlsym( cudalib, "cuDeviceComputeCapability" ); - __cuCtxCreate = (int(*)(unsigned int*, unsigned int, unsigned int)) dlsym( cudalib, "cuCtxCreate" ); - __cuCtxDestroy = (int(*)(unsigned int)) dlsym( cudalib, "cuCtxDestroy" ); - __cuMemAlloc = (int(*)(unsigned int*, unsigned int)) dlsym( cudalib, "cuMemAlloc" ); - __cuMemFree = (int(*)(unsigned int)) dlsym( cudalib, "cuMemFree" ); - __cuMemGetInfo = (int(*)(unsigned int*, unsigned int*)) dlsym( cudalib, "cuMemGetInfo" ); -#endif - - if (!__cuDriverGetVersion) { - warnings.push_back("cuDriverGetVersion() missing from 
NVIDIA library"); - return; - } - if (!__cuInit) { - warnings.push_back("cuInit() missing from NVIDIA library"); - return; - } - if (!__cuDeviceGetCount) { - warnings.push_back("cuDeviceGetCount() missing from NVIDIA library"); - return; - } - if (!__cuDeviceGet) { - warnings.push_back("cuDeviceGet() missing from NVIDIA library"); - return; - } - if (!__cuDeviceGetAttribute) { - warnings.push_back("cuDeviceGetAttribute() missing from NVIDIA library"); - return; - } - if (!__cuDeviceTotalMem) { - warnings.push_back("cuDeviceTotalMem() missing from NVIDIA library"); - return; - } - if (!__cuDeviceComputeCapability) { - warnings.push_back("cuDeviceComputeCapability() missing from NVIDIA library"); - return; - } - if (!__cuCtxCreate) { - warnings.push_back("cuCtxCreate() missing from NVIDIA library"); - return; - } - if (!__cuCtxDestroy) { - warnings.push_back("cuCtxDestroy() missing from NVIDIA library"); - return; - } - if (!__cuMemAlloc) { - warnings.push_back("cuMemAlloc() missing from NVIDIA library"); - return; - } - if (!__cuMemFree) { - warnings.push_back("cuMemFree() missing from NVIDIA library"); - return; - } - if (!__cuMemGetInfo) { - warnings.push_back("cuMemGetInfo() missing from NVIDIA library"); - return; - } - - retval = (*__cuInit)(0); - if (retval) { - sprintf(buf, "NVIDIA drivers present but no GPUs found"); - warnings.push_back(buf); - return; - } - - int cuda_version; - retval = (*__cuDriverGetVersion)(&cuda_version); - if (retval) { - sprintf(buf, "cuDriverGetVersion() returned %d", retval); - warnings.push_back(buf); - return; - } - - vector gpus; - retval = (*__cuDeviceGetCount)(&count); - if (retval) { - sprintf(buf, "cuDeviceGetCount() returned %d", retval); - warnings.push_back(buf); - return; - } - sprintf(buf, "NVIDIA library reports %d GPU%s", count, (count==1)?"":"s"); - warnings.push_back(buf); - - int j; - unsigned int i; - COPROC_CUDA cc; - string s; - for (j=0; j 100) continue; // e.g. 
9999 is an error -#if defined(_WIN32) && !defined(SIM) - cc.display_driver_version = Version.drvVersion; -#else - cc.display_driver_version = 0; -#endif - cc.cuda_version = cuda_version; - cc.device_num = j; - gpus.push_back(cc); - } - - if (!gpus.size()) { - warnings.push_back("No CUDA-capable NVIDIA GPUs found"); - return; - } - - // identify the most capable non-ignored instance - // - COPROC_CUDA best; - bool first = true; - for (i=0; i 0) { - best = gpus[i]; - } - } - - // see which other instances are equivalent, - // and set the "count" and "device_nums" fields - // - best.count = 0; - for (i=0; itype, "CUDA"); - cc->count = count; - for (int i=0; idevice_nums[i] = i; - } - cc->display_driver_version = 18000; - cc->cuda_version = 2020; - strcpy(cc->prop.name, "Fake NVIDIA GPU"); - cc->prop.totalGlobalMem = (unsigned int)ram; - cc->prop.sharedMemPerBlock = 100; - cc->prop.regsPerBlock = 8; - cc->prop.warpSize = 10; - cc->prop.memPitch = 10; - cc->prop.maxThreadsPerBlock = 20; - cc->prop.maxThreadsDim[0] = 2; - cc->prop.maxThreadsDim[1] = 2; - cc->prop.maxThreadsDim[2] = 2; - cc->prop.maxGridSize[0] = 10; - cc->prop.maxGridSize[1] = 10; - cc->prop.maxGridSize[2] = 10; - cc->prop.totalConstMem = 10; - cc->prop.major = 1; - cc->prop.minor = 2; - cc->prop.clockRate = 1250000; - cc->prop.textureAlignment = 1000; - cc->prop.multiProcessorCount = 14; - coprocs.coprocs.push_back(cc); - return cc; -} - -// See how much RAM is available on each GPU. 
-// If this fails, set "available_ram_unknown" -// -void COPROC_CUDA::get_available_ram() { -#ifdef MEASURE_AVAILABLE_RAM - int device, i, retval; - unsigned int memfree, memtotal; - unsigned int ctx; - - // avoid crash if faked GPU - // - if (!__cuDeviceGet) { - for (i=0; i& descs, vector& warnings, vector& ignore_devs -) { - CALuint numDevices, cal_major, cal_minor, cal_imp; - CALdevice device; - CALdeviceinfo info; - CALdeviceattribs attribs; - char buf[256]; - bool amdrt_detected = false; - bool atirt_detected = false; - int retval; - - attribs.struct_size = sizeof(CALdeviceattribs); - device = 0; - numDevices =0; - -#ifdef _WIN32 - -#if defined _M_X64 - const char* atilib_name = "aticalrt64.dll"; - const char* amdlib_name = "amdcalrt64.dll"; -#else - const char* atilib_name = "aticalrt.dll"; - const char* amdlib_name = "amdcalrt.dll"; -#endif - - HINSTANCE callib = LoadLibrary(atilib_name); - if (callib) { - atirt_detected = true; - } else { - callib = LoadLibrary(amdlib_name); - if (callib) { - amdrt_detected = true; - } - } - - if (!callib) { - warnings.push_back("No ATI library found."); - return; - } - - __calInit = (ATI_GDI)GetProcAddress(callib, "calInit" ); - __calGetVersion = (ATI_VER)GetProcAddress(callib, "calGetVersion" ); - __calDeviceGetCount = (ATI_GDC)GetProcAddress(callib, "calDeviceGetCount" ); - __calDeviceGetAttribs =(ATI_ATTRIBS)GetProcAddress(callib, "calDeviceGetAttribs" ); - __calShutdown = (ATI_CLOSE)GetProcAddress(callib, "calShutdown" ); - __calDeviceGetInfo = (ATI_INFO)GetProcAddress(callib, "calDeviceGetInfo" ); - __calDeviceGetStatus = (ATI_STATUS)GetProcAddress(callib, "calDeviceGetStatus" ); - __calDeviceOpen = (ATI_DEVICEOPEN)GetProcAddress(callib, "calDeviceOpen" ); - __calDeviceClose = (ATI_DEVICECLOSE)GetProcAddress(callib, "calDeviceClose" ); - -#else - - void* callib; - - callib = dlopen("libaticalrt.so", RTLD_NOW); - if (!callib) { - warnings.push_back("No ATI library found"); - return; - } - - atirt_detected = true; - - 
__calInit = (int(*)()) dlsym(callib, "calInit"); - __calGetVersion = (int(*)(CALuint*, CALuint*, CALuint*)) dlsym(callib, "calGetVersion"); - __calDeviceGetCount = (int(*)(CALuint*)) dlsym(callib, "calDeviceGetCount"); - __calDeviceGetAttribs = (int(*)(CALdeviceattribs*, CALuint)) dlsym(callib, "calDeviceGetAttribs"); - __calShutdown = (int(*)()) dlsym(callib, "calShutdown"); - __calDeviceGetInfo = (int(*)(CALdeviceinfo*, CALuint)) dlsym(callib, "calDeviceGetInfo"); - __calDeviceGetStatus = (int(*)(CALdevicestatus*, CALdevice)) dlsym(callib, "calDeviceGetStatus"); - __calDeviceOpen = (int(*)(CALdevice*, CALuint)) dlsym(callib, "calDeviceOpen"); - __calDeviceClose = (int(*)(CALdevice)) dlsym(callib, "calDeviceClose"); - -#endif - - if (!__calInit) { - warnings.push_back("calInit() missing from CAL library"); - return; - } - if (!__calGetVersion) { - warnings.push_back("calGetVersion() missing from CAL library"); - return; - } - if (!__calDeviceGetCount) { - warnings.push_back("calDeviceGetCount() missing from CAL library"); - return; - } - if (!__calDeviceGetAttribs) { - warnings.push_back("calDeviceGetAttribs() missing from CAL library"); - return; - } - if (!__calDeviceGetInfo) { - warnings.push_back("calDeviceGetInfo() missing from CAL library"); - return; - } - if (!__calDeviceGetStatus) { - warnings.push_back("calDeviceGetStatus() missing from CAL library"); - return; - } - if (!__calDeviceOpen) { - warnings.push_back("calDeviceOpen() missing from CAL library"); - return; - } - if (!__calDeviceClose) { - warnings.push_back("calDeviceClose() missing from CAL library"); - return; - } - - retval = (*__calInit)(); - if (retval != CAL_RESULT_OK) { - sprintf(buf, "calInit() returned %d", retval); - warnings.push_back(buf); - return; - } - retval = (*__calDeviceGetCount)(&numDevices); - if (retval != CAL_RESULT_OK) { - sprintf(buf, "calDeviceGetCount() returned %d", retval); - warnings.push_back(buf); - return; - } - retval = (*__calGetVersion)(&cal_major, &cal_minor, 
&cal_imp); - if (retval != CAL_RESULT_OK) { - sprintf(buf, "calGetVersion() returned %d", retval); - warnings.push_back(buf); - return; - } - - if (!numDevices) { - warnings.push_back("No usable CAL devices found"); - return; - } - - COPROC_ATI cc, cc2; - string s, gpu_name; - vector gpus; - for (CALuint i=0; i best.peak_flops()) { - best = gpus[i]; - } - sprintf(buf2, "ATI GPU %d: %s", gpus[i].device_num, buf); - } - descs.push_back(buf2); - } - best.count = 0; - for (unsigned int i=0; itype, "ATI"); - coprocs.coprocs.push_back(ccp); - - // shut down, otherwise Lenovo won't be able to switch to low-power GPU - // - retval = (*__calShutdown)(); -} - -COPROC_ATI* fake_ati(COPROCS& coprocs, double ram, int count) { - COPROC_ATI* cc = new COPROC_ATI; - strcpy(cc->type, "ATI"); - strcpy(cc->version, "1.4.3"); - strcpy(cc->name, "foobar"); - cc->count = count; - memset(&cc->attribs, 0, sizeof(cc->attribs)); - memset(&cc->info, 0, sizeof(cc->info)); - cc->attribs.localRAM = (int)(ram/MEGA); - cc->attribs.numberOfSIMD = 32; - cc->attribs.wavefrontSize = 32; - cc->attribs.engineClock = 50; - for (int i=0; idevice_nums[i] = i; - } - coprocs.coprocs.push_back(cc); - return cc; -} - -void COPROC_ATI::get_available_ram() { -#ifdef MEASURE_AVAILABLE_RAM - CALdevicestatus st; - CALdevice dev; - int i, retval; - - st.struct_size = sizeof(CALdevicestatus); - - // avoid crash if faked GPU - if (!__calInit) { - for (i=0; i. + + +// client-specific GPU code. 
Mostly GPU detection + +#include "cpp.h" + +#ifdef _WIN32 +#include "boinc_win.h" +#ifndef SIM +#include +#endif +#else +#ifdef __APPLE__ +// Suppress obsolete warning when building for OS 10.3.9 +#define DLOPEN_NO_WARN +#endif +#include "config.h" +#include +#include +#include +#endif + +#include "coproc.h" +#include "str_util.h" +#include "util.h" + +#include "client_state.h" +#include "client_msgs.h" + +using std::string; +using std::vector; + +//#define MEASURE_AVAILABLE_RAM + +static bool in_vector(int n, vector& v) { + for (unsigned int i=0; i&descs, vector&warnings, + vector& ignore_cuda_dev, + vector& ignore_ati_dev +) { + +#ifdef _WIN32 + cuda.get(use_all, descs, warnings, ignore_cuda_dev); + ati.get(descs, warnings, ignore_ati_dev); +#else + void (*old_sig)(int) = signal(SIGSEGV, segv_handler); + if (setjmp(resume)) { + warnings.push_back("Caught SIGSEGV in NVIDIA GPU detection"); + } else { + cuda.get(use_all, descs, warnings, ignore_cuda_dev); + } +#ifndef __APPLE__ // ATI does not yet support CAL on Macs + if (setjmp(resume)) { + warnings.push_back("Caught SIGSEGV in ATI GPU detection"); + } else { + ati.get(descs, warnings, ignore_ati_dev); + } +#endif + signal(SIGSEGV, old_sig); +#endif +} + +// return 1/-1/0 if device 1 is more/less/same capable than device 2. 
+// If "loose", ignore FLOPS and tolerate small memory diff +// +int cuda_compare(COPROC_CUDA& c1, COPROC_CUDA& c2, bool loose) { + if (c1.prop.major > c2.prop.major) return 1; + if (c1.prop.major < c2.prop.major) return -1; + if (c1.prop.minor > c2.prop.minor) return 1; + if (c1.prop.minor < c2.prop.minor) return -1; + if (c1.cuda_version > c2.cuda_version) return 1; + if (c1.cuda_version < c2.cuda_version) return -1; + if (loose) { + if (c1.prop.totalGlobalMem > 1.4*c2.prop.totalGlobalMem) return 1; + if (c1.prop.totalGlobalMem < .7* c2.prop.totalGlobalMem) return -1; + return 0; + } + if (c1.prop.totalGlobalMem > c2.prop.totalGlobalMem) return 1; + if (c1.prop.totalGlobalMem < c2.prop.totalGlobalMem) return -1; + double s1 = c1.peak_flops(); + double s2 = c2.peak_flops(); + if (s1 > s2) return 1; + if (s1 < s2) return -1; + return 0; +} + +#ifdef _WIN32 +typedef int (__stdcall *CUDA_GDC)(int *count); +typedef int (__stdcall *CUDA_GDV)(int* version); +typedef int (__stdcall *CUDA_GDI)(int); +typedef int (__stdcall *CUDA_GDG)(int*, int); +typedef int (__stdcall *CUDA_GDA)(int*, int, int); +typedef int (__stdcall *CUDA_GDN)(char*, int, int); +typedef int (__stdcall *CUDA_GDM)(unsigned int*, int); +typedef int (__stdcall *CUDA_GDCC)(int*, int*, int); +typedef int (__stdcall *CUDA_CC)(unsigned int*, unsigned int, unsigned int); +typedef int (__stdcall *CUDA_CD)(unsigned int); +typedef int (__stdcall *CUDA_MA)(unsigned int*, unsigned int); +typedef int (__stdcall *CUDA_MF)(unsigned int); +typedef int (__stdcall *CUDA_MGI)(unsigned int*, unsigned int*); + +CUDA_GDC __cuDeviceGetCount = NULL; +CUDA_GDV __cuDriverGetVersion = NULL; +CUDA_GDI __cuInit = NULL; +CUDA_GDG __cuDeviceGet = NULL; +CUDA_GDA __cuDeviceGetAttribute = NULL; +CUDA_GDN __cuDeviceGetName = NULL; +CUDA_GDM __cuDeviceTotalMem = NULL; +CUDA_GDCC __cuDeviceComputeCapability = NULL; +CUDA_CC __cuCtxCreate = NULL; +CUDA_CD __cuCtxDestroy = NULL; +CUDA_MA __cuMemAlloc = NULL; +CUDA_MF __cuMemFree = NULL; 
+CUDA_MGI __cuMemGetInfo = NULL; +#else +void* cudalib; +int (*__cuInit)(int); +int (*__cuDeviceGetCount)(int*); +int (*__cuDriverGetVersion)(int*); +int (*__cuDeviceGet)(int*, int); +int (*__cuDeviceGetAttribute)(int*, int, int); +int (*__cuDeviceGetName)(char*, int, int); +int (*__cuDeviceTotalMem)(unsigned int*, int); +int (*__cuDeviceComputeCapability)(int*, int*, int); +int (*__cuCtxCreate)(unsigned int*, unsigned int, unsigned int); +int (*__cuCtxDestroy)(unsigned int); +int (*__cuMemAlloc)(unsigned int*, unsigned int); +int (*__cuMemFree)(unsigned int); +int (*__cuMemGetInfo)(unsigned int*, unsigned int*); +#endif + +// NVIDIA interfaces are documented here: +// http://developer.download.nvidia.com/compute/cuda/2_3/toolkit/docs/online/index.html + +void COPROC_CUDA::get( + bool use_all, // if false, use only those equivalent to most capable + vector& descs, + vector& warnings, + vector& ignore_devs +) { + int count, retval; + char buf[256]; + +#ifdef _WIN32 + HMODULE cudalib = LoadLibrary("nvcuda.dll"); + if (!cudalib) { + warnings.push_back("No NVIDIA library found"); + return; + } + __cuDeviceGetCount = (CUDA_GDC)GetProcAddress( cudalib, "cuDeviceGetCount" ); + __cuDriverGetVersion = (CUDA_GDV)GetProcAddress( cudalib, "cuDriverGetVersion" ); + __cuInit = (CUDA_GDI)GetProcAddress( cudalib, "cuInit" ); + __cuDeviceGet = (CUDA_GDG)GetProcAddress( cudalib, "cuDeviceGet" ); + __cuDeviceGetAttribute = (CUDA_GDA)GetProcAddress( cudalib, "cuDeviceGetAttribute" ); + __cuDeviceGetName = (CUDA_GDN)GetProcAddress( cudalib, "cuDeviceGetName" ); + __cuDeviceTotalMem = (CUDA_GDM)GetProcAddress( cudalib, "cuDeviceTotalMem" ); + __cuDeviceComputeCapability = (CUDA_GDCC)GetProcAddress( cudalib, "cuDeviceComputeCapability" ); + __cuCtxCreate = (CUDA_CC)GetProcAddress( cudalib, "cuCtxCreate" ); + __cuCtxDestroy = (CUDA_CD)GetProcAddress( cudalib, "cuCtxDestroy" ); + __cuMemAlloc = (CUDA_MA)GetProcAddress( cudalib, "cuMemAlloc" ); + __cuMemFree = (CUDA_MF)GetProcAddress( 
cudalib, "cuMemFree" ); + __cuMemGetInfo = (CUDA_MGI)GetProcAddress( cudalib, "cuMemGetInfo" ); + +#ifndef SIM + NvAPI_Status nvapiStatus; + NvDisplayHandle hDisplay; + NV_DISPLAY_DRIVER_VERSION Version; + memset(&Version, 0, sizeof(Version)); + Version.version = NV_DISPLAY_DRIVER_VERSION_VER; + + NvAPI_Initialize(); + for (int i=0; ; i++) { + nvapiStatus = NvAPI_EnumNvidiaDisplayHandle(i, &hDisplay); + if (nvapiStatus != NVAPI_OK) break; + nvapiStatus = NvAPI_GetDisplayDriverVersion(hDisplay, &Version); + if (nvapiStatus == NVAPI_OK) break; + } +#endif +#else + +#ifdef __APPLE__ + cudalib = dlopen("/usr/local/cuda/lib/libcuda.dylib", RTLD_NOW); +#else + cudalib = dlopen("libcuda.so", RTLD_NOW); +#endif + if (!cudalib) { + warnings.push_back("No NVIDIA library found"); + return; + } + __cuDeviceGetCount = (int(*)(int*)) dlsym(cudalib, "cuDeviceGetCount"); + __cuDriverGetVersion = (int(*)(int*)) dlsym( cudalib, "cuDriverGetVersion" ); + __cuInit = (int(*)(int)) dlsym( cudalib, "cuInit" ); + __cuDeviceGet = (int(*)(int*, int)) dlsym( cudalib, "cuDeviceGet" ); + __cuDeviceGetAttribute = (int(*)(int*, int, int)) dlsym( cudalib, "cuDeviceGetAttribute" ); + __cuDeviceGetName = (int(*)(char*, int, int)) dlsym( cudalib, "cuDeviceGetName" ); + __cuDeviceTotalMem = (int(*)(unsigned int*, int)) dlsym( cudalib, "cuDeviceTotalMem" ); + __cuDeviceComputeCapability = (int(*)(int*, int*, int)) dlsym( cudalib, "cuDeviceComputeCapability" ); + __cuCtxCreate = (int(*)(unsigned int*, unsigned int, unsigned int)) dlsym( cudalib, "cuCtxCreate" ); + __cuCtxDestroy = (int(*)(unsigned int)) dlsym( cudalib, "cuCtxDestroy" ); + __cuMemAlloc = (int(*)(unsigned int*, unsigned int)) dlsym( cudalib, "cuMemAlloc" ); + __cuMemFree = (int(*)(unsigned int)) dlsym( cudalib, "cuMemFree" ); + __cuMemGetInfo = (int(*)(unsigned int*, unsigned int*)) dlsym( cudalib, "cuMemGetInfo" ); +#endif + + if (!__cuDriverGetVersion) { + warnings.push_back("cuDriverGetVersion() missing from NVIDIA library"); + 
return; + } + if (!__cuInit) { + warnings.push_back("cuInit() missing from NVIDIA library"); + return; + } + if (!__cuDeviceGetCount) { + warnings.push_back("cuDeviceGetCount() missing from NVIDIA library"); + return; + } + if (!__cuDeviceGet) { + warnings.push_back("cuDeviceGet() missing from NVIDIA library"); + return; + } + if (!__cuDeviceGetAttribute) { + warnings.push_back("cuDeviceGetAttribute() missing from NVIDIA library"); + return; + } + if (!__cuDeviceTotalMem) { + warnings.push_back("cuDeviceTotalMem() missing from NVIDIA library"); + return; + } + if (!__cuDeviceComputeCapability) { + warnings.push_back("cuDeviceComputeCapability() missing from NVIDIA library"); + return; + } + if (!__cuCtxCreate) { + warnings.push_back("cuCtxCreate() missing from NVIDIA library"); + return; + } + if (!__cuCtxDestroy) { + warnings.push_back("cuCtxDestroy() missing from NVIDIA library"); + return; + } + if (!__cuMemAlloc) { + warnings.push_back("cuMemAlloc() missing from NVIDIA library"); + return; + } + if (!__cuMemFree) { + warnings.push_back("cuMemFree() missing from NVIDIA library"); + return; + } + if (!__cuMemGetInfo) { + warnings.push_back("cuMemGetInfo() missing from NVIDIA library"); + return; + } + + retval = (*__cuInit)(0); + if (retval) { + sprintf(buf, "NVIDIA drivers present but no GPUs found"); + warnings.push_back(buf); + return; + } + + int cuda_version; + retval = (*__cuDriverGetVersion)(&cuda_version); + if (retval) { + sprintf(buf, "cuDriverGetVersion() returned %d", retval); + warnings.push_back(buf); + return; + } + + vector gpus; + retval = (*__cuDeviceGetCount)(&count); + if (retval) { + sprintf(buf, "cuDeviceGetCount() returned %d", retval); + warnings.push_back(buf); + return; + } + sprintf(buf, "NVIDIA library reports %d GPU%s", count, (count==1)?"":"s"); + warnings.push_back(buf); + + int j; + unsigned int i; + COPROC_CUDA cc; + string s; + for (j=0; j 100) continue; // e.g. 
9999 is an error +#if defined(_WIN32) && !defined(SIM) + cc.display_driver_version = Version.drvVersion; +#else + cc.display_driver_version = 0; +#endif + cc.cuda_version = cuda_version; + cc.device_num = j; + gpus.push_back(cc); + } + + if (!gpus.size()) { + warnings.push_back("No CUDA-capable NVIDIA GPUs found"); + return; + } + + // identify the most capable non-ignored instance + // + COPROC_CUDA best; + bool first = true; + for (i=0; i 0) { + best = gpus[i]; + } + } + + // see which other instances are equivalent, + // and set the "count" and "device_nums" fields + // + best.count = 0; + for (i=0; i& descs, vector& warnings, vector& ignore_devs +) { + CALuint numDevices, cal_major, cal_minor, cal_imp; + CALdevice device; + CALdeviceinfo info; + CALdeviceattribs attribs; + char buf[256]; + bool amdrt_detected = false; + bool atirt_detected = false; + int retval; + + attribs.struct_size = sizeof(CALdeviceattribs); + device = 0; + numDevices =0; + +#ifdef _WIN32 + +#if defined _M_X64 + const char* atilib_name = "aticalrt64.dll"; + const char* amdlib_name = "amdcalrt64.dll"; +#else + const char* atilib_name = "aticalrt.dll"; + const char* amdlib_name = "amdcalrt.dll"; +#endif + + HINSTANCE callib = LoadLibrary(atilib_name); + if (callib) { + atirt_detected = true; + } else { + callib = LoadLibrary(amdlib_name); + if (callib) { + amdrt_detected = true; + } + } + + if (!callib) { + warnings.push_back("No ATI library found."); + return; + } + + __calInit = (ATI_GDI)GetProcAddress(callib, "calInit" ); + __calGetVersion = (ATI_VER)GetProcAddress(callib, "calGetVersion" ); + __calDeviceGetCount = (ATI_GDC)GetProcAddress(callib, "calDeviceGetCount" ); + __calDeviceGetAttribs =(ATI_ATTRIBS)GetProcAddress(callib, "calDeviceGetAttribs" ); + __calShutdown = (ATI_CLOSE)GetProcAddress(callib, "calShutdown" ); + __calDeviceGetInfo = (ATI_INFO)GetProcAddress(callib, "calDeviceGetInfo" ); + __calDeviceGetStatus = (ATI_STATUS)GetProcAddress(callib, "calDeviceGetStatus" ); + 
__calDeviceOpen = (ATI_DEVICEOPEN)GetProcAddress(callib, "calDeviceOpen" ); + __calDeviceClose = (ATI_DEVICECLOSE)GetProcAddress(callib, "calDeviceClose" ); + +#else + + void* callib; + + callib = dlopen("libaticalrt.so", RTLD_NOW); + if (!callib) { + warnings.push_back("No ATI library found"); + return; + } + + atirt_detected = true; + + __calInit = (int(*)()) dlsym(callib, "calInit"); + __calGetVersion = (int(*)(CALuint*, CALuint*, CALuint*)) dlsym(callib, "calGetVersion"); + __calDeviceGetCount = (int(*)(CALuint*)) dlsym(callib, "calDeviceGetCount"); + __calDeviceGetAttribs = (int(*)(CALdeviceattribs*, CALuint)) dlsym(callib, "calDeviceGetAttribs"); + __calShutdown = (int(*)()) dlsym(callib, "calShutdown"); + __calDeviceGetInfo = (int(*)(CALdeviceinfo*, CALuint)) dlsym(callib, "calDeviceGetInfo"); + __calDeviceGetStatus = (int(*)(CALdevicestatus*, CALdevice)) dlsym(callib, "calDeviceGetStatus"); + __calDeviceOpen = (int(*)(CALdevice*, CALuint)) dlsym(callib, "calDeviceOpen"); + __calDeviceClose = (int(*)(CALdevice)) dlsym(callib, "calDeviceClose"); + +#endif + + if (!__calInit) { + warnings.push_back("calInit() missing from CAL library"); + return; + } + if (!__calGetVersion) { + warnings.push_back("calGetVersion() missing from CAL library"); + return; + } + if (!__calDeviceGetCount) { + warnings.push_back("calDeviceGetCount() missing from CAL library"); + return; + } + if (!__calDeviceGetAttribs) { + warnings.push_back("calDeviceGetAttribs() missing from CAL library"); + return; + } + if (!__calDeviceGetInfo) { + warnings.push_back("calDeviceGetInfo() missing from CAL library"); + return; + } + if (!__calDeviceGetStatus) { + warnings.push_back("calDeviceGetStatus() missing from CAL library"); + return; + } + if (!__calDeviceOpen) { + warnings.push_back("calDeviceOpen() missing from CAL library"); + return; + } + if (!__calDeviceClose) { + warnings.push_back("calDeviceClose() missing from CAL library"); + return; + } + + retval = (*__calInit)(); + if (retval != 
CAL_RESULT_OK) { + sprintf(buf, "calInit() returned %d", retval); + warnings.push_back(buf); + return; + } + retval = (*__calDeviceGetCount)(&numDevices); + if (retval != CAL_RESULT_OK) { + sprintf(buf, "calDeviceGetCount() returned %d", retval); + warnings.push_back(buf); + return; + } + retval = (*__calGetVersion)(&cal_major, &cal_minor, &cal_imp); + if (retval != CAL_RESULT_OK) { + sprintf(buf, "calGetVersion() returned %d", retval); + warnings.push_back(buf); + return; + } + + if (!numDevices) { + warnings.push_back("No usable CAL devices found"); + return; + } + + COPROC_ATI cc, cc2; + string s, gpu_name; + vector gpus; + for (CALuint i=0; i best.peak_flops()) { + best = gpus[i]; + } + sprintf(buf2, "ATI GPU %d: %s", gpus[i].device_num, buf); + } + descs.push_back(buf2); + } + best.count = 0; + for (unsigned int i=0; icount) { msg_printf(NULL, MSG_INTERNAL_ERROR, "Missing a %s coprocessor", cp2->type ); @@ -162,10 +158,10 @@ struct PROC_RESOURCES { COPROC* cp2; if (av.ncudas) { x = av.ncudas; - cp2 = coprocs.lookup("CUDA"); + cp2 = &gstate.host_info.coprocs.cuda; } else if (av.natis) { x = av.natis; - cp2 = coprocs.lookup("ATI"); + cp2 = &gstate.host_info.coprocs.ati; } else { return; } @@ -492,18 +488,18 @@ void CLIENT_STATE::reset_debt_accounting() { for (i=0; icpu_pwf.reset_debt_accounting(); - if (coproc_cuda) { + if (host_info.have_cuda()) { p->cuda_pwf.reset_debt_accounting(); } - if (coproc_ati) { + if (host_info.have_ati()) { p->ati_pwf.reset_debt_accounting(); } } cpu_work_fetch.reset_debt_accounting(); - if (coproc_cuda) { + if (host_info.have_cuda()) { cuda_work_fetch.reset_debt_accounting(); } - if (coproc_ati) { + if (host_info.have_ati()) { ati_work_fetch.reset_debt_accounting(); } debt_interval_start = now; @@ -548,11 +544,11 @@ void CLIENT_STATE::adjust_debts() { cpu_work_fetch.update_long_term_debts(); cpu_work_fetch.update_short_term_debts(); - if (coproc_cuda) { + if (host_info.have_cuda()) { cuda_work_fetch.update_long_term_debts(); 
cuda_work_fetch.update_short_term_debts(); } - if (coproc_ati) { + if (host_info.have_ati()) { ati_work_fetch.update_long_term_debts(); ati_work_fetch.update_short_term_debts(); } @@ -1194,16 +1190,16 @@ static inline void assign_coprocs(vector& jobs) { gstate.host_info.coprocs.clear_usage(); #ifndef SIM - if (coproc_cuda) { - coproc_cuda->get_available_ram(); + if (gstate.host_info.have_cuda()) { + gstate.host_info.coprocs.cuda.get_available_ram(); if (log_flags.coproc_debug) { - coproc_cuda->print_available_ram(); + gstate.host_info.coprocs.cuda.print_available_ram(); } } - if (coproc_ati) { - coproc_ati->get_available_ram(); + if (gstate.host_info.have_ati()) { + gstate.host_info.coprocs.ati.get_available_ram(); if (log_flags.coproc_debug) { - coproc_ati->print_available_ram(); + gstate.host_info.coprocs.ati.print_available_ram(); } } #endif @@ -1215,10 +1211,10 @@ static inline void assign_coprocs(vector& jobs) { APP_VERSION* avp = rp->avp; if (avp->ncudas) { usage = avp->ncudas; - cp = coproc_cuda; + cp = &gstate.host_info.coprocs.cuda; } else if (avp->natis) { usage = avp->natis; - cp = coproc_ati; + cp = &gstate.host_info.coprocs.ati; } else { continue; } @@ -1235,10 +1231,10 @@ static inline void assign_coprocs(vector& jobs) { APP_VERSION* avp = rp->avp; if (avp->ncudas) { usage = avp->ncudas; - cp = coproc_cuda; + cp = &gstate.host_info.coprocs.cuda; } else if (avp->natis) { usage = avp->natis; - cp = coproc_ati; + cp = &gstate.host_info.coprocs.ati; } else { job_iter++; continue; diff --git a/client/cs_scheduler.cpp b/client/cs_scheduler.cpp index eb446c42d5..7ebb763cc8 100644 --- a/client/cs_scheduler.cpp +++ b/client/cs_scheduler.cpp @@ -220,18 +220,18 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p) { // copy request values from RSC_WORK_FETCH to COPROC // - if (coproc_cuda) { - coproc_cuda->req_secs = cuda_work_fetch.req_secs; - coproc_cuda->req_instances = cuda_work_fetch.req_instances; - coproc_cuda->estimated_delay = 
cuda_work_fetch.req_secs?cuda_work_fetch.busy_time_estimator.get_busy_time():0; + if (host_info.have_cuda()) { + host_info.coprocs.cuda.req_secs = cuda_work_fetch.req_secs; + host_info.coprocs.cuda.req_instances = cuda_work_fetch.req_instances; + host_info.coprocs.cuda.estimated_delay = cuda_work_fetch.req_secs?cuda_work_fetch.busy_time_estimator.get_busy_time():0; } - if (coproc_ati) { - coproc_ati->req_secs = ati_work_fetch.req_secs; - coproc_ati->req_instances = ati_work_fetch.req_instances; - coproc_ati->estimated_delay = ati_work_fetch.req_secs?ati_work_fetch.busy_time_estimator.get_busy_time():0; + if (host_info.have_ati()) { + host_info.coprocs.ati.req_secs = ati_work_fetch.req_secs; + host_info.coprocs.ati.req_instances = ati_work_fetch.req_instances; + host_info.coprocs.ati.estimated_delay = ati_work_fetch.req_secs?ati_work_fetch.busy_time_estimator.get_busy_time():0; } - if (host_info.coprocs.coprocs.size()) { + if (!host_info.coprocs.none()) { host_info.coprocs.write_xml(mf); } @@ -823,8 +823,8 @@ int CLIENT_STATE::handle_scheduler_reply(PROJECT* project, char* scheduler_url) ); if (!rp->avp) { msg_printf(project, MSG_INTERNAL_ERROR, - "No application found for task: %s %d %s; discarding", - rp->platform, rp->version_num, rp->plan_class + "No app version found for app %s platform %s ver %d class%s; discarding %s", + rp->wup->app->name, rp->platform, rp->version_num, rp->plan_class, rp->name ); delete rp; continue; @@ -861,13 +861,13 @@ int CLIENT_STATE::handle_scheduler_reply(PROJECT* project, char* scheduler_url) "[sched_op] estimated total CPU task duration: %.0f seconds", est_cpu_duration ); - if (coproc_cuda) { + if (host_info.have_cuda()) { msg_printf(project, MSG_INFO, "[sched_op] estimated total NVIDIA GPU task duration: %.0f seconds", est_cuda_duration ); } - if (coproc_ati) { + if (host_info.have_ati()) { msg_printf(project, MSG_INFO, "[sched_op] estimated total ATI GPU task duration: %.0f seconds", est_ati_duration diff --git 
a/client/cs_statefile.cpp b/client/cs_statefile.cpp index 154e4df54d..3974f0f810 100644 --- a/client/cs_statefile.cpp +++ b/client/cs_statefile.cpp @@ -913,8 +913,8 @@ int CLIENT_STATE::write_state_gui(MIOFILE& f) { core_client_version.minor, core_client_version.release, executing_as_daemon?1:0, - coproc_cuda?1:0, - coproc_ati?1:0 + host_info.have_cuda()?1:0, + host_info.have_ati()?1:0 ); for (i=0; i. - -// Simulate the processing of the current workload -// (include jobs that are downloading) -// with weighted round-robin (WRR) scheduling. -// -// For efficiency, we simulate an approximation of WRR. -// We don't model time-slicing. -// Instead we use a continuous model where, at a given point, -// each project has a set of running jobs that uses at most all CPUs. -// These jobs are assumed to run at a rate proportionate to their avg_ncpus, -// and each project gets total CPU proportionate to its RRS. -// -// For coprocessors, we saturate the resource; -// i.e. with 2 GPUs, we'd let a 1-GPU app and a 2-GPU app run together. -// Otherwise, there'd be the possibility of computing -// a nonzero shortfall inappropriately. -// -// Outputs are changes to global state: -// - deadline misses (per-project count, per-result flag) -// Deadline misses are not counted for tasks -// that are too large to run in RAM right now. 
-// - resource shortfalls (per-project and total) -// - counts of resources idle now -// - -#include "cpp.h" - -#ifdef _WIN32 -#include "boinc_win.h" -#else -#include "config.h" -#endif - -#include "client_state.h" -#include "coproc.h" -#include "client_msgs.h" - -inline void rsc_string(RESULT* rp, char* buf) { - APP_VERSION* avp = rp->avp; - if (avp->ncudas) { - sprintf(buf, "%.2f CPU + %.2f NV", avp->avg_ncpus, avp->ncudas); - } else if (avp->natis) { - sprintf(buf, "%.2f CPU + %.2f ATI", avp->avg_ncpus, avp->natis); - } else { - sprintf(buf, "%.2f CPU", avp->avg_ncpus); - } -} - -// this is here (rather than rr_sim.h) because its inline functions -// refer to RESULT -// -struct RR_SIM_STATUS { - std::vector active; - double active_ncpus; - double active_cudas; - double active_atis; - - inline void activate(RESULT* rp, double when) { - PROJECT* p = rp->project; - if (log_flags.rr_simulation) { - char buf[256]; - rsc_string(rp, buf); - msg_printf(p, MSG_INFO, - "[rr_sim] %.2f: starting %s (%s)", - when, rp->name, buf - ); - } - active.push_back(rp); - cpu_work_fetch.sim_nused += rp->avp->avg_ncpus; - cuda_work_fetch.sim_nused += rp->avp->ncudas; - ati_work_fetch.sim_nused += rp->avp->natis; - } - // remove *rpbest from active set, - // and adjust FLOPS left for other results - // - inline void remove_active(RESULT* rpbest) { - vector::iterator it = active.begin(); - while (it != active.end()) { - RESULT* rp = *it; - if (rp == rpbest) { - it = active.erase(it); - } else { - rp->rrsim_flops_left -= rp->rrsim_flops*rpbest->rrsim_finish_delay; - - // can be slightly less than 0 due to roundoff - // - if (rp->rrsim_flops_left < -1) { - msg_printf(rp->project, MSG_INTERNAL_ERROR, - "%s: negative FLOPs left %f", rp->name, rp->rrsim_flops_left - ); - } - if (rp->rrsim_flops_left < 0) { - rp->rrsim_flops_left = 0; - } - it++; - } - } - cpu_work_fetch.sim_nused -= rpbest->avp->avg_ncpus; - cuda_work_fetch.sim_nused -= rpbest->avp->ncudas; - ati_work_fetch.sim_nused -= 
rpbest->avp->natis; - } - - RR_SIM_STATUS() { - active_ncpus = 0; - active_cudas = 0; - active_atis = 0; - } - ~RR_SIM_STATUS() {} -}; - -void RR_SIM_PROJECT_STATUS::activate(RESULT* rp) { - active.push_back(rp); - rp->project->cpu_pwf.sim_nused += rp->avp->avg_ncpus; - rp->project->cuda_pwf.sim_nused += rp->avp->ncudas; - rp->project->ati_pwf.sim_nused += rp->avp->natis; -} - -void RR_SIM_PROJECT_STATUS::remove_active(RESULT* rp) { - std::vector::iterator it = active.begin(); - while (it != active.end()) { - if (*it == rp) { - it = active.erase(it); - } else { - it++; - } - } - rp->project->cpu_pwf.sim_nused -= rp->avp->avg_ncpus; - rp->project->cuda_pwf.sim_nused -= rp->avp->ncudas; - rp->project->ati_pwf.sim_nused -= rp->avp->natis; -} - -// estimate the rate (FLOPS) that this job will get long-term -// with weighted round-robin scheduling -// -void set_rrsim_flops(RESULT* rp) { - // For coproc jobs, use app version estimate - // - if (rp->uses_coprocs()) { - rp->rrsim_flops = rp->avp->flops * gstate.overall_cpu_frac(); - return; - } - PROJECT* p = rp->project; - - // For CPU jobs, estimate how many CPU seconds per second this job would get - // running with other jobs of this project, ignoring other factors - // - double x = 1; - if (p->cpu_pwf.sim_nused > gstate.ncpus) { - x = gstate.ncpus/p->cpu_pwf.sim_nused; - } - double r1 = x*rp->avp->avg_ncpus; - - // if the project's total CPU usage is more than its share, scale - // - double share_cpus = p->cpu_pwf.runnable_share*gstate.ncpus; - if (!share_cpus) share_cpus = gstate.ncpus; - // deal with projects w/ resource share = 0 - double r2 = r1; - if (p->cpu_pwf.sim_nused > share_cpus) { - r2 *= (share_cpus / p->cpu_pwf.sim_nused); - } - - // scale by overall CPU availability - // - double r3 = r2 * gstate.overall_cpu_frac(); - - rp->rrsim_flops = r3 * rp->avp->flops; -#if 0 - if (log_flags.rr_simulation) { - msg_printf(p, MSG_INFO, - "[rr_sim] set_rrsim_flops: %.2fG (r1 %.4f r2 %.4f r3 %.4f)", - 
rp->rrsim_flops/1e9, r1, r2, r3 - ); - } -#endif -} - -void CLIENT_STATE::print_deadline_misses() { - unsigned int i; - RESULT* rp; - PROJECT* p; - for (i=0; irr_sim_misses_deadline) { - msg_printf(rp->project, MSG_INFO, - "[cpu_sched] Result %s projected to miss deadline.", - rp->name - ); - } - } - for (i=0; icpu_pwf.deadlines_missed) { - msg_printf(p, MSG_INFO, - "[cpu_sched] Project has %d projected CPU deadline misses", - p->cpu_pwf.deadlines_missed - ); - } - if (p->cuda_pwf.deadlines_missed) { - msg_printf(p, MSG_INFO, - "[cpu_sched] Project has %d projected NVIDIA GPU deadline misses", - p->cuda_pwf.deadlines_missed - ); - } - if (p->ati_pwf.deadlines_missed) { - msg_printf(p, MSG_INFO, - "[cpu_sched] Project has %d projected ATI GPU deadline misses", - p->ati_pwf.deadlines_missed - ); - } - } -} - -#if 0 -// compute a per-app-version "temporary DCF" based on the elapsed time -// and fraction done of running jobs -// -void compute_temp_dcf() { - unsigned int i; - for (i=0; itemp_dcf = 1; - } - for (i=0; iest_dur(false) / atp->result->estimated_duration(false); - APP_VERSION* avp = atp->result->avp; - if (x < avp->temp_dcf) { - avp->temp_dcf = x; - } - } -} -#endif - -void CLIENT_STATE::rr_simulation() { - PROJECT* p, *pbest; - RESULT* rp, *rpbest; - RR_SIM_STATUS sim_status; - unsigned int i; - - double ar = available_ram(); - - work_fetch.rr_init(); - //compute_temp_dcf(); - - if (log_flags.rr_simulation) { - msg_printf(0, MSG_INFO, - "[rr_sim] rr_sim start: work_buf_total %.2f on_frac %.3f active_frac %.3f", - work_buf_total(), time_stats.on_frac, time_stats.active_frac - ); - } - - for (i=0; inon_cpu_intensive) continue; - p->rr_sim_status.clear(); - } - - // Decide what jobs to include in the simulation, - // and pick the ones that are initially running. 
- // NOTE: "results" is sorted by increasing arrival time - // - for (i=0; irr_sim_misses_deadline = false; - if (!rp->nearly_runnable()) continue; - if (rp->some_download_stalled()) continue; - if (rp->project->non_cpu_intensive) continue; - rp->rrsim_flops_left = rp->estimated_flops_remaining(); - - //if (rp->rrsim_flops_left <= 0) continue; - // job may have fraction_done=1 but not be done; - // if it's past its deadline, we need to mark it as such - - p = rp->project; - p->pwf.has_runnable_jobs = true; - p->cpu_pwf.nused_total += rp->avp->avg_ncpus; - if (rp->uses_cuda() && coproc_cuda) { - p->cuda_pwf.nused_total += rp->avp->ncudas; - p->cuda_pwf.has_runnable_jobs = true; - if (cuda_work_fetch.sim_nused < coproc_cuda->count) { - sim_status.activate(rp, 0); - p->rr_sim_status.activate(rp); - } else { - cuda_work_fetch.pending.push_back(rp); - } - } else if (rp->uses_ati() && coproc_ati) { - p->ati_pwf.nused_total += rp->avp->natis; - p->ati_pwf.has_runnable_jobs = true; - if (ati_work_fetch.sim_nused < coproc_ati->count) { - sim_status.activate(rp, 0); - p->rr_sim_status.activate(rp); - } else { - ati_work_fetch.pending.push_back(rp); - } - } else { - p->cpu_pwf.has_runnable_jobs = true; - if (p->cpu_pwf.sim_nused < ncpus) { - sim_status.activate(rp, 0); - p->rr_sim_status.activate(rp); - } else { - p->rr_sim_status.add_pending(rp); - } - } - } - - // note the number of idle instances - // - cpu_work_fetch.nidle_now = ncpus - cpu_work_fetch.sim_nused; - if (cpu_work_fetch.nidle_now < 0) cpu_work_fetch.nidle_now = 0; - if (coproc_cuda) { - cuda_work_fetch.nidle_now = coproc_cuda->count - cuda_work_fetch.sim_nused; - if (cuda_work_fetch.nidle_now < 0) cuda_work_fetch.nidle_now = 0; - } - if (coproc_ati) { - ati_work_fetch.nidle_now = coproc_ati->count - ati_work_fetch.sim_nused; - if (ati_work_fetch.nidle_now < 0) ati_work_fetch.nidle_now = 0; - } - - work_fetch.compute_shares(); - - // Simulation loop. 
Keep going until all work done - // - double buf_end = now + work_buf_total(); - double sim_now = now; - while (sim_status.active.size()) { - - // compute finish times and see which result finishes first - // - rpbest = NULL; - for (i=0; irrsim_finish_delay = rp->avp->temp_dcf*rp->rrsim_flops_left/rp->rrsim_flops; - rp->rrsim_finish_delay = rp->rrsim_flops_left/rp->rrsim_flops; - if (!rpbest || rp->rrsim_finish_delay < rpbest->rrsim_finish_delay) { - rpbest = rp; - } - } - - pbest = rpbest->project; - - if (log_flags.rr_simulation) { - msg_printf(pbest, MSG_INFO, - "[rr_sim] %.2f: %s finishes after %.2f (%.2fG/%.2fG)", - sim_now - now, - rpbest->name, rpbest->rrsim_finish_delay, - rpbest->rrsim_flops_left/1e9, rpbest->rrsim_flops/1e9 - ); - } - - // "rpbest" is first result to finish. Does it miss its deadline? - // - double diff = (sim_now + rpbest->rrsim_finish_delay) - rpbest->computation_deadline(); - if (diff > 0) { - ACTIVE_TASK* atp = lookup_active_task_by_result(rpbest); - if (atp && atp->procinfo.working_set_size_smoothed > ar) { - if (log_flags.rr_simulation) { - msg_printf(pbest, MSG_INFO, - "[rr_sim] %s misses deadline but too large to run", - rpbest->name - ); - } - } else { - rpbest->rr_sim_misses_deadline = true; - if (rpbest->uses_cuda()) { - pbest->cuda_pwf.deadlines_missed++; - cuda_work_fetch.deadline_missed_instances += rpbest->avp->ncudas; - } else if (rpbest->uses_ati()) { - pbest->ati_pwf.deadlines_missed++; - ati_work_fetch.deadline_missed_instances += rpbest->avp->natis; - } else { - pbest->cpu_pwf.deadlines_missed++; - cpu_work_fetch.deadline_missed_instances += rpbest->avp->avg_ncpus; - } - if (log_flags.rr_simulation) { - msg_printf(pbest, MSG_INFO, - "[rr_sim] %s misses deadline by %.2f", - rpbest->name, diff - ); - } - } - } - - // update saturated time - // - double end_time = sim_now + rpbest->rrsim_finish_delay; - double x = end_time - gstate.now; - cpu_work_fetch.update_saturated_time(x); - if (coproc_cuda) { - 
cuda_work_fetch.update_saturated_time(x); - } - if (coproc_ati) { - ati_work_fetch.update_saturated_time(x); - } - - // update busy time - // - if (rpbest->rr_sim_misses_deadline) { - double dur = rpbest->estimated_time_remaining(false) / gstate.overall_cpu_frac(); - cpu_work_fetch.update_busy_time(dur, rpbest->avp->avg_ncpus); - if (rpbest->uses_cuda()) { - cuda_work_fetch.update_busy_time(dur, rpbest->avp->ncudas); - } - if (rpbest->uses_ati()) { - ati_work_fetch.update_busy_time(dur, rpbest->avp->natis); - } - } - - // increment resource shortfalls - // - if (sim_now < buf_end) { - if (end_time > buf_end) end_time = buf_end; - double d_time = end_time - sim_now; - - cpu_work_fetch.accumulate_shortfall(d_time); - - if (coproc_cuda) { - cuda_work_fetch.accumulate_shortfall(d_time); - } - if (coproc_ati) { - ati_work_fetch.accumulate_shortfall(d_time); - } - } - - sim_status.remove_active(rpbest); - pbest->rr_sim_status.remove_active(rpbest); - - sim_now += rpbest->rrsim_finish_delay; - - // start new jobs; may need to start more than one - // if this job used multiple resource instances - // - if (rpbest->uses_cuda()) { - while (1) { - if (cuda_work_fetch.sim_nused >= coproc_cuda->count) break; - if (!cuda_work_fetch.pending.size()) break; - RESULT* rp = cuda_work_fetch.pending[0]; - cuda_work_fetch.pending.erase(cuda_work_fetch.pending.begin()); - sim_status.activate(rp, sim_now-now); - pbest->rr_sim_status.activate(rp); - } - } else if (rpbest->uses_ati()) { - while (1) { - if (ati_work_fetch.sim_nused >= coproc_ati->count) break; - if (!ati_work_fetch.pending.size()) break; - RESULT* rp = ati_work_fetch.pending[0]; - ati_work_fetch.pending.erase(ati_work_fetch.pending.begin()); - sim_status.activate(rp, sim_now-now); - pbest->rr_sim_status.activate(rp); - } - } else { - while (1) { - if (pbest->cpu_pwf.sim_nused >= ncpus) break; - RESULT* rp = pbest->rr_sim_status.get_pending(); - if (!rp) break; - sim_status.activate(rp, sim_now-now); - 
pbest->rr_sim_status.activate(rp); - } - } - } - - // if simulation ends before end of buffer, take the tail into account - // - if (sim_now < buf_end) { - double d_time = buf_end - sim_now; - cpu_work_fetch.accumulate_shortfall(d_time); - if (coproc_cuda) { - cuda_work_fetch.accumulate_shortfall(d_time); - } - if (coproc_ati) { - ati_work_fetch.accumulate_shortfall(d_time); - } - } -} +// This file is part of BOINC. +// http://boinc.berkeley.edu +// Copyright (C) 2008 University of California +// +// BOINC is free software; you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License +// as published by the Free Software Foundation, +// either version 3 of the License, or (at your option) any later version. +// +// BOINC is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with BOINC. If not, see . + +// Simulate the processing of the current workload +// (include jobs that are downloading) +// with weighted round-robin (WRR) scheduling. +// +// For efficiency, we simulate an approximation of WRR. +// We don't model time-slicing. +// Instead we use a continuous model where, at a given point, +// each project has a set of running jobs that uses at most all CPUs. +// These jobs are assumed to run at a rate proportionate to their avg_ncpus, +// and each project gets total CPU proportionate to its RRS. +// +// For coprocessors, we saturate the resource; +// i.e. with 2 GPUs, we'd let a 1-GPU app and a 2-GPU app run together. +// Otherwise, there'd be the possibility of computing +// a nonzero shortfall inappropriately. 
+// +// Outputs are changes to global state: +// - deadline misses (per-project count, per-result flag) +// Deadline misses are not counted for tasks +// that are too large to run in RAM right now. +// - resource shortfalls (per-project and total) +// - counts of resources idle now +// + +#include "cpp.h" + +#ifdef _WIN32 +#include "boinc_win.h" +#else +#include "config.h" +#endif + +#include "client_state.h" +#include "coproc.h" +#include "client_msgs.h" + +inline void rsc_string(RESULT* rp, char* buf) { + APP_VERSION* avp = rp->avp; + if (avp->ncudas) { + sprintf(buf, "%.2f CPU + %.2f NV", avp->avg_ncpus, avp->ncudas); + } else if (avp->natis) { + sprintf(buf, "%.2f CPU + %.2f ATI", avp->avg_ncpus, avp->natis); + } else { + sprintf(buf, "%.2f CPU", avp->avg_ncpus); + } +} + +// this is here (rather than rr_sim.h) because its inline functions +// refer to RESULT +// +struct RR_SIM_STATUS { + std::vector active; + double active_ncpus; + double active_cudas; + double active_atis; + + inline void activate(RESULT* rp, double when) { + PROJECT* p = rp->project; + if (log_flags.rr_simulation) { + char buf[256]; + rsc_string(rp, buf); + msg_printf(p, MSG_INFO, + "[rr_sim] %.2f: starting %s (%s)", + when, rp->name, buf + ); + } + active.push_back(rp); + cpu_work_fetch.sim_nused += rp->avp->avg_ncpus; + cuda_work_fetch.sim_nused += rp->avp->ncudas; + ati_work_fetch.sim_nused += rp->avp->natis; + } + // remove *rpbest from active set, + // and adjust FLOPS left for other results + // + inline void remove_active(RESULT* rpbest) { + vector::iterator it = active.begin(); + while (it != active.end()) { + RESULT* rp = *it; + if (rp == rpbest) { + it = active.erase(it); + } else { + rp->rrsim_flops_left -= rp->rrsim_flops*rpbest->rrsim_finish_delay; + + // can be slightly less than 0 due to roundoff + // + if (rp->rrsim_flops_left < -1) { + msg_printf(rp->project, MSG_INTERNAL_ERROR, + "%s: negative FLOPs left %f", rp->name, rp->rrsim_flops_left + ); + } + if 
(rp->rrsim_flops_left < 0) { + rp->rrsim_flops_left = 0; + } + it++; + } + } + cpu_work_fetch.sim_nused -= rpbest->avp->avg_ncpus; + cuda_work_fetch.sim_nused -= rpbest->avp->ncudas; + ati_work_fetch.sim_nused -= rpbest->avp->natis; + } + + RR_SIM_STATUS() { + active_ncpus = 0; + active_cudas = 0; + active_atis = 0; + } + ~RR_SIM_STATUS() {} +}; + +void RR_SIM_PROJECT_STATUS::activate(RESULT* rp) { + active.push_back(rp); + rp->project->cpu_pwf.sim_nused += rp->avp->avg_ncpus; + rp->project->cuda_pwf.sim_nused += rp->avp->ncudas; + rp->project->ati_pwf.sim_nused += rp->avp->natis; +} + +void RR_SIM_PROJECT_STATUS::remove_active(RESULT* rp) { + std::vector::iterator it = active.begin(); + while (it != active.end()) { + if (*it == rp) { + it = active.erase(it); + } else { + it++; + } + } + rp->project->cpu_pwf.sim_nused -= rp->avp->avg_ncpus; + rp->project->cuda_pwf.sim_nused -= rp->avp->ncudas; + rp->project->ati_pwf.sim_nused -= rp->avp->natis; +} + +// estimate the rate (FLOPS) that this job will get long-term +// with weighted round-robin scheduling +// +void set_rrsim_flops(RESULT* rp) { + // For coproc jobs, use app version estimate + // + if (rp->uses_coprocs()) { + rp->rrsim_flops = rp->avp->flops * gstate.overall_cpu_frac(); + return; + } + PROJECT* p = rp->project; + + // For CPU jobs, estimate how many CPU seconds per second this job would get + // running with other jobs of this project, ignoring other factors + // + double x = 1; + if (p->cpu_pwf.sim_nused > gstate.ncpus) { + x = gstate.ncpus/p->cpu_pwf.sim_nused; + } + double r1 = x*rp->avp->avg_ncpus; + + // if the project's total CPU usage is more than its share, scale + // + double share_cpus = p->cpu_pwf.runnable_share*gstate.ncpus; + if (!share_cpus) share_cpus = gstate.ncpus; + // deal with projects w/ resource share = 0 + double r2 = r1; + if (p->cpu_pwf.sim_nused > share_cpus) { + r2 *= (share_cpus / p->cpu_pwf.sim_nused); + } + + // scale by overall CPU availability + // + double r3 = r2 * 
gstate.overall_cpu_frac(); + + rp->rrsim_flops = r3 * rp->avp->flops; +#if 0 + if (log_flags.rr_simulation) { + msg_printf(p, MSG_INFO, + "[rr_sim] set_rrsim_flops: %.2fG (r1 %.4f r2 %.4f r3 %.4f)", + rp->rrsim_flops/1e9, r1, r2, r3 + ); + } +#endif +} + +void CLIENT_STATE::print_deadline_misses() { + unsigned int i; + RESULT* rp; + PROJECT* p; + for (i=0; irr_sim_misses_deadline) { + msg_printf(rp->project, MSG_INFO, + "[cpu_sched] Result %s projected to miss deadline.", + rp->name + ); + } + } + for (i=0; icpu_pwf.deadlines_missed) { + msg_printf(p, MSG_INFO, + "[cpu_sched] Project has %d projected CPU deadline misses", + p->cpu_pwf.deadlines_missed + ); + } + if (p->cuda_pwf.deadlines_missed) { + msg_printf(p, MSG_INFO, + "[cpu_sched] Project has %d projected NVIDIA GPU deadline misses", + p->cuda_pwf.deadlines_missed + ); + } + if (p->ati_pwf.deadlines_missed) { + msg_printf(p, MSG_INFO, + "[cpu_sched] Project has %d projected ATI GPU deadline misses", + p->ati_pwf.deadlines_missed + ); + } + } +} + +#if 0 +// compute a per-app-version "temporary DCF" based on the elapsed time +// and fraction done of running jobs +// +void compute_temp_dcf() { + unsigned int i; + for (i=0; itemp_dcf = 1; + } + for (i=0; iest_dur(false) / atp->result->estimated_duration(false); + APP_VERSION* avp = atp->result->avp; + if (x < avp->temp_dcf) { + avp->temp_dcf = x; + } + } +} +#endif + +void CLIENT_STATE::rr_simulation() { + PROJECT* p, *pbest; + RESULT* rp, *rpbest; + RR_SIM_STATUS sim_status; + unsigned int i; + + double ar = available_ram(); + + work_fetch.rr_init(); + //compute_temp_dcf(); + + if (log_flags.rr_simulation) { + msg_printf(0, MSG_INFO, + "[rr_sim] rr_sim start: work_buf_total %.2f on_frac %.3f active_frac %.3f", + work_buf_total(), time_stats.on_frac, time_stats.active_frac + ); + } + + for (i=0; inon_cpu_intensive) continue; + p->rr_sim_status.clear(); + } + + // Decide what jobs to include in the simulation, + // and pick the ones that are initially running. 
+ // NOTE: "results" is sorted by increasing arrival time + // + for (i=0; irr_sim_misses_deadline = false; + if (!rp->nearly_runnable()) continue; + if (rp->some_download_stalled()) continue; + if (rp->project->non_cpu_intensive) continue; + rp->rrsim_flops_left = rp->estimated_flops_remaining(); + + //if (rp->rrsim_flops_left <= 0) continue; + // job may have fraction_done=1 but not be done; + // if it's past its deadline, we need to mark it as such + + p = rp->project; + p->pwf.has_runnable_jobs = true; + p->cpu_pwf.nused_total += rp->avp->avg_ncpus; + if (rp->uses_cuda() && host_info.have_cuda()) { + p->cuda_pwf.nused_total += rp->avp->ncudas; + p->cuda_pwf.has_runnable_jobs = true; + if (cuda_work_fetch.sim_nused < host_info.coprocs.cuda.count) { + sim_status.activate(rp, 0); + p->rr_sim_status.activate(rp); + } else { + cuda_work_fetch.pending.push_back(rp); + } + } else if (rp->uses_ati() && host_info.have_ati()) { + p->ati_pwf.nused_total += rp->avp->natis; + p->ati_pwf.has_runnable_jobs = true; + if (ati_work_fetch.sim_nused < host_info.coprocs.ati.count) { + sim_status.activate(rp, 0); + p->rr_sim_status.activate(rp); + } else { + ati_work_fetch.pending.push_back(rp); + } + } else { + p->cpu_pwf.has_runnable_jobs = true; + if (p->cpu_pwf.sim_nused < ncpus) { + sim_status.activate(rp, 0); + p->rr_sim_status.activate(rp); + } else { + p->rr_sim_status.add_pending(rp); + } + } + } + + // note the number of idle instances + // + cpu_work_fetch.nidle_now = ncpus - cpu_work_fetch.sim_nused; + if (cpu_work_fetch.nidle_now < 0) cpu_work_fetch.nidle_now = 0; + if (host_info.have_cuda()) { + cuda_work_fetch.nidle_now = host_info.coprocs.cuda.count - cuda_work_fetch.sim_nused; + if (cuda_work_fetch.nidle_now < 0) cuda_work_fetch.nidle_now = 0; + } + if (host_info.have_ati()) { + ati_work_fetch.nidle_now = host_info.coprocs.ati.count - ati_work_fetch.sim_nused; + if (ati_work_fetch.nidle_now < 0) ati_work_fetch.nidle_now = 0; + } + + work_fetch.compute_shares(); + + 
// Simulation loop. Keep going until all work done + // + double buf_end = now + work_buf_total(); + double sim_now = now; + while (sim_status.active.size()) { + + // compute finish times and see which result finishes first + // + rpbest = NULL; + for (i=0; irrsim_finish_delay = rp->avp->temp_dcf*rp->rrsim_flops_left/rp->rrsim_flops; + rp->rrsim_finish_delay = rp->rrsim_flops_left/rp->rrsim_flops; + if (!rpbest || rp->rrsim_finish_delay < rpbest->rrsim_finish_delay) { + rpbest = rp; + } + } + + pbest = rpbest->project; + + if (log_flags.rr_simulation) { + msg_printf(pbest, MSG_INFO, + "[rr_sim] %.2f: %s finishes after %.2f (%.2fG/%.2fG)", + sim_now - now, + rpbest->name, rpbest->rrsim_finish_delay, + rpbest->rrsim_flops_left/1e9, rpbest->rrsim_flops/1e9 + ); + } + + // "rpbest" is first result to finish. Does it miss its deadline? + // + double diff = (sim_now + rpbest->rrsim_finish_delay) - rpbest->computation_deadline(); + if (diff > 0) { + ACTIVE_TASK* atp = lookup_active_task_by_result(rpbest); + if (atp && atp->procinfo.working_set_size_smoothed > ar) { + if (log_flags.rr_simulation) { + msg_printf(pbest, MSG_INFO, + "[rr_sim] %s misses deadline but too large to run", + rpbest->name + ); + } + } else { + rpbest->rr_sim_misses_deadline = true; + if (rpbest->uses_cuda()) { + pbest->cuda_pwf.deadlines_missed++; + cuda_work_fetch.deadline_missed_instances += rpbest->avp->ncudas; + } else if (rpbest->uses_ati()) { + pbest->ati_pwf.deadlines_missed++; + ati_work_fetch.deadline_missed_instances += rpbest->avp->natis; + } else { + pbest->cpu_pwf.deadlines_missed++; + cpu_work_fetch.deadline_missed_instances += rpbest->avp->avg_ncpus; + } + if (log_flags.rr_simulation) { + msg_printf(pbest, MSG_INFO, + "[rr_sim] %s misses deadline by %.2f", + rpbest->name, diff + ); + } + } + } + + // update saturated time + // + double end_time = sim_now + rpbest->rrsim_finish_delay; + double x = end_time - gstate.now; + cpu_work_fetch.update_saturated_time(x); + if 
(host_info.have_cuda()) { + cuda_work_fetch.update_saturated_time(x); + } + if (host_info.have_ati()) { + ati_work_fetch.update_saturated_time(x); + } + + // update busy time + // + if (rpbest->rr_sim_misses_deadline) { + double dur = rpbest->estimated_time_remaining(false) / gstate.overall_cpu_frac(); + cpu_work_fetch.update_busy_time(dur, rpbest->avp->avg_ncpus); + if (rpbest->uses_cuda()) { + cuda_work_fetch.update_busy_time(dur, rpbest->avp->ncudas); + } + if (rpbest->uses_ati()) { + ati_work_fetch.update_busy_time(dur, rpbest->avp->natis); + } + } + + // increment resource shortfalls + // + if (sim_now < buf_end) { + if (end_time > buf_end) end_time = buf_end; + double d_time = end_time - sim_now; + + cpu_work_fetch.accumulate_shortfall(d_time); + + if (host_info.have_cuda()) { + cuda_work_fetch.accumulate_shortfall(d_time); + } + if (host_info.have_ati()) { + ati_work_fetch.accumulate_shortfall(d_time); + } + } + + sim_status.remove_active(rpbest); + pbest->rr_sim_status.remove_active(rpbest); + + sim_now += rpbest->rrsim_finish_delay; + + // start new jobs; may need to start more than one + // if this job used multiple resource instances + // + if (rpbest->uses_cuda()) { + while (1) { + if (cuda_work_fetch.sim_nused >= host_info.coprocs.cuda.count) break; + if (!cuda_work_fetch.pending.size()) break; + RESULT* rp = cuda_work_fetch.pending[0]; + cuda_work_fetch.pending.erase(cuda_work_fetch.pending.begin()); + sim_status.activate(rp, sim_now-now); + pbest->rr_sim_status.activate(rp); + } + } else if (rpbest->uses_ati()) { + while (1) { + if (ati_work_fetch.sim_nused >= host_info.coprocs.ati.count) break; + if (!ati_work_fetch.pending.size()) break; + RESULT* rp = ati_work_fetch.pending[0]; + ati_work_fetch.pending.erase(ati_work_fetch.pending.begin()); + sim_status.activate(rp, sim_now-now); + pbest->rr_sim_status.activate(rp); + } + } else { + while (1) { + if (pbest->cpu_pwf.sim_nused >= ncpus) break; + RESULT* rp = pbest->rr_sim_status.get_pending(); + if 
(!rp) break; + sim_status.activate(rp, sim_now-now); + pbest->rr_sim_status.activate(rp); + } + } + } + + // if simulation ends before end of buffer, take the tail into account + // + if (sim_now < buf_end) { + double d_time = buf_end - sim_now; + cpu_work_fetch.accumulate_shortfall(d_time); + if (host_info.have_cuda()) { + cuda_work_fetch.accumulate_shortfall(d_time); + } + if (host_info.have_ati()) { + ati_work_fetch.accumulate_shortfall(d_time); + } + } +} diff --git a/client/scheduler_op.cpp b/client/scheduler_op.cpp index f382e6871f..a358f2a067 100644 --- a/client/scheduler_op.cpp +++ b/client/scheduler_op.cpp @@ -219,7 +219,7 @@ int SCHEDULER_OP::start_rpc(PROJECT* p) { ); double gpu_req = cuda_work_fetch.req_secs + ati_work_fetch.req_secs; if (cpu_work_fetch.req_secs || gpu_req) { - if (coproc_cuda||coproc_ati) { + if (gstate.host_info.have_cuda()||gstate.host_info.have_ati()) { if (cpu_work_fetch.req_secs && gpu_req) { sprintf(buf, " for CPU and GPU"); } else if (cpu_work_fetch.req_secs) { @@ -254,13 +254,13 @@ int SCHEDULER_OP::start_rpc(PROJECT* p) { "[sched_op] CPU work request: %.2f seconds; %.2f CPUs", cpu_work_fetch.req_secs, cpu_work_fetch.req_instances ); - if (coproc_cuda) { + if (gstate.host_info.have_cuda()) { msg_printf(p, MSG_INFO, "[sched_op] NVIDIA GPU work request: %.2f seconds; %.2f GPUs", cuda_work_fetch.req_secs, cuda_work_fetch.req_instances ); } - if (coproc_ati) { + if (gstate.host_info.have_ati()) { msg_printf(p, MSG_INFO, "[sched_op] ATI GPU work request: %.2f seconds; %.2f GPUs", ati_work_fetch.req_secs, ati_work_fetch.req_instances diff --git a/client/sim.h b/client/sim.h index 83bfb052ab..760c8aab84 100644 --- a/client/sim.h +++ b/client/sim.h @@ -295,8 +295,6 @@ public: }; extern CLIENT_STATE gstate; -extern COPROC_CUDA* coproc_cuda; -extern COPROC_ATI* coproc_ati; extern NET_STATUS net_status; extern FILE* logfile; extern bool user_active; diff --git a/client/work_fetch.cpp b/client/work_fetch.cpp index c8dfac93dd..e6eff8b06d 
100644 --- a/client/work_fetch.cpp +++ b/client/work_fetch.cpp @@ -662,10 +662,10 @@ void WORK_FETCH::rr_init() { p->pwf.can_fetch_work = p->pwf.compute_can_fetch_work(p); p->pwf.has_runnable_jobs = false; p->cpu_pwf.rr_init(p, RSC_TYPE_CPU); - if (coproc_cuda) { + if (gstate.host_info.have_cuda()) { p->cuda_pwf.rr_init(p, RSC_TYPE_CUDA); } - if (coproc_ati) { + if (gstate.host_info.have_ati()) { p->ati_pwf.rr_init(p, RSC_TYPE_ATI); } } @@ -673,10 +673,10 @@ void WORK_FETCH::rr_init() { void WORK_FETCH::set_all_requests(PROJECT* p) { cpu_work_fetch.set_request(p, false); - if (coproc_cuda && gpus_usable) { + if (gstate.host_info.have_cuda() && gpus_usable) { cuda_work_fetch.set_request(p, false); } - if (coproc_ati && gpus_usable) { + if (gstate.host_info.have_ati() && gpus_usable) { ati_work_fetch.set_request(p, false); } } @@ -712,13 +712,13 @@ void WORK_FETCH::set_overall_debts() { p = gstate.projects[i]; double queue_debt = p->cpu_pwf.queue_est/gstate.ncpus; p->pwf.overall_debt = p->cpu_pwf.long_term_debt - queue_debt; - if (coproc_cuda) { + if (gstate.host_info.have_cuda()) { p->pwf.overall_debt += cuda_work_fetch.relative_speed* - (p->cuda_pwf.long_term_debt - p->cuda_pwf.queue_est/coproc_cuda->count); + (p->cuda_pwf.long_term_debt - p->cuda_pwf.queue_est/gstate.host_info.coprocs.cuda.count); } - if (coproc_ati) { + if (gstate.host_info.have_ati()) { p->pwf.overall_debt += ati_work_fetch.relative_speed* - (p->ati_pwf.long_term_debt - p->ati_pwf.queue_est/coproc_ati->count); + (p->ati_pwf.long_term_debt - p->ati_pwf.queue_est/gstate.host_info.coprocs.ati.count); } } } @@ -738,10 +738,10 @@ void WORK_FETCH::print_state() { gstate.work_buf_min(), gstate.work_buf_additional() ); cpu_work_fetch.print_state("CPU"); - if (coproc_cuda) { + if (gstate.host_info.have_cuda()) { cuda_work_fetch.print_state("NVIDIA GPU"); } - if (coproc_ati) { + if (gstate.host_info.have_ati()) { ati_work_fetch.print_state("ATI GPU"); } for (unsigned int i=0; iavg_ncpus; 
p->cpu_pwf.secs_this_debt_interval += x; cpu_work_fetch.secs_this_debt_interval += x; - if (coproc_cuda) { + if (gstate.host_info.have_cuda()) { x = dt*avp->ncudas; p->cuda_pwf.secs_this_debt_interval += x; cuda_work_fetch.secs_this_debt_interval += x; } - if (coproc_ati) { + if (gstate.host_info.have_ati()) { x = dt*avp->natis; p->ati_pwf.secs_this_debt_interval += x; ati_work_fetch.secs_this_debt_interval += x; @@ -927,10 +927,10 @@ void WORK_FETCH::compute_shares() { if (p->cpu_pwf.may_have_work) { cpu_work_fetch.total_fetchable_share += p->resource_share; } - if (coproc_cuda && p->cuda_pwf.may_have_work) { + if (gstate.host_info.have_cuda() && p->cuda_pwf.may_have_work) { cuda_work_fetch.total_fetchable_share += p->resource_share; } - if (coproc_ati && p->ati_pwf.may_have_work) { + if (gstate.host_info.have_ati() && p->ati_pwf.may_have_work) { ati_work_fetch.total_fetchable_share += p->resource_share; } } @@ -950,10 +950,10 @@ void WORK_FETCH::compute_shares() { if (p->cpu_pwf.may_have_work) { p->cpu_pwf.fetchable_share = cpu_work_fetch.total_fetchable_share?p->resource_share/cpu_work_fetch.total_fetchable_share:1; } - if (coproc_cuda && p->cuda_pwf.may_have_work) { + if (gstate.host_info.have_cuda() && p->cuda_pwf.may_have_work) { p->cuda_pwf.fetchable_share = cuda_work_fetch.total_fetchable_share?p->resource_share/cuda_work_fetch.total_fetchable_share:1; } - if (coproc_ati && p->ati_pwf.may_have_work) { + if (gstate.host_info.have_ati() && p->ati_pwf.may_have_work) { p->ati_pwf.fetchable_share = ati_work_fetch.total_fetchable_share?p->resource_share/ati_work_fetch.total_fetchable_share:1; } } @@ -996,13 +996,13 @@ void WORK_FETCH::write_request(FILE* f, PROJECT* p) { work_req, cpu_work_fetch.req_secs, cpu_work_fetch.req_instances ); - if (coproc_cuda) { + if (gstate.host_info.have_cuda()) { sprintf(buf2, " NVIDIA GPU (%.2f sec, %.2f)", cuda_work_fetch.req_secs, cuda_work_fetch.req_instances ); strcat(buf, buf2); } - if (coproc_ati) { + if 
(gstate.host_info.have_ati()) { sprintf(buf2, " ATI GPU (%.2f sec, %.2f)", ati_work_fetch.req_secs, ati_work_fetch.req_instances ); @@ -1041,10 +1041,10 @@ void WORK_FETCH::handle_reply( if (cpu_work_fetch.req_secs && !srp->cpu_backoff) { p->cpu_pwf.backoff(p, "CPU"); } - if (coproc_cuda && coproc_cuda->req_secs && !srp->cuda_backoff) { + if (gstate.host_info.have_cuda() && gstate.host_info.coprocs.cuda.req_secs && !srp->cuda_backoff) { p->cuda_pwf.backoff(p, "NVIDIA GPU"); } - if (coproc_ati && coproc_ati->req_secs && !srp->ati_backoff) { + if (gstate.host_info.have_ati() && gstate.host_info.coprocs.ati.req_secs && !srp->ati_backoff) { p->ati_pwf.backoff(p, "ATI GPU"); } } @@ -1072,12 +1072,12 @@ void WORK_FETCH::set_initial_work_request() { cpu_work_fetch.req_secs = 1; cpu_work_fetch.req_instances = 0; cpu_work_fetch.busy_time_estimator.reset(); - if (coproc_cuda) { + if (gstate.host_info.have_cuda()) { cuda_work_fetch.req_secs = 1; cuda_work_fetch.req_instances = 0; cuda_work_fetch.busy_time_estimator.reset(); } - if (coproc_ati) { + if (gstate.host_info.have_ati()) { ati_work_fetch.req_secs = 1; ati_work_fetch.req_instances = 0; ati_work_fetch.busy_time_estimator.reset(); @@ -1092,17 +1092,17 @@ void WORK_FETCH::init() { // use 20% as a rough estimate of GPU efficiency - if (coproc_cuda) { + if (gstate.host_info.have_cuda()) { cuda_work_fetch.init( - RSC_TYPE_CUDA, coproc_cuda->count, - coproc_cuda->count*0.2*coproc_cuda->peak_flops()/cpu_flops + RSC_TYPE_CUDA, gstate.host_info.coprocs.cuda.count, + gstate.host_info.coprocs.cuda.count*0.2*gstate.host_info.coprocs.cuda.peak_flops()/cpu_flops ); } - if (coproc_ati) { + if (gstate.host_info.have_ati()) { ati_work_fetch.init( RSC_TYPE_ATI, - coproc_ati->count, - coproc_ati->count*0.2*coproc_ati->peak_flops()/cpu_flops + gstate.host_info.coprocs.ati.count, + gstate.host_info.coprocs.ati.count*0.2*gstate.host_info.coprocs.ati.peak_flops()/cpu_flops ); } @@ -1149,11 +1149,11 @@ void 
CLIENT_STATE::compute_nuploading_results() { } } int n = gstate.ncpus; - if (coproc_cuda && coproc_cuda->count > n) { - n = coproc_cuda->count; + if (gstate.host_info.have_cuda() && gstate.host_info.coprocs.cuda.count > n) { + n = gstate.host_info.coprocs.cuda.count; } - if (coproc_ati && coproc_ati->count > n) { - n = coproc_ati->count; + if (gstate.host_info.have_ati() && gstate.host_info.coprocs.ati.count > n) { + n = gstate.host_info.coprocs.ati.count; } n *= 2; for (i=0; i. - -#if defined(_WIN32) && !defined(__STDWX_H__) -#include "boinc_win.h" -#elif defined(_WIN32) && defined(__STDWX_H__) -#include "stdwx.h" -#else -#ifdef _USING_FCGI_ -#include "boinc_fcgi.h" -#else -#include -#endif -#include -#include -#endif - -#ifdef _WIN32 -#include "win_util.h" -#endif - -#include "error_numbers.h" -#include "filesys.h" -#include "parse.h" -#include "str_util.h" - -#include "coproc.h" - -#ifndef _USING_FCGI_ -using std::perror; -#endif - -#ifndef _USING_FCGI_ -void COPROC::write_xml(MIOFILE& f) { - f.printf( - "\n" - " %s\n" - " %d\n" - "\n", - type, count - ); -} -#endif - -int COPROC_REQ::parse(MIOFILE& fin) { - char buf[1024]; - strcpy(type, ""); - count = 0; - while (fin.fgets(buf, sizeof(buf))) { - if (match_tag(buf, "")) { - if (!strlen(type)) return ERR_XML_PARSE; - return 0; - } - if (parse_str(buf, "", type, sizeof(type))) continue; - if (parse_double(buf, "", count)) continue; - } - return ERR_XML_PARSE; -} - -int COPROC::parse(MIOFILE& fin) { - char buf[1024]; - strcpy(type, ""); - count = 0; - used = 0; - req_secs = 0; - estimated_delay = 0; - req_instances = 0; - while (fin.fgets(buf, sizeof(buf))) { - if (match_tag(buf, "")) { - if (!strlen(type)) return ERR_XML_PARSE; - return 0; - } - if (parse_str(buf, "", type, sizeof(type))) continue; - if (parse_int(buf, "", count)) continue; - if (parse_double(buf, "", req_secs)) continue; - if (parse_double(buf, "", req_instances)) continue; - if (parse_double(buf, "", estimated_delay)) continue; - } - return 
ERR_XML_PARSE; -} - -void COPROCS::summary_string(char* buf, int len) { - char bigbuf[8192], buf2[1024]; - - strcpy(bigbuf, ""); - for (unsigned int i=0; itype, "CUDA")) { - COPROC_CUDA* cp2 = (COPROC_CUDA*) cp; - int mem = (int)(cp2->prop.dtotalGlobalMem/MEGA); - sprintf(buf2, "[CUDA|%s|%d|%dMB|%d]", - cp2->prop.name, cp2->count, mem, cp2->display_driver_version - ); - strcat(bigbuf, buf2); - } else if (!strcmp(cp->type, "ATI")){ - COPROC_ATI* cp2 =(COPROC_ATI*) cp; - sprintf(buf2,"[CAL|%s|%d|%dMB|%s]", - cp2->name, cp2->count, cp2->attribs.localRAM, cp2->version - ); - strcat(bigbuf,buf2); - } - } - bigbuf[len-1] = 0; - strcpy(buf, bigbuf); -} - -int COPROCS::parse(MIOFILE& fin) { - char buf[1024]; - - while (fin.fgets(buf, sizeof(buf))) { - if (match_tag(buf, "")) { - return 0; - } - if (strstr(buf, "")) { - COPROC_CUDA* cc = new COPROC_CUDA; - int retval = cc->parse(fin); - if (!retval) { - coprocs.push_back(cc); - } - } - if (strstr(buf, "")) { - COPROC_ATI* cc = new COPROC_ATI; - int retval = cc->parse(fin); - if (!retval) { - coprocs.push_back(cc); - } - } - } - return ERR_XML_PARSE; -} - -void COPROCS::write_xml(MIOFILE& mf) { -#ifndef _USING_FCGI_ - mf.printf(" \n"); - for (unsigned i=0; iwrite_xml(mf); - } - mf.printf(" \n"); -#endif -} - -COPROC* COPROCS::lookup(const char* type) { - for (unsigned int i=0; itype)) return cp; - } - return NULL; -} - -#ifdef _WIN32 - -#endif - -void COPROC_CUDA::description(char* buf) { - char vers[256]; - if (display_driver_version) { - sprintf(vers, "%d", display_driver_version); - } else { - strcpy(vers, "unknown"); - } - sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0f GFLOPS peak)", - prop.name, vers, cuda_version, prop.major, prop.minor, - prop.totalGlobalMem/(1024.*1024.), peak_flops()/1e9 - ); -} - -#ifndef _USING_FCGI_ -void COPROC_CUDA::write_xml(MIOFILE& f) { - f.printf( - "\n" - " %d\n" - " %s\n" - " %f\n" - " %f\n" - " %f\n" - " %d\n" - " %d\n" - " %u\n" - " %u\n" - 
" %d\n" - " %d\n" - " %u\n" - " %d\n" - " %d %d %d\n" - " %d %d %d\n" - " %u\n" - " %d\n" - " %d\n" - " %d\n" - " %u\n" - " %d\n" - " %d\n" - "\n", - count, - prop.name, - req_secs, - req_instances, - estimated_delay, - display_driver_version, - cuda_version, - (unsigned int)prop.totalGlobalMem, - (unsigned int)prop.sharedMemPerBlock, - prop.regsPerBlock, - prop.warpSize, - (unsigned int)prop.memPitch, - prop.maxThreadsPerBlock, - prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2], - prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2], - (unsigned int)prop.totalConstMem, - prop.major, - prop.minor, - prop.clockRate, - (unsigned int)prop.textureAlignment, - prop.deviceOverlap, - prop.multiProcessorCount - ); -} -#endif - -void COPROC_CUDA::clear() { - count = 0; - used = 0; - req_secs = 0; - req_instances = 0; - estimated_delay = -1; // mark as absent - cuda_version = 0; - display_driver_version = 0; - strcpy(prop.name, ""); - prop.totalGlobalMem = 0; - prop.sharedMemPerBlock = 0; - prop.regsPerBlock = 0; - prop.warpSize = 0; - prop.memPitch = 0; - prop.maxThreadsPerBlock = 0; - prop.maxThreadsDim[0] = 0; - prop.maxThreadsDim[1] = 0; - prop.maxThreadsDim[2] = 0; - prop.maxGridSize[0] = 0; - prop.maxGridSize[1] = 0; - prop.maxGridSize[2] = 0; - prop.clockRate = 0; - prop.totalConstMem = 0; - prop.major = 0; - prop.minor = 0; - prop.textureAlignment = 0; - prop.deviceOverlap = 0; - prop.multiProcessorCount = 0; -} - -int COPROC_CUDA::parse(MIOFILE& fin) { - char buf[1024], buf2[256]; - - clear(); - while (fin.fgets(buf, sizeof(buf))) { - if (strstr(buf, "")) { - return 0; - } - if (parse_int(buf, "", count)) continue; - if (parse_double(buf, "", req_secs)) continue; - if (parse_double(buf, "", req_instances)) continue; - if (parse_double(buf, "", estimated_delay)) continue; - if (parse_str(buf, "", prop.name, sizeof(prop.name))) continue; - if (parse_int(buf, "", display_driver_version)) continue; - if (parse_int(buf, "", cuda_version)) 
continue; - if (parse_double(buf, "", prop.dtotalGlobalMem)) continue; - if (parse_int(buf, "", (int&)prop.sharedMemPerBlock)) continue; - if (parse_int(buf, "", prop.regsPerBlock)) continue; - if (parse_int(buf, "", prop.warpSize)) continue; - if (parse_int(buf, "", (int&)prop.memPitch)) continue; - if (parse_int(buf, "", prop.maxThreadsPerBlock)) continue; - if (parse_str(buf, "", buf2, sizeof(buf2))) { - // can't use sscanf here (FCGI) - // - prop.maxThreadsDim[0] = atoi(buf2); - char* p = strchr(buf2, ' '); - if (p) { - p++; - prop.maxThreadsDim[1] = atoi(p); - p = strchr(p, ' '); - if (p) { - p++; - prop.maxThreadsDim[2] = atoi(p); - } - } - continue; - } - if (parse_str(buf, "", buf2, sizeof(buf2))) { - prop.maxGridSize[0] = atoi(buf2); - char* p = strchr(buf2, ' '); - if (p) { - p++; - prop.maxGridSize[1] = atoi(p); - p = strchr(p, ' '); - if (p) { - p++; - prop.maxGridSize[2] = atoi(p); - } - } - continue; - } - if (parse_int(buf, "", prop.clockRate)) continue; - if (parse_int(buf, "", (int&)prop.totalConstMem)) continue; - if (parse_int(buf, "", prop.major)) continue; - if (parse_int(buf, "", prop.minor)) continue; - if (parse_int(buf, "", (int&)prop.textureAlignment)) continue; - if (parse_int(buf, "", prop.deviceOverlap)) continue; - if (parse_int(buf, "", prop.multiProcessorCount)) continue; - } - return ERR_XML_PARSE; -} - -////////////////// ATI STARTS HERE ///////////////// - -#ifndef _USING_FCGI_ -void COPROC_ATI::write_xml(MIOFILE& f) { - f.printf( - "\n" - ); - - f.printf( - " %d\n" - " %s\n" - " %f\n" - " %f\n" - " %f\n" - " %d\n" - " %d\n" - " %d\n" - " %d\n" - " %u\n" - " %d\n" - " %d\n" - " %d\n" - " %d\n" - " %d\n" - " %d\n" - " %d\n" - " %d\n" - " %d\n" - " %s\n", - count, - name, - req_secs, - req_instances, - estimated_delay, - attribs.target, - attribs.localRAM, - attribs.uncachedRemoteRAM, - attribs.cachedRemoteRAM, - attribs.engineClock, - attribs.memoryClock, - attribs.wavefrontSize, - attribs.numberOfSIMD, - attribs.doublePrecision, - 
attribs.pitch_alignment, - attribs.surface_alignment, - info.maxResource1DWidth, - info.maxResource2DWidth, - info.maxResource2DHeight, - version - ); - - if (atirt_detected) { - f.printf(" \n"); - } - - if (amdrt_detected) { - f.printf(" \n"); - } - - f.printf("\n"); -}; -#endif - -void COPROC_ATI::clear() { - count = 0; - used = 0; - req_secs = 0; - req_instances = 0; - estimated_delay = -1; - strcpy(name, ""); - strcpy(version, ""); - atirt_detected = false; - amdrt_detected = false; - memset(&attribs, 0, sizeof(attribs)); - memset(&info, 0, sizeof(info)); -} - -int COPROC_ATI::parse(MIOFILE& fin) { - char buf[1024]; - int n; - - clear(); - - while (fin.fgets(buf, sizeof(buf))) { - if (strstr(buf, "")) return 0; - if (parse_int(buf, "", count)) continue; - if (parse_str(buf, "", name, sizeof(name))) continue; - if (parse_double(buf, "", req_secs)) continue; - if (parse_double(buf, "", req_instances)) continue; - if (parse_double(buf, "", estimated_delay)) continue; - - if (parse_int(buf, "", n)) { - attribs.target = (CALtarget)n; - continue; - } - if (parse_int(buf, "", n)) { - attribs.localRAM = n; - continue; - } - if (parse_int(buf, "", n)) { - attribs.uncachedRemoteRAM = n; - continue; - } - if (parse_int(buf, "", n)) { - attribs.cachedRemoteRAM = n; - continue; - } - if (parse_int(buf, "", n)) { - attribs.engineClock = n; - continue; - } - if (parse_int(buf, "", n)) { - attribs.memoryClock = n; - continue; - } - if (parse_int(buf, "", n)) { - attribs.wavefrontSize = n; - continue; - } - if (parse_int(buf, "" , n)) { - attribs.numberOfSIMD = n; - continue; - } - if (parse_int(buf, "", n)) { - attribs.doublePrecision = n?CAL_TRUE:CAL_FALSE; - continue; - } - if (parse_int(buf, "", n)) { - attribs.pitch_alignment = n; - continue; - } - if (parse_int(buf, "", n)) { - attribs.surface_alignment = n; - continue; - } - if (parse_int(buf, "", n)) { - info.maxResource1DWidth = n; - continue; - } - if (parse_int(buf, "", n)) { - info.maxResource2DWidth = n; - 
continue; - } - if (parse_int(buf, "", n)) { - info.maxResource2DHeight = n; - continue; - } - if (parse_bool(buf, "amdrt_detected", amdrt_detected)) continue; - if (parse_bool(buf, "atirt_detected", atirt_detected)) continue; - if (parse_str(buf, "", version, sizeof(version))) continue; - } - return ERR_XML_PARSE; -} - -void COPROC_ATI::description(char* buf) { - sprintf(buf, "%s (CAL version %s, %.0fMB, %.0f GFLOPS peak)", - name, version, attribs.localRAM/1024.*1024., peak_flops()/1.e9 - ); -} +// This file is part of BOINC. +// http://boinc.berkeley.edu +// Copyright (C) 2007 University of California +// +// BOINC is free software; you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License +// as published by the Free Software Foundation, +// either version 3 of the License, or (at your option) any later version. +// +// BOINC is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with BOINC. If not, see . 
+ +#if defined(_WIN32) && !defined(__STDWX_H__) +#include "boinc_win.h" +#elif defined(_WIN32) && defined(__STDWX_H__) +#include "stdwx.h" +#else +#ifdef _USING_FCGI_ +#include "boinc_fcgi.h" +#else +#include +#endif +#include +#include +#endif + +#ifdef _WIN32 +#include "win_util.h" +#endif + +#include "error_numbers.h" +#include "filesys.h" +#include "parse.h" +#include "str_util.h" + +#include "coproc.h" + +#ifndef _USING_FCGI_ +using std::perror; +#endif + +#ifndef _USING_FCGI_ +void COPROC::write_xml(MIOFILE& f) { + f.printf( + "\n" + " %s\n" + " %d\n" + "\n", + type, count + ); +} +#endif + +int COPROC_REQ::parse(MIOFILE& fin) { + char buf[1024]; + strcpy(type, ""); + count = 0; + while (fin.fgets(buf, sizeof(buf))) { + if (match_tag(buf, "")) { + if (!strlen(type)) return ERR_XML_PARSE; + return 0; + } + if (parse_str(buf, "", type, sizeof(type))) continue; + if (parse_double(buf, "", count)) continue; + } + return ERR_XML_PARSE; +} + +int COPROC::parse(MIOFILE& fin) { + char buf[1024]; + strcpy(type, ""); + count = 0; + used = 0; + req_secs = 0; + estimated_delay = 0; + req_instances = 0; + while (fin.fgets(buf, sizeof(buf))) { + if (match_tag(buf, "")) { + if (!strlen(type)) return ERR_XML_PARSE; + return 0; + } + if (parse_str(buf, "", type, sizeof(type))) continue; + if (parse_int(buf, "", count)) continue; + if (parse_double(buf, "", req_secs)) continue; + if (parse_double(buf, "", req_instances)) continue; + if (parse_double(buf, "", estimated_delay)) continue; + } + return ERR_XML_PARSE; +} + +void COPROCS::summary_string(char* buf, int len) { + char bigbuf[8192], buf2[1024]; + + strcpy(bigbuf, ""); + if (cuda.count) { + int mem = (int)(cuda.prop.dtotalGlobalMem/MEGA); + sprintf(buf2, "[CUDA|%s|%d|%dMB|%d]", + cuda.prop.name, cuda.count, mem, cuda.display_driver_version + ); + strcat(bigbuf, buf2); + } + if (ati.count) { + sprintf(buf2,"[CAL|%s|%d|%dMB|%s]", + ati.name, ati.count, ati.attribs.localRAM, ati.version + ); + strcat(bigbuf,buf2); + } + 
bigbuf[len-1] = 0; + strcpy(buf, bigbuf); +} + +int COPROCS::parse(MIOFILE& fin) { + char buf[1024]; + int retval; + + while (fin.fgets(buf, sizeof(buf))) { + if (match_tag(buf, "")) { + return 0; + } + if (strstr(buf, "")) { + retval = cuda.parse(fin); + if (retval) { + cuda.clear(); + } + } + if (strstr(buf, "")) { + retval = ati.parse(fin); + if (retval) { + ati.clear(); + } + } + } + return ERR_XML_PARSE; +} + +void COPROCS::write_xml(MIOFILE& mf) { +#ifndef _USING_FCGI_ + mf.printf(" \n"); + if (cuda.count) { + cuda.write_xml(mf); + } + if (ati.count) { + ati.write_xml(mf); + } + mf.printf(" \n"); +#endif +} + +#ifdef _WIN32 + +#endif + +void COPROC_CUDA::description(char* buf) { + char vers[256]; + if (display_driver_version) { + sprintf(vers, "%d", display_driver_version); + } else { + strcpy(vers, "unknown"); + } + sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0f GFLOPS peak)", + prop.name, vers, cuda_version, prop.major, prop.minor, + prop.totalGlobalMem/(1024.*1024.), peak_flops()/1e9 + ); +} + +#ifndef _USING_FCGI_ +void COPROC_CUDA::write_xml(MIOFILE& f) { + f.printf( + "\n" + " %d\n" + " %s\n" + " %f\n" + " %f\n" + " %f\n" + " %d\n" + " %d\n" + " %u\n" + " %u\n" + " %d\n" + " %d\n" + " %u\n" + " %d\n" + " %d %d %d\n" + " %d %d %d\n" + " %u\n" + " %d\n" + " %d\n" + " %d\n" + " %u\n" + " %d\n" + " %d\n" + "\n", + count, + prop.name, + req_secs, + req_instances, + estimated_delay, + display_driver_version, + cuda_version, + (unsigned int)prop.totalGlobalMem, + (unsigned int)prop.sharedMemPerBlock, + prop.regsPerBlock, + prop.warpSize, + (unsigned int)prop.memPitch, + prop.maxThreadsPerBlock, + prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2], + prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2], + (unsigned int)prop.totalConstMem, + prop.major, + prop.minor, + prop.clockRate, + (unsigned int)prop.textureAlignment, + prop.deviceOverlap, + prop.multiProcessorCount + ); +} +#endif + 
+void COPROC_CUDA::clear() { + count = 0; + used = 0; + req_secs = 0; + req_instances = 0; + estimated_delay = -1; // mark as absent + cuda_version = 0; + display_driver_version = 0; + strcpy(prop.name, ""); + prop.totalGlobalMem = 0; + prop.sharedMemPerBlock = 0; + prop.regsPerBlock = 0; + prop.warpSize = 0; + prop.memPitch = 0; + prop.maxThreadsPerBlock = 0; + prop.maxThreadsDim[0] = 0; + prop.maxThreadsDim[1] = 0; + prop.maxThreadsDim[2] = 0; + prop.maxGridSize[0] = 0; + prop.maxGridSize[1] = 0; + prop.maxGridSize[2] = 0; + prop.clockRate = 0; + prop.totalConstMem = 0; + prop.major = 0; + prop.minor = 0; + prop.textureAlignment = 0; + prop.deviceOverlap = 0; + prop.multiProcessorCount = 0; +} + +int COPROC_CUDA::parse(MIOFILE& fin) { + char buf[1024], buf2[256]; + + clear(); + while (fin.fgets(buf, sizeof(buf))) { + if (strstr(buf, "")) { + return 0; + } + if (parse_int(buf, "", count)) continue; + if (parse_double(buf, "", req_secs)) continue; + if (parse_double(buf, "", req_instances)) continue; + if (parse_double(buf, "", estimated_delay)) continue; + if (parse_str(buf, "", prop.name, sizeof(prop.name))) continue; + if (parse_int(buf, "", display_driver_version)) continue; + if (parse_int(buf, "", cuda_version)) continue; + if (parse_double(buf, "", prop.dtotalGlobalMem)) continue; + if (parse_int(buf, "", (int&)prop.sharedMemPerBlock)) continue; + if (parse_int(buf, "", prop.regsPerBlock)) continue; + if (parse_int(buf, "", prop.warpSize)) continue; + if (parse_int(buf, "", (int&)prop.memPitch)) continue; + if (parse_int(buf, "", prop.maxThreadsPerBlock)) continue; + if (parse_str(buf, "", buf2, sizeof(buf2))) { + // can't use sscanf here (FCGI) + // + prop.maxThreadsDim[0] = atoi(buf2); + char* p = strchr(buf2, ' '); + if (p) { + p++; + prop.maxThreadsDim[1] = atoi(p); + p = strchr(p, ' '); + if (p) { + p++; + prop.maxThreadsDim[2] = atoi(p); + } + } + continue; + } + if (parse_str(buf, "", buf2, sizeof(buf2))) { + prop.maxGridSize[0] = atoi(buf2); + char* 
p = strchr(buf2, ' '); + if (p) { + p++; + prop.maxGridSize[1] = atoi(p); + p = strchr(p, ' '); + if (p) { + p++; + prop.maxGridSize[2] = atoi(p); + } + } + continue; + } + if (parse_int(buf, "", prop.clockRate)) continue; + if (parse_int(buf, "", (int&)prop.totalConstMem)) continue; + if (parse_int(buf, "", prop.major)) continue; + if (parse_int(buf, "", prop.minor)) continue; + if (parse_int(buf, "", (int&)prop.textureAlignment)) continue; + if (parse_int(buf, "", prop.deviceOverlap)) continue; + if (parse_int(buf, "", prop.multiProcessorCount)) continue; + } + return ERR_XML_PARSE; +} + +////////////////// ATI STARTS HERE ///////////////// + +#ifndef _USING_FCGI_ +void COPROC_ATI::write_xml(MIOFILE& f) { + f.printf( + "\n" + ); + + f.printf( + " %d\n" + " %s\n" + " %f\n" + " %f\n" + " %f\n" + " %d\n" + " %d\n" + " %d\n" + " %d\n" + " %u\n" + " %d\n" + " %d\n" + " %d\n" + " %d\n" + " %d\n" + " %d\n" + " %d\n" + " %d\n" + " %d\n" + " %s\n", + count, + name, + req_secs, + req_instances, + estimated_delay, + attribs.target, + attribs.localRAM, + attribs.uncachedRemoteRAM, + attribs.cachedRemoteRAM, + attribs.engineClock, + attribs.memoryClock, + attribs.wavefrontSize, + attribs.numberOfSIMD, + attribs.doublePrecision, + attribs.pitch_alignment, + attribs.surface_alignment, + info.maxResource1DWidth, + info.maxResource2DWidth, + info.maxResource2DHeight, + version + ); + + if (atirt_detected) { + f.printf(" \n"); + } + + if (amdrt_detected) { + f.printf(" \n"); + } + + f.printf("\n"); +}; +#endif + +void COPROC_ATI::clear() { + count = 0; + used = 0; + req_secs = 0; + req_instances = 0; + estimated_delay = -1; + strcpy(name, ""); + strcpy(version, ""); + atirt_detected = false; + amdrt_detected = false; + memset(&attribs, 0, sizeof(attribs)); + memset(&info, 0, sizeof(info)); +} + +int COPROC_ATI::parse(MIOFILE& fin) { + char buf[1024]; + int n; + + clear(); + + while (fin.fgets(buf, sizeof(buf))) { + if (strstr(buf, "")) return 0; + if (parse_int(buf, "", count)) 
continue;
+        if (parse_str(buf, "<name>", name, sizeof(name))) continue;
+        if (parse_double(buf, "<req_secs>", req_secs)) continue;
+        if (parse_double(buf, "<req_instances>", req_instances)) continue;
+        if (parse_double(buf, "<estimated_delay>", estimated_delay)) continue;
+
+        if (parse_int(buf, "<target>", n)) {
+            attribs.target = (CALtarget)n;
+            continue;
+        }
+        if (parse_int(buf, "<localRAM>", n)) {
+            attribs.localRAM = n;
+            continue;
+        }
+        if (parse_int(buf, "<uncachedRemoteRAM>", n)) {
+            attribs.uncachedRemoteRAM = n;
+            continue;
+        }
+        if (parse_int(buf, "<cachedRemoteRAM>", n)) {
+            attribs.cachedRemoteRAM = n;
+            continue;
+        }
+        if (parse_int(buf, "<engineClock>", n)) {
+            attribs.engineClock = n;
+            continue;
+        }
+        if (parse_int(buf, "<memoryClock>", n)) {
+            attribs.memoryClock = n;
+            continue;
+        }
+        if (parse_int(buf, "<wavefrontSize>", n)) {
+            attribs.wavefrontSize = n;
+            continue;
+        }
+        if (parse_int(buf, "<numberOfSIMD>" , n)) {
+            attribs.numberOfSIMD = n;
+            continue;
+        }
+        if (parse_int(buf, "<doublePrecision>", n)) {
+            attribs.doublePrecision = n?CAL_TRUE:CAL_FALSE;
+            continue;
+        }
+        if (parse_int(buf, "<pitch_alignment>", n)) {
+            attribs.pitch_alignment = n;
+            continue;
+        }
+        if (parse_int(buf, "<surface_alignment>", n)) {
+            attribs.surface_alignment = n;
+            continue;
+        }
+        if (parse_int(buf, "<maxResource1DWidth>", n)) {
+            info.maxResource1DWidth = n;
+            continue;
+        }
+        if (parse_int(buf, "<maxResource2DWidth>", n)) {
+            info.maxResource2DWidth = n;
+            continue;
+        }
+        if (parse_int(buf, "<maxResource2DHeight>", n)) {
+            info.maxResource2DHeight = n;
+            continue;
+        }
+        if (parse_bool(buf, "amdrt_detected", amdrt_detected)) continue;
+        if (parse_bool(buf, "atirt_detected", atirt_detected)) continue;
+        if (parse_str(buf, "<CALVersion>", version, sizeof(version))) continue;
+    }
+    return ERR_XML_PARSE;
+}
+
+void COPROC_ATI::description(char* buf) {
+    sprintf(buf, "%s (CAL version %s, %.0fMB, %.0f GFLOPS peak)",
+        name, version, attribs.localRAM/1024.*1024., peak_flops()/1.e9
+    );
+}
diff --git a/lib/coproc.h b/lib/coproc.h
index e0e964a829..c5f022b58f 100644
--- a/lib/coproc.h
+++ b/lib/coproc.h
@@ -144,6 +144,12 @@ struct COPROC {
             available_ram_unknown[i] = true;
         }
     }
+    inline void clear_usage() {
+        for (int i=0; i<count; i++) {
+            usage[i] = 0;
+            pending_usage[i] = 0;
+        }
+    }
@@ -186,56 +192,9 @@ struct COPROC {
-struct COPROCS {
-    std::vector<COPROC*> coprocs;   // not deleted in destructor
-        // so any structure that includes this needs to do it manually
-
-    COPROCS(){}
-    ~COPROCS(){}    // don't delete coprocs; else crash in APP_INIT_DATA logic
-    void write_xml(MIOFILE& out);
-    void get(
-        bool use_all, std::vector<std::string> &descs,
-        std::vector<std::string> &warnings,
-        std::vector<int>& ignore_cuda_dev,
-        std::vector<int>& ignore_ati_dev
-    );
-    int parse(MIOFILE&);
-    void summary_string(char*, int);
-    COPROC* lookup(const char*);
-    bool fully_used() {
-        for (unsigned int i=0; i<coprocs.size(); i++) {
-            COPROC* cp = coprocs[i];
-            if (cp->used < cp->count) return false;
-        }
-        return true;
-    }
-
-    // Copy a coproc set, possibly setting usage to zero.
-    // used in round-robin simulator and CPU scheduler,
-    // to avoid messing w/ master copy
-    //
-    void clone(COPROCS& c, bool copy_used) {
-        for (unsigned int i=0; i<c.coprocs.size(); i++) {
-            COPROC* cp = c.coprocs[i];
-            COPROC* cp2 = new COPROC(cp->type);
-            cp2->count = cp->count;
-            if (copy_used) cp2->used = cp->used;
-            coprocs.push_back(cp2);
-        }
-    }
-    inline void clear_usage() {
-        for (unsigned int i=0; i<coprocs.size(); i++) {
-            COPROC* cp = coprocs[i];
-            for (int j=0; j<cp->count; j++) {
-                cp->usage[j] = 0;
-                cp->pending_usage[j] = 0;
-            }
-        }
-    }
-    inline void delete_coprocs() {
-        for (unsigned int i=0; i<coprocs.size(); i++) {
-            COPROC* cp = coprocs[i];
-            delete cp;
-        }
-        coprocs.clear();
-    }
-};
@@ -243,7 +196,7 @@ struct COPROC_CUDA : public COPROC {
-    static void get(COPROCS&,
+    void get(
         std::vector<std::string>&, std::vector<std::string>&,
         std::vector<int>& ignore_devs
     );
@@ -268,6 +221,8 @@ struct COPROC_CUDA : public COPROC {
     void get_available_ram();
     bool check_running_graphics_app();
 
+    void fake(double, int);
+
 };
 
 enum CUdevice_attribute_enum {
@@ -305,7 +260,7 @@ struct COPROC_ATI : public COPROC {
 #endif
     COPROC_ATI(): COPROC("ATI"){}
     virtual ~COPROC_ATI(){}
-    static void get(COPROCS&,
+    void get(
         std::vector<std::string>&, std::vector<std::string>&,
         std::vector<int>& ignore_devs
     );
@@ -318,9 +273,53 @@ struct COPROC_ATI : public COPROC {
         return x?x:5e10;
     }
     void get_available_ram();
+    void fake(double, int);
 };
 
-extern COPROC_CUDA* fake_cuda(COPROCS&, double, int);
-extern COPROC_ATI* fake_ati(COPROCS&, double, int);
+struct COPROCS {
+    COPROC_CUDA cuda;
+    COPROC_ATI ati;
+
+    COPROCS(){}
+    ~COPROCS(){}    // don't delete coprocs; else crash in APP_INIT_DATA logic
+    void write_xml(MIOFILE& out);
+    void get(
+        bool use_all, std::vector<std::string> &descs,
+        std::vector<std::string> &warnings,
+        std::vector<int>& ignore_cuda_dev,
+        std::vector<int>& ignore_ati_dev
+    );
+    int parse(MIOFILE&);
+    void summary_string(char*, int);
+    bool fully_used() {
+        if (cuda.used < cuda.count) return false;
+        if (ati.used < ati.count) return false;
+        return true;
+    }
+
+    // Copy a coproc set, possibly setting usage to zero.
+    // used in round-robin simulator and CPU scheduler,
+    // to avoid messing w/ master copy
+    //
+    void clone(COPROCS& c, bool copy_used) {
+        c.cuda = cuda;
+        c.ati = ati;
+        if (!copy_used) {
+            c.cuda.used = 0;
+            c.ati.used = 0;
+        }
+    }
+    inline void clear() {
+        cuda.count = 0;
+        ati.count = 0;
+    }
+    inline void clear_usage() {
+        cuda.clear_usage();
+        ati.clear_usage();
+    }
+    inline bool none() {
+        return (cuda.count==0) && (ati.count==0);
+    }
+};
 
 #endif
diff --git a/lib/hostinfo.cpp b/lib/hostinfo.cpp
index 57c2d2fa7f..9785b86214 100644
--- a/lib/hostinfo.cpp
+++ b/lib/hostinfo.cpp
@@ -64,6 +64,8 @@ void HOST_INFO::clear_host_info() {
 
     strcpy(os_name, "");
     strcpy(os_version, "");
+
+    coprocs.clear();
 }
 
 int HOST_INFO::parse(MIOFILE& in, bool benchmarks_only) {
diff --git a/lib/hostinfo.h b/lib/hostinfo.h
index 810d6f83d4..c676a5b74c 100644
--- a/lib/hostinfo.h
+++ b/lib/hostinfo.h
@@ -78,6 +78,12 @@ public:
     void clear_host_info();
     void make_random_string(const char* salt, char* out);
     void generate_host_cpid();
+    inline bool have_cuda() {
+        return (coprocs.cuda.count > 0);
+    }
+    inline bool have_ati() {
+        return (coprocs.ati.count > 0);
+    }
 };
 
 #ifdef __APPLE__
diff --git a/sched/handle_request.cpp b/sched/handle_request.cpp
index c0e1098bd3..b74f61d0c1 100644
--- a/sched/handle_request.cpp
+++ b/sched/handle_request.cpp
@@ -1056,8 +1056,8 @@ bool bad_install_type() {
 static inline bool requesting_work() {
     if (g_request->work_req_seconds > 0) return true;
     if (g_request->cpu_req_secs > 0) return true;
-    if (g_request->coproc_cuda && g_request->coproc_cuda->req_secs) return true;
-    if (g_request->coproc_ati && g_request->coproc_ati->req_secs) return true;
+    if (g_request->coprocs.cuda.count && g_request->coprocs.cuda.req_secs) return true;
+    if (g_request->coprocs.ati.count && g_request->coprocs.ati.req_secs) return true;
     return false;
 }
 
diff --git a/sched/sched_customize.cpp b/sched/sched_customize.cpp
index 70e5971b46..d384c7ab5f 100644
--- a/sched/sched_customize.cpp
+++ b/sched/sched_customize.cpp
@@ -138,8 +138,8 @@ static inline bool app_plan_ati(
     SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu
 ) {
     char buf[256];
-    COPROC_ATI* cp = (COPROC_ATI*)sreq.coprocs.lookup("ATI");
-    if (!cp) {
+    COPROC_ATI* cp = &sreq.coprocs.ati;
+    if (!cp->count) {
         if (config.debug_version_select) {
             log_messages.printf(MSG_NORMAL,
                 "[version] Host lacks ATI GPU for plan class ati\n"
@@ -320,8 +320,8 @@ static inline bool app_plan_cuda(
     SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu
 ) {
     char buf[256];
-    COPROC_CUDA* cp = (COPROC_CUDA*)sreq.coprocs.lookup("CUDA");
-    if (!cp) {
+    COPROC_CUDA* cp = &sreq.coprocs.cuda;
+    if (!cp->count) {
         if (config.debug_version_select) {
             log_messages.printf(MSG_NORMAL,
                 "[version] Host lacks CUDA coprocessor for plan class %s\n",
diff --git a/sched/sched_send.cpp b/sched/sched_send.cpp
index 3d170d4dbc..9eb98c1bd7 100644
--- a/sched/sched_send.cpp
+++ b/sched/sched_send.cpp
@@ -101,18 +101,8 @@ void WORK_REQ::get_job_limits() {
     if (n < 1) n = 1;
     effective_ncpus = n;
 
-    n = 0;
-    COPROC* cp = g_request->coprocs.lookup("CUDA");
-    if (cp) {
-        n = cp->count;
-        if (n > MAX_GPUS) n = MAX_GPUS;
-    }
-    cp = g_request->coprocs.lookup("ATI");
-    if (cp) {
-        if (cp->count <= MAX_GPUS && cp->count > n) {
-            n = cp->count;
-        }
-    }
+    n = g_request->coprocs.cuda.count + g_request->coprocs.ati.count;
+    if (n > MAX_GPUS) n = MAX_GPUS;
     effective_ngpus = n;
 
     int mult = effective_ncpus + config.gpu_multiplier * effective_ngpus;
@@ -581,9 +571,9 @@ static inline bool hard_app(APP& app) {
 
 static inline double get_estimated_delay(BEST_APP_VERSION& bav) {
     if (bav.host_usage.ncudas) {
-        return g_request->coproc_cuda->estimated_delay;
+        return g_request->coprocs.cuda.estimated_delay;
     } else if (bav.host_usage.natis) {
-        return g_request->coproc_ati->estimated_delay;
+        return g_request->coprocs.ati.estimated_delay;
     } else {
         return g_request->cpu_estimated_delay;
     }
@@ -591,9 +581,9 @@ static inline double get_estimated_delay(BEST_APP_VERSION& bav) {
 
 static inline void update_estimated_delay(BEST_APP_VERSION& bav, double dt) {
     if (bav.host_usage.ncudas) {
-        g_request->coproc_cuda->estimated_delay += dt;
+        g_request->coprocs.cuda.estimated_delay += dt;
     } else if (bav.host_usage.natis) {
-        g_request->coproc_ati->estimated_delay += dt;
+        g_request->coprocs.ati.estimated_delay += dt;
     } else {
         g_request->cpu_estimated_delay += dt;
     }
@@ -1475,18 +1465,18 @@ void send_work_setup() {
     g_wreq->cpu_req_instances = g_request->cpu_req_instances;
     g_wreq->anonymous_platform = anonymous(g_request->platforms.list[0]);
 
-    if (g_request->coproc_cuda) {
-        g_wreq->cuda_req_secs = clamp_req_sec(g_request->coproc_cuda->req_secs);
-        g_wreq->cuda_req_instances = g_request->coproc_cuda->req_instances;
-        if (g_request->coproc_cuda->estimated_delay < 0) {
-            g_request->coproc_cuda->estimated_delay = g_request->cpu_estimated_delay;
+    if (g_request->coprocs.cuda.count) {
+        g_wreq->cuda_req_secs = clamp_req_sec(g_request->coprocs.cuda.req_secs);
+        g_wreq->cuda_req_instances = g_request->coprocs.cuda.req_instances;
+        if (g_request->coprocs.cuda.estimated_delay < 0) {
+            g_request->coprocs.cuda.estimated_delay = g_request->cpu_estimated_delay;
         }
     }
-    if (g_request->coproc_ati) {
-        g_wreq->ati_req_secs = clamp_req_sec(g_request->coproc_ati->req_secs);
-        g_wreq->ati_req_instances = g_request->coproc_ati->req_instances;
-        if (g_request->coproc_ati->estimated_delay < 0) {
-            g_request->coproc_ati->estimated_delay = g_request->cpu_estimated_delay;
+    if (g_request->coprocs.ati.count) {
+        g_wreq->ati_req_secs = clamp_req_sec(g_request->coprocs.ati.req_secs);
+        g_wreq->ati_req_instances = g_request->coprocs.ati.req_instances;
+        if (g_request->coprocs.ati.estimated_delay < 0) {
+            g_request->coprocs.ati.estimated_delay = g_request->cpu_estimated_delay;
         }
     }
     if (g_wreq->cpu_req_secs || g_wreq->cuda_req_secs || g_wreq->ati_req_secs) {
@@ -1508,18 +1498,18 @@ void send_work_setup() {
         g_wreq->cpu_req_secs, g_wreq->cpu_req_instances,
         g_request->cpu_estimated_delay
     );
-    if (g_request->coproc_cuda) {
+    if (g_request->coprocs.cuda.count) {
         log_messages.printf(MSG_NORMAL,
             "[send] CUDA: req %.2f sec, %.2f instances; est delay %.2f\n",
             g_wreq->cuda_req_secs, g_wreq->cuda_req_instances,
-            g_request->coproc_cuda->estimated_delay
+            g_request->coprocs.cuda.estimated_delay
         );
     }
-    if (g_request->coproc_ati) {
+    if (g_request->coprocs.ati.count) {
         log_messages.printf(MSG_NORMAL,
             "[send] ATI: req %.2f sec, %.2f instances; est delay %.2f\n",
             g_wreq->ati_req_secs, g_wreq->ati_req_instances,
-            g_request->coproc_ati->estimated_delay
+            g_request->coprocs.ati.estimated_delay
         );
     }
     log_messages.printf(MSG_NORMAL,
diff --git a/sched/sched_types.cpp b/sched/sched_types.cpp
index 2f9c01c0b4..f92a9993b6 100644
--- a/sched/sched_types.cpp
+++ b/sched/sched_types.cpp
@@ -198,8 +198,7 @@ const char* SCHEDULER_REQUEST::parse(FILE* fin) {
     have_time_stats_log = false;
     client_cap_plan_class = false;
     sandbox = -1;
-    coproc_cuda = 0;
-    coproc_ati = 0;
+    coprocs.clear();
 
     fgets(buf, sizeof(buf), fin);
     if (!match_tag(buf, "<scheduler_request>")) return "no start tag";
@@ -367,8 +366,6 @@ const char* SCHEDULER_REQUEST::parse(FILE* fin) {
             MIOFILE mf;
             mf.init_file(fin);
             coprocs.parse(mf);
-            coproc_cuda = (COPROC_CUDA*)coprocs.lookup("CUDA");
-            coproc_ati = (COPROC_ATI*)coprocs.lookup("ATI");
             continue;
         }
         if (parse_bool(buf, "client_cap_plan_class", client_cap_plan_class)) continue;
diff --git a/sched/sched_types.h b/sched/sched_types.h
index b0ef3fe111..c72972f486 100644
--- a/sched/sched_types.h
+++ b/sched/sched_types.h
@@ -402,8 +402,6 @@ struct SCHEDULER_REQUEST {
     HOST host;      // request message is parsed into here.
         // does NOT contain the full host record.
     COPROCS coprocs;
-    COPROC_CUDA* coproc_cuda;
-    COPROC_ATI* coproc_ati;
     std::vector<RESULT> results;     // completed results being reported
     std::vector<MSG_FROM_HOST_DESC> msgs_from_host;