From 40eebe00af840bd5eedb65c8cda6264a4d0eddda Mon Sep 17 00:00:00 2001 From: David Anderson Date: Tue, 18 May 2010 19:22:34 +0000 Subject: [PATCH] - client/scheduler: in COPROCS, instead of having a vector of pointers to dynamically allocated COPROC-derived objects, just have the objects themselves. Dynamic allocation should be avoided at all costs. svn path=/trunk/boinc/; revision=21564 --- checkin_notes | 27 + client/app_start.cpp | 5 +- client/client_state.cpp | 22 +- client/client_state.h | 2 - client/client_types.cpp | 8 +- client/coproc_detect.cpp | 1821 ++++++++++++++++++------------------- client/cpu_sched.cpp | 46 +- client/cs_scheduler.cpp | 26 +- client/cs_statefile.cpp | 4 +- client/rr_sim.cpp | 980 ++++++++++---------- client/scheduler_op.cpp | 6 +- client/sim.h | 2 - client/work_fetch.cpp | 68 +- lib/coproc.cpp | 959 ++++++++++--------- lib/coproc.h | 115 ++- lib/hostinfo.cpp | 2 + lib/hostinfo.h | 6 + sched/handle_request.cpp | 4 +- sched/sched_customize.cpp | 8 +- sched/sched_send.cpp | 50 +- sched/sched_types.cpp | 5 +- sched/sched_types.h | 2 - 22 files changed, 2078 insertions(+), 2090 deletions(-) diff --git a/checkin_notes b/checkin_notes index 3951bbaf24..8acfe8b5b0 100644 --- a/checkin_notes +++ b/checkin_notes @@ -3701,3 +3701,30 @@ David 18 May 2010 client/ cpu_sched.cpp + +David 18 May 2010 + - client/scheduler: in COPROCS, instead of having a vector of + pointers to dynamically allocated COPROC-derived objects, + just have the objects themselves. + Dynamic allocation should be avoided at all costs. 
+ + client/ + app_start.cpp + client_state.cpp,h + client_types.cpp + coproc_detect.cpp + cpu_sched.cpp + cs_scheduler.cpp + cs_statefile.cpp + rr_sim.cpp + scheduler_op.cpp + wim.h + work_fetch.cpp + lib/ + coproc.cpp,h + hostinfo.cpp,h + sched/ + handle_request.cpp + sched_customize.cpp + sched_send.cpp + sched_types.cpp,h diff --git a/client/app_start.cpp b/client/app_start.cpp index ed1e65bbc2..ca997ab5fc 100644 --- a/client/app_start.cpp +++ b/client/app_start.cpp @@ -119,7 +119,10 @@ static void debug_print_argv(char** argv) { static void coproc_cmdline( int rsc_type, RESULT* rp, double ninstances, char* cmdline ) { - COPROC* coproc = (rsc_type==RSC_TYPE_CUDA)?(COPROC*)coproc_cuda:(COPROC*)coproc_ati; + COPROC* coproc = (rsc_type==RSC_TYPE_CUDA) + ?(COPROC*)&gstate.host_info.coprocs.cuda + :(COPROC*)&gstate.host_info.coprocs.ati + ; for (int j=0; jcoproc_indices[j]; // sanity check diff --git a/client/client_state.cpp b/client/client_state.cpp index a53bacfa58..5be0083c93 100644 --- a/client/client_state.cpp +++ b/client/client_state.cpp @@ -62,8 +62,6 @@ using std::max; CLIENT_STATE gstate; -COPROC_CUDA* coproc_cuda; -COPROC_ATI* coproc_ati; CLIENT_STATE::CLIENT_STATE(): lookup_website_op(&gui_http), @@ -266,23 +264,21 @@ int CLIENT_STATE::init() { msg_printf(NULL, MSG_INFO, warnings[i].c_str()); } } - if (host_info.coprocs.coprocs.size() == 0) { + if (host_info.coprocs.none() ) { msg_printf(NULL, MSG_INFO, "No usable GPUs found"); } #if 0 msg_printf(NULL, MSG_INFO, "Faking an NVIDIA GPU"); - coproc_cuda = fake_cuda(host_info.coprocs, 256*MEGA, 2); - coproc_cuda->available_ram_fake[0] = 256*MEGA; - coproc_cuda->available_ram_fake[1] = 192*MEGA; + host_info.coprocs.cuda.fake(256*MEGA, 2); + host_info.coprocs.cuda.available_ram_fake[0] = 256*MEGA; + host_info.coprocs.cuda.available_ram_fake[1] = 192*MEGA; #endif #if 0 msg_printf(NULL, MSG_INFO, "Faking an ATI GPU"); - coproc_ati = fake_ati(host_info.coprocs, 512*MEGA, 2); - coproc_ati->available_ram_fake[0] = 
256*MEGA; - coproc_ati->available_ram_fake[1] = 192*MEGA; + host_info.coprocs.ati.fake(512*MEGA, 2); + host_info.coprocs.ati.available_ram_fake[0] = 256*MEGA; + host_info.coprocs.ati.available_ram_fake[1] = 192*MEGA; #endif - coproc_cuda = (COPROC_CUDA*)host_info.coprocs.lookup("CUDA"); - coproc_ati = (COPROC_ATI*)host_info.coprocs.lookup("ATI"); } // check for app_info.xml file in project dirs. @@ -597,8 +593,8 @@ bool CLIENT_STATE::poll_slow_events() { // NVIDIA provides an interface for finding if a GPU is // running a graphics app. ATI doesn't as far as I know // - if (coproc_cuda && user_active && !global_prefs.run_gpu_if_user_active) { - if (coproc_cuda->check_running_graphics_app()) { + if (host_info.have_cuda() && user_active && !global_prefs.run_gpu_if_user_active) { + if (host_info.coprocs.cuda.check_running_graphics_app()) { request_schedule_cpus("GPU state change"); } } diff --git a/client/client_state.h b/client/client_state.h index 1383b84a62..882fda6073 100644 --- a/client/client_state.h +++ b/client/client_state.h @@ -488,8 +488,6 @@ public: extern CLIENT_STATE gstate; -extern COPROC_CUDA* coproc_cuda; -extern COPROC_ATI* coproc_ati; extern bool gpus_usable; // return a random double in the range [MIN,min(e^n,MAX)) diff --git a/client/client_types.cpp b/client/client_types.cpp index dbcb27d934..097f961864 100644 --- a/client/client_types.cpp +++ b/client/client_types.cpp @@ -1264,10 +1264,10 @@ void APP_VERSION::get_file_errors(string& str) { } bool APP_VERSION::missing_coproc() { - if (ncudas && !coproc_cuda) { + if (ncudas && gstate.host_info.coprocs.cuda.count==0) { return true; } - if (natis && !coproc_ati) { + if (natis && gstate.host_info.coprocs.ati.count==0) { return true; } return false; @@ -1790,9 +1790,9 @@ int RESULT::write_gui(MIOFILE& out) { char buf[256]; strcpy(buf, ""); if (atp && atp->task_state() == PROCESS_EXECUTING) { - if (avp->ncudas && coproc_cuda->count>1) { + if (avp->ncudas && gstate.host_info.coprocs.cuda.count>1) { 
sprintf(buf, " (device %d)", coproc_indices[0]); - } else if (avp->natis && coproc_ati->count>1) { + } else if (avp->natis && gstate.host_info.coprocs.ati.count>1) { sprintf(buf, " (device %d)", coproc_indices[0]); } } diff --git a/client/coproc_detect.cpp b/client/coproc_detect.cpp index 4e4b5147a7..8849d01796 100644 --- a/client/coproc_detect.cpp +++ b/client/coproc_detect.cpp @@ -1,916 +1,905 @@ -// This file is part of BOINC. -// http://boinc.berkeley.edu -// Copyright (C) 2009 University of California -// -// BOINC is free software; you can redistribute it and/or modify it -// under the terms of the GNU Lesser General Public License -// as published by the Free Software Foundation, -// either version 3 of the License, or (at your option) any later version. -// -// BOINC is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -// See the GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with BOINC. If not, see . - - -// client-specific GPU code. 
Mostly GPU detection - -#include "cpp.h" - -#ifdef _WIN32 -#include "boinc_win.h" -#ifndef SIM -#include -#endif -#else -#ifdef __APPLE__ -// Suppress obsolete warning when building for OS 10.3.9 -#define DLOPEN_NO_WARN -#endif -#include "config.h" -#include -#include -#include -#endif - -#include "coproc.h" -#include "str_util.h" -#include "util.h" - -#include "client_state.h" -#include "client_msgs.h" - -using std::string; -using std::vector; - -//#define MEASURE_AVAILABLE_RAM - -static bool in_vector(int n, vector& v) { - for (unsigned int i=0; i&descs, vector&warnings, - vector& ignore_cuda_dev, - vector& ignore_ati_dev -) { - -#ifdef _WIN32 - COPROC_CUDA::get(*this, use_all, descs, warnings, ignore_cuda_dev); - COPROC_ATI::get(*this, descs, warnings, ignore_ati_dev); -#else - void (*old_sig)(int) = signal(SIGSEGV, segv_handler); - if (setjmp(resume)) { - warnings.push_back("Caught SIGSEGV in NVIDIA GPU detection"); - } else { - COPROC_CUDA::get(*this, use_all, descs, warnings, ignore_cuda_dev); - } -#ifndef __APPLE__ // ATI does not yet support CAL on Macs - if (setjmp(resume)) { - warnings.push_back("Caught SIGSEGV in ATI GPU detection"); - } else { - COPROC_ATI::get(*this, descs, warnings, ignore_ati_dev); - } -#endif - signal(SIGSEGV, old_sig); -#endif -} - -// return 1/-1/0 if device 1 is more/less/same capable than device 2. 
-// If "loose", ignore FLOPS and tolerate small memory diff -// -int cuda_compare(COPROC_CUDA& c1, COPROC_CUDA& c2, bool loose) { - if (c1.prop.major > c2.prop.major) return 1; - if (c1.prop.major < c2.prop.major) return -1; - if (c1.prop.minor > c2.prop.minor) return 1; - if (c1.prop.minor < c2.prop.minor) return -1; - if (c1.cuda_version > c2.cuda_version) return 1; - if (c1.cuda_version < c2.cuda_version) return -1; - if (loose) { - if (c1.prop.totalGlobalMem > 1.4*c2.prop.totalGlobalMem) return 1; - if (c1.prop.totalGlobalMem < .7* c2.prop.totalGlobalMem) return -1; - return 0; - } - if (c1.prop.totalGlobalMem > c2.prop.totalGlobalMem) return 1; - if (c1.prop.totalGlobalMem < c2.prop.totalGlobalMem) return -1; - double s1 = c1.peak_flops(); - double s2 = c2.peak_flops(); - if (s1 > s2) return 1; - if (s1 < s2) return -1; - return 0; -} - -#ifdef _WIN32 -typedef int (__stdcall *CUDA_GDC)(int *count); -typedef int (__stdcall *CUDA_GDV)(int* version); -typedef int (__stdcall *CUDA_GDI)(int); -typedef int (__stdcall *CUDA_GDG)(int*, int); -typedef int (__stdcall *CUDA_GDA)(int*, int, int); -typedef int (__stdcall *CUDA_GDN)(char*, int, int); -typedef int (__stdcall *CUDA_GDM)(unsigned int*, int); -typedef int (__stdcall *CUDA_GDCC)(int*, int*, int); -typedef int (__stdcall *CUDA_CC)(unsigned int*, unsigned int, unsigned int); -typedef int (__stdcall *CUDA_CD)(unsigned int); -typedef int (__stdcall *CUDA_MA)(unsigned int*, unsigned int); -typedef int (__stdcall *CUDA_MF)(unsigned int); -typedef int (__stdcall *CUDA_MGI)(unsigned int*, unsigned int*); - -CUDA_GDC __cuDeviceGetCount = NULL; -CUDA_GDV __cuDriverGetVersion = NULL; -CUDA_GDI __cuInit = NULL; -CUDA_GDG __cuDeviceGet = NULL; -CUDA_GDA __cuDeviceGetAttribute = NULL; -CUDA_GDN __cuDeviceGetName = NULL; -CUDA_GDM __cuDeviceTotalMem = NULL; -CUDA_GDCC __cuDeviceComputeCapability = NULL; -CUDA_CC __cuCtxCreate = NULL; -CUDA_CD __cuCtxDestroy = NULL; -CUDA_MA __cuMemAlloc = NULL; -CUDA_MF __cuMemFree = NULL; 
-CUDA_MGI __cuMemGetInfo = NULL; -#else -void* cudalib; -int (*__cuInit)(int); -int (*__cuDeviceGetCount)(int*); -int (*__cuDriverGetVersion)(int*); -int (*__cuDeviceGet)(int*, int); -int (*__cuDeviceGetAttribute)(int*, int, int); -int (*__cuDeviceGetName)(char*, int, int); -int (*__cuDeviceTotalMem)(unsigned int*, int); -int (*__cuDeviceComputeCapability)(int*, int*, int); -int (*__cuCtxCreate)(unsigned int*, unsigned int, unsigned int); -int (*__cuCtxDestroy)(unsigned int); -int (*__cuMemAlloc)(unsigned int*, unsigned int); -int (*__cuMemFree)(unsigned int); -int (*__cuMemGetInfo)(unsigned int*, unsigned int*); -#endif - -// NVIDIA interfaces are documented here: -// http://developer.download.nvidia.com/compute/cuda/2_3/toolkit/docs/online/index.html - -void COPROC_CUDA::get( - COPROCS& coprocs, - bool use_all, // if false, use only those equivalent to most capable - vector& descs, - vector& warnings, - vector& ignore_devs -) { - int count, retval; - char buf[256]; - -#ifdef _WIN32 - HMODULE cudalib = LoadLibrary("nvcuda.dll"); - if (!cudalib) { - warnings.push_back("No NVIDIA library found"); - return; - } - __cuDeviceGetCount = (CUDA_GDC)GetProcAddress( cudalib, "cuDeviceGetCount" ); - __cuDriverGetVersion = (CUDA_GDV)GetProcAddress( cudalib, "cuDriverGetVersion" ); - __cuInit = (CUDA_GDI)GetProcAddress( cudalib, "cuInit" ); - __cuDeviceGet = (CUDA_GDG)GetProcAddress( cudalib, "cuDeviceGet" ); - __cuDeviceGetAttribute = (CUDA_GDA)GetProcAddress( cudalib, "cuDeviceGetAttribute" ); - __cuDeviceGetName = (CUDA_GDN)GetProcAddress( cudalib, "cuDeviceGetName" ); - __cuDeviceTotalMem = (CUDA_GDM)GetProcAddress( cudalib, "cuDeviceTotalMem" ); - __cuDeviceComputeCapability = (CUDA_GDCC)GetProcAddress( cudalib, "cuDeviceComputeCapability" ); - __cuCtxCreate = (CUDA_CC)GetProcAddress( cudalib, "cuCtxCreate" ); - __cuCtxDestroy = (CUDA_CD)GetProcAddress( cudalib, "cuCtxDestroy" ); - __cuMemAlloc = (CUDA_MA)GetProcAddress( cudalib, "cuMemAlloc" ); - __cuMemFree = 
(CUDA_MF)GetProcAddress( cudalib, "cuMemFree" ); - __cuMemGetInfo = (CUDA_MGI)GetProcAddress( cudalib, "cuMemGetInfo" ); - -#ifndef SIM - NvAPI_Status nvapiStatus; - NvDisplayHandle hDisplay; - NV_DISPLAY_DRIVER_VERSION Version; - memset(&Version, 0, sizeof(Version)); - Version.version = NV_DISPLAY_DRIVER_VERSION_VER; - - NvAPI_Initialize(); - for (int i=0; ; i++) { - nvapiStatus = NvAPI_EnumNvidiaDisplayHandle(i, &hDisplay); - if (nvapiStatus != NVAPI_OK) break; - nvapiStatus = NvAPI_GetDisplayDriverVersion(hDisplay, &Version); - if (nvapiStatus == NVAPI_OK) break; - } -#endif -#else - -#ifdef __APPLE__ - cudalib = dlopen("/usr/local/cuda/lib/libcuda.dylib", RTLD_NOW); -#else - cudalib = dlopen("libcuda.so", RTLD_NOW); -#endif - if (!cudalib) { - warnings.push_back("No NVIDIA library found"); - return; - } - __cuDeviceGetCount = (int(*)(int*)) dlsym(cudalib, "cuDeviceGetCount"); - __cuDriverGetVersion = (int(*)(int*)) dlsym( cudalib, "cuDriverGetVersion" ); - __cuInit = (int(*)(int)) dlsym( cudalib, "cuInit" ); - __cuDeviceGet = (int(*)(int*, int)) dlsym( cudalib, "cuDeviceGet" ); - __cuDeviceGetAttribute = (int(*)(int*, int, int)) dlsym( cudalib, "cuDeviceGetAttribute" ); - __cuDeviceGetName = (int(*)(char*, int, int)) dlsym( cudalib, "cuDeviceGetName" ); - __cuDeviceTotalMem = (int(*)(unsigned int*, int)) dlsym( cudalib, "cuDeviceTotalMem" ); - __cuDeviceComputeCapability = (int(*)(int*, int*, int)) dlsym( cudalib, "cuDeviceComputeCapability" ); - __cuCtxCreate = (int(*)(unsigned int*, unsigned int, unsigned int)) dlsym( cudalib, "cuCtxCreate" ); - __cuCtxDestroy = (int(*)(unsigned int)) dlsym( cudalib, "cuCtxDestroy" ); - __cuMemAlloc = (int(*)(unsigned int*, unsigned int)) dlsym( cudalib, "cuMemAlloc" ); - __cuMemFree = (int(*)(unsigned int)) dlsym( cudalib, "cuMemFree" ); - __cuMemGetInfo = (int(*)(unsigned int*, unsigned int*)) dlsym( cudalib, "cuMemGetInfo" ); -#endif - - if (!__cuDriverGetVersion) { - warnings.push_back("cuDriverGetVersion() missing from 
NVIDIA library"); - return; - } - if (!__cuInit) { - warnings.push_back("cuInit() missing from NVIDIA library"); - return; - } - if (!__cuDeviceGetCount) { - warnings.push_back("cuDeviceGetCount() missing from NVIDIA library"); - return; - } - if (!__cuDeviceGet) { - warnings.push_back("cuDeviceGet() missing from NVIDIA library"); - return; - } - if (!__cuDeviceGetAttribute) { - warnings.push_back("cuDeviceGetAttribute() missing from NVIDIA library"); - return; - } - if (!__cuDeviceTotalMem) { - warnings.push_back("cuDeviceTotalMem() missing from NVIDIA library"); - return; - } - if (!__cuDeviceComputeCapability) { - warnings.push_back("cuDeviceComputeCapability() missing from NVIDIA library"); - return; - } - if (!__cuCtxCreate) { - warnings.push_back("cuCtxCreate() missing from NVIDIA library"); - return; - } - if (!__cuCtxDestroy) { - warnings.push_back("cuCtxDestroy() missing from NVIDIA library"); - return; - } - if (!__cuMemAlloc) { - warnings.push_back("cuMemAlloc() missing from NVIDIA library"); - return; - } - if (!__cuMemFree) { - warnings.push_back("cuMemFree() missing from NVIDIA library"); - return; - } - if (!__cuMemGetInfo) { - warnings.push_back("cuMemGetInfo() missing from NVIDIA library"); - return; - } - - retval = (*__cuInit)(0); - if (retval) { - sprintf(buf, "NVIDIA drivers present but no GPUs found"); - warnings.push_back(buf); - return; - } - - int cuda_version; - retval = (*__cuDriverGetVersion)(&cuda_version); - if (retval) { - sprintf(buf, "cuDriverGetVersion() returned %d", retval); - warnings.push_back(buf); - return; - } - - vector gpus; - retval = (*__cuDeviceGetCount)(&count); - if (retval) { - sprintf(buf, "cuDeviceGetCount() returned %d", retval); - warnings.push_back(buf); - return; - } - sprintf(buf, "NVIDIA library reports %d GPU%s", count, (count==1)?"":"s"); - warnings.push_back(buf); - - int j; - unsigned int i; - COPROC_CUDA cc; - string s; - for (j=0; j 100) continue; // e.g. 
9999 is an error -#if defined(_WIN32) && !defined(SIM) - cc.display_driver_version = Version.drvVersion; -#else - cc.display_driver_version = 0; -#endif - cc.cuda_version = cuda_version; - cc.device_num = j; - gpus.push_back(cc); - } - - if (!gpus.size()) { - warnings.push_back("No CUDA-capable NVIDIA GPUs found"); - return; - } - - // identify the most capable non-ignored instance - // - COPROC_CUDA best; - bool first = true; - for (i=0; i 0) { - best = gpus[i]; - } - } - - // see which other instances are equivalent, - // and set the "count" and "device_nums" fields - // - best.count = 0; - for (i=0; itype, "CUDA"); - cc->count = count; - for (int i=0; idevice_nums[i] = i; - } - cc->display_driver_version = 18000; - cc->cuda_version = 2020; - strcpy(cc->prop.name, "Fake NVIDIA GPU"); - cc->prop.totalGlobalMem = (unsigned int)ram; - cc->prop.sharedMemPerBlock = 100; - cc->prop.regsPerBlock = 8; - cc->prop.warpSize = 10; - cc->prop.memPitch = 10; - cc->prop.maxThreadsPerBlock = 20; - cc->prop.maxThreadsDim[0] = 2; - cc->prop.maxThreadsDim[1] = 2; - cc->prop.maxThreadsDim[2] = 2; - cc->prop.maxGridSize[0] = 10; - cc->prop.maxGridSize[1] = 10; - cc->prop.maxGridSize[2] = 10; - cc->prop.totalConstMem = 10; - cc->prop.major = 1; - cc->prop.minor = 2; - cc->prop.clockRate = 1250000; - cc->prop.textureAlignment = 1000; - cc->prop.multiProcessorCount = 14; - coprocs.coprocs.push_back(cc); - return cc; -} - -// See how much RAM is available on each GPU. 
-// If this fails, set "available_ram_unknown" -// -void COPROC_CUDA::get_available_ram() { -#ifdef MEASURE_AVAILABLE_RAM - int device, i, retval; - unsigned int memfree, memtotal; - unsigned int ctx; - - // avoid crash if faked GPU - // - if (!__cuDeviceGet) { - for (i=0; i& descs, vector& warnings, vector& ignore_devs -) { - CALuint numDevices, cal_major, cal_minor, cal_imp; - CALdevice device; - CALdeviceinfo info; - CALdeviceattribs attribs; - char buf[256]; - bool amdrt_detected = false; - bool atirt_detected = false; - int retval; - - attribs.struct_size = sizeof(CALdeviceattribs); - device = 0; - numDevices =0; - -#ifdef _WIN32 - -#if defined _M_X64 - const char* atilib_name = "aticalrt64.dll"; - const char* amdlib_name = "amdcalrt64.dll"; -#else - const char* atilib_name = "aticalrt.dll"; - const char* amdlib_name = "amdcalrt.dll"; -#endif - - HINSTANCE callib = LoadLibrary(atilib_name); - if (callib) { - atirt_detected = true; - } else { - callib = LoadLibrary(amdlib_name); - if (callib) { - amdrt_detected = true; - } - } - - if (!callib) { - warnings.push_back("No ATI library found."); - return; - } - - __calInit = (ATI_GDI)GetProcAddress(callib, "calInit" ); - __calGetVersion = (ATI_VER)GetProcAddress(callib, "calGetVersion" ); - __calDeviceGetCount = (ATI_GDC)GetProcAddress(callib, "calDeviceGetCount" ); - __calDeviceGetAttribs =(ATI_ATTRIBS)GetProcAddress(callib, "calDeviceGetAttribs" ); - __calShutdown = (ATI_CLOSE)GetProcAddress(callib, "calShutdown" ); - __calDeviceGetInfo = (ATI_INFO)GetProcAddress(callib, "calDeviceGetInfo" ); - __calDeviceGetStatus = (ATI_STATUS)GetProcAddress(callib, "calDeviceGetStatus" ); - __calDeviceOpen = (ATI_DEVICEOPEN)GetProcAddress(callib, "calDeviceOpen" ); - __calDeviceClose = (ATI_DEVICECLOSE)GetProcAddress(callib, "calDeviceClose" ); - -#else - - void* callib; - - callib = dlopen("libaticalrt.so", RTLD_NOW); - if (!callib) { - warnings.push_back("No ATI library found"); - return; - } - - atirt_detected = true; - - 
__calInit = (int(*)()) dlsym(callib, "calInit"); - __calGetVersion = (int(*)(CALuint*, CALuint*, CALuint*)) dlsym(callib, "calGetVersion"); - __calDeviceGetCount = (int(*)(CALuint*)) dlsym(callib, "calDeviceGetCount"); - __calDeviceGetAttribs = (int(*)(CALdeviceattribs*, CALuint)) dlsym(callib, "calDeviceGetAttribs"); - __calShutdown = (int(*)()) dlsym(callib, "calShutdown"); - __calDeviceGetInfo = (int(*)(CALdeviceinfo*, CALuint)) dlsym(callib, "calDeviceGetInfo"); - __calDeviceGetStatus = (int(*)(CALdevicestatus*, CALdevice)) dlsym(callib, "calDeviceGetStatus"); - __calDeviceOpen = (int(*)(CALdevice*, CALuint)) dlsym(callib, "calDeviceOpen"); - __calDeviceClose = (int(*)(CALdevice)) dlsym(callib, "calDeviceClose"); - -#endif - - if (!__calInit) { - warnings.push_back("calInit() missing from CAL library"); - return; - } - if (!__calGetVersion) { - warnings.push_back("calGetVersion() missing from CAL library"); - return; - } - if (!__calDeviceGetCount) { - warnings.push_back("calDeviceGetCount() missing from CAL library"); - return; - } - if (!__calDeviceGetAttribs) { - warnings.push_back("calDeviceGetAttribs() missing from CAL library"); - return; - } - if (!__calDeviceGetInfo) { - warnings.push_back("calDeviceGetInfo() missing from CAL library"); - return; - } - if (!__calDeviceGetStatus) { - warnings.push_back("calDeviceGetStatus() missing from CAL library"); - return; - } - if (!__calDeviceOpen) { - warnings.push_back("calDeviceOpen() missing from CAL library"); - return; - } - if (!__calDeviceClose) { - warnings.push_back("calDeviceClose() missing from CAL library"); - return; - } - - retval = (*__calInit)(); - if (retval != CAL_RESULT_OK) { - sprintf(buf, "calInit() returned %d", retval); - warnings.push_back(buf); - return; - } - retval = (*__calDeviceGetCount)(&numDevices); - if (retval != CAL_RESULT_OK) { - sprintf(buf, "calDeviceGetCount() returned %d", retval); - warnings.push_back(buf); - return; - } - retval = (*__calGetVersion)(&cal_major, &cal_minor, 
&cal_imp); - if (retval != CAL_RESULT_OK) { - sprintf(buf, "calGetVersion() returned %d", retval); - warnings.push_back(buf); - return; - } - - if (!numDevices) { - warnings.push_back("No usable CAL devices found"); - return; - } - - COPROC_ATI cc, cc2; - string s, gpu_name; - vector gpus; - for (CALuint i=0; i best.peak_flops()) { - best = gpus[i]; - } - sprintf(buf2, "ATI GPU %d: %s", gpus[i].device_num, buf); - } - descs.push_back(buf2); - } - best.count = 0; - for (unsigned int i=0; itype, "ATI"); - coprocs.coprocs.push_back(ccp); - - // shut down, otherwise Lenovo won't be able to switch to low-power GPU - // - retval = (*__calShutdown)(); -} - -COPROC_ATI* fake_ati(COPROCS& coprocs, double ram, int count) { - COPROC_ATI* cc = new COPROC_ATI; - strcpy(cc->type, "ATI"); - strcpy(cc->version, "1.4.3"); - strcpy(cc->name, "foobar"); - cc->count = count; - memset(&cc->attribs, 0, sizeof(cc->attribs)); - memset(&cc->info, 0, sizeof(cc->info)); - cc->attribs.localRAM = (int)(ram/MEGA); - cc->attribs.numberOfSIMD = 32; - cc->attribs.wavefrontSize = 32; - cc->attribs.engineClock = 50; - for (int i=0; idevice_nums[i] = i; - } - coprocs.coprocs.push_back(cc); - return cc; -} - -void COPROC_ATI::get_available_ram() { -#ifdef MEASURE_AVAILABLE_RAM - CALdevicestatus st; - CALdevice dev; - int i, retval; - - st.struct_size = sizeof(CALdevicestatus); - - // avoid crash if faked GPU - if (!__calInit) { - for (i=0; i. + + +// client-specific GPU code. 
Mostly GPU detection + +#include "cpp.h" + +#ifdef _WIN32 +#include "boinc_win.h" +#ifndef SIM +#include +#endif +#else +#ifdef __APPLE__ +// Suppress obsolete warning when building for OS 10.3.9 +#define DLOPEN_NO_WARN +#endif +#include "config.h" +#include +#include +#include +#endif + +#include "coproc.h" +#include "str_util.h" +#include "util.h" + +#include "client_state.h" +#include "client_msgs.h" + +using std::string; +using std::vector; + +//#define MEASURE_AVAILABLE_RAM + +static bool in_vector(int n, vector& v) { + for (unsigned int i=0; i&descs, vector&warnings, + vector& ignore_cuda_dev, + vector& ignore_ati_dev +) { + +#ifdef _WIN32 + cuda.get(use_all, descs, warnings, ignore_cuda_dev); + ati.get(descs, warnings, ignore_ati_dev); +#else + void (*old_sig)(int) = signal(SIGSEGV, segv_handler); + if (setjmp(resume)) { + warnings.push_back("Caught SIGSEGV in NVIDIA GPU detection"); + } else { + cuda.get(use_all, descs, warnings, ignore_cuda_dev); + } +#ifndef __APPLE__ // ATI does not yet support CAL on Macs + if (setjmp(resume)) { + warnings.push_back("Caught SIGSEGV in ATI GPU detection"); + } else { + ati.get(descs, warnings, ignore_ati_dev); + } +#endif + signal(SIGSEGV, old_sig); +#endif +} + +// return 1/-1/0 if device 1 is more/less/same capable than device 2. 
+// If "loose", ignore FLOPS and tolerate small memory diff +// +int cuda_compare(COPROC_CUDA& c1, COPROC_CUDA& c2, bool loose) { + if (c1.prop.major > c2.prop.major) return 1; + if (c1.prop.major < c2.prop.major) return -1; + if (c1.prop.minor > c2.prop.minor) return 1; + if (c1.prop.minor < c2.prop.minor) return -1; + if (c1.cuda_version > c2.cuda_version) return 1; + if (c1.cuda_version < c2.cuda_version) return -1; + if (loose) { + if (c1.prop.totalGlobalMem > 1.4*c2.prop.totalGlobalMem) return 1; + if (c1.prop.totalGlobalMem < .7* c2.prop.totalGlobalMem) return -1; + return 0; + } + if (c1.prop.totalGlobalMem > c2.prop.totalGlobalMem) return 1; + if (c1.prop.totalGlobalMem < c2.prop.totalGlobalMem) return -1; + double s1 = c1.peak_flops(); + double s2 = c2.peak_flops(); + if (s1 > s2) return 1; + if (s1 < s2) return -1; + return 0; +} + +#ifdef _WIN32 +typedef int (__stdcall *CUDA_GDC)(int *count); +typedef int (__stdcall *CUDA_GDV)(int* version); +typedef int (__stdcall *CUDA_GDI)(int); +typedef int (__stdcall *CUDA_GDG)(int*, int); +typedef int (__stdcall *CUDA_GDA)(int*, int, int); +typedef int (__stdcall *CUDA_GDN)(char*, int, int); +typedef int (__stdcall *CUDA_GDM)(unsigned int*, int); +typedef int (__stdcall *CUDA_GDCC)(int*, int*, int); +typedef int (__stdcall *CUDA_CC)(unsigned int*, unsigned int, unsigned int); +typedef int (__stdcall *CUDA_CD)(unsigned int); +typedef int (__stdcall *CUDA_MA)(unsigned int*, unsigned int); +typedef int (__stdcall *CUDA_MF)(unsigned int); +typedef int (__stdcall *CUDA_MGI)(unsigned int*, unsigned int*); + +CUDA_GDC __cuDeviceGetCount = NULL; +CUDA_GDV __cuDriverGetVersion = NULL; +CUDA_GDI __cuInit = NULL; +CUDA_GDG __cuDeviceGet = NULL; +CUDA_GDA __cuDeviceGetAttribute = NULL; +CUDA_GDN __cuDeviceGetName = NULL; +CUDA_GDM __cuDeviceTotalMem = NULL; +CUDA_GDCC __cuDeviceComputeCapability = NULL; +CUDA_CC __cuCtxCreate = NULL; +CUDA_CD __cuCtxDestroy = NULL; +CUDA_MA __cuMemAlloc = NULL; +CUDA_MF __cuMemFree = NULL; 
+CUDA_MGI __cuMemGetInfo = NULL; +#else +void* cudalib; +int (*__cuInit)(int); +int (*__cuDeviceGetCount)(int*); +int (*__cuDriverGetVersion)(int*); +int (*__cuDeviceGet)(int*, int); +int (*__cuDeviceGetAttribute)(int*, int, int); +int (*__cuDeviceGetName)(char*, int, int); +int (*__cuDeviceTotalMem)(unsigned int*, int); +int (*__cuDeviceComputeCapability)(int*, int*, int); +int (*__cuCtxCreate)(unsigned int*, unsigned int, unsigned int); +int (*__cuCtxDestroy)(unsigned int); +int (*__cuMemAlloc)(unsigned int*, unsigned int); +int (*__cuMemFree)(unsigned int); +int (*__cuMemGetInfo)(unsigned int*, unsigned int*); +#endif + +// NVIDIA interfaces are documented here: +// http://developer.download.nvidia.com/compute/cuda/2_3/toolkit/docs/online/index.html + +void COPROC_CUDA::get( + bool use_all, // if false, use only those equivalent to most capable + vector& descs, + vector& warnings, + vector& ignore_devs +) { + int count, retval; + char buf[256]; + +#ifdef _WIN32 + HMODULE cudalib = LoadLibrary("nvcuda.dll"); + if (!cudalib) { + warnings.push_back("No NVIDIA library found"); + return; + } + __cuDeviceGetCount = (CUDA_GDC)GetProcAddress( cudalib, "cuDeviceGetCount" ); + __cuDriverGetVersion = (CUDA_GDV)GetProcAddress( cudalib, "cuDriverGetVersion" ); + __cuInit = (CUDA_GDI)GetProcAddress( cudalib, "cuInit" ); + __cuDeviceGet = (CUDA_GDG)GetProcAddress( cudalib, "cuDeviceGet" ); + __cuDeviceGetAttribute = (CUDA_GDA)GetProcAddress( cudalib, "cuDeviceGetAttribute" ); + __cuDeviceGetName = (CUDA_GDN)GetProcAddress( cudalib, "cuDeviceGetName" ); + __cuDeviceTotalMem = (CUDA_GDM)GetProcAddress( cudalib, "cuDeviceTotalMem" ); + __cuDeviceComputeCapability = (CUDA_GDCC)GetProcAddress( cudalib, "cuDeviceComputeCapability" ); + __cuCtxCreate = (CUDA_CC)GetProcAddress( cudalib, "cuCtxCreate" ); + __cuCtxDestroy = (CUDA_CD)GetProcAddress( cudalib, "cuCtxDestroy" ); + __cuMemAlloc = (CUDA_MA)GetProcAddress( cudalib, "cuMemAlloc" ); + __cuMemFree = (CUDA_MF)GetProcAddress( 
cudalib, "cuMemFree" ); + __cuMemGetInfo = (CUDA_MGI)GetProcAddress( cudalib, "cuMemGetInfo" ); + +#ifndef SIM + NvAPI_Status nvapiStatus; + NvDisplayHandle hDisplay; + NV_DISPLAY_DRIVER_VERSION Version; + memset(&Version, 0, sizeof(Version)); + Version.version = NV_DISPLAY_DRIVER_VERSION_VER; + + NvAPI_Initialize(); + for (int i=0; ; i++) { + nvapiStatus = NvAPI_EnumNvidiaDisplayHandle(i, &hDisplay); + if (nvapiStatus != NVAPI_OK) break; + nvapiStatus = NvAPI_GetDisplayDriverVersion(hDisplay, &Version); + if (nvapiStatus == NVAPI_OK) break; + } +#endif +#else + +#ifdef __APPLE__ + cudalib = dlopen("/usr/local/cuda/lib/libcuda.dylib", RTLD_NOW); +#else + cudalib = dlopen("libcuda.so", RTLD_NOW); +#endif + if (!cudalib) { + warnings.push_back("No NVIDIA library found"); + return; + } + __cuDeviceGetCount = (int(*)(int*)) dlsym(cudalib, "cuDeviceGetCount"); + __cuDriverGetVersion = (int(*)(int*)) dlsym( cudalib, "cuDriverGetVersion" ); + __cuInit = (int(*)(int)) dlsym( cudalib, "cuInit" ); + __cuDeviceGet = (int(*)(int*, int)) dlsym( cudalib, "cuDeviceGet" ); + __cuDeviceGetAttribute = (int(*)(int*, int, int)) dlsym( cudalib, "cuDeviceGetAttribute" ); + __cuDeviceGetName = (int(*)(char*, int, int)) dlsym( cudalib, "cuDeviceGetName" ); + __cuDeviceTotalMem = (int(*)(unsigned int*, int)) dlsym( cudalib, "cuDeviceTotalMem" ); + __cuDeviceComputeCapability = (int(*)(int*, int*, int)) dlsym( cudalib, "cuDeviceComputeCapability" ); + __cuCtxCreate = (int(*)(unsigned int*, unsigned int, unsigned int)) dlsym( cudalib, "cuCtxCreate" ); + __cuCtxDestroy = (int(*)(unsigned int)) dlsym( cudalib, "cuCtxDestroy" ); + __cuMemAlloc = (int(*)(unsigned int*, unsigned int)) dlsym( cudalib, "cuMemAlloc" ); + __cuMemFree = (int(*)(unsigned int)) dlsym( cudalib, "cuMemFree" ); + __cuMemGetInfo = (int(*)(unsigned int*, unsigned int*)) dlsym( cudalib, "cuMemGetInfo" ); +#endif + + if (!__cuDriverGetVersion) { + warnings.push_back("cuDriverGetVersion() missing from NVIDIA library"); + 
return; + } + if (!__cuInit) { + warnings.push_back("cuInit() missing from NVIDIA library"); + return; + } + if (!__cuDeviceGetCount) { + warnings.push_back("cuDeviceGetCount() missing from NVIDIA library"); + return; + } + if (!__cuDeviceGet) { + warnings.push_back("cuDeviceGet() missing from NVIDIA library"); + return; + } + if (!__cuDeviceGetAttribute) { + warnings.push_back("cuDeviceGetAttribute() missing from NVIDIA library"); + return; + } + if (!__cuDeviceTotalMem) { + warnings.push_back("cuDeviceTotalMem() missing from NVIDIA library"); + return; + } + if (!__cuDeviceComputeCapability) { + warnings.push_back("cuDeviceComputeCapability() missing from NVIDIA library"); + return; + } + if (!__cuCtxCreate) { + warnings.push_back("cuCtxCreate() missing from NVIDIA library"); + return; + } + if (!__cuCtxDestroy) { + warnings.push_back("cuCtxDestroy() missing from NVIDIA library"); + return; + } + if (!__cuMemAlloc) { + warnings.push_back("cuMemAlloc() missing from NVIDIA library"); + return; + } + if (!__cuMemFree) { + warnings.push_back("cuMemFree() missing from NVIDIA library"); + return; + } + if (!__cuMemGetInfo) { + warnings.push_back("cuMemGetInfo() missing from NVIDIA library"); + return; + } + + retval = (*__cuInit)(0); + if (retval) { + sprintf(buf, "NVIDIA drivers present but no GPUs found"); + warnings.push_back(buf); + return; + } + + int cuda_version; + retval = (*__cuDriverGetVersion)(&cuda_version); + if (retval) { + sprintf(buf, "cuDriverGetVersion() returned %d", retval); + warnings.push_back(buf); + return; + } + + vector gpus; + retval = (*__cuDeviceGetCount)(&count); + if (retval) { + sprintf(buf, "cuDeviceGetCount() returned %d", retval); + warnings.push_back(buf); + return; + } + sprintf(buf, "NVIDIA library reports %d GPU%s", count, (count==1)?"":"s"); + warnings.push_back(buf); + + int j; + unsigned int i; + COPROC_CUDA cc; + string s; + for (j=0; j 100) continue; // e.g. 
9999 is an error +#if defined(_WIN32) && !defined(SIM) + cc.display_driver_version = Version.drvVersion; +#else + cc.display_driver_version = 0; +#endif + cc.cuda_version = cuda_version; + cc.device_num = j; + gpus.push_back(cc); + } + + if (!gpus.size()) { + warnings.push_back("No CUDA-capable NVIDIA GPUs found"); + return; + } + + // identify the most capable non-ignored instance + // + COPROC_CUDA best; + bool first = true; + for (i=0; i 0) { + best = gpus[i]; + } + } + + // see which other instances are equivalent, + // and set the "count" and "device_nums" fields + // + best.count = 0; + for (i=0; i& descs, vector& warnings, vector& ignore_devs +) { + CALuint numDevices, cal_major, cal_minor, cal_imp; + CALdevice device; + CALdeviceinfo info; + CALdeviceattribs attribs; + char buf[256]; + bool amdrt_detected = false; + bool atirt_detected = false; + int retval; + + attribs.struct_size = sizeof(CALdeviceattribs); + device = 0; + numDevices =0; + +#ifdef _WIN32 + +#if defined _M_X64 + const char* atilib_name = "aticalrt64.dll"; + const char* amdlib_name = "amdcalrt64.dll"; +#else + const char* atilib_name = "aticalrt.dll"; + const char* amdlib_name = "amdcalrt.dll"; +#endif + + HINSTANCE callib = LoadLibrary(atilib_name); + if (callib) { + atirt_detected = true; + } else { + callib = LoadLibrary(amdlib_name); + if (callib) { + amdrt_detected = true; + } + } + + if (!callib) { + warnings.push_back("No ATI library found."); + return; + } + + __calInit = (ATI_GDI)GetProcAddress(callib, "calInit" ); + __calGetVersion = (ATI_VER)GetProcAddress(callib, "calGetVersion" ); + __calDeviceGetCount = (ATI_GDC)GetProcAddress(callib, "calDeviceGetCount" ); + __calDeviceGetAttribs =(ATI_ATTRIBS)GetProcAddress(callib, "calDeviceGetAttribs" ); + __calShutdown = (ATI_CLOSE)GetProcAddress(callib, "calShutdown" ); + __calDeviceGetInfo = (ATI_INFO)GetProcAddress(callib, "calDeviceGetInfo" ); + __calDeviceGetStatus = (ATI_STATUS)GetProcAddress(callib, "calDeviceGetStatus" ); + 
__calDeviceOpen = (ATI_DEVICEOPEN)GetProcAddress(callib, "calDeviceOpen" ); + __calDeviceClose = (ATI_DEVICECLOSE)GetProcAddress(callib, "calDeviceClose" ); + +#else + + void* callib; + + callib = dlopen("libaticalrt.so", RTLD_NOW); + if (!callib) { + warnings.push_back("No ATI library found"); + return; + } + + atirt_detected = true; + + __calInit = (int(*)()) dlsym(callib, "calInit"); + __calGetVersion = (int(*)(CALuint*, CALuint*, CALuint*)) dlsym(callib, "calGetVersion"); + __calDeviceGetCount = (int(*)(CALuint*)) dlsym(callib, "calDeviceGetCount"); + __calDeviceGetAttribs = (int(*)(CALdeviceattribs*, CALuint)) dlsym(callib, "calDeviceGetAttribs"); + __calShutdown = (int(*)()) dlsym(callib, "calShutdown"); + __calDeviceGetInfo = (int(*)(CALdeviceinfo*, CALuint)) dlsym(callib, "calDeviceGetInfo"); + __calDeviceGetStatus = (int(*)(CALdevicestatus*, CALdevice)) dlsym(callib, "calDeviceGetStatus"); + __calDeviceOpen = (int(*)(CALdevice*, CALuint)) dlsym(callib, "calDeviceOpen"); + __calDeviceClose = (int(*)(CALdevice)) dlsym(callib, "calDeviceClose"); + +#endif + + if (!__calInit) { + warnings.push_back("calInit() missing from CAL library"); + return; + } + if (!__calGetVersion) { + warnings.push_back("calGetVersion() missing from CAL library"); + return; + } + if (!__calDeviceGetCount) { + warnings.push_back("calDeviceGetCount() missing from CAL library"); + return; + } + if (!__calDeviceGetAttribs) { + warnings.push_back("calDeviceGetAttribs() missing from CAL library"); + return; + } + if (!__calDeviceGetInfo) { + warnings.push_back("calDeviceGetInfo() missing from CAL library"); + return; + } + if (!__calDeviceGetStatus) { + warnings.push_back("calDeviceGetStatus() missing from CAL library"); + return; + } + if (!__calDeviceOpen) { + warnings.push_back("calDeviceOpen() missing from CAL library"); + return; + } + if (!__calDeviceClose) { + warnings.push_back("calDeviceClose() missing from CAL library"); + return; + } + + retval = (*__calInit)(); + if (retval != 
CAL_RESULT_OK) { + sprintf(buf, "calInit() returned %d", retval); + warnings.push_back(buf); + return; + } + retval = (*__calDeviceGetCount)(&numDevices); + if (retval != CAL_RESULT_OK) { + sprintf(buf, "calDeviceGetCount() returned %d", retval); + warnings.push_back(buf); + return; + } + retval = (*__calGetVersion)(&cal_major, &cal_minor, &cal_imp); + if (retval != CAL_RESULT_OK) { + sprintf(buf, "calGetVersion() returned %d", retval); + warnings.push_back(buf); + return; + } + + if (!numDevices) { + warnings.push_back("No usable CAL devices found"); + return; + } + + COPROC_ATI cc, cc2; + string s, gpu_name; + vector gpus; + for (CALuint i=0; i best.peak_flops()) { + best = gpus[i]; + } + sprintf(buf2, "ATI GPU %d: %s", gpus[i].device_num, buf); + } + descs.push_back(buf2); + } + best.count = 0; + for (unsigned int i=0; icount) { msg_printf(NULL, MSG_INTERNAL_ERROR, "Missing a %s coprocessor", cp2->type ); @@ -162,10 +158,10 @@ struct PROC_RESOURCES { COPROC* cp2; if (av.ncudas) { x = av.ncudas; - cp2 = coprocs.lookup("CUDA"); + cp2 = &gstate.host_info.coprocs.cuda; } else if (av.natis) { x = av.natis; - cp2 = coprocs.lookup("ATI"); + cp2 = &gstate.host_info.coprocs.ati; } else { return; } @@ -492,18 +488,18 @@ void CLIENT_STATE::reset_debt_accounting() { for (i=0; icpu_pwf.reset_debt_accounting(); - if (coproc_cuda) { + if (host_info.have_cuda()) { p->cuda_pwf.reset_debt_accounting(); } - if (coproc_ati) { + if (host_info.have_ati()) { p->ati_pwf.reset_debt_accounting(); } } cpu_work_fetch.reset_debt_accounting(); - if (coproc_cuda) { + if (host_info.have_cuda()) { cuda_work_fetch.reset_debt_accounting(); } - if (coproc_ati) { + if (host_info.have_ati()) { ati_work_fetch.reset_debt_accounting(); } debt_interval_start = now; @@ -548,11 +544,11 @@ void CLIENT_STATE::adjust_debts() { cpu_work_fetch.update_long_term_debts(); cpu_work_fetch.update_short_term_debts(); - if (coproc_cuda) { + if (host_info.have_cuda()) { cuda_work_fetch.update_long_term_debts(); 
cuda_work_fetch.update_short_term_debts(); } - if (coproc_ati) { + if (host_info.have_ati()) { ati_work_fetch.update_long_term_debts(); ati_work_fetch.update_short_term_debts(); } @@ -1194,16 +1190,16 @@ static inline void assign_coprocs(vector& jobs) { gstate.host_info.coprocs.clear_usage(); #ifndef SIM - if (coproc_cuda) { - coproc_cuda->get_available_ram(); + if (gstate.host_info.have_cuda()) { + gstate.host_info.coprocs.cuda.get_available_ram(); if (log_flags.coproc_debug) { - coproc_cuda->print_available_ram(); + gstate.host_info.coprocs.cuda.print_available_ram(); } } - if (coproc_ati) { - coproc_ati->get_available_ram(); + if (gstate.host_info.have_ati()) { + gstate.host_info.coprocs.ati.get_available_ram(); if (log_flags.coproc_debug) { - coproc_ati->print_available_ram(); + gstate.host_info.coprocs.ati.print_available_ram(); } } #endif @@ -1215,10 +1211,10 @@ static inline void assign_coprocs(vector& jobs) { APP_VERSION* avp = rp->avp; if (avp->ncudas) { usage = avp->ncudas; - cp = coproc_cuda; + cp = &gstate.host_info.coprocs.cuda; } else if (avp->natis) { usage = avp->natis; - cp = coproc_ati; + cp = &gstate.host_info.coprocs.ati; } else { continue; } @@ -1235,10 +1231,10 @@ static inline void assign_coprocs(vector& jobs) { APP_VERSION* avp = rp->avp; if (avp->ncudas) { usage = avp->ncudas; - cp = coproc_cuda; + cp = &gstate.host_info.coprocs.cuda; } else if (avp->natis) { usage = avp->natis; - cp = coproc_ati; + cp = &gstate.host_info.coprocs.ati; } else { job_iter++; continue; diff --git a/client/cs_scheduler.cpp b/client/cs_scheduler.cpp index eb446c42d5..7ebb763cc8 100644 --- a/client/cs_scheduler.cpp +++ b/client/cs_scheduler.cpp @@ -220,18 +220,18 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p) { // copy request values from RSC_WORK_FETCH to COPROC // - if (coproc_cuda) { - coproc_cuda->req_secs = cuda_work_fetch.req_secs; - coproc_cuda->req_instances = cuda_work_fetch.req_instances; - coproc_cuda->estimated_delay = 
cuda_work_fetch.req_secs?cuda_work_fetch.busy_time_estimator.get_busy_time():0; + if (host_info.have_cuda()) { + host_info.coprocs.cuda.req_secs = cuda_work_fetch.req_secs; + host_info.coprocs.cuda.req_instances = cuda_work_fetch.req_instances; + host_info.coprocs.cuda.estimated_delay = cuda_work_fetch.req_secs?cuda_work_fetch.busy_time_estimator.get_busy_time():0; } - if (coproc_ati) { - coproc_ati->req_secs = ati_work_fetch.req_secs; - coproc_ati->req_instances = ati_work_fetch.req_instances; - coproc_ati->estimated_delay = ati_work_fetch.req_secs?ati_work_fetch.busy_time_estimator.get_busy_time():0; + if (host_info.have_ati()) { + host_info.coprocs.ati.req_secs = ati_work_fetch.req_secs; + host_info.coprocs.ati.req_instances = ati_work_fetch.req_instances; + host_info.coprocs.ati.estimated_delay = ati_work_fetch.req_secs?ati_work_fetch.busy_time_estimator.get_busy_time():0; } - if (host_info.coprocs.coprocs.size()) { + if (!host_info.coprocs.none()) { host_info.coprocs.write_xml(mf); } @@ -823,8 +823,8 @@ int CLIENT_STATE::handle_scheduler_reply(PROJECT* project, char* scheduler_url) ); if (!rp->avp) { msg_printf(project, MSG_INTERNAL_ERROR, - "No application found for task: %s %d %s; discarding", - rp->platform, rp->version_num, rp->plan_class + "No app version found for app %s platform %s ver %d class%s; discarding %s", + rp->wup->app->name, rp->platform, rp->version_num, rp->plan_class, rp->name ); delete rp; continue; @@ -861,13 +861,13 @@ int CLIENT_STATE::handle_scheduler_reply(PROJECT* project, char* scheduler_url) "[sched_op] estimated total CPU task duration: %.0f seconds", est_cpu_duration ); - if (coproc_cuda) { + if (host_info.have_cuda()) { msg_printf(project, MSG_INFO, "[sched_op] estimated total NVIDIA GPU task duration: %.0f seconds", est_cuda_duration ); } - if (coproc_ati) { + if (host_info.have_ati()) { msg_printf(project, MSG_INFO, "[sched_op] estimated total ATI GPU task duration: %.0f seconds", est_ati_duration diff --git 
a/client/cs_statefile.cpp b/client/cs_statefile.cpp index 154e4df54d..3974f0f810 100644 --- a/client/cs_statefile.cpp +++ b/client/cs_statefile.cpp @@ -913,8 +913,8 @@ int CLIENT_STATE::write_state_gui(MIOFILE& f) { core_client_version.minor, core_client_version.release, executing_as_daemon?1:0, - coproc_cuda?1:0, - coproc_ati?1:0 + host_info.have_cuda()?1:0, + host_info.have_ati()?1:0 ); for (i=0; i. - -// Simulate the processing of the current workload -// (include jobs that are downloading) -// with weighted round-robin (WRR) scheduling. -// -// For efficiency, we simulate an approximation of WRR. -// We don't model time-slicing. -// Instead we use a continuous model where, at a given point, -// each project has a set of running jobs that uses at most all CPUs. -// These jobs are assumed to run at a rate proportionate to their avg_ncpus, -// and each project gets total CPU proportionate to its RRS. -// -// For coprocessors, we saturate the resource; -// i.e. with 2 GPUs, we'd let a 1-GPU app and a 2-GPU app run together. -// Otherwise, there'd be the possibility of computing -// a nonzero shortfall inappropriately. -// -// Outputs are changes to global state: -// - deadline misses (per-project count, per-result flag) -// Deadline misses are not counted for tasks -// that are too large to run in RAM right now. 
-// - resource shortfalls (per-project and total) -// - counts of resources idle now -// - -#include "cpp.h" - -#ifdef _WIN32 -#include "boinc_win.h" -#else -#include "config.h" -#endif - -#include "client_state.h" -#include "coproc.h" -#include "client_msgs.h" - -inline void rsc_string(RESULT* rp, char* buf) { - APP_VERSION* avp = rp->avp; - if (avp->ncudas) { - sprintf(buf, "%.2f CPU + %.2f NV", avp->avg_ncpus, avp->ncudas); - } else if (avp->natis) { - sprintf(buf, "%.2f CPU + %.2f ATI", avp->avg_ncpus, avp->natis); - } else { - sprintf(buf, "%.2f CPU", avp->avg_ncpus); - } -} - -// this is here (rather than rr_sim.h) because its inline functions -// refer to RESULT -// -struct RR_SIM_STATUS { - std::vector active; - double active_ncpus; - double active_cudas; - double active_atis; - - inline void activate(RESULT* rp, double when) { - PROJECT* p = rp->project; - if (log_flags.rr_simulation) { - char buf[256]; - rsc_string(rp, buf); - msg_printf(p, MSG_INFO, - "[rr_sim] %.2f: starting %s (%s)", - when, rp->name, buf - ); - } - active.push_back(rp); - cpu_work_fetch.sim_nused += rp->avp->avg_ncpus; - cuda_work_fetch.sim_nused += rp->avp->ncudas; - ati_work_fetch.sim_nused += rp->avp->natis; - } - // remove *rpbest from active set, - // and adjust FLOPS left for other results - // - inline void remove_active(RESULT* rpbest) { - vector::iterator it = active.begin(); - while (it != active.end()) { - RESULT* rp = *it; - if (rp == rpbest) { - it = active.erase(it); - } else { - rp->rrsim_flops_left -= rp->rrsim_flops*rpbest->rrsim_finish_delay; - - // can be slightly less than 0 due to roundoff - // - if (rp->rrsim_flops_left < -1) { - msg_printf(rp->project, MSG_INTERNAL_ERROR, - "%s: negative FLOPs left %f", rp->name, rp->rrsim_flops_left - ); - } - if (rp->rrsim_flops_left < 0) { - rp->rrsim_flops_left = 0; - } - it++; - } - } - cpu_work_fetch.sim_nused -= rpbest->avp->avg_ncpus; - cuda_work_fetch.sim_nused -= rpbest->avp->ncudas; - ati_work_fetch.sim_nused -= 
rpbest->avp->natis; - } - - RR_SIM_STATUS() { - active_ncpus = 0; - active_cudas = 0; - active_atis = 0; - } - ~RR_SIM_STATUS() {} -}; - -void RR_SIM_PROJECT_STATUS::activate(RESULT* rp) { - active.push_back(rp); - rp->project->cpu_pwf.sim_nused += rp->avp->avg_ncpus; - rp->project->cuda_pwf.sim_nused += rp->avp->ncudas; - rp->project->ati_pwf.sim_nused += rp->avp->natis; -} - -void RR_SIM_PROJECT_STATUS::remove_active(RESULT* rp) { - std::vector::iterator it = active.begin(); - while (it != active.end()) { - if (*it == rp) { - it = active.erase(it); - } else { - it++; - } - } - rp->project->cpu_pwf.sim_nused -= rp->avp->avg_ncpus; - rp->project->cuda_pwf.sim_nused -= rp->avp->ncudas; - rp->project->ati_pwf.sim_nused -= rp->avp->natis; -} - -// estimate the rate (FLOPS) that this job will get long-term -// with weighted round-robin scheduling -// -void set_rrsim_flops(RESULT* rp) { - // For coproc jobs, use app version estimate - // - if (rp->uses_coprocs()) { - rp->rrsim_flops = rp->avp->flops * gstate.overall_cpu_frac(); - return; - } - PROJECT* p = rp->project; - - // For CPU jobs, estimate how many CPU seconds per second this job would get - // running with other jobs of this project, ignoring other factors - // - double x = 1; - if (p->cpu_pwf.sim_nused > gstate.ncpus) { - x = gstate.ncpus/p->cpu_pwf.sim_nused; - } - double r1 = x*rp->avp->avg_ncpus; - - // if the project's total CPU usage is more than its share, scale - // - double share_cpus = p->cpu_pwf.runnable_share*gstate.ncpus; - if (!share_cpus) share_cpus = gstate.ncpus; - // deal with projects w/ resource share = 0 - double r2 = r1; - if (p->cpu_pwf.sim_nused > share_cpus) { - r2 *= (share_cpus / p->cpu_pwf.sim_nused); - } - - // scale by overall CPU availability - // - double r3 = r2 * gstate.overall_cpu_frac(); - - rp->rrsim_flops = r3 * rp->avp->flops; -#if 0 - if (log_flags.rr_simulation) { - msg_printf(p, MSG_INFO, - "[rr_sim] set_rrsim_flops: %.2fG (r1 %.4f r2 %.4f r3 %.4f)", - 
rp->rrsim_flops/1e9, r1, r2, r3 - ); - } -#endif -} - -void CLIENT_STATE::print_deadline_misses() { - unsigned int i; - RESULT* rp; - PROJECT* p; - for (i=0; irr_sim_misses_deadline) { - msg_printf(rp->project, MSG_INFO, - "[cpu_sched] Result %s projected to miss deadline.", - rp->name - ); - } - } - for (i=0; icpu_pwf.deadlines_missed) { - msg_printf(p, MSG_INFO, - "[cpu_sched] Project has %d projected CPU deadline misses", - p->cpu_pwf.deadlines_missed - ); - } - if (p->cuda_pwf.deadlines_missed) { - msg_printf(p, MSG_INFO, - "[cpu_sched] Project has %d projected NVIDIA GPU deadline misses", - p->cuda_pwf.deadlines_missed - ); - } - if (p->ati_pwf.deadlines_missed) { - msg_printf(p, MSG_INFO, - "[cpu_sched] Project has %d projected ATI GPU deadline misses", - p->ati_pwf.deadlines_missed - ); - } - } -} - -#if 0 -// compute a per-app-version "temporary DCF" based on the elapsed time -// and fraction done of running jobs -// -void compute_temp_dcf() { - unsigned int i; - for (i=0; itemp_dcf = 1; - } - for (i=0; iest_dur(false) / atp->result->estimated_duration(false); - APP_VERSION* avp = atp->result->avp; - if (x < avp->temp_dcf) { - avp->temp_dcf = x; - } - } -} -#endif - -void CLIENT_STATE::rr_simulation() { - PROJECT* p, *pbest; - RESULT* rp, *rpbest; - RR_SIM_STATUS sim_status; - unsigned int i; - - double ar = available_ram(); - - work_fetch.rr_init(); - //compute_temp_dcf(); - - if (log_flags.rr_simulation) { - msg_printf(0, MSG_INFO, - "[rr_sim] rr_sim start: work_buf_total %.2f on_frac %.3f active_frac %.3f", - work_buf_total(), time_stats.on_frac, time_stats.active_frac - ); - } - - for (i=0; inon_cpu_intensive) continue; - p->rr_sim_status.clear(); - } - - // Decide what jobs to include in the simulation, - // and pick the ones that are initially running. 
- // NOTE: "results" is sorted by increasing arrival time - // - for (i=0; irr_sim_misses_deadline = false; - if (!rp->nearly_runnable()) continue; - if (rp->some_download_stalled()) continue; - if (rp->project->non_cpu_intensive) continue; - rp->rrsim_flops_left = rp->estimated_flops_remaining(); - - //if (rp->rrsim_flops_left <= 0) continue; - // job may have fraction_done=1 but not be done; - // if it's past its deadline, we need to mark it as such - - p = rp->project; - p->pwf.has_runnable_jobs = true; - p->cpu_pwf.nused_total += rp->avp->avg_ncpus; - if (rp->uses_cuda() && coproc_cuda) { - p->cuda_pwf.nused_total += rp->avp->ncudas; - p->cuda_pwf.has_runnable_jobs = true; - if (cuda_work_fetch.sim_nused < coproc_cuda->count) { - sim_status.activate(rp, 0); - p->rr_sim_status.activate(rp); - } else { - cuda_work_fetch.pending.push_back(rp); - } - } else if (rp->uses_ati() && coproc_ati) { - p->ati_pwf.nused_total += rp->avp->natis; - p->ati_pwf.has_runnable_jobs = true; - if (ati_work_fetch.sim_nused < coproc_ati->count) { - sim_status.activate(rp, 0); - p->rr_sim_status.activate(rp); - } else { - ati_work_fetch.pending.push_back(rp); - } - } else { - p->cpu_pwf.has_runnable_jobs = true; - if (p->cpu_pwf.sim_nused < ncpus) { - sim_status.activate(rp, 0); - p->rr_sim_status.activate(rp); - } else { - p->rr_sim_status.add_pending(rp); - } - } - } - - // note the number of idle instances - // - cpu_work_fetch.nidle_now = ncpus - cpu_work_fetch.sim_nused; - if (cpu_work_fetch.nidle_now < 0) cpu_work_fetch.nidle_now = 0; - if (coproc_cuda) { - cuda_work_fetch.nidle_now = coproc_cuda->count - cuda_work_fetch.sim_nused; - if (cuda_work_fetch.nidle_now < 0) cuda_work_fetch.nidle_now = 0; - } - if (coproc_ati) { - ati_work_fetch.nidle_now = coproc_ati->count - ati_work_fetch.sim_nused; - if (ati_work_fetch.nidle_now < 0) ati_work_fetch.nidle_now = 0; - } - - work_fetch.compute_shares(); - - // Simulation loop. 
Keep going until all work done - // - double buf_end = now + work_buf_total(); - double sim_now = now; - while (sim_status.active.size()) { - - // compute finish times and see which result finishes first - // - rpbest = NULL; - for (i=0; irrsim_finish_delay = rp->avp->temp_dcf*rp->rrsim_flops_left/rp->rrsim_flops; - rp->rrsim_finish_delay = rp->rrsim_flops_left/rp->rrsim_flops; - if (!rpbest || rp->rrsim_finish_delay < rpbest->rrsim_finish_delay) { - rpbest = rp; - } - } - - pbest = rpbest->project; - - if (log_flags.rr_simulation) { - msg_printf(pbest, MSG_INFO, - "[rr_sim] %.2f: %s finishes after %.2f (%.2fG/%.2fG)", - sim_now - now, - rpbest->name, rpbest->rrsim_finish_delay, - rpbest->rrsim_flops_left/1e9, rpbest->rrsim_flops/1e9 - ); - } - - // "rpbest" is first result to finish. Does it miss its deadline? - // - double diff = (sim_now + rpbest->rrsim_finish_delay) - rpbest->computation_deadline(); - if (diff > 0) { - ACTIVE_TASK* atp = lookup_active_task_by_result(rpbest); - if (atp && atp->procinfo.working_set_size_smoothed > ar) { - if (log_flags.rr_simulation) { - msg_printf(pbest, MSG_INFO, - "[rr_sim] %s misses deadline but too large to run", - rpbest->name - ); - } - } else { - rpbest->rr_sim_misses_deadline = true; - if (rpbest->uses_cuda()) { - pbest->cuda_pwf.deadlines_missed++; - cuda_work_fetch.deadline_missed_instances += rpbest->avp->ncudas; - } else if (rpbest->uses_ati()) { - pbest->ati_pwf.deadlines_missed++; - ati_work_fetch.deadline_missed_instances += rpbest->avp->natis; - } else { - pbest->cpu_pwf.deadlines_missed++; - cpu_work_fetch.deadline_missed_instances += rpbest->avp->avg_ncpus; - } - if (log_flags.rr_simulation) { - msg_printf(pbest, MSG_INFO, - "[rr_sim] %s misses deadline by %.2f", - rpbest->name, diff - ); - } - } - } - - // update saturated time - // - double end_time = sim_now + rpbest->rrsim_finish_delay; - double x = end_time - gstate.now; - cpu_work_fetch.update_saturated_time(x); - if (coproc_cuda) { - 
cuda_work_fetch.update_saturated_time(x); - } - if (coproc_ati) { - ati_work_fetch.update_saturated_time(x); - } - - // update busy time - // - if (rpbest->rr_sim_misses_deadline) { - double dur = rpbest->estimated_time_remaining(false) / gstate.overall_cpu_frac(); - cpu_work_fetch.update_busy_time(dur, rpbest->avp->avg_ncpus); - if (rpbest->uses_cuda()) { - cuda_work_fetch.update_busy_time(dur, rpbest->avp->ncudas); - } - if (rpbest->uses_ati()) { - ati_work_fetch.update_busy_time(dur, rpbest->avp->natis); - } - } - - // increment resource shortfalls - // - if (sim_now < buf_end) { - if (end_time > buf_end) end_time = buf_end; - double d_time = end_time - sim_now; - - cpu_work_fetch.accumulate_shortfall(d_time); - - if (coproc_cuda) { - cuda_work_fetch.accumulate_shortfall(d_time); - } - if (coproc_ati) { - ati_work_fetch.accumulate_shortfall(d_time); - } - } - - sim_status.remove_active(rpbest); - pbest->rr_sim_status.remove_active(rpbest); - - sim_now += rpbest->rrsim_finish_delay; - - // start new jobs; may need to start more than one - // if this job used multiple resource instances - // - if (rpbest->uses_cuda()) { - while (1) { - if (cuda_work_fetch.sim_nused >= coproc_cuda->count) break; - if (!cuda_work_fetch.pending.size()) break; - RESULT* rp = cuda_work_fetch.pending[0]; - cuda_work_fetch.pending.erase(cuda_work_fetch.pending.begin()); - sim_status.activate(rp, sim_now-now); - pbest->rr_sim_status.activate(rp); - } - } else if (rpbest->uses_ati()) { - while (1) { - if (ati_work_fetch.sim_nused >= coproc_ati->count) break; - if (!ati_work_fetch.pending.size()) break; - RESULT* rp = ati_work_fetch.pending[0]; - ati_work_fetch.pending.erase(ati_work_fetch.pending.begin()); - sim_status.activate(rp, sim_now-now); - pbest->rr_sim_status.activate(rp); - } - } else { - while (1) { - if (pbest->cpu_pwf.sim_nused >= ncpus) break; - RESULT* rp = pbest->rr_sim_status.get_pending(); - if (!rp) break; - sim_status.activate(rp, sim_now-now); - 
pbest->rr_sim_status.activate(rp); - } - } - } - - // if simulation ends before end of buffer, take the tail into account - // - if (sim_now < buf_end) { - double d_time = buf_end - sim_now; - cpu_work_fetch.accumulate_shortfall(d_time); - if (coproc_cuda) { - cuda_work_fetch.accumulate_shortfall(d_time); - } - if (coproc_ati) { - ati_work_fetch.accumulate_shortfall(d_time); - } - } -} +// This file is part of BOINC. +// http://boinc.berkeley.edu +// Copyright (C) 2008 University of California +// +// BOINC is free software; you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License +// as published by the Free Software Foundation, +// either version 3 of the License, or (at your option) any later version. +// +// BOINC is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with BOINC. If not, see . + +// Simulate the processing of the current workload +// (include jobs that are downloading) +// with weighted round-robin (WRR) scheduling. +// +// For efficiency, we simulate an approximation of WRR. +// We don't model time-slicing. +// Instead we use a continuous model where, at a given point, +// each project has a set of running jobs that uses at most all CPUs. +// These jobs are assumed to run at a rate proportionate to their avg_ncpus, +// and each project gets total CPU proportionate to its RRS. +// +// For coprocessors, we saturate the resource; +// i.e. with 2 GPUs, we'd let a 1-GPU app and a 2-GPU app run together. +// Otherwise, there'd be the possibility of computing +// a nonzero shortfall inappropriately. 
+// +// Outputs are changes to global state: +// - deadline misses (per-project count, per-result flag) +// Deadline misses are not counted for tasks +// that are too large to run in RAM right now. +// - resource shortfalls (per-project and total) +// - counts of resources idle now +// + +#include "cpp.h" + +#ifdef _WIN32 +#include "boinc_win.h" +#else +#include "config.h" +#endif + +#include "client_state.h" +#include "coproc.h" +#include "client_msgs.h" + +inline void rsc_string(RESULT* rp, char* buf) { + APP_VERSION* avp = rp->avp; + if (avp->ncudas) { + sprintf(buf, "%.2f CPU + %.2f NV", avp->avg_ncpus, avp->ncudas); + } else if (avp->natis) { + sprintf(buf, "%.2f CPU + %.2f ATI", avp->avg_ncpus, avp->natis); + } else { + sprintf(buf, "%.2f CPU", avp->avg_ncpus); + } +} + +// this is here (rather than rr_sim.h) because its inline functions +// refer to RESULT +// +struct RR_SIM_STATUS { + std::vector active; + double active_ncpus; + double active_cudas; + double active_atis; + + inline void activate(RESULT* rp, double when) { + PROJECT* p = rp->project; + if (log_flags.rr_simulation) { + char buf[256]; + rsc_string(rp, buf); + msg_printf(p, MSG_INFO, + "[rr_sim] %.2f: starting %s (%s)", + when, rp->name, buf + ); + } + active.push_back(rp); + cpu_work_fetch.sim_nused += rp->avp->avg_ncpus; + cuda_work_fetch.sim_nused += rp->avp->ncudas; + ati_work_fetch.sim_nused += rp->avp->natis; + } + // remove *rpbest from active set, + // and adjust FLOPS left for other results + // + inline void remove_active(RESULT* rpbest) { + vector::iterator it = active.begin(); + while (it != active.end()) { + RESULT* rp = *it; + if (rp == rpbest) { + it = active.erase(it); + } else { + rp->rrsim_flops_left -= rp->rrsim_flops*rpbest->rrsim_finish_delay; + + // can be slightly less than 0 due to roundoff + // + if (rp->rrsim_flops_left < -1) { + msg_printf(rp->project, MSG_INTERNAL_ERROR, + "%s: negative FLOPs left %f", rp->name, rp->rrsim_flops_left + ); + } + if 
(rp->rrsim_flops_left < 0) { + rp->rrsim_flops_left = 0; + } + it++; + } + } + cpu_work_fetch.sim_nused -= rpbest->avp->avg_ncpus; + cuda_work_fetch.sim_nused -= rpbest->avp->ncudas; + ati_work_fetch.sim_nused -= rpbest->avp->natis; + } + + RR_SIM_STATUS() { + active_ncpus = 0; + active_cudas = 0; + active_atis = 0; + } + ~RR_SIM_STATUS() {} +}; + +void RR_SIM_PROJECT_STATUS::activate(RESULT* rp) { + active.push_back(rp); + rp->project->cpu_pwf.sim_nused += rp->avp->avg_ncpus; + rp->project->cuda_pwf.sim_nused += rp->avp->ncudas; + rp->project->ati_pwf.sim_nused += rp->avp->natis; +} + +void RR_SIM_PROJECT_STATUS::remove_active(RESULT* rp) { + std::vector::iterator it = active.begin(); + while (it != active.end()) { + if (*it == rp) { + it = active.erase(it); + } else { + it++; + } + } + rp->project->cpu_pwf.sim_nused -= rp->avp->avg_ncpus; + rp->project->cuda_pwf.sim_nused -= rp->avp->ncudas; + rp->project->ati_pwf.sim_nused -= rp->avp->natis; +} + +// estimate the rate (FLOPS) that this job will get long-term +// with weighted round-robin scheduling +// +void set_rrsim_flops(RESULT* rp) { + // For coproc jobs, use app version estimate + // + if (rp->uses_coprocs()) { + rp->rrsim_flops = rp->avp->flops * gstate.overall_cpu_frac(); + return; + } + PROJECT* p = rp->project; + + // For CPU jobs, estimate how many CPU seconds per second this job would get + // running with other jobs of this project, ignoring other factors + // + double x = 1; + if (p->cpu_pwf.sim_nused > gstate.ncpus) { + x = gstate.ncpus/p->cpu_pwf.sim_nused; + } + double r1 = x*rp->avp->avg_ncpus; + + // if the project's total CPU usage is more than its share, scale + // + double share_cpus = p->cpu_pwf.runnable_share*gstate.ncpus; + if (!share_cpus) share_cpus = gstate.ncpus; + // deal with projects w/ resource share = 0 + double r2 = r1; + if (p->cpu_pwf.sim_nused > share_cpus) { + r2 *= (share_cpus / p->cpu_pwf.sim_nused); + } + + // scale by overall CPU availability + // + double r3 = r2 * 
gstate.overall_cpu_frac(); + + rp->rrsim_flops = r3 * rp->avp->flops; +#if 0 + if (log_flags.rr_simulation) { + msg_printf(p, MSG_INFO, + "[rr_sim] set_rrsim_flops: %.2fG (r1 %.4f r2 %.4f r3 %.4f)", + rp->rrsim_flops/1e9, r1, r2, r3 + ); + } +#endif +} + +void CLIENT_STATE::print_deadline_misses() { + unsigned int i; + RESULT* rp; + PROJECT* p; + for (i=0; irr_sim_misses_deadline) { + msg_printf(rp->project, MSG_INFO, + "[cpu_sched] Result %s projected to miss deadline.", + rp->name + ); + } + } + for (i=0; icpu_pwf.deadlines_missed) { + msg_printf(p, MSG_INFO, + "[cpu_sched] Project has %d projected CPU deadline misses", + p->cpu_pwf.deadlines_missed + ); + } + if (p->cuda_pwf.deadlines_missed) { + msg_printf(p, MSG_INFO, + "[cpu_sched] Project has %d projected NVIDIA GPU deadline misses", + p->cuda_pwf.deadlines_missed + ); + } + if (p->ati_pwf.deadlines_missed) { + msg_printf(p, MSG_INFO, + "[cpu_sched] Project has %d projected ATI GPU deadline misses", + p->ati_pwf.deadlines_missed + ); + } + } +} + +#if 0 +// compute a per-app-version "temporary DCF" based on the elapsed time +// and fraction done of running jobs +// +void compute_temp_dcf() { + unsigned int i; + for (i=0; itemp_dcf = 1; + } + for (i=0; iest_dur(false) / atp->result->estimated_duration(false); + APP_VERSION* avp = atp->result->avp; + if (x < avp->temp_dcf) { + avp->temp_dcf = x; + } + } +} +#endif + +void CLIENT_STATE::rr_simulation() { + PROJECT* p, *pbest; + RESULT* rp, *rpbest; + RR_SIM_STATUS sim_status; + unsigned int i; + + double ar = available_ram(); + + work_fetch.rr_init(); + //compute_temp_dcf(); + + if (log_flags.rr_simulation) { + msg_printf(0, MSG_INFO, + "[rr_sim] rr_sim start: work_buf_total %.2f on_frac %.3f active_frac %.3f", + work_buf_total(), time_stats.on_frac, time_stats.active_frac + ); + } + + for (i=0; inon_cpu_intensive) continue; + p->rr_sim_status.clear(); + } + + // Decide what jobs to include in the simulation, + // and pick the ones that are initially running. 
+ // NOTE: "results" is sorted by increasing arrival time + // + for (i=0; irr_sim_misses_deadline = false; + if (!rp->nearly_runnable()) continue; + if (rp->some_download_stalled()) continue; + if (rp->project->non_cpu_intensive) continue; + rp->rrsim_flops_left = rp->estimated_flops_remaining(); + + //if (rp->rrsim_flops_left <= 0) continue; + // job may have fraction_done=1 but not be done; + // if it's past its deadline, we need to mark it as such + + p = rp->project; + p->pwf.has_runnable_jobs = true; + p->cpu_pwf.nused_total += rp->avp->avg_ncpus; + if (rp->uses_cuda() && host_info.have_cuda()) { + p->cuda_pwf.nused_total += rp->avp->ncudas; + p->cuda_pwf.has_runnable_jobs = true; + if (cuda_work_fetch.sim_nused < host_info.coprocs.cuda.count) { + sim_status.activate(rp, 0); + p->rr_sim_status.activate(rp); + } else { + cuda_work_fetch.pending.push_back(rp); + } + } else if (rp->uses_ati() && host_info.have_ati()) { + p->ati_pwf.nused_total += rp->avp->natis; + p->ati_pwf.has_runnable_jobs = true; + if (ati_work_fetch.sim_nused < host_info.coprocs.ati.count) { + sim_status.activate(rp, 0); + p->rr_sim_status.activate(rp); + } else { + ati_work_fetch.pending.push_back(rp); + } + } else { + p->cpu_pwf.has_runnable_jobs = true; + if (p->cpu_pwf.sim_nused < ncpus) { + sim_status.activate(rp, 0); + p->rr_sim_status.activate(rp); + } else { + p->rr_sim_status.add_pending(rp); + } + } + } + + // note the number of idle instances + // + cpu_work_fetch.nidle_now = ncpus - cpu_work_fetch.sim_nused; + if (cpu_work_fetch.nidle_now < 0) cpu_work_fetch.nidle_now = 0; + if (host_info.have_cuda()) { + cuda_work_fetch.nidle_now = host_info.coprocs.cuda.count - cuda_work_fetch.sim_nused; + if (cuda_work_fetch.nidle_now < 0) cuda_work_fetch.nidle_now = 0; + } + if (host_info.have_ati()) { + ati_work_fetch.nidle_now = host_info.coprocs.ati.count - ati_work_fetch.sim_nused; + if (ati_work_fetch.nidle_now < 0) ati_work_fetch.nidle_now = 0; + } + + work_fetch.compute_shares(); + + 
// Simulation loop. Keep going until all work done + // + double buf_end = now + work_buf_total(); + double sim_now = now; + while (sim_status.active.size()) { + + // compute finish times and see which result finishes first + // + rpbest = NULL; + for (i=0; irrsim_finish_delay = rp->avp->temp_dcf*rp->rrsim_flops_left/rp->rrsim_flops; + rp->rrsim_finish_delay = rp->rrsim_flops_left/rp->rrsim_flops; + if (!rpbest || rp->rrsim_finish_delay < rpbest->rrsim_finish_delay) { + rpbest = rp; + } + } + + pbest = rpbest->project; + + if (log_flags.rr_simulation) { + msg_printf(pbest, MSG_INFO, + "[rr_sim] %.2f: %s finishes after %.2f (%.2fG/%.2fG)", + sim_now - now, + rpbest->name, rpbest->rrsim_finish_delay, + rpbest->rrsim_flops_left/1e9, rpbest->rrsim_flops/1e9 + ); + } + + // "rpbest" is first result to finish. Does it miss its deadline? + // + double diff = (sim_now + rpbest->rrsim_finish_delay) - rpbest->computation_deadline(); + if (diff > 0) { + ACTIVE_TASK* atp = lookup_active_task_by_result(rpbest); + if (atp && atp->procinfo.working_set_size_smoothed > ar) { + if (log_flags.rr_simulation) { + msg_printf(pbest, MSG_INFO, + "[rr_sim] %s misses deadline but too large to run", + rpbest->name + ); + } + } else { + rpbest->rr_sim_misses_deadline = true; + if (rpbest->uses_cuda()) { + pbest->cuda_pwf.deadlines_missed++; + cuda_work_fetch.deadline_missed_instances += rpbest->avp->ncudas; + } else if (rpbest->uses_ati()) { + pbest->ati_pwf.deadlines_missed++; + ati_work_fetch.deadline_missed_instances += rpbest->avp->natis; + } else { + pbest->cpu_pwf.deadlines_missed++; + cpu_work_fetch.deadline_missed_instances += rpbest->avp->avg_ncpus; + } + if (log_flags.rr_simulation) { + msg_printf(pbest, MSG_INFO, + "[rr_sim] %s misses deadline by %.2f", + rpbest->name, diff + ); + } + } + } + + // update saturated time + // + double end_time = sim_now + rpbest->rrsim_finish_delay; + double x = end_time - gstate.now; + cpu_work_fetch.update_saturated_time(x); + if 
(host_info.have_cuda()) { + cuda_work_fetch.update_saturated_time(x); + } + if (host_info.have_ati()) { + ati_work_fetch.update_saturated_time(x); + } + + // update busy time + // + if (rpbest->rr_sim_misses_deadline) { + double dur = rpbest->estimated_time_remaining(false) / gstate.overall_cpu_frac(); + cpu_work_fetch.update_busy_time(dur, rpbest->avp->avg_ncpus); + if (rpbest->uses_cuda()) { + cuda_work_fetch.update_busy_time(dur, rpbest->avp->ncudas); + } + if (rpbest->uses_ati()) { + ati_work_fetch.update_busy_time(dur, rpbest->avp->natis); + } + } + + // increment resource shortfalls + // + if (sim_now < buf_end) { + if (end_time > buf_end) end_time = buf_end; + double d_time = end_time - sim_now; + + cpu_work_fetch.accumulate_shortfall(d_time); + + if (host_info.have_cuda()) { + cuda_work_fetch.accumulate_shortfall(d_time); + } + if (host_info.have_ati()) { + ati_work_fetch.accumulate_shortfall(d_time); + } + } + + sim_status.remove_active(rpbest); + pbest->rr_sim_status.remove_active(rpbest); + + sim_now += rpbest->rrsim_finish_delay; + + // start new jobs; may need to start more than one + // if this job used multiple resource instances + // + if (rpbest->uses_cuda()) { + while (1) { + if (cuda_work_fetch.sim_nused >= host_info.coprocs.cuda.count) break; + if (!cuda_work_fetch.pending.size()) break; + RESULT* rp = cuda_work_fetch.pending[0]; + cuda_work_fetch.pending.erase(cuda_work_fetch.pending.begin()); + sim_status.activate(rp, sim_now-now); + pbest->rr_sim_status.activate(rp); + } + } else if (rpbest->uses_ati()) { + while (1) { + if (ati_work_fetch.sim_nused >= host_info.coprocs.ati.count) break; + if (!ati_work_fetch.pending.size()) break; + RESULT* rp = ati_work_fetch.pending[0]; + ati_work_fetch.pending.erase(ati_work_fetch.pending.begin()); + sim_status.activate(rp, sim_now-now); + pbest->rr_sim_status.activate(rp); + } + } else { + while (1) { + if (pbest->cpu_pwf.sim_nused >= ncpus) break; + RESULT* rp = pbest->rr_sim_status.get_pending(); + if 
(!rp) break; + sim_status.activate(rp, sim_now-now); + pbest->rr_sim_status.activate(rp); + } + } + } + + // if simulation ends before end of buffer, take the tail into account + // + if (sim_now < buf_end) { + double d_time = buf_end - sim_now; + cpu_work_fetch.accumulate_shortfall(d_time); + if (host_info.have_cuda()) { + cuda_work_fetch.accumulate_shortfall(d_time); + } + if (host_info.have_ati()) { + ati_work_fetch.accumulate_shortfall(d_time); + } + } +} diff --git a/client/scheduler_op.cpp b/client/scheduler_op.cpp index f382e6871f..a358f2a067 100644 --- a/client/scheduler_op.cpp +++ b/client/scheduler_op.cpp @@ -219,7 +219,7 @@ int SCHEDULER_OP::start_rpc(PROJECT* p) { ); double gpu_req = cuda_work_fetch.req_secs + ati_work_fetch.req_secs; if (cpu_work_fetch.req_secs || gpu_req) { - if (coproc_cuda||coproc_ati) { + if (gstate.host_info.have_cuda()||gstate.host_info.have_ati()) { if (cpu_work_fetch.req_secs && gpu_req) { sprintf(buf, " for CPU and GPU"); } else if (cpu_work_fetch.req_secs) { @@ -254,13 +254,13 @@ int SCHEDULER_OP::start_rpc(PROJECT* p) { "[sched_op] CPU work request: %.2f seconds; %.2f CPUs", cpu_work_fetch.req_secs, cpu_work_fetch.req_instances ); - if (coproc_cuda) { + if (gstate.host_info.have_cuda()) { msg_printf(p, MSG_INFO, "[sched_op] NVIDIA GPU work request: %.2f seconds; %.2f GPUs", cuda_work_fetch.req_secs, cuda_work_fetch.req_instances ); } - if (coproc_ati) { + if (gstate.host_info.have_ati()) { msg_printf(p, MSG_INFO, "[sched_op] ATI GPU work request: %.2f seconds; %.2f GPUs", ati_work_fetch.req_secs, ati_work_fetch.req_instances diff --git a/client/sim.h b/client/sim.h index 83bfb052ab..760c8aab84 100644 --- a/client/sim.h +++ b/client/sim.h @@ -295,8 +295,6 @@ public: }; extern CLIENT_STATE gstate; -extern COPROC_CUDA* coproc_cuda; -extern COPROC_ATI* coproc_ati; extern NET_STATUS net_status; extern FILE* logfile; extern bool user_active; diff --git a/client/work_fetch.cpp b/client/work_fetch.cpp index c8dfac93dd..e6eff8b06d 
100644 --- a/client/work_fetch.cpp +++ b/client/work_fetch.cpp @@ -662,10 +662,10 @@ void WORK_FETCH::rr_init() { p->pwf.can_fetch_work = p->pwf.compute_can_fetch_work(p); p->pwf.has_runnable_jobs = false; p->cpu_pwf.rr_init(p, RSC_TYPE_CPU); - if (coproc_cuda) { + if (gstate.host_info.have_cuda()) { p->cuda_pwf.rr_init(p, RSC_TYPE_CUDA); } - if (coproc_ati) { + if (gstate.host_info.have_ati()) { p->ati_pwf.rr_init(p, RSC_TYPE_ATI); } } @@ -673,10 +673,10 @@ void WORK_FETCH::rr_init() { void WORK_FETCH::set_all_requests(PROJECT* p) { cpu_work_fetch.set_request(p, false); - if (coproc_cuda && gpus_usable) { + if (gstate.host_info.have_cuda() && gpus_usable) { cuda_work_fetch.set_request(p, false); } - if (coproc_ati && gpus_usable) { + if (gstate.host_info.have_ati() && gpus_usable) { ati_work_fetch.set_request(p, false); } } @@ -712,13 +712,13 @@ void WORK_FETCH::set_overall_debts() { p = gstate.projects[i]; double queue_debt = p->cpu_pwf.queue_est/gstate.ncpus; p->pwf.overall_debt = p->cpu_pwf.long_term_debt - queue_debt; - if (coproc_cuda) { + if (gstate.host_info.have_cuda()) { p->pwf.overall_debt += cuda_work_fetch.relative_speed* - (p->cuda_pwf.long_term_debt - p->cuda_pwf.queue_est/coproc_cuda->count); + (p->cuda_pwf.long_term_debt - p->cuda_pwf.queue_est/gstate.host_info.coprocs.cuda.count); } - if (coproc_ati) { + if (gstate.host_info.have_ati()) { p->pwf.overall_debt += ati_work_fetch.relative_speed* - (p->ati_pwf.long_term_debt - p->ati_pwf.queue_est/coproc_ati->count); + (p->ati_pwf.long_term_debt - p->ati_pwf.queue_est/gstate.host_info.coprocs.ati.count); } } } @@ -738,10 +738,10 @@ void WORK_FETCH::print_state() { gstate.work_buf_min(), gstate.work_buf_additional() ); cpu_work_fetch.print_state("CPU"); - if (coproc_cuda) { + if (gstate.host_info.have_cuda()) { cuda_work_fetch.print_state("NVIDIA GPU"); } - if (coproc_ati) { + if (gstate.host_info.have_ati()) { ati_work_fetch.print_state("ATI GPU"); } for (unsigned int i=0; iavg_ncpus; 
p->cpu_pwf.secs_this_debt_interval += x; cpu_work_fetch.secs_this_debt_interval += x; - if (coproc_cuda) { + if (gstate.host_info.have_cuda()) { x = dt*avp->ncudas; p->cuda_pwf.secs_this_debt_interval += x; cuda_work_fetch.secs_this_debt_interval += x; } - if (coproc_ati) { + if (gstate.host_info.have_ati()) { x = dt*avp->natis; p->ati_pwf.secs_this_debt_interval += x; ati_work_fetch.secs_this_debt_interval += x; @@ -927,10 +927,10 @@ void WORK_FETCH::compute_shares() { if (p->cpu_pwf.may_have_work) { cpu_work_fetch.total_fetchable_share += p->resource_share; } - if (coproc_cuda && p->cuda_pwf.may_have_work) { + if (gstate.host_info.have_cuda() && p->cuda_pwf.may_have_work) { cuda_work_fetch.total_fetchable_share += p->resource_share; } - if (coproc_ati && p->ati_pwf.may_have_work) { + if (gstate.host_info.have_ati() && p->ati_pwf.may_have_work) { ati_work_fetch.total_fetchable_share += p->resource_share; } } @@ -950,10 +950,10 @@ void WORK_FETCH::compute_shares() { if (p->cpu_pwf.may_have_work) { p->cpu_pwf.fetchable_share = cpu_work_fetch.total_fetchable_share?p->resource_share/cpu_work_fetch.total_fetchable_share:1; } - if (coproc_cuda && p->cuda_pwf.may_have_work) { + if (gstate.host_info.have_cuda() && p->cuda_pwf.may_have_work) { p->cuda_pwf.fetchable_share = cuda_work_fetch.total_fetchable_share?p->resource_share/cuda_work_fetch.total_fetchable_share:1; } - if (coproc_ati && p->ati_pwf.may_have_work) { + if (gstate.host_info.have_ati() && p->ati_pwf.may_have_work) { p->ati_pwf.fetchable_share = ati_work_fetch.total_fetchable_share?p->resource_share/ati_work_fetch.total_fetchable_share:1; } } @@ -996,13 +996,13 @@ void WORK_FETCH::write_request(FILE* f, PROJECT* p) { work_req, cpu_work_fetch.req_secs, cpu_work_fetch.req_instances ); - if (coproc_cuda) { + if (gstate.host_info.have_cuda()) { sprintf(buf2, " NVIDIA GPU (%.2f sec, %.2f)", cuda_work_fetch.req_secs, cuda_work_fetch.req_instances ); strcat(buf, buf2); } - if (coproc_ati) { + if 
(gstate.host_info.have_ati()) { sprintf(buf2, " ATI GPU (%.2f sec, %.2f)", ati_work_fetch.req_secs, ati_work_fetch.req_instances ); @@ -1041,10 +1041,10 @@ void WORK_FETCH::handle_reply( if (cpu_work_fetch.req_secs && !srp->cpu_backoff) { p->cpu_pwf.backoff(p, "CPU"); } - if (coproc_cuda && coproc_cuda->req_secs && !srp->cuda_backoff) { + if (gstate.host_info.have_cuda() && gstate.host_info.coprocs.cuda.req_secs && !srp->cuda_backoff) { p->cuda_pwf.backoff(p, "NVIDIA GPU"); } - if (coproc_ati && coproc_ati->req_secs && !srp->ati_backoff) { + if (gstate.host_info.have_ati() && gstate.host_info.coprocs.ati.req_secs && !srp->ati_backoff) { p->ati_pwf.backoff(p, "ATI GPU"); } } @@ -1072,12 +1072,12 @@ void WORK_FETCH::set_initial_work_request() { cpu_work_fetch.req_secs = 1; cpu_work_fetch.req_instances = 0; cpu_work_fetch.busy_time_estimator.reset(); - if (coproc_cuda) { + if (gstate.host_info.have_cuda()) { cuda_work_fetch.req_secs = 1; cuda_work_fetch.req_instances = 0; cuda_work_fetch.busy_time_estimator.reset(); } - if (coproc_ati) { + if (gstate.host_info.have_ati()) { ati_work_fetch.req_secs = 1; ati_work_fetch.req_instances = 0; ati_work_fetch.busy_time_estimator.reset(); @@ -1092,17 +1092,17 @@ void WORK_FETCH::init() { // use 20% as a rough estimate of GPU efficiency - if (coproc_cuda) { + if (gstate.host_info.have_cuda()) { cuda_work_fetch.init( - RSC_TYPE_CUDA, coproc_cuda->count, - coproc_cuda->count*0.2*coproc_cuda->peak_flops()/cpu_flops + RSC_TYPE_CUDA, gstate.host_info.coprocs.cuda.count, + gstate.host_info.coprocs.cuda.count*0.2*gstate.host_info.coprocs.cuda.peak_flops()/cpu_flops ); } - if (coproc_ati) { + if (gstate.host_info.have_ati()) { ati_work_fetch.init( RSC_TYPE_ATI, - coproc_ati->count, - coproc_ati->count*0.2*coproc_ati->peak_flops()/cpu_flops + gstate.host_info.coprocs.ati.count, + gstate.host_info.coprocs.ati.count*0.2*gstate.host_info.coprocs.ati.peak_flops()/cpu_flops ); } @@ -1149,11 +1149,11 @@ void 
CLIENT_STATE::compute_nuploading_results() { } } int n = gstate.ncpus; - if (coproc_cuda && coproc_cuda->count > n) { - n = coproc_cuda->count; + if (gstate.host_info.have_cuda() && gstate.host_info.coprocs.cuda.count > n) { + n = gstate.host_info.coprocs.cuda.count; } - if (coproc_ati && coproc_ati->count > n) { - n = coproc_ati->count; + if (gstate.host_info.have_ati() && gstate.host_info.coprocs.ati.count > n) { + n = gstate.host_info.coprocs.ati.count; } n *= 2; for (i=0; i. - -#if defined(_WIN32) && !defined(__STDWX_H__) -#include "boinc_win.h" -#elif defined(_WIN32) && defined(__STDWX_H__) -#include "stdwx.h" -#else -#ifdef _USING_FCGI_ -#include "boinc_fcgi.h" -#else -#include -#endif -#include -#include -#endif - -#ifdef _WIN32 -#include "win_util.h" -#endif - -#include "error_numbers.h" -#include "filesys.h" -#include "parse.h" -#include "str_util.h" - -#include "coproc.h" - -#ifndef _USING_FCGI_ -using std::perror; -#endif - -#ifndef _USING_FCGI_ -void COPROC::write_xml(MIOFILE& f) { - f.printf( - "\n" - " %s\n" - " %d\n" - "\n", - type, count - ); -} -#endif - -int COPROC_REQ::parse(MIOFILE& fin) { - char buf[1024]; - strcpy(type, ""); - count = 0; - while (fin.fgets(buf, sizeof(buf))) { - if (match_tag(buf, "")) { - if (!strlen(type)) return ERR_XML_PARSE; - return 0; - } - if (parse_str(buf, "", type, sizeof(type))) continue; - if (parse_double(buf, "", count)) continue; - } - return ERR_XML_PARSE; -} - -int COPROC::parse(MIOFILE& fin) { - char buf[1024]; - strcpy(type, ""); - count = 0; - used = 0; - req_secs = 0; - estimated_delay = 0; - req_instances = 0; - while (fin.fgets(buf, sizeof(buf))) { - if (match_tag(buf, "")) { - if (!strlen(type)) return ERR_XML_PARSE; - return 0; - } - if (parse_str(buf, "", type, sizeof(type))) continue; - if (parse_int(buf, "", count)) continue; - if (parse_double(buf, "", req_secs)) continue; - if (parse_double(buf, "", req_instances)) continue; - if (parse_double(buf, "", estimated_delay)) continue; - } - return 
ERR_XML_PARSE; -} - -void COPROCS::summary_string(char* buf, int len) { - char bigbuf[8192], buf2[1024]; - - strcpy(bigbuf, ""); - for (unsigned int i=0; itype, "CUDA")) { - COPROC_CUDA* cp2 = (COPROC_CUDA*) cp; - int mem = (int)(cp2->prop.dtotalGlobalMem/MEGA); - sprintf(buf2, "[CUDA|%s|%d|%dMB|%d]", - cp2->prop.name, cp2->count, mem, cp2->display_driver_version - ); - strcat(bigbuf, buf2); - } else if (!strcmp(cp->type, "ATI")){ - COPROC_ATI* cp2 =(COPROC_ATI*) cp; - sprintf(buf2,"[CAL|%s|%d|%dMB|%s]", - cp2->name, cp2->count, cp2->attribs.localRAM, cp2->version - ); - strcat(bigbuf,buf2); - } - } - bigbuf[len-1] = 0; - strcpy(buf, bigbuf); -} - -int COPROCS::parse(MIOFILE& fin) { - char buf[1024]; - - while (fin.fgets(buf, sizeof(buf))) { - if (match_tag(buf, "")) { - return 0; - } - if (strstr(buf, "")) { - COPROC_CUDA* cc = new COPROC_CUDA; - int retval = cc->parse(fin); - if (!retval) { - coprocs.push_back(cc); - } - } - if (strstr(buf, "")) { - COPROC_ATI* cc = new COPROC_ATI; - int retval = cc->parse(fin); - if (!retval) { - coprocs.push_back(cc); - } - } - } - return ERR_XML_PARSE; -} - -void COPROCS::write_xml(MIOFILE& mf) { -#ifndef _USING_FCGI_ - mf.printf(" \n"); - for (unsigned i=0; iwrite_xml(mf); - } - mf.printf(" \n"); -#endif -} - -COPROC* COPROCS::lookup(const char* type) { - for (unsigned int i=0; itype)) return cp; - } - return NULL; -} - -#ifdef _WIN32 - -#endif - -void COPROC_CUDA::description(char* buf) { - char vers[256]; - if (display_driver_version) { - sprintf(vers, "%d", display_driver_version); - } else { - strcpy(vers, "unknown"); - } - sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0f GFLOPS peak)", - prop.name, vers, cuda_version, prop.major, prop.minor, - prop.totalGlobalMem/(1024.*1024.), peak_flops()/1e9 - ); -} - -#ifndef _USING_FCGI_ -void COPROC_CUDA::write_xml(MIOFILE& f) { - f.printf( - "\n" - " %d\n" - " %s\n" - " %f\n" - " %f\n" - " %f\n" - " %d\n" - " %d\n" - " %u\n" - " %u\n" - 
" %d\n" - " %d\n" - " %u\n" - " %d\n" - " %d %d %d\n" - " %d %d %d\n" - " %u\n" - " %d\n" - " %d\n" - " %d\n" - " %u\n" - " %d\n" - " %d\n" - "\n", - count, - prop.name, - req_secs, - req_instances, - estimated_delay, - display_driver_version, - cuda_version, - (unsigned int)prop.totalGlobalMem, - (unsigned int)prop.sharedMemPerBlock, - prop.regsPerBlock, - prop.warpSize, - (unsigned int)prop.memPitch, - prop.maxThreadsPerBlock, - prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2], - prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2], - (unsigned int)prop.totalConstMem, - prop.major, - prop.minor, - prop.clockRate, - (unsigned int)prop.textureAlignment, - prop.deviceOverlap, - prop.multiProcessorCount - ); -} -#endif - -void COPROC_CUDA::clear() { - count = 0; - used = 0; - req_secs = 0; - req_instances = 0; - estimated_delay = -1; // mark as absent - cuda_version = 0; - display_driver_version = 0; - strcpy(prop.name, ""); - prop.totalGlobalMem = 0; - prop.sharedMemPerBlock = 0; - prop.regsPerBlock = 0; - prop.warpSize = 0; - prop.memPitch = 0; - prop.maxThreadsPerBlock = 0; - prop.maxThreadsDim[0] = 0; - prop.maxThreadsDim[1] = 0; - prop.maxThreadsDim[2] = 0; - prop.maxGridSize[0] = 0; - prop.maxGridSize[1] = 0; - prop.maxGridSize[2] = 0; - prop.clockRate = 0; - prop.totalConstMem = 0; - prop.major = 0; - prop.minor = 0; - prop.textureAlignment = 0; - prop.deviceOverlap = 0; - prop.multiProcessorCount = 0; -} - -int COPROC_CUDA::parse(MIOFILE& fin) { - char buf[1024], buf2[256]; - - clear(); - while (fin.fgets(buf, sizeof(buf))) { - if (strstr(buf, "")) { - return 0; - } - if (parse_int(buf, "", count)) continue; - if (parse_double(buf, "", req_secs)) continue; - if (parse_double(buf, "", req_instances)) continue; - if (parse_double(buf, "", estimated_delay)) continue; - if (parse_str(buf, "", prop.name, sizeof(prop.name))) continue; - if (parse_int(buf, "", display_driver_version)) continue; - if (parse_int(buf, "", cuda_version)) 
continue; - if (parse_double(buf, "", prop.dtotalGlobalMem)) continue; - if (parse_int(buf, "", (int&)prop.sharedMemPerBlock)) continue; - if (parse_int(buf, "", prop.regsPerBlock)) continue; - if (parse_int(buf, "", prop.warpSize)) continue; - if (parse_int(buf, "", (int&)prop.memPitch)) continue; - if (parse_int(buf, "", prop.maxThreadsPerBlock)) continue; - if (parse_str(buf, "", buf2, sizeof(buf2))) { - // can't use sscanf here (FCGI) - // - prop.maxThreadsDim[0] = atoi(buf2); - char* p = strchr(buf2, ' '); - if (p) { - p++; - prop.maxThreadsDim[1] = atoi(p); - p = strchr(p, ' '); - if (p) { - p++; - prop.maxThreadsDim[2] = atoi(p); - } - } - continue; - } - if (parse_str(buf, "", buf2, sizeof(buf2))) { - prop.maxGridSize[0] = atoi(buf2); - char* p = strchr(buf2, ' '); - if (p) { - p++; - prop.maxGridSize[1] = atoi(p); - p = strchr(p, ' '); - if (p) { - p++; - prop.maxGridSize[2] = atoi(p); - } - } - continue; - } - if (parse_int(buf, "", prop.clockRate)) continue; - if (parse_int(buf, "", (int&)prop.totalConstMem)) continue; - if (parse_int(buf, "", prop.major)) continue; - if (parse_int(buf, "", prop.minor)) continue; - if (parse_int(buf, "", (int&)prop.textureAlignment)) continue; - if (parse_int(buf, "", prop.deviceOverlap)) continue; - if (parse_int(buf, "", prop.multiProcessorCount)) continue; - } - return ERR_XML_PARSE; -} - -////////////////// ATI STARTS HERE ///////////////// - -#ifndef _USING_FCGI_ -void COPROC_ATI::write_xml(MIOFILE& f) { - f.printf( - "\n" - ); - - f.printf( - " %d\n" - " %s\n" - " %f\n" - " %f\n" - " %f\n" - " %d\n" - " %d\n" - " %d\n" - " %d\n" - " %u\n" - " %d\n" - " %d\n" - " %d\n" - " %d\n" - " %d\n" - " %d\n" - " %d\n" - " %d\n" - " %d\n" - " %s\n", - count, - name, - req_secs, - req_instances, - estimated_delay, - attribs.target, - attribs.localRAM, - attribs.uncachedRemoteRAM, - attribs.cachedRemoteRAM, - attribs.engineClock, - attribs.memoryClock, - attribs.wavefrontSize, - attribs.numberOfSIMD, - attribs.doublePrecision, - 
attribs.pitch_alignment, - attribs.surface_alignment, - info.maxResource1DWidth, - info.maxResource2DWidth, - info.maxResource2DHeight, - version - ); - - if (atirt_detected) { - f.printf(" \n"); - } - - if (amdrt_detected) { - f.printf(" \n"); - } - - f.printf("\n"); -}; -#endif - -void COPROC_ATI::clear() { - count = 0; - used = 0; - req_secs = 0; - req_instances = 0; - estimated_delay = -1; - strcpy(name, ""); - strcpy(version, ""); - atirt_detected = false; - amdrt_detected = false; - memset(&attribs, 0, sizeof(attribs)); - memset(&info, 0, sizeof(info)); -} - -int COPROC_ATI::parse(MIOFILE& fin) { - char buf[1024]; - int n; - - clear(); - - while (fin.fgets(buf, sizeof(buf))) { - if (strstr(buf, "")) return 0; - if (parse_int(buf, "", count)) continue; - if (parse_str(buf, "", name, sizeof(name))) continue; - if (parse_double(buf, "", req_secs)) continue; - if (parse_double(buf, "", req_instances)) continue; - if (parse_double(buf, "", estimated_delay)) continue; - - if (parse_int(buf, "", n)) { - attribs.target = (CALtarget)n; - continue; - } - if (parse_int(buf, "", n)) { - attribs.localRAM = n; - continue; - } - if (parse_int(buf, "", n)) { - attribs.uncachedRemoteRAM = n; - continue; - } - if (parse_int(buf, "", n)) { - attribs.cachedRemoteRAM = n; - continue; - } - if (parse_int(buf, "", n)) { - attribs.engineClock = n; - continue; - } - if (parse_int(buf, "", n)) { - attribs.memoryClock = n; - continue; - } - if (parse_int(buf, "", n)) { - attribs.wavefrontSize = n; - continue; - } - if (parse_int(buf, "" , n)) { - attribs.numberOfSIMD = n; - continue; - } - if (parse_int(buf, "", n)) { - attribs.doublePrecision = n?CAL_TRUE:CAL_FALSE; - continue; - } - if (parse_int(buf, "", n)) { - attribs.pitch_alignment = n; - continue; - } - if (parse_int(buf, "", n)) { - attribs.surface_alignment = n; - continue; - } - if (parse_int(buf, "", n)) { - info.maxResource1DWidth = n; - continue; - } - if (parse_int(buf, "", n)) { - info.maxResource2DWidth = n; - 
continue; - } - if (parse_int(buf, "", n)) { - info.maxResource2DHeight = n; - continue; - } - if (parse_bool(buf, "amdrt_detected", amdrt_detected)) continue; - if (parse_bool(buf, "atirt_detected", atirt_detected)) continue; - if (parse_str(buf, "", version, sizeof(version))) continue; - } - return ERR_XML_PARSE; -} - -void COPROC_ATI::description(char* buf) { - sprintf(buf, "%s (CAL version %s, %.0fMB, %.0f GFLOPS peak)", - name, version, attribs.localRAM/1024.*1024., peak_flops()/1.e9 - ); -} +// This file is part of BOINC. +// http://boinc.berkeley.edu +// Copyright (C) 2007 University of California +// +// BOINC is free software; you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License +// as published by the Free Software Foundation, +// either version 3 of the License, or (at your option) any later version. +// +// BOINC is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with BOINC. If not, see . 
+ +#if defined(_WIN32) && !defined(__STDWX_H__) +#include "boinc_win.h" +#elif defined(_WIN32) && defined(__STDWX_H__) +#include "stdwx.h" +#else +#ifdef _USING_FCGI_ +#include "boinc_fcgi.h" +#else +#include +#endif +#include +#include +#endif + +#ifdef _WIN32 +#include "win_util.h" +#endif + +#include "error_numbers.h" +#include "filesys.h" +#include "parse.h" +#include "str_util.h" + +#include "coproc.h" + +#ifndef _USING_FCGI_ +using std::perror; +#endif + +#ifndef _USING_FCGI_ +void COPROC::write_xml(MIOFILE& f) { + f.printf( + "\n" + " %s\n" + " %d\n" + "\n", + type, count + ); +} +#endif + +int COPROC_REQ::parse(MIOFILE& fin) { + char buf[1024]; + strcpy(type, ""); + count = 0; + while (fin.fgets(buf, sizeof(buf))) { + if (match_tag(buf, "")) { + if (!strlen(type)) return ERR_XML_PARSE; + return 0; + } + if (parse_str(buf, "", type, sizeof(type))) continue; + if (parse_double(buf, "", count)) continue; + } + return ERR_XML_PARSE; +} + +int COPROC::parse(MIOFILE& fin) { + char buf[1024]; + strcpy(type, ""); + count = 0; + used = 0; + req_secs = 0; + estimated_delay = 0; + req_instances = 0; + while (fin.fgets(buf, sizeof(buf))) { + if (match_tag(buf, "")) { + if (!strlen(type)) return ERR_XML_PARSE; + return 0; + } + if (parse_str(buf, "", type, sizeof(type))) continue; + if (parse_int(buf, "", count)) continue; + if (parse_double(buf, "", req_secs)) continue; + if (parse_double(buf, "", req_instances)) continue; + if (parse_double(buf, "", estimated_delay)) continue; + } + return ERR_XML_PARSE; +} + +void COPROCS::summary_string(char* buf, int len) { + char bigbuf[8192], buf2[1024]; + + strcpy(bigbuf, ""); + if (cuda.count) { + int mem = (int)(cuda.prop.dtotalGlobalMem/MEGA); + sprintf(buf2, "[CUDA|%s|%d|%dMB|%d]", + cuda.prop.name, cuda.count, mem, cuda.display_driver_version + ); + strcat(bigbuf, buf2); + } + if (ati.count) { + sprintf(buf2,"[CAL|%s|%d|%dMB|%s]", + ati.name, ati.count, ati.attribs.localRAM, ati.version + ); + strcat(bigbuf,buf2); + } + 
bigbuf[len-1] = 0; + strcpy(buf, bigbuf); +} + +int COPROCS::parse(MIOFILE& fin) { + char buf[1024]; + int retval; + + while (fin.fgets(buf, sizeof(buf))) { + if (match_tag(buf, "")) { + return 0; + } + if (strstr(buf, "")) { + retval = cuda.parse(fin); + if (retval) { + cuda.clear(); + } + } + if (strstr(buf, "")) { + retval = ati.parse(fin); + if (retval) { + ati.clear(); + } + } + } + return ERR_XML_PARSE; +} + +void COPROCS::write_xml(MIOFILE& mf) { +#ifndef _USING_FCGI_ + mf.printf(" \n"); + if (cuda.count) { + cuda.write_xml(mf); + } + if (ati.count) { + ati.write_xml(mf); + } + mf.printf(" \n"); +#endif +} + +#ifdef _WIN32 + +#endif + +void COPROC_CUDA::description(char* buf) { + char vers[256]; + if (display_driver_version) { + sprintf(vers, "%d", display_driver_version); + } else { + strcpy(vers, "unknown"); + } + sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0f GFLOPS peak)", + prop.name, vers, cuda_version, prop.major, prop.minor, + prop.totalGlobalMem/(1024.*1024.), peak_flops()/1e9 + ); +} + +#ifndef _USING_FCGI_ +void COPROC_CUDA::write_xml(MIOFILE& f) { + f.printf( + "\n" + " %d\n" + " %s\n" + " %f\n" + " %f\n" + " %f\n" + " %d\n" + " %d\n" + " %u\n" + " %u\n" + " %d\n" + " %d\n" + " %u\n" + " %d\n" + " %d %d %d\n" + " %d %d %d\n" + " %u\n" + " %d\n" + " %d\n" + " %d\n" + " %u\n" + " %d\n" + " %d\n" + "\n", + count, + prop.name, + req_secs, + req_instances, + estimated_delay, + display_driver_version, + cuda_version, + (unsigned int)prop.totalGlobalMem, + (unsigned int)prop.sharedMemPerBlock, + prop.regsPerBlock, + prop.warpSize, + (unsigned int)prop.memPitch, + prop.maxThreadsPerBlock, + prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2], + prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2], + (unsigned int)prop.totalConstMem, + prop.major, + prop.minor, + prop.clockRate, + (unsigned int)prop.textureAlignment, + prop.deviceOverlap, + prop.multiProcessorCount + ); +} +#endif + 
+void COPROC_CUDA::clear() { + count = 0; + used = 0; + req_secs = 0; + req_instances = 0; + estimated_delay = -1; // mark as absent + cuda_version = 0; + display_driver_version = 0; + strcpy(prop.name, ""); + prop.totalGlobalMem = 0; + prop.sharedMemPerBlock = 0; + prop.regsPerBlock = 0; + prop.warpSize = 0; + prop.memPitch = 0; + prop.maxThreadsPerBlock = 0; + prop.maxThreadsDim[0] = 0; + prop.maxThreadsDim[1] = 0; + prop.maxThreadsDim[2] = 0; + prop.maxGridSize[0] = 0; + prop.maxGridSize[1] = 0; + prop.maxGridSize[2] = 0; + prop.clockRate = 0; + prop.totalConstMem = 0; + prop.major = 0; + prop.minor = 0; + prop.textureAlignment = 0; + prop.deviceOverlap = 0; + prop.multiProcessorCount = 0; +} + +int COPROC_CUDA::parse(MIOFILE& fin) { + char buf[1024], buf2[256]; + + clear(); + while (fin.fgets(buf, sizeof(buf))) { + if (strstr(buf, "")) { + return 0; + } + if (parse_int(buf, "", count)) continue; + if (parse_double(buf, "", req_secs)) continue; + if (parse_double(buf, "", req_instances)) continue; + if (parse_double(buf, "", estimated_delay)) continue; + if (parse_str(buf, "", prop.name, sizeof(prop.name))) continue; + if (parse_int(buf, "", display_driver_version)) continue; + if (parse_int(buf, "", cuda_version)) continue; + if (parse_double(buf, "", prop.dtotalGlobalMem)) continue; + if (parse_int(buf, "", (int&)prop.sharedMemPerBlock)) continue; + if (parse_int(buf, "", prop.regsPerBlock)) continue; + if (parse_int(buf, "", prop.warpSize)) continue; + if (parse_int(buf, "", (int&)prop.memPitch)) continue; + if (parse_int(buf, "", prop.maxThreadsPerBlock)) continue; + if (parse_str(buf, "", buf2, sizeof(buf2))) { + // can't use sscanf here (FCGI) + // + prop.maxThreadsDim[0] = atoi(buf2); + char* p = strchr(buf2, ' '); + if (p) { + p++; + prop.maxThreadsDim[1] = atoi(p); + p = strchr(p, ' '); + if (p) { + p++; + prop.maxThreadsDim[2] = atoi(p); + } + } + continue; + } + if (parse_str(buf, "", buf2, sizeof(buf2))) { + prop.maxGridSize[0] = atoi(buf2); + char* 
p = strchr(buf2, ' '); + if (p) { + p++; + prop.maxGridSize[1] = atoi(p); + p = strchr(p, ' '); + if (p) { + p++; + prop.maxGridSize[2] = atoi(p); + } + } + continue; + } + if (parse_int(buf, "", prop.clockRate)) continue; + if (parse_int(buf, "", (int&)prop.totalConstMem)) continue; + if (parse_int(buf, "", prop.major)) continue; + if (parse_int(buf, "", prop.minor)) continue; + if (parse_int(buf, "", (int&)prop.textureAlignment)) continue; + if (parse_int(buf, "", prop.deviceOverlap)) continue; + if (parse_int(buf, "", prop.multiProcessorCount)) continue; + } + return ERR_XML_PARSE; +} + +////////////////// ATI STARTS HERE ///////////////// + +#ifndef _USING_FCGI_ +void COPROC_ATI::write_xml(MIOFILE& f) { + f.printf( + "\n" + ); + + f.printf( + " %d\n" + " %s\n" + " %f\n" + " %f\n" + " %f\n" + " %d\n" + " %d\n" + " %d\n" + " %d\n" + " %u\n" + " %d\n" + " %d\n" + " %d\n" + " %d\n" + " %d\n" + " %d\n" + " %d\n" + " %d\n" + " %d\n" + " %s\n", + count, + name, + req_secs, + req_instances, + estimated_delay, + attribs.target, + attribs.localRAM, + attribs.uncachedRemoteRAM, + attribs.cachedRemoteRAM, + attribs.engineClock, + attribs.memoryClock, + attribs.wavefrontSize, + attribs.numberOfSIMD, + attribs.doublePrecision, + attribs.pitch_alignment, + attribs.surface_alignment, + info.maxResource1DWidth, + info.maxResource2DWidth, + info.maxResource2DHeight, + version + ); + + if (atirt_detected) { + f.printf(" \n"); + } + + if (amdrt_detected) { + f.printf(" \n"); + } + + f.printf("\n"); +}; +#endif + +void COPROC_ATI::clear() { + count = 0; + used = 0; + req_secs = 0; + req_instances = 0; + estimated_delay = -1; + strcpy(name, ""); + strcpy(version, ""); + atirt_detected = false; + amdrt_detected = false; + memset(&attribs, 0, sizeof(attribs)); + memset(&info, 0, sizeof(info)); +} + +int COPROC_ATI::parse(MIOFILE& fin) { + char buf[1024]; + int n; + + clear(); + + while (fin.fgets(buf, sizeof(buf))) { + if (strstr(buf, "")) return 0; + if (parse_int(buf, "", count)) 
continue;
+        if (parse_str(buf, "<name>", name, sizeof(name))) continue;
+        if (parse_double(buf, "<req_secs>", req_secs)) continue;
+        if (parse_double(buf, "<req_instances>", req_instances)) continue;
+        if (parse_double(buf, "<estimated_delay>", estimated_delay)) continue;
+
+        if (parse_int(buf, "<target>", n)) {
+            attribs.target = (CALtarget)n;
+            continue;
+        }
+        if (parse_int(buf, "<localRAM>", n)) {
+            attribs.localRAM = n;
+            continue;
+        }
+        if (parse_int(buf, "<uncachedRemoteRAM>", n)) {
+            attribs.uncachedRemoteRAM = n;
+            continue;
+        }
+        if (parse_int(buf, "<cachedRemoteRAM>", n)) {
+            attribs.cachedRemoteRAM = n;
+            continue;
+        }
+        if (parse_int(buf, "<engineClock>", n)) {
+            attribs.engineClock = n;
+            continue;
+        }
+        if (parse_int(buf, "<memoryClock>", n)) {
+            attribs.memoryClock = n;
+            continue;
+        }
+        if (parse_int(buf, "<wavefrontSize>", n)) {
+            attribs.wavefrontSize = n;
+            continue;
+        }
+        if (parse_int(buf, "<numberOfSIMD>" , n)) {
+            attribs.numberOfSIMD = n;
+            continue;
+        }
+        if (parse_int(buf, "<doublePrecision>", n)) {
+            attribs.doublePrecision = n?CAL_TRUE:CAL_FALSE;
+            continue;
+        }
+        if (parse_int(buf, "<pitch_alignment>", n)) {
+            attribs.pitch_alignment = n;
+            continue;
+        }
+        if (parse_int(buf, "<surface_alignment>", n)) {
+            attribs.surface_alignment = n;
+            continue;
+        }
+        if (parse_int(buf, "<maxResource1DWidth>", n)) {
+            info.maxResource1DWidth = n;
+            continue;
+        }
+        if (parse_int(buf, "<maxResource2DWidth>", n)) {
+            info.maxResource2DWidth = n;
+            continue;
+        }
+        if (parse_int(buf, "<maxResource2DHeight>", n)) {
+            info.maxResource2DHeight = n;
+            continue;
+        }
+        if (parse_bool(buf, "amdrt_detected", amdrt_detected)) continue;
+        if (parse_bool(buf, "atirt_detected", atirt_detected)) continue;
+        if (parse_str(buf, "<CALVersion>", version, sizeof(version))) continue;
+    }
+    return ERR_XML_PARSE;
+}
+
+void COPROC_ATI::description(char* buf) {
+    sprintf(buf, "%s (CAL version %s, %.0fMB, %.0f GFLOPS peak)",
+        name, version, attribs.localRAM/1024.*1024., peak_flops()/1.e9
+    );
+}
diff --git a/lib/coproc.h b/lib/coproc.h
index e0e964a829..c5f022b58f 100644
--- a/lib/coproc.h
+++ b/lib/coproc.h
@@ -144,6 +144,12 @@ struct COPROC {
             available_ram_unknown[i] = true;
         }
     }
+    inline void clear_usage() {
+        for (int i=0; i<count; i++) {
+            usage[i] = 0;
+            pending_usage[i] = 0;
+        }
+    }
@@ -186,56 +192,9 @@ struct COPROC {
-struct COPROCS {
-    std::vector<COPROC*> coprocs;   // not deleted in destructor
-        // so any structure that includes this needs to do it manually
-
-    COPROCS(){}
-    ~COPROCS(){}    // don't delete coprocs; else crash in APP_INIT_DATA logic
-    void write_xml(MIOFILE& out);
-    void get(
-        bool use_all, std::vector<std::string> &descs,
-        std::vector<std::string> &warnings,
-        std::vector<int>& ignore_cuda_dev,
-        std::vector<int>& ignore_ati_dev
-    );
-    int parse(MIOFILE&);
-    void summary_string(char*, int);
-    COPROC* lookup(const char*);
-    bool fully_used() {
-        for (unsigned int i=0; i<coprocs.size(); i++) {
-            COPROC* cp = coprocs[i];
-            if (cp->used < cp->count) return false;
-        }
-        return true;
-    }
-
-    // Copy a coproc set, possibly setting usage to zero.
-    // used in round-robin simulator and CPU scheduler,
-    // to avoid messing w/ master copy
-    //
-    void clone(COPROCS& c, bool copy_used) {
-        for (unsigned int i=0; i<c.coprocs.size(); i++) {
-            COPROC* cp = c.coprocs[i];
-            COPROC* cp2 = new COPROC(cp->type);
-            cp2->count = cp->count;
-            if (copy_used) cp2->used = cp->used;
-            coprocs.push_back(cp2);
-        }
-    }
-    inline void clear_usage() {
-        for (unsigned int i=0; i<coprocs.size(); i++) {
-            COPROC* cp = coprocs[i];
-            for (int j=0; j<cp->count; j++) {
-                cp->usage[j] = 0;
-                cp->pending_usage[j] = 0;
-            }
-        }
-    }
-    inline void delete_coprocs() {
-        for (unsigned int i=0; i<coprocs.size(); i++) {
-            COPROC* cp = coprocs[i];
-            delete cp;
-        }
-        coprocs.clear();
-    }
-};
@@ -243,7 +196,7 @@ struct COPROC_CUDA : public COPROC {
-    static void get(COPROCS&,
+    void get(
         std::vector<std::string>&, std::vector<std::string>&,
         std::vector<int>& ignore_devs
     );
@@ -268,6 +221,8 @@ struct COPROC_CUDA : public COPROC {
     void get_available_ram();
     bool check_running_graphics_app();
 
+    void fake(double, int);
+
 };
 
 enum CUdevice_attribute_enum {
@@ -305,7 +260,7 @@ struct COPROC_ATI : public COPROC {
 #endif
     COPROC_ATI(): COPROC("ATI"){}
     virtual ~COPROC_ATI(){}
-    static void get(COPROCS&,
+    void get(
         std::vector<std::string>&, std::vector<std::string>&,
         std::vector<int>& ignore_devs
     );
@@ -318,9 +273,53 @@ struct COPROC_ATI : public COPROC {
         return x?x:5e10;
     }
     void get_available_ram();
+    void fake(double, int);
 };
 
-extern COPROC_CUDA* fake_cuda(COPROCS&, double, int);
-extern COPROC_ATI* fake_ati(COPROCS&, double, int);
+struct COPROCS {
+    COPROC_CUDA cuda;
+    COPROC_ATI ati;
+
+    COPROCS(){}
+    ~COPROCS(){}    // don't delete coprocs; else crash in APP_INIT_DATA logic
+    void write_xml(MIOFILE& out);
+    void get(
+        bool use_all, std::vector<std::string> &descs,
+        std::vector<std::string> &warnings,
+        std::vector<int>& ignore_cuda_dev,
+        std::vector<int>& ignore_ati_dev
+    );
+    int parse(MIOFILE&);
+    void summary_string(char*, int);
+    bool fully_used() {
+        if (cuda.used < cuda.count) return false;
+        if (ati.used < ati.count) return false;
+        return true;
+    }
+
+    // Copy a coproc set, possibly setting usage to zero.
+    // used in round-robin simulator and CPU scheduler,
+    // to avoid messing w/ master copy
+    //
+    void clone(COPROCS& c, bool copy_used) {
+        c.cuda = cuda;
+        c.ati = ati;
+        if (!copy_used) {
+            c.cuda.used = 0;
+            c.ati.used = 0;
+        }
+    }
+    inline void clear() {
+        cuda.count = 0;
+        ati.count = 0;
+    }
+    inline void clear_usage() {
+        cuda.clear_usage();
+        ati.clear_usage();
+    }
+    inline bool none() {
+        return (cuda.count==0) && (ati.count==0);
+    }
+};
 
 #endif
diff --git a/lib/hostinfo.cpp b/lib/hostinfo.cpp
index 57c2d2fa7f..9785b86214 100644
--- a/lib/hostinfo.cpp
+++ b/lib/hostinfo.cpp
@@ -64,6 +64,8 @@ void HOST_INFO::clear_host_info() {
 
     strcpy(os_name, "");
     strcpy(os_version, "");
+
+    coprocs.clear();
 }
 
 int HOST_INFO::parse(MIOFILE& in, bool benchmarks_only) {
diff --git a/lib/hostinfo.h b/lib/hostinfo.h
index 810d6f83d4..c676a5b74c 100644
--- a/lib/hostinfo.h
+++ b/lib/hostinfo.h
@@ -78,6 +78,12 @@ public:
     void clear_host_info();
     void make_random_string(const char* salt, char* out);
     void generate_host_cpid();
+    inline bool have_cuda() {
+        return (coprocs.cuda.count > 0);
+    }
+    inline bool have_ati() {
+        return (coprocs.ati.count > 0);
+    }
 };
 
 #ifdef __APPLE__
diff --git a/sched/handle_request.cpp b/sched/handle_request.cpp
index c0e1098bd3..b74f61d0c1 100644
--- a/sched/handle_request.cpp
+++ b/sched/handle_request.cpp
@@ -1056,8 +1056,8 @@ bool bad_install_type() {
 static inline bool requesting_work() {
     if (g_request->work_req_seconds > 0) return true;
     if (g_request->cpu_req_secs > 0) return true;
-    if (g_request->coproc_cuda && g_request->coproc_cuda->req_secs) return true;
-    if (g_request->coproc_ati && g_request->coproc_ati->req_secs) return true;
+    if (g_request->coprocs.cuda.count && g_request->coprocs.cuda.req_secs) return true;
+    if (g_request->coprocs.ati.count && g_request->coprocs.ati.req_secs) return true;
     return false;
 }
 
diff --git a/sched/sched_customize.cpp b/sched/sched_customize.cpp
index 70e5971b46..d384c7ab5f 100644
--- a/sched/sched_customize.cpp
+++ b/sched/sched_customize.cpp
@@ -138,8 +138,8 @@ static inline bool app_plan_ati(
     SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu
 ) {
     char buf[256];
-    COPROC_ATI* cp = (COPROC_ATI*)sreq.coprocs.lookup("ATI");
-    if (!cp) {
+    COPROC_ATI* cp = &sreq.coprocs.ati;
+    if (!cp->count) {
         if (config.debug_version_select) {
             log_messages.printf(MSG_NORMAL,
                 "[version] Host lacks ATI GPU for plan class ati\n"
@@ -320,8 +320,8 @@ static inline bool app_plan_cuda(
     SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu
 ) {
     char buf[256];
-    COPROC_CUDA* cp = (COPROC_CUDA*)sreq.coprocs.lookup("CUDA");
-    if (!cp) {
+    COPROC_CUDA* cp = &sreq.coprocs.cuda;
+    if (!cp->count) {
         if (config.debug_version_select) {
             log_messages.printf(MSG_NORMAL,
                 "[version] Host lacks CUDA coprocessor for plan class %s\n",
diff --git a/sched/sched_send.cpp b/sched/sched_send.cpp
index 3d170d4dbc..9eb98c1bd7 100644
--- a/sched/sched_send.cpp
+++ b/sched/sched_send.cpp
@@ -101,18 +101,8 @@ void WORK_REQ::get_job_limits() {
     if (n < 1) n = 1;
     effective_ncpus = n;
 
-    n = 0;
-    COPROC* cp = g_request->coprocs.lookup("CUDA");
-    if (cp) {
-        n = cp->count;
-        if (n > MAX_GPUS) n = MAX_GPUS;
-    }
-    cp = g_request->coprocs.lookup("ATI");
-    if (cp) {
-        if (cp->count <= MAX_GPUS && cp->count > n) {
-            n = cp->count;
-        }
-    }
+    n = g_request->coprocs.cuda.count + g_request->coprocs.ati.count;
+    if (n > MAX_GPUS) n = MAX_GPUS;
     effective_ngpus = n;
 
     int mult = effective_ncpus + config.gpu_multiplier * effective_ngpus;
@@ -581,9 +571,9 @@ static inline bool hard_app(APP& app) {
 
 static inline double get_estimated_delay(BEST_APP_VERSION& bav) {
     if (bav.host_usage.ncudas) {
-        return g_request->coproc_cuda->estimated_delay;
+        return g_request->coprocs.cuda.estimated_delay;
     } else if (bav.host_usage.natis) {
-        return g_request->coproc_ati->estimated_delay;
+        return g_request->coprocs.ati.estimated_delay;
     } else {
         return g_request->cpu_estimated_delay;
     }
@@ -591,9 +581,9 @@ static inline double get_estimated_delay(BEST_APP_VERSION& bav) {
 
 static inline void update_estimated_delay(BEST_APP_VERSION& bav, double dt) {
     if (bav.host_usage.ncudas) {
-        g_request->coproc_cuda->estimated_delay += dt;
+        g_request->coprocs.cuda.estimated_delay += dt;
     } else if (bav.host_usage.natis) {
-        g_request->coproc_ati->estimated_delay += dt;
+        g_request->coprocs.ati.estimated_delay += dt;
     } else {
         g_request->cpu_estimated_delay += dt;
     }
@@ -1475,18 +1465,18 @@ void send_work_setup() {
     g_wreq->cpu_req_instances = g_request->cpu_req_instances;
     g_wreq->anonymous_platform = anonymous(g_request->platforms.list[0]);
 
-    if (g_request->coproc_cuda) {
-        g_wreq->cuda_req_secs = clamp_req_sec(g_request->coproc_cuda->req_secs);
-        g_wreq->cuda_req_instances = g_request->coproc_cuda->req_instances;
-        if (g_request->coproc_cuda->estimated_delay < 0) {
-            g_request->coproc_cuda->estimated_delay = g_request->cpu_estimated_delay;
+    if (g_request->coprocs.cuda.count) {
+        g_wreq->cuda_req_secs = clamp_req_sec(g_request->coprocs.cuda.req_secs);
+        g_wreq->cuda_req_instances = g_request->coprocs.cuda.req_instances;
+        if (g_request->coprocs.cuda.estimated_delay < 0) {
+            g_request->coprocs.cuda.estimated_delay = g_request->cpu_estimated_delay;
         }
     }
-    if (g_request->coproc_ati) {
-        g_wreq->ati_req_secs = clamp_req_sec(g_request->coproc_ati->req_secs);
-        g_wreq->ati_req_instances = g_request->coproc_ati->req_instances;
-        if (g_request->coproc_ati->estimated_delay < 0) {
-            g_request->coproc_ati->estimated_delay = g_request->cpu_estimated_delay;
+    if (g_request->coprocs.ati.count) {
+        g_wreq->ati_req_secs = clamp_req_sec(g_request->coprocs.ati.req_secs);
+        g_wreq->ati_req_instances = g_request->coprocs.ati.req_instances;
+        if (g_request->coprocs.ati.estimated_delay < 0) {
+            g_request->coprocs.ati.estimated_delay = g_request->cpu_estimated_delay;
         }
     }
     if (g_wreq->cpu_req_secs || g_wreq->cuda_req_secs || g_wreq->ati_req_secs) {
@@ -1508,18 +1498,18 @@ void send_work_setup() {
         g_wreq->cpu_req_secs, g_wreq->cpu_req_instances,
         g_request->cpu_estimated_delay
     );
-    if (g_request->coproc_cuda) {
+    if (g_request->coprocs.cuda.count) {
         log_messages.printf(MSG_NORMAL,
             "[send] CUDA: req %.2f sec, %.2f instances; est delay %.2f\n",
             g_wreq->cuda_req_secs, g_wreq->cuda_req_instances,
-            g_request->coproc_cuda->estimated_delay
+            g_request->coprocs.cuda.estimated_delay
         );
     }
-    if (g_request->coproc_ati) {
+    if (g_request->coprocs.ati.count) {
         log_messages.printf(MSG_NORMAL,
             "[send] ATI: req %.2f sec, %.2f instances; est delay %.2f\n",
             g_wreq->ati_req_secs, g_wreq->ati_req_instances,
-            g_request->coproc_ati->estimated_delay
+            g_request->coprocs.ati.estimated_delay
         );
     }
     log_messages.printf(MSG_NORMAL,
diff --git a/sched/sched_types.cpp b/sched/sched_types.cpp
index 2f9c01c0b4..f92a9993b6 100644
--- a/sched/sched_types.cpp
+++ b/sched/sched_types.cpp
@@ -198,8 +198,7 @@ const char* SCHEDULER_REQUEST::parse(FILE* fin) {
     have_time_stats_log = false;
     client_cap_plan_class = false;
     sandbox = -1;
-    coproc_cuda = 0;
-    coproc_ati = 0;
+    coprocs.clear();
 
     fgets(buf, sizeof(buf), fin);
     if (!match_tag(buf, "<scheduler_request>")) return "no start tag";
@@ -367,8 +366,6 @@ const char* SCHEDULER_REQUEST::parse(FILE* fin) {
             MIOFILE mf;
             mf.init_file(fin);
             coprocs.parse(mf);
-            coproc_cuda = (COPROC_CUDA*)coprocs.lookup("CUDA");
-            coproc_ati = (COPROC_ATI*)coprocs.lookup("ATI");
             continue;
         }
         if (parse_bool(buf, "client_cap_plan_class", client_cap_plan_class)) continue;
diff --git a/sched/sched_types.h b/sched/sched_types.h
index b0ef3fe111..c72972f486 100644
--- a/sched/sched_types.h
+++ b/sched/sched_types.h
@@ -402,8 +402,6 @@ struct SCHEDULER_REQUEST {
     HOST host;      // request message is parsed into here.
         // does NOT contain the full host record.
     COPROCS coprocs;
-    COPROC_CUDA* coproc_cuda;
-    COPROC_ATI* coproc_ati;
     std::vector<RESULT> results;     // completed results being reported
     std::vector<MSG_FROM_HOST_DESC> msgs_from_host;