From 95579e7ebe84854c95eb13b259d8de786b24ed4a Mon Sep 17 00:00:00 2001
From: David Anderson
Date: Mon, 7 Dec 2009 00:49:36 +0000
Subject: [PATCH] svn path=/trunk/boinc/; revision=19795

---
 client/coproc_detect.cpp | 703 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 703 insertions(+)
 create mode 100644 client/coproc_detect.cpp

diff --git a/client/coproc_detect.cpp b/client/coproc_detect.cpp
new file mode 100644
index 0000000000..4a9aec4b47
--- /dev/null
+++ b/client/coproc_detect.cpp
@@ -0,0 +1,703 @@
+// This file is part of BOINC.
+// http://boinc.berkeley.edu
+// Copyright (C) 2009 University of California
+//
+// BOINC is free software; you can redistribute it and/or modify it
+// under the terms of the GNU Lesser General Public License
+// as published by the Free Software Foundation,
+// either version 3 of the License, or (at your option) any later version.
+//
+// BOINC is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// See the GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
+
+
+// client-specific GPU code.
+// Mostly GPU detection
+
+#ifdef _WIN32
+#ifndef SIM
+#include <nvapi.h>
+#endif
+#else
+#ifdef __APPLE__
+// Suppress obsolete warning when building for OS 10.3.9
+#define DLOPEN_NO_WARN
+#endif
+#include <dlfcn.h>
+#include <setjmp.h>
+#include <signal.h>
+#endif
+
+#include "coproc.h"
+
+using std::string;
+using std::vector;
+
+// NOTE(review): this copy of the patch lost every "<...>" span during
+// extraction.  The header names above, the template arguments below,
+// the tail of in_vector(), the SIGSEGV recovery state and the first
+// line of the COPROCS::get() signature were restored from how the
+// surviving code uses them; confirm against the upstream revision.
+
+// Return true if n is one of the elements of v.
+//
+static bool in_vector(int n, vector<int>& v) {
+    for (unsigned int i=0; i<v.size(); i++) {
+        if (v[i] == n) return true;
+    }
+    return false;
+}
+
+#ifndef _WIN32
+// Jump target used to recover from a SIGSEGV raised inside a vendor
+// library during detection; set by setjmp() in COPROCS::get().
+//
+jmp_buf resume;
+
+// SIGSEGV handler installed for the duration of COPROCS::get():
+// return to the most recent setjmp(resume).
+//
+void segv_handler(int) {
+    longjmp(resume, 1);
+}
+#endif
+
+// Run NVIDIA and ATI GPU detection.
+//
+// use_all: passed through to COPROC_CUDA::get()
+// descs, warnings: message lists passed to both detectors;
+//     a warning is appended here if a detector crashes
+// ignore_cuda_dev, ignore_ati_dev: device lists passed to
+//     the respective detector
+//
+// On non-Windows systems each detector runs with a temporary SIGSEGV
+// handler in place, so a crash inside the vendor library is reported
+// as a warning instead of terminating the client.
+// The previous handler is reinstated before returning.
+//
+void COPROCS::get(
+    bool use_all, vector<string>&descs, vector<string>&warnings,
+    vector<int>& ignore_cuda_dev,
+    vector<int>& ignore_ati_dev
+) {
+
+#ifdef _WIN32
+    COPROC_CUDA::get(*this, use_all, descs, warnings, ignore_cuda_dev);
+    COPROC_ATI::get(*this, descs, warnings, ignore_ati_dev);
+#else
+    void (*old_sig)(int) = signal(SIGSEGV, segv_handler);
+    // setjmp() returns nonzero only when segv_handler() jumps back here
+    if (setjmp(resume)) {
+        warnings.push_back("Caught SIGSEGV in NVIDIA GPU detection");
+    } else {
+        COPROC_CUDA::get(*this, use_all, descs, warnings, ignore_cuda_dev);
+    }
+#ifndef __APPLE__       // ATI does not yet support CAL on Macs
+    if (setjmp(resume)) {
+        warnings.push_back("Caught SIGSEGV in ATI GPU detection");
+    } else {
+        COPROC_ATI::get(*this, descs, warnings, ignore_ati_dev);
+    }
+#endif
+    signal(SIGSEGV, old_sig);
+#endif
+}
+
+// return 1/-1/0 if device 1 is more/less/same capable than device 2.
+// If "loose", ignore FLOPS and tolerate small memory diff
+//
+// Criteria are checked in this order: compute capability (major, then
+// minor), CUDA version, global memory size, peak FLOPS.
+// In loose mode memory only decides the result when c1 has more than
+// 1.4x or less than 0.7x the memory of c2.
+//
+int cuda_compare(COPROC_CUDA& c1, COPROC_CUDA& c2, bool loose) {
+    if (c1.prop.major > c2.prop.major) return 1;
+    if (c1.prop.major < c2.prop.major) return -1;
+    if (c1.prop.minor > c2.prop.minor) return 1;
+    if (c1.prop.minor < c2.prop.minor) return -1;
+    if (c1.cuda_version > c2.cuda_version) return 1;
+    if (c1.cuda_version < c2.cuda_version) return -1;
+    if (loose) {
+        if (c1.prop.totalGlobalMem > 1.4*c2.prop.totalGlobalMem) return 1;
+        if (c1.prop.totalGlobalMem < .7* c2.prop.totalGlobalMem) return -1;
+        return 0;
+    }
+    if (c1.prop.totalGlobalMem > c2.prop.totalGlobalMem) return 1;
+    if (c1.prop.totalGlobalMem < c2.prop.totalGlobalMem) return -1;
+    double s1 = c1.peak_flops();
+    double s2 = c2.peak_flops();
+    if (s1 > s2) return 1;
+    if (s1 < s2) return -1;
+    return 0;
+}
+
+// Entry points of the NVIDIA driver library, resolved at run time
+// by COPROC_CUDA::get().
+//
+#ifdef _WIN32
+typedef int (__stdcall *CUDA_GDC)(int *count);
+typedef int (__stdcall *CUDA_GDV)(int* version);
+typedef int (__stdcall *CUDA_GDI)(int);
+typedef int (__stdcall *CUDA_GDG)(int*, int);
+typedef int (__stdcall *CUDA_GDA)(int*, int, int);
+typedef int (__stdcall *CUDA_GDN)(char*, int, int);
+typedef int (__stdcall *CUDA_GDM)(unsigned int*, int);
+typedef int (__stdcall *CUDA_GDCC)(int*, int*, int);
+typedef int (__stdcall *CUDA_CC)(unsigned int*, unsigned int, unsigned int);
+typedef int (__stdcall *CUDA_CD)(unsigned int);
+typedef int (__stdcall *CUDA_MA)(unsigned int*, unsigned int);
+typedef int (__stdcall *CUDA_MF)(unsigned int);
+
+CUDA_GDC __cuDeviceGetCount = NULL;
+CUDA_GDV __cuDriverGetVersion = NULL;
+CUDA_GDI __cuInit = NULL;
+CUDA_GDG __cuDeviceGet = NULL;
+CUDA_GDA __cuDeviceGetAttribute = NULL;
+CUDA_GDN __cuDeviceGetName = NULL;
+CUDA_GDM __cuDeviceTotalMem = NULL;
+CUDA_GDCC __cuDeviceComputeCapability = NULL;
+CUDA_CC __cuCtxCreate = NULL;
+CUDA_CD __cuCtxDestroy = NULL;
+CUDA_MA __cuMemAlloc = NULL;
+CUDA_MF __cuMemFree = NULL;
+#else
+void* cudalib;
+int (*__cuInit)(int);
+int (*__cuDeviceGetCount)(int*);
+int (*__cuDriverGetVersion)(int*);
+int (*__cuDeviceGet)(int*, int);
+int (*__cuDeviceGetAttribute)(int*, int, int);
+int (*__cuDeviceGetName)(char*, int, int);
+int (*__cuDeviceTotalMem)(unsigned int*, int);
+int (*__cuDeviceComputeCapability)(int*, int*, int);
+int (*__cuCtxCreate)(unsigned int*, unsigned int, unsigned int);
+int (*__cuCtxDestroy)(unsigned int);
+int (*__cuMemAlloc)(unsigned int*, unsigned int);
+int (*__cuMemFree)(unsigned int);
+#endif
+
+// NVIDIA interfaces are documented here:
+// http://developer.download.nvidia.com/compute/cuda/2_3/toolkit/docs/online/index.html
+
+// Detect NVIDIA GPUs through the driver library.
+// The library is loaded dynamically, its entry points are resolved,
+// and each failure along the way is reported by appending a message
+// to "warnings" and returning.
+//
+// NOTE(review): this copy of the patch lost every "<...>" span during
+// extraction.  Template arguments were restored from usage; larger
+// losses are marked below and must be restored from upstream.
+//
+void COPROC_CUDA::get(
+    COPROCS& coprocs,
+    bool use_all,    // if false, use only those equivalent to most capable
+    vector<string>& descs,
+    vector<string>& warnings,
+    vector<int>& ignore_devs
+) {
+    int count, retval;
+    char buf[256];
+
+#ifdef _WIN32
+    HMODULE cudalib = LoadLibrary("nvcuda.dll");
+    if (!cudalib) {
+        warnings.push_back("No NVIDIA library found");
+        return;
+    }
+    __cuDeviceGetCount = (CUDA_GDC)GetProcAddress( cudalib, "cuDeviceGetCount" );
+    __cuDriverGetVersion = (CUDA_GDV)GetProcAddress( cudalib, "cuDriverGetVersion" );
+    __cuInit = (CUDA_GDI)GetProcAddress( cudalib, "cuInit" );
+    __cuDeviceGet = (CUDA_GDG)GetProcAddress( cudalib, "cuDeviceGet" );
+    __cuDeviceGetAttribute = (CUDA_GDA)GetProcAddress( cudalib, "cuDeviceGetAttribute" );
+    __cuDeviceGetName = (CUDA_GDN)GetProcAddress( cudalib, "cuDeviceGetName" );
+    __cuDeviceTotalMem = (CUDA_GDM)GetProcAddress( cudalib, "cuDeviceTotalMem" );
+    __cuDeviceComputeCapability = (CUDA_GDCC)GetProcAddress( cudalib, "cuDeviceComputeCapability" );
+    __cuCtxCreate = (CUDA_CC)GetProcAddress( cudalib, "cuCtxCreate" );
+    __cuCtxDestroy = (CUDA_CD)GetProcAddress( cudalib, "cuCtxDestroy" );
+    __cuMemAlloc = (CUDA_MA)GetProcAddress( cudalib, "cuMemAlloc" );
+    __cuMemFree = (CUDA_MF)GetProcAddress( cudalib, "cuMemFree" );
+
+#ifndef SIM
+    NvAPI_Status nvapiStatus;
+    NvDisplayHandle hDisplay;
+    NV_DISPLAY_DRIVER_VERSION Version;
+    memset(&Version, 0, sizeof(Version));
+    Version.version = NV_DISPLAY_DRIVER_VERSION_VER;
+
+    // Walk the display handles until enumeration fails or one of them
+    // yields a driver version.
+    NvAPI_Initialize();
+    for (int i=0; ; i++) {
+        nvapiStatus = NvAPI_EnumNvidiaDisplayHandle(i, &hDisplay);
+        if (nvapiStatus != NVAPI_OK) break;
+        nvapiStatus = NvAPI_GetDisplayDriverVersion(hDisplay, &Version);
+        if (nvapiStatus == NVAPI_OK) break;
+    }
+#endif
+#else
+
+#ifdef __APPLE__
+    cudalib = dlopen("/usr/local/cuda/lib/libcuda.dylib", RTLD_NOW);
+#else
+    cudalib = dlopen("libcuda.so", RTLD_NOW);
+#endif
+    if (!cudalib) {
+        warnings.push_back("No NVIDIA library found");
+        return;
+    }
+    __cuDeviceGetCount = (int(*)(int*)) dlsym(cudalib, "cuDeviceGetCount");
+    __cuDriverGetVersion = (int(*)(int*)) dlsym( cudalib, "cuDriverGetVersion" );
+    __cuInit = (int(*)(int)) dlsym( cudalib, "cuInit" );
+    __cuDeviceGet = (int(*)(int*, int)) dlsym( cudalib, "cuDeviceGet" );
+    __cuDeviceGetAttribute = (int(*)(int*, int, int)) dlsym( cudalib, "cuDeviceGetAttribute" );
+    __cuDeviceGetName = (int(*)(char*, int, int)) dlsym( cudalib, "cuDeviceGetName" );
+    __cuDeviceTotalMem = (int(*)(unsigned int*, int)) dlsym( cudalib, "cuDeviceTotalMem" );
+    __cuDeviceComputeCapability = (int(*)(int*, int*, int)) dlsym( cudalib, "cuDeviceComputeCapability" );
+    __cuCtxCreate = (int(*)(unsigned int*, unsigned int, unsigned int)) dlsym( cudalib, "cuCtxCreate" );
+    __cuCtxDestroy = (int(*)(unsigned int)) dlsym( cudalib, "cuCtxDestroy" );
+    __cuMemAlloc = (int(*)(unsigned int*, unsigned int)) dlsym( cudalib, "cuMemAlloc" );
+    __cuMemFree = (int(*)(unsigned int)) dlsym( cudalib, "cuMemFree" );
+#endif
+
+    // Refuse to continue if an entry point could not be resolved.
+    // NOTE(review): __cuDeviceGetName is resolved above but is not
+    // checked here.
+    if (!__cuDriverGetVersion) {
+        warnings.push_back("cuDriverGetVersion() missing from NVIDIA library");
+        return;
+    }
+    if (!__cuInit) {
+        warnings.push_back("cuInit() missing from NVIDIA library");
+        return;
+    }
+    if (!__cuDeviceGetCount) {
+        warnings.push_back("cuDeviceGetCount() missing from NVIDIA library");
+        return;
+    }
+    if (!__cuDeviceGet) {
+        warnings.push_back("cuDeviceGet() missing from NVIDIA library");
+        return;
+    }
+    if (!__cuDeviceGetAttribute) {
+        warnings.push_back("cuDeviceGetAttribute() missing from NVIDIA library");
+        return;
+    }
+    if (!__cuDeviceTotalMem) {
+        warnings.push_back("cuDeviceTotalMem() missing from NVIDIA library");
+        return;
+    }
+    if (!__cuDeviceComputeCapability) {
+        warnings.push_back("cuDeviceComputeCapability() missing from NVIDIA library");
+        return;
+    }
+    if (!__cuCtxCreate) {
+        warnings.push_back("cuCtxCreate() missing from NVIDIA library");
+        return;
+    }
+    if (!__cuCtxDestroy) {
+        warnings.push_back("cuCtxDestroy() missing from NVIDIA library");
+        return;
+    }
+    if (!__cuMemAlloc) {
+        warnings.push_back("cuMemAlloc() missing from NVIDIA library");
+        return;
+    }
+    if (!__cuMemFree) {
+        warnings.push_back("cuMemFree() missing from NVIDIA library");
+        return;
+    }
+
+    retval = (*__cuInit)(0);
+    if (retval) {
+        sprintf(buf, "NVIDIA drivers present but no GPUs found");
+        warnings.push_back(buf);
+        return;
+    }
+
+    int cuda_version;
+    retval = (*__cuDriverGetVersion)(&cuda_version);
+    if (retval) {
+        sprintf(buf, "cuDriverGetVersion() returned %d", retval);
+        warnings.push_back(buf);
+        return;
+    }
+
+    vector<COPROC_CUDA> gpus;
+    retval = (*__cuDeviceGetCount)(&count);
+    if (retval) {
+        sprintf(buf, "cuDeviceGetCount() returned %d", retval);
+        warnings.push_back(buf);
+        return;
+    }
+    sprintf(buf, "NVIDIA library reports %d GPU%s", count, (count==1)?"":"s");
+    warnings.push_back(buf);
+
+    int j;
+    unsigned int i;
+    COPROC_CUDA cc;
+    string s;
+    // NOTE(review): text lost in extraction between "j<" and "> 100":
+    // the loop header and the per-device queries that fill in cc.
+    // The surviving fragment is kept verbatim.
+    for (j=0; j 100) continue;  // e.g. 9999 is an error
+#if defined(_WIN32) && !defined(SIM)
+        cc.display_driver_version = Version.drvVersion;
+#else
+        cc.display_driver_version = 0;
+#endif
+        cc.cuda_version = cuda_version;
+        cc.device_num = j;
+        gpus.push_back(cc);
+    }
+
+    if (!gpus.size()) {
+        warnings.push_back("No CUDA-capable NVIDIA GPUs found");
+        return;
+    }
+
+    // identify the most capable non-ignored instance
+    //
+    COPROC_CUDA best;
+    bool first = true;
+    // NOTE(review): text lost in extraction between "i<" and "> 0":
+    // the loop header and the start of the loop body.
+    // The surviving fragment is kept verbatim.
+    for (i=0; i 0) {
+            best = gpus[i];
+        }
+    }
+
+    // see which other instances are equivalent,
+    // and set the "count" and "device_nums" fields
+    //
+    best.count = 0;
+    // NOTE(review): text lost in extraction between "i<" and "->type":
+    // the remainder of COPROC_CUDA::get() and the opening lines of
+    // fake_cuda().  What follows the fragment is the body of
+    // fake_cuda(), which fills in a made-up NVIDIA GPU description
+    // and adds it to coprocs.
+    for (i=0; itype, "CUDA");
+    cc->count = count;
+    for (int i=0; i<count; i++) {
+        cc->device_nums[i] = i;
+    }
+    cc->display_driver_version = 18000;
+    cc->cuda_version = 2020;
+    strcpy(cc->prop.name, "Fake NVIDIA GPU");
+    cc->prop.totalGlobalMem = 256*1024*1024;
+    cc->prop.sharedMemPerBlock = 100;
+    cc->prop.regsPerBlock = 8;
+    cc->prop.warpSize = 10;
+    cc->prop.memPitch = 10;
+    cc->prop.maxThreadsPerBlock = 20;
+    cc->prop.maxThreadsDim[0] = 2;
+    cc->prop.maxThreadsDim[1] = 2;
+    cc->prop.maxThreadsDim[2] = 2;
+    cc->prop.maxGridSize[0] = 10;
+    cc->prop.maxGridSize[1] = 10;
+    cc->prop.maxGridSize[2] = 10;
+    cc->prop.totalConstMem = 10;
+    cc->prop.major = 1;
+    cc->prop.minor = 2;
+    cc->prop.clockRate = 1250000;
+    cc->prop.textureAlignment = 1000;
+    cc->prop.multiProcessorCount = 14;
+    coprocs.coprocs.push_back(cc);
+}
+
+// check whether each GPU is running a graphics app (assume yes)
+// return true if there's been a change since last time
+//
+bool COPROC_CUDA::check_running_graphics_app() {
+    int retval, j;
+    bool change = false;
+    // NOTE(review): text lost in extraction between "j<" and "& descs":
+    // the body of check_running_graphics_app(), the declarations of
+    // the CAL entry points (__calInit etc.) and the opening of
+    // COPROC_ATI::get().  The surviving fragment is kept verbatim;
+    // what follows is the parameter list and body of COPROC_ATI::get(),
+    // which detects ATI GPUs through the CAL runtime library.
+    for (j=0; j& descs, vector<string>& warnings, vector<int>& ignore_devs
+) {
+    CALuint numDevices, cal_major, cal_minor, cal_imp;
+    CALdevice device;
+    CALdeviceinfo info;
+    CALdeviceattribs attribs;
+    char buf[256];
+    bool amdrt_detected = false;
+    bool atirt_detected = false;
+    int retval;
+
+    attribs.struct_size = sizeof(CALdeviceattribs);
+    device = 0;
+    numDevices =0;
+
+#ifdef _WIN32
+
+#if defined _M_X64
+    const char* atilib_name = "aticalrt64.dll";
+    const char* amdlib_name = "amdcalrt64.dll";
+#else
+    const char* atilib_name = "aticalrt.dll";
+    const char* amdlib_name = "amdcalrt.dll";
+#endif
+
+    // try the ATI-named runtime first, then the AMD-named one
+    HINSTANCE callib = LoadLibrary(atilib_name);
+    if (callib) {
+        atirt_detected = true;
+    } else {
+        callib = LoadLibrary(amdlib_name);
+        if (callib) {
+            amdrt_detected = true;
+        }
+    }
+
+    if (!callib) {
+        warnings.push_back("No ATI library found.");
+        return;
+    }
+
+    __calInit = (ATI_GDI)GetProcAddress(callib, "calInit" );
+    __calGetVersion = (ATI_VER)GetProcAddress(callib, "calGetVersion" );
+    __calDeviceGetCount = (ATI_GDC)GetProcAddress(callib, "calDeviceGetCount" );
+    __calDeviceGetAttribs =(ATI_ATTRIBS)GetProcAddress(callib, "calDeviceGetAttribs" );
+    __calShutdown = (ATI_CLOSE)GetProcAddress(callib, "calShutdown" );
+    __calDeviceGetInfo = (ATI_INFO)GetProcAddress(callib, "calDeviceGetInfo" );
+
+#else
+
+    void* callib;
+
+    callib = dlopen("libaticalrt.so", RTLD_NOW);
+    if (!callib) {
+        warnings.push_back("No ATI library found");
+        return;
+    }
+
+    atirt_detected = true;
+
+    __calInit = (int(*)()) dlsym(callib, "calInit");
+    __calGetVersion = (int(*)(CALuint*, CALuint*, CALuint*)) dlsym(callib, "calGetVersion");
+    __calDeviceGetCount = (int(*)(CALuint*)) dlsym(callib, "calDeviceGetCount");
+    __calDeviceGetAttribs = (int(*)(CALdeviceattribs*, CALuint)) dlsym(callib, "calDeviceGetAttribs");
+    __calShutdown = (int(*)()) dlsym(callib, "calShutdown");
+    __calDeviceGetInfo = (int(*)(CALdeviceinfo*, CALuint)) dlsym(callib, "calDeviceGetInfo");
+
+#endif
+
+    // Refuse to continue if an entry point could not be resolved.
+    // NOTE(review): __calShutdown is resolved above but is not
+    // checked here.
+    if (!__calInit) {
+        warnings.push_back("calInit() missing from CAL library");
+        return;
+    }
+    if (!__calGetVersion) {
+        warnings.push_back("calGetVersion() missing from CAL library");
+        return;
+    }
+    if (!__calDeviceGetCount) {
+        warnings.push_back("calDeviceGetCount() missing from CAL library");
+        return;
+    }
+    if (!__calDeviceGetAttribs) {
+        warnings.push_back("calDeviceGetAttribs() missing from CAL library");
+        return;
+    }
+    if (!__calDeviceGetInfo) {
+        warnings.push_back("calDeviceGetInfo() missing from CAL library");
+        return;
+    }
+
+    retval = (*__calInit)();
+    if (retval != CAL_RESULT_OK) {
+        sprintf(buf, "calInit() returned %d", retval);
+        warnings.push_back(buf);
+        return;
+    }
+    retval = (*__calDeviceGetCount)(&numDevices);
+    if (retval != CAL_RESULT_OK) {
+        sprintf(buf, "calDeviceGetCount() returned %d", retval);
+        warnings.push_back(buf);
+        return;
+    }
+    retval = (*__calGetVersion)(&cal_major, &cal_minor, &cal_imp);
+    if (retval != CAL_RESULT_OK) {
+        sprintf(buf, "calGetVersion() returned %d", retval);
+        warnings.push_back(buf);
+        return;
+    }
+
+    if (!numDevices) {
+        warnings.push_back("No usable CAL devices found");
+        return;
+    }
+
+    COPROC_ATI cc, cc2;
+    string s, gpu_name;
+    vector<COPROC_ATI> gpus;
+    // NOTE(review): text lost in extraction between "i<" and
+    // "> best.peak_flops()": the device enumeration loop and the start
+    // of the loop that picks the best GPU and builds the description
+    // strings.  The surviving fragment is kept verbatim.
+    for (CALuint i=0; i best.peak_flops()) {
+                best = gpus[i];
+            }
+            sprintf(buf2, "ATI GPU %d: %s", gpus[i].device_num, buf);
+        }
+        descs.push_back(buf2);
+    }
+    best.count = 0;
+    // NOTE(review): text lost in extraction between "i<" and "->type":
+    // the loop that fills in best.count / best.device_nums and the
+    // creation of ccp.  The surviving fragment is kept verbatim.
+    for (unsigned int i=0; itype, "ATI");
+    coprocs.coprocs.push_back(ccp);
+}
+
+// Add a made-up ATI GPU description with "count" instances to coprocs.
+// The caller-visible result is a new heap-allocated COPROC_ATI appended
+// to coprocs.coprocs, with device numbers 0..count-1.
+//
+void fake_ati(COPROCS& coprocs, int count) {
+    COPROC_ATI* cc = new COPROC_ATI;
+    strcpy(cc->type, "ATI");
+    strcpy(cc->version, "1.4.3");
+    cc->count = count;
+    cc->attribs.numberOfSIMD = 32;
+    cc->attribs.wavefrontSize = 32;
+    cc->attribs.engineClock = 50;
+    for (int i=0; i<count; i++) {
+        cc->device_nums[i] = i;
+    }
+    coprocs.coprocs.push_back(cc);
+}