cuda.cu -> fix boinc_msg_prefix()

common.mk, Makefile for Linux
readme.txt for troubleshooting 

svn path=/trunk/boinc/; revision=21798
This commit is contained in:
Tuan Le 2010-06-23 23:20:25 +00:00
parent 3405baeff3
commit ea59d1b326
6 changed files with 564 additions and 36 deletions

16
samples/nvcuda/Makefile Normal file
View File

@ -0,0 +1,16 @@
# Tuan Le
# University of California, Berkeley
# Berkeley Space Sciences Lab
# tuanle86@berkeley.edu
# Add source files here
EXECUTABLE := example_app_nvcuda
# CUDA source files (compiled by the CUDA compiler)
CUFILES := cuda.cu
# C/C++ source files (compiled with gcc / g++).  This sample has none, so the
# list is left empty.  Do not end the line with a backslash: that would splice
# the following comment line into the assignment.
CCFILES :=
################################################################################
# Rules and targets
include common.mk

464
samples/nvcuda/common.mk Normal file
View File

@ -0,0 +1,464 @@
# Tuan Le
# University of California, Berkeley
# Berkeley Space Sciences Lab
# tuanle86@berkeley.edu
# Suffixes registered with make's legacy suffix-rule machinery.  The build
# rules further down in this file are written as pattern rules.
.SUFFIXES : .cu .cu_dbg.o .c_dbg.o .cpp_dbg.o .cu_rel.o .c_rel.o .cpp_rel.o .cubin .ptx
# Add new SM Versions here as devices with new Compute Capability are released
# (SMVERSION_template is instantiated once per entry).
SM_VERSIONS := 10 11 12 13 20
# Root of the CUDA toolkit.  "?=" keeps a value that is already set; a
# non-empty "cuda-install" variable (e.g. "make cuda-install=/opt/cuda")
# takes precedence over both.
CUDA_INSTALL_PATH ?= /usr/local/cuda
ifdef cuda-install
CUDA_INSTALL_PATH := $(cuda-install)
endif
# Detect the host OS once, at parse time.
#  * ":=" makes each $(shell ...) run a single time instead of on every
#    expansion of the variable.
#  * The tr character classes are quoted so the shell cannot glob-expand
#    "[:lower:]" against single-letter file names in the current directory.
OSUPPER := $(shell uname -s 2>/dev/null | tr '[:lower:]' '[:upper:]')
OSLOWER := $(shell uname -s 2>/dev/null | tr '[:upper:]' '[:lower:]')
# 'linux' is output for Linux system, 'darwin' for OS X
DARWIN := $(strip $(findstring DARWIN, $(OSUPPER)))
ifneq ($(DARWIN),)
  # Non-empty only on Mac OS X 10.6 ("grep -E" replaces the obsolescent egrep).
  SNOWLEOPARD := $(strip $(findstring 10.6, $(shell grep -E "<string>10\.6" /System/Library/CoreServices/SystemVersion.plist)))
endif
# detect 32-bit or 64-bit platform: HP_64 is empty on a 32-bit host
HP_64 := $(shell uname -m | grep 64)
OSARCH := $(shell uname -m)
# Basic directory setup for SDK
# (override directories only if they are not already defined)
SRCDIR ?=
ROOTDIR ?= ..
# ROOTBINDIR is the directory under which the executable file will be put
ROOTBINDIR ?= ../../samples/nvcuda
BINDIR ?= $(ROOTBINDIR)/$(OSLOWER)
ROOTOBJDIR ?= obj
# BOINC directory
BOINC_DIR = ../..
BOINC_API_DIR = $(BOINC_DIR)/api
BOINC_LIB_DIR = $(BOINC_DIR)/lib
# Root of the NVIDIA GPU Computing SDK.  This is the one machine-specific path
# here: edit it, or override it without touching the file, e.g.
#   make NVIDIA_SDK_DIR=$HOME/NVIDIA_GPU_Computing_SDK
NVIDIA_SDK_DIR ?= /home/tuanle/NVIDIA_GPU_Computing_SDK
# Directory for cutil_i386 as well as other utils while linking (-lcutil_i386)
LIBDIR := $(NVIDIA_SDK_DIR)/C/lib
COMMONDIR := $(NVIDIA_SDK_DIR)/C/common
SHAREDDIR := $(NVIDIA_SDK_DIR)/shared
# Tool chain: nvcc from the CUDA toolkit, GCC 4.3 for host code and linking.
NVCC := $(CUDA_INSTALL_PATH)/bin/nvcc
CC := gcc-4.3
CXX := g++-4.3
LINK := g++-4.3 -fPIC
# Header search path: current directory, CUDA toolkit, SDK common/shared
# headers, then the BOINC api, lib and top-level directories.
INCLUDES += \
  -I. \
  -I$(CUDA_INSTALL_PATH)/include \
  -I$(COMMONDIR)/inc \
  -I$(SHAREDDIR)/inc \
  -I $(BOINC_API_DIR) \
  -I $(BOINC_LIB_DIR) \
  -I $(BOINC_DIR)
# Warning flags
# Warnings understood by both the C and the C++ compiler.
CXXWARN_FLAGS := \
  -W -Wall \
  -Wswitch \
  -Wformat \
  -Wchar-subscripts \
  -Wparentheses \
  -Wmultichar \
  -Wtrigraphs \
  -Wpointer-arith \
  -Wcast-align \
  -Wreturn-type \
  -Wno-unused-function
# C-only additions.  -Wimplicit lives here because GCC documents it as valid
# for C/Objective-C only (g++ reports it as not applicable to C++).
# The last entry carries no trailing backslash, so the assignment ends here
# instead of running on into whatever line follows.
CWARN_FLAGS := $(CXXWARN_FLAGS) \
  -Wimplicit \
  -Wstrict-prototypes \
  -Wmissing-prototypes \
  -Wmissing-declarations \
  -Wnested-externs \
  -Wmain
# architecture flag for nvcc and gcc compilers build
CUBIN_ARCH_FLAG :=
CXX_ARCH_FLAGS :=
NVCCFLAGS :=
LIB_ARCH := $(OSARCH)
# Select the target word size.  Precedence: x86_64=1, then i386=1, then the
# Snow Leopard special case, and finally whatever the host reports (HP_64).
ifeq ($(x86_64),1)
  # explicit 64-bit build (cross compilation on a 32-bit OS)
  NVCCFLAGS += -m64
  LIB_ARCH = x86_64
  CUDPPLIB_SUFFIX = x86_64
  ifneq ($(DARWIN),)
    CXX_ARCH_FLAGS += -arch x86_64
  else
    CXX_ARCH_FLAGS += -m64
  endif
else ifeq ($(i386),1)
  # explicit 32-bit build (cross compilation on a 64-bit OS)
  NVCCFLAGS += -m32
  LIB_ARCH = i386
  CUDPPLIB_SUFFIX = i386
  ifneq ($(DARWIN),)
    CXX_ARCH_FLAGS += -arch i386
  else
    CXX_ARCH_FLAGS += -m32
  endif
else ifneq ($(SNOWLEOPARD),)
  # Mac OS X 10.6 with no explicit request: build 32-bit
  NVCCFLAGS += -m32
  CXX_ARCH_FLAGS += -m32 -arch i386
  LIB_ARCH = i386
  CUDPPLIB_SUFFIX = i386
else ifeq ($(strip $(HP_64)),)
  # host is 32-bit: only the library suffixes are set, no -m flag is added
  LIB_ARCH = i386
  CUDPPLIB_SUFFIX = i386
else
  # host is 64-bit: only the library suffixes are set, no -m flag is added
  LIB_ARCH = x86_64
  CUDPPLIB_SUFFIX = x86_64
endif
# Compiler-specific flags (by default, we always use sm_10 and sm_20), unless we use the SMVERSION template
GENCODE_SM10 := -gencode=arch=compute_10,code=\"sm_10,compute_10\"
GENCODE_SM20 := -gencode=arch=compute_20,code=\"sm_20,compute_20\"
# Host compiler flags: warnings plus the architecture switches chosen above.
CFLAGS += $(CWARN_FLAGS) $(CXX_ARCH_FLAGS)
CXXFLAGS += $(CXXWARN_FLAGS) $(CXX_ARCH_FLAGS)
# LINK is simply expanded, so LINKFLAGS has to be final before this line.
LINKFLAGS +=
LINK += $(LINKFLAGS) $(CXX_ARCH_FLAGS)
ifneq ($(DARWIN),)
  # On the Mac an rpath lets CUDA applications run without DYLD_LIBRARY_PATH.
  LINK += -Xlinker -rpath $(CUDA_INSTALL_PATH)/lib
endif
# Flags shared by nvcc, gcc and g++
COMMONFLAGS += $(INCLUDES) -DUNIX
# Build mode: "make dbg=1" gives a debug build, anything else a release build.
ifeq ($(dbg),1)
  BINSUBDIR := debug
  LIBSUFFIX := D
  COMMONFLAGS += -g
  NVCCFLAGS += -D_DEBUG
  CXXFLAGS += -D_DEBUG
  CFLAGS += -D_DEBUG
else
  BINSUBDIR := release
  LIBSUFFIX :=
  COMMONFLAGS += -O2
  NVCCFLAGS += --compiler-options -fno-strict-aliasing
  CXXFLAGS += -fno-strict-aliasing
  CFLAGS += -fno-strict-aliasing
endif
# architecture flag for cubin build (reset to empty)
CUBIN_ARCH_FLAG :=
# OpenGL is used or not (if it is used, then it is necessary to include GLEW)
# OPENGLLIB is only populated when USEGLLIB=1 and/or USEGLUT=1.
ifeq ($(USEGLLIB),1)
ifneq ($(DARWIN),)
# Mac: GL/GLU from the OpenGL framework plus the SDK's static GLEW archive
OPENGLLIB := -L/System/Library/Frameworks/OpenGL.framework/Libraries
OPENGLLIB += -lGL -lGLU $(COMMONDIR)/lib/$(OSLOWER)/libGLEW.a
else
# this case for linux platforms
OPENGLLIB := -lGL -lGLU -lX11 -lXi -lXmu
# check if x86_64 flag has been set, otherwise, check HP_64 is i386/x86_64
ifeq ($(x86_64),1)
OPENGLLIB += -lGLEW_x86_64 -L/usr/X11R6/lib64
else
ifeq ($(i386),)
ifeq "$(strip $(HP_64))" ""
OPENGLLIB += -lGLEW -L/usr/X11R6/lib
else
OPENGLLIB += -lGLEW_x86_64 -L/usr/X11R6/lib64
endif
endif
endif
# check if i386 flag has been set, otherwise check HP_64 is i386/x86_64
# NOTE(review): when neither x86_64 nor i386 is set, both this check and the
# one above take their HP_64 branch, so the GLEW flags are appended twice.
ifeq ($(i386),1)
OPENGLLIB += -lGLEW -L/usr/X11R6/lib
else
ifeq ($(x86_64),)
ifeq "$(strip $(HP_64))" ""
OPENGLLIB += -lGLEW -L/usr/X11R6/lib
else
OPENGLLIB += -lGLEW_x86_64 -L/usr/X11R6/lib64
endif
endif
endif
endif
endif
# GLUT: a framework on the Mac, -lglut elsewhere (with an explicit library
# directory only when x86_64=1 or i386=1 was requested)
ifeq ($(USEGLUT),1)
ifneq ($(DARWIN),)
OPENGLLIB += -framework GLUT
else
ifeq ($(x86_64),1)
OPENGLLIB += -lglut -L/usr/lib64
endif
ifeq ($(i386),1)
OPENGLLIB += -lglut -L/usr/lib
endif
ifeq ($(x86_64),)
ifeq ($(i386),)
OPENGLLIB += -lglut
endif
endif
endif
endif
# Optional SDK helper libraries; each one is linked only when its USE*
# variable is set to 1.
ifeq ($(USEPARAMGL),1)
  PARAMGLLIB := -lparamgl_$(LIB_ARCH)$(LIBSUFFIX)
endif
ifeq ($(USERENDERCHECKGL),1)
  RENDERCHECKGLLIB := -lrendercheckgl_$(LIB_ARCH)$(LIBSUFFIX)
endif
ifeq ($(USECUDPP), 1)
  CUDPPLIB := -lcudpp_$(CUDPPLIB_SUFFIX)$(LIBSUFFIX)
  # emulation builds link the "_emu" flavour of cudpp
  ifeq ($(emu), 1)
    CUDPPLIB := $(CUDPPLIB)_emu
  endif
endif
ifeq ($(USENVCUVID), 1)
  ifneq ($(DARWIN),)
    # Derived from COMMONDIR rather than repeating an absolute home-directory
    # path, so relocating the SDK only requires changing COMMONDIR.
    NVCUVIDLIB := -L$(COMMONDIR)/lib/darwin -lnvcuvid
  endif
endif
# Libs
# Library search path.  The CUDA directory is "lib" for 32-bit builds and on
# the Mac, "lib64" for 64-bit Linux builds; the SDK directories are always
# searched as well.
ifneq ($(DARWIN),)
  LIB := -L$(CUDA_INSTALL_PATH)/lib -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER) -L$(SHAREDDIR)/lib $(NVCUVIDLIB)
else ifeq ($(strip $(HP_64)),)
  # 32-bit host: 64-bit CUDA libraries only when x86_64=1 was requested
  ifeq ($(x86_64),1)
    LIB := -L$(CUDA_INSTALL_PATH)/lib64 -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER) -L$(SHAREDDIR)/lib
  else
    LIB := -L$(CUDA_INSTALL_PATH)/lib -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER) -L$(SHAREDDIR)/lib
  endif
else
  # 64-bit host: 32-bit CUDA libraries only when i386=1 was requested
  ifeq ($(i386),1)
    LIB := -L$(CUDA_INSTALL_PATH)/lib -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER) -L$(SHAREDDIR)/lib
  else
    LIB := -L$(CUDA_INSTALL_PATH)/lib64 -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER) -L$(SHAREDDIR)/lib
  endif
endif
# Assemble the final library list.  LIB was assigned with ":=" above, so every
# "+=" below is expanded immediately; the trailing ${LIB} on a line therefore
# re-appends the value LIB had just before that line (the search paths, plus
# -lcudart/-lcudartemu in the runtime-API case).
# NOTE(review): that self-reference looks redundant, but link order can
# matter; confirm before removing it.
# If dynamically linking to CUDA and CUDART, we exclude the libraries from the LIB
ifeq ($(USECUDADYNLIB),1)
LIB += ${OPENGLLIB} $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB) ${LIB} -ldl -rdynamic
else
# static linking, we will statically link against CUDA and CUDART
ifeq ($(USEDRVAPI),1)
# driver API: link libcuda only
LIB += -lcuda ${OPENGLLIB} $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB) ${LIB}
else
# runtime API: libcudart, or its emulation flavour when emu=1
ifeq ($(emu),1)
LIB += -lcudartemu
else
LIB += -lcudart
endif
LIB += ${OPENGLLIB} $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB) ${LIB}
endif
endif
# Optional CUFFT, selected with USECUFFT=1 (emulation flavour when emu=1)
ifeq ($(USECUFFT),1)
ifeq ($(emu),1)
LIB += -lcufftemu
else
LIB += -lcufft
endif
endif
# Optional CUBLAS, selected with USECUBLAS=1 (emulation flavour when emu=1)
ifeq ($(USECUBLAS),1)
ifeq ($(emu),1)
LIB += -lcublasemu
else
LIB += -lcublas
endif
endif
# Lib/exe configuration
# A non-empty STATIC_LIB builds an archive into LIBDIR; otherwise an
# executable is linked into BINDIR/<build mode>.
ifneq ($(STATIC_LIB),)
  TARGETDIR := $(LIBDIR)
  TARGET := $(subst .a,_$(LIB_ARCH)$(LIBSUFFIX).a,$(LIBDIR)/$(STATIC_LIB))
  LINKLINE = ar rucv $(TARGET) $(OBJS)
else
  ifneq ($(OMIT_CUTIL_LIB),1)
    # TUAN: the BOINC libraries (-lboinc_api, -lboinc) are linked together
    # with cutil/shrutil
    LIB += -lcutil_$(LIB_ARCH)$(LIBSUFFIX) -lshrutil_$(LIB_ARCH)$(LIBSUFFIX) -lboinc_api -L$(BOINC_API_DIR) -lboinc -L$(BOINC_LIB_DIR)
  endif
  # Device emulation configuration ("make emu=1")
  ifeq ($(emu), 1)
    NVCCFLAGS += -deviceemu
    CUDACCFLAGS +=
    BINSUBDIR := emu$(BINSUBDIR)
    # defined for host code as well: consistency makes developing easier
    CXXFLAGS += -D__DEVICE_EMULATION__
    CFLAGS += -D__DEVICE_EMULATION__
  endif
  TARGETDIR := $(BINDIR)/$(BINSUBDIR)
  TARGET := $(TARGETDIR)/$(EXECUTABLE)
  LINKLINE = $(LINK) -o $(TARGET) $(OBJS) $(LIB)
endif
# Recipes are prefixed with $(VERBOSE): "@" hides the command lines unless
# the build is run with "make verbose=1".
ifneq ($(verbose), 1)
  VERBOSE := @
else
  VERBOSE :=
endif
################################################################################
# Translate command-line switches (fastmath=1, keep=1, maxregisters=N,
# nvcc_warn_verbose=1) into compiler flags
################################################################################
ifeq ($(fastmath), 1)
  NVCCFLAGS += -use_fast_math
endif
ifeq ($(keep), 1)
  NVCCFLAGS += -keep
  # intermediate files left behind by -keep; removed again by "make clean"
  NVCC_KEEP_CLEAN := *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx
endif
ifdef maxregisters
  NVCCFLAGS += -maxrregcount $(maxregisters)
endif
# nvcc receives the cudacc flags, the common flags and the host compiler
# directory (the SDK's mygcc directory; the original note says "use gcc-4.3")
NVCCFLAGS += $(CUDACCFLAGS) $(COMMONFLAGS) --compiler-bindir=${HOME}/NVIDIA_GPU_Computing_SDK/C/mygcc
# the host compilers receive the common flags only
CXXFLAGS += $(COMMONFLAGS)
CFLAGS += $(COMMONFLAGS)
ifeq ($(nvcc_warn_verbose),1)
  # forward every host warning flag through nvcc to the host compiler
  NVCCFLAGS += $(addprefix --compiler-options ,$(CXXWARN_FLAGS))
  NVCCFLAGS += --compiler-options -fno-strict-aliasing
endif
################################################################################
# Output file lists
################################################################################
# Objects are written to obj/<arch>/<build mode>/ and keep their source
# extension in the name (foo.cpp -> foo.cpp.o, bar.cu -> bar.cu.o).
OBJDIR := $(ROOTOBJDIR)/$(LIB_ARCH)/$(BINSUBDIR)
OBJS += \
  $(patsubst %.cpp,$(OBJDIR)/%.cpp.o,$(notdir $(CCFILES))) \
  $(patsubst %.c,$(OBJDIR)/%.c.o,$(notdir $(CFILES))) \
  $(patsubst %.cu,$(OBJDIR)/%.cu.o,$(notdir $(CUFILES)))
# cubin outputs, one per entry of CUBINFILES, go to $(SRCDIR)data
CUBINDIR := $(SRCDIR)data
CUBINS += $(patsubst %.cu,$(CUBINDIR)/%.cubin,$(notdir $(CUBINFILES)))
# PTX outputs, one per entry of PTXFILES, go to the same data directory
PTXDIR := $(SRCDIR)data
PTXBINS += $(patsubst %.cu,$(PTXDIR)/%.ptx,$(notdir $(PTXFILES)))
################################################################################
# Rules
################################################################################
# Every recipe line below starts with a hard TAB.
#
# Output directories are order-only prerequisites (after "|"): the directory
# targets never exist as files, so listing them as normal prerequisites would
# force a rebuild on every run, and leaving them out lets "make -j" start
# compiling before the directories exist.
#
# Host C / C++ sources.  Only compilation happens here (-c); the BOINC
# libraries are linked through $(LIB) on the link line, so the .a archives are
# not passed to the compiler, which would only warn about unused linker input.
$(OBJDIR)/%.c.o : $(SRCDIR)%.c $(C_DEPS) | makedirectories
	$(VERBOSE)$(CC) $(CFLAGS) -pthread -o $@ -c $<
$(OBJDIR)/%.cpp.o : $(SRCDIR)%.cpp $(C_DEPS) | makedirectories
	$(VERBOSE)$(CXX) $(CXXFLAGS) -pthread -o $@ -c $<
# Default arch includes gencode for sm_10, sm_20, and other archs from GENCODE_ARCH declared in the makefile
$(OBJDIR)/%.cu.o : $(SRCDIR)%.cu $(CU_DEPS) | makedirectories
	$(VERBOSE)$(NVCC) $(GENCODE_SM10) $(GENCODE_ARCH) $(GENCODE_SM20) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -c $<
# Default arch includes gencode for sm_10, sm_20, and other archs from GENCODE_ARCH declared in the makefile
$(CUBINDIR)/%.cubin : $(SRCDIR)%.cu | cubindirectory
	$(VERBOSE)$(NVCC) $(GENCODE_SM10) $(GENCODE_ARCH) $(GENCODE_SM20) $(CUBIN_ARCH_FLAG) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -cubin $<
# PTX output: no gencode flags are passed for this rule
$(PTXDIR)/%.ptx : $(SRCDIR)%.cu | ptxdirectory
	$(VERBOSE)$(NVCC) $(CUBIN_ARCH_FLAG) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -ptx $<
#
# The following definition is a template that gets instantiated for each SM
# version (10, 13, 20, etc.) stored in SM_VERSIONS. It does 2 things:
# 1. It adds to OBJS a .cu_XX.o for each .cu file it finds in CUFILES_sm_XX.
# 2. It generates a rule for building .cu_XX.o files from the corresponding
#    .cu file.
#
# The intended use for this is to allow Makefiles that use common.mk to compile
# files to different Compute Capability targets (aka SM arch version). To do
# so, in the Makefile, list files for each SM arch separately, like so:
#
# CUFILES_sm_10 := mycudakernel_sm10.cu app.cu
# CUFILES_sm_12 := anothercudakernel_sm12.cu
#
# Files listed this way are built by the generated rule instead of the default
# .cu rule above.
#
# Expansion notes: $(1) is the SM version; $(NVCCFLAGS), $(VERBOSE) and the
# other single-$ references are expanded when the template is called, while
# $$@ and $$< survive as $@ / $< for the generated rule.  The recipe line
# starts with a hard TAB.
define SMVERSION_template
OBJS += $(patsubst %.cu,$(OBJDIR)/%.cu_$(1).o,$(notdir $(CUFILES_sm_$(1))))
$(OBJDIR)/%.cu_$(1).o : $(SRCDIR)%.cu $(CU_DEPS)
	$(VERBOSE)$(NVCC) -gencode=arch=compute_$(1),code=\"sm_$(1),compute_$(1)\" $(GENCODE_SM20) -o $$@ -c $$< $(NVCCFLAGS)
endef
# This line invokes the above template for each arch version stored in
# SM_VERSIONS. The call function invokes the template, and the eval
# function interprets it as make commands.
$(foreach smver,$(SM_VERSIONS),$(eval $(call SMVERSION_template,$(smver))))
# None of these names is a file; declaring them phony keeps a stray file
# called "clean" (etc.) from silently disabling the target.
.PHONY: makedirectories cubindirectory ptxdirectory tidy clean clobber

# Default goal: link the executable (or archive the static library).
# makedirectories is order-only (after "|"): it never exists as a file, so as
# a normal prerequisite it would force a relink on every run.
# Every recipe line below starts with a hard TAB.
$(TARGET): $(OBJS) $(CUBINS) $(PTXBINS) Makefile | makedirectories
	$(VERBOSE)$(LINKLINE)

# Objects are written into $(OBJDIR), so the directories must exist before any
# of them is compiled.  This rule must stay below $(TARGET) so that it does
# not become the default goal.
$(OBJS): | makedirectories

cubindirectory:
	$(VERBOSE)mkdir -p $(CUBINDIR)
ptxdirectory:
	$(VERBOSE)mkdir -p $(PTXDIR)
makedirectories:
	$(VERBOSE)mkdir -p $(LIBDIR)
	$(VERBOSE)mkdir -p $(OBJDIR)
	$(VERBOSE)mkdir -p $(TARGETDIR)

# Remove editor leftovers (names containing '#' or '~').  find deletes the
# files itself, so names with spaces are handled and directories are never
# handed to rm.
tidy :
	$(VERBOSE)find . -type f \( -name '*#*' -o -name '*~*' \) -exec rm -f {} +

# Remove everything the build produced, plus image/data files written into
# the output directory.
clean : tidy
	$(VERBOSE)rm -f $(OBJS)
	$(VERBOSE)rm -f $(CUBINS)
	$(VERBOSE)rm -f $(PTXBINS)
	$(VERBOSE)rm -f $(TARGET)
	$(VERBOSE)rm -f $(NVCC_KEEP_CLEAN)
	$(VERBOSE)rm -f $(ROOTBINDIR)/$(OSLOWER)/$(BINSUBDIR)/*.ppm
	$(VERBOSE)rm -f $(ROOTBINDIR)/$(OSLOWER)/$(BINSUBDIR)/*.pgm
	$(VERBOSE)rm -f $(ROOTBINDIR)/$(OSLOWER)/$(BINSUBDIR)/*.bin
	$(VERBOSE)rm -f $(ROOTBINDIR)/$(OSLOWER)/$(BINSUBDIR)/*.bmp

# clean, then drop the whole object tree as well
clobber : clean
	$(VERBOSE)rm -rf $(ROOTOBJDIR)

View File

@ -1,22 +1,7 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* cuda.cu
* Copyright (C) 2010 Tuan Le
* Tuan Le
* University of California, Berkeley
* Berkeley Space Sciences Lab
* tuanle86@berkeley.edu
*/
@ -45,6 +30,17 @@
#include "mfile.h"
#include "graphics2.h"
// Layout of the shared-memory segment used for graphics: when APP_GRAPHICS is
// defined, main() creates a segment of sizeof(UC_SHMEM) bytes with
// boinc_graphics_make_shmem().
// NOTE(review): the meaning of each field is inferred from its name only
// (progress fraction, CPU time, BOINC status) -- confirm against the code
// that fills the structure.
struct UC_SHMEM {
double update_time;
double fraction_done;
double cpu_time;
BOINC_STATUS status;
int countdown;
// graphics app sets this to 5 repeatedly,
// main program decrements it once/sec.
// If it's zero, don't bother updating shmem
};
#ifdef APP_GRAPHICS
#include "uc2.h"
UC_SHMEM* shmem;
@ -59,7 +55,7 @@ using std::string;
// execute the kernel NUM_ITERATIONS times
#define NUM_ITERATIONS 19
#define NUM_ITERATIONS 51
bool run_slow = false;
bool early_exit = false;
@ -143,13 +139,13 @@ int main(int argc, char** argv)
{
int i, retval, lastInversion=0, checkpointExists=0, dimension=0;
double fd;
char input_path[512], output_path[512], chkpt_path[512];
char input_path[512], output_path[512], chkpt_path[512], buf[256];
REAL* h_idata;
unsigned int mem_size;
MFILE out;
FILE* state, *infile;
//generateRandomInputFile(MATRIX_SIZE); //call this if you don't want to construct the input file manually
generateRandomInputFile(MATRIX_SIZE); //call this if you don't want to construct the input file manually
for (i=0; i<argc; i++) {
if (!strcmp(argv[i], "-early_exit")) early_exit = true;
@ -164,7 +160,7 @@ int main(int argc, char** argv)
retval = boinc_init();
if (retval) {
fprintf(stderr, "%s boinc_init returned %d\n",
boinc_msg_prefix(), retval
boinc_msg_prefix(buf), retval
);
exit(retval);
}
@ -176,7 +172,7 @@ int main(int argc, char** argv)
if (!infile) {
fprintf(stderr,
"%s Couldn't find input file, resolved name %s.\n",
boinc_msg_prefix(), input_path
boinc_msg_prefix(buf), input_path
);
getchar();
exit(-1);
@ -210,10 +206,10 @@ int main(int argc, char** argv)
if (retval) {
fprintf(stderr, "%s APP: matrix_inversion output open failed:\n",
boinc_msg_prefix()
boinc_msg_prefix(buf)
);
fprintf(stderr, "%s resolved name %s, retval %d\n",
boinc_msg_prefix(), output_path, retval
boinc_msg_prefix(buf), output_path, retval
);
perror("open");
exit(1);
@ -222,10 +218,10 @@ int main(int argc, char** argv)
#ifdef APP_GRAPHICS
// create shared mem segment for graphics, and arrange to update it
//
shmem = (UC_SHMEM*)boinc_graphics_make_shmem("uppercase", sizeof(UC_SHMEM));
shmem = (UC_SHMEM*)boinc_graphics_make_shmem("matrix_inversion", sizeof(UC_SHMEM));
if (!shmem) {
fprintf(stderr, "%s failed to create shared mem segment\n",
boinc_msg_prefix()
boinc_msg_prefix(buf)
);
}
update_shmem();
@ -276,7 +272,7 @@ int main(int argc, char** argv)
retval = do_checkpoint(out, i, h_idata, dimension);
if (retval) {
fprintf(stderr, "%s APP: matrix_inversion checkpoint failed %d\n",
boinc_msg_prefix(), retval
boinc_msg_prefix(buf), retval
);
exit(retval);
}
@ -296,7 +292,7 @@ int main(int argc, char** argv)
retval = out.flush(); //force the output file to be closed.
if (retval) {
fprintf(stderr, "%s APP: matrix_inversion flush failed %d\n",
boinc_msg_prefix(), retval
boinc_msg_prefix(buf), retval
);
exit(1);
}
@ -315,7 +311,7 @@ int main(int argc, char** argv)
retval = do_checkpoint(out, NUM_ITERATIONS, h_idata, dimension);
if (retval) {
fprintf(stderr, "%s APP: maxtrix_inversion checkpoint failed %d\n",
boinc_msg_prefix(), retval
boinc_msg_prefix(buf), retval
);
exit(1);
}
@ -442,4 +438,4 @@ void printToFile(MFILE *out, float *h_odata, int dimension) {
}
--num_elements;
}
}
}

View File

@ -1,3 +1,9 @@
/*
* Tuan Le
* University of California, Berkeley
* Berkeley Space Sciences Lab
* tuanle86@berkeley.edu
*/
#ifdef DOUBLE_PRECISION
#define REAL double
@ -9,8 +15,6 @@
#define jREALArray jfloatArray
#endif
inline void __cudaSafeCall( int err, const char *file, const int line )
{
do {

View File

@ -1,3 +1,9 @@
/*
* Tuan Le
* University of California, Berkeley
* Berkeley Space Sciences Lab
* tuanle86@berkeley.edu
*/
// When VERIFY is defined, the sum of squared errors is calculated between the
// identity matrix and the product A * inverse(A). For debugging...
@ -21,7 +27,7 @@ void mathdispAI(const REAL *mat, int lda, int MAT_SIZE_h) {
} // mathdisp2
void mathdispAId(const REAL * AId, int lda, int n) {
REAL * AI = new REAL[n*lda*2];
REAL * AI = (REAL *)malloc(sizeof(REAL)*(n*lda*2));
cudaMemcpy(AI,AId,sizeof(REAL)*n*lda*2,cudaMemcpyDeviceToHost);
mathdispAI(AI, lda, n);
delete [] AI;
@ -90,7 +96,7 @@ fprintf(stderr,"starting inversion n = %d ", n);
int lda = ((n+15)&~15|16);
//lda=n;
REAL * AI = new REAL[n*lda*2];
REAL * AI = (REAL *)malloc(sizeof(REAL)*(n*lda*2));
memset(AI,0,sizeof(REAL)*n*lda*2);
for (int i = 0; i < n; i++) {
memcpy(&AI[lda*i*2], &A[n*i], sizeof(REAL)*n);
@ -134,4 +140,4 @@ fprintf(stderr,"starting inversion n = %d ", n);
}
free(AI);
fprintf(stderr," done!\n");
} // invert
} // invert

42
samples/nvcuda/readme.txt Normal file
View File

@ -0,0 +1,42 @@
Tuan Le
University of California, Berkeley
Berkeley Space Sciences Lab
tuanle86@berkeley.edu
----------------------- Linux Makefile ----------------------
The Makefile setup needs to be edited for your machine before you build. Please follow these steps:
1) Open "boinc/samples/nvcuda/common.mk" with gedit
2) Ctrl+f and search for "tuanle". You will find the following:
LIBDIR := /home/tuanle/NVIDIA_GPU_Computing_SDK/C/lib
COMMONDIR := /home/tuanle/NVIDIA_GPU_Computing_SDK/C/common
SHAREDDIR := /home/tuanle/NVIDIA_GPU_Computing_SDK/shared
...
...
NVCUVIDLIB := -L/home/tuanle/NVIDIA_GPU_Computing_SDK/C/common/lib/darwin -lnvcuvid
3) Replace the paths above with the appropriate paths on your machine.
4) Done!
----------------------- Linux Troubleshooting ----------------
Error: ./example_app_nvcuda: error while loading shared libraries: libcudart.so.3: cannot open
shared object file: No such file or directory
Read: http://developer.download.nvidia.com/compute/cuda/3_0/docs/GettingStartedLinux.pdf (on top of page 6)
Solution: export PATH=/usr/local/cuda/bin:$PATH
export LD_LIBRARY_PATH=/usr/local/cuda/lib:$LD_LIBRARY_PATH
(on a 64-bit system use /usr/local/cuda/lib64 instead of /usr/local/cuda/lib)
----------------------- Run Executable file ------------------
The "make" command creates an executable file in "boinc/samples/nvcuda/linux/release/".
If your machine doesn't have a CUDA-enabled GPU, the executable in the release directory
stops after the statement "Start at inversion #1" is printed on the terminal.
In that case it is best to build in emurelease mode: type "make emu=1", which creates the
executable file in "boinc/samples/nvcuda/linux/emurelease/".