cuda.cu -> fix boinc_msg_prefix()

common.mk, Makefile for Linux readme.txt for troubleshooting svn path=/trunk/boinc/; revision=21798
2010-06-23 23:20:25 +00:00 · 2010-06-23 23:20:25 +00:00 · ea59d1b326
parent 3405baeff3
commit ea59d1b326
6 changed files with 564 additions and 36 deletions
--- a/samples/nvcuda/Makefile
+++ b/samples/nvcuda/Makefile
@ -0,0 +1,16 @@
+# Tuan Le
+# University of California, Berkeley
+# Berkeley Space Sciences Lab
+# tuanle86@berkeley.edu
+
+# Add source files here
+# Name of the program; common.mk links it as $(TARGETDIR)/$(EXECUTABLE).
+EXECUTABLE	:= example_app_nvcuda
+# CUDA source files (compiled with nvcc)
+CUFILES		:= cuda.cu
+# C++ source files (*.cpp, compiled with $(CXX)); this sample has none.
+# The list is left explicitly empty: a dangling "\" continuation here would
+# silently swallow whatever line gets added below it.
+CCFILES		:=
+
+################################################################################
+# Rules and targets
+
+include common.mk
--- a/samples/nvcuda/common.mk
+++ b/samples/nvcuda/common.mk
@ -0,0 +1,464 @@
+# Tuan Le
+# University of California, Berkeley
+# Berkeley Space Sciences Lab
+# tuanle86@berkeley.edu
+
+# Suffixes known to this build.
+.SUFFIXES : .cu .cu_dbg.o .c_dbg.o .cpp_dbg.o .cu_rel.o .c_rel.o .cpp_rel.o .cubin .ptx
+
+# Compute capabilities for which the per-SM rule template near the end of this
+# file is instantiated; append new values as new devices are released.
+SM_VERSIONS := 10 11 12 13 20
+
+# Location of the CUDA toolkit. The default applies only when nothing else set
+# the variable; "make cuda-install=<path>" replaces it.
+CUDA_INSTALL_PATH ?= /usr/local/cuda
+ifdef cuda-install
+  CUDA_INSTALL_PATH := $(cuda-install)
+endif
+
+# detect OS
+# The values are computed once (":=") instead of spawning a shell on every
+# expansion, and the tr character classes are quoted so the shell cannot
+# glob-expand them against single-letter file names in the current directory.
+OSUPPER := $(shell uname -s 2>/dev/null | tr '[:lower:]' '[:upper:]')
+OSLOWER := $(shell uname -s 2>/dev/null | tr '[:upper:]' '[:lower:]')
+
+# OSLOWER is 'linux' on a Linux system and 'darwin' on OS X.
+# DARWIN is non-empty only on OS X; SNOWLEOPARD is non-empty only when the
+# system version file there reports 10.6.
+DARWIN := $(strip $(findstring DARWIN, $(OSUPPER)))
+ifneq ($(DARWIN),)
+   SNOWLEOPARD := $(strip $(findstring 10.6, $(shell grep -E "<string>10\.6" /System/Library/CoreServices/SystemVersion.plist)))
+endif
+
+# detect 32-bit or 64-bit platform
+# HP_64 is non-empty when the machine name printed by "uname -m" contains "64";
+# OSARCH is the machine name itself.
+HP_64  := $(shell uname -m | grep 64)
+OSARCH := $(shell uname -m)
+
+# Basic directory setup for SDK
+# (override directories only if they are not already defined)
+SRCDIR     ?=
+ROOTDIR    ?= ..
+
+# The executable is placed in $(BINDIR)/<build type>, where BINDIR is
+# $(ROOTBINDIR)/<OS name in lower case> and the build type is chosen further
+# down (release, debug, emurelease, ...).
+ROOTBINDIR ?= ../../samples/nvcuda
+BINDIR     ?= $(ROOTBINDIR)/$(OSLOWER)
+ROOTOBJDIR ?= obj
+
+# BOINC directory
+BOINC_DIR = ../..
+BOINC_API_DIR = $(BOINC_DIR)/api
+BOINC_LIB_DIR = $(BOINC_DIR)/lib
+
+# Directory for cutil_i386 as well as other utils while linking (-lcutil_i386).
+# These three paths are machine specific and must be adapted (see readme.txt).
+# Keep the values free of trailing whitespace: with ":=" it becomes part of the
+# value and corrupts paths built as $(LIBDIR)/<file>.
+LIBDIR     := /home/tuanle/NVIDIA_GPU_Computing_SDK/C/lib
+COMMONDIR  := /home/tuanle/NVIDIA_GPU_Computing_SDK/C/common
+SHAREDDIR  := /home/tuanle/NVIDIA_GPU_Computing_SDK/shared
+
+# Compilers
+NVCC       := $(CUDA_INSTALL_PATH)/bin/nvcc
+CXX        := g++-4.3
+CC         := gcc-4.3
+# Link with the selected C++ driver, so that "make CXX=<compiler>" switches the
+# linker together with the compiler (the default is still g++-4.3).
+LINK       := $(CXX) -fPIC
+
+# Includes: this directory, the CUDA toolkit, the SDK headers and the BOINC
+# api/, lib/ and top-level directories.
+INCLUDES  += -I. -I$(CUDA_INSTALL_PATH)/include -I$(COMMONDIR)/inc -I$(SHAREDDIR)/inc -I $(BOINC_API_DIR) -I $(BOINC_LIB_DIR) -I $(BOINC_DIR)
+
+# Warning flags
+# CXXWARN_FLAGS holds the warnings valid for both C and C++; it is used for the
+# C++ compiler and as the base of CWARN_FLAGS.
+CXXWARN_FLAGS := \
+	-W -Wall \
+	-Wswitch \
+	-Wformat \
+	-Wchar-subscripts \
+	-Wparentheses \
+	-Wmultichar \
+	-Wtrigraphs \
+	-Wpointer-arith \
+	-Wcast-align \
+	-Wreturn-type \
+	-Wno-unused-function
+
+# C-only additions. -Wimplicit belongs here: GCC documents it as a
+# C/Objective-C option, and g++ merely warns that it is not valid for C++.
+CWARN_FLAGS := $(CXXWARN_FLAGS) \
+	-Wimplicit \
+	-Wstrict-prototypes \
+	-Wmissing-prototypes \
+	-Wmissing-declarations \
+	-Wnested-externs \
+	-Wmain
+
+# Architecture flags for the nvcc and gcc builds. Everything starts empty and
+# the library architecture defaults to the machine name reported by uname.
+CUBIN_ARCH_FLAG :=
+CXX_ARCH_FLAGS  :=
+NVCCFLAGS       :=
+LIB_ARCH        := $(OSARCH)
+
+# Select the target architecture:
+#   x86_64=1   force a 64-bit build (cross compilation on a 32-bit OS)
+#   i386=1     force a 32-bit build (cross compilation on a 64-bit OS)
+#   otherwise  32-bit when SNOWLEOPARD is set, else whatever HP_64 says
+#              about the host (no extra compiler flags in that case)
+ifeq ($(x86_64),1)
+  NVCCFLAGS       += -m64
+  LIB_ARCH        := x86_64
+  CUDPPLIB_SUFFIX := x86_64
+  ifeq ($(DARWIN),)
+    CXX_ARCH_FLAGS += -m64
+  else
+    CXX_ARCH_FLAGS += -arch x86_64
+  endif
+else
+  ifeq ($(i386),1)
+    NVCCFLAGS       += -m32
+    LIB_ARCH        := i386
+    CUDPPLIB_SUFFIX := i386
+    ifeq ($(DARWIN),)
+      CXX_ARCH_FLAGS += -m32
+    else
+      CXX_ARCH_FLAGS += -arch i386
+    endif
+  else
+    ifeq ($(SNOWLEOPARD),)
+      ifneq ($(strip $(HP_64)),)
+        LIB_ARCH        := x86_64
+        CUDPPLIB_SUFFIX := x86_64
+      else
+        LIB_ARCH        := i386
+        CUDPPLIB_SUFFIX := i386
+      endif
+    else
+      NVCCFLAGS       += -m32
+      CXX_ARCH_FLAGS  += -m32 -arch i386
+      LIB_ARCH        := i386
+      CUDPPLIB_SUFFIX := i386
+    endif
+  endif
+endif
+
+# Code generation targets used by the default .cu rules: sm_10 and sm_20 are
+# always built unless a file goes through the SMVERSION template instead.
+GENCODE_SM10 := -gencode=arch=compute_10,code=\"sm_10,compute_10\"
+GENCODE_SM20 := -gencode=arch=compute_20,code=\"sm_20,compute_20\"
+
+# Host compiler flags: warnings plus the architecture switches chosen above.
+CXXFLAGS    += $(CXXWARN_FLAGS) $(CXX_ARCH_FLAGS)
+CFLAGS      += $(CWARN_FLAGS) $(CXX_ARCH_FLAGS)
+
+# The link command receives the same architecture switches.
+LINKFLAGS   +=
+LINK        += $(LINKFLAGS) $(CXX_ARCH_FLAGS)
+ifneq ($(DARWIN),)
+  # On Mac, embed the CUDA library path so that applications run without
+  # DYLD_LIBRARY_PATH being set.
+  LINK += -Xlinker -rpath $(CUDA_INSTALL_PATH)/lib
+endif
+
+# Flags shared by nvcc, the C compiler and the C++ compiler.
+COMMONFLAGS += $(INCLUDES) -DUNIX
+
+# Build type: "make dbg=1" selects the debug settings, anything else the
+# optimized release settings. BINSUBDIR names the output sub-directory and
+# LIBSUFFIX the suffix of the SDK libraries to link against.
+ifneq ($(dbg),1)
+  COMMONFLAGS += -O2
+  BINSUBDIR   := release
+  LIBSUFFIX   :=
+  NVCCFLAGS   += --compiler-options -fno-strict-aliasing
+  CXXFLAGS    += -fno-strict-aliasing
+  CFLAGS      += -fno-strict-aliasing
+else
+  COMMONFLAGS += -g
+  NVCCFLAGS   += -D_DEBUG
+  CXXFLAGS    += -D_DEBUG
+  CFLAGS      += -D_DEBUG
+  BINSUBDIR   := debug
+  LIBSUFFIX   := D
+endif
+
+# Architecture flag for the cubin build (none by default).
+CUBIN_ARCH_FLAG :=
+
+# OpenGL is used or not (if it is used, then it is necessary to include GLEW)
+ifeq ($(USEGLLIB),1)
+    ifneq ($(DARWIN),)
+        OPENGLLIB := -L/System/Library/Frameworks/OpenGL.framework/Libraries
+        OPENGLLIB += -lGL -lGLU $(COMMONDIR)/lib/$(OSLOWER)/libGLEW.a
+    else
+        # Linux platforms
+        OPENGLLIB := -lGL -lGLU -lX11 -lXi -lXmu
+        # Pick exactly one GLEW flavour: an explicit x86_64=1 or i386=1 wins,
+        # otherwise follow the host (HP_64 is empty on a 32-bit host). The
+        # former two independent checks both fell back to the host when neither
+        # switch was given, which put GLEW on the link line twice.
+        ifeq ($(x86_64),1)
+            OPENGLLIB += -lGLEW_x86_64 -L/usr/X11R6/lib64
+        else
+            ifeq ($(i386),1)
+                OPENGLLIB += -lGLEW -L/usr/X11R6/lib
+            else
+                ifeq ($(strip $(HP_64)),)
+                    OPENGLLIB += -lGLEW -L/usr/X11R6/lib
+                else
+                    OPENGLLIB += -lGLEW_x86_64 -L/usr/X11R6/lib64
+                endif
+            endif
+        endif
+    endif
+endif
+
+# GLUT: a framework on OS X, -lglut elsewhere. The library directory is only
+# given when the target architecture was requested explicitly.
+ifeq ($(USEGLUT),1)
+  ifeq ($(DARWIN),)
+    ifeq ($(x86_64),1)
+      OPENGLLIB += -lglut -L/usr/lib64
+    endif
+    ifeq ($(i386),1)
+      OPENGLLIB += -lglut -L/usr/lib
+    endif
+    # neither switch given at all: plain -lglut
+    ifeq ($(x86_64)$(i386),)
+      OPENGLLIB += -lglut
+    endif
+  else
+    OPENGLLIB += -framework GLUT
+  endif
+endif
+
+# Optional SDK helper libraries; each one is requested with USE<NAME>=1 and is
+# named after the library architecture and the debug suffix.
+ifeq ($(USEPARAMGL),1)
+    PARAMGLLIB := -lparamgl_$(LIB_ARCH)$(LIBSUFFIX)
+endif
+
+ifeq ($(USERENDERCHECKGL),1)
+    RENDERCHECKGLLIB := -lrendercheckgl_$(LIB_ARCH)$(LIBSUFFIX)
+endif
+
+ifeq ($(USECUDPP), 1)
+    CUDPPLIB := -lcudpp_$(CUDPPLIB_SUFFIX)$(LIBSUFFIX)
+
+    # emulation builds link the _emu variant
+    ifeq ($(emu), 1)
+        CUDPPLIB := $(CUDPPLIB)_emu
+    endif
+endif
+
+# NVCUVID is only set up on OS X. The path is derived from COMMONDIR (same
+# value as the previously hard-coded home directory path) so that adapting
+# COMMONDIR to the local SDK installation is enough.
+ifeq ($(USENVCUVID), 1)
+    ifneq ($(DARWIN),)
+        NVCUVIDLIB := -L$(COMMONDIR)/lib/darwin -lnvcuvid
+    endif
+endif
+
+# Library search paths. The CUDA toolkit directory is "lib" on OS X and for
+# 32-bit Linux targets, "lib64" for 64-bit Linux targets; the SDK directories
+# are the same everywhere.
+ifeq ($(DARWIN),)
+  ifneq ($(strip $(HP_64)),)
+    # 64-bit host: 64-bit libraries unless i386=1 was requested
+    ifneq ($(i386),1)
+      LIB := -L$(CUDA_INSTALL_PATH)/lib64 -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER) -L$(SHAREDDIR)/lib
+    else
+      LIB := -L$(CUDA_INSTALL_PATH)/lib -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER) -L$(SHAREDDIR)/lib
+    endif
+  else
+    # 32-bit host: 32-bit libraries unless x86_64=1 was requested
+    ifneq ($(x86_64),1)
+      LIB := -L$(CUDA_INSTALL_PATH)/lib -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER) -L$(SHAREDDIR)/lib
+    else
+      LIB := -L$(CUDA_INSTALL_PATH)/lib64 -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER) -L$(SHAREDDIR)/lib
+    endif
+  endif
+else
+  LIB := -L$(CUDA_INSTALL_PATH)/lib -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER) -L$(SHAREDDIR)/lib $(NVCUVIDLIB)
+endif
+
+# CUDA runtime / driver libraries.
+# LIB already holds the search paths and "+=" appends to it, so the list is not
+# referenced again on the right-hand side: doing so only duplicated every
+# search path (and the runtime library) on the link line.
+#
+# USECUDADYNLIB=1: CUDA and CUDART are loaded at run time, so neither is put on
+# the link line; -ldl is needed instead.
+ifeq ($(USECUDADYNLIB),1)
+    LIB += $(OPENGLLIB) $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB) -ldl -rdynamic
+else
+    # Otherwise the CUDA libraries are named at link time: the driver API
+    # library with USEDRVAPI=1, else the runtime (its emulation variant with
+    # emu=1).
+    ifeq ($(USEDRVAPI),1)
+        LIB += -lcuda $(OPENGLLIB) $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB)
+    else
+        ifeq ($(emu),1)
+            LIB += -lcudartemu
+        else
+            LIB += -lcudart
+        endif
+        LIB += $(OPENGLLIB) $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB)
+    endif
+endif
+
+# Optional CUFFT and CUBLAS; emulation builds (emu=1) link the *emu variants.
+ifeq ($(USECUFFT),1)
+    ifneq ($(emu),1)
+        LIB += -lcufft
+    else
+        LIB += -lcufftemu
+    endif
+endif
+
+ifeq ($(USECUBLAS),1)
+    ifneq ($(emu),1)
+        LIB += -lcublas
+    else
+        LIB += -lcublasemu
+    endif
+endif
+
+# Lib/exe configuration
+# With STATIC_LIB set an archive is built into LIBDIR; otherwise the executable
+# $(EXECUTABLE) is linked into $(BINDIR)/$(BINSUBDIR).
+# LINKLINE stays recursively expanded ("=") because OBJS is filled in later.
+ifneq ($(STATIC_LIB),)
+    TARGETDIR := $(LIBDIR)
+    TARGET    := $(subst .a,_$(LIB_ARCH)$(LIBSUFFIX).a,$(LIBDIR)/$(STATIC_LIB))
+    LINKLINE   = ar rucv $(TARGET) $(OBJS)
+else
+    ifneq ($(OMIT_CUTIL_LIB),1)
+        LIB += -lcutil_$(LIB_ARCH)$(LIBSUFFIX) -lshrutil_$(LIB_ARCH)$(LIBSUFFIX)
+    endif
+
+    # TUAN: BOINC libraries (api before lib). They are added regardless of
+    # OMIT_CUTIL_LIB: that switch is about the SDK's cutil/shrutil helpers and
+    # must not drop the BOINC libraries as a side effect.
+    LIB += -L$(BOINC_API_DIR) -L$(BOINC_LIB_DIR) -lboinc_api -lboinc
+
+    # Device emulation configuration
+    ifeq ($(emu), 1)
+        NVCCFLAGS   += -deviceemu
+        CUDACCFLAGS +=
+        BINSUBDIR   := emu$(BINSUBDIR)
+        # consistency, makes developing easier
+        CXXFLAGS    += -D__DEVICE_EMULATION__
+        CFLAGS      += -D__DEVICE_EMULATION__
+    endif
+    TARGETDIR := $(BINDIR)/$(BINSUBDIR)
+    TARGET    := $(TARGETDIR)/$(EXECUTABLE)
+    LINKLINE   = $(LINK) -o $(TARGET) $(OBJS) $(LIB)
+endif
+
+# Command echoing: recipes are prefixed with $(VERBOSE), which is "@" (quiet)
+# unless "make verbose=1" is used.
+ifneq ($(verbose), 1)
+    VERBOSE := @
+else
+    VERBOSE :=
+endif
+
+################################################################################
+# Optional nvcc switches selected on the make command line
+################################################################################
+# fastmath=1: pass -use_fast_math
+ifeq ($(fastmath), 1)
+    NVCCFLAGS += -use_fast_math
+endif
+
+# keep=1: keep nvcc's intermediate files; "make clean" removes them again
+# through NVCC_KEEP_CLEAN
+ifeq ($(keep), 1)
+    NVCCFLAGS += -keep
+    NVCC_KEEP_CLEAN := *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx
+endif
+
+# maxregisters=<n>: pass -maxrregcount <n>
+ifdef maxregisters
+    NVCCFLAGS += -maxrregcount $(maxregisters)
+endif
+
+# Final flag assembly. The order matters for NVCCFLAGS, which is expanded at
+# the point of each "+=": first the cudacc flags, then the common flags.
+NVCCFLAGS += $(CUDACCFLAGS)
+NVCCFLAGS += $(COMMONFLAGS)
+CXXFLAGS  += $(COMMONFLAGS)
+CFLAGS    += $(COMMONFLAGS)
+
+# Make nvcc use gcc-4.3 as host compiler: it looks for the compiler in this
+# directory below the user's home directory.
+NVCCFLAGS += --compiler-bindir=$(HOME)/NVIDIA_GPU_Computing_SDK/C/mygcc
+
+# nvcc_warn_verbose=1: forward every host warning flag to nvcc's host compiler
+ifeq ($(nvcc_warn_verbose),1)
+    NVCCFLAGS += $(addprefix --compiler-options ,$(CXXWARN_FLAGS))
+    NVCCFLAGS += --compiler-options -fno-strict-aliasing
+endif
+
+################################################################################
+# Output file lists
+################################################################################
+# Object files live in $(ROOTOBJDIR)/<arch>/<build type>; every source name is
+# reduced to its file part and gets ".o" appended to its full name
+# (foo.cu -> foo.cu.o).
+OBJDIR := $(ROOTOBJDIR)/$(LIB_ARCH)/$(BINSUBDIR)
+OBJS   += $(patsubst %.cpp,$(OBJDIR)/%.cpp.o,$(notdir $(CCFILES))) \
+          $(patsubst %.c,$(OBJDIR)/%.c.o,$(notdir $(CFILES))) \
+          $(patsubst %.cu,$(OBJDIR)/%.cu.o,$(notdir $(CUFILES)))
+
+# cubin files requested through CUBINFILES go to $(SRCDIR)data
+CUBINDIR := $(SRCDIR)data
+CUBINS   += $(patsubst %.cu,$(CUBINDIR)/%.cubin,$(notdir $(CUBINFILES)))
+
+# PTX files requested through PTXFILES go to the same directory
+PTXDIR   := $(SRCDIR)data
+PTXBINS  += $(patsubst %.cu,$(PTXDIR)/%.ptx,$(notdir $(PTXFILES)))
+
+################################################################################
+# Rules
+################################################################################
+# Each object rule creates its own output directory first, so a single object
+# can be built on its own and parallel builds do not depend on another target
+# having run before.
+#
+# The C and C++ rules only compile (-c). The BOINC static archives are linker
+# inputs that the compiler ignores in that mode, so they are not listed here;
+# they reach the link step through $(LIB).
+$(OBJDIR)/%.c.o : $(SRCDIR)%.c $(C_DEPS)
+	$(VERBOSE)mkdir -p $(@D)
+	$(VERBOSE)$(CC) $(CFLAGS) -pthread -o $@ -c $<
+
+$(OBJDIR)/%.cpp.o : $(SRCDIR)%.cpp $(C_DEPS)
+	$(VERBOSE)mkdir -p $(@D)
+	$(VERBOSE)$(CXX) $(CXXFLAGS) -pthread -o $@ -c $<
+
+# Default arch includes gencode for sm_10, sm_20, and other archs from GENCODE_ARCH declared in the makefile
+$(OBJDIR)/%.cu.o : $(SRCDIR)%.cu $(CU_DEPS)
+	$(VERBOSE)mkdir -p $(@D)
+	$(VERBOSE)$(NVCC) $(GENCODE_SM10) $(GENCODE_ARCH) $(GENCODE_SM20) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -c $<
+
+# The directory targets are order-only prerequisites ("|"): they must have run,
+# but since they never exist as files they would otherwise force the cubin and
+# ptx files to be rebuilt on every invocation.
+# Default arch includes gencode for sm_10, sm_20, and other archs from GENCODE_ARCH declared in the makefile
+$(CUBINDIR)/%.cubin : $(SRCDIR)%.cu | cubindirectory
+	$(VERBOSE)$(NVCC) $(GENCODE_SM10) $(GENCODE_ARCH) $(GENCODE_SM20) $(CUBIN_ARCH_FLAG) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -cubin $<
+
+$(PTXDIR)/%.ptx : $(SRCDIR)%.cu | ptxdirectory
+	$(VERBOSE)$(NVCC) $(CUBIN_ARCH_FLAG) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -ptx $<
+
+#
+# Template instantiated once per entry of SM_VERSIONS (10, 11, ...), with the
+# version number as $(1). Each instance
+# 1. appends to OBJS one <name>.cu_<version>.o for every .cu file listed in
+#    CUFILES_sm_<version>, and
+# 2. defines the pattern rule that builds such an object from its .cu file for
+#    that compute capability (the sm_20 code is always generated as well).
+#
+# This lets a Makefile that includes common.mk compile individual files for a
+# specific Compute Capability (aka SM arch version) instead of going through
+# the default .cu rule above. To do so, list the files per SM arch in that
+# Makefile, like so:
+#
+# CUFILES_sm_10 := mycudakernel_sm10.cu app.cu
+# CUFILES_sm_12 := anothercudakernel_sm12.cu
+#
+# "$$" defers the automatic variables to recipe execution; everything written
+# with a single "$" is expanded when the template is instantiated.
+define SMVERSION_template
+OBJS += $(patsubst %.cu,$(OBJDIR)/%.cu_$(1).o,$(notdir $(CUFILES_sm_$(1))))
+$(OBJDIR)/%.cu_$(1).o : $(SRCDIR)%.cu $(CU_DEPS)
+	$(VERBOSE)$(NVCC) -gencode=arch=compute_$(1),code=\"sm_$(1),compute_$(1)\" $(GENCODE_SM20) -o $$@ -c $$< $(NVCCFLAGS)
+endef
+
+# Instantiate the template for every version in SM_VERSIONS: the call function
+# fills in the version number and the eval function has make parse the result
+# as makefile text.
+$(foreach sm,$(SM_VERSIONS),$(eval $(call SMVERSION_template,$(sm))))
+
+# Link step. This is the first explicit target of the build and therefore the
+# default goal, so it must stay ahead of the prerequisite-only rule below.
+# makedirectories is an order-only prerequisite ("|"): it never exists as a
+# file, and as a normal prerequisite it forced a relink on every invocation.
+$(TARGET): $(OBJS) $(CUBINS) $(PTXBINS) Makefile | makedirectories
+	$(VERBOSE)$(LINKLINE)
+
+# The output directories must exist before any object is compiled; stating
+# that on the objects themselves keeps the ordering independent of the
+# position of makedirectories in the prerequisite list above.
+$(OBJS): | makedirectories
+
+# Directory-creation helpers. They name actions rather than files, so they are
+# declared phony: a stray file with one of these names cannot disable them.
+.PHONY: cubindirectory ptxdirectory makedirectories
+
+cubindirectory:
+	$(VERBOSE)mkdir -p $(CUBINDIR)
+
+ptxdirectory:
+	$(VERBOSE)mkdir -p $(PTXDIR)
+
+# Only directories this build writes to are created. TARGETDIR equals LIBDIR
+# when a static library is built, so the SDK library directory is still created
+# in that case; for executables it is merely searched and is left alone.
+makedirectories:
+	$(VERBOSE)mkdir -p $(OBJDIR)
+	$(VERBOSE)mkdir -p $(TARGETDIR)
+
+
+# Housekeeping targets are commands, not files.
+.PHONY: tidy clean clobber
+
+# Remove editor leftovers: regular files whose name contains "#" or "~".
+# find deletes them itself, so names with blanks are handled correctly and
+# only the file name (not a directory name in the path) is matched.
+tidy :
+	$(VERBOSE)find . -type f \( -name '*#*' -o -name '*~*' \) -exec rm -f {} +
+
+# Remove everything the build produced, plus image/data files left in the
+# output directory.
+clean : tidy
+	$(VERBOSE)rm -f $(OBJS)
+	$(VERBOSE)rm -f $(CUBINS)
+	$(VERBOSE)rm -f $(PTXBINS)
+	$(VERBOSE)rm -f $(TARGET)
+	$(VERBOSE)rm -f $(NVCC_KEEP_CLEAN)
+	$(VERBOSE)rm -f $(ROOTBINDIR)/$(OSLOWER)/$(BINSUBDIR)/*.ppm
+	$(VERBOSE)rm -f $(ROOTBINDIR)/$(OSLOWER)/$(BINSUBDIR)/*.pgm
+	$(VERBOSE)rm -f $(ROOTBINDIR)/$(OSLOWER)/$(BINSUBDIR)/*.bin
+	$(VERBOSE)rm -f $(ROOTBINDIR)/$(OSLOWER)/$(BINSUBDIR)/*.bmp
+
+# Additionally remove the whole object tree.
+clobber : clean
+	$(VERBOSE)rm -rf $(ROOTOBJDIR)
--- a/samples/nvcuda/cuda.cu
+++ b/samples/nvcuda/cuda.cu
@ -1,22 +1,7 @@
 /*
- *    This program is free software; you can redistribute it and/or modify
- *    it under the terms of the GNU General Public License as published by
- *    the Free Software Foundation; either version 2 of the License, or
- *    (at your option) any later version.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *    GNU General Public License for more details.
- *
- *    You should have received a copy of the GNU General Public License
- *    along with this program; if not, write to the Free Software
- *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/*
- * cuda.cu
- * Copyright (C) 2010 Tuan Le
+ * Tuan Le
+ * University of California, Berkeley
+ * Berkeley Space Sciences Lab
 * tuanle86@berkeley.edu
 */

@ -45,6 +30,17 @@
 #include "mfile.h"
 #include "graphics2.h"

+struct UC_SHMEM {    // record type of the graphics shared-memory segment; main() requests sizeof(UC_SHMEM) bytes from boinc_graphics_make_shmem()
+    double update_time;
+    double fraction_done;
+    double cpu_time;
+    BOINC_STATUS status;
+    int countdown;
+        // graphics app sets this to 5 repeatedly,
+        // main program decrements it once/sec.
+        // If it's zero, don't bother updating shmem
+};    // NOTE(review): "uc2.h", included right below when APP_GRAPHICS is defined, may declare UC_SHMEM as well - confirm there is no redefinition
+
 #ifdef APP_GRAPHICS
 #include "uc2.h"
 UC_SHMEM* shmem;
@ -59,7 +55,7 @@ using std::string;


 // execute the kernel NUM_ITERATIONS times
-#define NUM_ITERATIONS 19
+#define NUM_ITERATIONS 51

 bool run_slow = false;
 bool early_exit = false;
@ -143,13 +139,13 @@ int main(int argc, char** argv)
 {   
    int i, retval, lastInversion=0, checkpointExists=0, dimension=0;
    double fd;
-    char input_path[512], output_path[512], chkpt_path[512];
+    char input_path[512], output_path[512], chkpt_path[512], buf[256];
    REAL* h_idata;
    unsigned int mem_size;
    MFILE out;
    FILE* state, *infile;
    
-    //generateRandomInputFile(MATRIX_SIZE); //call this if you don't want to construct the input file manually
+    generateRandomInputFile(MATRIX_SIZE); //call this if you don't want to construct the input file manually

    for (i=0; i<argc; i++) {
        if (!strcmp(argv[i], "-early_exit")) early_exit = true;
@ -164,7 +160,7 @@ int main(int argc, char** argv)
 	retval = boinc_init();
    if (retval) {
        fprintf(stderr, "%s boinc_init returned %d\n",
-            boinc_msg_prefix(), retval
+            boinc_msg_prefix(buf), retval
        );
        exit(retval);
    }
@ -176,7 +172,7 @@ int main(int argc, char** argv)
    if (!infile) {
        fprintf(stderr,
            "%s Couldn't find input file, resolved name %s.\n",
-            boinc_msg_prefix(), input_path
+            boinc_msg_prefix(buf), input_path
        );
        getchar();
        exit(-1);
@ -210,10 +206,10 @@ int main(int argc, char** argv)
    
    if (retval) {
        fprintf(stderr, "%s APP: matrix_inversion output open failed:\n",
-            boinc_msg_prefix()
+            boinc_msg_prefix(buf)
        );
        fprintf(stderr, "%s resolved name %s, retval %d\n",
-            boinc_msg_prefix(), output_path, retval
+            boinc_msg_prefix(buf), output_path, retval
        );
        perror("open");
        exit(1);
@ -222,10 +218,10 @@ int main(int argc, char** argv)
 #ifdef APP_GRAPHICS
    // create shared mem segment for graphics, and arrange to update it
    //
-    shmem = (UC_SHMEM*)boinc_graphics_make_shmem("uppercase", sizeof(UC_SHMEM));
+    shmem = (UC_SHMEM*)boinc_graphics_make_shmem("matrix_inversion", sizeof(UC_SHMEM));
    if (!shmem) {
        fprintf(stderr, "%s failed to create shared mem segment\n",
-            boinc_msg_prefix()
+            boinc_msg_prefix(buf)
        );
    }
    update_shmem();
@ -276,7 +272,7 @@ int main(int argc, char** argv)
 			retval = do_checkpoint(out, i, h_idata, dimension); 
            if (retval) {
                fprintf(stderr, "%s APP: matrix_inversion checkpoint failed %d\n",
-                    boinc_msg_prefix(), retval
+                    boinc_msg_prefix(buf), retval
                );
                exit(retval);
            }
@ -296,7 +292,7 @@ int main(int argc, char** argv)
    retval = out.flush(); //force the output file to be closed.
    if (retval) {
        fprintf(stderr, "%s APP: matrix_inversion flush failed %d\n",
-            boinc_msg_prefix(), retval
+            boinc_msg_prefix(buf), retval
        );
        exit(1);
    }
@ -315,7 +311,7 @@ int main(int argc, char** argv)
                retval = do_checkpoint(out, NUM_ITERATIONS, h_idata, dimension);
                if (retval) {
                    fprintf(stderr, "%s APP: maxtrix_inversion checkpoint failed %d\n",
-                        boinc_msg_prefix(), retval
+                        boinc_msg_prefix(buf), retval
                    );
                    exit(1);
                }
@ -442,4 +438,4 @@ void printToFile(MFILE *out, float *h_odata, int dimension) {
 		}
 		--num_elements;
 	}
-}
+}
--- a/samples/nvcuda/cuda_config.h
+++ b/samples/nvcuda/cuda_config.h
@ -1,3 +1,9 @@
+/*
+ * Tuan Le
+ * University of California, Berkeley
+ * Berkeley Space Sciences Lab
+ * tuanle86@berkeley.edu
+ */

 #ifdef DOUBLE_PRECISION
 #define REAL            double
@ -9,8 +15,6 @@
 #define jREALArray           jfloatArray
 #endif

-
-
 inline void __cudaSafeCall( int err, const char *file, const int line )
 {
  do {
--- a/samples/nvcuda/cuda_kernel.cu
+++ b/samples/nvcuda/cuda_kernel.cu
@ -1,3 +1,9 @@
+/*
+ * Tuan Le
+ * University of California, Berkeley
+ * Berkeley Space Sciences Lab
+ * tuanle86@berkeley.edu
+ */

 // When VERIFY is defined, the sum of squared errors is calculated between the
 // identity matrix and the product A * incerse(A). For debugging...
@ -21,7 +27,7 @@ void mathdispAI(const REAL *mat, int lda, int MAT_SIZE_h) {
 } // mathdisp2

 void mathdispAId(const REAL * AId, int lda, int n) {
-	REAL * AI = new REAL[n*lda*2];
+	REAL * AI = (REAL *)malloc(sizeof(REAL)*(n*lda*2));
 	cudaMemcpy(AI,AId,sizeof(REAL)*n*lda*2,cudaMemcpyDeviceToHost);
 	mathdispAI(AI, lda, n);
 	delete [] AI;
@ -90,7 +96,7 @@ fprintf(stderr,"starting inversion n = %d ", n);

    int lda = ((n+15)&~15|16);
 //lda=n;
-	REAL * AI = new REAL[n*lda*2];
+	REAL * AI = (REAL *)malloc(sizeof(REAL)*(n*lda*2));
 	memset(AI,0,sizeof(REAL)*n*lda*2);
 	for (int i = 0; i < n; i++) {
 		memcpy(&AI[lda*i*2], &A[n*i], sizeof(REAL)*n);
@ -134,4 +140,4 @@ fprintf(stderr,"starting inversion n = %d ", n);
 	}
 	free(AI);
 	fprintf(stderr," done!\n");
-} // invert
+} // invert
--- a/samples/nvcuda/readme.txt
+++ b/samples/nvcuda/readme.txt
@ -0,0 +1,42 @@
+Tuan Le
+University of California, Berkeley
+Berkeley Space Sciences Lab
+tuanle86@berkeley.edu
+
+
+----------------------- Linux Makefile ----------------------
+
+Makefile needs to be edited on your machine before running. Please follow these steps:
+
+1) Open "boinc/samples/nvcuda/common.mk" with gedit
+2) Ctrl+f and search for "tuanle". You will find the following:
+
+LIBDIR     := /home/tuanle/NVIDIA_GPU_Computing_SDK/C/lib
+
+COMMONDIR  := /home/tuanle/NVIDIA_GPU_Computing_SDK/C/common
+
+SHAREDDIR  := /home/tuanle/NVIDIA_GPU_Computing_SDK/shared
+...
+...
+NVCUVIDLIB := -L/home/tuanle/NVIDIA_GPU_Computing_SDK/C/common/lib/darwin -lnvcuvid
+
+3) Replace these above paths by appropriate paths on your machine.
+4) Done!
+
+
+----------------------- Linux Troubleshooting ----------------
+
+Error: ./example_app_nvcuda: error while loading shared libraries: libcudart.so.3: cannot open
+       shared object file: No such file or directory
+Read: http://developer.download.nvidia.com/compute/cuda/3_0/docs/GettingStartedLinux.pdf  (on top of page 6)
+Solution: export PATH=/usr/local/cuda/bin:$PATH
+          export LD_LIBRARY_PATH=/usr/local/cuda/lib:$LD_LIBRARY_PATH
+          (on 64-bit Linux use /usr/local/cuda/lib64 instead of /usr/local/cuda/lib)
+
+
+----------------------- Run Executable file ------------------
+
+"make" command will create an executable file in "boinc/samples/nvcuda/linux/release/".
+If your machine doesn't have a CUDA-enabled GPU, the executable in the release directory
+will stop after the statement "Start at inversion #1" is printed on the terminal.
+In this case, it's best to run in emurelease mode: type "make emu=1" to build the
+executable in emulation mode. It is then created in "boinc/samples/nvcuda/linux/emurelease/".