diff --git a/samples/openclapp/INIT_DATA test files/ATI-AMD/init_data.xml b/samples/openclapp/INIT_DATA test files/ATI-AMD/init_data.xml new file mode 100644 index 0000000000..0c4320fc2d --- /dev/null +++ b/samples/openclapp/INIT_DATA test files/ATI-AMD/init_data.xml @@ -0,0 +1,4 @@ + +ATI +0 + diff --git a/samples/openclapp/INIT_DATA test files/INTEL_GPU/init_data.xml b/samples/openclapp/INIT_DATA test files/INTEL_GPU/init_data.xml new file mode 100644 index 0000000000..34bdc9c241 --- /dev/null +++ b/samples/openclapp/INIT_DATA test files/INTEL_GPU/init_data.xml @@ -0,0 +1,4 @@ + +intel_gpu +0 + diff --git a/samples/openclapp/INIT_DATA test files/NVIDIA/init_data.xml b/samples/openclapp/INIT_DATA test files/NVIDIA/init_data.xml new file mode 100644 index 0000000000..ac3bb1bb7c --- /dev/null +++ b/samples/openclapp/INIT_DATA test files/NVIDIA/init_data.xml @@ -0,0 +1,4 @@ + +NVIDIA +0 + diff --git a/samples/openclapp/Makefile_AMD b/samples/openclapp/Makefile_AMD new file mode 100644 index 0000000000..8bbe87f0f3 --- /dev/null +++ b/samples/openclapp/Makefile_AMD @@ -0,0 +1,44 @@ +# makefile for openclapp BOINC example appication using AMD GPU on Linux. + +BOINC_DIR = ../.. +BOINC_API_DIR = $(BOINC_DIR)/api +BOINC_LIB_DIR = $(BOINC_DIR)/lib + +CXXFLAGS = -g \ + -I$(BOINC_DIR) \ + -I$(BOINC_LIB_DIR) \ + -I$(BOINC_API_DIR) \ + -I$(AMDAPPSDKROOT)/include \ + -L. \ + -L$(BOINC_LIB_DIR) \ + -L$(BOINC_API_DIR) \ + -L$(AMDAPPSDKROOT)/lib/x86 \ + -L/lib/i386-linux-gnu +## The above 2 lines work around an Ubuntu LD_LIBRARY_PATH bug described in: +## + +PROGS = openclapp \ + +all: $(PROGS) + +libstdc++.a: + ln -s `g++ -print-file-name=libstdc++.a` + +clean: + /bin/rm -f $(PROGS) *.o libstdc++.a + +distclean: + /bin/rm -f $(PROGS) *.o libstdc++.a + +install: openclapp + +openclapp: openclapp.o boinc_opencl.o libstdc++.a + $(CXX) $(CXXFLAGS) -o openclapp openclapp.o boinc_opencl.o \ + libstdc++.a -lOpenCL -lboinc_api -lboinc -lpthread + +openclapp.o: openclapp.cpp openclapp.hpp + $(CXX) $(CXXFLAGS) -c openclapp.cpp + +boinc_opencl.o: $(BOINC_API_DIR)/boinc_opencl.cpp $(BOINC_API_DIR)/boinc_opencl.h + $(CXX) $(CXXFLAGS) -c $(BOINC_API_DIR)/boinc_opencl.cpp + diff --git a/samples/openclapp/Makefile_NVIDIA b/samples/openclapp/Makefile_NVIDIA new file mode 100644 index 0000000000..66ea2c6253 --- /dev/null +++ b/samples/openclapp/Makefile_NVIDIA @@ -0,0 +1,44 @@ +# makefile for openclapp BOINC example appication using NVIDIA GPU on Linux. + +BOINC_DIR = ../.. +BOINC_API_DIR = $(BOINC_DIR)/api +BOINC_LIB_DIR = $(BOINC_DIR)/lib + +CXXFLAGS = -g \ + -I$(BOINC_DIR) \ + -I$(BOINC_LIB_DIR) \ + -I$(BOINC_API_DIR) \ + -I/usr/local/cuda/include \ + -L$(BOINC_LIB_DIR) \ + -L$(BOINC_API_DIR) \ + -L. \ + -L/usr/local/cuda/lib \ + -L/lib/i386-linux-gnu +## The above 2 lines work around an Ubuntu LD_LIBRARY_PATH bug described in: +## + +PROGS = openclapp \ + +all: $(PROGS) + +libstdc++.a: + ln -s `g++ -print-file-name=libstdc++.a` + +clean: + /bin/rm -f $(PROGS) *.o libstdc++.a + +distclean: + /bin/rm -f $(PROGS) *.o libstdc++.a + +install: openclapp + +openclapp: openclapp.o boinc_opencl.o libstdc++.a + $(CXX) $(CXXFLAGS) -o openclapp openclapp.o boinc_opencl.o \ + libstdc++.a -lOpenCL -lboinc_api -lboinc -lpthread + +openclapp.o: openclapp.cpp openclapp.hpp + $(CXX) $(CXXFLAGS) -c openclapp.cpp + +boinc_opencl.o: $(BOINC_API_DIR)/boinc_opencl.cpp $(BOINC_API_DIR)/boinc_opencl.h + $(CXX) $(CXXFLAGS) -c $(BOINC_API_DIR)/boinc_opencl.cpp + diff --git a/samples/openclapp/Makefile_mac b/samples/openclapp/Makefile_mac index a677fb272d..f352b110b8 100644 --- a/samples/openclapp/Makefile_mac +++ b/samples/openclapp/Makefile_mac @@ -1,9 +1,10 @@ -# makefile for atiopencl BOINC example appication on Mac OS X 10.7 +# makefile for openclapp BOINC example appication on Mac OS X 10.7 # To build: -# cd to the boinc/samples/atiopencl directory +# cd to the boinc/samples/openclapp directory # make -f Makefile_mac [clean] all # + BOINC_DIR = ../.. BOINC_API_DIR = $(BOINC_DIR)/api BOINC_LIB_DIR = $(BOINC_DIR)/lib @@ -14,10 +15,9 @@ CXXFLAGS = -g \ -I$(BOINC_DIR) \ -I$(BOINC_LIB_DIR) \ -I$(BOINC_API_DIR) \ - -I$(BOINC_MAC_CONFIG_DIR) \ - -L. + -I$(BOINC_MAC_CONFIG_DIR) -PROGS = atiopencl \ +PROGS = openclapp \ all: $(PROGS) @@ -27,17 +27,17 @@ clean: distclean: /bin/rm -f $(PROGS) *.o -install: atiopencl +install: openclapp -atiopencl: atiopencl.o boinc_opencl.o - $(CXX) $(CXXFLAGS) -o atiopencl atiopencl.o \ +openclapp: openclapp.o boinc_opencl.o + $(CXX) $(CXXFLAGS) -o openclapp openclapp.o \ boinc_opencl.o \ -framework OpenCL \ -lboinc_api -L$(BOINC_BUILD_DIR) \ -lboinc -L$(BOINC_BUILD_DIR) -atiopencl.o: atiopencl.cpp atiopencl.hpp - $(CXX) $(CXXFLAGS) -c atiopencl.cpp +openclapp.o: openclapp.cpp openclapp.hpp + $(CXX) $(CXXFLAGS) -c openclapp.cpp boinc_opencl.o: $(BOINC_API_DIR)/boinc_opencl.cpp $(BOINC_API_DIR)/boinc_opencl.h $(CXX) $(CXXFLAGS) -c $(BOINC_API_DIR)/boinc_opencl.cpp diff --git a/samples/openclapp/ReadMe.txt b/samples/openclapp/ReadMe.txt new file mode 100644 index 0000000000..29e89ce6ea --- /dev/null +++ b/samples/openclapp/ReadMe.txt @@ -0,0 +1,56 @@ +Windows projects. + +To build for Mac: +make -f Makefile_mac + +To build for Linux (assuming you have installed the appropriate GPU computing SDK): +For ATI/AMD: +make -f Makefile_AMD + +For NVIDIA: +make -f Makefile_NVIDIA + +For intel Ivy Bridge: modify one of the above make files for the appropriate paths to the OpenCL headers and libraries. + +Adjust the -I and -L arguments for Linux if the OpenCL headers and libraries are in non-standard locations. + +To run: +This same sample is designed to run with AMD, NVIDIA and Intel Ivy Bridge GPUs. It is supplied with 3 minimal init_data.xml files, one for each of these 3 vendors (GPU "types".) Copy the appropriate init_data.xml file into the directory containing the openclapp executable. Then run from the Terminal: +$ cd to/the/directory/containing/executable/and/init_data.xml/file +$ ./openclapp [options] + +command line options + -run_slow: sleep 1 second after each character + -cpu_time N: use about N CPU seconds after copying files + -early_exit: exit(10) after 30 iterations + -early_crash: crash after 30 iterations + -early_sleep: go into infinite sleep after 30 iterations + +============================================================== + +Important notes about the sample code: + +Since a computer can have multiple GPUs, the application must use the GPU assigned by the BOINC client. To do this, it must call the following API: +int boinc_get_opencl_ids( + int argc, char** argv, int type, + cl_device_id* device, cl_platform_id* platform +); + +The arguments are as follows: +argc, argv: the argv and argc received by the application's main() from the BOINC client. +type: may be PROC_TYPE_NVIDIA_GPU, PROC_TYPE_AMD_GPU or PROC_TYPE_INTEL_GPU. +device: a pointer to the variable to receive the cl_device_id of the desired GPU. +platform: a pointer to the variable to receive the cl_platform_id of the desired GPU. + +Currently, BOINC expects projects to provide separate production applications for each GPU vendor (GPU type), with a separate "plan class" for each. BOINC currently supports GPUs from the three major vendors: AMD (ATI). NVIDIA or Intel (Ivy Bridge or later). BOINC refers to the vendors as gpu "types." + +Because older clients do not write the field into the init_data.xml file, your application must pass the appropriate GPU type as the third argument in the boinc_get_opencl_ids() call, or it will not be compatible with older clients. + +However, to avoid redundancy, this one sample is designed to work with OpenCl-capable GPUs from any of the three vendors. To accomplish this, it does not pass a valid type in the boinc_get_opencl_ids() call; it requires init_data.xml file to have a valid field, and so would not be compatible with older clients. This shortcut is not acceptable for production OpenCL applications; you _must_ pass in a type of either PROC_TYPE_NVIDIA_GPU, PROC_TYPE_AMD_GPU or PROC_TYPE_INTEL_GPU. + +============================================================== + +What is the difference between a GPU's gpu_device_num and its gpu_opencl_dev_index? + +In most cases, they are identical. But on Macs which have CUDA installed, Mac OpenCL does not always recognize all NVIDIA GPUs recognized by CUDA. In that case, the gpu_device_num is the device's position among all the CUDA-capable GPUs, and the gpu_opencl_dev_index is the device's position among all the OpenCL-capable GPUs. + diff --git a/samples/openclapp/openclapp.cpp b/samples/openclapp/openclapp.cpp index faee54c6ac..b283e354e7 100644 --- a/samples/openclapp/openclapp.cpp +++ b/samples/openclapp/openclapp.cpp @@ -1,6 +1,6 @@ // This file is part of BOINC. // http://boinc.berkeley.edu -// Copyright (C) 2008 University of California +// Copyright (C) 2013 University of California // // BOINC is free software; you can redistribute it and/or modify it // under the terms of the GNU Lesser General Public License @@ -16,23 +16,30 @@ // along with BOINC. If not, see . // // This program serves as both -// - An example BOINC-ATIOpenCL application, illustrating the use of the BOINC API -// and ATIStream OpenCL API. +// - An example BOINC-OpenCL application, illustrating the use of the BOINC API +// and OpenCL API. // - A program for testing various features of BOINC. // // The program reads the input nxn matrix from the "input" file, inverts the // matrix NUM_ITERATIONS times and write to "output" file. // +// To run, place the executable in the same directory as an init_data.xml +// file specifying the gpu_type (vendor) and gpu_device_num, then invoke +// form the command line as follows: +// $ cd to/the/directory/containing/executable/and/init_data.mxl/file +// $ ./openclapp [options] +// // command line options // -run_slow: sleep 1 second after each character // -cpu_time N: use about N CPU seconds after copying files -// -early_exit: exit(10) after 30 chars -// -early_crash: crash after 30 chars +// -early_exit: exit(10) after 30 iterations +// -early_crash: crash after 30 iterations +// -early_sleep: go into infinite sleep after 30 iterations // // See http://boinc.berkeley.edu/trac/wiki/GPUApp for any compiling issues. -// Contributor: Tuan Le (tuanle86@berkeley.edu) +// Original contributor: Tuan Le (tuanle86@berkeley.edu) -#include "atiopencl.hpp" +#include "openclapp.hpp" #include "boinc_opencl.h" using std::string; @@ -46,7 +53,6 @@ int main(int argc, char * argv[]) { generate_random_input_file(MATRIX_SIZE); //call this if you don't want to //construct the input file manually - for (i=0; i field in the init_data.xml file. + // + // This sample passes -1 for the type argument to allow using + // just one sample for any GPU vendor (AMD, NVIDIA or Intel.) + // As a result, the init_data.xml file for this sample must + // specify the GPU type (vendor) and either gpu_device_num (the + // GPU's index from that vendor) or gpu_opencl_dev_index (the + // GPU's index among OpenCL-capable devices from that vendor.) + // + // See the ReadMe file for more details, including an explanation + // of the difference between the gpu_device_num and the + // gpu_opencl_dev_index. + retval = boinc_get_opencl_ids(argc, argv, -1, &device, &platform); + if (retval) { + fprintf(stderr, + "Error: boinc_get_opencl_ids() failed with error %d\n", + retval + ); + return 1; + } - cl_uint numPlatforms; - status = clGetPlatformIDs(0, NULL, &numPlatforms); - if(status != CL_SUCCESS) { - fprintf(stderr, - "Error: Getting Platforms. (clGetPlatformsIDs) returned %d\n", - status - ); - return 1; - } - - if (numPlatforms > 0) { - cl_platform_id* platforms = (cl_platform_id *) - malloc(sizeof(cl_platform_id)*numPlatforms); - status = clGetPlatformIDs(numPlatforms, platforms, NULL); - if (status != CL_SUCCESS) { - fprintf(stderr, - "Error: Getting Platform Ids. (clGetPlatformsIDs) returned %d\n", - status - ); - - return 1; - } - for (unsigned int i=0; i < numPlatforms; ++i) { - char pbuff[100]; - status = clGetPlatformInfo(platforms[i], - CL_PLATFORM_VENDOR, - sizeof(pbuff), - pbuff, - NULL); - if (status != CL_SUCCESS) { - fprintf(stderr, - "Error: Getting Platform Info.(clGetPlatformInfo)returned %d\n", - status - ); - - return 1; - } - platform = platforms[i]; - if (!strcmp(pbuff, "Advanced Micro Devices, Inc.")) { - break; - } - } - delete platforms; - } - - if(NULL == platform) { - fprintf(stderr, "ERROR: NULL platform found so Exiting Application."); - return 1; - } - - /* - * If we could find our platform, use it. Otherwise use just available platform. - */ - cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, - (cl_context_properties)platform, - 0 - }; - - ///////////////////////////////////////////////////////////////// - // Create an OpenCL context - ///////////////////////////////////////////////////////////////// - context = clCreateContextFromType(cps, CL_DEVICE_TYPE_ALL, NULL, NULL, &status); - if (status != CL_SUCCESS) { - fprintf(stderr, - "Error: Creating Context. (clCreateContextFromType) returned %d\n", - status - ); - - return 1; - } - - /* First, get the size of device list data */ - status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize); - if (status != CL_SUCCESS) { - fprintf(stderr, - "Error: Getting Context Info (device list size, clGetContextInfo)returned %d\n", - status - ); - - return 1; - } - - ///////////////////////////////////////////////////////////////// - // Detect OpenCL devices - ///////////////////////////////////////////////////////////////// - devices = (cl_device_id *)malloc(deviceListSize); - if (devices == 0) { - fprintf(stderr, "Error: No devices found.\n"); - return 1; - } - - /* Now, get the device list data */ - status = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceListSize, devices, NULL); - if (status != CL_SUCCESS) { - fprintf(stderr, - "Error: Getting Context Info (device list, clGetContextInfo) returned %d\n", - status - ); - - return 1; - } - - device = devices[0]; - - } else { // NOT stand_alone - retval = boinc_get_opencl_ids(&device, &platform); - if (retval) { - fprintf(stderr, - "Error: boinc_get_opencl_ids() failed with error %d\n", - retval - ); - return 1; - } - - cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, - (cl_context_properties)platform, - 0 - }; - context = clCreateContext(cps, 1, &device, NULL, NULL, &status); - if (status != CL_SUCCESS) { - fprintf(stderr, "Error: clCreateContext() returned %d\n", status); - return 1; - } + cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, + (cl_context_properties)platform, + 0 + }; + context = clCreateContext(cps, 1, &device, NULL, NULL, &status); + if (status != CL_SUCCESS) { + fprintf(stderr, "Error: clCreateContext() returned %d\n", status); + return 1; } ///////////////////////////////////////////////////////////////// @@ -618,7 +526,6 @@ int initialize_cl(void) { "Error: Creating Command Queue. (clCreateCommandQueue) returned %d\n", status ); - return 1; } @@ -638,7 +545,7 @@ int initialize_cl(void) { } /* create a cl program executable for all the devices specified */ - status = clBuildProgram(program, 1, devices, NULL, NULL, NULL); + status = clBuildProgram(program, 1, &device, NULL, NULL, NULL); if (status != CL_SUCCESS) { fprintf(stderr, "Error: Building Program (clBuildProgram) returned %d\n", @@ -724,15 +631,6 @@ int cleanup_cl(void) { return 1; } - status = clReleaseMemObject(inputBuffer); - if (status != CL_SUCCESS) { - fprintf(stderr, - "Error: In clReleaseMemObject (inputBuffer) returned %d\n", - status - ); - return 1; - } - status = clReleaseCommandQueue(commandQueue); if (status != CL_SUCCESS) { fprintf(stderr, @@ -768,11 +666,6 @@ void cleanup_host(void) { output = NULL; } - if (devices != NULL) { - free(devices); - devices = NULL; - } - if (source != NULL) { free((char *)source); source = NULL; @@ -806,7 +699,6 @@ void print_to_file(MFILE *out, float *h_odata, int n) { */ int run_GEStep1A_kernel(cl_float * AI, int i, int n2, int lda2) { cl_int status; - cl_event events[2]; /* * the input array to the kernel. This array will eventually be modified @@ -862,7 +754,8 @@ int run_GEStep1A_kernel(cl_float * AI, int i, int n2, int lda2) { localThreads, 0, NULL, - &events[0]); + NULL); + if (status != CL_SUCCESS) { fprintf(stderr, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel) returned %d\n", @@ -871,35 +764,18 @@ int run_GEStep1A_kernel(cl_float * AI, int i, int n2, int lda2) { return 1; } - /* wait for the kernel call to finish execution */ - status = clWaitForEvents(1, &events[0]); - if (status != CL_SUCCESS) { - fprintf(stderr, - "Error: Waiting for kernel run to finish. (clWaitForEvents) returned %d\n", - status - ); - return 1; - } - - status = clReleaseEvent(events[0]); - if (status != CL_SUCCESS) { - fprintf(stderr, - "Error: Release event object. (clReleaseEvent) returned %d\n", - status - ); - return 1; - } + clFinish(commandQueue); /* Enqueue readBuffer*/ //Note: we are reading back from inputBuffer since AI is modified directly in kernel status = clEnqueueReadBuffer(commandQueue, inputBuffer, - CL_TRUE, + CL_FALSE, 0, globalThreads[0] * sizeof(cl_float), AI, 0, NULL, - &events[1]); + NULL); if(status != CL_SUCCESS) { fprintf(stderr, @@ -909,30 +785,11 @@ int run_GEStep1A_kernel(cl_float * AI, int i, int n2, int lda2) { return 1; } - /* Wait for the read buffer to finish execution */ - status = clWaitForEvents(1, &events[1]); - if (status != CL_SUCCESS) { - fprintf(stderr, - "Error: Waiting for read buffer call to finish. (clWaitForEvents) returned %d\n", - status - ); - return 1; - } - - status = clReleaseEvent(events[1]); - if (status != CL_SUCCESS) { - fprintf(stderr, - "Error: Release event object. (clReleaseEvent) returned %d\n", - status - ); - return 1; - } return 0; } int run_GEStep2_kernel(cl_float * AI, cl_float diag, int i, int n2, int lda2) { cl_int status; - cl_event events[2]; /* * the input array to the kernel. This array will eventually be modified @@ -998,8 +855,9 @@ int run_GEStep2_kernel(cl_float * AI, cl_float diag, int i, int n2, int lda2) { localThreads, 0, NULL, - &events[0]); - if (status != CL_SUCCESS) { + NULL); + + if (status != CL_SUCCESS) { fprintf(stderr, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel) returned %d\n", status @@ -1007,65 +865,29 @@ int run_GEStep2_kernel(cl_float * AI, cl_float diag, int i, int n2, int lda2) { return 1; } - /* wait for the kernel call to finish execution */ - status = clWaitForEvents(1, &events[0]); - if (status != CL_SUCCESS) { - fprintf(stderr, - "Error: Waiting for kernel run to finish. (clWaitForEvents) returned %d\n", - status - ); - return 1; - } - - status = clReleaseEvent(events[0]); - if (status != CL_SUCCESS) { - fprintf(stderr, - "Error: Release event object. (clReleaseEvent) returned %d\n", - status - ); - return 1; - } - + clFinish(commandQueue); + /* Enqueue readBuffer*/ //Note: we are reading back from inputBuffer since AI is modified directly in kernel status = clEnqueueReadBuffer(commandQueue, inputBuffer, - CL_TRUE, + CL_FALSE, 0, globalThreads[0] * sizeof(cl_float), AI, 0, NULL, - &events[1]); + NULL); if (status != CL_SUCCESS) { fprintf(stderr, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer) returned %d\n", status); return 1; } - /* Wait for the read buffer to finish execution */ - status = clWaitForEvents(1, &events[1]); - if (status != CL_SUCCESS) { - fprintf(stderr, - "Error: Waiting for read buffer call to finish. (clWaitForEvents) returned %d\n", - status - ); - return 1; - } - - status = clReleaseEvent(events[1]); - if (status != CL_SUCCESS) { - fprintf(stderr, - "Error: Release event object. (clReleaseEvent) returned %d\n", - status - ); - return 1; - } return 0; } int run_GEStep3_kernel(cl_float * AI, int i, int n2, int lda2) { cl_int status; - cl_event events[2]; /* * The input array to the kernel. This array will eventually be modified @@ -1121,8 +943,9 @@ int run_GEStep3_kernel(cl_float * AI, int i, int n2, int lda2) { localThreads, 0, NULL, - &events[0]); - if (status != CL_SUCCESS) { + NULL); + + if (status != CL_SUCCESS) { fprintf(stderr, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel) returned %d\n", status @@ -1130,24 +953,7 @@ int run_GEStep3_kernel(cl_float * AI, int i, int n2, int lda2) { return 1; } - /* wait for the kernel call to finish execution */ - status = clWaitForEvents(1, &events[0]); - if (status != CL_SUCCESS) { - fprintf(stderr, - "Error: Waiting for kernel run to finish. (clWaitForEvents) returned %d\n", - status - ); - return 1; - } - - status = clReleaseEvent(events[0]); - if (status != CL_SUCCESS) { - fprintf(stderr, - "Error: Release event object. (clReleaseEvent) returned %d\n", - status - ); - return 1; - } + clFinish(commandQueue); /* Enqueue readBuffer*/ //Note: we are reading back from inputBuffer since AI is modified directly in kernel @@ -1159,7 +965,7 @@ int run_GEStep3_kernel(cl_float * AI, int i, int n2, int lda2) { AI, 0, NULL, - &events[1]); + NULL); if (status != CL_SUCCESS) { fprintf(stderr, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer) returned %d\n", @@ -1168,25 +974,6 @@ int run_GEStep3_kernel(cl_float * AI, int i, int n2, int lda2) { return 1; } - /* Wait for the read buffer to finish execution */ - status = clWaitForEvents(1, &events[1]); - if (status != CL_SUCCESS) { - fprintf(stderr, - "Error: Waiting for read buffer call to finish. (clWaitForEvents) returned %d\n", - status - ); - return 1; - } - - status = clReleaseEvent(events[1]); - if(status != CL_SUCCESS) { - fprintf(stderr, - "Error: Release event object. (clReleaseEvent) returned %d\n", - status - ); - return 1; - } - return 0; } @@ -1213,7 +1000,7 @@ void invert(cl_float * input, cl_float *output, int n) { volatile clock_t gputime; gputime=clock(); - int lda = ((n+15)&~15|16); + int lda = (((n+15)&(~15))|16); cl_float * AI_d = (cl_float *)malloc(sizeof(cl_float)*n*lda*2); memset(AI_d,0,sizeof(cl_float)*n*lda*2); for (int i = 0; i < n; i++) { @@ -1238,14 +1025,15 @@ void invert(cl_float * input, cl_float *output, int n) { ); exit(0); } + // Note: there's no output buffer. In kernel, AI_d is modified directly. // Thus, we should read the result back to host from inputBuffer as well. - invertge(AI_d, lda, n); - gputime=clock()-gputime;fprintf(stderr, " %7.1f ms ",gputime/1.e3f); + gputime=clock()-gputime; + fprintf(stderr, " %7.1f ms ",gputime/1.e3f); fprintf(stderr, " %7.2f Gflops", 1e-3*(3.0)*n*n*n/3.0/gputime); -#ifdef VERIFY +#ifdef VERIFY // let's verify that cl_float error=0.0; @@ -1270,6 +1058,15 @@ void invert(cl_float * input, cl_float *output, int n) { for (int i = 0; i < n; i++) { memcpy(&output[n*i], &AI_d[lda*i*2+n], sizeof(cl_float)*n); } + + status = clReleaseMemObject(inputBuffer); + if (status != CL_SUCCESS) { + fprintf(stderr, + "Error: In clReleaseMemObject (inputBuffer) returned %d\n", + status + ); + } + free(AI_d); fprintf(stderr," done!\n"); } diff --git a/samples/openclapp/openclapp.hpp b/samples/openclapp/openclapp.hpp index 6efd3d7ced..d291d70641 100644 --- a/samples/openclapp/openclapp.hpp +++ b/samples/openclapp/openclapp.hpp @@ -1,6 +1,6 @@ // This file is part of BOINC. // http://boinc.berkeley.edu -// Copyright (C) 2008 University of California +// Copyright (C) 2013 University of California // // BOINC is free software; you can redistribute it and/or modify it // under the terms of the GNU Lesser General Public License @@ -16,10 +16,10 @@ // along with BOINC. If not, see . // // See http://boinc.berkeley.edu/trac/wiki/GPUApp for any compiling issues. -// Contributor: Tuan Le (tuanle86@berkeley.edu) +// Original contributor: Tuan Le (tuanle86@berkeley.edu) -#ifndef ATIOPENCL_H_ -#define ATIOPENCL_H_ +#ifndef OPENCLAPP_H_ +#define OPENCLAPP_H_ #ifdef __APPLE__ #include @@ -37,8 +37,8 @@ #define INPUT_FILENAME "input" #define OUTPUT_FILENAME "output" -#define KERNELS_FILENAME "atiopencl_kernels.cl" -#define KERNELS_FILEPATH "../../atiopencl_kernels.cl" // for Linux and Mac +#define KERNELS_FILENAME "openclapp_kernels.cl" +#define KERNELS_FILEPATH "../../openclapp_kernels.cl" // for Linux and Mac #define CHECKPOINT_FILE "matrix_inversion_state" #define LOCAL_WORK_SIZE 1 @@ -115,7 +115,6 @@ cl_uint height; cl_mem inputBuffer; //in this sample app, we will read the result //from the device back to host from inputBuffer as well. cl_context context; -cl_device_id *devices; cl_command_queue commandQueue; cl_program program; @@ -185,7 +184,7 @@ void update_shmem() { * Calls are made to set up OpenCL memory buffers that this program uses * and to load the programs into memory and get kernel handles. */ -int initialize_cl(void); +int initialize_cl(int argc, char * argv[]); int initialize_host(FILE *infile); @@ -226,4 +225,4 @@ void invertge(cl_float * AI_d, int lda, int n); -#endif /* #ifndef ATIOPENCL_H_ */ +#endif /* #ifndef OPENCLAPP_H_ */ diff --git a/samples/openclapp/openclapp_kernels.cl b/samples/openclapp/openclapp_kernels.cl index a6b4a73498..e93ce7d454 100644 --- a/samples/openclapp/openclapp_kernels.cl +++ b/samples/openclapp/openclapp_kernels.cl @@ -1,6 +1,6 @@ // This file is part of BOINC. // http://boinc.berkeley.edu -// Copyright (C) 2008 University of California +// Copyright (C) 2013 University of California // // BOINC is free software; you can redistribute it and/or modify it // under the terms of the GNU Lesser General Public License @@ -21,6 +21,7 @@ // Contributor: Tuan Le (tuanle86@berkeley.edu) __kernel void GEStep1A(__global float * AI, int i, int n2, int lda2) { + //int k = get_group_id(0) * get_local_size(0) + get_local_id(0); int k=get_global_id(0); if (k>i && k < n2 && AI[i*lda2+k]!=0) { float multiplyer = -AI[i*lda2+k]/AI[i*lda2+i];