mirror of https://github.com/BOINC/boinc.git
Create unified OpenCL sample; update to use boinc_get_opencl_ids();fix bugs
This commit is contained in:
parent
52a25f8194
commit
f8c7518c56
|
@ -0,0 +1,4 @@
|
|||
<app_init_data>
|
||||
<gpu_type>ATI</gpu_type>
|
||||
<gpu_device_num>0</gpu_device_num>
|
||||
</app_init_data>
|
|
@ -0,0 +1,4 @@
|
|||
<app_init_data>
|
||||
<gpu_type>intel_gpu</gpu_type>
|
||||
<gpu_device_num>0</gpu_device_num>
|
||||
</app_init_data>
|
|
@ -0,0 +1,4 @@
|
|||
<app_init_data>
|
||||
<gpu_type>NVIDIA</gpu_type>
|
||||
<gpu_device_num>0</gpu_device_num>
|
||||
</app_init_data>
|
|
@ -0,0 +1,44 @@
|
|||
# makefile for openclapp BOINC example appication using AMD GPU on Linux.
|
||||
|
||||
BOINC_DIR = ../..
|
||||
BOINC_API_DIR = $(BOINC_DIR)/api
|
||||
BOINC_LIB_DIR = $(BOINC_DIR)/lib
|
||||
|
||||
CXXFLAGS = -g \
|
||||
-I$(BOINC_DIR) \
|
||||
-I$(BOINC_LIB_DIR) \
|
||||
-I$(BOINC_API_DIR) \
|
||||
-I$(AMDAPPSDKROOT)/include \
|
||||
-L. \
|
||||
-L$(BOINC_LIB_DIR) \
|
||||
-L$(BOINC_API_DIR) \
|
||||
-L$(AMDAPPSDKROOT)/lib/x86 \
|
||||
-L/lib/i386-linux-gnu
|
||||
## The above 2 lines work around an Ubuntu LD_LIBRARY_PATH bug described in:
|
||||
## <https://help.ubuntu.com/community/EnvironmentVariables#File-location_related_variables>
|
||||
|
||||
PROGS = openclapp \
|
||||
|
||||
all: $(PROGS)
|
||||
|
||||
libstdc++.a:
|
||||
ln -s `g++ -print-file-name=libstdc++.a`
|
||||
|
||||
clean:
|
||||
/bin/rm -f $(PROGS) *.o libstdc++.a
|
||||
|
||||
distclean:
|
||||
/bin/rm -f $(PROGS) *.o libstdc++.a
|
||||
|
||||
install: openclapp
|
||||
|
||||
openclapp: openclapp.o boinc_opencl.o libstdc++.a
|
||||
$(CXX) $(CXXFLAGS) -o openclapp openclapp.o boinc_opencl.o \
|
||||
libstdc++.a -lOpenCL -lboinc_api -lboinc -lpthread
|
||||
|
||||
openclapp.o: openclapp.cpp openclapp.hpp
|
||||
$(CXX) $(CXXFLAGS) -c openclapp.cpp
|
||||
|
||||
boinc_opencl.o: $(BOINC_API_DIR)/boinc_opencl.cpp $(BOINC_API_DIR)/boinc_opencl.h
|
||||
$(CXX) $(CXXFLAGS) -c $(BOINC_API_DIR)/boinc_opencl.cpp
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
# makefile for openclapp BOINC example appication using NVIDIA GPU on Linux.
|
||||
|
||||
BOINC_DIR = ../..
|
||||
BOINC_API_DIR = $(BOINC_DIR)/api
|
||||
BOINC_LIB_DIR = $(BOINC_DIR)/lib
|
||||
|
||||
CXXFLAGS = -g \
|
||||
-I$(BOINC_DIR) \
|
||||
-I$(BOINC_LIB_DIR) \
|
||||
-I$(BOINC_API_DIR) \
|
||||
-I/usr/local/cuda/include \
|
||||
-L$(BOINC_LIB_DIR) \
|
||||
-L$(BOINC_API_DIR) \
|
||||
-L. \
|
||||
-L/usr/local/cuda/lib \
|
||||
-L/lib/i386-linux-gnu
|
||||
## The above 2 lines work around an Ubuntu LD_LIBRARY_PATH bug described in:
|
||||
## <https://help.ubuntu.com/community/EnvironmentVariables#File-location_related_variables>
|
||||
|
||||
PROGS = openclapp \
|
||||
|
||||
all: $(PROGS)
|
||||
|
||||
libstdc++.a:
|
||||
ln -s `g++ -print-file-name=libstdc++.a`
|
||||
|
||||
clean:
|
||||
/bin/rm -f $(PROGS) *.o libstdc++.a
|
||||
|
||||
distclean:
|
||||
/bin/rm -f $(PROGS) *.o libstdc++.a
|
||||
|
||||
install: openclapp
|
||||
|
||||
openclapp: openclapp.o boinc_opencl.o libstdc++.a
|
||||
$(CXX) $(CXXFLAGS) -o openclapp openclapp.o boinc_opencl.o \
|
||||
libstdc++.a -lOpenCL -lboinc_api -lboinc -lpthread
|
||||
|
||||
openclapp.o: openclapp.cpp openclapp.hpp
|
||||
$(CXX) $(CXXFLAGS) -c openclapp.cpp
|
||||
|
||||
boinc_opencl.o: $(BOINC_API_DIR)/boinc_opencl.cpp $(BOINC_API_DIR)/boinc_opencl.h
|
||||
$(CXX) $(CXXFLAGS) -c $(BOINC_API_DIR)/boinc_opencl.cpp
|
||||
|
|
@ -1,9 +1,10 @@
|
|||
# makefile for atiopencl BOINC example appication on Mac OS X 10.7
|
||||
# makefile for openclapp BOINC example appication on Mac OS X 10.7
|
||||
# To build:
|
||||
# cd to the boinc/samples/atiopencl directory
|
||||
# cd to the boinc/samples/openclapp directory
|
||||
# make -f Makefile_mac [clean] all
|
||||
#
|
||||
|
||||
|
||||
BOINC_DIR = ../..
|
||||
BOINC_API_DIR = $(BOINC_DIR)/api
|
||||
BOINC_LIB_DIR = $(BOINC_DIR)/lib
|
||||
|
@ -14,10 +15,9 @@ CXXFLAGS = -g \
|
|||
-I$(BOINC_DIR) \
|
||||
-I$(BOINC_LIB_DIR) \
|
||||
-I$(BOINC_API_DIR) \
|
||||
-I$(BOINC_MAC_CONFIG_DIR) \
|
||||
-L.
|
||||
-I$(BOINC_MAC_CONFIG_DIR)
|
||||
|
||||
PROGS = atiopencl \
|
||||
PROGS = openclapp \
|
||||
|
||||
all: $(PROGS)
|
||||
|
||||
|
@ -27,17 +27,17 @@ clean:
|
|||
distclean:
|
||||
/bin/rm -f $(PROGS) *.o
|
||||
|
||||
install: atiopencl
|
||||
install: openclapp
|
||||
|
||||
atiopencl: atiopencl.o boinc_opencl.o
|
||||
$(CXX) $(CXXFLAGS) -o atiopencl atiopencl.o \
|
||||
openclapp: openclapp.o boinc_opencl.o
|
||||
$(CXX) $(CXXFLAGS) -o openclapp openclapp.o \
|
||||
boinc_opencl.o \
|
||||
-framework OpenCL \
|
||||
-lboinc_api -L$(BOINC_BUILD_DIR) \
|
||||
-lboinc -L$(BOINC_BUILD_DIR)
|
||||
|
||||
atiopencl.o: atiopencl.cpp atiopencl.hpp
|
||||
$(CXX) $(CXXFLAGS) -c atiopencl.cpp
|
||||
openclapp.o: openclapp.cpp openclapp.hpp
|
||||
$(CXX) $(CXXFLAGS) -c openclapp.cpp
|
||||
|
||||
boinc_opencl.o: $(BOINC_API_DIR)/boinc_opencl.cpp $(BOINC_API_DIR)/boinc_opencl.h
|
||||
$(CXX) $(CXXFLAGS) -c $(BOINC_API_DIR)/boinc_opencl.cpp
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
Windows projects.
|
||||
|
||||
To build for Mac:
|
||||
make -f Makefile_mac
|
||||
|
||||
To build for Linux (assuming you have installed the appropriate GPU computing SDK):
|
||||
For ATI/AMD:
|
||||
make -f Makefile_AMD
|
||||
|
||||
For NVIDIA:
|
||||
make -f Makefile_NVIDIA
|
||||
|
||||
For intel Ivy Bridge: modify one of the above make files for the appropriate paths to the OpenCL headers and libraries.
|
||||
|
||||
Adjust the -I and -L arguments for Linux if the OpenCL headers and libraries are in non-standard locations.
|
||||
|
||||
To run:
|
||||
This same sample is designed to run with AMD, NVIDIA and Intel Ivy Bridge GPUs. It is supplied with 3 minimal init_data.xml files, one for each of these 3 vendors (GPU "types".) Copy the appropriate init_data.xml file into the directory containing the openclapp executable. Then run from the Terminal:
|
||||
$ cd to/the/directory/containing/executable/and/init_data.xml/file
|
||||
$ ./openclapp [options]
|
||||
|
||||
command line options
|
||||
-run_slow: sleep 1 second after each character
|
||||
-cpu_time N: use about N CPU seconds after copying files
|
||||
-early_exit: exit(10) after 30 iterations
|
||||
-early_crash: crash after 30 iterations
|
||||
-early_sleep: go into infinite sleep after 30 iterations
|
||||
|
||||
==============================================================
|
||||
|
||||
Important notes about the sample code:
|
||||
|
||||
Since a computer can have multiple GPUs, the application must use the GPU assigned by the BOINC client. To do this, it must call the following API:
|
||||
int boinc_get_opencl_ids(
|
||||
int argc, char** argv, int type,
|
||||
cl_device_id* device, cl_platform_id* platform
|
||||
);
|
||||
|
||||
The arguments are as follows:
|
||||
argc, argv: the argv and argc received by the application's main() from the BOINC client.
|
||||
type: may be PROC_TYPE_NVIDIA_GPU, PROC_TYPE_AMD_GPU or PROC_TYPE_INTEL_GPU.
|
||||
device: a pointer to the variable to receive the cl_device_id of the desired GPU.
|
||||
platform: a pointer to the variable to receive the cl_platform_id of the desired GPU.
|
||||
|
||||
Currently, BOINC expects projects to provide separate production applications for each GPU vendor (GPU type), with a separate "plan class" for each. BOINC currently supports GPUs from the three major vendors: AMD (ATI). NVIDIA or Intel (Ivy Bridge or later). BOINC refers to the vendors as gpu "types."
|
||||
|
||||
Because older clients do not write the <gpu_type> field into the init_data.xml file, your application must pass the appropriate GPU type as the third argument in the boinc_get_opencl_ids() call, or it will not be compatible with older clients.
|
||||
|
||||
However, to avoid redundancy, this one sample is designed to work with OpenCl-capable GPUs from any of the three vendors. To accomplish this, it does not pass a valid type in the boinc_get_opencl_ids() call; it requires init_data.xml file to have a valid <gpu_type> field, and so would not be compatible with older clients. This shortcut is not acceptable for production OpenCL applications; you _must_ pass in a type of either PROC_TYPE_NVIDIA_GPU, PROC_TYPE_AMD_GPU or PROC_TYPE_INTEL_GPU.
|
||||
|
||||
==============================================================
|
||||
|
||||
What is the difference between a GPU's gpu_device_num and its gpu_opencl_dev_index?
|
||||
|
||||
In most cases, they are identical. But on Macs which have CUDA installed, Mac OpenCL does not always recognize all NVIDIA GPUs recognized by CUDA. In that case, the gpu_device_num is the device's position among all the CUDA-capable GPUs, and the gpu_opencl_dev_index is the device's position among all the OpenCL-capable GPUs.
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
// This file is part of BOINC.
|
||||
// http://boinc.berkeley.edu
|
||||
// Copyright (C) 2008 University of California
|
||||
// Copyright (C) 2013 University of California
|
||||
//
|
||||
// BOINC is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU Lesser General Public License
|
||||
|
@ -16,23 +16,30 @@
|
|||
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
// This program serves as both
|
||||
// - An example BOINC-ATIOpenCL application, illustrating the use of the BOINC API
|
||||
// and ATIStream OpenCL API.
|
||||
// - An example BOINC-OpenCL application, illustrating the use of the BOINC API
|
||||
// and OpenCL API.
|
||||
// - A program for testing various features of BOINC.
|
||||
//
|
||||
// The program reads the input nxn matrix from the "input" file, inverts the
|
||||
// matrix NUM_ITERATIONS times and write to "output" file.
|
||||
//
|
||||
// To run, place the executable in the same directory as an init_data.xml
|
||||
// file specifying the gpu_type (vendor) and gpu_device_num, then invoke
|
||||
// form the command line as follows:
|
||||
// $ cd to/the/directory/containing/executable/and/init_data.mxl/file
|
||||
// $ ./openclapp [options]
|
||||
//
|
||||
// command line options
|
||||
// -run_slow: sleep 1 second after each character
|
||||
// -cpu_time N: use about N CPU seconds after copying files
|
||||
// -early_exit: exit(10) after 30 chars
|
||||
// -early_crash: crash after 30 chars
|
||||
// -early_exit: exit(10) after 30 iterations
|
||||
// -early_crash: crash after 30 iterations
|
||||
// -early_sleep: go into infinite sleep after 30 iterations
|
||||
//
|
||||
// See http://boinc.berkeley.edu/trac/wiki/GPUApp for any compiling issues.
|
||||
// Contributor: Tuan Le (tuanle86@berkeley.edu)
|
||||
// Original contributor: Tuan Le (tuanle86@berkeley.edu)
|
||||
|
||||
#include "atiopencl.hpp"
|
||||
#include "openclapp.hpp"
|
||||
#include "boinc_opencl.h"
|
||||
|
||||
using std::string;
|
||||
|
@ -46,7 +53,6 @@ int main(int argc, char * argv[]) {
|
|||
|
||||
generate_random_input_file(MATRIX_SIZE); //call this if you don't want to
|
||||
//construct the input file manually
|
||||
|
||||
for (i=0; i<argc; i++) {
|
||||
if (!strcmp(argv[i], "-early_exit")) early_exit = true;
|
||||
if (!strcmp(argv[i], "-early_crash")) early_crash = true;
|
||||
|
@ -61,7 +67,8 @@ int main(int argc, char * argv[]) {
|
|||
if (retval) {
|
||||
fprintf(stderr,
|
||||
"ERROR: %s boinc_init returned %d\n",
|
||||
boinc_msg_prefix(buf, sizeof(buf)), retval );
|
||||
boinc_msg_prefix(buf, sizeof(buf)), retval
|
||||
);
|
||||
exit(retval);
|
||||
}
|
||||
|
||||
|
@ -122,7 +129,7 @@ int main(int argc, char * argv[]) {
|
|||
shmem = (UC_SHMEM*)boinc_graphics_make_shmem("matrix_inversion", sizeof(UC_SHMEM));
|
||||
if (!shmem) {
|
||||
fprintf(stderr,
|
||||
"%s failed to create shared mem segment\n",
|
||||
"ERROR: %s failed to create shared mem segment\n",
|
||||
boinc_msg_prefix(buf, sizeof(buf))
|
||||
);
|
||||
}
|
||||
|
@ -150,7 +157,7 @@ int main(int argc, char * argv[]) {
|
|||
}
|
||||
|
||||
// Initialize OpenCL resources
|
||||
if (initialize_cl()==1) {
|
||||
if (initialize_cl(argc, argv) != 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -213,7 +220,7 @@ int main(int argc, char * argv[]) {
|
|||
|
||||
// Releases OpenCL resources
|
||||
if (cleanup_cl()==1) {
|
||||
printf("Error from cleanup_cl() !");
|
||||
fprintf(stderr, "Error from cleanup_cl() !");
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -432,15 +439,15 @@ char * convert_to_string(const char *fileName) {
|
|||
char c;
|
||||
int i=0;
|
||||
|
||||
// look for "atiopencl_kernels.cl" in "boinc/samples/atiopencl/debug" or
|
||||
// in "boinc/samples/atiopencl/release". Note that "atiopencl_kernels.cl"
|
||||
// look for "openclapp_kernels.cl" in "boinc/samples/openclapp/debug" or
|
||||
// in "boinc/samples/openclapp/release". Note that "openclapp_kernels.cl"
|
||||
// is automatically copied to these directories along the building process.
|
||||
FILE *infile=fopen(fileName,"r");
|
||||
if (!infile) { //not found. This typically happens on Linux or Mac.
|
||||
//look for "atiopencl_kernels.cl" in "boinc/sample/atiopencl/" instead.
|
||||
//look for "openclapp_kernels.cl" in "boinc/sample/openclapp/" instead.
|
||||
infile = fopen(KERNELS_FILEPATH,"r");
|
||||
if (!infile) {
|
||||
fprintf(stderr, "File open Error!");
|
||||
fprintf(stderr, "ERROR: Failed to open file %s!", fileName);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
@ -465,131 +472,33 @@ char * convert_to_string(const char *fileName) {
|
|||
|
||||
// Note: OpenCL memory buffer objects will be created in invert
|
||||
// function before kernel calls are made.
|
||||
int initialize_cl(void) {
|
||||
int initialize_cl(int argc, char * argv[]) {
|
||||
cl_int status = 0;
|
||||
size_t deviceListSize;
|
||||
bool standalone = false;
|
||||
int retval;
|
||||
|
||||
devices = NULL;
|
||||
int retval = 0;
|
||||
|
||||
localThreads[0] = LOCAL_WORK_SIZE;
|
||||
globalThreads[0] = GLOBAL_WORK_SIZE;
|
||||
cl_platform_id platform = NULL;
|
||||
cl_device_id device;
|
||||
|
||||
if (boinc_is_standalone()) {
|
||||
/*
|
||||
* Have a look at the available platforms and pick either
|
||||
* the AMD one if available or a reasonable default.
|
||||
*/
|
||||
|
||||
cl_uint numPlatforms;
|
||||
status = clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
if(status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Getting Platforms. (clGetPlatformsIDs) returned %d\n",
|
||||
status
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (numPlatforms > 0) {
|
||||
cl_platform_id* platforms = (cl_platform_id *)
|
||||
malloc(sizeof(cl_platform_id)*numPlatforms);
|
||||
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Getting Platform Ids. (clGetPlatformsIDs) returned %d\n",
|
||||
status
|
||||
);
|
||||
|
||||
return 1;
|
||||
}
|
||||
for (unsigned int i=0; i < numPlatforms; ++i) {
|
||||
char pbuff[100];
|
||||
status = clGetPlatformInfo(platforms[i],
|
||||
CL_PLATFORM_VENDOR,
|
||||
sizeof(pbuff),
|
||||
pbuff,
|
||||
NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Getting Platform Info.(clGetPlatformInfo)returned %d\n",
|
||||
status
|
||||
);
|
||||
|
||||
return 1;
|
||||
}
|
||||
platform = platforms[i];
|
||||
if (!strcmp(pbuff, "Advanced Micro Devices, Inc.")) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
delete platforms;
|
||||
}
|
||||
|
||||
if(NULL == platform) {
|
||||
fprintf(stderr, "ERROR: NULL platform found so Exiting Application.");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we could find our platform, use it. Otherwise use just available platform.
|
||||
*/
|
||||
cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM,
|
||||
(cl_context_properties)platform,
|
||||
0
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Create an OpenCL context
|
||||
/////////////////////////////////////////////////////////////////
|
||||
context = clCreateContextFromType(cps, CL_DEVICE_TYPE_ALL, NULL, NULL, &status);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Creating Context. (clCreateContextFromType) returned %d\n",
|
||||
status
|
||||
);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* First, get the size of device list data */
|
||||
status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Getting Context Info (device list size, clGetContextInfo)returned %d\n",
|
||||
status
|
||||
);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Detect OpenCL devices
|
||||
/////////////////////////////////////////////////////////////////
|
||||
devices = (cl_device_id *)malloc(deviceListSize);
|
||||
if (devices == 0) {
|
||||
fprintf(stderr, "Error: No devices found.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Now, get the device list data */
|
||||
status = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceListSize, devices, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Getting Context Info (device list, clGetContextInfo) returned %d\n",
|
||||
status
|
||||
);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
device = devices[0];
|
||||
|
||||
} else { // NOT stand_alone
|
||||
retval = boinc_get_opencl_ids(&device, &platform);
|
||||
// IMPORTANT NOTE: production applications should always specify
|
||||
// the GPU type (vendor) in the call to boinc_get_opencl_ids as
|
||||
// the third argument: it must be either PROC_TYPE_NVIDIA_GPU,
|
||||
// PROC_TYPE_AMD_GPU or PROC_TYPE_INTEL_GPU. This is to support
|
||||
// older versions of the BOINC client which do not include the
|
||||
// <gpu-type> field in the init_data.xml file.
|
||||
//
|
||||
// This sample passes -1 for the type argument to allow using
|
||||
// just one sample for any GPU vendor (AMD, NVIDIA or Intel.)
|
||||
// As a result, the init_data.xml file for this sample must
|
||||
// specify the GPU type (vendor) and either gpu_device_num (the
|
||||
// GPU's index from that vendor) or gpu_opencl_dev_index (the
|
||||
// GPU's index among OpenCL-capable devices from that vendor.)
|
||||
//
|
||||
// See the ReadMe file for more details, including an explanation
|
||||
// of the difference between the gpu_device_num and the
|
||||
// gpu_opencl_dev_index.
|
||||
retval = boinc_get_opencl_ids(argc, argv, -1, &device, &platform);
|
||||
if (retval) {
|
||||
fprintf(stderr,
|
||||
"Error: boinc_get_opencl_ids() failed with error %d\n",
|
||||
|
@ -607,7 +516,6 @@ int initialize_cl(void) {
|
|||
fprintf(stderr, "Error: clCreateContext() returned %d\n", status);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Create an OpenCL command queue
|
||||
|
@ -618,7 +526,6 @@ int initialize_cl(void) {
|
|||
"Error: Creating Command Queue. (clCreateCommandQueue) returned %d\n",
|
||||
status
|
||||
);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -638,7 +545,7 @@ int initialize_cl(void) {
|
|||
}
|
||||
|
||||
/* create a cl program executable for all the devices specified */
|
||||
status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);
|
||||
status = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Building Program (clBuildProgram) returned %d\n",
|
||||
|
@ -724,15 +631,6 @@ int cleanup_cl(void) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
status = clReleaseMemObject(inputBuffer);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: In clReleaseMemObject (inputBuffer) returned %d\n",
|
||||
status
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
|
||||
status = clReleaseCommandQueue(commandQueue);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
|
@ -768,11 +666,6 @@ void cleanup_host(void) {
|
|||
output = NULL;
|
||||
}
|
||||
|
||||
if (devices != NULL) {
|
||||
free(devices);
|
||||
devices = NULL;
|
||||
}
|
||||
|
||||
if (source != NULL) {
|
||||
free((char *)source);
|
||||
source = NULL;
|
||||
|
@ -806,7 +699,6 @@ void print_to_file(MFILE *out, float *h_odata, int n) {
|
|||
*/
|
||||
int run_GEStep1A_kernel(cl_float * AI, int i, int n2, int lda2) {
|
||||
cl_int status;
|
||||
cl_event events[2];
|
||||
|
||||
/*
|
||||
* the input array to the kernel. This array will eventually be modified
|
||||
|
@ -862,7 +754,8 @@ int run_GEStep1A_kernel(cl_float * AI, int i, int n2, int lda2) {
|
|||
localThreads,
|
||||
0,
|
||||
NULL,
|
||||
&events[0]);
|
||||
NULL);
|
||||
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel) returned %d\n",
|
||||
|
@ -871,35 +764,18 @@ int run_GEStep1A_kernel(cl_float * AI, int i, int n2, int lda2) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* wait for the kernel call to finish execution */
|
||||
status = clWaitForEvents(1, &events[0]);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Waiting for kernel run to finish. (clWaitForEvents) returned %d\n",
|
||||
status
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
|
||||
status = clReleaseEvent(events[0]);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Release event object. (clReleaseEvent) returned %d\n",
|
||||
status
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
clFinish(commandQueue);
|
||||
|
||||
/* Enqueue readBuffer*/ //Note: we are reading back from inputBuffer since AI is modified directly in kernel
|
||||
status = clEnqueueReadBuffer(commandQueue,
|
||||
inputBuffer,
|
||||
CL_TRUE,
|
||||
CL_FALSE,
|
||||
0,
|
||||
globalThreads[0] * sizeof(cl_float),
|
||||
AI,
|
||||
0,
|
||||
NULL,
|
||||
&events[1]);
|
||||
NULL);
|
||||
|
||||
if(status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
|
@ -909,30 +785,11 @@ int run_GEStep1A_kernel(cl_float * AI, int i, int n2, int lda2) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* Wait for the read buffer to finish execution */
|
||||
status = clWaitForEvents(1, &events[1]);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Waiting for read buffer call to finish. (clWaitForEvents) returned %d\n",
|
||||
status
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
|
||||
status = clReleaseEvent(events[1]);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Release event object. (clReleaseEvent) returned %d\n",
|
||||
status
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int run_GEStep2_kernel(cl_float * AI, cl_float diag, int i, int n2, int lda2) {
|
||||
cl_int status;
|
||||
cl_event events[2];
|
||||
|
||||
/*
|
||||
* the input array to the kernel. This array will eventually be modified
|
||||
|
@ -998,7 +855,8 @@ int run_GEStep2_kernel(cl_float * AI, cl_float diag, int i, int n2, int lda2) {
|
|||
localThreads,
|
||||
0,
|
||||
NULL,
|
||||
&events[0]);
|
||||
NULL);
|
||||
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel) returned %d\n",
|
||||
|
@ -1007,65 +865,29 @@ int run_GEStep2_kernel(cl_float * AI, cl_float diag, int i, int n2, int lda2) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* wait for the kernel call to finish execution */
|
||||
status = clWaitForEvents(1, &events[0]);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Waiting for kernel run to finish. (clWaitForEvents) returned %d\n",
|
||||
status
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
|
||||
status = clReleaseEvent(events[0]);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Release event object. (clReleaseEvent) returned %d\n",
|
||||
status
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
clFinish(commandQueue);
|
||||
|
||||
/* Enqueue readBuffer*/
|
||||
//Note: we are reading back from inputBuffer since AI is modified directly in kernel
|
||||
status = clEnqueueReadBuffer(commandQueue,
|
||||
inputBuffer,
|
||||
CL_TRUE,
|
||||
CL_FALSE,
|
||||
0,
|
||||
globalThreads[0] * sizeof(cl_float),
|
||||
AI,
|
||||
0,
|
||||
NULL,
|
||||
&events[1]);
|
||||
NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer) returned %d\n", status);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Wait for the read buffer to finish execution */
|
||||
status = clWaitForEvents(1, &events[1]);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Waiting for read buffer call to finish. (clWaitForEvents) returned %d\n",
|
||||
status
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
|
||||
status = clReleaseEvent(events[1]);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Release event object. (clReleaseEvent) returned %d\n",
|
||||
status
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int run_GEStep3_kernel(cl_float * AI, int i, int n2, int lda2) {
|
||||
cl_int status;
|
||||
cl_event events[2];
|
||||
|
||||
/*
|
||||
* The input array to the kernel. This array will eventually be modified
|
||||
|
@ -1121,7 +943,8 @@ int run_GEStep3_kernel(cl_float * AI, int i, int n2, int lda2) {
|
|||
localThreads,
|
||||
0,
|
||||
NULL,
|
||||
&events[0]);
|
||||
NULL);
|
||||
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel) returned %d\n",
|
||||
|
@ -1130,24 +953,7 @@ int run_GEStep3_kernel(cl_float * AI, int i, int n2, int lda2) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* wait for the kernel call to finish execution */
|
||||
status = clWaitForEvents(1, &events[0]);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Waiting for kernel run to finish. (clWaitForEvents) returned %d\n",
|
||||
status
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
|
||||
status = clReleaseEvent(events[0]);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Release event object. (clReleaseEvent) returned %d\n",
|
||||
status
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
clFinish(commandQueue);
|
||||
|
||||
/* Enqueue readBuffer*/
|
||||
//Note: we are reading back from inputBuffer since AI is modified directly in kernel
|
||||
|
@ -1159,7 +965,7 @@ int run_GEStep3_kernel(cl_float * AI, int i, int n2, int lda2) {
|
|||
AI,
|
||||
0,
|
||||
NULL,
|
||||
&events[1]);
|
||||
NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer) returned %d\n",
|
||||
|
@ -1168,25 +974,6 @@ int run_GEStep3_kernel(cl_float * AI, int i, int n2, int lda2) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* Wait for the read buffer to finish execution */
|
||||
status = clWaitForEvents(1, &events[1]);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Waiting for read buffer call to finish. (clWaitForEvents) returned %d\n",
|
||||
status
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
|
||||
status = clReleaseEvent(events[1]);
|
||||
if(status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: Release event object. (clReleaseEvent) returned %d\n",
|
||||
status
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1213,7 +1000,7 @@ void invert(cl_float * input, cl_float *output, int n) {
|
|||
volatile clock_t gputime;
|
||||
gputime=clock();
|
||||
|
||||
int lda = ((n+15)&~15|16);
|
||||
int lda = (((n+15)&(~15))|16);
|
||||
cl_float * AI_d = (cl_float *)malloc(sizeof(cl_float)*n*lda*2);
|
||||
memset(AI_d,0,sizeof(cl_float)*n*lda*2);
|
||||
for (int i = 0; i < n; i++) {
|
||||
|
@ -1238,11 +1025,12 @@ void invert(cl_float * input, cl_float *output, int n) {
|
|||
);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
// Note: there's no output buffer. In kernel, AI_d is modified directly.
|
||||
// Thus, we should read the result back to host from inputBuffer as well.
|
||||
|
||||
invertge(AI_d, lda, n);
|
||||
gputime=clock()-gputime;fprintf(stderr, " %7.1f ms ",gputime/1.e3f);
|
||||
gputime=clock()-gputime;
|
||||
fprintf(stderr, " %7.1f ms ",gputime/1.e3f);
|
||||
fprintf(stderr, " %7.2f Gflops", 1e-3*(3.0)*n*n*n/3.0/gputime);
|
||||
|
||||
#ifdef VERIFY
|
||||
|
@ -1270,6 +1058,15 @@ void invert(cl_float * input, cl_float *output, int n) {
|
|||
for (int i = 0; i < n; i++) {
|
||||
memcpy(&output[n*i], &AI_d[lda*i*2+n], sizeof(cl_float)*n);
|
||||
}
|
||||
|
||||
status = clReleaseMemObject(inputBuffer);
|
||||
if (status != CL_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"Error: In clReleaseMemObject (inputBuffer) returned %d\n",
|
||||
status
|
||||
);
|
||||
}
|
||||
|
||||
free(AI_d);
|
||||
fprintf(stderr," done!\n");
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// This file is part of BOINC.
|
||||
// http://boinc.berkeley.edu
|
||||
// Copyright (C) 2008 University of California
|
||||
// Copyright (C) 2013 University of California
|
||||
//
|
||||
// BOINC is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU Lesser General Public License
|
||||
|
@ -16,10 +16,10 @@
|
|||
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
// See http://boinc.berkeley.edu/trac/wiki/GPUApp for any compiling issues.
|
||||
// Contributor: Tuan Le (tuanle86@berkeley.edu)
|
||||
// Original contributor: Tuan Le (tuanle86@berkeley.edu)
|
||||
|
||||
#ifndef ATIOPENCL_H_
|
||||
#define ATIOPENCL_H_
|
||||
#ifndef OPENCLAPP_H_
|
||||
#define OPENCLAPP_H_
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
|
@ -37,8 +37,8 @@
|
|||
|
||||
#define INPUT_FILENAME "input"
|
||||
#define OUTPUT_FILENAME "output"
|
||||
#define KERNELS_FILENAME "atiopencl_kernels.cl"
|
||||
#define KERNELS_FILEPATH "../../atiopencl_kernels.cl" // for Linux and Mac
|
||||
#define KERNELS_FILENAME "openclapp_kernels.cl"
|
||||
#define KERNELS_FILEPATH "../../openclapp_kernels.cl" // for Linux and Mac
|
||||
#define CHECKPOINT_FILE "matrix_inversion_state"
|
||||
|
||||
#define LOCAL_WORK_SIZE 1
|
||||
|
@ -115,7 +115,6 @@ cl_uint height;
|
|||
cl_mem inputBuffer; //in this sample app, we will read the result
|
||||
//from the device back to host from inputBuffer as well.
|
||||
cl_context context;
|
||||
cl_device_id *devices;
|
||||
cl_command_queue commandQueue;
|
||||
|
||||
cl_program program;
|
||||
|
@ -185,7 +184,7 @@ void update_shmem() {
|
|||
* Calls are made to set up OpenCL memory buffers that this program uses
|
||||
* and to load the programs into memory and get kernel handles.
|
||||
*/
|
||||
int initialize_cl(void);
|
||||
int initialize_cl(int argc, char * argv[]);
|
||||
|
||||
int initialize_host(FILE *infile);
|
||||
|
||||
|
@ -226,4 +225,4 @@ void invertge(cl_float * AI_d,
|
|||
int lda,
|
||||
int n);
|
||||
|
||||
#endif /* #ifndef ATIOPENCL_H_ */
|
||||
#endif /* #ifndef OPENCLAPP_H_ */
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// This file is part of BOINC.
|
||||
// http://boinc.berkeley.edu
|
||||
// Copyright (C) 2008 University of California
|
||||
// Copyright (C) 2013 University of California
|
||||
//
|
||||
// BOINC is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU Lesser General Public License
|
||||
|
@ -21,6 +21,7 @@
|
|||
// Contributor: Tuan Le (tuanle86@berkeley.edu)
|
||||
|
||||
__kernel void GEStep1A(__global float * AI, int i, int n2, int lda2) {
|
||||
//int k = get_group_id(0) * get_local_size(0) + get_local_id(0);
|
||||
int k=get_global_id(0);
|
||||
if (k>i && k < n2 && AI[i*lda2+k]!=0) {
|
||||
float multiplyer = -AI[i*lda2+k]/AI[i*lda2+i];
|
||||
|
|
Loading…
Reference in New Issue