diff --git a/samples/nvopencl/nvopencl.cpp b/samples/nvopencl/nvopencl.cpp index 0ae00c167d..9ce7bd7dec 100644 --- a/samples/nvopencl/nvopencl.cpp +++ b/samples/nvopencl/nvopencl.cpp @@ -458,8 +458,7 @@ int initialize_cl(void) { size_t deviceListSize; localThreads[0] = LOCAL_WORK_SIZE; - // rounded up to the nearest multiple of the LocalWorkSize - globalThreads[0] = shrRoundUp((int)(localThreads[0]),width*height); + globalThreads[0] = shrRoundUp(GLOBAL_WORK_SIZE,width*height); /* * Have a look at the available platforms and pick either diff --git a/samples/nvopencl/nvopencl.hpp b/samples/nvopencl/nvopencl.hpp index 031613077b..2390a4731e 100644 --- a/samples/nvopencl/nvopencl.hpp +++ b/samples/nvopencl/nvopencl.hpp @@ -37,7 +37,8 @@ #define KERNELS_FILEPATH "../../nvopencl_kernels.cl" // for Linux and Mac #define CHECKPOINT_FILE "matrix_inversion_state" -#define LOCAL_WORK_SIZE 400 +#define LOCAL_WORK_SIZE 1 +#define GLOBAL_WORK_SIZE 400 #define MATRIX_SIZE 10 #define NUM_ITERATIONS 501 // execute the kernel NUM_ITERATIONS times