// hello.c
//
// File: hello.c
//
// Abstract: A simple "Hello World" compute example showing basic usage of OpenCL which
// calculates the mathematical square (X[i] = pow(X[i],2)) for a buffer of
// floating point values.
//
//
// Version: <1.0>
//
// Disclaimer: IMPORTANT:  This Apple software is supplied to you by Apple Inc. ("Apple")
// in consideration of your agreement to the following terms, and your use,
// installation, modification or redistribution of this Apple software
// constitutes acceptance of these terms. If you do not agree with these
// terms, please do not use, install, modify or redistribute this Apple
// software.
//
// In consideration of your agreement to abide by the following terms, and
// subject to these terms, Apple grants you a personal, non - exclusive
// license, under Apple's copyrights in this original Apple software ( the
// "Apple Software" ), to use, reproduce, modify and redistribute the Apple
// Software, with or without modifications, in source and / or binary forms;
// provided that if you redistribute the Apple Software in its entirety and
// without modifications, you must retain this notice and the following text
// and disclaimers in all such redistributions of the Apple Software. Neither
// the name, trademarks, service marks or logos of Apple Inc. may be used to
// endorse or promote products derived from the Apple Software without specific
// prior written permission from Apple. Except as expressly stated in this
// notice, no other rights or licenses, express or implied, are granted by
// Apple herein, including but not limited to any patent rights that may be
// infringed by your derivative works or by other works in which the Apple
// Software may be incorporated.
//
// The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO
// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION
// ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
//
// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
//
////////////////////////////////////////////////////////////////////////////////
| #include <fcntl.h> | 
| #include <stdio.h> | 
| #include <stdlib.h> | 
| #include <string.h> | 
| #include <math.h> | 
| #include <unistd.h> | 
| #include <sys/types.h> | 
| #include <sys/stat.h> | 
| #include <OpenCL/opencl.h> | 
////////////////////////////////////////////////////////////////////////////////
// Use a static data size for simplicity.
//
// DATA_SIZE is the number of float elements in the host-side input and result
// arrays, and therefore the number of values squared by the kernel.
//
#define DATA_SIZE (1024)
////////////////////////////////////////////////////////////////////////////////
// Simple compute kernel which computes the square of an input array.
//
// OpenCL C source for the "square" kernel, compiled at run time.  Each work
// item squares one element: output[i] = input[i] * input[i].  The bounds check
// against `count` lets the host launch more work items than there are
// elements without writing past the end of `output`.
//
// The index is a size_t because that is what get_global_id() returns; storing
// it in an int would narrow it and compare signed against unsigned.
//
const char *KernelSource =
    "\n"
    "__kernel void square(\n"
    "    __global float* input,\n"
    "    __global float* output,\n"
    "    const unsigned int count)\n"
    "{\n"
    "    size_t i = get_global_id(0);\n"
    "    if (i < count)\n"
    "        output[i] = input[i] * input[i];\n"
    "}\n"
    "\n";
| //////////////////////////////////////////////////////////////////////////////// | 
| int main(int argc, char** argv) | 
| { | 
| int err; // error code returned from api calls | 
| float data[DATA_SIZE]; // original data set given to device | 
| float results[DATA_SIZE]; // results returned from device | 
| unsigned int correct; // number of correct results returned | 
| size_t global; // global domain size for our calculation | 
| size_t local; // local domain size for our calculation | 
| cl_device_id device_id; // compute device id | 
| cl_context context; // compute context | 
| cl_command_queue commands; // compute command queue | 
| cl_program program; // compute program | 
| cl_kernel kernel; // compute kernel | 
| cl_mem input; // device memory used for the input array | 
| cl_mem output; // device memory used for the output array | 
| // Fill our data set with random float values | 
| // | 
| int i = 0; | 
| unsigned int count = DATA_SIZE; | 
| for(i = 0; i < count; i++) | 
| data[i] = rand() / (float)RAND_MAX; | 
| // Connect to a compute device | 
| // | 
| int gpu = 1; | 
| err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL); | 
| if (err != CL_SUCCESS) | 
|     { | 
|         printf("Error: Failed to create a device group!\n"); | 
| return EXIT_FAILURE; | 
| } | 
| // Create a compute context | 
| // | 
| context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); | 
| if (!context) | 
|     { | 
|         printf("Error: Failed to create a compute context!\n"); | 
| return EXIT_FAILURE; | 
| } | 
| // Create a command commands | 
| // | 
| commands = clCreateCommandQueue(context, device_id, 0, &err); | 
| if (!commands) | 
|     { | 
|         printf("Error: Failed to create a command commands!\n"); | 
| return EXIT_FAILURE; | 
| } | 
| // Create the compute program from the source buffer | 
| // | 
| program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err); | 
| if (!program) | 
|     { | 
|         printf("Error: Failed to create compute program!\n"); | 
| return EXIT_FAILURE; | 
| } | 
| // Build the program executable | 
| // | 
| err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); | 
| if (err != CL_SUCCESS) | 
|     { | 
| size_t len; | 
| char buffer[2048]; | 
|         printf("Error: Failed to build program executable!\n"); | 
| clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); | 
|         printf("%s\n", buffer); | 
| exit(1); | 
| } | 
| // Create the compute kernel in the program we wish to run | 
| // | 
| kernel = clCreateKernel(program, "square", &err); | 
| if (!kernel || err != CL_SUCCESS) | 
|     { | 
|         printf("Error: Failed to create compute kernel!\n"); | 
| exit(1); | 
| } | 
| // Create the input and output arrays in device memory for our calculation | 
| // | 
| input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * count, NULL, NULL); | 
| output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, NULL); | 
| if (!input || !output) | 
|     { | 
|         printf("Error: Failed to allocate device memory!\n"); | 
| exit(1); | 
| } | 
| // Write our data set into the input array in device memory | 
| // | 
| err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL); | 
| if (err != CL_SUCCESS) | 
|     { | 
|         printf("Error: Failed to write to source array!\n"); | 
| exit(1); | 
| } | 
| // Set the arguments to our compute kernel | 
| // | 
| err = 0; | 
| err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input); | 
| err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output); | 
| err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count); | 
| if (err != CL_SUCCESS) | 
|     { | 
|         printf("Error: Failed to set kernel arguments! %d\n", err); | 
| exit(1); | 
| } | 
| // Get the maximum work group size for executing the kernel on the device | 
| // | 
| err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL); | 
| if (err != CL_SUCCESS) | 
|     { | 
|         printf("Error: Failed to retrieve kernel work group info! %d\n", err); | 
| exit(1); | 
| } | 
| // Execute the kernel over the entire range of our 1d input data set | 
| // using the maximum number of work group items for this device | 
| // | 
| global = count; | 
| err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL); | 
| if (err) | 
|     { | 
|         printf("Error: Failed to execute kernel!\n"); | 
| return EXIT_FAILURE; | 
| } | 
| // Wait for the command commands to get serviced before reading back results | 
| // | 
| clFinish(commands); | 
| // Read back the results from the device to verify the output | 
| // | 
| err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL ); | 
| if (err != CL_SUCCESS) | 
|     { | 
|         printf("Error: Failed to read output array! %d\n", err); | 
| exit(1); | 
| } | 
| // Validate our results | 
| // | 
| correct = 0; | 
| for(i = 0; i < count; i++) | 
|     { | 
| if(results[i] == data[i] * data[i]) | 
| correct++; | 
| } | 
| // Print a brief summary detailing the results | 
| // | 
|     printf("Computed '%d/%d' correct values!\n", correct, count); | 
| // Shutdown and cleanup | 
| // | 
| clReleaseMemObject(input); | 
| clReleaseMemObject(output); | 
| clReleaseProgram(program); | 
| clReleaseKernel(kernel); | 
| clReleaseCommandQueue(commands); | 
| clReleaseContext(context); | 
| return 0; | 
| } | 
// Copyright © 2011 Apple Inc. All Rights Reserved. Terms of Use | Privacy Policy | Updated: 2011-03-01