Sources/OpenCL/Sources/OpenCLKernel.cpp
//--------------------------------------------------------------------------- |
// |
// File: OpenCLKernel.cpp |
// |
// Abstract: A utility class to manage OpenCL kernels |
// |
// Disclaimer: IMPORTANT: This Apple software is supplied to you by |
// Inc. ("Apple") in consideration of your agreement to the following terms, |
// and your use, installation, modification or redistribution of this Apple |
// software constitutes acceptance of these terms. If you do not agree with |
// these terms, please do not use, install, modify or redistribute this |
// Apple software. |
// |
// In consideration of your agreement to abide by the following terms, and |
// subject to these terms, Apple grants you a personal, non-exclusive |
// license, under Apple's copyrights in this original Apple software (the |
// "Apple Software"), to use, reproduce, modify and redistribute the Apple |
// Software, with or without modifications, in source and/or binary forms; |
// provided that if you redistribute the Apple Software in its entirety and |
// without modifications, you must retain this notice and the following |
// text and disclaimers in all such redistributions of the Apple Software. |
// Neither the name, trademarks, service marks or logos of Apple Inc. may |
// be used to endorse or promote products derived from the Apple Software |
// without specific prior written permission from Apple. Except as |
// expressly stated in this notice, no other rights or licenses, express |
// or implied, are granted by Apple herein, including but not limited to |
// any patent rights that may be infringed by your derivative works or by |
// other works in which the Apple Software may be incorporated. |
// |
// The Apple Software is provided by Apple on an "AS IS" basis. APPLE |
// MAKES NO WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION |
// THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS |
// FOR A PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND |
// OPERATION ALONE OR IN COMBINATION WITH YOUR PRODUCTS. |
// |
// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL |
// OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
// INTERRUPTION) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, |
// MODIFICATION AND/OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED |
// AND WHETHER UNDER THEORY OF CONTRACT, TORT (INCLUDING NEGLIGENCE), |
// STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE |
// POSSIBILITY OF SUCH DAMAGE. |
// |
// Copyright (c) 2009 Apple Inc., All rights reserved. |
// |
//--------------------------------------------------------------------------- |
//--------------------------------------------------------------------------- |
#include <iostream> |
#include <map> |
//--------------------------------------------------------------------------- |
#include <OpenCL/opencl.h> |
//--------------------------------------------------------------------------- |
#include "OpenCLKernel.h" |
//--------------------------------------------------------------------------- |
//--------------------------------------------------------------------------- |
using namespace OpenCL; |
//--------------------------------------------------------------------------- |
#pragma mark - |
#pragma mark Private - Constants |
//--------------------------------------------------------------------------- |
static const size_t kOpenCLBufferSize = sizeof(cl_mem); |
//--------------------------------------------------------------------------- |
//--------------------------------------------------------------------------- |
#pragma mark - |
#pragma mark Private - Data Structures |
//--------------------------------------------------------------------------- |
typedef std::map<std::string,cl_uint> OpenCLWorkDimMap; |
typedef std::map<std::string,cl_kernel> OpenCLKernelMap; |
typedef OpenCLWorkDimMap::iterator OpenCLWorkDimMapIterator; |
typedef OpenCLKernelMap::iterator OpenCLKernelMapIterator; |
//--------------------------------------------------------------------------- |
class OpenCL::KernelStruct |
{ |
public: |
size_t mnLocalDomainSize; |
cl_int mnError; |
cl_device_id mnDeviceId; |
cl_command_queue mpCommandQueue; |
cl_program mpProgram; |
OpenCLWorkDimMap maWorkDimMap; // Per kernel work dimension associative array |
OpenCLKernelMap maKernelMap; // Kernels associated array |
OpenCLKernelMapIterator mpKernelMapIter; // Kernel associative array iterator |
}; |
//--------------------------------------------------------------------------- |
//--------------------------------------------------------------------------- |
#pragma mark - |
#pragma mark Private - Utilities - Kernel |
//--------------------------------------------------------------------------- |
// |
// The core of the OpenCL execution model is defined by how the kernels |
// execute. When a kernel is submitted for execution by the host, an |
// index space is defined. An instance of the kernel executes for each |
// point in this index space. This kernel instance is called a work-item |
// and is identified by its point in the index space, which provides a |
// global ID for the work-item. Each work-item executes the same code but |
// the specific execution pathway through the code and the data operated |
// upon can vary per work-item. |
// |
// Work-items are organized into work-groups. The work-groups provide a |
// more coarse-grained decomposition of the index space. Work-groups |
// are assigned a unique work-group ID with the same dimensionality as |
// the index space used for the work-items. Work-items are assigned a |
// unique local ID within a work-group so that a single work-item can |
// be uniquely identified by its global ID or by a combination of its |
// local ID and work-group ID. The work-items in a given work-group |
// execute concurrently on the processing elements of a single compute |
// unit. |
// |
//--------------------------------------------------------------------------- |
// |
// For a complete discussion of OpenCL Kernel APIs refer to, |
// |
// http://www.khronos.org/registry/cl/specs/opencl-1.0.33.pdf |
// |
//--------------------------------------------------------------------------- |
static bool OpenCLKernelCreate(const std::string &rKernelName, |
OpenCL::KernelStruct *pKernel) |
{ |
const char *pKernelName = rKernelName.c_str(); |
cl_kernel pKernelMem = clCreateKernel(pKernel->mpProgram, |
pKernelName, |
&pKernel->mnError); |
bool bCreatedKernel = ( pKernelMem != NULL ) && ( pKernel->mnError == CL_SUCCESS ); |
if( !bCreatedKernel ) |
{ |
std::cerr << ">> ERROR: OpenCL Kernel - Failed to create a compute kernel!" << std::endl; |
} // if |
else |
{ |
// Insert the allocated kernel memory object into the associative array |
pKernel->maKernelMap.insert(std::make_pair(rKernelName,pKernelMem)); |
// Check to see if now you can find the kernel memory object |
pKernel->mpKernelMapIter = pKernel->maKernelMap.find(rKernelName); |
// If the end of the associative arry was reached then the memory |
// object was not inserted |
if( pKernel->mpKernelMapIter == pKernel->maKernelMap.end() ) |
{ |
std::cerr << ">> ERROR: OpenCL Kernel - Failed to insert the kernel \"" << rKernelName << "\" into the map!" << std::endl; |
} // if |
} // else |
return( bCreatedKernel ); |
} // OpenCLKernelCreate |
//--------------------------------------------------------------------------- |
static inline bool OpenCLKernelSetParameter(OpenCL::KernelStruct *pKernel, |
const cl_uint nParamIndex, |
const size_t nParamSize, |
const void *pParam) |
{ |
pKernel->mnError = clSetKernelArg(pKernel->mpKernelMapIter->second, |
nParamIndex, |
nParamSize, |
pParam); |
bool bParamSet = pKernel->mnError == CL_SUCCESS; |
if( !bParamSet ) |
{ |
std::cerr << ">> ERROR: OpenCL Kernel - Failed to set kernel arguments!" << std::endl; |
} // if |
return( bParamSet ); |
} // OpenCLKernelSetParameter |
//--------------------------------------------------------------------------- |
static inline bool OpenCLKernelEnqueueNDRange(OpenCL::KernelStruct *pKernel, |
const size_t *pGlobalWorkOffset, |
const size_t *pGlobalWorkSize, |
const size_t *pLocalWorkSize) |
{ |
cl_uint nWorkDim = pKernel->maWorkDimMap[pKernel->mpKernelMapIter->first]; |
pKernel->mnError = clEnqueueNDRangeKernel(pKernel->mpCommandQueue, |
pKernel->mpKernelMapIter->second, |
nWorkDim, |
pGlobalWorkOffset, |
pGlobalWorkSize, |
pLocalWorkSize, |
0, |
NULL, |
NULL); |
bool bEnqueued = pKernel->mnError == CL_SUCCESS; |
if( !bEnqueued ) |
{ |
std::cerr << ">> ERROR: OpenCL Kernel - Failed to execute kernel!" << std::endl; |
} // if |
return( bEnqueued ); |
} // OpenCLKernelEnqueueNDRange |
//--------------------------------------------------------------------------- |
static inline bool OpenCLKernelGetWorkGroupSize( OpenCL::KernelStruct *pKernel ) |
{ |
pKernel->mnError = clGetKernelWorkGroupInfo(pKernel->mpKernelMapIter->second, |
pKernel->mnDeviceId, |
CL_KERNEL_WORK_GROUP_SIZE, |
sizeof(size_t), |
&pKernel->mnLocalDomainSize, |
NULL); |
bool bGotKernelWGS = pKernel->mnError == CL_SUCCESS; |
if( !bGotKernelWGS ) |
{ |
std::cerr << ">> ERROR: OpenCL Kernel - Failed to retrieve kernel work group info!" << std::endl; |
} // if |
return( bGotKernelWGS ); |
} // OpenCLKernelGetWorkGroupSize |
//--------------------------------------------------------------------------- |
#pragma mark - |
#pragma mark Private - Utilities - Misc. |
//--------------------------------------------------------------------------- |
// |
// Execute the kernel using the global work offset, global work size, |
// and local work size. If the input local work size was NULL, then |
// determine the local domain size of the device. |
// |
//--------------------------------------------------------------------------- |
static bool OpenCLKernelExecute(OpenCL::KernelStruct *pKernel, |
const size_t *pGlobalWorkOffset, |
const size_t *pGlobalWorkSize, |
const size_t *pLocalWorkSize) |
{ |
bool bKernelExeced = false; |
if( pLocalWorkSize == NULL ) |
{ |
// Get the maximum work group size for executing the kernel on the device |
bKernelExeced = OpenCLKernelGetWorkGroupSize( pKernel ); |
if( bKernelExeced ) |
{ |
bKernelExeced = OpenCLKernelEnqueueNDRange(pKernel, |
pGlobalWorkOffset, |
pGlobalWorkSize, |
&pKernel->mnLocalDomainSize); |
} // if |
} // if |
else |
{ |
bKernelExeced = OpenCLKernelEnqueueNDRange(pKernel, |
pGlobalWorkOffset, |
pGlobalWorkSize, |
pLocalWorkSize); |
} // else |
return( bKernelExeced ); |
} // OpenCLKernelExecute |
//--------------------------------------------------------------------------- |
// |
// Acquire a kernel object. If the kernel object was previously acquired, |
// then return its pointer. |
// |
//--------------------------------------------------------------------------- |
static bool OpenCLKernelAcquire(const std::string &rKernelName, |
OpenCL::KernelStruct *pKernel) |
{ |
bool bKernelAcquired = false; |
if( rKernelName.length() ) |
{ |
OpenCLKernelMapIterator pKernelMapIterEnd = pKernel->maKernelMap.end(); |
pKernel->mpKernelMapIter = pKernel->maKernelMap.find(rKernelName); |
if( pKernel->mpKernelMapIter == pKernelMapIterEnd ) |
{ |
bKernelAcquired = OpenCLKernelCreate(rKernelName, pKernel ); |
} // if |
else |
{ |
bKernelAcquired = pKernel->mpKernelMapIter != pKernelMapIterEnd; |
} // else |
} // if |
else |
{ |
std::cerr << ">> ERROR: OpenCL Kernel - Invalid kernel name!" << std::endl; |
} // else |
return( bKernelAcquired ); |
} // OpenCLKernelAcquire |
//--------------------------------------------------------------------------- |
#pragma mark - |
#pragma mark Private - Utilities - Memory |
//--------------------------------------------------------------------------- |
// |
// Release the kernel object memory structure. |
// |
//--------------------------------------------------------------------------- |
static void OpenCLKernelRelease( OpenCL::KernelStruct *pKernel ) |
{ |
if( pKernel != NULL ) |
{ |
OpenCLKernelMapIterator pKernelMapPos; |
OpenCLKernelMapIterator pKernelMapPosBegin = pKernel->maKernelMap.begin(); |
OpenCLKernelMapIterator pKernelMapPosEnd = pKernel->maKernelMap.end(); |
for(pKernelMapPos = pKernelMapPosBegin; |
pKernelMapPos != pKernelMapPosEnd; |
++pKernelMapPos ) |
{ |
if( pKernelMapPos->second ) |
{ |
clReleaseKernel(pKernelMapPos->second); |
} // if |
} // for |
pKernel->maKernelMap.clear(); |
pKernel->maWorkDimMap.clear(); |
delete pKernel; |
} // if |
} // OpenCLKernelRelease |
//--------------------------------------------------------------------------- |
// |
// Clone kernels' work dimension associative array. |
// |
//--------------------------------------------------------------------------- |
static void OpenCLKernelWorkDimMapClone(OpenCL::KernelStruct *pSrcKernel, |
OpenCL::KernelStruct *pDstKernel) |
{ |
OpenCLWorkDimMapIterator pSrcWorkDimMapPos; |
OpenCLWorkDimMapIterator pSrcWorkDimMapPosBegin = pSrcKernel->maWorkDimMap.begin(); |
OpenCLWorkDimMapIterator pSrcWorkDimMapPosEnd = pSrcKernel->maWorkDimMap.end(); |
for(pSrcWorkDimMapPos = pSrcWorkDimMapPosBegin; |
pSrcWorkDimMapPos != pSrcWorkDimMapPosEnd; |
++pSrcWorkDimMapPos ) |
{ |
pDstKernel->maWorkDimMap.insert(std::make_pair(pSrcWorkDimMapPos->first, |
pSrcWorkDimMapPos->second)); |
} // for |
} // OpenCLKernelWorkDimMapClone |
//--------------------------------------------------------------------------- |
// |
// Clone all kernels in the associative array. |
// |
//--------------------------------------------------------------------------- |
static bool OpenCLKernelMapClone(OpenCL::KernelStruct *pSrcKernel, |
OpenCL::KernelStruct *pDstKernel) |
{ |
OpenCLKernelMapIterator pSrcKernelMapPos; |
OpenCLKernelMapIterator pSrcKernelMapPosBegin = pSrcKernel->maKernelMap.begin(); |
OpenCLKernelMapIterator pSrcKernelMapPosEnd = pSrcKernel->maKernelMap.end(); |
for(pSrcKernelMapPos = pSrcKernelMapPosBegin; |
pSrcKernelMapPos != pSrcKernelMapPosEnd; |
++pSrcKernelMapPos ) |
{ |
OpenCLKernelAcquire(pSrcKernelMapPos->first, pDstKernel); |
} // for |
} // OpenCLKernelMapClone |
//--------------------------------------------------------------------------- |
// |
// Clone a kernel structure. |
// |
//--------------------------------------------------------------------------- |
static bool OpenCLKernelClone(OpenCL::KernelStruct *pSrcKernel, |
OpenCL::KernelStruct *pDstKernel) |
{ |
bool bKernelsCopied = false; |
pDstKernel = new OpenCL::KernelStruct; |
if( pDstKernel != NULL ) |
{ |
pDstKernel->mnLocalDomainSize = pSrcKernel->mnLocalDomainSize; |
pDstKernel->mnDeviceId = pSrcKernel->mnDeviceId; |
pDstKernel->mpCommandQueue = pSrcKernel->mpCommandQueue; |
pDstKernel->mpProgram = pSrcKernel->mpProgram; |
pDstKernel->mpKernelMapIter = pSrcKernel->mpKernelMapIter; |
OpenCLKernelWorkDimMapClone(pSrcKernel, pDstKernel); |
OpenCLKernelMapClone(pSrcKernel, pDstKernel); |
bKernelsCopied = true; |
} // if |
return( bKernelsCopied ); |
} // OpenCLKernelClone |
//--------------------------------------------------------------------------- |
//--------------------------------------------------------------------------- |
#pragma mark - |
#pragma mark Public - Constructors |
//--------------------------------------------------------------------------- |
// |
// Construct a kernel object from a program object alias. |
// |
//--------------------------------------------------------------------------- |
Kernel::Kernel( const Program &rProgram ) |
{ |
mpKernel = new OpenCL::KernelStruct; |
if( mpKernel != NULL ) |
{ |
mpKernel->mnLocalDomainSize = 0; |
mpKernel->mnDeviceId = rProgram.GetDeviceId(); |
mpKernel->mpCommandQueue = rProgram.GetCommandQueue(); |
mpKernel->mpProgram = rProgram.GetProgram(); |
} // if |
} // Constructor |
//--------------------------------------------------------------------------- |
// |
// Construct a kernel object from a program object reference. |
// |
//--------------------------------------------------------------------------- |
Kernel::Kernel( const Program *pProgram ) |
{ |
mpKernel = new OpenCL::KernelStruct; |
if( mpKernel != NULL ) |
{ |
mpKernel->mnLocalDomainSize = 0; |
mpKernel->mnDeviceId = pProgram->GetDeviceId(); |
mpKernel->mpCommandQueue = pProgram->GetCommandQueue(); |
mpKernel->mpProgram = pProgram->GetProgram(); |
} // if |
} // Constructor |
//--------------------------------------------------------------------------- |
#pragma mark - |
#pragma mark Public - Copy Constructor |
//--------------------------------------------------------------------------- |
// |
// Construct a deep copy of a kernel object from another. |
// |
//--------------------------------------------------------------------------- |
Kernel::Kernel( const Kernel &rKernel ) |
{ |
if( rKernel.mpKernel != NULL ) |
{ |
OpenCLKernelClone( rKernel.mpKernel, mpKernel ); |
} // if |
} // Copy Constructor |
//--------------------------------------------------------------------------- |
#pragma mark - |
#pragma mark Public - Assignment Operator |
//--------------------------------------------------------------------------- |
// |
// Construct a deep copy of a kernel object from another using the |
// assignment operator. |
// |
//--------------------------------------------------------------------------- |
Kernel &Kernel::operator=(const Kernel &rKernel) |
{ |
if( ( this != &rKernel ) && ( rKernel.mpKernel != NULL ) ) |
{ |
OpenCLKernelRelease( mpKernel ); |
OpenCLKernelClone( rKernel.mpKernel, mpKernel ); |
} // if |
return( *this ); |
} // Assignment Operator |
//--------------------------------------------------------------------------- |
#pragma mark - |
#pragma mark Public - Destructor |
//--------------------------------------------------------------------------- |
// |
// Release the kernel object. |
// |
//--------------------------------------------------------------------------- |
Kernel::~Kernel() |
{ |
OpenCLKernelRelease( mpKernel ); |
} // Destructor |
//--------------------------------------------------------------------------- |
#pragma mark - |
#pragma mark Public - Accessors |
//--------------------------------------------------------------------------- |
// |
// Set the kernel's work dimension. |
// |
//--------------------------------------------------------------------------- |
void Kernel::SetWorkDimension(const std::string &rKernelName, |
const cl_uint nWorkDim) |
{ |
mpKernel->maWorkDimMap[rKernelName] = nWorkDim; |
} // SetWorkDimension |
//--------------------------------------------------------------------------- |
#pragma mark - |
#pragma mark Public - Utilities |
//--------------------------------------------------------------------------- |
// |
// Using a buffer object alias, bind a buffer object to this kernel, with |
// its index and memory. |
// |
//--------------------------------------------------------------------------- |
bool Kernel::BindBuffer(Buffer &rBuffer) |
{ |
const cl_uint nParamIndex = rBuffer.GetBufferIndex(); |
const void *pParam = rBuffer.GetBuffer(); |
return( OpenCLKernelSetParameter(mpKernel, |
nParamIndex, |
kOpenCLBufferSize, |
&pParam) ); |
} // BindBuffer |
//--------------------------------------------------------------------------- |
// |
// Using a buffer object reference, bind a buffer object to this kernel, |
// with its index and memory. |
// |
//--------------------------------------------------------------------------- |
bool Kernel::BindBuffer(Buffer *pBuffer) |
{ |
const cl_uint nParamIndex = pBuffer->GetBufferIndex(); |
const void *pParam = pBuffer->GetBuffer(); |
return( OpenCLKernelSetParameter(mpKernel, |
nParamIndex, |
kOpenCLBufferSize, |
&pParam) ); |
} // BindBuffer |
//--------------------------------------------------------------------------- |
// |
// Bind a parameter to this kernel, using its index and size. |
// |
//--------------------------------------------------------------------------- |
bool Kernel::BindParameter(const cl_uint nParamIndex, |
const size_t nParamSize) |
{ |
return( OpenCLKernelSetParameter(mpKernel, |
nParamIndex, |
nParamSize, |
NULL) ); |
} // BindParameter |
//--------------------------------------------------------------------------- |
// |
// Bind a parameter to this kernel; using its index, size, and memory |
// contents. |
// |
//--------------------------------------------------------------------------- |
bool Kernel::BindParameter(const cl_uint nParamIndex, |
const size_t nParamSize, |
const void *pParam) |
{ |
return( OpenCLKernelSetParameter(mpKernel, |
nParamIndex, |
nParamSize, |
pParam) ); |
} // BindParameter |
//--------------------------------------------------------------------------- |
// |
// After kernel work dimension is set, acquire a named kernel from an |
// OpenCL program. |
// |
//--------------------------------------------------------------------------- |
bool Kernel::Acquire( const std::string &rKernelName ) |
{ |
return( OpenCLKernelAcquire(rKernelName,mpKernel) ); |
} // Acquire |
//--------------------------------------------------------------------------- |
// |
// For a detailed discussion on global and local work size refer to the |
// OpenCL documentation: |
// |
// http://www.khronos.org/registry/cl/specs/opencl-1.0.33.pdf |
// |
//--------------------------------------------------------------------------- |
//--------------------------------------------------------------------------- |
// |
// Excute an acquired kernel using a global work size. |
// |
//--------------------------------------------------------------------------- |
bool Kernel::Execute(const size_t *pGlobalWorkSize) |
{ |
return( OpenCLKernelExecute(mpKernel, |
NULL, |
pGlobalWorkSize, |
NULL) ); |
} // Execute |
//--------------------------------------------------------------------------- |
// |
// Excute an acquired kernel using a global work offset and size. |
// |
//--------------------------------------------------------------------------- |
bool Kernel::Execute(const size_t *pGlobalWorkOffset, |
const size_t *pGlobalWorkSize) |
{ |
return( OpenCLKernelExecute(mpKernel, |
pGlobalWorkOffset, |
pGlobalWorkSize, |
NULL) ); |
} // Execute |
//--------------------------------------------------------------------------- |
// |
// Excute an acquired kernel using a global work offset and size, and the |
// local work size. |
// |
//--------------------------------------------------------------------------- |
bool Kernel::Execute(const size_t *pGlobalWorkOffset, |
const size_t *pGlobalWorkSize, |
const size_t *pLocalWorkSize) |
{ |
return( OpenCLKernelExecute(mpKernel, |
pGlobalWorkOffset, |
pGlobalWorkSize, |
pLocalWorkSize) ); |
} // Execute |
//--------------------------------------------------------------------------- |
//--------------------------------------------------------------------------- |
Copyright © 2009 Apple Inc. All Rights Reserved. Terms of Use | Privacy Policy | Updated: 2009-09-24