gpu_histogram_image.cl
/********************************************************************************************* |
// |
// OpenCL Histogram kernels for GPU |
// |
// File: gpu_histogram_image.cl |
// |
// Abstract: This example demonstrates a CL histogram implementation using images |
// |
// Version: <1.0> |
// |
// Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple") |
// in consideration of your agreement to the following terms, and your use, |
// installation, modification or redistribution of this Apple software |
// constitutes acceptance of these terms. If you do not agree with these |
// terms, please do not use, install, modify or redistribute this Apple |
// software. |
// |
// In consideration of your agreement to abide by the following terms, and |
// subject to these terms, Apple grants you a personal, non - exclusive |
// license, under Apple's copyrights in this original Apple software ( the |
// "Apple Software" ), to use, reproduce, modify and redistribute the Apple |
// Software, with or without modifications, in source and / or binary forms; |
// provided that if you redistribute the Apple Software in its entirety and |
// without modifications, you must retain this notice and the following text |
// and disclaimers in all such redistributions of the Apple Software. Neither |
// the name, trademarks, service marks or logos of Apple Inc. may be used to |
// endorse or promote products derived from the Apple Software without specific |
// prior written permission from Apple. Except as expressly stated in this |
// notice, no other rights or licenses, express or implied, are granted by |
// Apple herein, including but not limited to any patent rights that may be |
// infringed by your derivative works or by other works in which the Apple |
// Software may be incorporated. |
// |
// The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO |
// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED |
// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A |
// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION |
// ALONE OR IN COMBINATION WITH YOUR PRODUCTS. |
// |
// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR |
// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION |
// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER |
// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR |
// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// Copyright: (c) 2008-2009 by Apple Inc. All Rights Reserved. |
// |
*********************************************************************************************/ |
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable |
// |
// sum partial histogram results into final histogram bins |
// |
// num_groups is the number of work-groups used to compute partial histograms. |
// partial_histogram is an array of num_groups * (257 * 3 * 32-bits/entry) entries |
// we store 257 Red bins, followed by 257 Green bins and then the 257 Blue bins. |
// |
// final summed results are returned in histogram. |
// |
kernel |
void histogram_sum_partial_results_fp(global uint *partial_histogram, int num_groups, global uint *histogram) |
{ |
int tid = (int)get_global_id(0); |
int group_id = (int)get_group_id(0); |
int group_indx; |
int n = num_groups; |
local uint tmp_histogram[257 * 3]; |
int first_workitem_not_in_first_group = ((get_local_id(0) == 0) && group_id); |
tid += group_id; |
int tid_first = tid - 1; |
if (first_workitem_not_in_first_group) |
tmp_histogram[tid_first] = partial_histogram[tid_first]; |
tmp_histogram[tid] = partial_histogram[tid]; |
group_indx = 257*3; |
while (--n > 0) |
{ |
if (first_workitem_not_in_first_group) |
tmp_histogram[tid_first] += partial_histogram[tid_first]; |
tmp_histogram[tid] += partial_histogram[group_indx+tid]; |
group_indx += 257*3; |
} |
if (first_workitem_not_in_first_group) |
histogram[tid_first] = tmp_histogram[tid_first]; |
histogram[tid] = tmp_histogram[tid]; |
} |
// |
// this kernel takes a RGBA 32-bit or 16-bit FP / channel input image and produces a partial histogram. |
// the kernel is executed over multiple work-groups. for each work-group a partial histogram is generated |
// partial_histogram is an array of num_groups * (257 * 3 * 32-bits/entry) entries |
// we store 257 Red bins, followed by 257 Green bins and then the 257 Blue bins. |
// |
kernel |
void histogram_image_rgba_fp(image2d_t img, global uint *histogram) |
{ |
int local_size = (int)get_local_size(0) * (int)get_local_size(1); |
int image_width = get_image_width(img); |
int image_height = get_image_height(img); |
int group_indx = mad24(get_group_id(1), get_num_groups(0), get_group_id(0)) * 257 * 3; |
int x = get_global_id(0); |
int y = get_global_id(1); |
local uint tmp_histogram[257 * 3]; |
int tid = mad24(get_local_id(1), get_local_size(0), get_local_id(0)); |
int j = 257 * 3; |
int indx = 0; |
// clear the local buffer that will generate the partial histogram |
do |
{ |
if (tid < j) |
tmp_histogram[indx+tid] = 0; |
j -= local_size; |
indx += local_size; |
} while (j > 0); |
barrier(CLK_LOCAL_MEM_FENCE); |
if ((x < image_width) && (y < image_height)) |
{ |
float4 clr = read_imagef(img, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST, (float2)(x, y)); |
ushort indx; |
indx = convert_ushort_sat(min(clr.x, 1.0f) * 256.0f); |
atom_inc(&tmp_histogram[indx]); |
indx = convert_ushort_sat(min(clr.y, 1.0f) * 256.0f); |
atom_inc(&tmp_histogram[257+indx]); |
indx = convert_ushort_sat(min(clr.z, 1.0f) * 256.0f); |
atom_inc(&tmp_histogram[514+indx]); |
} |
barrier(CLK_LOCAL_MEM_FENCE); |
// copy the partial histogram to appropriate location in histogram given by group_indx |
if (local_size >= (257 * 3)) |
{ |
if (tid < (257 * 3)) |
histogram[group_indx + tid] = tmp_histogram[tid]; |
} |
else |
{ |
j = 257 * 3; |
indx = 0; |
do |
{ |
if (tid < j) |
histogram[group_indx + indx + tid] = tmp_histogram[indx + tid]; |
j -= local_size; |
indx += local_size; |
} while (j > 0); |
} |
} |
/***************************************************************************************************************/ |
// |
// sum partial histogram results into final histogram bins |
// |
// num_groups is the number of work-groups used to compute partial histograms. |
// partial_histogram is an array of num_groups * (256 * 3 * 32-bits/entry) entries |
// we store 256 Red bins, followed by 256 Green bins and then the 256 Blue bins. |
// |
// final summed results are returned in histogram. |
// |
kernel |
void histogram_sum_partial_results_unorm8(global uint *partial_histogram, int num_groups, global uint *histogram) |
{ |
int tid = (int)get_global_id(0); |
int group_indx; |
int n = num_groups; |
local uint tmp_histogram[256 * 3]; |
tmp_histogram[tid] = partial_histogram[tid]; |
group_indx = 256*3; |
while (--n > 0) |
{ |
tmp_histogram[tid] += partial_histogram[group_indx + tid]; |
group_indx += 256*3; |
} |
histogram[tid] = tmp_histogram[tid]; |
} |
// |
// this kernel takes a RGBA 8-bit / channel input image and produces a partial histogram. |
// the kernel is executed over multiple work-groups. for each work-group a partial histogram is generated |
// partial_histogram is an array of num_groups * (256 * 3 * 32-bits/entry) entries |
// we store 256 Red bins, followed by 256 Green bins and then the 256 Blue bins. |
// |
kernel |
void histogram_image_rgba_unorm8(image2d_t img, global uint *histogram) |
{ |
int local_size = (int)get_local_size(0) * (int)get_local_size(1); |
int image_width = get_image_width(img); |
int image_height = get_image_height(img); |
int group_indx = mad24(get_group_id(1), get_num_groups(0), get_group_id(0)) * 256 * 3; |
int x = get_global_id(0); |
int y = get_global_id(1); |
local uint tmp_histogram[256 * 3]; |
int tid = mad24(get_local_id(1), get_local_size(0), get_local_id(0)); |
int j = 256 * 3; |
int indx = 0; |
// clear the local buffer that will generate the partial histogram |
do |
{ |
if (tid < j) |
tmp_histogram[indx+tid] = 0; |
j -= local_size; |
indx += local_size; |
} while (j > 0); |
barrier(CLK_LOCAL_MEM_FENCE); |
if ((x < image_width) && (y < image_height)) |
{ |
float4 clr = read_imagef(img, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST, (float2)(x, y)); |
uchar indx_x, indx_y, indx_z; |
indx_x = convert_uchar_sat(clr.x * 255.0f); |
indx_y = convert_uchar_sat(clr.y * 255.0f); |
indx_z = convert_uchar_sat(clr.z * 255.0f); |
atom_inc(&tmp_histogram[indx_x]); |
atom_inc(&tmp_histogram[256+(uint)indx_y]); |
atom_inc(&tmp_histogram[512+(uint)indx_z]); |
} |
barrier(CLK_LOCAL_MEM_FENCE); |
// copy the partial histogram to appropriate location in histogram given by group_indx |
if (local_size >= (256 * 3)) |
{ |
if (tid < (256 * 3)) |
histogram[group_indx + tid] = tmp_histogram[tid]; |
} |
else |
{ |
j = 256 * 3; |
indx = 0; |
do |
{ |
if (tid < j) |
histogram[group_indx + indx + tid] = tmp_histogram[indx + tid]; |
j -= local_size; |
indx += local_size; |
} while (j > 0); |
} |
} |
Copyright © 2010 Apple Inc. All Rights Reserved. Terms of Use | Privacy Policy | Updated: 2010-01-04