// DispatchFractal.c
// |
// File: DispatchFractal.c |
// |
// Abstract: This example shows how to combine parallel computation on the CPU |
// via GCD with results processing and display on the GPU via OpenCL |
// and OpenGL. It computes escape-time fractals in parallel on the |
// global concurrent GCD queue and uses another GCD queue to upload |
// results to the GPU for processing via two OpenCL kernels. Calls to |
// OpenCL and OpenGL for display are serialized with a third GCD queue. |
// |
// Version: <1.0> |
// |
// Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple") |
// in consideration of your agreement to the following terms, and your use, |
// installation, modification or redistribution of this Apple software |
// constitutes acceptance of these terms. If you do not agree with these |
// terms, please do not use, install, modify or redistribute this Apple |
// software. |
// |
// In consideration of your agreement to abide by the following terms, and |
// subject to these terms, Apple grants you a personal, non - exclusive |
// license, under Apple's copyrights in this original Apple software ( the |
// "Apple Software" ), to use, reproduce, modify and redistribute the Apple |
// Software, with or without modifications, in source and / or binary forms; |
// provided that if you redistribute the Apple Software in its entirety and |
// without modifications, you must retain this notice and the following text |
// and disclaimers in all such redistributions of the Apple Software. Neither |
// the name, trademarks, service marks or logos of Apple Inc. may be used to |
// endorse or promote products derived from the Apple Software without specific |
// prior written permission from Apple. Except as expressly stated in this |
// notice, no other rights or licenses, express or implied, are granted by |
// Apple herein, including but not limited to any patent rights that may be |
// infringed by your derivative works or by other works in which the Apple |
// Software may be incorporated. |
// |
// The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO |
// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED |
// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A |
// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION |
// ALONE OR IN COMBINATION WITH YOUR PRODUCTS. |
// |
// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR |
// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION |
// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER |
// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR |
// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// Copyright 2009 Apple Inc. All rights reserved. |
// |
#include "DispatchFractal.h" |
#include <stdlib.h> |
#include <strings.h> |
#include <dispatch/dispatch.h> |
#include <Block.h> |
#include <libkern/OSAtomic.h> |
#include <assert.h> |
#pragma mark Data |
typedef struct { |
real minradius; |
natural maxiterations, stride, quadtreewidth; |
fractal_out_t *quadtree; |
bool enabledisplay, collectstats; |
fractal_compute_t compute; |
dispatch_group_t group; |
dispatch_queue_t computequeue; |
volatile counter generation, stopping; |
#if FRACTAL_TIMING || FRACTAL_STATISTICS |
nanoseconds startTime; |
dispatch_source_t statstimer; |
#endif |
#if FRACTAL_STATISTICS |
volatile counter computequeued, computedone; |
volatile counter flops; |
#endif |
} fractal_data_t; |
/* Atomically advance the generation counter (result is used as the new id). */
#define newGeneration(f) \
    OSAtomicAdd64Barrier(1, &((f)->generation))

/* True while `gen` is still the generation stored in the fractal state. */
#define generationValid(f, gen) \
    ((f)->generation == (gen))

/* Atomically raise the stop request counter. */
#define stopBlocks(f) \
    OSAtomicAdd64Barrier(1, &((f)->stopping))

/*
 * Address of the quadtree cell at column `col`, row `row` for a level whose
 * offset is `off`: base + (width - off) + col + row * width.
 */
#define quadtreeLoc(f, col, row, off) \
    ((f)->quadtree + \
     (((f)->quadtreewidth - (off)) + (col) + (row) * (f)->quadtreewidth))
#pragma mark Timing |
#if FRACTAL_TIMING || FRACTAL_STATISTICS
#include <mach/mach_time.h>
/* Mach timebase ratio; numer stays 0 until timingSetup() has queried it. */
static mach_timebase_info_data_t tb = {0};

/*
 * Query the timebase on first use, then record the start timestamp.
 * Wrapped in do/while(0) so the macro behaves as one statement.
 */
#define timingSetup(data) \
    do { \
        if (!tb.numer) { mach_timebase_info(&tb); } \
        (data)->startTime = mach_absolute_time(); \
    } while (0)

/* Record a fresh start timestamp. */
#define timingStart(data) \
    do { (data)->startTime = mach_absolute_time(); } while (0)

/* Time since the recorded start, scaled by the timebase ratio. */
#define elapsedNs(data) \
    ((mach_absolute_time() - (data)->startTime) * tb.numer / tb.denom)
#else /* FRACTAL_TIMING || FRACTAL_STATISTICS */
/* Timing compiled out: setup/start expand to nothing, elapsed time is 0. */
#define timingSetup(data)
#define timingStart(data)
#define elapsedNs(data) 0
#endif /* FRACTAL_TIMING || FRACTAL_STATISTICS */
#pragma mark Statistics |
#if FRACTAL_STATISTICS
/*
 * Statistics hooks. The statement-like hooks are wrapped in do/while(0) so
 * each one is a single statement and cannot capture a following `else`.
 */

/* One more block is waiting on the compute queue. */
#define computeBlockQueued(data) \
    do { \
        if ((data)->collectstats) { \
            OSAtomicAdd64( 1, &((data)->computequeued)); \
        } \
    } while (0)

/* A block has left the compute queue (whether or not it did any work). */
#define computeBlockDequeued(data) \
    do { \
        if ((data)->collectstats) { \
            OSAtomicAdd64(-1, &((data)->computequeued)); \
        } \
    } while (0)

/* A block finished; counted only if generation `g` is still current. */
#define computeBlockDone(data, g) \
    do { \
        if ((data)->collectstats && generationValid(data, g)) { \
            OSAtomicAdd64( 1, &((data)->computedone)); \
        } \
    } while (0)

/* Declares the local operation counter `n` that `ops` points at. */
#define opsStatsSetup() natural n = 0

/*
 * Pointer handed to the compute callback, or NULL when stats are off.
 * Relies on `data` and `n` being in scope at the point of use.
 */
#define ops (data->collectstats ? &n : NULL)

/* Fold the local operation counter `n` into the shared total. */
#define opsStatsUpdate(data) \
    do { \
        if ((data)->collectstats) { \
            OSAtomicAdd64(n, &((data)->flops)); \
        } \
    } while (0)

/* Invoke the statistics callback with the current counters. */
#define updateStatsDisplay(data, computemax, stats_b) \
    stats_b((data)->computedone, (data)->computequeued, computemax, \
            (data)->flops, elapsedNs(data))
#else /* FRACTAL_STATISTICS */
/* Statistics compiled out: hooks expand to nothing and `ops` is NULL. */
#define computeBlockQueued(data)
#define computeBlockDequeued(data)
#define computeBlockDone(data, g)
#define opsStatsSetup()
#define ops NULL
#define opsStatsUpdate(data)
#endif /* FRACTAL_STATISTICS */
#pragma mark Computation Blocks |
/*
 * Submit one compute() call to the fractal's group on its compute queue.
 * The queued block runs compute() only if generation `g` is still current
 * and no stop has been requested; the dequeue hook always runs.
 * Wrapped in do/while(0) so the bookkeeping hook and the dispatch call stay
 * together as one statement wherever the macro is used.
 */
#define enqueueCompute(data, cX, cY, r, px, py, po, i, g) \
    do { \
        computeBlockQueued(data); \
        dispatch_group_async((data)->group, (data)->computequeue, ^{ \
            if (generationValid(data, g) && !(data)->stopping) { \
                compute(data, cX, cY, r, px, py, po, i, g); \
                computeBlockDone(data, g); \
            } \
            computeBlockDequeued(data); \
        }); \
    } while (0)
#if FRACTAL_STATISTICS
static natural totalBlocks(const natural subdiv, const natural stride)
        __attribute__((const));

/*
 * Closed-form block count for a given subdivision depth and stride
 * (fractalStart() passes it to the statistics callback as `computemax`).
 *
 *   1                       when subdiv is 0 or smaller than stride
 *   1 + 4                   when subdiv does not exceed stride
 *   1 + 4 + Sum(4^i)        otherwise, for i = k, k+stride, ... <= subdiv,
 *                           where k = (subdiv % stride, or stride if 0) + 1
 *
 * The sum is evaluated as the geometric series
 *   4^k * ((4^stride)^terms - 1) / (4^stride - 1).
 */
static natural totalBlocks(const natural subdiv, const natural stride) {
    if (!(subdiv > 0) || subdiv < stride) {
        return 1;
    }
    if (!(subdiv > 1) || !(subdiv > stride)) {
        return 5;
    }

    const natural remainder = subdiv % stride;
    const natural first = (remainder ? remainder : stride) + 1;

    /* Powers of four are expressed as shifts by twice the exponent. */
    const unsigned long firstTerm = 1ul << (2ul * (first));
    const unsigned long ratio = 1ul << (2ul * (stride));
    const unsigned long ratioToTerms =
            1ul << (2ul * (stride * (((subdiv - first) / stride) + 1ul)));

    return 5 + firstTerm * (ratioToTerms - 1ul) / (ratio - 1ul);
}
#endif /* FRACTAL_STATISTICS */
/*
 * Evaluate the fractal at one tile centre, store the result, and descend
 * into the tile's four quadrants.
 *
 * data        fractal state
 * centerX/Y   centre of this tile
 * radius      half-width of this tile; descent stops once it is no longer
 *             greater than data->minradius
 * px, py, po  column, row and level offset of this tile's quadtree cell
 * iteration   per-block counter: while it is below data->stride the
 *             quadrants are computed by direct recursion, otherwise they
 *             are queued as new blocks and the counter is rewound by stride
 * generation  run this work belongs to; the result is dropped if the run is
 *             stale or a stop was requested while the callback was running
 */
static void compute(fractal_data_t * const data, const real centerX,
        const real centerY, const real radius,
        const natural px, const natural py, const natural po,
        natural iteration, const counter generation)
{
    opsStatsSetup();
    const real sample = data->compute(centerX, centerY, data->maxiterations,
            ops);
    if (!generationValid(data, generation) || data->stopping) {
        return;
    }
    opsStatsUpdate(data);
    if (data->enabledisplay) {
        *quadtreeLoc(data, px, py, po) = sample;
    }
    if (!(radius > data->minradius)) {
        return;
    }

    const real half = radius / 2;
    const natural childOffset = po << 1ul;
    const natural firstCol = px << 1ul;
    const natural firstRow = py << 1ul;

    /* Decide once whether the quadrants run inline or as queued blocks. */
    const bool runInline = iteration++ < data->stride;
    if (!runInline) {
        iteration -= data->stride;
    }

    /* Quadrant order: (left, top), (right, top), (left, bottom), (right, bottom). */
    for (natural quadrant = 0; quadrant < 4; quadrant++) {
        const natural right = quadrant & 1ul;
        const natural lower = quadrant >> 1ul;
        const real childX = right ? centerX + half : centerX - half;
        const real childY = lower ? centerY - half : centerY + half;
        const natural childCol = firstCol + right;
        const natural childRow = firstRow + lower;
        if (runInline) {
            compute(data, childX, childY, half, childCol, childRow,
                    childOffset, iteration, generation);
        } else {
            enqueueCompute(data, childX, childY, half, childCol, childRow,
                    childOffset, iteration, generation);
        }
    }
}
#pragma mark Fractal API |
fractal_t fractalNew(void) { |
return calloc(1, sizeof(fractal_data_t)); |
} |
/*
 * Release a fractal handle obtained from fractalNew().
 *
 * If a dispatch group exists, outstanding blocks are asked to stop (unless
 * a stop is already pending) and the call blocks until the group is empty.
 * Passing NULL is a no-op, matching free().
 */
void fractalFree(fractal_t fractal) {
    fractal_data_t * const data = fractal;
    if (!data) {
        /* fractalNew() returns NULL when calloc() fails. */
        return;
    }
    if (data->group) {
        if (!data->stopping) {
            fractalStop(fractal);
        }
        dispatch_group_wait(data->group, DISPATCH_TIME_FOREVER);
    }
    /*
     * NOTE(review): the completion block registered by fractalStart() is
     * delivered on the main queue and dereferences this structure; confirm
     * it cannot still be pending once the memory is released here.
     */
    free((void*) fractal);
}
void fractalStart(fractal_t fractal, |
fractal_params_t (^params_b)(void), fractal_compute_t compute_b, |
void (^start_b)(fractal_out_t * const, const natural), |
void (^stop_b)(const nanoseconds), |
void (^stats_b)(const counter, const counter, const counter, |
const counter, const nanoseconds)) |
{ |
fractal_data_t * const data = fractal; |
if (data->stopping) return; |
const counter generation = newGeneration(data); |
timingSetup(data); |
const fractal_params_t params = params_b(); |
const natural pixels = 1ul << params.subdivisions; |
const real radius = params.width / 2; |
data->minradius = radius / pixels; |
data->maxiterations = params.maxiterations; |
data->stride = params.stride; |
data->enabledisplay = params.enabledisplay; |
if (data->enabledisplay) { |
if (!data->quadtree) { |
data->quadtreewidth = 2 * pixels; |
data->quadtree = calloc(2 * pixels * pixels, sizeof(fractal_out_t)); |
} else { |
assert(data->quadtreewidth == 2 * pixels); |
bzero(data->quadtree, 2 * pixels * pixels * sizeof(fractal_out_t)); |
} |
*quadtreeLoc(data, 0, 0, 2) = 0.0001; |
} |
data->collectstats = params.collectstats; |
if (data->compute) { Block_release(data->compute); } |
data->compute = Block_copy(compute_b); |
if (!data->computequeue) { |
dispatch_queue_t globalqueue = dispatch_get_global_queue( |
DISPATCH_QUEUE_PRIORITY_LOW, 0); |
assert(globalqueue); |
if (params.computeqconcurrent) { |
data->computequeue = globalqueue; |
} else { |
data->computequeue = dispatch_queue_create( |
"com.dispatchfractal.compute", NULL); |
dispatch_set_target_queue(data->computequeue, globalqueue); |
assert(data->computequeue); |
} |
data->group = dispatch_group_create(); |
} |
#if FRACTAL_STATISTICS |
const counter computemax = totalBlocks(params.subdivisions, params.stride); |
data->computedone = 0; data->flops = 0; |
if (params.collectstats && params.displaystats) { |
updateStatsDisplay(data, computemax, stats_b); |
if (data->statstimer) { |
dispatch_suspend(data->statstimer); |
} else { |
data->statstimer = dispatch_source_create( |
DISPATCH_SOURCE_TYPE_TIMER, 0, 0, |
dispatch_get_main_queue()); |
} |
dispatch_source_set_timer(data->statstimer, dispatch_time( |
DISPATCH_TIME_NOW, .5 * NSEC_PER_SEC), .5 * NSEC_PER_SEC, |
.1 * NSEC_PER_SEC); |
dispatch_source_set_event_handler(data->statstimer, ^{ |
updateStatsDisplay(data, computemax, stats_b); |
}); |
dispatch_resume(data->statstimer); |
} |
#endif |
start_b(data->quadtree, pixels); |
timingStart(data); |
enqueueCompute(data, params.centerX, params.centerY, radius, 0, 0, 2, |
params.subdivisions >= params.stride ? params.stride + |
(params.subdivisions % params.stride ? params.stride - |
params.subdivisions % params.stride : 0) : 1, generation); |
dispatch_group_notify(data->group, dispatch_get_main_queue(), ^{ |
if (generationValid(data, generation)) { |
dispatch_release(data->computequeue); data->computequeue = NULL; |
dispatch_release(data->group); data->group = NULL; |
Block_release(data->compute); data->compute = NULL; |
#if FRACTAL_STATISTICS |
if (data->statstimer) { |
dispatch_source_cancel(data->statstimer); |
dispatch_release(data->statstimer); |
data->statstimer = NULL; |
} |
updateStatsDisplay(data, computemax, stats_b); |
#endif |
data->stopping = 0; |
stop_b(elapsedNs(data)); |
if (data->quadtree) { |
free(data->quadtree); |
data->quadtree = NULL; |
} |
} |
}); |
} |
/*
 * Ask all outstanding compute blocks of `fractal` to stop.
 *
 * The stop request is cleared only by the completion block that
 * fractalStart() registers, and fractalStart() returns early while the
 * request is set. Raising it with no run in flight (no dispatch group)
 * would therefore block every later start, so that case is a no-op, as is
 * a NULL handle.
 */
void fractalStop(fractal_t fractal) {
    fractal_data_t * const data = fractal;
    if (!data || !data->group) {
        return;
    }
    stopBlocks(data);
}
// Copyright © 2009 Apple Inc. All Rights Reserved. Terms of Use | Privacy Policy | Updated: 2009-06-05