DispatchFractal.c

//
// File:       DispatchFractal.c
//
// Abstract:   This example shows how to combine parallel computation on the CPU
//             via GCD with results processing and display on the GPU via OpenCL
//             and OpenGL. It computes escape-time fractals in parallel on the
//             global concurrent GCD queue and uses another GCD queue to upload
//             results to the GPU for processing via two OpenCL kernels. Calls to
//             OpenCL and OpenGL for display are serialized with a third GCD queue.
//
// Version:    <1.0>
//
// Disclaimer: IMPORTANT:  This Apple software is supplied to you by Apple Inc. ("Apple")
//             in consideration of your agreement to the following terms, and your use,
//             installation, modification or redistribution of this Apple software
//             constitutes acceptance of these terms.  If you do not agree with these
//             terms, please do not use, install, modify or redistribute this Apple
//             software.
//
//             In consideration of your agreement to abide by the following terms, and
//             subject to these terms, Apple grants you a personal, non - exclusive
//             license, under Apple's copyrights in this original Apple software ( the
//             "Apple Software" ), to use, reproduce, modify and redistribute the Apple
//             Software, with or without modifications, in source and / or binary forms;
//             provided that if you redistribute the Apple Software in its entirety and
//             without modifications, you must retain this notice and the following text
//             and disclaimers in all such redistributions of the Apple Software. Neither
//             the name, trademarks, service marks or logos of Apple Inc. may be used to
//             endorse or promote products derived from the Apple Software without specific
//             prior written permission from Apple.  Except as expressly stated in this
//             notice, no other rights or licenses, express or implied, are granted by
//             Apple herein, including but not limited to any patent rights that may be
//             infringed by your derivative works or by other works in which the Apple
//             Software may be incorporated.
//
//             The Apple Software is provided by Apple on an "AS IS" basis.  APPLE MAKES NO
//             WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
//             WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
//             PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION
//             ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
//
//             IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
//             CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
//             SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
//             INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
//             AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
//             UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
//             OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//  Copyright 2009 Apple Inc. All rights reserved.
//
 
#include "DispatchFractal.h"
#include <stdlib.h>
#include <strings.h>
#include <dispatch/dispatch.h>
#include <Block.h>
#include <libkern/OSAtomic.h>
#include <assert.h>
 
#pragma mark Data
 
typedef struct {
    real minradius;
    natural maxiterations, stride, quadtreewidth;
    fractal_out_t *quadtree;
    bool enabledisplay, collectstats;
    fractal_compute_t compute;
    dispatch_group_t group;
    dispatch_queue_t computequeue;
    volatile counter generation, stopping;
#if FRACTAL_TIMING || FRACTAL_STATISTICS
    nanoseconds startTime;
    dispatch_source_t statstimer;
#endif
#if FRACTAL_STATISTICS
    volatile counter computequeued, computedone;
    volatile counter flops;
#endif
} fractal_data_t;
 
#define newGeneration(data) \
    OSAtomicAdd64Barrier(1, &((data)->generation))
#define generationValid(data, generation) \
    ((data)->generation == (generation))
#define stopBlocks(data) \
    OSAtomicAdd64Barrier(1, &((data)->stopping))
#define quadtreeLoc(data, x, y, o) ((data)->quadtree + \
    (((data)->quadtreewidth - (o)) + (x) + (y) * (data)->quadtreewidth))
 
#pragma mark Timing
 
#if FRACTAL_TIMING || FRACTAL_STATISTICS
#include <mach/mach_time.h>
static mach_timebase_info_data_t tb = {};
#define timingSetup(data) \
    if (!tb.numer) { mach_timebase_info(&tb); }; \
    data->startTime = mach_absolute_time()
#define timingStart(data) \
    data->startTime = mach_absolute_time()
#define elapsedNs(data) \
    ((mach_absolute_time() - data->startTime) * tb.numer / tb.denom)
#else /* FRACTAL_TIMING */
#define timingSetup(data)
#define timingStart(data)
#define elapsedNs(data) 0
#endif /* FRACTAL_TIMING */
 
#pragma mark Statistics
 
#if FRACTAL_STATISTICS
 
#define computeBlockQueued(data) \
    if (data->collectstats) { OSAtomicAdd64( 1, &(data->computequeued)); }
#define computeBlockDequeued(data) \
    if (data->collectstats) { OSAtomicAdd64(-1, &(data->computequeued)); }
#define computeBlockDone(data, g) \
    if (data->collectstats && generationValid(data, g)) { \
    OSAtomicAdd64( 1, &(data->computedone)); }
#define opsStatsSetup() natural n = 0
#define ops (data->collectstats ? &n : NULL)
#define opsStatsUpdate(data) \
    if (data->collectstats) { OSAtomicAdd64(n, &(data->flops)); }
#define updateStatsDisplay(data, computemax, stats_b) \
    stats_b(data->computedone, data->computequeued, computemax, \
    data->flops, elapsedNs(data))
#else /* FRACTAL_STATISTICS */
#define computeBlockQueued(data)
#define computeBlockDequeued(data)
#define computeBlockDone(data, g)
#define opsStatsSetup()
#define ops NULL
#define opsStatsUpdate(data)
#endif /* FRACTAL_STATISTICS */
 
#pragma mark Computation Blocks
 
#define enqueueCompute(data, cX, cY, r, px, py, po, i, g) \
    computeBlockQueued(data); \
    dispatch_group_async(data->group, data->computequeue, ^{ \
        if (generationValid(data, g) && !data->stopping) { \
        compute(data, cX, cY, r, px, py, po, i, g); \
        computeBlockDone(data, g); \
        } computeBlockDequeued(data); \
    })
 
#if FRACTAL_STATISTICS
static natural totalBlocks(const natural subdiv, const natural stride)
    __attribute__((const));
natural totalBlocks(const natural subdiv, const natural stride) {
    natural b = 1;
    if (subdiv > 0 && subdiv >= stride) {
        b += 4;
    if (subdiv > 1 && subdiv > stride) {
        const natural k = (subdiv % stride ? subdiv % stride : stride) + 1;
        /*
         * Sum(i=k, i<=subdiv, i+=stride, 4^i) := 
         * (4^k) * ((4^stride)^((subdiv-k)/stride+1)-1) / ((4^stride)-1))
         */
        #define e(x) (1ul << (2ul * (x)))
        b += e(k) * (e(stride * (((subdiv - k) / stride) + 1ul)) - 1ul) /
            (e(stride) - 1ul);
        #undef e
        }
    }
    return b;
}
#endif /* FRACTAL_STATISTICS */
 
static void compute(fractal_data_t * const data, const real centerX,
    const real centerY, const real radius,
    const natural px, const natural py, const natural po,
    natural iteration, const counter generation)
{
    opsStatsSetup();
    real val = data->compute(centerX, centerY, data->maxiterations, ops);
    if (!generationValid(data, generation) || data->stopping) return;
    opsStatsUpdate(data);
    if (data->enabledisplay) {
    *quadtreeLoc(data, px, py, po) = val;
    }
    if (radius > data->minradius) {
    const real r = radius / 2;
    const real x1 = centerX - r, x2 = centerX + r;
    const real y1 = centerY + r, y2 = centerY - r;
    const natural o = po << 1ul;
    const natural px1 = px << 1ul, px2 = px1 + 1;
    const natural py1 = py << 1ul, py2 = py1 + 1;  
    if (iteration++ < data->stride) {
        compute(data, x1, y1, r, px1, py1, o, iteration, generation);
        compute(data, x2, y1, r, px2, py1, o, iteration, generation);
        compute(data, x1, y2, r, px1, py2, o, iteration, generation);
        compute(data, x2, y2, r, px2, py2, o, iteration, generation);
    } else {
        iteration -= data->stride;
        enqueueCompute(data, x1, y1, r, px1, py1, o, iteration, generation);
        enqueueCompute(data, x2, y1, r, px2, py1, o, iteration, generation);
        enqueueCompute(data, x1, y2, r, px1, py2, o, iteration, generation);
        enqueueCompute(data, x2, y2, r, px2, py2, o, iteration, generation);
    }
    }
}
 
#pragma mark Fractal API
 
fractal_t fractalNew(void) {
    return calloc(1, sizeof(fractal_data_t));
}
 
void fractalFree(fractal_t fractal) {
    fractal_data_t * const data = fractal;
    if (data->group) {
    if (!data->stopping) { 
        fractalStop(fractal);
    }
    dispatch_group_wait(data->group, DISPATCH_TIME_FOREVER);
    }
    free((void*) fractal);
}
 
void fractalStart(fractal_t fractal,
    fractal_params_t (^params_b)(void), fractal_compute_t compute_b,
    void (^start_b)(fractal_out_t * const, const natural),
    void (^stop_b)(const nanoseconds),
    void (^stats_b)(const counter, const counter, const counter,
    const counter, const nanoseconds))
{
    fractal_data_t * const data = fractal;
    if (data->stopping) return;
    const counter generation = newGeneration(data);
    timingSetup(data);
    const fractal_params_t params = params_b();
    const natural pixels = 1ul << params.subdivisions;
    const real radius = params.width / 2;
    data->minradius = radius / pixels;
    data->maxiterations = params.maxiterations;
    data->stride = params.stride;
    data->enabledisplay = params.enabledisplay;
    if (data->enabledisplay) {
    if (!data->quadtree) {
        data->quadtreewidth = 2 * pixels;
        data->quadtree = calloc(2 * pixels * pixels, sizeof(fractal_out_t));
    } else {
        assert(data->quadtreewidth == 2 * pixels);
        bzero(data->quadtree, 2 * pixels * pixels * sizeof(fractal_out_t));
    }
    *quadtreeLoc(data, 0, 0, 2) = 0.0001;
    }
    data->collectstats = params.collectstats;
    if (data->compute) { Block_release(data->compute); }
    data->compute = Block_copy(compute_b);
    if (!data->computequeue) {
    dispatch_queue_t globalqueue = dispatch_get_global_queue(
        DISPATCH_QUEUE_PRIORITY_LOW, 0);
    assert(globalqueue);
    if (params.computeqconcurrent) {
        data->computequeue = globalqueue;
    } else {
        data->computequeue = dispatch_queue_create(
            "com.dispatchfractal.compute", NULL);
        dispatch_set_target_queue(data->computequeue, globalqueue);
        assert(data->computequeue);
    }
    data->group = dispatch_group_create();
    }
#if FRACTAL_STATISTICS
    const counter computemax = totalBlocks(params.subdivisions, params.stride);
    data->computedone = 0; data->flops = 0;
    if (params.collectstats && params.displaystats) {
    updateStatsDisplay(data, computemax, stats_b);
    if (data->statstimer) {
        dispatch_suspend(data->statstimer); 
    } else {
        data->statstimer = dispatch_source_create(
            DISPATCH_SOURCE_TYPE_TIMER, 0, 0,
            dispatch_get_main_queue());
    }
    dispatch_source_set_timer(data->statstimer, dispatch_time(
        DISPATCH_TIME_NOW, .5 * NSEC_PER_SEC), .5 * NSEC_PER_SEC,
        .1 * NSEC_PER_SEC);
    dispatch_source_set_event_handler(data->statstimer, ^{
        updateStatsDisplay(data, computemax, stats_b);
    });
    dispatch_resume(data->statstimer);
    }
#endif
    start_b(data->quadtree, pixels);
    timingStart(data);
    enqueueCompute(data, params.centerX, params.centerY, radius, 0, 0, 2,
        params.subdivisions >= params.stride ? params.stride +
        (params.subdivisions % params.stride ? params.stride - 
        params.subdivisions % params.stride : 0) : 1, generation);
    dispatch_group_notify(data->group, dispatch_get_main_queue(), ^{
    if (generationValid(data, generation)) {
        dispatch_release(data->computequeue); data->computequeue = NULL;
        dispatch_release(data->group); data->group = NULL;
        Block_release(data->compute); data->compute = NULL;
#if FRACTAL_STATISTICS
        if (data->statstimer) {
        dispatch_source_cancel(data->statstimer);
        dispatch_release(data->statstimer);
        data->statstimer = NULL;
        }
        updateStatsDisplay(data, computemax, stats_b);
#endif
        data->stopping = 0;
        stop_b(elapsedNs(data));
        if (data->quadtree) {
        free(data->quadtree);
        data->quadtree = NULL;
        }
    }
    });
}
 
void fractalStop(fractal_t fractal) {
    stopBlocks((fractal_data_t *)fractal);
}