Next Previous

Retired Document

Important: This sample code may not represent best practices for current development. The project may use deprecated symbols and illustrate technologies and techniques that are no longer recommended.

NaiveEncoder.c

/*

File: NaiveEncoder.c, part of ExampleIPBCodec

Abstract: Core routines of an encoder for a simplistic video encoding format.

Version: 1.0

© Copyright 2005 Apple Computer, Inc. All rights reserved.

IMPORTANT:  This Apple software is supplied to

you by Apple Computer, Inc. ("Apple") in

consideration of your agreement to the following

terms, and your use, installation, modification

or redistribution of this Apple software

constitutes acceptance of these terms.  If you do

not agree with these terms, please do not use,

install, modify or redistribute this Apple

software.

In consideration of your agreement to abide by

the following terms, and subject to these terms,

Apple grants you a personal, non-exclusive

license, under Apple's copyrights in this

original Apple software (the "Apple Software"),

to use, reproduce, modify and redistribute the

Apple Software, with or without modifications, in

source and/or binary forms; provided that if you

redistribute the Apple Software in its entirety

and without modifications, you must retain this

notice and the following text and disclaimers in

all such redistributions of the Apple Software.

Neither the name, trademarks, service marks or

logos of Apple Computer, Inc. may be used to

endorse or promote products derived from the

Apple Software without specific prior written

permission from Apple.  Except as expressly

stated in this notice, no other rights or

licenses, express or implied, are granted by

Apple herein, including but not limited to any

patent rights that may be infringed by your

derivative works or by other works in which the

Apple Software may be incorporated.

The Apple Software is provided by Apple on an "AS

IS" basis.  APPLE MAKES NO WARRANTIES, EXPRESS OR

IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED

WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY

AND FITNESS FOR A PARTICULAR PURPOSE, REGARDING

THE APPLE SOFTWARE OR ITS USE AND OPERATION ALONE

OR IN COMBINATION WITH YOUR PRODUCTS.

IN NO EVENT SHALL APPLE BE LIABLE FOR ANY

SPECIAL, INDIRECT, INCIDENTAL OR CONSEQUENTIAL

DAMAGES (INCLUDING, BUT NOT LIMITED TO,

PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS

OF USE, DATA, OR PROFITS; OR BUSINESS

INTERRUPTION) ARISING IN ANY WAY OUT OF THE USE,

REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF

THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER

UNDER THEORY OF CONTRACT, TORT (INCLUDING

NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN

IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF

SUCH DAMAGE.

*/

#include "NaiveEncoder.h"

// OR together all of the bytes in the block and return the result.

static int

getBlockDifferentBitsFromZeroBlock(

        const UInt8 *srcBaseAddr,

        size_t srcRowBytes )

    int x, y;

    const UInt8 *srcLineBase = srcBaseAddr;

    UInt8 accumulator = 0;

    for( y = 0; y < 8; y++ ) {

        for( x = 0; x < 8; x++ ) {

            accumulator |= srcLineBase[x]; // same as srcLineBase[x] ^ 0 -- we're measuring the bits different from 0.

        srcLineBase += srcRowBytes;

    return accumulator;

// XOR each byte in the block with the corresponding byte in the prediction block;

// OR together the results and return that.

static int

getBlockDifferentBitsFromBlock(

        const UInt8 *srcBaseAddr,

        size_t srcRowBytes,

        const UInt8 *predBaseAddr,

        size_t predRowBytes )

    int x, y;

    const UInt8 *srcLineBase = srcBaseAddr;

    const UInt8 *predLineBase = predBaseAddr;

    UInt8 accumulator = 0;

    for( y = 0; y < 8; y++ ) {

        for( x = 0; x < 8; x++ ) {

            accumulator |= ( srcLineBase[x] ^ predLineBase[x] );

        srcLineBase += srcRowBytes;

        predLineBase += predRowBytes;

    return accumulator;

// Find the most significant and least significant set bit in the byte.

// Return the most significant set bit in *firstBit and the width of the range

// including the most and least significant set bits in *bitCount.

// Number bits 7..0, so bit n is 1<<n.

static void

getFirstBitAndBitCount( int byte, int *firstBit, int *bitCount )

    int mostSignificantSetBit, leastSignificantSetBit;

    for( mostSignificantSetBit = 7; mostSignificantSetBit >= 0; mostSignificantSetBit-- ) {

        if( ( 1 << mostSignificantSetBit ) & byte )

            break;

    for( leastSignificantSetBit = 0; leastSignificantSetBit <= 7; leastSignificantSetBit++ ) {

        if( ( 1 << leastSignificantSetBit ) & byte )

            break;

    *firstBit = mostSignificantSetBit;

    if( mostSignificantSetBit < leastSignificantSetBit )

        *bitCount = 0; // byte must have been zero.

    *bitCount = mostSignificantSetBit - leastSignificantSetBit + 1;

// Collect the (1<<bitShift) bit from each byte of an 8x8 block, writing them into an 8-byte vector at dataPtr.

static void

encodeBitAcrossBlock(

        const UInt8 *srcBaseAddr,

        size_t srcRowBytes,

        int bitShift,

        UInt8 *dataPtr )

    int x, y;

    int bit = 1<<bitShift;

    const UInt8 *srcLineBase = srcBaseAddr;

    for( y = 0; y < 8; y++ ) {

        UInt8 cluster = 0;

        for( x = 0; x < 8; x++ ) {

            if( srcLineBase[x] & bit )

                cluster |= ( 1 << ( 7 - x ) );

        dataPtr[y] = cluster;

        srcLineBase += srcRowBytes;

// Collect the (1<<bitShift) bit from XORing each byte of an 8x8 block with the corresponding byte of

// an 8x8 predictor block, writing them into an 8-byte vector at dataPtr.

static void

encodeBitAcrossBlockWithPredictor(

        const UInt8 *srcBaseAddr,

        size_t srcRowBytes,

        int bitShift,

        UInt8 *dataPtr,

        const UInt8 *predBaseAddr,

        size_t predRowBytes )

    int x, y;

    int bit = 1<<bitShift;

    const UInt8 *srcLineBase = srcBaseAddr;

    const UInt8 *predLineBase = predBaseAddr;

    for( y = 0; y < 8; y++ ) {

        UInt8 cluster = 0;

        for( x = 0; x < 8; x++ ) {

            if( ( srcLineBase[x] ^ predLineBase[x] ) & bit )

                cluster |= ( 1 << ( 7 - x ) );

        dataPtr[y] = cluster;

        srcLineBase += srcRowBytes;

        predLineBase += predRowBytes;

// Encode an 8x8 block from srcBaseAddr/srcRowBytes into the bitstream, writing it at dataPtr.

// Return in *bytesWrittenOut the number of bytes emitted.

// srcBaseAddr points to the first byte of the block; srcRowBytes is the offset from one row to the next.

// byteBudget indicates how many bytes are available for this block; if possible we should emit

// this many bytes or fewer, but it takes 3 bytes per block to code a no-op block in our naive format.

// predAllowedArray is an array of booleans indicating which prediction buffers may be used.

// predBaseAddrArray and predRowBytesArray indicate the corresponding block of the prediction buffers.

static OSStatus

encodeBlock(

        const UInt8 *srcBaseAddr,

        size_t srcRowBytes,

        UInt8 *dataPtr,

        size_t *bytesWrittenOut,

        int byteBudget,

        const Boolean predAllowedArray[kMaxStoredFrames],

        const UInt8 *predBaseAddrArray[kMaxStoredFrames],

        const size_t predRowBytesArray[kMaxStoredFrames] )

    int frameIndex;

    int differentBitsFromZeroPredictor, differentBitsFromPredictor[kMaxStoredFrames];

    int chosenPredictor; // 0 for no predictor, n+1 for predictorArray[n]

    int differentBits;

    int firstBit, bitCount;

    int bitIndex;

    int bytesWritten = 0;

    // Find the closest block out of (a) an 8x8 block full of zeros and (b) the available predictor blocks.

    // (In a real codec, we would probably perform some kind of motion vector search.)

    differentBitsFromZeroPredictor = getBlockDifferentBitsFromZeroBlock( srcBaseAddr, srcRowBytes );

    chosenPredictor = 0;

    differentBits = differentBitsFromZeroPredictor;

    for( frameIndex = 0; frameIndex < kMaxStoredFrames; frameIndex++ ) {

        if( predAllowedArray[frameIndex] ) {

            differentBitsFromPredictor[frameIndex] = getBlockDifferentBitsFromBlock(

                    srcBaseAddr, srcRowBytes,

                    predBaseAddrArray[frameIndex], predRowBytesArray[frameIndex] );

            if( differentBits > differentBitsFromPredictor[frameIndex] ) {

                // This predictor is a better match.

                chosenPredictor = frameIndex + 1;

                differentBits = differentBitsFromPredictor[frameIndex];

    // If we have an exact match, code that; we're done.

    if( 0 == differentBits ) {

        dataPtr[0] = chosenPredictor;

        dataPtr[1] = 0;

        dataPtr[2] = 0;

        *bytesWrittenOut = 3;

    else {

        // Work out which bits we would need to code to get an exact match.

        getFirstBitAndBitCount( differentBits, &firstBit, &bitCount );

        // If that would overflow our byte budget, slim the bit count down until we fit.

        // If we did not choose a predictor (ie, prediction from a zero block), always code at least one bit.

        // (When the budget is tight it would be smarter to spend bytes on more significant bits across the

        // frame -- this code is daft in the interests of simplicity.)

        while( ( bitCount > 0 ) && ( ( 3 + bitCount * 8 ) > byteBudget ) ) {

            if( ( bitCount == 1 ) && ( chosenPredictor == 0 ) )

                break;

            bitCount--;

        if( bitCount == 0 )

            firstBit = 0;

        // Encode the bits we've decided to.

        dataPtr[0] = chosenPredictor;

        dataPtr[1] = firstBit;

        dataPtr[2] = bitCount;

        dataPtr += 3;

        bytesWritten += 3;

        for( bitIndex = 0; bitIndex < bitCount; bitIndex++ ) {

            if( 0 == chosenPredictor ) {

                encodeBitAcrossBlock( srcBaseAddr, srcRowBytes, firstBit - bitIndex, dataPtr );

            else {

                encodeBitAcrossBlockWithPredictor( srcBaseAddr, srcRowBytes, firstBit - bitIndex, dataPtr,

                        predBaseAddrArray[chosenPredictor-1], predRowBytesArray[chosenPredictor-1] );

            dataPtr += 8;

            bytesWritten += 8;

        *bytesWrittenOut = bytesWritten;

    return noErr;

// Encode an entire plane.

// Write bytes at dataPtr and report the number of bytes written in *bytesWrittenOut.

// planeByteBudget indicates how many bytes are available for this plane; if possible we should emit

// this many bytes or fewer, but it takes 3 bytes per block to code a no-op block in our naive format.

// predAllowedArray is an array of booleans indicating which prediction buffers may be used.

// predBaseAddrArray and predRowBytesArray indicate the corresponding plane of the prediction buffers.

static OSStatus

encodePlane(

        size_t width,

        size_t height,

        const UInt8 *srcPlaneBaseAddr,

        size_t srcPlaneRowBytes,

        UInt8 *dataPtr,

        size_t *bytesWrittenOut,

        size_t planeByteBudget,

        const Boolean predAllowedArray[kMaxStoredFrames],

        const UInt8 *predBaseAddrArray[kMaxStoredFrames],

        const size_t predRowBytesArray[kMaxStoredFrames] )

    OSStatus err = noErr;

    size_t widthInBlocks, heightInBlocks, xblock, yblock;

    size_t bytesWritten;

    size_t totalBytesWritten = 0;

    const UInt8 *blockLineBaseAddr;

    int frameIndex;

    const UInt8 *predLineBaseAddrArray[kMaxStoredFrames];

    float averageBlockByteBudget, runningBlockByteBudget;

    widthInBlocks = ( width + 7 ) / 8;

    heightInBlocks = ( height + 7 ) / 8;

    // We'll serve out the byte budget as we go down the frame.

    averageBlockByteBudget = ((float)planeByteBudget) / ((float)widthInBlocks * heightInBlocks);

    runningBlockByteBudget = 0.0;

    blockLineBaseAddr = srcPlaneBaseAddr;

    for( frameIndex = 0; frameIndex < kMaxStoredFrames; frameIndex++ )

        predLineBaseAddrArray[frameIndex] = predBaseAddrArray[frameIndex];

    for( yblock = 0; yblock < heightInBlocks; yblock++ ) {

        const UInt8 *blockBaseAddr = blockLineBaseAddr;

        const UInt8 *predBlockBaseAddrArray[kMaxStoredFrames];

        for( frameIndex = 0; frameIndex < kMaxStoredFrames; frameIndex++ )

            predBlockBaseAddrArray[frameIndex] = predLineBaseAddrArray[frameIndex];

        for( xblock = 0; xblock < widthInBlocks; xblock++ ) {

            runningBlockByteBudget += averageBlockByteBudget;

            err = encodeBlock( blockBaseAddr, srcPlaneRowBytes, dataPtr, &bytesWritten,

                    (int)runningBlockByteBudget, predAllowedArray,

                    predBlockBaseAddrArray, predRowBytesArray );

            if( err )

                goto bail;

            dataPtr += bytesWritten;

            totalBytesWritten += bytesWritten;

            runningBlockByteBudget -= bytesWritten;

            blockBaseAddr += 8;

            for( frameIndex = 0; frameIndex < kMaxStoredFrames; frameIndex++ )

                predBlockBaseAddrArray[frameIndex] += 8;

        blockLineBaseAddr += 8 * srcPlaneRowBytes;

        for( frameIndex = 0; frameIndex < kMaxStoredFrames; frameIndex++ )

            predLineBaseAddrArray[frameIndex] += 8 * predRowBytesArray[frameIndex];

    *bytesWrittenOut = totalBytesWritten;

bail:

    return err;

static void

selectPlaneFromEachBuffer(

        struct InternalPixelBuffer predictionBuffers[kMaxStoredFrames],

        int planeIndex,

        const UInt8 *predBaseAddrArray[kMaxStoredFrames],

        size_t predRowBytesArray[kMaxStoredFrames] )

    int frameIndex;

    for( frameIndex = 0; frameIndex < kMaxStoredFrames; frameIndex++ ) {

        predBaseAddrArray[frameIndex] = predictionBuffers[frameIndex].planeArray[planeIndex].planeBaseAddr;

        predRowBytesArray[frameIndex] = predictionBuffers[frameIndex].planeArray[planeIndex].planeRowBytes;

// Encode the source frame in sourceBuffer, possibly predicted from the frames in predictionBuffers.

// Write bytes at dataPtr and report the number of bytes written in *bytesWrittenOut.

// planeByteBudget indicates how many bytes are available for this plane; if possible we should emit

// this many bytes or fewer, but we do not guarantee to hit a limit precisely.

// predAllowedArray is an array of booleans indicating which prediction buffers may be used.

// keyFrame is true if this is to be a key frame (it is the caller's responsibility to ensure

// that frames earlier in display order have already been encoded).

// if droppableFrame is true, this is a droppable frame; if false, storageIndex indicates the

// prediction buffer it will be stored in.

extern OSStatus

NaiveEncoder_EncodeFrame(

        size_t width,

        size_t height,

        struct InternalPixelBuffer *sourceBuffer,

        UInt8 *dataPtr,

        size_t *bytesWrittenOut,

        size_t frameByteBudget,

        Boolean predAllowedArray[kMaxStoredFrames],

        struct InternalPixelBuffer predictionBuffers[kMaxStoredFrames],

        Boolean keyFrame,

        Boolean droppableFrame,

        int storageIndex )

    OSStatus err = noErr;

    size_t totalBytesWritten = 0, bytesWrittenForPlane;

    Boolean standaloneFrame;

    int frameIndex;

    const UInt8 *predBaseAddrArray[kMaxStoredFrames];

    size_t predRowBytesArray[kMaxStoredFrames];

    UInt32 boxSize;

    OSType boxKind;

    int planeIndex, subsampling;

    size_t planeByteBudget;

    // Make sure we won't use the same buffer to predict a frame and to store it afterwards.

    if( ! droppableFrame )

        predAllowedArray[storageIndex] = false;

    // Make sure we don't use any prediction for key frames.

    if( keyFrame )

        for( frameIndex = 0; frameIndex < kMaxStoredFrames; frameIndex++ )

            predAllowedArray[frameIndex] = false;

    // The frame is a standalone frame (an I-frame) if it will does not use any prediction buffers.

    // Do not confuse this with a key frame, which is a standalone frame AND a sync point in

    // the sequence of frames: no later frames depend on information from earlier frames.

    standaloneFrame = true;

    for( frameIndex = 0; frameIndex < kMaxStoredFrames; frameIndex++ ) {

        if( predAllowedArray[frameIndex] )

            standaloneFrame = false;

    // Subtract the fixed overheads from the budget we'll portion out to the planes.

    frameByteBudget -= 11 + 8 + 8 + 8 + 8;

    // Write the header.

    *(UInt32 *)(dataPtr + 0) = EndianU32_NtoB( 11 );

    *(OSType *)(dataPtr + 4) = EndianU32_NtoB( kNaiveHeaderCheckCode );

    dataPtr[8] = keyFrame ? 1 : 0;

    dataPtr[9] = standaloneFrame ? 1 : 0;

    dataPtr[10] = droppableFrame ? 0 : ( storageIndex + 1 );

    dataPtr += 11;

    totalBytesWritten += 11;

    for( planeIndex = 0; planeIndex < 3; planeIndex++ ) {

        static const OSType planeKindArray[3] = { kNaiveYPlaneCode, kNaiveCbPlaneCode, kNaiveCrPlaneCode };

        static const int planeSubsamplingArray[3] = { 1, 2, 2 };

        static const float planeProportionOfBudgetArray[3] = { 0.66, 0.17, 0.17 };

        boxKind = planeKindArray[planeIndex];

        subsampling = planeSubsamplingArray[planeIndex];

        planeByteBudget = planeProportionOfBudgetArray[planeIndex] * frameByteBudget;

        // Write the box header.

        // We don't know yet how many bytes it will be -- we'll fix that after we write the box contents.

        *(UInt32 *)(dataPtr + 0) = EndianU32_NtoB( 0 );

        *(OSType *)(dataPtr + 4) = EndianU32_NtoB( boxKind );

        selectPlaneFromEachBuffer( predictionBuffers, planeIndex, predBaseAddrArray, predRowBytesArray );

        err = encodePlane( width/subsampling, height/subsampling,

                sourceBuffer->planeArray[planeIndex].planeBaseAddr,

                sourceBuffer->planeArray[planeIndex].planeRowBytes,

                dataPtr + 8, &bytesWrittenForPlane, planeByteBudget,

                predAllowedArray, predBaseAddrArray, predRowBytesArray );

        if( err )

            goto bail;

        // (In the interests of simplicity we do not apply spare byte budget from one plane to the next.)

        boxSize = 8 + bytesWrittenForPlane;

        *(UInt32 *)(dataPtr + 0) = EndianU32_NtoB( boxSize );

        dataPtr += boxSize;

        totalBytesWritten += boxSize;

    // Write the termination atom.

    *(UInt32 *)(dataPtr + 0) = EndianU32_NtoB( 0 );

    *(UInt32 *)(dataPtr + 4) = EndianU32_NtoB( 0 );

    dataPtr += 8;

    totalBytesWritten += 8;

    *bytesWrittenOut = totalBytesWritten;

bail:

    return err;

// Report the maximum number of bytes it could take to encode a frame of given dimensions.

extern void

NaiveEncoder_GetMaxEncodedDataSize( size_t width, size_t height, size_t *maxBytesOut )

    size_t widthInMacroBlocks, heightInMacroBlocks;

    widthInMacroBlocks = ( width + 15 ) / 16;

    heightInMacroBlocks = ( height + 15 ) / 16;

    *maxBytesOut =

          11 + 8 + 8 + 8 + 8 // constant overheads for header and 4 boxes

        + widthInMacroBlocks * heightInMacroBlocks * 6 * (3+8*8); // 6 blocks per macroblock, max 69 bytes each

// Report the minimum number of bytes it could take to encode a frame of given dimensions.

extern void

NaiveEncoder_GetMinEncodedDataSize( size_t width, size_t height, size_t *minBytesOut )

    size_t widthInMacroBlocks, heightInMacroBlocks;

    widthInMacroBlocks = ( width + 15 ) / 16;

    heightInMacroBlocks = ( height + 15 ) / 16;

    *minBytesOut =

          11 + 8 + 8 + 8 + 8 // constant overheads for header and 4 boxes

        + widthInMacroBlocks * heightInMacroBlocks * 6 * (3+8*0); // 6 blocks per macroblock, min 3 bytes each

Next Previous