// EatClosedCaptionSCC.c

/*
 
File: EatClosedCaptionSCC.c
 
Abstract: Code to read and parse a Scenarist Closed Caption (SCC)
          file into a QuickTime Movie 
 
Version: 1.0
 
Disclaimer: IMPORTANT:  This Apple software is supplied to you by 
Apple Inc. ("Apple") in consideration of your agreement to the
following terms, and your use, installation, modification or
redistribution of this Apple software constitutes acceptance of these
terms.  If you do not agree with these terms, please do not use,
install, modify or redistribute this Apple software.
 
In consideration of your agreement to abide by the following terms, and
subject to these terms, Apple grants you a personal, non-exclusive
license, under Apple's copyrights in this original Apple software (the
"Apple Software"), to use, reproduce, modify and redistribute the Apple
Software, with or without modifications, in source and/or binary forms;
provided that if you redistribute the Apple Software in its entirety and
without modifications, you must retain this notice and the following
text and disclaimers in all such redistributions of the Apple Software. 
Neither the name, trademarks, service marks or logos of Apple Inc. 
may be used to endorse or promote products derived from the Apple
Software without specific prior written permission from Apple.  Except
as expressly stated in this notice, no other rights or licenses, express
or implied, are granted by Apple herein, including but not limited to
any patent rights that may be infringed by your derivative works or by
other works in which the Apple Software may be incorporated.
 
The Apple Software is provided by Apple on an "AS IS" basis.  APPLE
MAKES NO WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION
THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND
OPERATION ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
 
IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION,
MODIFICATION AND/OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED
AND WHETHER UNDER THEORY OF CONTRACT, TORT (INCLUDING NEGLIGENCE),
STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
 
Copyright (C) 2007 Apple Inc. All Rights Reserved.
 
*/
 
 
#include "EatClosedCaptionSCC.h"
 
// Local two-argument minimum/maximum helpers.  Any definition inherited from an earlier header is discarded first
//  (#undef of a name that isn't defined is a no-op).  Each argument may be evaluated twice, so don't pass
//  expressions with side effects.
#undef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))

#undef MAX
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
 
 
#pragma mark- Declarations
 
 
// Utility Routines

// Converts the two characters at 'string' into a single byte stored in '*byte'.  The characters are read as two decimal
//  digits when 'hex' is false and as two hexadecimal digits when 'hex' is true.  Returns noErr on success and paramErr
//  (leaving '*byte' untouched) when the characters are not valid digits.
ComponentResult ParseByte(const char *string, UInt8 *byte, Boolean hex);
 
 
#pragma mark-
#pragma mark- Scenarist Closed Caption Functions
 
// Allocates the Scenarist (SCC) parser state, hangs it off store->parserGlobals and prepares it for parsing:
//  - seeds the timecode definition (time scale 30000, frame duration 1001, 30 frames, no flags)
//  - allocates the sample description handle and an initially empty sample data handle
//  - compiles the line, empty-line, header and sample regular expressions
//  - allocates one regmatch_t array large enough for whichever expression has the most sub-expressions
// Returns noErr on success.  Any failure (including a regular expression that won't compile) is reported as
//  memFullErr, and ScenaristClosedCaptionParserTerminate is run on the store before returning.
ComponentResult ScenaristClosedCaptionParserInit(EatClosedCaptionGlobals store)
{
    const int regExFlags = REG_EXTENDED | REG_NEWLINE;
    EatScenaristClosedCaptionGlobals parser = NULL;
    ComponentResult err = noErr;
    size_t maxSubExpressions = 1;

    // Zero-filled parser state, reachable from the store from here on
    parser = (EatScenaristClosedCaptionGlobals)NewPtrClear(sizeof(EatScenaristClosedCaptionGlobalsRecord));
    store->parserGlobals = parser;
    require_action(store->parserGlobals, bail, err = memFullErr);

    // Back pointer and "nothing decoded yet" markers
    parser->ccGlobals = store;
    parser->firstSampleMediaTime = -1;
    parser->firstSampleDecoded = false;

    // Timecode definition used for every timecode <-> frame number conversion
    parser->timeCodeDef.numFrames = 30;
    parser->timeCodeDef.frameDuration = 1001;
    parser->timeCodeDef.fTimeScale = 30000;
    parser->timeCodeDef.flags = 0;

    // Handles for the sample description and the sample payload
    parser->sampleDesc = (SampleDescriptionHandle)NewHandleClear(sizeof(SampleDescription));
    require_action(parser->sampleDesc, bail, err = memFullErr);

    parser->sampleData = NewHandle(0);
    require_action(parser->sampleData, bail, err = memFullErr);

    // Compile our regular expressions
    require_action(regcomp(&parser->lineRegEx, kSCCLineRegEx, regExFlags) == 0, bail, err = memFullErr);
    require_action(regcomp(&parser->emptyRegEx, kSCCEmptyRegEx, regExFlags) == 0, bail, err = memFullErr);
    require_action(regcomp(&parser->headerRegEx, kSCCHeaderRegEx, regExFlags) == 0, bail, err = memFullErr);
    require_action(regcomp(&parser->sampleRegEx, kSCCSampleRegEx, regExFlags) == 0, bail, err = memFullErr);

    // Find the largest sub-expression count among the four expressions (never less than one)
    if (parser->lineRegEx.re_nsub > maxSubExpressions)
        maxSubExpressions = parser->lineRegEx.re_nsub;
    if (parser->emptyRegEx.re_nsub > maxSubExpressions)
        maxSubExpressions = parser->emptyRegEx.re_nsub;
    if (parser->headerRegEx.re_nsub > maxSubExpressions)
        maxSubExpressions = parser->headerRegEx.re_nsub;
    if (parser->sampleRegEx.re_nsub > maxSubExpressions)
        maxSubExpressions = parser->sampleRegEx.re_nsub;

    // One entry for the whole match plus one per sub-expression
    parser->regExMatches = (regmatch_t*)calloc(maxSubExpressions + 1, sizeof(regmatch_t));
    require_action(parser->regExMatches, bail, err = memFullErr);

bail:
    // Undo any partial setup on failure
    if (err != noErr)
        ScenaristClosedCaptionParserTerminate(store);

    return err;
}
 
 
// Tears down the parser state attached to 'store' and ends any media edit session the store still flags as open.
//  A NULL store is ignored, and a store without parser globals only gets its edit sessions closed.
void ScenaristClosedCaptionParserTerminate(EatClosedCaptionGlobals store)
{
    EatScenaristClosedCaptionGlobals parser = NULL;

    // Nothing to do without a store
    if (!store)
        return;

    parser = (EatScenaristClosedCaptionGlobals)store->parserGlobals;
    if (parser)
    {
        // Sample description and sample payload handles
        DisposeHandle((Handle)parser->sampleDesc);
        DisposeHandle(parser->sampleData);

        // Compiled regular expressions and the match array they share
        regfree(&parser->lineRegEx);
        regfree(&parser->emptyRegEx);
        regfree(&parser->headerRegEx);
        regfree(&parser->sampleRegEx);
        free(parser->regExMatches);

        // Finally the parser record itself; clear the store's pointer so it can't be reused
        DisposePtr(store->parserGlobals);
        store->parserGlobals = NULL;
    }

    // Close the closed caption media edit session if it was left open
    if (store->editingClosedCaption)
    {
        EndMediaEdits(store->closedCaptionMedia);
        store->editingClosedCaption = false;
    }

    // Same for the timecode media
    if (store->editingTimeCode)
    {
        EndMediaEdits(store->timeCodeMedia);
        store->editingTimeCode = false;
    }
}
 
 
// Reads the entire caption file through store->dataHandler and parses it one line at a time.
//
//  The data is pulled into a growable buffer (512 bytes to start, doubled whenever it fills up without yielding a
//  complete line).  The first complete line must match the header expression.  Every following line is either skipped
//  (the empty-line expression matches it in full) or must match the sample expression, in which case it is decoded by
//  ScenaristClosedCaptionParserDecodeSample.  Once all of the data has been consumed, the last decoded sample and the
//  timecode sample are written out.
//
//  store           - importer globals; store->parserGlobals is the state set up by ScenaristClosedCaptionParserInit
//  atTime          - not used by this routine
//  durationAdded   - not used by this routine
//
//  Returns noErr on success; memFullErr if the buffer can't be allocated or grown; paramErr for a missing header, a
//  line that isn't valid sample data, or trailing data that doesn't form a complete line; otherwise whatever error the
//  data handler or the sample/timecode routines returned.
ComponentResult ScenaristClosedCaptionParserDoParse(EatClosedCaptionGlobals store, TimeValue atTime, TimeValue *durationAdded)
{
    EatScenaristClosedCaptionGlobals globals = (EatScenaristClosedCaptionGlobals)store->parserGlobals;

    ComponentResult err = noErr;
    Boolean openedForRead = false;
    int reResult = 0;
    long dataSize = 0, dataOffset = 0;
    long bufferSize = 0, bufferUsed = 0, bufferOffset = 0;
    Ptr buffer = NULL;
    Boolean headerParsed = false;
    
    // Prepare for reading
    require_noerr(err = DataHOpenForRead(store->dataHandler), bail);
    openedForRead = true;

    // Get the data size
    require_noerr(err = DataHGetFileSize(store->dataHandler, &dataSize), bail);
    
    // Allocate a buffer to read into
    bufferSize = 512;
    buffer = malloc(bufferSize);
    require_action(buffer, bail, err = memFullErr);
    
    // Parse the data
    while (dataOffset < dataSize)
    {
        // Some useful bookkeeping
        long bufferAvailable = bufferSize - bufferUsed;
        long dataAvailable = dataSize - dataOffset;
        long bytesToRead = 0;

        // See if we need to increase our buffer size (the buffer is full and still holds no complete line)
        if (bufferAvailable <= 0)
        {
            Ptr newBuffer = NULL;
            
            bufferSize *= 2;
            newBuffer = realloc(buffer, bufferSize);
            require_action(newBuffer, bail, err = memFullErr);

            buffer = newBuffer;
            bufferAvailable = bufferSize - bufferUsed;
        }
        
        // Read some data, appending it after whatever partial line is still in the buffer
        bytesToRead = MIN(bufferAvailable, dataAvailable);
        require_noerr(err = DataHScheduleData(store->dataHandler, buffer + bufferUsed, dataOffset, bytesToRead, 0, NULL, NULL), bail);
        bufferUsed += bytesToRead;
        bufferAvailable -= bytesToRead;
        dataOffset += bytesToRead;
        dataAvailable -= bytesToRead;
        
        // See if there's enough data to read in one or more lines of caption data.  With REG_STARTEND the search range
        //  is passed in through regExMatches[0], so the buffer doesn't have to be NUL terminated
        globals->regExMatches[0].rm_so = bufferOffset;
        globals->regExMatches[0].rm_eo = bufferUsed;
        while (regexec(&globals->lineRegEx, buffer, globals->lineRegEx.re_nsub + 1, globals->regExMatches, REG_NOTEOL | REG_STARTEND) == 0)
        {
            globals->lastMatchString = buffer;
            bufferOffset = globals->regExMatches[0].rm_eo + 1;
            
            // regexec can't seem to get around cr/lf pairs, so we have to skip the cr if present.  Only bytes that were
            //  actually read (below bufferUsed) may be examined; anything past that is stale or uninitialized, and
            //  skipping it would push bufferOffset beyond bufferUsed and make the leftover count below go negative
            if (bufferOffset < bufferUsed && *(char*)(buffer + bufferOffset) == '\r')
                ++bufferOffset;

            // Read past the header line if we haven't already done so
            if (!headerParsed)
            {
                require_action((reResult = regexec(&globals->headerRegEx, buffer, globals->headerRegEx.re_nsub + 1, globals->regExMatches, REG_STARTEND)) == 0, bail, err = paramErr);
                globals->lastMatchString = buffer;
                
                // Found a valid header
                headerParsed = true;
            }
            else
            {
                // Skip empty lines and decode caption lines.  A line only counts as empty when the empty-line
                //  expression matches exactly the span of the line that was just found
                regoff_t lineStart = globals->regExMatches[0].rm_so;
                regoff_t lineEnd = globals->regExMatches[0].rm_eo;
                if ((reResult = regexec(&globals->emptyRegEx, buffer, globals->emptyRegEx.re_nsub + 1, globals->regExMatches, REG_STARTEND)) == REG_NOMATCH ||
                    (globals->regExMatches[0].rm_so != lineStart || globals->regExMatches[0].rm_eo != lineEnd))
                {
                    // The empty-line attempt may have overwritten the range, so restore it before matching the sample
                    globals->lastMatchString = buffer;
                    globals->regExMatches[0].rm_so = lineStart;
                    globals->regExMatches[0].rm_eo = lineEnd;

                    reResult = regexec(&globals->sampleRegEx, buffer, globals->sampleRegEx.re_nsub + 1, globals->regExMatches, REG_STARTEND);
                    globals->lastMatchString = buffer;

                    require_action(reResult == 0, bail, err = paramErr);
                    
                    // Found a valid line of sample data... decode it
                    require_noerr(err = ScenaristClosedCaptionParserDecodeSample(store), bail);
                }
            }
            
            // Continue the line search with whatever follows the line we just handled
            globals->regExMatches[0].rm_so = bufferOffset;
            globals->regExMatches[0].rm_eo = bufferUsed;
        }
        
        // Move the unconsumed tail (a partial line, if any) to the front of the buffer
        bufferUsed = bufferUsed - bufferOffset;
        
        if (bufferUsed)
            memmove(buffer, buffer + bufferOffset, bufferUsed);
        
        bufferOffset = 0;
        bufferAvailable = bufferSize - bufferUsed;
    }
    
    // Make sure we used it all up
    if (dataSize != dataOffset || bufferUsed > 0)
        require_noerr(err = paramErr, bail);
    
    // Write out the last sample and the timecode
    if (err == noErr && globals->firstSampleDecoded)
    {
        require_noerr(err = ScenaristClosedCaptionParserAddSample(store, -1, true), bail);
        require_noerr(err = ScenaristClosedCaptionParserAddTimeCode(store), bail);
    }
    
bail:
    // Done reading
    if (openedForRead)
        DataHCloseForRead(store->dataHandler);
    
    // Clean up
    free(buffer);
    
    return err;
}
 
 
// Decode a caption read from the source data ref
//
//  Works on the line most recently matched by the sample regular expression, i.e. on globals->lastMatchString and the
//  offsets in globals->regExMatches, so it has to be called right after that match succeeded.
//
//  1. Timecode: parses hours/minutes/seconds/frames.  The first sample decides whether the file is drop frame (';'
//     separator); every later sample must agree.  From the second sample on, the previous sample (still held in
//     globals->sampleData) is added to the media with a duration derived from the frame distance between the two
//     timecodes.
//  2. Caption data: rebuilds globals->sampleData as one atom (sizeof(long) size field, 'cdat' type, then the caption
//     bytes parsed from the hex string).
//
//  Returns noErr on success; paramErr if drop frame and non-drop frame timecodes are mixed or if the new timecode
//  leaves too few frames for the previous sample's data; otherwise the error from the timecode conversion or from
//  ScenaristClosedCaptionParserAddSample.
ComponentResult ScenaristClosedCaptionParserDecodeSample(EatClosedCaptionGlobals store)
{
    EatScenaristClosedCaptionGlobals globals = (EatScenaristClosedCaptionGlobals)store->parserGlobals;

    // The need for error checking in this function is minimal, as we would not have made it this far without a regular
    //  expression match, which already validates the data string
    ComponentResult err = noErr;
    size_t atomHeaderSize = sizeof(long) + sizeof(OSType);

    // Timecode
    {
        TimeCodeRecord newSampleTimeCode = { 0 };

        // The entire file must be either drop frame or non-drop frame... no mix and match
        if (!globals->firstSampleDecoded && globals->lastMatchString[globals->regExMatches[kSCCSampleMatchTCDropFrame].rm_so] == ';')
            globals->timeCodeDef.flags |= tcDropFrame;
        else
        {
            // The flag test needs its own parentheses: '==' binds tighter than '&', so without them the expression
            //  would be evaluated as flags & (tcDropFrame == tcDropFrame)
            require_action(((globals->timeCodeDef.flags & tcDropFrame) == tcDropFrame) == (globals->lastMatchString[globals->regExMatches[kSCCSampleMatchTCDropFrame].rm_so] == ';'), bail, err = paramErr);
        }
        
        // Parse all of the timecode fields (two decimal digits each)
        ParseByte(globals->lastMatchString + globals->regExMatches[kSCCSampleMatchTCHours].rm_so, &newSampleTimeCode.t.hours, false);
        ParseByte(globals->lastMatchString + globals->regExMatches[kSCCSampleMatchTCMinutes].rm_so, &newSampleTimeCode.t.minutes, false);
        ParseByte(globals->lastMatchString + globals->regExMatches[kSCCSampleMatchTCSeconds].rm_so, &newSampleTimeCode.t.seconds, false);
        ParseByte(globals->lastMatchString + globals->regExMatches[kSCCSampleMatchTCFrames].rm_so, &newSampleTimeCode.t.frames, false);
        
        if (globals->firstSampleDecoded)
        {
            // Once we've decoded more than one sample, we can calculate the duration of the previous sample
            long prevSampleFrameNumber = 0, sampleFrameNumber = 0, sampleSize = 0;

            require_noerr(err = TCTimeCodeToFrameNumber(GetMediaHandler(store->timeCodeMedia), &globals->timeCodeDef, &globals->sampleTimeCode, &prevSampleFrameNumber), bail);
            require_noerr(err = TCTimeCodeToFrameNumber(GetMediaHandler(store->timeCodeMedia), &globals->timeCodeDef, &newSampleTimeCode, &sampleFrameNumber), bail);

            // Payload size of the previous sample (the handle still holds it, atom header included)
            sampleSize = GetHandleSize(globals->sampleData) - atomHeaderSize;

            // Two caption bytes fit into each frame, so the previous sample needs at least sampleSize / 2 frames
            if (sampleSize > (sampleFrameNumber - prevSampleFrameNumber) * 2)
            {
                // Timecode of current sample overlaps previous sample
                require_noerr(err = paramErr, bail);
            }

            // Add the previous sample using the duration we calculated above
            require_noerr(err = ScenaristClosedCaptionParserAddSample(store, (sampleFrameNumber - prevSampleFrameNumber) * 1001, false), bail);
        }
        else
        {
            // Save off the first timecode that was encountered
            globals->firstSampleTimeCode = newSampleTimeCode;
        }
        
        // Save off the current timecode for use in calculating the duration of this sample next time around
        globals->sampleTimeCode = newSampleTimeCode;
    }
    
    // Closed caption
    {
        const char *sampleString = NULL;
        UInt8 *sampleData = NULL;
        size_t ndx = 0;

        // The sample data string is made up of exactly one substring corresponding to the regular expression match "kSCCSampleMatchLastSampleData" and
        //  zero or more substrings corresponding to the regular expression match "kSCCSampleMatchSecondLastSampleData".  Each of these matches represents
        //  two bytes of sample data, so we can calculate the necessary size of our sample buffer
        size_t sampleStringLength = globals->regExMatches[kSCCSampleMatchAllSampleData].rm_eo - globals->regExMatches[kSCCSampleMatchAllSampleData].rm_so;
        size_t lastMatchLength = globals->regExMatches[kSCCSampleMatchLastSampleData].rm_eo - globals->regExMatches[kSCCSampleMatchLastSampleData].rm_so;
        size_t otherMatchLength = globals->regExMatches[kSCCSampleMatchSecondLastSampleData].rm_eo - globals->regExMatches[kSCCSampleMatchSecondLastSampleData].rm_so;

        // A zero otherMatchLength is replaced by 1 so the division below can't divide by zero
        size_t numSampleBytes =  2 * (1 + ((sampleStringLength - lastMatchLength) / (otherMatchLength ? otherMatchLength : 1)));
        
        // Make the sample data handle big enough to hold all of the data plus an atom header
        // NOTE(review): the outcome of SetHandleSize is not checked (e.g. via MemError); if the resize failed, the
        //  writes below would run past the end of the handle
        SetHandleSize(globals->sampleData, atomHeaderSize + numSampleBytes);

        sampleString = globals->lastMatchString + globals->regExMatches[kSCCSampleMatchAllSampleData].rm_so;

        HLock(globals->sampleData);
        sampleData = (UInt8*)*globals->sampleData;
        
        // Write out the size of the atom
        *(long*)sampleData = EndianS32_NtoB(atomHeaderSize + numSampleBytes);
        sampleData += sizeof(long);
        
        // Write out the atom type
        *(OSType*)sampleData = EndianU32_NtoB('cdat');
        sampleData += sizeof(OSType);

        // Parse and write the sample data: each step converts four hex digits into two bytes, then advances the
        //  string by the length of one matched group
        for (ndx = 0; ndx < numSampleBytes / 2; ndx++)
        {
            ParseByte(sampleString, (UInt8*)sampleData, true);
            ParseByte(sampleString + 2, (UInt8*)sampleData + 1, true);

            sampleString += otherMatchLength;
            sampleData += 2;
        }
        HUnlock(globals->sampleData);
    }
    
    // We've decoded a sample
    globals->firstSampleDecoded = true;

bail:   
    return err;
}
 
 
// Add a decoded sample to the Movie
//
//  Adds the sample currently held in globals->sampleData to the closed caption media.  The first call (no edit session
//  open yet) begins media editing and fills in the 'c608' sample description.
//
//  store       - importer globals
//  duration    - sample duration in 1/30000 second units (it is converted to the media's time scale before use); a
//                negative value means "derive it from the sample data": half the sample handle's size times 1001
//  lastSample  - true for the final sample: media editing is ended and the media is inserted into the caption track
//
//  Returns noErr on success, otherwise the error of the failing Movie Toolbox call.  On any error an open edit session
//  is ended before returning.
ComponentResult ScenaristClosedCaptionParserAddSample(EatClosedCaptionGlobals store, TimeValue duration, Boolean lastSample)
{
    EatScenaristClosedCaptionGlobals globals = (EatScenaristClosedCaptionGlobals)store->parserGlobals;

    ComponentResult err = noErr;
    TimeRecord sampleDuration = { 0 };
    
    // Setup on the first call
    if (!store->editingClosedCaption)
    {
        require_noerr(err = BeginMediaEdits(store->closedCaptionMedia), bail);
        store->editingClosedCaption = true;
                      
        (*globals->sampleDesc)->descSize = GetHandleSize((Handle)globals->sampleDesc);
        (*globals->sampleDesc)->dataFormat = 'c608';
        (*globals->sampleDesc)->resvd1 = 0;
        (*globals->sampleDesc)->resvd2 = 0;
    }

    // Work out the duration in 1/30000 units, then convert it to the media's time scale
    // NOTE(review): the fallback uses the full handle size, which includes the atom header bytes, while the overlap
    //  check in ScenaristClosedCaptionParserDecodeSample subtracts the header first - confirm this is intended
    sampleDuration.base = NULL;
    sampleDuration.scale = 30000;
    sampleDuration.value.lo = duration >= 0 ? duration : GetHandleSize(globals->sampleData) / 2 * 1001;
    ConvertTimeScale(&sampleDuration, GetMediaTimeScale(store->closedCaptionMedia));

    // Add the caption sample to the media.  The handle is unlocked again before the result is examined so that a
    //  failure can't leave it locked.  The media time of the sample is only requested while firstSampleMediaTime
    //  is still negative, i.e. for the first sample that gets added
    HLock(globals->sampleData);
    err = AddMediaSample2(store->closedCaptionMedia, (UInt8*)*(globals->sampleData), GetHandleSize(globals->sampleData), sampleDuration.value.lo, 0,
                          globals->sampleDesc, 1, 0, globals->firstSampleMediaTime < 0 ? &globals->firstSampleMediaTime : NULL);
    HUnlock(globals->sampleData);
    require_noerr(err, bail);
    
    // Teardown on last call
    if (lastSample)
    {
        TimeRecord  captionTrackStart = { 0 };
        TimeValue   captionInsertDuration = 0;

        require_noerr(err = EndMediaEdits(store->closedCaptionMedia), bail);
        store->editingClosedCaption = false;

        // Insert the media into the track, starting at track time zero and at the media time of the first sample
        captionInsertDuration = GetMediaDuration(store->closedCaptionMedia) - globals->firstSampleMediaTime;
        if (captionInsertDuration > 0)
            require_noerr(err = InsertMediaIntoTrack(store->closedCaptionTrack, captionTrackStart.value.lo, globals->firstSampleMediaTime, captionInsertDuration, fixed1), bail);
    }

bail:
    // Don't leave the media in edit mode after a failure
    if (err)
    {
        if (store->editingClosedCaption)
        {
            EndMediaEdits(store->closedCaptionMedia);
            store->editingClosedCaption = false;
        }
    }
    
    return err;
}
 
 
// Add timecode to the Movie
//
//  Writes one timecode sample to the timecode media and inserts that media into the timecode track.  The sample holds
//  the frame number of the first caption's timecode; its duration spans from the first caption to the last one, plus
//  half the size of the last sample's data handle, all in units of 1001 per frame.
//
//  Does nothing and returns noErr when store->timeCodeTrack is NULL.  Otherwise returns noErr on success or the error
//  of the failing call; on failure an edit session opened here is ended again before returning.
ComponentResult ScenaristClosedCaptionParserAddTimeCode(EatClosedCaptionGlobals store)
{
    EatScenaristClosedCaptionGlobals globals = (EatScenaristClosedCaptionGlobals)store->parserGlobals;

    ComponentResult err = noErr;
    TimeCodeDescriptionHandle timeCodeDesc = NULL;

    if (store->timeCodeTrack)
    {
        long firstFrameNumber = 0;
        long sampleFrameNumber = 0;
        long sampleFrames = 0;
        TimeValue64 duration = 0;
        long sampleData = 0;
        
        // Remember that the edit session is open so it can be closed on any failure below
        require_noerr(err = BeginMediaEdits(store->timeCodeMedia), bail);
        store->editingTimeCode = true;

        // Use the saved timecode values from the first and last captions (adding the duration of the last sample as well)
        require_noerr(err = TCTimeCodeToFrameNumber(GetMediaHandler(store->timeCodeMedia), &globals->timeCodeDef, &globals->firstSampleTimeCode, &firstFrameNumber), bail);
        require_noerr(err = TCTimeCodeToFrameNumber(GetMediaHandler(store->timeCodeMedia), &globals->timeCodeDef, &globals->sampleTimeCode, &sampleFrameNumber), bail);
        sampleFrames = GetHandleSize(globals->sampleData) / 2;
        
        duration = (sampleFrameNumber - firstFrameNumber + sampleFrames) * 1001;
        
        // NOTE(review): the result of NewHandleClear is not checked before it is dereferenced below; an allocation
        //  failure should bail out with a memory error here
        timeCodeDesc = (TimeCodeDescriptionHandle)NewHandleClear(sizeof(TimeCodeDescription));

        HLock((Handle)timeCodeDesc);
        (*timeCodeDesc)->descSize = GetHandleSize((Handle)timeCodeDesc);
        (*timeCodeDesc)->dataFormat = TimeCodeMediaType;
        (*timeCodeDesc)->timeCodeDef = globals->timeCodeDef;
        
        // The sample itself is just the big-endian frame number of the first timecode
        sampleData = EndianS32_NtoB(firstFrameNumber);
        err = AddMediaSample2(store->timeCodeMedia, (UInt8*)&sampleData, sizeof(sampleData), duration, 0,
                              (SampleDescriptionHandle)timeCodeDesc, 1, 0, NULL);
        
        HUnlock((Handle)timeCodeDesc);
        require_noerr(err, bail);
        
        // Teardown
        {
            TimeRecord  timeCodeTrackStart = { 0 };
            TimeValue   timeCodeInsertDuration = GetMediaDuration(store->timeCodeMedia);
            
            // End editing and add media to the track
            timeCodeTrackStart.value.lo = 0;
            timeCodeTrackStart.scale = GetMediaTimeScale(store->timeCodeMedia);
            timeCodeTrackStart.base = NULL;
            
            require_noerr(err = EndMediaEdits(store->timeCodeMedia), bail);
            store->editingTimeCode = false;

            if (timeCodeInsertDuration > 0)
                require_noerr(err = InsertMediaIntoTrack(store->timeCodeTrack, timeCodeTrackStart.value.lo, 0, timeCodeInsertDuration, fixed1), bail);
        }
    }

bail:
    // Don't leave the media in edit mode after a failure
    if (err && store->editingTimeCode)
    {
        EndMediaEdits(store->timeCodeMedia);
        store->editingTimeCode = false;
    }

    // Only dispose of the description if one was actually allocated
    if (timeCodeDesc)
        DisposeHandle((Handle)timeCodeDesc);

    return err;
}
 
 
// Utility routine for parsing numerical strings
//
//  Converts exactly two characters, string[0] and string[1], into one byte.
//
//  string  - text to parse; it does NOT have to be NUL terminated, only the first two characters are examined (and the
//            second one only if the first is a valid digit)
//  byte    - receives the parsed value; left untouched on failure
//  hex     - true:  both characters must be hexadecimal digits (0-9, a-f, A-F); result is 16 * first + second
//            false: both characters must be decimal digits (0-9); result is 10 * first + second
//
//  Returns noErr on success, paramErr if either pointer is NULL or either character is not a valid digit.
ComponentResult ParseByte(const char *string, UInt8 *byte, Boolean hex)
{
    const int radix = hex ? 16 : 10;
    int value = 0;
    int ndx = 0;

    if (string == NULL || byte == NULL)
        return paramErr;

    // The characters are read directly rather than through sscanf: callers pass pointers into a raw read buffer that
    //  has no terminating NUL, which sscanf requires
    for (ndx = 0; ndx < 2; ndx++)
    {
        const char c = string[ndx];
        int digit = 0;

        if (c >= '0' && c <= '9')
            digit = c - '0';
        else if (hex && c >= 'a' && c <= 'f')
            digit = c - 'a' + 10;
        else if (hex && c >= 'A' && c <= 'F')
            digit = c - 'A' + 10;
        else
            return paramErr;    // Not a digit (this also stops at a NUL without reading past it)

        value = value * radix + digit;
    }

    // Two digits can't exceed 0xFF (hex) or 99 (decimal), so the value always fits
    *byte = (UInt8)value;

    return noErr;
}