// VRSpeech.c

//////////
//
//  File:       VRSpeech.c
//
//  Contains:   Speech recognition support for QuickTime VR movies.
//
//  Written by: Tim Monroe
//
//  Copyright:  © 1996-1998 by Apple Computer, Inc., all rights reserved.
//
//  Change History (most recent first):
//
//     <6>      08/17/02    rtm     fixed copy and paste bug in VRSpeech_HandleSpeechBegunAppleEvent
//     <5>      04/05/00    rtm     made changes to get things running under CarbonLib
//     <4>      06/19/98    rtm     minor clean-up for inclusion in QT3 SDK
//     <3>      07/10/97    rtm     added ReleaseResource call to VRSpeech_ReadLanguageModelsFromResource
//     <2>      03/07/97    rtm     added VRSpeech_ prefix to all routines
//     <1>      12/05/96    rtm     ported earlier speech recognition support functions to VRShell
//
//////////
 
// TO DO:
// + on a mousedown event in QTVR window, stop spinning immediately??
// + implement node navigation
// + presumably, spinning should be on a per-instance basis
 
 
//////////
//
// header files
//
//////////
 
#include "MacFramework.h"
 
#include "VRSpeech.h"
#include "LMSpeech.h"       // refcons of language model elements
 
 
//////////
//
// global variables
//
//////////
 
SRRecognitionSystem     gSystem;                    // recognition system opened in VRSpeech_Init, closed in VRSpeech_Stop
SRRecognizer            gRecognizer;                // recognizer created in VRSpeech_Init, released in VRSpeech_Stop
SRLanguageModel         gVRLM;                      // top-level language model built by VRSpeech_ReadLanguageModelsFromResource
MyTMTask                gTMTaskRec;                 // Time Manager task record filled in by VRSpeech_StartSpinning
TimerUPP                gTimerTaskUPP = NULL;       // UPP for VRSpeech_SpinTask; allocated in VRSpeech_Init, disposed in VRSpeech_Stop
Boolean                 gDoSpeechTask = false;      // set by VRSpeech_SpinTask when a spin step is due; cleared by VRSpeech_DoEventLoopSpinCheck

extern Boolean          gHasSpeechRec;              // defined elsewhere; set to true by VRSpeech_Init when a suitable Speech Recognition Manager is found
 
 
//////////
//
// VRSpeech_Init
// Initialize speech recognition, if it's available.
//
// Sets gHasSpeechRec when the Speech Recognition Manager is at least version 1.5.0,
// then opens the recognition system, creates and configures a recognizer, installs
// the speech Apple event handlers, loads and installs our language model, and starts
// listening. Each step is attempted only if every previous required step succeeded.
// The spinning-task UPP is allocated whenever gHasSpeechRec is true.
//
//////////

void VRSpeech_Init (void)
{
    long            myResponse = 0;
    unsigned long   myParam;
    OSErr           myErr = noErr;
    
    // version must be at least 1.5.0 to support SRM API used here
    myErr = Gestalt(gestaltSpeechRecognitionVersion, &myResponse);
    if ((myErr == noErr) && (myResponse >= 0x00000150))
        gHasSpeechRec = true;
        
    if (!gHasSpeechRec)
        return;
        
    // open a recognition system
    if (myErr == noErr)
        myErr = SROpenRecognitionSystem(&gSystem, kSRDefaultRecognitionSystemID);
    
    // set recognition system properties:
    // we want the user-selected feedback and listening modes
    if (myErr == noErr) {
        short myModes = kSRHasFeedbackHasListenModes;
        
        myErr = SRSetProperty(gSystem, kSRFeedbackAndListeningModes, &myModes, sizeof(myModes));
    }
 
    // create a recognizer with default speech source
    if (myErr == noErr)
        myErr = SRNewRecognizer(gSystem, &gRecognizer, kSRDefaultSpeechSource);
                        
    // set recognizer properties
    if (myErr == noErr) {
        // we'd like *our* top-level LM to be the only one active; this is only a
        // preference, so a failure here is deliberately not treated as fatal
        Boolean myBlock = true;
        
        (void)SRSetProperty(gRecognizer, kSRBlockBackground, &myBlock, sizeof(myBlock));
        
        // we want to receive speech-begun and recognition-done Apple events
        myParam = kSRNotifyRecognitionBeginning | kSRNotifyRecognitionDone;
        myErr = SRSetProperty(gRecognizer, kSRNotificationParam, &myParam, sizeof(myParam));
    }
 
    // install Apple event handlers; both are required, so don't let the result
    // of the second installation hide a failure of the first
    if (myErr == noErr)
        myErr = AEInstallEventHandler(kAESpeechSuite, kAESpeechDetected, NewAEEventHandlerUPP(VRSpeech_HandleSpeechBegunAppleEvent), 0, false);
    if (myErr == noErr)
        myErr = AEInstallEventHandler(kAESpeechSuite, kAESpeechDone, NewAEEventHandlerUPP(VRSpeech_HandleSpeechDoneAppleEvent), 0, false);
            
    // get our language models
    if (myErr == noErr)
        myErr = VRSpeech_ReadLanguageModelsFromResource();
 
    // install initial language model
    if (myErr == noErr)
        myErr = SRSetLanguageModel(gRecognizer, gVRLM);
 
    // have the recognizer start processing sound
    if (myErr == noErr)
        myErr = SRStartListening(gRecognizer);  
    
    // allocate our spinning task (only once, so a repeated call doesn't leak a UPP)
    if (gTimerTaskUPP == NULL)
        gTimerTaskUPP = NewTimerUPP(VRSpeech_SpinTask);
}
 
 
//////////
//
// VRSpeech_Stop
// Shut down speech recognition.
//
//////////
 
void VRSpeech_Stop (void)
{
    // stop any spinning before we exit
    if (VRSpeech_IsSpinning())
        VRSpeech_StopSpinning();
 
    // release any existing language models
    SRReleaseObject(gVRLM);
    
    // shut down speech recognition
    SRStopListening(gRecognizer);           // stop processing incoming sound
    SRReleaseObject(gRecognizer);           // balance SRNewRecognizer call
    SRCloseRecognitionSystem(gSystem);      // balance SROpenRecognitionSystem call
    
    // deallocate our spinning task
    DisposeTimerUPP(gTimerTaskUPP);
}
 
 
//////////
//
// VRSpeech_HandleSpeechBegunAppleEvent
// Handle speech-begun events; currently this does nothing interesting; 
// in the future, we'll use it to adjust our language models according to context.
//
// We asked for kSRNotifyRecognitionBeginning in VRSpeech_Init, so the job of this
// handler is to tell the recognizer named in the event to continue.
//
// Return value: an error if the recognizer could not be read from the event;
// otherwise the first error from reading the status or continuing recognition.
//
//////////

PASCAL_RTN OSErr VRSpeech_HandleSpeechBegunAppleEvent (const AppleEvent *theMessage, AppleEvent *theReply, long theRefcon)
{
#pragma unused(theReply, theRefcon)
 
    long                myActualSize;
    DescType            myActualType;
    SRRecognizer        myRec;
    OSErr               myErr = noErr, myRecErr = noErr;
    OSErr               myGetRecErr = noErr, myContinueErr = noErr;
    
    // get the status reported with the event
    myErr = AEGetParamPtr(theMessage, keySRSpeechStatus, typeShortInteger, &myActualType, (Ptr)&myRecErr, sizeof(myRecErr), &myActualSize);

    // get the recognizer, whatever the status was: we need it to continue recognition
    myGetRecErr = AEGetParamPtr(theMessage, keySRRecognizer, typeSRRecognizer, &myActualType, (Ptr)&myRec, sizeof(myRec), &myActualSize);
    
    // better bail if we couldn't get the recognizer! (myRec holds garbage in that case)
    if (myGetRecErr != noErr)
        return((myErr != noErr) ? myErr : myGetRecErr);
            
    // here is where we would adjust LMs according to context
    // (that work should be skipped unless both myErr and myRecErr are noErr)
    
    // now tell the recognizer to continue
    myContinueErr = SRContinueRecognition(myRec);
    if (myErr == noErr)
        myErr = myContinueErr;
    
    return(myErr);
}
 
 
//////////
//
// VRSpeech_HandleSpeechDoneAppleEvent
// Handle recognition-done Apple event.
//
//////////
 
PASCAL_RTN OSErr VRSpeech_HandleSpeechDoneAppleEvent (const AppleEvent *theMessage, AppleEvent *theReply, long theRefcon)
{
#pragma unused(theReply, theRefcon)
 
    long                    myActualSize;
    DescType                myActualType;
    SRRecognitionResult     myRecResult;    
    SRLanguageModel         myResultLM;
    Size                    myLen;
    QTVRInstance            myInstance;
    long                    myDir;          // the direction we're moving
    long                    myAmt;          // the amount we're moving
    long                    myCount;
    OSErr                   myErr = noErr, myRecErr = noErr;
    
    // get recognition result status
    myErr = AEGetParamPtr(theMessage, keySRSpeechStatus, typeShortInteger, &myActualType, (Ptr)&myRecErr, sizeof(myRecErr), &myActualSize);
 
    // get recognition result
    if ((myErr == noErr) && (myRecErr == noErr))
        myErr = AEGetParamPtr(theMessage, keySRSpeechResult, typeSRSpeechResult, &myActualType, (Ptr)&myRecResult, sizeof(myRecResult), &myActualSize);
                    
    // better bail if we couldn't get the recognition result!
    if (myErr != noErr)
        return(myErr);
 
    // get the current movie
    myInstance = QTFrame_GetQTVRInstanceFromFrontWindow();
    if (myInstance == NULL) 
        return(invalidMovie);
    
    // extract the language model from the recognition result...
    myLen = sizeof(myResultLM);
    myErr = SRGetProperty(myRecResult, kSRLanguageModelFormat, &myResultLM, &myLen);
        
    if (myErr == noErr) {
        long                    myRefCon;
        SRLanguageObject        myItem1;
        SRLanguageObject        myItem2;
        SRPath                  myPath;
        
        // ...and then get its refcon, so we know which one it is
        myLen = sizeof(myRefCon);
        myErr = SRGetProperty(myResultLM, kSRRefCon, &myRefCon, &myLen);
            
        // at this point, the refcon better be kVRAllCmd; otherwise, bail
        if (myRefCon != kVRAllCmd)
            return(kSRModelMismatch);
        
        // get the one and only item in the top-level language model, a path
        myErr = SRGetIndexedItem(myResultLM, &myPath, 0);
        myLen = sizeof(myRefCon);
        myErr = SRGetProperty(myPath, kSRRefCon, &myRefCon, &myLen);
 
        switch (myRefCon) {
            case kMoveDirAndDeg:    // these two parse similarly
            case kMoveDirAndRad:
                    myErr = SRGetIndexedItem(myPath, &myItem1, 1);      // it's a one-item LM!
                    myErr = SRGetIndexedItem(myItem1, &myItem2, 0);     // so get the enclosed item
                    myLen = sizeof(myRefCon);
                    myErr = SRGetProperty(myItem2, kSRRefCon, &myDir, &myLen);
                    myErr = SRGetIndexedItem(myPath, &myItem1, 2);      // it's a one-item LM!
                    myErr = SRGetIndexedItem(myItem1, &myItem2, 0);     // so get the enclosed item
                    myLen = sizeof(myRefCon);
                    myErr = SRGetProperty(myItem2, kSRRefCon, &myAmt, &myLen);
                    if (myRefCon == kMoveDirAndDeg)
                        VRSpeech_GoDirByDegrees(myInstance, myDir, myAmt);
                    else
                        VRSpeech_GoDirByRadians(myInstance, myDir, myAmt);
                    break;
                break;
            
            case kMoveToNode:
                break;
                
            case kZoomDir:
                myErr = SRCountItems(myPath, &myCount);
                myErr = SRGetIndexedItem(myPath, &myItem1, myCount - 1);    // it's a one-item LM!
                myErr = SRGetIndexedItem(myItem1, &myItem2, 0);             // so get the enclosed item
                myLen = sizeof(myRefCon);
                myErr = SRGetProperty(myItem2, kSRRefCon, &myDir, &myLen);
                VRSpeech_ZoomInOrOut(myInstance, myDir);
                break;
                
            case kSpinStart:
                myErr = SRGetIndexedItem(myPath, &myItem1, 1);          // it's a one-item LM!
                myErr = SRGetIndexedItem(myItem1, &myItem2, 0);         // so get the enclosed item
                myLen = sizeof(myRefCon);
                myErr = SRGetProperty(myItem2, kSRRefCon, &myDir, &myLen);
                VRSpeech_StartSpinning(myInstance, myDir);
                break;
                
            case kSpinStop:
                VRSpeech_StopSpinning();
                break;
                
            default:
                break;
        }           
        
        SRReleaseObject(myItem1);
        SRReleaseObject(myItem2);
        SRReleaseObject(myPath);
    }
                        
    // release recognition result, since we are done with it
    SRReleaseObject(myRecResult);
    SRReleaseObject(myResultLM);
 
    return(myErr);
}
 
 
//////////
//
// VRSpeech_ReadLanguageModelsFromResource
// Get our language model(s); here we read a pre-rolled model from a resource.
//
//////////
 
OSErr VRSpeech_ReadLanguageModelsFromResource (void)
{
    Handle          myResourceHandle = NULL;
    OSErr           myErr = noErr;
    
    // open the language model resource from the resource fork
    myResourceHandle = GetResource(kLMResourceType, kLMResourceID);
    if (myResourceHandle == NULL) 
        return(ResError());
    
    // convert language model resource to a language model
    myErr = SRNewLanguageObjectFromHandle(gSystem, &gVRLM, myResourceHandle);
    ReleaseResource(myResourceHandle);
    return(myErr);
}
 
 
//////////
//
// VRSpeech_GoDirByDegrees
// Move a given number of degrees in a given direction.
//
// theAmt is one of the kAng... constants (or kUndefinedDegrees), not a raw number of
// degrees; unrecognized constants are treated as 10 degrees. kDirUp and kDirDown
// change the tilt angle; kDirLeft and kDirRight change the pan angle. Any other
// direction changes no angle, but the instance is still switched to degrees and updated.
//
// Return value: TRUE if a movement was made; FALSE if no movement was made.
//
//////////

Boolean VRSpeech_GoDirByDegrees (QTVRInstance theInstance, long theDir, long theAmt)
{
    float       myOldAngle = 0.0;
    long        myDegrees;
    Boolean     isTilt = (theDir == kDirUp) || (theDir == kDirDown);
    Boolean     isPan = (theDir == kDirLeft) || (theDir == kDirRight);
    
    QTVRSetAngularUnits(theInstance, kQTVRDegrees);
    
    // translate the language-model constant into a whole number of degrees
    switch (theAmt) {
        case kAng45:            myDegrees = 45;     break;
        case kAng90:            myDegrees = 90;     break;
        case kAng135:           myDegrees = 135;    break;
        case kAng180:           myDegrees = 180;    break;
        case kAng225:           myDegrees = 225;    break;
        case kAng270:           myDegrees = 270;    break;
        case kAng315:           myDegrees = 315;    break;
        case kAng36:            myDegrees = 36;     break;
        case kUndefinedDegrees: myDegrees = 5;      break;
        case kAng10:
        default:                myDegrees = 10;     break;
    }
    
    // remember the current angle, then ask for the new one
    if (isTilt) {
        myOldAngle = QTVRGetTiltAngle(theInstance);
        if (theDir == kDirUp)
            QTVRSetTiltAngle(theInstance, myOldAngle + myDegrees);
        else
            QTVRSetTiltAngle(theInstance, myOldAngle - myDegrees);
    } else if (isPan) {
        myOldAngle = QTVRGetPanAngle(theInstance);
        if (theDir == kDirLeft)
            QTVRSetPanAngle(theInstance, myOldAngle + myDegrees);
        else
            QTVRSetPanAngle(theInstance, myOldAngle - myDegrees);
    }
 
    QTVRUpdate(theInstance, kQTVRStatic);
    
    // a movement occurred if the angle we read back differs from the one we started with
    if (isTilt)
        return(myOldAngle != QTVRGetTiltAngle(theInstance));
    if (isPan)
        return(myOldAngle != QTVRGetPanAngle(theInstance));
    
    return(false);
}
 
 
//////////
//
// VRSpeech_GoDirByRadians
// Move a given number of radians in a given direction.
//
// theAmt is one of the kRad...PiOver4 constants; it is mapped to the matching
// kAng... constant (kAng10 if unrecognized) and passed to VRSpeech_GoDirByDegrees.
//
// Return value: TRUE if a movement was made; FALSE if no movement was made.
//
//////////

Boolean VRSpeech_GoDirByRadians (QTVRInstance theInstance, long theDir, long theAmt)
{
    long    myDegreesConstant = kAng10;     // used when the amount is not recognized
    
    // pick the degrees constant equivalent to the radians constant
    if (theAmt == kRad1PiOver4)
        myDegreesConstant = kAng45;
    else if (theAmt == kRad2PiOver4)
        myDegreesConstant = kAng90;
    else if (theAmt == kRad3PiOver4)
        myDegreesConstant = kAng135;
    else if (theAmt == kRad4PiOver4)
        myDegreesConstant = kAng180;
    else if (theAmt == kRad5PiOver4)
        myDegreesConstant = kAng225;
    else if (theAmt == kRad6PiOver4)
        myDegreesConstant = kAng270;
    else if (theAmt == kRad7PiOver4)
        myDegreesConstant = kAng315;
    
    return(VRSpeech_GoDirByDegrees(theInstance, theDir, myDegreesConstant));
}
 
 
//////////
//
// VRSpeech_ZoomInOrOut
// Zoom in or out.
//
// kDirIn halves the current field of view and kDirOut doubles it; any other
// direction re-sets the current field of view unchanged. The instance is then updated.
//
//////////

void VRSpeech_ZoomInOrOut (QTVRInstance theInstance, long theDir)
{
    float   myFieldOfView = QTVRGetFieldOfView(theInstance);
    
    if (theDir == kDirIn)
        myFieldOfView = myFieldOfView / 2.0;
    else if (theDir == kDirOut)
        myFieldOfView = myFieldOfView * 2.0;
    
    QTVRSetFieldOfView(theInstance, myFieldOfView);
    QTVRUpdate(theInstance, kQTVRStatic);
}
 
 
//////////
//
// VRSpeech_SpinTask
// Time Manager callback for the spinning task.
// We don't move here; we just raise the gDoSpeechTask flag so that the event loop
// (see VRSpeech_DoEventLoopSpinCheck) does the move, and then re-prime the task
// with the delay stored in its task record.
//
//////////

PASCAL_RTN void VRSpeech_SpinTask (TMTaskPtr theTaskPtr)
{
    MyTMTaskPtr     myTask = (MyTMTaskPtr)theTaskPtr;
    
    gDoSpeechTask = true;
    PrimeTime((QElemPtr)theTaskPtr, myTask->theSpinDelay);
}
 
 
//////////
//
// VRSpeech_DoEventLoopSpinCheck
// See whether a spin task is active, and respond appropriately.
//
//////////
 
void VRSpeech_DoEventLoopSpinCheck (void)
{
    if (gDoSpeechTask) {
        if (!VRSpeech_GoDirByDegrees(gTMTaskRec.theInstance, gTMTaskRec.theSpinDir, gTMTaskRec.theSpinAmt))
            VRSpeech_StopSpinning();
        
        gDoSpeechTask = false;
    }
}
 
 
//////////
//
// VRSpeech_IsSpinning
// Is the spinning task installed?
//
// Return value: TRUE if the kTMTaskActive flag is set in the qType field of our
// Time Manager task record; FALSE otherwise.
//
//////////

Boolean VRSpeech_IsSpinning (void)
{
    // kTMTaskActive is a flag bit, so it must be tested with a bitwise AND;
    // compare against 0 so that the result survives narrowing to Boolean
    return((gTMTaskRec.theTMTask.qType & kTMTaskActive) != 0);
}
 
 
//////////
//
// VRSpeech_StartSpinning
// Start spinning in a given direction.
//
// Any spinning task that is already running is removed first. The task record
// gTMTaskRec is then set up for theInstance and theDir, with a spin amount of
// kUndefinedDegrees and a delay of kSpinMillisecsDelay, and installed and primed.
//
//////////

void VRSpeech_StartSpinning (QTVRInstance theInstance, long theDir)
{
    MyTMTask    *myTask = &gTMTaskRec;
    
    // only one spinning task at a time: remove the current one before installing anew
    if (VRSpeech_IsSpinning())
        VRSpeech_StopSpinning();
        
    // what to spin, which way, how far per step, and how often
    myTask->theInstance = theInstance;
    myTask->theSpinDir = theDir;
    myTask->theSpinAmt = kUndefinedDegrees;
    myTask->theSpinDelay = kSpinMillisecsDelay;
    
    // the Time Manager part of the record
    myTask->theTMTask.tmAddr = gTimerTaskUPP;
    myTask->theTMTask.tmWakeUp = 0;
    myTask->theTMTask.tmReserved = 0;
 
    // install the task and schedule its first execution
    InsXTime((QElemPtr)myTask);
    PrimeTime((QElemPtr)myTask, kSpinMillisecsDelay);
}
 
 
//////////
//
// VRSpeech_StopSpinning
// Stop spinning: remove the Time Manager task.
//
//////////
 
void VRSpeech_StopSpinning (void)
{
    RmvTime((QElemPtr)&gTMTaskRec);
}
 
 
//////////
//
// VRSpeech_InstallSpeechFeedbackRoutine
// Set up QTVR intercept routines to do some speech.
//
// Installs VRSpeech_SpeechFeedbackRoutine as the intercept procedure for the
// set-pan-angle, set-tilt-angle, set-field-of-view, and trigger-hot-spot selectors
// of theInstance. Does nothing if theInstance is NULL or the UPP cannot be created.
//
//////////

void VRSpeech_InstallSpeechFeedbackRoutine (QTVRInstance theInstance)
{
    // one UPP serves every instance; creating it once avoids allocating
    // (and never disposing of) a new UPP for each instance we are handed
    static QTVRInterceptUPP     sInterceptProc = NULL;
    
    if (theInstance == NULL)
        return;
    
    if (sInterceptProc == NULL)
        sInterceptProc = NewQTVRInterceptUPP(VRSpeech_SpeechFeedbackRoutine);
    if (sInterceptProc == NULL)
        return;
    
    // We'll just use the same intercept proc for each intercepted procedure.
    QTVRInstallInterceptProc(theInstance, kQTVRSetPanAngleSelector, sInterceptProc, 0, 0);
    QTVRInstallInterceptProc(theInstance, kQTVRSetTiltAngleSelector, sInterceptProc, 0, 0);
    QTVRInstallInterceptProc(theInstance, kQTVRSetFieldOfViewSelector, sInterceptProc, 0, 0);
    QTVRInstallInterceptProc(theInstance, kQTVRTriggerHotSpotSelector, sInterceptProc, 0, 0);
}
 
 
//////////
//
// VRSpeech_SpeechFeedbackRoutine
// Intercept procedure that gives spoken feedback.
//
// For the set-pan-angle, set-tilt-angle, and set-field-of-view selectors, the new
// value is converted to degrees, the intercepted procedure is called from here, the
// value is spoken, and *theCancel is set to true so the procedure is not called again.
// For the trigger-hot-spot selector, the first parameter is spoken (and drawn) as a
// number and *theCancel is set to false. All other selectors just set *theCancel to false.
//
//////////

PASCAL_RTN void VRSpeech_SpeechFeedbackRoutine (QTVRInstance theInstance, QTVRInterceptPtr theMsg, SInt32 theRefcon, Boolean *theCancel)
{
#pragma unused(theRefcon)
 
    Str255      myText;
    Boolean     myAlreadyCalledThru = false;    // true == we called the intercepted proc ourselves
    float       myRadians;
    float       myDegrees;
    
    switch (theMsg->selector) {
        case kQTVRTriggerHotSpotSelector:
            // speak the hot spot ID
            NumToString((long)theMsg->parameter[0], myText);
            SRSpeakAndDrawText(gRecognizer, &myText[1], myText[0]);
            break;
            
        case kQTVRSetPanAngleSelector:
        case kQTVRSetTiltAngleSelector:
        case kQTVRSetFieldOfViewSelector:
            // the first parameter points to the new value, which is always in radians
            myRadians = *(float *)theMsg->parameter[0];
            myDegrees = VRRadiansToDegrees(myRadians);
            NumToString(Fix2Long(FloatToFixed(myDegrees)), myText);
            
            // do the actual change first, then announce it
            QTVRCallInterceptedProc(theInstance, theMsg);
            SpeakString(myText);
            myAlreadyCalledThru = true;
            break;
            
        default:
            break;
    }
    
    *theCancel = myAlreadyCalledThru;
}
 
 
//////////
//
// VRSpeech_SpeakNameOfNode
// A sample node-entering procedure; we just welcome the user to the new node.
//
// If the node has a name, speak a welcome phrase, wait until that speech has
// finished, and then speak the node's name. The name strings are freed here.
//
// Return value: always noErr.
//
//////////

PASCAL_RTN OSErr VRSpeech_SpeakNameOfNode (QTVRInstance theInstance, UInt32 nodeID, SInt32 theRefCon)
{
#pragma unused(theRefCon)
 
    char        *myNodeName = QTVRUtils_GetNodeName(theInstance, nodeID);
    StringPtr   myPascalName = NULL;
    
    // no name, nothing to say
    if (myNodeName == NULL)
        return(noErr);
    
    myPascalName = QTUtils_ConvertCToPascalString(myNodeName);
    
    SpeakString("\p[[emph +]]Welcome [[emph -]] two");
    
    // let the welcome phrase finish before speaking the name
    while (SpeechBusy())
        ;
    SpeakString(myPascalName);
    
    free(myPascalName);
    free(myNodeName);
    
    return(noErr);
}