MPSMatrixRandom segfaults when run in an async context

The following minimal snippet segfaults with SDK 26.0 and 26.1. It does not crash if I remove `async` from the enclosing function's signature, but that is impractical in a real project.

import Metal
import MetalPerformanceShaders

/// Seed value forwarded to the MPS random generator's `seed:` argument.
let SEED: UInt64 = 0

/// Element type of the buffer handed to `randomFillOnGPU`.
typealias T = Float16

/* Why run in an async context? Because of the global GPU object,
   the async makeMTLFunction,
   and the async makeMTLComputePipelineState.
   Nevertheless, the bug can be triggered without using the global:
@MainActor
let myGPU = MyGPU()
*/

/// Command-line entry point for a minimal reproduction: it creates an
/// `MPSMatrixRandomMTGP32` inside an `async` function that is run as an
/// `async let` child task.
@main
struct CMDLine {

    /// Starts `randomFillOnGPU` concurrently, prints a progress line, then
    /// awaits the child task before printing the final message.
    static func main() async {

        // Zero-capacity placeholder buffer; the reproduction never reads or writes it.
        let ptr = UnsafeMutablePointer<T>.allocate(capacity: 0)
        // Every allocate(capacity:) must be balanced by deallocate(). The defer is
        // declared before the `async let`, so it runs only after the child task
        // that received the pointer has finished.
        defer { ptr.deallocate() }

        async let future: Void = randomFillOnGPU(ptr, count: 0)

        print("Main thread is playing around")
        await future

        print("Successfully reached the end.")
    }

    /// Creates a fresh `MyGPU`, then constructs an `MPSMatrixRandomMTGP32` on its device.
    ///
    /// - Parameters:
    ///   - buf: Destination buffer. Unused in this reduced version.
    ///   - destbufcount: Element count of `buf`. Unused in this reduced version.
    static func randomFillOnGPU(_ buf: UnsafeMutablePointer<T>, count destbufcount: Int) async {
        // let (device, queue) = await (myGPU.device, myGPU.commandqueue)
        let myGPU = MyGPU()
        // `queue` is not used below; it is kept for the elided encode/commit steps.
        let (device, queue) = (myGPU.device, myGPU.commandqueue)

        // Init MTLBuffer, async let makeFunction, makeComputePipelineState, etc.

        let tempDataType = MPSDataType.uInt32

        // The generator is only constructed, never encoded; per the notes below,
        // constructing it here is enough to reproduce the crash.
        let randfiller = MPSMatrixRandomMTGP32(device: device, destinationDataType: tempDataType, seed: Int(bitPattern:UInt(SEED)))
        print("randomFillOnGPU: successfully created MPSMatrixRandom.")

        // try await computePipelineState
        // ^ Crashes before this could return
        // Or in this minimal case, after randomFillOnGPU() returns
        // make encoder, set pso, dispatch, commit...
    }
}

/// Holds the Metal device and a command queue created from it.
///
/// Initialization terminates the process with `EX_UNAVAILABLE` when no
/// device is returned, no command queue can be created, or the device
/// supports neither the `.apple6` nor the `.mac2` GPU family.
actor MyGPU {
    let device : MTLDevice
    let commandqueue : MTLCommandQueue

    init() {
        // Single failure path shared by every check below.
        func bail() -> Never {
            print("Unable to get Metal Device! Exiting")
            exit(EX_UNAVAILABLE)
        }

        // The checks run in the same order as a single combined guard would:
        // device first, then the queue, then the GPU-family requirement.
        guard let preferred: MTLDevice = MPSGetPreferredDevice(.skipRemovable) else { bail() }
        guard let submissionQueue = preferred.makeCommandQueue() else { bail() }
        guard preferred.supportsFamily(.apple6) || preferred.supportsFamily(.mac2) else { bail() }

        // Report the registry ID in upper-case hexadecimal.
        let registryHex = String(format: "%llX", preferred.registryID)
        print("Selected device: \(registryHex)")

        device = preferred
        commandqueue = submissionQueue

        print("myGPU: initialization complete.")
    }
}

See FB20916929. Apparently the Objective-C autorelease pool releases the wrong address during a context switch (across suspension points). I wonder why such an obvious case has not been caught before.

MPSMatrixRandom segfaults when run in an async context
 
 
Q