Hello, I am using MTKView to display: camera preview & video playback. I am testing on iPhone 16. App crashes at a random moment whenever MTKView is rendering CIImage.
MetalView:
public enum MetalActionType {
case image(CIImage)
case buffer(CVPixelBuffer)
}
public struct MetalView: UIViewRepresentable {
let mtkView = MTKView()
public let actionPublisher: any Publisher<MetalActionType, Never>
public func makeCoordinator() -> Coordinator {
Coordinator(self)
}
public func makeUIView(context: UIViewRepresentableContext<MetalView>) -> MTKView {
guard let metalDevice = MTLCreateSystemDefaultDevice() else {
return mtkView
}
mtkView.device = metalDevice
mtkView.framebufferOnly = false
mtkView.clearColor = MTLClearColor(red: 0, green: 0, blue: 0, alpha: 0)
mtkView.drawableSize = mtkView.frame.size
mtkView.delegate = context.coordinator
mtkView.isPaused = true
mtkView.enableSetNeedsDisplay = true
mtkView.preferredFramesPerSecond = 60
context.coordinator.ciContext = CIContext(
mtlDevice: metalDevice, options: [.priorityRequestLow: true, .highQualityDownsample: false])
context.coordinator.metalCommandQueue = metalDevice.makeCommandQueue()
context.coordinator.actionSubscriber = actionPublisher.sink { type in
switch type {
case .buffer(let pixelBuffer):
context.coordinator.updateCIImage(pixelBuffer)
break
case .image(let image):
context.coordinator.updateCIImage(image)
break
}
}
return mtkView
}
public func updateUIView(_ nsView: MTKView, context: UIViewRepresentableContext<MetalView>) {
}
public class Coordinator: NSObject, MTKViewDelegate {
var parent: MetalView
var metalCommandQueue: MTLCommandQueue!
var ciContext: CIContext!
private var image: CIImage? {
didSet {
Task { @MainActor in
self.parent.mtkView.setNeedsDisplay() //<--- call Draw method
}
}
}
var actionSubscriber: (any Combine.Cancellable)?
private let operationQueue = OperationQueue()
init(_ parent: MetalView) {
self.parent = parent
operationQueue.qualityOfService = .background
super.init()
}
public func mtkView(_ view: MTKView, drawableSizeWillChange size: CGSize) {
}
public func draw(in view: MTKView) {
guard let drawable = view.currentDrawable, let ciImage = image,
let commandBuffer = metalCommandQueue.makeCommandBuffer(), let ci = ciContext
else {
return
}
//making sure nothing is nil, now we can add the current frame to the operationQueue for processing
operationQueue.addOperation(
MetalOperation(
drawable: drawable, drawableSize: view.drawableSize, ciImage: ciImage,
commandBuffer: commandBuffer, pixelFormat: view.colorPixelFormat, ciContext: ci))
}
//consumed by Subscriber
func updateCIImage(_ img: CIImage) {
image = img
}
//consumed by Subscriber
func updateCIImage(_ buffer: CVPixelBuffer) {
image = CIImage(cvPixelBuffer: buffer)
}
}
}
now the MetalOperation class:
private class MetalOperation: Operation, @unchecked Sendable {
let drawable: CAMetalDrawable
let drawableSize: CGSize
let ciImage: CIImage
let commandBuffer: MTLCommandBuffer
let pixelFormat: MTLPixelFormat
let ciContext: CIContext
init(
drawable: CAMetalDrawable, drawableSize: CGSize, ciImage: CIImage,
commandBuffer: MTLCommandBuffer, pixelFormat: MTLPixelFormat, ciContext: CIContext
) {
self.drawable = drawable
self.drawableSize = drawableSize
self.ciImage = ciImage
self.commandBuffer = commandBuffer
self.pixelFormat = pixelFormat
self.ciContext = ciContext
}
override func main() {
let width = Int(drawableSize.width)
let height = Int(drawableSize.height)
let ciWidth = Int(ciImage.extent.width) //<-- Thread 22: EXC_BAD_ACCESS (code=1, address=0x5e71f5490) A bad access to memory terminated the process.
let ciHeight = Int(ciImage.extent.height)
let destination = CIRenderDestination(
width: width, height: height, pixelFormat: pixelFormat, commandBuffer: commandBuffer,
mtlTextureProvider: { [self] () -> MTLTexture in
return drawable.texture
})
let transform = CGAffineTransform(
scaleX: CGFloat(width) / CGFloat(ciWidth), y: CGFloat(height) / CGFloat(ciHeight))
do {
try ciContext.startTask(toClear: destination)
try ciContext.startTask(toRender: ciImage.transformed(by: transform), to: destination)
} catch {
}
commandBuffer.present(drawable)
commandBuffer.commit()
commandBuffer.waitUntilCompleted()
}
}
Now I am no Metal expert, but I believe it's a very simple execution that shouldn't cause memory leak especially after we have already checked for whether CIImage is nil or not. I have also tried running this code without OperationQueue and also tried with @autoreleasepool but none of them has solved this problem.
Am I missing something?
Metal
RSS for tagRender advanced 3D graphics and perform data-parallel computations using graphics processors using Metal.
Selecting any option will automatically load the page
Post
Replies
Boosts
Views
Activity
Currently looking for Metal developers to port Quake 2 RTX to Metal RT in order to give Apple Silicon Macs an amazing Pathtracing demo, This project falls under NightSightProductions who is also working on a Portal 2 with RTX Remaster. if you are interested and want to help further Mac gaming, message me here or on discord at king_vulpes
Hi! I just installed GPTK2 on my new Mac , but the Terminal gave “Error:OpenSSL1.1 has been disabled.”
How should I fix it?Or waiting for the GPTK2 beta4?
Thanks.
I’ve been trying to run Jurassic World Evolution 2 using the Game Porting Toolkit on macOS, but the game doesn’t launch and crashes immediately. Based on the error and research, it seems the issue is related to missing support for D3D12_TILED_RESOURCES_TIER_2 in the Metal API.
If this is the case, does anyone know if support for tiled resources is planned for future updates of the toolkit? Or are there any potential workarounds for bypassing this limitation?
Is there a working example of imageblock_slice with implicit layout somewhere?
I get a compilation error when i write this:
imageblock_slilce color_slice = img_blk.slice(frag->color);
Error:
No matching member function for call to 'slice'
candidate template ignored: couldn't infer template argument 'E'
candidate function template not viable: requires 2 arguments, but 1 was provided
Too few template arguments for class template 'imageblock_slice'
It seems the syntax has changed since the Imageblocks presentation https://developer.apple.com/videos/play/tech-talks/603/
I tried supplying the struct type of the image block between <> but it still does not work.
When inspecting the geometry in Xcode's metal debugger, I noticed that the shown "frustrum box" didn't make sense. Since Metal uses depth range 0,1 in NDC space, I would expect a vertex that is projected to z:0 to be on the front clipping plane of the frustrum shown in the geometry inspector. This is however not the case. A vertex with ndc z:0 is shown halfway inside the frustrum. Vertices with ndc z less than 0 are correctly culled during rendering, while the geometry inspector's frustrum shows that the vertex is stil inside the frustrum.
The image shows vertices that are visually in the middle of the frustrum on z axis, but at the same time the out position shows that they are projected to z:0. How is this possible, unless there's a bug in the geometry inspector?
I used xcode gpu capture to profile render pipeline's bandwidth of my game.Then i found depth buffer and stencil buffer use the same buffer whitch it's format is Depth32Float_Stencil8.
But why in a single pass of pipeline, this buffer was loaded twice, and the Load Attachment Size of Encoder Statistics was double.
Is there any bug with xcode gpu capture?Or the pass really loaded the buffer twice times?
Topic:
Graphics & Games
SubTopic:
Metal
Hi,
Apple’s documentation on Order-Independent Transparency (OIT) describes an approach using image blocks, where an array of size 4 is allocated per fragment to store depth and color in a tile shading compute pass.
However, when increasing the scene’s depth complexity by adding more overlapping quads, the OIT implementation fails due to the fixed array size.
Is there a way to dynamically allocate storage for fragments based on actual depth complexity encountered during rasterization, rather than using a fixed-size array? Specifically, can an adaptive array of fragments be maintained and sorted by depth, where the size grows as needed instead of being limited to 4 entries?
Any insights or alternative approaches would be greatly appreciated.
Thank you!
Hello ladies and gentlemen, I'm writing a simple renderer on the main actor using Metal and Swift 6. I am at the stage now where I want to create a render pipeline state using asynchronous API:
@MainActor
class Renderer {
let opaqueMeshRPS: MTLRenderPipelineState
init(/*...*/) async throws {
let descriptor = MTLRenderPipelineDescriptor()
// ...
opaqueMeshRPS = try await device.makeRenderPipelineState(descriptor: descriptor)
}
}
I get a compilation error if try to use the asynchronous version of the makeRenderPipelineState method:
Non-sendable type 'any MTLRenderPipelineState' returned by implicitly asynchronous call to nonisolated function cannot cross actor boundary
Which is understandable, since MTLRenderPipelineState is not Sendable. But it looks like no matter where or how I try to access this method, I just can't do it - you have this API, but you can't use it, you can only use the synchronous versions.
Am I missing something or is Metal just not usable with Swift 6 right now?
Hey all! I'm got my hands on a refurbished mac mini m1 and already diving into metal. At the moment, i'm currently studying graphics programming with opengl and got to a point where I can almost create a 3d cube. However, I noticed there aren't many tutorials for metal cpp but rather demos. One thing I love about graphic programming, is skinning/skeletal animation. At the moment, I can't find any sources or tutorials on how to load skeletal animations into metal-cpp. So, if I create my character in blender and had all types of animations all loaded into a .FBX or maybe .DAE and load this into metal api with metal-cpp, how can I go on about how this works?
The flushContextInternal function in glr_sync.mm:262 called abort internally. What caused this? Was it due to high device temperature or some other reason?
Date/Time: 2024-08-29 09:20:09.3102 +0800
Launch Time: 2024-08-29 08:53:11.3878 +0800
OS Version: iPhone OS 16.7.10 (20H350)
Release Type: User
Baseband Version: 8.50.04
Report Version: 104
Exception Type: EXC_CRASH (SIGABRT)
Exception Codes: 0x0000000000000000, 0x0000000000000000
Triggered by Thread: 0
Thread 0 name:
Thread 0 Crashed:
0 libsystem_kernel.dylib 0x00000001ed053198 __pthread_kill + 8 (:-1)
1 libsystem_pthread.dylib 0x00000001fc5e25f8 pthread_kill + 208 (pthread.c:1670)
2 libsystem_c.dylib 0x00000001b869c4b8 abort + 124 (abort.c:118)
3 AppleMetalGLRenderer 0x00000002349f574c GLDContextRec::flushContextInternal() + 700 (glr_sync.mm:262)
4 DiSpecialDriver 0x000000010824b07c Di::RHI::onRenderFrameEnd() + 184 (RHIDevice.cpp:118)
5 DiSpecialDriver 0x00000001081b85f8 Di::Client::drawFrame() + 120 (Client.cpp:155)
2024-08-27_14-44-10.8104_+0800-07d9de9207ce4c73289507e608e5de4320d02ccf.crash
Topic:
Graphics & Games
SubTopic:
Metal
In this video, tile fragment shading is recommended for image processing. In this example, the unpack function takes two arguments, one of which is RasterizerData. As I understand it, this is the data passed to us from the previous stage (Vertex) of the graphics pipeline.
However, the properties of MTLTileRenderPipelineDescriptor do not include an option for specifying a Vertex function. Therefore, in this render pass, a mix of commands is used: first, a draw command is executed to obtain UV coordinates, and then threads are dispatched.
My question is: without using a draw command, only dispatch, how can I get pixel coordinates in the fragment tile function? For the kernel tile function, everything is clear.
typedef struct
{
float4 OPTexture [[ color(0) ]];
float4 IntermediateTex [[ color(1) ]];
} FragmentIO;
fragment FragmentIO Unpack(RasterizerData in [[ stage_in ]],
texture2d<float, access::sample> srcImageTexture [[texture(0)]])
{
FragmentIO out;
//...
// Run necessary per-pixel operations
out.OPTexture = // assign computed value;
out.IntermediateTex = // assign computed value;
return out;
}
I have this drawing app that I have been working on for the past few years when I have free time. I recently rebuilt the app in Metal to build out other brushes and improve performance, need to render 10000s of lines in realtime.
I’m running into this issue trying to create a uniform opacity per path. I have a solution but do not love it - as this is a realtime app and the solution could have some bottlenecks. If I just generate a triangle strip from touch points and do my best to smooth, resample, and handle miters I will always get some overlaps. See:
To create a uniform opacity I render to an offscreen texture with blending disabled. I then pre-multiply the color and draw that texture to a composite texture with blending on (I do this per path). This works but gets tricky when you introduce a textured brush, the edges of the texture in the frag shader cut out the line.
Pasted Graphic 1.png
Solution: I discard below a threshold
fragment float4 fragment_line(VertexOut in [[stage_in]],
texture2d<float> texture [[ texture(0) ]]) {
constexpr sampler s(coord::normalized, address::mirrored_repeat, filter::linear);
float2 texCoord = in.texCoord;
float4 texColor = texture.sample(s, texCoord);
if (texColor.a < 0.01) discard_fragment(); // may be slow (from what I read)
return in.color * texColor;
}
Better but still not perfect.
Question: I'm looking for better ways to create a uniform opacity per path. I tried .max blending but that will cause no blending of other paths. Any tips, ideas, much appreciated. If this is too detailed of a question just achieve.
Anyone else unable to download the "Rendering a Scene with Deferred Lighting in C++" (https://developer.apple.com/documentation/metal/rendering-a-scene-with-deferred-lighting-in-c++?language=objc)?
I just an error page:
Is there another place to download this sample?
Topic:
Graphics & Games
SubTopic:
Metal
Hello
I am trying to get thread group memory access in fragment shader. In essence, I would like to have all the fragments in a tile to bitwiseOR some value. My idea was to use simd_or across the SIMD group, then make each SIMD group thread 0 to atomic or the value into thread group memory. Finally very first thread of the tile would be tasked with writing the value down to texture with write access.
Now, I can allocate the thread group memory argument to the fragment function all right. MTLRenderEncoder has setThreadgroupMemoryLength call, which I am using the following way
[renderEncoder setThreagroupMemoryLength: 16 offset: 0 atIndex:0]
Unfortunately, all I am getting is the following error (runtime assertion)
-[MTLDebugRenderCommandEncoder setThreadgroupMemoryLength:offset:atIndex:]:3487: failed assertion Set Threadgroup Memory Length Validation
offset + length(16) must be <= threadgroupMemoryLength(0).`
What I am doing wrong? How I can get thread group memory in the fragment shader? I know I could use tile shading and compute function but the problem is that here I really like to use fragment stuff. Will be grateful for help.
Hi,
seems MSL is missing support for a clock() shader instruction available in other graphics APIs like Vulkan or OpenGL for example..
useful for counting cost in number of clock cycles of some code insider shader with much finer granularity than launching a micro kernel with same instructions and measuring cycles cost from CPU..
also useful for MoltenVK to support that extensions..
thanks..
In the Creating A 3D Application With Hydra Rendering tutorial on the Apple Developer website, on the last step where I execute this command:
cmake -S ~/Users/macuser/CreatingA3DApplicationWithHydraRendering/ -B ~/Users/macuser/CreatingA3DApplicationWithHydraRendering/
I keep getting an error:
CMake Error at CMakeLists.txt:5 (include):
include could not find requested file:
/Users/macuser/USDInstall/bin/pxrConfig.cmake
I've tried to follow the instructions as mentioned in the README.md file included in the project files at least 5 times as well as moving the pxrConfig.cmake file around and copying it in different folders, then executed the command and was still unsuccessful into generating the proper file expected to compile and render the HydraPlayer renderer. How do I get cmake to generate the Xcode file to create the HydraPlayer renderer?
Hey, I've been struggling with this for some days now.
I am trying to write to a sparse texture in a compute shader. I'm performing the following steps:
Set up a sparse heap and create a texture from it
Map the whole area of the sparse texture using updateTextureMapping(..)
Overwrite every value with the value "4" in a compute shader
Blit the texture to a shared buffer
Assert that the values in the buffer are "4".
I have a minimal example (which is still pretty long unfortunately).
It works perfectly when removing the line heapDesc.type = .sparse.
What am I missing? I could not find any information that writes to sparse textures are unsupported. Any help would be greatly appreciated.
import Metal
func sparseTexture64x64Demo() throws {
// ── Metal objects
guard let device = MTLCreateSystemDefaultDevice()
else { throw NSError(domain: "SparseNotSupported", code: -1) }
let queue = device.makeCommandQueue()!
let lib = device.makeDefaultLibrary()!
let pipeline = try device.makeComputePipelineState(function: lib.makeFunction(name: "addOne")!)
// ── Texture descriptor
let width = 64, height = 64
let format: MTLPixelFormat = .r32Uint // 4 B per texel
let desc = MTLTextureDescriptor()
desc.textureType = .type2D
desc.pixelFormat = format
desc.width = width
desc.height = height
desc.storageMode = .private
desc.usage = [.shaderWrite, .shaderRead]
// ── Sparse heap
let bytesPerTile = device.sparseTileSizeInBytes
let meta = device.heapTextureSizeAndAlign(descriptor: desc)
let heapBytes = ((bytesPerTile + meta.size + bytesPerTile - 1) / bytesPerTile) * bytesPerTile
let heapDesc = MTLHeapDescriptor()
heapDesc.type = .sparse
heapDesc.storageMode = .private
heapDesc.size = heapBytes
let heap = device.makeHeap(descriptor: heapDesc)!
let tex = heap.makeTexture(descriptor: desc)!
// ── CPU buffers
let bytesPerPixel = MemoryLayout<UInt32>.stride
let rowStride = width * bytesPerPixel
let totalBytes = rowStride * height
let dstBuf = device.makeBuffer(length: totalBytes, options: .storageModeShared)!
let cb = queue.makeCommandBuffer()!
let fence = device.makeFence()!
// 2. Map the sparse tile, then signal the fence
let rse = cb.makeResourceStateCommandEncoder()!
rse.updateTextureMapping(
tex,
mode: .map,
region: MTLRegionMake2D(0, 0, width, height),
mipLevel: 0,
slice: 0)
rse.update(fence) // ← capture all work so far
rse.endEncoding()
let ce = cb.makeComputeCommandEncoder()!
ce.waitForFence(fence)
ce.setComputePipelineState(pipeline)
ce.setTexture(tex, index: 0)
let threadsPerTG = MTLSize(width: 8, height: 8, depth: 1)
let tgCount = MTLSize(width: (width + 7) / 8,
height: (height + 7) / 8,
depth: 1)
ce.dispatchThreadgroups(tgCount, threadsPerThreadgroup: threadsPerTG)
ce.updateFence(fence)
ce.endEncoding()
// Blit texture into shared buffer
let blit = cb.makeBlitCommandEncoder()!
blit.waitForFence(fence)
blit.copy(
from: tex,
sourceSlice: 0,
sourceLevel: 0,
sourceOrigin: MTLOrigin(x: 0, y: 0, z: 0),
sourceSize: MTLSize(width: width, height: height, depth: 1),
to: dstBuf,
destinationOffset: 0,
destinationBytesPerRow: rowStride,
destinationBytesPerImage: totalBytes)
blit.endEncoding()
cb.commit()
cb.waitUntilCompleted()
assert(cb.error == nil, "GPU error: \(String(describing: cb.error))")
// ── Verify a few texels
let out = dstBuf.contents().bindMemory(to: UInt32.self, capacity: width * height)
print("first three texels:", out[0], out[1], out[width]) // 0 1 64
assert(out[0] == 4 && out[1] == 4 && out[width] == 4)
}
Metal shader:
#include <metal_stdlib>
using namespace metal;
kernel void addOne(texture2d<uint, access::write> tex [[texture(0)]],
uint2 gid [[thread_position_in_grid]])
{
tex.write(4, gid);
}
I'm running into a persistent visual issue while deploying a floral corridor scene to Apple Vision Pro using Unity 6.0 with URP and Metal. The issue only appears on the Vision Pro device — everything looks fine in the Unity Editor.
Issue Description
When the frame rate drops to around 60–70 FPS, noticeable distortion artifacts appear around the edges of foliage models. It seems like the background meshes (behind the plants) get warped and leak through the edges of the foliage. Although this is most visible around the leaves, even solid objects like standard URP wall or box models show distorted edges when the issue occurs.
All the foliage uses Opaque or Alpha Clipping materials.
Things I've Tried
Changing the foliage materials to Transparent mode —distortion around edges disappears, but using Transparent for a large number of foliage assets is not ideal for performance or sorting complexity.
Reducing the number of foliage objects — with only a few plants in the scene and the frame rate staying around 100 FPS, the distortion disappears. However, this isn’t a practical solution for a full environment.
Possible Cause?
I came across this note in the Unity documentation:
"Ensure depth-buffer for each pixel is non-zero - on visionOS, the depth buffer is used for reprojection. To ensure visual effects like skyboxes and shaders are displayed beautifully, ensure that some value is written to the depth for each pixel."
Could this be related to the issue? Is it possible that Alpha Clipping with low pixel coverage leads to some pixels not writing to the depth buffer, which then causes problems during Vision Pro’s reprojection or foveated rendering? However, even when I disable Alpha Clipping entirely, the distortion issue still persists, so it may not be solely caused by clipping itself.
Project Setup
Unity 6.0 (URP)
Depth Texture: Enable
Using Metal as the graphics backend
Running on real Vision Pro hardware (not simulator)
Any advice on how to avoid these distortion issues on Vision Pro would be greatly appreciated.
Thanks!
hi everyone,
我们发现了一个和Metal相关崩溃。应用中使用了Metal相关的接口,在进行性能测试时,打开了设置-开发者-显示HUD图形。运行应用后,正常展示HUD,但应用很快发生了崩溃,日志主要信息如下:
Incident Identifier: 1F093635-2DB8-4B29-9DA5-488A6609277B
CrashReporter Key: 233e54398e2a0266d95265cfb96c5a89eb3403fd
Hardware Model: iPhone14,3
Process: waimai [16584]
Path: /private/var/containers/Bundle/Application/CCCFC0AE-EFB8-4BD8-B674-ED089B776221/waimai.app/waimai
Identifier:
Version: 61488 (8.53.0)
Code Type: ARM-64
Parent Process: ? [1]
Date/Time: 2025-06-12 14:41:45.296 +0800
OS Version: iOS 18.0 (22A3354)
Report Version: 104
Monitor Type: Mach Exception
Exception Type: EXC_BAD_ACCESS (SIGBUS)
Exception Codes: KERN_PROTECTION_FAILURE at 0x000000014fffae00
Crashed Thread: 57
Thread 57 Crashed:
0 libMTLHud.dylib esfm_GenerateTriangesForString + 408
1 libMTLHud.dylib esfm_GenerateTriangesForString + 92
2 libMTLHud.dylib Renderer::DrawText(char const*, int, unsigned int) + 204
3 libMTLHud.dylib Overlay::onPresent(id<CAMetalDrawable>) + 1656
4 libMTLHud.dylib CAMetalDrawable_present(void (*)(), objc_object*, objc_selector*) + 72
5 libMTLHud.dylib invocation function for block in void replaceMethod<void>(objc_class*, objc_selector*, void (*)(void (*)(), objc_object*, objc_selector*)) + 56
6 Metal __45-[_MTLCommandBuffer presentDrawable:options:]_block_invoke + 104
7 Metal MTLDispatchListApply + 52
8 Metal -[_MTLCommandBuffer didScheduleWithStartTime:endTime:error:] + 312
9 IOGPU IOGPUNotificationQueueDispatchAvailableCompletionNotifications + 136
10 IOGPU __IOGPUNotificationQueueSetDispatchQueue_block_invoke + 64
11 libdispatch.dylib _dispatch_client_callout4 + 20
12 libdispatch.dylib _dispatch_mach_msg_invoke + 464
13 libdispatch.dylib _dispatch_lane_serial_drain + 368
14 libdispatch.dylib _dispatch_mach_invoke + 456
15 libdispatch.dylib _dispatch_lane_serial_drain + 368
16 libdispatch.dylib _dispatch_lane_invoke + 432
17 libdispatch.dylib _dispatch_lane_serial_drain + 368
18 libdispatch.dylib _dispatch_lane_invoke + 380
19 libdispatch.dylib _dispatch_root_queue_drain_deferred_wlh + 288
20 libdispatch.dylib _dispatch_workloop_worker_thread + 540
21 libsystem_pthread.dylib _pthread_wqthread + 288
我们测试了几个不同的机型,只有iPhone 13 Pro Max会发生崩溃。
Q1:为什么会发生这个崩溃?
Q2:相同的逻辑,为什么仅在iPhone 13 Pro Max机型上出现崩溃?
期待您的解答。