Render advanced 3D graphics and perform data-parallel computations on graphics processors using Metal.

assertion failure trying to create MTLFXTemporalScaler
I'm trying to create a MTLFXTemporalScaler as follows (this is adapted from the sample code):

```swift
func updateTemporalScaler() {
    let desc = MTLFXTemporalScalerDescriptor()
    desc.inputWidth = renderTarget.renderSize.width
    desc.inputHeight = renderTarget.renderSize.height
    desc.outputWidth = renderTarget.windowSize.width
    desc.outputHeight = renderTarget.windowSize.height
    desc.colorTextureFormat = .bgra8Unorm
    desc.depthTextureFormat = .depth32Float
    desc.motionTextureFormat = .rg16Float
    desc.outputTextureFormat = .bgra8Unorm

    guard let temporalScaler = desc.makeTemporalScaler(device: device) else {
        fatalError("The temporal scaler effect is not usable!")
    }

    temporalScaler.motionVectorScaleX = Float(renderTarget.renderSize.width)
    temporalScaler.motionVectorScaleY = Float(renderTarget.renderSize.height)
    mfxTemporalScaler = temporalScaler
}
```

I'm getting the following error the third time the code is called:

```
/AppleInternal/Library/BuildRoots/91a344b1-f985-11ee-b563-fe8bc7981bff/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Runtimes/MPSRuntime/Operations/RegionOps/ANRegion.mm:855: failed assertion `ANE intermediate buffer handle not same!'
```

When I copy the code out to a playground, it succeeds when called with the same sequence of descriptors. Does this seem like a bug with MTLFXTemporalScaler?
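Since the assertion fires only on repeated creation, a minimal sketch of one possible mitigation (an assumption, not a confirmed fix; `ScalerCache` and its members are illustrative names, not MetalFX API) is to reuse the scaler and rebuild it only when the sizes actually change:

```swift
import Metal
import MetalFX

final class ScalerCache {
    private var scaler: MTLFXTemporalScaler?
    private var cachedSizes = (inW: 0, inH: 0, outW: 0, outH: 0)

    func scaler(for device: MTLDevice,
                inputWidth: Int, inputHeight: Int,
                outputWidth: Int, outputHeight: Int) -> MTLFXTemporalScaler? {
        let sizes = (inW: inputWidth, inH: inputHeight, outW: outputWidth, outH: outputHeight)
        if let existing = scaler, sizes == cachedSizes {
            return existing // reuse: creating a scaler is expensive
        }
        // Rebuild only when the descriptor would actually differ.
        let desc = MTLFXTemporalScalerDescriptor()
        desc.inputWidth = inputWidth
        desc.inputHeight = inputHeight
        desc.outputWidth = outputWidth
        desc.outputHeight = outputHeight
        desc.colorTextureFormat = .bgra8Unorm
        desc.depthTextureFormat = .depth32Float
        desc.motionTextureFormat = .rg16Float
        desc.outputTextureFormat = .bgra8Unorm
        scaler = desc.makeTemporalScaler(device: device)
        cachedSizes = sizes
        return scaler
    }
}
```

Given that the quoted assertion path goes through ANE setup code, avoiding repeated scaler creation per frame may sidestep it, but that is speculation.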
0 replies · 0 boosts · 478 views · Jul ’24
Unity iOS shader vector array bug
Unity 2022.3.33f1. For some reason, modifying a MeshRenderer material shader via SetVectorArray doesn't work on iOS, but it works on Android and Windows builds!! I was working on Fog of War, where I used SimpleFOW by Revision3; it's a very simple FOW shader that manipulates the alpha based on UV vertices. This is the FogOfWarShaderControl.cs script:

```csharp
using System.Collections;
using System.Collections.Generic;
using UnityEngine;

namespace SimpleFOW
{
    [RequireComponent(typeof(MeshRenderer))]
    public class FogOfWarShaderControl : MonoBehaviour
    {
        public static FogOfWarShaderControl Instance { get; private set; }

        [Header("Maximum amount of revealing points")]
        [SerializeField] private uint maximumPoints = 512;

        [Header("Game Camera")]
        [SerializeField] private Camera mainCamera;

        private List<Vector4> points = new List<Vector4>();
        private Vector2 meshSize, meshExtents;
        private Vector4[] sendBuffer;
        private MeshRenderer meshRenderer;

        private void Awake()
        {
            Instance = this;
            Init();
        }

        // Initialize required variables
        public void Init()
        {
            meshRenderer = GetComponent<MeshRenderer>();
            meshExtents = meshRenderer.bounds.extents;
            meshSize = meshRenderer.bounds.size;
            points = new List<Vector4>();
            sendBuffer = new Vector4[maximumPoints];
        }

        // Transform world point to UV coordinate of FOW mesh
        public Vector2 WorldPointToMeshUV(Vector2 wp)
        {
            Vector2 toRet = Vector2.zero;
            toRet.x = (transform.position.x - wp.x + meshExtents.x) / meshSize.x;
            toRet.y = (transform.position.y - wp.y + meshExtents.y) / meshSize.y;
            return toRet;
        }

        // Show or hide FOW
        public void SetEnabled(bool on)
        {
            meshRenderer.enabled = on;
        }

        // Add revealing point to FOW renderer if amount of points is lower than MAX_POINTS
        public void AddPoint(Vector2 worldPoint)
        {
            if (points.Count < maximumPoints)
            {
                points.Add(WorldPointToMeshUV(worldPoint));
            }
        }

        // Remove FOW revealing point
        public void RemovePoint(Vector2 worldPoint)
        {
            if (worldPoint == new Vector2(0, 0))
            {
                return;
            }
            if (points.Contains(WorldPointToMeshUV(worldPoint)))
            {
                points.Remove(WorldPointToMeshUV(worldPoint));
            }
        }

        // Send any change to revealing point list to shader for rendering
        public void SendPoints()
        {
            points.ToArray().CopyTo(sendBuffer, 0);
            meshRenderer.material.SetVectorArray("_PointArray", sendBuffer);
            meshRenderer.material.SetInt("_PointCount", points.Count);
        }

        // Send new range value to shader
        public void SendRange(float range)
        {
            meshRenderer.material.SetFloat("_RadarRange", range);
        }

        // Send new scale value to shader
        public void SendScale(float scale)
        {
            meshRenderer.material.SetFloat("_Scale", scale);
        }
    }
}
```

And this is FogOfWar.shader:

```hlsl
Shader "Revision3/FogOfWar"
{
    Properties
    {
        _MainTex ("Texture", 2D) = "black" {}
        _PointCount("Point count", Range(0,512)) = 0
        _Scale("Scale", Float) = 1.0
        _RadarRange("Range", Float) = .5
        _MaxAlpha("Maximum Alpha", Float) = 1.0
    }
    SubShader
    {
        Tags { "RenderType"="Transparent" "Queue"="Transparent" }
        LOD 100
        ZWrite Off
        Blend SrcAlpha OneMinusSrcAlpha

        Pass
        {
            CGPROGRAM
            #pragma vertex vert
            #pragma fragment frag
            // make fog work
            #pragma multi_compile_fog

            #include "UnityCG.cginc"

            struct appdata
            {
                float4 vertex : POSITION;
                float2 uv : TEXCOORD0;
            };

            struct v2f
            {
                float2 uv : TEXCOORD0;
                UNITY_FOG_COORDS(1)
                float4 vertex : SV_POSITION;
            };

            sampler2D _MainTex;
            float4 _MainTex_ST;
            float _RadarRange;
            uint _PointCount;
            float _Scale;
            float _MaxAlpha;
            float2 _PointArray[512];

            v2f vert (appdata v)
            {
                v2f o;
                o.vertex = UnityObjectToClipPos(v.vertex);
                o.uv = TRANSFORM_TEX(v.uv, _MainTex);
                return o;
            }

            float getDistance(float2 pa[512], float2 uv)
            {
                float cdist = 99999.0;
                for (uint i = 0; i < _PointCount; i++)
                {
                    cdist = min(cdist, distance(pa[i] * _Scale, uv));
                }
                return cdist;
            }

            fixed4 frag (v2f i) : SV_Target
            {
                // sample the texture
                fixed4 col = tex2D(_MainTex, i.uv);
                i.uv *= _Scale;
                if (_PointCount > 0)
                    col.w = min(_MaxAlpha, max(0.0f, getDistance(_PointArray, i.uv) - _RadarRange));
                else
                    col.w = _MaxAlpha;
                return col;
            }
            ENDCG
        }
    }
}
```

Now I create a GameObject called FogOfWar, and then in the Unit.cs and Building.cs scripts I add the following logic:

```csharp
private Vector3 lastPos;

private void Update()
{
    if (lastPos != transform.position)
    {
        FogOfWarShaderControl.Instance.RemovePoint(lastPos);
        FogOfWarShaderControl.Instance.AddPoint(transform.position);
        lastPos = transform.position;
        FogOfWarShaderControl.Instance.SendPoints();
    }
}
```

Now this gives me a broken FOW effect on iOS, while the result is correct on other devices. I don't know what causes this to happen only on iOS. The logic works fine on Android/Windows/Linux/editor but not on iOS devices. So why doesn't the Metal API support setting a shader vector array?!
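One thing that may be worth checking (an observation about the code above, not a confirmed diagnosis): material.SetVectorArray uploads Vector4 values, but the shader declares the array as float2 _PointArray[512]. Graphics backends differ in how they pack array elements narrower than float4, so declaring the array as float4 _PointArray[512] and reading pa[i].xy in the shader would rule out an element-stride mismatch that only shows up on Metal.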
0 replies · 0 boosts · 563 views · Jul ’24
Issue entering immersive mode with Metal
Hi. I am developing an immersive application in Metal. The problem is that when the application starts for the first time, it shows two system windows: one related to the player's surrounding environment and the other to hand tracking permissions. The latter is the one in focus, and when I select the option to grant permissions, both windows disappear and the transition to the immersive view does not occur. I don't know how to handle this issue.
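A minimal sketch of one approach that may help (an assumption, not confirmed guidance; `EntryView` and the "ImmersiveSpace" id are illustrative): trigger the hand-tracking permission prompt up front via ARKitSession.requestAuthorization, and only request the immersive space once it has been answered, so no system window is competing with the transition.

```swift
import SwiftUI
import ARKit

struct EntryView: View {
    @Environment(\.openImmersiveSpace) private var openImmersiveSpace
    private let session = ARKitSession()

    var body: some View {
        Button("Enter Immersive Mode") {
            Task {
                // Resolve the permission dialog first...
                _ = await session.requestAuthorization(for: [.handTracking])
                // ...then start the transition to the immersive view.
                _ = await openImmersiveSpace(id: "ImmersiveSpace")
            }
        }
    }
}
```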
0 replies · 0 boosts · 424 views · Jul ’24
What is the info property of SwiftUI::Layer
What is the info property of SwiftUI::Layer? I couldn't find any documentation or resources about it. It appears in SwiftUI::Layer's definition:

```metal
struct Layer {
    metal::texture2d<half> tex;
    float2 info[5];

    /// Samples the layer at `p`, in user-space coordinates,
    /// interpolating linearly between pixel values. Returns an RGBA
    /// pixel value, with color components premultiplied by alpha (i.e.
    /// [R*A, G*A, B*A, A]), in the layer's working color space.
    half4 sample(float2 p) const {
        p = metal::fma(p.x, info[0], metal::fma(p.y, info[1], info[2]));
        p = metal::clamp(p, info[3], info[4]);
        return tex.sample(metal::sampler(metal::filter::linear), p);
    }
};
```
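A possible reading of those fields (an inference from the sample body above, not documented behavior): info[0], info[1], and info[2] look like the columns of an affine transform from user space into normalized texture coordinates, applied as p' = p.x * info[0] + p.y * info[1] + info[2], while info[3] and info[4] look like the min/max bounds that clamp sampling to the layer's valid region.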
0 replies · 1 boost · 493 views · Jun ’24
Why do I get a “Texture usage flags mismatch” shader validation error when declaring an imageblock in a compute pass?
Suppose I want to draw a red rectangle onto my render target using a compute shader.

```objc
id<MTLComputeCommandEncoder> encoder = [commandBuffer computeCommandEncoder];
[encoder setComputePipelineState:pipelineState];
simd_ushort2 position = simd_make_ushort2(100, 100);
simd_ushort2 size = simd_make_ushort2(50, 50);
[encoder setBytes:&position length:sizeof(position) atIndex:0];
[encoder setTexture:drawable.texture atIndex:0];
[encoder dispatchThreads:MTLSizeMake(size.x, size.y, 1) threadsPerThreadgroup:MTLSizeMake(32, 32, 1)];
[encoder endEncoding];
```

```metal
#include <metal_stdlib>
using namespace metal;

kernel void Compute(ushort2 position_in_grid [[thread_position_in_grid]],
                    constant ushort2 &position,
                    texture2d<half, access::write> texture)
{
    texture.write(half4(1, 0, 0, 1), position_in_grid + position);
}
```

This works just fine. Now, say for whatever reason I want to start using imageblocks in my compute kernel. First, I set the imageblock size on the CPU side:

```objc
id<MTLComputeCommandEncoder> encoder = [commandBuffer computeCommandEncoder];
[encoder setComputePipelineState:pipelineState];
MTLSize threadgroupSize = MTLSizeMake(32, 32, 1);
[encoder setImageblockWidth:threadgroupSize.width height:threadgroupSize.height];
simd_ushort2 position = simd_make_ushort2(100, 100);
simd_ushort2 size = simd_make_ushort2(50, 50);
[encoder setBytes:&position length:sizeof(position) atIndex:0];
[encoder setTexture:drawable.texture atIndex:0];
MTLSize gridSize = MTLSizeMake(size.x, size.y, 1);
[encoder dispatchThreads:gridSize threadsPerThreadgroup:threadgroupSize];
```

And then I update the compute kernel to simply declare the imageblock – note I never actually read from or write to it:

```metal
#include <metal_stdlib>
using namespace metal;

struct Foo {
    int foo;
};

kernel void Compute(ushort2 position_in_grid [[thread_position_in_grid]],
                    constant ushort2 &position,
                    texture2d<half, access::write> texture,
                    imageblock<Foo> imageblock)
{
    texture.write(half4(1, 0, 0, 1), position_in_grid + position);
}
```

And now, out of nowhere, Metal's shader validation starts complaining about mismatched texture usage flags:

```
2024-06-22 00:57:15.663132+1000 TextureUsage[80558:4539093] [GPUDebug] Texture usage flags mismatch executing kernel function "Compute" encoder: "1", dispatch: 0
2024-06-22 00:57:15.672004+1000 TextureUsage[80558:4539093] [GPUDebug] Texture usage flags mismatch executing kernel function "Compute" encoder: "1", dispatch: 0
2024-06-22 00:57:15.682422+1000 TextureUsage[80558:4539093] [GPUDebug] Texture usage flags mismatch executing kernel function "Compute" encoder: "1", dispatch: 0
2024-06-22 00:57:15.687587+1000 TextureUsage[80558:4539093] [GPUDebug] Texture usage flags mismatch executing kernel function "Compute" encoder: "1", dispatch: 0
2024-06-22 00:57:15.698106+1000 TextureUsage[80558:4539093] [GPUDebug] Texture usage flags mismatch executing kernel function "Compute" encoder: "1", dispatch: 0
```

The texture I'm writing to comes from a CAMetalDrawable whose associated CAMetalLayer has framebufferOnly set to NO. What am I missing?
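A minimal sketch of one possible workaround (an assumption, not a confirmed fix; `makeIntermediate` and `blitToDrawable` are illustrative helpers, not Metal API): write the compute output into an intermediate texture whose usage flags you control explicitly, then blit the result into the drawable.

```swift
import Metal
import QuartzCore

func makeIntermediate(device: MTLDevice, drawable: CAMetalDrawable) -> MTLTexture? {
    let desc = MTLTextureDescriptor.texture2DDescriptor(
        pixelFormat: drawable.texture.pixelFormat,
        width: drawable.texture.width,
        height: drawable.texture.height,
        mipmapped: false)
    desc.usage = [.shaderWrite, .shaderRead] // explicit flags for the compute pass
    desc.storageMode = .private
    return device.makeTexture(descriptor: desc)
}

func blitToDrawable(_ source: MTLTexture, drawable: CAMetalDrawable,
                    commandBuffer: MTLCommandBuffer) {
    guard let blit = commandBuffer.makeBlitCommandEncoder() else { return }
    blit.copy(from: source, to: drawable.texture) // sizes and formats match by construction
    blit.endEncoding()
}
```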
3 replies · 0 boosts · 698 views · Jun ’24
copyFromBuffer offset and size working even when not multiple of 4
Hi, The copyFromBuffer documentation states that on macOS, sourceOffset, destinationOffset, and size each need "to be a multiple of 4, but can be any value in iOS and tvOS". However, I have noticed that, at least on my M2 Max, this limitation does not seem to exist: there are no warnings, and the copy works correctly regardless of the offset value. I'm curious to know if this is something that should still be avoided. Is the multiple-of-4 limitation reserved for non-Apple-silicon devices, so that note can be ignored on Apple silicon? I ask because I am a contributor to Metal.jl, and recently noticed that our tests pass even when copying using copyWithBuffer with offsets and sizes that are not multiples of 4. If that could cause issues or correctness problems, we would need to fix it. Thank you. Christian
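For concreteness, a minimal sketch of the copy in question in Swift (the specific offsets are illustrative); the macOS documentation asks for multiples of 4 on these three parameters, and the observation above is that Apple silicon appears to accept unaligned values anyway:

```swift
import Metal

func unalignedCopy(queue: MTLCommandQueue, source: MTLBuffer, destination: MTLBuffer) {
    guard let commandBuffer = queue.makeCommandBuffer(),
          let blit = commandBuffer.makeBlitCommandEncoder() else { return }
    blit.copy(from: source, sourceOffset: 2,        // not a multiple of 4
              to: destination, destinationOffset: 6, // not a multiple of 4
              size: 10)                              // not a multiple of 4
    blit.endEncoding()
    commandBuffer.commit()
}
```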
0 replies · 1 boost · 530 views · Jun ’24
Metal and Swift Concurrency
Hi, Introducing Swift Concurrency to my Metal app has been a bit challenging, as Swift Concurrency is limited by the cooperative thread pool. GPU work is obviously not CPU-bound and can block forward progress, especially when using waitUntilCompleted on the command buffer. For concurrent render work this has the potential of underutilizing the CPU and even creating deadlocks. My question is: what is the Metal team's general recommendation when it comes to concurrency? It seems to me that Dispatch or OperationQueues are still the preferred way for Metal-bound tasks in order to gain maximum performance? To integrate with Swift Concurrency, my idea is to use continuations that kick off render jobs via Dispatch or queues. Would this be the best solution to bridge async tasks with Metal work? Thanks!
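A minimal sketch of the continuation idea described above: suspend the task until the command buffer's completion handler fires, instead of blocking one of the cooperative pool's threads with waitUntilCompleted. (`completedAsync` is an illustrative name, not a Metal API.)

```swift
import Metal

extension MTLCommandBuffer {
    func completedAsync() async {
        await withCheckedContinuation { (continuation: CheckedContinuation<Void, Never>) in
            // Register the handler before committing, as Metal requires.
            addCompletedHandler { _ in continuation.resume() }
            commit()
        }
    }
}

// Usage (illustrative): encode work, then `await commandBuffer.completedAsync()`.
```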
4 replies · 0 boosts · 659 views · Jun ’24
Bug? Xcode 16 macOS 15 SDK on macOS 14.5 causes Metal shader colors to be wrong
I've been upgrading Xcode consistently for years and have never seen Metal shaders behave differently from one version to another until now. On macOS 14.5 with the Xcode 16 beta, several color outputs suddenly turn out completely black where there should be color. All validation is on and nothing seems to be wrong (and hasn't been since maybe Xcode version 11). I've attached two screenshots: the first is the normal color scheme; the second is from Xcode 16 with the exact same settings, buggy with black + transparent colors (so it seems like colors are either overflowing or are all 0s)? Before I file a bug report or code-level support request, may I have some thoughts on how to debug this? The only clue I have is that I'm using bindless to multiply color texture samples with color values from my vertex struct. But it still fails even if I use hard-coded values for the texture samples, meaning somehow the color values are not being sent to the shader correctly? This is the most stable part of my rendering pipeline, so I'm surprised if the issue is there. Thank you.
1 reply · 0 boosts · 832 views · Jun ’24
Issue with Hand Occlusion in a Metal CompositorLayer
I have an issue with hand occlusion in immersive mode. I have an entry view for the app and a Metal CompositorLayer (which is the immersive volume) where I have set .upperLimbVisibility(Visibility.hidden). The problem is that when I dismiss the entry view, sometimes it hides the hands and sometimes it doesn't (randomly).

```swift
@main
struct AVPainterApp: App {
    @State var hand: Int32 = 0

    var body: some Scene {
        WindowGroup() {
            ContentView(hand: $hand)
        }
        .windowResizability(.contentSize)

        ImmersiveSpace(id: "ImmersiveSpace") {
            CompositorLayer(configuration: MetalLayerConfiguration()) { layerRenderer in
                SpatialSceneRun(layerRenderer, hand)
            }
        }
        .upperLimbVisibility(Visibility.hidden)
        .immersionStyle(selection: .constant(.full), in: .full)
    }
}
```
1 reply · 0 boosts · 563 views · Jun ’24
Metal 3.2 device memory coherency
I am seeking clarification regarding the new device-coherent memory (buffers and textures) in Metal 3.2. Do I understand the documentation correctly that this feature allows threads from different threadgroups to update data in device memory cooperatively? The documentation mentions, "[results of operations] are visible to other threads across thread groups if you synchronize them properly." How does one do proper synchronization? From what I understand, Metal has no device-scoped barriers.
1 reply · 0 boosts · 849 views · Jun ’24
How to implement HLSL's globallycoherent for Metal?
Hello all, We would like to use AMD's FidelityFX Downsampler in our custom game engine, and we are having difficulty implementing it correctly for Metal due to its use of the globallycoherent keyword. We have done an extensive search online but have not succeeded in finding an answer. What we have found is the largely undocumented 'volatile' keyword, so we were hypothesising that marking a texture with 'volatile' (which implies 'device volatile', since it's a texture) could have the same effect, but we are far from convinced it would work. Does anyone have insights into this?
1 reply · 0 boosts · 752 views · Apr ’24
CAMetalLayer vs. AVSampleBufferDisplayLayer (GPU usage, performance, ...)
I am a VOIP app developer. I am planning to develop a VOIP app on iOS using WebRTC that operates in PiP (Picture-in-Picture) mode. Since MTKView (CAMetalLayer) cannot be used in PiP mode, I am considering using AVSampleBufferDisplayLayer. Regarding this, I am curious about the performance differences between CAMetalLayer and AVSampleBufferDisplayLayer. As far as I know, CAMetalLayer utilizes the GPU. Does AVSampleBufferDisplayLayer also render using the GPU? If AVSampleBufferDisplayLayer renders using the GPU, will the rendering performance be similar? => Based on tests, there seems to be no difference in CPU usage between the two, which leads me to speculate that AVSampleBufferDisplayLayer also uses the GPU. If both use the GPU and there are no performance differences, is there a significant advantage to using CAMetalLayer? Thank you in advance.
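For reference, a minimal sketch of the AVSampleBufferDisplayLayer path mentioned above: decoded frames are handed to the layer as CMSampleBuffers and the system handles presentation (`displayLayer` and `show` are illustrative names).

```swift
import AVFoundation

let displayLayer = AVSampleBufferDisplayLayer()

func configure() {
    displayLayer.videoGravity = .resizeAspect
}

func show(_ sampleBuffer: CMSampleBuffer) {
    // Back-pressure check before enqueueing the next frame.
    if displayLayer.isReadyForMoreMediaData {
        displayLayer.enqueue(sampleBuffer)
    }
}
```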
1 reply · 0 boosts · 647 views · May ’24
SwiftUI full screen animation uses less energy than Metal Game template
I've got a full-screen animation of a bunch of circles filled with gradients, with plenty of (careless) overdraw, plus real-time audio processing driving the animation, plus the overhead of SwiftUI's dependency analysis, and that app uses less energy (on iPhone 13) than the Xcode "Metal Game" template which is a rotating textured cube (a trivial GPU workload). Why is that? How can I investigate further? Does CoreAnimation have access to a compositor fast-path that a Metal app cannot access? Maybe another data point: when I do the same circles animation using SwiftUI's Canvas, the energy use is "Very High" and GPU utilization is also quite high. Eventually the phone's thermal state goes "Serious" and I get a message on the device that "Charging will resume when iPhone returns to normal temperature".
0 replies · 5 boosts · 807 views · May ’24
How do I properly set tagged color data in MTKView and CIContext?
I have provided a test UIKit app which displays three different images, side by side, each inside a separate MTKView. Each image is tagged with a different color profile:

- Display P3
- uRGB
- Test RGB (from an image supplied in Apple's ImageApp sample)

I set up default values for all color spaces and formats. I then check if the image is tagged and, if so, I override those values with state from the tagged color space. The variables I am setting:

- workingColorSpace in the Metal CIContext, default = sRGB
- workingFormat in the Metal CIContext, default = RGBAf
- outputColorSpace in the Metal CIContext, default = displayP3
- colorPixelFormat in the MTKView, default = bgra8Unorm
- colorSpace in a CIRenderDestination that I use in the MTKView delegate draw method; default = CGColorSpaceCreateDeviceRGB()

I also set pixelFormat in CIRenderDestination with the MTKView.colorPixelFormat. If the image is tagged, I override the following values with the tagged colorSpace:

- CIContext.workingColorSpace
- CIContext.outputColorSpace
- CIRenderDestination.colorSpace

If the tagged colorSpace.isWideGamutRGB is true, then I set the CIRenderDestination.colorSpace to extendedSRGB, ignoring the color space in the tagged wide-gamut color space, as well as set the colorPixelFormat = bgr10_xr.

Results: The above scenario properly renders the Display P3 image and the uRGB image; the “Test RGB” image fails. If I do not override the CIRenderDestination.colorSpace with a value from the tagged image, then the “Test RGB” image succeeds, but the “uRGB” image fails to render properly.

Question: Do I have everything hooked up correctly and, if so, why does one image fail and the other succeed?

Link to sample project: https://www.dropbox.com/scl/fi/57u2fcrgdvys7jtzykzxt/ColorSpaceTest.zip?rlkey=unjeeiu7mi0wx9wfpylt78nwd&dl=0

2 replies · 0 boosts · 805 views · Mar ’24
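A minimal sketch of the CIRenderDestination wiring described above, including the wide-gamut fallback the post mentions (`taggedSpace` is the color space read from the image, if any; all other names are illustrative):

```swift
import CoreImage
import MetalKit

func makeDestination(view: MTKView, drawable: CAMetalDrawable,
                     commandBuffer: MTLCommandBuffer,
                     taggedSpace: CGColorSpace?) -> CIRenderDestination {
    let destination = CIRenderDestination(
        width: drawable.texture.width,
        height: drawable.texture.height,
        pixelFormat: view.colorPixelFormat,
        commandBuffer: commandBuffer) { drawable.texture }

    if let space = taggedSpace {
        // Mirror the post's logic: wide-gamut sources fall back to
        // extended sRGB; everything else uses the tagged space directly.
        destination.colorSpace = space.isWideGamutRGB
            ? CGColorSpace(name: CGColorSpace.extendedSRGB)
            : space
    }
    return destination
}
```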
FxPlug outputTexture has wrong usage.
After building with 4.2.9, I have a weird bug. The plugin keeps crashing, and when I read the message it displays:

```
validateRenderPassDescriptor:782: failed assertion `RenderPass Descriptor Validation Texture at colorAttachment[0] has usage (0x01) which doesn't specify MTLTextureUsageRenderTarget (0x04)
```

This happens when I run in debug mode and try to hook up the motion template. I found out that the output texture created has usage only MTLTextureUsageShaderRead but no MTLTextureUsageRenderTarget. Does anyone have a problem like mine? I'm using FxPlug 4.2.9, Motion 5.7, and Final Cut 10.7.1, running on Sonoma 14.2.1.
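A minimal sketch of a defensive check (an assumption about a possible workaround, not FxPlug guidance; `canUseAsRenderTarget` is an illustrative name): before attaching a host-provided texture as a color attachment, verify that it actually carries the render-target usage flag.

```swift
import Metal

func canUseAsRenderTarget(_ texture: MTLTexture) -> Bool {
    // The assertion above fires when this flag is missing.
    texture.usage.contains(.renderTarget)
}

// If the flag is missing, one option is to render into your own texture
// created with .renderTarget usage and blit the result into the host's
// texture afterward.
```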
2 replies · 0 boosts · 506 views · Mar ’24