RealityKit: Intercept the picture

I'm developing an app in which I need to render pictures of some models contained in a RealityView. I want to set up a camera, capture the virtual content through that camera, and save it as an image.

Answered by Vision Pro Engineer in 823129022

Hi @lijiaxu

RealityRenderer enables you to set up a scene with RealityKit entities and a camera, then render that scene to an image. Here's a snippet I made to demonstrate. There are lots of ways to structure this code, and I wrote it in a hurry, so please don't consider it "best practice"; it's a very rough starting point for you to improve and build on.

ContentView

import RealityKit
import SwiftUI

struct ContentView: View {
    @State private var entityToImage: EntityToImage?
    @State private var sphere: Entity
    
    init() {
        
        // Build the initial scene
        let root = Entity()
        let sphere = Entity()
        var wireframeMaterial = UnlitMaterial()
        wireframeMaterial.triangleFillMode = .lines
        sphere.position = [0, 0, -1]
        let modelComponent = ModelComponent(
            mesh: .generateSphere(radius: 0.2),
            // Note, if you use a lit material be sure to
            // add an IBL to EntityToImage.
            materials: [wireframeMaterial]
        )
        sphere.components.set(modelComponent)
        root.addChild(sphere)
        
        // Initialize the helper with a root entity.
        self.entityToImage = try? EntityToImage(root: root)
        self.sphere = sphere
    }
    
    var body: some View {
        VStack {
            if let image = entityToImage?.image {
                image
                    .resizable()
                    .scaledToFit()
            }
            else {
                Text("Unable to generate image.")
            }
            
            Button("Move Sphere") {
                Task { @MainActor in
                    // Update the entity
                    sphere.position.x += 0.2
                    
                    // Re-render the image
                    try? await entityToImage?.renderImage()
                }
            }
        }
        .task {
            // render the initial scene
            try? await entityToImage?.renderImage()
        }
    }
}
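
If the scene changes continuously rather than only when the button is tapped, you could drive renderImage on a cadence instead of the one-shot .task above. A minimal sketch (the frame interval is illustrative, not from the original snippet):

.task {
    // Re-render roughly ten times per second until the view disappears.
    while !Task.isCancelled {
        try? await entityToImage?.renderImage()
        try? await Task.sleep(for: .milliseconds(100))
    }
}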

Helper class

import CoreImage
import Metal
import RealityKit
import SwiftUI
import UIKit

@MainActor
@Observable
class EntityToImage {
    private let renderer: RealityRenderer?
    private let cameraEntity = PerspectiveCamera()
    var image: Image?
    
    init(root: Entity) throws {
        do {
            let renderer = try RealityRenderer()
            renderer.entities.append(root)
            renderer.entities.append(cameraEntity)
            cameraEntity.camera.near = 0.01
            cameraEntity.camera.far = 100
            cameraEntity.camera.fieldOfViewOrientation = .horizontal
            cameraEntity.camera.fieldOfViewInDegrees = 105
            renderer.activeCamera = cameraEntity
            renderer.cameraSettings.colorBackground = .color(.init(gray: 0.0, alpha: 0.0))
            renderer.cameraSettings.antialiasing = .none
            
            self.renderer = renderer
        }
        catch {
            renderer = nil
            throw error
        }
    }
    
    private func textureImage(from texture: MTLTexture) -> Image? {
        let componentCount = 4
        let bitmapInfo = CGImageByteOrderInfo.order32Big.rawValue | CGImageAlphaInfo.premultipliedLast.rawValue
        let bitsPerComponent = 8
        let colorSpace = CGColorSpace(name: CGColorSpace.sRGB)!
        
        let bytesPerRow = texture.width * componentCount
        guard let pixelBuffer = malloc(texture.height * bytesPerRow) else {
            return nil
        }

        defer {
            free(pixelBuffer)
        }
        
        let region = MTLRegionMake2D(0, 0, texture.width, texture.height)
        texture.getBytes(pixelBuffer, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0)
        let ctx = CGContext(data: pixelBuffer,
                            width: texture.width,
                            height: texture.height,
                            bitsPerComponent: bitsPerComponent,
                            bytesPerRow: bytesPerRow,
                            space: colorSpace,
                            bitmapInfo: bitmapInfo)
        
        guard let cgImage = ctx?.makeImage() else {
            return nil
        }
        // Round-trip through Core Image so the final CGImage owns its
        // pixels; the image from makeImage() can reference pixelBuffer,
        // which is freed when this function returns.
        let ciImage = CIImage(cgImage: cgImage)
        let context = CIContext(options: nil)
        guard let renderedImage = context.createCGImage(ciImage, from: ciImage.extent) else { return nil }
        let uiImage = UIImage(cgImage: renderedImage)
        
        return Image(uiImage: uiImage)
    }

    func renderImage() async throws {
        guard let renderer = renderer else { return }
        
        // If you use a lit material you'll need an IBL (image-based light):
//        renderer.lighting.resource = try await EnvironmentResource(named: "ImageBasedLighting")
        
        let imageWidth: Double = 1920
        let imageHeight: Double = 1080
        
        let contentSize = CGSize(width: imageWidth, height: imageHeight)
        let descriptor = MTLTextureDescriptor()
        descriptor.width = Int(contentSize.width)
        descriptor.height = Int(contentSize.height)
        descriptor.pixelFormat = .rgba8Unorm_srgb
        descriptor.sampleCount = 1
        descriptor.usage = [.renderTarget, .shaderRead, .shaderWrite]
        
        guard let texture = MTLCreateSystemDefaultDevice()?.makeTexture(descriptor: descriptor) else {
            return
        }
        
        image = await withCheckedContinuation { (continuation: CheckedContinuation<Image?, Never>) in
            do {
                let output = try RealityRenderer.CameraOutput(.singleProjection(colorTexture: texture))
                // onComplete runs once the frame has been rendered into `texture`.
                try renderer.updateAndRender(deltaTime: 0.1, cameraOutput: output, onComplete: { _ in
                    let textureImage = self.textureImage(from: texture)
                    continuation.resume(returning: textureImage)
                })
            } catch {
                continuation.resume(returning: nil)
            }
        }
    }
}
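
The original question also asks about saving the result. textureImage(from:) already builds a UIImage before wrapping it in a SwiftUI Image, so one option is to keep a reference to that UIImage (for example, in an extra property on EntityToImage) and encode it to PNG. A minimal sketch; saveRender is an illustrative name, not part of the API:

import UIKit

// Sketch: encode a rendered UIImage as PNG and write it to disk.
@MainActor
func saveRender(_ uiImage: UIImage, to url: URL) throws {
    guard let data = uiImage.pngData() else {
        throw CocoaError(.fileWriteUnknown) // no bitmap to encode
    }
    try data.write(to: url, options: .atomic)
}

You could call this after renderImage() completes, writing to a destination such as URL.documentsDirectory.appending(path: "render.png").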