View and Projection Matrix in Metal for a First Person Camera

So I'm trying to make a simple scene with some geometry of sorts and a movable camera. So far I've been able to render basic geometry in 2D as well as transform said geometry using matrices. After that I moved on to the Calculating Primitive Visibility Using Depth Testing sample ... also smooth sailing.

Then I had my first go at transforming positions between different coordinate spaces. I didn't get very far with my rather blurry memory of OpenGL, although when I compared my view and projection matrices with the ones produced by OpenGL's glm::lookAt() and glm::perspective() functions, there seemed to be no fundamental differences. Figuring Metal does things differently, I browsed the Metal Sample Code library for a sample containing a first-person camera. The only one I could find was Rendering Terrain Dynamically with Argument Buffers. Luckily it contained code for calculating view and projection matrices, which seemed to differ from my code. But I still have problems.

Problem Description

When positioning the camera right in front of the geometry, the view as well as the projection matrix produce seemingly accurate results:

Camera Position: (0, 0, 1); Camera Direction: (0, 0, -1)

When moving further away though, parts of the scene are wrongfully culled, notably the ones farther away from the camera:

Camera Position: (0, 0, 2); Camera Direction: (0, 0, -1)

Rotating the Camera also produces confusing results:

Camera Position: (0, 0, 1); Camera Direction: (cos(250°), 0, sin(250°)) (yes, I converted to radians)

My Suspicions

  • The projection matrix isn't converting the vertices from view space to Normalised Device Coordinates correctly. Also, when comparing the first two images, the lower part of the triangle seems to get bigger as the camera moves away, which also doesn't appear to be right.
  • Obviously the view matrix isn't correct either, as I'm pretty sure what's described above isn't supposed to happen.

Code Samples

MainShader.metal

#include <metal_stdlib>
#include <Shared/Primitives.h>
#include <Shared/MainRendererShared.h>


using namespace metal;


struct transformed_data {
    float4 position [[position]];
    float4 color;
};


vertex transformed_data vertex_shader(uint vertex_id [[vertex_id]],
                                      constant _vertex *vertices [[buffer(0)]],
                                      constant _uniforms& uniforms [[buffer(1)]])

{
    transformed_data output;

    float3 dir = {0, 0, -1};
    float3 inEye = float3{ 0, 0, 1 }; // position
    float3 inTo = inEye + dir; // position + direction
    float3 inUp = float3{ 0, 1, 0};
    
    float3 z = normalize(inTo - inEye);
    float3 x = normalize(cross(inUp, z));
    float3 y = cross(z, x);
    float3 t = (float3) { -dot(x, inEye), -dot(y, inEye), -dot(z, inEye) };

    float4x4 viewm = float4x4(float4 { x.x, y.x, z.x, 0 },
                              float4 { x.y, y.y, z.y, 0 },
                              float4 { x.z, y.z, z.z, 0 },
                              float4 { t.x, t.y, t.z, 1 });

    

    float _nearPlane = 0.1f;
    float _farPlane = 100.0f;
    float _aspectRatio = uniforms.viewport_size.x / uniforms.viewport_size.y;

    float va_tan = 1.0f / tan(0.6f * 3.14f * 0.5f);
    float ys = va_tan;
    float xs = ys / _aspectRatio;
    float zs = _farPlane / (_farPlane - _nearPlane);

    float4x4 projectionm = float4x4((float4){ xs,  0,  0, 0},
                                    (float4){  0, ys,  0, 0},
                                    (float4){  0,  0, zs, 1},
                                    (float4){  0,  0, -_nearPlane * zs, 0 } );
    
    float4 projected = (projectionm*viewm) * float4(vertices[vertex_id].position,1);

    vector_float2 viewport_dim = vector_float2(uniforms.viewport_size);

    output.position = vector_float4(0.0, 0.0, 0.0, 1.0);
    output.position.xy = projected.xy / (viewport_dim / 2);
    output.position.z = projected.z;

    output.color = vertices[vertex_id].color;

    return output;

}


fragment float4 fragment_shader(transformed_data in [[stage_in]]) {return in.color;}

These are the vertices definitions

let triangle_vertices = [_vertex(position: [ 480.0, -270.0, 1.0], color: [1.0, 0.0, 0.0, 1.0]),
                         _vertex(position: [-480.0, -270.0, 1.0], color: [0.0, 1.0, 0.0, 1.0]),
                         _vertex(position: [   0.0,  270.0, 0.0], color: [0.0, 0.0, 1.0, 1.0])]

// TO-DO: make this use 4 vertices and 6 indices (see the sketch after this block)
let quad_vertices = [_vertex(position: [ 480.0,  270.0, 0.5], color: [0.5, 0.5, 0.5, 1.0]),
                     _vertex(position: [ 480.0, -270.0, 0.5], color: [0.5, 0.5, 0.5, 1.0]),
                     _vertex(position: [-480.0, -270.0, 0.5], color: [0.5, 0.5, 0.5, 1.0]),

                     _vertex(position: [-480.0,  270.0, 0.5], color: [0.5, 0.5, 0.5, 1.0]),
                     _vertex(position: [ 480.0,  270.0, 0.5], color: [0.5, 0.5, 0.5, 1.0]),
                     _vertex(position: [-480.0, -270.0, 0.5], color: [0.5, 0.5, 0.5, 1.0])]
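
For the TO-DO above, here's a minimal sketch of what the indexed version could look like; the buffer and encoder names are illustrative, not from the original project:

let quad_vertices_indexed = [_vertex(position: [ 480.0,  270.0, 0.5], color: [0.5, 0.5, 0.5, 1.0]),
                             _vertex(position: [ 480.0, -270.0, 0.5], color: [0.5, 0.5, 0.5, 1.0]),
                             _vertex(position: [-480.0, -270.0, 0.5], color: [0.5, 0.5, 0.5, 1.0]),
                             _vertex(position: [-480.0,  270.0, 0.5], color: [0.5, 0.5, 0.5, 1.0])]
let quad_indices: [UInt16] = [0, 1, 2,  2, 3, 0]

// let index_buffer = device.makeBuffer(bytes: quad_indices,
//                                      length: quad_indices.count * MemoryLayout<UInt16>.stride,
//                                      options: [])!
// render_encoder.drawIndexedPrimitives(type: .triangle,
//                                      indexCount: quad_indices.count,
//                                      indexType: .uint16,
//                                      indexBuffer: index_buffer,
//                                      indexBufferOffset: 0)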

This is the initialisation code of the depth stencil descriptor and state

_view.depthStencilPixelFormat = MTLPixelFormat.depth32Float
_view.clearDepth = 1.0

// other render initialisation code

let depth_stencil_descriptor = MTLDepthStencilDescriptor()

depth_stencil_descriptor.depthCompareFunction = MTLCompareFunction.lessEqual
depth_stencil_descriptor.isDepthWriteEnabled = true

depth_stencil_state = try! _view.device!.makeDepthStencilState(descriptor: depth_stencil_descriptor)!
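
For completeness, this state then gets set on the render command encoder each frame before drawing; a minimal sketch, assuming a standard MTKView draw callback (the encoder and pipeline state names are illustrative):

// Inside draw(in:), after creating the render command encoder:
render_encoder.setDepthStencilState(depth_stencil_state)
render_encoder.setRenderPipelineState(render_pipeline_state) // hypothetical pipeline state
// ... set vertex buffers, issue the draw calls, then call endEncoding()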

So if you have any idea why it's not working, have some working code of your own, or know of any public samples containing a working first-person camera, feel free to help me out.

Thank you in advance!

(Please ignore any spelling or similar mistakes, English is not my primary language.)

Answered by _tally in 723058022


Accepted Answer

Fixed!

The problem seemed to be that I was utilising the "To-NDC" conversion from the Calculating Primitive Visibility Using Depth Testing Sample. This bit:

out.position = vector_float4(0.0, 0.0, 0.0, 1.0);
out.position.xy = pixelSpacePosition / (viewportSize / 2.0);

But that is already being taken care of by the projection matrix (not entirely sure on that, correct me if I'm wrong), so I was basically transforming the X and Y coordinates to Normalised Device Space twice, which, I guess, messed it up.

The correct Vertex Shader Function would be:

vertex transformed_data vertex_shader(uint vertex_id [[vertex_id]],
                                      constant _vertex *vertices [[buffer(0)]],
                                      constant _uniforms& uniforms [[buffer(1)]])

{
    transformed_data output;

    float3 dir = {0, 0, -1};
    float3 inEye = float3{ 0, 0, 1 }; // position
    float3 inTo = inEye + dir; // position + direction
    float3 inUp = float3{ 0, 1, 0};
    
    float3 z = normalize(inTo - inEye);
    float3 x = normalize(cross(inUp, z));
    float3 y = cross(z, x);
    float3 t = (float3) { -dot(x, inEye), -dot(y, inEye), -dot(z, inEye) };

    float4x4 viewm = float4x4(float4 { x.x, y.x, z.x, 0 },
                              float4 { x.y, y.y, z.y, 0 },
                              float4 { x.z, y.z, z.z, 0 },
                              float4 { t.x, t.y, t.z, 1 });


    float _nearPlane = 0.1f;
    float _farPlane = 100.0f;
    float _aspectRatio = uniforms.viewport_size.x / uniforms.viewport_size.y;

    float va_tan = 1.0f / tan(0.6f * 3.14f * 0.5f);
    float ys = va_tan;
    float xs = ys / _aspectRatio;
    float zs = _farPlane / (_farPlane - _nearPlane);

    float4x4 projectionm = float4x4((float4){ xs,  0,  0, 0},
                                    (float4){  0, ys,  0, 0},
                                    (float4){  0,  0, zs, 1},
                                    (float4){  0,  0, -_nearPlane * zs, 0 } );

    output.position = projectionm * viewm * float4(vertices[vertex_id].position,1);
    output.color = vertices[vertex_id].color;

    return output;

}

Of course you can calculate the view and projection matrices on the CPU and send them to the GPU, which has many advantages. I solely chose this setup to eliminate as much error potential as possible and to allow easy debugging.
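
For reference, here's a minimal sketch of what the CPU-side version could look like in Swift with simd, computing the combined view-projection matrix once per frame and handing it to the vertex shader. The CameraUniforms struct, the function name, and the buffer index are illustrative assumptions, not the layout from the original Shared/MainRendererShared.h:

import simd
import Foundation

// Hypothetical uniforms layout: the actual struct has to match whatever the
// vertex shader reads from its uniforms buffer.
struct CameraUniforms {
    var viewProjection: simd_float4x4
}

// Builds the same left-handed view and projection matrices as the shader above,
// but on the CPU, once per frame.
func makeViewProjection(eye: SIMD3<Float>, direction: SIMD3<Float>,
                        viewportSize: SIMD2<Float>) -> simd_float4x4 {
    // Look-at view matrix
    let z = simd_normalize(direction)
    let x = simd_normalize(simd_cross(SIMD3<Float>(0, 1, 0), z))
    let y = simd_cross(z, x)
    let t = SIMD3<Float>(-simd_dot(x, eye), -simd_dot(y, eye), -simd_dot(z, eye))
    let view = simd_float4x4(columns: (SIMD4<Float>(x.x, y.x, z.x, 0),
                                       SIMD4<Float>(x.y, y.y, z.y, 0),
                                       SIMD4<Float>(x.z, y.z, z.z, 0),
                                       SIMD4<Float>(t.x, t.y, t.z, 1)))

    // Perspective projection mapping depth into Metal's [0, 1] clip range
    let near: Float = 0.1
    let far: Float = 100.0
    let fovRadians: Float = 0.6 * .pi   // same vertical FOV as the shader
    let ys = 1 / tan(fovRadians * 0.5)
    let xs = ys / (viewportSize.x / viewportSize.y)
    let zs = far / (far - near)
    let projection = simd_float4x4(columns: (SIMD4<Float>(xs, 0, 0, 0),
                                             SIMD4<Float>(0, ys, 0, 0),
                                             SIMD4<Float>(0, 0, zs, 1),
                                             SIMD4<Float>(0, 0, -near * zs, 0)))

    return projection * view
}

// Per frame, before drawing (encoder is the current MTLRenderCommandEncoder):
// var uniforms = CameraUniforms(viewProjection: makeViewProjection(eye: [0, 0, 1],
//                                                                  direction: [0, 0, -1],
//                                                                  viewportSize: [960, 540]))
// encoder.setVertexBytes(&uniforms, length: MemoryLayout<CameraUniforms>.stride, index: 1)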

If you still have any resources or sample code of working FPS cameras in Metal, please post them. There are basically none on the internet.

I'd recommend using reverse-Z and an infinite far plane. There are plenty of online resources for this.
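
For anyone reading along, a minimal sketch of such a projection matrix, assuming the same left-handed convention as the matrices in this thread; the function name is illustrative. With reverse-Z the near plane maps to depth 1 and the (infinite) far plane to depth 0, so the depth compare function becomes .greaterEqual and clearDepth becomes 0.0:

import simd
import Foundation

// Reverse-Z, infinite-far-plane projection (sketch). With this layout,
// clip.z ends up as `near` and clip.w as the view-space z, so depth = near / z:
// 1 at the near plane, approaching 0 at infinity.
func reverseZInfiniteProjection(aspect: Float, fovRadians: Float, near: Float) -> simd_float4x4 {
    let ys = 1 / tan(fovRadians * 0.5)
    let xs = ys / aspect
    return simd_float4x4(columns: (SIMD4<Float>(xs, 0, 0,    0),
                                   SIMD4<Float>(0, ys, 0,    0),
                                   SIMD4<Float>(0,  0, 0,    1),
                                   SIMD4<Float>(0,  0, near, 0)))
}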

"correct me if I'm wrong"

You're not wrong. I checked everything else: the projection and view matrix code looks correct. You're also right that all of this shouldn't be done in the shader. Most of these matrices (excluding the model matrix) only have to be computed once per render loop.

If it helps, here are my projection and view matrix routines (they match yours):

static __inline__ simd_float4x4 matrix4x4_perspective_projection(float inAspect, float inFovRAD, float inNear, float inFar)
{
    float y = 1 / tan(inFovRAD * 0.5);
    float x = y / inAspect;
    float z = inFar / (inFar - inNear);

    simd_float4 X = { x, 0, 0,          0};
    simd_float4 Y = { 0, y, 0,          0};
    simd_float4 Z = { 0, 0, z,          1};
    simd_float4 W = { 0, 0, z * -inNear,  0};

    return (matrix_float4x4) {{X, Y, Z, W}};
}

static __inline__ simd_float4x4 matrix4x4_lookAt(const simd_float3 inEye,
                                                 const simd_float3 inTo,
                                                 const simd_float3 inUp)
{
    //forward vector
    simd_float3 zAxis = simd_normalize(inTo - inEye);
    //horizontal vector
    simd_float3 xAxis = simd_normalize(simd_cross(inUp, zAxis));
    //vertical vector
    simd_float3 yAxis = simd_cross(zAxis, xAxis);
    //translation vector
    simd_float3 t = (simd_float3) {-simd_dot(xAxis, inEye), -simd_dot(yAxis, inEye), -simd_dot(zAxis, inEye)};

    return (matrix_float4x4) {{
        { xAxis.x, yAxis.x, zAxis.x, 0 },
        { xAxis.y, yAxis.y, zAxis.y, 0 },
        { xAxis.z, yAxis.z, zAxis.z, 0 },
        { t.x, t.y, t.z, 1 }
    }};
} // lookAt