Discover Metal enhancements for A14 Bionic

Explore how Metal is bringing sophisticated rendering and powerful compute features to A14 Bionic. We'll take you through the Metal capabilities delivered in the Apple GPU Family 7 feature set, including new texture addressing modes, fast SIMD reduction and matrix multiplication operations, and a deep dive into implementing a visibility buffer using barycentric coordinates and primitive ID.

Recursos

Videos relacionados

WWDC21

Explore HDR rendering with EDR

WWDC20

Tech Talks

WWDC19

Metal for Machine Learning

9:50 - Visibility buffer - Geometry stage

// Vertex Shader - Geometry stage

// Geometry-stage vertex function: fetches the position of the current vertex
// and passes it, together with the matrix pointer, to doTransform.
//   pos : vertex positions, bound at buffer index 0
//   vpm : matrix handed to doTransform, bound at buffer index 1
//         (presumably a view-projection matrix -- confirm against doTransform)
//   vid : index of the vertex being processed ([[vertex_id]])
// Returns the float4 produced by doTransform for the fetched position.
vertex float4 geoVertex(float3* pos   [[buffer(0)]],
                        float4x4* vpm [[buffer(1)]],
                        uint vid      [[vertex_id]]) {
   float3 p = pos[vid];          // Position fetch
   return doTransform(p, vpm);   // Transform step
}


// Fragment Shader - Geometry stage

// Geometry-stage fragment function: writes one visibility-buffer entry.
//   primid : ID of the primitive covering this fragment ([[primitive_id]])
//   baryc  : barycentric coordinates of the fragment ([[barycentric_coords]])
//   drawid : ID of the current draw, bound at buffer index 0
// Returns a VisBufferFragment holding the combined surface ID and the first
// two barycentric coordinates.
fragment VisBufferFragment geoFrag(uint primid  [[primitive_id]],
                                   float3 baryc [[barycentric_coords]],
                                   uint drawid  [[buffer(0)]]) {
   VisBufferFragment frag;

   // Only x and y are stored; z is not written to the output.
   frag.barycentric = baryc.xy;

   // Combine draw ID and primitive ID into a single surface identifier.
   frag.surface_id  = makeID(primid, drawid);

   return frag;
}

10:21 - Visibility buffer - Lighting stage

// Lighting Compute Kernel

// Lighting-stage compute kernel: reconstructs material inputs from the
// visibility buffer, evaluates the material, then lights the result.
//   draws : per-draw records, bound at buffer index 0
//   lit   : output texture, bound at texture index 0
// NOTE(review): surface_id, baryc, mat, lights and thread_id are not declared
// in this excerpt; presumably they come from kernel arguments / visibility
// buffer reads omitted for brevity -- confirm against the full shader.
kernel void lightingCompute(Draws* draws          [[buffer(0)]],
                            texture2d<float4> lit [[texture(0)]]) {

  /* Reconstruction using Visibility Buffer */
  MaterialInput mi  = doReconstruct(surface_id, baryc, draws);
  
  /* Apply material model */
  MaterialOutput mo = doMaterial(mi, mat);
  
  /* Apply lighting function */
  lit[thread_id]    = doLighting(mo, lights);

}

12:27 - Visibility buffer - Reconstruction step

/*
 * Rebuilds the material inputs for one visibility-buffer sample.
 *
 *   surface_id : packed identifier; the low 16 bits hold the primitive ID and
 *                the high 16 bits hold the draw ID
 *   baryc      : first two barycentric coordinates of the sample
 *   draws      : per-draw records (fields read here: meshid, ib, vpm)
 *   meshes     : per-mesh records (field read here: vb)
 *
 * Returns a MaterialInput with uv and normal filled in.
 */
MaterialInput doReconstruct (uint32 surface_id, short2 baryc, Draws* draws, Mesh* meshes)
{
    MaterialInput mi;

    /* Retrieve primitive ID and Draw ID */
    uint primid   = surface_id & 0x0000FFFF;
    uint drawid   = surface_id >> 16;
    /* Retrieve the 3rd barycentric coordinate */
    float3 bc     = float3(baryc.xy, 1.0 - baryc.x - baryc.y);
    /* Retrieve (mesh ID, vertex ID) using primitive ID and draw ID */
    uint meshid   = draws[drawid].meshid;
    /* NOTE(review): the index buffer is read through draws[meshid] while the
       vertex buffer below is read through meshes[meshid] -- confirm which
       array actually owns ib. */
    uint3 vertid  = uint3(draws[meshid].ib[primid*3+0], 
                          draws[meshid].ib[primid*3+1], 
                          draws[meshid].ib[primid*3+2]);
    /* Interpolate vertex buffer data across barycentric coordinates */
    mi.uv  = meshes[meshid].vb[vertid.x].uv.xy * bc.x +
             meshes[meshid].vb[vertid.y].uv.xy * bc.y +
             meshes[meshid].vb[vertid.z].uv.xy * bc.z;
    /* Other draw state such as normals */
    /* NOTE(review): `normal` is not declared in this excerpt; presumably it is
       interpolated from the vertex buffer like uv -- confirm. */
    mi.normal     = normalize(draws[drawid].vpm * float4(normal, 0)).xyz;
    
    return mi;
}

15:25 - Texture addressing modes

/*
 * Sampler descriptor properties configured below:
 *   sAddressMode : The address mode for the texture width (s) coordinate.
 *   tAddressMode : The address mode for the texture height (t) coordinate.
 *   rAddressMode : The address mode for the texture depth (r) coordinate.
 *   borderColor  : The border color for clamped texture values.
 */

let device = MTLCreateSystemDefaultDevice()!

let samplerDesc = MTLSamplerDescriptor()

/* … Program other sampler state … */
samplerDesc.magFilter = .linear

// Depth coordinate clamps to the border color, which is transparent black.
samplerDesc.borderColor  = .transparentBlack
samplerDesc.rAddressMode = .clampToBorderColor

// Width and height coordinates both use mirror-clamp-to-edge addressing.
samplerDesc.tAddressMode = .mirrorClampToEdge
samplerDesc.sAddressMode = .mirrorClampToEdge

let samplerState = device.makeSamplerState(descriptor: samplerDesc)!

20:45 - SIMD reduce_sum

// Two-level sum reduction: each SIMD group reduces its threads' inputs with
// simd_sum, the per-group results are staged in threadgroup memory, and the
// SIMD group with index 0 reduces the staged values into *total_sum.
//   input_array : source values
//   total_sum   : destination for the final sum
// NOTE(review): read_offset, sg_index and simdgroup_index_in_threadgroup are
// not declared in this excerpt; presumably they are derived from thread
// position attributes -- confirm against the full kernel.
void reduce_sum(device float const* input_array,
                device float* total_sum)
{
     // Staging area for up to 32 per-SIMD-group partial sums.
     threadgroup float partials[32];

     // First level: sum this thread's element across its SIMD group.
     float group_total = simd_sum(input_array[read_offset]);
     partials[sg_index] = group_total;

     // All partial sums must be written before any thread reads them back.
     threadgroup_barrier(mem_flags::mem_threadgroup);

     // Only the SIMD group with index 0 performs the second-level reduction.
     if (simdgroup_index_in_threadgroup != 0)
     {
         return;
     }

     *total_sum = simd_sum(partials[sg_index]);
}

24:56 - SIMD 16x16 matrix multiplication

// SIMD 16x16 matrix multiplication

// Accumulates one 8x8 tile of C = A * B per SIMD group, stepping through an
// inner dimension of 16 in two chunks of 8.
//   A, B : input matrices in threadgroup memory
//   C    : output matrix in threadgroup memory
// NOTE(review): sg_index, the *_row_stride values and the *_increment helpers
// are not declared in this excerpt; presumably they select this SIMD group's
// tile within each matrix -- confirm against the full source.
void matmul(threadgroup float const* A, threadgroup float const* B, threadgroup float* C)
{
    simdgroup_float8x8 lhs;
    simdgroup_float8x8 rhs;
    simdgroup_float8x8 acc(0.0f);   // accumulator starts at zero

    // Offset each base pointer to the tile handled by this SIMD group.
    threadgroup float const* a_tile = A + A_increment(sg_index, A_row_stride);
    threadgroup float const* b_tile = B + B_increment(sg_index, B_row_stride);
    threadgroup float*       c_tile = C + C_increment(sg_index, C_row_stride);

    for (ushort k = 0; k < 16; k += 8) {
        // A advances by k elements; B advances by k rows.
        simdgroup_load(lhs, a_tile + k, A_row_stride);
        simdgroup_load(rhs, b_tile + k * B_row_stride, B_row_stride);

        // acc = lhs * rhs + acc
        simdgroup_multiply_accumulate(acc, lhs, rhs, acc);
    }

    simdgroup_store(acc, c_tile, C_row_stride);
}

25:51 - Metal Performance Shaders

// General matrix multiplication
// Encodes C = A * B, where the result is M x N and the shared dimension is K.
let matMulKernel = MPSMatrixMultiplication(device: device!, resultRows: M, 
    resultColumns: N, interiorColumns: K)
matMulKernel.encode(commandBuffer: commandBuffer, leftMatrix: A, rightMatrix: B,
    resultMatrix: C)

// CNN convolution
// Encodes a batched convolution whose weights come from convDataSource.
let convKernel = MPSCNNConvolution(device: device!, weights: convDataSource)
convKernel.encodeBatch(commandBuffer: commandBuffer, 
    sourceImages: sources, destinationImages: results)

// MPS Graph
// Builds a graph with two placeholder inputs and a matrix-multiplication node,
// then runs it.
let graph = MPSGraph()
let A = graph.placeholder(shape:[M, K], dataType: .float32)
let B = graph.placeholder(shape:[K, N], dataType: .float32)
let C = graph.matrixMultiplication(primaryTensor: A, secondaryTensor: B)
// NOTE(review): the documented run(feeds:targetTensors:targetOperations:) takes
// a dictionary mapping each placeholder to its tensor data and an array of
// target tensors; this excerpt abbreviates both -- confirm before reuse.
graph.run(feeds: [A, B], targetTensors: C, targetOperations: nil)

Explore Get Started

Stay Updated

Explore Platforms

Featured

Explore Technologies

Featured

Explore Community

Featured

Explore Documentation

Release Notes

Explore Downloads

Featured

Explore Support

Featured

Quick Links

Discover Metal enhancements for A14 Bionic

Recursos

Videos relacionados

WWDC21

WWDC20

Tech Talks

WWDC19