Metal을 사용해 GPU 렌더러 최적화하기
Metal의 최신 기능과 모범 사례를 활용해 GPU 렌더러를 최적화하는 방법을 살펴보세요. 함수 특수화 및 병렬 셰이더 컴파일로 제작 워크플로의 응답성을 유지하고 렌더링 속도를 높이는 방법을 소개해 드립니다. 그리고 컴퓨트 셰이더를 조정해 성능을 최적화하는 방법도 확인해 보세요.
- 0:00 - Intro
- 2:00 - Maximize shader performance
- 7:45 - Asynchronous compilation
- 10:10 - Fast runtime compilation
- 12:46 - Tune compiler options
- 16:10 - Wrap-Up
// Reduce branch performance cost
//
// Baseline fragment entry point: each material feature is guarded by a
// dynamic branch on a flag read from `material`, so the test is evaluated
// at run time for every invocation.
fragment FragOut frag_material_main(device Material &material [[buffer(0)]])
{
    if (material.is_glossy) {
        material_glossy(material);
    }

    if (material.has_shadows) {
        light_shadows(material);
    }

    if (material.has_reflections) {
        trace_reflections(material);
    }

    if (material.is_volumetric) {
        output_volume_parameters(material);
    }

    return output_material();
}
3:55 - Function constant declaration per material feature
// One boolean function constant per material feature. Each constant is
// identified by its function_constant index (0-3).
constant bool IsGlossy       [[function_constant(0)]];
constant bool HasShadows     [[function_constant(1)]];
constant bool HasReflections [[function_constant(2)]];
constant bool IsVolumetric   [[function_constant(3)]];
3:59 - Dynamic branch for the feature codepath is replaced with function constants
// Dynamic branch: the reflections flag is read from `material` at run time.
if (material.has_reflections)
{
    trace_reflections(material);
}
4:05 - Dynamic branch for the feature codepath is replaced with function constants
// The dynamic branch on material.has_reflections is replaced with a test of
// the HasReflections function constant.
if (HasReflections)
{
    trace_reflections(material);
}
4:13 - Reduce branch performance cost with function constants
// One boolean function constant per material feature.
constant bool IsGlossy       [[function_constant(0)]];
constant bool HasShadows     [[function_constant(1)]];
constant bool HasReflections [[function_constant(2)]];
constant bool IsVolumetric   [[function_constant(3)]];

// Reduce branch performance cost
//
// Same entry point as the dynamic-branch version, but every feature test
// now reads a function constant instead of a field of `material`.
fragment FragOut frag_material_main(device Material &material [[buffer(0)]])
{
    if (IsGlossy) {
        material_glossy(material);
    }

    if (HasShadows) {
        light_shadows(material);
    }

    if (HasReflections) {
        trace_reflections(material);
    }

    if (IsVolumetric) {
        output_volume_parameters(material);
    }

    return output_material();
}
4:58 - Function constants for material parameters
// Function constants for material parameters
constant float4 MaterialColor  [[function_constant(0)]];
constant float4 MaterialWeight [[function_constant(1)]];
constant float4 SheenColor     [[function_constant(2)]];
constant float4 SheenFactor    [[function_constant(3)]];

// Only the blend factor is still read from the material buffer; the other
// inputs used below come from the function constants above.
struct Material {
    float4 blend_factor;
};

// Evaluates the glossy and sheen terms from the function constants and
// writes them out together with the material's blend factor.
void material_glossy(const constant Material& material)
{
    float4 light, sheen;

    light = glossy_eval(MaterialColor, MaterialWeight);
    sheen = sheen_eval(SheenColor, SheenFactor);

    glossy_output_write(light, sheen, material.blend_factor);
}
5:21 - MaterialParameter structure for constant parameters
// Describes one function constant: the name it has in the shader, its Metal
// data type, and a pointer to the value to supply for it.
struct MaterialParameter {
    NSString*   name;
    MTLDataType type;
    void*       value_ptr;
};

// Parameter for the boolean "IsGlossy" constant, pointing at the material's flag.
MaterialParameter is_glossy{
    @"IsGlossy",
    MTLDataTypeBool,
    &material.is_glossy
};

// Parameter for the float4 "MaterialColor" constant, pointing at the material's color.
MaterialParameter mat_color{
    @"MaterialColor",
    MTLDataTypeFloat4,
    &material.color
};
5:51 - Declare and populate MTLFunctionConstantValues
// Declare and populate MTLFunctionConstantValues
//
// Each entry of `shader_parameters` supplies the value pointer, data type and
// shader-side name for one function constant.
MTLFunctionConstantValues* values = [MTLFunctionConstantValues new];
for (const MaterialParameter& parameter : shader_parameters) {
    [values setConstantValue:parameter.value_ptr
                        type:parameter.type
                    withName:parameter.name];
}
5:51 - Create pipeline render state object with function constant declarations
// Material values that are supplied to the shader as function constants.
struct Material {
    bool  is_glossy;
    float color[4];
};

// Describes one function constant: the name it has in the shader, its Metal
// data type, and a pointer to the value to supply for it.
struct MaterialParameter {
    NSString*   name;
    MTLDataType type;
    void*       value_ptr;
};

// Declare material
Material material = {true, {1.0f, 0.0f, 0.0f, 1.0f}};

// Declare function constant parameters
MaterialParameter is_glossy{@"IsGlossy", MTLDataTypeBool, &material.is_glossy};
MaterialParameter mat_color{@"MaterialColor", MTLDataTypeFloat4, &material.color};
MaterialParameter shader_parameters[2] = {is_glossy, mat_color};

// Declare and populate MTLFunctionConstantValues
MTLFunctionConstantValues* values = [MTLFunctionConstantValues new];
for (const MaterialParameter& parameter : shader_parameters) {
    [values setConstantValue:parameter.value_ptr
                        type:parameter.type
                    withName:parameter.name];
}

// Create MTLRenderPipelineDescriptor and create shader function from MTLLibrary
MTLRenderPipelineDescriptor *dsc = [MTLRenderPipelineDescriptor new];
NSError* error = nil;
dsc.fragmentFunction = [shader_library newFunctionWithName:@"frag_material_main"
                                            constantValues:values
                                                     error:&error];
// Check the returned object, not the error pointer, to detect failure.
if (dsc.fragmentFunction == nil) {
    NSLog(@"Failed to create frag_material_main: %@", error);
}

// Create pipeline render state object
// NOTE(review): only the fragment function is configured in this snippet; a
// complete descriptor presumably needs further setup not shown here.
id<MTLRenderPipelineState> pso = [device newRenderPipelineStateWithDescriptor:dsc
                                                                         error:&error];
if (pso == nil) {
    NSLog(@"Failed to create render pipeline state: %@", error);
}
6:14 - Create MTLRenderPipelineDescriptor and create shader function from MTLLibrary
// Create MTLRenderPipelineDescriptor and create shader function from MTLLibrary
//
// `values` carries the function constant values applied to
// frag_material_main when the function object is created.
MTLRenderPipelineDescriptor *dsc = [MTLRenderPipelineDescriptor new];
NSError* error = nil;
dsc.fragmentFunction = [shader_library newFunctionWithName:@"frag_material_main"
                                            constantValues:values
                                                     error:&error];
// Check the returned object, not the error pointer, to detect failure.
if (dsc.fragmentFunction == nil) {
    NSLog(@"Failed to create frag_material_main: %@", error);
}
8:07 - Shader library creation
// Shader library creation. Takes the shader source and compile options; the
// method returns void and reports its result through `completionHandler`.
- (void)newLibraryWithSource:(NSString *)source options:(MTLCompileOptions *)options completionHandler:(MTLNewLibraryCompletionHandler)completionHandler;
8:09 - Render pipeline state creation
// Render pipeline state creation. Takes a pipeline descriptor; the method
// returns void and reports its result through `completionHandler`.
- (void)newRenderPipelineStateWithDescriptor:(MTLRenderPipelineDescriptor *)descriptor completionHandler:(MTLNewRenderPipelineStateCompletionHandler)completionHandler;
9:00 - Use as many threads as possible for concurrent compilation
// Flag for using as many threads as possible for concurrent compilation.
// NOTE(review): the declaring interface is not shown in this excerpt.
@property (atomic) BOOL shouldMaximizeConcurrentCompilation;
10:58 - Assign symbol visibility to default or hidden
// Symbol declared with default visibility.
__attribute__((visibility("default"))) void matrix_mul();

// Symbol declared with hidden visibility.
__attribute__((visibility("hidden"))) void matrix_mul_internal();
11:19 - Verify device support
// Device support checks for dynamic libraries; query the one that matches
// the kind of pipeline being built.

// For render pipelines
@property (readonly) BOOL supportsRenderDynamicLibraries;

// For compute pipelines
@property (readonly) BOOL supportsDynamicLibraries;
11:46 - Compile dynamic libraries
// Create a dynamic library from an existing Metal library.
// On failure, details are reported through `error`.
- (id<MTLDynamicLibrary>)newDynamicLibrary:(id<MTLLibrary>)library error:(NSError **)error;

// Create a dynamic library from the URL.
// On failure, details are reported through `error`.
- (id<MTLDynamicLibrary>)newDynamicLibraryWithURL:(NSURL *)url error:(NSError **)error;
12:18 - Dynamically link shaders
// Pipeline state: list the dynamic libraries to preload for the vertex and
// fragment stages.
MTLRenderPipelineDescriptor* dsc = [MTLRenderPipelineDescriptor new];
dsc.vertexPreloadedLibraries = @[dylib_Math, dylib_Shadows];
dsc.fragmentPreloadedLibraries = @[dylib_Math, dylib_Shadows];

// Compile options: list the dynamic libraries the compiled source links against.
MTLCompileOptions* options = [MTLCompileOptions new];
options.libraries = @[dylib_Math, dylib_Shadows];

// `device`, `programString` and `error` are assumed to be declared by the
// surrounding code; they are not part of this snippet.
id<MTLLibrary> library = [device newLibraryWithSource:programString
                                              options:options
                                                error:&error];
// Check the returned object, not the error pointer, to detect failure.
if (library == nil) {
    NSLog(@"Failed to compile library: %@", error);
}
13:45 - Specify desired max total threads per threadgroup
// Specify the desired max total threads per threadgroup on the compute
// pipeline descriptor. Interface excerpt only; the rest of the declaration,
// including its closing @end, is not shown here.
@interface MTLComputePipelineDescriptor : NSObject @property (readwrite, nonatomic) NSUInteger maxTotalThreadsPerThreadgroup;
14:12 - Match desired max total threads per threadgroup
// Compile-options counterpart of the pipeline descriptor property; set it to
// match the desired max total threads per threadgroup. Interface excerpt
// only; the rest of the declaration, including its closing @end, is not
// shown here.
@interface MTLCompileOptions : NSObject @property (readwrite, nonatomic) NSUInteger maxTotalThreadsPerThreadgroup;
14:25 - Tune Metal dynamic libraries
// Tune Metal dynamic libraries: compile the source as a dynamic library and
// pass the desired threadgroup size to the compiler.
MTLCompileOptions* options = [MTLCompileOptions new];
options.libraryType = MTLLibraryTypeDynamic;
// The install name must start with the @executable_path anchor; without the
// leading '@' it is just an ordinary relative path.
options.installName = @"@executable_path/dylib_Math.metallib";

// The compile-options property is only set where the OS provides it.
if (@available(macOS 13.3, iOS 16.4, *)) {
    options.maxTotalThreadsPerThreadgroup = 768;
}

// `device`, `programString` and `error` are assumed to be declared by the
// surrounding code; they are not part of this snippet.
id<MTLLibrary> lib = [device newLibraryWithSource:programString
                                          options:options
                                            error:&error];

// Only build the dynamic library when compilation produced a library;
// `dynamicLib` stays nil otherwise and `error` describes the failure.
id<MTLDynamicLibrary> dynamicLib = nil;
if (lib != nil) {
    dynamicLib = [device newDynamicLibrary:lib error:&error];
}
