// Sample: reusing a Metal command buffer / compute encoder pair many times
// without unbounded memory growth, in a file built with manual reference
// counting (no ARC).
//
// -[MTLCommandQueue commandBuffer] and -[MTLCommandBuffer computeCommandEncoder]
// return autoreleased objects that the caller does not own. Two rules follow:
//   1. To keep such an object past the current autorelease pool, retain it,
//      and balance that retain with exactly one release.
//   2. Some autorelease pool must be in place and must be drained regularly;
//      otherwise the autoreleased reference is never dropped and every
//      command buffer stays alive forever (the "memory flies into the sky"
//      symptom, with retainCount stuck at 1 after a manual release).
// Helper therefore wraps every Metal call it makes in its own
// @autoreleasepool and explicitly retains the two objects it stores, so it
// behaves correctly no matter which thread or call site drives it.

#import <Foundation/Foundation.h>
#import <Metal/Metal.h>

const unsigned int arrayLength = 1 << 12;
const unsigned int bufferSize = arrayLength * sizeof(float);

/// Owns one command buffer and one compute encoder at a time and recreates
/// both after each submitted batch.
@interface Helper : NSObject

/// Creates a fresh command buffer and compute encoder from `commandQueue` and
/// binds `processFunctionPSO` to the encoder. Any pair already held is
/// released first.
-(void) initState;

/// Releases the current command buffer / encoder pair and creates a new one.
-(void) reset;

/// Encodes a single dispatch on the current encoder and ends encoding.
/// @param gridSize        Total number of threads to launch.
/// @param threadgroupSize Number of threads per threadgroup.
-(void) enqueue: (MTLSize)gridSize threadgroupSize:(MTLSize)threadgroupSize;

/// Commits the current command buffer, blocks until the GPU has completed it,
/// then calls -reset so the helper is ready for the next batch.
-(void) finish;

// `retain` keeps the ownership rule uniform: every backing ivar always holds
// a +1 reference. `nonatomic` matters here as well: an atomic retain getter
// returns a retained-and-autoreleased value, which would add autorelease
// traffic on every read of `encoder` / `commandBuffer`.
@property (nonatomic, retain) id<MTLCommandBuffer> commandBuffer;
@property (nonatomic, retain) id<MTLComputeCommandEncoder> encoder;
@property (nonatomic, retain) id<MTLComputePipelineState> processFunctionPSO;
@property (nonatomic, retain) id<MTLCommandQueue> commandQueue;

@end

@implementation Helper {
    // YES from the moment _encoder is created until -endEncoding is sent.
    // Metal asserts when an encoder is deallocated without -endEncoding, so
    // the teardown path must know whether it still has to end it.
    BOOL _encoderOpen;
}

// Ends encoding if necessary, then gives up ownership of the current
// encoder and command buffer. Safe to call when nothing is held.
-(void) releaseEncodingState {
    if (_encoderOpen) {
        [_encoder endEncoding];
        _encoderOpen = NO;
    }
    [_encoder release];
    _encoder = nil;
    [_commandBuffer release];
    _commandBuffer = nil;
}

-(void) initState {
    @autoreleasepool {
        // Drop any previous pair so calling -initState twice does not leak.
        [self releaseEncodingState];

        // Both factory methods return autoreleased objects; retain them so
        // they survive the pool drain at the end of this scope. After the
        // drain the ivars hold the only references this class is
        // responsible for.
        _commandBuffer = [[_commandQueue commandBuffer] retain];
        _encoder = [[_commandBuffer computeCommandEncoder] retain];
        _encoderOpen = (_encoder != nil);

        [_encoder setComputePipelineState:_processFunctionPSO];
    }
}

-(void) reset {
    @autoreleasepool {
        // retainCount is deliberately not logged: it is not a reliable
        // diagnostic, and reading it after the release below would be a
        // use-after-free now that ownership is balanced.
        [self releaseEncodingState];
        [self initState];
    }
}

-(void) enqueue: (MTLSize)gridSize threadgroupSize:(MTLSize)threadgroupSize {
    @autoreleasepool {
        [_encoder dispatchThreads:gridSize threadsPerThreadgroup:threadgroupSize];
        [_encoder endEncoding];
        _encoderOpen = NO;
    }
}

-(void) finish {
    @autoreleasepool {
        [_commandBuffer commit];
        [_commandBuffer waitUntilCompleted];

        // Surface GPU-side failures instead of silently recycling the buffer.
        NSError *commandError = [_commandBuffer error];
        if (commandError != nil) {
            NSLog(@"Command buffer failed: %@", commandError);
        }

        [self reset];
    }
}

-(void) dealloc {
    @autoreleasepool {
        [self releaseEncodingState];
    }
    [_processFunctionPSO release];
    [_commandQueue release];
    [super dealloc];
}

@end

int main(int argc, const char * argv[]) {
    @autoreleasepool {
        // Objects obtained from MTLCreate… / new… / alloc are owned (+1) and
        // are released explicitly on the success path below. The early
        // error returns terminate the process, so they skip the cleanup.
        id<MTLDevice> device = MTLCreateSystemDefaultDevice();
        if (device == nil) {
            NSLog(@"Failed to find a Metal device.");
            return -1;
        }

        id<MTLBuffer> buffer = [device newBufferWithLength:bufferSize
                                                   options:MTLResourceStorageModeShared];
        if (buffer == nil) {
            NSLog(@"Failed to create buffer.");
            return -1;
        }

        // Kernel: writes 1 + value into every element of `result`.
        NSString *kernelSource =
            @"#include <metal_stdlib>\n"
            @"using namespace metal;\n"
            @"\n"
            @"kernel void init_array(\n"
            @"    device float* result,\n"
            @"    constant int32_t & value,\n"
            @"\n"
            @"    uint2 global_id [[thread_position_in_grid]]\n"
            @")\n"
            @"{\n"
            @"    result[global_id.x] = 1+value;\n"
            @"}\n";

        NSError *error = nil;
        MTLCompileOptions *options = [MTLCompileOptions new];
        // if (@available(macOS 10.15, iOS 13.0, *))
        //     options.languageVersion = MTLLanguageVersion2_2;
        id<MTLLibrary> library = [device newLibraryWithSource:kernelSource
                                                      options:options
                                                        error:&error];
        [options release];
        if (library == nil) {
            NSLog(@"Failed to createLibrary from source: %@ %@", error, [error userInfo]);
            return -1;
        }

        id<MTLFunction> processFunction = [library newFunctionWithName:@"init_array"];
        [library release];
        if (processFunction == nil) {
            NSLog(@"Failed to find the process function.");
            return -1;
        }

        id<MTLComputePipelineState> processFunctionPSO =
            [device newComputePipelineStateWithFunction:processFunction error:&error];
        [processFunction release];
        if (processFunctionPSO == nil) {
            NSLog(@"Failed to created pipeline state object, error %@.", error);
            return -1;
        }

        id<MTLCommandQueue> commandQueue = [device newCommandQueue];
        if (commandQueue == nil) {
            NSLog(@"Failed to find the command queue.");
            return -1;
        }

        MTLSize gridSize = MTLSizeMake(arrayLength, 1, 1);
        NSUInteger threadGroupSize = processFunctionPSO.maxTotalThreadsPerThreadgroup;
        if (threadGroupSize > arrayLength) {
            threadGroupSize = arrayLength;
        }
        MTLSize threadgroupSize = MTLSizeMake(threadGroupSize, 1, 1);

        Helper *helper = [[Helper alloc] init];
        helper.commandQueue = commandQueue;
        helper.processFunctionPSO = processFunctionPSO;
        [helper initState];

        for (int32_t i = 0; i < 100000; ++i) {
            // In the real program these calls come from different places and
            // threads rather than one loop, which is why Helper drains its
            // own pools and owns its Metal objects. The pool here only covers
            // the direct encoder calls made by this demo loop.
            @autoreleasepool {
                [helper.encoder setBytes:&i length:sizeof(int32_t) atIndex:1];
                [helper.encoder setBuffer:buffer offset:0 atIndex:0];
                [helper enqueue:gridSize threadgroupSize:threadgroupSize];
                [helper finish];
            }
        }

        float *outputData = (float *)(buffer.contents);
        for (unsigned int k = 0; k < arrayLength; k++) {
            NSLog(@"result(%d)= %f", k, outputData[k]);
        }

        // Balance every owned reference taken above.
        [helper release];
        [commandQueue release];
        [processFunctionPSO release];
        [buffer release];
        [device release];
    }
    return 0;
}