Hi , I am a new one for using metal shader.
and I need using compute shader to caculate one image's mean value from all pixels in this image, then I will write this mean value to output texture for using.
Here is my shader kenerl , so anybody can help me to check it is right ?
kernel void instance_normalize(texture2d_array<uint, access::read> texture_in [[texture(0)]],
texture2d_array<float, access::write> texture_out [[texture(1)]],
ushort3 groupdim_in_grid [[threadgroups_per_grid]],
ushort3 groupdim_idx [[threadgroup_position_in_grid]],
ushort3 threaddim_in_group [[threads_per_threadgroup]],
ushort3 threaddim_idx [[thread_position_in_threadgroup]])
{
uint width = texture_in.get_width();
uint height = texture_in.get_height();
threadgroup atomic_uint rgbsum[3] ;
threadgroup float rgbmean[3];
atomic_store_explicit( rgbsum, 0, memory_order_relaxed );
atomic_store_explicit( (rgbsum+1), 0, memory_order_relaxed );
atomic_store_explicit( (rgbsum+2), 0, memory_order_relaxed );
/
for (uint j = groupdim_idx.y * threaddim_in_group.y + threaddim_idx.y; j < height; j += threaddim_in_group.y * groupdim_in_grid.y)
{
ushort tid = threaddim_idx.x;
threadgroup uint3 *sdata;
ushort i = groupdim_idx.x * (threaddim_in_group.x*2) + threaddim_idx.x;
/
ushort2 pos = ushort2(i,j);
ushort2 pos0= ushort2(i+threaddim_in_group.x, j);
sdata[tid] = texture_in.read(pos,threaddim_idx.z).xyz + texture_in.read(pos0,threaddim_idx.z).xyz ;
threadgroup_barrier(mem_flags::mem_threadgroup);
/
for(ushort s = threaddim_in_group.x/2 ; s>0; s>>=1)
{
if(tid < s)
{
sdata[tid] += sdata[tid+s];
}
threadgroup_barrier(mem_flags::mem_threadgroup);
}
if(tid == 0)
{
atomic_fetch_add_explicit(rgbsum, sdata[0].x, memory_order_relaxed );
atomic_fetch_add_explicit((rgbsum+1), sdata[0].y, memory_order_relaxed );
atomic_fetch_add_explicit((rgbsum+2), sdata[0].z, memory_order_relaxed );
}
}
float r = (float)rgbsum[0]/(float)(width*height);//ERROR:ios convert 'threadgroup metal::atomic_uint' to 'float'
float g = (float)rgbsum[1]/(float)(width*height);
float b = (float)rgbsum[2]/(float)(width*height);
float4 a = float4(r,g,b,1.0);
texture_out.write(a, threaddim_idx);
}
So my problem is how to convert auto_uint reslut into output?