Shader "Builtin Gaussian Splat Utilities"
{
    HLSLINCLUDE
    // Shared helpers for the Gaussian-splat compute passes below:
    // spherical-harmonics shading, float-to-sortable-key conversion and the
    // 3D -> 2D covariance projection / decomposition.

    // Per-splat rotation and scale as stored in the "other" buffer.
    struct GaussianSplatOtherData
    {
        float4 rotation;       // consumed as a quaternion (x, y, z, w) by CalcMatrixFromRotationScale
        float4 scaleReserved;  // xyz = scale; w is not read in this file
    };

    // Per-splat, per-view result written by GaussianSplatPrepareOrderCS.
    struct GaussianSplatViewData
    {
        float4 clipCenter;    // projection * view * model * center
        float4 ellipseAxisU;  // xy = first 2D ellipse axis, zw = 0
        float4 ellipseAxisV;  // xy = second 2D ellipse axis, zw = 0
        float4 colorOpacity;  // rgb = shaded color, a = opacity copied from the color buffer
    };

    // 45 SH coefficients laid out as three planes of 15 values each
    // (see LoadSHCoefficientTriplet for the indexing).
    struct GaussianSplatSHData
    {
        float coefficients[45];
    };

    // Constant factors multiplied into the SH band 1, 2 and 3 terms.
    static const float SH_C1 = 0.4886025;
    static const float SH_C2[] = { 1.0925484, -1.0925484, 0.3153916, -1.0925484, 0.5462742 };
    static const float SH_C3[] = { -0.5900436, 2.8906114, -0.4570458, 0.3731763, -0.4570458, 1.4453057, -0.5900436 };

    // Gathers one three-component coefficient: component k is read from
    // coefficients[coefficientIndex + 15 * k].
    float3 LoadSHCoefficientTriplet(GaussianSplatSHData data, uint coefficientIndex)
    {
        return float3(
            data.coefficients[coefficientIndex + 0u],
            data.coefficients[coefficientIndex + 15u],
            data.coefficients[coefficientIndex + 30u]);
    }

    // Adds the view-dependent SH contribution up to band `shOrder` (0..3) on
    // top of `baseColor` and clamps the result to be non-negative.
    // `direction` is negated before evaluation; the caller in this file passes
    // a normalized object-space vector pointing from the splat to the camera.
    float3 ShadeGaussianSplatSH(float3 baseColor, GaussianSplatSHData data, float3 direction, uint shOrder)
    {
        direction *= -1.0;

        const float x = direction.x;
        const float y = direction.y;
        const float z = direction.z;

        float3 result = baseColor;
        if (shOrder >= 1u)
        {
            result += SH_C1 * (
                -LoadSHCoefficientTriplet(data, 0u) * y
                + LoadSHCoefficientTriplet(data, 1u) * z
                - LoadSHCoefficientTriplet(data, 2u) * x);

            if (shOrder >= 2u)
            {
                const float xx = x * x;
                const float yy = y * y;
                const float zz = z * z;
                const float xy = x * y;
                const float yz = y * z;
                const float xz = x * z;

                result +=
                    (SH_C2[0] * xy) * LoadSHCoefficientTriplet(data, 3u)
                    + (SH_C2[1] * yz) * LoadSHCoefficientTriplet(data, 4u)
                    + (SH_C2[2] * (2.0 * zz - xx - yy)) * LoadSHCoefficientTriplet(data, 5u)
                    + (SH_C2[3] * xz) * LoadSHCoefficientTriplet(data, 6u)
                    + (SH_C2[4] * (xx - yy)) * LoadSHCoefficientTriplet(data, 7u);

                if (shOrder >= 3u)
                {
                    result +=
                        (SH_C3[0] * y * (3.0 * xx - yy)) * LoadSHCoefficientTriplet(data, 8u)
                        + (SH_C3[1] * xy * z) * LoadSHCoefficientTriplet(data, 9u)
                        + (SH_C3[2] * y * (4.0 * zz - xx - yy)) * LoadSHCoefficientTriplet(data, 10u)
                        + (SH_C3[3] * z * (2.0 * zz - 3.0 * xx - 3.0 * yy)) * LoadSHCoefficientTriplet(data, 11u)
                        + (SH_C3[4] * x * (4.0 * zz - xx - yy)) * LoadSHCoefficientTriplet(data, 12u)
                        + (SH_C3[5] * z * (xx - yy)) * LoadSHCoefficientTriplet(data, 13u)
                        + (SH_C3[6] * x * (xx - 3.0 * yy)) * LoadSHCoefficientTriplet(data, 14u);
                }
            }
        }

        return max(result, 0.0);
    }

    // Maps a float to a uint whose unsigned ordering matches the float
    // ordering: negative values get every bit flipped, non-negative values get
    // only the sign bit flipped.
    uint FloatToSortableUint(float value)
    {
        const uint rawValue = asuint(value);
        const uint mask = (rawValue & 0x80000000u) != 0u ? 0xffffffffu : 0x80000000u;
        return rawValue ^ mask;
    }

    // Builds rotation * scale, where the rotation matrix is expanded from the
    // quaternion (x, y, z, w). The quaternion is not normalized here.
    float3x3 CalcMatrixFromRotationScale(float4 rotation, float3 scale)
    {
        const float x = rotation.x;
        const float y = rotation.y;
        const float z = rotation.z;
        const float w = rotation.w;

        const float3x3 rotationMatrix = float3x3(
            1.0 - 2.0 * (y * y + z * z), 2.0 * (x * y - w * z), 2.0 * (x * z + w * y),
            2.0 * (x * y + w * z), 1.0 - 2.0 * (x * x + z * z), 2.0 * (y * z - w * x),
            2.0 * (x * z - w * y), 2.0 * (y * z + w * x), 1.0 - 2.0 * (x * x + y * y));

        const float3x3 scaleMatrix = float3x3(
            scale.x, 0.0, 0.0,
            0.0, scale.y, 0.0,
            0.0, 0.0, scale.z);

        return mul(rotationMatrix, scaleMatrix);
    }

    // Computes sigma = M * M^T and returns its six unique entries:
    // sigma0 = (m00, m01, m02), sigma1 = (m11, m12, m22).
    void CalcCovariance3D(float3x3 rotationScaleMatrix, out float3 sigma0, out float3 sigma1)
    {
        const float3x3 sigma = mul(rotationScaleMatrix, transpose(rotationScaleMatrix));
        sigma0 = float3(sigma._m00, sigma._m01, sigma._m02);
        sigma1 = float3(sigma._m11, sigma._m12, sigma._m22);
    }

    // Projects a 3D covariance (packed as produced by CalcCovariance3D) to a
    // 2D covariance using the Jacobian of the perspective projection at
    // `viewPosition`. Returns (c00, c01, c11) with 0.3 added to both diagonal
    // entries. When |viewPosition.z| is at most 1e-5 the projection is skipped
    // and the constant (0.3, 0, 0.3) is returned.
    float3 CalcCovariance2D(
        float3 viewPosition,
        float3 covariance3D0,
        float3 covariance3D1,
        float4x4 viewMatrix,
        float4x4 projectionMatrix,
        float2 screenSize)
    {
        if (abs(viewPosition.z) <= 1.0e-5)
        {
            return float3(0.3, 0.0, 0.3);
        }

        const float aspect = projectionMatrix[0][0] / projectionMatrix[1][1];
        const float tanFovX = rcp(projectionMatrix[0][0]);
        // NOTE(review): projectionMatrix[1][1] * aspect simplifies to
        // projectionMatrix[0][0], so tanFovY always equals tanFovX here.
        // Left as-is; confirm whether rcp(projectionMatrix[1][1]) was intended.
        const float tanFovY = rcp(projectionMatrix[1][1] * aspect);
        const float limitX = 1.3 * tanFovX;
        const float limitY = 1.3 * tanFovY;

        // Clamp x/z and y/z to +-1.3 * tanFov before building the Jacobian.
        float3 clampedViewPosition = viewPosition;
        clampedViewPosition.x =
            clamp(clampedViewPosition.x / clampedViewPosition.z, -limitX, limitX) * clampedViewPosition.z;
        clampedViewPosition.y =
            clamp(clampedViewPosition.y / clampedViewPosition.z, -limitY, limitY) * clampedViewPosition.z;

        // A single focal length, derived from the horizontal resolution, is
        // used for both rows of the Jacobian.
        const float focalLength = screenSize.x * projectionMatrix[0][0] * 0.5;
        const float3x3 jacobian = float3x3(
            focalLength / clampedViewPosition.z,
            0.0,
            -(focalLength * clampedViewPosition.x) / (clampedViewPosition.z * clampedViewPosition.z),
            0.0,
            focalLength / clampedViewPosition.z,
            -(focalLength * clampedViewPosition.y) / (clampedViewPosition.z * clampedViewPosition.z),
            0.0,
            0.0,
            0.0);

        const float3x3 worldToView = (float3x3)viewMatrix;
        const float3x3 transform = mul(jacobian, worldToView);

        // Rebuild the full symmetric matrix from its six packed entries.
        const float3x3 covariance3D = float3x3(
            covariance3D0.x, covariance3D0.y, covariance3D0.z,
            covariance3D0.y, covariance3D1.x, covariance3D1.y,
            covariance3D0.z, covariance3D1.y, covariance3D1.z);

        float3x3 covariance2D = mul(transform, mul(covariance3D, transpose(transform)));
        covariance2D._m00 += 0.3;
        covariance2D._m11 += 0.3;

        return float3(covariance2D._m00, covariance2D._m01, covariance2D._m11);
    }

    // Splits a symmetric 2x2 covariance, packed as (c00, c01, c11), into two
    // perpendicular ellipse axes. Eigenvalues are floored at 0.1 and each axis
    // length sqrt(2 * lambda) is capped at 4096.
    void DecomposeCovariance(float3 covariance2D, out float2 axisU, out float2 axisV)
    {
        const float diagonal0 = covariance2D.x;
        const float diagonal1 = covariance2D.z;
        const float offDiagonal = covariance2D.y;

        const float mid = 0.5 * (diagonal0 + diagonal1);
        const float radius = length(float2((diagonal0 - diagonal1) * 0.5, offDiagonal));
        const float lambda0 = max(mid + radius, 0.1);
        const float lambda1 = max(mid - radius, 0.1);

        // The degenerate test must look at the un-normalized vector:
        // normalize() of a zero-length vector is undefined (typically NaN), and
        // a NaN never satisfies the "< 1e-5" comparison, so testing after
        // normalization would let the invalid basis through. The vector is
        // exactly zero whenever offDiagonal == 0 and diagonal0 >= diagonal1,
        // in which case (1, 0) is the eigenvector that belongs to lambda0.
        const float2 rawBasis = float2(offDiagonal, lambda0 - diagonal0);
        float2 basis = float2(1.0, 0.0);
        if (!all(abs(rawBasis) < 1.0e-5))
        {
            basis = normalize(rawBasis);
        }
        basis.y = -basis.y;

        const float maxAxisLength = 4096.0;
        axisU = min(sqrt(2.0 * lambda0), maxAxisLength) * basis;
        axisV = min(sqrt(2.0 * lambda1), maxAxisLength) * float2(basis.y, -basis.x);
    }
    ENDHLSL

    SubShader
    {
        Pass
        {
            Name "GaussianSplatPrepareOrder"
            HLSLPROGRAM
            #pragma target 4.5
            #pragma compute GaussianSplatPrepareOrderCS

            // gSplatParams in this pass: x = splat count, y = sort capacity,
            // z = SH order (clamped to 3 below); w is not read.
            cbuffer PerObjectConstants
            {
                float4x4 gProjectionMatrix;
                float4x4 gViewMatrix;
                float4x4 gModelMatrix;
                float4x4 gWorldToObjectMatrix;
                float4 gCameraRight;
                float4 gCameraUp;
                float4 gCameraWorldPos;
                float4 gScreenParams;
                float4 gSplatParams;
            };

            // NOTE(review): element types are inferred from how each buffer is
            // read or written in the kernel below; confirm the strides against
            // the CPU-side buffer creation (in particular float4 positions).
            StructuredBuffer<float4> GaussianSplatPositions;
            StructuredBuffer<GaussianSplatOtherData> GaussianSplatOther;
            StructuredBuffer<float4> GaussianSplatColor;
            StructuredBuffer<GaussianSplatSHData> GaussianSplatSH;
            RWStructuredBuffer<uint> GaussianSplatSortDistances;
            RWStructuredBuffer<uint> GaussianSplatOrderBuffer;
            RWStructuredBuffer<GaussianSplatViewData> GaussianSplatViewDataBuffer;

            // One thread per sort slot. Slots that hold a splat get a sort key
            // derived from view-space z, an identity order entry and their
            // per-view data; slots past the splat count get the maximum key so
            // that an ascending sort places them last.
            [numthreads(64, 1, 1)]
            void GaussianSplatPrepareOrderCS(uint3 dispatchThreadId : SV_DispatchThreadID)
            {
                const uint splatCount = (uint)gSplatParams.x;
                const uint sortCapacity = max((uint)gSplatParams.y, splatCount);
                const uint index = dispatchThreadId.x;
                if (index >= sortCapacity)
                {
                    return;
                }

                if (index >= splatCount)
                {
                    // Padding slot: no view data is written for it.
                    GaussianSplatSortDistances[index] = 0xffffffffu;
                    GaussianSplatOrderBuffer[index] = 0u;
                    return;
                }

                GaussianSplatOrderBuffer[index] = index;

                // Stays all-zero when the splat center has clip-space w <= 0.
                GaussianSplatViewData viewData = (GaussianSplatViewData)0;

                const float3 localCenter = GaussianSplatPositions[index].xyz;
                const GaussianSplatOtherData otherData = GaussianSplatOther[index];
                const float4 colorOpacity = GaussianSplatColor[index];
                const GaussianSplatSHData shData = GaussianSplatSH[index];
                const uint shOrder = min((uint)gSplatParams.z, 3u);

                const float3 worldCenter = mul(gModelMatrix, float4(localCenter, 1.0)).xyz;
                const float3 viewCenter = mul(gViewMatrix, float4(worldCenter, 1.0)).xyz;
                GaussianSplatSortDistances[index] = FloatToSortableUint(viewCenter.z);

                const float4 clipCenter = mul(gProjectionMatrix, float4(viewCenter, 1.0));
                if (clipCenter.w > 0.0)
                {
                    const float3x3 modelLinear = (float3x3)gModelMatrix;
                    const float3x3 rotationScaleMatrix =
                        CalcMatrixFromRotationScale(otherData.rotation, otherData.scaleReserved.xyz);
                    const float3x3 worldRotationScale = mul(modelLinear, rotationScaleMatrix);

                    float3 covariance3D0 = 0.0;
                    float3 covariance3D1 = 0.0;
                    CalcCovariance3D(worldRotationScale, covariance3D0, covariance3D1);

                    const float3 covariance2D = CalcCovariance2D(
                        viewCenter,
                        covariance3D0,
                        covariance3D1,
                        gViewMatrix,
                        gProjectionMatrix,
                        gScreenParams.xy);

                    float2 axisU = 0.0;
                    float2 axisV = 0.0;
                    DecomposeCovariance(covariance2D, axisU, axisV);

                    viewData.clipCenter = clipCenter;
                    viewData.ellipseAxisU = float4(axisU, 0.0, 0.0);
                    viewData.ellipseAxisV = float4(axisV, 0.0, 0.0);

                    float3 shadedColor = colorOpacity.rgb;
                    if (shOrder > 0u)
                    {
                        // SH is evaluated with the splat-to-camera direction
                        // taken into object space.
                        const float3 worldViewDirection = gCameraWorldPos.xyz - worldCenter;
                        const float3 objectViewDirection = normalize(
                            mul((float3x3)gWorldToObjectMatrix, worldViewDirection));
                        shadedColor = ShadeGaussianSplatSH(colorOpacity.rgb, shData, objectViewDirection, shOrder);
                    }

                    viewData.colorOpacity = float4(shadedColor, colorOpacity.a);
                }

                GaussianSplatViewDataBuffer[index] = viewData;
            }
            ENDHLSL
        }

        Pass
        {
            Name "GaussianSplatBitonicSort"
            HLSLPROGRAM
            #pragma target 4.5
            #pragma compute GaussianSplatBitonicSortCS

            // gSplatParams in this pass: y = sort capacity, z = partner mask,
            // w = level mask; x is not read.
            cbuffer PerObjectConstants
            {
                float4x4 gProjectionMatrix;
                float4x4 gViewMatrix;
                float4x4 gModelMatrix;
                float4x4 gWorldToObjectMatrix;
                float4 gCameraRight;
                float4 gCameraUp;
                float4 gCameraWorldPos;
                float4 gScreenParams;
                float4 gSplatParams;
            };

            // Element type inferred from the uint keys and indices exchanged below.
            RWStructuredBuffer<uint> GaussianSplatSortDistances;
            RWStructuredBuffer<uint> GaussianSplatOrderBuffer;

            // One compare-and-exchange step over the (key, order) pairs. Each
            // pair {index, index ^ partnerMask} is handled only by the thread
            // with the lower index, so no two threads touch the same slots.
            [numthreads(256, 1, 1)]
            void GaussianSplatBitonicSortCS(uint3 dispatchThreadId : SV_DispatchThreadID)
            {
                const uint sortCapacity = (uint)gSplatParams.y;
                const uint partnerMask = (uint)gSplatParams.z;
                const uint levelMask = (uint)gSplatParams.w;
                const uint index = dispatchThreadId.x;
                if (index >= sortCapacity)
                {
                    return;
                }

                const uint partnerIndex = index ^ partnerMask;
                if (partnerIndex >= sortCapacity || partnerIndex <= index)
                {
                    return;
                }

                const uint leftDistance = GaussianSplatSortDistances[index];
                const uint rightDistance = GaussianSplatSortDistances[partnerIndex];
                const uint leftOrder = GaussianSplatOrderBuffer[index];
                const uint rightOrder = GaussianSplatOrderBuffer[partnerIndex];

                // The bit selected by levelMask decides the sort direction for
                // this pair.
                const bool ascending = (index & levelMask) == 0u;
                const bool shouldSwap = ascending
                    ? (leftDistance > rightDistance)
                    : (leftDistance < rightDistance);
                if (!shouldSwap)
                {
                    return;
                }

                GaussianSplatSortDistances[index] = rightDistance;
                GaussianSplatSortDistances[partnerIndex] = leftDistance;
                GaussianSplatOrderBuffer[index] = rightOrder;
                GaussianSplatOrderBuffer[partnerIndex] = leftOrder;
            }
            ENDHLSL
        }
    }
}