Files
XCEngine/engine/assets/builtin/shaders/gaussian-splat-utilities.shader

532 lines
20 KiB
GLSL

Shader "Builtin Gaussian Splat Utilities"
{
HLSLINCLUDE
// Per-splat orientation and size record, read from the GaussianSplatOther buffer.
struct GaussianSplatOtherData
{
// Quaternion stored as (x, y, z, w); expanded to a rotation matrix by CalcMatrixFromRotationScale.
float4 rotation;
// xyz: per-axis scale passed to CalcMatrixFromRotationScale. w is not read anywhere in the visible source.
float4 scaleReserved;
};
// Per-splat view-dependent record written by GaussianSplatPrepareOrderCS.
// Splats in culled chunks, or splats that fail the in-front-of-camera test, are stored as all zeros.
struct GaussianSplatViewData
{
// Clip-space position of the splat center (projection * view * model * local center).
float4 clipCenter;
// xy: first ellipse axis produced by DecomposeCovariance; zw are written as 0.
float4 ellipseAxisU;
// xy: second ellipse axis produced by DecomposeCovariance; zw are written as 0.
float4 ellipseAxisV;
// rgb: splat color after optional SH shading; a: alpha copied unchanged from GaussianSplatColor.
float4 colorOpacity;
};
// Per-splat spherical-harmonics payload: 45 floats that LoadSHCoefficientTriplet addresses as
// three consecutive runs of 15 (offsets +0, +15 and +30 together form one float3 coefficient).
struct GaussianSplatSHData
{
float coefficients[45];
};
// Per-chunk summary record read from the GaussianSplatChunks buffer.
// Only the position bounds and the scale words are read by the code visible in this file.
struct GaussianSplatChunkData
{
// Not read by the visible code.
// NOTE(review): presumably packed per-channel color bounds - confirm against the CPU-side writer.
uint colR;
uint colG;
uint colB;
uint colA;
// Local-space bounds per axis: .x is the minimum, .y is the maximum.
// A chunk whose minimum exceeds its maximum on any axis is treated as culled.
float2 posX;
float2 posY;
float2 posZ;
// Two 16-bit halves packed per word (see UnpackHalfRange); the upper half is read as the
// chunk's maximum splat scale on that axis. The lower half is not read by the visible code.
uint sclX;
uint sclY;
uint sclZ;
// Not read by the visible code.
// NOTE(review): presumably packed SH bounds - confirm against the CPU-side writer.
uint shR;
uint shG;
uint shB;
};
// Number of consecutive splats covered by one chunk record (chunkIndex = splatIndex / size).
static const uint GAUSSIAN_SPLAT_CHUNK_SIZE = 256u;
// Scale factor applied to the first-order SH term (coefficients 0..2) in ShadeGaussianSplatSH.
static const float SH_C1 = 0.4886025;
// Scale factors for the five second-order SH terms (coefficients 3..7), in that order.
static const float SH_C2[] = { 1.0925484, -1.0925484, 0.3153916, -1.0925484, 0.5462742 };
// Scale factors for the seven third-order SH terms (coefficients 8..14), in that order.
static const float SH_C3[] = { -0.5900436, 2.8906114, -0.4570458, 0.3731763, -0.4570458, 1.4453057, -0.5900436 };
// Gathers one float3 SH coefficient from the planar coefficient array: the x, y and z
// components of coefficient i are stored at i, i + 15 and i + 30 respectively.
float3 LoadSHCoefficientTriplet(GaussianSplatSHData data, uint coefficientIndex)
{
    const uint planeStride = 15u;
    float3 triplet;
    triplet.x = data.coefficients[coefficientIndex];
    triplet.y = data.coefficients[coefficientIndex + planeStride];
    triplet.z = data.coefficients[coefficientIndex + 2u * planeStride];
    return triplet;
}
// Adds view-dependent spherical-harmonics terms to a splat's base color.
//   baseColor - starting color; returned (clamped to >= 0) when shOrder is 0.
//   data      - the splat's 45 SH coefficients.
//   direction - view direction; the terms are evaluated on its negation.
//   shOrder   - highest order to evaluate; each order from 1 to 3 adds its own group of terms.
// Returns the accumulated color with every component clamped to be non-negative.
float3 ShadeGaussianSplatSH(float3 baseColor, GaussianSplatSHData data, float3 direction, uint shOrder)
{
    const float3 dir = direction * -1.0;
    float3 color = baseColor;

    if (shOrder < 1u)
    {
        return max(color, 0.0);
    }

    // First order: three coefficients weighted by y, z and x.
    const float3 sh0 = LoadSHCoefficientTriplet(data, 0u);
    const float3 sh1 = LoadSHCoefficientTriplet(data, 1u);
    const float3 sh2 = LoadSHCoefficientTriplet(data, 2u);
    color += SH_C1 * (-sh0 * dir.y + sh1 * dir.z - sh2 * dir.x);

    if (shOrder < 2u)
    {
        return max(color, 0.0);
    }

    // Pairwise products shared by the second- and third-order terms.
    const float xx = dir.x * dir.x;
    const float yy = dir.y * dir.y;
    const float zz = dir.z * dir.z;
    const float xy = dir.x * dir.y;
    const float yz = dir.y * dir.z;
    const float xz = dir.x * dir.z;

    // Second order: five coefficients.
    const float3 secondOrder =
        (SH_C2[0] * xy) * LoadSHCoefficientTriplet(data, 3u) +
        (SH_C2[1] * yz) * LoadSHCoefficientTriplet(data, 4u) +
        (SH_C2[2] * (2.0 * zz - xx - yy)) * LoadSHCoefficientTriplet(data, 5u) +
        (SH_C2[3] * xz) * LoadSHCoefficientTriplet(data, 6u) +
        (SH_C2[4] * (xx - yy)) * LoadSHCoefficientTriplet(data, 7u);
    color += secondOrder;

    if (shOrder < 3u)
    {
        return max(color, 0.0);
    }

    // Third order: seven coefficients.
    const float3 thirdOrder =
        (SH_C3[0] * dir.y * (3.0 * xx - yy)) * LoadSHCoefficientTriplet(data, 8u) +
        (SH_C3[1] * xy * dir.z) * LoadSHCoefficientTriplet(data, 9u) +
        (SH_C3[2] * dir.y * (4.0 * zz - xx - yy)) * LoadSHCoefficientTriplet(data, 10u) +
        (SH_C3[3] * dir.z * (2.0 * zz - 3.0 * xx - 3.0 * yy)) * LoadSHCoefficientTriplet(data, 11u) +
        (SH_C3[4] * dir.x * (4.0 * zz - xx - yy)) * LoadSHCoefficientTriplet(data, 12u) +
        (SH_C3[5] * dir.z * (xx - yy)) * LoadSHCoefficientTriplet(data, 13u) +
        (SH_C3[6] * dir.x * (xx - 3.0 * yy)) * LoadSHCoefficientTriplet(data, 14u);
    color += thirdOrder;

    return max(color, 0.0);
}
// Maps a float's bit pattern to a uint whose unsigned ordering follows the float ordering:
// values with the sign bit set have every bit inverted, all others only get the sign bit set.
uint FloatToSortableUint(float value)
{
    const uint bits = asuint(value);
    if ((bits >> 31u) != 0u)
    {
        return ~bits;
    }
    return bits | 0x80000000u;
}
// Splits a 32-bit word into two half-precision floats and widens them to float:
// result.x comes from the low 16 bits, result.y from the high 16 bits.
float2 UnpackHalfRange(uint packedValue)
{
    const uint lowBits = packedValue & 0xffffu;
    const uint highBits = packedValue >> 16u;
    float2 unpacked;
    unpacked.x = f16tof32(lowBits);
    unpacked.y = f16tof32(highBits);
    return unpacked;
}
// Carries a local-space point through the model, view and projection matrices in turn.
// The w component is reset to 1 after the model and view steps, so only the projection
// step contributes to the returned w.
float4 TransformLocalPointToClip(
    float3 localPosition,
    float4x4 modelMatrix,
    float4x4 viewMatrix,
    float4x4 projectionMatrix)
{
    float4 homogeneousPoint = float4(localPosition, 1.0);
    homogeneousPoint = float4(mul(modelMatrix, homogeneousPoint).xyz, 1.0);
    homogeneousPoint = float4(mul(viewMatrix, homogeneousPoint).xyz, 1.0);
    return mul(projectionMatrix, homogeneousPoint);
}
// Conservative frustum rejection test for one chunk's local-space bounding box.
// Returns true when:
//   - the stored bounds are inverted (min > max on any axis), or
//   - all eight padded box corners have clip-space w <= 0, or
//   - no corner has w <= 0 and every corner lies beyond the same clip plane.
// Returns false for an out-of-range chunk index, and whenever only some corners have
// w <= 0 (the box straddles the camera plane, so it cannot be rejected safely).
bool IsChunkDefinitelyOutsideFrustum(
    GaussianSplatChunkData chunkData,
    uint chunkCount,
    uint chunkIndex,
    float4x4 modelMatrix,
    float4x4 viewMatrix,
    float4x4 projectionMatrix)
{
    const bool validChunk = (chunkCount != 0u) && (chunkIndex < chunkCount);
    if (!validChunk)
    {
        return false;
    }

    float3 boxMin = float3(chunkData.posX.x, chunkData.posY.x, chunkData.posZ.x);
    float3 boxMax = float3(chunkData.posX.y, chunkData.posY.y, chunkData.posZ.y);
    if (any(boxMin > boxMax))
    {
        return true;
    }

    // Pad the box by three times the largest per-axis maximum scale stored for the chunk,
    // so splats whose centers sit on the box surface are still covered.
    const float maxScaleX = UnpackHalfRange(chunkData.sclX).y;
    const float maxScaleY = UnpackHalfRange(chunkData.sclY).y;
    const float maxScaleZ = UnpackHalfRange(chunkData.sclZ).y;
    const float padding = max(maxScaleX, max(maxScaleY, maxScaleZ)) * 3.0;
    boxMin -= padding;
    boxMax += padding;

    // One bit per clip plane (left, right, bottom, top, near, far). A bit survives the loop
    // only if every corner in front of the camera is outside that plane.
    uint sharedOutsideMask = 0x3fu;
    uint cornersBehind = 0u;

    [unroll]
    for (uint corner = 0u; corner < 8u; ++corner)
    {
        const float3 localCorner = float3(
            (corner & 1u) == 0u ? boxMin.x : boxMax.x,
            (corner & 2u) == 0u ? boxMin.y : boxMax.y,
            (corner & 4u) == 0u ? boxMin.z : boxMax.z);
        const float4 clip = TransformLocalPointToClip(
            localCorner,
            modelMatrix,
            viewMatrix,
            projectionMatrix);

        if (clip.w <= 0.0)
        {
            ++cornersBehind;
            continue;
        }

        uint outsideMask = 0u;
        outsideMask |= (clip.x < -clip.w) ? 0x01u : 0u; // left
        outsideMask |= (clip.x > clip.w) ? 0x02u : 0u;  // right
        outsideMask |= (clip.y < -clip.w) ? 0x04u : 0u; // bottom
        outsideMask |= (clip.y > clip.w) ? 0x08u : 0u;  // top
        outsideMask |= (clip.z < 0.0) ? 0x10u : 0u;     // near
        outsideMask |= (clip.z > clip.w) ? 0x20u : 0u;  // far
        sharedOutsideMask &= outsideMask;
    }

    if (cornersBehind == 8u)
    {
        return true;
    }
    if (cornersBehind != 0u)
    {
        return false;
    }
    return sharedOutsideMask != 0u;
}
// Builds the 3x3 matrix R * S, where R is the rotation matrix expanded from the quaternion
// `rotation` (x, y, z, w) and S is the diagonal matrix of `scale`.
float3x3 CalcMatrixFromRotationScale(float4 rotation, float3 scale)
{
    // Quaternion component products used by the matrix entries below.
    const float xx = rotation.x * rotation.x;
    const float yy = rotation.y * rotation.y;
    const float zz = rotation.z * rotation.z;
    const float xy = rotation.x * rotation.y;
    const float xz = rotation.x * rotation.z;
    const float yz = rotation.y * rotation.z;
    const float wx = rotation.w * rotation.x;
    const float wy = rotation.w * rotation.y;
    const float wz = rotation.w * rotation.z;

    const float3x3 rotationPart = float3x3(
        float3(1.0 - 2.0 * (yy + zz), 2.0 * (xy - wz), 2.0 * (xz + wy)),
        float3(2.0 * (xy + wz), 1.0 - 2.0 * (xx + zz), 2.0 * (yz - wx)),
        float3(2.0 * (xz - wy), 2.0 * (yz + wx), 1.0 - 2.0 * (xx + yy)));

    const float3x3 scalePart = float3x3(
        float3(scale.x, 0.0, 0.0),
        float3(0.0, scale.y, 0.0),
        float3(0.0, 0.0, scale.z));

    return mul(rotationPart, scalePart);
}
// Computes the symmetric matrix M * M^T and returns its six unique entries:
//   sigma0 = (m00, m01, m02)  - the first row
//   sigma1 = (m11, m12, m22)  - the rest of the upper triangle
void CalcCovariance3D(float3x3 rotationScaleMatrix, out float3 sigma0, out float3 sigma1)
{
    const float3x3 product = mul(rotationScaleMatrix, transpose(rotationScaleMatrix));
    sigma0 = product[0];
    sigma1 = float3(product[1].yz, product[2].z);
}
// Projects a splat's 3D covariance into a 2D covariance.
//   localPosition    - splat center in local space.
//   covariance3D0/1  - upper triangle of the 3D covariance as produced by CalcCovariance3D.
//   modelViewMatrix  - local-to-view transform; its upper 3x3 also rotates the covariance.
//   projectionMatrix - only entries [0][0] and [1][1] are read.
//   screenSize       - only .x is read, to derive the focal length.
// Returns (c00, c01, c11) of the 2D covariance with 0.3 added to both diagonal entries.
// When the view-space depth is within 1e-5 of zero the fallback (0.3, 0, 0.3) is returned.
float3 CalcCovariance2D(
    float3 localPosition,
    float3 covariance3D0,
    float3 covariance3D1,
    float4x4 modelViewMatrix,
    float4x4 projectionMatrix,
    float2 screenSize)
{
    const float3 viewPosition = mul(modelViewMatrix, float4(localPosition, 1.0)).xyz;
    const float depth = viewPosition.z;
    if (abs(depth) <= 1.0e-5)
    {
        return float3(0.3, 0.0, 0.3);
    }

    // Clamp the projected x/depth and y/depth ratios to 1.3x the frustum tangent limits
    // before forming the Jacobian.
    const float2 tanFov = float2(rcp(projectionMatrix[0][0]), rcp(projectionMatrix[1][1]));
    const float2 ratioLimit = 1.3 * tanFov;
    const float2 clampedXY = clamp(viewPosition.xy / depth, -ratioLimit, ratioLimit) * depth;

    const float focalLength = screenSize.x * projectionMatrix[0][0] * 0.5;
    const float focalOverDepth = focalLength / depth;
    const float depthSquared = depth * depth;

    // The third row is zero: only the two screen axes are kept.
    const float3x3 jacobian = float3x3(
        float3(focalOverDepth, 0.0, -(focalLength * clampedXY.x) / depthSquared),
        float3(0.0, focalOverDepth, -(focalLength * clampedXY.y) / depthSquared),
        float3(0.0, 0.0, 0.0));

    const float3x3 viewRotation = (float3x3)modelViewMatrix;
    const float3x3 projector = mul(jacobian, viewRotation);

    // Rebuild the full symmetric 3x3 covariance from its packed upper triangle.
    const float3x3 fullCovariance = float3x3(
        float3(covariance3D0.x, covariance3D0.y, covariance3D0.z),
        float3(covariance3D0.y, covariance3D1.x, covariance3D1.y),
        float3(covariance3D0.z, covariance3D1.y, covariance3D1.z));

    const float3x3 projected = mul(projector, mul(fullCovariance, transpose(projector)));
    return float3(projected._m00 + 0.3, projected._m01, projected._m11 + 0.3);
}
// Converts a symmetric 2x2 covariance into two perpendicular ellipse axes.
//   covariance2D - packed as (c00, c01, c11).
//   axisU        - direction of the larger eigenvalue, scaled by sqrt(2 * lambda0).
//   axisV        - perpendicular direction, scaled by sqrt(2 * lambda1).
// The smaller eigenvalue is floored at 0.1 and both axis lengths are capped at 4096.
void DecomposeCovariance(float3 covariance2D, out float2 axisU, out float2 axisV)
{
    const float diagonal0 = covariance2D.x;
    const float diagonal1 = covariance2D.z;
    const float offDiagonal = covariance2D.y;
    const float mid = 0.5 * (diagonal0 + diagonal1);
    const float radius = length(float2((diagonal0 - diagonal1) * 0.5, offDiagonal));
    const float lambda0 = mid + radius;
    const float lambda1 = max(mid - radius, 0.1);

    // The degenerate test must run on the raw vector, before normalize(). For an
    // axis-aligned covariance (offDiagonal == 0 and diagonal0 >= diagonal1) the raw vector is
    // exactly (0, 0); normalizing it yields NaN, and a test applied afterwards can never
    // succeed because a normalized vector has unit length and NaN fails every comparison.
    // In that case the larger-eigenvalue axis is the x axis, so (1, 0) is the right basis.
    const float2 rawBasis = float2(offDiagonal, lambda0 - diagonal0);
    float2 basis = float2(1.0, 0.0);
    if (any(abs(rawBasis) >= 1.0e-5))
    {
        basis = normalize(rawBasis);
    }
    basis.y = -basis.y;

    const float maxAxisLength = 4096.0;
    axisU = min(sqrt(2.0 * lambda0), maxAxisLength) * basis;
    axisV = min(sqrt(2.0 * lambda1), maxAxisLength) * float2(basis.y, -basis.x);
}
ENDHLSL
SubShader
{
Pass
{
Name "GaussianSplatMarkVisibleChunks"
HLSLPROGRAM
#pragma target 4.5
#pragma compute GaussianSplatMarkVisibleChunksCS
// Constants for the chunk-visibility pass. GaussianSplatMarkVisibleChunksCS reads only
// gProjectionMatrix, gViewMatrix, gModelMatrix and gSplatParams.w; the remaining members are
// not read in this pass.
// NOTE(review): presumably they are kept so every pass shares one constant layout - confirm with the host code.
cbuffer PerObjectConstants
{
float4x4 gProjectionMatrix;
float4x4 gViewMatrix;
float4x4 gModelMatrix;
float4x4 gWorldToObjectMatrix;
float4 gCameraRight;
float4 gCameraUp;
float4 gCameraWorldPos;
float4 gScreenParams;
// w: chunk count (cast to uint). x, y and z are not read in this pass.
float4 gSplatParams;
};
// Input: one GaussianSplatChunkData record per chunk.
StructuredBuffer<GaussianSplatChunkData> GaussianSplatChunks;
// Output: one uint per chunk, written as 1 when the chunk is kept and 0 when it is culled.
RWStructuredBuffer<uint> GaussianSplatVisibleChunks;
// One thread per chunk: writes 0 to GaussianSplatVisibleChunks when the chunk's bounds are
// inverted or the frustum test rejects it, and 1 otherwise. Threads past the chunk count
// write nothing.
[numthreads(64, 1, 1)]
void GaussianSplatMarkVisibleChunksCS(uint3 dispatchThreadId : SV_DispatchThreadID)
{
    const uint totalChunks = (uint)gSplatParams.w;
    const uint chunk = dispatchThreadId.x;
    if (chunk < totalChunks)
    {
        const GaussianSplatChunkData record = GaussianSplatChunks[chunk];

        const bool invertedBounds =
            record.posX.x > record.posX.y ||
            record.posY.x > record.posY.y ||
            record.posZ.x > record.posZ.y;

        const bool culled = invertedBounds ||
            IsChunkDefinitelyOutsideFrustum(
                record,
                totalChunks,
                chunk,
                gModelMatrix,
                gViewMatrix,
                gProjectionMatrix);

        GaussianSplatVisibleChunks[chunk] = culled ? 0u : 1u;
    }
}
ENDHLSL
}
Pass
{
Name "GaussianSplatPrepareOrder"
HLSLPROGRAM
#pragma target 4.5
#pragma compute GaussianSplatPrepareOrderCS
// Constants for the prepare-order pass. gCameraRight and gCameraUp are not read by
// GaussianSplatPrepareOrderCS.
cbuffer PerObjectConstants
{
float4x4 gProjectionMatrix;
float4x4 gViewMatrix;
float4x4 gModelMatrix;
// Only the upper 3x3 is used, to bring the world-space view direction into object space for SH shading.
float4x4 gWorldToObjectMatrix;
float4 gCameraRight;
float4 gCameraUp;
// xyz: camera position in world space. w: near-clip distance (floored at 1e-4 by the kernel).
float4 gCameraWorldPos;
// xy is forwarded to CalcCovariance2D as the screen size (which reads only x).
float4 gScreenParams;
// x: splat count, y: sort capacity, z: SH order (clamped to at most 3), w: chunk count.
// All four are cast to uint by the kernel.
float4 gSplatParams;
};
// Per-splat inputs, indexed by splat index.
StructuredBuffer<float4> GaussianSplatPositions;
StructuredBuffer<GaussianSplatOtherData> GaussianSplatOther;
StructuredBuffer<float4> GaussianSplatColor;
StructuredBuffer<GaussianSplatSHData> GaussianSplatSH;
// Per-chunk visibility flags (0 = culled). This is the only resource with an explicit register binding.
// NOTE(review): presumably needed because the same buffer is a UAV in the visibility pass - confirm.
StructuredBuffer<uint> GaussianSplatVisibleChunks : register(t0, space3);
// Outputs: sort key and initial order per sort slot, and the view data per splat.
RWStructuredBuffer<uint> GaussianSplatSortDistances;
RWStructuredBuffer<uint> GaussianSplatOrderBuffer;
RWStructuredBuffer<GaussianSplatViewData> GaussianSplatViewDataBuffer;
// One thread per sort slot. For real splats it writes the sort key, the identity order entry
// and the view data; for padding slots (index >= splat count) it writes only a maximal sort key
// and order 0.
// Splats in culled or out-of-range chunks also receive the maximal key 0xffffffff, together
// with zeroed view data. Splats that fail the in-front-of-camera test keep their depth key
// but also get zeroed view data.
[numthreads(64, 1, 1)]
void GaussianSplatPrepareOrderCS(uint3 dispatchThreadId : SV_DispatchThreadID)
{
    const uint slot = dispatchThreadId.x;
    const uint splatCount = (uint)gSplatParams.x;
    const uint sortCapacity = max((uint)gSplatParams.y, splatCount);
    if (slot >= sortCapacity)
    {
        return;
    }

    // Padding slot beyond the real splats.
    if (slot >= splatCount)
    {
        GaussianSplatSortDistances[slot] = 0xffffffffu;
        GaussianSplatOrderBuffer[slot] = 0u;
        return;
    }

    GaussianSplatOrderBuffer[slot] = slot;
    GaussianSplatViewData output = (GaussianSplatViewData)0;

    // Reject splats whose chunk is out of range or was culled by the visibility pass.
    const uint chunkCount = (uint)gSplatParams.w;
    const uint owningChunk = slot / GAUSSIAN_SPLAT_CHUNK_SIZE;
    bool chunkVisible = false;
    if (owningChunk < chunkCount)
    {
        chunkVisible = GaussianSplatVisibleChunks[owningChunk] != 0u;
    }
    if (!chunkVisible)
    {
        GaussianSplatSortDistances[slot] = 0xffffffffu;
        GaussianSplatViewDataBuffer[slot] = output;
        return;
    }

    // The sort key is the view-space depth of the splat center.
    const float3 localCenter = GaussianSplatPositions[slot].xyz;
    const float3 worldCenter = mul(gModelMatrix, float4(localCenter, 1.0)).xyz;
    const float3 viewCenter = mul(gViewMatrix, float4(worldCenter, 1.0)).xyz;
    GaussianSplatSortDistances[slot] = FloatToSortableUint(viewCenter.z);

    const float4 clipCenter = mul(gProjectionMatrix, float4(viewCenter, 1.0));
    const float nearClip = max(gCameraWorldPos.w, 1.0e-4);
    const bool inFrontOfCamera = clipCenter.w > 0.0 && viewCenter.z > nearClip;
    if (!inFrontOfCamera)
    {
        GaussianSplatViewDataBuffer[slot] = output;
        return;
    }

    // Screen-space ellipse from the splat's rotation and scale.
    const GaussianSplatOtherData otherData = GaussianSplatOther[slot];
    const float3x3 rotationScaleMatrix =
        CalcMatrixFromRotationScale(otherData.rotation, otherData.scaleReserved.xyz);
    float3 covariance3D0;
    float3 covariance3D1;
    CalcCovariance3D(rotationScaleMatrix, covariance3D0, covariance3D1);

    const float4x4 modelViewMatrix = mul(gViewMatrix, gModelMatrix);
    const float3 covariance2D = CalcCovariance2D(
        localCenter,
        covariance3D0,
        covariance3D1,
        modelViewMatrix,
        gProjectionMatrix,
        gScreenParams.xy);

    float2 axisU;
    float2 axisV;
    DecomposeCovariance(covariance2D, axisU, axisV);

    output.clipCenter = clipCenter;
    output.ellipseAxisU = float4(axisU, 0.0, 0.0);
    output.ellipseAxisV = float4(axisV, 0.0, 0.0);

    // Color: the base color as stored, or SH-shaded along the object-space view direction.
    const float4 colorOpacity = GaussianSplatColor[slot];
    float3 shadedColor = colorOpacity.rgb;
    const uint shOrder = min((uint)gSplatParams.z, 3u);
    if (shOrder > 0u)
    {
        const GaussianSplatSHData shData = GaussianSplatSH[slot];
        const float3 worldViewDirection = gCameraWorldPos.xyz - worldCenter;
        const float3 objectViewDirection = normalize(
            mul((float3x3)gWorldToObjectMatrix, worldViewDirection));
        shadedColor = ShadeGaussianSplatSH(colorOpacity.rgb, shData, objectViewDirection, shOrder);
    }
    output.colorOpacity = float4(shadedColor, colorOpacity.a);

    GaussianSplatViewDataBuffer[slot] = output;
}
ENDHLSL
}
Pass
{
Name "GaussianSplatBitonicSort"
HLSLPROGRAM
#pragma target 4.5
#pragma compute GaussianSplatBitonicSortCS
// Constants for one bitonic-sort step. GaussianSplatBitonicSortCS reads only gSplatParams;
// the matrices and camera vectors are not read in this pass.
// Note that gSplatParams.z and .w carry different values here than in the prepare-order pass.
cbuffer PerObjectConstants
{
float4x4 gProjectionMatrix;
float4x4 gViewMatrix;
float4x4 gModelMatrix;
float4x4 gWorldToObjectMatrix;
float4 gCameraRight;
float4 gCameraUp;
float4 gCameraWorldPos;
float4 gScreenParams;
// y: sort capacity, z: partner mask (XORed with the thread index to find its partner),
// w: level mask (selects ascending vs. descending per index). x is not read in this pass.
float4 gSplatParams;
};
// Sort keys and the order entries that are permuted together with them, both modified in place.
RWStructuredBuffer<uint> GaussianSplatSortDistances;
RWStructuredBuffer<uint> GaussianSplatOrderBuffer;
// One compare-exchange step of a bitonic sort over (key, order) pairs.
// Each thread pairs its index with index ^ partnerMask. Only the lower-indexed thread of an
// in-range pair does any work, so the two slots of a pair are touched by a single thread.
// The pair is sorted ascending when (index & levelMask) == 0 and descending otherwise.
[numthreads(256, 1, 1)]
void GaussianSplatBitonicSortCS(uint3 dispatchThreadId : SV_DispatchThreadID)
{
    const uint capacity = (uint)gSplatParams.y;
    const uint partnerMask = (uint)gSplatParams.z;
    const uint levelMask = (uint)gSplatParams.w;

    const uint lower = dispatchThreadId.x;
    const uint upper = lower ^ partnerMask;
    const bool ownsPair = (lower < capacity) && (upper < capacity) && (upper > lower);
    if (!ownsPair)
    {
        return;
    }

    const uint lowerKey = GaussianSplatSortDistances[lower];
    const uint upperKey = GaussianSplatSortDistances[upper];

    // Equal keys never swap; otherwise swap when the pair violates the requested direction.
    const bool ascending = (lower & levelMask) == 0u;
    const bool outOfOrder = (lowerKey != upperKey) && ((lowerKey > upperKey) == ascending);
    if (outOfOrder)
    {
        const uint lowerOrder = GaussianSplatOrderBuffer[lower];
        const uint upperOrder = GaussianSplatOrderBuffer[upper];

        GaussianSplatSortDistances[lower] = upperKey;
        GaussianSplatSortDistances[upper] = lowerKey;
        GaussianSplatOrderBuffer[lower] = upperOrder;
        GaussianSplatOrderBuffer[upper] = lowerOrder;
    }
}
ENDHLSL
}
}
}