532 lines
20 KiB
GLSL
532 lines
20 KiB
GLSL
Shader "Builtin Gaussian Splat Utilities"
|
|
{
|
|
HLSLINCLUDE
|
|
// Per-splat orientation and size record, read by the GaussianSplatPrepareOrder pass.
struct GaussianSplatOtherData
{
    // Quaternion in (x, y, z, w) component order, as consumed by CalcMatrixFromRotationScale.
    float4 rotation;

    // xyz: per-axis scale passed to CalcMatrixFromRotationScale.
    // w: not read anywhere in this file.
    float4 scaleReserved;
};
|
|
|
|
// Per-splat, per-view output record written by the GaussianSplatPrepareOrder pass.
// A fully zeroed record is written for splats that are culled or not in front of the camera.
struct GaussianSplatViewData
{
    // Clip-space position of the splat centre (projection * view * model * local centre).
    float4 clipCenter;

    // xy: first 2D ellipse axis produced by DecomposeCovariance; zw are written as 0.
    float4 ellipseAxisU;

    // xy: second 2D ellipse axis produced by DecomposeCovariance; zw are written as 0.
    float4 ellipseAxisV;

    // rgb: base colour, optionally shaded with spherical harmonics; a: source alpha.
    float4 colorOpacity;
};
|
|
|
|
// Spherical-harmonic coefficients for a single splat.
// LoadSHCoefficientTriplet reads entries i, i + 15 and i + 30 as one float3, so the
// array is addressed as three consecutive planes of 15 coefficients each.
struct GaussianSplatSHData
{
    float coefficients[45];
};
|
|
|
|
// Per-chunk summary record; one record covers GAUSSIAN_SPLAT_CHUNK_SIZE consecutive splats.
struct GaussianSplatChunkData
{
    // Colour fields: not read by any code in this file.
    // NOTE(review): presumably packed per-channel ranges - confirm against the asset writer.
    uint colR;
    uint colG;
    uint colB;
    uint colA;

    // Local-space bounds per axis: .x is the minimum, .y is the maximum.
    float2 posX;
    float2 posY;
    float2 posZ;

    // Two packed 16-bit halves per axis (see UnpackHalfRange); the upper half is
    // read as the chunk's maximum splat scale by IsChunkDefinitelyOutsideFrustum.
    uint sclX;
    uint sclY;
    uint sclZ;

    // Spherical-harmonic fields: not read by any code in this file.
    // NOTE(review): presumably packed per-channel ranges - confirm against the asset writer.
    uint shR;
    uint shG;
    uint shB;
};
|
|
|
|
// Number of consecutive splats that map onto one chunk record (chunk index = splat index / size).
static const uint GAUSSIAN_SPLAT_CHUNK_SIZE = 256u;

// Basis constants used by ShadeGaussianSplatSH: one shared factor for band 1,
// five factors for band 2 and seven factors for band 3.
static const float SH_C1 = 0.4886025;
static const float SH_C2[5] =
{
    1.0925484, -1.0925484, 0.3153916, -1.0925484, 0.5462742
};
static const float SH_C3[7] =
{
    -0.5900436, 2.8906114, -0.4570458, 0.3731763, -0.4570458, 1.4453057, -0.5900436
};
|
|
|
|
// Gathers one three-component coefficient from the planar SH array.
// Component k of the result is coefficients[coefficientIndex + 15 * k]; the caller
// adds the result to an RGB colour, so the planes act as the R, G and B channels.
// coefficientIndex is not range-checked; callers in this file pass 0..14.
float3 LoadSHCoefficientTriplet(GaussianSplatSHData data, uint coefficientIndex)
{
    const uint planeStride = 15u;
    const float plane0 = data.coefficients[coefficientIndex];
    const float plane1 = data.coefficients[coefficientIndex + planeStride];
    const float plane2 = data.coefficients[coefficientIndex + 2u * planeStride];
    return float3(plane0, plane1, plane2);
}
|
|
|
|
// Adds view-dependent spherical-harmonic colour on top of baseColor.
//   baseColor : colour the SH bands are accumulated onto.
//   data      : planar SH coefficients for the splat.
//   direction : evaluation direction; it is negated before the basis is evaluated.
//   shOrder   : highest band to include (0 = none, 1, 2, or 3 and above = all three).
// Returns the accumulated colour with every component clamped to be non-negative.
float3 ShadeGaussianSplatSH(float3 baseColor, GaussianSplatSHData data, float3 direction, uint shOrder)
{
    const float3 dir = -direction;
    const float x = dir.x;
    const float y = dir.y;
    const float z = dir.z;

    // Second-order products shared by bands 2 and 3.
    const float xx = x * x;
    const float yy = y * y;
    const float zz = z * z;
    const float xy = x * y;
    const float yz = y * z;
    const float xz = x * z;

    float3 color = baseColor;

    // Band 1: coefficients 0..2.
    if (shOrder >= 1u)
    {
        const float3 c0 = LoadSHCoefficientTriplet(data, 0u);
        const float3 c1 = LoadSHCoefficientTriplet(data, 1u);
        const float3 c2 = LoadSHCoefficientTriplet(data, 2u);
        color += SH_C1 * (-c0 * y + c1 * z - c2 * x);
    }

    // Band 2: coefficients 3..7.
    if (shOrder >= 2u)
    {
        const float w3 = SH_C2[0] * xy;
        const float w4 = SH_C2[1] * yz;
        const float w5 = SH_C2[2] * (2.0 * zz - xx - yy);
        const float w6 = SH_C2[3] * xz;
        const float w7 = SH_C2[4] * (xx - yy);

        color +=
            w3 * LoadSHCoefficientTriplet(data, 3u) +
            w4 * LoadSHCoefficientTriplet(data, 4u) +
            w5 * LoadSHCoefficientTriplet(data, 5u) +
            w6 * LoadSHCoefficientTriplet(data, 6u) +
            w7 * LoadSHCoefficientTriplet(data, 7u);
    }

    // Band 3: coefficients 8..14.
    if (shOrder >= 3u)
    {
        const float w8 = SH_C3[0] * y * (3.0 * xx - yy);
        const float w9 = SH_C3[1] * xy * z;
        const float w10 = SH_C3[2] * y * (4.0 * zz - xx - yy);
        const float w11 = SH_C3[3] * z * (2.0 * zz - 3.0 * xx - 3.0 * yy);
        const float w12 = SH_C3[4] * x * (4.0 * zz - xx - yy);
        const float w13 = SH_C3[5] * z * (xx - yy);
        const float w14 = SH_C3[6] * x * (xx - 3.0 * yy);

        color +=
            w8 * LoadSHCoefficientTriplet(data, 8u) +
            w9 * LoadSHCoefficientTriplet(data, 9u) +
            w10 * LoadSHCoefficientTriplet(data, 10u) +
            w11 * LoadSHCoefficientTriplet(data, 11u) +
            w12 * LoadSHCoefficientTriplet(data, 12u) +
            w13 * LoadSHCoefficientTriplet(data, 13u) +
            w14 * LoadSHCoefficientTriplet(data, 14u);
    }

    return max(color, 0.0);
}
|
|
|
|
// Re-encodes the bit pattern of a float so that unsigned integer comparison of the
// results orders values the same way as comparing the original floats.
uint FloatToSortableUint(float value)
{
    const uint signBit = 0x80000000u;
    const uint bits = asuint(value);

    // Sign bit set: invert every bit so that larger magnitudes sort lower.
    if ((bits & signBit) != 0u)
    {
        return ~bits;
    }

    // Sign bit clear: set it so these values sort above all sign-bit-set inputs.
    return bits | signBit;
}
|
|
|
|
// Expands two 16-bit half floats packed into one uint.
// Returns (value from the low 16 bits, value from the high 16 bits).
float2 UnpackHalfRange(uint packedValue)
{
    const uint lowHalfBits = packedValue & 0xffffu;
    const uint highHalfBits = packedValue >> 16u;
    return float2(f16tof32(lowHalfBits), f16tof32(highHalfBits));
}
|
|
|
|
// Transforms a local-space point through the model, view and projection matrices in turn.
// The w component is reset to 1 before each stage; the returned clip-space position is
// not divided by w.
float4 TransformLocalPointToClip(
    float3 localPosition,
    float4x4 modelMatrix,
    float4x4 viewMatrix,
    float4x4 projectionMatrix)
{
    const float4 worldPoint = mul(modelMatrix, float4(localPosition, 1.0));
    const float4 viewPoint = mul(viewMatrix, float4(worldPoint.xyz, 1.0));
    return mul(projectionMatrix, float4(viewPoint.xyz, 1.0));
}
|
|
|
|
// Conservative frustum test for one chunk's bounding box.
//   chunkData                : chunk record supplying the local-space bounds and packed scales.
//   chunkCount / chunkIndex  : an empty set or an out-of-range index is never reported as culled.
//   modelMatrix, viewMatrix, projectionMatrix : transforms applied to each box corner.
// Returns true only when the chunk can be rejected:
//   - its stored bounds are inverted on any axis,
//   - every corner has clip w <= 0, or
//   - no corner has clip w <= 0 and all eight lie outside the same clip plane.
// A box with only some corners at w <= 0 is kept (returns false).
// The near/far tests use 0 <= z <= w as the visible depth range.
bool IsChunkDefinitelyOutsideFrustum(
    GaussianSplatChunkData chunkData,
    uint chunkCount,
    uint chunkIndex,
    float4x4 modelMatrix,
    float4x4 viewMatrix,
    float4x4 projectionMatrix)
{
    if (chunkCount == 0u || chunkIndex >= chunkCount)
    {
        return false;
    }

    const float3 storedMin = float3(chunkData.posX.x, chunkData.posY.x, chunkData.posZ.x);
    const float3 storedMax = float3(chunkData.posX.y, chunkData.posY.y, chunkData.posZ.y);
    if (any(storedMin > storedMax))
    {
        return true;
    }

    // Grow the box by three times the largest per-axis maximum scale so that splats
    // centred near the box faces are still covered.
    const float largestScale = max(
        UnpackHalfRange(chunkData.sclX).y,
        max(UnpackHalfRange(chunkData.sclY).y, UnpackHalfRange(chunkData.sclZ).y));
    const float padding = largestScale * 3.0;
    const float3 boxMin = storedMin - padding;
    const float3 boxMax = storedMax + padding;

    // One bit per clip plane; a bit survives only if every tested corner is outside that plane.
    const uint PLANE_LEFT = 0x01u;
    const uint PLANE_RIGHT = 0x02u;
    const uint PLANE_BOTTOM = 0x04u;
    const uint PLANE_TOP = 0x08u;
    const uint PLANE_NEAR = 0x10u;
    const uint PLANE_FAR = 0x20u;

    uint sharedOutcode = 0x3fu;
    uint behindCount = 0u;

    [unroll]
    for (uint corner = 0u; corner < 8u; ++corner)
    {
        // Bits 0, 1 and 2 of the corner index choose min or max on x, y and z.
        const float3 localCorner = float3(
            (corner & 1u) != 0u ? boxMax.x : boxMin.x,
            (corner & 2u) != 0u ? boxMax.y : boxMin.y,
            (corner & 4u) != 0u ? boxMax.z : boxMin.z);
        const float4 clip = TransformLocalPointToClip(
            localCorner,
            modelMatrix,
            viewMatrix,
            projectionMatrix);

        if (clip.w <= 0.0)
        {
            ++behindCount;
        }
        else
        {
            uint outcode = 0u;
            outcode |= (clip.x < -clip.w) ? PLANE_LEFT : 0u;
            outcode |= (clip.x > clip.w) ? PLANE_RIGHT : 0u;
            outcode |= (clip.y < -clip.w) ? PLANE_BOTTOM : 0u;
            outcode |= (clip.y > clip.w) ? PLANE_TOP : 0u;
            outcode |= (clip.z < 0.0) ? PLANE_NEAR : 0u;
            outcode |= (clip.z > clip.w) ? PLANE_FAR : 0u;
            sharedOutcode &= outcode;
        }
    }

    if (behindCount == 8u)
    {
        return true;
    }

    // The plane tests are only trusted when every corner has positive w.
    if (behindCount != 0u)
    {
        return false;
    }

    return sharedOutcode != 0u;
}
|
|
|
|
// Builds the 3x3 matrix R * S from a quaternion and a per-axis scale.
//   rotation : quaternion in (x, y, z, w) order; it is used as given and not normalised here.
//   scale    : diagonal entries of S.
float3x3 CalcMatrixFromRotationScale(float4 rotation, float3 scale)
{
    const float qx = rotation.x;
    const float qy = rotation.y;
    const float qz = rotation.z;
    const float qw = rotation.w;

    const float3 row0 = float3(
        1.0 - 2.0 * (qy * qy + qz * qz),
        2.0 * (qx * qy - qw * qz),
        2.0 * (qx * qz + qw * qy));
    const float3 row1 = float3(
        2.0 * (qx * qy + qw * qz),
        1.0 - 2.0 * (qx * qx + qz * qz),
        2.0 * (qy * qz - qw * qx));
    const float3 row2 = float3(
        2.0 * (qx * qz - qw * qy),
        2.0 * (qy * qz + qw * qx),
        1.0 - 2.0 * (qx * qx + qy * qy));

    const float3x3 rotationPart = float3x3(row0, row1, row2);
    const float3x3 scalePart = float3x3(
        float3(scale.x, 0.0, 0.0),
        float3(0.0, scale.y, 0.0),
        float3(0.0, 0.0, scale.z));

    return mul(rotationPart, scalePart);
}
|
|
|
|
// Computes the symmetric matrix M * M^T and returns its six unique entries.
//   sigma0 : (m00, m01, m02) - the first row.
//   sigma1 : (m11, m12, m22) - the rest of the upper triangle.
void CalcCovariance3D(float3x3 rotationScaleMatrix, out float3 sigma0, out float3 sigma1)
{
    const float3x3 covariance = mul(rotationScaleMatrix, transpose(rotationScaleMatrix));
    sigma0 = covariance[0];
    sigma1 = float3(covariance[1][1], covariance[1][2], covariance[2][2]);
}
|
|
|
|
// Projects a 3D covariance to a 2D covariance using the Jacobian of the perspective
// mapping evaluated at the splat's view-space position.
//   viewPosition     : splat centre in view space.
//   covariance3D0/1  : upper triangle of the 3D covariance, laid out as by CalcCovariance3D.
//   viewMatrix       : its upper-left 3x3 rotates the covariance into view space.
//   projectionMatrix : only [0][0] and [1][1] are read.
//   screenSize       : only .x is read (assumed to be the target width in pixels - confirm).
// Returns (c00, c01, c11) with 0.3 added to both diagonal entries. When |z| <= 1e-5 the
// divisions are skipped and (0.3, 0, 0.3) is returned.
float3 CalcCovariance2D(
    float3 viewPosition,
    float3 covariance3D0,
    float3 covariance3D1,
    float4x4 viewMatrix,
    float4x4 projectionMatrix,
    float2 screenSize)
{
    const float diagonalBias = 0.3;
    const float depth = viewPosition.z;
    if (abs(depth) <= 1.0e-5)
    {
        return float3(diagonalBias, 0.0, diagonalBias);
    }

    const float proj00 = projectionMatrix[0][0];
    const float proj11 = projectionMatrix[1][1];
    const float aspect = proj00 / proj11;
    const float tanFovX = rcp(proj00);
    // NOTE(review): proj11 * aspect reduces algebraically to proj00, so this limit
    // matches the X one up to rounding; kept exactly as written.
    const float tanFovY = rcp(proj11 * aspect);
    const float limitX = 1.3 * tanFovX;
    const float limitY = 1.3 * tanFovY;

    // Clamp x/z and y/z to 1.3 times the tangent limits before evaluating the Jacobian.
    const float3 clamped = float3(
        clamp(viewPosition.x / depth, -limitX, limitX) * depth,
        clamp(viewPosition.y / depth, -limitY, limitY) * depth,
        depth);

    const float focalLength = screenSize.x * proj00 * 0.5;
    const float depthSquared = clamped.z * clamped.z;
    const float3x3 jacobian = float3x3(
        float3(focalLength / clamped.z, 0.0, -(focalLength * clamped.x) / depthSquared),
        float3(0.0, focalLength / clamped.z, -(focalLength * clamped.y) / depthSquared),
        float3(0.0, 0.0, 0.0));

    const float3x3 transform = mul(jacobian, (float3x3)viewMatrix);

    // Rebuild the full symmetric 3x3 covariance from its six stored entries.
    const float3x3 covariance3D = float3x3(
        float3(covariance3D0.x, covariance3D0.y, covariance3D0.z),
        float3(covariance3D0.y, covariance3D1.x, covariance3D1.y),
        float3(covariance3D0.z, covariance3D1.y, covariance3D1.z));

    const float3x3 projected = mul(transform, mul(covariance3D, transpose(transform)));

    return float3(
        projected[0][0] + diagonalBias,
        projected[0][1],
        projected[1][1] + diagonalBias);
}
|
|
|
|
// Splits a symmetric 2x2 covariance into the two axes of its ellipse.
//   covariance2D : (c00, c01, c11).
//   axisU        : unit direction for the larger eigenvalue, scaled by sqrt(2 * lambda0).
//   axisV        : perpendicular direction, scaled by sqrt(2 * lambda1).
// Both eigenvalues are floored at 0.1 and both axis lengths are capped at 4096.
void DecomposeCovariance(float3 covariance2D, out float2 axisU, out float2 axisV)
{
    const float diagonal0 = covariance2D.x;
    const float diagonal1 = covariance2D.z;
    const float offDiagonal = covariance2D.y;

    // Closed-form eigenvalues of [[d0, o], [o, d1]]: mid +/- radius.
    const float mid = 0.5 * (diagonal0 + diagonal1);
    const float radius = length(float2((diagonal0 - diagonal1) * 0.5, offDiagonal));
    const float lambda0 = max(mid + radius, 0.1);
    const float lambda1 = max(mid - radius, 0.1);

    // Unnormalised eigenvector for lambda0. It collapses to (0, 0) when the off-diagonal
    // term vanishes and diagonal0 is already the larger eigenvalue. The degeneracy has to
    // be detected BEFORE normalising: normalize() of a zero vector yields NaN, and a
    // normalised vector can never have both components near zero, so a check placed
    // after normalize() cannot fire.
    const float2 rawBasis = float2(offDiagonal, lambda0 - diagonal0);
    float2 basis = float2(1.0, 0.0); // major axis along x in the degenerate case
    if (any(abs(rawBasis) >= 1.0e-5))
    {
        basis = normalize(rawBasis);
    }

    basis.y = -basis.y;
    const float maxAxisLength = 4096.0;
    axisU = min(sqrt(2.0 * lambda0), maxAxisLength) * basis;
    axisV = min(sqrt(2.0 * lambda1), maxAxisLength) * float2(basis.y, -basis.x);
}
|
|
ENDHLSL
|
|
|
|
SubShader
|
|
{
|
|
Pass
{
    Name "GaussianSplatMarkVisibleChunks"
    HLSLPROGRAM
    #pragma target 4.5
    #pragma compute GaussianSplatMarkVisibleChunksCS

    // Constant layout is declared identically in every pass of this shader.
    // This pass reads the three matrices below and gSplatParams.w (chunk count).
    cbuffer PerObjectConstants
    {
        float4x4 gProjectionMatrix;
        float4x4 gViewMatrix;
        float4x4 gModelMatrix;
        float4x4 gWorldToObjectMatrix;
        float4 gCameraRight;
        float4 gCameraUp;
        float4 gCameraWorldPos;
        float4 gScreenParams;
        float4 gSplatParams;
    };

    StructuredBuffer<GaussianSplatChunkData> GaussianSplatChunks;
    RWStructuredBuffer<uint> GaussianSplatVisibleChunks;

    // One thread per chunk: writes 1 when the chunk may be visible and 0 when it has
    // inverted bounds or is definitely outside the frustum. Threads past the chunk
    // count write nothing.
    [numthreads(64, 1, 1)]
    void GaussianSplatMarkVisibleChunksCS(uint3 dispatchThreadId : SV_DispatchThreadID)
    {
        const uint chunkIndex = dispatchThreadId.x;
        const uint chunkCount = (uint)gSplatParams.w;

        if (chunkIndex < chunkCount)
        {
            const GaussianSplatChunkData chunk = GaussianSplatChunks[chunkIndex];

            const bool boundsInverted =
                chunk.posX.x > chunk.posX.y ||
                chunk.posY.x > chunk.posY.y ||
                chunk.posZ.x > chunk.posZ.y;

            const bool culled = boundsInverted ||
                IsChunkDefinitelyOutsideFrustum(
                    chunk,
                    chunkCount,
                    chunkIndex,
                    gModelMatrix,
                    gViewMatrix,
                    gProjectionMatrix);

            GaussianSplatVisibleChunks[chunkIndex] = culled ? 0u : 1u;
        }
    }
    ENDHLSL
}
|
|
|
|
Pass
{
    Name "GaussianSplatPrepareOrder"
    HLSLPROGRAM
    #pragma target 4.5
    #pragma compute GaussianSplatPrepareOrderCS

    // Constant layout is declared identically in every pass of this shader.
    // gSplatParams in this pass: x = splat count, y = sort capacity,
    // z = SH order (clamped to 3), w = chunk count.
    cbuffer PerObjectConstants
    {
        float4x4 gProjectionMatrix;
        float4x4 gViewMatrix;
        float4x4 gModelMatrix;
        float4x4 gWorldToObjectMatrix;
        float4 gCameraRight;
        float4 gCameraUp;
        float4 gCameraWorldPos;
        float4 gScreenParams;
        float4 gSplatParams;
    };

    StructuredBuffer<float4> GaussianSplatPositions;
    StructuredBuffer<GaussianSplatOtherData> GaussianSplatOther;
    StructuredBuffer<float4> GaussianSplatColor;
    StructuredBuffer<GaussianSplatSHData> GaussianSplatSH;
    StructuredBuffer<uint> GaussianSplatVisibleChunks : register(t0, space3);
    RWStructuredBuffer<uint> GaussianSplatSortDistances;
    RWStructuredBuffer<uint> GaussianSplatOrderBuffer;
    RWStructuredBuffer<GaussianSplatViewData> GaussianSplatViewDataBuffer;

    // One thread per sort slot. For each real splat it writes the identity order entry,
    // a sortable depth key and the per-view record. Slots past the splat count are
    // padding and get the maximum key.
    [numthreads(64, 1, 1)]
    void GaussianSplatPrepareOrderCS(uint3 dispatchThreadId : SV_DispatchThreadID)
    {
        const uint maxSortKey = 0xffffffffu;
        const uint slot = dispatchThreadId.x;
        const uint splatCount = (uint)gSplatParams.x;
        const uint sortCapacity = max((uint)gSplatParams.y, splatCount);

        if (slot >= sortCapacity)
        {
            return;
        }

        // Padding slot: maximum key, order entry 0, no view record.
        if (slot >= splatCount)
        {
            GaussianSplatSortDistances[slot] = maxSortKey;
            GaussianSplatOrderBuffer[slot] = 0u;
            return;
        }

        GaussianSplatOrderBuffer[slot] = slot;

        // Stays all-zero unless the splat survives both rejection tests below.
        GaussianSplatViewData record = (GaussianSplatViewData)0;

        const float3 localCenter = GaussianSplatPositions[slot].xyz;
        const GaussianSplatOtherData shape = GaussianSplatOther[slot];
        const float4 baseColorOpacity = GaussianSplatColor[slot];
        const GaussianSplatSHData shCoefficients = GaussianSplatSH[slot];

        const uint owningChunk = slot / GAUSSIAN_SPLAT_CHUNK_SIZE;
        const uint chunkCount = (uint)gSplatParams.w;
        const uint shOrder = min((uint)gSplatParams.z, 3u);

        const bool chunkVisible =
            owningChunk < chunkCount && GaussianSplatVisibleChunks[owningChunk] != 0u;
        if (!chunkVisible)
        {
            // Culled with its chunk: maximum key and an empty record.
            GaussianSplatSortDistances[slot] = maxSortKey;
            GaussianSplatViewDataBuffer[slot] = record;
            return;
        }

        const float3 worldCenter = mul(gModelMatrix, float4(localCenter, 1.0)).xyz;
        const float3 viewCenter = mul(gViewMatrix, float4(worldCenter, 1.0)).xyz;
        GaussianSplatSortDistances[slot] = FloatToSortableUint(viewCenter.z);

        const float4 clipCenter = mul(gProjectionMatrix, float4(viewCenter, 1.0));

        // Anything that is not strictly in front (w > 0) keeps its depth key but gets an
        // empty record.
        if (!(clipCenter.w > 0.0))
        {
            GaussianSplatViewDataBuffer[slot] = record;
            return;
        }

        // Covariance from model-linear * rotation * scale.
        const float3x3 localRotationScale =
            CalcMatrixFromRotationScale(shape.rotation, shape.scaleReserved.xyz);
        const float3x3 worldRotationScale = mul((float3x3)gModelMatrix, localRotationScale);

        float3 covarianceRow0 = 0.0;
        float3 covarianceRest = 0.0;
        CalcCovariance3D(worldRotationScale, covarianceRow0, covarianceRest);

        const float3 covariance2D = CalcCovariance2D(
            viewCenter,
            covarianceRow0,
            covarianceRest,
            gViewMatrix,
            gProjectionMatrix,
            gScreenParams.xy);

        float2 axisU = 0.0;
        float2 axisV = 0.0;
        DecomposeCovariance(covariance2D, axisU, axisV);

        // SH shading is evaluated along the object-space direction from the splat to the camera.
        float3 shadedColor = baseColorOpacity.rgb;
        if (shOrder > 0u)
        {
            const float3 toCameraWorld = gCameraWorldPos.xyz - worldCenter;
            const float3 toCameraObject = normalize(
                mul((float3x3)gWorldToObjectMatrix, toCameraWorld));
            shadedColor = ShadeGaussianSplatSH(
                baseColorOpacity.rgb,
                shCoefficients,
                toCameraObject,
                shOrder);
        }

        record.clipCenter = clipCenter;
        record.ellipseAxisU = float4(axisU, 0.0, 0.0);
        record.ellipseAxisV = float4(axisV, 0.0, 0.0);
        record.colorOpacity = float4(shadedColor, baseColorOpacity.a);

        GaussianSplatViewDataBuffer[slot] = record;
    }
    ENDHLSL
}
|
|
|
|
Pass
{
    Name "GaussianSplatBitonicSort"
    HLSLPROGRAM
    #pragma target 4.5
    #pragma compute GaussianSplatBitonicSortCS

    // Constant layout is declared identically in every pass of this shader.
    // gSplatParams in this pass: y = sort capacity, z = partner mask (XOR-ed with the
    // thread index), w = level mask (selects the sort direction).
    cbuffer PerObjectConstants
    {
        float4x4 gProjectionMatrix;
        float4x4 gViewMatrix;
        float4x4 gModelMatrix;
        float4x4 gWorldToObjectMatrix;
        float4 gCameraRight;
        float4 gCameraUp;
        float4 gCameraWorldPos;
        float4 gScreenParams;
        float4 gSplatParams;
    };

    RWStructuredBuffer<uint> GaussianSplatSortDistances;
    RWStructuredBuffer<uint> GaussianSplatOrderBuffer;

    // One compare-exchange step over (key, order) pairs. Each thread pairs its index with
    // index ^ partnerMask; only the lower index of an in-range pair does the exchange,
    // so every pair is handled by exactly one thread.
    [numthreads(256, 1, 1)]
    void GaussianSplatBitonicSortCS(uint3 dispatchThreadId : SV_DispatchThreadID)
    {
        const uint sortCapacity = (uint)gSplatParams.y;
        const uint partnerMask = (uint)gSplatParams.z;
        const uint levelMask = (uint)gSplatParams.w;

        const uint self = dispatchThreadId.x;
        const uint partner = self ^ partnerMask;

        const bool ownsPair =
            self < sortCapacity && partner < sortCapacity && partner > self;
        if (!ownsPair)
        {
            return;
        }

        const uint selfKey = GaussianSplatSortDistances[self];
        const uint partnerKey = GaussianSplatSortDistances[partner];

        // The level mask picks the direction: ascending where the masked bit of the index is clear.
        const bool ascending = (self & levelMask) == 0u;
        const bool outOfOrder = ascending ? (selfKey > partnerKey) : (selfKey < partnerKey);

        if (outOfOrder)
        {
            const uint selfOrder = GaussianSplatOrderBuffer[self];
            const uint partnerOrder = GaussianSplatOrderBuffer[partner];

            GaussianSplatSortDistances[self] = partnerKey;
            GaussianSplatSortDistances[partner] = selfKey;
            GaussianSplatOrderBuffer[self] = partnerOrder;
            GaussianSplatOrderBuffer[partner] = selfOrder;
        }
    }
    ENDHLSL
}
|
|
}
|
|
}
|