Files
XCEngine/MVS/3DGS-Unity/Shaders/GaussianSplatting.hlsl

618 lines
22 KiB
HLSL
Raw Normal View History

2026-03-29 01:36:53 +08:00
// SPDX-License-Identifier: MIT
#ifndef GAUSSIAN_SPLATTING_HLSL
#define GAUSSIAN_SPLATTING_HLSL
// Remaps a value around the 0.5 midpoint: the offset from 0.5 is halved,
// replaced by its signed square root, and then re-centered at 0.5.
// 0 -> 0, 0.5 -> 0.5 and 1 -> 1.
float InvSquareCentered01(float x)
{
    float centered = x - 0.5;
    centered *= 0.5;
    // signed square root keeps the side of the midpoint the input was on
    float signedRoot = sqrt(abs(centered)) * sign(centered);
    return signedRoot + 0.5;
}
// Applies quaternion r (xyz = vector part, w = scalar part) to vector v using
// the two-cross-product form: v + w*t + cross(r.xyz, t), with t = 2*cross(r.xyz, v).
// NOTE(review): this is a pure rotation only when r is unit length; no
// normalization happens here.
float3 QuatRotateVector(float3 v, float4 r)
{
    float3 axis = r.xyz;
    float3 twiceCross = 2 * cross(axis, v);
    float3 rotated = v + r.w * twiceCross + cross(axis, twiceCross);
    return rotated;
}
// Quaternion product of a and b (xyz = vector part, w = scalar part),
// evaluated as three swizzled vector terms.
float4 QuatMul(float4 a, float4 b)
{
    // the w lane of the mixed term is subtracted rather than added
    const float4 laneSigns = float4(1, 1, 1, -1);
    float4 scalarTerm = a.wwww * b;
    float4 mixedTerm = (a.xyzx * b.wwwx + a.yzxy * b.zxyy) * laneSigns;
    float4 crossTerm = a.zxyz * b.yzxz;
    return scalarTerm + mixedTerm - crossTerm;
}
// Inverse of quaternion q: the conjugate (negated xyz) divided by the squared length.
// A zero quaternion is not guarded against (rcp of 0).
float4 QuatInverse(float4 q)
{
    float invLengthSq = rcp(dot(q, q));
    float4 conjugateSigns = float4(-1, -1, -1, 1);
    return invLengthSq * q * conjugateSigns;
}
// Builds the 3x3 matrix mul(R, S), where R is the rotation matrix expanded
// from quaternion rot (xyz = vector part, w = scalar part) and S is the
// diagonal matrix holding scale.
float3x3 CalcMatrixFromRotationScale(float4 rot, float3 scale)
{
    float qx = rot.x, qy = rot.y, qz = rot.z, qw = rot.w;
    float3x3 rotation = float3x3(
        1-2*(qy*qy + qz*qz), 2*(qx*qy - qw*qz), 2*(qx*qz + qw*qy),
        2*(qx*qy + qw*qz), 1-2*(qx*qx + qz*qz), 2*(qy*qz - qw*qx),
        2*(qx*qz - qw*qy), 2*(qy*qz + qw*qx), 1-2*(qx*qx + qy*qy)
    );
    float3x3 scaling = float3x3(
        scale.x, 0, 0,
        0, scale.y, 0,
        0, 0, scale.z
    );
    return mul(rotation, scaling);
}
// Computes M * M^T for the given 3x3 matrix and returns the upper triangle of
// the (symmetric) result:
//   sigma0 = (m00, m01, m02)
//   sigma1 = (m11, m12, m22)
void CalcCovariance3D(float3x3 rotMat, out float3 sigma0, out float3 sigma1)
{
    float3x3 cov = mul(rotMat, transpose(rotMat));
    sigma0 = cov._m00_m01_m02;
    sigma1 = cov._m11_m12_m22;
}
// Projects a 3D covariance to a 2D covariance,
// from "EWA Splatting" (Zwicker et al 2002) eq. 31
//   worldPos       - position transformed by matrixV to get the view-space center
//   cov3d0, cov3d1 - upper triangle of the symmetric 3x3 covariance:
//                    cov3d0 = (m00, m01, m02), cov3d1 = (m11, m12, m22)
//   matrixV        - view matrix
//   matrixP        - projection matrix; only _m00 and _m11 are read
//   screenParams   - only .x is read, to scale the focal length
//                    (presumably the render target width in pixels - confirm against caller)
// Returns (m00, m01, m11) of the symmetric 2x2 result.
float3 CalcCovariance2D(float3 worldPos, float3 cov3d0, float3 cov3d1, float4x4 matrixV, float4x4 matrixP, float4 screenParams)
{
    float3 posVS = mul(matrixV, float4(worldPos, 1)).xyz;

    // this is needed in order for splats that are visible in view but clipped "quite a lot" to work
    // NOTE(review): matrixP._m11 * aspect is algebraically matrixP._m00, so limY
    // ends up equal to limX - confirm that is intended.
    float aspect = matrixP._m00 / matrixP._m11;
    float limX = 1.3 * rcp(matrixP._m00);
    float limY = 1.3 * rcp(matrixP._m11 * aspect);
    posVS.x = clamp(posVS.x / posVS.z, -limX, limX) * posVS.z;
    posVS.y = clamp(posVS.y / posVS.z, -limY, limY) * posVS.z;

    float focal = screenParams.x * matrixP._m00 / 2;
    float depthSq = posVS.z * posVS.z;
    float3x3 jacobian = float3x3(
        focal / posVS.z, 0, -(focal * posVS.x) / depthSq,
        0, focal / posVS.z, -(focal * posVS.y) / depthSq,
        0, 0, 0
    );
    float3x3 T = mul(jacobian, (float3x3)matrixV);

    // rebuild the full symmetric matrix from its upper triangle
    float3x3 cov3d = float3x3(
        cov3d0,
        float3(cov3d0.y, cov3d1.xy),
        float3(cov3d0.z, cov3d1.yz)
    );
    float3x3 cov2d = mul(T, mul(cov3d, transpose(T)));

    // Low pass filter to make each splat at least 1px size.
    return float3(cov2d._m00 + 0.3, cov2d._m01, cov2d._m11 + 0.3);
}
// Inverts a symmetric 2x2 matrix given as (m00, m01, m11) and returns the
// inverse in the same (m00, m01, m11) layout. A zero determinant is not
// guarded against.
float3 CalcConic(float3 cov2d)
{
    float det = cov2d.x * cov2d.z - cov2d.y * cov2d.y;
    float invDet = rcp(det);
    float3 adjugate = float3(cov2d.z, -cov2d.y, cov2d.x);
    return adjugate * invDet;
}
// Offset from centerXY to svPositionXY, with the y component multiplied by
// projectionParams.x (presumably a +1/-1 vertical flip factor - confirm
// against the caller).
float2 CalcScreenSpaceDelta(float2 svPositionXY, float2 centerXY, float4 projectionParams)
{
    float2 delta = svPositionXY - centerXY;
    return float2(delta.x, delta.y * projectionParams.x);
}
// Evaluates -0.5 * (conic.x*dx^2 + conic.z*dy^2) + conic.y*dx*dy for the
// offset d = (dx, dy).
float CalcPowerFromConic(float3 conic, float2 d)
{
    float diagonalPart = conic.x * d.x*d.x + conic.z * d.y*d.y;
    float crossPart = conic.y * d.x*d.y;
    return -0.5 * diagonalPart + crossPart;
}
// Morton interleaving 16x16 group i.e. by 4 bits of coordinates, based on this thread:
// https://twitter.com/rygorous/status/986715358852608000
// which is simplified version of https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
//
// Interleaves the low 4 bits of c.x (ABCD) and c.y (EFGH) into one 8-bit code,
// with x bits in the even positions and y bits in the odd positions.
uint EncodeMorton2D_16x16(uint2 c)
{
    uint bits = (c.x & 0xF) | ((c.y & 0xF) << 8);   // ----EFGH----ABCD
    bits = (bits ^ (bits << 2)) & 0x3333;           // --EF--GH--AB--CD
    bits = (bits ^ (bits << 1)) & 0x5555;           // -E-F-G-H-A-B-C-D
    uint code = (bits | (bits >> 7)) & 0xFF;        // --------EAFBGCHD
    return code;
}
// Splits the low 8 bits of a Morton code (--------EAFBGCHD) back into two
// 4-bit coordinates: x = ABCD (even bits), y = EFGH (odd bits).
uint2 DecodeMorton2D_16x16(uint t) // --------EAFBGCHD
{
    uint bits = (t & 0xFF) | ((t & 0xFE) << 7);     // -EAFBGCHEAFBGCHD
    bits &= 0x5555;                                 // -E-F-G-H-A-B-C-D
    bits = (bits ^ (bits >> 1)) & 0x3333;           // --EF--GH--AB--CD
    bits = (bits ^ (bits >> 2)) & 0x0f0f;           // ----EFGH----ABCD
    uint x = bits & 0xF;
    uint y = bits >> 8;
    return uint2(x, y);                             // --------EFGHABCD
}
// Constant factors for the spherical-harmonics terms evaluated in ShadeSH:
// SH_C1 scales the three 1st-degree terms, SH_C2[0..4] the five 2nd-degree
// terms and SH_C3[0..6] the seven 3rd-degree terms.
static const float SH_C1 = 0.4886025;
static const float SH_C2[] = { 1.0925484, -1.0925484, 0.3153916, -1.0925484, 0.5462742 };
static const float SH_C3[] = { -0.5900436, 2.8906114, -0.4570458, 0.3731763, -0.4570458, 1.4453057, -0.5900436 };
// Per-splat color data consumed by ShadeSH.
struct SplatSHData
{
    // base color; ShadeSH treats it as the already precomputed ambient band
    half3 col;
    // 1st degree coefficients
    half3 sh1;
    half3 sh2;
    half3 sh3;
    // 2nd degree coefficients
    half3 sh4;
    half3 sh5;
    half3 sh6;
    half3 sh7;
    half3 sh8;
    // 3rd degree coefficients
    half3 sh9;
    half3 sh10;
    half3 sh11;
    half3 sh12;
    half3 sh13;
    half3 sh14;
    half3 sh15;
};
// Evaluates the splat's spherical-harmonics color for a direction.
//   splat   - base color plus SH coefficients
//   dir     - direction; it is negated before the basis is evaluated
//   shOrder - highest SH degree to include (0 = base color only, up to 3)
//   onlySH  - when true, the base color is replaced by a constant 0.5
// Returns the accumulated color clamped to be non-negative.
half3 ShadeSH(SplatSHData splat, half3 dir, int shOrder, bool onlySH)
{
    half3 flipped = dir * -1;
    half x = flipped.x, y = flipped.y, z = flipped.z;

    // ambient band
    half3 res;
    if (onlySH)
        res = 0.5;
    else
        res = splat.col; // col = sh0 * SH_C0 + 0.5 is already precomputed

    if (shOrder < 1)
        return max(res, 0);

    // 1st degree
    res += SH_C1 * (-splat.sh1 * y + splat.sh2 * z - splat.sh3 * x);
    if (shOrder < 2)
        return max(res, 0);

    // 2nd degree
    half xx = x * x, yy = y * y, zz = z * z;
    half xy = x * y, yz = y * z, xz = x * z;
    res +=
        (SH_C2[0] * xy) * splat.sh4 +
        (SH_C2[1] * yz) * splat.sh5 +
        (SH_C2[2] * (2 * zz - xx - yy)) * splat.sh6 +
        (SH_C2[3] * xz) * splat.sh7 +
        (SH_C2[4] * (xx - yy)) * splat.sh8;
    if (shOrder < 3)
        return max(res, 0);

    // 3rd degree
    res +=
        (SH_C3[0] * y * (3 * xx - yy)) * splat.sh9 +
        (SH_C3[1] * xy * z) * splat.sh10 +
        (SH_C3[2] * y * (4 * zz - xx - yy)) * splat.sh11 +
        (SH_C3[3] * z * (2 * zz - 3 * xx - 3 * yy)) * splat.sh12 +
        (SH_C3[4] * x * (4 * zz - xx - yy)) * splat.sh13 +
        (SH_C3[5] * z * (xx - yy)) * splat.sh14 +
        (SH_C3[6] * x * (xx - 3 * yy)) * splat.sh15;
    return max(res, 0);
}
// Width in pixels of the texture addressed by SplatIndexToPixelIndex.
static const uint kTexWidth = 2048;

// Converts a linear splat index to a texel coordinate (z is always 0).
// The low 8 bits of the index are Morton-decoded into a position inside a
// 16x16 tile; the remaining bits select the tile, laid out row by row with
// kTexWidth / 16 tiles per row.
uint3 SplatIndexToPixelIndex(uint idx)
{
    uint2 inTile = DecodeMorton2D_16x16(idx);
    uint tilesPerRow = kTexWidth / 16;
    uint tile = idx >> 8;
    uint px = (tile % tilesPerRow) * 16 + inTile.x;
    uint py = (tile / tilesPerRow) * 16 + inTile.y;
    return uint3(px, py, 0);
}
// Per-chunk min/max ranges used to expand chunk-relative splat values.
// Member order is kept as-is, since the struct is read from a StructuredBuffer.
struct SplatChunkInfo
{
    // color ranges; each uint packs two fp16 values: low 16 bits = min, high 16 bits = max
    uint colR;
    uint colG;
    uint colB;
    uint colA;
    // position range per axis: .x = min, .y = max
    float2 posX;
    float2 posY;
    float2 posZ;
    // scale ranges, packed as two fp16 values like the colors
    uint sclX;
    uint sclY;
    uint sclZ;
    // SH ranges, packed as two fp16 values like the colors
    uint shR;
    uint shG;
    uint shB;
};
// Per-chunk ranges, indexed by (splat index / kChunkSize).
StructuredBuffer<SplatChunkInfo> _SplatChunks;
// Number of valid chunks; splats whose chunk index is at or past this count
// are used without chunk-range expansion.
uint _SplatChunkCount;
// Number of consecutive splats that share one SplatChunkInfo entry.
static const uint kChunkSize = 256;
// Fully decoded data of one splat, as produced by LoadSplatData.
struct SplatData
{
float3 pos; // position
float4 rot; // rotation quaternion, as returned by DecodeRotation
float3 scale; // scale
half opacity; // taken from the alpha channel of the splat color
SplatSHData sh; // base color (sh.col) and SH coefficients
};
// Decode quaternion from a "smallest 3" e.g. 10.10.10.2 format
//   pq.xyz - the three stored components, each in 0..1
//   pq.w   - index (0..3) of the dropped component, stored as index / 3
// The dropped component is rebuilt as sqrt(1 - |xyz|^2) and moved back to its slot.
float4 DecodeRotation(float4 pq)
{
    // note: need to round or index might come out wrong in some formats (e.g. fp16.fp16.fp16.fp16)
    uint droppedIdx = (uint)round(pq.w * 3.0);

    // 0..1 -> -1/sqrt2..+1/sqrt2
    float3 stored = pq.xyz * sqrt(2.0) - (1.0 / sqrt(2.0));
    float rebuilt = sqrt(1.0 - saturate(dot(stored, stored)));
    float4 q = float4(stored, rebuilt);

    if (droppedIdx == 0)
        return q.wxyz;
    if (droppedIdx == 1)
        return q.xwyz;
    if (droppedIdx == 2)
        return q.xywz;
    return q;
}
// Packs a quaternion into "smallest 3" form: the component with the largest
// magnitude is dropped, the other three are sign-adjusted so the dropped one
// would be non-negative, then remapped to 0..1.
// Returns (three components in 0..1, dropped index / 3).
float4 PackSmallest3Rotation(float4 q)
{
    // find biggest component (ties keep the earlier one)
    float4 magnitude = abs(q);
    int largest = 0;
    float largestMag = magnitude.x;
    if (magnitude.y > largestMag)
    {
        largest = 1;
        largestMag = magnitude.y;
    }
    if (magnitude.z > largestMag)
    {
        largest = 2;
        largestMag = magnitude.z;
    }
    if (magnitude.w > largestMag)
    {
        largest = 3;
    }

    // move the biggest component into w; the remaining ones keep their order
    float4 reordered = q;
    if (largest == 0)
        reordered = q.yzwx;
    else if (largest == 1)
        reordered = q.xzwy;
    else if (largest == 2)
        reordered = q.xywz;

    float3 three = reordered.xyz * (reordered.w >= 0 ? 1 : -1); // -1/sqrt2..+1/sqrt2 range
    three = (three * sqrt(2.0)) * 0.5 + 0.5; // 0..1 range
    return float4(three, largest / 3.0);
}
// Unpacks a 16-bit 6.5.5 value (x: bits 0-5, y: bits 6-10, z: bits 11-15)
// into three components normalized to 0..1. Higher bits are ignored.
half3 DecodePacked_6_5_5(uint enc)
{
    uint x6 = enc & 63;
    uint y5 = (enc >> 6) & 31;
    uint z5 = (enc >> 11) & 31;
    return half3(x6 / 63.0, y5 / 31.0, z5 / 31.0);
}
// Unpacks a 16-bit 5.6.5 value (x: bits 0-4, y: bits 5-10, z: bits 11-15)
// into three components normalized to 0..1. Higher bits are ignored.
half3 DecodePacked_5_6_5(uint enc)
{
    uint x5 = enc & 31;
    uint y6 = (enc >> 5) & 63;
    uint z5 = (enc >> 11) & 31;
    return half3(x5 / 31.0, y6 / 63.0, z5 / 31.0);
}
// Unpacks a 32-bit 11.10.11 value (x: bits 0-10, y: bits 11-20, z: bits 21-31)
// into three components normalized to 0..1.
half3 DecodePacked_11_10_11(uint enc)
{
    uint x11 = enc & 2047;
    uint y10 = (enc >> 11) & 1023;
    uint z11 = (enc >> 21) & 2047;
    return half3(x11 / 2047.0, y10 / 1023.0, z11 / 2047.0);
}
// Unpacks three 16-bit values (x: low half of enc.x, y: high half of enc.x,
// z: low half of enc.y) into components normalized to 0..1.
float3 DecodePacked_16_16_16(uint2 enc)
{
    uint x16 = enc.x & 65535;
    uint y16 = (enc.x >> 16) & 65535;
    uint z16 = enc.y & 65535;
    return float3(x16 / 65535.0, y16 / 65535.0, z16 / 65535.0);
}
// Unpacks a 32-bit 10.10.10.2 value (x: bits 0-9, y: bits 10-19, z: bits 20-29,
// w: bits 30-31) into four components normalized to 0..1.
float4 DecodePacked_10_10_10_2(uint enc)
{
    uint x10 = enc & 1023;
    uint y10 = (enc >> 10) & 1023;
    uint z10 = (enc >> 20) & 1023;
    uint w2 = (enc >> 30) & 3;
    return float4(x10 / 1023.0, y10 / 1023.0, z10 / 1023.0, w2 / 3.0);
}
// Packs four 0..1 components into 32 bits as 10.10.10.2
// (x: bits 0-9, y: bits 10-19, z: bits 20-29, w: bits 30-31).
// Components are clamped to 0..1 first, so an out-of-range input cannot
// overflow its own bit field and corrupt the neighbouring ones. For inputs
// already in 0..1 the result is unchanged.
uint EncodeQuatToNorm10(float4 v) // 32 bits: 10.10.10.2
{
    float4 c = saturate(v);
    uint x10 = (uint) (c.x * 1023.5f);
    uint y10 = (uint) (c.y * 1023.5f);
    uint z10 = (uint) (c.z * 1023.5f);
    uint w2 = (uint) (c.w * 3.5f);
    return x10 | (y10 << 10) | (z10 << 20) | (w2 << 30);
}
// Splat data buffers are declared read-write when SHADER_STAGE_COMPUTE is
// defined and read-only otherwise.
#ifdef SHADER_STAGE_COMPUTE
#define SplatBufferDataType RWByteAddressBuffer
#else
#define SplatBufferDataType ByteAddressBuffer
#endif
// Packed splat positions; one vector per splat in the position format.
SplatBufferDataType _SplatPos;
// Per splat: 10.10.10.2 rotation, then the scale vector, then (only for SH
// formats above VECTOR_FMT_6) a 16-bit SH index.
SplatBufferDataType _SplatOther;
// Spherical-harmonics coefficient records.
SplatBufferDataType _SplatSH;
// Splat colors, addressed with SplatIndexToPixelIndex.
Texture2D _SplatColor;
// Packed formats: bits 0-7 = position format, bits 8-15 = scale format,
// bits 16-23 = SH format.
uint _SplatFormat;
// Match GaussianSplatAsset.VectorFormat
#define VECTOR_FMT_32F 0 // 3x fp32, 12 bytes per vector
#define VECTOR_FMT_16 1 // 16.16.16 normalized, 6 bytes per vector
#define VECTOR_FMT_11 2 // 11.10.11 normalized, 4 bytes per vector
#define VECTOR_FMT_6 3 // 6.5.5 normalized, 2 bytes per vector
// Reads a 16-bit value at byte address addrU. The containing 4-byte-aligned
// word is loaded; any unaligned address is treated as referring to the upper
// 16 bits of that word.
uint LoadUShort(SplatBufferDataType dataBuffer, uint addrU)
{
    uint alignedAddr = addrU & ~0x3;
    uint word = dataBuffer.Load(alignedAddr);
    uint shift = (addrU != alignedAddr) ? 16 : 0;
    return (word >> shift) & 0xFFFF;
}
// Reads a 32-bit value at byte address addrU. An aligned address is a single
// load; any unaligned address is treated as a 2-byte offset, so the value is
// stitched from the upper half of one word and the lower half of the next.
uint LoadUInt(SplatBufferDataType dataBuffer, uint addrU)
{
    uint alignedAddr = addrU & ~0x3;
    uint lowWord = dataBuffer.Load(alignedAddr);
    if (addrU == alignedAddr)
        return lowWord;

    uint highWord = dataBuffer.Load(alignedAddr + 4);
    return (lowWord >> 16) | ((highWord & 0xFFFF) << 16);
}
// Reads one packed vector from a byte-address buffer and decodes it to float3.
//   dataBuffer - buffer to read from
//   addrU      - byte address of the vector; either 4-byte aligned or offset by
//                2 bytes (any unaligned address is treated as a 2-byte offset)
//   fmt        - one of the VECTOR_FMT_* values; any other value yields 0
// VECTOR_FMT_32F returns the raw floats; the other formats return components
// normalized to 0..1.
// Words past the first are only loaded when the requested value actually
// covers them, so an aligned 11.10.11 read no longer touches the next word.
float3 LoadAndDecodeVector(SplatBufferDataType dataBuffer, uint addrU, uint fmt)
{
    uint addrA = addrU & ~0x3;
    bool unaligned = addrU != addrA;
    uint val0 = dataBuffer.Load(addrA);
    float3 res = 0;
    if (fmt == VECTOR_FMT_32F)
    {
        uint val1 = dataBuffer.Load(addrA + 4);
        uint val2 = dataBuffer.Load(addrA + 8);
        if (unaligned)
        {
            // shift the whole 96-bit value down by 16 bits across four words
            uint val3 = dataBuffer.Load(addrA + 12);
            val0 = (val0 >> 16) | ((val1 & 0xFFFF) << 16);
            val1 = (val1 >> 16) | ((val2 & 0xFFFF) << 16);
            val2 = (val2 >> 16) | ((val3 & 0xFFFF) << 16);
        }
        res = float3(asfloat(val0), asfloat(val1), asfloat(val2));
    }
    else if (fmt == VECTOR_FMT_16)
    {
        // 48 bits always span two words
        uint val1 = dataBuffer.Load(addrA + 4);
        if (unaligned)
        {
            val0 = (val0 >> 16) | ((val1 & 0xFFFF) << 16);
            val1 >>= 16;
        }
        res = DecodePacked_16_16_16(uint2(val0, val1));
    }
    else if (fmt == VECTOR_FMT_11)
    {
        if (unaligned)
        {
            // the value straddles two words only in the unaligned case
            uint val1 = dataBuffer.Load(addrA + 4);
            val0 = (val0 >> 16) | ((val1 & 0xFFFF) << 16);
        }
        res = DecodePacked_11_10_11(val0);
    }
    else if (fmt == VECTOR_FMT_6)
    {
        // the decoder only looks at the low 16 bits
        if (unaligned)
            val0 >>= 16;
        res = DecodePacked_6_5_5(val0);
    }
    return res;
}
// Loads the raw (possibly chunk-relative) position of a splat from _SplatPos,
// using the position format stored in the low 8 bits of _SplatFormat.
// An unknown format leaves the stride at 0 and decodes to 0.
float3 LoadSplatPosValue(uint index)
{
    uint fmt = _SplatFormat & 0xFF;

    // bytes per stored position
    uint stride = 0;
    switch (fmt)
    {
        case VECTOR_FMT_32F: stride = 12; break;
        case VECTOR_FMT_16:  stride = 6;  break;
        case VECTOR_FMT_11:  stride = 4;  break;
        case VECTOR_FMT_6:   stride = 2;  break;
        default: break;
    }
    return LoadAndDecodeVector(_SplatPos, index * stride, fmt);
}
// Loads the final position of a splat. When the splat belongs to a valid
// chunk, the raw value is used as the interpolation factor between the
// chunk's position min and max; otherwise the raw value is returned as-is.
float3 LoadSplatPos(uint idx)
{
    float3 rawPos = LoadSplatPosValue(idx);
    uint chunkIdx = idx / kChunkSize;
    if (chunkIdx >= _SplatChunkCount)
        return rawPos;

    SplatChunkInfo chunk = _SplatChunks[chunkIdx];
    float3 chunkMin = float3(chunk.posX.x, chunk.posY.x, chunk.posZ.x);
    float3 chunkMax = float3(chunk.posX.y, chunk.posY.y, chunk.posZ.y);
    return lerp(chunkMin, chunkMax, rawPos);
}
// Fetches one texel from _SplatColor; coord is (x, y, mip) as passed to Texture2D.Load.
half4 LoadSplatColTex(uint3 coord)
{
    half4 texel = _SplatColor.Load(coord);
    return texel;
}
// Loads and fully decodes one splat: position, rotation, scale, opacity, base
// color and spherical-harmonics coefficients.
//   idx - splat index
// Storage formats come from _SplatFormat (bits 8-15 = scale format, bits 16-23
// = SH format; the position format is handled by LoadSplatPosValue). If the
// splat belongs to a valid chunk, chunk-relative values are expanded with the
// chunk's min/max ranges.
SplatData LoadSplatData(uint idx)
{
SplatData s = (SplatData)0;
// figure out raw data offsets / locations
uint3 coord = SplatIndexToPixelIndex(idx);
uint scaleFmt = (_SplatFormat >> 8) & 0xFF;
uint shFormat = (_SplatFormat >> 16) & 0xFF;
// per-splat record in _SplatOther: rotation, then scale, then optional 16-bit SH index
uint otherStride = 4; // rotation is 10.10.10.2
if (scaleFmt == VECTOR_FMT_32F)
otherStride += 12;
else if (scaleFmt == VECTOR_FMT_16)
otherStride += 6;
else if (scaleFmt == VECTOR_FMT_11)
otherStride += 4;
else if (scaleFmt == VECTOR_FMT_6)
otherStride += 2;
// SH formats above VECTOR_FMT_6 append a 16-bit SH index to the record
if (shFormat > VECTOR_FMT_6)
otherStride += 2;
uint otherAddr = idx * otherStride;
// size in bytes of one SH record in _SplatSH
uint shStride = 0;
if (shFormat == VECTOR_FMT_32F)
shStride = 192; // 15*3 fp32, rounded up to multiple of 16
else if (shFormat == VECTOR_FMT_16 || shFormat > VECTOR_FMT_6)
shStride = 96; // 15*3 fp16, rounded up to multiple of 16
else if (shFormat == VECTOR_FMT_11)
shStride = 60; // 15x uint
else if (shFormat == VECTOR_FMT_6)
shStride = 32; // 15x ushort, rounded up to multiple of 4
// load raw splat data, which might be chunk-relative
s.pos = LoadSplatPosValue(idx);
s.rot = DecodeRotation(DecodePacked_10_10_10_2(LoadUInt(_SplatOther, otherAddr)));
s.scale = LoadAndDecodeVector(_SplatOther, otherAddr + 4, scaleFmt);
half4 col = LoadSplatColTex(coord);
// SH record index: the splat index itself, or the 16-bit index stored in the
// last two bytes of the _SplatOther record for SH formats above VECTOR_FMT_6
uint shIndex = idx;
if (shFormat > VECTOR_FMT_6)
shIndex = LoadUShort(_SplatOther, otherAddr + otherStride - 2);
uint shOffset = shIndex * shStride;
// the first 32 bytes are needed by every format; the rest is loaded per format
uint4 shRaw0 = _SplatSH.Load4(shOffset);
uint4 shRaw1 = _SplatSH.Load4(shOffset + 16);
if (shFormat == VECTOR_FMT_32F)
{
// 45 consecutive fp32 values: sh1.rgb, sh2.rgb, ... sh15.rgb
uint4 shRaw2 = _SplatSH.Load4(shOffset + 32);
uint4 shRaw3 = _SplatSH.Load4(shOffset + 48);
uint4 shRaw4 = _SplatSH.Load4(shOffset + 64);
uint4 shRaw5 = _SplatSH.Load4(shOffset + 80);
uint4 shRaw6 = _SplatSH.Load4(shOffset + 96);
uint4 shRaw7 = _SplatSH.Load4(shOffset + 112);
uint4 shRaw8 = _SplatSH.Load4(shOffset + 128);
uint4 shRaw9 = _SplatSH.Load4(shOffset + 144);
uint4 shRawA = _SplatSH.Load4(shOffset + 160);
uint shRawB = _SplatSH.Load(shOffset + 176);
s.sh.sh1.r = asfloat(shRaw0.x); s.sh.sh1.g = asfloat(shRaw0.y); s.sh.sh1.b = asfloat(shRaw0.z);
s.sh.sh2.r = asfloat(shRaw0.w); s.sh.sh2.g = asfloat(shRaw1.x); s.sh.sh2.b = asfloat(shRaw1.y);
s.sh.sh3.r = asfloat(shRaw1.z); s.sh.sh3.g = asfloat(shRaw1.w); s.sh.sh3.b = asfloat(shRaw2.x);
s.sh.sh4.r = asfloat(shRaw2.y); s.sh.sh4.g = asfloat(shRaw2.z); s.sh.sh4.b = asfloat(shRaw2.w);
s.sh.sh5.r = asfloat(shRaw3.x); s.sh.sh5.g = asfloat(shRaw3.y); s.sh.sh5.b = asfloat(shRaw3.z);
s.sh.sh6.r = asfloat(shRaw3.w); s.sh.sh6.g = asfloat(shRaw4.x); s.sh.sh6.b = asfloat(shRaw4.y);
s.sh.sh7.r = asfloat(shRaw4.z); s.sh.sh7.g = asfloat(shRaw4.w); s.sh.sh7.b = asfloat(shRaw5.x);
s.sh.sh8.r = asfloat(shRaw5.y); s.sh.sh8.g = asfloat(shRaw5.z); s.sh.sh8.b = asfloat(shRaw5.w);
s.sh.sh9.r = asfloat(shRaw6.x); s.sh.sh9.g = asfloat(shRaw6.y); s.sh.sh9.b = asfloat(shRaw6.z);
s.sh.sh10.r = asfloat(shRaw6.w); s.sh.sh10.g = asfloat(shRaw7.x); s.sh.sh10.b = asfloat(shRaw7.y);
s.sh.sh11.r = asfloat(shRaw7.z); s.sh.sh11.g = asfloat(shRaw7.w); s.sh.sh11.b = asfloat(shRaw8.x);
s.sh.sh12.r = asfloat(shRaw8.y); s.sh.sh12.g = asfloat(shRaw8.z); s.sh.sh12.b = asfloat(shRaw8.w);
s.sh.sh13.r = asfloat(shRaw9.x); s.sh.sh13.g = asfloat(shRaw9.y); s.sh.sh13.b = asfloat(shRaw9.z);
s.sh.sh14.r = asfloat(shRaw9.w); s.sh.sh14.g = asfloat(shRawA.x); s.sh.sh14.b = asfloat(shRawA.y);
s.sh.sh15.r = asfloat(shRawA.z); s.sh.sh15.g = asfloat(shRawA.w); s.sh.sh15.b = asfloat(shRawB);
}
else if (shFormat == VECTOR_FMT_16 || shFormat > VECTOR_FMT_6)
{
// 45 consecutive fp16 values, two per uint (low half first)
uint4 shRaw2 = _SplatSH.Load4(shOffset + 32);
uint4 shRaw3 = _SplatSH.Load4(shOffset + 48);
uint4 shRaw4 = _SplatSH.Load4(shOffset + 64);
uint3 shRaw5 = _SplatSH.Load3(shOffset + 80);
s.sh.sh1.r = f16tof32(shRaw0.x ); s.sh.sh1.g = f16tof32(shRaw0.x >> 16); s.sh.sh1.b = f16tof32(shRaw0.y );
s.sh.sh2.r = f16tof32(shRaw0.y >> 16); s.sh.sh2.g = f16tof32(shRaw0.z ); s.sh.sh2.b = f16tof32(shRaw0.z >> 16);
s.sh.sh3.r = f16tof32(shRaw0.w ); s.sh.sh3.g = f16tof32(shRaw0.w >> 16); s.sh.sh3.b = f16tof32(shRaw1.x );
s.sh.sh4.r = f16tof32(shRaw1.x >> 16); s.sh.sh4.g = f16tof32(shRaw1.y ); s.sh.sh4.b = f16tof32(shRaw1.y >> 16);
s.sh.sh5.r = f16tof32(shRaw1.z ); s.sh.sh5.g = f16tof32(shRaw1.z >> 16); s.sh.sh5.b = f16tof32(shRaw1.w );
s.sh.sh6.r = f16tof32(shRaw1.w >> 16); s.sh.sh6.g = f16tof32(shRaw2.x ); s.sh.sh6.b = f16tof32(shRaw2.x >> 16);
s.sh.sh7.r = f16tof32(shRaw2.y ); s.sh.sh7.g = f16tof32(shRaw2.y >> 16); s.sh.sh7.b = f16tof32(shRaw2.z );
s.sh.sh8.r = f16tof32(shRaw2.z >> 16); s.sh.sh8.g = f16tof32(shRaw2.w ); s.sh.sh8.b = f16tof32(shRaw2.w >> 16);
s.sh.sh9.r = f16tof32(shRaw3.x ); s.sh.sh9.g = f16tof32(shRaw3.x >> 16); s.sh.sh9.b = f16tof32(shRaw3.y );
s.sh.sh10.r = f16tof32(shRaw3.y >> 16); s.sh.sh10.g = f16tof32(shRaw3.z ); s.sh.sh10.b = f16tof32(shRaw3.z >> 16);
s.sh.sh11.r = f16tof32(shRaw3.w ); s.sh.sh11.g = f16tof32(shRaw3.w >> 16); s.sh.sh11.b = f16tof32(shRaw4.x );
s.sh.sh12.r = f16tof32(shRaw4.x >> 16); s.sh.sh12.g = f16tof32(shRaw4.y ); s.sh.sh12.b = f16tof32(shRaw4.y >> 16);
s.sh.sh13.r = f16tof32(shRaw4.z ); s.sh.sh13.g = f16tof32(shRaw4.z >> 16); s.sh.sh13.b = f16tof32(shRaw4.w );
s.sh.sh14.r = f16tof32(shRaw4.w >> 16); s.sh.sh14.g = f16tof32(shRaw5.x ); s.sh.sh14.b = f16tof32(shRaw5.x >> 16);
s.sh.sh15.r = f16tof32(shRaw5.y ); s.sh.sh15.g = f16tof32(shRaw5.y >> 16); s.sh.sh15.b = f16tof32(shRaw5.z );
}
else if (shFormat == VECTOR_FMT_11)
{
// one 11.10.11 packed uint per coefficient
uint4 shRaw2 = _SplatSH.Load4(shOffset + 32);
uint3 shRaw3 = _SplatSH.Load3(shOffset + 48);
s.sh.sh1 = DecodePacked_11_10_11(shRaw0.x);
s.sh.sh2 = DecodePacked_11_10_11(shRaw0.y);
s.sh.sh3 = DecodePacked_11_10_11(shRaw0.z);
s.sh.sh4 = DecodePacked_11_10_11(shRaw0.w);
s.sh.sh5 = DecodePacked_11_10_11(shRaw1.x);
s.sh.sh6 = DecodePacked_11_10_11(shRaw1.y);
s.sh.sh7 = DecodePacked_11_10_11(shRaw1.z);
s.sh.sh8 = DecodePacked_11_10_11(shRaw1.w);
s.sh.sh9 = DecodePacked_11_10_11(shRaw2.x);
s.sh.sh10 = DecodePacked_11_10_11(shRaw2.y);
s.sh.sh11 = DecodePacked_11_10_11(shRaw2.z);
s.sh.sh12 = DecodePacked_11_10_11(shRaw2.w);
s.sh.sh13 = DecodePacked_11_10_11(shRaw3.x);
s.sh.sh14 = DecodePacked_11_10_11(shRaw3.y);
s.sh.sh15 = DecodePacked_11_10_11(shRaw3.z);
}
else if (shFormat == VECTOR_FMT_6)
{
// one 5.6.5 packed ushort per coefficient, two per uint (low half first);
// the decoder only looks at the low 16 bits of its argument
s.sh.sh1 = DecodePacked_5_6_5(shRaw0.x);
s.sh.sh2 = DecodePacked_5_6_5(shRaw0.x >> 16);
s.sh.sh3 = DecodePacked_5_6_5(shRaw0.y);
s.sh.sh4 = DecodePacked_5_6_5(shRaw0.y >> 16);
s.sh.sh5 = DecodePacked_5_6_5(shRaw0.z);
s.sh.sh6 = DecodePacked_5_6_5(shRaw0.z >> 16);
s.sh.sh7 = DecodePacked_5_6_5(shRaw0.w);
s.sh.sh8 = DecodePacked_5_6_5(shRaw0.w >> 16);
s.sh.sh9 = DecodePacked_5_6_5(shRaw1.x);
s.sh.sh10 = DecodePacked_5_6_5(shRaw1.x >> 16);
s.sh.sh11 = DecodePacked_5_6_5(shRaw1.y);
s.sh.sh12 = DecodePacked_5_6_5(shRaw1.y >> 16);
s.sh.sh13 = DecodePacked_5_6_5(shRaw1.z);
s.sh.sh14 = DecodePacked_5_6_5(shRaw1.z >> 16);
s.sh.sh15 = DecodePacked_5_6_5(shRaw1.w);
}
// if raw data is chunk-relative, convert to final values by interpolating between chunk min/max
uint chunkIdx = idx / kChunkSize;
if (chunkIdx < _SplatChunkCount)
{
SplatChunkInfo chunk = _SplatChunks[chunkIdx];
// position ranges are float2 (min, max); the other ranges pack two fp16
// values per uint: low 16 bits = min, high 16 bits = max
float3 posMin = float3(chunk.posX.x, chunk.posY.x, chunk.posZ.x);
float3 posMax = float3(chunk.posX.y, chunk.posY.y, chunk.posZ.y);
half3 sclMin = half3(f16tof32(chunk.sclX ), f16tof32(chunk.sclY ), f16tof32(chunk.sclZ ));
half3 sclMax = half3(f16tof32(chunk.sclX>>16), f16tof32(chunk.sclY>>16), f16tof32(chunk.sclZ>>16));
half4 colMin = half4(f16tof32(chunk.colR ), f16tof32(chunk.colG ), f16tof32(chunk.colB ), f16tof32(chunk.colA ));
half4 colMax = half4(f16tof32(chunk.colR>>16), f16tof32(chunk.colG>>16), f16tof32(chunk.colB>>16), f16tof32(chunk.colA>>16));
half3 shMin = half3(f16tof32(chunk.shR ), f16tof32(chunk.shG ), f16tof32(chunk.shB ));
half3 shMax = half3(f16tof32(chunk.shR>>16), f16tof32(chunk.shG>>16), f16tof32(chunk.shB>>16));
s.pos = lerp(posMin, posMax, s.pos);
s.scale = lerp(sclMin, sclMax, s.scale);
// three successive squarings raise the interpolated scale to the 8th power
s.scale *= s.scale;
s.scale *= s.scale;
s.scale *= s.scale;
col = lerp(colMin, colMax, col);
col.a = InvSquareCentered01(col.a);
// only the normalized SH formats (16, 11, 6) are chunk-relative
if (shFormat > VECTOR_FMT_32F && shFormat <= VECTOR_FMT_6)
{
s.sh.sh1 = lerp(shMin, shMax, s.sh.sh1 );
s.sh.sh2 = lerp(shMin, shMax, s.sh.sh2 );
s.sh.sh3 = lerp(shMin, shMax, s.sh.sh3 );
s.sh.sh4 = lerp(shMin, shMax, s.sh.sh4 );
s.sh.sh5 = lerp(shMin, shMax, s.sh.sh5 );
s.sh.sh6 = lerp(shMin, shMax, s.sh.sh6 );
s.sh.sh7 = lerp(shMin, shMax, s.sh.sh7 );
s.sh.sh8 = lerp(shMin, shMax, s.sh.sh8 );
s.sh.sh9 = lerp(shMin, shMax, s.sh.sh9 );
s.sh.sh10 = lerp(shMin, shMax, s.sh.sh10);
s.sh.sh11 = lerp(shMin, shMax, s.sh.sh11);
s.sh.sh12 = lerp(shMin, shMax, s.sh.sh12);
s.sh.sh13 = lerp(shMin, shMax, s.sh.sh13);
s.sh.sh14 = lerp(shMin, shMax, s.sh.sh14);
s.sh.sh15 = lerp(shMin, shMax, s.sh.sh15);
}
}
// color alpha becomes the opacity, rgb becomes the SH base color
s.opacity = col.a;
s.sh.col = col.rgb;
return s;
}
// Per-splat view data record. Member order is kept as-is.
// NOTE(review): the code that fills and reads this struct is outside this
// file section, so the exact meaning of pos/axis1/axis2 should be confirmed there.
struct SplatViewData
{
    float4 pos;
    float2 axis1;
    float2 axis2;
    uint2 color; // 4xFP16
};
#endif // GAUSSIAN_SPLATTING_HLSL