// SPDX-License-Identifier: MIT
#ifndef GAUSSIAN_SPLATTING_HLSL
#define GAUSSIAN_SPLATTING_HLSL

// Remaps x around the 0.5 midpoint: the offset from 0.5 is halved and then
// replaced by its signed square root, before 0.5 is added back.
float InvSquareCentered01(float x)
{
    x -= 0.5;
    x *= 0.5;
    x = sqrt(abs(x)) * sign(x);
    return x + 0.5;
}

// Rotates vector v by quaternion r (xyz = vector part, w = scalar part).
// NOTE(review): the formula presumably expects r to be unit length - confirm at call sites.
float3 QuatRotateVector(float3 v, float4 r)
{
    float3 t = 2 * cross(r.xyz, v);
    return v + r.w * t + cross(r.xyz, t);
}

// Hamilton product a * b of two quaternions stored as (x, y, z, w).
float4 QuatMul(float4 a, float4 b)
{
    return float4(a.wwww * b + (a.xyzx * b.wwwx + a.yzxy * b.zxyy) * float4(1,1,1,-1) - a.zxyz * b.yzxz);
}

// Quaternion inverse: the conjugate divided by the squared length.
// No guard against a zero-length input (rcp(0)).
float4 QuatInverse(float4 q)
{
    return rcp(dot(q, q)) * q * float4(-1,-1,-1,1);
}

// Builds the 3x3 matrix (rotation from quaternion `rot`) * (diagonal matrix of `scale`).
float3x3 CalcMatrixFromRotationScale(float4 rot, float3 scale)
{
    float3x3 ms = float3x3(
        scale.x, 0, 0,
        0, scale.y, 0,
        0, 0, scale.z
    );
    float x = rot.x;
    float y = rot.y;
    float z = rot.z;
    float w = rot.w;
    float3x3 mr = float3x3(
        1-2*(y*y + z*z),   2*(x*y - w*z),   2*(x*z + w*y),
          2*(x*y + w*z), 1-2*(x*x + z*z),   2*(y*z - w*x),
          2*(x*z - w*y),   2*(y*z + w*x), 1-2*(x*x + y*y)
    );
    return mul(mr, ms);
}

// Computes the symmetric matrix rotMat * rotMat^T and returns its six unique
// elements: sigma0 = (m00, m01, m02), sigma1 = (m11, m12, m22).
void CalcCovariance3D(float3x3 rotMat, out float3 sigma0, out float3 sigma1)
{
    float3x3 sig = mul(rotMat, transpose(rotMat));
    sigma0 = float3(sig._m00, sig._m01, sig._m02);
    sigma1 = float3(sig._m11, sig._m12, sig._m22);
}

// from "EWA Splatting" (Zwicker et al 2002) eq. 31
//
// Projects a 3D covariance (packed as produced by CalcCovariance3D) into a 2D
// screen-space covariance. Returns (cov00, cov01, cov11).
//   worldPos     - splat center, transformed by matrixV inside this function
//   cov3d0/1     - packed symmetric 3x3 covariance
//   matrixV/P    - view and projection matrices
//   screenParams - only .x is read here; it scales the focal length
//                  (presumably the render target width in pixels - confirm at call sites)
float3 CalcCovariance2D(float3 worldPos, float3 cov3d0, float3 cov3d1, float4x4 matrixV, float4x4 matrixP, float4 screenParams)
{
    float4x4 viewMatrix = matrixV;
    float3 viewPos = mul(viewMatrix, float4(worldPos, 1)).xyz;

    // this is needed in order for splats that are visible in view but clipped "quite a lot" to work
    // NOTE(review): matrixP._m11 * aspect is algebraically matrixP._m00, so
    // tanFovY (and limY) come out equal to tanFovX (and limX).
    float aspect = matrixP._m00 / matrixP._m11;
    float tanFovX = rcp(matrixP._m00);
    float tanFovY = rcp(matrixP._m11 * aspect);
    float limX = 1.3 * tanFovX;
    float limY = 1.3 * tanFovY;
    viewPos.x = clamp(viewPos.x / viewPos.z, -limX, limX) * viewPos.z;
    viewPos.y = clamp(viewPos.y / viewPos.z, -limY, limY) * viewPos.z;

    float focal = screenParams.x * matrixP._m00 / 2;

    // Jacobian of the perspective projection at viewPos; the third row is zero
    // because only the 2D part of the result is used.
    float3x3 J = float3x3(
        focal / viewPos.z, 0, -(focal * viewPos.x) / (viewPos.z * viewPos.z),
        0, focal / viewPos.z, -(focal * viewPos.y) / (viewPos.z * viewPos.z),
        0, 0, 0
    );
    float3x3 W = (float3x3)viewMatrix;
    float3x3 T = mul(J, W);
    // Unpack the symmetric 3D covariance.
    float3x3 V = float3x3(
        cov3d0.x, cov3d0.y, cov3d0.z,
        cov3d0.y, cov3d1.x, cov3d1.y,
        cov3d0.z, cov3d1.y, cov3d1.z
    );
    float3x3 cov = mul(T, mul(V, transpose(T)));

    // Low pass filter to make each splat at least 1px size.
    cov._m00 += 0.3;
    cov._m11 += 0.3;
    return float3(cov._m00, cov._m01, cov._m11);
}

// Inverts the symmetric 2x2 matrix [[x, y], [y, z]] packed in cov2d and
// returns the inverse packed the same way. No guard against a zero determinant.
float3 CalcConic(float3 cov2d)
{
    float det = cov2d.x * cov2d.z - cov2d.y * cov2d.y;
    return float3(cov2d.z, -cov2d.y, cov2d.x) * rcp(det);
}

// Offset from the splat center to a pixel position, with the y component
// multiplied by projectionParams.x.
// NOTE(review): projectionParams.x is presumably a +1/-1 vertical flip factor - confirm.
float2 CalcScreenSpaceDelta(float2 svPositionXY, float2 centerXY, float4 projectionParams)
{
    float2 d = svPositionXY - centerXY;
    d.y *= projectionParams.x;
    return d;
}

// Evaluates the quadratic form of `conic` (as returned by CalcConic) at offset d.
// The sign of the cross term is tied to the y handling in CalcScreenSpaceDelta.
float CalcPowerFromConic(float3 conic, float2 d)
{
    return -0.5 * (conic.x * d.x*d.x + conic.z * d.y*d.y) + conic.y * d.x*d.y;
}

// Morton interleaving 16x16 group i.e. by 4 bits of coordinates, based on this thread:
// https://twitter.com/rygorous/status/986715358852608000
// which is simplified version of https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
uint EncodeMorton2D_16x16(uint2 c)
{
    uint t = ((c.y & 0xF) << 8) | (c.x & 0xF); // ----EFGH----ABCD
    t = (t ^ (t << 2)) & 0x3333;               // --EF--GH--AB--CD
    t = (t ^ (t << 1)) & 0x5555;               // -E-F-G-H-A-B-C-D
    return (t | (t >> 7)) & 0xFF;              // --------EAFBGCHD
}

// Inverse of EncodeMorton2D_16x16: only the low 8 bits of t are used.
uint2 DecodeMorton2D_16x16(uint t)      // --------EAFBGCHD
{
    t = (t & 0xFF) | ((t & 0xFE) << 7); // -EAFBGCHEAFBGCHD
    t &= 0x5555;                        // -E-F-G-H-A-B-C-D
    t = (t ^ (t >> 1)) & 0x3333;        // --EF--GH--AB--CD
    t = (t ^ (t >> 2)) & 0x0f0f;        // ----EFGH----ABCD
    return uint2(t & 0xF, t >> 8);      // --------EFGHABCD
}

// Spherical harmonics basis constants for bands 1, 2 and 3.
static const float SH_C1 = 0.4886025;
static const float SH_C2[] = { 1.0925484, -1.0925484, 0.3153916, -1.0925484, 0.5462742 };
static const float SH_C3[] = { -0.5900436, 2.8906114, -0.4570458, 0.3731763, -0.4570458, 1.4453057, -0.5900436 };

// Per-splat color: `col` is the ambient term, sh1..sh15 are the coefficients
// of bands 1 (sh1-3), 2 (sh4-8) and 3 (sh9-15).
struct SplatSHData
{
    half3 col, sh1, sh2, sh3, sh4, sh5, sh6, sh7, sh8, sh9, sh10, sh11, sh12, sh13, sh14, sh15;
};

// Evaluates the splat's spherical harmonics for direction `dir`.
//   dir     - direction; it is negated before evaluation
//   shOrder - highest band to include (bands above 3 are not implemented)
//   onlySH  - when true the ambient color is replaced by a constant 0.5
// Returns the color clamped to be non-negative.
half3 ShadeSH(SplatSHData splat, half3 dir, int shOrder, bool onlySH)
{
    dir *= -1;

    half x = dir.x, y = dir.y, z = dir.z;

    // ambient band
    half3 res = splat.col; // col = sh0 * SH_C0 + 0.5 is already precomputed
    if (onlySH)
        res = 0.5;
    // 1st degree
    if (shOrder >= 1)
    {
        res += SH_C1 * (-splat.sh1 * y + splat.sh2 * z - splat.sh3 * x);
        // 2nd degree
        if (shOrder >= 2)
        {
            half xx = x * x, yy = y * y, zz = z * z;
            half xy = x * y, yz = y * z, xz = x * z;
            res +=
                (SH_C2[0] * xy) * splat.sh4 +
                (SH_C2[1] * yz) * splat.sh5 +
                (SH_C2[2] * (2 * zz - xx - yy)) * splat.sh6 +
                (SH_C2[3] * xz) * splat.sh7 +
                (SH_C2[4] * (xx - yy)) * splat.sh8;
            // 3rd degree
            if (shOrder >= 3)
            {
                res +=
                    (SH_C3[0] * y * (3 * xx - yy)) * splat.sh9 +
                    (SH_C3[1] * xy * z) * splat.sh10 +
                    (SH_C3[2] * y * (4 * zz - xx - yy)) * splat.sh11 +
                    (SH_C3[3] * z * (2 * zz - 3 * xx - 3 * yy)) * splat.sh12 +
                    (SH_C3[4] * x * (4 * zz - xx - yy)) * splat.sh13 +
                    (SH_C3[5] * z * (xx - yy)) * splat.sh14 +
                    (SH_C3[6] * x * (xx - 3 * yy)) * splat.sh15;
            }
        }
    }
    return max(res, 0);
}

// Width in texels assumed for the splat color texture layout below.
static const uint kTexWidth = 2048;

// Maps a splat index to a texel coordinate. The low 8 bits are Morton-decoded
// into a position inside a 16x16 tile; the remaining bits select the tile,
// with kTexWidth / 16 tiles per row. z is always 0.
uint3 SplatIndexToPixelIndex(uint idx)
{
    uint3 res;

    uint2 xy = DecodeMorton2D_16x16(idx);
    uint width = kTexWidth / 16;
    idx >>= 8;
    res.x = (idx % width) * 16 + xy.x;
    res.y = (idx / width) * 16 + xy.y;
    res.z = 0;
    return res;
}

// Per-chunk value ranges used to expand chunk-relative splat data.
// col*/scl*/sh* each pack two fp16 values: min in the low 16 bits, max in the
// high 16 bits. pos* hold (min, max) as floats.
struct SplatChunkInfo
{
    uint colR, colG, colB, colA;
    float2 posX, posY, posZ;
    uint sclX, sclY, sclZ;
    uint shR, shG, shB;
};

// The element type is required: LoadSplatPos / LoadSplatData read
// SplatChunkInfo values out of this buffer.
StructuredBuffer<SplatChunkInfo> _SplatChunks;
uint _SplatChunkCount;

// Number of consecutive splats that share one SplatChunkInfo.
static const uint kChunkSize = 256;

// Fully decoded splat.
struct SplatData
{
    float3 pos;
    float4 rot;
    float3 scale;
    half opacity;
    SplatSHData sh;
};

// Decode quaternion from a "smallest 3" e.g. 10.10.10.2 format
// pq.xyz hold the three stored components in 0..1; pq.w * 3 is the index of
// the component that was dropped and is reconstructed here.
float4 DecodeRotation(float4 pq)
{
    uint idx = (uint)round(pq.w * 3.0); // note: need to round or index might come out wrong in some formats (e.g. fp16.fp16.fp16.fp16)
    float4 q;
    q.xyz = pq.xyz * sqrt(2.0) - (1.0 / sqrt(2.0));
    q.w = sqrt(1.0 - saturate(dot(q.xyz, q.xyz)));
    // Move the reconstructed component back to its slot; idx == 3 is already in place.
    if (idx == 0) q = q.wxyz;
    if (idx == 1) q = q.xwyz;
    if (idx == 2) q = q.xywz;
    return q;
}

// Packs a quaternion into "smallest 3" form: xyz = the three components other
// than the largest-magnitude one, remapped to 0..1; w = index of the dropped
// component divided by 3.
float4 PackSmallest3Rotation(float4 q)
{
    // find biggest component
    float4 absQ = abs(q);
    int index = 0;
    float maxV = absQ.x;
    if (absQ.y > maxV)
    {
        index = 1;
        maxV = absQ.y;
    }
    if (absQ.z > maxV)
    {
        index = 2;
        maxV = absQ.z;
    }
    if (absQ.w > maxV)
    {
        index = 3;
        maxV = absQ.w;
    }

    // Swizzle so that the biggest component ends up in w.
    if (index == 0) q = q.yzwx;
    if (index == 1) q = q.xzwy;
    if (index == 2) q = q.xywz;

    // Flip the sign so the dropped component is non-negative.
    float3 three = q.xyz * (q.w >= 0 ? 1 : -1); // -1/sqrt2..+1/sqrt2 range
    three = (three * sqrt(2.0)) * 0.5 + 0.5; // 0..1 range

    return float4(three, index / 3.0);
}

// Unpacks 6.5.5 bits (x in the lowest bits) to 0..1 values.
half3 DecodePacked_6_5_5(uint enc)
{
    return half3(
        (enc & 63) / 63.0,
        ((enc >> 6) & 31) / 31.0,
        ((enc >> 11) & 31) / 31.0);
}

// Unpacks 5.6.5 bits (x in the lowest bits) to 0..1 values.
half3 DecodePacked_5_6_5(uint enc)
{
    return half3(
        (enc & 31) / 31.0,
        ((enc >> 5) & 63) / 63.0,
        ((enc >> 11) & 31) / 31.0);
}

// Unpacks 11.10.11 bits (x in the lowest bits) to 0..1 values.
half3 DecodePacked_11_10_11(uint enc)
{
    return half3(
        (enc & 2047) / 2047.0,
        ((enc >> 11) & 1023) / 1023.0,
        ((enc >> 21) & 2047) / 2047.0);
}

// Unpacks three 16-bit values (enc.x low, enc.x high, enc.y low) to 0..1 values.
float3 DecodePacked_16_16_16(uint2 enc)
{
    return float3(
        (enc.x & 65535) / 65535.0,
        ((enc.x >> 16) & 65535) / 65535.0,
        (enc.y & 65535) / 65535.0);
}

// Unpacks 10.10.10.2 bits (x in the lowest bits) to 0..1 values.
float4 DecodePacked_10_10_10_2(uint enc)
{
    return float4(
        (enc & 1023) / 1023.0,
        ((enc >> 10) & 1023) / 1023.0,
        ((enc >> 20) & 1023) / 1023.0,
        ((enc >> 30) & 3) / 3.0);
}

// Packs v into 10.10.10.2 bits; components are scaled by 1023.5 / 3.5 and truncated.
// NOTE(review): presumably expects v in 0..1 - values outside that range are not masked.
uint EncodeQuatToNorm10(float4 v) // 32 bits: 10.10.10.2
{
    return (uint) (v.x * 1023.5f) | ((uint) (v.y * 1023.5f) << 10) | ((uint) (v.z * 1023.5f) << 20) | ((uint) (v.w * 3.5f) << 30);
}

// The splat buffers are declared read-write when compiled for a compute stage.
#ifdef SHADER_STAGE_COMPUTE
#define SplatBufferDataType RWByteAddressBuffer
#else
#define SplatBufferDataType ByteAddressBuffer
#endif

SplatBufferDataType _SplatPos;
SplatBufferDataType _SplatOther;
SplatBufferDataType _SplatSH;
Texture2D _SplatColor;
// Packed formats: bits 0-7 position, bits 8-15 scale, bits 16-23 SH.
uint _SplatFormat;

// Match GaussianSplatAsset.VectorFormat
#define VECTOR_FMT_32F 0
#define VECTOR_FMT_16 1
#define VECTOR_FMT_11 2
#define VECTOR_FMT_6 3

// Loads a 16-bit value at byte address addrU. The containing 4-byte word is
// loaded; any address that is not 4-byte aligned selects the upper half.
uint LoadUShort(SplatBufferDataType dataBuffer, uint addrU)
{
    uint addrA = addrU & ~0x3;
    uint val = dataBuffer.Load(addrA);
    if (addrU != addrA)
        val >>= 16;
    return val & 0xFFFF;
}

// Loads a 32-bit value at byte address addrU. An address that is not 4-byte
// aligned is treated as being offset by 2 bytes and is stitched together from
// two neighboring words.
uint LoadUInt(SplatBufferDataType dataBuffer, uint addrU)
{
    uint addrA = addrU & ~0x3;
    uint val = dataBuffer.Load(addrA);
    if (addrU != addrA)
    {
        uint val1 = dataBuffer.Load(addrA + 4);
        val = (val >> 16) | ((val1 & 0xFFFF) << 16);
    }
    return val;
}

// Size in bytes of one vector stored in the given VECTOR_FMT_* format;
// 0 for any other value.
uint VectorFormatStride(uint fmt)
{
    uint stride = 0;
    if (fmt == VECTOR_FMT_32F)
        stride = 12;
    else if (fmt == VECTOR_FMT_16)
        stride = 6;
    else if (fmt == VECTOR_FMT_11)
        stride = 4;
    else if (fmt == VECTOR_FMT_6)
        stride = 2;
    return stride;
}

// Loads a 3-component vector stored at byte address addrU in format `fmt`.
// Addresses that are not 4-byte aligned are handled as 2-byte offsets.
// Returns 0 for an unrecognized format. Packed formats decode to 0..1 values.
float3 LoadAndDecodeVector(SplatBufferDataType dataBuffer, uint addrU, uint fmt)
{
    uint addrA = addrU & ~0x3;

    uint val0 = dataBuffer.Load(addrA);

    float3 res = 0;
    if (fmt == VECTOR_FMT_32F)
    {
        uint val1 = dataBuffer.Load(addrA + 4);
        uint val2 = dataBuffer.Load(addrA + 8);
        if (addrU != addrA)
        {
            uint val3 = dataBuffer.Load(addrA + 12);
            val0 = (val0 >> 16) | ((val1 & 0xFFFF) << 16);
            val1 = (val1 >> 16) | ((val2 & 0xFFFF) << 16);
            val2 = (val2 >> 16) | ((val3 & 0xFFFF) << 16);
        }
        res = float3(asfloat(val0), asfloat(val1), asfloat(val2));
    }
    else if (fmt == VECTOR_FMT_16)
    {
        uint val1 = dataBuffer.Load(addrA + 4);
        if (addrU != addrA)
        {
            val0 = (val0 >> 16) | ((val1 & 0xFFFF) << 16);
            val1 >>= 16;
        }
        res = DecodePacked_16_16_16(uint2(val0, val1));
    }
    else if (fmt == VECTOR_FMT_11)
    {
        uint val1 = dataBuffer.Load(addrA + 4);
        if (addrU != addrA)
        {
            val0 = (val0 >> 16) | ((val1 & 0xFFFF) << 16);
        }
        res = DecodePacked_11_10_11(val0);
    }
    else if (fmt == VECTOR_FMT_6)
    {
        if (addrU != addrA)
            val0 >>= 16;
        res = DecodePacked_6_5_5(val0);
    }
    return res;
}

// Loads the raw (possibly chunk-relative) position of splat `index`.
float3 LoadSplatPosValue(uint index)
{
    uint fmt = _SplatFormat & 0xFF;
    uint stride = VectorFormatStride(fmt);
    return LoadAndDecodeVector(_SplatPos, index * stride, fmt);
}

// Loads the final position of splat `idx`, expanding it between the chunk's
// min/max when a chunk exists for this index.
float3 LoadSplatPos(uint idx)
{
    float3 pos = LoadSplatPosValue(idx);
    uint chunkIdx = idx / kChunkSize;
    if (chunkIdx < _SplatChunkCount)
    {
        SplatChunkInfo chunk = _SplatChunks[chunkIdx];
        float3 posMin = float3(chunk.posX.x, chunk.posY.x, chunk.posZ.x);
        float3 posMax = float3(chunk.posX.y, chunk.posY.y, chunk.posZ.y);
        pos = lerp(posMin, posMax, pos);
    }
    return pos;
}

// Fetches one texel of the splat color texture (coord.z is the mip level).
half4 LoadSplatColTex(uint3 coord)
{
    return _SplatColor.Load(coord);
}

// Loads and fully decodes splat `idx`: position, rotation, scale, opacity and
// SH color, expanding chunk-relative values when a chunk exists for it.
SplatData LoadSplatData(uint idx)
{
    SplatData s = (SplatData)0;

    // figure out raw data offsets / locations
    uint3 coord = SplatIndexToPixelIndex(idx);

    uint scaleFmt = (_SplatFormat >> 8) & 0xFF;
    uint shFormat = (_SplatFormat >> 16) & 0xFF;

    uint otherStride = 4; // rotation is 10.10.10.2
    otherStride += VectorFormatStride(scaleFmt);
    // SH formats above VECTOR_FMT_6 store a 16-bit SH table index per splat.
    if (shFormat > VECTOR_FMT_6)
        otherStride += 2;
    uint otherAddr = idx * otherStride;

    uint shStride = 0;
    if (shFormat == VECTOR_FMT_32F)
        shStride = 192; // 15*3 fp32, rounded up to multiple of 16
    else if (shFormat == VECTOR_FMT_16 || shFormat > VECTOR_FMT_6)
        shStride = 96; // 15*3 fp16, rounded up to multiple of 16
    else if (shFormat == VECTOR_FMT_11)
        shStride = 60; // 15x uint
    else if (shFormat == VECTOR_FMT_6)
        shStride = 32; // 15x ushort, rounded up to multiple of 4

    // load raw splat data, which might be chunk-relative
    s.pos = LoadSplatPosValue(idx);
    s.rot = DecodeRotation(DecodePacked_10_10_10_2(LoadUInt(_SplatOther, otherAddr)));
    s.scale = LoadAndDecodeVector(_SplatOther, otherAddr + 4, scaleFmt);
    half4 col = LoadSplatColTex(coord);

    uint shIndex = idx;
    if (shFormat > VECTOR_FMT_6)
        shIndex = LoadUShort(_SplatOther, otherAddr + otherStride - 2);

    uint shOffset = shIndex * shStride;
    uint4 shRaw0 = _SplatSH.Load4(shOffset);
    uint4 shRaw1 = _SplatSH.Load4(shOffset + 16);
    if (shFormat == VECTOR_FMT_32F)
    {
        // 45 consecutive fp32 values: three per coefficient, sh1 first.
        uint4 shRaw2 = _SplatSH.Load4(shOffset + 32);
        uint4 shRaw3 = _SplatSH.Load4(shOffset + 48);
        uint4 shRaw4 = _SplatSH.Load4(shOffset + 64);
        uint4 shRaw5 = _SplatSH.Load4(shOffset + 80);
        uint4 shRaw6 = _SplatSH.Load4(shOffset + 96);
        uint4 shRaw7 = _SplatSH.Load4(shOffset + 112);
        uint4 shRaw8 = _SplatSH.Load4(shOffset + 128);
        uint4 shRaw9 = _SplatSH.Load4(shOffset + 144);
        uint4 shRawA = _SplatSH.Load4(shOffset + 160);
        uint shRawB = _SplatSH.Load(shOffset + 176);
        s.sh.sh1  = asfloat(shRaw0.xyz);
        s.sh.sh2  = asfloat(uint3(shRaw0.w, shRaw1.x, shRaw1.y));
        s.sh.sh3  = asfloat(uint3(shRaw1.z, shRaw1.w, shRaw2.x));
        s.sh.sh4  = asfloat(shRaw2.yzw);
        s.sh.sh5  = asfloat(shRaw3.xyz);
        s.sh.sh6  = asfloat(uint3(shRaw3.w, shRaw4.x, shRaw4.y));
        s.sh.sh7  = asfloat(uint3(shRaw4.z, shRaw4.w, shRaw5.x));
        s.sh.sh8  = asfloat(shRaw5.yzw);
        s.sh.sh9  = asfloat(shRaw6.xyz);
        s.sh.sh10 = asfloat(uint3(shRaw6.w, shRaw7.x, shRaw7.y));
        s.sh.sh11 = asfloat(uint3(shRaw7.z, shRaw7.w, shRaw8.x));
        s.sh.sh12 = asfloat(shRaw8.yzw);
        s.sh.sh13 = asfloat(shRaw9.xyz);
        s.sh.sh14 = asfloat(uint3(shRaw9.w, shRawA.x, shRawA.y));
        s.sh.sh15 = asfloat(uint3(shRawA.z, shRawA.w, shRawB));
    }
    else if (shFormat == VECTOR_FMT_16 || shFormat > VECTOR_FMT_6)
    {
        // 45 consecutive fp16 values, two per 32-bit word (low half first).
        // f16tof32 only reads the low 16 bits of each component.
        uint4 shRaw2 = _SplatSH.Load4(shOffset + 32);
        uint4 shRaw3 = _SplatSH.Load4(shOffset + 48);
        uint4 shRaw4 = _SplatSH.Load4(shOffset + 64);
        uint3 shRaw5 = _SplatSH.Load3(shOffset + 80);
        s.sh.sh1  = f16tof32(uint3(shRaw0.x,       shRaw0.x >> 16, shRaw0.y      ));
        s.sh.sh2  = f16tof32(uint3(shRaw0.y >> 16, shRaw0.z,       shRaw0.z >> 16));
        s.sh.sh3  = f16tof32(uint3(shRaw0.w,       shRaw0.w >> 16, shRaw1.x      ));
        s.sh.sh4  = f16tof32(uint3(shRaw1.x >> 16, shRaw1.y,       shRaw1.y >> 16));
        s.sh.sh5  = f16tof32(uint3(shRaw1.z,       shRaw1.z >> 16, shRaw1.w      ));
        s.sh.sh6  = f16tof32(uint3(shRaw1.w >> 16, shRaw2.x,       shRaw2.x >> 16));
        s.sh.sh7  = f16tof32(uint3(shRaw2.y,       shRaw2.y >> 16, shRaw2.z      ));
        s.sh.sh8  = f16tof32(uint3(shRaw2.z >> 16, shRaw2.w,       shRaw2.w >> 16));
        s.sh.sh9  = f16tof32(uint3(shRaw3.x,       shRaw3.x >> 16, shRaw3.y      ));
        s.sh.sh10 = f16tof32(uint3(shRaw3.y >> 16, shRaw3.z,       shRaw3.z >> 16));
        s.sh.sh11 = f16tof32(uint3(shRaw3.w,       shRaw3.w >> 16, shRaw4.x      ));
        s.sh.sh12 = f16tof32(uint3(shRaw4.x >> 16, shRaw4.y,       shRaw4.y >> 16));
        s.sh.sh13 = f16tof32(uint3(shRaw4.z,       shRaw4.z >> 16, shRaw4.w      ));
        s.sh.sh14 = f16tof32(uint3(shRaw4.w >> 16, shRaw5.x,       shRaw5.x >> 16));
        s.sh.sh15 = f16tof32(uint3(shRaw5.y,       shRaw5.y >> 16, shRaw5.z      ));
    }
    else if (shFormat == VECTOR_FMT_11)
    {
        // One 11.10.11 packed word per coefficient.
        uint4 shRaw2 = _SplatSH.Load4(shOffset + 32);
        uint3 shRaw3 = _SplatSH.Load3(shOffset + 48);
        s.sh.sh1  = DecodePacked_11_10_11(shRaw0.x);
        s.sh.sh2  = DecodePacked_11_10_11(shRaw0.y);
        s.sh.sh3  = DecodePacked_11_10_11(shRaw0.z);
        s.sh.sh4  = DecodePacked_11_10_11(shRaw0.w);
        s.sh.sh5  = DecodePacked_11_10_11(shRaw1.x);
        s.sh.sh6  = DecodePacked_11_10_11(shRaw1.y);
        s.sh.sh7  = DecodePacked_11_10_11(shRaw1.z);
        s.sh.sh8  = DecodePacked_11_10_11(shRaw1.w);
        s.sh.sh9  = DecodePacked_11_10_11(shRaw2.x);
        s.sh.sh10 = DecodePacked_11_10_11(shRaw2.y);
        s.sh.sh11 = DecodePacked_11_10_11(shRaw2.z);
        s.sh.sh12 = DecodePacked_11_10_11(shRaw2.w);
        s.sh.sh13 = DecodePacked_11_10_11(shRaw3.x);
        s.sh.sh14 = DecodePacked_11_10_11(shRaw3.y);
        s.sh.sh15 = DecodePacked_11_10_11(shRaw3.z);
    }
    else if (shFormat == VECTOR_FMT_6)
    {
        // One 5.6.5 packed half-word per coefficient, two per 32-bit word.
        s.sh.sh1  = DecodePacked_5_6_5(shRaw0.x);
        s.sh.sh2  = DecodePacked_5_6_5(shRaw0.x >> 16);
        s.sh.sh3  = DecodePacked_5_6_5(shRaw0.y);
        s.sh.sh4  = DecodePacked_5_6_5(shRaw0.y >> 16);
        s.sh.sh5  = DecodePacked_5_6_5(shRaw0.z);
        s.sh.sh6  = DecodePacked_5_6_5(shRaw0.z >> 16);
        s.sh.sh7  = DecodePacked_5_6_5(shRaw0.w);
        s.sh.sh8  = DecodePacked_5_6_5(shRaw0.w >> 16);
        s.sh.sh9  = DecodePacked_5_6_5(shRaw1.x);
        s.sh.sh10 = DecodePacked_5_6_5(shRaw1.x >> 16);
        s.sh.sh11 = DecodePacked_5_6_5(shRaw1.y);
        s.sh.sh12 = DecodePacked_5_6_5(shRaw1.y >> 16);
        s.sh.sh13 = DecodePacked_5_6_5(shRaw1.z);
        s.sh.sh14 = DecodePacked_5_6_5(shRaw1.z >> 16);
        s.sh.sh15 = DecodePacked_5_6_5(shRaw1.w);
    }

    // if raw data is chunk-relative, convert to final values by interpolating between chunk min/max
    uint chunkIdx = idx / kChunkSize;
    if (chunkIdx < _SplatChunkCount)
    {
        SplatChunkInfo chunk = _SplatChunks[chunkIdx];
        float3 posMin = float3(chunk.posX.x, chunk.posY.x, chunk.posZ.x);
        float3 posMax = float3(chunk.posX.y, chunk.posY.y, chunk.posZ.y);
        half3 sclMin = half3(f16tof32(chunk.sclX    ), f16tof32(chunk.sclY    ), f16tof32(chunk.sclZ    ));
        half3 sclMax = half3(f16tof32(chunk.sclX>>16), f16tof32(chunk.sclY>>16), f16tof32(chunk.sclZ>>16));
        half4 colMin = half4(f16tof32(chunk.colR    ), f16tof32(chunk.colG    ), f16tof32(chunk.colB    ), f16tof32(chunk.colA    ));
        half4 colMax = half4(f16tof32(chunk.colR>>16), f16tof32(chunk.colG>>16), f16tof32(chunk.colB>>16), f16tof32(chunk.colA>>16));
        half3 shMin = half3(f16tof32(chunk.shR    ), f16tof32(chunk.shG    ), f16tof32(chunk.shB    ));
        half3 shMax = half3(f16tof32(chunk.shR>>16), f16tof32(chunk.shG>>16), f16tof32(chunk.shB>>16));
        s.pos = lerp(posMin, posMax, s.pos);
        s.scale = lerp(sclMin, sclMax, s.scale);
        // Three successive squarings raise the scale to the 8th power.
        s.scale *= s.scale;
        s.scale *= s.scale;
        s.scale *= s.scale;
        col = lerp(colMin, colMax, col);
        col.a = InvSquareCentered01(col.a);

        // Only the normalized SH formats (16, 11, 6) are chunk-relative.
        if (shFormat > VECTOR_FMT_32F && shFormat <= VECTOR_FMT_6)
        {
            s.sh.sh1  = lerp(shMin, shMax, s.sh.sh1 );
            s.sh.sh2  = lerp(shMin, shMax, s.sh.sh2 );
            s.sh.sh3  = lerp(shMin, shMax, s.sh.sh3 );
            s.sh.sh4  = lerp(shMin, shMax, s.sh.sh4 );
            s.sh.sh5  = lerp(shMin, shMax, s.sh.sh5 );
            s.sh.sh6  = lerp(shMin, shMax, s.sh.sh6 );
            s.sh.sh7  = lerp(shMin, shMax, s.sh.sh7 );
            s.sh.sh8  = lerp(shMin, shMax, s.sh.sh8 );
            s.sh.sh9  = lerp(shMin, shMax, s.sh.sh9 );
            s.sh.sh10 = lerp(shMin, shMax, s.sh.sh10);
            s.sh.sh11 = lerp(shMin, shMax, s.sh.sh11);
            s.sh.sh12 = lerp(shMin, shMax, s.sh.sh12);
            s.sh.sh13 = lerp(shMin, shMax, s.sh.sh13);
            s.sh.sh14 = lerp(shMin, shMax, s.sh.sh14);
            s.sh.sh15 = lerp(shMin, shMax, s.sh.sh15);
        }
    }
    s.opacity   = col.a;
    s.sh.col    = col.rgb;

    return s;
}

// Per-splat data prepared for rendering.
struct SplatViewData
{
    float4 pos;
    float2 axis1, axis2;
    uint2 color; // 4xFP16
};

#endif // GAUSSIAN_SPLATTING_HLSL