// Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of NVIDIA CORPORATION nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved. // Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. // Copyright (c) 2001-2004 NovodeX AG. All rights reserved. #include "vector_types.h" #include "foundation/PxVec3.h" #include "foundation/PxVec4.h" #include "stdio.h" #include "assert.h" #include "atomic.cuh" #include "marchingCubesTables.cuh" #include "PxgIsosurfaceData.h" #include "denseGridStandalone.cuh" #include "sparseGridStandalone.cuh" #define ENABLE_KERNEL_LAUNCH_ERROR_CHECK 0 extern "C" __host__ void initIsosurfaceExtractionKernels0() {} __inline__ __device__ void atomicAdd3(PxVec4* p, PxVec3 q) { PxRedAddGlobal(&p->x, q.x); PxRedAddGlobal(&p->y, q.y); PxRedAddGlobal(&p->z, q.z); } __inline__ __device__ PxReal* getDensity(PxIsosurfaceExtractionData& data) { return data.buffer[data.swapState]; } __inline__ __device__ PxU32* getFirstCellTriId(PxIsosurfaceExtractionData& data) { return reinterpret_cast(data.buffer[1 - data.swapState]); } __inline__ __device__ PxReal dotXYZ(const float4& l, const PxVec3& r) { return l.x*r.x + l.y*r.y + l.z*r.z; } PX_FORCE_INLINE __device__ PxReal dotProduct(const float4& l, const float4& r) { return l.x*r.x + l.y*r.y + l.z*r.z + l.w*r.w; } __inline__ __device__ PxReal* getDensity(PxSparseIsosurfaceExtractionData& data) { return data.buffer[data.swapState]; } __inline__ __device__ PxU32* getFirstCellTriId(PxSparseIsosurfaceExtractionData& data) { return reinterpret_cast(data.buffer[1 - data.swapState]); } PX_FORCE_INLINE __device__ float4 scaleW(float4 v, PxReal s) { v.w *= s; return v; } template __device__ void computeParticleDensityUsingSDF(PxVec4* PX_RESTRICT deviceParticlePos, int numParticles, PxU32* PX_RESTRICT phases, PxU32 validPhaseMask, T& data, PxU32* PX_RESTRICT activeIndices, float4* PX_RESTRICT anisotropy1, float4* PX_RESTRICT anisotropy2, float4* PX_RESTRICT anisotropy3, PxReal anisotropyFactor) { PxI32 pNr = blockIdx.x * blockDim.x + threadIdx.x; if (pNr >= numParticles) return; if (activeIndices) pNr = activeIndices[pNr]; if (phases && !(phases[pNr] & validPhaseMask)) return; PxVec3 p = deviceParticlePos[pNr].getXYZ(); int4 xyz = getCellIndexFromParticleAndTransformToLocalCoordinates(data.mGrid, p); if (outOfBounds(data.mGrid, xyz)) return; anisotropyFactor = 1.0f / anisotropyFactor; float4 e1 = anisotropy1 ? scaleW(anisotropy1[pNr], anisotropyFactor) : make_float4(1, 0, 0, 1); float4 e2 = anisotropy2 ? scaleW(anisotropy2[pNr], anisotropyFactor) : make_float4(0, 1, 0, 1); float4 e3 = anisotropy3 ? scaleW(anisotropy3[pNr], anisotropyFactor) : make_float4(0, 0, 1, 1); float4 xTransformInv = make_float4(e1.x / e1.w, e1.y / e1.w, e1.z / e1.w, 0.f); float4 yTransformInv = make_float4(e2.x / e2.w, e2.y / e2.w, e2.z / e2.w, 0.f); float4 zTransformInv = make_float4(e3.x / e3.w, e3.y / e3.w, e3.z / e3.w, 0.f); xTransformInv.w = -dotXYZ(xTransformInv, p); yTransformInv.w = -dotXYZ(yTransformInv, p); zTransformInv.w = -dotXYZ(zTransformInv, p); const PxReal dx = data.getSpacing(); const PxReal invDx = 1.0f / dx; const PxReal scale = 1.0f; // powf(e1.w*e2.w*e3.w, 1.0f / 3.0f); const PxReal kernelSize = data.kernelSize; //lower and upper allow to limit the maximal neighborhood on the grid to have an upper bound in case of very big kernelSizes const PxI32 lower = 2; //1; const PxI32 upper = lower + 1; const PxI32 lowerX = PxMax(xyz.x - lower, (PxI32)PxCeil((p.x - kernelSize) * invDx)); const PxI32 upperX = PxMin(xyz.x + upper, (PxI32)PxFloor((p.x + kernelSize) * invDx)); const PxI32 lowerY = PxMax(xyz.y - lower, (PxI32)PxCeil((p.y - kernelSize) * invDx)); const PxI32 upperY = PxMin(xyz.y + upper, (PxI32)PxFloor((p.y + kernelSize) * invDx)); const PxI32 lowerZ = PxMax(xyz.z - lower, (PxI32)PxCeil((p.z - kernelSize) * invDx)); const PxI32 upperZ = PxMin(xyz.z + upper, (PxI32)PxFloor((p.z + kernelSize) * invDx)); /*if (pNr == 0) printf("xyz: %i, %i, %i, %f, %f\n", upperX - lowerX + 1, upperY - lowerY + 1, upperZ - lowerZ + 1, kernelSize, dx);*/ PxReal* density = getDensity(data); for (PxI32 zi = lowerZ; zi <= upperZ; zi++) { for (PxI32 yi = lowerY; yi <= upperY; yi++) { for (PxI32 xi = lowerX; xi <= upperX; xi++) { float4 worldPos = make_float4(xi * dx, yi * dx, zi * dx, 1.0f); PxVec3 localPos( dotProduct(xTransformInv, worldPos), dotProduct(yTransformInv, worldPos), dotProduct(zTransformInv, worldPos) ); PxReal distance = scale * localPos.magnitude(); PxReal v = distance - kernelSize; if (v < 0.0f) { PxI32 cellNr = getCellIndexSafe(data.mGrid, xyz, xi - xyz.x, yi - xyz.y, zi - xyz.z); if (cellNr == EMPTY_SUBGRID) continue; if (v < density[cellNr]) AtomicMin(&density[cellNr], v); } } } } } extern "C" __global__ void iso_ComputeParticleDensityUsingSDF(PxVec4* PX_RESTRICT deviceParticlePos, int numParticles, PxU32* PX_RESTRICT phases, PxU32 validPhaseMask, PxIsosurfaceExtractionData data, PxU32* PX_RESTRICT activeIndices, float4* PX_RESTRICT anisotropy1, float4* PX_RESTRICT anisotropy2, float4* PX_RESTRICT anisotropy3, PxReal anisotropyFactor) { computeParticleDensityUsingSDF(deviceParticlePos, numParticles, phases, validPhaseMask, data, activeIndices, anisotropy1, anisotropy2, anisotropy3, anisotropyFactor); } extern "C" __global__ void iso_ComputeParticleDensityUsingSDFSparse(PxVec4* deviceParticlePos, int numParticles, PxU32* phases, PxU32 validPhaseMask, PxSparseIsosurfaceExtractionData data, PxU32* PX_RESTRICT activeIndices, float4* anisotropy1, float4* anisotropy2, float4* anisotropy3, PxReal anisotropyFactor) { computeParticleDensityUsingSDF(deviceParticlePos, numParticles, phases, validPhaseMask, data, activeIndices, anisotropy1, anisotropy2, anisotropy3, anisotropyFactor); } template __device__ void computeParticleDensity(PxVec4* deviceParticlePos, int numParticles, PxU32* phases, PxU32 validPhaseMask, T& data) { PxI32 pNr = blockIdx.x * blockDim.x + threadIdx.x; if (pNr >= numParticles) return; if (phases && !(phases[pNr] & validPhaseMask)) return; PxVec3 p = deviceParticlePos[pNr].getXYZ(); int4 xyz = getCellIndexFromParticleAndTransformToLocalCoordinates(data.mGrid, p); if (outOfBounds(data.mGrid, xyz)) return; PxReal dx = data.getSpacing(); PxReal invDx = 1.0f / dx; PxReal h = data.kernelSize; PxReal h2 = h * h; PxReal kernelScale = 315.0f / (64.0f * 3.14159265f * h2 * h2 * h2 * h2 * h) / data.restDensity; //lower and upper allow to limit the maximal neighborhood on the grid to have an upper bound in case of very big kernelSizes const PxI32 lower = 2; //1; const PxI32 upper = lower + 1; const PxI32 lowerX = PxMax(xyz.x - lower, (PxI32)PxFloor((p.x - h) * invDx)); const PxI32 upperX = PxMin(xyz.x + upper, (PxI32)PxCeil((p.x + h) * invDx)); const PxI32 lowerY = PxMax(xyz.y - lower, (PxI32)PxFloor((p.y - h) * invDx)); const PxI32 upperY = PxMin(xyz.y + upper, (PxI32)PxCeil((p.y + h) * invDx)); const PxI32 lowerZ = PxMax(xyz.z - lower, (PxI32)PxFloor((p.z - h) * invDx)); const PxI32 upperZ = PxMin(xyz.z + upper, (PxI32)PxCeil((p.z + h) * invDx)); PxReal* density = getDensity(data); for (PxI32 xi = lowerX; xi <= upperX; xi++) { for (PxI32 yi = lowerY; yi <= upperY; yi++) { for (PxI32 zi = lowerZ; zi <= upperZ; zi++) { PxVec3 cellPos = PxVec3(xi * dx, yi * dx, zi * dx); PxVec3 r = p - cellPos; PxReal r2 = r.magnitudeSquared(); if (r2 < h2) { PxI32 cellNr = getCellIndexSafe(data.mGrid, xyz, xi - xyz.x, yi - xyz.y, zi - xyz.z); // getCellNr(data, xi, yi, zi); if (cellNr == EMPTY_SUBGRID) continue; PxReal w = (h2 - r2); w = kernelScale * w * w * w; PxRedAddGlobal(&density[cellNr], w); } } } } } extern "C" __global__ void iso_ComputeParticleDensity(PxVec4* deviceParticlePos, int numParticles, PxU32* phases, PxU32 validPhaseMask, PxIsosurfaceExtractionData data) { computeParticleDensity(deviceParticlePos, numParticles, phases, validPhaseMask, data); } extern "C" __global__ void iso_ComputeParticleDensitySparse(PxVec4* deviceParticlePos, int numParticles, PxU32* phases, PxU32 validPhaseMask, PxSparseIsosurfaceExtractionData data) { computeParticleDensity(deviceParticlePos, numParticles, phases, validPhaseMask, data); } template __device__ void countCellVerts(T& data) { int threadIndex = blockIdx.x * blockDim.x + threadIdx.x; if (outOfRange(data.mGrid, threadIndex)) return; int4 xyz = getGridCoordinates(data.mGrid, threadIndex); data.firstCellVert[threadIndex] = 0; if (outOfBounds(data.mGrid, xyz)) return; PxReal* density = getDensity(data); PxReal d0 = getGridValue(data.mGrid, density, xyz, 0, 0, 0); PxReal ds[3]; ds[0] = getGridValue(data.mGrid, density, xyz, 1, 0, 0); ds[1] = getGridValue(data.mGrid, density, xyz, 0, 1, 0); ds[2] = getGridValue(data.mGrid, density, xyz, 0, 0, 1); int num = 0; #pragma unroll for (int dim = 0; dim < 3; dim++) { PxReal d = ds[dim]; if ((d0 <= data.threshold && d >= data.threshold) || (d <= data.threshold && d0 >= data.threshold)) num++; } data.firstCellVert[threadIndex] = num; } extern "C" __global__ void iso_CountCellVerts(PxIsosurfaceExtractionData data) { countCellVerts(data); } extern "C" __global__ void iso_CountCellVertsSparse(PxSparseIsosurfaceExtractionData data) { countCellVerts(data); } template __device__ void countCellVertsDC(T& data) { int threadIndex = blockIdx.x * blockDim.x + threadIdx.x; if (outOfRange(data.mGrid, threadIndex)) return; int4 xyz = getGridCoordinates(data.mGrid, threadIndex); data.firstCellVert[threadIndex] = 0; if (outOfBounds(data.mGrid, xyz)) return; PxReal* density = getDensity(data); /*PxU32 aboveThresholdCounter = 0; PxU32 belowThresholdCounter = 0; PxU32 counter = 0; for (int x = 0; x <= 1; ++x) for (int y = 0; y <= 1; ++y) for (int z = 0; z <= 1; ++z) { PxReal d = getGridValue(data, density, xyz, x, y, z); if (d >= data.threshold) aboveThresholdCounter++; if (d <= data.threshold) belowThresholdCounter++; if (aboveThresholdCounter > 0 && belowThresholdCounter > 0) { counter = 1; break; } }*/ PxReal corners[2][2][2]; for (int x = 0; x <= 1; ++x) for (int y = 0; y <= 1; ++y) for (int z = 0; z <= 1; ++z) { corners[x][y][z] = getGridValue(data.mGrid, density, xyz, x, y, z); } PxU32 counter = 0; for (int a = 0; a <= 1; ++a) for (int b = 0; b <= 1; ++b) { PxReal p = corners[a][b][0]; PxReal q = corners[a][b][1]; if ((p <= data.threshold && q >= data.threshold) || (q <= data.threshold && p >= data.threshold)) { ++counter; } } for (int a = 0; a <= 1; ++a) for (int b = 0; b <= 1; ++b) { PxReal p = corners[b][0][a]; PxReal q = corners[b][1][a]; if ((p <= data.threshold && q >= data.threshold) || (q <= data.threshold && p >= data.threshold)) { ++counter; } } for (int a = 0; a <= 1; ++a) for (int b = 0; b <= 1; ++b) { PxReal p = corners[0][a][b]; PxReal q = corners[1][a][b]; if ((p <= data.threshold && q >= data.threshold) || (q <= data.threshold && p >= data.threshold)) { ++counter; } } if (counter > 0) counter = 1; data.firstCellVert[threadIndex] = counter; } extern "C" __global__ void iso_CountCellVertsDC(PxIsosurfaceExtractionData data) { countCellVertsDC(data); } extern "C" __global__ void iso_CountCellVertsDCSparse(PxSparseIsosurfaceExtractionData data) { countCellVertsDC(data); } template __device__ void createVerts(T& data) { int threadIndex = blockIdx.x * blockDim.x + threadIdx.x; if (outOfRange(data.mGrid, threadIndex)) return; if (isLastCell(data.mGrid, threadIndex)) data.numVerticesNumIndices[0] = PxMin(data.maxVerts, data.firstCellVert[threadIndex]); int4 xyz = getGridCoordinates(data.mGrid, threadIndex); if (outOfBounds(data.mGrid, xyz)) return; PxU32 first = data.firstCellVert[threadIndex]; PxVec3 p = getLocation(data.mGrid, xyz); PxReal* density = getDensity(data); PxReal d0 = getGridValue(data.mGrid, density, xyz, 0, 0, 0); PxReal ds[3]; ds[0] = getGridValue(data.mGrid, density, xyz, 1, 0, 0); ds[1] = getGridValue(data.mGrid, density, xyz, 0, 1, 0); ds[2] = getGridValue(data.mGrid, density, xyz, 0, 0, 1); PxU32 encoded = first; #pragma unroll for (int dim = 0; dim < 3; dim++) { PxReal d = ds[dim]; if ((d0 <= data.threshold && d >= data.threshold) || (d <= data.threshold && d0 >= data.threshold)) { PxReal t = (d != d0) ? PxClamp((data.threshold - d0) / (d - d0), 0.0f, 1.0f) : 0.5f; PxU32 id = first++; encoded |= 1 << (29 + dim); PxVec3 off(0.0f); off[dim] = t * data.getSpacing(); if (id < data.maxVerts) data.verts[id] = PxVec4(p + off, 0.0f); } } data.firstCellVert[threadIndex] = encoded; if (isLastCell(data.mGrid, threadIndex)) data.numVerticesNumIndices[0] = PxMin(data.maxVerts, first); //This allows to obtain the total number of vertices in the last array element } extern "C" __global__ void iso_CreateVerts(PxIsosurfaceExtractionData data) { createVerts(data); } extern "C" __global__ void iso_CreateVertsSparse(PxSparseIsosurfaceExtractionData data) { createVerts(data); } template __device__ void createVertsDC(T& data) { int threadIndex = blockIdx.x * blockDim.x + threadIdx.x; if (outOfRange(data.mGrid, threadIndex)) return; if (isLastCell(data.mGrid, threadIndex)) data.numVerticesNumIndices[0] = PxMin(data.maxVerts, data.firstCellVert[threadIndex]); int4 xyz = getGridCoordinates(data.mGrid, threadIndex); if (outOfBounds(data.mGrid, xyz)) return; PxU32 id = data.firstCellVert[threadIndex]; if (!isLastCell(data.mGrid, threadIndex) && id == data.firstCellVert[threadIndex + 1]) return; PxVec3 p = getLocation(data.mGrid, xyz); PxReal* density = getDensity(data); /*PxReal counter = 0; PxVec3 sum(0.0f); for (int x = 0; x <= 1; ++x) for (int y = 0; y <= 1; ++y) for (int z = 0; z <= 1; ++z) { PxReal d = getGridValue(data, density, xyz, x, y, z) - data.threshold; PxReal inverseDistance = 1.0f / PxMax(1e-6f, PxAbs(d)); counter += inverseDistance; sum += inverseDistance * PxVec3(x, y, z); } p = p + sum * (data.getSpacing() / counter);*/ PxReal corners[2][2][2]; for (int x = 0; x <= 1; ++x) for (int y = 0; y <= 1; ++y) for (int z = 0; z <= 1; ++z) { corners[x][y][z] = getGridValue(data.mGrid, density, xyz, x, y, z); } PxU32 counter = 0; PxVec3 sum(0.0f); for (int a = 0; a <= 1; ++a) for (int b = 0; b <= 1; ++b) { PxReal p = corners[a][b][0]; PxReal q = corners[a][b][1]; if ((p <= data.threshold && q >= data.threshold) || (q <= data.threshold && p >= data.threshold)) { PxReal t = (q != p) ? PxClamp((data.threshold - p) / (q - p), 0.0f, 1.0f) : 0.5f; sum += PxVec3(a, b, t); ++counter; } } for (int a = 0; a <= 1; ++a) for (int b = 0; b <= 1; ++b) { PxReal p = corners[b][0][a]; PxReal q = corners[b][1][a]; if ((p <= data.threshold && q >= data.threshold) || (q <= data.threshold && p >= data.threshold)) { PxReal t = (q != p) ? PxClamp((data.threshold - p) / (q - p), 0.0f, 1.0f) : 0.5f; sum += PxVec3(b, t, a); ++counter; } } for (int a = 0; a <= 1; ++a) for (int b = 0; b <= 1; ++b) { PxReal p = corners[0][a][b]; PxReal q = corners[1][a][b]; if ((p <= data.threshold && q >= data.threshold) || (q <= data.threshold && p >= data.threshold)) { PxReal t = (q != p) ? PxClamp((data.threshold - p) / (q - p), 0.0f, 1.0f) : 0.5f; sum += PxVec3(t, a, b); ++counter; } } p = p + sum * (data.getSpacing() / counter); if (counter > 0) data.verts[id] = PxVec4(p, 0.0f); if (isLastCell(data.mGrid, threadIndex)) data.numVerticesNumIndices[0] = PxMin(data.maxVerts, id + 1); //This allows to obtain the total number of vertices in the last array element } extern "C" __global__ void iso_CreateVertsDC(PxIsosurfaceExtractionData data) { createVertsDC(data); } extern "C" __global__ void iso_CreateVertsDCSparse(PxSparseIsosurfaceExtractionData data) { createVertsDC(data); } __device__ PxU32 getDecodedId(PxU32 encoded, PxU32 dim) { if (!(encoded & (1 << (29 + dim)))) return 0xFFFFFFFF; PxU32 id = encoded & 0x1FFFFFFF; if (dim > 0 && (encoded & (1 << (29 + 0)))) id++; if (dim > 1 && (encoded & (1 << (29 + 1)))) id++; return id; } template PX_FORCE_INLINE __device__ bool constructTriangle(T& data, int4& xyz, int i, PxU32 firstIn, PxU32 firstOut, int* buffer = NULL) { #pragma unroll for (int j = 0; j < 3; ++j) { int eid = marchingCubesIds[firstIn + i + j]; int edgeNr = marchingCubesEdgeLocations[eid][3]; const PxU32 nr = getCellIndex(data.mGrid, xyz, marchingCubesEdgeLocations[eid][0], marchingCubesEdgeLocations[eid][1], marchingCubesEdgeLocations[eid][2], false); if (nr == EMPTY_SUBGRID) return false; PxU32 id = getDecodedId(data.firstCellVert[nr], edgeNr); if (id >= data.maxVerts || firstOut + j >= data.maxTriIds) return false; if (buffer) buffer[j] = id; } return true; } template __device__ void countTriIds(T& data) { int threadIndex = blockIdx.x * blockDim.x + threadIdx.x; if (outOfRange(data.mGrid, threadIndex)) return; int4 xyz = getGridCoordinates(data.mGrid, threadIndex); PxU32* firstCellTriId = getFirstCellTriId(data); firstCellTriId[threadIndex] = 0; if (outOfBounds(data.mGrid, xyz)) return; PxReal* density = getDensity(data); int code = 0; #pragma unroll for (int i = 0; i < 8; i++) { if (getGridValue(data.mGrid, density, xyz, marchingCubeCorners[i][0], marchingCubeCorners[i][1], marchingCubeCorners[i][2]) >= data.threshold) code |= (1 << i); } int firstIn = firstMarchingCubesId[code]; int num = firstMarchingCubesId[code + 1] - firstIn; PxI32 counter = 0; for (int i = 0; i < num; i += 3) { if (!constructTriangle(data, xyz, i, firstIn, 0)) continue; counter += 3; } firstCellTriId[threadIndex] = counter; } extern "C" __global__ void iso_CountTriIds(PxIsosurfaceExtractionData data) { countTriIds(data); } extern "C" __global__ void iso_CountTriIdsSparse(PxSparseIsosurfaceExtractionData data) { countTriIds(data); } template __device__ void countTriIdsDC(T& data) { int threadIndex = blockIdx.x * blockDim.x + threadIdx.x; if (outOfRange(data.mGrid, threadIndex)) return; int4 xyz = getGridCoordinates(data.mGrid, threadIndex); PxU32* firstCellTriId = getFirstCellTriId(data); firstCellTriId[threadIndex] = 0; if (outOfBounds(data.mGrid, xyz)) return; PxReal* density = getDensity(data); PxReal d0 = getGridValue(data.mGrid, density, xyz, 0, 0, 0); PxReal ds[3]; ds[0] = getGridValue(data.mGrid, density, xyz, 1, 0, 0); ds[1] = getGridValue(data.mGrid, density, xyz, 0, 1, 0); ds[2] = getGridValue(data.mGrid, density, xyz, 0, 0, 1); int num = 0; #pragma unroll for (int dim = 0; dim < 3; dim++) { PxReal d = ds[dim]; if ((d0 <= data.threshold && d >= data.threshold) || (d <= data.threshold && d0 >= data.threshold)) num++; } firstCellTriId[threadIndex] = 2 * 3 * num; } extern "C" __global__ void iso_CountTriIdsDC(PxIsosurfaceExtractionData data) { countTriIdsDC(data); } extern "C" __global__ void iso_CountTriIdsDCSparse(PxSparseIsosurfaceExtractionData data) { countTriIdsDC(data); } template __device__ void createTriIds(T& data, bool flipTriangleOrientation) { int threadIndex = blockIdx.x * blockDim.x + threadIdx.x; if (outOfRange(data.mGrid, threadIndex)) return; const PxU32* firstCellTriId = getFirstCellTriId(data); if (isLastCell(data.mGrid, threadIndex)) data.numVerticesNumIndices[1] = PxMin(data.maxTriIds, firstCellTriId[threadIndex]); int4 xyz = getGridCoordinates(data.mGrid, threadIndex); if (outOfBounds(data.mGrid, xyz)) return; PxU32 firstOut = firstCellTriId[threadIndex]; if (!isLastCell(data.mGrid, threadIndex) && firstOut == firstCellTriId[threadIndex + 1]) return; //This means no slice reserved for this instance, this is known because of the already completed counting kernel const PxReal* density = getDensity(data); int code = 0; #pragma unroll for (int i = 0; i < 8; i++) { if (getGridValue(data.mGrid, density, xyz, marchingCubeCorners[i][0], marchingCubeCorners[i][1], marchingCubeCorners[i][2]) >= data.threshold) code |= (1 << i); } int firstIn = firstMarchingCubesId[code]; int num = firstMarchingCubesId[code + 1] - firstIn; int buffer[3]; for (int i = 0; i < num; i += 3) { if (!constructTriangle(data, xyz, i, firstIn, firstOut, buffer)) continue; if (flipTriangleOrientation) { for (int j = 2; j >= 0; --j) data.triIds[firstOut++] = buffer[j]; } else { for (int j = 0; j < 3; ++j) data.triIds[firstOut++] = buffer[j]; } } if (isLastCell(data.mGrid, threadIndex)) data.numVerticesNumIndices[1] = PxMin(data.maxTriIds, firstOut); //This allows to obtain the total number of indices in the last array element } extern "C" __global__ void iso_CreateTriIds(PxIsosurfaceExtractionData data, bool flipTriangleOrientation) { createTriIds(data, flipTriangleOrientation); } extern "C" __global__ void iso_CreateTriIdsSparse(PxSparseIsosurfaceExtractionData data, bool flipTriangleOrientation) { createTriIds(data, flipTriangleOrientation); } __constant__ int offsets[3][3][3] = { { {0,-1,0}, {0,-1,-1},{0,0,-1} }, { {0,0,-1}, {-1,0,-1},{-1,0,0} } , { {-1,0,0}, {-1,-1,0},{0,-1,0} } }; __constant__ int projections[3][2] = { {1, 2}, {2, 0}, {0, 1} }; __device__ float directionSign(int principalDirection, const PxVec3& start, const PxVec3& middle, const PxVec3& end) { PxReal a0 = middle[projections[principalDirection][0]] - start[projections[principalDirection][0]]; PxReal a1 = middle[projections[principalDirection][1]] - start[projections[principalDirection][1]]; PxReal b0 = end[projections[principalDirection][0]] - middle[projections[principalDirection][0]]; PxReal b1 = end[projections[principalDirection][1]] - middle[projections[principalDirection][1]]; return a0 * b1 - a1 * b0; } __device__ int indexOfConcaveCorner(int principalDirection, const PxVec3& a, const PxVec3& b, const PxVec3& c, const PxVec3& d) { float minimum = 0; int result = -1; float s = directionSign(principalDirection, a, b, c); if (s <= minimum) { minimum = s; result = 1; } s = directionSign(principalDirection, b, c, d); if (s <= minimum) { minimum = s; result = 2; } s = directionSign(principalDirection, c, d, a); if (s <= minimum) { minimum = s; result = 3; } s = directionSign(principalDirection, d, a, b); if (s <= minimum) { minimum = s; result = 0; } return result; } template __device__ void createTriIdsDC(T& data, bool flipTriangleOrientation) { int threadIndex = blockIdx.x * blockDim.x + threadIdx.x; if (outOfRange(data.mGrid, threadIndex)) return; int4 xyz = getGridCoordinates(data.mGrid, threadIndex); PxU32* firstCellTriId = getFirstCellTriId(data); if (isLastCell(data.mGrid, threadIndex)) data.numVerticesNumIndices[1] = PxMin(data.maxTriIds, firstCellTriId[threadIndex]); if (outOfBounds(data.mGrid, xyz)) return; PxReal* density = getDensity(data); PxReal d0 = getGridValue(data.mGrid, density, xyz, 0, 0, 0); PxU32 firstOut = firstCellTriId[threadIndex]; PxReal ds[3]; ds[0] = getGridValue(data.mGrid, density, xyz, 1, 0, 0); ds[1] = getGridValue(data.mGrid, density, xyz, 0, 1, 0); ds[2] = getGridValue(data.mGrid, density, xyz, 0, 0, 1); int buffer[4]; buffer[0] = data.firstCellVert[threadIndex]; PxVec3 v0 = data.verts[buffer[0]].getXYZ(); #pragma unroll for (int dim = 0; dim < 3; dim++) { PxReal d = ds[dim]; bool b1 = d0 <= data.threshold && d >= data.threshold; bool b2 = d <= data.threshold && d0 >= data.threshold; if (b1 || b2) { bool flip = flipTriangleOrientation == b1; bool skip = false; #pragma unroll for (int i = 0; i < 3; ++i) { int id = getCellIndexSafe(data.mGrid, xyz, offsets[dim][i][0], offsets[dim][i][1], offsets[dim][i][2], false); if (id == EMPTY_SUBGRID) skip = true; buffer[i + 1] = data.firstCellVert[id]; } if (skip) continue; #pragma unroll for (int i = 0; i < 4; ++i) if (buffer[i] >= data.maxVerts) continue; //PxReal d02 = (data.verts[buffer[2]].getXYZ() - v0).magnitudeSquared(); //PxReal d13 = (data.verts[buffer[3]].getXYZ() - data.verts[buffer[1]].getXYZ()).magnitudeSquared(); int shift = PxMax(0, indexOfConcaveCorner(dim, v0, data.verts[buffer[1]].getXYZ(), data.verts[buffer[2]].getXYZ(), data.verts[buffer[3]].getXYZ())) % 2; //int shift = d02 < d13 ? 0 : 1;//TODO: select better diagonal //shift = 1 - shift; //Split the quad into two triangles for (int i = 0; i < 2; ++i) { if (firstOut + 3 >= data.maxTriIds) break; data.triIds[firstOut++] = buffer[shift]; if (flip) { for (int j = 2; j >= 1; --j) data.triIds[firstOut++] = buffer[(i + j + shift) % 4]; } else { for (int j = 1; j < 3; ++j) data.triIds[firstOut++] = buffer[(i + j + shift) % 4]; } } } } if (isLastCell(data.mGrid, threadIndex)) data.numVerticesNumIndices[1] = PxMin(data.maxTriIds, firstOut); //This allows to obtain the total number of indices in the last array element } extern "C" __global__ void iso_CreateTriIdsDC(PxIsosurfaceExtractionData data, bool flipTriangleOrientation) { createTriIdsDC(data, flipTriangleOrientation); } extern "C" __global__ void iso_CreateTriIdsDCSparse(PxSparseIsosurfaceExtractionData data, bool flipTriangleOrientation) { createTriIdsDC(data, flipTriangleOrientation); } //https://stackoverflow.com/questions/3380628/fast-arc-cos-algorithm PX_FORCE_INLINE __device__ PxReal approxAcos(PxReal x) { return PxMax(0.0f, (-0.69813170079773212f * x * x - 0.87266462599716477f) * x + 1.5707963267948966f); //return (-0.69813170079773212f * x * x - 0.87266462599716477f) * x + 1.5707963267948966f; } PX_FORCE_INLINE __device__ PxReal angle(const PxVec3 v1, const PxVec3 v2) { return approxAcos(v1.dot(v2) * __frsqrt_rn(v1.magnitudeSquared() * v2.magnitudeSquared())); } PX_FORCE_INLINE __device__ PxReal areaTimes2(const PxVec3 v1, const PxVec3 v2) { return v1.cross(v2).magnitude(); } __device__ void smoothVerts(const PxVec4* vertices, PxVec4* output, const PxU32* triIds, const PxU32 numTriIds) { const PxU32 numTriangles = numTriIds / 3; for (PxI32 triNr = blockIdx.x * blockDim.x + threadIdx.x; triNr < numTriangles; triNr += blockDim.x * gridDim.x) { for (int i = 0; i < 3; i++) { int id0 = triIds[3 * triNr + i]; int id1 = triIds[3 * triNr + (i + 1) % 3]; int id2 = triIds[3 * triNr + (i + 2) % 3]; PxVec3 p0 = vertices[id0].getXYZ(); PxVec3 p1 = vertices[id1].getXYZ(); PxVec3 p2 = vertices[id2].getXYZ(); PxVec3 p01 = p1 - p0; PxVec3 p02 = p2 - p0; PxReal a0 = angle(p01, p02); //PxReal a0 = ((0.5f * (p1 + p2)) - p0).magnitude(); //PxReal a0 = PxSqrt(areaTimes2(p01, p02)); //PxReal a0 = 1.0f; PxRedAddGlobal(&output[id0].w, a0); atomicAdd3(&output[id0], a0 * (0.5f * (p1 + p2))); } } } extern "C" __global__ void iso_SmoothVerts(const PxVec4* PX_RESTRICT vertices, PxVec4* PX_RESTRICT output, const PxU32* PX_RESTRICT triIds, const PxU32* PX_RESTRICT numTriIds) { smoothVerts(vertices, output, triIds, *numTriIds); } __device__ void averageVerts(PxVec4* PX_RESTRICT vertices, PxVec4* PX_RESTRICT output, const PxU32 length, PxReal blendWeight = 1.0f) { const PxU32 numVertices = length; for (PxI32 vNr = blockIdx.x * blockDim.x + threadIdx.x; vNr < numVertices; vNr += blockDim.x * gridDim.x) { PxVec4 v = vertices[vNr]; if (v.w > 0) { PxVec3 x = output[vNr].getXYZ(); output[vNr] = PxVec4((1.0f - blendWeight) * x + blendWeight * (v.getXYZ() / v.w), 0.0f); } //Clear the buffer array to use it for followingsmoothing passes vertices[vNr] = PxVec4(0.0f); } } extern "C" __global__ void iso_AverageVerts(PxVec4* PX_RESTRICT vertices, PxVec4* PX_RESTRICT output, const PxU32* PX_RESTRICT length, PxReal blendWeight) { averageVerts(vertices, output, *length, blendWeight); } __device__ PxVec3 normalize(PxVec3 v) { const float m = v.magnitude(); if (m > 0.0f) v /= m; return v; } template __device__ void smoothNormals(T& data) { const PxU32 numTriangles = data.numVerticesNumIndices[1] / 3; for (PxI32 triNr = blockIdx.x * blockDim.x + threadIdx.x; triNr < numTriangles; triNr += blockDim.x * gridDim.x) { int id0 = data.triIds[3 * triNr]; int id1 = data.triIds[3 * triNr + 1]; int id2 = data.triIds[3 * triNr + 2]; PxVec3 n0 = data.normals[id0].getXYZ(); PxVec3 n1 = data.normals[id1].getXYZ(); PxVec3 n2 = data.normals[id2].getXYZ(); PxVec3 n = normalize(n0 + n1 + n2); atomicAdd3(&data.smoothingBuffer[id0], n); atomicAdd3(&data.smoothingBuffer[id1], n); atomicAdd3(&data.smoothingBuffer[id2], n); } } extern "C" __global__ void iso_SmoothNormals(PxIsosurfaceExtractionData data) { smoothNormals(data); } extern "C" __global__ void iso_SmoothNormalsSparse(PxSparseIsosurfaceExtractionData data) { smoothNormals(data); } __device__ PxVec4 normalize(const PxVec4& v) { PxReal s = 1.0f / PxMax(1e-6f, PxSqrt(v.x*v.x + v.y*v.y + v.z*v.z)); return PxVec4(s*v.x, s*v.y, s*v.z, v.w); } template __device__ void smoothNormalsNormalize(T& data) { const PxU32 numVertices = data.numVerticesNumIndices[0]; for (PxI32 vNr = blockIdx.x * blockDim.x + threadIdx.x; vNr < numVertices; vNr += blockDim.x * gridDim.x) { data.normals[vNr] = normalize(data.smoothingBuffer[vNr]); data.smoothingBuffer[vNr] = PxVec4(0, 0, 0, 0); } } extern "C" __global__ void iso_SmoothNormalsNormalize(PxIsosurfaceExtractionData data) { smoothNormalsNormalize(data); } extern "C" __global__ void iso_SmoothNormalsNormalizeSparse(PxSparseIsosurfaceExtractionData data) { smoothNormalsNormalize(data); } template __device__ void computeNormals(T& data) { const PxU32 numTriangles = data.numVerticesNumIndices[1] / 3; for (PxI32 triNr = blockIdx.x * blockDim.x + threadIdx.x; triNr < numTriangles; triNr += blockDim.x * gridDim.x) { /*for (int i = 0; i < 3; i++) { int id0 = data.triIds[3 * triNr + i]; int id1 = data.triIds[3 * triNr + (i + 1) % 3]; int id2 = data.triIds[3 * triNr + (i + 2) % 3]; PxVec3 p0 = data.verts[id0].getXYZ(); PxVec3 p1 = data.verts[id1].getXYZ(); PxVec3 p2 = data.verts[id2].getXYZ(); PxVec3 p01 = p1 - p0; PxVec3 p02 = p2 - p0; PxReal a0 = angle(p01, p02); PxVec3 n = (p1 - p0).cross(p2 - p0); PxReal l2 = n.magnitudeSquared(); if (l2 > 1e-6f) { atomicAdd3(&data.normals[id0], (a0 * __frsqrt_rn(l2)) * n); } }*/ int id0 = data.triIds[3 * triNr]; int id1 = data.triIds[3 * triNr + 1]; int id2 = data.triIds[3 * triNr + 2]; PxVec3 p0 = data.verts[id0].getXYZ(); PxVec3 p1 = data.verts[id1].getXYZ(); PxVec3 p2 = data.verts[id2].getXYZ(); PxVec3 n = (p1 - p0).cross(p2 - p0); atomicAdd3(&data.normals[id0], n); atomicAdd3(&data.normals[id1], n); atomicAdd3(&data.normals[id2], n); } } extern "C" __global__ void iso_ComputeNormals(PxIsosurfaceExtractionData data) { computeNormals(data); } extern "C" __global__ void iso_ComputeNormalsSparse(PxSparseIsosurfaceExtractionData data) { computeNormals(data); } template __device__ void normalizeNormals(T& data) { const PxU32 numVertices = data.numVerticesNumIndices[0]; for (PxI32 vNr = blockIdx.x * blockDim.x + threadIdx.x; vNr < numVertices; vNr += blockDim.x * gridDim.x) data.normals[vNr] = normalize(data.normals[vNr]); } extern "C" __global__ void iso_NormalizeNormals(PxIsosurfaceExtractionData data) { normalizeNormals(data); } extern "C" __global__ void iso_NormalizeNormalsSparse(PxSparseIsosurfaceExtractionData data) { normalizeNormals(data); } template __device__ void gridFilterGauss(T& data, PxReal neighborWeight) { int threadIndex = blockIdx.x * blockDim.x + threadIdx.x; if (outOfRange(data.mGrid, threadIndex)) return; //Gaussian blur - use fact that it is a separable filter //Radius is relative to cell size PxReal w[3]; w[0] = neighborWeight; // expf(-1.0f / (2.0f*radius*radius)); w[1] = 1.0f; w[2] = w[0]; PxReal weightSum = 0; PxReal m = 0; int4 xyz = getGridCoordinates(data.mGrid, threadIndex); PxReal* source = data.buffer[data.swapState]; PxReal* destination = data.buffer[1 - data.swapState]; for (int x = -1; x <= 1; ++x) for (int y = -1; y <= 1; ++y) for (int z = -1; z <= 1; ++z) { PxReal weight = w[x + 1] * w[y + 1] * w[z + 1]; weightSum += weight; m += weight * getGridValueSafe(data.mGrid, source, xyz, x, y, z); } destination[threadIndex] = m / weightSum; } extern "C" __global__ void iso_GridFilterGauss(PxIsosurfaceExtractionData data, PxReal neighborWeight) { gridFilterGauss(data, neighborWeight); } extern "C" __global__ void iso_GridFilterGaussSparse(PxSparseIsosurfaceExtractionData data, PxReal neighborWeight) { gridFilterGauss(data, neighborWeight); } template __device__ void gridFilterDilateErode(T& data, PxReal sign) { int threadIndex = blockIdx.x * blockDim.x + threadIdx.x; if (outOfRange(data.mGrid, threadIndex)) return; int4 xyz = getGridCoordinates(data.mGrid, threadIndex); PxReal* source = data.buffer[data.swapState]; PxReal* destination = data.buffer[1 - data.swapState]; PxReal m = sign * source[threadIndex]; for (int x = -1; x <= 1; ++x) for (int y = -1; y <= 1; ++y) for (int z = -1; z <= 1; ++z) m = PxMax(m, sign * getGridValueSafe(data.mGrid, source, xyz, x, y, z)); destination[threadIndex] = sign * m; } extern "C" __global__ void iso_GridFilterDilateErode(PxIsosurfaceExtractionData data, PxReal sign) { gridFilterDilateErode(data, sign); } extern "C" __global__ void iso_GridFilterDilateErodeSparse(PxSparseIsosurfaceExtractionData data, PxReal sign) { gridFilterDilateErode(data, sign); }