Files
XCEngine/engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/isosurfaceExtraction.cu

1165 lines
35 KiB
Plaintext

// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "vector_types.h"
#include "foundation/PxVec3.h"
#include "foundation/PxVec4.h"
#include "stdio.h"
#include "assert.h"
#include "atomic.cuh"
#include "marchingCubesTables.cuh"
#include "PxgIsosurfaceData.h"
#include "denseGridStandalone.cuh"
#include "sparseGridStandalone.cuh"
#define ENABLE_KERNEL_LAUNCH_ERROR_CHECK 0
// Empty host function with C linkage; calling it has no effect.
// NOTE(review): presumably exists so host code can reference this translation unit - confirm against the caller.
extern "C" __host__ void initIsosurfaceExtractionKernels0() {}
// Adds q.x, q.y and q.z onto the x, y and z members of *p, issuing one
// PxRedAddGlobal call per component. The w member of *p is left untouched.
__inline__ __device__ void atomicAdd3(PxVec4* p, PxVec3 q)
{
    PxVec4& target = *p;
    PxRedAddGlobal(&target.x, q.x);
    PxRedAddGlobal(&target.y, q.y);
    PxRedAddGlobal(&target.z, q.z);
}
// Returns the buffer of `data` that is selected by data.swapState.
__inline__ __device__ PxReal* getDensity(PxIsosurfaceExtractionData& data)
{
    PxReal* const selected = data.buffer[data.swapState];
    return selected;
}
// Returns the buffer of `data` that is NOT selected by data.swapState
// (index 1 - swapState), reinterpreted as an array of PxU32.
__inline__ __device__ PxU32* getFirstCellTriId(PxIsosurfaceExtractionData& data)
{
    PxReal* const other = data.buffer[1 - data.swapState];
    return reinterpret_cast<PxU32*>(other);
}
// Three-component dot product of l.xyz with r; l.w does not take part.
__inline__ __device__ PxReal dotXYZ(const float4& l, const PxVec3& r)
{
return l.x*r.x + l.y*r.y + l.z*r.z;
}
// Four-component dot product of l and r (x, y, z and w all contribute).
PX_FORCE_INLINE __device__ PxReal dotProduct(const float4& l, const float4& r)
{
return l.x*r.x + l.y*r.y + l.z*r.z + l.w*r.w;
}
// Returns the buffer of `data` that is selected by data.swapState.
__inline__ __device__ PxReal* getDensity(PxSparseIsosurfaceExtractionData& data)
{
    PxReal* const selected = data.buffer[data.swapState];
    return selected;
}
// Returns the buffer of `data` that is NOT selected by data.swapState
// (index 1 - swapState), reinterpreted as an array of PxU32.
__inline__ __device__ PxU32* getFirstCellTriId(PxSparseIsosurfaceExtractionData& data)
{
    PxReal* const other = data.buffer[1 - data.swapState];
    return reinterpret_cast<PxU32*>(other);
}
// Returns a copy of v whose w component is multiplied by s; x, y and z are unchanged.
PX_FORCE_INLINE __device__ float4 scaleW(float4 v, PxReal s)
{
    return make_float4(v.x, v.y, v.z, v.w * s);
}
// Per-thread routine: handles one particle slot (blockIdx.x * blockDim.x + threadIdx.x)
// and lowers the density buffer of `data` around that particle to the value
// (distance - kernelSize), where the distance is measured after applying the
// per-particle anisotropy axes.
//
// deviceParticlePos  particle positions (only xyz is read)
// numParticles       number of slots to process; threads beyond it return immediately
// phases             optional; a particle is skipped when (phases[i] & validPhaseMask) == 0
// activeIndices      optional; when set, the slot index is remapped through this array
// anisotropy1..3     optional per-particle axes; xyz is the direction and w the extent.
//                    w is multiplied by 1 / anisotropyFactor before use. When an array is
//                    missing, the corresponding unit axis with w = 1 is used.
//
// Only cells with a negative value are touched, and the buffer is updated with AtomicMin.
template<typename T>
__device__ void computeParticleDensityUsingSDF(PxVec4* PX_RESTRICT deviceParticlePos, int numParticles, PxU32* PX_RESTRICT phases, PxU32 validPhaseMask, T& data, PxU32* PX_RESTRICT activeIndices,
    float4* PX_RESTRICT anisotropy1, float4* PX_RESTRICT anisotropy2, float4* PX_RESTRICT anisotropy3, PxReal anisotropyFactor)
{
    PxI32 particleNr = blockIdx.x * blockDim.x + threadIdx.x;
    if (particleNr >= numParticles)
        return;

    if (activeIndices)
        particleNr = activeIndices[particleNr];

    if (phases && !(phases[particleNr] & validPhaseMask))
        return;

    PxVec3 p = deviceParticlePos[particleNr].getXYZ();
    int4 xyz = getCellIndexFromParticleAndTransformToLocalCoordinates(data.mGrid, p);
    if (outOfBounds(data.mGrid, xyz))
        return;

    const PxReal invAnisotropyFactor = 1.0f / anisotropyFactor;

    float4 e1 = make_float4(1, 0, 0, 1);
    float4 e2 = make_float4(0, 1, 0, 1);
    float4 e3 = make_float4(0, 0, 1, 1);
    if (anisotropy1)
        e1 = scaleW(anisotropy1[particleNr], invAnisotropyFactor);
    if (anisotropy2)
        e2 = scaleW(anisotropy2[particleNr], invAnisotropyFactor);
    if (anisotropy3)
        e3 = scaleW(anisotropy3[particleNr], invAnisotropyFactor);

    // Rows of the inverse transform: axis direction divided by its extent. The w entry
    // holds the translation so that a homogeneous point (x, y, z, 1) maps to particle-local space.
    float4 rowX = make_float4(e1.x / e1.w, e1.y / e1.w, e1.z / e1.w, 0.f);
    float4 rowY = make_float4(e2.x / e2.w, e2.y / e2.w, e2.z / e2.w, 0.f);
    float4 rowZ = make_float4(e3.x / e3.w, e3.y / e3.w, e3.z / e3.w, 0.f);
    rowX.w = -dotXYZ(rowX, p);
    rowY.w = -dotXYZ(rowY, p);
    rowZ.w = -dotXYZ(rowZ, p);

    const PxReal spacing = data.getSpacing();
    const PxReal invSpacing = 1.0f / spacing;
    const PxReal scale = 1.0f;
    const PxReal radius = data.kernelSize;

    // The visited neighborhood is capped at [-2, +3] cells around the particle's cell so that
    // very large kernel sizes cannot produce an unbounded loop.
    const PxI32 below = 2;
    const PxI32 above = below + 1;
    const PxI32 minX = PxMax(xyz.x - below, (PxI32)PxCeil((p.x - radius) * invSpacing));
    const PxI32 maxX = PxMin(xyz.x + above, (PxI32)PxFloor((p.x + radius) * invSpacing));
    const PxI32 minY = PxMax(xyz.y - below, (PxI32)PxCeil((p.y - radius) * invSpacing));
    const PxI32 maxY = PxMin(xyz.y + above, (PxI32)PxFloor((p.y + radius) * invSpacing));
    const PxI32 minZ = PxMax(xyz.z - below, (PxI32)PxCeil((p.z - radius) * invSpacing));
    const PxI32 maxZ = PxMin(xyz.z + above, (PxI32)PxFloor((p.z + radius) * invSpacing));

    PxReal* density = getDensity(data);

    for (PxI32 cz = minZ; cz <= maxZ; cz++)
    {
        for (PxI32 cy = minY; cy <= maxY; cy++)
        {
            for (PxI32 cx = minX; cx <= maxX; cx++)
            {
                float4 gridPoint = make_float4(cx * spacing, cy * spacing, cz * spacing, 1.0f);
                PxVec3 localPos(
                    dotProduct(rowX, gridPoint),
                    dotProduct(rowY, gridPoint),
                    dotProduct(rowZ, gridPoint)
                );

                const PxReal dist = scale * localPos.magnitude();
                const PxReal value = dist - radius;
                if (!(value < 0.0f))
                    continue; // grid point is not inside the particle's kernel

                const PxI32 cellNr = getCellIndexSafe(data.mGrid, xyz, cx - xyz.x, cy - xyz.y, cz - xyz.z);
                if (cellNr == EMPTY_SUBGRID)
                    continue;

                // Plain read first so the atomic is only issued when it can lower the stored value.
                if (value < density[cellNr])
                    AtomicMin(&density[cellNr], value);
            }
        }
    }
}
// Kernel entry point for PxIsosurfaceExtractionData: forwards every argument unchanged to
// computeParticleDensityUsingSDF, which handles one particle slot per thread.
extern "C" __global__ void iso_ComputeParticleDensityUsingSDF(PxVec4* PX_RESTRICT deviceParticlePos, int numParticles, PxU32* PX_RESTRICT phases, PxU32 validPhaseMask, PxIsosurfaceExtractionData data,
PxU32* PX_RESTRICT activeIndices, float4* PX_RESTRICT anisotropy1, float4* PX_RESTRICT anisotropy2, float4* PX_RESTRICT anisotropy3, PxReal anisotropyFactor)
{
computeParticleDensityUsingSDF(deviceParticlePos, numParticles, phases, validPhaseMask, data, activeIndices,
anisotropy1, anisotropy2, anisotropy3, anisotropyFactor);
}
// Kernel entry point for PxSparseIsosurfaceExtractionData: forwards every argument unchanged to
// computeParticleDensityUsingSDF, which handles one particle slot per thread.
extern "C" __global__ void iso_ComputeParticleDensityUsingSDFSparse(PxVec4* deviceParticlePos, int numParticles, PxU32* phases, PxU32 validPhaseMask, PxSparseIsosurfaceExtractionData data,
PxU32* PX_RESTRICT activeIndices, float4* anisotropy1, float4* anisotropy2, float4* anisotropy3, PxReal anisotropyFactor)
{
computeParticleDensityUsingSDF(deviceParticlePos, numParticles, phases, validPhaseMask, data, activeIndices,
anisotropy1, anisotropy2, anisotropy3, anisotropyFactor);
}
// Per-thread routine: handles one particle (blockIdx.x * blockDim.x + threadIdx.x) and
// accumulates its contribution into the density buffer of `data`.
//
// For every visited grid point at squared distance r2 < h2 (h = data.kernelSize) the value
//     kernelScale * (h2 - r2)^3,  kernelScale = 315 / (64 * pi * h^9) / data.restDensity
// is added with PxRedAddGlobal.
//
// phases is optional; a particle is skipped when (phases[i] & validPhaseMask) == 0.
template<typename T>
__device__ void computeParticleDensity(PxVec4* deviceParticlePos, int numParticles, PxU32* phases, PxU32 validPhaseMask, T& data)
{
    PxI32 particleNr = blockIdx.x * blockDim.x + threadIdx.x;
    if (particleNr >= numParticles)
        return;

    if (phases && !(phases[particleNr] & validPhaseMask))
        return;

    PxVec3 p = deviceParticlePos[particleNr].getXYZ();
    int4 xyz = getCellIndexFromParticleAndTransformToLocalCoordinates(data.mGrid, p);
    if (outOfBounds(data.mGrid, xyz))
        return;

    PxReal spacing = data.getSpacing();
    PxReal invSpacing = 1.0f / spacing;

    PxReal h = data.kernelSize;
    PxReal h2 = h * h;
    PxReal kernelScale = 315.0f / (64.0f * 3.14159265f * h2 * h2 * h2 * h2 * h) / data.restDensity;

    // The visited neighborhood is capped at [-2, +3] cells around the particle's cell so that
    // very large kernel sizes cannot produce an unbounded loop.
    const PxI32 below = 2;
    const PxI32 above = below + 1;
    const PxI32 minX = PxMax(xyz.x - below, (PxI32)PxFloor((p.x - h) * invSpacing));
    const PxI32 maxX = PxMin(xyz.x + above, (PxI32)PxCeil((p.x + h) * invSpacing));
    const PxI32 minY = PxMax(xyz.y - below, (PxI32)PxFloor((p.y - h) * invSpacing));
    const PxI32 maxY = PxMin(xyz.y + above, (PxI32)PxCeil((p.y + h) * invSpacing));
    const PxI32 minZ = PxMax(xyz.z - below, (PxI32)PxFloor((p.z - h) * invSpacing));
    const PxI32 maxZ = PxMin(xyz.z + above, (PxI32)PxCeil((p.z + h) * invSpacing));

    PxReal* density = getDensity(data);

    for (PxI32 cx = minX; cx <= maxX; cx++)
    {
        for (PxI32 cy = minY; cy <= maxY; cy++)
        {
            for (PxI32 cz = minZ; cz <= maxZ; cz++)
            {
                PxVec3 gridPoint = PxVec3(cx * spacing, cy * spacing, cz * spacing);
                PxVec3 toParticle = p - gridPoint;
                PxReal r2 = toParticle.magnitudeSquared();
                if (!(r2 < h2))
                    continue; // outside the kernel support

                PxI32 cellNr = getCellIndexSafe(data.mGrid, xyz, cx - xyz.x, cy - xyz.y, cz - xyz.z);
                if (cellNr == EMPTY_SUBGRID)
                    continue;

                const PxReal diff = (h2 - r2);
                const PxReal contribution = kernelScale * diff * diff * diff;
                PxRedAddGlobal(&density[cellNr], contribution);
            }
        }
    }
}
// Kernel entry point for PxIsosurfaceExtractionData: forwards its arguments unchanged to
// computeParticleDensity, which handles one particle per thread.
extern "C" __global__ void iso_ComputeParticleDensity(PxVec4* deviceParticlePos, int numParticles, PxU32* phases, PxU32 validPhaseMask, PxIsosurfaceExtractionData data)
{
computeParticleDensity(deviceParticlePos, numParticles, phases, validPhaseMask, data);
}
// Kernel entry point for PxSparseIsosurfaceExtractionData: forwards its arguments unchanged to
// computeParticleDensity, which handles one particle per thread.
extern "C" __global__ void iso_ComputeParticleDensitySparse(PxVec4* deviceParticlePos, int numParticles, PxU32* phases, PxU32 validPhaseMask, PxSparseIsosurfaceExtractionData data)
{
computeParticleDensity(deviceParticlePos, numParticles, phases, validPhaseMask, data);
}
// Per-thread routine: for the cell with index blockIdx.x * blockDim.x + threadIdx.x, counts on
// how many of the three edges leaving the cell in +x, +y and +z the density crosses
// data.threshold, and stores that count (0..3) in data.firstCellVert.
// Cells rejected by outOfBounds get a count of 0; threads rejected by outOfRange write nothing.
// An edge counts as crossing when one end is <= threshold and the other end is >= threshold.
template<typename T>
__device__ void countCellVerts(T& data)
{
    int cellIndex = blockIdx.x * blockDim.x + threadIdx.x;
    if (outOfRange(data.mGrid, cellIndex))
        return;

    int4 xyz = getGridCoordinates(data.mGrid, cellIndex);

    data.firstCellVert[cellIndex] = 0;
    if (outOfBounds(data.mGrid, xyz))
        return;

    PxReal* density = getDensity(data);
    const PxReal origin = getGridValue(data.mGrid, density, xyz, 0, 0, 0);
    PxReal neighbor[3];
    neighbor[0] = getGridValue(data.mGrid, density, xyz, 1, 0, 0);
    neighbor[1] = getGridValue(data.mGrid, density, xyz, 0, 1, 0);
    neighbor[2] = getGridValue(data.mGrid, density, xyz, 0, 0, 1);

    int crossings = 0;
#pragma unroll
    for (int axis = 0; axis < 3; axis++)
    {
        const PxReal other = neighbor[axis];
        const bool rising = origin <= data.threshold && other >= data.threshold;
        const bool falling = other <= data.threshold && origin >= data.threshold;
        if (rising || falling)
            crossings++;
    }

    data.firstCellVert[cellIndex] = crossings;
}
// Kernel entry point for PxIsosurfaceExtractionData: forwards `data` to countCellVerts (one cell per thread).
extern "C" __global__ void iso_CountCellVerts(PxIsosurfaceExtractionData data)
{
countCellVerts(data);
}
// Kernel entry point for PxSparseIsosurfaceExtractionData: forwards `data` to countCellVerts (one cell per thread).
extern "C" __global__ void iso_CountCellVertsSparse(PxSparseIsosurfaceExtractionData data)
{
countCellVerts(data);
}
// Per-thread routine: for the cell with index blockIdx.x * blockDim.x + threadIdx.x, stores 1 in
// data.firstCellVert when the density crosses data.threshold on at least one of the cell's twelve
// edges, and 0 otherwise. Cells rejected by outOfBounds get 0; threads rejected by outOfRange
// write nothing. An edge counts as crossing when one end is <= threshold and the other end is
// >= threshold.
template<typename T>
__device__ void countCellVertsDC(T& data)
{
    int cellIndex = blockIdx.x * blockDim.x + threadIdx.x;
    if (outOfRange(data.mGrid, cellIndex))
        return;

    int4 xyz = getGridCoordinates(data.mGrid, cellIndex);

    data.firstCellVert[cellIndex] = 0;
    if (outOfBounds(data.mGrid, xyz))
        return;

    PxReal* density = getDensity(data);

    // Density at the eight cell corners, indexed [x][y][z].
    PxReal corners[2][2][2];
    for (int x = 0; x <= 1; ++x)
        for (int y = 0; y <= 1; ++y)
            for (int z = 0; z <= 1; ++z)
                corners[x][y][z] = getGridValue(data.mGrid, density, xyz, x, y, z);

    const PxReal threshold = data.threshold;
    bool anyCrossing = false;

    for (int a = 0; a <= 1; ++a)
    {
        for (int b = 0; b <= 1; ++b)
        {
            // Edge along z
            const PxReal z0 = corners[a][b][0];
            const PxReal z1 = corners[a][b][1];
            if ((z0 <= threshold && z1 >= threshold) || (z1 <= threshold && z0 >= threshold))
                anyCrossing = true;

            // Edge along y
            const PxReal y0 = corners[b][0][a];
            const PxReal y1 = corners[b][1][a];
            if ((y0 <= threshold && y1 >= threshold) || (y1 <= threshold && y0 >= threshold))
                anyCrossing = true;

            // Edge along x
            const PxReal x0 = corners[0][a][b];
            const PxReal x1 = corners[1][a][b];
            if ((x0 <= threshold && x1 >= threshold) || (x1 <= threshold && x0 >= threshold))
                anyCrossing = true;
        }
    }

    PxU32 counter = anyCrossing ? 1 : 0;
    data.firstCellVert[cellIndex] = counter;
}
// Kernel entry point for PxIsosurfaceExtractionData: forwards `data` to countCellVertsDC (one cell per thread).
extern "C" __global__ void iso_CountCellVertsDC(PxIsosurfaceExtractionData data)
{
countCellVertsDC(data);
}
// Kernel entry point for PxSparseIsosurfaceExtractionData: forwards `data` to countCellVertsDC (one cell per thread).
extern "C" __global__ void iso_CountCellVertsDCSparse(PxSparseIsosurfaceExtractionData data)
{
countCellVertsDC(data);
}
// Per-thread routine: creates the vertices on the +x, +y and +z edges of the cell with index
// blockIdx.x * blockDim.x + threadIdx.x.
//
// On entry data.firstCellVert[cell] is read as the index of the first vertex of this cell.
// For every edge on which the density crosses data.threshold, a vertex is placed on the edge at
// the linearly interpolated crossing position (midpoint if both ends are equal) and written to
// data.verts, provided its index is below data.maxVerts.
//
// On exit data.firstCellVert[cell] holds the first vertex index with bit (29 + dim) set for every
// edge direction dim that received a vertex.
//
// The thread for which isLastCell is true also stores the total vertex count, clamped to
// data.maxVerts, in data.numVerticesNumIndices[0].
template<typename T>
__device__ void createVerts(T& data)
{
    int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
    if (outOfRange(data.mGrid, threadIndex))
        return;

    // Written before the bounds test so the count is available even if the last cell is rejected below.
    if (isLastCell(data.mGrid, threadIndex))
        data.numVerticesNumIndices[0] = PxMin(data.maxVerts, data.firstCellVert[threadIndex]);

    int4 xyz = getGridCoordinates(data.mGrid, threadIndex);
    if (outOfBounds(data.mGrid, xyz))
        return;

    PxU32 first = data.firstCellVert[threadIndex];
    PxVec3 p = getLocation(data.mGrid, xyz);

    PxReal* density = getDensity(data);
    PxReal d0 = getGridValue(data.mGrid, density, xyz, 0, 0, 0);
    PxReal ds[3];
    ds[0] = getGridValue(data.mGrid, density, xyz, 1, 0, 0);
    ds[1] = getGridValue(data.mGrid, density, xyz, 0, 1, 0);
    ds[2] = getGridValue(data.mGrid, density, xyz, 0, 0, 1);

    PxU32 encoded = first;

#pragma unroll
    for (int dim = 0; dim < 3; dim++)
    {
        PxReal d = ds[dim];
        if ((d0 <= data.threshold && d >= data.threshold) || (d <= data.threshold && d0 >= data.threshold))
        {
            PxReal t = (d != d0) ? PxClamp((data.threshold - d0) / (d - d0), 0.0f, 1.0f) : 0.5f;
            PxU32 id = first++;

            // Unsigned literal: for dim == 2 the shift reaches bit 31, which must not be done on a signed int.
            encoded |= 1u << (29 + dim);

            PxVec3 off(0.0f);
            off[dim] = t * data.getSpacing();

            // Vertices beyond the capacity are counted but not stored.
            if (id < data.maxVerts)
                data.verts[id] = PxVec4(p + off, 0.0f);
        }
    }

    data.firstCellVert[threadIndex] = encoded;

    if (isLastCell(data.mGrid, threadIndex))
        data.numVerticesNumIndices[0] = PxMin(data.maxVerts, first); // total number of vertices, taken from the last array element
}
// Kernel entry point for PxIsosurfaceExtractionData: forwards `data` to createVerts (one cell per thread).
extern "C" __global__ void iso_CreateVerts(PxIsosurfaceExtractionData data)
{
createVerts(data);
}
// Kernel entry point for PxSparseIsosurfaceExtractionData: forwards `data` to createVerts (one cell per thread).
extern "C" __global__ void iso_CreateVertsSparse(PxSparseIsosurfaceExtractionData data)
{
createVerts(data);
}
// Per-thread routine: creates at most one vertex for the cell with index
// blockIdx.x * blockDim.x + threadIdx.x.
//
// data.firstCellVert[cell] is read as the vertex index of this cell. A cell other than the last one
// is skipped when its index equals that of the following cell (no slot reserved for it).
//
// The vertex is placed at the cell location plus the average of all threshold crossings found on
// the cell's twelve edges (each crossing expressed in cell-local [0,1] coordinates, scaled by the
// grid spacing). Nothing is written when no edge crosses the threshold or when the vertex index is
// not below data.maxVerts.
//
// The thread for which isLastCell is true also stores the total vertex count, clamped to
// data.maxVerts, in data.numVerticesNumIndices[0].
template<typename T>
__device__ void createVertsDC(T& data)
{
    int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
    if (outOfRange(data.mGrid, threadIndex))
        return;

    // Written before the bounds test so the count is available even if the last cell is rejected below.
    if (isLastCell(data.mGrid, threadIndex))
        data.numVerticesNumIndices[0] = PxMin(data.maxVerts, data.firstCellVert[threadIndex]);

    int4 xyz = getGridCoordinates(data.mGrid, threadIndex);
    if (outOfBounds(data.mGrid, xyz))
        return;

    PxU32 id = data.firstCellVert[threadIndex];
    if (!isLastCell(data.mGrid, threadIndex) && id == data.firstCellVert[threadIndex + 1])
        return;

    PxVec3 p = getLocation(data.mGrid, xyz);
    PxReal* density = getDensity(data);

    // Density at the eight cell corners, indexed [x][y][z].
    PxReal corners[2][2][2];
    for (int x = 0; x <= 1; ++x) for (int y = 0; y <= 1; ++y) for (int z = 0; z <= 1; ++z)
    {
        corners[x][y][z] = getGridValue(data.mGrid, density, xyz, x, y, z);
    }

    PxU32 counter = 0;
    PxVec3 sum(0.0f);

    // Edges along z
    for (int a = 0; a <= 1; ++a) for (int b = 0; b <= 1; ++b)
    {
        PxReal c0 = corners[a][b][0];
        PxReal c1 = corners[a][b][1];
        if ((c0 <= data.threshold && c1 >= data.threshold) || (c1 <= data.threshold && c0 >= data.threshold))
        {
            PxReal t = (c1 != c0) ? PxClamp((data.threshold - c0) / (c1 - c0), 0.0f, 1.0f) : 0.5f;
            sum += PxVec3(a, b, t);
            ++counter;
        }
    }

    // Edges along y
    for (int a = 0; a <= 1; ++a) for (int b = 0; b <= 1; ++b)
    {
        PxReal c0 = corners[b][0][a];
        PxReal c1 = corners[b][1][a];
        if ((c0 <= data.threshold && c1 >= data.threshold) || (c1 <= data.threshold && c0 >= data.threshold))
        {
            PxReal t = (c1 != c0) ? PxClamp((data.threshold - c0) / (c1 - c0), 0.0f, 1.0f) : 0.5f;
            sum += PxVec3(b, t, a);
            ++counter;
        }
    }

    // Edges along x
    for (int a = 0; a <= 1; ++a) for (int b = 0; b <= 1; ++b)
    {
        PxReal c0 = corners[0][a][b];
        PxReal c1 = corners[1][a][b];
        if ((c0 <= data.threshold && c1 >= data.threshold) || (c1 <= data.threshold && c0 >= data.threshold))
        {
            PxReal t = (c1 != c0) ? PxClamp((data.threshold - c0) / (c1 - c0), 0.0f, 1.0f) : 0.5f;
            sum += PxVec3(t, a, b);
            ++counter;
        }
    }

    const bool hasVertex = counter > 0;

    // The average is only formed when there is at least one crossing (no division by zero), and the
    // store is guarded by the vertex capacity in the same way as in createVerts.
    if (hasVertex && id < data.maxVerts)
    {
        p = p + sum * (data.getSpacing() / counter);
        data.verts[id] = PxVec4(p, 0.0f);
    }

    // Total number of vertices, taken from the last array element. The last cell only adds to the
    // total if it actually produced a vertex.
    if (isLastCell(data.mGrid, threadIndex))
        data.numVerticesNumIndices[0] = PxMin(data.maxVerts, id + (hasVertex ? 1u : 0u));
}
// Kernel entry point for PxIsosurfaceExtractionData: forwards `data` to createVertsDC (one cell per thread).
extern "C" __global__ void iso_CreateVertsDC(PxIsosurfaceExtractionData data)
{
createVertsDC(data);
}
// Kernel entry point for PxSparseIsosurfaceExtractionData: forwards `data` to createVertsDC (one cell per thread).
extern "C" __global__ void iso_CreateVertsDCSparse(PxSparseIsosurfaceExtractionData data)
{
createVertsDC(data);
}
// Decodes the vertex index of edge direction `dim` (0, 1 or 2) from an encoded per-cell value.
//
// Layout of `encoded`: the low 29 bits hold the index of the cell's first vertex; bit (29 + k) is
// set when the cell has a vertex on edge direction k. The vertices of a cell are numbered
// consecutively in order of increasing direction, so the result is the first index plus the number
// of lower directions that are present.
//
// Returns 0xFFFFFFFF when the cell has no vertex on direction `dim`.
__device__ PxU32 getDecodedId(PxU32 encoded, PxU32 dim)
{
    // Unsigned masks: bit 31 (dim == 2) must not be produced by shifting a signed int.
    const PxU32 flagX = 1u << 29;
    const PxU32 flagY = 1u << 30;
    const PxU32 requested = 1u << (29 + dim);

    if (!(encoded & requested))
        return 0xFFFFFFFF;

    PxU32 id = encoded & 0x1FFFFFFF;
    if (dim > 0 && (encoded & flagX))
        id++;
    if (dim > 1 && (encoded & flagY))
        id++;
    return id;
}
// Resolves the three vertex indices of the triangle that starts at entry (firstIn + i) of
// marchingCubesIds for the cell at `xyz`.
//
// Every table entry names an edge; marchingCubesEdgeLocations maps it to a neighbor cell offset
// (entries 0..2) and an edge direction (entry 3), and the vertex index is decoded from that cell's
// data.firstCellVert value.
//
// Returns false as soon as one corner cannot be used: the neighbor cell is EMPTY_SUBGRID, the
// decoded vertex index is not below data.maxVerts, or the output slot (firstOut + corner) is not
// below data.maxTriIds. When `buffer` is given, the indices resolved so far are stored in it.
template<typename T>
PX_FORCE_INLINE __device__ bool constructTriangle(T& data, int4& xyz, int i, PxU32 firstIn, PxU32 firstOut, int* buffer = NULL)
{
#pragma unroll
    for (int corner = 0; corner < 3; ++corner)
    {
        const int edgeId = marchingCubesIds[firstIn + i + corner];
        const int edgeDir = marchingCubesEdgeLocations[edgeId][3];

        const PxU32 cellNr = getCellIndex(data.mGrid, xyz, marchingCubesEdgeLocations[edgeId][0], marchingCubesEdgeLocations[edgeId][1], marchingCubesEdgeLocations[edgeId][2], false);
        if (cellNr == EMPTY_SUBGRID)
            return false;

        const PxU32 vertexId = getDecodedId(data.firstCellVert[cellNr], edgeDir);
        const bool vertexUsable = vertexId < data.maxVerts;
        const bool slotUsable = firstOut + corner < data.maxTriIds;
        if (!(vertexUsable && slotUsable))
            return false;

        if (buffer)
            buffer[corner] = vertexId;
    }
    return true;
}
// Per-thread routine: counts the triangle indices that the cell with index
// blockIdx.x * blockDim.x + threadIdx.x will emit and stores the count in the buffer returned by
// getFirstCellTriId.
//
// The eight cell corners are classified against data.threshold (bit i of the case code is set when
// corner i is >= threshold); the code selects a range of marchingCubesIds via firstMarchingCubesId.
// Each triangle of that range accepted by constructTriangle contributes 3 to the count.
// Cells rejected by outOfBounds get 0; threads rejected by outOfRange write nothing.
template<typename T>
__device__ void countTriIds(T& data)
{
    int cellIndex = blockIdx.x * blockDim.x + threadIdx.x;
    if (outOfRange(data.mGrid, cellIndex))
        return;

    int4 xyz = getGridCoordinates(data.mGrid, cellIndex);

    PxU32* firstCellTriId = getFirstCellTriId(data);
    firstCellTriId[cellIndex] = 0;
    if (outOfBounds(data.mGrid, xyz))
        return;

    PxReal* density = getDensity(data);

    int caseCode = 0;
#pragma unroll
    for (int corner = 0; corner < 8; corner++)
    {
        const PxReal value = getGridValue(data.mGrid, density, xyz, marchingCubeCorners[corner][0], marchingCubeCorners[corner][1], marchingCubeCorners[corner][2]);
        if (value >= data.threshold)
            caseCode |= (1 << corner);
    }

    const int firstIn = firstMarchingCubesId[caseCode];
    const int numIds = firstMarchingCubesId[caseCode + 1] - firstIn;

    PxI32 numTriIds = 0;
    for (int i = 0; i < numIds; i += 3)
    {
        if (constructTriangle(data, xyz, i, firstIn, 0))
            numTriIds += 3;
    }

    firstCellTriId[cellIndex] = numTriIds;
}
// Kernel entry point for PxIsosurfaceExtractionData: forwards `data` to countTriIds (one cell per thread).
extern "C" __global__ void iso_CountTriIds(PxIsosurfaceExtractionData data)
{
countTriIds(data);
}
// Kernel entry point for PxSparseIsosurfaceExtractionData: forwards `data` to countTriIds (one cell per thread).
extern "C" __global__ void iso_CountTriIdsSparse(PxSparseIsosurfaceExtractionData data)
{
countTriIds(data);
}
// Per-thread routine: for the cell with index blockIdx.x * blockDim.x + threadIdx.x, reserves six
// triangle indices (two triangles) for each of the three edges leaving the cell in +x, +y and +z on
// which the density crosses data.threshold. The count is stored in the buffer returned by
// getFirstCellTriId. Cells rejected by outOfBounds get 0; threads rejected by outOfRange write
// nothing.
template<typename T>
__device__ void countTriIdsDC(T& data)
{
    int cellIndex = blockIdx.x * blockDim.x + threadIdx.x;
    if (outOfRange(data.mGrid, cellIndex))
        return;

    int4 xyz = getGridCoordinates(data.mGrid, cellIndex);

    PxU32* firstCellTriId = getFirstCellTriId(data);
    firstCellTriId[cellIndex] = 0;
    if (outOfBounds(data.mGrid, xyz))
        return;

    PxReal* density = getDensity(data);
    const PxReal origin = getGridValue(data.mGrid, density, xyz, 0, 0, 0);
    PxReal neighbor[3];
    neighbor[0] = getGridValue(data.mGrid, density, xyz, 1, 0, 0);
    neighbor[1] = getGridValue(data.mGrid, density, xyz, 0, 1, 0);
    neighbor[2] = getGridValue(data.mGrid, density, xyz, 0, 0, 1);

    int crossings = 0;
#pragma unroll
    for (int axis = 0; axis < 3; axis++)
    {
        const PxReal other = neighbor[axis];
        const bool rising = origin <= data.threshold && other >= data.threshold;
        const bool falling = other <= data.threshold && origin >= data.threshold;
        if (rising || falling)
            crossings++;
    }

    // One quad per crossing edge = 2 triangles = 6 indices.
    firstCellTriId[cellIndex] = 2 * 3 * crossings;
}
// Kernel entry point for PxIsosurfaceExtractionData: forwards `data` to countTriIdsDC (one cell per thread).
extern "C" __global__ void iso_CountTriIdsDC(PxIsosurfaceExtractionData data)
{
countTriIdsDC(data);
}
// Kernel entry point for PxSparseIsosurfaceExtractionData: forwards `data` to countTriIdsDC (one cell per thread).
extern "C" __global__ void iso_CountTriIdsDCSparse(PxSparseIsosurfaceExtractionData data)
{
countTriIdsDC(data);
}
// Per-thread routine: writes the triangle indices of the cell with index
// blockIdx.x * blockDim.x + threadIdx.x into data.triIds.
//
// The buffer returned by getFirstCellTriId is read as the first output slot of each cell. A cell
// other than the last one is skipped when its slot equals that of the following cell (nothing was
// reserved for it). The triangles are taken from the marching cubes tables for the cell's corner
// classification; each triangle accepted by constructTriangle is written in table order, or in
// reverse order when flipTriangleOrientation is set.
//
// The thread for which isLastCell is true also stores the total index count, clamped to
// data.maxTriIds, in data.numVerticesNumIndices[1].
template<typename T>
__device__ void createTriIds(T& data, bool flipTriangleOrientation)
{
    int cellIndex = blockIdx.x * blockDim.x + threadIdx.x;
    if (outOfRange(data.mGrid, cellIndex))
        return;

    const PxU32* firstCellTriId = getFirstCellTriId(data);

    // Written before the bounds test so the count is available even if the last cell is rejected below.
    if (isLastCell(data.mGrid, cellIndex))
        data.numVerticesNumIndices[1] = PxMin(data.maxTriIds, firstCellTriId[cellIndex]);

    int4 xyz = getGridCoordinates(data.mGrid, cellIndex);
    if (outOfBounds(data.mGrid, xyz))
        return;

    PxU32 writePos = firstCellTriId[cellIndex];
    if (!isLastCell(data.mGrid, cellIndex) && writePos == firstCellTriId[cellIndex + 1])
        return; // no slice reserved for this cell by the counting pass

    const PxReal* density = getDensity(data);

    int caseCode = 0;
#pragma unroll
    for (int corner = 0; corner < 8; corner++)
    {
        const PxReal value = getGridValue(data.mGrid, density, xyz, marchingCubeCorners[corner][0], marchingCubeCorners[corner][1], marchingCubeCorners[corner][2]);
        if (value >= data.threshold)
            caseCode |= (1 << corner);
    }

    const int firstIn = firstMarchingCubesId[caseCode];
    const int numIds = firstMarchingCubesId[caseCode + 1] - firstIn;

    int tri[3];
    for (int i = 0; i < numIds; i += 3)
    {
        if (!constructTriangle(data, xyz, i, firstIn, writePos, tri))
            continue;

        const int firstCorner = flipTriangleOrientation ? 2 : 0;
        const int step = flipTriangleOrientation ? -1 : 1;
        for (int k = 0; k < 3; ++k)
            data.triIds[writePos++] = tri[firstCorner + k * step];
    }

    if (isLastCell(data.mGrid, cellIndex))
        data.numVerticesNumIndices[1] = PxMin(data.maxTriIds, writePos); // total number of indices, taken from the last array element
}
// Kernel entry point for PxIsosurfaceExtractionData: forwards its arguments unchanged to createTriIds (one cell per thread).
extern "C" __global__ void iso_CreateTriIds(PxIsosurfaceExtractionData data, bool flipTriangleOrientation)
{
createTriIds(data, flipTriangleOrientation);
}
// Kernel entry point for PxSparseIsosurfaceExtractionData: forwards its arguments unchanged to createTriIds (one cell per thread).
extern "C" __global__ void iso_CreateTriIdsSparse(PxSparseIsosurfaceExtractionData data, bool flipTriangleOrientation)
{
createTriIds(data, flipTriangleOrientation);
}
// offsets[dim][i] is the (x, y, z) cell offset of the i-th neighbor cell used by createTriIdsDC
// when it builds the quad around the edge leaving a cell in direction `dim`. Together with the
// cell itself, these three neighbors supply the four quad corners.
__constant__ int offsets[3][3][3] = { { {0,-1,0}, {0,-1,-1},{0,0,-1} },
{ {0,0,-1}, {-1,0,-1},{-1,0,0} } ,
{ {-1,0,0}, {-1,-1,0},{0,-1,0} } };
// projections[dim] lists the two coordinate axes other than `dim`; directionSign uses them to
// work in the plane perpendicular to the principal direction.
__constant__ int projections[3][2] = { {1, 2}, {2, 0}, {0, 1} };
// Returns the 2D cross product of (middle - start) and (end - middle), evaluated on the two
// coordinate axes that projections[] lists for principalDirection. The sign tells in which
// direction the polyline start -> middle -> end turns in that plane.
__device__ float directionSign(int principalDirection, const PxVec3& start, const PxVec3& middle, const PxVec3& end)
{
    const int u = projections[principalDirection][0];
    const int v = projections[principalDirection][1];

    PxReal a0 = middle[u] - start[u];
    PxReal a1 = middle[v] - start[v];

    PxReal b0 = end[u] - middle[u];
    PxReal b1 = end[v] - middle[v];

    return a0 * b1 - a1 * b0;
}
// Evaluates directionSign at the four corners of the quad a, b, c, d (in the order b, c, d, a) and
// returns the index (a = 0, b = 1, c = 2, d = 3) of the corner with the smallest value that is
// <= 0. On equal values the corner tested later wins. Returns -1 when all four values are positive.
__device__ int indexOfConcaveCorner(int principalDirection, const PxVec3& a, const PxVec3& b, const PxVec3& c, const PxVec3& d)
{
    // Turn value at each corner, listed in evaluation order together with the corner's index.
    const float turn[4] = {
        directionSign(principalDirection, a, b, c),
        directionSign(principalDirection, b, c, d),
        directionSign(principalDirection, c, d, a),
        directionSign(principalDirection, d, a, b)
    };
    const int cornerIndex[4] = { 1, 2, 3, 0 };

    float smallest = 0;
    int found = -1;
    for (int k = 0; k < 4; ++k)
    {
        if (turn[k] <= smallest)
        {
            smallest = turn[k];
            found = cornerIndex[k];
        }
    }
    return found;
}
// Per-thread routine: writes the triangle indices of the cell with index
// blockIdx.x * blockDim.x + threadIdx.x into data.triIds.
//
// For each of the three edges leaving the cell in +x, +y and +z on which the density crosses
// data.threshold, a quad is formed from the vertex of this cell and the vertices of the three
// neighbor cells listed in offsets[dim]. The quad is split into two triangles; the diagonal is
// chosen from the result of indexOfConcaveCorner, and the winding depends on
// flipTriangleOrientation and on the direction of the crossing.
//
// A quad is skipped when
//   * a neighbor cell is EMPTY_SUBGRID,
//   * any of its four vertex indices is not below data.maxVerts.
// A triangle is skipped when its three indices do not fit into data.maxTriIds.
//
// The thread for which isLastCell is true also stores the total index count, clamped to
// data.maxTriIds, in data.numVerticesNumIndices[1].
template<typename T>
__device__ void createTriIdsDC(T& data, bool flipTriangleOrientation)
{
    int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
    if (outOfRange(data.mGrid, threadIndex))
        return;

    int4 xyz = getGridCoordinates(data.mGrid, threadIndex);

    PxU32* firstCellTriId = getFirstCellTriId(data);

    // Written before the bounds test so the count is available even if the last cell is rejected below.
    if (isLastCell(data.mGrid, threadIndex))
        data.numVerticesNumIndices[1] = PxMin(data.maxTriIds, firstCellTriId[threadIndex]);

    if (outOfBounds(data.mGrid, xyz))
        return;

    PxReal* density = getDensity(data);
    PxReal d0 = getGridValue(data.mGrid, density, xyz, 0, 0, 0);

    PxU32 firstOut = firstCellTriId[threadIndex];

    PxReal ds[3];
    ds[0] = getGridValue(data.mGrid, density, xyz, 1, 0, 0);
    ds[1] = getGridValue(data.mGrid, density, xyz, 0, 1, 0);
    ds[2] = getGridValue(data.mGrid, density, xyz, 0, 0, 1);

    // buffer[0] is this cell's vertex, buffer[1..3] the vertices of the neighbor cells.
    // The vertex position is only fetched after the index has been validated (see below).
    int buffer[4];
    buffer[0] = data.firstCellVert[threadIndex];

#pragma unroll
    for (int dim = 0; dim < 3; dim++)
    {
        PxReal d = ds[dim];
        bool b1 = d0 <= data.threshold && d >= data.threshold;
        bool b2 = d <= data.threshold && d0 >= data.threshold;
        if (!(b1 || b2))
            continue;

        bool flip = flipTriangleOrientation == b1;

        // Gather the neighbor vertices. firstCellVert must not be indexed with EMPTY_SUBGRID,
        // so the lookup stops at the first missing neighbor.
        bool quadValid = true;
#pragma unroll
        for (int i = 0; i < 3; ++i)
        {
            int id = getCellIndexSafe(data.mGrid, xyz, offsets[dim][i][0], offsets[dim][i][1], offsets[dim][i][2], false);
            if (id == EMPTY_SUBGRID)
            {
                quadValid = false;
                break;
            }
            buffer[i + 1] = data.firstCellVert[id];
        }
        if (!quadValid)
            continue;

        // Reject the quad if any vertex lies beyond the vertex capacity. A flag is needed here:
        // a `continue` inside this inner loop would only advance the inner loop.
#pragma unroll
        for (int i = 0; i < 4; ++i)
        {
            if (static_cast<PxU32>(buffer[i]) >= data.maxVerts)
                quadValid = false;
        }
        if (!quadValid)
            continue;

        int shift = PxMax(0, indexOfConcaveCorner(dim, data.verts[buffer[0]].getXYZ(), data.verts[buffer[1]].getXYZ(), data.verts[buffer[2]].getXYZ(), data.verts[buffer[3]].getXYZ())) % 2;

        // Split the quad into two triangles
        for (int i = 0; i < 2; ++i)
        {
            // Slots firstOut .. firstOut + 2 are written, so the triangle fits as long as
            // firstOut + 3 <= maxTriIds (same limit as constructTriangle applies).
            if (firstOut + 3 > data.maxTriIds)
                break;

            data.triIds[firstOut++] = buffer[shift];
            if (flip)
            {
                for (int j = 2; j >= 1; --j)
                    data.triIds[firstOut++] = buffer[(i + j + shift) % 4];
            }
            else
            {
                for (int j = 1; j < 3; ++j)
                    data.triIds[firstOut++] = buffer[(i + j + shift) % 4];
            }
        }
    }

    if (isLastCell(data.mGrid, threadIndex))
        data.numVerticesNumIndices[1] = PxMin(data.maxTriIds, firstOut); // total number of indices, taken from the last array element
}
// Kernel entry point for PxIsosurfaceExtractionData: forwards its arguments unchanged to createTriIdsDC (one cell per thread).
extern "C" __global__ void iso_CreateTriIdsDC(PxIsosurfaceExtractionData data, bool flipTriangleOrientation)
{
createTriIdsDC(data, flipTriangleOrientation);
}
// Kernel entry point for PxSparseIsosurfaceExtractionData: forwards its arguments unchanged to createTriIdsDC (one cell per thread).
extern "C" __global__ void iso_CreateTriIdsDCSparse(PxSparseIsosurfaceExtractionData data, bool flipTriangleOrientation)
{
createTriIdsDC(data, flipTriangleOrientation);
}
// Cubic polynomial approximation of acos(x), clamped from below at 0.
// Source of the formula: https://stackoverflow.com/questions/3380628/fast-arc-cos-algorithm
PX_FORCE_INLINE __device__ PxReal approxAcos(PxReal x)
{
    const PxReal estimate = (-0.69813170079773212f * x * x - 0.87266462599716477f) * x + 1.5707963267948966f;
    return PxMax(0.0f, estimate);
}
// Approximate angle between v1 and v2: the dot product is normalized with the reciprocal square
// root of the product of the squared lengths and then passed to approxAcos.
PX_FORCE_INLINE __device__ PxReal angle(const PxVec3 v1, const PxVec3 v2)
{
    const PxReal cosine = v1.dot(v2) * __frsqrt_rn(v1.magnitudeSquared() * v2.magnitudeSquared());
    return approxAcos(cosine);
}
// Length of the cross product of v1 and v2, i.e. twice the area of the triangle they span.
PX_FORCE_INLINE __device__ PxReal areaTimes2(const PxVec3 v1, const PxVec3 v2)
{
    const PxVec3 crossProduct = v1.cross(v2);
    return crossProduct.magnitude();
}
// Accumulation pass of vertex smoothing. Triangles are processed in a grid-stride loop.
// For every corner of every triangle, the midpoint of the opposite edge, weighted by the
// (approximate) interior angle at that corner, is added to output[corner].xyz and the weight
// itself is added to output[corner].w. All additions go through PxRedAddGlobal.
//
// vertices   input vertex positions (only xyz is read)
// output     accumulation buffer, indexed like `vertices`
// triIds     triangle index list, three entries per triangle
// numTriIds  number of entries in triIds
__device__ void smoothVerts(const PxVec4* vertices, PxVec4* output, const PxU32* triIds, const PxU32 numTriIds)
{
    const PxU32 numTriangles = numTriIds / 3;

    for (PxI32 tri = blockIdx.x * blockDim.x + threadIdx.x; tri < numTriangles; tri += blockDim.x * gridDim.x)
    {
        for (int corner = 0; corner < 3; corner++)
        {
            const int idSelf = triIds[3 * tri + corner];
            const int idNext = triIds[3 * tri + (corner + 1) % 3];
            const int idPrev = triIds[3 * tri + (corner + 2) % 3];

            PxVec3 p0 = vertices[idSelf].getXYZ();
            PxVec3 p1 = vertices[idNext].getXYZ();
            PxVec3 p2 = vertices[idPrev].getXYZ();

            PxVec3 edge01 = p1 - p0;
            PxVec3 edge02 = p2 - p0;
            PxReal weight = angle(edge01, edge02);

            PxRedAddGlobal(&output[idSelf].w, weight);
            atomicAdd3(&output[idSelf], weight * (0.5f * (p1 + p2)));
        }
    }
}
// Kernel entry point: reads the index count from *numTriIds and forwards everything to smoothVerts.
extern "C" __global__ void iso_SmoothVerts(const PxVec4* PX_RESTRICT vertices, PxVec4* PX_RESTRICT output, const PxU32* PX_RESTRICT triIds, const PxU32* PX_RESTRICT numTriIds)
{
smoothVerts(vertices, output, triIds, *numTriIds);
}
// Normalization pass of vertex smoothing. Entries are processed in a grid-stride loop.
// `vertices` holds weighted sums (xyz) and total weights (w). For every entry with a positive
// weight, output[i] is blended towards the weighted average xyz / w:
//     output[i] = (1 - blendWeight) * output[i] + blendWeight * average,  w set to 0
// Every processed entry of `vertices` is reset to zero afterwards, whether or not it had weight.
__device__ void averageVerts(PxVec4* PX_RESTRICT vertices, PxVec4* PX_RESTRICT output, const PxU32 length, PxReal blendWeight = 1.0f)
{
    const PxU32 numVertices = length;

    for (PxI32 index = blockIdx.x * blockDim.x + threadIdx.x; index < numVertices; index += blockDim.x * gridDim.x)
    {
        const PxVec4 accumulated = vertices[index];

        if (accumulated.w > 0)
        {
            PxVec3 current = output[index].getXYZ();
            output[index] = PxVec4((1.0f - blendWeight) * current + blendWeight * (accumulated.getXYZ() / accumulated.w), 0.0f);
        }

        // Clear the accumulation buffer so it can be reused by following smoothing passes
        vertices[index] = PxVec4(0.0f);
    }
}
// Kernel entry point: reads the entry count from *length and forwards everything to averageVerts.
extern "C" __global__ void iso_AverageVerts(PxVec4* PX_RESTRICT vertices, PxVec4* PX_RESTRICT output, const PxU32* PX_RESTRICT length, PxReal blendWeight)
{
averageVerts(vertices, output, *length, blendWeight);
}
// Returns v divided by its length. A vector whose length is not greater than zero is returned unchanged.
__device__ PxVec3 normalize(PxVec3 v)
{
    const float length = v.magnitude();
    if (!(length > 0.0f))
        return v;

    v /= length;
    return v;
}
// Accumulation pass of normal smoothing. Triangles are processed in a grid-stride loop; the
// triangle count is data.numVerticesNumIndices[1] / 3. For each triangle, the normalized sum of
// its three vertex normals is added (via atomicAdd3) to data.smoothingBuffer at each of the three
// vertices.
template<typename T>
__device__ void smoothNormals(T& data)
{
    const PxU32 numTriangles = data.numVerticesNumIndices[1] / 3;

    for (PxI32 tri = blockIdx.x * blockDim.x + threadIdx.x; tri < numTriangles; tri += blockDim.x * gridDim.x)
    {
        const int base = 3 * tri;
        const int idA = data.triIds[base];
        const int idB = data.triIds[base + 1];
        const int idC = data.triIds[base + 2];

        PxVec3 nA = data.normals[idA].getXYZ();
        PxVec3 nB = data.normals[idB].getXYZ();
        PxVec3 nC = data.normals[idC].getXYZ();

        PxVec3 averaged = normalize(nA + nB + nC);

        atomicAdd3(&data.smoothingBuffer[idA], averaged);
        atomicAdd3(&data.smoothingBuffer[idB], averaged);
        atomicAdd3(&data.smoothingBuffer[idC], averaged);
    }
}
// Kernel entry point for PxIsosurfaceExtractionData: forwards `data` to smoothNormals.
extern "C" __global__ void iso_SmoothNormals(PxIsosurfaceExtractionData data)
{
smoothNormals(data);
}
// Kernel entry point for PxSparseIsosurfaceExtractionData: forwards `data` to smoothNormals.
extern "C" __global__ void iso_SmoothNormalsSparse(PxSparseIsosurfaceExtractionData data)
{
smoothNormals(data);
}
// Scales the xyz part of v by 1 / max(1e-6, |xyz|) and keeps w as it is. The lower bound on the
// length avoids a division by zero for (near-)zero vectors.
__device__ PxVec4 normalize(const PxVec4& v)
{
    const PxReal length = PxSqrt(v.x*v.x + v.y*v.y + v.z*v.z);
    const PxReal invLength = 1.0f / PxMax(1e-6f, length);
    return PxVec4(invLength*v.x, invLength*v.y, invLength*v.z, v.w);
}
// Normalization pass of normal smoothing. Vertices are processed in a grid-stride loop; the vertex
// count is data.numVerticesNumIndices[0]. Each normal is replaced by the normalized content of
// data.smoothingBuffer, and the smoothing buffer entry is reset to zero.
template<typename T>
__device__ void smoothNormalsNormalize(T& data)
{
    const PxU32 numVertices = data.numVerticesNumIndices[0];

    for (PxI32 index = blockIdx.x * blockDim.x + threadIdx.x; index < numVertices; index += blockDim.x * gridDim.x)
    {
        const PxVec4 smoothed = normalize(data.smoothingBuffer[index]);
        data.normals[index] = smoothed;
        data.smoothingBuffer[index] = PxVec4(0, 0, 0, 0);
    }
}
// Kernel entry point for PxIsosurfaceExtractionData: forwards `data` to smoothNormalsNormalize.
extern "C" __global__ void iso_SmoothNormalsNormalize(PxIsosurfaceExtractionData data)
{
smoothNormalsNormalize(data);
}
// Kernel entry point for PxSparseIsosurfaceExtractionData: forwards `data` to smoothNormalsNormalize.
extern "C" __global__ void iso_SmoothNormalsNormalizeSparse(PxSparseIsosurfaceExtractionData data)
{
smoothNormalsNormalize(data);
}
// Accumulates unnormalized vertex normals. Triangles are processed in a grid-stride loop; the
// triangle count is data.numVerticesNumIndices[1] / 3. For each triangle, the cross product of its
// two edges starting at the first vertex is added (via atomicAdd3) to data.normals at all three
// vertices. The cross product is not normalized, so its length (twice the triangle area) acts as
// the weight of the contribution.
template<typename T>
__device__ void computeNormals(T& data)
{
    const PxU32 numTriangles = data.numVerticesNumIndices[1] / 3;

    for (PxI32 tri = blockIdx.x * blockDim.x + threadIdx.x; tri < numTriangles; tri += blockDim.x * gridDim.x)
    {
        const int base = 3 * tri;
        const int idA = data.triIds[base];
        const int idB = data.triIds[base + 1];
        const int idC = data.triIds[base + 2];

        PxVec3 pA = data.verts[idA].getXYZ();
        PxVec3 pB = data.verts[idB].getXYZ();
        PxVec3 pC = data.verts[idC].getXYZ();

        PxVec3 faceNormal = (pB - pA).cross(pC - pA);

        atomicAdd3(&data.normals[idA], faceNormal);
        atomicAdd3(&data.normals[idB], faceNormal);
        atomicAdd3(&data.normals[idC], faceNormal);
    }
}
// Kernel entry point for PxIsosurfaceExtractionData: forwards `data` to computeNormals.
extern "C" __global__ void iso_ComputeNormals(PxIsosurfaceExtractionData data)
{
computeNormals(data);
}
// Kernel entry point for PxSparseIsosurfaceExtractionData: forwards `data` to computeNormals.
extern "C" __global__ void iso_ComputeNormalsSparse(PxSparseIsosurfaceExtractionData data)
{
computeNormals(data);
}
// Normalizes data.normals in place. Vertices are processed in a grid-stride loop; the vertex count
// is data.numVerticesNumIndices[0].
template<typename T>
__device__ void normalizeNormals(T& data)
{
    const PxU32 numVertices = data.numVerticesNumIndices[0];

    for (PxI32 index = blockIdx.x * blockDim.x + threadIdx.x; index < numVertices; index += blockDim.x * gridDim.x)
    {
        const PxVec4 unit = normalize(data.normals[index]);
        data.normals[index] = unit;
    }
}
// Kernel entry point for PxIsosurfaceExtractionData: forwards `data` to normalizeNormals.
extern "C" __global__ void iso_NormalizeNormals(PxIsosurfaceExtractionData data)
{
normalizeNormals(data);
}
// Kernel entry point for PxSparseIsosurfaceExtractionData: forwards `data` to normalizeNormals.
extern "C" __global__ void iso_NormalizeNormalsSparse(PxSparseIsosurfaceExtractionData data)
{
normalizeNormals(data);
}
// Per-thread routine: applies a 3x3x3 weighted average to the cell with index
// blockIdx.x * blockDim.x + threadIdx.x. Input is data.buffer[data.swapState], output is
// data.buffer[1 - data.swapState].
//
// The per-axis weights are (neighborWeight, 1, neighborWeight); the weight of a neighbor is the
// product of the three per-axis weights (the filter is separable). The result is divided by the
// sum of all 27 weights. Neighbor values are fetched with getGridValueSafe.
template<typename T>
__device__ void gridFilterGauss(T& data, PxReal neighborWeight)
{
    int cellIndex = blockIdx.x * blockDim.x + threadIdx.x;
    if (outOfRange(data.mGrid, cellIndex))
        return;

    // Per-axis weights for offsets -1, 0, +1 (relative to the cell size).
    PxReal axisWeight[3];
    axisWeight[0] = neighborWeight;
    axisWeight[1] = 1.0f;
    axisWeight[2] = axisWeight[0];

    PxReal totalWeight = 0;
    PxReal accumulated = 0;

    int4 xyz = getGridCoordinates(data.mGrid, cellIndex);

    PxReal* source = data.buffer[data.swapState];
    PxReal* destination = data.buffer[1 - data.swapState];

    for (int dx = -1; dx <= 1; ++dx)
    {
        for (int dy = -1; dy <= 1; ++dy)
        {
            for (int dz = -1; dz <= 1; ++dz)
            {
                PxReal weight = axisWeight[dx + 1] * axisWeight[dy + 1] * axisWeight[dz + 1];
                totalWeight += weight;
                accumulated += weight * getGridValueSafe(data.mGrid, source, xyz, dx, dy, dz);
            }
        }
    }

    destination[cellIndex] = accumulated / totalWeight;
}
// Kernel entry point for PxIsosurfaceExtractionData: forwards its arguments unchanged to gridFilterGauss (one cell per thread).
extern "C" __global__ void iso_GridFilterGauss(PxIsosurfaceExtractionData data, PxReal neighborWeight)
{
gridFilterGauss(data, neighborWeight);
}
// Kernel entry point for PxSparseIsosurfaceExtractionData: forwards its arguments unchanged to gridFilterGauss (one cell per thread).
extern "C" __global__ void iso_GridFilterGaussSparse(PxSparseIsosurfaceExtractionData data, PxReal neighborWeight)
{
gridFilterGauss(data, neighborWeight);
}
// Per-thread routine: applies a 3x3x3 max/min filter to the cell with index
// blockIdx.x * blockDim.x + threadIdx.x. Input is data.buffer[data.swapState], output is
// data.buffer[1 - data.swapState].
//
// The result is sign * max(sign * value) over the cell and its 3x3x3 neighborhood. With
// sign = +1 this is the maximum, with sign = -1 the minimum. Neighbor values are fetched with
// getGridValueSafe.
template<typename T>
__device__ void gridFilterDilateErode(T& data, PxReal sign)
{
    int cellIndex = blockIdx.x * blockDim.x + threadIdx.x;
    if (outOfRange(data.mGrid, cellIndex))
        return;

    int4 xyz = getGridCoordinates(data.mGrid, cellIndex);

    PxReal* source = data.buffer[data.swapState];
    PxReal* destination = data.buffer[1 - data.swapState];

    PxReal extreme = sign * source[cellIndex];

    for (int dx = -1; dx <= 1; ++dx)
    {
        for (int dy = -1; dy <= 1; ++dy)
        {
            for (int dz = -1; dz <= 1; ++dz)
            {
                const PxReal candidate = sign * getGridValueSafe(data.mGrid, source, xyz, dx, dy, dz);
                extreme = PxMax(extreme, candidate);
            }
        }
    }

    destination[cellIndex] = sign * extreme;
}
// Kernel entry point for PxIsosurfaceExtractionData: forwards its arguments unchanged to gridFilterDilateErode (one cell per thread).
extern "C" __global__ void iso_GridFilterDilateErode(PxIsosurfaceExtractionData data, PxReal sign)
{
gridFilterDilateErode(data, sign);
}
// Kernel entry point for PxSparseIsosurfaceExtractionData: forwards its arguments unchanged to gridFilterDilateErode (one cell per thread).
extern "C" __global__ void iso_GridFilterDilateErodeSparse(PxSparseIsosurfaceExtractionData data, PxReal sign)
{
gridFilterDilateErode(data, sign);
}