Files
XCEngine/engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/anisotropy.cu

630 lines
20 KiB
Plaintext

// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "vector_types.h"
#include "foundation/PxVec3.h"
#include "foundation/PxVec4.h"
#include "matrixDecomposition.cuh"
#include "PxParticleGpu.h"
#include "PxgAnisotropyData.h"
#include "sparseGridStandalone.cuh"
// Compile-time switch; it is not referenced anywhere in the visible part of this file.
// NOTE(review): presumably turns on error checking after kernel launches when set to 1 -- confirm where it is consumed.
#define ENABLE_KERNEL_LAUNCH_ERROR_CHECK 0
// Empty host-side function with C linkage; calling it has no effect.
// NOTE(review): presumably exists so host code can reference this translation unit and keep its kernels linked in -- confirm against the caller.
extern "C" __host__ void initAnisotropyKernels0() {}
// Reads a float4 and returns its x, y and z components as a PxVec3 (w is dropped).
__device__ inline PxVec3 PxLoad3(const float4& v)
{
	// Copy the whole float4 into a local first, then pick the components from the copy.
	// NOTE(review): presumably done so the compiler issues one 16-byte load -- confirm.
	const float4 loaded = v;
	return PxVec3(loaded.x, loaded.y, loaded.z);
}
// Reads a float4 and returns all four components as a PxVec4.
__device__ inline PxVec4 PxLoad4(const float4& v)
{
	// Copy the whole float4 into a local first, then build the PxVec4 from the copy.
	const float4 loaded = v;
	return PxVec4(loaded.x, loaded.y, loaded.z, loaded.w);
}
// Returns x raised to the third power, evaluated as (x * x) * x.
__device__ inline PxReal cube(PxReal x)
{
	const PxReal squared = x * x;
	return squared * x;
}
// Neighbour weighting function: 1 - (x * invr)^3.
// x    : distance between two particles (callers in this file pass sqrtf of the squared distance)
// invr : reciprocal of the cutoff distance (callers in this file pass the inverse contact distance)
// The result is 1 at x == 0 and 0 when x * invr == 1.
__device__ inline PxReal Wa(PxReal x, PxReal invr)
{
	const PxReal normalized = x * invr;
	return 1.f - cube(normalized);
}
// Linear interpolation: returns start for t == 0 and start + (end - start) for t == 1.
// V must support subtraction, addition and multiplication by T.
template <typename V, typename T>
__device__ inline V Lerp(const V& start, const V& end, const T& t)
{
	const V delta = end - start;
	return start + delta * t;
}
// Clamps the first three components of a (read through operator[]) to the range [s, t]
// and returns them as a new V built from three scalars.
// Each component is first raised to at least s and then capped at t.
template <typename V, typename T>
__device__ inline V Clamp(const V& a, const T s, const T t)
{
	const T c0 = PxMin(t, PxMax(s, a[0]));
	const T c1 = PxMin(t, PxMax(s, a[1]));
	const T c2 = PxMin(t, PxMax(s, a[2]));
	return V(c0, c1, c2);
}
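//
// NOTE: the commented-out block below is a disabled variant of smoothPositionsLaunch that
// walks the 27 neighbouring grid cells directly. It is not compiled and is kept for reference only.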
//extern "C" __global__ void smoothPositionsLaunch(PxU32* sortedOrder, PxU32* cellEnds, PxU float4* pose, PxU32* phase, float particleContactDistance)
//{
// const float4* const PX_RESTRICT sortedPose = reinterpret_cast<float4*>(particleSystem.mSortedPositions_InvMass);
// const PxU32* const PX_RESTRICT sortedPhases = particleSystem.mSortedPhaseArray;
//
// const PxU32* const PX_RESTRICT cellStart = particleSystem.mCellStart;
// const PxU32* const PX_RESTRICT cellEnd = particleSystem.mCellEnd;
//
// const PxReal particleContactDistanceSq = particleContactDistance * particleContactDistance;
// const PxReal particleContactDistanceInv = 1.0f / particleContactDistance;
//
// // calculated the sum of weights and weighted avg position for particle neighborhood
// PxVec3 xs(0.0f); //sum of positions
// PxReal ws = 0.0f; //sum of weights
// for (int z = -1; z <= 1; z++)
// {
// for (int y = -1; y <= 1; y++)
// {
// for (int x = -1; x <= 1; x++)
// {
// int3 neighbourPos = make_int3(gridPos.x + x, gridPos.y + y, gridPos.z + z);
// PxU32 gridHash = calcGridHash(neighbourPos, gridSize);
// PxU32 startIndex = cellStart[gridHash];
// PxU32 endIndex = cellEnd[gridHash];
//
// if (startIndex != EMPTY_CELL)
// {
// PxU32 nextPhase = sortedPhases[startIndex];
// float4 nextPos = fetch(&sortedPose[startIndex]);
//
// for (PxU32 particleIndex1 = startIndex; particleIndex1 < endIndex /*&& numCollidedParticles < maxNeighborhood*/; particleIndex1++)
// {
// const PxU32 phase2 = nextPhase;
// const float4 pos2 = nextPos;
//
// if ((particleIndex1 + 1) < endIndex)
// {
// nextPhase = sortedPhases[particleIndex1 + 1];
// nextPos = fetch(&sortedPose[particleIndex1 + 1]);
// }
//
// if (phase2 & validPhaseMask)
// {
// const PxVec3 xj = PxLoad3(pos2);
// const PxVec3 xij = xi - xj;
//
// const PxReal dsq = xij.magnitudeSquared();
//
// if (0.0f < dsq && dsq < particleContactDistanceSq)
// {
// const PxReal w = Wa(sqrtf(dsq), particleContactDistanceInv);
// ws += w;
// xs += xj * w;
// }
// }
// }
// }
// }
// }
// }
//
// if (ws > 0.f)
// {
// PxReal f = 4.0f*Wa(particleContactDistance*0.5f, particleContactDistanceInv);
// PxReal smooth = PxMin(1.0f, ws / f)*smoothing;
// xs /= ws;
// xi = Lerp(xi, xs, smooth);
// }
//
// //write smoothed positions back in API order
// smoothedPositions[origIdx] = make_float4(xi.x, xi.y, xi.z, xi4.w);
//}
// Smooths particle positions in a fluid by moving them closer to the weighted
// average position of their local neighborhood.
//
// Thread mapping: one thread per particle in sorted order (flat index built from
// blockIdx.x, blockDim.x and threadIdx.x). Threads whose index is >= mNumParticles
// return before reading any per-particle data.
//
// particleSystems                : array of particle systems; only particleSystems[id] is used
// id                             : selects both the particle system and its smoothing data
// smoothingDataPerParticleSystem : per-system smoothing strength (mSmoothing) and output
//                                  buffer (mPositions)
//
// Output is written at the unsorted index obtained from mSortedToUnsortedMapping.
// Non-fluid particles are copied through unchanged; for fluid particles the w component
// of the input position is carried over to the output.
extern "C" __global__ void smoothPositionsLaunch(PxGpuParticleSystem* particleSystems, const PxU32 id, PxSmoothedPositionData* smoothingDataPerParticleSystem)
{
	PxGpuParticleSystem& particleSystem = particleSystems[id];
	const PxU32 numParticles = particleSystem.mCommonData.mNumParticles;

	const PxU32 globalThreadIdx = blockIdx.x * blockDim.x + threadIdx.x;
	if (globalThreadIdx >= numParticles)
		return; //skip thread if it's past the end of particle list

	PxSmoothedPositionData& data = smoothingDataPerParticleSystem[id];
	const PxReal smoothing = data.mSmoothing;

	//pointers to global memory buffers -- inputs
	const float4* PX_RESTRICT sortedNewPositions = particleSystem.mSortedPositions_InvMass;
	const PxU32* PX_RESTRICT phases = particleSystem.mSortedPhaseArray;
	const PxU32* PX_RESTRICT collisionIndex = particleSystem.mCollisionIndex;
	const PxU32* PX_RESTRICT gridParticleIndices = particleSystem.mSortedToUnsortedMapping;

	//pointer to smoothed position buffer -- output
	float4* PX_RESTRICT smoothPosOrig = reinterpret_cast<float4*>(data.mPositions);

	const PxU32 p = globalThreadIdx; //index of particle in sorted order
	const PxU32 origIdx = gridParticleIndices[p];
	const PxVec4 xi4 = PxLoad4(sortedNewPositions[p]);

	//ignore smoothing for non-fluid particles & write original position
	//(xi4 already holds the position, so it is not fetched from global memory again)
	if (!PxGetFluid(phases[p]))
	{
		smoothPosOrig[origIdx] = make_float4(xi4.x, xi4.y, xi4.z, xi4.w);
		return;
	}

	//loop-invariant values, read once instead of once per neighbour
	const PxReal contactDistanceSq = particleSystem.mCommonData.mParticleContactDistanceSq;
	const PxReal contactDistanceInv = particleSystem.mCommonData.mParticleContactDistanceInv;
	const PxU32 contactCount = particleSystem.mParticleSelfCollisionCount[p];

	PxVec3 xi(xi4.x, xi4.y, xi4.z);

	// calculate the sum of weights and the weighted sum of positions for the particle neighborhood
	PxVec3 xs(0.0f);  //sum of positions
	PxReal ws = 0.0f; //sum of weights

	//neighbour i of particle p is stored at collisionIndex[p + i * numParticles]
	for (PxU32 i = 0, offset = p; i < contactCount; ++i, offset += numParticles)
	{
		const PxU32 q = collisionIndex[offset];
		if (!PxGetFluid(phases[q])) //ignore non-fluid particles
			continue;

		const PxVec3 xj = PxLoad3(sortedNewPositions[q]);
		const PxVec3 xij = xi - xj;
		const PxReal dsq = xij.magnitudeSquared();

		//skip coincident particles (dsq == 0) and anything at or beyond the contact distance
		if (0.0f < dsq && dsq < contactDistanceSq)
		{
			const PxReal w = Wa(sqrtf(dsq), contactDistanceInv);
			ws += w;
			xs += xj * w;
		}
	}

	if (ws > 0.f)
	{
		//f is four times the weight of a neighbour at half the contact distance;
		//ws / f (capped at 1) scales the smoothing strength
		const PxReal f = 4.0f * Wa(particleSystem.mCommonData.mParticleContactDistance * 0.5f, contactDistanceInv);
		const PxReal smooth = PxMin(1.0f, ws / f) * smoothing;
		xs /= ws;
		xi = Lerp(xi, xs, smooth);
	}

	//write smoothed positions back in API order
	smoothPosOrig[origIdx] = make_float4(xi.x, xi.y, xi.z, xi4.w);
}
// Calculates Eigen-decomposition of the particle covariance matrix according
// to "Reconstructing Surfaces of Particle-Based Fluids Using Anisotropic Kernels"
//
// One thread handles one particle in sorted order; threads whose global index is
// >= mNumParticles return immediately. For every fluid particle the kernel
//   1) accumulates the weighted sum of neighbour positions and the sum of weights,
//   2) accumulates the weighted covariance of the neighbours around the weighted mean,
//   3) calls eigenDecomposition() and writes the three columns of the resulting matrix
//      (xyz) together with a scaled, clamped value (w) to mAnisotropy_q1/q2/q3.
// Non-fluid particles and particles whose weight sum is zero get the unit axes with
// w = mAnisotropyMin * mParticleContactDistance.
// All outputs are written at the unsorted index taken from mSortedToUnsortedMapping.
//
// particleSystems                 : array of particle systems; only particleSystems[id] is used
// id                              : selects both the particle system and its anisotropy data
// anisotropyDataPerParticleSystem : per-system anisotropy settings and output buffers
extern "C" __global__ void calculateAnisotropyLaunch(PxGpuParticleSystem* particleSystems, const PxU32 id, PxAnisotropyData* anisotropyDataPerParticleSystem)
{
PxGpuParticleSystem& particleSystem = particleSystems[id];
const PxU32 numParticles = particleSystem.mCommonData.mNumParticles;
//pointers to global memory buffers -- inputs
const float4* PX_RESTRICT sortedNewPositions = particleSystem.mSortedPositions_InvMass;
const PxU32* PX_RESTRICT phases = particleSystem.mSortedPhaseArray;
const PxU32* PX_RESTRICT collisionIndex = particleSystem.mCollisionIndex;
const PxU32* PX_RESTRICT gridParticleIndices = particleSystem.mSortedToUnsortedMapping;
//pointers to the anisotropy buffers -- outputs
const PxAnisotropyData& anisotropyData = anisotropyDataPerParticleSystem[id];
float4* PX_RESTRICT q1 = reinterpret_cast<float4*>(anisotropyData.mAnisotropy_q1);
float4* PX_RESTRICT q2 = reinterpret_cast<float4*>(anisotropyData.mAnisotropy_q2);
float4* PX_RESTRICT q3 = reinterpret_cast<float4*>(anisotropyData.mAnisotropy_q3);
const PxU32 globalThreadIdx = blockIdx.x * blockDim.x + threadIdx.x;
if (globalThreadIdx >= numParticles)
return; //skip thread if it's past the end of particle list
const PxU32 p = globalThreadIdx; //index of particle in sorted order
const PxU32 origIdx = gridParticleIndices[globalThreadIdx];
//ignore anisotropy for non-fluid particles
if (!PxGetFluid(phases[p]))
{
//unit axes with the minimum scale
float r = anisotropyData.mAnisotropyMin * particleSystem.mCommonData.mParticleContactDistance;
q1[origIdx] = make_float4(1.0f, 0.0f, 0.0f, r);
q2[origIdx] = make_float4(0.0f, 1.0f, 0.0f, r);
q3[origIdx] = make_float4(0.0f, 0.0f, 1.0f, r);
/*if (globalThreadIdx == 0)
printf("PxGetFluid\n");*/
return;
}
const PxVec3 xi = PxLoad3(sortedNewPositions[p]);
PxU32 contactCount = particleSystem.mParticleSelfCollisionCount[p];
// calculate the sum of weights and the weighted sum of positions for the particle neighborhood
PxVec3 xs(0.f); //sum of positions
float ws = 0.f; //sum of weights
//Prefetch state for the neighbour loops below. Neighbour i of particle p is stored at
//collisionIndex[p + i * numParticles].
//  xj4Next / nextPhase : position and phase of the neighbour the next iteration will consume
//  nextNextQ           : index of the neighbour after that one
//  nextQ               : only read in the prologue; its assignment inside the loops is not read again
//These are left uninitialized on purpose: they are only read when contactCount makes them valid.
PxU32 nextQ;
PxU32 nextNextQ;
PxVec4 xj4Next;
PxU32 nextPhase;
PxU32 offset = p;
//prologue, step 1: fetch index, position and phase of neighbour 0
if (contactCount > 0)
{
nextQ = collisionIndex[offset];
xj4Next = PxLoad4(sortedNewPositions[nextQ]);
nextPhase = phases[nextQ];
offset += numParticles;
}
//prologue, step 2: fetch the index of neighbour 1; afterwards offset addresses neighbour 2
if (contactCount > 1)
{
nextNextQ = collisionIndex[offset];
offset += numParticles;
}
//first pass: iteration i consumes neighbour i, loads the data of neighbour i + 1 and
//the index of neighbour i + 2 (offset addresses neighbour i + 2 at the top of iteration i)
for (PxU32 i = 0; i < contactCount; ++i, offset += numParticles)
{
const PxVec4 xj4 = xj4Next;
const PxU32 phase2 = nextPhase;
if ((i + 1) < contactCount)
{
xj4Next = PxLoad4(sortedNewPositions[nextNextQ]);
nextPhase = phases[nextNextQ];
nextQ = nextNextQ;
if ((i + 2) < contactCount)
nextNextQ = collisionIndex[offset];
}
if (PxGetFluid(phase2)) //ignore non-fluid particles
{
const PxVec3 xj(xj4.x, xj4.y, xj4.z);
const PxVec3 xij = xi - xj;
const PxReal dsq = xij.magnitudeSquared();
//skip coincident particles (dsq == 0) and anything at or beyond the contact distance
if (0.f < dsq && dsq < particleSystem.mCommonData.mParticleContactDistanceSq)
{
const PxReal w = Wa(sqrtf(dsq), particleSystem.mCommonData.mParticleContactDistanceInv);
ws += w;
xs += xj * w;
}
}
}
// set to radial and exit early in case of isolated particles
if (ws == 0.0f)
{
float r = anisotropyData.mAnisotropyMin * particleSystem.mCommonData.mParticleContactDistance;
q1[origIdx] = make_float4(1.0f, 0.0f, 0.0f, r);
q2[origIdx] = make_float4(0.0f, 1.0f, 0.0f, r);
q3[origIdx] = make_float4(0.0f, 0.0f, 1.0f, r);
//if(globalThreadIdx==0)
//printf("%i, ws\n", contactCount);
return;
}
//compute inverse sum weight and turn the weighted position sum into the weighted average position
float invWs = 1.f / ws;
xs *= invWs;
PxMat33 covariance(PxVec3(0.0f), PxVec3(0.0f), PxVec3(0.0f));
//restart the neighbour walk with the same two-step prologue as for the first pass
offset = p;
if (contactCount > 0)
{
nextQ = collisionIndex[offset];
xj4Next = PxLoad4(sortedNewPositions[nextQ]);
nextPhase = phases[nextQ];
offset += numParticles;
}
if (contactCount > 1)
{
nextNextQ = collisionIndex[offset];
offset += numParticles;
}
// use weighted average position to calculate the covariance matrix
//(second pass: same prefetch scheme and same neighbour filter as the first pass)
for (PxU32 i = 0; i < contactCount; ++i, offset += numParticles)
{
const PxVec4 xj4 = xj4Next;
const PxU32 phase2 = nextPhase;
if ((i + 1) < contactCount)
{
xj4Next = PxLoad4(sortedNewPositions[nextNextQ]);
nextPhase = phases[nextNextQ];
nextQ = nextNextQ;
if ((i + 2) < contactCount)
nextNextQ = collisionIndex[offset];
}
if (PxGetFluid(phase2)) //ignore non-fluid particles
{
const PxVec3 xj(xj4.x, xj4.y, xj4.z);
const PxVec3 xij = xi - xj;
const PxReal dsq = xij.magnitudeSquared();
if (0.f < dsq && dsq < particleSystem.mCommonData.mParticleContactDistanceSq)
{
const PxReal w = Wa(sqrtf(dsq), particleSystem.mCommonData.mParticleContactDistanceInv);
//the distance test uses xi, but the covariance is taken around the weighted mean xs
const PxVec3 xjs = xj - xs;
covariance += PxMat33::outer(w*xjs, xjs);
}
}
}
//normalize by the weight sum
covariance *= invWs;
//calculate the eigen decomposition
PxMat33 r;
eigenDecomposition(covariance, r);
//sanitize the eigen values (diagonal of covariance matrix)
//clamping to zero keeps the sqrtf calls below away from negative inputs
covariance[0][0] = max(covariance[0][0], 0.f);
covariance[1][1] = max(covariance[1][1], 0.f);
covariance[2][2] = max(covariance[2][2], 0.f);
PxVec3 lambda(sqrtf(covariance[0][0]), sqrtf(covariance[1][1]), sqrtf(covariance[2][2]));
//PxVec3 lambda(covariance[0][0], covariance[1][1], covariance[2][2]);
//scale by mAnisotropy, then clamp to [mAnisotropyMin, mAnisotropyMax] times the contact distance
const float ks = anisotropyData.mAnisotropy;
const float kmin = anisotropyData.mAnisotropyMin * particleSystem.mCommonData.mParticleContactDistance;
const float kmax = anisotropyData.mAnisotropyMax * particleSystem.mCommonData.mParticleContactDistance;
lambda *= ks;
lambda = Clamp(lambda, kmin, kmax);
//write out the anisotropy vectors: columns of r in xyz, matching lambda component in w
q1[origIdx] = make_float4(r.column0.x, r.column0.y, r.column0.z, lambda.x);
q2[origIdx] = make_float4(r.column1.x, r.column1.y, r.column1.z, lambda.y);
q3[origIdx] = make_float4(r.column2.x, r.column2.y, r.column2.z, lambda.z);
}
// Computes per-particle anisotropy using neighbours gathered from the 3x3x3 block of
// subgrids around the particle's own subgrid.
//
// Thread mapping: one thread per sorted particle slot (flat index built from blockIdx.x,
// blockDim.x and threadIdx.x); threads with an index >= numParticles return at once.
// The kernel is compiled with __launch_bounds__(256, 1).
//
// deviceParticlePos             : particle positions, indexed by original particle index
// sortedToOriginalParticleIndex : maps a sorted slot to the original particle index
// sortedParticleToSubgrid       : subgrid index of each sorted slot
// maxNumSubgrids                : slots whose subgrid index is >= this value get the fallback output
// subgridNeighbors              : neighbour table consumed by subgridNeighborOffset()
// subgridEndIndices             : end (exclusive) sorted slot of every subgrid; the start is the
//                                 previous subgrid's end, or 0 for subgrid 0
// phases / validPhaseMask       : optional filter; when phases is non-null, particles whose
//                                 phase has no bit in common with validPhaseMask are ignored
// q1, q2, q3                    : outputs, indexed by original particle index; xyz holds an
//                                 axis, w holds its scale
// anisotropy, anisotropyMin,
// anisotropyMax                 : scale factor and clamp limits (limits are multiplied by
//                                 particleContactDistance)
// particleContactDistance       : neighbours at or beyond this distance are ignored
//
// Fallback output (unit axes with w = anisotropyMin * particleContactDistance) is written
// for out-of-grid particles, filtered-out particles and particles with zero weight sum.
extern "C" __global__ __launch_bounds__(256, 1) void anisotropyKernel(const float4* const PX_RESTRICT deviceParticlePos,
	const PxU32* const PX_RESTRICT sortedToOriginalParticleIndex, const PxU32* const PX_RESTRICT sortedParticleToSubgrid, PxU32 maxNumSubgrids,
	const PxU32* const PX_RESTRICT subgridNeighbors, const PxU32* const PX_RESTRICT subgridEndIndices, int numParticles, PxU32* phases, PxU32 validPhaseMask,
	float4* q1, float4* q2, float4* q3, PxReal anisotropy, PxReal anisotropyMin, PxReal anisotropyMax, PxReal particleContactDistance)
{
	const PxI32 sortedSlot = blockIdx.x * blockDim.x + threadIdx.x;
	if (sortedSlot >= numParticles)
		return;

	const PxI32 pNr = sortedToOriginalParticleIndex[sortedSlot];
	const PxU32 homeSubgrid = sortedParticleToSubgrid[sortedSlot];

	// Particles outside the grid or rejected by the phase mask get the fallback output.
	if (homeSubgrid >= maxNumSubgrids || (phases && !(phases[pNr] & validPhaseMask)))
	{
		const float fallbackScale = anisotropyMin * particleContactDistance;
		q1[pNr] = make_float4(1.0f, 0.0f, 0.0f, fallbackScale);
		q2[pNr] = make_float4(0.0f, 1.0f, 0.0f, fallbackScale);
		q3[pNr] = make_float4(0.0f, 0.0f, 1.0f, fallbackScale);
		return;
	}

	const PxVec3 xi = PxLoad3(deviceParticlePos[pNr]);
	const PxReal contactDistSq = particleContactDistance * particleContactDistance;
	const PxReal contactDistInv = 1.0f / particleContactDistance;

	// Pass 1: weighted sum of neighbour positions and sum of weights.
	PxVec3 weightedPosSum(0.f);
	float weightSum = 0.f;
	for (int dz = -1; dz <= 1; dz++)
	{
		for (int dy = -1; dy <= 1; dy++)
		{
			for (int dx = -1; dx <= 1; dx++)
			{
				const PxU32 cell = subgridNeighborOffset(subgridNeighbors, homeSubgrid, dx, dy, dz);
				if (cell != EMPTY_SUBGRID)
				{
					// sorted slots [first, last) belong to this subgrid
					const int first = (cell == 0) ? 0 : subgridEndIndices[cell - 1];
					const int last = subgridEndIndices[cell];
					for (int slot = first; slot < last; ++slot)
					{
						const int other = sortedToOriginalParticleIndex[slot];
						const bool filteredOut = phases && !(phases[other] & validPhaseMask);
						if (!filteredOut)
						{
							const PxVec3 xj = PxLoad3(deviceParticlePos[other]);
							const PxVec3 xij = xi - xj;
							const PxReal dsq = xij.magnitudeSquared();
							// dsq == 0 excludes the particle itself and exact duplicates
							if (0.f < dsq && dsq < contactDistSq)
							{
								const PxReal w = Wa(sqrtf(dsq), contactDistInv);
								weightSum += w;
								weightedPosSum += xj * w;
							}
						}
					}
				}
			}
		}
	}

	// Isolated particle: write the fallback output and stop.
	if (weightSum == 0.0f)
	{
		const float fallbackScale = anisotropyMin * particleContactDistance;
		q1[pNr] = make_float4(1.0f, 0.0f, 0.0f, fallbackScale);
		q2[pNr] = make_float4(0.0f, 1.0f, 0.0f, fallbackScale);
		q3[pNr] = make_float4(0.0f, 0.0f, 1.0f, fallbackScale);
		return;
	}

	// Turn the weighted position sum into the weighted mean position.
	const float invWeightSum = 1.f / weightSum;
	weightedPosSum *= invWeightSum;
	const PxVec3& mean = weightedPosSum;

	// Pass 2: weighted covariance of the same neighbours around the weighted mean.
	PxMat33 covariance(PxVec3(0.0f), PxVec3(0.0f), PxVec3(0.0f));
	for (int dz = -1; dz <= 1; dz++)
	{
		for (int dy = -1; dy <= 1; dy++)
		{
			for (int dx = -1; dx <= 1; dx++)
			{
				const PxU32 cell = subgridNeighborOffset(subgridNeighbors, homeSubgrid, dx, dy, dz);
				if (cell != EMPTY_SUBGRID)
				{
					const int first = (cell == 0) ? 0 : subgridEndIndices[cell - 1];
					const int last = subgridEndIndices[cell];
					for (int slot = first; slot < last; ++slot)
					{
						const int other = sortedToOriginalParticleIndex[slot];
						const bool filteredOut = phases && !(phases[other] & validPhaseMask);
						if (!filteredOut)
						{
							const PxVec3 xj = PxLoad3(deviceParticlePos[other]);
							const PxVec3 xij = xi - xj;
							const PxReal dsq = xij.magnitudeSquared();
							if (0.f < dsq && dsq < contactDistSq)
							{
								const PxReal w = Wa(sqrtf(dsq), contactDistInv);
								const PxVec3 xjs = xj - mean;
								covariance += PxMat33::outer(w*xjs, xjs);
							}
						}
					}
				}
			}
		}
	}
	covariance *= invWeightSum;

	// Eigen decomposition; afterwards the diagonal of covariance holds the eigen values.
	PxMat33 r;
	eigenDecomposition(covariance, r);

	// Clamp the eigen values at zero before taking square roots.
	const float e0 = max(covariance[0][0], 0.f);
	const float e1 = max(covariance[1][1], 0.f);
	const float e2 = max(covariance[2][2], 0.f);
	PxVec3 lambda(sqrtf(e0), sqrtf(e1), sqrtf(e2));

	// Scale, then clamp to [anisotropyMin, anisotropyMax] times the contact distance.
	const float kmin = anisotropyMin * particleContactDistance;
	const float kmax = anisotropyMax * particleContactDistance;
	lambda *= anisotropy;
	lambda = Clamp(lambda, kmin, kmax);

	// Write out the anisotropy vectors: columns of r in xyz, matching lambda component in w.
	q1[pNr] = make_float4(r.column0.x, r.column0.y, r.column0.z, lambda.x);
	q2[pNr] = make_float4(r.column1.x, r.column1.y, r.column1.z, lambda.y);
	q3[pNr] = make_float4(r.column2.x, r.column2.y, r.column2.z, lambda.z);
}
// Smooths particle positions using neighbours gathered from the 3x3x3 block of subgrids
// around the particle's own subgrid: each accepted particle is moved towards the weighted
// average position of its neighbours.
//
// Thread mapping: one thread per sorted particle slot (flat index built from blockIdx.x,
// blockDim.x and threadIdx.x); threads with an index >= numParticles return at once.
//
// deviceParticlePos             : input positions, indexed by original particle index
// sortedToOriginalParticleIndex : maps a sorted slot to the original particle index
// sortedParticleToSubgrid       : subgrid index of each sorted slot
// maxNumSubgrids                : slots whose subgrid index is >= this value are copied through
// subgridNeighbors              : neighbour table consumed by subgridNeighborOffset()
// subgridEndIndices             : end (exclusive) sorted slot of every subgrid; the start is the
//                                 previous subgrid's end, or 0 for subgrid 0
// phases / validPhaseMask       : optional filter; when phases is non-null, particles whose
//                                 phase has no bit in common with validPhaseMask are skipped
//                                 as neighbours and copied through as outputs
// smoothPos                     : output positions, indexed by original particle index; the w
//                                 component of the input is carried over
// smoothing                     : smoothing strength applied to the interpolation factor
// particleContactDistance       : neighbours at or beyond this distance are ignored
extern "C" __global__ void smoothPositionsKernel(float4* deviceParticlePos, PxU32* sortedToOriginalParticleIndex, PxU32* sortedParticleToSubgrid, PxU32 maxNumSubgrids,
	PxU32* subgridNeighbors, PxU32* subgridEndIndices, int numParticles, PxU32* phases, PxU32 validPhaseMask, float4* smoothPos, PxReal smoothing, PxReal particleContactDistance)
{
	const PxI32 threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
	if (threadIndex >= numParticles)
		return;

	const PxI32 pNr = sortedToOriginalParticleIndex[threadIndex];
	const float4 xi4 = deviceParticlePos[pNr];
	const PxU32 subgridIndex = sortedParticleToSubgrid[threadIndex];

	// Particles outside the grid or rejected by the phase mask keep their position.
	if (subgridIndex >= maxNumSubgrids || (phases && !(phases[pNr] & validPhaseMask)))
	{
		smoothPos[pNr] = xi4;
		return;
	}

	// xi4 already holds this particle's position, so it is reused here instead of
	// loading deviceParticlePos[pNr] from global memory a second time.
	PxVec3 xi(xi4.x, xi4.y, xi4.z);

	const PxReal particleContactDistanceSq = particleContactDistance * particleContactDistance;
	const PxReal particleContactDistanceInv = 1.0f / particleContactDistance;

	// calculate the sum of weights and the weighted sum of positions for the particle neighborhood
	PxVec3 xs(0.0f);  //sum of positions
	PxReal ws = 0.0f; //sum of weights
	for (int z = -1; z <= 1; z++)
	{
		for (int y = -1; y <= 1; y++)
		{
			for (int x = -1; x <= 1; x++)
			{
				const PxU32 n = subgridNeighborOffset(subgridNeighbors, subgridIndex, x, y, z);
				if (n == EMPTY_SUBGRID)
					continue;

				// sorted slots [start, end) belong to subgrid n
				const int start = n == 0 ? 0 : subgridEndIndices[n - 1];
				const int end = subgridEndIndices[n];
				for (int i = start; i < end; ++i)
				{
					const int j = sortedToOriginalParticleIndex[i];
					if (phases && !(phases[j] & validPhaseMask))
						continue;

					const PxVec3 xj = PxLoad3(deviceParticlePos[j]);
					const PxVec3 xij = xi - xj;
					const PxReal dsq = xij.magnitudeSquared();

					// dsq == 0 excludes the particle itself and exact duplicates
					if (0.0f < dsq && dsq < particleContactDistanceSq)
					{
						const PxReal w = Wa(sqrtf(dsq), particleContactDistanceInv);
						ws += w;
						xs += xj * w;
					}
				}
			}
		}
	}

	if (ws > 0.f)
	{
		// f is four times the weight of a neighbour at half the contact distance;
		// ws / f (capped at 1) scales the smoothing strength
		const PxReal f = 4.0f * Wa(particleContactDistance * 0.5f, particleContactDistanceInv);
		const PxReal smooth = PxMin(1.0f, ws / f) * smoothing;
		xs /= ws;
		xi = Lerp(xi, xs, smooth);
	}

	//write smoothed positions back in API order
	smoothPos[pNr] = make_float4(xi.x, xi.y, xi.z, xi4.w);
}