553 lines
18 KiB
Plaintext
553 lines
18 KiB
Plaintext
|
|
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||
|
|
|
||
|
|
|
||
|
|
#include "vector_types.h"
|
||
|
|
#include "foundation/PxVec3.h"
|
||
|
|
#include "foundation/PxVec4.h"
|
||
|
|
#include "foundation/PxBounds3.h"
|
||
|
|
#include "PxgParticleSystemCore.h"
|
||
|
|
#include "PxgParticleSystem.h"
|
||
|
|
#include "PxgParticleSystemCoreKernelIndices.h"
|
||
|
|
#include "PxgBodySim.h"
|
||
|
|
#include "PxgCommonDefines.h"
|
||
|
|
#include "reduction.cuh"
|
||
|
|
#include "shuffle.cuh"
|
||
|
|
#include "stdio.h"
|
||
|
|
#include "PxgSolverBody.h"
|
||
|
|
#include "PxgSolverCoreDesc.h"
|
||
|
|
#include "PxParticleSystem.h"
|
||
|
|
#include "assert.h"
|
||
|
|
#include "copy.cuh"
|
||
|
|
#include "PxgSimulationCoreDesc.h"
|
||
|
|
#include "gridCal.cuh"
|
||
|
|
#include "particleSystem.cuh"
|
||
|
|
#include "atomic.cuh"
|
||
|
|
#include "utils.cuh"
|
||
|
|
|
||
|
|
using namespace physx;
|
||
|
|
|
||
|
|
// Simple weighting function used when averaging velocities for diffuse
// particles: evaluates 1 - h * invR.
//   h    - value scaled by invR (the caller in this file passes a distance)
//   invR - scale factor (the caller in this file passes an inverse contact distance)
__device__ inline PxReal WDiffuse(const PxReal h, const PxReal invR)
{
	return 1.0f - (h * invR);
}
|
||
|
|
|
||
|
|
// Intentionally empty host-side function with C linkage.
// NOTE(review): presumably exists only so host code can reference this
// translation unit - confirm against the callers.
extern "C" __host__ void initDiffuseParticlesKernels0()
{
}
|
||
|
|
|
||
|
|
// Copies the positions and velocities of one diffuse buffer into the particle
// system's unsorted diffuse arrays, placing them after the particles of all
// lower-indexed diffuse buffers.
//
// Indexing used by the kernel:
//   blockIdx.z                            -> entry of activeParticleSystems
//   blockIdx.y                            -> diffuse buffer index
//   threadIdx.x + blockDim.x * blockIdx.x -> particle index inside that buffer
//
// Side effect: the thread with particle index 0 stores the buffer's start
// offset in buffer.mStartIndex.
extern "C" __global__ void ps_updateUnsortedDiffuseArrayLaunch(
	const PxgParticleSystem * PX_RESTRICT particleSystems,
	const PxU32 * PX_RESTRICT activeParticleSystems)
{
	const PxgParticleSystem& particleSystem = particleSystems[activeParticleSystems[blockIdx.z]];

	const PxU32 bufferIndex = blockIdx.y;

	// The condition only depends on blockIdx, so a whole block leaves together.
	if (bufferIndex >= particleSystem.mNumDiffuseBuffers)
		return;

	const PxU32 laneId = threadIdx.x & 31;

	// Every lane adds up the particle counts of a strided subset of the
	// buffers that precede this one ...
	PxU32 lanePartial = 0;
	for (PxU32 b = laneId; b < bufferIndex; b += WARP_SIZE)
		lanePartial += particleSystem.mDiffuseSimBuffers[b].mNumDiffuseParticles[0];

	// ... and the warp combines the partial sums. All threads use the result
	// below, so warpReduction is presumably returning the total to every lane.
	const PxU32 bufferOffset = warpReduction<AddOpPxU32, PxU32>(FULL_MASK, lanePartial);

	PxgParticleDiffuseSimBuffer& buffer = particleSystem.mDiffuseSimBuffers[bufferIndex];
	const int numDiffuseParticles = buffer.mNumDiffuseParticles[0];

	const float4* srcPositions = buffer.mDiffusePositions_LifeTime;
	const float4* srcVelocities = buffer.mDiffuseVelocities;

	const PxU32 globalThreadIndex = threadIdx.x + blockDim.x * blockIdx.x;
	if (globalThreadIndex >= numDiffuseParticles)
		return;

	if (globalThreadIndex == 0)
		buffer.mStartIndex = bufferOffset;

	float4* PX_RESTRICT unsortedPositions = reinterpret_cast<float4*>(particleSystem.mDiffusePosition_LifeTime);
	float4* PX_RESTRICT unsortedVels = reinterpret_cast<float4*>(particleSystem.mDiffuseVelocity);

	const PxU32 dstIndex = bufferOffset + globalThreadIndex;
	unsortedPositions[dstIndex] = srcPositions[globalThreadIndex];
	unsortedVels[dstIndex] = srcVelocities[globalThreadIndex];
}
|
||
|
|
|
||
|
|
// Applies one-way contact corrections to the sorted diffuse particle positions.
//
// For each diffuse particle the kernel walks its recorded contacts (at most
// PxgParticleContactInfo::MaxStaticContactsPerParticle) and adds
// -normal * mNormal_PenW.w of every contact to the position. The w component of
// the position is left as it was.
//
// Indexing used by the kernel:
//   blockIdx.y                            -> entry of activeParticleSystems
//   threadIdx.x + blockIdx.x * blockDim.x -> diffuse particle index
//
// 'count' is not used by the kernel body.
extern "C" __global__ void ps_diffuseParticleOneWayCollision(
	PxgParticleSystem * PX_RESTRICT particleSystems,
	const PxU32* PX_RESTRICT activeParticleSystems,
	const PxU32 count
)
{
	// Block-local copy of the particle system descriptor.
	__shared__ __align__(16) PxU8 particleSystemMemory[sizeof(PxgParticleSystem)];
	PxgParticleSystem& shParticleSystem = *(reinterpret_cast<PxgParticleSystem*>(particleSystemMemory));

	const PxgParticleSystem& particleSystem = particleSystems[activeParticleSystems[blockIdx.y]];

	blockCopy<uint2>(
		reinterpret_cast<uint2*>(&shParticleSystem),
		reinterpret_cast<const uint2*>(&particleSystem),
		sizeof(PxgParticleSystem));
	__syncthreads();

	const PxU32 pi = threadIdx.x + blockIdx.x * blockDim.x;
	const PxU32 numParticles = *shParticleSystem.mNumDiffuseParticles;

	if (pi >= numParticles)
		return;

	float4* PX_RESTRICT newPos = reinterpret_cast<float4*>(shParticleSystem.mDiffuseSortedPos_LifeTime);

	const PxgParticleContactInfo* PX_RESTRICT contacts = shParticleSystem.mDiffuseOneWayContactInfos;
	const PxU32* PX_RESTRICT contactCounts = shParticleSystem.mDiffuseOneWayContactCount;

	const PxU32 contactCount = PxMin(PxgParticleContactInfo::MaxStaticContactsPerParticle, contactCounts[pi]);

	if (contactCount == 0)
		return;

	PxVec3 posCorr = PxLoad3(newPos[pi]);

	// Contact c of particle pi is stored at index pi + c * numParticles.
	for (PxU32 c = 0; c < contactCount; ++c)
	{
		const PxgParticleContactInfo& contact = contacts[pi + c * numParticles];

		const PxVec3 surfaceNormal = PxLoad3(contact.mNormal_PenW);
		posCorr += -surfaceNormal * contact.mNormal_PenW.w;
	}

	newPos[pi] = make_float4(posCorr.x, posCorr.y, posCorr.z, newPos[pi].w);
}
|
||
|
|
|
||
|
|
|
||
|
|
// For every sorted diffuse particle, gathers a weighted average of the
// velocities of nearby simulation particles.
//
// Result per particle (written to mDiffuseSortedOriginPos_LifeTime, which is
// reused here as velocity storage):
//   xyz - weighted average velocity (zero when no weight was accumulated)
//   w   - number of neighbours that contributed, capped at 16
//
// Only the particle's own grid cell is searched unless the system has
// PxParticleFlag::eFULL_DIFFUSE_ADVECTION set, in which case the 3x3x3
// neighbourhood is searched.
//
// Indexing used by the kernel:
//   blockIdx.y                            -> entry of activeParticleSystems
//   threadIdx.x + blockIdx.x * blockDim.x -> diffuse particle index
//
// 'gravity' and 'dt' are not used by the kernel body.
extern "C" __global__ void ps_diffuseParticleUpdatePBF(
	PxgParticleSystem* PX_RESTRICT particleSystems,
	const PxU32* activeParticleSystems,
	const PxVec3 gravity,
	const PxReal dt)
{
	// Block-local copy of the particle system descriptor.
	__shared__ __align__(16) PxU8 particleSystemMemory[sizeof(PxgParticleSystem)];
	PxgParticleSystem& shParticleSystem = *(reinterpret_cast<PxgParticleSystem*>(particleSystemMemory));

	// Per-axis cell offsets in visiting order: own cell first, then -1, then +1.
	__shared__ int offset[3];

	if (threadIdx.x == 0)
	{
		offset[0] = 0;
		offset[1] = -1;
		offset[2] = 1;
	}

	const PxgParticleSystem& particleSystem = particleSystems[activeParticleSystems[blockIdx.y]];

	blockCopy<uint2>(
		reinterpret_cast<uint2*>(&shParticleSystem),
		reinterpret_cast<const uint2*>(&particleSystem),
		sizeof(PxgParticleSystem));
	// This barrier also orders the offset[] writes above before their use below.
	__syncthreads();

	const int numDiffuse = *shParticleSystem.mNumDiffuseParticles;
	const PxU32 pi = threadIdx.x + blockIdx.x * blockDim.x;

	if (pi >= numDiffuse)
		return;

	// grid lookup tables
	const PxU32* const PX_RESTRICT cellStarts = shParticleSystem.mCellStart;
	const PxU32* const PX_RESTRICT cellEnds = shParticleSystem.mCellEnd;

	// simulation particle data
	const float4* const PX_RESTRICT sortedPose = reinterpret_cast<float4*>(shParticleSystem.mSortedPositions_InvMass);
	const float4* const PX_RESTRICT sortedVel = reinterpret_cast<float4*>(shParticleSystem.mSortedVelocities);

	float4* PX_RESTRICT diffusePositions = reinterpret_cast<float4*>(shParticleSystem.mDiffuseSortedPos_LifeTime);

	// This buffer is overloaded to hold the averaged velocity.
	float4* PX_RESTRICT newVel = reinterpret_cast<float4*>(shParticleSystem.mDiffuseSortedOriginPos_LifeTime);

	const float4 xi4 = diffusePositions[pi];
	const PxVec3 pos = PxLoad3(xi4);

	const PxReal cellWidth = shParticleSystem.mCommonData.mGridCellWidth;
	const PxReal contactDistanceSq = shParticleSystem.mCommonData.mParticleContactDistanceSq;
	const PxReal invContactDistance = shParticleSystem.mCommonData.mParticleContactDistanceInv;
	const int3 gridPos = calcGridPos(xi4, cellWidth);
	const uint3 gridSize = make_uint3(shParticleSystem.mCommonData.mGridSizeX, shParticleSystem.mCommonData.mGridSizeY, shParticleSystem.mCommonData.mGridSizeZ);

	// accumulators
	PxReal weightSum = 0.0f;
	PxVec3 velocitySum(0.f);
	PxU32 numNeighbors = 0;

	const PxU32 maxNeighbors = 16;

	// Number of offsets visited per axis: 3 for the full neighbourhood, 1 for the own cell only.
	const PxU32 end = (shParticleSystem.mData.mFlags & PxParticleFlag::eFULL_DIFFUSE_ADVECTION) ? 3 : 1;

	// Set once maxNeighbors contributions were gathered; terminates all loops.
	bool neighborLimitReached = false;

	for (PxU32 z = 0; z < end && !neighborLimitReached; ++z)
	{
		for (PxU32 y = 0; y < end && !neighborLimitReached; ++y)
		{
			for (PxU32 x = 0; x < end && !neighborLimitReached; ++x)
			{
				const int3 neighbourPos = make_int3(gridPos.x + offset[x], gridPos.y + offset[y], gridPos.z + offset[z]);
				const PxU32 gridHash = calcGridHash(neighbourPos, gridSize);
				const PxU32 startIndex = cellStarts[gridHash];

				if (startIndex == EMPTY_CELL)
					continue;

				const PxU32 endIndex = cellEnds[gridHash];
				for (PxU32 q = startIndex; q < endIndex; ++q)
				{
					const PxVec3 xj = PxLoad3(sortedPose[q]);
					const PxVec3 xij = pos - xj;

					const PxReal dSq = xij.dot(xij);

					if (dSq < contactDistanceSq)
					{
						const PxVec3 vj = PxLoad3(sortedVel[q]);
						const PxReal w = WDiffuse(sqrtf(dSq), invContactDistance);

						weightSum += w;
						velocitySum += vj * w;

						++numNeighbors;
						if (numNeighbors == maxNeighbors)
						{
							neighborLimitReached = true;
							break;
						}
					}
				}
			}
		}
	}

	PxVec3 velAvg(PxZero);
	if (weightSum > 0)
		velAvg = velocitySum / weightSum;

	newVel[pi] = make_float4(velAvg.x, velAvg.y, velAvg.z, PxReal(numNeighbors));
}
|
||
|
|
|
||
|
|
// Integrates the diffuse particles of one diffuse buffer and writes the
// survivors back, densely packed, into that buffer.
//
// Per particle:
//   * the current velocity is derived from the sorted position and the stored
//     origin position, divided by dt
//   * a new velocity is chosen from the neighbour count stored in the w
//     component of the averaged-velocity array:
//       w < 4  : spray  - velocity damped by mParams.airDrag
//       w < 8  : foam   - takes the averaged neighbour velocity
//       else   : bubble - gravity/buoyancy term plus mParams.bubbleDrag towards
//                the averaged velocity
//   * the velocity magnitude is limited to mData.mMaxVelocity
//   * the lifetime (position w) is reduced by dt and clamped at 0
//   * particles with lifetime > 0 reserve a slot via the counter
//     mNumDiffuseParticles[1] and are written to the buffer arrays
//
// Indexing used by the kernel:
//   blockIdx.z                            -> entry of activeParticleSystems
//   blockIdx.y                            -> diffuse buffer index
//   threadIdx.x + blockIdx.x * blockDim.x -> particle index inside the buffer
extern "C" __global__ void ps_diffuseParticleCompact(
	PxgParticleSystem* PX_RESTRICT particleSystems,
	const PxU32* activeParticleSystems,
	const PxVec3 gravity,
	const PxReal dt)
{
	// Block-local copy of the particle system descriptor.
	__shared__ __align__(16) PxU8 particleSystemMemory[sizeof(PxgParticleSystem)];
	PxgParticleSystem& shParticleSystem = *(reinterpret_cast<PxgParticleSystem*>(particleSystemMemory));

	const PxgParticleSystem& particleSystem = particleSystems[activeParticleSystems[blockIdx.z]];

	blockCopy<uint2>(
		reinterpret_cast<uint2*>(&shParticleSystem),
		reinterpret_cast<const uint2*>(&particleSystem),
		sizeof(PxgParticleSystem));
	__syncthreads();

	const PxU32 bufferIndex = blockIdx.y;
	if (bufferIndex >= shParticleSystem.mNumDiffuseBuffers)
		return;

	PxgParticleDiffuseSimBuffer& buffer = shParticleSystem.mDiffuseSimBuffers[bufferIndex];

	// [0] is read as the current particle count, [1] is used as the output counter.
	int* numDiffuseParticles = buffer.mNumDiffuseParticles;
	const int numDiffuse = numDiffuseParticles[0];

	const PxU32 pi = threadIdx.x + blockIdx.x * blockDim.x;
	const PxU32 threadIndexInWarp = threadIdx.x & 31;

	// NOTE(review): lanes leaving here do not take part in the FULL_MASK warp
	// intrinsics further down; this relies on exited lanes being permitted to
	// be named in the mask - confirm against the CUDA documentation.
	if (pi >= numDiffuse)
		return;

	// destination arrays (per buffer)
	float4* PX_RESTRICT diffusePositionsNew = buffer.mDiffusePositions_LifeTime;
	float4* PX_RESTRICT diffuseVelocitiesNew = buffer.mDiffuseVelocities;

	// source arrays (per particle system)
	float4* PX_RESTRICT velAvgs = reinterpret_cast<float4*>(shParticleSystem.mDiffuseSortedOriginPos_LifeTime);
	float4* PX_RESTRICT diffusePositions = reinterpret_cast<float4*>(shParticleSystem.mDiffuseSortedPos_LifeTime);
	float4* PX_RESTRICT diffusePositionsOld = reinterpret_cast<float4*>(shParticleSystem.mDiffuseOriginPos_LifeTime);

	const PxU32* reverseLookup = shParticleSystem.mDiffuseUnsortedToSortedMapping;

	// Unsorted index of this particle and its position in the sorted arrays.
	const PxU32 index = pi + buffer.mStartIndex;
	const PxU32 sortedInd = reverseLookup[index];

	const float4 xi4 = diffusePositions[sortedInd];
	const float4 vi4Old = diffusePositionsOld[index];
	const float4 xiva4 = velAvgs[sortedInd];

	const PxVec3 pos = PxLoad3(xi4);
	const PxVec3 oldPos = PxLoad3(vi4Old);
	const PxVec3 velAvg = PxLoad3(xiva4);

	const PxVec3 vel = (pos - oldPos)*(1.f / dt);

	// pick the new velocity from the neighbour count in xiva4.w
	PxVec3 newVel;
	if (xiva4.w < 4.f)
	{
		// spray (ballistic)
		newVel = vel * (1.0f - buffer.mParams.airDrag * dt);
	}
	else if (xiva4.w < 8.f)
	{
		// foam
		newVel = velAvg;
	}
	else
	{
		// bubble
		newVel = vel - (1.f + buffer.mParams.buoyancy) * gravity * dt + buffer.mParams.bubbleDrag * (velAvg - vel);
	}

	// limit the speed; skipped for a zero vector
	const float maxVel = shParticleSystem.mData.mMaxVelocity;
	if (newVel.magnitudeSquared() > 0)
	{
		newVel = PxMin(newVel.magnitude(), maxVel) * newVel.getNormalized();
	}

	const PxVec3 newPosCorr = pos + (newVel - vel) * dt;

	__syncwarp();

	const PxReal lifeTime = fmaxf(xi4.w - dt, 0.0f);

	// Bit mask of the lanes whose particle is still alive.
	const PxU32 res = __ballot_sync(FULL_MASK, lifeTime > 0.f);

	// Lane 0 reserves output slots for all surviving lanes with a single
	// atomic and the base index is then read from lane 0 by every lane.
	PxU32 offset = 0;
	if (threadIndexInWarp == 0)
		offset = atomicAdd(&numDiffuseParticles[1], __popc(res));

	offset = __shfl_sync(FULL_MASK, offset, 0);

	if (lifeTime > 0.f)
	{
		const PxU32 newIndex = offset + warpScanExclusive(res, threadIndexInWarp);

		diffusePositionsNew[newIndex] = make_float4(newPosCorr.x, newPosCorr.y, newPosCorr.z, lifeTime);
		diffuseVelocitiesNew[newIndex] = make_float4(newVel.x, newVel.y, newVel.z, 0.0f);
	}
}
|
||
|
|
|
||
|
|
// Spawns new diffuse particles from the fluid particles of one particle buffer.
//
// For a fluid particle an intensity is computed as
//   pressureWeight * potentials.y - divergenceWeight * potentials.x
//   + kineticEnergyWeight * |v|^2
// and a new diffuse particle is emitted when randomTable value * intensity
// exceeds mParams.threshold. Slots are reserved by atomically incrementing
// mNumDiffuseParticles[1] of the associated diffuse buffer; the particle is only
// written when the reserved slot is below mMaxNumParticles.
//
// Indexing used by the kernel:
//   blockIdx.z                            -> entry of activeParticleSystems
//   blockIdx.y                            -> particle buffer index
//   threadIdx.x + blockIdx.x * blockDim.x -> particle index inside the buffer
//
// randomTable is indexed modulo randomTableSize.
extern "C" __global__ void ps_diffuseParticleCreate(
	PxgParticleSystem * PX_RESTRICT particleSystems,
	const PxU32* const PX_RESTRICT activeParticleSystems,
	const PxReal* const PX_RESTRICT randomTable,
	const PxU32 randomTableSize,
	const PxReal dt)
{
	// Block-local copy of the particle system descriptor.
	__shared__ __align__(16) PxU8 particleSystemMemory[sizeof(PxgParticleSystem)];
	PxgParticleSystem& shParticleSystem = *(reinterpret_cast<PxgParticleSystem*>(particleSystemMemory));

	const PxgParticleSystem& particleSystem = particleSystems[activeParticleSystems[blockIdx.z]];

	blockCopy<uint2>(
		reinterpret_cast<uint2*>(&shParticleSystem),
		reinterpret_cast<const uint2*>(&particleSystem),
		sizeof(PxgParticleSystem));
	__syncthreads();

	const PxU32 bufferIndex = blockIdx.y;
	if (bufferIndex >= shParticleSystem.mCommonData.mNumParticleBuffers)
		return;

	const PxgParticleSimBuffer& buffer = shParticleSystem.mParticleSimBuffers[bufferIndex];

	// 0xffffffff is treated as "no diffuse buffer for this particle buffer".
	const PxU32 diffuseParticleBufferIndex = buffer.mDiffuseParticleBufferIndex;
	if (diffuseParticleBufferIndex == 0xffffffff)
		return;

	const PxU32 pi = threadIdx.x + blockIdx.x * blockDim.x;
	const PxU32 numParticles = buffer.mNumActiveParticles;
	if (pi >= numParticles)
		return;

	PxgParticleDiffuseSimBuffer& diffuseBuffer = shParticleSystem.mDiffuseSimBuffers[diffuseParticleBufferIndex];
	if (diffuseBuffer.mMaxNumParticles == 0)
		return;

	// source arrays (per particle system)
	const float4* const PX_RESTRICT sortedPose = reinterpret_cast<float4*>(shParticleSystem.mSortedPositions_InvMass);
	const float4* const PX_RESTRICT sortedVel = reinterpret_cast<float4*>(shParticleSystem.mSortedVelocities);
	const PxU32* PX_RESTRICT phases = shParticleSystem.mSortedPhaseArray;
	const float2* const PX_RESTRICT potentials = reinterpret_cast<float2*>(shParticleSystem.mDiffusePotentials);

	// destination arrays and output counter (per diffuse buffer)
	float4* PX_RESTRICT diffusePositionsNew = diffuseBuffer.mDiffusePositions_LifeTime;
	float4* PX_RESTRICT diffuseVelocitiesNew = diffuseBuffer.mDiffuseVelocities;
	int* numDiffuseParticles = diffuseBuffer.mNumDiffuseParticles;

	// Map the buffer-local particle index to its position in the sorted arrays.
	const PxU32* reverseLookup = shParticleSystem.mUnsortedToSortedMapping;
	const PxU32 offset = particleSystem.mParticleBufferRunsum[bufferIndex];
	const PxU32 sortedInd = reverseLookup[pi + offset];

	// Only fluid particles emit diffuse particles.
	const PxU32 phase = phases[sortedInd];
	if (!PxGetFluid(phase))
		return;

	const float2 ptnts = potentials[sortedInd];
	const PxReal threshold = diffuseBuffer.mParams.threshold;
	const float4 vi4 = sortedVel[sortedInd];

	// kinetic energy + pressure - divergence
	const PxReal kineticEnergy = dot3(vi4, vi4) * diffuseBuffer.mParams.kineticEnergyWeight;
	const PxReal divergence = diffuseBuffer.mParams.divergenceWeight * ptnts.x;
	const PxReal pressure = diffuseBuffer.mParams.pressureWeight * ptnts.y;
	const PxReal intensity = pressure - divergence + kineticEnergy;

	const PxReal r0 = randomTable[(sortedInd + 0) % randomTableSize];

	if (r0 * intensity > threshold)
	{
		const float4 xi4 = sortedPose[sortedInd];

		// Try to allocate a slot. The counter is incremented even when the
		// buffer is already full; in that case nothing is written.
		const int newIndex = atomicAdd(&numDiffuseParticles[1], 1);

		if (newIndex < diffuseBuffer.mMaxNumParticles)
		{
			const PxVec3 xi = PxLoad3(xi4);
			const PxVec3 vi = PxLoad3(vi4);

			const PxReal r1 = randomTable[(sortedInd + 1) % randomTableSize];
			const PxReal r2 = randomTable[(sortedInd + 2) % randomTableSize];
			const PxReal r3 = randomTable[(sortedInd + 3) % randomTableSize];

			// lifetime = lifeMin + lifeScale * (mParams.lifetime - lifeMin)
			const PxReal lifeMin = 1.0f;
			const PxReal lifeMax = diffuseBuffer.mParams.lifetime;
			const PxReal lifeScale = fminf(intensity / threshold, 1.f) * r1;
			const PxReal lifetime = lifeMin + lifeScale * (lifeMax - lifeMin);

			// Spawn position: moved back along the velocity and jittered by a
			// fraction of the rest offset.
			const PxVec3 q = xi - r2 * vi * dt + PxVec3(r1, r2, r3) * shParticleSystem.mData.mRestOffset * 0.25f;

			diffusePositionsNew[newIndex] = make_float4(q.x, q.y, q.z, lifetime);
			diffuseVelocitiesNew[newIndex] = make_float4(vi.x, vi.y, vi.z, 0.0f);
		}
	}
}
|
||
|
|
|
||
|
|
|
||
|
|
// Publishes the diffuse particle count of one diffuse buffer and resets the
// output counter.
//
// The value of mNumDiffuseParticles[1], clamped to mMaxNumParticles, is stored
// in *mNumActiveDiffuseParticles and in mNumDiffuseParticles[0]; afterwards
// mNumDiffuseParticles[1] is set to 0.
//
// Indexing used by the kernel:
//   blockIdx.z -> entry of activeParticleSystems
//   blockIdx.y -> diffuse buffer index
//
// The read / publish / reset sequence must run exactly once per
// (buffer, particle system) pair: a second thread running it after the reset
// would read 0 from mNumDiffuseParticles[1] and overwrite the published count.
// The work is therefore restricted to thread 0 of block x == 0, which leaves a
// launch with a single thread per pair unchanged.
//
// 'count' is not used by the kernel body.
extern "C" __global__ void ps_diffuseParticleCopy(
	PxgParticleSystem * PX_RESTRICT particleSystems,
	const PxU32* const PX_RESTRICT activeParticleSystems,
	const PxU32 count)
{
	// Exactly one thread per (blockIdx.y, blockIdx.z) pair does the update.
	if (blockIdx.x != 0 || threadIdx.x != 0)
		return;

	const PxU32 id = activeParticleSystems[blockIdx.z];
	PxgParticleSystem& particleSystem = particleSystems[id];

	const PxU32 numDiffuseBuffers = particleSystem.mNumDiffuseBuffers;

	const PxU32 bufferIndex = blockIdx.y;
	if (bufferIndex >= numDiffuseBuffers)
		return;

	PxgParticleDiffuseSimBuffer& diffuseBuffer = particleSystem.mDiffuseSimBuffers[bufferIndex];

	int* numDiffuseParticles = diffuseBuffer.mNumDiffuseParticles;

	// The counter may have been incremented past the capacity, so clamp it.
	const PxU32 numDiffuse = PxMin(PxI32(diffuseBuffer.mMaxNumParticles), numDiffuseParticles[1]);

	*diffuseBuffer.mNumActiveDiffuseParticles = numDiffuse; //pinned memory
	numDiffuseParticles[0] = numDiffuse;
	numDiffuseParticles[1] = 0;
}
|
||
|
|
|
||
|
|
|
||
|
|
// Adds up mNumDiffuseParticles[0] over all diffuse buffers of a particle system
// and stores the total in *mNumDiffuseParticles of that system.
//
// Indexing used by the kernel:
//   blockIdx.x  -> entry of activeParticleSystems
//   threadIdx.x -> starting buffer index of the thread, advanced by WARP_SIZE
//
// NOTE(review): the stride of WARP_SIZE and the FULL_MASK reduction suggest the
// kernel expects one warp per block - confirm against the launch code.
//
// 'count' is not used by the kernel body.
extern "C" __global__ void ps_diffuseParticleSum(
	PxgParticleSystem * PX_RESTRICT particleSystems,
	const PxU32* const PX_RESTRICT activeParticleSystems,
	const PxU32 count)
{
	PxgParticleSystem& particleSystem = particleSystems[activeParticleSystems[blockIdx.x]];

	const PxU32 numDiffuseBuffers = particleSystem.mNumDiffuseBuffers;

	// per-thread partial sum over a strided subset of the buffers
	PxU32 threadPartial = 0;
	for (PxU32 b = threadIdx.x; b < numDiffuseBuffers; b += WARP_SIZE)
		threadPartial += particleSystem.mDiffuseSimBuffers[b].mNumDiffuseParticles[0];

	const PxU32 totalDiffuse = warpReduction<AddOpPxU32, PxU32>(FULL_MASK, threadPartial);

	// a single thread publishes the result
	if (threadIdx.x == 0)
		*particleSystem.mNumDiffuseParticles = totalDiffuse;
}
|