feat(physics): wire physx sdk into build

This commit is contained in:
2026-04-15 12:22:15 +08:00
parent 5bf258df6d
commit 31f40e2cbb
2044 changed files with 752623 additions and 1 deletion

View File

@@ -0,0 +1,48 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_CONTACT_MANAGER_H
#define PXG_CONTACT_MANAGER_H
#include "foundation/PxSimpleTypes.h"
namespace physx
{
// Per-pair narrowphase input mirrored to the GPU: identifies the two shapes of
// a contact-manager pair and where their transforms live in the transform cache.
// 16-byte aligned (and 16 bytes in size) so the device can fetch one entry per
// pair as a single aligned load.
struct PX_ALIGN_PREFIX(16) PxgContactManagerInput
{
//Body refs are not needed here. World-space transforms are computed using transformCacheRefs instead!
PxU32 shapeRef0; //Ref to shape 0
PxU32 shapeRef1; //Ref to shape 1
PxU32 transformCacheRef0; //Ref to shape0's transforms in transform cache
PxU32 transformCacheRef1; //Ref to shape1's transform in transform cache
}
PX_ALIGN_SUFFIX(16);
}
#endif

View File

@@ -0,0 +1,69 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_CONVEX_CONVEX_SHAPE_H
#define PXG_CONVEX_CONVEX_SHAPE_H
#include "geometry/PxMeshScale.h"
#define PXG_MAX_PCM_CONTACTS 4
namespace physx
{
// Padding helper that occupies exactly byteSize bytes; the bytes are filler
// and are never read.
template<PxU32 byteSize>
class FlexiblePad
{
PxU8 pad[byteSize];
};
// Specialization for zero padding: an empty class, since a zero-length array
// member is not legal C++.
template<>
class FlexiblePad<0>
{
};
//ML: PxgShape doesn't need to have contactOffset because we will dma a separate contact offset array later
// GPU-mirrored shape record. The trailing comments track the running byte
// offset after each member (the "A or B" forms give the 32-bit / 64-bit
// values, which differ because of the pointer-sized hullOrMeshPtr). The
// struct is padded to exactly 48 bytes on every platform, enforced by the
// compile-time assert below.
struct PX_ALIGN_PREFIX(16) PxgShape
{
PxMeshScale scale; //28
PxU32 materialIndex; //32
size_t hullOrMeshPtr; //36 or 40
PxU32 type; //40 or 44
PxU32 particleOrSoftbodyId; //44 or 48
//FlexiblePad<16 - sizeof(size_t) - sizeof(PxU32)> pad; //48
#if !PX_P64_FAMILY
PxU32 pad0; // explicit pad on 32-bit builds so sizeof(PxgShape) stays 48
#endif
}
PX_ALIGN_SUFFIX(16);
PX_COMPILE_TIME_ASSERT(sizeof(PxgShape) == 48);
}
#endif

View File

@@ -0,0 +1,218 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_GEOMETRY_MANAGER_H
#define PXG_GEOMETRY_MANAGER_H
#include "foundation/PxPreprocessor.h"
#include "foundation/PxArray.h"
#include "foundation/PxHashMap.h"
#include "foundation/PxSimpleTypes.h"
#include "PxgCopyManager.h"
#if PX_LINUX && PX_CLANG
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdocumentation"
#pragma clang diagnostic ignored "-Wdisabled-macro-expansion"
#endif
#include "cuda.h"
#if PX_LINUX && PX_CLANG
#pragma clang diagnostic pop
#endif
// AD: PxgGeometryManager manages the CPU/GPU data transfers and the lifetime of collision geometries: Convex Hulls, Trimeshes, SDFs, Heightfields.
/*
Short overview:
- geometries are added using the add* functions.
- during add, device memory is allocated and indices are given to geometries. We need to allocate the device memory here despite not writing to it
because the pointer is uploaded to GPU together with the shapeInstance data, which happens before we perform the final copy of the geometry in scheduleCopyHtoD.
- We also accumulate the pinned memory requirements for the staging buffer during add.
- scheduleCopyHtoD is performing the bulk of the work:
- we allocate the proper amount of pinned memory
- for each scheduled geometry, we lay it out in the pinned memory
- we push the copy descriptor to the copy manager.
- the pinned memory is deallocated and the queues reset in resetAfterMemCopyCompleted().
Future things:
- look into the sequences of device uploads: we might be able to defer device memory allocations and use pools if we can rearrange things such that
we don't need the device pointer upfront.
- The pinned memory is contiguous, so the copy manager taking pairs of (src, dest) could potentially be replaced by a linear thing.
*/
namespace physx
{
class PxgHeapMemoryAllocatorManager;
class PxgHeapMemoryAllocator;
class PxCudaContext;
namespace Gu
{
struct ConvexHullData;
class TriangleMesh;
struct HeightFieldData;
};
template<typename ArrayT>
class PxgFreeIndicesProvider
{
PX_NOCOPY(PxgFreeIndicesProvider)
public:
PxgFreeIndicesProvider(ArrayT& ref): mDataArray(ref) {}
PxU32 getFreeIndex()
{
if (!mFreeIndices.empty())
{
return mFreeIndices.popBack();
}
else
{
PxU32 currSz = mDataArray.size();
if (currSz + 1 > mDataArray.capacity())
{
mDataArray.reserve(currSz * 2);
}
mDataArray.resize(currSz + 1);
return currSz;
}
}
void setFreeIndex(PxU32 idx)
{
PX_ASSERT(idx < mDataArray.size());
mDeferredFreeIndices.pushBack(idx);
}
void releaseFreeIndices()
{
for (PxU32 a = 0; a < mDeferredFreeIndices.size(); ++a)
{
mFreeIndices.pushBack(mDeferredFreeIndices[a]);
}
mDeferredFreeIndices.forceSize_Unsafe(0);
}
private:
PxArray<PxU32> mDeferredFreeIndices;
PxArray<PxU32> mFreeIndices;
ArrayT& mDataArray;
};
// CUDA array / texture-object pairs backing a mesh's SDF data; the *Subgrids
// members hold the sparse-SDF subgrid texture where one exists. Tracked per
// mesh (see PxgGeometryManager::mMeshToTextureMap) for lifetime management.
struct PxgMeshTextureData
{
CUarray cuArray;
CUtexObject cuTexRef;
CUarray cuArraySubgrids;
CUtexObject cuTexRefSubgrids;
};
// Persistent per-geometry record: where the geometry lives on the device and,
// while an upload is in flight, which scheduled copy descriptor it belongs to.
struct HullOrMeshData
{
void* mDeviceMemPointer; // device allocation holding this geometry
PxU32 mCopyDescIndex; // index of the pending copy, or INVALID_COPY_DESC_INDEX when none
static const PxU32 INVALID_COPY_DESC_INDEX = 0xFFffFFff;
};
class PxgGeometryManager;
// Holds all the data needed to upload a geometry to the GPU. Created when calling add* functions, read during scheduleCopyHtoD.
struct ScheduledCopyData
{
private:
// AD this is local, not intended for widespread usage.
struct UploadGeometryType
{
// Discriminates which geometry kind a scheduled copy refers to, so that
// scheduleCopyHtoD can lay the data out in pinned memory accordingly.
enum Enum
{
eTRIANGLEMESH = 1,
eCONVEXHULL = 2,
eHEIGHTFIELD = 3,
eBOXHULL = 4 // hardcoded convex hull used for boxes (see PxgGeometryManager::mBoxHullIdx)
};
};
public:
PxgCopyManager::CopyDesc mCopyDesc; //!< Copy descriptor for upload to GPU. lives until copy completed.
PxU32 mHullOrTrimeshIdx; //!< Index into mGeometrydata
UploadGeometryType::Enum mType; //!< Geometry type: see UploadGeometryType. Needed for deferred pinned memory allocation.
PxU32 mNumPolyVertices; //!< Number of polygon vertices in case this is a convex hull
const void* mGeometryPtr; //!< Pointer to CPU geometry data.
friend class PxgGeometryManager;
};
class PxgGeometryManager
{
PX_NOCOPY(PxgGeometryManager)
using UploadGeometryType = ScheduledCopyData::UploadGeometryType;
public:
PxgGeometryManager(PxgHeapMemoryAllocatorManager* heapMemoryManager);
virtual ~PxgGeometryManager();
// add*: allocate device memory for the geometry, schedule its upload and
// return the index identifying it (usable with getGeometryDevPtrByIndex).
// See the overview comment at the top of this file.
PxU32 addHull(const Gu::ConvexHullData& hull);
PxU32 addTriMesh(const Gu::TriangleMesh& triMesh);
PxU32 addHeightfield(const Gu::HeightFieldData& hf);
void addBoxHull(); // registers the shared, hardcoded box convex hull (see mBoxHullIdx)
void removeGeometry(PxU32 idx); // releases the geometry at idx; the index is recycled
// Lays out all scheduled geometries in a pinned staging buffer and pushes
// the corresponding copy descriptors to copyMan.
void scheduleCopyHtoD(PxgCopyManager& copyMan, PxCudaContext& cudaContext, CUstream stream);
CUdeviceptr getGeometryDevPtrByIndex(PxU32 idx) const;
CUdeviceptr getBoxHullDevPtr() const;
// Frees the pinned staging memory and resets the copy queues once the
// scheduled copies have completed.
void resetAfterMemcpyCompleted();
private:
PxU32 addGeometryInternal(PxU64 byteSize, const void* geomPtr, UploadGeometryType::Enum type, PxU32 numPolyVertices = 0);
PxgHeapMemoryAllocator* mDeviceMemoryAllocator; //device memory
PxgHeapMemoryAllocator* mPinnedMemoryAllocator; //mapped memory
void* mPinnedMemoryBasePtr; //stores the pointer to the current pinned memory allocation.
PxU64 mPinnedHostMemoryRequirements; // accumulates pinned memory requirements for copies.
PxArray<ScheduledCopyData> mScheduledCopies; // copies in current step, cleared after scheduleCopyHtoD
PxArray<HullOrMeshData> mGeometryData; //persistent geometry data, holds idx -> device pointer mapping
PxgFreeIndicesProvider<PxArray<HullOrMeshData> > mFreeGeometryIndices; //freeIndex provider for mGeometryData.
PxU32 mBoxHullIdx; //index into mGeometryData for hardcoded convex hull for box.
PxHashMap<void*, PxgMeshTextureData> mMeshToTextureMap; //maps meshes to SDF textures for lifetime management
};
};
#endif

View File

@@ -0,0 +1,39 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_NARROWPHASE_H
#define PXG_NARROWPHASE_H
namespace physx
{
//this is needed to force PhysXNarrowphaseGpu linkage as Static Library!
//(referencing this symbol from dependent code makes the linker pull in this
// module -- presumably the definition is otherwise unreferenced; see the .cpp)
void createPxgNarrowphase();
}
#endif

View File

@@ -0,0 +1,817 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_NARROWPHASE_CORE_H
#define PXG_NARROWPHASE_CORE_H
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxTransform.h"
#include "foundation/PxPinnedArray.h"
#include "foundation/PxUserAllocated.h"
#include "task/PxTask.h"
#include "geometry/PxMeshScale.h"
#include "foundation/PxMutex.h"
#include "PxsContactManagerState.h"
#include "PxgContactManager.h"
#include "PxgPersistentContactManifold.h"
#include "PxgCudaBuffer.h"
#include "cudaNpCommon.h"
#include "PxgCudaPagedLinearAllocator.h"
#include "PxgCopyManager.h"
#include "PxgConvexConvexShape.h"
#include "PxgGeometryManager.h"
#include "PxvNphaseImplementationContext.h"
#include "PxgHeapMemAllocator.h"
#include "PxgShapeManager.h"
#include "PxgRadixSortDesc.h"
#include "geometry/PxParticleSystemGeometry.h"
#include "PxvGeometry.h"
namespace physx
{
class PxsTransformCache;
struct PxsMaterialData;
struct PxsCachedTransform;
struct PxcDataStreamPool;
class PxgCudaKernelWranglerManager;
class PxgNphaseImplementationContext;
class PxRenderOutput;
class PxgGpuContext;
struct PxTriangleMeshGeometryLL;
class PxgParticleSystemCore;
struct PxgFemFemContactInfo;
struct PxgFemOtherContactInfo;
namespace Gu
{
struct ConvexHullData;
class TriangleMesh;
class DeformableVolumeMesh;
class BVTetrahedronMesh;
struct HeightFieldData;
class PersistentContactManifold;
}
namespace Cm
{
class FlushPool;
}
namespace IG
{
class IslandSim;
}
// Dispatch buckets for GPU narrowphase pairs, one per geometry-type
// combination. The per-entry comment states which kind of persistent contact
// manifold the bucket uses (single manifold, multi-manifold, or none); the
// enum order must stay in sync with the BUCKET_ManifoldSize table below.
struct GPU_BUCKET_ID
{
enum Enum
{
eFallback = 0,
eConvex = 1, //manifold
eConvexPlane = 2, //manifold
eConvexTrimesh = 3, //multi-manifold
eConvexHeightfield = 4, //multi-manifold
eSphereTrimesh = 5, //multi-manifold
eSphereHeightfield = 6, //multi-manifold
eTrianglePlane = 7, //multi-manifold - technically could be single manifold but needs 6 points
eSphere = 8, //no manifold
eBoxBox = 9,
eConvexCorePlane = 10,
eConvexCoreConvex = 11,
eConvexCoreTrimesh = 12,
eConvexCoreTetmesh = 13,
eConvexCoreClothmesh = 14,
eTriangleHeightfield = 15, //no manifold
eTriangleTriangle = 16, //no manifold
eSoftbody = 17, //no manifold
eSoftbodies = 18, //no manifold
eSoftbodyFemCloth = 19, //no manifold
eSoftbodyTrimesh = 20, //no manifold
eSoftbodySdfTrimesh = 21, //no manifold
eSoftbodyHeightfield = 22, //no manifold
eFemClothSphere = 23, //no manifold
eFemClothPlane = 24, //no manifold
eFemClothBox = 25, //no manifold
eFemClothConvexes = 26, //no manifold
eFemClothes = 27, //no manifold
eFemClothTrimesh = 28, //no manifold
eFemClothSdfTrimesh = 29,
eFemClothHeightfield = 30, //no manifold
eConvexParticle = 31, //no manifold
eParticlesystems = 32, //no manifold
eParticlesystemSoftbody = 33, //no manifold
eParticlesystemFemCloth = 34, //no manifold
eParticlesystemTrimesh = 35, //no manifold
eParticlesystemSdfTrimesh = 36, //no manifold
eParticlesystemHeightfield = 37, //no manifold
eCount
};
};
// Bytes of persistent contact-manifold storage required per pair for each
// bucket, indexed by GPU_BUCKET_ID::Enum (the trailing comment repeats the
// index). Zero means the bucket keeps no persistent manifold. Must stay in
// sync with the GPU_BUCKET_ID enum above.
const PxU32 BUCKET_ManifoldSize[GPU_BUCKET_ID::eCount] = {
0, //0
sizeof(PxgPersistentContactManifold), //1
sizeof(PxgPersistentContactManifold), //2
sizeof(PxgPersistentContactMultiManifold), //3
sizeof(PxgPersistentContactMultiManifold), //4
sizeof(PxgPersistentContactMultiManifold), //5
sizeof(PxgPersistentContactMultiManifold), //6
sizeof(PxgPersistentContactMultiManifold), //7
0, //8
0, //9
0, //10
0, //11
0, //12
0, //13
0, //14
0, //15
0, //16
0, //17
0, //18
0, //19
0, //20
0, //21
0, //22
0, //23
0, //24
0, //25
0, //26
0, //27
0, //28
0, //29
0, //30
0, //31
0, //32
0, //33
0, //34
0, //35
0, //36
0, //37
};
// Host-side (pinned memory, per the PxPinnedArray containers) arrays
// describing one bucket's contact-manager pairs. The five arrays are
// parallel: entry i of each describes pair i.
struct PxgContactManagers : public PxsContactManagerBase
{
PxgContactManagers(const PxU32 bucketId, const PxVirtualAllocator& allocator) : PxsContactManagerBase(bucketId),
mGpuInputContactManagers(allocator),
mCpuContactManagerMapping(allocator),
mShapeInteractions(allocator),
mRestDistances(allocator),
mTorsionalProperties(allocator)
{
}
PxPinnedArray<PxgContactManagerInput> mGpuInputContactManagers; // per-pair shape/transform-cache refs uploaded to the GPU
PxPinnedArray<PxsContactManager*> mCpuContactManagerMapping; // per-pair owning CPU contact manager
PxPinnedArray<const Sc::ShapeInteraction*> mShapeInteractions; // per-pair shape interaction
PxFloatArrayPinned mRestDistances; // per-pair rest distance
PxPinnedArray<PxsTorsionalFrictionData> mTorsionalProperties; // per-pair torsional friction data
// Resets all arrays to size 0 without releasing their capacity.
void clear()
{
mGpuInputContactManagers.forceSize_Unsafe(0);
mCpuContactManagerMapping.forceSize_Unsafe(0);
mShapeInteractions.forceSize_Unsafe(0);
mRestDistances.forceSize_Unsafe(0);
mTorsionalProperties.forceSize_Unsafe(0);
}
// Reserves capacity in all arrays ahead of adding nbToPreallocate pairs.
void preallocateNewBuffers(PxU32 nbToPreallocate)
{
mGpuInputContactManagers.reserve(nbToPreallocate);
mCpuContactManagerMapping.reserve(nbToPreallocate);
mShapeInteractions.reserve(nbToPreallocate);
mRestDistances.reserve(nbToPreallocate);
mTorsionalProperties.reserve(nbToPreallocate);
}
private:
PX_NOCOPY(PxgContactManagers)
};
// Contact managers newly added this frame. Extends PxgContactManagers with a
// parallel pinned array of per-pair GPU output records.
struct PxgNewContactManagers : public PxgContactManagers
{
PxPinnedArray<PxsContactManagerOutput> mGpuOutputContactManagers; // per-pair narrowphase outputs
PxgNewContactManagers(const PxU32 bucketIndex, const PxVirtualAllocator& allocator) : PxgContactManagers(bucketIndex, allocator),
mGpuOutputContactManagers(allocator)
{
}
// Resets all arrays (base + outputs) to size 0, keeping capacity.
void clear()
{
PxgContactManagers::clear();
mGpuOutputContactManagers.forceSize_Unsafe(0);
}
// Reserves capacity in all arrays (base + outputs) for nbToPreallocate pairs.
void preallocateNewBuffers(PxU32 nbToPreallocate)
{
mGpuOutputContactManagers.reserve(nbToPreallocate);
PxgContactManagers::preallocateNewBuffers(nbToPreallocate);
}
};
// Device-side buffers for one bucket's contact-manager pairs, plus scratch
// buffers for pair management and a pinned counter pair for host readback.
struct PxgGpuContactManagers
{
PxgTypedCudaBuffer<PxgContactManagerInput> mContactManagerInputData; // device mirror of the per-pair inputs
PxgTypedCudaBuffer<PxsContactManagerOutput> mContactManagerOutputData; // device mirror of the per-pair outputs
PxgCudaBuffer mPersistentContactManifolds; // manifold storage; element size depends on the bucket (see BUCKET_ManifoldSize)
PxgTypedCudaBuffer<PxU32> mTempRunsumArray; // scratch for prefix-sum passes
PxgTypedCudaBuffer<PxU32> mTempRunsumArray2;
PxgTypedCudaBuffer<PxU32> mBlockAccumulationArray;
PxgTypedCudaBuffer<PxsContactManagerOutputCounts> mLostFoundPairsOutputData;
PxgTypedCudaBuffer<PxsContactManager*> mLostFoundPairsCms;
PxgTypedCudaBuffer<PxsContactManager*> mCpuContactManagerMapping;
PxgTypedCudaBuffer<Sc::ShapeInteraction*> mShapeInteractions;
PxgTypedCudaBuffer<PxReal> mRestDistances;
PxgTypedCudaBuffer<PxsTorsionalFrictionData> mTorsionalProperties;
uint2* mLostAndTotalReportedPairsCountPinned; // pinned readback counters; x/y are read by PxgGpuContactManagerPair's accessors
const PxU32 mBucketIndex; // GPU_BUCKET_ID::Enum value this set belongs to
PxgGpuContactManagers(const PxU32 bucketIndex, PxgHeapMemoryAllocatorManager* manager) :
mContactManagerInputData(manager, PxsHeapStats::eNARROWPHASE),
mContactManagerOutputData(manager, PxsHeapStats::eNARROWPHASE),
mPersistentContactManifolds(manager, PxsHeapStats::eNARROWPHASE),
mTempRunsumArray(manager, PxsHeapStats::eNARROWPHASE),
mTempRunsumArray2(manager, PxsHeapStats::eNARROWPHASE),
mBlockAccumulationArray(manager, PxsHeapStats::eNARROWPHASE),
mLostFoundPairsOutputData(manager, PxsHeapStats::eNARROWPHASE),
mLostFoundPairsCms(manager, PxsHeapStats::eNARROWPHASE),
mCpuContactManagerMapping(manager, PxsHeapStats::eNARROWPHASE),
mShapeInteractions(manager, PxsHeapStats::eNARROWPHASE),
mRestDistances(manager, PxsHeapStats::eNARROWPHASE),
mTorsionalProperties(manager, PxsHeapStats::eNARROWPHASE),
mLostAndTotalReportedPairsCountPinned(NULL),
mBucketIndex(bucketIndex)
{
PX_ASSERT(bucketIndex < (1 << PxsContactManagerBase::MaxBucketBits));
}
private:
PX_NOCOPY(PxgGpuContactManagers)
};
// Device scratch buffers shared by the pair-management (lost-pair removal)
// kernels; all allocated from the narrowphase heap.
struct PxgGpuPairManagementBuffers
{
PxgCudaBuffer mTempRunsumArray;
PxgCudaBuffer mBlockAccumulationArray;
PxgCudaBuffer mRemovedIndicesArray;
PxgGpuPairManagementBuffers(PxgHeapMemoryAllocatorManager* manager):
mTempRunsumArray(manager, PxsHeapStats::eNARROWPHASE), mBlockAccumulationArray(manager, PxsHeapStats::eNARROWPHASE), mRemovedIndicesArray(manager, PxsHeapStats::eNARROWPHASE)
{
}
private:
PX_NOCOPY(PxgGpuPairManagementBuffers)
};
// Host-side pairing of a bucket's persistent (existing) contact managers with
// those newly added this frame; CPU counterpart of PxgGpuContactManagerPair.
class PxgMirroredContactManagerPair : public PxUserAllocated
{
public:
PxgMirroredContactManagerPair(const PxU32 bucketId, const PxVirtualAllocator& allocator) :
mContactManagers(bucketId, allocator),
mNewContactManagers(bucketId, allocator)
{
}
// Pair counts: first pass tests existing pairs, second pass the new ones.
PX_FORCE_INLINE PxU32 getNbFirstPassTests() { return mContactManagers.mCpuContactManagerMapping.size(); }
PX_FORCE_INLINE PxU32 getNbSecondPassTests() { return mNewContactManagers.mCpuContactManagerMapping.size(); }
PX_FORCE_INLINE PxU32 getNbPassTests() { return mContactManagers.mCpuContactManagerMapping.size() + mNewContactManagers.mCpuContactManagerMapping.size(); }
// Grows the existing set's input array so it can absorb the new pairs,
// doubling capacity to amortize growth.
// NOTE(review): only mGpuInputContactManagers is reserved here (the mapping
// reserve is commented out) -- confirm the sibling arrays are grown elsewhere.
PX_FORCE_INLINE void allocateContactManagers()
{
const PxU32 newPairCount = mContactManagers.mGpuInputContactManagers.size() + mNewContactManagers.mGpuInputContactManagers.size();
if (newPairCount > mContactManagers.mGpuInputContactManagers.capacity())
{
PxU32 newSize = PxMax(newPairCount, mContactManagers.mGpuInputContactManagers.capacity() * 2);
mContactManagers.mGpuInputContactManagers.reserve(newSize);
//mContactManagers.mCpuContactManagerMapping.reserve(newSize);
}
}
PxgContactManagers mContactManagers; //existing contact managers
PxgNewContactManagers mNewContactManagers; //new contact managers
};
// Device-side counterpart of PxgMirroredContactManagerPair: existing and new
// GPU contact-manager buffer sets for one bucket, plus helpers that allocate
// shared scratch/readback storage and sum the readback counters.
class PxgGpuContactManagerPair : public PxUserAllocated
{
public:
PxgGpuContactManagerPair(const PxU32 bucketId, PxgHeapMemoryAllocatorManager* manager) :
mContactManagers(bucketId, manager),
mNewContactManagers(bucketId, manager)
{
}
// Allocates the per-set block accumulation scratch (size PxU32 entries each).
PX_FORCE_INLINE void allocateBlockAccumulationArray(const PxU32 size)
{
mContactManagers.mBlockAccumulationArray.allocate(sizeof(PxU32) * size, PX_FL);
mNewContactManagers.mBlockAccumulationArray.allocate(sizeof(PxU32) * size, PX_FL);
}
// Allocates the pinned uint2 readback counters for both sets.
PX_FORCE_INLINE void allocateLostAndTotalReportedPairsCount(PxsHeapMemoryAllocator* mappedMemoryAllocators)
{
mContactManagers.mLostAndTotalReportedPairsCountPinned = reinterpret_cast<uint2*>(mappedMemoryAllocators->allocate(sizeof(uint2), PxsHeapStats::eNARROWPHASE, PX_FL));
mNewContactManagers.mLostAndTotalReportedPairsCountPinned = reinterpret_cast<uint2*>(mappedMemoryAllocators->allocate(sizeof(uint2), PxsHeapStats::eNARROWPHASE, PX_FL));
}
// Sum over both sets of the counters' y component (pairs).
PX_FORCE_INLINE PxU32 getTotalLostFoundPairs()
{
return mContactManagers.mLostAndTotalReportedPairsCountPinned->y + mNewContactManagers.mLostAndTotalReportedPairsCountPinned->y;
}
// Sum over both sets of the counters' x component (patches).
PX_FORCE_INLINE PxU32 getTotalLostFoundPatches()
{
return mContactManagers.mLostAndTotalReportedPairsCountPinned->x + mNewContactManagers.mLostAndTotalReportedPairsCountPinned->x;
}
PxgGpuContactManagers mContactManagers;
PxgGpuContactManagers mNewContactManagers;
};
struct PxsShapeCore;
struct PxgKernelIds;
class PxgGpuNarrowphaseCore : public PxUserAllocated
{
PX_NOCOPY(PxgGpuNarrowphaseCore)
public:
PxgMirroredContactManagerPair* mContactManagers[GPU_BUCKET_ID::eCount]; //some order as BUCKET_ID. 0 is fallback and it will be NULL in this array
PxgGpuContactManagerPair* mGpuContactManagers[GPU_BUCKET_ID::eCount]; //some order as BUCKET_ID. 0 is fallback and it will be NULL in this array
PxgGpuPairManagementBuffers mPairManagementBuffers;
PxgCudaBuffer mGpuTransformCache;
PxgCudaBuffer mGpuContactDistance;
typedef PxInt32ArrayPinned RemovedIndicesArray;
RemovedIndicesArray* mRemovedIndices[GPU_BUCKET_ID::eCount];
PxBitMap mKeepMap;
PxPinnedArray<PxsContactManagerOutputCounts> mLostFoundPairsOutputData;
PxPinnedArray<PxsContactManager*> mLostFoundPairsCms;
PxU32 mTotalLostFoundPairs;
PxU32 mTotalLostFoundPatches;
PxU32 mTotalNumPairs;
PxgPairManagementData* mPairManagementData[GPU_BUCKET_ID::eCount]; //mapped memory
PxgCudaBuffer mGpuPairManagementData;
PxPinnedArray<PxgRadixSortDesc> mRSDesc;
PxgCudaBufferN<2> mRadixSortDescBuf; //radix sort with rank
PxgCudaBuffer mTempGpuRigidIndiceBuf;
PxgCudaBuffer mTempGpuShapeIndiceBuf;
PxgCudaBuffer mRadixCountTotalBuf;
CUdeviceptr mContactStream;
CUdeviceptr mPatchStream;
CUdeviceptr mForceAndIndiceStream;
PxgTypedCudaBuffer<PxgPatchAndContactCounters> mPatchAndContactCountersOnDevice; //device memory
PxgPatchAndContactCounters* mPatchAndContactCountersReadback; //host memory
PxgShapeManager mGpuShapesManager;
PxgMaterialManager mGpuMaterialManager;
PxgFEMSoftBodyMaterialManager mGpuFEMMaterialManager;
PxgFEMClothMaterialManager mGpuFEMClothMaterialManager;
PxgPBDMaterialManager mGpuPBDMaterialManager;
//device memory
PxgCudaPagedLinearAllocator<PxgHeapMemoryAllocator> mIntermStackAlloc;
CUstream mStream;
CUstream mSolverStream; //this is the stream handle belong to the solver, we can't destroy the solver stream
PxgCudaKernelWranglerManager* mGpuKernelWranglerManager;
PxCudaContextManager* mCudaContextManager;
PxCudaContext* mCudaContext;
PxgHeapMemoryAllocatorManager* mHeapMemoryManager;
PxgCopyManager mCopyMan;
PxgCopyManager mCopyManBp;
PxgGeometryManager mGeometryManager;
IG::IslandSim* mIslandSim;
PxgNphaseImplementationContext* mNphaseImplContext;
PxgGpuContext* mGpuContext;
PxU32 mCollisionStackSizeBytes;
PxU32* mMaxConvexMeshTempMemory;
struct RefcountedRecord
{
PxU32 refCnt;
PxU32 idx;
};
typedef PxHashMap<size_t, RefcountedRecord> RefcountedRecordsMap;
RefcountedRecordsMap* mShapesMap;
RefcountedRecordsMap* mGeometriesMap;
RefcountedRecordsMap* mMaterialsMap;
RefcountedRecordsMap* mFEMMaterialsMap;
RefcountedRecordsMap* mFEMClothMaterialsMap;
RefcountedRecordsMap* mPBDMaterialsMap;
PxgCudaBuffer mGpuMultiManifold;
PxgCudaBuffer mGpuManifold;
CUevent mParticleEvent;
CUevent mSoftbodyEvent;
CUevent mFemClothEvent;
CUevent mDirectApiDmaEvent;
#if PX_ENABLE_SIM_STATS
PxU32 mGpuDynamicsRigidContactCountStats;
PxU32 mGpuDynamicsRigidPatchCountStats;
PxU32 mGpuDynamicsCollisionStackSizeStats;
#else
PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
#endif
void createGpuStreamsAndEvents();
void releaseGpuStreamsAndEvents();
void removeLostPairsGpu(const PxU32 bucketID, const PxU16 stage5KernelID, const bool copyManifold);
void appendContactManagersGpu(PxU32 nbExistingManagers, PxU32 nbNewManagers, PxgGpuContactManagers& gpuContactManagers, PxgGpuContactManagers& newGpuContactManagers, PxU32 manifoldSize);
void drawManifold(PxgPersistentContactManifold* manifolds, PxgContactManagerInput* cmInput, PxsCachedTransform* cachedTransform, const PxU32 numTests, PxRenderOutput& renderOutput,
const PxU32 color, const PxF32 size);
void drawPoint(PxRenderOutput& renderOutput, PxVec3 point, const PxU32 color, const PxF32 size);
void drawLine(PxRenderOutput& out, PxVec3 a, const PxVec3 b, const PxU32 color);
template <typename ManagementData, typename Manifold>
void removeLostPairsGpuInternal( ManagementData& cpuBuffer, CUdeviceptr gpuBuffer,
PxgContactManagers& contactManagers, PxgGpuContactManagers& gpuContactManagers, PxInt32ArrayPinned& removedIndices, PxgGpuPairManagementBuffers& pairManagementBuffers,
PxU16 stage5KernelID, const bool copyManifold = true);
public:
PxgGpuNarrowphaseCore(PxgCudaKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager, const PxGpuDynamicsMemoryConfig& gpuDynamicsConfig, void* contactStreamBase,
void* patchStreamBase, void* forceAndIndiceStreamBase, IG::IslandSim* islandSim, CUstream solverStream, PxgHeapMemoryAllocatorManager* heapMemoryManager,
PxgNphaseImplementationContext* nphaseImplContext);
virtual ~PxgGpuNarrowphaseCore();
// --- Narrowphase dispatch entry points ---------------------------------------
// One testSDK* method per shape-pair bucket of contact managers. The
// baseContact* pointers with their matching *BytesLimit values describe the
// contact/patch/force output streams results are written into; renderOutput,
// where present, is presumably for debug visualization — confirm at call sites.
void testSDKSphereGpu(PxgGpuContactManagers& gpuManagers, const PxU32 numTests,
PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces, PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit,
PxRenderOutput* renderOutput);
void testSDKBoxBoxGpu(PxgGpuContactManagers& gpuManagers, const PxU32 numTests,
PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces, PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit,
PxRenderOutput* renderOutput);
// GJK/EPA-based convex-vs-convex/plane/core pairs.
void testSDKConvexConvexGjkEpaGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces, PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit,
PxRenderOutput* renderOutput);
void testSDKConvexPlaneGjkEpaGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces, PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit,
PxRenderOutput* renderOutput);
void testSDKConvexCorePlaneGjkEpaGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces, PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit,
PxRenderOutput* renderOutput);
void testSDKConvexCoreConvexGjkEpaGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces, PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit,
PxRenderOutput* renderOutput);
void testSDKConvexCoreTrimeshGjkEpaGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces, PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit,
PxRenderOutput* renderOutput);
// Convex-core vs deformable meshes: no explicit contact stream parameters here.
void testSDKConvexCoreTetmeshGjkEpaGpu(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);
void testSDKConvexCoreClothmeshGjkEpaGpu(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);
// SAT-based mesh/heightfield pairs.
void testSDKSphereTriMeshSATGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces,
PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit);
void testSDKSphereHeightfieldGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces,
PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit);
void testSDKTriMeshPlaneGpu(PxgGpuContactManagers& gpuManagers, const PxU32 numTests,
PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces,
PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit);
void testSDKTriMeshHeightfieldGpu(PxgGpuContactManagers& gpuManagers, const PxU32 numTests,
PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces,
PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit);
void testSDKTriMeshTriMeshGpu(PxgGpuContactManagers& gpuManagers,const PxU32 numTests,
PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces,
PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit);
void testSDKConvexTriMeshSATGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces,
PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit);
void testSDKConvexHeightfieldGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces,
PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit);
// Particle-system pairs.
void testSDKParticleSystemGpu(PxgGpuContactManagers& gpuManagers, const PxU32 numTests);
void testSDKParticleSoftbody(PxgGpuContactManagers& gpuManagers, const PxU32 numTests,
PxRenderOutput* renderOutput);
void testSDKParticleFemCloth(PxgGpuContactManagers& gpuManagers, const PxU32 numTests,
PxRenderOutput* renderOutput);
void testSDKConvexParticle(PxgGpuContactManagers& gpuManagers, const PxU32 numTests);
void testSDKParticleSdfTriMesh(PxgGpuContactManagers& gpuManagers, const PxU32 numTests);
void testSDKParticleTriMesh(PxgGpuContactManagers& gpuManagers,const PxU32 numTests);
void testSDKParticleHeightfield(PxgGpuContactManagers& gpuManagers, const PxU32 numTests);
// Softbody pairs.
void testSDKSoftbody(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);
void testSDKSoftbodies(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);
void testSDKSoftbodyCloth(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);
void testSDKSoftbodyTrimesh(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);
void testSDKSoftbodySdfTrimesh(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);
void testSDKSoftbodyHeightfield(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);
// FEM cloth pairs.
void testSDKFemClothSphere(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);
void testSDKFemClothPlane(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);
void testSDKFemClothBox(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);
void testSDKFemClothConvexes(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);
void testSDKFemClothCloth(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);
void testSDKFemClothSdfTrimesh(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);
void testSDKFemClothTrimesh(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);
void testSDKFemClothHeightfield(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);
void updateFrictionPatches(PxgGpuContactManagers& gpuManagers, PxU32 count, PxU8* baseContactPatches, PxU8* baseFrictionPatches);
// Copies narrowphase results back for CPU consumption, merging GPU outputs with
// the fallback pairs and found-patch managers.
void fetchNarrowPhaseResults(PxcDataStreamPool* contactStreamPool, PxcDataStreamPool* patchStreamPool, PxcDataStreamPool* forceStreamPool,
PxsContactManagerOutput* cmOutputs, const Sc::ShapeInteraction*const* shapeInteractions, const PxReal* restDistances, const PxsTorsionalFrictionData* torsionalData, PxU32 nbFallbackPairs,
const PxsContactManagerOutputCounts* foundPatchCounts, const PxsContactManager*const* foundPatchManagers, PxU32 nbFoundPatchManagers);
void syncNotRigidWithNp();
// CUDA context scoping around GPU work.
void acquireContext();
void releaseContext();
CUstream getStream();
// --- Contact-manager lifecycle (bucketId selects the pair-type bucket) -------
void registerContactManager(PxsContactManager* cm, const Sc::ShapeInteraction* shapeInteraction, PxsContactManagerOutput& output, const PxU32 bucketId);
void unregisterContactManager(PxsContactManager* manager, const PxU32 bucketId);
void refreshContactManager(PxsContactManager* manager, PxsContactManagerOutput* cmOutputs, PxgContactManagerInput& input, const PxU32 bucketId);
void removeLostPairs();
void appendContactManagers(PxsContactManagerOutput* cmOutputs, PxU32 nbFallbackPairs);
void prepareTempContactManagers();
template <GPU_BUCKET_ID::Enum>
void prepareTempContactManagers();
void prepareTempContactManagersTasks(Cm::FlushPool& flushPool, PxBaseTask* continuation);
// GPU-compatibility queries for mesh geometry.
bool isMeshGPUCompatible(const PxTriangleMeshGeometryLL& meshData);
bool isClothMeshGPUCompatible(const PxTriangleMeshGeometryLL& meshData); // skipping material counts
bool isTetMeshGPUCompatible(const Gu::BVTetrahedronMesh* meshData);
void computeRigidsToShapes();
void prepareGpuNarrowphase(PxsTransformCache& cache, const PxReal* contactDistances, bool hasContactDistanceChanged);
// Device-pointer accessors into the eConvex bucket's per-pair data.
PxsContactManagerOutput* getGPUContactManagerOutputBase() { return reinterpret_cast<PxsContactManagerOutput*>(mGpuContactManagers[GPU_BUCKET_ID::eConvex]->mContactManagers.mContactManagerOutputData.getDevicePtr()); }
PxReal* getGPURestDistances() { return reinterpret_cast<PxReal*>(mGpuContactManagers[GPU_BUCKET_ID::eConvex]->mContactManagers.mRestDistances.getDevicePtr()); }
PxsTorsionalFrictionData* getGPUTorsionalData() { return reinterpret_cast<PxsTorsionalFrictionData*>(mGpuContactManagers[GPU_BUCKET_ID::eConvex]->mContactManagers.mTorsionalProperties.getDevicePtr()); }
Sc::ShapeInteraction** getGPUShapeInteractions() { return reinterpret_cast<Sc::ShapeInteraction**>(mGpuContactManagers[GPU_BUCKET_ID::eConvex]->mContactManagers.mShapeInteractions.getDevicePtr()); }
// Host-side and GPU-side manager accessors per bucket.
PxgContactManagers& getExistingContactManagers(GPU_BUCKET_ID::Enum type) { return mContactManagers[type]->mContactManagers; }
PxgNewContactManagers& getNewContactManagers(GPU_BUCKET_ID::Enum type) { return mContactManagers[type]->mNewContactManagers; }
PxgGpuContactManagers& getExistingGpuContactManagers(GPU_BUCKET_ID::Enum type) { return mGpuContactManagers[type]->mContactManagers; }
PxgGpuContactManagers& getNewGpuContactManagers(GPU_BUCKET_ID::Enum type) { return mGpuContactManagers[type]->mNewContactManagers; }
PxPinnedArray<PxsContactManagerOutputCounts>& getLostFoundPairsOutput() { return mLostFoundPairsOutputData; }
PxPinnedArray<PxsContactManager*>& getLostFoundPairsCms() { return mLostFoundPairsCms; }
PxgCudaBuffer& getTransformCache() { return mGpuTransformCache; }
PxU32 getTotalNbLostFoundPairs() const { return mTotalLostFoundPairs; }
PxU32 getTotalNbLostFoundPatches() const { return mTotalLostFoundPatches; }
void preallocateNewBuffers(PxU32 nbNewPairs);
void pushBuffer();
// --- Geometry registration (hulls, meshes, heightfields) ---------------------
PxU32 addHull(const Gu::ConvexHullData& hull);
PxU32 getHullIdxByHostPtr(const Gu::ConvexHullData* hull);
void removeGeometry(PxU32 idx);
//schedules data copies on the near phase stream
void uploadDataChunksToGpu();
void waitAndResetCopyQueues();
//schedules particle material copies on the broad phase stream,
//because particle materials are already accessed in pre integration
//- we might be able to remove this dependency
void uploadDataChunksToGpuBp();
void waitAndResetCopyQueuesBp();
PxU32 addTriMesh(const Gu::TriangleMesh& mesh);
PxU32 getTriMeshIdxByHostPtr(const Gu::TriangleMesh* mesh);
PxU32 addHeightfield(const Gu::HeightFieldData& hf);
PxU32 getHeightfieldIdxByHostPtr(const Gu::HeightFieldData* hf);
// --- Shape and aggregate registration ----------------------------------------
void registerShape(const PxNodeIndex& nodeIndex, const PxsShapeCore& shapeCore, const PxU32 transformCacheID, const bool isFemCloth, PxActor* actor);
void updateShapeMaterial(const PxsShapeCore& shapeCore);
PxU32 getShapeIndex(const PxsShapeCore& shapeCore);
void unregisterShape(const PxsShapeCore& shapeCore, const PxU32 transformCacheID, const bool isFemCloth);
void registerAggregate(const PxU32 transformCacheID);
// --- Material registration (rigid, FEM surface/volume, particle) -------------
void registerMaterial(const PxsMaterialCore& materialCore);
void updateMaterial(const PxsMaterialCore& materialCore);
void unregisterMaterial(const PxsMaterialCore& materialCore);
void registerFEMMaterial(const PxsDeformableSurfaceMaterialCore& materialCore);
void updateFEMMaterial(const PxsDeformableSurfaceMaterialCore& materialCore);
void unregisterFEMMaterial(const PxsDeformableSurfaceMaterialCore& materialCore);
void registerFEMMaterial(const PxsDeformableVolumeMaterialCore& materialCore);
void updateFEMMaterial(const PxsDeformableVolumeMaterialCore& materialCore);
void unregisterFEMMaterial(const PxsDeformableVolumeMaterialCore& materialCore);
void registerParticleMaterial(const PxsPBDMaterialCore& materialCore);
void updateParticleMaterial(const PxsPBDMaterialCore& materialCore);
void unregisterParticleMaterial(const PxsPBDMaterialCore& materialCore);
//direct gpu contact access
bool copyContactData(void* data, PxU32* numContactPairs, const PxU32 maxContactPairs, CUevent startEvent,
CUevent finishEvent, PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces);
bool evaluateSDFDistances(PxVec4* localGradientAndSDFConcatenated, const PxShapeGPUIndex* shapeIndices, const PxVec4* localSamplePointsConcatenated, const PxU32* samplePointCountPerShape, PxU32 nbElements, PxU32 maxPointCount, CUevent startEvent, CUevent finishEvent);
void synchronizedStreams(CUstream artiStream);
PxgShapeManager& getGpuShapeManager() { return mGpuShapesManager; }
// Maps SDK material indices to GPU material indices for a given material type.
template <class MaterialData>
void mapMaterialIndices(PxU16* gpuMaterialIndices, const PxU16* materialIndices, PxU32 numMaterialIndices);
private:
void compactLostFoundPairs(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxU32* touchChangeFlags, PxsContactManagerOutput* cmOutputs);
// Remaps collision-mesh contact data onto the simulation mesh for softbody
// contacts against non-FEM objects.
void softbodyOtherContactApplyCollisionToSimMeshMapping(
PxgDevicePointer<float4> contactsd,
PxgDevicePointer<float4> barycentricsd,
PxgDevicePointer<PxgFemOtherContactInfo> contactInfosd,
PxgDevicePointer<PxU32> totalNumCountsd,
PxgDevicePointer<PxU32> prevNumCountsd);
public:
// Same collision-to-sim-mesh remapping for FEM-vs-FEM contacts.
void softbodyFemContactApplyCollisionToSimMeshMapping(
PxgDevicePointer<float4> barycentrics0d,
PxgDevicePointer<float4> barycentrics1d,
PxgDevicePointer<PxgFemFemContactInfo> contactInfosd,
PxgDevicePointer<PxU32> totalNumCountsd,
PxgDevicePointer<PxU32> prevNumCountsd, bool isSelfCollision, bool isCloth);
private:
// Internal helpers for merging newly-registered pairs into the main arrays.
void adjustNpIndices(PxgNewContactManagers& newContactManagers, PxPinnedArray<PxgContactManagerInput>& itMainInputs,
PxPinnedArray<PxsContactManager*>& itCms, PxPinnedArray<const Sc::ShapeInteraction*>& itSIs,
PxFloatArrayPinned& itR, PxPinnedArray<PxsTorsionalFrictionData>& itTor,
PxPinnedArray<PxgContactManagerInput>& itNewInputs,
PxPinnedArray<PxsContactManager*>& itNewCms,
PxPinnedArray<const Sc::ShapeInteraction*>& itNewSIs, PxFloatArrayPinned& itNewR,
PxPinnedArray<PxsTorsionalFrictionData>& itNewTor);
void registerContactManagerInternal(PxsContactManager* cm, const Sc::ShapeInteraction* shapeInteraction, PxgContactManagerInput* input, PxsContactManagerOutput& output, PxgNewContactManagers& newContactManagers);
void unregisterContactManagerInternal(PxsContactManager* cm, PxInt32ArrayPinned& removedIndices, PxgNewContactManagers& newContactManagers);
void refreshContactManagerInternal(PxsContactManager* cm, PxsContactManagerOutput* cmOutputs, const Sc::ShapeInteraction** shapeInteractions, PxgContactManagerInput& input, PxgNewContactManagers& newContactManagers,
PxInt32ArrayPinned& removedIndices);
template <typename Manifold>
void prepareTempContactManagers(PxgGpuContactManagers& gpuManagers, PxgNewContactManagers& newManagers, Manifold* emptyManifold);
void prepareTempContactManagers(PxgGpuContactManagers& gpuManagers, PxgNewContactManagers& newManagers);
void removeLostPairsInternal(PxInt32ArrayPinned& removedIndices, PxgContactManagers& contactManagers);
void prepareTempContactManagersInternal(PxgNewContactManagers& newManagers, Cm::FlushPool& flushPool, PxBaseTask* continuation);
void drawContacts(PxRenderOutput& out, CUdeviceptr contactsd, CUdeviceptr normalPensd, const PxU32 numContacts);
// Material bookkeeping shared by the typed register/update/unregister methods.
template <typename MaterialCore, typename MaterialData>
PxU32 registerMaterialInternal(const MaterialCore& materialCore, RefcountedRecordsMap* materialsMap, PxgMaterialManager& materialManager);
template <typename MaterialCore, typename MaterialData >
void updateMaterialInternal(const MaterialCore& materialCore, RefcountedRecordsMap* materialsMap, PxgMaterialManager& materialManager);
// Thin typed wrapper: forwards to the untyped internal with the material's byte size.
template <typename MaterialData>
PxU16 mapMaterialIndex(PxU16 sdkMaterialIndex, RefcountedRecordsMap* materialsMap, PxgMaterialManager& materialManager)
{
return mapMaterialIndexInternal(sdkMaterialIndex, materialsMap, materialManager, sizeof(MaterialData));
}
PxU16 mapMaterialIndexInternal(PxU16 sdkMaterialIndex, RefcountedRecordsMap* materialsMap,
PxgMaterialManager& materialManager, PxU32 materialDataByteSize);
template <typename MaterialCore>
void unregisterMaterialInternal(const MaterialCore& materialCore, RefcountedRecordsMap* materialsMap, PxgMaterialManager& materialManager);
void updateContactDistance(const PxReal* contactDistances, const PxU32 numContactDistance);
};
// Specialization for PBD particle materials: translates each SDK-side material
// index in materialIndices into its GPU-side index, writing the result into
// gpuMaterialIndices (one output per input entry).
template <>
PX_FORCE_INLINE void PxgGpuNarrowphaseCore::mapMaterialIndices<PxsPBDMaterialData>(PxU16* gpuMaterialIndices,
	const PxU16* materialIndices, PxU32 numMaterialIndices)
{
	RefcountedRecordsMap* const records = mPBDMaterialsMap;
	PxgPBDMaterialManager& pbdManager = mGpuPBDMaterialManager;
	for(PxU32 idx = 0; idx < numMaterialIndices; idx++)
		gpuMaterialIndices[idx] = mapMaterialIndex<PxsPBDMaterialData>(materialIndices[idx], records, pbdManager);
}
}
#endif

View File

@@ -0,0 +1,71 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_NP_KERNEL_INDICES_H
#define PXG_NP_KERNEL_INDICES_H
#define MIDPHASE_WARPS_PER_BLOCK 2
#define NP_TRIMESH_WARPS_PER_BLOCK 2
#define CORRELATE_WARPS_PER_BLOCK 2
#define PS_MIDPHASE_COLLISION_WAPRS_PER_BLOCK 1
#define SB_REFIT_WAPRS_PER_BLOCK 32
namespace physx
{
// Per-kernel thread-block sizes for the narrowphase launches (presumably
// threads per CUDA block — confirm against the kernel launch sites).
struct PxgNarrowPhaseBlockDims
{
enum
{
REMOVE_CONTACT_MANAGERS = 512,
COMPACT_LOST_FOUND_PAIRS = 512,
FINISH_CONTACTS = 128,
EARLY_OUT_KERNEL = 128,
COLLIDE_KERNEL = 128,
INITIALIZE_MANIFOLDS = 512,
COMPRESS_CONTACT = 256
};
};
// Per-kernel grid sizes for the narrowphase launches (presumably the number of
// blocks per launch, paired with PxgNarrowPhaseBlockDims above — confirm at
// the launch sites).
struct PxgNarrowPhaseGridDims
{
enum
{
REMOVE_CONTACT_MANAGERS = 32,
COMPACT_LOST_FOUND_PAIRS = 32,
EARLY_OUT_KERNEL = 32,
COLLIDE_KERNEL = 32,
INITIALIZE_MANIFOLDS = 128,
COMPRESS_CONTACT = 32,
};
};
}
#endif

View File

@@ -0,0 +1,305 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_NPHASE_IMPLEMENTATION_CONTEXT_H
#define PXG_NPHASE_IMPLEMENTATION_CONTEXT_H
#include "CmTask.h"
#include "foundation/PxMutex.h"
#include "PxvNphaseImplementationContext.h"
#include "PxgNarrowphaseCore.h"
#include "PxgSimulationCore.h"
#include "foundation/PxHashMap.h"
#include "PxsContactManagerState.h"
#include "PxgContactManager.h"
#include "PxgPersistentContactManifold.h"
#include "PxgHeapMemAllocator.h"
namespace physx
{
struct PxcNpWorkUnit;
class PxgNphaseImplementationContext;
class PxgGpuContext;
class PxgParticleSystemCore;
class PxsKernelWranglerManager;
namespace Bp
{
class BpNonVirtualMemAllocator;
}
namespace Dy
{
class Context;
}
// Shared base for the GPU discrete narrowphase update tasks: holds the
// timestep and a pointer back to the owning narrowphase context, and provides
// the per-bucket processContactManagers work routine.
class PxgCMGpuDiscreteUpdateBase: public Cm::Task
{
private:
PX_NOCOPY(PxgCMGpuDiscreteUpdateBase)
public:
// context: owning narrowphase context; not owned by this task.
PxgCMGpuDiscreteUpdateBase(PxgNphaseImplementationContext* context):
Cm::Task(0), mDt(FLT_MAX), mContext(context)
{
}
virtual ~PxgCMGpuDiscreteUpdateBase()
{}
// Sets the timestep used by the next run of the task.
void setDt(PxReal dt)
{
mDt = dt;
}
// Runs the discrete update for one bucket of contact managers.
void processContactManagers(PxgContactManagers& managers, PxgGpuContactManagers& managersGPU, GPU_BUCKET_ID::Enum type);
protected:
PxReal mDt; //we could probably retrieve from context to save space?
PxgNphaseImplementationContext* mContext;
};
class PxgCMGpuDiscreteUpdateFallbackTask : public Cm::Task
{
public:
PxgCMGpuDiscreteUpdateFallbackTask(PxgNphaseImplementationContext* context) :
Cm::Task(0), mDt(FLT_MAX), mContext(context)
{
}
virtual ~PxgCMGpuDiscreteUpdateFallbackTask()
{}
void setDt(PxReal dt)
{
mDt = dt;
}
virtual const char* getName() const
{
return "PxgCMGpuDiscreteUpdateFallbackTask";
}
void runInternal();
protected:
PxReal mDt;
PxgNphaseImplementationContext* mContext;
};
// First-pass GPU discrete narrowphase update task. Holds (non-owning, except
// for the reference count on the continuation) pointers to the first-pass
// narrowphase continuation and the post-broadphase task.
class PxgCMGpuDiscreteUpdateTask : public PxgCMGpuDiscreteUpdateBase
{
	PxBaseTask* mFirstPassNpContinuation;
	PxBaseTask* mPostBroadPhaseTask;
public:
	// Both pointer members are initialized in the initializer list so neither
	// is ever left indeterminate.
	PxgCMGpuDiscreteUpdateTask(PxgNphaseImplementationContext* context) : PxgCMGpuDiscreteUpdateBase(context), mFirstPassNpContinuation(NULL), mPostBroadPhaseTask(NULL)
	{
	}
	virtual ~PxgCMGpuDiscreteUpdateTask()
	{
	}
	virtual void setPostBroadPhaseTask(PxBaseTask* postBroadPhaseTask)
	{
		mPostBroadPhaseTask = postBroadPhaseTask;
	}
	// Drops the reference held on the first-pass continuation (if any) before
	// releasing the base task.
	virtual void release()
	{
		if(mFirstPassNpContinuation)
			mFirstPassNpContinuation->removeReference();
		PxgCMGpuDiscreteUpdateBase::release();
	}
	void setFirstPassContinuation(PxBaseTask* firstPassContinuation) { mFirstPassNpContinuation = firstPassContinuation; }
	void runInternal();
	virtual const char* getName() const
	{
		return "PxgNphaseImplementationContext.firstContactManagerDiscreteUpdate";
	}
};
// Second-pass GPU discrete narrowphase update task; all state lives in the
// shared base class. (A dangling empty "protected:" label was removed.)
class PxgCMGpuDiscreteSecondPassUpdateTask : public PxgCMGpuDiscreteUpdateBase
{
public:
	// context: owning narrowphase context; not owned by this task.
	PxgCMGpuDiscreteSecondPassUpdateTask(PxgNphaseImplementationContext* context): PxgCMGpuDiscreteUpdateBase(context)
	{
	}
	virtual ~PxgCMGpuDiscreteSecondPassUpdateTask()
	{}
	void runInternal();
	virtual const char* getName() const
	{
		return "PxgNphaseImplementationContext.secondContactManagerDiscreteUpdate";
	}
};
// GPU implementation of the narrowphase context interface. Owns the GPU
// narrowphase core, bridges to a CPU fallback implementation for unsupported
// contact-manager pairs, and schedules the first/second-pass update tasks.
class PxgNphaseImplementationContext : public PxvNphaseImplementationContext
{
private:
PX_NOCOPY(PxgNphaseImplementationContext)
public:
PxgNphaseImplementationContext(PxsContext& context, PxsKernelWranglerManager* gpuKernelWrangler, PxvNphaseImplementationFallback* fallbackForUnsupportedCMs,
const PxGpuDynamicsMemoryConfig& gpuDynamicsConfig, void* contactStreamBase, void* patchStreamBase, void* forceAndIndiceStreamBase,
PxBoundsArrayPinned& bounds, IG::IslandSim* islandSim,
physx::Dy::Context* dynamicsContext, PxgHeapMemoryAllocatorManager* heapMemoryManager,
bool useGPUNP);
virtual ~PxgNphaseImplementationContext();
virtual void destroy() PX_OVERRIDE PX_FINAL;
// --- Per-frame update pipeline -----------------------------------------------
virtual void updateContactManager(PxReal dt, bool hasContactDistanceChanged, PxBaseTask* continuation, PxBaseTask* firstPassNpContinuation, Cm::FanoutTask* updateBoundAndShapeTask) PX_OVERRIDE PX_FINAL;
// Runs the CPU fallback path for pairs the GPU does not handle.
void updateContactManagersFallback(PxReal dt, PxBaseTask* continuation);
virtual void postBroadPhaseUpdateContactManager(PxBaseTask* continuation) PX_OVERRIDE PX_FINAL;
virtual void secondPassUpdateContactManager(PxReal dt, PxBaseTask* continuation) PX_OVERRIDE PX_FINAL;
virtual void fetchUpdateContactManager() PX_OVERRIDE PX_FINAL;
virtual void preallocateNewBuffers(PxU32 nbNewPairs, PxU32 maxIndex) PX_OVERRIDE PX_FINAL;
// --- Contact-manager lifecycle -----------------------------------------------
virtual void registerContactManager(PxsContactManager* cm, const Sc::ShapeInteraction* shapeInteraction, PxI32 touching, PxU32 numPatches) PX_OVERRIDE PX_FINAL;
virtual void unregisterContactManager(PxsContactManager* cm) PX_OVERRIDE PX_FINAL;
virtual void refreshContactManager(PxsContactManager* cm) PX_OVERRIDE PX_FINAL;
// --- Shape / aggregate registration ------------------------------------------
virtual void registerShape(const PxNodeIndex& nodeIndex, const PxsShapeCore& shapeCore, const PxU32 transformCacheID, PxActor* actor, const bool isFemCloth) PX_OVERRIDE PX_FINAL;
virtual void updateShapeMaterial(const PxsShapeCore& shapeCore) PX_OVERRIDE PX_FINAL;
virtual void unregisterShape(const PxsShapeCore& shapeCore, const PxU32 transformCacheID, const bool isFemCloth) PX_OVERRIDE PX_FINAL;
virtual void registerAggregate(const PxU32 transformCacheID) PX_OVERRIDE PX_FINAL;
// --- Material registration (rigid, FEM surface/volume, PBD particle) ---------
virtual void registerMaterial(const PxsMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
virtual void updateMaterial(const PxsMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
virtual void unregisterMaterial(const PxsMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
virtual void registerMaterial(const PxsDeformableSurfaceMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
virtual void updateMaterial(const PxsDeformableSurfaceMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
virtual void unregisterMaterial(const PxsDeformableSurfaceMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
virtual void registerMaterial(const PxsDeformableVolumeMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
virtual void updateMaterial(const PxsDeformableVolumeMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
virtual void unregisterMaterial(const PxsDeformableVolumeMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
virtual void registerMaterial(const PxsPBDMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
virtual void updateMaterial(const PxsPBDMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
virtual void unregisterMaterial(const PxsPBDMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
// --- Results access -----------------------------------------------------------
virtual PxsContactManagerOutput& getNewContactManagerOutput(PxU32 npId) PX_OVERRIDE PX_FINAL;
virtual void appendContactManagers() PX_OVERRIDE PX_FINAL;
virtual PxsContactManagerOutputIterator getContactManagerOutputs() PX_OVERRIDE PX_FINAL;
// Contact modification/locking are delegated to the CPU fallback implementation.
virtual void setContactModifyCallback(PxContactModifyCallback* callback) PX_OVERRIDE PX_FINAL { mFallbackForUnsupportedCMs->setContactModifyCallback(callback); }
void updateNarrowPhaseShape();
void mergeContactManagers(PxBaseTask* continuation);
void processResults();
virtual void startNarrowPhaseTasks() PX_OVERRIDE PX_FINAL;
PxReal getToleranceLength();
PxgGpuNarrowphaseCore* getGpuNarrowphaseCore() { return mGpuNarrowphaseCore; }
PxgSimulationCore* getSimulationCore() { return mGpuSimulationCore; }
void setSimulationCore(PxgSimulationCore* sc) { mGpuSimulationCore = sc; }
virtual void acquireContext() PX_OVERRIDE PX_FINAL;
virtual void releaseContext() PX_OVERRIDE PX_FINAL;
virtual void lock() PX_OVERRIDE PX_FINAL { mFallbackForUnsupportedCMs->lock();}
virtual void unlock() PX_OVERRIDE PX_FINAL { mFallbackForUnsupportedCMs->unlock(); }
PxsContext& getContext() { return mContext; }
virtual PxsContactManagerOutputCounts* getLostFoundPatchOutputCounts() PX_OVERRIDE PX_FINAL;
virtual PxsContactManager** getLostFoundPatchManagers() PX_OVERRIDE PX_FINAL;
virtual PxU32 getNbLostFoundPatchManagers() PX_OVERRIDE PX_FINAL;
virtual PxsContactManagerOutput* getGPUContactManagerOutputBase() PX_OVERRIDE PX_FINAL;
virtual PxReal* getGPURestDistances() PX_OVERRIDE PX_FINAL;
virtual Sc::ShapeInteraction** getGPUShapeInteractions() PX_OVERRIDE PX_FINAL;
virtual PxsTorsionalFrictionData* getGPUTorsionalData() PX_OVERRIDE PX_FINAL;
protected:
void prepareTempContactManagers(PxBaseTask* continuation);
void removeLostPairs();
void registerContactManagerInternal(PxsContactManager* cm, const PxcNpWorkUnit& workUnit, PxU32 patchCount, PxI32 touching,
const Sc::ShapeInteraction* shapeInteraction, GPU_BUCKET_ID::Enum bucketId);
PxvNphaseImplementationFallback* mFallbackForUnsupportedCMs;
//these arrays logically belong to the tasks, but we keep them here to avoid reallocations each frame
friend class PxgCMGpuDiscreteUpdateBase;
friend class PxgCMGpuDiscreteUpdateTask;
friend class PxgCMGpuDiscreteSecondPassUpdateTask;
friend class PxgCMGpuDiscreteUpdateConvexTriMeshTask;
friend class PxgCMGpuDiscreteSecondPassUpdateConvexTriMeshTask;
PxgCMGpuDiscreteUpdateTask mUpdateCMsFirstPassTask;
PxgCMGpuDiscreteSecondPassUpdateTask mUpdateCMsSecondPassTask;
PxgCMGpuDiscreteUpdateFallbackTask mUpdateFallbackPairs;
PxPinnedArray<PxsContactManagerOutput> mContactManagerOutputs;
// Per-bucket bookkeeping of registered GPU pairs.
PxBitMap mGpuContactManagerBitMap[GPU_BUCKET_ID::eCount];
PxU32 mRecordedGpuPairCount[GPU_BUCKET_ID::eCount];
PxU32 mNbPairCount[GPU_BUCKET_ID::eCount];
PxgGpuNarrowphaseCore* mGpuNarrowphaseCore;
PxgSimulationCore* mGpuSimulationCore;
PxgGpuContext* mGpuDynamicContext;
PxU32 mTotalNbPairs;
PxBoundsArrayPinned& mBounds;
bool mHasContactDistanceChanged;
bool mUseGPUBP;
PxU32 mMaxPatches;
};
}
#endif

View File

@@ -0,0 +1,98 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_PERSISTENT_CONTACT_MANIFOLD_H
#define PXG_PERSISTENT_CONTACT_MANIFOLD_H
#include "cutil_math.h"
#include "AlignedTransform.h"
#define PXG_MAX_PCM_CONTACTS 4
#define PXG_MULTIMANIFOLD_MAX_SUBMANIFOLDS 4
#define PXG_SUBMANIFOLD_MAX_CONTACTS 6
namespace physx
{
// GPU persistent contact manifold (PCM) for a shape pair: up to
// PXG_MAX_PCM_CONTACTS cached contacts plus the cached relative pose used to
// decide whether the manifold can be reused.
struct PX_ALIGN_PREFIX(16) PxgPersistentContactManifold
{
float4 mLocalNormal_PenW[PXG_MAX_PCM_CONTACTS]; //local normal + penetration
float4 mLocalContactA[PXG_MAX_PCM_CONTACTS]; //local contact in body A's space
float4 mLocalContactB[PXG_MAX_PCM_CONTACTS]; //local contact in body B's space
float4 mRelativePos; //Cached relative transforms
PxAlignedQuat mQuatA; //Cached body A's quaternion
PxAlignedQuat mQuatB; //Cached body B's quaternion
PxU32 mNbContacts; //NbContacts
PxU32 mNbWarmStartPoints; //Nb warm start points
PxU32 mWarmStartA; //GJK warm-start mask for shape A
PxU32 mWarmStartB; //GJK warm-start mask for shape B
// Resets the manifold; mRelativePos is set to PX_MAX_REAL so the cached pose
// never matches and the next pass rebuilds contacts.
// NOTE(review): mWarmStartA/mWarmStartB are left untouched — confirm intentional.
PX_FORCE_INLINE void clear()
{
mNbContacts = 0;
mNbWarmStartPoints = 0;
mRelativePos = make_float4(PX_MAX_REAL, PX_MAX_REAL, PX_MAX_REAL, PX_MAX_REAL);
mQuatA = PxAlignedQuat(0.f, 0.f, 0.f, 1.f);
mQuatB = PxAlignedQuat(0.f, 0.f, 0.f, 1.f);
}
//KS - TODO - extend and add functionality as is required...
}
PX_ALIGN_SUFFIX(16);
// One GPU contact point: positions on both bodies, the contact normal with its
// penetration depth, and the triangle index for mesh contacts.
struct PX_ALIGN_PREFIX(16) PxgContact
{
PxVec3 pointA;
PxVec3 pointB;
PxVec3 normal;
PxReal penetration;
PxU32 triIndex;
PxU32 pad;
} // 48 due to padding
PX_ALIGN_SUFFIX(16);
// GPU multi-manifold: up to PXG_MULTIMANIFOLD_MAX_SUBMANIFOLDS sub-manifolds of
// up to PXG_SUBMANIFOLD_MAX_CONTACTS contacts each, plus the cached relative
// transform used to decide whether the manifolds can be reused.
// The trailing byte-offset comments track the running struct size.
struct PX_ALIGN_PREFIX(16) PxgPersistentContactMultiManifold
{
PxgContact mContacts[PXG_MULTIMANIFOLD_MAX_SUBMANIFOLDS][PXG_SUBMANIFOLD_MAX_CONTACTS]; // 1152
PxU32 mNbContacts[PXG_MULTIMANIFOLD_MAX_SUBMANIFOLDS]; // 1168
PxAlignedTransform mRelativeTransform; // 1200
PxU32 mNbManifolds; // 1216 due to padding
// Resets the multi-manifold; the cached position is set to PX_MAX_REAL so the
// next pass never matches the cached pose and rebuilds contacts.
PX_FORCE_INLINE void clear()
{
mNbManifolds = 0;
mRelativeTransform.p = make_float4(PX_MAX_REAL, PX_MAX_REAL, PX_MAX_REAL, PX_MAX_REAL);
mRelativeTransform.q = PxAlignedQuat(0.f, 0.f, 0.f, 1.f);
}
}
PX_ALIGN_SUFFIX(16);
}
#endif

View File

@@ -0,0 +1,163 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_SHAPE_MANAGER_H
#define PXG_SHAPE_MANAGER_H
#include "foundation/PxPreprocessor.h"
#include "PxSceneDesc.h"
#include "foundation/PxPinnedArray.h"
#include "CmIDPool.h"
#include "PxgCudaBuffer.h"
#include "foundation/PxBitMap.h"
#include "PxsMaterialCore.h"
#include "PxsDeformableSurfaceMaterialCore.h"
#include "PxsDeformableVolumeMaterialCore.h"
#include "PxsPBDMaterialCore.h"
#include "PxgConvexConvexShape.h"
namespace physx
{
class PxCudaContext;
class PxgHeapMemoryAllocatorManager;
class PxgCopyManager;
class PxNodeIndex;
// Manages the GPU mirror of all shape data: host pinned staging arrays,
// device-side buffers, dirty-state tracking, and the remap tables between
// shape ids, transform-cache ids, node indices and actors.
class PxgShapeManager
{
public:
	PxgShapeManager(PxgHeapMemoryAllocatorManager* heapManager);
	~PxgShapeManager(){}

	// Initializes the device-side buffers on the given CUDA stream.
	void initialize(PxCudaContext* cudaContext, CUstream stream);
	// Pushes host-to-device CopyDescs for dirty shape data to the PxgCopyManager.
	void scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext, CUstream stream);

	// Registers a shape and returns its id (allocated from mIdPool).
	PxU32 registerShape(PxgShape& shape);
	// Associates a transform-cache entry with a node index and actor;
	// aggregate marks shapes belonging to an aggregate.
	void registerShapeInstance(const PxNodeIndex& nodeIndex, const PxU32 transformCacheID, PxActor* actor, bool aggregate = false);
	void unregisterShape(const PxU32 shapeId);
	void unregisterShapeInstance(const PxU32 transformCacheID);
	// NOTE: fixed parameter-name typo (was "materalIndex"); declaration-only
	// rename, no caller impact.
	void updateShapeMaterial(const PxU32 materialIndex, const PxU32 id);
	// Recycles ids whose release was deferred until it is safe to reuse them.
	void releaseIDs() { mIdPool.processDeferredIds(); }

	Cm::DeferredIDPool mIdPool;
	PxgHeapMemoryAllocatorManager* mHeapManager;
	PxPinnedArray<PxgShape> mHostShapes;
	PxPinnedArray<PxNodeIndex> mHostShapesRemapTable;//remap table from shape to node index
	PxInt32ArrayPinned mHostShapeIdTable;
	PxPinnedArray<PxActor*> mHostTransformCacheIdToActorTable; // remap table from transformCacheId to PxActor
	PxgCudaBuffer mGpuShapesBuffer;
	PxgCudaBuffer mGpuShapesRemapTableBuffer; // shape to rigid
	PxgCudaBuffer mGpuTransformCacheIdToActorTableBuffer; // transformCacheId to PxActor
	PxgCudaBuffer mGpuRigidIndiceBuffer; // 00011 nodeIndex 0 has shape(0, 2, 3), nodeIndex 1 has shape(1, 4)(it can be either articulation link or rigid body)
	PxgCudaBuffer mGpuShapeIndiceBuffer; // 02314
	PxgCudaBuffer mGpuUnsortedShapeIndicesBuffer; // needed when recomputing the shape to rigids mapping
	PxgCudaBuffer mGpuTempRigidBitIndiceBuffer; //either the lower 32 bits or higher 32 bits of mGpuRigidIndiceBuffer
	PxgCudaBuffer mGpuTempRigidIndiceBuffer;
	PxBitMap mDirtyShapeMap;				// shapes whose data must be re-uploaded
	PxBitMap mDirtyTransformCacheMap;		// transform-cache entries needing re-upload
	bool mResizeRequired;
	bool mTransformCacheResizeRequired;
	PxI32 mMaxShapeId;
	PxI32 mMaxTransformCacheID;
	bool mHasShapeChanged;
	bool mHasShapeInstanceChanged;
private:
	PX_NOCOPY(PxgShapeManager)
};
// Base manager for material data mirrored to the GPU. Materials are stored as
// raw byte records of elemSize bytes in a pinned host array, with dirty
// tracking and a device buffer updated via PxgCopyManager.
class PxgMaterialManager
{
	PX_NOCOPY(PxgMaterialManager)
public:
	// elemSize is the per-material record size; defaults to rigid-body
	// material data (PxsMaterialData).
	PxgMaterialManager(PxgHeapMemoryAllocatorManager* heapManager, const PxU32 elemSize = sizeof(PxsMaterialData));
	virtual ~PxgMaterialManager(){}
	// Pushes host-to-device CopyDescs for dirty materials to the PxgCopyManager.
	virtual void scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext, CUstream stream, const PxU32 elemSize);
	// Copies elemSize bytes of material data in and returns the new material id.
	PxU32 registerMaterial(const PxU8* materialData, const PxU32 elemSize);
	// Overwrites the record for an existing material id and marks it dirty.
	void updateMaterial(const PxU8* materialData, const PxU32 elemSize, const PxU32 id);
	void unregisterMaterial(const PxU32 materialId);
	// Recycles ids whose release was deferred until it is safe to reuse them.
	void releaseIDs() { mIdPool.processDeferredIds(); }
	PxgCudaBuffer mGpuMaterialBuffer;
protected:
	Cm::DeferredIDPool mIdPool;
	PxgHeapMemoryAllocatorManager* mHeapManager;
	PxInt8ArrayPinned mHostMaterial;	// raw material records, elemSize bytes each
	PxBitMap mDirtyMaterialMap;			// material ids needing re-upload
	bool mResizeRequired;
};
// FEM (deformable) material manager; overrides the upload scheduling.
class PxgFEMMaterialManager : public PxgMaterialManager
{
public:
	PxgFEMMaterialManager(PxgHeapMemoryAllocatorManager* heapManager, const PxU32 elemSize);
	virtual void scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext, CUstream stream, const PxU32 elemSize);
};
// Deformable-volume (soft body) materials.
class PxgFEMSoftBodyMaterialManager : public PxgFEMMaterialManager
{
public:
	PxgFEMSoftBodyMaterialManager(PxgHeapMemoryAllocatorManager* heapManager);
};
// Deformable-surface (cloth) materials; fixes the record size to
// PxsDeformableSurfaceMaterialData.
class PxgFEMClothMaterialManager : public PxgMaterialManager
{
public:
	PxgFEMClothMaterialManager(PxgHeapMemoryAllocatorManager* heapManager) :
		PxgMaterialManager(heapManager, sizeof(PxsDeformableSurfaceMaterialData))
	{
	}
	// Reinterprets the raw host byte array as typed cloth material records.
	PxsDeformableSurfaceMaterialData* getCPUMaterialData()
	{
		return reinterpret_cast<PxsDeformableSurfaceMaterialData*>(mHostMaterial.begin());
	}
};
// Position-based-dynamics (particle) materials; fixes the record size to
// PxsPBDMaterialData.
class PxgPBDMaterialManager : public PxgMaterialManager
{
public:
	PxgPBDMaterialManager(PxgHeapMemoryAllocatorManager* heapManager) :
		PxgMaterialManager(heapManager, sizeof(PxsPBDMaterialData))
	{
	}
};
}
#endif

View File

@@ -0,0 +1,137 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef CONVEX_FORMAT_H
#define CONVEX_FORMAT_H
#include "foundation/PxVec3.h"
#include "foundation/PxVec4.h"
#include "foundation/PxBounds3.h"
#include "foundation/PxPlane.h"
#include "foundation/PxMat44.h"
#include "foundation/PxMemory.h"
#include "foundation/PxUtilities.h"
namespace physx
{
// Cooked convex hull data layout: vertices, polygons, edge/vertex adjacency,
// optional hill-climbing acceleration data for large hulls, and internal
// bounds used for margin computation. Pointer members reference cooked data
// stored elsewhere; this struct does not own them.
struct ConvexHullCooked
{
	// Per-vertex valency record: number of neighbors and offset into the
	// adjacency list (see BigConvexRawData::mAdjacentVerts).
	struct Valency
	{
		PxU16 mCount;
		PxU16 mOffset;
	};
	// Acceleration data built only for large hulls: a "gaussmap" support-vertex
	// map plus per-vertex valency/adjacency used for hill climbing.
	struct BigConvexRawData
	{
		// Support vertex map
		PxU16 mSubdiv; // "Gaussmap" subdivision
		PxU16 mNbSamples; // Total #samples in gaussmap PT: this is not even needed at runtime!
		PxU8* mSamples; //mNbSamples * 2 elements
		// Returns the second half of the samples array (the two halves are
		// stored back to back, mNbSamples entries each).
		PX_FORCE_INLINE const PxU8* getSamples2() const
		{
			return mSamples + mNbSamples;
		}
		//~Support vertex map
		// Valencies data
		PxU32 mNbVerts; //!< Number of vertices
		PxU32 mNbAdjVerts; //!< Total number of adjacent vertices ### PT: this is useless at runtime and should not be stored here
		Valency* mValencies; //!< A list of mNbVerts valencies (= number of neighbors) mNbVerts elements
		PxU8* mAdjacentVerts; //!< List of adjacent vertices
		//~Valencies data
	};
	// Inscribed-sphere radius and extents of an internal AABB, used for
	// margin/tolerance computation.
	struct InternalObjectsData
	{
		PxReal mRadius;
		PxReal mExtents[3];
		PX_FORCE_INLINE void reset()
		{
			mRadius = 0.0f;
			mExtents[0] = 0.0f;
			mExtents[1] = 0.0f;
			mExtents[2] = 0.0f;
		}
	};
	// One face of the hull: plane equation plus a slice of the hull's shared
	// vertex-reference array.
	struct HullPolygon
	{
		PxPlane mPlane; //!< Plane equation for this polygon //Could drop 4th elem as it can be computed from any vertex as: d = - p.dot(n);
		PxU16 mVRef8; //!< Offset of vertex references in hull vertex data (CS: can we assume indices are tightly packed and offsets are ascending?? DrawObjects makes and uses this assumption)
		PxU8 mNbVerts; //!< Number of vertices/edges in the polygon
		PxU8 mMinIndex; //!< Index of the polygon vertex that has minimal projection along this plane's normal.
		PX_FORCE_INLINE PxReal getMin(const PxVec3* PX_RESTRICT hullVertices) const //minimum of projection of the hull along this plane normal
		{
			return mPlane.n.dot(hullVertices[mMinIndex]);
		}
		PX_FORCE_INLINE PxReal getMax() const { return -mPlane.d; } //maximum of projection of the hull along this plane normal
	};
	PxBounds3 mAABB;
	PxVec3 mCenterOfMass;
	PxU16 mNbEdges;
	PxU8 mNbHullVertices;
	PxU8 mNbPolygons;
	HullPolygon* mPolygons; //!< Array of mNbPolygons structures
	PxVec3* mVertices; //mNbHullVertices elements
	PxU8* mFacesByEdges8; //mNbEdges * 2 elements
	PxU8* mFacesByVertices8; //mNbHullVertices * 3 elements
	PxU16* mVerticesByEdges16; //mNbEdges * 2 elements
	PxU8* mVertexData8; //G-d knows how many elements
	BigConvexRawData* mBigConvexRawData; //!< Hill climbing data, only for large convexes! else NULL.
	InternalObjectsData mInternal;

	// A hull is GPU-friendly when it fits the GPU narrowphase limits: at most
	// 64 vertices and at most 31 vertices per polygon. Early-exits on the
	// first violation (the original scanned every polygon regardless).
	bool isGpuFriendly()
	{
		if(mNbHullVertices > 64)
			return false;
		for(PxU32 i = 0; i < mNbPolygons; ++i)
		{
			if(mPolygons[i].mNbVerts > 31)
				return false;
		}
		return true;
	}
};
}
#endif

View File

@@ -0,0 +1,306 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __CONVEXNPCOMMON_H__
#define __CONVEXNPCOMMON_H__
#define NUM_TMP_CONTACTS_PER_PAIR 32
#define CVX_TRI_MAX_CONTACTS 5
#define MAX_MESH_MESH_PATCHES 128
// Patches correlation - same normal threshold
// 8 degrees
//#define PATCH_ACCEPTANCE_EPS 0.990268f
// 5 degrees
#define PATCH_ACCEPTANCE_EPS 0.996194698f
// 3 degrees
//#define PATCH_ACCEPTANCE_EPS 0.99862953f
#define CONVEX_TRIMESH_CACHED 0xFFffFFff
// AD: used to specify sizes of shared memory structs
// for collisions involving convexes. Make sure you
// know what you are doing if you change this.
#define CONVEX_MAX_VERTICES_POLYGONS 64
#include "foundation/PxPreprocessor.h"
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxVec3.h"
#include "foundation/PxQuat.h"
#include "foundation/PxTransform.h"
#include "geometry/PxMeshScale.h"
#include "GuBV32.h"
#include "PxgCommonDefines.h"
#include <vector_types.h>
namespace physx
{
// Per-pair record for convex-vs-mesh narrowphase. The trailing numbers in the
// member comments are running byte offsets.
struct ConvexMeshPair
{
	physx::PxTransform aToB; // pB = aToB.transform(pA + normal * separation) 28
	int startIndex; // start index in the ConvexTriContacts and ConvexTriPairs arrays 32
	int count; // the number of triangles in the ConvexTriContacts and ConvexTriPairs 36
	int cmIndex; // index of the original CM, to index into the output buffer and pass on to contact reporting 40
	int roundedStartIndex; // start index in the sorted triangle index array. For each pair, the number of indices 44
							// assigned to is the number of triangles padded to a multiple of 4 and duplicated to have
							// space for the temp buffer for the radix sort. The start index is aligned to a multiple of 4.
	int pad; // pad 48
	uint2 materialIndices; // material index for shape0 and shape1 56
};
// Per-triangle contact normal plus a packed index word:
//   bit 31      -> deferred-contact flag (DeferredContactMask)
//   bits 27..30 -> number of contacts for this triangle
//   bits 0..26  -> CPU mesh triangle index
struct ConvexTriNormalAndIndex
{
	static const PxU32 DeferredContactMask = 0x80000000;
	static const PxU32 NbContactsShift = 27;

	physx::PxVec3 normal; // normal
	int index; // isDeferredContact is in bit 31, nbContacts is in bits 27-30, rest is cpu mesh index for the triangle

	// Extracts the low 27 bits: the triangle index.
	static PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getTriangleIndex(PxU32 index)
	{
		return (index & ((1<<NbContactsShift)-1));
	}
	// Extracts bits 27..30: the contact count (deferred flag masked off first).
	static PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getNbContacts(PxU32 index)
	{
		return (index & (~DeferredContactMask)) >> NbContactsShift;
	}
	// Non-zero iff bit 31 (the deferred-contact flag) is set.
	static PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 isDeferred(const PxU32 index)
	{
		return index & DeferredContactMask;
	}
};
// Packed per-triangle contact reference (index into the contact stream).
struct ConvexTriContacts
{
	PxU32 index;
};
// A single convex-vs-triangle contact: point (xyz) + separation (w).
struct ConvexTriContact
{
	float4 contact_sepW;
};
//each convex triangle pairs will generate the intermediate data in case we need to
//do post process of this pair
//each convex triangle pairs will generate the intermediate data in case we need to
//do post process of this pair
struct ConvexTriIntermediateData
{
	// Enums to indicate the barycentric weighting of the contacts. The state
	// is stored in the top 3 bits (eMASK) of gpuTriIndex: eV0..eV2 are vertex
	// regions, eE01..eE12 are edge regions. All edge codes (4,5,6 << 29) have
	// bit 31 set, which is what isEdgeContact() tests; vertex codes do not.
	enum VoronoiState: PxU32
	{
		eV0 = 1u << 29,
		eV1 = 2u << 29,
		eV2 = 3u << 29,
		eE01 = 4u << 29,
		eE02 = 5u << 29,
		eE12 = 6u << 29,
		eMASK = 0xe0000000
	};

	physx::PxU32 gpuTriIndex; //gpu triangle index (low bits) + VoronoiState (top 3 bits)

	// True when the Voronoi state encoded in mask is an edge region (eE01/eE02/eE12).
	static PX_CUDA_CALLABLE PX_FORCE_INLINE bool isEdgeContact(const PxU32 mask)
	{
		return mask & 0x80000000;
	}
};
// The sphere-vs-mesh narrowphase reuses the convex-vs-mesh data layouts verbatim.
typedef ConvexMeshPair SphereMeshPair;
typedef ConvexTriContacts SphereTriContacts;
typedef ConvexTriNormalAndIndex SphereTriNormalAndIndex;
typedef ConvexTriIntermediateData SphereTriIntermediateData;
}
#if PX_CUDA_COMPILER
#include "PxgCommonDefines.h"
namespace physx
{
// Scratch layout for BV32 midphase traversal. Sized to fit the per-warp
// scratch budget (see the compile-time assert below: WARP_SIZE * 16 PxU32s).
PX_ALIGN_PREFIX(16)
struct MidphaseScratch
{
	const float4 * PX_RESTRICT trimeshVerts;		// mesh vertex array
	const uint4 * PX_RESTRICT trimeshTriIndices;	// mesh triangle index array
	PxTransform shapeToMeshNoScale;					// query shape pose in mesh space (scale excluded)
	PxTransform meshToWorld;
	PxMeshScale trimeshScale;
	PxVec3 inflatedExtents;							// query AABB half-extents, inflated by contact distance
	PxVec3 center;									// query AABB center
	PxU32 shape_materialIndex;
	PxU32 trimeshShape_materialIndex;
	//bv32 tree
	const Gu::BV32DataPacked* bv32PackedNodes;
	//stack for traversal
	int sBv32Nodes[320]; //10 depth of the bv32 tree
}PX_ALIGN_SUFFIX(16);
PX_COMPILE_TIME_ASSERT(sizeof(MidphaseScratch) <= WARP_SIZE * 16 * sizeof(PxU32));
}
#include "schlockShared.h"
//0 is primitive, 1 is tetrahedron world space
//0 is primitive, 1 is tetrahedron world space
// Warp-shared scratch for GJK/EPA between a convex primitive (vertex set vA,
// up to CONVEX_MAX_VERTICES_POLYGONS vertices) and a tetrahedron (vB, 4
// world-space vertices), plus the GJK warm-start cache and output.
struct TetCollideScratch
{
	physx::PxVec3 vA[CONVEX_MAX_VERTICES_POLYGONS];	// primitive vertices
	physx::PxVec3 vB[4];							// tetrahedron vertices (world space)
	schlock::GjkCachedData cachedData;				// GJK warm-start cache
	schlock::GjkOutput gjkOutput;					// shared GJK/EPA result
	physx::PxQuat rot0;								// primitive mesh-scale rotation
	physx::PxVec3 scale0;							// primitive mesh scale
	physx::PxReal contactDistance;
	physx::PxReal inSphereRadius0;					// inscribed-sphere radius, shape 0
	physx::PxReal inSphereRadius1;					// inscribed-sphere radius, shape 1
	physx::PxVec3 searchDir;						// initial GJK search direction
	physx::PxU8 nbVertices0;
	physx::PxU8 nbVertices1;
	physx::PxU32 tetIndex0;
	physx::PxU32 tetIndex1;

	// Warp-cooperative reset: lane 0 invalidates the GJK cache, then all lanes
	// synchronize so every lane observes the cleared state.
	__device__ void Clear()
	{
		if (threadIdx.x == 0)
			cachedData.size = 0;
		__syncwarp();
	}
};
#include "epa.cuh"
#include "gjk.cuh"
// Warp-cooperative GJK/EPA collision between the vertex sets staged in
// ss_scratch (vA = primitive, vB = tetrahedron; see TetCollideScratch).
// Returns eDISTANT when the shapes are farther apart than contactDistance;
// otherwise runs EPA when GJK reports overlap (or a suspect witness
// direction) and returns the EPA result. The __syncwarp() calls order the
// shared-memory writes of gjkOutput/cachedData across the warp — do not
// reorder statements around them.
__device__ static schlock::GjkResult::Enum tetPrimitivesCollide2(
	squawk::EpaScratch& ss_epa_scratch,
	TetCollideScratch& ss_scratch,
	const physx::PxU32 globalWarpIndex)	// NOTE(review): currently unused — presumably kept for debugging/profiling hooks
{
	typedef schlock::GjkResult GjkResult;
	const physx::PxReal convergenceRatio = 1 - 0.000225f;
	assert(ss_scratch.nbVertices0 <= 64);
	assert(ss_scratch.nbVertices1 <= 64);

	// Seed GJK with the cached search direction when available; otherwise (1,0,0).
	physx::PxVec3 initialDir(1, 0, 0);
	physx::PxVec3 aToB_p = ss_scratch.searchDir; // ss_scratch.aToB.p;
	if (aToB_p.magnitudeSquared() > 0)
		initialDir = aToB_p; // GJK's initial start dir is usually from the warm cache => lazy normalize inside GJK

	GjkResult::Enum gjkResult = squawk::gjk(
		ss_scratch.vA, ss_scratch.nbVertices0,
		ss_scratch.vB, ss_scratch.nbVertices1,
		initialDir,
		ss_scratch.contactDistance,
		convergenceRatio,
		ss_scratch.gjkOutput, ss_scratch.cachedData);
	__syncwarp();

	if (gjkResult == GjkResult::eDISTANT)
	{
		return GjkResult::eDISTANT;
	}

	bool separated = (gjkResult == GjkResult::eCLOSE);
	// "Anomaly": GJK claims separation but the witness direction disagrees
	// with the closest-point direction — fall through to EPA to resolve.
	bool anomaly = separated && ss_scratch.gjkOutput.closestPointDir.dot(ss_scratch.gjkOutput.direction) < 0.999f;
	GjkResult::Enum result = GjkResult::eCLOSE;
	if (!separated || anomaly)
	{
		GjkResult::Enum epaResult = squawk::epa(ss_epa_scratch,
			ss_scratch.vA, ss_scratch.nbVertices0,
			ss_scratch.vB, ss_scratch.nbVertices1,
			&(ss_scratch.cachedData),
			convergenceRatio,
			0.5f * (ss_scratch.inSphereRadius0 + ss_scratch.inSphereRadius1),
			ss_scratch.gjkOutput);
		//separated = epaResult == GjkResult::eCLOSE;
		result = epaResult;
		__syncwarp();
		if (ss_scratch.gjkOutput.degenerate)
		{
			//we need to re-run gjk epa with other configurations
			ss_scratch.cachedData.size = 0;
			//we need to re-run gjk epa with other configurations
			initialDir = physx::PxVec3(0.f, 1.f, 0.f);
			// NOTE(review): this inner gjkResult shadows the outer one and its
			// value is never read — EPA is re-run unconditionally after the
			// retry GJK (presumably to rebuild cachedData); confirm intent.
			GjkResult::Enum gjkResult = squawk::gjk(
				ss_scratch.vA, ss_scratch.nbVertices0,
				ss_scratch.vB, ss_scratch.nbVertices1,
				initialDir,
				ss_scratch.contactDistance,
				convergenceRatio,
				ss_scratch.gjkOutput, ss_scratch.cachedData);
			__syncwarp();
			GjkResult::Enum epaResult = squawk::epa(ss_epa_scratch,
				ss_scratch.vA, ss_scratch.nbVertices0,
				ss_scratch.vB, ss_scratch.nbVertices1,
				&(ss_scratch.cachedData),
				convergenceRatio,
				0.5f * (ss_scratch.inSphereRadius0 + ss_scratch.inSphereRadius1),
				ss_scratch.gjkOutput);
			//separated = epaResult == GjkResult::eCLOSE;
			result = epaResult;
		}
		__syncwarp();
	}
	return result;
}
#endif
#endif

View File

@@ -0,0 +1,424 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef CUDA_NP_COMMON_H
#define CUDA_NP_COMMON_H
#include "foundation/PxVec3.h"
#include <stdio.h>
#include "convexFormat.h"
#include "cutil_math.h"
#include "PxgCommonDefines.h"
#include "PxgPersistentContactManifold.h"
#include "assert.h"
#define CONVEX_PLANE_WARPS_PER_BLOCK 4
#define GJK_EPA_WARPS_PER_BLOCK 1
#define NUM_TMP_CONTACTS_PER_PAIR 32
//The smallest epsilon we will permit (scaled by PxTolerancesScale.length)
#define PCM_WITNESS_POINT_LOWER_EPS 1e-2f
//The largest epsilon we will permit (scaled by PxTolerancesScale.length)
#define PCM_WITNESS_POINT_UPPER_EPS 5e-2f
namespace physx
{
struct PxgPersistentContactManifold;
struct PxgPersistentContactMultiManifold;
struct PxgContactManagerInput;
struct PxsContactManagerOutput;
class PxsContactManager;
struct PxsTorsionalFrictionData;
namespace Sc
{
class ShapeInteraction;
}
struct PxgPairManagementData
{
PxgContactManagerInput* mContactManagerInputData;
PxsContactManagerOutput* mContactManagerOutputData;
PxsContactManager** mCpuContactManagerMapping;
Sc::ShapeInteraction** mShapeInteractions;
PxReal* mRestDistances;
PxsTorsionalFrictionData* mTorsionalData;
PxU32* mTempAccumulator;
PxU32* mBlockSharedAccumulator;
PxU32* mRemoveIndices;
PxU32 mNbPairs;
PxU32 mNbToRemove;
void* mPersistentContactManagers;// either PxgPersistentContactManifold or PxgPersistentMultiManagementData
PxgPairManagementData() : mContactManagerInputData(NULL), mContactManagerOutputData(NULL), mCpuContactManagerMapping(NULL), mShapeInteractions(NULL),
mRestDistances(NULL), mTorsionalData(NULL), mTempAccumulator(NULL), mBlockSharedAccumulator(NULL), mNbPairs(0)
{
}
};
// Byte counters for the patch/contact/force output streams plus an overflow
// flag word, updated atomically from device code.
struct PX_ALIGN_PREFIX(16) PxgPatchAndContactCounters
{
	// Bit flags ORed into overflowError when an output buffer runs out of space.
	enum OverflowError
	{
		NO_OVERFLOW = 0,
		CONTACT_BUFFER_OVERFLOW = 1 << 0,
		FORCE_BUFFER_OVERFLOW = 1 << 1,
		PATCH_BUFFER_OVERFLOW = 1 << 2
	};

	PxU32 patchesBytes;			// bytes written to the patch stream
	PxU32 contactsBytes;		// bytes written to the contact stream
	PxU32 forceAndIndiceBytes;	// bytes written to the force/index stream
	PxU32 overflowError;		// ORed OverflowError flags

	// Atomically ORs the error flag in (device compilation only; a no-op
	// when compiled for host, hence the PX_UNUSED).
	__device__
	void setOverflowError(const OverflowError& err)
	{
		PX_UNUSED(err);
#if __CUDA_ARCH__
		atomicOr(&overflowError, PxU32(err));
#endif
	}

	__host__ __device__
	PxU32 getOverflowError() const
	{
		return overflowError;
	}

} PX_ALIGN_SUFFIX(16);
}
// Unpacks a packed 32-bit count/offset word into a Valency record:
// high 16 bits -> mCount, low 16 bits -> mOffset (inverse of valencyToPxU32).
__host__ __device__ inline
physx::ConvexHullCooked::Valency u32ToValency(physx::PxU32 countOffset)
{
	physx::ConvexHullCooked::Valency result;
	result.mOffset = u32Low(countOffset);
	result.mCount = u32High(countOffset);
	return result;
}
// Packs a Valency record back into a 32-bit word: mCount in the high 16 bits,
// mOffset in the low 16 bits (inverse of u32ToValency).
__host__ __device__ inline
physx::PxU32 valencyToPxU32(const physx::ConvexHullCooked::Valency& v)
{
	return merge(v.mCount, v.mOffset);
}
// Accessors for 32-bit words that pack two or three cooked-hull fields.
// Layout convention: one 16-bit field in the high half (u32High), and either
// a 16-bit field or two 8-bit fields (u16High/u16Low) in the low half.

// Low byte of the low half-word.
// NOTE(review): returns PxU8 while u16Low presumably yields 16 bits — the
// implicit truncation assumes the value fits in 8 bits; verify at pack time.
__host__ __device__ inline
physx::PxU8 getNbAdjVerts(physx::PxU32 val)
{
	return u16Low(u32Low(val));
}

// High half-word: edge count.
__host__ __device__ inline
physx::PxU16 getNbEdges(physx::PxU32 val)
{
	return u32High(val);
}

// Low byte of the low half-word: polygon count.
__host__ __device__ inline
physx::PxU8 getNbPolygons(physx::PxU32 val)
{
	return u16Low(u32Low(val));
}

// High half-word: polygon vertex-reference offset (HullPolygon::mVRef8).
__host__ __device__ inline
physx::PxU16 getVRef8(physx::PxU32 val)
{
	return u32High(val);
}

// High byte of the low half-word: polygon vertex count (HullPolygon::mNbVerts).
__host__ __device__ inline
physx::PxU8 getNbVerts(physx::PxU32 val)
{
	return u16High(u32Low(val));
}

// Low byte of the low half-word: min-projection vertex index (HullPolygon::mMinIndex).
__host__ __device__ inline
physx::PxU8 getMinIndex(physx::PxU32 val)
{
	return u16Low(u32Low(val));
}

// Packs a 16-bit field plus two 8-bit fields into one 32-bit word
// (hi in the high half, lohi/lolo in the low half).
__host__ __device__ inline
physx::PxU32 merge(physx::PxU16 hi, physx::PxU8 lohi, physx::PxU8 lolo)
{
	return merge(hi, merge(lohi, lolo));
}
// A tetrahedron is considered valid when its signed volume (times 6, i.e. the
// scalar triple product of the edges from v0) is positive — the vertices are
// positively oriented and the tet is not (near-)degenerate.
__host__ __device__ inline bool isValidTet(const physx::PxVec3& worldV0, const physx::PxVec3& worldV1, const physx::PxVec3& worldV2, const physx::PxVec3& worldV3)
{
	const physx::PxVec3 edge1 = worldV1 - worldV0;
	const physx::PxVec3 edge2 = worldV2 - worldV0;
	const physx::PxVec3 edge3 = worldV3 - worldV0;
	return edge1.dot(edge2.cross(edge3)) > 1e-9f;
}
//device only functions
#if PX_CUDA_COMPILER
#include "reduction.cuh"
// Device-side helper wrapping a PxMeshScale-style (scale, rotation) pair.
// Scaling is applied in the rotated frame: rotate into the scale frame, scale
// component-wise, rotate back. Also handles triangle winding flips for
// negative-determinant (mirroring) scales.
class MeshScaling
{
public:
	__device__ MeshScaling(const physx::PxVec3& scale_, const physx::PxQuat& rotation_)
	{
		scale = scale_;
		rotation = rotation_;
		idtMeshScale = (scale.x == 1.0f && scale.y == 1.0f && scale.z == 1.0f);
		// A negative scale determinant mirrors the mesh, so triangle winding
		// (and hence normals) must be flipped.
		flipNormal = ((scale.x * scale.y * scale.z) < 0.0f);
	}

	// Applies the scale to a point: vertex space -> shape space.
	__device__ inline physx::PxVec3 vertex2Shape(const physx::PxVec3& v) const
	{
		using namespace physx;
		PxVec3 temp = rotation.rotate(v);
		temp.x *= scale.x;
		temp.y *= scale.y;
		temp.z *= scale.z;
		return rotation.rotateInv(temp);
	}

	// Removes the scale from a point: shape space -> vertex space.
	__device__ inline physx::PxVec3 shape2Vertex(const physx::PxVec3& v) const
	{
		using namespace physx;
		PxVec3 temp = rotation.rotate(v);
		temp.x /= scale.x;
		temp.y /= scale.y;
		temp.z /= scale.z;
		return rotation.rotateInv(temp);
	}

	//Transforms a normal vector from vertex to shape space. This keeps the normal vector perpendicular to surfaces that get scaled with the same transform.
	//Does not preserve length. Is applicable to other covariant vectors too.
	// (Covariant vectors transform with the inverse scale, hence shape2Vertex.)
	__device__ inline physx::PxVec3 vertex2ShapeNormalVector(const physx::PxVec3& normal) const
	{
		return shape2Vertex(normal);
	}

	//Transforms a normal vector from shape to vertex space. This keeps the normal vector perpendicular to surfaces that get scaled with the same transform.
	//Does not preserve length. Is applicable to other covariant vectors too.
	__device__ inline physx::PxVec3 shape2VertexNormalVector(const physx::PxVec3& normal) const
	{
		return vertex2Shape(normal);
	}

	// Scales a triangle into shape space in-place (triV0..2) and writes the
	// winding-corrected copy to v0..2: when flipNormal is set, v1/v2 are
	// swapped so the shape-space triangle keeps outward-facing normals.
	__device__ inline void getShapeSpaceVert(physx::PxVec3& triV0, physx::PxVec3& triV1, physx::PxVec3& triV2,
		physx::PxVec3& v0, physx::PxVec3& v1, physx::PxVec3& v2) const
	{
		if (idtMeshScale)
		{
			v0 = triV0;
			v1 = triV1;
			v2 = triV2;
		}
		else
		{
			// Triangle scaling, triangle verts in shape space
			triV0 = vertex2Shape(triV0);
			triV1 = vertex2Shape(triV1);
			triV2 = vertex2Shape(triV2);
			v0 = triV0;
			v1 = flipNormal ? triV2 : triV1;
			v2 = flipNormal ? triV1 : triV2;
		}
	}

	physx::PxVec3 scale;		// per-axis scale factors (in the rotated frame)
	physx::PxQuat rotation;		// frame in which the scale is applied
	bool idtMeshScale;			// true when scale is exactly (1,1,1)
	bool flipNormal;			// true for mirroring (negative-determinant) scales
};
//Applies a potentially non-uniform scaling to the point v. The scaling can be expressed in a rotated coordinate frame defined by the quaternion called rotation.
// (Free-function counterpart of MeshScaling::vertex2Shape.)
__device__ inline
physx::PxVec3 vertex2Shape(const physx::PxVec3& v, const physx::PxVec3& scale, const physx::PxQuat& rotation)
{
	using namespace physx;
	PxVec3 temp = rotation.rotate(v);
	temp.x *= scale.x;
	temp.y *= scale.y;
	temp.z *= scale.z;
	return rotation.rotateInv(temp);
}
//Removes a potentially non-uniform scaling from the point v. The scaling can be expressed in a rotated coordinate frame defined by the quaternion called rotation.
// (Free-function counterpart of MeshScaling::shape2Vertex; inverse of vertex2Shape.)
__device__ inline
physx::PxVec3 shape2Vertex(const physx::PxVec3& v, const physx::PxVec3& scale, const physx::PxQuat& rotation)
{
	using namespace physx;
	PxVec3 temp = rotation.rotate(v);
	temp.x /= scale.x;
	temp.y /= scale.y;
	temp.z /= scale.z;
	return rotation.rotateInv(temp);
}
//Transforms a normal vector from vertex to shape space. This keeps the normal vector perpendicular to surfaces that get scaled with the same transform.
//Does not preserve length. Is applicable to other covariant vectors too.
// (Covariant vectors transform with the inverse scale, hence shape2Vertex.)
__device__ inline
physx::PxVec3 vertex2ShapeNormalVector(const physx::PxVec3& normal, const physx::PxVec3& scale, const physx::PxQuat& rotation)
{
	return shape2Vertex(normal, scale, rotation);
}
//Transforms a normal vector from shape to vertex space. This keeps the normal vector perpendicular to surfaces that get scaled with the same transform.
//Does not preserve length. Is applicable to other covariant vectors too.
// (Inverse of vertex2ShapeNormalVector: applies the forward scale.)
__device__ inline
physx::PxVec3 shape2VertexNormalVector(const physx::PxVec3& normal, const physx::PxVec3& scale, const physx::PxQuat& rotation)
{
	return vertex2Shape(normal, scale, rotation);
}
// Warp-cooperative vertex staging: each lane handles every WARP_SIZE-th
// vertex, applying the mesh scale (vertex -> shape space) and then the given
// transform (shape -> e.g. world space), writing the result to s_vertices
// (typically shared memory).
__device__ inline static
void prepareVertices(const PxTransform& transf,
	PxVec3 scale, PxQuat rot,
	PxU32 nbHullVertices,
	const float4* pVertices,
	PxVec3* s_vertices
)
{
	assert(transf.isFinite());

	for (PxU32 i = threadIdx.x; i < nbHullVertices; i += WARP_SIZE)
	{
		float4 f = pVertices[i];
		s_vertices[i] = transf.transform(vertex2Shape(PxVec3(f.x, f.y, f.z), scale, rot));
		assert(s_vertices[i].isFinite());
	}
}
// Overload without a rigid transform: stages vertices in shape space only
// (mesh scale applied, no pose). Same warp-cooperative access pattern.
__device__ inline static
void prepareVertices(PxVec3 scale, PxQuat rot,
	PxU32 nbHullVertices,
	const float4* pVertices,
	PxVec3* s_vertices
)
{
	for (PxU32 i = threadIdx.x; i < nbHullVertices; i += WARP_SIZE)
	{
		float4 f = pVertices[i];
		s_vertices[i] = vertex2Shape(PxVec3(f.x, f.y, f.z), scale, rot);
	}
}
#define MESH_MANIFOLD_EPSILON 0.05f
#define BOX_MARGIN_RATIO 0.25f
// PCM margin for a convex hull given its internal AABB extents (float4, as
// stored in cooked data) and mesh scale: 25% of the smallest scaled extent,
// capped by a tolerance-length-derived margin.
__device__ inline static
PxReal calculatePCMConvexMargin(const float4& extents_, const PxVec3& scale, const PxReal toleranceLength)
{
	using namespace physx;
	const PxVec3 extents = PxVec3(extents_.x * scale.x,
		extents_.y * scale.y,
		extents_.z * scale.z);
	const PxReal min = fminf(fminf(extents.x, extents.y), extents.z);
	const PxReal toleranceMargin = toleranceLength * MESH_MANIFOLD_EPSILON;
	//ML: 25% of the minimum extents of the internal AABB as this convex hull's margin
	return fminf((min * BOX_MARGIN_RATIO), toleranceMargin);
}
// Overload for pre-scaled PxVec3 extents; same margin rule as above.
__device__ inline static
PxReal calculatePCMConvexMargin(const PxVec3& extents, const PxReal toleranceLength)
{
	using namespace physx;
	const PxReal min = fminf(fminf(extents.x, extents.y), extents.z);
	const PxReal toleranceMargin = toleranceLength * MESH_MANIFOLD_EPSILON;
	//ML: 25% of the minimum extents of the internal AABB as this convex hull's margin
	return fminf((min * BOX_MARGIN_RATIO), toleranceMargin);
}
// Chebyshev (max-component) distance between two positions — the largest
// absolute per-axis translation delta between the current and previous pose.
__device__ inline static physx::PxReal maxTransformPositionDelta(const physx::PxVec3& curP, const physx::PxVec3& preP)
{
	using namespace physx;
	const PxReal dx = PxAbs(curP.x - preP.x);
	const PxReal dy = PxAbs(curP.y - preP.y);
	const PxReal dz = PxAbs(curP.z - preP.z);
	return PxMax(dx, PxMax(dy, dz));
}
// Manifold-invalidation threshold tables, indexed by the current number of
// manifold contacts (see invalidate_BoxConvex). The "Thresholds" tables scale
// the objects' margins for the translation test; the "QuatThresholds" tables
// are minimum quaternion dot products between frames for the rotation test.
// Stored in constant memory for fast uniform access from device code.
__constant__ __device__ PxF32 local_invalidateThresholdsConvex[5] = { 0.5f, 0.125f, 0.25f, 0.375f, 0.375f };
__constant__ __device__ PxF32 local_invalidateQuatThresholdsConvex[5] = { 0.9998f, 0.9999f, 0.9999f, 0.9999f, 0.9999f };
__constant__ __device__ PxF32 local_invalidateThresholdsSphere[3] = { 0.5f, 0.1f, 0.75f };
__constant__ __device__ PxF32 local_invalidateQuatThresholdsSphere[3] = { 0.9995f, 0.9999f, 0.9997f };
__constant__ __device__ PxF32 local_invalidateQuatThresholdsConvexPlane[5] = { 0.99996f, 0.99996f, 0.99996f, 0.99996f, 0.99996f };
// Decides whether a persistent manifold must be regenerated, by comparing the
// current relative pose against the cached one. Returns non-zero when new
// contacts should be generated. Thresholds come from the tables above,
// indexed by the current manifold contact count; radiusA/radiusB convert
// rotational deltas into surface travel distances for the fallback test.
__device__ inline static
PxU32 invalidate_BoxConvex(const PxVec3& curRelPos, const PxQuat& curQuatA, const PxQuat& curQuatB,
	const PxVec3& preRelPos, const PxQuat& preRelQuatA, const PxQuat& preRelQuatB,
	const physx::PxReal minMargin, const physx::PxReal radiusA, const physx::PxReal radiusB,
	const physx::PxU8 manifold_NumContacts, PxF32* local_invalidateThresholds, PxF32* local_invalidateQuatThresholds)
{
	using namespace physx;
	//This is the translational threshold used in invalidate_BoxConvexHull. 0.5 is 50% of the object margin. we use different threshold between
	//0 and 4 points. This threshold is a scale that is multiplied by the objects' margins.
	const PxReal ratio = local_invalidateThresholds[manifold_NumContacts];
	const PxReal thresholdP = minMargin * ratio;
	// Chebyshev distance between the cached and current relative positions.
	const PxReal deltaP = maxTransformPositionDelta(curRelPos, preRelPos);

	//This is the rotational threshold used in invalidate_BoxConvexHull. 0.9998 is a threshold for quat difference
	//between previous and current frame
	const PxReal thresholdQ = local_invalidateQuatThresholds[manifold_NumContacts];
	// Quaternion dot products: close to 1 means little rotation since caching.
	const PxReal deltaQA = curQuatA.dot(preRelQuatA);
	const PxReal deltaQB = curQuatB.dot(preRelQuatB);

	bool generateContacts = (deltaP > thresholdP) || (thresholdQ > deltaQA) || (thresholdQ > deltaQB);
	if (!generateContacts)
	{
		// Secondary test: convert the rotation angles into arc lengths on each
		// body's bounding radius and compare against the translation threshold,
		// so large bodies invalidate on smaller angular changes.
		const PxReal aRadian = deltaQA < 1.0f ? acosf(deltaQA) : 0.f;
		const PxReal bRadian = deltaQB < 1.0f ? acosf(deltaQB) : 0.f;

		const PxReal travelDistA = aRadian * radiusA;
		const PxReal travelDistB = bRadian * radiusB;

		generateContacts = (travelDistA > thresholdP) || (travelDistB > thresholdP);
	}

	return generateContacts;
}
#endif
#endif

View File

@@ -0,0 +1,230 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef GU_SCHLOCKSHARED_H
#define GU_SCHLOCKSHARED_H
#include "foundation/PxVec3.h"
#include "foundation/PxMat33.h"
#include "foundation/PxTransform.h"
#include "geometry/PxMeshScale.h"
#include <assert.h>
#include <stdio.h>
#define GJK_LOGGING 0
#define EPA_REPLACE_TRIANGLE_LOGGING 0
#define EPA_LOGGING 0
#define EPA_LOG_DEGENERATE_CASES 0
#if GJK_LOGGING
#define GJK_LOG(...) if(gjkDebug) printf(__VA_ARGS__)
#else
#define GJK_LOG(...)
#endif
namespace schlock
{
extern bool epaDebug;
using namespace physx;
template<typename T>
PX_CUDA_CALLABLE PX_FORCE_INLINE void swap(T& a, T& b)
{
T tmp = a;
a = b;
b = tmp;
}
// Packs a pair of per-shape vertex indices into one 32-bit word: a in the low
// byte, b in the second byte. The constant 0x3f800000 is the IEEE-754 bit
// pattern of 1.0f -- presumably so the packed word remains a benign value when
// stored in float-typed storage; TODO confirm against the GJK cache layout.
PX_FORCE_INLINE PX_CUDA_CALLABLE PxU32 encodeIndices(PxU8 a, PxU8 b) { return 0x3f800000u | b<<8 | a;}
// Extracts the shape-A vertex index (low byte).
PX_FORCE_INLINE PX_CUDA_CALLABLE PxU8 aIndex(PxU32 indices) { return indices&0xff; }
// Extracts the shape-B vertex index (second byte).
PX_FORCE_INLINE PX_CUDA_CALLABLE PxU8 bIndex(PxU32 indices) { return (indices>>8)&0xff; }
// A cached GJK vertex: its position plus the packed indices of the source
// vertices on shapes A and B that produced it (see encodeIndices above).
struct CachedVertex
{
	PxVec3 v;		// vertex position
	PxU32 indices;	// packed A/B source indices
	PX_FORCE_INLINE PX_CUDA_CALLABLE CachedVertex() {}
	PX_FORCE_INLINE PX_CUDA_CALLABLE CachedVertex(PxU8 iA, PxU8 iB, const PxVec3& iV): v(iV), indices(encodeIndices(iA,iB)) {}
	PX_FORCE_INLINE PX_CUDA_CALLABLE CachedVertex(const CachedVertex& other): v(other.v), indices(other.indices) {}
	PX_FORCE_INLINE PX_CUDA_CALLABLE void operator=(const CachedVertex& other) { v = other.v, indices = other.indices; }
	// Source vertex index on shape A / shape B.
	PX_FORCE_INLINE PX_CUDA_CALLABLE PxU8 a() const { return aIndex(indices); }
	PX_FORCE_INLINE PX_CUDA_CALLABLE PxU8 b() const { return bIndex(indices); }
	// volatile overloads so the accessors can be called on volatile-qualified
	// instances (presumably shared-memory copies -- TODO confirm at the callers).
	PX_FORCE_INLINE PX_CUDA_CALLABLE PxU8 a() const volatile { return aIndex(indices); }
	PX_FORCE_INLINE PX_CUDA_CALLABLE PxU8 b() const volatile { return bIndex(indices); }
	// Member-wise copy out of a volatile instance (PxVec3 itself provides no
	// volatile copy constructor).
	PX_FORCE_INLINE PX_CUDA_CALLABLE PxVec3 getV() const volatile { return PxVec3(v.x, v.y, v.z); }
};
// Simplex data produced by GJK (up to 4 vertices, each with an associated
// plane) that can be handed to EPA as a warm start -- see the GJK/EPA routine
// notes at the end of this header.
struct GjkCachedData
{
	CachedVertex vertices[4];	// simplex vertices
	struct
	{
		PxVec3 n;	// plane normal
		PxReal d;	// plane distance -- NOTE(review): sign convention not visible here; confirm at the writer
	} normals[4];
	PxU32 size;		// number of valid entries in vertices/normals
	PX_FORCE_INLINE PX_CUDA_CALLABLE GjkCachedData(){}
	// Empties the cache. The __syncwarp makes the cleared size visible to the
	// whole warp before any thread continues.
	__device__ void Reset() { size = 0; __syncwarp(); }
};
// Classification returned by the GJK/EPA queries (see the routine documentation
// at the end of this header).
struct GjkResult
{
	enum Enum
	{
		eDISTANT,	// lower bound on distance exceeds minSep
		eCLOSE,		// neither provably distant nor overlapping
		eOVERLAP	// provably overlapping
	};
};
// Output of a GJK query. Which fields are meaningful depends on the returned
// GjkResult -- see the routine documentation at the end of this header.
struct GjkOutput
{
	PxVec3 direction; // unit witness for lower bound
	PxReal lowerBound; // provable lower bound
	PxReal upperBound; // provable upper bound
	PxVec3 closestPointA; // witnesses for upper bound
	PxVec3 closestPointB;
	bool degenerate; // raised on geometric instability near convergence
	PxVec3 closestPointDir;
};
// Barycentric coordinates of p with respect to triangle (a, b, c): on exit
// p ~= u*a + v*b + w*c (up to numerical error). The sub-triangle area ratios
// are evaluated in the coordinate plane where the triangle normal has its
// largest component, for numerical robustness.
PX_CUDA_CALLABLE PX_FORCE_INLINE void barycentrics(const PxVec3& a, const PxVec3& b, const PxVec3& c, float &u, float &v, float &w, const PxVec3& p)
{
	const PxVec3 n = (b - a).cross(c - a);		// twice the signed triangle area (as a vector)
	const PxVec3 areaU = (b - p).cross(c - p);	// sub-triangle opposite vertex a
	const PxVec3 areaV = (c - p).cross(a - p);	// sub-triangle opposite vertex b
	const PxVec3 nAbs = n.abs();

	// Take the area ratios along the axis with the largest normal component.
	if (nAbs.x >= nAbs.y && nAbs.x >= nAbs.z)
	{
		const float inv = 1.0f / n.x;
		u = areaU.x * inv;
		v = areaV.x * inv;
	}
	else if (nAbs.y >= nAbs.z)
	{
		const float inv = 1.0f / n.y;
		u = areaU.y * inv;
		v = areaV.y * inv;
	}
	else
	{
		const float inv = 1.0f / n.z;
		u = areaU.z * inv;
		v = areaV.z * inv;
	}
	w = 1.0f - u - v;
}
// top-level functions
// unit-level functions for testing
static const PxU32 EPA_MAX_VERTS = 18, EPA_MAX_FACES = 2*EPA_MAX_VERTS-4; // for N verts, we'll have 2(N-2) faces.
typedef int EPABitMap;
#define MAKE_Vec3I8(A, B, C) (physx::PxU32(A) | (physx::PxU32(B) << 8) | (physx::PxU32(C) << 16))
// Three 8-bit indices packed into the low 24 bits of a 32-bit word
// (index i lives in byte i). The top byte is scratch: setInt/getInt move the
// whole word, while operator== compares only the low 24 bits.
struct Vec3I8
{
	PxU32 a;	// packed storage
	// Reads the i-th packed index (i in [0, 2]).
	PX_FORCE_INLINE PX_CUDA_CALLABLE PxU8 get(PxU32 i) const { return (a >> (i << 3)) & 0xFFul; }
	// Overwrites the i-th packed index, preserving the other bytes.
	PX_FORCE_INLINE PX_CUDA_CALLABLE void set(PxU32 i, PxU8 v) { a = (a & ~(0xFFul << (i << 3))) | (PxU32(v) << (i << 3)); }
	// Raw whole-word access (includes the scratch byte).
	PX_FORCE_INLINE PX_CUDA_CALLABLE void setInt(int i) { a = i; }
	// Fixed: marked const so it can be called on const instances.
	PX_FORCE_INLINE PX_CUDA_CALLABLE int getInt() const { return a; }
	PX_FORCE_INLINE PX_CUDA_CALLABLE const Vec3I8& operator=(const Vec3I8& other) {a = other.a; return *this;}
	// Fixed: marked const -- equality comparison must not require a mutable
	// left-hand side. Only the three packed indices participate.
	bool operator==(const Vec3I8& other) const { return (a & 0xFFffFFul) == (other.a & 0xFFffFFul); }
};
// One face of the EPA polytope.
struct EPATriangle
{
	PxVec3 normal; // unit normal
	PxReal distance; // *plane* distance from origin
	Vec3I8 v; // vertices (three packed indices into the hull vertex arrays)
};
// Storage for the polytope expanded by EPA. Face capacity follows from Euler's
// formula: a closed triangulated hull with N vertices has 2(N-2) faces
// (see the EPA_MAX_FACES definition above).
struct EPAHullData
{
	PxVec3 v[EPA_MAX_VERTS];		// hull vertex positions
	PxU32 i[EPA_MAX_VERTS];			// per-vertex word; presumably packed A/B source indices (see encodeIndices) -- TODO confirm
	EPATriangle t[EPA_MAX_FACES];	// hull faces
	Vec3I8 a[EPA_MAX_FACES];		// per-face index triple; presumably face adjacency -- TODO confirm
	PxU8 nbVertices;				// number of vertices currently in use
	EPABitMap liveMap;				// per-face bits; presumably marks faces still on the hull -- TODO confirm
};
// GJK routine.
//
// Input:
// convexA, convexB: two convexes in the same space
// initialDir: an unnormalized start direction for GJK iteration
// minSep: the minimum distance at which the two objects are judged to be separated. Must be positive.
// convergenceRatio: minimum ratio of lowerBound/upperBound for convergence (a number close to, but less than, 1)
//
// returns DISTANT if a lower bound on distance is found that is > minSep
// OVERLAP if the objects are provably overlapping
// else CLOSE.
//
// for OVERLAP, the cachedData (vertex indices & search directions) can be passed to EPA
// for DISTANT, a direction is generated which provides a witness for the lower bound
// for CLOSE, a pair of closest points is generated, which provide (in MS space) a witness for the upper bound, and
// if nondegenerate, also a direction which witnesses the lower bound
//
// the degenerate flag is raised in the output if the algorithm encountered geometric instability while the minSep value is in [lower, upper].
// In particular, we are neither separated (which would require lower > minSep) nor overlapping (upper == 0), so this can only accompany a result of CLOSE.
// geometric instability can only happen very close to convergence, so unless very high tolerances are necessary, convergence can be assumed.
// in this case if the lower bound is zero, then the lower bound witness direction is not generated.
// So when CLOSE is returned, the upper bound should be used as the authoritative distance, and the direction should be taken from the closest points
// EPA routine.
//
// Input:
// convexA, convexB: two convexes in the same space
// cache: a cache of initial vertices produced by GJK. Must have size at least 2
// convergenceRatio: minimum ratio of lowerBound/upperBound for convergence (a number close to, but less than, 1)
//
// on output, the following are emitted:
// * a pair of closest points which are the best depenetration found in the allowed space
// * a lower bound and upper bound on penetration depth.
// * a direction corresponding to the lower bound (this direction *opposes* closestB - closestA), so if we translate object B by lowerBound * direction, we
// should approximately separate the shapes
//
// the degenerate flag is raised in the output if the algorithm encountered geometric instability before convergence, or ran out of space.
// in this case, the bounds, direction and closest points are still generated.
//
// The algorithm returns GjkResult::eOVERLAP in almost all cases. However, it's possible for EPA to find separation even if GJK found penetration,
// and in this anomalous case it returns GjkResult::eCLOSE
//
// // There is currently no direction for the upper bound, which would allow for reliable (if not minimal) separation in degenerate cases
}
#endif

View File

@@ -0,0 +1,87 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __TRI32DATA_H__
#define __TRI32DATA_H__
// A triangle referencing its three vertices by 32-bit index.
struct Triangle32Data
{
	PxU32 v[3];	// vertex indices

	// Default construction leaves the indices uninitialized on purpose.
	Triangle32Data() {}

	Triangle32Data(PxU32 v0, PxU32 v1, PxU32 v2) : v{ v0, v1, v2 }
	{
	}
};
// A Triangle32Data padded to 16 bytes; the pad word doubles as an optional
// user payload.
struct Triangle32DataPad
{
	PxU32 v[3];	// vertex indices
	PxU32 pad;	// padding / optional payload
	// Default construction leaves all members uninitialized on purpose.
	PX_CUDA_CALLABLE Triangle32DataPad() {}
	PX_CUDA_CALLABLE Triangle32DataPad(PxU32 v0, PxU32 v1, PxU32 v2, PxU32 v3)
	{
		v[0] = v0;
		v[1] = v1;
		v[2] = v2;
		pad = v3;
	}
	PX_CUDA_CALLABLE Triangle32DataPad(PxU32 v0, PxU32 v1, PxU32 v2)
	{
		v[0] = v0;
		v[1] = v1;
		v[2] = v2;
		pad = 0;
	}
	PX_CUDA_CALLABLE Triangle32DataPad(Triangle32Data t)
	{
		v[0] = t.v[0];
		v[1] = t.v[1];
		v[2] = t.v[2];
		pad = 0;
	}
	// Fixed: removed the "ipad = 0" default value. With it, a one-argument call
	// Triangle32DataPad(t) was ambiguous between this constructor and the
	// single-argument one above (which already yields pad = 0, so dropping the
	// default changes no behavior).
	PX_CUDA_CALLABLE Triangle32DataPad(Triangle32Data t, PxU32 ipad)
	{
		v[0] = t.v[0];
		v[1] = t.v[1];
		v[2] = t.v[2];
		pad = ipad;
	}
};
#endif

View File

@@ -0,0 +1,62 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __TYPEHELPERS_H__
#define __TYPEHELPERS_H__
// Converters between CUDA's float4 and the PhysX math types.
// float4 -> PxVec3 drops w; PxVec3 -> float4 writes w = 0.
PX_CUDA_CALLABLE static inline PxVec3 float4ToVec3(const float4 & data)
{
	return PxVec3(data.x, data.y, data.z);
}
PX_CUDA_CALLABLE static inline float4 vec3ToFloat4(const PxVec3 & data)
{
	return make_float4(data.x, data.y, data.z, 0.0f);
}
// Quaternions map (x, y, z, w) <-> (x, y, z, w) directly.
PX_CUDA_CALLABLE static inline PxQuat float4ToQuat(const float4 & data)
{
	return PxQuat(data.x, data.y, data.z, data.w);
}
PX_CUDA_CALLABLE static inline float4 quatToFloat4(const PxQuat & data)
{
	return make_float4(data.x, data.y, data.z, data.w);
}
// A PxTransform is split across two float4s: position (w = 0) and rotation quat.
PX_CUDA_CALLABLE static inline void transformToFloat4s(float4 & pos, float4 & rot, const PxTransform & transform)
{
	pos = make_float4(transform.p.x, transform.p.y, transform.p.z, 0.0f);
	rot = make_float4(transform.q.x, transform.q.y, transform.q.z, transform.q.w);
}
PX_CUDA_CALLABLE static inline void float4sToTransform(PxTransform & transform, const float4 & pos, const float4 & rot)
{
	transform.p = PxVec3(pos.x, pos.y, pos.z);
	transform.q = PxQuat(rot.x, rot.y, rot.z, rot.w);
}
#endif

View File

@@ -0,0 +1,208 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __BV32_TRAVERSAL_CUH__
#define __BV32_TRAVERSAL_CUH__
#include "copy.cuh"
#include "reduction.cuh"
// Not used directly -- a reference "callback" shape for bv32TreeTraversal below.
// Copy this struct when creating a new type of tree traversal and fill in the
// two intersect functions.
struct BV32TraversalTemplate
{
	PX_FORCE_INLINE __device__ BV32TraversalTemplate()
	{
	}
	//Called on leaf nodes to process them
	//A full warp will call the function. If primitiveIndex is 0xFFFFFFFF, then the thread has no leaf node to check.
	PX_FORCE_INLINE __device__ void intersectPrimitiveFullWarp(PxU32 primitiveIndex, PxU32 idxInWarp) const
	{
	}
	//Called to determine if the traversal should continue or stop on a branch node
	//A full warp will call the function. If hasBox is false, then the thread has no branch node to check.
	PX_FORCE_INLINE __device__ bool intersectBoxFullWarp(bool hasBox, const PxVec3& min, const PxVec3& max) const
	{
		if (hasBox)
			return true; //Actually test against the axis aligned box defined by min and max
		return false;
	}
};
// TW: On my laptop RTX 3080 the FPS of some FEM cloth VTs actually drops by about 10% when enabling optimized traversal...
#define OPTIMIZE_TRAVERSAL 0
// Warp-cooperative traversal of a BV32 tree (32-wide bounding-volume hierarchy).
//
// All 32 threads of a warp walk the tree together: for a branch node each
// thread tests one child box via callback.intersectBoxFullWarp, for a leaf
// each thread processes one primitive via callback.intersectPrimitiveFullWarp
// (see BV32TraversalTemplate above for the expected callback shape).
// sBv32Nodes is a warp-shared stack of encoded node indices for pending nodes;
// it must be large enough for the tree being traversed -- TODO confirm the
// sizing convention at the call sites.
template<typename T, unsigned int WarpsPerBlock>
__device__ static void bv32TreeTraversal(
	const physx::Gu::BV32DataPacked* bv32PackedNodes,
	int* sBv32Nodes,
	T& callback
)
{
#if OPTIMIZE_TRAVERSAL
	__shared__ Gu::BV32DataPacked shNode[WarpsPerBlock];
	const unsigned int warpId = threadIdx.y;
#endif
	//thread index in warp
	const unsigned int idxInWarp = threadIdx.x;
	PxU32 currentNodeIndex = 0; // start at the root
	const Gu::BV32DataPacked* PX_RESTRICT currentNode = &bv32PackedNodes[0];
	PxU32 nbStack = 0; // number of nodes on the stack of to-be-expanded nodes
	while (currentNode)
	{
#if OPTIMIZE_TRAVERSAL
		//This optimization was found in cudaParticleSystem.cu, line 2373
		// VR: This variant works ~6 times *faster* on my GTX 1080
		Gu::BV32DataPacked& node = shNode[warpId];
		warpCopy((uint*)&node, (uint*)currentNode, sizeof(Gu::BV32DataPacked));
		__syncwarp();
#else
		// VR: This variant works ~6 times *slower* on my GTX 1080
		const Gu::BV32DataPacked& node = *currentNode;
#endif
		__syncwarp(); //sBv32Nodes (shared memory) is read and written in the same loop - read and write must be separated with a sync
		//check to see whether this is a leaf or not (bit 0 of the encoded index)
		if (currentNodeIndex & 1)
		{
			const PxU32 leafOffset = (currentNodeIndex >> 1) & 31;
			//at max 32 primitives at a leaf node
			const PxU32 nbPrimitives = node.getNbReferencedPrimitives(leafOffset);
			const PxU32 primStartIndex = node.getPrimitiveStartIndex(leafOffset);
			// now every thread checks a primitive (triangle, tetrahedron etc) against the query geometry
			// result variables:
			PxU32 primitiveIdx = 0xffffffff;	// 0xffffffff = this thread has no primitive
			if (idxInWarp < nbPrimitives)
			{
				primitiveIdx = idxInWarp + primStartIndex;
			}
			callback.intersectPrimitiveFullWarp(primitiveIdx, idxInWarp);
		}
		else // not a leaf node
		{
			const PxU32 nbChildren = node.mNbNodes;
			// there are max 32 children nodes. Every thread checks whether its child's bounding box
			// would intersect with the query object (which can be a box, a ray or anything)
			PxVec3 min, max;
			bool hasBox = idxInWarp < nbChildren;
			if (hasBox)
			{
				min = PxLoad3(reinterpret_cast<const float4&>(node.mMin[idxInWarp]));
				max = PxLoad3(reinterpret_cast<const float4&>(node.mMax[idxInWarp]));
			}
			bool intersect = callback.intersectBoxFullWarp(hasBox, min, max);
			//if (!hasBox)
			//	intersect = false;
			/*if (workIndex == 0 )
			{
				printf("newActiveParticles %x idx %i Finished\n", newActiveParticles, idxInWarp);
			}*/
			// ML: the first one bit is to decide whether the node is a leaf or not. The subsequent 5 bits are used to
			//store the row index of mData so that if the node is a leaf we can get the reference triangle. The subsequent 26 bits
			//are used to store either the next node index if the node isn't a leaf or store the parent node index if the node is a leaf node.
			//Masking currentNodeIndex with 0xFFFFFFC0 clears the 6 lowest bits, so we can mask in our idxInWarp<<1 | 1 for leaf masks
			PxU32 currentIndex = ((currentNodeIndex) & 0xFFFFFFC0) | (idxInWarp << 1) | 1;
			//create node bitmap
			bool isValidNode = false;
			if (idxInWarp < nbChildren)
			{
				const PxU32 childOffset = node.getChildOffset(idxInWarp);
				isValidNode = intersect;
				if (isValidNode && !node.isLeaf(idxInWarp))
				{
					// branch child: store its node index in the high bits (leaf bit stays 0)
					currentIndex = (childOffset) << 6;
				}
			}
			// warp-wide compaction: each intersecting child gets a unique stack slot
			PxU32 resultWarp = __ballot_sync(FULL_MASK, isValidNode);
			/*if (idxInWarp == 0)
			{
				printf("ResultWarp = %x\n", resultWarp);
			}*/
			//PxU32 resultWarp = __ballot_sync(FULL_MASK, intersect);
			PxU32 offset = warpScanExclusive(resultWarp, idxInWarp);
			PxU32 validCount = __popc(resultWarp);
			if (isValidNode)
			{
				//printf("Push Bv32Nodes[%i] = %i mask = %x\n", nbStack + offset, currentIndex, newActiveParticles);
				sBv32Nodes[nbStack + offset] = currentIndex;
			}
			nbStack += validCount;
		}
		__syncwarp();
		// select the next node from the stack, same for all threads of the warp
		if (nbStack > 0)
		{
			currentNodeIndex = sBv32Nodes[--nbStack];
			currentNode = &bv32PackedNodes[currentNodeIndex >> 6];
			/*if (idxInWarp == 0)
			{
				printf("Pop CurrentIndex = %i, currentNode = %i, currentActiveParticles = %x\n", currentNodeIndex, currentNodeIndex >> 6, currentNodeActiveParticles);
			}*/
		}
		else
			currentNode = NULL;
	}
}
// Warp-cooperative push onto a global stack: every thread with an element
// obtains a unique slot via a warp-wide exclusive scan over the shared atomic
// counter, then writes its item. Items that would land at or beyond stackSize
// are silently dropped (the counter still advances).
PX_FORCE_INLINE __device__ void pushOntoStackFullWarp(bool threadPushesElement, PxU32* atomicCounter, uint4* PX_RESTRICT stackPtr, PxU32 stackSize, const uint4& stackItemToPush)
{
	const PxU32 slot = globalScanExclusiveSingleWarp(threadPushesElement, atomicCounter);
	const bool fits = slot < stackSize;
	if (threadPushesElement && fits)
		stackPtr[slot] = stackItemToPush;
}
#endif

View File

@@ -0,0 +1,476 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __CAPSULE_COLLISION_CUH__
#define __CAPSULE_COLLISION_CUH__
#include "distanceSegmentSegment.cuh"
#define ETD_CONVEX_EDGE_01 (1 << 3) // PT: important value, don't change
#define ETD_CONVEX_EDGE_12 (1 << 4) // PT: important value, don't change
#define ETD_CONVEX_EDGE_20 (1 << 5) // PT: important value, don't change
// Given barycentric coordinates (u along edge 0-1, v along edge 0-2) of a
// contact on a triangle, decides whether the triangle's face normal may be
// used for that contact. A face-interior hit always qualifies; a hit on a
// vertex or an edge qualifies only when none of the adjacent edges are marked
// convex in triFlags (ETD_CONVEX_EDGE_*), since convex features need a
// feature-specific normal.
__device__ __forceinline__ bool selectNormal(const PxReal u,
	const PxReal v, PxU8 triFlags)
{
	const PxReal lo = 1e-6f;
	const PxReal hi = 0.999999f;

	if (u < lo)
	{
		if (v < lo)
			return (triFlags & (ETD_CONVEX_EDGE_01 | ETD_CONVEX_EDGE_20)) == 0;	// vertex 0
		if (v > hi)
			return (triFlags & (ETD_CONVEX_EDGE_12 | ETD_CONVEX_EDGE_20)) == 0;	// vertex 2
		return (triFlags & ETD_CONVEX_EDGE_20) == 0;							// edge 0-2
	}

	if (u > hi)
	{
		if (v < lo)
			return (triFlags & (ETD_CONVEX_EDGE_01 | ETD_CONVEX_EDGE_12)) == 0;	// vertex 1
		return false;
	}

	if (v < lo)
		return (triFlags & ETD_CONVEX_EDGE_01) == 0;							// edge 0-1

	if (u + v >= 0.9999f)
		return (triFlags & ETD_CONVEX_EDGE_12) == 0;							// edge 1-2

	return true;	// face-interior contact: the face normal is always valid
}
// True when (v, w) are valid barycentric coordinates of a point inside (or
// within a small epsilon of) a triangle: both components in [-eps, 1 + eps]
// and their sum no greater than 1 + eps.
__device__ __forceinline__ bool isValidTriangleBarycentricCoord(
	const PxReal v, const PxReal w)
{
	const PxReal tolerance = 1e-6f;
	const PxReal lower = -tolerance;
	const PxReal upper = 1.f + tolerance;
	const bool vInRange = v >= lower && v <= upper;
	const bool wInRange = w >= lower && w <= upper;
	return vInRange && wInRange && (v + w) <= upper;
}
/*
	Squared distance between segment (p, q) and triangle (a, b, c).
	On exit:
	t is the barycentric coordinate of the closest point along the segment (0 at p, 1 at q)
	u is the barycentric coordinate of the closest triangle point along edge ab
	v is the barycentric coordinate of the closest triangle point along edge ac
	Returns 0 when the segment intersects the triangle.
*/
__device__ __forceinline__ PxReal distanceSegmentTriangleSquared(
	const PxVec3& p, const PxVec3& q,
	const PxVec3& a, const PxVec3& b,
	const PxVec3& c, PxReal& t, PxReal& u,
	PxReal& v)
{
	const PxVec3 pq = q - p;
	const PxVec3 ab = b - a;
	const PxVec3 ac = c - a;
	const PxVec3 bc = c - b;
	const PxVec3 ap = p - a;
	const PxVec3 aq = q - a;
	//This is used to calculate the barycentric coordinate
	const PxReal d00 = ab.dot(ab);
	const PxReal d01 = ab.dot(ac);
	const PxReal d11 = ac.dot(ac);
	const PxReal tDenom = d00 * d11 - d01 * d01;
	const PxReal bdenom = tDenom > 0.f ? 1.f / tDenom : PX_MAX_F32;// FSel(FIsGrtr(tDenom, zero), FRecip(tDenom), zero);
	const PxVec3 n = (ab.cross(ac)).getNormalized();// V3Normalize(V3Cross(ab, ac)); // normalize vector
	//compute the closest point of p and triangle plane abc
	const PxReal dist3 = ap.dot(n);// V3Dot(ap, n);
	const PxReal sqDist3 = dist3 * dist3;
	//compute the closest point of q and triangle plane abc
	const PxReal dist4 = aq.dot(n);// V3Dot(aq, n);
	const PxReal sqDist4 = dist4 * dist4;// FMul(dist4, dist4);
	const PxReal dMul = dist3 * dist4;//FMul(dist3, dist4);
	//const BoolV con = FIsGrtr(zero, dMul);
	// p and q on opposite sides of (or on) the plane: the segment may intersect the triangle
	if (dMul <= 0.f)
	{
		//compute the intersect point
		const PxReal nom = -n.dot(ap); // FNeg(V3Dot(n, ap));
		const PxReal denom = 1.f / n.dot(pq);// FRecip(V3Dot(n, pq));
		const PxReal t0 = nom * denom; // FMul(nom, denom);
		const PxVec3 ip = p + pq * t0;// V3ScaleAdd(pq, t, p);//V3Add(p, V3Scale(pq, t));
		const PxVec3 v2 = ip - a;// V3Sub(ip, a);
		const PxReal d20 = v2.dot(ab);// V3Dot(v2, ab);
		const PxReal d21 = v2.dot(ac);// V3Dot(v2, ac);
		const PxReal v00 = (d11 * d20 - d01 * d21) * bdenom; //FMul(FSub(FMul(d11, d20), FMul(d01, d21)), bdenom);
		const PxReal w00 = (d00 * d21 - d01 * d20) * bdenom; //FMul(FSub(FMul(d00, d21), FMul(d01, d20)), bdenom);
		// intersection point inside the triangle: distance is exactly zero
		if (isValidTriangleBarycentricCoord(v00, w00))
		{
			t = t0;
			u = v00;
			v = w00;
			return 0.f;
		}
	}
	const PxVec3 closestP31 = p - n * dist3;// V3NegScaleSub(n, dist3, p);//V3Sub(p, V3Scale(n, dist3));
	//const PxVec3 closestP30 = p;
	//Compute the barycentric coordinate for project point of p
	const PxVec3 pV20 = closestP31 - a;
	const PxReal pD20 = pV20.dot(ab);
	const PxReal pD21 = pV20.dot(ac);
	const PxReal v0 = (d11 * pD20 - d01 * pD21) * bdenom;// FMul(FSub(FMul(d11, pD20), FMul(d01, pD21)), bdenom);
	const PxReal w0 = (d00 * pD21 - d01 * pD20) * bdenom;// FMul(FSub(FMul(d00, pD21), FMul(d01, pD20)), bdenom);
	//check closestP3 is inside the triangle
	const bool con0 = isValidTriangleBarycentricCoord(v0, w0);
	const PxVec3 closestP41 = q - n * dist4;// V3NegScaleSub(n, dist4, q);// V3Sub(q, V3Scale(n, dist4));
	//const PxVec3 closestP40 = q;
	//Compute the barycentric coordinate for project point of q
	const PxVec3 qV20 = closestP41 - a;
	const PxReal qD20 = qV20.dot(ab);
	const PxReal qD21 = qV20.dot(ac);
	const PxReal v1 = (d11 * qD20 - d01 * qD21) * bdenom;// FMul(FSub(FMul(d11, qD20), FMul(d01, qD21)), bdenom);
	const PxReal w1 = (d00 * qD21 - d01 * qD20) * bdenom;// FMul(FSub(FMul(d00, qD21), FMul(d01, qD20)), bdenom);
	const bool con1 = isValidTriangleBarycentricCoord(v1, w1);
	if (con0 && con1)
	{
		/*
			both p and q project points are interior points:
			the closer endpoint-to-plane distance is the answer
		*/
		if (sqDist4 > sqDist3)
		{
			t = 0.f;
			u = v0;
			v = w0;
			return sqDist3;
		}
		else
		{
			t = 1.f;
			u = v1;
			v = w1;
			return sqDist4;
		}
	}
	else
	{
		// otherwise the closest triangle point may lie on one of the three edges:
		// compute segment-vs-edge distances for ab, bc and ac
		PxReal t00, t01, t02;
		PxReal t10, t11, t12;
		distanceSegmentSegmentSquared(p, pq, a, ab, t00, t10);
		distanceSegmentSegmentSquared(p, pq, b, bc, t01, t11);
		distanceSegmentSegmentSquared(p, pq, a, ac, t02, t12);
		const PxVec3 closestP00 = p + pq * t00;// V3ScaleAdd(pq, t00, p);
		const PxVec3 closestP01 = a + ab * t10;// V3ScaleAdd(ab, t01, a);
		PxVec3 tempV = closestP00 - closestP01;
		const PxReal sqDist0 = tempV.dot(tempV);
		const PxVec3 closestP10 = p + pq * t01;// V3ScaleAdd(pq, t10, p);
		const PxVec3 closestP11 = b + bc * t11;// V3ScaleAdd(bc, t11, b);
		tempV = closestP10 - closestP11;
		const PxReal sqDist1 = tempV.dot(tempV);
		const PxVec3 closestP20 = p + pq * t02;// V3ScaleAdd(pq, t20, p);
		const PxVec3 closestP21 = a + ac * t12;// V3ScaleAdd(ac, t21, a);
		tempV = closestP20 - closestP21;
		const PxReal sqDist2 = tempV.dot(tempV);
		// convert each edge parameter into triangle barycentric coordinates
		//edge ab
		const PxReal u10 = t10;
		const PxReal v10 = 0.f;
		//edge bc
		const PxReal u11 = 1.f - t11;
		const PxReal v11 = t11;
		//edge ac
		const PxReal u12 = 0.f;
		const PxReal v12 = t12;
		// pick the nearest of the three edges
		PxReal sqDistPE;
		PxReal uEdge, vEdge, tSeg;
		if (sqDist1 > sqDist0 && sqDist2 > sqDist0)
		{
			//edge ab
			sqDistPE = sqDist0;
			uEdge = u10;
			vEdge = v10;
			tSeg = t00;
		}
		else if (sqDist2 > sqDist1)
		{
			//edge bc
			sqDistPE = sqDist1;
			uEdge = u11;
			vEdge = v11;
			tSeg = t01;
		}
		else
		{
			//edge ac
			sqDistPE = sqDist2;
			uEdge = u12;
			vEdge = v12;
			tSeg = t02;
		}
		if (con0)
		{
			//p's project point is an interior point: compare against the best edge
			if (sqDistPE > sqDist3)
			{
				t = 0.f;
				u = v0;
				v = w0;
				return sqDist3;
			}
			else
			{
				t = tSeg;
				u = uEdge;
				v = vEdge;
				return sqDistPE;
			}
		}
		else if (con1)
		{
			//q's project point is an interior point: compare against the best edge
			if (sqDistPE > sqDist4)
			{
				t = 1.f;
				u = v1;
				v = w1;
				return sqDist4;
			}
			else
			{
				t = tSeg;
				u = uEdge;
				v = vEdge;
				return sqDistPE;
			}
		}
		else
		{
			// neither projection is interior: the edge result is the answer
			t = tSeg;
			u = uEdge;
			v = vEdge;
			return sqDistPE;
		}
	}
}
// Casts the two segment endpoints p and q along -normal onto the plane of
// triangle (a, b, c) (plane normal planeNormal). An endpoint generates a
// contact when its cast hits inside the triangle and the cast distance is
// below inflatedRadius. Writes up to two entries to outContacts/outPen and
// returns how many were written; outPen receives the cast distance along
// normal -- NOTE(review): the sign/penetration convention depends on the
// caller's normal orientation; confirm at the call sites.
__device__ __forceinline__ PxU32 generateContacts(
	const PxVec3& a, const PxVec3& b,
	const PxVec3& c, const PxVec3& planeNormal,
	const PxVec3& normal, const PxVec3& p,
	const PxVec3& q, const PxReal inflatedRadius,
	PxVec3* outContacts, PxReal* outPen)
{
	const PxVec3 ab = b - a;
	const PxVec3 ac = c - a;
	const PxVec3 ap = p - a;
	const PxVec3 aq = q - a;
	//This is used to calculate the barycentric coordinate
	const PxReal d00 = ab.dot(ab);// V3Dot(ab, ab);
	const PxReal d01 = ab.dot(ac);// V3Dot(ab, ac);
	const PxReal d11 = ac.dot(ac);// V3Dot(ac, ac);
	const PxReal tdenom = (d00 * d11 - d01 * d01);
	const PxReal bdenom = (tdenom > 0.f) ? (1.f / tdenom) : PX_MAX_F32;// FRecip(FSub(FMul(d00, d11), FMul(d01, d01)));
	//compute the intersect point of p and triangle plane abc
	const PxReal inomp = planeNormal.dot(-ap);// V3Dot(planeNormal, V3Neg(ap));
	const PxReal ideom = planeNormal.dot(normal);// V3Dot(planeNormal, normal);
	// ray parameter along "normal" to reach the plane (PX_MAX_F32 when parallel)
	const PxReal ipt = ideom > 0.f ? -(inomp / ideom) : PX_MAX_F32;// FSel(FIsGrtr(ideom, FZero()), FDiv(inomp, ideom), FZero());
	const PxVec3 closestP31 = p - normal * ipt;// V3ScaleAdd(normal, ipt, p);
	//const PxVec3 closestP30 = p;
	//Compute the barycentric coordinate for project point of p
	const PxVec3 pV20 = closestP31 - a;
	const PxReal pD20 = pV20.dot(ab);
	const PxReal pD21 = pV20.dot(ac);
	const PxReal v0 = (d11 * pD20 - d01 * pD21) * bdenom;// FMul(FSub(FMul(d11, pD20), FMul(d01, pD21)), bdenom);
	const PxReal w0 = (d00 * pD21 - d01 * pD20) * bdenom;// FMul(FSub(FMul(d00, pD21), FMul(d01, pD20)), bdenom);
	//check closestP3 is inside the triangle
	const bool con0 = isValidTriangleBarycentricCoord(v0, w0);
	//printf("v0 %f, w0 %f\n", v0, w0);
	PxU32 numContacts = 0;
	if (con0 && (inflatedRadius > ipt))
	{
		outContacts[numContacts] = p;
		outPen[numContacts] = ipt;
		numContacts++;
		//printf("con0 %i inflatedRadius %f dist %f pen %f\n", con0, inflatedRadius, dist3, -ipt);
	}
	// repeat for the second endpoint q
	const PxReal inomq = planeNormal.dot(-aq);
	const PxReal iqt = ideom > 0.f ? -(inomq / ideom) : PX_MAX_F32;// FSel(FIsGrtr(ideom, FZero()), FDiv(inomq, ideom), FZero());
	const PxVec3 closestP41 = q - normal * iqt;// V3ScaleAdd(normal, iqt, q);
	//const PxVec3 closestP40 = q;
	//Compute the barycentric coordinate for project point of q
	const PxVec3 qV20 = closestP41 - a;// V3Sub(closestP41, a);
	const PxReal qD20 = qV20.dot(ab);// V3Dot(qV20, ab);
	const PxReal qD21 = qV20.dot(ac);// V3Dot(qV20, ac);
	const PxReal v1 = (d11 * qD20 - d01 * qD21) * bdenom;// FMul(FSub(FMul(d11, qD20), FMul(d01, qD21)), bdenom);
	const PxReal w1 = (d00 * qD21 - d01 * qD20) * bdenom;// FMul(FSub(FMul(d00, qD21), FMul(d01, qD20)), bdenom);
	const bool con1 = isValidTriangleBarycentricCoord(v1, w1);
	if (con1 && (inflatedRadius > iqt))
	{
		outContacts[numContacts] = q;
		outPen[numContacts] = iqt;
		numContacts++;
		//printf("con1 %i inflatedRadius %f dist %f pen %f\n", con1, inflatedRadius, dist4, -iqt);
	}
	return numContacts;
}
__device__ __forceinline__ void generateEE(const PxVec3& p, const PxVec3& q, const PxReal sqInflatedRadius,
	const PxVec3& normal, const PxVec3& a, const PxVec3& b,
	PxVec3* outContacts, PxReal* outPen, PxU32& numContacts)
{
	// Build a plane through edge (a,b) whose normal lies in the triangle plane;
	// segment pq can only cross the edge if its endpoints straddle this plane.
	const PxVec3 edgeAB = b - a;
	const PxVec3 edgePlaneN = edgeAB.cross(normal);
	const PxReal planeD = edgePlaneN.dot(a);
	const PxReal projP = edgePlaneN.dot(p);
	const PxReal projQ = edgePlaneN.dot(q);
	const PxReal sideP = projP - planeD;
	const PxReal sideQ = projQ - planeD;
	// Both endpoints strictly on the same side => the segment cannot intersect the plane.
	if (sideP * sideQ > 0.f)
		return;
	const PxVec3 dir = q - p;
	const PxReal projDir = edgePlaneN.dot(dir);
	// Segment parallel to the edge plane => no single crossing point.
	if (projDir == 0.f)
		return;
	// Parametric crossing point of pq with the edge plane.
	const PxReal sVal = (planeD - projP) / projDir;
	const PxVec3 crossing = p + dir * sVal;
	// Reduce to a 2D segment/segment test: clamp the crossing point against the
	// edge extent using a direction perpendicular to the ray (crossing + normal).
	const PxVec3 sepN = normal.cross(dir);
	const PxVec3 rel = crossing - a;
	const PxReal num = sepN.dot(rel);
	const PxReal den = sepN.dot(edgeAB);
	const PxReal tVal = num / den;
	// Reject crossings outside the edge's [0,1] parametric range.
	// (Kept in this exact negated form so a NaN tVal falls through, matching
	// the original/CPU behavior.)
	if (tVal > 1.f || tVal < 0.f)
		return;
	// Vector from the closest point on the edge to the crossing point.
	const PxVec3 offset = rel - edgeAB * tVal;
	const PxReal dist2 = offset.dot(offset);
	if (sqInflatedRadius > dist2)
	{
		outContacts[numContacts] = crossing;
		outPen[numContacts] = offset.dot(normal); // signed distance along the contact normal
		numContacts++;
	}
}
__device__ __forceinline__ void generateEEContacts(const PxVec3& a, const PxVec3& b,
	const PxVec3& c, const PxVec3& normal,
	const PxVec3& p, const PxVec3& q, const PxReal sqInflatedRadius,
	PxVec3* outContacts, PxReal* outPens, PxU32& numContacts)
{
	// Test segment pq against the triangle edges in the fixed order
	// (a,b), (b,c), (a,c), stopping as soon as the 2-contact budget is filled.
	const PxVec3* edgeStart[3] = { &a, &b, &a };
	const PxVec3* edgeEnd[3]   = { &b, &c, &c };
	for (PxU32 e = 0; e < 3 && numContacts < 2; ++e)
		generateEE(p, q, sqInflatedRadius, normal, *edgeStart[e], *edgeEnd[e], outContacts, outPens, numContacts);
}
#endif

View File

@@ -0,0 +1,292 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include <cuda.h>
#include <cuda_runtime.h>
#include <assert.h>
//#include "foundation/PxVec3.h"
//#include "foundation/PxTransform.h"
#include "PxgContactManager.h"
#include "PxsContactManagerState.h"
#include "PxContact.h"
#include "PxgNpKernelIndices.h"
#include "PxgCommonDefines.h"
#include "reduction.cuh"
#include "stdio.h"
#include "PxNodeIndex.h"
using namespace physx;
extern "C" __host__ void initNarrowphaseKernels17() {}
// Stage 1 of contact-stream compression: one thread per narrowphase pair.
// Counts, per CUDA block, how many pairs produced at least one contact and
// writes that per-block total to gBlockNumPairs, so stage 2 can build an
// exclusive prefix sum over blocks and compact the touching pairs.
extern "C" __global__ void compressContactStage1(
	PxsContactManagerOutput* outputData, //input
	PxU32 numTotalPairs, //input
	PxU32* gBlockNumPairs //output
)
{
	const PxU32 warpPerBlock = PxgNarrowPhaseBlockDims::COMPRESS_CONTACT / WARP_SIZE;
	__shared__ PxU32 sWarpAccum[warpPerBlock]; // per-warp touching-pair count for the current iteration
	__shared__ PxU32 sAccum; // running block-wide total across iterations
	const PxU32 block_size = PxgNarrowPhaseGridDims::COMPRESS_CONTACT;
	// Each block strides over numIterationPerBlock chunks of blockDim.x pairs.
	const PxU32 totalBlockRequired = (numTotalPairs + (blockDim.x - 1)) / blockDim.x;
	const PxU32 numIterationPerBlock = (totalBlockRequired + (block_size - 1)) / block_size;
	const PxU32 idx = threadIdx.x;
	const PxU32 threadIndexInWarp = threadIdx.x&(WARP_SIZE - 1);
	const PxU32 warpIndex = threadIdx.x / WARP_SIZE;
	// Single thread zeroes the block accumulator before the loop.
	if (idx == (WARP_SIZE - 1))
	{
		sAccum = 0;
	}
	__syncthreads();
	for (PxU32 i = 0; i < numIterationPerBlock; ++i)
	{
		const PxU32 workIndex = i * blockDim.x + idx + numIterationPerBlock * blockIdx.x * blockDim.x;
		PxU32 nbContacts = 0;
		if (workIndex < numTotalPairs)
		{
			nbContacts = outputData[workIndex].nbContacts;
		}
		//we don't care about how many contacts are in the header; a pair either
		//has contacts or it doesn't
		const PxU32 contactAccum = __popc(__ballot_sync(FULL_MASK, nbContacts));
		// Last lane of each warp publishes the warp's count to shared memory.
		if (threadIndexInWarp == (WARP_SIZE - 1))
		{
			sWarpAccum[warpIndex] = contactAccum;
			//printf("blockIdx %i warpIndex %i contactAccum %i\n", blockIdx.x, warpIndex, contactAccum);
		}
		__syncthreads();
		// First warpPerBlock threads reduce the per-warp counts into sAccum.
		unsigned mask_idx = __ballot_sync(FULL_MASK, threadIndexInWarp < warpPerBlock);
		if (idx < warpPerBlock)
		{
			const PxU32 value = sWarpAccum[idx];
			const PxU32 res = warpReduction<AddOpPxU32, PxU32>(mask_idx, value);
			if (idx == (warpPerBlock - 1))
			{
				sAccum += res;
			}
		}
		__syncthreads();
	}
	// Publish this block's total touching-pair count.
	if (idx == (warpPerBlock - 1))
	{
		gBlockNumPairs[blockIdx.x] = sAccum;
		//printf("blockIdx.x %i numPairs %i \n", blockIdx.x, sAccum);
	}
}
//PxgNarrowPhaseGridDims::COMPRESS_CONTACT is 32, which means we can use one warp to do the run sum for
//block
// Stage 2 of contact-stream compression. Using the per-block counts from
// stage 1, it computes an exclusive prefix sum over blocks (one warp does
// this, since the grid size equals WARP_SIZE), then compacts every touching
// pair into outputPatchs, translating CPU-side stream pointers into GPU-side
// pointers via the (cpu base, gpu base) pairs passed in.
extern "C" __global__ void compressContactStage2(
	const PxgContactManagerInput* inputData, //input
	const PxsContactManagerOutput* outputData, //input
	const PxU32 numTotalPairs, //input
	PxNodeIndex* shapeToRigidRemapTable, //input
	PxActor** transformCacheIdToActorTable, //input
	PxU32* gBlockNumPairs, //input
	PxU8* cpuCompressedPatchesBase, //input
	PxU8* cpuCompressedContactsBase, //input
	PxReal* cpuForceBufferBase, //input
	PxU8* gpuCompressedPatchesBase, //input
	PxU8* gpuCompressedContactsBase, //input
	PxReal* gpuForceBufferBase, //input
	PxU8* gpuFrictionPatchesBase, //input
	PxU32* numPairs, //input
	PxGpuContactPair* outputPatchs, //output
	const PxU32 maxOutputPatches //input
)
{
	const PxU32 warpPerBlock = PxgNarrowPhaseBlockDims::COMPRESS_CONTACT / WARP_SIZE;
	const PxU32 block_size = PxgNarrowPhaseGridDims::COMPRESS_CONTACT;
	__shared__ PxU32 sWarpAccum[warpPerBlock]; // per-warp exclusive offsets within the current iteration
	__shared__ PxU32 sBlockHistogram[block_size]; // exclusive prefix sum of per-block pair counts
	__shared__ PxU32 sAccum; // pairs already emitted by this block in earlier iterations
	const PxU32 idx = threadIdx.x;
	const PxU32 threadIndexInWarp = threadIdx.x&(WARP_SIZE - 1);
	const PxU32 warpIndex = threadIdx.x / WARP_SIZE;
	if (idx == (WARP_SIZE - 1))
	{
		sAccum = 0;
	}
	//accumulate num pairs per block and compute exclusive run sum
	unsigned mask_idx = __ballot_sync(FULL_MASK, threadIndexInWarp < block_size);
	if (warpIndex == 0 && threadIndexInWarp < block_size)
	{
		const PxU32 blockNumPairs = gBlockNumPairs[threadIndexInWarp];
		const PxU32 result = warpScan<AddOpPxU32, PxU32>(mask_idx, blockNumPairs);
		//store exclusive run sum
		sBlockHistogram[threadIndexInWarp] = result - blockNumPairs;
		//printf("tInd %i blockNumPairs %i result %i\n", threadIndexInWarp, blockNumPairs, result);
		// Last lane holds the inclusive total over all blocks.
		// NOTE(review): this total is written even when it exceeds
		// maxOutputPatches, while writes below are clamped — presumably the
		// consumer clamps against maxOutputPatches again; confirm.
		if (threadIndexInWarp == block_size - 1)
		{
			(*numPairs) = result;
			//printf("blockIdx %i blockNumPairs %i numPairs %i\n", blockIdx.x, blockNumPairs, result);
		}
	}
	__syncthreads();
	const PxU32 totalBlockRequired = (numTotalPairs + (blockDim.x - 1)) / blockDim.x;
	const PxU32 numIterationPerBlock = (totalBlockRequired + (block_size - 1)) / block_size;
	const PxU32 blockStartIndex = sBlockHistogram[blockIdx.x];
	for (PxU32 i = 0; i < numIterationPerBlock; ++i)
	{
		const PxU32 workIndex = i * blockDim.x + idx + numIterationPerBlock * blockIdx.x * blockDim.x;
		PxU32 nbContacts = 0;
		if (workIndex < numTotalPairs)
		{
			nbContacts = outputData[workIndex].nbContacts;
		}
		const PxU32 threadMask = (1 << threadIndexInWarp) - 1; // mask of lanes below this one
		//we don't care about how many contacts are in the header; a pair either
		//has contacts or it doesn't
		const PxU32 contactMask = __ballot_sync(FULL_MASK, nbContacts);
		const PxU32 contactAccum = __popc(contactMask);
		const PxU32 offset = __popc(contactMask & threadMask); // lane's rank among touching lanes in this warp
		if (threadIndexInWarp == (WARP_SIZE - 1))
		{
			sWarpAccum[warpIndex] = contactAccum;
		}
		// Safe to read before the barrier: sAccum was last written before the
		// __syncthreads() that ended the previous iteration (or the initial one).
		const PxU32 prevAccum = sAccum;
		__syncthreads();
		// Exclusive scan of the per-warp counts; last thread bumps the running total.
		unsigned mask_idx = __ballot_sync(FULL_MASK, threadIndexInWarp < warpPerBlock);
		if (idx < warpPerBlock)
		{
			const PxU32 value = sWarpAccum[idx];
			const PxU32 res = warpScan<AddOpPxU32, PxU32>(mask_idx, value);
			sWarpAccum[idx] = res - value;
			if (idx == warpPerBlock - 1)
			{
				sAccum += res;
			}
		}
		__syncthreads();
		//write to output
		if (nbContacts > 0)
		{
			const PxgContactManagerInput& cmInput = inputData[workIndex];
			const PxsContactManagerOutput& cmOutput = outputData[workIndex];
			// Global compacted slot = in-warp rank + prior iterations + prior warps + prior blocks.
			const PxU32 index = offset + prevAccum + sWarpAccum[warpIndex] + blockStartIndex;
			if (index < maxOutputPatches)
			{
				// Convert CPU stream pointers to byte offsets so they can be
				// rebased onto the GPU mirrors of the same streams.
				PxU32 patchStartIndex = cmOutput.contactPatches - cpuCompressedPatchesBase;
				PxU32 contactIndex = cmOutput.contactPoints - cpuCompressedContactsBase;
				PxU32 forceIndex = 0xFFFFFFFF;
				if (cmOutput.contactForces)
					forceIndex = cmOutput.contactForces - cpuForceBufferBase;
				PxGpuContactPair & contact = outputPatchs[index];
				const PxU32 transformCacheRef0 = cmInput.transformCacheRef0;
				const PxU32 transformCacheRef1 = cmInput.transformCacheRef1;
				contact.transformCacheRef0 = transformCacheRef0;
				contact.transformCacheRef1 = transformCacheRef1;
				contact.nodeIndex0 = shapeToRigidRemapTable[transformCacheRef0];
				contact.nodeIndex1 = shapeToRigidRemapTable[transformCacheRef1];
				contact.contactForces = gpuForceBufferBase + forceIndex;
				// Friction patches are parallel to contact patches: same element
				// index, different element size.
				contact.frictionPatches = gpuFrictionPatchesBase + patchStartIndex / sizeof(PxContactPatch) * sizeof(PxFrictionPatch);
				contact.contactPatches = gpuCompressedPatchesBase + patchStartIndex;
				contact.contactPoints = gpuCompressedContactsBase + contactIndex;
				contact.nbPatches = cmOutput.nbPatches;
				contact.nbContacts = cmOutput.nbContacts;
				contact.actor0 = transformCacheIdToActorTable[transformCacheRef0];
				contact.actor1 = transformCacheIdToActorTable[transformCacheRef1];
			}
		}
	}
}
// update frictionPatches CPU pointers from contactPatches CPU pointers
// One thread per pair: mirrors each pair's contact-patch byte offset into the
// friction-patch stream. The two streams are parallel per element, but the
// element sizes differ, hence the sizeof ratio rescale of the byte offset.
extern "C" __global__ void updateFrictionPatches(
	const PxU32 pairCount, //input
	const PxU8* PX_RESTRICT startContactPatches, //input
	PxU8* PX_RESTRICT startFrictionPatches, //input (but we need it non-const)
	PxsContactManagerOutput* PX_RESTRICT outputs //input/output
)
{
	const PxU32 globalIdx = blockIdx.x * blockDim.x + threadIdx.x;
	if (globalIdx >= pairCount)
		return;
	PxsContactManagerOutput& out = outputs[globalIdx];
	out.frictionPatches = startFrictionPatches + (out.contactPatches - startContactPatches) * sizeof(PxFrictionPatch) / sizeof(PxContactPatch);
}

View File

@@ -0,0 +1,182 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __CONTACT_PATCH_UTILS_CUH__
#define __CONTACT_PATCH_UTILS_CUH__
#include "materialCombiner.cuh"
#include "PxsContactManagerState.h"
#include "PxgContactManager.h"
// Reserves room in the shared contact-point and force streams for this pair's
// nbContacts contacts (via atomic byte counters) and stores the resulting
// stream pointers on the pair's PxsContactManagerOutput.
// Returns the byte offset of the reservation in the contact stream, or
// 0xFFFFFFFF if the contact buffer overflowed (or nbContacts == 0, in which
// case nothing is touched).
PX_FORCE_INLINE __device__ PxU32 setContactPointAndForcePointers(PxsContactManagerOutput* PX_RESTRICT cmOutputs, PxgPatchAndContactCounters* PX_RESTRICT patchAndContactCounters,
	PxU8* PX_RESTRICT startContactPoints, PxU8* PX_RESTRICT startContactForces, PxU32 contactBytesLimit, PxU32 forceBytesLimit, PxU32 workIndex, PxU32 nbContacts)
{
	PxU32 contactByteOffset = 0xFFFFFFFF;
	PxU32 forceAndIndiceByteOffset = 0xFFFFFFFF;
	if (nbContacts)
	{
		PxsContactManagerOutput* output = &cmOutputs[workIndex];
		// Atomically claim nbContacts elements in each stream.
		contactByteOffset = atomicAdd(&(patchAndContactCounters->contactsBytes), sizeof(PxContact) * nbContacts);
		forceAndIndiceByteOffset = atomicAdd(&(patchAndContactCounters->forceAndIndiceBytes), sizeof(PxU32) * nbContacts);
		// NOTE(review): these overflow tests only verify that a single element
		// fits, although nbContacts elements were reserved above — presumably
		// the limits carry enough padding for a max-size patch; confirm.
		if ((contactByteOffset + sizeof(PxContact)) > contactBytesLimit)
		{
			patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::CONTACT_BUFFER_OVERFLOW);
			contactByteOffset = 0xFFFFFFFF; //overflow
		}
		if ((forceAndIndiceByteOffset + sizeof(PxU32)) > forceBytesLimit)
		{
			patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::FORCE_BUFFER_OVERFLOW);
			forceAndIndiceByteOffset = 0xFFFFFFFF; //overflow
		}
		// NOTE(review): only the contact offset is checked here; if only the
		// force buffer overflowed, contactForces is computed from the
		// 0xFFFFFFFF sentinel — verify downstream consumers tolerate this.
		if (contactByteOffset != 0xFFFFFFFF)
		{
			output->contactForces = reinterpret_cast<PxReal*>(startContactForces + forceAndIndiceByteOffset);
			output->contactPoints = startContactPoints + contactByteOffset;
		}
		else
		{
			output->contactForces = NULL;
			output->contactPoints = NULL;
		}
	}
	return contactByteOffset;
}
// Updates the pair's packed status word (allflagsStart: prevPatches |
// statusFlags | numPatches | 0, packed via merge()), records touch/patch
// change flags for event reporting, and atomically claims one PxContactPatch
// in the patch stream when the pair has contacts.
// Returns the byte offset of the claimed patch, or 0xFFFFFFFF if there are no
// contacts or the patch buffer overflowed (in which case the pair is demoted
// to "no touch" and its contact count cleared).
PX_FORCE_INLINE __device__ PxU32 registerContactPatch(PxsContactManagerOutput* PX_RESTRICT cmOutputs, PxgPatchAndContactCounters* PX_RESTRICT patchAndContactCounters,
	PxU32* PX_RESTRICT touchChangeFlags, PxU32* PX_RESTRICT patchChangeFlags, PxU8* PX_RESTRICT startContactPatches, PxU32 patchBytesLimit, PxU32 workIndex, PxU32 nbContacts)
{
	// Read the packed flags word and extract the previous status byte.
	PxU32 allflags = reinterpret_cast<PxU32*>(&((cmOutputs + workIndex)->allflagsStart))[0];
	PxU8 oldStatusFlags = u16Low(u32High(allflags));
	PxU8 statusFlags = oldStatusFlags;
	// Replace the touch-known bits with the new touch state.
	statusFlags &= (~PxsContactManagerStatusFlag::eTOUCH_KNOWN);
	if (nbContacts)
		statusFlags |= PxsContactManagerStatusFlag::eHAS_TOUCH;
	else
		statusFlags |= PxsContactManagerStatusFlag::eHAS_NO_TOUCH;
	PxU8 prevPatches = u16High(u32Low(allflags)); //Get out the current number of patches to store as the previous frame's number of patches
	bool previouslyHadTouch = oldStatusFlags & PxsContactManagerStatusFlag::eHAS_TOUCH;
	bool prevTouchKnown = oldStatusFlags & PxsContactManagerStatusFlag::eTOUCH_KNOWN;
	// GPU narrowphase emits at most one patch per pair here.
	PxU8 numPatches = (nbContacts > 0) ? 1 : 0;
	bool currentlyHasTouch = nbContacts > 0;
	// Touch changed if the touch state flipped or was never known.
	const bool change = (previouslyHadTouch ^ currentlyHasTouch) || (!prevTouchKnown);
	touchChangeFlags[workIndex] = change;
	patchChangeFlags[workIndex] = (prevPatches != numPatches);
	// Re-pack: [prevPatches | statusFlags | numPatches | 0].
	reinterpret_cast<PxU32*>(&((cmOutputs + workIndex)->allflagsStart))[0] = merge(merge(prevPatches, statusFlags),
		merge(numPatches, 0));
	cmOutputs[workIndex].nbContacts = nbContacts;
	//fill in patch information
	PxU32 patchIndex = 0xFFFFFFFF;
	if (nbContacts)
	{
		patchIndex = atomicAdd(&(patchAndContactCounters->patchesBytes), sizeof(PxContactPatch));
		if ((patchIndex + sizeof(PxContactPatch)) > patchBytesLimit)
		{
			// Overflow: roll the pair back to "no touch, zero patches" and
			// restore the change flags relative to the previous frame.
			patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::PATCH_BUFFER_OVERFLOW);
			patchIndex = 0xFFFFFFFF; //overflow
			statusFlags &= (~PxsContactManagerStatusFlag::eTOUCH_KNOWN);
			statusFlags |= PxsContactManagerStatusFlag::eHAS_NO_TOUCH;
			reinterpret_cast<PxU32*>(&((cmOutputs + workIndex)->allflagsStart))[0] = merge(merge(prevPatches, statusFlags), 0);
			cmOutputs[workIndex].nbContacts = 0;
			touchChangeFlags[workIndex] = previouslyHadTouch;
			patchChangeFlags[workIndex] = prevPatches != 0;
		}
		else
		{
			(cmOutputs + workIndex)->contactPatches = startContactPatches + patchIndex;
		}
	}
	return patchIndex;
}
// Writes one PxContactPatch into the patch stream at the given byte offset,
// filling in the contact normal, contact count, and the material blended from
// the two shapes. A patchIndex of 0xFFFFFFFF marks a failed allocation
// (buffer overflow) and results in no write.
PX_FORCE_INLINE __device__ void insertIntoPatchStream(const PxsMaterialData* PX_RESTRICT materials, PxU8* PX_RESTRICT patchStream,
	const PxgShape& shape0, const PxgShape& shape1, PxU32 patchIndex, const PxVec3& normal, PxU32 nbContacts)
{
	if (patchIndex == 0xFFFFFFFF)
		return;
	const PxU16 matId0 = shape0.materialIndex;
	const PxU16 matId1 = shape1.materialIndex;
	// Blend the two shapes' surface properties into one contact material.
	PxReal restitution, dynamicFriction, staticFriction, damping;
	PxU32 materialFlags;
	combineMaterials(materials, matId0,
		matId1,
		materialFlags,
		staticFriction,
		dynamicFriction,
		restitution,
		damping
	);
	PxContactPatch& patch = *reinterpret_cast<PxContactPatch*>(patchStream + patchIndex);
	patch.normal = normal;
	patch.nbContacts = nbContacts;
	patch.startContactIndex = 0;
	patch.staticFriction = staticFriction;
	patch.dynamicFriction = dynamicFriction;
	patch.damping = damping;
	patch.restitution = restitution;
	patch.materialIndex0 = matId0;
	patch.materialIndex1 = matId1;
	assert(materialFlags <= PX_MAX_U8);
	patch.materialFlags = PxU8(materialFlags);
	patch.internalFlags = 0;
	// Unit mass modifiers: no per-contact mass scaling applied.
	patch.mMassModification.linear0 = 1.0f;
	patch.mMassModification.linear1 = 1.0f;
	patch.mMassModification.angular0 = 1.0f;
	patch.mMassModification.angular1 = 1.0f;
}
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,394 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxBounds3.h"
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxTransform.h"
#include "geometry/PxGeometry.h"
#include "geometry/PxHeightFieldSample.h"
#include "PxgContactManager.h"
#include "PxgNpKernelIndices.h"
#include "PxgPersistentContactManifold.h"
#include "PxgSimulationCoreDesc.h"
#include "PxsContactManagerState.h"
#include "PxsTransformCache.h"
#include "convexNpCommon.h"
#include "cudaNpCommon.h"
#include "PxgCommonDefines.h"
#include "dataReadWriteHelper.cuh"
#include "heightfieldUtil.cuh"
#include "manifold.cuh"
#include "midphaseAllocate.cuh"
#include <vector_types.h>
using namespace physx;
extern "C" __host__ void initNarrowphaseKernels0() {}
PX_ALIGN_PREFIX(16)
// Per-warp shared-memory scratch for the heightfield midphase: caches the
// convex shape's transform in the heightfield's local (unscaled) frame plus
// the two shapes' material indices, so all lanes of the warp can read them.
// (The "HeigtField" spelling is a typo, but the name is referenced elsewhere
// and must stay as-is.)
struct HeigtFieldDataScratch
{
	PxTransform convexToHeightfieldNoScale; // convex -> heightfield local space, scale not applied
	PxU32 convexShape_materialIndex;
	PxU32 heightfieldShape_materialIndex;
}PX_ALIGN_SUFFIX(16);
// Midphase for convex/sphere/capsule vs. heightfield, one warp per contact
// manager. If the cached persistent multi-manifold is still valid the pair is
// marked CONVEX_TRIMESH_CACHED; otherwise the warp sweeps the heightfield
// cells overlapping the shape's inflated bounds and appends one
// (cmIdx, triangleIndex, localPairIdx, shapeRef1) tuple per candidate triangle
// to stackBasePtr for the full contact-gen pass, clamped to maxPairs.
// NOTE(review): the WarpsPerBlock template parameter and the cmOutputs
// argument are not referenced in this body.
template<unsigned int WarpsPerBlock>
__device__ static inline void heightfieldMidphaseCore(
	PxU32 numContactManagers,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,
	PxgPersistentContactMultiManifold* PX_RESTRICT multiManifolds,
	PxsContactManagerOutput * PX_RESTRICT cmOutputs,
	uint4* PX_RESTRICT stackBasePtr,
	PxU32* PX_RESTRICT nbPairsFound,
	PxU32* PX_RESTRICT midphasePairsNum,
	PxU32* PX_RESTRICT midphasePairsNumPadded,
	HeigtFieldDataScratch* s_warpScratch,
	const PxU32 maxPairs
)
{
	//thread index in warp
	const unsigned int idxInWarp = threadIdx.x;
	//warp index
	const unsigned int warpIdx = threadIdx.y;
	unsigned int cmIdx = warpIdx + blockIdx.x * blockDim.y;
	//this is number of contact managers
	PxU32 nbPairsPerCM = 0;
	if (cmIdx < numContactManagers)
	{
		PxgContactManagerInput npWorkItem;
		PxgContactManagerInput_ReadWarp(npWorkItem, cmInputs, cmIdx);
		PxsCachedTransform transformCached, heightfieldTransformCached;
		PxsCachedTransform_ReadWarp(transformCached, transformCache + npWorkItem.transformCacheRef0);
		PxsCachedTransform_ReadWarp(heightfieldTransformCached, transformCache + npWorkItem.transformCacheRef1);
		const PxTransform shapeToHeightfieldNoScale = heightfieldTransformCached.transform.transformInv(transformCached.transform);
		//read convex/sphere shape
		PxgShape shape;
		PxgShape_ReadWarp(shape, gpuShapes + npWorkItem.shapeRef0);
		// Manifold-invalidation tolerances depend on the shape type: spheres and
		// capsules use their radius as the margin; convexes derive the margin
		// from their extents.
		PxReal ratio, minMargin, breakingThresholdRatio;
		if (shape.type == PxGeometryType::eSPHERE)
		{
			minMargin = shape.scale.scale.x; //sphere radius
			ratio = 0.02f;
			breakingThresholdRatio = 0.05f;
		}
		else if (shape.type == PxGeometryType::eCAPSULE)
		{
			minMargin = shape.scale.scale.y; //capsule radius
			ratio = 0.02f; // was a double literal (0.02); use a float literal like the other branches
			breakingThresholdRatio = 0.05f;
		}
		else
		{
			PxU8* hullPtr = reinterpret_cast<PxU8*>(shape.hullOrMeshPtr);
			const float4 extents4_f = *reinterpret_cast<float4*>(hullPtr + sizeof(float4) * 2);
			minMargin = calculatePCMConvexMargin(extents4_f, shape.scale.scale, toleranceLength);
			ratio = 0.2f;
			breakingThresholdRatio = 0.8f;
		}
		/*PxU8* hullPtr = reinterpret_cast<PxU8*>(convexShape.hullOrMeshPtr);
		const float4 extents4_f = *reinterpret_cast<float4*>(hullPtr + sizeof(float4) * 2);
		const PxReal minMargin = calculatePCMConvexMargin(extents4_f, convexShape.scale.scale, toleranceLength);*/
		// Decide whether the cached manifold can be reused: if the relative pose
		// moved too far the manifold is invalidated; otherwise refresh it and
		// check whether any cached contacts were lost.
		bool lostContacts = false;
		const bool invalidate = invalidateManifold(shapeToHeightfieldNoScale, multiManifolds[cmIdx], minMargin, ratio);
		if (!invalidate)
		{
			const PxReal projectBreakingThreshold = minMargin * breakingThresholdRatio;
			lostContacts = refreshManifolds(
				shapeToHeightfieldNoScale,
				projectBreakingThreshold,
				multiManifolds + cmIdx
			);
		}
		bool fullContactGen = invalidate || lostContacts;
		//read heightfield shape
		PxgShape heightFieldShape;
		PxgShape_ReadWarp(heightFieldShape, gpuShapes + npWorkItem.shapeRef1);
		// Lane 0 publishes the pair's transform/material data to warp scratch.
		if (idxInWarp == 0)
		{
			s_warpScratch->convexToHeightfieldNoScale = shapeToHeightfieldNoScale;
			s_warpScratch->convexShape_materialIndex = shape.materialIndex;
			s_warpScratch->heightfieldShape_materialIndex = heightFieldShape.materialIndex;
		}
		__syncwarp();
		//if invalidate is true, generate full contacts
		if (fullContactGen)
		{
			// Heightfield layout: [nbRows, nbCols, samples...].
			PxU32* heightfieldData = reinterpret_cast<PxU32*>(heightFieldShape.hullOrMeshPtr);
			const PxU32 nbRows = heightfieldData[0];
			const PxU32 nbCols = heightfieldData[1];
			PxHeightFieldSample* samples = reinterpret_cast<PxHeightFieldSample*>(&heightfieldData[2]);
			const PxReal oneOverHeightScale = 1.f / heightFieldShape.scale.scale.y;
			const PxReal oneOverRowScale = 1.f / PxAbs(heightFieldShape.scale.scale.x);
			const PxReal oneOverlColScale = 1.f / PxAbs(heightFieldShape.scale.scale.z);
			PxBounds3 worldBound;
			PxBounds3_ReadWarp(worldBound, bounds + npWorkItem.transformCacheRef0);
			//bound is in world space, we need to transform the bound to the local space of height field
			PxBounds3 localBound = PxBounds3::transformFast(heightfieldTransformCached.transform.getInverse(), worldBound);
			const PxReal contactDist = contactDistance[npWorkItem.transformCacheRef0] + contactDistance[npWorkItem.transformCacheRef1];
			localBound.fattenFast(contactDist);
			// Move the bounds into unscaled heightfield grid units.
			localBound.minimum.x *= oneOverRowScale;
			localBound.minimum.y *= oneOverHeightScale;
			localBound.minimum.z *= oneOverlColScale;
			localBound.maximum.x *= oneOverRowScale;
			localBound.maximum.y *= oneOverHeightScale;
			localBound.maximum.z *= oneOverlColScale;
			//row scale: a negative scale mirrors the axis, so min/max swap
			if (heightFieldShape.scale.scale.x < 0.f)
			{
				//swap min and max row scale
				const PxReal temp = localBound.minimum.x;
				localBound.minimum.x = localBound.maximum.x;
				localBound.maximum.x = temp;
			}
			//col scale
			if (heightFieldShape.scale.scale.z < 0.f)
			{
				PxReal swap = localBound.minimum.z;
				localBound.minimum.z = localBound.maximum.z;
				localBound.maximum.z = swap;
			}
			bool boundsDontOverlap = false;
			// this tests if the complete shape is outside of the bounds of the HF in the XZ plane.
			if ((localBound.minimum.x > nbRows - 1) || (localBound.minimum.z > nbCols - 1)
				|| (localBound.maximum.x < 0) || (localBound.maximum.z < 0))
			{
				boundsDontOverlap = true;
			}
			if (!boundsDontOverlap)
			{
				PxU32 minRow = getMinRow(localBound.minimum.x, nbRows);
				PxU32 maxRow = getMaxRow(localBound.maximum.x, nbRows);
				PxU32 minColumn = getMinColumn(localBound.minimum.z, nbCols);
				PxU32 maxColumn = getMaxColumn(localBound.maximum.z, nbCols);
				bool noTriangles = false;
				// AD: This test whether we have any triangles at all.
				// Given the clamping above I can only see this happening if we have a
				// flat shape that lies exactly on one of the grid lines for sampling.
				// Also, the 2x looks unnecessary here if my basic math isn't failing me.
				// This is the same code as CPU and has been there since 2009, probably before,
				// so I'm not going to change it now.
				if ((2 * (maxColumn - minColumn) * (maxRow - minRow)) == 0)
				{
					noTriangles = true;
				}
				if (!noTriangles)
				{
					const PxReal miny = localBound.minimum.y;
					const PxReal maxy = localBound.maximum.y;
					const PxU32 columnSpan = maxColumn - minColumn;
					//we have two materials corresponding to one vertexIndex, so each thread will deal with one of the materials
					const PxU32 totalNumProcessed = (maxRow - minRow) * columnSpan * 2;
					// Warp-strided loop: each lane tests one (cell, material) slot.
					for (PxU32 i = 0; i < totalNumProcessed; i += WARP_SIZE)
					{
						bool result = false;
						PxU32 triangleIndex = 0xFFffFFff;
						const PxU32 workIndex = idxInWarp + i;
						if (workIndex < totalNumProcessed)
						{
							const PxU32 index = workIndex / 2;
							const PxU32 vertexIndex = (minRow + index / columnSpan) * nbCols + (minColumn + index % columnSpan);
							assert(isValidVertex(vertexIndex, nbRows, nbCols));
							// Heights of the cell's four corner samples.
							PxReal h0 = getHeight(vertexIndex, samples);
							PxReal h1 = getHeight(vertexIndex + 1, samples);
							PxReal h2 = getHeight(vertexIndex + nbCols, samples);
							PxReal h3 = getHeight(vertexIndex + nbCols + 1, samples);
							// Reject the cell if the bounds lie entirely above or below it.
							const bool con0 = maxy < h0 && maxy < h1 && maxy < h2 && maxy < h3;
							const bool con1 = miny > h0 && miny > h1 && miny > h2 && miny > h3;
							if (!(con0 || con1))
							{
								const PxHeightFieldSample& sample = getSample(vertexIndex, samples);
								const bool isMaterial1 = (workIndex & 1) ? 1 : 0;
								PxU32 material = isMaterial1 ? sample.materialIndex1 : sample.materialIndex0;
								if (material != PxHeightFieldMaterial::eHOLE)
								{
									triangleIndex = isMaterial1 ? ((vertexIndex << 1) + 1) : (vertexIndex << 1);
									result = true;
								}
							}
						}
						// Warp-wide compaction of this iteration's hits.
						PxU32 resultWarp = __ballot_sync(FULL_MASK, result);
						PxU32 offset = warpScanExclusive(resultWarp, idxInWarp);
						PxU32 validCount = __popc(resultWarp);
						// Allocate only amount of memory, needed for single warp-wide write
						PxU32 prevNbPairs = 0xFFffFFff;
						if (idxInWarp == 0 && validCount > 0)
						{
							prevNbPairs = atomicAdd(nbPairsFound, validCount);
						}
						prevNbPairs = __shfl_sync(FULL_MASK, prevNbPairs, 0);
						// Drop pairs that would exceed the stack capacity.
						if (result && (prevNbPairs + offset) < maxPairs)
						{
							stackBasePtr[prevNbPairs + offset] = make_uint4(cmIdx, triangleIndex, nbPairsPerCM + offset, npWorkItem.shapeRef1);
						}
						if ((validCount > 0) && ((validCount + prevNbPairs) >= maxPairs))
						{
							validCount = PxMax(maxPairs, prevNbPairs) - prevNbPairs;
						}
						assert(((validCount + prevNbPairs) <= maxPairs) || (validCount == 0));
						nbPairsPerCM += validCount;
					}
				} // noTriangles
			} // boundsDontOverlap
		}
		// Reserve this pair's slice in the midphase pair arrays; the padded
		// counter rounds up to a multiple of 4 and doubles the space.
		PxU32 prevIntermArraysOffset = 0xFFffFFff;
		PxU32 prevIntermArraysPaddedOffset = 0xFFffFFff;
		if (idxInWarp == 0 && nbPairsPerCM > 0)
		{
			prevIntermArraysOffset = atomicAdd(midphasePairsNum, nbPairsPerCM);
			prevIntermArraysPaddedOffset = atomicAdd(midphasePairsNumPadded, ((nbPairsPerCM + 3)&(~3)) * 2); // AD: we need 2x space for the radix sort.
		}
		prevIntermArraysOffset = __shfl_sync(FULL_MASK, prevIntermArraysOffset, 0);
		prevIntermArraysPaddedOffset = __shfl_sync(FULL_MASK, prevIntermArraysPaddedOffset, 0);
		// Write the pair descriptor; a cached manifold is flagged via the count
		// field (CONVEX_TRIMESH_CACHED) instead of a triangle count.
		ConvexMeshPair pairInfo;
		pairInfo.aToB = s_warpScratch->convexToHeightfieldNoScale;
		pairInfo.cmIndex = cmIdx;
		pairInfo.startIndex = prevIntermArraysOffset;
		pairInfo.count = fullContactGen ? nbPairsPerCM : CONVEX_TRIMESH_CACHED;
		pairInfo.roundedStartIndex = prevIntermArraysPaddedOffset;
		pairInfo.materialIndices = make_uint2(s_warpScratch->convexShape_materialIndex, s_warpScratch->heightfieldShape_materialIndex);
		ConvexMeshPair_WriteWarp(cvxTrimeshPair + cmIdx, pairInfo);
		assert(*midphasePairsNum <= maxPairs);
	}
}
// Kernel entry point for the convex-vs-heightfield midphase: sets up per-warp
// shared scratch, derives the pair capacity from the stack size, and forwards
// everything to heightfieldMidphaseCore.
extern "C" __global__ void convexHeightFieldMidphase(
	PxU32 numContactManagers,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,
	PxgPersistentContactMultiManifold* PX_RESTRICT multiManifolds,
	PxsContactManagerOutput* PX_RESTRICT cmOutputs,
	PxU8* PX_RESTRICT stackPtr,
	PxU32* PX_RESTRICT stackOffset,
	PxU32* PX_RESTRICT midphasePairsNum,
	PxU32* PX_RESTRICT midphasePairsNumPadded,
	const PxU32 stackSizeBytes
)
{
	// One scratch row per warp, reinterpreted as HeigtFieldDataScratch by the core.
	__shared__ PxU32 sScratch[MIDPHASE_WARPS_PER_BLOCK][WARP_SIZE * 2];
	const PxU32 pairCapacity = calculateMaxPairs(stackSizeBytes, numContactManagers);
	heightfieldMidphaseCore<MIDPHASE_WARPS_PER_BLOCK>(
		numContactManagers, toleranceLength, cmInputs, transformCache,
		bounds, contactDistance, gpuShapes, cvxTrimeshPair,
		multiManifolds, cmOutputs,
		reinterpret_cast<uint4*>(stackPtr), stackOffset,
		midphasePairsNum, midphasePairsNumPadded,
		reinterpret_cast<HeigtFieldDataScratch*>(sScratch[threadIdx.y]),
		pairCapacity);
}

View File

@@ -0,0 +1,997 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include <cuda.h>
#include <cuda_runtime.h>
#include <assert.h>
#include "foundation/PxVec3.h"
#include "foundation/PxTransform.h"
#include "geometry/PxMeshScale.h"
#include "convexNpCommon.h"
#include "cudaNpCommon.h"
#include "PxgPersistentContactManifold.h"
#include "PxgContactManager.h"
#include "PxgConvexConvexShape.h"
#include "PxgNpKernelIndices.h"
#include "PxsTransformCache.h"
#include "PxgCommonDefines.h"
#include "reduction.cuh"
#include "nputils.cuh"
#include "midphaseAllocate.cuh"
#include "dataReadWriteHelper.cuh"
#include "convexTriangle.cuh"
#include "heightfieldUtil.cuh"
#include "sphereTriangle.cuh"
#include "sphereCollision.cuh"
#include "capsuleTriangle.cuh"
#include "geometry/PxHeightFieldFlag.h"
#include "geometry/PxGeometry.h"
using namespace physx;
// Empty host-side stub; presumably exists so this translation unit's kernels get
// pulled in / registered by the module loader -- TODO confirm against the kernel
// registration mechanism.
extern "C" __host__ void initNarrowphaseKernels1() {}
////ML: for heightfield, the maximum numbers of adjacent verts will be 6
//#define HEIGHTFIELD_MAX_ADJACENCY_VERT_INDICE 6
//#define HEIGHTFIELD_MAX_PROCESS_ADJACENCY_VERT_INDICE 9
#define HEIGHTFIELD_MAX_VERTS 18 //allow duplication
//
//#define HEIGHTFIELD_DEBUG 1
// Minimal triangle representation (three vertices) used by the height-field
// collision routines in this file.
struct PxgTriangle
{
public:
	// Face normal scaled by twice the triangle's area (i.e. NOT normalized);
	// callers normalize it themselves when they need a unit normal.
	__device__ PX_FORCE_INLINE PxVec3 denormalizedNormal() const
	{
		const PxVec3 edge01 = verts[1] - verts[0];
		const PxVec3 edge02 = verts[2] - verts[0];
		return edge01.cross(edge02);
	}

	PxVec3 verts[3];
};
// Fetches one height-field triangle into shape space.
// - triLoc:            out, the three triangle vertices after applying the
//                      height-field scale (hf2shapep).
// - adjacencyIndices:  optional out (may be NULL), the indices of the three
//                      triangles adjacent to this one.
// - triangleIndex:     height-field triangle index to fetch.
// - scale:             height-field scale; x = row scale, y = height scale,
//                      z = column scale.
// - rows/columns/samples: height-field dimensions and sample array.
//
// Note: the previously computed-but-unused `handedness` vector was removed;
// only the winding flip (`wrongHanded`) affects the output.
__device__ static void getTriangle(PxgTriangle& triLoc, PxU32* adjacencyIndices, const PxU32 triangleIndex, const PxMeshScale& scale, const PxU32 rows, const PxU32 columns, const PxHeightFieldSample* samples)
{
	const PxReal rowScale = scale.scale.x;
	const PxReal heightScale = scale.scale.y;
	const PxReal columnScale = scale.scale.z;

	// A negative row or column scale mirrors the height field; each mirroring
	// flips the triangle winding, so track the net flip to keep the output
	// winding consistent.
	bool wrongHanded = false;
	if (columnScale < 0.f)
	{
		wrongHanded = !wrongHanded;
	}
	if (rowScale < 0.f)
	{
		wrongHanded = !wrongHanded;
	}

	// When the winding is flipped, vertices 1 and 2 (and the matching adjacency
	// slots) are swapped via the bool-to-int arithmetic below.
	PxU32 tVertexIndices[3];
	getTriangleVertexIndices(triangleIndex, tVertexIndices[0], tVertexIndices[1 + wrongHanded], tVertexIndices[2 - wrongHanded], columns, samples);

	if (adjacencyIndices)
	{
		getTriangleAdjacencyIndices(triangleIndex, adjacencyIndices[wrongHanded ? 2 : 0], adjacencyIndices[1], adjacencyIndices[wrongHanded ? 0 : 2], rows, columns, samples);
	}

	// Scale each vertex from height-field cell coordinates into shape space.
	for (PxU32 vi = 0; vi < 3; vi++)
	{
		const PxVec3 vertex = getVertex(tVertexIndices[vi], columns, samples);
		triLoc.verts[vi] = hf2shapep(vertex, rowScale, heightScale, columnScale);
	}
}
// Loads one height-field triangle (plus its three adjacent-triangle indices)
// and stores it, transformed into the convex shape's local space, into the
// warp's ConvexMeshScratch. Thread 0 writes the scalar members and threads
// 0..2 write the per-vertex/per-adjacency data; the caller synchronizes the
// warp afterwards (see convexHeightfieldNarrowphaseCore).
__device__ void buildTriangleInformations(const PxU32 triangleIdx, PxgShape& heightfieldShape, ConvexMeshScratch* s_scratch,
	PxTransform& trimeshToConvexTransform)
{
	// Height-field blob layout as read here: rows, columns, then the sample array.
	const PxU32 * heightfieldGeomPtr = reinterpret_cast<const PxU32 *>(heightfieldShape.hullOrMeshPtr);
	const PxU32 rows = *heightfieldGeomPtr++;
	const PxU32 columns = *heightfieldGeomPtr++;
	const PxHeightFieldSample* samples = reinterpret_cast<const PxHeightFieldSample*>(heightfieldGeomPtr);

	//PxU32 vertexIndices[3];
	PxU32 adjacencyIndices[3];
	PxgTriangle triLocal; // height field local space triangle
	getTriangle(triLocal, adjacencyIndices, triangleIdx, heightfieldShape.scale, rows, columns, samples);

	// Triangle normal rotated into the convex shape's local frame.
	const PxVec3 triNormal = triLocal.denormalizedNormal();
	const PxVec3 triangleLocNormal = trimeshToConvexTransform.rotate(triNormal).getNormalized();

	if (threadIdx.x < 1)
	{
		s_scratch->triangleLocNormal = triangleLocNormal;
		s_scratch->trimeshToConvexTransform = trimeshToConvexTransform;
		s_scratch->trimeshVerts = NULL;
		//s_scratch->trimeshVerts = trimeshVerts;
	}
	if (threadIdx.x < 3)
	{
		// Assign the adjacent triangle indices to the scratch memory (one per lane).
		reinterpret_cast<PxU32*>(&s_scratch->triAdjTrisIdx)[threadIdx.x] = adjacencyIndices[threadIdx.x];
		// Assign the triangle vertices, transformed into the convex hull's local
		// space, to the scratch memory (one per lane).
		s_scratch->triLocVerts[threadIdx.x] = trimeshToConvexTransform.transform(triLocal.verts[threadIdx.x]);
	}
}
// Warp-wide narrowphase for a single (convex, height-field triangle) pair
// produced by the midphase: loads the pair record and both cached transforms/
// shapes, stages the triangle data and the convex-hull header into per-warp
// shared scratch, then delegates contact generation to convexTriangleContactGen.
__device__ void convexHeightfieldNarrowphaseCore(
	PxU32 globalWarpIndex,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,					// per CM
	ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr,			// per cvx-tri
	ConvexTriContacts** PX_RESTRICT cvxTriContactsPtr,			// per cvx-tri
	PxReal** PX_RESTRICT cvxTriMaxDepthPtr,						// per cvx-tri
	ConvexTriIntermediateData** PX_RESTRICT cvxTriIntermPtr,	// per cvx-tri
	PxU32** PX_RESTRICT orderedCvxTriIntermPtr,					// per cvx-tri
	PxU32** PX_RESTRICT cvxTriSecondPassPairPtr,				// per cvx-tri
	const uint4 * PX_RESTRICT pairsGPU,
	PxU32* PX_RESTRICT nbPairsGlobal,
	PxU32* PX_RESTRICT nbSecondPassPairs,
	ConvexTriContact* tempConvexTriContacts,
	const PxU32 tempContactSizeBytes,
	PxU32* pTempContactIndex
)
{
	const PxU32 warpsPerBlock = NP_TRIMESH_WARPS_PER_BLOCK;
	const PxU32 wrapIndex = threadIdx.y;	// index of this warp within the block (sic: "warp")

	// For a heightfield, each vertex in a triangle has a maximum of 6 adjacent
	// vertices, which involves 12 vertices in total.
	//__shared__ PxU32 sTrimeshAdjVerts[warpsPerBlock * HEIGHTFIELD_MAX_VERTS];

	// Per-warp scratch (WARP_SIZE * 16 words), viewed as ConvexMeshScratch below.
	__shared__ volatile PxU32 sharedMem[warpsPerBlock][ WARP_SIZE * 16];
	volatile PxU32* s_WarpSharedMemory = sharedMem[wrapIndex];
	ConvexMeshScratch* s_scratch = (ConvexMeshScratch*)s_WarpSharedMemory;

	// Pair record: x = contact manager index, y = triangle index, z = offset of
	// this test within the contact manager's pair range (see the midphase writer).
	uint4 curPair = pairsGPU[globalWarpIndex];
	PxU32 cmIdx = curPair.x;
	PxU32 triangleIdx = curPair.y;
	PxU32 testOffset = curPair.z;

	PxgContactManagerInput npWorkItem;
	PxgContactManagerInput_ReadWarp(npWorkItem, cmInputs, cmIdx);

	PxsCachedTransform convexTransformCached, trimeshTransformCached;
	PxsCachedTransform_ReadWarp(convexTransformCached, transformCache + npWorkItem.transformCacheRef0);
	PxsCachedTransform_ReadWarp(trimeshTransformCached, transformCache + npWorkItem.transformCacheRef1);

	// Height-field (shape 1) local space -> convex (shape 0) local space.
	PxTransform trimeshToConvexTransform(convexTransformCached.transform.transformInv(trimeshTransformCached.transform));

	PxgShape convexShape;
	PxgShape_ReadWarp(convexShape, gpuShapes + npWorkItem.shapeRef0);

	PxgShape heightfieldShape;
	PxgShape_ReadWarp(heightfieldShape, gpuShapes + npWorkItem.shapeRef1);

	/*const PxU32 startIndex = wrapIndex * HEIGHTFIELD_MAX_VERTS;
	PxU32* startTrimeshAdjVerts = &sTrimeshAdjVerts[startIndex];
	float4* startTrimeshVerts = &sTrimeshVerts[startIndex];
	buildAdjacencyInformations(triangleIdx, heightfieldShape, startTrimeshAdjVerts, startTrimeshVerts, s_scratch, trimeshToConvexTransform);*/

	buildTriangleInformations(triangleIdx, heightfieldShape, s_scratch, trimeshToConvexTransform);

	// Output slots for this test: startIndex addresses the tightly-packed
	// per-pair arrays, roundedStartIndex the padded (sortable) array.
	ConvexMeshPair& pair = cvxTrimeshPair[cmIdx];
	PxU32 convexTriPairOffset = pair.startIndex + testOffset;
	PxU32 convexTriPairOffsetPadded = pair.roundedStartIndex + testOffset;

	// Geometries : Convex
	// Lanes 0..6 cooperatively copy the 7 words of the convex mesh scale
	// (scale vector + rotation quaternion) into scratch.
	if (threadIdx.x < 7)
	{
		reinterpret_cast<PxU32*>(&s_scratch->convexScale)[threadIdx.x] = reinterpret_cast<PxU32*>(&convexShape.scale)[threadIdx.x];
	}
	if (threadIdx.x == 0)
	{
		// Shapes
		//s_scratch->contactDist = convexShape.contactOffset + heightfieldShape.contactOffset;
		s_scratch->contactDist = contactDistance[npWorkItem.transformCacheRef0] + contactDistance[npWorkItem.transformCacheRef1];
		s_scratch->trimeshToConvexTransform = trimeshToConvexTransform;

		const PxU8* convexPtrA = reinterpret_cast<const PxU8 *>(convexShape.hullOrMeshPtr);
		s_scratch->convexPtrA = convexPtrA;

		// Convex hull header as read here: a float4 center of mass, then a uint4
		// whose .x packs nbEdges/nbHullVertices/nbPolygons.
		const float4 hull0_centerOfMass_f4 = *reinterpret_cast<const float4 *>(convexPtrA);
		const PxVec3 hull0_centerOfMass(hull0_centerOfMass_f4.x, hull0_centerOfMass_f4.y, hull0_centerOfMass_f4.z);
		// Transform CoM into shape space
		PxVec3 shapeSpaceCenterOfMass0 = vertex2Shape(hull0_centerOfMass, convexShape.scale.scale, convexShape.scale.rotation);
		s_scratch->convexCenterOfMass = shapeSpaceCenterOfMass0;

		convexPtrA += sizeof(float4);

		const uint4 tmp = *((uint4*)convexPtrA);
		const PxU32 polyData0_NbEdgesNbHullVerticesNbPolygons = tmp.x;
		s_scratch->nbEdgesNbHullVerticesNbPolygons = polyData0_NbEdgesNbHullVerticesNbPolygons;

		convexPtrA += sizeof(uint4);

		/*const float4 polyData0_Extents = *((float4*)(convexPtrA));
		PxVec3 convex_extents = toVec3(polyData0_Extents);
		s_scratch->extents = convex_extents;*/
	}
	// Make the lane-0/lane-0..6 scratch writes visible to the whole warp.
	__syncwarp();

	// Height field doesn't have a CPU remap triangle index, so the GPU triangle
	// index is passed for both index arguments.
	convexTriangleContactGen(
		s_WarpSharedMemory, s_scratch, convexTriPairOffset, convexTriPairOffsetPadded, triangleIdx, triangleIdx, globalWarpIndex,
		cvxTriNIPtr, cvxTriContactsPtr,cvxTriMaxDepthPtr, cvxTriIntermPtr, orderedCvxTriIntermPtr, cvxTriSecondPassPairPtr, nbSecondPassPairs,
		tempConvexTriContacts, tempContactSizeBytes / sizeof(ConvexTriContact), pTempContactIndex);
}
// Kernel entry: narrowphase over the convex-vs-heightfield pair list built by
// the midphase. Each block carves its sub-buffer pointers out of the stack
// memory once (midphaseAllocate); block 0 additionally publishes them to the
// global out-pointers and records the worst-case memory requirement. Warps
// then process pairs in a grid-stride loop (one warp per pair).
extern "C" __global__
__launch_bounds__(NP_TRIMESH_WARPS_PER_BLOCK * WARP_SIZE, 32 / NP_TRIMESH_WARPS_PER_BLOCK)
void convexHeightfieldNarrowphase(
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,					// per CM
	ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr,			// per cvx-tri
	ConvexTriContacts** PX_RESTRICT cvxTriContactsPtr,			// per cvx-tri
	PxReal** PX_RESTRICT cvxTriMaxDepthPtr,						// per cvx-tri
	ConvexTriIntermediateData** PX_RESTRICT cvxTriIntermPtr,	// per cvx-tri
	PxU32** PX_RESTRICT orderedCvxTriIntermPtr,					// per cvx-tri
	PxU32** PX_RESTRICT cvxTriSecondPassPairsPtr,				// per cvx-tri
	PxU8* PX_RESTRICT stackPtr,
	PxU32* PX_RESTRICT nbPairsGlobal,
	PxU32* PX_RESTRICT nbPaddedPairsGlobal,
	PxU32* PX_RESTRICT nbSecondPassPairs,
	const PxU32 stackSizeBytes,
	ConvexTriContact* tempConvexTriContacts,
	PxU32* pTempContactIndex,
	PxU32* maxTempMemRequirement,
	PxU32* midphasePairsNeeded,
	const PxU32 nbContactManagers
)
{
	// Block-shared pointers into the stack buffer, set up once per block below.
	__shared__ ConvexTriNormalAndIndex* sCvxTriNIPtr;
	__shared__ ConvexTriContacts* sCvxTriContactsPtr;
	__shared__ PxReal* sCvxTriMaxDepthPtr;
	__shared__ ConvexTriIntermediateData* sCvxTriIntermPtr;
	__shared__ PxU32* sOrderedCvxTriIntermPtr;
	__shared__ uint4* sPairsGPU;
	__shared__ PxU32* sCvxTriSecondPassPairsPtr;

	const PxU32 nbPairs = *nbPairsGlobal;
	const PxU32 nbPaddedPairs = *nbPaddedPairsGlobal;

	// Each block assigns the corresponding pointers from the stack memory to
	// sCvxTriNIPtr, sCvxTriContactsPtr, sCvxTriMaxDepthPtr etc.
	if (threadIdx.x == 0 && threadIdx.y == 0)
	{
		midphaseAllocate(&sCvxTriNIPtr, &sCvxTriContactsPtr, &sCvxTriMaxDepthPtr, &sCvxTriIntermPtr, &sOrderedCvxTriIntermPtr, &sCvxTriSecondPassPairsPtr, &sPairsGPU, stackPtr, nbPairs, nbPaddedPairs);

		if (blockIdx.x == 0)
		{
			// Record the memory a full pair set would need -- presumably consumed
			// by overflow handling/resizing logic; confirm against the caller.
			atomicMax(maxTempMemRequirement, calculateConvexMeshPairMemRequirement() * (*midphasePairsNeeded) + calculateAdditionalPadding(nbContactManagers));
		}
	}

	__syncthreads();

	// Block 0 publishes the carved pointers through the global out-parameters.
	if (threadIdx.x == 0 && threadIdx.y == 0 && blockIdx.x == 0)
	{
		*cvxTriNIPtr = sCvxTriNIPtr;
		*cvxTriContactsPtr = sCvxTriContactsPtr;
		*cvxTriMaxDepthPtr = sCvxTriMaxDepthPtr;
		*cvxTriIntermPtr = sCvxTriIntermPtr;
		*orderedCvxTriIntermPtr = sOrderedCvxTriIntermPtr;
		*cvxTriSecondPassPairsPtr = sCvxTriSecondPassPairsPtr;
	}

	// Grid-stride loop over pairs, one warp per pair.
	for (PxU32 globalWarpIndex = blockIdx.x * blockDim.y + threadIdx.y; globalWarpIndex < nbPairs; globalWarpIndex += gridDim.x * blockDim.y)
	{
		convexHeightfieldNarrowphaseCore(
			globalWarpIndex,
			cmInputs,
			transformCache,
			contactDistance,
			gpuShapes,
			cvxTrimeshPair,
			&sCvxTriNIPtr,
			&sCvxTriContactsPtr,
			&sCvxTriMaxDepthPtr,
			&sCvxTriIntermPtr,
			&sOrderedCvxTriIntermPtr,
			&sCvxTriSecondPassPairsPtr,
			sPairsGPU,
			nbPairsGlobal,
			nbSecondPassPairs,
			tempConvexTriContacts,
			stackSizeBytes,
			pTempContactIndex
		);
	}
}
// Per-thread narrowphase for one (sphere, height-field triangle) midphase pair.
// Finds the closest point from the sphere center to the triangle, rejects
// edge/vertex contacts that the adjacent triangle should own (plane-side test
// per edge mask), and writes the per-pair outputs: max depth, intermediate
// data, ordered sort key, and normal + packed index. Returns the number of
// contacts produced (0 or 1). Contacts whose normal deviates from the face
// normal by more than ~5 degrees are marked deferred; outMask then carries
// globalThreadIndex | edge-mask for the second pass.
__device__ PxU32 sphereHeightfieldNarrowphaseCore(
	PxU32 globalThreadIndex,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	SphereMeshPair* PX_RESTRICT sphereTrimeshPair,				// per CM
	SphereTriNormalAndIndex** PX_RESTRICT sphereTriNIPtr,		// per sphere-tri
	SphereTriContacts** PX_RESTRICT sphereTriContactsPtr,		// per sphere-tri
	PxReal** PX_RESTRICT sphereTriMaxDepthPtr,					// per sphere-tri
	SphereTriIntermediateData** PX_RESTRICT sphereTriIntermPtr,	// per sphere-tri
	PxU32** PX_RESTRICT orderedSphereTriIntermPtr,				// per sphere-tri
	PxU32** PX_RESTRICT sphereTriSecondPassPairPtr,				// per sphere-tri
	const uint4& curPair,
	const PxgContactManagerInput& npWorkItem,
	PxgShape& sphereShape,
	PxU32* PX_RESTRICT nbPairsGlobal,
	PxU32* PX_RESTRICT nbSecondPassPairs,
	PxVec3& outContact,
	PxReal& outSep,
	PxU32& outMask
)
{
	// Pair record: x = contact manager index, y = triangle index, z = offset of
	// this test within the contact manager's pair range.
	PxU32 cmIdx = curPair.x;
	PxU32 triangleIdx = curPair.y;
	PxU32 testOffset = curPair.z;

	PxsCachedTransform sphereTransformCached = transformCache[npWorkItem.transformCacheRef0];
	PxsCachedTransform trimeshTransformCached = transformCache[npWorkItem.transformCacheRef1];

	//PxTransform trimeshToConvexTransform(sphereTransformCached.transform.transformInv(trimeshTransformCached.transform));

	PxgShape heightfieldShape = gpuShapes[npWorkItem.shapeRef1];

	//const PxU32 startIndex = threadIdx.x * HEIGHTFIELD_MAX_VERTS;
	//float4* startTrimeshVerts = &sTrimeshVerts[startIndex];

	//buildTriangleInformations
	// Height-field blob layout as read here: rows, columns, then the sample array.
	const PxU32 * heightfieldGeomPtr = reinterpret_cast<const PxU32 *>(heightfieldShape.hullOrMeshPtr);
	const PxU32 rows = *heightfieldGeomPtr++;
	const PxU32 columns = *heightfieldGeomPtr++;
	const PxHeightFieldSample* samples = reinterpret_cast<const PxHeightFieldSample*>(heightfieldGeomPtr);

	//PxU32 vertexIndices[3];
	uint4 triAdjTriIndices;
	//PxgTriangle triLocal; // height field local space triangle
	PxVec3 triLocV0, triLocV1, triLocV2;	// triangle in height-field local space
	getTriangle(triLocV0, triLocV1, triLocV2, &triAdjTriIndices, triangleIdx, heightfieldShape.scale, rows, columns, samples);

	//printf("threadIdx.x %i triangleIdx %i\n", threadIdx.x, triangleIdx);

	const PxVec3 triNormal = ((triLocV1 - triLocV0).cross(triLocV2 - triLocV0)).getNormalized();
	//const PxVec3 triNormal = triLocal.denormalizedNormal().getNormalized();
	//const PxVec3 triangleLocNormal = trimeshToConvexTransform.rotate(triNormal).getNormalized();

	// Output slots for this test within the per-CM pair range.
	ConvexMeshPair& pair = sphereTrimeshPair[cmIdx];
	PxU32 sphereTriPairOffset = pair.startIndex + testOffset;
	PxU32 sphereTriPairOffsetPadded = pair.roundedStartIndex + testOffset;

	// Shapes
	const PxReal contactDist = contactDistance[npWorkItem.transformCacheRef0] + contactDistance[npWorkItem.transformCacheRef1];
	const PxReal sphereRadius = sphereShape.scale.scale.x;	// sphere radius is stored in scale.x

	//Geometries : Sphere
	const PxVec3 sphereCenterInTriMesh = trimeshTransformCached.transform.transformInv(sphereTransformCached.transform.p);
	const PxReal inflatedRadius = sphereRadius + contactDist;

	//const PxReal d = triLocal.verts[0].dot(triNormal);
	//const PxReal dist0 = sphereCenterInTriMesh.dot(triNormal) - d;

	// Signed distance of the sphere center from the triangle plane; negative
	// means the center is below the surface.
	const PxReal dist = (sphereCenterInTriMesh - triLocV0).dot(triNormal);

	PxReal separation = PX_MAX_F32;
	PxU32 nbOutputContacts = 0;
	PxVec3 patchNormal(0.f);
	PxVec3 closestP;
	//mSphereCenter will be in the local space of the triangle mesh
	bool generateContact = false;
	bool faceContact = false;
	bool delayContacts = false;
	PxU32 mask;
	PxReal sqDist = distancePointTriangleSquared(sphereCenterInTriMesh, triLocV0, triLocV1, triLocV2, triAdjTriIndices, closestP, generateContact, faceContact, &mask);

	const PxReal sqInflatedSphereRadius = inflatedRadius * inflatedRadius;

	//sphere overlaps with the triangle
	if (dist >= 0.f && sqInflatedSphereRadius > sqDist && generateContact)
	{
		//printf("triangleIdx %i sqDist %f\n", triangleIdx, sqDist);
		//printf("triangleIdx %i triNormal(%f, %f, %f)\n", triangleIdx, triNormal.x, triNormal.y, triNormal.z);
		//printf("triangleIdx % i closestP(%f, %f, %f)\n", triangleIdx, closestP.x, closestP.y, closestP.z);
		//printf("triangleIdx %i v0(%f, %f, %f)\n", triangleIdx, triLocal.verts[0].x, triLocal.verts[0].y, triLocal.verts[0].z);
		//printf("triangleIdx %i v1(%f, %f, %f)\n", triangleIdx, triLocal.verts[1].x, triLocal.verts[1].y, triLocal.verts[1].z);
		//printf("triangleIdx %i v2(%f, %f, %f)\n", triangleIdx, triLocal.verts[2].x, triLocal.verts[2].y, triLocal.verts[2].z);
		//printf("remapCpuTriangleIdx %i sqInflatedSphereRadius %f sqDist %f faceContact %i\n", remapCpuTriangleIdx, sqInflatedSphereRadius, sqDist, PxU32(faceContact));

		// For edge contacts, keep the contact only if the vertex opposite the
		// shared edge lies below the adjacent triangle's plane (i.e. the edge is
		// convex); otherwise the neighbouring triangle owns the contact.
		switch (mask)
		{
		case ConvexTriIntermediateData::eE01:
		{
			PxVec3 triLocV0Adj, triLocV1Adj, triLocV2Adj;
			getTriangle(triLocV0Adj, triLocV1Adj, triLocV2Adj, NULL, triAdjTriIndices.x, heightfieldShape.scale, rows, columns, samples);
			const PxVec3 triNormalAdj = ((triLocV1Adj - triLocV0Adj).cross(triLocV2Adj - triLocV0Adj)).getNormalized();
			generateContact = ((triNormalAdj.dot(triLocV2 - triLocV0Adj)) < 0.f);
			break;
		}
		case ConvexTriIntermediateData::eE12:
		{
			PxVec3 triLocV0Adj, triLocV1Adj, triLocV2Adj;
			getTriangle(triLocV0Adj, triLocV1Adj, triLocV2Adj, NULL, triAdjTriIndices.y, heightfieldShape.scale, rows, columns, samples);
			const PxVec3 triNormalAdj = ((triLocV1Adj - triLocV0Adj).cross(triLocV2Adj - triLocV0Adj)).getNormalized();
			/*printf("%x: triNormalAdj = (%f, %f, %f), triNormal = (%f, %f, %f)\n", triangleIdx, triNormalAdj.x, triNormalAdj.y, triNormalAdj.z,
				triNormal.x, triNormal.y, triNormal.z);*/
			generateContact = (triNormalAdj.dot(triLocV0 - triLocV0Adj) < 0.f);
			break;
		}
		case ConvexTriIntermediateData::eE02:
		{
			PxVec3 triLocV0Adj, triLocV1Adj, triLocV2Adj;
			getTriangle(triLocV0Adj, triLocV1Adj, triLocV2Adj, NULL, triAdjTriIndices.z, heightfieldShape.scale, rows, columns, samples);
			const PxVec3 triNormalAdj = ((triLocV1Adj - triLocV0Adj).cross(triLocV2Adj - triLocV0Adj)).getNormalized();
			generateContact = ((triNormalAdj.dot(triLocV1 - triLocV0Adj)) < 0.f);
			break;
		}
		}

		if (generateContact)
		{
			// Face contacts use the face normal; edge/vertex contacts use the
			// direction from the closest point to the sphere center.
			patchNormal = triNormal;
			if (!faceContact)
				patchNormal = (sphereCenterInTriMesh - closestP).getNormalized();

			//const PxU32 value = PxU32(faceContact);
			//printf("triangleIdx %i faceContact %i\n", triangleIdx, value);
			//printf("remapCpuTriangleIdx %i closestP(%f, %f, %f)\n", remapCpuTriangleIdx, closestP.x, closestP.y, closestP.z);

			const PxReal cosTheta = patchNormal.dot(triNormal);
			const PxReal tolerance = 0.996f;//around 5 degree
			// If the two normals differ by more than ~5 degrees, defer the
			// contact to a second pass instead of emitting it directly.
			delayContacts = cosTheta <= tolerance;
			if (delayContacts)
			{
				//delay contacts
				/*PxU32* PX_RESTRICT sphereTriSecondPairPass = *sphereTriSecondPassPairPtr;
				const PxU32 startIndex = atomicAdd(nbSecondPassPairs, 1);
				sphereTriSecondPairPass[startIndex] = globalThreadIndex;*/
				outMask = globalThreadIndex | mask;
				//printf("remapCpuTriangleIdx %i gpuTriangleIdx %i delayContacts\n", remapCpuTriangleIdx, triangleIdx);
			}

			//printf("triangleIdx % i delayContacts %i\n", triangleIdx, PxU32(delayContacts));
			nbOutputContacts = 1;
			separation = PxSqrt(sqDist);
			outContact = PxVec3(0.f);
			outSep = separation;
		}
	}

	// Per-pair outputs (written even when no contact was generated).
	PxReal* PX_RESTRICT sphereTriMaxDepth = *sphereTriMaxDepthPtr;
	sphereTriMaxDepth[sphereTriPairOffset] = separation;

	SphereTriIntermediateData* PX_RESTRICT sphereTriInterm = *sphereTriIntermPtr;
	PxU32* PX_RESTRICT orderedSphereTriInterm = *orderedSphereTriIntermPtr;
	sphereTriInterm[sphereTriPairOffset].gpuTriIndex = triangleIdx;
	// Top bit marks "has a non-deferred contact" for the later sort/accumulate.
	orderedSphereTriInterm[sphereTriPairOffsetPadded] = (nbOutputContacts && !delayContacts) ? (0x80000000 | triangleIdx) : triangleIdx;

	SphereTriNormalAndIndex* PX_RESTRICT sphereTriNI = *sphereTriNIPtr;

	PxU32 delayContactMask = delayContacts ? SphereTriNormalAndIndex::DeferredContactMask : 0;

	// Rotate the normal into shape A (sphere) space.
	PxVec3 worldNormal = trimeshTransformCached.transform.rotate(-patchNormal);
	sphereTriNI[sphereTriPairOffset].normal = sphereTransformCached.transform.rotateInv(worldNormal);
	sphereTriNI[sphereTriPairOffset].index = delayContactMask + (nbOutputContacts << SphereTriNormalAndIndex::NbContactsShift) + triangleIdx;

	//printf("triangleIdx %i sphereTriPairOffset %i\n", triangleIdx, sphereTriPairOffset);

	return nbOutputContacts;
}
// Builds the per-triangle edge flags consumed by capsule contact generation.
// For each of the triangle's three edges, the adjacent height-field triangle
// (when present) determines the flag:
//  - convex edge with sufficiently different face normal: active edge (bits 3..5)
//  - non-convex edge, but boundary collisions enabled:    active edge (bits 3..5)
//  - otherwise:                                           silhouette edge (bits 0..2)
__device__ PxU8 computeTriangleFlags(
	const PxgTriangle& currentTriangle,
	const PxVec3& triNormal,
	const PxU32* adjTriIndices,
	const PxU16 flags,
	const PxMeshScale& scale,
	const PxU32 rows,
	const PxU32 columns,
	const PxHeightFieldSample* samples
)
{
	const bool boundaryCollisions = !(flags & PxHeightFieldFlag::eNO_BOUNDARY_EDGES);

	// Index of the vertex opposite each edge slot.
	const PxU8 nextInd[] = { 2,0,1 };

	PxU8 triFlags = 0;
	for (PxU32 edge = 0; edge < 3; ++edge)
	{
		const PxU32 neighbourIdx = adjTriIndices[edge];
		if (neighbourIdx == 0xFFFFFFFF)
			continue;	// no neighbouring triangle across this edge

		PxgTriangle neighbour;
		getTriangle(neighbour, NULL, neighbourIdx, scale, rows, columns, samples);
		PxVec3 neighbourNormal = neighbour.denormalizedNormal();

		// Signed distance of our opposing vertex from the neighbour's plane:
		// negative means the shared edge is convex.
		const PxVec3 opposingVert = currentTriangle.verts[nextInd[edge]];
		const PxReal planeDist = neighbourNormal.dot(opposingVert - neighbour.verts[0]);
		if (planeDist < 0.f)
		{
			neighbourNormal.normalize();
			// Only activate the edge when the faces are not near-coplanar
			// (angle between normals above roughly acos(0.997)).
			if (neighbourNormal.dot(triNormal) < 0.997f)
				triFlags |= (1 << (edge + 3));
		}
		else if (boundaryCollisions)
		{
			triFlags |= (1 << (edge + 3));	// mark boundary edge active
		}
		else
		{
			triFlags |= (1 << edge);		// mark as silhouette edge
		}
	}
	return triFlags;
}
// Per-thread narrowphase for one (capsule, height-field triangle) midphase
// pair. Computes the segment-vs-triangle distance, selects a patch normal
// (face or edge), generates contacts via generateContacts/generateEEContacts,
// transforms them into capsule space, and writes the per-pair outputs.
// Returns the number of contacts generated.
__device__ PxU32 capsuleHeightfieldNarrowphaseCore(
	PxU32 globalThreadIndex,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	SphereMeshPair* PX_RESTRICT sphereTrimeshPair,				// per CM
	SphereTriNormalAndIndex** PX_RESTRICT sphereTriNIPtr,		// per sphere-tri
	SphereTriContacts** PX_RESTRICT sphereTriContactsPtr,		// per sphere-tri
	PxReal** PX_RESTRICT sphereTriMaxDepthPtr,					// per sphere-tri
	SphereTriIntermediateData** PX_RESTRICT sphereTriIntermPtr,	// per sphere-tri
	PxU32** PX_RESTRICT orderedSphereTriIntermPtr,				// per sphere-tri
	PxU32** PX_RESTRICT sphereTriSecondPassPairPtr,				// per sphere-tri
	const uint4& curPair,
	const PxgContactManagerInput& npWorkItem,
	PxgShape& capsuleShape,
	PxVec3* contacts,
	PxReal* separations
)
{
	// Pair record: x = contact manager index, y = triangle index, z = offset of
	// this test within the contact manager's pair range.
	const PxU32 cmIdx = curPair.x;
	PxU32 triangleIdx = curPair.y;
	PxU32 testOffset = curPair.z;

	PxsCachedTransform capsuleTransformCached = transformCache[npWorkItem.transformCacheRef0];
	PxsCachedTransform trimeshTransformCached = transformCache[npWorkItem.transformCacheRef1];

	PxgShape heightfieldShape = gpuShapes[npWorkItem.shapeRef1];

	//buildTriangleInformations
	// Height-field blob layout as read here: rows, columns, then the sample array.
	const PxU32 * heightfieldGeomPtr = reinterpret_cast<const PxU32 *>(heightfieldShape.hullOrMeshPtr);
	const PxU32 rows = *heightfieldGeomPtr++;
	const PxU32 columns = *heightfieldGeomPtr++;
	const PxHeightFieldSample* samples = reinterpret_cast<const PxHeightFieldSample*>(heightfieldGeomPtr);
	// NOTE(review): pointer arithmetic on a PxU32* already scales by the element
	// size, so this advances sizeof(PxU32) * rows * columns *elements* (4x the
	// byte count the expression suggests) -- confirm against the GPU
	// height-field memory layout.
	heightfieldGeomPtr += sizeof(PxU32) * rows * columns;
	// NOTE(review): this reinterprets the pointer *variable* itself, producing
	// the low 16 bits of an address rather than the flags stored at the
	// pointed-to location; likely intended
	// *reinterpret_cast<const PxU16*>(heightfieldGeomPtr) -- confirm.
	const PxU16 flags = reinterpret_cast<PxU16&>(heightfieldGeomPtr);

	PxU32 adjTriIndices[3];
	PxgTriangle currentTriangle; // height field local space triangle
	getTriangle(currentTriangle, adjTriIndices, triangleIdx, heightfieldShape.scale, rows, columns, samples);

	// Output slots for this test within the per-CM pair range.
	ConvexMeshPair& pair = sphereTrimeshPair[cmIdx];
	PxU32 sphereTriPairOffset = pair.startIndex + testOffset;
	PxU32 sphereTriPairOffsetPadded = pair.roundedStartIndex + testOffset;

	// Shapes
	const PxReal contactDist = contactDistance[npWorkItem.transformCacheRef0] + contactDistance[npWorkItem.transformCacheRef1];
	// Capsule radius/half-height are packed into scale.y/scale.x respectively.
	const PxReal capsuleRadius = capsuleShape.scale.scale.y;
	const PxReal capsuleHalfHeight = capsuleShape.scale.scale.x;

	const PxTransform heighfieldToCapsule = capsuleTransformCached.transform.transformInv(trimeshTransformCached.transform);

	// Capsule half-axis (world space) scaled by the half height.
	const PxVec3 tmp = capsuleTransformCached.transform.q.getBasisVector0() * capsuleHalfHeight;
	const PxVec3 capsuleCenterInMesh = trimeshTransformCached.transform.transformInv(capsuleTransformCached.transform.p);
	const PxVec3 capsuleDirInMesh = trimeshTransformCached.transform.rotateInv(tmp);
	//Geometries : Capsule in height field local space
	// NOTE(review): p0/p1 add the *world-space* half-axis (tmp) to the
	// mesh-space center, while capsuleDirInMesh (the mesh-space half-axis) is
	// computed above but never used. Only correct as-is when the height field's
	// rotation is identity; looks like p0/p1 should use capsuleDirInMesh --
	// confirm.
	const PxVec3 p0 = capsuleCenterInMesh + tmp;
	const PxVec3 p1 = capsuleCenterInMesh - tmp;

	const PxReal inflatedRadius = capsuleRadius + contactDist;

	const PxVec3 triNormal = currentTriangle.denormalizedNormal().getNormalized();// ((triLocV1 - triLocV0).cross(triLocV2 - triLocV0)).getNormalized();

	//const PxReal d = triLocal.verts[0].dot(triNormal);
	//const PxReal dist0 = sphereCenterInTriMesh.dot(triNormal) - d;
	// Signed distance of the capsule center from the triangle plane.
	const PxReal dist = (capsuleCenterInMesh - currentTriangle.verts[0]).dot(triNormal);

	PxU32 nbOutputContacts = 0;
	PxVec3 patchNormal(0.f);
	PxVec3 closestP;
	//mSphereCenter will be in the local space of the triangle mesh

	PxReal t, u, v;
	PxReal sqDist = distanceSegmentTriangleSquared(p0, p1, currentTriangle.verts[0], currentTriangle.verts[1], currentTriangle.verts[2], t, u, v);

	const PxReal sqInflatedRadius = inflatedRadius * inflatedRadius;
	PxReal separation = PX_MAX_F32;
	bool deferred = false;

	//capsule overlaps with the triangle
	if (dist >= 0.f && sqInflatedRadius > sqDist)
	{
		const PxU8 triFlags = computeTriangleFlags(currentTriangle, triNormal, adjTriIndices,
			flags, heightfieldShape.scale, rows, columns, samples);

		//printf("triangleIdx %i sqDist %f\n", triangleIdx, sqDist);
		//printf("triangleIdx %i triNormal(%f, %f, %f)\n", triangleIdx, triNormal.x, triNormal.y, triNormal.z);
		//printf("triangleIdx % i closestP(%f, %f, %f)\n", triangleIdx, closestP.x, closestP.y, closestP.z);
		//printf("triangleIdx %i v0(%f, %f, %f)\n", triangleIdx, triLocal.verts[0].x, triLocal.verts[0].y, triLocal.verts[0].z);
		//printf("triangleIdx %i v1(%f, %f, %f)\n", triangleIdx, triLocal.verts[1].x, triLocal.verts[1].y, triLocal.verts[1].z);
		//printf("triangleIdx %i v2(%f, %f, %f)\n", triangleIdx, triLocal.verts[2].x, triLocal.verts[2].y, triLocal.verts[2].z);
		//printf("remapCpuTriangleIdx %i sqInflatedSphereRadius %f sqDist %f faceContact %i\n", remapCpuTriangleIdx, sqInflatedSphereRadius, sqDist, PxU32(faceContact));

		//patchNormal = triNormal;
		// Patch normal selection: face normal when the closest feature allows
		// it (selectNormal) or when the segment intersects the triangle
		// (sqDist == 0); otherwise the edge-to-segment direction, marking the
		// contact as deferred.
		if (selectNormal(u, v, triFlags))
		{
			patchNormal = triNormal;
		}
		else
		{
			if (sqDist > 0.f)
			{
				//segment intersects the triangle
				patchNormal = triNormal;
			}
			else
			{
				//edge normal
				const PxVec3 pq = p1 - p0;
				const PxVec3 pointOnSegment = p0 + pq * t; // V3ScaleAdd(pq, t, mCapsule.p0);
				const PxReal w = 1.f - (u + v);// FSub(FOne(), FAdd(u, v));
				const PxVec3 pointOnTriangle = currentTriangle.verts[0] * w + currentTriangle.verts[1] * u + currentTriangle.verts[2] * v;// V3ScaleAdd(p0, w, V3ScaleAdd(p1, u, V3Scale(p2, v)));
				patchNormal = (pointOnSegment - pointOnTriangle).getNormalized();// V3Normalize(V3Sub(pointOnSegment, pointOnTriangle));
				deferred = true;
			}
		}

		//const PxU32 value = PxU32(faceContact);
		//printf("triangleIdx %i faceContact %i\n", triangleIdx, value);
		//printf("remapCpuTriangleIdx %i closestP(%f, %f, %f)\n", remapCpuTriangleIdx, closestP.x, closestP.y, closestP.z)

		nbOutputContacts = generateContacts(currentTriangle.verts[0], currentTriangle.verts[1],
			currentTriangle.verts[2], triNormal, patchNormal, p0, p1, inflatedRadius, contacts, separations);

		generateEEContacts(currentTriangle.verts[0], currentTriangle.verts[1],
			currentTriangle.verts[2], patchNormal, p0, p1, sqInflatedRadius, contacts, separations, nbOutputContacts);

		for (PxU32 i = 0; i < nbOutputContacts; ++i)
		{
			//transform contact back to the capsule space
			contacts[i] = heighfieldToCapsule.transform(contacts[i]);
			const PxReal pen = separations[i];
			separation = PxMin(separation, pen);
		}
	}

	//printf("%i: triangleIndex %i separation %f\n", threadIdx.x, triangleIdx, separation);

	// Per-pair outputs (written even when no contact was generated).
	SphereTriIntermediateData* PX_RESTRICT sphereTriInterm = *sphereTriIntermPtr;
	PxU32* PX_RESTRICT orderedSphereTriInterm = *orderedSphereTriIntermPtr;
	sphereTriInterm[sphereTriPairOffset].gpuTriIndex = triangleIdx;
	// Top bit marks "has a non-deferred contact" for the later sort/accumulate.
	const PxU32 deferMask = (nbOutputContacts && !deferred) ? 1 << 31 : 0;
	orderedSphereTriInterm[sphereTriPairOffsetPadded] = deferMask | triangleIdx;

	//PxReal* PX_RESTRICT sphereTriMaxDepth = sphereTriMaxDepthPtr[sphereTriPairOffset];
	PxReal* PX_RESTRICT capsuleTriMaxDepth = *sphereTriMaxDepthPtr;
	capsuleTriMaxDepth[sphereTriPairOffset] = separation;

	SphereTriNormalAndIndex* PX_RESTRICT sphereTriNI = *sphereTriNIPtr;

	// Rotate the normal into shape A (capsule) space.
	sphereTriNI[sphereTriPairOffset].normal = heighfieldToCapsule.rotate(-patchNormal);
	sphereTriNI[sphereTriPairOffset].index = (nbOutputContacts << SphereTriNormalAndIndex::NbContactsShift) + triangleIdx;

	//printf("triangleIdx %i sphereTriPairOffset %i\n", triangleIdx, sphereTriPairOffset);

	return nbOutputContacts;
}
// Narrowphase kernel for sphere- and capsule-vs-heightfield pairs produced by
// the midphase. One CUDA thread processes one shape/triangle pair: it runs the
// matching per-pair core routine, then the warp cooperatively compacts any
// deferred ("second pass") pairs and flushes the generated contacts into the
// temporary contact buffer.
extern "C" __global__
void sphereHeightfieldNarrowphase(
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair, // per CM
	ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr, // per cvx-tri
	ConvexTriContacts** PX_RESTRICT cvxTriContactsPtr, // per cvx-tri
	PxReal** PX_RESTRICT cvxTriMaxDepthPtr, // per cvx-tri
	SphereTriIntermediateData** PX_RESTRICT cvxTriIntermPtr, // per cvx-tri
	PxU32** PX_RESTRICT orderedCvxTriIntermPtr, // per cvx-tri
	PxU32** PX_RESTRICT cvxTriSecondPassPairsPtr, // per cvx-tri
	PxU8* PX_RESTRICT stackPtr, // stack memory the per-pair output arrays are carved from
	PxU32* PX_RESTRICT nbPairsGlobal, // number of midphase pairs to process
	PxU32* PX_RESTRICT nbPaddedPairsGlobal,
	PxU32* PX_RESTRICT nbSecondPassPairs, // running count of deferred pairs (atomically incremented)
	const PxU32 stackSizeBytes,
	ConvexTriContact* tempConvexTriContacts,
	PxU32* pTempContactIndex, // allocation cursor into tempConvexTriContacts (atomically incremented)
	PxU32* maxTempMemRequirement,
	PxU32* midphasePairsNeeded,
	const PxU32 nbContactManagers
)
{
	// Block-shared copies of the pointers carved out of the stack allocator by
	// midphaseAllocate(); computed by one thread, read by all.
	__shared__ ConvexTriNormalAndIndex* sCvxTriNIPtr;
	__shared__ ConvexTriContacts* sCvxTriContactsPtr;
	__shared__ PxReal* sCvxTriMaxDepthPtr;
	// NOTE(review): declared as ConvexTriIntermediateData* but passed to core
	// functions taking SphereTriIntermediateData** and written back through the
	// SphereTriIntermediateData** kernel parameter — assumes the two types are
	// aliased/layout-compatible; confirm.
	__shared__ ConvexTriIntermediateData* sCvxTriIntermPtr;
	__shared__ PxU32* sOrderedTriIntermPtr;
	__shared__ uint4* sPairsGPU;
	__shared__ PxU32* sCvxTriSecondPassPairsPtr;
	// Per-thread contact scratch: 2 contact slots per thread; sized for 128
	// threads per block — TODO confirm launch configuration matches.
	__shared__ char sShContacts[sizeof(PxVec3) * 128];
	PxVec3* shContacts = reinterpret_cast<PxVec3*>(sShContacts);
	__shared__ PxReal shSeparations[128];
	const PxU32 nbPairs = *nbPairsGlobal;
	const PxU32 nbPaddedPairs = *nbPaddedPairsGlobal;
	//each block assign the corresponding ptr from the stack memory to sCvxTriNIPtr, sCvxTriContactPtr and sCvxTriMaxDepthPtr
	if (threadIdx.x == 0 && threadIdx.y == 0)
	{
		midphaseAllocate(
			&sCvxTriNIPtr,
			&sCvxTriContactsPtr,
			&sCvxTriMaxDepthPtr,
			&sCvxTriIntermPtr,
			&sOrderedTriIntermPtr,
			&sCvxTriSecondPassPairsPtr,
			&sPairsGPU,
			stackPtr,
			nbPairs,
			nbPaddedPairs);
		if (blockIdx.x == 0)
		{
			// Report worst-case temp memory so the host can grow the buffer if needed.
			atomicMax(maxTempMemRequirement, calculateConvexMeshPairMemRequirement() * (*midphasePairsNeeded) + calculateAdditionalPadding(nbContactManagers));
		}
	}
	__syncthreads();
	// The first thread of the first block publishes the pointers to global memory.
	if (threadIdx.x == 0 && threadIdx.y == 0 && blockIdx.x == 0)
	{
		*cvxTriNIPtr = sCvxTriNIPtr;
		*cvxTriContactsPtr = sCvxTriContactsPtr;
		*cvxTriMaxDepthPtr = sCvxTriMaxDepthPtr;
		*cvxTriIntermPtr = sCvxTriIntermPtr;
		*orderedCvxTriIntermPtr = sOrderedTriIntermPtr;
		*cvxTriSecondPassPairsPtr = sCvxTriSecondPassPairsPtr;
	}
	//for (PxU32 globalThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; globalThreadIndex < nbPairs; globalThreadIndex += gridDim.x * blockDim.x)
	// Grid-stride outer loop; every lane of a warp stays in the loop even when
	// out of range so the warp-wide ballots/scans below see full participation.
	for (PxU32 i = 0; i < nbPairs; i += gridDim.x * blockDim.x)
	{
		PxU32 globalThreadIndex = blockIdx.x * blockDim.x + threadIdx.x + i;
		PxU32 nbContacts = 0;
		PxU32 outMask = 0; // non-zero => this pair was deferred to the second pass
		uint4 curPair;
		if (globalThreadIndex < nbPairs)
		{
			// Pair
			curPair = sPairsGPU[globalThreadIndex];
			PxgContactManagerInput npWorkItem = cmInputs[curPair.x];
			PxgShape shape0 = gpuShapes[npWorkItem.shapeRef0];
			PxGeometryType::Enum type0 = PxGeometryType::Enum(shape0.type);
			if (type0 == PxGeometryType::eSPHERE)
			{
				nbContacts = sphereHeightfieldNarrowphaseCore(
					globalThreadIndex,
					cmInputs,
					transformCache,
					contactDistance,
					gpuShapes,
					cvxTrimeshPair,
					&sCvxTriNIPtr,
					&sCvxTriContactsPtr,
					&sCvxTriMaxDepthPtr,
					&sCvxTriIntermPtr,
					&sOrderedTriIntermPtr,
					&sCvxTriSecondPassPairsPtr,
					curPair,
					npWorkItem,
					shape0,
					nbPairsGlobal,
					nbSecondPassPairs,
					shContacts[threadIdx.x * 2],
					shSeparations[threadIdx.x * 2],
					outMask
				);
			}
			else if (type0 == PxGeometryType::eCAPSULE)
			{
				nbContacts = capsuleHeightfieldNarrowphaseCore(
					globalThreadIndex,
					cmInputs,
					transformCache,
					contactDistance,
					gpuShapes,
					cvxTrimeshPair,
					&sCvxTriNIPtr,
					&sCvxTriContactsPtr,
					&sCvxTriMaxDepthPtr,
					&sCvxTriIntermPtr,
					&sOrderedTriIntermPtr,
					&sCvxTriSecondPassPairsPtr,
					curPair,
					npWorkItem,
					shape0,
					&shContacts[threadIdx.x * 2],
					&shSeparations[threadIdx.x * 2]
				);
			}
		}
		// Warp-wide compaction of deferred pairs: each set bit marks a lane
		// whose pair must be revisited in the second pass.
		PxU32 outMaskWriteMask = __ballot_sync(FULL_MASK, outMask);
		PxU32 count = __popc(outMaskWriteMask);
		PxU32 startIndex = 0;
		const PxU32 threadIndexInWarp = threadIdx.x & 31;
		//Now output contacts!!!!
		// Inclusive prefix sum of per-lane contact counts; lane 31 then holds
		// the warp total.
		PxU32 inclusiveSum = warpScan<AddOpPxU32, PxU32>(FULL_MASK, nbContacts);
		PxU32 totalContacts = __shfl_sync(FULL_MASK, inclusiveSum, 31);
		if (totalContacts)
		{
			if (count)
			{
				// Lane 0 reserves space for the warp's deferred pairs in one atomic,
				// then each flagged lane writes its mask at its compacted offset.
				if (threadIndexInWarp == 0)
					startIndex = atomicAdd(nbSecondPassPairs, count);
				startIndex = __shfl_sync(FULL_MASK, startIndex, 0);
				if (outMask)
				{
					PxU32 offset = warpScanExclusive(outMaskWriteMask, threadIndexInWarp);
					sCvxTriSecondPassPairsPtr[startIndex + offset] = outMask;
				}
			}
			// Lane 31 owns the warp total, so it reserves the warp's contact range;
			// subtracting nbContacts converts the inclusive sum to this lane's
			// exclusive offset within that range.
			if (threadIndexInWarp == 31)
				startIndex = atomicAdd(pTempContactIndex, inclusiveSum);
			startIndex = __shfl_sync(FULL_MASK, startIndex, 31);
			startIndex += inclusiveSum - nbContacts;
			// Copy this thread's contacts (2 shared slots per thread) out to the
			// global temp buffer. Note: this inner 'i' shadows the outer loop 'i'.
			for (PxU32 i = 0, idx = 2 * threadIdx.x; i < nbContacts; ++i, idx++)
			{
				tempConvexTriContacts[startIndex + i].contact_sepW = make_float4(shContacts[idx].x, shContacts[idx].y,
					shContacts[idx].z, shSeparations[idx]);
			}
			if (nbContacts)
			{
				// Record where this pair's contacts begin in the temp buffer.
				PxU32 cmIdx = curPair.x;
				PxU32 testOffset = curPair.z;
				ConvexMeshPair& pair = cvxTrimeshPair[cmIdx];
				PxU32 sphereTriPairOffset = pair.startIndex + testOffset;
				sCvxTriContactsPtr[sphereTriPairOffset].index = startIndex;
			}
		}
	}
}

View File

@@ -0,0 +1,975 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include <cuda.h>
#include <cuda_runtime.h>
#include <assert.h>
#include "foundation/PxVec3.h"
#include "foundation/PxPlane.h"
#include "foundation/PxTransform.h"
#include "geometry/PxMeshScale.h"
#include "geometry/PxGeometry.h"
#include "PxgCommonDefines.h"
#include "utils.cuh"
#include "reduction.cuh"
#include "sphereTriangle.cuh"
#include "convexTriangle.cuh"
#include "capsuleTriangle.cuh"
#include "midphaseAllocate.cuh"
#include "PxgNpKernelIndices.h"
#include "triangleMesh.cuh"
using namespace physx;
extern "C" __host__ void initNarrowphaseKernels2() {}
// Triangle scaling, triangle verts in shape space
__device__ void getShapeSpaceVerts(PxVec3& v0, PxVec3& v1, PxVec3& v2, const MeshScaling& meshScale, PxU32 triangleIdx, const uint4* PX_RESTRICT trimeshTriIndices, const float4* PX_RESTRICT trimeshVerts)
{
const uint4 triIndices = trimeshTriIndices[triangleIdx];
const PxU32 triVertIdx0 = triIndices.x;
const PxU32 triVertIdx1 = triIndices.y;
const PxU32 triVertIdx2 = triIndices.z;
const float4 triV0_f4 = trimeshVerts[triVertIdx0];
const float4 triV1_f4 = trimeshVerts[triVertIdx1];
const float4 triV2_f4 = trimeshVerts[triVertIdx2];
PxVec3 triV0 = PxLoad3(triV0_f4);
PxVec3 triV1 = PxLoad3(triV1_f4);
PxVec3 triV2 = PxLoad3(triV2_f4);
meshScale.getShapeSpaceVert(triV0, triV1, triV2, v0, v1, v2);
}
// Per-warp convex-vs-mesh-triangle narrowphase. One warp handles one
// convex/triangle pair: it stages the pair transforms, convex hull header and
// the (scaled, convex-space) triangle into warp shared memory, then runs the
// warp-cooperative contact generation in convexTriangleContactGen().
__device__ void convexTrimeshNarrowphaseCore(
	PxU32 globalWarpIndex,
	PxgShape& convexShape,
	PxgShape& trimeshShape,
	PxU32 transformCacheRef0,
	PxU32 transformCacheRef1,
	const uint4 curPair, // x = contact manager index, y = triangle index, z = offset within the CM's pair range
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair, // per CM
	ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr, // per cvx-tri
	ConvexTriContacts** PX_RESTRICT cvxTriContactsPtr, // per cvx-tri
	PxReal** PX_RESTRICT cvxTriMaxDepthPtr, // per cvx-tri
	ConvexTriIntermediateData** PX_RESTRICT cvxTriIntermPtr, // per cvx-tri
	PxU32** PX_RESTRICT orderedCvxTriIntermPtr, // per cvx-tri
	PxU32** PX_RESTRICT cvxTriSecondPassPairPtr, // per cvx-tri
	const uint4* PX_RESTRICT pairsGPU,
	PxU32* nbPairsGlobal,
	PxU32* nbSecondPassPairs,
	volatile PxU32* s_WarpSharedMemory, // this warp's slice of block shared memory, reused as ConvexMeshScratch
	ConvexTriContact* tempConvexTriContacts,
	const PxU32 tempContactSizeBytes,
	PxU32* pTempContactIndex
)
{
	// 1 warp is doing 1 triangle - convex pair here.
	ConvexMeshScratch* s_scratch = (ConvexMeshScratch*)s_WarpSharedMemory;
	PxU32 cmIdx = curPair.x;
	PxU32 triangleIdx = curPair.y;
	PxU32 testOffset = curPair.z;
	// Per-warp (indexed by threadIdx.y) staging of the two cached transforms,
	// loaded cooperatively one 32-bit word per lane.
	__shared__ __align__(16) char sConvexTransformCached[sizeof(PxsCachedTransform) * NP_TRIMESH_WARPS_PER_BLOCK];
	__shared__ __align__(16) char sTrimeshTransformCached[sizeof(PxsCachedTransform) * NP_TRIMESH_WARPS_PER_BLOCK];
	PxsCachedTransform* convexTransformCached = reinterpret_cast<PxsCachedTransform*>(sConvexTransformCached);
	PxsCachedTransform* trimeshTransformCached = reinterpret_cast<PxsCachedTransform*>(sTrimeshTransformCached);
	if (threadIdx.x < sizeof(PxsCachedTransform) / sizeof(PxU32))
	{
		reinterpret_cast<PxU32*>(&convexTransformCached[threadIdx.y])[threadIdx.x] =
			reinterpret_cast<const PxU32*>(transformCache + transformCacheRef0)[threadIdx.x];
		reinterpret_cast<PxU32*>(&trimeshTransformCached[threadIdx.y])[threadIdx.x] =
			reinterpret_cast<const PxU32*>(transformCache + transformCacheRef1)[threadIdx.x];
	}
	ConvexMeshPair& pair = cvxTrimeshPair[cmIdx];
	// Offsets of this cvx-tri test inside the per-pair output arrays; the
	// "padded" variant indexes the rounded/aligned ordered-intermediate array.
	PxU32 convexTriPairOffset = pair.startIndex + testOffset;
	PxU32 convexTriPairOffsetPadded = pair.roundedStartIndex + testOffset;
	__syncwarp();
	// First 7 words of the convex scale (copied cooperatively by lanes 0-6).
	if (threadIdx.x < 7)
	{
		reinterpret_cast<PxU32*>(&s_scratch->convexScale)[threadIdx.x] = reinterpret_cast<PxU32*>(&convexShape.scale)[threadIdx.x];
	}
	// Lane 0 fills the remaining scalar scratch fields: relative transform,
	// contact distance, convex hull header and mesh data pointers.
	if (threadIdx.x == 0)
	{
		// convexTransform/trimeshTransform already include bodyTM/relTM
		s_scratch->trimeshToConvexTransform = convexTransformCached[threadIdx.y].transform.transformInv(trimeshTransformCached[threadIdx.y].transform);
		// Shapes
		//s_scratch->contactDist = convexShape.contactOffset + trimeshShape.contactOffset;
		s_scratch->contactDist = contactDistance[transformCacheRef0] + contactDistance[transformCacheRef1];
		const PxU8* convexPtrA = reinterpret_cast<const PxU8 *>(convexShape.hullOrMeshPtr);
		s_scratch->convexPtrA = convexPtrA;
		// Hull blob layout: float4 center of mass, then uint4 header (x packs
		// edge/vertex/polygon counts), then the polygon data.
		const float4 hull0_centerOfMass_f4 = *reinterpret_cast<const float4 *>(convexPtrA);
		const PxVec3 hull0_centerOfMass(hull0_centerOfMass_f4.x, hull0_centerOfMass_f4.y, hull0_centerOfMass_f4.z);
		// Transform CoM into shape space
		PxVec3 shapeSpaceCenterOfMass0 = vertex2Shape(hull0_centerOfMass, convexShape.scale.scale, convexShape.scale.rotation);
		s_scratch->convexCenterOfMass = shapeSpaceCenterOfMass0;
		convexPtrA += sizeof(float4);
		const uint4 tmp = *((uint4*)convexPtrA);
		const PxU32 polyData0_NbEdgesNbHullVerticesNbPolygons = tmp.x;
		s_scratch->nbEdgesNbHullVerticesNbPolygons = polyData0_NbEdgesNbHullVerticesNbPolygons;
		convexPtrA += sizeof(uint4);
		// Geometries : Triangle Mesh
		const PxU8 * trimeshGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape.hullOrMeshPtr);
		const uint4* PX_RESTRICT trimeshTriAdjacencies;
		readTriangleMesh(trimeshGeomPtr, s_scratch->trimeshVerts, s_scratch->trimeshTriIndices, trimeshTriAdjacencies, s_scratch->trimeshFaceRemap);
		// TODO: shuffle this across threads (reading uint4 seq)
		// later shuffle adjacency work and shuffle back adjNormals
		uint4 triAdjTriIndices = trimeshTriAdjacencies[triangleIdx];
		s_scratch->triAdjTrisIdx[0] = triAdjTriIndices.x;
		s_scratch->triAdjTrisIdx[1] = triAdjTriIndices.y;
		s_scratch->triAdjTrisIdx[2] = triAdjTriIndices.z;
	}
	__syncwarp();
	// Lanes 0-2 each load one triangle vertex into convex shape space. When the
	// mesh scale mirrors (negative determinant) the vertex order is swapped
	// (writeIndex = threadIdx.x ^ 3 maps 1<->2) to keep the winding consistent.
	if (threadIdx.x < 3)
	{
		const PxMeshScale& scale = trimeshShape.scale;
		bool flipNormal = ((scale.scale.x * scale.scale.y * scale.scale.z) < 0.0f);
		PxU32 writeIndex = threadIdx.x;
		if (threadIdx.x > 0 && flipNormal)
			writeIndex = threadIdx.x ^ 3;
		const PxU32 triVertIdx = reinterpret_cast<const PxU32*>(&s_scratch->trimeshTriIndices[triangleIdx])[threadIdx.x];
		const float4 triV_f4 = s_scratch->trimeshVerts[triVertIdx];
		PxVec3 triV = PxLoad3(triV_f4);
		triV = vertex2Shape(triV, trimeshShape.scale.scale, trimeshShape.scale.rotation);
		PxVec3 triLocVerts = s_scratch->trimeshToConvexTransform.transform(triV);
		s_scratch->triLocVerts[writeIndex] = triLocVerts;
	}
	__syncwarp();
	// Lane 0 derives the triangle normal (in convex space) from the staged verts.
	if (threadIdx.x == 0)
	{
		PxVec3 triangleLocNormal;
		triangleLocNormal = (s_scratch->triLocVerts[1] - s_scratch->triLocVerts[0]).cross(s_scratch->triLocVerts[2] - s_scratch->triLocVerts[0]).getNormalized();
		s_scratch->triangleLocNormal = triangleLocNormal;
	}
	__syncwarp();
	// Original (CPU-side) triangle index, used for contact reporting.
	const PxU32 remapCpuTriangleIdx = s_scratch->trimeshFaceRemap[triangleIdx];
	convexTriangleContactGen(
		s_WarpSharedMemory, s_scratch, convexTriPairOffset, convexTriPairOffsetPadded, remapCpuTriangleIdx, triangleIdx, globalWarpIndex,
		cvxTriNIPtr, cvxTriContactsPtr, cvxTriMaxDepthPtr, cvxTriIntermPtr, orderedCvxTriIntermPtr, cvxTriSecondPassPairPtr, nbSecondPassPairs,
		tempConvexTriContacts, tempContactSizeBytes/sizeof(ConvexTriContact), pTempContactIndex);
}
// Per-thread sphere-vs-mesh-triangle narrowphase. Produces at most one contact
// (the sphere center, expressed in sphere space) and returns the contact count
// (0 or 1) via nbOutputContacts. Always records max depth, intermediate data
// and normal/index for the pair. Contacts whose normal deviates from the face
// normal by more than ~5 degrees are flagged as deferred via outMask and
// revisited in a second pass.
__device__ PxU32 sphereTrimeshNarrowphaseCore(
	PxU32 globalThreadIndex,
	const PxgContactManagerInput& npWorkItem,
	const PxgShape& sphereShape,
	const PxgShape& trimeshShape,
	const uint4 curPair, // x = CM index, y = triangle index, z = offset within the CM's pair range
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	SphereMeshPair* PX_RESTRICT sphereTrimeshPair, // per CM
	SphereTriNormalAndIndex** PX_RESTRICT sphereTriNIPtr, // per sphere-tri
	SphereTriContacts** PX_RESTRICT sphereTriContactsPtr, // per sphere-tri
	PxReal** PX_RESTRICT sphereTriMaxDepthPtr, // per sphere-tri
	SphereTriIntermediateData** PX_RESTRICT sphereTriIntermPtr, // per sphere-tri
	PxU32** PX_RESTRICT orderedSphereTriIntermPtr, // per sphere-tri
	PxU32** PX_RESTRICT sphereTriSecondPassPairPtr, // per sphere-tri
	PxU32* nbPairsGlobal,
	PxU32* nbSecondPassPairs,
	PxVec3& outContact, // contact point, sphere local space
	PxReal& outSep, // contact separation
	PxU32& outMask // deferred-pair mask (edge/vertex bits | thread index), 0 if not deferred
)
{
	// Pair, stackPtr in convexMeshMidphase kernel
	PxU32 cmIdx = curPair.x;
	PxU32 triangleIdx = curPair.y;
	PxU32 testOffset = curPair.z;
	PxsCachedTransform sphereTransformCached = transformCache[npWorkItem.transformCacheRef0];
	PxsCachedTransform trimeshTransformCached = transformCache[npWorkItem.transformCacheRef1];
	SphereMeshPair& pair = sphereTrimeshPair[cmIdx];
	// Output offsets; the padded variant indexes the rounded/aligned
	// ordered-intermediate array.
	PxU32 sphereTriPairOffset = pair.startIndex + testOffset;
	PxU32 sphereTriPairOffsetPadded = pair.roundedStartIndex + testOffset;
	// Shapes
	const PxReal contactDist = contactDistance[npWorkItem.transformCacheRef0] + contactDistance[npWorkItem.transformCacheRef1];
	const PxQuat trimeshScaleRot = trimeshShape.scale.rotation;
	// Sphere radius is packed into scale.scale.x for sphere shapes.
	const PxReal sphereRadius = sphereShape.scale.scale.x;
	const PxVec3 trimeshScale = trimeshShape.scale.scale;
	//Geometries : Sphere
	const PxVec3 sphereCenterInTriMesh = trimeshTransformCached.transform.transformInv(sphereTransformCached.transform.p);
	// Geometries : Triangle Mesh
	const PxU8 * trimeshGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape.hullOrMeshPtr);
	const float4 * trimeshVerts;
	const uint4 * trimeshTriIndices;
	const uint4* PX_RESTRICT trimeshTriAdjacencies;
	const PxU32* PX_RESTRICT trimeshFaceRemap;
	readTriangleMesh(trimeshGeomPtr, trimeshVerts, trimeshTriIndices, trimeshTriAdjacencies, trimeshFaceRemap);
	const PxMeshScale& scale = trimeshShape.scale;
	MeshScaling meshScale(scale.scale, scale.rotation);
	PxVec3 v0, v1, v2;
	getShapeSpaceVerts(v0, v1, v2, meshScale, triangleIdx, trimeshTriIndices, trimeshVerts);
	const PxVec3 n = (v1 - v0).cross(v2 - v0).getNormalized();
	uint4 triAdjTriIndices = trimeshTriAdjacencies[triangleIdx];
	// A mirroring scale flips the winding, so the edge adjacency order must be
	// swapped to stay in sync with the swapped vertices.
	if (meshScale.flipNormal)
		PxSwap(triAdjTriIndices.x, triAdjTriIndices.z);
	const PxU32 remapCpuTriangleIdx = trimeshFaceRemap[triangleIdx];
	const PxReal inflatedRadius = sphereRadius + contactDist;
	const PxReal d = v0.dot(n);
	// Signed distance of the sphere center to the triangle plane; only
	// front-facing triangles (center on the normal side) generate contacts.
	const PxReal dist0 = sphereCenterInTriMesh.dot(n) - d;
	PxReal separation = PX_MAX_F32;
	PxU32 nbOutputContacts = 0;
	bool delayContacts = false;
	bool edgeContact = false;
	PxVec3 patchNormal(0.f);
	if (dist0 >= 0)
	{
		PxVec3 closestP;
		//mSphereCenter will be in the local space of the triangle mesh
		bool generateContact = false;
		bool faceContact = false;
		PxU32 mask;
		const PxReal sqDist = distancePointTriangleSquared(sphereCenterInTriMesh, v0, v1, v2, triAdjTriIndices, closestP, generateContact, faceContact, &mask);
		const PxReal sqInflatedSphereRadius = inflatedRadius * inflatedRadius;
		//sphere overlap with triangles
		if (sqInflatedSphereRadius > sqDist && generateContact)
		{
			//printf("remapCpuTriangleIdx %i sqInflatedSphereRadius %f sqDist %f faceContact %i\n", remapCpuTriangleIdx, sqInflatedSphereRadius, sqDist, PxU32(faceContact));
			// Face contact uses the triangle normal; edge/vertex contact uses
			// the direction from the closest point to the sphere center.
			patchNormal = n;
			if (!faceContact)
				patchNormal = (sphereCenterInTriMesh - closestP).getNormalized();
			//printf("remapCpuTriangleIdx %i closestP(%f, %f, %f)\n", remapCpuTriangleIdx, closestP.x, closestP.y, closestP.z);
			const PxReal cosTheta = patchNormal.dot(n);
			const PxReal tolerance = 0.996f;//around 5 degree
			//two normal's projection less than 5 degree, generate contacts
			delayContacts = cosTheta <= tolerance;
			if (delayContacts)
			{
				//delay contacts
				//PxU32* PX_RESTRICT sphereTriSecondPairPass = *sphereTriSecondPassPairPtr;
				//const PxU32 startIndex = atomicAdd(nbSecondPassPairs, 1);
				// PT: no need to compute the mask, we got it for free from distancePointTriangleSquared
				//PxU32 mask = getTriangleActiveMask(closestP, v0, v1, v2);
				//sphereTriSecondPairPass[startIndex] = mask | globalThreadIndex;
				// Caller compacts deferred pairs warp-wide using this mask.
				outMask = mask | globalThreadIndex;
				// VR: is an edge contact?
				edgeContact = ConvexTriIntermediateData::isEdgeContact(mask);
			}
			nbOutputContacts = 1;
			separation = PxSqrt(sqDist);
			// Generate contacts
			//SphereTriContacts* PX_RESTRICT sphereTriContacts = *sphereTriContactsPtr;
			//float4* contactBuf = reinterpret_cast<float4 *>(sphereTriContacts + sphereTriPairOffset);
			//sphere center in sphere local space
			//contactBuf[0] = make_float4(0.f, 0.f, 0.f, separation);
			//normal = patchNormal;
			outSep = separation;
			outContact = PxVec3(0.f);
		}
	}
	// The outputs below are written unconditionally, even when no contact was
	// generated (separation stays PX_MAX_F32, nbOutputContacts stays 0).
	PxReal* PX_RESTRICT sphereTriMaxDepth = *sphereTriMaxDepthPtr;
	sphereTriMaxDepth[sphereTriPairOffset] = separation;
	SphereTriIntermediateData* PX_RESTRICT sphereTriInterm = *sphereTriIntermPtr;
	PxU32* PX_RESTRICT orderedSphereTriInterm = *orderedSphereTriIntermPtr;
	sphereTriInterm[sphereTriPairOffset].gpuTriIndex = triangleIdx;
	// VR: bits 30 and 31 are used for the search flags
	assert(triangleIdx < (1 << 30));
	// Tag the ordered index with the contact kind (the 'else' binds to the
	// inner 'if'): immediate face contact or deferred edge contact.
	PxU32 orderedTriangleIdx = triangleIdx;
	if (nbOutputContacts)
		if (!delayContacts) // not dalayed -> face contact
			orderedTriangleIdx |= TriangleSearchFlags::eFACE_CONTACT;
		else if (edgeContact)
			orderedTriangleIdx |= TriangleSearchFlags::eEDGE_CONTACT;
	orderedSphereTriInterm[sphereTriPairOffsetPadded] = orderedTriangleIdx;
	SphereTriNormalAndIndex* PX_RESTRICT sphereTriNI = *sphereTriNIPtr;
	PxU32 delayContactMask = delayContacts ? SphereTriNormalAndIndex::DeferredContactMask : 0;
	//rotate the normal into A space
	PxVec3 worldNormal = trimeshTransformCached.transform.rotate(-patchNormal);
	sphereTriNI[sphereTriPairOffset].normal = sphereTransformCached.transform.rotateInv(worldNormal);
	sphereTriNI[sphereTriPairOffset].index = delayContactMask + (nbOutputContacts << SphereTriNormalAndIndex::NbContactsShift) + remapCpuTriangleIdx;
	//printf("sphereTriPairOffset %i nbOutputContacts%i \n", sphereTriPairOffset, nbOutputContacts);
	return nbOutputContacts;
}
// Per-thread capsule-vs-mesh-triangle narrowphase. Generates up to several
// contacts (written to 'contacts'/'separations' in capsule local space) and
// returns the contact count. Like the sphere variant, it always records max
// depth, intermediate data and normal/index for the pair, and defers contacts
// whose normal deviates from the face normal by more than ~5 degrees via
// outMask.
__device__ PxU32 capsuleTrimeshNarrowphaseCore(
	PxU32 globalThreadIndex,
	const PxgContactManagerInput& npWorkItem,
	const PxgShape& capsuleShape,
	const PxgShape& trimeshShape,
	const uint4 curPair, // x = CM index, y = triangle index, z = offset within the CM's pair range
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	SphereMeshPair* PX_RESTRICT sphereTrimeshPair, // per CM
	SphereTriNormalAndIndex** PX_RESTRICT sphereTriNIPtr, // per sphere-tri
	SphereTriContacts** PX_RESTRICT sphereTriContactsPtr, // per sphere-tri
	PxReal** PX_RESTRICT sphereTriMaxDepthPtr, // per sphere-tri
	SphereTriIntermediateData** PX_RESTRICT sphereTriIntermPtr, // per sphere-tri
	PxU32** PX_RESTRICT orderedSphereTriIntermPtr, // per sphere-tri
	PxU32** PX_RESTRICT sphereTriSecondPassPairPtr, // per sphere-tri
	PxU32* nbSecondPassPairs,
	PxVec3* contacts, // out: contact points, capsule local space
	PxReal* separations, // out: per-contact separations
	PxU32& outMask // deferred-pair mask (edge/vertex bits | thread index), 0 if not deferred
)
{
	// Pair, stackPtr in convexMeshMidphase kernel
	PxU32 cmIdx = curPair.x;
	PxU32 triangleIdx = curPair.y;
	PxU32 testOffset = curPair.z;
	PxsCachedTransform capsuleTransformCached = transformCache[npWorkItem.transformCacheRef0];
	PxsCachedTransform trimeshTransformCached = transformCache[npWorkItem.transformCacheRef1];
	SphereMeshPair& pair = sphereTrimeshPair[cmIdx];
	// Output offsets; the padded variant indexes the rounded/aligned
	// ordered-intermediate array.
	PxU32 sphereTriPairOffset = pair.startIndex + testOffset;
	PxU32 sphereTriPairOffsetPadded = pair.roundedStartIndex + testOffset;
	// Shapes
	const PxReal contactDist = contactDistance[npWorkItem.transformCacheRef0] + contactDistance[npWorkItem.transformCacheRef1];
	const PxQuat trimeshScaleRot = trimeshShape.scale.rotation;
	// Capsule shapes pack half-height in scale.scale.x and radius in scale.scale.y.
	const PxReal capsuleRadius = capsuleShape.scale.scale.y;
	const PxReal capsuleHalfHeight = capsuleShape.scale.scale.x;
	const PxVec3 trimeshScale = trimeshShape.scale.scale;
	//Geometries : Capsule
	const PxTransform meshToCapsule = capsuleTransformCached.transform.transformInv(trimeshTransformCached.transform);
	//Capsule in triangle mesh space
	const PxVec3 tmp = capsuleTransformCached.transform.q.getBasisVector0() * capsuleHalfHeight;
	const PxVec3 capsuleCenterInMesh = trimeshTransformCached.transform.transformInv(capsuleTransformCached.transform.p);
	const PxVec3 capsuleDirInMesh = trimeshTransformCached.transform.rotateInv(tmp);
	//Geometries : Capsule in mesh local space
	// Segment endpoints of the capsule axis, mesh local space.
	const PxVec3 p0 = capsuleCenterInMesh + capsuleDirInMesh;
	const PxVec3 p1 = capsuleCenterInMesh - capsuleDirInMesh;
	// Geometries : Triangle Mesh
	const PxU8 * trimeshGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape.hullOrMeshPtr);
	const float4 * trimeshVerts;
	const uint4 * trimeshTriIndices;
	const uint4* PX_RESTRICT trimeshTriAdjacencies;
	readTriangleMesh(trimeshGeomPtr, trimeshVerts, trimeshTriIndices, trimeshTriAdjacencies);
	const PxMeshScale& scale = trimeshShape.scale;
	MeshScaling meshScale(scale.scale, scale.rotation);
	PxVec3 v0, v1, v2;
	getShapeSpaceVerts(v0, v1, v2, meshScale, triangleIdx, trimeshTriIndices, trimeshVerts);
	const PxVec3 triNormal = (v1 - v0).cross(v2 - v0).getNormalized();
	uint4 triAdjTrisIdx = trimeshTriAdjacencies[triangleIdx];
	// A mirroring scale flips the winding, so the edge adjacency order must be
	// swapped to stay in sync with the swapped vertices.
	if (meshScale.flipNormal)
		PxSwap(triAdjTrisIdx.x, triAdjTrisIdx.z);
	const PxReal inflatedRadius = capsuleRadius + contactDist;
	//const PxReal d = v0.dot(n);
	//const PxReal dist0 = sphereCenterInTriMesh.dot(n) - d;
	// Signed distance of the capsule center to the triangle plane; only
	// front-facing triangles generate contacts.
	const PxReal dist = (capsuleCenterInMesh - v0).dot(triNormal);
	PxReal separation = PX_MAX_F32;
	PxU32 nbOutputContacts = 0;
	PxVec3 patchNormal(0.f);
	bool delayContacts = false;
	bool edgeContact = false;
	if (dist >= 0)
	{
		//PxVec3 closestP;
		// t = parameter along the capsule segment, (u, v) = triangle barycentrics
		// of the closest-point pair.
		PxReal t, u, v;
		PxReal sqDist = distanceSegmentTriangleSquared(p0, p1, v0, v1, v2, t, u, v);
		const PxReal sqInflatedRadius = inflatedRadius * inflatedRadius;
		//capsule overlap with triangles
		if (sqInflatedRadius > sqDist)
		{
			// VR: I made this similar to sphere-trimesh
			// and I left the old variant for the reference
#if 1
			if (sqDist < FLT_EPSILON)
			{
				//segment intersect with the triangle
				patchNormal = triNormal;
			}
			else
			{
				// Patch normal from the closest-point pair; defer the contact if
				// it deviates from the face normal by more than ~5 degrees.
				const PxVec3 pointOnSegment = p0 + (p1 - p0) * t;
				const PxVec3 pointOnTriangle = v0 * (1.f - u - v) + v1 * u + v2 * v;
				patchNormal = (pointOnSegment - pointOnTriangle).getNormalized();
				const PxReal cosTheta = patchNormal.dot(triNormal);
				const PxReal tolerance = 0.996f;//around 5 degree
				//printf("pointOnSegment: (%g %g %g); pointOnTriangle: (%g %g %g); cosTheta: %g\n", pointOnSegment.x, pointOnSegment.y, pointOnSegment.z, pointOnTriangle.x, pointOnTriangle.y, pointOnTriangle.z, cosTheta);
				//two normal's projection less than 5 degree, generate contacts
				delayContacts = cosTheta <= tolerance;
			}
			if (delayContacts)
			{
				// Caller compacts deferred pairs warp-wide using this mask.
				PxU32 mask = getTriangleActiveMaskFromNormal(v0, v1, v2, patchNormal);
				outMask = mask | globalThreadIndex;
				//const PxVec3 pn = patchNormal;
				//printf("pn: (%g %g %g); mask: %x\n", pn.x, pn.y, pn.z, mask);
				// is an edge contact?
				edgeContact = ConvexTriIntermediateData::isEdgeContact(mask);
			}
#else
			PxU32 triFlags = 0;
			triFlags |= (triAdjTrisIdx.x != BOUNDARY && (triAdjTrisIdx.x & NONCONVEX_FLAG) ? 0 : 1 << 3);
			triFlags |= (triAdjTrisIdx.y != BOUNDARY && (triAdjTrisIdx.y & NONCONVEX_FLAG) ? 0 : 1 << 4);
			triFlags |= (triAdjTrisIdx.z != BOUNDARY && (triAdjTrisIdx.z & NONCONVEX_FLAG) ? 0 : 1 << 5);
			if (selectNormal(u, v, triFlags))
			{
				patchNormal = triNormal;
			}
			else
			{
				if (sqDist < 1e-6f)
				{
					//segment intersect with the triangle
					patchNormal = triNormal;
				}
				else
				{
					//edge normal
					const PxVec3 pq = p1 - p0;
					const PxVec3 pointOnSegment = p0 + pq * t; // V3ScaleAdd(pq, t, mCapsule.p0);
					const PxReal w = 1.f - (u + v);// FSub(FOne(), FAdd(u, v));
					const PxVec3 pointOnTriangle = v0 * w + v1 * u + v2 * v;// V3ScaleAdd(p0, w, V3ScaleAdd(p1, u, V3Scale(p2, v)));
					patchNormal = (pointOnSegment - pointOnTriangle).getNormalized();// V3Normalize(V3Sub(pointOnSegment, pointOnTriangle));
					delayContacts = true;
				}
			}
#endif
			// Face contacts first, then edge-edge contacts appended; track the
			// deepest separation across all generated contacts.
			nbOutputContacts = generateContacts(v0, v1, v2, triNormal, patchNormal, p0, p1, inflatedRadius, contacts, separations);
			generateEEContacts(v0, v1, v2, patchNormal, p0, p1, sqInflatedRadius, contacts, separations, nbOutputContacts);
			for (PxU32 i = 0; i < nbOutputContacts; ++i)
			{
				//transform contact back to the capsule space
				contacts[i] = meshToCapsule.transform(contacts[i]);
				const PxReal pen = separations[i];
				separation = PxMin(separation, pen);
			}
		}
	}
	// The outputs below are written unconditionally, even when no contact was
	// generated (separation stays PX_MAX_F32, nbOutputContacts stays 0).
	SphereTriIntermediateData* PX_RESTRICT sphereTriInterm = *sphereTriIntermPtr;
	PxU32* PX_RESTRICT orderedSphereTriInterm = *orderedSphereTriIntermPtr;
	sphereTriInterm[sphereTriPairOffset].gpuTriIndex = triangleIdx;
	//if (nbOutputContacts)
	//	printf("triangleIdx: %d; nbOutputContacts: %d; delayContacts %d; edgeContact: %d\n", triangleIdx, nbOutputContacts, delayContacts, edgeContact);
	// VR: bits 30 and 31 are used for the search flags
	assert(triangleIdx < (1 << 30));
	// Tag the ordered index with the contact kind (the 'else' binds to the
	// inner 'if'): immediate face contact or deferred edge contact.
	PxU32 orderedTriangleIdx = triangleIdx;
	if (nbOutputContacts)
		if (!delayContacts) // not dalayed -> face contact
			orderedTriangleIdx |= TriangleSearchFlags::eFACE_CONTACT;
		else if (edgeContact)
			orderedTriangleIdx |= TriangleSearchFlags::eEDGE_CONTACT;
	orderedSphereTriInterm[sphereTriPairOffsetPadded] = orderedTriangleIdx;
	/*sphereTriInterm[sphereTriPairOffset].triAdjTrisIds[0] = triAdjTrisIdx.x;
	sphereTriInterm[sphereTriPairOffset].triAdjTrisIds[1] = triAdjTrisIdx.y;
	sphereTriInterm[sphereTriPairOffset].triAdjTrisIds[2] = triAdjTrisIdx.z;*/
	//PxReal* PX_RESTRICT sphereTriMaxDepth = sphereTriMaxDepthPtr[sphereTriPairOffset];
	PxReal* PX_RESTRICT capsuleTriMaxDepth = *sphereTriMaxDepthPtr;
	capsuleTriMaxDepth[sphereTriPairOffset] = separation;
	SphereTriNormalAndIndex* PX_RESTRICT sphereTriNI = *sphereTriNIPtr;
	PxU32 delayContactMask = delayContacts ? SphereTriNormalAndIndex::DeferredContactMask : 0;
	//rotate the normal into A space
	sphereTriNI[sphereTriPairOffset].normal = meshToCapsule.rotate(-patchNormal);
	// NOTE(review): stores the GPU triangle index here, whereas
	// sphereTrimeshNarrowphaseCore stores the remapped CPU index
	// (this overload of readTriangleMesh does not fetch the face remap) —
	// confirm this difference is intentional.
	sphereTriNI[sphereTriPairOffset].index = delayContactMask + (nbOutputContacts << SphereTriNormalAndIndex::NbContactsShift) + triangleIdx;
	////printf("sphereTriPairOffset %i nbOutputContacts%i \n", sphereTriPairOffset, nbOutputContacts);
	return nbOutputContacts;
}
// Narrowphase kernel for convex-vs-triangle-mesh pairs produced by the
// midphase. One warp processes one convex/triangle pair: the shapes are staged
// cooperatively into shared memory, a potential shape-order flip is resolved
// (the mesh must be shape B), then the per-warp core routine runs.
extern "C" __global__
//__launch_bounds__(NP_TRIMESH_WARPS_PER_BLOCK * WARP_SIZE, 32 / NP_TRIMESH_WARPS_PER_BLOCK)
void convexTrimeshNarrowphase(
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair, // per CM
	ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr, // per cvx-tri
	ConvexTriContacts** PX_RESTRICT cvxTriContactsPtr, // per cvx-tri
	PxReal** PX_RESTRICT cvxTriMaxDepthPtr, // per cvx-tri
	ConvexTriIntermediateData** PX_RESTRICT cvxTriIntermPtr, // per cvx-tri
	PxU32** PX_RESTRICT orderedCvxTriIntermPtr, // per cvx-tri
	PxU32** PX_RESTRICT cvxTriSecondPassPairsPtr, // per cvx-tri
	PxU8* PX_RESTRICT stackPtr, // stack memory the per-pair output arrays are carved from
	PxU32* PX_RESTRICT nbPairsGlobal, // number of midphase pairs to process
	PxU32* PX_RESTRICT nbPaddedPairsGlobal,
	PxU32* PX_RESTRICT nbSecondPassPairs, // running count of deferred pairs (atomic)
	const PxU32 stackSizeBytes,
	ConvexTriContact* tempConvexTriContacts,
	PxU32* pTempContactIndex,
	PxU32* maxTempMemRequirement,
	PxU32* midPhasePairsNeeded,
	const PxU32 nbContactManagers
)
{
	// Block-shared copies of the pointers carved out of the stack allocator by
	// midphaseAllocate(); computed by one thread, read by all.
	__shared__ ConvexTriNormalAndIndex* sCvxTriNIPtr;
	__shared__ ConvexTriContacts* sCvxTriContactsPtr;
	__shared__ PxReal* sCvxTriMaxDepthPtr;
	__shared__ ConvexTriIntermediateData* sCvxTriIntermPtr;
	__shared__ PxU32* sOrderedCvxTriIntermPtr;
	__shared__ uint4* sPairsGPU;
	__shared__ PxU32* sCvxTriSecondPassPairsPtr;
	// Per-warp scratch slice handed to the core routine (reused as ConvexMeshScratch).
	const PxU32 warpsPerBlock = NP_TRIMESH_WARPS_PER_BLOCK;
	__shared__ volatile PxU32 sharedMem[warpsPerBlock][WARP_SIZE * 16];
	volatile PxU32* s_WarpSharedMemory = sharedMem[threadIdx.y];
	// we don't need to check overflow here anymore, the midphase checks it.
	const PxU32 nbPairs = *nbPairsGlobal;
	const PxU32 nbPaddedPairs = *nbPaddedPairsGlobal;
	//each block assign the corresponding ptr from the stack memory to sCvxTriNIPtr, sCvxTriContactPtr and sCvxTriMaxDepthPtr
	if (threadIdx.x == 0 && threadIdx.y == 0)
	{
		midphaseAllocate(&sCvxTriNIPtr, &sCvxTriContactsPtr, &sCvxTriMaxDepthPtr, &sCvxTriIntermPtr, &sOrderedCvxTriIntermPtr, &sCvxTriSecondPassPairsPtr, &sPairsGPU, stackPtr, nbPairs, nbPaddedPairs);
		if (blockIdx.x == 0)
		{
			// Report worst-case temp memory so the host can grow the buffer if needed.
			atomicMax(maxTempMemRequirement, calculateConvexMeshPairMemRequirement() * (*midPhasePairsNeeded) + calculateAdditionalPadding(nbContactManagers));
		}
	}
	__syncthreads();
	// the first block writes back the pointers to global memory.
	if (threadIdx.x == 0 && threadIdx.y == 0 && blockIdx.x == 0)
	{
		*cvxTriNIPtr = sCvxTriNIPtr;
		*cvxTriContactsPtr = sCvxTriContactsPtr;
		*cvxTriMaxDepthPtr = sCvxTriMaxDepthPtr;
		*cvxTriIntermPtr = sCvxTriIntermPtr;
		*orderedCvxTriIntermPtr = sOrderedCvxTriIntermPtr;
		*cvxTriSecondPassPairsPtr = sCvxTriSecondPassPairsPtr;
	}
	// 1 warp deals with 1 convexMeshPair - so 1 convex against 1 mesh triangle.
	// Grid-stride over warps: warp id = blockIdx.x * blockDim.y + threadIdx.y.
	for (PxU32 globalWarpIndex = blockIdx.x * blockDim.y + threadIdx.y; globalWarpIndex < nbPairs; globalWarpIndex += gridDim.x * blockDim.y)
	{
		uint4 curPair = sPairsGPU[globalWarpIndex];
		const PxU32 cmIdx = curPair.x;
		PxgContactManagerInput npWorkItem;
		PxgContactManagerInput_ReadWarp(npWorkItem, cmInputs, cmIdx);
		PxU32 transformCacheRef0 = npWorkItem.transformCacheRef0;
		PxU32 transformCacheRef1 = npWorkItem.transformCacheRef1;
		PxU32 shapeRef0 = npWorkItem.shapeRef0;
		PxU32 shapeRef1 = npWorkItem.shapeRef1;
		// Per-warp staging of both shapes, loaded one 32-bit word per lane.
		__shared__ __align__(16) char sShape0[sizeof(PxgShape) * NP_TRIMESH_WARPS_PER_BLOCK];
		__shared__ __align__(16) char sShape1[sizeof(PxgShape) * NP_TRIMESH_WARPS_PER_BLOCK];
		PxgShape* shape0 = reinterpret_cast<PxgShape*>(sShape0);
		PxgShape* shape1 = reinterpret_cast<PxgShape*>(sShape1);
		if (threadIdx.x < sizeof(PxgShape) / sizeof(PxU32))
		{
			reinterpret_cast<PxU32*>(&shape0[threadIdx.y])[threadIdx.x] = reinterpret_cast<const PxU32*>(gpuShapes + shapeRef0)[threadIdx.x];
			reinterpret_cast<PxU32*>(&shape1[threadIdx.y])[threadIdx.x] = reinterpret_cast<const PxU32*>(gpuShapes + shapeRef1)[threadIdx.x];
		}
		PxgShape* shape = &shape0[threadIdx.y];
		PxgShape* trimeshShape = &shape1[threadIdx.y];
		__syncwarp();
		// If the pair arrived mesh-first, swap so the convex is always shape A
		// and the mesh shape B.
		bool flip = shape0[threadIdx.y].type == PxGeometryType::eTRIANGLEMESH;
		if (flip)
		{
			PxSwap(transformCacheRef0, transformCacheRef1);
			PxSwap(shapeRef0, shapeRef1);
			shape = &shape1[threadIdx.y];
			trimeshShape = &shape0[threadIdx.y];
		}
		convexTrimeshNarrowphaseCore(
			globalWarpIndex,
			*shape,
			*trimeshShape,
			transformCacheRef0,
			transformCacheRef1,
			curPair,
			transformCache,
			contactDistance,
			gpuShapes,
			cvxTrimeshPair,
			&sCvxTriNIPtr,
			&sCvxTriContactsPtr,
			&sCvxTriMaxDepthPtr,
			&sCvxTriIntermPtr,
			&sOrderedCvxTriIntermPtr,
			&sCvxTriSecondPassPairsPtr,
			sPairsGPU,
			nbPairsGlobal,
			nbSecondPassPairs,
			s_WarpSharedMemory,
			tempConvexTriContacts,
			stackSizeBytes,
			pTempContactIndex
		);
	}
}
// Narrowphase kernel for sphere-vs-trimesh and capsule-vs-trimesh pairs.
// One thread processes one midphase-generated (shape, triangle) pair; warp-wide
// ballots/scans are then used to compact the "second pass" pair list and to
// reserve slots for the generated contacts in the temp contact buffer.
extern "C" __global__
void sphereTrimeshNarrowphase(
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	SphereMeshPair* PX_RESTRICT sphereTrimeshPair, // per CM
	SphereTriNormalAndIndex** PX_RESTRICT sphereTriNIPtr, // per sphere-tri
	SphereTriContacts** PX_RESTRICT sphereTriContactsPtr, // per sphere-tri
	PxReal** PX_RESTRICT sphereTriMaxDepthPtr, // per sphere-tri
	SphereTriIntermediateData** PX_RESTRICT sphereTriIntermPtr, // per sphere-tri
	PxU32** PX_RESTRICT orderedSphereTriIntermPtr, // per sphere-tri
	PxU32** PX_RESTRICT sphereTriSecondPassPairsPtr, // per sphere-tri
	PxU8* PX_RESTRICT stackPtr,
	PxU32* PX_RESTRICT nbPairsGlobal,
	PxU32* PX_RESTRICT nbPaddedPairsGlobal,
	PxU32* PX_RESTRICT nbSecondPassPairs,
	const PxU32 stackSizeBytes,
	ConvexTriContact* tempConvexTriContacts,
	PxU32* pTempContactIndex,
	PxU32* maxTempMemRequirement,
	PxU32* midphasePairsNeeded,
	const PxU32 nbContactManagers
)
{
	//the pointers live in shared memory. However, the memory they point to is in global memory
	__shared__ SphereTriNormalAndIndex* sSphereTriNIPtr;
	__shared__ SphereTriContacts* sSphereTriContactsPtr;
	__shared__ PxReal* sSphereTriMaxDepthPtr;
	__shared__ SphereTriIntermediateData* sSphereTriIntermPtr;
	__shared__ PxU32* sOrderedSphereTriIntermPtr;
	__shared__ uint4* sPairsGPU;
	__shared__ PxU32* sSphereTriSecondPassPairsPtr;
	// Per-thread contact staging area: 2 slots per thread (see idx = 2*threadIdx.x
	// below), so the 128-entry arrays assume blockDim.x <= 64 — TODO confirm.
	__shared__ char sContacts[sizeof(PxVec3) * 128];
	PxVec3* shContacts = reinterpret_cast<PxVec3*>(sContacts);
	__shared__ PxReal shSeparations[128];
	const PxU32 nbPairs = *nbPairsGlobal;
	const PxU32 nbPaddedPairs = *nbPaddedPairsGlobal;
	if (threadIdx.x == 0 && threadIdx.y == 0)
	{
		// Carve the per-pair output arrays out of the collision stack (stackPtr).
		midphaseAllocate(&sSphereTriNIPtr, &sSphereTriContactsPtr, &sSphereTriMaxDepthPtr, &sSphereTriIntermPtr, &sOrderedSphereTriIntermPtr, &sSphereTriSecondPassPairsPtr, &sPairsGPU, stackPtr, nbPairs, nbPaddedPairs);
		if (blockIdx.x == 0)
		{
			// Record the worst-case temp-memory requirement for this batch.
			atomicMax(maxTempMemRequirement, calculateConvexMeshPairMemRequirement() * (*midphasePairsNeeded) + calculateAdditionalPadding(nbContactManagers));
		}
	}
	__syncthreads();
	// Publish the stack-allocated pointers to global memory so that downstream
	// kernels (sorting/correlation) can find the per-pair arrays.
	if (threadIdx.x == 0 && threadIdx.y == 0 && blockIdx.x == 0)
	{
		*sphereTriNIPtr = sSphereTriNIPtr;
		*sphereTriContactsPtr = sSphereTriContactsPtr;
		*sphereTriMaxDepthPtr = sSphereTriMaxDepthPtr;
		*sphereTriIntermPtr = sSphereTriIntermPtr;
		*orderedSphereTriIntermPtr = sOrderedSphereTriIntermPtr;
		*sphereTriSecondPassPairsPtr = sSphereTriSecondPassPairsPtr;
	}
	//each thread deals with one pair per iteration; grid-stride over all pairs.
	//The loop bound is uniform across the warp, so every lane reaches the
	//__ballot_sync/__shfl_sync calls below even when its own index is out of range.
	//for (PxU32 globalThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; globalThreadIndex < nbPairs; globalThreadIndex += gridDim.x * blockDim.x)
	for (PxU32 i = 0; i < nbPairs; i += gridDim.x * blockDim.x)
	{
		PxU32 globalThreadIndex = blockIdx.x * blockDim.x + threadIdx.x + i;
		PxU32 nbContacts = 0;
		PxU32 outMask = 0;
		uint4 curPair;
		if (globalThreadIndex < nbPairs)
		{
			//printf("globalThreadIndex %i nbPairs %i\n", globalThreadIndex, nbPairs);
			// Pair, stackPtr in convexMeshMidphase kernel
			curPair = sPairsGPU[globalThreadIndex];
			const PxU32 cmIdx = curPair.x;
			PxgContactManagerInput npWorkItem = cmInputs[cmIdx];
			const PxgShape* shape0 = &gpuShapes[npWorkItem.shapeRef0];
			const PxgShape* trimeshShape = &gpuShapes[npWorkItem.shapeRef1];
			PxGeometryType::Enum type0 = PxGeometryType::Enum(shape0->type);
			// Canonicalize the pair so that shape0 is the sphere/capsule and
			// shape1 is the triangle mesh.
			bool flip = type0 == PxGeometryType::eTRIANGLEMESH;
			if (flip)
			{
				PxSwap(npWorkItem.shapeRef0, npWorkItem.shapeRef1);
				PxSwap(npWorkItem.transformCacheRef0, npWorkItem.transformCacheRef1);
				PxSwap(shape0, trimeshShape);
				type0 = PxGeometryType::Enum(shape0->type);
			}
			// NOTE(review): the sphere path passes the staging slots by
			// reference while the capsule path passes pointers — presumably
			// matching different core signatures; verify against the headers.
			if (type0 == PxGeometryType::eSPHERE)
			{
				nbContacts = sphereTrimeshNarrowphaseCore(
					globalThreadIndex,
					npWorkItem,
					*shape0,
					*trimeshShape,
					curPair,
					transformCache,
					contactDistance,
					sphereTrimeshPair,
					&sSphereTriNIPtr,
					&sSphereTriContactsPtr,
					&sSphereTriMaxDepthPtr,
					&sSphereTriIntermPtr,
					&sOrderedSphereTriIntermPtr,
					&sSphereTriSecondPassPairsPtr,
					nbPairsGlobal,
					nbSecondPassPairs,
					shContacts[threadIdx.x*2],
					shSeparations[threadIdx.x*2],
					outMask
				);
			}
			else
			{
				nbContacts = capsuleTrimeshNarrowphaseCore(
					globalThreadIndex,
					npWorkItem,
					*shape0,
					*trimeshShape,
					curPair,
					transformCache,
					contactDistance,
					sphereTrimeshPair,
					&sSphereTriNIPtr,
					&sSphereTriContactsPtr,
					&sSphereTriMaxDepthPtr,
					&sSphereTriIntermPtr,
					&sOrderedSphereTriIntermPtr,
					&sSphereTriSecondPassPairsPtr,
					nbSecondPassPairs,
					&shContacts[threadIdx.x * 2],
					&shSeparations[threadIdx.x * 2],
					outMask
				);
			}
		}
		// Compact the per-lane second-pass masks across the warp.
		PxU32 outMaskWriteMask = __ballot_sync(FULL_MASK, outMask);
		PxU32 count = __popc(outMaskWriteMask);
		PxU32 startIndex = 0;
		const PxU32 threadIndexInWarp = threadIdx.x & 31;
		//Now output contacts!!!!
		// Inclusive prefix sum of per-lane contact counts; lane 31 holds the total.
		PxU32 inclusiveSum = warpScan<AddOpPxU32, PxU32>(FULL_MASK, nbContacts);
		PxU32 totalContacts = __shfl_sync(FULL_MASK, inclusiveSum, 31);
		if (totalContacts)
		{
			if (count)
			{
				// Lane 0 reserves space for the warp's second-pass pairs, then
				// broadcasts the base index to all lanes.
				if (threadIndexInWarp == 0)
					startIndex = atomicAdd(nbSecondPassPairs, count);
				startIndex = __shfl_sync(FULL_MASK, startIndex, 0);
				if (outMask)
				{
					// Offset = number of lanes below this one that also have a mask.
					PxU32 offset = warpScanExclusive(outMaskWriteMask, threadIndexInWarp);
					sSphereTriSecondPassPairsPtr[startIndex + offset] = outMask;
				}
			}
			// Lane 31 reserves totalContacts slots in the temp contact buffer;
			// each lane's own range starts at (exclusive scan) = inclusiveSum - nbContacts.
			if (threadIndexInWarp == 31)
				startIndex = atomicAdd(pTempContactIndex, inclusiveSum);
			startIndex = __shfl_sync(FULL_MASK, startIndex, 31);
			startIndex += inclusiveSum - nbContacts;
			// Copy this lane's staged contacts (2 shared slots per thread) out
			// to the global temp buffer.
			for (PxU32 i = 0, idx = 2*threadIdx.x; i < nbContacts; ++i, idx++)
			{
				tempConvexTriContacts[startIndex + i].contact_sepW = make_float4(shContacts[idx].x, shContacts[idx].y,
					shContacts[idx].z, shSeparations[idx]);
			}
			// nbContacts > 0 implies globalThreadIndex < nbPairs, so curPair
			// was initialized above.
			if (nbContacts)
			{
				PxU32 cmIdx = curPair.x;
				PxU32 testOffset = curPair.z;
				SphereMeshPair& pair = sphereTrimeshPair[cmIdx];
				PxU32 sphereTriPairOffset = pair.startIndex + testOffset;
				// Record where this pair's contacts start in the temp buffer.
				sSphereTriContactsPtr[sphereTriPairOffset].index = startIndex;
			}
		}
	}
}

View File

@@ -0,0 +1,379 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxMath.h"
#include "foundation/PxVec3.h"
#include "foundation/PxTransform.h"
#include "AlignedTransform.h"
#include "PxgContactManager.h"
#include "PxgNpKernelIndices.h"
#include "PxgPersistentContactManifold.h"
#include "PxsContactManagerState.h"
#include "PxgCommonDefines.h"
#include "reduction.cuh"
#include "contactReduction.cuh"
using namespace physx;
// Empty host-side entry point — presumably referenced from CPU code so this
// translation unit's kernels get linked/registered; TODO confirm.
extern "C" __host__ void initNarrowphaseKernels3() {}
#include "manifold.cuh"
#include "nputils.cuh"
#include "cudaNpCommon.h"
#include "convexNpCommon.h"
//This method compacts the depthBuffer + nicBuffer to only include the set of triangles that actually have contacts.
//These then must be sorted based on depth values to construct patches, then appended and then output.
//Warp-cooperative: all lanes in syncMask must call this together. Returns the
//number of surviving (contact-bearing) triangles; for each survivor, tempCounts
//receives the original slot index (<<4) packed with its contact count.
__device__ PX_FORCE_INLINE
int purgeEmptyTriangles(const PxU32 syncMask, PxReal* depthBuffer, ConvexTriNormalAndIndex* nicBuffer, int count, PxU32* tempCounts)
{
	const int tI = threadIdx.x;
	PxU32 nbTriangles = 0;	// triangles compacted so far
	int loaded = 0;
	// Process the pair's triangle range in warp-sized chunks.
	for (; loaded < count; loaded += 32)
	{
		int nbToLoad = PxMin(count - loaded, 32);
		PxReal d = tI < nbToLoad ? depthBuffer[loaded + tI] : FLT_MAX;
		// Guard the nicBuffer read with the same in-range test as the depth read.
		// (Previously the condition was "nbToLoad ?", which is always true inside
		// the loop, so lanes with tI >= nbToLoad read past the pair's range.)
		PxU32 nbContacts = tI < nbToLoad ? ConvexTriNormalAndIndex::getNbContacts(nicBuffer[loaded + tI].index) : 0;
		int valid = d != FLT_MAX && nbContacts > 0;
		// Stream-compact: each valid lane's destination is the number of valid
		// lanes below it plus the triangles written by previous chunks.
		int validBits = __ballot_sync(syncMask, valid);
		int dest = __popc(validBits & ((1 << tI) - 1)) + nbTriangles;
		if (valid)
		{
			depthBuffer[dest] = d;
			tempCounts[dest] = ((loaded + tI) << 4) | nbContacts;
			nicBuffer[dest] = nicBuffer[loaded + tI];
		}
		nbTriangles += __popc(validBits);
	}
	return nbTriangles;
}
// Per-warp shared-memory scratch for correlate(): a cached copy of the pair
// record (volatile — written by a warp-cooperative int copy) plus the pair's index.
struct Scratch
{
	volatile ConvexMeshPair pair;
	int pairIndex;
};
// Warp-cooperative contact correlation for one convex-vs-trimesh pair: clusters
// the per-triangle narrowphase contacts into up to MULTIMANIFOLD_MAX_MANIFOLDS
// patches (one patch per representative normal, deepest-first) and writes the
// resulting persistent multi-manifold. Each warp (threadIdx.y) handles the pair
// at index threadIdx.y + CORRELATE_WARPS_PER_BLOCK * blockIdx.x.
__device__
void correlate(
	const ConvexMeshPair* PX_RESTRICT meshPairBuffer,
	ConvexTriNormalAndIndex ** PX_RESTRICT nicBufferPtr,
	const ConvexTriContacts ** PX_RESTRICT contactBufferPtr,
	PxReal ** PX_RESTRICT depthBufferPtr,
	PxU32* PX_RESTRICT tempCounts,
	Scratch* PX_RESTRICT s,
	PxgPersistentContactMultiManifold* PX_RESTRICT outBuffer,
	PxsContactManagerOutput* PX_RESTRICT cmOutputs,
	const PxU32 numPairs,
	const PxReal clusterBias,
	ConvexTriContact* PX_RESTRICT tempConvexTriContacts,
	PxU32* PX_RESTRICT pTempContactIndex
)
{
	// Per-warp staging for the gather phase (indexed by threadIdx.y = warp slot).
	__shared__ PxU32 triangleRunSums[CORRELATE_WARPS_PER_BLOCK][WARP_SIZE];
	__shared__ PxU32 startIndices[CORRELATE_WARPS_PER_BLOCK][WARP_SIZE];
	__shared__ PxU32 triIndices[CORRELATE_WARPS_PER_BLOCK][WARP_SIZE];
	PxU32* triRunSums = triangleRunSums[threadIdx.y];
	PxU32* triStartIndex = startIndices[threadIdx.y];
	PxU32* triInds = triIndices[threadIdx.y];
	const int pairIndex = threadIdx.y + CORRELATE_WARPS_PER_BLOCK * blockIdx.x;
	if (pairIndex >= numPairs)
		return;
	s->pairIndex = pairIndex;
	const int tI = threadIdx.x;
	// Warp-cooperative copy of the pair record into shared memory, one int per
	// lane — assumes sizeof(ConvexMeshPair) <= 32 * 4 bytes; TODO confirm.
	if (tI < sizeof(ConvexMeshPair) / 4)
		((int*)(&s->pair))[tI] = ((int*)(meshPairBuffer + pairIndex))[tI];
	__syncwarp();
	int start = s->pair.startIndex, count = s->pair.count;
	if (count == CONVEX_TRIMESH_CACHED)
	{
		// We've hit cache, nothing to do here
		return;
	}
	ConvexTriNormalAndIndex* PX_RESTRICT nicBuffer = *nicBufferPtr;
	const ConvexTriContacts* PX_RESTRICT contactBuffer = *contactBufferPtr;
	PxReal* PX_RESTRICT depthBuffer = *depthBufferPtr;
	// Compact away triangles without contacts; nbTriangles survivors remain at
	// the front of the pair's range.
	PxU32 nbTriangles = purgeEmptyTriangles(FULL_MASK, depthBuffer + start, nicBuffer + start, count, tempCounts + start);
	__syncwarp();
	if (nbTriangles == 0)
	{
		// NOTE(review): this early-out indexes outBuffer by pairIndex, while the
		// writes at the end of this function index it by s->pair.cmIndex —
		// confirm the two always coincide.
		outBuffer[pairIndex].mNbManifolds = 0;
		return;
	}
	//Now, we loop creating patches and contacts from the triangles that actually have contacts!
	PxU32 nbPatches = 0;
	PxU32 numContactsTotal = 0;
	PxU32 remainingTriangles = nbTriangles;
	for (; nbPatches < MULTIMANIFOLD_MAX_MANIFOLDS && remainingTriangles != 0; nbPatches++)
	{
		//We loop through all triangles, load contacts, compress etc...
		//(1) Find deepest triangle... (per-lane min, then warp-wide min reduction)
		int bestIndex = 0;
		PxReal bestDepth = PX_MAX_F32;
		for (PxU32 i = threadIdx.x; i < nbTriangles; i += 32)
		{
			if (i < nbTriangles)
			{
				if (tempCounts[start + i]) //Still has contacts!
				{
					PxReal d = depthBuffer[start + i];
					if (d < bestDepth)
					{
						bestDepth = d;
						bestIndex = start + i;
					}
				}
			}
		}
		// Reduce to the warp-wide deepest triangle and broadcast its index.
		int i0 = minIndex(bestDepth, FULL_MASK, bestDepth);
		bestIndex = __shfl_sync(FULL_MASK, bestIndex, i0);
		//Now we know which triangle defines this plane...
		PxVec3 bestNormal = nicBuffer[bestIndex].normal;
		PxU32 currentContactMask = 0;	// ballot of lanes currently holding a patch contact
		PxVec3 pA(0.f);
		PxReal separation = 0.f;
		PxU32 contactTriangleIndex = 0;
		bool hasContact = false;
		// (2) Sweep all triangles in warp-sized chunks, accepting those whose
		// normal is close enough to the patch normal, and gather their contacts.
		for (PxU32 i = 0; i < nbTriangles; i += 32)
		{
			PxVec3 normal(0.f);
			PxU32 triIndex = 0;
			PxU32 nbContacts = 0;
			PxU32 startIndex = 0;
			if ((i + threadIdx.x) < nbTriangles)
			{
				PxU32 index = tempCounts[start + i + threadIdx.x];
				//We read contacts from the temp contacts buffer because this will get cleared when the contacts are considered
				//for a patch...
				nbContacts = index & 7;
				startIndex = index >> 4;
				if (nbContacts)
				{
					ConvexTriNormalAndIndex nic = nicBuffer[start + i + threadIdx.x];
					normal = nic.normal;
					triIndex = ConvexTriNormalAndIndex::getTriangleIndex(nic.index);
				}
			}
			// Accept triangles whose normal is within the patch tolerance of the
			// representative normal (cleared triangles have a zero normal, so
			// dot == 0 and they are never re-accepted).
			bool accept = normal.dot(bestNormal) > PATCH_ACCEPTANCE_EPS;
			if(!accept)
				nbContacts = 0;
			PxU32 acceptMask = __ballot_sync(FULL_MASK, accept);
			remainingTriangles -= __popc(acceptMask);
			// Exclusive run-sums let the gather below map a flat contact index
			// back to its owning lane/triangle via binary search.
			PxU32 summedContacts = warpScan<AddOpPxU32, PxU32>(FULL_MASK, nbContacts);
			triRunSums[threadIdx.x] = summedContacts - nbContacts;
			triStartIndex[threadIdx.x] = startIndex;
			triInds[threadIdx.x] = triIndex;
			PxU32 totalContacts = __shfl_sync(FULL_MASK, summedContacts, 31);
			__syncwarp();
			// (3) Fill free lanes with contacts, reducing whenever we exceed the
			// submanifold capacity.
			for (PxU32 c = 0; c < totalContacts;)
			{
				//calculate my read index (where I'm offset from...)
				PxU32 readMask = ~currentContactMask;	// lanes free to take a new contact
				bool needsContact = readMask & (1 << threadIdx.x);
				PxU32 readIndex = c + warpScanExclusive(readMask, threadIdx.x);
				if (needsContact && readIndex < totalContacts)
				{
					// Map flat readIndex -> (lane, offset) via the run sums.
					PxU32 idx = binarySearch<PxU32>(triRunSums, 32u, readIndex);
					PxU32 offset = readIndex - triRunSums[idx];
					PxU32 startIdx = triStartIndex[idx] + start;
					const PxU32 contactStartIdx = contactBuffer[startIdx].index;
					float4 v = tempConvexTriContacts[contactStartIdx + offset].contact_sepW;
					pA = PxVec3(v.x, v.y, v.z);
					separation = v.w;
					contactTriangleIndex = triInds[idx];
					hasContact = true;
				}
				currentContactMask = __ballot_sync(FULL_MASK, hasContact); //Mask every thread that has a contact!
				c += __popc(readMask);
				PxU32 nbContacts = __popc(currentContactMask);
				if (nbContacts > SUBMANIFOLD_MAX_CONTACTS)
				{
					// Cull down to the best SUBMANIFOLD_MAX_CONTACTS contacts.
					currentContactMask = contactReduce<true, false, SUBMANIFOLD_MAX_CONTACTS, false>(pA, separation, bestNormal, currentContactMask, clusterBias);
				}
				hasContact = currentContactMask & (1 << threadIdx.x);
			}
			__syncwarp(); //triRunSums is read and written in the same loop - separate read and write with syncs
			if (accept)
			{
				tempCounts[start + i + threadIdx.x] = 0; //Clear this triangle - it is done!
			}
		}
		__syncwarp();
		PxU32 nbContacts = __popc(currentContactMask);
		//When we reach here, we have handled all the triangles, so we should output this patch...
		PxgContact * cp = outBuffer[s->pair.cmIndex].mContacts[nbPatches] + warpScanExclusive(currentContactMask, tI);
		if (threadIdx.x == 0)
			outBuffer[s->pair.cmIndex].mNbContacts[nbPatches] = nbContacts;
		numContactsTotal += nbContacts;
		if (currentContactMask & (1 << tI))
		{
			PxVec3 nor = ldS(s->pair.aToB).rotate(bestNormal);
			cp->normal = -nor;
			cp->pointA = pA;
			// TODO: PxTransform::transform is incredibly profligate with registers (i.e. the write-out phase uses
			// more regs than the contact culling phase). Better would be to stash the transform in matrix form
			// when not under register pressure (i.e. when initially loading it) then use a custom shmem
			// matrix multiply, which requires essentially no tmps
			PxVec3 pointB = ldS(s->pair.aToB).transform(pA) + nor * separation;
			cp->pointB = pointB;
			cp->penetration = separation;
			cp->triIndex = contactTriangleIndex;
			/*printf("cp normal(%f, %f, %f)\n", cp->normal.x, cp->normal.y, cp->normal.z);
			printf("cp pointA(%f, %f, %f)\n", pA.x, pA.y, pA.z);
			printf("cp pointB(%f, %f, %f)\n", pointB.x, pointB.y, pointB.z);*/
		}
	}
	if (nbPatches > 0)
	{
		// TODO avoroshilov: decide what to do with aligned transforms
		PxAlignedTransform aToB_aligned;// = s->pair.aToB;
		aToB_aligned.p.x = s->pair.aToB.p.x;
		aToB_aligned.p.y = s->pair.aToB.p.y;
		aToB_aligned.p.z = s->pair.aToB.p.z;
		aToB_aligned.q.q.x = s->pair.aToB.q.x;
		aToB_aligned.q.q.y = s->pair.aToB.q.y;
		aToB_aligned.q.q.z = s->pair.aToB.q.z;
		aToB_aligned.q.q.w = s->pair.aToB.q.w;
		PxAlignedTransform_WriteWarp(&outBuffer[s->pair.cmIndex].mRelativeTransform, aToB_aligned);
	}
	outBuffer[s->pair.cmIndex].mNbManifolds = nbPatches;
}
// Kernel wrapper around correlate(): one warp (threadIdx.y) per convex-trimesh
// pair, each with a private Scratch slot carved out of raw shared memory.
extern "C" __global__
void convexTrimeshCorrelate(
	const ConvexMeshPair * PX_RESTRICT pairs,
	PxReal ** PX_RESTRICT depthsPtr,
	ConvexTriNormalAndIndex ** PX_RESTRICT normalAndIndexPtr,
	const ConvexTriContacts ** PX_RESTRICT contactsPtr,
	PxgPersistentContactMultiManifold * PX_RESTRICT output,
	PxsContactManagerOutput * PX_RESTRICT cmOutputs,
	PxU32* PX_RESTRICT tempCountAndOffsetBuffer,
	const PxU32 numPairs,
	const PxReal clusterBias,
	ConvexTriContact* PX_RESTRICT tempConvexTriContacts,
	PxU32* pTempContactIndex
)
{
	__shared__ char scratchStorage[sizeof(Scratch) * CORRELATE_WARPS_PER_BLOCK];
	Scratch* warpScratch = reinterpret_cast<Scratch*>(scratchStorage) + threadIdx.y;
	correlate(pairs, normalAndIndexPtr, contactsPtr, depthsPtr,
		tempCountAndOffsetBuffer, warpScratch, output, cmOutputs,
		numPairs, clusterBias, tempConvexTriContacts, pTempContactIndex);
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,347 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxAssert.h"
#include "foundation/PxVec3.h"
#include "PxContact.h"
#include "PxsMaterialCore.h"
#include "PxsContactManagerState.h"
#include "PxgPersistentContactManifold.h"
#include "PxgConvexConvexShape.h"
#include "PxgContactManager.h"
#include "cudaNpCommon.h"
#include "PxsTransformCache.h"
#include "dataReadWriteHelper.cuh"
#include "materialCombiner.cuh"
#include "geometry/PxGeometry.h"
#include <assert.h>
using namespace physx;
// Empty host-side entry point — presumably referenced from CPU code so this
// translation unit's kernels get linked/registered; TODO confirm.
extern "C" __host__ void initNarrowphaseKernels5() {}
// Fills out one compressed contact-patch header: combines the two shapes'
// materials into friction/restitution/damping values and stores the patch
// normal, contact range and material bookkeeping. No contact points are
// written here — only the patch record at patchWriteIndex.
__device__ void writeCompressedContact(
	PxContactPatch* PX_RESTRICT patches, PxU32 patchWriteIndex, PxU32 contactWriteIndex,
	const PxU32 nbContacts,
	const PxVec3 normal,
	const PxU32 materialIndex0, const PxU32 materialIndex1,
	const PxsMaterialData* PX_RESTRICT materials
)
{
	// Combine the pair's materials into the values stored on the patch.
	PxU32 combinedFlags;
	PxReal combinedStaticFriction, combinedDynamicFriction, combinedRestitution, combinedDamping;
	combineMaterials(materials, static_cast<PxU16>(materialIndex0), static_cast<PxU16>(materialIndex1),
		combinedFlags, combinedStaticFriction, combinedDynamicFriction,
		combinedRestitution, combinedDamping);

	PxContactPatch& dst = patches[patchWriteIndex];

	// Geometry: patch normal plus the [startContactIndex, +nbContacts) range
	// into the contact stream.
	dst.normal = normal;
	dst.nbContacts = nbContacts;
	dst.startContactIndex = contactWriteIndex;

	//KS - we could probably compress this further into the header but the complexity might not be worth it
	dst.staticFriction = combinedStaticFriction;
	dst.dynamicFriction = combinedDynamicFriction;
	dst.restitution = combinedRestitution;
	dst.damping = combinedDamping;
	dst.materialIndex0 = materialIndex0;
	dst.materialIndex1 = materialIndex1;
	dst.materialFlags = static_cast<PxU8>(combinedFlags);

	// No mass modification for mesh contacts: all scales default to 1.
	dst.mMassModification.linear0 = 1.f;
	dst.mMassModification.linear1 = 1.f;
	dst.mMassModification.angular0 = 1.f;
	dst.mMassModification.angular1 = 1.f;

	dst.internalFlags = PxContactPatch::eHAS_FACE_INDICES;
}
// Averages a per-lane value over the first nbContacts lanes of a contact group
// via a __shfl_down_sync tree (radii 4, 2, 1, i.e. groups of up to 8 lanes).
// Assumes the group's contacts occupy consecutive lanes and nbContacts <= 8 —
// TODO confirm against PXG_SUBMANIFOLD_MAX_CONTACTS. Only the group's first
// lane receives the correct average; other lanes hold partial sums.
__device__ inline float getAvgContactValue(float val, PxU32 nbContacts)
{
	PxReal avgValue = val;
#pragma unroll 3
	for (PxU32 reductionRadius = 4; reductionRadius > 0; reductionRadius >>= 1)
	{
		PxReal tmp = __shfl_down_sync(FULL_MASK, avgValue, reductionRadius);
		// Only fold in lanes that actually hold a contact for this group.
		if (reductionRadius < nbContacts)
			avgValue += tmp;
	}
	return avgValue / PxReal(nbContacts);
}
// Returns the manifold's patch normal in world space. Despite the historical
// "average each normal" description, the current implementation takes the
// FIRST contact's normal (contacts[0]), rotates it by meshTransform and
// normalizes. Lanes beyond numContacts use a zero vector (getNormalized of a
// zero vector — result for those lanes is unused by the caller, presumably).
// manifoldIndex and threadIndexInWarp are currently unused.
__device__ static inline PxVec3 getWorldNormal(
	const PxgContact* contacts, const PxTransform& meshTransform,
	const PxU32 numContacts, const PxU32 manifoldIndex, const PxU32 threadIndexInWarp, const PxU32 threadIndexInManifold
)
{
	PxVec3 normal = (threadIndexInManifold >= numContacts) ? PxVec3(0, 0, 0) : contacts[0].normal;
	//transform into world space
	PxVec3 unNormalizedNormal = meshTransform.rotate(normal);
	return unNormalizedNormal.getNormalized();
}
//Each block has 8 warps and each warp deals with one pair.
//Max number of contacts in each PxgPersistentContactMultiManifold is 24 (maximum 4 single manifolds and maximum 6 contacts in each manifold).
//Lanes are partitioned into groups of PXG_SUBMANIFOLD_MAX_CONTACTS, one group
//per manifold; each manifold becomes one PxContactPatch in the patch stream.
extern "C" __global__
void convexTrimeshFinishContacts(
	const ConvexMeshPair * pairs,
	const PxsCachedTransform * transformCache,
	const PxgShape* PX_RESTRICT gpuShapes,
	const PxgContactManagerInput * cmInputs,
	PxsContactManagerOutput * cmOutputs,
	const PxgPersistentContactMultiManifold* cmMultiManifold,
	const PxU32 numPairs,
	const PxsMaterialData* PX_RESTRICT materials,
	PxU8* PX_RESTRICT contactStream,//contact and pen
	PxU8* PX_RESTRICT patchStream, // normal and friction
	PxU8* PX_RESTRICT forceAndIndiceStream, //triangle(face) index is appended after the force stream
	bool insertAveragePoint,
	PxgPatchAndContactCounters* PX_RESTRICT patchAndContactCounters,
	PxU32* touchChangeFlags,
	PxU32* patchChangeFlags,
	PxU8* startContactPatches,
	PxU8* startContactPoints,
	PxU8* startContactForces,
	PxU32 patchBytesLimit,
	PxU32 contactBytesLimit,
	PxU32 forceBytesLimit
)
{
	const PxU32 globalThreadIndex = threadIdx.x + blockIdx.x * blockDim.x;
	const PxU32 globalWarpIndex = globalThreadIndex / WARP_SIZE;
	if(globalWarpIndex >= numPairs)
		return;
	const PxgPersistentContactMultiManifold& multiManifold = cmMultiManifold[globalWarpIndex];
	PxU32 nbManifolds = multiManifold.mNbManifolds;
	const PxU32 threadIndexInWarp = threadIdx.x & (WARP_SIZE - 1);
	// Which manifold this lane serves, and its slot within that manifold.
	const PxU32 singleManifoldIndex = threadIndexInWarp / PXG_SUBMANIFOLD_MAX_CONTACTS;
	const PxU32 threadIndexInManifold = threadIndexInWarp % PXG_SUBMANIFOLD_MAX_CONTACTS;
	const PxU32 numContacts = (singleManifoldIndex >= nbManifolds) ? 0 : multiManifold.mNbContacts[singleManifoldIndex];
	const bool hasContacts = threadIndexInManifold < numContacts;
	PxU32 contactMask = __ballot_sync(FULL_MASK, (PxU32) hasContacts);
	PxU32 totalNumContacts = __popc(contactMask);
	const PxU32 writeIndex = warpScanExclusive(contactMask, threadIndexInWarp); //tells me how many threads preceding me had contacts (i.e. what my write index will be)
	//Each single manifold should be one patch
	PxU32 patchByteOffset = 0xFFFFFFFF;
	PxU32 contactByteOffset = 0xFFFFFFFF;
	PxU32 forceAndIndiceByteOffset = 0xFFFFFFFF;
	PxsContactManagerOutput* output = cmOutputs + globalWarpIndex;
	// allflagsStart packs (prevPatches | statusFlags | nbPatches | pad) — see the
	// merge() write-back below.
	PxU32 allflags = reinterpret_cast<PxU32*>(&output->allflagsStart)[0];
	PxU8 oldStatusFlags = u16Low(u32High(allflags));
	PxU8 statusFlags = oldStatusFlags;
	statusFlags &= (~PxsContactManagerStatusFlag::eTOUCH_KNOWN);
	if (totalNumContacts != 0)
		statusFlags |= PxsContactManagerStatusFlag::eHAS_TOUCH;
	else
		statusFlags |= PxsContactManagerStatusFlag::eHAS_NO_TOUCH;
	PxU8 prevPatches = u16High(u32Low(allflags)); //Get out the current number of patches to store as the previous frame's number of patches
	bool overflow = false;
	// Lane 0 reserves stream space and publishes the CM output record.
	if (threadIndexInWarp == 0 )
	{
		// NOTE(review): only lane 0's totalNumContacts includes the optional
		// average points; other lanes keep the pre-adjustment value, which the
		// faceIndex offset below is computed from — confirm behavior when
		// insertAveragePoint is true.
		PxU32 nbInsertAveragePoint = insertAveragePoint ? nbManifolds : 0;
		totalNumContacts = totalNumContacts + nbInsertAveragePoint;
		if (totalNumContacts)
		{
			patchByteOffset = atomicAdd(&(patchAndContactCounters->patchesBytes), sizeof(PxContactPatch)*nbManifolds);
			contactByteOffset = atomicAdd(&(patchAndContactCounters->contactsBytes), sizeof(PxContact) * totalNumContacts);
			forceAndIndiceByteOffset = atomicAdd(&(patchAndContactCounters->forceAndIndiceBytes), sizeof(PxU32) * totalNumContacts * 2);
			// On any stream overflow: flag the error, poison the offset and
			// report the pair as having no touch.
			if ((patchByteOffset + sizeof(PxContactPatch) * nbManifolds) > patchBytesLimit)
			{
				patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::PATCH_BUFFER_OVERFLOW);
				patchByteOffset = 0xFFFFFFFF;
				overflow = true;
			}
			else if ((contactByteOffset + sizeof(PxContact) * totalNumContacts) > contactBytesLimit)
			{
				patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::CONTACT_BUFFER_OVERFLOW);
				contactByteOffset = 0xFFFFFFFF;
				overflow = true;
			}
			else if ((forceAndIndiceByteOffset + sizeof(PxU32) * totalNumContacts * 2) > forceBytesLimit)
			{
				patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::FORCE_BUFFER_OVERFLOW);
				forceAndIndiceByteOffset = 0xFFFFFFFF;
				overflow = true;
			}
			if (overflow)
			{
				nbManifolds = totalNumContacts = 0;
				statusFlags &= (~PxsContactManagerStatusFlag::eTOUCH_KNOWN);
				statusFlags |= PxsContactManagerStatusFlag::eHAS_NO_TOUCH;
			}
			//printf("threadIdex0 nbManifolds %i\n", nbManifolds);
		}
		// Touch/patch change flags drive CPU-side event reporting.
		bool previouslyHadTouch = oldStatusFlags & PxsContactManagerStatusFlag::eHAS_TOUCH;
		bool prevTouchKnown = oldStatusFlags & PxsContactManagerStatusFlag::eTOUCH_KNOWN;
		bool currentlyHasTouch = nbManifolds != 0;
		const bool change = (previouslyHadTouch ^ currentlyHasTouch) || (!prevTouchKnown);
		touchChangeFlags[globalWarpIndex] = change;
		patchChangeFlags[globalWarpIndex] = (prevPatches != nbManifolds);
		assert(totalNumContacts < 100);
		reinterpret_cast<PxU32*>(&output->allflagsStart)[0] = merge(merge(prevPatches, statusFlags),
			merge(nbManifolds, PxU8(0)));
		output->nbContacts = totalNumContacts;
		if (!overflow)
		{
			output->contactForces = reinterpret_cast<PxReal*>(startContactForces + forceAndIndiceByteOffset);
			output->contactPatches = startContactPatches + patchByteOffset;
			output->contactPoints = startContactPoints + contactByteOffset;
		}
		else
		{
			output->contactForces = 0;
			output->contactPatches = 0;
			output->contactPoints = 0;
		}
	}
	// overflow is only set on lane 0; __any_sync broadcasts it to the warp.
	if(__any_sync(FULL_MASK, overflow) || nbManifolds == 0)
		return;
	//const uint4 npWorkItem = *(reinterpret_cast<const uint4*>(cmInputs) + globalWarpIndex);
	PxgContactManagerInput npWorkItem;
	PxgContactManagerInput_ReadWarp(npWorkItem, cmInputs, globalWarpIndex);
	PxU32 shapeRef0 = npWorkItem.shapeRef0;
	PxU32 shapeRef1 = npWorkItem.shapeRef1;
	PxU32 transformCacheRef0 = npWorkItem.transformCacheRef0;
	PxU32 transformCacheRef1 = npWorkItem.transformCacheRef1;
	// Canonicalize so that ref0 is the convex/sphere/capsule and ref1 the mesh.
	bool flip = gpuShapes[shapeRef0].type == PxGeometryType::eTRIANGLEMESH;
	if (flip)
	{
		PxSwap(shapeRef0, shapeRef1);
		PxSwap(transformCacheRef0, transformCacheRef1);
	}
	PxgShape shape0;
	PxgShape_ReadWarp(shape0, gpuShapes + shapeRef0);
	const uint2 materialIndices = pairs[globalWarpIndex].materialIndices;
	// Broadcast lane 0's reserved byte offsets to the whole warp.
	patchByteOffset = __shfl_sync(FULL_MASK, (PxI32) patchByteOffset, 0);
	contactByteOffset = __shfl_sync(FULL_MASK, (PxI32) contactByteOffset, 0);
	forceAndIndiceByteOffset = __shfl_sync(FULL_MASK, (PxI32)forceAndIndiceByteOffset, 0);
	PxContactPatch* patches = reinterpret_cast<PxContactPatch*>(patchStream + patchByteOffset);
	float4* contacts = reinterpret_cast<float4*>(contactStream + contactByteOffset);
	// Face indices are stored after the first totalNumContacts force entries.
	PxU32* faceIndex = reinterpret_cast<PxU32*>(forceAndIndiceStream + forceAndIndiceByteOffset + totalNumContacts*sizeof(PxU32));
	PxsCachedTransform trimeshTransformCached;
	PxsCachedTransform_ReadWarp(trimeshTransformCached, transformCache + transformCacheRef1); //w is transformCacheRef1
	PxsCachedTransform sphereTransformCached;
	PxsCachedTransform_ReadWarp(sphereTransformCached, transformCache + transformCacheRef0); //w is transformCacheRef0
	//calculate patch normal and transform the normal into world space, needs to be optimized
	const PxVec3 worldNormal = getWorldNormal(&multiManifold.mContacts[singleManifoldIndex][0], trimeshTransformCached.transform, numContacts, singleManifoldIndex, threadIndexInWarp, threadIndexInManifold);
	if(hasContacts)
	{
		const PxgContact& point = multiManifold.mContacts[singleManifoldIndex][threadIndexInManifold];
		// The manifold's first lane writes the patch header.
		if(threadIndexInManifold == 0)
			writeCompressedContact(patches, singleManifoldIndex, writeIndex, numContacts, flip ? -worldNormal : worldNormal, materialIndices.x, materialIndices.y, materials);
		assert(point.pointB.isFinite());
		PxReal pen = point.penetration;
		PxVec3 worldPt;
		// For sphere/capsule, the stored contact is on the medial axis/center;
		// push it out along the normal by the radius and adjust penetration.
		if (shape0.type == PxGeometryType::eSPHERE)
		{
			const PxReal radius = shape0.scale.scale.y;
			pen = pen - radius;
			worldPt = sphereTransformCached.transform.p - worldNormal * radius;
		}
		else if (shape0.type == PxGeometryType::eCAPSULE)
		{
			const PxReal radius = shape0.scale.scale.y;
			pen = pen - radius;
			worldPt = sphereTransformCached.transform.transform(point.pointA) - worldNormal * radius;
		}
		else
		{
			worldPt = trimeshTransformCached.transform.transform(point.pointB);
		}
		//contacts[writeIndex] = make_float4(point.pointB.x, point.pointB.y, point.pointB.z, -point.penetration);
		if (contactByteOffset != 0xFFFFFFFF)
			contacts[writeIndex] = make_float4(worldPt.x, worldPt.y, worldPt.z, pen);
		/*if (threadIndexInWarp == 0)
		{
		printf("triIndex %i worldPt(%f, %f, %f)\n", point.triIndex, worldPt.x, worldPt.y, worldPt.z);
		printf("triIndex %i worldNormal(%f, %f, %f)\n", point.triIndex, worldNormal.x, worldNormal.y, worldNormal.z);
		printf("triIndex %i pen %f radius %f\n", point.triIndex, pen, shape0.scale.scale.x);
		PxVec3 p = sphereTransformCached.transform.p;
		PxQuat q = sphereTransformCached.transform.q;
		printf("sphereTransform p(%f, %f, %f), q(%f, %f, %f, %f)\n", p.x, p.y, p.z, q.x, q.y, q.z, q.w);
		}*/
		if(forceAndIndiceByteOffset != 0xFFFFFFFF)
			faceIndex[writeIndex] = point.triIndex;
	}
}

View File

@@ -0,0 +1,649 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "cuda.h"
#include "convexNpCommon.h"
#include "triangle.cuh"
#include "dataReadWriteHelper.cuh"
#include "RadixSort.cuh"
#include "reduction.cuh"
#include "geometry/PxGeometry.h"
#include "heightfieldUtil.cuh"
#include "convexTriangle.cuh"
#include "assert.h"
#include "stdio.h"
#include "PxgNpKernelIndices.h"
using namespace physx;
// Empty host-side entry point — presumably referenced from CPU code so this
// translation unit's kernels get linked/registered; TODO confirm.
extern "C" __host__ void initNarrowphaseKernels8() {}
////Based on edge edge checks
//__device__ void convexTrimeshPostProcess(
// ConvexMeshPair* PX_RESTRICT cvxTrimeshPair, // per CM
// ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr, // per cvx-tri
// ConvexTriEdgeBuffer** PX_RESTRICT cvxTriEdgeBufferPtr, // per cvx-tri
// PxU32** PX_RESTRICT cvxTriSecondPassPairsPtr, // per cvx-tri
// const uint4* PX_RESTRICT pairsGPU,
// PxU32* PX_RESTRICT nbSecondPassPairs,
// const PxU32 globalWarpIndex)
//{
// PxU32* cvxTriSecondPassPairs = *cvxTriSecondPassPairsPtr;
// ConvexTriNormalAndIndex* cvxTriNI = *cvxTriNIPtr;
//
// const PxU32 pairIdx = cvxTriSecondPassPairs[globalWarpIndex];
//
// uint4 curPair = pairsGPU[pairIdx];
//
// PxU32 cmIdx = curPair.x;
// PxU32 testOffset = curPair.z;
//
// ConvexMeshPair& pair = cvxTrimeshPair[cmIdx];
// const PxU32 nbPairsPerCM = pair.count;
// const PxU32 pairStartIndex = pair.startIndex;
// PxU32 convexTriPairOffset = pairStartIndex + testOffset;
//
// ConvexTriNormalAndIndex& cvxni = cvxTriNI[convexTriPairOffset];
// //get the number of contacts
// const PxU32 nbContacts = cvxni.index >> 28;
//
// if (nbContacts == 0)
// return;
//
// ConvexTriEdgeBuffer* cvxTriEdgeBuffer = *cvxTriEdgeBufferPtr;
//
// ConvexTriEdgeBuffer& cvxTriEdge = cvxTriEdgeBuffer[convexTriPairOffset];
//
// //load the verts into shared memory
// __shared__ int sVertsIds[2][3];
//
// if (threadIdx.x < 3)
// {
// sVertsIds[threadIdx.y][threadIdx.x] = cvxTriEdge.triVertexIds[threadIdx.x];
// }
//
// __threadfence_block();
//
// bool foundTriangle = false;
// //each triangle has 3 edges, there are 9 combinations. Each thread work with one edge vs another edge
// for (PxU32 i = 0; i < nbPairsPerCM * 9; i += WARP_SIZE)
// {
// foundTriangle = false;
// const PxU32 index = i + threadIdx.x;
//
// if (index < nbPairsPerCM * 9)
// {
//
// const PxU32 offset = index / 9;
//
// //same triangle, we don't need to compare the same triangle
// if (offset == testOffset)
// continue;
//
// const PxU32 vStartIndex0 = index % 3;
// const PxU32 vEndIndex0 = (vStartIndex0 + 1) % 3;
//
// const PxU32 vertId00 = sVertsIds[threadIdx.y][vStartIndex0];
// const PxU32 vertId01 = sVertsIds[threadIdx.y][vEndIndex0];
//
// const PxU32 vOffset = index / 3;
//
// const PxU32 vStartIndex1 = (vEndIndex0 + vOffset) % 3;
// const PxU32 vEndIndex1 = (vStartIndex1 + 1) % 3;
//
// const PxU32 neighbourOffset = pairStartIndex + offset;
//
// ConvexTriNormalAndIndex& nCvxni = cvxTriNI[neighbourOffset];
// //get the number of contacts
// const PxU32 nbGeneratedContacts = nCvxni.index >> 28;
//
// if (nbGeneratedContacts > 0)
// {
// const PxU32 vertId10 = cvxTriEdgeBuffer[neighbourOffset].triVertexIds[vStartIndex1];
// const PxU32 vertId11 = cvxTriEdgeBuffer[neighbourOffset].triVertexIds[vEndIndex1];
//
// if ((vertId00 == vertId10 || vertId00 == vertId11) &&
// (vertId01 == vertId10 || vertId01 == vertId11))
// {
// foundTriangle = true;
// }
// }
// }
//
// if (__any(foundTriangle))
// {
// break;
// }
// }
//
// if (__any(foundTriangle))
// {
// //we don't need to generate contacts for this pair. However, we have already generated contacts in
// //the convexTriangleContactGen kernel. Therefore, we just need to clear the number of contacts in
// //the corresponding ConvexTriNormalAndIndex
// cvxni.index = 0 << 28;
// }
//
//}
// Returns true if the given triangle index, tagged with the 0x80000000 search
// bit, is present in the sorted per-contact-manager triangle index list.
__device__ bool findTriangle(const PxU32* PX_RESTRICT orderedTriangleIndices, const PxU32 targetIndexWithMask, PxU32 count)
{
	const PxU32 tagged = targetIndexWithMask | 0x80000000;
	const PxU32 pos = binarySearch(orderedTriangleIndices, count, tagged);
	if (pos >= count)
		return false;
	return orderedTriangleIndices[pos] == tagged;
}
// Returns true if the given triangle index, tagged with the face-contact flag,
// is present in the sorted per-contact-manager triangle index list.
__device__ bool findFaceContactTriangle(const PxU32* PX_RESTRICT orderedTriangleIndices, const PxU32 targetIndexWithMask, PxU32 count)
{
	const PxU32 tagged = targetIndexWithMask | TriangleSearchFlags::eFACE_CONTACT;
	const PxU32 pos = binarySearch(orderedTriangleIndices, count, tagged);
	if (pos >= count)
		return false;
	return orderedTriangleIndices[pos] == tagged;
}
// Returns true if the given triangle index, tagged with the edge-contact flag,
// is present in the sorted per-contact-manager triangle index list.
__device__ bool findEdgeContactTriangle(const PxU32* PX_RESTRICT orderedTriangleIndices, const PxU32 targetIndexWithMask, PxU32 count)
{
	const PxU32 tagged = targetIndexWithMask | TriangleSearchFlags::eEDGE_CONTACT;
	const PxU32 pos = binarySearch(orderedTriangleIndices, count, tagged);
	if (pos >= count)
		return false;
	return orderedTriangleIndices[pos] == tagged;
}
//Based on neighbour face checks.
//
// One thread processes one entry of the second-pass convex-vs-triangle pair
// list. Each entry packs a pair index (low bits) plus a feature mask (high
// bits) describing which triangle feature (vertex or edge) produced the
// contact in the first pass. The pass discards edge/vertex contacts that are
// redundant because a neighbouring triangle sharing that feature already
// generated a contact; discarded pairs get their contact count cleared in the
// corresponding ConvexTriNormalAndIndex.
__device__ void convexTrimeshPostProcessCore(
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair, // per CM
	ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr, // per cvx-tri
	ConvexTriIntermediateData** PX_RESTRICT cvxTriIntermDataPtr, // per cvx-tri
	const PxU32** PX_RESTRICT orderedCvxTriIntermDataPtr, // per cvx-tri
	PxU32** PX_RESTRICT cvxTriSecondPassPairsPtr, // per cvx-tri
	PxgShape* PX_RESTRICT gpuShapes,
	const uint4* PX_RESTRICT pairsGPU,
	PxU32* PX_RESTRICT nbSecondPassPairs,
	const PxU32 globalThreadIndex)
{
	PxU32* cvxTriSecondPassPairs = *cvxTriSecondPassPairsPtr;
	ConvexTriNormalAndIndex* cvxTriNI = *cvxTriNIPtr;
	// Low 29 bits: index into pairsGPU. High bits: feature mask
	// (ConvexTriIntermediateData::eMASK).
	const PxU32 pairIdxPlusMask = cvxTriSecondPassPairs[globalThreadIndex];
	const PxU32 pairIdx = pairIdxPlusMask & 0x1fffffff;
	uint4 curPair = pairsGPU[pairIdx];
	PxU32 cmIdx = curPair.x;		// contact manager index
	PxU32 triangleIdx = curPair.y;	// triangle index within the mesh
	PxU32 testOffset = curPair.z;	// offset of this cvx-tri test within the CM's range
	PxU32 shapeID = curPair.w;		// triangle-mesh shape index
	ConvexMeshPair& pair = cvxTrimeshPair[cmIdx];
	const PxU32 nbPairsPerCM = pair.count;
	assert(nbPairsPerCM != 0);
	const PxU32 pairStartIndex = pair.startIndex;
	const PxU32 orderedPairStartIndex = pair.roundedStartIndex;
	PxU32 convexTriPairOffset = pairStartIndex + testOffset;
	ConvexTriNormalAndIndex& cvxni = cvxTriNI[convexTriPairOffset];
	//get the number of contacts
	const PxU32 nbContacts = ConvexTriNormalAndIndex::getNbContacts(cvxni.index);
	if (nbContacts == 0)
		return;
	PxgShape& trimeshShape = gpuShapes[shapeID];
	assert(trimeshShape.type == PxGeometryType::eTRIANGLEMESH);
	// Sorted, flag-tagged triangle index list for this CM (produced by the
	// sortTriangleIndices kernel); searched via find*ContactTriangle below.
	const PxU32* orderedTriIndices = (*orderedCvxTriIntermDataPtr) + orderedPairStartIndex;
	// Walk the cooked GPU triangle-mesh blob. Layout (in order): uint4 header,
	// BV32 nodes (header.w of them), vertices, triangle vertex indices,
	// triangle adjacency, face remap, accumulated vertex->triangle reference
	// offsets (numVerts + 1), triangle reference list.
	const PxU8 * trimeshGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape.hullOrMeshPtr);
	const uint4 trimesh_nbVerts_nbTris_nbAdjVertsTotal = *reinterpret_cast<const uint4 *>(trimeshGeomPtr);
	trimeshGeomPtr += sizeof(uint4);
	trimeshGeomPtr += sizeof(const Gu::BV32DataPacked)* trimesh_nbVerts_nbTris_nbAdjVertsTotal.w;
	const PxU32 numVerts = trimesh_nbVerts_nbTris_nbAdjVertsTotal.x;
	const PxU32 numTris = trimesh_nbVerts_nbTris_nbAdjVertsTotal.y;
	//const float4 * trimeshVerts = reinterpret_cast<const float4 *>(trimeshGeomPtr);
	trimeshGeomPtr += sizeof(float4)* numVerts;
	const uint4 * trimeshTriIndices = reinterpret_cast<const uint4 *>(trimeshGeomPtr);
	trimeshGeomPtr += sizeof(uint4)* numTris;
	const uint4* PX_RESTRICT trimeshTriAdjacencies = reinterpret_cast<const uint4 *>(trimeshGeomPtr);
	trimeshGeomPtr += sizeof(uint4)* numTris;
	trimeshGeomPtr += sizeof(PxU32) * numTris; //mGRB_faceRemap
	const PxU32* accumulatedTriangleRefs = reinterpret_cast<const PxU32*>(trimeshGeomPtr);
	trimeshGeomPtr += sizeof(PxU32) * (numVerts + 1);
	const PxU32* triangleReferences = reinterpret_cast<const PxU32*>(trimeshGeomPtr);
	//ConvexTriIntermediateData* cvxTriIntermData = *cvxTriIntermDataPtr;
	//ConvexTriIntermediateData& cvxTriInterm = cvxTriIntermData[convexTriPairOffset];
	uint4 adjTriangles = trimeshTriAdjacencies[triangleIdx];
	uint4 indices = trimeshTriIndices[triangleIdx];
	// A negative-determinant scale mirrors the triangle winding: swap vertices
	// y/z and the matching adjacency slots so the feature masks still line up.
	const bool flipNormal = trimeshShape.scale.hasNegativeDeterminant();
	if(flipNormal)
	{
		PxSwap(adjTriangles.x, adjTriangles.z);
		PxSwap(indices.y, indices.z);
	}
	PxU32 mask = pairIdxPlusMask & PxU32(ConvexTriIntermediateData::eMASK);
	bool needsProcessing = false;
	PxU32 triangleIndex = 0xFFFFFFFF;	// adjacent triangle across the contact edge (edge masks)
	PxU32 vertexIdx = 0xFFFFFFFF;		// mesh vertex index of the contact vertex (vertex masks)
	uint2 adjTriangles2;				// triangles across the two edges that share the contact vertex
	if (mask == PxU32(ConvexTriIntermediateData::eV0))
	{
		vertexIdx = indices.x;
		adjTriangles2 = make_uint2(adjTriangles.x, adjTriangles.z);
		// printf("eV0\n");
	}
	else if (mask == PxU32(ConvexTriIntermediateData::eV1))
	{
		vertexIdx = indices.y;
		adjTriangles2 = make_uint2(adjTriangles.x, adjTriangles.y);
		// printf("eV1\n");
	}
	else if (mask == PxU32(ConvexTriIntermediateData::eV2))
	{
		vertexIdx = indices.z;
		adjTriangles2 = make_uint2(adjTriangles.y, adjTriangles.z);
		// printf("eV2\n");
	}
	else if (mask == PxU32(ConvexTriIntermediateData::eE01))
	{
		triangleIndex = adjTriangles.x;
		// printf("eE01\n");
		// No neighbour across this edge -> keep the contact.
		if(triangleIndex == 0xFFFFFFFF)
			needsProcessing = true;
	}
	else if (mask == PxU32(ConvexTriIntermediateData::eE12))
	{
		triangleIndex = adjTriangles.y;
		// printf("eE12\n");
		if(triangleIndex == 0xFFFFFFFF)
			needsProcessing = true;
	}
	else if (mask == PxU32(ConvexTriIntermediateData::eE02))
	{
		triangleIndex = adjTriangles.z;
		// printf("eE02\n");
		if(triangleIndex == 0xFFFFFFFF)
			needsProcessing = true;
	}
	//printf("triangleIdx: %d; mask: %x; triangleIndex: %d; vertexIdx: %d\n", triangleIdx, mask, triangleIndex, vertexIdx);
	if (triangleIndex != 0xFFFFFFFF)
	{
		// Edge contact: keep it if the shared edge is a boundary edge, or if the
		// neighbouring triangle did not itself produce a face contact.
		// printf("CHECKPOINT 0\n");
		needsProcessing = isBoundaryEdge(triangleIndex) || !findFaceContactTriangle(orderedTriIndices, triangleIndex, nbPairsPerCM);
		//if (!needsProcessing)
		//	printf("triangleIdx: %d; triangleIndex: %d\n", triangleIdx, triangleIndex);
	}
	else if (vertexIdx != 0xFFFFFFFF)
	{
		// printf("CHECKPOINT 1\n");
		//First, let's see if either of the edges sharing this vertex are active. If they are, we keep the contacts, otherwise
		//we look for a shared triangle...
		needsProcessing = true;
		if(!(isBoundaryEdge(adjTriangles2.x) && isBoundaryEdge(adjTriangles2.y)))
		{
			// Scan every triangle referencing this vertex; if any of them already
			// produced a face or edge contact for this CM, this vertex contact is
			// redundant and gets dropped.
			PxU32 startIdx = accumulatedTriangleRefs[vertexIdx];
			PxU32 endIdx = accumulatedTriangleRefs[vertexIdx + 1];
			for (PxU32 idx = startIdx; idx < endIdx; ++idx)
			{
				PxU32 triIdx = triangleReferences[idx];
				if (findFaceContactTriangle(orderedTriIndices, triIdx, nbPairsPerCM) ||
					findEdgeContactTriangle(orderedTriIndices, triIdx, nbPairsPerCM))
				{
					//printf("triangleIdx: %d; vertexIdx: %d\n", triangleIdx, vertexIdx);
					needsProcessing = false;
					break;
				}
			}
		}
	}
	// printf("triangleIndex %d\n", triangleIndex);
	// printf("vertexIdx %d\n", vertexIdx);
	// printf("needsProcessing %d\n", needsProcessing);
	if(!needsProcessing)
	{
		//we don't need to generate contacts for this pair. However, we have already generated contacts in
		//the convexTriangleContactGen kernel. Therefore, we just need to clear the number of contacts in
		//the corresponding ConvexTriNormalAndIndex
		// NOTE(review): this keeps only the low 24 bits of cvxni.index — confirm
		// the contact count and all flag bits live in bits 24..31.
		cvxni.index = cvxni.index & 0x0ffffff;
	}
}
extern "C" __global__
//__launch_bounds__(NP_TRIMESH_WARPS_PER_BLOCK * WARP_SIZE, 32 / NP_TRIMESH_WARPS_PER_BLOCK)
void convexTrimeshPostProcess(
ConvexMeshPair* PX_RESTRICT cvxTrimeshPair, // per CM
ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr, // per cvx-tri
ConvexTriIntermediateData** PX_RESTRICT cvxTriIntermPtr, // per cvx-tri
const PxU32** PX_RESTRICT orderedCvxTriIntermPtr, // per cvx-tri
PxU32** PX_RESTRICT cvxTriSecondPassPairsPtr, // per cvx-tri
PxU8* PX_RESTRICT stackPtr,
PxU32* PX_RESTRICT nbSecondPassPairs,
PxgShape* PX_RESTRICT shapes)
{
const PxU32 nbTpProcess = *nbSecondPassPairs;
__shared__ uint4* sPairsGPU;
if (threadIdx.x == 0 && threadIdx.y == 0)
{
sPairsGPU = reinterpret_cast<uint4 *>(stackPtr);
}
__syncthreads();
for (PxU32 globalThreadIdx = blockIdx.x * blockDim.x + threadIdx.x; globalThreadIdx < nbTpProcess; globalThreadIdx += gridDim.x * blockDim.x)
{
convexTrimeshPostProcessCore(
cvxTrimeshPair,
cvxTriNIPtr,
cvxTriIntermPtr,
orderedCvxTriIntermPtr,
cvxTriSecondPassPairsPtr,
shapes,
sPairsGPU,
nbSecondPassPairs,
globalThreadIdx);
}
}
//Based on neighbour face checks.
//
// Heightfield analogue of convexTrimeshPostProcessCore: one thread per
// second-pass convex-vs-heightfield-triangle pair. Edge/vertex contacts that
// are redundant (a neighbouring heightfield triangle sharing the feature is
// already in the contact set) get their contact count cleared.
__device__ void convexHeightfieldPostProcessCore(
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair, // per CM
	ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr, // per cvx-tri
	ConvexTriIntermediateData** PX_RESTRICT cvxTriIntermDataPtr, // per cvx-tri
	const PxU32** PX_RESTRICT orderedCvxTriIntermDataPtr, // per cvx-tri
	PxU32** PX_RESTRICT cvxTriSecondPassPairsPtr, // per cvx-tri
	PxgShape* PX_RESTRICT gpuShapes,
	const uint4* PX_RESTRICT pairsGPU,
	PxU32* PX_RESTRICT nbSecondPassPairs,
	const PxU32 globalThreadIndex)
{
	PxU32* cvxTriSecondPassPairs = *cvxTriSecondPassPairsPtr;
	ConvexTriNormalAndIndex* cvxTriNI = *cvxTriNIPtr;
	// Low 29 bits: index into pairsGPU. High bits: feature mask.
	const PxU32 pairIdxPlusMask = cvxTriSecondPassPairs[globalThreadIndex];
	const PxU32 pairIdx = pairIdxPlusMask & 0x1fffffff;
	uint4 curPair = pairsGPU[pairIdx];
	PxU32 cmIdx = curPair.x;		// contact manager index
	PxU32 triangleIdx = curPair.y;	// heightfield triangle index
	PxU32 testOffset = curPair.z;	// offset of this test within the CM's range
	PxU32 shapeID = curPair.w;		// heightfield shape index
	ConvexMeshPair& pair = cvxTrimeshPair[cmIdx];
	const PxU32 nbPairsPerCM = pair.count;
	const PxU32 pairStartIndex = pair.startIndex;
	const PxU32 orderedPairStartIndex = pair.roundedStartIndex;
	PxU32 convexTriPairOffset = pairStartIndex + testOffset;
	ConvexTriNormalAndIndex& cvxni = cvxTriNI[convexTriPairOffset];
	//get the number of contacts
	const PxU32 nbContacts = ConvexTriNormalAndIndex::getNbContacts(cvxni.index);
	if (nbContacts == 0)
		return;
	PxgShape& heightFieldShape = gpuShapes[shapeID];
	PxVec3 scale = heightFieldShape.scale.scale;
	const PxReal rowScale = scale.x;
	const PxReal columnScale = scale.z;
	PxVec3 handedness(1.0f);	// Vector to invert normal coordinates according to the heightfield scales
	bool wrongHanded = false;
	// Each negative axis scale mirrors the heightfield; an odd number of
	// mirrors flips the triangle winding (wrongHanded).
	if (columnScale < 0.f)
	{
		wrongHanded = !wrongHanded;
		handedness.z = -1.0f;
	}
	if (rowScale < 0.f)
	{
		wrongHanded = !wrongHanded;
		handedness.x = -1.0f;
	}
	// Sorted, tagged triangle index list for this CM (see sortTriangleIndices).
	const PxU32* orderedTriIndices = (*orderedCvxTriIntermDataPtr) + orderedPairStartIndex;
	// Heightfield GPU blob layout: [nbRows, nbCols, samples...].
	PxU32* heightfieldData = reinterpret_cast<PxU32*>(heightFieldShape.hullOrMeshPtr);
	const PxU32 nbRows = heightfieldData[0];
	const PxU32 nbCols = heightfieldData[1];
	PxHeightFieldSample* samples = reinterpret_cast<PxHeightFieldSample*>(&heightfieldData[2]);
	//ConvexTriIntermediateData* cvxTriIntermData = *cvxTriIntermDataPtr;
	//ConvexTriIntermediateData& cvxTriInterm = cvxTriIntermData[convexTriPairOffset];
	PxU32 tVertexIndices[3];
	PxU32 adjacencyIndices[3];
	// For wrong-handed scales, swap vertex slots 1/2 and adjacency slots 0/2 so
	// the feature masks computed in the first pass still match.
	getTriangleVertexIndices(triangleIdx, tVertexIndices[0], tVertexIndices[1 + wrongHanded], tVertexIndices[2 - wrongHanded], nbCols, samples);
	getTriangleAdjacencyIndices(triangleIdx, adjacencyIndices[wrongHanded ? 2 : 0], adjacencyIndices[1], adjacencyIndices[wrongHanded ? 0 : 2], nbRows, nbCols, samples);
	bool needsProcessing = false;
	PxU32 triangleIndex = 0xFFFFFFFF;	// adjacent triangle across the contact edge (edge masks)
	PxU32 vertexIdx = 0xFFFFFFFF;		// heightfield vertex index (vertex masks)
	PxU32 mask = pairIdxPlusMask & PxU32(ConvexTriIntermediateData::eMASK);
	if (mask == PxU32(ConvexTriIntermediateData::eV0))
	{
		vertexIdx = tVertexIndices[0];
	}
	else if (mask == PxU32(ConvexTriIntermediateData::eV1))
	{
		vertexIdx = tVertexIndices[1];
	}
	else if (mask == PxU32(ConvexTriIntermediateData::eV2))
	{
		vertexIdx = tVertexIndices[2];
	}
	else if (mask == PxU32(ConvexTriIntermediateData::eE01))
	{
		triangleIndex = PxU32(adjacencyIndices[0]);
	}
	else if (mask == PxU32(ConvexTriIntermediateData::eE12))
	{
		triangleIndex = adjacencyIndices[1];
	}
	else if (mask == PxU32(ConvexTriIntermediateData::eE02))
	{
		triangleIndex = adjacencyIndices[2];
	}
	if (triangleIndex != 0xFFFFFFFF)
	{
		// Edge contact: keep it only if the shared edge is active and the
		// neighbouring triangle is not already in the contact set.
		needsProcessing = isActiveEdge(triangleIndex) && !findTriangle(orderedTriIndices, triangleIndex, nbPairsPerCM);
	}
	else if (vertexIdx != 0xFFFFFFFF)
	{
		// Vertex contact: scan the (clamped) 3x3 cell neighbourhood around this
		// triangle's cell; if any triangle there shares the vertex and is already
		// in the contact set, this vertex contact is redundant.
		PxU32 cellIdx = triangleIdx / 2;	// two triangles per heightfield cell
		PxU32 column = cellIdx % nbCols;
		PxU32 row = cellIdx / nbCols;
		PxU32 startRow = PxMax(row, 1u) - 1;
		PxU32 endRow = PxMin(row, nbRows - 2) + 1;
		PxU32 startCol = PxMax(column, 1u) - 1;
		PxU32 endCol = PxMin(column, nbCols - 2) + 1;
		needsProcessing = true;
		for (PxU32 r = startRow; r <= endRow && needsProcessing; r++)
		{
			for (PxU32 c = startCol; c <= endCol; ++c)
			{
				// NOTE: intentionally shadows the outer cellIdx.
				PxU32 cellIdx = r * nbCols + c;
				PxU32 triIdx = cellIdx * 2;	// first (lower) triangle of the cell
				getTriangleVertexIndices(triIdx, tVertexIndices[0], tVertexIndices[1 + wrongHanded], tVertexIndices[2 - wrongHanded], nbCols, samples);
				if (tVertexIndices[0] == vertexIdx || tVertexIndices[1] == vertexIdx || tVertexIndices[2] == vertexIdx)
				{
					if (findTriangle(orderedTriIndices, triIdx, nbPairsPerCM))
					{
						needsProcessing = false;
						break;
					}
				}
				triIdx = triIdx + 1;	// second (upper) triangle of the cell
				getTriangleVertexIndices(triIdx, tVertexIndices[0], tVertexIndices[1 + wrongHanded], tVertexIndices[2 - wrongHanded], nbCols, samples);
				if (tVertexIndices[0] == vertexIdx || tVertexIndices[1] == vertexIdx || tVertexIndices[2] == vertexIdx)
				{
					if (findTriangle(orderedTriIndices, triIdx, nbPairsPerCM))
					{
						needsProcessing = false;
						break;
					}
				}
			}
		}
	}
	if (!needsProcessing)
	{
		//we don't need to generate contacts for this pair. However, we have already generated contacts in
		//the convexTriangleContactGen kernel. Therefore, we just need to clear the number of contacts in
		//the corresponding ConvexTriNormalAndIndex
		// NOTE(review): keeps only the low 24 bits — confirm the contact count
		// and flag bits all live in bits 24..31.
		cvxni.index = cvxni.index & 0x0ffffff;
	}
}
extern "C" __global__
//__launch_bounds__(NP_TRIMESH_WARPS_PER_BLOCK * WARP_SIZE, 32 / NP_TRIMESH_WARPS_PER_BLOCK)
void convexHeightfieldPostProcess(
ConvexMeshPair* PX_RESTRICT cvxTrimeshPair, // per CM
ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr, // per cvx-tri
ConvexTriIntermediateData** PX_RESTRICT cvxTriIntermPtr, // per cvx-tri
const PxU32** PX_RESTRICT orderedCvxTriIntermPtr, // per cvx-tri
PxU32** PX_RESTRICT cvxTriSecondPassPairsPtr, // per cvx-tri
PxU8* PX_RESTRICT stackPtr,
PxU32* PX_RESTRICT nbSecondPassPairs,
PxgShape* PX_RESTRICT shapes)
{
__shared__ uint4* sPairsGPU;
if (threadIdx.x == 0 && threadIdx.y == 0)
{
sPairsGPU = reinterpret_cast<uint4 *>(stackPtr);
}
__syncthreads();
const PxU32 nbTpProcess = *nbSecondPassPairs;
for (PxU32 globalThreadIdx = blockIdx.x * blockDim.x + threadIdx.x; globalThreadIdx < nbTpProcess; globalThreadIdx += gridDim.x * blockDim.x)
{
convexHeightfieldPostProcessCore(
cvxTrimeshPair,
cvxTriNIPtr,
cvxTriIntermPtr,
orderedCvxTriIntermPtr,
cvxTriSecondPassPairsPtr,
shapes,
sPairsGPU,
nbSecondPassPairs,
globalThreadIdx);
}
}
extern "C" __global__
//__launch_bounds__(NP_TRIMESH_WARPS_PER_BLOCK * WARP_SIZE, 32 / NP_TRIMESH_WARPS_PER_BLOCK)
void sortTriangleIndices(
ConvexMeshPair* PX_RESTRICT cvxTrimeshPair, // per CM
PxU32** PX_RESTRICT orderedCvxTriIntermPtr, // per cvx-tri
const PxU32 nbPairs
)
{
const PxU32 globalWarpIndex = threadIdx.y + blockIdx.x * blockDim.y;
if (globalWarpIndex >= nbPairs)
return;
PxU32 nbTriangles = cvxTrimeshPair[globalWarpIndex].count;
PxU32 startIndex = cvxTrimeshPair[globalWarpIndex].roundedStartIndex;
//Terminate if we had 0 triangles or we cached them from last frame
if (nbTriangles == 0 || nbTriangles == 0xFFFFFFFF)
return;
assert((startIndex & 3) == 0);
const PxU32 nbUint4s = (nbTriangles + 3) / 4;
const PxU32 nbWarps = NP_TRIMESH_WARPS_PER_BLOCK;
radixSortSingleWarpKeysOnly<nbWarps>(reinterpret_cast<uint4*>(*orderedCvxTriIntermPtr + startIndex),
nbTriangles, nbUint4s, reinterpret_cast<uint4*>(*orderedCvxTriIntermPtr + startIndex) + nbUint4s, 8);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,219 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxVec3.h"
#include "foundation/PxMat34.h"
#include "foundation/PxTransform.h"
#include "foundation/PxAssert.h"
#include <stdio.h>
#include "cuda.h"
#include "cuda_runtime.h"
#include "convexFormat.h"
#include "cudaNpCommon.h"
#include "PxsTransformCache.h"
#include "PxsContactManagerState.h"
#include "PxgContactManager.h"
#include "PxgConvexConvexShape.h"
#include "PxgPersistentContactManifold.h"
#include "PxgNpKernelIndices.h"
#include "PxsMaterialCore.h"
#include "PxContact.h"
#include "geometry/PxGeometry.h"
#include "SparseRemove.cuh"
#include "sphereCollision.cuh"
#include "contactPatchUtils.cuh"
using namespace physx;
extern "C" __host__ void initNarrowphaseKernels9() {}
extern "C" __global__ void sphereNphase_Kernel(
PxU32 numTests,
const PxReal toleranceLength,
const PxgContactManagerInput* PX_RESTRICT cmInputs,
PxsContactManagerOutput* PX_RESTRICT cmOutputs,
PxgShape* PX_RESTRICT shapes,
const PxsCachedTransform* PX_RESTRICT transformCache,
const PxReal* contactDistance,
const PxsMaterialData* PX_RESTRICT materials,
PxU8* PX_RESTRICT contactStream,
PxU8* PX_RESTRICT patchStream,
PxgPatchAndContactCounters* PX_RESTRICT patchAndContactCounters,
PxU32* PX_RESTRICT touchChangeFlags,
PxU32* PX_RESTRICT patchChangeFlags,
PxU8* PX_RESTRICT startContactPatches,
PxU8* PX_RESTRICT startContactPoints,
PxU8* PX_RESTRICT startContactForces,
PxU32 patchBytesLimit,
PxU32 contactBytesLimit,
PxU32 forceBytesLimit)
{
PxU32 globalThreadIndex = blockIdx.x * blockDim.x + threadIdx.x;
if (globalThreadIndex >= numTests)
return;
PxgContactManagerInput contactInput = cmInputs[globalThreadIndex];
PxU32 transformCacheRef0 = contactInput.transformCacheRef0;
PxU32 transformCacheRef1 = contactInput.transformCacheRef1;
PxsCachedTransform transformCache0 = transformCache[transformCacheRef0];
PxsCachedTransform transformCache1 = transformCache[transformCacheRef1];
const PxReal cDistance = contactDistance[transformCacheRef0] + contactDistance[transformCacheRef1];
PxgShape& shape0 = shapes[contactInput.shapeRef0];
PxgShape& shape1 = shapes[contactInput.shapeRef1];
PxGeometryType::Enum type0 = PxGeometryType::Enum(shape0.type);
PxGeometryType::Enum type1 = PxGeometryType::Enum(shape1.type);
PxVec3 scale0 = shape0.scale.scale;
PxVec3 scale1 = shape1.scale.scale;
const bool flip = (type1<type0);
//shape0 should be always sphere
if (flip)
{
PxSwap(type0, type1);
PxSwap(scale0, scale1);
PxSwap(transformCache0, transformCache1);
//printf("type0 %i type1 %i\n", type0, type1);
}
PxTransform& transform0 = transformCache0.transform;
PxTransform& transform1 = transformCache1.transform;
const PxReal sphereRadius = scale0.x;
float4 pointPen[2];
PxVec3 normal;
PxU32 nbContacts = 0;
if (type0 == PxGeometryType::eSPHERE)
{
switch (type1)
{
case PxGeometryType::eSPHERE:
{
//printf("sphere/sphere \n");
const PxReal sphereRadius1 = scale1.x;
nbContacts = spheresphere(transform0, transform1, sphereRadius, sphereRadius1, cDistance, pointPen[0], normal);
break;
}
case PxGeometryType::ePLANE:
{
//printf("sphere/plane \n");
nbContacts = sphereplane(transform0, transform1, sphereRadius, cDistance, pointPen[0], normal);
break;
}
case PxGeometryType::eCAPSULE:
{
//printf("sphere/capsule \n");
const PxReal capsuleRadius = scale1.y;
const PxReal halfHeight = scale1.x;
nbContacts = spherecapsule(transform0, transform1, sphereRadius, capsuleRadius, halfHeight, cDistance, pointPen[0], normal);
break;
}
case PxGeometryType::eBOX:
{
//printf("sphere/box \n");
const PxVec3 boxHalfExtents = scale1;
nbContacts = spherebox(transform0, transform1, sphereRadius, boxHalfExtents, cDistance, pointPen[0], normal);
break;
}
default:
break;
};
}
else
{
assert(type1 == PxGeometryType::eCAPSULE);
const PxReal capsuleRadius1 = scale1.y;
const PxReal capsuleHalfHeight1 = scale1.x;
switch (type0)
{
case PxGeometryType::ePLANE:
{
//printf("sphere/plane \n");
nbContacts = planeCapsule(transform0, transform1, capsuleRadius1, capsuleHalfHeight1, cDistance, pointPen, normal);
break;
}
case PxGeometryType::eCAPSULE:
{
////printf("sphere/capsule \n");
const PxReal capsuleRadius0 = scale0.y;
const PxReal halfHeight0 = scale0.x;
nbContacts = capsuleCapsule(transform0, transform1, capsuleRadius0, halfHeight0,
capsuleRadius1, capsuleHalfHeight1, cDistance, pointPen, normal);
break;
}
default:
break;
};
}
if (nbContacts && flip)
normal = -normal;
PxU32 contactByteOffset = setContactPointAndForcePointers(cmOutputs, patchAndContactCounters,
startContactPoints, startContactForces, contactBytesLimit, forceBytesLimit, globalThreadIndex, nbContacts);
if (contactByteOffset != 0xFFFFFFFF)
{
float4* baseContactStream = (float4*)(contactStream + contactByteOffset);
for (PxU32 i = 0; i < nbContacts; ++i)
{
float4& outPointPen = baseContactStream[i];
outPointPen = pointPen[i];
}
}
PxU32 patchIndex = registerContactPatch(cmOutputs, patchAndContactCounters, touchChangeFlags, patchChangeFlags, startContactPatches, patchBytesLimit, globalThreadIndex, nbContacts);
insertIntoPatchStream(materials, patchStream, shape0, shape1, patchIndex, normal, nbContacts);
}

View File

@@ -0,0 +1,396 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __DATA_RW_HELPER_CUH__
#define __DATA_RW_HELPER_CUH__
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxBounds3.h"
#include "PxgContactManager.h"
#include "AlignedTransform.h"
#include "PxsTransformCache.h"
#include "PxgConvexConvexShape.h"
#include "convexNpCommon.h"
#include "PxgCommonDefines.h"
#include "geometry/PxGeometry.h"
#include "utils.cuh"
#include "PxgSolverCoreDesc.h"
#include "PxgArticulationCoreDesc.h"
using namespace physx;
// Velocity snapshot consumed by the PGS solver path.
struct PxgVelocityPackPGS
{
	PxVec3 linVel;	// linear velocity
	PxVec3 angVel;	// angular velocity
};
// Velocity snapshot consumed by the TGS solver path: velocities plus the
// accumulated motion deltas of the current solve.
struct PxgVelocityPackTGS
{
	PxVec3 linVel;		// linear velocity
	PxVec3 angVel;		// angular velocity
	PxVec3 linDelta;	// accumulated linear motion delta
	PxVec3 angDelta;	// accumulated angular motion delta
};
// Shared storage for either solver path's velocity snapshot.
union PxgVelocityPack
{
	PxgVelocityPackTGS tgs;
	PxgVelocityPackPGS pgs;
	// Empty ctor leaves the contents uninitialized; presumably required to keep
	// the union default-constructible on device since the members are non-trivial.
	PX_FORCE_INLINE __device__ PxgVelocityPack() {}
};
// Reads current velocities (and, for TGS, accumulated motion deltas) for a
// rigid body or an articulation link identified by a PxNodeIndex. Static
// bodies map to solver body 0.
struct PxgVelocityReader
{
	//Shared
	const PxU32* const solverBodyIndices;					// node index -> solver body index
	//Articulation
	const float4* const PX_RESTRICT artiLinkVelocities;		// per-link velocity/delta buffers, artiOffset entries apart
	const PxU32 maxLinks;									// max links per articulation
	const PxU32 nbArticulations;
	const PxU32 artiOffset;									// stride between consecutive per-link buffers
	//Rigid
	const PxU32 numSolverBodies;
	const float4* const PX_RESTRICT solverBodyDeltaVel;		// accumulated delta velocities
	const float4* const PX_RESTRICT initialVel;				// velocities at the start of the solve
	// Mem-initializers are listed in member declaration order (fixes the
	// -Wreorder warning and makes the artiOffset = maxLinks * nbArticulations
	// dependency on already-initialized members explicit).
	template <typename IterativeData>
	PX_FORCE_INLINE __device__ PxgVelocityReader(const PxgPrePrepDesc* preDesc, const PxgSolverCoreDesc* solverCoreDesc, const PxgArticulationCoreDesc* artiCoreDesc,
		const PxgSolverSharedDesc<IterativeData>* sharedDesc, PxU32 numSolverBodies) :
		solverBodyIndices(preDesc->solverBodyIndices),
		artiLinkVelocities(solverCoreDesc->outArtiVelocity),
		maxLinks(artiCoreDesc->mMaxLinksPerArticulation),
		nbArticulations(artiCoreDesc->nbArticulations),
		artiOffset(maxLinks * nbArticulations),
		numSolverBodies(numSolverBodies),
		solverBodyDeltaVel(sharedDesc->iterativeData.solverBodyVelPool + solverCoreDesc->accumulatedBodyDeltaVOffset),
		initialVel(solverCoreDesc->outSolverVelocity)
	{
	}
	// Fills result.linVel/result.angVel for the PGS path and returns the
	// solver body index of rigidId. For articulation links the angular
	// velocities live artiOffset entries after the linear ones; for rigid
	// bodies the current velocity is initial + accumulated delta, with angular
	// data numSolverBodies entries after linear.
	template<typename Pack>
	PX_FORCE_INLINE __device__ PxU32 readVelocitiesPGS(const PxNodeIndex rigidId, Pack& result)
	{
		const PxU32 solverBodyIdx = rigidId.isStaticBody() ? 0 : solverBodyIndices[rigidId.index()];
		if (rigidId.isArticulation())
		{
			const PxU32 index = solverBodyIdx * maxLinks;
			const float4* vels = &artiLinkVelocities[index];
			const PxU32 linkID = rigidId.articulationLinkId();
			result.linVel = PxLoad3(vels[linkID]);
			result.angVel = PxLoad3(vels[linkID + artiOffset]);
		}
		else
		{
			result.linVel = PxLoad3(initialVel[solverBodyIdx]) + PxLoad3(solverBodyDeltaVel[solverBodyIdx]);
			result.angVel = PxLoad3(initialVel[solverBodyIdx + numSolverBodies]) + PxLoad3(solverBodyDeltaVel[solverBodyIdx + numSolverBodies]);
		}
		return solverBodyIdx;
	}
	// Fills result for the TGS path and returns the solver body index of
	// rigidId. Articulation links store linVel/angVel/linDelta/angDelta in four
	// buffers artiOffset entries apart; rigid bodies pack the same 12 floats
	// across three consecutive float4 slots (numSolverBodies entries apart).
	PX_FORCE_INLINE __device__ PxU32 readVelocitiesTGS(const PxNodeIndex rigidId, PxgVelocityPackTGS& result)
	{
		const PxU32 solverBodyIdx = rigidId.isStaticBody() ? 0 : solverBodyIndices[rigidId.index()];
		if (rigidId.isArticulation())
		{
			const PxU32 index = solverBodyIdx * maxLinks;
			const float4* vels = &artiLinkVelocities[index];
			const PxU32 linkID = rigidId.articulationLinkId();
			float4 linearVelocity = vels[linkID];
			float4 angularVelocity = vels[linkID + artiOffset];
			float4 linDelta = vels[linkID + artiOffset + artiOffset];
			float4 angDelta = vels[linkID + artiOffset + artiOffset + artiOffset];
			result.linVel = PxVec3(linearVelocity.x, linearVelocity.y, linearVelocity.z);
			result.angVel = PxVec3(angularVelocity.x, angularVelocity.y, angularVelocity.z);
			result.linDelta = PxVec3(linDelta.x, linDelta.y, linDelta.z);
			result.angDelta = PxVec3(angDelta.x, angDelta.y, angDelta.z);
		}
		else
		{
			float4 vel0 = solverBodyDeltaVel[solverBodyIdx];
			float4 vel1 = solverBodyDeltaVel[solverBodyIdx + numSolverBodies];
			float4 vel2 = solverBodyDeltaVel[solverBodyIdx + numSolverBodies + numSolverBodies];
			result.linVel = PxVec3(vel0.x, vel0.y, vel0.z);
			result.angVel = PxVec3(vel0.w, vel1.x, vel1.y);
			result.linDelta = PxVec3(vel1.z, vel1.w, vel2.x);
			result.angDelta = PxVec3(vel2.y, vel2.z, vel2.w);
		}
		return solverBodyIdx;
	}
};
// Warp-cooperative load of one PxgContactManagerInput (4 x 32-bit words):
// lanes 0-3 each read one word, then broadcast to all lanes via __shfl_sync.
// All 32 lanes must call this together (FULL_MASK). `input` is left undefined
// on lanes >= 4, which is fine because only lanes 0-3 are shuffle sources.
__device__ inline static
void PxgContactManagerInput_ReadWarp(PxgContactManagerInput & res, const PxgContactManagerInput * ptr, PxU32 offset)
{
	const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);
	PxU32 input;
	if (laneIdx < 4)
		input = reinterpret_cast<const PxU32 *>(ptr + offset)[laneIdx];
	res.shapeRef0 = __shfl_sync(FULL_MASK, (input), 0);
	res.shapeRef1 = __shfl_sync(FULL_MASK, (input), 1);
	res.transformCacheRef0 = __shfl_sync(FULL_MASK, (input), 2);
	res.transformCacheRef1 = __shfl_sync(FULL_MASK, (input), 3);
}
// Warp-cooperative load of one PxAlignedTransform (8 floats: quaternion then
// position): lanes 0-7 each read one float, then broadcast via __shfl_sync.
// All 32 lanes must call this together (FULL_MASK); `input` is left undefined
// on lanes >= 8, which is fine because only lanes 0-7 are shuffle sources.
__device__ inline static
void PxAlignedTransform_ReadWarp(PxAlignedTransform& res, const PxAlignedTransform* ptr)
{
	const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);
	PxReal input;
	if (laneIdx < 8)
		input = reinterpret_cast<const PxReal *>(ptr)[laneIdx];
	res.q.q.x = __shfl_sync(FULL_MASK, input, 0);
	res.q.q.y = __shfl_sync(FULL_MASK, input, 1);
	res.q.q.z = __shfl_sync(FULL_MASK, input, 2);
	res.q.q.w = __shfl_sync(FULL_MASK, input, 3);
	res.p.x = __shfl_sync(FULL_MASK, input, 4);
	res.p.y = __shfl_sync(FULL_MASK, input, 5);
	res.p.z = __shfl_sync(FULL_MASK, input, 6);
	res.p.w = __shfl_sync(FULL_MASK, input, 7);
}
// Warp-cooperative load of one PxsCachedTransform (quaternion, position, then
// integer flags): the first sizeof/4 lanes each read one 32-bit word, then
// broadcast via __shfl_sync. All 32 lanes must call this together (FULL_MASK).
// The flags word is reinterpreted back to int with __float_as_int.
__device__ inline static
void PxsCachedTransform_ReadWarp(PxsCachedTransform& res, const PxsCachedTransform* ptr)
{
	const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);
	PxReal input;
	if (laneIdx < sizeof(PxsCachedTransform)/sizeof(PxReal))
		input = reinterpret_cast<const PxReal *>(ptr)[laneIdx];
	res.transform.q.x = __shfl_sync(FULL_MASK, input, 0);
	res.transform.q.y = __shfl_sync(FULL_MASK, input, 1);
	res.transform.q.z = __shfl_sync(FULL_MASK, input, 2);
	res.transform.q.w = __shfl_sync(FULL_MASK, input, 3);
	res.transform.p.x = __shfl_sync(FULL_MASK, input, 4);
	res.transform.p.y = __shfl_sync(FULL_MASK, input, 5);
	res.transform.p.z = __shfl_sync(FULL_MASK, input, 6);
	res.flags = __shfl_sync(FULL_MASK, __float_as_int(input), 7);
}
// Warp-cooperative load of one PxBounds3 (6 floats: minimum then maximum):
// lanes 0-5 each read one float, then broadcast via __shfl_sync. All 32 lanes
// must call this together (FULL_MASK); `input` is left undefined on the
// remaining lanes, which are never used as shuffle sources.
__device__ inline static
void PxBounds3_ReadWarp(PxBounds3& res, const PxBounds3* ptr)
{
	const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);
	PxReal input;
	if (laneIdx < sizeof(PxBounds3) /sizeof(PxReal))
		input = reinterpret_cast<const PxReal *>(ptr)[laneIdx];
	res.minimum.x = __shfl_sync(FULL_MASK, input, 0);
	res.minimum.y = __shfl_sync(FULL_MASK, input, 1);
	res.minimum.z = __shfl_sync(FULL_MASK, input, 2);
	res.maximum.x = __shfl_sync(FULL_MASK, input, 3);
	res.maximum.y = __shfl_sync(FULL_MASK, input, 4);
	res.maximum.z = __shfl_sync(FULL_MASK, input, 5);
}
// Warp-cooperative store of a PxsCachedTransform: lanes 0-7 each copy one
// 32-bit word from 'inp' to 'ptr' (assumes the struct is exactly 8 words,
// matching PxsCachedTransform_ReadWarp above). Lanes >= 8 do nothing, so
// partial warp participation is harmless here (no shuffles involved).
__device__ inline static
void PxsCachedTransform_WriteWarp(PxsCachedTransform* ptr, const PxsCachedTransform& inp)
{
const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);
if (laneIdx < 8)
{
uint input = reinterpret_cast<const uint *>(&inp)[laneIdx];
reinterpret_cast<uint *>(ptr)[laneIdx] = input;
}
}
// Warp-cooperative store of a PxAlignedTransform: lanes 0-7 each copy one
// 32-bit word (quaternion x/y/z/w, position x/y/z/w) from 'inp' to 'ptr'.
// Lanes >= 8 do nothing; no shuffles are used, so divergence is harmless.
__device__ inline static
void PxAlignedTransform_WriteWarp(PxAlignedTransform* ptr, const PxAlignedTransform& inp)
{
const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);
if (laneIdx < 8)
{
uint input = reinterpret_cast<const uint *>(&inp)[laneIdx];
reinterpret_cast<uint *>(ptr)[laneIdx] = input;
}
}
// Warp-cooperative load of a PxgShape. The struct is read as consecutive
// 32-bit words: words 0-2 scale.scale, words 3-6 scale.rotation (quaternion),
// word 7 materialIndex, word 8 (plus word 9 on 64-bit pointer builds) the
// hullOrMeshPtr, followed by type and particleOrSoftbodyId.
// All 32 lanes must participate (FULL_MASK shuffles); 'input' is left
// uninitialized on lanes beyond the word count, which are never selected as
// shuffle sources. Integer fields are recovered from the float lane payload
// with __float_as_uint.
__device__ inline static
void PxgShape_ReadWarp(PxgShape& res, const PxgShape* ptr)
{
const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);
PxReal input;
// Word count: 11 on 32-bit pointer builds, 12 on 64-bit builds.
if (laneIdx < 11 + (sizeof(size_t) == 8) * 1)
{
input = reinterpret_cast<const PxReal *>(ptr)[laneIdx];
}
res.scale.scale.x = __shfl_sync(FULL_MASK, input, 0);
res.scale.scale.y = __shfl_sync(FULL_MASK, input, 1);
res.scale.scale.z = __shfl_sync(FULL_MASK, input, 2);
res.scale.rotation.x = __shfl_sync(FULL_MASK, input, 3);
res.scale.rotation.y = __shfl_sync(FULL_MASK, input, 4);
res.scale.rotation.z = __shfl_sync(FULL_MASK, input, 5);
res.scale.rotation.w = __shfl_sync(FULL_MASK, input, 6);
res.materialIndex = __shfl_sync(FULL_MASK, __float_as_uint(input), 7);
// Reassemble the pointer; on 64-bit builds it spans two 32-bit words.
size_t hullOrMeshPtr = __shfl_sync(FULL_MASK, __float_as_uint(input), 8);
if (sizeof(size_t) == 8)
{
const PxU64 hullPtr_hi = __shfl_sync(FULL_MASK, __float_as_uint(input), 9);
hullOrMeshPtr |= hullPtr_hi << 32ll;
}
res.hullOrMeshPtr = hullOrMeshPtr;
// The trailing fields sit after the pointer, so their word index depends
// on the pointer size.
res.type = __shfl_sync(FULL_MASK, __float_as_uint(input), 9 + (sizeof(size_t) == 8) * 1);
res.particleOrSoftbodyId = __shfl_sync(FULL_MASK, __float_as_uint(input), 10 + (sizeof(size_t) == 8) * 1);
}
/*
 * Fetches the two shapes referenced by contact manager cmIdx together with
 * their transform cache indices. The result is ordered so that shape0 has
 * geometry type ExpectedType0 (one of the two shapes is assumed to match).
 * Asserts the resulting types; ExpectedType1 == eINVALID disables the
 * check on shape1.
 */
template <PxGeometryType::Enum ExpectedType0, PxGeometryType::Enum ExpectedType1 = PxGeometryType::eINVALID>
PX_FORCE_INLINE __device__ void LoadShapePair(
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	PxU32 cmIdx,
	const PxgShape* PX_RESTRICT gpuShapes,
	PxgShape& shape0, //output
	PxU32& transformCacheRef0, //output
	PxgShape& shape1, //output
	PxU32& transformCacheRef1 //output
)
{
	const PxgContactManagerInput input = cmInputs[cmIdx];

	const PxgShape* first = gpuShapes + input.shapeRef0;
	const PxgShape* second = gpuShapes + input.shapeRef1;
	PxU32 cacheA = input.transformCacheRef0;
	PxU32 cacheB = input.transformCacheRef1;

	// If the expected types differ we may have to flip the pair so the
	// ExpectedType0 geometry ends up in shape0. (With identical expected
	// types the order is irrelevant, so the check is skipped entirely.)
	if (ExpectedType0 != ExpectedType1)
	{
		if (PxGeometryType::Enum(first->type) != ExpectedType0)
		{
			PxSwap(first, second);
			PxSwap(cacheA, cacheB);
		}
	}

	shape0 = *first;
	shape1 = *second;
	transformCacheRef0 = cacheA;
	transformCacheRef1 = cacheB;

	assert(shape0.type == ExpectedType0);
	assert(ExpectedType1 == PxGeometryType::eINVALID || shape1.type == ExpectedType1);
}
/*
 * Warp-wide variant of LoadShapePair: the full warp must call this together.
 * Loads the shape pair referenced by contact manager cmIdx cooperatively
 * (warp-wide reads) and orders the result so that shape0 has geometry type
 * ExpectedType0. Asserts the resulting types; ExpectedType1 == eINVALID
 * disables the check on shape1.
 */
template <PxGeometryType::Enum ExpectedType0, PxGeometryType::Enum ExpectedType1 = PxGeometryType::eINVALID>
PX_FORCE_INLINE __device__ void LoadShapePairWarp(
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	PxU32 cmIdx,
	const PxgShape* PX_RESTRICT gpuShapes,
	PxgShape& shape0, //output
	PxU32& transformCacheRef0, //output
	PxgShape& shape1, //output
	PxU32& transformCacheRef1 //output
)
{
	PxgContactManagerInput input;
	PxgContactManagerInput_ReadWarp(input, cmInputs, cmIdx);

	const PxgShape* first = gpuShapes + input.shapeRef0;
	const PxgShape* second = gpuShapes + input.shapeRef1;
	PxU32 cacheA = input.transformCacheRef0;
	PxU32 cacheB = input.transformCacheRef1;

	// The swap decision is identical on every lane (the broadcast inputs are
	// warp-uniform), so control flow stays convergent for the warp-wide
	// PxgShape_ReadWarp calls below.
	if (ExpectedType0 != ExpectedType1)
	{
		if (PxGeometryType::Enum(first->type) != ExpectedType0)
		{
			PxSwap(first, second);
			PxSwap(cacheA, cacheB);
		}
	}

	PxgShape_ReadWarp(shape0, first);
	PxgShape_ReadWarp(shape1, second);
	transformCacheRef0 = cacheA;
	transformCacheRef1 = cacheB;

	assert(shape0.type == ExpectedType0);
	assert(ExpectedType1 == PxGeometryType::eINVALID || shape1.type == ExpectedType1);
}
// Warp-cooperative store of a ConvexTriNormalAndIndex: lanes 0-3 each copy
// one 32-bit word from 'inp' to 'outp' (assumes the struct is exactly
// 4 words - TODO confirm against the struct definition). Lanes >= 4 do
// nothing; no shuffles are used, so divergence is harmless.
__device__ inline static
void ConvexTriNormalAndIndex_WriteWarp(ConvexTriNormalAndIndex* outp, const ConvexTriNormalAndIndex& inp)
{
const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);
if (laneIdx < 4)
{
uint input = reinterpret_cast<const uint *>(&inp)[laneIdx];
reinterpret_cast<uint *>(outp)[laneIdx] = input;
}
}
// Warp-cooperative store of a ConvexMeshPair: one 32-bit word per lane,
// sized from the struct itself (must not exceed WARP_SIZE words for full
// coverage - presumably it doesn't; confirm against the struct definition).
// Lanes beyond the word count do nothing; no shuffles, so divergence is fine.
__device__ inline static
void ConvexMeshPair_WriteWarp(ConvexMeshPair* outp, const ConvexMeshPair& inp)
{
const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);
if (laneIdx < (sizeof(ConvexMeshPair)/sizeof(uint)))
{
uint input = reinterpret_cast<const uint *>(&inp)[laneIdx];
reinterpret_cast<uint *>(outp)[laneIdx] = input;
}
}
#endif

View File

@@ -0,0 +1,380 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __DEFORMABLE_COLLISION_CUH__
#define __DEFORMABLE_COLLISION_CUH__
#include "foundation/PxVec2.h"
#include "foundation/PxVecMath.h"
#include "GuDistancePointTriangle.h"
#include "PxgFEMCloth.h"
#define DEFORMABLE_BARYCENTRIC_THRESHOLD (1.0e-6f)
#define DEFORMABLE_ONE_MINUS_BARYCENTRIC_THRESHOLD (1.0f - DEFORMABLE_BARYCENTRIC_THRESHOLD)
// Axis-aligned bounding box of a triangle given its three world-space vertices.
PX_FORCE_INLINE __device__ PxBounds3 triBoundingBox(const PxVec3& worldV0, const PxVec3& worldV1, const PxVec3& worldV2)
{
	PxBounds3 bounds;
	// Component-wise minimum over the three vertices.
	bounds.minimum.x = PxMin(PxMin(worldV0.x, worldV1.x), worldV2.x);
	bounds.minimum.y = PxMin(PxMin(worldV0.y, worldV1.y), worldV2.y);
	bounds.minimum.z = PxMin(PxMin(worldV0.z, worldV1.z), worldV2.z);
	// Component-wise maximum over the three vertices.
	bounds.maximum.x = PxMax(PxMax(worldV0.x, worldV1.x), worldV2.x);
	bounds.maximum.y = PxMax(PxMax(worldV0.y, worldV1.y), worldV2.y);
	bounds.maximum.z = PxMax(PxMax(worldV0.z, worldV1.z), worldV2.z);
	return bounds;
}
// Axis-aligned bounding box of a tetrahedron given its four world-space vertices.
PX_FORCE_INLINE __device__ PxBounds3 tetBoundingBox(const PxVec3& worldV0, const PxVec3& worldV1, const PxVec3& worldV2, const PxVec3& worldV3)
{
	PxBounds3 bounds;
	// Component-wise minimum over the four vertices (pairwise reduction).
	bounds.minimum.x = PxMin(PxMin(worldV0.x, worldV1.x), PxMin(worldV2.x, worldV3.x));
	bounds.minimum.y = PxMin(PxMin(worldV0.y, worldV1.y), PxMin(worldV2.y, worldV3.y));
	bounds.minimum.z = PxMin(PxMin(worldV0.z, worldV1.z), PxMin(worldV2.z, worldV3.z));
	// Component-wise maximum over the four vertices.
	bounds.maximum.x = PxMax(PxMax(worldV0.x, worldV1.x), PxMax(worldV2.x, worldV3.x));
	bounds.maximum.y = PxMax(PxMax(worldV0.y, worldV1.y), PxMax(worldV2.y, worldV3.y));
	bounds.maximum.z = PxMax(PxMax(worldV0.z, worldV1.z), PxMax(worldV2.z, worldV3.z));
	return bounds;
}
// Barycentric coordinates of 'point' with respect to tetrahedron (a, b, c, d),
// computed as ratios of signed volumes (scalar triple products). Writes the
// clamped coordinates to 'barycentric' and returns true when the unclamped
// coordinates all lie within [-eps, 1 + eps], i.e. the point is (almost)
// inside the tetrahedron. Degenerate tetrahedra (zero volume) produce
// inf/nan coordinates, as in the original formulation.
__device__ inline bool computeTetBarycentric(const PxVec3& a, const PxVec3& b, const PxVec3& c, const PxVec3& d, const PxVec3& point,
float4& barycentric, const PxReal eps = 1e-5f)
{
	const PxVec3 ba = b - a;
	const PxVec3 ca = c - a;
	const PxVec3 da = d - a;
	const PxVec3 pa = point - a;

	// detBcd is 6x the signed volume of the whole tetrahedron; the other
	// determinants replace one edge with the point to get sub-volumes.
	const PxReal detBcd = ba.dot(ca.cross(da));
	const PxReal v = pa.dot(ca.cross(da)) / detBcd;
	const PxReal w = ba.dot(pa.cross(da)) / detBcd;
	const PxReal x = ba.dot(ca.cross(pa)) / detBcd;
	const PxReal u = 1.f - v - w - x;

	// Store clamped coordinates: slightly negative barycentrics could otherwise
	// yield a negative interpolated inverse mass (e.g. when three vertices of
	// the tetrahedron have inverse mass zero and the fourth goes negative).
	// Note the clamped coordinates may no longer sum exactly to one.
	barycentric = make_float4(PxClamp(u, 0.0f, 1.0f),
	                          PxClamp(v, 0.0f, 1.0f),
	                          PxClamp(w, 0.0f, 1.0f),
	                          PxClamp(x, 0.0f, 1.0f));

	// Containment test uses the unclamped values with tolerance eps.
	const PxReal minEps = -eps;
	const PxReal maxEps = 1.f + eps;
	return (u >= minEps && u <= maxEps) && (v >= minEps && v <= maxEps) && (w >= minEps && w <= maxEps) && (x >= minEps && x <= maxEps);
}
// Convenience overload: same as above, taking the four tetrahedron vertices
// as an array (verts[0..3]).
__device__ inline bool computeTetBarycentric(PxVec3* verts, const PxVec3& point, float4& barycentric, const float eps = 1e-5f)
{
return computeTetBarycentric(verts[0], verts[1], verts[2], verts[3], point, barycentric, eps);
}
// Closest point on triangle (a, b, c) to point p, via Voronoi-region case
// analysis (cf. Ericson, "Real-Time Collision Detection", 5.1.5).
// Outputs s, t such that the closest point is a + s*(b - a) + t*(c - a),
// i.e. barycentric coordinates (1 - s - t, s, t).
__device__ inline PxVec3 closestPtPointTriangle(const PxVec3& p, const PxVec3& a, const PxVec3& b, const PxVec3& c, float& s, float& t)
{
// Check if P in vertex region outside A
const PxVec3 ab = b - a;
const PxVec3 ac = c - a;
const PxVec3 ap = p - a;
const float d1 = ab.dot(ap);
const float d2 = ac.dot(ap);
if(d1 <= 0.0f && d2 <= 0.0f)
{
s = 0.0f;
t = 0.0f;
return a; // Barycentric coords 1,0,0
}
// Check if P in vertex region outside B
const PxVec3 bp = p - b;
const float d3 = ab.dot(bp);
const float d4 = ac.dot(bp);
if(d3 >= 0.0f && d4 <= d3)
{
s = 1.0f;
t = 0.0f;
return b; // Barycentric coords 0,1,0
}
// Check if P in edge region of AB, if so return projection of P onto AB
const float vc = d1 * d4 - d3 * d2;
if(vc <= 0.0f && d1 >= 0.0f && d3 <= 0.0f)
{
const float v = d1 / (d1 - d3);
s = v;
t = 0.0f;
return a + v * ab; // barycentric coords (1-v, v, 0)
}
// Check if P in vertex region outside C
const PxVec3 cp = p - c;
const float d5 = ab.dot(cp);
const float d6 = ac.dot(cp);
if(d6 >= 0.0f && d5 <= d6)
{
s = 0.0f;
t = 1.0f;
return c; // Barycentric coords 0,0,1
}
// Check if P in edge region of AC, if so return projection of P onto AC
const float vb = d5 * d2 - d1 * d6;
if(vb <= 0.0f && d2 >= 0.0f && d6 <= 0.0f)
{
const float w = d2 / (d2 - d6);
s = 0.0f;
t = w;
return a + w * ac; // barycentric coords (1-w, 0, w)
}
// Check if P in edge region of BC, if so return projection of P onto BC
const float va = d3 * d6 - d5 * d4;
if(va <= 0.0f && (d4 - d3) >= 0.0f && (d5 - d6) >= 0.0f)
{
const float w = (d4 - d3) / ((d4 - d3) + (d5 - d6));
s = 1.0f - w;
t = w;
return b + w * (c - b); // barycentric coords (0, 1-w, w)
}
// P inside face region. Compute Q through its barycentric coords (u,v,w)
const float denom = 1.0f / (va + vb + vc);
const float v = vb * denom;
const float w = vc * denom;
s = v;
t = w;
return a + ab * v + ac * w;
}
// Squared distance from point p to triangle (a, b, c); also returns the
// closest point on the triangle in closestP.
__device__ static PxReal distancePointTriangleSquared(const PxVec3 p, const PxVec3 a, const PxVec3 b, const PxVec3 c, PxVec3& closestP)
{
	// Delegate the closest-point computation, passing the two edge vectors.
	closestP = Gu::closestPtPointTriangle2(p, a, b, c, b - a, c - a);
	const PxVec3 delta = p - closestP;
	return delta.magnitudeSquared();
}
// Projects v0 onto segment (ev0, ev1) and returns the clamped parametric
// coordinate in w; a degenerate (zero-length) edge yields the midpoint 0.5.
__device__ static void closestPtPointEdge(const PxVec3& v0, const PxVec3& ev0, const PxVec3& ev1, PxReal& w)
{
	const PxVec3 dir = ev1 - ev0;
	const PxReal lenSq = dir.dot(dir);
	if (lenSq > 0.0f)
		w = PxClamp(dir.dot(v0 - ev0) / lenSq, 0.0f, 1.0f);
	else
		w = 0.5f;
}
// Computes the closest points on two infinite lines.
// Returns false if the edges are parallel or degenerate.
// Outputs:
// - s, t: parametric positions along edge0 and edge1 (can be negative for infinite lines)
// - distSq: squared distance between the two closest points
PX_FORCE_INLINE __device__ static bool closestPtLineLine(const PxVec3& e0v0, const PxVec3& e0v1, const PxVec3& e1v0, const PxVec3& e1v1,
PxReal& s, PxReal& t, PxReal& distSq)
{
const PxVec3 r0 = e0v1 - e0v0; // Edge0 direction
const PxVec3 r1 = e1v1 - e1v0; // Edge1 direction
const PxVec3 d = e1v0 - e0v0; // Displacement between starting points
const PxReal r0Len2 = r0.magnitudeSquared();
const PxReal r1Len2 = r1.magnitudeSquared();
// Handle degenerate edges: If either edge has zero length, return midpoints.
// Non-short-circuiting '|' is intentional - both operands are evaluated
// branchlessly.
if(r0Len2 < 1.0e-18f | r1Len2 < 1.0e-18f)
{
s = t = 0.0f;
distSq = d.magnitudeSquared();
return false;
}
const PxReal r0Len = PxSqrt(r0Len2);
const PxReal r1Len = PxSqrt(r1Len2);
const PxVec3 u0 = r0 / r0Len;
const PxVec3 u1 = r1 / r1Len;
const PxReal cosTheta = PxAbs(u0.dot(u1));
// Edges are nearly parallel: |cos| > 0.9999 corresponds to an angle below
// roughly 0.81 degrees.
if(cosTheta > 0.9999f)
{
// Pick point e0v0 on edge0 and project to edge1
PxReal tProj = (e0v0 - e1v0).dot(r1) / r1Len2;
t = tProj;
s = 0.0f;
PxVec3 pt0 = e0v0;
PxVec3 pt1 = e1v0 + r1 * tProj;
distSq = (pt1 - pt0).magnitudeSquared();
return false;
}
// Solve the 2x2 normal-equation system for the line parameters; with
// normalized directions the diagonal terms are 1:
// PxReal a00 = 1.0f;
// PxReal a11 = 1.0f;
PxReal a01 = -u0.dot(u1);
PxReal b0 = u0.dot(d) / r0Len;
PxReal b1 = -u1.dot(d) / r1Len;
PxReal det = 1.0f - a01 * a01;
// Second parallel/degeneracy guard on the system determinant.
if(det < 1.0e-9f)
{
s = t = 0.0f;
distSq = d.magnitudeSquared();
return false;
}
// Compute actual closest points
const PxReal invDet = 1.0f / det;
s = (b0 - b1 * a01) * invDet;
t = (b1 - a01 * b0) * invDet;
const PxVec3 pt0 = e0v0 + s * r0;
const PxVec3 pt1 = e1v0 + t * r1;
distSq = (pt1 - pt0).magnitudeSquared();
return true;
}
// Computes the closest points on two finite edges.
// First solves the infinite-line problem; when either parameter falls outside
// [0, 1], falls back to the minimum over the four point-vs-edge projections
// (each edge endpoint against the other edge), which is the exact clamped
// solution. Returns false only for parallel/degenerate edges (s, t, distSq
// are still filled in by closestPtLineLine in that case).
PX_FORCE_INLINE __device__ static bool closestPtEdgeEdge(const PxVec3& e0v0, const PxVec3& e0v1, const PxVec3& e1v0, const PxVec3& e1v1,
PxReal& s, PxReal& t, PxReal& distSq)
{
if(!closestPtLineLine(e0v0, e0v1, e1v0, e1v1, s, t, distSq))
{
return false; // Degenerate edges
}
// Falls back to point edge distance. Non-short-circuiting '|' is intentional
// (branchless evaluation of all four range checks).
if(s<0.0f | s> 1.0f | t<0.0f | t> 1.0f)
{
PxReal w[4];
closestPtPointEdge(e0v0, e1v0, e1v1, w[0]);
closestPtPointEdge(e0v1, e1v0, e1v1, w[1]);
closestPtPointEdge(e1v0, e0v0, e0v1, w[2]);
closestPtPointEdge(e1v1, e0v0, e0v1, w[3]);
// Candidates for closest point selection, as (s, t) parameter pairs.
const PxReal candidates[4][2] = { { 0.0f, w[0] }, { 1.0f, w[1] }, { w[2], 0.0f }, { w[3], 1.0f } };
PxI32 minIndex = 0;
distSq = PX_MAX_REAL;
#pragma unroll
// Compare the four closest points and select the minimum distance pair.
for(int i = 0; i < 4; ++i)
{
const PxReal sC = candidates[i][0];
const PxReal tC = candidates[i][1];
const PxVec3 p0 = e0v0 * (1.0f - sC) + e0v1 * sC;
const PxVec3 p1 = e1v0 * (1.0f - tC) + e1v1 * tC;
const PxReal d2 = (p0 - p1).magnitudeSquared();
minIndex = (d2 < distSq) ? i : minIndex;
distSq = PxMin(d2, distSq);
}
// Assign the best closest point.
assert(minIndex != -1);
s = candidates[minIndex][0];
t = candidates[minIndex][1];
}
return true;
}
// Type0: minimal triangle set covering all edges and vertices (compact encoding)
// Type1: more uniformly distributed edges and vertices across triangles (balanced encoding)
PX_FORCE_INLINE __device__ static bool isType0EdgeActive(PxU32 edgeAuthorship, PxU32 localEdgeIndex)
{
	// Test the authorship bit for this local edge in the type-0 encoding.
	const PxU32 bit = EdgeEncoding::TYPE0_EDGE_BASE_POS + localEdgeIndex;
	return ((edgeAuthorship >> bit) & 1U) != 0U;
}
// Tests the authorship bit for this local edge in the type-1 encoding.
PX_FORCE_INLINE __device__ static bool isType1EdgeActive(PxU32 edgeAuthorship, PxU32 localEdgeIndex)
{
	const PxU32 bit = EdgeEncoding::TYPE1_EDGE_BASE_POS + localEdgeIndex;
	return ((edgeAuthorship >> bit) & 1U) != 0U;
}
// Tests the authorship bit for this local vertex in the type-0 encoding.
PX_FORCE_INLINE __device__ static bool isType0VertexActive(PxU32 edgeAuthorship, PxU32 localVertexIndex)
{
	const PxU32 bit = EdgeEncoding::TYPE0_VERTEX0_ACTIVE_POS + localVertexIndex;
	return ((edgeAuthorship >> bit) & 1U) != 0U;
}
// Tests the authorship bit for this local vertex in the type-1 encoding.
PX_FORCE_INLINE __device__ static bool isType1VertexActive(PxU32 edgeAuthorship, PxU32 localVertexIndex)
{
	const PxU32 bit = EdgeEncoding::TYPE1_VERTEX0_ACTIVE_POS + localVertexIndex;
	return ((edgeAuthorship >> bit) & 1U) != 0U;
}
#endif // __DEFORMABLE_COLLISION_CUH__

View File

@@ -0,0 +1,180 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __DEFORMABLE_ELEMENT_FILTER_CUH__
#define __DEFORMABLE_ELEMENT_FILTER_CUH__
#include "foundation/PxSimpleTypes.h"
#include "PxFiltering.h"
#include "PxgSimulationCoreDesc.h"
#include "PxgParticleSystem.h"
using namespace physx;
static __device__ bool find(PxParticleRigidFilterPair* pairs, const PxU32 nbPairs, PxParticleRigidFilterPair& pair)
{
//Binary search...
PxU32 l = 0, r = nbPairs;
while (l < r)
{
PxU32 mid = (l + r) / 2;
const PxParticleRigidFilterPair& p = pairs[mid];
if (pair == p)
return true;
else if (pair < p)
r = mid;
else
l = mid + 1;
}
return false;
}
// Convenience wrapper: builds a rigid/particle filter key from the raw ids
// and searches for it in the sorted pair list.
static __device__ bool find(PxParticleRigidFilterPair* pairs, const PxU32 nbPairs, PxU64 rigidId, PxU32 particleId)
{
PxParticleRigidFilterPair pair;
pair.mID0 = rigidId;
pair.mID1 = particleId;
return find(pairs, nbPairs, pair);
}
// Binary search for 'pair' in the sorted array 'pairs'.
// Returns true on an exact match, or - when ignoreIndex2 is set - as soon as
// a probed element matches on (index0, index1) regardless of index2.
// Precondition: 'pairs' is sorted ascending with respect to compare().
static __device__ bool find(const PxgRigidFilterPair* pairs, const PxU32 nbPairs, PxgRigidFilterPair& pair, bool ignoreIndex2)
{
	//Binary search...
	PxU32 l = 0, r = nbPairs;
	while (l < r)
	{
		// Overflow-safe midpoint: (l + r) / 2 could wrap for very large arrays.
		const PxU32 mid = l + (r - l) / 2;
		const PxgRigidFilterPair& p = pairs[mid];
		const PxI32 cmp = pair.compare(p);
		if (cmp == 0 || (ignoreIndex2 && pair.index0 == p.index0 && pair.index1 == p.index1))
			return true;
		if (cmp < 0)
			r = mid;
		else
			l = mid + 1;
	}
	return false;
}
// Convenience wrapper: builds a rigid filter key from the rigid id and the
// compressed particle id (index2 zeroed) and searches ignoring index2, so any
// entry for this (rigid, particle) combination matches.
static __device__ bool find(const PxgRigidFilterPair* pairs, const PxU32 nbPairs, PxU64 rigidId, PxU32 compressedParticleId)
{
PxgRigidFilterPair pair;
pair.index0 = rigidId;
pair.index1 = compressedParticleId;
pair.index2 = 0;
return find(pairs, nbPairs, pair, true);
}
// Returns the index of the last element in the ascending-sorted array
// 'values' that is <= value, i.e. the run/range that 'value' falls into.
// Returns 0 when length == 0.
// Precondition: values[0] <= value; otherwise the search ends with r == 0
// and the result underflows (wraps to 0xFFFFFFFF), as in the original.
static __device__ PxU32 findRange(PxU32 value, PxU32* values, PxU32 length)
{
	if (length == 0)
		return 0;
	PxU32 l = 0, r = length;
	while (l < r)
	{
		// Overflow-safe midpoint: (l + r) / 2 could wrap for very large arrays.
		const PxU32 m = l + (r - l) / 2;
		if (values[m] > value)
			r = m;
		else
			l = m + 1;
	}
	// r is the index of the first element strictly greater than 'value'.
	return r-1;
}
// Rigid-vs-particle filter lookup for a particle addressed in the flat
// ("uber") particle buffer. Uses the per-buffer runsum to locate the user
// buffer the particle belongs to, then searches that buffer's filter pair
// list with the particle index rebased to the buffer-local range.
static __device__ bool find(PxgParticleSystem& particleSystem, PxU64 rigidId, PxU64 compressedParticleId)
{
// this is the ID into the flat uber buffer
PxU32 particleId = PxGetParticleIndex(compressedParticleId);
PxU32 id = findRange(particleId, particleSystem.mParticleBufferRunsum, particleSystem.mCommonData.mNumParticleBuffers);
PxgParticleSimBuffer& particleBuffer = particleSystem.mParticleSimBuffers[id];
// Rebase the flat index to the owning buffer's local particle range.
return find(particleBuffer.mFilterPairs, particleBuffer.mNumFilterPairs, rigidId, particleId - particleSystem.mParticleBufferRunsum[id]);
}
// Binary search for 'pair' in the sorted array 'pairs'.
// Returns true on an exact match, or - when ignoreIndex2 is set - as soon as
// a probed element matches on (index0, index1) regardless of index2.
// Precondition: 'pairs' is sorted ascending with respect to compare().
static __device__ bool find(const PxgNonRigidFilterPair* pairs, const PxU32 nbPairs, PxgNonRigidFilterPair& pair, bool ignoreIndex2)
{
	//Binary search...
	PxU32 l = 0, r = nbPairs;
	while (l < r)
	{
		// Overflow-safe midpoint: (l + r) / 2 could wrap for very large arrays.
		const PxU32 mid = l + (r - l) / 2;
		const PxgNonRigidFilterPair& p = pairs[mid];
		const PxI32 cmp = pair.compare(p);
		if (cmp == 0 || (ignoreIndex2 && pair.index0 == p.index0 && pair.index1 == p.index1))
			return true;
		if (cmp < 0)
			r = mid;
		else
			l = mid + 1;
	}
	return false;
}
// Convenience wrapper: builds a non-rigid filter key from the two compressed
// ids (index2 zeroed) and searches ignoring index2, so any entry for this
// id combination matches.
static __device__ bool find(const PxgNonRigidFilterPair* pairs, const PxU32 nbPairs, const PxU32 compressedId0, const PxU32 compressedId1)
{
PxgNonRigidFilterPair pair;
pair.index0 = compressedId0;
pair.index1 = compressedId1;
pair.index2 = 0;
return find(pairs, nbPairs, pair, true);
}
// Convenience wrapper: builds a non-rigid filter key including the particle
// user buffer id in index2 and searches for an exact three-way match.
static __device__ bool find(const PxgNonRigidFilterPair* pairs, const PxU32 nbPairs, const PxU32 compressedId0, const PxU32 compressedId1, PxU32 uniqueParticleUserBufferId)
{
PxgNonRigidFilterPair pair;
pair.index0 = compressedId0;
pair.index1 = compressedId1;
pair.index2 = uniqueParticleUserBufferId;
return find(pairs, nbPairs, pair, false);
}
// Non-rigid filter lookup for a particle addressed in the flat ("uber")
// particle buffer: resolves the user buffer the particle belongs to via the
// runsum, then searches for an exact match including that buffer's unique id.
static __device__ bool find(const PxgParticleSystem& particleSystem, const PxgNonRigidFilterPair* pairs, const PxU32 nbPairs, const PxU32 compressedParticleId, const PxU32 compressedId1)
{
PxU32 particleId = PxGetParticleIndex(compressedParticleId);
PxU32 id = findRange(particleId, particleSystem.mParticleBufferRunsum, particleSystem.mCommonData.mNumParticleBuffers);
PxgParticleSimBuffer& particleBuffer = particleSystem.mParticleSimBuffers[id];
return find(pairs, nbPairs, compressedParticleId, compressedId1, particleBuffer.mUniqueId);
}
#endif // __DEFORMABLE_ELEMENT_FILTER_CUH__

View File

@@ -0,0 +1,90 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __DIS_SEG_SEG_CUH__
#define __DIS_SEG_SEG_CUH__
// Clamped closest-point parameters between segments p1 + s*d1 and p2 + t*d2,
// with s, t restricted to [0, 1]. Despite the historic name, no squared
// distance is returned - callers derive it from s and t.
__device__ __forceinline__ static
void distanceSegmentSegmentSquared(
	const PxVec3& p1, const PxVec3& d1,
	const PxVec3& p2, const PxVec3& d2,
	PxReal& s, PxReal& t)
{
	const PxReal eps = 1e-6f;

	const PxVec3 r = p1 - p2;
	const PxReal a = d1.dot(d1); // squared length of segment 1
	const PxReal e = d2.dot(d2); // squared length of segment 2
	const PxReal b = d1.dot(d2);
	const PxReal c = d1.dot(r);

	// Guard against degenerate (zero-length) segments: a zero reciprocal
	// forces the corresponding parameter toward the clamped origin.
	const PxReal aRecip = a > eps ? (1.f / a) : 0.f;
	const PxReal eRecip = e > eps ? (1.f / e) : 0.f;
	const PxReal f = d2.dot(r);

	// General (non-degenerate) case: closest point on segment 1 for the
	// unclamped line/line problem, clamped to the segment.
	const PxReal denom = a * e - b * b;
	const PxReal temp = b * f - c * e;
	const PxReal s0 = PxClamp(temp / denom, 0.f, 1.f);

	// Nearly parallel segments (denom < eps): fall back to the midpoint of
	// segment 1.
	const PxReal sTmp = eps > denom ? 0.5f : s0;

	// Closest point on segment 2 for that choice of s, clamped to [0, 1].
	const PxReal tTmp = (b * sTmp + f) * eRecip;
	const PxReal t2 = PxClamp(tTmp, 0.f, 1.f);

	// Recompute s for the clamped t and clamp once more.
	const PxReal comp = (b * t2 - c) * aRecip;
	const PxReal s2 = PxClamp(comp, 0.f, 1.f);

	s = s2;
	t = t2;
}
#endif

View File

@@ -0,0 +1,530 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __EPA_CUH__
#define __EPA_CUH__
#include <assert.h>
#include "nputils.cuh"
#include "schlockShared.h"
namespace squawk
{
using namespace physx;
// Scratch storage for the warp-parallel EPA (expanding polytope algorithm);
// all arrays are sized by schlock::EPA_MAX_VERTS. Presumably placed in
// shared memory by the caller - confirm at the kernel that instantiates it.
struct EpaScratch
{
PxVec3 vertices[schlock::EPA_MAX_VERTS];	// polytope vertices (support points)
PxVec3 bestDir;								// best separating direction found so far
PxReal bestDist;							// distance associated with bestDir
PxU16 indices[schlock::EPA_MAX_VERTS];		// packed support indices per vertex - TODO confirm encoding
PxU8 loop[schlock::EPA_MAX_VERTS];			// flattened silhouette-loop vertex list (see replaceTriangle)
PxU8 edges[schlock::EPA_MAX_VERTS];			// per-loop-vertex adjacency/writeback slot for new cone triangles
PxU8 next[schlock::EPA_MAX_VERTS];			// linked-list "next" pointers used to build the silhouette loop
};
// Topology tables for seeding EPA. The "4" tables describe the four faces of
// an initial tetrahedron (per-face vertex triples and the indices of the
// three adjacent faces); the "3" tables describe the two opposing faces of a
// degenerate (triangle-only) start, each adjacent to the other on all edges.
__constant__ __device__ schlock::Vec3I8 sStartVertices4[4] = { MAKE_Vec3I8(0,1,2), MAKE_Vec3I8(3,1,0), MAKE_Vec3I8(3,2,1), MAKE_Vec3I8(3,0,2) };
__constant__ __device__ schlock::Vec3I8 sStartAdjacencies4[4] = { MAKE_Vec3I8(2,3,1), MAKE_Vec3I8(0,3,2), MAKE_Vec3I8(0,1,3), MAKE_Vec3I8(0,2,1) };
__constant__ __device__ schlock::Vec3I8 sStartVertices3[2] = { MAKE_Vec3I8(0,1,2), MAKE_Vec3I8(1,0,2) };
__constant__ __device__ schlock::Vec3I8 sStartAdjacency3[2] = { MAKE_Vec3I8(1,1,1), MAKE_Vec3I8(0,0,0) };
// Lane/vertex bitmap type used by the EPA routines below.
typedef int VMapType;
__device__ inline bool replaceTriangle(EpaScratch& epaS, int nbVertices, int triIndex,
PxReal lowerBound, PxVec3& normal, PxReal& distance,
schlock::Vec3I8& v, schlock::Vec3I8& a, int& liveB, const PxReal releps)
{
// to make it easier to keep track, we use a 'B' suffix for a bitmap where each bit represents a lane (and therefore a triangle)
int tI = threadIdx.x, laneB = 1<<tI;
//PxVec3 apex = ldS(S.vertices[nbVertices-1]);
PxVec3 apex = epaS.vertices[nbVertices - 1];
// triangles that are silhouetted by the new vertex are threatened, but not yet killed
bool threatened = liveB&laneB && (normal.dot(apex) - distance >= /*-*/releps);
schlock::EPABitMap threatenedB = __ballot_sync(FULL_MASK, threatened);
if (threatenedB == 0 || threatenedB == liveB || !(threatenedB & (1<<triIndex)))
{
#if EPA_LOG_DEGENERATE_CASES
if(threadIdx.x == 0)
printf("degenrate: threatenedB %d liveB %d < tri %d \n", threatenedB, liveB, tri);
#endif
return false;
}
// the threatened set is not guaranteed connected, so flood fill to find killB, a bitmap of tris to remove
schlock::EPABitMap b0B = 1<<a.get(0), b1B = 1<<a.get(1), b2B = 1<<a.get(2);
// nbr = threatened neighbors of threatened triangles
schlock::EPABitMap killB = 1<<triIndex, nbr = (b0B | b1B | b2B) & (threatened?threatenedB:0);
// flood the threatened triangles to find the killed ones
//for(schlock::EPABitMap pB = 0; pB != killB; pB = killB, killB |= __ballot(nbr&killB));
for (schlock::EPABitMap pB = 0; pB != killB;)
{
pB = killB;
killB |= __ballot_sync(FULL_MASK, nbr&killB);
}
//every triangles aren't alive and the triangles we want to kill in this iteration
schlock::EPABitMap deadB = (~liveB) | killB;
// d0, d1, d2 denote for each triangle whether the corresponding edge is a silhouette edge
const int alive = laneB&(~deadB);
const int d0 = alive && (b0B&deadB);
const int d1 = alive && (b1B&deadB);
const int d2 = alive && (b2B&deadB);
// write the silhouette edges into the loop buffer
int v0 = v.get(0), v1 = v.get(1), v2 = v.get(2), z = 0;
assert(!alive || v0 < schlock::EPA_MAX_VERTS);
assert(!alive || v1 < schlock::EPA_MAX_VERTS);
assert(!alive || v2 < schlock::EPA_MAX_VERTS);
// S.edges is the adjacency for the new cone tri on this edge, and
// also where the cone triangle will write back its index for later fixup
if (d0)
z = v2, epaS.next[v2] = v1, epaS.edges[v2] = tI;
if (d1)
z = v0, epaS.next[v0] = v2, epaS.edges[v0] = tI;
if (d2)
z = v1, epaS.next[v1] = v0, epaS.edges[v1] = tI;
int d0d1d2B = __ballot_sync(FULL_MASK, d0|d1|d2);
if (!d0d1d2B)
{
#if EPA_LOG_DEGENERATE_CASES
if (threadIdx.x == 0)
printf("degenrate: d0d1d2B %d b0B %d b1B %d b2B %d deadB %d \n", d0d1d2B, b0B, b1B, b2B, deadB);
#endif
return false;
}
int loopStart = __shfl_sync(FULL_MASK, z, lowestSetIndex(d0d1d2B)); // pick a loop vert from the lowest indexed triangle that has one
int nbLoop = 0;
__syncwarp();
if(tI == 0)
{
assert(loopStart < schlock::EPA_MAX_VERTS);
epaS.loop[nbLoop++] = loopStart; // flatten the linked list into a vertex array
for(int vertex = epaS.next[loopStart]; vertex!=loopStart; vertex = epaS.next[vertex])
{
if(vertex >= schlock::EPA_MAX_VERTS)
{
#if EPA_LOG_DEGENERATE_CASES
printf("degenrate: vertex %d\n", vertex);
#endif
nbLoop = -1;
break;
}
if(nbLoop >= schlock::EPA_MAX_VERTS - 1)
{
#if EPA_LOG_DEGENERATE_CASES
printf("degenrate: nbLoop %d\n", nbLoop);
#endif
nbLoop = -1;
break;
}
epaS.loop[nbLoop++] = vertex;
}
#if EPA_REPLACE_TRIANGLE_LOGGING
for(int i=0;i<nbLoop;i++)
printf("%d, ", S.loop[i]);
printf("(starting at %d)\n",loopStart);
#endif
}
nbLoop = __shfl_sync(FULL_MASK, nbLoop, 0);
if(nbLoop != __popc(__ballot_sync(FULL_MASK, d0)) + __popc(__ballot_sync(FULL_MASK, d1)) + __popc(__ballot_sync(FULL_MASK, d2)))
{
#if EPA_LOG_DEGENERATE_CASES
{
printf("nbLoop %d __ballot(d0) %d __ballot(d1) %d __ballot(d2) %d tid %d", nbLoop, __ballot(d0), __ballot(d1), __ballot(d2), threadIdx.x);
}
#endif
return false;
}
__syncwarp();
// make the cone with the nbLoop least non-survivors
int coneIndex = __popc(deadB&(laneB-1)), coneB = __ballot_sync(FULL_MASK, deadB&laneB && coneIndex < nbLoop);
bool fail = false;
if(coneB & laneB)
{
assert(coneIndex < nbLoop);
assert(nbLoop < schlock::EPA_MAX_VERTS);
int v0 = nbVertices-1, v1 = epaS.loop[coneIndex], v2 = epaS.loop[coneIndex+1 == nbLoop ? 0 : coneIndex+1];
v.setInt(MAKE_Vec3I8(v0, v1, v2));
assert(v0 < schlock::EPA_MAX_VERTS);
assert(v1 < schlock::EPA_MAX_VERTS);
assert(v2 < schlock::EPA_MAX_VERTS);
a.set(0, epaS.edges[v1]);
a.set(1, lowestSetIndex(coneB & ~(coneIndex+1 == nbLoop ? 0 : 2*laneB-1))); // previous bit in the active mask, wrapping around
a.set(2, highestSetIndex(coneB & ((coneIndex==0 ? 0 : laneB)-1))); // next bit in the active mask, wrapping around
epaS.edges[v1] = tI; // write our index back into the buffer for later fixup
//PxVec3 p0 = ldS(S.vertices[v0]), p1 = ldS(S.vertices[v1]), p2 = ldS(S.vertices[v2]);
PxVec3 p0 = epaS.vertices[v0], p1 = epaS.vertices[v1], p2 = epaS.vertices[v2];
PxVec3 e01 = p1 - p0, e02 = p2 - p0;
PxVec3 n;
if (e01.magnitudeSquared() < e02.magnitudeSquared())
{
n = e01.cross(p2 - p1);
}
else
{
n = e02.cross(p2 - p1);
}
PxReal nm = n.magnitude();
normal = n/nm;
distance = normal.dot(p0);
/*fail = nm == 0 || (lowerBound > 0.0f && distance < lowerBound);
if (fail)
{
printf("fail nm %f\ lowerBound %f distance %f \n", nm, lowerBound, distance);
}*/
fail = nm == 0 || (lowerBound > 0.0f && (distance - lowerBound) < -1e-4f);
}
__syncwarp();
// ... to fix up the silhouette adjacency with the new tris
if(d0) a.set(0, epaS.edges[v.get(2)]);
if(d1) a.set(1, epaS.edges[v.get(0)]);
if(d2) a.set(2, epaS.edges[v.get(1)]);
liveB = (liveB & ~deadB) | coneB;
return __ballot_sync(FULL_MASK, fail) == 0;
}
// Build the initial EPA polytope from a full GJK termination tetrahedron (data->size == 4).
// Runs warp-wide: the cached GJK data is reinterpreted as 32 floats, one per lane.
// Outputs: per-lane triangle plane (normal, distance) for the 4 start faces (lanes 0..3),
// plus the static adjacency/vertex index tables.
__device__ inline void epaStart4(EpaScratch& S,
const schlock::GjkCachedData* data,
schlock::Vec3I8& a, schlock::Vec3I8& v,
PxVec3& normal,
PxReal& distance)
{
int tI = threadIdx.x;
// lane tI loads the tI-th float of the cached GJK data
PxReal input = reinterpret_cast<const PxReal*>(data)[tI];
if(tI<16)
{
// lanes 0..15 hold four float4 entries: components 0..2 are a simplex vertex,
// component 3 carries the packed support indices (bit-cast through float)
if((tI&3)<3)
// scatter xyz into the tightly packed PxVec3 vertex array (drop every 4th float)
(reinterpret_cast<volatile PxReal*>(S.vertices))[tI-tI/4] = input;
else
S.indices[tI>>2] = __float_as_int(input);
}
// lanes 16..31 presumably hold the four start-face planes (normal.xyz, distance) --
// TODO confirm against GjkCachedData's layout. Lanes 0..3 gather one plane each via
// shuffles and renormalize it with the broadcast reciprocal length m.
PxReal d = dot(input, input), m = __shfl_sync(FULL_MASK, 1/sqrt(d), tI*4+16);
normal.x = __shfl_sync(FULL_MASK, input, tI*4+16) * m;
normal.y = __shfl_sync(FULL_MASK, input, tI*4+17) * m;
normal.z = __shfl_sync(FULL_MASK, input, tI*4+18) * m;
distance = __shfl_sync(FULL_MASK, input, tI*4+19) * m;
// only lanes 0..3 own one of the 4 tetrahedron faces
if(tI<4)
a = sStartAdjacencies4[tI], v = sStartVertices4[tI];
}
// Warp-cooperative support mapping: returns the index of the vertex in v[0..nbVertices)
// with the greatest projection onto dir. All 32 lanes must call this together; every
// lane returns the same index.
__device__ inline PxU8 supportIndex(const PxVec3* v, PxU8 nbVertices, const PxVec3& dir)
{
	int bestIndex = 0xFFFFFFFF;
	PxReal bestProj = -FLT_MAX;
	// process the vertices 32 at a time, one vertex per lane
	for(PxU32 base = 0; base < nbVertices; base += 32)
	{
		const int lane = base + threadIdx.x;
		// lanes past the end contribute -FLT_MAX so they never win
		const PxReal proj = lane < nbVertices ? dir.dot(v[lane]) : -FLT_MAX;
		// butterfly max-reduction: after the loop every lane holds the batch maximum
		PxReal batchMax = proj;
		for(int offset = 16; offset >= 1; offset >>= 1)
			batchMax = fmaxf(batchMax, __shfl_xor_sync(FULL_MASK, batchMax, offset));
		if(batchMax > bestProj)
		{
			bestProj = batchMax;
			// lowest lane whose projection equals the maximum wins
			bestIndex = base + __ffs(__ballot_sync(FULL_MASK, batchMax == proj)) - 1;
		}
	}
	assert(bestIndex < nbVertices);
	return PxU8(bestIndex);
}
// find a new support point in either the positive or negative direction furthest from the reference point
// we could be a bit more efficient here but this is a super-rare case
// Lane 0 writes the winning Minkowski-difference vertex and its packed (B<<8 | A) hull
// indices into the (volatile, shared) outputs; all lanes participate in the support queries.
__device__ inline void newPoint(const PxVec3* vA, PxU32 nbA, const PxVec3* vB, PxU32 nbB, const PxVec3& dir, const PxVec3& ref, volatile PxVec3& vertex, volatile PxU16& indices)
{
PxU8 indexAp = supportIndex(vA, nbA, dir), indexBp = supportIndex(vB, nbB, -dir); // support point in +ve dir
PxU8 indexAn = supportIndex(vA, nbA, -dir), indexBn = supportIndex(vB, nbB, dir); // support point in -ve dir
// candidate Minkowski-difference (A - B) vertices for both directions
PxVec3 vp = vA[indexAp] - vB[indexBp], vn = vA[indexAn] - vB[indexBn];
if(threadIdx.x == 0)
{
// keep whichever candidate is furthest from the reference point along dir
if(PxAbs((vp-ref).dot(dir)) > PxAbs((vn-ref).dot(dir)))
{
stS(vertex, vp);
indices = indexBp<<8 | indexAp;
}
else
{
stS(vertex, vn);
indices = indexBn<<8 | indexAn;
}
}
}
// Build the initial EPA polytope from a degenerate GJK simplex of 1, 2 or 3 vertices:
// synthesize the missing vertices with support queries, then treat the result as a
// double-sided triangle (lanes 0 and 1 own the two oppositely oriented faces).
// warpMask is the mask of lanes executing this path (see caller's ballot).
__device__ inline void epaStart123(volatile EpaScratch& S,
const schlock::GjkCachedData* data,
const PxVec3* vA, PxU32 nbA,
const PxVec3* vB, PxU32 nbB,
schlock::Vec3I8& a, schlock::Vec3I8& v,
PxVec3& normal,
PxReal& distance,
PxU32 warpMask)
{
int tI = threadIdx.x;
// lane tI loads the tI-th float of the cached GJK data
PxReal input = reinterpret_cast<const PxReal*>(data)[tI];
if(tI<16)
{
// lanes 0..15 scatter the cached float4s: xyz -> packed vertex array, w -> packed indices
if((tI&3)<3)
(reinterpret_cast<volatile PxReal*>(S.vertices))[tI-tI/4] = input;
else
S.indices[tI>>2] = __float_as_int(input);
}
__syncwarp(warpMask); //S.vertices is written above and read below
PxVec3 vertex0 = ldS(S.vertices[0]);
// single-point simplex: fabricate a second vertex along an arbitrary axis
if(data->size<2)
newPoint(vA, nbA, vB, nbB, PxVec3(1,0,0), ldS(S.vertices[0]), S.vertices[1], S.indices[1]);
__syncwarp(warpMask); //S.vertices is written (inside newPoint) above and read below
PxVec3 vertex1 = ldS(S.vertices[1]);
// segment simplex: fabricate a third vertex perpendicular to the segment
if(data->size<3)
{
PxVec3 dir = vertex1 - vertex0, s = (PxAbs(dir.x) < PxAbs(dir.y) ? PxVec3(1,0,0) : PxVec3(0,1,0)).cross(dir);
newPoint(vA, nbA, vB, nbB, s, vertex0, S.vertices[2], S.indices[2]);
}
__syncwarp(warpMask); //S.vertices is written (inside newPoint) above and read below
PxVec3 vertex2 = ldS(S.vertices[2]);
// triangle plane; pick the cross product built from the shorter edge for better conditioning
PxVec3 e01 = vertex1 - vertex0, e12 = vertex2 - vertex1, e02 = vertex2 - vertex0;
if (e01.magnitude()<=e02.magnitude() )
{
normal = e01.cross(e12).getNormalized();
}
else
{
normal = e02.cross(e12).getNormalized();
}
distance = normal.dot(vertex0);
// lanes 0 and 1 own the front- and back-facing copies of the start triangle
if(tI < 2)
v = sStartVertices3[tI], a = sStartAdjacency3[tI];
if(tI==1)
normal = -normal, distance = -distance;
}
// Warp-wide EPA (Expanding Polytope Algorithm): computes penetration depth and witness
// points for two overlapping convex point clouds, starting from the simplex GJK ended with.
// One polytope triangle per lane; 'liveTriangles' is the bitmap of lanes whose triangle is
// currently on the polytope surface. The loop repeatedly expands the face nearest the
// origin until [lowerBound, upperBound] is bracketed within releps, expansion degenerates,
// or the vertex budget (EPA_MAX_VERTS) is exhausted.
// Returns eCLOSE when lowerBound < 0, eOVERLAP otherwise; output.degenerate flags early exit.
__device__ inline schlock::GjkResult::Enum epa(EpaScratch& epaS,
const PxVec3* vA, PxU32 nbA,
const PxVec3* vB, PxU32 nbB,
const schlock::GjkCachedData* input,
PxReal convergenceRatio,
PxReal relEpsBase,
schlock::GjkOutput& output)
{
// termination tolerance: the tighter GJK converged, the tighter EPA must bracket
PxReal releps = relEpsBase * (1-convergenceRatio);
schlock::Vec3I8 a, v; // per-triangle adjacencies and vertex indices
PxVec3 normal; // per-triangle normal
PxReal distance; // per-triangle distance
schlock::Vec3I8 bestVerts; // vertex indices of the best candidate triangle so far
PxReal lowerBound = -PX_MAX_F32, upperBound = PX_MAX_F32;
int nbVertices, liveTriangles;
bool b = input->size == 4;
PxU32 warpMask = __ballot_sync(FULL_MASK, !b);
if(b)
{
epaStart4(epaS, input, a, v, normal, distance);
nbVertices = 4;
//This is a tetrahedron so there are 4 triangles, hence liveTriangles = 0xf (1111)
liveTriangles = 0xf;
}
else
{
epaStart123(epaS, input, vA, nbA, vB, nbB, a, v, normal, distance, warpMask);
nbVertices = 3;
//This is a double-sided triangle so there are 2 triangles, hence liveTriangles = 0x3 (11)
liveTriangles = 0x3;
}
__syncwarp();
// TODO: check here for vertex count is slighly conservative
bool nondegenerate = true;
while (nondegenerate && nbVertices < schlock::EPA_MAX_VERTS)
{
// dead triangles contribute FLT_MAX so they never win the min-reduction
PxReal t = liveTriangles & 1 << threadIdx.x ? distance : FLT_MAX, e;
//get the shortest distance triangle (warp-wide butterfly min-reduction)
e = fminf(t, __shfl_xor_sync(FULL_MASK, t, 16));
e = fminf(e, __shfl_xor_sync(FULL_MASK, e, 8));
e = fminf(e, __shfl_xor_sync(FULL_MASK, e, 4));
e = fminf(e, __shfl_xor_sync(FULL_MASK, e, 2));
e = fminf(e, __shfl_xor_sync(FULL_MASK, e, 1));
//try to find out which triangle index has the shortest distance
int f = __ballot_sync(FULL_MASK, e == t);
assert(f); //Trigger assert if e == t failed on all lanes (indicates a nan)
int triIndex = lowestSetIndex(f);
//get the normal from the shortest distance triangle
PxVec3 d(__shfl_sync(FULL_MASK, normal.x, triIndex), __shfl_sync(FULL_MASK, normal.y, triIndex), __shfl_sync(FULL_MASK, normal.z, triIndex));
//get the support vertex of the Minkowski difference along that normal
PxU8 indexA = squawk::supportIndex(vA, nbA, d), indexB = squawk::supportIndex(vB, nbB, -d);
PxVec3 supportVertex = vA[indexA] - vB[indexB];
//calculate the distance from the origin to the support vertex
PxReal thisFrameUb = supportVertex.dot(d);
//found the MTD, epa terminate
if (abs(thisFrameUb - e) <= releps)
{
upperBound = thisFrameUb, lowerBound = e;
bestVerts.setInt(__shfl_sync(FULL_MASK, v.getInt(), triIndex));
if (threadIdx.x == 0)
epaS.bestDir = d;
break;
}
// all remaining tris are worse than the best candidate, epa terminate
if (e > upperBound)
break;
// tighten the bracket if this face improved the upper bound
PxReal ub = PxMin(upperBound, thisFrameUb);
if (ub < upperBound)
{
upperBound = ub, lowerBound = e;
bestVerts.setInt(__shfl_sync(FULL_MASK, v.getInt(), triIndex));
if (threadIdx.x == 0)
epaS.bestDir = d;
}
#if EPA_LOGGING
if (threadIdx.x == 0)
printf("[%f, %f], (%f, %f, %f)\n", lowerBound, upperBound, supportVertex.x, supportVertex.y, supportVertex.z);
#endif
// save off the new vertex
if (threadIdx.x == 0)
{
epaS.vertices[nbVertices] = supportVertex;
epaS.indices[nbVertices] = PxU16(indexB) << 8 | PxU16(indexA);
}
__syncwarp();
// split the silhouette horizon of the new vertex and re-triangulate the polytope
nondegenerate = replaceTriangle(epaS, ++nbVertices, triIndex, e, normal, distance, v, a, liveTriangles, releps);
}
#if EPA_LOGGING
if(!nondegenerate)
printf("degenerate\n");
#endif
output.degenerate = !nondegenerate;
__syncwarp();
schlock::GjkResult::Enum result;
PxVec3 bestDir = epaS.bestDir;
PxVec3 closestPoint = bestDir * lowerBound;
if(threadIdx.x == 0)
{
// lane 0 reconstructs the witness points on A and B from the best triangle's
// barycentric coordinates of the closest point
output.direction = bestDir;
output.lowerBound = lowerBound;
output.upperBound = upperBound;
int v0 = bestVerts.get(0), v1 = bestVerts.get(1), v2 = bestVerts.get(2);
PxReal u, v, w;
schlock::barycentrics(epaS.vertices[v0], epaS.vertices[v1], epaS.vertices[v2], u, v, w, closestPoint);
PxVec3 cpA = vA[schlock::aIndex(epaS.indices[v0])] * u + vA[schlock::aIndex(epaS.indices[v1])] * v + vA[schlock::aIndex(epaS.indices[v2])] * w;
output.closestPointA = cpA;
output.closestPointB = cpA - closestPoint;
}
result = lowerBound < 0 ? schlock::GjkResult::eCLOSE : schlock::GjkResult::eOVERLAP;
return result;
}
}
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,397 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxBounds3.h"
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxTransform.h"
#include "foundation/PxVec3.h"
#include "geometry/PxGeometry.h"
#include "geometry/PxHeightFieldSample.h"
#include "PxgContactManager.h"
#include "PxgConvexConvexShape.h"
#include "PxgFEMCloth.h"
#include "PxgNpKernelIndices.h"
#include "PxgSoftBodyCoreKernelIndices.h"
#include "PxsTransformCache.h"
#include <vector_types.h>
#include "PxgCommonDefines.h"
#include "dataReadWriteHelper.cuh"
#include "heightfieldUtil.cuh"
#include "utils.cuh"
#include "reduction.cuh"
#include "deformableCollision.cuh"
#include "assert.h"
using namespace physx;
extern "C" __host__ void initNarrowphaseKernels20() {}
// Scratch data for cloth-vs-heightfield midphase, placed in shared memory by the
// launching kernels: caches the heightfield's world transform and shape descriptor
// so every lane can read them without re-fetching from global memory.
struct clothHeightfieldScratch
{
/*const float4 * PX_RESTRICT trimeshVerts;
const uint4 * PX_RESTRICT trimeshTriVertIndices;
PxU32 numTriangles;*/
PxTransform heightfieldTransform;
PxgShape heightfieldShape;
}; PX_COMPILE_TIME_ASSERT(sizeof(clothHeightfieldScratch) <= 1024);
// Warp-cooperative midphase for one cloth primitive bound vs. a heightfield:
// clips the world-space bound into heightfield cell space, counts the overlapped
// sample cells, and appends (cmIdx, triangleIdx, cell) pairs to the global stack.
// All 32 lanes must enter together (warp-wide scan/shuffle/atomics below).
//
// scratch          per-warp shared scratch holding the heightfield transform/shape
// cmIdx            contact-manager (pair) index
// triangleBound    world-space bound of this lane's primitive, pre-fattened by contact distance
// triangleIdx      index of this lane's primitive; lanes with triangleIdx >= numTriangles are idle
// workIndex        caller's work item index (currently unused; kept for interface stability)
// numTriangles     number of valid primitives
// stackPtr         output: uint4 pair stack (shared across the dispatch)
// midphasePairsNum output: global pair counter, advanced atomically
// stackSize        stack capacity in uint4 elements
__device__ static inline void clothHeightfieldMidphaseCore(
	clothHeightfieldScratch* scratch,
	const PxU32 cmIdx,
	const PxBounds3 triangleBound,
	const PxU32 triangleIdx,
	const PxU32 workIndex,
	const PxU32 numTriangles,
	PxU8* PX_RESTRICT stackPtr, //output
	PxU32* PX_RESTRICT midphasePairsNum, //output
	const PxU32 stackSize
)
{
	PxU32 nbPairs = 0;
	PxBounds3 localBound;
	PxU32 nbRows = 0;
	PxU32 nbCols = 0;
	PxHeightFieldSample* samples = NULL;
	PxU32 minRow = 0;
	PxU32 maxRow = 0;
	PxU32 minColumn = 0;
	PxU32 maxColumn = 0;

	if (triangleIdx < numTriangles)
	{
		PxTransform heightfieldTransform = scratch->heightfieldTransform;
		PxgShape heightfieldShape = scratch->heightfieldShape;

		const PxReal oneOverHeightScale = 1.f / heightfieldShape.scale.scale.y;
		const PxReal oneOverRowScale = 1.f / PxAbs(heightfieldShape.scale.scale.x);
		const PxReal oneOverColScale = 1.f / PxAbs(heightfieldShape.scale.scale.z);

		// bound is in world space; transform it into the heightfield's local cell space
		localBound = PxBounds3::transformFast(heightfieldTransform.getInverse(), triangleBound);
		localBound.minimum.x *= oneOverRowScale;
		localBound.minimum.y *= oneOverHeightScale;
		localBound.minimum.z *= oneOverColScale;
		localBound.maximum.x *= oneOverRowScale;
		localBound.maximum.y *= oneOverHeightScale;
		localBound.maximum.z *= oneOverColScale;

		// a negative row scale mirrors the row axis: swap min and max
		if (heightfieldShape.scale.scale.x < 0.f)
		{
			const PxReal temp = localBound.minimum.x;
			localBound.minimum.x = localBound.maximum.x;
			localBound.maximum.x = temp;
		}
		// likewise for a negative column scale
		if (heightfieldShape.scale.scale.z < 0.f)
		{
			PxReal swap = localBound.minimum.z;
			localBound.minimum.z = localBound.maximum.z;
			localBound.maximum.z = swap;
		}

		// heightfield blob layout: [nbRows, nbCols, samples...]
		PxU32* heightfieldData = reinterpret_cast<PxU32*>(heightfieldShape.hullOrMeshPtr);
		nbRows = heightfieldData[0];
		nbCols = heightfieldData[1];
		samples = reinterpret_cast<PxHeightFieldSample*>(&heightfieldData[2]);

		// Cull primitives whose bound lies entirely outside the heightfield's row/column
		// range. FIX: the '!' must negate the whole disjunction -- previously it applied
		// to the first clause only, so the cull was skipped (and pairs were counted)
		// whenever any of the other three "outside" conditions held.
		if (!((localBound.minimum.x > nbRows - 1) || (localBound.minimum.z > nbCols - 1)
			|| (localBound.maximum.x < 0) || (localBound.maximum.z < 0)))
		{
			minRow = getMinRow(localBound.minimum.x, nbRows);
			maxRow = getMaxRow(localBound.maximum.x, nbRows);
			minColumn = getMinColumn(localBound.minimum.z, nbCols);
			maxColumn = getMaxColumn(localBound.maximum.z, nbCols);

			// 2 triangles per cell; only count if the covered cell range is non-empty
			if ((2 * (maxColumn - minColumn) * (maxRow - minRow)) > 0)
			{
				nbPairs = heightfieldComputePairs(minColumn, maxColumn, minRow, maxRow, nbRows, nbCols,
					samples, localBound.minimum.y, localBound.maximum.y);
			}
		}
	}

	// exclusive prefix sum of per-lane pair counts = this lane's slot within the warp's batch
	PxU32 pairOffset = warpScan<AddOpPxU32, PxU32>(FULL_MASK, nbPairs) - nbPairs;

	// last lane holds the warp total; broadcast it to everyone
	PxU32 validCount = 0;
	if (threadIdx.x == (WARP_SIZE - 1))
	{
		validCount = pairOffset + nbPairs;
	}
	validCount = __shfl_sync(FULL_MASK, validCount, WARP_SIZE - 1);

	// lane 0 reserves a contiguous range in the global stack, then broadcasts the base index
	PxU32 startIndex = 0xffffffff;
	if (threadIdx.x == 0 && validCount > 0)
	{
		startIndex = atomicAdd(midphasePairsNum, validCount);
	}
	startIndex = __shfl_sync(FULL_MASK, startIndex, 0);

	uint4* tStackPtr = reinterpret_cast<uint4*>(stackPtr);
	if (nbPairs != 0)
	{
		heightfieldOutputPairs(minColumn, maxColumn, minRow, maxRow, nbRows, nbCols,
			samples, localBound.minimum.y, localBound.maximum.y, cmIdx, triangleIdx, startIndex + pairOffset, stackSize, tStackPtr);
	}
}
// Kernel: cloth-triangle vs. heightfield midphase. Each contact pair is serviced by 64
// virtual warps; each lane builds a fattened world-space bound for one cloth triangle and
// feeds it into clothHeightfieldMidphaseCore, which emits (pair, triangle, cell) candidates.
extern "C" __global__ void cloth_heightfieldMidphaseGeneratePairsLaunch(
const PxU32 numNPWorkItems,
const PxReal toleranceLength,
const PxgContactManagerInput* PX_RESTRICT cmInputs,
const PxsCachedTransform* PX_RESTRICT transformCache,
const PxReal* PX_RESTRICT contactDistance,
const PxgShape* PX_RESTRICT gpuShapes,
PxgFEMCloth* PX_RESTRICT clothes,
PxU8* stackPtr, //output
PxU32* midphasePairsNum, //output
const PxU32 stackSizeBytes
)
{
// NOTE(review): this scratch buffer is shared by all warps of the block, yet each warp
// writes its own heightfield transform/shape below -- looks racy if warps of one block
// map to different contact pairs; confirm the work distribution guarantees they don't.
__shared__ PxU32 scratchMem[1024];
clothHeightfieldScratch* s_warpScratch = reinterpret_cast<clothHeightfieldScratch*>(scratchMem);
const PxU32 nbWarpsPerCm = 64; // 64 warp(2048 threads) to deal with one pair
const PxU32 nbWarpsRequired = nbWarpsPerCm * numNPWorkItems;
const PxU32 nbThreadsPerCm = 64 * WARP_SIZE;
const PxU32 numWarpPerBlock = MIDPHASE_WARPS_PER_BLOCK;
const PxU32 totalNumWarps = PxgSoftBodyKernelGridDim::SB_SBMIDPHASE * numWarpPerBlock;
const PxU32 warpIndex = threadIdx.y;
// evenly split the required warps over the launched warps
const PxU32 nbIterationsPerWarps = (nbWarpsRequired + totalNumWarps - 1) / totalNumWarps;
/*if (warpIndex == 0 && threadIdx.x == 0 && blockIdx.x == 0)
{
printf("nbIterationsPerWarps %i nbWarpsRequired %i\n", nbIterationsPerWarps, nbWarpsRequired);
}*/
for (PxU32 i = 0; i < nbIterationsPerWarps; ++i)
{
const PxU32 workIndex = i + (warpIndex + numWarpPerBlock * blockIdx.x) * nbIterationsPerWarps;
if (workIndex < nbWarpsRequired)
{
unsigned int cmIdx = workIndex / nbWarpsPerCm;
PxgShape clothShape, heightfieldShape;
PxU32 clothCacheRef, heightfieldCacheRef;
LoadShapePair<PxGeometryType::eTRIANGLEMESH, PxGeometryType::eHEIGHTFIELD>(cmInputs, cmIdx, gpuShapes,
clothShape, clothCacheRef, heightfieldShape, heightfieldCacheRef);
PxsCachedTransform heightfieldTransformCache;
PxsCachedTransform_ReadWarp(heightfieldTransformCache, transformCache + heightfieldCacheRef);
const PxgFEMCloth& cloth = clothes[clothShape.particleOrSoftbodyId];
// lane 0 caches the heightfield transform/shape in shared memory for the warp
if (threadIdx.x == 0)
{
s_warpScratch->heightfieldTransform = heightfieldTransformCache.transform;
s_warpScratch->heightfieldShape = heightfieldShape;
}
__syncwarp();
const PxReal contactDist = contactDistance[clothCacheRef] + contactDistance[heightfieldCacheRef];
const PxU32 nbTriangles = cloth.mNbTriangles;// s_warpScratch->numTriangles;
const PxU32 nbIter = (nbTriangles + nbThreadsPerCm - 1) / nbThreadsPerCm;
const uint4* triVertIndices = cloth.mTriangleVertexIndices;// s_warpScratch->trimeshTriVertIndices;
const float4* positions = cloth.mPosition_InvMass;
for (PxU32 j = 0; j < nbIter; ++j)
{
// this warp's slice of the 2048 threads assigned to the pair
const PxU32 wrappedThreadIndex = (workIndex * WARP_SIZE + threadIdx.x) % nbThreadsPerCm;
const PxU32 triangleIdx = wrappedThreadIndex + nbThreadsPerCm * j;
PxBounds3 triangleBound = PxBounds3::empty();
if (triangleIdx < nbTriangles)
{
// cloth positions are stored in world space; build the fattened triangle bound
const uint4 vertIdx = triVertIndices[triangleIdx];
const PxVec3 worldV0 = PxLoad3(positions[vertIdx.x]);
const PxVec3 worldV1 = PxLoad3(positions[vertIdx.y]);
const PxVec3 worldV2 = PxLoad3(positions[vertIdx.z]);
triangleBound = triBoundingBox(worldV0, worldV1, worldV2);
triangleBound.fattenFast(contactDist);
}
clothHeightfieldMidphaseCore(
s_warpScratch,
cmIdx,
triangleBound,
triangleIdx,
workIndex,
nbTriangles,
stackPtr,
midphasePairsNum,
stackSizeBytes/sizeof(uint4)
);
}
}
}
}
// Kernel: cloth-vertex vs. heightfield midphase. Mirrors the triangle variant, but each
// lane tests a sphere bound (vertex position +/- contact distance) against the heightfield
// and emits (pair, vertex, cell) candidates via clothHeightfieldMidphaseCore.
extern "C" __global__ void cloth_midphaseVertexHeightfieldLaunch(
	const PxU32 numNPWorkItems,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	PxgFEMCloth* PX_RESTRICT clothes,
	PxU8* stackPtr, //output
	PxU32* midphasePairsNum, //output
	const PxU32 stackSizeBytes
)
{
	// NOTE(review): scratch is shared by all warps of the block but written per-warp below;
	// confirm warps of one block cannot map to different contact pairs (see triangle variant).
	__shared__ PxU32 scratchMem[1024];
	clothHeightfieldScratch* s_warpScratch = reinterpret_cast<clothHeightfieldScratch*>(scratchMem);

	const PxU32 nbWarpsPerCm = 64; // 64 warp(2048 threads) to deal with one pair
	const PxU32 nbWarpsRequired = nbWarpsPerCm * numNPWorkItems;
	const PxU32 nbThreadsPerCm = 64 * WARP_SIZE;
	const PxU32 numWarpPerBlock = MIDPHASE_WARPS_PER_BLOCK;
	const PxU32 totalNumWarps = PxgSoftBodyKernelGridDim::SB_SBMIDPHASE * numWarpPerBlock;
	const PxU32 warpIndex = threadIdx.y;
	// evenly split the required warps over the launched warps
	const PxU32 nbIterationsPerWarps = (nbWarpsRequired + totalNumWarps - 1) / totalNumWarps;

	for (PxU32 i = 0; i < nbIterationsPerWarps; ++i)
	{
		const PxU32 workIndex = i + (warpIndex + numWarpPerBlock * blockIdx.x) * nbIterationsPerWarps;
		if (workIndex < nbWarpsRequired)
		{
			unsigned int cmIdx = workIndex / nbWarpsPerCm;
			PxgShape clothShape, heightfieldShape;
			PxU32 clothCacheRef, heightfieldCacheRef;
			LoadShapePairWarp<PxGeometryType::eTRIANGLEMESH, PxGeometryType::eHEIGHTFIELD>(cmInputs, cmIdx, gpuShapes,
				clothShape, clothCacheRef, heightfieldShape, heightfieldCacheRef);

			PxsCachedTransform heightfieldTransformCache;
			PxsCachedTransform_ReadWarp(heightfieldTransformCache, transformCache + heightfieldCacheRef);

			const PxgFEMCloth& cloth = clothes[clothShape.particleOrSoftbodyId];

			// lane 0 caches the heightfield transform/shape in shared memory for the warp
			if (threadIdx.x == 0)
			{
				s_warpScratch->heightfieldTransform = heightfieldTransformCache.transform;
				s_warpScratch->heightfieldShape = heightfieldShape;
			}
			__syncwarp();

			const PxReal contactDist = contactDistance[clothCacheRef] + contactDistance[heightfieldCacheRef];
			const float4* positions = cloth.mPosition_InvMass;
			const PxU32 nbVerts = cloth.mNbVerts;
			const PxU32 nbIter = (nbVerts + nbThreadsPerCm - 1) / nbThreadsPerCm;
			const PxVec3 radiusV(contactDist);

			for (PxU32 j = 0; j < nbIter; ++j)
			{
				const PxU32 wrappedThreadIndex = (workIndex * WARP_SIZE + threadIdx.x) % nbThreadsPerCm;
				const PxU32 vertIndex = wrappedThreadIndex + nbThreadsPerCm * j;

				// FIX: guard the position fetch -- the last iteration has lanes with
				// vertIndex >= nbVerts, and the original code read positions[vertIndex]
				// unconditionally (out-of-bounds global read). Idle lanes keep an empty
				// bound; clothHeightfieldMidphaseCore ignores them via the index check.
				PxBounds3 vertBound = PxBounds3::empty();
				if (vertIndex < nbVerts)
				{
					const float4 pos = positions[vertIndex];
					const PxVec3 particlePos(pos.x, pos.y, pos.z);
					vertBound.minimum = particlePos - radiusV;
					vertBound.maximum = particlePos + radiusV;
				}

				clothHeightfieldMidphaseCore(
					s_warpScratch,
					cmIdx,
					vertBound,
					vertIndex,
					workIndex,
					nbVerts,
					stackPtr,
					midphasePairsNum,
					stackSizeBytes/sizeof(uint4)
				);
			}
		}
	}
}

View File

@@ -0,0 +1,620 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxBounds3.h"
#include "foundation/PxMath.h"
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxTransform.h"
#include "foundation/PxVec3.h"
#include "geometry/PxGeometry.h"
#include "geometry/PxMeshScale.h"
#include "GuBV32.h"
#include "cudaNpCommon.h"
#include "PxgContactManager.h"
#include "PxgConvexConvexShape.h"
#include "PxgFEMCloth.h"
#include "PxgNpKernelIndices.h"
#include "PxgParticleSystem.h"
#include "PxsTransformCache.h"
#include <vector_types.h>
#include "copy.cuh"
#include "dataReadWriteHelper.cuh"
#include "femMidphaseScratch.cuh"
#include "utils.cuh"
#include "bv32Traversal.cuh"
#include "triangleMesh.cuh"
#include "deformableCollision.cuh"
using namespace physx;
extern "C" __host__ void initNarrowphaseKernels18() {}
// BV32 traversal visitor for cloth-vs-rigid midphase: tests cloth mesh triangles against
// a fixed world-space AABB and pushes overlapping (cmIdx, triangleIndex) pairs onto the
// global stack. All lanes of the warp call the intersect* methods together.
// NOTE: mBox is a reference to the caller's AABB -- the traverser must not outlive it
// (it is only used as a temporary inside bv32TreeTraversal).
struct ClothTreeTraverser
{
	femMidphaseScratch* s_warpScratch;
	uint4* PX_RESTRICT stackPtr;
	PxU32* PX_RESTRICT midphasePairsNum;
	unsigned int cmIdx;
	const PxU32 stackSize;
	const PxBounds3& mBox;

	PX_FORCE_INLINE __device__ ClothTreeTraverser(
		femMidphaseScratch* s_warpScratch,
		const PxBounds3& aabb,
		uint4* PX_RESTRICT stackPtr,
		PxU32* PX_RESTRICT midphasePairsNum,
		unsigned int cmIdx,
		const PxU32 stackSize)
		// FIX: the mem-initializer list is kept in member declaration order (members are
		// always initialized in that order regardless of list order; the original list
		// was misleading and triggers -Wreorder). The aabb parameter is also taken by
		// const reference, which is what the const member binding requires.
		: s_warpScratch(s_warpScratch), stackPtr(stackPtr), midphasePairsNum(midphasePairsNum),
		cmIdx(cmIdx), stackSize(stackSize), mBox(aabb)
	{ }

	// Leaf callback: every lane tests one triangle (0xFFFFFFFF = no triangle for this lane)
	// against mBox, then the whole warp compacts and appends the hits to the pair stack.
	PX_FORCE_INLINE __device__ void intersectPrimitiveFullWarp(PxU32 primitiveIndex, PxU32 idxInWarp) const
	{
		bool intersect = false;
		if (primitiveIndex != 0xFFFFFFFF)
		{
			uint4 triIdx = s_warpScratch->meshVertsIndices[primitiveIndex];
			const PxVec3 worldV0 = PxLoad3(s_warpScratch->meshVerts[triIdx.x]);
			const PxVec3 worldV1 = PxLoad3(s_warpScratch->meshVerts[triIdx.y]);
			const PxVec3 worldV2 = PxLoad3(s_warpScratch->meshVerts[triIdx.z]);
			intersect = triBoundingBox(worldV0, worldV1, worldV2).intersects(mBox);
		}
		pushOntoStackFullWarp(intersect, midphasePairsNum, stackPtr, stackSize, make_uint4(cmIdx, primitiveIndex, 0, 0));
	}

	// Internal-node callback: descend into children whose bounds overlap mBox.
	PX_FORCE_INLINE __device__ bool intersectBoxFullWarp(bool hasBox, const PxVec3& min, const PxVec3& max) const
	{
		if (hasBox)
			return mBox.intersects(PxBounds3(min, max));
		else
			return false;
	}
};
// Warp-per-pair midphase core: one warp handles one contact-manager pair, traversing the
// cloth's BV32 tree against the rigid shape's fattened world AABB and pushing overlapping
// (pair, triangle) candidates onto the global stack.
template<unsigned int WarpsPerBlock>
__device__ static inline void femClothMidphaseCore(
PxU32 numNPWorkItems,
const PxReal toleranceLength,
const PxgContactManagerInput* PX_RESTRICT cmInputs,
const PxsCachedTransform* PX_RESTRICT transformCache,
const PxBounds3* PX_RESTRICT bounds,
const PxReal* PX_RESTRICT contactDistance,
const PxgShape* PX_RESTRICT gpuShapes,
const PxgFEMCloth* PX_RESTRICT femClothes,
PxU8* PX_RESTRICT stackPtr, //output
PxU32* PX_RESTRICT midphasePairsNum, //output
femMidphaseScratch* s_warpScratch,
const PxU32 stackSize
)
{
//warp index within the block
const unsigned int warpIdx = threadIdx.y;//(idx >> LOG2_WARP_SIZE);
//wrap index in block
//const unsigned int idx = idxInWarp + warpIdx * WARP_SIZE;
// one warp per contact pair
unsigned int cmIdx = warpIdx + blockIdx.x * blockDim.y;
//unsigned mask_cmIdx = __ballot_sync(FULL_MASK, cmIdx < numNPWorkItems);
if (cmIdx < numNPWorkItems)
{
/* if(threadIdx.x == 0)
printf("cmIdx %i warpIdx %i\n", cmIdx, warpIdx);*/
PxgShape clothShape, rigidShape;
PxU32 clothCacheRef, rigidCacheRef;
LoadShapePairWarp<PxGeometryType::eTRIANGLEMESH>(cmInputs, cmIdx, gpuShapes,
clothShape, clothCacheRef, rigidShape, rigidCacheRef);
const PxU32 femClothId = clothShape.particleOrSoftbodyId;
const PxgFEMCloth& femCloth = femClothes[femClothId];
PxU8 * trimeshGeomPtr = reinterpret_cast<PxU8 *>(femCloth.mTriMeshData);
trimeshGeomPtr += sizeof(uint4); // skip nbVerts_nbTets_maxDepth_nbBv32TreeNodes
// lane 0 caches the mesh pointers and the BV32 root in per-warp scratch
if (threadIdx.x == 0)
{
s_warpScratch->meshVerts = femCloth.mPosition_InvMass;
s_warpScratch->meshVertsIndices = femCloth.mTriangleVertexIndices;
Gu::BV32DataPacked* bv32PackedNodes = reinterpret_cast<Gu::BV32DataPacked*>(trimeshGeomPtr);
s_warpScratch->bv32PackedNodes = bv32PackedNodes;
}
__syncwarp();
//ML: this is the world bound AABB of the rigid shape, fattened by the contact distance
PxBounds3 aabb;
PxBounds3_ReadWarp(aabb, bounds + rigidCacheRef);
const PxReal contactDist = contactDistance[clothCacheRef] + contactDistance[rigidCacheRef];
aabb.fattenFast(contactDist);
/*if (threadIdx.x == 0)
{
printf("contactDist %f transformCacheRef0 %i transformCacheRef1 %i\n", contactDist, transformCacheRef0, transformCacheRef1);
printf("min(%f, %f, %f) max(%f, %f, %f)\n", aabb.minimum.x, aabb.minimum.y, aabb.minimum.z,
aabb.maximum.x, aabb.maximum.y, aabb.maximum.z);
}*/
uint4* tStackPtr = reinterpret_cast<uint4*>(stackPtr);
// traverse the BV32 tree; the traverser appends overlapping triangles to the stack
bv32TreeTraversal<const ClothTreeTraverser, WarpsPerBlock>(s_warpScratch->bv32PackedNodes, s_warpScratch->sBv32Nodes, ClothTreeTraverser(
s_warpScratch, aabb, tStackPtr, midphasePairsNum, cmIdx, stackSize));
/*if (threadIdx.x == 0 && nbPairsPerCM > 0)
printf("nbPairsPerCM %i\n", nbPairsPerCM);*/
}
}
// Kernel: cloth-triangle vs. rigid midphase. Thin wrapper that carves out per-warp
// shared-memory scratch and forwards to femClothMidphaseCore (one warp per contact pair).
extern "C" __global__
//__launch_bounds__(MIDPHASE_WARPS_PER_BLOCK * WARP_SIZE, 4)
void cloth_midphaseGeneratePairsLaunch(
PxU32 numWorkItems,
const PxReal tolerenceLength,
const PxgContactManagerInput* PX_RESTRICT cmInputs,
const PxsCachedTransform* PX_RESTRICT transformCache,
const PxBounds3* PX_RESTRICT bounds,
const PxReal* PX_RESTRICT contactDistance,
const PxgShape* PX_RESTRICT gpuShapes,
const PxgFEMCloth* PX_RESTRICT femClothes,
PxU8* PX_RESTRICT stackPtr, //output
PxU32* PX_RESTRICT midphasePairsNum, //output
const PxU32 stackSizeBytes
)
{
// one scratch slice per warp (indexed by threadIdx.y), unlike the block-shared
// scratch used by the heightfield kernels
__shared__ PxU32 scratchMem[MIDPHASE_WARPS_PER_BLOCK][WARP_SIZE * 7];
femClothMidphaseCore<MIDPHASE_WARPS_PER_BLOCK>(
numWorkItems,
tolerenceLength,
cmInputs,
transformCache,
bounds,
contactDistance,
gpuShapes,
femClothes,
stackPtr,
midphasePairsNum,
(femMidphaseScratch*)scratchMem[threadIdx.y],
stackSizeBytes/sizeof(uint4)
);
}
// Kernel: cloth-vertex vs. rigid midphase. blockIdx.y selects the contact pair; the grid's
// x-dimension threads grid-stride over the cloth vertices, testing each vertex (inflated by
// the contact distance) against the rigid shape's world AABB and appending (pair, vertex)
// hits to the output stack.
extern "C" __global__
//__launch_bounds__(MIDPHASE_WARPS_PER_BLOCK * WARP_SIZE, 4)
void cloth_midphaseGenerateVertexPairsLaunch(
	PxU32 numWorkItems,
	const PxReal tolerenceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	const PxgFEMCloth* PX_RESTRICT femClothes,
	uint4* PX_RESTRICT stackPtr, //output
	PxU32* PX_RESTRICT midphasePairsNum //output
)
{
	const unsigned int cmIdx = blockIdx.y;
	//unsigned mask_cmIdx = __ballot_sync(FULL_MASK, cmIdx < numNPWorkItems);
	if (cmIdx < numWorkItems)
	{
		PxgShape clothShape, rigidShape;
		PxU32 clothCacheRef, rigidCacheRef;
		LoadShapePairWarp<PxGeometryType::eTRIANGLEMESH>(cmInputs, cmIdx, gpuShapes,
			clothShape, clothCacheRef, rigidShape, rigidCacheRef);

		const PxU32 femClothId = clothShape.particleOrSoftbodyId;
		const PxgFEMCloth& femCloth = femClothes[femClothId];
		// FIX: removed the dead 'trimeshGeomPtr' local (computed and advanced but never
		// read) -- this kernel only needs the vertex positions, not the BV32 tree.
		const float4* const PX_RESTRICT meshVerts = femCloth.mPosition_InvMass;
		const PxU32 nbVerts = femCloth.mNbVerts;

		//ML: this is the world bound AABB of the rigid shape
		PxBounds3 aabb;
		PxBounds3_ReadWarp(aabb, bounds + rigidCacheRef);

		const PxReal contactDist = contactDistance[clothCacheRef] + contactDistance[rigidCacheRef];
		//aabb.fattenFast(contactDist);
		// squared distance threshold: vertex hits if it is within contactDist of the AABB
		const PxReal tolerance = contactDist * contactDist;

		const PxU32 idx = threadIdx.x + threadIdx.y*WARP_SIZE + blockIdx.x*blockDim.x*blockDim.y;
		for (PxU32 i = 0; i < nbVerts; i += blockDim.x*blockDim.y*gridDim.x)
		{
			PxU32 id = idx + i;
			bool hasHit = false;
			if (id < nbVerts)
			{
				float4 vertex = meshVerts[id];
				PxVec3 v(vertex.x, vertex.y, vertex.z);
				PxVec3 closest = aabb.closestPoint(v);
				hasHit = (closest - v).magnitudeSquared() <= tolerance;
			}
			// warp-compact the hits and append them to the global stack in one atomic
			PxU32 hitMask = __ballot_sync(FULL_MASK, hasHit);
			if (hitMask)
			{
				PxU32 prevStackShift = 0xFFffFFff;
				PxU32 validCount = __popc(hitMask);
				if (threadIdx.x == 0 && validCount > 0)
				{
					prevStackShift = atomicAdd(midphasePairsNum, validCount);
				}
				prevStackShift = __shfl_sync(FULL_MASK, prevStackShift, 0);
				if (hasHit)
				{
					// this lane's slot = number of hit lanes below it
					PxU32 offset = warpScanExclusive(hitMask, threadIdx.x);
					stackPtr[prevStackShift + offset] = make_uint4(cmIdx, id, 0, 0);
				}
			}
		}
	}
}
// Warp-cooperative visitor for BV32 tree traversal: tests one cloth/particle
// vertex — conservatively represented by the AABB of a sphere (sphereCenter,
// radius) — against the triangles of a BV32-packed mesh, and pushes
// overlapping (cmIdx, globalParticleIndex, triangleIndex, 0) tuples onto the
// midphase pair stack.
struct ClothParticleTreeTraverser
{
	femMidphaseScratch* PX_RESTRICT s_warpScratch;	// per-warp mesh pointers / traversal nodes
	const PxVec3 sphereCenter;
	const PxReal radius;
	const PxBounds3 mBox;		// AABB enclosing the sphere; used for all intersection tests
	const PxU32 globalParticleIndex;
	const PxU32 numParticles;
	const PxU32 cmIdx;			// contact-manager index recorded in each emitted pair
	const PxU32 stackSize;		// capacity of stackPtr, in uint4 entries
	uint4* PX_RESTRICT stackPtr;	//output
	PxU32* midphasePairs;		//output: global pair counter
	const bool isMesh;			// true when triangles belong to a static trimesh (local space)

	PX_FORCE_INLINE __device__ ClothParticleTreeTraverser(
		femMidphaseScratch* PX_RESTRICT s_warpScratch,
		const PxVec3 sphereCenter,
		const PxReal radius,
		const PxU32 globalParticleIndex,
		const PxU32 numParticles,
		const PxU32 cmIdx,
		const PxU32 stackSize,
		uint4* PX_RESTRICT stackPtr, //output
		PxU32* midphasePairs, //output
		const bool isMesh)
		// The initializer list follows declaration order (members are always
		// initialized in declaration order regardless; listing them that way
		// avoids -Wreorder and makes the real order explicit). mBox is built
		// from the constructor parameters, so its position is safe either way.
		: s_warpScratch(s_warpScratch), sphereCenter(sphereCenter), radius(radius),
		mBox(sphereCenter - PxVec3(radius), sphereCenter + PxVec3(radius)),
		globalParticleIndex(globalParticleIndex), numParticles(numParticles),
		cmIdx(cmIdx), stackSize(stackSize), stackPtr(stackPtr), midphasePairs(midphasePairs), isMesh(isMesh)
	{ }

	// Leaf callback, executed by every warp lane with its candidate triangle
	// (0xFFFFFFFF when the lane has none); the warp then compacts the overlap
	// results onto the stack in one cooperative push.
	PX_FORCE_INLINE __device__ void intersectPrimitiveFullWarp(PxU32 primitiveIndex, PxU32 idxInWarp) const
	{
		bool intersect = false;
		if (primitiveIndex != 0xFFFFFFFF)
		{
			const uint4 triIdx = s_warpScratch->meshVertsIndices[primitiveIndex];
			const PxVec3 worldV0 = PxLoad3(s_warpScratch->meshVerts[triIdx.x]);
			const PxVec3 worldV1 = PxLoad3(s_warpScratch->meshVerts[triIdx.y]);
			const PxVec3 worldV2 = PxLoad3(s_warpScratch->meshVerts[triIdx.z]);
			// A plane-side test that culled back-facing (double-sided) mesh
			// triangles used to sit on the isMesh path but is disabled, so the
			// mesh and cloth cases now run the identical test: triangle AABB
			// vs. the sphere's AABB.
			intersect = triBoundingBox(worldV0, worldV1, worldV2).intersects(mBox);
		}
		pushOntoStackFullWarp(intersect, midphasePairs, stackPtr, stackSize, make_uint4(cmIdx, globalParticleIndex, primitiveIndex, 0));
	}

	// Node test used during traversal: descend only into nodes whose AABB
	// overlaps the sphere's AABB.
	PX_FORCE_INLINE __device__ bool intersectBoxFullWarp(bool hasBox, const PxVec3& min, const PxVec3& max) const
	{
		if (hasBox)
			return mBox.intersects(PxBounds3(min, max));
		else
			return false;
	}
};
// Midphase kernel for particle-system-vs-cloth pairs: one pair per blockIdx.y.
// The particle system struct is staged into shared memory, the cloth's mesh
// pointers into per-warp scratch; each warp then traverses the cloth's BV32
// tree once per particle it owns, emitting candidate
// (cmIdx, particleIndex, triangleIndex, 0) pairs onto the stack.
extern "C" __global__
//__launch_bounds__(MIDPHASE_WARPS_PER_BLOCK * WARP_SIZE, 4)
void cloth_psMidphaseGeneratePairsLaunch(
	PxU32 numWorkItems,
	const PxReal tolerenceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxgShape* PX_RESTRICT gpuShapes,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgFEMCloth* PX_RESTRICT clothes,
	PxgParticleSystem* PX_RESTRICT particleSystems,
	const PxU32 stackSizeBytes,
	PxU8* PX_RESTRICT stackPtr, //output
	PxU32* PX_RESTRICT midphasePairsNum //output
	)
{
	__shared__ __align__(16) PxU8 particleSystemMemory[sizeof(PxgParticleSystem)];
	PxgParticleSystem& sParticleSystem = *(reinterpret_cast<PxgParticleSystem*>(particleSystemMemory));
	__shared__ femMidphaseScratch scratchMem[MIDPHASE_WARPS_PER_BLOCK];
	femMidphaseScratch* s_warpScratch = &scratchMem[threadIdx.y];
	if (blockIdx.y < numWorkItems)
	{
		const PxU32 globalWarpIdx = threadIdx.y + blockIdx.x*blockDim.y;
		PxgShape particleShape, clothShape;
		PxU32 particleCacheRef, clothCacheRef;
		LoadShapePairWarp<PxGeometryType::ePARTICLESYSTEM, PxGeometryType::eTRIANGLEMESH>(cmInputs, blockIdx.y, gpuShapes,
			particleShape, particleCacheRef, clothShape, clothCacheRef);
		const PxU32 particleSystemId = particleShape.particleOrSoftbodyId;
		// Stage the whole particle-system struct into shared memory; each
		// thread copies 16 bytes.
		uint4* srcParticleSystem = reinterpret_cast<uint4*>(&particleSystems[particleSystemId]);
		uint4* dstParticleSystem = reinterpret_cast<uint4*>(&sParticleSystem);
		warpCopy<uint4>(dstParticleSystem, srcParticleSystem, sizeof(PxgParticleSystem));
		const PxU32 clothId = clothShape.particleOrSoftbodyId;	// fixed local typo: was "clohtId"
		const PxgFEMCloth& cloth = clothes[clothId];
		// Renamed from "tetmeshGeomPtr": this is the cloth's *triangle* mesh data.
		PxU8* trimeshGeomPtr = reinterpret_cast<PxU8*>(cloth.mTriMeshData);
		// Skip the uint4 mesh header (vert/tri counts etc.) at the front.
		trimeshGeomPtr += sizeof(uint4);
		// Lane 0 of each warp publishes the cloth mesh pointers to the warp's scratch.
		if (threadIdx.x == 0)
		{
			s_warpScratch->meshVerts = cloth.mPosition_InvMass;
			s_warpScratch->meshVertsIndices = cloth.mTriangleVertexIndices;
			s_warpScratch->bv32PackedNodes = reinterpret_cast<Gu::BV32DataPacked*>(trimeshGeomPtr);
		}
		__syncthreads();
		PxReal cDistance = contactDistance[particleCacheRef] + contactDistance[clothCacheRef];
		const PxU32 nbParticles = sParticleSystem.mCommonData.mNumParticles;
		const PxU32 NbWarps = blockDim.y*gridDim.x;
		float4* sortedPose = reinterpret_cast<float4*>(sParticleSystem.mSortedPositions_InvMass);
		uint4* tStackPtr = reinterpret_cast<uint4*>(stackPtr);
		const PxU32 stackSize = stackSizeBytes / sizeof(uint4);
		// One warp-cooperative BV32 traversal per particle, distributed over
		// all warps of the launch.
		for (PxU32 i = globalWarpIdx; i < nbParticles; i += NbWarps)
		{
			const PxU32 particleIndex = i;
			const float4 pos = sortedPose[particleIndex];
			const PxVec3 particlePos(pos.x, pos.y, pos.z);
			bv32TreeTraversal<const ClothParticleTreeTraverser, MIDPHASE_WARPS_PER_BLOCK>(s_warpScratch->bv32PackedNodes, s_warpScratch->sBv32Nodes, ClothParticleTreeTraverser(s_warpScratch, particlePos, cDistance, particleIndex,
				nbParticles, blockIdx.y, stackSize, tStackPtr, midphasePairsNum, false));
		}
	}
}
// Midphase kernel for cloth-vs-static-triangle-mesh pairs: one pair per
// blockIdx.y. Each warp takes a slice of the cloth's vertices, transforms
// them into the mesh's scaled local (vertex) space, and traverses the mesh's
// BV32 tree, pushing candidate (cmIdx, vertexIndex, triangleIndex, 0) pairs
// onto the stack.
extern "C" __global__
//__launch_bounds__(MIDPHASE_WARPS_PER_BLOCK * WARP_SIZE, 4)
void cloth_midphaseVertexMeshLaunch(
	PxU32 numWorkItems,
	const PxReal tolerenceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	const PxgFEMCloth* PX_RESTRICT femClothes,
	const PxU32 stackSizeBytes,
	PxU8* PX_RESTRICT stackPtr, //output
	PxU32* PX_RESTRICT midphasePairsNum //output
	)
{
	__shared__ femMidphaseScratch scratchMem[MIDPHASE_WARPS_PER_BLOCK];
	femMidphaseScratch* s_warpScratch = &scratchMem[threadIdx.y];
	const PxU32 cmIdx = blockIdx.y;
	// NOTE(review): the work-item bounds check is disabled — presumably the
	// grid's y-dimension is launched to exactly numWorkItems; verify.
	//if (cmIdx < numWorkItems)
	{
		//const PxU32 globalThreadIdx = threadIdx.y * WARP_SIZE + threadIdx.x + blockIdx.x*blockDim.x*blockDim.y;
		const PxU32 globalWarpIdx = threadIdx.y + blockIdx.x*blockDim.y;
		PxgShape shape0, shape1;
		PxgShape* trimeshShape = NULL;
		PxgShape* clothShape = NULL;
		PxU32 trimeshCacheRef, clothCacheRef;
		{
			// Both shapes are triangle meshes; the cloth is the one with a
			// valid particleOrSoftbodyId, the static mesh has 0xffffffff.
			PxgContactManagerInput npWorkItem;
			PxgContactManagerInput_ReadWarp(npWorkItem, cmInputs, cmIdx);
			PxgShape_ReadWarp(shape0, gpuShapes + npWorkItem.shapeRef0);
			PxgShape_ReadWarp(shape1, gpuShapes + npWorkItem.shapeRef1);
			assert(shape0.type == PxGeometryType::eTRIANGLEMESH && shape1.type == PxGeometryType::eTRIANGLEMESH);
			if (shape0.particleOrSoftbodyId != 0xffffffff)
			{
				trimeshShape = &shape1;
				trimeshCacheRef = npWorkItem.transformCacheRef1;
				clothShape = &shape0;
				clothCacheRef = npWorkItem.transformCacheRef0;
			}
			else
			{
				trimeshShape = &shape0;
				trimeshCacheRef = npWorkItem.transformCacheRef0;
				clothShape = &shape1;
				clothCacheRef = npWorkItem.transformCacheRef1;
			}
			assert(trimeshShape->particleOrSoftbodyId == 0xffffffff);
		}
		PxU32 clothId = clothShape->particleOrSoftbodyId;
		const PxgFEMCloth* cloth = &femClothes[clothId];
		PxsCachedTransform trimeshTransformCache;
		PxsCachedTransform_ReadWarp(trimeshTransformCache, transformCache + trimeshCacheRef);
		// Lane 0 of each warp publishes the static mesh's BV32/vertex/index
		// pointers to the warp's scratch; other lanes wait at the barrier.
		if (threadIdx.x == 0)
		{
			const PxU8* triGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape->hullOrMeshPtr);
			readTriangleMesh(triGeomPtr, s_warpScratch->bv32PackedNodes, s_warpScratch->meshVerts, s_warpScratch->meshVertsIndices);
		}
		__syncthreads();
		PxTransform& trimeshToWorld = trimeshTransformCache.transform;
		const PxMeshScale& trimeshScale = trimeshShape->scale;
		PxReal cDistance = contactDistance[clothCacheRef] + contactDistance[trimeshCacheRef];
		//const PxU32 threadBaseAddress = globalThreadIdx & (~31);
		const PxU32 numVerts = cloth->mNbVerts;
		const float4* positions = cloth->mPosition_InvMass;
		//const PxU32 NbThreads = blockDim.x*blockDim.y*gridDim.x;
		const PxU32 NbWarps = blockDim.y*gridDim.x;
		uint4* tStackPtr = reinterpret_cast<uint4*>(stackPtr);
		const PxU32 stackSize = stackSizeBytes / sizeof(uint4);
		// One warp-cooperative BV32 traversal per cloth vertex.
		for (PxU32 particleIndex = globalWarpIdx; particleIndex < numVerts; particleIndex += NbWarps)
		{
			const float4 pos = positions[particleIndex];
			//particle pos need to be in triangle mesh vertex space
			const PxVec3 particlePos(pos.x, pos.y, pos.z);
			/// transform those point to triangle vertex space
			const PxVec3 localParticlePos = shape2Vertex(trimeshToWorld.transformInv(particlePos), trimeshScale.scale, trimeshScale.rotation);
			bv32TreeTraversal<const ClothParticleTreeTraverser, MIDPHASE_WARPS_PER_BLOCK>(s_warpScratch->bv32PackedNodes, s_warpScratch->sBv32Nodes, ClothParticleTreeTraverser(s_warpScratch, localParticlePos, cDistance, particleIndex,
				numVerts, cmIdx, stackSize, tStackPtr, midphasePairsNum, true));
		}
	}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,422 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __GJK_CUH__
#define __GJK_CUH__
namespace squawk
{
// The simplex is stored as 4 vectors and then 4 normals (so we can shoot it straight into EPA)
// The 3 component of each vector contains 3f80 in the high bits (so it's a valid float) and
// the vertex indices for a and b in the lower bits
typedef PxReal Simplex;
// Overwrites normal 0 of the simplex (stored in lanes 16..19) with the vector
// selected by `index` from nV; all other lanes keep their current value.
static __device__ inline Simplex setNormal0(Simplex z, PxReal nV, PxU32 index)
{
	// All lanes execute the splat (it shuffles warp-wide) before selecting.
	const PxReal splatted = splatV(index, nV);
	if (threadIdx.x >= 16 && threadIdx.x <= 19)
		return splatted;
	return z;
}
// Overwrites normals 1..3 of the simplex (lanes 20..31) by shifting nV up by
// 20 lanes; lanes below 20 keep their current value.
static __device__ inline Simplex setNormal123(Simplex z, PxReal nV)
{
	// The shuffle is executed by every lane before the per-lane selection.
	const PxReal shifted = __shfl_up_sync(FULL_MASK, nV, 20);
	if (threadIdx.x >= 20)
		return shifted;
	return z;
}
// Loads the cached simplex as one scalar word per warp lane (lane i reads
// word i of the GjkCachedData blob).
static __device__ inline Simplex loadSimplex(const schlock::GjkCachedData& data)
{
	const PxReal* words = reinterpret_cast<const PxReal*>(&data);
	return words[threadIdx.x];
}
// Stores the per-lane simplex word back into the cache (lane i writes word i).
static __device__ inline void storeSimplex(schlock::GjkCachedData& data, Simplex z)
{
	PxReal* words = reinterpret_cast<PxReal*>(&data);
	words[threadIdx.x] = z;
}
// Volatile overload: same per-lane scatter, for cache data that other
// threads may be observing concurrently.
static __device__ inline void storeSimplex(volatile schlock::GjkCachedData& data, Simplex z)
{
	volatile PxReal* words = reinterpret_cast<volatile PxReal*>(&data);
	words[threadIdx.x] = z;
}
// suffixes for vector warps:
// * V = 3 vectors: (aX,aY,aZ,_,bX,bY,bZ,_,cX,cY,cZ,_)
// * D = dense scalar: (a,a,a,a,b,b,b,b,c,c,c,c)
// * S = sparse scalar: (a,_,_,_,b,_,_,_,c,_,_,_)
// * I = index vector for shuffle
#define DEGENERATE 0xffff
// Warp-wide closest-point query on a triangle: the simplex zV holds the two
// base vertices and cV is the new apex. Classifies the origin into a Voronoi
// region (face / edge / apex), writes the closest point, and returns a
// feature code (vertex permutation in nibbles 1..3, new simplex size in
// nibble 0) or DEGENERATE on failure. Updates normal 0 in zV for the face
// case so EPA can start from it.
static __device__ inline PxU32 closestPointTri(Simplex &zV, PxReal cV, PxReal& closestPoint)
{
	PxU32 tI = threadIdx.x, cp0I = (tI&28)|next3(tI&3), cp1I = (tI&28)|prev3(tI&3); // cross product indices
	closestPoint = 0;
	PxReal eV = cV - zV, lD = splatX(dot(eV, eV)); // edges to apex & squared lengths
	if(anyX<2>(lD==0))
		return DEGENERATE; // FAIL: apex coincident with a base vert
	// Use the shorter of the two edges to the apex for a better-conditioned normal.
	PxReal tmp = __shfl_xor_sync(FULL_MASK, eV, 4), shortEdgeV = __shfl_xor_sync(FULL_MASK, lD, 4) < lD ? tmp : eV; // shorter edge to apex
	PxReal mV = cross(__shfl_xor_sync(FULL_MASK, zV,4)-zV, shortEdgeV, cp0I, cp1I); // (normal, -normal)
	// lots of dot products here with cV, (plus proj below). TODO: optimize
	PxReal pS = dot(eV,cV); // projection of origin onto edges
	bool uS = dot(cross(mV, eV, cp0I, cp1I), cV) <= 0; // origin outside edge planes
	if(!anyX<2>(uS)) // REGION: face - inside both edge planes
	{
		PxReal projD = __shfl_sync(FULL_MASK, dot(mV,cV),0); // project onto face
		closestPoint = splatV(0, mV*projD/splatX(dot(mV,mV)));
		bool flip = projD<0; // ensure the normal winding makes it outward-facing in the tet
		mV = (tI & 3) == 3 ? projD : mV;
		zV = setNormal0(zV, mV, flip);
		return flip ? 0x0123u : 0x1023u;
	}
	PxU32 eid = __ffs(packXFlags<2>(uS & bool(pS>0))); // REGION: edges - outside the edge plane, and projects onto the edge
	if(eid--) // __ffs returns 0 for no bit, 1 for lsb
	{
		closestPoint = splatV(eid, cV - eV*splatX(pS)/lD);
		return eid<<8 | 0x22;
	}
	closestPoint = cV; // REGION: apex - not an edge, not a face
	return 0x0021;
}
// Warp-wide closest-point query on a tetrahedron: zV holds the three base
// vertices (plus normal 0 from the previous iteration) and dV is the new
// apex. Classifies the origin against the tet's Voronoi regions (inside /
// apex / edge / face), writes the closest point, and returns a feature code
// (retained-vertex permutation in the upper nibbles, new size in nibble 0)
// or DEGENERATE on any failure. Stores the face normals in zV when the
// origin is enclosed, so EPA can take over directly.
static __device__ inline PxU32 closestPointTet(Simplex &zV, PxReal dV, PxReal& closestPoint)
{
	closestPoint = 0;
	PxReal n0V = splatV(4, zV);
	if(__shfl_sync(FULL_MASK, dot(n0V, dV) > dot(n0V, zV), 0)) // FAIL: new vertex is beyond old face (TODO: set up normal in UT)
		return DEGENERATE;
	PxU32 tI = threadIdx.x, cp0I = (tI&28)|next3(tI&3), cp1I = (tI&28)|prev3(tI&3); // cross product indices
	PxU32 vNextI = tI>7 ? tI-8 : tI+4, vPrevI = tI<4 ? tI + 8 : tI-4; // block rotate indices
	PxReal eV = dV - zV, lD = splatX(dot(eV, eV)); // edges to apex and squared lengths
	if(anyX<3>(lD==0))
		return DEGENERATE; // FAIL: apex is coincident with a base vert
	PxReal tmp = __shfl_sync(FULL_MASK, eV, vNextI), shortEdgeV = __shfl_sync(FULL_MASK, lD, vNextI) < lD ? tmp : eV; // shorter of edges to apex for each tri
	PxReal nV = cross(shortEdgeV, __shfl_sync(FULL_MASK, zV, vNextI) - zV, cp0I, cp1I); // face normals, outward facing
	PxReal aS = dot(nV, dV); // distances along normals of apex
	PxReal gS = dot(__shfl_sync(FULL_MASK, zV, vPrevI), nV); // distances along normal of vert not in this triangle
	if(anyX<3>(aS<=gS))
		return DEGENERATE; // FAIL: simplex inversion/collapse
	// Pack each face's apex distance into the w-lane of its normal vector.
	PxReal tmpV = __shfl_up_sync(FULL_MASK, aS, 3);
	nV = (tI & 3) == 3 ? tmpV : nV;
	if(allX<3>(aS>0)) // REGION: inside simplex
	{
		zV = setNormal123(zV, nV); // set normals for EPA start
		return 0x2104;
	}
	const PxReal pS = dot(eV,dV); // projection of origin onto edges
	if(allX<3>(pS<=0)) // REGION: apex
	{
		closestPoint = dV;
		return 0x2131;
	}
	bool uiS = dot(cross(eV, nV, cp0I, cp1I), dV) <= 0; // origin outside one edge plane of each triangle
	bool ujS = dot(cross(__shfl_sync(FULL_MASK, eV,vNextI), nV, cp0I, cp1I), dV) > 0; // origin outside other edge plane of each triangle
	int eid = __ffs(packXFlags<3>(uiS & __shfl_sync(FULL_MASK, ujS, vPrevI) & bool(pS>=0))); // REGION: edges - origin outside edge planes and projects onto edge
	if(eid--) // __ffs returns 0 for no bit, 1 for lsb
	{
		closestPoint = splatV(eid, dV - eV * splatX(pS/lD));
		return eid<<8| 0x32;
	}
	PxU32 fid = __ffs(packXFlags<3>(!uiS & !ujS & bool(aS<=0))); // REGION: faces - origin outside face plane, inside edge planes
	if(fid--) // __ffs returns 0 for no bit, 1 for lsb
	{
		closestPoint = splatV(fid, nV * splatX(aS / dot(nV, nV)));
		zV = setNormal0(zV, -nV, fid); // flip normal to be outward facing for next iteration
		return next3(fid)<<12 | fid<<8| 0x33;
	}
	return DEGENERATE; // FAIL: couldn't find voronoi region. Chort vozmi!
}
// Support function over an in-memory vertex array: returns the index of the
// vertex of v[0..nbVertices) with the greatest projection onto `dir` (a
// vector register with x,y,z in lanes 0..2). Processes 32 vertices per pass
// with a warp max-reduction; all lanes return the same index.
static __device__ inline PxU8 supportIndex(const PxVec3* v, PxU8 nbVertices, PxReal dir)
{
	int bestN = 0xFFFFFFFF; // initialize to an illegal value to force a crash if we get NaNs
	PxReal bestDist = -FLT_MAX;
	PxReal x = __shfl_sync(FULL_MASK, dir, 0), y = __shfl_sync(FULL_MASK, dir, 1), z = __shfl_sync(FULL_MASK, dir, 2);
	for(PxU32 i=0;i<nbVertices;i+=32)
	{
		int n = i+threadIdx.x;
		// Out-of-range lanes contribute -FLT_MAX so they never win the max.
		PxReal d = n < nbVertices ? x*v[n].x + y*v[n].y + z*v[n].z : -FLT_MAX, e;
		// Butterfly max-reduction: e ends up as the warp-wide maximum.
		e = fmaxf(d, __shfl_xor_sync(FULL_MASK, d, 16));
		e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 8));
		e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 4));
		e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 2));
		e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 1));
		unsigned mask_bestDist = __ballot_sync(FULL_MASK, e > bestDist);
		// First lane whose d equals the maximum provides the winning index.
		if(e>bestDist)
			bestDist = e, bestN = i+__ffs(__ballot_sync(mask_bestDist, e == d))-1;
	}
	assert(bestN < nbVertices);
	return PxU8(bestN);
}
// Support function over up to 64 vertices pre-loaded into registers: lane i
// holds vertex i in v0 and (when i+32 < nbVertices) vertex i+32 in v1.
// Returns the index of the vertex with the greatest projection onto `dir`
// (a vector register with x,y,z in lanes 0..2); all lanes return the same
// result.
static __device__ inline PxU8 supportIndex(const PxVec3& v0, const PxVec3& v1, PxU8 nbVertices, PxReal dir)
{
	int bestN = 0xFFFFFFFF;
	PxReal x = __shfl_sync(FULL_MASK, dir, 0), y = __shfl_sync(FULL_MASK, dir, 1), z = __shfl_sync(FULL_MASK, dir, 2);
	PxReal d = -FLT_MAX;
	PxU32 n = threadIdx.x;
	if (threadIdx.x < nbVertices)
	{
		d = x*v0.x + y*v0.y + z*v0.z;
		if ((threadIdx.x + 32) < nbVertices)
		{
			// BUGFIX: the second register holds vertex threadIdx.x+32, so its
			// projection must use v1. Previously this recomputed v0's dot
			// product, making d1 == d, so the upper 32 vertices (and v1
			// itself) were never actually considered.
			PxReal d1 = x*v1.x + y*v1.y + z*v1.z;
			if (d1 > d)
			{
				d = d1;
				n += 32;
			}
		}
	}
	// Butterfly max-reduction: e becomes the warp-wide maximum projection.
	PxReal e = fmaxf(d, __shfl_xor_sync(FULL_MASK, d, 16));
	e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 8));
	e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 4));
	e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 2));
	e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 1));
	// First lane whose d equals the maximum is the winner...
	bestN = __ffs(__ballot_sync(FULL_MASK, e == d)) - 1;
	// ...and if that lane selected its upper vertex, shift the index into 32..63.
	bestN += __popc(__ballot_sync(FULL_MASK, n >= 32) & (1 << bestN)) * 32;
	assert(bestN < nbVertices);
	return PxU8(bestN);
}
// Warp-cooperative GJK distance query between convex hulls A and B (Minkowski
// difference A-B). Supports warm-starting from cacheData's previous simplex;
// iterates support-point insertion + closest-point simplex reduction until
// the lower/upper distance bounds converge (ratio convergenceRatio), overlap
// is proven (upperBound == 0), or separation exceeds minSep. Writes the
// witness points, separating direction, and bounds into `output`, and the
// final simplex back into `cacheData` for the next frame.
static __device__ inline schlock::GjkResult::Enum gjk(
	const PxVec3* vA, PxU32 nbA,
	const PxVec3* vB, PxU32 nbB,
	const PxVec3& initialUnitDir,
	PxReal minSep,
	PxReal convergenceRatio,
	schlock::GjkOutput& output, schlock::GjkCachedData& cacheData)
{
	int cacheMask = (1<< cacheData.size)-1;
	__syncwarp(); //cacheData.size is written further below
	Simplex simplexV = loadSimplex(cacheData);
restart:
	// Restart point for when warm-starting produces a degenerate simplex:
	// re-run cold with cacheMask cleared.
	bool warmStarted = false;
	Simplex backupSimplexV;
	int size = 0, backupSize = 0;
	PxReal closestPointV, backupClosestV;
	if (cacheMask)
		closestPointV = splatV(0, simplexV);
	else
	{
		// Cold start: seed the search direction from initialUnitDir
		// (assembled per-lane into the vector register layout).
		if ((threadIdx.x & 3) == 0)
			closestPointV = initialUnitDir.x;
		else if ((threadIdx.x & 3) == 1)
			closestPointV = initialUnitDir.y;
		else if ((threadIdx.x & 3) == 2)
			closestPointV = initialUnitDir.z;
		else
			closestPointV = PX_MAX_F32;
	}
	PxReal upperBound = PX_MAX_F32, lowerBound = -PX_MAX_F32, cm = magnitude(closestPointV); // if we hit a lower bound dir with distance exactly zero, -FLT_MIN means we keep it
	bool degenerate;
	do
	{
		PxReal dV = closestPointV/cm, supportVertexV;
#if GJK_LOGGING
		PxReal dx = dV, dy = shfl(dV, 1), dz = shfl(dV,2);
		PxReal cpx = closestPointV, cpy = shfl(closestPointV, 1), cpz = shfl(closestPointV,2);
		if(threadIdx.x == 0)
			printf("[%f, %f], dir (%f, %f, %f), closestPoint (%f, %f, %f), cm %f \n", lowerBound, upperBound, dx, dy, dz, cpx, cpy, cpz, cm);
#endif
		// find a warm start entry that can be added to the simplex because it's closer than the closest feature
		int candidates = 0;
		if (cacheMask)
			candidates = __ballot_sync(FULL_MASK, __shfl_sync(FULL_MASK, dot(dV, simplexV)<upperBound, threadIdx.x<<2)) & cacheMask;
		if(candidates)
		{
			warmStarted = true;
			cacheMask &= ~(lowestSetBit(candidates)*2-1);
			supportVertexV = splatV(lowestSetIndex(candidates), simplexV);
			if(threadIdx.x>>2 == size)
				simplexV = supportVertexV;
		}
		else // if not, stop warm-starting and start using support vertices
		{
			cacheMask = 0;
			// New support point on the Minkowski difference; encode the
			// source vertex indices into the w-lane of the simplex slot.
			PxU8 indexA = supportIndex(vA, nbA, -dV), indexB = supportIndex(vB, nbB, dV);
			supportVertexV = loadV3Unsafe(vA+indexA) - loadV3Unsafe(vB+indexB);
			if(threadIdx.x>>2 == size)
				simplexV = (threadIdx.x&3) < 3 ? supportVertexV : __int_as_float(schlock::encodeIndices(indexA, indexB));
			PxReal lb = splatX(dot(dV,supportVertexV));
			if(lb>lowerBound)
			{
				storeV3(&output.direction, 0, -dV);
				lowerBound = lb;
				if(lowerBound > minSep || lowerBound>=upperBound*convergenceRatio)
					break;
			}
		}
#if GJK_LOGGING
		PxReal svx = supportVertexV, svy = shfl(supportVertexV,1), svz = shfl(supportVertexV,2);
		if(threadIdx.x == 0)
			printf("support (%f, %f, %f)\n", svx, svy, svz);
#endif
		// Reduce the simplex to the feature closest to the origin.
		PxU32 f = 0; // f is feature index: (i2<<12 | i1<<8 | i0<<4 | size)
		PxReal pointV;
		switch(size)
		{
		case 0: f = 0x2101, pointV = supportVertexV; break; // have to be a bit for 0,1 cases not to trash the cached verts, which also live in the simplex
		case 1:
		{
			PxReal aV = splatV(0,simplexV), lV = supportVertexV - aV, m = splatX(dot(lV,lV)), x = -splatX(dot(aV,lV));
			if(m==0) f = DEGENERATE, pointV = 0; // FAIL: coincident vertex
			else if(x<m) f = 0x2102, pointV = aV + lV * x/m; // REGION: line segment
			else f = 0x2111, pointV = supportVertexV; // REGION: new vertex
			break;
		}
		case 2: f = squawk::closestPointTri(simplexV, supportVertexV, pointV); break; // no worries for 2 & 3: only (0,1,2) are affected by permute, and they're already used
		case 3: f = squawk::closestPointTet(simplexV, supportVertexV, pointV); break;
		}
		cm = magnitude(pointV);
		int newSize = f&0xf;
		degenerate = f == DEGENERATE || (cm>=upperBound && newSize<=size);
		if(degenerate)
		{
			if(!warmStarted) // hit a problem warm-starting: just restart cold
				break;
			cacheMask = 0;
			goto restart;
		}
		// local minima happen. Continue so long as either the simplex size increases or the upper bound decreases,
		backupSize = cm >= upperBound ? size : 0;
		if(backupSize)
			backupSimplexV = simplexV, backupClosestV = closestPointV; // save the best simplex and size before updating state
		else
			upperBound = cm;
#if GJK_LOGGING
		PxReal px = pointV, py = shfl(pointV,1), pz = shfl(pointV,2);
		if(threadIdx.x == 0)
			printf("%04x (%f, %f, %f) %f\n", f, px, py, pz, cm);
#endif
		closestPointV = pointV, size = newSize; // update closest point and simplex
		// Permute the retained simplex vertices according to the feature code.
		PxU32 sourceIndex = f>>((threadIdx.x&28)+4)&0xf; // +4 because size is the first nibble
		PxReal tmp = __shfl_sync(FULL_MASK, simplexV, sourceIndex<<2 | (threadIdx.x&3));
		if(threadIdx.x<12)
			simplexV = tmp;
	}
	while(upperBound > 0 && lowerBound < upperBound * convergenceRatio);
	if(backupSize>0)
		simplexV = backupSimplexV, closestPointV = backupClosestV, size = backupSize;
	__syncwarp();
	storeSimplex(cacheData, simplexV);
	if(threadIdx.x == 0)
	{
		output.degenerate = degenerate;
		output.lowerBound = lowerBound;
		output.upperBound = upperBound;
		cacheData.size = size;
	}
	__syncwarp();
	if(upperBound == 0)
		return schlock::GjkResult::eOVERLAP;
	if(lowerBound>minSep)
		return schlock::GjkResult::eDISTANT;
	// Shapes are close: reconstruct witness points on A and B from the final
	// simplex via barycentric coordinates.
	PxU8 a0 = cacheData.vertices[0].a(), a1 = cacheData.vertices[1].a(), a2 = cacheData.vertices[2].a();
	PxU8 b0 = cacheData.vertices[0].b(), b1 = cacheData.vertices[1].b(), b2 = cacheData.vertices[2].b();
	PxVec3 closestPoint(closestPointV, __shfl_sync(FULL_MASK, closestPointV,1), __shfl_sync(FULL_MASK, closestPointV,2));
	if(threadIdx.x == 0)
	{
		PxVec3 closestPointDir = -closestPoint.getNormalized();
		output.closestPointDir = closestPointDir;
		PxReal p, q, r;
		if(size == 3)
		{
			schlock::barycentrics(cacheData.vertices[0].getV(), cacheData.vertices[1].getV(), cacheData.vertices[2].getV(), p, q, r, closestPoint);
			output.closestPointA = p * vA[a0] + q * vA[a1] + r * vA[a2];
		}
		else if(size == 2)
		{
			PxVec3 d = cacheData.vertices[1].getV()- cacheData.vertices[0].getV();
			PxReal q = -cacheData.vertices[0].getV().dot(d)/d.magnitudeSquared();
			output.closestPointA = (1-q) * vA[a0] + q * vA[a1];
		}
		else
			output.closestPointA = vA[a0];
		output.closestPointB = output.closestPointA + upperBound * closestPointDir;
	}
	return schlock::GjkResult::eCLOSE;
}
}
#undef DEGENERATE_SIZE_FLAG
#endif

View File

@@ -0,0 +1,443 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __HEIGHTFIELD_CUH__
#define __HEIGHTFIELD_CUH__
#include "geometry/PxHeightFieldSample.h"
#include "geometry/PxMeshScale.h"
#include "triangle.cuh"
#include "assert.h"
// Clamped floor of x into [0, rows-2]: row index of the cell containing x.
__device__ static inline physx::PxU32 getMinRow(physx::PxReal x, const physx::PxU32 rows)
{
	using namespace physx;
	const PxI32 row = PxI32(PxFloor(x));
	return PxU32(PxClamp(row, PxI32(0), PxI32(rows - 2)));
}
// Clamped ceiling of x into [0, rows-1]: last row touched by x.
__device__ static inline physx::PxU32 getMaxRow(physx::PxReal x, const physx::PxU32 rows)
{
	using namespace physx;
	const PxI32 row = PxI32(PxCeil(x));
	return PxU32(PxClamp(row, PxI32(0), PxI32(rows - 1)));
}
// Clamped floor of z into [0, columns-2]: column index of the cell containing z.
__device__ static inline physx::PxU32 getMinColumn(physx::PxReal z, const physx::PxU32 columns)
{
	using namespace physx;
	const PxI32 column = PxI32(PxFloor(z));
	return PxU32(PxClamp(column, PxI32(0), PxI32(columns - 2)));
}
// Clamped ceiling of z into [0, columns-1]: last column touched by z.
__device__ static inline physx::PxU32 getMaxColumn(physx::PxReal z, const physx::PxU32 columns)
{
	using namespace physx;
	const PxI32 column = PxI32(PxCeil(z));
	return PxU32(PxClamp(column, PxI32(0), PxI32(columns - 1)));
}
// True when vertexIndex addresses a sample inside the rows x columns grid.
__device__ static inline bool isValidVertex(physx::PxU32 vertexIndex, const physx::PxU32 rows, const physx::PxU32 columns)
{
	using namespace physx;
	const PxU32 vertexCount = rows * columns;
	return vertexIndex < vertexCount;
}
// Even triangle indices are the first triangle of their heightfield cell.
__device__ static inline bool isFirstTriangle(physx::PxU32 triangleIndex)
{
	return (triangleIndex % 2u) == 0;
}
// Direct sample lookup; callers guarantee vertexIndex is in range.
__device__ static inline const physx::PxHeightFieldSample& getSample(physx::PxU32 vertexIndex, const physx::PxHeightFieldSample* samples)
{
	return *(samples + vertexIndex);
}
// Height of the sample at vertexIndex, widened to PxReal.
__device__ static inline physx::PxReal getHeight(physx::PxU32 vertexIndex, const physx::PxHeightFieldSample* samples)
{
	using namespace physx;
	const PxHeightFieldSample& sample = getSample(vertexIndex, samples);
	return PxReal(sample.height);
}
// Material of the first triangle of the cell anchored at vertexIndex.
__device__ static inline physx::PxU16 getMaterialIndex0(physx::PxU32 vertexIndex, const physx::PxHeightFieldSample* samples)
{
	using namespace physx;
	const PxHeightFieldSample& sample = getSample(vertexIndex, samples);
	return sample.materialIndex0;
}
// Material of the second triangle of the cell anchored at vertexIndex.
__device__ static inline physx::PxU16 getMaterialIndex1(physx::PxU32 vertexIndex, const physx::PxHeightFieldSample* samples)
{
	using namespace physx;
	const PxHeightFieldSample& sample = getSample(vertexIndex, samples);
	return sample.materialIndex1;
}
// The sample's tessellation flag selects which diagonal splits the cell.
__device__ static inline bool isZerothVertexShared(physx::PxU32 vertexIndex, const physx::PxHeightFieldSample* samples)
{
	const physx::PxHeightFieldSample& sample = getSample(vertexIndex, samples);
	return sample.tessFlag() != 0;
}
// Heightfield-space position of a sample: x = row, z = column, y = height.
__device__ static inline physx::PxVec3 getVertex(physx::PxU32 vertexIndex, const physx::PxU32 columns, const physx::PxHeightFieldSample* samples)
{
	using namespace physx;
	// Decompose the linear sample index into (row, column).
	const PxU32 r = vertexIndex / columns;
	const PxU32 c = vertexIndex - r * columns;
	return PxVec3(PxReal(r), getHeight(vertexIndex, samples), PxReal(c));
}
// Heightfield space -> shape space: apply the per-axis scales.
__device__ static inline physx::PxVec3 hf2shapep(const physx::PxVec3& v, const physx::PxReal rowScale, const physx::PxReal heightScale, const physx::PxReal columnScale)
{
	using namespace physx;
	const PxReal sx = v.x * rowScale;
	const PxReal sy = v.y * heightScale;
	const PxReal sz = v.z * columnScale;
	return PxVec3(sx, sy, sz);
}
// Returns the three sample indices of a heightfield triangle. Each cell
// (triangleIndex >> 1) holds two triangles; the cell's tessellation flag
// selects which diagonal splits it (see diagrams), which determines the
// vertex winding for the first/second triangle.
__device__ static inline void getTriangleVertexIndices(physx::PxU32 triangleIndex, physx::PxU32& vertexIndex0, physx::PxU32& vertexIndex1, physx::PxU32& vertexIndex2, const physx::PxU32 columns, const physx::PxHeightFieldSample* samples)
{
	using namespace physx;
	const PxU32 cell = triangleIndex >> 1;
	if (isZerothVertexShared(cell, samples))
	{
		// <---- COL
		// 0----2 1 R
		// | 1 / /| O
		// | / / | W
		// | / / | |
		// |/ / 0 | |
		// 1 2----0 V
		//
		if (isFirstTriangle(triangleIndex))
		{
			vertexIndex0 = cell + columns;
			vertexIndex1 = cell;
			vertexIndex2 = cell + columns + 1;
		}
		else
		{
			vertexIndex0 = cell + 1;
			vertexIndex1 = cell + columns + 1;
			vertexIndex2 = cell;
		}
	}
	else
	{
		// <---- COL
		// 2 1----0 R
		// |\ \ 0 | O
		// | \ \ | W
		// | \ \ | |
		// | 1 \ \| |
		// 0----1 2 V
		//
		if (isFirstTriangle(triangleIndex))
		{
			vertexIndex0 = cell;
			vertexIndex1 = cell + 1;
			vertexIndex2 = cell + columns;
		}
		else
		{
			vertexIndex0 = cell + columns + 1;
			vertexIndex1 = cell + columns;
			vertexIndex2 = cell + 1;
		}
	}
}
// Returns the triangle indices adjacent to each edge of a heightfield
// triangle, or BOUNDARY for edges on the heightfield border. Edge k is the
// edge opposite vertex k of getTriangleVertexIndices' winding; the diagonal
// edge always pairs the two triangles of the same cell (triangleIndex +/- 1).
__device__ static inline void getTriangleAdjacencyIndices(physx::PxU32 triangleIndex, physx::PxU32& adjacencyIndex0, physx::PxU32& adjacencyIndex1, physx::PxU32& adjacencyIndex2, const physx::PxU32 rows, const physx::PxU32 columns,
	const physx::PxHeightFieldSample* samples)
{
	using namespace physx;
	const PxU32 cell = triangleIndex >> 1;
	if (isZerothVertexShared(cell, samples))
	{
		// <---- COL
		// 0----2 1 R
		// | 1 / /| O
		// | / / | W
		// | / / | |
		// |/ / 0 | |
		// 1 2----0 V
		//
		if (isFirstTriangle(triangleIndex))
		{
			adjacencyIndex0 = BOUNDARY;
			adjacencyIndex1 = triangleIndex + 1;	// diagonal: other triangle of this cell
			adjacencyIndex2 = BOUNDARY;
			// Not in the first column: neighbor across the column edge.
			if ((cell % (columns) != 0))
			{
				adjacencyIndex0 = triangleIndex - 1;
			}
			// Not in the last row: neighbor in the cell one row down.
			if ((cell / columns != rows - 2))
			{
				adjacencyIndex2 = ((cell + columns) * 2) + 1;
			}
		}
		else
		{
			adjacencyIndex0 = BOUNDARY;
			adjacencyIndex1 = triangleIndex - 1;	// diagonal: other triangle of this cell
			adjacencyIndex2 = BOUNDARY;
			// Not in the last column: neighbor across the column edge.
			if (cell % (columns) < (columns - 2))
			{
				adjacencyIndex0 = triangleIndex + 1;
			}
			// Not in the first row: neighbor in the cell one row up.
			if (cell >= columns - 1)
			{
				adjacencyIndex2 = (cell - columns) * 2;
			}
		}
	}
	else
	{
		// <---- COL
		// 2 1----0 R
		// |\ \ 0 | O
		// | \ \ | W
		// | \ \ | |
		// | 1 \ \| |
		// 0----1 2 V
		//
		if (isFirstTriangle(triangleIndex))
		{
			adjacencyIndex0 = BOUNDARY;
			adjacencyIndex1 = triangleIndex + 1;	// diagonal: other triangle of this cell
			adjacencyIndex2 = BOUNDARY;
			// Not in the first row: neighbor in the cell one row up.
			if (cell >= columns - 1)
			{
				adjacencyIndex0 = ((cell - (columns)) * 2) + 1;
			}
			// Not in the first column: neighbor across the column edge.
			if ((cell % (columns) != 0))
			{
				adjacencyIndex2 = triangleIndex - 1;
			}
		}
		else
		{
			adjacencyIndex0 = BOUNDARY;
			adjacencyIndex1 = triangleIndex - 1;	// diagonal: other triangle of this cell
			adjacencyIndex2 = BOUNDARY;
			// Not in the last row: neighbor in the cell one row down.
			if ((cell / columns != rows - 2))
			{
				adjacencyIndex0 = (cell + (columns)) * 2;
			}
			// Not in the last column: neighbor across the column edge.
			if (cell % (columns) < (columns - 2))
			{
				adjacencyIndex2 = triangleIndex + 1;
			}
		}
	}
}
__device__ static void getTriangle(physx::PxVec3& triLocV0, physx::PxVec3& triLocV1, physx::PxVec3& triLocV2, uint4* adjacencyIndices, const physx::PxU32 triangleIndex, const physx::PxMeshScale& scale, const physx::PxU32 rows, const physx::PxU32 columns, const physx::PxHeightFieldSample* samples)
{
using namespace physx;
// Outputs the three shape-space vertices of heightfield triangle `triangleIndex`.
// When `adjacencyIndices` is non-null, it also receives the indices of the
// edge-adjacent triangles. If an odd number of scale axes is negative
// (wrongHanded), vertices 1/2 and adjacency slots 0/2 are swapped so the
// triangle winding stays consistent after scaling.
const PxReal rowScale = scale.scale.x;
const PxReal heightScale = scale.scale.y;
const PxReal columnScale = scale.scale.z;
PxVec3 handedness(1.0f); // Vector to invert normal coordinates according to the heightfield scales
// NOTE(review): `handedness` is computed but never read in this function —
// looks like a leftover; confirm before removing.
bool wrongHanded = false;
if (columnScale < 0.f)
{
wrongHanded = !wrongHanded;
handedness.z = -1.0f;
}
if (rowScale < 0.f)
{
wrongHanded = !wrongHanded;
handedness.x = -1.0f;
}
//PxU32 tVertexIndices[3];
//getTriangleVertexIndices(triangleIndex, tVertexIndices[0], tVertexIndices[1 + wrongHanded], tVertexIndices[2 - wrongHanded], columns, samples);
PxU32 tVert0, tVert1, tVert2;
getTriangleVertexIndices(triangleIndex, tVert0, tVert1, tVert2, columns, samples);
if (adjacencyIndices)
{
/*PxU32 tAdjInd0 = wrongHanded ? adjacencyIndices->z : adjacencyIndices->x;
PxU32 tAdjInd1 = adjacencyIndices->y;
PxU32 tAdjInd2 = wrongHanded ? adjacencyIndices->x : adjacencyIndices->z;*/
PxU32 tAdjInd0, tAdjInd1, tAdjInd2;
getTriangleAdjacencyIndices(triangleIndex, tAdjInd0, tAdjInd1, tAdjInd2, rows, columns, samples);
// Mirror the vertex 1/2 swap below in the adjacency output.
adjacencyIndices->x = wrongHanded ? tAdjInd2 : tAdjInd0;
adjacencyIndices->y = tAdjInd1;
adjacencyIndices->z = wrongHanded ? tAdjInd0 : tAdjInd2;
}
// Swap vertices 1 and 2 for negative-handed scales, then apply the scale.
const PxVec3 vertex0 = getVertex(tVert0, columns, samples);
const PxVec3 vertex1 = getVertex(wrongHanded ? tVert2 : tVert1, columns, samples);
const PxVec3 vertex2 = getVertex(wrongHanded ? tVert1 : tVert2, columns, samples);
triLocV0 = hf2shapep(vertex0, rowScale, heightScale, columnScale);
triLocV1 = hf2shapep(vertex1, rowScale, heightScale, columnScale);
triLocV2 = hf2shapep(vertex2, rowScale, heightScale, columnScale);
}
__device__ static bool testLocalPoint(const physx::PxVec3& pnt, const physx::PxU32 rows, const physx::PxU32 columns, const physx::PxHeightFieldSample* samples, float& height)
{
using namespace physx;
// Tests whether a heightfield-local point lies below the interpolated surface.
// On any in-range query, `height` receives the interpolated surface height at
// (pnt.x, pnt.z). Returns false for points outside the (row, column) extents.
if (pnt.x < 0 || pnt.x > rows - 1 || pnt.z < 0 || pnt.z > columns - 1)
return false;
// Locate the cell containing the point and fetch its four corner heights.
PxU32 row = getMinRow(pnt.x, rows);
PxU32 column = getMinColumn(pnt.z, columns);
PxU32 vertex = row * columns + column;
PxReal h0 = getHeight(vertex, samples);
PxReal h1 = getHeight(vertex + 1, samples);
PxReal h2 = getHeight(vertex + columns, samples);
PxReal h3 = getHeight(vertex + columns + 1, samples);
// Fractional position of the point within the cell.
PxReal s = pnt.x - PxFloor(pnt.x);
PxReal t = pnt.z - PxFloor(pnt.z);
// Bilinear interpolation over the quad; the exact surface is two triangles,
// so this is an approximation (see the original author's note below).
// NOTE(review): h1 (next column) is weighted by the row fraction `s` — the
// s/t pairing looks swapped relative to the grid layout; confirm.
PxReal h = h0 * (1 - s) * (1 - t) + h1 * s * (1 - t) + h2 * (1 - s) * t + h3 * s * t; // @@@ That's wrong. Should find point on triangle
height = h;
return h > pnt.y;
}
__device__ static inline physx::PxU32 heightfieldComputePairs(
	const physx::PxU32 minColumn,
	const physx::PxU32 maxColumn,
	const physx::PxU32 minRow,
	const physx::PxU32 maxRow,
	const physx::PxU32 nbRows,
	const physx::PxU32 nbCols,
	physx::PxHeightFieldSample* samples,
	const physx::PxU32 miny,
	const physx::PxU32 maxy
)
{
	using namespace physx;
	// Counts candidate heightfield triangles in the cell range
	// [minRow, maxRow) x [minColumn, maxColumn): triangles that are not holes
	// and whose cell's height range overlaps [miny, maxy].
	const PxU32 columnSpan = maxColumn - minColumn;
	// Two materials (triangles) per cell, hence two iterations per vertex.
	const PxU32 totalNumProcessed = (maxRow - minRow) * columnSpan * 2;
	PxU32 nbPairs = 0;
	for (PxU32 i = 0; i < totalNumProcessed; ++i)
	{
		const PxU32 cellIdx = i >> 1;
		const PxU32 vertexIndex = (minRow + cellIdx / columnSpan) * nbCols + (minColumn + cellIdx % columnSpan);
		assert(isValidVertex(vertexIndex, nbRows, nbCols));
		// Heights of the cell's four corners.
		const PxReal h0 = getHeight(vertexIndex, samples);
		const PxReal h1 = getHeight(vertexIndex + 1, samples);
		const PxReal h2 = getHeight(vertexIndex + nbCols, samples);
		const PxReal h3 = getHeight(vertexIndex + nbCols + 1, samples);
		// Skip when the query's y-range lies entirely above or below the cell.
		const bool allAbove = maxy < h0 && maxy < h1 && maxy < h2 && maxy < h3;
		const bool allBelow = miny > h0 && miny > h1 && miny > h2 && miny > h3;
		if (allAbove || allBelow)
			continue;
		const PxHeightFieldSample& sample = getSample(vertexIndex, samples);
		// Odd iterations test the cell's second triangle/material.
		const bool isMaterial1 = (i & 1) != 0;
		const PxU32 material = isMaterial1 ? sample.materialIndex1 : sample.materialIndex0;
		if (material != PxHeightFieldMaterial::eHOLE)
			++nbPairs;
	}
	return nbPairs;
}
__device__ static inline void heightfieldOutputPairs(
	const physx::PxU32 minColumn,
	const physx::PxU32 maxColumn,
	const physx::PxU32 minRow,
	const physx::PxU32 maxRow,
	const physx::PxU32 nbRows,
	const physx::PxU32 nbCols,
	physx::PxHeightFieldSample* samples,
	const physx::PxU32 miny,
	const physx::PxU32 maxy,
	const physx::PxU32 cmInd,
	const physx::PxU32 elementInd, //either tetrahedron id or triangle id
	const physx::PxU32 startOffset,
	const physx::PxU32 size,
	uint4* stackPtr //output
)
{
	using namespace physx;
	// Emits a (contact manager, element, heightfield triangle) tuple to the stack
	// for every candidate triangle, using the same cell filtering as
	// heightfieldComputePairs. Writing stops once `size` entries are reached.
	const PxU32 columnSpan = maxColumn - minColumn;
	// Two materials (triangles) per cell, hence two iterations per vertex.
	const PxU32 totalNumProcessed = (maxRow - minRow) * columnSpan * 2;
	PxU32 writeIndex = startOffset;
	for (PxU32 i = 0; i < totalNumProcessed && writeIndex < size; ++i)
	{
		const PxU32 cellIdx = i >> 1;
		const PxU32 vertexIndex = (minRow + cellIdx / columnSpan) * nbCols + (minColumn + cellIdx % columnSpan);
		assert(isValidVertex(vertexIndex, nbRows, nbCols));
		// Heights of the cell's four corners.
		const PxReal h0 = getHeight(vertexIndex, samples);
		const PxReal h1 = getHeight(vertexIndex + 1, samples);
		const PxReal h2 = getHeight(vertexIndex + nbCols, samples);
		const PxReal h3 = getHeight(vertexIndex + nbCols + 1, samples);
		// Skip when the query's y-range lies entirely above or below the cell.
		const bool allAbove = maxy < h0 && maxy < h1 && maxy < h2 && maxy < h3;
		const bool allBelow = miny > h0 && miny > h1 && miny > h2 && miny > h3;
		if (allAbove || allBelow)
			continue;
		const PxHeightFieldSample& sample = getSample(vertexIndex, samples);
		// Odd iterations correspond to the cell's second triangle/material.
		const bool isMaterial1 = (i & 1) != 0;
		const PxU32 material = isMaterial1 ? sample.materialIndex1 : sample.materialIndex0;
		if (material != PxHeightFieldMaterial::eHOLE)
		{
			// Triangle index = vertexIndex * 2 (+1 for the second triangle).
			const PxU32 triangleIdx = (vertexIndex << 1) + (isMaterial1 ? 1u : 0u);
			stackPtr[writeIndex] = make_uint4(cmInd, elementInd, triangleIdx, 0);
			++writeIndex;
		}
	}
}
#endif

View File

@@ -0,0 +1,128 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __MANIFOLD_CUH__
#define __MANIFOLD_CUH__
#include "foundation/PxTransform.h"
#include "PxgPersistentContactManifold.h"
#include "PxgCommonDefines.h"
#include "dataReadWriteHelper.cuh"
#define MULTIMANIFOLD_MAX_MANIFOLDS 4
#define SUBMANIFOLD_MAX_CONTACTS 6
__device__ static inline bool refreshManifolds(
const physx::PxTransform & aToB,
physx::PxReal projectBreakingThreshold,
physx::PxgPersistentContactMultiManifold * PX_RESTRICT multiManifold
)
{
// Warp-cooperative refresh of a persistent multi-manifold: thread i handles
// contact (i % SUBMANIFOLD_MAX_CONTACTS) of manifold (i / SUBMANIFOLD_MAX_CONTACTS).
// Each cached contact's penetration is updated for the new relative transform
// `aToB`; if any contact has drifted tangentially beyond the breaking threshold,
// the whole multi-manifold is cleared and true is returned to request full
// contact generation.
using namespace physx;
const PxU32 threadIdxInWarp = threadIdx.x & (WARP_SIZE - 1);
// Compare squared distances to avoid a square root per contact.
const PxReal sqProjectBreakingThreshold = projectBreakingThreshold * projectBreakingThreshold;
const PxU32 numManifolds = multiManifold->mNbManifolds;
PxU32 i = threadIdxInWarp;
{
bool lostContact = false;
PxU8 curManifoldIdx = i / SUBMANIFOLD_MAX_CONTACTS;
// refreshContactPoints
if (curManifoldIdx < numManifolds)
{
PxU32 nbContacts = multiManifold->mNbContacts[curManifoldIdx];
PxU8 curContactIdx = i % SUBMANIFOLD_MAX_CONTACTS;
if (curContactIdx < nbContacts)
{
PxReal penNew;
PxgContact & curContact = multiManifold->mContacts[curManifoldIdx][curContactIdx];
PxVec3 pointA = curContact.pointA;
PxVec3 pointB = curContact.pointB;
const PxVec3 localAInB = aToB.transform(pointA); // from a to b
const PxVec3 v = localAInB - pointB;
PxVec3 normal = curContact.normal;
PxReal penOld = curContact.penetration;
// New penetration: separation along the cached contact normal.
penNew = v.dot(normal);
// Tangential drift: distance between the cached point B and point A
// projected back onto the old contact plane.
const PxVec3 projectedPoint = localAInB - normal * penOld;
const PxVec3 projectedDifference = pointB - projectedPoint;
const PxReal distance2d = projectedDifference.magnitudeSquared();
curContact.penetration = penNew;
if (distance2d > sqProjectBreakingThreshold)
{
lostContact = true;
}
}
}
// If we lost any contact - we do the full contact gen, we don't need any contacts then
if (__any_sync(FULL_MASK, (PxU32)lostContact))
{
if(threadIdxInWarp == 0)
multiManifold->mNbManifolds = 0;
return true;
}
}
return false;
}
__device__ static inline bool invalidateManifold(const physx::PxTransform & aToB, physx::PxgPersistentContactMultiManifold& multiManifold,
const physx::PxReal minMargin, const physx::PxReal ratio)
{
	// Decides whether the cached multi-manifold must be discarded because the
	// relative transform between the two shapes has moved too far from the one
	// the manifold was generated with.
	//
	// Consistency fix: `ratio` is now qualified as physx::PxReal like every other
	// parameter; the bare `PxReal` relied on a using-directive from another header.
	using namespace physx;
	// Read the transform stored with the manifold (warp-cooperative helper,
	// per its *_ReadWarp naming — all callers must invoke this warp-wide).
	PxAlignedTransform prevRelativeTransform;
	PxAlignedTransform_ReadWarp(prevRelativeTransform, &multiManifold.mRelativeTransform);
	const PxReal thresholdP = minMargin * ratio; // max tolerated positional drift
	const PxReal thresholdQ = 0.9998f;           // quaternion dot threshold, about 1 degree
	// Largest per-axis translation delta since the manifold was built.
	const PxVec3 delta = aToB.p - PxVec3(prevRelativeTransform.p.x, prevRelativeTransform.p.y, prevRelativeTransform.p.z);
	const PxReal deltaP = PxMax(PxAbs(delta.x), PxMax(PxAbs(delta.y), PxAbs(delta.z)));
	// Quaternion dot product measures rotational proximity (1 == identical).
	const PxReal deltaQ = aToB.q.dot((PxQuat)prevRelativeTransform.q);
	// Invalidate on either excessive translation or excessive rotation.
	return (deltaP > thresholdP) || (deltaQ < thresholdQ);
}
#endif

View File

@@ -0,0 +1,51 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __MATERIAL_COMBINER_CUH__
#define __MATERIAL_COMBINER_CUH__
#include "PxsMaterialCombiner.h"
__device__ static __forceinline__
void combineMaterials(const PxsMaterialData* PX_RESTRICT materials, PxU16 index0, PxU16 index1,
PxU32& combinedFlags,
PxReal& combinedStaticFriction,
PxReal& combinedDynamicFriction,
PxReal& combinedRestitution,
PxReal& combinedDamping
)
{
	// Look up the two materials by index and delegate the actual combination
	// rules (friction, restitution, flags, damping) to the shared
	// PxsCombineMaterials helper.
	PxsCombineMaterials(materials[index0], materials[index1],
	                    combinedStaticFriction, combinedDynamicFriction,
	                    combinedRestitution, combinedFlags, combinedDamping);
}
#endif

View File

@@ -0,0 +1,165 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __MIDPHASE_ALLOCATION_CUH__
#define __MIDPHASE_ALLOCATION_CUH__
#include "foundation/PxSimpleTypes.h"
#include "convexNpCommon.h"
#include <vector_types.h>
using namespace physx;
/*
// TODO: read/write infos
AD 8.2023: some comments about all of this. The size of the collision stack is PxGpuDynamicsMemoryConfig::collisionStackSize
The following parameters are involved:
* numpairsAfterMP: The number of convex - triangle pairs after the midphase. These are pairs between 1 triangle of a trimesh
and a complete convex. Will be padded to a multiple of 4 during stack allocation to satisfy 16B alignment.
* numPaddedPairsAfterMP: A pair here is again a single triangle of a trimesh and a complete convex. But in contrast to the one above,
the list holding the triangle indices is padded to multiples of 4 and duplicated for each contact manager.
This is done because we do a radix sort on these indices later, which happens on uint4s. So all the lists
need to be padded to a multiple of 4, and we need the temp buffer as well. So this count represents the length
of the flat list needed to place all the triangle indices, with per-CM padding and temp buffers.
The list will look something like this (x is padding, y is temp buffer):
cm 0 | cm 1 | cm 2 ..
0 1 2 p t t t t | 3 4 p p t t t t | 5 6 7 8 9 p p p t t t t t t t t | ...
numPairsAfterMP4 = (numPairsAfterMP + 3) & (~3)
This is the layout of the collision stack (growing to the bottom):
|---------
|* PairsGPU: uint4 x numPairsAfterMP4
| x: contact manager index
| y: triangle index (of the triangle mesh)
| z: offset from first triangle found for this contact manager
| w: npWorkItem.shapeRef1 (ref of convex mesh)
|
|---------
|* TriNormalAndIndex: ConvexTriNormalAndIndex x numPairsAfterMP4
| x,y,z: minNormal from convexTriangleContactGen (confirm: the normal is from a convex mesh triangle, right?)
| w: remapCPUIndex of trimesh triangle index. (What? Why?)
|---------
|* Contacts: ConvexTriContacts x numPairsAfterMP4
|
|---------
|* TriangleIndices: PxU32 * numPaddedPairsAfterMP
|
|---------
|* maxDepth: PxReal * numPairsAfterMP4
|
|---------
|* intermediate data: ConvexTriIntermediateData * numPairsAfterMP4
|
|---------
|* secondPass data: PxU32 * numPairsAfterMP4
We only fill the pairs array in the midphase kernels. Everything else will be written in the core narrowphase kernels.
Note that pairs belonging to the same contact manager are not necessarily consecutive in the pairs list, because of the warp-wide
write in the midphase. We do maintain a count + offset into a per-CM list for each pair when filling the pairs list with atomics.
This count + offset is then used to calculate the index into per-CM lists to write all the other data in the core narrowphase kernels.
*/
__device__ __forceinline__ PxU32 calculateConvexMeshPairMemRequirement()
{
	// Bytes of collision-stack memory consumed by one convex-triangle pair,
	// summed over all per-pair arrays laid out on the stack (see the layout
	// comment at the top of this file). Padding is accounted for separately.
	PxU32 bytes = sizeof(uint4);                // pair record written by the midphase
	bytes += sizeof(ConvexTriNormalAndIndex);   // contact normal + triangle index
	bytes += sizeof(ConvexTriContacts);         // up to 4 contacts per pair
	bytes += sizeof(PxReal);                    // max penetration depth
	bytes += sizeof(ConvexTriIntermediateData); // intermediate contact-gen data
	bytes += sizeof(PxU32);                     // second-pass index
	bytes += 2 * sizeof(PxU32);                 // triangle index + radix-sort temp (padding excluded)
	return bytes;
}
// This is calculating the worst-case padding intentionally to avoid stack overflows!
// This is calculating the worst-case padding intentionally to avoid stack overflows!
__device__ PX_FORCE_INLINE PxU32 calculateAdditionalPadding(const PxU32 numContactManagers)
{
	// Pair counts are rounded up to a multiple of 4, i.e. at most 3 extra pairs in
	// every per-pair array except the triangle-index lists, which are allocated
	// with their own (per-CM padded) count and are therefore excluded here.
	const PxU32 perPairBytesWithoutTriIndices = calculateConvexMeshPairMemRequirement() - 2 * sizeof(PxU32);
	const PxU32 afterPairPadding = 3 * perPairBytesWithoutTriIndices;
	// The triangle-index list is padded to a multiple of 4 per contact manager and
	// mirrored by the radix-sort temp buffer: up to (3 + 3) PxU32s per manager.
	const PxU32 afterTriangleIndexPadding = numContactManagers * 6 * sizeof(PxU32);
	return afterPairPadding + afterTriangleIndexPadding;
}
__device__ PX_FORCE_INLINE PxU32 calculateMaxPairs(const PxU32 stackSizeBytes, const PxU32 numContactManagers)
{
	// Maximum number of convex-triangle pairs that fit in the collision stack:
	// reserve the worst-case padding first, then divide the remaining space by
	// the per-pair footprint.
	const PxU32 usableBytes = stackSizeBytes - calculateAdditionalPadding(numContactManagers);
	return usableBytes / calculateConvexMeshPairMemRequirement();
}
__device__ __forceinline__ void midphaseAllocate( ConvexTriNormalAndIndex** cvxTriNIGPU, ConvexTriContacts** cvxTriContactsGPU,
	PxReal** cvxTriMaxDepthGPU, ConvexTriIntermediateData** cvxTriIntermGPU, PxU32** orderedCvxTriIntermGPU,
	PxU32** cvxTriSecondPassedGPU, uint4** pairsGPU,
	PxU8 * stackPtr, const PxU32 numpairsAfterMPUnpadded, const PxU32 numPaddedPairsAfterMP)
{
	// Carves the per-pair narrowphase arrays out of the collision stack according
	// to the layout documented at the top of this file. Only pointers are
	// computed here; the arrays are filled later by the narrowphase kernels.
	//
	// Pad the pair count to a multiple of 4 so every array starts at a properly
	// aligned (16-byte) offset.
	const PxU32 numPairsAfterMP = (numpairsAfterMPUnpadded + 3) & (~3);
	// stackPtr is already a PxU8*; the previous redundant reinterpret_cast
	// (flagged "I don't know why this line is needed") has been removed.
	PxU8* cursor = stackPtr;
	// The pair records already exist — they were written by the midphase kernel.
	// This just skips past them so the remaining arrays land behind the pair list.
	*pairsGPU = reinterpret_cast<uint4*>(cursor);
	cursor += numPairsAfterMP * sizeof(uint4);
	// Contact normal + triangle index per pair.
	*cvxTriNIGPU = reinterpret_cast<ConvexTriNormalAndIndex*>(cursor);
	cursor += numPairsAfterMP * sizeof(ConvexTriNormalAndIndex);
	// Contact points per pair (ConvexTriContacts has space for 4 contacts).
	*cvxTriContactsGPU = reinterpret_cast<ConvexTriContacts*>(cursor);
	cursor += numPairsAfterMP * sizeof(ConvexTriContacts);
	// Ordered triangle indices: numPaddedPairsAfterMP is already padded per
	// contact manager and duplicated to allow for a radix sort (see convexNpCommon.h).
	*orderedCvxTriIntermGPU = reinterpret_cast<PxU32*>(cursor);
	cursor += numPaddedPairsAfterMP * sizeof(PxU32);
	// Maximum penetration depth per pair.
	*cvxTriMaxDepthGPU = reinterpret_cast<PxReal*>(cursor);
	cursor += numPairsAfterMP * sizeof(PxReal);
	// Intermediate contact-generation data per pair.
	*cvxTriIntermGPU = reinterpret_cast<ConvexTriIntermediateData*>(cursor);
	cursor += numPairsAfterMP * sizeof(ConvexTriIntermediateData);
	// Second-pass pair indices.
	*cvxTriSecondPassedGPU = reinterpret_cast<PxU32*>(cursor);
}
#endif

View File

@@ -0,0 +1,193 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __NPUTILS_CUH__
#define __NPUTILS_CUH__
#include "foundation/PxQuat.h"
#include "foundation/PxTransform.h"
#include "foundation/PxVec3.h"
#include "PxgCommonDefines.h"
#include "utils.cuh"
// Load/store helpers for objects accessed through volatile references. The
// generic versions rely on the type's assignment; PxVec3/PxQuat/PxTransform are
// specialized to copy component-wise — presumably because their copy operations
// do not accept volatile-qualified operands (TODO confirm).
template <typename T> __device__ static inline T ldS(const volatile T& val) { return val; }
template <typename T> __device__ static inline void stS(volatile T& dst, const T& src) { dst = src; }
template <> __device__ inline physx::PxVec3 ldS<physx::PxVec3>(const volatile physx::PxVec3& val) { return physx::PxVec3(val.x, val.y, val.z); }
template <> __device__ inline void stS<physx::PxVec3>(volatile physx::PxVec3& dst, const physx::PxVec3& src) { dst.x = src.x, dst.y = src.y, dst.z = src.z; }
template <> __device__ __forceinline__ physx::PxQuat ldS<physx::PxQuat>(const volatile physx::PxQuat& val) { return physx::PxQuat(val.x, val.y, val.z, val.w); }
template <> __device__ __forceinline__ void stS<physx::PxQuat>(volatile physx::PxQuat& dst, const physx::PxQuat& src) { dst.x = src.x, dst.y = src.y, dst.z = src.z, dst.w = src.w; }
// PxTransform delegates to the PxVec3/PxQuat specializations above.
template <> __device__ __forceinline__ physx::PxTransform ldS<physx::PxTransform>(const volatile physx::PxTransform& val) { return physx::PxTransform(ldS(val.p), ldS(val.q)); }
template <> __device__ __forceinline__ void stS<physx::PxTransform>(volatile physx::PxTransform& dst, const physx::PxTransform& src)
{
stS(dst.p, src.p);
stS(dst.q, src.q);
}
static __device__ __forceinline__ int next3(int x)
{
	// Next index in the cyclic order 0 -> 1 -> 2 -> 0 (input expected in [0,2]).
	if (x == 2)
		return 0;
	return x + 1;
}
static __device__ inline int prev3(int x)
{
	// Previous index in the cyclic order 0 -> 2 -> 1 -> 0 (input expected in [0,2]).
	if (x == 0)
		return 2;
	return x - 1;
}
// operators dealing with 8 4-vectors in a warp
static __device__ inline float dot(float a, float b) // leaves answer in x
{
	// Per-lane multiply, then fold the y and z products down onto the lane that
	// holds the x component; only that lane ends up with the full dot product.
	const float prod = a * b;
	const float partial = prod + __shfl_down_sync(FULL_MASK, prod, 1);
	return partial + __shfl_down_sync(FULL_MASK, prod, 2);
}
static __device__ inline float cross(float a, float b, int cp0, int cp1)
{
	// One component of a cross product: a[cp0]*b[cp1] - a[cp1]*b[cp0], with the
	// operands gathered from the given lanes of the warp.
	const float lhs = __shfl_sync(FULL_MASK, a, cp0) * __shfl_sync(FULL_MASK, b, cp1);
	const float rhs = __shfl_sync(FULL_MASK, a, cp1) * __shfl_sync(FULL_MASK, b, cp0);
	return lhs - rhs;
}
static __device__ inline float splatV(int index, float v) // splat a vector across all 8 vectors
{
	// Each 4-lane group reads the matching component of vector #index, which
	// lives in lanes 4*index .. 4*index+3.
	const int srcLane = (index << 2) | (threadIdx.x & 3);
	return __shfl_sync(FULL_MASK, v, srcLane);
}
static __device__ inline float splatX(float a) // splat the x-component of each vector into yzw
{
// 28 clears the low two bits (for lane ids < 32), i.e. the lane holding this
// 4-lane group's x component.
return __shfl_sync(FULL_MASK, a, threadIdx.x & 28);
}
static __device__ inline float normalize(float x) // normalize each vector
{
	// Reciprocal length lands in the x lane of each group; splat it to y/z/w
	// before scaling.
	const float invLen = rsqrt(dot(x, x)); // TODO: precision
	return x * splatX(invLen);
}
static __device__ inline float magnitude(float x) // magnitude of each vector, splatted to all lanes of the group
{
return splatX(sqrt(dot(x,x))); // TODO: precision
}
template<int N>
static __device__ inline int packXFlags(bool a) // pack a boolean from each x into a byte
{
// Lane k fetches the flag held by lane 4k (the x lane of vector k); the ballot
// then packs those flags into consecutive bits, of which the low N are kept.
return __ballot_sync(FULL_MASK, __shfl_sync(FULL_MASK, a, threadIdx.x<<2)) & ((1<<N)-1);
}
template<int N>
static __device__ inline bool allX(bool a) // check that the x-flags of the first N vectors are all set
{
return packXFlags<N>(a) == ((1<<N)-1);
}
template<int N>
static __device__ inline bool anyX(bool a) // check if any of the first N x-flags is set
{
return packXFlags<N>(a) != 0;
}
static __device__ inline float loadV3Unsafe(const physx::PxVec3* v) // NB requires padding (i.e. the 4th element is loaded)
{
// Each lane of a 4-lane group loads one float of the vector; the fourth lane
// reads one float past the PxVec3, hence the padding requirement / "unsafe".
return reinterpret_cast<const float*>(v)[threadIdx.x&3];
}
static __device__ void storeV3(volatile physx::PxVec3* v, int index, float value)
{
// Store vector #index (held in lanes 4*index .. 4*index+2) into *v.
// Only the three lanes holding x, y, z write; the padding lane does nothing.
int lane = index<<2;
volatile float* dest = reinterpret_cast<volatile float*>(v);
if(threadIdx.x>=lane && threadIdx.x<lane+3)
{
dest[threadIdx.x&3] = value;
}
}
// experimentally, seems more register-efficient to coalesce this
// experimentally, seems more register-efficient to coalesce this
static __device__ __forceinline__
physx::PxReal shuffleDot(const physx::PxU32 syncMask, const physx::PxVec3& v0, int shuffle0, const physx::PxVec3& v1)
{
	// Dot product of lane `shuffle0`'s v0 with this lane's v1, shuffling the
	// three components individually.
	const physx::PxReal px = __shfl_sync(syncMask, v0.x, shuffle0) * v1.x;
	const physx::PxReal py = __shfl_sync(syncMask, v0.y, shuffle0) * v1.y;
	const physx::PxReal pz = __shfl_sync(syncMask, v0.z, shuffle0) * v1.z;
	return px + py + pz;
}
static __device__ __forceinline__
physx::PxU32 maxIndex(physx::PxReal v, physx::PxU32 mask, physx::PxReal& maxV)
{
// Warp-wide arg-max over the lanes selected by `mask`: returns the index of a
// lane holding the maximum (lowest such lane on ties) and leaves the maximum
// itself in `maxV` on every lane. Lanes outside the mask contribute -FLT_MAX.
maxV = mask & (1 << (threadIdx.x&31)) ? v : -FLT_MAX;
// Butterfly (xor-shuffle) reduction across all 32 lanes.
maxV = fmaxf(maxV, __shfl_xor_sync(FULL_MASK, maxV, 16));
maxV = fmaxf(maxV, __shfl_xor_sync(FULL_MASK, maxV, 8));
maxV = fmaxf(maxV, __shfl_xor_sync(FULL_MASK, maxV, 4));
maxV = fmaxf(maxV, __shfl_xor_sync(FULL_MASK, maxV, 2));
maxV = fmaxf(maxV, __shfl_xor_sync(FULL_MASK, maxV, 1));
// Pick the lowest-index masked lane whose value equals the maximum.
return physx::lowestSetIndex(__ballot_sync(FULL_MASK, maxV == v)&mask);
}
static __device__ __forceinline__
physx::PxU32 minIndex(physx::PxReal v, physx::PxU32 mask, physx::PxReal& minV)
{
// Warp-wide arg-min over the lanes selected by `mask`: returns the index of a
// lane holding the minimum (lowest such lane on ties) and leaves the minimum
// itself in `minV` on every lane. Lanes outside the mask contribute FLT_MAX.
minV = mask & (1 << (threadIdx.x & 31)) ? v : FLT_MAX;
// Butterfly (xor-shuffle) reduction across all 32 lanes.
minV = fminf(minV, __shfl_xor_sync(FULL_MASK, minV, 16));
minV = fminf(minV, __shfl_xor_sync(FULL_MASK, minV, 8));
minV = fminf(minV, __shfl_xor_sync(FULL_MASK, minV, 4));
minV = fminf(minV, __shfl_xor_sync(FULL_MASK, minV, 2));
minV = fminf(minV, __shfl_xor_sync(FULL_MASK, minV, 1));
// Pick the lowest-index masked lane whose value equals the minimum.
return physx::lowestSetIndex(__ballot_sync(FULL_MASK, minV == v)&mask);
}
// similar as above but only with blocks of 4 threads
// mask must have exactly 4 consecutive bits set and the corresponding
// threads must execute this function together
static __device__ __forceinline__
physx::PxU32 minIndex4(physx::PxReal v, physx::PxU32 mask, physx::PxU32 threadIndexInGroup)
{
// Arg-min over a 4-lane group: returns the warp-lane index of the lane holding
// the minimum value (lowest such lane on ties).
// sanity checks that function has been called as expected
assert(__popc(mask) == 4); // exactly 4 threads in mask
assert(mask & (1 << (threadIdx.x & 31))); // executing thread must be in the mask
assert(threadIndexInGroup == 0 || (mask & (1 << (threadIdx.x - 1 & 31)))); // threads consecutive
physx::PxReal minV = v;
// Step 1: lanes 1..3 fold in their left neighbour's value.
physx::PxReal minV_m1 = __shfl_sync(mask, minV, (threadIdx.x & 31) - 1);
if(threadIndexInGroup > 0)
minV = fminf(minV, minV_m1);
// Step 2: lane 3 folds in lane 1's partial min, completing the group minimum.
physx::PxReal minV_m2 = __shfl_sync(mask, minV, (threadIdx.x & 31) - 2);
if(threadIndexInGroup > 2)
minV = fminf(minV, minV_m2);
// Now the last thread (idx 3) of the group knows the min.
// Send it back to all the threads.
minV = __shfl_sync(mask, minV, (threadIdx.x & 31) | 3);
return physx::lowestSetIndex(__ballot_sync(mask, minV == v)&mask);
}
#endif

View File

@@ -0,0 +1,190 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include <assert.h>
#include "SparseRemove.cuh"
#include "cudaNpCommon.h"
#include "PxgPersistentContactManifold.h"
#include "PxsContactManagerState.h"
#include "PxgContactManager.h"
#include "PxgNpKernelIndices.h"
namespace physx
{
	class PxsContactManager;

	// Empty host-side stub; presumably referenced from host code to force this
	// kernel translation unit to be linked -- TODO confirm.
	extern "C" __host__ void initNarrowphaseKernels7() {}
}
/**
 * Stage 1 of contact-manager removal: initializes the keep/drop marker
 * buffer over all pairs before the removals are flagged in stage 2.
 */
extern "C" __global__ void removeContactManagers_Stage1(const PxgPairManagementData* PX_RESTRICT pairData)
{
	const PxU32 nbPairs = pairData->mNbPairs;
	const PxU32 nbToRemove = pairData->mNbToRemove;
	initializeKeepDropBuffer(pairData->mTempAccumulator, nbPairs, nbToRemove);
}
/**
 * Stage 2 of contact-manager removal: marks the pairs listed in
 * mRemoveIndices as dropped in the keep/drop buffer.
 */
extern "C" __global__ void removeContactManagers_Stage2(const PxgPairManagementData* PX_RESTRICT pairData)
{
	const PxU32 nbToRemove = pairData->mNbToRemove;
	const PxU32 nbPairs = pairData->mNbPairs;
	markKeepDropBuff(pairData->mRemoveIndices, nbToRemove, pairData->mTempAccumulator, nbPairs);
}
/**
 * Stage 3 of contact-manager removal: per-block processing of the keep/drop
 * buffer (the template arguments select the launch dimensions this kernel is
 * expected to run with).
 */
extern "C" __global__ void removeContactManagers_Stage3(const PxgPairManagementData* PX_RESTRICT pairData)
{
	const PxU32 nbPairs = pairData->mNbPairs;
	processKeepDropBuff<PxgNarrowPhaseBlockDims::REMOVE_CONTACT_MANAGERS, PxgNarrowPhaseGridDims::REMOVE_CONTACT_MANAGERS>(
		pairData->mTempAccumulator, nbPairs, pairData->mBlockSharedAccumulator);
}
/**
 * Stage 4 of contact-manager removal: accumulates the per-block keep/drop
 * results into the global run-sum buffer consumed by stage 5.
 */
extern "C" __global__ void removeContactManagers_Stage4(const PxgPairManagementData* PX_RESTRICT pairData)
{
	const PxU32 nbPairs = pairData->mNbPairs;
	accumulateKeepDrop<PxgNarrowPhaseGridDims::REMOVE_CONTACT_MANAGERS>(
		pairData->mTempAccumulator, nbPairs, pairData->mBlockSharedAccumulator);
}
/**
 * Stage 5 of contact-manager removal: compacts the contact-manager arrays by
 * copying each surviving pair from its source slot into the hole left by a
 * removed pair. Work is distributed in groups of 16 threads per swap: one
 * lane of each group copies the scalar per-pair records, all 16 lanes
 * cooperate on the (optional) manifold copy.
 */
extern "C" __global__ void removeContactManagers_Stage5(const PxgPairManagementData* PX_RESTRICT pairData, const bool copyManifold)
{
	const PxU32* PX_RESTRICT runSum = pairData->mTempAccumulator;
	const PxU32 nbPairs = pairData->mNbPairs;
	const PxU32 nbSwaps = getNbSwapsRequired(runSum, nbPairs, pairData->mNbToRemove);

	PxgContactManagerInput* inputs = pairData->mContactManagerInputData;
	PxsContactManagerOutput* outputs = pairData->mContactManagerOutputData;
	PxgPersistentContactManifold* manifolds = reinterpret_cast<PxgPersistentContactManifold*>(pairData->mPersistentContactManagers);
	PxsContactManager** cpuManagers = pairData->mCpuContactManagerMapping;
	Sc::ShapeInteraction** interactions = pairData->mShapeInteractions;
	PxReal* restDistances = pairData->mRestDistances;
	PxsTorsionalFrictionData* torsionalData = pairData->mTorsionalData;

	const PxU32 globalThreadIdx = threadIdx.x + WARP_SIZE * threadIdx.y + blockIdx.x * blockDim.x * blockDim.y;
	const PxU32 groupStride = (blockDim.x * blockDim.y * gridDim.x) / 16;
	const PxU32 laneInGroup = threadIdx.x & 15;

	for(PxU32 swapIdx = globalThreadIdx / 16; swapIdx < nbSwaps; swapIdx += groupStride)
	{
		PxU32 srcIndex, dstIndex;
		getSwapIndices(runSum, nbPairs, swapIdx, nbSwaps, dstIndex, srcIndex);

		// One lane per 16-thread group moves the scalar per-pair records.
		if(laneInGroup == 0)
		{
			inputs[dstIndex] = inputs[srcIndex];
			outputs[dstIndex] = outputs[srcIndex];
			cpuManagers[dstIndex] = cpuManagers[srcIndex];
			interactions[dstIndex] = interactions[srcIndex];
			restDistances[dstIndex] = restDistances[srcIndex];
			torsionalData[dstIndex] = torsionalData[srcIndex];
		}

		if(copyManifold)
		{
			// All 16 lanes cooperatively copy the manifold in float4 chunks.
			float4* dst = reinterpret_cast<float4*>(&manifolds[dstIndex]);
			const float4* src = reinterpret_cast<const float4*>(&manifolds[srcIndex]);
			const PxU32 nbFloat4 = sizeof(PxgPersistentContactManifold) / sizeof(float4);
			for(PxU32 element = laneInGroup; element < nbFloat4; element += 16)
				dst[element] = src[element];
		}
	}
}
/**
 * Stage 5 variant for convex-triangle pairs using multi-manifolds.
 * Compacts the contact-manager arrays by copying each surviving pair over a
 * removed slot. One warp handles each swap: lane 0 copies the scalar per-pair
 * records, the full warp cooperates on the (optional) multi-manifold copy.
 *
 * Fix: the inner manifold-copy loop re-declared `i`, shadowing the outer swap
 * counter; renamed to `idx` for clarity and consistency with Stage5.
 */
extern "C" __global__ void removeContactManagers_Stage5_CvxTri(const PxgPairManagementData* PX_RESTRICT pairData, const bool copyManifold)
{
	const PxU32* PX_RESTRICT globalRunSumBuffer = pairData->mTempAccumulator;
	const PxU32 totalSize = pairData->mNbPairs;
	const PxU32 nbToSwap = getNbSwapsRequired(globalRunSumBuffer, totalSize, pairData->mNbToRemove);

	PxgContactManagerInput* inputData = pairData->mContactManagerInputData;
	PxsContactManagerOutput* outputData = pairData->mContactManagerOutputData;
	PxgPersistentContactMultiManifold* manifolds = reinterpret_cast<PxgPersistentContactMultiManifold*>(pairData->mPersistentContactManagers);
	PxsContactManager** cms = pairData->mCpuContactManagerMapping;
	Sc::ShapeInteraction** sis = pairData->mShapeInteractions;
	PxReal* rest = pairData->mRestDistances;
	PxsTorsionalFrictionData* torsional = pairData->mTorsionalData;

	for (PxU32 i = (threadIdx.x + WARP_SIZE * threadIdx.y + blockIdx.x * blockDim.x * blockDim.y) / WARP_SIZE;
		i < nbToSwap; i += ((blockDim.x * blockDim.y * gridDim.x)) / WARP_SIZE)
	{
		PxU32 srcIndex, dstIndex;
		getSwapIndices(globalRunSumBuffer, totalSize, i, nbToSwap, dstIndex, srcIndex);

		// Lane 0 of the warp moves the scalar per-pair records.
		if (threadIdx.x == 0)
		{
			inputData[dstIndex] = inputData[srcIndex];
			outputData[dstIndex] = outputData[srcIndex];
			cms[dstIndex] = cms[srcIndex];
			sis[dstIndex] = sis[srcIndex];
			rest[dstIndex] = rest[srcIndex];
			torsional[dstIndex] = torsional[srcIndex];
		}

		if (copyManifold)
		{
			// Whole warp cooperatively copies the multi-manifold in float4 chunks.
			float4* dst = (float4*)&manifolds[dstIndex];
			float4* src = (float4*)&manifolds[srcIndex];
			const PxU32 nbFloat4 = sizeof(PxgPersistentContactMultiManifold) / sizeof(float4);
			for (PxU32 idx = threadIdx.x; idx < nbFloat4; idx += WARP_SIZE)
			{
				dst[idx] = src[idx];
			}
		}
	}
}
/**
 * Replicates one source element of 'dataSize' bytes into 'nbTimesToReplicate'
 * consecutive destination slots. The source element is first staged into
 * shared memory in transposed, component-major form (all x components, then
 * all y, z, w), then each copying thread reconstructs its float4 from the
 * staged data. Assumes dataSize <= MaxStructureSize, dataSize is a multiple
 * of sizeof(float4), and blockDim.x >= nbThreadsPerElement.
 */
extern "C" __global__ void initializeManifolds(float4* destination, const float4* source, PxU32 dataSize, PxU32 nbTimesToReplicate)
{
	const PxU32 MaxStructureSize = 4096;
	__shared__ float sourceData[MaxStructureSize / sizeof(float)];

	//Number of threads required per element (rounded up to a warp size)
	// NOTE(review): this is a plain truncating division, so "rounded up"
	// above looks inaccurate -- confirm against the launch configuration.
	const PxU32 nbThreadsPerElement = dataSize / sizeof(float4);
	//Number to process per block
	const PxU32 nbPerBlock = blockDim.x / nbThreadsPerElement;
	// Threads at or above MaxThreadIdx would straddle a partial element; they idle.
	const PxU32 MaxThreadIdx = nbPerBlock*nbThreadsPerElement;
	const PxU32 nbBlocksRequired = (nbTimesToReplicate + nbPerBlock - 1) / nbPerBlock;

	// Stage the source element into shared memory, transposed by component.
	if (threadIdx.x < nbThreadsPerElement)
	{
		const float4 value = source[threadIdx.x];
		sourceData[threadIdx.x] = value.x;
		sourceData[threadIdx.x + nbThreadsPerElement] = value.y;
		sourceData[threadIdx.x + 2*nbThreadsPerElement] = value.z;
		sourceData[threadIdx.x + 3*nbThreadsPerElement] = value.w;
	}

	// Offset of this thread's float4 within the element it copies.
	const PxU32 threadReadIdx = threadIdx.x%nbThreadsPerElement;
	__syncthreads();

	// Grid-stride over chunks of nbPerBlock replicas.
	for (PxU32 a = blockIdx.x; a < nbBlocksRequired; a += gridDim.x)
	{
		PxU32 startBlockIdx = nbPerBlock*a;
		float4* startPtr = destination + startBlockIdx*nbThreadsPerElement;
		// Index of the replica this thread writes into.
		PxU32 idx = startBlockIdx + threadIdx.x / nbThreadsPerElement;
		if (threadIdx.x < MaxThreadIdx && idx < nbTimesToReplicate)
		{
			// Re-assemble the float4 from the transposed shared-memory layout.
			startPtr[threadIdx.x] = make_float4(sourceData[threadReadIdx], sourceData[threadReadIdx + nbThreadsPerElement],
				sourceData[threadReadIdx + 2 * nbThreadsPerElement], sourceData[threadReadIdx + 3 * nbThreadsPerElement]);
		}
	}
}

View File

@@ -0,0 +1,225 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __PARTICLE_COLLISION_CUH__
#define __PARTICLE_COLLISION_CUH__
#include "foundation/PxVec3.h"
#include "foundation/PxTransform.h"
#include "sphereCollision.cuh"
#include "deformableCollision.cuh"
#include "GuConvexSupport.h"
/**
* Retuns distance and normal information for a particle-box contact.
* If maxDist < PX_MAX_F32, then no result is provided if the distance is larger than maxDist.
*/
/**
 * Computes the contact normal and signed distance between a point and a box.
 * The normal is returned in world space; dist is negative when the point is
 * embedded in the box. If maxDist < PX_MAX_F32, returns false (no result)
 * when the point is further from the box surface than maxDist.
 */
__device__ __forceinline__ static
bool contactPointBox(PxVec3& normal, PxReal& dist,
	const PxVec3& pointPos, const PxTransform& boxToWorld, const PxVec3& halfBoxExtents, const PxReal maxDist)
{
	const PxVec3 localPoint = boxToWorld.transformInv(pointPos);
	const PxVec3 clampedPoint(
		PxClamp(localPoint.x, -halfBoxExtents.x, halfBoxExtents.x),
		PxClamp(localPoint.y, -halfBoxExtents.y, halfBoxExtents.y),
		PxClamp(localPoint.z, -halfBoxExtents.z, halfBoxExtents.z));

	if (maxDist < PX_MAX_F32)
	{
		// Early out when the point is further from the box than maxDist.
		const PxReal separationSq = (localPoint - clampedPoint).magnitudeSquared();
		if (separationSq > maxDist * maxDist)
			return false;
	}

	const bool insideX = halfBoxExtents.x >= PxAbs(localPoint.x);
	const bool insideY = halfBoxExtents.y >= PxAbs(localPoint.y);
	const bool insideZ = halfBoxExtents.z >= PxAbs(localPoint.z);

	if (insideX && insideY && insideZ)
	{
		// Embedded point: push out along the axis with the smallest distance
		// to the box surface; dist is the (negative) penetration depth.
		const PxVec3 surfaceDist = halfBoxExtents - clampedPoint.abs();
		if (surfaceDist.x <= surfaceDist.y && surfaceDist.x <= surfaceDist.z)
		{
			normal = clampedPoint.x >= 0.f ? PxVec3(1.f, 0.f, 0.f) : PxVec3(-1.0f, 0.f, 0.f);
			dist = -surfaceDist.x;
		}
		else if (surfaceDist.y <= surfaceDist.z)
		{
			normal = clampedPoint.y >= 0.f ? PxVec3(0.f, 1.f, 0.f) : PxVec3(0.f, -1.f, 0.f);
			dist = -surfaceDist.y;
		}
		else
		{
			normal = clampedPoint.z >= 0.f ? PxVec3(0.f, 0.f, 1.f) : PxVec3(0.f, 0.f, -1.f);
			dist = -surfaceDist.z;
		}
	}
	else
	{
		// Point is outside: normal points from the box surface to the point.
		normal = localPoint - clampedPoint;
		dist = normal.normalizeFast();
	}

	normal = boxToWorld.rotate(normal);
	return true;
}
/**
* Retuns distance and normal information for a particle-sphere contact.
* If maxDist < PX_MAX_F32, then no result is provided if the distance is larger than maxDist.
*/
/**
 * Computes the contact normal and signed distance between a point and a
 * sphere (dist is negative when the point is inside the sphere).
 * If maxDist < PX_MAX_F32, returns false when the point is further from the
 * sphere surface than maxDist.
 */
__device__ __forceinline__ static
bool contactPointSphere(PxVec3& normal, PxReal& dist,
	const PxVec3& pointPos, const PxVec3& spherePos, const PxReal sphereRadius, const PxReal maxDist)
{
	const PxVec3 centerToPoint = pointPos - spherePos;
	const PxReal centerDistSq = centerToPoint.magnitudeSquared();

	if (maxDist < PX_MAX_F32)
	{
		const PxReal cutoff = maxDist + sphereRadius;
		if (centerDistSq > cutoff * cutoff)
			return false;
	}

	const PxReal centerDist = PxSqrt(centerDistSq);
	// Fall back to an arbitrary fixed axis when the point sits at the center.
	normal = (centerDist > PX_EPS_REAL) ? centerToPoint * (1.0f / centerDist) : PxVec3(1.0f, 0.0f, 0.0f);
	dist = centerDist - sphereRadius;
	return true;
}
/**
* Retuns distance and normal information for a point-capsule contact.
* If maxDist < PX_MAX_F32, then no result is provided if the distance is larger than maxDist.
*/
/**
 * Computes the contact normal and signed distance between a point and a
 * capsule (dist is negative when the point is inside the capsule).
 * If maxDist < PX_MAX_F32, returns false when the point is further from the
 * capsule surface than maxDist.
 */
__device__ __forceinline__ static
bool contactPointCapsule(PxVec3& normal, PxReal& dist,
	const PxVec3& pointPos, const PxVec3& capsulePos, const PxVec3& capsuleDir, const PxReal capsuleRadius, const PxReal capsuleHalfHeight, const PxReal maxDist)
{
	const PxVec3 halfAxis = capsuleDir * capsuleHalfHeight;
	const PxVec3 localPoint = pointPos - capsulePos;

	// Squared distance from the point to the capsule's core segment;
	// t parameterizes the closest point along the segment.
	PxReal t;
	const PxReal segDistSq = distancePointSegmentSquared(-halfAxis, halfAxis, localPoint, t);

	if (maxDist < PX_MAX_F32)
	{
		const PxReal cutoff = maxDist + capsuleRadius;
		if (segDistSq > cutoff * cutoff)
			return false;
	}

	// Closest point on the segment is -halfAxis + t*(2*halfAxis) = halfAxis*(2t - 1).
	const PxVec3 segToPoint = localPoint - halfAxis * (2.0f * t - 1.0f);
	const PxReal segDist = PxSqrt(segDistSq);
	// Fall back to an arbitrary fixed axis when the point lies on the segment.
	normal = (segDist > PX_EPS_REAL) ? (segToPoint * (1.0f / segDist)) : PxVec3(1.0f, 0.0f, 0.0f);
	dist = segDist - capsuleRadius;
	return true;
}
/**
* Combined speculative particle triangle cd. Returns normal and distance.
* If CCD is enabled two tests are performed. The first one decides if the triangle
* is overlapping with the contact volume (sphere), which is typically located at a
* predicted position of the particle, covering it's whole predicted path. The second
* test then establishes the correct distance and normal with respect to the actual particle
* position.
* If CCD is disabled, particlePos == cVolumePos, and cVolumeRadius is the standard contact
* distance.
*/
/**
 * Combined speculative particle-triangle collision test.
 * The contact volume (sphere at cVolumePos with radius cVolumeRadius) is
 * tested against the triangle; on overlap the distance and normal are
 * established relative to the actual particle position when CCD is enabled.
 * Assumes particlePos == cVolumePos and cVolumeRadius == contact distance
 * when enableCCD is false. Back-facing triangles are rejected.
 */
__device__ __forceinline__ static
bool particleTriangleTest(PxVec3& normal, PxReal& distance,
	const PxVec3& particlePos, const PxVec3& cVolumePos, const PxReal cVolumeRadius,
	const PxVec3& triV0, const PxVec3& triV1, const PxVec3& triV2, const bool enableCCD)
{
	const PxVec3 edge1 = triV1 - triV0;
	const PxVec3 edge2 = triV2 - triV0;
	const PxVec3 triNormal = edge1.cross(edge2).getNormalized();

	// Plane-side rejection of the contact volume.
	if ((cVolumePos - triV0).dot(triNormal) >= cVolumeRadius)
		return false;

	// Contact-volume overlap test against the triangle.
	PxVec3 closestPos;
	PxReal distSq = distancePointTriangleSquared(cVolumePos, triV0, triV1, triV2, closestPos);
	if (distSq > cVolumeRadius * cVolumeRadius)
		return false;

	// Back-face culling with respect to the current particle position.
	if ((particlePos - triV0).dot(triNormal) <= 0.0f)
		return false;

	if (enableCCD)
	{
		// With CCD, replace closest point and distance from the contact
		// volume with those from the current particle position.
		distSq = distancePointTriangleSquared(particlePos, triV0, triV1, triV2, closestPos);
	}

	// Assumes particlePos == cVolumePos when enableCCD == false.
	const PxVec3 toParticle = particlePos - closestPos;
	normal = (distSq > 0.0f) ? toParticle * PxRecipSqrt(distSq) : triNormal;
	distance = PxSqrt(distSq);
	return true;
}
/**
* Retuns distance and normal information for a particle-convexCore contact.
* If maxDist < PX_MAX_F32, then no result is provided if the distance is larger than maxDist.
*/
/**
 * Computes the contact normal and signed distance between a particle and a
 * convex-core shape via GJK, falling back to EPA when the shapes overlap.
 * If the separation exceeds maxDist, returns false (no result).
 */
__device__ __forceinline__ static
bool particleConvexCore(PxVec3& normal, PxReal& dist, const PxVec3& particlePos,
	const Gu::ConvexShape& convex, const PxReal maxDist)
{
	// Model the particle as a zero-margin point-type convex shape.
	Gu::ConvexShape point;
	point.coreType = Gu::ConvexCore::Type::ePOINT;
	point.pose = PxTransform(particlePos);
	point.margin = 0;

	PxVec3 witness0, witness1;
	const PxReal gjkCutoff = convex.margin + point.margin + maxDist;
	dist = Gu::RefGjkEpa::computeGjkDistance(point, convex, point.pose, convex.pose,
		gjkCutoff, witness0, witness1, normal);

	if (dist < FLT_EPSILON)
	{
		// Overlapping: use EPA to recover the penetration depth.
		dist = Gu::RefGjkEpa::computeEpaDepth(point, convex, point.pose, convex.pose,
			witness0, witness1, normal);
	}

	if (dist > maxDist)
		return false;
	return true;
}
#endif // __PARTICLE_COLLISION_CUH__

View File

@@ -0,0 +1,359 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxBounds3.h"
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxTransform.h"
#include "foundation/PxVec3.h"
#include "geometry/PxHeightFieldSample.h"
#include "PxNodeIndex.h"
#include "PxgContactManager.h"
#include "PxgConvexConvexShape.h"
#include "PxgParticleSystem.h"
#include "PxgParticleSystemCoreKernelIndices.h"
#include "PxsTransformCache.h"
#include <vector_types.h>
#include "PxgCommonDefines.h"
#include "heightfieldUtil.cuh"
#include "dataReadWriteHelper.cuh"
#include "gridCal.cuh"
#include "particleCollision.cuh"
#include "assert.h"
using namespace physx;
extern "C" __host__ void initNarrowphaseKernels12() {}
PX_ALIGN_PREFIX(16)
// Shared-memory scratch for one particle-system vs. heightfield pair.
// Filled once by thread 0 of warp 0 in ps_heightfieldCollisonLaunch,
// then read by all threads of the block.
struct PsHeightfieldScratch
{
	PxTransform heightfieldTransform;					// heightfield shape -> world transform
	PxgShape heightfieldShape;							// carries scale and the sample data pointer (hullOrMeshPtr)
	PxgParticleContactInfo* PX_RESTRICT oneWayContactInfos;		// per-particle static ("one-way") contact slots
	PxNodeIndex* PX_RESTRICT oneWayContactNodeIndices;	// rigid node index per static contact
	PxU32* PX_RESTRICT oneWayContactCounts;				// per-particle static contact counters
	float4* currentPositions;							// sorted current positions (diffuse: origin pos + lifetime)
	float4* predictedPositions;							// predicted positions, used to build the CCD contact volume
	PxNodeIndex rigidId; //heightfield
	PxU32 transformCacheRef; //heightfield
	PxU32 numParticles;									// particle count for the selected (regular/diffuse) set
	PxU32 particleSystemId;
	PxReal contactDist;									// combined contact distance of the pair
	PxReal restDist;									// rest distance of the pair
	bool enableCCD;										// speculative-CCD flag from the particle system
}PX_ALIGN_SUFFIX(16);
/**
 * Collides one particle (regular or diffuse) against a heightfield.
 *
 * Builds the particle's contact volume (a sphere covering the predicted
 * motion when speculative CCD is enabled, otherwise just the contact distance
 * around the current position), clips it against the heightfield sample grid,
 * runs a particle-vs-triangle test on every overlapped non-hole triangle, and
 * collects up to MaxStaticContactsPerMesh candidates in
 * sPerParticleTempContacts (xyz = world-space normal, w = distance - restDist).
 * Contacts against a static heightfield go into the per-particle one-way
 * buffers; the remainder (non-diffuse only) is appended through 'writer'.
 */
__device__ static inline void psHeightfieldMidphaseCollision(
	PsHeightfieldScratch* sScratch,
	float4* PX_RESTRICT sPerParticleTempContacts,	// per-thread scratch, MaxStaticContactsPerMesh entries
	const PxU32 numParticles,
	const PxU32 particleIndex,
	const bool isDiffuse,
	const bool isTGS,
	PxgParticleContactWriter& writer
)
{
	const PxReal contactDist = sScratch->contactDist;
	const PxReal restDist = sScratch->restDist;
	PxVec3 currentPos = PxLoad3(sScratch->currentPositions[particleIndex]);

	PxVec3 cVolumePos;
	PxReal cVolumeRadius;
	if (sScratch->enableCCD)
	{
		// Speculative CCD: the contact volume covers the motion from the
		// current to the predicted position.
		PxVec3 predictedPos = PxLoad3(sScratch->predictedPositions[particleIndex]);
		cVolumeRadius = getParticleSpeculativeContactVolume(cVolumePos,
			currentPos, predictedPos, contactDist, isDiffuse, isTGS);
	}
	else
	{
		cVolumePos = currentPos;
		cVolumeRadius = contactDist;
	}

	// World-space AABB of the contact volume.
	const PxVec3 cVolumeRadius3(cVolumeRadius);
	const PxVec3 min = cVolumePos - cVolumeRadius3;
	const PxVec3 max = cVolumePos + cVolumeRadius3;
	PxBounds3 worldBound(min, max);

	PxTransform heightfieldTransform = sScratch->heightfieldTransform;
	PxgShape heightfieldShape = sScratch->heightfieldShape;

	const PxReal oneOverHeightScale = 1.f / heightfieldShape.scale.scale.y;
	const PxReal oneOverRowScale = 1.f / PxAbs(heightfieldShape.scale.scale.x);
	const PxReal oneOverlColScale = 1.f / PxAbs(heightfieldShape.scale.scale.z);

	//bound is in world space, we need to transform the bound to the local space of height field
	PxBounds3 localBound = PxBounds3::transformFast(heightfieldTransform.getInverse(), worldBound);

	// Undo the heightfield scale so the bound is in sample-grid units.
	localBound.minimum.x *= oneOverRowScale;
	localBound.minimum.y *= oneOverHeightScale;
	localBound.minimum.z *= oneOverlColScale;
	localBound.maximum.x *= oneOverRowScale;
	localBound.maximum.y *= oneOverHeightScale;
	localBound.maximum.z *= oneOverlColScale;

	//row scale
	if (heightfieldShape.scale.scale.x < 0.f)
	{
		//swap min and max row scale
		const PxReal temp = localBound.minimum.x;
		localBound.minimum.x = localBound.maximum.x;
		localBound.maximum.x = temp;
	}

	//col scale
	if (heightfieldShape.scale.scale.z < 0.f)
	{
		PxReal swap = localBound.minimum.z;
		localBound.minimum.z = localBound.maximum.z;
		localBound.maximum.z = swap;
	}

	// Heightfield data layout: [nbRows, nbCols, samples...].
	PxU32* heightfieldData = reinterpret_cast<PxU32*>(heightfieldShape.hullOrMeshPtr);
	const PxU32 nbRows = heightfieldData[0];
	const PxU32 nbCols = heightfieldData[1];
	PxHeightFieldSample* samples = reinterpret_cast<PxHeightFieldSample*>(&heightfieldData[2]);

	// Early out if the bound misses the sample grid entirely.
	if ((localBound.minimum.x > nbRows - 1) || (localBound.minimum.z > nbCols - 1)
		|| (localBound.maximum.x < 0) || (localBound.maximum.z < 0))
	{
		return;
	}

	// Range of heightfield cells overlapped by the bound.
	PxU32 minRow = getMinRow(localBound.minimum.x, nbRows);
	PxU32 maxRow = getMaxRow(localBound.maximum.x, nbRows);
	PxU32 minColumn = getMinColumn(localBound.minimum.z, nbCols);
	PxU32 maxColumn = getMaxColumn(localBound.maximum.z, nbCols);

	if ((2 * (maxColumn - minColumn) * (maxRow - minRow)) == 0)
	{
		return;
	}

	const PxVec3 cVolumePosShapeSpace = heightfieldTransform.transformInv(cVolumePos);
	const PxVec3 particlePosShapeSpace = heightfieldTransform.transformInv(currentPos);

	const PxReal miny = localBound.minimum.y;
	const PxReal maxy = localBound.maximum.y;
	const PxU32 columnSpan = maxColumn - minColumn;

	//How many static contacts this particle already has
	PxU32 contactCounts = 0;

	//we have two materials corresponding to one vertexIndex, so each thread will deal with one of the materials
	const PxU32 totalNumProcessed = (maxRow - minRow) * columnSpan * 2;
	for (PxU32 i = 0; i < totalNumProcessed; ++i)
	{
		PxU32 triangleIdx = 0xFFffFFff;
		// Two triangles (materials) per cell: i/2 selects the cell, i&1 the triangle.
		const PxU32 index = i / 2;
		const PxU32 vertexIndex = (minRow + index / columnSpan) * nbCols + (minColumn + index % columnSpan);
		assert(isValidVertex(vertexIndex, nbRows, nbCols));

		// Reject cells entirely above or below the bound's height range.
		PxReal h0 = getHeight(vertexIndex, samples);
		PxReal h1 = getHeight(vertexIndex + 1, samples);
		PxReal h2 = getHeight(vertexIndex + nbCols, samples);
		PxReal h3 = getHeight(vertexIndex + nbCols + 1, samples);
		const bool con0 = maxy < h0 && maxy < h1 && maxy < h2 && maxy < h3;
		const bool con1 = miny > h0 && miny > h1 && miny > h2 && miny > h3;

		if (!(con0 || con1))
		{
			const PxHeightFieldSample& sample = getSample(vertexIndex, samples);
			const bool isMaterial1 = (i & 1) ? 1 : 0;
			PxU32 material = isMaterial1 ? sample.materialIndex1 : sample.materialIndex0;
			if (material != PxHeightFieldMaterial::eHOLE)
			{
				triangleIdx = isMaterial1 ? ((vertexIndex << 1) + 1) : (vertexIndex << 1);

				PxVec3 triV0, triV1, triV2;
				getTriangle(triV0, triV1, triV2, NULL, triangleIdx, heightfieldShape.scale, nbRows, nbCols, samples);

				PxVec3 normalShapeSpace;
				PxReal distance;
				bool intersect = particleTriangleTest(normalShapeSpace, distance,
					particlePosShapeSpace, cVolumePosShapeSpace, cVolumeRadius, triV0, triV1, triV2, sScratch->enableCCD);

				//this code still misses a logic from the triangle mesh near phase, which also selects the best contact
				//if there are more than MaxStaticContactsPerMesh
				if (intersect && contactCounts < PxgParticleContactInfo::MaxStaticContactsPerMesh)
				{
					PxVec3 normal = heightfieldTransform.rotate(normalShapeSpace);
					sPerParticleTempContacts[contactCounts++] = make_float4(normal.x, normal.y, normal.z, distance - restDist);
				}
			}
		}
	}//end of totalNumProcessed

	contactCounts = PxMin(contactCounts, PxgParticleContactInfo::MaxStaticContactsPerMesh);

	PxU32 contactWritten = 0;
	if (sScratch->rigidId.isStaticBody())
	{
		// Static heightfield: write into the per-particle one-way contact slots.
		if (contactCounts)
		{
			PxU32 contactIndex = (PxU32)(atomicAdd((PxI32*)&sScratch->oneWayContactCounts[particleIndex], (PxI32)contactCounts));
			PxU32 endIndex = PxMin(contactIndex + contactCounts, PxgParticleContactInfo::MaxStaticContactsPerParticle);

			// Contacts are strided by numParticles (structure-of-arrays layout).
			for (PxU32 i = contactIndex, src = 0; i < endIndex; i++, src++)
			{
				const PxU32 outputIndex = particleIndex + i * numParticles;
				sScratch->oneWayContactInfos[outputIndex].mNormal_PenW = sPerParticleTempContacts[src];
				sScratch->oneWayContactNodeIndices[outputIndex] = sScratch->rigidId;
			}
		}

		// NOTE(review): this overwrites the counter that was just atomically
		// incremented and effectively assumes it started at zero for this
		// particle; presumably only one writer touches it here -- confirm.
		contactWritten = PxMin(contactCounts, PxgParticleContactInfo::MaxStaticContactsPerParticle);
		sScratch->oneWayContactCounts[particleIndex] = contactWritten;
	}

	// Contacts not absorbed by the one-way path go to the general particle-rigid
	// contact stream (non-diffuse particles only).
	PxU32 remaining = contactCounts - contactWritten;
	if ((remaining > 0) && !isDiffuse)
	{
		PxU64 compressedParticleIndex = PxEncodeParticleIndex(sScratch->particleSystemId, particleIndex);
		PxU32 contactStartIndex = atomicAdd(writer.numTotalContacts, remaining);
		PxU32 contactEndIndex = contactStartIndex + remaining;

		for (PxU32 contactIndex = contactStartIndex, src = 0; contactIndex < contactEndIndex; contactIndex++, src++)
		{
			float4 contact = sPerParticleTempContacts[src];
			writer.writeContact(contactIndex, PxVec4(contact.x, contact.y, contact.z, contact.w), compressedParticleIndex, sScratch->rigidId);
		}
	}
}
/**
 * Kernel: particle system vs. heightfield collision.
 * blockIdx.y selects the contact-manager pair; blockIdx.z selects regular (0)
 * vs. diffuse (1) particles. Warp 0 loads the pair's shapes and transform and
 * its lane 0 fills the shared scratch block; all threads then grid-stride
 * over the particles and run the per-particle midphase/narrowphase.
 */
extern "C" __global__ void ps_heightfieldCollisonLaunch(
	const bool isTGS,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxReal* PX_RESTRICT restDistances,
	const PxgShape* PX_RESTRICT gpuShapes,
	PxgParticleSystem* PX_RESTRICT particleSystems,
	PxNodeIndex* PX_RESTRICT shapeToRigidRemapTable,
	PxgParticleContactWriter writer
)
{
	// Raw shared storage reinterpreted as the per-pair scratch structure.
	__shared__ __align__(16) PxU8 sData[sizeof(PsHeightfieldScratch)];
	PsHeightfieldScratch& sHeightfieldScratch = reinterpret_cast<PsHeightfieldScratch&>(sData);

	// Per-thread temporary contact storage in shared memory.
	__shared__ float4 sTempContacts[PxgParticleSystemKernelBlockDim::PS_HEIGHTFIELD_COLLISION][PxgParticleContactInfo::MaxStaticContactsPerMesh];

	unsigned int cmIdx = blockIdx.y;
	const PxU32 warpIndex = threadIdx.x / WARP_SIZE;
	const bool isDiffuseParticlesThread = blockIdx.z == 1;

	if (warpIndex == 0)
	{
		const PxU32 threadIndexInWarp = threadIdx.x & (WARP_SIZE - 1);

		// Warp-cooperative load of the pair's shapes and transform.
		PxgShape particleShape, heightfieldShape;
		PxU32 particleCacheRef, heightfieldCacheRef;
		LoadShapePairWarp<PxGeometryType::ePARTICLESYSTEM, PxGeometryType::eHEIGHTFIELD>(cmInputs, cmIdx, gpuShapes,
			particleShape, particleCacheRef, heightfieldShape, heightfieldCacheRef);

		PxsCachedTransform heightfieldTransformCache;
		PxsCachedTransform_ReadWarp(heightfieldTransformCache, transformCache + heightfieldCacheRef);

		const PxReal contactDist = contactDistance[particleCacheRef] + contactDistance[heightfieldCacheRef];
		const PxNodeIndex rigidId = shapeToRigidRemapTable[heightfieldCacheRef];
		const PxU32 particleSystemId = particleShape.particleOrSoftbodyId;
		PxgParticleSystem& particleSystem = particleSystems[particleSystemId];

		// A single lane publishes the scratch data for the whole block.
		if (threadIndexInWarp == 0)
		{
			sHeightfieldScratch.heightfieldTransform = heightfieldTransformCache.transform;
			sHeightfieldScratch.heightfieldShape = heightfieldShape;
			sHeightfieldScratch.rigidId = rigidId;
			sHeightfieldScratch.transformCacheRef = heightfieldCacheRef;
			sHeightfieldScratch.contactDist = contactDist;
			sHeightfieldScratch.restDist = restDistances[cmIdx];
			sHeightfieldScratch.enableCCD = (particleSystem.mData.mFlags & PxParticleFlag::eENABLE_SPECULATIVE_CCD) > 0;

			if (isDiffuseParticlesThread)
			{
				// If there are no diffuse particles, deactivate collision handling
				// from this kernel by publishing a particle count of zero.
				const bool isDiffuseParticlesActive = (particleSystem.mCommonData.mMaxDiffuseParticles > 0);
				sHeightfieldScratch.currentPositions = particleSystem.mDiffuseSortedOriginPos_LifeTime;
				sHeightfieldScratch.predictedPositions = particleSystem.mDiffuseSortedPos_LifeTime;
				sHeightfieldScratch.numParticles = (isDiffuseParticlesActive) ? *particleSystem.mNumDiffuseParticles : 0;
				sHeightfieldScratch.oneWayContactInfos = particleSystem.mDiffuseOneWayContactInfos;
				sHeightfieldScratch.oneWayContactNodeIndices = particleSystem.mDiffuseOneWayNodeIndex;
				sHeightfieldScratch.oneWayContactCounts = particleSystem.mDiffuseOneWayContactCount;
				sHeightfieldScratch.particleSystemId = particleSystemId;
			}
			else
			{
				sHeightfieldScratch.currentPositions = particleSystem.mSortedOriginPos_InvMass;
				sHeightfieldScratch.predictedPositions = particleSystem.mSortedPositions_InvMass;
				sHeightfieldScratch.numParticles = particleSystem.mCommonData.mNumParticles;
				sHeightfieldScratch.oneWayContactInfos = particleSystem.mOneWayContactInfos;
				sHeightfieldScratch.oneWayContactNodeIndices = particleSystem.mOneWayNodeIndex;
				sHeightfieldScratch.oneWayContactCounts = particleSystem.mOneWayContactCount;
				sHeightfieldScratch.particleSystemId = particleSystemId;
			}
		}
	}
	__syncthreads();

	const PxU32 numParticles = sHeightfieldScratch.numParticles;
	if (isDiffuseParticlesThread && numParticles == 0)
		return;

	// Grid-stride loop over all particles of the selected set.
	for (PxU32 globalThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; globalThreadIndex < numParticles; globalThreadIndex += gridDim.x * blockDim.x)
	{
		psHeightfieldMidphaseCollision(
			&sHeightfieldScratch,
			sTempContacts[threadIdx.x],
			numParticles,
			globalThreadIndex,
			isDiffuseParticlesThread,
			isTGS,
			writer
		);
	}
}

View File

@@ -0,0 +1,805 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include <stdio.h>
#include <stdint.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include "GuIntersectionTriangleBoxRef.h"
#include "foundation/PxVec3.h"
#include "foundation/PxQuat.h"
#include "foundation/PxTransform.h"
#include "geometry/PxMeshScale.h"
#include "geometry/PxGeometry.h"
#include "cudaNpCommon.h"
#include "PxgPersistentContactManifold.h"
#include "PxgContactManager.h"
#include "PxgConvexConvexShape.h"
#include "PxsContactManagerState.h"
#include "PxsTransformCache.h"
#include "PxgParticleSystem.h"
#include "PxgParticleSystemCore.h"
#include "PxgParticleSystemCoreKernelIndices.h"
#include "PxgNpKernelIndices.h"
#include "PxsMaterialCore.h"
#include "PxgCommonDefines.h"
#include "utils.cuh"
#include "deformableElementFilter.cuh"
#include "PxNodeIndex.h"
#include "sdfCollision.cuh"
#include "gridCal.cuh"
using namespace physx;
extern "C" __host__ void initNarrowphaseKernels11() {}
#include "warpHelpers.cuh"
#include "manifold.cuh"
#include "vector_functions.h"
#include "reduction.cuh"
#include "PxgSimulationCoreDesc.h"
#include "bv32Traversal.cuh"
#include "triangleMesh.cuh"
#include "particleCollision.cuh"
PX_ALIGN_PREFIX(16)
// Scratch data for particle vs. triangle-mesh midphase (BV32 traversal).
struct PsMidphaseScratch
{
	const float4* PX_RESTRICT trimeshVerts;			// mesh vertex positions
	const uint4* PX_RESTRICT trimeshTriIndices;		// triangle vertex indices
	PxTransform meshToWorld;						// mesh shape -> world transform
	PxMeshScale trimeshScale;						// vertex-space <-> shape-space scale
	//bv32 tree
	const Gu::BV32DataPacked* bv32PackedNodes;
	//stack for traversal
	int sBv32Nodes[192]; //6 depth of the bv32 tree
	// Presumably the active-lane mask pushed alongside each stack node -- TODO confirm.
	PxU32 sBv32ActiveParticles[192];
	// Per-lane (32 lanes) contact candidates; presumably shape-space normals
	// with matching depths below -- TODO confirm against the traversal code.
	PxVec3 shLocalContacts[32][PxgParticleContactInfo::MaxStaticContactsPerMesh];
	PxReal shLocalContactDepths[32][PxgParticleContactInfo::MaxStaticContactsPerMesh];
	float4* currentPositions;						// sorted current particle positions
	float4* predictedPositions;						// predicted positions, used for the CCD contact volume
	bool enableCCD;									// speculative-CCD flag
	PxU8 pad[7];									// explicit padding
}PX_ALIGN_SUFFIX(16);
// Warp-cooperative visitor for the BV32 tree traversal: collides up to 32
// particles (one per warp lane) against the triangles of one mesh. For every
// node/triangle test the whole warp loops over the lanes whose bit is set in
// currentNodeActiveParticles and broadcasts that lane's particle data with
// warp shuffles, so all 32 threads test the same particle together. Contacts
// are staged per lane in shared memory and flushed by finalizeFullWarp().
struct PsTreeContactGenTraverser
{
	PsMidphaseScratch* s_warpScratch;  // shared-memory scratch of the owning warp
	const PxU32 globalParticleIndex;   // particle owned by this lane
	const PxU32 numParticles;          // lanes with globalParticleIndex >= numParticles are idle
	PxVec3 cVolumePosVertexSpace;      // contact-volume center in mesh vertex space
	PxReal cVolumeRadius;              // contact-volume radius (shape space)
	PxVec3 cVolumePosShapeSpace;       // contact-volume center in mesh shape space
	PxVec3 particlePosShapeSpace;      // current particle position in mesh shape space
	PxU32 currentNodeActiveParticles;  // warp mask of lanes taking part in the current query
	PxU32 contactCount;                // contacts staged in shared memory for this lane

	// Builds the per-lane speculative contact volume. With CCD enabled the
	// volume spans the motion from the current to the predicted position;
	// otherwise it is a sphere of radius contactDist around the particle.
	PX_FORCE_INLINE __device__ PsTreeContactGenTraverser(
		PsMidphaseScratch* s_warpScratch,
		const PxU32 numParticles,
		const PxU32 globalParticleIndex,
		const bool isDiffuse,
		const bool isTGS,
		const PxReal contactDist
	)
		: s_warpScratch(s_warpScratch)
		, numParticles(numParticles)
		, globalParticleIndex(globalParticleIndex)
		, contactCount(0)
	{
		if (globalParticleIndex < numParticles)
		{
			const PxMeshScale trimeshScale = s_warpScratch->trimeshScale;
			const PxTransform meshToWorld = s_warpScratch->meshToWorld;
			const PxVec3 currentPos = PxLoad3(s_warpScratch->currentPositions[globalParticleIndex]);
			particlePosShapeSpace = meshToWorld.transformInv(currentPos);
			if (s_warpScratch->enableCCD)
			{
				// CCD: the volume covers the segment current -> predicted position.
				const PxVec3 predictedPos = PxLoad3(s_warpScratch->predictedPositions[globalParticleIndex]);
				PxVec3 cVolumePos;
				cVolumeRadius = getParticleSpeculativeContactVolume(cVolumePos, currentPos, predictedPos, contactDist, isDiffuse, isTGS);
				cVolumePosShapeSpace = meshToWorld.transformInv(cVolumePos);
			}
			else
			{
				// No CCD: sphere of radius contactDist around the current position.
				cVolumePosShapeSpace = particlePosShapeSpace;
				cVolumeRadius = contactDist;
			}
			cVolumePosVertexSpace = shape2Vertex(cVolumePosShapeSpace, trimeshScale.scale, trimeshScale.rotation);
		}
		//initialize all valid particles as active particles
		currentNodeActiveParticles = __ballot_sync(FULL_MASK, globalParticleIndex < numParticles);
	}

	// Sphere-vs-AABB rejection test for a BV32 node, run by the full warp.
	// cMin/cMax are the node bounds in mesh vertex space; returns true when at
	// least one active particle's contact volume overlaps the box (the node
	// must then be descended).
	// NOTE(review): newActiveParticles is only used as a boolean; the per-node
	// active mask is not narrowed here — confirm this matches the
	// bv32TreeTraversal contract.
	PX_FORCE_INLINE __device__ bool intersectBoxFullWarp(bool hasBox, const PxVec3& cMin, const PxVec3& cMax) const
	{
		const PxMeshScale trimeshScale = s_warpScratch->trimeshScale;
		PxVec3 sCVolumePosVertexSpace;
		PxReal sCVolumeRadius;
		PxU32 newActiveParticles = 0;
		// All 32 lanes iterate the set bits together so the shuffles stay warp-uniform.
		for (PxU32 i = currentNodeActiveParticles; i; i = clearLowestSetBit(i))
		{
			const PxU32 pIndexInWarp = (i == 0) ? 0 : lowestSetIndex(i); // ternary is defensive; the loop condition already guarantees i != 0
			// Broadcast lane pIndexInWarp's contact volume to the whole warp.
			sCVolumePosVertexSpace.x = __shfl_sync(FULL_MASK, cVolumePosVertexSpace.x, pIndexInWarp);
			sCVolumePosVertexSpace.y = __shfl_sync(FULL_MASK, cVolumePosVertexSpace.y, pIndexInWarp);
			sCVolumePosVertexSpace.z = __shfl_sync(FULL_MASK, cVolumePosVertexSpace.z, pIndexInWarp);
			sCVolumeRadius = __shfl_sync(FULL_MASK, cVolumeRadius, pIndexInWarp);
			if (hasBox)
			{
				// Closest point on the box to the volume center, then measure the
				// offset in shape space so the radius test accounts for the mesh scale.
				const PxVec3 closestVertexSpace = cMin.maximum(cMax.minimum(sCVolumePosVertexSpace));
				const PxVec3 distVecVertexSpace = closestVertexSpace - sCVolumePosVertexSpace;
				const PxVec3 distVec = vertex2Shape(distVecVertexSpace, trimeshScale.scale, trimeshScale.rotation);
				const PxReal distSq = distVec.magnitudeSquared();
				if (distSq < sCVolumeRadius*sCVolumeRadius)
				{
					newActiveParticles |= 1 << pIndexInWarp;
				}
			}
		}
		return newActiveParticles != 0;
	}

	// Tests one leaf triangle (primitiveIndex, or 0xFFFFFFFF for "no triangle
	// on this lane") against every active particle and stages the resulting
	// contacts in the per-lane shared-memory buffers. When a particle's buffer
	// is full, the deepest incoming contact replaces the shallowest stored one.
	PX_FORCE_INLINE __device__ void intersectPrimitiveFullWarp(PxU32 primitiveIndex, PxU32 idxInWarp)
	{
		const PxMeshScale trimeshScale = s_warpScratch->trimeshScale;
		const PxTransform meshToWorld = s_warpScratch->meshToWorld;
		PxVec3 sParticlePosShapeSpace;
		PxVec3 sCVolumePosShapeSpace;
		PxReal sCVolumeRadius;
		for (PxU32 i = currentNodeActiveParticles; i; i = clearLowestSetBit(i))
		{
			const PxU32 pIndexInWarp = (i == 0) ? 0 : lowestSetIndex(i); // defensive; loop condition guarantees i != 0
			// Broadcast particle pIndexInWarp's data; each lane then tests its own triangle.
			sParticlePosShapeSpace.x = __shfl_sync(FULL_MASK, particlePosShapeSpace.x, pIndexInWarp);
			sParticlePosShapeSpace.y = __shfl_sync(FULL_MASK, particlePosShapeSpace.y, pIndexInWarp);
			sParticlePosShapeSpace.z = __shfl_sync(FULL_MASK, particlePosShapeSpace.z, pIndexInWarp);
			sCVolumePosShapeSpace.x = __shfl_sync(FULL_MASK, cVolumePosShapeSpace.x, pIndexInWarp);
			sCVolumePosShapeSpace.y = __shfl_sync(FULL_MASK, cVolumePosShapeSpace.y, pIndexInWarp);
			sCVolumePosShapeSpace.z = __shfl_sync(FULL_MASK, cVolumePosShapeSpace.z, pIndexInWarp);
			sCVolumeRadius = __shfl_sync(FULL_MASK, cVolumeRadius, pIndexInWarp);
			bool intersect = false;
			PxVec3 normal(0.f);
			PxReal distance = PX_MAX_F32;
			PxU32 triangleIdx = 0xffffffff;
			if (primitiveIndex != 0xFFFFFFFF)
			{
				triangleIdx = primitiveIndex;
				uint4 triIdx = s_warpScratch->trimeshTriIndices[triangleIdx];
				// Bring the triangle into shape space before the particle test.
				PxVec3 triV0, triV1, triV2;
				triV0 = vertex2Shape(PxLoad3(s_warpScratch->trimeshVerts[triIdx.x]), trimeshScale.scale, trimeshScale.rotation);
				triV1 = vertex2Shape(PxLoad3(s_warpScratch->trimeshVerts[triIdx.y]), trimeshScale.scale, trimeshScale.rotation);
				triV2 = vertex2Shape(PxLoad3(s_warpScratch->trimeshVerts[triIdx.z]), trimeshScale.scale, trimeshScale.rotation);
				// A negative-determinant scale flips the winding; swap to keep the normal consistent.
				MeshScaling meshScale(s_warpScratch->trimeshScale.scale, s_warpScratch->trimeshScale.rotation);
				if (meshScale.flipNormal)
					PxSwap(triV1, triV2);
				PxVec3 normalShapeSpace;
				intersect = particleTriangleTest(normalShapeSpace, distance,
					sParticlePosShapeSpace, sCVolumePosShapeSpace, sCVolumeRadius, triV0, triV1, triV2, s_warpScratch->enableCCD);
				if (intersect)
				{
					normal = meshToWorld.rotate(normalShapeSpace);
				}
			}
			//push the triangle into stack ptr
			PxU32 resultWarp = __ballot_sync(FULL_MASK, intersect);
			PxU32 offset = warpScanExclusive(resultWarp, idxInWarp);
			PxU32 validCount = __popc(resultWarp);
			//However many contacts the particle already had...
			PxU32 contactStartIndex = __shfl_sync(FULL_MASK, contactCount, pIndexInWarp);
			// Only the owning lane advances its particle's contact count.
			if (idxInWarp == pIndexInWarp)
				contactCount += validCount;
			bool failedWrite = false;
			if (intersect)
			{
				const PxU32 index = contactStartIndex + offset;
				if (index < PxgParticleContactInfo::MaxStaticContactsPerMesh)
				{
					//If we have space for this contact (otherwise, we drop it!)
					s_warpScratch->shLocalContacts[pIndexInWarp][index] = normal;
					s_warpScratch->shLocalContactDepths[pIndexInWarp][index] = distance;
				}
				else
					failedWrite = true;
			}
			// Overflow handling: for each contact that did not fit, replace the
			// stored contact with the largest separation if the new one is deeper.
			PxU32 failMask = __ballot_sync(FULL_MASK, failedWrite);
			if (failMask)
			{
				// Lane j holds stored depth j of this particle's buffer (if in range).
				PxReal outSep = -PX_MAX_F32;
				if (idxInWarp < PxgParticleContactInfo::MaxStaticContactsPerMesh)
					outSep = s_warpScratch->shLocalContactDepths[pIndexInWarp][idxInWarp];
				for (PxU32 i = failMask; i != 0; i = clearLowestSetBit(i))
				{
					PxU32 index = lowestSetIndex(i);
					PxReal depth = __shfl_sync(FULL_MASK, distance, index);
					PxU32 candidateSwapMask = __ballot_sync(FULL_MASK, depth < outSep);
					if (candidateSwapMask)
					{
						// Evict the slot with the maximum (shallowest) separation.
						PxReal bestVal;
						PxU32 bestIndex = maxIndex(outSep, FULL_MASK, bestVal);
						if (idxInWarp == index)
						{
							s_warpScratch->shLocalContacts[pIndexInWarp][bestIndex] = normal;
							s_warpScratch->shLocalContactDepths[pIndexInWarp][bestIndex] = distance;
						}
						if (idxInWarp == bestIndex)
							outSep = depth;
					}
				}
				// Buffer is saturated from now on.
				if (idxInWarp == pIndexInWarp)
					contactCount = PxgParticleContactInfo::MaxStaticContactsPerMesh;
			}
		}
	}

	// Flushes the staged contacts. Contacts against a static body go to the
	// per-particle one-way buffers (capacity MaxStaticContactsPerParticle);
	// whatever does not fit there — and all contacts against non-static
	// bodies — is appended through the global contact writer.
	// NOTE(review): the atomicAdd below can leave oneWayContactCounts above
	// MaxStaticContactsPerParticle; presumably consumers clamp — verify.
	PX_FORCE_INLINE __device__ void finalizeFullWarp(
		PxU32 idxInWarp,
		const PxU32 particleSystemId,
		const PxNodeIndex rigidId,
		PxgParticleContactInfo* PX_RESTRICT oneWayContactInfos, //output
		PxNodeIndex* PX_RESTRICT oneWayContactNodeIndices, //output
		PxU32* PX_RESTRICT oneWayContactCounts, //output
		PxgParticleContactWriter& writer,
		const bool isDiffuseParticlesThread,
		const PxReal restDist
	)
	{
		//__syncwarp();
		PxU32 id = 0; // number of contacts consumed by the one-way (static) path
		if (globalParticleIndex < numParticles)
		{
			if (rigidId.isStaticBody())
			{
				if (contactCount != 0)
				{
					PxU32 startIndex = PxU32(atomicAdd((PxI32*)&oneWayContactCounts[globalParticleIndex], PxI32(contactCount)));
					PxU32 endIndex = PxMin(PxgParticleContactInfo::MaxStaticContactsPerParticle, startIndex + contactCount);
					// One-way buffers are laid out SoA-style: slot i of particle p is at p + i*numParticles.
					for (PxU32 i = startIndex; i < endIndex; i++, id++)
					{
						const PxVec3& normal = s_warpScratch->shLocalContacts[threadIdx.x][id];
						PxReal penetration = s_warpScratch->shLocalContactDepths[threadIdx.x][id] - restDist;
						const PxU32 outputIndex = globalParticleIndex + i * numParticles;
						oneWayContactInfos[outputIndex].mNormal_PenW = make_float4(normal.x, normal.y, normal.z, penetration);
						oneWayContactNodeIndices[outputIndex] = rigidId;
					}
				}
			}
		}
		// Contacts not stored above are written to the global contact stream.
		PxU32 remaining = contactCount - id;
		if (__any_sync(FULL_MASK, remaining))
		{
			// Warp prefix sum of 'remaining'; lane 31 holds the warp total and
			// reserves the output range with a single atomic.
			PxU32 count = warpScan<AddOpPxU32, PxU32>(FULL_MASK, remaining);
			PxU32 startIndex = 0xFFFFFFFF;
			if (idxInWarp == 31)
			{
				startIndex = atomicAdd(writer.numTotalContacts, count);
			}
			startIndex = __shfl_sync(FULL_MASK, startIndex, 31);
			if (globalParticleIndex < numParticles)
			{
				// count - remaining is this lane's exclusive prefix.
				startIndex = startIndex + count - remaining;
				if (remaining && !isDiffuseParticlesThread)
				{
					PxU64 compressedParticleIndex = PxEncodeParticleIndex(particleSystemId, globalParticleIndex);
					for (PxU32 i = id, index = startIndex; i < contactCount; ++i, ++index)
					{
						const PxVec3& normal = s_warpScratch->shLocalContacts[idxInWarp][i];
						PxReal penetration = s_warpScratch->shLocalContactDepths[idxInWarp][i] - restDist;
						writer.writeContact(index, PxVec4(normal, penetration), compressedParticleIndex, rigidId);
					}
				}
			}
		}
	}
};
// SDF-based midphase: collides the particles of one particle system against
// one triangle mesh using the mesh's (sparse) SDF. One block handles one
// contact-manager pair; blockIdx.y == 1 runs the same pair for the system's
// diffuse particles. Contacts against static bodies go to the per-particle
// one-way buffers; the rest are appended through the global contact writer.
__device__ static inline void psSdfMeshMidphaseCollision(
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxReal* PX_RESTRICT restDistances,
	const PxgShape* PX_RESTRICT gpuShapes,
	PxgParticleSystem* particleSystems,
	PxNodeIndex* PX_RESTRICT shapeToRigidRemapTable,
	PxgParticleContactWriter& writer
)
{
	// Block-shared copies of the mesh header and its SDF texture descriptor.
	__shared__ __align__(16) char sMesh[sizeof(PxgTriangleMesh)];
	PxgTriangleMesh& triangleMesh = reinterpret_cast<PxgTriangleMesh&>(*sMesh);
	__shared__ __align__(16) char sSdfTexture[sizeof(SparseSDFTexture)];
	SparseSDFTexture& sdfTexture = reinterpret_cast<SparseSDFTexture&>(*sSdfTexture);
	__shared__ PxU32 triangleIndicesS[1024]; // batch of candidate particle indices near the SDF surface
	unsigned int cmIdx = blockIdx.x;
	const bool isDiffuseParticlesThread = blockIdx.y == 1;
	PxgShape trimeshShape, shape;
	PxU32 trimeshTransformId, particleTransformId;
	LoadShapePair<PxGeometryType::eTRIANGLEMESH, PxGeometryType::ePARTICLESYSTEM>(cmInputs, cmIdx, gpuShapes,
		trimeshShape, trimeshTransformId, shape, particleTransformId);
	PxsCachedTransform trimeshTransformCached;
	PxsCachedTransform_ReadWarp(trimeshTransformCached, transformCache + trimeshTransformId);
	const PxReal contactDist = contactDistance[trimeshTransformId] + contactDistance[particleTransformId];
	const PxReal restDist = restDistances[cmIdx];
	const PxNodeIndex rigidId = shapeToRigidRemapTable[trimeshTransformId];
	const PxU32 particleSystemId = shape.particleOrSoftbodyId;
	PxgParticleSystem& particleSystem = particleSystems[particleSystemId];
	// If there is no diffuse particles or is not using PBD, skip this function
	if (isDiffuseParticlesThread && (particleSystem.mCommonData.mMaxDiffuseParticles == 0))
		return;
	// First warp loads the mesh header into shared memory.
	if (threadIdx.x < 32)
	{
		readTriangleMesh(trimeshShape, triangleMesh);
		__syncwarp();
	}
	float4* positions;
	float4* positionsPrev;
	if (isDiffuseParticlesThread)
	{
		positions = reinterpret_cast<float4*>(particleSystem.mDiffuseSortedPos_LifeTime);
		positionsPrev = reinterpret_cast<float4*>(particleSystem.mDiffuseSortedOriginPos_LifeTime);
	}
	else
	{
		positions = reinterpret_cast<float4*>(particleSystem.mSortedPositions_InvMass);
		positionsPrev = reinterpret_cast<float4*>(particleSystem.mSortedOriginPos_InvMass);
	}
	if (threadIdx.x == 0)
		sdfTexture.initialize(triangleMesh); //The texture is stored in shared memory - only one threads needs to initialize it
	__syncthreads();
	PxU32 numParticles;
	PxgParticleContactInfo* PX_RESTRICT oneWayContactInfos;
	PxNodeIndex* PX_RESTRICT oneWayNodeIndices;
	PxU32* PX_RESTRICT oneWayContactCounts;
	if (isDiffuseParticlesThread)
	{
		numParticles = *particleSystem.mNumDiffuseParticles;
		oneWayContactInfos = particleSystem.mDiffuseOneWayContactInfos;
		oneWayNodeIndices = particleSystem.mDiffuseOneWayNodeIndex;
		oneWayContactCounts = particleSystem.mDiffuseOneWayContactCount;
	}
	else
	{
		numParticles = particleSystem.mCommonData.mNumParticles;
		oneWayContactInfos = particleSystem.mOneWayContactInfos;
		oneWayNodeIndices = particleSystem.mOneWayNodeIndex;
		oneWayContactCounts = particleSystem.mOneWayContactCount;
	}
	const PxU32* PX_RESTRICT gridParticleIndex = particleSystem.mSortedToUnsortedMapping;
	// Cull distance expressed in vertex space (divide by the smallest scale component).
	PxReal cullScale = contactDist;
	cullScale /= trimeshShape.scale.scale.abs().minElement();
	const PxTransform& trimeshToWorld = trimeshTransformCached.transform;
	const PxTransform particleToTrimeshTransform = trimeshToWorld.getInverse(); // world -> mesh shape space
	bool enableCCD = (particleSystem.mData.mFlags & PxParticleFlag::eENABLE_SPECULATIVE_CCD) > 0;
	// Batched loop: findInterestingVertices advances i and fills triangleIndicesS
	// with up to 1024 particle indices whose SDF distance is within cullScale.
	for (PxU32 i = 0; i < numParticles;)
	{
		PxU32 nbFoundVertices = findInterestingVertices<32, 1024>(numParticles, positions,
			trimeshShape.scale, cullScale, sdfTexture, particleToTrimeshTransform, i, triangleIndicesS);
		bool candidateContact = false;
		PxVec3 pos;
		PxVec3 normal;
		PxReal distance;
		PxReal finalSep;
		PxVec3 posPrev;
		PxReal startDist; //distPrev
		PxU32 ind = threadIdx.x < nbFoundVertices ? triangleIndicesS[threadIdx.x] : numParticles;
		if (ind < numParticles)
		{
			// Evaluate the SDF at the current and previous particle positions.
			pos = particleToTrimeshTransform.transform(PxLoad3(positions[ind]));
			PxVec3 v = shape2Vertex(pos, trimeshShape.scale.scale, trimeshShape.scale.rotation);
			posPrev = particleToTrimeshTransform.transform(PxLoad3(positionsPrev[ind]));
			PxVec3 dirPrev;
			startDist = doVertexSDFCollision(sdfTexture, shape2Vertex(posPrev, trimeshShape.scale.scale, trimeshShape.scale.rotation), dirPrev, cullScale);
			PxVec3 dir;
			distance = doVertexSDFCollision(sdfTexture, v, dir, cullScale);
			if (distance < cullScale || startDist < cullScale)
			{
				// Bring gradients back to shape space and renormalize; distances
				// are rescaled by the same factor.
				dir = vertex2ShapeNormalVector(dir, trimeshShape.scale.scale, trimeshShape.scale.rotation);
				dirPrev = vertex2ShapeNormalVector(dirPrev, trimeshShape.scale.scale, trimeshShape.scale.rotation);
				PxReal m = dir.magnitudeSquared();
				if (m > 0.0f)
				{
					m = 1.0f / PxSqrt(m);
					distance = distance * m;
					startDist = startDist * m;
					dir = dir * m;
					dirPrev = dirPrev * m;
				}
				/*candidateContact = distance < contactDist;
				if (candidateContact)
				{
					sep = sep;
					normal = dir;
				}*/
				// Prefer a contact at the previous position (stable normal)...
				if (startDist < contactDist)
				{
					normal = dirPrev;
					finalSep = startDist - restDist;
					candidateContact = true;
				}
				// ...otherwise at the current position, if still outside the surface.
				if (!candidateContact && distance > 0.f)
				{
					if (distance < contactDist)
					{
						normal = dir;
						finalSep = (distance - restDist) + normal.dot(posPrev - pos);
						candidateContact = true;
					}
				}
				// CCD fallback: the particle tunneled inside; re-sample at the point
				// where the motion ray crosses the rest surface.
				if (enableCCD && !candidateContact)
				{
					if (distance < 0.f)
					{
						PxVec3 d = (pos - posPrev).getNormalized();
						PxReal displacement = -PxMax(0.f, startDist - restDist) / (dir.dot(d));
						PxVec3 pt = posPrev + d * displacement;
						distance = doVertexSDFCollision(sdfTexture, shape2Vertex(particleToTrimeshTransform.transform(pt), trimeshShape.scale.scale, trimeshShape.scale.rotation), dir, cullScale);
						dir = vertex2ShapeNormalVector(dir, trimeshShape.scale.scale, trimeshShape.scale.rotation);
						// NOTE(review): 'm' here is the inverse magnitude of the ORIGINAL
						// dir, reused to rescale the re-sampled dir/distance — looks like
						// an approximation (or a bug); confirm it should not be recomputed.
						if (m > 0.0f)
						{
							distance = distance * m;
							dir = dir * m;
						}
						if (distance < contactDist)
						{
							normal = dir;
							finalSep = (distance - restDist) + normal.dot(posPrev - pt);
							candidateContact = true;
						}
					}
				}
			}
			// Drop contacts suppressed by the particle/rigid element filter pairs.
			const PxU64 particleMask = PxEncodeParticleIndex(particleSystemId, gridParticleIndex[ind]);
			if (!isDiffuseParticlesThread && find(particleSystem, rigidId.getInd(), particleMask))
				candidateContact = false;
		}
		// Static bodies: store into the per-particle one-way buffers when space allows.
		if (rigidId.isStaticBody())
		{
			if (candidateContact)
			{
				PxU32 startIndex = PxU32(atomicAdd((PxI32*)&oneWayContactCounts[ind], 1));
				const PxU32 outputIndex = ind + (startIndex) * numParticles;
				if (startIndex < PxgParticleContactInfo::MaxStaticContactsPerParticle)
				{
					oneWayContactInfos[outputIndex].mNormal_PenW = make_float4(normal.x, normal.y, normal.z, finalSep/*sep - restDist + normal.dot(posPrev - pos)*/);
					oneWayNodeIndices[outputIndex] = rigidId;
					candidateContact = false; //Contact is stored, so it is not a candidate contact anymore
				}
			}
		}
		// Remaining candidates are appended to the global contact stream.
		// NOTE(review): diffuse candidates are counted into numTotalContacts here
		// but skipped by the write below — verify those slots are handled elsewhere.
		PxU32 contactIndex = globalScanExclusive<32>(candidateContact, writer.numTotalContacts);
		if (candidateContact && !isDiffuseParticlesThread)
		{
			normal = trimeshToWorld.rotate(normal);
			PxVec4 normalPen(normal.x, normal.y, normal.z, finalSep/*sep - restDist + normal.dot(posPrev - pos)*/);
			writer.writeContact(contactIndex, normalPen,
				PxEncodeParticleIndex(particleSystemId, ind), rigidId);
		}
	}
}
// BV32 midphase: collides the particles (or diffuse particles, blockIdx.z == 1)
// of one particle system against one triangle mesh. Work is distributed one
// warp per group of 32 particles; each warp runs a cooperative BV32 traversal
// via PsTreeContactGenTraverser using the shared-memory scratch s_warpScratch.
template<unsigned int WarpsPerBlock>
__device__ static inline void psMeshMidphaseCollision(
	const bool isTGS,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxReal* PX_RESTRICT restDistances,
	const PxgShape* PX_RESTRICT gpuShapes,
	PsMidphaseScratch* s_warpScratch,
	PxgParticleSystem* particleSystems,
	PxNodeIndex* PX_RESTRICT shapeToRigidRemapTable,
	PxgParticleContactWriter& writer
)
{
	//thread index in warp
	const unsigned int idxInWarp = threadIdx.x;
	//warp index
	const unsigned int warpIdx = threadIdx.y;//(idx >> LOG2_WARP_SIZE);
	//warp index in block
	//const unsigned int idx = idxInWarp + warpIdx * WARP_SIZE;
	unsigned int cmIdx = blockIdx.y;
	const bool isDiffuseParticlesThread = blockIdx.z == 1;
	PxgShape trimeshShape, shape;
	PxU32 trimeshTransformId, particleTransformId;
	LoadShapePair<PxGeometryType::eTRIANGLEMESH, PxGeometryType::ePARTICLESYSTEM>(cmInputs, cmIdx, gpuShapes,
		trimeshShape, trimeshTransformId, shape, particleTransformId);
	PxsCachedTransform trimeshTransformCached;
	PxsCachedTransform_ReadWarp(trimeshTransformCached, transformCache + trimeshTransformId);
	const PxReal contactDist = contactDistance[trimeshTransformId] + contactDistance[particleTransformId];
	const PxReal restDist = restDistances[cmIdx];
	const PxNodeIndex rigidId = shapeToRigidRemapTable[trimeshTransformId];
	const PxU32 particleSystemId = shape.particleOrSoftbodyId;
	PxgParticleSystem& particleSystem = particleSystems[particleSystemId];
	// If there is no diffuse particles or is not using PBD, skip this function
	if (isDiffuseParticlesThread && (particleSystem.mCommonData.mMaxDiffuseParticles == 0))
		return;
	const PxU8 * trimeshGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape.hullOrMeshPtr);
	// Lane 0 of each warp fills the warp's shared scratch.
	if (idxInWarp == 0)
	{
		s_warpScratch->meshToWorld = trimeshTransformCached.transform;
		s_warpScratch->trimeshScale = trimeshShape.scale;
		readTriangleMesh(trimeshGeomPtr, s_warpScratch->bv32PackedNodes, s_warpScratch->trimeshVerts, s_warpScratch->trimeshTriIndices);
		if (isDiffuseParticlesThread)
		{
			s_warpScratch->currentPositions = reinterpret_cast<float4*>(particleSystem.mDiffuseSortedOriginPos_LifeTime);
			s_warpScratch->predictedPositions = reinterpret_cast<float4*>(particleSystem.mDiffuseSortedPos_LifeTime);
		}
		else
		{
			s_warpScratch->currentPositions = reinterpret_cast<float4*>(particleSystem.mSortedOriginPos_InvMass);
			s_warpScratch->predictedPositions = reinterpret_cast<float4*>(particleSystem.mSortedPositions_InvMass);
		}
		s_warpScratch->enableCCD = (particleSystem.mData.mFlags & PxParticleFlag::eENABLE_SPECULATIVE_CCD) > 0;
	}
	__syncwarp();
	PxU32 numParticles;
	PxgParticleContactInfo* PX_RESTRICT oneWayContactInfos;
	PxNodeIndex* PX_RESTRICT oneWayNodeIndices;
	PxU32* PX_RESTRICT oneWayContactCounts;
	if (isDiffuseParticlesThread)
	{
		numParticles = *particleSystem.mNumDiffuseParticles;
		oneWayContactInfos = particleSystem.mDiffuseOneWayContactInfos;
		oneWayNodeIndices = particleSystem.mDiffuseOneWayNodeIndex;
		oneWayContactCounts = particleSystem.mDiffuseOneWayContactCount;
	}
	else
	{
		numParticles = particleSystem.mCommonData.mNumParticles;
		oneWayContactInfos = particleSystem.mOneWayContactInfos;
		oneWayNodeIndices = particleSystem.mOneWayNodeIndex;
		oneWayContactCounts = particleSystem.mOneWayContactCount;
	}
	// Distribute nbWarpsRequired particle groups over the fixed launch grid.
	const PxU32 nbWarpsRequired = (numParticles + WARP_SIZE - 1) / WARP_SIZE;
	const PxU32 totalNumWarps = PxgParticleSystemKernelGridDim::PS_MESH_COLLISION * PS_MIDPHASE_COLLISION_WAPRS_PER_BLOCK;
	const PxU32 nbIterationsPerWarps = (nbWarpsRequired + totalNumWarps - 1) / totalNumWarps;
	const PxU32* PX_RESTRICT gridParticleIndex = particleSystem.mSortedToUnsortedMapping;
	for (PxU32 i = 0; i < nbIterationsPerWarps; ++i)
	{
		const PxU32 workIndex = i + (warpIdx + WarpsPerBlock * blockIdx.x) * nbIterationsPerWarps;
		if (workIndex < nbWarpsRequired)
		{
			const PxU32 particleIndex = idxInWarp + workIndex * WARP_SIZE;
			// NOTE(review): particleIndex can be >= numParticles in the last group and
			// the mapping lookup below is not guarded — verify the sorted mapping
			// buffer is padded. Also, the per-lane 'return' on a filter hit diverges
			// from the warp-cooperative traversal below (which uses FULL_MASK
			// warp intrinsics) — confirm this is safe.
			const PxU64 particleMask = PxEncodeParticleIndex(particleSystemId, gridParticleIndex[particleIndex]);
			if (!isDiffuseParticlesThread && find(particleSystem, rigidId.getInd(), particleMask))
				return;
			PsTreeContactGenTraverser traverser(
				s_warpScratch,
				numParticles,
				particleIndex,
				isDiffuseParticlesThread,
				isTGS,
				contactDist
			);
			bv32TreeTraversal<PsTreeContactGenTraverser, WarpsPerBlock>(s_warpScratch->bv32PackedNodes, s_warpScratch->sBv32Nodes, traverser);
			traverser.finalizeFullWarp(idxInWarp, particleSystemId,
				rigidId,
				oneWayContactInfos,
				oneWayNodeIndices,
				oneWayContactCounts,
				writer,
				isDiffuseParticlesThread,
				restDist
			);
		}
	}
}
// Kernel entry point for SDF-based particle-vs-mesh collision. One block per
// contact-manager pair (blockIdx.x); blockIdx.y == 1 processes the pair's
// diffuse particles. All work is delegated to psSdfMeshMidphaseCollision.
extern "C" __global__
__launch_bounds__(1024, 1)
void ps_sdfMeshCollisonLaunch(
	const PxReal tolerenceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxReal* PX_RESTRICT restDistances,
	const PxgShape* PX_RESTRICT gpuShapes,
	PxgParticleSystem* PX_RESTRICT particleSystems, //output
	PxNodeIndex* PX_RESTRICT shapeToRigidRemapTable,
	PxgParticleContactWriter writer
)
{
	psSdfMeshMidphaseCollision(
		tolerenceLength,
		cmInputs,
		transformCache,
		contactDistance,
		restDistances,
		gpuShapes,
		particleSystems,
		shapeToRigidRemapTable,
		writer
	);
}
// Kernel entry point for BV32-midphase particle-vs-mesh collision. Carves one
// PsMidphaseScratch per warp out of shared memory (threadIdx.y is the warp
// index within the block) and forwards everything to psMeshMidphaseCollision.
extern "C" __global__
//__launch_bounds__(MIDPHASE_WARPS_PER_BLOCK * WARP_SIZE, 4)
void ps_meshCollisonLaunch(
	const bool isTGS,
	const PxReal tolerenceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxReal* PX_RESTRICT restDistances,
	const PxgShape* PX_RESTRICT gpuShapes,
	PxgParticleSystem* PX_RESTRICT particleSystems, //output
	PxNodeIndex* PX_RESTRICT shapeToRigidRemapTable,
	PxgParticleContactWriter writer
)
{
	// Raw byte storage for one scratch structure per warp in this block.
	__shared__ __align__(16) PxU8 sScratchBuffers[PS_MIDPHASE_COLLISION_WAPRS_PER_BLOCK][sizeof(PsMidphaseScratch)];
	PsMidphaseScratch* warpScratch = reinterpret_cast<PsMidphaseScratch*>(sScratchBuffers[threadIdx.y]);
	psMeshMidphaseCollision<PS_MIDPHASE_COLLISION_WAPRS_PER_BLOCK>(
		isTGS, tolerenceLength, cmInputs, transformCache,
		contactDistance, restDistances, gpuShapes,
		warpScratch, particleSystems, shapeToRigidRemapTable, writer);
}

View File

@@ -0,0 +1,765 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __SDF_COLLISION_CUH__
#define __SDF_COLLISION_CUH__
#include "cudaNpCommon.h"
#include "foundation/PxVec3.h"
#include "geometry/PxMeshScale.h"
#include "GuDistancePointTetrahedron.h"
#include "GuTriangleRefinement.h"
#include "triangleMesh.cuh"
#include "utils.cuh"
namespace physx
{
// Shared-memory-resident descriptor for sampling a mesh SDF stored in CUDA
// 3D textures. Supports either a single dense grid or a two-level sparse
// layout: a coarse background grid plus high-resolution subgrids allocated
// only in the narrow band around the zero isolevel.
struct PX_ALIGN_PREFIX(16) SparseSDFTexture
{
	//Dense info - for coarse resolution background grid
	PxVec3 sdfBoxLower;     // lower corner of the SDF support box (mesh vertex space)
	PxReal sdfDx;           // grid cell size
	PxVec3 sdfBoxHigher;    // upper corner of the SDF support box
	PxReal invSdfDx;        // 1 / sdfDx
	CUtexObject texture;    // dense/background SDF texture
	//Sparse info - for high resolution in narrow band around isolevel 0
	CUtexObject textureSubgrids;      // 3D texture atlas holding the subgrid blocks
	uint3 coarseSize;                 // background grid dimensions
	PxU32 subgridSize;                // samples per subgrid edge; 0 == fully dense SDF
	const PxU32* subgridStartSlots;   // packed atlas coordinates per coarse cell (0xFFFFFFFF == no subgrid)
	PxReal fineToCoarse; //Inverse of subgridSize (only meaningful when subgridSize != 0; Sample() takes the dense path otherwise)
	uint4 sdfDims;                    // fine-resolution SDF dimensions
	PxReal subgridsMinSdfValue;       // dequantization offset for subgrid samples
	PxReal subgridsSdfValueRange;     // dequantization scale for subgrid samples
	// Populates this descriptor from a mesh loaded into shared memory.
	// Only one thread should call this (the struct itself lives in shared memory).
	PX_INLINE __device__ void initialize(const PxgTriangleMesh& mesh)
	{
		texture = mesh.mTexObject;
		// Dense SDFs shrink the support box by half a cell so sampling stays
		// within texel centers; sparse SDFs use the full box.
		const PxReal margin = mesh.subgridSize == 0 ? 0.5f : 0.0f;
		sdfBoxLower = mesh.meshLower + margin * PxVec3(mesh.spacing);
		sdfBoxHigher = mesh.meshLower + PxVec3((mesh.sdfDims.x - margin) * mesh.spacing, (mesh.sdfDims.y - margin) * mesh.spacing, (mesh.sdfDims.z - margin) * mesh.spacing);
		invSdfDx = 1.f / mesh.spacing;
		sdfDx = mesh.spacing;
		textureSubgrids = mesh.mTexObjectSparse;
		subgridStartSlots = mesh.subgridStarts;
		sdfDims = mesh.sdfDims;
		coarseSize.x = mesh.sdfDims.x / PxMax(1u, mesh.subgridSize);
		coarseSize.y = mesh.sdfDims.y / PxMax(1u, mesh.subgridSize);
		coarseSize.z = mesh.sdfDims.z / PxMax(1u, mesh.subgridSize);
		//sdfDx = 1.0f / invSdfDx; // dx;
		subgridSize = mesh.subgridSize;
		fineToCoarse = 1.0f / mesh.subgridSize;
		subgridsMinSdfValue = mesh.subgridsMinSdfValue;
		subgridsSdfValueRange = mesh.subgridsMaxSdfValue - mesh.subgridsMinSdfValue;
		//if (threadIdx.x == 0)
		//	printf("%f, %f\n", mesh.subgridsMinSdfValue, mesh.subgridsMaxSdfValue);
	}
private:
	// Linear index into the coarse grid.
	static PX_INLINE __device__ PxU32 idx(PxU32 x, PxU32 y, PxU32 z, PxU32 width, PxU32 height)
	{
		return z * width * height + y * width + x;
	}
	// Decodes a packed (10|10|10-bit) atlas slot and offsets f to the subgrid's
	// storage location inside the 3D atlas texture. Each subgrid block occupies
	// (subgridSize + 1)^3 texels so neighboring blocks share boundary samples.
	static PX_INLINE __device__ void applySubgridStart(PxU32 triple, PxVec3& f, const PxU32 subgridSize)
	{
		f.x += (triple & 1023) * (subgridSize + 1);
		triple = triple >> 10;
		f.y += (triple & 1023) * (subgridSize + 1);
		triple = triple >> 10;
		f.z += (triple & 1023) * (subgridSize + 1);
	}
	// Subgrid texels are stored normalized; map back to real SDF values.
	PX_INLINE __device__ PxReal applySubgridSdfScale(PxReal rawSdfValue) const
	{
		return rawSdfValue * subgridsSdfValueRange + subgridsMinSdfValue;
	}
public:
	// Trilinearly samples the SDF at fine-grid coordinate f (in cells,
	// relative to sdfBoxLower). Falls back to the coarse background grid
	// where no subgrid is allocated.
	PX_INLINE __device__ PxReal Sample(PxVec3 f) const
	{
		if (subgridSize == 0)
		{
			//Dense sdf
			return tex3D<float>(texture, f.x + 0.5f, f.y + 0.5f, f.z + 0.5f);
		}
		else
		{
			// Locate the coarse cell containing f.
			const PxU32 xBase = PxClamp(PxU32(f.x * fineToCoarse), 0u, coarseSize.x - 1);
			const PxU32 yBase = PxClamp(PxU32(f.y * fineToCoarse), 0u, coarseSize.y - 1);
			const PxU32 zBase = PxClamp(PxU32(f.z * fineToCoarse), 0u, coarseSize.z - 1);
			const PxU32 i = idx(xBase, yBase, zBase, coarseSize.x, coarseSize.y);
			const PxU32 startSlot = subgridStartSlots[i];
			if (startSlot == 0xFFFFFFFF)
			{
				//Evaluate coarse
				f *= fineToCoarse;
				//+0.5: Align with texel center
				//https://forums.developer.nvidia.com/t/understanding-cuda-texture-2d-linear-interpolation/213924
				return tex3D<float>(texture, f.x + 0.5f, f.y + 0.5f, f.z + 0.5f);
			}
			else
			{
				// Local coordinate within the subgrid block.
				// NOTE(review): the upper clamp bound subgridSize + 1 looks one past the
				// last local sample index (subgridSize) — presumably covered by the
				// shared-boundary texel; confirm.
				f.x = PxClamp(f.x - xBase * subgridSize, 0.0f, PxReal(subgridSize + 1));
				f.y = PxClamp(f.y - yBase * subgridSize, 0.0f, PxReal(subgridSize + 1));
				f.z = PxClamp(f.z - zBase * subgridSize, 0.0f, PxReal(subgridSize + 1));
				//Transform to subgrid storage location in 3D texture
				applySubgridStart(startSlot, f, subgridSize);
				//+0.5: Align with texel center
				//https://forums.developer.nvidia.com/t/understanding-cuda-texture-2d-linear-interpolation/213924
				PxReal v = tex3D<float>(textureSubgrids, f.x + 0.5f, f.y + 0.5f, f.z + 0.5f);
				return applySubgridSdfScale(v);
			}
		}
	}
}
PX_ALIGN_SUFFIX(16);
// Evaluates the SDF at localPos (mesh vertex space). Query points outside the
// SDF support box are projected onto the box and the projection distance is
// added to the sampled value, giving a conservative estimate outside the grid.
template<typename T>
PX_INLINE __device__ PxReal PxSdfDistance(const T& texture, const PxVec3& localPos)
{
	// Project the query point onto the SDF support box.
	const PxVec3 insidePt = localPos.maximum(texture.sdfBoxLower).minimum(texture.sdfBoxHigher);
	// How far the original point lies outside the box (zero when inside).
	const PxReal outsideDist = (localPos - insidePt).magnitude();
	// Convert to grid-cell coordinates and sample the texture.
	const PxVec3 gridCoord = (insidePt - texture.sdfBoxLower) * texture.invSdfDx;
	return texture.Sample(gridCoord) + outsideDist;
}
// Central-difference gradient of the SDF at localPos (unnormalized).
// In the grid interior the differences are taken directly on texture samples
// one cell apart; near the border (or outside the box) it falls back to
// differencing the clamped distance function PxSdfDistance.
template<typename T>
PX_INLINE __device__ PxVec3 PxVolumeGrad(const T& texture, const PxVec3& localPos)
{
	const PxVec3 insidePt = localPos.maximum(texture.sdfBoxLower).minimum(texture.sdfBoxHigher);
	const PxVec3 gridCoord = (insidePt - texture.sdfBoxLower) * texture.invSdfDx;
	PxVec3 gradient;
	const bool interior =
		gridCoord.x >= 1.0f && gridCoord.x <= texture.sdfDims.x - 2.0f &&
		gridCoord.y >= 1.0f && gridCoord.y <= texture.sdfDims.y - 2.0f &&
		gridCoord.z >= 1.0f && gridCoord.z <= texture.sdfDims.z - 2.0f;
	if (interior)
	{
		// Fast path: neighbors one cell away are guaranteed inside the grid.
		gradient.x = texture.Sample(PxVec3(gridCoord.x + 1.0f, gridCoord.y, gridCoord.z)) -
			texture.Sample(PxVec3(gridCoord.x - 1.0f, gridCoord.y, gridCoord.z));
		gradient.y = texture.Sample(PxVec3(gridCoord.x, gridCoord.y + 1.0f, gridCoord.z)) -
			texture.Sample(PxVec3(gridCoord.x, gridCoord.y - 1.0f, gridCoord.z));
		gradient.z = texture.Sample(PxVec3(gridCoord.x, gridCoord.y, gridCoord.z + 1.0f)) -
			texture.Sample(PxVec3(gridCoord.x, gridCoord.y, gridCoord.z - 1.0f));
		return gradient;
	}
	// Border/outside path: difference the clamped distance field instead.
	gradient.x = PxSdfDistance(texture, localPos + PxVec3(texture.sdfDx, 0.f, 0.f)) -
		PxSdfDistance(texture, localPos - PxVec3(texture.sdfDx, 0.f, 0.f));
	gradient.y = PxSdfDistance(texture, localPos + PxVec3(0.f, texture.sdfDx, 0.f)) -
		PxSdfDistance(texture, localPos - PxVec3(0.f, texture.sdfDx, 0.f));
	gradient.z = PxSdfDistance(texture, localPos + PxVec3(0.f, 0.f, texture.sdfDx)) -
		PxSdfDistance(texture, localPos - PxVec3(0.f, 0.f, texture.sdfDx));
	return gradient;
}
// Samples the SDF at localPos and derives a contact direction from the given
// (unnormalized) SDF gradient. For points outside the support box the box
// offset is folded into both the direction and the returned distance.
// Returns PX_MAX_F32 when the sampled distance is not below tolerance.
template<typename T>
PX_INLINE __device__ PxReal PxSdfSampleWithGrad(const T& texture, const PxVec3& localPos,
	const PxVec3& sdfGradient, PxVec3& dir, PxReal tolerance = PX_MAX_F32)
{
	const PxVec3 insidePt = localPos.maximum(texture.sdfBoxLower).minimum(texture.sdfBoxHigher);
	const PxVec3 outsideOffset = localPos - insidePt; // zero when inside the box
	const PxVec3 gridCoord = (insidePt - texture.sdfBoxLower) * texture.invSdfDx;
	const PxReal dist = texture.Sample(gridCoord);
	if (dist >= tolerance)
		return PX_MAX_F32;
	// Blend the gradient direction with the out-of-box offset so the
	// direction points away from the surface even beyond the SDF support.
	dir = (sdfGradient.getNormalized() * PxAbs(dist) + outsideOffset).getNormalized();
	return dist + dir.dot(outsideOffset);
}
// Point-vs-SDF query: evaluates the distance and contact direction at vertex v.
// Returns PX_MAX_F32 when v is farther than tolerance from the surface.
static __device__ PxReal doVertexSDFCollision(SparseSDFTexture& texture, const PxVec3& v,
	PxVec3& dir, PxReal tolerance)
{
	PxVec3 grad = PxVolumeGrad(texture, v);
	const PxReal gradLen = grad.normalize();
	if (gradLen == 0.0f)
	{
		// The gradient vanishes at SDF discontinuities (e.g. the exact center of
		// a cube); substitute a fixed arbitrary unit direction to escape it.
		grad = PxVec3(0.5448762f, 0.672269f, 0.398825f).getNormalized();
	}
	return PxSdfSampleWithGrad(texture, v, grad, dir, tolerance);
}
//#define gr 1.6180339887498948482045868343656381177203091798057628621354486227f
#define invGr 0.6180339887498948482045868343656381177203091798057628621354486227f
// Segment-vs-SDF query: golden-section search for the parameter t in [0,1]
// that minimizes the SDF distance along v0..v1, then evaluates distance and
// contact direction at that point. Outputs the closest point and t.
static __device__ PxReal doSegmentSDFCollision(SparseSDFTexture& texture, const PxVec3& v0, const PxVec3& v1,
	PxVec3& point, PxVec3& dir, PxReal tolerance, PxReal& t)
{
	// Bracket [lo, hi] with the two interior golden-ratio probes m1 < m2.
	PxReal lo = 0.0f;
	PxReal hi = 1.0f;
	PxReal m1 = hi - (hi - lo) * invGr;
	PxReal m2 = lo + (hi - lo) * invGr;
	for (PxU32 k = 0; k < 16; ++k)
	{
		const PxVec3 probe1 = (1.0f - m1) * v0 + m1 * v1;
		const PxReal dist1 = PxSdfDistance(texture, probe1);
		const PxVec3 probe2 = (1.0f - m2) * v0 + m2 * v1;
		const PxReal dist2 = PxSdfDistance(texture, probe2);
		// Keep the sub-interval that contains the smaller sample.
		if (dist1 < dist2)
			hi = m2;
		else
			lo = m1;
		m1 = hi - (hi - lo) * invGr;
		m2 = lo + (hi - lo) * invGr;
	}
	t = 0.5f * (lo + hi);
	const PxVec3 closest = (1.0f - t) * v0 + t * v1;
	PxVec3 grad = PxVolumeGrad(texture, closest);
	if (grad.normalize() == 0.0f)
	{
		// Vanishing gradient at a discontinuity (e.g. the exact center of a
		// cube): fall back to a fixed arbitrary unit direction.
		grad = PxVec3(0.5448762f, 0.672269f, 0.398825f).getNormalized();
	}
	point = closest;
	return PxSdfSampleWithGrad(texture, closest, grad, dir, tolerance);
}
// Tetrahedron-vs-SDF query: finds (approximately) the point of the
// tetrahedron v0..v3 closest to the SDF surface via projected gradient
// descent in barycentric coordinates, then evaluates distance and contact
// direction there. Outputs the closest point.
static __device__ PxReal doTetrahedronSDFCollision(SparseSDFTexture& texture, const PxVec3& v0, const PxVec3& v1, const PxVec3& v2, const PxVec3& v3,
	PxVec3& point, PxVec3& dir, PxReal tolerance/*, PxVec4& uvw*/)
{
	// Choose the starting iterate among the centroid and the four vertices,
	// whichever has the smallest SDF distance.
	PxVec3 center = (v0 + v1 + v2 + v3) * 0.25f;
	PxVec3 p = center;
	PxReal dist = PxSdfDistance(texture, p);
	PxReal d0 = PxSdfDistance(texture, v0);
	PxReal d1 = PxSdfDistance(texture, v1);
	PxReal d2 = PxSdfDistance(texture, v2);
	PxReal d3 = PxSdfDistance(texture, v3);
	PxVec4 uvw = PxVec4(0.f); // barycentric coordinates of the iterate
	if (d0 < d1 && d0 < d2 && d0 < d3 && d0 < dist)
	{
		dist = d0;
		p = v0;
		uvw.x = 1.f;
	}
	else if (d1 < d2 && d1 < d3 && d1 < dist)
	{
		dist = d1;
		p = v1;
		uvw.y = 1.f;
	}
	else if (d2 < d3 && d2 < dist)
	{
		dist = d2;
		p = v2;
		uvw.z = 1.f;
	}
	else if (d3 < dist)
	{
		dist = d3;
		p = v3;
		uvw.w = 1.f;
	}
	else
		uvw = PxVec4(0.25f); // centroid wins
	// Largest vertex-to-iterate distance; used to scale the initial step size.
	PxReal difference = PxSqrt(
		PxMax(
			PxMax((v0 - p).magnitudeSquared(), (v1 - p).magnitudeSquared()),
			PxMax((v2 - p).magnitudeSquared(), (v3 - p).magnitudeSquared())));
	const PxReal eps = 1e-4;
	const PxReal toleranceSq = 1e-3f*1e-3f; // convergence threshold on the barycentric update
	PxVec3 sdfGradient;
	PxReal step = 1.f / (2.f*difference);
	// Projected gradient descent with a geometrically shrinking step.
	for (PxU32 iter = 0; iter < 16; ++iter)
	{
		sdfGradient = PxVolumeGrad(texture, p);
		// NOTE(review): normalizing here makes the step magnitude depend only on
		// geometry, not on the (unnormalized) gradient scale — confirm intended.
		if (sdfGradient.normalize() == 0.0f)
		{
			//We ran into a discontinuity e. g. the exact center of a cube
			//Just pick an arbitrary gradient of unit length to move out of the discontinuity
			sdfGradient = PxVec3(0.5448762f, 0.672269f, 0.398825f).getNormalized();
		}
		// Directional derivatives of the SDF w.r.t. each barycentric coordinate.
		PxReal dfdu = sdfGradient.dot(v0 - p);
		PxReal dfdv = sdfGradient.dot(v1 - p);
		PxReal dfdw = sdfGradient.dot(v2 - p);
		PxReal dfd4 = sdfGradient.dot(v3 - p);
		PxVec4 newUVW;
		{
			newUVW = uvw;
			newUVW.x -= step * dfdu;
			newUVW.y -= step * dfdv;
			newUVW.z -= step * dfdw;
			newUVW.w -= step * dfd4;
			step = step * 0.8f;
			// If the update left the simplex, project it back by finding the
			// closest point on the canonical tetrahedron in barycentric space.
			if (((newUVW.x <= -eps || newUVW.x >= 1.f + eps) || (newUVW.y <= -eps || newUVW.y >= 1.f + eps) ||
				(newUVW.z <= -eps || newUVW.z >= 1.f + eps) || (newUVW.w <= -eps || newUVW.w >= 1.f + eps)))
			{
				PxVec3 cp = Gu::closestPtPointTetrahedron(newUVW.getXYZ(), PxVec3(1.0f, 0.0f, 0.0f), PxVec3(0.0f, 1.0f, 0.0f), PxVec3(0.f, 0.f, 1.f), PxVec3(0.0f));
				newUVW.x = cp.x;
				newUVW.y = cp.y;
				newUVW.z = cp.z;
				newUVW.w = 1.0f - cp.x - cp.y - cp.z; //TODO: Verify this logic
				assert(newUVW.w >= -eps && newUVW.w <= 1.f + eps);
			}
			p = v0 * newUVW.x + v1 * newUVW.y + v2 * newUVW.z + v3 * newUVW.w;
			// Converged when the barycentric update becomes negligible.
			if ((uvw - newUVW).magnitudeSquared() < toleranceSq)
			{
				/*if(iter != 0)
					printf("Iter = %i\n", iter);*/
				break;
			}
			uvw = newUVW;
		}
	}
	// Final distance/direction evaluation at the converged point.
	sdfGradient = PxVolumeGrad(texture, p);
	point = p;
	dist = PxSdfSampleWithGrad(texture, p, sdfGradient, dir, tolerance);
	return dist;
}
//Computes a contact between a triangle (v0, v1, v2) and a signed distance field.
//The point on the triangle with the (locally) smallest SDF value is found by a projected
//gradient descent on the barycentric coordinates uvw, then the SDF is sampled there.
//point (out): location of the detected minimum, dir (out): contact direction from
//PxSdfSampleWithGrad, tolerance: distance threshold forwarded to the SDF sampler.
//Returns the SDF distance at 'point'.
static __device__ PxReal doTriangleSDFCollision(SparseSDFTexture& texture, const PxVec3& v0, const PxVec3& v1, const PxVec3& v2,
	PxVec3& point, PxVec3& dir, PxReal tolerance)
{
	const PxReal third = 1.f / 3.f;
	PxVec3 center = (v0 + v1 + v2) * third;
	PxVec3 p = center;
	PxReal dist = PxSdfDistance(texture, p);
	PxReal d0 = PxSdfDistance(texture, v0);
	PxReal d1 = PxSdfDistance(texture, v1);
	PxReal d2 = PxSdfDistance(texture, v2);
	//PxVec3 nor = (v1 - v0).cross(v2 - v0);
	PxVec3 uvw(0.f); //Barycentric coordinates of p w.r.t. the triangle
	// choose starting iterate among centroid and triangle vertices
	if (d0 < d1 && d0 < d2 && d0 < dist)
	{
		p = v0;
		uvw.x = 1.f;
	}
	else if (d1 < d2 && d1 < dist)
	{
		p = v1;
		uvw.y = 1.f;
	}
	else if (d2 < dist)
	{
		p = v2;
		uvw.z = 1.f;
	}
	else
		uvw = PxVec3(third); //Centroid
	//Radius of a sphere around p that contains all three vertices; used to scale the step size
	PxReal difference = PxSqrt(PxMax((v0 - p).magnitudeSquared(), PxMax((v1 - p).magnitudeSquared(),
		(v2 - p).magnitudeSquared())));
	const PxReal toleranceSq = 1e-3f*1e-3f; //Convergence threshold on the barycentric step (squared)
	PxVec3 sdfGradient;
	PxReal step = 1.f / (2.f*difference); //Initial step size, inversely proportional to the triangle's extent
	for (PxU32 iter = 0; iter < 16; ++iter)
	{
		sdfGradient = PxVolumeGrad(texture, p);
		//Normalized so the directional derivatives below depend only on the gradient's
		//direction, keeping the step size heuristic independent of its magnitude
		if (sdfGradient.normalize() == 0.0f)
		{
			//We ran into a discontinuity e. g. the exact center of a cube
			//Just pick an arbitrary gradient of unit length to move out of the discontinuity
			sdfGradient = PxVec3(0.571846586f, 0.705545099f, 0.418566116f);
		}
		//Directional derivatives of the SDF w.r.t. each barycentric coordinate
		PxReal dfdu = sdfGradient.dot(v0 - p);
		PxReal dfdv = sdfGradient.dot(v1 - p);
		PxReal dfdw = sdfGradient.dot(v2 - p);
		PxVec3 newUVW;
		{
			//Gradient descent step in barycentric space with a geometrically shrinking step size
			newUVW = uvw;
			newUVW.x -= step * dfdu;
			newUVW.y -= step * dfdv;
			newUVW.z -= step * dfdw;
			step = step * 0.8f;
			//Project back onto the valid barycentric domain of the triangle
			newUVW = Gu::closestPtPointBaryTriangle(newUVW);
			p = v0 * newUVW.x + v1 * newUVW.y + v2 * newUVW.z;
			//Stop once the barycentric update is below the tolerance
			if ((uvw - newUVW).magnitudeSquared() < toleranceSq)
			{
				/*if(iter != 0)
					printf("Iter = %i\n", iter);*/
				break;
			}
			uvw = newUVW;
		}
	}
	sdfGradient = PxVolumeGrad(texture, p);
	point = p;
	dist = PxSdfSampleWithGrad(texture, p, sdfGradient, dir, tolerance);
	return dist;
}
//Block-cooperatively appends one element per emitting thread to a shared memory buffer
//of capacity TargetCount; elements that do not fit are dropped.
//Returns the new number of elements in the buffer (clamped to TargetCount).
//T should be of type PxU32 or a multiple of it (uint2, uint3, uint4); its first PxU32
//holds the primitive index (see the resume-offset logic below).
//When the buffer gets filled to exactly TargetCount elements, 'offset' is set to the
//index right after the last accepted element so the caller can resume from there.
template<typename T, PxU32 NbWarps, const PxU32 TargetCount>
__device__ PxU32 addIndexToBuffer(bool threadEmitsElement, T index, PxU32 nbElementsInBuffer, T* sharedMemoryBuffer, PxU32& offset)
{
	PxU32 total;
	//Exclusive scan over the whole block yields each emitting thread's write slot
	PxU32 exclusiveSum = threadBlockScanExclusive<NbWarps>(threadEmitsElement, total);
	if (threadEmitsElement)
	{
		PxU32 writeOffset = nbElementsInBuffer + exclusiveSum;
		if (writeOffset < TargetCount)
		{
			sharedMemoryBuffer[writeOffset] = index;
		}
		if (writeOffset == (TargetCount - 1))
		{
			//Get the first element
			offset = ((PxU32*)(&index))[0] + 1; //The thread that added the last element to the sharedIndices writes the new offset
		}
	}
	return min(total + nbElementsInBuffer, TargetCount);
}
//Fetches the three vertices of 'triangle', applies the mesh scale and the transform t,
//and optionally replaces the result with the encoded sub-triangle.
PX_FORCE_INLINE __device__ void getTriangleVertices(const uint4& triangle, const float4* meshVerts, const PxMeshScale& scale, const PxTransform t,
	PxU32 encodedSubIndex, PxVec3& a, PxVec3& b, PxVec3& c)
{
	const PxVec3 shapeA = vertex2Shape(PxLoad3(meshVerts[triangle.x]), scale.scale, scale.rotation);
	const PxVec3 shapeB = vertex2Shape(PxLoad3(meshVerts[triangle.y]), scale.scale, scale.rotation);
	const PxVec3 shapeC = vertex2Shape(PxLoad3(meshVerts[triangle.z]), scale.scale, scale.rotation);

	a = t.transform(shapeA);
	b = t.transform(shapeB);
	c = t.transform(shapeC);

	if (encodedSubIndex != 0)
		Gu::getSubTriangleEncoded(a, b, c, encodedSubIndex);
}
//Overload that first looks up the triangle's vertex indices in the index buffer.
PX_FORCE_INLINE __device__ void getTriangleVertices(const uint4* meshInd, const float4* meshVerts, const PxMeshScale& scale, const PxTransform t,
	PxU32 triangleIndex, PxU32 encodedSubIndex, PxVec3& a, PxVec3& b, PxVec3& c)
{
	const uint4 triangle = meshInd[triangleIndex];
	getTriangleVertices(triangle, meshVerts, scale, t, encodedSubIndex, a, b, c);
}
//Overload reading indices and vertices directly from a PxgTriangleMesh.
PX_FORCE_INLINE __device__ void getTriangleVertices(const PxgTriangleMesh& mesh, const PxMeshScale& scale, const PxTransform t,
	PxU32 triangleIndex, PxU32 encodedSubIndex, PxVec3& a, PxVec3& b, PxVec3& c)
{
	const uint4 triangle = mesh.indices[triangleIndex];

	const PxVec3 shapeA = vertex2Shape(PxLoad3(mesh.trimeshVerts[triangle.x]), scale.scale, scale.rotation);
	const PxVec3 shapeB = vertex2Shape(PxLoad3(mesh.trimeshVerts[triangle.y]), scale.scale, scale.rotation);
	const PxVec3 shapeC = vertex2Shape(PxLoad3(mesh.trimeshVerts[triangle.z]), scale.scale, scale.rotation);

	a = t.transform(shapeA);
	b = t.transform(shapeB);
	c = t.transform(shapeC);

	if (encodedSubIndex != 0)
		Gu::getSubTriangleEncoded(a, b, c, encodedSubIndex);
}
//Loads a single mesh vertex, applies the mesh scale and transforms it by t.
PX_FORCE_INLINE __device__ PxVec3 getVertex(const float4* meshVerts, const PxMeshScale& scale, const PxTransform t, PxU32 vertexIndex)
{
	const PxVec3 shapeSpaceVertex = vertex2Shape(PxLoad3(meshVerts[vertexIndex]), scale.scale, scale.rotation);
	return t.transform(shapeSpaceVertex);
}
//Overload reading the vertex directly from a PxgTriangleMesh.
PX_FORCE_INLINE __device__ PxVec3 getVertex(const PxgTriangleMesh& mesh, const PxMeshScale& scale, const PxTransform t, PxU32 vertexIndex)
{
	const PxVec3 shapeSpaceVertex = vertex2Shape(PxLoad3(mesh.trimeshVerts[vertexIndex]), scale.scale, scale.rotation);
	return t.transform(shapeSpaceVertex);
}
//Input of surface hint is optional
//Block-cooperatively collects (into sharedIndices) up to TargetCount indices of tetrahedra
//whose conservative bounding sphere, measured in mesh 1's SDF space, comes within
//'tolerance' of the SDF surface. 'startIndex' is updated so a follow-up call resumes where
//this one stopped filling the buffer (or is set to nbTets once all tets were visited).
//Returns the number of indices written to sharedIndices.
template<PxU32 NbWarps, const PxU32 TargetCount>
__device__ PxU32 findInterestingTets(PxU32 nbTets, const uint4* tetIndices, const float4* tetVerts,
	const PxMeshScale& scale1, const PxReal tolerance, const SparseSDFTexture& sdfMesh1,
	const PxTransform& aToB, PxU32& startIndex, PxU32* sharedIndices, const PxU8* surfaceHint = NULL)
{
	__shared__ PxU32 offset; //Resume point; written inside addIndexToBuffer when the buffer fills up
	if (threadIdx.x == 0)
		offset = startIndex;
	PxU32 sharedIndicesCount = 0;
	for (PxU32 i = startIndex; (i < nbTets); )
	{
		PxU32 ind = i + threadIdx.x; //One candidate tetrahedron per thread and iteration
		i += blockDim.x;
		bool addToBuffer = false;
		if (ind < nbTets)
		{
			//If a surface hint is provided, only surface tetrahedra are considered
			if (!surfaceHint || surfaceHint[ind])
			{
				const uint4 tet = tetIndices[ind];
				const PxVec3 a = aToB.transform(PxLoad3(tetVerts[tet.x]));
				const PxVec3 b = aToB.transform(PxLoad3(tetVerts[tet.y]));
				const PxVec3 c = aToB.transform(PxLoad3(tetVerts[tet.z]));
				const PxVec3 d = aToB.transform(PxLoad3(tetVerts[tet.w]));
				PxVec3 centroid = (a + b + c + d) * 0.25f;
				PxReal distInSDFSpace1 = PxSdfDistance(sdfMesh1, shape2Vertex(centroid, scale1.scale, scale1.rotation));
				//PxReal boundingSphereRadius = PxSqrt(PxMax((v0 - centroid).magnitudeSquared(), (v1 - centroid).magnitudeSquared()));
				//Radius of a sphere around the centroid that contains all four corners
				PxReal boundingSphereRadius = PxSqrt(PxMax(PxMax((a - centroid).magnitudeSquared(), (b - centroid).magnitudeSquared()),
					PxMax((c - centroid).magnitudeSquared(), (d - centroid).magnitudeSquared())));
				//Divide by the smallest scale component to get a conservative radius in SDF space
				boundingSphereRadius /= PxMin(scale1.scale.x, PxMin(scale1.scale.y, scale1.scale.z));
				addToBuffer = distInSDFSpace1 < (boundingSphereRadius + tolerance);
			}
		}
		__syncthreads(); //addIndexToBuffer performs a block-wide scan; all threads must participate
		sharedIndicesCount = addIndexToBuffer<PxU32, NbWarps, TargetCount>(addToBuffer, ind, sharedIndicesCount, sharedIndices, offset);
	}
	__syncthreads();
	//If the buffer filled up, resume from there next time; otherwise all tets were processed
	if (startIndex != offset)
		startIndex = offset;
	else
		startIndex = nbTets;
	return sharedIndicesCount;
}
//Block-cooperatively collects (into sharedIndices) up to TargetCount
//(primitiveIndex, encodedSubdivisionId) pairs of mesh 0 primitives whose conservative
//bounds, measured in mesh 1's SDF space, come within 'tolerance' of the SDF.
//With useVertices == true the primitives are vertices, otherwise (sub-)triangles.
//Entries queued in sharedSubdivisionIndices are consumed first; each expands into its
//four child sub-triangles. 'startIndex' is updated so a follow-up call resumes where
//this one stopped. Returns the number of pairs written to sharedIndices.
template<PxU32 NbWarps, const PxU32 TargetCount, bool useVertices = false>
__device__ PxU32 findInterestingTrianglesA(PxU32 mesh0NbPrimitives, const uint4* mesh0Ind, const float4* mesh0Verts,
	const PxMeshScale& scale0, const PxMeshScale& scale1, const PxReal tolerance, const SparseSDFTexture& sdfMesh1,
	const PxTransform& aToB, PxU32& startIndex, uint2* sharedIndices, PxU32 nbSubdivisionIndices, const uint2* sharedSubdivisionIndices)
{
	__shared__ PxU32 offset; //Resume point; written inside addIndexToBuffer when the buffer fills up
	if (threadIdx.x == 0)
		offset = startIndex;
	const PxU32 nbPrimitives = mesh0NbPrimitives; //Can be the number of triangles or the number of vertices depending on the template boolean useVertices
	PxU32 sharedIndicesCount = 0;
	for (PxU32 i = startIndex; (i < nbPrimitives || nbSubdivisionIndices>0) && sharedIndicesCount < TargetCount; )
	{
		PxU32 ind;
		PxU32 subdivisionLevel = 0;
		//The first 4 * nbSubdivisionIndices threads each take one of the four children of a
		//queued subdivision entry; the remaining threads consume fresh primitives
		if (threadIdx.x < 4 * nbSubdivisionIndices)
		{
			uint2 v = sharedSubdivisionIndices[threadIdx.x >> 2];
			ind = v.x;
			subdivisionLevel = v.y;
			subdivisionLevel = Gu::elevateSubdivisionId(subdivisionLevel, threadIdx.x & 3);
		}
		else
		{
			ind = i + threadIdx.x - PxMin(TargetCount, 4 * nbSubdivisionIndices);
		}
		i += blockDim.x - PxMin(TargetCount, 4 * nbSubdivisionIndices);
		nbSubdivisionIndices = 0; //It is guaranteed that all subdivision triangles get consumed in the first run because of the limited size of sharedSubdivisionIndices
		bool addToBuffer = false;
		if (ind < nbPrimitives)
		{
			if (useVertices)
			{
				//Vertex test: keep the vertex if it is within 'tolerance' of the SDF surface
				PxVec3 p = getVertex(mesh0Verts, scale0, aToB, ind);
				PxReal distInSDFSpace1 = PxSdfDistance(sdfMesh1, shape2Vertex(p, scale1.scale, scale1.rotation));
				addToBuffer = distInSDFSpace1 < tolerance;
			}
			else
			{
				//Triangle test: conservative bounding sphere around the centroid vs. SDF distance
				PxVec3 v0, v1, v2;
				getTriangleVertices(mesh0Ind, mesh0Verts, scale0, aToB, ind, subdivisionLevel, v0, v1, v2);
				const PxReal third = 1.f / 3.f;
				PxVec3 centroid = (v0 + v1 + v2) * third;
				PxReal distInSDFSpace1 = PxSdfDistance(sdfMesh1, shape2Vertex(centroid, scale1.scale, scale1.rotation));
				PxReal boundingSphereRadius = PxSqrt(PxMax((v0 - centroid).magnitudeSquared(), PxMax((v1 - centroid).magnitudeSquared(),
					(v2 - centroid).magnitudeSquared())));
				//Divide by the smallest scale component to get a conservative radius in SDF space
				boundingSphereRadius /= PxMin(scale1.scale.x, PxMin(scale1.scale.y, scale1.scale.z));
				addToBuffer = distInSDFSpace1 < (boundingSphereRadius + tolerance);
			}
		}
		__syncthreads(); //addIndexToBuffer performs a block-wide scan; all threads must participate
		sharedIndicesCount = addIndexToBuffer<uint2, NbWarps, TargetCount>(addToBuffer, make_uint2(ind, subdivisionLevel), sharedIndicesCount, sharedIndices, offset);
	}
	__syncthreads();
	//If the buffer filled up, resume from there next time; otherwise all primitives were processed
	if (startIndex != offset)
		startIndex = offset;
	else
		startIndex = nbPrimitives;
	return sharedIndicesCount;
}
//Block-cooperatively collects (into sharedIndices) up to TargetCount indices of vertices
//whose distance to the SDF surface of mesh 1 (measured in SDF space) is below 'tolerance'.
//'startIndex' is updated so a follow-up call resumes where this one stopped filling the
//buffer (or is set to nbVertices once all vertices were visited).
//Returns the number of indices written to sharedIndices.
template<PxU32 NbWarps, const PxU32 TargetCount>
__device__ PxU32 findInterestingVertices(PxU32 nbVertices, const float4* positions,
	const PxMeshScale& scale1, const PxReal tolerance, const SparseSDFTexture& sdfMesh1,
	const PxTransform& aToB, PxU32& startIndex, PxU32* sharedIndices)
{
	__shared__ PxU32 offset; //Resume point; written inside addIndexToBuffer when the buffer fills up
	if (threadIdx.x == 0)
		offset = startIndex;
	PxU32 sharedIndicesCount = 0;
	for (PxU32 i = startIndex; (i < nbVertices); )
	{
		PxU32 ind = i + threadIdx.x; //One candidate vertex per thread and iteration
		i += blockDim.x;
		bool addToBuffer = false;
		if (ind < nbVertices)
		{
			PxVec3 v = aToB.transform(PxLoad3(positions[ind]));
			PxReal distInSDFSpace1 = PxSdfDistance(sdfMesh1, shape2Vertex(v, scale1.scale, scale1.rotation));
			addToBuffer = distInSDFSpace1 < tolerance;
		}
		__syncthreads(); //addIndexToBuffer performs a block-wide scan; all threads must participate
		sharedIndicesCount = addIndexToBuffer<PxU32, NbWarps, TargetCount>(addToBuffer, ind, sharedIndicesCount, sharedIndices, offset);
	}
	__syncthreads();
	//If the buffer filled up, resume from there next time; otherwise all vertices were processed
	if (startIndex != offset)
		startIndex = offset;
	else
		startIndex = nbVertices;
	return sharedIndicesCount;
}
//Returns the squared length of the longest edge of triangle (v0, v1, v2).
//Since the longest edge is the diameter of a triangle, this is a conservative squared
//bounding-sphere radius for a sphere centered at any point inside the triangle.
//Note: the previous version also computed the (unused) centroid; that dead code is removed.
PX_FORCE_INLINE __device__ PxReal triangleRadiusSquared(const PxVec3& v0, const PxVec3& v1, const PxVec3& v2)
{
	return PxMax(PxMax((v1 - v0).magnitudeSquared(), (v2 - v1).magnitudeSquared()), (v0 - v2).magnitudeSquared());
}
//Returns the squared length of the diagonal of the SDF's bounding box.
//The texture is only read, so it is now taken by const reference (const-correctness fix;
//backward compatible for all callers).
PX_FORCE_INLINE __device__ PxReal sdfRadiusSquared(const SparseSDFTexture& sdfTexture)
{
	return (sdfTexture.sdfBoxHigher - sdfTexture.sdfBoxLower).magnitudeSquared();
}
//TargetCount is actually the maximal size of sharedSubdivisionIndices. The naming could be improved...
//Block-cooperatively appends (triangleIndex, encodedSubdivisionId) pairs to a shared memory
//refinement buffer; entries whose decoded subdivision level already reached
//maxRefinementLevel are dropped. Pairs that do not fit are dropped as well.
//Returns the new number of elements in the buffer (clamped to TargetCount).
template<PxU32 NbWarps, const PxU32 TargetCount>
__device__ PxU32 addToRefinementBuffer(bool addToBuffer, PxU32 ind, PxU32 subInd, PxU32 nbElementsInBuffer, uint2* sharedMemoryBuffer, PxU32 maxRefinementLevel)
{
	PxU32 subdivisionLevel, subTriangleIndex;
	Gu::decodeSubdivisionId(subInd, subdivisionLevel, subTriangleIndex);
	addToBuffer = addToBuffer && subdivisionLevel < maxRefinementLevel; //Make sure an upper limit of refinement is never exceeded
	PxU32 total;
	//Exclusive scan over the whole block yields each emitting thread's write slot
	PxU32 exclusiveSum = threadBlockScanExclusive<NbWarps>(addToBuffer, total);
	if (addToBuffer)
	{
		PxU32 writeOffset = nbElementsInBuffer + exclusiveSum;
		if (writeOffset < TargetCount)
		{
			sharedMemoryBuffer[writeOffset] = make_uint2(ind, subInd);
		}
	}
	return min(total + nbElementsInBuffer, TargetCount);
}
}
#endif

View File

@@ -0,0 +1,418 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "PxsTransformCache.h"
#include "PxsContactManagerState.h"
#include "foundation/PxBounds3.h"
#include "PxgContactManager.h"
#include "PxgSoftBody.h"
#include "geometry/PxGeometry.h"
#include "PxsMaterialCore.h"
#include "cudaNpCommon.h"
#include "PxgCommonDefines.h"
#include "heightfieldUtil.cuh"
#include "utils.cuh"
#include "sphereTriangle.cuh"
#include "deformableCollision.cuh"
#include "assert.h"
#include "reduction.cuh"
#include <stdio.h>
#include "PxgSoftBodyCoreKernelIndices.h"
#include "PxgNpKernelIndices.h"
using namespace physx;
//Empty host-side entry point. NOTE(review): presumably referenced from host code so the
//linker keeps this translation unit (and the kernels it contains); appears to mirror the
//other initNarrowphaseKernelsN() functions - confirm against the module registration code.
extern "C" __host__ void initNarrowphaseKernels16() {}
//Warp-shared scratch data for the softbody-vs-heightfield midphase; filled by one thread
//and read by the whole warp (see sb_heightfieldMidphaseGeneratePairsLaunch).
struct sbHeightfieldScratch
{
	const float4 * PX_RESTRICT tetmeshVerts; //Softbody vertices (xyz position; w presumably inverse mass - sourced from mPosition_InvMass)
	const uint4 * PX_RESTRICT tetmeshTetIndices; //Four vertex indices per tetrahedron
	const PxU8* PX_RESTRICT tetmeshTetSurfaceHint; //Nonzero per tetrahedron if it lies on the surface
	PxU32 numTets;
	PxTransform heightfieldTransform; //World transform of the heightfield shape
	PxgShape heightfieldShape;
}; PX_COMPILE_TIME_ASSERT(sizeof(sbHeightfieldScratch) <= 1024); //Must fit the shared memory area reserved by the kernel
//__device__ static inline PxU32 sbHeightfieldComputePairs(
// const PxU32 minColumn,
// const PxU32 maxColumn,
// const PxU32 minRow,
// const PxU32 maxRow,
// const PxU32 nbRows,
// const PxU32 nbCols,
// PxHeightFieldSample* samples,
// const PxU32 miny,
// const PxU32 maxy
//)
//{
//
// PxU32 nbPairs = 0;
//
// const PxU32 columnSpan = maxColumn - minColumn;
//
// //we have two materials corresponding to one vertexIndex, so each thread will deal with one of the materials
// const PxU32 totalNumProcessed = (maxRow - minRow) * columnSpan * 2;
//
// for (PxU32 i = 0; i < totalNumProcessed; ++i)
// {
//
// const PxU32 index = i / 2;
// const PxU32 vertexIndex = (minRow + index / columnSpan) * nbCols + (minColumn + index % columnSpan);
// assert(isValidVertex(vertexIndex, nbRows, nbCols));
// PxReal h0 = getHeight(vertexIndex, samples);
// PxReal h1 = getHeight(vertexIndex + 1, samples);
// PxReal h2 = getHeight(vertexIndex + nbCols, samples);
// PxReal h3 = getHeight(vertexIndex + nbCols + 1, samples);
// const bool con0 = maxy < h0 && maxy < h1 && maxy < h2 && maxy < h3;
// const bool con1 = miny > h0 && miny > h1 && miny > h2 && miny > h3;
//
// if (!(con0 || con1))
// {
// const PxHeightFieldSample& sample = getSample(vertexIndex, samples);
//
// const bool isMaterial1 = (i & 1) ? 1 : 0;
// PxU32 material = isMaterial1 ? sample.materialIndex1 : sample.materialIndex0;
// if (material != PxHeightFieldMaterial::eHOLE)
// nbPairs++;
// }
// }//end of totalNumProcessed
//
// return nbPairs;
//}
//__device__ static inline void sbHeightfieldOutputPairs(
// const PxU32 minColumn,
// const PxU32 maxColumn,
// const PxU32 minRow,
// const PxU32 maxRow,
// const PxU32 nbRows,
// const PxU32 nbCols,
// PxHeightFieldSample* samples,
// const PxU32 miny,
// const PxU32 maxy,
//
// const PxU32 cmInd,
// const PxU32 tetrahedronInd,
// const PxU32 startOffset,
//
// uint4* stackPtr //output
//
//)
//{
// PxU32 pairCount = 0;
// const PxU32 columnSpan = maxColumn - minColumn;
//
// //we have two materials corresponding to one vertexIndex, so each thread will deal with one of the materials
// const PxU32 totalNumProcessed = (maxRow - minRow) * columnSpan * 2;
//
// for (PxU32 i = 0; i < totalNumProcessed; ++i)
// {
//
// PxU32 triangleIdx = 0xFFffFFff;
//
// const PxU32 index = i / 2;
// const PxU32 vertexIndex = (minRow + index / columnSpan) * nbCols + (minColumn + index % columnSpan);
// assert(isValidVertex(vertexIndex, nbRows, nbCols));
// PxReal h0 = getHeight(vertexIndex, samples);
// PxReal h1 = getHeight(vertexIndex + 1, samples);
// PxReal h2 = getHeight(vertexIndex + nbCols, samples);
// PxReal h3 = getHeight(vertexIndex + nbCols + 1, samples);
// const bool con0 = maxy < h0 && maxy < h1 && maxy < h2 && maxy < h3;
// const bool con1 = miny > h0 && miny > h1 && miny > h2 && miny > h3;
//
// if (!(con0 || con1))
// {
// const PxHeightFieldSample& sample = getSample(vertexIndex, samples);
//
// const bool isMaterial1 = (i & 1) ? 1 : 0;
// PxU32 material = isMaterial1 ? sample.materialIndex1 : sample.materialIndex0;
// if (material != PxHeightFieldMaterial::eHOLE)
// {
// triangleIdx = isMaterial1 ? ((vertexIndex << 1) + 1) : (vertexIndex << 1);
//
// stackPtr[startOffset + pairCount] = make_uint4(cmInd, tetrahedronInd, triangleIdx, 0);
//
// pairCount++;
// }
// }
// }//end of totalNumProcessed
//}
//Generates midphase pairs between one tetrahedron (per thread) and the triangles of a
//heightfield. Per-thread pair counts are combined via an exclusive warp scan, one
//atomicAdd per warp reserves space in the global pair stack, and heightfieldOutputPairs
//writes the (cmIdx, tetrahedronIdx, triangleIdx) entries.
//scratch: warp-shared heightfield + tet mesh data
//tetBound: world-space tetrahedron bounds (already fattened by the contact distance)
//hint: nonzero if the tetrahedron is a surface tet (only those generate pairs)
//stackPtr / midphasePairsNum: global outputs
__device__ static inline void sbHeightfieldMidphaseCore(
	sbHeightfieldScratch* scratch,
	const PxU32 cmIdx,
	const PxBounds3 tetBound,
	const PxU32 tetrahedronIdx,
	const PxU32 workIndex,
	const PxU8 hint,
	const PxU32 numTets,
	const PxU32 stackSize,
	uint4* PX_RESTRICT stackPtr, //output
	PxU32* PX_RESTRICT midphasePairsNum //output
)
{
	PxU32 nbPairs = 0;
	PxBounds3 localBound;
	PxU32 nbRows = 0;
	PxU32 nbCols = 0;
	PxHeightFieldSample* samples = NULL;
	PxU32 minRow = 0;
	PxU32 maxRow = 0;
	PxU32 minColumn = 0;
	PxU32 maxColumn = 0;
	if (hint && tetrahedronIdx < numTets)
	{
		PxTransform heightfieldTransform = scratch->heightfieldTransform;
		PxgShape heightfieldShape = scratch->heightfieldShape;
		const PxReal oneOverHeightScale = 1.f / heightfieldShape.scale.scale.y;
		const PxReal oneOverRowScale = 1.f / PxAbs(heightfieldShape.scale.scale.x);
		const PxReal oneOverColScale = 1.f / PxAbs(heightfieldShape.scale.scale.z);
		//bound is in world space, we need to transform the bound to the local space of height field
		localBound = PxBounds3::transformFast(heightfieldTransform.getInverse(), tetBound);
		//Undo the heightfield scale so the bound is expressed in sample (row/column/height) units
		localBound.minimum.x *= oneOverRowScale;
		localBound.minimum.y *= oneOverHeightScale;
		localBound.minimum.z *= oneOverColScale;
		localBound.maximum.x *= oneOverRowScale;
		localBound.maximum.y *= oneOverHeightScale;
		localBound.maximum.z *= oneOverColScale;
		//A negative row scale mirrors the heightfield, so min and max swap
		if (heightfieldShape.scale.scale.x < 0.f)
		{
			const PxReal temp = localBound.minimum.x;
			localBound.minimum.x = localBound.maximum.x;
			localBound.maximum.x = temp;
		}
		//Same for a negative column scale
		if (heightfieldShape.scale.scale.z < 0.f)
		{
			PxReal swap = localBound.minimum.z;
			localBound.minimum.z = localBound.maximum.z;
			localBound.maximum.z = swap;
		}
		PxU32* heightfieldData = reinterpret_cast<PxU32*>(heightfieldShape.hullOrMeshPtr);
		nbRows = heightfieldData[0];
		nbCols = heightfieldData[1];
		samples = reinterpret_cast<PxHeightFieldSample*>(&heightfieldData[2]);
		//Skip tetrahedra whose bounds lie entirely outside the sample grid.
		//NOTE(review): the original condition only negated the first test
		//(!(A) || B || C || D), so fully out-of-range bounds could still enter this branch
		//(harmless but wasted work, since the clamped row/column span ends up empty);
		//the negation now covers the whole disjunction.
		if (!((localBound.minimum.x > nbRows - 1) || (localBound.minimum.z > nbCols - 1)
			|| (localBound.maximum.x < 0) || (localBound.maximum.z < 0)))
		{
			minRow = getMinRow(localBound.minimum.x, nbRows);
			maxRow = getMaxRow(localBound.maximum.x, nbRows);
			minColumn = getMinColumn(localBound.minimum.z, nbCols);
			maxColumn = getMaxColumn(localBound.maximum.z, nbCols);
			if ((2 * (maxColumn - minColumn) * (maxRow - minRow)) > 0)
			{
				nbPairs = heightfieldComputePairs(minColumn, maxColumn, minRow, maxRow, nbRows, nbCols,
					samples, localBound.minimum.y, localBound.maximum.y);
			}
		}
	}
	//Exclusive warp scan converts per-thread pair counts into per-thread write offsets
	PxU32 pairOffset = warpScan<AddOpPxU32, PxU32>(FULL_MASK, nbPairs) - nbPairs;
	PxU32 validCount = 0;
	if (threadIdx.x == (WARP_SIZE - 1))
	{
		validCount = pairOffset + nbPairs; //Last lane holds the warp's total pair count
	}
	validCount = __shfl_sync(FULL_MASK, validCount, WARP_SIZE - 1);
	PxU32 startIndex = 0xffffffff;
	if (threadIdx.x == 0 && validCount > 0)
	{
		//One atomic per warp reserves contiguous space in the global pair stack
		startIndex = atomicAdd(midphasePairsNum, validCount);
	}
	startIndex = __shfl_sync(FULL_MASK, startIndex, 0);
	if (startIndex != 0xffffffff)
	{
		//Threads with nbPairs == 0 write nothing (their row/column span is empty)
		heightfieldOutputPairs(minColumn, maxColumn, minRow, maxRow, nbRows, nbCols,
			samples, localBound.minimum.y, localBound.maximum.y, cmIdx, tetrahedronIdx, startIndex + pairOffset, stackSize, stackPtr);
	}
}
//Midphase kernel for softbody (tetrahedron mesh) vs. heightfield pairs.
//Each contact manager is processed by nbWarpsPerCm warps; every thread tests one
//tetrahedron per inner iteration and sbHeightfieldMidphaseCore emits the
//(cmIdx, tetIdx, triangleIdx) pairs into the global stack.
extern "C" __global__ void sb_heightfieldMidphaseGeneratePairsLaunch(
	const PxU32 numNPWorkItems,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	PxgSoftBody* PX_RESTRICT softbodies,
	const PxU32 stackSizeBytes,
	PxU8* stackPtr, //output
	PxU32* midphasePairsNum //output
)
{
	//NOTE(review): all warps of a block share this single scratch instance; warps of the
	//same block working on different contact managers would overwrite each other's data -
	//verify the per-warp partitioning / MIDPHASE_WARPS_PER_BLOCK assumptions here.
	__shared__ PxU32 scratchMem[1024];
	sbHeightfieldScratch* s_warpScratch = reinterpret_cast<sbHeightfieldScratch*>(scratchMem);
	const PxU32 nbWarpsPerCm = 64; // 64 warp(2048 threads) to deal with one pair
	const PxU32 nbWarpsRequired = nbWarpsPerCm * numNPWorkItems;
	const PxU32 nbThreadsPerCm = 64 * WARP_SIZE;
	const PxU32 numWarpPerBlock = MIDPHASE_WARPS_PER_BLOCK;
	const PxU32 totalNumWarps = PxgSoftBodyKernelGridDim::SB_SBMIDPHASE * numWarpPerBlock;
	const PxU32 warpIndex = threadIdx.y;
	//Work is distributed evenly: each launched warp runs nbIterationsPerWarps virtual warps
	const PxU32 nbIterationsPerWarps = (nbWarpsRequired + totalNumWarps - 1) / totalNumWarps;
	/*if (warpIndex == 0 && threadIdx.x == 0 && blockIdx.x == 0)
	{
		printf("nbIterationsPerWarps %i nbWarpsRequired %i\n", nbIterationsPerWarps, nbWarpsRequired);
	}*/
	for (PxU32 i = 0; i < nbIterationsPerWarps; ++i)
	{
		const PxU32 workIndex = i + (warpIndex + numWarpPerBlock * blockIdx.x) * nbIterationsPerWarps;
		if (workIndex < nbWarpsRequired)
		{
			unsigned int cmIdx = workIndex / nbWarpsPerCm; //Contact manager handled by this virtual warp
			PxgShape softbodyShape, heightfieldShape;
			PxU32 softbodyCacheRef, heightfieldCacheRef;
			LoadShapePairWarp<PxGeometryType::eTETRAHEDRONMESH, PxGeometryType::eHEIGHTFIELD>(cmInputs, cmIdx, gpuShapes,
				softbodyShape, softbodyCacheRef, heightfieldShape, heightfieldCacheRef);
			PxsCachedTransform heightfieldTransformCache;
			PxsCachedTransform_ReadWarp(heightfieldTransformCache, transformCache + heightfieldCacheRef);
			const PxgSoftBody& softbody = softbodies[softbodyShape.particleOrSoftbodyId];
			PxU8* meshGeomPtr;
			meshGeomPtr = reinterpret_cast<PxU8 *>(softbody.mTetMeshData);
			const PxU32 idx = threadIdx.x;
			//One thread per warp fills the shared scratch from the softbody's mesh data
			if (idx == 0)
			{
				s_warpScratch->tetmeshVerts = softbody.mPosition_InvMass;
				s_warpScratch->tetmeshTetIndices = softbody.mTetIndices;
				s_warpScratch->tetmeshTetSurfaceHint = softbody.mTetMeshSurfaceHint;
				//The mesh blob starts with a uint4 header: (nbVerts, nbTets, maxDepth, nbBv32TreeNodes)
				const uint4 nbVerts_nbPrimitives_maxDepth_nbBv32TreeNodes = *reinterpret_cast<const uint4 *>(meshGeomPtr);
				s_warpScratch->numTets = nbVerts_nbPrimitives_maxDepth_nbBv32TreeNodes.y;
				//Skip over the BV32 tree data that follows the header
				meshGeomPtr += sizeof(uint4) + sizeof(const Gu::BV32DataPacked)* nbVerts_nbPrimitives_maxDepth_nbBv32TreeNodes.w
					+sizeof(const Gu::BV32DataDepthInfo) * nbVerts_nbPrimitives_maxDepth_nbBv32TreeNodes.z
					+ sizeof(PxU32) * nbVerts_nbPrimitives_maxDepth_nbBv32TreeNodes.w;
				/*const PxU8* surfaceHint = reinterpret_cast<const PxU8*>(meshGeomPtr);
				s_warpScratch->tetmeshTetSurfaceHint = surfaceHint;*/
				s_warpScratch->heightfieldTransform = heightfieldTransformCache.transform;
				s_warpScratch->heightfieldShape = heightfieldShape;
			}
			__syncwarp(); //Scratch must be visible to the whole warp before it is read
			const PxReal contactDist = contactDistance[softbodyCacheRef] + contactDistance[heightfieldCacheRef];
			const PxU32 nbTets = s_warpScratch->numTets;
			const PxU32 nbIter = (nbTets + nbThreadsPerCm - 1) / nbThreadsPerCm;
			const uint4* tetIndices = s_warpScratch->tetmeshTetIndices;
			const PxU8* surfaceHint = s_warpScratch->tetmeshTetSurfaceHint;
			for (PxU32 j = 0; j < nbIter; ++j)
			{
				//Each of the nbThreadsPerCm threads of this contact manager owns one tetrahedron per iteration
				const PxU32 wrappedThreadIndex = (workIndex * WARP_SIZE + threadIdx.x) % nbThreadsPerCm;
				const PxU32 tetrahedronIdx = wrappedThreadIndex + nbThreadsPerCm * j;
				PxBounds3 tetBound = PxBounds3::empty();
				PxU8 hint = 1;
				if (tetrahedronIdx < nbTets)
				{
					//avoid generate contacts if the tetrahedron isn't a surface tetrahedron
					hint = surfaceHint[tetrahedronIdx];
					if (hint)
					{
						const uint4 tetIdx = tetIndices[tetrahedronIdx];
						const PxVec3 worldV0 = PxLoad3(s_warpScratch->tetmeshVerts[tetIdx.x]);
						const PxVec3 worldV1 = PxLoad3(s_warpScratch->tetmeshVerts[tetIdx.y]);
						const PxVec3 worldV2 = PxLoad3(s_warpScratch->tetmeshVerts[tetIdx.z]);
						const PxVec3 worldV3 = PxLoad3(s_warpScratch->tetmeshVerts[tetIdx.w]);
						tetBound = tetBoundingBox(worldV0, worldV1, worldV2, worldV3);
						tetBound.fattenFast(contactDist); //Inflate by the contact distance
					}
				}
				uint4* tStackPtr = reinterpret_cast<uint4*>(stackPtr);
				const PxU32 stackSize = stackSizeBytes / sizeof(uint4);
				sbHeightfieldMidphaseCore(
					s_warpScratch,
					cmIdx,
					tetBound,
					tetrahedronIdx,
					workIndex,
					hint,
					nbTets,
					stackSize,
					tStackPtr,
					midphasePairsNum
				);
			}
		}
	}
}

View File

@@ -0,0 +1,526 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxBasicTemplates.h"
#include "foundation/PxBounds3.h"
#include "foundation/PxMath.h"
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxVec3.h"
#include "geometry/PxGeometry.h"
#include "GuBV32.h"
#include "cudaNpCommon.h"
#include "PxgContactManager.h"
#include "PxgConvexConvexShape.h"
#include "PxgFEMCloth.h"
#include "PxgNpKernelIndices.h"
#include "PxgParticleSystem.h"
#include "PxgSoftBody.h"
#include "PxsTransformCache.h"
#include <vector_types.h>
#include "copy.cuh"
#include "dataReadWriteHelper.cuh"
#include "femMidphaseScratch.cuh"
#include "utils.cuh"
#include "sbMidphaseScratch.cuh"
#include "deformableCollision.cuh"
#include "bv32Traversal.cuh"
using namespace physx;
extern "C" __host__ void initNarrowphaseKernels13() {}
//Callback object for bv32TreeTraversal: collects all surface tetrahedra of a softbody
//whose bounding box overlaps mBox and pushes (cmIdx, tetIndex) pairs onto a global stack.
struct SoftbodyTreeTraverser
{
	sbMidphaseScratch* s_warpScratch; //Warp-shared tet mesh data and traversal stack
	const PxBounds3& mBox; //Query AABB (rigid shape bounds, already fattened by the contact distance)
	const PxU32 stackSize;
	uint4* PX_RESTRICT stackPtr; //Global output pair stack
	PxU32* PX_RESTRICT midphasePairsNum; //Global pair counter (atomically advanced in pushOntoStackFullWarp)
	unsigned int cmIdx; //Contact manager index written into every emitted pair
	PX_FORCE_INLINE __device__ SoftbodyTreeTraverser(
		sbMidphaseScratch* s_warpScratch,
		PxBounds3& aabb,
		const PxU32 stackSize,
		uint4* PX_RESTRICT stackPtr,
		PxU32* PX_RESTRICT midphasePairsNum,
		unsigned int cmIdx)
		: s_warpScratch(s_warpScratch), mBox(aabb), stackSize(stackSize), stackPtr(stackPtr), midphasePairsNum(midphasePairsNum), cmIdx(cmIdx)
	{ }
	//Leaf test, executed by the full warp; primitiveIndex is 0xFFFFFFFF for lanes without work.
	//Only surface tetrahedra (per the surface hint) with a valid (non-degenerate) shape are tested.
	PX_FORCE_INLINE __device__ void intersectPrimitiveFullWarp(PxU32 primitiveIndex, PxU32 idxInWarp) const
	{
		bool intersect = false;
		if (primitiveIndex != 0xFFFFFFFF)
		{
			const PxU8 hint = s_warpScratch->tetmeshSurfaceHint[primitiveIndex];
			if (hint)
			{
				uint4 tetIdx = s_warpScratch->tetmeshTetIndices[primitiveIndex];
				const PxVec3 worldV0 = PxLoad3(s_warpScratch->tetmeshVerts[tetIdx.x]);
				const PxVec3 worldV1 = PxLoad3(s_warpScratch->tetmeshVerts[tetIdx.y]);
				const PxVec3 worldV2 = PxLoad3(s_warpScratch->tetmeshVerts[tetIdx.z]);
				const PxVec3 worldV3 = PxLoad3(s_warpScratch->tetmeshVerts[tetIdx.w]);
				if (isValidTet(worldV0, worldV1, worldV2, worldV3))
				{
					intersect = tetBoundingBox(worldV0, worldV1, worldV2, worldV3).intersects(mBox);
				}
			}
		}
		//All lanes participate; intersecting lanes append (cmIdx, tetIndex) to the global stack
		pushOntoStackFullWarp(intersect, midphasePairsNum, stackPtr, stackSize, make_uint4(cmIdx, primitiveIndex, 0, 0));
	}
	//Internal node test: descend into children whose AABB overlaps the query box.
	PX_FORCE_INLINE __device__ bool intersectBoxFullWarp(bool hasBox, const PxVec3& min, const PxVec3& max) const
	{
		if (hasBox)
			return mBox.intersects(PxBounds3(min, max));
		else
			return false;
	}
};
//Midphase for softbody vs. rigid shape: fattens the rigid shape's world bounds by the
//contact distance and traverses the softbody's BV32 tree with SoftbodyTreeTraverser,
//pushing (cmIdx, tetIndex) pairs onto the global stack. One warp handles one contact
//manager. toleranceLength is currently unused here.
template<unsigned int WarpsPerBlock>
__device__ static inline void sbMidphaseCore(
	PxU32 numNPWorkItems,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	const PxgSoftBody* PX_RESTRICT softbodies,
	const PxU32 stackSizeBytes,
	PxU8* PX_RESTRICT stackPtr, //output
	PxU32* PX_RESTRICT midphasePairsNum, //output
	sbMidphaseScratch* s_warpScratch //Warp-local scratch: tet mesh pointers + BV32 traversal stack
)
{
	//warp index
	const unsigned int warpIdx = threadIdx.y;
	unsigned int cmIdx = warpIdx + blockIdx.x * blockDim.y; //One contact manager per warp
	//unsigned mask_cmIdx = __ballot_sync(FULL_MASK, cmIdx < numNPWorkItems);
	if (cmIdx < numNPWorkItems)
	{
		PxgShape softbodyShape, rigidShape;
		PxU32 softbodyCacheRef, rigidCacheRef;
		LoadShapePairWarp<PxGeometryType::eTETRAHEDRONMESH>(cmInputs, cmIdx, gpuShapes,
			softbodyShape, softbodyCacheRef, rigidShape, rigidCacheRef);
		const PxU32 softbodyId = softbodyShape.particleOrSoftbodyId;
		const PxgSoftBody& softbody = softbodies[softbodyId];
		PxU8 * tetmeshGeomPtr = reinterpret_cast<PxU8 *>(softbody.mTetMeshData);
		//The mesh blob starts with a uint4 header: (nbVerts, nbTets, maxDepth, nbBv32TreeNodes)
		const uint4 nbVerts_nbTets_maxDepth_nbBv32TreeNodes = *reinterpret_cast<const uint4 *>(tetmeshGeomPtr);
		tetmeshGeomPtr += sizeof(uint4);
		//One thread per warp fills the shared scratch from the softbody's mesh data
		if (threadIdx.x == 0)
		{
			s_warpScratch->tetmeshVerts = softbody.mPosition_InvMass;
			s_warpScratch->tetmeshTetIndices = softbody.mTetIndices;
			s_warpScratch->tetmeshSurfaceHint = softbody.mTetMeshSurfaceHint;
			//const PxU32 & numVerts = nbVerts_nbTets_maxDepth_nbBv32TreeNodes.x;
			//const PxU32 & numTets = nbVerts_nbTets_maxDepth_nbBv32TreeNodes.y;
			//const PxU32 & maxDepth = nbVerts_nbTets_maxDepth_nbBv32TreeNodes.z;
			const PxU32 & nbBv32PackedNodes = nbVerts_nbTets_maxDepth_nbBv32TreeNodes.w;
			//printf("maxDepth %i numVerts %i numTets %i nbBv32TreeNodes %i\n", maxDepth, numVerts, numTets, nbBv32TreeNodes);
			Gu::BV32DataPacked* bv32PackedNodes = reinterpret_cast<Gu::BV32DataPacked*>(tetmeshGeomPtr);
			s_warpScratch->bv32PackedNodes = bv32PackedNodes;
			tetmeshGeomPtr += sizeof(const Gu::BV32DataPacked)* nbBv32PackedNodes
				+ sizeof(const Gu::BV32DataDepthInfo) * nbVerts_nbTets_maxDepth_nbBv32TreeNodes.z
				+ sizeof(PxU32) * nbVerts_nbTets_maxDepth_nbBv32TreeNodes.w;
		}
		__syncwarp(); //Scratch must be visible to the whole warp before the traversal starts
		//ML: this is the world bound AABB
		PxBounds3 aabb;
		PxBounds3_ReadWarp(aabb, bounds + rigidCacheRef);
		//PxReal contactDist = convexShape.contactOffset + trimeshShape.contactOffset;
		const PxReal contactDist = contactDistance[softbodyCacheRef] + contactDistance[rigidCacheRef];
		aabb.fattenFast(contactDist); //Inflate the query box by the contact distance
		uint4* tStackPtr = reinterpret_cast<uint4*>(stackPtr);
		const PxU32 stackSize = stackSizeBytes / sizeof(uint4);
		bv32TreeTraversal<const SoftbodyTreeTraverser, WarpsPerBlock>(s_warpScratch->bv32PackedNodes, s_warpScratch->sBv32Nodes, SoftbodyTreeTraverser(s_warpScratch, aabb, stackSize, tStackPtr, midphasePairsNum, cmIdx));
		/*if (threadIdx.x == 0 && nbPairsPerCM > 0)
			printf("nbPairsPerCM %i\n", nbPairsPerCM);*/
	}
}
// Kernel entry point for softbody-vs-rigid midphase pair generation.
// Allocates per-warp shared scratch (WARP_SIZE * 7 PxU32 per warp, viewed as
// sbMidphaseScratch) and forwards all arguments to sbMidphaseCore.
extern "C" __global__
//__launch_bounds__(MIDPHASE_WARPS_PER_BLOCK * WARP_SIZE, 4)
void sb_midphaseGeneratePairsLaunch(
PxU32 numWorkItems,
const PxReal toleranceLength,
const PxgContactManagerInput* PX_RESTRICT cmInputs,
const PxsCachedTransform* PX_RESTRICT transformCache,
const PxBounds3* PX_RESTRICT bounds,
const PxReal* PX_RESTRICT contactDistance,
const PxgShape* PX_RESTRICT gpuShapes,
const PxgSoftBody* PX_RESTRICT softbodies,
const PxU32 stackSizeBytes,
PxU8* PX_RESTRICT stackPtr, //output
PxU32* PX_RESTRICT midphasePairsNum //output
)
{
// One scratch region per warp; row selected by the warp's threadIdx.y.
__shared__ PxU32 scratchMem[MIDPHASE_WARPS_PER_BLOCK][WARP_SIZE * 7];
sbMidphaseCore<MIDPHASE_WARPS_PER_BLOCK>(
numWorkItems,
toleranceLength,
cmInputs,
transformCache,
bounds,
contactDistance,
gpuShapes,
softbodies,
stackSizeBytes,
stackPtr,
midphasePairsNum,
(sbMidphaseScratch*)scratchMem[threadIdx.y]
);
}
//struct sbParticleMidphaseScratch
//{
// const float4 * PX_RESTRICT tetmeshVerts;
// const uint4 * PX_RESTRICT tetmeshTetIndices;
// const PxU8* PX_RESTRICT tetmeshSurfaceHint;
//
// const Gu::BV32DataDepthInfo* PX_RESTRICT bv32DepthInfo;
// const PxU32* PX_RESTRICT bv32RemapPackedNodeIndex;
// //bv32 tree
// Gu::BV32DataPacked* bv32PackedNodes;
// int nbPackedNodes;
// //stack for traversal
// int sBv32Nodes[192]; //6 depth of the bv32 tree
// PxU32 sBv32ActiveParticles[192];
//};
//PX_COMPILE_TIME_ASSERT(sizeof(sbParticleMidphaseScratch) <= WARP_SIZE * 15 * sizeof(PxU32));
// Full-warp BV32 traversal callback that tests softbody tetrahedra against a
// single query AABB (mBox) and records overlapping (cmIdx, queryIndex, tetIdx)
// triples on the shared midphase stack.
struct SoftbodyBoxTraverser
{
femMidphaseScratch* PX_RESTRICT s_warpScratch;	// per-warp mesh/tree pointers
const PxBounds3 mBox;							// query AABB in mesh space
const PxU32 queryIndex;							// e.g. particle or cloth-vertex index
const PxU32 cmIdx;								// contact-manager pair index
const PxU32 stackSize;							// output stack capacity, in uint4 entries
uint4* PX_RESTRICT stackPtr; //output
PxU32* midphasePairs; //output
PX_FORCE_INLINE __device__ SoftbodyBoxTraverser(
femMidphaseScratch* PX_RESTRICT s_warpScratch,
const PxVec3 boxMin,
const PxVec3 boxMax,
const PxU32 queryIndex,
const PxU32 cmIdx,
const PxU32 stackSize,
uint4* PX_RESTRICT stackPtr, //output
PxU32* midphasePairs) //output
: s_warpScratch(s_warpScratch), mBox(boxMin, boxMax), queryIndex(queryIndex), cmIdx(cmIdx),
stackSize(stackSize), stackPtr(stackPtr), midphasePairs(midphasePairs)
{ }
// Called by every lane of the warp; 0xFFFFFFFF marks an empty lane. Each
// valid lane box-tests its tetrahedron, then all lanes cooperate in the
// full-warp stack push (which compacts the hits).
PX_FORCE_INLINE __device__ void intersectPrimitiveFullWarp(PxU32 primitiveIndex, PxU32 idxInWarp) const
{
bool intersect = false;
if (primitiveIndex != 0xFFFFFFFF)
{
uint4 tetIdx = s_warpScratch->meshVertsIndices[primitiveIndex];
const PxVec3 worldV0 = PxLoad3(s_warpScratch->meshVerts[tetIdx.x]);
const PxVec3 worldV1 = PxLoad3(s_warpScratch->meshVerts[tetIdx.y]);
const PxVec3 worldV2 = PxLoad3(s_warpScratch->meshVerts[tetIdx.z]);
const PxVec3 worldV3 = PxLoad3(s_warpScratch->meshVerts[tetIdx.w]);
// Skip degenerate tetrahedra before the bounds test.
if (isValidTet(worldV0, worldV1, worldV2, worldV3))
{
intersect = tetBoundingBox(worldV0, worldV1, worldV2, worldV3).intersects(mBox);
}
}
pushOntoStackFullWarp(intersect, midphasePairs, stackPtr, stackSize, make_uint4(cmIdx, queryIndex, primitiveIndex, 0));
}
// Node-level rejection test: lanes without a box report no overlap.
PX_FORCE_INLINE __device__ bool intersectBoxFullWarp(bool hasBox, const PxVec3& min, const PxVec3& max) const
{
if (hasBox)
return mBox.intersects(PxBounds3(min, max));
else
return false;
}
};
// Particle-system-vs-softbody midphase kernel: blockIdx.y selects the contact
// pair, the particle system is staged into shared memory, and each warp walks
// the softbody BV32 tree once per particle it owns (grid of warps strided over
// the particle list), pushing overlapping (pair, particle, tet) candidates.
extern "C" __global__
//__launch_bounds__(MIDPHASE_WARPS_PER_BLOCK * WARP_SIZE, 4)
void sb_psMidphaseGeneratePairsLaunch(
PxU32 numWorkItems,
const PxReal toleranceLength,
const PxgContactManagerInput* PX_RESTRICT cmInputs,
const PxgShape* PX_RESTRICT gpuShapes,
const PxsCachedTransform* PX_RESTRICT transformCache,
const PxBounds3* PX_RESTRICT bounds,
const PxReal* PX_RESTRICT contactDistance,
const PxgSoftBody* PX_RESTRICT softbodies,
PxgParticleSystem* PX_RESTRICT particleSystems,
const PxU32 stackSizeBytes,
PxU8* PX_RESTRICT stackPtr, //output
PxU32* PX_RESTRICT midphasePairsNum //output
)
{
// Block-shared staging copy of the particle system (avoids repeated global reads).
__shared__ __align__(16) PxU8 particleSystemMemory[sizeof(PxgParticleSystem)];
PxgParticleSystem& sParticleSystem = *(reinterpret_cast<PxgParticleSystem*>(particleSystemMemory));
__shared__ femMidphaseScratch scratchMem[MIDPHASE_WARPS_PER_BLOCK];
femMidphaseScratch* s_warpScratch = &scratchMem[threadIdx.y];
// blockIdx.y is uniform across the block, so the __syncthreads() below is safe.
if (blockIdx.y < numWorkItems)
{
//const PxU32 globalThreadIdx = threadIdx.y * WARP_SIZE + threadIdx.x + blockIdx.x*blockDim.x*blockDim.y;
const PxU32 globalWarpIdx = threadIdx.y + blockIdx.x*blockDim.y;
PxgShape particleShape, softbodyShape;
PxU32 particleCacheRef, softbodyCacheRef;
LoadShapePairWarp<PxGeometryType::ePARTICLESYSTEM, PxGeometryType::eTETRAHEDRONMESH>(cmInputs, blockIdx.y, gpuShapes,
particleShape, particleCacheRef, softbodyShape, softbodyCacheRef);
const PxU32 particleSystemId = particleShape.particleOrSoftbodyId;
//each thread reads 16 bytes; NOTE(review): every warp performs this same copy
//into the shared staging buffer — redundant but identical data; confirm intent.
uint4* srcParticleSystem = reinterpret_cast<uint4*>(&particleSystems[particleSystemId]);
uint4* dstParticleSystem = reinterpret_cast<uint4*>(&sParticleSystem);
warpCopy<uint4>(dstParticleSystem, srcParticleSystem, sizeof(PxgParticleSystem));
const PxU32 softbodyId = softbodyShape.particleOrSoftbodyId;
const PxgSoftBody& softbody = softbodies[softbodyId];
// Serialized tet-mesh blob: uint4 header, then packed BV32 nodes.
PxU8 * tetmeshGeomPtr = reinterpret_cast<PxU8 *>(softbody.mTetMeshData);
//const uint4 nbVerts_nbTets_maxDepth_nbBv32TreeNodes = *reinterpret_cast<const uint4 *>(tetmeshGeomPtr);
tetmeshGeomPtr += sizeof(uint4);
/*if (workIndex == 0 && threadIdx.x == 0)
printf("particleSystemId %i softbodyId %i\n", particleSystemId, softbodyId);*/
// Lane 0 of each warp publishes mesh/tree pointers to its scratch.
if (threadIdx.x == 0)
{
s_warpScratch->meshVerts = softbody.mPosition_InvMass;
s_warpScratch->meshVertsIndices = softbody.mTetIndices;
Gu::BV32DataPacked* bv32PackedNodes = reinterpret_cast<Gu::BV32DataPacked*>(tetmeshGeomPtr);
s_warpScratch->bv32PackedNodes = bv32PackedNodes;
}
// Wait for the staged particle system and all scratch writes.
__syncthreads();
PxReal cDistance = contactDistance[particleCacheRef] + contactDistance[softbodyCacheRef];
//const PxU32 threadBaseAddress = globalThreadIdx & (~31);
const PxU32 nbParticles = sParticleSystem.mCommonData.mNumParticles;
//const PxU32 NbThreads = blockDim.x*blockDim.y*gridDim.x;
const PxU32 NbWarps = blockDim.y*gridDim.x;
float4* sortedPose = reinterpret_cast<float4*>(sParticleSystem.mSortedPositions_InvMass);
uint4* tStackPtr = reinterpret_cast<uint4*>(stackPtr);
const PxU32 stackSize = stackSizeBytes / sizeof(uint4);
//for (PxU32 i = threadBaseAddress, particleIndex = globalThreadIdx; i < nbParticles; i+= NbThreads, particleIndex += NbThreads)
// Warp-strided loop: each warp traverses the tree for one particle at a time.
for (PxU32 i = globalWarpIdx; i < nbParticles; i += NbWarps)
{
float4 pos = make_float4(0.f);
//PxU32 particleIndex = 0xFFFFFFFF;
//if(threadIdx.x == 0)
PxU32 particleIndex = i;
pos = sortedPose[particleIndex];
/*if (particleIndex < nbParticles)
{
pos = sortedPose[particleIndex];
}*/
const PxVec3 particlePos(pos.x, pos.y, pos.z);
// Query box = particle position inflated by the summed contact offsets.
bv32TreeTraversal<const SoftbodyBoxTraverser, MIDPHASE_WARPS_PER_BLOCK>(s_warpScratch->bv32PackedNodes, s_warpScratch->sBv32Nodes, SoftbodyBoxTraverser(s_warpScratch, particlePos - PxVec3(cDistance), particlePos + PxVec3(cDistance), particleIndex,
blockIdx.y, stackSize, tStackPtr, midphasePairsNum));
}
}
}
// Softbody-vs-FEM-cloth-vertex midphase: blockIdx.y selects the contact pair;
// each warp queries the softbody's BV32 tree with one cloth vertex at a time
// (inflated by the summed contact offsets) and pushes candidates on the stack.
template<unsigned int WarpsPerBlock>
__device__ static inline void sb_clothVertMidphaseCore(
const PxReal toleranceLength,
const PxgContactManagerInput* PX_RESTRICT cmInputs,
const PxsCachedTransform* PX_RESTRICT transformCache,
const PxBounds3* PX_RESTRICT bounds,
const PxReal* PX_RESTRICT contactDistance,
const PxgShape* PX_RESTRICT gpuShapes,
const PxgSoftBody* PX_RESTRICT softbodies,
const PxgFEMCloth* PX_RESTRICT clothes,
const PxU32 stackSizeBytes,
const PxReal nbCollisionPairUpdatesPerTimestep,
PxU8* PX_RESTRICT stackPtr, //output
PxU32* PX_RESTRICT midphasePairsNum, //output
femMidphaseScratch* s_warpScratch
)
{
const PxU32 cmIdx = blockIdx.y;
// each block deals with one pair
{
const PxU32 globalWarpIdx = threadIdx.y + blockIdx.x*blockDim.y;
PxgShape softbodyShape, clothShape;
PxU32 softbodyCacheRef, clothCacheRef;
LoadShapePairWarp<PxGeometryType::eTETRAHEDRONMESH, PxGeometryType::eTRIANGLEMESH>(cmInputs, cmIdx, gpuShapes,
softbodyShape, softbodyCacheRef, clothShape, clothCacheRef);
const PxgSoftBody& softbody = softbodies[softbodyShape.particleOrSoftbodyId];
const PxgFEMCloth& cloth = clothes[clothShape.particleOrSoftbodyId];
// Serialized tet-mesh blob: uint4 header, then packed BV32 nodes.
PxU8 * tetmeshGeomPtr = reinterpret_cast<PxU8 *>(softbody.mTetMeshData);
tetmeshGeomPtr += sizeof(uint4);
// Lane 0 of each warp publishes mesh/tree pointers to its scratch.
if (threadIdx.x == 0)
{
s_warpScratch->meshVerts = softbody.mPosition_InvMass;
s_warpScratch->meshVertsIndices = softbody.mTetIndices;
Gu::BV32DataPacked* bv32PackedNodes = reinterpret_cast<Gu::BV32DataPacked*>(tetmeshGeomPtr);
s_warpScratch->bv32PackedNodes = bv32PackedNodes;
}
__syncthreads();
// if cloth collision pair is updated more than once per time step, early-out the influence of extended
// cloth bounding box in broad phase: shrink the cloth AABB by the extra
// fattening before the pair-level overlap test below.
if (nbCollisionPairUpdatesPerTimestep > 1.f)
{
PxBounds3 clothBB, softbodyBB;
PxBounds3_ReadWarp(clothBB, bounds + clothCacheRef);
PxBounds3_ReadWarp(softbodyBB, bounds + softbodyCacheRef);
clothBB.minimum += (nbCollisionPairUpdatesPerTimestep - 1.f) * PxVec3(contactDistance[clothCacheRef]);
clothBB.maximum -= (nbCollisionPairUpdatesPerTimestep - 1.f) * PxVec3(contactDistance[clothCacheRef]);
if (!clothBB.intersects(softbodyBB)) return;
}
const PxReal cDistance = contactDistance[softbodyCacheRef] + contactDistance[clothCacheRef];
const PxU32 NbWarps = blockDim.y*gridDim.x;
const PxU32 nbVerts = cloth.mNbVerts;
const float4* positions = cloth.mPosition_InvMass;
uint4* tStackPtr = reinterpret_cast<uint4*>(stackPtr);
const PxU32 stackSize = stackSizeBytes / sizeof(uint4);
// Warp-strided loop over cloth vertices; one tree traversal per vertex.
for (PxU32 i = globalWarpIdx; i < nbVerts; i += NbWarps)
{
const float4 tPos = positions[i];
const PxVec3 pos(tPos.x, tPos.y, tPos.z);
bv32TreeTraversal<const SoftbodyBoxTraverser, WarpsPerBlock>(s_warpScratch->bv32PackedNodes, s_warpScratch->sBv32Nodes, SoftbodyBoxTraverser(s_warpScratch, pos - PxVec3(cDistance), pos + PxVec3(cDistance), i,
blockIdx.y, stackSize, tStackPtr, midphasePairsNum));
}
}
}
// Kernel entry point for softbody-vs-cloth-vertex midphase pair generation.
// Provides per-warp shared scratch (WARP_SIZE * 18 PxU32 per warp, viewed as
// femMidphaseScratch) and forwards all arguments to sb_clothVertMidphaseCore.
extern "C" __global__
//__launch_bounds__(MIDPHASE_WARPS_PER_BLOCK * WARP_SIZE, 4)
void sb_clothVertMidphaseGeneratePairsLaunch(
const PxReal toleranceLength,
const PxgContactManagerInput* PX_RESTRICT cmInputs,
const PxsCachedTransform* PX_RESTRICT transformCache,
const PxBounds3* PX_RESTRICT bounds,
const PxReal* PX_RESTRICT contactDistance,
const PxgShape* PX_RESTRICT gpuShapes,
const PxgSoftBody* PX_RESTRICT softbodies,
const PxgFEMCloth* PX_RESTRICT clothes,
const PxU32 stackSizeBytes,
const PxReal nbCollisionPairUpdatesPerTimestep,
PxU8* PX_RESTRICT stackPtr, //output
PxU32* PX_RESTRICT midphasePairsNum //output
)
{
// One scratch region per warp; row selected by the warp's threadIdx.y.
__shared__ PxU32 scratchMem[MIDPHASE_WARPS_PER_BLOCK][WARP_SIZE * 18];
sb_clothVertMidphaseCore<MIDPHASE_WARPS_PER_BLOCK>(
toleranceLength,
cmInputs,
transformCache,
bounds,
contactDistance,
gpuShapes,
softbodies,
clothes,
stackSizeBytes,
nbCollisionPairUpdatesPerTimestep,
stackPtr,
midphasePairsNum,
(femMidphaseScratch*)scratchMem[threadIdx.y]
);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,840 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __SPHERE_COLLISION_CUH__
#define __SPHERE_COLLISION_CUH__
#include "foundation/PxVec3.h"
#include "foundation/PxTransform.h"
#include "distanceSegmentSegment.cuh"
//0 is sphere, 1 is sphere, point = [x, y, z, pen];
// Sphere-vs-sphere contact: returns 1 and fills outPointPen/outNormal when the
// centers are closer than r0 + r1 + contactDist, otherwise returns 0.
__device__ __forceinline__ static
PxU32 spheresphere(const PxTransform& transform0, const PxTransform& transform1, const PxReal r0,
const PxReal r1, const PxReal contactDist, float4& outPointPen, PxVec3& outNormal)
{
	const PxVec3 centerDelta = transform0.p - transform1.p;
	const PxReal sqDist = centerDelta.dot(centerDelta);
	const PxReal radiusSum = r0 + r1;
	const PxReal inflatedSum = radiusSum + contactDist;
	if (sqDist >= inflatedSum * inflatedSum)
		return 0;	// further apart than the inflated radii: no contact

	const PxReal dist = PxSqrt(sqDist);
	// Guard against (near-)coincident centers: fall back to a fixed axis.
	PxVec3 normal(1.0f, 0.f, 0.f);
	if (dist > 1e-4f)
		normal = centerDelta * (1.f / dist);
	// Contact point on the surface of sphere 1; penetration is negative when overlapping.
	const PxVec3 point = transform1.p + (normal * r1);
	outPointPen = make_float4(point.x, point.y, point.z, dist - radiusSum);
	outNormal = normal;
	return 1;
}
// Sphere-vs-sphere contact (positions only): returns 1 and writes the contact
// normal plus penetration into outNormalPen when the centers are closer than
// r0 + r1 + contactDist, otherwise returns 0.
__device__ __forceinline__ static
PxU32 spheresphere(const PxVec3& p0, const PxVec3& p1, const PxReal r0,
const PxReal r1, const PxReal contactDist, float4& outNormalPen)
{
	const PxVec3 centerDelta = p0 - p1;
	const PxReal sqDist = centerDelta.dot(centerDelta);
	const PxReal radiusSum = r0 + r1;
	const PxReal inflatedSum = radiusSum + contactDist;
	if (sqDist >= inflatedSum * inflatedSum)
		return 0;	// further apart than the inflated radii: no contact

	const PxReal dist = PxSqrt(sqDist);
	// Guard against (near-)coincident centers: fall back to a fixed axis.
	PxVec3 normal(1.0f, 0.f, 0.f);
	if (dist > 1e-4f)
		normal = centerDelta * (1.f / dist);
	// Penetration is negative when the spheres overlap.
	outNormalPen = make_float4(normal.x, normal.y, normal.z, dist - radiusSum);
	return 1;
}
// Squared distance from point p to segment [a, b].
// param receives the clamped parametric position t in [0, 1] of the closest
// point on the segment (t = 0 for a degenerate segment where a == b).
__device__ __forceinline__ static
PxReal distancePointSegmentSquared(const PxVec3 a, const PxVec3 b, const PxVec3 p, PxReal& param)
{
	const PxVec3 ap = p - a;
	const PxVec3 ab = b - a;
	const PxReal nom = ap.dot(ab);
	const PxReal denom = ab.dot(ab);
	// Only divide when the segment has nonzero length. The previous code
	// computed nom / denom unconditionally, producing a 0/0 NaN for a
	// degenerate segment that was clamped and then discarded.
	const PxReal t = denom > 0.f ? PxClamp(nom / denom, 0.f, 1.f) : 0.f;
	const PxVec3 v = ap - (ab * t);
	param = t;
	return v.dot(v);
}
//0 is sphere, 1 is capsule
// Sphere-vs-capsule contact. The capsule axis runs along transform1's local x
// axis with endpoints at +/- halfHeight. Returns 1 and fills the world-space
// contact point (w = penetration) and normal when within contactDist, else 0.
__device__ __forceinline__ static
PxU32 spherecapsule(const PxTransform& transform0, const PxTransform& transform1, const PxReal sphereRadius,
const PxReal capsuleRadius, const PxReal halfHeight, const PxReal contactDist, float4& outPointPen, PxVec3& outNormal)
{
//Sphere in world space
const PxVec3& sphereCenter = transform0.p;
const PxQuat& q1 = transform1.q;
const PxVec3& p1 = transform1.p;
// Capsule segment endpoints in world space.
const PxVec3 basisVector0 = q1.getBasisVector0();
const PxVec3 tmp0 = basisVector0 * halfHeight;
const PxVec3 s = p1 + tmp0;
const PxVec3 e = p1 - tmp0;
const PxReal radiusSum = sphereRadius + capsuleRadius;
const PxReal inflatedSum = radiusSum + contactDist;
// Closest point on the segment to the sphere center (t in [0, 1]).
PxReal t;
const PxReal squareDist = distancePointSegmentSquared(s, e, sphereCenter, t);
const PxReal sqInflatedSum = inflatedSum * inflatedSum;
// Collision detection
if (sqInflatedSum > squareDist)
{
const PxVec3 p = s + (e - s) * t;//V3ScaleAdd(V3Sub(e, s), t, s);
const PxVec3 dir = sphereCenter - p;// V3Sub(sphereCenter, p);
const PxReal length = PxSqrt(dir.magnitudeSquared());
// Degenerate (center on the axis): fall back to a fixed normal.
const PxVec3 normal = length > PX_EPS_REAL ? (dir * (1.f / length)) : PxVec3(1.0f, 0.f, 0.f);
// Contact point on the sphere surface; penetration negative when overlapping.
const PxVec3 point = sphereCenter - normal * sphereRadius;
const PxReal pen = PxSqrt(squareDist) - radiusSum;
outPointPen = make_float4(point.x, point.y, point.z, pen);
outNormal = normal;
/*printf("outPoint(%f, %f, %f), pen %f\n", outPointPen.x, outPointPen.y, outPointPen.z, outPointPen.w);
printf("outNormal(%f, %f, %f)\n", outNormal.x, outNormal.y, outNormal.z, 0.f);*/
return 1;
}
return 0;
}
//0 is sphere, 1 is capsule
// Sphere-vs-capsule contact (normal/penetration variant). Same geometry as the
// point variant above, but writes only the normal (w = penetration). If outT is
// supplied, it always receives 1 - t (the closest-point parameter measured from
// endpoint e towards s), even when no contact is generated.
__device__ __forceinline__ static
PxU32 spherecapsule(const PxTransform& transform0, const PxTransform& transform1, const PxReal sphereRadius,
const PxReal capsuleRadius, const PxReal halfHeight, const PxReal contactDist, float4& outNormalPen, PxReal* outT = NULL)
{
//Sphere in world space
const PxVec3& sphereCenter = transform0.p;
const PxQuat& q1 = transform1.q;
const PxVec3& p1 = transform1.p;
// Capsule segment endpoints in world space (axis = local x of transform1).
const PxVec3 basisVector0 = q1.getBasisVector0();
const PxVec3 tmp0 = basisVector0 * halfHeight;
const PxVec3 s = p1 + tmp0;
const PxVec3 e = p1 - tmp0;
const PxReal radiusSum = sphereRadius + capsuleRadius;
const PxReal inflatedSum = radiusSum + contactDist;
PxReal t;
const PxReal squareDist = distancePointSegmentSquared(s, e, sphereCenter, t);
const PxReal sqInflatedSum = inflatedSum * inflatedSum;
// Report the (flipped) segment parameter regardless of contact.
if (outT != NULL)
{
*outT = 1.0f - t;
}
// Collision detection
if (sqInflatedSum > squareDist)
{
const PxVec3 p = s + (e - s) * t;//V3ScaleAdd(V3Sub(e, s), t, s);
const PxVec3 dir = sphereCenter - p;// V3Sub(sphereCenter, p);
const PxReal length = PxSqrt(dir.magnitudeSquared());
// Degenerate (center on the axis): fall back to a fixed normal.
const PxVec3 normal = length > PX_EPS_REAL ? (dir * (1.f / length)) : PxVec3(1.0f, 0.f, 0.f);
const PxReal pen = PxSqrt(squareDist) - radiusSum;
outNormalPen = make_float4(normal.x, normal.y, normal.z, pen);
/*printf("outPoint(%f, %f, %f), pen %f\n", outPointPen.x, outPointPen.y, outPointPen.z, outPointPen.w);
printf("outNormal(%f, %f, %f)\n", outNormal.x, outNormal.y, outNormal.z, 0.f);*/
return 1;
}
return 0;
}
//0 is sphere, 1 is plane
// Sphere-vs-plane contact. The plane passes through transform1's origin with
// its normal along transform1's local +x axis; "separation" is the signed gap
// between the sphere surface and the plane (negative = penetrating).
__device__ __forceinline__ static
PxU32 sphereplane(const PxTransform& transform0, const PxTransform& transform1, const PxReal sphereRadius, const PxReal contactDist,
float4& outPointPen, PxVec3& outNormal)
{
	// Bring the sphere center into plane space; its x coordinate is the
	// signed distance of the center from the plane.
	const PxVec3 centerInPlaneSpace = transform1.transformInv(transform0.p);
	const PxReal separation = centerInPlaneSpace.x - sphereRadius;
	if (separation > contactDist)
		return 0;	// beyond the contact offset: no touch

	// World-space plane normal and the deepest point on the sphere surface.
	const PxVec3 worldNormal = transform1.q.getBasisVector0();
	const PxVec3 worldPoint = transform0.p - worldNormal * sphereRadius;
	outPointPen = make_float4(worldPoint.x, worldPoint.y, worldPoint.z, separation);
	outNormal = worldNormal;
	return 1;
}
//0 is sphere, 1 is plane
// Sphere-vs-plane contact (normal/penetration variant). Same geometry as the
// point variant: the plane normal is transform1's local +x axis; w receives
// the signed separation of the sphere surface from the plane.
__device__ __forceinline__ static
PxU32 sphereplane(const PxTransform& transform0, const PxTransform& transform1, const PxReal sphereRadius, const PxReal contactDist,
float4& outNormalPen)
{
	// Signed distance of the sphere surface above the plane: x coordinate of
	// the center in plane space minus the radius.
	const PxVec3 centerInPlaneSpace = transform1.transformInv(transform0.p);
	const PxReal separation = centerInPlaneSpace.x - sphereRadius;
	if (separation > contactDist)
		return 0;	// beyond the contact offset: no touch

	const PxVec3 worldNormal = transform1.q.getBasisVector0();
	outNormalPen = make_float4(worldNormal.x, worldNormal.y, worldNormal.z, separation);
	return 1;
}
//0 is sphere, 1 is box
// Sphere-vs-box contact (world-space point variant). The sphere center is
// clamped into the box to find the closest feature; a separate deepest-face
// path handles a center embedded inside the box. Returns 1 and fills the
// world-space contact point (w = penetration) and normal on contact, else 0.
__device__ __forceinline__ static
PxU32 spherebox(const PxTransform& transform0, const PxTransform& transform1, const PxReal sphereRadius,
const PxVec3 boxExtents, const PxReal contactDist, float4& outPointPen, PxVec3& outNormal)
{
const PxVec3& sphereOrigin = transform0.p;
//const PxQuat& q1 = transform1.q;
//const PxVec3& p1 = transform1.p;
//const PxTransform transf1(p1, q1);
//translate sphere center into the box space
const PxVec3 sphereCenter = transform1.transformInv(sphereOrigin);
const PxVec3 nBoxExtents = -boxExtents;
const PxReal inflatedSum = sphereRadius + contactDist;
const PxReal sqInflatedSum = inflatedSum * inflatedSum;
// Closest point p on (or in) the box to the sphere center, in box space.
const PxReal x = PxClamp(sphereCenter.x, nBoxExtents.x, boxExtents.x);
const PxReal y = PxClamp(sphereCenter.y, nBoxExtents.y, boxExtents.y);
const PxReal z = PxClamp(sphereCenter.z, nBoxExtents.z, boxExtents.z);
const PxVec3 p(x, y, z);
const PxVec3 v = sphereCenter - p;
const PxReal lengthSq = v.dot(v);
if (sqInflatedSum > lengthSq)//intersect
{
//check whether the sphereCenter is inside the box
const bool bX = boxExtents.x >= PxAbs(sphereCenter.x);
const bool bY = boxExtents.y >= PxAbs(sphereCenter.y);
const bool bZ = boxExtents.z >= PxAbs(sphereCenter.z);
const bool bInsideBox = bX && bY && bZ;// V3IsGrtrOrEq(boxExtents, V3Abs(sphereCenter));
if (bInsideBox)//sphere center inside the box
{
//Pick directions and sign
const PxVec3 absP(PxAbs(p.x), PxAbs(p.y), PxAbs(p.z));
const PxVec3 distToSurface = boxExtents - absP;//dist from embedded center to box surface along 3 dimensions.
//assume z is the smallest element of distToSurface
PxVec3 localNorm = p.z >= 0.f ? PxVec3(0.f, 0.f, 1.f) : PxVec3(0.f, 0.f, -1.f);
PxReal dist = -distToSurface.z;
//find smallest element of distToSurface
if (distToSurface.x <= distToSurface.y && distToSurface.x <= distToSurface.z)
{
localNorm = p.x >= 0.f ? PxVec3(1.f, 0.f, 0.f) : PxVec3(-1.0f, 0.f, 0.f);
dist = -distToSurface.x;
}
else if (distToSurface.y <= distToSurface.z)
{
localNorm = p.y >= 0.f ? PxVec3(0.f, 1.f, 0.f) : PxVec3(0.f, -1.f, 0.f);
dist = -distToSurface.y;
}
/* printf("p(%f, %f, %f)\n", p.x, p.y, p.z);
printf("distToSurface(%f, %f, %f)\n", distToSurface.x, distToSurface.y, distToSurface.z);
printf("dist %f\n", dist);*/
//separation so far is just the embedding of the center point; we still have to push out all of the radius.
const PxVec3 normal = transform1.rotate(localNorm);
const PxReal penetration = dist - sphereRadius;
const PxVec3 point = sphereOrigin - normal * dist;
outPointPen = make_float4(point.x, point.y, point.z, penetration);
outNormal = normal;
//printf("Inside the box\n");
/*printf("pentration %f\n", penetration);
printf("tP(%f, %f, %f), tQ(%f, %f, %f, %f)\n", transf1.p.x, transf1.p.y, transf1.p.z, transf1.q.x, transf1.q.y, transf1.q.z, transf1.q.w);
printf("outPoint(%f, %f, %f), pen %f\n", outPointPen.x, outPointPen.y, outPointPen.z, outPointPen.w);
printf("outNormal(%f, %f, %f)\n", outNormal.x, outNormal.y, outNormal.z, 0.f);*/
}
else
{
//get the closest point from the center to the box surface
const PxReal recipLength = PxRecipSqrt(lengthSq);
const PxReal length = 1 / recipLength;
const PxVec3 locNorm = v * recipLength;
const PxReal penetration = length - sphereRadius;
// Transform closest point and normal back to world space.
const PxVec3 normal = transform1.rotate(locNorm);
const PxVec3 point = transform1.transform(p);
outPointPen = make_float4(point.x, point.y, point.z, penetration);
outNormal = normal;
/*printf("recipLength %f, length %f \n", recipLength, length);
printf("locNorm(%f, %f, %f)\n", locNorm.x, locNorm.y, locNorm.z);
printf("pentration %f\n", penetration);
printf("tP(%f, %f, %f), tQ(%f, %f, %f, %f)\n", transf1.p.x, transf1.p.y, transf1.p.z, transf1.q.x, transf1.q.y, transf1.q.z, transf1.q.w);
printf("outPoint(%f, %f, %f), pen %f\n", outPointPen.x, outPointPen.y, outPointPen.z, outPointPen.w);
printf("outNormal(%f, %f, %f)\n", outNormal.x, outNormal.y, outNormal.z, 0.f);*/
}
return 1;
}
//printf("Not intersect! \n");
return 0;
}
//0 is sphere, 1 is box
// Sphere-vs-box contact (world-space normal variant). Same geometry as the
// point variant, but takes the sphere center directly and writes only the
// world-space normal with w = penetration.
__device__ __forceinline__ static
PxU32 spherebox(const PxVec3& sphereOrigin, const PxTransform& transform1, const PxReal sphereRadius,
const PxVec3 boxExtents, const PxReal contactDist, float4& outNormalPen)
{
/*const PxVec3& sphereOrigin = transform0.p;
const PxQuat& q1 = transform1.q;
const PxVec3& p1 = transform1.p;
const PxTransform transf1(p1, q1);*/
//translate sphere center into the box space
const PxVec3 sphereCenter = transform1.transformInv(sphereOrigin);
const PxVec3 nBoxExtents = -boxExtents;
const PxReal inflatedSum = sphereRadius + contactDist;
const PxReal sqInflatedSum = inflatedSum * inflatedSum;
// Closest point p on (or in) the box to the sphere center, in box space.
const PxReal x = PxClamp(sphereCenter.x, nBoxExtents.x, boxExtents.x);
const PxReal y = PxClamp(sphereCenter.y, nBoxExtents.y, boxExtents.y);
const PxReal z = PxClamp(sphereCenter.z, nBoxExtents.z, boxExtents.z);
const PxVec3 p(x, y, z);
const PxVec3 v = sphereCenter - p;
const PxReal lengthSq = v.dot(v);
if (sqInflatedSum > lengthSq)//intersect
{
//check whether the sphereCenter is inside the box
const bool bX = boxExtents.x >= PxAbs(sphereCenter.x);
const bool bY = boxExtents.y >= PxAbs(sphereCenter.y);
const bool bZ = boxExtents.z >= PxAbs(sphereCenter.z);
const bool bInsideBox = bX && bY && bZ;// V3IsGrtrOrEq(boxExtents, V3Abs(sphereCenter));
if (bInsideBox)//sphere center inside the box
{
//Pick directions and sign
const PxVec3 absP(PxAbs(p.x), PxAbs(p.y), PxAbs(p.z));
const PxVec3 distToSurface = boxExtents - absP;//dist from embedded center to box surface along 3 dimensions.
//assume z is the smallest element of distToSurface
PxVec3 localNorm = p.z >= 0.f ? PxVec3(0.f, 0.f, 1.f) : PxVec3(0.f, 0.f, -1.f);
PxReal dist = -distToSurface.z;
//find smallest element of distToSurface
if (distToSurface.x <= distToSurface.y && distToSurface.x <= distToSurface.z)
{
localNorm = p.x >= 0.f ? PxVec3(1.f, 0.f, 0.f) : PxVec3(-1.0f, 0.f, 0.f);
dist = -distToSurface.x;
}
else if (distToSurface.y <= distToSurface.z)
{
localNorm = p.y >= 0.f ? PxVec3(0.f, 1.f, 0.f) : PxVec3(0.f, -1.f, 0.f);
dist = -distToSurface.y;
}
//separation so far is just the embedding of the center point; we still have to push out all of the radius.
const PxVec3 normal = transform1.rotate(localNorm);
const PxReal penetration = dist - sphereRadius;
outNormalPen = make_float4(normal.x, normal.y, normal.z, penetration);
}
else
{
//get the closest point from the center to the box surface
const PxReal recipLength = PxRecipSqrt(lengthSq);
const PxReal length = 1 / recipLength;
const PxVec3 locNorm = v * recipLength;
const PxReal penetration = length - sphereRadius;
// Rotate the local normal back to world space.
const PxVec3 normal = transform1.rotate(locNorm);
outNormalPen = make_float4(normal.x, normal.y, normal.z, penetration);
}
return 1;
}
return 0;
}
//0 is sphere, 1 is box(sphere is in the local space of box), normal is in the local space of box
// Sphere-vs-box contact, fully in box-local space: no transforms are applied,
// so both the input center and the output normal are local to the box.
__device__ __forceinline__ static
PxU32 spherebox(const PxVec3& sphereCenter, const PxReal sphereRadius,
const PxVec3 boxExtents, const PxReal contactDist, float4& outNormalPen)
{
const PxVec3 nBoxExtents = -boxExtents;
const PxReal inflatedSum = sphereRadius + contactDist;
const PxReal sqInflatedSum = inflatedSum * inflatedSum;
// Closest point p on (or in) the box to the sphere center.
const PxReal x = PxClamp(sphereCenter.x, nBoxExtents.x, boxExtents.x);
const PxReal y = PxClamp(sphereCenter.y, nBoxExtents.y, boxExtents.y);
const PxReal z = PxClamp(sphereCenter.z, nBoxExtents.z, boxExtents.z);
const PxVec3 p(x, y, z);
const PxVec3 v = sphereCenter - p;
const PxReal lengthSq = v.dot(v);
if (sqInflatedSum > lengthSq)//intersect
{
//check whether the sphereCenter is inside the box
const bool bX = boxExtents.x >= PxAbs(sphereCenter.x);
const bool bY = boxExtents.y >= PxAbs(sphereCenter.y);
const bool bZ = boxExtents.z >= PxAbs(sphereCenter.z);
const bool bInsideBox = bX && bY && bZ;// V3IsGrtrOrEq(boxExtents, V3Abs(sphereCenter));
if (bInsideBox)//sphere center inside the box
{
//Pick directions and sign
const PxVec3 absP(PxAbs(p.x), PxAbs(p.y), PxAbs(p.z));
const PxVec3 distToSurface = boxExtents - absP;//dist from embedded center to box surface along 3 dimensions.
//assume z is the smallest element of distToSurface
PxVec3 localNorm = p.z >= 0.f ? PxVec3(0.f, 0.f, 1.f) : PxVec3(0.f, 0.f, -1.f);
PxReal dist = -distToSurface.z;
//find smallest element of distToSurface
if (distToSurface.x <= distToSurface.y && distToSurface.x <= distToSurface.z)
{
localNorm = p.x >= 0.f ? PxVec3(1.f, 0.f, 0.f) : PxVec3(-1.0f, 0.f, 0.f);
dist = -distToSurface.x;
}
else if (distToSurface.y <= distToSurface.z)
{
localNorm = p.y >= 0.f ? PxVec3(0.f, 1.f, 0.f) : PxVec3(0.f, -1.f, 0.f);
dist = -distToSurface.y;
}
//separation so far is just the embedding of the center point; we still have to push out all of the radius.
const PxReal penetration = dist - sphereRadius;
outNormalPen = make_float4(localNorm.x, localNorm.y, localNorm.z, penetration);
}
else
{
//get the closest point from the center to the box surface
const PxReal recipLength = PxRecipSqrt(lengthSq);
const PxReal length = 1 / recipLength;
const PxVec3 locNorm = v * recipLength;
const PxReal penetration = length - sphereRadius;
outNormalPen = make_float4(locNorm.x, locNorm.y, locNorm.z, penetration);
}
return 1;
}
return 0;
}
//0 is plane, 1 is capsule
// Plane-vs-capsule contact: tests both capsule segment endpoints against the
// plane (local normal = +x of transform0) and returns 0, 1 or 2 contacts in
// outPointPen. outNormal is always set to minus the world plane normal. If t
// is supplied, it receives a blend weight in [0, 1] derived from the relative
// endpoint depths (smoothed via tanh when contactDist > 0).
__device__ __forceinline__ static
PxU32 planeCapsule(const PxTransform& transform0, const PxTransform& transform1,
const PxReal radius, const PxReal halfHeight, const PxReal contactDist,
float4 *outPointPen, PxVec3& outNormal, PxReal* t = NULL)
{
//capsule to plane
const PxTransform bToA = transform0.transformInv(transform1);
//in world space
const PxVec3 planeNormal = transform0.q.getBasisVector0().getNormalized();
//const PxVec3 contactNormal =-planeNormal;
outNormal = -planeNormal;
//unit x
const PxVec3 localNormal(1.f, 0.f, 0.f);
//printf("outNormal (%f, %f, %f)\n", outNormal.x, outNormal.y, outNormal.z);
//capsule is in the local space of plane(n = (1.f, 0.f, 0.f), d=0.f)
const PxVec3 basisVector = bToA.q.getBasisVector0();
const PxVec3 tmp = basisVector * halfHeight;
const PxVec3 s = bToA.p + tmp;
const PxVec3 e = bToA.p - tmp;
const PxReal inflatedRadius = radius + contactDist;
PxU32 numContacts = 0;
// First endpoint: its x coordinate in plane space is the signed distance.
const PxReal signDist0 = s.x;//V3Dot(localNormal, s);
if (inflatedRadius >= signDist0)
{
const PxVec3 worldPoint = transform0.transform(s) - planeNormal * radius;
const PxReal pen = signDist0 - radius;
//printf("s localPoint(%f, %f, %f) pen %f\n", localPoint.x, localPoint.y, localPoint.z, pen);
outPointPen[numContacts] = make_float4(worldPoint.x, worldPoint.y, worldPoint.z, pen);
numContacts++;
}
// Second endpoint.
const PxReal signDist1 = e.x;//V3Dot(localNormal, s);
if (inflatedRadius >= signDist1)
{
const PxVec3 worldPoint = transform0.transform(e) - planeNormal * radius;
const PxReal pen = signDist1 - radius;
//printf("e worldPoint(%f, %f, %f) pen %f\n", worldPoint.x, worldPoint.y, worldPoint.z, pen);
//printf("e normal(%f, %f, %f)\n", outNormal.x, outNormal.y, outNormal.z);
outPointPen[numContacts] = make_float4(worldPoint.x, worldPoint.y, worldPoint.z, pen);
numContacts++;
}
if (t != NULL)
{
if (contactDist > 0.0f)
{
// *t = PxClamp(0.5f * (signDist1 - signDist0) / contactDist + 0.5f, 0.0f, 1.0f);
// Smooth, saturating blend of the two endpoint depths.
*t = 0.5f + 0.5f * tanh(5.0f * (signDist1 - signDist0) / contactDist);
}
else
{
// No offset to normalize by: pick the deeper endpoint outright.
*t = signDist0 > signDist1 ? 0.0f : 1.0f;
}
}
return numContacts;
}
__device__ __forceinline__ static
void pcmDistancePointSegmentTValue22(
const PxVec3& a0, const PxVec3& b0,
const PxVec3& a1, const PxVec3& b1,
const PxVec3& p0, const PxVec3& p1,
const PxVec3& p2, const PxVec3& p3,
PxReal& t0, PxReal& t1,
PxReal& t2, PxReal& t3)
{
	// Unclamped parametric projection of four points onto two segments:
	// p0 and p1 are projected onto segment (a0, b0) -> t0, t1;
	// p2 and p3 are projected onto segment (a1, b1) -> t2, t3.
	// A (near) zero-length segment yields t = 0 for its two projections.
	const PxVec3 seg0 = b0 - a0;
	const PxVec3 seg1 = b1 - a1;

	const PxReal len0Sq = seg0.dot(seg0);
	const PxReal len1Sq = seg1.dot(seg1);

	const PxReal tiny = 1e-6f;
	const bool degenerate0 = PxAbs(len0Sq) < tiny;
	const bool degenerate1 = PxAbs(len1Sq) < tiny;

	t0 = degenerate0 ? 0.f : (p0 - a0).dot(seg0) / len0Sq;
	t1 = degenerate0 ? 0.f : (p1 - a0).dot(seg0) / len0Sq;
	t2 = degenerate1 ? 0.f : (p2 - a1).dot(seg1) / len1Sq;
	t3 = degenerate1 ? 0.f : (p3 - a1).dot(seg1) / len1Sq;
}
// Capsule vs. capsule contact generation.
// Each capsule is given by its transform (local x axis = segment axis),
// radius and half height. Writes up to 2 contacts into outPointPen as
// (world position, penetration); outNormal points from capsule1's segment
// toward capsule0's. The optional outT0/outT1 receive 1 - t of the
// closest-point parameters along segment0/segment1 (i.e. measured from the
// opposite endpoint of the s = p + axis*halfHeight convention used here).
// Returns the number of contacts generated (0, 1 or 2).
__device__ __forceinline__ static
PxU32 capsuleCapsule(const PxTransform& transform0, const PxTransform& transform1,
const PxReal radius0, const PxReal halfHeight0, const PxReal radius1, const PxReal halfHeight1,
const PxReal contactDist,
float4 *outPointPen, PxVec3& outNormal, PxReal* outT0 = NULL, PxReal* outT1 = NULL)
{
PxU32 numContacts = 0;
// Work relative to the midpoint of the two bodies; it is added back to the
// contact points at the end.
const PxVec3 positionOffset = (transform0.p + transform1.p) * 0.5f;
const PxVec3 p0 = transform0.p - positionOffset;
const PxVec3 p1 = transform1.p - positionOffset;
// Segment endpoints s/e and direction d of capsule0.
const PxVec3 basisVector0 = transform0.q.getBasisVector0();
const PxVec3 tmp0 = basisVector0 * halfHeight0;
const PxVec3 s0 = p0 + tmp0;
const PxVec3 e0 = p0 - tmp0;
const PxVec3 d0 = e0 - s0;
// Segment endpoints s/e and direction d of capsule1.
const PxVec3 basisVector1 = transform1.q.getBasisVector0();
const PxVec3 tmp1 = basisVector1 * halfHeight1;
const PxVec3 s1 = p1 + tmp1;
const PxVec3 e1 = p1 - tmp1;
const PxVec3 d1 = e1 - s1;
const PxReal sumRadius = radius0 + radius1; //FAdd(r0, r1);
const PxReal inflatedSum = sumRadius + contactDist;
const PxReal inflatedSumSquared = inflatedSum * inflatedSum;
const PxReal a = d0.dot(d0);//squared length of segment1
const PxReal e = d1.dot(d1);//squared length of segment2
const PxReal eps = 1e-6f;//FEps();
// Closest-point parameters between the two segments.
PxReal t0, t1;
distanceSegmentSegmentSquared(s0, d0, s1, d1, t0, t1);
if (outT0 != NULL)
{
*outT0 = (1.0f - t0);
}
if (outT1 != NULL)
{
*outT1 = (1.0f - t1);
}
const PxVec3 closestA = s0 + d0 * t0;
const PxVec3 closestB = s1 + d1 * t1;
const PxVec3 vv = closestA - closestB;
const PxReal sqDist0 = vv.dot(vv);
if (eps > sqDist0)
{
// Segments (nearly) intersect: the inter-point direction is degenerate, so
// fall back to a normal derived from capsule0's orientation.
const PxVec3 _normal = transform0.q.rotateInv(PxVec3(0.f, 1.f, 0.f));
const PxVec3 normal = _normal.getNormalized();
const PxVec3 _point = closestA - normal * radius0;
const PxVec3 p = _point + positionOffset;
const PxReal pen = -radius0;
outPointPen[0] = make_float4(p.x, p.y, p.z, pen);
//printf("This is two segment inline with each other");
outNormal = normal;
numContacts = 1;
}
else
{
if (inflatedSumSquared >= sqDist0)
{
// Check whether the two capsules are (nearly) parallel; if so, generate up
// to two contacts by projecting segment endpoints onto the other segment.
const PxReal parallelTolerance = 0.9998f;
const PxVec3 dir0 = (eps > a) ? PxVec3(0.f) : (d0 * (1.f / PxSqrt(a)));
const PxVec3 dir1 = (eps > e) ? PxVec3(0.f) : (d1 * (1.f / PxSqrt(e)));
const PxReal cos = PxAbs(dir0.dot(dir1));
if (cos > parallelTolerance)// parallel
{
//tV0 is s1 project to s0e0
//tV1 is e1 project to s0e0
//tV2 is s0 project to s1e1
//tV3 is e0 project to s1e1
PxReal tV0, tV1, tV2, tV3;
pcmDistancePointSegmentTValue22(s0, e0, s1, e1,
s1, e1, s0, e0, tV0, tV1, tV2, tV3);
//printf("t(%f %f %f %f)\n", tV0, tV1, tV2, tV3);
// Each projection that falls within the segment contributes one contact,
// capped at two (the later tests check numContacts < 2).
if (tV0 >= 0.f && tV0 <= 1.f)
{
const PxVec3 projS1 = s0 + d0 * tV0;
const PxVec3 v = projS1 - s1;
const PxReal sqDist = v.dot(v);
const PxReal dist = PxSqrt(sqDist);
const PxReal pen = dist - sumRadius;
const PxVec3 normal = v / dist;// V3ScaleInv(v, dist);
//assert(isFiniteVec3V(normal));
const PxVec3 _p = projS1 - normal * radius0;// V3NegScaleSub(normal, r0, projS1);
const PxVec3 p = _p + positionOffset;// V3Add(_p, positionOffset);
outPointPen[numContacts] = make_float4(p.x, p.y, p.z, pen);
outNormal = normal;
//printf("_p(%f, %f, %f)\n", _p.x, _p.y, _p.z);
//printf("p(%f, %f, %f pen %f)\n", p.x, p.y, p.z, pen);
//printf("normal(%f, %f, %f)\n", normal.x, normal.y, normal.z);
//storeContact(p, normal, pen, contactBuffer);
numContacts++;
}
if (tV1 >= 0.f && tV1 <= 1.f)
{
const PxVec3 projE1 = s0 + d0 * tV1;
const PxVec3 v = projE1 - e1;
const PxReal sqDist = v.dot(v);
const PxReal dist = PxSqrt(sqDist);
const PxReal pen = dist - sumRadius;
const PxVec3 normal = v / dist; // V3ScaleInv(v, dist);
//assert(isFiniteVec3V(normal));
const PxVec3 _p = projE1 - normal * radius0;// V3NegScaleSub(normal, r0, projE1);
const PxVec3 p = _p + positionOffset;
//storeContact(p, normal, pen, contactBuffer);
outPointPen[numContacts] = make_float4(p.x, p.y, p.z, pen);
outNormal = normal;
//printf("p(%f, %f, %f pen %f)\n", p.x, p.y, p.z, pen);
//printf("normal(%f, %f, %f)\n", normal.x, normal.y, normal.z);
numContacts++;
}
if (numContacts < 2 && (tV2 >= 0.f && tV2 <= 1.f))
{
const PxVec3 projS0 = s1 + d1 * tV2;
const PxVec3 v = s0 - projS0;
const PxReal sqDist = v.dot(v);
const PxReal dist = PxSqrt(sqDist);
const PxReal pen = dist - sumRadius;
const PxVec3 normal = v / dist;// V3ScaleInv(v, dist);
//assert(isFiniteVec3V(normal));
const PxVec3 _p = s0 - normal * radius0;// V3NegScaleSub(normal, r0, s0);
const PxVec3 p = _p + positionOffset;
outPointPen[numContacts] = make_float4(p.x, p.y, p.z, pen);
outNormal = normal;
//printf("p(%f, %f, %f pen %f)\n", p.x, p.y, p.z, pen);
//printf("normal(%f, %f, %f)\n", normal.x, normal.y, normal.z);
numContacts++;
}
if (numContacts < 2 && (tV3 >= 0.f && tV3 <= 1.f))
{
const PxVec3 projE0 = s1 + d1 * tV3;// V3ScaleAdd(d1, V4GetW(t), s1);
const PxVec3 v = e0 - projE0;// V3Sub(e0, projE0);
const PxReal sqDist = v.dot(v);// V3Dot(v, v);
const PxReal dist = PxSqrt(sqDist);
const PxReal pen = dist - sumRadius;
const PxVec3 normal = v / dist;
//assert(isFiniteVec3V(normal));
const PxVec3 _p = e0 - normal * radius0;// V3NegScaleSub(normal, r0, e0);
const PxVec3 p = _p + positionOffset;// V3Add(_p, positionOffset);
outPointPen[numContacts] = make_float4(p.x, p.y, p.z, pen);
outNormal = normal;
//printf("p(%f, %f, %f pen %f)\n", p.x, p.y, p.z, pen);
//printf("normal(%f, %f, %f)\n", normal.x, normal.y, normal.z);
numContacts++;
}
}
// Non-parallel capsules (or all endpoint projections out of range):
// single contact at the segment closest points.
if(numContacts == 0)
{
const PxVec3 normal = vv.getNormalized();
const PxVec3 _point = closestA - normal * radius0;
const PxVec3 p = _point + positionOffset;
const PxReal dist = PxSqrt(sqDist0);
const PxReal pen = dist - sumRadius;
outPointPen[0] = make_float4(p.x, p.y, p.z, pen);
outNormal = normal;
//printf("one point p(%f, %f, %f pen %f)\n", p.x, p.y, p.z, pen);
//printf("one point normal(%f, %f, %f)\n", normal.x, normal.y, normal.z);
numContacts = 1;
}
}
return numContacts;
}
#endif

View File

@@ -0,0 +1,163 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __SPHERE_TRIANGLE_CUH__
#define __SPHERE_TRIANGLE_CUH__
//#include "shuffle.cuh"
#include "nputils.cuh"
#include "dataReadWriteHelper.cuh"
#include "convexNpCommon.h"
#include "cudaNpCommon.h"
#include "triangle.cuh"
#include "stdio.h"
// Squared distance from point p to triangle (a, b, c) together with the
// closest point, using the Voronoi-region classification of Ericson,
// "Real-Time Collision Detection", 5.1.5 (closestPtPointTriangle).
//
// adjIdxs carries the triangle's per-edge adjacency slots
// (x: edge a-b, y: edge b-c, z: edge c-a; see isEdgeNonconvex()). Contacts on
// vertex/edge features are only flagged for generation when at least one
// touching edge is not marked non-convex, or (for edges) when the point lies
// numerically on the face boundary; this suppresses contacts against
// interior/concave mesh features.
//
// Outputs:
//  closestP        - closest point on the triangle to p
//  generateContact - whether the caller should emit a contact for this feature
//  faceContact     - true iff the closest feature is the face interior
//  mask            - optional feature id of the closest Voronoi region
// Returns |closestP - p|^2.
__device__ static PxReal distancePointTriangleSquared(const PxVec3 p,
const PxVec3 a,
const PxVec3 b,
const PxVec3 c,
const uint4 adjIdxs,
PxVec3& closestP,
bool& generateContact,
bool& faceContact,
PxU32* mask=NULL)
{
faceContact = false;
// Check if P in vertex region outside A
const PxVec3 ab = b - a;
const PxVec3 ac = c - a;
const PxVec3 ap = p - a;
const PxReal d1 = ab.dot(ap);
const PxReal d2 = ac.dot(ap);
if (d1 < 0.0f && d2 < 0.0f)
{
if(mask)
*mask = PxU32(ConvexTriIntermediateData::eV0);
generateContact = (!isEdgeNonconvex(adjIdxs.x)) || (!isEdgeNonconvex(adjIdxs.z));
closestP = a;
return ap.magnitudeSquared(); // Barycentric coords 1,0,0
}
// Check if P in vertex region outside B
const PxVec3 bp = p - b;
const PxReal d3 = ab.dot(bp);
const PxReal d4 = ac.dot(bp);
if (d3 > 0.0f && d4 < d3)
{
if(mask)
*mask = PxU32(ConvexTriIntermediateData::eV1);
generateContact = (!isEdgeNonconvex(adjIdxs.x)) || (!isEdgeNonconvex(adjIdxs.y));
closestP = b;
return bp.magnitudeSquared(); // Barycentric coords 0,1,0
}
// Check if P in vertex region outside C
const PxVec3 cp = p - c;
const PxReal d5 = ab.dot(cp);
const PxReal d6 = ac.dot(cp);
if (d6 > 0.0f && d5 < d6)
{
if (mask)
*mask = PxU32(ConvexTriIntermediateData::eV2);
generateContact = (!isEdgeNonconvex(adjIdxs.y)) || (!isEdgeNonconvex(adjIdxs.z));
closestP = c;
return cp.magnitudeSquared(); // Barycentric coords 0,0,1
}
// Unnormalized barycentric coordinates of p's projection onto the plane.
PxReal vc = d1 * d4 - d3 * d2;
PxReal vb = d5 * d2 - d1 * d6;
PxReal va = d3 * d6 - d5 * d4;
// Relative tolerance so that points sitting (numerically) on the face
// boundary still generate contacts on non-convex edges.
// Float literal (1e-5f): a plain 1e-5 is a double and would force a
// float->double->float round trip, which is expensive in device code.
const PxReal edgeTol = (va + vb + vc) * 1e-5f;
// Check if P in edge region of AB, if so return projection of P onto AB
if (vc < 0.0f && d1 >= 0.0f && d3 <= 0.0f)
{
if(mask)
*mask = PxU32(ConvexTriIntermediateData::eE01);
generateContact = !isEdgeNonconvex(adjIdxs.x) || vc > -edgeTol;
const PxReal v = d1 / (d1 - d3);
closestP = a + v * ab; // barycentric coords (1-v, v, 0)
return (closestP - p).magnitudeSquared();
}
// Check if P in edge region of AC, if so return projection of P onto AC
if (vb < 0.0f && d2 >= 0.0f && d6 <= 0.0f)
{
if(mask)
*mask = PxU32(ConvexTriIntermediateData::eE02);
generateContact = !isEdgeNonconvex(adjIdxs.z) || vb > -edgeTol;
const PxReal w = d2 / (d2 - d6);
closestP = a + w * ac; // barycentric coords (1-w, 0, w)
return (closestP - p).magnitudeSquared();
}
// Check if P in edge region of BC, if so return projection of P onto BC
if (va < 0.0f && d4 >= d3 && d5 >= d6)
{
if(mask)
*mask = PxU32(ConvexTriIntermediateData::eE12);
generateContact = !isEdgeNonconvex(adjIdxs.y) || va > -edgeTol;
const PxReal w = (d4 - d3) / ((d4 - d3) + (d5 - d6));
closestP = b + w * (c - b); // barycentric coords (0, 1-w, w)
return (closestP - p).magnitudeSquared();
}
// Face contacts are always generated.
generateContact = true;
faceContact = true;
if(mask)
*mask = 0;
// P inside face region. Compute Q through its barycentric coords (u,v,w)
closestP = a + (ab * vb + ac * vc)/ (va + vb + vc);
return (closestP - p).magnitudeSquared();
}
// Scalar triple product a . (b x c): the signed volume of the parallelepiped
// spanned by the three vectors.
__device__ inline PxReal scalarTriple(const PxVec3& a, const PxVec3& b, const PxVec3& c)
{
	const PxVec3 axb = a.cross(b);
	return axb.dot(c);
}
// Line vs. triangle test: the infinite line through p and q against triangle
// (a, b, c). Based on Real-Time Collision Detection p.186 - the line hits the
// triangle iff the three scalar triple products (one per edge) all share the
// same sign; this variant only accepts one winding (all non-negative).
__device__ inline bool intersectLineTri(const PxVec3& p, const PxVec3& q, const PxVec3& a, const PxVec3& b, const PxVec3& c)
{
	const PxVec3 dir = q - p;
	const PxVec3 relA = a - p;
	const PxVec3 relB = b - p;
	const PxVec3 relC = c - p;

	// m = dir x relC is shared by the first two triple products.
	const PxVec3 m = dir.cross(relC);
	if (relB.dot(m) < 0.f)		// u
		return false;
	if (-relA.dot(m) < 0.f)		// v
		return false;
	return scalarTriple(dir, relB, relA) >= 0.f;	// w
}
#endif

View File

@@ -0,0 +1,43 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __TRIANGLE_COMMON_CUH__
#define __TRIANGLE_COMMON_CUH__
// Feature ids stored with contacts (consumers live outside this header):
// NOTE(review): names suggest TRI_FEATURE_IDX marks a face feature and
// EDGE_FEATURE_IDX is an edge flag bit - verify at use sites.
#define TRI_FEATURE_IDX 0x7FffFFff
#define EDGE_FEATURE_IDX 0x80000000
// Adjacency sentinel: this edge has no neighboring triangle (mesh boundary).
static const physx::PxU32 BOUNDARY = 0xffffffff;
// High bit of an adjacency index marks the shared edge as non-convex.
static const physx::PxU32 NONCONVEX_FLAG = 0x80000000;
// True iff the adjacency slot refers to a real neighboring triangle (i.e. not
// a mesh boundary) and that shared edge carries the non-convex flag.
__device__ static bool isEdgeNonconvex(physx::PxU32 adjEdgeIndex)
{
	if (adjEdgeIndex == BOUNDARY)
		return false;
	return (adjEdgeIndex & NONCONVEX_FLAG) != 0;
}
#endif

View File

@@ -0,0 +1,198 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __TRIANGLE_MESH_CUH__
#define __TRIANGLE_MESH_CUH__
#include "GuBV32.h"
#include "PxgConvexConvexShape.h"
#include "foundation/PxVec3.h"
namespace physx
{
// GPU-side view of a cooked triangle mesh, filled in by
// readTriangleMesh(const PxgShape&, PxgTriangleMesh&) below. Every pointer
// aliases into the single cooked-mesh blob referenced by
// PxgShape::hullOrMeshPtr - this struct owns no memory.
struct PxgTriangleMesh
{
PxU32 numVerts; // number of vertices in trimeshVerts
PxU32 numTris; // number of triangles in indices
PxU32 numBv32TreeNodes; // BV32 tree node count (blob header .z)
PxU32 nbPackedNodes; // number of entries in bv32PackedNodes
const Gu::BV32DataPacked* bv32PackedNodes; // packed BV32 midphase tree
const float4* trimeshVerts; // vertex positions
const uint4* indices; // per-triangle vertex indices
const PxU32* PX_RESTRICT trimeshFaceRemap; // face remap table (one PxU32 per triangle)
// --- SDF section; only populated when sdfDims has non-zero volume ---
uint4 sdfDims; // SDF grid dimensions (zero volume => no SDF)
PxVec3 meshLower; // lower corner of the SDF grid
PxReal spacing; // SDF cell spacing
PxU32 subgridSize;
PxReal subgridsMinSdfValue;
PxReal subgridsMaxSdfValue;
PxU32 pad;
//const PxReal* sdf;
CUtexObject mTexObject; // SDF texture object; 0 when the mesh has no SDF
CUtexObject mTexObjectSparse; // sparse-subgrid SDF texture object
const PxU32* subgridStarts; // subgrid start offsets (sizeInfo.w entries, see reader)
};
// Walks the cooked triangle-mesh blob: reads the header uint4
// (x = #verts, y = #tris, z/w = BV32 node counts; w sizes the packed-node
// block) and returns pointers to the packed BV32 nodes and the vertex array.
// On return trimeshGeomPtr has been advanced to the vertex data.
// Returns the header uint4.
static PX_FORCE_INLINE __device__ uint4 readTriangleMesh(const PxU8* PX_RESTRICT & trimeshGeomPtr,
const Gu::BV32DataPacked* PX_RESTRICT & bv32PackedNodes, const float4* PX_RESTRICT & trimeshVerts)
{
const uint4 trimesh_nbVerts_nbTris_nbAdjVertsTotal = *reinterpret_cast<const uint4 *>(trimeshGeomPtr);
trimeshGeomPtr += sizeof(uint4);
bv32PackedNodes = reinterpret_cast<const Gu::BV32DataPacked*>(trimeshGeomPtr);
trimeshGeomPtr += sizeof(const Gu::BV32DataPacked)* trimesh_nbVerts_nbTris_nbAdjVertsTotal.w;
trimeshVerts = reinterpret_cast<const float4 *>(trimeshGeomPtr);
return trimesh_nbVerts_nbTris_nbAdjVertsTotal;
}
// As the overload above, but additionally skips the vertex array
// (result.x float4s) and locates the triangle index array. On return
// trimeshGeomPtr points at the indices. Returns the blob header.
static PX_FORCE_INLINE __device__ uint4 readTriangleMesh(const PxU8 * PX_RESTRICT & trimeshGeomPtr,
const Gu::BV32DataPacked* PX_RESTRICT & bv32PackedNodes, const float4* PX_RESTRICT & trimeshVerts, const uint4* PX_RESTRICT & trimeshTriIndices)
{
uint4 result = readTriangleMesh(trimeshGeomPtr, bv32PackedNodes, trimeshVerts);
trimeshGeomPtr += sizeof(float4) * result.x;
trimeshTriIndices = reinterpret_cast<const uint4 *>(trimeshGeomPtr);
return result;
}
// Locates vertices, triangle indices and the per-triangle adjacency block
// (skipping the index array: result.y uint4s). The BV32 node pointer is
// consumed locally and not exposed. On return trimeshGeomPtr points at the
// adjacency data. Returns the blob header.
static PX_FORCE_INLINE __device__ uint4 readTriangleMesh(const PxU8 * PX_RESTRICT & trimeshGeomPtr, const float4* PX_RESTRICT & trimeshVerts, const uint4* PX_RESTRICT & trimeshTriIndices,
const uint4* PX_RESTRICT & trimeshTriAdjacencies)
{
const Gu::BV32DataPacked* bv32PackedNodes;
uint4 result = readTriangleMesh(trimeshGeomPtr, bv32PackedNodes, trimeshVerts, trimeshTriIndices);
trimeshGeomPtr += sizeof(uint4)* result.y;
trimeshTriAdjacencies = reinterpret_cast<const uint4 *>(trimeshGeomPtr);
return result;
}
// Locates vertices, indices, adjacency and the face remap table (skipping the
// adjacency block: result.y uint4s). On return trimeshGeomPtr points at the
// remap table. Returns the blob header.
static PX_FORCE_INLINE __device__ uint4 readTriangleMesh(const PxU8 * PX_RESTRICT & trimeshGeomPtr, const float4* PX_RESTRICT & trimeshVerts, const uint4* PX_RESTRICT & trimeshTriIndices,
const uint4* PX_RESTRICT & trimeshTriAdjacencies, const PxU32* PX_RESTRICT & trimeshFaceRemap)
{
uint4 result = readTriangleMesh(trimeshGeomPtr, trimeshVerts, trimeshTriIndices, trimeshTriAdjacencies);
trimeshGeomPtr += sizeof(uint4)* result.y;
trimeshFaceRemap = reinterpret_cast<const PxU32 *>(trimeshGeomPtr);
return result;
}
// Fully parses the cooked triangle-mesh blob referenced by the shape into a
// PxgTriangleMesh, including the optional SDF section. The pointer walk below
// must mirror the CPU-side serialization layout exactly - keep the skipped
// section sizes in sync with the cooking code.
static __device__ void readTriangleMesh(const PxgShape& trimeshShape, PxgTriangleMesh& triangleMesh)
{
const PxU8 * trimeshGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape.hullOrMeshPtr);
// Header: x = #verts, y = #tris, z = BV32 tree nodes, w = packed BV32 nodes.
const uint4 nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes = *reinterpret_cast<const uint4 *>(trimeshGeomPtr);
trimeshGeomPtr += sizeof(uint4);
triangleMesh.numVerts = nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.x;
triangleMesh.numTris = nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.y;
triangleMesh.numBv32TreeNodes = nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.z;
triangleMesh.nbPackedNodes = nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.w;
triangleMesh.bv32PackedNodes = reinterpret_cast<const Gu::BV32DataPacked*>(trimeshGeomPtr);
trimeshGeomPtr += sizeof(const Gu::BV32DataPacked)* nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.w;
triangleMesh.trimeshVerts = reinterpret_cast<const float4 *>(trimeshGeomPtr);
trimeshGeomPtr += sizeof(float4) * nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.x;
triangleMesh.indices = reinterpret_cast<const uint4 *>(trimeshGeomPtr);
trimeshGeomPtr += sizeof(uint4) * nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.y;
// Skip the adjacency block (one uint4 per triangle); not exposed here.
//const uint4* PX_RESTRICT trimeshTriAdjacencies = reinterpret_cast<const uint4 *>(trimeshGeomPtr);
trimeshGeomPtr += sizeof(uint4)* nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.y;
triangleMesh.trimeshFaceRemap = reinterpret_cast<const PxU32 *>(trimeshGeomPtr);
// Skip the face remap table (one PxU32 per triangle, unpadded).
//trimeshGeomPtr += ((nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.y + 3)& (~3)) * sizeof(PxU32);
trimeshGeomPtr += nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.y * sizeof(PxU32);
// Skip triMesh->mAccumulatedTrianglesRef (numVerts + 1 entries).
trimeshGeomPtr += (nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.x + 1) * sizeof(PxU32);
// Skip triMesh->mTrianglesReferences (3 entries per triangle).
trimeshGeomPtr += (nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.y * 3) * sizeof(PxU32);
// Align to 16 bytes before the SDF header.
trimeshGeomPtr = (PxU8*)(((size_t)trimeshGeomPtr + 15) & ~15);
uint4 sdfDims = *reinterpret_cast<const uint4 *>(trimeshGeomPtr);
trimeshGeomPtr += sizeof(uint4);
PxU32 numSdfs = sdfDims.x*sdfDims.y*sdfDims.z;
triangleMesh.sdfDims = sdfDims;
if (numSdfs)
{
// SDF section: grid origin/spacing, subgrid size info, texture objects and
// subgrid start offsets.
float4 meshLower_spacing = *reinterpret_cast<const float4 *>(trimeshGeomPtr);
trimeshGeomPtr += sizeof(float4);
uint4 sizeInfo = *reinterpret_cast<const uint4 *>(trimeshGeomPtr);
trimeshGeomPtr += sizeof(uint4);
triangleMesh.meshLower = PxVec3(meshLower_spacing.x, meshLower_spacing.y, meshLower_spacing.z);
triangleMesh.spacing = meshLower_spacing.w;
/*PxU32 subgridSize;
PxU32 unused0;
PxU32 unused1;
PxU32 numCoarseGridElements;*/
triangleMesh.subgridSize = sizeInfo.x;
// sizeInfo.y/.z are raw float bits stored in a uint4.
triangleMesh.subgridsMinSdfValue = reinterpret_cast<PxReal&>(sizeInfo.y);
triangleMesh.subgridsMaxSdfValue = reinterpret_cast<PxReal&>(sizeInfo.z);
//triangleMesh.numCoarseGridElements = sizeInfo.w;
//triangleMesh.sdf = reinterpret_cast<const PxReal*>(trimeshGeomPtr);
triangleMesh.mTexObject = *reinterpret_cast<const CUtexObject*>(trimeshGeomPtr);
trimeshGeomPtr += sizeof(const CUtexObject);
triangleMesh.mTexObjectSparse = *reinterpret_cast<const CUtexObject*>(trimeshGeomPtr);
trimeshGeomPtr += sizeof(const CUtexObject);
triangleMesh.subgridStarts = reinterpret_cast<const PxU32 *>(trimeshGeomPtr);
trimeshGeomPtr += sizeof(PxU32) * sizeInfo.w;
}
else
// No SDF: a zero texture object marks the SDF section as absent.
triangleMesh.mTexObject = 0;
}
}
#endif

View File

@@ -0,0 +1,121 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __TRIANGLE_TRIANGLE_CUH__
#define __TRIANGLE_TRIANGLE_CUH__
#include "foundation/PxPlane.h"
#include "foundation/PxVec3.h"
// Triangle data used by the SAT helpers below: the three vertices, the
// triangle's plane (n, d) and a reference point.
struct Triangle
{
physx::PxVec3 verts[3];
physx::PxPlane triPlane; // plane through the triangle
physx::PxVec3 centroid; //mShapeCoMInTrimeshVertexSpace (original note; presumably the query shape's CoM in trimesh vertex space - verify at fill site)
};
// Smallest signed distance of the triangle's vertices to 'plane', i.e. the
// lower end of the triangle's projection interval along the plane normal.
__device__ PxReal minProject(const PxPlane& plane, const Triangle& triangle)
{
	const PxReal dist0 = plane.n.dot(triangle.verts[0]);
	const PxReal dist1 = plane.n.dot(triangle.verts[1]);
	const PxReal dist2 = plane.n.dot(triangle.verts[2]);
	return PxMin(dist0, PxMin(dist1, dist2)) + plane.d;
}
// Projects both triangles onto 'axis' and returns the gap between the two
// projection intervals: positive means the axis separates the triangles,
// negative means they overlap along this axis.
__device__ inline PxReal testSeparatingAxis(const PxVec3& axis, const Triangle& triangle0, const Triangle& triangle1)
{
	PxReal lo0 = triangle0.verts[0].dot(axis);
	PxReal hi0 = lo0;
	PxReal lo1 = triangle1.verts[0].dot(axis);
	PxReal hi1 = lo1;

	for (PxU32 i = 1; i < 3; ++i)
	{
		const PxReal proj0 = triangle0.verts[i].dot(axis);
		const PxReal proj1 = triangle1.verts[i].dot(axis);
		lo0 = PxMin(proj0, lo0);
		hi0 = PxMax(proj0, hi0);
		lo1 = PxMin(proj1, lo1);
		hi1 = PxMax(proj1, hi1);
	}

	return PxMax(lo1 - hi0, lo0 - hi1);
}
// SAT (separating axis) test between two triangles. Returns the largest
// separation found over the tested axes; a return value > tolerance means the
// triangles are separated (the test early-outs as soon as any axis exceeds
// tolerance), while a smaller value is the deepest overlap measure over the
// tested axes.
// Axes tested: both triangle plane normals, and (when TDoCross, the default)
// the 9 cross products of the two triangles' edge directions.
template <bool TDoCross = true>
__device__ inline PxReal satIntersect(const Triangle& triangle0, const Triangle& triangle1, const PxReal tolerance)
{
PxReal sep = minProject(triangle0.triPlane, triangle1);
if (sep > tolerance)
return sep;
sep = PxMax(sep, minProject(triangle1.triPlane, triangle0));
if (sep > tolerance)
return sep;
if (TDoCross)
{
// Edge direction vectors of both triangles.
PxVec3 axes0[3];
PxVec3 axes1[3];
axes0[0] = triangle0.verts[1] - triangle0.verts[0];
axes0[1] = triangle0.verts[2] - triangle0.verts[0];
axes0[2] = triangle0.verts[2] - triangle0.verts[1];
axes1[0] = triangle1.verts[1] - triangle1.verts[0];
axes1[1] = triangle1.verts[2] - triangle1.verts[0];
axes1[2] = triangle1.verts[2] - triangle1.verts[1];
for (PxU32 i = 0; i < 3; ++i)
{
const PxVec3 axis0 = axes0[i];
for (PxU32 j = 0; j < 3; ++j)
{
const PxVec3 axis1 = axes1[j];
PxVec3 sepAxis = axis0.cross(axis1);
const PxReal magSq = sepAxis.magnitudeSquared();
// Skip near-parallel edge pairs whose cross product is degenerate.
if (magSq > 1e-5f)
{
sepAxis = sepAxis * (1.f / PxSqrt(magSq));
const PxReal tSep = testSeparatingAxis(sepAxis, triangle0, triangle1);
sep = PxMax(sep, tSep);
if (sep > tolerance)
return sep;
}
}
}
}
return sep;
}
#endif

View File

@@ -0,0 +1,683 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxMath.h"
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxTransform.h"
#include "geometry/PxGeometry.h"
#include "PxgContactManager.h"
#include "PxsContactManagerState.h"
#include "PxgConvexConvexShape.h"
#include "PxsMaterialCore.h"
#include "PxgPersistentContactManifold.h"
#include "PxsTransformCache.h"
#include "PxContact.h"
#include "GuBV32.h"
#include "cudaNpCommon.h"
#include "contactPatchUtils.cuh"
#include "contactReduction.cuh"
#include "utils.cuh"
#include "manifold.cuh"
#include "triangleMesh.cuh"
#define PLANE_TRI_MAX_CONTACTS 6
using namespace physx;
// No-op host-side symbol - presumably referenced to force this module to be
// linked/loaded so its kernels are registered; verify against the kernel
// module loader.
extern "C" __host__ void initNarrowphaseKernels22() {}
//each block deal with one test
extern "C" __global__
__launch_bounds__(1024, 1)
void trimeshPlaneNarrowphase(
const PxReal toleranceLength,
const PxgContactManagerInput* PX_RESTRICT cmInputs,
PxsContactManagerOutput* PX_RESTRICT cmOutputs,
PxgShape* PX_RESTRICT shapes,
const PxsCachedTransform* PX_RESTRICT transformCache,
const PxReal* contactDistance,
const PxsMaterialData* PX_RESTRICT materials,
PxgPersistentContactMultiManifold* PX_RESTRICT multiManifold,
PxU8* PX_RESTRICT contactStream,
PxU8* PX_RESTRICT patchStream,
PxgPatchAndContactCounters* PX_RESTRICT patchAndContactCounters,
PxU32* PX_RESTRICT touchChangeFlags,
PxU32* PX_RESTRICT patchChangeFlags,
PxU8* PX_RESTRICT startContactPatches,
PxU8* PX_RESTRICT startContactPoints,
PxU8* PX_RESTRICT startContactForces,
PxU32 patchBytesLimit,
PxU32 contactBytesLimit,
PxU32 forceBytesLimit,
const PxReal clusterTolerance)
{
__shared__ PxU32 sContacts[(sizeof(PxVec3)/sizeof(PxU32)) * (WARP_SIZE + 1) * PLANE_TRI_MAX_CONTACTS];
PxVec3* contacts = reinterpret_cast<PxVec3*>(sContacts);
__shared__ PxReal separations[(WARP_SIZE+1)*PLANE_TRI_MAX_CONTACTS];
__shared__ PxU32 counters[WARP_SIZE];
__shared__ PxU32 retainedCount;
__shared__ bool doFullContactGen;
if (threadIdx.x == 0)
{
retainedCount = 0;
}
const PxU32 workIndex = blockIdx.x;
const PxU32 threadIndexInWarp = threadIdx.x & 31;
PxgShape trimeshShape, planeShape;
PxU32 trimeshCacheRef, planeCacheRef;
LoadShapePairWarp<PxGeometryType::eTRIANGLEMESH, PxGeometryType::ePLANE>(cmInputs, workIndex, shapes,
trimeshShape, trimeshCacheRef, planeShape, planeCacheRef);
PxsCachedTransform trimeshTransformCache = transformCache[trimeshCacheRef];
PxsCachedTransform planeTransformCache = transformCache[planeCacheRef];
const PxReal cDistance = contactDistance[trimeshCacheRef] + contactDistance[planeCacheRef];
PxTransform planeTransform = planeTransformCache.transform;
PxTransform trimeshTransform = trimeshTransformCache.transform;
// Geometries : Triangle Mesh
const PxU8 * trimeshGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape.hullOrMeshPtr);
const Gu::BV32DataPacked* nodes;
const float4 * trimeshVerts;
uint4 counts = readTriangleMesh(trimeshGeomPtr, nodes, trimeshVerts);
const PxU32 numVerts = counts.x;
const PxU32 numIterationsRequired = (numVerts + blockDim.x - 1) / blockDim.x;
PxVec3 min(PX_MAX_F32), max(-PX_MAX_F32);
if (threadIndexInWarp < nodes->mNbNodes)
{
const PxVec4 min4 = nodes->mMin[threadIndexInWarp];
const PxVec4 max4 = nodes->mMax[threadIndexInWarp];
min = PxVec3(min4.x, min4.y, min4.z);
max = PxVec3(max4.x, max4.y, max4.z);
}
minIndex(min.x, FULL_MASK, min.x); minIndex(min.y, FULL_MASK, min.y); minIndex(min.z, FULL_MASK, min.z);
maxIndex(max.x, FULL_MASK, max.x); maxIndex(max.y, FULL_MASK, max.y); maxIndex(max.z, FULL_MASK, max.z);
const float4 extents4_f = make_float4(max.x - min.x, max.y - min.y, max.z - min.z, 0.f)*0.5f;
PxReal minMargin = calculatePCMConvexMargin(extents4_f, trimeshShape.scale.scale, toleranceLength) * 0.5f;
const PxReal ratio = 0.1f;
const PxReal breakingThresholdRatio = 0.5f;
bool lostContacts = false;
PxTransform trimeshToPlane = planeTransform.transformInv(trimeshTransform);
const PxU32 warpIndex = threadIdx.x / 32;
if (warpIndex == 0)
{
const bool invalidate = invalidateManifold(trimeshToPlane, multiManifold[workIndex], minMargin, ratio);
if (!invalidate)
{
const PxReal projectBreakingThreshold = minMargin * breakingThresholdRatio;
lostContacts = refreshManifolds(
trimeshToPlane,
projectBreakingThreshold,
multiManifold + workIndex
);
}
bool fullContactGen = invalidate || lostContacts;
if (threadIdx.x == 0)
doFullContactGen = fullContactGen;
}
//const PxU32 numThreadsRequired = numIterationsRequired * blockDim.x;
PxVec3 worldNormal = planeTransform.q.getBasisVector0();
__syncthreads();
if (doFullContactGen)
{
for (PxU32 vertInd = 0; vertInd < numIterationsRequired; vertInd++)
{
PxU32 i = vertInd * blockDim.x + threadIdx.x;
bool hasContact = false;
PxVec3 worldPoint;
PxReal separation = PX_MAX_F32;
if (i < numVerts)
{
float4 point = trimeshVerts[i];
PxVec3 tp = PxLoad3(point);
PxVec3 p = vertex2Shape(tp, trimeshShape.scale.scale, trimeshShape.scale.rotation);
//v in plane space
const PxVec3 pInPlaneSpace = trimeshToPlane.transform(p);
//separation / penetration
//separation = pInPlaneSpace.x;// FSub(V3GetX(sphereCenterInPlaneSpace), radius);
if (cDistance >= pInPlaneSpace.x)
{
//get the plane normal
separation = pInPlaneSpace.x;
worldPoint = trimeshTransform.transform(p);
hasContact = true;
}
}
int mask = __ballot_sync(FULL_MASK, hasContact);
mask = contactReduce<true, true, PLANE_TRI_MAX_CONTACTS, false>(worldPoint, separation, worldNormal, mask, clusterTolerance);
counters[warpIndex] = __popc(mask);
hasContact = mask & (1 << threadIndexInWarp);
__syncthreads();
PxU32 counter = counters[threadIndexInWarp];
PxU32 contactCount = warpScan<AddOpPxU32, PxU32>(FULL_MASK, counter);
//exclusive runsum
PxU32 contactWarpOffset = contactCount - counter;
contactWarpOffset = __shfl_sync(FULL_MASK, contactWarpOffset, warpIndex);
const PxU32 offset = warpScanExclusive(mask, threadIndexInWarp);
if (hasContact)
{
const PxU32 index = offset + contactWarpOffset + retainedCount;
contacts[index] = worldPoint;
separations[index] = separation;
}
const PxU32 totalContacts = __shfl_sync(FULL_MASK, contactCount, 31) + retainedCount;
__syncthreads();
//Now do another loop with warp 0 doing reduction
if (warpIndex == 0)
{
hasContact = false;
for (PxU32 ind = 0; ind < totalContacts;)
{
PxU32 readMask = ~(__ballot_sync(FULL_MASK, hasContact));
if (!hasContact)
{
PxU32 readIndex = warpScanExclusive(readMask, threadIndexInWarp) + ind;
if (readIndex < totalContacts)
{
worldPoint = contacts[readIndex];
separation = separations[readIndex];
hasContact = true;
}
}
mask = __ballot_sync(FULL_MASK, hasContact);
ind += __popc(readMask);
mask = contactReduce<true, true, PLANE_TRI_MAX_CONTACTS, false>(worldPoint, separation, worldNormal, mask, clusterTolerance);
hasContact = mask & (1 << threadIndexInWarp);
}
__syncwarp();
retainedCount = __popc(__ballot_sync(FULL_MASK, hasContact));
if (hasContact)
{
PxU32 offset = warpScanExclusive(mask, threadIndexInWarp);
contacts[offset] = worldPoint;
separations[offset] = separation;
}
}
__syncthreads();
}
//Store to PCM...
if (warpIndex == 0)
{
PxgPersistentContactMultiManifold& manifold = multiManifold[workIndex];
if (retainedCount)
{
if (threadIdx.x == 0)
{
manifold.mNbManifolds = 1;
manifold.mNbContacts[0] = retainedCount;
manifold.mRelativeTransform = trimeshToPlane;
}
if (threadIdx.x < retainedCount)
{
PxgContact& contact = manifold.mContacts[0][threadIdx.x];
contact.pointA = trimeshTransform.transformInv(contacts[threadIdx.x]);
PxVec3 planePoint = planeTransform.transformInv(contacts[threadIdx.x]);
planePoint.x = 0.f;// separations[threadIdx.x];
contact.pointB = planePoint;
contact.normal = PxVec3(1.f, 0.f, 0.f);
contact.penetration = separations[threadIdx.x];
contact.triIndex = 0xFFFFFFFF;
}
}
else
manifold.mNbManifolds = 0;
}
}
else
{
//Load contacts out of PCM...
if (warpIndex == 0)
{
PxgPersistentContactMultiManifold& manifold = multiManifold[workIndex];
if (manifold.mNbManifolds != 0)
{
if (threadIdx.x < manifold.mNbContacts[0])
{
PxgContact& contact = manifold.mContacts[0][threadIdx.x];
contacts[threadIdx.x] = trimeshTransform.transform(contact.pointA);
separations[threadIdx.x] = contact.penetration;
}
if (threadIdx.x == 0)
retainedCount = manifold.mNbContacts[0];
}
}
__syncthreads();
}
//Retained counts stores the set of contacts we kept, which are stored in the first N
//elements of contacts and separations buffer
if (warpIndex == 0)
{
PxU32 nbContacts = retainedCount;
PxU32 contactByteOffset = 0xFFFFFFFF;
PxU32 forceAndIndiceByteOffset = 0xFFFFFFFF;
if (threadIndexInWarp == 0)
{
if (nbContacts)
{
contactByteOffset = atomicAdd(&(patchAndContactCounters->contactsBytes), sizeof(PxContact) * nbContacts);
forceAndIndiceByteOffset = atomicAdd(&(patchAndContactCounters->forceAndIndiceBytes), sizeof(PxU32) * nbContacts);
if ((contactByteOffset + sizeof(PxContact)* nbContacts) > contactBytesLimit)
{
patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::CONTACT_BUFFER_OVERFLOW);
contactByteOffset = 0xFFFFFFFF; //overflow
}
if ((forceAndIndiceByteOffset + sizeof(PxU32)* nbContacts) > forceBytesLimit)
{
patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::FORCE_BUFFER_OVERFLOW);
forceAndIndiceByteOffset = 0xFFFFFFFF; //overflow
}
}
}
contactByteOffset = __shfl_sync(FULL_MASK, contactByteOffset, 0);
PxsContactManagerOutput* output = cmOutputs + workIndex;
//write point and penetration to the contact stream
if (contactByteOffset != 0xFFFFFFFF)
{
//*((float4 *)(contactStream + contactByteOffset)) = pointPen;
//output->contactForces = reinterpret_cast<PxReal*>(startContactForces + forceAndIndiceByteOffset);
//output->contactPoints = startContactPoints + contactByteOffset;
float4* baseContactStream = (float4*)(contactStream + contactByteOffset);
if (threadIndexInWarp < nbContacts)
{
float4& outPointPen = baseContactStream[threadIndexInWarp];
outPointPen = make_float4(contacts[threadIndexInWarp].x, contacts[threadIndexInWarp].y,
contacts[threadIndexInWarp].z, separations[threadIndexInWarp]);
}
if (threadIndexInWarp == 0)
{
output->contactForces = reinterpret_cast<PxReal*>(startContactForces + forceAndIndiceByteOffset);
output->contactPoints = startContactPoints + contactByteOffset;
}
}
else
{
if (threadIndexInWarp == 0)
{
output->contactForces = NULL;
output->contactPoints = NULL;
}
nbContacts = 0;
}
if (threadIndexInWarp == 0)
{
PxU32 patchIndex = registerContactPatch(cmOutputs, patchAndContactCounters, touchChangeFlags, patchChangeFlags, startContactPatches, patchBytesLimit, workIndex, nbContacts);
if (nbContacts == 0)
(cmOutputs + workIndex)->contactPatches = 0;
insertIntoPatchStream(materials, patchStream, planeShape, trimeshShape, patchIndex, worldNormal, nbContacts);
}
}
}
//each block deal with one test
extern "C" __global__
__launch_bounds__(1024, 1)
void trimeshHeightfieldNarrowphase(
    const PxReal toleranceLength,
    const PxgContactManagerInput* PX_RESTRICT cmInputs,
    PxsContactManagerOutput* PX_RESTRICT cmOutputs,
    PxgShape* PX_RESTRICT shapes,
    const PxsCachedTransform* PX_RESTRICT transformCache,
    const PxReal* contactDistance,
    const PxsMaterialData* PX_RESTRICT materials,
    PxU8* PX_RESTRICT contactStream,
    PxU8* PX_RESTRICT patchStream,
    PxgPatchAndContactCounters* PX_RESTRICT patchAndContactCounters,
    PxU32* PX_RESTRICT touchChangeFlags,
    PxU32* PX_RESTRICT patchChangeFlags,
    PxU8* PX_RESTRICT startContactPatches,
    PxU8* PX_RESTRICT startContactPoints,
    PxU8* PX_RESTRICT startContactForces,
    PxU32 patchBytesLimit,
    PxU32 contactBytesLimit,
    PxU32 forceBytesLimit,
    const PxReal clusterBias)
{
    // One block per contact-manager pair (workIndex == blockIdx.x). Each thread
    // tests one triangle-mesh vertex against the heightfield's local x = 0
    // surface; contacts are reduced per warp, then warp 0 performs a final
    // reduction so at most PLANE_TRI_MAX_CONTACTS points survive per pair.
    //
    // The fixes below bring this kernel in line with the mesh-vs-plane kernel
    // above, which uses the identical reduction/output pattern.
    __shared__ char sContacts[sizeof(PxVec3) * (WARP_SIZE + 1) * PLANE_TRI_MAX_CONTACTS];
    PxVec3* contacts = reinterpret_cast<PxVec3*>(sContacts);
    __shared__ PxReal separations[(WARP_SIZE + 1)*PLANE_TRI_MAX_CONTACTS];
    __shared__ PxU32 counters[WARP_SIZE];
    __shared__ PxU32 retainedCount;
    if (threadIdx.x == 0)
    {
        retainedCount = 0;
    }
    __syncthreads();
    const PxU32 workIndex = blockIdx.x;
    PxgShape trimeshShape, heightfieldShape;
    PxU32 trimeshCacheRef, heightfieldCacheRef;
    LoadShapePairWarp<PxGeometryType::eTRIANGLEMESH, PxGeometryType::eHEIGHTFIELD>(cmInputs, workIndex, shapes,
        trimeshShape, trimeshCacheRef, heightfieldShape, heightfieldCacheRef);
    PxsCachedTransform trimeshTransformCache = transformCache[trimeshCacheRef];
    PxsCachedTransform heightfieldTransformCache = transformCache[heightfieldCacheRef];
    const PxReal cDistance = contactDistance[trimeshCacheRef] + contactDistance[heightfieldCacheRef];
    PxTransform trimeshTransform = trimeshTransformCache.transform;
    PxTransform heightfieldTransform = heightfieldTransformCache.transform;
    // Geometries : Triangle Mesh
    const PxU8 * trimeshGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape.hullOrMeshPtr);
    const Gu::BV32DataPacked* nodes;
    const float4 * trimeshVerts;
    uint4 counts = readTriangleMesh(trimeshGeomPtr, nodes, trimeshVerts);
    const PxU32 numVerts = counts.x;
    const PxU32 numIterationsRequired = (numVerts + blockDim.x - 1) / blockDim.x;
    const PxU32 threadIndexInWarp = threadIdx.x & 31;
    const PxU32 warpIndex = threadIdx.x / 32;
    PxTransform trimeshToHeightfield = heightfieldTransform.transformInv(trimeshTransform);
    PxVec3 worldNormal = heightfieldTransform.q.getBasisVector0();
    for (PxU32 iter = 0; iter < numIterationsRequired; iter++)
    {
        PxU32 i = iter * blockDim.x + threadIdx.x;
        bool hasContact = false;
        PxVec3 worldPoint;
        // BUGFIX: initialize so lanes without a contact feed a well-defined
        // value into contactReduce (the mesh-vs-plane kernel does the same).
        PxReal separation = PX_MAX_F32;
        if (i < numVerts)
        {
            float4 point = trimeshVerts[i];
            PxVec3 tp = PxLoad3(point);
            PxVec3 p = vertex2Shape(tp, trimeshShape.scale.scale, trimeshShape.scale.rotation);
            //v in heightfield space
            const PxVec3 pInHeightfieldSpace = trimeshToHeightfield.transform(p);
            //separation / penetration
            separation = pInHeightfieldSpace.x;
            if (cDistance >= pInHeightfieldSpace.x)
            {
                worldPoint = trimeshTransform.transform(p);
                hasContact = true;
            }
        }
        int mask = __ballot_sync(FULL_MASK, hasContact);
        mask = contactReduce<true, true, PLANE_TRI_MAX_CONTACTS>(worldPoint, separation, worldNormal, mask, clusterBias);
        counters[warpIndex] = __popc(mask);
        // BUGFIX: only lanes that survived contactReduce may write their
        // contact below; previously the pre-reduction flag was used, letting
        // culled lanes write to colliding slots.
        hasContact = mask & (1 << threadIndexInWarp);
        __syncthreads();
        PxU32 counter = counters[threadIndexInWarp];
        PxU32 contactCount = warpScan<AddOpPxU32, PxU32>(FULL_MASK, counter);
        //exclusive runsum
        PxU32 contactWarpOffset = contactCount - counter;
        contactWarpOffset = __shfl_sync(FULL_MASK, contactWarpOffset, warpIndex);
        const PxU32 offset = warpScanExclusive(mask, threadIndexInWarp);
        if (hasContact)
        {
            // Compacted position: offset within warp + warp base + contacts
            // retained from previous iterations.
            const PxU32 index = offset + contactWarpOffset + retainedCount;
            contacts[index] = worldPoint;
            separations[index] = separation;
        }
        __syncthreads();
        const PxU32 totalContacts = __shfl_sync(FULL_MASK, contactCount, 31) + retainedCount;
        //Now do another loop with warp 0 doing reduction
        if (warpIndex == 0)
        {
            hasContact = false;
            for (PxU32 readBase = 0; readBase < totalContacts;)
            {
                // Lanes that still hold a surviving contact keep it; the rest
                // refill from the next unread batch in shared memory.
                PxU32 readMask = ~(__ballot_sync(FULL_MASK, hasContact));
                if (!hasContact)
                {
                    // BUGFIX: the read index must include the loop base; the
                    // bounds check did, but the actual read re-fetched slots
                    // 0..31 on every batch.
                    PxU32 readIndex = warpScanExclusive(readMask, threadIndexInWarp) + readBase;
                    if (readIndex < totalContacts)
                    {
                        worldPoint = contacts[readIndex];
                        separation = separations[readIndex];
                        hasContact = true;
                    }
                }
                mask = __ballot_sync(FULL_MASK, hasContact);
                readBase += __popc(readMask);
                mask = contactReduce<true, true, PLANE_TRI_MAX_CONTACTS>(worldPoint, separation, worldNormal, mask, clusterBias);
                hasContact = mask & (1 << threadIndexInWarp);
            }
            // BUGFIX: ensure all shared-memory reads above complete before the
            // surviving contacts are compacted to the front of the buffer
            // (mirrors the mesh-vs-plane kernel).
            __syncwarp();
            retainedCount = __popc(__ballot_sync(FULL_MASK, hasContact));
            if (hasContact)
            {
                PxU32 offset = warpScanExclusive(mask, threadIndexInWarp);
                contacts[offset] = worldPoint;
                separations[offset] = separation;
            }
        }
        __syncthreads();
    }
    //Retained counts stores the set of contacts we kept, which are stored in the first N
    //elements of contacts and separations buffer
    if (warpIndex == 0)
    {
        PxU32 nbContacts = retainedCount;
        PxU32 contactByteOffset = 0xFFFFFFFF;
        PxU32 forceAndIndiceByteOffset = 0xFFFFFFFF;
        if (threadIndexInWarp == 0)
        {
            if (nbContacts)
            {
                contactByteOffset = atomicAdd(&(patchAndContactCounters->contactsBytes), sizeof(PxContact) * nbContacts);
                forceAndIndiceByteOffset = atomicAdd(&(patchAndContactCounters->forceAndIndiceBytes), sizeof(PxU32) * nbContacts);
                // BUGFIX: validate the whole reservation (nbContacts entries),
                // and check both limits independently -- previously only one
                // entry was tested and the force check was skipped (else-if)
                // whenever the contact check fired.
                if ((contactByteOffset + sizeof(PxContact) * nbContacts) > contactBytesLimit)
                {
                    patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::CONTACT_BUFFER_OVERFLOW);
                    contactByteOffset = 0xFFFFFFFF; //overflow
                }
                if ((forceAndIndiceByteOffset + sizeof(PxU32) * nbContacts) > forceBytesLimit)
                {
                    patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::FORCE_BUFFER_OVERFLOW);
                    forceAndIndiceByteOffset = 0xFFFFFFFF; //overflow
                }
            }
        }
        contactByteOffset = __shfl_sync(FULL_MASK, contactByteOffset, 0);
        PxsContactManagerOutput* output = cmOutputs + workIndex;
        //write point and penetration to the contact stream
        if (contactByteOffset != 0xFFFFFFFF)
        {
            float4* baseContactStream = (float4*)(contactStream + contactByteOffset);
            if (threadIndexInWarp < nbContacts)
            {
                float4& outPointPen = baseContactStream[threadIndexInWarp];
                outPointPen = make_float4(contacts[threadIndexInWarp].x, contacts[threadIndexInWarp].y,
                    contacts[threadIndexInWarp].z, separations[threadIndexInWarp]);
            }
            if (threadIndexInWarp == 0)
            {
                output->contactForces = reinterpret_cast<PxReal*>(startContactForces + forceAndIndiceByteOffset);
                output->contactPoints = startContactPoints + contactByteOffset;
            }
        }
        else
        {
            if (threadIndexInWarp == 0)
            {
                output->contactForces = NULL;
                output->contactPoints = NULL;
            }
            // BUGFIX: drop the patch as well when the contact stream
            // overflowed, as the mesh-vs-plane kernel does; otherwise a patch
            // referencing never-written contact data would be emitted.
            nbContacts = 0;
        }
        if (threadIndexInWarp == 0)
        {
            PxU32 patchIndex = registerContactPatch(cmOutputs, patchAndContactCounters, touchChangeFlags, patchChangeFlags, startContactPatches, patchBytesLimit, workIndex, nbContacts);
            if (nbContacts == 0)
                (cmOutputs + workIndex)->contactPatches = 0;
            insertIntoPatchStream(materials, patchStream, heightfieldShape, trimeshShape, patchIndex, worldNormal, nbContacts);
        }
    }
}

View File

@@ -0,0 +1,248 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __SCANWARP_CUH__
#define __SCANWARP_CUH__
/* Scan&Reduce operators */
// Addition functor for the scan/reduce helpers below.
// identity() is the value-initialized T (zero for the arithmetic types used here).
template <typename T>
struct AddOP
{
	static __device__ T identity() { return T(); }
	static __device__ T apply(T lhs, T rhs) { return lhs + rhs; }
};
// Bitwise-AND functor for the scan/reduce helpers.
// NOTE(review): identity() yields 0 rather than all-ones, which is not the
// AND identity -- presumably identity() is unused for this operator; confirm.
template <typename T>
struct AndOP
{
	static __device__ T identity() { return T(); }
	static __device__ T apply(T lhs, T rhs) { return lhs & rhs; }
};
// Convenience aliases for the common scan/reduce element types.
using AddOPu = AddOP<unsigned int>;
using AddOPf = AddOP<float>;
// Maximum functor. Despite the 'u' suffix it is templated on T;
// identity() is the value-initialized T (zero), so it only suits
// non-negative domains.
template <typename T>
struct MaxOPu
{
	static __device__ T identity() { return T(); }
	static __device__ T apply(T lhs, T rhs) { return lhs < rhs ? rhs : lhs; }
};
// Float minimum functor; applyIdx() additionally reports which of the two
// candidate slots supplied the winning value (ties keep the left operand).
struct MinOPf
{
	static __device__ float apply(float a, float b) { return fminf(a, b); }
	static __device__ float applyIdx(float a, float b, unsigned int &oindex, unsigned int aindex, unsigned int bindex)
	{
		const bool rightWins = b < a;
		oindex = rightWins ? bindex : aindex;
		return rightWins ? b : a;
	}
	static __device__ float identity() { return +FLT_MAX; }
};
// Float maximum functor; applyIdx() additionally reports which of the two
// candidate slots supplied the winning value (ties keep the left operand).
struct MaxOPf
{
	static __device__ float apply(float a, float b) { return fmaxf(a, b); }
	static __device__ float applyIdx(float a, float b, unsigned int &oindex, unsigned int aindex, unsigned int bindex)
	{
		const bool rightWins = b > a;
		oindex = rightWins ? bindex : aindex;
		return rightWins ? b : a;
	}
	static __device__ float identity() { return -FLT_MAX; }
};
/* Reduce */
template <typename T, class OP>
__device__ static inline void reduceWarp(volatile T* sdata)
{
unsigned int idx = threadIdx.x;
if ((idx & (WARP_SIZE-1)) < 16)
{
sdata[idx] = OP::apply(sdata[idx], sdata[idx + 16]);
sdata[idx] = OP::apply(sdata[idx], sdata[idx + 8]);
sdata[idx] = OP::apply(sdata[idx], sdata[idx + 4]);
sdata[idx] = OP::apply(sdata[idx], sdata[idx + 2]);
sdata[idx] = OP::apply(sdata[idx], sdata[idx + 1]);
}
}
// Variant of reduceWarp() with a caller-supplied index and no lane guard:
// every calling thread executes all five strided steps, so it reads up to
// sdata[idx + 16]. The caller is responsible for masking lanes and/or padding
// the buffer -- TODO confirm call sites guarantee this.
template <typename T, class OP>
__device__ static inline void reduceWarp(volatile T* sdata, unsigned int idx)
{
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 16]);
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 8]);
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 4]);
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 2]);
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 1]);
}
// Reduces a 16-element segment (strides 8/4/2/1) at each thread's own index.
// No lane guard: all threads execute and read up to sdata[threadIdx.x + 8],
// so the caller must size/pad the buffer accordingly -- TODO confirm.
template <typename T, class OP>
__device__ static inline void reduceHalfWarp(volatile T* sdata)
{
	unsigned int idx = threadIdx.x;
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 8]);
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 4]);
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 2]);
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 1]);
}
// Same strided warp reduction as reduceWarp(), but additionally tracks which
// original slot supplied the winning value: at each step OP::applyIdx()
// selects between the two candidates and reports the chosen index, which is
// mirrored into sindices alongside the reduced value. After the last step the
// warp's result and its source index sit in the warp's first slot.
// Relies on 'volatile' accesses for the legacy warp-synchronous model, like
// the other reducers in this header.
template <typename T, typename TIndex, class OP>
__device__ static inline void reduceWarpKeepIndex(volatile T* sdata, volatile TIndex* sindices, unsigned int idx)
{
	TIndex ia, ib, oindex;
	// stride 16
	if ((idx & (WARP_SIZE-1)) < 16)
	{
		ia = sindices[idx];
		ib = sindices[idx + 16];
		sdata[idx] = OP::applyIdx(sdata[idx], sdata[idx + 16], oindex, ia, ib);
		sindices[idx] = oindex;
	}
	// stride 8
	if ((idx & (WARP_SIZE-1)) < 8)
	{
		ia = sindices[idx];
		ib = sindices[idx + 8];
		sdata[idx] = OP::applyIdx(sdata[idx], sdata[idx + 8], oindex, ia, ib);
		sindices[idx] = oindex;
	}
	// stride 4
	if ((idx & (WARP_SIZE-1)) < 4)
	{
		ia = sindices[idx];
		ib = sindices[idx + 4];
		sdata[idx] = OP::applyIdx(sdata[idx], sdata[idx + 4], oindex, ia, ib);
		sindices[idx] = oindex;
	}
	// stride 2
	if ((idx & (WARP_SIZE-1)) < 2)
	{
		ia = sindices[idx];
		ib = sindices[idx + 2];
		sdata[idx] = OP::applyIdx(sdata[idx], sdata[idx + 2], oindex, ia, ib);
		sindices[idx] = oindex;
	}
	// stride 1 (final)
	if ((idx & (WARP_SIZE-1)) < 1)
	{
		ia = sindices[idx];
		ib = sindices[idx + 1];
		sdata[idx] = OP::applyIdx(sdata[idx], sdata[idx + 1], oindex, ia, ib);
		sindices[idx] = oindex;
	}
}
/* Scan */
// Generic warp-wide inclusive scan (Hillis-Steele style, strides 1/2/4/8/16)
// over shared memory, combining each slot with its lower neighbours.
// NOTE(review): reads sdata[scanIdx - 1 .. scanIdx - 16] unconditionally, so
// callers appear to rely on padding below each segment filled with
// OP::identity() -- confirm the buffer layout at call sites.
template <typename T, typename OP>
__device__ static inline void scanWarp(unsigned int scanIdx, volatile T* sdata)
{
	sdata[scanIdx] = OP::apply(sdata[scanIdx], sdata[scanIdx - 1]);
	sdata[scanIdx] = OP::apply(sdata[scanIdx], sdata[scanIdx - 2]);
	sdata[scanIdx] = OP::apply(sdata[scanIdx], sdata[scanIdx - 4]);
	sdata[scanIdx] = OP::apply(sdata[scanIdx], sdata[scanIdx - 8]);
	sdata[scanIdx] = OP::apply(sdata[scanIdx], sdata[scanIdx - 16]);
}
/// The number of warp scan steps
#define STEPS LOG2_WARP_SIZE
// The 5-bit SHFL mask for logically splitting warps into sub-segments starts 8-bits up
// NOTE(review): (-1 << STEPS) left-shifts a negative value, which is formally
// undefined in C++ (well-defined on NVCC's targets in practice); the portable
// spelling would be ((~0u << STEPS) & 31) << 8.
#define SHFL_MASK ((-1 << STEPS) & 31) << 8
// SHFL-based specialization for PxU32 addition: performs the inclusive warp
// scan entirely in registers (shfl.up) instead of shared-memory round trips;
// shared memory is only touched for the initial load and final store.
template <>
__device__ inline void scanWarp<PxU32, AddOP<PxU32> >(unsigned int scanIdx, volatile PxU32 * sdata)
{
	PxU32 input = sdata[scanIdx];
	// Iterate scan steps
#pragma unroll
	for (int STEP = 0; STEP < STEPS; STEP++)
	{
		// shfl.up.b32 fetches the value 2^STEP lanes below; predicate 'p' is
		// set only when that source lane lies inside the segment described by
		// SHFL_MASK, in which case the fetched value is accumulated.
		asm(
			"{"
			" .reg .u32 r0;"
			" .reg .pred p;"
			" shfl.up.b32 r0|p, %1, %2, %3;"
			" @p add.u32 r0, r0, %4;"
			" mov.u32 %0, r0;"
			"}"
			: "=r"(input) : "r"(input), "r"(1 << STEP), "r"(SHFL_MASK), "r"(input));
	}
	sdata[scanIdx] = input;
}
// SHFL-based specialization for int addition; identical to the PxU32 version.
// The accumulator is deliberately a PxU32: shfl.up.b32/add.u32 operate on raw
// 32-bit values, and two's-complement addition makes the result bit-identical
// for signed ints.
template <>
__device__ inline void scanWarp<int, AddOP<int> >(unsigned int scanIdx, volatile int* sdata)
{
	PxU32 input = sdata[scanIdx];
	// Iterate scan steps
#pragma unroll
	for (int STEP = 0; STEP < STEPS; STEP++)
	{
		// Fetch from 2^STEP lanes below and accumulate when in-segment (see
		// the PxU32 specialization above).
		asm(
			"{"
			" .reg .u32 r0;"
			" .reg .pred p;"
			" shfl.up.b32 r0|p, %1, %2, %3;"
			" @p add.u32 r0, r0, %4;"
			" mov.u32 %0, r0;"
			"}"
			: "=r"(input) : "r"(input), "r"(1 << STEP), "r"(SHFL_MASK), "r"(input));
	}
	sdata[scanIdx] = input;
}
#endif

View File

@@ -0,0 +1,772 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "PxgGeometryManager.h"
#include "PxsHeapMemoryAllocator.h"
#include "foundation/PxAssert.h"
#include "foundation/PxBasicTemplates.h"
#include "foundation/PxMemory.h"
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxVec3.h"
#include "PxgCopyManager.h"
#include "PxgCudaUtils.h"
#include "PxgHeapMemAllocator.h"
#include "GuBV32.h"
#include "convex/GuConvexMesh.h"
#include "mesh/GuTriangleMesh.h"
#include "hf/GuHeightField.h"
#include "GuSDF.h"
#include "cudaNpCommon.h"
#include "cudamanager/PxCudaContextManager.h"
#include "cudamanager/PxCudaContext.h"
#include <vector_types.h>
#include <vector_functions.h>
// AD: PxgGeometryManager manages the CPU/GPU data transfers and the lifetime of collision geometries: Convex Hulls, Trimeshes, SDFs, Heightfields.
using namespace physx;
using namespace Gu;
// forward declarations of static functions
static PxU64 computeBoxHullByteSize();
static PxU64 computeHullByteSize(const ConvexHullData& hull, PxU32& numPolyVertices);
static PxU64 computeTriMeshByteSize(const TriangleMesh& triMesh);
static PxU64 computeHeightfieldByteSize(const HeightFieldData& hf);
static void layOutBoxHull(void* mem);
static void layOutHull(void* mem, const ConvexHullData& hull, PxU32 numPolyVertices);
static PxgMeshTextureData layOutTriMesh(void* mem, const TriangleMesh& triMesh, CUstream stream);
static void layOutHeightfield(void* mem, const HeightFieldData& hf);
template<typename T> static void createTextureObject(CUarray_format format, CUtexObject*& texture, CUarray& cuArray, PxU32 width, PxU32 height, PxU32 depth, const T* data, CUstream stream);
// PxgGeometryManager definitions
// Constructor: wires up the device- and pinned-memory allocators taken from
// the heap memory manager. No GPU memory is allocated until geometries are
// added and flushed via scheduleCopyHtoD().
PxgGeometryManager::PxgGeometryManager(PxgHeapMemoryAllocatorManager* heapMemoryManager):
mDeviceMemoryAllocator(heapMemoryManager->mDeviceMemoryAllocators),
mPinnedMemoryAllocator(static_cast<PxgHeapMemoryAllocator*>(heapMemoryManager->mMappedMemoryAllocators)),
mPinnedMemoryBasePtr(NULL), // staging buffer; allocated per flush in scheduleCopyHtoD()
mPinnedHostMemoryRequirements(0), // accumulated by addGeometryInternal()
mFreeGeometryIndices(mGeometryData), // free-index list over the geometry slot array
mBoxHullIdx(0xFFffFFff) // 0xFFFFFFFF == shared box hull not created yet
{
}
PxgGeometryManager::~PxgGeometryManager()
{
	// Release the shared box hull's device allocation if one was ever created.
	const bool hasBoxHull = (mBoxHullIdx != 0xFFFFFFFF);
	if (hasBoxHull)
	{
		void* boxHullMem = reinterpret_cast<void*>(getGeometryDevPtrByIndex(mBoxHullIdx));
		mDeviceMemoryAllocator->deallocate(boxHullMem);
	}
	// Return any pinned staging memory that is still outstanding.
	if (mPinnedMemoryBasePtr != NULL)
	{
		mPinnedMemoryAllocator->deallocate(mPinnedMemoryBasePtr);
		mPinnedMemoryBasePtr = NULL;
	}
}
// Common path for all geometry types: reserves a slot, allocates device
// memory, and queues a deferred host-to-device copy whose source pointer is
// filled in later by scheduleCopyHtoD(). Returns the geometry slot index.
PxU32 PxgGeometryManager::addGeometryInternal(PxU64 byteSize, const void* geomPtr, UploadGeometryType::Enum type, PxU32 numPolyVertices /*= 0*/)
{
	// Round up to a 256-byte multiple so every staged geometry starts on a
	// 256-byte boundary in both pinned and device memory.
	byteSize = (byteSize + 255) & ~255;
	mPinnedHostMemoryRequirements += byteSize;

	const PxU32 geomIdx = mFreeGeometryIndices.getFreeIndex();
	void* deviceMem = mDeviceMemoryAllocator->allocate(byteSize, PxsHeapStats::eNARROWPHASE, PX_FL);

	// Queue the deferred copy; 'source' is assigned during the flush.
	const PxU32 copyIdx = mScheduledCopies.size();
	ScheduledCopyData copy;
	copy.mHullOrTrimeshIdx = geomIdx;
	copy.mGeometryPtr = geomPtr;
	copy.mType = type;
	copy.mNumPolyVertices = numPolyVertices;
	PxgCopyManager::CopyDesc desc;
	desc.dest = (size_t)deviceMem;
	desc.bytes = (size_t)byteSize;
	copy.mCopyDesc = desc;
	mScheduledCopies.pushBack(copy);

	// Record the slot: device pointer plus a back-reference to the pending
	// copy so removeGeometry() can cancel it before the flush.
	HullOrMeshData record;
	record.mDeviceMemPointer = deviceMem;
	record.mCopyDescIndex = copyIdx;
	PX_ASSERT(geomIdx < mGeometryData.size());
	mGeometryData[geomIdx] = record;
	return geomIdx;
}
void PxgGeometryManager::addBoxHull()
{
if (mBoxHullIdx != 0xFFffFFff)
return;
PxU64 byteSize = computeBoxHullByteSize();
mBoxHullIdx = addGeometryInternal(byteSize, NULL, UploadGeometryType::eBOXHULL);
}
// Schedules a convex hull for upload and returns its geometry slot index.
PxU32 PxgGeometryManager::addHull(const ConvexHullData& hull)
{
	// computeHullByteSize() also counts the polygon vertex-reference bytes,
	// which the layout pass needs again later.
	PxU32 numPolyVertices = 0;
	const PxU64 byteSize = computeHullByteSize(hull, numPolyVertices);
	return addGeometryInternal(byteSize, &hull, UploadGeometryType::eCONVEXHULL, numPolyVertices);
}
// Releases all GPU-side resources of geometry slot 'idx' and returns the slot
// to the free list. Works whether or not the geometry's upload copy has been
// flushed yet.
void PxgGeometryManager::removeGeometry(PxU32 idx)
{
	PX_ASSERT(idx < mGeometryData.size());
	HullOrMeshData geometryToRemove = mGeometryData[idx];
	PxU32 scheduledCopyIndex = geometryToRemove.mCopyDescIndex;
	if (scheduledCopyIndex != HullOrMeshData::INVALID_COPY_DESC_INDEX)
	{
		// The upload has not been flushed yet: cancel it with a swap-remove.
		PxU32 lastScheduledCopyHullOrTrimeshIndex = mScheduledCopies.back().mHullOrTrimeshIdx;
		if (lastScheduledCopyHullOrTrimeshIndex != idx)
		{
			// The last scheduled copy moves into the vacated position, so the
			// geometry record that owns it must be repointed at the new index.
			mScheduledCopies.replaceWithLast(scheduledCopyIndex);
			PX_ASSERT(lastScheduledCopyHullOrTrimeshIndex < mGeometryData.size());
			mGeometryData[lastScheduledCopyHullOrTrimeshIndex].mCopyDescIndex = scheduledCopyIndex;
		}
		else
		{
			// The geometry being removed owns the last scheduled copy itself.
			mScheduledCopies.popBack();
		}
	}
	// Destroy any CUDA texture objects/arrays that were created for this mesh
	// during layout (see layOutTriMesh), keyed by its device memory pointer.
	const PxPair<void*const, PxgMeshTextureData>* pair = mMeshToTextureMap.find(geometryToRemove.mDeviceMemPointer);
	if (pair)
	{
		cuArrayDestroy(pair->second.cuArray);
		cuTexObjectDestroy(pair->second.cuTexRef);
		if (pair->second.cuArraySubgrids)
		{
			cuArrayDestroy(pair->second.cuArraySubgrids);
			cuTexObjectDestroy(pair->second.cuTexRefSubgrids);
		}
		mMeshToTextureMap.erase(geometryToRemove.mDeviceMemPointer);
	}
	mDeviceMemoryAllocator->deallocate(geometryToRemove.mDeviceMemPointer);
	mFreeGeometryIndices.setFreeIndex(idx);
}
// Lays out every geometry scheduled since the last flush into a single pinned
// staging buffer and pushes the corresponding deferred host-to-device copies
// onto the copy manager.
void PxgGeometryManager::scheduleCopyHtoD(PxgCopyManager& copyMan, PxCudaContext& cudaContext, CUstream stream)
{
	// allocate the proper amount of pinned memory
	mPinnedMemoryBasePtr = mPinnedMemoryAllocator->allocate(mPinnedHostMemoryRequirements, PxsHeapStats::eNARROWPHASE, PX_FL);
	PxU8* basePtr = reinterpret_cast<PxU8*>(mPinnedMemoryBasePtr);
	for (PxArray<ScheduledCopyData>::Iterator it = mScheduledCopies.begin(), end = mScheduledCopies.end(); it != end; ++it)
	{
		// Carve this geometry's slice out of the staging buffer. Each slice is
		// a 256-byte multiple (rounded up in addGeometryInternal), so size and
		// address both stay 256-byte aligned.
		PxU64 byteSize = it->mCopyDesc.bytes;
		PxU8* mem = basePtr;
		basePtr += byteSize;
		PX_ASSERT((byteSize & 255) == 0);
		PX_ASSERT(((size_t)mem & 255) == 0);
		// layout the geometry in pinned memory depending on type
		switch (it->mType)
		{
		case UploadGeometryType::eTRIANGLEMESH:
		{
			// layOutTriMesh may create CUDA texture objects (for SDF data);
			// remember them so removeGeometry() can destroy them later.
			PxgMeshTextureData texData = layOutTriMesh(mem, *reinterpret_cast<const TriangleMesh*>(it->mGeometryPtr), stream);
			if (texData.cuArray)
			{
				PX_ASSERT(it->mHullOrTrimeshIdx < mGeometryData.size());
				mMeshToTextureMap.insert(mGeometryData[it->mHullOrTrimeshIdx].mDeviceMemPointer, texData);
			}
		}
		break;
		case UploadGeometryType::eCONVEXHULL:
			layOutHull(mem, *reinterpret_cast<const ConvexHullData*>(it->mGeometryPtr), it->mNumPolyVertices);
			break;
		case UploadGeometryType::eHEIGHTFIELD:
			layOutHeightfield(mem, *reinterpret_cast<const HeightFieldData*>(it->mGeometryPtr));
			break;
		case UploadGeometryType::eBOXHULL:
			layOutBoxHull(mem);
			break;
		default:
			PX_ALWAYS_ASSERT();
		}
		// set the source pointer in the copy descriptor.
		it->mCopyDesc.source = reinterpret_cast<size_t>(getMappedDevicePtr(&cudaContext, mem));
		// schedule the copies.
		PX_ASSERT(it->mHullOrTrimeshIdx < mGeometryData.size());
		// The copy is now owned by the copy manager; clear the back-reference
		// so removeGeometry() no longer tries to cancel it.
		mGeometryData[it->mHullOrTrimeshIdx].mCopyDescIndex = HullOrMeshData::INVALID_COPY_DESC_INDEX;
		copyMan.pushDeferredHtoD(it->mCopyDesc);
	}
	mScheduledCopies.forceSize_Unsafe(0);
	mPinnedHostMemoryRequirements = 0;
}
// Called once the flushed host-to-device copies have completed.
void PxgGeometryManager::resetAfterMemcpyCompleted()
{
	// Geometry uploads are rare and differently sized each time, so the pinned
	// staging buffer is released outright rather than cached, keeping pinned
	// memory usage under control.
	if (mPinnedMemoryBasePtr != NULL)
	{
		mPinnedMemoryAllocator->deallocate(mPinnedMemoryBasePtr);
		mPinnedMemoryBasePtr = NULL;
	}
	// Slots freed by removeGeometry() become reusable from this point on.
	mFreeGeometryIndices.releaseFreeIndices();
}
// Translates a geometry slot index into its device-side base address.
CUdeviceptr PxgGeometryManager::getGeometryDevPtrByIndex(PxU32 idx) const
{
	PX_ASSERT(idx < mGeometryData.size());
	void* deviceMem = mGeometryData[idx].mDeviceMemPointer;
	return reinterpret_cast<CUdeviceptr>(deviceMem);
}
// Device address of the shared box hull; addBoxHull() must have run first.
CUdeviceptr PxgGeometryManager::getBoxHullDevPtr() const
{
	PX_ASSERT(mBoxHullIdx != 0xFFFFFFFF);
	PX_ASSERT(mBoxHullIdx < mGeometryData.size());
	return reinterpret_cast<CUdeviceptr>(mGeometryData[mBoxHullIdx].mDeviceMemPointer);
}
// Schedules a triangle mesh for upload and returns its geometry slot index.
PxU32 PxgGeometryManager::addTriMesh(const TriangleMesh& triMesh)
{
	// Sizing accounts for the mesh arrays, the BV32 tree and any SDF data.
	return addGeometryInternal(computeTriMeshByteSize(triMesh), &triMesh, UploadGeometryType::eTRIANGLEMESH);
}
// Schedules a heightfield for upload and returns its geometry slot index.
PxU32 PxgGeometryManager::addHeightfield(const HeightFieldData& hf)
{
	return addGeometryInternal(computeHeightfieldByteSize(hf), &hf, UploadGeometryType::eHEIGHTFIELD);
}
// static functions
// Device-side footprint of the canonical box hull; mirrors the per-hull
// layout of computeHullByteSize() with the box's fixed counts: 8 vertices,
// 6 polygons, 12 edges, 24 polygon vertex references.
static PxU64 computeBoxHullByteSize()
{
	PxU64 total = 0;
	total += sizeof(float4);         // center of mass
	total += sizeof(PxU32);          // NbEdgesNbHullVerticesNbPolygons
	total += 3 * sizeof(PxU32);      // pad
	total += sizeof(float4);         // extents
	total += sizeof(float4) * 8;     // vertices
	total += sizeof(float4) * 6;     // planes
	total += sizeof(PxU32) * 6;      // vRef8NbVertsMinIndex0
	total += sizeof(PxU16) * 2 * 12; // mVerticesByEdges16
	total += sizeof(PxU8) * 2 * 12;  // mFacesByEdges8
	total += sizeof(PxU8) * 3 * 8;   // mFacesByVertices8
	total += sizeof(PxU8) * 24;      // vertexData8
	return total;
}
// Device-side footprint of a convex hull. Also returns (via out-parameter)
// the total number of polygon vertex references, which the layout pass reuses.
static PxU64 computeHullByteSize(const ConvexHullData& hull, PxU32& numPolyVertices)
{
	numPolyVertices = 0;
	for (PxU32 polyIdx = 0; polyIdx < hull.mNbPolygons; ++polyIdx)
		numPolyVertices += hull.mPolygons[polyIdx].mNbVerts;

	PxU64 total = 0;
	total += sizeof(float4);                          // center of mass
	total += sizeof(PxU32);                           // NbEdgesNbHullVerticesNbPolygons
	total += 3 * sizeof(PxU32);                       // pad
	total += sizeof(float4);                          // extents
	total += sizeof(float4) * hull.mNbHullVertices;   // vertices
	total += sizeof(float4) * hull.mNbPolygons;       // planes
	total += sizeof(PxU32) * hull.mNbPolygons;        // vRef8NbVertsMinIndex0
	total += sizeof(PxU16) * 2 * hull.mNbEdges;       // mVerticesByEdges16
	total += sizeof(PxU8) * 2 * hull.mNbEdges;        // mFacesByEdges8
	total += sizeof(PxU8) * 3 * hull.mNbHullVertices; // mFacesByVertices8
	total += sizeof(PxU8) * numPolyVertices;          // vertexData8
	return total;
}
// Computes the number of device bytes needed for a triangle mesh, matching the
// layout written by layOutTriMesh(): header, vertices, indices, adjacency, remap
// tables, the optional SDF block, and the BV32 midphase tree.
static PxU64 computeTriMeshByteSize(const TriangleMesh& triMesh)
{
	const PxU32 numTris = triMesh.getNbTrianglesFast();
	const PxU32 numVerts = triMesh.getNbVerticesFast();
	PxU64 meshDataSize =
		sizeof(uint4) // (nbVerts, nbTris, meshAdjVerticiesTotal, nbBv32TreeNodes)
		+ sizeof(float4) * numVerts // meshVerts
		+ sizeof(uint4) * numTris // meshTriIndices
		+ sizeof(uint4) * numTris // meshTriAdjacencies
		+ sizeof(PxU32) * numTris // gpu to cpu face remap table
		+ sizeof(PxU32) * numTris * 3 // vertex to triangle remap table
		+ sizeof(PxU32) * (numVerts + 1) // vertex to triangle offset table
		;
	// Make sure memory is always 16-byte aligned
	meshDataSize = (meshDataSize + 15) & ~15;
	meshDataSize += sizeof(uint4); // (sdfDimX, sdfDimY, sdfDimZ, 0)
	const SDF& sdfData = triMesh.getSdfDataFast();
	const PxU32 numSdfs = sdfData.mNumSdfs;
	if (numSdfs > 0)
	{
		// Optional SDF block: bounds/spacing, subgrid header, two texture-object
		// handles (dense SDF + sparse subgrids) and the subgrid start-slot table.
		meshDataSize +=
			+ sizeof(float4) // (meshLower.x, meshLower.y, meshLower.z, spacing)
			+ sizeof(uint4)
			+ sizeof(CUtexObject) // SDF texture object reference
			+ sizeof(CUtexObject)
			+ sizeof(PxU32) * sdfData.mNumStartSlots;
	}
	// Make sure memory is always 16-byte aligned
	meshDataSize = (meshDataSize + 15) & ~15;
	//ML: don't know whether we need to have local bound
	const PxU64 bv32Size = triMesh.mGRB_BV32Tree->mNbPackedNodes * sizeof(BV32DataPacked);
	return meshDataSize + bv32Size;
}
static PxU64 computeHeightfieldByteSize(const HeightFieldData& hf)
{
	/* Height field height data is 16 bit signed integers, followed by triangle materials.
	Each sample is 32 bits wide arranged as follows:
	1) First there is a 16 bit height value.
	2) Next, two one byte material indices, with the high bit of each byte reserved for special use.
	(so the material index is only 7 bits).
	The high bit of material0 is the tess-flag.
	The high bit of material1 is reserved for future use.
	*/
	// Layout written by layOutHeightfield(): rows, columns, samples, flags.
	const PxU64 headerBytes = sizeof(PxU32)							// rows
							+ sizeof(PxU32);						// columns
	const PxU64 sampleBytes = sizeof(PxU32) * hf.columns * hf.rows;	// one 32-bit sample per cell
	return headerBytes + sampleBytes + sizeof(PxU16);				// + PxHeightFieldFlags
}
// Writes a hard-coded unit-cube convex hull into 'mem', using the same memory
// layout as layOutHull(). The destination must be at least
// computeBoxHullByteSize() bytes. The box has 8 vertices, 6 faces, 12 edges.
static void layOutBoxHull(void* mem)
{
	// Corners of the [-1,1]^3 cube (w component unused).
	const float4 vertices[8] = { make_float4(-1.f, 1.f, -1.f, 0.f),
		make_float4(1.f, 1.f, -1.f, 0.f),
		make_float4(1.f, -1.f, -1.f, 0.f),
		make_float4(-1.f, -1.f, -1.f, 0.f),
		make_float4(-1.f, -1.f, 1.f, 0.f),
		make_float4(-1.f, 1.f, 1.f, 0.f),
		make_float4(1.f, -1.f, 1.f, 0.f),
		make_float4(1.f, 1.f, 1.f, 0.f)
	};
	// Face planes (nx, ny, nz, d), each at unit distance from the origin.
	const float4 planes[6] = {make_float4(0.f, 0.0f, -1.0f, -1.0f),
		make_float4(-1.0f, 0.0f, 0.0f, -1.0f),
		make_float4(0.0f, -1.0f, 0.0f, -1.0f),
		make_float4(0.0f, 1.0f, 0.0f, -1.0f),
		make_float4(0.0f, 0.0f, 1.0f, -1.0f),
		make_float4(1.0f, 0.0f, 0.0f, -1.0f)};
	// Per-face packed word (vRef8NbVertsMinIndex0 in layOutHull terms):
	// start offset into polyIndices, vertex count, and a third packed value.
	const PxU32 polyData[6] = {merge(0, 4, 4),
		merge(4, 4, 1),
		merge(8, 4, 0),
		merge(12, 4, 2),
		merge(16, 4, 0),
		merge(20, 4, 0)};
	// Vertex index pairs for each of the 12 edges (mVerticesByEdges16).
	const PxU16 vertsByEdges[24] = { 0, 1, 3, 0, 5, 0, 1, 2, 7, 1, 2, 3, 2, 6, 3, 4, 4, 5, 6, 4, 5, 7, 6, 7 };
	// Face index pairs adjacent to each of the 12 edges (mFacesByEdges8).
	const PxU8 facesByEdges[24] = {
		0, 3,
		0, 1,
		1, 3,
		0, 5,
		3, 5,
		0, 2,
		2, 5,
		1, 2,
		1, 4,
		2, 4,
		3, 4,
		4, 5
	};
	// The three faces incident to each of the 8 vertices (mFacesByVertices8).
	const PxU8 facesByVerts[24] = {
		0, 1, 3,
		0, 3, 5,
		0, 2, 5,
		0, 1, 2,
		1, 2, 4,
		1, 3, 4,
		2, 4, 5,
		3, 4, 5
	};
	// Vertex indices per polygon, 4 per face (vertexData8).
	const PxU8 polyIndices[24] = {0, 1, 2, 3,
		4, 5, 0, 3,
		4, 3, 2, 6,
		7, 1, 0, 5,
		7, 5, 4, 6,
		7, 6, 2, 1
	};
	// Serialize in the exact order computeBoxHullByteSize() accounts for.
	PxU8* m = reinterpret_cast<PxU8*>(mem);
	*((float4*)m) = make_float4(0.0f, 0.0f, 0.0f, 0.0f);//center of mass
	m += sizeof(float4);
	// Packed counts: 12 edges, 8 vertices, 6 polygons.
	*((PxU32*)m) = merge(12, merge((PxU8) 8, (PxU8) 6));
	m += 4 * sizeof(PxU32); // counts word + 3 words of padding
	*((float4*)m) = make_float4(1.0f, //half-extents and the insphere radius
		1.0f,
		1.0f,
		1.0f);
	m += sizeof(float4);
	PxMemCopy(m, vertices, sizeof(float4) * 8);
	m += sizeof(float4) * 8;
	PxMemCopy(m, planes, sizeof(float4) * 6);
	m += sizeof(float4) * 6;
	PxMemCopy(m, polyData, sizeof(PxU32) * 6);
	m += sizeof(PxU32) * 6;
	PxMemCopy(m, vertsByEdges, sizeof(PxU16) * 2 * 12);
	m += sizeof(PxU16) * 2 * 12;
	PxMemCopy(m, facesByEdges, sizeof(PxU8) * 2 * 12);
	m += sizeof(PxU8) * 2 * 12;
	PxMemCopy(m, facesByVerts, sizeof(PxU8) * 3 * 8);
	m += sizeof(PxU8) * 3 * 8;
	PxMemCopy(m, polyIndices, sizeof(PxU8) * 24);
}
// Serializes a convex hull into 'mem' using the GPU-friendly layout whose size
// is computed by computeHullByteSize(). numPolyVertices must be the value that
// computeHullByteSize() returned for the same hull.
static void layOutHull(void* mem, const ConvexHullData& hull, PxU32 numPolyVertices)
{
	PxU8* m = (PxU8*) mem;
	*((float4*)m) = make_float4(hull.mCenterOfMass.x, hull.mCenterOfMass.y, hull.mCenterOfMass.z, 0.f);
	m +=sizeof(float4);
	// Edge/vertex/polygon counts packed into a single word.
	*((PxU32*)m) = merge(hull.mNbEdges, merge(hull.mNbHullVertices, hull.mNbPolygons));
	m += 4 * sizeof(PxU32); // counts word + 3 words of padding
	// Internal extents plus internal radius in w.
	*((float4*)m) = make_float4(hull.mInternal.mInternalExtents.x,
		hull.mInternal.mInternalExtents.y,
		hull.mInternal.mInternalExtents.z,
		hull.mInternal.mInternalRadius);
	m += sizeof(float4);
	// Widen PxVec3 vertices to float4 (w = 0) for aligned GPU access.
	const PxVec3* verts = hull.getHullVertices();
	for (const PxVec3* end = verts + hull.mNbHullVertices; verts < end; ++verts)
	{
		*((float4*)m) = make_float4(verts->x, verts->y, verts->z, 0);
		m += sizeof(float4);
	}
	// Planes and the packed per-polygon words are interleaved from the same
	// source array but land in two separate destination regions.
	const HullPolygonData* polys = hull.mPolygons;
	float4* planes = (float4*)m;
	PxU32* vRef8NbVertsMinIndex = (PxU32*) (m + sizeof(float4) * hull.mNbPolygons);
	for (const HullPolygonData* end = polys + hull.mNbPolygons; polys < end; ++polys)
	{
		*(planes++) = make_float4(polys->mPlane.n.x, polys->mPlane.n.y, polys->mPlane.n.z, polys->mPlane.d);
		*(vRef8NbVertsMinIndex++) = merge(polys->mVRef8, polys->mNbVerts, polys->mMinIndex);
	}
	m += (sizeof(float4) + sizeof(PxU32)) * hull.mNbPolygons;
	PxMemCopy(m, hull.getVerticesByEdges16(), sizeof(PxU16) * 2*hull.mNbEdges);
	m += sizeof(PxU16) * 2*hull.mNbEdges;
	PxMemCopy(m, hull.getFacesByEdges8(), sizeof(PxU8) * 2*hull.mNbEdges);
	m += sizeof(PxU8) * 2*hull.mNbEdges;
	PxMemCopy(m, hull.getFacesByVertices8(), sizeof(PxU8) * 3*hull.mNbHullVertices);
	m += sizeof(PxU8) * 3*hull.mNbHullVertices;
	PxMemCopy(m, hull.getVertexData8(), sizeof(PxU8) * numPolyVertices);
	//m += sizeof(PxU8) * numPolyVertices;
}
// Serializes a triangle mesh into 'mem' (sized by computeTriMeshByteSize()):
// header, BV32 midphase nodes, vertices, indices, adjacency, remap tables and,
// if present, the SDF block. SDF values are uploaded into CUDA 3D arrays with
// texture objects created over them (asynchronously on 'stream'); the returned
// PxgMeshTextureData carries those arrays/handles so the caller can free them.
static PxgMeshTextureData layOutTriMesh(void* mem, const TriangleMesh& triMesh, CUstream stream)
{
	const PxU32 numTris = triMesh.getNbTrianglesFast();
	const PxU32 numVerts = triMesh.getNbVerticesFast();
	BV32Tree* bv32Tree = triMesh.mGRB_BV32Tree;
	PxU8* m = (PxU8*) mem;
	// Header: (nbVerts, nbTris, 0, nbBv32TreeNodes).
	*((uint4*)m) = make_uint4(triMesh.getNbVerticesFast(), triMesh.getNbTrianglesFast(), 0, bv32Tree->mNbPackedNodes);
	m += sizeof(uint4);
	//Midphase
	PxMemCopy(m, bv32Tree->mPackedNodes, sizeof(BV32DataPacked) * bv32Tree->mNbPackedNodes);
	m += sizeof(BV32DataPacked) * bv32Tree->mNbPackedNodes;
	PX_ASSERT(bv32Tree->mNbPackedNodes > 0);
	// Core: Mesh data
	//construct gpu friendly verts: widen PxVec3 to float4 (w = 0)
	float4* grbVerts = reinterpret_cast<float4*>(m);
	const PxVec3* verts = triMesh.getVerticesFast();
	for (PxU32 i = 0; i < numVerts; ++i)
	{
		grbVerts[i].x = verts[i].x;
		grbVerts[i].y = verts[i].y;
		grbVerts[i].z = verts[i].z;
		grbVerts[i].w = 0.f;
	}
	m += numVerts * sizeof(float4);
	uint4* grbTriInd = reinterpret_cast<uint4*>(m);
	//copy triangle indices, widening to uint4 (w = 0) for both index widths
	if (triMesh.has16BitIndices())
	{
		const PxU16* triInds = reinterpret_cast<PxU16*>(triMesh.mGRB_triIndices);
		for (PxU32 i = 0; i < numTris; ++i)
		{
			grbTriInd[i].x = triInds[3 * i + 0];
			grbTriInd[i].y = triInds[3 * i + 1];
			grbTriInd[i].z = triInds[3 * i + 2];
			grbTriInd[i].w = 0;
		}
	}
	else
	{
		const PxU32* triInds = reinterpret_cast<PxU32*>(triMesh.mGRB_triIndices);
		for (PxU32 i = 0; i < numTris; ++i)
		{
			grbTriInd[i].x = triInds[3 * i + 0];
			grbTriInd[i].y = triInds[3 * i + 1];
			grbTriInd[i].z = triInds[3 * i + 2];
			grbTriInd[i].w = 0;
		}
	}
	//PxMemCopy(m, triMesh->mGRB_triIndices, numTris * sizeof(uint4));
	m += numTris * sizeof(uint4);
	//Adjacencies data for contact gen
	PxMemCopy(m, triMesh.mGRB_triAdjacencies, numTris * sizeof(uint4));
	m += numTris * sizeof(uint4);
	//GPU to CPU face remap table
	PxMemCopy(m, triMesh.mGRB_faceRemap, numTris * sizeof(PxU32));
	m += numTris * sizeof(PxU32);
	//vertex to triangle offset table (numVerts entries + total count sentinel)
	PxMemCopy(m, triMesh.mAccumulatedTrianglesRef, numVerts * sizeof(PxU32));
	reinterpret_cast<PxU32*>(m)[numVerts] = triMesh.mNbTrianglesReferences;
	m += (numVerts + 1) * sizeof(PxU32);
	//vertex to triangle remap table
	PxMemCopy(m, triMesh.mTrianglesReferences, (numTris*3) * sizeof(PxU32));
	m += (numTris * 3) * sizeof(PxU32);
	// Make sure m is 16-byte aligned
	m = (PxU8*)(((size_t)m + 15) & ~15);
	// Copy sdf values
	const SDF& sdfData = triMesh.getSdfDataFast();
	*((uint4*)m) = make_uint4(sdfData.mDims.x, sdfData.mDims.y, sdfData.mDims.z, triMesh.getPreferSDFProjection() ? 1 : 0);
	m += sizeof(uint4);
	const PxU32 numSdfs = sdfData.mNumSdfs;
	PxgMeshTextureData result;
	PxMemZero(&result, sizeof(result));
	if (numSdfs)
	{
		const PxVec3 meshLower = sdfData.mMeshLower;
		const PxReal spacing = sdfData.mSpacing;
		*((float4*)m) = make_float4(meshLower.x, meshLower.y, meshLower.z, spacing);
		m += sizeof(float4);
		// Subgrid header: size, min/max SDF values bit-cast to PxU32, slot count.
		*((uint4*)m) = make_uint4(sdfData.mSubgridSize, reinterpret_cast<const PxU32&>(sdfData.mSubgridsMinSdfValue), reinterpret_cast<const PxU32&>(sdfData.mSubgridsMaxSdfValue), sdfData.mNumStartSlots);
		m += sizeof(uint4);
		// Texture-object handles live inside the serialized mesh itself; the
		// createTextureObject calls below write into these slots.
		CUtexObject* pTexture = (CUtexObject*)m;
		m += sizeof(CUtexObject);
		CUtexObject* pTextureSubgrids = (CUtexObject*)m;
		m += sizeof(CUtexObject);
		if (sdfData.mSubgridSize > 0)
			PxMemCopy(m, sdfData.mSubgridStartSlots, sdfData.mNumStartSlots * sizeof(PxU32));
		m += sizeof(PxU32) * sdfData.mNumStartSlots;
		if (sdfData.mSubgridSize == 0)
		{
			// Dense SDF: a single 3D float texture over the full grid.
			CUarray cuArray;
			createTextureObject<PxReal>(CU_AD_FORMAT_FLOAT, pTexture, cuArray, sdfData.mDims.x, sdfData.mDims.y, sdfData.mDims.z, sdfData.mSdf, stream);
			result.cuArray = cuArray;
			result.cuTexRef = *pTexture;
			result.cuArraySubgrids = 0;
			result.cuTexRefSubgrids = 0;
		}
		else
		{
			// Sparse SDF: a coarse background texture plus a subgrid texture whose
			// pixel format depends on mBytesPerSparsePixel.
			PxU32 x = sdfData.mDims.x / sdfData.mSubgridSize;
			PxU32 y = sdfData.mDims.y / sdfData.mSubgridSize;
			PxU32 z = sdfData.mDims.z / sdfData.mSubgridSize;
			CUarray cuArray;
			createTextureObject<PxReal>(CU_AD_FORMAT_FLOAT, pTexture, cuArray, x + 1, y + 1, z + 1, sdfData.mSdf, stream);
			CUarray cuArraySubgrids = 0;
			switch(sdfData.mBytesPerSparsePixel)
			{
			case 1:
				createTextureObject<PxU8>(CU_AD_FORMAT_UNSIGNED_INT8, pTextureSubgrids, cuArraySubgrids,
					sdfData.mSdfSubgrids3DTexBlockDim.x * (sdfData.mSubgridSize + 1),
					sdfData.mSdfSubgrids3DTexBlockDim.y * (sdfData.mSubgridSize + 1),
					sdfData.mSdfSubgrids3DTexBlockDim.z * (sdfData.mSubgridSize + 1), sdfData.mSubgridSdf, stream);
				break;
			case 2:
				createTextureObject<PxU16>(CU_AD_FORMAT_UNSIGNED_INT16, pTextureSubgrids, cuArraySubgrids,
					sdfData.mSdfSubgrids3DTexBlockDim.x * (sdfData.mSubgridSize + 1),
					sdfData.mSdfSubgrids3DTexBlockDim.y * (sdfData.mSubgridSize + 1),
					sdfData.mSdfSubgrids3DTexBlockDim.z * (sdfData.mSubgridSize + 1), reinterpret_cast<PxU16*>(sdfData.mSubgridSdf), stream);
				break;
			case 4:
				createTextureObject<PxReal>(CU_AD_FORMAT_FLOAT, pTextureSubgrids, cuArraySubgrids,
					sdfData.mSdfSubgrids3DTexBlockDim.x * (sdfData.mSubgridSize + 1),
					sdfData.mSdfSubgrids3DTexBlockDim.y * (sdfData.mSubgridSize + 1),
					sdfData.mSdfSubgrids3DTexBlockDim.z * (sdfData.mSubgridSize + 1), reinterpret_cast<PxReal*>(sdfData.mSubgridSdf), stream);
				break;
			}
			result.cuArray = cuArray;
			result.cuTexRef = *pTexture;
			if (cuArraySubgrids)
			{
				result.cuArraySubgrids = cuArraySubgrids;
				result.cuTexRefSubgrids = *pTextureSubgrids;
			}
			else
			{
				result.cuArraySubgrids = 0;
				result.cuTexRefSubgrids = 0;
			}
		}
	}
	return result;
}
// Serializes a heightfield into 'mem' (sized by computeHeightfieldByteSize()):
// row count, column count, the raw 32-bit samples, then the flags.
static void layOutHeightfield(void* mem, const HeightFieldData& hf)
{
	PxU8* m = (PxU8*) mem;
	PxU32 nbRows = hf.rows;
	PxU32 nbCols = hf.columns;
	*((PxU32* ) m) = nbRows;
	m += sizeof(PxU32);
	*((PxU32* ) m) = nbCols;
	m += sizeof(PxU32);
	// Samples are copied verbatim as 32-bit words (height + two material bytes).
	PX_ASSERT(sizeof(PxU32) == sizeof(PxHeightFieldSample));
	PxMemCopy(m, hf.samples, sizeof(PxU32) * nbRows * nbCols);
	m += sizeof(PxU32) * nbRows * nbCols;
	*((PxU16*)m) = hf.flags;
}
// Creates a 1-channel 3D CUDA array of the given format and dimensions, uploads
// 'data' into it asynchronously on 'stream', and creates a clamped, linearly
// filtered texture object over it.
// 'texture' points at the destination slot for the texture handle (it lives
// inside the mesh's serialized device layout); 'cuArray' receives the backing
// array so the caller can release it later. For degenerate dimensions both
// outputs are set to 0 and nothing is created.
template<typename T>
static void createTextureObject(CUarray_format format, CUtexObject*& texture, CUarray& cuArray, PxU32 width, PxU32 height, PxU32 depth, const T* data, CUstream stream)
{
	if (width == 0 || height == 0 || depth == 0)
	{
		texture = 0;
		cuArray = 0;
		return;
	}
	CUDA_ARRAY3D_DESCRIPTOR_st arrDesc;
	arrDesc.Format = format;
	arrDesc.NumChannels = 1;	// single scalar channel
	arrDesc.Width = width;
	arrDesc.Height = height;
	arrDesc.Depth = depth;
	arrDesc.Flags = 0;
	CUresult r = cuArray3DCreate(&cuArray, &arrDesc);
	PX_UNUSED(r);
	PX_ASSERT(r == CUDA_SUCCESS);
	// Upload host data into the 3D array; source is tightly packed, so the
	// pitch equals the row byte count.
	CUDA_MEMCPY3D copyParam;
	PxMemZero(&copyParam, sizeof(copyParam));
	copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
	copyParam.dstArray = cuArray;
	copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
	copyParam.srcHost = data;
	copyParam.srcPitch = width * sizeof(T);
	copyParam.srcHeight = height;
	copyParam.WidthInBytes = copyParam.srcPitch;
	copyParam.Height = height;
	copyParam.Depth = depth;
	// Check the copy result like the other CUDA calls in this function
	// (previously the only unchecked call here).
	r = cuMemcpy3DAsync(&copyParam, stream);
	PX_ASSERT(r == CUDA_SUCCESS);
	CUDA_RESOURCE_DESC resDesc;
	PxMemZero(&resDesc, sizeof(resDesc));
	resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
	resDesc.res.array.hArray = cuArray;
	CUDA_TEXTURE_DESC texDesc;
	PxMemZero(&texDesc, sizeof(texDesc));
	texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_CLAMP;
	texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_CLAMP;
	texDesc.addressMode[2] = CU_TR_ADDRESS_MODE_CLAMP;
	texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR;	// interpolate between samples
	r = cuTexObjectCreate(texture, &resDesc, &texDesc, NULL);
	PX_ASSERT(r == CUDA_SUCCESS);
}

View File

@@ -0,0 +1,92 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "PxgBroadPhase.h"
namespace physx
{
extern "C" void initNarrowphaseKernels0();
extern "C" void initNarrowphaseKernels1();
extern "C" void initNarrowphaseKernels2();
extern "C" void initNarrowphaseKernels3();
extern "C" void initNarrowphaseKernels4();
extern "C" void initNarrowphaseKernels5();
extern "C" void initNarrowphaseKernels6();
extern "C" void initNarrowphaseKernels7();
extern "C" void initNarrowphaseKernels8();
extern "C" void initNarrowphaseKernels9();
extern "C" void initNarrowphaseKernels10();
extern "C" void initNarrowphaseKernels11();
extern "C" void initNarrowphaseKernels12();
extern "C" void initNarrowphaseKernels13();
extern "C" void initNarrowphaseKernels14();
extern "C" void initNarrowphaseKernels15();
extern "C" void initNarrowphaseKernels16();
extern "C" void initNarrowphaseKernels17();
extern "C" void initNarrowphaseKernels18();
extern "C" void initNarrowphaseKernels19();
extern "C" void initNarrowphaseKernels20();
extern "C" void initNarrowphaseKernels21();
extern "C" void initNarrowphaseKernels22();
extern "C" void initNarrowphaseKernels23();
extern "C" void initNarrowphaseKernels24();
// Touches one symbol from every narrowphase kernel translation unit so the
// linker cannot drop those objects when PhysXNarrowphaseGpu is linked as a
// static library. The calls themselves are expected to be no-op registrations;
// in GPU-exports builds (PX_PHYSX_GPU_EXPORTS) they are unnecessary and skipped.
void createPxgNarrowphase()
{
#if !PX_PHYSX_GPU_EXPORTS
	//this call is needed to force PhysXNarrowphaseGpu linkage as Static Library!
	initNarrowphaseKernels0();
	initNarrowphaseKernels1();
	initNarrowphaseKernels2();
	initNarrowphaseKernels3();
	initNarrowphaseKernels4();
	initNarrowphaseKernels5();
	initNarrowphaseKernels6();
	initNarrowphaseKernels7();
	initNarrowphaseKernels8();
	initNarrowphaseKernels9();
	initNarrowphaseKernels10();
	initNarrowphaseKernels11();
	initNarrowphaseKernels12();
	initNarrowphaseKernels13();
	initNarrowphaseKernels14();
	initNarrowphaseKernels15();
	initNarrowphaseKernels16();
	initNarrowphaseKernels17();
	initNarrowphaseKernels18();
	initNarrowphaseKernels19();
	initNarrowphaseKernels20();
	initNarrowphaseKernels21();
	initNarrowphaseKernels22();
	initNarrowphaseKernels23();
	initNarrowphaseKernels24();
#endif
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,570 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "PxgShapeManager.h"
#include "PxgCopyManager.h"
#include "PxgHeapMemAllocator.h"
#include "PxgCudaUtils.h"
#include "PxNodeIndex.h"
using namespace physx;
// Sets up the host-side mirrors (allocated from mapped, GPU-visible memory) and
// their matching GPU buffers with an initial capacity of 128 entries, and
// clears all dirty/resize bookkeeping.
PxgShapeManager::PxgShapeManager(PxgHeapMemoryAllocatorManager* heapManager) :
	mHeapManager(heapManager),
	mHostShapes(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE)),
	mHostShapesRemapTable(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE)),
	mHostShapeIdTable(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE)),
	mHostTransformCacheIdToActorTable(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE)),
	mGpuShapesBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuShapesRemapTableBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuTransformCacheIdToActorTableBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuRigidIndiceBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuShapeIndiceBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuUnsortedShapeIndicesBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuTempRigidBitIndiceBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuTempRigidIndiceBuffer(heapManager, PxsHeapStats::eNARROWPHASE)
{
	//allocate x4 (capacities are kept multiples of 4 for the radix sort later on)
	const PxU32 initialSize = 128;
	// forceSize_Unsafe(0) + reserve + forceSize_Unsafe(initialSize) sizes the
	// arrays without element construction.
	mHostShapes.forceSize_Unsafe(0);
	mHostShapes.reserve(initialSize);
	mHostShapes.forceSize_Unsafe(initialSize);
	mHostShapesRemapTable.forceSize_Unsafe(0);
	mHostShapesRemapTable.reserve(initialSize);
	mHostShapesRemapTable.forceSize_Unsafe(initialSize);
	mHostShapeIdTable.forceSize_Unsafe(0);
	mHostShapeIdTable.reserve(initialSize);
	mHostShapeIdTable.forceSize_Unsafe(initialSize);
	mHostTransformCacheIdToActorTable.forceSize_Unsafe(0);
	mHostTransformCacheIdToActorTable.reserve(initialSize);
	mHostTransformCacheIdToActorTable.forceSize_Unsafe(initialSize);
	mGpuShapesBuffer.allocate(sizeof(PxgShape)*initialSize, PX_FL);
	mGpuShapesRemapTableBuffer.allocate(sizeof(PxNodeIndex) * initialSize, PX_FL);
	mGpuTransformCacheIdToActorTableBuffer.allocate(sizeof(PxActor*) * initialSize, PX_FL);
	mGpuRigidIndiceBuffer.allocate(sizeof(PxNodeIndex) * initialSize, PX_FL);
	mGpuShapeIndiceBuffer.allocate(sizeof(PxU32) * initialSize, PX_FL);
	mGpuUnsortedShapeIndicesBuffer.allocate(sizeof(PxU32) * initialSize, PX_FL);
	mGpuTempRigidBitIndiceBuffer.allocate(sizeof(PxU32) * initialSize, PX_FL);
	mGpuTempRigidIndiceBuffer.allocate(sizeof(PxNodeIndex) * initialSize, PX_FL);
	mResizeRequired = false;
	mTransformCacheResizeRequired = false;
	// -1 means "nothing registered yet".
	mMaxShapeId = -1;
	mMaxTransformCacheID = -1;
	mHasShapeChanged = false;
	mHasShapeInstanceChanged = false;
}
// Fills the remap/index GPU buffers with 0xFFFFFFFF (the invalid-slot marker)
// so unused entries are recognizable before the first scheduleCopyHtoD upload.
void PxgShapeManager::initialize(PxCudaContext* cudaContext, CUstream stream)
{
	cudaContext->memsetD32Async(mGpuShapesRemapTableBuffer.getDevicePtr(), 0xFFFFFFFF, mGpuShapesRemapTableBuffer.getSize()/sizeof(PxU32), stream);
	cudaContext->memsetD32Async(mGpuRigidIndiceBuffer.getDevicePtr(), 0xFFFFFFFF, mGpuRigidIndiceBuffer.getSize() / sizeof(PxU32), stream);
	cudaContext->memsetD32Async(mGpuShapeIndiceBuffer.getDevicePtr(), 0xFFFFFFFF, mGpuShapeIndiceBuffer.getSize() / sizeof(PxU32), stream);
	cudaContext->memsetD32Async(mGpuUnsortedShapeIndicesBuffer.getDevicePtr(), 0xFFFFFFFF, mGpuUnsortedShapeIndicesBuffer.getSize() / sizeof(PxU32), stream);
}
// Stores a copy of 'shape' in the host mirror, marks it dirty for upload and
// returns its slot id. Grows the mirror (and flags the GPU buffer for growth)
// when the id pool hands out an id beyond the current capacity.
PxU32 PxgShapeManager::registerShape(PxgShape& shape)
{
	const PxU32 shapeId = mIdPool.getNewID();
	if (shapeId >= mHostShapes.capacity())
	{
		mResizeRequired = true;
		const PxU32 capacity = shapeId * 2;
		//make sure capacity is x4 because we need to use radix sort to sort shape id based on rigid body index later
		const PxU32 tempCapacity = (capacity + 3)&(~3);
		mHostShapes.resize(tempCapacity);
		mDirtyShapeMap.resize(tempCapacity);
	}
	mHostShapes[shapeId] = shape;
	mDirtyShapeMap.growAndSet(shapeId);
	mMaxShapeId = PxMax(PxI32(shapeId), mMaxShapeId);
	mHasShapeChanged = true;
	return shapeId;
}
// Records a shape instance at the given transform-cache slot: node index, shape
// id and owning actor. Aggregate entries store invalid/null placeholders for the
// shape id and actor. Marks the slot dirty for upload.
void PxgShapeManager::registerShapeInstance(const PxNodeIndex& nodeIndex, const PxU32 transformCacheID, PxActor* actor, bool aggregate)
{
	if (transformCacheID >= mHostShapesRemapTable.capacity())
	{
		const PxU32 capacity = transformCacheID*2;
		//make sure capacity is x4 because we need to use radix sort to sort shape id based on rigid body index later
		const PxU32 tempCapacity = (capacity + 3)&(~3);
		mTransformCacheResizeRequired = true;
		mHostShapesRemapTable.resize(tempCapacity);
		mHostShapeIdTable.resize(tempCapacity);
		mHostTransformCacheIdToActorTable.resize(tempCapacity);
		mDirtyTransformCacheMap.resize(tempCapacity);
	}
	mHostShapesRemapTable[transformCacheID] = nodeIndex;
	mHostShapeIdTable[transformCacheID] = aggregate? 0xffffffff : transformCacheID;
	mHostTransformCacheIdToActorTable[transformCacheID] = aggregate ? NULL : actor;
	mHasShapeInstanceChanged = true;
	mDirtyTransformCacheMap.growAndSet(transformCacheID);
	mMaxTransformCacheID = PxMax(PxI32(transformCacheID), mMaxTransformCacheID);
}
void PxgShapeManager::unregisterShape(const PxU32 id)
{
	// Cancel any pending upload for this shape, recycle its slot, and flag the
	// shape tables so the next scheduleCopyHtoD() processes the change.
	mHasShapeChanged = true;
	mIdPool.deferredFreeID(id);
	mDirtyShapeMap.reset(id);
}
void PxgShapeManager::unregisterShapeInstance(const PxU32 transformCacheID)
{
	// Write invalid/null placeholders into every host mirror for this slot and
	// mark it dirty so the invalidation is also uploaded to the GPU tables.
	mHostTransformCacheIdToActorTable[transformCacheID] = NULL;
	mHostShapeIdTable[transformCacheID] = 0xffffffff;
	mHostShapesRemapTable[transformCacheID] = PxNodeIndex(PX_INVALID_NODE);
	mDirtyTransformCacheMap.set(transformCacheID);
	mHasShapeInstanceChanged = true;
}
// Queues uploads of all dirty shape records and shape-instance tables from the
// mapped host mirrors to their GPU buffers. Dirty bits are coalesced: up to
// maxGrouping adjacent dirty entries are merged into a single copy descriptor.
// If a host-side resize happened, the matching GPU buffers are grown first
// (asynchronously, preserving old contents).
void PxgShapeManager::scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext, CUstream stream)
{
	PX_UNUSED(copyManager);
	const PxU32 maxGrouping = 16; // max adjacent dirty entries merged per copy
	if (mHasShapeChanged)
	{
		mHasShapeChanged = false;
		if (mResizeRequired)
		{
			//Allocate and copy data across
			mGpuShapesBuffer.allocateCopyOldDataAsync(sizeof(PxgShape)*mHostShapes.capacity(), cudaContext, stream, PX_FL);
			mResizeRequired = false;
		}
		const PxU32* bits = mDirtyShapeMap.getWords();
		if (bits)
		{
			const PxU32 totalNumOfShapes = mMaxShapeId + 1;
			const PxU32 numShapes = (totalNumOfShapes + 3) &(~3);
			//make sure the dirty shape map cover x4 case and set those to invalid value
			for (PxU32 i = totalNumOfShapes; i < numShapes; ++i)
			{
				mDirtyShapeMap.growAndSet(i);
			}
			// PT: ### bitmap iterator pattern
			const PxU32 lastSetBit = mDirtyShapeMap.findLast();
			for (PxU32 w = 0; w <= lastSetBit >> 5; ++w)
			{
				//b&=b-1 will clear the lowest set bit in b
				for (PxU32 b = bits[w]; b; )
				{
					//dirtyId is the next bit that's set to 1!
					const PxU32 dirtyId = PxU32(w << 5 | PxLowestSetBit(b));
					// The host mirror lives in mapped memory: the copy source is the
					// device alias of the host pointer.
					void* hostPtr = mHostShapes.begin() + dirtyId;
					PxgCopyManager::CopyDesc desc;
					desc.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostPtr));
					desc.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuShapesBuffer.getDevicePtr()) + dirtyId * sizeof(PxgShape));
					desc.bytes = sizeof(PxgShape);
					mDirtyShapeMap.reset(dirtyId);
					//Now we loop to try and find adjacent bits that are set...
					PxU32 currIdx = dirtyId + 1;
					PxU32 groupSize = 1;
					while (currIdx <= lastSetBit && mDirtyShapeMap.test(currIdx) && groupSize < maxGrouping)
					{
						groupSize++;
						mDirtyShapeMap.reset(currIdx);
						currIdx++;
						desc.bytes += sizeof(PxgShape);
					}
					if (currIdx != (dirtyId + 1))
					{
						//get the word from the current bit
						w = PxMin(currIdx, lastSetBit) >> 5;
						//reload the word
						b = bits[w]; //Set a 1 here to make sure the b &= (b-1) in the for loop doesn't remove the current bit we're interested in
					}
					else
					{
						b &= (b - 1);
					}
					copyManager.pushDeferredHtoD(desc);
				}
			}
		}
		mDirtyShapeMap.clear();
	}
	if (mHasShapeInstanceChanged)
	{
		//AD: mHasShapeInstanceChanged needs to persist because computeRigidsToShapes() needs to run if we use direct-API
		// we lower the flag in PxgNarrowphaseCore::prepareGpuNarrowphase.
		// AD: the resize of the GPU transform cache is inside PxgNarrowphaseCore::prepareGpuNarrowphase.
		if (mTransformCacheResizeRequired)
		{
			// Grow each per-instance GPU buffer, keeping old contents and marking the
			// newly added region invalid (0xFFFFFFFF) or null (actor table).
			PxU64 oldCapacity = mGpuShapesRemapTableBuffer.getSize();
			mGpuShapesRemapTableBuffer.allocateCopyOldDataAsync(sizeof(PxNodeIndex)*mHostShapesRemapTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuShapesRemapTableBuffer.getDevicePtr() + oldCapacity, 0xFFFFFFFF, (mGpuShapesRemapTableBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);
			oldCapacity = mGpuRigidIndiceBuffer.getSize();
			mGpuRigidIndiceBuffer.allocateCopyOldDataAsync(sizeof(PxNodeIndex) * mHostShapesRemapTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuRigidIndiceBuffer.getDevicePtr() + oldCapacity, 0xFFFFFFFF, (mGpuRigidIndiceBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);
			mGpuTempRigidIndiceBuffer.allocate(sizeof(PxNodeIndex) * mHostShapesRemapTable.capacity(), PX_FL);
			oldCapacity = mGpuShapeIndiceBuffer.getSize();
			mGpuShapeIndiceBuffer.allocateCopyOldDataAsync(sizeof(PxU32) * mHostShapeIdTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuShapeIndiceBuffer.getDevicePtr() + oldCapacity, 0xFFFFFFFF, (mGpuShapeIndiceBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);
			oldCapacity = mGpuUnsortedShapeIndicesBuffer.getSize();
			mGpuUnsortedShapeIndicesBuffer.allocateCopyOldDataAsync(sizeof(PxU32) * mHostShapeIdTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuUnsortedShapeIndicesBuffer.getDevicePtr() + oldCapacity, 0xFFFFFFFF, (mGpuUnsortedShapeIndicesBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);
			mGpuTempRigidBitIndiceBuffer.allocate(sizeof(PxU32) * mHostShapeIdTable.capacity(), PX_FL);
			oldCapacity = mGpuTransformCacheIdToActorTableBuffer.getSize();
			mGpuTransformCacheIdToActorTableBuffer.allocateCopyOldDataAsync(sizeof(PxActor*) * mHostTransformCacheIdToActorTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuTransformCacheIdToActorTableBuffer.getDevicePtr() + oldCapacity, 0, (mGpuTransformCacheIdToActorTableBuffer.getSize() - oldCapacity) / sizeof(PxActor*), stream);
			mTransformCacheResizeRequired = false;
		}
		const PxU32 totalNumOfShapeInstances = mMaxTransformCacheID + 1;
		const PxU32 numShapeInstances = (totalNumOfShapeInstances + 3) &(~3);
		//make sure the dirty shape map cover x4 case and set those to invalid value
		for (PxU32 i = totalNumOfShapeInstances; i < numShapeInstances; ++i)
		{
			if (!mHostShapesRemapTable[i].isStaticBody())
			{
				mDirtyTransformCacheMap.growAndSet(i);
				mHostShapesRemapTable[i] = PxNodeIndex(PX_INVALID_NODE);
				mHostShapeIdTable[i] = 0xffffffff;
				mHostTransformCacheIdToActorTable[i] = NULL;
			}
		}
		const PxU32* bits = mDirtyTransformCacheMap.getWords();
		if (bits)
		{
			// PT: ### bitmap iterator pattern
			const PxU32 lastSetBit = mDirtyTransformCacheMap.findLast();
			for (PxU32 w = 0; w <= lastSetBit >> 5; ++w)
			{
				//b&=b-1 will clear the lowest set bit in b
				for (PxU32 b = bits[w]; b; )
				{
					//dirtyId is the next bit that's set to 1!
					const PxU32 dirtyId = PxU32(w << 5 | PxLowestSetBit(b));
					void* hostRemapPtr = mHostShapesRemapTable.begin() + dirtyId;
					void* hostShapeIdPtr = mHostShapeIdTable.begin() + dirtyId;
					void* hostTransformCacheIdToActorPtr = mHostTransformCacheIdToActorTable.begin() + dirtyId;
					// Four parallel uploads per dirty run: node-index remap table, rigid
					// index buffer (same source), unsorted shape ids, and the
					// transform-cache-id-to-actor table.
					PxgCopyManager::CopyDesc desc1;
					desc1.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostRemapPtr));
					desc1.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuShapesRemapTableBuffer.getDevicePtr()) + dirtyId * sizeof(PxNodeIndex));
					desc1.bytes = sizeof(PxNodeIndex);
					PxgCopyManager::CopyDesc desc2;
					desc2.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostRemapPtr));
					desc2.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuRigidIndiceBuffer.getDevicePtr()) + dirtyId * sizeof(PxNodeIndex));
					desc2.bytes = sizeof(PxNodeIndex);
					PxgCopyManager::CopyDesc desc3;
					desc3.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostShapeIdPtr));
					desc3.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuUnsortedShapeIndicesBuffer.getDevicePtr()) + dirtyId * sizeof(PxU32));
					desc3.bytes = sizeof(PxU32);
					PxgCopyManager::CopyDesc desc4;
					desc4.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostTransformCacheIdToActorPtr));
					desc4.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuTransformCacheIdToActorTableBuffer.getDevicePtr()) + dirtyId * sizeof(PxActor*));
					desc4.bytes = sizeof(PxActor*);
					mDirtyTransformCacheMap.reset(dirtyId);
					//Now we loop to try and find adjacent bits that are set...
					PxU32 currIdx = dirtyId + 1;
					PxU32 groupSize = 1;
					while (currIdx <= lastSetBit && mDirtyTransformCacheMap.test(currIdx) && groupSize < maxGrouping)
					{
						groupSize++;
						mDirtyTransformCacheMap.reset(currIdx);
						currIdx++;
						desc1.bytes += sizeof(PxNodeIndex);
						desc2.bytes += sizeof(PxNodeIndex);
						desc3.bytes += sizeof(PxU32);
						desc4.bytes += sizeof(PxActor*);
					}
					if (currIdx != (dirtyId + 1))
					{
						//get the word from the current bit
						w = PxMin(currIdx, lastSetBit) >> 5;
						//reload the word
						b = bits[w]; //Set a 1 here to make sure the b &= (b-1) in the for loop doesn't remove the current bit we're interested in
					}
					else
					{
						b &= (b - 1);
					}
					copyManager.pushDeferredHtoD(desc1);
					copyManager.pushDeferredHtoD(desc2);
					copyManager.pushDeferredHtoD(desc3);
					copyManager.pushDeferredHtoD(desc4);
				}
			}
		}
		mDirtyTransformCacheMap.clear();
	}
}
void PxgShapeManager::updateShapeMaterial(const PxU32 materialIndex, const PxU32 id)
{
PX_ASSERT(id < mHostShapes.size());
mHostShapes[id].materialIndex = materialIndex;
mDirtyShapeMap.growAndSet(id);
mHasShapeChanged = true;
}
////////////////////////////////////////////////////////////////////////////////////////////
// Sets up the host mirror (in mapped, GPU-visible memory) and the GPU buffer
// with room for 128 material records of 'elemSize' bytes each.
PxgMaterialManager::PxgMaterialManager(PxgHeapMemoryAllocatorManager* heapManager, const PxU32 elemSize) :
	mGpuMaterialBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mHeapManager(heapManager),
	mHostMaterial(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE))
{
	const PxU32 originalSize = elemSize * 128;
	// forceSize_Unsafe(0) + reserve + forceSize_Unsafe sizes the byte array
	// without element construction.
	mHostMaterial.forceSize_Unsafe(0);
	mHostMaterial.reserve(originalSize);
	mHostMaterial.forceSize_Unsafe(originalSize);
	mGpuMaterialBuffer.allocate(originalSize, PX_FL);
	mResizeRequired = false;
}
// Allocates a material slot, grows the host-side byte array if the new id
// falls outside the current capacity, copies the material data into place,
// and marks the slot dirty for upload. Returns the allocated material id.
PxU32 PxgMaterialManager::registerMaterial(const PxU8* materialData, const PxU32 elemSize)
{
	const PxU32 materialId = mIdPool.getNewID();

	const PxU32 slotCapacity = mHostMaterial.capacity() / elemSize;
	if (materialId >= slotCapacity)
	{
		// Geometric growth, but always at least enough to hold the new slot.
		const PxU32 newSlotCapacity = PxMax(slotCapacity * 2 + 1, materialId + 1);
		mHostMaterial.resize(newSlotCapacity * elemSize);
		mResizeRequired = true;	// GPU buffer must be re-allocated before the next upload
	}

	PxMemCopy(mHostMaterial.begin() + materialId * elemSize, materialData, elemSize);
	mDirtyMaterialMap.growAndSet(materialId);
	return materialId;
}
// Releases material slot `id`: the id is returned to the pool for deferred
// reuse and any pending upload for the slot is cancelled.
void PxgMaterialManager::unregisterMaterial(const PxU32 id)
{
	mIdPool.deferredFreeID(id);
	mDirtyMaterialMap.reset(id);
}
// Uploads every dirty host-side material record to the GPU material buffer.
// Runs of adjacent dirty slots (up to maxGrouping) are coalesced into a
// single copy descriptor to cut down the number of deferred H-to-D copies.
// NOTE(review): the inner loops deliberately mutate the outer loop variables
// `w` and `b` when a coalesced run crosses a 32-bit word boundary — do not
// "simplify" this iteration without re-deriving the invariants.
void PxgMaterialManager::scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext,
	CUstream stream, const PxU32 elemSize)
{
	if (mResizeRequired)
	{
		// Host array grew since the last sync: grow the GPU buffer to match,
		// asynchronously preserving the previously uploaded contents.
		mGpuMaterialBuffer.allocateCopyOldDataAsync(mHostMaterial.capacity(), cudaContext, stream, PX_FL);
		mResizeRequired = false;
	}
	const PxU32* bits = mDirtyMaterialMap.getWords();
	const PxU32 maxGrouping = 16;	// max number of adjacent slots merged into one copy
	if (bits)
	{
		// PT: ### bitmap iterator pattern
		const PxU32 lastSetBit = mDirtyMaterialMap.findLast();
		for (PxU32 w = 0; w <= lastSetBit >> 5; ++w)
		{
			//b&=b-1 will clear the lowest set bit in b
			for (PxU32 b = bits[w]; b; )
			{
				//dirtyId is the next bit that's set to 1!
				const PxU32 dirtyId = PxU32(w << 5 | PxLowestSetBit(b));
				void* hostPtr = mHostMaterial.begin() + dirtyId * elemSize;
				// Start a one-element copy descriptor for this slot; it may be
				// extended below to cover adjacent dirty slots.
				PxgCopyManager::CopyDesc desc;
				desc.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostPtr));
				desc.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuMaterialBuffer.getDevicePtr()) + dirtyId * elemSize);
				desc.bytes = elemSize;
				mDirtyMaterialMap.reset(dirtyId);
				//Now we loop to try and find adjacent bits that are set...
				PxU32 currIdx = dirtyId + 1;
				PxU32 groupSize = 1;
				while (currIdx <= lastSetBit && mDirtyMaterialMap.test(currIdx) && (groupSize < maxGrouping))
				{
					// Extend the copy over the next contiguous dirty slot.
					groupSize++;
					mDirtyMaterialMap.reset(currIdx);
					currIdx++;
					desc.bytes += elemSize;
				}
				if (currIdx != (dirtyId + 1))
				{
					// The coalesced run may have crossed into a later word:
					//get the word from the current bit
					w = PxMin(currIdx, lastSetBit) >> 5;
					//reload the word
					b = bits[w]; //Set a 1 here to make sure the b &= (b-1) in the for loop doesn't remove the current bit we're interested in
				}
				else
				{
					b &= (b - 1);
				}
				copyManager.pushDeferredHtoD(desc);
			}
		}
	}
	mDirtyMaterialMap.clear();
}
void PxgMaterialManager::updateMaterial(const PxU8* materialCore, const PxU32 elemSize, const PxU32 id)
{
PX_ASSERT(id < mHostMaterial.size());
PxU8* destptr = reinterpret_cast<PxU8*>(mHostMaterial.begin() + id * elemSize);
PxMemCopy(destptr, materialCore, elemSize);
//mHostMaterial[id] = materialCore;
mDirtyMaterialMap.growAndSet(id);
}
//////////////////////////////////////////////////////////////////////////////////////////////
// FEM materials reuse the generic material-manager storage; `elemSize` is
// the size in bytes of the concrete FEM material record, supplied by the
// derived class.
PxgFEMMaterialManager::PxgFEMMaterialManager(PxgHeapMemoryAllocatorManager* heapManager, const PxU32 elemSize) :
	PxgMaterialManager(heapManager, elemSize)
{
}
// Uploads every dirty host-side FEM material record to the GPU buffer.
// The previous body was a verbatim, line-for-line copy of
// PxgMaterialManager::scheduleCopyHtoD, so forward to the base
// implementation instead of duplicating the bitmap-coalescing logic.
void PxgFEMMaterialManager::scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext,
	CUstream stream, const PxU32 elemSize)
{
	PxgMaterialManager::scheduleCopyHtoD(copyManager, cudaContext, stream, elemSize);
}
////////////////////////////////////////////////////////////////////////////////////////////
// Soft-body material manager: a FEM material manager whose element size is
// fixed to the deformable-volume material record.
PxgFEMSoftBodyMaterialManager::PxgFEMSoftBodyMaterialManager(PxgHeapMemoryAllocatorManager* heapManager) :
	PxgFEMMaterialManager(heapManager, sizeof(PxsDeformableVolumeMaterialData))
{
}