feat(physics): wire physx sdk into build
This commit is contained in:
48
engine/third_party/physx/source/gpunarrowphase/include/PxgContactManager.h
vendored
Normal file
48
engine/third_party/physx/source/gpunarrowphase/include/PxgContactManager.h
vendored
Normal file
@@ -0,0 +1,48 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_CONTACT_MANAGER_H
|
||||
#define PXG_CONTACT_MANAGER_H
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
struct PX_ALIGN_PREFIX(16) PxgContactManagerInput
|
||||
{
|
||||
//Body refs are not needed here. World-space transforms are computed using transforCacheRefs instead!
|
||||
PxU32 shapeRef0; //Ref to shape 0
|
||||
PxU32 shapeRef1; //Ref to shape 1
|
||||
PxU32 transformCacheRef0; //Ref to shape0's transforms in transform cache
|
||||
PxU32 transformCacheRef1; //Ref to shape1's transform in transform cache
|
||||
}
|
||||
PX_ALIGN_SUFFIX(16);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
69
engine/third_party/physx/source/gpunarrowphase/include/PxgConvexConvexShape.h
vendored
Normal file
69
engine/third_party/physx/source/gpunarrowphase/include/PxgConvexConvexShape.h
vendored
Normal file
@@ -0,0 +1,69 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_CONVEX_CONVEX_SHAPE_H
|
||||
#define PXG_CONVEX_CONVEX_SHAPE_H
|
||||
|
||||
#include "geometry/PxMeshScale.h"
|
||||
|
||||
#define PXG_MAX_PCM_CONTACTS 4
|
||||
|
||||
namespace physx
|
||||
{
|
||||
template<PxU32 byteSize>
|
||||
class FlexiblePad
|
||||
{
|
||||
PxU8 pad[byteSize];
|
||||
};
|
||||
|
||||
template<>
|
||||
class FlexiblePad<0>
|
||||
{
|
||||
};
|
||||
|
||||
//ML: PxgShape don't need to have contactOffset because we will dma a separated contact offset array later
|
||||
struct PX_ALIGN_PREFIX(16) PxgShape
|
||||
{
|
||||
PxMeshScale scale; //28
|
||||
PxU32 materialIndex; //32
|
||||
size_t hullOrMeshPtr; //36 or 40
|
||||
|
||||
PxU32 type; //40 or 44
|
||||
PxU32 particleOrSoftbodyId; //44 or 48
|
||||
|
||||
//FlexiblePad<16 - sizeof(size_t) - sizeof(PxU32)> pad; //48
|
||||
#if !PX_P64_FAMILY
|
||||
PxU32 pad0;
|
||||
#endif
|
||||
}
|
||||
PX_ALIGN_SUFFIX(16);
|
||||
|
||||
PX_COMPILE_TIME_ASSERT(sizeof(PxgShape) == 48);
|
||||
}
|
||||
|
||||
#endif
|
||||
218
engine/third_party/physx/source/gpunarrowphase/include/PxgGeometryManager.h
vendored
Normal file
218
engine/third_party/physx/source/gpunarrowphase/include/PxgGeometryManager.h
vendored
Normal file
@@ -0,0 +1,218 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_GEOMETRY_MANAGER_H
|
||||
#define PXG_GEOMETRY_MANAGER_H
|
||||
|
||||
#include "foundation/PxPreprocessor.h"
|
||||
|
||||
#include "foundation/PxArray.h"
|
||||
#include "foundation/PxHashMap.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
|
||||
#include "PxgCopyManager.h"
|
||||
|
||||
#if PX_LINUX && PX_CLANG
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wdocumentation"
|
||||
#pragma clang diagnostic ignored "-Wdisabled-macro-expansion"
|
||||
#endif
|
||||
#include "cuda.h"
|
||||
#if PX_LINUX && PX_CLANG
|
||||
#pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
// AD: PxgGeometryManager manages the CPU/GPU data transfers and the lifetime of collision geometries: Convex Hulls, Trimeshes, SDFs, Heightfields.
|
||||
|
||||
/*
|
||||
Short overview:
|
||||
|
||||
- geometries are added using the add* functions.
|
||||
- during add, device memory is allocated and indices are given to geometries. We need to allocate the device memory here despite not writing to it
|
||||
because the pointer is uploaded to GPU together with the shapeInstance data, which happens before we perform the final copy of the geometry in scheduleCopyHtoD.
|
||||
- We also accumulate the pinned memory requirements for the staging buffer during add.
|
||||
- scheduleCopyHtoD is performing the bulk of the work:
|
||||
- we allocate the proper amount of pinned memory
|
||||
- for each scheduled geometry, we lay it out in the pinned memory
|
||||
- we push the copy descriptor to the copy manager.
|
||||
|
||||
- the pinned memory is deallocated and the queues reset in resetAfterMemCopyCompleted().
|
||||
|
||||
Future things:
|
||||
- look into the sequences of device uploads: we might be able to defer device memory allocations and use pools if we can rearrange things such that
|
||||
we don't need the device pointer upfront.
|
||||
- The pinned memory is contiguous, so the copy manager taking pairs of (src, dest) could potentially be replaced by a linear thing.
|
||||
*/
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
class PxgHeapMemoryAllocatorManager;
|
||||
class PxgHeapMemoryAllocator;
|
||||
class PxCudaContext;
|
||||
|
||||
namespace Gu
|
||||
{
|
||||
struct ConvexHullData;
|
||||
class TriangleMesh;
|
||||
struct HeightFieldData;
|
||||
};
|
||||
|
||||
template<typename ArrayT>
|
||||
class PxgFreeIndicesProvider
|
||||
{
|
||||
PX_NOCOPY(PxgFreeIndicesProvider)
|
||||
public:
|
||||
PxgFreeIndicesProvider(ArrayT& ref): mDataArray(ref) {}
|
||||
|
||||
PxU32 getFreeIndex()
|
||||
{
|
||||
if (!mFreeIndices.empty())
|
||||
{
|
||||
return mFreeIndices.popBack();
|
||||
}
|
||||
else
|
||||
{
|
||||
PxU32 currSz = mDataArray.size();
|
||||
|
||||
if (currSz + 1 > mDataArray.capacity())
|
||||
{
|
||||
mDataArray.reserve(currSz * 2);
|
||||
}
|
||||
|
||||
mDataArray.resize(currSz + 1);
|
||||
|
||||
return currSz;
|
||||
}
|
||||
}
|
||||
|
||||
void setFreeIndex(PxU32 idx)
|
||||
{
|
||||
PX_ASSERT(idx < mDataArray.size());
|
||||
mDeferredFreeIndices.pushBack(idx);
|
||||
}
|
||||
|
||||
void releaseFreeIndices()
|
||||
{
|
||||
for (PxU32 a = 0; a < mDeferredFreeIndices.size(); ++a)
|
||||
{
|
||||
mFreeIndices.pushBack(mDeferredFreeIndices[a]);
|
||||
}
|
||||
mDeferredFreeIndices.forceSize_Unsafe(0);
|
||||
}
|
||||
|
||||
private:
|
||||
PxArray<PxU32> mDeferredFreeIndices;
|
||||
PxArray<PxU32> mFreeIndices;
|
||||
ArrayT& mDataArray;
|
||||
};
|
||||
|
||||
struct PxgMeshTextureData
|
||||
{
|
||||
CUarray cuArray;
|
||||
CUtexObject cuTexRef;
|
||||
|
||||
CUarray cuArraySubgrids;
|
||||
CUtexObject cuTexRefSubgrids;
|
||||
};
|
||||
|
||||
struct HullOrMeshData
|
||||
{
|
||||
void* mDeviceMemPointer;
|
||||
PxU32 mCopyDescIndex;
|
||||
|
||||
static const PxU32 INVALID_COPY_DESC_INDEX = 0xFFffFFff;
|
||||
};
|
||||
class PxgGeometryManager;
|
||||
// Holds all the data needed to upload a geometry to the GPU. Created when calling add* functions, read during scheduleCopyHtoD.
|
||||
struct ScheduledCopyData
|
||||
{
|
||||
private:
|
||||
// AD this is local, not intended for widespread usage.
|
||||
struct UploadGeometryType
|
||||
{
|
||||
enum Enum
|
||||
{
|
||||
eTRIANGLEMESH = 1,
|
||||
eCONVEXHULL = 2,
|
||||
eHEIGHTFIELD = 3,
|
||||
eBOXHULL = 4
|
||||
};
|
||||
};
|
||||
public:
|
||||
PxgCopyManager::CopyDesc mCopyDesc; //!< Copy descriptor for upload to GPU. lives until copy completed.
|
||||
PxU32 mHullOrTrimeshIdx; //!< Index into mGeometrydata
|
||||
UploadGeometryType::Enum mType; //!< Geometry type: see UploadGeometryType. Needed for deferred pinned memory allocation.
|
||||
PxU32 mNumPolyVertices; //!< Number of polygon vertices in case this is a convex hull
|
||||
const void* mGeometryPtr; //!< Pointer to CPU geometry data.
|
||||
friend class PxgGeometryManager;
|
||||
};
|
||||
|
||||
class PxgGeometryManager
|
||||
{
|
||||
PX_NOCOPY(PxgGeometryManager)
|
||||
using UploadGeometryType = ScheduledCopyData::UploadGeometryType;
|
||||
|
||||
public:
|
||||
PxgGeometryManager(PxgHeapMemoryAllocatorManager* heapMemoryManager);
|
||||
|
||||
virtual ~PxgGeometryManager();
|
||||
|
||||
PxU32 addHull(const Gu::ConvexHullData& hull);
|
||||
PxU32 addTriMesh(const Gu::TriangleMesh& triMesh);
|
||||
PxU32 addHeightfield(const Gu::HeightFieldData& hf);
|
||||
void addBoxHull();
|
||||
|
||||
void removeGeometry(PxU32 idx);
|
||||
|
||||
void scheduleCopyHtoD(PxgCopyManager& copyMan, PxCudaContext& cudaContext, CUstream stream);
|
||||
CUdeviceptr getGeometryDevPtrByIndex(PxU32 idx) const;
|
||||
CUdeviceptr getBoxHullDevPtr() const;
|
||||
|
||||
void resetAfterMemcpyCompleted();
|
||||
|
||||
private:
|
||||
PxU32 addGeometryInternal(PxU64 byteSize, const void* geomPtr, UploadGeometryType::Enum type, PxU32 numPolyVertices = 0);
|
||||
|
||||
PxgHeapMemoryAllocator* mDeviceMemoryAllocator; //device memory
|
||||
PxgHeapMemoryAllocator* mPinnedMemoryAllocator; //mapped memory
|
||||
void* mPinnedMemoryBasePtr; //stores the pointer to the current pinned memory allocation.
|
||||
PxU64 mPinnedHostMemoryRequirements; // accumulates pinned memory requirements for copies.
|
||||
|
||||
PxArray<ScheduledCopyData> mScheduledCopies; // copies in current step, cleared after scheduleCopyHtoD
|
||||
|
||||
PxArray<HullOrMeshData> mGeometryData; //persistent geometry data, holds idx -> device pointer mapping
|
||||
PxgFreeIndicesProvider<PxArray<HullOrMeshData> > mFreeGeometryIndices; //freeIndex provider for mGeometry.
|
||||
|
||||
PxU32 mBoxHullIdx; //index into mGeometryData for hardcoded convex hull for box.
|
||||
|
||||
PxHashMap<void*, PxgMeshTextureData> mMeshToTextureMap; //maps meshes to SDF textures for lifetime management
|
||||
};
|
||||
};
|
||||
|
||||
#endif
|
||||
39
engine/third_party/physx/source/gpunarrowphase/include/PxgNarrowphase.h
vendored
Normal file
39
engine/third_party/physx/source/gpunarrowphase/include/PxgNarrowphase.h
vendored
Normal file
@@ -0,0 +1,39 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_NARROWPHASE_H
|
||||
#define PXG_NARROWPHASE_H
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
//this is needed to force PhysXNarrowphaseGpu linkage as Static Library!
|
||||
void createPxgNarrowphase();
|
||||
}
|
||||
|
||||
#endif
|
||||
817
engine/third_party/physx/source/gpunarrowphase/include/PxgNarrowphaseCore.h
vendored
Normal file
817
engine/third_party/physx/source/gpunarrowphase/include/PxgNarrowphaseCore.h
vendored
Normal file
@@ -0,0 +1,817 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_NARROWPHASE_CORE_H
|
||||
#define PXG_NARROWPHASE_CORE_H
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxTransform.h"
|
||||
#include "foundation/PxPinnedArray.h"
|
||||
#include "foundation/PxUserAllocated.h"
|
||||
#include "task/PxTask.h"
|
||||
#include "geometry/PxMeshScale.h"
|
||||
|
||||
#include "foundation/PxMutex.h"
|
||||
#include "PxsContactManagerState.h"
|
||||
#include "PxgContactManager.h"
|
||||
#include "PxgPersistentContactManifold.h"
|
||||
#include "PxgCudaBuffer.h"
|
||||
#include "cudaNpCommon.h"
|
||||
|
||||
#include "PxgCudaPagedLinearAllocator.h"
|
||||
#include "PxgCopyManager.h"
|
||||
#include "PxgConvexConvexShape.h"
|
||||
#include "PxgGeometryManager.h"
|
||||
#include "PxvNphaseImplementationContext.h"
|
||||
#include "PxgHeapMemAllocator.h"
|
||||
#include "PxgShapeManager.h"
|
||||
#include "PxgRadixSortDesc.h"
|
||||
#include "geometry/PxParticleSystemGeometry.h"
|
||||
#include "PxvGeometry.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
class PxsTransformCache;
|
||||
struct PxsMaterialData;
|
||||
struct PxsCachedTransform;
|
||||
struct PxcDataStreamPool;
|
||||
class PxgCudaKernelWranglerManager;
|
||||
class PxgNphaseImplementationContext;
|
||||
|
||||
class PxRenderOutput;
|
||||
class PxgGpuContext;
|
||||
struct PxTriangleMeshGeometryLL;
|
||||
class PxgParticleSystemCore;
|
||||
|
||||
struct PxgFemFemContactInfo;
|
||||
struct PxgFemOtherContactInfo;
|
||||
|
||||
namespace Gu
|
||||
{
|
||||
struct ConvexHullData;
|
||||
class TriangleMesh;
|
||||
class DeformableVolumeMesh;
|
||||
class BVTetrahedronMesh;
|
||||
struct HeightFieldData;
|
||||
class PersistentContactManifold;
|
||||
}
|
||||
|
||||
namespace Cm
|
||||
{
|
||||
class FlushPool;
|
||||
}
|
||||
|
||||
namespace IG
|
||||
{
|
||||
class IslandSim;
|
||||
}
|
||||
|
||||
|
||||
// Buckets the GPU narrow phase sorts contact pairs into, one per collision
// kernel family. The comments record which persistent-manifold type (if any)
// each bucket uses — this must stay in sync with BUCKET_ManifoldSize below.
struct GPU_BUCKET_ID
{
	enum Enum
	{
		eFallback = 0,               // handled on CPU
		eConvex = 1,                 // manifold
		eConvexPlane = 2,            // manifold
		eConvexTrimesh = 3,          // multi-manifold
		eConvexHeightfield = 4,      // multi-manifold
		eSphereTrimesh = 5,          // multi-manifold
		eSphereHeightfield = 6,      // multi-manifold
		eTrianglePlane = 7,          // multi-manifold - technically could be single manifold but needs 6 points

		eSphere = 8,                 // no manifold
		eBoxBox = 9,

		eConvexCorePlane = 10,
		eConvexCoreConvex = 11,
		eConvexCoreTrimesh = 12,
		eConvexCoreTetmesh = 13,
		eConvexCoreClothmesh = 14,

		eTriangleHeightfield = 15,   // no manifold
		eTriangleTriangle = 16,      // no manifold
		eSoftbody = 17,              // no manifold
		eSoftbodies = 18,            // no manifold
		eSoftbodyFemCloth = 19,      // no manifold
		eSoftbodyTrimesh = 20,       // no manifold
		eSoftbodySdfTrimesh = 21,    // no manifold
		eSoftbodyHeightfield = 22,   // no manifold
		eFemClothSphere = 23,        // no manifold
		eFemClothPlane = 24,         // no manifold
		eFemClothBox = 25,           // no manifold
		eFemClothConvexes = 26,      // no manifold
		eFemClothes = 27,            // no manifold
		eFemClothTrimesh = 28,       // no manifold
		eFemClothSdfTrimesh = 29,
		eFemClothHeightfield = 30,   // no manifold
		eConvexParticle = 31,        // no manifold
		eParticlesystems = 32,       // no manifold
		eParticlesystemSoftbody = 33,   // no manifold
		eParticlesystemFemCloth = 34,   // no manifold
		eParticlesystemTrimesh = 35,    // no manifold
		eParticlesystemSdfTrimesh = 36, // no manifold
		eParticlesystemHeightfield = 37, // no manifold

		eCount
	};
};
|
||||
|
||||
// Per-bucket persistent-manifold byte size, indexed by GPU_BUCKET_ID::Enum.
// Buckets 1-2 use a single manifold, 3-7 a multi-manifold; all others store none.
const PxU32 BUCKET_ManifoldSize[GPU_BUCKET_ID::eCount] = {
	0,                                          //0  eFallback
	sizeof(PxgPersistentContactManifold),       //1  eConvex
	sizeof(PxgPersistentContactManifold),       //2  eConvexPlane
	sizeof(PxgPersistentContactMultiManifold),  //3  eConvexTrimesh
	sizeof(PxgPersistentContactMultiManifold),  //4  eConvexHeightfield
	sizeof(PxgPersistentContactMultiManifold),  //5  eSphereTrimesh
	sizeof(PxgPersistentContactMultiManifold),  //6  eSphereHeightfield
	sizeof(PxgPersistentContactMultiManifold),  //7  eTrianglePlane
	0,  //8
	0,  //9
	0,  //10
	0,  //11
	0,  //12
	0,  //13
	0,  //14
	0,  //15
	0,  //16
	0,  //17
	0,  //18
	0,  //19
	0,  //20
	0,  //21
	0,  //22
	0,  //23
	0,  //24
	0,  //25
	0,  //26
	0,  //27
	0,  //28
	0,  //29
	0,  //30
	0,  //31
	0,  //32
	0,  //33
	0,  //34
	0,  //35
	0,  //36
	0,  //37
};
|
||||
|
||||
struct PxgContactManagers : public PxsContactManagerBase
|
||||
{
|
||||
PxgContactManagers(const PxU32 bucketId, const PxVirtualAllocator& allocator) : PxsContactManagerBase(bucketId),
|
||||
mGpuInputContactManagers(allocator),
|
||||
mCpuContactManagerMapping(allocator),
|
||||
mShapeInteractions(allocator),
|
||||
mRestDistances(allocator),
|
||||
mTorsionalProperties(allocator)
|
||||
{
|
||||
}
|
||||
|
||||
PxPinnedArray<PxgContactManagerInput> mGpuInputContactManagers;
|
||||
PxPinnedArray<PxsContactManager*> mCpuContactManagerMapping;
|
||||
PxPinnedArray<const Sc::ShapeInteraction*> mShapeInteractions;
|
||||
PxFloatArrayPinned mRestDistances;
|
||||
PxPinnedArray<PxsTorsionalFrictionData> mTorsionalProperties;
|
||||
|
||||
void clear()
|
||||
{
|
||||
mGpuInputContactManagers.forceSize_Unsafe(0);
|
||||
mCpuContactManagerMapping.forceSize_Unsafe(0);
|
||||
mShapeInteractions.forceSize_Unsafe(0);
|
||||
mRestDistances.forceSize_Unsafe(0);
|
||||
mTorsionalProperties.forceSize_Unsafe(0);
|
||||
}
|
||||
|
||||
void preallocateNewBuffers(PxU32 nbToPreallocate)
|
||||
{
|
||||
mGpuInputContactManagers.reserve(nbToPreallocate);
|
||||
mCpuContactManagerMapping.reserve(nbToPreallocate);
|
||||
mShapeInteractions.reserve(nbToPreallocate);
|
||||
mRestDistances.reserve(nbToPreallocate);
|
||||
mTorsionalProperties.reserve(nbToPreallocate);
|
||||
}
|
||||
|
||||
private:
|
||||
PX_NOCOPY(PxgContactManagers)
|
||||
};
|
||||
|
||||
struct PxgNewContactManagers : public PxgContactManagers
|
||||
{
|
||||
PxPinnedArray<PxsContactManagerOutput> mGpuOutputContactManagers;
|
||||
|
||||
PxgNewContactManagers(const PxU32 bucketIndex, const PxVirtualAllocator& allocator) : PxgContactManagers(bucketIndex, allocator),
|
||||
mGpuOutputContactManagers(allocator)
|
||||
{
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
PxgContactManagers::clear();
|
||||
mGpuOutputContactManagers.forceSize_Unsafe(0);
|
||||
}
|
||||
|
||||
void preallocateNewBuffers(PxU32 nbToPreallocate)
|
||||
{
|
||||
mGpuOutputContactManagers.reserve(nbToPreallocate);
|
||||
PxgContactManagers::preallocateNewBuffers(nbToPreallocate);
|
||||
}
|
||||
};
|
||||
|
||||
struct PxgGpuContactManagers
|
||||
{
|
||||
PxgTypedCudaBuffer<PxgContactManagerInput> mContactManagerInputData;
|
||||
PxgTypedCudaBuffer<PxsContactManagerOutput> mContactManagerOutputData;
|
||||
PxgCudaBuffer mPersistentContactManifolds;
|
||||
|
||||
PxgTypedCudaBuffer<PxU32> mTempRunsumArray;
|
||||
PxgTypedCudaBuffer<PxU32> mTempRunsumArray2;
|
||||
PxgTypedCudaBuffer<PxU32> mBlockAccumulationArray;
|
||||
PxgTypedCudaBuffer<PxsContactManagerOutputCounts> mLostFoundPairsOutputData;
|
||||
PxgTypedCudaBuffer<PxsContactManager*> mLostFoundPairsCms;
|
||||
PxgTypedCudaBuffer<PxsContactManager*> mCpuContactManagerMapping;
|
||||
PxgTypedCudaBuffer<Sc::ShapeInteraction*> mShapeInteractions;
|
||||
PxgTypedCudaBuffer<PxReal> mRestDistances;
|
||||
PxgTypedCudaBuffer<PxsTorsionalFrictionData> mTorsionalProperties;
|
||||
uint2* mLostAndTotalReportedPairsCountPinned;
|
||||
const PxU32 mBucketIndex;
|
||||
|
||||
PxgGpuContactManagers(const PxU32 bucketIndex, PxgHeapMemoryAllocatorManager* manager) :
|
||||
mContactManagerInputData(manager, PxsHeapStats::eNARROWPHASE),
|
||||
mContactManagerOutputData(manager, PxsHeapStats::eNARROWPHASE),
|
||||
mPersistentContactManifolds(manager, PxsHeapStats::eNARROWPHASE),
|
||||
mTempRunsumArray(manager, PxsHeapStats::eNARROWPHASE),
|
||||
mTempRunsumArray2(manager, PxsHeapStats::eNARROWPHASE),
|
||||
mBlockAccumulationArray(manager, PxsHeapStats::eNARROWPHASE),
|
||||
mLostFoundPairsOutputData(manager, PxsHeapStats::eNARROWPHASE),
|
||||
mLostFoundPairsCms(manager, PxsHeapStats::eNARROWPHASE),
|
||||
mCpuContactManagerMapping(manager, PxsHeapStats::eNARROWPHASE),
|
||||
mShapeInteractions(manager, PxsHeapStats::eNARROWPHASE),
|
||||
mRestDistances(manager, PxsHeapStats::eNARROWPHASE),
|
||||
mTorsionalProperties(manager, PxsHeapStats::eNARROWPHASE),
|
||||
mLostAndTotalReportedPairsCountPinned(NULL),
|
||||
mBucketIndex(bucketIndex)
|
||||
{
|
||||
PX_ASSERT(bucketIndex < (1 << PxsContactManagerBase::MaxBucketBits));
|
||||
}
|
||||
|
||||
private:
|
||||
PX_NOCOPY(PxgGpuContactManagers)
|
||||
};
|
||||
|
||||
struct PxgGpuPairManagementBuffers
|
||||
{
|
||||
PxgCudaBuffer mTempRunsumArray;
|
||||
PxgCudaBuffer mBlockAccumulationArray;
|
||||
PxgCudaBuffer mRemovedIndicesArray;
|
||||
|
||||
PxgGpuPairManagementBuffers(PxgHeapMemoryAllocatorManager* manager):
|
||||
mTempRunsumArray(manager, PxsHeapStats::eNARROWPHASE), mBlockAccumulationArray(manager, PxsHeapStats::eNARROWPHASE), mRemovedIndicesArray(manager, PxsHeapStats::eNARROWPHASE)
|
||||
{
|
||||
}
|
||||
|
||||
private:
|
||||
PX_NOCOPY(PxgGpuPairManagementBuffers)
|
||||
};
|
||||
|
||||
|
||||
class PxgMirroredContactManagerPair : public PxUserAllocated
|
||||
{
|
||||
public:
|
||||
PxgMirroredContactManagerPair(const PxU32 bucketId, const PxVirtualAllocator& allocator) :
|
||||
mContactManagers(bucketId, allocator),
|
||||
mNewContactManagers(bucketId, allocator)
|
||||
{
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE PxU32 getNbFirstPassTests() { return mContactManagers.mCpuContactManagerMapping.size(); }
|
||||
PX_FORCE_INLINE PxU32 getNbSecondPassTests() { return mNewContactManagers.mCpuContactManagerMapping.size(); }
|
||||
|
||||
PX_FORCE_INLINE PxU32 getNbPassTests() { return mContactManagers.mCpuContactManagerMapping.size() + mNewContactManagers.mCpuContactManagerMapping.size(); }
|
||||
|
||||
PX_FORCE_INLINE void allocateContactManagers()
|
||||
{
|
||||
const PxU32 newPairCount = mContactManagers.mGpuInputContactManagers.size() + mNewContactManagers.mGpuInputContactManagers.size();
|
||||
|
||||
if (newPairCount > mContactManagers.mGpuInputContactManagers.capacity())
|
||||
{
|
||||
PxU32 newSize = PxMax(newPairCount, mContactManagers.mGpuInputContactManagers.capacity() * 2);
|
||||
|
||||
mContactManagers.mGpuInputContactManagers.reserve(newSize);
|
||||
|
||||
//mContactManagers.mCpuContactManagerMapping.reserve(newSize);
|
||||
}
|
||||
}
|
||||
|
||||
PxgContactManagers mContactManagers; //existing contact managers
|
||||
PxgNewContactManagers mNewContactManagers; //new contact managers
|
||||
|
||||
};
|
||||
|
||||
class PxgGpuContactManagerPair : public PxUserAllocated
|
||||
{
|
||||
public:
|
||||
PxgGpuContactManagerPair(const PxU32 bucketId, PxgHeapMemoryAllocatorManager* manager) :
|
||||
mContactManagers(bucketId, manager),
|
||||
mNewContactManagers(bucketId, manager)
|
||||
{
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE void allocateBlockAccumulationArray(const PxU32 size)
|
||||
{
|
||||
mContactManagers.mBlockAccumulationArray.allocate(sizeof(PxU32) * size, PX_FL);
|
||||
mNewContactManagers.mBlockAccumulationArray.allocate(sizeof(PxU32) * size, PX_FL);
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE void allocateLostAndTotalReportedPairsCount(PxsHeapMemoryAllocator* mappedMemoryAllocators)
|
||||
{
|
||||
mContactManagers.mLostAndTotalReportedPairsCountPinned = reinterpret_cast<uint2*>(mappedMemoryAllocators->allocate(sizeof(uint2), PxsHeapStats::eNARROWPHASE, PX_FL));
|
||||
mNewContactManagers.mLostAndTotalReportedPairsCountPinned = reinterpret_cast<uint2*>(mappedMemoryAllocators->allocate(sizeof(uint2), PxsHeapStats::eNARROWPHASE, PX_FL));
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE PxU32 getTotalLostFoundPairs()
|
||||
{
|
||||
return mContactManagers.mLostAndTotalReportedPairsCountPinned->y + mNewContactManagers.mLostAndTotalReportedPairsCountPinned->y;
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE PxU32 getTotalLostFoundPatches()
|
||||
{
|
||||
return mContactManagers.mLostAndTotalReportedPairsCountPinned->x + mNewContactManagers.mLostAndTotalReportedPairsCountPinned->x;
|
||||
}
|
||||
|
||||
PxgGpuContactManagers mContactManagers;
|
||||
PxgGpuContactManagers mNewContactManagers;
|
||||
};
|
||||
|
||||
struct PxsShapeCore;
|
||||
struct PxgKernelIds;
|
||||
|
||||
// GPU narrow-phase front-end. Owns the per-bucket contact-manager sets, the CUDA
// streams/events and device buffers used by narrow-phase collision, and exposes
// the per-shape-pair kernel entry points ("testSDK*") plus registration hooks for
// shapes, geometries and materials. Non-copyable; allocated on the PhysX heap.
class PxgGpuNarrowphaseCore : public PxUserAllocated
{
	PX_NOCOPY(PxgGpuNarrowphaseCore)

public:
	PxgMirroredContactManagerPair* mContactManagers[GPU_BUCKET_ID::eCount]; //same order as BUCKET_ID. 0 is fallback and it will be NULL in this array
	PxgGpuContactManagerPair* mGpuContactManagers[GPU_BUCKET_ID::eCount]; //same order as BUCKET_ID. 0 is fallback and it will be NULL in this array

	PxgGpuPairManagementBuffers mPairManagementBuffers;

	PxgCudaBuffer mGpuTransformCache;	// device copy of the transform cache (see prepareGpuNarrowphase)
	PxgCudaBuffer mGpuContactDistance;	// device copy of per-shape contact distances

	typedef PxInt32ArrayPinned RemovedIndicesArray;

	// Pinned index arrays of removed pairs, one per bucket.
	RemovedIndicesArray* mRemovedIndices[GPU_BUCKET_ID::eCount];

	PxBitMap mKeepMap;

	// Pinned output for lost/found pair reporting (see fetchNarrowPhaseResults).
	PxPinnedArray<PxsContactManagerOutputCounts> mLostFoundPairsOutputData;
	PxPinnedArray<PxsContactManager*> mLostFoundPairsCms;

	PxU32 mTotalLostFoundPairs;
	PxU32 mTotalLostFoundPatches;
	PxU32 mTotalNumPairs;

	PxgPairManagementData* mPairManagementData[GPU_BUCKET_ID::eCount]; //mapped memory
	PxgCudaBuffer mGpuPairManagementData;

	// Radix-sort scratch state (descriptors double-buffered in mRadixSortDescBuf).
	PxPinnedArray<PxgRadixSortDesc> mRSDesc;
	PxgCudaBufferN<2> mRadixSortDescBuf; //radix sort with rank

	PxgCudaBuffer mTempGpuRigidIndiceBuf;
	PxgCudaBuffer mTempGpuShapeIndiceBuf;
	PxgCudaBuffer mRadixCountTotalBuf;

	// Raw device addresses of the contact/patch/force streams handed in at construction.
	CUdeviceptr mContactStream;
	CUdeviceptr mPatchStream;
	CUdeviceptr mForceAndIndiceStream;

	PxgTypedCudaBuffer<PxgPatchAndContactCounters> mPatchAndContactCountersOnDevice; //device memory
	PxgPatchAndContactCounters* mPatchAndContactCountersReadback; //host memory

	// GPU-resident mirrors of shapes and the various material types.
	PxgShapeManager mGpuShapesManager;
	PxgMaterialManager mGpuMaterialManager;
	PxgFEMSoftBodyMaterialManager mGpuFEMMaterialManager;
	PxgFEMClothMaterialManager mGpuFEMClothMaterialManager;
	PxgPBDMaterialManager mGpuPBDMaterialManager;

	//device memory
	PxgCudaPagedLinearAllocator<PxgHeapMemoryAllocator> mIntermStackAlloc;

	CUstream mStream;		// narrow-phase stream, created/destroyed by this class
	CUstream mSolverStream; //this is the stream handle belonging to the solver, we can't destroy the solver stream
	PxgCudaKernelWranglerManager* mGpuKernelWranglerManager;
	PxCudaContextManager* mCudaContextManager;
	PxCudaContext* mCudaContext;
	PxgHeapMemoryAllocatorManager* mHeapMemoryManager;

	PxgCopyManager mCopyMan;	// copies scheduled on the narrow-phase stream
	PxgCopyManager mCopyManBp;	// copies scheduled on the broad-phase stream (see uploadDataChunksToGpuBp)
	PxgGeometryManager mGeometryManager;

	IG::IslandSim* mIslandSim;

	PxgNphaseImplementationContext* mNphaseImplContext;
	PxgGpuContext* mGpuContext;

	PxU32 mCollisionStackSizeBytes;

	PxU32* mMaxConvexMeshTempMemory;

	// Reference-counted slot record: maps a host object to its GPU-side index.
	struct RefcountedRecord
	{
		PxU32 refCnt;	// number of registrations still referencing the slot
		PxU32 idx;		// GPU-side index of the mirrored object
	};

	// Keyed by the host object's address (size_t).
	typedef PxHashMap<size_t, RefcountedRecord> RefcountedRecordsMap;
	RefcountedRecordsMap* mShapesMap;

	RefcountedRecordsMap* mGeometriesMap;
	RefcountedRecordsMap* mMaterialsMap;
	RefcountedRecordsMap* mFEMMaterialsMap;
	RefcountedRecordsMap* mFEMClothMaterialsMap;
	RefcountedRecordsMap* mPBDMaterialsMap;

	PxgCudaBuffer mGpuMultiManifold;
	PxgCudaBuffer mGpuManifold;

	// Events used to order work between this core and the particle/softbody/cloth
	// systems and the direct-GPU-API DMA path.
	CUevent mParticleEvent;
	CUevent mSoftbodyEvent;
	CUevent mFemClothEvent;
	CUevent mDirectApiDmaEvent;

#if PX_ENABLE_SIM_STATS
	PxU32 mGpuDynamicsRigidContactCountStats;
	PxU32 mGpuDynamicsRigidPatchCountStats;
	PxU32 mGpuDynamicsCollisionStackSizeStats;
#else
	PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
#endif

	void createGpuStreamsAndEvents();
	void releaseGpuStreamsAndEvents();

	void removeLostPairsGpu(const PxU32 bucketID, const PxU16 stage5KernelID, const bool copyManifold);

	void appendContactManagersGpu(PxU32 nbExistingManagers, PxU32 nbNewManagers, PxgGpuContactManagers& gpuContactManagers, PxgGpuContactManagers& newGpuContactManagers, PxU32 manifoldSize);

	// Debug-visualization helpers.
	void drawManifold(PxgPersistentContactManifold* manifolds, PxgContactManagerInput* cmInput, PxsCachedTransform* cachedTransform, const PxU32 numTests, PxRenderOutput& renderOutput,
		const PxU32 color, const PxF32 size);
	void drawPoint(PxRenderOutput& renderOutput, PxVec3 point, const PxU32 color, const PxF32 size);
	void drawLine(PxRenderOutput& out, PxVec3 a, const PxVec3 b, const PxU32 color);

	template <typename ManagementData, typename Manifold>
	void removeLostPairsGpuInternal( ManagementData& cpuBuffer, CUdeviceptr gpuBuffer,
		PxgContactManagers& contactManagers, PxgGpuContactManagers& gpuContactManagers, PxInt32ArrayPinned& removedIndices, PxgGpuPairManagementBuffers& pairManagementBuffers,
		PxU16 stage5KernelID, const bool copyManifold = true);

public:

	PxgGpuNarrowphaseCore(PxgCudaKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager, const PxGpuDynamicsMemoryConfig& gpuDynamicsConfig, void* contactStreamBase,
		void* patchStreamBase, void* forceAndIndiceStreamBase, IG::IslandSim* islandSim, CUstream solverStream, PxgHeapMemoryAllocatorManager* heapMemoryManager,
		PxgNphaseImplementationContext* nphaseImplContext);
	virtual ~PxgGpuNarrowphaseCore();

	// ---- Kernel entry points, one per shape-pair family. The baseContact* pointers
	// and *BytesLimit arguments describe the output streams the kernels write into.
	void testSDKSphereGpu(PxgGpuContactManagers& gpuManagers, const PxU32 numTests,
		PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces, PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit,
		PxRenderOutput* renderOutput);

	void testSDKBoxBoxGpu(PxgGpuContactManagers& gpuManagers, const PxU32 numTests,
		PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces, PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit,
		PxRenderOutput* renderOutput);

	void testSDKConvexConvexGjkEpaGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
		PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces, PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit,
		PxRenderOutput* renderOutput);

	void testSDKConvexPlaneGjkEpaGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
		PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces, PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit,
		PxRenderOutput* renderOutput);

	void testSDKConvexCorePlaneGjkEpaGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
		PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces, PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit,
		PxRenderOutput* renderOutput);

	void testSDKConvexCoreConvexGjkEpaGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
		PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces, PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit,
		PxRenderOutput* renderOutput);

	void testSDKConvexCoreTrimeshGjkEpaGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
		PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces, PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit,
		PxRenderOutput* renderOutput);

	void testSDKConvexCoreTetmeshGjkEpaGpu(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);

	void testSDKConvexCoreClothmeshGjkEpaGpu(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);

	void testSDKSphereTriMeshSATGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
		PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces,
		PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit);

	void testSDKSphereHeightfieldGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
		PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces,
		PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit);

	void testSDKTriMeshPlaneGpu(PxgGpuContactManagers& gpuManagers, const PxU32 numTests,
		PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces,
		PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit);

	void testSDKTriMeshHeightfieldGpu(PxgGpuContactManagers& gpuManagers, const PxU32 numTests,
		PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces,
		PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit);

	void testSDKTriMeshTriMeshGpu(PxgGpuContactManagers& gpuManagers,const PxU32 numTests,
		PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces,
		PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit);

	void testSDKConvexTriMeshSATGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
		PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces,
		PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit);

	void testSDKConvexHeightfieldGpu(PxgGpuContactManagers& gpuManagers, bool insertAveragePoint, const PxU32 numTests,
		PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces,
		PxU32 patchBytesLimit, PxU32 contactBytesLimit, PxU32 forceBytesLimit);

	void testSDKParticleSystemGpu(PxgGpuContactManagers& gpuManagers, const PxU32 numTests);

	void testSDKParticleSoftbody(PxgGpuContactManagers& gpuManagers, const PxU32 numTests,
		PxRenderOutput* renderOutput);

	void testSDKParticleFemCloth(PxgGpuContactManagers& gpuManagers, const PxU32 numTests,
		PxRenderOutput* renderOutput);

	void testSDKConvexParticle(PxgGpuContactManagers& gpuManagers, const PxU32 numTests);

	void testSDKParticleSdfTriMesh(PxgGpuContactManagers& gpuManagers, const PxU32 numTests);

	void testSDKParticleTriMesh(PxgGpuContactManagers& gpuManagers,const PxU32 numTests);

	void testSDKParticleHeightfield(PxgGpuContactManagers& gpuManagers, const PxU32 numTests);

	void testSDKSoftbody(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);

	void testSDKSoftbodies(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);

	void testSDKSoftbodyCloth(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);

	void testSDKSoftbodyTrimesh(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);

	void testSDKSoftbodySdfTrimesh(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);

	void testSDKSoftbodyHeightfield(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);

	void testSDKFemClothSphere(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);

	void testSDKFemClothPlane(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);

	void testSDKFemClothBox(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);

	void testSDKFemClothConvexes(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);

	void testSDKFemClothCloth(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);

	void testSDKFemClothSdfTrimesh(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);

	void testSDKFemClothTrimesh(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);

	void testSDKFemClothHeightfield(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxRenderOutput* renderOutput);

	void updateFrictionPatches(PxgGpuContactManagers& gpuManagers, PxU32 count, PxU8* baseContactPatches, PxU8* baseFrictionPatches);

	// Copies GPU narrow-phase results back to the CPU-side pools/outputs.
	void fetchNarrowPhaseResults(PxcDataStreamPool* contactStreamPool, PxcDataStreamPool* patchStreamPool, PxcDataStreamPool* forceStreamPool,
		PxsContactManagerOutput* cmOutputs, const Sc::ShapeInteraction*const* shapeInteractions, const PxReal* restDistances, const PxsTorsionalFrictionData* torsionalData, PxU32 nbFallbackPairs,
		const PxsContactManagerOutputCounts* foundPatchCounts, const PxsContactManager*const* foundPatchManagers, PxU32 nbFoundPatchManagers);

	void syncNotRigidWithNp();

	// CUDA context acquire/release wrappers.
	void acquireContext();
	void releaseContext();

	CUstream getStream();

	// ---- Contact-manager lifecycle, per bucket.
	void registerContactManager(PxsContactManager* cm, const Sc::ShapeInteraction* shapeInteraction, PxsContactManagerOutput& output, const PxU32 bucketId);
	void unregisterContactManager(PxsContactManager* manager, const PxU32 bucketId);
	void refreshContactManager(PxsContactManager* manager, PxsContactManagerOutput* cmOutputs, PxgContactManagerInput& input, const PxU32 bucketId);

	void removeLostPairs();
	void appendContactManagers(PxsContactManagerOutput* cmOutputs, PxU32 nbFallbackPairs);

	void prepareTempContactManagers();

	template <GPU_BUCKET_ID::Enum>
	void prepareTempContactManagers();

	void prepareTempContactManagersTasks(Cm::FlushPool& flushPool, PxBaseTask* continuation);

	bool isMeshGPUCompatible(const PxTriangleMeshGeometryLL& meshData);

	bool isClothMeshGPUCompatible(const PxTriangleMeshGeometryLL& meshData); // skipping material counts

	bool isTetMeshGPUCompatible(const Gu::BVTetrahedronMesh* meshData);

	void computeRigidsToShapes();

	void prepareGpuNarrowphase(PxsTransformCache& cache, const PxReal* contactDistances, bool hasContactDistanceChanged);

	// ---- Device-pointer accessors; all read the eConvex bucket's existing-manager set.
	PxsContactManagerOutput* getGPUContactManagerOutputBase() { return reinterpret_cast<PxsContactManagerOutput*>(mGpuContactManagers[GPU_BUCKET_ID::eConvex]->mContactManagers.mContactManagerOutputData.getDevicePtr()); }

	PxReal* getGPURestDistances() { return reinterpret_cast<PxReal*>(mGpuContactManagers[GPU_BUCKET_ID::eConvex]->mContactManagers.mRestDistances.getDevicePtr()); }

	PxsTorsionalFrictionData* getGPUTorsionalData() { return reinterpret_cast<PxsTorsionalFrictionData*>(mGpuContactManagers[GPU_BUCKET_ID::eConvex]->mContactManagers.mTorsionalProperties.getDevicePtr()); }

	Sc::ShapeInteraction** getGPUShapeInteractions() { return reinterpret_cast<Sc::ShapeInteraction**>(mGpuContactManagers[GPU_BUCKET_ID::eConvex]->mContactManagers.mShapeInteractions.getDevicePtr()); }

	// Per-bucket accessors for the CPU-mirrored and GPU contact-manager sets.
	PxgContactManagers& getExistingContactManagers(GPU_BUCKET_ID::Enum type) { return mContactManagers[type]->mContactManagers; }
	PxgNewContactManagers& getNewContactManagers(GPU_BUCKET_ID::Enum type) { return mContactManagers[type]->mNewContactManagers; }

	PxgGpuContactManagers& getExistingGpuContactManagers(GPU_BUCKET_ID::Enum type) { return mGpuContactManagers[type]->mContactManagers; }
	PxgGpuContactManagers& getNewGpuContactManagers(GPU_BUCKET_ID::Enum type) { return mGpuContactManagers[type]->mNewContactManagers; }

	PxPinnedArray<PxsContactManagerOutputCounts>& getLostFoundPairsOutput() { return mLostFoundPairsOutputData; }
	PxPinnedArray<PxsContactManager*>& getLostFoundPairsCms() { return mLostFoundPairsCms; }

	PxgCudaBuffer& getTransformCache() { return mGpuTransformCache; }
	PxU32 getTotalNbLostFoundPairs() const { return mTotalLostFoundPairs; }
	PxU32 getTotalNbLostFoundPatches() const { return mTotalLostFoundPatches; }

	void preallocateNewBuffers(PxU32 nbNewPairs);

	void pushBuffer();

	// ---- Geometry registration (convex hulls, meshes, heightfields).
	PxU32 addHull(const Gu::ConvexHullData& hull);
	PxU32 getHullIdxByHostPtr(const Gu::ConvexHullData* hull);
	void removeGeometry(PxU32 idx);

	//schedules data copies on the near phase stream
	void uploadDataChunksToGpu();
	void waitAndResetCopyQueues();

	//schedules particle material copies on the broad phase stream,
	//because particle materials are already accessed in pre-integration
	//- we might be able to remove this dependency
	void uploadDataChunksToGpuBp();
	void waitAndResetCopyQueuesBp();

	PxU32 addTriMesh(const Gu::TriangleMesh& mesh);
	PxU32 getTriMeshIdxByHostPtr(const Gu::TriangleMesh* mesh);

	PxU32 addHeightfield(const Gu::HeightFieldData& hf);
	PxU32 getHeightfieldIdxByHostPtr(const Gu::HeightFieldData* hf);

	// ---- Shape registration (refcounted via mShapesMap).
	void registerShape(const PxNodeIndex& nodeIndex, const PxsShapeCore& shapeCore, const PxU32 transformCacheID, const bool isFemCloth, PxActor* actor);
	void updateShapeMaterial(const PxsShapeCore& shapeCore);
	PxU32 getShapeIndex(const PxsShapeCore& shapeCore);
	void unregisterShape(const PxsShapeCore& shapeCore, const PxU32 transformCacheID, const bool isFemCloth);

	void registerAggregate(const PxU32 transformCacheID);

	// ---- Material registration, one trio per material family (rigid, FEM surface,
	// FEM volume, PBD particle); overload resolution picks the family by core type.
	void registerMaterial(const PxsMaterialCore& materialCore);
	void updateMaterial(const PxsMaterialCore& materialCore);
	void unregisterMaterial(const PxsMaterialCore& materialCore);

	void registerFEMMaterial(const PxsDeformableSurfaceMaterialCore& materialCore);
	void updateFEMMaterial(const PxsDeformableSurfaceMaterialCore& materialCore);
	void unregisterFEMMaterial(const PxsDeformableSurfaceMaterialCore& materialCore);

	void registerFEMMaterial(const PxsDeformableVolumeMaterialCore& materialCore);
	void updateFEMMaterial(const PxsDeformableVolumeMaterialCore& materialCore);
	void unregisterFEMMaterial(const PxsDeformableVolumeMaterialCore& materialCore);

	void registerParticleMaterial(const PxsPBDMaterialCore& materialCore);
	void updateParticleMaterial(const PxsPBDMaterialCore& materialCore);
	void unregisterParticleMaterial(const PxsPBDMaterialCore& materialCore);

	//direct gpu contact access
	bool copyContactData(void* data, PxU32* numContactPairs, const PxU32 maxContactPairs, CUevent startEvent,
		CUevent finishEvent, PxU8* baseContactPatches, PxU8* baseContactPoints, PxU8* baseContactForces);

	bool evaluateSDFDistances(PxVec4* localGradientAndSDFConcatenated, const PxShapeGPUIndex* shapeIndices, const PxVec4* localSamplePointsConcatenated, const PxU32* samplePointCountPerShape, PxU32 nbElements, PxU32 maxPointCount, CUevent startEvent, CUevent finishEvent);

	void synchronizedStreams(CUstream artiStream);

	PxgShapeManager& getGpuShapeManager() { return mGpuShapesManager; }

	// Translates SDK material indices to GPU indices; specialized per material data
	// type (see the PxsPBDMaterialData specialization below the class).
	template <class MaterialData>
	void mapMaterialIndices(PxU16* gpuMaterialIndices, const PxU16* materialIndices, PxU32 numMaterialIndices);

private:

	void compactLostFoundPairs(PxgGpuContactManagers& gpuManagers, const PxU32 numTests, PxU32* touchChangeFlags, PxsContactManagerOutput* cmOutputs);

	void softbodyOtherContactApplyCollisionToSimMeshMapping(
		PxgDevicePointer<float4> contactsd,
		PxgDevicePointer<float4> barycentricsd,
		PxgDevicePointer<PxgFemOtherContactInfo> contactInfosd,
		PxgDevicePointer<PxU32> totalNumCountsd,
		PxgDevicePointer<PxU32> prevNumCountsd);

public:

	void softbodyFemContactApplyCollisionToSimMeshMapping(
		PxgDevicePointer<float4> barycentrics0d,
		PxgDevicePointer<float4> barycentrics1d,
		PxgDevicePointer<PxgFemFemContactInfo> contactInfosd,
		PxgDevicePointer<PxU32> totalNumCountsd,
		PxgDevicePointer<PxU32> prevNumCountsd, bool isSelfCollision, bool isCloth);

private:

	// Fixes up narrow-phase indices when new managers are appended; the it* arrays
	// are the main pinned arrays, itNew* the per-frame new-manager arrays.
	void adjustNpIndices(PxgNewContactManagers& newContactManagers, PxPinnedArray<PxgContactManagerInput>& itMainInputs,
		PxPinnedArray<PxsContactManager*>& itCms, PxPinnedArray<const Sc::ShapeInteraction*>& itSIs,
		PxFloatArrayPinned& itR, PxPinnedArray<PxsTorsionalFrictionData>& itTor,
		PxPinnedArray<PxgContactManagerInput>& itNewInputs,
		PxPinnedArray<PxsContactManager*>& itNewCms,
		PxPinnedArray<const Sc::ShapeInteraction*>& itNewSIs, PxFloatArrayPinned& itNewR,
		PxPinnedArray<PxsTorsionalFrictionData>& itNewTor);

	void registerContactManagerInternal(PxsContactManager* cm, const Sc::ShapeInteraction* shapeInteraction, PxgContactManagerInput* input, PxsContactManagerOutput& output, PxgNewContactManagers& newContactManagers);

	void unregisterContactManagerInternal(PxsContactManager* cm, PxInt32ArrayPinned& removedIndices, PxgNewContactManagers& newContactManagers);

	void refreshContactManagerInternal(PxsContactManager* cm, PxsContactManagerOutput* cmOutputs, const Sc::ShapeInteraction** shapeInteractions, PxgContactManagerInput& input, PxgNewContactManagers& newContactManagers,
		PxInt32ArrayPinned& removedIndices);

	template <typename Manifold>
	void prepareTempContactManagers(PxgGpuContactManagers& gpuManagers, PxgNewContactManagers& newManagers, Manifold* emptyManifold);

	void prepareTempContactManagers(PxgGpuContactManagers& gpuManagers, PxgNewContactManagers& newManagers);

	void removeLostPairsInternal(PxInt32ArrayPinned& removedIndices, PxgContactManagers& contactManagers);

	void prepareTempContactManagersInternal(PxgNewContactManagers& newManagers, Cm::FlushPool& flushPool, PxBaseTask* continuation);

	void drawContacts(PxRenderOutput& out, CUdeviceptr contactsd, CUdeviceptr normalPensd, const PxU32 numContacts);

	template <typename MaterialCore, typename MaterialData>
	PxU32 registerMaterialInternal(const MaterialCore& materialCore, RefcountedRecordsMap* materialsMap, PxgMaterialManager& materialManager);

	template <typename MaterialCore, typename MaterialData >
	void updateMaterialInternal(const MaterialCore& materialCore, RefcountedRecordsMap* materialsMap, PxgMaterialManager& materialManager);

	// Thin typed wrapper: forwards to mapMaterialIndexInternal with sizeof(MaterialData).
	template <typename MaterialData>
	PxU16 mapMaterialIndex(PxU16 sdkMaterialIndex, RefcountedRecordsMap* materialsMap, PxgMaterialManager& materialManager)
	{
		return mapMaterialIndexInternal(sdkMaterialIndex, materialsMap, materialManager, sizeof(MaterialData));
	}

	PxU16 mapMaterialIndexInternal(PxU16 sdkMaterialIndex, RefcountedRecordsMap* materialsMap,
		PxgMaterialManager& materialManager, PxU32 materialDataByteSize);

	template <typename MaterialCore>
	void unregisterMaterialInternal(const MaterialCore& materialCore, RefcountedRecordsMap* materialsMap, PxgMaterialManager& materialManager);

	void updateContactDistance(const PxReal* contactDistances, const PxU32 numContactDistance);
};
|
||||
|
||||
template <>
|
||||
PX_FORCE_INLINE void PxgGpuNarrowphaseCore::mapMaterialIndices<PxsPBDMaterialData>(PxU16* gpuMaterialIndices,
|
||||
const PxU16* materialIndices, PxU32 numMaterialIndices)
|
||||
{
|
||||
PxgGpuNarrowphaseCore::RefcountedRecordsMap* map = mPBDMaterialsMap;
|
||||
PxgPBDMaterialManager& manager = mGpuPBDMaterialManager;
|
||||
for(PxU32 i = 0; i < numMaterialIndices; ++i)
|
||||
{
|
||||
const PxU16 gpuIndex = mapMaterialIndex<PxsPBDMaterialData>(materialIndices[i], map, manager);
|
||||
gpuMaterialIndices[i] = gpuIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
71
engine/third_party/physx/source/gpunarrowphase/include/PxgNpKernelIndices.h
vendored
Normal file
71
engine/third_party/physx/source/gpunarrowphase/include/PxgNpKernelIndices.h
vendored
Normal file
@@ -0,0 +1,71 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_NP_KERNEL_INDICES_H
|
||||
#define PXG_NP_KERNEL_INDICES_H
|
||||
|
||||
// Warps-per-block launch configuration for the narrow-phase CUDA kernels.
#define MIDPHASE_WARPS_PER_BLOCK 2
#define NP_TRIMESH_WARPS_PER_BLOCK 2
#define CORRELATE_WARPS_PER_BLOCK 2
// NOTE(review): "WAPRS" below is a typo for "WARPS"; the spelling is kept because
// renaming a macro would break every translation unit that references it.
#define PS_MIDPHASE_COLLISION_WAPRS_PER_BLOCK 1
#define SB_REFIT_WAPRS_PER_BLOCK 32
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
// Thread-block sizes used when launching the corresponding narrow-phase kernels
// (presumably threads per block, matching PxgNarrowPhaseGridDims' block counts —
// confirm against the kernel launch sites).
struct PxgNarrowPhaseBlockDims
{
	enum
	{
		REMOVE_CONTACT_MANAGERS = 512,
		COMPACT_LOST_FOUND_PAIRS = 512,
		FINISH_CONTACTS = 128,
		EARLY_OUT_KERNEL = 128,
		COLLIDE_KERNEL = 128,
		INITIALIZE_MANIFOLDS = 512,
		COMPRESS_CONTACT = 256
	};
};
|
||||
|
||||
// Grid sizes used when launching the corresponding narrow-phase kernels
// (presumably blocks per grid, paired with PxgNarrowPhaseBlockDims — confirm
// against the kernel launch sites). Fix: removed the stray trailing comma and
// blank entry at the end of the enum, for consistency with PxgNarrowPhaseBlockDims.
struct PxgNarrowPhaseGridDims
{
	enum
	{
		REMOVE_CONTACT_MANAGERS = 32,
		COMPACT_LOST_FOUND_PAIRS = 32,
		EARLY_OUT_KERNEL = 32,
		COLLIDE_KERNEL = 32,
		INITIALIZE_MANIFOLDS = 128,
		COMPRESS_CONTACT = 32
	};
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
305
engine/third_party/physx/source/gpunarrowphase/include/PxgNphaseImplementationContext.h
vendored
Normal file
305
engine/third_party/physx/source/gpunarrowphase/include/PxgNphaseImplementationContext.h
vendored
Normal file
@@ -0,0 +1,305 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_NPHASE_IMPLEMENTATION_CONTEXT_H
|
||||
#define PXG_NPHASE_IMPLEMENTATION_CONTEXT_H
|
||||
|
||||
#include "CmTask.h"
|
||||
#include "foundation/PxMutex.h"
|
||||
|
||||
#include "PxvNphaseImplementationContext.h"
|
||||
#include "PxgNarrowphaseCore.h"
|
||||
#include "PxgSimulationCore.h"
|
||||
#include "foundation/PxHashMap.h"
|
||||
|
||||
#include "PxsContactManagerState.h"
|
||||
#include "PxgContactManager.h"
|
||||
#include "PxgPersistentContactManifold.h"
|
||||
#include "PxgHeapMemAllocator.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
struct PxcNpWorkUnit;
|
||||
class PxgNphaseImplementationContext;
|
||||
class PxgGpuContext;
|
||||
class PxgParticleSystemCore;
|
||||
class PxsKernelWranglerManager;
|
||||
|
||||
namespace Bp
|
||||
{
|
||||
class BpNonVirtualMemAllocator;
|
||||
}
|
||||
|
||||
namespace Dy
|
||||
{
|
||||
class Context;
|
||||
}
|
||||
|
||||
// Common base for the GPU discrete contact-manager update tasks: holds the
// timestep and the owning narrow-phase context shared by the derived tasks.
class PxgCMGpuDiscreteUpdateBase: public Cm::Task
{
private:
	PX_NOCOPY(PxgCMGpuDiscreteUpdateBase)

public:
	PxgCMGpuDiscreteUpdateBase(PxgNphaseImplementationContext* context):
		Cm::Task(0), mDt(FLT_MAX), mContext(context)	// FLT_MAX = "not set yet" sentinel until setDt()
	{
	}

	virtual ~PxgCMGpuDiscreteUpdateBase()
	{}

	// Stores the step's timestep; expected to be called before the task runs.
	void setDt(PxReal dt)
	{
		mDt = dt;
	}

	// Processes the given contact-manager sets for one bucket; defined out of line.
	void processContactManagers(PxgContactManagers& managers, PxgGpuContactManagers& managersGPU, GPU_BUCKET_ID::Enum type);

protected:
	PxReal mDt; //we could probably retrieve it from the context to save space?
	PxgNphaseImplementationContext* mContext;
};
|
||||
|
||||
// Task driving the CPU fallback path of the discrete contact-manager update
// (pairs not handled by the GPU buckets). Work happens in runInternal().
class PxgCMGpuDiscreteUpdateFallbackTask : public Cm::Task
{
public:
	PxgCMGpuDiscreteUpdateFallbackTask(PxgNphaseImplementationContext* context) :
		Cm::Task(0), mDt(FLT_MAX), mContext(context)	// FLT_MAX = "not set yet" sentinel until setDt()
	{
	}

	virtual ~PxgCMGpuDiscreteUpdateFallbackTask()
	{}

	// Stores the step's timestep; expected to be called before the task runs.
	void setDt(PxReal dt)
	{
		mDt = dt;
	}

	virtual const char* getName() const
	{
		return "PxgCMGpuDiscreteUpdateFallbackTask";
	}

	// Task body; defined out of line.
	void runInternal();

protected:
	PxReal mDt;
	PxgNphaseImplementationContext* mContext;
};
|
||||
|
||||
class PxgCMGpuDiscreteUpdateTask : public PxgCMGpuDiscreteUpdateBase
|
||||
{
|
||||
PxBaseTask* mFirstPassNpContinuation;
|
||||
PxBaseTask* mPostBroadPhaseTask;
|
||||
public:
|
||||
PxgCMGpuDiscreteUpdateTask(PxgNphaseImplementationContext* context) : PxgCMGpuDiscreteUpdateBase(context), mFirstPassNpContinuation(NULL)
|
||||
{
|
||||
mPostBroadPhaseTask = NULL;
|
||||
}
|
||||
|
||||
virtual ~PxgCMGpuDiscreteUpdateTask()
|
||||
{
|
||||
}
|
||||
|
||||
virtual void setPostBroadPhaseTask(PxBaseTask* postBroadPhaseTask)
|
||||
{
|
||||
mPostBroadPhaseTask = postBroadPhaseTask;
|
||||
}
|
||||
|
||||
virtual void release()
|
||||
{
|
||||
if(mFirstPassNpContinuation)
|
||||
mFirstPassNpContinuation->removeReference();
|
||||
PxgCMGpuDiscreteUpdateBase::release();
|
||||
}
|
||||
|
||||
void setFirstPassContinuation(PxBaseTask* firstPassContinuation) { mFirstPassNpContinuation = firstPassContinuation; }
|
||||
|
||||
void runInternal();
|
||||
|
||||
virtual const char* getName() const
|
||||
{
|
||||
return "PxgNphaseImplementationContext.firstContactManagerDiscreteUpdate";
|
||||
}
|
||||
};
|
||||
|
||||
class PxgCMGpuDiscreteSecondPassUpdateTask : public PxgCMGpuDiscreteUpdateBase
|
||||
{
|
||||
public:
|
||||
PxgCMGpuDiscreteSecondPassUpdateTask(PxgNphaseImplementationContext* context): PxgCMGpuDiscreteUpdateBase(context)
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~PxgCMGpuDiscreteSecondPassUpdateTask()
|
||||
{}
|
||||
|
||||
void runInternal();
|
||||
|
||||
virtual const char* getName() const
|
||||
{
|
||||
return "PxgNphaseImplementationContext.secondContactManagerDiscreteUpdate";
|
||||
}
|
||||
|
||||
protected:
|
||||
};
|
||||
|
||||
class PxgNphaseImplementationContext : public PxvNphaseImplementationContext
|
||||
{
|
||||
private:
|
||||
PX_NOCOPY(PxgNphaseImplementationContext)
|
||||
public:
|
||||
|
||||
PxgNphaseImplementationContext(PxsContext& context, PxsKernelWranglerManager* gpuKernelWrangler, PxvNphaseImplementationFallback* fallbackForUnsupportedCMs,
|
||||
const PxGpuDynamicsMemoryConfig& gpuDynamicsConfig, void* contactStreamBase, void* patchStreamBase, void* forceAndIndiceStreamBase,
|
||||
PxBoundsArrayPinned& bounds, IG::IslandSim* islandSim,
|
||||
physx::Dy::Context* dynamicsContext, PxgHeapMemoryAllocatorManager* heapMemoryManager,
|
||||
bool useGPUNP);
|
||||
|
||||
virtual ~PxgNphaseImplementationContext();
|
||||
|
||||
virtual void destroy() PX_OVERRIDE PX_FINAL;
|
||||
virtual void updateContactManager(PxReal dt, bool hasContactDistanceChanged, PxBaseTask* continuation, PxBaseTask* firstPassNpContinuation, Cm::FanoutTask* updateBoundAndShapeTask) PX_OVERRIDE PX_FINAL;
|
||||
void updateContactManagersFallback(PxReal dt, PxBaseTask* continuation);
|
||||
virtual void postBroadPhaseUpdateContactManager(PxBaseTask* continuation) PX_OVERRIDE PX_FINAL;
|
||||
virtual void secondPassUpdateContactManager(PxReal dt, PxBaseTask* continuation) PX_OVERRIDE PX_FINAL;
|
||||
virtual void fetchUpdateContactManager() PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual void preallocateNewBuffers(PxU32 nbNewPairs, PxU32 maxIndex) PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual void registerContactManager(PxsContactManager* cm, const Sc::ShapeInteraction* shapeInteraction, PxI32 touching, PxU32 numPatches) PX_OVERRIDE PX_FINAL;
|
||||
virtual void unregisterContactManager(PxsContactManager* cm) PX_OVERRIDE PX_FINAL;
|
||||
virtual void refreshContactManager(PxsContactManager* cm) PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual void registerShape(const PxNodeIndex& nodeIndex, const PxsShapeCore& shapeCore, const PxU32 transformCacheID, PxActor* actor, const bool isFemCloth) PX_OVERRIDE PX_FINAL;
|
||||
virtual void updateShapeMaterial(const PxsShapeCore& shapeCore) PX_OVERRIDE PX_FINAL;
|
||||
virtual void unregisterShape(const PxsShapeCore& shapeCore, const PxU32 transformCacheID, const bool isFemCloth) PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual void registerAggregate(const PxU32 transformCacheID) PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual void registerMaterial(const PxsMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
|
||||
virtual void updateMaterial(const PxsMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
|
||||
virtual void unregisterMaterial(const PxsMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual void registerMaterial(const PxsDeformableSurfaceMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
|
||||
virtual void updateMaterial(const PxsDeformableSurfaceMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
|
||||
virtual void unregisterMaterial(const PxsDeformableSurfaceMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual void registerMaterial(const PxsDeformableVolumeMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
|
||||
virtual void updateMaterial(const PxsDeformableVolumeMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
|
||||
virtual void unregisterMaterial(const PxsDeformableVolumeMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual void registerMaterial(const PxsPBDMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
|
||||
virtual void updateMaterial(const PxsPBDMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
|
||||
virtual void unregisterMaterial(const PxsPBDMaterialCore& materialCore) PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual PxsContactManagerOutput& getNewContactManagerOutput(PxU32 npId) PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual void appendContactManagers() PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual PxsContactManagerOutputIterator getContactManagerOutputs() PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual void setContactModifyCallback(PxContactModifyCallback* callback) PX_OVERRIDE PX_FINAL { mFallbackForUnsupportedCMs->setContactModifyCallback(callback); }
|
||||
|
||||
void updateNarrowPhaseShape();
|
||||
|
||||
void mergeContactManagers(PxBaseTask* continuation);
|
||||
|
||||
void processResults();
|
||||
|
||||
virtual void startNarrowPhaseTasks() PX_OVERRIDE PX_FINAL;
|
||||
|
||||
PxReal getToleranceLength();
|
||||
|
||||
PxgGpuNarrowphaseCore* getGpuNarrowphaseCore() { return mGpuNarrowphaseCore; }
|
||||
|
||||
PxgSimulationCore* getSimulationCore() { return mGpuSimulationCore; }
|
||||
void setSimulationCore(PxgSimulationCore* sc) { mGpuSimulationCore = sc; }
|
||||
|
||||
virtual void acquireContext() PX_OVERRIDE PX_FINAL;
|
||||
virtual void releaseContext() PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual void lock() PX_OVERRIDE PX_FINAL { mFallbackForUnsupportedCMs->lock();}
|
||||
virtual void unlock() PX_OVERRIDE PX_FINAL { mFallbackForUnsupportedCMs->unlock(); }
|
||||
|
||||
PxsContext& getContext() { return mContext; }
|
||||
|
||||
virtual PxsContactManagerOutputCounts* getLostFoundPatchOutputCounts() PX_OVERRIDE PX_FINAL;
|
||||
virtual PxsContactManager** getLostFoundPatchManagers() PX_OVERRIDE PX_FINAL;
|
||||
virtual PxU32 getNbLostFoundPatchManagers() PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual PxsContactManagerOutput* getGPUContactManagerOutputBase() PX_OVERRIDE PX_FINAL;
|
||||
virtual PxReal* getGPURestDistances() PX_OVERRIDE PX_FINAL;
|
||||
virtual Sc::ShapeInteraction** getGPUShapeInteractions() PX_OVERRIDE PX_FINAL;
|
||||
virtual PxsTorsionalFrictionData* getGPUTorsionalData() PX_OVERRIDE PX_FINAL;
|
||||
|
||||
protected:
|
||||
|
||||
void prepareTempContactManagers(PxBaseTask* continuation);
|
||||
|
||||
void removeLostPairs();
|
||||
|
||||
void registerContactManagerInternal(PxsContactManager* cm, const PxcNpWorkUnit& workUnit, PxU32 patchCount, PxI32 touching,
|
||||
const Sc::ShapeInteraction* shapeInteraction, GPU_BUCKET_ID::Enum bucketId);
|
||||
|
||||
PxvNphaseImplementationFallback* mFallbackForUnsupportedCMs;
|
||||
|
||||
//those arrays logically belongs to the task, but we keep it here to avoid reallocations each frame
|
||||
friend class PxgCMGpuDiscreteUpdateBase;
|
||||
friend class PxgCMGpuDiscreteUpdateTask;
|
||||
friend class PxgCMGpuDiscreteSecondPassUpdateTask;
|
||||
friend class PxgCMGpuDiscreteUpdateConvexTriMeshTask;
|
||||
friend class PxgCMGpuDiscreteSecondPassUpdateConvexTriMeshTask;
|
||||
|
||||
PxgCMGpuDiscreteUpdateTask mUpdateCMsFirstPassTask;
|
||||
PxgCMGpuDiscreteSecondPassUpdateTask mUpdateCMsSecondPassTask;
|
||||
PxgCMGpuDiscreteUpdateFallbackTask mUpdateFallbackPairs;
|
||||
|
||||
PxPinnedArray<PxsContactManagerOutput> mContactManagerOutputs;
|
||||
|
||||
PxBitMap mGpuContactManagerBitMap[GPU_BUCKET_ID::eCount];
|
||||
PxU32 mRecordedGpuPairCount[GPU_BUCKET_ID::eCount];
|
||||
PxU32 mNbPairCount[GPU_BUCKET_ID::eCount];
|
||||
|
||||
PxgGpuNarrowphaseCore* mGpuNarrowphaseCore;
|
||||
PxgSimulationCore* mGpuSimulationCore;
|
||||
PxgGpuContext* mGpuDynamicContext;
|
||||
|
||||
PxU32 mTotalNbPairs;
|
||||
|
||||
PxBoundsArrayPinned& mBounds;
|
||||
|
||||
bool mHasContactDistanceChanged;
|
||||
bool mUseGPUBP;
|
||||
|
||||
PxU32 mMaxPatches;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
98
engine/third_party/physx/source/gpunarrowphase/include/PxgPersistentContactManifold.h
vendored
Normal file
98
engine/third_party/physx/source/gpunarrowphase/include/PxgPersistentContactManifold.h
vendored
Normal file
@@ -0,0 +1,98 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_PERSISTENT_CONTACT_MANIFOLD_H
|
||||
#define PXG_PERSISTENT_CONTACT_MANIFOLD_H
|
||||
|
||||
#include "cutil_math.h"
|
||||
#include "AlignedTransform.h"
|
||||
|
||||
#define PXG_MAX_PCM_CONTACTS 4
|
||||
|
||||
#define PXG_MULTIMANIFOLD_MAX_SUBMANIFOLDS 4
|
||||
#define PXG_SUBMANIFOLD_MAX_CONTACTS 6
|
||||
|
||||
namespace physx
|
||||
{
|
||||
struct PX_ALIGN_PREFIX(16) PxgPersistentContactManifold
|
||||
{
|
||||
float4 mLocalNormal_PenW[PXG_MAX_PCM_CONTACTS]; //local normal + penetration
|
||||
float4 mLocalContactA[PXG_MAX_PCM_CONTACTS]; //local contact in body A's space
|
||||
float4 mLocalContactB[PXG_MAX_PCM_CONTACTS]; //local contact in body B's space
|
||||
float4 mRelativePos; //Cached relative transforms
|
||||
PxAlignedQuat mQuatA; //Cached body A's quatenion
|
||||
PxAlignedQuat mQuatB; //Cached body B's quatenion
|
||||
PxU32 mNbContacts; //NbContacts
|
||||
PxU32 mNbWarmStartPoints; //Nb warm start points
|
||||
PxU32 mWarmStartA; //GJK warm-start mask for shape A
|
||||
PxU32 mWarmStartB; //GJK warm-start mask for shape B
|
||||
|
||||
PX_FORCE_INLINE void clear()
|
||||
{
|
||||
mNbContacts = 0;
|
||||
mNbWarmStartPoints = 0;
|
||||
mRelativePos = make_float4(PX_MAX_REAL, PX_MAX_REAL, PX_MAX_REAL, PX_MAX_REAL);
|
||||
mQuatA = PxAlignedQuat(0.f, 0.f, 0.f, 1.f);
|
||||
mQuatB = PxAlignedQuat(0.f, 0.f, 0.f, 1.f);
|
||||
}
|
||||
|
||||
//KS - TODO - extend and add functionality as is required...
|
||||
}
|
||||
PX_ALIGN_SUFFIX(16);
|
||||
|
||||
struct PX_ALIGN_PREFIX(16) PxgContact
|
||||
{
|
||||
PxVec3 pointA;
|
||||
PxVec3 pointB;
|
||||
PxVec3 normal;
|
||||
PxReal penetration;
|
||||
PxU32 triIndex;
|
||||
PxU32 pad;
|
||||
} // 48 due to padding
|
||||
PX_ALIGN_SUFFIX(16);
|
||||
|
||||
|
||||
struct PX_ALIGN_PREFIX(16) PxgPersistentContactMultiManifold
|
||||
{
|
||||
PxgContact mContacts[PXG_MULTIMANIFOLD_MAX_SUBMANIFOLDS][PXG_SUBMANIFOLD_MAX_CONTACTS]; // 1152
|
||||
PxU32 mNbContacts[PXG_MULTIMANIFOLD_MAX_SUBMANIFOLDS]; // 1168
|
||||
PxAlignedTransform mRelativeTransform; // 1200
|
||||
PxU32 mNbManifolds; // 1216 due to padding
|
||||
|
||||
|
||||
PX_FORCE_INLINE void clear()
|
||||
{
|
||||
mNbManifolds = 0;
|
||||
mRelativeTransform.p = make_float4(PX_MAX_REAL, PX_MAX_REAL, PX_MAX_REAL, PX_MAX_REAL);
|
||||
mRelativeTransform.q = PxAlignedQuat(0.f, 0.f, 0.f, 1.f);
|
||||
}
|
||||
}
|
||||
PX_ALIGN_SUFFIX(16);
|
||||
}
|
||||
|
||||
#endif
|
||||
163
engine/third_party/physx/source/gpunarrowphase/include/PxgShapeManager.h
vendored
Normal file
163
engine/third_party/physx/source/gpunarrowphase/include/PxgShapeManager.h
vendored
Normal file
@@ -0,0 +1,163 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_SHAPE_MANAGER_H
|
||||
#define PXG_SHAPE_MANAGER_H
|
||||
|
||||
#include "foundation/PxPreprocessor.h"
|
||||
#include "PxSceneDesc.h"
|
||||
#include "foundation/PxPinnedArray.h"
|
||||
#include "CmIDPool.h"
|
||||
#include "PxgCudaBuffer.h"
|
||||
#include "foundation/PxBitMap.h"
|
||||
#include "PxsMaterialCore.h"
|
||||
#include "PxsDeformableSurfaceMaterialCore.h"
|
||||
#include "PxsDeformableVolumeMaterialCore.h"
|
||||
#include "PxsPBDMaterialCore.h"
|
||||
#include "PxgConvexConvexShape.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
class PxCudaContext;
|
||||
class PxgHeapMemoryAllocatorManager;
|
||||
class PxgCopyManager;
|
||||
class PxNodeIndex;
|
||||
|
||||
class PxgShapeManager
|
||||
{
|
||||
public:
|
||||
PxgShapeManager(PxgHeapMemoryAllocatorManager* heapManager);
|
||||
~PxgShapeManager(){}
|
||||
|
||||
|
||||
//this method push CopyDesc to PxgCopyManager
|
||||
void initialize(PxCudaContext* cudaContext, CUstream stream);
|
||||
void scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext, CUstream stream);
|
||||
|
||||
PxU32 registerShape(PxgShape& shape);
|
||||
void registerShapeInstance(const PxNodeIndex& nodeIndex, const PxU32 transformCacheID, PxActor* actor, bool aggregate = false);
|
||||
void unregisterShape(const PxU32 shapeId);
|
||||
void unregisterShapeInstance(const PxU32 transformCacheID);
|
||||
|
||||
void updateShapeMaterial(const PxU32 materalIndex, const PxU32 id);
|
||||
|
||||
|
||||
void releaseIDs() { mIdPool.processDeferredIds(); }
|
||||
|
||||
Cm::DeferredIDPool mIdPool;
|
||||
PxgHeapMemoryAllocatorManager* mHeapManager;
|
||||
PxPinnedArray<PxgShape> mHostShapes;
|
||||
PxPinnedArray<PxNodeIndex> mHostShapesRemapTable;//remap table from shape to node index
|
||||
PxInt32ArrayPinned mHostShapeIdTable;
|
||||
PxPinnedArray<PxActor*> mHostTransformCacheIdToActorTable; // remap table from transformCacheId to PxActor
|
||||
PxgCudaBuffer mGpuShapesBuffer;
|
||||
PxgCudaBuffer mGpuShapesRemapTableBuffer; // shape to rigid
|
||||
PxgCudaBuffer mGpuTransformCacheIdToActorTableBuffer; // transformCacheId to PxActor
|
||||
PxgCudaBuffer mGpuRigidIndiceBuffer; // 00011 nodeIndex 0 has shape(0, 2, 3), nodeIndex 1 has shape(1, 4)(it can be either articulation link or rigid body)
|
||||
PxgCudaBuffer mGpuShapeIndiceBuffer; // 02314
|
||||
PxgCudaBuffer mGpuUnsortedShapeIndicesBuffer; // needed when recomputing the shape to rigids mapping
|
||||
PxgCudaBuffer mGpuTempRigidBitIndiceBuffer; //either the lower 32 bits or higher 32 bits of mGpuRigidIndiceBuffer
|
||||
PxgCudaBuffer mGpuTempRigidIndiceBuffer;
|
||||
PxBitMap mDirtyShapeMap;
|
||||
PxBitMap mDirtyTransformCacheMap;
|
||||
bool mResizeRequired;
|
||||
bool mTransformCacheResizeRequired;
|
||||
PxI32 mMaxShapeId;
|
||||
PxI32 mMaxTransformCacheID;
|
||||
bool mHasShapeChanged;
|
||||
bool mHasShapeInstanceChanged;
|
||||
|
||||
private:
|
||||
PX_NOCOPY(PxgShapeManager)
|
||||
};
|
||||
|
||||
class PxgMaterialManager
|
||||
{
|
||||
PX_NOCOPY(PxgMaterialManager)
|
||||
public:
|
||||
PxgMaterialManager(PxgHeapMemoryAllocatorManager* heapManager, const PxU32 elemSize = sizeof(PxsMaterialData));
|
||||
virtual ~PxgMaterialManager(){}
|
||||
|
||||
//this method push CopyDesc to PxgCopyManager
|
||||
virtual void scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext, CUstream stream, const PxU32 elemSize);
|
||||
|
||||
PxU32 registerMaterial(const PxU8* materialData, const PxU32 elemSize);
|
||||
void updateMaterial(const PxU8* materialData, const PxU32 elemSize, const PxU32 id);
|
||||
|
||||
void unregisterMaterial(const PxU32 materialId);
|
||||
void releaseIDs() { mIdPool.processDeferredIds(); }
|
||||
|
||||
PxgCudaBuffer mGpuMaterialBuffer;
|
||||
|
||||
protected:
|
||||
Cm::DeferredIDPool mIdPool;
|
||||
PxgHeapMemoryAllocatorManager* mHeapManager;
|
||||
PxInt8ArrayPinned mHostMaterial;
|
||||
PxBitMap mDirtyMaterialMap;
|
||||
bool mResizeRequired;
|
||||
};
|
||||
|
||||
class PxgFEMMaterialManager : public PxgMaterialManager
|
||||
{
|
||||
public:
|
||||
|
||||
PxgFEMMaterialManager(PxgHeapMemoryAllocatorManager* heapManager, const PxU32 elemSize);
|
||||
|
||||
virtual void scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext, CUstream stream, const PxU32 elemSize);
|
||||
};
|
||||
|
||||
class PxgFEMSoftBodyMaterialManager : public PxgFEMMaterialManager
|
||||
{
|
||||
public:
|
||||
PxgFEMSoftBodyMaterialManager(PxgHeapMemoryAllocatorManager* heapManager);
|
||||
};
|
||||
|
||||
class PxgFEMClothMaterialManager : public PxgMaterialManager
|
||||
{
|
||||
public:
|
||||
PxgFEMClothMaterialManager(PxgHeapMemoryAllocatorManager* heapManager) :
|
||||
PxgMaterialManager(heapManager, sizeof(PxsDeformableSurfaceMaterialData))
|
||||
{
|
||||
}
|
||||
PxsDeformableSurfaceMaterialData* getCPUMaterialData()
|
||||
{
|
||||
return reinterpret_cast<PxsDeformableSurfaceMaterialData*>(mHostMaterial.begin());
|
||||
}
|
||||
};
|
||||
|
||||
class PxgPBDMaterialManager : public PxgMaterialManager
|
||||
{
|
||||
public:
|
||||
PxgPBDMaterialManager(PxgHeapMemoryAllocatorManager* heapManager) :
|
||||
PxgMaterialManager(heapManager, sizeof(PxsPBDMaterialData))
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
137
engine/third_party/physx/source/gpunarrowphase/include/convexFormat.h
vendored
Normal file
137
engine/third_party/physx/source/gpunarrowphase/include/convexFormat.h
vendored
Normal file
@@ -0,0 +1,137 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef CONVEX_FORMAT_H
|
||||
#define CONVEX_FORMAT_H
|
||||
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
#include "foundation/PxBounds3.h"
|
||||
#include "foundation/PxPlane.h"
|
||||
#include "foundation/PxMat44.h"
|
||||
|
||||
#include "foundation/PxMemory.h"
|
||||
#include "foundation/PxUtilities.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
struct ConvexHullCooked
|
||||
{
|
||||
struct Valency
|
||||
{
|
||||
PxU16 mCount;
|
||||
PxU16 mOffset;
|
||||
};
|
||||
|
||||
struct BigConvexRawData
|
||||
{
|
||||
// Support vertex map
|
||||
PxU16 mSubdiv; // "Gaussmap" subdivision
|
||||
PxU16 mNbSamples; // Total #samples in gaussmap PT: this is not even needed at runtime!
|
||||
|
||||
PxU8* mSamples; //mNbSamples * 2 elements
|
||||
PX_FORCE_INLINE const PxU8* getSamples2() const
|
||||
{
|
||||
return mSamples + mNbSamples;
|
||||
}
|
||||
//~Support vertex map
|
||||
|
||||
// Valencies data
|
||||
PxU32 mNbVerts; //!< Number of vertices
|
||||
PxU32 mNbAdjVerts; //!< Total number of adjacent vertices ### PT: this is useless at runtime and should not be stored here
|
||||
Valency* mValencies; //!< A list of mNbVerts valencies (= number of neighbors) mNbVerts elements
|
||||
PxU8* mAdjacentVerts; //!< List of adjacent vertices
|
||||
//~Valencies data
|
||||
};
|
||||
|
||||
struct InternalObjectsData
|
||||
{
|
||||
PxReal mRadius;
|
||||
PxReal mExtents[3];
|
||||
|
||||
PX_FORCE_INLINE void reset()
|
||||
{
|
||||
mRadius = 0.0f;
|
||||
mExtents[0] = 0.0f;
|
||||
mExtents[1] = 0.0f;
|
||||
mExtents[2] = 0.0f;
|
||||
}
|
||||
};
|
||||
|
||||
struct HullPolygon
|
||||
{
|
||||
PxPlane mPlane; //!< Plane equation for this polygon //Could drop 4th elem as it can be computed from any vertex as: d = - p.dot(n);
|
||||
PxU16 mVRef8; //!< Offset of vertex references in hull vertex data (CS: can we assume indices are tightly packed and offsets are ascending?? DrawObjects makes and uses this assumption)
|
||||
PxU8 mNbVerts; //!< Number of vertices/edges in the polygon
|
||||
PxU8 mMinIndex; //!< Index of the polygon vertex that has minimal projection along this plane's normal.
|
||||
|
||||
PX_FORCE_INLINE PxReal getMin(const PxVec3* PX_RESTRICT hullVertices) const //minimum of projection of the hull along this plane normal
|
||||
{
|
||||
return mPlane.n.dot(hullVertices[mMinIndex]);
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE PxReal getMax() const { return -mPlane.d; } //maximum of projection of the hull along this plane normal
|
||||
};
|
||||
|
||||
PxBounds3 mAABB;
|
||||
PxVec3 mCenterOfMass;
|
||||
|
||||
PxU16 mNbEdges;
|
||||
PxU8 mNbHullVertices;
|
||||
PxU8 mNbPolygons;
|
||||
|
||||
HullPolygon* mPolygons; //!< Array of mNbPolygons structures
|
||||
PxVec3* mVertices; //mNbHullVertices elements
|
||||
|
||||
PxU8* mFacesByEdges8; //mNbEdges * 2 elements
|
||||
PxU8* mFacesByVertices8; //mNbHullVertices * 3 elements
|
||||
PxU16* mVerticesByEdges16; //mNbEdges * 2 elements
|
||||
PxU8* mVertexData8; //G-d knows how many elements
|
||||
|
||||
BigConvexRawData* mBigConvexRawData; //!< Hill climbing data, only for large convexes! else NULL.
|
||||
|
||||
InternalObjectsData mInternal;
|
||||
|
||||
bool isGpuFriendly()
|
||||
{
|
||||
bool ret = mNbHullVertices <= 64;
|
||||
|
||||
if(!ret)
|
||||
return false;
|
||||
|
||||
for(PxU32 i = 0; i < mNbPolygons; ++i)
|
||||
ret &= mPolygons[i].mNbVerts <= 31;
|
||||
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
306
engine/third_party/physx/source/gpunarrowphase/include/convexNpCommon.h
vendored
Normal file
306
engine/third_party/physx/source/gpunarrowphase/include/convexNpCommon.h
vendored
Normal file
@@ -0,0 +1,306 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
|
||||
#ifndef __CONVEXNPCOMMON_H__
|
||||
#define __CONVEXNPCOMMON_H__
|
||||
|
||||
#define NUM_TMP_CONTACTS_PER_PAIR 32
|
||||
|
||||
#define CVX_TRI_MAX_CONTACTS 5
|
||||
|
||||
#define MAX_MESH_MESH_PATCHES 128
|
||||
|
||||
// Patches correlation - same normal threshold
|
||||
// 8 degrees
|
||||
//#define PATCH_ACCEPTANCE_EPS 0.990268f
|
||||
// 5 degrees
|
||||
#define PATCH_ACCEPTANCE_EPS 0.996194698f
|
||||
// 3 degrees
|
||||
//#define PATCH_ACCEPTANCE_EPS 0.99862953f
|
||||
|
||||
#define CONVEX_TRIMESH_CACHED 0xFFffFFff
|
||||
|
||||
// AD: used to specify sizes of shared memory structs
|
||||
// for collisions involving convexes. Make sure you
|
||||
// know what you are doing if you change this.
|
||||
#define CONVEX_MAX_VERTICES_POLYGONS 64
|
||||
|
||||
#include "foundation/PxPreprocessor.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxQuat.h"
|
||||
#include "foundation/PxTransform.h"
|
||||
|
||||
#include "geometry/PxMeshScale.h"
|
||||
|
||||
#include "GuBV32.h"
|
||||
|
||||
#include "PxgCommonDefines.h"
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
namespace physx
|
||||
{
|
||||
struct ConvexMeshPair
|
||||
{
|
||||
physx::PxTransform aToB; // pB = aToB.transform(pA + normal * separation) 28
|
||||
int startIndex; // start index in the ConvexTriContacts and ConvexTriPairs arrays 32
|
||||
int count; // the number of triangles in the ConvexTriContacts and ConvexTriPairs 36
|
||||
int cmIndex; // index of the original CM, to index into the output buffer and pass on to contact reporting 40
|
||||
int roundedStartIndex; // start index in the sorted triangle index array. For each pair, the number of indices 44
|
||||
// assigned to is the number of triangles padded to a multiple of 4 and duplicated to have
|
||||
// space for the temp buffer for the radix sort. The start index is aligned to a mutliple of 4.
|
||||
int pad; // pad 48
|
||||
uint2 materialIndices; // material index for shape0 and shape1 56
|
||||
};
|
||||
|
||||
struct ConvexTriNormalAndIndex
|
||||
{
|
||||
static const PxU32 DeferredContactMask = 0x80000000;
|
||||
static const PxU32 NbContactsShift = 27;
|
||||
physx::PxVec3 normal; // normal
|
||||
int index; // isDeferredContact is in bit 31, nbContacts is in bits 27-30, rest is cpu mesh index for the triangle
|
||||
|
||||
static PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getTriangleIndex(PxU32 index)
|
||||
{
|
||||
return (index & ((1<<NbContactsShift)-1));
|
||||
}
|
||||
|
||||
static PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getNbContacts(PxU32 index)
|
||||
{
|
||||
return (index & (~DeferredContactMask)) >> NbContactsShift;
|
||||
}
|
||||
|
||||
static PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 isDeferred(const PxU32 index)
|
||||
{
|
||||
return index & DeferredContactMask;
|
||||
}
|
||||
};
|
||||
|
||||
struct ConvexTriContacts
|
||||
{
|
||||
PxU32 index;
|
||||
};
|
||||
|
||||
struct ConvexTriContact
|
||||
{
|
||||
float4 contact_sepW;
|
||||
};
|
||||
|
||||
//each convex triangle pairs will generate the intermediate data in case we need to
//do post process of this pair
struct ConvexTriIntermediateData
{
	//Enums to indicate the barycentric weighting of the contacts.
	//The region code is stored in the top three bits (eMASK = 0xe0000000):
	//codes 1-3 are the triangle's vertex Voronoi regions, codes 4-6 the edge
	//regions. Code 0 (no bits set) and 7 are not produced by this enum.
	enum VoronoiState: PxU32
	{
		eV0 = 1u << 29,
		eV1 = 2u << 29,
		eV2 = 3u << 29,
		eE01 = 4u << 29,
		eE02 = 5u << 29,
		eE12 = 6u << 29,
		eMASK = 0xe0000000
	};

	physx::PxU32 gpuTriIndex; //gpu triangle index

	// True for the edge codes: eE01/eE02/eE12 (4-6 << 29) all have bit 31 set,
	// while the vertex codes (1-3 << 29) do not.
	static PX_CUDA_CALLABLE PX_FORCE_INLINE bool isEdgeContact(const PxU32 mask)
	{
		return mask & 0x80000000;
	}
};
|
||||
|
||||
typedef ConvexMeshPair SphereMeshPair;
|
||||
typedef ConvexTriContacts SphereTriContacts;
|
||||
typedef ConvexTriNormalAndIndex SphereTriNormalAndIndex;
|
||||
typedef ConvexTriIntermediateData SphereTriIntermediateData;
|
||||
}
|
||||
|
||||
|
||||
#if PX_CUDA_COMPILER
|
||||
|
||||
#include "PxgCommonDefines.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
// Scratch area for midphase (BV32 tree vs. query volume) traversal.
// The trailing compile-time assert bounds its size to WARP_SIZE * 16 PxU32s,
// so it is presumably intended to live in per-warp shared memory — confirm
// against the kernels that instantiate it.
PX_ALIGN_PREFIX(16)
struct MidphaseScratch
{
	const float4 * PX_RESTRICT trimeshVerts;		// mesh vertex buffer
	const uint4 * PX_RESTRICT trimeshTriIndices;	// mesh triangle index buffer
	PxTransform shapeToMeshNoScale;					// query shape -> mesh frame (scale excluded)
	PxTransform meshToWorld;
	PxMeshScale trimeshScale;
	PxVec3 inflatedExtents;							// query AABB half-extents (inflated)
	PxVec3 center;									// query AABB center
	PxU32 shape_materialIndex;
	PxU32 trimeshShape_materialIndex;

	//bv32 tree
	const Gu::BV32DataPacked* bv32PackedNodes;

	//stack for traversal
	int sBv32Nodes[320]; //10 depth of the bv32 tree
}PX_ALIGN_SUFFIX(16);
PX_COMPILE_TIME_ASSERT(sizeof(MidphaseScratch) <= WARP_SIZE * 16 * sizeof(PxU32));
|
||||
}
|
||||
|
||||
#include "schlockShared.h"
|
||||
|
||||
//0 is primitive, 1 is tetrahedron world space
|
||||
//0 is primitive, 1 is tetrahedron world space
// Scratch state shared by a warp running GJK/EPA between a convex primitive
// (vA, up to CONVEX_MAX_VERTICES_POLYGONS verts) and a tetrahedron (vB, 4 verts).
struct TetCollideScratch
{
	physx::PxVec3 vA[CONVEX_MAX_VERTICES_POLYGONS];	// shape 0 vertices
	physx::PxVec3 vB[4];							// shape 1 (tet) vertices
	schlock::GjkCachedData cachedData;				// GJK warm-start cache, consumed by EPA
	schlock::GjkOutput gjkOutput;					// GJK/EPA result (bounds, witnesses, direction)

	physx::PxQuat rot0;								// shape 0 scale rotation
	physx::PxVec3 scale0;							// shape 0 scale

	physx::PxReal contactDistance;
	physx::PxReal inSphereRadius0;
	physx::PxReal inSphereRadius1;

	physx::PxVec3 searchDir;						// initial GJK search direction

	physx::PxU8 nbVertices0;
	physx::PxU8 nbVertices1;

	physx::PxU32 tetIndex0;
	physx::PxU32 tetIndex1;

	// Warp-cooperative reset of the GJK cache: lane 0 clears the size, then the
	// whole warp synchronizes so every lane sees the cleared state.
	__device__ void Clear()
	{
		if (threadIdx.x == 0)
			cachedData.size = 0;
		__syncwarp();
	}
};
|
||||
|
||||
#include "epa.cuh"
|
||||
#include "gjk.cuh"
|
||||
|
||||
// Warp-cooperative GJK+EPA query between the primitive (vA) and tetrahedron (vB)
// stored in ss_scratch. Returns eDISTANT if provably separated by more than
// contactDistance, otherwise the EPA/GJK result (eCLOSE/eOVERLAP) with witness
// data left in ss_scratch.gjkOutput. 'globalWarpIndex' is currently unused.
// NOTE(review): every lane of the warp must call this together — it contains
// __syncwarp() barriers.
__device__ static schlock::GjkResult::Enum tetPrimitivesCollide2(
	squawk::EpaScratch& ss_epa_scratch,
	TetCollideScratch& ss_scratch,
	const physx::PxU32 globalWarpIndex)
{
	typedef schlock::GjkResult GjkResult;

	// Termination ratio for GJK/EPA bound convergence (just below 1).
	const physx::PxReal convergenceRatio = 1 - 0.000225f;

	assert(ss_scratch.nbVertices0 <= 64);
	assert(ss_scratch.nbVertices1 <= 64);

	physx::PxVec3 initialDir(1, 0, 0);

	physx::PxVec3 aToB_p = ss_scratch.searchDir; // ss_scratch.aToB.p;

	// Only override the default start direction if the cached one is non-zero.
	if (aToB_p.magnitudeSquared() > 0)
		initialDir = aToB_p; // GJK's initial start dir is usually from the warm cache => lazy normalize inside GJK

	GjkResult::Enum gjkResult = squawk::gjk(
		ss_scratch.vA, ss_scratch.nbVertices0,
		ss_scratch.vB, ss_scratch.nbVertices1,
		initialDir,
		ss_scratch.contactDistance,
		convergenceRatio,
		ss_scratch.gjkOutput, ss_scratch.cachedData);

	__syncwarp();

	if (gjkResult == GjkResult::eDISTANT)
	{
		return GjkResult::eDISTANT;
	}

	bool separated = (gjkResult == GjkResult::eCLOSE);
	// GJK witness direction disagreeing with the closest-point direction
	// indicates a degenerate/unstable result: fall through to EPA.
	bool anomaly = separated && ss_scratch.gjkOutput.closestPointDir.dot(ss_scratch.gjkOutput.direction) < 0.999f;

	GjkResult::Enum result = GjkResult::eCLOSE;

	// Run EPA when overlapping, or when the GJK "close" result looks unstable.
	if (!separated || anomaly)
	{
		GjkResult::Enum epaResult = squawk::epa(ss_epa_scratch,
			ss_scratch.vA, ss_scratch.nbVertices0,
			ss_scratch.vB, ss_scratch.nbVertices1,
			&(ss_scratch.cachedData),
			convergenceRatio,
			0.5f * (ss_scratch.inSphereRadius0 + ss_scratch.inSphereRadius1),
			ss_scratch.gjkOutput);

		//separated = epaResult == GjkResult::eCLOSE;

		result = epaResult;

		__syncwarp();

		if (ss_scratch.gjkOutput.degenerate)
		{
			//we need to re-run gjk epa with other configurations
			ss_scratch.cachedData.size = 0;

			// Retry from scratch with a different start direction.
			initialDir = physx::PxVec3(0.f, 1.f, 0.f);

			// NOTE(review): this local shadows the outer gjkResult and its value
			// is not inspected — the retry unconditionally proceeds to EPA below.
			GjkResult::Enum gjkResult = squawk::gjk(
				ss_scratch.vA, ss_scratch.nbVertices0,
				ss_scratch.vB, ss_scratch.nbVertices1,
				initialDir,
				ss_scratch.contactDistance,
				convergenceRatio,
				ss_scratch.gjkOutput, ss_scratch.cachedData);

			__syncwarp();

			GjkResult::Enum epaResult = squawk::epa(ss_epa_scratch,
				ss_scratch.vA, ss_scratch.nbVertices0,
				ss_scratch.vB, ss_scratch.nbVertices1,
				&(ss_scratch.cachedData),
				convergenceRatio,
				0.5f * (ss_scratch.inSphereRadius0 + ss_scratch.inSphereRadius1),
				ss_scratch.gjkOutput);

			//separated = epaResult == GjkResult::eCLOSE;

			result = epaResult;
		}
		__syncwarp();
	}
	return result;
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
424
engine/third_party/physx/source/gpunarrowphase/include/cudaNpCommon.h
vendored
Normal file
424
engine/third_party/physx/source/gpunarrowphase/include/cudaNpCommon.h
vendored
Normal file
@@ -0,0 +1,424 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef CUDA_NP_COMMON_H
|
||||
#define CUDA_NP_COMMON_H
|
||||
|
||||
#include "foundation/PxVec3.h"
|
||||
#include <stdio.h>
|
||||
#include "convexFormat.h"
|
||||
#include "cutil_math.h"
|
||||
#include "PxgCommonDefines.h"
|
||||
#include "PxgPersistentContactManifold.h"
|
||||
#include "assert.h"
|
||||
|
||||
#define CONVEX_PLANE_WARPS_PER_BLOCK 4
|
||||
#define GJK_EPA_WARPS_PER_BLOCK 1
|
||||
#define NUM_TMP_CONTACTS_PER_PAIR 32
|
||||
|
||||
|
||||
//The smallest epsilon we will permit (scaled by PxTolerancesScale.length)
|
||||
#define PCM_WITNESS_POINT_LOWER_EPS 1e-2f
|
||||
//The largest epsilon we will permit (scaled by PxTolerancesScale.length)
|
||||
#define PCM_WITNESS_POINT_UPPER_EPS 5e-2f
|
||||
|
||||
namespace physx
|
||||
{
|
||||
struct PxgPersistentContactManifold;
|
||||
struct PxgPersistentContactMultiManifold;
|
||||
struct PxgContactManagerInput;
|
||||
struct PxsContactManagerOutput;
|
||||
class PxsContactManager;
|
||||
struct PxsTorsionalFrictionData;
|
||||
|
||||
namespace Sc
|
||||
{
|
||||
class ShapeInteraction;
|
||||
}
|
||||
|
||||
|
||||
struct PxgPairManagementData
|
||||
{
|
||||
PxgContactManagerInput* mContactManagerInputData;
|
||||
PxsContactManagerOutput* mContactManagerOutputData;
|
||||
PxsContactManager** mCpuContactManagerMapping;
|
||||
Sc::ShapeInteraction** mShapeInteractions;
|
||||
PxReal* mRestDistances;
|
||||
PxsTorsionalFrictionData* mTorsionalData;
|
||||
PxU32* mTempAccumulator;
|
||||
PxU32* mBlockSharedAccumulator;
|
||||
PxU32* mRemoveIndices;
|
||||
PxU32 mNbPairs;
|
||||
PxU32 mNbToRemove;
|
||||
|
||||
void* mPersistentContactManagers;// either PxgPersistentContactManifold or PxgPersistentMultiManagementData
|
||||
|
||||
PxgPairManagementData() : mContactManagerInputData(NULL), mContactManagerOutputData(NULL), mCpuContactManagerMapping(NULL), mShapeInteractions(NULL),
|
||||
mRestDistances(NULL), mTorsionalData(NULL), mTempAccumulator(NULL), mBlockSharedAccumulator(NULL), mNbPairs(0)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Byte counters for the GPU contact stream plus an overflow-error bitmask.
// Written by narrowphase kernels (device) and read back on the host.
struct PX_ALIGN_PREFIX(16) PxgPatchAndContactCounters
{
	// Bit flags OR-ed into overflowError when an output buffer runs out of space.
	enum OverflowError
	{
		NO_OVERFLOW = 0,
		CONTACT_BUFFER_OVERFLOW = 1 << 0,
		FORCE_BUFFER_OVERFLOW = 1 << 1,
		PATCH_BUFFER_OVERFLOW = 1 << 2
	};

	PxU32 patchesBytes;			// bytes consumed by contact patches
	PxU32 contactsBytes;		// bytes consumed by contacts
	PxU32 forceAndIndiceBytes;	// bytes consumed by forces/indices
	PxU32 overflowError;		// OverflowError bitmask

	// Records an overflow condition. Uses atomicOr because multiple threads may
	// report concurrently; compiled to a no-op on the host pass.
	__device__
	void setOverflowError(const OverflowError& err)
	{
		PX_UNUSED(err);
#if __CUDA_ARCH__
		atomicOr(&overflowError, PxU32(err));
#endif
	}

	// Returns the accumulated OverflowError bitmask (0 == no overflow).
	__host__ __device__
	PxU32 getOverflowError() const
	{
		return overflowError;
	}
} PX_ALIGN_SUFFIX(16);
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Unpacks a 32-bit count/offset word into a Valency struct: high 16 bits ->
// mCount, low 16 bits -> mOffset (u32High/u32Low are defined elsewhere).
__host__ __device__ inline
physx::ConvexHullCooked::Valency u32ToValency(physx::PxU32 countOffset)
{
	physx::ConvexHullCooked::Valency v;
	v.mCount = u32High(countOffset);
	v.mOffset = u32Low(countOffset);

	return v;
}

// Inverse of u32ToValency: packs mCount into the high half and mOffset into
// the low half of one PxU32.
__host__ __device__ inline
physx::PxU32 valencyToPxU32(const physx::ConvexHullCooked::Valency& v)
{
	return merge(v.mCount, v.mOffset);
}
|
||||
|
||||
// Accessors for bit-packed cooked convex-hull header words. The same physical
// field positions carry different meanings depending on which word is passed in
// (hence several accessors sharing one extraction pattern). u16Low/u16High/
// u32Low/u32High are halving helpers defined elsewhere — confirm exact widths
// against their definitions.

// Low byte of the low half-word: number of adjacent vertices.
__host__ __device__ inline
physx::PxU8 getNbAdjVerts(physx::PxU32 val)
{
	return u16Low(u32Low(val));
}

// High half-word: number of edges.
__host__ __device__ inline
physx::PxU16 getNbEdges(physx::PxU32 val)
{
	return u32High(val);
}

// Low byte of the low half-word: number of polygons.
__host__ __device__ inline
physx::PxU8 getNbPolygons(physx::PxU32 val)
{
	return u16Low(u32Low(val));
}

// High half-word: vertex reference (VRef8) of a polygon descriptor.
__host__ __device__ inline
physx::PxU16 getVRef8(physx::PxU32 val)
{
	return u32High(val);
}

// High byte of the low half-word: vertex count of a polygon descriptor.
__host__ __device__ inline
physx::PxU8 getNbVerts(physx::PxU32 val)
{
	return u16High(u32Low(val));
}

// Low byte of the low half-word: minimum-index field of a polygon descriptor.
__host__ __device__ inline
physx::PxU8 getMinIndex(physx::PxU32 val)
{
	return u16Low(u32Low(val));
}

// Packs hi into the upper half-word and (lohi, lolo) into the lower half-word
// (delegates to the two-argument merge overloads declared elsewhere).
__host__ __device__ inline
physx::PxU32 merge(physx::PxU16 hi, physx::PxU8 lohi, physx::PxU8 lolo)
{
	return merge(hi, merge(lohi, lolo));
}
|
||||
|
||||
// Returns true if the tetrahedron (v0,v1,v2,v3) has strictly positive signed
// volume (scalar triple product > 1e-9), i.e. it is non-degenerate and has
// positive orientation.
__host__ __device__ inline bool isValidTet(const physx::PxVec3& worldV0, const physx::PxVec3& worldV1, const physx::PxVec3& worldV2, const physx::PxVec3& worldV3)
{
	return (worldV1 - worldV0).dot((worldV2 - worldV0).cross(worldV3 - worldV0)) > 1e-9f;
}
|
||||
|
||||
//device only functions
|
||||
#if PX_CUDA_COMPILER
|
||||
|
||||
#include "reduction.cuh"
|
||||
|
||||
// Applies a (possibly non-uniform, possibly negative) mesh scale expressed in
// a rotated frame: points are rotated into the scale frame, scaled per-axis,
// then rotated back. Mirrors the free vertex2Shape/shape2Vertex helpers but
// caches the identity-scale and winding-flip checks at construction.
class MeshScaling
{
public:
	__device__ MeshScaling(const physx::PxVec3& scale_, const physx::PxQuat& rotation_)
	{
		scale = scale_;
		rotation = rotation_;
		// Identity scale lets getShapeSpaceVert skip the transform entirely.
		idtMeshScale = (scale.x == 1.0f && scale.y == 1.0f && scale.z == 1.0f);
		// A negative determinant (odd number of negative axes) mirrors the mesh,
		// so triangle winding must be flipped to keep normals consistent.
		flipNormal = ((scale.x * scale.y * scale.z) < 0.0f);
	}

	// Vertex space -> shape space: rotate into the scale frame, scale, rotate back.
	__device__ inline physx::PxVec3 vertex2Shape(const physx::PxVec3& v) const
	{
		using namespace physx;

		PxVec3 temp = rotation.rotate(v);

		temp.x *= scale.x;
		temp.y *= scale.y;
		temp.z *= scale.z;

		return rotation.rotateInv(temp);
	}

	// Shape space -> vertex space: inverse of vertex2Shape (divides per axis).
	__device__ inline physx::PxVec3 shape2Vertex(const physx::PxVec3& v) const
	{
		using namespace physx;

		PxVec3 temp = rotation.rotate(v);

		temp.x /= scale.x;
		temp.y /= scale.y;
		temp.z /= scale.z;

		return rotation.rotateInv(temp);
	}

	//Transforms a normal vector from vertex to shape space. This keeps the normal vector perpendicular to surfaces that get scaled with the same transform.
	//Does not preserve length. Is applicable to other covariant vectors too.
	__device__ inline physx::PxVec3 vertex2ShapeNormalVector(const physx::PxVec3& normal) const
	{
		return shape2Vertex(normal);
	}

	//Transforms a normal vector from shape to vertex space. This keeps the normal vector perpendicular to surfaces that get scaled with the same transform.
	//Does not preserve length. Is applicable to other covariant vectors too.
	__device__ inline physx::PxVec3 shape2VertexNormalVector(const physx::PxVec3& normal) const
	{
		return vertex2Shape(normal);
	}

	// Writes the shape-space triangle into (v0,v1,v2). When scaling is applied,
	// (triV0,triV1,triV2) are also updated in place to shape space, and v1/v2
	// are swapped if the scale mirrors the mesh (flipNormal) to preserve winding.
	__device__ inline void getShapeSpaceVert(physx::PxVec3& triV0, physx::PxVec3& triV1, physx::PxVec3& triV2,
		physx::PxVec3& v0, physx::PxVec3& v1, physx::PxVec3& v2) const
	{
		if (idtMeshScale)
		{
			v0 = triV0;
			v1 = triV1;
			v2 = triV2;
		}
		else
		{
			// Triangle scaling, triangle verts in shape space
			triV0 = vertex2Shape(triV0);
			triV1 = vertex2Shape(triV1);
			triV2 = vertex2Shape(triV2);
			v0 = triV0;
			v1 = flipNormal ? triV2 : triV1;
			v2 = flipNormal ? triV1 : triV2;
		}
	}

	physx::PxVec3 scale;	// per-axis scale factors (in the rotated frame)
	physx::PxQuat rotation;	// frame in which the scale is applied
	bool idtMeshScale;		// true if scale == (1,1,1)
	bool flipNormal;		// true if the scale mirrors the mesh (negative determinant)
};
|
||||
|
||||
//Applies a potentially non-uniform scaling to the point v. The scaling can be expressed in a rotated coordinate frame defined by the quaternion called rotation.
|
||||
__device__ inline
|
||||
physx::PxVec3 vertex2Shape(const physx::PxVec3& v, const physx::PxVec3& scale, const physx::PxQuat& rotation)
|
||||
{
|
||||
using namespace physx;
|
||||
|
||||
PxVec3 temp = rotation.rotate(v);
|
||||
|
||||
temp.x *= scale.x;
|
||||
temp.y *= scale.y;
|
||||
temp.z *= scale.z;
|
||||
|
||||
return rotation.rotateInv(temp);
|
||||
}
|
||||
|
||||
//Removes a potentially non-uniform scaling from the point v. The scaling can be expressed in a rotated coordinate frame defined by the quaternion called rotation.
|
||||
__device__ inline
|
||||
physx::PxVec3 shape2Vertex(const physx::PxVec3& v, const physx::PxVec3& scale, const physx::PxQuat& rotation)
|
||||
{
|
||||
using namespace physx;
|
||||
|
||||
PxVec3 temp = rotation.rotate(v);
|
||||
|
||||
temp.x /= scale.x;
|
||||
temp.y /= scale.y;
|
||||
temp.z /= scale.z;
|
||||
|
||||
return rotation.rotateInv(temp);
|
||||
}
|
||||
|
||||
//Transforms a normal vector from vertex to shape space. This keeps the normal vector perpendicular to surfaces that get scaled with the same transform.
|
||||
//Does not preserve length. Is applicable to other covariant vectors too.
|
||||
__device__ inline
|
||||
physx::PxVec3 vertex2ShapeNormalVector(const physx::PxVec3& normal, const physx::PxVec3& scale, const physx::PxQuat& rotation)
|
||||
{
|
||||
return shape2Vertex(normal, scale, rotation);
|
||||
}
|
||||
|
||||
//Transforms a normal vector from shape to vertex space. This keeps the normal vector perpendicular to surfaces that get scaled with the same transform.
|
||||
//Does not preserve length. Is applicable to other covariant vectors too.
|
||||
__device__ inline
|
||||
physx::PxVec3 shape2VertexNormalVector(const physx::PxVec3& normal, const physx::PxVec3& scale, const physx::PxQuat& rotation)
|
||||
{
|
||||
return vertex2Shape(normal, scale, rotation);
|
||||
}
|
||||
|
||||
// Warp-cooperative load of convex-hull vertices into s_vertices: each lane
// strides by WARP_SIZE, applying the vertex-to-shape scale and then the given
// transform. Must be called by all lanes of the warp together.
__device__ inline static
void prepareVertices(const PxTransform& transf,
	PxVec3 scale, PxQuat rot,
	PxU32 nbHullVertices,
	const float4* pVertices,
	PxVec3* s_vertices
)
{
	assert(transf.isFinite());
	for (PxU32 i = threadIdx.x; i < nbHullVertices; i += WARP_SIZE)
	{
		float4 f = pVertices[i];
		s_vertices[i] = transf.transform(vertex2Shape(PxVec3(f.x, f.y, f.z), scale, rot));
		assert(s_vertices[i].isFinite());
	}
}

// Same as above but leaves the vertices in shape space (no rigid transform).
__device__ inline static
void prepareVertices(PxVec3 scale, PxQuat rot,
	PxU32 nbHullVertices,
	const float4* pVertices,
	PxVec3* s_vertices
)
{
	for (PxU32 i = threadIdx.x; i < nbHullVertices; i += WARP_SIZE)
	{
		float4 f = pVertices[i];
		s_vertices[i] = vertex2Shape(PxVec3(f.x, f.y, f.z), scale, rot);
	}
}
|
||||
|
||||
#define MESH_MANIFOLD_EPSILON 0.05f
|
||||
#define BOX_MARGIN_RATIO 0.25f
|
||||
|
||||
// PCM contact-gen margin for a convex: 25% of the smallest scaled half-extent,
// clamped by a tolerance-length-derived margin.
__device__ inline static
PxReal calculatePCMConvexMargin(const float4& extents_, const PxVec3& scale, const PxReal toleranceLength)
{
	using namespace physx;
	const PxVec3 extents = PxVec3(extents_.x * scale.x,
		extents_.y * scale.y,
		extents_.z * scale.z);

	const PxReal min = fminf(fminf(extents.x, extents.y), extents.z);
	const PxReal toleranceMargin = toleranceLength * MESH_MANIFOLD_EPSILON;
	//ML: 25% of the minimum extents of the internal AABB as this convex hull's margin
	return fminf((min * BOX_MARGIN_RATIO), toleranceMargin);
}

// Overload taking already-scaled extents directly.
__device__ inline static
PxReal calculatePCMConvexMargin(const PxVec3& extents, const PxReal toleranceLength)
{
	using namespace physx;
	const PxReal min = fminf(fminf(extents.x, extents.y), extents.z);
	const PxReal toleranceMargin = toleranceLength * MESH_MANIFOLD_EPSILON;
	//ML: 25% of the minimum extents of the internal AABB as this convex hull's margin
	return fminf((min * BOX_MARGIN_RATIO), toleranceMargin);
}
|
||||
|
||||
// Chebyshev (max-axis) distance between the current and previous positions:
// the largest absolute per-component delta.
__device__ inline static physx::PxReal maxTransformPositionDelta(const physx::PxVec3& curP, const physx::PxVec3& preP)
{
	using namespace physx;
	const PxVec3 delta = curP - preP;
	const PxReal maxXY = PxMax(PxAbs(delta.x), PxAbs(delta.y));
	return PxMax(maxXY, PxAbs(delta.z));
}
|
||||
|
||||
|
||||
__constant__ __device__ PxF32 local_invalidateThresholdsConvex[5] = { 0.5f, 0.125f, 0.25f, 0.375f, 0.375f };
|
||||
__constant__ __device__ PxF32 local_invalidateQuatThresholdsConvex[5] = { 0.9998f, 0.9999f, 0.9999f, 0.9999f, 0.9999f };
|
||||
|
||||
__constant__ __device__ PxF32 local_invalidateThresholdsSphere[3] = { 0.5f, 0.1f, 0.75f };
|
||||
__constant__ __device__ PxF32 local_invalidateQuatThresholdsSphere[3] = { 0.9995f, 0.9999f, 0.9997f };
|
||||
|
||||
__constant__ __device__ PxF32 local_invalidateQuatThresholdsConvexPlane[5] = { 0.99996f, 0.99996f, 0.99996f, 0.99996f, 0.99996f };
|
||||
|
||||
// Decides whether a persistent contact manifold must be regenerated, based on
// how far the pair moved/rotated since the manifold was built. Thresholds are
// looked up per current contact count (0-4) from the supplied tables. Returns
// non-zero (treated as boolean) when contacts should be regenerated.
__device__ inline static
PxU32 invalidate_BoxConvex(const PxVec3& curRelPos, const PxQuat& curQuatA, const PxQuat& curQuatB,
	const PxVec3& preRelPos, const PxQuat& preRelQuatA, const PxQuat& preRelQuatB,
	const physx::PxReal minMargin, const physx::PxReal radiusA, const physx::PxReal radiusB,
	const physx::PxU8 manifold_NumContacts, PxF32* local_invalidateThresholds, PxF32* local_invalidateQuatThresholds)
{
	using namespace physx;
	//This is the translational threshold used in invalidate_BoxConvexHull. 0.5 is 50% of the object margin. we use different threshold between
	//0 and 4 points. This threshold is a scale that is multiplied by the objects' margins.
	const PxReal ratio = local_invalidateThresholds[manifold_NumContacts];

	const PxReal thresholdP = minMargin * ratio;
	// Chebyshev translation delta of the relative position.
	const PxReal deltaP = maxTransformPositionDelta(curRelPos, preRelPos);

	//This is the rotational threshold used in invalidate_BoxConvexHull. 0.9998 is a threshold for quat difference
	//between previous and current frame
	const PxReal thresholdQ = local_invalidateQuatThresholds[manifold_NumContacts];
	// Quaternion dot products: close to 1 means little rotation since last frame.
	const PxReal deltaQA = curQuatA.dot(preRelQuatA);
	const PxReal deltaQB = curQuatB.dot(preRelQuatB);

	bool generateContacts = (deltaP > thresholdP) || (thresholdQ > deltaQA) || (thresholdQ > deltaQB);
	if (!generateContacts)
	{
		// Secondary check: convert each rotation angle to a surface travel
		// distance (angle * bounding radius) and compare against the
		// translational threshold.
		const PxReal aRadian = deltaQA < 1.0f ? acosf(deltaQA) : 0.f;
		const PxReal bRadian = deltaQB < 1.0f ? acosf(deltaQB) : 0.f;

		const PxReal travelDistA = aRadian * radiusA;
		const PxReal travelDistB = bRadian * radiusB;

		generateContacts = (travelDistA > thresholdP) || (travelDistB > thresholdP);
	}
	return generateContacts;
}
|
||||
|
||||
|
||||
#endif
|
||||
#endif
|
||||
230
engine/third_party/physx/source/gpunarrowphase/include/schlockShared.h
vendored
Normal file
230
engine/third_party/physx/source/gpunarrowphase/include/schlockShared.h
vendored
Normal file
@@ -0,0 +1,230 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef GU_SCHLOCKSHARED_H
|
||||
#define GU_SCHLOCKSHARED_H
|
||||
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxMat33.h"
|
||||
#include "foundation/PxTransform.h"
|
||||
#include "geometry/PxMeshScale.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#define GJK_LOGGING 0
|
||||
#define EPA_REPLACE_TRIANGLE_LOGGING 0
|
||||
#define EPA_LOGGING 0
|
||||
#define EPA_LOG_DEGENERATE_CASES 0
|
||||
|
||||
#if GJK_LOGGING
|
||||
#define GJK_LOG(...) if(gjkDebug) printf(__VA_ARGS__)
|
||||
#else
|
||||
#define GJK_LOG(...)
|
||||
#endif
|
||||
|
||||
|
||||
namespace schlock
|
||||
{
|
||||
|
||||
extern bool epaDebug;
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Minimal swap usable from both host and device code (std::swap is not
// available in all CUDA contexts).
template<typename T>
PX_CUDA_CALLABLE PX_FORCE_INLINE void swap(T& a, T& b)
{
	T tmp = a;
	a = b;
	b = tmp;
}

// Packs a vertex-index pair into one word: a in byte 0, b in byte 1.
// 0x3f800000 is the IEEE-754 bit pattern of 1.0f — presumably set so the
// packed word is never zero / stays a benign float; confirm against consumers.
PX_FORCE_INLINE PX_CUDA_CALLABLE PxU32 encodeIndices(PxU8 a, PxU8 b) { return 0x3f800000u | b<<8 | a;}
// Extracts the first index (byte 0) of an encodeIndices() word.
PX_FORCE_INLINE PX_CUDA_CALLABLE PxU8 aIndex(PxU32 indices) { return indices&0xff; }
// Extracts the second index (byte 1) of an encodeIndices() word.
PX_FORCE_INLINE PX_CUDA_CALLABLE PxU8 bIndex(PxU32 indices) { return (indices>>8)&0xff; }
|
||||
|
||||
// A Minkowski-difference support vertex cached by GJK: the point itself plus
// the packed source-vertex indices on shapes A and B (see encodeIndices).
// The volatile overloads allow reads from shared-memory-resident instances.
struct CachedVertex
{
	PxVec3 v;		// support point (MS space)
	PxU32 indices;	// encodeIndices(a, b)
	PX_FORCE_INLINE PX_CUDA_CALLABLE CachedVertex() {}
	PX_FORCE_INLINE PX_CUDA_CALLABLE CachedVertex(PxU8 iA, PxU8 iB, const PxVec3& iV): v(iV), indices(encodeIndices(iA,iB)) {}
	PX_FORCE_INLINE PX_CUDA_CALLABLE CachedVertex(const CachedVertex& other): v(other.v), indices(other.indices) {}
	PX_FORCE_INLINE PX_CUDA_CALLABLE void operator=(const CachedVertex& other) { v = other.v, indices = other.indices; }

	PX_FORCE_INLINE PX_CUDA_CALLABLE PxU8 a() const { return aIndex(indices); }
	PX_FORCE_INLINE PX_CUDA_CALLABLE PxU8 b() const { return bIndex(indices); }

	PX_FORCE_INLINE PX_CUDA_CALLABLE PxU8 a() const volatile { return aIndex(indices); }
	PX_FORCE_INLINE PX_CUDA_CALLABLE PxU8 b() const volatile { return bIndex(indices); }
	// Copies the vector out of a volatile instance component-wise (PxVec3's own
	// copy is not volatile-qualified).
	PX_FORCE_INLINE PX_CUDA_CALLABLE PxVec3 getV() const volatile { return PxVec3(v.x, v.y, v.z); }
};
|
||||
|
||||
// GJK warm-start cache: up to 4 simplex vertices with their plane data.
// 'size' is the number of valid entries; EPA seeds from this cache.
struct GjkCachedData
{
	CachedVertex vertices[4];
	// Plane per cached face: unit(?) normal n and distance d — exact convention
	// is defined by the gjk/epa kernels, not visible here.
	struct
	{
		PxVec3 n;
		PxReal d;
	} normals[4];

	PxU32 size;	// number of valid entries in vertices/normals

	PX_FORCE_INLINE PX_CUDA_CALLABLE GjkCachedData(){}

	// Warp-wide reset: every lane writes size = 0 then synchronizes.
	__device__ void Reset() { size = 0; __syncwarp(); }
};

// Tri-state outcome of a GJK query (see the routine documentation below):
// eDISTANT = provably farther than minSep, eOVERLAP = provable overlap,
// eCLOSE = neither bound is conclusive.
struct GjkResult
{
	enum Enum
	{
		eDISTANT,
		eCLOSE,
		eOVERLAP
	};
};
|
||||
|
||||
// Result payload of a GJK/EPA query: distance (or depth) bounds with their
// witnesses. See the routine-level comments at the end of this header for the
// exact contract per result code.
struct GjkOutput
{
	PxVec3 direction; // unit witness for lower bound
	PxReal lowerBound; // provable lower bound
	PxReal upperBound; // provable upper bound

	PxVec3 closestPointA; // witnesses for upper bound
	PxVec3 closestPointB;

	bool degenerate;		// set when geometric instability was hit before convergence
	PxVec3 closestPointDir;	// direction between the closest points
};
|
||||
|
||||
// Computes barycentric coordinates (u, v, w) of point p with respect to
// triangle (a, b, c): p = u*a + v*b + w*c (with u+v+w = 1). For numerical
// robustness the signed-area ratios are evaluated in the 2D plane where the
// triangle normal has its largest absolute component. Assumes the triangle is
// non-degenerate (the chosen normal component is non-zero).
PX_CUDA_CALLABLE PX_FORCE_INLINE void barycentrics(const PxVec3& a, const PxVec3& b, const PxVec3& c, float &u, float &v, float &w, const PxVec3& p)
{
	// m: (unnormalized) triangle normal; a1, a2: sub-triangle area vectors for
	// the regions opposite a and b respectively.
	PxVec3 m = (b - a).cross(c - a), ma = m.abs(), a1 = (b-p).cross(c-p), a2 = (c-p).cross(a-p);
	PxReal d;

	// area ratios in plane with largest normal component
	if (ma.x >= ma.y && ma.x >= ma.z)
		d = 1.0f/m.x, u = a1.x * d, v = a2.x * d;
	else if (ma.y >= ma.z)
		d = 1.0f/m.y, u = a1.y * d, v = a2.y * d;
	else
		d = 1.0f/m.z, u = a1.z * d, v = a2.z * d;

	// Third coordinate from the partition-of-unity property.
	w = 1.0f - u - v;
}
|
||||
|
||||
|
||||
// top-level functions
|
||||
|
||||
// unit-level functions for testing
|
||||
static const PxU32 EPA_MAX_VERTS = 18, EPA_MAX_FACES = 2*EPA_MAX_VERTS-4; // for N verts, we'll have 2(N-2) faces.
|
||||
|
||||
typedef int EPABitMap;
|
||||
|
||||
// Packs three 8-bit values into the low 24 bits of a PxU32 (A = byte 0).
#define MAKE_Vec3I8(A, B, C) (physx::PxU32(A) | (physx::PxU32(B) << 8) | (physx::PxU32(C) << 16))

// Three byte-sized indices packed into one 32-bit word (see MAKE_Vec3I8).
// The top byte is unused by get/set and ignored by operator==.
struct Vec3I8
{
	PxU32 a;
	// Reads component i (0-2) as a byte.
	PX_FORCE_INLINE PX_CUDA_CALLABLE PxU8 get(PxU32 i) const { return (a >> (i << 3)) & 0xFFul; }
	// Overwrites component i (0-2) with v, leaving the other bytes untouched.
	PX_FORCE_INLINE PX_CUDA_CALLABLE void set(PxU32 i, PxU8 v) { a = (a & ~(0xFFul << (i << 3))) | (PxU32(v) << (i << 3)); }
	// Raw access to the packed word as an int (used for warp shuffles and the
	// like — confirm against callers).
	PX_FORCE_INLINE PX_CUDA_CALLABLE void setInt(int i) { a = i; }
	PX_FORCE_INLINE PX_CUDA_CALLABLE int getInt() { return a; }

	PX_FORCE_INLINE PX_CUDA_CALLABLE const Vec3I8& operator=(const Vec3I8& other) {a = other.a; return *this;}

	// Equality on the three packed bytes only (top byte ignored).
	bool operator==(const Vec3I8& other) { return (a & 0xFFffFFul) == (other.a & 0xFFffFFul); }
};
|
||||
|
||||
// One face of the EPA polytope.
struct EPATriangle
{
	PxVec3 normal; // unit normal
	PxReal distance; // *plane* distance from origin
	Vec3I8 v; // vertices
};

// Fixed-capacity EPA polytope: EPA_MAX_VERTS vertices and the corresponding
// 2*(N-2) = EPA_MAX_FACES faces.
struct EPAHullData
{
	PxVec3 v[EPA_MAX_VERTS];		// vertex positions (MS space)
	PxU32 i[EPA_MAX_VERTS];			// packed source indices per vertex (see encodeIndices)
	EPATriangle t[EPA_MAX_FACES];	// faces
	Vec3I8 a[EPA_MAX_FACES];		// per-face adjacency (three neighbor face ids)

	PxU8 nbVertices;				// number of vertices in use
	EPABitMap liveMap;				// bitmask of faces still alive in t[]
};
|
||||
|
||||
|
||||
// GJK routine.
|
||||
//
|
||||
// Input:
|
||||
// convexA, convexB: two convexes in the same space
|
||||
// initialDir: an unnormalized start direction for GJK iteration
|
||||
// minSep: the minimum distance at which the two objects are judged to be separated. Must be positive.
|
||||
// convergenceRatio: minimum ratio of lowerBound/upperBound for convergence (a number close to, but less than, 1)
|
||||
//
|
||||
// returns DISTANT if a lower bound on distance is found that is > minSep
|
||||
// OVERLAP if the objects are provably overlapping
|
||||
// else CLOSE.
|
||||
//
|
||||
// for OVERLAP, the cachedData (vertex indices & search directions) can be passed to EPA
|
||||
// for DISTANT, a direction is generated which provides a witness for the lower bound
|
||||
// for CLOSE, a pair of closest points is generated, which provide (in MS space) a witness for the upper bound, and
|
||||
// if nondegenerate, also a direction which witnesses the lower bound
|
||||
//
|
||||
// the degenerate flag is raised in the output if the algorithm encountered geometric instability while the minSep value is in [lower, upper].
|
||||
// In particular, we are not overlapping (lower>minSep) nor overlapping (upper == 0), so this can only accompany a result of CLOSE.
|
||||
// geometric instability can only happen very close to convergence, so unless very high tolerances are necessary, convergence can be assumed.
|
||||
// in this case if the lower bound is zero, then the the lower bound witness direction is not generated.
|
||||
// So when CLOSE is returned, the upper bound should be used as the authoritative distance, and the direction should be taken from the closest points
|
||||
|
||||
// EPA routine.
|
||||
//
|
||||
// Input:
|
||||
// convexA, convexB: two convexes in the same space
|
||||
// cache: a cache of initial vertices produced by GJK. Must have size at least 2
|
||||
// convergenceRatio: minimum ratio of lowerBound/upperBound for convergence (a number close to, but less than, 1)
|
||||
//
|
||||
// on output, the following are emitted:
|
||||
// * a pair of closest points which are the best depenetration found in the allowed space
|
||||
// * a lower bound and upper bound on penetration depth.
|
||||
// * a direction corresponding to the lower bound (this direction *opposes* closestB - closestA), so if we translate object B by lowerBound * direction, we
|
||||
// should approximately separate the shapes
|
||||
//
|
||||
// the degenerate flag is raised in the output if the algorithm encountered geometric instability before convergence, or ran out of space.
|
||||
// in this case, the bounds, direction and closest points are still generated.
|
||||
//
|
||||
// The algorithm returns GjkResult::eOVERLAP in almost all cases. However, it's possible for EPA to find separation even if GJK found penetration,
|
||||
// and in this anomalous case it returns GjkResult::eCLOSE
|
||||
//
|
||||
// // There is currently no direction for the upper bound, which would allow for reliable (if not minimal) separation in degenerate cases
|
||||
|
||||
}
|
||||
#endif
|
||||
87
engine/third_party/physx/source/gpunarrowphase/include/tri32Data.h
vendored
Normal file
87
engine/third_party/physx/source/gpunarrowphase/include/tri32Data.h
vendored
Normal file
@@ -0,0 +1,87 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
|
||||
#ifndef __TRI32DATA_H__
|
||||
#define __TRI32DATA_H__
|
||||
|
||||
// A triangle stored as three 32-bit vertex indices into a mesh vertex buffer.
// No padding: sizeof is 12 bytes. See Triangle32DataPad below for the 16-byte,
// alignment-friendly variant used on the GPU side.
struct Triangle32Data
{
	PxU32 v[3];	// indices of the three triangle corners

	// Default constructor leaves the indices uninitialized (intentional, so
	// bulk-allocated arrays don't pay for initialization).
	Triangle32Data() {}

	// Construct from three explicit vertex indices.
	Triangle32Data(PxU32 v0,PxU32 v1,PxU32 v2)
	{
		v[0]=v0;
		v[1]=v1;
		v[2]=v2;
	}
};
|
||||
|
||||
struct Triangle32DataPad
|
||||
{
|
||||
PxU32 v[3];
|
||||
PxU32 pad;
|
||||
|
||||
PX_CUDA_CALLABLE Triangle32DataPad() {}
|
||||
|
||||
PX_CUDA_CALLABLE Triangle32DataPad(PxU32 v0, PxU32 v1, PxU32 v2, PxU32 v3)
|
||||
{
|
||||
v[0] = v0;
|
||||
v[1] = v1;
|
||||
v[2] = v2;
|
||||
pad = v3;
|
||||
}
|
||||
|
||||
PX_CUDA_CALLABLE Triangle32DataPad(PxU32 v0, PxU32 v1, PxU32 v2)
|
||||
{
|
||||
v[0] = v0;
|
||||
v[1] = v1;
|
||||
v[2] = v2;
|
||||
pad = 0;
|
||||
}
|
||||
|
||||
PX_CUDA_CALLABLE Triangle32DataPad(Triangle32Data t)
|
||||
{
|
||||
v[0] = t.v[0];
|
||||
v[1] = t.v[1];
|
||||
v[2] = t.v[2];
|
||||
pad = 0;
|
||||
}
|
||||
|
||||
PX_CUDA_CALLABLE Triangle32DataPad(Triangle32Data t, PxU32 ipad = 0)
|
||||
{
|
||||
v[0] = t.v[0];
|
||||
v[1] = t.v[1];
|
||||
v[2] = t.v[2];
|
||||
pad = ipad;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
62
engine/third_party/physx/source/gpunarrowphase/include/typeHelpers.h
vendored
Normal file
62
engine/third_party/physx/source/gpunarrowphase/include/typeHelpers.h
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
|
||||
#ifndef __TYPEHELPERS_H__
|
||||
#define __TYPEHELPERS_H__
|
||||
|
||||
// Drop the w component of a CUDA float4 and return its xyz part as a PxVec3.
PX_CUDA_CALLABLE static inline PxVec3 float4ToVec3(const float4 & data)
{
	return PxVec3(data.x, data.y, data.z);
}
|
||||
// Widen a PxVec3 to a CUDA float4; the w component is set to 0.
PX_CUDA_CALLABLE static inline float4 vec3ToFloat4(const PxVec3 & data)
{
	return make_float4(data.x, data.y, data.z, 0.0f);
}
|
||||
|
||||
// Reinterpret a float4 as a quaternion; component order (x, y, z, w) matches
// PxQuat's constructor.
PX_CUDA_CALLABLE static inline PxQuat float4ToQuat(const float4 & data)
{
	return PxQuat(data.x, data.y, data.z, data.w);
}
|
||||
// Pack a quaternion into a float4 in (x, y, z, w) order; inverse of float4ToQuat.
PX_CUDA_CALLABLE static inline float4 quatToFloat4(const PxQuat & data)
{
	return make_float4(data.x, data.y, data.z, data.w);
}
|
||||
|
||||
// Split a rigid transform into two float4s: translation in pos (w set to 0)
// and rotation quaternion in rot (x, y, z, w).
PX_CUDA_CALLABLE static inline void transformToFloat4s(float4 & pos, float4 & rot, const PxTransform & transform)
{
	pos = make_float4(transform.p.x, transform.p.y, transform.p.z, 0.0f);
	rot = make_float4(transform.q.x, transform.q.y, transform.q.z, transform.q.w);
}
|
||||
// Rebuild a rigid transform from the (pos, rot) float4 pair produced by
// transformToFloat4s. The w component of pos is ignored.
PX_CUDA_CALLABLE static inline void float4sToTransform(PxTransform & transform, const float4 & pos, const float4 & rot)
{
	transform.p = PxVec3(pos.x, pos.y, pos.z);
	transform.q = PxQuat(rot.x, rot.y, rot.z, rot.w);
}
|
||||
|
||||
#endif
|
||||
208
engine/third_party/physx/source/gpunarrowphase/src/CUDA/bv32Traversal.cuh
vendored
Normal file
208
engine/third_party/physx/source/gpunarrowphase/src/CUDA/bv32Traversal.cuh
vendored
Normal file
@@ -0,0 +1,208 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __BV32_TRAVERSAL_CUH__
|
||||
#define __BV32_TRAVERSAL_CUH__
|
||||
|
||||
#include "copy.cuh"
|
||||
#include "reduction.cuh"
|
||||
|
||||
//Not used but handy when creating a new type of tree traversal.
//Documents the callback interface expected by bv32TreeTraversal(): any callback
//type passed as T must provide the two member functions below with these signatures.
struct BV32TraversalTemplate
{
	PX_FORCE_INLINE __device__ BV32TraversalTemplate()
	{

	}

	//Called on leaf nodes to process them.
	//A full warp will call the function. If primitiveIndex is 0xFFFFFFFF, then the thread has no leaf node to check.
	PX_FORCE_INLINE __device__ void intersectPrimitiveFullWarp(PxU32 primitiveIndex, PxU32 idxInWarp) const
	{

	}

	//Called to determine if the traversal should continue or stop on a branch node.
	//A full warp will call the function. If hasBox is false, then the thread has no branch node to check.
	//Returning true keeps the corresponding child on the traversal stack.
	PX_FORCE_INLINE __device__ bool intersectBoxFullWarp(bool hasBox, const PxVec3& min, const PxVec3& max) const
	{
		if (hasBox)
			return true; //Actually test against the axis aligned box defined by min and max
		return false;
	}
};
|
||||
|
||||
// TW: On my laptop RTX 3080 the FPS of some FEM cloth VTs actually drops by about 10% when enabling optimized traversal...
|
||||
#define OPTIMIZE_TRAVERSAL 0
|
||||
|
||||
// Warp-cooperative traversal of a BV32 tree (32-wide bounding-volume hierarchy).
// One full warp walks the tree together:
//   * at a branch node, each of the 32 lanes tests one child AABB via
//     callback.intersectBoxFullWarp();
//   * at a leaf node, each lane processes one referenced primitive via
//     callback.intersectPrimitiveFullWarp().
// Parameters:
//   bv32PackedNodes - the packed node array; node 0 is the root.
//   sBv32Nodes      - caller-provided stack of encoded node indices. It is read
//                     and written inside the loop, so it is expected to live in
//                     shared memory (see the __syncwarp comments below) — confirm
//                     against callers.
//   callback        - traversal callbacks; see BV32TraversalTemplate for the
//                     required interface.
// Node index encoding (per the ML comment further down): bit 0 = leaf flag,
// bits 1..5 = leaf row offset, bits 6.. = node array index.
template<typename T, unsigned int WarpsPerBlock>
__device__ static void bv32TreeTraversal(
	const physx::Gu::BV32DataPacked* bv32PackedNodes,
	int* sBv32Nodes,
	T& callback
)
{
#if OPTIMIZE_TRAVERSAL
	// One staging copy of the current node per warp of the block.
	__shared__ Gu::BV32DataPacked shNode[WarpsPerBlock];
	const unsigned int warpId = threadIdx.y;
#endif

	//thread index in warp
	const unsigned int idxInWarp = threadIdx.x;

	PxU32 currentNodeIndex = 0; // start at the root
	const Gu::BV32DataPacked* PX_RESTRICT currentNode = &bv32PackedNodes[0];

	PxU32 nbStack = 0; // number of nodes on the stack of to-be-expanded nodes

	while (currentNode)
	{
#if OPTIMIZE_TRAVERSAL
		//This optimization was found in cudaParticleSystem.cu, line 2373
		// VR: This variant works ~6 times *faster* on my GTX 1080
		Gu::BV32DataPacked& node = shNode[warpId];
		warpCopy((uint*)&node, (uint*)currentNode, sizeof(Gu::BV32DataPacked));
		__syncwarp();
#else
		// VR: This variant works ~6 times *slower* on my GTX 1080
		const Gu::BV32DataPacked& node = *currentNode;
#endif

		__syncwarp(); //sBv32Nodes (shared memory) is read and written in the same loop - read and write must be separated with a sync

		//check to see whether this is a leaf or not (bit 0 of the encoded index)
		if (currentNodeIndex & 1)
		{
			// Bits 1..5 of the encoded index select which leaf entry of the node to use.
			const PxU32 leafOffset = (currentNodeIndex >> 1) & 31;
			//at max 32 primitives at a leaf node
			const PxU32 nbPrimitives = node.getNbReferencedPrimitives(leafOffset);
			const PxU32 primStartIndex = node.getPrimitiveStartIndex(leafOffset);

			// now every node checks a primitive (triangle, tetrahedron etc) against the query geometry
			// result variables: lanes beyond nbPrimitives pass 0xffffffff (= no work)
			PxU32 primitiveIdx = 0xffffffff;

			if (idxInWarp < nbPrimitives)
			{
				primitiveIdx = idxInWarp + primStartIndex;
			}
			callback.intersectPrimitiveFullWarp(primitiveIdx, idxInWarp);
		}
		else // not a leave node
		{
			const PxU32 nbChildren = node.mNbNodes;

			// there are max 32 children nodes. Every thread checks whether its child's bounding box
			// would intersect with the query object (which can be a box, a ray or anything)

			PxVec3 min, max;
			bool hasBox = idxInWarp < nbChildren;
			if (hasBox)
			{
				min = PxLoad3(reinterpret_cast<const float4&>(node.mMin[idxInWarp]));
				max = PxLoad3(reinterpret_cast<const float4&>(node.mMax[idxInWarp]));
			}
			// Every lane must participate even without a box: the callback is a
			// full-warp operation.
			bool intersect = callback.intersectBoxFullWarp(hasBox, min, max);

			// ML: the first one bit is to decide whether the node is a leaf or not.The subsequence 5 bits is used to
			//store the row index of mData so that if the node is leaf we can get the the reference triangle. The subsequence 26 bits
			//is used to store either the next node index if the node isn't a leaf or store the parent node index if the node is leaf node.

			//Masking currentNodeIndex with 0xFFFFFFC0 clears the 6 lowest bits, so we can mask in our idxInWarp<<1 | 1 for leaf masks
			PxU32 currentIndex = ((currentNodeIndex) & 0xFFFFFFC0) | (idxInWarp << 1) | 1;

			//create node bitmap: one bit per lane whose child must be visited
			bool isValidNode = false;
			if (idxInWarp < nbChildren)
			{
				const PxU32 childOffset = node.getChildOffset(idxInWarp);

				isValidNode = intersect;

				// Interior children overwrite the leaf encoding with their node
				// array index shifted into bits 6 and up (leaf bit left clear).
				if (isValidNode && !node.isLeaf(idxInWarp))
				{
					currentIndex = (childOffset) << 6;
				}
			}

			// Ballot gathers the per-lane decisions so every lane can compute
			// a compact, contiguous set of stack slots.
			PxU32 resultWarp = __ballot_sync(FULL_MASK, isValidNode);
			PxU32 offset = warpScanExclusive(resultWarp, idxInWarp);
			PxU32 validCount = __popc(resultWarp);

			if (isValidNode)
			{
				sBv32Nodes[nbStack + offset] = currentIndex;
			}

			nbStack += validCount;
		}

		__syncwarp();

		// select the next node from the stack, same for all warps
		if (nbStack > 0)
		{
			currentNodeIndex = sBv32Nodes[--nbStack];
			currentNode = &bv32PackedNodes[currentNodeIndex >> 6];
		}

		else
			currentNode = NULL; // stack empty: traversal finished
	}
}
|
||||
|
||||
// Warp-cooperative push onto a bounded stack.
// Lanes with threadPushesElement == true reserve contiguous slots through a single
// warp-wide exclusive scan + atomic (globalScanExclusiveSingleWarp) and write their
// item. Items whose reserved slot falls beyond stackSize are silently dropped.
// Must be called by a full warp.
PX_FORCE_INLINE __device__ void pushOntoStackFullWarp(bool threadPushesElement, PxU32* atomicCounter, uint4* PX_RESTRICT stackPtr, PxU32 stackSize, const uint4& stackItemToPush)
{
	PxU32 stackIndex = globalScanExclusiveSingleWarp(threadPushesElement, atomicCounter);

	// Each thread stores its contact. If we run out of stacksize, drop contacts
	if (threadPushesElement && (stackIndex < stackSize))
	{
		stackPtr[stackIndex] = stackItemToPush;
	}
}
|
||||
|
||||
#endif
|
||||
476
engine/third_party/physx/source/gpunarrowphase/src/CUDA/capsuleTriangle.cuh
vendored
Normal file
476
engine/third_party/physx/source/gpunarrowphase/src/CUDA/capsuleTriangle.cuh
vendored
Normal file
@@ -0,0 +1,476 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __CAPSULE_COLLISION_CUH__
|
||||
#define __CAPSULE_COLLISION_CUH__
|
||||
|
||||
#include "distanceSegmentSegment.cuh"
|
||||
|
||||
#define ETD_CONVEX_EDGE_01 (1 << 3) // PT: important value, don't change
|
||||
#define ETD_CONVEX_EDGE_12 (1 << 4) // PT: important value, don't change
|
||||
#define ETD_CONVEX_EDGE_20 (1 << 5) // PT: important value, don't change
|
||||
|
||||
// Decides whether the triangle's face normal is authoritative for a contact whose
// closest point has barycentric coordinates (u, v). The point is classified into a
// feature region (vertex, edge or face); for vertex/edge regions the face normal is
// only used when none of the adjacent edges are flagged convex in triFlags
// (ETD_CONVEX_EDGE_*). Interior (face) contacts always use the face normal.
__device__ __forceinline__ bool selectNormal(const PxReal u,
	const PxReal v, PxU8 triFlags)
{
	const PxReal lo = 1e-6f;
	const PxReal hi = 0.999999f;

	if (u < lo)
	{
		if (v < lo)
			return (triFlags & (ETD_CONVEX_EDGE_01 | ETD_CONVEX_EDGE_20)) == 0;	// vertex 0 region
		if (v > hi)
			return (triFlags & (ETD_CONVEX_EDGE_12 | ETD_CONVEX_EDGE_20)) == 0;	// vertex 2 region
		return (triFlags & ETD_CONVEX_EDGE_20) == 0;	// edge 0-2 region
	}

	if (u > hi)
	{
		if (v < lo)
			return (triFlags & (ETD_CONVEX_EDGE_01 | ETD_CONVEX_EDGE_12)) == 0;	// vertex 1 region
		return false;	// no feature matched
	}

	if (v < lo)
		return (triFlags & ETD_CONVEX_EDGE_01) == 0;	// edge 0-1 region

	if (u + v >= 0.9999f)
		return (triFlags & ETD_CONVEX_EDGE_12) == 0;	// edge 1-2 region

	return true;	// face region: face normal always valid
}
|
||||
|
||||
// True when (v, w) are valid triangle barycentric coordinates, i.e. the point
// a + v*(b-a) + w*(c-a) lies inside the triangle, with a small epsilon of
// tolerance on every boundary.
__device__ __forceinline__ bool isValidTriangleBarycentricCoord(
	const PxReal v, const PxReal w)
{
	const PxReal tol = 1e-6f;
	const PxReal lower = -tol;
	const PxReal upper = 1.f + tol;

	if (v < lower || v > upper)
		return false;
	if (w < lower || w > upper)
		return false;
	return (v + w) <= upper;
}
|
||||
|
||||
/*
	Squared distance between segment (p, q) and triangle (a, b, c).

	Outputs:
	t is the barycenteric coordinate of the segment (0 at p, 1 at q)
	u is the barycenteric coordinate of the triangle (along edge ab)
	v is the barycenteric coordinate of the triangle (along edge ac)
	so the closest triangle point is a + u*(b-a) + v*(c-a).

	Returns 0 when the segment pierces the triangle's interior.
*/
__device__ __forceinline__ PxReal distanceSegmentTriangleSquared(
	const PxVec3& p, const PxVec3& q,
	const PxVec3& a, const PxVec3& b,
	const PxVec3& c, PxReal& t, PxReal& u,
	PxReal& v)
{
	const PxVec3 pq = q - p;
	const PxVec3 ab = b - a;
	const PxVec3 ac = c - a;
	const PxVec3 bc = c - b;
	const PxVec3 ap = p - a;
	const PxVec3 aq = q - a;

	//This is used to calculate the barycentric coordinate
	const PxReal d00 = ab.dot(ab);
	const PxReal d01 = ab.dot(ac);
	const PxReal d11 = ac.dot(ac);
	const PxReal tDenom = d00 * d11 - d01 * d01;

	// Degenerate (zero-area) triangles get PX_MAX_F32 instead of a divide by zero.
	const PxReal bdenom = tDenom > 0.f ? 1.f / tDenom : PX_MAX_F32;// FSel(FIsGrtr(tDenom, zero), FRecip(tDenom), zero);

	const PxVec3 n = (ab.cross(ac)).getNormalized();// V3Normalize(V3Cross(ab, ac)); // normalize vector

	//compute the closest point of p and triangle plane abc
	const PxReal dist3 = ap.dot(n);// V3Dot(ap, n);
	const PxReal sqDist3 = dist3 * dist3;

	//compute the closest point of q and triangle plane abc
	const PxReal dist4 = aq.dot(n);// V3Dot(aq, n);
	const PxReal sqDist4 = dist4 * dist4;// FMul(dist4, dist4);
	const PxReal dMul = dist3 * dist4;//FMul(dist3, dist4);

	// p and q on opposite sides of the plane: the segment may pierce the triangle
	if (dMul <= 0.f)
	{
		//compute the intersect point
		const PxReal nom = -n.dot(ap); // FNeg(V3Dot(n, ap));
		const PxReal denom = 1.f / n.dot(pq);// FRecip(V3Dot(n, pq));
		const PxReal t0 = nom * denom; // FMul(nom, denom);
		const PxVec3 ip = p + pq * t0;// V3ScaleAdd(pq, t, p);
		const PxVec3 v2 = ip - a;// V3Sub(ip, a);
		const PxReal d20 = v2.dot(ab);// V3Dot(v2, ab);
		const PxReal d21 = v2.dot(ac);// V3Dot(v2, ac);
		const PxReal v00 = (d11 * d20 - d01 * d21) * bdenom;
		const PxReal w00 = (d00 * d21 - d01 * d20) * bdenom;
		if (isValidTriangleBarycentricCoord(v00, w00))
		{
			// Intersection point lies inside the triangle: true penetration.
			t = t0;
			u = v00;
			v = w00;
			return 0.f;
		}
	}

	// Project p onto the triangle plane and get its barycentric coordinates.
	const PxVec3 closestP31 = p - n * dist3;// V3NegScaleSub(n, dist3, p);
	//const PxVec3 closestP30 = p;

	//Compute the barycentric coordinate for project point of p
	const PxVec3 pV20 = closestP31 - a;
	const PxReal pD20 = pV20.dot(ab);
	const PxReal pD21 = pV20.dot(ac);
	const PxReal v0 = (d11 * pD20 - d01 * pD21) * bdenom;
	const PxReal w0 = (d00 * pD21 - d01 * pD20) * bdenom;

	//check closestP3 is inside the triangle
	const bool con0 = isValidTriangleBarycentricCoord(v0, w0);

	// Same projection for q.
	const PxVec3 closestP41 = q - n * dist4;// V3NegScaleSub(n, dist4, q);
	//const PxVec3 closestP40 = q;

	//Compute the barycentric coordinate for project point of q
	const PxVec3 qV20 = closestP41 - a;
	const PxReal qD20 = qV20.dot(ab);
	const PxReal qD21 = qV20.dot(ac);
	const PxReal v1 = (d11 * qD20 - d01 * qD21) * bdenom;
	const PxReal w1 = (d00 * qD21 - d01 * qD20) * bdenom;

	const bool con1 = isValidTriangleBarycentricCoord(v1, w1);

	if (con0 && con1)
	{
		/*
			both p and q project points are interior point:
			the closer endpoint projection is the answer.
		*/
		if (sqDist4 > sqDist3)
		{
			t = 0.f;
			u = v0;
			v = w0;

			return sqDist3;
		}
		else
		{
			t = 1.f;
			u = v1;
			v = w1;

			return sqDist4;
		}
	}
	else
	{
		// At least one projection falls outside the triangle: the closest point
		// may be on one of the three triangle edges. Compute segment-segment
		// distances against edges ab, bc and ac.
		PxReal t00, t01, t02;
		PxReal t10, t11, t12;

		distanceSegmentSegmentSquared(p, pq, a, ab, t00, t10);
		distanceSegmentSegmentSquared(p, pq, b, bc, t01, t11);
		distanceSegmentSegmentSquared(p, pq, a, ac, t02, t12);

		const PxVec3 closestP00 = p + pq * t00;// V3ScaleAdd(pq, t00, p);
		const PxVec3 closestP01 = a + ab * t10;// V3ScaleAdd(ab, t01, a);
		PxVec3 tempV = closestP00 - closestP01;
		const PxReal sqDist0 = tempV.dot(tempV);

		const PxVec3 closestP10 = p + pq * t01;// V3ScaleAdd(pq, t10, p);
		const PxVec3 closestP11 = b + bc * t11;// V3ScaleAdd(bc, t11, b);
		tempV = closestP10 - closestP11;
		const PxReal sqDist1 = tempV.dot(tempV);

		const PxVec3 closestP20 = p + pq * t02;// V3ScaleAdd(pq, t20, p);
		const PxVec3 closestP21 = a + ac * t12;// V3ScaleAdd(ac, t21, a);
		tempV = closestP20 - closestP21;
		const PxReal sqDist2 = tempV.dot(tempV);

		// Convert each per-edge parameter into triangle barycentric coordinates.
		//edge ab
		const PxReal u10 = t10;
		const PxReal v10 = 0.f;

		//edge bc
		const PxReal u11 = 1.f - t11;
		const PxReal v11 = t11;

		//edge ac
		const PxReal u12 = 0.f;
		const PxReal v12 = t12;

		// Select the closest of the three edges.
		PxReal sqDistPE;
		PxReal uEdge, vEdge, tSeg;
		if (sqDist1 > sqDist0 && sqDist2 > sqDist0)
		{
			//edge ab
			sqDistPE = sqDist0;
			uEdge = u10;
			vEdge = v10;
			tSeg = t00;
		}
		else if (sqDist2 > sqDist1)
		{
			//edge bc
			sqDistPE = sqDist1;
			uEdge = u11;
			vEdge = v11;
			tSeg = t01;
		}
		else
		{
			//edge ac
			sqDistPE = sqDist2;
			uEdge = u12;
			vEdge = v12;
			tSeg = t02;
		}

		if (con0)
		{
			//p's project point is an interior point: compare it against the best edge
			if (sqDistPE > sqDist3)
			{
				t = 0.f;
				u = v0;
				v = w0;
				return sqDist3;
			}
			else
			{
				t = tSeg;
				u = uEdge;
				v = vEdge;
				return sqDistPE;
			}
		}
		else if (con1)
		{
			//q's project point is an interior point: compare it against the best edge

			if (sqDistPE > sqDist4)
			{
				t = 1.f;
				u = v1;
				v = w1;
				return sqDist4;
			}
			else
			{
				t = tSeg;
				u = uEdge;
				v = vEdge;
				return sqDistPE;
			}
		}
		else
		{
			// Neither projection is interior: the best edge wins.
			t = tSeg;
			u = uEdge;
			v = vEdge;
			return sqDistPE;
		}
	}
}
|
||||
|
||||
// Capsule-endpoint vs triangle-face contact generation.
// Casts each capsule segment end point (p and q) along -normal onto the plane of
// triangle (a, b, c) (plane normal planeNormal). An end point contributes a contact
// when its cast hits inside the triangle and its distance along the cast is below
// inflatedRadius. Writes up to two entries into outContacts (contact position, taken
// at the segment end point) and outPen (the cast distance) and returns how many
// were written (0..2). outContacts/outPen must each have room for 2 entries.
__device__ __forceinline__ PxU32 generateContacts(
	const PxVec3& a, const PxVec3& b,
	const PxVec3& c, const PxVec3& planeNormal,
	const PxVec3& normal, const PxVec3& p,
	const PxVec3& q, const PxReal inflatedRadius,
	PxVec3* outContacts, PxReal* outPen)
{
	const PxVec3 ab = b - a;
	const PxVec3 ac = c - a;
	const PxVec3 ap = p - a;
	const PxVec3 aq = q - a;

	//This is used to calculate the barycentric coordinate
	const PxReal d00 = ab.dot(ab);// V3Dot(ab, ab);
	const PxReal d01 = ab.dot(ac);// V3Dot(ab, ac);
	const PxReal d11 = ac.dot(ac);// V3Dot(ac, ac);
	const PxReal tdenom = (d00 * d11 - d01 * d01);
	// Degenerate (zero-area) triangles get PX_MAX_F32 instead of a divide by zero.
	const PxReal bdenom = (tdenom > 0.f) ? (1.f / tdenom) : PX_MAX_F32;

	//compute the intersect point of p and triangle plane abc
	const PxReal inomp = planeNormal.dot(-ap);// V3Dot(planeNormal, V3Neg(ap));
	const PxReal ideom = planeNormal.dot(normal);// V3Dot(planeNormal, normal);

	// Distance from p to the plane along 'normal'; PX_MAX_F32 when the cast
	// direction is parallel to (or facing away from) the plane.
	const PxReal ipt = ideom > 0.f ? -(inomp / ideom) : PX_MAX_F32;

	const PxVec3 closestP31 = p - normal * ipt;// V3ScaleAdd(normal, ipt, p);
	//const PxVec3 closestP30 = p;

	//Compute the barycentric coordinate for project point of p
	const PxVec3 pV20 = closestP31 - a;
	const PxReal pD20 = pV20.dot(ab);
	const PxReal pD21 = pV20.dot(ac);
	const PxReal v0 = (d11 * pD20 - d01 * pD21) * bdenom;
	const PxReal w0 = (d00 * pD21 - d01 * pD20) * bdenom;

	//check closestP3 is inside the triangle
	const bool con0 = isValidTriangleBarycentricCoord(v0, w0);

	PxU32 numContacts = 0;

	if (con0 && (inflatedRadius > ipt))
	{
		outContacts[numContacts] = p;
		outPen[numContacts] = ipt;
		numContacts++;
	}

	// Same test for the other segment end point q.
	const PxReal inomq = planeNormal.dot(-aq);

	const PxReal iqt = ideom > 0.f ? -(inomq / ideom) : PX_MAX_F32;

	const PxVec3 closestP41 = q - normal * iqt;// V3ScaleAdd(normal, iqt, q);
	//const PxVec3 closestP40 = q;

	//Compute the barycentric coordinate for project point of q
	const PxVec3 qV20 = closestP41 - a;// V3Sub(closestP41, a);
	const PxReal qD20 = qV20.dot(ab);// V3Dot(qV20, ab);
	const PxReal qD21 = qV20.dot(ac);// V3Dot(qV20, ac);
	const PxReal v1 = (d11 * qD20 - d01 * qD21) * bdenom;
	const PxReal w1 = (d00 * qD21 - d01 * qD20) * bdenom;

	const bool con1 = isValidTriangleBarycentricCoord(v1, w1);

	if (con1 && (inflatedRadius > iqt))
	{
		outContacts[numContacts] = q;
		outPen[numContacts] = iqt;
		numContacts++;
	}

	return numContacts;
}
|
||||
|
||||
// Edge-vs-edge contact generation.
// Intersects the capsule segment (p, q) with the plane spanned by triangle edge
// (a, b) and the contact normal, then checks whether the intersection point lies
// within the edge's extent (a 2D segment-segment test). A contact is appended to
// outContacts/outPen (advancing numContacts) when the squared edge-to-segment
// distance is below sqInflatedRadius; outPen receives the distance signed along
// 'normal'. The caller is responsible for ensuring the output arrays have room.
__device__ __forceinline__ void generateEE(const PxVec3& p, const PxVec3& q, const PxReal sqInflatedRadius,
	const PxVec3& normal, const PxVec3& a, const PxVec3& b,
	PxVec3* outContacts, PxReal* outPen, PxU32& numContacts)
{
	const PxVec3 ab = b - a;
	// Plane through edge ab containing the contact normal direction.
	const PxVec3 n = ab.cross(normal);// V3Cross(ab, normal);
	const PxReal d = n.dot(a);// V3Dot(n, a);
	const PxReal np = n.dot(p);// V3Dot(n, p);
	const PxReal nq = n.dot(q);// V3Dot(n, q);
	const PxReal signP = np - d;// FSub(np, d);
	const PxReal signQ = nq - d;
	const PxReal temp = signP * signQ;
	if (temp > 0.f) return;//If both points in the same side as the plane, no intersect points

	// if colliding edge (p3,p4) and plane are parallel return no collision
	const PxVec3 pq = q - p;
	const PxReal npq = n.dot(pq);// V3Dot(n, pq);
	if (npq == 0.f) return;

	//calculate the intersect point in the segment pq
	const PxReal segTValue = (d - np) / npq;//FDiv(FSub(d, np), npq);
	const PxVec3 localPointA = p + pq * segTValue;// V3ScaleAdd(pq, segTValue, p);

	//calculate a normal perpendicular to ray localPointA + normal, 2D segment segment intersection
	const PxVec3 perNormal = normal.cross(pq);// V3Cross(normal, pq);
	const PxVec3 ap = localPointA - a;// V3Sub(localPointA, a);
	const PxReal nom = perNormal.dot(ap);// V3Dot(perNormal, ap);
	const PxReal denom = perNormal.dot(ab);// V3Dot(perNormal, ab);

	// Parameter of the intersection along edge ab; reject if outside [0, 1].
	const PxReal tValue = nom / denom;// FDiv(nom, denom);
	if (tValue > 1.f || tValue < 0.f)
		return;

	// Vector from the closest point on ab to the intersection point on pq.
	const PxVec3 v = ap - ab * tValue;// V3NegScaleSub(ab, tValue, ap);
	const PxReal sqDist = v.dot(v); // V3Dot(v, v);

	if (sqInflatedRadius > sqDist)
	{
		const PxReal signedDist = v.dot(normal);
		outContacts[numContacts] = localPointA;
		outPen[numContacts] = signedDist;
		numContacts++;
	}
}
|
||||
|
||||
// Generates edge-edge contacts between the segment (p, q) and the three edges
// of triangle (a, b, c), accumulating into outContacts/outPens until at most
// two contacts have been found. numContacts is updated in place by generateEE.
__device__ __forceinline__ void generateEEContacts(const PxVec3& a, const PxVec3& b,
	const PxVec3& c, const PxVec3& normal,
	const PxVec3& p, const PxVec3& q, const PxReal sqInflatedRadius,
	PxVec3* outContacts, PxReal* outPens, PxU32& numContacts)
{
	// Triangle edges tested in the same order as before: (a,b), (b,c), (a,c).
	const PxVec3* edgeEnds[3][2] = { { &a, &b }, { &b, &c }, { &a, &c } };

	// Stop as soon as two contacts have been accumulated.
	for (PxU32 e = 0; e < 3 && numContacts < 2; ++e)
	{
		generateEE(p, q, sqInflatedRadius, normal,
			*edgeEnds[e][0], *edgeEnds[e][1],
			outContacts, outPens, numContacts);
	}
}
|
||||
|
||||
#endif
|
||||
292
engine/third_party/physx/source/gpunarrowphase/src/CUDA/compressOutputContacts.cu
vendored
Normal file
292
engine/third_party/physx/source/gpunarrowphase/src/CUDA/compressOutputContacts.cu
vendored
Normal file
@@ -0,0 +1,292 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <assert.h>
|
||||
|
||||
//#include "foundation/PxVec3.h"
|
||||
//#include "foundation/PxTransform.h"
|
||||
#include "PxgContactManager.h"
|
||||
#include "PxsContactManagerState.h"
|
||||
#include "PxContact.h"
|
||||
#include "PxgNpKernelIndices.h"
|
||||
#include "PxgCommonDefines.h"
|
||||
#include "reduction.cuh"
|
||||
#include "stdio.h"
|
||||
#include "PxNodeIndex.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
extern "C" __host__ void initNarrowphaseKernels17() {}
|
||||
|
||||
// Stage 1 of contact compression: counts, per CUDA block, how many contact
// managers produced at least one contact, and writes that count to
// gBlockNumPairs[blockIdx.x]. Stage 2 turns these per-block counts into an
// exclusive prefix sum to compute stable output indices.
extern "C" __global__ void compressContactStage1(
	PxsContactManagerOutput* outputData, //input: per-pair NP outputs (only nbContacts is read)
	PxU32 numTotalPairs, //input: total number of contact-manager pairs
	PxU32* gBlockNumPairs //output: one count per CUDA block
)
{
	const PxU32 warpPerBlock = PxgNarrowPhaseBlockDims::COMPRESS_CONTACT / WARP_SIZE;

	// Per-warp counts of pairs-with-contacts, plus a block-wide running total.
	__shared__ PxU32 sWarpAccum[warpPerBlock];
	__shared__ PxU32 sAccum;

	const PxU32 block_size = PxgNarrowPhaseGridDims::COMPRESS_CONTACT;

	// Distribute the pair range evenly: each block processes
	// numIterationPerBlock contiguous chunks of blockDim.x pairs.
	const PxU32 totalBlockRequired = (numTotalPairs + (blockDim.x - 1)) / blockDim.x;

	const PxU32 numIterationPerBlock = (totalBlockRequired + (block_size - 1)) / block_size;

	const PxU32 idx = threadIdx.x;

	const PxU32 threadIndexInWarp = threadIdx.x&(WARP_SIZE - 1);
	const PxU32 warpIndex = threadIdx.x / WARP_SIZE;

	// A single thread zeroes the block accumulator before the loop.
	if (idx == (WARP_SIZE - 1))
	{
		sAccum = 0;
	}

	__syncthreads();

	for (PxU32 i = 0; i < numIterationPerBlock; ++i)
	{
		const PxU32 workIndex = i * blockDim.x + idx + numIterationPerBlock * blockIdx.x * blockDim.x;

		PxU32 nbContacts = 0;

		if (workIndex < numTotalPairs)
		{
			nbContacts = outputData[workIndex].nbContacts;
		}

		// We don't care how many contacts each pair has, only whether it has
		// any: ballot across the warp and popcount the set lanes.
		const PxU32 contactAccum = __popc(__ballot_sync(FULL_MASK, nbContacts));

		if (threadIndexInWarp == (WARP_SIZE - 1))
		{
			sWarpAccum[warpIndex] = contactAccum;
			//printf("blockIdx %i warpIndex %i contactAccum %i\n", blockIdx.x, warpIndex, contactAccum);
		}

		__syncthreads();

		// Reduce the per-warp counts (one lane per warp slot) into sAccum.
		unsigned mask_idx = __ballot_sync(FULL_MASK, threadIndexInWarp < warpPerBlock);
		if (idx < warpPerBlock)
		{
			const PxU32 value = sWarpAccum[idx];

			const PxU32 res = warpReduction<AddOpPxU32, PxU32>(mask_idx, value);

			if (idx == (warpPerBlock - 1))
			{
				sAccum += res;
			}
		}

		__syncthreads();

	}

	// One thread publishes the block's total pairs-with-contacts.
	if (idx == (warpPerBlock - 1))
	{
		gBlockNumPairs[blockIdx.x] = sAccum;

		//printf("blockIdx.x %i numPairs %i \n", blockIdx.x, sAccum);
	}
}
|
||||
|
||||
//PxgNarrowPhaseGridDims::COMPRESS_CONTACT is 32, which means we can use one warp to do the run sum for
|
||||
//block
|
||||
// Stage 2 of contact compression: computes a stable, densely packed output
// index for every pair that produced contacts, and writes a PxGpuContactPair
// record per such pair with its patch/contact/force pointers rebased from the
// CPU (pinned) buffers to the corresponding GPU buffers.
//
// PxgNarrowPhaseGridDims::COMPRESS_CONTACT is 32, which means we can use one
// warp to do the run sum (prefix scan) over the per-block counts from stage 1.
extern "C" __global__ void compressContactStage2(
	const PxgContactManagerInput* inputData, //input: per-pair shape/transform refs
	const PxsContactManagerOutput* outputData, //input: per-pair NP outputs
	const PxU32 numTotalPairs, //input
	PxNodeIndex* shapeToRigidRemapTable, //input: transform cache ref -> island node index
	PxActor** transformCacheIdToActorTable, //input: transform cache ref -> actor
	PxU32* gBlockNumPairs, //input: per-block counts produced by stage 1
	PxU8* cpuCompressedPatchesBase, //input: base of the CPU-visible patch stream
	PxU8* cpuCompressedContactsBase, //input: base of the CPU-visible contact stream
	PxReal* cpuForceBufferBase, //input: base of the CPU-visible force buffer
	PxU8* gpuCompressedPatchesBase, //input: GPU mirror of the patch stream
	PxU8* gpuCompressedContactsBase, //input: GPU mirror of the contact stream
	PxReal* gpuForceBufferBase, //input: GPU mirror of the force buffer
	PxU8* gpuFrictionPatchesBase, //input: GPU friction patch stream
	PxU32* numPairs, //output (written once): total number of pairs with contacts
	PxGpuContactPair* outputPatchs, //output: compacted contact pair records
	const PxU32 maxOutputPatches //input: capacity of outputPatchs

)
{
	const PxU32 warpPerBlock = PxgNarrowPhaseBlockDims::COMPRESS_CONTACT / WARP_SIZE;
	const PxU32 block_size = PxgNarrowPhaseGridDims::COMPRESS_CONTACT;


	__shared__ PxU32 sWarpAccum[warpPerBlock];      // per-warp exclusive offsets within this block
	__shared__ PxU32 sBlockHistogram[block_size];   // exclusive prefix sum of per-block counts
	__shared__ PxU32 sAccum;                        // running count of pairs already emitted by this block

	const PxU32 idx = threadIdx.x;

	const PxU32 threadIndexInWarp = threadIdx.x&(WARP_SIZE - 1);
	const PxU32 warpIndex = threadIdx.x / WARP_SIZE;

	if (idx == (WARP_SIZE - 1))
	{
		sAccum = 0;
	}

	//accumulate num pairs per block and compute exclusive run sum
	// (block_size == 32, so warp 0 alone can scan all per-block counts)
	unsigned mask_idx = __ballot_sync(FULL_MASK, threadIndexInWarp < block_size);
	if (warpIndex == 0 && threadIndexInWarp < block_size)
	{
		const PxU32 blockNumPairs = gBlockNumPairs[threadIndexInWarp];

		const PxU32 result = warpScan<AddOpPxU32, PxU32>(mask_idx, blockNumPairs);
		//store exclusive run sum
		sBlockHistogram[threadIndexInWarp] = result - blockNumPairs;

		//printf("tInd %i blockNumPairs %i result %i\n", threadIndexInWarp, blockNumPairs, result);

		// The last lane of the inclusive scan holds the grand total.
		if (threadIndexInWarp == block_size - 1)
		{
			(*numPairs) = result;
			//printf("blockIdx %i blockNumPairs %i numPairs %i\n", blockIdx.x, blockNumPairs, result);
		}

	}


	__syncthreads();

	const PxU32 totalBlockRequired = (numTotalPairs + (blockDim.x - 1)) / blockDim.x;

	const PxU32 numIterationPerBlock = (totalBlockRequired + (block_size - 1)) / block_size;

	// Global starting index for this block's output records.
	const PxU32 blockStartIndex = sBlockHistogram[blockIdx.x];

	for (PxU32 i = 0; i < numIterationPerBlock; ++i)
	{
		const PxU32 workIndex = i * blockDim.x + idx + numIterationPerBlock * blockIdx.x * blockDim.x;

		PxU32 nbContacts = 0;


		if (workIndex < numTotalPairs)
		{
			nbContacts = outputData[workIndex].nbContacts;
		}

		// Mask of lanes below this one, used to get an exclusive in-warp offset.
		const PxU32 threadMask = (1 << threadIndexInWarp) - 1;

		//we don't care about how many contacts in the header. There either have contacts or doesn't
		//have contacts

		const PxU32 contactMask = __ballot_sync(FULL_MASK, nbContacts);
		const PxU32 contactAccum = __popc(contactMask);
		const PxU32 offset = __popc(contactMask & threadMask);
		if (threadIndexInWarp == (WARP_SIZE - 1))
		{
			sWarpAccum[warpIndex] = contactAccum;
		}

		// Snapshot the block-wide running total before it is advanced below;
		// the preceding barrier(s) guarantee the previous iteration's update
		// is visible here.
		const PxU32 prevAccum = sAccum;

		__syncthreads();

		// Exclusive-scan the per-warp counts and advance the running total.
		unsigned mask_idx = __ballot_sync(FULL_MASK, threadIndexInWarp < warpPerBlock);
		if (idx < warpPerBlock)
		{
			const PxU32 value = sWarpAccum[idx];

			const PxU32 res = warpScan<AddOpPxU32, PxU32>(mask_idx, value);

			sWarpAccum[idx] = res - value;

			if (idx == warpPerBlock - 1)
			{
				sAccum += res;
			}
		}

		__syncthreads();

		//write to output
		if (nbContacts > 0)
		{
			const PxgContactManagerInput& cmInput = inputData[workIndex];
			const PxsContactManagerOutput& cmOutput = outputData[workIndex];
			// lane offset + prior iterations + prior warps + prior blocks
			const PxU32 index = offset + prevAccum + sWarpAccum[warpIndex] + blockStartIndex;

			if (index < maxOutputPatches)
			{
				// Byte offsets of this pair's data within the CPU-visible streams;
				// the same offsets are valid within the GPU mirror buffers.
				PxU32 patchStartIndex = cmOutput.contactPatches - cpuCompressedPatchesBase;
				PxU32 contactIndex = cmOutput.contactPoints - cpuCompressedContactsBase;
				PxU32 forceIndex = 0xFFFFFFFF;
				if (cmOutput.contactForces)
					forceIndex = cmOutput.contactForces - cpuForceBufferBase;

				PxGpuContactPair & contact = outputPatchs[index];

				const PxU32 transformCacheRef0 = cmInput.transformCacheRef0;
				const PxU32 transformCacheRef1 = cmInput.transformCacheRef1;
				contact.transformCacheRef0 = transformCacheRef0;
				contact.transformCacheRef1 = transformCacheRef1;
				contact.nodeIndex0 = shapeToRigidRemapTable[transformCacheRef0];
				contact.nodeIndex1 = shapeToRigidRemapTable[transformCacheRef1];
				contact.contactForces = gpuForceBufferBase + forceIndex;
				// Friction patches are laid out parallel to contact patches:
				// convert the patch byte offset into a friction-patch byte offset.
				contact.frictionPatches = gpuFrictionPatchesBase + patchStartIndex / sizeof(PxContactPatch) * sizeof(PxFrictionPatch);
				contact.contactPatches = gpuCompressedPatchesBase + patchStartIndex;
				contact.contactPoints = gpuCompressedContactsBase + contactIndex;
				contact.nbPatches = cmOutput.nbPatches;
				contact.nbContacts = cmOutput.nbContacts;
				contact.actor0 = transformCacheIdToActorTable[transformCacheRef0];
				contact.actor1 = transformCacheIdToActorTable[transformCacheRef1];
			}
		}
	}
}
|
||||
|
||||
// update frictionPatches CPU pointers from contactPatches CPU pointers
|
||||
// Rebases each pair's frictionPatches pointer from its contactPatches pointer:
// friction patches live in a parallel stream, so the contact-patch byte offset
// is converted by the sizeof(PxFrictionPatch)/sizeof(PxContactPatch) ratio.
// One thread per contact-manager pair.
extern "C" __global__ void updateFrictionPatches(
	const PxU32 pairCount, //input
	const PxU8* PX_RESTRICT startContactPatches, //input
	PxU8* PX_RESTRICT startFrictionPatches, //input (but we need it non-const)
	PxsContactManagerOutput* PX_RESTRICT outputs //input/output
)
{
	const PxU32 globalThreadIdx = blockIdx.x * blockDim.x + threadIdx.x;

	// One pair per thread; excess threads simply exit.
	if (globalThreadIdx >= pairCount)
		return;

	PxsContactManagerOutput& output = outputs[globalThreadIdx];
	const size_t patchByteOffset = size_t(output.contactPatches - startContactPatches);
	output.frictionPatches = startFrictionPatches + patchByteOffset * sizeof(PxFrictionPatch) / sizeof(PxContactPatch);
}
|
||||
182
engine/third_party/physx/source/gpunarrowphase/src/CUDA/contactPatchUtils.cuh
vendored
Normal file
182
engine/third_party/physx/source/gpunarrowphase/src/CUDA/contactPatchUtils.cuh
vendored
Normal file
@@ -0,0 +1,182 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __CONTACT_PATCH_UTILS_CUH__
|
||||
#define __CONTACT_PATCH_UTILS_CUH__
|
||||
|
||||
#include "materialCombiner.cuh"
|
||||
#include "PxsContactManagerState.h"
|
||||
#include "PxgContactManager.h"
|
||||
|
||||
// Reserves space in the shared contact and force streams for one pair's
// nbContacts contacts (via atomic bump allocation) and stores the resulting
// pointers into cmOutputs[workIndex]. On buffer overflow, records the error on
// patchAndContactCounters and NULLs the output pointers.
//
// Returns the byte offset of the reserved contact space, or 0xFFFFFFFF if
// nbContacts == 0 or the contact buffer overflowed.
PX_FORCE_INLINE __device__ PxU32 setContactPointAndForcePointers(PxsContactManagerOutput* PX_RESTRICT cmOutputs, PxgPatchAndContactCounters* PX_RESTRICT patchAndContactCounters,
	PxU8* PX_RESTRICT startContactPoints, PxU8* PX_RESTRICT startContactForces, PxU32 contactBytesLimit, PxU32 forceBytesLimit, PxU32 workIndex, PxU32 nbContacts)
{
	PxU32 contactByteOffset = 0xFFFFFFFF;
	PxU32 forceAndIndiceByteOffset = 0xFFFFFFFF;

	if (nbContacts)
	{
		PxsContactManagerOutput* output = &cmOutputs[workIndex];

		// Bump-allocate from both streams; the atomics return the old counter
		// values, i.e. this pair's byte offsets.
		contactByteOffset = atomicAdd(&(patchAndContactCounters->contactsBytes), sizeof(PxContact) * nbContacts);
		forceAndIndiceByteOffset = atomicAdd(&(patchAndContactCounters->forceAndIndiceBytes), sizeof(PxU32) * nbContacts);

		if ((contactByteOffset + sizeof(PxContact)) > contactBytesLimit)
		{
			patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::CONTACT_BUFFER_OVERFLOW);
			contactByteOffset = 0xFFFFFFFF; //overflow
		}

		if ((forceAndIndiceByteOffset + sizeof(PxU32)) > forceBytesLimit)
		{
			patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::FORCE_BUFFER_OVERFLOW);
			forceAndIndiceByteOffset = 0xFFFFFFFF; //overflow
		}

		// NOTE(review): only contactByteOffset is checked here. If the force
		// buffer overflowed while the contact buffer did not, contactForces is
		// set from forceAndIndiceByteOffset == 0xFFFFFFFF — presumably callers
		// or the overflow flag make this safe; confirm upstream handling.
		if (contactByteOffset != 0xFFFFFFFF)
		{
			output->contactForces = reinterpret_cast<PxReal*>(startContactForces + forceAndIndiceByteOffset);
			output->contactPoints = startContactPoints + contactByteOffset;
		}
		else
		{
			output->contactForces = NULL;
			output->contactPoints = NULL;
		}
	}

	return contactByteOffset;
}
|
||||
|
||||
// Updates the packed status/patch-count word of cmOutputs[workIndex] for this
// frame's result (0 or 1 patches, touch/no-touch), records touch- and
// patch-change flags for the pair, and bump-allocates one PxContactPatch from
// the patch stream when there are contacts.
//
// Returns the byte offset of the reserved patch, or 0xFFFFFFFF if there are
// no contacts or the patch buffer overflowed.
PX_FORCE_INLINE __device__ PxU32 registerContactPatch(PxsContactManagerOutput* PX_RESTRICT cmOutputs, PxgPatchAndContactCounters* PX_RESTRICT patchAndContactCounters,
	PxU32* PX_RESTRICT touchChangeFlags, PxU32* PX_RESTRICT patchChangeFlags, PxU8* PX_RESTRICT startContactPatches, PxU32 patchBytesLimit, PxU32 workIndex, PxU32 nbContacts)
{
	// allflagsStart packs {prevPatches, statusFlags, numPatches, ...} into one
	// 32-bit word; u16Low/u16High/u32Low/u32High/merge are the (un)packing
	// helpers — field layout assumed from their use here, confirm in header.
	PxU32 allflags = reinterpret_cast<PxU32*>(&((cmOutputs + workIndex)->allflagsStart))[0];

	PxU8 oldStatusFlags = u16Low(u32High(allflags));
	PxU8 statusFlags = oldStatusFlags;

	// Recompute touch state from scratch for this frame.
	statusFlags &= (~PxsContactManagerStatusFlag::eTOUCH_KNOWN);

	if (nbContacts)
		statusFlags |= PxsContactManagerStatusFlag::eHAS_TOUCH;
	else
		statusFlags |= PxsContactManagerStatusFlag::eHAS_NO_TOUCH;

	PxU8 prevPatches = u16High(u32Low(allflags)); //Get out the current number of patches to store as the previous frame's number of patches
	bool previouslyHadTouch = oldStatusFlags & PxsContactManagerStatusFlag::eHAS_TOUCH;
	bool prevTouchKnown = oldStatusFlags & PxsContactManagerStatusFlag::eTOUCH_KNOWN;

	// GPU contact gen emits at most a single patch per pair.
	PxU8 numPatches = (nbContacts > 0) ? 1 : 0;

	bool currentlyHasTouch = nbContacts > 0;

	// Touch changed if the touch state flipped, or was never known.
	const bool change = (previouslyHadTouch ^ currentlyHasTouch) || (!prevTouchKnown);
	touchChangeFlags[workIndex] = change;
	patchChangeFlags[workIndex] = (prevPatches != numPatches);

	reinterpret_cast<PxU32*>(&((cmOutputs + workIndex)->allflagsStart))[0] = merge(merge(prevPatches, statusFlags),
		merge(numPatches, 0));

	cmOutputs[workIndex].nbContacts = nbContacts;

	//fill in patch information
	PxU32 patchIndex = 0xFFFFFFFF;
	if (nbContacts)
	{
		// Bump-allocate one patch; atomicAdd returns this pair's byte offset.
		patchIndex = atomicAdd(&(patchAndContactCounters->patchesBytes), sizeof(PxContactPatch));

		if ((patchIndex + sizeof(PxContactPatch)) > patchBytesLimit)
		{
			//patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::PATCH_BUFFER_OVERFLOW);
			//patchIndex = 0xFFFFFFFF; //overflow

			patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::PATCH_BUFFER_OVERFLOW);
			patchIndex = 0xFFFFFFFF; //overflow

			// Overflow: demote this pair to "no touch" and roll back the
			// packed word, contact count, and change flags accordingly.
			statusFlags &= (~PxsContactManagerStatusFlag::eTOUCH_KNOWN);
			statusFlags |= PxsContactManagerStatusFlag::eHAS_NO_TOUCH;

			reinterpret_cast<PxU32*>(&((cmOutputs + workIndex)->allflagsStart))[0] = merge(merge(prevPatches, statusFlags), 0);
			cmOutputs[workIndex].nbContacts = 0;

			touchChangeFlags[workIndex] = previouslyHadTouch;
			patchChangeFlags[workIndex] = prevPatches != 0;
		}
		else
		{
			(cmOutputs + workIndex)->contactPatches = startContactPatches + patchIndex;
		}
	}
	return patchIndex;
}
|
||||
|
||||
// Writes one PxContactPatch at byte offset patchIndex into patchStream:
// combines the two shapes' materials, then fills in the patch header (normal,
// contact count/range, friction/restitution/damping, material indices and
// flags, identity mass modifiers). No-op when patchIndex == 0xFFFFFFFF
// (no contacts, or patch allocation overflowed).
PX_FORCE_INLINE __device__ void insertIntoPatchStream(const PxsMaterialData* PX_RESTRICT materials, PxU8* PX_RESTRICT patchStream,
	const PxgShape& shape0, const PxgShape& shape1, PxU32 patchIndex, const PxVec3& normal, PxU32 nbContacts)
{
	//write to patch to stream
	if (patchIndex != 0xFFFFFFFF)
	{
		//fill in material
		PxReal restitution, dynamicFriction, staticFriction, damping;
		PxU32 materialFlags;

		PxU16 materialIndex0 = shape0.materialIndex;
		PxU16 materialIndex1 = shape1.materialIndex;

		// Combine the two shapes' material properties into the effective
		// per-patch values (outputs are passed by reference).
		combineMaterials(materials, materialIndex0,
			materialIndex1,
			materialFlags,
			staticFriction,
			dynamicFriction,
			restitution,
			damping
		);

		PxContactPatch* patch = ((PxContactPatch*)(patchStream + patchIndex));
		patch->normal = normal;
		patch->nbContacts = nbContacts;
		// All of this patch's contacts start at index 0 of its contact range.
		patch->startContactIndex = 0;
		//KS - we could probably compress this further into the header but the complexity might not be worth it
		patch->staticFriction = staticFriction;
		patch->dynamicFriction = dynamicFriction;
		patch->damping = damping;
		patch->restitution = restitution;
		patch->materialIndex0 = materialIndex0;
		patch->materialIndex1 = materialIndex1;
		assert(materialFlags <= PX_MAX_U8);
		patch->materialFlags = PxU8(materialFlags);
		patch->internalFlags = 0;
		// Identity mass modifiers: no per-contact mass scaling by default.
		patch->mMassModification.linear0 = 1.0f;
		patch->mMassModification.linear1 = 1.0f;
		patch->mMassModification.angular0 = 1.0f;
		patch->mMassModification.angular1 = 1.0f;
	}
}
|
||||
|
||||
#endif
|
||||
1279
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexCoreCollision.cu
vendored
Normal file
1279
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexCoreCollision.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
394
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexHFMidphase.cu
vendored
Normal file
394
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexHFMidphase.cu
vendored
Normal file
@@ -0,0 +1,394 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "foundation/PxBounds3.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxTransform.h"
|
||||
|
||||
#include "geometry/PxGeometry.h"
|
||||
#include "geometry/PxHeightFieldSample.h"
|
||||
|
||||
#include "PxgContactManager.h"
|
||||
#include "PxgNpKernelIndices.h"
|
||||
#include "PxgPersistentContactManifold.h"
|
||||
#include "PxgSimulationCoreDesc.h"
|
||||
|
||||
#include "PxsContactManagerState.h"
|
||||
#include "PxsTransformCache.h"
|
||||
|
||||
#include "convexNpCommon.h"
|
||||
#include "cudaNpCommon.h"
|
||||
|
||||
#include "PxgCommonDefines.h"
|
||||
#include "dataReadWriteHelper.cuh"
|
||||
#include "heightfieldUtil.cuh"
|
||||
#include "manifold.cuh"
|
||||
#include "midphaseAllocate.cuh"
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
using namespace physx;
|
||||
|
||||
extern "C" __host__ void initNarrowphaseKernels0() {}
|
||||
|
||||
PX_ALIGN_PREFIX(16)
|
||||
struct HeigtFieldDataScratch
|
||||
{
|
||||
PxTransform convexToHeightfieldNoScale;
|
||||
PxU32 convexShape_materialIndex;
|
||||
PxU32 heightfieldShape_materialIndex;
|
||||
}PX_ALIGN_SUFFIX(16);
|
||||
|
||||
|
||||
template<unsigned int WarpsPerBlock>
|
||||
__device__ static inline void heightfieldMidphaseCore(
|
||||
PxU32 numContactManagers,
|
||||
const PxReal toleranceLength,
|
||||
const PxgContactManagerInput* PX_RESTRICT cmInputs,
|
||||
const PxsCachedTransform* PX_RESTRICT transformCache,
|
||||
const PxBounds3* PX_RESTRICT bounds,
|
||||
const PxReal* PX_RESTRICT contactDistance,
|
||||
const PxgShape* PX_RESTRICT gpuShapes,
|
||||
|
||||
ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,
|
||||
PxgPersistentContactMultiManifold* PX_RESTRICT multiManifolds,
|
||||
PxsContactManagerOutput * PX_RESTRICT cmOutputs,
|
||||
uint4* PX_RESTRICT stackBasePtr,
|
||||
PxU32* PX_RESTRICT nbPairsFound,
|
||||
PxU32* PX_RESTRICT midphasePairsNum,
|
||||
PxU32* PX_RESTRICT midphasePairsNumPadded,
|
||||
|
||||
HeigtFieldDataScratch* s_warpScratch,
|
||||
const PxU32 maxPairs
|
||||
)
|
||||
{
|
||||
//thread index in warp
|
||||
const unsigned int idxInWarp = threadIdx.x;
|
||||
//wrap index
|
||||
const unsigned int warpIdx = threadIdx.y;
|
||||
|
||||
unsigned int cmIdx = warpIdx + blockIdx.x * blockDim.y;
|
||||
|
||||
//this is number of contact managers
|
||||
PxU32 nbPairsPerCM = 0;
|
||||
|
||||
if (cmIdx < numContactManagers)
|
||||
{
|
||||
PxgContactManagerInput npWorkItem;
|
||||
PxgContactManagerInput_ReadWarp(npWorkItem, cmInputs, cmIdx);
|
||||
|
||||
PxsCachedTransform transformCached, heightfieldTransformCached;
|
||||
PxsCachedTransform_ReadWarp(transformCached, transformCache + npWorkItem.transformCacheRef0);
|
||||
PxsCachedTransform_ReadWarp(heightfieldTransformCached, transformCache + npWorkItem.transformCacheRef1);
|
||||
|
||||
const PxTransform shapeToHeightfieldNoScale = heightfieldTransformCached.transform.transformInv(transformCached.transform);
|
||||
|
||||
//read convex/sphere shape
|
||||
PxgShape shape;
|
||||
PxgShape_ReadWarp(shape, gpuShapes + npWorkItem.shapeRef0);
|
||||
|
||||
|
||||
PxReal ratio, minMargin, breakingThresholdRatio;
|
||||
if (shape.type == PxGeometryType::eSPHERE)
|
||||
{
|
||||
minMargin = shape.scale.scale.x; //sphere radius
|
||||
ratio = 0.02f;
|
||||
breakingThresholdRatio = 0.05f;
|
||||
}
|
||||
else if (shape.type == PxGeometryType::eCAPSULE)
|
||||
{
|
||||
minMargin = shape.scale.scale.y; //capsule radius
|
||||
ratio = 0.02;
|
||||
breakingThresholdRatio = 0.05f;
|
||||
}
|
||||
else
|
||||
{
|
||||
PxU8* hullPtr = reinterpret_cast<PxU8*>(shape.hullOrMeshPtr);
|
||||
|
||||
const float4 extents4_f = *reinterpret_cast<float4*>(hullPtr + sizeof(float4) * 2);
|
||||
minMargin = calculatePCMConvexMargin(extents4_f, shape.scale.scale, toleranceLength);
|
||||
ratio = 0.2f;
|
||||
breakingThresholdRatio = 0.8f;
|
||||
}
|
||||
|
||||
/*PxU8* hullPtr = reinterpret_cast<PxU8*>(convexShape.hullOrMeshPtr);
|
||||
|
||||
const float4 extents4_f = *reinterpret_cast<float4*>(hullPtr + sizeof(float4) * 2);
|
||||
const PxReal minMargin = calculatePCMConvexMargin(extents4_f, convexShape.scale.scale, toleranceLength);*/
|
||||
|
||||
bool lostContacts = false;
|
||||
|
||||
const bool invalidate = invalidateManifold(shapeToHeightfieldNoScale, multiManifolds[cmIdx], minMargin, ratio);
|
||||
|
||||
if (!invalidate)
|
||||
{
|
||||
const PxReal projectBreakingThreshold = minMargin * breakingThresholdRatio;
|
||||
|
||||
lostContacts = refreshManifolds(
|
||||
shapeToHeightfieldNoScale,
|
||||
projectBreakingThreshold,
|
||||
multiManifolds + cmIdx
|
||||
);
|
||||
}
|
||||
|
||||
bool fullContactGen = invalidate || lostContacts;
|
||||
|
||||
//read heightfield shape
|
||||
PxgShape heightFieldShape;
|
||||
PxgShape_ReadWarp(heightFieldShape, gpuShapes + npWorkItem.shapeRef1);
|
||||
|
||||
if (idxInWarp == 0)
|
||||
{
|
||||
s_warpScratch->convexToHeightfieldNoScale = shapeToHeightfieldNoScale;
|
||||
s_warpScratch->convexShape_materialIndex = shape.materialIndex;
|
||||
s_warpScratch->heightfieldShape_materialIndex = heightFieldShape.materialIndex;
|
||||
}
|
||||
|
||||
__syncwarp();
|
||||
|
||||
//if invalidate is true, generate full contacts
|
||||
if (fullContactGen)
|
||||
{
|
||||
|
||||
PxU32* heightfieldData = reinterpret_cast<PxU32*>(heightFieldShape.hullOrMeshPtr);
|
||||
const PxU32 nbRows = heightfieldData[0];
|
||||
const PxU32 nbCols = heightfieldData[1];
|
||||
PxHeightFieldSample* samples = reinterpret_cast<PxHeightFieldSample*>(&heightfieldData[2]);
|
||||
|
||||
|
||||
const PxReal oneOverHeightScale = 1.f / heightFieldShape.scale.scale.y;
|
||||
const PxReal oneOverRowScale = 1.f / PxAbs(heightFieldShape.scale.scale.x);
|
||||
const PxReal oneOverlColScale = 1.f / PxAbs(heightFieldShape.scale.scale.z);
|
||||
|
||||
PxBounds3 worldBound;
|
||||
PxBounds3_ReadWarp(worldBound, bounds + npWorkItem.transformCacheRef0);
|
||||
|
||||
//bound is in world space, we need to transform the bound to the local space of height field
|
||||
PxBounds3 localBound = PxBounds3::transformFast(heightfieldTransformCached.transform.getInverse(), worldBound);
|
||||
const PxReal contactDist = contactDistance[npWorkItem.transformCacheRef0] + contactDistance[npWorkItem.transformCacheRef1];
|
||||
localBound.fattenFast(contactDist);
|
||||
|
||||
localBound.minimum.x *= oneOverRowScale;
|
||||
localBound.minimum.y *= oneOverHeightScale;
|
||||
localBound.minimum.z *= oneOverlColScale;
|
||||
|
||||
localBound.maximum.x *= oneOverRowScale;
|
||||
localBound.maximum.y *= oneOverHeightScale;
|
||||
localBound.maximum.z *= oneOverlColScale;
|
||||
|
||||
//row scale
|
||||
if (heightFieldShape.scale.scale.x < 0.f)
|
||||
{
|
||||
//swap min and max row scale
|
||||
const PxReal temp = localBound.minimum.x;
|
||||
localBound.minimum.x = localBound.maximum.x;
|
||||
localBound.maximum.x = temp;
|
||||
}
|
||||
|
||||
//col scale
|
||||
if (heightFieldShape.scale.scale.z < 0.f)
|
||||
{
|
||||
PxReal swap = localBound.minimum.z;
|
||||
localBound.minimum.z = localBound.maximum.z;
|
||||
localBound.maximum.z = swap;
|
||||
}
|
||||
|
||||
bool boundsDontOverlap = false;
|
||||
|
||||
// this tests if the complete shape is outside of the bounds of the HF in the XZ plane.
|
||||
if ((localBound.minimum.x > nbRows - 1) || (localBound.minimum.z > nbCols - 1)
|
||||
|| (localBound.maximum.x < 0) || (localBound.maximum.z < 0))
|
||||
{
|
||||
boundsDontOverlap = true;
|
||||
}
|
||||
|
||||
if (!boundsDontOverlap)
|
||||
{
|
||||
PxU32 minRow = getMinRow(localBound.minimum.x, nbRows);
|
||||
PxU32 maxRow = getMaxRow(localBound.maximum.x, nbRows);
|
||||
PxU32 minColumn = getMinColumn(localBound.minimum.z, nbCols);
|
||||
PxU32 maxColumn = getMaxColumn(localBound.maximum.z, nbCols);
|
||||
|
||||
bool noTriangles = false;
|
||||
|
||||
// AD: This test whether we have any triangles at all.
|
||||
// Given the clamping above I can only see this happening if we have a
|
||||
// flat shape that lies exactly on one of the grid lines for sampling.
|
||||
|
||||
// Also, the 2x looks unnecessary here if my basic math isn't failing me.
|
||||
// This is the same code as CPU and has been there since 2009, probably before,
|
||||
// so I'm not going to change it now.
|
||||
if ((2 * (maxColumn - minColumn) * (maxRow - minRow)) == 0)
|
||||
{
|
||||
noTriangles = true;
|
||||
}
|
||||
|
||||
if (!noTriangles)
|
||||
{
|
||||
|
||||
const PxReal miny = localBound.minimum.y;
|
||||
const PxReal maxy = localBound.maximum.y;
|
||||
|
||||
const PxU32 columnSpan = maxColumn - minColumn;
|
||||
|
||||
//we have two materials corresponding to one vertexIndex, so each thread will deal with one of the materials
|
||||
const PxU32 totalNumProcessed = (maxRow - minRow) * columnSpan * 2;
|
||||
for (PxU32 i = 0; i < totalNumProcessed; i += WARP_SIZE)
|
||||
{
|
||||
bool result = false;
|
||||
PxU32 triangleIndex = 0xFFffFFff;
|
||||
|
||||
const PxU32 workIndex = idxInWarp + i;
|
||||
|
||||
if (workIndex < totalNumProcessed)
|
||||
{
|
||||
const PxU32 index = workIndex / 2;
|
||||
const PxU32 vertexIndex = (minRow + index / columnSpan) * nbCols + (minColumn + index % columnSpan);
|
||||
assert(isValidVertex(vertexIndex, nbRows, nbCols));
|
||||
PxReal h0 = getHeight(vertexIndex, samples);
|
||||
PxReal h1 = getHeight(vertexIndex + 1, samples);
|
||||
PxReal h2 = getHeight(vertexIndex + nbCols, samples);
|
||||
PxReal h3 = getHeight(vertexIndex + nbCols + 1, samples);
|
||||
const bool con0 = maxy < h0 && maxy < h1 && maxy < h2 && maxy < h3;
|
||||
const bool con1 = miny > h0 && miny > h1 && miny > h2 && miny > h3;
|
||||
|
||||
if (!(con0 || con1))
|
||||
{
|
||||
const PxHeightFieldSample& sample = getSample(vertexIndex, samples);
|
||||
|
||||
const bool isMaterial1 = (workIndex & 1) ? 1 : 0;
|
||||
PxU32 material = isMaterial1 ? sample.materialIndex1 : sample.materialIndex0;
|
||||
if (material != PxHeightFieldMaterial::eHOLE)
|
||||
{
|
||||
triangleIndex = isMaterial1 ? ((vertexIndex << 1) + 1) : (vertexIndex << 1);
|
||||
result = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PxU32 resultWarp = __ballot_sync(FULL_MASK, result);
|
||||
PxU32 offset = warpScanExclusive(resultWarp, idxInWarp);
|
||||
PxU32 validCount = __popc(resultWarp);
|
||||
|
||||
// Allocate only amount of memory, needed for single warp-wide write
|
||||
PxU32 prevNbPairs = 0xFFffFFff;
|
||||
if (idxInWarp == 0 && validCount > 0)
|
||||
{
|
||||
prevNbPairs = atomicAdd(nbPairsFound, validCount);
|
||||
}
|
||||
|
||||
prevNbPairs = __shfl_sync(FULL_MASK, prevNbPairs, 0);
|
||||
|
||||
if (result && (prevNbPairs + offset) < maxPairs)
|
||||
{
|
||||
stackBasePtr[prevNbPairs + offset] = make_uint4(cmIdx, triangleIndex, nbPairsPerCM + offset, npWorkItem.shapeRef1);
|
||||
}
|
||||
|
||||
if ((validCount > 0) && ((validCount + prevNbPairs) >= maxPairs))
|
||||
{
|
||||
validCount = PxMax(maxPairs, prevNbPairs) - prevNbPairs;
|
||||
}
|
||||
|
||||
assert(((validCount + prevNbPairs) <= maxPairs) || (validCount == 0));
|
||||
|
||||
nbPairsPerCM += validCount;
|
||||
}
|
||||
} // noTriangles
|
||||
} // boundsDontOverlap
|
||||
}
|
||||
|
||||
PxU32 prevIntermArraysOffset = 0xFFffFFff;
|
||||
PxU32 prevIntermArraysPaddedOffset = 0xFFffFFff;
|
||||
if (idxInWarp == 0 && nbPairsPerCM > 0)
|
||||
{
|
||||
prevIntermArraysOffset = atomicAdd(midphasePairsNum, nbPairsPerCM);
|
||||
prevIntermArraysPaddedOffset = atomicAdd(midphasePairsNumPadded, ((nbPairsPerCM + 3)&(~3)) * 2); // AD: we need 2x space for the radix sort.
|
||||
}
|
||||
|
||||
prevIntermArraysOffset = __shfl_sync(FULL_MASK, prevIntermArraysOffset, 0);
|
||||
prevIntermArraysPaddedOffset = __shfl_sync(FULL_MASK, prevIntermArraysPaddedOffset, 0);
|
||||
|
||||
ConvexMeshPair pairInfo;
|
||||
pairInfo.aToB = s_warpScratch->convexToHeightfieldNoScale;
|
||||
pairInfo.cmIndex = cmIdx;
|
||||
pairInfo.startIndex = prevIntermArraysOffset;
|
||||
pairInfo.count = fullContactGen ? nbPairsPerCM : CONVEX_TRIMESH_CACHED;
|
||||
pairInfo.roundedStartIndex = prevIntermArraysPaddedOffset;
|
||||
pairInfo.materialIndices = make_uint2(s_warpScratch->convexShape_materialIndex, s_warpScratch->heightfieldShape_materialIndex);
|
||||
|
||||
ConvexMeshPair_WriteWarp(cvxTrimeshPair + cmIdx, pairInfo);
|
||||
|
||||
assert(*midphasePairsNum <= maxPairs);
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel entry point for the convex-vs-heightfield midphase stage.
// Computes the pair budget from the available stack size and forwards all
// inputs to the shared heightfieldMidphaseCore<> implementation.
extern "C" __global__ void convexHeightFieldMidphase(
	PxU32 numContactManagers,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,

	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,
	PxgPersistentContactMultiManifold* PX_RESTRICT multiManifolds,
	PxsContactManagerOutput* PX_RESTRICT cmOutputs,
	PxU8* PX_RESTRICT stackPtr,
	PxU32* PX_RESTRICT stackOffset,
	PxU32* PX_RESTRICT midphasePairsNum,
	PxU32* PX_RESTRICT midphasePairsNumPadded,
	const PxU32 stackSizeBytes
)
{
	// Per-warp scratch storage: one row of WARP_SIZE * 2 words per warp in the block.
	__shared__ PxU32 scratchMem[MIDPHASE_WARPS_PER_BLOCK][WARP_SIZE * 2];

	// Upper bound on the number of candidate pairs that fit into the stack buffer.
	const PxU32 maxPairs = calculateMaxPairs(stackSizeBytes, numContactManagers);

	heightfieldMidphaseCore<MIDPHASE_WARPS_PER_BLOCK>(
		numContactManagers,
		toleranceLength,
		cmInputs,
		transformCache,
		bounds,
		contactDistance,
		gpuShapes,

		cvxTrimeshPair,
		multiManifolds,
		cmOutputs,
		reinterpret_cast<uint4*>(stackPtr),
		stackOffset,
		midphasePairsNum,
		midphasePairsNumPadded,

		// threadIdx.y selects this warp's scratch row.
		(HeigtFieldDataScratch*)scratchMem[threadIdx.y],
		maxPairs
	);
}
|
||||
997
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexHeightfield.cu
vendored
Normal file
997
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexHeightfield.cu
vendored
Normal file
@@ -0,0 +1,997 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxTransform.h"
|
||||
#include "geometry/PxMeshScale.h"
|
||||
|
||||
#include "convexNpCommon.h"
|
||||
#include "cudaNpCommon.h"
|
||||
|
||||
#include "PxgPersistentContactManifold.h"
|
||||
#include "PxgContactManager.h"
|
||||
#include "PxgConvexConvexShape.h"
|
||||
#include "PxgNpKernelIndices.h"
|
||||
#include "PxsTransformCache.h"
|
||||
|
||||
#include "PxgCommonDefines.h"
|
||||
#include "reduction.cuh"
|
||||
#include "nputils.cuh"
|
||||
#include "midphaseAllocate.cuh"
|
||||
#include "dataReadWriteHelper.cuh"
|
||||
#include "convexTriangle.cuh"
|
||||
#include "heightfieldUtil.cuh"
|
||||
#include "sphereTriangle.cuh"
|
||||
#include "sphereCollision.cuh"
|
||||
#include "capsuleTriangle.cuh"
|
||||
#include "geometry/PxHeightFieldFlag.h"
|
||||
#include "geometry/PxGeometry.h"
|
||||
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Intentionally empty host entry point. Presumably referenced from host code
// to anchor this translation unit's kernels at link/load time — TODO confirm.
extern "C" __host__ void initNarrowphaseKernels1() {}
|
||||
|
||||
////ML: for heightfield, the maximum numbers of adjacent verts will be 6
|
||||
//#define HEIGHTFIELD_MAX_ADJACENCY_VERT_INDICE 6
|
||||
//#define HEIGHTFIELD_MAX_PROCESS_ADJACENCY_VERT_INDICE 9
|
||||
#define HEIGHTFIELD_MAX_VERTS 18 //allow duplication
|
||||
//
|
||||
//#define HEIGHTFIELD_DEBUG 1
|
||||
|
||||
// Minimal triangle used by the heightfield collision kernels: three vertices
// plus a helper for the (unnormalized) face normal.
struct PxgTriangle
{

public:
	// Cross product of the two edges emanating from verts[0]. The result is
	// NOT normalized; its length equals twice the triangle's area.
	__device__ PX_FORCE_INLINE PxVec3 denormalizedNormal() const
	{
		const PxVec3 edge01 = verts[1] - verts[0];
		const PxVec3 edge02 = verts[2] - verts[0];
		return edge01.cross(edge02);
	}

	PxVec3 verts[3];
};
|
||||
|
||||
// Fetches one heightfield triangle into heightfield shape space and,
// optionally, the indices of its three adjacent triangles.
//
// triLoc           [out] the three triangle vertices scaled into shape space.
// adjacencyIndices [out, optional] adjacent triangle indices; pass NULL to skip.
// triangleIndex    heightfield triangle to fetch.
// scale            mesh scale (x = row scale, y = height scale, z = column scale).
// rows, columns    heightfield sample grid dimensions.
// samples          heightfield sample array.
//
// A negative row or column scale mirrors the heightfield. When exactly one of
// the two is negative the winding flips, so the 2nd and 3rd vertex (and the
// matching adjacency slots) are swapped to restore the expected orientation.
//
// Note: the original code also built a `handedness` vector here, but it was
// never read; it has been removed (dead local).
__device__ static void getTriangle(PxgTriangle& triLoc, PxU32* adjacencyIndices, const PxU32 triangleIndex, const PxMeshScale& scale, const PxU32 rows, const PxU32 columns, const PxHeightFieldSample* samples)
{
	const PxReal rowScale = scale.scale.x;
	const PxReal heightScale = scale.scale.y;
	const PxReal columnScale = scale.scale.z;

	// Wrong-handed iff exactly one of the row/column scales is negative
	// (equivalent to the original pair of conditional toggles).
	const bool wrongHanded = (rowScale < 0.f) != (columnScale < 0.f);

	// bool promotes to 0/1: swaps the 2nd and 3rd vertex when wrong-handed.
	PxU32 tVertexIndices[3];
	getTriangleVertexIndices(triangleIndex, tVertexIndices[0], tVertexIndices[1 + wrongHanded], tVertexIndices[2 - wrongHanded], columns, samples);

	if (adjacencyIndices)
	{
		// Apply the matching swap to the adjacency triple.
		getTriangleAdjacencyIndices(triangleIndex, adjacencyIndices[wrongHanded ? 2 : 0], adjacencyIndices[1], adjacencyIndices[wrongHanded ? 0 : 2], rows, columns, samples);
	}

	for (PxU32 vi = 0; vi < 3; vi++)
	{
		const PxVec3 vertex = getVertex(tVertexIndices[vi], columns, samples);
		triLoc.verts[vi] = hf2shapep(vertex, rowScale, heightScale, columnScale);
	}
}
|
||||
|
||||
|
||||
|
||||
// Loads one heightfield triangle, transforms it into the convex hull's local
// space, and stores the triangle data (normal, vertices, adjacency) into the
// warp's shared scratch area. Only the first thread(s) of the warp perform the
// shared-memory writes; callers are responsible for any warp synchronization.
__device__ void buildTriangleInformations(const PxU32 triangleIdx, PxgShape& heightfieldShape, ConvexMeshScratch* s_scratch,
	PxTransform& trimeshToConvexTransform)
{
	// Heightfield geometry layout: rows, columns, then the sample array.
	const PxU32 * heightfieldGeomPtr = reinterpret_cast<const PxU32 *>(heightfieldShape.hullOrMeshPtr);

	const PxU32 rows = *heightfieldGeomPtr++;
	const PxU32 columns = *heightfieldGeomPtr++;
	const PxHeightFieldSample* samples = reinterpret_cast<const PxHeightFieldSample*>(heightfieldGeomPtr);

	//PxU32 vertexIndices[3];
	PxU32 adjacencyIndices[3];
	PxgTriangle triLocal; // height field local space triangle
	getTriangle(triLocal, adjacencyIndices, triangleIdx, heightfieldShape.scale, rows, columns, samples);

	// Triangle normal rotated into the convex hull's local frame.
	const PxVec3 triNormal = triLocal.denormalizedNormal();
	const PxVec3 triangleLocNormal = trimeshToConvexTransform.rotate(triNormal).getNormalized();

	// Thread 0 of the warp writes the per-triangle scalars/transform.
	if (threadIdx.x < 1)
	{
		s_scratch->triangleLocNormal = triangleLocNormal;
		s_scratch->trimeshToConvexTransform = trimeshToConvexTransform;
		s_scratch->trimeshVerts = NULL;
		//s_scratch->trimeshVerts = trimeshVerts;
	}

	// Threads 0..2 each write one adjacency index and one transformed vertex.
	if (threadIdx.x < 3)
	{
		//assign adjacent triangle indices to the scratch memory
		reinterpret_cast<PxU32*>(&s_scratch->triAdjTrisIdx)[threadIdx.x] = adjacencyIndices[threadIdx.x];

		//assign triangle vertices in the local space of the convex hull to the scratch memory
		s_scratch->triLocVerts[threadIdx.x] = trimeshToConvexTransform.transform(triLocal.verts[threadIdx.x]);
	}
}
|
||||
|
||||
|
||||
// Warp-cooperative narrowphase for one (convex, heightfield-triangle) pair.
// Reads the pair at pairsGPU[globalWarpIndex], stages the convex hull header
// and the heightfield triangle into shared scratch, then runs the shared
// convex-vs-triangle contact generator. Results are written through the
// per-cvx-tri output pointers at offsets derived from the ConvexMeshPair
// entry of the owning contact manager.
__device__ void convexHeightfieldNarrowphaseCore(
	PxU32 globalWarpIndex,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,

	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,					// per CM
	ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr,			// per cvx-tri
	ConvexTriContacts** PX_RESTRICT cvxTriContactsPtr,			// per cvx-tri
	PxReal** PX_RESTRICT cvxTriMaxDepthPtr,						// per cvx-tri
	ConvexTriIntermediateData** PX_RESTRICT cvxTriIntermPtr,	// per cvx-tri
	PxU32** PX_RESTRICT orderedCvxTriIntermPtr,					// per cvx-tri
	PxU32** PX_RESTRICT cvxTriSecondPassPairPtr,				// per cvx-tri

	const uint4 * PX_RESTRICT pairsGPU,
	PxU32* PX_RESTRICT nbPairsGlobal,
	PxU32* PX_RESTRICT nbSecondPassPairs,
	ConvexTriContact* tempConvexTriContacts,
	const PxU32 tempContactSizeBytes,
	PxU32* pTempContactIndex
)
{
	const PxU32 warpsPerBlock = NP_TRIMESH_WARPS_PER_BLOCK;
	const PxU32 wrapIndex = threadIdx.y;	// warp index within the block

	//for heightfield, each vertex in a triangle has a maximum of 6 adjacent vertices, which involves 12 vertices in total
	//__shared__ PxU32 sTrimeshAdjVerts[warpsPerBlock * HEIGHTFIELD_MAX_VERTS];
	// One scratch region per warp; reinterpreted below as ConvexMeshScratch.
	__shared__ volatile PxU32 sharedMem[warpsPerBlock][ WARP_SIZE * 16];
	volatile PxU32* s_WarpSharedMemory = sharedMem[wrapIndex];

	ConvexMeshScratch* s_scratch = (ConvexMeshScratch*)s_WarpSharedMemory;

	// Pair: x = contact-manager index, y = triangle index, z = per-CM test offset.
	uint4 curPair = pairsGPU[globalWarpIndex];

	PxU32 cmIdx = curPair.x;
	PxU32 triangleIdx = curPair.y;
	PxU32 testOffset = curPair.z;

	PxgContactManagerInput npWorkItem;
	PxgContactManagerInput_ReadWarp(npWorkItem, cmInputs, cmIdx);

	PxsCachedTransform convexTransformCached, trimeshTransformCached;

	PxsCachedTransform_ReadWarp(convexTransformCached, transformCache + npWorkItem.transformCacheRef0);
	PxsCachedTransform_ReadWarp(trimeshTransformCached, transformCache + npWorkItem.transformCacheRef1);

	// Transform taking heightfield-local points into the convex shape's frame.
	PxTransform trimeshToConvexTransform(convexTransformCached.transform.transformInv(trimeshTransformCached.transform));

	PxgShape convexShape;
	PxgShape_ReadWarp(convexShape, gpuShapes + npWorkItem.shapeRef0);

	PxgShape heightfieldShape;
	PxgShape_ReadWarp(heightfieldShape, gpuShapes + npWorkItem.shapeRef1);

	/*const PxU32 startIndex = wrapIndex * HEIGHTFIELD_MAX_VERTS;
	PxU32* startTrimeshAdjVerts = &sTrimeshAdjVerts[startIndex];
	float4* startTrimeshVerts = &sTrimeshVerts[startIndex];
	buildAdjacencyInformations(triangleIdx, heightfieldShape, startTrimeshAdjVerts, startTrimeshVerts, s_scratch, trimeshToConvexTransform);*/
	buildTriangleInformations(triangleIdx, heightfieldShape, s_scratch, trimeshToConvexTransform);

	// Output slots for this triangle within the CM's pair range.
	ConvexMeshPair& pair = cvxTrimeshPair[cmIdx];
	PxU32 convexTriPairOffset = pair.startIndex + testOffset;
	PxU32 convexTriPairOffsetPadded = pair.roundedStartIndex + testOffset;
	// Geometries : Convex

	// Threads 0..6 copy the 7 words of the convex mesh scale into scratch.
	if (threadIdx.x < 7)
	{
		reinterpret_cast<PxU32*>(&s_scratch->convexScale)[threadIdx.x] = reinterpret_cast<PxU32*>(&convexShape.scale)[threadIdx.x];
	}

	// Thread 0 stages the convex hull header (CoM, counts) into scratch.
	if (threadIdx.x == 0)
	{
		// Shapes
		//s_scratch->contactDist = convexShape.contactOffset + heightfieldShape.contactOffset;
		s_scratch->contactDist = contactDistance[npWorkItem.transformCacheRef0] + contactDistance[npWorkItem.transformCacheRef1];
		s_scratch->trimeshToConvexTransform = trimeshToConvexTransform;

		const PxU8* convexPtrA = reinterpret_cast<const PxU8 *>(convexShape.hullOrMeshPtr);
		s_scratch->convexPtrA = convexPtrA;

		// First float4 of the hull data holds the center of mass.
		const float4 hull0_centerOfMass_f4 = *reinterpret_cast<const float4 *>(convexPtrA);
		const PxVec3 hull0_centerOfMass(hull0_centerOfMass_f4.x, hull0_centerOfMass_f4.y, hull0_centerOfMass_f4.z);
		// Transform CoM into shape space
		PxVec3 shapeSpaceCenterOfMass0 = vertex2Shape(hull0_centerOfMass, convexShape.scale.scale, convexShape.scale.rotation);

		s_scratch->convexCenterOfMass = shapeSpaceCenterOfMass0;

		convexPtrA += sizeof(float4);

		// Next uint4: packed edge/vertex/polygon counts in .x.
		const uint4 tmp = *((uint4*)convexPtrA);
		const PxU32 polyData0_NbEdgesNbHullVerticesNbPolygons = tmp.x;
		s_scratch->nbEdgesNbHullVerticesNbPolygons = polyData0_NbEdgesNbHullVerticesNbPolygons;

		convexPtrA += sizeof(uint4);

		/*const float4 polyData0_Extents = *((float4*)(convexPtrA));
		PxVec3 convex_extents = toVec3(polyData0_Extents);
		s_scratch->extents = convex_extents;*/
	}

	// Make the scratch writes above visible to the whole warp.
	__syncwarp();

	//height field doesn't have a cpu remap triangle index, so the gpu triangle
	//index is passed for both index arguments
	convexTriangleContactGen(
		s_WarpSharedMemory, s_scratch, convexTriPairOffset, convexTriPairOffsetPadded, triangleIdx, triangleIdx, globalWarpIndex,
		cvxTriNIPtr, cvxTriContactsPtr,cvxTriMaxDepthPtr, cvxTriIntermPtr, orderedCvxTriIntermPtr, cvxTriSecondPassPairPtr, nbSecondPassPairs,
		tempConvexTriContacts, tempContactSizeBytes / sizeof(ConvexTriContact), pTempContactIndex);
}
|
||||
|
||||
// Kernel entry point for convex-vs-heightfield narrowphase.
// The first thread of the block carves the per-pair output arrays out of the
// stack buffer (midphaseAllocate); block 0 additionally publishes the pointers
// to global memory and records the temp-memory requirement. Each warp then
// processes pairs in warp-sized strides across the grid via
// convexHeightfieldNarrowphaseCore.
extern "C" __global__
__launch_bounds__(NP_TRIMESH_WARPS_PER_BLOCK * WARP_SIZE, 32 / NP_TRIMESH_WARPS_PER_BLOCK)
void convexHeightfieldNarrowphase(
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,					// per CM

	ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr,			// per cvx-tri
	ConvexTriContacts** PX_RESTRICT cvxTriContactsPtr,			// per cvx-tri
	PxReal** PX_RESTRICT cvxTriMaxDepthPtr,						// per cvx-tri
	ConvexTriIntermediateData** PX_RESTRICT cvxTriIntermPtr,	// per cvx-tri
	PxU32** PX_RESTRICT orderedCvxTriIntermPtr,					// per cvx-tri
	PxU32** PX_RESTRICT cvxTriSecondPassPairsPtr,				// per cvx-tri

	PxU8* PX_RESTRICT stackPtr,
	PxU32* PX_RESTRICT nbPairsGlobal,
	PxU32* PX_RESTRICT nbPaddedPairsGlobal,
	PxU32* PX_RESTRICT nbSecondPassPairs,
	const PxU32 stackSizeBytes,
	ConvexTriContact* tempConvexTriContacts,
	PxU32* pTempContactIndex,
	PxU32* maxTempMemRequirement,
	PxU32* midphasePairsNeeded,
	const PxU32 nbContactManagers
)
{
	// Block-shared copies of the sub-allocated output array pointers.
	__shared__ ConvexTriNormalAndIndex* sCvxTriNIPtr;
	__shared__ ConvexTriContacts* sCvxTriContactsPtr;
	__shared__ PxReal* sCvxTriMaxDepthPtr;
	__shared__ ConvexTriIntermediateData* sCvxTriIntermPtr;
	__shared__ PxU32* sOrderedCvxTriIntermPtr;
	__shared__ uint4* sPairsGPU;
	__shared__ PxU32* sCvxTriSecondPassPairsPtr;

	const PxU32 nbPairs = *nbPairsGlobal;
	const PxU32 nbPaddedPairs = *nbPaddedPairsGlobal;

	//each block assigns the corresponding ptrs from the stack memory to sCvxTriNIPtr, sCvxTriContactsPtr, sCvxTriMaxDepthPtr etc.
	if (threadIdx.x == 0 && threadIdx.y == 0)
	{
		midphaseAllocate(&sCvxTriNIPtr, &sCvxTriContactsPtr, &sCvxTriMaxDepthPtr, &sCvxTriIntermPtr, &sOrderedCvxTriIntermPtr, &sCvxTriSecondPassPairsPtr, &sPairsGPU, stackPtr, nbPairs, nbPaddedPairs);
		if (blockIdx.x == 0)
		{
			// Record the worst-case temp memory requirement for this pass.
			atomicMax(maxTempMemRequirement, calculateConvexMeshPairMemRequirement() * (*midphasePairsNeeded) + calculateAdditionalPadding(nbContactManagers));
		}
	}
	// All threads must see the pointers allocated above before using them.
	__syncthreads();

	// Block 0 publishes the sub-allocated pointers to global memory.
	if (threadIdx.x == 0 && threadIdx.y == 0 && blockIdx.x == 0)
	{
		*cvxTriNIPtr = sCvxTriNIPtr;
		*cvxTriContactsPtr = sCvxTriContactsPtr;
		*cvxTriMaxDepthPtr = sCvxTriMaxDepthPtr;
		*cvxTriIntermPtr = sCvxTriIntermPtr;
		*orderedCvxTriIntermPtr = sOrderedCvxTriIntermPtr;
		*cvxTriSecondPassPairsPtr = sCvxTriSecondPassPairsPtr;
	}

	// One pair per warp; stride by the total number of warps in the grid.
	for (PxU32 globalWarpIndex = blockIdx.x * blockDim.y + threadIdx.y; globalWarpIndex < nbPairs; globalWarpIndex += gridDim.x * blockDim.y)
	{
		convexHeightfieldNarrowphaseCore(
			globalWarpIndex,

			cmInputs,
			transformCache,
			contactDistance,
			gpuShapes,
			cvxTrimeshPair,

			&sCvxTriNIPtr,
			&sCvxTriContactsPtr,
			&sCvxTriMaxDepthPtr,
			&sCvxTriIntermPtr,
			&sOrderedCvxTriIntermPtr,
			&sCvxTriSecondPassPairsPtr,

			sPairsGPU,
			nbPairsGlobal,
			nbSecondPassPairs,
			tempConvexTriContacts,
			stackSizeBytes,
			pTempContactIndex
		);
	}
}
|
||||
|
||||
// Per-thread narrowphase for one (sphere, heightfield-triangle) pair.
// Computes the closest point between the sphere center and the triangle,
// applies adjacency-based edge filtering, and writes the per-pair outputs
// (max depth, intermediate data, normal+index). Returns the number of
// contacts generated (0 or 1).
//
// NOTE(review): several parameters (cmInputs, sphereTriContactsPtr,
// nbPairsGlobal, sphereTriSecondPassPairPtr) are unused in this body — the
// second-pass queue push is commented out and outMask is used instead.
__device__ PxU32 sphereHeightfieldNarrowphaseCore(
	PxU32 globalThreadIndex,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,

	SphereMeshPair* PX_RESTRICT sphereTrimeshPair,				// per CM
	SphereTriNormalAndIndex** PX_RESTRICT sphereTriNIPtr,		// per sphere-tri
	SphereTriContacts** PX_RESTRICT sphereTriContactsPtr,		// per sphere-tri
	PxReal** PX_RESTRICT sphereTriMaxDepthPtr,					// per sphere-tri
	SphereTriIntermediateData** PX_RESTRICT sphereTriIntermPtr,	// per sphere-tri
	PxU32** PX_RESTRICT orderedSphereTriIntermPtr,				// per sphere-tri
	PxU32** PX_RESTRICT sphereTriSecondPassPairPtr,				// per sphere-tri

	const uint4& curPair,
	const PxgContactManagerInput& npWorkItem,
	PxgShape& sphereShape,
	PxU32* PX_RESTRICT nbPairsGlobal,
	PxU32* PX_RESTRICT nbSecondPassPairs,
	PxVec3& outContact,
	PxReal& outSep,
	PxU32& outMask
)
{
	// Pair: x = contact-manager index, y = triangle index, z = per-CM test offset.
	PxU32 cmIdx = curPair.x;
	PxU32 triangleIdx = curPair.y;
	PxU32 testOffset = curPair.z;

	PxsCachedTransform sphereTransformCached = transformCache[npWorkItem.transformCacheRef0];
	PxsCachedTransform trimeshTransformCached = transformCache[npWorkItem.transformCacheRef1];

	//PxTransform trimeshToConvexTransform(sphereTransformCached.transform.transformInv(trimeshTransformCached.transform));

	PxgShape heightfieldShape = gpuShapes[npWorkItem.shapeRef1];
	//const PxU32 startIndex = threadIdx.x * HEIGHTFIELD_MAX_VERTS;
	//float4* startTrimeshVerts = &sTrimeshVerts[startIndex];

	//buildTriangleInformations
	// Heightfield geometry layout: rows, columns, then the sample array.
	const PxU32 * heightfieldGeomPtr = reinterpret_cast<const PxU32 *>(heightfieldShape.hullOrMeshPtr);

	const PxU32 rows = *heightfieldGeomPtr++;
	const PxU32 columns = *heightfieldGeomPtr++;
	const PxHeightFieldSample* samples = reinterpret_cast<const PxHeightFieldSample*>(heightfieldGeomPtr);

	//PxU32 vertexIndices[3];
	uint4 triAdjTriIndices;
	//PxgTriangle triLocal; // height field local space triangle
	PxVec3 triLocV0, triLocV1, triLocV2;
	getTriangle(triLocV0, triLocV1, triLocV2, &triAdjTriIndices, triangleIdx, heightfieldShape.scale, rows, columns, samples);

	//printf("threadIdx.x %i triangleIdx %i\n", threadIdx.x, triangleIdx);

	// Normalized triangle face normal in heightfield local space.
	const PxVec3 triNormal = ((triLocV1 - triLocV0).cross(triLocV2 - triLocV0)).getNormalized();
	//const PxVec3 triNormal = triLocal.denormalizedNormal().getNormalized();
	//const PxVec3 triangleLocNormal = trimeshToConvexTransform.rotate(triNormal).getNormalized();

	// Output slots for this triangle within the CM's pair range.
	ConvexMeshPair& pair = sphereTrimeshPair[cmIdx];
	PxU32 sphereTriPairOffset = pair.startIndex + testOffset;
	PxU32 sphereTriPairOffsetPadded = pair.roundedStartIndex + testOffset;

	// Shapes
	const PxReal contactDist = contactDistance[npWorkItem.transformCacheRef0] + contactDistance[npWorkItem.transformCacheRef1];
	const PxReal sphereRadius = sphereShape.scale.scale.x;

	//Geometries : Sphere
	// Sphere center expressed in heightfield local space.
	const PxVec3 sphereCenterInTriMesh = trimeshTransformCached.transform.transformInv(sphereTransformCached.transform.p);

	const PxReal inflatedRadius = sphereRadius + contactDist;

	//const PxReal d = triLocal.verts[0].dot(triNormal);
	//const PxReal dist0 = sphereCenterInTriMesh.dot(triNormal) - d;
	// Signed distance of the sphere center from the triangle's plane.
	const PxReal dist = (sphereCenterInTriMesh - triLocV0).dot(triNormal);

	PxReal separation = PX_MAX_F32;
	PxU32 nbOutputContacts = 0;
	PxVec3 patchNormal(0.f);
	PxVec3 closestP;
	//mSphereCenter will be in the local space of the triangle mesh
	bool generateContact = false;
	bool faceContact = false;
	bool delayContacts = false;
	PxU32 mask;
	PxReal sqDist = distancePointTriangleSquared(sphereCenterInTriMesh, triLocV0, triLocV1, triLocV2, triAdjTriIndices, closestP, generateContact, faceContact, &mask);

	const PxReal sqInflatedSphereRadius = inflatedRadius * inflatedRadius;
	//sphere overlaps the triangle (center on the front side, within inflated radius)
	if (dist >= 0.f && sqInflatedSphereRadius > sqDist && generateContact)
	{
		//printf("triangleIdx %i sqDist %f\n", triangleIdx, sqDist);
		//printf("triangleIdx %i triNormal(%f, %f, %f)\n", triangleIdx, triNormal.x, triNormal.y, triNormal.z);
		//printf("triangleIdx % i closestP(%f, %f, %f)\n", triangleIdx, closestP.x, closestP.y, closestP.z);

		//printf("triangleIdx %i v0(%f, %f, %f)\n", triangleIdx, triLocal.verts[0].x, triLocal.verts[0].y, triLocal.verts[0].z);
		//printf("triangleIdx %i v1(%f, %f, %f)\n", triangleIdx, triLocal.verts[1].x, triLocal.verts[1].y, triLocal.verts[1].z);
		//printf("triangleIdx %i v2(%f, %f, %f)\n", triangleIdx, triLocal.verts[2].x, triLocal.verts[2].y, triLocal.verts[2].z);

		//printf("remapCpuTriangleIdx %i sqInflatedSphereRadius %f sqDist %f faceContact %i\n", remapCpuTriangleIdx, sqInflatedSphereRadius, sqDist, PxU32(faceContact));

		// Edge cases: test the adjacent triangle across the closest edge and
		// only keep the contact if the closest vertex lies below the adjacent
		// triangle's plane (i.e. the edge is convex from this side).
		switch (mask)
		{
		case ConvexTriIntermediateData::eE01:
		{
			PxVec3 triLocV0Adj, triLocV1Adj, triLocV2Adj;
			getTriangle(triLocV0Adj, triLocV1Adj, triLocV2Adj, NULL, triAdjTriIndices.x, heightfieldShape.scale, rows, columns, samples);
			const PxVec3 triNormalAdj = ((triLocV1Adj - triLocV0Adj).cross(triLocV2Adj - triLocV0Adj)).getNormalized();

			generateContact = ((triNormalAdj.dot(triLocV2 - triLocV0Adj)) < 0.f);

			break;
		}
		case ConvexTriIntermediateData::eE12:
		{
			PxVec3 triLocV0Adj, triLocV1Adj, triLocV2Adj;
			getTriangle(triLocV0Adj, triLocV1Adj, triLocV2Adj, NULL, triAdjTriIndices.y, heightfieldShape.scale, rows, columns, samples);
			const PxVec3 triNormalAdj = ((triLocV1Adj - triLocV0Adj).cross(triLocV2Adj - triLocV0Adj)).getNormalized();

			/*printf("%x: triNormalAdj = (%f, %f, %f), triNormal = (%f, %f, %f)\n", triangleIdx, triNormalAdj.x, triNormalAdj.y, triNormalAdj.z,
				triNormal.x, triNormal.y, triNormal.z);*/

			generateContact = (triNormalAdj.dot(triLocV0 - triLocV0Adj) < 0.f);

			break;
		}
		case ConvexTriIntermediateData::eE02:
		{
			PxVec3 triLocV0Adj, triLocV1Adj, triLocV2Adj;
			getTriangle(triLocV0Adj, triLocV1Adj, triLocV2Adj, NULL, triAdjTriIndices.z, heightfieldShape.scale, rows, columns, samples);
			const PxVec3 triNormalAdj = ((triLocV1Adj - triLocV0Adj).cross(triLocV2Adj - triLocV0Adj)).getNormalized();

			generateContact = ((triNormalAdj.dot(triLocV1 - triLocV0Adj)) < 0.f);
			break;
		}

		}

		if (generateContact)
		{
			// Face contact uses the triangle normal; otherwise use the
			// direction from the closest point to the sphere center.
			patchNormal = triNormal;
			if (!faceContact)
				patchNormal = (sphereCenterInTriMesh - closestP).getNormalized();

			//const PxU32 value = PxU32(faceContact);
			//printf("triangleIdx %i faceContact %i\n", triangleIdx, value);
			//printf("remapCpuTriangleIdx %i closestP(%f, %f, %f)\n", remapCpuTriangleIdx, closestP.x, closestP.y, closestP.z);

			const PxReal cosTheta = patchNormal.dot(triNormal);
			const PxReal tolerance = 0.996f;//around 5 degrees

			// Defer contact generation when the contact normal deviates from
			// the face normal by more than ~5 degrees (cosTheta <= tolerance).
			delayContacts = cosTheta <= tolerance;
			if (delayContacts)
			{
				//delay contacts
				/*PxU32* PX_RESTRICT sphereTriSecondPairPass = *sphereTriSecondPassPairPtr;
				const PxU32 startIndex = atomicAdd(nbSecondPassPairs, 1);
				sphereTriSecondPairPass[startIndex] = globalThreadIndex;*/
				outMask = globalThreadIndex | mask;
				//printf("remapCpuTriangleIdx %i gpuTriangleIdx %i delayContacts\n", remapCpuTriangleIdx, triangleIdx);
			}

			//printf("triangleIdx % i delayContacts %i\n", triangleIdx, PxU32(delayContacts));

			nbOutputContacts = 1;
			separation = PxSqrt(sqDist);

			// NOTE(review): the contact point is emitted as the zero vector;
			// presumably the caller interprets it relative to the sphere —
			// TODO confirm against the consuming kernel.
			outContact = PxVec3(0.f);
			outSep = separation;
		}

	}

	// Per-pair outputs are written unconditionally (separation stays
	// PX_MAX_F32 and nbOutputContacts stays 0 when no contact was generated).
	PxReal* PX_RESTRICT sphereTriMaxDepth = *sphereTriMaxDepthPtr;
	sphereTriMaxDepth[sphereTriPairOffset] = separation;

	SphereTriIntermediateData* PX_RESTRICT sphereTriInterm = *sphereTriIntermPtr;
	PxU32* PX_RESTRICT orderedSphereTriInterm = *orderedSphereTriIntermPtr;
	sphereTriInterm[sphereTriPairOffset].gpuTriIndex = triangleIdx;
	// High bit marks an immediately-usable (non-deferred) contact.
	orderedSphereTriInterm[sphereTriPairOffsetPadded] = (nbOutputContacts && !delayContacts) ? (0x80000000 | triangleIdx) : triangleIdx;

	SphereTriNormalAndIndex* PX_RESTRICT sphereTriNI = *sphereTriNIPtr;

	PxU32 delayContactMask = delayContacts ? SphereTriNormalAndIndex::DeferredContactMask : 0;

	//rotate the normal into A (sphere) space
	PxVec3 worldNormal = trimeshTransformCached.transform.rotate(-patchNormal);
	sphereTriNI[sphereTriPairOffset].normal = sphereTransformCached.transform.rotateInv(worldNormal);
	sphereTriNI[sphereTriPairOffset].index = delayContactMask + (nbOutputContacts << SphereTriNormalAndIndex::NbContactsShift) + triangleIdx;

	//printf("triangleIdx %i sphereTriPairOffset %i\n", triangleIdx, sphereTriPairOffset);

	return nbOutputContacts;

}
|
||||
|
||||
|
||||
// Computes per-edge flags for one heightfield triangle by examining its three
// adjacent triangles. For each edge slot a (0..2):
//   - bit (a+3) is set when the adjacent triangle exists, the opposing vertex
//     lies below the adjacent triangle's plane (projD < 0), and the two face
//     normals differ by more than ~4.4 degrees (proj < 0.997);
//   - otherwise bit (a+3) or bit a is set per the boundary/silhouette branches
//     below.
//
// NOTE(review): the `else if (boundaryCollisions)` binds to `if (projD < 0.f)`,
// not to the adjacent-triangle-exists test, so the "boundary edge" branch only
// runs when an adjacent triangle IS present — this looks inconsistent with the
// comment; TODO confirm against the CPU heightfield implementation.
__device__ PxU8 computeTriangleFlags(
	const PxgTriangle& currentTriangle,
	const PxVec3& triNormal,
	const PxU32* adjTriIndices,
	const PxU16 flags,
	const PxMeshScale& scale,
	const PxU32 rows,
	const PxU32 columns,
	const PxHeightFieldSample* samples
)
{
	// Boundary edges collide unless the heightfield opts out via the flag.
	const bool boundaryCollisions = !(flags & PxHeightFieldFlag::eNO_BOUNDARY_EDGES);
	PxU8 triFlags = 0;
	// For edge slot a, nextInd[a] is the vertex opposite the shared edge.
	const PxU8 nextInd[] = { 2,0,1 };
	//fill in triangle flags
	for (PxU32 a = 0; a < 3; ++a)
	{
		if (adjTriIndices[a] != 0xFFFFFFFF)	// 0xFFFFFFFF = no adjacent triangle
		{
			PxgTriangle adjTri;

			getTriangle(adjTri, NULL, adjTriIndices[a], scale, rows, columns, samples);

			PxVec3 adjNormal = adjTri.denormalizedNormal();
			PxU32 otherIndex = nextInd[a];
			// Signed height of the opposing vertex over the adjacent triangle's plane.
			PxReal projD = adjNormal.dot(currentTriangle.verts[otherIndex] - adjTri.verts[0]);
			if (projD < 0.f)
			{
				adjNormal.normalize();

				PxReal proj = adjNormal.dot(triNormal);

				if (proj < 0.997f)	// normals differ by more than ~4.4 degrees
				{
					triFlags |= (1 << (a + 3));
				}
			}
			else if (boundaryCollisions)
			{
				triFlags |= (1 << (a + 3)); //Mark boundary edge active
			}
			else
				triFlags |= (1 << a); //Mark as silhouette edge

		}
	}

	return triFlags;

}
|
||||
|
||||
|
||||
// Narrow phase for ONE capsule-vs-heightfield-triangle pair (one thread per pair).
//
// Generates up to two contacts in capsule (shape A) local space, records the
// minimum separation, the patch normal (rotated into A space), and the ordered
// intermediate data consumed by the later correlation/second passes.
//
// Fixes vs. previous revision:
//  - The capsule segment endpoints p0/p1 were built from 'tmp' (the capsule
//    axis in WORLD space) although they live in heightfield local space; the
//    already-computed 'capsuleDirInMesh' (the same axis rotated into mesh
//    space, previously unused) is the correct offset.
//  - The sqDist > 0 / == 0 branch bodies were swapped: a positive squared
//    segment-triangle distance is the NON-intersecting case and must use the
//    closest-point (edge) normal, while an intersecting segment (sqDist == 0)
//    must fall back to the face normal — normalizing the closest-point
//    difference there divides by zero. The original comments already described
//    this assignment; the code now matches them.
//
// returns the number of contacts written to 'contacts'/'separations'.
__device__ PxU32 capsuleHeightfieldNarrowphaseCore(
	PxU32 globalThreadIndex,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,

	SphereMeshPair* PX_RESTRICT sphereTrimeshPair,					// per CM
	SphereTriNormalAndIndex** PX_RESTRICT sphereTriNIPtr,			// per sphere-tri
	SphereTriContacts** PX_RESTRICT sphereTriContactsPtr,			// per sphere-tri
	PxReal** PX_RESTRICT sphereTriMaxDepthPtr,						// per sphere-tri
	SphereTriIntermediateData** PX_RESTRICT sphereTriIntermPtr,		// per sphere-tri
	PxU32** PX_RESTRICT orderedSphereTriIntermPtr,					// per sphere-tri
	PxU32** PX_RESTRICT sphereTriSecondPassPairPtr,					// per sphere-tri

	const uint4& curPair,
	const PxgContactManagerInput& npWorkItem,
	PxgShape& capsuleShape,
	PxVec3* contacts,
	PxReal* separations
)
{
	// Pair: x = contact-manager index, y = triangle index, z = offset of this
	// test inside the CM's triangle range.
	const PxU32 cmIdx = curPair.x;
	PxU32 triangleIdx = curPair.y;
	PxU32 testOffset = curPair.z;

	PxsCachedTransform capsuleTransformCached = transformCache[npWorkItem.transformCacheRef0];
	PxsCachedTransform trimeshTransformCached = transformCache[npWorkItem.transformCacheRef1];

	PxgShape heightfieldShape = gpuShapes[npWorkItem.shapeRef1];

	// Heightfield blob layout: rows, columns, then the sample grid.
	const PxU32 * heightfieldGeomPtr = reinterpret_cast<const PxU32 *>(heightfieldShape.hullOrMeshPtr);

	const PxU32 rows = *heightfieldGeomPtr++;
	const PxU32 columns = *heightfieldGeomPtr++;
	const PxHeightFieldSample* samples = reinterpret_cast<const PxHeightFieldSample*>(heightfieldGeomPtr);

	// NOTE(review): this advances a PxU32* by sizeof(PxU32)*rows*columns
	// ELEMENTS (i.e. 4x the sample count in bytes) — verify against the blob
	// layout; it looks like byte arithmetic applied to an element pointer.
	heightfieldGeomPtr += sizeof(PxU32) * rows * columns;

	// NOTE(review): this reinterprets the POINTER VARIABLE's low 16 bits, not
	// the pointed-to memory — almost certainly meant to be
	// *reinterpret_cast<const PxU16*>(heightfieldGeomPtr). Left unchanged
	// pending confirmation of the blob layout; it only gates the
	// eNO_BOUNDARY_EDGES behavior in computeTriangleFlags.
	const PxU16 flags = reinterpret_cast<PxU16&>(heightfieldGeomPtr);

	PxU32 adjTriIndices[3];
	PxgTriangle currentTriangle; // height field local space triangle
	getTriangle(currentTriangle, adjTriIndices, triangleIdx, heightfieldShape.scale, rows, columns, samples);

	ConvexMeshPair& pair = sphereTrimeshPair[cmIdx];
	PxU32 sphereTriPairOffset = pair.startIndex + testOffset;
	PxU32 sphereTriPairOffsetPadded = pair.roundedStartIndex + testOffset;

	// Shapes: capsule radius/half-height are packed into the scale vector.
	const PxReal contactDist = contactDistance[npWorkItem.transformCacheRef0] + contactDistance[npWorkItem.transformCacheRef1];
	const PxReal capsuleRadius = capsuleShape.scale.scale.y;
	const PxReal capsuleHalfHeight = capsuleShape.scale.scale.x;

	const PxTransform heighfieldToCapsule = capsuleTransformCached.transform.transformInv(trimeshTransformCached.transform);

	// Capsule axis (basis vector 0) scaled by the half height, in world space...
	const PxVec3 tmp = capsuleTransformCached.transform.q.getBasisVector0() * capsuleHalfHeight;
	// ...and the capsule center/axis expressed in heightfield local space.
	const PxVec3 capsuleCenterInMesh = trimeshTransformCached.transform.transformInv(capsuleTransformCached.transform.p);
	const PxVec3 capsuleDirInMesh = trimeshTransformCached.transform.rotateInv(tmp);
	//Geometries : Capsule in height field local space
	// FIX: offset by the mesh-space axis, not the world-space 'tmp'.
	const PxVec3 p0 = capsuleCenterInMesh + capsuleDirInMesh;
	const PxVec3 p1 = capsuleCenterInMesh - capsuleDirInMesh;

	const PxReal inflatedRadius = capsuleRadius + contactDist;

	const PxVec3 triNormal = currentTriangle.denormalizedNormal().getNormalized();

	// Signed height of the capsule center above the triangle plane; triangles
	// facing away from the capsule (dist < 0) are culled below.
	const PxReal dist = (capsuleCenterInMesh - currentTriangle.verts[0]).dot(triNormal);

	PxU32 nbOutputContacts = 0;
	PxVec3 patchNormal(0.f);

	// Squared segment-triangle distance; t parameterizes the segment, (u, v)
	// the triangle barycentrics of the closest point.
	PxReal t, u, v;
	PxReal sqDist = distanceSegmentTriangleSquared(p0, p1, currentTriangle.verts[0], currentTriangle.verts[1], currentTriangle.verts[2], t, u, v);

	const PxReal sqInflatedRadius = inflatedRadius * inflatedRadius;
	PxReal separation = PX_MAX_F32;
	bool deferred = false;
	//capsule overlap with triangles
	if (dist >= 0.f && sqInflatedRadius > sqDist)
	{
		const PxU8 triFlags = computeTriangleFlags(currentTriangle, triNormal, adjTriIndices,
			flags, heightfieldShape.scale, rows, columns, samples);

		if (selectNormal(u, v, triFlags))
		{
			// Closest feature is an active face/edge region: use the face normal.
			patchNormal = triNormal;
		}
		else
		{
			if (sqDist > 0.f)
			{
				// Not intersecting: build the edge normal from the closest
				// points on segment and triangle, and defer this pair to the
				// second (edge) pass.
				const PxVec3 pq = p1 - p0;
				const PxVec3 pointOnSegment = p0 + pq * t;
				const PxReal w = 1.f - (u + v);
				const PxVec3 pointOnTriangle = currentTriangle.verts[0] * w + currentTriangle.verts[1] * u + currentTriangle.verts[2] * v;
				patchNormal = (pointOnSegment - pointOnTriangle).getNormalized();
				deferred = true;
			}
			else
			{
				// Segment intersects the triangle: the closest-point difference
				// is zero-length, so fall back to the face normal.
				patchNormal = triNormal;
			}
		}

		// Face contacts first, then edge-edge contacts appended to the same buffers.
		nbOutputContacts = generateContacts(currentTriangle.verts[0], currentTriangle.verts[1],
			currentTriangle.verts[2], triNormal, patchNormal, p0, p1, inflatedRadius, contacts, separations);

		generateEEContacts(currentTriangle.verts[0], currentTriangle.verts[1],
			currentTriangle.verts[2], patchNormal, p0, p1, sqInflatedRadius, contacts, separations, nbOutputContacts);

		for (PxU32 i = 0; i < nbOutputContacts; ++i)
		{
			//transform contact back to the capsule space
			contacts[i] = heighfieldToCapsule.transform(contacts[i]);
			const PxReal pen = separations[i];
			separation = PxMin(separation, pen);
		}
	}

	SphereTriIntermediateData* PX_RESTRICT sphereTriInterm = *sphereTriIntermPtr;
	PxU32* PX_RESTRICT orderedSphereTriInterm = *orderedSphereTriIntermPtr;
	sphereTriInterm[sphereTriPairOffset].gpuTriIndex = triangleIdx;
	// Bit 31 marks pairs that produced non-deferred contacts for the sort that
	// follows; deferred (edge) pairs are handled in the second pass.
	const PxU32 deferMask = (nbOutputContacts && !deferred) ? 1 << 31 : 0;
	orderedSphereTriInterm[sphereTriPairOffsetPadded] = deferMask | triangleIdx;

	PxReal* PX_RESTRICT capsuleTriMaxDepth = *sphereTriMaxDepthPtr;
	capsuleTriMaxDepth[sphereTriPairOffset] = separation;

	SphereTriNormalAndIndex* PX_RESTRICT sphereTriNI = *sphereTriNIPtr;

	//rotate the normal into A space
	sphereTriNI[sphereTriPairOffset].normal = heighfieldToCapsule.rotate(-patchNormal);
	sphereTriNI[sphereTriPairOffset].index = (nbOutputContacts << SphereTriNormalAndIndex::NbContactsShift) + triangleIdx;

	return nbOutputContacts;
}
|
||||
|
||||
// Kernel: sphere- and capsule-vs-heightfield narrow phase.
//
// One thread processes one (contact manager, triangle) midphase pair, taken
// from the pair list carved out of 'stackPtr' by midphaseAllocate. Each thread
// writes up to 2 contacts into per-thread shared-memory slots, then the warp
// cooperatively compacts contacts into 'tempConvexTriContacts' and deferred
// (second-pass) pair masks into the second-pass buffer.
//
// NOTE(review): the per-pair output pointers are shared between the sphere
// path (Sphere* types) and this kernel's Convex* shared pointers — presumably
// layout-compatible aliases of the same stack allocations; verify against
// midphaseAllocate.
extern "C" __global__
void sphereHeightfieldNarrowphase(
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,					// per CM

	ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr,			// per cvx-tri
	ConvexTriContacts** PX_RESTRICT cvxTriContactsPtr,			// per cvx-tri
	PxReal** PX_RESTRICT cvxTriMaxDepthPtr,						// per cvx-tri
	SphereTriIntermediateData** PX_RESTRICT cvxTriIntermPtr,	// per cvx-tri
	PxU32** PX_RESTRICT orderedCvxTriIntermPtr,					// per cvx-tri
	PxU32** PX_RESTRICT cvxTriSecondPassPairsPtr,				// per cvx-tri

	PxU8* PX_RESTRICT stackPtr,
	PxU32* PX_RESTRICT nbPairsGlobal,
	PxU32* PX_RESTRICT nbPaddedPairsGlobal,
	PxU32* PX_RESTRICT nbSecondPassPairs,
	const PxU32 stackSizeBytes,
	ConvexTriContact* tempConvexTriContacts,
	PxU32* pTempContactIndex,
	PxU32* maxTempMemRequirement,
	PxU32* midphasePairsNeeded,
	const PxU32 nbContactManagers
)
{
	// Block-wide copies of the sub-buffer base pointers carved from stackPtr.
	__shared__ ConvexTriNormalAndIndex* sCvxTriNIPtr;
	__shared__ ConvexTriContacts* sCvxTriContactsPtr;
	__shared__ PxReal* sCvxTriMaxDepthPtr;
	__shared__ ConvexTriIntermediateData* sCvxTriIntermPtr;
	__shared__ PxU32* sOrderedTriIntermPtr;
	__shared__ uint4* sPairsGPU;
	__shared__ PxU32* sCvxTriSecondPassPairsPtr;

	// Per-thread contact scratch: 2 contact slots per thread (capsules can
	// produce 2 contacts, spheres 1), hence the *2 indexing below.
	__shared__ char sShContacts[sizeof(PxVec3) * 128];
	PxVec3* shContacts = reinterpret_cast<PxVec3*>(sShContacts);
	__shared__ PxReal shSeparations[128];

	const PxU32 nbPairs = *nbPairsGlobal;
	const PxU32 nbPaddedPairs = *nbPaddedPairsGlobal;

	//each block assign the corresponding ptr from the stack memory to sCvxTriNIPtr, sCvxTriContactPtr and sCvxTriMaxDepthPtr
	if (threadIdx.x == 0 && threadIdx.y == 0)
	{
		midphaseAllocate(
			&sCvxTriNIPtr,
			&sCvxTriContactsPtr,
			&sCvxTriMaxDepthPtr,
			&sCvxTriIntermPtr,
			&sOrderedTriIntermPtr,
			&sCvxTriSecondPassPairsPtr,
			&sPairsGPU,
			stackPtr,
			nbPairs,
			nbPaddedPairs);
		// One thread in the whole grid reports the worst-case temp-memory need
		// so the host can grow the stack if the midphase overflowed.
		if (blockIdx.x == 0)
		{
			atomicMax(maxTempMemRequirement, calculateConvexMeshPairMemRequirement() * (*midphasePairsNeeded) + calculateAdditionalPadding(nbContactManagers));
		}
	}
	__syncthreads();

	// Publish the carved pointers to global memory for downstream kernels
	// (done by a single thread; all blocks compute identical values).
	if (threadIdx.x == 0 && threadIdx.y == 0 && blockIdx.x == 0)
	{
		*cvxTriNIPtr = sCvxTriNIPtr;
		*cvxTriContactsPtr = sCvxTriContactsPtr;
		*cvxTriMaxDepthPtr = sCvxTriMaxDepthPtr;
		*cvxTriIntermPtr = sCvxTriIntermPtr;
		*orderedCvxTriIntermPtr = sOrderedTriIntermPtr;
		*cvxTriSecondPassPairsPtr = sCvxTriSecondPassPairsPtr;
	}

	// Grid-stride loop over the pair list. All threads of a warp stay in the
	// loop together (the bound does not depend on the thread index), which the
	// full-mask warp intrinsics below rely on.
	for (PxU32 i = 0; i < nbPairs; i += gridDim.x * blockDim.x)
	{
		PxU32 globalThreadIndex = blockIdx.x * blockDim.x + threadIdx.x + i;

		PxU32 nbContacts = 0;
		PxU32 outMask = 0;
		uint4 curPair;

		if (globalThreadIndex < nbPairs)
		{
			// Pair
			curPair = sPairsGPU[globalThreadIndex];

			PxgContactManagerInput npWorkItem = cmInputs[curPair.x];
			PxgShape shape0 = gpuShapes[npWorkItem.shapeRef0];
			PxGeometryType::Enum type0 = PxGeometryType::Enum(shape0.type);
			if (type0 == PxGeometryType::eSPHERE)
			{
				nbContacts = sphereHeightfieldNarrowphaseCore(
					globalThreadIndex,

					cmInputs,
					transformCache,
					contactDistance,
					gpuShapes,
					cvxTrimeshPair,

					&sCvxTriNIPtr,
					&sCvxTriContactsPtr,
					&sCvxTriMaxDepthPtr,
					&sCvxTriIntermPtr,
					&sOrderedTriIntermPtr,
					&sCvxTriSecondPassPairsPtr,

					curPair,
					npWorkItem,
					shape0,
					nbPairsGlobal,
					nbSecondPassPairs,
					shContacts[threadIdx.x * 2],
					shSeparations[threadIdx.x * 2],
					outMask
				);
			}
			else if (type0 == PxGeometryType::eCAPSULE)
			{
				nbContacts = capsuleHeightfieldNarrowphaseCore(
					globalThreadIndex,

					cmInputs,
					transformCache,
					contactDistance,
					gpuShapes,
					cvxTrimeshPair,

					&sCvxTriNIPtr,
					&sCvxTriContactsPtr,
					&sCvxTriMaxDepthPtr,
					&sCvxTriIntermPtr,
					&sOrderedTriIntermPtr,
					&sCvxTriSecondPassPairsPtr,

					curPair,
					npWorkItem,
					shape0,
					&shContacts[threadIdx.x * 2],
					&shSeparations[threadIdx.x * 2]
				);
			}
		}

		// Warp-cooperative output. outMask is non-zero for pairs the sphere
		// path deferred to the second pass; compact them with a ballot.
		PxU32 outMaskWriteMask = __ballot_sync(FULL_MASK, outMask);
		PxU32 count = __popc(outMaskWriteMask);
		PxU32 startIndex = 0;

		const PxU32 threadIndexInWarp = threadIdx.x & 31;

		//Now output contacts!!!!

		// Inclusive warp prefix sum of per-thread contact counts; lane 31
		// therefore holds the warp's total.
		PxU32 inclusiveSum = warpScan<AddOpPxU32, PxU32>(FULL_MASK, nbContacts);

		PxU32 totalContacts = __shfl_sync(FULL_MASK, inclusiveSum, 31);

		if (totalContacts)
		{
			if (count)
			{
				// Lane 0 reserves 'count' second-pass slots for the warp...
				if (threadIndexInWarp == 0)
					startIndex = atomicAdd(nbSecondPassPairs, count);

				startIndex = __shfl_sync(FULL_MASK, startIndex, 0);

				// ...and each deferred lane writes its mask at its compacted slot.
				if (outMask)
				{
					PxU32 offset = warpScanExclusive(outMaskWriteMask, threadIndexInWarp);
					sCvxTriSecondPassPairsPtr[startIndex + offset] = outMask;
				}
			}

			// Lane 31 reserves the warp's contact range in the temp buffer.
			if (threadIndexInWarp == 31)
				startIndex = atomicAdd(pTempContactIndex, inclusiveSum);

			startIndex = __shfl_sync(FULL_MASK, startIndex, 31);

			// Convert the inclusive sum to this lane's exclusive offset.
			startIndex += inclusiveSum - nbContacts;

			// Copy this lane's contacts from its 2 shared slots.
			// NOTE: the loop counter shadows the outer grid-stride 'i'.
			for (PxU32 i = 0, idx = 2 * threadIdx.x; i < nbContacts; ++i, idx++)
			{
				tempConvexTriContacts[startIndex + i].contact_sepW = make_float4(shContacts[idx].x, shContacts[idx].y,
					shContacts[idx].z, shSeparations[idx]);
			}

			// Record where this pair's contacts start in the temp buffer.
			if (nbContacts)
			{
				PxU32 cmIdx = curPair.x;
				PxU32 testOffset = curPair.z;

				ConvexMeshPair& pair = cvxTrimeshPair[cmIdx];
				PxU32 sphereTriPairOffset = pair.startIndex + testOffset;

				sCvxTriContactsPtr[sphereTriPairOffset].index = startIndex;
			}
		}
	}
}
|
||||
975
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexMesh.cu
vendored
Normal file
975
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexMesh.cu
vendored
Normal file
@@ -0,0 +1,975 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxPlane.h"
|
||||
#include "foundation/PxTransform.h"
|
||||
#include "geometry/PxMeshScale.h"
|
||||
#include "geometry/PxGeometry.h"
|
||||
|
||||
#include "PxgCommonDefines.h"
|
||||
#include "utils.cuh"
|
||||
#include "reduction.cuh"
|
||||
#include "sphereTriangle.cuh"
|
||||
#include "convexTriangle.cuh"
|
||||
#include "capsuleTriangle.cuh"
|
||||
#include "midphaseAllocate.cuh"
|
||||
|
||||
#include "PxgNpKernelIndices.h"
|
||||
#include "triangleMesh.cuh"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Intentionally empty host-side entry point — presumably referenced from the
// module/kernel registration code so the linker keeps this translation unit
// (and the kernels it defines); verify against the kernel loader.
extern "C" __host__ void initNarrowphaseKernels2() {}
|
||||
|
||||
// Fetches one triangle of a triangle mesh and returns its three vertices in
// shape space (mesh scale applied), written to v0/v1/v2.
__device__ void getShapeSpaceVerts(PxVec3& v0, PxVec3& v1, PxVec3& v2, const MeshScaling& meshScale, PxU32 triangleIdx, const uint4* PX_RESTRICT trimeshTriIndices, const float4* PX_RESTRICT trimeshVerts)
{
	// One load fetches all three vertex indices of the triangle.
	const uint4 vertRefs = trimeshTriIndices[triangleIdx];

	// Gather the raw (vertex-space) positions.
	const float4 raw0 = trimeshVerts[vertRefs.x];
	const float4 raw1 = trimeshVerts[vertRefs.y];
	const float4 raw2 = trimeshVerts[vertRefs.z];

	PxVec3 vert0 = PxLoad3(raw0);
	PxVec3 vert1 = PxLoad3(raw1);
	PxVec3 vert2 = PxLoad3(raw2);

	// Apply the mesh scale to move the vertices into shape space.
	meshScale.getShapeSpaceVert(vert0, vert1, vert2, v0, v1, v2);
}
|
||||
|
||||
// Narrow phase for ONE convex-vs-mesh-triangle pair, processed cooperatively
// by a full warp (one warp per pair; threadIdx.y selects the warp's slot in
// the block-shared staging buffers).
//
// The warp stages the pair's transforms, shapes and triangle data into the
// per-warp scratch area 's_WarpSharedMemory', then hands off to
// convexTriangleContactGen for the actual contact generation.
__device__ void convexTrimeshNarrowphaseCore(
	PxU32 globalWarpIndex,
	PxgShape& convexShape,
	PxgShape& trimeshShape,
	PxU32 transformCacheRef0,
	PxU32 transformCacheRef1,
	const uint4 curPair,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,				// per CM
	ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr,		// per cvx-tri
	ConvexTriContacts** PX_RESTRICT cvxTriContactsPtr,		// per cvx-tri
	PxReal** PX_RESTRICT cvxTriMaxDepthPtr,					// per cvx-tri
	ConvexTriIntermediateData** PX_RESTRICT cvxTriIntermPtr,	// per cvx-tri
	PxU32** PX_RESTRICT orderedCvxTriIntermPtr,				// per cvx-tri
	PxU32** PX_RESTRICT cvxTriSecondPassPairPtr,			// per cvx-tri
	const uint4* PX_RESTRICT pairsGPU,
	PxU32* nbPairsGlobal,
	PxU32* nbSecondPassPairs,
	volatile PxU32* s_WarpSharedMemory,
	ConvexTriContact* tempConvexTriContacts,
	const PxU32 tempContactSizeBytes,
	PxU32* pTempContactIndex
)
{
	// 1 warp is doing 1 triangle - convex pair here.

	ConvexMeshScratch* s_scratch = (ConvexMeshScratch*)s_WarpSharedMemory;

	// Pair: x = contact-manager index, y = triangle index, z = offset of this
	// test inside the CM's triangle range.
	PxU32 cmIdx = curPair.x;
	PxU32 triangleIdx = curPair.y;
	PxU32 testOffset = curPair.z;

	// Per-warp (threadIdx.y) staging for the two cached transforms.
	__shared__ __align__(16) char sConvexTransformCached[sizeof(PxsCachedTransform) * NP_TRIMESH_WARPS_PER_BLOCK];
	__shared__ __align__(16) char sTrimeshTransformCached[sizeof(PxsCachedTransform) * NP_TRIMESH_WARPS_PER_BLOCK];

	PxsCachedTransform* convexTransformCached = reinterpret_cast<PxsCachedTransform*>(sConvexTransformCached);
	PxsCachedTransform* trimeshTransformCached = reinterpret_cast<PxsCachedTransform*>(sTrimeshTransformCached);

	// Cooperative word-wise copy of the transforms: one PxU32 per lane.
	if (threadIdx.x < sizeof(PxsCachedTransform) / sizeof(PxU32))
	{
		reinterpret_cast<PxU32*>(&convexTransformCached[threadIdx.y])[threadIdx.x] =
			reinterpret_cast<const PxU32*>(transformCache + transformCacheRef0)[threadIdx.x];

		reinterpret_cast<PxU32*>(&trimeshTransformCached[threadIdx.y])[threadIdx.x] =
			reinterpret_cast<const PxU32*>(transformCache + transformCacheRef1)[threadIdx.x];
	}

	ConvexMeshPair& pair = cvxTrimeshPair[cmIdx];
	PxU32 convexTriPairOffset = pair.startIndex + testOffset;
	PxU32 convexTriPairOffsetPadded = pair.roundedStartIndex + testOffset;

	// Make the staged transforms visible to the whole warp.
	__syncwarp();

	// Word-wise copy of the convex scale (7 PxU32 words).
	if (threadIdx.x < 7)
	{
		reinterpret_cast<PxU32*>(&s_scratch->convexScale)[threadIdx.x] = reinterpret_cast<PxU32*>(&convexShape.scale)[threadIdx.x];
	}

	// Lane 0 fills the scalar/pointer part of the scratch area.
	if (threadIdx.x == 0)
	{
		// convexTransform/trimeshTransform already include bodyTM/relTM
		s_scratch->trimeshToConvexTransform = convexTransformCached[threadIdx.y].transform.transformInv(trimeshTransformCached[threadIdx.y].transform);
		// Shapes
		//s_scratch->contactDist = convexShape.contactOffset + trimeshShape.contactOffset;
		s_scratch->contactDist = contactDistance[transformCacheRef0] + contactDistance[transformCacheRef1];

		const PxU8* convexPtrA = reinterpret_cast<const PxU8 *>(convexShape.hullOrMeshPtr);
		s_scratch->convexPtrA = convexPtrA;

		// Convex hull blob: float4 center of mass first...
		const float4 hull0_centerOfMass_f4 = *reinterpret_cast<const float4 *>(convexPtrA);
		const PxVec3 hull0_centerOfMass(hull0_centerOfMass_f4.x, hull0_centerOfMass_f4.y, hull0_centerOfMass_f4.z);
		// Transform CoM into shape space
		PxVec3 shapeSpaceCenterOfMass0 = vertex2Shape(hull0_centerOfMass, convexShape.scale.scale, convexShape.scale.rotation);

		s_scratch->convexCenterOfMass = shapeSpaceCenterOfMass0;

		convexPtrA += sizeof(float4);

		// ...then a uint4 whose x packs edge/vertex/polygon counts.
		const uint4 tmp = *((uint4*)convexPtrA);
		const PxU32 polyData0_NbEdgesNbHullVerticesNbPolygons = tmp.x;
		s_scratch->nbEdgesNbHullVerticesNbPolygons = polyData0_NbEdgesNbHullVerticesNbPolygons;

		convexPtrA += sizeof(uint4);

		// Geometries : Triangle Mesh
		const PxU8 * trimeshGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape.hullOrMeshPtr);

		const uint4* PX_RESTRICT trimeshTriAdjacencies;
		readTriangleMesh(trimeshGeomPtr, s_scratch->trimeshVerts, s_scratch->trimeshTriIndices, trimeshTriAdjacencies, s_scratch->trimeshFaceRemap);


		// TODO: shuffle this across threads (reading uint4 seq)
		// later shuffle adjacency work and shuffle back adjNormals
		uint4 triAdjTriIndices = trimeshTriAdjacencies[triangleIdx];

		s_scratch->triAdjTrisIdx[0] = triAdjTriIndices.x;
		s_scratch->triAdjTrisIdx[1] = triAdjTriIndices.y;
		s_scratch->triAdjTrisIdx[2] = triAdjTriIndices.z;
	}

	__syncwarp();

	// Lanes 0..2 each transform one triangle vertex into convex space. A
	// negative-determinant mesh scale flips winding, so lanes 1 and 2 swap
	// their write slots (1^3=2, 2^3=1) to restore consistent winding.
	if (threadIdx.x < 3)
	{
		const PxMeshScale& scale = trimeshShape.scale;
		bool flipNormal = ((scale.scale.x * scale.scale.y * scale.scale.z) < 0.0f);
		PxU32 writeIndex = threadIdx.x;
		if (threadIdx.x > 0 && flipNormal)
			writeIndex = threadIdx.x ^ 3;

		const PxU32 triVertIdx = reinterpret_cast<const PxU32*>(&s_scratch->trimeshTriIndices[triangleIdx])[threadIdx.x];

		const float4 triV_f4 = s_scratch->trimeshVerts[triVertIdx];

		PxVec3 triV = PxLoad3(triV_f4);

		triV = vertex2Shape(triV, trimeshShape.scale.scale, trimeshShape.scale.rotation);

		PxVec3 triLocVerts = s_scratch->trimeshToConvexTransform.transform(triV);

		s_scratch->triLocVerts[writeIndex] = triLocVerts;
	}
	__syncwarp();
	// Lane 0 derives the triangle normal in convex-local space.
	if (threadIdx.x == 0)
	{
		PxVec3 triangleLocNormal;
		triangleLocNormal = (s_scratch->triLocVerts[1] - s_scratch->triLocVerts[0]).cross(s_scratch->triLocVerts[2] - s_scratch->triLocVerts[0]).getNormalized();

		s_scratch->triangleLocNormal = triangleLocNormal;
	}
	__syncwarp();

	// Whole warp runs the contact generation against the staged scratch data.
	const PxU32 remapCpuTriangleIdx = s_scratch->trimeshFaceRemap[triangleIdx];
	convexTriangleContactGen(
		s_WarpSharedMemory, s_scratch, convexTriPairOffset, convexTriPairOffsetPadded, remapCpuTriangleIdx, triangleIdx, globalWarpIndex,
		cvxTriNIPtr, cvxTriContactsPtr, cvxTriMaxDepthPtr, cvxTriIntermPtr, orderedCvxTriIntermPtr, cvxTriSecondPassPairPtr, nbSecondPassPairs,
		tempConvexTriContacts, tempContactSizeBytes/sizeof(ConvexTriContact), pTempContactIndex);
}
|
||||
|
||||
|
||||
|
||||
// Narrow phase for ONE sphere-vs-mesh-triangle pair (one thread per pair).
//
// Produces at most one contact (the sphere center in sphere local space, with
// its separation), written to outContact/outSep. Contacts whose normal differs
// from the face normal by more than ~5 degrees are "delayed": the pair's
// feature mask is returned via outMask for the second (correlation) pass
// instead of being accepted immediately.
//
// returns the number of contacts generated (0 or 1).
__device__ PxU32 sphereTrimeshNarrowphaseCore(
	PxU32 globalThreadIndex,
	const PxgContactManagerInput& npWorkItem,
	const PxgShape& sphereShape,
	const PxgShape& trimeshShape,
	const uint4 curPair,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	SphereMeshPair* PX_RESTRICT sphereTrimeshPair,					// per CM
	SphereTriNormalAndIndex** PX_RESTRICT sphereTriNIPtr,			// per sphere-tri
	SphereTriContacts** PX_RESTRICT sphereTriContactsPtr,			// per sphere-tri
	PxReal** PX_RESTRICT sphereTriMaxDepthPtr,						// per sphere-tri
	SphereTriIntermediateData** PX_RESTRICT sphereTriIntermPtr,		// per sphere-tri
	PxU32** PX_RESTRICT orderedSphereTriIntermPtr,					// per sphere-tri
	PxU32** PX_RESTRICT sphereTriSecondPassPairPtr,					// per sphere-tri
	PxU32* nbPairsGlobal,
	PxU32* nbSecondPassPairs,
	PxVec3& outContact,
	PxReal& outSep,
	PxU32& outMask
)
{
	// Pair, stackPtr in convexMeshMidphase kernel
	// x = contact-manager index, y = triangle index, z = offset of this test
	// inside the CM's triangle range.
	PxU32 cmIdx = curPair.x;
	PxU32 triangleIdx = curPair.y;
	PxU32 testOffset = curPair.z;

	PxsCachedTransform sphereTransformCached = transformCache[npWorkItem.transformCacheRef0];
	PxsCachedTransform trimeshTransformCached = transformCache[npWorkItem.transformCacheRef1];

	SphereMeshPair& pair = sphereTrimeshPair[cmIdx];
	PxU32 sphereTriPairOffset = pair.startIndex + testOffset;
	PxU32 sphereTriPairOffsetPadded = pair.roundedStartIndex + testOffset;


	// Shapes
	const PxReal contactDist = contactDistance[npWorkItem.transformCacheRef0] + contactDistance[npWorkItem.transformCacheRef1];
	const PxQuat trimeshScaleRot = trimeshShape.scale.rotation;

	// Sphere radius is packed into scale.scale.x.
	const PxReal sphereRadius = sphereShape.scale.scale.x;
	const PxVec3 trimeshScale = trimeshShape.scale.scale;

	//Geometries : Sphere
	const PxVec3 sphereCenterInTriMesh = trimeshTransformCached.transform.transformInv(sphereTransformCached.transform.p);

	// Geometries : Triangle Mesh
	const PxU8 * trimeshGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape.hullOrMeshPtr);

	const float4 * trimeshVerts;
	const uint4 * trimeshTriIndices;
	const uint4* PX_RESTRICT trimeshTriAdjacencies;
	const PxU32* PX_RESTRICT trimeshFaceRemap;
	readTriangleMesh(trimeshGeomPtr, trimeshVerts, trimeshTriIndices, trimeshTriAdjacencies, trimeshFaceRemap);


	const PxMeshScale& scale = trimeshShape.scale;
	MeshScaling meshScale(scale.scale, scale.rotation);

	// Triangle vertices in shape space (mesh scale applied).
	PxVec3 v0, v1, v2;
	getShapeSpaceVerts(v0, v1, v2, meshScale, triangleIdx, trimeshTriIndices, trimeshVerts);

	const PxVec3 n = (v1 - v0).cross(v2 - v0).getNormalized();

	// A negative-determinant scale flips winding; swap the adjacency entries
	// so edge indices still match the (re-wound) triangle.
	uint4 triAdjTriIndices = trimeshTriAdjacencies[triangleIdx];
	if (meshScale.flipNormal)
		PxSwap(triAdjTriIndices.x, triAdjTriIndices.z);


	const PxU32 remapCpuTriangleIdx = trimeshFaceRemap[triangleIdx];

	const PxReal inflatedRadius = sphereRadius + contactDist;

	// Signed distance of the sphere center from the triangle plane; back-facing
	// triangles (dist0 < 0) generate nothing.
	const PxReal d = v0.dot(n);
	const PxReal dist0 = sphereCenterInTriMesh.dot(n) - d;


	PxReal separation = PX_MAX_F32;

	PxU32 nbOutputContacts = 0;
	bool delayContacts = false;
	bool edgeContact = false;
	PxVec3 patchNormal(0.f);

	if (dist0 >= 0)
	{
		PxVec3 closestP;
		//mSphereCenter will be in the local space of the triangle mesh
		bool generateContact = false;
		bool faceContact = false;

		// Closest point on the triangle; also reports the feature mask and
		// whether the closest feature is the face interior.
		PxU32 mask;
		const PxReal sqDist = distancePointTriangleSquared(sphereCenterInTriMesh, v0, v1, v2, triAdjTriIndices, closestP, generateContact, faceContact, &mask);

		const PxReal sqInflatedSphereRadius = inflatedRadius * inflatedRadius;
		//sphere overlap with triangles
		if (sqInflatedSphereRadius > sqDist && generateContact)
		{
			patchNormal = n;
			// Edge/vertex contact: use the direction from closest point to center.
			if (!faceContact)
				patchNormal = (sphereCenterInTriMesh - closestP).getNormalized();

			const PxReal cosTheta = patchNormal.dot(n);
			const PxReal tolerance = 0.996f;//around 5 degree

			//two normal's projection less than 5 degree, generate contacts
			delayContacts = cosTheta <= tolerance;
			if (delayContacts)
			{
				//delay contacts
				//PxU32* PX_RESTRICT sphereTriSecondPairPass = *sphereTriSecondPassPairPtr;
				//const PxU32 startIndex = atomicAdd(nbSecondPassPairs, 1);

				// PT: no need to compute the mask, we got it for free from distancePointTriangleSquared
				//PxU32 mask = getTriangleActiveMask(closestP, v0, v1, v2);

				// Caller compacts these masks into the second-pass pair list.
				//sphereTriSecondPairPass[startIndex] = mask | globalThreadIndex;
				outMask = mask | globalThreadIndex;

				// VR: is an edge contact?
				edgeContact = ConvexTriIntermediateData::isEdgeContact(mask);
			}

			nbOutputContacts = 1;
			separation = PxSqrt(sqDist);


			// Generate contacts
			//SphereTriContacts* PX_RESTRICT sphereTriContacts = *sphereTriContactsPtr;

			//float4* contactBuf = reinterpret_cast<float4 *>(sphereTriContacts + sphereTriPairOffset);
			//sphere center in sphere local space
			//contactBuf[0] = make_float4(0.f, 0.f, 0.f, separation);

			//normal = patchNormal;

			outSep = separation;

			// The single contact is the sphere center, i.e. the origin in
			// sphere local space.
			outContact = PxVec3(0.f);


		}
	}

	PxReal* PX_RESTRICT sphereTriMaxDepth = *sphereTriMaxDepthPtr;
	sphereTriMaxDepth[sphereTriPairOffset] = separation;

	SphereTriIntermediateData* PX_RESTRICT sphereTriInterm = *sphereTriIntermPtr;
	PxU32* PX_RESTRICT orderedSphereTriInterm = *orderedSphereTriIntermPtr;
	sphereTriInterm[sphereTriPairOffset].gpuTriIndex = triangleIdx;

	// VR: bits 30 and 31 are used for the search flags
	assert(triangleIdx < (1 << 30));

	// Classify the pair for the later sort: immediate contacts are face
	// contacts, delayed ones may be edge contacts. Note the dangling-else
	// binding: the 'else if' pairs with 'if (!delayContacts)'.
	PxU32 orderedTriangleIdx = triangleIdx;
	if (nbOutputContacts)
		if (!delayContacts) // not delayed -> face contact
			orderedTriangleIdx |= TriangleSearchFlags::eFACE_CONTACT;
		else if (edgeContact)
			orderedTriangleIdx |= TriangleSearchFlags::eEDGE_CONTACT;

	orderedSphereTriInterm[sphereTriPairOffsetPadded] = orderedTriangleIdx;

	SphereTriNormalAndIndex* PX_RESTRICT sphereTriNI = *sphereTriNIPtr;

	PxU32 delayContactMask = delayContacts ? SphereTriNormalAndIndex::DeferredContactMask : 0;

	//rotate the normal into A space
	PxVec3 worldNormal = trimeshTransformCached.transform.rotate(-patchNormal);
	sphereTriNI[sphereTriPairOffset].normal = sphereTransformCached.transform.rotateInv(worldNormal);
	sphereTriNI[sphereTriPairOffset].index = delayContactMask + (nbOutputContacts << SphereTriNormalAndIndex::NbContactsShift) + remapCpuTriangleIdx;

	return nbOutputContacts;
}
|
||||
|
||||
|
||||
// Narrowphase core for one capsule-vs-mesh-triangle test, executed by a single thread.
// Computes up to a small set of contacts for the triangle identified by curPair.y,
// writes per-pair intermediate data (max depth, normal+index, ordered triangle index
// with search flags) into the buffers shared with the sphere-trimesh path, and
// returns the number of contacts generated. Contacts/separations are written into
// the caller-provided scratch arrays (capsule-local space).
__device__ PxU32 capsuleTrimeshNarrowphaseCore(
	PxU32 globalThreadIndex,
	const PxgContactManagerInput& npWorkItem,
	const PxgShape& capsuleShape,
	const PxgShape& trimeshShape,
	const uint4 curPair,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	SphereMeshPair* PX_RESTRICT sphereTrimeshPair,				// per CM
	SphereTriNormalAndIndex** PX_RESTRICT sphereTriNIPtr,		// per sphere-tri
	SphereTriContacts** PX_RESTRICT sphereTriContactsPtr,		// per sphere-tri
	PxReal** PX_RESTRICT sphereTriMaxDepthPtr,					// per sphere-tri
	SphereTriIntermediateData** PX_RESTRICT sphereTriIntermPtr,	// per sphere-tri
	PxU32** PX_RESTRICT orderedSphereTriIntermPtr,				// per sphere-tri
	PxU32** PX_RESTRICT sphereTriSecondPassPairPtr,				// per sphere-tri
	PxU32* nbSecondPassPairs,
	PxVec3* contacts,		// out: contacts in capsule-local space
	PxReal* separations,	// out: per-contact separation
	PxU32& outMask			// out: triangle active mask | globalThreadIndex, set only for delayed (second-pass) contacts
)
{
	// Pair, stackPtr in convexMeshMidphase kernel
	PxU32 cmIdx = curPair.x;
	PxU32 triangleIdx = curPair.y;
	PxU32 testOffset = curPair.z;

	PxsCachedTransform capsuleTransformCached = transformCache[npWorkItem.transformCacheRef0];
	PxsCachedTransform trimeshTransformCached = transformCache[npWorkItem.transformCacheRef1];

	// Offsets of this capsule-triangle test within the per-CM output ranges.
	SphereMeshPair& pair = sphereTrimeshPair[cmIdx];
	PxU32 sphereTriPairOffset = pair.startIndex + testOffset;
	PxU32 sphereTriPairOffsetPadded = pair.roundedStartIndex + testOffset;

	// Shapes
	const PxReal contactDist = contactDistance[npWorkItem.transformCacheRef0] + contactDistance[npWorkItem.transformCacheRef1];
	const PxQuat trimeshScaleRot = trimeshShape.scale.rotation;

	// Capsule geometry is encoded in the shape scale: y = radius, x = half height.
	const PxReal capsuleRadius = capsuleShape.scale.scale.y;
	const PxReal capsuleHalfHeight = capsuleShape.scale.scale.x;
	const PxVec3 trimeshScale = trimeshShape.scale.scale;

	//Geometries : Capsule
	const PxTransform meshToCapsule = capsuleTransformCached.transform.transformInv(trimeshTransformCached.transform);

	//Capsule in triangle mesh space
	const PxVec3 tmp = capsuleTransformCached.transform.q.getBasisVector0() * capsuleHalfHeight;
	const PxVec3 capsuleCenterInMesh = trimeshTransformCached.transform.transformInv(capsuleTransformCached.transform.p);
	const PxVec3 capsuleDirInMesh = trimeshTransformCached.transform.rotateInv(tmp);
	//Geometries : Capsule segment endpoints in mesh local space
	const PxVec3 p0 = capsuleCenterInMesh + capsuleDirInMesh;
	const PxVec3 p1 = capsuleCenterInMesh - capsuleDirInMesh;

	// Geometries : Triangle Mesh
	const PxU8 * trimeshGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape.hullOrMeshPtr);

	const float4 * trimeshVerts;
	const uint4 * trimeshTriIndices;
	const uint4* PX_RESTRICT trimeshTriAdjacencies;
	readTriangleMesh(trimeshGeomPtr, trimeshVerts, trimeshTriIndices, trimeshTriAdjacencies);

	const PxMeshScale& scale = trimeshShape.scale;
	MeshScaling meshScale(scale.scale, scale.rotation);

	PxVec3 v0, v1, v2;
	getShapeSpaceVerts(v0, v1, v2, meshScale, triangleIdx, trimeshTriIndices, trimeshVerts);

	const PxVec3 triNormal = (v1 - v0).cross(v2 - v0).getNormalized();

	uint4 triAdjTrisIdx = trimeshTriAdjacencies[triangleIdx];
	// A negative-determinant mesh scale flips winding; swap the adjacency entries accordingly.
	if (meshScale.flipNormal)
		PxSwap(triAdjTrisIdx.x, triAdjTrisIdx.z);

	const PxReal inflatedRadius = capsuleRadius + contactDist;

	//const PxReal d = v0.dot(n);
	//const PxReal dist0 = sphereCenterInTriMesh.dot(n) - d;

	// Signed distance of the capsule center from the triangle plane; only the
	// front-facing side (dist >= 0) generates contacts below.
	const PxReal dist = (capsuleCenterInMesh - v0).dot(triNormal);

	PxReal separation = PX_MAX_F32;
	PxU32 nbOutputContacts = 0;
	PxVec3 patchNormal(0.f);
	bool delayContacts = false;	// true -> contact deferred to the second pass (edge/vertex case)
	bool edgeContact = false;

	if (dist >= 0)
	{
		//PxVec3 closestP;

		PxReal t, u, v;
		PxReal sqDist = distanceSegmentTriangleSquared(p0, p1, v0, v1, v2, t, u, v);

		const PxReal sqInflatedRadius = inflatedRadius * inflatedRadius;

		//capsule overlap with triangles

		if (sqInflatedRadius > sqDist)
		{
			// VR: I made this similar to sphere-trimesh
			// and I left the old variant for the reference
#if 1
			if (sqDist < FLT_EPSILON)
			{
				//segment intersects the triangle: use the face normal
				patchNormal = triNormal;
			}
			else
			{
				// Closest-point pair defines the candidate contact normal.
				const PxVec3 pointOnSegment = p0 + (p1 - p0) * t;
				const PxVec3 pointOnTriangle = v0 * (1.f - u - v) + v1 * u + v2 * v;
				patchNormal = (pointOnSegment - pointOnTriangle).getNormalized();

				const PxReal cosTheta = patchNormal.dot(triNormal);
				const PxReal tolerance = 0.996f;//around 5 degrees

				//printf("pointOnSegment: (%g %g %g); pointOnTriangle: (%g %g %g); cosTheta: %g\n", pointOnSegment.x, pointOnSegment.y, pointOnSegment.z, pointOnTriangle.x, pointOnTriangle.y, pointOnTriangle.z, cosTheta);

				// If the two normals agree within ~5 degrees, generate contacts now;
				// otherwise defer to the second pass.
				delayContacts = cosTheta <= tolerance;
			}

			if (delayContacts)
			{
				PxU32 mask = getTriangleActiveMaskFromNormal(v0, v1, v2, patchNormal);
				outMask = mask | globalThreadIndex;

				//const PxVec3 pn = patchNormal;
				//printf("pn: (%g %g %g); mask: %x\n", pn.x, pn.y, pn.z, mask);

				// is an edge contact?
				edgeContact = ConvexTriIntermediateData::isEdgeContact(mask);
			}
#else
			PxU32 triFlags = 0;

			triFlags |= (triAdjTrisIdx.x != BOUNDARY && (triAdjTrisIdx.x & NONCONVEX_FLAG) ? 0 : 1 << 3);
			triFlags |= (triAdjTrisIdx.y != BOUNDARY && (triAdjTrisIdx.y & NONCONVEX_FLAG) ? 0 : 1 << 4);
			triFlags |= (triAdjTrisIdx.z != BOUNDARY && (triAdjTrisIdx.z & NONCONVEX_FLAG) ? 0 : 1 << 5);

			if (selectNormal(u, v, triFlags))
			{
				patchNormal = triNormal;
			}
			else
			{
				if (sqDist < 1e-6f)
				{
					//segment intersects the triangle
					patchNormal = triNormal;
				}
				else
				{
					//edge normal
					const PxVec3 pq = p1 - p0;
					const PxVec3 pointOnSegment = p0 + pq * t; // V3ScaleAdd(pq, t, mCapsule.p0);
					const PxReal w = 1.f - (u + v);// FSub(FOne(), FAdd(u, v));
					const PxVec3 pointOnTriangle = v0 * w + v1 * u + v2 * v;// V3ScaleAdd(p0, w, V3ScaleAdd(p1, u, V3Scale(p2, v)));
					patchNormal = (pointOnSegment - pointOnTriangle).getNormalized();// V3Normalize(V3Sub(pointOnSegment, pointOnTriangle));
					delayContacts = true;

				}
			}
#endif

			// Face contacts first, then edge-edge contacts appended to the same buffers.
			nbOutputContacts = generateContacts(v0, v1, v2, triNormal, patchNormal, p0, p1, inflatedRadius, contacts, separations);

			generateEEContacts(v0, v1, v2, patchNormal, p0, p1, sqInflatedRadius, contacts, separations, nbOutputContacts);

			for (PxU32 i = 0; i < nbOutputContacts; ++i)
			{
				//transform contact back to the capsule space
				contacts[i] = meshToCapsule.transform(contacts[i]);
				const PxReal pen = separations[i];
				separation = PxMin(separation, pen);
			}
		}
	}

	// Write per-pair intermediate results to global memory (pointers live in shared memory,
	// the buffers themselves are global).
	SphereTriIntermediateData* PX_RESTRICT sphereTriInterm = *sphereTriIntermPtr;
	PxU32* PX_RESTRICT orderedSphereTriInterm = *orderedSphereTriIntermPtr;
	sphereTriInterm[sphereTriPairOffset].gpuTriIndex = triangleIdx;

	//if (nbOutputContacts)
	//	printf("triangleIdx: %d; nbOutputContacts: %d; delayContacts %d; edgeContact: %d\n", triangleIdx, nbOutputContacts, delayContacts, edgeContact);

	// VR: bits 30 and 31 are used for the search flags
	assert(triangleIdx < (1 << 30));

	PxU32 orderedTriangleIdx = triangleIdx;
	if (nbOutputContacts)
		if (!delayContacts) // not delayed -> face contact
			orderedTriangleIdx |= TriangleSearchFlags::eFACE_CONTACT;
		else if (edgeContact)
			orderedTriangleIdx |= TriangleSearchFlags::eEDGE_CONTACT;

	orderedSphereTriInterm[sphereTriPairOffsetPadded] = orderedTriangleIdx;

	/*sphereTriInterm[sphereTriPairOffset].triAdjTrisIds[0] = triAdjTrisIdx.x;
	sphereTriInterm[sphereTriPairOffset].triAdjTrisIds[1] = triAdjTrisIdx.y;
	sphereTriInterm[sphereTriPairOffset].triAdjTrisIds[2] = triAdjTrisIdx.z;*/

	//PxReal* PX_RESTRICT sphereTriMaxDepth = sphereTriMaxDepthPtr[sphereTriPairOffset];
	PxReal* PX_RESTRICT capsuleTriMaxDepth = *sphereTriMaxDepthPtr;
	capsuleTriMaxDepth[sphereTriPairOffset] = separation;

	SphereTriNormalAndIndex* PX_RESTRICT sphereTriNI = *sphereTriNIPtr;

	PxU32 delayContactMask = delayContacts ? SphereTriNormalAndIndex::DeferredContactMask : 0;

	//rotate the normal into A (capsule) space
	sphereTriNI[sphereTriPairOffset].normal = meshToCapsule.rotate(-patchNormal);
	// Packed word: deferred flag | contact count | triangle index.
	sphereTriNI[sphereTriPairOffset].index = delayContactMask + (nbOutputContacts << SphereTriNormalAndIndex::NbContactsShift) + triangleIdx;

	////printf("sphereTriPairOffset %i nbOutputContacts%i \n", sphereTriPairOffset, nbOutputContacts);

	return nbOutputContacts;
}
|
||||
|
||||
|
||||
// Kernel: convex-vs-triangle-mesh narrowphase. One warp processes one
// convex/triangle pair produced by the midphase. The first thread of each block
// carves the per-pair output buffers out of the stack allocator
// (midphaseAllocate); block 0 additionally publishes the resulting pointers to
// global memory and records the temp-memory high-water mark.
extern "C" __global__
//__launch_bounds__(NP_TRIMESH_WARPS_PER_BLOCK * WARP_SIZE, 32 / NP_TRIMESH_WARPS_PER_BLOCK)
void convexTrimeshNarrowphase(
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,

	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,				// per CM

	ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr,		// per cvx-tri
	ConvexTriContacts** PX_RESTRICT cvxTriContactsPtr,		// per cvx-tri
	PxReal** PX_RESTRICT cvxTriMaxDepthPtr,					// per cvx-tri
	ConvexTriIntermediateData** PX_RESTRICT cvxTriIntermPtr,// per cvx-tri
	PxU32** PX_RESTRICT orderedCvxTriIntermPtr,				// per cvx-tri
	PxU32** PX_RESTRICT cvxTriSecondPassPairsPtr,			// per cvx-tri

	PxU8* PX_RESTRICT stackPtr,
	PxU32* PX_RESTRICT nbPairsGlobal,
	PxU32* PX_RESTRICT nbPaddedPairsGlobal,
	PxU32* PX_RESTRICT nbSecondPassPairs,
	const PxU32 stackSizeBytes,
	ConvexTriContact* tempConvexTriContacts,
	PxU32* pTempContactIndex,
	PxU32* maxTempMemRequirement,
	PxU32* midPhasePairsNeeded,
	const PxU32 nbContactManagers
)
{
	// Pointers into the stack-allocated output buffers; the pointers live in shared
	// memory, the buffers they point to are in global memory.
	__shared__ ConvexTriNormalAndIndex* sCvxTriNIPtr;
	__shared__ ConvexTriContacts* sCvxTriContactsPtr;
	__shared__ PxReal* sCvxTriMaxDepthPtr;
	__shared__ ConvexTriIntermediateData* sCvxTriIntermPtr;
	__shared__ PxU32* sOrderedCvxTriIntermPtr;
	__shared__ uint4* sPairsGPU;
	__shared__ PxU32* sCvxTriSecondPassPairsPtr;

	const PxU32 warpsPerBlock = NP_TRIMESH_WARPS_PER_BLOCK;

	// Per-warp scratch handed down to the narrowphase core.
	__shared__ volatile PxU32 sharedMem[warpsPerBlock][WARP_SIZE * 16];
	volatile PxU32* s_WarpSharedMemory = sharedMem[threadIdx.y];

	// we don't need to check overflow here anymore, the midphase checks it.
	const PxU32 nbPairs = *nbPairsGlobal;
	const PxU32 nbPaddedPairs = *nbPaddedPairsGlobal;

	//each block assigns the corresponding ptr from the stack memory to sCvxTriNIPtr, sCvxTriContactPtr and sCvxTriMaxDepthPtr
	if (threadIdx.x == 0 && threadIdx.y == 0)
	{
		midphaseAllocate(&sCvxTriNIPtr, &sCvxTriContactsPtr, &sCvxTriMaxDepthPtr, &sCvxTriIntermPtr, &sOrderedCvxTriIntermPtr, &sCvxTriSecondPassPairsPtr, &sPairsGPU, stackPtr, nbPairs, nbPaddedPairs);
		if (blockIdx.x == 0)
		{
			atomicMax(maxTempMemRequirement, calculateConvexMeshPairMemRequirement() * (*midPhasePairsNeeded) + calculateAdditionalPadding(nbContactManagers));
		}
	}
	__syncthreads();

	// the first block writes back the pointers to global memory.
	if (threadIdx.x == 0 && threadIdx.y == 0 && blockIdx.x == 0)
	{
		*cvxTriNIPtr = sCvxTriNIPtr;
		*cvxTriContactsPtr = sCvxTriContactsPtr;
		*cvxTriMaxDepthPtr = sCvxTriMaxDepthPtr;
		*cvxTriIntermPtr = sCvxTriIntermPtr;
		*orderedCvxTriIntermPtr = sOrderedCvxTriIntermPtr;

		*cvxTriSecondPassPairsPtr = sCvxTriSecondPassPairsPtr;
	}

	// 1 warp deals with 1 convexMeshPair - so 1 convex against 1 mesh triangle.
	for (PxU32 globalWarpIndex = blockIdx.x * blockDim.y + threadIdx.y; globalWarpIndex < nbPairs; globalWarpIndex += gridDim.x * blockDim.y)
	{
		uint4 curPair = sPairsGPU[globalWarpIndex];
		const PxU32 cmIdx = curPair.x;

		PxgContactManagerInput npWorkItem;
		PxgContactManagerInput_ReadWarp(npWorkItem, cmInputs, cmIdx);

		PxU32 transformCacheRef0 = npWorkItem.transformCacheRef0;
		PxU32 transformCacheRef1 = npWorkItem.transformCacheRef1;
		PxU32 shapeRef0 = npWorkItem.shapeRef0;
		PxU32 shapeRef1 = npWorkItem.shapeRef1;

		// Per-warp staging buffers for the two shapes, filled cooperatively below.
		__shared__ __align__(16) char sShape0[sizeof(PxgShape) * NP_TRIMESH_WARPS_PER_BLOCK];
		__shared__ __align__(16) char sShape1[sizeof(PxgShape) * NP_TRIMESH_WARPS_PER_BLOCK];

		PxgShape* shape0 = reinterpret_cast<PxgShape*>(sShape0);
		PxgShape* shape1 = reinterpret_cast<PxgShape*>(sShape1);

		// The first sizeof(PxgShape)/4 lanes each copy one 32-bit word of the shape data.
		if (threadIdx.x < sizeof(PxgShape) / sizeof(PxU32))
		{
			reinterpret_cast<PxU32*>(&shape0[threadIdx.y])[threadIdx.x] = reinterpret_cast<const PxU32*>(gpuShapes + shapeRef0)[threadIdx.x];
			reinterpret_cast<PxU32*>(&shape1[threadIdx.y])[threadIdx.x] = reinterpret_cast<const PxU32*>(gpuShapes + shapeRef1)[threadIdx.x];
		}

		PxgShape* shape = &shape0[threadIdx.y];
		PxgShape* trimeshShape = &shape1[threadIdx.y];

		// Make the cooperative shape copy visible to the whole warp before reading it.
		__syncwarp();

		// The pair may arrive as (mesh, convex); normalize to (convex, mesh).
		bool flip = shape0[threadIdx.y].type == PxGeometryType::eTRIANGLEMESH;

		if (flip)
		{
			PxSwap(transformCacheRef0, transformCacheRef1);
			PxSwap(shapeRef0, shapeRef1);

			shape = &shape1[threadIdx.y];
			trimeshShape = &shape0[threadIdx.y];
		}

		convexTrimeshNarrowphaseCore(
			globalWarpIndex,
			*shape,
			*trimeshShape,
			transformCacheRef0,
			transformCacheRef1,
			curPair,
			transformCache,
			contactDistance,
			gpuShapes,

			cvxTrimeshPair,
			&sCvxTriNIPtr,
			&sCvxTriContactsPtr,
			&sCvxTriMaxDepthPtr,
			&sCvxTriIntermPtr,
			&sOrderedCvxTriIntermPtr,
			&sCvxTriSecondPassPairsPtr,

			sPairsGPU,
			nbPairsGlobal,
			nbSecondPassPairs,
			s_WarpSharedMemory,
			tempConvexTriContacts,
			stackSizeBytes,
			pTempContactIndex
		);
	}
}
|
||||
|
||||
// Kernel: sphere/capsule-vs-triangle-mesh narrowphase. One THREAD processes one
// sphere(or capsule)/triangle pair. After the per-thread cores run, each warp
// cooperatively compacts the second-pass pair masks and the generated contacts
// into global buffers using ballots, prefix scans and atomics — which is why the
// pair loop is structured so that ALL lanes reach the warp-wide intrinsics even
// when globalThreadIndex >= nbPairs.
extern "C" __global__
void sphereTrimeshNarrowphase(
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,

	SphereMeshPair* PX_RESTRICT sphereTrimeshPair,				// per CM

	SphereTriNormalAndIndex** PX_RESTRICT sphereTriNIPtr,		// per sphere-tri
	SphereTriContacts** PX_RESTRICT sphereTriContactsPtr,		// per sphere-tri
	PxReal** PX_RESTRICT sphereTriMaxDepthPtr,					// per sphere-tri
	SphereTriIntermediateData** PX_RESTRICT sphereTriIntermPtr,	// per sphere-tri
	PxU32** PX_RESTRICT orderedSphereTriIntermPtr,				// per sphere-tri
	PxU32** PX_RESTRICT sphereTriSecondPassPairsPtr,			// per sphere-tri

	PxU8* PX_RESTRICT stackPtr,
	PxU32* PX_RESTRICT nbPairsGlobal,
	PxU32* PX_RESTRICT nbPaddedPairsGlobal,
	PxU32* PX_RESTRICT nbSecondPassPairs,
	const PxU32 stackSizeBytes,
	ConvexTriContact* tempConvexTriContacts,
	PxU32* pTempContactIndex,
	PxU32* maxTempMemRequirement,
	PxU32* midphasePairsNeeded,
	const PxU32 nbContactManagers
)
{

	//the pointer is shared memory. However, the memory we are writing to is in global memory
	__shared__ SphereTriNormalAndIndex* sSphereTriNIPtr;
	__shared__ SphereTriContacts* sSphereTriContactsPtr;
	__shared__ PxReal* sSphereTriMaxDepthPtr;
	__shared__ SphereTriIntermediateData* sSphereTriIntermPtr;
	__shared__ PxU32* sOrderedSphereTriIntermPtr;
	__shared__ uint4* sPairsGPU;
	__shared__ PxU32* sSphereTriSecondPassPairsPtr;

	// Per-thread contact scratch: 2 contact slots per thread (indexed 2*threadIdx.x),
	// sized for a 128-thread block.
	__shared__ char sContacts[sizeof(PxVec3) * 128];
	PxVec3* shContacts = reinterpret_cast<PxVec3*>(sContacts);
	__shared__ PxReal shSeparations[128];

	const PxU32 nbPairs = *nbPairsGlobal;
	const PxU32 nbPaddedPairs = *nbPaddedPairsGlobal;

	// One thread carves the per-pair output buffers out of the stack allocator;
	// block 0 also records the temp-memory high-water mark.
	if (threadIdx.x == 0 && threadIdx.y == 0)
	{
		midphaseAllocate(&sSphereTriNIPtr, &sSphereTriContactsPtr, &sSphereTriMaxDepthPtr, &sSphereTriIntermPtr, &sOrderedSphereTriIntermPtr, &sSphereTriSecondPassPairsPtr, &sPairsGPU, stackPtr, nbPairs, nbPaddedPairs);
		if (blockIdx.x == 0)
		{
			atomicMax(maxTempMemRequirement, calculateConvexMeshPairMemRequirement() * (*midphasePairsNeeded) + calculateAdditionalPadding(nbContactManagers));
		}
	}

	__syncthreads();

	// The first block publishes the buffer pointers to global memory.
	if (threadIdx.x == 0 && threadIdx.y == 0 && blockIdx.x == 0)
	{
		*sphereTriNIPtr = sSphereTriNIPtr;
		*sphereTriContactsPtr = sSphereTriContactsPtr;
		*sphereTriMaxDepthPtr = sSphereTriMaxDepthPtr;
		*sphereTriIntermPtr = sSphereTriIntermPtr;
		*orderedSphereTriIntermPtr = sOrderedSphereTriIntermPtr;
		*sphereTriSecondPassPairsPtr = sSphereTriSecondPassPairsPtr;
	}

	//each thread deals with one pair; the loop bound deliberately does NOT include the
	//per-thread offset so every lane keeps iterating for the warp-wide compaction below
	//for (PxU32 globalThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; globalThreadIndex < nbPairs; globalThreadIndex += gridDim.x * blockDim.x)
	for (PxU32 i = 0; i < nbPairs; i += gridDim.x * blockDim.x)
	{
		PxU32 globalThreadIndex = blockIdx.x * blockDim.x + threadIdx.x + i;

		PxU32 nbContacts = 0;
		PxU32 outMask = 0;	// non-zero -> this pair needs a second pass
		uint4 curPair;

		if (globalThreadIndex < nbPairs)
		{

			//printf("globalThreadIndex %i nbPairs %i\n", globalThreadIndex, nbPairs);
			// Pair, stackPtr in convexMeshMidphase kernel
			curPair = sPairsGPU[globalThreadIndex];

			const PxU32 cmIdx = curPair.x;

			PxgContactManagerInput npWorkItem = cmInputs[cmIdx];


			const PxgShape* shape0 = &gpuShapes[npWorkItem.shapeRef0];
			const PxgShape* trimeshShape = &gpuShapes[npWorkItem.shapeRef1];
			PxGeometryType::Enum type0 = PxGeometryType::Enum(shape0->type);


			// The pair may arrive as (mesh, sphere/capsule); normalize the order.
			bool flip = type0 == PxGeometryType::eTRIANGLEMESH;

			if (flip)
			{
				PxSwap(npWorkItem.shapeRef0, npWorkItem.shapeRef1);
				PxSwap(npWorkItem.transformCacheRef0, npWorkItem.transformCacheRef1);

				PxSwap(shape0, trimeshShape);

				type0 = PxGeometryType::Enum(shape0->type);
			}

			// Dispatch by primitive type; both cores share the same per-pair output buffers.
			if (type0 == PxGeometryType::eSPHERE)
			{
				nbContacts = sphereTrimeshNarrowphaseCore(
					globalThreadIndex,

					npWorkItem,
					*shape0,
					*trimeshShape,
					curPair,
					transformCache,
					contactDistance,
					sphereTrimeshPair,
					&sSphereTriNIPtr,
					&sSphereTriContactsPtr,
					&sSphereTriMaxDepthPtr,
					&sSphereTriIntermPtr,
					&sOrderedSphereTriIntermPtr,
					&sSphereTriSecondPassPairsPtr,
					nbPairsGlobal,
					nbSecondPassPairs,
					shContacts[threadIdx.x*2],
					shSeparations[threadIdx.x*2],
					outMask
				);
			}
			else
			{
				nbContacts = capsuleTrimeshNarrowphaseCore(
					globalThreadIndex,
					npWorkItem,
					*shape0,
					*trimeshShape,
					curPair,
					transformCache,
					contactDistance,
					sphereTrimeshPair,
					&sSphereTriNIPtr,
					&sSphereTriContactsPtr,
					&sSphereTriMaxDepthPtr,
					&sSphereTriIntermPtr,
					&sOrderedSphereTriIntermPtr,
					&sSphereTriSecondPassPairsPtr,
					nbSecondPassPairs,
					&shContacts[threadIdx.x * 2],
					&shSeparations[threadIdx.x * 2],
					outMask
				);

			}
		}

		// Warp-cooperative compaction. All 32 lanes participate (FULL_MASK): lanes
		// past nbPairs contribute outMask == 0 and nbContacts == 0.
		PxU32 outMaskWriteMask = __ballot_sync(FULL_MASK, outMask);
		PxU32 count = __popc(outMaskWriteMask);
		PxU32 startIndex = 0;

		const PxU32 threadIndexInWarp = threadIdx.x & 31;

		//Now output contacts!!!!

		PxU32 inclusiveSum = warpScan<AddOpPxU32, PxU32>(FULL_MASK, nbContacts);

		PxU32 totalContacts = __shfl_sync(FULL_MASK, inclusiveSum, 31);

		if (totalContacts)
		{
			// Append this warp's second-pass pair masks to the global list with one
			// atomic per warp, then scatter compacted via an exclusive ballot scan.
			if (count)
			{
				if (threadIndexInWarp == 0)
					startIndex = atomicAdd(nbSecondPassPairs, count);

				startIndex = __shfl_sync(FULL_MASK, startIndex, 0);

				if (outMask)
				{
					PxU32 offset = warpScanExclusive(outMaskWriteMask, threadIndexInWarp);
					sSphereTriSecondPassPairsPtr[startIndex + offset] = outMask;
				}
			}

			// Reserve space for the warp's contacts in one atomic (lane 31 holds the total).
			if (threadIndexInWarp == 31)
				startIndex = atomicAdd(pTempContactIndex, inclusiveSum);

			startIndex = __shfl_sync(FULL_MASK, startIndex, 31);

			// Convert the inclusive scan into this lane's exclusive offset.
			startIndex += inclusiveSum - nbContacts;

			// Copy this thread's contacts from shared scratch to the global temp buffer.
			for (PxU32 i = 0, idx = 2*threadIdx.x; i < nbContacts; ++i, idx++)
			{
				tempConvexTriContacts[startIndex + i].contact_sepW = make_float4(shContacts[idx].x, shContacts[idx].y,
					shContacts[idx].z, shSeparations[idx]);
			}

			// Record where this pair's contacts start in the temp buffer.
			if (nbContacts)
			{
				PxU32 cmIdx = curPair.x;
				PxU32 testOffset = curPair.z;

				SphereMeshPair& pair = sphereTrimeshPair[cmIdx];
				PxU32 sphereTriPairOffset = pair.startIndex + testOffset;

				sSphereTriContactsPtr[sphereTriPairOffset].index = startIndex;
			}
		}




	}
}
|
||||
379
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexMeshCorrelate.cu
vendored
Normal file
379
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexMeshCorrelate.cu
vendored
Normal file
@@ -0,0 +1,379 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "foundation/PxMath.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxTransform.h"
|
||||
|
||||
#include "AlignedTransform.h"
|
||||
|
||||
#include "PxgContactManager.h"
|
||||
#include "PxgNpKernelIndices.h"
|
||||
#include "PxgPersistentContactManifold.h"
|
||||
#include "PxsContactManagerState.h"
|
||||
|
||||
#include "PxgCommonDefines.h"
|
||||
#include "reduction.cuh"
|
||||
#include "contactReduction.cuh"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
extern "C" __host__ void initNarrowphaseKernels3() {}
|
||||
|
||||
#include "manifold.cuh"
|
||||
#include "nputils.cuh"
|
||||
|
||||
|
||||
#include "cudaNpCommon.h"
|
||||
#include "convexNpCommon.h"
|
||||
|
||||
|
||||
//This method compacts the depthBuffer + nicBuffer to only include the set of triangles that actually have contacts.
//These then must be sorted based on depth values to construct patches, then appended and then output.
//
// Warp-cooperative stream compaction: each iteration the warp loads up to 32
// entries, ballots which ones are valid (finite depth AND at least one contact),
// and scatters the valid ones to the front of the same buffers.
// tempCounts[dest] packs (originalIndex << 4) | nbContacts for later lookup.
// Returns the number of surviving triangles.
__device__ PX_FORCE_INLINE
int purgeEmptyTriangles(const PxU32 syncMask, PxReal* depthBuffer, ConvexTriNormalAndIndex* nicBuffer, int count, PxU32* tempCounts)
{
	const int tI = threadIdx.x;

	PxU32 nbTriangles = 0;

	int loaded = 0;
	for (; loaded < count; loaded += 32)
	{
		int nbToLoad = PxMin(count - loaded, 32);

		PxReal d = tI < nbToLoad ? depthBuffer[loaded + tI] : FLT_MAX;
		// Guard the contact-count read with tI < nbToLoad, mirroring the depth read
		// above. The previous condition (`nbToLoad ? ...`) was always true inside the
		// loop, so lanes with tI >= nbToLoad read nicBuffer[loaded + tI] past the last
		// valid entry on the final iteration; the fetched value was discarded by the
		// `valid` test below, but the out-of-range read itself is now avoided.
		PxU32 nbContacts = tI < nbToLoad ? ConvexTriNormalAndIndex::getNbContacts(nicBuffer[loaded + tI].index) : 0;
		int valid = d != FLT_MAX && nbContacts > 0;
		int validBits = __ballot_sync(syncMask, valid);
		// Destination = number of valid lanes below this one + triangles already kept.
		int dest = __popc(validBits & ((1 << tI) - 1)) + nbTriangles;

		if (valid)
		{
			depthBuffer[dest] = d;
			tempCounts[dest] = ((loaded + tI) << 4) | nbContacts;
			nicBuffer[dest] = nicBuffer[loaded + tI];
		}

		nbTriangles += __popc(validBits);
	}

	return nbTriangles;
}
|
||||
|
||||
// Per-warp scratch for the correlation pass.
// NOTE: `pair` is filled by a raw word-by-word copy from global memory
// (((int*)(&s->pair))[tI] = ...), so its layout/size must stay in sync with
// ConvexMeshPair; do not reorder or insert members before it.
struct Scratch
{
	volatile ConvexMeshPair pair;	// cached copy of the warp's ConvexMeshPair
	int pairIndex;					// index of the pair this warp is processing
};
|
||||
|
||||
__device__
|
||||
void correlate(
|
||||
const ConvexMeshPair* PX_RESTRICT meshPairBuffer,
|
||||
ConvexTriNormalAndIndex ** PX_RESTRICT nicBufferPtr,
|
||||
const ConvexTriContacts ** PX_RESTRICT contactBufferPtr,
|
||||
PxReal ** PX_RESTRICT depthBufferPtr,
|
||||
PxU32* PX_RESTRICT tempCounts,
|
||||
Scratch* PX_RESTRICT s,
|
||||
PxgPersistentContactMultiManifold* PX_RESTRICT outBuffer,
|
||||
PxsContactManagerOutput* PX_RESTRICT cmOutputs,
|
||||
const PxU32 numPairs,
|
||||
const PxReal clusterBias,
|
||||
ConvexTriContact* PX_RESTRICT tempConvexTriContacts,
|
||||
PxU32* PX_RESTRICT pTempContactIndex
|
||||
)
|
||||
{
|
||||
__shared__ PxU32 triangleRunSums[CORRELATE_WARPS_PER_BLOCK][WARP_SIZE];
|
||||
__shared__ PxU32 startIndices[CORRELATE_WARPS_PER_BLOCK][WARP_SIZE];
|
||||
__shared__ PxU32 triIndices[CORRELATE_WARPS_PER_BLOCK][WARP_SIZE];
|
||||
|
||||
PxU32* triRunSums = triangleRunSums[threadIdx.y];
|
||||
PxU32* triStartIndex = startIndices[threadIdx.y];
|
||||
PxU32* triInds = triIndices[threadIdx.y];
|
||||
|
||||
const int pairIndex = threadIdx.y + CORRELATE_WARPS_PER_BLOCK * blockIdx.x;
|
||||
|
||||
if (pairIndex >= numPairs)
|
||||
return;
|
||||
|
||||
s->pairIndex = pairIndex;
|
||||
const int tI = threadIdx.x;
|
||||
if (tI < sizeof(ConvexMeshPair) / 4)
|
||||
((int*)(&s->pair))[tI] = ((int*)(meshPairBuffer + pairIndex))[tI];
|
||||
|
||||
__syncwarp();
|
||||
|
||||
int start = s->pair.startIndex, count = s->pair.count;
|
||||
|
||||
if (count == CONVEX_TRIMESH_CACHED)
|
||||
{
|
||||
// We've hit cache, nothing to do here
|
||||
return;
|
||||
}
|
||||
|
||||
ConvexTriNormalAndIndex* PX_RESTRICT nicBuffer = *nicBufferPtr;
|
||||
const ConvexTriContacts* PX_RESTRICT contactBuffer = *contactBufferPtr;
|
||||
PxReal* PX_RESTRICT depthBuffer = *depthBufferPtr;
|
||||
|
||||
|
||||
PxU32 nbTriangles = purgeEmptyTriangles(FULL_MASK, depthBuffer + start, nicBuffer + start, count, tempCounts + start);
|
||||
|
||||
__syncwarp();
|
||||
|
||||
|
||||
if (nbTriangles == 0)
|
||||
{
|
||||
outBuffer[pairIndex].mNbManifolds = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
//Now, we loop creating patches and contacts from the triangles that actually have contacts!
|
||||
|
||||
PxU32 nbPatches = 0;
|
||||
PxU32 numContactsTotal = 0;
|
||||
|
||||
PxU32 remainingTriangles = nbTriangles;
|
||||
|
||||
|
||||
|
||||
for (; nbPatches < MULTIMANIFOLD_MAX_MANIFOLDS && remainingTriangles != 0; nbPatches++)
|
||||
{
|
||||
//We loop through all triangles, load contacts, compress etc...
|
||||
//(1) Find deepest triangle...
|
||||
|
||||
|
||||
int bestIndex = 0;
|
||||
PxReal bestDepth = PX_MAX_F32;
|
||||
for (PxU32 i = threadIdx.x; i < nbTriangles; i += 32)
|
||||
{
|
||||
if (i < nbTriangles)
|
||||
{
|
||||
if (tempCounts[start + i]) //Still has contacts!
|
||||
{
|
||||
PxReal d = depthBuffer[start + i];
|
||||
if (d < bestDepth)
|
||||
{
|
||||
bestDepth = d;
|
||||
bestIndex = start + i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int i0 = minIndex(bestDepth, FULL_MASK, bestDepth);
|
||||
|
||||
bestIndex = __shfl_sync(FULL_MASK, bestIndex, i0);
|
||||
|
||||
//Now we know which triangle defines this plane...
|
||||
|
||||
PxVec3 bestNormal = nicBuffer[bestIndex].normal;
|
||||
|
||||
PxU32 currentContactMask = 0;
|
||||
|
||||
|
||||
PxVec3 pA(0.f);
|
||||
PxReal separation = 0.f;
|
||||
PxU32 contactTriangleIndex = 0;
|
||||
bool hasContact = false;
|
||||
|
||||
for (PxU32 i = 0; i < nbTriangles; i += 32)
|
||||
{
|
||||
PxVec3 normal(0.f);
|
||||
PxU32 triIndex = 0;
|
||||
PxU32 nbContacts = 0;
|
||||
PxU32 startIndex = 0;
|
||||
|
||||
if ((i + threadIdx.x) < nbTriangles)
|
||||
{
|
||||
PxU32 index = tempCounts[start + i + threadIdx.x];
|
||||
//We read contacts from the temp contacts buffer because this will get cleared when the contacts are considered
|
||||
//for a patch...
|
||||
nbContacts = index & 7;
|
||||
startIndex = index >> 4;
|
||||
if (nbContacts)
|
||||
{
|
||||
ConvexTriNormalAndIndex nic = nicBuffer[start + i + threadIdx.x];
|
||||
normal = nic.normal;
|
||||
triIndex = ConvexTriNormalAndIndex::getTriangleIndex(nic.index);
|
||||
}
|
||||
}
|
||||
|
||||
bool accept = normal.dot(bestNormal) > PATCH_ACCEPTANCE_EPS;
|
||||
|
||||
if(!accept)
|
||||
nbContacts = 0;
|
||||
|
||||
|
||||
PxU32 acceptMask = __ballot_sync(FULL_MASK, accept);
|
||||
|
||||
remainingTriangles -= __popc(acceptMask);
|
||||
|
||||
PxU32 summedContacts = warpScan<AddOpPxU32, PxU32>(FULL_MASK, nbContacts);
|
||||
|
||||
triRunSums[threadIdx.x] = summedContacts - nbContacts;
|
||||
triStartIndex[threadIdx.x] = startIndex;
|
||||
triInds[threadIdx.x] = triIndex;
|
||||
|
||||
PxU32 totalContacts = __shfl_sync(FULL_MASK, summedContacts, 31);
|
||||
__syncwarp();
|
||||
|
||||
for (PxU32 c = 0; c < totalContacts;)
|
||||
{
|
||||
//calculate my read index (where I'm offset from...)
|
||||
PxU32 readMask = ~currentContactMask;
|
||||
bool needsContact = readMask & (1 << threadIdx.x);
|
||||
PxU32 readIndex = c + warpScanExclusive(readMask, threadIdx.x);
|
||||
|
||||
if (needsContact && readIndex < totalContacts)
|
||||
{
|
||||
PxU32 idx = binarySearch<PxU32>(triRunSums, 32u, readIndex);
|
||||
|
||||
PxU32 offset = readIndex - triRunSums[idx];
|
||||
|
||||
PxU32 startIdx = triStartIndex[idx] + start;
|
||||
|
||||
const PxU32 contactStartIdx = contactBuffer[startIdx].index;
|
||||
|
||||
|
||||
float4 v = tempConvexTriContacts[contactStartIdx + offset].contact_sepW;
|
||||
|
||||
pA = PxVec3(v.x, v.y, v.z);
|
||||
separation = v.w;
|
||||
contactTriangleIndex = triInds[idx];
|
||||
hasContact = true;
|
||||
}
|
||||
|
||||
currentContactMask = __ballot_sync(FULL_MASK, hasContact); //Mask every thread that has a contact!
|
||||
|
||||
c += __popc(readMask);
|
||||
PxU32 nbContacts = __popc(currentContactMask);
|
||||
|
||||
if (nbContacts > SUBMANIFOLD_MAX_CONTACTS)
|
||||
{
|
||||
currentContactMask = contactReduce<true, false, SUBMANIFOLD_MAX_CONTACTS, false>(pA, separation, bestNormal, currentContactMask, clusterBias);
|
||||
}
|
||||
hasContact = currentContactMask & (1 << threadIdx.x);
|
||||
}
|
||||
|
||||
__syncwarp(); //triRunSums is read and written in the same loop - separate read and write with syncs
|
||||
|
||||
if (accept)
|
||||
{
|
||||
tempCounts[start + i + threadIdx.x] = 0; //Clear this triangle - it is done!
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
__syncwarp();
|
||||
PxU32 nbContacts = __popc(currentContactMask);
|
||||
|
||||
//When we reach here, we have handled all the triangles, so we should output this patch...
|
||||
PxgContact * cp = outBuffer[s->pair.cmIndex].mContacts[nbPatches] + warpScanExclusive(currentContactMask, tI);
|
||||
|
||||
if (threadIdx.x == 0)
|
||||
outBuffer[s->pair.cmIndex].mNbContacts[nbPatches] = nbContacts;
|
||||
|
||||
numContactsTotal += nbContacts;
|
||||
if (currentContactMask & (1 << tI))
|
||||
{
|
||||
PxVec3 nor = ldS(s->pair.aToB).rotate(bestNormal);
|
||||
|
||||
cp->normal = -nor;
|
||||
cp->pointA = pA;
|
||||
// TODO: PxTransform::transform is incredibly profligate with registers (i.e. the write-out phase uses
|
||||
// more regs than the contact culling phase). Better would be to stash the transform in matrix form
|
||||
// when not under register pressure (i.e. when initially loading it) then use a custom shmem
|
||||
// matrix multiply, which requires essentially no tmps
|
||||
|
||||
PxVec3 pointB = ldS(s->pair.aToB).transform(pA) + nor * separation;
|
||||
|
||||
cp->pointB = pointB;
|
||||
cp->penetration = separation;
|
||||
cp->triIndex = contactTriangleIndex;
|
||||
|
||||
/*printf("cp normal(%f, %f, %f)\n", cp->normal.x, cp->normal.y, cp->normal.z);
|
||||
printf("cp pointA(%f, %f, %f)\n", pA.x, pA.y, pA.z);
|
||||
printf("cp pointB(%f, %f, %f)\n", pointB.x, pointB.y, pointB.z);*/
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (nbPatches > 0)
|
||||
{
|
||||
// TODO avoroshilov: decide what to do with aligned transforms
|
||||
PxAlignedTransform aToB_aligned;// = s->pair.aToB;
|
||||
aToB_aligned.p.x = s->pair.aToB.p.x;
|
||||
aToB_aligned.p.y = s->pair.aToB.p.y;
|
||||
aToB_aligned.p.z = s->pair.aToB.p.z;
|
||||
|
||||
aToB_aligned.q.q.x = s->pair.aToB.q.x;
|
||||
aToB_aligned.q.q.y = s->pair.aToB.q.y;
|
||||
aToB_aligned.q.q.z = s->pair.aToB.q.z;
|
||||
aToB_aligned.q.q.w = s->pair.aToB.q.w;
|
||||
|
||||
PxAlignedTransform_WriteWarp(&outBuffer[s->pair.cmIndex].mRelativeTransform, aToB_aligned);
|
||||
}
|
||||
|
||||
outBuffer[s->pair.cmIndex].mNbManifolds = nbPatches;
|
||||
}
|
||||
|
||||
|
||||
// Kernel entry point for convex-vs-trimesh contact correlation.
//
// Each warp in the block processes one convex/mesh pair (warp id = threadIdx.y).
// The block carves one Scratch workspace per warp out of shared memory and
// hands everything over to the correlate() worker, which does the actual
// patch/contact correlation.
extern "C" __global__
void convexTrimeshCorrelate(
	const ConvexMeshPair * PX_RESTRICT pairs,
	PxReal ** PX_RESTRICT depthsPtr,
	ConvexTriNormalAndIndex ** PX_RESTRICT normalAndIndexPtr,
	const ConvexTriContacts ** PX_RESTRICT contactsPtr,

	PxgPersistentContactMultiManifold * PX_RESTRICT output,
	PxsContactManagerOutput * PX_RESTRICT cmOutputs,
	PxU32* PX_RESTRICT tempCountAndOffsetBuffer,
	const PxU32 numPairs,
	const PxReal clusterBias,
	ConvexTriContact* PX_RESTRICT tempConvexTriContacts,
	PxU32* pTempContactIndex
)
{
	// One Scratch slot per warp of this block.
	__shared__ char scratchMem[sizeof(Scratch) * CORRELATE_WARPS_PER_BLOCK];

	const int warpInBlock = threadIdx.y;
	Scratch* warpScratch = reinterpret_cast<Scratch*>(scratchMem) + warpInBlock;

	correlate(
		pairs,
		normalAndIndexPtr,
		contactsPtr,
		depthsPtr,
		tempCountAndOffsetBuffer,
		warpScratch,
		output,
		cmOutputs,
		numPairs,
		clusterBias,
		tempConvexTriContacts,
		pTempContactIndex);
}
|
||||
1530
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexMeshMidphase.cu
vendored
Normal file
1530
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexMeshMidphase.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
347
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexMeshOutput.cu
vendored
Normal file
347
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexMeshOutput.cu
vendored
Normal file
@@ -0,0 +1,347 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "foundation/PxAssert.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "PxContact.h"
|
||||
|
||||
#include "PxsMaterialCore.h"
|
||||
#include "PxsContactManagerState.h"
|
||||
#include "PxgPersistentContactManifold.h"
|
||||
#include "PxgConvexConvexShape.h"
|
||||
#include "PxgContactManager.h"
|
||||
#include "cudaNpCommon.h"
|
||||
#include "PxsTransformCache.h"
|
||||
#include "dataReadWriteHelper.cuh"
|
||||
#include "materialCombiner.cuh"
|
||||
#include "geometry/PxGeometry.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Empty host-side entry point. NOTE(review): presumably exists so the host
// build references this translation unit and its kernels are kept by the
// linker -- confirm against the module registration code.
extern "C" __host__ void initNarrowphaseKernels5() {}
|
||||
|
||||
// Emits one PxContactPatch header into the compressed patch stream.
//
// Combines the two shapes' materials (friction, restitution, damping, flags)
// via combineMaterials(), then fills patch slot 'patchWriteIndex' so that it
// refers to the run of 'nbContacts' points starting at 'contactWriteIndex'.
// Mass-modification scales are left at the neutral value 1.
__device__ void writeCompressedContact(
	PxContactPatch* PX_RESTRICT patches, PxU32 patchWriteIndex, PxU32 contactWriteIndex,
	const PxU32 nbContacts,
	const PxVec3 normal,
	const PxU32 materialIndex0, const PxU32 materialIndex1,
	const PxsMaterialData* PX_RESTRICT materials
)
{
	PxReal sFric, dFric, rest, damp;
	PxU32 matFlags;

	combineMaterials(materials, static_cast<PxU16>(materialIndex0), static_cast<PxU16>(materialIndex1),
		matFlags, sFric, dFric,
		rest, damp);

	PxContactPatch& dst = patches[patchWriteIndex];

	dst.normal = normal;
	dst.nbContacts = nbContacts;
	dst.startContactIndex = contactWriteIndex;

	//KS - we could probably compress this further into the header but the complexity might not be worth it
	dst.staticFriction = sFric;
	dst.dynamicFriction = dFric;
	dst.restitution = rest;
	dst.damping = damp;
	dst.materialIndex0 = materialIndex0;
	dst.materialIndex1 = materialIndex1;
	dst.materialFlags = static_cast<PxU8>(matFlags);

	// Neutral mass modification: unit linear/angular scales for both bodies.
	dst.mMassModification.linear0 = 1.f;
	dst.mMassModification.linear1 = 1.f;
	dst.mMassModification.angular0 = 1.f;
	dst.mMassModification.angular1 = 1.f;

	dst.internalFlags = PxContactPatch::eHAS_FACE_INDICES;
}
|
||||
|
||||
// Warp-level average of a per-contact scalar.
//
// Each contact of a sub-manifold lives in its own lane; this folds up to 8
// adjacent lanes together with a shuffle-down tree (radii 4, 2, 1) and divides
// by the contact count. The 'reductionRadius < nbContacts' guard keeps lanes
// beyond the manifold's real contact count out of the sum.
// NOTE(review): the complete sum presumably lands only in the first lane of
// each 8-lane group (higher lanes hold partial sums) -- confirm at call sites.
__device__ inline float getAvgContactValue(float val, PxU32 nbContacts)
{
	PxReal avgValue = val;

#pragma unroll 3
	for (PxU32 reductionRadius = 4; reductionRadius > 0; reductionRadius >>= 1)
	{
		// Pull the running sum from the lane 'reductionRadius' above this one.
		PxReal tmp = __shfl_down_sync(FULL_MASK, avgValue, reductionRadius);

		// Only accumulate contributions that correspond to valid contacts.
		if (reductionRadius < nbContacts)
			avgValue += tmp;
	}

	return avgValue / PxReal(nbContacts);
}
|
||||
|
||||
//calculate patch normal by average each normal and transform the normal into world space
|
||||
// Returns the patch normal in world space.
//
// Despite the "average each normal" wording at the call site, no averaging
// happens here: every lane with a valid contact slot reads the normal of the
// FIRST contact of the manifold (contacts[0]), rotates it by meshTransform and
// re-normalizes. Lanes whose slot index is past numContacts contribute a zero
// vector (which getNormalized() leaves as zero).
// NOTE(review): manifoldIndex and threadIndexInWarp are currently unused --
// confirm whether they are kept for a planned per-contact average.
__device__ static inline PxVec3 getWorldNormal(
	const PxgContact* contacts, const PxTransform& meshTransform,
	const PxU32 numContacts, const PxU32 manifoldIndex, const PxU32 threadIndexInWarp, const PxU32 threadIndexInManifold
)
{
	PxVec3 normal = (threadIndexInManifold >= numContacts) ? PxVec3(0, 0, 0) : contacts[0].normal;

	//transform into world space
	PxVec3 unNormalizedNormal = meshTransform.rotate(normal);

	return unNormalizedNormal.getNormalized();
}
|
||||
|
||||
//Each blocks has 8 warps and each warps deal with a pair
|
||||
//max number of contacts in each PxgPersistentContactMultiManifold is 24(maxinum 4 single manifold and maxinum 6 contacts in each manifold )
|
||||
// Compresses each pair's persistent multi-manifold into the narrowphase output
// streams (contact points, patch headers, face indices) and updates the
// contact manager's packed status word.
//
// One warp per pair. Inside the warp, lanes are grouped in runs of
// PXG_SUBMANIFOLD_MAX_CONTACTS: each group handles one sub-manifold (= one
// output patch) and each lane one contact slot of that manifold.
extern "C" __global__
void convexTrimeshFinishContacts(
	const ConvexMeshPair * pairs,
	const PxsCachedTransform * transformCache,
	const PxgShape* PX_RESTRICT gpuShapes,
	const PxgContactManagerInput * cmInputs,
	PxsContactManagerOutput * cmOutputs,
	const PxgPersistentContactMultiManifold* cmMultiManifold,
	const PxU32 numPairs,
	const PxsMaterialData* PX_RESTRICT materials,
	PxU8* PX_RESTRICT contactStream,//contact and pen
	PxU8* PX_RESTRICT patchStream, // normal and friction
	PxU8* PX_RESTRICT forceAndIndiceStream, //triangle(face) index is appended after the force stream
	bool insertAveragePoint,
	PxgPatchAndContactCounters* PX_RESTRICT patchAndContactCounters,
	PxU32* touchChangeFlags,
	PxU32* patchChangeFlags,
	PxU8* startContactPatches,
	PxU8* startContactPoints,
	PxU8* startContactForces,
	PxU32 patchBytesLimit,
	PxU32 contactBytesLimit,
	PxU32 forceBytesLimit
)
{
	const PxU32 globalThreadIndex = threadIdx.x + blockIdx.x * blockDim.x;

	// One warp per pair.
	const PxU32 globalWarpIndex = globalThreadIndex / WARP_SIZE;
	if(globalWarpIndex >= numPairs)
		return;


	const PxgPersistentContactMultiManifold& multiManifold = cmMultiManifold[globalWarpIndex];
	PxU32 nbManifolds = multiManifold.mNbManifolds;

	// Lane layout: consecutive groups of PXG_SUBMANIFOLD_MAX_CONTACTS lanes
	// map to one sub-manifold; within a group each lane owns one contact slot.
	const PxU32 threadIndexInWarp = threadIdx.x & (WARP_SIZE - 1);
	const PxU32 singleManifoldIndex = threadIndexInWarp / PXG_SUBMANIFOLD_MAX_CONTACTS;
	const PxU32 threadIndexInManifold = threadIndexInWarp % PXG_SUBMANIFOLD_MAX_CONTACTS;

	const PxU32 numContacts = (singleManifoldIndex >= nbManifolds) ? 0 : multiManifold.mNbContacts[singleManifoldIndex];

	// Does this lane's slot hold a real contact?
	const bool hasContacts = threadIndexInManifold < numContacts;

	PxU32 contactMask = __ballot_sync(FULL_MASK, (PxU32) hasContacts);
	PxU32 totalNumContacts = __popc(contactMask);

	const PxU32 writeIndex = warpScanExclusive(contactMask, threadIndexInWarp); //tells me how many threads preceding me had contacts (i.e. what my write index will be)


	//Each single manifold should be one patch.
	//0xFFFFFFFF marks "not allocated / overflowed" for all three offsets.
	PxU32 patchByteOffset = 0xFFFFFFFF;
	PxU32 contactByteOffset = 0xFFFFFFFF;
	PxU32 forceAndIndiceByteOffset = 0xFFFFFFFF;

	PxsContactManagerOutput* output = cmOutputs + globalWarpIndex;

	// Unpack the manager's packed status word; see the merge() calls below for
	// the byte layout (prev patch count / status flags / manifold count).
	PxU32 allflags = reinterpret_cast<PxU32*>(&output->allflagsStart)[0];
	PxU8 oldStatusFlags = u16Low(u32High(allflags));
	PxU8 statusFlags = oldStatusFlags;

	// Touch state is being recomputed this frame.
	statusFlags &= (~PxsContactManagerStatusFlag::eTOUCH_KNOWN);

	if (totalNumContacts != 0)
		statusFlags |= PxsContactManagerStatusFlag::eHAS_TOUCH;
	else
		statusFlags |= PxsContactManagerStatusFlag::eHAS_NO_TOUCH;

	PxU8 prevPatches = u16High(u32Low(allflags)); //Get out the current number of patches to store as the previous frame's number of patches

	bool overflow = false;

	// Lane 0 reserves room in the global streams and publishes the per-manager
	// bookkeeping; the byte offsets are broadcast to the other lanes below.
	if (threadIndexInWarp == 0 )
	{
		// Optionally reserve one extra (averaged) point per manifold.
		// NOTE(review): this adjusted totalNumContacts exists only in lane 0;
		// other lanes keep the pre-adjustment value used for faceIndex below --
		// confirm this divergence is intended when insertAveragePoint is set.
		PxU32 nbInsertAveragePoint = insertAveragePoint ? nbManifolds : 0;
		totalNumContacts = totalNumContacts + nbInsertAveragePoint;

		if (totalNumContacts)
		{
			// Grab stream space with atomics; the force/index stream holds the
			// force values plus the face-index array appended after them (x2).
			patchByteOffset = atomicAdd(&(patchAndContactCounters->patchesBytes), sizeof(PxContactPatch)*nbManifolds);
			contactByteOffset = atomicAdd(&(patchAndContactCounters->contactsBytes), sizeof(PxContact) * totalNumContacts);
			forceAndIndiceByteOffset = atomicAdd(&(patchAndContactCounters->forceAndIndiceBytes), sizeof(PxU32) * totalNumContacts * 2);

			// On any overflow: record the error, invalidate that offset and
			// drop this pair's output entirely.
			if ((patchByteOffset + sizeof(PxContactPatch) * nbManifolds) > patchBytesLimit)
			{
				patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::PATCH_BUFFER_OVERFLOW);
				patchByteOffset = 0xFFFFFFFF;
				overflow = true;
			}
			else if ((contactByteOffset + sizeof(PxContact) * totalNumContacts) > contactBytesLimit)
			{
				patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::CONTACT_BUFFER_OVERFLOW);
				contactByteOffset = 0xFFFFFFFF;
				overflow = true;
			}
			else if ((forceAndIndiceByteOffset + sizeof(PxU32) * totalNumContacts * 2) > forceBytesLimit)
			{
				patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::FORCE_BUFFER_OVERFLOW);
				forceAndIndiceByteOffset = 0xFFFFFFFF;
				overflow = true;
			}

			if (overflow)
			{
				nbManifolds = totalNumContacts = 0;

				statusFlags &= (~PxsContactManagerStatusFlag::eTOUCH_KNOWN);
				statusFlags |= PxsContactManagerStatusFlag::eHAS_NO_TOUCH;
			}

			//printf("threadIdex0 nbManifolds %i\n", nbManifolds);
		}

		// Report touch/patch-count transitions so the CPU side can raise events.
		bool previouslyHadTouch = oldStatusFlags & PxsContactManagerStatusFlag::eHAS_TOUCH;
		bool prevTouchKnown = oldStatusFlags & PxsContactManagerStatusFlag::eTOUCH_KNOWN;
		bool currentlyHasTouch = nbManifolds != 0;

		const bool change = (previouslyHadTouch ^ currentlyHasTouch) || (!prevTouchKnown);
		touchChangeFlags[globalWarpIndex] = change;
		patchChangeFlags[globalWarpIndex] = (prevPatches != nbManifolds);

		// Sanity bound on the contact count before repacking the status word.
		assert(totalNumContacts < 100);
		reinterpret_cast<PxU32*>(&output->allflagsStart)[0] = merge(merge(prevPatches, statusFlags),
			merge(nbManifolds, PxU8(0)));
		output->nbContacts = totalNumContacts;

		if (!overflow)
		{
			output->contactForces = reinterpret_cast<PxReal*>(startContactForces + forceAndIndiceByteOffset);
			output->contactPatches = startContactPatches + patchByteOffset;
			output->contactPoints = startContactPoints + contactByteOffset;
		}
		else
		{
			output->contactForces = 0;
			output->contactPatches = 0;
			output->contactPoints = 0;
		}
	}

	// Whole warp bails if lane 0 hit an overflow, or there is nothing to write.
	if(__any_sync(FULL_MASK, overflow) || nbManifolds == 0)
		return;


	//const uint4 npWorkItem = *(reinterpret_cast<const uint4*>(cmInputs) + globalWarpIndex);

	PxgContactManagerInput npWorkItem;
	PxgContactManagerInput_ReadWarp(npWorkItem, cmInputs, globalWarpIndex);

	PxU32 shapeRef0 = npWorkItem.shapeRef0;
	PxU32 shapeRef1 = npWorkItem.shapeRef1;
	PxU32 transformCacheRef0 = npWorkItem.transformCacheRef0;
	PxU32 transformCacheRef1 = npWorkItem.transformCacheRef1;

	// Canonicalize the pair so shape0 is the convex and shape1 the mesh;
	// 'flip' remembers that the normal must be negated for the caller's order.
	bool flip = gpuShapes[shapeRef0].type == PxGeometryType::eTRIANGLEMESH;

	if (flip)
	{
		PxSwap(shapeRef0, shapeRef1);
		PxSwap(transformCacheRef0, transformCacheRef1);
	}

	PxgShape shape0;
	PxgShape_ReadWarp(shape0, gpuShapes + shapeRef0);

	const uint2 materialIndices = pairs[globalWarpIndex].materialIndices;

	// Broadcast the byte offsets reserved by lane 0 to the whole warp.
	patchByteOffset = __shfl_sync(FULL_MASK, (PxI32) patchByteOffset, 0);
	contactByteOffset = __shfl_sync(FULL_MASK, (PxI32) contactByteOffset, 0);
	forceAndIndiceByteOffset = __shfl_sync(FULL_MASK, (PxI32)forceAndIndiceByteOffset, 0);

	PxContactPatch* patches = reinterpret_cast<PxContactPatch*>(patchStream + patchByteOffset);
	float4* contacts = reinterpret_cast<float4*>(contactStream + contactByteOffset);
	// Face indices live after the force values in the same stream.
	PxU32* faceIndex = reinterpret_cast<PxU32*>(forceAndIndiceStream + forceAndIndiceByteOffset + totalNumContacts*sizeof(PxU32));

	PxsCachedTransform trimeshTransformCached;
	PxsCachedTransform_ReadWarp(trimeshTransformCached, transformCache + transformCacheRef1); //w is transformCacheRef1

	PxsCachedTransform sphereTransformCached;
	PxsCachedTransform_ReadWarp(sphereTransformCached, transformCache + transformCacheRef0); //w is transformCacheRef1

	//calculate patch normal by average each normal and transform the normal into world space, need to optimized
	const PxVec3 worldNormal = getWorldNormal(&multiManifold.mContacts[singleManifoldIndex][0], trimeshTransformCached.transform, numContacts, singleManifoldIndex, threadIndexInWarp, threadIndexInManifold);

	if(hasContacts)
	{
		const PxgContact& point = multiManifold.mContacts[singleManifoldIndex][threadIndexInManifold];

		// The first lane of each manifold writes the patch header.
		if(threadIndexInManifold == 0)
			writeCompressedContact(patches, singleManifoldIndex, writeIndex, numContacts, flip ? -worldNormal : worldNormal, materialIndices.x, materialIndices.y, materials);

		assert(point.pointB.isFinite());

		// Sphere/capsule manifolds store core-point contacts: push the point
		// out along the normal by the radius and deepen the penetration to
		// match. Other convexes use the mesh-space point B directly.
		PxReal pen = point.penetration;
		PxVec3 worldPt;
		if (shape0.type == PxGeometryType::eSPHERE)
		{
			const PxReal radius = shape0.scale.scale.y;
			pen = pen - radius;
			worldPt = sphereTransformCached.transform.p - worldNormal * radius;
		}
		else if (shape0.type == PxGeometryType::eCAPSULE)
		{
			const PxReal radius = shape0.scale.scale.y;
			pen = pen - radius;
			worldPt = sphereTransformCached.transform.transform(point.pointA) - worldNormal * radius;
		}
		else
		{
			worldPt = trimeshTransformCached.transform.transform(point.pointB);
		}

		//contacts[writeIndex] = make_float4(point.pointB.x, point.pointB.y, point.pointB.z, -point.penetration);
		if (contactByteOffset != 0xFFFFFFFF)
			contacts[writeIndex] = make_float4(worldPt.x, worldPt.y, worldPt.z, pen);

		/*if (threadIndexInWarp == 0)
		{
			printf("triIndex %i worldPt(%f, %f, %f)\n", point.triIndex, worldPt.x, worldPt.y, worldPt.z);
			printf("triIndex %i worldNormal(%f, %f, %f)\n", point.triIndex, worldNormal.x, worldNormal.y, worldNormal.z);
			printf("triIndex %i pen %f radius %f\n", point.triIndex, pen, shape0.scale.scale.x);
			PxVec3 p = sphereTransformCached.transform.p;
			PxQuat q = sphereTransformCached.transform.q;
			printf("sphereTransform p(%f, %f, %f), q(%f, %f, %f, %f)\n", p.x, p.y, p.z, q.x, q.y, q.z, q.w);
		}*/

		if(forceAndIndiceByteOffset != 0xFFFFFFFF)
			faceIndex[writeIndex] = point.triIndex;
	}
}
|
||||
|
||||
649
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexMeshPostProcess.cu
vendored
Normal file
649
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexMeshPostProcess.cu
vendored
Normal file
@@ -0,0 +1,649 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "cuda.h"
|
||||
#include "convexNpCommon.h"
|
||||
#include "triangle.cuh"
|
||||
#include "dataReadWriteHelper.cuh"
|
||||
#include "RadixSort.cuh"
|
||||
#include "reduction.cuh"
|
||||
#include "geometry/PxGeometry.h"
|
||||
#include "heightfieldUtil.cuh"
|
||||
#include "convexTriangle.cuh"
|
||||
#include "assert.h"
|
||||
#include "stdio.h"
|
||||
#include "PxgNpKernelIndices.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Empty host-side entry point. NOTE(review): presumably exists so the host
// build references this translation unit and its kernels are kept by the
// linker -- confirm against the module registration code.
extern "C" __host__ void initNarrowphaseKernels8() {}
|
||||
|
||||
////Based on edge edge checks
|
||||
//__device__ void convexTrimeshPostProcess(
|
||||
// ConvexMeshPair* PX_RESTRICT cvxTrimeshPair, // per CM
|
||||
// ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr, // per cvx-tri
|
||||
// ConvexTriEdgeBuffer** PX_RESTRICT cvxTriEdgeBufferPtr, // per cvx-tri
|
||||
// PxU32** PX_RESTRICT cvxTriSecondPassPairsPtr, // per cvx-tri
|
||||
// const uint4* PX_RESTRICT pairsGPU,
|
||||
// PxU32* PX_RESTRICT nbSecondPassPairs,
|
||||
// const PxU32 globalWarpIndex)
|
||||
//{
|
||||
// PxU32* cvxTriSecondPassPairs = *cvxTriSecondPassPairsPtr;
|
||||
// ConvexTriNormalAndIndex* cvxTriNI = *cvxTriNIPtr;
|
||||
//
|
||||
// const PxU32 pairIdx = cvxTriSecondPassPairs[globalWarpIndex];
|
||||
//
|
||||
// uint4 curPair = pairsGPU[pairIdx];
|
||||
//
|
||||
// PxU32 cmIdx = curPair.x;
|
||||
// PxU32 testOffset = curPair.z;
|
||||
//
|
||||
// ConvexMeshPair& pair = cvxTrimeshPair[cmIdx];
|
||||
// const PxU32 nbPairsPerCM = pair.count;
|
||||
// const PxU32 pairStartIndex = pair.startIndex;
|
||||
// PxU32 convexTriPairOffset = pairStartIndex + testOffset;
|
||||
//
|
||||
// ConvexTriNormalAndIndex& cvxni = cvxTriNI[convexTriPairOffset];
|
||||
// //get the number of contacts
|
||||
// const PxU32 nbContacts = cvxni.index >> 28;
|
||||
//
|
||||
// if (nbContacts == 0)
|
||||
// return;
|
||||
//
|
||||
// ConvexTriEdgeBuffer* cvxTriEdgeBuffer = *cvxTriEdgeBufferPtr;
|
||||
//
|
||||
// ConvexTriEdgeBuffer& cvxTriEdge = cvxTriEdgeBuffer[convexTriPairOffset];
|
||||
//
|
||||
// //load the verts into shared memory
|
||||
// __shared__ int sVertsIds[2][3];
|
||||
//
|
||||
// if (threadIdx.x < 3)
|
||||
// {
|
||||
// sVertsIds[threadIdx.y][threadIdx.x] = cvxTriEdge.triVertexIds[threadIdx.x];
|
||||
// }
|
||||
//
|
||||
// __threadfence_block();
|
||||
//
|
||||
// bool foundTriangle = false;
|
||||
// //each triangle has 3 edges, there are 9 combinations. Each thread work with one edge vs another edge
|
||||
// for (PxU32 i = 0; i < nbPairsPerCM * 9; i += WARP_SIZE)
|
||||
// {
|
||||
// foundTriangle = false;
|
||||
// const PxU32 index = i + threadIdx.x;
|
||||
//
|
||||
// if (index < nbPairsPerCM * 9)
|
||||
// {
|
||||
//
|
||||
// const PxU32 offset = index / 9;
|
||||
//
|
||||
// //same triangle, we don't need to compare the same triangle
|
||||
// if (offset == testOffset)
|
||||
// continue;
|
||||
//
|
||||
// const PxU32 vStartIndex0 = index % 3;
|
||||
// const PxU32 vEndIndex0 = (vStartIndex0 + 1) % 3;
|
||||
//
|
||||
// const PxU32 vertId00 = sVertsIds[threadIdx.y][vStartIndex0];
|
||||
// const PxU32 vertId01 = sVertsIds[threadIdx.y][vEndIndex0];
|
||||
//
|
||||
// const PxU32 vOffset = index / 3;
|
||||
//
|
||||
// const PxU32 vStartIndex1 = (vEndIndex0 + vOffset) % 3;
|
||||
// const PxU32 vEndIndex1 = (vStartIndex1 + 1) % 3;
|
||||
//
|
||||
// const PxU32 neighbourOffset = pairStartIndex + offset;
|
||||
//
|
||||
// ConvexTriNormalAndIndex& nCvxni = cvxTriNI[neighbourOffset];
|
||||
// //get the number of contacts
|
||||
// const PxU32 nbGeneratedContacts = nCvxni.index >> 28;
|
||||
//
|
||||
// if (nbGeneratedContacts > 0)
|
||||
// {
|
||||
// const PxU32 vertId10 = cvxTriEdgeBuffer[neighbourOffset].triVertexIds[vStartIndex1];
|
||||
// const PxU32 vertId11 = cvxTriEdgeBuffer[neighbourOffset].triVertexIds[vEndIndex1];
|
||||
//
|
||||
// if ((vertId00 == vertId10 || vertId00 == vertId11) &&
|
||||
// (vertId01 == vertId10 || vertId01 == vertId11))
|
||||
// {
|
||||
// foundTriangle = true;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (__any(foundTriangle))
|
||||
// {
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (__any(foundTriangle))
|
||||
// {
|
||||
// //we don't need to generate contacts for this pair. However, we have already generated contacts in
|
||||
// //the convexTriangleContactGen kernel. Therefore, we just need to clear the number of contacts in
|
||||
// //the corresponding ConvexTriNormalAndIndex
|
||||
// cvxni.index = 0 << 28;
|
||||
// }
|
||||
//
|
||||
//}
|
||||
|
||||
// True if 'targetIndexWithMask', tagged with the high bit set, occurs in the
// sorted array 'orderedTriangleIndices' of length 'count'.
// NOTE(review): the sibling helpers use TriangleSearchFlags constants; this
// raw 0x80000000 literal is presumably the same flag value -- confirm.
__device__ bool findTriangle(const PxU32* PX_RESTRICT orderedTriangleIndices, const PxU32 targetIndexWithMask, PxU32 count)
{
	const PxU32 wanted = targetIndexWithMask | 0x80000000;

	const PxU32 slot = binarySearch(orderedTriangleIndices, count, wanted);

	if (slot >= count)
		return false;

	return orderedTriangleIndices[slot] == wanted;
}
|
||||
|
||||
// True if 'targetIndexWithMask', tagged with the eFACE_CONTACT flag, occurs in
// the sorted array 'orderedTriangleIndices' of length 'count'.
__device__ bool findFaceContactTriangle(const PxU32* PX_RESTRICT orderedTriangleIndices, const PxU32 targetIndexWithMask, PxU32 count)
{
	const PxU32 wanted = targetIndexWithMask | TriangleSearchFlags::eFACE_CONTACT;

	const PxU32 slot = binarySearch(orderedTriangleIndices, count, wanted);

	if (slot >= count)
		return false;

	return orderedTriangleIndices[slot] == wanted;
}
|
||||
|
||||
// True if 'targetIndexWithMask', tagged with the eEDGE_CONTACT flag, occurs in
// the sorted array 'orderedTriangleIndices' of length 'count'.
__device__ bool findEdgeContactTriangle(const PxU32* PX_RESTRICT orderedTriangleIndices, const PxU32 targetIndexWithMask, PxU32 count)
{
	const PxU32 wanted = targetIndexWithMask | TriangleSearchFlags::eEDGE_CONTACT;

	const PxU32 slot = binarySearch(orderedTriangleIndices, count, wanted);

	if (slot >= count)
		return false;

	return orderedTriangleIndices[slot] == wanted;
}
|
||||
|
||||
//Based on neighbour face checks
|
||||
__device__ void convexTrimeshPostProcessCore(
|
||||
ConvexMeshPair* PX_RESTRICT cvxTrimeshPair, // per CM
|
||||
ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr, // per cvx-tri
|
||||
ConvexTriIntermediateData** PX_RESTRICT cvxTriIntermDataPtr, // per cvx-tri
|
||||
const PxU32** PX_RESTRICT orderedCvxTriIntermDataPtr, // per cvx-tri
|
||||
PxU32** PX_RESTRICT cvxTriSecondPassPairsPtr, // per cvx-tri
|
||||
PxgShape* PX_RESTRICT gpuShapes,
|
||||
const uint4* PX_RESTRICT pairsGPU,
|
||||
PxU32* PX_RESTRICT nbSecondPassPairs,
|
||||
const PxU32 globalThreadIndex)
|
||||
{
|
||||
|
||||
PxU32* cvxTriSecondPassPairs = *cvxTriSecondPassPairsPtr;
|
||||
ConvexTriNormalAndIndex* cvxTriNI = *cvxTriNIPtr;
|
||||
|
||||
const PxU32 pairIdxPlusMask = cvxTriSecondPassPairs[globalThreadIndex];
|
||||
const PxU32 pairIdx = pairIdxPlusMask & 0x1fffffff;
|
||||
|
||||
uint4 curPair = pairsGPU[pairIdx];
|
||||
|
||||
PxU32 cmIdx = curPair.x;
|
||||
PxU32 triangleIdx = curPair.y;
|
||||
PxU32 testOffset = curPair.z;
|
||||
PxU32 shapeID = curPair.w;
|
||||
|
||||
ConvexMeshPair& pair = cvxTrimeshPair[cmIdx];
|
||||
const PxU32 nbPairsPerCM = pair.count;
|
||||
assert(nbPairsPerCM != 0);
|
||||
|
||||
const PxU32 pairStartIndex = pair.startIndex;
|
||||
const PxU32 orderedPairStartIndex = pair.roundedStartIndex;
|
||||
PxU32 convexTriPairOffset = pairStartIndex + testOffset;
|
||||
|
||||
ConvexTriNormalAndIndex& cvxni = cvxTriNI[convexTriPairOffset];
|
||||
//get the number of contacts
|
||||
const PxU32 nbContacts = ConvexTriNormalAndIndex::getNbContacts(cvxni.index);
|
||||
|
||||
if (nbContacts == 0)
|
||||
return;
|
||||
|
||||
PxgShape& trimeshShape = gpuShapes[shapeID];
|
||||
|
||||
assert(trimeshShape.type == PxGeometryType::eTRIANGLEMESH);
|
||||
|
||||
const PxU32* orderedTriIndices = (*orderedCvxTriIntermDataPtr) + orderedPairStartIndex;
|
||||
|
||||
|
||||
const PxU8 * trimeshGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape.hullOrMeshPtr);
|
||||
|
||||
const uint4 trimesh_nbVerts_nbTris_nbAdjVertsTotal = *reinterpret_cast<const uint4 *>(trimeshGeomPtr);
|
||||
trimeshGeomPtr += sizeof(uint4);
|
||||
|
||||
trimeshGeomPtr += sizeof(const Gu::BV32DataPacked)* trimesh_nbVerts_nbTris_nbAdjVertsTotal.w;
|
||||
|
||||
const PxU32 numVerts = trimesh_nbVerts_nbTris_nbAdjVertsTotal.x;
|
||||
const PxU32 numTris = trimesh_nbVerts_nbTris_nbAdjVertsTotal.y;
|
||||
|
||||
//const float4 * trimeshVerts = reinterpret_cast<const float4 *>(trimeshGeomPtr);
|
||||
trimeshGeomPtr += sizeof(float4)* numVerts;
|
||||
const uint4 * trimeshTriIndices = reinterpret_cast<const uint4 *>(trimeshGeomPtr);
|
||||
trimeshGeomPtr += sizeof(uint4)* numTris;
|
||||
|
||||
const uint4* PX_RESTRICT trimeshTriAdjacencies = reinterpret_cast<const uint4 *>(trimeshGeomPtr);
|
||||
trimeshGeomPtr += sizeof(uint4)* numTris;
|
||||
|
||||
trimeshGeomPtr += sizeof(PxU32) * numTris; //mGRB_faceRemap
|
||||
|
||||
const PxU32* accumulatedTriangleRefs = reinterpret_cast<const PxU32*>(trimeshGeomPtr);
|
||||
trimeshGeomPtr += sizeof(PxU32) * (numVerts + 1);
|
||||
|
||||
const PxU32* triangleReferences = reinterpret_cast<const PxU32*>(trimeshGeomPtr);
|
||||
|
||||
|
||||
//ConvexTriIntermediateData* cvxTriIntermData = *cvxTriIntermDataPtr;
|
||||
|
||||
//ConvexTriIntermediateData& cvxTriInterm = cvxTriIntermData[convexTriPairOffset];
|
||||
|
||||
uint4 adjTriangles = trimeshTriAdjacencies[triangleIdx];
|
||||
uint4 indices = trimeshTriIndices[triangleIdx];
|
||||
|
||||
const bool flipNormal = trimeshShape.scale.hasNegativeDeterminant();
|
||||
if(flipNormal)
|
||||
{
|
||||
PxSwap(adjTriangles.x, adjTriangles.z);
|
||||
PxSwap(indices.y, indices.z);
|
||||
}
|
||||
|
||||
PxU32 mask = pairIdxPlusMask & PxU32(ConvexTriIntermediateData::eMASK);
|
||||
|
||||
bool needsProcessing = false;
|
||||
|
||||
PxU32 triangleIndex = 0xFFFFFFFF;
|
||||
PxU32 vertexIdx = 0xFFFFFFFF;
|
||||
uint2 adjTriangles2;
|
||||
|
||||
if (mask == PxU32(ConvexTriIntermediateData::eV0))
|
||||
{
|
||||
vertexIdx = indices.x;
|
||||
adjTriangles2 = make_uint2(adjTriangles.x, adjTriangles.z);
|
||||
// printf("eV0\n");
|
||||
}
|
||||
else if (mask == PxU32(ConvexTriIntermediateData::eV1))
|
||||
{
|
||||
vertexIdx = indices.y;
|
||||
adjTriangles2 = make_uint2(adjTriangles.x, adjTriangles.y);
|
||||
// printf("eV1\n");
|
||||
}
|
||||
else if (mask == PxU32(ConvexTriIntermediateData::eV2))
|
||||
{
|
||||
vertexIdx = indices.z;
|
||||
adjTriangles2 = make_uint2(adjTriangles.y, adjTriangles.z);
|
||||
// printf("eV2\n");
|
||||
}
|
||||
else if (mask == PxU32(ConvexTriIntermediateData::eE01))
|
||||
{
|
||||
triangleIndex = adjTriangles.x;
|
||||
// printf("eE01\n");
|
||||
if(triangleIndex == 0xFFFFFFFF)
|
||||
needsProcessing = true;
|
||||
}
|
||||
else if (mask == PxU32(ConvexTriIntermediateData::eE12))
|
||||
{
|
||||
triangleIndex = adjTriangles.y;
|
||||
// printf("eE12\n");
|
||||
if(triangleIndex == 0xFFFFFFFF)
|
||||
needsProcessing = true;
|
||||
}
|
||||
else if (mask == PxU32(ConvexTriIntermediateData::eE02))
|
||||
{
|
||||
triangleIndex = adjTriangles.z;
|
||||
// printf("eE02\n");
|
||||
if(triangleIndex == 0xFFFFFFFF)
|
||||
needsProcessing = true;
|
||||
}
|
||||
|
||||
//printf("triangleIdx: %d; mask: %x; triangleIndex: %d; vertexIdx: %d\n", triangleIdx, mask, triangleIndex, vertexIdx);
|
||||
|
||||
if (triangleIndex != 0xFFFFFFFF)
|
||||
{
|
||||
// printf("CHECKPOINT 0\n");
|
||||
needsProcessing = isBoundaryEdge(triangleIndex) || !findFaceContactTriangle(orderedTriIndices, triangleIndex, nbPairsPerCM);
|
||||
//if (!needsProcessing)
|
||||
// printf("triangleIdx: %d; triangleIndex: %d\n", triangleIdx, triangleIndex);
|
||||
}
|
||||
else if (vertexIdx != 0xFFFFFFFF)
|
||||
{
|
||||
// printf("CHECKPOINT 1\n");
|
||||
//First, let's see if either of the edges sharing this vertex are active. If they are, we keep the contacts, otherwise
|
||||
//we look for a shared triangle...
|
||||
|
||||
needsProcessing = true;
|
||||
|
||||
if(!(isBoundaryEdge(adjTriangles2.x) && isBoundaryEdge(adjTriangles2.y)))
|
||||
{
|
||||
PxU32 startIdx = accumulatedTriangleRefs[vertexIdx];
|
||||
PxU32 endIdx = accumulatedTriangleRefs[vertexIdx + 1];
|
||||
|
||||
for (PxU32 idx = startIdx; idx < endIdx; ++idx)
|
||||
{
|
||||
PxU32 triIdx = triangleReferences[idx];
|
||||
|
||||
if (findFaceContactTriangle(orderedTriIndices, triIdx, nbPairsPerCM) ||
|
||||
findEdgeContactTriangle(orderedTriIndices, triIdx, nbPairsPerCM))
|
||||
{
|
||||
//printf("triangleIdx: %d; vertexIdx: %d\n", triangleIdx, vertexIdx);
|
||||
needsProcessing = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// printf("triangleIndex %d\n", triangleIndex);
|
||||
// printf("vertexIdx %d\n", vertexIdx);
|
||||
// printf("needsProcessing %d\n", needsProcessing);
|
||||
|
||||
if(!needsProcessing)
|
||||
{
|
||||
//we don't need to generate contacts for this pair. However, we have already generated contacts in
|
||||
//the convexTriangleContactGen kernel. Therefore, we just need to clear the number of contacts in
|
||||
//the corresponding ConvexTriNormalAndIndex
|
||||
cvxni.index = cvxni.index & 0x0ffffff;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__
//__launch_bounds__(NP_TRIMESH_WARPS_PER_BLOCK * WARP_SIZE, 32 / NP_TRIMESH_WARPS_PER_BLOCK)
void convexTrimeshPostProcess(
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,							// per CM
	ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr,					// per cvx-tri
	ConvexTriIntermediateData** PX_RESTRICT cvxTriIntermPtr,			// per cvx-tri
	const PxU32** PX_RESTRICT orderedCvxTriIntermPtr,					// per cvx-tri
	PxU32** PX_RESTRICT cvxTriSecondPassPairsPtr,						// per cvx-tri
	PxU8* PX_RESTRICT stackPtr,
	PxU32* PX_RESTRICT nbSecondPassPairs,
	PxgShape* PX_RESTRICT shapes)
{
	// Total number of convex-triangle pairs queued for second-pass post-processing.
	const PxU32 nbPairsToProcess = *nbSecondPassPairs;

	// The pair descriptors live in the scratch stack buffer as uint4s; broadcast
	// the typed pointer through shared memory so all threads in the block agree.
	__shared__ uint4* sPairsGPU;

	const bool isFirstThreadInBlock = (threadIdx.x == 0 && threadIdx.y == 0);
	if (isFirstThreadInBlock)
		sPairsGPU = reinterpret_cast<uint4*>(stackPtr);

	__syncthreads();

	// One pair per thread, striding by the total number of launched threads.
	const PxU32 stride = gridDim.x * blockDim.x;
	for (PxU32 pairIndex = blockIdx.x * blockDim.x + threadIdx.x; pairIndex < nbPairsToProcess; pairIndex += stride)
	{
		convexTrimeshPostProcessCore(
			cvxTrimeshPair,
			cvxTriNIPtr,
			cvxTriIntermPtr,
			orderedCvxTriIntermPtr,
			cvxTriSecondPassPairsPtr,
			shapes,
			sPairsGPU,
			nbSecondPassPairs,
			pairIndex);
	}
}
|
||||
|
||||
|
||||
|
||||
//Based on neighbour face checks
// Second-pass post-processing for one convex-vs-heightfield triangle pair:
// decides whether contacts generated on a triangle edge/vertex feature should
// be kept or discarded because a neighbouring triangle already provides a
// face contact. When the contacts are redundant, the contact count stored in
// the pair's ConvexTriNormalAndIndex is cleared.
__device__ void convexHeightfieldPostProcessCore(
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,				// per CM
	ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr,			// per cvx-tri
	ConvexTriIntermediateData** PX_RESTRICT cvxTriIntermDataPtr,	// per cvx-tri
	const PxU32** PX_RESTRICT orderedCvxTriIntermDataPtr,		// per cvx-tri
	PxU32** PX_RESTRICT cvxTriSecondPassPairsPtr,			// per cvx-tri
	PxgShape* PX_RESTRICT gpuShapes,
	const uint4* PX_RESTRICT pairsGPU,
	PxU32* PX_RESTRICT nbSecondPassPairs,
	const PxU32 globalThreadIndex)
{
	PxU32* cvxTriSecondPassPairs = *cvxTriSecondPassPairsPtr;
	ConvexTriNormalAndIndex* cvxTriNI = *cvxTriNIPtr;

	// Each queued entry packs the pair index in the low bits and the contact
	// feature mask (vertex/edge flags) in the high bits.
	const PxU32 pairIdxPlusMask = cvxTriSecondPassPairs[globalThreadIndex];
	const PxU32 pairIdx = pairIdxPlusMask & 0x1fffffff;

	uint4 curPair = pairsGPU[pairIdx];

	PxU32 cmIdx = curPair.x;		// contact-manager (convex-mesh pair) index
	PxU32 triangleIdx = curPair.y;	// heightfield triangle index
	PxU32 testOffset = curPair.z;	// offset of this test within the CM's pair range
	PxU32 shapeID = curPair.w;		// heightfield shape index

	ConvexMeshPair& pair = cvxTrimeshPair[cmIdx];
	const PxU32 nbPairsPerCM = pair.count;
	const PxU32 pairStartIndex = pair.startIndex;
	const PxU32 orderedPairStartIndex = pair.roundedStartIndex;
	PxU32 convexTriPairOffset = pairStartIndex + testOffset;

	ConvexTriNormalAndIndex& cvxni = cvxTriNI[convexTriPairOffset];
	//get the number of contacts
	const PxU32 nbContacts = ConvexTriNormalAndIndex::getNbContacts(cvxni.index);

	// Nothing to cull if the contact-gen pass produced no contacts.
	if (nbContacts == 0)
		return;

	PxgShape& heightFieldShape = gpuShapes[shapeID];

	PxVec3 scale = heightFieldShape.scale.scale;

	const PxReal rowScale = scale.x;
	const PxReal columnScale = scale.z;
	PxVec3 handedness(1.0f);	// Vector to invert normal coordinates according to the heightfield scales
	// Negative row/column scales mirror the heightfield, flipping triangle
	// winding; track that so vertex/adjacency slots are remapped consistently.
	bool wrongHanded = false;
	if (columnScale < 0.f)
	{
		wrongHanded = !wrongHanded;
		handedness.z = -1.0f;
	}
	if (rowScale < 0.f)
	{
		wrongHanded = !wrongHanded;
		handedness.x = -1.0f;
	}

	// Sorted triangle indices for this CM, used for binary lookups below.
	const PxU32* orderedTriIndices = (*orderedCvxTriIntermDataPtr) + orderedPairStartIndex;

	// In-memory heightfield layout (as read here): [nbRows, nbCols, samples...].
	PxU32* heightfieldData = reinterpret_cast<PxU32*>(heightFieldShape.hullOrMeshPtr);
	const PxU32 nbRows = heightfieldData[0];
	const PxU32 nbCols = heightfieldData[1];
	PxHeightFieldSample* samples = reinterpret_cast<PxHeightFieldSample*>(&heightfieldData[2]);

	//ConvexTriIntermediateData* cvxTriIntermData = *cvxTriIntermDataPtr;

	//ConvexTriIntermediateData& cvxTriInterm = cvxTriIntermData[convexTriPairOffset];

	PxU32 tVertexIndices[3];
	PxU32 adjacencyIndices[3];
	// Mirrored heightfields swap the last two vertex slots (1 + wrongHanded /
	// 2 - wrongHanded) so the winding stays consistent.
	getTriangleVertexIndices(triangleIdx, tVertexIndices[0], tVertexIndices[1 + wrongHanded], tVertexIndices[2 - wrongHanded], nbCols, samples);

	getTriangleAdjacencyIndices(triangleIdx, adjacencyIndices[wrongHanded ? 2 : 0], adjacencyIndices[1], adjacencyIndices[wrongHanded ? 0 : 2], nbRows, nbCols, samples);

	bool needsProcessing = false;

	// Exactly one of these ends up valid, depending on the feature mask:
	// a neighbouring triangle (edge contact) or a shared vertex (vertex contact).
	PxU32 triangleIndex = 0xFFFFFFFF;
	PxU32 vertexIdx = 0xFFFFFFFF;

	PxU32 mask = pairIdxPlusMask & PxU32(ConvexTriIntermediateData::eMASK);

	if (mask == PxU32(ConvexTriIntermediateData::eV0))
	{
		vertexIdx = tVertexIndices[0];
	}
	else if (mask == PxU32(ConvexTriIntermediateData::eV1))
	{
		vertexIdx = tVertexIndices[1];
	}
	else if (mask == PxU32(ConvexTriIntermediateData::eV2))
	{
		vertexIdx = tVertexIndices[2];
	}
	else if (mask == PxU32(ConvexTriIntermediateData::eE01))
	{
		triangleIndex = PxU32(adjacencyIndices[0]);
	}
	else if (mask == PxU32(ConvexTriIntermediateData::eE12))
	{
		triangleIndex = adjacencyIndices[1];
	}
	else if (mask == PxU32(ConvexTriIntermediateData::eE02))
	{
		triangleIndex = adjacencyIndices[2];
	}

	if (triangleIndex != 0xFFFFFFFF)
	{
		// Edge contact: keep it only if the shared edge is active and the
		// adjacent triangle is not already in the contact set.
		needsProcessing = isActiveEdge(triangleIndex) && !findTriangle(orderedTriIndices, triangleIndex, nbPairsPerCM);
	}
	else if (vertexIdx != 0xFFFFFFFF)
	{
		// Vertex contact: scan the (up to) 3x3 cell neighbourhood around this
		// triangle's cell for any triangle that shares the vertex and already
		// has a contact; if one exists, this vertex contact is redundant.
		PxU32 cellIdx = triangleIdx / 2;	// two triangles per heightfield cell

		PxU32 column = cellIdx % nbCols;
		PxU32 row = cellIdx / nbCols;
		// Clamp the neighbourhood to the grid borders.
		PxU32 startRow = PxMax(row, 1u) - 1;
		PxU32 endRow = PxMin(row, nbRows - 2) + 1;
		PxU32 startCol = PxMax(column, 1u) - 1;
		PxU32 endCol = PxMin(column, nbCols - 2) + 1;

		needsProcessing = true;

		for (PxU32 r = startRow; r <= endRow && needsProcessing; r++)
		{
			for (PxU32 c = startCol; c <= endCol; ++c)
			{
				PxU32 cellIdx = r * nbCols + c;

				// First triangle of the cell.
				PxU32 triIdx = cellIdx * 2;
				getTriangleVertexIndices(triIdx, tVertexIndices[0], tVertexIndices[1 + wrongHanded], tVertexIndices[2 - wrongHanded], nbCols, samples);

				if (tVertexIndices[0] == vertexIdx || tVertexIndices[1] == vertexIdx || tVertexIndices[2] == vertexIdx)
				{
					if (findTriangle(orderedTriIndices, triIdx, nbPairsPerCM))
					{
						needsProcessing = false;
						break;
					}
				}

				// Second triangle of the cell.
				triIdx = triIdx + 1;
				getTriangleVertexIndices(triIdx, tVertexIndices[0], tVertexIndices[1 + wrongHanded], tVertexIndices[2 - wrongHanded], nbCols, samples);

				if (tVertexIndices[0] == vertexIdx || tVertexIndices[1] == vertexIdx || tVertexIndices[2] == vertexIdx)
				{
					if (findTriangle(orderedTriIndices, triIdx, nbPairsPerCM))
					{
						needsProcessing = false;
						break;
					}
				}

			}
		}

	}

	if (!needsProcessing)
	{
		//we don't need to generate contacts for this pair. However, we have already generated contacts in
		//the convexTriangleContactGen kernel. Therefore, we just need to clear the number of contacts in
		//the corresponding ConvexTriNormalAndIndex
		cvxni.index = cvxni.index & 0x0ffffff;
	}

}
|
||||
|
||||
|
||||
extern "C" __global__
//__launch_bounds__(NP_TRIMESH_WARPS_PER_BLOCK * WARP_SIZE, 32 / NP_TRIMESH_WARPS_PER_BLOCK)
void convexHeightfieldPostProcess(
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,							// per CM
	ConvexTriNormalAndIndex** PX_RESTRICT cvxTriNIPtr,					// per cvx-tri
	ConvexTriIntermediateData** PX_RESTRICT cvxTriIntermPtr,			// per cvx-tri
	const PxU32** PX_RESTRICT orderedCvxTriIntermPtr,					// per cvx-tri
	PxU32** PX_RESTRICT cvxTriSecondPassPairsPtr,						// per cvx-tri
	PxU8* PX_RESTRICT stackPtr,
	PxU32* PX_RESTRICT nbSecondPassPairs,
	PxgShape* PX_RESTRICT shapes)
{
	// Broadcast the typed view of the scratch stack (uint4 pair descriptors)
	// through shared memory so every thread in the block sees the same pointer.
	__shared__ uint4* sPairsGPU;

	const bool isFirstThreadInBlock = (threadIdx.x == 0 && threadIdx.y == 0);
	if (isFirstThreadInBlock)
		sPairsGPU = reinterpret_cast<uint4*>(stackPtr);

	__syncthreads();

	// Number of convex-heightfield pairs queued for second-pass processing.
	const PxU32 nbPairsToProcess = *nbSecondPassPairs;

	// One pair per thread, striding by the total number of launched threads.
	const PxU32 stride = gridDim.x * blockDim.x;
	for (PxU32 pairIndex = blockIdx.x * blockDim.x + threadIdx.x; pairIndex < nbPairsToProcess; pairIndex += stride)
	{
		convexHeightfieldPostProcessCore(
			cvxTrimeshPair,
			cvxTriNIPtr,
			cvxTriIntermPtr,
			orderedCvxTriIntermPtr,
			cvxTriSecondPassPairsPtr,
			shapes,
			sPairsGPU,
			nbSecondPassPairs,
			pairIndex);
	}
}
|
||||
|
||||
extern "C" __global__
//__launch_bounds__(NP_TRIMESH_WARPS_PER_BLOCK * WARP_SIZE, 32 / NP_TRIMESH_WARPS_PER_BLOCK)
// Sorts, per contact manager, the triangle indices collected during contact
// generation so that later passes can use binary search (findTriangle etc.)
// on them. Each warp (threadIdx.y lane group) sorts one CM's index range.
void sortTriangleIndices(
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,		// per CM
	PxU32** PX_RESTRICT orderedCvxTriIntermPtr,		// per cvx-tri
	const PxU32 nbPairs
	)
{
	// One warp per convex-mesh pair.
	const PxU32 globalWarpIndex = threadIdx.y + blockIdx.x * blockDim.y;

	if (globalWarpIndex >= nbPairs)
		return;

	PxU32 nbTriangles = cvxTrimeshPair[globalWarpIndex].count;
	PxU32 startIndex = cvxTrimeshPair[globalWarpIndex].roundedStartIndex;

	//Terminate if we had 0 triangles or we cached them from last frame
	if (nbTriangles == 0 || nbTriangles == 0xFFFFFFFF)
		return;

	// The start index must be 16-byte (uint4) aligned for the vectorized sort.
	assert((startIndex & 3) == 0);

	// Keys are consumed four at a time as uint4s.
	const PxU32 nbUint4s = (nbTriangles + 3) / 4;

	const PxU32 nbWarps = NP_TRIMESH_WARPS_PER_BLOCK;

	// Keys-only radix sort within a single warp; the region directly after the
	// keys (offset by nbUint4s) is used as the ping-pong scratch buffer.
	// 8 = number of radix passes (4 bits per pass over 32-bit keys).
	radixSortSingleWarpKeysOnly<nbWarps>(reinterpret_cast<uint4*>(*orderedCvxTriIntermPtr + startIndex),
		nbTriangles, nbUint4s, reinterpret_cast<uint4*>(*orderedCvxTriIntermPtr + startIndex) + nbUint4s, 8);
}
|
||||
1884
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexTriangle.cuh
vendored
Normal file
1884
engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexTriangle.cuh
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1063
engine/third_party/physx/source/gpunarrowphase/src/CUDA/cudaBox.cu
vendored
Normal file
1063
engine/third_party/physx/source/gpunarrowphase/src/CUDA/cudaBox.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3232
engine/third_party/physx/source/gpunarrowphase/src/CUDA/cudaGJKEPA.cu
vendored
Normal file
3232
engine/third_party/physx/source/gpunarrowphase/src/CUDA/cudaGJKEPA.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1714
engine/third_party/physx/source/gpunarrowphase/src/CUDA/cudaParticleSystem.cu
vendored
Normal file
1714
engine/third_party/physx/source/gpunarrowphase/src/CUDA/cudaParticleSystem.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
219
engine/third_party/physx/source/gpunarrowphase/src/CUDA/cudaSphere.cu
vendored
Normal file
219
engine/third_party/physx/source/gpunarrowphase/src/CUDA/cudaSphere.cu
vendored
Normal file
@@ -0,0 +1,219 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxMat34.h"
|
||||
#include "foundation/PxTransform.h"
|
||||
#include "foundation/PxAssert.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#include "cuda.h"
|
||||
#include "cuda_runtime.h"
|
||||
#include "convexFormat.h"
|
||||
#include "cudaNpCommon.h"
|
||||
|
||||
#include "PxsTransformCache.h"
|
||||
#include "PxsContactManagerState.h"
|
||||
#include "PxgContactManager.h"
|
||||
#include "PxgConvexConvexShape.h"
|
||||
#include "PxgPersistentContactManifold.h"
|
||||
|
||||
#include "PxgNpKernelIndices.h"
|
||||
|
||||
#include "PxsMaterialCore.h"
|
||||
|
||||
#include "PxContact.h"
|
||||
#include "geometry/PxGeometry.h"
|
||||
|
||||
#include "SparseRemove.cuh"
|
||||
|
||||
#include "sphereCollision.cuh"
|
||||
#include "contactPatchUtils.cuh"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// No-op host entry point; presumably referenced from host code to force this
// translation unit (and its kernels) to be linked in — TODO confirm against caller.
extern "C" __host__ void initNarrowphaseKernels9() {}
|
||||
|
||||
// Narrow-phase contact generation for sphere/capsule primitive pairs:
// sphere-sphere, sphere-plane, sphere-capsule, sphere-box, plane-capsule and
// capsule-capsule. One thread handles one contact-manager test: it computes
// up to two contact points, writes them to the contact stream, and registers
// a single contact patch.
extern "C" __global__ void sphereNphase_Kernel(
	PxU32 numTests,
	const PxReal toleranceLength,				// NOTE(review): unused in this kernel body
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	PxsContactManagerOutput* PX_RESTRICT cmOutputs,
	PxgShape* PX_RESTRICT shapes,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* contactDistance,
	const PxsMaterialData* PX_RESTRICT materials,
	PxU8* PX_RESTRICT contactStream,
	PxU8* PX_RESTRICT patchStream,
	PxgPatchAndContactCounters* PX_RESTRICT patchAndContactCounters,
	PxU32* PX_RESTRICT touchChangeFlags,
	PxU32* PX_RESTRICT patchChangeFlags,
	PxU8* PX_RESTRICT startContactPatches,
	PxU8* PX_RESTRICT startContactPoints,
	PxU8* PX_RESTRICT startContactForces,
	PxU32 patchBytesLimit,
	PxU32 contactBytesLimit,
	PxU32 forceBytesLimit)
{
	// One thread per contact-manager test.
	PxU32 globalThreadIndex = blockIdx.x * blockDim.x + threadIdx.x;

	if (globalThreadIndex >= numTests)
		return;

	PxgContactManagerInput contactInput = cmInputs[globalThreadIndex];

	PxU32 transformCacheRef0 = contactInput.transformCacheRef0;
	PxU32 transformCacheRef1 = contactInput.transformCacheRef1;

	PxsCachedTransform transformCache0 = transformCache[transformCacheRef0];
	PxsCachedTransform transformCache1 = transformCache[transformCacheRef1];

	// Combined contact offset for the pair.
	const PxReal cDistance = contactDistance[transformCacheRef0] + contactDistance[transformCacheRef1];

	PxgShape& shape0 = shapes[contactInput.shapeRef0];
	PxgShape& shape1 = shapes[contactInput.shapeRef1];

	PxGeometryType::Enum type0 = PxGeometryType::Enum(shape0.type);
	PxGeometryType::Enum type1 = PxGeometryType::Enum(shape1.type);

	PxVec3 scale0 = shape0.scale.scale;
	PxVec3 scale1 = shape1.scale.scale;

	// Canonicalize the pair so the lower-valued geometry type comes first.
	const bool flip = (type1<type0);

	//shape0 should be always sphere
	if (flip)
	{
		PxSwap(type0, type1);
		PxSwap(scale0, scale1);
		PxSwap(transformCache0, transformCache1);
		//printf("type0 %i type1 %i\n", type0, type1);
	}


	PxTransform& transform0 = transformCache0.transform;
	PxTransform& transform1 = transformCache1.transform;

	// For spheres the radius is encoded in scale.x.
	const PxReal sphereRadius = scale0.x;

	// At most two contacts (capsule cases); pointPen packs position + penetration.
	float4 pointPen[2];
	PxVec3 normal;
	PxU32 nbContacts = 0;

	if (type0 == PxGeometryType::eSPHERE)
	{

		switch (type1)
		{
		case PxGeometryType::eSPHERE:
		{
			//printf("sphere/sphere \n");
			const PxReal sphereRadius1 = scale1.x;
			nbContacts = spheresphere(transform0, transform1, sphereRadius, sphereRadius1, cDistance, pointPen[0], normal);
			break;
		}
		case PxGeometryType::ePLANE:
		{
			//printf("sphere/plane \n");
			nbContacts = sphereplane(transform0, transform1, sphereRadius, cDistance, pointPen[0], normal);
			break;
		}
		case PxGeometryType::eCAPSULE:
		{
			//printf("sphere/capsule \n");
			// Capsule encoding: scale.y = radius, scale.x = half-height.
			const PxReal capsuleRadius = scale1.y;
			const PxReal halfHeight = scale1.x;
			nbContacts = spherecapsule(transform0, transform1, sphereRadius, capsuleRadius, halfHeight, cDistance, pointPen[0], normal);
			break;
		}
		case PxGeometryType::eBOX:
		{
			//printf("sphere/box \n");
			const PxVec3 boxHalfExtents = scale1;
			nbContacts = spherebox(transform0, transform1, sphereRadius, boxHalfExtents, cDistance, pointPen[0], normal);
			break;
		}
		default:
			break;
		};
	}
	else
	{
		// Non-sphere first shape: only plane-capsule and capsule-capsule reach here.
		assert(type1 == PxGeometryType::eCAPSULE);

		const PxReal capsuleRadius1 = scale1.y;
		const PxReal capsuleHalfHeight1 = scale1.x;

		switch (type0)
		{
		case PxGeometryType::ePLANE:
		{
			//printf("sphere/plane \n");
			nbContacts = planeCapsule(transform0, transform1, capsuleRadius1, capsuleHalfHeight1, cDistance, pointPen, normal);
			break;
		}
		case PxGeometryType::eCAPSULE:
		{
			////printf("sphere/capsule \n");
			const PxReal capsuleRadius0 = scale0.y;
			const PxReal halfHeight0 = scale0.x;
			nbContacts = capsuleCapsule(transform0, transform1, capsuleRadius0, halfHeight0,
				capsuleRadius1, capsuleHalfHeight1, cDistance, pointPen, normal);
			break;
		}
		default:
			break;
		};


	}

	// The collision routines report the normal for the canonical order; undo
	// the flip so it points the way the caller expects.
	if (nbContacts && flip)
		normal = -normal;

	// Reserve space in the contact/force streams; 0xFFFFFFFF signals overflow.
	PxU32 contactByteOffset = setContactPointAndForcePointers(cmOutputs, patchAndContactCounters,
		startContactPoints, startContactForces, contactBytesLimit, forceBytesLimit, globalThreadIndex, nbContacts);

	if (contactByteOffset != 0xFFFFFFFF)
	{
		float4* baseContactStream = (float4*)(contactStream + contactByteOffset);
		for (PxU32 i = 0; i < nbContacts; ++i)
		{
			float4& outPointPen = baseContactStream[i];
			outPointPen = pointPen[i];
		}
	}

	// All contacts share one normal, hence a single patch per pair.
	PxU32 patchIndex = registerContactPatch(cmOutputs, patchAndContactCounters, touchChangeFlags, patchChangeFlags, startContactPatches, patchBytesLimit, globalThreadIndex, nbContacts);

	insertIntoPatchStream(materials, patchStream, shape0, shape1, patchIndex, normal, nbContacts);
}
|
||||
|
||||
|
||||
|
||||
396
engine/third_party/physx/source/gpunarrowphase/src/CUDA/dataReadWriteHelper.cuh
vendored
Normal file
396
engine/third_party/physx/source/gpunarrowphase/src/CUDA/dataReadWriteHelper.cuh
vendored
Normal file
@@ -0,0 +1,396 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __DATA_RW_HELPER_CUH__
|
||||
#define __DATA_RW_HELPER_CUH__
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxBounds3.h"
|
||||
#include "PxgContactManager.h"
|
||||
#include "AlignedTransform.h"
|
||||
#include "PxsTransformCache.h"
|
||||
#include "PxgConvexConvexShape.h"
|
||||
#include "convexNpCommon.h"
|
||||
|
||||
#include "PxgCommonDefines.h"
|
||||
#include "geometry/PxGeometry.h"
|
||||
#include "utils.cuh"
|
||||
#include "PxgSolverCoreDesc.h"
|
||||
#include "PxgArticulationCoreDesc.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Linear and angular velocity of one rigid body/link, as consumed by the
// PGS (projected Gauss-Seidel) solver path.
struct PxgVelocityPackPGS
{
	PxVec3 linVel;	// linear velocity
	PxVec3 angVel;	// angular velocity
};
|
||||
|
||||
// Velocities plus accumulated position/orientation deltas for one rigid
// body/link, as consumed by the TGS (temporal Gauss-Seidel) solver path.
struct PxgVelocityPackTGS
{
	PxVec3 linVel;		// linear velocity
	PxVec3 angVel;		// angular velocity
	PxVec3 linDelta;	// accumulated linear motion delta
	PxVec3 angDelta;	// accumulated angular motion delta
};
|
||||
|
||||
// Storage shared between the two solver flavours; only one member is active
// at a time, selected by the code path (PGS vs TGS).
union PxgVelocityPack
{
	PxgVelocityPackTGS tgs;
	PxgVelocityPackPGS pgs;

	// Non-trivial members prevent an implicit default constructor; provide an
	// empty one so the union can be declared uninitialized in device code.
	PX_FORCE_INLINE __device__ PxgVelocityPack() {}
};
|
||||
|
||||
// Device-side helper that resolves a PxNodeIndex to the current solver
// velocities (and, for TGS, accumulated deltas) of the corresponding rigid
// body or articulation link, reading from the solver's GPU buffers.
struct PxgVelocityReader
{
	//Shared
	const PxU32* const solverBodyIndices;	// node index -> solver body index map

	//Articulation
	const float4* const PX_RESTRICT artiLinkVelocities;	// per-link velocity buffer, segmented by artiOffset
	const PxU32 maxLinks;			// max links per articulation (stride per articulation)
	const PxU32 nbArticulations;
	const PxU32 artiOffset;			// size of one velocity segment = maxLinks * nbArticulations

	//Rigid
	const PxU32 numSolverBodies;	// stride between the linear and angular halves of the rigid buffers
	const float4* const PX_RESTRICT solverBodyDeltaVel;	// accumulated delta velocities (and TGS motion deltas)
	const float4* const PX_RESTRICT initialVel;			// velocities at the start of the solve

	// Gathers all buffer pointers/counts from the solver descriptors.
	// NOTE(review): the initializer list is not in member-declaration order
	// (numSolverBodies last); initialization still follows declaration order
	// and no initializer depends on a later member, so this is benign.
	template <typename IterativeData>
	PX_FORCE_INLINE __device__ PxgVelocityReader(const PxgPrePrepDesc* preDesc, const PxgSolverCoreDesc* solverCoreDesc, const PxgArticulationCoreDesc* artiCoreDesc,
		const PxgSolverSharedDesc<IterativeData>* sharedDesc, PxU32 numSolverBodies) :
		solverBodyIndices(preDesc->solverBodyIndices),
		artiLinkVelocities(solverCoreDesc->outArtiVelocity),
		maxLinks(artiCoreDesc->mMaxLinksPerArticulation),
		nbArticulations(artiCoreDesc->nbArticulations),
		artiOffset(maxLinks * nbArticulations),
		solverBodyDeltaVel(sharedDesc->iterativeData.solverBodyVelPool + solverCoreDesc->accumulatedBodyDeltaVOffset),
		initialVel(solverCoreDesc->outSolverVelocity),
		numSolverBodies(numSolverBodies)
	{
	}

	// Fills result.linVel/result.angVel for the given node (PGS path) and
	// returns the resolved solver body index. Works with any Pack exposing
	// linVel/angVel members.
	template<typename Pack>
	PX_FORCE_INLINE __device__ PxU32 readVelocitiesPGS(const PxNodeIndex rigidId, Pack& result)
	{
		// Static bodies all map to solver body 0.
		const PxU32 solverBodyIdx = rigidId.isStaticBody() ? 0 : solverBodyIndices[rigidId.index()];

		if (rigidId.isArticulation())
		{
			// Link velocities: linear block first, angular block artiOffset later.
			const PxU32 index = solverBodyIdx * maxLinks;
			const float4* vels = &artiLinkVelocities[index];
			const PxU32 linkID = rigidId.articulationLinkId();

			result.linVel = PxLoad3(vels[linkID]);
			result.angVel = PxLoad3(vels[linkID + artiOffset]);
		}
		else
		{
			// Rigid body: current velocity = initial velocity + accumulated delta;
			// angular components are stored numSolverBodies entries after linear.
			result.linVel = PxLoad3(initialVel[solverBodyIdx]) + PxLoad3(solverBodyDeltaVel[solverBodyIdx]);
			result.angVel = PxLoad3(initialVel[solverBodyIdx + numSolverBodies]) + PxLoad3(solverBodyDeltaVel[solverBodyIdx + numSolverBodies]);
		}
		return solverBodyIdx;
	}

	// Fills velocities and accumulated motion deltas for the given node (TGS
	// path) and returns the resolved solver body index.
	PX_FORCE_INLINE __device__ PxU32 readVelocitiesTGS(const PxNodeIndex rigidId, PxgVelocityPackTGS& result)
	{
		const PxU32 solverBodyIdx = rigidId.isStaticBody() ? 0 : solverBodyIndices[rigidId.index()];

		if (rigidId.isArticulation())
		{
			// Four consecutive artiOffset-sized segments: linVel, angVel,
			// linDelta, angDelta.
			const PxU32 index = solverBodyIdx * maxLinks;
			const float4* vels = &artiLinkVelocities[index];
			const PxU32 linkID = rigidId.articulationLinkId();

			float4 linearVelocity = vels[linkID];
			float4 angularVelocity = vels[linkID + artiOffset];
			float4 linDelta = vels[linkID + artiOffset + artiOffset];
			float4 angDelta = vels[linkID + artiOffset + artiOffset + artiOffset];

			result.linVel = PxVec3(linearVelocity.x, linearVelocity.y, linearVelocity.z);
			result.angVel = PxVec3(angularVelocity.x, angularVelocity.y, angularVelocity.z);
			result.linDelta = PxVec3(linDelta.x, linDelta.y, linDelta.z);
			result.angDelta = PxVec3(angDelta.x, angDelta.y, angDelta.z);
		}
		else
		{
			// Rigid TGS state is packed as 12 floats across three float4s
			// (stride numSolverBodies): linVel(3), angVel(3), linDelta(3), angDelta(3).
			float4 vel0 = solverBodyDeltaVel[solverBodyIdx];
			float4 vel1 = solverBodyDeltaVel[solverBodyIdx + numSolverBodies];
			float4 vel2 = solverBodyDeltaVel[solverBodyIdx + numSolverBodies + numSolverBodies];

			result.linVel = PxVec3(vel0.x, vel0.y, vel0.z);
			result.angVel = PxVec3(vel0.w, vel1.x, vel1.y);
			result.linDelta = PxVec3(vel1.z, vel1.w, vel2.x);
			result.angDelta = PxVec3(vel2.y, vel2.z, vel2.w);
		}
		return solverBodyIdx;
	}
};
|
||||
|
||||
__device__ inline static
// Warp-cooperative read of one PxgContactManagerInput (4 x PxU32): the first
// four lanes each load one word, then the values are broadcast to the whole
// warp via shuffles. Must be called by all 32 lanes of a full warp.
void PxgContactManagerInput_ReadWarp(PxgContactManagerInput & res, const PxgContactManagerInput * ptr, PxU32 offset)
{
	const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);

	// 'input' is only defined for lanes 0-3; the shuffles below only source
	// from those lanes, so the other lanes' uninitialized value is never read.
	PxU32 input;
	if (laneIdx < 4)
		input = reinterpret_cast<const PxU32 *>(ptr + offset)[laneIdx];

	res.shapeRef0 = __shfl_sync(FULL_MASK, (input), 0);
	res.shapeRef1 = __shfl_sync(FULL_MASK, (input), 1);
	res.transformCacheRef0 = __shfl_sync(FULL_MASK, (input), 2);
	res.transformCacheRef1 = __shfl_sync(FULL_MASK, (input), 3);
}
|
||||
|
||||
__device__ inline static
// Warp-cooperative read of one PxAlignedTransform (8 floats: quaternion then
// position): lanes 0-7 load one float each, then broadcast via shuffles.
// Must be called by all 32 lanes of a full warp.
void PxAlignedTransform_ReadWarp(PxAlignedTransform& res, const PxAlignedTransform* ptr)
{
	const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);

	// Only lanes 0-7 load; shuffles source exclusively from those lanes.
	PxReal input;
	if (laneIdx < 8)
		input = reinterpret_cast<const PxReal *>(ptr)[laneIdx];

	res.q.q.x = __shfl_sync(FULL_MASK, input, 0);
	res.q.q.y = __shfl_sync(FULL_MASK, input, 1);
	res.q.q.z = __shfl_sync(FULL_MASK, input, 2);
	res.q.q.w = __shfl_sync(FULL_MASK, input, 3);
	res.p.x = __shfl_sync(FULL_MASK, input, 4);
	res.p.y = __shfl_sync(FULL_MASK, input, 5);
	res.p.z = __shfl_sync(FULL_MASK, input, 6);
	res.p.w = __shfl_sync(FULL_MASK, input, 7);
}
|
||||
|
||||
__device__ inline static
// Warp-cooperative read of one PxsCachedTransform (quaternion, position,
// flags): one float-sized word per lane, broadcast via shuffles. The flags
// word travels through the float lane register and is reinterpreted back to
// int. Must be called by all 32 lanes of a full warp.
void PxsCachedTransform_ReadWarp(PxsCachedTransform& res, const PxsCachedTransform* ptr)
{
	const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);

	// Only the first sizeof(PxsCachedTransform)/4 lanes load; shuffles source
	// exclusively from those lanes.
	PxReal input;
	if (laneIdx < sizeof(PxsCachedTransform)/sizeof(PxReal))
		input = reinterpret_cast<const PxReal *>(ptr)[laneIdx];

	res.transform.q.x = __shfl_sync(FULL_MASK, input, 0);
	res.transform.q.y = __shfl_sync(FULL_MASK, input, 1);
	res.transform.q.z = __shfl_sync(FULL_MASK, input, 2);
	res.transform.q.w = __shfl_sync(FULL_MASK, input, 3);
	res.transform.p.x = __shfl_sync(FULL_MASK, input, 4);
	res.transform.p.y = __shfl_sync(FULL_MASK, input, 5);
	res.transform.p.z = __shfl_sync(FULL_MASK, input, 6);
	// flags is an integer field; recover the bit pattern from the float lane.
	res.flags = __shfl_sync(FULL_MASK, __float_as_int(input), 7);
}
|
||||
|
||||
// Cooperatively loads one PxBounds3 (6 floats: minimum then maximum) with a full
// warp. Lanes 0-5 each read one float; __shfl_sync broadcasts them to all lanes.
__device__ inline static
void PxBounds3_ReadWarp(PxBounds3& res, const PxBounds3* ptr)
{
	const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);

	// Only the first sizeof(PxBounds3)/sizeof(PxReal) lanes read from memory.
	PxReal input;
	if (laneIdx < sizeof(PxBounds3) /sizeof(PxReal))
		input = reinterpret_cast<const PxReal *>(ptr)[laneIdx];

	res.minimum.x = __shfl_sync(FULL_MASK, input, 0);
	res.minimum.y = __shfl_sync(FULL_MASK, input, 1);
	res.minimum.z = __shfl_sync(FULL_MASK, input, 2);
	res.maximum.x = __shfl_sync(FULL_MASK, input, 3);
	res.maximum.y = __shfl_sync(FULL_MASK, input, 4);
	res.maximum.z = __shfl_sync(FULL_MASK, input, 5);

}
|
||||
|
||||
// Cooperatively stores one PxsCachedTransform with a full warp: lanes 0-7 each copy
// one 32-bit word from 'inp' to 'ptr'.
// NOTE(review): the hard-coded 8 assumes sizeof(PxsCachedTransform) == 8 * sizeof(uint),
// consistent with the 8-word layout used by PxsCachedTransform_ReadWarp — confirm if
// the struct ever changes.
__device__ inline static
void PxsCachedTransform_WriteWarp(PxsCachedTransform* ptr, const PxsCachedTransform& inp)
{
	const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);

	if (laneIdx < 8)
	{
		uint input = reinterpret_cast<const uint *>(&inp)[laneIdx];
		reinterpret_cast<uint *>(ptr)[laneIdx] = input;
	}
}
|
||||
|
||||
// Cooperatively stores one PxAlignedTransform (8 x 32-bit words) with a full warp:
// lanes 0-7 each copy one word from 'inp' to 'ptr'.
__device__ inline static
void PxAlignedTransform_WriteWarp(PxAlignedTransform* ptr, const PxAlignedTransform& inp)
{
	const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);

	if (laneIdx < 8)
	{
		uint input = reinterpret_cast<const uint *>(&inp)[laneIdx];
		reinterpret_cast<uint *>(ptr)[laneIdx] = input;
	}
}
|
||||
|
||||
// Cooperatively loads one PxgShape with a full warp. Each participating lane reads
// one 32-bit word of the shape (11 words, plus one extra word when pointers are
// 64-bit), and __shfl_sync distributes the fields to all lanes.
// Word layout: [0-2] scale.scale, [3-6] scale.rotation, [7] materialIndex,
// [8] hullOrMeshPtr low word ([9] high word on 64-bit), then type and
// particleOrSoftbodyId. sizeof(size_t) is a compile-time constant, so the
// pointer-width branches fold away.
__device__ inline static
void PxgShape_ReadWarp(PxgShape& res, const PxgShape* ptr)
{
	const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);

	PxReal input;
	if (laneIdx < 11 + (sizeof(size_t) == 8) * 1)
	{
		input = reinterpret_cast<const PxReal *>(ptr)[laneIdx];
	}

	res.scale.scale.x = __shfl_sync(FULL_MASK, input, 0);
	res.scale.scale.y = __shfl_sync(FULL_MASK, input, 1);
	res.scale.scale.z = __shfl_sync(FULL_MASK, input, 2);
	res.scale.rotation.x = __shfl_sync(FULL_MASK, input, 3);
	res.scale.rotation.y = __shfl_sync(FULL_MASK, input, 4);
	res.scale.rotation.z = __shfl_sync(FULL_MASK, input, 5);
	res.scale.rotation.w = __shfl_sync(FULL_MASK, input, 6);
	res.materialIndex = __shfl_sync(FULL_MASK, __float_as_uint(input), 7);

	// Reassemble the (possibly 64-bit) pointer from its 32-bit words.
	size_t hullOrMeshPtr = __shfl_sync(FULL_MASK, __float_as_uint(input), 8);

	if (sizeof(size_t) == 8)
	{
		const PxU64 hullPtr_hi = __shfl_sync(FULL_MASK, __float_as_uint(input), 9);
		hullOrMeshPtr |= hullPtr_hi << 32ll;
	}
	res.hullOrMeshPtr = hullOrMeshPtr;

	// Remaining fields follow the pointer, so their word index shifts by one on 64-bit.
	res.type = __shfl_sync(FULL_MASK, __float_as_uint(input), 9 + (sizeof(size_t) == 8) * 1);
	res.particleOrSoftbodyId = __shfl_sync(FULL_MASK, __float_as_uint(input), 10 + (sizeof(size_t) == 8) * 1);
}
|
||||
|
||||
/*
* Loads a PxgShape pair and corresponding transform cache refs for contact manager 'cmIdx'.
* Loads ExpectedType0 into shape0. It is assumed that one of the shapes is actually of ExpectedType0.
* The function asserts that the shape0 result actually is of type ExpectedType0 and that shape1 is of type
* ExpectedType1 if it was specified.
*/
template <PxGeometryType::Enum ExpectedType0, PxGeometryType::Enum ExpectedType1 = PxGeometryType::eINVALID>
PX_FORCE_INLINE __device__ void LoadShapePair(
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	PxU32 cmIdx,
	const PxgShape* PX_RESTRICT gpuShapes,
	PxgShape& shape0, //output
	PxU32& transformCacheRef0, //output
	PxgShape& shape1, //output
	PxU32& transformCacheRef1 //output
)
{
	PxgContactManagerInput cmInput = cmInputs[cmIdx];
	const PxgShape* srcShape0 = gpuShapes + cmInput.shapeRef0;
	const PxgShape* srcShape1 = gpuShapes + cmInput.shapeRef1;
	PxU32 srcCacheRef0 = cmInput.transformCacheRef0;
	PxU32 srcCacheRef1 = cmInput.transformCacheRef1;

	// When both expected types are identical there is nothing to disambiguate; the
	// template parameters are compile-time constants, so this branch folds away.
	if (ExpectedType0 != ExpectedType1)
	{
		PxGeometryType::Enum type0 = PxGeometryType::Enum(srcShape0->type);
		if (type0 != ExpectedType0)
		{
			// shape0 is not of the expected type: swap the pair (and the cache refs
			// so they stay associated with their shapes).
			PxSwap(srcShape0, srcShape1);
			PxSwap(srcCacheRef0, srcCacheRef1);
		}
	}

	shape0 = *srcShape0;
	shape1 = *srcShape1;
	transformCacheRef0 = srcCacheRef0;
	transformCacheRef1 = srcCacheRef1;

	assert(shape0.type == ExpectedType0);
	assert(ExpectedType1 == PxGeometryType::eINVALID || shape1.type == ExpectedType1);
}
|
||||
|
||||
/*
* Loads a PxgShape pair and corresponding transform cache refs. Full warp needs to participate,
* because the loads are performed cooperatively via the *_ReadWarp helpers.
* Loads ExpectedType0 into shape0. It is assumed that one of the shapes is actually of ExpectedType0.
* The function asserts that the shape0 result actually is of type ExpectedType0 and that shape1 is of type
* ExpectedType1 if it was specified.
*/
template <PxGeometryType::Enum ExpectedType0, PxGeometryType::Enum ExpectedType1 = PxGeometryType::eINVALID>
PX_FORCE_INLINE __device__ void LoadShapePairWarp(
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	PxU32 cmIdx,
	const PxgShape* PX_RESTRICT gpuShapes,
	PxgShape& shape0, //output
	PxU32& transformCacheRef0, //output
	PxgShape& shape1, //output
	PxU32& transformCacheRef1 //output
)
{
	// Warp-cooperative load; every lane receives the full input afterwards.
	PxgContactManagerInput cmInput;
	PxgContactManagerInput_ReadWarp(cmInput, cmInputs, cmIdx);
	const PxgShape* srcShape0 = gpuShapes + cmInput.shapeRef0;
	const PxgShape* srcShape1 = gpuShapes + cmInput.shapeRef1;
	PxU32 srcCacheRef0 = cmInput.transformCacheRef0;
	PxU32 srcCacheRef1 = cmInput.transformCacheRef1;

	// Compile-time constant condition: the disambiguation swap is compiled out when
	// both expected types are the same.
	if (ExpectedType0 != ExpectedType1)
	{
		PxGeometryType::Enum type0 = PxGeometryType::Enum(srcShape0->type);
		if (type0 != ExpectedType0)
		{
			// All lanes read the same 'type' value, so every lane takes the same
			// branch and the warp-wide reads below stay converged.
			PxSwap(srcShape0, srcShape1);
			PxSwap(srcCacheRef0, srcCacheRef1);
		}
	}

	PxgShape_ReadWarp(shape0, srcShape0);
	PxgShape_ReadWarp(shape1, srcShape1);
	transformCacheRef0 = srcCacheRef0;
	transformCacheRef1 = srcCacheRef1;

	assert(shape0.type == ExpectedType0);
	assert(ExpectedType1 == PxGeometryType::eINVALID || shape1.type == ExpectedType1);
}
|
||||
|
||||
// Cooperatively stores one ConvexTriNormalAndIndex with a full warp: lanes 0-3 each
// copy one 32-bit word from 'inp' to 'outp'.
__device__ inline static
void ConvexTriNormalAndIndex_WriteWarp(ConvexTriNormalAndIndex* outp, const ConvexTriNormalAndIndex& inp)
{
	const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);

	if (laneIdx < 4)
	{
		uint input = reinterpret_cast<const uint *>(&inp)[laneIdx];
		reinterpret_cast<uint *>(outp)[laneIdx] = input;
	}

}
|
||||
|
||||
// Cooperatively stores one ConvexMeshPair with a full warp: one 32-bit word per
// participating lane, with the word count derived from the struct size.
__device__ inline static
void ConvexMeshPair_WriteWarp(ConvexMeshPair* outp, const ConvexMeshPair& inp)
{
	const PxU32 laneIdx = threadIdx.x & (WARP_SIZE - 1);

	if (laneIdx < (sizeof(ConvexMeshPair)/sizeof(uint)))
	{
		uint input = reinterpret_cast<const uint *>(&inp)[laneIdx];
		reinterpret_cast<uint *>(outp)[laneIdx] = input;
	}

}
|
||||
|
||||
#endif
|
||||
380
engine/third_party/physx/source/gpunarrowphase/src/CUDA/deformableCollision.cuh
vendored
Normal file
380
engine/third_party/physx/source/gpunarrowphase/src/CUDA/deformableCollision.cuh
vendored
Normal file
@@ -0,0 +1,380 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __DEFORMABLE_COLLISION_CUH__
|
||||
#define __DEFORMABLE_COLLISION_CUH__
|
||||
|
||||
#include "foundation/PxVec2.h"
|
||||
#include "foundation/PxVecMath.h"
|
||||
#include "GuDistancePointTriangle.h"
|
||||
#include "PxgFEMCloth.h"
|
||||
|
||||
#define DEFORMABLE_BARYCENTRIC_THRESHOLD (1.0e-6f)
|
||||
#define DEFORMABLE_ONE_MINUS_BARYCENTRIC_THRESHOLD (1.0f - DEFORMABLE_BARYCENTRIC_THRESHOLD)
|
||||
|
||||
// Returns the axis-aligned bounding box of a triangle given its three
// world-space vertices.
PX_FORCE_INLINE __device__ PxBounds3 triBoundingBox(const PxVec3& worldV0, const PxVec3& worldV1, const PxVec3& worldV2)
{
	PxBounds3 bounds;

	// Component-wise minimum over the three vertices.
	bounds.minimum.x = PxMin(PxMin(worldV0.x, worldV1.x), worldV2.x);
	bounds.minimum.y = PxMin(PxMin(worldV0.y, worldV1.y), worldV2.y);
	bounds.minimum.z = PxMin(PxMin(worldV0.z, worldV1.z), worldV2.z);

	// Component-wise maximum over the three vertices.
	bounds.maximum.x = PxMax(PxMax(worldV0.x, worldV1.x), worldV2.x);
	bounds.maximum.y = PxMax(PxMax(worldV0.y, worldV1.y), worldV2.y);
	bounds.maximum.z = PxMax(PxMax(worldV0.z, worldV1.z), worldV2.z);

	return bounds;
}
|
||||
|
||||
// Returns the axis-aligned bounding box of a tetrahedron given its four
// world-space vertices.
PX_FORCE_INLINE __device__ PxBounds3 tetBoundingBox(const PxVec3& worldV0, const PxVec3& worldV1, const PxVec3& worldV2, const PxVec3& worldV3)
{
	PxBounds3 bounds;

	// Component-wise minimum: pairwise reduce (v0,v1) and (v2,v3), then combine.
	bounds.minimum.x = PxMin(PxMin(worldV0.x, worldV1.x), PxMin(worldV2.x, worldV3.x));
	bounds.minimum.y = PxMin(PxMin(worldV0.y, worldV1.y), PxMin(worldV2.y, worldV3.y));
	bounds.minimum.z = PxMin(PxMin(worldV0.z, worldV1.z), PxMin(worldV2.z, worldV3.z));

	// Component-wise maximum, same pairwise reduction.
	bounds.maximum.x = PxMax(PxMax(worldV0.x, worldV1.x), PxMax(worldV2.x, worldV3.x));
	bounds.maximum.y = PxMax(PxMax(worldV0.y, worldV1.y), PxMax(worldV2.y, worldV3.y));
	bounds.maximum.z = PxMax(PxMax(worldV0.z, worldV1.z), PxMax(worldV2.z, worldV3.z));

	return bounds;
}
|
||||
|
||||
// Computes the barycentric coordinates (u, v, w, x) of 'point' with respect to
// tetrahedron (a, b, c, d) via determinant ratios (Cramer's rule), writing the
// clamped coordinates to 'barycentric'. Returns true if the *unclamped*
// coordinates all lie within [-eps, 1 + eps], i.e. the point is inside the
// tetrahedron up to tolerance.
__device__ inline bool computeTetBarycentric(const PxVec3& a, const PxVec3& b, const PxVec3& c, const PxVec3& d, const PxVec3& point,
float4& barycentric, const PxReal eps = 1e-5f)
{
	const PxReal minEps = -eps;
	const PxReal maxEps = 1.f + eps;
	const PxVec3 ba = b - a;
	const PxVec3 ca = c - a;
	const PxVec3 da = d - a;
	const PxVec3 pa = point - a;

	// Scalar triple products replace the explicit PxMat33 determinants of the
	// original derivation (kept below for reference).
	// PxMat33 bcd(ba, ca, da);
	// const PxReal detBcd = bcd.getDeterminant();
	const PxReal detBcd = ba.dot(ca.cross(da));

	/*PxMat33 pcd(pa, ca, da);
	const PxReal detPcd = pcd.getDeterminant();*/
	const PxReal detPcd = pa.dot(ca.cross(da));

	const PxReal v = detPcd / detBcd;

	/*PxMat33 bpd(ba, pa, da);
	const PxReal detBpd = bpd.getDeterminant();*/
	const PxReal detBpd = ba.dot(pa.cross(da));
	const PxReal w = detBpd / detBcd;

	/*PxMat33 bcp(ba, ca, pa);
	const PxReal detBcp = bcp.getDeterminant();*/
	const PxReal detBcp = ba.dot(ca.cross(pa));

	const PxReal x = detBcp / detBcd;

	// Barycentrics sum to one by construction (before clamping).
	const PxReal u = 1.f - v - w - x;
	barycentric = make_float4(u, v, w, x);

	// After clamping the barycentrics might not sum up to 1 anymore.
	// Clamping should be done because otherwise slightly negative barycentrics can lead to a negative inverse mass, e. g.
	// when 3 vertices of a tetrahedron have inverse mass zero and the forth vertex happens to get a negative barycentric coordinate
	barycentric.x = PxClamp(barycentric.x, 0.0f, 1.0f);
	barycentric.y = PxClamp(barycentric.y, 0.0f, 1.0f);
	barycentric.z = PxClamp(barycentric.z, 0.0f, 1.0f);
	barycentric.w = PxClamp(barycentric.w, 0.0f, 1.0f);

	// Containment test uses the unclamped values.
	return (u >= minEps && u <= maxEps) && (v >= minEps && v <= maxEps) && (w >= minEps && w <= maxEps) && (x >= minEps && x <= maxEps);
}
|
||||
|
||||
// Convenience overload: same as above, taking the four tetrahedron vertices as an
// array (verts[0..3]) instead of individual arguments.
__device__ inline bool computeTetBarycentric(PxVec3* verts, const PxVec3& point, float4& barycentric, const float eps = 1e-5f)
{
	return computeTetBarycentric(verts[0], verts[1], verts[2], verts[3], point, barycentric, eps);
}
|
||||
|
||||
// Returns the closest point on triangle (a, b, c) to point p, classifying p
// against the triangle's Voronoi regions (vertex / edge / face cases).
// Outputs s and t such that closest = a + s*(b-a) + t*(c-a), i.e. the
// barycentric coordinates of the result with respect to edges ab and ac.
__device__ inline PxVec3 closestPtPointTriangle(const PxVec3& p, const PxVec3& a, const PxVec3& b, const PxVec3& c, float& s, float& t)
{
	// Check if P in vertex region outside A
	const PxVec3 ab = b - a;
	const PxVec3 ac = c - a;
	const PxVec3 ap = p - a;

	const float d1 = ab.dot(ap);
	const float d2 = ac.dot(ap);
	if(d1 <= 0.0f && d2 <= 0.0f)
	{
		s = 0.0f;
		t = 0.0f;
		return a; // Barycentric coords 1,0,0
	}

	// Check if P in vertex region outside B
	const PxVec3 bp = p - b;
	const float d3 = ab.dot(bp);
	const float d4 = ac.dot(bp);
	if(d3 >= 0.0f && d4 <= d3)
	{
		s = 1.0f;
		t = 0.0f;
		return b; // Barycentric coords 0,1,0
	}

	// Check if P in edge region of AB, if so return projection of P onto AB
	const float vc = d1 * d4 - d3 * d2;
	if(vc <= 0.0f && d1 >= 0.0f && d3 <= 0.0f)
	{
		const float v = d1 / (d1 - d3);
		s = v;
		t = 0.0f;
		return a + v * ab; // barycentric coords (1-v, v, 0)
	}

	// Check if P in vertex region outside C
	const PxVec3 cp = p - c;
	const float d5 = ab.dot(cp);
	const float d6 = ac.dot(cp);
	if(d6 >= 0.0f && d5 <= d6)
	{
		s = 0.0f;
		t = 1.0f;
		return c; // Barycentric coords 0,0,1
	}

	// Check if P in edge region of AC, if so return projection of P onto AC
	const float vb = d5 * d2 - d1 * d6;
	if(vb <= 0.0f && d2 >= 0.0f && d6 <= 0.0f)
	{
		const float w = d2 / (d2 - d6);
		s = 0.0f;
		t = w;
		return a + w * ac; // barycentric coords (1-w, 0, w)
	}

	// Check if P in edge region of BC, if so return projection of P onto BC
	const float va = d3 * d6 - d5 * d4;
	if(va <= 0.0f && (d4 - d3) >= 0.0f && (d5 - d6) >= 0.0f)
	{
		const float w = (d4 - d3) / ((d4 - d3) + (d5 - d6));
		s = 1.0f - w;
		t = w;
		return b + w * (c - b); // barycentric coords (0, 1-w, w)
	}

	// P inside face region. Compute Q through its barycentric coords (u,v,w)
	const float denom = 1.0f / (va + vb + vc);
	const float v = vb * denom;
	const float w = vc * denom;
	s = v;
	t = w;
	return a + ab * v + ac * w;
}
|
||||
|
||||
// Returns the squared distance from point p to triangle (a, b, c), writing the
// closest point on the triangle to 'closestP'. Delegates the closest-point
// computation to Gu::closestPtPointTriangle2.
__device__ static PxReal distancePointTriangleSquared(const PxVec3 p, const PxVec3 a, const PxVec3 b, const PxVec3 c, PxVec3& closestP)
{
	const PxVec3 ab = b - a;
	const PxVec3 ac = c - a;
	closestP = Gu::closestPtPointTriangle2(p, a, b, c, ab, ac);
	return (p - closestP).magnitudeSquared();
}
|
||||
|
||||
// Computes the parametric position w in [0, 1] of the point on edge (ev0, ev1)
// closest to v0 (closest = ev0 + w * (ev1 - ev0)). A degenerate (zero-length)
// edge yields the midpoint, w = 0.5.
__device__ static void closestPtPointEdge(const PxVec3& v0, const PxVec3& ev0, const PxVec3& ev1, PxReal& w)
{
	const PxVec3 edge = ev1 - ev0;
	const PxReal edgeLenSq = edge.dot(edge);

	if(edgeLenSq > 0.0f)
		w = PxClamp(edge.dot(v0 - ev0) / edgeLenSq, 0.0f, 1.0f);
	else
		w = 0.5f;
}
|
||||
|
||||
// Computes the closest points on two infinite lines.
// Returns false if the edges are parallel or degenerate (the outputs are still
// filled with a best-effort fallback in those cases).
// Outputs:
// - s, t: parametric positions along edge0 and edge1 (can be negative for infinite lines)
// - distSq: squared distance between the two closest points
PX_FORCE_INLINE __device__ static bool closestPtLineLine(const PxVec3& e0v0, const PxVec3& e0v1, const PxVec3& e1v0, const PxVec3& e1v1,
PxReal& s, PxReal& t, PxReal& distSq)
{
	const PxVec3 r0 = e0v1 - e0v0; // Edge0 direction
	const PxVec3 r1 = e1v1 - e1v0; // Edge1 direction
	const PxVec3 d = e1v0 - e0v0; // Displacement between starting points

	const PxReal r0Len2 = r0.magnitudeSquared();
	const PxReal r1Len2 = r1.magnitudeSquared();

	// Handle degenerate edges: If either edge has zero length, return midpoints
	// (bitwise '|' instead of '||' is deliberate: both operands are cheap and it
	// avoids a branch).
	if(r0Len2 < 1.0e-18f | r1Len2 < 1.0e-18f)
	{
		s = t = 0.0f;
		distSq = d.magnitudeSquared();
		return false;
	}

	const PxReal r0Len = PxSqrt(r0Len2);
	const PxReal r1Len = PxSqrt(r1Len2);

	const PxVec3 u0 = r0 / r0Len;
	const PxVec3 u1 = r1 / r1Len;

	const PxReal cosTheta = PxAbs(u0.dot(u1));

	// Edges are nearly parallel (cosTheta > 0.9999, i.e. angle below ~0.81 degrees).
	if(cosTheta > 0.9999f)
	{
		// Pick point e0v0 on edge0 and project to edge1
		PxReal tProj = (e0v0 - e1v0).dot(r1) / r1Len2;
		t = tProj;
		s = 0.0f;

		PxVec3 pt0 = e0v0;
		PxVec3 pt1 = e1v0 + r1 * tProj;
		distSq = (pt1 - pt0).magnitudeSquared();
		return false;
	}

	// 2x2 linear system for the closest-point parameters; with unit directions the
	// diagonal terms are 1:
	// PxReal a00 = 1.0f;
	// PxReal a11 = 1.0f;

	PxReal a01 = -u0.dot(u1);
	PxReal b0 = u0.dot(d) / r0Len;
	PxReal b1 = -u1.dot(d) / r1Len;

	PxReal det = 1.0f - a01 * a01;

	// Near-singular system (lines almost parallel): fall back like the degenerate case.
	if(det < 1.0e-9f)
	{
		s = t = 0.0f;
		distSq = d.magnitudeSquared();
		return false;
	}

	// Compute actual closest points
	const PxReal invDet = 1.0f / det;
	s = (b0 - b1 * a01) * invDet;
	t = (b1 - a01 * b0) * invDet;

	const PxVec3 pt0 = e0v0 + s * r0;
	const PxVec3 pt1 = e1v0 + t * r1;
	distSq = (pt1 - pt0).magnitudeSquared();

	return true;
}
|
||||
|
||||
// Computes the closest points on two edges (finite edges/segments).
// Outputs s, t in [0, 1] along edge0 and edge1 and the squared distance between
// the closest points. Returns false when the underlying line-line query reports
// degenerate or parallel edges (its fallback outputs are passed through).
PX_FORCE_INLINE __device__ static bool closestPtEdgeEdge(const PxVec3& e0v0, const PxVec3& e0v1, const PxVec3& e1v0, const PxVec3& e1v1,
PxReal& s, PxReal& t, PxReal& distSq)
{
	if(!closestPtLineLine(e0v0, e0v1, e1v0, e1v1, s, t, distSq))
	{
		return false; // Degenerate edges
	}

	// Falls back to point-edge distance when the infinite-line solution lies
	// outside either segment (bitwise '|' is deliberate, avoiding branches).
	if(s<0.0f | s> 1.0f | t<0.0f | t> 1.0f)
	{
		// Project each endpoint onto the opposite edge.
		PxReal w[4];
		closestPtPointEdge(e0v0, e1v0, e1v1, w[0]);
		closestPtPointEdge(e0v1, e1v0, e1v1, w[1]);
		closestPtPointEdge(e1v0, e0v0, e0v1, w[2]);
		closestPtPointEdge(e1v1, e0v0, e0v1, w[3]);

		// Candidates for closest point selection, as (s, t) parameter pairs.
		const PxReal candidates[4][2] = { { 0.0f, w[0] }, { 1.0f, w[1] }, { w[2], 0.0f }, { w[3], 1.0f } };

		PxI32 minIndex = 0;
		distSq = PX_MAX_REAL;

#pragma unroll
		// Compare the four closest points and select the minimum distance pair.
		for(int i = 0; i < 4; ++i)
		{
			const PxReal sC = candidates[i][0];
			const PxReal tC = candidates[i][1];
			const PxVec3 p0 = e0v0 * (1.0f - sC) + e0v1 * sC;
			const PxVec3 p1 = e1v0 * (1.0f - tC) + e1v1 * tC;
			const PxReal d2 = (p0 - p1).magnitudeSquared();
			minIndex = (d2 < distSq) ? i : minIndex;
			distSq = PxMin(d2, distSq);
		}

		// Assign the best closest point.
		assert(minIndex != -1);
		s = candidates[minIndex][0];
		t = candidates[minIndex][1];
	}

	return true;
}
|
||||
|
||||
// Type0: minimal triangle set covering all edges and vertices (compact encoding)
|
||||
// Type1: more uniformly distributed edges and vertices across triangles (balanced encoding)
|
||||
PX_FORCE_INLINE __device__ static bool isType0EdgeActive(PxU32 edgeAuthorship, PxU32 localEdgeIndex)
|
||||
{
|
||||
return (edgeAuthorship & (1U << (EdgeEncoding::TYPE0_EDGE_BASE_POS + localEdgeIndex))) != 0;
|
||||
}
|
||||
|
||||
// Returns whether this triangle owns the given local edge under the Type1 (balanced) encoding.
PX_FORCE_INLINE __device__ static bool isType1EdgeActive(PxU32 edgeAuthorship, PxU32 localEdgeIndex)
{
	const PxU32 bit = 1U << (EdgeEncoding::TYPE1_EDGE_BASE_POS + localEdgeIndex);
	return (edgeAuthorship & bit) != 0;
}
|
||||
|
||||
// Returns whether this triangle owns the given local vertex under the Type0 (compact) encoding.
PX_FORCE_INLINE __device__ static bool isType0VertexActive(PxU32 edgeAuthorship, PxU32 localVertexIndex)
{
	const PxU32 bit = 1U << (EdgeEncoding::TYPE0_VERTEX0_ACTIVE_POS + localVertexIndex);
	return (edgeAuthorship & bit) != 0;
}
|
||||
|
||||
// Returns whether this triangle owns the given local vertex under the Type1 (balanced) encoding.
PX_FORCE_INLINE __device__ static bool isType1VertexActive(PxU32 edgeAuthorship, PxU32 localVertexIndex)
{
	const PxU32 bit = 1U << (EdgeEncoding::TYPE1_VERTEX0_ACTIVE_POS + localVertexIndex);
	return (edgeAuthorship & bit) != 0;
}
|
||||
|
||||
#endif // __DEFORMABLE_COLLISION_CUH__
|
||||
180
engine/third_party/physx/source/gpunarrowphase/src/CUDA/deformableElementFilter.cuh
vendored
Normal file
180
engine/third_party/physx/source/gpunarrowphase/src/CUDA/deformableElementFilter.cuh
vendored
Normal file
@@ -0,0 +1,180 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __DEFORMABLE_ELEMENT_FILTER_CUH__
|
||||
#define __DEFORMABLE_ELEMENT_FILTER_CUH__
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "PxFiltering.h"
|
||||
#include "PxgSimulationCoreDesc.h"
|
||||
#include "PxgParticleSystem.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Returns true if 'pair' is present in the sorted array 'pairs' (binary search).
static __device__ bool find(PxParticleRigidFilterPair* pairs, const PxU32 nbPairs, PxParticleRigidFilterPair& pair)
{
	// Classic half-open [lo, hi) binary search over the sorted pair array.
	PxU32 lo = 0;
	PxU32 hi = nbPairs;

	while (lo < hi)
	{
		const PxU32 mid = (lo + hi) / 2;
		const PxParticleRigidFilterPair& candidate = pairs[mid];

		if (pair == candidate)
			return true;

		if (pair < candidate)
			hi = mid;
		else
			lo = mid + 1;
	}
	return false;
}
|
||||
|
||||
// Convenience overload: builds the (rigid, particle) search key and forwards to
// the binary-search overload above.
static __device__ bool find(PxParticleRigidFilterPair* pairs, const PxU32 nbPairs, PxU64 rigidId, PxU32 particleId)
{
	PxParticleRigidFilterPair key;
	key.mID0 = rigidId;
	key.mID1 = particleId;
	return find(pairs, nbPairs, key);
}
|
||||
|
||||
// Returns true if 'pair' is present in the sorted array 'pairs' (binary search).
// When 'ignoreIndex2' is set, a match on (index0, index1) alone also counts.
static __device__ bool find(const PxgRigidFilterPair* pairs, const PxU32 nbPairs, PxgRigidFilterPair& pair, bool ignoreIndex2)
{
	// Half-open [lo, hi) binary search driven by the pair's compare() ordering.
	PxU32 lo = 0;
	PxU32 hi = nbPairs;

	while (lo < hi)
	{
		const PxU32 mid = (lo + hi) / 2;
		const PxgRigidFilterPair& candidate = pairs[mid];
		const PxI32 cmp = pair.compare(candidate);
		const bool prefixMatch = ignoreIndex2 && pair.index0 == candidate.index0 && pair.index1 == candidate.index1;

		if (cmp == 0 || prefixMatch)
			return true;

		if (cmp < 0)
			hi = mid;
		else
			lo = mid + 1;
	}
	return false;
}
|
||||
|
||||
// Convenience overload: builds a (rigidId, compressedParticleId) key and searches
// while ignoring index2, since only the first two indices identify the pair here.
static __device__ bool find(const PxgRigidFilterPair* pairs, const PxU32 nbPairs, PxU64 rigidId, PxU32 compressedParticleId)
{
	PxgRigidFilterPair key;
	key.index0 = rigidId;
	key.index1 = compressedParticleId;
	key.index2 = 0;
	return find(pairs, nbPairs, key, true);
}
|
||||
|
||||
|
||||
|
||||
// Returns the index of the last entry in the sorted array 'values' that is
// <= 'value' (upper-bound binary search minus one). Returns 0 for an empty array.
// NOTE(review): if value < values[0] the result wraps to 0xFFFFFFFF, matching the
// original behavior — callers are expected to pass in-range values.
static __device__ PxU32 findRange(PxU32 value, PxU32* values, PxU32 length)
{
	if (length == 0)
		return 0;

	// Upper-bound search: 'hi' converges to the first index with values[index] > value.
	PxU32 lo = 0;
	PxU32 hi = length;

	while (lo < hi)
	{
		const PxU32 mid = (lo + hi) / 2;
		if (values[mid] > value)
			hi = mid;
		else
			lo = mid + 1;
	}
	return hi - 1;
}
|
||||
|
||||
// Returns true if the (rigid, particle) combination is listed in the filter pairs
// of the particle's owning user buffer. The compressed particle id is decoded to
// an index into the flat uber buffer, the owning buffer is located via the runsum,
// and that buffer's sorted filter-pair list is searched with a buffer-local index.
static __device__ bool find(PxgParticleSystem& particleSystem, PxU64 rigidId, PxU64 compressedParticleId)
{
	// this is the ID into the flat uber buffer
	PxU32 particleId = PxGetParticleIndex(compressedParticleId);

	// Locate which user buffer this particle belongs to.
	PxU32 id = findRange(particleId, particleSystem.mParticleBufferRunsum, particleSystem.mCommonData.mNumParticleBuffers);

	// Search with the particle index rebased to the start of its buffer.
	PxgParticleSimBuffer& particleBuffer = particleSystem.mParticleSimBuffers[id];
	return find(particleBuffer.mFilterPairs, particleBuffer.mNumFilterPairs, rigidId, particleId - particleSystem.mParticleBufferRunsum[id]);
}
|
||||
|
||||
// Returns true if 'pair' is present in the sorted array 'pairs' (binary search).
// When 'ignoreIndex2' is set, a match on (index0, index1) alone also counts.
static __device__ bool find(const PxgNonRigidFilterPair* pairs, const PxU32 nbPairs, PxgNonRigidFilterPair& pair, bool ignoreIndex2)
{
	// Half-open [lo, hi) binary search driven by the pair's compare() ordering.
	PxU32 lo = 0;
	PxU32 hi = nbPairs;

	while (lo < hi)
	{
		const PxU32 mid = (lo + hi) / 2;
		const PxgNonRigidFilterPair& candidate = pairs[mid];
		const PxI32 cmp = pair.compare(candidate);
		const bool prefixMatch = ignoreIndex2 && pair.index0 == candidate.index0 && pair.index1 == candidate.index1;

		if (cmp == 0 || prefixMatch)
			return true;

		if (cmp < 0)
			hi = mid;
		else
			lo = mid + 1;
	}
	return false;
}
|
||||
|
||||
// Convenience overload: builds a (compressedId0, compressedId1) key and searches
// while ignoring index2.
static __device__ bool find(const PxgNonRigidFilterPair* pairs, const PxU32 nbPairs, const PxU32 compressedId0, const PxU32 compressedId1)
{
	PxgNonRigidFilterPair key;
	key.index0 = compressedId0;
	key.index1 = compressedId1;
	key.index2 = 0;
	return find(pairs, nbPairs, key, true);
}
|
||||
|
||||
// Convenience overload: builds a fully-specified key including the unique particle
// user-buffer id, so index2 participates in the match.
static __device__ bool find(const PxgNonRigidFilterPair* pairs, const PxU32 nbPairs, const PxU32 compressedId0, const PxU32 compressedId1, PxU32 uniqueParticleUserBufferId)
{
	PxgNonRigidFilterPair key;
	key.index0 = compressedId0;
	key.index1 = compressedId1;
	key.index2 = uniqueParticleUserBufferId;

	return find(pairs, nbPairs, key, false);
}
|
||||
|
||||
// Returns true if (compressedParticleId, compressedId1) is listed in 'pairs' for
// the particle's owning user buffer: the buffer is located via the runsum and its
// unique id is used as the third key component of the search.
static __device__ bool find(const PxgParticleSystem& particleSystem, const PxgNonRigidFilterPair* pairs, const PxU32 nbPairs, const PxU32 compressedParticleId, const PxU32 compressedId1)
{
	// Index into the flat particle uber buffer.
	PxU32 particleId = PxGetParticleIndex(compressedParticleId);

	// Locate which user buffer this particle belongs to.
	PxU32 id = findRange(particleId, particleSystem.mParticleBufferRunsum, particleSystem.mCommonData.mNumParticleBuffers);

	PxgParticleSimBuffer& particleBuffer = particleSystem.mParticleSimBuffers[id];
	return find(pairs, nbPairs, compressedParticleId, compressedId1, particleBuffer.mUniqueId);
}
|
||||
|
||||
#endif // __DEFORMABLE_ELEMENT_FILTER_CUH__
|
||||
90
engine/third_party/physx/source/gpunarrowphase/src/CUDA/distanceSegmentSegment.cuh
vendored
Normal file
90
engine/third_party/physx/source/gpunarrowphase/src/CUDA/distanceSegmentSegment.cuh
vendored
Normal file
@@ -0,0 +1,90 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __DIS_SEG_SEG_CUH__
|
||||
#define __DIS_SEG_SEG_CUH__
|
||||
|
||||
/**
 * Computes the closest-point parameters between two segments given in
 * parametric form: seg1(s) = p1 + s*d1, seg2(t) = p2 + t*d2, s,t in [0,1].
 *
 * Outputs only the parameters s and t; the caller reconstructs the points
 * and the (squared) distance itself.
 *
 * Near-parallel segments (determinant below a small epsilon) are handled by
 * anchoring s at the midpoint of segment 1 before solving for t.
 */
__device__ __forceinline__ static
void distanceSegmentSegmentSquared(
	const PxVec3& p1, const PxVec3& d1,
	const PxVec3& p2, const PxVec3& d2,
	PxReal& s, PxReal& t)
{
	const PxReal parallelEps = 1e-6f;

	// Quadratic coefficients of the squared distance between the two lines.
	const PxVec3 diff = p1 - p2;
	const PxReal d1Sq = d1.dot(d1);
	const PxReal d2Sq = d2.dot(d2);
	const PxReal d1d2 = d1.dot(d2);
	const PxReal d1r = d1.dot(diff);
	// Guarded reciprocals: a degenerate (near-zero-length) segment yields 0,
	// which pins the corresponding parameter computation at the clamp below.
	const PxReal invD1Sq = d1Sq > parallelEps ? (1.f / d1Sq) : 0.f;
	const PxReal invD2Sq = d2Sq > parallelEps ? (1.f / d2Sq) : 0.f;
	const PxReal d2r = d2.dot(diff);

	// General (non-parallel) case: unconstrained minimum on segment 1,
	// clamped to the segment.
	const PxReal det = d1Sq * d2Sq - d1d2 * d1d2;
	const PxReal num = d1d2 * d2r - d1r * d2Sq;
	const PxReal sClamped = PxClamp(num / det, 0.f, 1.f);

	// Near-parallel segments: det < eps, so fall back to the midpoint of
	// segment 1 (the division above may have produced inf/NaN; it is
	// discarded here).
	const PxReal sSel = parallelEps > det ? 0.5f : sClamped;

	// Point on segment 2 closest to the chosen point on segment 1,
	// clamped into [0,1].
	const PxReal tClamped = PxClamp((d1d2 * sSel + d2r) * invD2Sq, 0.f, 1.f);

	// Re-solve s against the clamped t and clamp once more.
	const PxReal sFinal = PxClamp((d1d2 * tClamped - d1r) * invD1Sq, 0.f, 1.f);

	s = sFinal;
	t = tClamped;
}
|
||||
|
||||
|
||||
#endif
|
||||
530
engine/third_party/physx/source/gpunarrowphase/src/CUDA/epa.cuh
vendored
Normal file
530
engine/third_party/physx/source/gpunarrowphase/src/CUDA/epa.cuh
vendored
Normal file
@@ -0,0 +1,530 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __EPA_CUH__
|
||||
#define __EPA_CUH__
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "nputils.cuh"
|
||||
#include "schlockShared.h"
|
||||
|
||||
namespace squawk
|
||||
{
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Warp-shared scratch storage for the EPA routines below — one instance per
// warp (presumably placed in CUDA shared memory by the caller; the volatile
// ldS/stS accessors used on it suggest that — TODO confirm at call sites).
struct EpaScratch
{
	PxVec3 vertices[schlock::EPA_MAX_VERTS];	// Minkowski-difference support vertices of the polytope
	PxVec3 bestDir;								// best direction found so far (written by lane 0 in epa())
	PxReal bestDist;							// NOTE(review): not written by any function visible here — confirm users elsewhere
	PxU16 indices[schlock::EPA_MAX_VERTS];		// per-vertex packed source indices: (indexB << 8) | indexA
	PxU8 loop[schlock::EPA_MAX_VERTS];			// flattened silhouette-edge vertex loop (built in replaceTriangle)
	PxU8 edges[schlock::EPA_MAX_VERTS];			// per-silhouette-vertex: lane index of the cone triangle on that edge
	PxU8 next[schlock::EPA_MAX_VERTS];			// linked-list "next" indices used to walk the silhouette loop
};
|
||||
|
||||
|
||||
// Seed topology for the EPA polytope, indexed per lane.
// sStartVertices4 / sStartAdjacencies4: the four faces (vertex triples) and
// face-adjacency triples of the initial tetrahedron used when GJK terminates
// with a 4-point simplex.
__constant__ __device__ schlock::Vec3I8 sStartVertices4[4] = { MAKE_Vec3I8(0,1,2), MAKE_Vec3I8(3,1,0), MAKE_Vec3I8(3,2,1), MAKE_Vec3I8(3,0,2) };
__constant__ __device__ schlock::Vec3I8 sStartAdjacencies4[4] = { MAKE_Vec3I8(2,3,1), MAKE_Vec3I8(0,3,2), MAKE_Vec3I8(0,1,3), MAKE_Vec3I8(0,2,1) };

// Seed topology for the 1/2/3-point simplex start: a double-sided triangle
// (two faces with opposite winding, each adjacent to the other on all edges).
__constant__ __device__ schlock::Vec3I8 sStartVertices3[2] = { MAKE_Vec3I8(0,1,2), MAKE_Vec3I8(1,0,2) };
__constant__ __device__ schlock::Vec3I8 sStartAdjacency3[2] = { MAKE_Vec3I8(1,1,1), MAKE_Vec3I8(0,0,0) };

// NOTE(review): not referenced by any code visible in this section —
// presumably a per-lane bitmap element type used elsewhere; confirm.
typedef int VMapType;
|
||||
|
||||
// Warp-wide EPA polytope-expansion step; one triangle per lane.
// Removes the triangles visible from the newly added support vertex
// (epaS.vertices[nbVertices-1]), walks the resulting silhouette-edge loop,
// and fans a cone of new triangles from the new vertex over that loop.
//
// In/out per-lane state: v (vertex triple), a (adjacent-triangle triple),
// normal/distance (plane of this lane's triangle), liveB (bitmap of live
// triangles, one bit per lane).
// Returns false when the topology update is degenerate; the caller aborts.
__device__ inline bool replaceTriangle(EpaScratch& epaS, int nbVertices, int triIndex,
	PxReal lowerBound, PxVec3& normal, PxReal& distance,
	schlock::Vec3I8& v, schlock::Vec3I8& a, int& liveB, const PxReal releps)
{
	// to make it easier to keep track, we use a 'B' suffix for a bitmap where each bit represents a lane (and therefore a triangle)
	int tI = threadIdx.x, laneB = 1<<tI;
	//PxVec3 apex = ldS(S.vertices[nbVertices-1]);
	PxVec3 apex = epaS.vertices[nbVertices - 1];

	// triangles that are silhouetted by the new vertex are threatened, but not yet killed
	bool threatened = liveB&laneB && (normal.dot(apex) - distance >= /*-*/releps);
	schlock::EPABitMap threatenedB = __ballot_sync(FULL_MASK, threatened);

	// Degenerate if nothing is threatened, everything is threatened, or the
	// seed triangle (the one the new vertex was generated from) is not.
	if (threatenedB == 0 || threatenedB == liveB || !(threatenedB & (1<<triIndex)))
	{
#if EPA_LOG_DEGENERATE_CASES
		if(threadIdx.x == 0)
			printf("degenrate: threatenedB %d liveB %d < tri %d \n", threatenedB, liveB, tri);
#endif

		return false;
	}

	// the threatened set is not guaranteed connected, so flood fill to find killB, a bitmap of tris to remove
	schlock::EPABitMap b0B = 1<<a.get(0), b1B = 1<<a.get(1), b2B = 1<<a.get(2);
	// nbr = threatened neighbors of threatened triangles
	schlock::EPABitMap killB = 1<<triIndex, nbr = (b0B | b1B | b2B) & (threatened?threatenedB:0);

	// flood the threatened triangles to find the killed ones
	//for(schlock::EPABitMap pB = 0; pB != killB; pB = killB, killB |= __ballot(nbr&killB));

	for (schlock::EPABitMap pB = 0; pB != killB;)
	{
		pB = killB;
		killB |= __ballot_sync(FULL_MASK, nbr&killB);
	}

	//every triangles aren't alive and the triangles we want to kill in this iteration
	schlock::EPABitMap deadB = (~liveB) | killB;

	// d0, d1, d2 denote for each triangle whether the corresponding edge is a silhouette edge
	const int alive = laneB&(~deadB);
	const int d0 = alive && (b0B&deadB);
	const int d1 = alive && (b1B&deadB);
	const int d2 = alive && (b2B&deadB);

	// write the silhouette edges into the loop buffer
	int v0 = v.get(0), v1 = v.get(1), v2 = v.get(2), z = 0;

	assert(!alive || v0 < schlock::EPA_MAX_VERTS);
	assert(!alive || v1 < schlock::EPA_MAX_VERTS);
	assert(!alive || v2 < schlock::EPA_MAX_VERTS);

	// S.edges is the adjacency for the new cone tri on this edge, and
	// also where the cone triangle will write back its index for later fixup
	if (d0)
		z = v2, epaS.next[v2] = v1, epaS.edges[v2] = tI;
	if (d1)
		z = v0, epaS.next[v0] = v2, epaS.edges[v0] = tI;
	if (d2)
		z = v1, epaS.next[v1] = v0, epaS.edges[v1] = tI;

	int d0d1d2B = __ballot_sync(FULL_MASK, d0|d1|d2);

	// No silhouette edge anywhere -> nothing to fan the cone from.
	if (!d0d1d2B)
	{
#if EPA_LOG_DEGENERATE_CASES
		if (threadIdx.x == 0)
			printf("degenrate: d0d1d2B %d b0B %d b1B %d b2B %d deadB %d \n", d0d1d2B, b0B, b1B, b2B, deadB);
#endif

		return false;
	}

	int loopStart = __shfl_sync(FULL_MASK, z, lowestSetIndex(d0d1d2B)); // pick a loop vert from the lowest indexed triangle that has one
	int nbLoop = 0;

	__syncwarp();

	// Lane 0 serially walks the silhouette linked list (epaS.next) and
	// flattens it into epaS.loop; nbLoop = -1 marks a malformed loop.
	if(tI == 0)
	{
		assert(loopStart < schlock::EPA_MAX_VERTS);

		epaS.loop[nbLoop++] = loopStart; // flatten the linked list into a vertex array
		for(int vertex = epaS.next[loopStart]; vertex!=loopStart; vertex = epaS.next[vertex])
		{
			if(vertex >= schlock::EPA_MAX_VERTS)
			{
#if EPA_LOG_DEGENERATE_CASES
				printf("degenrate: vertex %d\n", vertex);
#endif

				nbLoop = -1;
				break;
			}

			if(nbLoop >= schlock::EPA_MAX_VERTS - 1)
			{
#if EPA_LOG_DEGENERATE_CASES
				printf("degenrate: nbLoop %d\n", nbLoop);
#endif

				nbLoop = -1;
				break;
			}

			epaS.loop[nbLoop++] = vertex;
		}

#if EPA_REPLACE_TRIANGLE_LOGGING
		for(int i=0;i<nbLoop;i++)
			printf("%d, ", S.loop[i]);
		printf("(starting at %d)\n",loopStart);
#endif
	}

	nbLoop = __shfl_sync(FULL_MASK, nbLoop, 0);

	// Sanity check: the loop length must equal the total number of
	// silhouette edges counted across the warp (also catches nbLoop == -1).
	if(nbLoop != __popc(__ballot_sync(FULL_MASK, d0)) + __popc(__ballot_sync(FULL_MASK, d1)) + __popc(__ballot_sync(FULL_MASK, d2)))
	{
#if EPA_LOG_DEGENERATE_CASES
		{
			printf("nbLoop %d __ballot(d0) %d __ballot(d1) %d __ballot(d2) %d tid %d", nbLoop, __ballot(d0), __ballot(d1), __ballot(d2), threadIdx.x);
		}
#endif
		return false;
	}

	__syncwarp();

	// make the cone with the nbLoop least non-survivors
	int coneIndex = __popc(deadB&(laneB-1)), coneB = __ballot_sync(FULL_MASK, deadB&laneB && coneIndex < nbLoop);
	bool fail = false;
	if(coneB & laneB)
	{
		assert(coneIndex < nbLoop);
		assert(nbLoop < schlock::EPA_MAX_VERTS);

		// New cone triangle: apex (the new vertex) + one silhouette edge.
		int v0 = nbVertices-1, v1 = epaS.loop[coneIndex], v2 = epaS.loop[coneIndex+1 == nbLoop ? 0 : coneIndex+1];
		v.setInt(MAKE_Vec3I8(v0, v1, v2));

		assert(v0 < schlock::EPA_MAX_VERTS);
		assert(v1 < schlock::EPA_MAX_VERTS);
		assert(v2 < schlock::EPA_MAX_VERTS);

		a.set(0, epaS.edges[v1]);
		a.set(1, lowestSetIndex(coneB & ~(coneIndex+1 == nbLoop ? 0 : 2*laneB-1))); // previous bit in the active mask, wrapping around
		a.set(2, highestSetIndex(coneB & ((coneIndex==0 ? 0 : laneB)-1))); // next bit in the active mask, wrapping around

		epaS.edges[v1] = tI; // write our index back into the buffer for later fixup

		//PxVec3 p0 = ldS(S.vertices[v0]), p1 = ldS(S.vertices[v1]), p2 = ldS(S.vertices[v2]);
		PxVec3 p0 = epaS.vertices[v0], p1 = epaS.vertices[v1], p2 = epaS.vertices[v2];
		PxVec3 e01 = p1 - p0, e02 = p2 - p0;
		PxVec3 n;

		// Use the shorter of the two base edges for the cross product
		// (presumably for numerical robustness — TODO confirm).
		if (e01.magnitudeSquared() < e02.magnitudeSquared())
		{
			n = e01.cross(p2 - p1);
		}
		else
		{
			n = e02.cross(p2 - p1);
		}

		PxReal nm = n.magnitude();
		normal = n/nm;
		distance = normal.dot(p0);
		/*fail = nm == 0 || (lowerBound > 0.0f && distance < lowerBound);
		if (fail)
		{
			printf("fail nm %f\ lowerBound %f distance %f \n", nm, lowerBound, distance);
		}*/
		// Fail on a zero-area triangle, or if the new face ends up closer
		// than the established lower bound (beyond a small tolerance).
		fail = nm == 0 || (lowerBound > 0.0f && (distance - lowerBound) < -1e-4f);
	}

	__syncwarp();

	// ... to fix up the silhouette adjacency with the new tris
	if(d0) a.set(0, epaS.edges[v.get(2)]);
	if(d1) a.set(1, epaS.edges[v.get(0)]);
	if(d2) a.set(2, epaS.edges[v.get(1)]);

	// Survivors plus the freshly built cone triangles form the new hull.
	liveB = (liveB & ~deadB) | coneB;
	return __ballot_sync(FULL_MASK, fail) == 0;
}
|
||||
|
||||
|
||||
// Seeds the EPA polytope from a 4-point GJK termination simplex.
// Reinterprets GjkCachedData as 16 floats — presumably four (x,y,z,index)
// records; layout defined in schlockShared.h — TODO confirm. Lanes 0..15
// scatter the xyz components into S.vertices (compacted, skipping every
// 4th float) and the 4th float of each record into S.indices.
// Lanes 0..3 then receive one tetrahedron face each: its normalized plane
// (normal, distance) and the canonical vertex/adjacency triples.
__device__ inline void epaStart4(EpaScratch& S,
	const schlock::GjkCachedData* data,
	schlock::Vec3I8& a, schlock::Vec3I8& v,
	PxVec3& normal,
	PxReal& distance)
{
	int tI = threadIdx.x;
	PxReal input = reinterpret_cast<const PxReal*>(data)[tI];
	if(tI<16)
	{
		if((tI&3)<3)
			(reinterpret_cast<volatile PxReal*>(S.vertices))[tI-tI/4] = input;
		else
			S.indices[tI>>2] = __float_as_int(input);
	}

	// NOTE(review): dot() here takes scalar lane values — presumably a
	// warp-segment dot helper from nputils.cuh; confirm its semantics.
	// Lane tI reads face tI's plane from lanes 16 + 4*tI .. 19 + 4*tI.
	PxReal d = dot(input, input), m = __shfl_sync(FULL_MASK, 1/sqrt(d), tI*4+16);

	normal.x = __shfl_sync(FULL_MASK, input, tI*4+16) * m;
	normal.y = __shfl_sync(FULL_MASK, input, tI*4+17) * m;
	normal.z = __shfl_sync(FULL_MASK, input, tI*4+18) * m;
	distance = __shfl_sync(FULL_MASK, input, tI*4+19) * m;
	if(tI<4)
		a = sStartAdjacencies4[tI], v = sStartVertices4[tI];
}
|
||||
|
||||
|
||||
// Warp-cooperative support mapping: returns the index of the vertex in v[]
// with the maximum dot product against dir. All 32 lanes must call this
// (it uses FULL_MASK shuffles/ballots); every lane returns the same index.
// Vertices are processed 32 at a time, one per lane, with a butterfly
// max-reduction per batch.
__device__ inline PxU8 supportIndex(const PxVec3* v, PxU8 nbVertices, const PxVec3& dir)
{
	int bestN = 0xFFFFFFFF;
	PxReal bestDist = -FLT_MAX;
	for(PxU32 i=0;i<nbVertices;i+=32)
	{
		int n = i+threadIdx.x;
		// Lanes past the end contribute -FLT_MAX so they never win.
		PxReal d = n < nbVertices ? dir.dot(v[n]) : -FLT_MAX, e;

		e = fmaxf(d, __shfl_xor_sync(FULL_MASK, d, 16));
		e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 8));
		e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 4));
		e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 2));
		e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 1));

		// Ties resolve to the lowest-indexed winning lane via __ffs.
		if(e>bestDist)
			bestDist = e, bestN = i+__ffs(__ballot_sync(FULL_MASK, e == d))-1;
	}

	assert(bestN < nbVertices);
	return PxU8(bestN);
}
|
||||
|
||||
// find a new support point in either the positive or negative direction furthest from the reference point
|
||||
// we could be a bit more efficient here but this is a super-rare case
|
||||
// find a new support point in either the positive or negative direction furthest from the reference point
// we could be a bit more efficient here but this is a super-rare case
// Must be called warp-wide (supportIndex uses full-warp shuffles); only
// lane 0 writes the chosen vertex and its packed (indexB<<8|indexA) indices.
__device__ inline void newPoint(const PxVec3* vA, PxU32 nbA, const PxVec3* vB, PxU32 nbB, const PxVec3& dir, const PxVec3& ref, volatile PxVec3& vertex, volatile PxU16& indices)
{
	PxU8 indexAp = supportIndex(vA, nbA, dir), indexBp = supportIndex(vB, nbB, -dir); // support point in +ve dir
	PxU8 indexAn = supportIndex(vA, nbA, -dir), indexBn = supportIndex(vB, nbB, dir); // support point in -ve dir
	PxVec3 vp = vA[indexAp] - vB[indexBp], vn = vA[indexAn] - vB[indexBn];

	if(threadIdx.x == 0)
	{
		// Keep whichever Minkowski-difference point is further from ref
		// along dir (in absolute value).
		if(PxAbs((vp-ref).dot(dir)) > PxAbs((vn-ref).dot(dir)))
		{
			stS(vertex, vp);
			indices = indexBp<<8 | indexAp;
		}
		else
		{
			stS(vertex, vn);
			indices = indexBn<<8 | indexAn;
		}
	}
}
|
||||
|
||||
// find a new support point in either the positive or negative direction furthest from the reference point
|
||||
// we could be a bit more efficient here but this is a super-rare case
|
||||
|
||||
// Seeds the EPA polytope when GJK terminated with a 1-, 2-, or 3-point
// simplex: synthesizes the missing vertices via newPoint(), then builds a
// double-sided triangle (lanes 0 and 1 get opposite-winding faces).
// warpMask is the ballot of lanes taking this path; it is used for the
// __syncwarp barriers between shared-memory writes and reads.
__device__ inline void epaStart123(volatile EpaScratch& S,
	const schlock::GjkCachedData* data,
	const PxVec3* vA, PxU32 nbA,
	const PxVec3* vB, PxU32 nbB,
	schlock::Vec3I8& a, schlock::Vec3I8& v,
	PxVec3& normal,
	PxReal& distance,
	PxU32 warpMask)
{
	// Same cached-data unpack as epaStart4: lanes 0..15 scatter xyz into
	// S.vertices and the 4th float of each record into S.indices.
	int tI = threadIdx.x;
	PxReal input = reinterpret_cast<const PxReal*>(data)[tI];
	if(tI<16)
	{
		if((tI&3)<3)
			(reinterpret_cast<volatile PxReal*>(S.vertices))[tI-tI/4] = input;
		else
			S.indices[tI>>2] = __float_as_int(input);
	}

	__syncwarp(warpMask); //S.vertices is written above and read below

	PxVec3 vertex0 = ldS(S.vertices[0]);

	// Only one point cached: pick a second support point along +/-x.
	if(data->size<2)
		newPoint(vA, nbA, vB, nbB, PxVec3(1,0,0), ldS(S.vertices[0]), S.vertices[1], S.indices[1]);

	__syncwarp(warpMask); //S.vertices is written (inside newPoint) above and read below
	PxVec3 vertex1 = ldS(S.vertices[1]);

	// Fewer than three points: pick a third support point perpendicular to
	// the edge (axis chosen to avoid near-parallel cross products).
	if(data->size<3)
	{
		PxVec3 dir = vertex1 - vertex0, s = (PxAbs(dir.x) < PxAbs(dir.y) ? PxVec3(1,0,0) : PxVec3(0,1,0)).cross(dir);
		newPoint(vA, nbA, vB, nbB, s, vertex0, S.vertices[2], S.indices[2]);
	}

	__syncwarp(warpMask); //S.vertices is written (inside newPoint) above and read below
	PxVec3 vertex2 = ldS(S.vertices[2]);

	PxVec3 e01 = vertex1 - vertex0, e12 = vertex2 - vertex1, e02 = vertex2 - vertex0;

	// Use the shorter base edge for the normal computation (presumably for
	// numerical robustness — TODO confirm).
	if (e01.magnitude()<=e02.magnitude() )
	{
		normal = e01.cross(e12).getNormalized();
	}
	else
	{
		normal = e02.cross(e12).getNormalized();
	}

	distance = normal.dot(vertex0);

	// Lanes 0 and 1 own the two faces of the double-sided start triangle;
	// lane 1's face is the mirror (flipped winding, negated plane).
	if(tI < 2)
		v = sStartVertices3[tI], a = sStartAdjacency3[tI];
	if(tI==1)
		normal = -normal, distance = -distance;
}
|
||||
|
||||
|
||||
// Warp-wide EPA (Expanding Polytope Algorithm). Each lane owns one polytope
// triangle (bit i of liveTriangles <=> lane i's triangle is on the current
// hull). Starting from the GJK termination simplex in 'input', repeatedly:
//  - finds the live triangle closest to the origin (warp min-reduction),
//  - expands the polytope with a support vertex along its normal,
//  - tightens [lowerBound, upperBound] on the penetration depth.
// Terminates when the bounds meet within releps, no triangle can improve
// the bound, the vertex budget (EPA_MAX_VERTS) runs out, or the topology
// update degenerates. Lane 0 writes the contact result into 'output'.
__device__ inline schlock::GjkResult::Enum epa(EpaScratch& epaS,
	const PxVec3* vA, PxU32 nbA,
	const PxVec3* vB, PxU32 nbB,
	const schlock::GjkCachedData* input,
	PxReal convergenceRatio,
	PxReal relEpsBase,
	schlock::GjkOutput& output)
{
	PxReal releps = relEpsBase * (1-convergenceRatio);

	schlock::Vec3I8 a, v; // per-triangle adjancencies and vertex indices
	PxVec3 normal; // per-triangle normal
	PxReal distance; // per-triangle distance
	schlock::Vec3I8 bestVerts;

	PxReal lowerBound = -PX_MAX_F32, upperBound = PX_MAX_F32;
	int nbVertices, liveTriangles;

	bool b = input->size == 4;
	PxU32 warpMask = __ballot_sync(FULL_MASK, !b);

	if(b)
	{
		epaStart4(epaS, input, a, v, normal, distance);
		nbVertices = 4;
		//This is a tetrahedron so there are 4 triangles, hence liveTriangles = 0xf (1111)
		liveTriangles = 0xf;
	}
	else
	{
		epaStart123(epaS, input, vA, nbA, vB, nbB, a, v, normal, distance, warpMask);
		nbVertices = 3;
		//This is a double-sided triangle so there are 2 triangles, hence liveTriangles = 0x3 (11)
		liveTriangles = 0x3;
	}

	__syncwarp();

	// TODO: check here for vertex count is slighly conservative
	bool nondegenerate = true;
	while (nondegenerate && nbVertices < schlock::EPA_MAX_VERTS)
	{
		// Dead lanes contribute FLT_MAX so they never win the min-reduction.
		PxReal t = liveTriangles & 1 << threadIdx.x ? distance : FLT_MAX, e;
		//get the shortest distance triangle
		e = fminf(t, __shfl_xor_sync(FULL_MASK, t, 16));
		e = fminf(e, __shfl_xor_sync(FULL_MASK, e, 8));
		e = fminf(e, __shfl_xor_sync(FULL_MASK, e, 4));
		e = fminf(e, __shfl_xor_sync(FULL_MASK, e, 2));
		e = fminf(e, __shfl_xor_sync(FULL_MASK, e, 1));

		//try to find out which triangle index has the shortest distance
		int f = __ballot_sync(FULL_MASK, e == t);
		assert(f); //Trigger assert if e == t failed on all lanes (indicates a nan)
		int triIndex = lowestSetIndex(f);

		//get the normal from the shortest distance triangle
		PxVec3 d(__shfl_sync(FULL_MASK, normal.x, triIndex), __shfl_sync(FULL_MASK, normal.y, triIndex), __shfl_sync(FULL_MASK, normal.z, triIndex));
		//get the support vertex
		PxU8 indexA = squawk::supportIndex(vA, nbA, d), indexB = squawk::supportIndex(vB, nbB, -d);
		PxVec3 supportVertex = vA[indexA] - vB[indexB];

		//calculate the distance from the origin to the support vertex
		PxReal thisFrameUb = supportVertex.dot(d);

		//found the MTD, epa terminate
		if (abs(thisFrameUb - e) <= releps)
		{
			upperBound = thisFrameUb, lowerBound = e;
			bestVerts.setInt(__shfl_sync(FULL_MASK, v.getInt(), triIndex));

			if (threadIdx.x == 0)
				epaS.bestDir = d;

			break;
		}

		// all remaining tris are worse than the best candidate, epa terminate
		if (e > upperBound)
			break;

		PxReal ub = PxMin(upperBound, thisFrameUb);

		// New best candidate: remember its plane and vertex triple.
		if (ub < upperBound)
		{
			upperBound = ub, lowerBound = e;
			bestVerts.setInt(__shfl_sync(FULL_MASK, v.getInt(), triIndex));

			if (threadIdx.x == 0)
				epaS.bestDir = d;
		}

#if EPA_LOGGING
		if (threadIdx.x == 0)
			printf("[%f, %f], (%f, %f, %f)\n", lowerBound, upperBound, supportVertex.x, supportVertex.y, supportVertex.z);
#endif

		// save off the new vertex
		if (threadIdx.x == 0)
		{
			epaS.vertices[nbVertices] = supportVertex;
			epaS.indices[nbVertices] = PxU16(indexB) << 8 | PxU16(indexA);
		}

		__syncwarp();

		nondegenerate = replaceTriangle(epaS, ++nbVertices, triIndex, e, normal, distance, v, a, liveTriangles, releps);
	}

#if EPA_LOGGING
	if(!nondegenerate)
		printf("degenerate\n");
#endif

	output.degenerate = !nondegenerate;

	__syncwarp();

	schlock::GjkResult::Enum result;
	PxVec3 bestDir = epaS.bestDir;
	PxVec3 closestPoint = bestDir * lowerBound;

	// Lane 0 reconstructs the witness points via barycentric coordinates of
	// the projection of the origin on the best triangle.
	if(threadIdx.x == 0)
	{
		output.direction = bestDir;
		output.lowerBound = lowerBound;
		output.upperBound = upperBound;

		int v0 = bestVerts.get(0), v1 = bestVerts.get(1), v2 = bestVerts.get(2);
		PxReal u, v, w;
		schlock::barycentrics(epaS.vertices[v0], epaS.vertices[v1], epaS.vertices[v2], u, v, w, closestPoint);
		PxVec3 cpA = vA[schlock::aIndex(epaS.indices[v0])] * u + vA[schlock::aIndex(epaS.indices[v1])] * v + vA[schlock::aIndex(epaS.indices[v2])] * w;
		output.closestPointA = cpA;
		output.closestPointB = cpA - closestPoint;
	}
	result = lowerBound < 0 ? schlock::GjkResult::eCLOSE : schlock::GjkResult::eOVERLAP;

	return result;
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
1462
engine/third_party/physx/source/gpunarrowphase/src/CUDA/femClothClothMidPhase.cu
vendored
Normal file
1462
engine/third_party/physx/source/gpunarrowphase/src/CUDA/femClothClothMidPhase.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
397
engine/third_party/physx/source/gpunarrowphase/src/CUDA/femClothHFMidPhase.cu
vendored
Normal file
397
engine/third_party/physx/source/gpunarrowphase/src/CUDA/femClothHFMidPhase.cu
vendored
Normal file
@@ -0,0 +1,397 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "foundation/PxBounds3.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxTransform.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
|
||||
#include "geometry/PxGeometry.h"
|
||||
#include "geometry/PxHeightFieldSample.h"
|
||||
|
||||
#include "PxgContactManager.h"
|
||||
#include "PxgConvexConvexShape.h"
|
||||
#include "PxgFEMCloth.h"
|
||||
#include "PxgNpKernelIndices.h"
|
||||
#include "PxgSoftBodyCoreKernelIndices.h"
|
||||
|
||||
#include "PxsTransformCache.h"
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
#include "PxgCommonDefines.h"
|
||||
#include "dataReadWriteHelper.cuh"
|
||||
#include "heightfieldUtil.cuh"
|
||||
#include "utils.cuh"
|
||||
#include "reduction.cuh"
|
||||
#include "deformableCollision.cuh"
|
||||
|
||||
#include "assert.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Intentionally empty host stub — presumably exists so the host linker pulls
// this translation unit (and its kernels) into the binary; TODO confirm.
extern "C" __host__ void initNarrowphaseKernels20() {}
|
||||
|
||||
// Per-warp scratch for the cloth-vs-heightfield midphase: caches the
// heightfield's transform and shape (written by lane 0 of each warp, read
// by all lanes). Capped at 1024 bytes to fit the shared-memory budget the
// launching kernel reserves for it.
struct clothHeightfieldScratch
{

	/*const float4 * PX_RESTRICT trimeshVerts;
	const uint4 * PX_RESTRICT trimeshTriVertIndices;

	PxU32 numTriangles;*/

	PxTransform heightfieldTransform;
	PxgShape heightfieldShape;

}; PX_COMPILE_TIME_ASSERT(sizeof(clothHeightfieldScratch) <= 1024);
|
||||
|
||||
|
||||
|
||||
// Cloth-triangle vs heightfield midphase for one warp: each lane tests one
// cloth triangle's (fattened, world-space) AABB against the heightfield,
// counts the candidate cell pairs, warp-scans the counts into a contiguous
// range reserved from *midphasePairsNum with a single atomicAdd (lane 0),
// and writes the pairs into the stack buffer.
//
// Must be called by all 32 lanes of the warp (warpScan / __shfl_sync use
// FULL_MASK), including lanes whose triangleIdx >= numTriangles.
//
// scratch           - warp-shared heightfield transform + shape (pre-filled)
// cmIdx             - contact-manager index, recorded with each output pair
// triangleBound     - world-space AABB of this lane's triangle
// triangleIdx       - this lane's triangle index; >= numTriangles means idle
// workIndex         - unused here; kept for signature compatibility
// numTriangles      - total triangle count of the cloth
// stackPtr          - output pair buffer (written as uint4 records)
// midphasePairsNum  - global pair counter, advanced atomically
// stackSize         - capacity passed through to heightfieldOutputPairs
__device__ static inline void clothHeightfieldMidphaseCore(
	clothHeightfieldScratch* scratch,
	const PxU32 cmIdx,
	const PxBounds3 triangleBound,
	const PxU32 triangleIdx,
	const PxU32 workIndex,
	const PxU32 numTriangles,
	PxU8* PX_RESTRICT stackPtr, //output
	PxU32* PX_RESTRICT midphasePairsNum, //output
	const PxU32 stackSize
)
{
	PxU32 nbPairs = 0;
	PxBounds3 localBound;
	PxU32 nbRows = 0;
	PxU32 nbCols = 0;
	PxHeightFieldSample* samples = NULL;
	PxU32 minRow = 0;
	PxU32 maxRow = 0;
	PxU32 minColumn = 0;
	PxU32 maxColumn = 0;

	if (triangleIdx < numTriangles)
	{
		PxTransform heightfieldTransform = scratch->heightfieldTransform;

		PxgShape heightfieldShape = scratch->heightfieldShape;
		const PxReal oneOverHeightScale = 1.f / heightfieldShape.scale.scale.y;
		const PxReal oneOverRowScale = 1.f / PxAbs(heightfieldShape.scale.scale.x);
		const PxReal oneOverlColScale = 1.f / PxAbs(heightfieldShape.scale.scale.z);

		//bound is in world space, we need to transform the bound to the local space of height field
		localBound = PxBounds3::transformFast(heightfieldTransform.getInverse(), triangleBound);

		// Undo the heightfield scaling so the bound is in sample-grid units.
		localBound.minimum.x *= oneOverRowScale;
		localBound.minimum.y *= oneOverHeightScale;
		localBound.minimum.z *= oneOverlColScale;

		localBound.maximum.x *= oneOverRowScale;
		localBound.maximum.y *= oneOverHeightScale;
		localBound.maximum.z *= oneOverlColScale;

		// Negative row/column scale mirrors the axis, which swaps min/max.
		if (heightfieldShape.scale.scale.x < 0.f)
		{
			//swap min and max row scale
			const PxReal temp = localBound.minimum.x;
			localBound.minimum.x = localBound.maximum.x;
			localBound.maximum.x = temp;
		}

		//col scale
		if (heightfieldShape.scale.scale.z < 0.f)
		{
			PxReal swap = localBound.minimum.z;
			localBound.minimum.z = localBound.maximum.z;
			localBound.maximum.z = swap;
		}

		// Heightfield blob layout: [nbRows, nbCols, samples...] — as read here.
		PxU32* heightfieldData = reinterpret_cast<PxU32*>(heightfieldShape.hullOrMeshPtr);
		nbRows = heightfieldData[0];
		nbCols = heightfieldData[1];
		samples = reinterpret_cast<PxHeightFieldSample*>(&heightfieldData[2]);

		// Only process bounds that overlap the heightfield's row/column range.
		// FIX: the original negated only the first test —
		//   if (!(min.x > nbRows-1) || (min.z > nbCols-1) || ...)
		// — which also entered this branch for bounds entirely outside the
		// field in z or entirely below row/column 0. The whole disjunction
		// of "outside" tests must be negated.
		if (!((localBound.minimum.x > nbRows - 1) || (localBound.minimum.z > nbCols - 1)
			|| (localBound.maximum.x < 0) || (localBound.maximum.z < 0)))
		{
			minRow = getMinRow(localBound.minimum.x, nbRows);
			maxRow = getMaxRow(localBound.maximum.x, nbRows);
			minColumn = getMinColumn(localBound.minimum.z, nbCols);
			maxColumn = getMaxColumn(localBound.maximum.z, nbCols);

			// Two triangles per touched cell; skip empty ranges.
			if ((2 * (maxColumn - minColumn) * (maxRow - minRow)) > 0)
			{
				nbPairs = heightfieldComputePairs(minColumn, maxColumn, minRow, maxRow, nbRows, nbCols,
					samples, localBound.minimum.y, localBound.maximum.y);
			}
		}
	}

	// Exclusive prefix sum of per-lane pair counts -> per-lane output offset.
	// These warp-wide ops run unconditionally, including on idle lanes.
	PxU32 pairOffset = warpScan<AddOpPxU32, PxU32>(FULL_MASK, nbPairs) - nbPairs;

	PxU32 validCount = 0;
	if (threadIdx.x == (WARP_SIZE - 1))
	{
		validCount = pairOffset + nbPairs;
	}
	validCount = __shfl_sync(FULL_MASK, validCount, WARP_SIZE - 1);

	// Lane 0 reserves one contiguous range in the global pair buffer.
	PxU32 startIndex = 0xffffffff;
	if (threadIdx.x == 0 && validCount > 0)
	{
		startIndex = atomicAdd(midphasePairsNum, validCount);
	}

	startIndex = __shfl_sync(FULL_MASK, startIndex, 0);

	uint4* tStackPtr = reinterpret_cast<uint4*>(stackPtr);

	if (nbPairs != 0)
	{
		heightfieldOutputPairs(minColumn, maxColumn, minRow, maxRow, nbRows, nbCols,
			samples, localBound.minimum.y, localBound.maximum.y, cmIdx, triangleIdx, startIndex + pairOffset, stackSize, tStackPtr);
	}
}
|
||||
|
||||
// Midphase kernel for cloth (FEM triangle mesh) vs heightfield pairs.
// Each contact manager is serviced by 64 warps (2048 threads); every thread
// owns one cloth triangle per inner iteration, builds its world-space AABB
// fattened by the combined contact distance, and hands it to
// clothHeightfieldMidphaseCore, which emits (cmIdx, triangleIdx) candidate
// pairs onto the uint4 stack at stackPtr and bumps midphasePairsNum.
extern "C" __global__ void cloth_heightfieldMidphaseGeneratePairsLaunch(
	const PxU32 numNPWorkItems,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,

	PxgFEMCloth* PX_RESTRICT clothes,

	PxU8* stackPtr, //output
	PxU32* midphasePairsNum, //output
	const PxU32 stackSizeBytes
)
{

	__shared__ PxU32 scratchMem[1024];

	// Block-shared scratch; lane 0 of a warp publishes the heightfield data below.
	clothHeightfieldScratch* s_warpScratch = reinterpret_cast<clothHeightfieldScratch*>(scratchMem);

	const PxU32 nbWarpsPerCm = 64; // 64 warp(2048 threads) to deal with one pair
	const PxU32 nbWarpsRequired = nbWarpsPerCm * numNPWorkItems;

	const PxU32 nbThreadsPerCm = 64 * WARP_SIZE;

	const PxU32 numWarpPerBlock = MIDPHASE_WARPS_PER_BLOCK;
	const PxU32 totalNumWarps = PxgSoftBodyKernelGridDim::SB_SBMIDPHASE * numWarpPerBlock;

	const PxU32 warpIndex = threadIdx.y;

	// Each physical warp may service several logical work slots when the grid
	// is smaller than nbWarpsRequired (ceil division).
	const PxU32 nbIterationsPerWarps = (nbWarpsRequired + totalNumWarps - 1) / totalNumWarps;

	/*if (warpIndex == 0 && threadIdx.x == 0 && blockIdx.x == 0)
	{
		printf("nbIterationsPerWarps %i nbWarpsRequired %i\n", nbIterationsPerWarps, nbWarpsRequired);
	}*/

	for (PxU32 i = 0; i < nbIterationsPerWarps; ++i)
	{
		const PxU32 workIndex = i + (warpIndex + numWarpPerBlock * blockIdx.x) * nbIterationsPerWarps;

		if (workIndex < nbWarpsRequired)
		{

			unsigned int cmIdx = workIndex / nbWarpsPerCm;

			PxgShape clothShape, heightfieldShape;
			PxU32 clothCacheRef, heightfieldCacheRef;
			// NOTE(review): the vertex variant of this kernel uses
			// LoadShapePairWarp here; confirm the non-warp LoadShapePair is
			// intentional in this kernel.
			LoadShapePair<PxGeometryType::eTRIANGLEMESH, PxGeometryType::eHEIGHTFIELD>(cmInputs, cmIdx, gpuShapes,
				clothShape, clothCacheRef, heightfieldShape, heightfieldCacheRef);

			PxsCachedTransform heightfieldTransformCache;
			PxsCachedTransform_ReadWarp(heightfieldTransformCache, transformCache + heightfieldCacheRef);

			const PxgFEMCloth& cloth = clothes[clothShape.particleOrSoftbodyId];

			// Lane 0 publishes the heightfield transform/shape to shared scratch
			// for the whole warp; the __syncwarp below makes it visible.
			if (threadIdx.x == 0)
			{
				s_warpScratch->heightfieldTransform = heightfieldTransformCache.transform;
				s_warpScratch->heightfieldShape = heightfieldShape;
			}

			__syncwarp();

			const PxReal contactDist = contactDistance[clothCacheRef] + contactDistance[heightfieldCacheRef];

			const PxU32 nbTriangles = cloth.mNbTriangles;// s_warpScratch->numTriangles;

			const PxU32 nbIter = (nbTriangles + nbThreadsPerCm - 1) / nbThreadsPerCm;

			const uint4* triVertIndices = cloth.mTriangleVertexIndices;// s_warpScratch->trimeshTriVertIndices;

			const float4* positions = cloth.mPosition_InvMass;

			for (PxU32 j = 0; j < nbIter; ++j)
			{
				const PxU32 wrappedThreadIndex = (workIndex * WARP_SIZE + threadIdx.x) % nbThreadsPerCm;

				const PxU32 triangleIdx = wrappedThreadIndex + nbThreadsPerCm * j;

				// Out-of-range threads keep an empty bound so they still take
				// part in the warp-wide core call without producing hits.
				PxBounds3 triangleBound = PxBounds3::empty();

				if (triangleIdx < nbTriangles)
				{
					const uint4 vertIdx = triVertIndices[triangleIdx];
					const PxVec3 worldV0 = PxLoad3(positions[vertIdx.x]);
					const PxVec3 worldV1 = PxLoad3(positions[vertIdx.y]);
					const PxVec3 worldV2 = PxLoad3(positions[vertIdx.z]);

					triangleBound = triBoundingBox(worldV0, worldV1, worldV2);

					// Inflate by the combined contact distance so near-misses
					// inside the contact offset are still reported.
					triangleBound.fattenFast(contactDist);
				}

				clothHeightfieldMidphaseCore(
					s_warpScratch,
					cmIdx,
					triangleBound,
					triangleIdx,
					workIndex,
					nbTriangles,
					stackPtr,
					midphasePairsNum,
					stackSizeBytes/sizeof(uint4)
				);
			}
		}
	}
}
|
||||
|
||||
|
||||
// Midphase kernel for cloth VERTICES vs heightfield pairs. Same work
// distribution as the triangle variant (64 warps per contact manager); each
// thread owns one cloth vertex per inner iteration, builds a cube AABB of
// half-extent contactDist around it and hands it to
// clothHeightfieldMidphaseCore, which emits (cmIdx, vertIndex) pairs onto the
// uint4 stack at stackPtr and bumps midphasePairsNum.
extern "C" __global__ void cloth_midphaseVertexHeightfieldLaunch(
	const PxU32 numNPWorkItems,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,

	PxgFEMCloth* PX_RESTRICT clothes,

	PxU8* stackPtr, //output
	PxU32* midphasePairsNum, //output
	const PxU32 stackSizeBytes
)
{

	__shared__ PxU32 scratchMem[1024];

	clothHeightfieldScratch* s_warpScratch = reinterpret_cast<clothHeightfieldScratch*>(scratchMem);

	const PxU32 nbWarpsPerCm = 64; // 64 warp(2048 threads) to deal with one pair
	const PxU32 nbWarpsRequired = nbWarpsPerCm * numNPWorkItems;

	const PxU32 nbThreadsPerCm = 64 * WARP_SIZE;

	const PxU32 numWarpPerBlock = MIDPHASE_WARPS_PER_BLOCK;
	const PxU32 totalNumWarps = PxgSoftBodyKernelGridDim::SB_SBMIDPHASE * numWarpPerBlock;

	const PxU32 warpIndex = threadIdx.y;

	// Each physical warp may service several logical work slots (ceil division).
	const PxU32 nbIterationsPerWarps = (nbWarpsRequired + totalNumWarps - 1) / totalNumWarps;

	for (PxU32 i = 0; i < nbIterationsPerWarps; ++i)
	{
		const PxU32 workIndex = i + (warpIndex + numWarpPerBlock * blockIdx.x) * nbIterationsPerWarps;

		if (workIndex < nbWarpsRequired)
		{

			unsigned int cmIdx = workIndex / nbWarpsPerCm;

			PxgShape clothShape, heightfieldShape;
			PxU32 clothCacheRef, heightfieldCacheRef;
			LoadShapePairWarp<PxGeometryType::eTRIANGLEMESH, PxGeometryType::eHEIGHTFIELD>(cmInputs, cmIdx, gpuShapes,
				clothShape, clothCacheRef, heightfieldShape, heightfieldCacheRef);

			PxsCachedTransform heightfieldTransformCache;
			PxsCachedTransform_ReadWarp(heightfieldTransformCache, transformCache + heightfieldCacheRef);

			const PxgFEMCloth& cloth = clothes[clothShape.particleOrSoftbodyId];

			// Lane 0 publishes heightfield transform/shape for the whole warp.
			if (threadIdx.x == 0)
			{
				s_warpScratch->heightfieldTransform = heightfieldTransformCache.transform;
				s_warpScratch->heightfieldShape = heightfieldShape;
			}

			__syncwarp();

			const PxReal contactDist = contactDistance[clothCacheRef] + contactDistance[heightfieldCacheRef];

			const float4* positions = cloth.mPosition_InvMass;

			const PxU32 nbVerts = cloth.mNbVerts;

			const PxU32 nbIter = (nbVerts + nbThreadsPerCm - 1) / nbThreadsPerCm;

			const PxVec3 radiusV(contactDist);

			for (PxU32 j = 0; j < nbIter; ++j)
			{
				const PxU32 wrappedThreadIndex = (workIndex * WARP_SIZE + threadIdx.x) % nbThreadsPerCm;

				const PxU32 vertIndex = wrappedThreadIndex + nbThreadsPerCm * j;

				PxBounds3 vertexBound = PxBounds3::empty();

				// Fix: only touch positions[] when vertIndex is valid. The
				// previous code read positions[vertIndex] unconditionally, an
				// out-of-bounds read for the tail threads of the last
				// iteration (vertIndex >= nbVerts). The triangle variant of
				// this kernel already guards its fetch the same way; threads
				// past the end keep an empty bound and still join the
				// warp-wide core call below.
				if (vertIndex < nbVerts)
				{
					const float4 pos = positions[vertIndex];

					//particle pos need to be in triangle mesh vertex space
					const PxVec3 particlePos(pos.x, pos.y, pos.z);

					vertexBound.minimum = particlePos - radiusV;
					vertexBound.maximum = particlePos + radiusV;
				}

				clothHeightfieldMidphaseCore(
					s_warpScratch,
					cmIdx,
					vertexBound,
					vertIndex,
					workIndex,
					nbVerts,
					stackPtr,
					midphasePairsNum,
					stackSizeBytes/sizeof(uint4)
				);
			}
		}
	}
}
|
||||
620
engine/third_party/physx/source/gpunarrowphase/src/CUDA/femClothMidPhase.cu
vendored
Normal file
620
engine/third_party/physx/source/gpunarrowphase/src/CUDA/femClothMidPhase.cu
vendored
Normal file
@@ -0,0 +1,620 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "foundation/PxBounds3.h"
|
||||
#include "foundation/PxMath.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxTransform.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
|
||||
#include "geometry/PxGeometry.h"
|
||||
#include "geometry/PxMeshScale.h"
|
||||
|
||||
#include "GuBV32.h"
|
||||
#include "cudaNpCommon.h"
|
||||
|
||||
#include "PxgContactManager.h"
|
||||
#include "PxgConvexConvexShape.h"
|
||||
#include "PxgFEMCloth.h"
|
||||
#include "PxgNpKernelIndices.h"
|
||||
#include "PxgParticleSystem.h"
|
||||
|
||||
#include "PxsTransformCache.h"
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
#include "copy.cuh"
|
||||
#include "dataReadWriteHelper.cuh"
|
||||
#include "femMidphaseScratch.cuh"
|
||||
#include "utils.cuh"
|
||||
|
||||
#include "bv32Traversal.cuh"
|
||||
#include "triangleMesh.cuh"
|
||||
#include "deformableCollision.cuh"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Empty host-side stub; its presence forces this translation unit to be
// linked/registered with the narrowphase kernel module.
extern "C" __host__ void initNarrowphaseKernels18() {}
|
||||
|
||||
struct ClothTreeTraverser
|
||||
{
|
||||
femMidphaseScratch* s_warpScratch;
|
||||
uint4* PX_RESTRICT stackPtr;
|
||||
PxU32* PX_RESTRICT midphasePairsNum;
|
||||
unsigned int cmIdx;
|
||||
const PxU32 stackSize;
|
||||
const PxBounds3& mBox;
|
||||
|
||||
PX_FORCE_INLINE __device__ ClothTreeTraverser(
|
||||
femMidphaseScratch* s_warpScratch,
|
||||
PxBounds3& aabb,
|
||||
uint4* PX_RESTRICT stackPtr,
|
||||
PxU32* PX_RESTRICT midphasePairsNum,
|
||||
unsigned int cmIdx,
|
||||
const PxU32 stackSize)
|
||||
: s_warpScratch(s_warpScratch), mBox(aabb), stackPtr(stackPtr), midphasePairsNum(midphasePairsNum),
|
||||
cmIdx(cmIdx), stackSize(stackSize)
|
||||
{ }
|
||||
|
||||
PX_FORCE_INLINE __device__ void intersectPrimitiveFullWarp(PxU32 primitiveIndex, PxU32 idxInWarp) const
|
||||
{
|
||||
bool intersect = false;
|
||||
if (primitiveIndex != 0xFFFFFFFF)
|
||||
{
|
||||
uint4 triIdx = s_warpScratch->meshVertsIndices[primitiveIndex];
|
||||
|
||||
const PxVec3 worldV0 = PxLoad3(s_warpScratch->meshVerts[triIdx.x]);
|
||||
const PxVec3 worldV1 = PxLoad3(s_warpScratch->meshVerts[triIdx.y]);
|
||||
const PxVec3 worldV2 = PxLoad3(s_warpScratch->meshVerts[triIdx.z]);
|
||||
|
||||
intersect = triBoundingBox(worldV0, worldV1, worldV2).intersects(mBox);
|
||||
}
|
||||
|
||||
pushOntoStackFullWarp(intersect, midphasePairsNum, stackPtr, stackSize, make_uint4(cmIdx, primitiveIndex, 0, 0));
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE __device__ bool intersectBoxFullWarp(bool hasBox, const PxVec3& min, const PxVec3& max) const
|
||||
{
|
||||
if (hasBox)
|
||||
return mBox.intersects(PxBounds3(min, max));
|
||||
else
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// Core midphase for cloth (triangle mesh) vs one rigid shape: one warp per
// contact manager. The rigid shape's world AABB, fattened by the combined
// contact distance, is swept against the cloth's BV32 triangle tree; every
// overlapping triangle is pushed as a (cmIdx, triangleIdx) pair via
// ClothTreeTraverser.
template<unsigned int WarpsPerBlock>
__device__ static inline void femClothMidphaseCore(
	PxU32 numNPWorkItems,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	const PxgFEMCloth* PX_RESTRICT femClothes,

	PxU8* PX_RESTRICT stackPtr, //output
	PxU32* PX_RESTRICT midphasePairsNum, //output

	femMidphaseScratch* s_warpScratch,
	const PxU32 stackSize
)
{

	//wrap index
	const unsigned int warpIdx = threadIdx.y;//(idx >> LOG2_WARP_SIZE);
	//wrap index in block
	//const unsigned int idx = idxInWarp + warpIdx * WARP_SIZE;

	// One contact manager per warp across the grid.
	unsigned int cmIdx = warpIdx + blockIdx.x * blockDim.y;

	//unsigned mask_cmIdx = __ballot_sync(FULL_MASK, cmIdx < numNPWorkItems);
	if (cmIdx < numNPWorkItems)
	{
		/* if(threadIdx.x == 0)
		printf("cmIdx %i warpIdx %i\n", cmIdx, warpIdx);*/

		PxgShape clothShape, rigidShape;
		PxU32 clothCacheRef, rigidCacheRef;
		LoadShapePairWarp<PxGeometryType::eTRIANGLEMESH>(cmInputs, cmIdx, gpuShapes,
			clothShape, clothCacheRef, rigidShape, rigidCacheRef);

		const PxU32 femClothId = clothShape.particleOrSoftbodyId;

		const PxgFEMCloth& femCloth = femClothes[femClothId];

		PxU8 * trimeshGeomPtr = reinterpret_cast<PxU8 *>(femCloth.mTriMeshData);

		trimeshGeomPtr += sizeof(uint4); // skip nbVerts_nbTets_maxDepth_nbBv32TreeNodes

		// Lane 0 publishes vertex/index/BV32 pointers into the warp's shared
		// scratch; __syncwarp makes them visible to the other lanes.
		if (threadIdx.x == 0)
		{
			s_warpScratch->meshVerts = femCloth.mPosition_InvMass;
			s_warpScratch->meshVertsIndices = femCloth.mTriangleVertexIndices;

			Gu::BV32DataPacked* bv32PackedNodes = reinterpret_cast<Gu::BV32DataPacked*>(trimeshGeomPtr);
			s_warpScratch->bv32PackedNodes = bv32PackedNodes;
		}

		__syncwarp();


		//ML: this is the world bound AABB
		PxBounds3 aabb;
		PxBounds3_ReadWarp(aabb, bounds + rigidCacheRef);
		const PxReal contactDist = contactDistance[clothCacheRef] + contactDistance[rigidCacheRef];
		aabb.fattenFast(contactDist);

		/*if (threadIdx.x == 0)
		{
			printf("contactDist %f transformCacheRef0 %i transformCacheRef1 %i\n", contactDist, transformCacheRef0, transformCacheRef1);
			printf("min(%f, %f, %f) max(%f, %f, %f)\n", aabb.minimum.x, aabb.minimum.y, aabb.minimum.z,
				aabb.maximum.x, aabb.maximum.y, aabb.maximum.z);
		}*/

		uint4* tStackPtr = reinterpret_cast<uint4*>(stackPtr);
		bv32TreeTraversal<const ClothTreeTraverser, WarpsPerBlock>(s_warpScratch->bv32PackedNodes, s_warpScratch->sBv32Nodes, ClothTreeTraverser(
			s_warpScratch, aabb, tStackPtr, midphasePairsNum, cmIdx, stackSize));

		/*if (threadIdx.x == 0 && nbPairsPerCM > 0)
		printf("nbPairsPerCM %i\n", nbPairsPerCM);*/
	}
}
|
||||
|
||||
// Thin kernel wrapper: allocates per-warp scratch in shared memory and
// forwards all arguments to femClothMidphaseCore (cloth triangles vs rigid
// AABB midphase). Output pairs go to stackPtr / midphasePairsNum.
extern "C" __global__
//__launch_bounds__(MIDPHASE_WARPS_PER_BLOCK * WARP_SIZE, 4)
void cloth_midphaseGeneratePairsLaunch(
	PxU32 numWorkItems,
	const PxReal tolerenceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	const PxgFEMCloth* PX_RESTRICT femClothes,

	PxU8* PX_RESTRICT stackPtr, //output
	PxU32* PX_RESTRICT midphasePairsNum, //output
	const PxU32 stackSizeBytes
)
{

	// One femMidphaseScratch-sized slab (WARP_SIZE * 7 PxU32) per warp;
	// assumes sizeof(femMidphaseScratch) <= WARP_SIZE * 7 * sizeof(PxU32).
	__shared__ PxU32 scratchMem[MIDPHASE_WARPS_PER_BLOCK][WARP_SIZE * 7];

	femClothMidphaseCore<MIDPHASE_WARPS_PER_BLOCK>(
		numWorkItems,
		tolerenceLength,
		cmInputs,
		transformCache,
		bounds,
		contactDistance,
		gpuShapes,
		femClothes,
		stackPtr,
		midphasePairsNum,
		(femMidphaseScratch*)scratchMem[threadIdx.y],
		stackSizeBytes/sizeof(uint4)
	);
}
|
||||
|
||||
|
||||
// Midphase kernel: cloth VERTICES vs a rigid shape's world AABB. One contact
// manager per blockIdx.y; every thread tests one vertex per iteration against
// the (unfattened) AABB using squared distance <= contactDist^2, then the
// warp compacts hits with ballot + exclusive scan and appends
// (cmIdx, vertexId) entries to stackPtr, counting them in midphasePairsNum.
extern "C" __global__
//__launch_bounds__(MIDPHASE_WARPS_PER_BLOCK * WARP_SIZE, 4)
void cloth_midphaseGenerateVertexPairsLaunch(
	PxU32 numWorkItems,
	const PxReal tolerenceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	const PxgFEMCloth* PX_RESTRICT femClothes,

	uint4* PX_RESTRICT stackPtr, //output
	PxU32* PX_RESTRICT midphasePairsNum //output
)
{

	const unsigned int cmIdx = blockIdx.y;

	//unsigned mask_cmIdx = __ballot_sync(FULL_MASK, cmIdx < numNPWorkItems);
	if (cmIdx < numWorkItems)
	{
		PxgShape clothShape, rigidShape;
		PxU32 clothCacheRef, rigidCacheRef;
		LoadShapePairWarp<PxGeometryType::eTRIANGLEMESH>(cmInputs, cmIdx, gpuShapes,
			clothShape, clothCacheRef, rigidShape, rigidCacheRef);

		const PxU32 femClothId = clothShape.particleOrSoftbodyId;

		const PxgFEMCloth& femCloth = femClothes[femClothId];

		PxU8 * trimeshGeomPtr = reinterpret_cast<PxU8 *>(femCloth.mTriMeshData);

		trimeshGeomPtr += sizeof(uint4);

		const float4* const PX_RESTRICT meshVerts = femCloth.mPosition_InvMass;
		const PxU32 nbVerts = femCloth.mNbVerts;

		//ML: this is the world bound AABB
		PxBounds3 aabb;
		PxBounds3_ReadWarp(aabb, bounds + rigidCacheRef);
		const PxReal contactDist = contactDistance[clothCacheRef] + contactDistance[rigidCacheRef];
		//aabb.fattenFast(contactDist);

		// Distance test is done against the squared contact distance instead
		// of fattening the AABB.
		const PxReal tolerance = contactDist * contactDist;

		const PxU32 idx = threadIdx.x + threadIdx.y*WARP_SIZE + blockIdx.x*blockDim.x*blockDim.y;

		// Grid-wide stride over all cloth vertices.
		for (PxU32 i = 0; i < nbVerts; i += blockDim.x*blockDim.y*gridDim.x)
		{
			PxU32 id = idx + i;

			bool hasHit = false;
			if (id < nbVerts)
			{
				float4 vertex = meshVerts[id];
				PxVec3 v(vertex.x, vertex.y, vertex.z);

				PxVec3 closest = aabb.closestPoint(v);

				hasHit = (closest - v).magnitudeSquared() <= tolerance;
			}

			// Warp-wide compaction: lane 0 reserves space for all hits with a
			// single atomic, then each hitting lane writes at its scan offset.
			PxU32 hitMask = __ballot_sync(FULL_MASK, hasHit);

			if (hitMask)
			{
				PxU32 prevStackShift = 0xFFffFFff;
				PxU32 validCount = __popc(hitMask);
				if (threadIdx.x == 0 && validCount > 0)
				{
					prevStackShift = atomicAdd(midphasePairsNum, validCount);

				}

				prevStackShift = __shfl_sync(FULL_MASK, prevStackShift, 0);

				if (hasHit)
				{
					// NOTE(review): unlike the other midphase kernels there is
					// no stackSize clamp here — writes assume the stack was
					// sized for the worst case; confirm with the caller.
					PxU32 offset = warpScanExclusive(hitMask, threadIdx.x);
					stackPtr[prevStackShift + offset] = make_uint4(cmIdx, id, 0, 0);
				}
			}
		}
	}
}
|
||||
|
||||
struct ClothParticleTreeTraverser
|
||||
{
|
||||
femMidphaseScratch* PX_RESTRICT s_warpScratch;
|
||||
const PxVec3 sphereCenter;
|
||||
const PxReal radius;
|
||||
const PxBounds3 mBox;
|
||||
const PxU32 globalParticleIndex;
|
||||
const PxU32 numParticles;
|
||||
const PxU32 cmIdx;
|
||||
const PxU32 stackSize;
|
||||
uint4* PX_RESTRICT stackPtr; //output
|
||||
PxU32* midphasePairs; //output
|
||||
const bool isMesh;
|
||||
|
||||
|
||||
PX_FORCE_INLINE __device__ ClothParticleTreeTraverser(
|
||||
femMidphaseScratch* PX_RESTRICT s_warpScratch,
|
||||
const PxVec3 sphereCenter,
|
||||
const PxReal radius,
|
||||
const PxU32 globalParticleIndex,
|
||||
const PxU32 numParticles,
|
||||
const PxU32 cmIdx,
|
||||
const PxU32 stackSize,
|
||||
uint4* PX_RESTRICT stackPtr, //output
|
||||
PxU32* midphasePairs, //output
|
||||
const bool isMesh)
|
||||
: s_warpScratch(s_warpScratch), sphereCenter(sphereCenter), radius(radius), globalParticleIndex(globalParticleIndex), numParticles(numParticles),
|
||||
cmIdx(cmIdx), stackSize(stackSize), stackPtr(stackPtr), midphasePairs(midphasePairs), isMesh(isMesh), mBox(sphereCenter - PxVec3(radius), sphereCenter + PxVec3(radius))
|
||||
{ }
|
||||
|
||||
PX_FORCE_INLINE __device__ void intersectPrimitiveFullWarp(PxU32 primitiveIndex, PxU32 idxInWarp) const
|
||||
{
|
||||
bool intersect = false;
|
||||
if (primitiveIndex != 0xFFFFFFFF)
|
||||
{
|
||||
/*if (myFistSBIndex == 933 && tetrahedronIdxSecondSb == 578)
|
||||
{
|
||||
const PxU32 parentNodeIndex = currentNodeIndex >> 6;
|
||||
printf("workIndex % i parentNodeIndex %i blockIdx.x %i tetrahedronIdxSecondSb %i\n", workIndex, parentNodeIndex, blockIdx.x, tetrahedronIdxSecondSb);
|
||||
}*/
|
||||
|
||||
uint4 triIdx = s_warpScratch->meshVertsIndices[primitiveIndex];
|
||||
|
||||
const PxVec3 worldV0 = PxLoad3(s_warpScratch->meshVerts[triIdx.x]);
|
||||
const PxVec3 worldV1 = PxLoad3(s_warpScratch->meshVerts[triIdx.y]);
|
||||
const PxVec3 worldV2 = PxLoad3(s_warpScratch->meshVerts[triIdx.z]);
|
||||
|
||||
/*if ((triangleIdx == 6 || triangleIdx == 7))
|
||||
{
|
||||
printf("triangleIdx %i worldV0(%f, %f, %f), worldV1(%f, %f, %f), worldV2(%f, %f, %f)\n", triangleIdx, worldV0.x, worldV0.y, worldV0.z,
|
||||
worldV1.x, worldV1.y, worldV1.z, worldV2.x, worldV2.y, worldV2.z);
|
||||
}*/
|
||||
|
||||
if (isMesh)
|
||||
{
|
||||
//PxPlane triPlane(worldV0, worldV1, worldV2);
|
||||
//get rid of double-sided triangle
|
||||
//if (triPlane.distance(sphereCenter) >= 0)
|
||||
{
|
||||
intersect = triBoundingBox(worldV0, worldV1, worldV2).intersects(mBox);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
intersect = triBoundingBox(worldV0, worldV1, worldV2).intersects(mBox);
|
||||
}
|
||||
}
|
||||
|
||||
pushOntoStackFullWarp(intersect, midphasePairs, stackPtr, stackSize, make_uint4(cmIdx, globalParticleIndex, primitiveIndex, 0));
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE __device__ bool intersectBoxFullWarp(bool hasBox, const PxVec3& min, const PxVec3& max) const
|
||||
{
|
||||
if (hasBox)
|
||||
return mBox.intersects(PxBounds3(min, max));
|
||||
else
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// Midphase kernel: particle system vs cloth triangle mesh. One contact
// manager per blockIdx.y; the block first copies the whole PxgParticleSystem
// into shared memory, then each warp takes one sorted particle at a time and
// traverses the cloth's BV32 triangle tree with a contact-distance-sized box
// around it, emitting (cmIdx, particleIndex, triangleIdx) pairs.
extern "C" __global__
//__launch_bounds__(MIDPHASE_WARPS_PER_BLOCK * WARP_SIZE, 4)
void cloth_psMidphaseGeneratePairsLaunch(
	PxU32 numWorkItems,
	const PxReal tolerenceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxgShape* PX_RESTRICT gpuShapes,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgFEMCloth* PX_RESTRICT clothes,
	PxgParticleSystem* PX_RESTRICT particleSystems,

	const PxU32 stackSizeBytes,
	PxU8* PX_RESTRICT stackPtr, //output
	PxU32* PX_RESTRICT midphasePairsNum //output
)
{
	// Block-shared copy of the particle system (avoids repeated global reads).
	__shared__ __align__(16) PxU8 particleSystemMemory[sizeof(PxgParticleSystem)];
	PxgParticleSystem& sParticleSystem = *(reinterpret_cast<PxgParticleSystem*>(particleSystemMemory));
	__shared__ femMidphaseScratch scratchMem[MIDPHASE_WARPS_PER_BLOCK];

	femMidphaseScratch* s_warpScratch = &scratchMem[threadIdx.y];

	if (blockIdx.y < numWorkItems)
	{
		//const PxU32 globalThreadIdx = threadIdx.y * WARP_SIZE + threadIdx.x + blockIdx.x*blockDim.x*blockDim.y;
		const PxU32 globalWarpIdx = threadIdx.y + blockIdx.x*blockDim.y;

		PxgShape particleShape, clothShape;
		PxU32 particleCacheRef, clothCacheRef;
		LoadShapePairWarp<PxGeometryType::ePARTICLESYSTEM, PxGeometryType::eTRIANGLEMESH>(cmInputs, blockIdx.y, gpuShapes,
			particleShape, particleCacheRef, clothShape, clothCacheRef);

		const PxU32 particleSystemId = particleShape.particleOrSoftbodyId;

		//each thread read 16 byte
		uint4* srcParticleSystem = reinterpret_cast<uint4*>(&particleSystems[particleSystemId]);
		uint4* dstParticleSystem = reinterpret_cast<uint4*>(&sParticleSystem);
		warpCopy<uint4>(dstParticleSystem, srcParticleSystem, sizeof(PxgParticleSystem));


		const PxU32 clohtId = clothShape.particleOrSoftbodyId;
		const PxgFEMCloth& cloth = clothes[clohtId];

		PxU8 * tetmeshGeomPtr = reinterpret_cast<PxU8 *>(cloth.mTriMeshData);

		//const uint4 nbVerts_nbTets_maxDepth_nbBv32TreeNodes = *reinterpret_cast<const uint4 *>(tetmeshGeomPtr);
		tetmeshGeomPtr += sizeof(uint4);

		/*if (workIndex == 0 && threadIdx.x == 0)
		printf("particleSystemId %i softbodyId %i\n", particleSystemId, softbodyId);*/

		// Lane 0 of each warp publishes mesh pointers into its own scratch;
		// the __syncthreads below also covers the warpCopy above.
		if (threadIdx.x == 0)
		{
			s_warpScratch->meshVerts = cloth.mPosition_InvMass;
			s_warpScratch->meshVertsIndices = cloth.mTriangleVertexIndices;


			//const PxU32 & numVerts = nbVerts_nbTets_maxDepth_nbBv32TreeNodes.x;
			//const PxU32 & numTets = nbVerts_nbTets_maxDepth_nbBv32TreeNodes.y;
			//const PxU32 & maxDepth = nbVerts_nbTets_maxDepth_nbBv32TreeNodes.z;
			//const PxU32 & nbBv32PackedNodes = nbVerts_nbTets_maxDepth_nbBv32TreeNodes.w;

			//printf("maxDepth %i numVerts %i numTets %i nbBv32TreeNodes %i\n", maxDepth, numVerts, numTets, nbBv32TreeNodes);

			Gu::BV32DataPacked* bv32PackedNodes = reinterpret_cast<Gu::BV32DataPacked*>(tetmeshGeomPtr);
			s_warpScratch->bv32PackedNodes = bv32PackedNodes;

			/*tetmeshGeomPtr += sizeof(const Gu::BV32DataPacked)* nbBv32PackedNodes
				+ sizeof(const Gu::BV32DataDepthInfo) * nbVerts_nbTets_maxDepth_nbBv32TreeNodes.z
				+ sizeof(PxU32) * nbVerts_nbTets_maxDepth_nbBv32TreeNodes.w;*/

			/*const PxU8* surfaceHint = reinterpret_cast<const PxU8*>(tetmeshGeomPtr);
			s_warpScratch->tetmeshSurfaceHint = surfaceHint;*/

		}

		__syncthreads();

		PxReal cDistance = contactDistance[particleCacheRef] + contactDistance[clothCacheRef];

		//const PxU32 threadBaseAddress = globalThreadIdx & (~31);

		const PxU32 nbParticles = sParticleSystem.mCommonData.mNumParticles;

		//const PxU32 NbThreads = blockDim.x*blockDim.y*gridDim.x;
		const PxU32 NbWarps = blockDim.y*gridDim.x;

		float4* sortedPose = reinterpret_cast<float4*>(sParticleSystem.mSortedPositions_InvMass);

		uint4* tStackPtr = reinterpret_cast<uint4*>(stackPtr);
		const PxU32 stackSize = stackSizeBytes / sizeof(uint4);

		// One particle per warp per iteration (warp-stride loop); the warp
		// traverses the tree cooperatively for that particle.
		//for (PxU32 i = threadBaseAddress, particleIndex = globalThreadIdx; i < nbParticles; i+= NbThreads, particleIndex += NbThreads)
		for (PxU32 i = globalWarpIdx; i < nbParticles; i += NbWarps)
		{
			float4 pos = make_float4(0.f);
			//PxU32 particleIndex = 0xFFFFFFFF;

			//if(threadIdx.x == 0)
			PxU32 particleIndex = i;
			pos = sortedPose[particleIndex];


			/*if (particleIndex < nbParticles)
			{
				pos = sortedPose[particleIndex];
			}*/

			const PxVec3 particlePos(pos.x, pos.y, pos.z);

			bv32TreeTraversal<const ClothParticleTreeTraverser, MIDPHASE_WARPS_PER_BLOCK>(s_warpScratch->bv32PackedNodes, s_warpScratch->sBv32Nodes, ClothParticleTreeTraverser(s_warpScratch, particlePos, cDistance, particleIndex,
				nbParticles, blockIdx.y, stackSize, tStackPtr, midphasePairsNum, false));
		}
	}
}
|
||||
|
||||
|
||||
|
||||
// Midphase kernel: cloth vertices vs a static triangle mesh. One contact
// manager per blockIdx.y; resolves which shape of the pair is the rigid
// trimesh vs the cloth (both report eTRIANGLEMESH), transforms each cloth
// vertex into the trimesh's vertex space, and traverses the trimesh BV32 tree
// with a contact-distance-sized box, emitting (cmIdx, vertexIdx, triangleIdx)
// pairs. NOTE(review): the `cmIdx < numWorkItems` guard is commented out —
// presumably the launch grid exactly matches numWorkItems; confirm.
extern "C" __global__
//__launch_bounds__(MIDPHASE_WARPS_PER_BLOCK * WARP_SIZE, 4)
void cloth_midphaseVertexMeshLaunch(
	PxU32 numWorkItems,
	const PxReal tolerenceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	const PxgFEMCloth* PX_RESTRICT femClothes,

	const PxU32 stackSizeBytes,
	PxU8* PX_RESTRICT stackPtr, //output
	PxU32* PX_RESTRICT midphasePairsNum //output
)
{

	__shared__ femMidphaseScratch scratchMem[MIDPHASE_WARPS_PER_BLOCK];

	femMidphaseScratch* s_warpScratch = &scratchMem[threadIdx.y];

	const PxU32 cmIdx = blockIdx.y;

	//if (cmIdx < numWorkItems)
	{
		//const PxU32 globalThreadIdx = threadIdx.y * WARP_SIZE + threadIdx.x + blockIdx.x*blockDim.x*blockDim.y;
		const PxU32 globalWarpIdx = threadIdx.y + blockIdx.x*blockDim.y;

		PxgShape shape0, shape1;
		PxgShape* trimeshShape = NULL;
		PxgShape* clothShape = NULL;
		PxU32 trimeshCacheRef, clothCacheRef;
		{
			PxgContactManagerInput npWorkItem;
			PxgContactManagerInput_ReadWarp(npWorkItem, cmInputs, cmIdx);
			PxgShape_ReadWarp(shape0, gpuShapes + npWorkItem.shapeRef0);
			PxgShape_ReadWarp(shape1, gpuShapes + npWorkItem.shapeRef1);
			assert(shape0.type == PxGeometryType::eTRIANGLEMESH && shape1.type == PxGeometryType::eTRIANGLEMESH);

			// The cloth shape carries a valid particleOrSoftbodyId; the rigid
			// trimesh shape carries 0xffffffff. Use that to orient the pair.
			if (shape0.particleOrSoftbodyId != 0xffffffff)
			{
				trimeshShape = &shape1;
				trimeshCacheRef = npWorkItem.transformCacheRef1;
				clothShape = &shape0;
				clothCacheRef = npWorkItem.transformCacheRef0;
			}
			else
			{
				trimeshShape = &shape0;
				trimeshCacheRef = npWorkItem.transformCacheRef0;
				clothShape = &shape1;
				clothCacheRef = npWorkItem.transformCacheRef1;
			}
			assert(trimeshShape->particleOrSoftbodyId == 0xffffffff);
		}

		PxU32 clothId = clothShape->particleOrSoftbodyId;
		const PxgFEMCloth* cloth = &femClothes[clothId];

		PxsCachedTransform trimeshTransformCache;
		PxsCachedTransform_ReadWarp(trimeshTransformCache, transformCache + trimeshCacheRef);

		// Lane 0 publishes the rigid trimesh's BV32/vertex/index pointers.
		if (threadIdx.x == 0)
		{
			const PxU8* triGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape->hullOrMeshPtr);
			readTriangleMesh(triGeomPtr, s_warpScratch->bv32PackedNodes, s_warpScratch->meshVerts, s_warpScratch->meshVertsIndices);
		}

		__syncthreads();

		PxTransform& trimeshToWorld = trimeshTransformCache.transform;
		const PxMeshScale& trimeshScale = trimeshShape->scale;

		PxReal cDistance = contactDistance[clothCacheRef] + contactDistance[trimeshCacheRef];

		//const PxU32 threadBaseAddress = globalThreadIdx & (~31);

		const PxU32 numVerts = cloth->mNbVerts;

		const float4* positions = cloth->mPosition_InvMass;

		//const PxU32 NbThreads = blockDim.x*blockDim.y*gridDim.x;
		const PxU32 NbWarps = blockDim.y*gridDim.x;

		uint4* tStackPtr = reinterpret_cast<uint4*>(stackPtr);
		const PxU32 stackSize = stackSizeBytes / sizeof(uint4);

		// One cloth vertex per warp per iteration (warp-stride loop).
		for (PxU32 particleIndex = globalWarpIdx; particleIndex < numVerts; particleIndex += NbWarps)
		{

			const float4 pos = positions[particleIndex];

			//particle pos need to be in triangle mesh vertex space
			const PxVec3 particlePos(pos.x, pos.y, pos.z);

			/// transform those point to triangle vertex space
			const PxVec3 localParticlePos = shape2Vertex(trimeshToWorld.transformInv(particlePos), trimeshScale.scale, trimeshScale.rotation);

			bv32TreeTraversal<const ClothParticleTreeTraverser, MIDPHASE_WARPS_PER_BLOCK>(s_warpScratch->bv32PackedNodes, s_warpScratch->sBv32Nodes, ClothParticleTreeTraverser(s_warpScratch, localParticlePos, cDistance, particleIndex,
				numVerts, cmIdx, stackSize, tStackPtr, midphasePairsNum, true));
		}

	}
}
|
||||
|
||||
|
||||
|
||||
3900
engine/third_party/physx/source/gpunarrowphase/src/CUDA/femClothPrimitives.cu
vendored
Normal file
3900
engine/third_party/physx/source/gpunarrowphase/src/CUDA/femClothPrimitives.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
422
engine/third_party/physx/source/gpunarrowphase/src/CUDA/gjk.cuh
vendored
Normal file
422
engine/third_party/physx/source/gpunarrowphase/src/CUDA/gjk.cuh
vendored
Normal file
@@ -0,0 +1,422 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __GJK_CUH__
|
||||
#define __GJK_CUH__
|
||||
|
||||
namespace squawk
|
||||
{
|
||||
// The simplex is stored as 4 vectors and then 4 normals (so we can shoot it straight into EPA)
|
||||
// The 3 component of each vector contains 3f80 in the high bits (so it's a valid float) and
|
||||
// the vertex indices for a and b in the lower bits
|
||||
|
||||
typedef PxReal Simplex;
|
||||
|
||||
// Broadcasts normal word `index` of nV into lanes 16..19 (the slot of normal 0
// in the simplex layout); all other lanes keep their current simplex value.
static __device__ inline Simplex setNormal0(Simplex z, PxReal nV, PxU32 index)
{
	const PxReal broadcast = splatV(index, nV);	// warp-collective: every lane participates
	const bool isNormal0Lane = threadIdx.x >= 16 && threadIdx.x <= 19;
	return isNormal0Lane ? broadcast : z;
}
|
||||
|
||||
// Shifts the per-lane normal values up by 20 lanes so they land in the
// normal-1..3 slots (lanes 20..31) of the simplex; lanes below 20 are unchanged.
static __device__ inline Simplex setNormal123(Simplex z, PxReal nV)
{
	const PxReal shifted = __shfl_up_sync(FULL_MASK, nV, 20);	// warp-collective shuffle
	return threadIdx.x >= 20 ? shifted : z;
}
|
||||
|
||||
// Loads the cached GJK data as one PxReal word per lane (lane i reads word i).
static __device__ inline Simplex loadSimplex(const schlock::GjkCachedData& data)
{
	const PxReal* words = reinterpret_cast<const PxReal*>(&data);
	return words[threadIdx.x];
}
|
||||
|
||||
// Stores the lane-distributed simplex back into the cache, one word per lane.
static __device__ inline void storeSimplex(schlock::GjkCachedData& data, Simplex z)
{
	PxReal* words = reinterpret_cast<PxReal*>(&data);
	words[threadIdx.x] = z;
}
|
||||
|
||||
// Volatile overload of storeSimplex: each lane writes its word through a
// volatile pointer so the store is not cached/reordered by the compiler.
static __device__ inline void storeSimplex(volatile schlock::GjkCachedData& data, Simplex z)
{
	volatile PxReal* words = reinterpret_cast<volatile PxReal*>(&data);
	words[threadIdx.x] = z;
}
|
||||
|
||||
// suffixes for vector warps:
|
||||
// * V = 3 vectors: (aX,aY,aZ,_,bX,bY,bZ,_,cX,cY,cZ,_)
|
||||
// * D = dense scalar: (a,a,a,a,b,b,b,b,c,c,c,c)
|
||||
// * S = sparse scalar: (a,_,_,_,b,_,_,_,c,_,_,_)
|
||||
// * I = index vector for shuffle
|
||||
|
||||
#define DEGENERATE 0xffff
|
||||
|
||||
// Voronoi-region classification of the origin against the triangle formed by the
// current 2-simplex (in zV) plus the new apex cV. Writes the closest point on the
// kept feature to closestPoint and returns a packed feature code
// (i2<<12 | i1<<8 | i0<<4 | newSize), or DEGENERATE when the apex coincides with a
// base vertex so the caller can bail out/restart.
static __device__ inline PxU32 closestPointTri(Simplex &zV, PxReal cV, PxReal& closestPoint)
{
	PxU32 tI = threadIdx.x, cp0I = (tI&28)|next3(tI&3), cp1I = (tI&28)|prev3(tI&3); // cross product indices
	closestPoint = 0;
	PxReal eV = cV - zV, lD = splatX(dot(eV, eV)); // edges to apex & squared lengths
	if(anyX<2>(lD==0))
		return DEGENERATE; // FAIL: apex coincident with a base vert
	PxReal tmp = __shfl_xor_sync(FULL_MASK, eV, 4), shortEdgeV = __shfl_xor_sync(FULL_MASK, lD, 4) < lD ? tmp : eV; // shorter edge to apex
	PxReal mV = cross(__shfl_xor_sync(FULL_MASK, zV,4)-zV, shortEdgeV, cp0I, cp1I); // (normal, -normal)

	// lots of dot products here with cV, (plus proj below). TODO: optimize
	PxReal pS = dot(eV,cV); // projection of origin onto edges
	bool uS = dot(cross(mV, eV, cp0I, cp1I), cV) <= 0; // origin outside edge planes

	if(!anyX<2>(uS)) // REGION: face - inside both edge planes
	{
		PxReal projD = __shfl_sync(FULL_MASK, dot(mV,cV),0); // project onto face
		closestPoint = splatV(0, mV*projD/splatX(dot(mV,mV)));
		bool flip = projD<0; // ensure the normal winding makes it outward-facing in the tet
		// lane 3 of each vector slot carries projD alongside the normal components
		mV = (tI & 3) == 3 ? projD : mV;
		zV = setNormal0(zV, mV, flip);
		return flip ? 0x0123u : 0x1023u;
	}

	PxU32 eid = __ffs(packXFlags<2>(uS & bool(pS>0))); // REGION: edges - outside the edge plane, and projects onto the edge
	if(eid--) // __ffs returns 0 for no bit, 1 for lsb
	{
		closestPoint = splatV(eid, cV - eV*splatX(pS)/lD);
		return eid<<8 | 0x22;
	}

	closestPoint = cV; // REGION: apex - not an edge, not a face
	return 0x0021;
}
|
||||
|
||||
|
||||
// Voronoi-region classification of the origin against the tetrahedron formed by the
// current 3-simplex (in zV) plus the new apex dV. On success writes the closest point
// on the kept feature and returns a packed feature code (i2<<12 | i1<<8 | i0<<4 | newSize);
// returns DEGENERATE on collapse/inversion or when no region can be identified, so the
// caller can restart the iteration.
static __device__ inline PxU32 closestPointTet(Simplex &zV, PxReal dV, PxReal& closestPoint)
{
	closestPoint = 0;

	PxReal n0V = splatV(4, zV);
	if(__shfl_sync(FULL_MASK, dot(n0V, dV) > dot(n0V, zV), 0)) // FAIL: new vertex is beyond old face (TODO: set up normal in UT)
		return DEGENERATE;

	PxU32 tI = threadIdx.x, cp0I = (tI&28)|next3(tI&3), cp1I = (tI&28)|prev3(tI&3); // cross product indices
	PxU32 vNextI = tI>7 ? tI-8 : tI+4, vPrevI = tI<4 ? tI + 8 : tI-4; // block rotate indices
	PxReal eV = dV - zV, lD = splatX(dot(eV, eV)); // edges to apex and squared lengths
	if(anyX<3>(lD==0))
		return DEGENERATE; // FAIL: apex is coincident with a base vert

	PxReal tmp = __shfl_sync(FULL_MASK, eV, vNextI), shortEdgeV = __shfl_sync(FULL_MASK, lD, vNextI) < lD ? tmp : eV; // shorter of edges to apex for each tri
	PxReal nV = cross(shortEdgeV, __shfl_sync(FULL_MASK, zV, vNextI) - zV, cp0I, cp1I); // face normals, outward facing
	PxReal aS = dot(nV, dV); // distances along normals of apex
	PxReal gS = dot(__shfl_sync(FULL_MASK, zV, vPrevI), nV); // distances along normal of vert not in this triangle

	if(anyX<3>(aS<=gS))
		return DEGENERATE; // FAIL: simplex inversion/collapse

	// stash the apex distance in lane 3 of each normal slot
	PxReal tmpV = __shfl_up_sync(FULL_MASK, aS, 3);
	nV = (tI & 3) == 3 ? tmpV : nV;

	if(allX<3>(aS>0)) // REGION: inside simplex
	{
		zV = setNormal123(zV, nV); // set normals for EPA start
		return 0x2104;
	}

	const PxReal pS = dot(eV,dV); // projection of origin onto edges
	if(allX<3>(pS<=0)) // REGION: apex
	{
		closestPoint = dV;
		return 0x2131;
	}

	bool uiS = dot(cross(eV, nV, cp0I, cp1I), dV) <= 0; // origin outside one edge plane of each triangle
	bool ujS = dot(cross(__shfl_sync(FULL_MASK, eV,vNextI), nV, cp0I, cp1I), dV) > 0; // origin outside other edge plane of each triangle
	int eid = __ffs(packXFlags<3>(uiS & __shfl_sync(FULL_MASK, ujS, vPrevI) & bool(pS>=0))); // REGION: edges - origin outside edge planes and projects onto edge
	if(eid--) // __ffs returns 0 for no bit, 1 for lsb
	{
		closestPoint = splatV(eid, dV - eV * splatX(pS/lD));
		return eid<<8| 0x32;
	}

	PxU32 fid = __ffs(packXFlags<3>(!uiS & !ujS & bool(aS<=0))); // REGION: faces - origin outside face plane, inside edge planes
	if(fid--) // __ffs returns 0 for no bit, 1 for lsb
	{
		closestPoint = splatV(fid, nV * splatX(aS / dot(nV, nV)));
		zV = setNormal0(zV, -nV, fid); // flip normal to be outward facing for next iteration
		return next3(fid)<<12 | fid<<8| 0x33;
	}

	return DEGENERATE; // FAIL: couldn't find voronoi region. Chort vozmi!
}
|
||||
|
||||
// Warp-cooperative support mapping: returns the index of the vertex in
// v[0..nbVertices) with the largest dot product against dir. dir is
// lane-distributed: lanes 0/1/2 hold its x/y/z components, which are broadcast
// to all lanes before the scan. Processes 32 vertices per pass.
static __device__ inline PxU8 supportIndex(const PxVec3* v, PxU8 nbVertices, PxReal dir)
{
	int bestN = 0xFFFFFFFF; // Initialize to a very illegal value to force a crash when we get nans
	PxReal bestDist = -FLT_MAX;
	PxReal x = __shfl_sync(FULL_MASK, dir, 0), y = __shfl_sync(FULL_MASK, dir, 1), z = __shfl_sync(FULL_MASK, dir, 2);
	for(PxU32 i=0;i<nbVertices;i+=32)
	{
		int n = i+threadIdx.x;
		// lanes past the end of the array contribute -FLT_MAX so they never win
		PxReal d = n < nbVertices ? x*v[n].x + y*v[n].y + z*v[n].z : -FLT_MAX, e;

		// butterfly max-reduction: e becomes the warp-wide maximum of d on every lane
		e = fmaxf(d, __shfl_xor_sync(FULL_MASK, d, 16));
		e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 8));
		e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 4));
		e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 2));
		e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 1));

		unsigned mask_bestDist = __ballot_sync(FULL_MASK, e > bestDist);
		// pick the lowest lane holding the maximum as the tie-break
		if(e>bestDist)
			bestDist = e, bestN = i+__ffs(__ballot_sync(mask_bestDist, e == d))-1;
	}

	assert(bestN < nbVertices);
	return PxU8(bestN);
}
|
||||
|
||||
// Warp-cooperative support mapping for hulls with at most 64 vertices: lane i is
// responsible for vertex i (v0, the lane's first candidate) and vertex i+32 (v1,
// its second candidate). dir is lane-distributed: lanes 0/1/2 hold x/y/z.
// Returns the index of the vertex with the largest dot product against dir.
// BUG FIX: the second candidate's projection previously reused v0
// (d1 = x*v0.x + y*v0.y + z*v0.z), so vertices 32..63 could never be selected;
// it must use v1, which was otherwise unused.
static __device__ inline PxU8 supportIndex(const PxVec3& v0, const PxVec3& v1, PxU8 nbVertices, PxReal dir)
{
	int bestN = 0xFFFFFFFF; // illegal on purpose: fail loudly if nothing wins (e.g. NaNs)
	PxReal x = __shfl_sync(FULL_MASK, dir, 0), y = __shfl_sync(FULL_MASK, dir, 1), z = __shfl_sync(FULL_MASK, dir, 2);

	PxReal d = -FLT_MAX;	// lanes past the end contribute -FLT_MAX so they never win
	PxU32 n = threadIdx.x;
	if (threadIdx.x < nbVertices)
	{
		d = x*v0.x + y*v0.y + z*v0.z;

		if ((threadIdx.x + 32) < nbVertices)
		{
			// project this lane's second vertex (index threadIdx.x + 32)
			PxReal d1 = x*v1.x + y*v1.y + z*v1.z;
			if (d1 > d)
			{
				d = d1;
				n += 32;
			}
		}
	}

	// butterfly max-reduction: e becomes the warp-wide maximum of d on every lane
	PxReal e = fmaxf(d, __shfl_xor_sync(FULL_MASK, d, 16));
	e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 8));
	e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 4));
	e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 2));
	e = fmaxf(e, __shfl_xor_sync(FULL_MASK, e, 1));

	bestN = __ffs(__ballot_sync(FULL_MASK, e == d)) - 1;	// lowest lane holding the max
	bestN += __popc(__ballot_sync(FULL_MASK, n >= 32) & (1 << bestN)) * 32;	// +32 if that lane picked its second vertex

	assert(bestN < nbVertices);

	return PxU8(bestN);
}
|
||||
|
||||
|
||||
// Warp-parallel GJK distance query between convex point clouds A and B.
// Maintains a lane-distributed simplex (see layout comments above), upper/lower
// distance bounds, and a cached simplex for warm starting. Iterates support
// queries on the Minkowski difference until the bounds converge to within
// convergenceRatio or the lower bound exceeds minSep. Returns
// eOVERLAP / eDISTANT / eCLOSE and fills output and cacheData accordingly.
static __device__ inline schlock::GjkResult::Enum gjk(
	const PxVec3* vA, PxU32 nbA,
	const PxVec3* vB, PxU32 nbB,
	const PxVec3& initialUnitDir,
	PxReal minSep,
	PxReal convergenceRatio,
	schlock::GjkOutput& output, schlock::GjkCachedData& cacheData)
{
	int cacheMask = (1<< cacheData.size)-1;
	__syncwarp(); //cacheData.size is written further below
	Simplex simplexV = loadSimplex(cacheData);

restart:	// re-entered when a warm-started iteration turns out degenerate
	bool warmStarted = false;
	Simplex backupSimplexV;
	int size = 0, backupSize = 0;
	PxReal closestPointV, backupClosestV;

	// seed the search direction: cached vertex 0 when warm-starting, else the caller's direction
	if (cacheMask)
		closestPointV = splatV(0, simplexV);
	else
	{
		if ((threadIdx.x & 3) == 0)
			closestPointV = initialUnitDir.x;
		else if ((threadIdx.x & 3) == 1)
			closestPointV = initialUnitDir.y;
		else if ((threadIdx.x & 3) == 2)
			closestPointV = initialUnitDir.z;
		else
			closestPointV = PX_MAX_F32;
	}

	PxReal upperBound = PX_MAX_F32, lowerBound = -PX_MAX_F32, cm = magnitude(closestPointV); // if we hit a lower bound dir with distance exactly zero, -FLT_MIN means we keep it
	bool degenerate;

	do
	{
		PxReal dV = closestPointV/cm, supportVertexV;

#if GJK_LOGGING
		PxReal dx = dV, dy = shfl(dV, 1), dz = shfl(dV,2);
		PxReal cpx = closestPointV, cpy = shfl(closestPointV, 1), cpz = shfl(closestPointV,2);
		if(threadIdx.x == 0)
			printf("[%f, %f], dir (%f, %f, %f), closestPoint (%f, %f, %f), cm %f \n", lowerBound, upperBound, dx, dy, dz, cpx, cpy, cpz, cm);
#endif

		// find a warm start entry that can be added to the simplex because it's closer than the closest feature
		int candidates = 0;

		if (cacheMask)
			candidates = __ballot_sync(FULL_MASK, __shfl_sync(FULL_MASK, dot(dV, simplexV)<upperBound, threadIdx.x<<2)) & cacheMask;

		if(candidates)
		{
			warmStarted = true;
			cacheMask &= ~(lowestSetBit(candidates)*2-1);	// consume this cache entry and everything below it
			supportVertexV = splatV(lowestSetIndex(candidates), simplexV);
			if(threadIdx.x>>2 == size)
				simplexV = supportVertexV;
		}
		else // if not, stop warm-starting and start using support vertices
		{
			cacheMask = 0;
			PxU8 indexA = supportIndex(vA, nbA, -dV), indexB = supportIndex(vB, nbB, dV);
			supportVertexV = loadV3Unsafe(vA+indexA) - loadV3Unsafe(vB+indexB);
			// lane 3 of the new simplex slot stores the packed (indexA, indexB) pair
			if(threadIdx.x>>2 == size)
				simplexV = (threadIdx.x&3) < 3 ? supportVertexV : __int_as_float(schlock::encodeIndices(indexA, indexB));
			PxReal lb = splatX(dot(dV,supportVertexV));
			if(lb>lowerBound)
			{
				storeV3(&output.direction, 0, -dV);
				lowerBound = lb;
				if(lowerBound > minSep || lowerBound>=upperBound*convergenceRatio)
					break;
			}
		}

#if GJK_LOGGING
		PxReal svx = supportVertexV, svy = shfl(supportVertexV,1), svz = shfl(supportVertexV,2);
		if(threadIdx.x == 0)
			printf("support (%f, %f, %f)\n", svx, svy, svz);
#endif

		PxU32 f = 0; // f is feature index: (i2<<12 | i1<<8 | i0<<4 | size)
		PxReal pointV;
		switch(size)
		{
		case 0: f = 0x2101, pointV = supportVertexV; break; // have to be a bit for 0,1 cases not to trash the cached verts, which also live in the simplex
		case 1:
		{
			PxReal aV = splatV(0,simplexV), lV = supportVertexV - aV, m = splatX(dot(lV,lV)), x = -splatX(dot(aV,lV));
			if(m==0) f = DEGENERATE, pointV = 0; // FAIL: coincident vertex
			else if(x<m) f = 0x2102, pointV = aV + lV * x/m; // REGION: line segment
			else f = 0x2111, pointV = supportVertexV; // REGION: new vertex
			break;
		}
		case 2: f = squawk::closestPointTri(simplexV, supportVertexV, pointV); break; // no worries for 2 & 3: only (0,1,2) are affected by permute, and they're already used
		case 3: f = squawk::closestPointTet(simplexV, supportVertexV, pointV); break;
		}

		cm = magnitude(pointV);
		int newSize = f&0xf;
		degenerate = f == DEGENERATE || (cm>=upperBound && newSize<=size);
		if(degenerate)
		{
			if(!warmStarted) // hit a problem warm-starting: just restart cold
				break;
			cacheMask = 0;
			goto restart;
		}

		// local minima happen. Continue so long as either the simplex size increases or the upper bound decreases,
		backupSize = cm >= upperBound ? size : 0;
		if(backupSize)
			backupSimplexV = simplexV, backupClosestV = closestPointV; // save the best simplex and size before updating state
		else
			upperBound = cm;

#if GJK_LOGGING
		PxReal px = pointV, py = shfl(pointV,1), pz = shfl(pointV,2);
		if(threadIdx.x == 0)
			printf("%04x (%f, %f, %f) %f\n", f, px, py, pz, cm);
#endif

		closestPointV = pointV, size = newSize; // update closest point and simplex
		PxU32 sourceIndex = f>>((threadIdx.x&28)+4)&0xf; // +4 because size is the first nibble
		PxReal tmp = __shfl_sync(FULL_MASK, simplexV, sourceIndex<<2 | (threadIdx.x&3));
		if(threadIdx.x<12)
			simplexV = tmp;
	}
	while(upperBound > 0 && lowerBound < upperBound * convergenceRatio);

	if(backupSize>0)
		simplexV = backupSimplexV, closestPointV = backupClosestV, size = backupSize;

	__syncwarp();
	storeSimplex(cacheData, simplexV);

	if(threadIdx.x == 0)
	{
		output.degenerate = degenerate;
		output.lowerBound = lowerBound;
		output.upperBound = upperBound;
		cacheData.size = size;
	}

	__syncwarp();

	if(upperBound == 0)
		return schlock::GjkResult::eOVERLAP;

	if(lowerBound>minSep)
		return schlock::GjkResult::eDISTANT;

	// NOTE(review): b0..b2 are read but unused below — closestPointB is reconstructed
	// from closestPointDir instead. Confirm whether they can be removed.
	PxU8 a0 = cacheData.vertices[0].a(), a1 = cacheData.vertices[1].a(), a2 = cacheData.vertices[2].a();
	PxU8 b0 = cacheData.vertices[0].b(), b1 = cacheData.vertices[1].b(), b2 = cacheData.vertices[2].b();

	PxVec3 closestPoint(closestPointV, __shfl_sync(FULL_MASK, closestPointV,1), __shfl_sync(FULL_MASK, closestPointV,2));

	if(threadIdx.x == 0)
	{
		PxVec3 closestPointDir = -closestPoint.getNormalized();
		output.closestPointDir = closestPointDir;

		// reconstruct the witness point on A from the final simplex feature
		PxReal p, q, r;
		if(size == 3)
		{
			schlock::barycentrics(cacheData.vertices[0].getV(), cacheData.vertices[1].getV(), cacheData.vertices[2].getV(), p, q, r, closestPoint);
			output.closestPointA = p * vA[a0] + q * vA[a1] + r * vA[a2];
		}
		else if(size == 2)
		{
			PxVec3 d = cacheData.vertices[1].getV()- cacheData.vertices[0].getV();
			PxReal q = -cacheData.vertices[0].getV().dot(d)/d.magnitudeSquared();
			output.closestPointA = (1-q) * vA[a0] + q * vA[a1];
		}
		else
			output.closestPointA = vA[a0];

		output.closestPointB = output.closestPointA + upperBound * closestPointDir;
	}

	return schlock::GjkResult::eCLOSE;
}
|
||||
}
|
||||
#undef DEGENERATE_SIZE_FLAG
|
||||
|
||||
#endif
|
||||
443
engine/third_party/physx/source/gpunarrowphase/src/CUDA/heightfieldUtil.cuh
vendored
Normal file
443
engine/third_party/physx/source/gpunarrowphase/src/CUDA/heightfieldUtil.cuh
vendored
Normal file
@@ -0,0 +1,443 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __HEIGHTFIELD_CUH__
|
||||
#define __HEIGHTFIELD_CUH__
|
||||
|
||||
#include "geometry/PxHeightFieldSample.h"
|
||||
#include "geometry/PxMeshScale.h"
|
||||
#include "triangle.cuh"
|
||||
#include "assert.h"
|
||||
|
||||
// Clamps a heightfield-local x coordinate to the first row of a valid cell
// (a cell needs row+1 to exist, hence the rows-2 upper limit).
__device__ static inline physx::PxU32 getMinRow(physx::PxReal x, const physx::PxU32 rows)
{
	using namespace physx;
	const PxI32 cellRow = PxI32(PxFloor(x));
	const PxI32 lastCellRow = PxI32(rows - 2);
	return (PxU32)PxClamp(cellRow, PxI32(0), lastCellRow);
}
|
||||
// Clamps a heightfield-local x coordinate (rounded up) to a valid row index.
__device__ static inline physx::PxU32 getMaxRow(physx::PxReal x, const physx::PxU32 rows)
{
	using namespace physx;
	const PxI32 rowAbove = PxI32(PxCeil(x));
	const PxI32 lastRow = PxI32(rows - 1);
	return (PxU32)PxClamp(rowAbove, PxI32(0), lastRow);
}
|
||||
|
||||
// Clamps a heightfield-local z coordinate to the first column of a valid cell
// (a cell needs column+1 to exist, hence the columns-2 upper limit).
__device__ static inline physx::PxU32 getMinColumn(physx::PxReal z, const physx::PxU32 columns)
{
	using namespace physx;
	const PxI32 cellColumn = PxI32(PxFloor(z));
	const PxI32 lastCellColumn = PxI32(columns - 2);
	return (PxU32)PxClamp(cellColumn, PxI32(0), lastCellColumn);
}
|
||||
|
||||
// Clamps a heightfield-local z coordinate (rounded up) to a valid column index.
__device__ static inline physx::PxU32 getMaxColumn(physx::PxReal z, const physx::PxU32 columns)
{
	using namespace physx;
	const PxI32 columnAbove = PxI32(PxCeil(z));
	const PxI32 lastColumn = PxI32(columns - 1);
	return (PxU32)PxClamp(columnAbove, PxI32(0), lastColumn);
}
|
||||
|
||||
// True when vertexIndex addresses one of the rows*columns heightfield samples.
__device__ static inline bool isValidVertex(physx::PxU32 vertexIndex, const physx::PxU32 rows, const physx::PxU32 columns)
{
	using namespace physx;
	const PxU32 vertexCount = rows * columns;
	return vertexIndex < vertexCount;
}
|
||||
|
||||
// Each heightfield cell owns two triangles; even indices are the cell's first one.
__device__ static inline bool isFirstTriangle(physx::PxU32 triangleIndex)
{
	return (triangleIndex % 2u) == 0u;
}
|
||||
|
||||
// Direct lookup: one sample record per heightfield vertex.
__device__ static inline const physx::PxHeightFieldSample& getSample(physx::PxU32 vertexIndex, const physx::PxHeightFieldSample* samples)
{
	const physx::PxHeightFieldSample* entry = samples + vertexIndex;
	return *entry;
}
|
||||
|
||||
// Raw (unscaled) sample height for a vertex, widened to PxReal.
__device__ static inline physx::PxReal getHeight(physx::PxU32 vertexIndex, const physx::PxHeightFieldSample* samples)
{
	using namespace physx;
	const PxHeightFieldSample& sample = getSample(vertexIndex, samples);
	return PxReal(sample.height);
}
|
||||
|
||||
// Material of the cell's first triangle at this vertex.
__device__ static inline physx::PxU16 getMaterialIndex0(physx::PxU32 vertexIndex, const physx::PxHeightFieldSample* samples)
{
	using namespace physx;
	const PxHeightFieldSample& sample = getSample(vertexIndex, samples);
	return sample.materialIndex0;
}
|
||||
|
||||
// Material of the cell's second triangle at this vertex.
__device__ static inline physx::PxU16 getMaterialIndex1(physx::PxU32 vertexIndex, const physx::PxHeightFieldSample* samples)
{
	using namespace physx;
	const PxHeightFieldSample& sample = getSample(vertexIndex, samples);
	return sample.materialIndex1;
}
|
||||
|
||||
// Tessellation flag of the cell's sample: selects which diagonal splits the cell.
__device__ static inline bool isZerothVertexShared(physx::PxU32 vertexIndex, const physx::PxHeightFieldSample* samples)
{
	const bool tessellated = getSample(vertexIndex, samples).tessFlag() != 0;
	return tessellated;
}
|
||||
|
||||
|
||||
// Vertex position in unscaled heightfield space: (row, sampleHeight, column).
__device__ static inline physx::PxVec3 getVertex(physx::PxU32 vertexIndex, const physx::PxU32 columns, const physx::PxHeightFieldSample* samples)
{
	using namespace physx;
	const PxU32 row = vertexIndex / columns;
	const PxU32 column = vertexIndex - row * columns;	// same as vertexIndex % columns
	const PxReal y = getHeight(vertexIndex, samples);
	return PxVec3(PxReal(row), y, PxReal(column));
}
|
||||
|
||||
// Converts a point from heightfield space to shape space by applying the
// per-axis row/height/column scales.
__device__ static inline physx::PxVec3 hf2shapep(const physx::PxVec3& v, const physx::PxReal rowScale, const physx::PxReal heightScale, const physx::PxReal columnScale)
{
	using namespace physx;
	PxVec3 scaled;
	scaled.x = v.x * rowScale;
	scaled.y = v.y * heightScale;
	scaled.z = v.z * columnScale;
	return scaled;
}
|
||||
|
||||
// Maps a heightfield triangle index to the sample indices of its three vertices.
// Each cell (triangleIndex >> 1) holds two triangles; the cell's tessellation flag
// selects which diagonal splits it, and the winding follows the ASCII diagrams below.
__device__ static inline void getTriangleVertexIndices(physx::PxU32 triangleIndex, physx::PxU32& vertexIndex0, physx::PxU32& vertexIndex1, physx::PxU32& vertexIndex2, const physx::PxU32 columns, const physx::PxHeightFieldSample* samples)
{
	using namespace physx;
	const PxU32 cell = triangleIndex >> 1;	// two triangles per cell
	if (isZerothVertexShared(cell, samples))
	{
		//      <---- COL
		// 0----2  1 R
		// | 1 /  /| O
		// |  /  / | W
		// | /  /  | |
		// |/  / 0 | |
		// 1  2----0 V
		//
		if (isFirstTriangle(triangleIndex))
		{
			vertexIndex0 = cell + columns;
			vertexIndex1 = cell;
			vertexIndex2 = cell + columns + 1;
		}
		else
		{
			vertexIndex0 = cell + 1;
			vertexIndex1 = cell + columns + 1;
			vertexIndex2 = cell;
		}
	}
	else
	{
		//      <---- COL
		// 2  1----0 R
		// |\  \ 0 | O
		// | \  \  | W
		// |  \  \ | |
		// | 1 \  \| |
		// 0----1  2 V
		//
		if (isFirstTriangle(triangleIndex))
		{
			vertexIndex0 = cell;
			vertexIndex1 = cell + 1;
			vertexIndex2 = cell + columns;
		}
		else
		{
			vertexIndex0 = cell + columns + 1;
			vertexIndex1 = cell + columns;
			vertexIndex2 = cell + 1;
		}
	}
}
|
||||
|
||||
// Computes, for each edge of a heightfield triangle, the index of the triangle
// sharing that edge, or BOUNDARY when the edge lies on the heightfield border.
// Edge k is opposite/adjacent per the vertex ordering of getTriangleVertexIndices;
// the diagonal neighbour is always the cell's other triangle (triangleIndex +/- 1).
__device__ static inline void getTriangleAdjacencyIndices(physx::PxU32 triangleIndex, physx::PxU32& adjacencyIndex0, physx::PxU32& adjacencyIndex1, physx::PxU32& adjacencyIndex2, const physx::PxU32 rows, const physx::PxU32 columns,
	const physx::PxHeightFieldSample* samples)
{
	using namespace physx;
	const PxU32 cell = triangleIndex >> 1;	// two triangles per cell
	if (isZerothVertexShared(cell, samples))
	{
		//      <---- COL
		// 0----2  1 R
		// | 1 /  /| O
		// |  /  / | W
		// | /  /  | |
		// |/  / 0 | |
		// 1  2----0 V
		//
		if (isFirstTriangle(triangleIndex))
		{
			adjacencyIndex0 = BOUNDARY;
			adjacencyIndex1 = triangleIndex + 1;	// diagonal neighbour: same cell's other triangle
			adjacencyIndex2 = BOUNDARY;

			if ((cell % (columns) != 0))	// not in the first column
			{
				adjacencyIndex0 = triangleIndex - 1;
			}

			if ((cell / columns != rows - 2))	// not in the last cell row
			{
				adjacencyIndex2 = ((cell + columns) * 2) + 1;
			}
		}
		else
		{
			adjacencyIndex0 = BOUNDARY;
			adjacencyIndex1 = triangleIndex - 1;	// diagonal neighbour
			adjacencyIndex2 = BOUNDARY;

			if (cell % (columns) < (columns - 2))	// not in the last cell column
			{
				adjacencyIndex0 = triangleIndex + 1;
			}

			if (cell >= columns - 1)	// not in the first cell row
			{
				adjacencyIndex2 = (cell - columns) * 2;
			}
		}
	}
	else
	{
		//      <---- COL
		// 2  1----0 R
		// |\  \ 0 | O
		// | \  \  | W
		// |  \  \ | |
		// | 1 \  \| |
		// 0----1  2 V
		//
		if (isFirstTriangle(triangleIndex))
		{
			adjacencyIndex0 = BOUNDARY;
			adjacencyIndex1 = triangleIndex + 1;	// diagonal neighbour
			adjacencyIndex2 = BOUNDARY;

			if (cell >= columns - 1)	// not in the first cell row
			{
				adjacencyIndex0 = ((cell - (columns)) * 2) + 1;
			}

			if ((cell % (columns) != 0))	// not in the first column
			{
				adjacencyIndex2 = triangleIndex - 1;
			}
		}
		else
		{
			adjacencyIndex0 = BOUNDARY;
			adjacencyIndex1 = triangleIndex - 1;	// diagonal neighbour
			adjacencyIndex2 = BOUNDARY;

			if ((cell / columns != rows - 2))	// not in the last cell row
			{
				adjacencyIndex0 = (cell + (columns)) * 2;
			}

			if (cell % (columns) < (columns - 2))	// not in the last cell column
			{
				adjacencyIndex2 = triangleIndex + 1;
			}
		}
	}
}
|
||||
|
||||
|
||||
|
||||
// Fetches one heightfield triangle in shape space (scales applied). When the
// row/column scales have mixed signs the handedness flips, so vertices 1 and 2
// (and the matching adjacency slots) are swapped to keep the winding consistent.
// adjacencyIndices may be null; when given, it is rewritten in place.
__device__ static void getTriangle(physx::PxVec3& triLocV0, physx::PxVec3& triLocV1, physx::PxVec3& triLocV2, uint4* adjacencyIndices, const physx::PxU32 triangleIndex, const physx::PxMeshScale& scale, const physx::PxU32 rows, const physx::PxU32 columns, const physx::PxHeightFieldSample* samples)
{
	using namespace physx;

	const PxReal rowScale = scale.scale.x;
	const PxReal heightScale = scale.scale.y;
	const PxReal columnScale = scale.scale.z;
	PxVec3 handedness(1.0f);	// Vector to invert normal coordinates according to the heightfield scales
	bool wrongHanded = false;
	if (columnScale < 0.f)
	{
		wrongHanded = !wrongHanded;
		handedness.z = -1.0f;
	}
	if (rowScale < 0.f)
	{
		wrongHanded = !wrongHanded;
		handedness.x = -1.0f;
	}

	//PxU32 tVertexIndices[3];
	//getTriangleVertexIndices(triangleIndex, tVertexIndices[0], tVertexIndices[1 + wrongHanded], tVertexIndices[2 - wrongHanded], columns, samples);

	PxU32 tVert0, tVert1, tVert2;
	getTriangleVertexIndices(triangleIndex, tVert0, tVert1, tVert2, columns, samples);

	if (adjacencyIndices)
	{
		/*PxU32 tAdjInd0 = wrongHanded ? adjacencyIndices->z : adjacencyIndices->x;
		PxU32 tAdjInd1 = adjacencyIndices->y;
		PxU32 tAdjInd2 = wrongHanded ? adjacencyIndices->x : adjacencyIndices->z;*/

		PxU32 tAdjInd0, tAdjInd1, tAdjInd2;
		getTriangleAdjacencyIndices(triangleIndex, tAdjInd0, tAdjInd1, tAdjInd2, rows, columns, samples);

		// swap the edge neighbours to mirror the vertex swap below
		adjacencyIndices->x = wrongHanded ? tAdjInd2 : tAdjInd0;
		adjacencyIndices->y = tAdjInd1;
		adjacencyIndices->z = wrongHanded ? tAdjInd0 : tAdjInd2;
	}

	// swap vertices 1 and 2 when the scale flips handedness
	const PxVec3 vertex0 = getVertex(tVert0, columns, samples);
	const PxVec3 vertex1 = getVertex(wrongHanded ? tVert2 : tVert1, columns, samples);
	const PxVec3 vertex2 = getVertex(wrongHanded ? tVert1 : tVert2, columns, samples);

	triLocV0 = hf2shapep(vertex0, rowScale, heightScale, columnScale);
	triLocV1 = hf2shapep(vertex1, rowScale, heightScale, columnScale);
	triLocV2 = hf2shapep(vertex2, rowScale, heightScale, columnScale);
}
|
||||
|
||||
__device__ static bool testLocalPoint(const physx::PxVec3& pnt, const physx::PxU32 rows, const physx::PxU32 columns, const physx::PxHeightFieldSample* samples, float& height)
|
||||
{
|
||||
using namespace physx;
|
||||
|
||||
if (pnt.x < 0 || pnt.x > rows - 1 || pnt.z < 0 || pnt.z > columns - 1)
|
||||
return false;
|
||||
|
||||
PxU32 row = getMinRow(pnt.x, rows);
|
||||
PxU32 column = getMinColumn(pnt.z, columns);
|
||||
PxU32 vertex = row * columns + column;
|
||||
PxReal h0 = getHeight(vertex, samples);
|
||||
PxReal h1 = getHeight(vertex + 1, samples);
|
||||
PxReal h2 = getHeight(vertex + columns, samples);
|
||||
PxReal h3 = getHeight(vertex + columns + 1, samples);
|
||||
PxReal s = pnt.x - PxFloor(pnt.x);
|
||||
PxReal t = pnt.z - PxFloor(pnt.z);
|
||||
PxReal h = h0 * (1 - s) * (1 - t) + h1 * s * (1 - t) + h2 * (1 - s) * t + h3 * s * t; // @@@ That's wrong. Should find point on triangle
|
||||
height = h;
|
||||
return h > pnt.y;
|
||||
}
|
||||
|
||||
|
||||
// Counts candidate (cell, material) pairs in the heightfield window
// [minRow,maxRow) x [minColumn,maxColumn): a cell contributes once per non-hole
// material unless all four of its corner heights lie strictly outside [miny,maxy].
// NOTE(review): miny/maxy are PxU32 yet are compared against PxReal heights via
// implicit conversion — confirm callers intend quantized height bounds here.
__device__ static inline physx::PxU32 heightfieldComputePairs(
	const physx::PxU32 minColumn,
	const physx::PxU32 maxColumn,
	const physx::PxU32 minRow,
	const physx::PxU32 maxRow,
	const physx::PxU32 nbRows,
	const physx::PxU32 nbCols,
	physx::PxHeightFieldSample* samples,
	const physx::PxU32 miny,
	const physx::PxU32 maxy
)
{
	using namespace physx;

	PxU32 nbPairs = 0;

	const PxU32 columnSpan = maxColumn - minColumn;

	//we have two materials corresponding to one vertexIndex, so each thread will deal with one of the materials
	const PxU32 totalNumProcessed = (maxRow - minRow) * columnSpan * 2;

	for (PxU32 i = 0; i < totalNumProcessed; ++i)
	{
		// even i = material 0 of the cell, odd i = material 1
		const PxU32 index = i / 2;
		const PxU32 vertexIndex = (minRow + index / columnSpan) * nbCols + (minColumn + index % columnSpan);
		assert(isValidVertex(vertexIndex, nbRows, nbCols));
		PxReal h0 = getHeight(vertexIndex, samples);
		PxReal h1 = getHeight(vertexIndex + 1, samples);
		PxReal h2 = getHeight(vertexIndex + nbCols, samples);
		PxReal h3 = getHeight(vertexIndex + nbCols + 1, samples);
		// con0: cell entirely above the query band; con1: entirely below
		const bool con0 = maxy < h0 && maxy < h1 && maxy < h2 && maxy < h3;
		const bool con1 = miny > h0 && miny > h1 && miny > h2 && miny > h3;

		if (!(con0 || con1))
		{
			const PxHeightFieldSample& sample = getSample(vertexIndex, samples);

			const bool isMaterial1 = (i & 1) ? 1 : 0;
			PxU32 material = isMaterial1 ? sample.materialIndex1 : sample.materialIndex0;
			if (material != PxHeightFieldMaterial::eHOLE)
				nbPairs++;
		}
	}//end of totalNumProcessed

	return nbPairs;
}
|
||||
|
||||
// Companion to heightfieldComputePairs: walks the same window with the same
// culling rules, but instead of counting, writes one uint4 entry
// (cmInd, elementInd, triangleIdx, 0) per accepted pair into stackPtr starting at
// startOffset, stopping when writeIndex reaches 'size'. The triangle index encodes
// the material slot in its low bit (vertexIndex*2 [+1 for material 1]).
__device__ static inline void heightfieldOutputPairs(
	const physx::PxU32 minColumn,
	const physx::PxU32 maxColumn,
	const physx::PxU32 minRow,
	const physx::PxU32 maxRow,
	const physx::PxU32 nbRows,
	const physx::PxU32 nbCols,
	physx::PxHeightFieldSample* samples,
	const physx::PxU32 miny,
	const physx::PxU32 maxy,

	const physx::PxU32 cmInd,
	const physx::PxU32 elementInd, //either tetrahedron id or triangle id
	const physx::PxU32 startOffset,
	const physx::PxU32 size,

	uint4* stackPtr //output

)
{
	using namespace physx;

	PxU32 writeIndex = startOffset;
	const PxU32 columnSpan = maxColumn - minColumn;

	//we have two materials corresponding to one vertexIndex, so each thread will deal with one of the materials
	const PxU32 totalNumProcessed = (maxRow - minRow) * columnSpan * 2;

	for (PxU32 i = 0; i < totalNumProcessed && writeIndex < size; ++i)
	{

		PxU32 triangleIdx = 0xFFffFFff;

		// even i = material 0 of the cell, odd i = material 1
		const PxU32 index = i / 2;
		const PxU32 vertexIndex = (minRow + index / columnSpan) * nbCols + (minColumn + index % columnSpan);
		assert(isValidVertex(vertexIndex, nbRows, nbCols));
		PxReal h0 = getHeight(vertexIndex, samples);
		PxReal h1 = getHeight(vertexIndex + 1, samples);
		PxReal h2 = getHeight(vertexIndex + nbCols, samples);
		PxReal h3 = getHeight(vertexIndex + nbCols + 1, samples);
		// con0: cell entirely above the query band; con1: entirely below
		const bool con0 = maxy < h0 && maxy < h1 && maxy < h2 && maxy < h3;
		const bool con1 = miny > h0 && miny > h1 && miny > h2 && miny > h3;

		if (!(con0 || con1))
		{
			const PxHeightFieldSample& sample = getSample(vertexIndex, samples);

			const bool isMaterial1 = (i & 1) ? 1 : 0;
			PxU32 material = isMaterial1 ? sample.materialIndex1 : sample.materialIndex0;
			if (material != PxHeightFieldMaterial::eHOLE)
			{
				triangleIdx = isMaterial1 ? ((vertexIndex << 1) + 1) : (vertexIndex << 1);

				stackPtr[writeIndex] = make_uint4(cmInd, elementInd, triangleIdx, 0);

				writeIndex++;
			}
		}
	}//end of totalNumProcessed
}
|
||||
|
||||
#endif
|
||||
128
engine/third_party/physx/source/gpunarrowphase/src/CUDA/manifold.cuh
vendored
Normal file
128
engine/third_party/physx/source/gpunarrowphase/src/CUDA/manifold.cuh
vendored
Normal file
@@ -0,0 +1,128 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __MANIFOLD_CUH__
|
||||
#define __MANIFOLD_CUH__
|
||||
|
||||
#include "foundation/PxTransform.h"
|
||||
#include "PxgPersistentContactManifold.h"
|
||||
#include "PxgCommonDefines.h"
|
||||
#include "dataReadWriteHelper.cuh"
|
||||
|
||||
#define MULTIMANIFOLD_MAX_MANIFOLDS 4
|
||||
#define SUBMANIFOLD_MAX_CONTACTS 6
|
||||
|
||||
// Refreshes the cached contacts of a multi-manifold against the new relative
// transform aToB. One warp works on one pair; each lane handles one
// (manifold, contact) slot. Every contact's penetration is recomputed along
// its cached normal, and the in-plane drift of its witness points is
// measured. If any lane's contact drifted beyond projectBreakingThreshold,
// the whole manifold set is invalidated (mNbManifolds cleared to 0) and true
// is returned, signalling that full contact generation must run. Returns
// false when the cached manifolds remain usable.
// Warp-synchronous: all 32 lanes must reach the __any_sync below.
__device__ static inline bool refreshManifolds(
const physx::PxTransform & aToB,
physx::PxReal projectBreakingThreshold,
physx::PxgPersistentContactMultiManifold * PX_RESTRICT multiManifold
)
{
	using namespace physx;

	const PxU32 threadIdxInWarp = threadIdx.x & (WARP_SIZE - 1);
	// Compare squared distances below to avoid a sqrt per contact.
	const PxReal sqProjectBreakingThreshold = projectBreakingThreshold * projectBreakingThreshold;
	const PxU32 numManifolds = multiManifold->mNbManifolds;

	// Lane -> slot mapping: SUBMANIFOLD_MAX_CONTACTS consecutive lanes per manifold.
	PxU32 i = threadIdxInWarp;
	{
		bool lostContact = false;

		PxU8 curManifoldIdx = i / SUBMANIFOLD_MAX_CONTACTS;

		// refreshContactPoints
		if (curManifoldIdx < numManifolds)
		{
			PxU32 nbContacts = multiManifold->mNbContacts[curManifoldIdx];
			PxU8 curContactIdx = i % SUBMANIFOLD_MAX_CONTACTS;

			if (curContactIdx < nbContacts)
			{
				PxReal penNew;
				PxgContact & curContact = multiManifold->mContacts[curManifoldIdx][curContactIdx];

				PxVec3 pointA = curContact.pointA;
				PxVec3 pointB = curContact.pointB;

				const PxVec3 localAInB = aToB.transform(pointA); // from a to b
				const PxVec3 v = localAInB - pointB;

				PxVec3 normal = curContact.normal;
				PxReal penOld = curContact.penetration;
				// New penetration = separation projected onto the cached normal.
				penNew = v.dot(normal);

				// Drift measured in the contact plane: remove the old
				// penetration component before comparing the witness points.
				const PxVec3 projectedPoint = localAInB - normal * penOld;
				const PxVec3 projectedDifference = pointB - projectedPoint;

				const PxReal distance2d = projectedDifference.magnitudeSquared();

				curContact.penetration = penNew;

				if (distance2d > sqProjectBreakingThreshold)
				{
					lostContact = true;
				}
			}
		}

		// If we lost any contact - we do the full contact gen, we don't need any contacts then
		if (__any_sync(FULL_MASK, (PxU32)lostContact))
		{
			// Single lane clears the count to avoid redundant writes.
			if(threadIdxInWarp == 0)
				multiManifold->mNbManifolds = 0;
			return true;
		}
	}

	return false;
}
|
||||
|
||||
// Decides whether the cached multi-manifold must be discarded because the
// bodies moved too much since it was generated. The current relative pose
// aToB is compared with the pose stored in the manifold: the manifold is
// invalid when the largest per-axis translation delta exceeds
// minMargin * ratio, or when the rotation changed by more than roughly one
// degree (quaternion dot product below 0.9998).
// Warp-cooperative: PxAlignedTransform_ReadWarp must be executed by the warp.
__device__ static inline bool invalidateManifold(const physx::PxTransform & aToB, physx::PxgPersistentContactMultiManifold& multiManifold,
	const physx::PxReal minMargin, const PxReal ratio)
{
	using namespace physx;

	PxAlignedTransform cachedPose;
	PxAlignedTransform_ReadWarp(cachedPose, &multiManifold.mRelativeTransform);

	const PxReal translationLimit = minMargin * ratio;
	const PxReal rotationLimit = 0.9998f; //about 1 degree

	const PxVec3 translationDelta = aToB.p - PxVec3(cachedPose.p.x, cachedPose.p.y, cachedPose.p.z);
	const PxReal deltaX = PxAbs(translationDelta.x);
	const PxReal deltaY = PxAbs(translationDelta.y);
	const PxReal deltaZ = PxAbs(translationDelta.z);

	const PxReal largestAxisDelta = PxMax(deltaX, PxMax(deltaY, deltaZ));
	const PxReal quatDot = aToB.q.dot((PxQuat)cachedPose.q);

	return (largestAxisDelta > translationLimit) || (quatDot < rotationLimit);
}
|
||||
|
||||
#endif
|
||||
51
engine/third_party/physx/source/gpunarrowphase/src/CUDA/materialCombiner.cuh
vendored
Normal file
51
engine/third_party/physx/source/gpunarrowphase/src/CUDA/materialCombiner.cuh
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __MATERIAL_COMBINER_CUH__
|
||||
#define __MATERIAL_COMBINER_CUH__
|
||||
|
||||
#include "PxsMaterialCombiner.h"
|
||||
|
||||
// Looks up the two materials of a contacting pair and delegates to the
// shared PhysX combiner, which produces the blended friction, restitution
// and damping values plus the combined material flags used by the solver.
__device__ static __forceinline__
void combineMaterials(const PxsMaterialData* PX_RESTRICT materials, PxU16 index0, PxU16 index1,
	PxU32& combinedFlags,
	PxReal& combinedStaticFriction,
	PxReal& combinedDynamicFriction,
	PxReal& combinedRestitution,
	PxReal& combinedDamping
)
{
	PxsCombineMaterials(materials[index0], materials[index1],
		combinedStaticFriction, combinedDynamicFriction,
		combinedRestitution, combinedFlags, combinedDamping);
}
|
||||
|
||||
#endif
|
||||
165
engine/third_party/physx/source/gpunarrowphase/src/CUDA/midphaseAllocate.cuh
vendored
Normal file
165
engine/third_party/physx/source/gpunarrowphase/src/CUDA/midphaseAllocate.cuh
vendored
Normal file
@@ -0,0 +1,165 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __MIDPHASE_ALLOCATION_CUH__
|
||||
#define __MIDPHASE_ALLOCATION_CUH__
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "convexNpCommon.h"
|
||||
#include <vector_types.h>
|
||||
|
||||
using namespace physx;
|
||||
|
||||
/*
|
||||
// TODO: read/write infos
|
||||
|
||||
AD 8.2023: some comments about all of this. The size of the collision stack is PxGpuDynamicsMemoryConfig::collisionStackSize
|
||||
|
||||
The following parameters are involved:
|
||||
* numpairsAfterMP: The number of convex - triangle pairs after the midphase. These are pairs between 1 triangle of a trimesh
|
||||
and a complete convex. Will be padded to a multiple of 4 during stack allocation to satisfy 16B alignment.
|
||||
* numPaddedPairsAfterMP: A pair here is again a single triangle of a trimesh and a complete convex. But in contrast to the one above,
|
||||
the list holding the triangle indices is padded to multiples of 4 and duplicated for each contact manager.
|
||||
This is done because we do a radix sort on these indices later, which happens on uint4s. So all the lists
|
||||
need to be padded to a multiple of 4, and we need the temp buffer as well. So this count represents the length
|
||||
of the flat list needed to place all the triangle indices, with per-CM padding and temp buffers.
|
||||
The list will look something like this (x is padding, y is temp buffer):
|
||||
|
||||
cm 0 | cm 1 | cm 2 ..
|
||||
0 1 2 p t t t t | 3 4 p p t t t t | 5 6 7 8 9 p p p t t t t t t t t | ...
|
||||
|
||||
numPairsAfterMP4 = (numPairsAfterMP + 3) & (~3)
|
||||
|
||||
This is the layout of the collision stack (growing to the bottom):
|
||||
|
||||
|---------
|
||||
|* PairsGPU: uint4 x numPairsAfterMP4
|
||||
| x: contact manager index
|
||||
| y: triangle index (of the triangle mesh)
|
||||
| z: offset from first triangle found for this contact manager
|
||||
| w: npWorkItem.shapeRef1 (ref of convex mesh)
|
||||
|
|
||||
|---------
|
||||
|* TriNormalAndIndex: ConvexTriNormalAndIndex x numPairsAfterMP4
|
||||
| x,y,z: minNormal from convexTriangleContactGen (confirm: the normal is from a convex mesh triangle, right?)
|
||||
| w: remapCPUIndex of trimesh triangle index. (What? Why?)
|
||||
|---------
|
||||
|* Contacts: ConvexTriContacts x numPairsAfterMP4
|
||||
|
|
||||
|---------
|
||||
|* TriangleIndices: PxU32 * numPaddedPairsAfterMP
|
||||
|
|
||||
|---------
|
||||
|* maxDepth: PxReal * numPairsAfterMP4
|
||||
|
|
||||
|---------
|
||||
|* intermediate data: ConvexTriIntermediateData * numPairsAfterMP4
|
||||
|
|
||||
|---------
|
||||
|* secondPass data: PxU32 * numPairsAfterMP4
|
||||
|
||||
We only fill the pairs array in the midphase kernels. Everything else will be written in the core narrowphase kernels.
|
||||
Note that pairs belonging to the same contact manager are not necessarily consecutive in the pairs list, because of the warp-wide
|
||||
write in the midphase. We do maintain a count + offset into a per-CM list for each pair when filling the pairs list with atomics.
|
||||
This count + offset is then used to calculate the index into per-CM lists to write all the other data in the core narrowphase kernels.
|
||||
|
||||
*/
|
||||
|
||||
// Bytes of collision-stack storage consumed per convex-triangle pair: one
// slot in each per-pair array laid out on the stack (pairs, normal+index,
// contacts, max depth, intermediate data, second-pass indices) plus two
// PxU32 for the triangle-index list and its radix-sort temp buffer.
// Padding is intentionally excluded here; see calculateAdditionalPadding().
__device__ __forceinline__ PxU32 calculateConvexMeshPairMemRequirement()
{
	PxU32 bytesPerPair = sizeof(uint4);                 // sPairsGPU
	bytesPerPair += sizeof(ConvexTriNormalAndIndex);    // normal and index, 4 floats
	bytesPerPair += sizeof(ConvexTriContacts);          // space for 4 contacts per pair
	bytesPerPair += sizeof(PxReal);                     // max depth
	bytesPerPair += sizeof(ConvexTriIntermediateData);  // intermediate data
	bytesPerPair += sizeof(PxU32);                      // second-pass indices
	bytesPerPair += sizeof(PxU32) * 2;                  // triangle indices + radix-sort temp
	return bytesPerPair;
}
|
||||
|
||||
// This is calculating the worst-case padding intentionally to avoid stack overflows!
|
||||
__device__ PX_FORCE_INLINE PxU32 calculateAdditionalPadding(const PxU32 numContactManagers)
|
||||
{
|
||||
// the number of pairs are padded to a multiple of 4, so we need additional space for at most 3.
|
||||
// we subtract the memory needed for the triangle index buffer here because that one is allocated with a different count.
|
||||
const PxU32 afterPairPadding = 3 * (calculateConvexMeshPairMemRequirement() - 2 * sizeof(PxU32));
|
||||
|
||||
// for the triangle index buffer, we pad the triangle count for each contact manager to a multiple of 4.
|
||||
// We also need the temp buffer for the radix sort, so that means we need to account for another
|
||||
// 3 + 3 PxU32s per contact manager.
|
||||
const PxU32 afterTriangleIndexPadding = numContactManagers * 6 * sizeof(PxU32);
|
||||
|
||||
return (afterPairPadding + afterTriangleIndexPadding);
|
||||
}
|
||||
|
||||
// Maximum number of convex-triangle pairs that fit into a collision stack
// of stackSizeBytes, after reserving worst-case padding for the given
// number of contact managers.
//
// Fix: 'stackSizeBytes - additionalPadding' is an unsigned subtraction.
// When the configured stack is smaller than the worst-case padding it
// previously wrapped around and reported an enormous pair budget,
// guaranteeing a stack overflow downstream. Clamp to zero instead so
// callers see that no pairs fit.
__device__ PX_FORCE_INLINE PxU32 calculateMaxPairs(const PxU32 stackSizeBytes, const PxU32 numContactManagers)
{
	const PxU32 additionalPadding = calculateAdditionalPadding(numContactManagers);
	if (additionalPadding >= stackSizeBytes)
		return 0;

	const PxU32 memAvailableAfterPadding = stackSizeBytes - additionalPadding;
	const PxU32 memRequiredPerPair = calculateConvexMeshPairMemRequirement();

	return (memAvailableAfterPadding / memRequiredPerPair);
}
|
||||
|
||||
// Carves the per-pair narrowphase arrays out of the collision stack in the
// exact layout documented in the comment block above. This only computes
// and returns pointers; no data is written here. The pairs array itself was
// already filled at the start of the stack by the midphase kernels.
//  - numpairsAfterMPUnpadded: raw pair count after midphase; rounded up to
//    a multiple of 4 here so every following array stays 16-byte aligned.
//  - numPaddedPairsAfterMP: per-contact-manager padded and duplicated count
//    used to size the triangle-index list plus its radix-sort temp buffer.
// The array order below is a layout contract with the narrowphase kernels;
// do not reorder the pointer bumps.
__device__ __forceinline__ void midphaseAllocate( ConvexTriNormalAndIndex** cvxTriNIGPU, ConvexTriContacts** cvxTriContactsGPU,
PxReal** cvxTriMaxDepthGPU, ConvexTriIntermediateData** cvxTriIntermGPU, PxU32** orderedCvxTriIntermGPU,
PxU32** cvxTriSecondPassedGPU, uint4** pairsGPU,
PxU8 * stackPtr, const PxU32 numpairsAfterMPUnpadded, const PxU32 numPaddedPairsAfterMP)
{
	//Pad up to a multiple of 4 so we don't get misaligned addresses
	const PxU32 numPairsAfterMP = (numpairsAfterMPUnpadded + 3)&(~3);

	// Bump pointer that walks down the stack as each array is carved out.
	PxU8 * newGpuIntermStackPtr = reinterpret_cast<PxU8* >(stackPtr);

	// the pairs already exist, they have been written in the midphase kernel. So this allocation
	// only makes sure we write all the other data behind it.
	*pairsGPU = (uint4 *)newGpuIntermStackPtr;
	newGpuIntermStackPtr += numPairsAfterMP * sizeof(uint4);

	// Allocate intermediate arrays:
	*cvxTriNIGPU = reinterpret_cast<ConvexTriNormalAndIndex* >(newGpuIntermStackPtr);
	newGpuIntermStackPtr += numPairsAfterMP * sizeof(ConvexTriNormalAndIndex);

	*cvxTriContactsGPU = reinterpret_cast<ConvexTriContacts* >(newGpuIntermStackPtr);
	newGpuIntermStackPtr += numPairsAfterMP * sizeof(ConvexTriContacts); //ConvexTriContacts has space of 4 contacts

	*orderedCvxTriIntermGPU = reinterpret_cast<PxU32*>(newGpuIntermStackPtr);
	newGpuIntermStackPtr += numPaddedPairsAfterMP * sizeof(PxU32); // AD: see convexNpCommon.h - numPaddedPairsAfterMP is already padded up to a multiple of 4 and duplicated to allow for a radix sort.

	*cvxTriMaxDepthGPU = reinterpret_cast<PxReal*>(newGpuIntermStackPtr);
	newGpuIntermStackPtr += numPairsAfterMP * sizeof(PxReal);

	*cvxTriIntermGPU = reinterpret_cast<ConvexTriIntermediateData*>(newGpuIntermStackPtr);
	newGpuIntermStackPtr += numPairsAfterMP * sizeof(ConvexTriIntermediateData);

	*cvxTriSecondPassedGPU = reinterpret_cast<PxU32*>(newGpuIntermStackPtr);
	newGpuIntermStackPtr += numPairsAfterMP * sizeof(PxU32);
}
|
||||
|
||||
#endif
|
||||
193
engine/third_party/physx/source/gpunarrowphase/src/CUDA/nputils.cuh
vendored
Normal file
193
engine/third_party/physx/source/gpunarrowphase/src/CUDA/nputils.cuh
vendored
Normal file
@@ -0,0 +1,193 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __NPUTILS_CUH__
|
||||
#define __NPUTILS_CUH__
|
||||
|
||||
#include "foundation/PxQuat.h"
|
||||
#include "foundation/PxTransform.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "PxgCommonDefines.h"
|
||||
#include "utils.cuh"
|
||||
|
||||
// Load (ldS) / store (stS) helpers for objects living in volatile memory
// (typically shared memory). The generic forms rely on the type being
// directly assignable through a volatile reference; the specializations
// below copy PxVec3 / PxQuat / PxTransform component-wise, since whole-struct
// assignment through a volatile reference is not available for these types.
template <typename T> __device__ static inline T ldS(const volatile T& val) { return val; }
template <typename T> __device__ static inline void stS(volatile T& dst, const T& src) { dst = src; }

// PxVec3: component-wise volatile copy.
template <> __device__ inline physx::PxVec3 ldS<physx::PxVec3>(const volatile physx::PxVec3& val) { return physx::PxVec3(val.x, val.y, val.z); }
template <> __device__ inline void stS<physx::PxVec3>(volatile physx::PxVec3& dst, const physx::PxVec3& src) { dst.x = src.x, dst.y = src.y, dst.z = src.z; }

// PxQuat: component-wise volatile copy.
template <> __device__ __forceinline__ physx::PxQuat ldS<physx::PxQuat>(const volatile physx::PxQuat& val) { return physx::PxQuat(val.x, val.y, val.z, val.w); }
template <> __device__ __forceinline__ void stS<physx::PxQuat>(volatile physx::PxQuat& dst, const physx::PxQuat& src) { dst.x = src.x, dst.y = src.y, dst.z = src.z, dst.w = src.w; }

// PxTransform: composed from the PxVec3/PxQuat specializations above.
template <> __device__ __forceinline__ physx::PxTransform ldS<physx::PxTransform>(const volatile physx::PxTransform& val) { return physx::PxTransform(ldS(val.p), ldS(val.q)); }
template <> __device__ __forceinline__ void stS<physx::PxTransform>(volatile physx::PxTransform& dst, const physx::PxTransform& src)
{
	stS(dst.p, src.p);
	stS(dst.q, src.q);
}
|
||||
|
||||
// Cyclic successor in {0, 1, 2}: 0 -> 1, 1 -> 2, 2 -> 0.
static __device__ __forceinline__ int next3(int x)
{
	if (x == 2)
		return 0;
	return x + 1;
}
|
||||
|
||||
// Cyclic predecessor in {0, 1, 2}: 0 -> 2, 1 -> 0, 2 -> 1.
static __device__ inline int prev3(int x)
{
	if (x == 0)
		return 2;
	return x - 1;
}
|
||||
|
||||
|
||||
// operators dealing with 8 4-vectors in a warp
|
||||
// operators dealing with 8 4-vectors in a warp
// (each group of 4 consecutive lanes holds one 4-vector; the lowest lane of
// a group is that vector's x component)
static __device__ inline float dot(float a, float b) // leaves answer in x
{
	float x = a*b;
	// Shuffle-down by 1 and 2 folds lanes y and z of the 4-lane group onto
	// its x lane; only that lane ends up with the full 3-component dot.
	return x + __shfl_down_sync(FULL_MASK, x,1) + __shfl_down_sync(FULL_MASK, x,2);
}
|
||||
|
||||
// One component of a warp-lane cross product: lane indices cp0/cp1 select
// which lanes of a and b to combine, computing a[cp0]*b[cp1] - a[cp1]*b[cp0].
static __device__ inline float cross(float a, float b, int cp0, int cp1)
{
	return __shfl_sync(FULL_MASK, a, cp0) * __shfl_sync(FULL_MASK, b, cp1) - __shfl_sync(FULL_MASK, a, cp1) * __shfl_sync(FULL_MASK, b, cp0);
}
|
||||
|
||||
static __device__ inline float splatV(int index, float v) // splat a vector across all 8 vectors
{
	// Every lane reads component (threadIdx.x & 3) of vector 'index'.
	// Relies on '|' binding looser than '&': (index << 2) | (threadIdx.x & 3).
	return __shfl_sync(FULL_MASK, v, (index << 2) | threadIdx.x & 3);
}
|
||||
|
||||
static __device__ inline float splatX(float a) // splat the x-component of each vector into yzw
{
	// 28 == ~3 & 31: every lane reads from the first lane of its 4-lane group.
	return __shfl_sync(FULL_MASK, a, threadIdx.x & 28);
}
|
||||
|
||||
// Normalizes each 4-lane vector: scales every component by the reciprocal
// length computed from the vector's own dot product.
static __device__ inline float normalize(float x) // normalize each vector
{
	return x * splatX(rsqrt(dot(x,x))); // TODO: precision
}
|
||||
|
||||
static __device__ inline float magnitude(float x) // magnitude of each vector, splatted across its 4 lanes
{
	return splatX(sqrt(dot(x,x))); // TODO: precision
}
|
||||
|
||||
template<int N>
static __device__ inline int packXFlags(bool a) // pack a boolean from each x into a byte
{
	// Lane i reads the flag of vector i's x lane (lane 4*i); ballot packs the
	// warp-wide flags into a bitmask, masked down to the low N vector flags.
	return __ballot_sync(FULL_MASK, __shfl_sync(FULL_MASK, a, threadIdx.x<<2)) & ((1<<N)-1);
}
|
||||
|
||||
template<int N>
|
||||
static __device__ inline bool allX(bool a) // check some number initial set of the x-flags are all set
|
||||
{
|
||||
return packXFlags<N>(a) == ((1<<N)-1);
|
||||
}
|
||||
|
||||
template<int N>
|
||||
static __device__ inline bool anyX(bool a) // check if any of an initial segment of x-flags are all set
|
||||
{
|
||||
return packXFlags<N>(a) != 0;
|
||||
}
|
||||
|
||||
// Each lane of a 4-lane group loads one float from the vector; the fourth
// lane reads one float past the PxVec3, so the source must be padded.
static __device__ inline float loadV3Unsafe(const physx::PxVec3* v) // NB requires padding (i.e. the 4th element is loaded)
{
	return reinterpret_cast<const float*>(v)[threadIdx.x&3];
}
|
||||
|
||||
// Stores the 4-lane-group vector held by lanes [4*index, 4*index+2] into the
// PxVec3 at *v; the fourth lane of the group is excluded so no padding float
// is written. Only the three lanes of group 'index' perform a store.
static __device__ void storeV3(volatile physx::PxVec3* v, int index, float value)
{
	int lane = index<<2;
	volatile float* dest = reinterpret_cast<volatile float*>(v);
	if(threadIdx.x>=lane && threadIdx.x<lane+3)
	{
		dest[threadIdx.x&3] = value;
	}
}
|
||||
|
||||
// experimentally, seems more register-efficient to coalesce this
|
||||
// experimentally, seems more register-efficient to coalesce this
// Dot product of v1 with lane shuffle0's copy of v0, component-shuffled
// across the lanes in syncMask (all masked lanes must execute this).
static __device__ __forceinline__
physx::PxReal shuffleDot(const physx::PxU32 syncMask, const physx::PxVec3& v0, int shuffle0, const physx::PxVec3& v1)
{
	return __shfl_sync(syncMask, v0.x, shuffle0)*v1.x + __shfl_sync(syncMask, v0.y, shuffle0)*v1.y + __shfl_sync(syncMask, v0.z, shuffle0)*v1.z;
}
|
||||
|
||||
// Warp-wide arg-max over the lanes selected by 'mask'. On return, maxV holds
// the maximum value in every lane; the return value is the lowest masked
// lane index holding that maximum. All 32 lanes must execute this together.
static __device__ __forceinline__
physx::PxU32 maxIndex(physx::PxReal v, physx::PxU32 mask, physx::PxReal& maxV)
{
	// Lanes outside the mask contribute -FLT_MAX so they can never win.
	maxV = mask & (1 << (threadIdx.x&31)) ? v : -FLT_MAX;

	// Butterfly (xor-shuffle) reduction: 5 steps leave the max in every lane.
	maxV = fmaxf(maxV, __shfl_xor_sync(FULL_MASK, maxV, 16));
	maxV = fmaxf(maxV, __shfl_xor_sync(FULL_MASK, maxV, 8));
	maxV = fmaxf(maxV, __shfl_xor_sync(FULL_MASK, maxV, 4));
	maxV = fmaxf(maxV, __shfl_xor_sync(FULL_MASK, maxV, 2));
	maxV = fmaxf(maxV, __shfl_xor_sync(FULL_MASK, maxV, 1));

	// Ballot which masked lanes hold the max; pick the lowest such lane.
	return physx::lowestSetIndex(__ballot_sync(FULL_MASK, maxV == v)&mask);
}
|
||||
|
||||
// Warp-wide arg-min over the lanes selected by 'mask'; mirror of maxIndex.
// On return, minV holds the minimum in every lane; the return value is the
// lowest masked lane index holding it. All 32 lanes must execute together.
static __device__ __forceinline__
physx::PxU32 minIndex(physx::PxReal v, physx::PxU32 mask, physx::PxReal& minV)
{
	// Lanes outside the mask contribute FLT_MAX so they can never win.
	minV = mask & (1 << (threadIdx.x & 31)) ? v : FLT_MAX;

	// Butterfly (xor-shuffle) reduction: 5 steps leave the min in every lane.
	minV = fminf(minV, __shfl_xor_sync(FULL_MASK, minV, 16));
	minV = fminf(minV, __shfl_xor_sync(FULL_MASK, minV, 8));
	minV = fminf(minV, __shfl_xor_sync(FULL_MASK, minV, 4));
	minV = fminf(minV, __shfl_xor_sync(FULL_MASK, minV, 2));
	minV = fminf(minV, __shfl_xor_sync(FULL_MASK, minV, 1));

	// Ballot which masked lanes hold the min; pick the lowest such lane.
	return physx::lowestSetIndex(__ballot_sync(FULL_MASK, minV == v)&mask);
}
|
||||
|
||||
// similar as above but only with blocks of 4 threads
|
||||
// mask must have exactly 4 consecutive bits set and the corresponding
|
||||
// threads must execute this function together
|
||||
// similar as above but only with blocks of 4 threads
// mask must have exactly 4 consecutive bits set and the corresponding
// threads must execute this function together
// Returns the lowest lane index (within mask) holding the group minimum.
static __device__ __forceinline__
physx::PxU32 minIndex4(physx::PxReal v, physx::PxU32 mask, physx::PxU32 threadIndexInGroup)
{
	// sanity checks that function has been called as expected
	assert(__popc(mask) == 4); // exactly 4 threads in mask
	assert(mask & (1 << (threadIdx.x & 31))); // executing thread must be in the mask
	assert(threadIndexInGroup == 0 || (mask & (1 << (threadIdx.x - 1 & 31)))); // threads consecutive

	physx::PxReal minV = v;

	// Inclusive-scan style min: after the shift-by-1 step, lane i holds
	// min(v[i-1], v[i]) for i > 0.
	physx::PxReal minV_m1 = __shfl_sync(mask, minV, (threadIdx.x & 31) - 1);
	if(threadIndexInGroup > 0)
		minV = fminf(minV, minV_m1);

	// Shift-by-2 step: only lane 3 needs the full 4-element minimum, so only
	// it combines (lane 2 is left partially reduced and is never read).
	physx::PxReal minV_m2 = __shfl_sync(mask, minV, (threadIdx.x & 31) - 2);
	if(threadIndexInGroup > 2)
		minV = fminf(minV, minV_m2);

	// Now the last thread (idx 3) of the group knows the min.
	// Send it back to all the threads.
	minV = __shfl_sync(mask, minV, (threadIdx.x & 31) | 3);

	return physx::lowestSetIndex(__ballot_sync(mask, minV == v)&mask);
}
|
||||
|
||||
#endif
|
||||
190
engine/third_party/physx/source/gpunarrowphase/src/CUDA/pairManagement.cu
vendored
Normal file
190
engine/third_party/physx/source/gpunarrowphase/src/CUDA/pairManagement.cu
vendored
Normal file
@@ -0,0 +1,190 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include <assert.h>
|
||||
#include "SparseRemove.cuh"
|
||||
|
||||
#include "cudaNpCommon.h"
|
||||
#include "PxgPersistentContactManifold.h"
|
||||
#include "PxsContactManagerState.h"
|
||||
#include "PxgContactManager.h"
|
||||
#include "PxgNpKernelIndices.h"
|
||||
|
||||
namespace physx
{
	// Forward declaration only: contact managers are handled as opaque
	// pointers in this translation unit.
	class PxsContactManager;

	// Empty host-side symbol -- presumably referenced from host code to
	// force this module (and its kernels) to be linked in; TODO confirm.
	extern "C" __host__ void initNarrowphaseKernels7() {}
}
|
||||
|
||||
// Pair-removal stage 1: initialize the keep/drop scratch buffer
// (mTempAccumulator) over all mNbPairs entries for the mNbToRemove removals.
extern "C" __global__ void removeContactManagers_Stage1(const PxgPairManagementData* PX_RESTRICT pairData)
{
	initializeKeepDropBuffer(pairData->mTempAccumulator, pairData->mNbPairs, pairData->mNbToRemove);
}
|
||||
|
||||
// Pair-removal stage 2: mark the pair indices listed in mRemoveIndices as
// dropped in the keep/drop scratch buffer.
extern "C" __global__ void removeContactManagers_Stage2(const PxgPairManagementData* PX_RESTRICT pairData)
{
	markKeepDropBuff(pairData->mRemoveIndices, pairData->mNbToRemove, pairData->mTempAccumulator, pairData->mNbPairs);
}
|
||||
|
||||
// Pair-removal stage 3: per-block scan of the keep/drop buffer, writing
// block-level partial sums into mBlockSharedAccumulator. Launch dimensions
// are baked into the template parameters.
extern "C" __global__ void removeContactManagers_Stage3(const PxgPairManagementData* PX_RESTRICT pairData)
{
	processKeepDropBuff<PxgNarrowPhaseBlockDims::REMOVE_CONTACT_MANAGERS, PxgNarrowPhaseGridDims::REMOVE_CONTACT_MANAGERS>
		(pairData->mTempAccumulator, pairData->mNbPairs, pairData->mBlockSharedAccumulator);
}
|
||||
|
||||
// Pair-removal stage 4: fold the per-block partial sums back into the
// keep/drop buffer, producing the global run-sum used for swap indexing.
extern "C" __global__ void removeContactManagers_Stage4(const PxgPairManagementData* PX_RESTRICT pairData)
{
	accumulateKeepDrop<PxgNarrowPhaseGridDims::REMOVE_CONTACT_MANAGERS>(pairData->mTempAccumulator, pairData->mNbPairs, pairData->mBlockSharedAccumulator);
}
|
||||
|
||||
// Pair-removal stage 5: compaction. For each of the nbToSwap removal holes,
// a surviving pair is moved into it so the live pairs stay densely packed.
// Work is distributed in groups of 16 threads per swap: the group's first
// thread copies the small per-pair records, and -- when copyManifold is set
// -- all 16 threads cooperatively copy the large persistent contact
// manifold in float4 strides.
extern "C" __global__ void removeContactManagers_Stage5(const PxgPairManagementData* PX_RESTRICT pairData, const bool copyManifold)
{
	const PxU32* PX_RESTRICT globalRunSumBuffer = pairData->mTempAccumulator;
	const PxU32 totalSize = pairData->mNbPairs;
	PxU32 nbToSwap = getNbSwapsRequired(globalRunSumBuffer, totalSize, pairData->mNbToRemove);
	// Parallel views of the per-pair arrays that must be kept in lockstep.
	PxgContactManagerInput* inputData = pairData->mContactManagerInputData;
	PxsContactManagerOutput* outputData = pairData->mContactManagerOutputData;
	PxgPersistentContactManifold* manifolds = reinterpret_cast<PxgPersistentContactManifold*>(pairData->mPersistentContactManagers);
	PxsContactManager** cms = pairData->mCpuContactManagerMapping;
	Sc::ShapeInteraction** sis = pairData->mShapeInteractions;
	PxReal* rest = pairData->mRestDistances;
	PxsTorsionalFrictionData* torsional = pairData->mTorsionalData;

	// Grid-wide loop: each group of 16 consecutive threads handles one swap.
	for(PxU32 i = (threadIdx.x + WARP_SIZE * threadIdx.y + blockIdx.x * blockDim.x * blockDim.y)/16;
		i < nbToSwap; i+= ((blockDim.x * blockDim.y * gridDim.x)/16))
	{
		PxU32 srcIndex, dstIndex;
		getSwapIndices(globalRunSumBuffer, totalSize, i, nbToSwap, dstIndex, srcIndex);

		// One thread per group copies the small per-pair records.
		if((threadIdx.x&15) == 0)
		{
			inputData[dstIndex] = inputData[srcIndex];
			outputData[dstIndex] = outputData[srcIndex];
			cms[dstIndex] = cms[srcIndex];
			sis[dstIndex] = sis[srcIndex];
			rest[dstIndex] = rest[srcIndex];
			torsional[dstIndex] = torsional[srcIndex];
		}

		if (copyManifold)
		{
			// All 16 threads copy the manifold cooperatively, float4 at a time.
			float4* dst = (float4*)&manifolds[dstIndex];
			float4* src = (float4*)&manifolds[srcIndex];

			const PxU32 nbFloat4 = sizeof(PxgPersistentContactManifold)/sizeof(float4);

			for(PxU32 idx = threadIdx.x&15; idx < nbFloat4; idx += 16)
				dst[idx] = src[idx];
		}
	}
}
|
||||
|
||||
// Pair-removal pipeline, stage 5/5 (convex-triangle multi-manifold variant):
// identical compaction scheme to removeContactManagers_Stage5, but one full
// warp (WARP_SIZE threads) handles each swap because the multi-manifold is
// larger. Lane 0 copies the per-pair bookkeeping entries; all lanes cooperate
// to copy the manifold in float4-sized chunks when copyManifold is set.
// NOTE(review): the lane test `threadIdx.x == 0` assumes blockDim.x == WARP_SIZE
// so that each (threadIdx.y, blockIdx.x) row maps to exactly one warp - confirm
// against the launch configuration.
extern "C" __global__ void removeContactManagers_Stage5_CvxTri(const PxgPairManagementData* PX_RESTRICT pairData, const bool copyManifold)
{
	// Run-sum over the keep/drop flags built by stages 1-4.
	const PxU32* PX_RESTRICT globalRunSumBuffer = pairData->mTempAccumulator;
	const PxU32 totalSize = pairData->mNbPairs;
	PxU32 nbToSwap = getNbSwapsRequired(globalRunSumBuffer, totalSize, pairData->mNbToRemove);
	PxgContactManagerInput* inputData = pairData->mContactManagerInputData;
	PxsContactManagerOutput* outputData = pairData->mContactManagerOutputData;
	PxgPersistentContactMultiManifold* manifolds = reinterpret_cast<PxgPersistentContactMultiManifold*>( pairData->mPersistentContactManagers);
	PxsContactManager** cms = pairData->mCpuContactManagerMapping;
	Sc::ShapeInteraction** sis = pairData->mShapeInteractions;
	PxReal* rest = pairData->mRestDistances;
	PxsTorsionalFrictionData* torsional = pairData->mTorsionalData;

	// Flattened thread index / WARP_SIZE selects the swap this warp handles;
	// the loop strides by the total number of warps in the grid.
	for (PxU32 i = (threadIdx.x + WARP_SIZE * threadIdx.y + blockIdx.x * blockDim.x * blockDim.y)/WARP_SIZE;
		i < nbToSwap; i += ((blockDim.x * blockDim.y * gridDim.x))/WARP_SIZE)
	{
		PxU32 srcIndex, dstIndex;
		getSwapIndices(globalRunSumBuffer, totalSize, i, nbToSwap, dstIndex, srcIndex);

		// Only lane 0 of the warp copies the scalar/pointer data.
		if(threadIdx.x == 0)
		{
			inputData[dstIndex] = inputData[srcIndex];
			outputData[dstIndex] = outputData[srcIndex];
			cms[dstIndex] = cms[srcIndex];
			sis[dstIndex] = sis[srcIndex];
			rest[dstIndex] = rest[srcIndex];
			torsional[dstIndex] = torsional[srcIndex];
		}

		if (copyManifold)
		{
			float4* dst = (float4*)&manifolds[dstIndex];
			float4* src = (float4*)&manifolds[srcIndex];

			// All lanes stride through the multi-manifold copying float4 words.
			// Loop counter renamed from 'i' to 'elem': the original shadowed the
			// outer swap-loop counter 'i', which is error-prone and trips
			// clang-tidy/compiler shadow warnings.
			for (PxU32 elem = threadIdx.x; elem < sizeof(PxgPersistentContactMultiManifold) / sizeof(float4); elem += WARP_SIZE)
			{
				dst[elem] = src[elem];
			}
		}
	}
}
|
||||
|
||||
// Replicates one template structure ('source', dataSize bytes) into
// nbTimesToReplicate consecutive slots of 'destination'.
// The template is first staged into shared memory in a component-split (SoA)
// layout - four contiguous runs holding all x, then y, then z, then w
// components - so each replicating thread rebuilds its float4 from four
// 32-bit shared-memory loads.
extern "C" __global__ void initializeManifolds(float4* destination, const float4* source, PxU32 dataSize, PxU32 nbTimesToReplicate)
{
	// Upper bound (in bytes) on the staged structure.
	// NOTE(review): there is no runtime check that dataSize <= MaxStructureSize;
	// presumably callers guarantee it - confirm.
	const PxU32 MaxStructureSize = 4096;

	__shared__ float sourceData[MaxStructureSize / sizeof(float)];

	// One thread per float4 of the structure.
	// NOTE(review): integer division assumes dataSize is a multiple of
	// sizeof(float4), and the staging below assumes nbThreadsPerElement <= blockDim.x.
	const PxU32 nbThreadsPerElement = dataSize / sizeof(float4);
	// Number of destination structures each block writes per iteration.
	const PxU32 nbPerBlock = blockDim.x / nbThreadsPerElement;
	// Threads at or beyond this index belong to no complete element group and idle.
	const PxU32 MaxThreadIdx = nbPerBlock*nbThreadsPerElement;

	const PxU32 nbBlocksRequired = (nbTimesToReplicate + nbPerBlock - 1) / nbPerBlock;

	// Stage the template into shared memory, splitting each float4 into its
	// x/y/z/w components across the four SoA runs.
	if (threadIdx.x < nbThreadsPerElement)
	{
		const float4 value = source[threadIdx.x];
		sourceData[threadIdx.x] = value.x;
		sourceData[threadIdx.x + nbThreadsPerElement] = value.y;
		sourceData[threadIdx.x + 2*nbThreadsPerElement] = value.z;
		sourceData[threadIdx.x + 3*nbThreadsPerElement] = value.w;
	}

	// Which float4 of its structure this thread writes.
	const PxU32 threadReadIdx = threadIdx.x%nbThreadsPerElement;

	// Publish the staged template to all threads in the block.
	__syncthreads();

	// Grid-stride over groups of nbPerBlock destination structures.
	for (PxU32 a = blockIdx.x; a < nbBlocksRequired; a += gridDim.x)
	{
		PxU32 startBlockIdx = nbPerBlock*a;
		float4* startPtr = destination + startBlockIdx*nbThreadsPerElement;

		// Index of the destination structure this thread contributes to.
		PxU32 idx = startBlockIdx + threadIdx.x / nbThreadsPerElement;

		if (threadIdx.x < MaxThreadIdx && idx < nbTimesToReplicate)
		{
			startPtr[threadIdx.x] = make_float4(sourceData[threadReadIdx], sourceData[threadReadIdx + nbThreadsPerElement],
				sourceData[threadReadIdx + 2 * nbThreadsPerElement], sourceData[threadReadIdx + 3 * nbThreadsPerElement]);
		}
	}
}
|
||||
225
engine/third_party/physx/source/gpunarrowphase/src/CUDA/particleCollision.cuh
vendored
Normal file
225
engine/third_party/physx/source/gpunarrowphase/src/CUDA/particleCollision.cuh
vendored
Normal file
@@ -0,0 +1,225 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __PARTICLE_COLLISION_CUH__
|
||||
#define __PARTICLE_COLLISION_CUH__
|
||||
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxTransform.h"
|
||||
#include "sphereCollision.cuh"
|
||||
#include "deformableCollision.cuh"
|
||||
#include "GuConvexSupport.h"
|
||||
/**
 * Returns distance and normal information for a particle-box contact.
 * If maxDist < PX_MAX_F32, then no result is provided if the distance is larger than maxDist.
 *
 * On success, 'normal' is the world-space contact normal (pointing from the box
 * toward the point) and 'dist' is the signed distance to the box surface
 * (negative when the point is inside the box).
 */
__device__ __forceinline__ static
bool contactPointBox(PxVec3& normal, PxReal& dist,
	const PxVec3& pointPos, const PxTransform& boxToWorld, const PxVec3& halfBoxExtents, const PxReal maxDist)
{
	// Work in the box's local frame.
	PxVec3 relPointPos = boxToWorld.transformInv(pointPos);

	// Closest point on (or in) the box to the query point.
	const PxVec3 closestPos(
		PxClamp(relPointPos.x, -halfBoxExtents.x, halfBoxExtents.x),
		PxClamp(relPointPos.y, -halfBoxExtents.y, halfBoxExtents.y),
		PxClamp(relPointPos.z, -halfBoxExtents.z, halfBoxExtents.z)
	);

	// Optional early-out: reject when the (outside) distance exceeds maxDist.
	// For interior points this squared distance is zero, so they always pass.
	if (maxDist < PX_MAX_F32)
	{
		PxReal maxDistSq = maxDist*maxDist;
		PxReal distSq = (relPointPos - closestPos).magnitudeSquared();
		if (distSq > maxDistSq)
		{
			return false;
		}
	}

	//check whether the point is inside the aabb
	const bool bX = halfBoxExtents.x >= PxAbs(relPointPos.x);
	const bool bY = halfBoxExtents.y >= PxAbs(relPointPos.y);
	const bool bZ = halfBoxExtents.z >= PxAbs(relPointPos.z);

	const bool isInside = bX && bY && bZ;
	if (isInside)
	{
		// Embedded point: push out through the nearest face; penetration
		// depth is reported as a negative distance.
		//dist from embedded point to box surface along 3 dimensions.
		const PxVec3 distToSurface = halfBoxExtents - closestPos.abs();

		//assume z is the smallest element of the distToSurface
		normal = closestPos.z >= 0.f ? PxVec3(0.f, 0.f, 1.f) : PxVec3(0.f, 0.f, -1.f);
		dist = -distToSurface.z;
		//find smallest element of distToSurface
		if (distToSurface.x <= distToSurface.y && distToSurface.x <= distToSurface.z)
		{
			normal = closestPos.x >= 0.f ? PxVec3(1.f, 0.f, 0.f) : PxVec3(-1.0f, 0.f, 0.f);
			dist = -distToSurface.x;
		}
		else if (distToSurface.y <= distToSurface.z)
		{
			normal = closestPos.y >= 0.f ? PxVec3(0.f, 1.f, 0.f) : PxVec3(0.f, -1.f, 0.f);
			dist = -distToSurface.y;
		}
	}
	else
	{
		// Exterior point: normal points from the closest surface point to the
		// query point; normalizeFast returns the pre-normalization length.
		normal = relPointPos - closestPos;
		dist = normal.normalizeFast();
	}
	// Rotate the local-frame normal back into world space.
	normal = boxToWorld.rotate(normal);
	return true;
}
|
||||
|
||||
/**
|
||||
* Retuns distance and normal information for a particle-sphere contact.
|
||||
* If maxDist < PX_MAX_F32, then no result is provided if the distance is larger than maxDist.
|
||||
*/
|
||||
__device__ __forceinline__ static
|
||||
bool contactPointSphere(PxVec3& normal, PxReal& dist,
|
||||
const PxVec3& pointPos, const PxVec3& spherePos, const PxReal sphereRadius, const PxReal maxDist)
|
||||
{
|
||||
PxVec3 relPointPos = pointPos - spherePos;
|
||||
const PxReal distCenterSq = relPointPos.magnitudeSquared();
|
||||
if (maxDist < PX_MAX_F32)
|
||||
{
|
||||
const PxReal maxDistCenter = maxDist + sphereRadius;
|
||||
const PxReal maxDistCenterSq = maxDistCenter*maxDistCenter;
|
||||
if (distCenterSq > maxDistCenterSq)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
const PxReal distCenter = PxSqrt(distCenterSq);
|
||||
normal = (distCenter > PX_EPS_REAL) ? relPointPos * (1.0f / distCenter) : PxVec3(1.0f, 0.0f, 0.0f);
|
||||
dist = distCenter - sphereRadius;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Returns distance and normal information for a point-capsule contact.
 * If maxDist < PX_MAX_F32, then no result is provided if the distance is larger than maxDist.
 *
 * The capsule is given by its center, unit axis direction, radius and half
 * height. On success, 'normal' points from the capsule's core segment toward
 * the point and 'dist' is the signed distance to the capsule surface
 * (negative inside).
 */
__device__ __forceinline__ static
bool contactPointCapsule(PxVec3& normal, PxReal& dist,
	const PxVec3& pointPos, const PxVec3& capsulePos, const PxVec3& capsuleDir, const PxReal capsuleRadius, const PxReal capsuleHalfHeight, const PxReal maxDist)
{
	// Core segment runs from -capsuleAxisVec to +capsuleAxisVec (capsule-centered frame).
	const PxVec3 capsuleAxisVec = capsuleDir * capsuleHalfHeight;
	const PxVec3 relPointPos = pointPos - capsulePos;
	// t parameterizes the segment: t=0 at -capsuleAxisVec, t=1 at +capsuleAxisVec.
	PxReal t;
	const PxReal distSegmentSq = distancePointSegmentSquared(-capsuleAxisVec, capsuleAxisVec, relPointPos, t);
	// Optional early-out: a surface distance of maxDist corresponds to a
	// segment distance of maxDist + capsuleRadius; compare squared to avoid a sqrt.
	if (maxDist < PX_MAX_F32)
	{
		const PxReal maxDistSegment = maxDist + capsuleRadius;
		const PxReal maxDistSegmentSq = maxDistSegment * maxDistSegment;
		if (distSegmentSq > maxDistSegmentSq)
		{
			return false;
		}
	}
	// (2t - 1) remaps t from [0,1] to [-1,1], giving the closest point on the
	// segment as capsuleAxisVec * (2t - 1).
	const PxVec3 distSegmentVec = relPointPos - capsuleAxisVec * (2.0f * t - 1.0f);
	const PxReal distSegment = PxSqrt(distSegmentSq);
	// Degenerate case: point (nearly) on the segment - fall back to a fixed axis.
	normal = (distSegment > PX_EPS_REAL) ? (distSegmentVec * (1.0f / distSegment)) : PxVec3(1.0f, 0.0f, 0.0f);
	dist = distSegment - capsuleRadius;
	return true;
}
|
||||
|
||||
/**
 * Combined speculative particle-triangle collision detection. Returns normal and distance.
 * If CCD is enabled two tests are performed. The first one decides if the triangle
 * is overlapping with the contact volume (sphere), which is typically located at a
 * predicted position of the particle, covering it's whole predicted path. The second
 * test then establishes the correct distance and normal with respect to the actual particle
 * position.
 * If CCD is disabled, particlePos == cVolumePos, and cVolumeRadius is the standard contact
 * distance.
 *
 * Back faces (particle behind the triangle plane) are culled; in that case, and
 * when the contact volume misses the triangle, the function returns false and
 * leaves 'normal'/'distance' untouched.
 */
__device__ __forceinline__ static
bool particleTriangleTest(PxVec3& normal, PxReal& distance,
	const PxVec3& particlePos, const PxVec3& cVolumePos, const PxReal cVolumeRadius,
	const PxVec3& triV0, const PxVec3& triV1, const PxVec3& triV2, const bool enableCCD)
{
	// Counter-clockwise winding defines the triangle's front face.
	const PxVec3 triNormal = ((triV1 - triV0).cross(triV2 - triV0)).getNormalized();
	const PxReal cVolumeRadiusSq = cVolumeRadius * cVolumeRadius;

	//contact volume test
	const PxReal cVolumePosPlaneDist = (cVolumePos - triV0).dot(triNormal);
	PxVec3 closestPos;
	// Quick plane rejection before the (more expensive) point-triangle distance.
	if (cVolumePosPlaneDist < cVolumeRadius)
	{
		PxReal distSq = distancePointTriangleSquared(cVolumePos, triV0, triV1, triV2, closestPos);
		if (distSq <= cVolumeRadiusSq)
		{
			//intersection test
			//back face culling with respect to current particle position
			const PxReal particlePosPlaneDist = (particlePos - triV0).dot(triNormal);
			if (particlePosPlaneDist > 0.0f)
			{
				if (enableCCD)
				{
					//in case of CCD we replace the closest and distance from the contact volume with
					//the corresponding from the current particle position
					distSq = distancePointTriangleSquared(particlePos, triV0, triV1, triV2, closestPos);
				}
				//assumes particlePos == cVolumePos if enableCCD == false.
				PxVec3 distVec = particlePos - closestPos;
				// Degenerate case (particle exactly on the triangle): use the face normal.
				normal = (distSq > 0.0f) ? distVec*PxRecipSqrt(distSq) : triNormal;
				distance = PxSqrt(distSq);
				return true;
			}
		}
	}
	return false;
}
|
||||
|
||||
/**
 * Returns distance and normal information for a particle-convexCore contact.
 * If maxDist < PX_MAX_F32, then no result is provided if the distance is larger than maxDist.
 *
 * The particle is modeled as a zero-margin point-type convex shape; GJK gives
 * the separating distance, and if the shapes are (nearly) touching, EPA is run
 * to recover the penetration depth (reported through the same 'dist' out-param).
 */
__device__ __forceinline__ static
bool particleConvexCore(PxVec3& normal, PxReal& dist, const PxVec3& particlePos,
	const Gu::ConvexShape& convex, const PxReal maxDist)
{
	// Represent the particle as a degenerate (point) convex with no margin.
	Gu::ConvexShape particle;
	particle.coreType = Gu::ConvexCore::Type::ePOINT;
	particle.pose = PxTransform(particlePos);
	particle.margin = 0;

	PxVec3 point0, point1;
	dist = Gu::RefGjkEpa::computeGjkDistance(particle, convex, particle.pose, convex.pose,
		convex.margin + particle.margin + maxDist, point0, point1, normal);

	// GJK cannot resolve overlapping shapes; fall back to EPA for the depth.
	if (dist < FLT_EPSILON)
		dist = Gu::RefGjkEpa::computeEpaDepth(particle, convex, particle.pose, convex.pose,
			point0, point1, normal);

	if (dist > maxDist)
		return false;

	return true;
}
|
||||
#endif // __PARTICLE_COLLISION_CUH__
|
||||
359
engine/third_party/physx/source/gpunarrowphase/src/CUDA/particleSystemHFMidPhaseCG.cu
vendored
Normal file
359
engine/third_party/physx/source/gpunarrowphase/src/CUDA/particleSystemHFMidPhaseCG.cu
vendored
Normal file
@@ -0,0 +1,359 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "foundation/PxBounds3.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxTransform.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
|
||||
#include "geometry/PxHeightFieldSample.h"
|
||||
|
||||
#include "PxNodeIndex.h"
|
||||
|
||||
#include "PxgContactManager.h"
|
||||
#include "PxgConvexConvexShape.h"
|
||||
#include "PxgParticleSystem.h"
|
||||
#include "PxgParticleSystemCoreKernelIndices.h"
|
||||
|
||||
#include "PxsTransformCache.h"
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
#include "PxgCommonDefines.h"
|
||||
#include "heightfieldUtil.cuh"
|
||||
#include "dataReadWriteHelper.cuh"
|
||||
#include "gridCal.cuh"
|
||||
#include "particleCollision.cuh"
|
||||
|
||||
#include "assert.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Empty host-side stub; presumably referenced from host code to force this
// translation unit (and its kernels) to be linked into the module - TODO confirm.
extern "C" __host__ void initNarrowphaseKernels12() {}
|
||||
|
||||
// Per-block shared-memory scratch for the particle-vs-heightfield kernel.
// Filled by one thread of warp 0 in ps_heightfieldCollisonLaunch, then read by
// every thread of the block after a __syncthreads(). 16-byte aligned so it can
// back the float4 position loads.
PX_ALIGN_PREFIX(16)
struct PsHeightfieldScratch
{
	PxTransform heightfieldTransform;	// world pose of the heightfield shape
	PxgShape heightfieldShape;			// heightfield geometry (scale, sample data pointer)

	// One-way (particle-vs-static) contact output buffers, laid out per particle.
	PxgParticleContactInfo* PX_RESTRICT oneWayContactInfos;
	PxNodeIndex* PX_RESTRICT oneWayContactNodeIndices;
	PxU32* PX_RESTRICT oneWayContactCounts;

	float4* currentPositions;	// sorted particle positions at the start of the step
	float4* predictedPositions;	// sorted predicted positions (used when CCD is enabled)
	PxNodeIndex rigidId; //heightfield
	PxU32 transformCacheRef; //heightfield
	PxU32 numParticles;			// count for the regular or diffuse set being processed
	PxU32 particleSystemId;
	PxReal contactDist;			// combined contact offset of the pair
	PxReal restDist;			// rest distance, subtracted from reported penetration
	bool enableCCD;				// true if the particle system requested speculative CCD
}PX_ALIGN_SUFFIX(16);
|
||||
|
||||
|
||||
// Midphase + narrowphase for one particle against a heightfield:
// computes the particle's contact volume, clips it against the heightfield's
// row/column grid, tests every candidate triangle in the overlapped cells, and
// writes up to MaxStaticContactsPerMesh contacts either into the particle's
// one-way buffers (static heightfield) or through the generic contact writer.
// sPerParticleTempContacts is this thread's private staging area
// (shared-memory slice, MaxStaticContactsPerMesh float4 entries).
__device__ static inline void psHeightfieldMidphaseCollision(
	PsHeightfieldScratch* sScratch,
	float4* PX_RESTRICT sPerParticleTempContacts,
	const PxU32 numParticles,
	const PxU32 particleIndex,
	const bool isDiffuse,
	const bool isTGS,
	PxgParticleContactWriter& writer
)
{
	const PxReal contactDist = sScratch->contactDist;
	const PxReal restDist = sScratch->restDist;

	// Contact volume: a sphere covering the particle's (predicted) path when
	// CCD is on, otherwise just the contact distance around the current position.
	PxVec3 currentPos = PxLoad3(sScratch->currentPositions[particleIndex]);
	PxVec3 cVolumePos;
	PxReal cVolumeRadius;
	if (sScratch->enableCCD)
	{
		PxVec3 predictedPos = PxLoad3(sScratch->predictedPositions[particleIndex]);
		cVolumeRadius = getParticleSpeculativeContactVolume(cVolumePos,
			currentPos, predictedPos, contactDist, isDiffuse, isTGS);
	}
	else
	{
		cVolumePos = currentPos;
		cVolumeRadius = contactDist;
	}

	// World-space AABB of the contact volume.
	const PxVec3 cVolumeRadius3(cVolumeRadius);
	const PxVec3 min = cVolumePos - cVolumeRadius3;
	const PxVec3 max = cVolumePos + cVolumeRadius3;
	PxBounds3 worldBound(min, max);

	PxTransform heightfieldTransform = sScratch->heightfieldTransform;

	PxgShape heightfieldShape = sScratch->heightfieldShape;
	const PxReal oneOverHeightScale = 1.f / heightfieldShape.scale.scale.y;
	const PxReal oneOverRowScale = 1.f / PxAbs(heightfieldShape.scale.scale.x);
	const PxReal oneOverlColScale = 1.f / PxAbs(heightfieldShape.scale.scale.z);

	//bound is in world space, we need to transform the bound to the local space of height field
	PxBounds3 localBound = PxBounds3::transformFast(heightfieldTransform.getInverse(), worldBound);

	// Undo the heightfield scale so the bound is in sample (row/column) units.
	localBound.minimum.x *= oneOverRowScale;
	localBound.minimum.y *= oneOverHeightScale;
	localBound.minimum.z *= oneOverlColScale;

	localBound.maximum.x *= oneOverRowScale;
	localBound.maximum.y *= oneOverHeightScale;
	localBound.maximum.z *= oneOverlColScale;

	// Negative scales mirror the axis, which flips min/max.
	//row scale
	if (heightfieldShape.scale.scale.x < 0.f)
	{
		//swap min and max row scale
		const PxReal temp = localBound.minimum.x;
		localBound.minimum.x = localBound.maximum.x;
		localBound.maximum.x = temp;
	}

	//col scale
	if (heightfieldShape.scale.scale.z < 0.f)
	{
		PxReal swap = localBound.minimum.z;
		localBound.minimum.z = localBound.maximum.z;
		localBound.maximum.z = swap;
	}

	// Heightfield blob layout: [nbRows, nbCols, samples...] (see heightfieldUtil.cuh).
	PxU32* heightfieldData = reinterpret_cast<PxU32*>(heightfieldShape.hullOrMeshPtr);
	const PxU32 nbRows = heightfieldData[0];
	const PxU32 nbCols = heightfieldData[1];
	PxHeightFieldSample* samples = reinterpret_cast<PxHeightFieldSample*>(&heightfieldData[2]);

	// Early out if the bound lies entirely outside the sample grid.
	if ((localBound.minimum.x > nbRows - 1) || (localBound.minimum.z > nbCols - 1)
		|| (localBound.maximum.x < 0) || (localBound.maximum.z < 0))
	{
		return;
	}

	// Clamp the bound to a rectangle of grid cells.
	PxU32 minRow = getMinRow(localBound.minimum.x, nbRows);
	PxU32 maxRow = getMaxRow(localBound.maximum.x, nbRows);
	PxU32 minColumn = getMinColumn(localBound.minimum.z, nbCols);
	PxU32 maxColumn = getMaxColumn(localBound.maximum.z, nbCols);

	// No overlapped cells -> no triangles to test.
	if ((2 * (maxColumn - minColumn) * (maxRow - minRow)) == 0)
	{
		return;
	}

	const PxVec3 cVolumePosShapeSpace = heightfieldTransform.transformInv(cVolumePos);
	const PxVec3 particlePosShapeSpace = heightfieldTransform.transformInv(currentPos);

	const PxReal miny = localBound.minimum.y;
	const PxReal maxy = localBound.maximum.y;

	const PxU32 columnSpan = maxColumn - minColumn;

	//How many static contacts this particle already has
	PxU32 contactCounts = 0;

	//we have two materials corresponding to one vertexIndex, so each thread will deal with one of the materials
	const PxU32 totalNumProcessed = (maxRow - minRow) * columnSpan * 2;

	// Iterate over each overlapped cell twice (two triangles per cell).
	for (PxU32 i = 0; i < totalNumProcessed; ++i)
	{
		PxU32 triangleIdx = 0xFFffFFff;

		const PxU32 index = i / 2;
		const PxU32 vertexIndex = (minRow + index / columnSpan) * nbCols + (minColumn + index % columnSpan);
		assert(isValidVertex(vertexIndex, nbRows, nbCols));
		// Height range test: skip the cell when the bound is entirely above or
		// below all four corner samples.
		PxReal h0 = getHeight(vertexIndex, samples);
		PxReal h1 = getHeight(vertexIndex + 1, samples);
		PxReal h2 = getHeight(vertexIndex + nbCols, samples);
		PxReal h3 = getHeight(vertexIndex + nbCols + 1, samples);
		const bool con0 = maxy < h0 && maxy < h1 && maxy < h2 && maxy < h3;
		const bool con1 = miny > h0 && miny > h1 && miny > h2 && miny > h3;

		if (!(con0 || con1))
		{
			const PxHeightFieldSample& sample = getSample(vertexIndex, samples);

			// Odd i -> second triangle/material of the cell.
			const bool isMaterial1 = (i & 1) ? 1 : 0;
			PxU32 material = isMaterial1 ? sample.materialIndex1 : sample.materialIndex0;
			if (material != PxHeightFieldMaterial::eHOLE)
			{
				triangleIdx = isMaterial1 ? ((vertexIndex << 1) + 1) : (vertexIndex << 1);

				PxVec3 triV0, triV1, triV2;
				getTriangle(triV0, triV1, triV2, NULL, triangleIdx, heightfieldShape.scale, nbRows, nbCols, samples);

				PxVec3 normalShapeSpace;
				PxReal distance;
				bool intersect = particleTriangleTest(normalShapeSpace, distance,
					particlePosShapeSpace, cVolumePosShapeSpace, cVolumeRadius, triV0, triV1, triV2, sScratch->enableCCD);

				//this code still misses a logic from the triangle mesh near phase, which also selects the best contact
				//if there are more than MaxStaticContactsPerMesh
				if (intersect && contactCounts < PxgParticleContactInfo::MaxStaticContactsPerMesh)
				{
					// Stage normal (xyz) and penetration (w, rest distance removed).
					PxVec3 normal = heightfieldTransform.rotate(normalShapeSpace);
					sPerParticleTempContacts[contactCounts++] = make_float4(normal.x, normal.y, normal.z, distance - restDist);
				}
			}
		}
	}//end of totalNumProcessed

	contactCounts = PxMin(contactCounts, PxgParticleContactInfo::MaxStaticContactsPerMesh);
	PxU32 contactWritten = 0;
	// Static heightfields use the cheaper one-way contact path.
	if (sScratch->rigidId.isStaticBody())
	{
		if (contactCounts)
		{
			// Atomically reserve slots; other kernels may append to the same
			// per-particle counter concurrently.
			PxU32 contactIndex = (PxU32)(atomicAdd((PxI32*)&sScratch->oneWayContactCounts[particleIndex], (PxI32)contactCounts));
			PxU32 endIndex = PxMin(contactIndex + contactCounts, PxgParticleContactInfo::MaxStaticContactsPerParticle);
			for (PxU32 i = contactIndex, src = 0; i < endIndex; i++, src++)
			{
				// Buffers are particle-major strided: slot j of particle p is at p + j*numParticles.
				const PxU32 outputIndex = particleIndex + i * numParticles;
				sScratch->oneWayContactInfos[outputIndex].mNormal_PenW = sPerParticleTempContacts[src];
				sScratch->oneWayContactNodeIndices[outputIndex] = sScratch->rigidId;
			}
		}
		// NOTE(review): the count is overwritten (not accumulated), clamped to
		// the per-particle cap - presumably intentional w.r.t. the atomicAdd
		// above; confirm against the other one-way writers.
		contactWritten = PxMin(contactCounts, PxgParticleContactInfo::MaxStaticContactsPerParticle);
		sScratch->oneWayContactCounts[particleIndex] = contactWritten;

	}

	// Overflow (or non-static heightfield): emit remaining contacts through the
	// generic two-way writer. Diffuse particles only use the one-way path.
	PxU32 remaining = contactCounts - contactWritten;
	if ((remaining > 0) && !isDiffuse)
	{
		PxU64 compressedParticleIndex = PxEncodeParticleIndex(sScratch->particleSystemId, particleIndex);
		PxU32 contactStartIndex = atomicAdd(writer.numTotalContacts, remaining);
		PxU32 contactEndIndex = contactStartIndex + remaining;
		for (PxU32 contactIndex = contactStartIndex, src = 0; contactIndex < contactEndIndex; contactIndex++, src++)
		{
			float4 contact = sPerParticleTempContacts[src];
			writer.writeContact(contactIndex, PxVec4(contact.x, contact.y, contact.z, contact.w), compressedParticleIndex, sScratch->rigidId);
		}
	}
}
|
||||
|
||||
|
||||
// Entry kernel for particle-system vs. heightfield collision.
// Launch layout: blockIdx.y selects the contact-manager pair, blockIdx.z
// selects the pass (0 = regular particles, 1 = diffuse particles), and
// blockIdx.x/threadIdx.x grid-stride over the particles of that pass.
// Warp 0 loads the per-pair data into block-shared scratch; all threads then
// run the midphase per particle.
// NOTE(review): 'toleranceLength' is not referenced in this kernel body -
// presumably kept for launch-signature uniformity; confirm.
extern "C" __global__ void ps_heightfieldCollisonLaunch(
	const bool isTGS,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxReal* PX_RESTRICT restDistances,
	const PxgShape* PX_RESTRICT gpuShapes,
	PxgParticleSystem* PX_RESTRICT particleSystems,
	PxNodeIndex* PX_RESTRICT shapeToRigidRemapTable,
	PxgParticleContactWriter writer
)
{
	// Block-shared pair data; written below by one thread of warp 0 and
	// published to the block by the __syncthreads() that follows.
	__shared__ __align__(16) PxU8 sData[sizeof(PsHeightfieldScratch)];
	PsHeightfieldScratch& sHeightfieldScratch = reinterpret_cast<PsHeightfieldScratch&>(sData);

	// Per-thread contact staging (one row per thread in the block).
	__shared__ float4 sTempContacts[PxgParticleSystemKernelBlockDim::PS_HEIGHTFIELD_COLLISION][PxgParticleContactInfo::MaxStaticContactsPerMesh];

	unsigned int cmIdx = blockIdx.y;

	const PxU32 warpIndex = threadIdx.x / WARP_SIZE;

	const bool isDiffuseParticlesThread = blockIdx.z == 1;

	// Warp 0 cooperatively loads the pair's shapes and transform.
	if (warpIndex == 0)
	{
		const PxU32 threadIndexInWarp = threadIdx.x & (WARP_SIZE - 1);

		PxgShape particleShape, heightfieldShape;
		PxU32 particleCacheRef, heightfieldCacheRef;
		LoadShapePairWarp<PxGeometryType::ePARTICLESYSTEM, PxGeometryType::eHEIGHTFIELD>(cmInputs, cmIdx, gpuShapes,
			particleShape, particleCacheRef, heightfieldShape, heightfieldCacheRef);

		PxsCachedTransform heightfieldTransformCache;
		PxsCachedTransform_ReadWarp(heightfieldTransformCache, transformCache + heightfieldCacheRef);

		const PxReal contactDist = contactDistance[particleCacheRef] + contactDistance[heightfieldCacheRef];

		const PxNodeIndex rigidId = shapeToRigidRemapTable[heightfieldCacheRef];

		const PxU32 particleSystemId = particleShape.particleOrSoftbodyId;
		PxgParticleSystem& particleSystem = particleSystems[particleSystemId];

		// Only lane 0 publishes into the shared scratch.
		if (threadIndexInWarp == 0)
		{
			sHeightfieldScratch.heightfieldTransform = heightfieldTransformCache.transform;
			sHeightfieldScratch.heightfieldShape = heightfieldShape;
			sHeightfieldScratch.rigidId = rigidId;
			sHeightfieldScratch.transformCacheRef = heightfieldCacheRef;
			sHeightfieldScratch.contactDist = contactDist;
			sHeightfieldScratch.restDist = restDistances[cmIdx];
			sHeightfieldScratch.enableCCD = (particleSystem.mData.mFlags & PxParticleFlag::eENABLE_SPECULATIVE_CCD) > 0;

			if (isDiffuseParticlesThread)
			{
				// If there are no diffuse particles, deactivate collision handling
				// for this pass by publishing a particle count of zero.
				const bool isDiffuseParticlesActive = (particleSystem.mCommonData.mMaxDiffuseParticles > 0);
				sHeightfieldScratch.currentPositions = particleSystem.mDiffuseSortedOriginPos_LifeTime;
				sHeightfieldScratch.predictedPositions = particleSystem.mDiffuseSortedPos_LifeTime;
				sHeightfieldScratch.numParticles = (isDiffuseParticlesActive) ? *particleSystem.mNumDiffuseParticles : 0;
				sHeightfieldScratch.oneWayContactInfos = particleSystem.mDiffuseOneWayContactInfos;
				sHeightfieldScratch.oneWayContactNodeIndices = particleSystem.mDiffuseOneWayNodeIndex;
				sHeightfieldScratch.oneWayContactCounts = particleSystem.mDiffuseOneWayContactCount;
				sHeightfieldScratch.particleSystemId = particleSystemId;
			}
			else
			{
				// Regular-particle pass: point the scratch at the sorted particle buffers.
				sHeightfieldScratch.currentPositions = particleSystem.mSortedOriginPos_InvMass;
				sHeightfieldScratch.predictedPositions = particleSystem.mSortedPositions_InvMass;
				sHeightfieldScratch.numParticles = particleSystem.mCommonData.mNumParticles;
				sHeightfieldScratch.oneWayContactInfos = particleSystem.mOneWayContactInfos;
				sHeightfieldScratch.oneWayContactNodeIndices = particleSystem.mOneWayNodeIndex;
				sHeightfieldScratch.oneWayContactCounts = particleSystem.mOneWayContactCount;
				sHeightfieldScratch.particleSystemId = particleSystemId;
			}
		}
	}

	// Publish the scratch to every thread in the block.
	__syncthreads();

	const PxU32 numParticles = sHeightfieldScratch.numParticles;

	if (isDiffuseParticlesThread && numParticles == 0)
		return;

	// Grid-stride loop: each thread handles particles globalThreadIndex,
	// globalThreadIndex + gridDim.x*blockDim.x, ...
	for (PxU32 globalThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; globalThreadIndex < numParticles; globalThreadIndex += gridDim.x * blockDim.x)
	{
		psHeightfieldMidphaseCollision(
			&sHeightfieldScratch,
			sTempContacts[threadIdx.x],
			numParticles,
			globalThreadIndex,
			isDiffuseParticlesThread,
			isTGS,
			writer
		);
	}
}
|
||||
805
engine/third_party/physx/source/gpunarrowphase/src/CUDA/particleSystemMeshMidphase.cu
vendored
Normal file
805
engine/third_party/physx/source/gpunarrowphase/src/CUDA/particleSystemMeshMidphase.cu
vendored
Normal file
@@ -0,0 +1,805 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include "GuIntersectionTriangleBoxRef.h"
|
||||
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxQuat.h"
|
||||
#include "foundation/PxTransform.h"
|
||||
#include "geometry/PxMeshScale.h"
|
||||
#include "geometry/PxGeometry.h"
|
||||
|
||||
#include "cudaNpCommon.h"
|
||||
#include "PxgPersistentContactManifold.h"
|
||||
#include "PxgContactManager.h"
|
||||
#include "PxgConvexConvexShape.h"
|
||||
#include "PxsContactManagerState.h"
|
||||
#include "PxsTransformCache.h"
|
||||
#include "PxgParticleSystem.h"
|
||||
#include "PxgParticleSystemCore.h"
|
||||
#include "PxgParticleSystemCoreKernelIndices.h"
|
||||
#include "PxgNpKernelIndices.h"
|
||||
#include "PxsMaterialCore.h"
|
||||
|
||||
#include "PxgCommonDefines.h"
|
||||
#include "utils.cuh"
|
||||
#include "deformableElementFilter.cuh"
|
||||
#include "PxNodeIndex.h"
|
||||
#include "sdfCollision.cuh"
|
||||
#include "gridCal.cuh"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
extern "C" __host__ void initNarrowphaseKernels11() {}
|
||||
|
||||
#include "warpHelpers.cuh"
|
||||
#include "manifold.cuh"
|
||||
#include "vector_functions.h"
|
||||
|
||||
#include "reduction.cuh"
|
||||
#include "PxgSimulationCoreDesc.h"
|
||||
|
||||
#include "bv32Traversal.cuh"
|
||||
#include "triangleMesh.cuh"
|
||||
#include "particleCollision.cuh"
|
||||
|
||||
|
||||
PX_ALIGN_PREFIX(16)
// Per-warp scratch storage for the particle-vs-triangle-mesh midphase.
// One instance lives in shared memory per warp (see ps_meshCollisonLaunch);
// it caches mesh data, the BV32 traversal stack, and per-particle contact
// candidates accumulated during tree traversal.
struct PsMidphaseScratch
{
	const float4* PX_RESTRICT trimeshVerts;			// mesh vertex buffer (vertex space)
	const uint4* PX_RESTRICT trimeshTriIndices;		// per-triangle vertex index triples

	PxTransform meshToWorld;						// mesh shape -> world transform
	PxMeshScale trimeshScale;						// mesh scale (vertex space -> shape space)

	//bv32 tree
	const Gu::BV32DataPacked* bv32PackedNodes;		// packed BV32 nodes of the mesh BVH

	//stack for traversal
	int sBv32Nodes[192]; //6 depth of the bv32 tree
	PxU32 sBv32ActiveParticles[192];				// per-stack-entry bitmask of warp lanes still active for that node
	// Per-lane (32 lanes) candidate contact normals and depths gathered during traversal.
	PxVec3 shLocalContacts[32][PxgParticleContactInfo::MaxStaticContactsPerMesh];
	PxReal shLocalContactDepths[32][PxgParticleContactInfo::MaxStaticContactsPerMesh];

	float4* currentPositions;						// particle positions at frame start (w component unused here)
	float4* predictedPositions;						// predicted/integrated particle positions (used for speculative CCD)
	bool enableCCD;									// true when the particle system requests speculative CCD
	PxU8 pad[7];									// explicit padding to keep the 16-byte alignment contract
}PX_ALIGN_SUFFIX(16);
|
||||
|
||||
|
||||
// Warp-cooperative BV32 traversal functor: each warp lane owns one particle,
// and the whole warp walks the mesh BVH together. Lanes whose particle's
// contact volume overlaps a node stay "active" (tracked as a 32-bit lane
// mask); leaf triangles are tested against each active particle in turn.
// All control flow below must stay warp-uniform — the __ballot_sync /
// __shfl_sync calls use FULL_MASK and would deadlock or corrupt results if
// lanes diverged around them.
struct PsTreeContactGenTraverser
{
	PsMidphaseScratch* s_warpScratch;		// this warp's shared-memory scratch
	const PxU32 globalParticleIndex;		// particle owned by this lane (may be >= numParticles for tail lanes)
	const PxU32 numParticles;
	PxVec3 cVolumePosVertexSpace;			// contact-volume center in mesh vertex space
	PxReal cVolumeRadius;					// contact-volume radius (shape space)
	PxVec3 cVolumePosShapeSpace;			// contact-volume center in mesh shape space
	PxVec3 particlePosShapeSpace;			// current particle position in mesh shape space
	PxU32 currentNodeActiveParticles;		// lane mask of particles active for the node being processed
	PxU32 contactCount;						// contacts recorded so far for this lane's particle

	// Builds the per-lane contact volume. With CCD enabled the volume spans
	// current -> predicted position (via getParticleSpeculativeContactVolume);
	// otherwise it is a sphere of radius contactDist around the current position.
	// NOTE(review): the member-initializer list order (numParticles before
	// globalParticleIndex) differs from declaration order; initialization still
	// follows declaration order, so this is benign.
	PX_FORCE_INLINE __device__ PsTreeContactGenTraverser(
		PsMidphaseScratch* s_warpScratch,
		const PxU32 numParticles,
		const PxU32 globalParticleIndex,
		const bool isDiffuse,
		const bool isTGS,
		const PxReal contactDist
	)
		: s_warpScratch(s_warpScratch)
		, numParticles(numParticles)
		, globalParticleIndex(globalParticleIndex)
		, contactCount(0)
	{
		if (globalParticleIndex < numParticles)
		{
			const PxMeshScale trimeshScale = s_warpScratch->trimeshScale;
			const PxTransform meshToWorld = s_warpScratch->meshToWorld;
			const PxVec3 currentPos = PxLoad3(s_warpScratch->currentPositions[globalParticleIndex]);
			particlePosShapeSpace = meshToWorld.transformInv(currentPos);

			if (s_warpScratch->enableCCD)
			{
				const PxVec3 predictedPos = PxLoad3(s_warpScratch->predictedPositions[globalParticleIndex]);
				PxVec3 cVolumePos;
				cVolumeRadius = getParticleSpeculativeContactVolume(cVolumePos, currentPos, predictedPos, contactDist, isDiffuse, isTGS);
				cVolumePosShapeSpace = meshToWorld.transformInv(cVolumePos);
			}
			else
			{
				cVolumePosShapeSpace = particlePosShapeSpace;
				cVolumeRadius = contactDist;
			}
			cVolumePosVertexSpace = shape2Vertex(cVolumePosShapeSpace, trimeshScale.scale, trimeshScale.rotation);
		}

		// Initialize all valid particles (lanes with a real particle) as active.
		currentNodeActiveParticles = __ballot_sync(FULL_MASK, globalParticleIndex < numParticles);
	}

	// Node test: returns true if ANY active particle's contact volume overlaps
	// the node AABB [cMin, cMax] (given in vertex space). Every lane broadcasts
	// its volume to the whole warp via __shfl_sync, so each lane tests its own
	// box against every active particle. Must be called by all 32 lanes.
	// NOTE(review): the accumulated newActiveParticles mask is only used for the
	// any-hit return here; currentNodeActiveParticles is not narrowed.
	PX_FORCE_INLINE __device__ bool intersectBoxFullWarp(bool hasBox, const PxVec3& cMin, const PxVec3& cMax) const
	{
		const PxMeshScale trimeshScale = s_warpScratch->trimeshScale;

		PxVec3 sCVolumePosVertexSpace;
		PxReal sCVolumeRadius;

		PxU32 newActiveParticles = 0;
		for (PxU32 i = currentNodeActiveParticles; i; i = clearLowestSetBit(i))
		{
			// i != 0 inside the loop, so the (i == 0) guard is effectively dead; kept as-is.
			const PxU32 pIndexInWarp = (i == 0) ? 0 : lowestSetIndex(i);

			// Broadcast lane pIndexInWarp's contact volume to the whole warp.
			sCVolumePosVertexSpace.x = __shfl_sync(FULL_MASK, cVolumePosVertexSpace.x, pIndexInWarp);
			sCVolumePosVertexSpace.y = __shfl_sync(FULL_MASK, cVolumePosVertexSpace.y, pIndexInWarp);
			sCVolumePosVertexSpace.z = __shfl_sync(FULL_MASK, cVolumePosVertexSpace.z, pIndexInWarp);
			sCVolumeRadius = __shfl_sync(FULL_MASK, cVolumeRadius, pIndexInWarp);

			if (hasBox)
			{
				// Closest point on the AABB to the volume center, measured in
				// shape space so the radius comparison accounts for mesh scale.
				const PxVec3 closestVertexSpace = cMin.maximum(cMax.minimum(sCVolumePosVertexSpace));
				const PxVec3 distVecVertexSpace = closestVertexSpace - sCVolumePosVertexSpace;
				const PxVec3 distVec = vertex2Shape(distVecVertexSpace, trimeshScale.scale, trimeshScale.rotation);
				const PxReal distSq = distVec.magnitudeSquared();

				if (distSq < sCVolumeRadius*sCVolumeRadius)
				{
					newActiveParticles |= 1 << pIndexInWarp;
				}
			}
		}
		return newActiveParticles != 0;
	}

	// Leaf test: each lane holds one triangle (primitiveIndex, or 0xFFFFFFFF
	// for an empty slot). For every active particle in turn, all lanes test
	// their triangle against that particle and append hits to the particle's
	// per-lane contact list in shared memory. When the list overflows, the
	// shallowest stored contact is replaced by any deeper new one so the
	// deepest MaxStaticContactsPerMesh contacts are kept.
	PX_FORCE_INLINE __device__ void intersectPrimitiveFullWarp(PxU32 primitiveIndex, PxU32 idxInWarp)
	{
		const PxMeshScale trimeshScale = s_warpScratch->trimeshScale;
		const PxTransform meshToWorld = s_warpScratch->meshToWorld;

		PxVec3 sParticlePosShapeSpace;
		PxVec3 sCVolumePosShapeSpace;
		PxReal sCVolumeRadius;

		for (PxU32 i = currentNodeActiveParticles; i; i = clearLowestSetBit(i))
		{
			const PxU32 pIndexInWarp = (i == 0) ? 0 : lowestSetIndex(i);

			// Broadcast the current particle's data from its owning lane.
			sParticlePosShapeSpace.x = __shfl_sync(FULL_MASK, particlePosShapeSpace.x, pIndexInWarp);
			sParticlePosShapeSpace.y = __shfl_sync(FULL_MASK, particlePosShapeSpace.y, pIndexInWarp);
			sParticlePosShapeSpace.z = __shfl_sync(FULL_MASK, particlePosShapeSpace.z, pIndexInWarp);

			sCVolumePosShapeSpace.x = __shfl_sync(FULL_MASK, cVolumePosShapeSpace.x, pIndexInWarp);
			sCVolumePosShapeSpace.y = __shfl_sync(FULL_MASK, cVolumePosShapeSpace.y, pIndexInWarp);
			sCVolumePosShapeSpace.z = __shfl_sync(FULL_MASK, cVolumePosShapeSpace.z, pIndexInWarp);
			sCVolumeRadius = __shfl_sync(FULL_MASK, cVolumeRadius, pIndexInWarp);

			bool intersect = false;
			PxVec3 normal(0.f);
			PxReal distance = PX_MAX_F32;
			PxU32 triangleIdx = 0xffffffff;

			if (primitiveIndex != 0xFFFFFFFF)
			{
				triangleIdx = primitiveIndex;

				uint4 triIdx = s_warpScratch->trimeshTriIndices[triangleIdx];

				// Triangle vertices in shape space (scale applied).
				PxVec3 triV0, triV1, triV2;
				triV0 = vertex2Shape(PxLoad3(s_warpScratch->trimeshVerts[triIdx.x]), trimeshScale.scale, trimeshScale.rotation);
				triV1 = vertex2Shape(PxLoad3(s_warpScratch->trimeshVerts[triIdx.y]), trimeshScale.scale, trimeshScale.rotation);
				triV2 = vertex2Shape(PxLoad3(s_warpScratch->trimeshVerts[triIdx.z]), trimeshScale.scale, trimeshScale.rotation);

				MeshScaling meshScale(s_warpScratch->trimeshScale.scale, s_warpScratch->trimeshScale.rotation);

				// A negative-determinant scale flips winding; swap to keep outward normals.
				if (meshScale.flipNormal)
					PxSwap(triV1, triV2);

				PxVec3 normalShapeSpace;
				intersect = particleTriangleTest(normalShapeSpace, distance,
					sParticlePosShapeSpace, sCVolumePosShapeSpace, sCVolumeRadius, triV0, triV1, triV2, s_warpScratch->enableCCD);

				if (intersect)
				{
					normal = meshToWorld.rotate(normalShapeSpace);
				}
			}

			// Compact this warp's hits and append them to lane pIndexInWarp's contact list.
			PxU32 resultWarp = __ballot_sync(FULL_MASK, intersect);
			PxU32 offset = warpScanExclusive(resultWarp, idxInWarp);
			PxU32 validCount = __popc(resultWarp);

			//However many contacts the particle already had...
			PxU32 contactStartIndex = __shfl_sync(FULL_MASK, contactCount, pIndexInWarp);

			if (idxInWarp == pIndexInWarp)
				contactCount += validCount;

			bool failedWrite = false;
			if (intersect)
			{
				const PxU32 index = contactStartIndex + offset;

				if (index < PxgParticleContactInfo::MaxStaticContactsPerMesh)
				{
					//If we have space for this contact (otherwise, we drop it!)
					s_warpScratch->shLocalContacts[pIndexInWarp][index] = normal;
					s_warpScratch->shLocalContactDepths[pIndexInWarp][index] = distance;
				}
				else
					failedWrite = true;
			}

			PxU32 failMask = __ballot_sync(FULL_MASK, failedWrite);

			if (failMask)
			{
				// Overflow: keep the deepest contacts. Each lane mirrors one
				// stored slot's depth in outSep; overflowed contacts replace the
				// current shallowest (max-depth) slot if they are deeper.
				PxReal outSep = -PX_MAX_F32;
				if (idxInWarp < PxgParticleContactInfo::MaxStaticContactsPerMesh)
					outSep = s_warpScratch->shLocalContactDepths[pIndexInWarp][idxInWarp];

				for (PxU32 i = failMask; i != 0; i = clearLowestSetBit(i))
				{
					PxU32 index = lowestSetIndex(i);
					PxReal depth = __shfl_sync(FULL_MASK, distance, index);

					PxU32 candidateSwapMask = __ballot_sync(FULL_MASK, depth < outSep);
					if (candidateSwapMask)
					{
						PxReal bestVal;
						PxU32 bestIndex = maxIndex(outSep, FULL_MASK, bestVal);

						// Lane holding the overflowed contact writes it into the evicted slot.
						if (idxInWarp == index)
						{
							s_warpScratch->shLocalContacts[pIndexInWarp][bestIndex] = normal;
							s_warpScratch->shLocalContactDepths[pIndexInWarp][bestIndex] = distance;
						}
						// Lane mirroring the evicted slot adopts the new depth.
						if (idxInWarp == bestIndex)
							outSep = depth;
					}

				}

				// The list is full from here on.
				if (idxInWarp == pIndexInWarp)
					contactCount = PxgParticleContactInfo::MaxStaticContactsPerMesh;
			}
		}
	}

	// Flush this lane's accumulated contacts after traversal. Contacts against
	// a static body go to the per-particle one-way buffers (up to
	// MaxStaticContactsPerParticle, reserved via atomicAdd on the per-particle
	// count); any remainder — and all contacts against non-static bodies — are
	// written through the global contact writer (skipped for diffuse particles).
	// Warp-uniform: all 32 lanes must call this together.
	PX_FORCE_INLINE __device__ void finalizeFullWarp(
		PxU32 idxInWarp,
		const PxU32 particleSystemId,
		const PxNodeIndex rigidId,
		PxgParticleContactInfo* PX_RESTRICT oneWayContactInfos, //output
		PxNodeIndex* PX_RESTRICT oneWayContactNodeIndices, //output
		PxU32* PX_RESTRICT oneWayContactCounts, //output
		PxgParticleContactWriter& writer,
		const bool isDiffuseParticlesThread,
		const PxReal restDist
	)
	{
		//__syncwarp();

		PxU32 id = 0;	// number of contacts consumed by the one-way path
		if (globalParticleIndex < numParticles)
		{
			if (rigidId.isStaticBody())
			{
				if (contactCount != 0)
				{
					PxU32 startIndex = PxU32(atomicAdd((PxI32*)&oneWayContactCounts[globalParticleIndex], PxI32(contactCount)));

					PxU32 endIndex = PxMin(PxgParticleContactInfo::MaxStaticContactsPerParticle, startIndex + contactCount);

					// One-way buffers are laid out SoA-style: slot i of particle p lives at p + i*numParticles.
					for (PxU32 i = startIndex; i < endIndex; i++, id++)
					{
						const PxVec3& normal = s_warpScratch->shLocalContacts[threadIdx.x][id];
						PxReal penetration = s_warpScratch->shLocalContactDepths[threadIdx.x][id] - restDist;
						const PxU32 outputIndex = globalParticleIndex + i * numParticles;
						oneWayContactInfos[outputIndex].mNormal_PenW = make_float4(normal.x, normal.y, normal.z, penetration);
						oneWayContactNodeIndices[outputIndex] = rigidId;
					}
				}
			}
		}

		// Contacts that did not fit in (or did not qualify for) the one-way buffers.
		PxU32 remaining = contactCount - id;

		if (__any_sync(FULL_MASK, remaining))
		{

			// Inclusive warp scan; lane 31 holds the warp total and reserves the output range.
			PxU32 count = warpScan<AddOpPxU32, PxU32>(FULL_MASK, remaining);

			PxU32 startIndex = 0xFFFFFFFF;
			if (idxInWarp == 31)
			{
				startIndex = atomicAdd(writer.numTotalContacts, count);
			}

			startIndex = __shfl_sync(FULL_MASK, startIndex, 31);


			if (globalParticleIndex < numParticles)
			{
				// Convert the inclusive scan value into this lane's start offset.
				startIndex = startIndex + count - remaining;

				if (remaining && !isDiffuseParticlesThread)
				{
					PxU64 compressedParticleIndex = PxEncodeParticleIndex(particleSystemId, globalParticleIndex);

					for (PxU32 i = id, index = startIndex; i < contactCount; ++i, ++index)
					{
						const PxVec3& normal = s_warpScratch->shLocalContacts[idxInWarp][i];
						PxReal penetration = s_warpScratch->shLocalContactDepths[idxInWarp][i] - restDist;
						writer.writeContact(index, PxVec4(normal, penetration), compressedParticleIndex, rigidId);
					}
				}
			}
		}
	}
};
|
||||
|
||||
|
||||
|
||||
// Particle-vs-triangle-mesh collision using the mesh's sparse SDF.
// One block handles one contact-manager pair (blockIdx.x); blockIdx.y == 1
// selects the diffuse-particle pass. Candidate particles are gathered into
// shared memory by findInterestingVertices, then each thread evaluates the
// SDF at its particle's previous and current positions to derive a contact
// normal and separation (with an optional speculative-CCD sweep).
// Static-body contacts go to the per-particle one-way buffers; the rest are
// emitted through the global contact writer.
__device__ static inline void psSdfMeshMidphaseCollision(
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxReal* PX_RESTRICT restDistances,
	const PxgShape* PX_RESTRICT gpuShapes,
	PxgParticleSystem* particleSystems,
	PxNodeIndex* PX_RESTRICT shapeToRigidRemapTable,

	PxgParticleContactWriter& writer
)
{
	// Raw shared-memory backing for the mesh header and SDF texture descriptor
	// (avoids running their constructors per thread).
	__shared__ __align__(16) char sMesh[sizeof(PxgTriangleMesh)];
	PxgTriangleMesh& triangleMesh = reinterpret_cast<PxgTriangleMesh&>(*sMesh);

	__shared__ __align__(16) char sSdfTexture[sizeof(SparseSDFTexture)];
	SparseSDFTexture& sdfTexture = reinterpret_cast<SparseSDFTexture&>(*sSdfTexture);

	// Indices of candidate particles gathered by findInterestingVertices.
	__shared__ PxU32 triangleIndicesS[1024];


	unsigned int cmIdx = blockIdx.x;
	const bool isDiffuseParticlesThread = blockIdx.y == 1;


	PxgShape trimeshShape, shape;
	PxU32 trimeshTransformId, particleTransformId;
	LoadShapePair<PxGeometryType::eTRIANGLEMESH, PxGeometryType::ePARTICLESYSTEM>(cmInputs, cmIdx, gpuShapes,
		trimeshShape, trimeshTransformId, shape, particleTransformId);

	PxsCachedTransform trimeshTransformCached;
	PxsCachedTransform_ReadWarp(trimeshTransformCached, transformCache + trimeshTransformId);


	const PxReal contactDist = contactDistance[trimeshTransformId] + contactDistance[particleTransformId];
	const PxReal restDist = restDistances[cmIdx];

	const PxNodeIndex rigidId = shapeToRigidRemapTable[trimeshTransformId];

	const PxU32 particleSystemId = shape.particleOrSoftbodyId;
	PxgParticleSystem& particleSystem = particleSystems[particleSystemId];

	// If there is no diffuse particles or is not using PBD, skip this function
	if (isDiffuseParticlesThread && (particleSystem.mCommonData.mMaxDiffuseParticles == 0))
		return;


	// First warp loads the mesh header into shared memory.
	if (threadIdx.x < 32)
	{
		readTriangleMesh(trimeshShape, triangleMesh);
		__syncwarp();
	}

	// Select the position buffers for this pass: diffuse particles read
	// sorted/origin life-time buffers, regular particles the sorted
	// position/inv-mass buffers.
	float4* positions;
	float4* positionsPrev;
	if (isDiffuseParticlesThread)
	{
		positions = reinterpret_cast<float4*>(particleSystem.mDiffuseSortedPos_LifeTime);
		positionsPrev = reinterpret_cast<float4*>(particleSystem.mDiffuseSortedOriginPos_LifeTime);
	}
	else
	{
		positions = reinterpret_cast<float4*>(particleSystem.mSortedPositions_InvMass);
		positionsPrev = reinterpret_cast<float4*>(particleSystem.mSortedOriginPos_InvMass);
	}


	if (threadIdx.x == 0)
		sdfTexture.initialize(triangleMesh); //The texture is stored in shared memory - only one threads needs to initialize it

	__syncthreads();

	PxU32 numParticles;
	PxgParticleContactInfo* PX_RESTRICT oneWayContactInfos;
	PxNodeIndex* PX_RESTRICT oneWayNodeIndices;
	PxU32* PX_RESTRICT oneWayContactCounts;

	if (isDiffuseParticlesThread)
	{
		numParticles = *particleSystem.mNumDiffuseParticles;
		oneWayContactInfos = particleSystem.mDiffuseOneWayContactInfos;
		oneWayNodeIndices = particleSystem.mDiffuseOneWayNodeIndex;
		oneWayContactCounts = particleSystem.mDiffuseOneWayContactCount;
	}
	else
	{
		numParticles = particleSystem.mCommonData.mNumParticles;
		oneWayContactInfos = particleSystem.mOneWayContactInfos;
		oneWayNodeIndices = particleSystem.mOneWayNodeIndex;
		oneWayContactCounts = particleSystem.mOneWayContactCount;
	}

	const PxU32* PX_RESTRICT gridParticleIndex = particleSystem.mSortedToUnsortedMapping;

	// Cull distance in vertex space: widen by the smallest scale component so
	// no candidate within contactDist (shape space) is missed.
	PxReal cullScale = contactDist;
	cullScale /= trimeshShape.scale.scale.abs().minElement();

	const PxTransform& trimeshToWorld = trimeshTransformCached.transform;
	const PxTransform particleToTrimeshTransform = trimeshToWorld.getInverse();

	bool enableCCD = (particleSystem.mData.mFlags & PxParticleFlag::eENABLE_SPECULATIVE_CCD) > 0;

	// i is advanced inside findInterestingVertices, which fills
	// triangleIndicesS with up to blockDim candidate particle indices per pass.
	for (PxU32 i = 0; i < numParticles;)
	{
		PxU32 nbFoundVertices = findInterestingVertices<32, 1024>(numParticles, positions,
			trimeshShape.scale, cullScale, sdfTexture, particleToTrimeshTransform, i, triangleIndicesS);

		bool candidateContact = false;
		PxVec3 pos;
		PxVec3 normal;
		PxReal distance;
		PxReal finalSep;
		PxVec3 posPrev;
		PxReal startDist; //distPrev

		PxU32 ind = threadIdx.x < nbFoundVertices ? triangleIndicesS[threadIdx.x] : numParticles;

		if (ind < numParticles)
		{
			// Current and previous particle positions in mesh shape space.
			pos = particleToTrimeshTransform.transform(PxLoad3(positions[ind]));
			PxVec3 v = shape2Vertex(pos, trimeshShape.scale.scale, trimeshShape.scale.rotation);

			posPrev = particleToTrimeshTransform.transform(PxLoad3(positionsPrev[ind]));


			// Signed distance + gradient at previous and current positions.
			PxVec3 dirPrev;
			startDist = doVertexSDFCollision(sdfTexture, shape2Vertex(posPrev, trimeshShape.scale.scale, trimeshShape.scale.rotation), dirPrev, cullScale);
			PxVec3 dir;
			distance = doVertexSDFCollision(sdfTexture, v, dir, cullScale);

			if (distance < cullScale || startDist < cullScale)
			{
				dir = vertex2ShapeNormalVector(dir, trimeshShape.scale.scale, trimeshShape.scale.rotation);
				dirPrev = vertex2ShapeNormalVector(dirPrev, trimeshShape.scale.scale, trimeshShape.scale.rotation);

				// Normalize using the current gradient's magnitude; the same
				// factor rescales both distances and both gradients.
				// NOTE(review): dirPrev/startDist are scaled by dir's magnitude,
				// not their own — presumably an intentional approximation
				// (gradients are near-unit after scale removal); confirm.
				PxReal m = dir.magnitudeSquared();
				if (m > 0.0f)
				{
					m = 1.0f / PxSqrt(m);
					distance = distance * m;
					startDist = startDist * m;
					dir = dir * m;
					dirPrev = dirPrev * m;
				}

				/*candidateContact = distance < contactDist;

				if (candidateContact)
				{
					sep = sep;
					normal = dir;
				}*/

				// Prefer a contact at the previous position (already touching
				// at frame start)...
				if (startDist < contactDist)
				{
					normal = dirPrev;
					finalSep = startDist - restDist;
					candidateContact = true;
				}
				// ...otherwise a contact at the current position, expressed
				// relative to the previous position via the normal projection.
				if (!candidateContact && distance > 0.f)
				{
					if (distance < contactDist)
					{
						normal = dir;
						finalSep = (distance - restDist) + normal.dot(posPrev - pos);
						candidateContact = true;
					}
				}
				// Speculative CCD: current position already penetrates
				// (distance < 0) with no contact found — march back along the
				// motion to the estimated surface crossing and sample there.
				if (enableCCD && !candidateContact)
				{
					if (distance < 0.f)
					{
						PxVec3 d = (pos - posPrev).getNormalized();

						PxReal displacement = -PxMax(0.f, startDist - restDist) / (dir.dot(d));

						PxVec3 pt = posPrev + d * displacement;

						// NOTE(review): pt is derived from posPrev, which was
						// already mapped into trimesh space above, yet it is
						// transformed by particleToTrimeshTransform again here
						// — verify whether this double transform is intended.
						distance = doVertexSDFCollision(sdfTexture, shape2Vertex(particleToTrimeshTransform.transform(pt), trimeshShape.scale.scale, trimeshShape.scale.rotation), dir, cullScale);
						dir = vertex2ShapeNormalVector(dir, trimeshShape.scale.scale, trimeshShape.scale.rotation);
						// NOTE(review): m is the inverse magnitude of the OLD
						// gradient; the freshly sampled dir is rescaled with it
						// rather than renormalized — confirm this is intended.
						if (m > 0.0f)
						{
							distance = distance * m;
							dir = dir * m;
						}

						if (distance < contactDist)
						{
							normal = dir;
							finalSep = (distance - restDist) + normal.dot(posPrev - pt);
							candidateContact = true;
						}
					}
				}
			}

			// Drop contacts suppressed by the particle/rigid filter pairs
			// (regular particles only).
			const PxU64 particleMask = PxEncodeParticleIndex(particleSystemId, gridParticleIndex[ind]);

			if (!isDiffuseParticlesThread && find(particleSystem, rigidId.getInd(), particleMask))
				candidateContact = false;
		}


		// Static bodies: try the per-particle one-way contact buffers first
		// (slot i of particle p lives at p + i*numParticles).
		if (rigidId.isStaticBody())
		{
			if (candidateContact)
			{
				PxU32 startIndex = PxU32(atomicAdd((PxI32*)&oneWayContactCounts[ind], 1));

				const PxU32 outputIndex = ind + (startIndex) * numParticles;
				if (startIndex < PxgParticleContactInfo::MaxStaticContactsPerParticle)
				{
					oneWayContactInfos[outputIndex].mNormal_PenW = make_float4(normal.x, normal.y, normal.z, finalSep/*sep - restDist + normal.dot(posPrev - pos)*/);
					oneWayNodeIndices[outputIndex] = rigidId;
					candidateContact = false; //Contact is stored, so it is not a candidate contact anymore
				}
			}
		}


		// Remaining candidates (non-static body, or one-way buffer overflow)
		// go through the global writer; globalScanExclusive reserves the slots
		// block-wide.
		PxU32 contactIndex = globalScanExclusive<32>(candidateContact, writer.numTotalContacts);

		if (candidateContact && !isDiffuseParticlesThread)
		{
			normal = trimeshToWorld.rotate(normal);

			PxVec4 normalPen(normal.x, normal.y, normal.z, finalSep/*sep - restDist + normal.dot(posPrev - pos)*/);

			writer.writeContact(contactIndex, normalPen,
				PxEncodeParticleIndex(particleSystemId, ind), rigidId);
		}
	}
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Particle-vs-triangle-mesh collision via BV32 midphase traversal (non-SDF
// path). One contact-manager pair per blockIdx.y; blockIdx.z == 1 selects the
// diffuse-particle pass. Each warp claims batches of 32 particles and runs a
// warp-cooperative BVH traversal (PsTreeContactGenTraverser) against the mesh,
// then flushes contacts to the one-way buffers / global writer.
template<unsigned int WarpsPerBlock>
__device__ static inline void psMeshMidphaseCollision(
	const bool isTGS,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxReal* PX_RESTRICT restDistances,
	const PxgShape* PX_RESTRICT gpuShapes,
	PsMidphaseScratch* s_warpScratch,
	PxgParticleSystem* particleSystems,
	PxNodeIndex* PX_RESTRICT shapeToRigidRemapTable,
	PxgParticleContactWriter& writer
)
{
	//thread index in warp
	const unsigned int idxInWarp = threadIdx.x;
	//warp index in block
	const unsigned int warpIdx = threadIdx.y;//(idx >> LOG2_WARP_SIZE);
	//const unsigned int idx = idxInWarp + warpIdx * WARP_SIZE;

	unsigned int cmIdx = blockIdx.y;
	const bool isDiffuseParticlesThread = blockIdx.z == 1;


	PxgShape trimeshShape, shape;
	PxU32 trimeshTransformId, particleTransformId;
	LoadShapePair<PxGeometryType::eTRIANGLEMESH, PxGeometryType::ePARTICLESYSTEM>(cmInputs, cmIdx, gpuShapes,
		trimeshShape, trimeshTransformId, shape, particleTransformId);

	PxsCachedTransform trimeshTransformCached;
	PxsCachedTransform_ReadWarp(trimeshTransformCached, transformCache + trimeshTransformId);

	const PxReal contactDist = contactDistance[trimeshTransformId] + contactDistance[particleTransformId];
	const PxReal restDist = restDistances[cmIdx];

	const PxNodeIndex rigidId = shapeToRigidRemapTable[trimeshTransformId];

	const PxU32 particleSystemId = shape.particleOrSoftbodyId;
	PxgParticleSystem& particleSystem = particleSystems[particleSystemId];

	// If there is no diffuse particles or is not using PBD, skip this function
	if (isDiffuseParticlesThread && (particleSystem.mCommonData.mMaxDiffuseParticles == 0))
		return;

	const PxU8 * trimeshGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape.hullOrMeshPtr);

	// Lane 0 of each warp populates this warp's shared scratch.
	// Note the buffer roles differ per pass: for diffuse particles the origin
	// positions are "current" and the sorted positions "predicted", mirroring
	// the regular-particle origin/sorted pairing.
	if (idxInWarp == 0)
	{
		s_warpScratch->meshToWorld = trimeshTransformCached.transform;
		s_warpScratch->trimeshScale = trimeshShape.scale;

		readTriangleMesh(trimeshGeomPtr, s_warpScratch->bv32PackedNodes, s_warpScratch->trimeshVerts, s_warpScratch->trimeshTriIndices);

		if (isDiffuseParticlesThread)
		{
			s_warpScratch->currentPositions = reinterpret_cast<float4*>(particleSystem.mDiffuseSortedOriginPos_LifeTime);
			s_warpScratch->predictedPositions = reinterpret_cast<float4*>(particleSystem.mDiffuseSortedPos_LifeTime);
		}
		else
		{
			s_warpScratch->currentPositions = reinterpret_cast<float4*>(particleSystem.mSortedOriginPos_InvMass);
			s_warpScratch->predictedPositions = reinterpret_cast<float4*>(particleSystem.mSortedPositions_InvMass);
		}
		s_warpScratch->enableCCD = (particleSystem.mData.mFlags & PxParticleFlag::eENABLE_SPECULATIVE_CCD) > 0;
	}

	// Scratch is per-warp, so a warp-level sync suffices here.
	__syncwarp();

	PxU32 numParticles;
	PxgParticleContactInfo* PX_RESTRICT oneWayContactInfos;
	PxNodeIndex* PX_RESTRICT oneWayNodeIndices;
	PxU32* PX_RESTRICT oneWayContactCounts;

	if (isDiffuseParticlesThread)
	{
		numParticles = *particleSystem.mNumDiffuseParticles;
		oneWayContactInfos = particleSystem.mDiffuseOneWayContactInfos;
		oneWayNodeIndices = particleSystem.mDiffuseOneWayNodeIndex;
		oneWayContactCounts = particleSystem.mDiffuseOneWayContactCount;
	}
	else
	{
		numParticles = particleSystem.mCommonData.mNumParticles;
		oneWayContactInfos = particleSystem.mOneWayContactInfos;
		oneWayNodeIndices = particleSystem.mOneWayNodeIndex;
		oneWayContactCounts = particleSystem.mOneWayContactCount;
	}

	// Distribute warps of work across the fixed launch grid: each hardware
	// warp processes nbIterationsPerWarps consecutive 32-particle batches.
	const PxU32 nbWarpsRequired = (numParticles + WARP_SIZE - 1) / WARP_SIZE;
	const PxU32 totalNumWarps = PxgParticleSystemKernelGridDim::PS_MESH_COLLISION * PS_MIDPHASE_COLLISION_WAPRS_PER_BLOCK;
	const PxU32 nbIterationsPerWarps = (nbWarpsRequired + totalNumWarps - 1) / totalNumWarps;
	const PxU32* PX_RESTRICT gridParticleIndex = particleSystem.mSortedToUnsortedMapping;

	for (PxU32 i = 0; i < nbIterationsPerWarps; ++i)
	{
		const PxU32 workIndex = i + (warpIdx + WarpsPerBlock * blockIdx.x) * nbIterationsPerWarps;

		if (workIndex < nbWarpsRequired)
		{
			const PxU32 particleIndex = idxInWarp + workIndex * WARP_SIZE;
			// NOTE(review): particleIndex can exceed numParticles-1 for tail
			// lanes of the last batch, yet gridParticleIndex is read unguarded
			// here — presumably the mapping buffer is padded to a warp
			// multiple; confirm against the allocation site.
			const PxU64 particleMask = PxEncodeParticleIndex(particleSystemId, gridParticleIndex[particleIndex]);

			// NOTE(review): a filter hit makes this thread return, abandoning
			// its remaining work items (and diverging from its warp before the
			// traverser's full-warp sync calls) — verify this matches the
			// filter-pair semantics intended here.
			if (!isDiffuseParticlesThread && find(particleSystem, rigidId.getInd(), particleMask))
				return;

			// Warp-cooperative BVH traversal for this batch of 32 particles.
			PsTreeContactGenTraverser traverser(
				s_warpScratch,
				numParticles,
				particleIndex,
				isDiffuseParticlesThread,
				isTGS,
				contactDist
			);
			bv32TreeTraversal<PsTreeContactGenTraverser, WarpsPerBlock>(s_warpScratch->bv32PackedNodes, s_warpScratch->sBv32Nodes, traverser);

			traverser.finalizeFullWarp(idxInWarp, particleSystemId,
				rigidId,
				oneWayContactInfos,
				oneWayNodeIndices,
				oneWayContactCounts,
				writer,
				isDiffuseParticlesThread,
				restDist
			);
		}
	}
}
|
||||
|
||||
|
||||
// Kernel entry point for the SDF-based particle-vs-mesh collision pass.
// Launched with one block per contact-manager pair on blockIdx.x and the
// diffuse-particle pass on blockIdx.y == 1; 1024 threads per block, capped to
// one block per SM by the launch bounds (the callee uses large shared-memory
// buffers). Thin forwarding wrapper around psSdfMeshMidphaseCollision.
extern "C" __global__
__launch_bounds__(1024, 1)
void ps_sdfMeshCollisonLaunch(
	const PxReal tolerenceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxReal* PX_RESTRICT restDistances,
	const PxgShape* PX_RESTRICT gpuShapes,

	PxgParticleSystem* PX_RESTRICT particleSystems, //output
	PxNodeIndex* PX_RESTRICT shapeToRigidRemapTable,

	PxgParticleContactWriter writer
)
{
	psSdfMeshMidphaseCollision(
		tolerenceLength,
		cmInputs,
		transformCache,
		contactDistance,
		restDistances,
		gpuShapes,
		particleSystems,
		shapeToRigidRemapTable,
		writer
	);
}
|
||||
|
||||
|
||||
|
||||
// Kernel entry point for the BV32-midphase particle-vs-mesh collision pass.
// Carves one PsMidphaseScratch out of shared memory per warp (threadIdx.y)
// and forwards everything to psMeshMidphaseCollision.
extern "C" __global__
//__launch_bounds__(MIDPHASE_WARPS_PER_BLOCK * WARP_SIZE, 4)
void ps_meshCollisonLaunch(
	const bool isTGS,
	const PxReal tolerenceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxReal* PX_RESTRICT restDistances,
	const PxgShape* PX_RESTRICT gpuShapes,
	PxgParticleSystem* PX_RESTRICT particleSystems, //output
	PxNodeIndex* PX_RESTRICT shapeToRigidRemapTable,
	PxgParticleContactWriter writer
)
{
	// Raw byte storage so PsMidphaseScratch's members are not constructed
	// per thread; each warp owns one aligned slot.
	__shared__ __align__(16) PxU8 scratchMem[PS_MIDPHASE_COLLISION_WAPRS_PER_BLOCK][sizeof(PsMidphaseScratch)];

	// This warp's scratch slot, selected by the warp index within the block.
	PsMidphaseScratch* warpScratch = reinterpret_cast<PsMidphaseScratch*>(scratchMem[threadIdx.y]);

	psMeshMidphaseCollision<PS_MIDPHASE_COLLISION_WAPRS_PER_BLOCK>(
		isTGS,
		tolerenceLength,
		cmInputs,
		transformCache,
		contactDistance,
		restDistances,
		gpuShapes,
		warpScratch,
		particleSystems,
		shapeToRigidRemapTable,
		writer
	);
}
|
||||
765
engine/third_party/physx/source/gpunarrowphase/src/CUDA/sdfCollision.cuh
vendored
Normal file
765
engine/third_party/physx/source/gpunarrowphase/src/CUDA/sdfCollision.cuh
vendored
Normal file
@@ -0,0 +1,765 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __SDF_COLLISION_CUH__
|
||||
#define __SDF_COLLISION_CUH__
|
||||
|
||||
#include "cudaNpCommon.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "geometry/PxMeshScale.h"
|
||||
#include "GuDistancePointTetrahedron.h"
|
||||
#include "GuTriangleRefinement.h"
|
||||
#include "triangleMesh.cuh"
|
||||
#include "utils.cuh"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
// GPU-side view of a (possibly sparse) signed distance field stored in CUDA 3D textures.
//
// Layout is two-level:
//  - a dense, coarse "background" grid sampled through 'texture', and
//  - optional high-resolution subgrids ('textureSubgrids') in a narrow band around
//    the zero isolevel, addressed through 'subgridStartSlots'.
// When subgridSize == 0 the SDF is fully dense and only 'texture' is used.
// Sample coordinates are in fine-grid units (see PxSdfDistance for the conversion
// from local-space positions).
struct PX_ALIGN_PREFIX(16) SparseSDFTexture
{
    //Dense info - for coarse resolution background grid
    PxVec3 sdfBoxLower;       // lower corner of the SDF support box (local space)
    PxReal sdfDx;             // spacing of the fine grid
    PxVec3 sdfBoxHigher;      // upper corner of the SDF support box (local space)
    PxReal invSdfDx;          // 1 / sdfDx, converts local distances to grid units
    CUtexObject texture;      // dense (or coarse background) SDF texture

    //Sparse info - for high resolution in narrow band around isolevel 0
    CUtexObject textureSubgrids; // atlas texture holding all high-resolution subgrids

    uint3 coarseSize;         // coarse grid dimensions (= sdfDims / subgridSize)
    PxU32 subgridSize;        // fine samples per subgrid edge; 0 => dense SDF

    // Per coarse cell: packed start coordinates of its subgrid in the atlas,
    // or 0xFFFFFFFF when the cell has no subgrid (far from the surface).
    const PxU32* subgridStartSlots;

    PxReal fineToCoarse; //Inverse of subgridSize

    uint4 sdfDims;               // fine-grid dimensions of the SDF
    PxReal subgridsMinSdfValue;  // dequantization offset for subgrid samples
    PxReal subgridsSdfValueRange; // dequantization scale for subgrid samples

    // Populates all fields from a triangle mesh's precooked SDF data.
    // Note: the dense case shrinks the support box by half a cell ('margin') so
    // that trilinear filtering never reads outside the texture.
    PX_INLINE __device__ void initialize(const PxgTriangleMesh& mesh)
    {
        texture = mesh.mTexObject;

        const PxReal margin = mesh.subgridSize == 0 ? 0.5f : 0.0f;
        sdfBoxLower = mesh.meshLower + margin * PxVec3(mesh.spacing);
        sdfBoxHigher = mesh.meshLower + PxVec3((mesh.sdfDims.x - margin) * mesh.spacing, (mesh.sdfDims.y - margin) * mesh.spacing, (mesh.sdfDims.z - margin) * mesh.spacing);
        invSdfDx = 1.f / mesh.spacing;
        sdfDx = mesh.spacing;

        textureSubgrids = mesh.mTexObjectSparse;
        subgridStartSlots = mesh.subgridStarts;
        sdfDims = mesh.sdfDims;
        // PxMax(1u, ...) guards the dense case where subgridSize is 0.
        coarseSize.x = mesh.sdfDims.x / PxMax(1u, mesh.subgridSize);
        coarseSize.y = mesh.sdfDims.y / PxMax(1u, mesh.subgridSize);
        coarseSize.z = mesh.sdfDims.z / PxMax(1u, mesh.subgridSize);
        //sdfDx = 1.0f / invSdfDx; // dx;
        subgridSize = mesh.subgridSize;
        // NOTE(review): division by zero when subgridSize == 0; fineToCoarse is
        // presumably never read on the dense path (Sample branches on subgridSize
        // first) - confirm before relying on it elsewhere.
        fineToCoarse = 1.0f / mesh.subgridSize;

        subgridsMinSdfValue = mesh.subgridsMinSdfValue;
        subgridsSdfValueRange = mesh.subgridsMaxSdfValue - mesh.subgridsMinSdfValue;

        //if (threadIdx.x == 0)
        //	printf("%f, %f\n", mesh.subgridsMinSdfValue, mesh.subgridsMaxSdfValue);
    }

private:
    // Linearizes a 3D coarse-cell coordinate into the subgridStartSlots array.
    static PX_INLINE __device__ PxU32 idx(PxU32 x, PxU32 y, PxU32 z, PxU32 width, PxU32 height)
    {
        return z * width * height + y * width + x;
    }
    // Offsets 'f' by the subgrid's start position in the atlas texture.
    // 'triple' packs three 10-bit slot coordinates; each subgrid occupies
    // (subgridSize + 1)^3 texels (one texel of overlap for filtering).
    static PX_INLINE __device__ void applySubgridStart(PxU32 triple, PxVec3& f, const PxU32 subgridSize)
    {
        f.x += (triple & 1023) * (subgridSize + 1);
        triple = triple >> 10;
        f.y += (triple & 1023) * (subgridSize + 1);
        triple = triple >> 10;
        f.z += (triple & 1023) * (subgridSize + 1);
    }

    // Dequantizes a normalized subgrid sample back into a real SDF value.
    PX_INLINE __device__ PxReal applySubgridSdfScale(PxReal rawSdfValue) const
    {
        return rawSdfValue * subgridsSdfValueRange + subgridsMinSdfValue;
    }

public:
    // Samples the SDF at fine-grid coordinate 'f' (trilinear filtering done by the
    // texture hardware). Falls back to the coarse background grid where no subgrid
    // exists for the containing coarse cell.
    PX_INLINE __device__ PxReal Sample(PxVec3 f) const
    {
        if (subgridSize == 0)
        {
            //Dense sdf
            return tex3D<float>(texture, f.x + 0.5f, f.y + 0.5f, f.z + 0.5f);
        }
        else
        {
            // Coarse cell containing the sample point.
            const PxU32 xBase = PxClamp(PxU32(f.x * fineToCoarse), 0u, coarseSize.x - 1);
            const PxU32 yBase = PxClamp(PxU32(f.y * fineToCoarse), 0u, coarseSize.y - 1);
            const PxU32 zBase = PxClamp(PxU32(f.z * fineToCoarse), 0u, coarseSize.z - 1);

            const PxU32 i = idx(xBase, yBase, zBase, coarseSize.x, coarseSize.y);
            const PxU32 startSlot = subgridStartSlots[i];
            if (startSlot == 0xFFFFFFFF)
            {
                //Evaluate coarse
                f *= fineToCoarse;
                //+0.5: Align with texel center
                //https://forums.developer.nvidia.com/t/understanding-cuda-texture-2d-linear-interpolation/213924
                return tex3D<float>(texture, f.x + 0.5f, f.y + 0.5f, f.z + 0.5f);
            }
            else
            {
                // Local coordinate inside the subgrid.
                f.x = PxClamp(f.x - xBase * subgridSize, 0.0f, PxReal(subgridSize + 1));
                f.y = PxClamp(f.y - yBase * subgridSize, 0.0f, PxReal(subgridSize + 1));
                f.z = PxClamp(f.z - zBase * subgridSize, 0.0f, PxReal(subgridSize + 1));

                //Transform to subgrid storage location in 3D texture
                applySubgridStart(startSlot, f, subgridSize);

                //+0.5: Align with texel center
                //https://forums.developer.nvidia.com/t/understanding-cuda-texture-2d-linear-interpolation/213924
                PxReal v = tex3D<float>(textureSubgrids, f.x + 0.5f, f.y + 0.5f, f.z + 0.5f);

                return applySubgridSdfScale(v);
            }
        }
    }
}
PX_ALIGN_SUFFIX(16);
|
||||
|
||||
template<typename T>
|
||||
PX_INLINE __device__ PxReal PxSdfDistance(const T& texture, const PxVec3& localPos)
|
||||
{
|
||||
// clamp to SDF support
|
||||
const PxVec3 clampedGridPt = localPos.maximum(texture.sdfBoxLower).minimum(texture.sdfBoxHigher);
|
||||
|
||||
PxReal diffMag = (localPos - clampedGridPt).magnitude();
|
||||
PxVec3 f = (clampedGridPt - texture.sdfBoxLower) * texture.invSdfDx;
|
||||
|
||||
return texture.Sample(f) + diffMag;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
PX_INLINE __device__ PxVec3 PxVolumeGrad(const T& texture, const PxVec3& localPos)
|
||||
{
|
||||
PxVec3 grad;
|
||||
|
||||
const PxVec3 clampedGridPt = localPos.maximum(texture.sdfBoxLower).minimum(texture.sdfBoxHigher);
|
||||
|
||||
PxVec3 f = (clampedGridPt - texture.sdfBoxLower) * texture.invSdfDx;
|
||||
|
||||
if (f.x >= 1.0f && f.x <= texture.sdfDims.x - 2.0f &&
|
||||
f.y >= 1.0f && f.y <= texture.sdfDims.y - 2.0f &&
|
||||
f.z >= 1.0f && f.z <= texture.sdfDims.z - 2.0f)
|
||||
{
|
||||
grad.x = texture.Sample(PxVec3(f.x + 1.0f, f.y, f.z)) -
|
||||
texture.Sample(PxVec3(f.x - 1.0f, f.y, f.z));
|
||||
grad.y = texture.Sample(PxVec3(f.x, f.y + 1.0f, f.z)) -
|
||||
texture.Sample(PxVec3(f.x, f.y - 1.0f, f.z));
|
||||
grad.z = texture.Sample(PxVec3(f.x, f.y, f.z + 1.0f)) -
|
||||
texture.Sample(PxVec3(f.x, f.y, f.z - 1.0f));
|
||||
return grad;
|
||||
}
|
||||
|
||||
grad.x = PxSdfDistance(texture, localPos + PxVec3(texture.sdfDx, 0.f, 0.f)) -
|
||||
PxSdfDistance(texture, localPos - PxVec3(texture.sdfDx, 0.f, 0.f));
|
||||
grad.y = PxSdfDistance(texture, localPos + PxVec3(0.f, texture.sdfDx, 0.f)) -
|
||||
PxSdfDistance(texture, localPos - PxVec3(0.f, texture.sdfDx, 0.f));
|
||||
grad.z = PxSdfDistance(texture, localPos + PxVec3(0.f, 0.f, texture.sdfDx)) -
|
||||
PxSdfDistance(texture, localPos - PxVec3(0.f, 0.f, texture.sdfDx));
|
||||
|
||||
return grad;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
PX_INLINE __device__ PxReal PxSdfSampleWithGrad(const T& texture, const PxVec3& localPos,
|
||||
const PxVec3& sdfGradient, PxVec3& dir, PxReal tolerance = PX_MAX_F32)
|
||||
{
|
||||
const PxVec3 clampedGridPt = localPos.maximum(texture.sdfBoxLower).minimum(texture.sdfBoxHigher);
|
||||
|
||||
PxVec3 diff = (localPos - clampedGridPt);
|
||||
/*
|
||||
const PxReal diffMag = diff.magnitudeSquared();
|
||||
if (diffMag > tolerance*tolerance)
|
||||
return PX_MAX_F32;*/
|
||||
|
||||
PxVec3 f = (clampedGridPt - texture.sdfBoxLower) * texture.invSdfDx;
|
||||
|
||||
PxReal dist = texture.Sample(f);
|
||||
|
||||
if (dist < tolerance)
|
||||
{
|
||||
// estimate the contact direction based on the SDF
|
||||
// if we are outside the SDF support, add in the distance to the point
|
||||
dir = (sdfGradient.getNormalized() * PxAbs(dist) + diff).getNormalized();
|
||||
return dist + dir.dot(diff);
|
||||
}
|
||||
|
||||
return PX_MAX_F32;
|
||||
}
|
||||
|
||||
// Collides a single vertex 'v' (in the SDF's local space) against the SDF.
// Outputs the contact direction in 'dir' and returns the contact distance,
// or PX_MAX_F32 when farther than 'tolerance'.
static __device__ PxReal doVertexSDFCollision(SparseSDFTexture& texture, const PxVec3& v,
    PxVec3& dir, PxReal tolerance)
{
    PxVec3 grad = PxVolumeGrad(texture, v);

    // normalize() returns the previous length; zero means we hit a gradient
    // discontinuity (e.g. the exact center of a cube).
    const bool degenerateGradient = grad.normalize() == 0.0f;
    if (degenerateGradient)
    {
        // Pick an arbitrary unit-length gradient to move out of the discontinuity.
        grad = PxVec3(0.5448762f, 0.672269f, 0.398825f).getNormalized();
    }

    return PxSdfSampleWithGrad(texture, v, grad, dir, tolerance);
}
|
||||
|
||||
|
||||
//#define gr 1.6180339887498948482045868343656381177203091798057628621354486227f
|
||||
#define invGr 0.6180339887498948482045868343656381177203091798057628621354486227f
|
||||
|
||||
// Collides a line segment (v0, v1) against the SDF.
// Uses a golden-section search over the segment parameter to locate the point of
// (approximately) minimal SDF distance, then evaluates gradient and contact
// direction there. Outputs the closest point, the contact direction and the
// segment parameter 't'; returns the contact distance or PX_MAX_F32 when farther
// than 'tolerance'.
// NOTE(review): golden-section search assumes the distance along the segment is
// unimodal; for segments spanning multiple surface features this finds a local
// minimum only.
static __device__ PxReal doSegmentSDFCollision(SparseSDFTexture& texture, const PxVec3& v0, const PxVec3& v1,
    PxVec3& point, PxVec3& dir, PxReal tolerance, PxReal& t)
{
    //Golden section search
    PxReal a = 0.0f;
    PxReal b = 1.0f;
    // Interior probe points; invGr is 1/phi, the golden-ratio shrink factor.
    PxReal c = b - (b - a) * invGr;
    PxReal d = a + (b - a) * invGr;
    for (PxU32 iter = 0; iter < 16; ++iter)
    {
        PxVec3 pC = (1.0f - c) * v0 + c * v1;
        PxReal distC = PxSdfDistance(texture, pC);

        PxVec3 pD = (1.0f - d) * v0 + d * v1;
        PxReal distD = PxSdfDistance(texture, pD);

        // Keep the sub-interval containing the smaller distance.
        if (distC < distD)
            b = d;
        else
            a = c;

        c = b - (b - a) * invGr;
        d = a + (b - a) * invGr;
    }

    // Midpoint of the final bracket is the minimizer estimate.
    t = 0.5f * (a + b);
    PxVec3 p = (1.0f - t) * v0 + t * v1;

    PxVec3 sdfGradient = PxVolumeGrad(texture, p);
    if (sdfGradient.normalize() == 0.0f)
    {
        //We ran into a discontinuity e. g. the exact center of a cube
        //Just pick an arbitrary gradient of unit length to move out of the discontinuity
        sdfGradient = PxVec3(0.5448762f, 0.672269f, 0.398825f).getNormalized();
    }

    point = p;
    return PxSdfSampleWithGrad(texture, p, sdfGradient, dir, tolerance);
}
|
||||
|
||||
// Collides a tetrahedron (v0..v3) against the SDF.
// Runs projected gradient descent on barycentric coordinates to find the point
// inside the tet with (locally) minimal SDF distance, then evaluates gradient and
// contact direction there. Outputs the minimizing point and direction; returns the
// contact distance or PX_MAX_F32 when farther than 'tolerance'.
//
// Fix vs. original: 'eps' was declared as the double literal 1e-4, silently
// promoting every barycentric range comparison in the loop to fp64 on the device;
// it is now a float literal. No behavioral change within fp32 precision.
static __device__ PxReal doTetrahedronSDFCollision(SparseSDFTexture& texture, const PxVec3& v0, const PxVec3& v1, const PxVec3& v2, const PxVec3& v3,
    PxVec3& point, PxVec3& dir, PxReal tolerance/*, PxVec4& uvw*/)
{
    PxVec3 center = (v0 + v1 + v2 + v3) * 0.25f;
    PxVec3 p = center;

    PxReal dist = PxSdfDistance(texture, p);

    PxReal d0 = PxSdfDistance(texture, v0);
    PxReal d1 = PxSdfDistance(texture, v1);
    PxReal d2 = PxSdfDistance(texture, v2);
    PxReal d3 = PxSdfDistance(texture, v3);

    // Barycentric coordinates of the current iterate (x,y,z,w weight v0..v3).
    PxVec4 uvw = PxVec4(0.f);

    // Start the descent from whichever of {corners, centroid} is closest.
    if (d0 < d1 && d0 < d2 && d0 < d3 && d0 < dist)
    {
        dist = d0;
        p = v0;
        uvw.x = 1.f;
    }
    else if (d1 < d2 && d1 < d3 && d1 < dist)
    {
        dist = d1;
        p = v1;
        uvw.y = 1.f;
    }
    else if (d2 < d3 && d2 < dist)
    {
        dist = d2;
        p = v2;
        uvw.z = 1.f;
    }
    else if (d3 < dist)
    {
        dist = d3;
        p = v3;
        uvw.w = 1.f;
    }
    else
        uvw = PxVec4(0.25f);

    // Largest distance from the start iterate to a corner; scales the step size.
    PxReal difference = PxSqrt(
        PxMax(
            PxMax((v0 - p).magnitudeSquared(), (v1 - p).magnitudeSquared()),
            PxMax((v2 - p).magnitudeSquared(), (v3 - p).magnitudeSquared())));

    const PxReal eps = 1e-4f; // float literal: avoids fp64 promotion in device code
    const PxReal toleranceSq = 1e-3f*1e-3f;

    PxVec3 sdfGradient;
    PxReal step = 1.f / (2.f*difference);

    for (PxU32 iter = 0; iter < 16; ++iter)
    {
        sdfGradient = PxVolumeGrad(texture, p);

        // why do we normalize the gradient here?
        if (sdfGradient.normalize() == 0.0f)
        {
            //We ran into a discontinuity e. g. the exact center of a cube
            //Just pick an arbitrary gradient of unit length to move out of the discontinuity
            sdfGradient = PxVec3(0.5448762f, 0.672269f, 0.398825f).getNormalized();
        }

        // Directional derivatives of the SDF w.r.t. each barycentric coordinate.
        PxReal dfdu = sdfGradient.dot(v0 - p);
        PxReal dfdv = sdfGradient.dot(v1 - p);
        PxReal dfdw = sdfGradient.dot(v2 - p);
        PxReal dfd4 = sdfGradient.dot(v3 - p);

        // Gradient step with geometrically decaying step size.
        PxVec4 newUVW = uvw;
        newUVW.x -= step * dfdu;
        newUVW.y -= step * dfdv;
        newUVW.z -= step * dfdw;
        newUVW.w -= step * dfd4;

        step = step * 0.8f;

        // If the iterate left the valid barycentric range, project it back onto
        // the canonical tetrahedron.
        if (((newUVW.x <= -eps || newUVW.x >= 1.f + eps) || (newUVW.y <= -eps || newUVW.y >= 1.f + eps) ||
            (newUVW.z <= -eps || newUVW.z >= 1.f + eps) || (newUVW.w <= -eps || newUVW.w >= 1.f + eps)))
        {
            PxVec3 cp = Gu::closestPtPointTetrahedron(newUVW.getXYZ(), PxVec3(1.0f, 0.0f, 0.0f), PxVec3(0.0f, 1.0f, 0.0f), PxVec3(0.f, 0.f, 1.f), PxVec3(0.0f));
            newUVW.x = cp.x;
            newUVW.y = cp.y;
            newUVW.z = cp.z;
            newUVW.w = 1.0f - cp.x - cp.y - cp.z; //TODO: Verify this logic

            assert(newUVW.w >= -eps && newUVW.w <= 1.f + eps);
        }

        p = v0 * newUVW.x + v1 * newUVW.y + v2 * newUVW.z + v3 * newUVW.w;

        // Converged: the barycentric update became negligible.
        if ((uvw - newUVW).magnitudeSquared() < toleranceSq)
            break;

        uvw = newUVW;
    }

    sdfGradient = PxVolumeGrad(texture, p);

    point = p;

    dist = PxSdfSampleWithGrad(texture, p, sdfGradient, dir, tolerance);
    return dist;
}
|
||||
|
||||
// Collides a triangle (v0, v1, v2) against the SDF.
// Same scheme as doTetrahedronSDFCollision, restricted to three barycentric
// coordinates: projected gradient descent towards the point on the triangle with
// (locally) minimal SDF distance. Outputs the minimizing point and contact
// direction; returns the contact distance or PX_MAX_F32 when farther than
// 'tolerance'.
static __device__ PxReal doTriangleSDFCollision(SparseSDFTexture& texture, const PxVec3& v0, const PxVec3& v1, const PxVec3& v2,
    PxVec3& point, PxVec3& dir, PxReal tolerance)
{
    const PxReal third = 1.f / 3.f;
    PxVec3 center = (v0 + v1 + v2) * third;
    PxVec3 p = center;

    PxReal dist = PxSdfDistance(texture, p);

    PxReal d0 = PxSdfDistance(texture, v0);
    PxReal d1 = PxSdfDistance(texture, v1);
    PxReal d2 = PxSdfDistance(texture, v2);

    //PxVec3 nor = (v1 - v0).cross(v2 - v0);

    // Barycentric coordinates of the current iterate (x,y,z weight v0,v1,v2).
    PxVec3 uvw(0.f);

    // choose starting iterate among centroid and triangle vertices
    if (d0 < d1 && d0 < d2 && d0 < dist)
    {
        p = v0;
        uvw.x = 1.f;
    }
    else if (d1 < d2 && d1 < dist)
    {
        p = v1;
        uvw.y = 1.f;
    }
    else if (d2 < dist)
    {
        p = v2;
        uvw.z = 1.f;
    }
    else
        uvw = PxVec3(third);

    // Largest distance from the start iterate to a corner; scales the step size.
    PxReal difference = PxSqrt(PxMax((v0 - p).magnitudeSquared(), PxMax((v1 - p).magnitudeSquared(),
        (v2 - p).magnitudeSquared())));


    const PxReal toleranceSq = 1e-3f*1e-3f;

    PxVec3 sdfGradient;
    PxReal step = 1.f / (2.f*difference);

    for (PxU32 iter = 0; iter < 16; ++iter)
    {
        sdfGradient = PxVolumeGrad(texture, p);

        if (sdfGradient.normalize() == 0.0f)
        {
            //We ran into a discontinuity e. g. the exact center of a cube
            //Just pick an arbitrary gradient of unit length to move out of the discontinuity
            // (this is the same direction as in the other do*SDFCollision helpers,
            // stored pre-normalized here)
            sdfGradient = PxVec3(0.571846586f, 0.705545099f, 0.418566116f);
        }

        // Directional derivatives of the SDF w.r.t. each barycentric coordinate.
        PxReal dfdu = sdfGradient.dot(v0 - p);
        PxReal dfdv = sdfGradient.dot(v1 - p);
        PxReal dfdw = sdfGradient.dot(v2 - p);

        PxVec3 newUVW;

        {
            // Gradient step with geometrically decaying step size.
            newUVW = uvw;

            newUVW.x -= step * dfdu;
            newUVW.y -= step * dfdv;
            newUVW.z -= step * dfdw;

            step = step * 0.8f;

            // Project back onto the valid barycentric triangle.
            newUVW = Gu::closestPtPointBaryTriangle(newUVW);

            p = v0 * newUVW.x + v1 * newUVW.y + v2 * newUVW.z;

            // Converged: the barycentric update became negligible.
            if ((uvw - newUVW).magnitudeSquared() < toleranceSq)
            {
                /*if(iter != 0)
                printf("Iter = %i\n", iter);*/
                break;
            }

            uvw = newUVW;
        }
    }

    sdfGradient = PxVolumeGrad(texture, p);

    point = p;

    dist = PxSdfSampleWithGrad(texture, p, sdfGradient, dir, tolerance);
    return dist;
}
|
||||
|
||||
//Returns the new number of elements in the buffer
//T should be of type PxU32 or a multiple of it (uint2, uint4)
// Cooperatively appends one element per electing thread to a shared-memory buffer.
// Must be called by ALL threads of the block (it performs a block-wide scan).
// Elements that would land at or beyond TargetCount are dropped; the thread that
// writes exactly the last slot (TargetCount - 1) records, via 'offset', where the
// caller's outer scan loop should resume on its next pass.
template<typename T, PxU32 NbWarps, const PxU32 TargetCount>
__device__ PxU32 addIndexToBuffer(bool threadEmitsElement, T index, PxU32 nbElementsInBuffer, T* sharedMemoryBuffer, PxU32& offset)
{
    PxU32 total;
    // Block-wide exclusive prefix sum over the election flags; 'total' receives
    // the number of electing threads across the whole block.
    PxU32 exclusiveSum = threadBlockScanExclusive<NbWarps>(threadEmitsElement, total);

    if (threadEmitsElement)
    {
        PxU32 writeOffset = nbElementsInBuffer + exclusiveSum;

        // Drop writes that would overflow the buffer.
        if (writeOffset < TargetCount)
        {
            sharedMemoryBuffer[writeOffset] = index;
        }
        if (writeOffset == (TargetCount - 1))
        {
            //Get the first element
            offset = ((PxU32*)(&index))[0] + 1; //The thread that added the last element to the sharedIndices writes the new offset
        }
    }

    // Clamp: the buffer never reports more than TargetCount elements.
    return min(total + nbElementsInBuffer, TargetCount);
}
|
||||
|
||||
// Loads the three corners referenced by 'triangle', applies the mesh scale
// (vertex2Shape) and the transform 't', and writes them to a, b, c.
// A non-zero 'encodedSubIndex' additionally replaces the triangle by one of its
// encoded refinement sub-triangles.
PX_FORCE_INLINE __device__ void getTriangleVertices(const uint4& triangle, const float4* meshVerts, const PxMeshScale& scale, const PxTransform t,
    PxU32 encodedSubIndex, PxVec3& a, PxVec3& b, PxVec3& c)
{
    // Scale each corner into shape space first, then apply the rigid transform.
    const PxVec3 scaledA = vertex2Shape(PxLoad3(meshVerts[triangle.x]), scale.scale, scale.rotation);
    const PxVec3 scaledB = vertex2Shape(PxLoad3(meshVerts[triangle.y]), scale.scale, scale.rotation);
    const PxVec3 scaledC = vertex2Shape(PxLoad3(meshVerts[triangle.z]), scale.scale, scale.rotation);

    a = t.transform(scaledA);
    b = t.transform(scaledB);
    c = t.transform(scaledC);

    if (encodedSubIndex > 0)
        Gu::getSubTriangleEncoded(a, b, c, encodedSubIndex);
}
|
||||
|
||||
// Convenience overload: looks up the index triple for 'triangleIndex' and
// delegates to the uint4 overload above.
PX_FORCE_INLINE __device__ void getTriangleVertices(const uint4* meshInd, const float4* meshVerts, const PxMeshScale& scale, const PxTransform t,
    PxU32 triangleIndex, PxU32 encodedSubIndex, PxVec3& a, PxVec3& b, PxVec3& c)
{
    const uint4 triangle = meshInd[triangleIndex];
    getTriangleVertices(triangle, meshVerts, scale, t, encodedSubIndex, a, b, c);
}
|
||||
|
||||
// Overload taking a PxgTriangleMesh. Fix: the original duplicated the body of the
// uint4 overload verbatim; delegate instead so the scale/transform/sub-triangle
// logic lives in one place. Behavior is unchanged.
PX_FORCE_INLINE __device__ void getTriangleVertices(const PxgTriangleMesh& mesh, const PxMeshScale& scale, const PxTransform t,
    PxU32 triangleIndex, PxU32 encodedSubIndex, PxVec3& a, PxVec3& b, PxVec3& c)
{
    getTriangleVertices(mesh.indices[triangleIndex], mesh.trimeshVerts, scale, t, encodedSubIndex, a, b, c);
}
|
||||
|
||||
|
||||
// Loads one mesh vertex, applies the mesh scale (vertex2Shape) and transform 't'.
PX_FORCE_INLINE __device__ PxVec3 getVertex(const float4* meshVerts, const PxMeshScale& scale, const PxTransform t, PxU32 vertexIndex)
{
    const PxVec3 scaled = vertex2Shape(PxLoad3(meshVerts[vertexIndex]), scale.scale, scale.rotation);
    return t.transform(scaled);
}
|
||||
|
||||
// Overload taking a PxgTriangleMesh. Fix: the original duplicated the body of the
// float4* overload; delegate instead. Behavior is unchanged.
PX_FORCE_INLINE __device__ PxVec3 getVertex(const PxgTriangleMesh& mesh, const PxMeshScale& scale, const PxTransform t, PxU32 vertexIndex)
{
    return getVertex(mesh.trimeshVerts, scale, t, vertexIndex);
}
|
||||
|
||||
//Input of surface hint is optional
// Block-cooperative broadphase filter: scans tetrahedra starting at 'startIndex'
// and collects into shared memory the indices of tets whose bounding sphere comes
// within 'tolerance' of mesh1's SDF. Fills at most TargetCount entries per call;
// 'startIndex' is updated to where the next call should resume (or nbTets when the
// scan is exhausted). Must be executed by all threads of the block.
template<PxU32 NbWarps, const PxU32 TargetCount>
__device__ PxU32 findInterestingTets(PxU32 nbTets, const uint4* tetIndices, const float4* tetVerts,
    const PxMeshScale& scale1, const PxReal tolerance, const SparseSDFTexture& sdfMesh1,
    const PxTransform& aToB, PxU32& startIndex, PxU32* sharedIndices, const PxU8* surfaceHint = NULL)
{
    // Resume position for the next call; written inside addIndexToBuffer by the
    // thread that fills the last buffer slot.
    __shared__ PxU32 offset;

    if (threadIdx.x == 0)
        offset = startIndex;

    PxU32 sharedIndicesCount = 0;
    // One tet per thread per pass over the range.
    for (PxU32 i = startIndex; (i < nbTets); )
    {
        PxU32 ind = i + threadIdx.x;
        i += blockDim.x;

        bool addToBuffer = false;
        if (ind < nbTets)
        {
            // When a surface hint is provided, only surface-flagged tets are tested.
            if (!surfaceHint || surfaceHint[ind])
            {
                const uint4 tet = tetIndices[ind];
                const PxVec3 a = aToB.transform(PxLoad3(tetVerts[tet.x]));
                const PxVec3 b = aToB.transform(PxLoad3(tetVerts[tet.y]));
                const PxVec3 c = aToB.transform(PxLoad3(tetVerts[tet.z]));
                const PxVec3 d = aToB.transform(PxLoad3(tetVerts[tet.w]));

                PxVec3 centroid = (a + b + c + d) * 0.25f;

                PxReal distInSDFSpace1 = PxSdfDistance(sdfMesh1, shape2Vertex(centroid, scale1.scale, scale1.rotation));
                //PxReal boundingSphereRadius = PxSqrt(PxMax((v0 - centroid).magnitudeSquared(), (v1 - centroid).magnitudeSquared()));

                // Bounding-sphere radius around the centroid, converted into the
                // SDF's (unscaled) space using the smallest scale component so the
                // test stays conservative.
                PxReal boundingSphereRadius = PxSqrt(PxMax(PxMax((a - centroid).magnitudeSquared(), (b - centroid).magnitudeSquared()),
                    PxMax((c - centroid).magnitudeSquared(), (d - centroid).magnitudeSquared())));

                boundingSphereRadius /= PxMin(scale1.scale.x, PxMin(scale1.scale.y, scale1.scale.z));
                addToBuffer = distInSDFSpace1 < (boundingSphereRadius + tolerance);
            }
        }

        // All threads must reach the scan in addIndexToBuffer together.
        __syncthreads();
        sharedIndicesCount = addIndexToBuffer<PxU32, NbWarps, TargetCount>(addToBuffer, ind, sharedIndicesCount, sharedIndices, offset);
    }

    __syncthreads();

    // If the buffer filled up, 'offset' was advanced past startIndex: resume there.
    // Otherwise the whole range was consumed.
    if (startIndex != offset)
        startIndex = offset;
    else
        startIndex = nbTets;

    return sharedIndicesCount;
}
|
||||
|
||||
|
||||
// Block-cooperative broadphase filter for triangles (or, with useVertices=true,
// for vertices) of mesh0 against mesh1's SDF. Candidates are gathered into the
// shared buffer 'sharedIndices' as (primitiveIndex, encodedSubdivisionId) pairs,
// at most TargetCount per call; 'startIndex' is updated for the next call.
// Up to nbSubdivisionIndices previously-flagged triangles are consumed first:
// each is split into its 4 refinement children (one child per thread via
// elevateSubdivisionId). Must be executed by all threads of the block.
template<PxU32 NbWarps, const PxU32 TargetCount, bool useVertices = false>
__device__ PxU32 findInterestingTrianglesA(PxU32 mesh0NbPrimitives, const uint4* mesh0Ind, const float4* mesh0Verts,
    const PxMeshScale& scale0, const PxMeshScale& scale1, const PxReal tolerance, const SparseSDFTexture& sdfMesh1,
    const PxTransform& aToB, PxU32& startIndex, uint2* sharedIndices, PxU32 nbSubdivisionIndices, const uint2* sharedSubdivisionIndices)
{
    // Resume position for the next call; written inside addIndexToBuffer by the
    // thread that fills the last buffer slot.
    __shared__ PxU32 offset;

    if (threadIdx.x == 0)
        offset = startIndex;

    const PxU32 nbPrimitives = mesh0NbPrimitives; //Can be the number of triangles or the number of vertices depending on the template boolean useVertices
    PxU32 sharedIndicesCount = 0;
    for (PxU32 i = startIndex; (i < nbPrimitives || nbSubdivisionIndices>0) && sharedIndicesCount < TargetCount; )
    {
        PxU32 ind;
        PxU32 subdivisionLevel = 0;
        // The first 4*nbSubdivisionIndices threads each take one child of a
        // pending subdivision triangle (threadIdx.x & 3 selects the child).
        if (threadIdx.x < 4 * nbSubdivisionIndices)
        {
            uint2 v = sharedSubdivisionIndices[threadIdx.x >> 2];
            ind = v.x;
            subdivisionLevel = v.y;
            subdivisionLevel = Gu::elevateSubdivisionId(subdivisionLevel, threadIdx.x & 3);
        }
        else
        {
            // Remaining threads continue the linear scan over the primitives.
            ind = i + threadIdx.x - PxMin(TargetCount, 4 * nbSubdivisionIndices);
        }
        i += blockDim.x - PxMin(TargetCount, 4 * nbSubdivisionIndices);
        nbSubdivisionIndices = 0; //It is guaranteed that all subdivision triangles get consumed in the first run because of the limited size of sharedSubdivisionIndices


        bool addToBuffer = false;
        if (ind < nbPrimitives)
        {
            if (useVertices)
            {
                // Vertex mode: point-vs-SDF distance test only.
                PxVec3 p = getVertex(mesh0Verts, scale0, aToB, ind);
                PxReal distInSDFSpace1 = PxSdfDistance(sdfMesh1, shape2Vertex(p, scale1.scale, scale1.rotation));
                addToBuffer = distInSDFSpace1 < tolerance;
            }
            else
            {
                // Triangle mode: conservative bounding-sphere-vs-SDF test around
                // the (possibly sub-)triangle centroid.
                PxVec3 v0, v1, v2;
                getTriangleVertices(mesh0Ind, mesh0Verts, scale0, aToB, ind, subdivisionLevel, v0, v1, v2);

                const PxReal third = 1.f / 3.f;
                PxVec3 centroid = (v0 + v1 + v2) * third;

                PxReal distInSDFSpace1 = PxSdfDistance(sdfMesh1, shape2Vertex(centroid, scale1.scale, scale1.rotation));
                PxReal boundingSphereRadius = PxSqrt(PxMax((v0 - centroid).magnitudeSquared(), PxMax((v1 - centroid).magnitudeSquared(),
                    (v2 - centroid).magnitudeSquared())));
                // Smallest scale component keeps the radius conservative in SDF space.
                boundingSphereRadius /= PxMin(scale1.scale.x, PxMin(scale1.scale.y, scale1.scale.z));
                addToBuffer = distInSDFSpace1 < (boundingSphereRadius + tolerance);
            }
        }

        // All threads must reach the scan in addIndexToBuffer together.
        __syncthreads();
        sharedIndicesCount = addIndexToBuffer<uint2, NbWarps, TargetCount>(addToBuffer, make_uint2(ind, subdivisionLevel), sharedIndicesCount, sharedIndices, offset);
    }

    __syncthreads();

    // If the buffer filled up, 'offset' was advanced past startIndex: resume there.
    // Otherwise the whole range was consumed.
    if (startIndex != offset)
        startIndex = offset;
    else
        startIndex = nbPrimitives;

    return sharedIndicesCount;
}
|
||||
|
||||
|
||||
// Block-cooperative broadphase filter: scans vertices starting at 'startIndex'
// and collects into shared memory the indices of vertices within 'tolerance' of
// mesh1's SDF. At most TargetCount entries are gathered per call; 'startIndex'
// is updated to where the next call should resume (or nbVertices when done).
// Must be executed by all threads of the block.
template<PxU32 NbWarps, const PxU32 TargetCount>
__device__ PxU32 findInterestingVertices(PxU32 nbVertices, const float4* positions,
    const PxMeshScale& scale1, const PxReal tolerance, const SparseSDFTexture& sdfMesh1,
    const PxTransform& aToB, PxU32& startIndex, PxU32* sharedIndices)
{
    // Resume position for the next call; written inside addIndexToBuffer by the
    // thread that fills the last buffer slot.
    __shared__ PxU32 offset;

    if (threadIdx.x == 0)
        offset = startIndex;

    PxU32 sharedIndicesCount = 0;
    // One vertex per thread per pass over the range.
    for (PxU32 i = startIndex; (i < nbVertices); )
    {
        PxU32 ind = i + threadIdx.x;
        i += blockDim.x;

        bool addToBuffer = false;
        if (ind < nbVertices)
        {
            // Move the vertex into mesh1's frame, then into SDF space, and test
            // its distance against the tolerance.
            PxVec3 v = aToB.transform(PxLoad3(positions[ind]));
            PxReal distInSDFSpace1 = PxSdfDistance(sdfMesh1, shape2Vertex(v, scale1.scale, scale1.rotation));
            addToBuffer = distInSDFSpace1 < tolerance;
        }

        // All threads must reach the scan in addIndexToBuffer together.
        __syncthreads();
        sharedIndicesCount = addIndexToBuffer<PxU32, NbWarps, TargetCount>(addToBuffer, ind, sharedIndicesCount, sharedIndices, offset);
    }

    __syncthreads();

    // If the buffer filled up, 'offset' was advanced past startIndex: resume there.
    // Otherwise the whole range was consumed.
    if (startIndex != offset)
        startIndex = offset;
    else
        startIndex = nbVertices;

    return sharedIndicesCount;
}
|
||||
|
||||
|
||||
// Returns the maximum squared edge length of the triangle (v0, v1, v2).
// NOTE: despite the name this is the largest squared EDGE length, not a squared
// radius about the centroid.
// Fix vs. original: the centroid ('third'/'center') was computed and never used;
// that dead code is removed. The returned value is unchanged.
PX_FORCE_INLINE __device__ PxReal triangleRadiusSquared(const PxVec3& v0, const PxVec3& v1, const PxVec3& v2)
{
    return PxMax(PxMax((v1 - v0).magnitudeSquared(), (v2 - v1).magnitudeSquared()), (v0 - v2).magnitudeSquared());
}
|
||||
|
||||
// Squared length of the SDF support box diagonal - an upper bound on the squared
// distance between any two points inside the SDF's domain.
PX_FORCE_INLINE __device__ PxReal sdfRadiusSquared(SparseSDFTexture& sdfTexture)
{
    const PxVec3 diagonal = sdfTexture.sdfBoxHigher - sdfTexture.sdfBoxLower;
    return diagonal.magnitudeSquared();
}
|
||||
|
||||
//TargetCount is actually the maximal size of sharedSubdivisionIndices. The naming could be improved...
// Block-cooperative append of (triangleIndex, encodedSubdivisionId) pairs to the
// refinement buffer used by findInterestingTrianglesA. Entries whose decoded
// subdivision level already reached 'maxRefinementLevel' are dropped, as are
// entries beyond the buffer capacity. Must be executed by all threads of the
// block (contains a block-wide scan). Returns the new element count, clamped to
// TargetCount.
template<PxU32 NbWarps, const PxU32 TargetCount>
__device__ PxU32 addToRefinementBuffer(bool addToBuffer, PxU32 ind, PxU32 subInd, PxU32 nbElementsInBuffer, uint2* sharedMemoryBuffer, PxU32 maxRefinementLevel)
{
    PxU32 subdivisionLevel, subTriangleIndex;
    Gu::decodeSubdivisionId(subInd, subdivisionLevel, subTriangleIndex);
    addToBuffer = addToBuffer && subdivisionLevel < maxRefinementLevel; //Make sure a upper limit of refinement is never exceeded

    PxU32 total;
    // Block-wide exclusive prefix sum over the election flags; 'total' receives
    // the number of electing threads across the whole block.
    PxU32 exclusiveSum = threadBlockScanExclusive<NbWarps>(addToBuffer, total);

    if (addToBuffer)
    {
        PxU32 writeOffset = nbElementsInBuffer + exclusiveSum;

        // Drop writes that would overflow the buffer.
        if (writeOffset < TargetCount)
        {
            sharedMemoryBuffer[writeOffset] = make_uint2(ind, subInd);
        }
    }

    return min(total + nbElementsInBuffer, TargetCount);
}
|
||||
}
|
||||
#endif
|
||||
418
engine/third_party/physx/source/gpunarrowphase/src/CUDA/softbodyHFMidPhase.cu
vendored
Normal file
418
engine/third_party/physx/source/gpunarrowphase/src/CUDA/softbodyHFMidPhase.cu
vendored
Normal file
@@ -0,0 +1,418 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxsTransformCache.h"
|
||||
#include "PxsContactManagerState.h"
|
||||
#include "foundation/PxBounds3.h"
|
||||
#include "PxgContactManager.h"
|
||||
#include "PxgSoftBody.h"
|
||||
#include "geometry/PxGeometry.h"
|
||||
#include "PxsMaterialCore.h"
|
||||
#include "cudaNpCommon.h"
|
||||
|
||||
#include "PxgCommonDefines.h"
|
||||
#include "heightfieldUtil.cuh"
|
||||
#include "utils.cuh"
|
||||
#include "sphereTriangle.cuh"
|
||||
#include "deformableCollision.cuh"
|
||||
#include "assert.h"
|
||||
#include "reduction.cuh"
|
||||
#include <stdio.h>
|
||||
#include "PxgSoftBodyCoreKernelIndices.h"
|
||||
#include "PxgNpKernelIndices.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Empty host-side stub; presumably referenced from module-loading code to force
// this CUDA translation unit (and its kernels) to be linked in - TODO(review): confirm.
extern "C" __host__ void initNarrowphaseKernels16() {}
|
||||
|
||||
// Per-block scratch data for the softbody-vs-heightfield midphase; sized to fit
// within 1 KB (compile-time asserted below).
struct sbHeightfieldScratch
{

    // Soft-body tetrahedral mesh data (read-only).
    const float4 * PX_RESTRICT tetmeshVerts;
    const uint4 * PX_RESTRICT tetmeshTetIndices;
    // Per-tet flag; presumably marks surface tetrahedra so interior tets can be
    // skipped - TODO(review): confirm semantics.
    const PxU8* PX_RESTRICT tetmeshTetSurfaceHint;
    PxU32 numTets;

    // Heightfield pose and shape, copied locally for the duration of the kernel.
    PxTransform heightfieldTransform;
    PxgShape heightfieldShape;

}; PX_COMPILE_TIME_ASSERT(sizeof(sbHeightfieldScratch) <= 1024);
|
||||
|
||||
|
||||
//__device__ static inline PxU32 sbHeightfieldComputePairs(
|
||||
// const PxU32 minColumn,
|
||||
// const PxU32 maxColumn,
|
||||
// const PxU32 minRow,
|
||||
// const PxU32 maxRow,
|
||||
// const PxU32 nbRows,
|
||||
// const PxU32 nbCols,
|
||||
// PxHeightFieldSample* samples,
|
||||
// const PxU32 miny,
|
||||
// const PxU32 maxy
|
||||
//)
|
||||
//{
|
||||
//
|
||||
// PxU32 nbPairs = 0;
|
||||
//
|
||||
// const PxU32 columnSpan = maxColumn - minColumn;
|
||||
//
|
||||
// //we have two materials corresponding to one vertexIndex, so each thread will deal with one of the materials
|
||||
// const PxU32 totalNumProcessed = (maxRow - minRow) * columnSpan * 2;
|
||||
//
|
||||
// for (PxU32 i = 0; i < totalNumProcessed; ++i)
|
||||
// {
|
||||
//
|
||||
// const PxU32 index = i / 2;
|
||||
// const PxU32 vertexIndex = (minRow + index / columnSpan) * nbCols + (minColumn + index % columnSpan);
|
||||
// assert(isValidVertex(vertexIndex, nbRows, nbCols));
|
||||
// PxReal h0 = getHeight(vertexIndex, samples);
|
||||
// PxReal h1 = getHeight(vertexIndex + 1, samples);
|
||||
// PxReal h2 = getHeight(vertexIndex + nbCols, samples);
|
||||
// PxReal h3 = getHeight(vertexIndex + nbCols + 1, samples);
|
||||
// const bool con0 = maxy < h0 && maxy < h1 && maxy < h2 && maxy < h3;
|
||||
// const bool con1 = miny > h0 && miny > h1 && miny > h2 && miny > h3;
|
||||
//
|
||||
// if (!(con0 || con1))
|
||||
// {
|
||||
// const PxHeightFieldSample& sample = getSample(vertexIndex, samples);
|
||||
//
|
||||
// const bool isMaterial1 = (i & 1) ? 1 : 0;
|
||||
// PxU32 material = isMaterial1 ? sample.materialIndex1 : sample.materialIndex0;
|
||||
// if (material != PxHeightFieldMaterial::eHOLE)
|
||||
// nbPairs++;
|
||||
// }
|
||||
// }//end of totalNumProcessed
|
||||
//
|
||||
// return nbPairs;
|
||||
//}
|
||||
|
||||
//__device__ static inline void sbHeightfieldOutputPairs(
|
||||
// const PxU32 minColumn,
|
||||
// const PxU32 maxColumn,
|
||||
// const PxU32 minRow,
|
||||
// const PxU32 maxRow,
|
||||
// const PxU32 nbRows,
|
||||
// const PxU32 nbCols,
|
||||
// PxHeightFieldSample* samples,
|
||||
// const PxU32 miny,
|
||||
// const PxU32 maxy,
|
||||
//
|
||||
// const PxU32 cmInd,
|
||||
// const PxU32 tetrahedronInd,
|
||||
// const PxU32 startOffset,
|
||||
//
|
||||
// uint4* stackPtr //output
|
||||
//
|
||||
//)
|
||||
//{
|
||||
// PxU32 pairCount = 0;
|
||||
// const PxU32 columnSpan = maxColumn - minColumn;
|
||||
//
|
||||
// //we have two materials corresponding to one vertexIndex, so each thread will deal with one of the materials
|
||||
// const PxU32 totalNumProcessed = (maxRow - minRow) * columnSpan * 2;
|
||||
//
|
||||
// for (PxU32 i = 0; i < totalNumProcessed; ++i)
|
||||
// {
|
||||
//
|
||||
// PxU32 triangleIdx = 0xFFffFFff;
|
||||
//
|
||||
// const PxU32 index = i / 2;
|
||||
// const PxU32 vertexIndex = (minRow + index / columnSpan) * nbCols + (minColumn + index % columnSpan);
|
||||
// assert(isValidVertex(vertexIndex, nbRows, nbCols));
|
||||
// PxReal h0 = getHeight(vertexIndex, samples);
|
||||
// PxReal h1 = getHeight(vertexIndex + 1, samples);
|
||||
// PxReal h2 = getHeight(vertexIndex + nbCols, samples);
|
||||
// PxReal h3 = getHeight(vertexIndex + nbCols + 1, samples);
|
||||
// const bool con0 = maxy < h0 && maxy < h1 && maxy < h2 && maxy < h3;
|
||||
// const bool con1 = miny > h0 && miny > h1 && miny > h2 && miny > h3;
|
||||
//
|
||||
// if (!(con0 || con1))
|
||||
// {
|
||||
// const PxHeightFieldSample& sample = getSample(vertexIndex, samples);
|
||||
//
|
||||
// const bool isMaterial1 = (i & 1) ? 1 : 0;
|
||||
// PxU32 material = isMaterial1 ? sample.materialIndex1 : sample.materialIndex0;
|
||||
// if (material != PxHeightFieldMaterial::eHOLE)
|
||||
// {
|
||||
// triangleIdx = isMaterial1 ? ((vertexIndex << 1) + 1) : (vertexIndex << 1);
|
||||
//
|
||||
// stackPtr[startOffset + pairCount] = make_uint4(cmInd, tetrahedronInd, triangleIdx, 0);
|
||||
//
|
||||
// pairCount++;
|
||||
// }
|
||||
// }
|
||||
// }//end of totalNumProcessed
|
||||
//}
|
||||
|
||||
// Midphase for softbody tetrahedra vs a heightfield, executed warp-wide (one
// tetrahedron per lane; warpScan/__shfl_sync below require all lanes present).
// Each lane counts the heightfield triangles whose cells its tetrahedron bound
// overlaps, lane 0 reserves a contiguous range in the global pair stack with a
// single atomicAdd, and the (cmIdx, tetrahedron, triangle) pairs are written out.
//
// scratch          per-warp cached mesh/heightfield data
// tetBound         world-space AABB of the tetrahedron (already fattened by the
//                  contact distance at the call site)
// hint             nonzero only for surface tetrahedra; others are skipped
// stackPtr/midphasePairsNum  output pair stack and its global counter
__device__ static inline void sbHeightfieldMidphaseCore(
	sbHeightfieldScratch* scratch,
	const PxU32 cmIdx,
	const PxBounds3 tetBound,
	const PxU32 tetrahedronIdx,
	const PxU32 workIndex,
	const PxU8 hint,
	const PxU32 numTets,
	const PxU32 stackSize,
	uint4* PX_RESTRICT stackPtr,			//output
	PxU32* PX_RESTRICT midphasePairsNum		//output
)
{
	PxU32 nbPairs = 0;
	PxBounds3 localBound;
	PxU32 nbRows = 0;
	PxU32 nbCols = 0;
	PxHeightFieldSample* samples = NULL;
	PxU32 minRow = 0;
	PxU32 maxRow = 0;
	PxU32 minColumn = 0;
	PxU32 maxColumn = 0;

	if (hint && tetrahedronIdx < numTets)
	{
		PxTransform heightfieldTransform = scratch->heightfieldTransform;

		PxgShape heightfieldShape = scratch->heightfieldShape;
		const PxReal oneOverHeightScale = 1.f / heightfieldShape.scale.scale.y;
		const PxReal oneOverRowScale = 1.f / PxAbs(heightfieldShape.scale.scale.x);
		const PxReal oneOverColScale = 1.f / PxAbs(heightfieldShape.scale.scale.z);

		//bound is in world space, we need to transform the bound to the local space of height field
		localBound = PxBounds3::transformFast(heightfieldTransform.getInverse(), tetBound);

		//undo the heightfield scaling so the bound lives in sample-grid units
		localBound.minimum.x *= oneOverRowScale;
		localBound.minimum.y *= oneOverHeightScale;
		localBound.minimum.z *= oneOverColScale;

		localBound.maximum.x *= oneOverRowScale;
		localBound.maximum.y *= oneOverHeightScale;
		localBound.maximum.z *= oneOverColScale;

		//a negative row scale mirrors the grid, so min/max swap
		if (heightfieldShape.scale.scale.x < 0.f)
		{
			const PxReal temp = localBound.minimum.x;
			localBound.minimum.x = localBound.maximum.x;
			localBound.maximum.x = temp;
		}

		//same for a negative column scale
		if (heightfieldShape.scale.scale.z < 0.f)
		{
			const PxReal temp = localBound.minimum.z;
			localBound.minimum.z = localBound.maximum.z;
			localBound.maximum.z = temp;
		}

		//heightfield data layout: [nbRows, nbCols, samples...]
		PxU32* heightfieldData = reinterpret_cast<PxU32*>(heightfieldShape.hullOrMeshPtr);
		nbRows = heightfieldData[0];
		nbCols = heightfieldData[1];
		samples = reinterpret_cast<PxHeightFieldSample*>(&heightfieldData[2]);

		// Early-out unless the local-space bound overlaps the sample grid.
		// FIX: the negation must cover the whole disjunction. Previously it was
		// applied to the first term only ("!(A) || B || C || D"), so bounds lying
		// entirely outside the grid in z (or x max) still fell through to the
		// (clamped) pair counting below instead of being rejected.
		if (!((localBound.minimum.x > nbRows - 1) || (localBound.minimum.z > nbCols - 1)
			|| (localBound.maximum.x < 0) || (localBound.maximum.z < 0)))
		{
			minRow = getMinRow(localBound.minimum.x, nbRows);
			maxRow = getMaxRow(localBound.maximum.x, nbRows);
			minColumn = getMinColumn(localBound.minimum.z, nbCols);
			maxColumn = getMaxColumn(localBound.maximum.z, nbCols);

			//two candidate triangles (materials) per overlapped cell
			if ((2 * (maxColumn - minColumn) * (maxRow - minRow)) > 0)
			{
				nbPairs = heightfieldComputePairs(minColumn, maxColumn, minRow, maxRow, nbRows, nbCols,
					samples, localBound.minimum.y, localBound.maximum.y);
			}
		}
	}

	//exclusive prefix sum across the warp: this lane's offset into the reserved range
	PxU32 pairOffset = warpScan<AddOpPxU32, PxU32>(FULL_MASK, nbPairs) - nbPairs;

	//the last lane's (offset + count) is the warp's total pair count
	PxU32 validCount = 0;
	if (threadIdx.x == (WARP_SIZE - 1))
	{
		validCount = pairOffset + nbPairs;
	}
	validCount = __shfl_sync(FULL_MASK, validCount, WARP_SIZE - 1);

	//lane 0 reserves the whole warp's range in the global stack with one atomic
	PxU32 startIndex = 0xffffffff;
	if (threadIdx.x == 0 && validCount > 0)
	{
		startIndex = atomicAdd(midphasePairsNum, validCount);
	}

	startIndex = __shfl_sync(FULL_MASK, startIndex, 0);

	if (startIndex != 0xffffffff)
	{
		//lanes with nbPairs == 0 have a zero row/column span and write nothing
		heightfieldOutputPairs(minColumn, maxColumn, minRow, maxRow, nbRows, nbCols,
			samples, localBound.minimum.y, localBound.maximum.y, cmIdx, tetrahedronIdx, startIndex + pairOffset, stackSize, stackPtr);
	}
}
|
||||
|
||||
// Softbody (tet mesh) vs heightfield midphase kernel. 64 warps cooperate on
// each contact-manager pair: every lane takes one tetrahedron, builds its
// contact-distance-fattened world bound, and sbHeightfieldMidphaseCore emits
// (cm, tetrahedron, heightfield-triangle) candidate pairs into the stack.
extern "C" __global__ void sb_heightfieldMidphaseGeneratePairsLaunch(
	const PxU32 numNPWorkItems,				// number of contact-manager pairs
	const PxReal toleranceLength,			// not referenced by this kernel
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,

	PxgSoftBody* PX_RESTRICT softbodies,
	const PxU32 stackSizeBytes,

	PxU8* stackPtr,							//output: pair stack, written as uint4 entries
	PxU32* midphasePairsNum					//output: global pair counter (atomicAdd)
)
{

	__shared__ PxU32 scratchMem[1024];

	// NOTE(review): every warp in the block reinterprets the SAME shared array as
	// its scratch, and lane 0 of each warp rewrites it below; this is only safe
	// if all warps of a block process the same contact manager (or write
	// identical values) -- verify against the launch configuration.
	sbHeightfieldScratch* s_warpScratch = reinterpret_cast<sbHeightfieldScratch*>(scratchMem);

	const PxU32 nbWarpsPerCm = 64; // 64 warp(2048 threads) to deal with one pair
	const PxU32 nbWarpsRequired = nbWarpsPerCm * numNPWorkItems;

	const PxU32 nbThreadsPerCm = 64 * WARP_SIZE;

	const PxU32 numWarpPerBlock = MIDPHASE_WARPS_PER_BLOCK;
	const PxU32 totalNumWarps = PxgSoftBodyKernelGridDim::SB_SBMIDPHASE * numWarpPerBlock;

	const PxU32 warpIndex = threadIdx.y;

	//round up so the fixed grid covers all required warps' worth of work
	const PxU32 nbIterationsPerWarps = (nbWarpsRequired + totalNumWarps - 1) / totalNumWarps;

	/*if (warpIndex == 0 && threadIdx.x == 0 && blockIdx.x == 0)
	{
		printf("nbIterationsPerWarps %i nbWarpsRequired %i\n", nbIterationsPerWarps, nbWarpsRequired);
	}*/

	for (PxU32 i = 0; i < nbIterationsPerWarps; ++i)
	{
		//each warp owns a contiguous run of nbIterationsPerWarps work indices
		const PxU32 workIndex = i + (warpIndex + numWarpPerBlock * blockIdx.x) * nbIterationsPerWarps;

		if (workIndex < nbWarpsRequired)
		{
			unsigned int cmIdx = workIndex / nbWarpsPerCm;

			//load the (softbody, heightfield) shape pair for this contact manager
			PxgShape softbodyShape, heightfieldShape;
			PxU32 softbodyCacheRef, heightfieldCacheRef;
			LoadShapePairWarp<PxGeometryType::eTETRAHEDRONMESH, PxGeometryType::eHEIGHTFIELD>(cmInputs, cmIdx, gpuShapes,
				softbodyShape, softbodyCacheRef, heightfieldShape, heightfieldCacheRef);


			PxsCachedTransform heightfieldTransformCache;
			PxsCachedTransform_ReadWarp(heightfieldTransformCache, transformCache + heightfieldCacheRef);

			const PxgSoftBody& softbody = softbodies[softbodyShape.particleOrSoftbodyId];

			PxU8* meshGeomPtr;
			meshGeomPtr = reinterpret_cast<PxU8 *>(softbody.mTetMeshData);

			const PxU32 idx = threadIdx.x;

			//lane 0 fills the shared scratch for the whole warp
			if (idx == 0)
			{
				s_warpScratch->tetmeshVerts = softbody.mPosition_InvMass;
				s_warpScratch->tetmeshTetIndices = softbody.mTetIndices;
				s_warpScratch->tetmeshTetSurfaceHint = softbody.mTetMeshSurfaceHint;

				//mesh header: (nbVerts, nbPrimitives, maxDepth, nbBv32TreeNodes)
				const uint4 nbVerts_nbPrimitives_maxDepth_nbBv32TreeNodes = *reinterpret_cast<const uint4 *>(meshGeomPtr);
				s_warpScratch->numTets = nbVerts_nbPrimitives_maxDepth_nbBv32TreeNodes.y;

				//advance past header + BV32 tree blobs (result currently unused below)
				meshGeomPtr += sizeof(uint4) + sizeof(const Gu::BV32DataPacked)* nbVerts_nbPrimitives_maxDepth_nbBv32TreeNodes.w
					+sizeof(const Gu::BV32DataDepthInfo) * nbVerts_nbPrimitives_maxDepth_nbBv32TreeNodes.z
					+ sizeof(PxU32) * nbVerts_nbPrimitives_maxDepth_nbBv32TreeNodes.w;

				/*const PxU8* surfaceHint = reinterpret_cast<const PxU8*>(meshGeomPtr);
				s_warpScratch->tetmeshTetSurfaceHint = surfaceHint;*/

				s_warpScratch->heightfieldTransform = heightfieldTransformCache.transform;
				s_warpScratch->heightfieldShape = heightfieldShape;
			}

			//make lane 0's scratch writes visible to the rest of the warp
			__syncwarp();

			const PxReal contactDist = contactDistance[softbodyCacheRef] + contactDistance[heightfieldCacheRef];

			const PxU32 nbTets = s_warpScratch->numTets;

			const PxU32 nbIter = (nbTets + nbThreadsPerCm - 1) / nbThreadsPerCm;

			const uint4* tetIndices = s_warpScratch->tetmeshTetIndices;
			const PxU8* surfaceHint = s_warpScratch->tetmeshTetSurfaceHint;

			for (PxU32 j = 0; j < nbIter; ++j)
			{
				//this lane's slot among the 64*WARP_SIZE threads assigned to the cm
				const PxU32 wrappedThreadIndex = (workIndex * WARP_SIZE + threadIdx.x) % nbThreadsPerCm;

				const PxU32 tetrahedronIdx = wrappedThreadIndex + nbThreadsPerCm * j;

				PxBounds3 tetBound = PxBounds3::empty();
				PxU8 hint = 1;
				if (tetrahedronIdx < nbTets)
				{
					//avoid generate contacts if the tetrahedron isn't a surface tetrahedron
					hint = surfaceHint[tetrahedronIdx];

					if (hint)
					{
						const uint4 tetIdx = tetIndices[tetrahedronIdx];
						const PxVec3 worldV0 = PxLoad3(s_warpScratch->tetmeshVerts[tetIdx.x]);
						const PxVec3 worldV1 = PxLoad3(s_warpScratch->tetmeshVerts[tetIdx.y]);
						const PxVec3 worldV2 = PxLoad3(s_warpScratch->tetmeshVerts[tetIdx.z]);
						const PxVec3 worldV3 = PxLoad3(s_warpScratch->tetmeshVerts[tetIdx.w]);

						tetBound = tetBoundingBox(worldV0, worldV1, worldV2, worldV3);

						//expand by contact distance so nearly-touching pairs are found
						tetBound.fattenFast(contactDist);
					}
				}

				uint4* tStackPtr = reinterpret_cast<uint4*>(stackPtr);
				const PxU32 stackSize = stackSizeBytes / sizeof(uint4);

				//warp-wide: every lane must reach this call, even with hint == 0
				sbHeightfieldMidphaseCore(
					s_warpScratch,
					cmIdx,
					tetBound,
					tetrahedronIdx,
					workIndex,
					hint,
					nbTets,
					stackSize,
					tStackPtr,
					midphasePairsNum
				);
			}
		}
	}
}
|
||||
526
engine/third_party/physx/source/gpunarrowphase/src/CUDA/softbodyMidPhase.cu
vendored
Normal file
526
engine/third_party/physx/source/gpunarrowphase/src/CUDA/softbodyMidPhase.cu
vendored
Normal file
@@ -0,0 +1,526 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "foundation/PxBasicTemplates.h"
|
||||
#include "foundation/PxBounds3.h"
|
||||
#include "foundation/PxMath.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
|
||||
#include "geometry/PxGeometry.h"
|
||||
|
||||
#include "GuBV32.h"
|
||||
#include "cudaNpCommon.h"
|
||||
|
||||
#include "PxgContactManager.h"
|
||||
#include "PxgConvexConvexShape.h"
|
||||
#include "PxgFEMCloth.h"
|
||||
#include "PxgNpKernelIndices.h"
|
||||
#include "PxgParticleSystem.h"
|
||||
#include "PxgSoftBody.h"
|
||||
|
||||
#include "PxsTransformCache.h"
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
#include "copy.cuh"
|
||||
#include "dataReadWriteHelper.cuh"
|
||||
#include "femMidphaseScratch.cuh"
|
||||
#include "utils.cuh"
|
||||
#include "sbMidphaseScratch.cuh"
|
||||
#include "deformableCollision.cuh"
|
||||
|
||||
#include "bv32Traversal.cuh"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Empty host-side stub; presumably referenced from host code to force this
// translation unit's kernels to be linked/registered (initNarrowphaseKernels* convention).
extern "C" __host__ void initNarrowphaseKernels13() {}
|
||||
|
||||
// BV32-traversal visitor: tests softbody surface tetrahedra against a fixed
// query AABB (mBox) and pushes intersecting (cmIdx, tetIndex) pairs onto the
// global midphase stack. Used by bv32TreeTraversal with warp-wide ("FullWarp")
// callbacks, so all lanes must stay converged through the visitor calls.
struct SoftbodyTreeTraverser
{
	sbMidphaseScratch* s_warpScratch;		// per-warp cached mesh pointers / BV32 nodes
	const PxBounds3& mBox;					// query bound (reference to the caller's AABB)
	const PxU32 stackSize;					// stack capacity in uint4 entries
	uint4* PX_RESTRICT stackPtr;			// output pair stack
	PxU32* PX_RESTRICT midphasePairsNum;	// global pair counter
	unsigned int cmIdx;						// contact manager index stored in each emitted pair

	PX_FORCE_INLINE __device__ SoftbodyTreeTraverser(
		sbMidphaseScratch* s_warpScratch,
		PxBounds3& aabb,
		const PxU32 stackSize,
		uint4* PX_RESTRICT stackPtr,
		PxU32* PX_RESTRICT midphasePairsNum,
		unsigned int cmIdx)
		: s_warpScratch(s_warpScratch), mBox(aabb), stackSize(stackSize), stackPtr(stackPtr), midphasePairsNum(midphasePairsNum), cmIdx(cmIdx)
	{ }

	// Leaf test. primitiveIndex == 0xFFFFFFFF marks an inactive lane; such lanes
	// still fall through to pushOntoStackFullWarp so the full-warp push stays converged.
	PX_FORCE_INLINE __device__ void intersectPrimitiveFullWarp(PxU32 primitiveIndex, PxU32 idxInWarp) const
	{
		bool intersect = false;
		if (primitiveIndex != 0xFFFFFFFF)
		{
			//only surface tetrahedra can generate contacts
			const PxU8 hint = s_warpScratch->tetmeshSurfaceHint[primitiveIndex];

			if (hint)
			{
				uint4 tetIdx = s_warpScratch->tetmeshTetIndices[primitiveIndex];

				const PxVec3 worldV0 = PxLoad3(s_warpScratch->tetmeshVerts[tetIdx.x]);
				const PxVec3 worldV1 = PxLoad3(s_warpScratch->tetmeshVerts[tetIdx.y]);
				const PxVec3 worldV2 = PxLoad3(s_warpScratch->tetmeshVerts[tetIdx.z]);
				const PxVec3 worldV3 = PxLoad3(s_warpScratch->tetmeshVerts[tetIdx.w]);

				//degenerate tets are skipped entirely
				if (isValidTet(worldV0, worldV1, worldV2, worldV3))
				{
					intersect = tetBoundingBox(worldV0, worldV1, worldV2, worldV3).intersects(mBox);
				}
			}
		}

		pushOntoStackFullWarp(intersect, midphasePairsNum, stackPtr, stackSize, make_uint4(cmIdx, primitiveIndex, 0, 0));
	}

	//node test: descend only into BV32 nodes whose bounds overlap the query box
	PX_FORCE_INLINE __device__ bool intersectBoxFullWarp(bool hasBox, const PxVec3& min, const PxVec3& max) const
	{
		if (hasBox)
			return mBox.intersects(PxBounds3(min, max));
		else
			return false;
	}
};
|
||||
|
||||
|
||||
// Softbody-vs-rigid midphase: one warp per contact manager. The warp caches the
// softbody tet-mesh layout in its shared scratch, fattens the rigid shape's
// world AABB by the contact distance, and traverses the softbody's BV32 tree
// (SoftbodyTreeTraverser) to emit candidate (cmIdx, tetrahedron) pairs.
template<unsigned int WarpsPerBlock>
__device__ static inline void sbMidphaseCore(
	PxU32 numNPWorkItems,
	const PxReal toleranceLength,			// not referenced by this function
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,	// not referenced by this function
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	const PxgSoftBody* PX_RESTRICT softbodies,

	const PxU32 stackSizeBytes,
	PxU8* PX_RESTRICT stackPtr, //output
	PxU32* PX_RESTRICT midphasePairsNum, //output

	sbMidphaseScratch* s_warpScratch		// this warp's shared scratch area
)
{
	//wrap index
	const unsigned int warpIdx = threadIdx.y;
	//one contact manager per warp
	unsigned int cmIdx = warpIdx + blockIdx.x * blockDim.y;

	//unsigned mask_cmIdx = __ballot_sync(FULL_MASK, cmIdx < numNPWorkItems);
	if (cmIdx < numNPWorkItems)
	{
		//first shape is the tet mesh; second is whatever rigid shape it is paired with
		PxgShape softbodyShape, rigidShape;
		PxU32 softbodyCacheRef, rigidCacheRef;
		LoadShapePairWarp<PxGeometryType::eTETRAHEDRONMESH>(cmInputs, cmIdx, gpuShapes,
			softbodyShape, softbodyCacheRef, rigidShape, rigidCacheRef);

		const PxU32 softbodyId = softbodyShape.particleOrSoftbodyId;
		const PxgSoftBody& softbody = softbodies[softbodyId];

		PxU8 * tetmeshGeomPtr = reinterpret_cast<PxU8 *>(softbody.mTetMeshData);

		//mesh header: (nbVerts, nbTets, maxDepth, nbBv32TreeNodes)
		const uint4 nbVerts_nbTets_maxDepth_nbBv32TreeNodes = *reinterpret_cast<const uint4 *>(tetmeshGeomPtr);
		tetmeshGeomPtr += sizeof(uint4);

		//lane 0 fills the shared scratch for the whole warp
		if (threadIdx.x == 0)
		{
			s_warpScratch->tetmeshVerts = softbody.mPosition_InvMass;
			s_warpScratch->tetmeshTetIndices = softbody.mTetIndices;
			s_warpScratch->tetmeshSurfaceHint = softbody.mTetMeshSurfaceHint;

			//const PxU32 & numVerts = nbVerts_nbTets_maxDepth_nbBv32TreeNodes.x;
			//const PxU32 & numTets = nbVerts_nbTets_maxDepth_nbBv32TreeNodes.y;
			//const PxU32 & maxDepth = nbVerts_nbTets_maxDepth_nbBv32TreeNodes.z;
			const PxU32 & nbBv32PackedNodes = nbVerts_nbTets_maxDepth_nbBv32TreeNodes.w;

			//printf("maxDepth %i numVerts %i numTets %i nbBv32TreeNodes %i\n", maxDepth, numVerts, numTets, nbBv32TreeNodes);

			Gu::BV32DataPacked* bv32PackedNodes = reinterpret_cast<Gu::BV32DataPacked*>(tetmeshGeomPtr);
			s_warpScratch->bv32PackedNodes = bv32PackedNodes;

			//advance past the BV32 blobs (pointer not used after this point)
			tetmeshGeomPtr += sizeof(const Gu::BV32DataPacked)* nbBv32PackedNodes
				+ sizeof(const Gu::BV32DataDepthInfo) * nbVerts_nbTets_maxDepth_nbBv32TreeNodes.z
				+ sizeof(PxU32) * nbVerts_nbTets_maxDepth_nbBv32TreeNodes.w;
		}

		//make lane 0's scratch writes visible to the rest of the warp
		__syncwarp();


		//ML: this is the world bound AABB
		PxBounds3 aabb;
		PxBounds3_ReadWarp(aabb, bounds + rigidCacheRef);
		//PxReal contactDist = convexShape.contactOffset + trimeshShape.contactOffset;
		const PxReal contactDist = contactDistance[softbodyCacheRef] + contactDistance[rigidCacheRef];
		//fatten so nearly-touching pairs are still reported
		aabb.fattenFast(contactDist);

		uint4* tStackPtr = reinterpret_cast<uint4*>(stackPtr);
		const PxU32 stackSize = stackSizeBytes / sizeof(uint4);
		bv32TreeTraversal<const SoftbodyTreeTraverser, WarpsPerBlock>(s_warpScratch->bv32PackedNodes, s_warpScratch->sBv32Nodes, SoftbodyTreeTraverser(s_warpScratch, aabb, stackSize, tStackPtr, midphasePairsNum, cmIdx));

		/*if (threadIdx.x == 0 && nbPairsPerCM > 0)
			printf("nbPairsPerCM %i\n", nbPairsPerCM);*/
	}
}
|
||||
|
||||
// Kernel entry point for the softbody-vs-rigid midphase: carves a per-warp
// scratch region out of block shared memory and forwards all arguments to
// sbMidphaseCore (one warp per contact-manager work item).
extern "C" __global__
//__launch_bounds__(MIDPHASE_WARPS_PER_BLOCK * WARP_SIZE, 4)
void sb_midphaseGeneratePairsLaunch(
	PxU32 numWorkItems,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	const PxgSoftBody* PX_RESTRICT softbodies,

	const PxU32 stackSizeBytes,
	PxU8* PX_RESTRICT stackPtr, //output
	PxU32* PX_RESTRICT midphasePairsNum //output
)
{
	//WARP_SIZE * 7 PxU32 words per warp back one sbMidphaseScratch each
	__shared__ PxU32 scratchMem[MIDPHASE_WARPS_PER_BLOCK][WARP_SIZE * 7];

	//each warp (threadIdx.y) views its own slice of the shared array as scratch
	sbMidphaseScratch* const warpScratch = reinterpret_cast<sbMidphaseScratch*>(scratchMem[threadIdx.y]);

	sbMidphaseCore<MIDPHASE_WARPS_PER_BLOCK>(
		numWorkItems,
		toleranceLength,
		cmInputs,
		transformCache,
		bounds,
		contactDistance,
		gpuShapes,
		softbodies,
		stackSizeBytes,
		stackPtr,
		midphasePairsNum,
		warpScratch);
}
|
||||
|
||||
//struct sbParticleMidphaseScratch
|
||||
//{
|
||||
// const float4 * PX_RESTRICT tetmeshVerts;
|
||||
// const uint4 * PX_RESTRICT tetmeshTetIndices;
|
||||
// const PxU8* PX_RESTRICT tetmeshSurfaceHint;
|
||||
//
|
||||
// const Gu::BV32DataDepthInfo* PX_RESTRICT bv32DepthInfo;
|
||||
// const PxU32* PX_RESTRICT bv32RemapPackedNodeIndex;
|
||||
// //bv32 tree
|
||||
// Gu::BV32DataPacked* bv32PackedNodes;
|
||||
// int nbPackedNodes;
|
||||
// //statck for traversal
|
||||
// int sBv32Nodes[192]; //6 depth of the bv32 tree
|
||||
// PxU32 sBv32ActiveParticles[192];
|
||||
//};
|
||||
//PX_COMPILE_TIME_ASSERT(sizeof(sbParticleMidphaseScratch) <= WARP_SIZE * 15 * sizeof(PxU32));
|
||||
|
||||
|
||||
|
||||
|
||||
// BV32-traversal visitor: tests softbody tetrahedra against a fixed query AABB
// built from a point query (particle or cloth vertex) and pushes intersecting
// (cmIdx, queryIndex, tetIndex) triples onto the midphase stack. Unlike
// SoftbodyTreeTraverser it stores the box by value, records the query index in
// each pair, and does NOT filter on the surface hint.
struct SoftbodyBoxTraverser
{
	femMidphaseScratch* PX_RESTRICT s_warpScratch;	// per-warp cached mesh pointers / BV32 nodes
	const PxBounds3 mBox;							// query bound (owned copy, built from boxMin/boxMax)
	const PxU32 queryIndex;							// particle / cloth-vertex index being queried
	const PxU32 cmIdx;								// contact manager index stored in each pair
	const PxU32 stackSize;							// stack capacity in uint4 entries
	uint4* PX_RESTRICT stackPtr; //output
	PxU32* midphasePairs; //output

	PX_FORCE_INLINE __device__ SoftbodyBoxTraverser(
		femMidphaseScratch* PX_RESTRICT s_warpScratch,
		const PxVec3 boxMin,
		const PxVec3 boxMax,
		const PxU32 queryIndex,
		const PxU32 cmIdx,
		const PxU32 stackSize,
		uint4* PX_RESTRICT stackPtr, //output
		PxU32* midphasePairs) //output
		: s_warpScratch(s_warpScratch), mBox(boxMin, boxMax), queryIndex(queryIndex), cmIdx(cmIdx),
		stackSize(stackSize), stackPtr(stackPtr), midphasePairs(midphasePairs)
	{ }

	// Leaf test. primitiveIndex == 0xFFFFFFFF marks an inactive lane; such lanes
	// still fall through to pushOntoStackFullWarp so the full-warp push stays converged.
	PX_FORCE_INLINE __device__ void intersectPrimitiveFullWarp(PxU32 primitiveIndex, PxU32 idxInWarp) const
	{
		bool intersect = false;
		if (primitiveIndex != 0xFFFFFFFF)
		{
			uint4 tetIdx = s_warpScratch->meshVertsIndices[primitiveIndex];

			const PxVec3 worldV0 = PxLoad3(s_warpScratch->meshVerts[tetIdx.x]);
			const PxVec3 worldV1 = PxLoad3(s_warpScratch->meshVerts[tetIdx.y]);
			const PxVec3 worldV2 = PxLoad3(s_warpScratch->meshVerts[tetIdx.z]);
			const PxVec3 worldV3 = PxLoad3(s_warpScratch->meshVerts[tetIdx.w]);

			//degenerate tets are skipped entirely
			if (isValidTet(worldV0, worldV1, worldV2, worldV3))
			{
				intersect = tetBoundingBox(worldV0, worldV1, worldV2, worldV3).intersects(mBox);
			}
		}

		pushOntoStackFullWarp(intersect, midphasePairs, stackPtr, stackSize, make_uint4(cmIdx, queryIndex, primitiveIndex, 0));
	}

	//node test: descend only into BV32 nodes whose bounds overlap the query box
	PX_FORCE_INLINE __device__ bool intersectBoxFullWarp(bool hasBox, const PxVec3& min, const PxVec3& max) const
	{
		if (hasBox)
			return mBox.intersects(PxBounds3(min, max));
		else
			return false;
	}
};
|
||||
|
||||
// Particle-system vs softbody midphase kernel. blockIdx.y selects the contact
// manager; the block copies the PxgParticleSystem into shared memory, then each
// warp takes particles in a strided loop and traverses the softbody BV32 tree
// with a per-particle box (position +/- contact distance).
extern "C" __global__
//__launch_bounds__(MIDPHASE_WARPS_PER_BLOCK * WARP_SIZE, 4)
void sb_psMidphaseGeneratePairsLaunch(
	PxU32 numWorkItems,
	const PxReal toleranceLength,			// not referenced by this kernel
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxgShape* PX_RESTRICT gpuShapes,
	const PxsCachedTransform* PX_RESTRICT transformCache,	// not referenced by this kernel
	const PxBounds3* PX_RESTRICT bounds,					// not referenced by this kernel
	const PxReal* PX_RESTRICT contactDistance,
	const PxgSoftBody* PX_RESTRICT softbodies,
	PxgParticleSystem* PX_RESTRICT particleSystems,

	const PxU32 stackSizeBytes,
	PxU8* PX_RESTRICT stackPtr, //output
	PxU32* PX_RESTRICT midphasePairsNum //output
)
{
	//block-shared copy of the particle system descriptor (filled by warpCopy below)
	__shared__ __align__(16) PxU8 particleSystemMemory[sizeof(PxgParticleSystem)];
	PxgParticleSystem& sParticleSystem = *(reinterpret_cast<PxgParticleSystem*>(particleSystemMemory));
	__shared__ femMidphaseScratch scratchMem[MIDPHASE_WARPS_PER_BLOCK];

	//each warp gets its own scratch slot
	femMidphaseScratch* s_warpScratch = &scratchMem[threadIdx.y];

	if (blockIdx.y < numWorkItems)
	{
		//const PxU32 globalThreadIdx = threadIdx.y * WARP_SIZE + threadIdx.x + blockIdx.x*blockDim.x*blockDim.y;
		const PxU32 globalWarpIdx = threadIdx.y + blockIdx.x*blockDim.y;

		//load the (particle system, softbody) shape pair for this contact manager
		PxgShape particleShape, softbodyShape;
		PxU32 particleCacheRef, softbodyCacheRef;
		LoadShapePairWarp<PxGeometryType::ePARTICLESYSTEM, PxGeometryType::eTETRAHEDRONMESH>(cmInputs, blockIdx.y, gpuShapes,
			particleShape, particleCacheRef, softbodyShape, softbodyCacheRef);

		const PxU32 particleSystemId = particleShape.particleOrSoftbodyId;

		//each thread read 16 byte
		uint4* srcParticleSystem = reinterpret_cast<uint4*>(&particleSystems[particleSystemId]);
		uint4* dstParticleSystem = reinterpret_cast<uint4*>(&sParticleSystem);
		warpCopy<uint4>(dstParticleSystem, srcParticleSystem, sizeof(PxgParticleSystem));


		const PxU32 softbodyId = softbodyShape.particleOrSoftbodyId;
		const PxgSoftBody& softbody = softbodies[softbodyId];

		PxU8 * tetmeshGeomPtr = reinterpret_cast<PxU8 *>(softbody.mTetMeshData);

		//const uint4 nbVerts_nbTets_maxDepth_nbBv32TreeNodes = *reinterpret_cast<const uint4 *>(tetmeshGeomPtr);
		//skip the (nbVerts, nbTets, maxDepth, nbBv32TreeNodes) header
		tetmeshGeomPtr += sizeof(uint4);

		/*if (workIndex == 0 && threadIdx.x == 0)
			printf("particleSystemId %i softbodyId %i\n", particleSystemId, softbodyId);*/

		//lane 0 of each warp fills that warp's scratch
		if (threadIdx.x == 0)
		{
			s_warpScratch->meshVerts = softbody.mPosition_InvMass;
			s_warpScratch->meshVertsIndices = softbody.mTetIndices;

			Gu::BV32DataPacked* bv32PackedNodes = reinterpret_cast<Gu::BV32DataPacked*>(tetmeshGeomPtr);
			s_warpScratch->bv32PackedNodes = bv32PackedNodes;

		}

		//wait for the particle-system copy and all scratch writes before reading them
		__syncthreads();

		PxReal cDistance = contactDistance[particleCacheRef] + contactDistance[softbodyCacheRef];

		//const PxU32 threadBaseAddress = globalThreadIdx & (~31);

		const PxU32 nbParticles = sParticleSystem.mCommonData.mNumParticles;

		//const PxU32 NbThreads = blockDim.x*blockDim.y*gridDim.x;
		const PxU32 NbWarps = blockDim.y*gridDim.x;

		float4* sortedPose = reinterpret_cast<float4*>(sParticleSystem.mSortedPositions_InvMass);

		uint4* tStackPtr = reinterpret_cast<uint4*>(stackPtr);
		const PxU32 stackSize = stackSizeBytes / sizeof(uint4);

		//for (PxU32 i = threadBaseAddress, particleIndex = globalThreadIdx; i < nbParticles; i+= NbThreads, particleIndex += NbThreads)
		//warp-strided loop: each warp handles one particle per iteration
		for (PxU32 i = globalWarpIdx; i < nbParticles; i += NbWarps)
		{
			float4 pos = make_float4(0.f);
			//PxU32 particleIndex = 0xFFFFFFFF;

			//if(threadIdx.x == 0)
			PxU32 particleIndex = i;
			pos = sortedPose[particleIndex];


			/*if (particleIndex < nbParticles)
			{
				pos = sortedPose[particleIndex];
			}*/

			const PxVec3 particlePos(pos.x, pos.y, pos.z);

			//query the tet-mesh BV32 tree with the particle's fattened point box
			bv32TreeTraversal<const SoftbodyBoxTraverser, MIDPHASE_WARPS_PER_BLOCK>(s_warpScratch->bv32PackedNodes, s_warpScratch->sBv32Nodes, SoftbodyBoxTraverser(s_warpScratch, particlePos - PxVec3(cDistance), particlePos + PxVec3(cDistance), particleIndex,
				blockIdx.y, stackSize, tStackPtr, midphasePairsNum));
		}
	}
}
|
||||
|
||||
|
||||
// Softbody vs FEM-cloth vertex midphase. blockIdx.y selects the contact
// manager; each warp takes cloth vertices in a strided loop and traverses the
// softbody BV32 tree with a per-vertex box (position +/- contact distance),
// emitting (cmIdx, vertexIndex, tetIndex) pairs via SoftbodyBoxTraverser.
template<unsigned int WarpsPerBlock>
__device__ static inline void sb_clothVertMidphaseCore(
	const PxReal toleranceLength,			// not referenced by this function
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,	// not referenced by this function
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	const PxgSoftBody* PX_RESTRICT softbodies,
	const PxgFEMCloth* PX_RESTRICT clothes,
	const PxU32 stackSizeBytes,
	const PxReal nbCollisionPairUpdatesPerTimestep,	// >1 enables the tightened-bounds early-out below
	PxU8* PX_RESTRICT stackPtr, //output
	PxU32* PX_RESTRICT midphasePairsNum, //output
	femMidphaseScratch* s_warpScratch
)
{
	const PxU32 cmIdx = blockIdx.y;

	// each block deals with one pair
	{
		const PxU32 globalWarpIdx = threadIdx.y + blockIdx.x*blockDim.y;

		//load the (softbody, cloth) shape pair for this contact manager
		PxgShape softbodyShape, clothShape;
		PxU32 softbodyCacheRef, clothCacheRef;
		LoadShapePairWarp<PxGeometryType::eTETRAHEDRONMESH, PxGeometryType::eTRIANGLEMESH>(cmInputs, cmIdx, gpuShapes,
			softbodyShape, softbodyCacheRef, clothShape, clothCacheRef);

		const PxgSoftBody& softbody = softbodies[softbodyShape.particleOrSoftbodyId];
		const PxgFEMCloth& cloth = clothes[clothShape.particleOrSoftbodyId];

		//skip the (nbVerts, nbTets, maxDepth, nbBv32TreeNodes) header
		PxU8 * tetmeshGeomPtr = reinterpret_cast<PxU8 *>(softbody.mTetMeshData);
		tetmeshGeomPtr += sizeof(uint4);

		//lane 0 of each warp fills that warp's scratch
		if (threadIdx.x == 0)
		{
			s_warpScratch->meshVerts = softbody.mPosition_InvMass;
			s_warpScratch->meshVertsIndices = softbody.mTetIndices;

			Gu::BV32DataPacked* bv32PackedNodes = reinterpret_cast<Gu::BV32DataPacked*>(tetmeshGeomPtr);
			s_warpScratch->bv32PackedNodes = bv32PackedNodes;
		}

		__syncthreads();

		// if cloth collision pair is updated more than once per time step, early-out the influence of extended
		// cloth bounding box in broad phase.
		// (The cloth bound is shrunk back by the extra contact-distance padding
		// before the overlap test; the return is uniform per block since both
		// bounds depend only on cmIdx.)
		if (nbCollisionPairUpdatesPerTimestep > 1.f)
		{
			PxBounds3 clothBB, softbodyBB;
			PxBounds3_ReadWarp(clothBB, bounds + clothCacheRef);
			PxBounds3_ReadWarp(softbodyBB, bounds + softbodyCacheRef);

			clothBB.minimum += (nbCollisionPairUpdatesPerTimestep - 1.f) * PxVec3(contactDistance[clothCacheRef]);
			clothBB.maximum -= (nbCollisionPairUpdatesPerTimestep - 1.f) * PxVec3(contactDistance[clothCacheRef]);

			if (!clothBB.intersects(softbodyBB)) return;
		}

		const PxReal cDistance = contactDistance[softbodyCacheRef] + contactDistance[clothCacheRef];

		const PxU32 NbWarps = blockDim.y*gridDim.x;

		const PxU32 nbVerts = cloth.mNbVerts;

		const float4* positions = cloth.mPosition_InvMass;

		uint4* tStackPtr = reinterpret_cast<uint4*>(stackPtr);
		const PxU32 stackSize = stackSizeBytes / sizeof(uint4);

		//warp-strided loop: each warp handles one cloth vertex per iteration
		for (PxU32 i = globalWarpIdx; i < nbVerts; i += NbWarps)
		{
			const float4 tPos = positions[i];

			const PxVec3 pos(tPos.x, tPos.y, tPos.z);

			//query the tet-mesh BV32 tree with the vertex's fattened point box
			bv32TreeTraversal<const SoftbodyBoxTraverser, WarpsPerBlock>(s_warpScratch->bv32PackedNodes, s_warpScratch->sBv32Nodes, SoftbodyBoxTraverser(s_warpScratch, pos - PxVec3(cDistance), pos + PxVec3(cDistance), i,
				blockIdx.y, stackSize, tStackPtr, midphasePairsNum));
		}
	}
}
|
||||
|
||||
extern "C" __global__
//__launch_bounds__(MIDPHASE_WARPS_PER_BLOCK * WARP_SIZE, 4)
// Kernel entry point for the softbody-vs-FEM-cloth vertex midphase.
// Allocates the per-warp shared-memory scratch area and forwards every
// argument to sb_clothVertMidphaseCore, which traverses the softbody's
// BV32 tree per cloth vertex and appends candidate overlap pairs to the
// stack buffer (stackPtr / midphasePairsNum).
void sb_clothVertMidphaseGeneratePairsLaunch(
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	const PxgSoftBody* PX_RESTRICT softbodies,
	const PxgFEMCloth* PX_RESTRICT clothes,
	const PxU32 stackSizeBytes,
	const PxReal nbCollisionPairUpdatesPerTimestep,
	PxU8* PX_RESTRICT stackPtr, //output
	PxU32* PX_RESTRICT midphasePairsNum //output
)
{
	// One scratch region per warp (indexed by threadIdx.y); each region is
	// WARP_SIZE * 18 PxU32 words and is reinterpreted by the core as a
	// femMidphaseScratch.
	__shared__ PxU32 scratchMem[MIDPHASE_WARPS_PER_BLOCK][WARP_SIZE * 18];

	sb_clothVertMidphaseCore<MIDPHASE_WARPS_PER_BLOCK>(
		toleranceLength,
		cmInputs,
		transformCache,
		bounds,
		contactDistance,
		gpuShapes,
		softbodies,
		clothes,
		stackSizeBytes,
		nbCollisionPairUpdatesPerTimestep,
		stackPtr,
		midphasePairsNum,
		(femMidphaseScratch*)scratchMem[threadIdx.y]
	);
}
|
||||
2623
engine/third_party/physx/source/gpunarrowphase/src/CUDA/softbodyPrimitives.cu
vendored
Normal file
2623
engine/third_party/physx/source/gpunarrowphase/src/CUDA/softbodyPrimitives.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1475
engine/third_party/physx/source/gpunarrowphase/src/CUDA/softbodySoftbodyMidPhase.cu
vendored
Normal file
1475
engine/third_party/physx/source/gpunarrowphase/src/CUDA/softbodySoftbodyMidPhase.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
840
engine/third_party/physx/source/gpunarrowphase/src/CUDA/sphereCollision.cuh
vendored
Normal file
840
engine/third_party/physx/source/gpunarrowphase/src/CUDA/sphereCollision.cuh
vendored
Normal file
@@ -0,0 +1,840 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __SPHERE_COLLISION_CUH__
|
||||
#define __SPHERE_COLLISION_CUH__
|
||||
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxTransform.h"
|
||||
#include "distanceSegmentSegment.cuh"
|
||||
|
||||
//0 is sphere, 1 is sphere, point = [x, y, z, pen];
|
||||
__device__ __forceinline__ static
|
||||
PxU32 spheresphere(const PxTransform& transform0, const PxTransform& transform1, const PxReal r0,
|
||||
const PxReal r1, const PxReal contactDist, float4& outPointPen, PxVec3& outNormal)
|
||||
{
|
||||
const PxVec3& p0 = transform0.p;
|
||||
const PxVec3& p1 = transform1.p;
|
||||
|
||||
const PxVec3 _delta = p0 - p1;
|
||||
const PxReal distanceSq = _delta.dot(_delta);
|
||||
const PxReal radiusSum = r0 + r1;
|
||||
const PxReal inflatedSum = radiusSum + contactDist;
|
||||
|
||||
if ((inflatedSum * inflatedSum) > distanceSq)
|
||||
{
|
||||
const PxReal eps = 1e-4f;
|
||||
const PxReal dist = PxSqrt(distanceSq);
|
||||
const PxVec3 normal = (eps >= dist) ? PxVec3(1.0f, 0.f, 0.f) : (_delta * (1.f / dist));
|
||||
const PxVec3 point = p1 + (normal * r1);
|
||||
const PxReal pen = dist - radiusSum;
|
||||
|
||||
outPointPen = make_float4(point.x, point.y, point.z, pen);
|
||||
outNormal = normal;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Sphere-vs-sphere, normal-only variant working directly on the two centers.
// On overlap writes the contact normal (from sphere 1 towards sphere 0) plus
// the signed penetration into outNormalPen ([nx, ny, nz, pen]).
// Returns the number of contacts generated (0 or 1).
__device__ __forceinline__ static
PxU32 spheresphere(const PxVec3& p0, const PxVec3& p1, const PxReal r0,
	const PxReal r1, const PxReal contactDist, float4& outNormalPen)
{
	const PxVec3 centerDelta = p0 - p1;
	const PxReal sqDist = centerDelta.dot(centerDelta);
	const PxReal combinedRadius = r0 + r1;
	const PxReal acceptDist = combinedRadius + contactDist;

	// Reject when the centers are at least the inflated radius sum apart.
	if (sqDist >= acceptDist * acceptDist)
		return 0;

	const PxReal separation = PxSqrt(sqDist);
	// Degenerate case: (near-)coincident centers — fall back to a fixed axis.
	const PxVec3 dir = (separation > 1e-4f) ? centerDelta * (1.f / separation) : PxVec3(1.0f, 0.f, 0.f);

	outNormalPen = make_float4(dir.x, dir.y, dir.z, separation - combinedRadius);
	return 1;
}
|
||||
|
||||
// Returns the squared distance from point p to the segment [a, b].
// param receives the clamped parametric position t in [0, 1] of the closest
// point on the segment (0 for a degenerate zero-length segment).
__device__ __forceinline__ static
PxReal distancePointSegmentSquared(const PxVec3 a, const PxVec3 b, const PxVec3 p, PxReal& param)
{
	const PxVec3 ap = p - a;
	const PxVec3 ab = b - a;

	const PxReal nom = ap.dot(ab);
	const PxReal denom = ab.dot(ab);
	// Only divide when the segment has nonzero length: the previous code
	// evaluated nom / denom unconditionally, so a degenerate segment produced
	// NaN/Inf that was fed through PxClamp before being discarded.
	const PxReal t = denom > 0.f ? PxClamp(nom / denom, 0.f, 1.f) : 0.f;
	const PxVec3 v = ap - (ab * t);
	param = t;
	return v.dot(v);
}
|
||||
|
||||
//0 is sphere, 1 is capsule
|
||||
__device__ __forceinline__ static
|
||||
PxU32 spherecapsule(const PxTransform& transform0, const PxTransform& transform1, const PxReal sphereRadius,
|
||||
const PxReal capsuleRadius, const PxReal halfHeight, const PxReal contactDist, float4& outPointPen, PxVec3& outNormal)
|
||||
{
|
||||
|
||||
//Sphere in world space
|
||||
const PxVec3& sphereCenter = transform0.p;
|
||||
const PxQuat& q1 = transform1.q;
|
||||
const PxVec3& p1 = transform1.p;
|
||||
|
||||
const PxVec3 basisVector0 = q1.getBasisVector0();
|
||||
const PxVec3 tmp0 = basisVector0 * halfHeight;
|
||||
const PxVec3 s = p1 + tmp0;
|
||||
const PxVec3 e = p1 - tmp0;
|
||||
|
||||
const PxReal radiusSum = sphereRadius + capsuleRadius;
|
||||
const PxReal inflatedSum = radiusSum + contactDist;
|
||||
|
||||
PxReal t;
|
||||
const PxReal squareDist = distancePointSegmentSquared(s, e, sphereCenter, t);
|
||||
const PxReal sqInflatedSum = inflatedSum * inflatedSum;
|
||||
|
||||
// Collision detection
|
||||
if (sqInflatedSum > squareDist)
|
||||
{
|
||||
const PxVec3 p = s + (e - s) * t;//V3ScaleAdd(V3Sub(e, s), t, s);
|
||||
const PxVec3 dir = sphereCenter - p;// V3Sub(sphereCenter, p);
|
||||
|
||||
const PxReal length = PxSqrt(dir.magnitudeSquared());
|
||||
const PxVec3 normal = length > PX_EPS_REAL ? (dir * (1.f / length)) : PxVec3(1.0f, 0.f, 0.f);
|
||||
const PxVec3 point = sphereCenter - normal * sphereRadius;
|
||||
const PxReal pen = PxSqrt(squareDist) - radiusSum;
|
||||
|
||||
outPointPen = make_float4(point.x, point.y, point.z, pen);
|
||||
outNormal = normal;
|
||||
|
||||
/*printf("outPoint(%f, %f, %f), pen %f\n", outPointPen.x, outPointPen.y, outPointPen.z, outPointPen.w);
|
||||
printf("outNormal(%f, %f, %f)\n", outNormal.x, outNormal.y, outNormal.z, 0.f);*/
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
//0 is sphere, 1 is capsule
|
||||
__device__ __forceinline__ static
|
||||
PxU32 spherecapsule(const PxTransform& transform0, const PxTransform& transform1, const PxReal sphereRadius,
|
||||
const PxReal capsuleRadius, const PxReal halfHeight, const PxReal contactDist, float4& outNormalPen, PxReal* outT = NULL)
|
||||
{
|
||||
|
||||
//Sphere in world space
|
||||
const PxVec3& sphereCenter = transform0.p;
|
||||
const PxQuat& q1 = transform1.q;
|
||||
const PxVec3& p1 = transform1.p;
|
||||
|
||||
const PxVec3 basisVector0 = q1.getBasisVector0();
|
||||
const PxVec3 tmp0 = basisVector0 * halfHeight;
|
||||
const PxVec3 s = p1 + tmp0;
|
||||
const PxVec3 e = p1 - tmp0;
|
||||
|
||||
const PxReal radiusSum = sphereRadius + capsuleRadius;
|
||||
const PxReal inflatedSum = radiusSum + contactDist;
|
||||
|
||||
PxReal t;
|
||||
const PxReal squareDist = distancePointSegmentSquared(s, e, sphereCenter, t);
|
||||
const PxReal sqInflatedSum = inflatedSum * inflatedSum;
|
||||
if (outT != NULL)
|
||||
{
|
||||
*outT = 1.0f - t;
|
||||
}
|
||||
|
||||
// Collision detection
|
||||
if (sqInflatedSum > squareDist)
|
||||
{
|
||||
const PxVec3 p = s + (e - s) * t;//V3ScaleAdd(V3Sub(e, s), t, s);
|
||||
const PxVec3 dir = sphereCenter - p;// V3Sub(sphereCenter, p);
|
||||
|
||||
const PxReal length = PxSqrt(dir.magnitudeSquared());
|
||||
const PxVec3 normal = length > PX_EPS_REAL ? (dir * (1.f / length)) : PxVec3(1.0f, 0.f, 0.f);
|
||||
const PxReal pen = PxSqrt(squareDist) - radiusSum;
|
||||
|
||||
outNormalPen = make_float4(normal.x, normal.y, normal.z, pen);
|
||||
|
||||
/*printf("outPoint(%f, %f, %f), pen %f\n", outPointPen.x, outPointPen.y, outPointPen.z, outPointPen.w);
|
||||
printf("outNormal(%f, %f, %f)\n", outNormal.x, outNormal.y, outNormal.z, 0.f);*/
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
//0 is sphere, 1 is plane
|
||||
__device__ __forceinline__ static
|
||||
PxU32 sphereplane(const PxTransform& transform0, const PxTransform& transform1, const PxReal sphereRadius, const PxReal contactDist,
|
||||
float4& outPointPen, PxVec3& outNormal)
|
||||
{
|
||||
//sphere transform
|
||||
const PxVec3& p0 = transform0.p;
|
||||
const PxVec3& p1 = transform1.p;
|
||||
const PxQuat& q1 = transform1.q;
|
||||
|
||||
const PxTransform transf1(p1, q1);
|
||||
//Sphere in plane space
|
||||
const PxVec3 sphereCenterInPlaneSpace = transf1.transformInv(p0);
|
||||
|
||||
//separation / penetration
|
||||
const PxReal separation = sphereCenterInPlaneSpace.x - sphereRadius;// FSub(V3GetX(sphereCenterInPlaneSpace), radius);
|
||||
|
||||
if (contactDist >= separation)
|
||||
{
|
||||
//get the plane normal
|
||||
const PxVec3 worldNormal = q1.getBasisVector0();
|
||||
const PxVec3 worldPoint = p0 - worldNormal * sphereRadius;//V3NegScaleSub(worldNormal, radius, p0);
|
||||
|
||||
outPointPen = make_float4(worldPoint.x, worldPoint.y, worldPoint.z, separation);
|
||||
outNormal = worldNormal;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
//0 is sphere, 1 is plane
|
||||
__device__ __forceinline__ static
|
||||
PxU32 sphereplane(const PxTransform& transform0, const PxTransform& transform1, const PxReal sphereRadius, const PxReal contactDist,
|
||||
float4& outNormalPen)
|
||||
{
|
||||
//sphere transform
|
||||
const PxVec3& p0 = transform0.p;
|
||||
const PxVec3& p1 = transform1.p;
|
||||
const PxQuat& q1 = transform1.q;
|
||||
|
||||
const PxTransform transf1(p1, q1);
|
||||
//Sphere in plane space
|
||||
const PxVec3 sphereCenterInPlaneSpace = transf1.transformInv(p0);
|
||||
|
||||
//separation / penetration
|
||||
const PxReal separation = sphereCenterInPlaneSpace.x - sphereRadius;// FSub(V3GetX(sphereCenterInPlaneSpace), radius);
|
||||
|
||||
if (contactDist >= separation)
|
||||
{
|
||||
//get the plane normal
|
||||
const PxVec3 worldNormal = q1.getBasisVector0();
|
||||
outNormalPen = make_float4(worldNormal.x, worldNormal.y, worldNormal.z, separation);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
//0 is sphere, 1 is box
// Sphere (shape 0) vs box (shape 1) contact generation.
// On overlap (within contactDist) writes the world-space contact point and
// signed penetration into outPointPen ([x, y, z, pen]) and the world-space
// contact normal (pointing from the box towards the sphere) into outNormal.
// Returns the number of contacts generated (0 or 1).
__device__ __forceinline__ static
PxU32 spherebox(const PxTransform& transform0, const PxTransform& transform1, const PxReal sphereRadius,
	const PxVec3 boxExtents, const PxReal contactDist, float4& outPointPen, PxVec3& outNormal)
{
	const PxVec3& sphereOrigin = transform0.p;

	//translate sphere center into the box space
	const PxVec3 sphereCenter = transform1.transformInv(sphereOrigin);

	const PxVec3 nBoxExtents = -boxExtents;

	const PxReal inflatedSum = sphereRadius + contactDist;
	const PxReal sqInflatedSum = inflatedSum * inflatedSum;

	// Closest point to the sphere center on (or inside) the box, in box space.
	const PxReal x = PxClamp(sphereCenter.x, nBoxExtents.x, boxExtents.x);
	const PxReal y = PxClamp(sphereCenter.y, nBoxExtents.y, boxExtents.y);
	const PxReal z = PxClamp(sphereCenter.z, nBoxExtents.z, boxExtents.z);
	const PxVec3 p(x, y, z);
	const PxVec3 v = sphereCenter - p;
	const PxReal lengthSq = v.dot(v);

	if (sqInflatedSum > lengthSq)//intersect
	{
		//check whether the sphereCenter is inside the box
		const bool bX = boxExtents.x >= PxAbs(sphereCenter.x);
		const bool bY = boxExtents.y >= PxAbs(sphereCenter.y);
		const bool bZ = boxExtents.z >= PxAbs(sphereCenter.z);

		const bool bInsideBox = bX && bY && bZ;
		if (bInsideBox)//sphere center inside the box: depenetrate along the nearest face
		{
			//Pick directions and sign
			const PxVec3 absP(PxAbs(p.x), PxAbs(p.y), PxAbs(p.z));
			const PxVec3 distToSurface = boxExtents - absP;//dist from embedded center to box surface along 3 dimensions.

			//assume z is the smallest element of distToSurface
			PxVec3 localNorm = p.z >= 0.f ? PxVec3(0.f, 0.f, 1.f) : PxVec3(0.f, 0.f, -1.f);
			PxReal dist = -distToSurface.z;
			//find smallest element of distToSurface
			if (distToSurface.x <= distToSurface.y && distToSurface.x <= distToSurface.z)
			{
				localNorm = p.x >= 0.f ? PxVec3(1.f, 0.f, 0.f) : PxVec3(-1.0f, 0.f, 0.f);
				dist = -distToSurface.x;
			}
			else if (distToSurface.y <= distToSurface.z)
			{
				localNorm = p.y >= 0.f ? PxVec3(0.f, 1.f, 0.f) : PxVec3(0.f, -1.f, 0.f);
				dist = -distToSurface.y;
			}

			//separation so far is just the embedding of the center point; we still have to push out all of the radius.
			const PxVec3 normal = transform1.rotate(localNorm);
			const PxReal penetration = dist - sphereRadius;
			const PxVec3 point = sphereOrigin - normal * dist;

			outPointPen = make_float4(point.x, point.y, point.z, penetration);
			outNormal = normal;
		}
		else
		{
			//get the closest point from the center to the box surface
			const PxReal recipLength = PxRecipSqrt(lengthSq);
			const PxReal length = 1 / recipLength;
			const PxVec3 locNorm = v * recipLength;
			const PxReal penetration = length - sphereRadius;
			const PxVec3 normal = transform1.rotate(locNorm);
			const PxVec3 point = transform1.transform(p);

			outPointPen = make_float4(point.x, point.y, point.z, penetration);
			outNormal = normal;
		}
		return 1;
	}

	return 0;
}
|
||||
|
||||
//0 is sphere, 1 is box
// Sphere vs box, normal-only variant. The sphere is given directly by its
// world-space center (sphereOrigin) and radius; the box by its transform and
// half-extents. On overlap writes the world-space contact normal (from the
// box towards the sphere) plus the signed penetration into outNormalPen
// ([nx, ny, nz, pen]). Returns the number of contacts generated (0 or 1).
__device__ __forceinline__ static
PxU32 spherebox(const PxVec3& sphereOrigin, const PxTransform& transform1, const PxReal sphereRadius,
	const PxVec3 boxExtents, const PxReal contactDist, float4& outNormalPen)
{
	//translate sphere center into the box space
	const PxVec3 sphereCenter = transform1.transformInv(sphereOrigin);

	const PxVec3 nBoxExtents = -boxExtents;

	const PxReal inflatedSum = sphereRadius + contactDist;
	const PxReal sqInflatedSum = inflatedSum * inflatedSum;

	// Closest point to the sphere center on (or inside) the box, in box space.
	const PxReal x = PxClamp(sphereCenter.x, nBoxExtents.x, boxExtents.x);
	const PxReal y = PxClamp(sphereCenter.y, nBoxExtents.y, boxExtents.y);
	const PxReal z = PxClamp(sphereCenter.z, nBoxExtents.z, boxExtents.z);
	const PxVec3 p(x, y, z);
	const PxVec3 v = sphereCenter - p;
	const PxReal lengthSq = v.dot(v);

	if (sqInflatedSum > lengthSq)//intersect
	{
		//check whether the sphereCenter is inside the box
		const bool bX = boxExtents.x >= PxAbs(sphereCenter.x);
		const bool bY = boxExtents.y >= PxAbs(sphereCenter.y);
		const bool bZ = boxExtents.z >= PxAbs(sphereCenter.z);

		const bool bInsideBox = bX && bY && bZ;
		if (bInsideBox)//sphere center inside the box: depenetrate along the nearest face
		{
			//Pick directions and sign
			const PxVec3 absP(PxAbs(p.x), PxAbs(p.y), PxAbs(p.z));
			const PxVec3 distToSurface = boxExtents - absP;//dist from embedded center to box surface along 3 dimensions.

			//assume z is the smallest element of distToSurface
			PxVec3 localNorm = p.z >= 0.f ? PxVec3(0.f, 0.f, 1.f) : PxVec3(0.f, 0.f, -1.f);
			PxReal dist = -distToSurface.z;
			//find smallest element of distToSurface
			if (distToSurface.x <= distToSurface.y && distToSurface.x <= distToSurface.z)
			{
				localNorm = p.x >= 0.f ? PxVec3(1.f, 0.f, 0.f) : PxVec3(-1.0f, 0.f, 0.f);
				dist = -distToSurface.x;
			}
			else if (distToSurface.y <= distToSurface.z)
			{
				localNorm = p.y >= 0.f ? PxVec3(0.f, 1.f, 0.f) : PxVec3(0.f, -1.f, 0.f);
				dist = -distToSurface.y;
			}

			//separation so far is just the embedding of the center point; we still have to push out all of the radius.
			const PxVec3 normal = transform1.rotate(localNorm);
			const PxReal penetration = dist - sphereRadius;

			outNormalPen = make_float4(normal.x, normal.y, normal.z, penetration);
		}
		else
		{
			//get the closest point from the center to the box surface
			const PxReal recipLength = PxRecipSqrt(lengthSq);
			const PxReal length = 1 / recipLength;
			const PxVec3 locNorm = v * recipLength;
			const PxReal penetration = length - sphereRadius;
			const PxVec3 normal = transform1.rotate(locNorm);

			outNormalPen = make_float4(normal.x, normal.y, normal.z, penetration);
		}
		return 1;
	}

	return 0;
}
|
||||
|
||||
//0 is sphere, 1 is box (sphere and the returned normal are in the box's local space)
// Sphere vs box, box-local variant: sphereCenter is already expressed in the
// box's frame and the contact normal is left in that frame (no rotation back
// to world space). On overlap writes normal + signed penetration into
// outNormalPen ([nx, ny, nz, pen]). Returns the number of contacts (0 or 1).
__device__ __forceinline__ static
PxU32 spherebox(const PxVec3& sphereCenter, const PxReal sphereRadius,
	const PxVec3 boxExtents, const PxReal contactDist, float4& outNormalPen)
{
	const PxVec3 nBoxExtents = -boxExtents;

	const PxReal inflatedSum = sphereRadius + contactDist;
	const PxReal sqInflatedSum = inflatedSum * inflatedSum;

	// Closest point to the sphere center on (or inside) the box.
	const PxReal x = PxClamp(sphereCenter.x, nBoxExtents.x, boxExtents.x);
	const PxReal y = PxClamp(sphereCenter.y, nBoxExtents.y, boxExtents.y);
	const PxReal z = PxClamp(sphereCenter.z, nBoxExtents.z, boxExtents.z);
	const PxVec3 p(x, y, z);
	const PxVec3 v = sphereCenter - p;
	const PxReal lengthSq = v.dot(v);

	if (sqInflatedSum > lengthSq)//intersect
	{
		//check whether the sphereCenter is inside the box
		const bool bX = boxExtents.x >= PxAbs(sphereCenter.x);
		const bool bY = boxExtents.y >= PxAbs(sphereCenter.y);
		const bool bZ = boxExtents.z >= PxAbs(sphereCenter.z);

		const bool bInsideBox = bX && bY && bZ;
		if (bInsideBox)//sphere center inside the box: depenetrate along the nearest face
		{
			//Pick directions and sign
			const PxVec3 absP(PxAbs(p.x), PxAbs(p.y), PxAbs(p.z));
			const PxVec3 distToSurface = boxExtents - absP;//dist from embedded center to box surface along 3 dimensions.

			//assume z is the smallest element of distToSurface
			PxVec3 localNorm = p.z >= 0.f ? PxVec3(0.f, 0.f, 1.f) : PxVec3(0.f, 0.f, -1.f);
			PxReal dist = -distToSurface.z;
			//find smallest element of distToSurface
			if (distToSurface.x <= distToSurface.y && distToSurface.x <= distToSurface.z)
			{
				localNorm = p.x >= 0.f ? PxVec3(1.f, 0.f, 0.f) : PxVec3(-1.0f, 0.f, 0.f);
				dist = -distToSurface.x;
			}
			else if (distToSurface.y <= distToSurface.z)
			{
				localNorm = p.y >= 0.f ? PxVec3(0.f, 1.f, 0.f) : PxVec3(0.f, -1.f, 0.f);
				dist = -distToSurface.y;
			}

			//separation so far is just the embedding of the center point; we still have to push out all of the radius.
			const PxReal penetration = dist - sphereRadius;

			outNormalPen = make_float4(localNorm.x, localNorm.y, localNorm.z, penetration);
		}
		else
		{
			//get the closest point from the center to the box surface
			const PxReal recipLength = PxRecipSqrt(lengthSq);
			const PxReal length = 1 / recipLength;
			const PxVec3 locNorm = v * recipLength;
			const PxReal penetration = length - sphereRadius;

			outNormalPen = make_float4(locNorm.x, locNorm.y, locNorm.z, penetration);
		}
		return 1;
	}

	return 0;
}
|
||||
|
||||
|
||||
|
||||
//0 is plane, 1 is capsule
// Plane (shape 0) vs capsule (shape 1) contact generation. The plane's
// normal is its local +x axis. Up to two contacts are generated, one per
// capsule segment endpoint lying within (radius + contactDist) of the plane.
// outPointPen receives the world-space contact points + penetrations (must
// have room for 2 entries); outNormal receives the shared contact normal
// (the negated plane normal). When t is non-null it receives a blend weight
// in [0, 1] favoring whichever endpoint is deeper — presumably used by the
// caller to interpolate along the capsule axis (TODO confirm at call sites).
// Returns the number of contacts generated (0, 1 or 2).
__device__ __forceinline__ static
PxU32 planeCapsule(const PxTransform& transform0, const PxTransform& transform1,
	const PxReal radius, const PxReal halfHeight, const PxReal contactDist,
	float4 *outPointPen, PxVec3& outNormal, PxReal* t = NULL)
{
	//capsule to plane
	const PxTransform bToA = transform0.transformInv(transform1);

	//in world space
	const PxVec3 planeNormal = transform0.q.getBasisVector0().getNormalized();
	//const PxVec3 contactNormal =-planeNormal;

	outNormal = -planeNormal;
	//unit x (the plane normal in the plane's own frame; kept for documentation)
	const PxVec3 localNormal(1.f, 0.f, 0.f);

	//capsule is in the local space of plane(n = (1.f, 0.f, 0.f), d=0.f)
	const PxVec3 basisVector = bToA.q.getBasisVector0();
	const PxVec3 tmp = basisVector * halfHeight;
	const PxVec3 s = bToA.p + tmp;
	const PxVec3 e = bToA.p - tmp;

	const PxReal inflatedRadius = radius + contactDist;
	PxU32 numContacts = 0;
	// Signed distance of each endpoint to the plane is just its local x.
	const PxReal signDist0 = s.x;//V3Dot(localNormal, s);
	if (inflatedRadius >= signDist0)
	{
		const PxVec3 worldPoint = transform0.transform(s) - planeNormal * radius;
		const PxReal pen = signDist0 - radius;
		outPointPen[numContacts] = make_float4(worldPoint.x, worldPoint.y, worldPoint.z, pen);
		numContacts++;
	}

	const PxReal signDist1 = e.x;//V3Dot(localNormal, e);
	if (inflatedRadius >= signDist1)
	{
		const PxVec3 worldPoint = transform0.transform(e) - planeNormal * radius;
		const PxReal pen = signDist1 - radius;
		outPointPen[numContacts] = make_float4(worldPoint.x, worldPoint.y, worldPoint.z, pen);
		numContacts++;
	}

	if (t != NULL)
	{
		if (contactDist > 0.0f)
		{
			// Smooth tanh blend between the endpoints based on their depth
			// difference (replaces a hard clamp; saturates quickly).
			// *t = PxClamp(0.5f * (signDist1 - signDist0) / contactDist + 0.5f, 0.0f, 1.0f);
			*t = 0.5f + 0.5f * tanh(5.0f * (signDist1 - signDist0) / contactDist);
		}
		else
		{
			// No blending band available: pick the deeper endpoint outright.
			*t = signDist0 > signDist1 ? 0.0f : 1.0f;
		}
	}

	return numContacts;
}
|
||||
|
||||
|
||||
// Projects four points onto two segments and returns the (unclamped)
// parametric t-values of the projections:
//   t0, t1: p0 and p1 projected onto segment [a0, b0]
//   t2, t3: p2 and p3 projected onto segment [a1, b1]
// A degenerate (near zero-length) segment yields t = 0 for its two values.
__device__ __forceinline__ static
void pcmDistancePointSegmentTValue22(
	const PxVec3& a0, const PxVec3& b0,
	const PxVec3& a1, const PxVec3& b1,
	const PxVec3& p0, const PxVec3& p1,
	const PxVec3& p2, const PxVec3& p3,
	PxReal& t0, PxReal& t1,
	PxReal& t2, PxReal& t3)
{
	const PxVec3 axis0 = b0 - a0;
	const PxVec3 axis1 = b1 - a1;

	// Dot of each point (relative to the segment start) with the segment axis.
	const PxReal proj00 = (p0 - a0).dot(axis0);
	const PxReal proj10 = (p1 - a0).dot(axis0);
	const PxReal proj01 = (p2 - a1).dot(axis1);
	const PxReal proj11 = (p3 - a1).dot(axis1);

	// Squared lengths of the two segments.
	const PxReal sqLen0 = axis0.dot(axis0);
	const PxReal sqLen1 = axis1.dot(axis1);

	const PxReal eps = 1e-6f;
	t0 = (PxAbs(sqLen0) < eps) ? 0.f : (proj00 / sqLen0);
	t1 = (PxAbs(sqLen0) < eps) ? 0.f : (proj10 / sqLen0);
	t2 = (PxAbs(sqLen1) < eps) ? 0.f : (proj01 / sqLen1);
	t3 = (PxAbs(sqLen1) < eps) ? 0.f : (proj11 / sqLen1);
}
|
||||
|
||||
|
||||
|
||||
__device__ __forceinline__ static
|
||||
PxU32 capsuleCapsule(const PxTransform& transform0, const PxTransform& transform1,
|
||||
const PxReal radius0, const PxReal halfHeight0, const PxReal radius1, const PxReal halfHeight1,
|
||||
const PxReal contactDist,
|
||||
float4 *outPointPen, PxVec3& outNormal, PxReal* outT0 = NULL, PxReal* outT1 = NULL)
|
||||
{
|
||||
|
||||
PxU32 numContacts = 0;
|
||||
|
||||
const PxVec3 positionOffset = (transform0.p + transform1.p) * 0.5f;
|
||||
const PxVec3 p0 = transform0.p - positionOffset;
|
||||
const PxVec3 p1 = transform1.p - positionOffset;
|
||||
|
||||
const PxVec3 basisVector0 = transform0.q.getBasisVector0();
|
||||
const PxVec3 tmp0 = basisVector0 * halfHeight0;
|
||||
const PxVec3 s0 = p0 + tmp0;
|
||||
const PxVec3 e0 = p0 - tmp0;
|
||||
const PxVec3 d0 = e0 - s0;
|
||||
|
||||
const PxVec3 basisVector1 = transform1.q.getBasisVector0();
|
||||
const PxVec3 tmp1 = basisVector1 * halfHeight1;
|
||||
const PxVec3 s1 = p1 + tmp1;
|
||||
const PxVec3 e1 = p1 - tmp1;
|
||||
const PxVec3 d1 = e1 - s1;
|
||||
|
||||
const PxReal sumRadius = radius0 + radius1; //FAdd(r0, r1);
|
||||
const PxReal inflatedSum = sumRadius + contactDist;
|
||||
const PxReal inflatedSumSquared = inflatedSum * inflatedSum;
|
||||
const PxReal a = d0.dot(d0);//squared length of segment1
|
||||
const PxReal e = d1.dot(d1);//squared length of segment2
|
||||
const PxReal eps = 1e-6f;//FEps();
|
||||
|
||||
PxReal t0, t1;
|
||||
distanceSegmentSegmentSquared(s0, d0, s1, d1, t0, t1);
|
||||
if (outT0 != NULL)
|
||||
{
|
||||
*outT0 = (1.0f - t0);
|
||||
}
|
||||
if (outT1 != NULL)
|
||||
{
|
||||
*outT1 = (1.0f - t1);
|
||||
}
|
||||
|
||||
const PxVec3 closestA = s0 + d0 * t0;
|
||||
const PxVec3 closestB = s1 + d1 * t1;
|
||||
const PxVec3 vv = closestA - closestB;
|
||||
const PxReal sqDist0 = vv.dot(vv);
|
||||
|
||||
if (eps > sqDist0)
|
||||
{
|
||||
const PxVec3 _normal = transform0.q.rotateInv(PxVec3(0.f, 1.f, 0.f));
|
||||
const PxVec3 normal = _normal.getNormalized();
|
||||
|
||||
const PxVec3 _point = closestA - normal * radius0;
|
||||
const PxVec3 p = _point + positionOffset;
|
||||
const PxReal pen = -radius0;
|
||||
outPointPen[0] = make_float4(p.x, p.y, p.z, pen);
|
||||
//printf("This is two segment inline with each other");
|
||||
|
||||
outNormal = normal;
|
||||
numContacts = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (inflatedSumSquared >= sqDist0)
|
||||
{
|
||||
//check to see whether these two capsule are paralle
|
||||
const PxReal parallelTolerance = 0.9998f;
|
||||
|
||||
const PxVec3 dir0 = (eps > a) ? PxVec3(0.f) : (d0 * (1.f / PxSqrt(a)));
|
||||
const PxVec3 dir1 = (eps > e) ? PxVec3(0.f) : (d1 * (1.f / PxSqrt(e)));
|
||||
|
||||
const PxReal cos = PxAbs(dir0.dot(dir1));
|
||||
if (cos > parallelTolerance)//paralle
|
||||
{
|
||||
//tV0 is s1 project to s0e0
|
||||
//tV1 is e1 project to s0e0
|
||||
//tV2 is s0 project to s1e1
|
||||
//tV3 is e0 project to s1e1
|
||||
PxReal tV0, tV1, tV2, tV3;
|
||||
pcmDistancePointSegmentTValue22(s0, e0, s1, e1,
|
||||
s1, e1, s0, e0, tV0, tV1, tV2, tV3);
|
||||
|
||||
//printf("t(%f %f %f %f)\n", tV0, tV1, tV2, tV3);
|
||||
|
||||
if (tV0 >= 0.f && tV0 <= 1.f)
|
||||
{
|
||||
const PxVec3 projS1 = s0 + d0 * tV0;
|
||||
const PxVec3 v = projS1 - s1;
|
||||
const PxReal sqDist = v.dot(v);
|
||||
|
||||
const PxReal dist = PxSqrt(sqDist);
|
||||
const PxReal pen = dist - sumRadius;
|
||||
const PxVec3 normal = v / dist;// V3ScaleInv(v, dist);
|
||||
//assert(isFiniteVec3V(normal));
|
||||
const PxVec3 _p = projS1 - normal * radius0;// V3NegScaleSub(normal, r0, projS1);
|
||||
const PxVec3 p = _p + positionOffset;// V3Add(_p, positionOffset);
|
||||
outPointPen[numContacts] = make_float4(p.x, p.y, p.z, pen);
|
||||
outNormal = normal;
|
||||
|
||||
//printf("_p(%f, %f, %f)\n", _p.x, _p.y, _p.z);
|
||||
//printf("p(%f, %f, %f pen %f)\n", p.x, p.y, p.z, pen);
|
||||
//printf("normal(%f, %f, %f)\n", normal.x, normal.y, normal.z);
|
||||
//storeContact(p, normal, pen, contactBuffer);
|
||||
numContacts++;
|
||||
|
||||
}
|
||||
if (tV1 >= 0.f && tV1 <= 1.f)
|
||||
{
|
||||
const PxVec3 projE1 = s0 + d0 * tV1;
|
||||
const PxVec3 v = projE1 - e1;
|
||||
const PxReal sqDist = v.dot(v);
|
||||
|
||||
const PxReal dist = PxSqrt(sqDist);
|
||||
const PxReal pen = dist - sumRadius;
|
||||
const PxVec3 normal = v / dist; // V3ScaleInv(v, dist);
|
||||
//assert(isFiniteVec3V(normal));
|
||||
const PxVec3 _p = projE1 - normal * radius0;// V3NegScaleSub(normal, r0, projE1);
|
||||
const PxVec3 p = _p + positionOffset;
|
||||
//storeContact(p, normal, pen, contactBuffer);
|
||||
|
||||
outPointPen[numContacts] = make_float4(p.x, p.y, p.z, pen);
|
||||
outNormal = normal;
|
||||
|
||||
//printf("p(%f, %f, %f pen %f)\n", p.x, p.y, p.z, pen);
|
||||
//printf("normal(%f, %f, %f)\n", normal.x, normal.y, normal.z);
|
||||
numContacts++;
|
||||
|
||||
}
|
||||
|
||||
if (numContacts < 2 && (tV2 >= 0.f && tV2 <= 1.f))
|
||||
{
|
||||
const PxVec3 projS0 = s1 + d1 * tV2;
|
||||
const PxVec3 v = s0 - projS0;
|
||||
const PxReal sqDist = v.dot(v);
|
||||
|
||||
const PxReal dist = PxSqrt(sqDist);
|
||||
const PxReal pen = dist - sumRadius;
|
||||
const PxVec3 normal = v / dist;// V3ScaleInv(v, dist);
|
||||
//assert(isFiniteVec3V(normal));
|
||||
const PxVec3 _p = s0 - normal * radius0;// V3NegScaleSub(normal, r0, s0);
|
||||
const PxVec3 p = _p + positionOffset;
|
||||
outPointPen[numContacts] = make_float4(p.x, p.y, p.z, pen);
|
||||
outNormal = normal;
|
||||
|
||||
//printf("p(%f, %f, %f pen %f)\n", p.x, p.y, p.z, pen);
|
||||
//printf("normal(%f, %f, %f)\n", normal.x, normal.y, normal.z);
|
||||
numContacts++;
|
||||
|
||||
}
|
||||
|
||||
if (numContacts < 2 && (tV3 >= 0.f && tV3 <= 1.f))
|
||||
{
|
||||
const PxVec3 projE0 = s1 + d1 * tV3;// V3ScaleAdd(d1, V4GetW(t), s1);
|
||||
const PxVec3 v = e0 - projE0;// V3Sub(e0, projE0);
|
||||
const PxReal sqDist = v.dot(v);// V3Dot(v, v);
|
||||
|
||||
const PxReal dist = PxSqrt(sqDist);
|
||||
const PxReal pen = dist - sumRadius;
|
||||
const PxVec3 normal = v / dist;
|
||||
//assert(isFiniteVec3V(normal));
|
||||
const PxVec3 _p = e0 - normal * radius0;// V3NegScaleSub(normal, r0, e0);
|
||||
const PxVec3 p = _p + positionOffset;// V3Add(_p, positionOffset);
|
||||
|
||||
outPointPen[numContacts] = make_float4(p.x, p.y, p.z, pen);
|
||||
outNormal = normal;
|
||||
|
||||
//printf("p(%f, %f, %f pen %f)\n", p.x, p.y, p.z, pen);
|
||||
//printf("normal(%f, %f, %f)\n", normal.x, normal.y, normal.z);
|
||||
numContacts++;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if(numContacts == 0)
|
||||
{
|
||||
const PxVec3 normal = vv.getNormalized();
|
||||
|
||||
const PxVec3 _point = closestA - normal * radius0;
|
||||
const PxVec3 p = _point + positionOffset;
|
||||
|
||||
const PxReal dist = PxSqrt(sqDist0);
|
||||
const PxReal pen = dist - sumRadius;
|
||||
|
||||
outPointPen[0] = make_float4(p.x, p.y, p.z, pen);
|
||||
outNormal = normal;
|
||||
|
||||
//printf("one point p(%f, %f, %f pen %f)\n", p.x, p.y, p.z, pen);
|
||||
//printf("one point normal(%f, %f, %f)\n", normal.x, normal.y, normal.z);
|
||||
|
||||
numContacts = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return numContacts;
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
163
engine/third_party/physx/source/gpunarrowphase/src/CUDA/sphereTriangle.cuh
vendored
Normal file
163
engine/third_party/physx/source/gpunarrowphase/src/CUDA/sphereTriangle.cuh
vendored
Normal file
@@ -0,0 +1,163 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __SPHERE_TRIANGLE_CUH__
|
||||
#define __SPHERE_TRIANGLE_CUH__
|
||||
|
||||
//#include "shuffle.cuh"
|
||||
#include "nputils.cuh"
|
||||
#include "dataReadWriteHelper.cuh"
|
||||
#include "convexNpCommon.h"
|
||||
#include "cudaNpCommon.h"
|
||||
|
||||
#include "triangle.cuh"
|
||||
#include "stdio.h"
|
||||
|
||||
// Squared distance from point p to triangle (a, b, c), with closest-point and
// contact-feature classification. The case analysis follows the Voronoi-region
// point/triangle test from Ericson, "Real-Time Collision Detection", 5.1.5.
//
// adjIdxs carries per-edge adjacency entries for the triangle (consumed by
// isEdgeNonconvex); it is used to suppress contacts on vertex/edge features
// whose incident edges are all flagged nonconvex.
//
// Outputs:
//  closestP        - closest point on the triangle to p.
//  generateContact - whether a contact should be generated for this feature.
//  faceContact     - true only when the closest feature is the face interior.
//  mask (optional) - closest-feature id (ConvexTriIntermediateData::eV*/eE*,
//                    0 for the face interior).
// Returns |p - closestP|^2.
__device__ static PxReal distancePointTriangleSquared(const PxVec3 p,
	const PxVec3 a,
	const PxVec3 b,
	const PxVec3 c,
	const uint4 adjIdxs,
	PxVec3& closestP,
	bool& generateContact,
	bool& faceContact,
	PxU32* mask=NULL)
{
	faceContact = false;

	// Check if P in vertex region outside A
	const PxVec3 ab = b - a;
	const PxVec3 ac = c - a;
	const PxVec3 ap = p - a;
	const PxReal d1 = ab.dot(ap);
	const PxReal d2 = ac.dot(ap);
	if (d1 < 0.0f && d2 < 0.0f)
	{
		if(mask)
			*mask = PxU32(ConvexTriIntermediateData::eV0);
		// Keep the vertex contact if either incident edge is not flagged nonconvex.
		generateContact = (!isEdgeNonconvex(adjIdxs.x)) || (!isEdgeNonconvex(adjIdxs.z));
		closestP = a;
		return ap.magnitudeSquared(); // Barycentric coords 1,0,0
	}
	// Check if P in vertex region outside B
	const PxVec3 bp = p - b;
	const PxReal d3 = ab.dot(bp);
	const PxReal d4 = ac.dot(bp);
	if (d3 > 0.0f && d4 < d3)
	{
		if(mask)
			*mask = PxU32(ConvexTriIntermediateData::eV1);
		generateContact = (!isEdgeNonconvex(adjIdxs.x)) || (!isEdgeNonconvex(adjIdxs.y));
		closestP = b;
		return bp.magnitudeSquared(); // Barycentric coords 0,1,0
	}
	// Check if P in vertex region outside C
	const PxVec3 cp = p - c;
	const PxReal d5 = ab.dot(cp);
	const PxReal d6 = ac.dot(cp);
	if (d6 > 0.0f && d5 < d6)
	{
		if (mask)
			*mask = PxU32(ConvexTriIntermediateData::eV2);
		generateContact = (!isEdgeNonconvex(adjIdxs.y)) || (!isEdgeNonconvex(adjIdxs.z));
		closestP = c;
		return cp.magnitudeSquared(); // Barycentric coords 0,0,1
	}

	// Unnormalized barycentric coordinates of p's projection.
	PxReal vc = d1 * d4 - d3 * d2;
	PxReal vb = d5 * d2 - d1 * d6;
	PxReal va = d3 * d6 - d5 * d4;

	// Relative slack for still accepting near-boundary contacts on nonconvex
	// edges. Fix: use the float literal 1e-5f — the previous double literal
	// (1e-5) silently promoted the multiply to fp64 in device code.
	const float edgeTol = (va + vb + vc) * 1e-5f;

	// Check if P in edge region of AB, if so return projection of P onto AB
	if (vc < 0.0f && d1 >= 0.0f && d3 <= 0.0f)
	{
		if(mask)
			*mask = PxU32(ConvexTriIntermediateData::eE01);
		generateContact = !isEdgeNonconvex(adjIdxs.x) || vc > -edgeTol;
		const PxReal v = d1 / (d1 - d3);
		closestP = a + v * ab; // barycentric coords (1-v, v, 0)
		return (closestP - p).magnitudeSquared();
	}

	// Check if P in edge region of AC, if so return projection of P onto AC
	if (vb < 0.0f && d2 >= 0.0f && d6 <= 0.0f)
	{
		if(mask)
			*mask = PxU32(ConvexTriIntermediateData::eE02);
		generateContact = !isEdgeNonconvex(adjIdxs.z) || vb > -edgeTol;
		const PxReal w = d2 / (d2 - d6);
		closestP = a + w * ac; // barycentric coords (1-w, 0, w)
		return (closestP - p).magnitudeSquared();
	}

	// Check if P in edge region of BC, if so return projection of P onto BC
	if (va < 0.0f && d4 >= d3 && d5 >= d6)
	{
		if(mask)
			*mask = PxU32(ConvexTriIntermediateData::eE12);
		generateContact = !isEdgeNonconvex(adjIdxs.y) || va > -edgeTol;
		const PxReal w = (d4 - d3) / ((d4 - d3) + (d5 - d6));
		closestP = b + w * (c - b); // barycentric coords (0, 1-w, w)
		return (closestP - p).magnitudeSquared();
	}

	// Closest feature is the face interior: always generate the contact.
	generateContact = true;
	faceContact = true;

	if(mask)
		*mask = 0;

	// P inside face region. Compute Q through its barycentric coords (u,v,w)
	closestP = a + (ab * vb + ac * vc)/ (va + vb + vc);
	return (closestP - p).magnitudeSquared();
}
|
||||
|
||||
// Scalar triple product (a x b) . c — the signed volume of the
// parallelepiped spanned by a, b and c.
__device__ inline PxReal scalarTriple(const PxVec3& a, const PxVec3& b, const PxVec3& c)
{
	const PxVec3 aCrossB = a.cross(b);
	return aCrossB.dot(c);
}
|
||||
|
||||
// intersects a line (through points p and q, against a triangle a, b, c - mostly taken from Real Time Collision Detection - p186
|
||||
__device__ inline bool intersectLineTri(const PxVec3& p, const PxVec3& q, const PxVec3& a, const PxVec3& b, const PxVec3& c)
|
||||
{
|
||||
const PxVec3 pq = q - p;
|
||||
const PxVec3 pa = a - p;
|
||||
const PxVec3 pb = b - p;
|
||||
const PxVec3 pc = c - p;
|
||||
|
||||
PxVec3 m = pq.cross(pc);
|
||||
PxReal u = pb.dot(m);
|
||||
PxReal v = -pa.dot(m);
|
||||
PxReal w = scalarTriple(pq, pb, pa);
|
||||
|
||||
return u >= 0.f && v >= 0.f && w >= 0.f;
|
||||
}
|
||||
|
||||
#endif
|
||||
43
engine/third_party/physx/source/gpunarrowphase/src/CUDA/triangle.cuh
vendored
Normal file
43
engine/third_party/physx/source/gpunarrowphase/src/CUDA/triangle.cuh
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __TRIANGLE_COMMON_CUH__
|
||||
#define __TRIANGLE_COMMON_CUH__
|
||||
|
||||
#define TRI_FEATURE_IDX 0x7FffFFff
|
||||
#define EDGE_FEATURE_IDX 0x80000000
|
||||
|
||||
static const physx::PxU32 BOUNDARY = 0xffffffff;
|
||||
static const physx::PxU32 NONCONVEX_FLAG = 0x80000000;
|
||||
|
||||
__device__ static bool isEdgeNonconvex(physx::PxU32 adjEdgeIndex)
|
||||
{
|
||||
return (adjEdgeIndex != BOUNDARY) && (adjEdgeIndex & NONCONVEX_FLAG);
|
||||
}
|
||||
|
||||
#endif
|
||||
198
engine/third_party/physx/source/gpunarrowphase/src/CUDA/triangleMesh.cuh
vendored
Normal file
198
engine/third_party/physx/source/gpunarrowphase/src/CUDA/triangleMesh.cuh
vendored
Normal file
@@ -0,0 +1,198 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __TRIANGLE_MESH_CUH__
|
||||
#define __TRIANGLE_MESH_CUH__
|
||||
|
||||
#include "GuBV32.h"
|
||||
#include "PxgConvexConvexShape.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
// GPU-side view of a cooked triangle mesh, populated by readTriangleMesh()
// below from the serialized blob referenced by PxgShape::hullOrMeshPtr.
// All pointers alias that blob; this struct owns no memory.
struct PxgTriangleMesh
{
	PxU32 numVerts;			// number of vertices (blob header .x)
	PxU32 numTris;			// number of triangles (blob header .y)
	PxU32 numBv32TreeNodes;	// BV32 tree node count (blob header .z)
	PxU32 nbPackedNodes;	// number of packed BV32 nodes (blob header .w)

	const Gu::BV32DataPacked* bv32PackedNodes;	// BV32 midphase tree nodes

	const float4* trimeshVerts;	// vertex positions
	const uint4* indices;		// per-triangle vertex indices

	const PxU32* PX_RESTRICT trimeshFaceRemap;	// face remap table (one PxU32 per triangle in the blob)

	// --- SDF section; only populated when sdfDims.x*y*z != 0 ---
	uint4 sdfDims;				// SDF grid dimensions (zero volume means: no SDF)
	PxVec3 meshLower;			// presumably lower corner of the SDF grid — from serialized meshLower
	PxReal spacing;				// SDF cell spacing
	PxU32 subgridSize;
	PxReal subgridsMinSdfValue;	// min SDF value over subgrids (bit-cast from blob)
	PxReal subgridsMaxSdfValue;	// max SDF value over subgrids (bit-cast from blob)
	PxU32 pad;

	//const PxReal* sdf;
	CUtexObject mTexObject;			// SDF texture object; set to 0 when the mesh has no SDF
	CUtexObject mTexObjectSparse;	// sparse SDF texture object

	const PxU32* subgridStarts;		// per-subgrid start offsets (sizeInfo.w entries in the blob)
};
|
||||
|
||||
// Parses the leading section of a serialized triangle-mesh blob: the header
// counts, the packed BV32 node array, then the vertex array. Advances
// trimeshGeomPtr past the header and the nodes, leaving it at the vertex
// data, and outputs node/vertex pointers through the reference parameters.
// Returns the header uint4 (its .w component is the packed-node count).
static PX_FORCE_INLINE __device__ uint4 readTriangleMesh(const PxU8* PX_RESTRICT & trimeshGeomPtr,
	const Gu::BV32DataPacked* PX_RESTRICT & bv32PackedNodes, const float4* PX_RESTRICT & trimeshVerts)
{
	const uint4 counts = *reinterpret_cast<const uint4*>(trimeshGeomPtr);
	trimeshGeomPtr += sizeof(uint4);

	// Packed BV32 tree nodes immediately follow the header.
	bv32PackedNodes = reinterpret_cast<const Gu::BV32DataPacked*>(trimeshGeomPtr);
	trimeshGeomPtr += sizeof(Gu::BV32DataPacked) * counts.w;

	// Vertex data begins here; callers advance past it themselves if needed.
	trimeshVerts = reinterpret_cast<const float4*>(trimeshGeomPtr);

	return counts;
}
|
||||
|
||||
// As the overload above, but additionally skips past the vertex array
// (counts.x float4 entries) and outputs the triangle index array pointer.
static PX_FORCE_INLINE __device__ uint4 readTriangleMesh(const PxU8 * PX_RESTRICT & trimeshGeomPtr,
	const Gu::BV32DataPacked* PX_RESTRICT & bv32PackedNodes, const float4* PX_RESTRICT & trimeshVerts, const uint4* PX_RESTRICT & trimeshTriIndices)
{
	const uint4 counts = readTriangleMesh(trimeshGeomPtr, bv32PackedNodes, trimeshVerts);

	// Triangle indices follow the vertex array.
	trimeshGeomPtr += sizeof(float4) * counts.x;
	trimeshTriIndices = reinterpret_cast<const uint4*>(trimeshGeomPtr);

	return counts;
}
|
||||
|
||||
// As the overload above, but discards the BV32 node pointer and additionally
// outputs the per-triangle adjacency array (which follows the counts.y
// triangle-index entries).
static PX_FORCE_INLINE __device__ uint4 readTriangleMesh(const PxU8 * PX_RESTRICT & trimeshGeomPtr, const float4* PX_RESTRICT & trimeshVerts, const uint4* PX_RESTRICT & trimeshTriIndices,
	const uint4* PX_RESTRICT & trimeshTriAdjacencies)
{
	const Gu::BV32DataPacked* unusedNodes;
	const uint4 counts = readTriangleMesh(trimeshGeomPtr, unusedNodes, trimeshVerts, trimeshTriIndices);

	// Adjacency data follows the triangle index array.
	trimeshGeomPtr += sizeof(uint4) * counts.y;
	trimeshTriAdjacencies = reinterpret_cast<const uint4*>(trimeshGeomPtr);

	return counts;
}
|
||||
|
||||
// As the overload above, but additionally skips the adjacency array
// (counts.y uint4 entries) and outputs the face remap table pointer.
static PX_FORCE_INLINE __device__ uint4 readTriangleMesh(const PxU8 * PX_RESTRICT & trimeshGeomPtr, const float4* PX_RESTRICT & trimeshVerts, const uint4* PX_RESTRICT & trimeshTriIndices,
	const uint4* PX_RESTRICT & trimeshTriAdjacencies, const PxU32* PX_RESTRICT & trimeshFaceRemap)
{
	const uint4 counts = readTriangleMesh(trimeshGeomPtr, trimeshVerts, trimeshTriIndices, trimeshTriAdjacencies);

	// Face remap table follows the adjacency array.
	trimeshGeomPtr += sizeof(uint4) * counts.y;
	trimeshFaceRemap = reinterpret_cast<const PxU32*>(trimeshGeomPtr);

	return counts;
}
|
||||
|
||||
// Fills a PxgTriangleMesh view from the serialized mesh blob referenced by
// trimeshShape.hullOrMeshPtr. The pointer walk below mirrors the blob's
// serialization order exactly: header counts, packed BV32 nodes, vertices,
// triangle indices, adjacency, face remap, accumulated/unrolled triangle
// references, then — 16-byte aligned — the optional SDF section.
static __device__ void readTriangleMesh(const PxgShape& trimeshShape, PxgTriangleMesh& triangleMesh)
{
	const PxU8 * trimeshGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape.hullOrMeshPtr);

	// Header: x = #verts, y = #tris, z = #BV32 tree nodes, w = #packed nodes.
	const uint4 nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes = *reinterpret_cast<const uint4 *>(trimeshGeomPtr);
	trimeshGeomPtr += sizeof(uint4);

	triangleMesh.numVerts = nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.x;
	triangleMesh.numTris = nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.y;
	triangleMesh.numBv32TreeNodes = nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.z;
	triangleMesh.nbPackedNodes = nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.w;

	// Packed BV32 tree nodes follow the header (.w entries).
	triangleMesh.bv32PackedNodes = reinterpret_cast<const Gu::BV32DataPacked*>(trimeshGeomPtr);
	trimeshGeomPtr += sizeof(const Gu::BV32DataPacked)* nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.w;

	// Vertex positions (.x entries).
	triangleMesh.trimeshVerts = reinterpret_cast<const float4 *>(trimeshGeomPtr);
	trimeshGeomPtr += sizeof(float4) * nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.x;

	// Triangle vertex indices (.y entries).
	triangleMesh.indices = reinterpret_cast<const uint4 *>(trimeshGeomPtr);
	trimeshGeomPtr += sizeof(uint4) * nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.y;

	// Triangle adjacency data is skipped — not exposed in PxgTriangleMesh.
	//const uint4* PX_RESTRICT trimeshTriAdjacencies = reinterpret_cast<const uint4 *>(trimeshGeomPtr);
	trimeshGeomPtr += sizeof(uint4)* nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.y;

	// Face remap table (.y PxU32 entries).
	triangleMesh.trimeshFaceRemap = reinterpret_cast<const PxU32 *>(trimeshGeomPtr);
	//trimeshGeomPtr += ((nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.y + 3)& (~3)) * sizeof(PxU32);
	trimeshGeomPtr += nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.y * sizeof(PxU32);

	// Skip triMesh->mAccumulatedTrianglesRef (numVerts + 1 entries).
	//triMesh->mAccumulatedTrianglesRef
	trimeshGeomPtr += (nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.x + 1) * sizeof(PxU32);

	// Skip triMesh->mTrianglesReferences (3 entries per triangle).
	//triMesh->mTrianglesReferences
	trimeshGeomPtr += (nbVerts_nbTris_nbAdjVertsTotal_nbBv32TreeNodes.y * 3) * sizeof(PxU32);

	// The SDF section is 16-byte aligned within the blob.
	trimeshGeomPtr = (PxU8*)(((size_t)trimeshGeomPtr + 15) & ~15);


	uint4 sdfDims = *reinterpret_cast<const uint4 *>(trimeshGeomPtr);
	trimeshGeomPtr += sizeof(uint4);

	// Zero volume means the mesh carries no SDF data.
	PxU32 numSdfs = sdfDims.x*sdfDims.y*sdfDims.z;

	triangleMesh.sdfDims = sdfDims;

	if (numSdfs)
	{
		// SDF present: grid origin + spacing, then subgrid size info.
		float4 meshLower_spacing = *reinterpret_cast<const float4 *>(trimeshGeomPtr);
		trimeshGeomPtr += sizeof(float4);
		uint4 sizeInfo = *reinterpret_cast<const uint4 *>(trimeshGeomPtr);
		trimeshGeomPtr += sizeof(uint4);

		triangleMesh.meshLower = PxVec3(meshLower_spacing.x, meshLower_spacing.y, meshLower_spacing.z);
		triangleMesh.spacing = meshLower_spacing.w;

		// Serialized layout of sizeInfo:
		/*PxU32 subgridSize;
		PxU32 unused0;
		PxU32 unused1;
		PxU32 numCoarseGridElements;*/
		triangleMesh.subgridSize = sizeInfo.x;
		// Min/max SDF values are stored bit-cast into the uint components.
		triangleMesh.subgridsMinSdfValue = reinterpret_cast<PxReal&>(sizeInfo.y);
		triangleMesh.subgridsMaxSdfValue = reinterpret_cast<PxReal&>(sizeInfo.z);
		//triangleMesh.numCoarseGridElements = sizeInfo.w;

		// Texture objects are embedded directly in the blob.
		//triangleMesh.sdf = reinterpret_cast<const PxReal*>(trimeshGeomPtr);
		triangleMesh.mTexObject = *reinterpret_cast<const CUtexObject*>(trimeshGeomPtr);
		trimeshGeomPtr += sizeof(const CUtexObject);
		triangleMesh.mTexObjectSparse = *reinterpret_cast<const CUtexObject*>(trimeshGeomPtr);
		trimeshGeomPtr += sizeof(const CUtexObject);

		triangleMesh.subgridStarts = reinterpret_cast<const PxU32 *>(trimeshGeomPtr);

		trimeshGeomPtr += sizeof(PxU32) * sizeInfo.w;
	}
	else
		triangleMesh.mTexObject = 0;	// no SDF: mark the texture object invalid
}
|
||||
|
||||
}
|
||||
#endif
|
||||
121
engine/third_party/physx/source/gpunarrowphase/src/CUDA/triangletriangle.cuh
vendored
Normal file
121
engine/third_party/physx/source/gpunarrowphase/src/CUDA/triangletriangle.cuh
vendored
Normal file
@@ -0,0 +1,121 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __TRIANGLE_TRIANGLE_CUH__
|
||||
#define __TRIANGLE_TRIANGLE_CUH__
|
||||
|
||||
#include "foundation/PxPlane.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
|
||||
// Minimal triangle representation consumed by the SAT helpers below.
struct Triangle
{
	physx::PxVec3 verts[3];		// vertex positions
	physx::PxPlane triPlane;	// plane used for projection in minProject() (n, d)
	physx::PxVec3 centroid;		//mShapeCoMInTrimeshVertexSpace
};
|
||||
|
||||
|
||||
// Minimum signed distance of the triangle's vertices to the plane:
// the smallest of the three projections onto the plane normal, plus d.
__device__ PxReal minProject(const PxPlane& plane, const Triangle& triangle)
{
	const PxReal proj0 = plane.n.dot(triangle.verts[0]);
	const PxReal proj1 = plane.n.dot(triangle.verts[1]);
	const PxReal proj2 = plane.n.dot(triangle.verts[2]);

	return PxMin(proj0, PxMin(proj1, proj2)) + plane.d;
}
|
||||
|
||||
|
||||
// Projects both triangles onto the candidate axis and returns the gap
// between the two projected intervals: positive when they are disjoint
// (axis separates), negative when they overlap.
__device__ inline PxReal testSeparatingAxis(const PxVec3& axis, const Triangle& triangle0, const Triangle& triangle1)
{
	PxReal lo0 = triangle0.verts[0].dot(axis);
	PxReal hi0 = lo0;
	PxReal lo1 = triangle1.verts[0].dot(axis);
	PxReal hi1 = lo1;

	// Extend each interval by the remaining two vertices.
	for (PxU32 i = 1; i < 3; ++i)
	{
		const PxReal proj0 = triangle0.verts[i].dot(axis);
		const PxReal proj1 = triangle1.verts[i].dot(axis);

		lo0 = PxMin(proj0, lo0);
		hi0 = PxMax(proj0, hi0);
		lo1 = PxMin(proj1, lo1);
		hi1 = PxMax(proj1, hi1);
	}

	// Larger of the two possible interval gaps.
	return PxMax(lo1 - hi0, lo0 - hi1);
}
|
||||
|
||||
// Separating-axis (SAT) test between two triangles. Tests the two face
// normals and, when TDoCross is set, the 9 pairwise edge-cross axes,
// early-exiting as soon as any axis yields a separation > tolerance.
// Returns the largest separation found across the axes tested (negative
// values indicate overlap along every tested axis).
template <bool TDoCross = true>
__device__ inline PxReal satIntersect(const Triangle& triangle0, const Triangle& triangle1, const PxReal tolerance)
{
	// Axis 1: face normal of triangle0.
	PxReal sep = minProject(triangle0.triPlane, triangle1);
	if (sep > tolerance)
		return sep;

	// Axis 2: face normal of triangle1.
	sep = PxMax(sep, minProject(triangle1.triPlane, triangle0));
	if (sep > tolerance)
		return sep;

	if (TDoCross)
	{
		// Edge direction vectors of both triangles.
		PxVec3 axes0[3];
		PxVec3 axes1[3];

		axes0[0] = triangle0.verts[1] - triangle0.verts[0];
		axes0[1] = triangle0.verts[2] - triangle0.verts[0];
		axes0[2] = triangle0.verts[2] - triangle0.verts[1];

		axes1[0] = triangle1.verts[1] - triangle1.verts[0];
		axes1[1] = triangle1.verts[2] - triangle1.verts[0];
		axes1[2] = triangle1.verts[2] - triangle1.verts[1];

		// Axes 3-11: cross products of every edge pair.
		for (PxU32 i = 0; i < 3; ++i)
		{
			const PxVec3 axis0 = axes0[i];
			for (PxU32 j = 0; j < 3; ++j)
			{
				const PxVec3 axis1 = axes1[j];
				PxVec3 sepAxis = axis0.cross(axis1);
				const PxReal magSq = sepAxis.magnitudeSquared();
				// Skip near-parallel edge pairs (degenerate cross product).
				if (magSq > 1e-5f)
				{
					sepAxis = sepAxis * (1.f / PxSqrt(magSq));
					const PxReal tSep = testSeparatingAxis(sepAxis, triangle0, triangle1);
					sep = PxMax(sep, tSep);
					if (sep > tolerance)
						return sep;

				}
			}
		}
	}

	return sep;
}
|
||||
|
||||
#endif
|
||||
683
engine/third_party/physx/source/gpunarrowphase/src/CUDA/trimeshCollision.cu
vendored
Normal file
683
engine/third_party/physx/source/gpunarrowphase/src/CUDA/trimeshCollision.cu
vendored
Normal file
@@ -0,0 +1,683 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "foundation/PxMath.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxTransform.h"
|
||||
|
||||
#include "geometry/PxGeometry.h"
|
||||
|
||||
#include "PxgContactManager.h"
|
||||
#include "PxsContactManagerState.h"
|
||||
#include "PxgConvexConvexShape.h"
|
||||
#include "PxsMaterialCore.h"
|
||||
#include "PxgPersistentContactManifold.h"
|
||||
#include "PxsTransformCache.h"
|
||||
|
||||
#include "PxContact.h"
|
||||
|
||||
#include "GuBV32.h"
|
||||
|
||||
#include "cudaNpCommon.h"
|
||||
|
||||
#include "contactPatchUtils.cuh"
|
||||
#include "contactReduction.cuh"
|
||||
#include "utils.cuh"
|
||||
#include "manifold.cuh"
|
||||
#include "triangleMesh.cuh"
|
||||
|
||||
#define PLANE_TRI_MAX_CONTACTS 6
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Empty host-side stub — presumably referenced from CPU code so this
// translation unit (and its kernels) is retained by the linker; TODO confirm
// against the other initNarrowphaseKernelsN() stubs in this module.
extern "C" __host__ void initNarrowphaseKernels22() {}
|
||||
|
||||
//each block deal with one test
|
||||
// One CUDA block handles one triangle-mesh-vs-plane pair: every mesh vertex is
// tested against the plane (separation along the plane's local x axis), the
// candidate set is reduced to at most PLANE_TRI_MAX_CONTACTS points, and the
// result is written to the persistent contact manifold (PCM) and to the
// contact/patch streams. When the cached manifold is still valid, contact
// generation is skipped and contacts are reloaded from the PCM instead.
extern "C" __global__
__launch_bounds__(1024, 1)
void trimeshPlaneNarrowphase(
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	PxsContactManagerOutput* PX_RESTRICT cmOutputs,
	PxgShape* PX_RESTRICT shapes,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* contactDistance,
	const PxsMaterialData* PX_RESTRICT materials,
	PxgPersistentContactMultiManifold* PX_RESTRICT multiManifold,
	PxU8* PX_RESTRICT contactStream,
	PxU8* PX_RESTRICT patchStream,
	PxgPatchAndContactCounters* PX_RESTRICT patchAndContactCounters,
	PxU32* PX_RESTRICT touchChangeFlags,
	PxU32* PX_RESTRICT patchChangeFlags,
	PxU8* PX_RESTRICT startContactPatches,
	PxU8* PX_RESTRICT startContactPoints,
	PxU8* PX_RESTRICT startContactForces,
	PxU32 patchBytesLimit,
	PxU32 contactBytesLimit,
	PxU32 forceBytesLimit,
	const PxReal clusterTolerance)
{
	// Shared staging for up to (WARP_SIZE + 1) * PLANE_TRI_MAX_CONTACTS candidate
	// contact points (declared as PxU32 backing storage, viewed as PxVec3).
	__shared__ PxU32 sContacts[(sizeof(PxVec3)/sizeof(PxU32)) * (WARP_SIZE + 1) * PLANE_TRI_MAX_CONTACTS];
	PxVec3* contacts = reinterpret_cast<PxVec3*>(sContacts);
	__shared__ PxReal separations[(WARP_SIZE+1)*PLANE_TRI_MAX_CONTACTS];
	__shared__ PxU32 counters[WARP_SIZE];

	// Contacts kept by warp 0's reduction pass, carried across loop iterations
	// (they always occupy the first retainedCount slots of contacts/separations).
	__shared__ PxU32 retainedCount;

	__shared__ bool doFullContactGen;

	if (threadIdx.x == 0)
	{
		retainedCount = 0;
	}

	const PxU32 workIndex = blockIdx.x;
	const PxU32 threadIndexInWarp = threadIdx.x & 31;

	PxgShape trimeshShape, planeShape;
	PxU32 trimeshCacheRef, planeCacheRef;
	LoadShapePairWarp<PxGeometryType::eTRIANGLEMESH, PxGeometryType::ePLANE>(cmInputs, workIndex, shapes,
		trimeshShape, trimeshCacheRef, planeShape, planeCacheRef);

	PxsCachedTransform trimeshTransformCache = transformCache[trimeshCacheRef];
	PxsCachedTransform planeTransformCache = transformCache[planeCacheRef];
	// Combined contact offset of the pair.
	const PxReal cDistance = contactDistance[trimeshCacheRef] + contactDistance[planeCacheRef];

	PxTransform planeTransform = planeTransformCache.transform;
	PxTransform trimeshTransform = trimeshTransformCache.transform;

	// Geometries : Triangle Mesh
	const PxU8 * trimeshGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape.hullOrMeshPtr);

	const Gu::BV32DataPacked* nodes;
	const float4 * trimeshVerts;
	uint4 counts = readTriangleMesh(trimeshGeomPtr, nodes, trimeshVerts);
	const PxU32 numVerts = counts.x;

	const PxU32 numIterationsRequired = (numVerts + blockDim.x - 1) / blockDim.x;

	// Compute the mesh's root AABB from the first BV32 node level: each lane
	// loads one node's bounds, then warp min/max reductions combine them.
	PxVec3 min(PX_MAX_F32), max(-PX_MAX_F32);

	if (threadIndexInWarp < nodes->mNbNodes)
	{
		const PxVec4 min4 = nodes->mMin[threadIndexInWarp];
		const PxVec4 max4 = nodes->mMax[threadIndexInWarp];

		min = PxVec3(min4.x, min4.y, min4.z);
		max = PxVec3(max4.x, max4.y, max4.z);
	}

	minIndex(min.x, FULL_MASK, min.x); minIndex(min.y, FULL_MASK, min.y); minIndex(min.z, FULL_MASK, min.z);
	maxIndex(max.x, FULL_MASK, max.x); maxIndex(max.y, FULL_MASK, max.y); maxIndex(max.z, FULL_MASK, max.z);

	// Half-extents of the mesh bounds, used to derive the PCM margin.
	const float4 extents4_f = make_float4(max.x - min.x, max.y - min.y, max.z - min.z, 0.f)*0.5f;

	PxReal minMargin = calculatePCMConvexMargin(extents4_f, trimeshShape.scale.scale, toleranceLength) * 0.5f;
	const PxReal ratio = 0.1f;
	const PxReal breakingThresholdRatio = 0.5f;

	bool lostContacts = false;

	PxTransform trimeshToPlane = planeTransform.transformInv(trimeshTransform);

	const PxU32 warpIndex = threadIdx.x / 32;

	// Warp 0 decides whether the cached manifold can be reused: invalidate on
	// large relative motion, otherwise refresh and check for lost contacts.
	if (warpIndex == 0)
	{
		const bool invalidate = invalidateManifold(trimeshToPlane, multiManifold[workIndex], minMargin, ratio);

		if (!invalidate)
		{
			const PxReal projectBreakingThreshold = minMargin * breakingThresholdRatio;

			lostContacts = refreshManifolds(
				trimeshToPlane,
				projectBreakingThreshold,
				multiManifold + workIndex
			);
		}

		bool fullContactGen = invalidate || lostContacts;

		if (threadIdx.x == 0)
			doFullContactGen = fullContactGen;
	}

	//const PxU32 numThreadsRequired = numIterationsRequired * blockDim.x;

	// The plane's normal is its local x axis expressed in world space.
	PxVec3 worldNormal = planeTransform.q.getBasisVector0();

	// Publish doFullContactGen (and retainedCount = 0) to the whole block.
	__syncthreads();

	if (doFullContactGen)
	{
		// Full contact generation: sweep all mesh vertices in block-sized
		// batches; each batch is reduced per-warp, then warp 0 reduces the
		// union of the batch's survivors with the previously retained set.
		for (PxU32 vertInd = 0; vertInd < numIterationsRequired; vertInd++)
		{

			PxU32 i = vertInd * blockDim.x + threadIdx.x;
			bool hasContact = false;
			PxVec3 worldPoint;
			PxReal separation = PX_MAX_F32;

			if (i < numVerts)
			{
				float4 point = trimeshVerts[i];

				PxVec3 tp = PxLoad3(point);
				PxVec3 p = vertex2Shape(tp, trimeshShape.scale.scale, trimeshShape.scale.rotation);

				//v in plane space
				const PxVec3 pInPlaneSpace = trimeshToPlane.transform(p);

				//separation / penetration
				//separation = pInPlaneSpace.x;// FSub(V3GetX(sphereCenterInPlaneSpace), radius);

				// Vertex is within contact distance of the plane (x <= cDistance).
				if (cDistance >= pInPlaneSpace.x)
				{
					//get the plane normal
					separation = pInPlaneSpace.x;
					worldPoint = trimeshTransform.transform(p);
					hasContact = true;
				}
			}

			int mask = __ballot_sync(FULL_MASK, hasContact);

			// Per-warp cluster reduction down to at most PLANE_TRI_MAX_CONTACTS.
			mask = contactReduce<true, true, PLANE_TRI_MAX_CONTACTS, false>(worldPoint, separation, worldNormal, mask, clusterTolerance);

			counters[warpIndex] = __popc(mask);

			// Only lanes surviving the reduction keep their contact.
			hasContact = mask & (1 << threadIndexInWarp);

			__syncthreads();

			PxU32 counter = counters[threadIndexInWarp];

			// Inclusive scan of per-warp survivor counts (lane i sees warps 0..i).
			PxU32 contactCount = warpScan<AddOpPxU32, PxU32>(FULL_MASK, counter);

			//exclusive runsum
			PxU32 contactWarpOffset = contactCount - counter;

			// Each warp fetches its own base offset from lane `warpIndex`.
			contactWarpOffset = __shfl_sync(FULL_MASK, contactWarpOffset, warpIndex);

			const PxU32 offset = warpScanExclusive(mask, threadIndexInWarp);

			// Compact this batch's survivors after the retained contacts.
			if (hasContact)
			{
				const PxU32 index = offset + contactWarpOffset + retainedCount;
				contacts[index] = worldPoint;
				separations[index] = separation;
			}

			// Block-wide total for this iteration = lane 31's inclusive sum + carry.
			const PxU32 totalContacts = __shfl_sync(FULL_MASK, contactCount, 31) + retainedCount;
			__syncthreads();

			//Now do another loop with warp 0 doing reduction

			if (warpIndex == 0)
			{
				// Warp 0 streams all staged contacts through repeated cluster
				// reductions: lanes without a live contact pick up the next
				// unread entries until the whole range has been consumed.
				hasContact = false;
				for (PxU32 ind = 0; ind < totalContacts;)
				{
					// Lanes that still hold a survivor keep it; the rest read.
					PxU32 readMask = ~(__ballot_sync(FULL_MASK, hasContact));

					if (!hasContact)
					{
						PxU32 readIndex = warpScanExclusive(readMask, threadIndexInWarp) + ind;

						if (readIndex < totalContacts)
						{
							worldPoint = contacts[readIndex];
							separation = separations[readIndex];
							hasContact = true;
						}
					}

					mask = __ballot_sync(FULL_MASK, hasContact);
					ind += __popc(readMask);

					mask = contactReduce<true, true, PLANE_TRI_MAX_CONTACTS, false>(worldPoint, separation, worldNormal, mask, clusterTolerance);

					hasContact = mask & (1 << threadIndexInWarp);
				}

				// All lanes must be done reading before survivors compact into
				// the front of the shared buffers.
				__syncwarp();
				retainedCount = __popc(__ballot_sync(FULL_MASK, hasContact));

				if (hasContact)
				{
					PxU32 offset = warpScanExclusive(mask, threadIndexInWarp);
					contacts[offset] = worldPoint;
					separations[offset] = separation;
				}
			}

			__syncthreads();

		}

		//Store to PCM...
		if (warpIndex == 0)
		{
			PxgPersistentContactMultiManifold& manifold = multiManifold[workIndex];
			if (retainedCount)
			{
				if (threadIdx.x == 0)
				{
					manifold.mNbManifolds = 1;

					manifold.mNbContacts[0] = retainedCount;

					manifold.mRelativeTransform = trimeshToPlane;
				}

				// One lane per retained contact writes the manifold entry.
				if (threadIdx.x < retainedCount)
				{

					PxgContact& contact = manifold.mContacts[0][threadIdx.x];
					// pointA in mesh local space, pointB projected onto the plane.
					contact.pointA = trimeshTransform.transformInv(contacts[threadIdx.x]);
					PxVec3 planePoint = planeTransform.transformInv(contacts[threadIdx.x]);
					planePoint.x = 0.f;// separations[threadIdx.x];
					contact.pointB = planePoint;
					// Normal is the plane's local x axis (plane space).
					contact.normal = PxVec3(1.f, 0.f, 0.f);
					contact.penetration = separations[threadIdx.x];
					contact.triIndex = 0xFFFFFFFF;
				}
			}
			else
				manifold.mNbManifolds = 0;
		}

	}
	else
	{
		//Load contacts out of PCM...
		if (warpIndex == 0)
		{
			PxgPersistentContactMultiManifold& manifold = multiManifold[workIndex];

			if (manifold.mNbManifolds != 0)
			{
				if (threadIdx.x < manifold.mNbContacts[0])
				{

					PxgContact& contact = manifold.mContacts[0][threadIdx.x];
					// Rehydrate world-space point and cached penetration.
					contacts[threadIdx.x] = trimeshTransform.transform(contact.pointA);
					separations[threadIdx.x] = contact.penetration;
				}

				if (threadIdx.x == 0)
					retainedCount = manifold.mNbContacts[0];
			}
		}

		__syncthreads();
	}


	//Retained counts stores the set of contacts we kept, which are stored in the first N
	//elements of contacts and separations buffer

	if (warpIndex == 0)
	{

		PxU32 nbContacts = retainedCount;

		PxU32 contactByteOffset = 0xFFFFFFFF;
		PxU32 forceAndIndiceByteOffset = 0xFFFFFFFF;

		// Lane 0 reserves space in the global contact/force streams.
		if (threadIndexInWarp == 0)
		{
			if (nbContacts)
			{
				contactByteOffset = atomicAdd(&(patchAndContactCounters->contactsBytes), sizeof(PxContact) * nbContacts);
				forceAndIndiceByteOffset = atomicAdd(&(patchAndContactCounters->forceAndIndiceBytes), sizeof(PxU32) * nbContacts);

				if ((contactByteOffset + sizeof(PxContact)* nbContacts) > contactBytesLimit)
				{
					patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::CONTACT_BUFFER_OVERFLOW);
					contactByteOffset = 0xFFFFFFFF; //overflow
				}

				if ((forceAndIndiceByteOffset + sizeof(PxU32)* nbContacts) > forceBytesLimit)
				{
					patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::FORCE_BUFFER_OVERFLOW);
					forceAndIndiceByteOffset = 0xFFFFFFFF; //overflow
				}
			}
		}

		contactByteOffset = __shfl_sync(FULL_MASK, contactByteOffset, 0);


		PxsContactManagerOutput* output = cmOutputs + workIndex;
		//write point and penetration to the contact stream
		if (contactByteOffset != 0xFFFFFFFF)
		{
			//*((float4 *)(contactStream + contactByteOffset)) = pointPen;

			//output->contactForces = reinterpret_cast<PxReal*>(startContactForces + forceAndIndiceByteOffset);
			//output->contactPoints = startContactPoints + contactByteOffset;

			float4* baseContactStream = (float4*)(contactStream + contactByteOffset);
			if (threadIndexInWarp < nbContacts)
			{
				float4& outPointPen = baseContactStream[threadIndexInWarp];
				outPointPen = make_float4(contacts[threadIndexInWarp].x, contacts[threadIndexInWarp].y,
					contacts[threadIndexInWarp].z, separations[threadIndexInWarp]);
			}

			if (threadIndexInWarp == 0)
			{
				output->contactForces = reinterpret_cast<PxReal*>(startContactForces + forceAndIndiceByteOffset);
				output->contactPoints = startContactPoints + contactByteOffset;
			}
		}
		else
		{
			if (threadIndexInWarp == 0)
			{
				output->contactForces = NULL;
				output->contactPoints = NULL;
			}
			// Stream overflow: report zero contacts so no patch refers to them.
			nbContacts = 0;
		}

		if (threadIndexInWarp == 0)
		{
			PxU32 patchIndex = registerContactPatch(cmOutputs, patchAndContactCounters, touchChangeFlags, patchChangeFlags, startContactPatches, patchBytesLimit, workIndex, nbContacts);

			if (nbContacts == 0)
				(cmOutputs + workIndex)->contactPatches = 0;

			insertIntoPatchStream(materials, patchStream, planeShape, trimeshShape, patchIndex, worldNormal, nbContacts);
		}
	}
}
|
||||
|
||||
|
||||
|
||||
//each block deal with one test
|
||||
// One CUDA block handles one triangle-mesh-vs-heightfield pair: mesh vertices
// are tested against the heightfield shape's local x = 0 plane, candidates are
// reduced to at most PLANE_TRI_MAX_CONTACTS points, and the survivors are
// written to the contact/patch streams.
// NOTE(review): this kernel mirrors trimeshPlaneNarrowphase and only tests
// vertices against a plane through the heightfield's origin — it never samples
// heightfield data. Confirm this simplification is intended.
extern "C" __global__
__launch_bounds__(1024, 1)
void trimeshHeightfieldNarrowphase(
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	PxsContactManagerOutput* PX_RESTRICT cmOutputs,
	PxgShape* PX_RESTRICT shapes,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxReal* contactDistance,
	const PxsMaterialData* PX_RESTRICT materials,
	PxU8* PX_RESTRICT contactStream,
	PxU8* PX_RESTRICT patchStream,
	PxgPatchAndContactCounters* PX_RESTRICT patchAndContactCounters,
	PxU32* PX_RESTRICT touchChangeFlags,
	PxU32* PX_RESTRICT patchChangeFlags,
	PxU8* PX_RESTRICT startContactPatches,
	PxU8* PX_RESTRICT startContactPoints,
	PxU8* PX_RESTRICT startContactForces,
	PxU32 patchBytesLimit,
	PxU32 contactBytesLimit,
	PxU32 forceBytesLimit,
	const PxReal clusterBias)
{
	// Shared staging for up to (WARP_SIZE + 1) * PLANE_TRI_MAX_CONTACTS candidates.
	__shared__ char sContacts[sizeof(PxVec3) * (WARP_SIZE + 1) * PLANE_TRI_MAX_CONTACTS];
	PxVec3* contacts = reinterpret_cast<PxVec3*>(sContacts);

	__shared__ PxReal separations[(WARP_SIZE + 1)*PLANE_TRI_MAX_CONTACTS];
	__shared__ PxU32 counters[WARP_SIZE];

	// Contacts kept by warp 0's reduction, always occupying the first
	// retainedCount slots of contacts/separations across iterations.
	__shared__ PxU32 retainedCount;

	if (threadIdx.x == 0)
	{
		retainedCount = 0;
	}

	__syncthreads();

	const PxU32 workIndex = blockIdx.x;

	PxgShape trimeshShape, heightfieldShape;
	PxU32 trimeshCacheRef, heightfieldCacheRef;
	LoadShapePairWarp<PxGeometryType::eTRIANGLEMESH, PxGeometryType::eHEIGHTFIELD>(cmInputs, workIndex, shapes,
		trimeshShape, trimeshCacheRef, heightfieldShape, heightfieldCacheRef);

	PxsCachedTransform trimeshTransformCache = transformCache[trimeshCacheRef];
	PxsCachedTransform heightfieldTransformCache = transformCache[heightfieldCacheRef];
	// Combined contact offset of the pair.
	const PxReal cDistance = contactDistance[trimeshCacheRef] + contactDistance[heightfieldCacheRef];

	PxTransform trimeshTransform = trimeshTransformCache.transform;
	PxTransform heightfieldTransform = heightfieldTransformCache.transform;

	// Geometries : Triangle Mesh
	const PxU8 * trimeshGeomPtr = reinterpret_cast<const PxU8 *>(trimeshShape.hullOrMeshPtr);

	const Gu::BV32DataPacked* nodes;
	const float4 * trimeshVerts;
	uint4 counts = readTriangleMesh(trimeshGeomPtr, nodes, trimeshVerts);
	const PxU32 numVerts = counts.x;

	const PxU32 numIterationsRequired = (numVerts + blockDim.x - 1) / blockDim.x;

	//const PxU32 numThreadsRequired = numIterationsRequired * blockDim.x;

	const PxU32 threadIndexInWarp = threadIdx.x & 31;
	const PxU32 warpIndex = threadIdx.x / 32;

	PxTransform trimeshToHeightfield = heightfieldTransform.transformInv(trimeshTransform);

	// Contact normal: the heightfield shape's local x axis in world space.
	PxVec3 worldNormal = heightfieldTransform.q.getBasisVector0();

	// Sweep all mesh vertices in block-sized batches; per-warp reduction first,
	// then warp 0 reduces the batch's survivors plus the retained set.
	for (PxU32 iter = 0; iter < numIterationsRequired; iter++)
	{
		PxU32 i = iter * blockDim.x + threadIdx.x;
		bool hasContact = false;
		PxVec3 worldPoint;
		// FIX: initialize so lanes without a contact feed a defined value into
		// contactReduce (was uninitialized; matches trimeshPlaneNarrowphase).
		PxReal separation = PX_MAX_F32;

		if (i < numVerts)
		{
			float4 point = trimeshVerts[i];

			PxVec3 tp = PxLoad3(point);
			PxVec3 p = vertex2Shape(tp, trimeshShape.scale.scale, trimeshShape.scale.rotation);

			// Vertex in heightfield shape space; x is the separation direction.
			const PxVec3 pInHeightfieldSpace = trimeshToHeightfield.transform(p);

			if (cDistance >= pInHeightfieldSpace.x)
			{
				//separation / penetration
				separation = pInHeightfieldSpace.x;
				worldPoint = trimeshTransform.transform(p);
				hasContact = true;
			}
		}

		int mask = __ballot_sync(FULL_MASK, hasContact);

		// Per-warp cluster reduction down to at most PLANE_TRI_MAX_CONTACTS.
		mask = contactReduce<true, true, PLANE_TRI_MAX_CONTACTS>(worldPoint, separation, worldNormal, mask, clusterBias);

		counters[warpIndex] = __popc(mask);

		// FIX: refresh hasContact from the reduced mask so lanes culled by
		// contactReduce do not write below (matches trimeshPlaneNarrowphase;
		// previously stale lanes produced colliding/invalid write offsets).
		hasContact = mask & (1 << threadIndexInWarp);

		__syncthreads();

		PxU32 counter = counters[threadIndexInWarp];

		// Inclusive scan of per-warp survivor counts.
		PxU32 contactCount = warpScan<AddOpPxU32, PxU32>(FULL_MASK, counter);

		//exclusive runsum
		PxU32 contactWarpOffset = contactCount - counter;

		// Each warp fetches its base offset from lane `warpIndex`.
		contactWarpOffset = __shfl_sync(FULL_MASK, contactWarpOffset, warpIndex);

		const PxU32 offset = warpScanExclusive(mask, threadIndexInWarp);

		// Compact this batch's survivors after the retained contacts.
		if (hasContact)
		{
			const PxU32 index = offset + contactWarpOffset + retainedCount;
			contacts[index] = worldPoint;
			separations[index] = separation;
		}

		__syncthreads();

		// Block-wide total = lane 31's inclusive sum + carried contacts.
		const PxU32 totalContacts = __shfl_sync(FULL_MASK, contactCount, 31) + retainedCount;

		//Now do another loop with warp 0 doing reduction

		if (warpIndex == 0)
		{
			// Warp 0 streams all staged contacts through repeated cluster
			// reductions; idle lanes pick up the next unread entries.
			hasContact = false;
			for (PxU32 ind = 0; ind < totalContacts;)
			{
				PxU32 readMask = ~(__ballot_sync(FULL_MASK, hasContact));

				if (!hasContact)
				{
					// FIX: include the running offset `ind` in the read index;
					// previously contacts[readIndex] re-read the first entries
					// on every pass (matches trimeshPlaneNarrowphase).
					PxU32 readIndex = warpScanExclusive(readMask, threadIndexInWarp) + ind;

					if (readIndex < totalContacts)
					{
						worldPoint = contacts[readIndex];
						separation = separations[readIndex];
						hasContact = true;
					}
				}

				mask = __ballot_sync(FULL_MASK, hasContact);
				ind += __popc(readMask);
				mask = contactReduce<true, true, PLANE_TRI_MAX_CONTACTS>(worldPoint, separation, worldNormal, mask, clusterBias);

				hasContact = mask & (1 << threadIndexInWarp);
			}

			// FIX: all lanes must finish reading contacts[]/separations[]
			// before survivors compact over them (matches trimeshPlaneNarrowphase).
			__syncwarp();
			retainedCount = __popc(__ballot_sync(FULL_MASK, hasContact));

			if (hasContact)
			{
				PxU32 writeOffset = warpScanExclusive(mask, threadIndexInWarp);
				contacts[writeOffset] = worldPoint;
				separations[writeOffset] = separation;
			}
		}

		__syncthreads();
	}

	//Retained counts stores the set of contacts we kept, which are stored in the first N
	//elements of contacts and separations buffer

	if (warpIndex == 0)
	{
		// FIX: non-const so it can be zeroed on stream overflow below.
		PxU32 nbContacts = retainedCount;

		PxU32 contactByteOffset = 0xFFFFFFFF;
		PxU32 forceAndIndiceByteOffset = 0xFFFFFFFF;

		// Lane 0 reserves space in the global contact/force streams.
		if (threadIndexInWarp == 0)
		{
			if (nbContacts)
			{
				contactByteOffset = atomicAdd(&(patchAndContactCounters->contactsBytes), sizeof(PxContact) * nbContacts);
				forceAndIndiceByteOffset = atomicAdd(&(patchAndContactCounters->forceAndIndiceBytes), sizeof(PxU32) * nbContacts);

				// FIX: validate the full reservation (sizeof * nbContacts, not a
				// single element) and check both limits independently, matching
				// trimeshPlaneNarrowphase.
				if ((contactByteOffset + sizeof(PxContact) * nbContacts) > contactBytesLimit)
				{
					patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::CONTACT_BUFFER_OVERFLOW);
					contactByteOffset = 0xFFFFFFFF; //overflow
				}

				if ((forceAndIndiceByteOffset + sizeof(PxU32) * nbContacts) > forceBytesLimit)
				{
					patchAndContactCounters->setOverflowError(PxgPatchAndContactCounters::FORCE_BUFFER_OVERFLOW);
					forceAndIndiceByteOffset = 0xFFFFFFFF; //overflow
				}
			}
		}

		contactByteOffset = __shfl_sync(FULL_MASK, contactByteOffset, 0);

		PxsContactManagerOutput* output = cmOutputs + workIndex;
		//write point and penetration to the contact stream
		if (contactByteOffset != 0xFFFFFFFF)
		{
			float4* baseContactStream = (float4*)(contactStream + contactByteOffset);
			if (threadIndexInWarp < nbContacts)
			{
				float4& outPointPen = baseContactStream[threadIndexInWarp];
				outPointPen = make_float4(contacts[threadIndexInWarp].x, contacts[threadIndexInWarp].y,
					contacts[threadIndexInWarp].z, separations[threadIndexInWarp]);
			}

			if (threadIndexInWarp == 0)
			{
				output->contactForces = reinterpret_cast<PxReal*>(startContactForces + forceAndIndiceByteOffset);
				output->contactPoints = startContactPoints + contactByteOffset;
			}
		}
		else
		{
			if (threadIndexInWarp == 0)
			{
				output->contactForces = NULL;
				output->contactPoints = NULL;
			}
			// FIX: report zero contacts when the stream write was dropped so the
			// patch below is not emitted for contacts that were never written
			// (matches trimeshPlaneNarrowphase).
			nbContacts = 0;
		}

		if (threadIndexInWarp == 0)
		{
			PxU32 patchIndex = registerContactPatch(cmOutputs, patchAndContactCounters, touchChangeFlags, patchChangeFlags, startContactPatches, patchBytesLimit, workIndex, nbContacts);

			// FIX: clear the patch pointer when nothing was generated, matching
			// trimeshPlaneNarrowphase.
			if (nbContacts == 0)
				(cmOutputs + workIndex)->contactPatches = 0;

			insertIntoPatchStream(materials, patchStream, heightfieldShape, trimeshShape, patchIndex, worldNormal, nbContacts);
		}
	}
}
|
||||
|
||||
248
engine/third_party/physx/source/gpunarrowphase/src/CUDA/warpHelpers.cuh
vendored
Normal file
248
engine/third_party/physx/source/gpunarrowphase/src/CUDA/warpHelpers.cuh
vendored
Normal file
@@ -0,0 +1,248 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
|
||||
#ifndef __SCANWARP_CUH__
|
||||
#define __SCANWARP_CUH__
|
||||
|
||||
/* Scan&Reduce operators */
|
||||
|
||||
/**
 * Scan/reduce functor: binary addition.
 * identity() is the value-initialized T (zero for arithmetic types), which is
 * the neutral element of +.
 */
template <typename T>
struct AddOP
{
	static __device__ T apply(T a, T b)
	{
		return a + b;
	}

	static __device__ T identity()
	{
		return T();
	}
};
|
||||
|
||||
/**
 * Scan/reduce functor: bitwise AND.
 * NOTE(review): identity() yields value-initialized T (zero), which is the
 * absorbing element of AND, not its identity (all-ones) — callers must not
 * rely on identity-padded slots with this operator; confirm intended usage.
 */
template <typename T>
struct AndOP
{
	static __device__ T apply(T a, T b)
	{
		return a & b;
	}

	static __device__ T identity()
	{
		return T();
	}
};
|
||||
|
||||
typedef AddOP<unsigned int> AddOPu;
|
||||
typedef AddOP<float> AddOPf;
|
||||
|
||||
/**
 * Scan/reduce functor: maximum. Despite the "u" suffix it is templated over
 * any comparable T; identity() is value-initialized T (zero), which is only a
 * true identity for non-negative values.
 */
template <typename T>
struct MaxOPu
{
	static __device__ T apply(T a, T b)
	{
		return b > a ? b : a;
	}

	static __device__ T identity()
	{
		return T();
	}
};
|
||||
|
||||
/**
 * Scan/reduce functor: floating-point minimum, with an index-tracking variant
 * (argmin) for reduceWarpKeepIndex.
 */
struct MinOPf
{
	static __device__ float apply(float a, float b) { return fminf(a, b); }

	// Returns min(a, b) and stores the winner's index in oindex. Ties (and a
	// NaN b, for which b < a is false) keep a/aindex — same as the original
	// strict comparison.
	static __device__ float applyIdx(float a, float b, unsigned int &oindex, unsigned int aindex, unsigned int bindex)
	{
		const bool takeB = b < a;
		oindex = takeB ? bindex : aindex;
		return takeB ? b : a;
	}

	static __device__ float identity() { return +FLT_MAX; }
};
|
||||
|
||||
/**
 * Scan/reduce functor: floating-point maximum, with an index-tracking variant
 * (argmax) for reduceWarpKeepIndex.
 */
struct MaxOPf
{
	static __device__ float apply(float a, float b) { return fmaxf(a, b); }

	// Returns max(a, b) and stores the winner's index in oindex. Ties (and a
	// NaN b, for which b > a is false) keep a/aindex — same as the original
	// strict comparison.
	static __device__ float applyIdx(float a, float b, unsigned int &oindex, unsigned int aindex, unsigned int bindex)
	{
		const bool takeB = b > a;
		oindex = takeB ? bindex : aindex;
		return takeB ? b : a;
	}

	static __device__ float identity() { return -FLT_MAX; }
};
|
||||
|
||||
/* Reduce */
|
||||
|
||||
// Warp-synchronous tree reduction of 32 consecutive shared-memory elements:
// the lower 16 lanes of each warp fold the upper half onto the lower half in
// five halving steps; the warp's result ends up at its base index. `volatile`
// forces shared loads/stores to be re-issued at every step.
// NOTE(review): no __syncwarp() between steps — classic implicit
// warp-synchronous idiom; confirm it is safe under independent thread
// scheduling (Volta+) for the configurations this is compiled in.
template <typename T, class OP>
__device__ static inline void reduceWarp(volatile T* sdata)
{
	unsigned int idx = threadIdx.x;
	if ((idx & (WARP_SIZE-1)) < 16)
	{
		sdata[idx] = OP::apply(sdata[idx], sdata[idx + 16]);
		sdata[idx] = OP::apply(sdata[idx], sdata[idx + 8]);
		sdata[idx] = OP::apply(sdata[idx], sdata[idx + 4]);
		sdata[idx] = OP::apply(sdata[idx], sdata[idx + 2]);
		sdata[idx] = OP::apply(sdata[idx], sdata[idx + 1]);
	}
}
|
||||
|
||||
// Overload taking an explicit element index and performing no lane guard: the
// caller is responsible for restricting execution to the lanes that should
// participate and for ensuring sdata[idx + 16] .. sdata[idx + 1] stay within
// the (padded) buffer. Same implicit warp-synchronous assumption as the
// guarded reduceWarp overload.
template <typename T, class OP>
__device__ static inline void reduceWarp(volatile T* sdata, unsigned int idx)
{
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 16]);
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 8]);
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 4]);
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 2]);
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 1]);
}
|
||||
|
||||
// Warp-synchronous reduction over 16 consecutive shared-memory elements (four
// halving steps starting at stride 8). No lane guard: every thread executes
// the folds at its own threadIdx.x, so the caller must size/pad sdata so that
// reads up to sdata[idx + 8] are valid for all participating lanes.
template <typename T, class OP>
__device__ static inline void reduceHalfWarp(volatile T* sdata)
{
	unsigned int idx = threadIdx.x;
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 8]);
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 4]);
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 2]);
	sdata[idx] = OP::apply(sdata[idx], sdata[idx + 1]);
}
|
||||
|
||||
// Warp-synchronous reduction that carries, next to each reduced value, the
// index of the element that produced it (argmin/argmax when OP is
// MinOPf/MaxOPf). Each halving step narrows the participating lanes and uses
// OP::applyIdx to select both the winning value and its index.
// NOTE(review): same implicit warp-synchronous assumption as reduceWarp
// (no __syncwarp() between steps).
template <typename T, typename TIndex, class OP>
__device__ static inline void reduceWarpKeepIndex(volatile T* sdata, volatile TIndex* sindices, unsigned int idx)
{
	TIndex ia, ib, oindex;

	if ((idx & (WARP_SIZE-1)) < 16)
	{
		ia = sindices[idx];
		ib = sindices[idx + 16];

		sdata[idx] = OP::applyIdx(sdata[idx], sdata[idx + 16], oindex, ia, ib);
		sindices[idx] = oindex;
	}

	if ((idx & (WARP_SIZE-1)) < 8)
	{
		ia = sindices[idx];
		ib = sindices[idx + 8];

		sdata[idx] = OP::applyIdx(sdata[idx], sdata[idx + 8], oindex, ia, ib);
		sindices[idx] = oindex;
	}

	if ((idx & (WARP_SIZE-1)) < 4)
	{
		ia = sindices[idx];
		ib = sindices[idx + 4];

		sdata[idx] = OP::applyIdx(sdata[idx], sdata[idx + 4], oindex, ia, ib);
		sindices[idx] = oindex;
	}

	if ((idx & (WARP_SIZE-1)) < 2)
	{
		ia = sindices[idx];
		ib = sindices[idx + 2];

		sdata[idx] = OP::applyIdx(sdata[idx], sdata[idx + 2], oindex, ia, ib);
		sindices[idx] = oindex;
	}

	if ((idx & (WARP_SIZE-1)) < 1)
	{
		ia = sindices[idx];
		ib = sindices[idx + 1];

		sdata[idx] = OP::applyIdx(sdata[idx], sdata[idx + 1], oindex, ia, ib);
		sindices[idx] = oindex;
	}
}
|
||||
|
||||
/* Scan */
|
||||
|
||||
// Inclusive warp scan (Hillis-Steele) over shared memory. No bounds checks:
// the caller must lay out scanIdx so that scanIdx - 1 .. scanIdx - 16 fall on
// slots pre-filled with OP::identity(). Same implicit warp-synchronous
// assumption as reduceWarp (no __syncwarp() between steps).
template <typename T, typename OP>
__device__ static inline void scanWarp(unsigned int scanIdx, volatile T* sdata)
{
	sdata[scanIdx] = OP::apply(sdata[scanIdx], sdata[scanIdx - 1]);
	sdata[scanIdx] = OP::apply(sdata[scanIdx], sdata[scanIdx - 2]);
	sdata[scanIdx] = OP::apply(sdata[scanIdx], sdata[scanIdx - 4]);
	sdata[scanIdx] = OP::apply(sdata[scanIdx], sdata[scanIdx - 8]);
	sdata[scanIdx] = OP::apply(sdata[scanIdx], sdata[scanIdx - 16]);
}
|
||||
|
||||
/// The number of warp scan steps
|
||||
#define STEPS LOG2_WARP_SIZE
|
||||
|
||||
// The 5-bit SHFL mask for logically splitting warps into sub-segments starts 8-bits up
|
||||
#define SHFL_MASK ((-1 << STEPS) & 31) << 8
|
||||
|
||||
// Specialization of the inclusive warp scan for PxU32 using shfl.up with a
// predicated add in inline PTX: each of the STEPS iterations shifts partial
// sums up by 1 << STEP lanes (within the segment selected by SHFL_MASK) and
// adds them only where the shuffle's source lane was valid (@p). Avoids the
// shared-memory traffic and identity-padding of the generic version; only the
// final result is written back to sdata[scanIdx].
template <>
__device__ inline void scanWarp<PxU32, AddOP<PxU32> >(unsigned int scanIdx, volatile PxU32 * sdata)
{
	PxU32 input = sdata[scanIdx];

	// Iterate scan steps
#pragma unroll
	for (int STEP = 0; STEP < STEPS; STEP++)
	{
		asm(
			"{"
			" .reg .u32 r0;"
			" .reg .pred p;"
			" shfl.up.b32 r0|p, %1, %2, %3;"
			" @p add.u32 r0, r0, %4;"
			" mov.u32 %0, r0;"
			"}"
			: "=r"(input) : "r"(input), "r"(1 << STEP), "r"(SHFL_MASK), "r"(input));
	}
	sdata[scanIdx] = input;
}
|
||||
|
||||
// int specialization of the inclusive warp scan — identical shuffle-based
// implementation to the PxU32 specialization. The value is staged through a
// PxU32 and summed with add.u32; the bit pattern is preserved, so for two's
// complement int this matches wrapping signed addition.
template <>
__device__ inline void scanWarp<int, AddOP<int> >(unsigned int scanIdx, volatile int* sdata)
{
	PxU32 input = sdata[scanIdx];

	// Iterate scan steps
#pragma unroll
	for (int STEP = 0; STEP < STEPS; STEP++)
	{
		asm(
			"{"
			" .reg .u32 r0;"
			" .reg .pred p;"
			" shfl.up.b32 r0|p, %1, %2, %3;"
			" @p add.u32 r0, r0, %4;"
			" mov.u32 %0, r0;"
			"}"
			: "=r"(input) : "r"(input), "r"(1 << STEP), "r"(SHFL_MASK), "r"(input));
	}
	sdata[scanIdx] = input;
}
|
||||
|
||||
|
||||
#endif
|
||||
772
engine/third_party/physx/source/gpunarrowphase/src/PxgGeometryManager.cpp
vendored
Normal file
772
engine/third_party/physx/source/gpunarrowphase/src/PxgGeometryManager.cpp
vendored
Normal file
@@ -0,0 +1,772 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgGeometryManager.h"
|
||||
|
||||
#include "PxsHeapMemoryAllocator.h"
|
||||
#include "foundation/PxAssert.h"
|
||||
#include "foundation/PxBasicTemplates.h"
|
||||
#include "foundation/PxMemory.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
|
||||
#include "PxgCopyManager.h"
|
||||
#include "PxgCudaUtils.h"
|
||||
#include "PxgHeapMemAllocator.h"
|
||||
|
||||
#include "GuBV32.h"
|
||||
#include "convex/GuConvexMesh.h"
|
||||
#include "mesh/GuTriangleMesh.h"
|
||||
#include "hf/GuHeightField.h"
|
||||
#include "GuSDF.h"
|
||||
#include "cudaNpCommon.h"
|
||||
|
||||
#include "cudamanager/PxCudaContextManager.h"
|
||||
#include "cudamanager/PxCudaContext.h"
|
||||
#include <vector_types.h>
|
||||
#include <vector_functions.h>
|
||||
|
||||
// AD: PxgGeometryManager manages the CPU/GPU data transfers and the lifetime of collision geometries: Convex Hulls, Trimeshes, SDFs, Heightfields.
|
||||
|
||||
using namespace physx;
|
||||
using namespace Gu;
|
||||
|
||||
// forward declarations of static functions
|
||||
static PxU64 computeBoxHullByteSize();
|
||||
static PxU64 computeHullByteSize(const ConvexHullData& hull, PxU32& numPolyVertices);
|
||||
static PxU64 computeTriMeshByteSize(const TriangleMesh& triMesh);
|
||||
static PxU64 computeHeightfieldByteSize(const HeightFieldData& hf);
|
||||
static void layOutBoxHull(void* mem);
|
||||
static void layOutHull(void* mem, const ConvexHullData& hull, PxU32 numPolyVertices);
|
||||
static PxgMeshTextureData layOutTriMesh(void* mem, const TriangleMesh& triMesh, CUstream stream);
|
||||
static void layOutHeightfield(void* mem, const HeightFieldData& hf);
|
||||
template<typename T> static void createTextureObject(CUarray_format format, CUtexObject*& texture, CUarray& cuArray, PxU32 width, PxU32 height, PxU32 depth, const T* data, CUstream stream);
|
||||
|
||||
// PxgGeometryManager definitions
|
||||
// Sets up the allocators taken from the shared heap memory manager: device memory
// holds the uploaded geometry, pinned (mapped) host memory serves as staging storage.
// No memory is allocated up front; 0xFFffFFff marks "box hull not created yet".
PxgGeometryManager::PxgGeometryManager(PxgHeapMemoryAllocatorManager* heapMemoryManager):
	mDeviceMemoryAllocator(heapMemoryManager->mDeviceMemoryAllocators),
	mPinnedMemoryAllocator(static_cast<PxgHeapMemoryAllocator*>(heapMemoryManager->mMappedMemoryAllocators)),
	mPinnedMemoryBasePtr(NULL),
	mPinnedHostMemoryRequirements(0),
	mFreeGeometryIndices(mGeometryData),
	mBoxHullIdx(0xFFffFFff)
{
}
|
||||
|
||||
// Releases whatever the manager still owns: the device-side copy of the shared
// box hull (if it was ever created) and any outstanding pinned staging buffer.
PxgGeometryManager::~PxgGeometryManager()
{
	const bool boxHullCreated = (mBoxHullIdx != 0xFFFFFFFF);
	if (boxHullCreated)
	{
		void* boxHullMem = reinterpret_cast<void*>(getGeometryDevPtrByIndex(mBoxHullIdx));
		mDeviceMemoryAllocator->deallocate(boxHullMem);
	}

	// Pinned memory is normally returned in resetAfterMemcpyCompleted(); free it
	// here in case the manager is destroyed with uploads still pending.
	if (mPinnedMemoryBasePtr != NULL)
	{
		mPinnedMemoryAllocator->deallocate(mPinnedMemoryBasePtr);
		mPinnedMemoryBasePtr = NULL;
	}
}
|
||||
|
||||
// Common path for all addXxx() entry points: allocates device memory for the
// geometry, records a deferred host-to-device copy, and returns the slot index
// under which the geometry is tracked. The pinned staging memory itself is only
// allocated later, in scheduleCopyHtoD().
PxU32 PxgGeometryManager::addGeometryInternal(PxU64 byteSize, const void* geomPtr, UploadGeometryType::Enum type, PxU32 numPolyVertices /*= 0*/)
{
	// Round up to a 256-byte multiple so every staged geometry starts on a
	// 256-byte boundary in the pinned buffer (asserted in scheduleCopyHtoD).
	byteSize = (byteSize + 255) & ~255;

	// Accumulate the total pinned-memory requirement for the next upload batch.
	mPinnedHostMemoryRequirements += byteSize;

	PxU32 idx = mFreeGeometryIndices.getFreeIndex();
	void* devicePtr = mDeviceMemoryAllocator->allocate(byteSize, PxsHeapStats::eNARROWPHASE, PX_FL);

	PxU32 copyIndex = mScheduledCopies.size();

	// Remember what to lay out into pinned memory and upload later.
	ScheduledCopyData scheduledCopy;
	scheduledCopy.mHullOrTrimeshIdx = idx;
	scheduledCopy.mGeometryPtr = geomPtr;
	scheduledCopy.mType = type;
	scheduledCopy.mNumPolyVertices = numPolyVertices;

	// The source pointer of the copy is filled in by scheduleCopyHtoD() once the
	// pinned staging buffer exists.
	PxgCopyManager::CopyDesc desc;
	desc.dest = (size_t)devicePtr;
	desc.bytes = (size_t)byteSize;

	scheduledCopy.mCopyDesc = desc;
	mScheduledCopies.pushBack(scheduledCopy);

	// Track the device pointer and the pending-copy index for this slot; the
	// copy index lets removeGeometry() cancel an upload that has not run yet.
	HullOrMeshData newHullOrMesh;
	newHullOrMesh.mDeviceMemPointer = devicePtr;
	newHullOrMesh.mCopyDescIndex = copyIndex;

	PX_ASSERT(idx < mGeometryData.size());
	mGeometryData[idx] = newHullOrMesh;

	return idx;
}
|
||||
|
||||
void PxgGeometryManager::addBoxHull()
|
||||
{
|
||||
if (mBoxHullIdx != 0xFFffFFff)
|
||||
return;
|
||||
|
||||
PxU64 byteSize = computeBoxHullByteSize();
|
||||
|
||||
mBoxHullIdx = addGeometryInternal(byteSize, NULL, UploadGeometryType::eBOXHULL);
|
||||
}
|
||||
|
||||
// Schedules a convex hull for upload and returns its geometry slot index.
// The total polygon-vertex count computed for sizing is forwarded so
// layOutHull() can copy the vertex-index data without recounting.
PxU32 PxgGeometryManager::addHull(const ConvexHullData& hull)
{
	PxU32 totalPolyVerts;
	const PxU64 requiredBytes = computeHullByteSize(hull, totalPolyVerts);

	return addGeometryInternal(requiredBytes, &hull, UploadGeometryType::eCONVEXHULL, totalPolyVerts);
}
|
||||
|
||||
// Removes the geometry in slot idx: cancels a still-pending upload, destroys any
// CUDA arrays/texture objects the geometry owns (SDF meshes), frees its device
// memory and recycles the slot index.
void PxgGeometryManager::removeGeometry(PxU32 idx)
{
	PX_ASSERT(idx < mGeometryData.size());
	HullOrMeshData geometryToRemove = mGeometryData[idx];

	PxU32 scheduledCopyIndex = geometryToRemove.mCopyDescIndex;

	// If the geometry still has a pending upload, drop it from the copy list.
	// The list is compacted with replace-with-last, so the entry that gets moved
	// must have its owner's stored copy index patched to its new position.
	if (scheduledCopyIndex != HullOrMeshData::INVALID_COPY_DESC_INDEX)
	{
		PxU32 lastScheduledCopyHullOrTrimeshIndex = mScheduledCopies.back().mHullOrTrimeshIdx;

		if (lastScheduledCopyHullOrTrimeshIndex != idx)
		{
			mScheduledCopies.replaceWithLast(scheduledCopyIndex);

			PX_ASSERT(lastScheduledCopyHullOrTrimeshIndex < mGeometryData.size());
			mGeometryData[lastScheduledCopyHullOrTrimeshIndex].mCopyDescIndex = scheduledCopyIndex;
		}
		else
		{
			// The copy being cancelled is the last entry: just pop it.
			mScheduledCopies.popBack();
		}
	}

	// SDF meshes own CUDA arrays and texture objects, keyed by their device
	// pointer (see scheduleCopyHtoD); destroy them before freeing the memory.
	const PxPair<void*const, PxgMeshTextureData>* pair = mMeshToTextureMap.find(geometryToRemove.mDeviceMemPointer);

	if (pair)
	{
		cuArrayDestroy(pair->second.cuArray);
		cuTexObjectDestroy(pair->second.cuTexRef);

		if (pair->second.cuArraySubgrids)
		{
			cuArrayDestroy(pair->second.cuArraySubgrids);
			cuTexObjectDestroy(pair->second.cuTexRefSubgrids);
		}

		mMeshToTextureMap.erase(geometryToRemove.mDeviceMemPointer);
	}

	mDeviceMemoryAllocator->deallocate(geometryToRemove.mDeviceMemPointer);
	mFreeGeometryIndices.setFreeIndex(idx);
}
|
||||
|
||||
// Lays out every scheduled geometry into one pinned staging buffer and pushes the
// corresponding host-to-device copies to the copy manager. The staging buffer is
// released later in resetAfterMemcpyCompleted(), after the copies have completed.
void PxgGeometryManager::scheduleCopyHtoD(PxgCopyManager& copyMan, PxCudaContext& cudaContext, CUstream stream)
{
	// allocate the proper amount of pinned memory
	mPinnedMemoryBasePtr = mPinnedMemoryAllocator->allocate(mPinnedHostMemoryRequirements, PxsHeapStats::eNARROWPHASE, PX_FL);
	PxU8* basePtr = reinterpret_cast<PxU8*>(mPinnedMemoryBasePtr);

	for (PxArray<ScheduledCopyData>::Iterator it = mScheduledCopies.begin(), end = mScheduledCopies.end(); it != end; ++it)
	{
		PxU64 byteSize = it->mCopyDesc.bytes;

		// Sub-allocate this geometry's slice of the staging buffer.
		PxU8* mem = basePtr;
		basePtr += byteSize;

		// Sizes were rounded to 256-byte multiples in addGeometryInternal(), so
		// each slice stays 256-byte aligned.
		PX_ASSERT((byteSize & 255) == 0);
		PX_ASSERT(((size_t)mem & 255) == 0);

		// layout the geometry in pinned memory depending on type
		switch (it->mType)
		{
		case UploadGeometryType::eTRIANGLEMESH:
		{
			PxgMeshTextureData texData = layOutTriMesh(mem, *reinterpret_cast<const TriangleMesh*>(it->mGeometryPtr), stream);

			// SDF meshes created CUDA arrays/textures; remember them keyed by
			// device pointer so removeGeometry() can destroy them.
			if (texData.cuArray)
			{
				PX_ASSERT(it->mHullOrTrimeshIdx < mGeometryData.size());
				mMeshToTextureMap.insert(mGeometryData[it->mHullOrTrimeshIdx].mDeviceMemPointer, texData);
			}
		}
		break;
		case UploadGeometryType::eCONVEXHULL:
			layOutHull(mem, *reinterpret_cast<const ConvexHullData*>(it->mGeometryPtr), it->mNumPolyVertices);
			break;
		case UploadGeometryType::eHEIGHTFIELD:
			layOutHeightfield(mem, *reinterpret_cast<const HeightFieldData*>(it->mGeometryPtr));
			break;
		case UploadGeometryType::eBOXHULL:
			layOutBoxHull(mem);
			break;
		default:
			PX_ALWAYS_ASSERT();
		}

		// set the source pointer in the copy descriptor.
		it->mCopyDesc.source = reinterpret_cast<size_t>(getMappedDevicePtr(&cudaContext, mem));

		// schedule the copies.
		PX_ASSERT(it->mHullOrTrimeshIdx < mGeometryData.size());
		mGeometryData[it->mHullOrTrimeshIdx].mCopyDescIndex = HullOrMeshData::INVALID_COPY_DESC_INDEX;
		copyMan.pushDeferredHtoD(it->mCopyDesc);
	}

	// Everything is handed over to the copy manager; reset the batch state.
	mScheduledCopies.forceSize_Unsafe(0);
	mPinnedHostMemoryRequirements = 0;
}
|
||||
|
||||
void PxgGeometryManager::resetAfterMemcpyCompleted()
|
||||
{
|
||||
// AD: we basically never send geometry each frame, and they're usually sized differently anyway, so let's give this memory back
|
||||
// to keep pinned memory usage under control.
|
||||
if (mPinnedMemoryBasePtr)
|
||||
{
|
||||
mPinnedMemoryAllocator->deallocate(mPinnedMemoryBasePtr);
|
||||
mPinnedMemoryBasePtr = NULL;
|
||||
}
|
||||
|
||||
mFreeGeometryIndices.releaseFreeIndices();
|
||||
}
|
||||
|
||||
// Returns the device-memory address of the geometry stored in slot idx.
CUdeviceptr PxgGeometryManager::getGeometryDevPtrByIndex(PxU32 idx) const
{
	PX_ASSERT(idx < mGeometryData.size());

	void* devMem = mGeometryData[idx].mDeviceMemPointer;
	return reinterpret_cast<CUdeviceptr>(devMem);
}
|
||||
|
||||
// Returns the device-memory address of the shared box hull.
// addBoxHull() must have been called beforehand.
CUdeviceptr PxgGeometryManager::getBoxHullDevPtr() const
{
	PX_ASSERT(mBoxHullIdx != 0xFFffFFff);
	PX_ASSERT(mBoxHullIdx < mGeometryData.size());

	void* boxHullMem = mGeometryData[mBoxHullIdx].mDeviceMemPointer;
	return reinterpret_cast<CUdeviceptr>(boxHullMem);
}
|
||||
|
||||
// Schedules a triangle mesh (including its BV32 tree and optional SDF data)
// for upload and returns its geometry slot index.
PxU32 PxgGeometryManager::addTriMesh(const TriangleMesh& triMesh)
{
	const PxU64 requiredBytes = computeTriMeshByteSize(triMesh);

	return addGeometryInternal(requiredBytes, &triMesh, UploadGeometryType::eTRIANGLEMESH);
}
|
||||
|
||||
// Schedules a heightfield for upload and returns its geometry slot index.
PxU32 PxgGeometryManager::addHeightfield(const HeightFieldData& hf)
{
	const PxU64 requiredBytes = computeHeightfieldByteSize(hf);

	return addGeometryInternal(requiredBytes, &hf, UploadGeometryType::eHEIGHTFIELD);
}
|
||||
|
||||
|
||||
// static functions
|
||||
// Byte size of the shared unit-box hull (8 vertices, 6 faces, 12 edges) in the
// packed GPU layout. Must stay in sync with the data written by layOutBoxHull().
static PxU64 computeBoxHullByteSize()
{
	const PxU64 centerOfMass    = sizeof(float4);
	const PxU64 packedCounts    = sizeof(PxU32);          // NbEdgesNbHullVerticesNbPolygons
	const PxU64 padding         = 3 * sizeof(PxU32);      // pad
	const PxU64 extents         = sizeof(float4);
	const PxU64 hullVertices    = sizeof(float4) * 8;     // vertices
	const PxU64 facePlanes      = sizeof(float4) * 6;     // planes
	const PxU64 polyDescriptors = sizeof(PxU32) * 6;      // vRef8NbVertsMinIndex0
	const PxU64 vertsByEdges    = sizeof(PxU16) * 2 * 12; // mVerticesByEdges16
	const PxU64 facesByEdges    = sizeof(PxU8) * 2 * 12;  // mFacesByEdges8
	const PxU64 facesByVertices = sizeof(PxU8) * 3 * 8;   // mFacesByVertices8
	const PxU64 vertexData      = sizeof(PxU8) * 24;      // vertexData8

	return centerOfMass + packedCounts + padding + extents + hullVertices +
	       facePlanes + polyDescriptors + vertsByEdges + facesByEdges +
	       facesByVertices + vertexData;
}
|
||||
|
||||
// Computes the GPU-side byte size of a convex hull in the packed layout written
// by layOutHull(), and returns the total number of polygon vertex indices via
// numPolyVertices (needed again at layout time).
static PxU64 computeHullByteSize(const ConvexHullData& hull, PxU32& numPolyVertices)
{
	numPolyVertices = 0;

	for (PxU32 i = 0; i < hull.mNbPolygons; ++i)
	{
		numPolyVertices += hull.mPolygons[i].mNbVerts;
	}

	return
		sizeof(float4) + //center of mass

		sizeof(PxU32) + // NbEdgesNbHullVerticesNbPolygons
		3 * sizeof(PxU32) + //pad
		sizeof(float4) + //extents
		sizeof(float4) * hull.mNbHullVertices +//vertices
		sizeof(float4) * hull.mNbPolygons +//planes
		sizeof(PxU32) * hull.mNbPolygons +//vRef8NbVertsMinIndex0
		sizeof(PxU16) * 2 * hull.mNbEdges +//mVerticesByEdges16
		sizeof(PxU8) * 2 * hull.mNbEdges +//mFacesByEdges8
		sizeof(PxU8) * 3 * hull.mNbHullVertices +//mFacesByVertices8
		sizeof(PxU8) * numPolyVertices;//vertexData8
}
|
||||
|
||||
// Computes the GPU-side byte size of a triangle mesh, including the BV32 midphase
// tree and - when present - the SDF header and subgrid slot table. Must stay in
// sync with the layout written by layOutTriMesh().
static PxU64 computeTriMeshByteSize(const TriangleMesh& triMesh)
{
	const PxU32 numTris = triMesh.getNbTrianglesFast();
	const PxU32 numVerts = triMesh.getNbVerticesFast();

	PxU64 meshDataSize =
		sizeof(uint4) // (nbVerts, nbTris, meshAdjVerticiesTotal, nbBv32TreeNodes)
		+ sizeof(float4) * numVerts // meshVerts
		+ sizeof(uint4) * numTris // meshTriIndices
		+ sizeof(uint4) * numTris // meshTriAdjacencies
		+ sizeof(PxU32) * numTris // gpu to cpu remap table
		+ sizeof(PxU32) * numTris * 3 // vertex to triangle remap table
		+ sizeof(PxU32) * (numVerts + 1) // vertex to triangle offset table
		;

	// Make sure memory is always 16-byte aligned
	meshDataSize = (meshDataSize + 15) & ~15;

	meshDataSize += sizeof(uint4); // (sdfDimX, sdfDimY, sdfDimZ, 0)

	const SDF& sdfData = triMesh.getSdfDataFast();
	const PxU32 numSdfs = sdfData.mNumSdfs;
	if (numSdfs > 0)
	{
		// SDF header plus the subgrid start-slot table. The SDF sample values
		// themselves go into CUDA arrays (see layOutTriMesh), not this buffer.
		meshDataSize +=
			+ sizeof(float4) // (meshLower.x, meshLower.y, meshLower.z, spacing)
			+ sizeof(uint4)
			+ sizeof(CUtexObject) // SDF texture object reference
			+ sizeof(CUtexObject)
			+ sizeof(PxU32) * sdfData.mNumStartSlots;
	}

	// Make sure memory is always 16-byte aligned
	meshDataSize = (meshDataSize + 15) & ~15;

	//ML: don't know whether we need to have local bound
	const PxU64 bv32Size = triMesh.mGRB_BV32Tree->mNbPackedNodes * sizeof(BV32DataPacked);

	return meshDataSize + bv32Size;
}
|
||||
|
||||
// Byte size of a heightfield in the layout written by layOutHeightfield():
// rows, columns, the raw 32-bit samples, and the flags.
static PxU64 computeHeightfieldByteSize(const HeightFieldData& hf)
{
	/* Height field height data is 16 bit signed integers, followed by triangle materials.

	Each sample is 32 bits wide arranged as follows:

	1) First there is a 16 bit height value.
	2) Next, two one byte material indices, with the high bit of each byte reserved for special use.
	(so the material index is only 7 bits).
	The high bit of material0 is the tess-flag.
	The high bit of material1 is reserved for future use.
	*/

	const PxU64 rowsField    = sizeof(PxU32); //rows
	const PxU64 columnsField = sizeof(PxU32); //columns
	const PxU64 sampleData   = sizeof(PxU32) * hf.columns * hf.rows;
	const PxU64 flagsField   = sizeof(PxU16); //PxHeightFieldFlags

	return rowsField + columnsField + sampleData + flagsField;
}
|
||||
|
||||
// Writes the shared unit-box convex hull (half-extents 1,1,1) into pinned memory
// in the same packed layout that layOutHull() produces for arbitrary hulls:
// center of mass, packed counts, extents, vertices, planes, polygon descriptors,
// edge/vertex adjacency tables and polygon vertex indices.
// Must stay in sync with computeBoxHullByteSize().
static void layOutBoxHull(void* mem)
{
	const float4 vertices[8] = { make_float4(-1.f, 1.f, -1.f, 0.f),
	                             make_float4(1.f, 1.f, -1.f, 0.f),
	                             make_float4(1.f, -1.f, -1.f, 0.f),
	                             make_float4(-1.f, -1.f, -1.f, 0.f),
	                             make_float4(-1.f, -1.f, 1.f, 0.f),
	                             make_float4(-1.f, 1.f, 1.f, 0.f),
	                             make_float4(1.f, -1.f, 1.f, 0.f),
	                             make_float4(1.f, 1.f, 1.f, 0.f)
	};

	// Face planes: unit normal plus distance, one per box face.
	const float4 planes[6] = {make_float4(0.f, 0.0f, -1.0f, -1.0f),
	                          make_float4(-1.0f, 0.0f, 0.0f, -1.0f),
	                          make_float4(0.0f, -1.0f, 0.0f, -1.0f),
	                          make_float4(0.0f, 1.0f, 0.0f, -1.0f),
	                          make_float4(0.0f, 0.0f, 1.0f, -1.0f),
	                          make_float4(1.0f, 0.0f, 0.0f, -1.0f)};

	// Per-polygon packed descriptor - same merge(vRef8, nbVerts, minIndex)
	// encoding that layOutHull() writes for general hulls.
	const PxU32 polyData[6] = {merge(0, 4, 4),
	                           merge(4, 4, 1),
	                           merge(8, 4, 0),
	                           merge(12, 4, 2),
	                           merge(16, 4, 0),
	                           merge(20, 4, 0)};

	const PxU16 vertsByEdges[24] = { 0, 1, 3, 0, 5, 0, 1, 2, 7, 1, 2, 3, 2, 6, 3, 4, 4, 5, 6, 4, 5, 7, 6, 7 };

	const PxU8 facesByEdges[24] = {
		0, 3,
		0, 1,
		1, 3,
		0, 5,

		3, 5,
		0, 2,
		2, 5,
		1, 2,

		1, 4,
		2, 4,
		3, 4,
		4, 5
	};

	const PxU8 facesByVerts[24] = {
		0, 1, 3,
		0, 3, 5,
		0, 2, 5,
		0, 1, 2,

		1, 2, 4,
		1, 3, 4,
		2, 4, 5,
		3, 4, 5
	};

	const PxU8 polyIndices[24] = {0, 1, 2, 3,
	                              4, 5, 0, 3,
	                              4, 3, 2, 6,
	                              7, 1, 0, 5,
	                              7, 5, 4, 6,
	                              7, 6, 2, 1
	};

	PxU8* m = reinterpret_cast<PxU8*>(mem);
	*((float4*)m) = make_float4(0.0f, 0.0f, 0.0f, 0.0f);//center of mass
	m += sizeof(float4);
	// 12 edges, 8 vertices, 6 polygons packed into one word; the remaining
	// three words are padding, hence the advance by 4 * sizeof(PxU32).
	*((PxU32*)m) = merge(12, merge((PxU8) 8, (PxU8) 6));
	m += 4 * sizeof(PxU32);
	*((float4*)m) = make_float4(1.0f, //half-extents and the insphere radius
	                            1.0f,
	                            1.0f,
	                            1.0f);

	m += sizeof(float4);
	PxMemCopy(m, vertices, sizeof(float4) * 8);
	m += sizeof(float4) * 8;
	PxMemCopy(m, planes, sizeof(float4) * 6);
	m += sizeof(float4) * 6;
	PxMemCopy(m, polyData, sizeof(PxU32) * 6);
	m += sizeof(PxU32) * 6;
	PxMemCopy(m, vertsByEdges, sizeof(PxU16) * 2 * 12);
	m += sizeof(PxU16) * 2 * 12;
	PxMemCopy(m, facesByEdges, sizeof(PxU8) * 2 * 12);
	m += sizeof(PxU8) * 2 * 12;
	PxMemCopy(m, facesByVerts, sizeof(PxU8) * 3 * 8);
	m += sizeof(PxU8) * 3 * 8;
	PxMemCopy(m, polyIndices, sizeof(PxU8) * 24);
}
|
||||
|
||||
// Writes a convex hull into pinned memory in the packed GPU layout: center of
// mass, packed edge/vertex/polygon counts (+3 words padding), internal
// extents/radius, float4-padded vertices, per-polygon planes and packed polygon
// descriptors, then the adjacency tables and polygon vertex indices.
// Must stay in sync with computeHullByteSize().
static void layOutHull(void* mem, const ConvexHullData& hull, PxU32 numPolyVertices)
{
	PxU8* m = (PxU8*) mem;
	*((float4*)m) = make_float4(hull.mCenterOfMass.x, hull.mCenterOfMass.y, hull.mCenterOfMass.z, 0.f);
	m +=sizeof(float4);
	*((PxU32*)m) = merge(hull.mNbEdges, merge(hull.mNbHullVertices, hull.mNbPolygons));
	// One word of packed counts plus three words of padding.
	m += 4 * sizeof(PxU32);
	*((float4*)m) = make_float4(hull.mInternal.mInternalExtents.x,
	                            hull.mInternal.mInternalExtents.y,
	                            hull.mInternal.mInternalExtents.z,
	                            hull.mInternal.mInternalRadius);

	m += sizeof(float4);

	// Vertices, padded from PxVec3 to float4.
	const PxVec3* verts = hull.getHullVertices();

	for (const PxVec3* end = verts + hull.mNbHullVertices; verts < end; ++verts)
	{
		*((float4*)m) = make_float4(verts->x, verts->y, verts->z, 0);
		m += sizeof(float4);
	}

	// Planes and the packed (vRef8, nbVerts, minIndex) descriptors are laid out
	// as two consecutive arrays, filled in a single pass over the polygons.
	const HullPolygonData* polys = hull.mPolygons;
	float4* planes = (float4*)m;
	PxU32* vRef8NbVertsMinIndex = (PxU32*) (m + sizeof(float4) * hull.mNbPolygons);

	for (const HullPolygonData* end = polys + hull.mNbPolygons; polys < end; ++polys)
	{
		*(planes++) = make_float4(polys->mPlane.n.x, polys->mPlane.n.y, polys->mPlane.n.z, polys->mPlane.d);
		*(vRef8NbVertsMinIndex++) = merge(polys->mVRef8, polys->mNbVerts, polys->mMinIndex);
	}

	m += (sizeof(float4) + sizeof(PxU32)) * hull.mNbPolygons;

	PxMemCopy(m, hull.getVerticesByEdges16(), sizeof(PxU16) * 2*hull.mNbEdges);
	m += sizeof(PxU16) * 2*hull.mNbEdges;

	PxMemCopy(m, hull.getFacesByEdges8(), sizeof(PxU8) * 2*hull.mNbEdges);
	m += sizeof(PxU8) * 2*hull.mNbEdges;

	PxMemCopy(m, hull.getFacesByVertices8(), sizeof(PxU8) * 3*hull.mNbHullVertices);
	m += sizeof(PxU8) * 3*hull.mNbHullVertices;

	PxMemCopy(m, hull.getVertexData8(), sizeof(PxU8) * numPolyVertices);
	//m += sizeof(PxU8) * numPolyVertices;
}
|
||||
|
||||
// Writes a triangle mesh into pinned memory (header, BV32 midphase nodes,
// vertices, triangle indices, adjacency and remap tables, SDF header) and, for
// meshes with SDF data, creates the CUDA arrays/texture objects holding the SDF
// samples (uploaded asynchronously on `stream`). Returns the created texture
// handles, zero-initialized when the mesh has no SDF.
// Layout must stay in sync with computeTriMeshByteSize().
static PxgMeshTextureData layOutTriMesh(void* mem, const TriangleMesh& triMesh, CUstream stream)
{
	const PxU32 numTris = triMesh.getNbTrianglesFast();
	const PxU32 numVerts = triMesh.getNbVerticesFast();

	BV32Tree* bv32Tree = triMesh.mGRB_BV32Tree;

	PxU8* m = (PxU8*) mem;
	*((uint4*)m) = make_uint4(triMesh.getNbVerticesFast(), triMesh.getNbTrianglesFast(), 0, bv32Tree->mNbPackedNodes);
	m += sizeof(uint4);

	//Midphase
	PxMemCopy(m, bv32Tree->mPackedNodes, sizeof(BV32DataPacked) * bv32Tree->mNbPackedNodes);
	m += sizeof(BV32DataPacked) * bv32Tree->mNbPackedNodes;

	PX_ASSERT(bv32Tree->mNbPackedNodes > 0);

	// Core: Mesh data
	//construct gpu friendly verts (PxVec3 widened to float4)
	float4* grbVerts = reinterpret_cast<float4*>(m);
	const PxVec3* verts = triMesh.getVerticesFast();

	for (PxU32 i = 0; i < numVerts; ++i)
	{
		grbVerts[i].x = verts[i].x;
		grbVerts[i].y = verts[i].y;
		grbVerts[i].z = verts[i].z;
		grbVerts[i].w = 0.f;
	}

	m += numVerts * sizeof(float4);

	uint4* grbTriInd = reinterpret_cast<uint4*>(m);
	//copy triangle indices, widening 16-bit indices to 32 bit when necessary
	if (triMesh.has16BitIndices())
	{
		const PxU16* triInds = reinterpret_cast<PxU16*>(triMesh.mGRB_triIndices);
		for (PxU32 i = 0; i < numTris; ++i)
		{
			grbTriInd[i].x = triInds[3 * i + 0];
			grbTriInd[i].y = triInds[3 * i + 1];
			grbTriInd[i].z = triInds[3 * i + 2];
			grbTriInd[i].w = 0;
		}
	}
	else
	{
		const PxU32* triInds = reinterpret_cast<PxU32*>(triMesh.mGRB_triIndices);
		for (PxU32 i = 0; i < numTris; ++i)
		{
			grbTriInd[i].x = triInds[3 * i + 0];
			grbTriInd[i].y = triInds[3 * i + 1];
			grbTriInd[i].z = triInds[3 * i + 2];
			grbTriInd[i].w = 0;
		}
	}

	//PxMemCopy(m, triMesh->mGRB_triIndices, numTris * sizeof(uint4));
	m += numTris * sizeof(uint4);

	//Adjacencies data for contact gen
	PxMemCopy(m, triMesh.mGRB_triAdjacencies, numTris * sizeof(uint4));
	m += numTris * sizeof(uint4);

	//GPU to CPU remap table
	PxMemCopy(m, triMesh.mGRB_faceRemap, numTris * sizeof(PxU32));
	m += numTris * sizeof(PxU32);

	//Vertex to triangle offset table: numVerts entries plus a final entry
	//holding the total reference count.
	PxMemCopy(m, triMesh.mAccumulatedTrianglesRef, numVerts * sizeof(PxU32));
	reinterpret_cast<PxU32*>(m)[numVerts] = triMesh.mNbTrianglesReferences;
	m += (numVerts + 1) * sizeof(PxU32);

	//Vertex to triangle remap table
	PxMemCopy(m, triMesh.mTrianglesReferences, (numTris*3) * sizeof(PxU32));
	m += (numTris * 3) * sizeof(PxU32);

	// Make sure m is 16-byte aligned
	m = (PxU8*)(((size_t)m + 15) & ~15);

	// Copy sdf values
	const SDF& sdfData = triMesh.getSdfDataFast();

	*((uint4*)m) = make_uint4(sdfData.mDims.x, sdfData.mDims.y, sdfData.mDims.z, triMesh.getPreferSDFProjection() ? 1 : 0);
	m += sizeof(uint4);

	const PxU32 numSdfs = sdfData.mNumSdfs;

	PxgMeshTextureData result;
	PxMemZero(&result, sizeof(result));
	if (numSdfs)
	{
		const PxVec3 meshLower = sdfData.mMeshLower;
		const PxReal spacing = sdfData.mSpacing;
		*((float4*)m) = make_float4(meshLower.x, meshLower.y, meshLower.z, spacing);
		m += sizeof(float4);

		*((uint4*)m) = make_uint4(sdfData.mSubgridSize, reinterpret_cast<const PxU32&>(sdfData.mSubgridsMinSdfValue), reinterpret_cast<const PxU32&>(sdfData.mSubgridsMaxSdfValue), sdfData.mNumStartSlots);
		m += sizeof(uint4);

		// Slots inside the uploaded buffer that receive the texture object
		// handles; filled in by createTextureObject() below.
		CUtexObject* pTexture = (CUtexObject*)m;
		m += sizeof(CUtexObject);

		CUtexObject* pTextureSubgrids = (CUtexObject*)m;
		m += sizeof(CUtexObject);

		if (sdfData.mSubgridSize > 0)
			PxMemCopy(m, sdfData.mSubgridStartSlots, sdfData.mNumStartSlots * sizeof(PxU32));
		m += sizeof(PxU32) * sdfData.mNumStartSlots;

		if (sdfData.mSubgridSize == 0)
		{
			// Dense SDF: one 3D texture holds all samples.
			CUarray cuArray;
			createTextureObject<PxReal>(CU_AD_FORMAT_FLOAT, pTexture, cuArray, sdfData.mDims.x, sdfData.mDims.y, sdfData.mDims.z, sdfData.mSdf, stream);

			result.cuArray = cuArray;
			result.cuTexRef = *pTexture;

			result.cuArraySubgrids = 0;
			result.cuTexRefSubgrids = 0;
		}
		else
		{
			// Sparse SDF: a coarse background grid plus a texture of subgrid
			// blocks whose sample precision depends on mBytesPerSparsePixel.
			PxU32 x = sdfData.mDims.x / sdfData.mSubgridSize;
			PxU32 y = sdfData.mDims.y / sdfData.mSubgridSize;
			PxU32 z = sdfData.mDims.z / sdfData.mSubgridSize;

			CUarray cuArray;
			createTextureObject<PxReal>(CU_AD_FORMAT_FLOAT, pTexture, cuArray, x + 1, y + 1, z + 1, sdfData.mSdf, stream);

			CUarray cuArraySubgrids = 0;
			switch(sdfData.mBytesPerSparsePixel)
			{
			case 1:
				createTextureObject<PxU8>(CU_AD_FORMAT_UNSIGNED_INT8, pTextureSubgrids, cuArraySubgrids,
					sdfData.mSdfSubgrids3DTexBlockDim.x * (sdfData.mSubgridSize + 1),
					sdfData.mSdfSubgrids3DTexBlockDim.y * (sdfData.mSubgridSize + 1),
					sdfData.mSdfSubgrids3DTexBlockDim.z * (sdfData.mSubgridSize + 1), sdfData.mSubgridSdf, stream);
				break;
			case 2:
				createTextureObject<PxU16>(CU_AD_FORMAT_UNSIGNED_INT16, pTextureSubgrids, cuArraySubgrids,
					sdfData.mSdfSubgrids3DTexBlockDim.x * (sdfData.mSubgridSize + 1),
					sdfData.mSdfSubgrids3DTexBlockDim.y * (sdfData.mSubgridSize + 1),
					sdfData.mSdfSubgrids3DTexBlockDim.z * (sdfData.mSubgridSize + 1), reinterpret_cast<PxU16*>(sdfData.mSubgridSdf), stream);
				break;
			case 4:
				createTextureObject<PxReal>(CU_AD_FORMAT_FLOAT, pTextureSubgrids, cuArraySubgrids,
					sdfData.mSdfSubgrids3DTexBlockDim.x * (sdfData.mSubgridSize + 1),
					sdfData.mSdfSubgrids3DTexBlockDim.y * (sdfData.mSubgridSize + 1),
					sdfData.mSdfSubgrids3DTexBlockDim.z * (sdfData.mSubgridSize + 1), reinterpret_cast<PxReal*>(sdfData.mSubgridSdf), stream);
				break;
			}

			result.cuArray = cuArray;
			result.cuTexRef = *pTexture;

			if (cuArraySubgrids)
			{
				result.cuArraySubgrids = cuArraySubgrids;
				result.cuTexRefSubgrids = *pTextureSubgrids;
			}
			else
			{
				result.cuArraySubgrids = 0;
				result.cuTexRefSubgrids = 0;
			}
		}
	}

	return result;
}
|
||||
|
||||
// Writes a heightfield into pinned memory: row count, column count, the raw
// 32-bit samples, then the flags. Must stay in sync with
// computeHeightfieldByteSize().
static void layOutHeightfield(void* mem, const HeightFieldData& hf)
{
	PxU8* m = (PxU8*) mem;

	PxU32 nbRows = hf.rows;
	PxU32 nbCols = hf.columns;

	*((PxU32* ) m) = nbRows;
	m += sizeof(PxU32);

	*((PxU32* ) m) = nbCols;
	m += sizeof(PxU32);

	// Samples are copied verbatim; each PxHeightFieldSample is exactly 32 bits
	// (16-bit height plus two material bytes).
	PX_ASSERT(sizeof(PxU32) == sizeof(PxHeightFieldSample));
	PxMemCopy(m, hf.samples, sizeof(PxU32) * nbRows * nbCols);

	m += sizeof(PxU32) * nbRows * nbCols;

	*((PxU16*)m) = hf.flags;
}
|
||||
|
||||
// Creates a single-channel 3D CUDA array of the given format and dimensions,
// asynchronously uploads `data` into it on `stream`, and builds a clamped,
// linearly-filtered texture object over it (written through `texture`).
// If any dimension is zero, both outputs are set to 0 and nothing is created.
template<typename T>
static void createTextureObject(CUarray_format format, CUtexObject*& texture, CUarray& cuArray, PxU32 width, PxU32 height, PxU32 depth, const T* data, CUstream stream)
{
	if (width == 0 || height == 0 || depth == 0)
	{
		texture = 0;
		cuArray = 0;
		return;
	}

	CUDA_ARRAY3D_DESCRIPTOR_st arrDesc;
	arrDesc.Format = format;
	arrDesc.NumChannels = 1;
	arrDesc.Width = width;
	arrDesc.Height = height;
	arrDesc.Depth = depth;
	arrDesc.Flags = 0;
	CUresult r = cuArray3DCreate(&cuArray, &arrDesc);
	PX_UNUSED(r);
	PX_ASSERT(r == CUDA_SUCCESS);

	// Host -> array upload; asynchronous, ordered by the caller's stream.
	CUDA_MEMCPY3D copyParam;
	PxMemZero(&copyParam, sizeof(copyParam));
	copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
	copyParam.dstArray = cuArray;
	copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
	copyParam.srcHost = data;
	copyParam.srcPitch = width * sizeof(T);
	copyParam.srcHeight = height;
	copyParam.WidthInBytes = copyParam.srcPitch;
	copyParam.Height = height;
	copyParam.Depth = depth;

	cuMemcpy3DAsync(&copyParam, stream);

	CUDA_RESOURCE_DESC resDesc;
	PxMemZero(&resDesc, sizeof(resDesc));
	resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
	resDesc.res.array.hArray = cuArray;

	// Clamp at the borders and interpolate linearly between samples.
	CUDA_TEXTURE_DESC texDesc;
	PxMemZero(&texDesc, sizeof(texDesc));
	texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_CLAMP;
	texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_CLAMP;
	texDesc.addressMode[2] = CU_TR_ADDRESS_MODE_CLAMP;

	texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR;

	r = cuTexObjectCreate(texture, &resDesc, &texDesc, NULL);
	PX_ASSERT(r == CUDA_SUCCESS);
}
|
||||
92
engine/third_party/physx/source/gpunarrowphase/src/PxgNarrowphase.cpp
vendored
Normal file
92
engine/third_party/physx/source/gpunarrowphase/src/PxgNarrowphase.cpp
vendored
Normal file
@@ -0,0 +1,92 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgBroadPhase.h"
|
||||
|
||||
namespace physx
{

// Registration entry points for the compiled narrowphase GPU kernel modules
// (presumably one per kernel translation unit - confirm against the CUDA build).
extern "C" void initNarrowphaseKernels0();
extern "C" void initNarrowphaseKernels1();
extern "C" void initNarrowphaseKernels2();
extern "C" void initNarrowphaseKernels3();
extern "C" void initNarrowphaseKernels4();
extern "C" void initNarrowphaseKernels5();
extern "C" void initNarrowphaseKernels6();
extern "C" void initNarrowphaseKernels7();
extern "C" void initNarrowphaseKernels8();
extern "C" void initNarrowphaseKernels9();
extern "C" void initNarrowphaseKernels10();
extern "C" void initNarrowphaseKernels11();
extern "C" void initNarrowphaseKernels12();
extern "C" void initNarrowphaseKernels13();
extern "C" void initNarrowphaseKernels14();
extern "C" void initNarrowphaseKernels15();
extern "C" void initNarrowphaseKernels16();
extern "C" void initNarrowphaseKernels17();
extern "C" void initNarrowphaseKernels18();
extern "C" void initNarrowphaseKernels19();
extern "C" void initNarrowphaseKernels20();
extern "C" void initNarrowphaseKernels21();
extern "C" void initNarrowphaseKernels22();
extern "C" void initNarrowphaseKernels23();
extern "C" void initNarrowphaseKernels24();

// References every kernel-registration entry point so the linker keeps the
// corresponding object files when linking statically. Compiles to a no-op in
// the GPU-exports (shared library) build.
void createPxgNarrowphase()
{
#if !PX_PHYSX_GPU_EXPORTS
	//this call is needed to force PhysXNarrowphaseGpu linkage as Static Library!
	initNarrowphaseKernels0();
	initNarrowphaseKernels1();
	initNarrowphaseKernels2();
	initNarrowphaseKernels3();
	initNarrowphaseKernels4();
	initNarrowphaseKernels5();
	initNarrowphaseKernels6();
	initNarrowphaseKernels7();
	initNarrowphaseKernels8();
	initNarrowphaseKernels9();
	initNarrowphaseKernels10();
	initNarrowphaseKernels11();
	initNarrowphaseKernels12();
	initNarrowphaseKernels13();
	initNarrowphaseKernels14();
	initNarrowphaseKernels15();
	initNarrowphaseKernels16();
	initNarrowphaseKernels17();
	initNarrowphaseKernels18();
	initNarrowphaseKernels19();
	initNarrowphaseKernels20();
	initNarrowphaseKernels21();
	initNarrowphaseKernels22();
	initNarrowphaseKernels23();
	initNarrowphaseKernels24();
#endif
}

}
|
||||
9142
engine/third_party/physx/source/gpunarrowphase/src/PxgNarrowphaseCore.cpp
vendored
Normal file
9142
engine/third_party/physx/source/gpunarrowphase/src/PxgNarrowphaseCore.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1838
engine/third_party/physx/source/gpunarrowphase/src/PxgNphaseImplementationContext.cpp
vendored
Normal file
1838
engine/third_party/physx/source/gpunarrowphase/src/PxgNphaseImplementationContext.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
570
engine/third_party/physx/source/gpunarrowphase/src/PxgShapeManager.cpp
vendored
Normal file
570
engine/third_party/physx/source/gpunarrowphase/src/PxgShapeManager.cpp
vendored
Normal file
@@ -0,0 +1,570 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgShapeManager.h"
|
||||
#include "PxgCopyManager.h"
|
||||
#include "PxgHeapMemAllocator.h"
|
||||
#include "PxgCudaUtils.h"
|
||||
#include "PxNodeIndex.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Constructs the shape manager. All "mHost*" arrays live in pinned (mapped)
// host memory via the heap manager's mapped-memory allocator so they can be
// used as DMA sources; each has a matching "mGpu*" device buffer. Everything
// is sized up-front for 128 entries so the first registrations do not resize.
PxgShapeManager::PxgShapeManager(PxgHeapMemoryAllocatorManager* heapManager) :
	mHeapManager(heapManager),
	mHostShapes(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE)),
	mHostShapesRemapTable(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE)),
	mHostShapeIdTable(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE)),
	mHostTransformCacheIdToActorTable(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE)),
	mGpuShapesBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuShapesRemapTableBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuTransformCacheIdToActorTableBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuRigidIndiceBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuShapeIndiceBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuUnsortedShapeIndicesBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuTempRigidBitIndiceBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuTempRigidIndiceBuffer(heapManager, PxsHeapStats::eNARROWPHASE)
{
	// Initial capacity (128 is already a multiple of 4 — see the "x4"
	// requirement in registerShape/registerShapeInstance).
	//allocate x4
	const PxU32 initialSize = 128;

	// reserve() + forceSize_Unsafe() grows each pinned array to initialSize
	// without running per-element construction.
	mHostShapes.forceSize_Unsafe(0);
	mHostShapes.reserve(initialSize);
	mHostShapes.forceSize_Unsafe(initialSize);

	mHostShapesRemapTable.forceSize_Unsafe(0);
	mHostShapesRemapTable.reserve(initialSize);
	mHostShapesRemapTable.forceSize_Unsafe(initialSize);

	mHostShapeIdTable.forceSize_Unsafe(0);
	mHostShapeIdTable.reserve(initialSize);
	mHostShapeIdTable.forceSize_Unsafe(initialSize);

	mHostTransformCacheIdToActorTable.forceSize_Unsafe(0);
	mHostTransformCacheIdToActorTable.reserve(initialSize);
	mHostTransformCacheIdToActorTable.forceSize_Unsafe(initialSize);

	// Device-side mirrors, sized to match the host arrays.
	mGpuShapesBuffer.allocate(sizeof(PxgShape)*initialSize, PX_FL);
	mGpuShapesRemapTableBuffer.allocate(sizeof(PxNodeIndex) * initialSize, PX_FL);
	mGpuTransformCacheIdToActorTableBuffer.allocate(sizeof(PxActor*) * initialSize, PX_FL);
	mGpuRigidIndiceBuffer.allocate(sizeof(PxNodeIndex) * initialSize, PX_FL);
	mGpuShapeIndiceBuffer.allocate(sizeof(PxU32) * initialSize, PX_FL);
	mGpuUnsortedShapeIndicesBuffer.allocate(sizeof(PxU32) * initialSize, PX_FL);
	mGpuTempRigidBitIndiceBuffer.allocate(sizeof(PxU32) * initialSize, PX_FL);
	mGpuTempRigidIndiceBuffer.allocate(sizeof(PxNodeIndex) * initialSize, PX_FL);

	mResizeRequired = false;
	mTransformCacheResizeRequired = false;
	// -1 means "nothing registered yet"; both maxima grow monotonically in
	// registerShape/registerShapeInstance.
	mMaxShapeId = -1;
	mMaxTransformCacheID = -1;

	mHasShapeChanged = false;
	mHasShapeInstanceChanged = false;
}
|
||||
|
||||
// Fills the remap-table, rigid-index and shape-index device buffers with
// 0xFFFFFFFF so that any slot that is never written stays recognizable as
// invalid. All fills are asynchronous on the given stream.
void PxgShapeManager::initialize(PxCudaContext* cudaContext, CUstream stream)
{
	// Fill pattern marking an unused / invalid entry.
	const PxU32 invalidPattern = 0xFFFFFFFF;

	cudaContext->memsetD32Async(mGpuShapesRemapTableBuffer.getDevicePtr(), invalidPattern, mGpuShapesRemapTableBuffer.getSize() / sizeof(PxU32), stream);
	cudaContext->memsetD32Async(mGpuRigidIndiceBuffer.getDevicePtr(), invalidPattern, mGpuRigidIndiceBuffer.getSize() / sizeof(PxU32), stream);
	cudaContext->memsetD32Async(mGpuShapeIndiceBuffer.getDevicePtr(), invalidPattern, mGpuShapeIndiceBuffer.getSize() / sizeof(PxU32), stream);
	cudaContext->memsetD32Async(mGpuUnsortedShapeIndicesBuffer.getDevicePtr(), invalidPattern, mGpuUnsortedShapeIndicesBuffer.getSize() / sizeof(PxU32), stream);
}
|
||||
|
||||
// Allocates an id for a new shape, stores the shape data in the pinned host
// mirror and marks the slot dirty so scheduleCopyHtoD() uploads it later.
// Returns the shape id, which is also the slot index into mHostShapes.
PxU32 PxgShapeManager::registerShape(PxgShape& shape)
{
	const PxU32 shapeId = mIdPool.getNewID();

	// Grow host storage if the fresh id does not fit. The matching device
	// buffer is only grown lazily, in scheduleCopyHtoD(), hence the flag.
	if (shapeId >= mHostShapes.capacity())
	{
		mResizeRequired = true;
		const PxU32 capacity = shapeId * 2;

		//make sure capacity is x4 because we need to use radix sort to sort shape id based on rigid body index later
		const PxU32 tempCapacity = (capacity + 3)&(~3);
		mHostShapes.resize(tempCapacity);
		mDirtyShapeMap.resize(tempCapacity);
	}

	mHostShapes[shapeId] = shape;
	// Queue this slot for host-to-device upload.
	mDirtyShapeMap.growAndSet(shapeId);

	// Track the largest id in use; scheduleCopyHtoD() pads from this value up
	// to the next multiple of 4.
	mMaxShapeId = PxMax(PxI32(shapeId), mMaxShapeId);

	mHasShapeChanged = true;

	return shapeId;
}
|
||||
|
||||
// Registers a shape instance under "transformCacheID": records the island
// node index (rigid body) it maps to and the actor that owns it. For
// aggregated instances the shape-id and actor entries are stored as invalid
// (0xffffffff / NULL). All four host tables are kept at the same capacity.
void PxgShapeManager::registerShapeInstance(const PxNodeIndex& nodeIndex, const PxU32 transformCacheID, PxActor* actor, bool aggregate)
{
	// Grow all host tables together if the cache id does not fit; the device
	// buffers are grown lazily in scheduleCopyHtoD().
	if (transformCacheID >= mHostShapesRemapTable.capacity())
	{
		const PxU32 capacity = transformCacheID*2;
		//make sure capacity is x4 because we need to use radix sort to sort shape id based on rigid body index later
		const PxU32 tempCapacity = (capacity + 3)&(~3);
		mTransformCacheResizeRequired = true;
		mHostShapesRemapTable.resize(tempCapacity);
		mHostShapeIdTable.resize(tempCapacity);
		mHostTransformCacheIdToActorTable.resize(tempCapacity);
		mDirtyTransformCacheMap.resize(tempCapacity);
	}

	mHostShapesRemapTable[transformCacheID] = nodeIndex;
	mHostShapeIdTable[transformCacheID] = aggregate? 0xffffffff : transformCacheID;
	mHostTransformCacheIdToActorTable[transformCacheID] = aggregate ? NULL : actor;
	mHasShapeInstanceChanged = true;
	// Queue this slot for host-to-device upload.
	mDirtyTransformCacheMap.growAndSet(transformCacheID);
	mMaxTransformCacheID = PxMax(PxI32(transformCacheID), mMaxTransformCacheID);
}
|
||||
|
||||
void PxgShapeManager::unregisterShape(const PxU32 id)
|
||||
{
|
||||
mDirtyShapeMap.reset(id);
|
||||
mIdPool.deferredFreeID(id);
|
||||
mHasShapeChanged = true;
|
||||
}
|
||||
|
||||
void PxgShapeManager::unregisterShapeInstance(const PxU32 transformCacheID)
|
||||
{
|
||||
mDirtyTransformCacheMap.set(transformCacheID);
|
||||
mHostShapesRemapTable[transformCacheID] = PxNodeIndex(PX_INVALID_NODE);
|
||||
mHostShapeIdTable[transformCacheID] = 0xffffffff;
|
||||
mHostTransformCacheIdToActorTable[transformCacheID] = NULL;
|
||||
mHasShapeInstanceChanged = true;
|
||||
}
|
||||
|
||||
// Schedules host-to-device uploads for every dirty shape and every dirty
// shape instance. Device buffers that became too small are first reallocated
// (preserving old contents asynchronously on "stream") and their new tail is
// filled with the invalid marker. Dirty slots are walked with the bitmap
// iterator pattern, and runs of adjacent dirty slots (up to 16) are coalesced
// into a single copy descriptor to cut down the number of DMA requests.
void PxgShapeManager::scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext, CUstream stream)
{
	PX_UNUSED(copyManager);

	// Maximum number of adjacent dirty slots merged into one copy descriptor.
	const PxU32 maxGrouping = 16;

	if (mHasShapeChanged)
	{
		mHasShapeChanged = false;

		if (mResizeRequired)
		{
			//Allocate and copy data across
			mGpuShapesBuffer.allocateCopyOldDataAsync(sizeof(PxgShape)*mHostShapes.capacity(), cudaContext, stream, PX_FL);

			mResizeRequired = false;
		}

		const PxU32* bits = mDirtyShapeMap.getWords();
		if (bits)
		{
			const PxU32 totalNumOfShapes = mMaxShapeId + 1;
			// Round the shape count up to a multiple of 4 (radix-sort x4
			// requirement — see registerShape).
			const PxU32 numShapes = (totalNumOfShapes + 3) &(~3);

			//make sure the dirty shape map cover x4 case and set those to invalid value
			for (PxU32 i = totalNumOfShapes; i < numShapes; ++i)
			{
				mDirtyShapeMap.growAndSet(i);
			}

			// PT: ### bitmap iterator pattern
			const PxU32 lastSetBit = mDirtyShapeMap.findLast();
			for (PxU32 w = 0; w <= lastSetBit >> 5; ++w)
			{
				//b&=b-1 will clear the lowest set bit in b
				for (PxU32 b = bits[w]; b; )
				{
					//dirtyId is the next bit that's set to 1!
					const PxU32 dirtyId = PxU32(w << 5 | PxLowestSetBit(b));

					void* hostPtr = mHostShapes.begin() + dirtyId;

					// Source: the pinned host copy (mapped into the device
					// address space via getMappedDevicePtr — presumably a
					// host-pointer-to-device-pointer translation; confirm).
					// Destination: the shape's slot in the device buffer.
					PxgCopyManager::CopyDesc desc;
					desc.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostPtr));
					desc.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuShapesBuffer.getDevicePtr()) + dirtyId * sizeof(PxgShape));
					desc.bytes = sizeof(PxgShape);

					mDirtyShapeMap.reset(dirtyId);
					//Now we loop to try and find adjacent bits that are set...
					PxU32 currIdx = dirtyId + 1;
					PxU32 groupSize = 1;
					while (currIdx <= lastSetBit && mDirtyShapeMap.test(currIdx) && groupSize < maxGrouping)
					{
						groupSize++;
						mDirtyShapeMap.reset(currIdx);
						currIdx++;
						// Extend the pending copy to cover the neighbour.
						desc.bytes += sizeof(PxgShape);
					}

					if (currIdx != (dirtyId + 1))
					{
						// A run was consumed: resynchronize the outer
						// iteration state with where the run ended.
						//get the word from the current bit
						w = PxMin(currIdx, lastSetBit) >> 5;
						//reload the word
						b = bits[w]; //Set a 1 here to make sure the b &= (b-1) in the for loop doesn't remove the current bit we're interested in
					}
					else
					{
						b &= (b - 1);
					}

					copyManager.pushDeferredHtoD(desc);
				}
			}
		}

		mDirtyShapeMap.clear();
	}

	if (mHasShapeInstanceChanged)
	{
		//AD: mHasShapeInstanceChanged needs to persist because computeRigidsToShapes() needs to run if we use direct-API
		// we lower the flag in PxgNarrowphaseCore::prepareGpuNarrowphase.

		// AD: the resize of the GPU transform cache is inside PxgNarrowphaseCore::prepareGpuNarrowphase.
		if (mTransformCacheResizeRequired)
		{
			// Grow each device table to match its host mirror, keep the old
			// contents, and fill the newly exposed tail with the invalid
			// marker (0 for the actor-pointer table).
			PxU64 oldCapacity = mGpuShapesRemapTableBuffer.getSize();
			mGpuShapesRemapTableBuffer.allocateCopyOldDataAsync(sizeof(PxNodeIndex)*mHostShapesRemapTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuShapesRemapTableBuffer.getDevicePtr() + oldCapacity, 0xFFFFFFFF, (mGpuShapesRemapTableBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);

			oldCapacity = mGpuRigidIndiceBuffer.getSize();
			mGpuRigidIndiceBuffer.allocateCopyOldDataAsync(sizeof(PxNodeIndex) * mHostShapesRemapTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuRigidIndiceBuffer.getDevicePtr() + oldCapacity, 0xFFFFFFFF, (mGpuRigidIndiceBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);

			// Scratch buffer: no old data worth keeping, plain allocate.
			mGpuTempRigidIndiceBuffer.allocate(sizeof(PxNodeIndex) * mHostShapesRemapTable.capacity(), PX_FL);

			oldCapacity = mGpuShapeIndiceBuffer.getSize();
			mGpuShapeIndiceBuffer.allocateCopyOldDataAsync(sizeof(PxU32) * mHostShapeIdTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuShapeIndiceBuffer.getDevicePtr() + oldCapacity, 0xFFFFFFFF, (mGpuShapeIndiceBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);

			oldCapacity = mGpuUnsortedShapeIndicesBuffer.getSize();
			mGpuUnsortedShapeIndicesBuffer.allocateCopyOldDataAsync(sizeof(PxU32) * mHostShapeIdTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuUnsortedShapeIndicesBuffer.getDevicePtr() + oldCapacity, 0xFFFFFFFF, (mGpuUnsortedShapeIndicesBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);

			// Scratch buffer for the sort, plain allocate.
			mGpuTempRigidBitIndiceBuffer.allocate(sizeof(PxU32) * mHostShapeIdTable.capacity(), PX_FL);

			oldCapacity = mGpuTransformCacheIdToActorTableBuffer.getSize();
			mGpuTransformCacheIdToActorTableBuffer.allocateCopyOldDataAsync(sizeof(PxActor*) * mHostTransformCacheIdToActorTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuTransformCacheIdToActorTableBuffer.getDevicePtr() + oldCapacity, 0, (mGpuTransformCacheIdToActorTableBuffer.getSize() - oldCapacity) / sizeof(PxActor*), stream);

			mTransformCacheResizeRequired = false;
		}

		const PxU32 totalNumOfShapeInstances = mMaxTransformCacheID + 1;
		// Round up to a multiple of 4, as for shapes above.
		const PxU32 numShapeInstances = (totalNumOfShapeInstances + 3) &(~3);

		//make sure the dirty shape map cover x4 case and set those to invalid value
		for (PxU32 i = totalNumOfShapeInstances; i < numShapeInstances; ++i)
		{
			// Only scrub padding slots that don't already hold the static-body
			// marker, so real entries are left alone.
			if (!mHostShapesRemapTable[i].isStaticBody())
			{
				mDirtyTransformCacheMap.growAndSet(i);
				mHostShapesRemapTable[i] = PxNodeIndex(PX_INVALID_NODE);
				mHostShapeIdTable[i] = 0xffffffff;
				mHostTransformCacheIdToActorTable[i] = NULL;
			}
		}

		const PxU32* bits = mDirtyTransformCacheMap.getWords();

		if (bits)
		{
			// PT: ### bitmap iterator pattern
			const PxU32 lastSetBit = mDirtyTransformCacheMap.findLast();
			for (PxU32 w = 0; w <= lastSetBit >> 5; ++w)
			{
				//b&=b-1 will clear the lowest set bit in b
				for (PxU32 b = bits[w]; b; )
				{
					//dirtyId is the next bit that's set to 1!
					const PxU32 dirtyId = PxU32(w << 5 | PxLowestSetBit(b));

					void* hostRemapPtr = mHostShapesRemapTable.begin() + dirtyId;

					void* hostShapeIdPtr = mHostShapeIdTable.begin() + dirtyId;

					void* hostTransformCacheIdToActorPtr = mHostTransformCacheIdToActorTable.begin() + dirtyId;

					// Four parallel uploads per dirty slot:
					// desc1: node-index remap table.
					PxgCopyManager::CopyDesc desc1;
					desc1.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostRemapPtr));
					desc1.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuShapesRemapTableBuffer.getDevicePtr()) + dirtyId * sizeof(PxNodeIndex));
					desc1.bytes = sizeof(PxNodeIndex);

					// desc2: same host data mirrored into the rigid-index
					// buffer (separate copy so it can be sorted independently).
					PxgCopyManager::CopyDesc desc2;
					desc2.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostRemapPtr));
					desc2.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuRigidIndiceBuffer.getDevicePtr()) + dirtyId * sizeof(PxNodeIndex));
					desc2.bytes = sizeof(PxNodeIndex);

					// desc3: shape-id table (unsorted indices buffer).
					PxgCopyManager::CopyDesc desc3;
					desc3.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostShapeIdPtr));
					desc3.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuUnsortedShapeIndicesBuffer.getDevicePtr()) + dirtyId * sizeof(PxU32));
					desc3.bytes = sizeof(PxU32);

					// desc4: transform-cache-id -> actor pointer table.
					PxgCopyManager::CopyDesc desc4;
					desc4.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostTransformCacheIdToActorPtr));
					desc4.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuTransformCacheIdToActorTableBuffer.getDevicePtr()) + dirtyId * sizeof(PxActor*));
					desc4.bytes = sizeof(PxActor*);

					mDirtyTransformCacheMap.reset(dirtyId);
					//Now we loop to try and find adjacent bits that are set...
					PxU32 currIdx = dirtyId + 1;
					PxU32 groupSize = 1;
					while (currIdx <= lastSetBit && mDirtyTransformCacheMap.test(currIdx) && groupSize < maxGrouping)
					{
						groupSize++;
						mDirtyTransformCacheMap.reset(currIdx);
						currIdx++;
						// Extend all four pending copies over the neighbour.
						desc1.bytes += sizeof(PxNodeIndex);
						desc2.bytes += sizeof(PxNodeIndex);
						desc3.bytes += sizeof(PxU32);
						desc4.bytes += sizeof(PxActor*);
					}

					if (currIdx != (dirtyId + 1))
					{
						// A run was consumed: resynchronize the outer
						// iteration state with where the run ended.
						//get the word from the current bit
						w = PxMin(currIdx, lastSetBit) >> 5;
						//reload the word
						b = bits[w]; //Set a 1 here to make sure the b &= (b-1) in the for loop doesn't remove the current bit we're interested in
					}
					else
					{
						b &= (b - 1);
					}

					copyManager.pushDeferredHtoD(desc1);
					copyManager.pushDeferredHtoD(desc2);
					copyManager.pushDeferredHtoD(desc3);
					copyManager.pushDeferredHtoD(desc4);
				}
			}
		}
		mDirtyTransformCacheMap.clear();
	}
}
|
||||
|
||||
void PxgShapeManager::updateShapeMaterial(const PxU32 materialIndex, const PxU32 id)
|
||||
{
|
||||
PX_ASSERT(id < mHostShapes.size());
|
||||
mHostShapes[id].materialIndex = materialIndex;
|
||||
mDirtyShapeMap.growAndSet(id);
|
||||
mHasShapeChanged = true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Sets up pinned host storage and a device buffer for material data.
// Capacities are in bytes: room for 128 materials of "elemSize" bytes each.
PxgMaterialManager::PxgMaterialManager(PxgHeapMemoryAllocatorManager* heapManager, const PxU32 elemSize) :
	mGpuMaterialBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mHeapManager(heapManager),
	mHostMaterial(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE))
{
	mResizeRequired = false;

	// 128 material slots up front.
	const PxU32 byteCapacity = elemSize * 128;

	// reserve() + forceSize_Unsafe() grows the byte array without running any
	// per-element construction.
	mHostMaterial.forceSize_Unsafe(0);
	mHostMaterial.reserve(byteCapacity);
	mHostMaterial.forceSize_Unsafe(byteCapacity);

	mGpuMaterialBuffer.allocate(byteCapacity, PX_FL);
}
|
||||
|
||||
// Allocates a slot for a new material, copies its "elemSize" bytes into the
// pinned host mirror and marks the slot dirty for upload. Returns the
// material's index.
PxU32 PxgMaterialManager::registerMaterial(const PxU8* materialData, const PxU32 elemSize)
{
	const PxU32 materialId = mIdPool.getNewID();

	// Capacity is tracked in elements, storage in bytes. Grow host storage if
	// the new id falls outside the current element count.
	const PxU32 slotCount = mHostMaterial.capacity() / elemSize;
	if (materialId >= slotCount)
	{
		const PxU32 grownSlotCount = PxMax(slotCount * 2 + 1, materialId + 1);
		mHostMaterial.resize(grownSlotCount * elemSize);
		// The device buffer must follow on the next scheduleCopyHtoD().
		mResizeRequired = true;
	}

	PxMemCopy(mHostMaterial.begin() + materialId * elemSize, materialData, elemSize);
	mDirtyMaterialMap.growAndSet(materialId);

	return materialId;
}
|
||||
|
||||
void PxgMaterialManager::unregisterMaterial(const PxU32 id)
|
||||
{
|
||||
mDirtyMaterialMap.reset(id);
|
||||
mIdPool.deferredFreeID(id);
|
||||
}
|
||||
|
||||
// Pushes every dirty material slot to the GPU. If the host array grew since
// the last call, the device buffer is first reallocated (preserving old
// contents asynchronously on "stream"). Dirty slots are walked with the
// bitmap iterator pattern and runs of adjacent dirty slots (up to 16) are
// coalesced into a single copy descriptor to reduce the number of DMA
// requests.
void PxgMaterialManager::scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext,
	CUstream stream, const PxU32 elemSize)
{
	if (mResizeRequired)
	{
		// Grow the device buffer to match the host mirror, keeping old data.
		mGpuMaterialBuffer.allocateCopyOldDataAsync(mHostMaterial.capacity(), cudaContext, stream, PX_FL);
		mResizeRequired = false;
	}
	const PxU32* bits = mDirtyMaterialMap.getWords();

	// Maximum number of adjacent dirty slots merged into one copy descriptor.
	const PxU32 maxGrouping = 16;

	if (bits)
	{
		// PT: ### bitmap iterator pattern
		const PxU32 lastSetBit = mDirtyMaterialMap.findLast();
		for (PxU32 w = 0; w <= lastSetBit >> 5; ++w)
		{
			//b&=b-1 will clear the lowest set bit in b
			for (PxU32 b = bits[w]; b; )
			{
				//dirtyId is the next bit that's set to 1!
				const PxU32 dirtyId = PxU32(w << 5 | PxLowestSetBit(b));

				void* hostPtr = mHostMaterial.begin() + dirtyId * elemSize;

				// Source: the pinned host copy (mapped into the device address
				// space via getMappedDevicePtr — presumably a host-to-device
				// pointer translation; confirm). Destination: the material's
				// slot in the device buffer.
				PxgCopyManager::CopyDesc desc;
				desc.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostPtr));
				desc.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuMaterialBuffer.getDevicePtr()) + dirtyId * elemSize);
				desc.bytes = elemSize;

				mDirtyMaterialMap.reset(dirtyId);

				//Now we loop to try and find adjacent bits that are set...
				PxU32 currIdx = dirtyId + 1;
				PxU32 groupSize = 1;
				while (currIdx <= lastSetBit && mDirtyMaterialMap.test(currIdx) && (groupSize < maxGrouping))
				{
					groupSize++;
					mDirtyMaterialMap.reset(currIdx);
					currIdx++;
					// Extend the pending copy to cover the neighbour.
					desc.bytes += elemSize;
				}

				if (currIdx != (dirtyId + 1))
				{
					// A run was consumed: resynchronize the outer iteration
					// state with where the run ended.
					//get the word from the current bit
					w = PxMin(currIdx, lastSetBit) >> 5;
					//reload the word
					b = bits[w]; //Set a 1 here to make sure the b &= (b-1) in the for loop doesn't remove the current bit we're interested in
				}
				else
				{
					b &= (b - 1);
				}

				copyManager.pushDeferredHtoD(desc);
			}
		}
	}

	mDirtyMaterialMap.clear();
}
|
||||
|
||||
void PxgMaterialManager::updateMaterial(const PxU8* materialCore, const PxU32 elemSize, const PxU32 id)
|
||||
{
|
||||
PX_ASSERT(id < mHostMaterial.size());
|
||||
PxU8* destptr = reinterpret_cast<PxU8*>(mHostMaterial.begin() + id * elemSize);
|
||||
PxMemCopy(destptr, materialCore, elemSize);
|
||||
//mHostMaterial[id] = materialCore;
|
||||
mDirtyMaterialMap.growAndSet(id);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// FEM material manager: a thin specialization that just forwards the element
// size to the generic material manager; all storage setup happens in the base.
PxgFEMMaterialManager::PxgFEMMaterialManager(PxgHeapMemoryAllocatorManager* heapManager, const PxU32 elemSize) :
	PxgMaterialManager(heapManager, elemSize)
{

}
|
||||
|
||||
// Pushes every dirty FEM material slot to the GPU.
//
// The previous body was a line-for-line duplicate of
// PxgMaterialManager::scheduleCopyHtoD (resize check, bitmap-iterator walk of
// mDirtyMaterialMap, coalescing of up to 16 adjacent dirty slots into one
// copy descriptor). Delegating to the base implementation removes the
// duplication while keeping behavior identical, so future fixes to the
// upload path only need to be made in one place.
void PxgFEMMaterialManager::scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext,
	CUstream stream, const PxU32 elemSize)
{
	PxgMaterialManager::scheduleCopyHtoD(copyManager, cudaContext, stream, elemSize);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Soft-body material manager: fixes the per-material element size to
// sizeof(PxsDeformableVolumeMaterialData) and otherwise reuses the FEM
// material manager unchanged.
PxgFEMSoftBodyMaterialManager::PxgFEMSoftBodyMaterialManager(PxgHeapMemoryAllocatorManager* heapManager) :
	PxgFEMMaterialManager(heapManager, sizeof(PxsDeformableVolumeMaterialData))
{
}
|
||||
Reference in New Issue
Block a user