feat(physics): wire physx sdk into build
This commit is contained in:
200
engine/third_party/physx/source/gpusimulationcontroller/include/PxgAlgorithms.h
vendored
Normal file
200
engine/third_party/physx/source/gpusimulationcontroller/include/PxgAlgorithms.h
vendored
Normal file
@@ -0,0 +1,200 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_ALGORITHMS_H
|
||||
#define PXG_ALGORITHMS_H
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "PxgAlgorithmsData.h"
|
||||
#include "PxgKernelLauncher.h"
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
namespace physx
|
||||
{
|
||||
#endif
|
||||
|
||||
class PxgKernelLauncher;
|
||||
|
||||
/**
|
||||
\brief Performs a sort operation on the GPU
|
||||
*/
|
||||
template<typename T>
|
||||
class PxGpuRadixSort
|
||||
{
|
||||
protected:
|
||||
PxgKernelLauncher* mKernelLauncher;
|
||||
|
||||
PxU32 mTempBufferSize;
|
||||
|
||||
PxInt4x4* mTempBlockSumsGpuPtr;
|
||||
PxInt4x4* mTempBlockSumScanGpuPtr;
|
||||
PxInt4x4* mTotalSum;
|
||||
|
||||
PxU32 mNumThreadsPerBlock = 0;
|
||||
PxU32 mNumElements = 0;
|
||||
|
||||
T* mReorderBuffer;
|
||||
PxU16* mOffsetBuffer;
|
||||
|
||||
//Optional, reorder buffer when sorting key-value pairs, uses lazy initialization
|
||||
PxU32* mValueReorderBuffer;
|
||||
|
||||
public:
|
||||
/**
|
||||
\brief Empty constructor which allows creating uninitialized objects
|
||||
*/
|
||||
PxGpuRadixSort() : mTempBlockSumsGpuPtr(NULL), mTempBlockSumScanGpuPtr(NULL), mTotalSum(NULL), mValueReorderBuffer(NULL) {}
|
||||
|
||||
/**
|
||||
\brief Constructor that initializes and allocates all internal data
|
||||
|
||||
\param[in] cudaContextManager The cuda context manager
|
||||
\param[in] numElements The maximum number of elements that can be processed by this gpu sort instance
|
||||
\param[in] numThreadsPerBlock The number of threads applied per block when scheduling the gpu work
|
||||
*/
|
||||
PxGpuRadixSort(PxgKernelLauncher* cudaContextManager, PxU32 numElements, PxU32 numThreadsPerBlock = 512);
|
||||
|
||||
/**
|
||||
\brief Initializes and allocates all internal data
|
||||
|
||||
\param[in] cudaContextManager The cuda context manager
|
||||
\param[in] numElements The maximum number of elements that can be processed by this gpu sort instance
|
||||
\param[in] numThreadsPerBlock The number of threads applied per block when scheduling the gpu work
|
||||
*/
|
||||
virtual bool initialize(PxgKernelLauncher* cudaContextManager, PxU32 numElements, PxU32 numThreadsPerBlock = 512);
|
||||
|
||||
/**
|
||||
\brief Sorts the integer array in place
|
||||
|
||||
\param[in,out] inAndOutBuffer Gpu array with the integer data which gets sorted
|
||||
\param[in] numBitsToSort The number of bits to sort. For 32bit integers where it is known that only 24 bits are used at most, it is sufficient to sort 24 bits only.
|
||||
\param[in] stream Gpu stream on which the calculation is scheduled. To be sure that the sort finished, a synchronize call must be executed on that stream.
|
||||
\param[in] outReorderTrackingBuffer Optional: Gpu tracking buffer that contains the original location in the unsorted array for every element after the sorting completed.
|
||||
\param[in] numElementsToSort Optional: The number of elements that should get sorted. By default all elements are processed. The maximal number of elements is specified in the constructor.
|
||||
*/
|
||||
virtual void sort(T* inAndOutBuffer, PxU32 numBitsToSort, const CUstream& stream, PxU32* outReorderTrackingBuffer = NULL, PxU32 numElementsToSort = 0xFFFFFFFF);
|
||||
|
||||
/**
|
||||
\brief Releases all internal data
|
||||
*/
|
||||
virtual bool release();
|
||||
|
||||
virtual ~PxGpuRadixSort() { }
|
||||
};
|
||||
|
||||
/**
|
||||
\brief Performs a scan operation (exclusive or inclusive cumulative sum) on the GPU
|
||||
*/
|
||||
class PxGpuScan
|
||||
{
|
||||
private:
|
||||
PxU32 mTempBufferSize;
|
||||
PxU32* mTempBlockSumsGpuPtr;
|
||||
PxU32* mTempBlockSumScanGpuPtr;
|
||||
PxU32* mTotalSum;
|
||||
PxU32 mNumThreadsPerBlock = 0;
|
||||
PxU32 mNumElements = 0;
|
||||
|
||||
PxgKernelLauncher* mKernelLauncher;
|
||||
|
||||
void scan(PxU32* inAndOutBuf, PxU32 exclusiveScan, const CUstream& stream, PxU32 numElementsToScan);
|
||||
void sumOnly(PxU32* inBuf, const CUstream& stream, PxU32 numElementsToScan);
|
||||
|
||||
public:
|
||||
/**
|
||||
\brief Empty constructor which allows creating uninitialized objects
|
||||
*/
|
||||
PxGpuScan() : mTempBlockSumsGpuPtr(NULL), mTempBlockSumScanGpuPtr(NULL), mTotalSum(NULL) {}
|
||||
|
||||
/**
|
||||
\brief Constructor that initializes and allocates all internal data
|
||||
|
||||
\param[in] cudaContextManager The cuda context manager
|
||||
\param[in] numElements The maximum number of elements that can be processed by this gpu scan instance
|
||||
\param[in] numThreadsPerBlock The number of threads applied per block when scheduling the gpu work
|
||||
*/
|
||||
PxGpuScan(PxgKernelLauncher* cudaContextManager, PxU32 numElements, PxU32 numThreadsPerBlock = 512);
|
||||
|
||||
/**
|
||||
\brief Initializes and allocates all internal data
|
||||
|
||||
\param[in] cudaContextManager The cuda context manager
|
||||
\param[in] numElements The maximum number of elements that can be processed by this gpu scan instance
|
||||
\param[in] numThreadsPerBlock The number of threads applied per block when scheduling the gpu work
|
||||
*/
|
||||
bool initialize(PxgKernelLauncher* cudaContextManager, PxU32 numElements, PxU32 numThreadsPerBlock = 512);
|
||||
|
||||
/**
|
||||
\brief Allows to access to total sum of all elements that took part in the scan operation
|
||||
|
||||
\return A gpu pointer to the total sum. Only contains valid data after a scan operation finished.
|
||||
*/
|
||||
PX_FORCE_INLINE PxU32* getSumPointer()
|
||||
{
|
||||
return mTotalSum;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Performs an exclusive scan in place on the given array
|
||||
|
||||
\param[in,out] inAndOutBuf Gpu array with the integer data which gets transformed into its exclusive cumulative sum
|
||||
\param[in] stream Gpu stream on which the calculation is scheduled. To be sure that the scan finished, a synchronize call must be executed on that stream.
|
||||
\param[in] numElementsToScan Optional: The number of elements that should get scanned. By default all elements are processed. The maximal number of elements is specified in the constructor.
|
||||
*/
|
||||
PX_FORCE_INLINE void exclusiveScan(PxU32* inAndOutBuf, const CUstream& stream, PxU32 numElementsToScan = 0xFFFFFFFF)
|
||||
{
|
||||
const PxU32 exclusiveScan = 1;
|
||||
scan(inAndOutBuf, exclusiveScan, stream, numElementsToScan);
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Performs an inclusive scan in place on the given array
|
||||
|
||||
\param[in,out] inAndOutBuf Gpu array with the integer data which gets transformed into its inclusive cumulative sum
|
||||
\param[in] stream Gpu stream on which the calculation is scheduled. To be sure that the scan finished, a synchronize call must be executed on that stream.
|
||||
\param[in] numElementsToScan The number of elements that should get scanned. By default all elements are processed. The maximal number of elements is specified in the constructor.
|
||||
*/
|
||||
PX_FORCE_INLINE void inclusiveScan(PxU32* inAndOutBuf, const CUstream& stream, PxU32 numElementsToScan = 0xFFFFFFFF)
|
||||
{
|
||||
const PxU32 exclusiveScan = 0;
|
||||
scan(inAndOutBuf, exclusiveScan, stream, numElementsToScan);
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Releases all internal data
|
||||
*/
|
||||
bool release();
|
||||
|
||||
~PxGpuScan() { }
|
||||
};
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
} // namespace physx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
71
engine/third_party/physx/source/gpusimulationcontroller/include/PxgAlgorithmsData.h
vendored
Normal file
71
engine/third_party/physx/source/gpusimulationcontroller/include/PxgAlgorithmsData.h
vendored
Normal file
@@ -0,0 +1,71 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_ALGORITHMS_DATA_H
|
||||
#define PXG_ALGORITHMS_DATA_H
|
||||
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
namespace physx
|
||||
{
|
||||
#endif
|
||||
|
||||
/**
|
||||
\brief An integer vector with 4 components
|
||||
*/
|
||||
PX_ALIGN_PREFIX(16) struct PxInt4
|
||||
{
|
||||
PxI32 x;
|
||||
PxI32 y;
|
||||
PxI32 z;
|
||||
PxI32 w;
|
||||
|
||||
/**
|
||||
\brief Comparison operator to check if two instances are equal
|
||||
*/
|
||||
bool operator==(const PxInt4& rhs) const
|
||||
{
|
||||
return x == rhs.x && y == rhs.y && z == rhs.z && w == rhs.w;
|
||||
}
|
||||
}PX_ALIGN_SUFFIX(16);
|
||||
|
||||
/**
|
||||
\brief An bundle of four integer vectors with 4 components each
|
||||
*/
|
||||
PX_ALIGN_PREFIX(16) struct PxInt4x4
|
||||
{
|
||||
PxInt4 data[4];
|
||||
}PX_ALIGN_SUFFIX(16);
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
} // namespace physx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
135
engine/third_party/physx/source/gpusimulationcontroller/include/PxgAnisotropy.h
vendored
Normal file
135
engine/third_party/physx/source/gpusimulationcontroller/include/PxgAnisotropy.h
vendored
Normal file
@@ -0,0 +1,135 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_ANISOTROPY_H
|
||||
#define PXG_ANISOTROPY_H
|
||||
|
||||
|
||||
#include "PxAnisotropy.h"
|
||||
#include "PxgAnisotropyData.h"
|
||||
#include "foundation/PxUserAllocated.h"
|
||||
#include "PxgKernelLauncher.h"
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
namespace physx
|
||||
{
|
||||
#endif
|
||||
|
||||
#if PX_SUPPORT_GPU_PHYSX
|
||||
|
||||
class PxgAnisotropyGenerator : public PxAnisotropyGenerator, public PxUserAllocated
|
||||
{
|
||||
PxgKernelLauncher mKernelLauncher;
|
||||
PxAnisotropyData mAnisotropyDataHost;
|
||||
PxAnisotropyData* mAnisotropyDataPerParticleSystemDevice;
|
||||
PxU32 mNumParticles;
|
||||
bool mDirty;
|
||||
bool mOwnsAnisotropyGPUBuffers;
|
||||
PxVec4* mAnisotropy1;
|
||||
PxVec4* mAnisotropy2;
|
||||
PxVec4* mAnisotropy3;
|
||||
bool mEnabled;
|
||||
|
||||
void releaseGPUAnisotropyBuffers();
|
||||
|
||||
void allocateGPUAnisotropyBuffers();
|
||||
|
||||
public:
|
||||
|
||||
PxgAnisotropyGenerator(PxgKernelLauncher& cudaContextManager, PxU32 maxNumParticles, PxReal anisotropyScale, PxReal minAnisotropy, PxReal maxAnisotropy);
|
||||
|
||||
virtual ~PxgAnisotropyGenerator() { }
|
||||
|
||||
virtual void setAnisotropyMax(float maxAnisotropy)
|
||||
{
|
||||
mAnisotropyDataHost.mAnisotropyMax = maxAnisotropy;
|
||||
mDirty = true;
|
||||
}
|
||||
|
||||
virtual void setAnisotropyMin(float minAnisotropy)
|
||||
{
|
||||
mAnisotropyDataHost.mAnisotropyMin = minAnisotropy;
|
||||
mDirty = true;
|
||||
}
|
||||
|
||||
virtual void setAnisotropyScale(float anisotropyScale)
|
||||
{
|
||||
mAnisotropyDataHost.mAnisotropy = anisotropyScale;
|
||||
mDirty = true;
|
||||
}
|
||||
|
||||
virtual void release();
|
||||
|
||||
virtual void setResultBufferHost(PxVec4* anisotropy1, PxVec4* anisotropy2, PxVec4* anisotropy3);
|
||||
|
||||
virtual void setResultBufferDevice(PxVec4* anisotropy1, PxVec4* anisotropy2, PxVec4* anisotropy3);
|
||||
|
||||
virtual void generateAnisotropy(PxGpuParticleSystem* gpuParticleSystem, PxU32 numParticles, CUstream stream);
|
||||
|
||||
virtual void generateAnisotropy(PxVec4* particlePositionsGpu, PxParticleNeighborhoodProvider& neighborhoodProvider, PxU32 numParticles, PxReal particleContactOffset, CUstream stream);
|
||||
|
||||
virtual PxU32 getMaxParticles() const
|
||||
{
|
||||
return mNumParticles;
|
||||
}
|
||||
|
||||
virtual void setMaxParticles(PxU32 maxParticles);
|
||||
|
||||
virtual PxVec4* getAnisotropy1DevicePointer() const
|
||||
{
|
||||
return mAnisotropyDataHost.mAnisotropy_q1;
|
||||
}
|
||||
|
||||
virtual PxVec4* getAnisotropy2DevicePointer() const
|
||||
{
|
||||
return mAnisotropyDataHost.mAnisotropy_q2;
|
||||
}
|
||||
|
||||
virtual PxVec4* getAnisotropy3DevicePointer() const
|
||||
{
|
||||
return mAnisotropyDataHost.mAnisotropy_q3;
|
||||
}
|
||||
|
||||
virtual void setEnabled(bool enabled)
|
||||
{
|
||||
mEnabled = enabled;
|
||||
}
|
||||
|
||||
virtual bool isEnabled() const
|
||||
{
|
||||
return mEnabled;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
} // namespace physx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
68
engine/third_party/physx/source/gpusimulationcontroller/include/PxgAnisotropyData.h
vendored
Normal file
68
engine/third_party/physx/source/gpusimulationcontroller/include/PxgAnisotropyData.h
vendored
Normal file
@@ -0,0 +1,68 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PX_ANISOTROPY_DATA_H
|
||||
#define PX_ANISOTROPY_DATA_H
|
||||
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
namespace physx
|
||||
{
|
||||
/**
|
||||
\brief Data and settings to apply smoothing to an array of particle positions
|
||||
*/
|
||||
struct PxSmoothedPositionData
|
||||
{
|
||||
PxVec4* mPositions; //!< The gpu array with the positions
|
||||
PxReal mSmoothing; //!< The strength of the smoothing
|
||||
};
|
||||
|
||||
/**
|
||||
\brief Data and settings to compute anisotropy information for an array of particle positions
|
||||
*/
|
||||
struct PxAnisotropyData
|
||||
{
|
||||
PxVec4* mAnisotropy_q1; //!< Gpu array containing the first direction (x, y, z) and magnitude (w) of the anisotropy
|
||||
PxVec4* mAnisotropy_q2; //!< Gpu array containing the second direction (x, y, z) and magnitude (w) of the anisotropy
|
||||
PxVec4* mAnisotropy_q3; //!< Gpu array containing the third direction (x, y, z) and magnitude (w) of the anisotropy
|
||||
PxReal mAnisotropy; //!< Anisotropy scaling factor
|
||||
PxReal mAnisotropyMin; //!< Lower anisotropy bound
|
||||
PxReal mAnisotropyMax; //!< Upper anisotropy bound
|
||||
PxU32 mPadding;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
} // namespace physx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
65
engine/third_party/physx/source/gpusimulationcontroller/include/PxgArrayConverter.h
vendored
Normal file
65
engine/third_party/physx/source/gpusimulationcontroller/include/PxgArrayConverter.h
vendored
Normal file
@@ -0,0 +1,65 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_ARRAY_CONVERTER_H
|
||||
#define PXG_ARRAY_CONVERTER_H
|
||||
|
||||
#include "PxArrayConverter.h"
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
|
||||
#include "PxgKernelLauncher.h"
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
namespace physx
|
||||
{
|
||||
#endif
|
||||
|
||||
#if PX_SUPPORT_GPU_PHYSX
|
||||
|
||||
class PxgArrayConverter : public PxArrayConverter, public PxUserAllocated
|
||||
{
|
||||
private:
|
||||
PxgKernelLauncher mKernelLauncher;
|
||||
|
||||
public:
|
||||
PxgArrayConverter(PxgKernelLauncher& kernelLauncher);
|
||||
|
||||
virtual ~PxgArrayConverter() { }
|
||||
|
||||
virtual void interleaveGpuBuffers(const PxVec4* vertices, const PxVec4* normals, PxU32 length, PxVec3* interleavedResultBuffer, CUstream stream) PX_OVERRIDE;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
} // namespace physx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
976
engine/third_party/physx/source/gpusimulationcontroller/include/PxgArticulation.h
vendored
Normal file
976
engine/third_party/physx/source/gpusimulationcontroller/include/PxgArticulation.h
vendored
Normal file
@@ -0,0 +1,976 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_ARTICULATION_H
|
||||
#define PXG_ARTICULATION_H
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "PxArticulationReducedCoordinate.h"
|
||||
#include "PxgArticulationLink.h"
|
||||
#include "PxgArticulationTendon.h"
|
||||
#include "PxgCudaBuffer.h"
|
||||
#include "PxSpatialMatrix.h"
|
||||
#include "DyFeatherstoneArticulation.h"
|
||||
#include "foundation/PxUserAllocated.h"
|
||||
#include "vector_types.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
class PxGpuSpatialTendonData;
|
||||
class PxGpuFixedTendonData;
|
||||
|
||||
namespace Dy
|
||||
{
|
||||
struct ArticulationJointCore;
|
||||
class ArticulationJointCoreData;
|
||||
struct ArticulationInternalConstraint;
|
||||
struct SpatialSubspaceMatrix;
|
||||
}
|
||||
|
||||
//We load/store to Cm::SpatialMatrix rather than PxSpatialMatrix.
|
||||
//This exploits the symmetry of the spatial matrix to avoid storing/loading the bottom-right,
|
||||
//which is the transpose of the top-left
|
||||
struct PxgSpatialMatrixBlock
|
||||
{
|
||||
float4 columns[7][32];
|
||||
};
|
||||
|
||||
//We load/store to a PxSpatialMatrix. This has a full 6x6 matrix as there is no
|
||||
//symmetry as described in the above struct
|
||||
struct PxgSpatialMatrix6x6Block
|
||||
{
|
||||
float4 columns[9][32];
|
||||
};
|
||||
|
||||
struct PxgMat33Block
|
||||
{
|
||||
float4 mCol0[32];
|
||||
float4 mCol1[32];
|
||||
float mCol2[32];
|
||||
};
|
||||
|
||||
struct PxgSpatialVectorBlock
|
||||
{
|
||||
float4 mTopxyz_bx[32];
|
||||
float2 mbyz[32];
|
||||
};
|
||||
|
||||
struct PxgSpatialTransformBlock
|
||||
{
|
||||
float4 q[32];
|
||||
float4 p[32];
|
||||
};
|
||||
|
||||
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE void loadSpatialMatrix(const PxgSpatialMatrixBlock& block, const PxU32 threadIndexInWarp, Dy::SpatialMatrix& mat)
|
||||
{
|
||||
float4 val = block.columns[0][threadIndexInWarp];
|
||||
mat.topLeft[0][0] = val.x; mat.topLeft[0][1] = val.y; mat.topLeft[0][2] = val.z; mat.topLeft[1][0] = val.w;
|
||||
val = block.columns[1][threadIndexInWarp];
|
||||
mat.topLeft[1][1] = val.x; mat.topLeft[1][2] = val.y; mat.topLeft[2][0] = val.z; mat.topLeft[2][1] = val.w;
|
||||
val = block.columns[2][threadIndexInWarp];
|
||||
mat.topLeft[2][2] = val.x; mat.topRight[0][0] = val.y; mat.topRight[0][1] = val.z; mat.topRight[0][2] = val.w;
|
||||
val = block.columns[3][threadIndexInWarp];
|
||||
mat.topRight[1][0] = val.x; mat.topRight[1][1] = val.y; mat.topRight[1][2] = val.z; mat.topRight[2][0] = val.w;
|
||||
val = block.columns[4][threadIndexInWarp];
|
||||
mat.topRight[2][1] = val.x; mat.topRight[2][2] = val.y; mat.bottomLeft[0][0] = val.z; mat.bottomLeft[0][1] = val.w;
|
||||
val = block.columns[5][threadIndexInWarp];
|
||||
mat.bottomLeft[0][2] = val.x; mat.bottomLeft[1][0] = val.y; mat.bottomLeft[1][1] = val.z; mat.bottomLeft[1][2] = val.w;
|
||||
val = block.columns[6][threadIndexInWarp];
|
||||
mat.bottomLeft[2][0] = val.x; mat.bottomLeft[2][1] = val.y; mat.bottomLeft[2][2] = val.z;
|
||||
}
|
||||
|
||||
// This loads a 7-vec matrix in which the bottomRight is equal to the transpose
// of the topLeft, as described in the struct comment above. The same 27 stored
// floats therefore fill both the direct entries (columns 0-2 below also mirror
// into the bottom-right 3x3) and the rest of the full 6x6 PxSpatialMatrix.
PX_FORCE_INLINE PX_CUDA_CALLABLE void loadSpatialMatrix(const PxgSpatialMatrixBlock& block, const PxU32 threadIndexInWarp, PxSpatialMatrix& mat)
{
	float4 val = block.columns[0][threadIndexInWarp];
	// top-left 3x3 entries...
	mat.column[0][0] = val.x; mat.column[0][1] = val.y; mat.column[0][2] = val.z; mat.column[1][0] = val.w;
	// ...mirrored into the bottom-right 3x3 as its transpose
	mat.column[3][3] = val.x; mat.column[4][3] = val.y; mat.column[5][3] = val.z; mat.column[3][4] = val.w;
	val = block.columns[1][threadIndexInWarp];
	mat.column[1][1] = val.x; mat.column[1][2] = val.y; mat.column[2][0] = val.z; mat.column[2][1] = val.w;
	mat.column[4][4] = val.x; mat.column[5][4] = val.y; mat.column[3][5] = val.z; mat.column[4][5] = val.w;
	val = block.columns[2][threadIndexInWarp];
	mat.column[2][2] = val.x; mat.column[0][3] = val.y; mat.column[0][4] = val.z; mat.column[0][5] = val.w;
	mat.column[5][5] = val.x;
	// remaining columns hold the top-right and bottom-left 3x3 parts
	val = block.columns[3][threadIndexInWarp];
	mat.column[1][3] = val.x; mat.column[1][4] = val.y; mat.column[1][5] = val.z; mat.column[2][3] = val.w;
	val = block.columns[4][threadIndexInWarp];
	mat.column[2][4] = val.x; mat.column[2][5] = val.y; mat.column[3][0] = val.z; mat.column[3][1] = val.w;
	val = block.columns[5][threadIndexInWarp];
	mat.column[3][2] = val.x; mat.column[4][0] = val.y; mat.column[4][1] = val.z; mat.column[4][2] = val.w;
	val = block.columns[6][threadIndexInWarp];
	mat.column[5][0] = val.x; mat.column[5][1] = val.y; mat.column[5][2] = val.z;
}
|
||||
|
||||
// Packs the topLeft/topRight/bottomLeft 3x3 parts of a Dy::SpatialMatrix into the
// 7-float4 SOA layout for one warp lane. bottomRight is dropped (implied by the
// symmetry described above); the last component of column 6 is padding (0).
// Inverse of loadSpatialMatrix() above.
PX_FORCE_INLINE PX_CUDA_CALLABLE void storeSpatialMatrix(PxgSpatialMatrixBlock& block, const PxU32 threadIndexInWarp, const Dy::SpatialMatrix& mat)
{
	block.columns[0][threadIndexInWarp] = make_float4(mat.topLeft[0][0], mat.topLeft[0][1], mat.topLeft[0][2], mat.topLeft[1][0]);
	block.columns[1][threadIndexInWarp] = make_float4(mat.topLeft[1][1], mat.topLeft[1][2], mat.topLeft[2][0], mat.topLeft[2][1]);
	block.columns[2][threadIndexInWarp] = make_float4(mat.topLeft[2][2], mat.topRight[0][0], mat.topRight[0][1], mat.topRight[0][2]);
	block.columns[3][threadIndexInWarp] = make_float4(mat.topRight[1][0], mat.topRight[1][1], mat.topRight[1][2], mat.topRight[2][0]);
	block.columns[4][threadIndexInWarp] = make_float4(mat.topRight[2][1], mat.topRight[2][2], mat.bottomLeft[0][0], mat.bottomLeft[0][1]);
	block.columns[5][threadIndexInWarp] = make_float4(mat.bottomLeft[0][2], mat.bottomLeft[1][0], mat.bottomLeft[1][1], mat.bottomLeft[1][2]);
	block.columns[6][threadIndexInWarp] = make_float4(mat.bottomLeft[2][0], mat.bottomLeft[2][1], mat.bottomLeft[2][2], 0.f);
}
|
||||
|
||||
// Unpacks one warp lane of a PxgSpatialMatrix6x6Block (9 float4s = 36 floats)
// into a full PxSpatialMatrix, filling column[i][j] in linear (i, then j) order.
// No symmetry is assumed here - all 36 entries are stored/loaded explicitly.
// Inverse of storeSpatialMatrix(PxgSpatialMatrix6x6Block&, ...) below.
PX_FORCE_INLINE PX_CUDA_CALLABLE void loadSpatialMatrix(const PxgSpatialMatrix6x6Block& block, const PxU32 threadIndexInWarp, PxSpatialMatrix& mat)
{
	float4 val = block.columns[0][threadIndexInWarp];
	mat.column[0][0] = val.x; mat.column[0][1] = val.y; mat.column[0][2] = val.z; mat.column[0][3] = val.w;
	val = block.columns[1][threadIndexInWarp];
	mat.column[0][4] = val.x; mat.column[0][5] = val.y; mat.column[1][0] = val.z; mat.column[1][1] = val.w;
	val = block.columns[2][threadIndexInWarp];
	mat.column[1][2] = val.x; mat.column[1][3] = val.y; mat.column[1][4] = val.z; mat.column[1][5] = val.w;
	val = block.columns[3][threadIndexInWarp];
	mat.column[2][0] = val.x; mat.column[2][1] = val.y; mat.column[2][2] = val.z; mat.column[2][3] = val.w;
	val = block.columns[4][threadIndexInWarp];
	mat.column[2][4] = val.x; mat.column[2][5] = val.y; mat.column[3][0] = val.z; mat.column[3][1] = val.w;
	val = block.columns[5][threadIndexInWarp];
	mat.column[3][2] = val.x; mat.column[3][3] = val.y; mat.column[3][4] = val.z; mat.column[3][5] = val.w;
	val = block.columns[6][threadIndexInWarp];
	mat.column[4][0] = val.x; mat.column[4][1] = val.y; mat.column[4][2] = val.z; mat.column[4][3] = val.w;
	val = block.columns[7][threadIndexInWarp];
	mat.column[4][4] = val.x; mat.column[4][5] = val.y; mat.column[5][0] = val.z; mat.column[5][1] = val.w;
	val = block.columns[8][threadIndexInWarp];
	mat.column[5][2] = val.x; mat.column[5][3] = val.y; mat.column[5][4] = val.z; mat.column[5][5] = val.w;
}
|
||||
|
||||
// Packs all 36 entries of a PxSpatialMatrix into the 9-float4 SOA layout for one
// warp lane, in linear column[i][j] order. Inverse of the 6x6 loadSpatialMatrix()
// above.
PX_FORCE_INLINE PX_CUDA_CALLABLE void storeSpatialMatrix(PxgSpatialMatrix6x6Block& block, const PxU32 threadIndexInWarp, const PxSpatialMatrix& mat)
{
	block.columns[0][threadIndexInWarp] = make_float4(mat.column[0][0], mat.column[0][1], mat.column[0][2], mat.column[0][3]);
	block.columns[1][threadIndexInWarp] = make_float4(mat.column[0][4], mat.column[0][5], mat.column[1][0], mat.column[1][1]);
	block.columns[2][threadIndexInWarp] = make_float4(mat.column[1][2], mat.column[1][3], mat.column[1][4], mat.column[1][5]);
	block.columns[3][threadIndexInWarp] = make_float4(mat.column[2][0], mat.column[2][1], mat.column[2][2], mat.column[2][3]);
	block.columns[4][threadIndexInWarp] = make_float4(mat.column[2][4], mat.column[2][5], mat.column[3][0], mat.column[3][1]);
	block.columns[5][threadIndexInWarp] = make_float4(mat.column[3][2], mat.column[3][3], mat.column[3][4], mat.column[3][5]);
	block.columns[6][threadIndexInWarp] = make_float4(mat.column[4][0], mat.column[4][1], mat.column[4][2], mat.column[4][3]);
	block.columns[7][threadIndexInWarp] = make_float4(mat.column[4][4], mat.column[4][5], mat.column[5][0], mat.column[5][1]);
	block.columns[8][threadIndexInWarp] = make_float4(mat.column[5][2], mat.column[5][3], mat.column[5][4], mat.column[5][5]);
}
|
||||
|
||||
// Clears one warp lane of a PxgSpatialMatrix6x6Block to all zeros.
PX_FORCE_INLINE PX_CUDA_CALLABLE void zeroSpatialMatrix(PxgSpatialMatrix6x6Block& block, const PxU32 threadIndexInWarp)
{
	const float4 zero4 = make_float4(0.f, 0.f, 0.f, 0.f);
	for (PxU32 col = 0; col < 9; ++col)
		block.columns[col][threadIndexInWarp] = zero4;
}
|
||||
|
||||
// Reads one warp lane of a PxgSpatialVectorBlock as a Cm::SpatialVectorF.
// Packing: top = (f4.x, f4.y, f4.z), bottom = (f4.w, f2.x, f2.y).
PX_FORCE_INLINE PX_CUDA_CALLABLE Cm::SpatialVectorF loadSpatialVectorF(const PxgSpatialVectorBlock& block, const PxU32 threadIndexInWarp)
{
	const float4 packedTop = block.mTopxyz_bx[threadIndexInWarp];
	const float2 packedBottom = block.mbyz[threadIndexInWarp];

	const PxVec3 topVec(packedTop.x, packedTop.y, packedTop.z);
	const PxVec3 bottomVec(packedTop.w, packedBottom.x, packedBottom.y);
	return Cm::SpatialVectorF(topVec, bottomVec);
}
|
||||
|
||||
// Reads one warp lane of a PxgSpatialVectorBlock as a Cm::UnAlignedSpatialVector.
// Packing: top = (f4.x, f4.y, f4.z), bottom = (f4.w, f2.x, f2.y).
PX_FORCE_INLINE PX_CUDA_CALLABLE Cm::UnAlignedSpatialVector loadSpatialVector(const PxgSpatialVectorBlock& block, const PxU32 threadIndexInWarp)
{
	const float4 packedTop = block.mTopxyz_bx[threadIndexInWarp];
	const float2 packedBottom = block.mbyz[threadIndexInWarp];

	const PxVec3 topVec(packedTop.x, packedTop.y, packedTop.z);
	const PxVec3 bottomVec(packedTop.w, packedBottom.x, packedBottom.y);
	return Cm::UnAlignedSpatialVector(topVec, bottomVec);
}
|
||||
|
||||
// Writes a spatial vector into one warp lane of a PxgSpatialVectorBlock.
// Packing: float4 = (top.xyz, bottom.x), float2 = (bottom.yz).
PX_FORCE_INLINE PX_CUDA_CALLABLE void storeSpatialVector(PxgSpatialVectorBlock& block, const Cm::UnAlignedSpatialVector& v, const PxU32 threadIndexInWarp)
{
	const float4 packedTop = make_float4(v.top.x, v.top.y, v.top.z, v.bottom.x);
	const float2 packedBottom = make_float2(v.bottom.y, v.bottom.z);

	block.mTopxyz_bx[threadIndexInWarp] = packedTop;
	block.mbyz[threadIndexInWarp] = packedBottom;
}
|
||||
|
||||
// Accumulates a spatial vector into one warp lane of a PxgSpatialVectorBlock
// (non-atomic component-wise add; same packing as storeSpatialVector).
PX_FORCE_INLINE PX_CUDA_CALLABLE void addSpatialVector(PxgSpatialVectorBlock& block, const Cm::UnAlignedSpatialVector& v, const PxU32 threadIndexInWarp)
{
	const float4 packedTop = make_float4(v.top.x, v.top.y, v.top.z, v.bottom.x);
	const float2 packedBottom = make_float2(v.bottom.y, v.bottom.z);

	block.mTopxyz_bx[threadIndexInWarp] += packedTop;
	block.mbyz[threadIndexInWarp] += packedBottom;
}
|
||||
|
||||
#if PX_CUDA_COMPILER
|
||||
PX_FORCE_INLINE __device__ void atomicAddSpatialVector(PxgSpatialVectorBlock& block, const Cm::UnAlignedSpatialVector& v, const PxU32 threadIndexInWarp)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 200
|
||||
atomicAdd(&block.mTopxyz_bx[threadIndexInWarp].x, v.top.x);
|
||||
atomicAdd(&block.mTopxyz_bx[threadIndexInWarp].y, v.top.y);
|
||||
atomicAdd(&block.mTopxyz_bx[threadIndexInWarp].z, v.top.z);
|
||||
|
||||
atomicAdd(&block.mTopxyz_bx[threadIndexInWarp].w, v.bottom.x);
|
||||
atomicAdd(&block.mbyz[threadIndexInWarp].x, v.bottom.y);
|
||||
atomicAdd(&block.mbyz[threadIndexInWarp].y, v.bottom.z);
|
||||
#else
|
||||
PX_UNUSED(block);
|
||||
PX_UNUSED(v);
|
||||
PX_UNUSED(threadIndexInWarp);
|
||||
#endif
|
||||
|
||||
/*Pxstcg(&block.mTopxyz_bx[threadIndexInWarp], make_float4(v.top.x, v.top.y, v.top.z, v.bottom.x));
|
||||
Pxstcg(&block.mbyz[threadIndexInWarp], make_float2(v.bottom.y, v.bottom.z));*/
|
||||
}
|
||||
#endif
|
||||
|
||||
// Reads one warp lane of a PxgSpatialTransformBlock as a PxTransform
// (q = quaternion xyzw, p = position xyz; p.w is ignored).
PX_FORCE_INLINE PX_CUDA_CALLABLE PxTransform loadSpatialTransform(const PxgSpatialTransformBlock& block, const PxU32 threadIndexInWarp)
{
	const float4 rot = block.q[threadIndexInWarp];
	const float4 pos = block.p[threadIndexInWarp];

	const PxVec3 position(pos.x, pos.y, pos.z);
	const PxQuat rotation(rot.x, rot.y, rot.z, rot.w);
	return PxTransform(position, rotation);
}
|
||||
|
||||
// Writes a PxTransform into one warp lane of a PxgSpatialTransformBlock
// (quaternion into q as xyzw, position into p.xyz with p.w padded to 0).
PX_FORCE_INLINE PX_CUDA_CALLABLE void storeSpatialTransform(PxgSpatialTransformBlock& block, const PxU32 threadIndexInWarp, const PxTransform& transform)
{
	const float4 rot = make_float4(transform.q.x, transform.q.y, transform.q.z, transform.q.w);
	const float4 pos = make_float4(transform.p.x, transform.p.y, transform.p.z, 0.f);

	block.q[threadIndexInWarp] = rot;
	block.p[threadIndexInWarp] = pos;
}
|
||||
|
||||
// Reads the float4 at the given warp-lane index as a PxQuat (xyzw order).
PX_FORCE_INLINE PX_CUDA_CALLABLE PxQuat loadQuat(const float4* f, const PxU32 threadIndexInWarp)
{
	const float4 packed = f[threadIndexInWarp];
	return PxQuat(packed.x, packed.y, packed.z, packed.w);
}
|
||||
|
||||
// Reads the float4 at the given warp-lane index as a PxVec3 (the w component is dropped).
PX_FORCE_INLINE PX_CUDA_CALLABLE PxVec3 loadPxVec3(const float4* f, const PxU32 threadIndexInWarp)
{
	const float4 packed = f[threadIndexInWarp];
	return PxVec3(packed.x, packed.y, packed.z);
}
|
||||
|
||||
// Unpacks one warp lane of a PxgMat33Block (float4 + float4 + float = 9 floats)
// into a PxMat33, column-major. Inverse of storePxMat33() below.
PX_FORCE_INLINE PX_CUDA_CALLABLE void loadPxMat33(const PxgMat33Block& block, const PxU32 threadIndexInWarp, PxMat33& mat)
{
	float4 v0 = block.mCol0[threadIndexInWarp];
	float4 v1 = block.mCol1[threadIndexInWarp];
	float v2 = block.mCol2[threadIndexInWarp];

	mat.column0.x = v0.x; mat.column0.y = v0.y; mat.column0.z = v0.z;
	mat.column1.x = v0.w; mat.column1.y = v1.x; mat.column1.z = v1.y;
	mat.column2.x = v1.z; mat.column2.y = v1.w; mat.column2.z = v2;
}
|
||||
|
||||
// Packs a PxMat33 into one warp lane of a PxgMat33Block, column-major:
// mCol0 = column0.xyz + column1.x, mCol1 = column1.yz + column2.xy, mCol2 = column2.z.
// Inverse of loadPxMat33() above.
PX_FORCE_INLINE PX_CUDA_CALLABLE void storePxMat33(PxgMat33Block& block, const PxU32 threadIndexInWarp, const PxMat33& mat)
{
	block.mCol0[threadIndexInWarp] = make_float4(mat.column0.x, mat.column0.y, mat.column0.z, mat.column1.x);
	block.mCol1[threadIndexInWarp] = make_float4(mat.column1.y, mat.column1.z, mat.column2.x, mat.column2.y);
	block.mCol2[threadIndexInWarp] = mat.column2.z;
}
|
||||
|
||||
// Loads a spatial vector (6 floats = 24 bytes) with two vectorized reads instead
// of six scalar ones. Because a tightly packed 24-byte element alternates between
// 16-byte and 8-byte alignment in an array, the split is chosen per pointer:
// float2 then float4 when the address is not 16-byte aligned, float4 then float2
// when it is. NOTE(review): assumes the pointer is at least 8-byte aligned -
// confirm at call sites.
PX_FORCE_INLINE PX_CUDA_CALLABLE Cm::UnAlignedSpatialVector loadSpatialVector(const Cm::UnAlignedSpatialVector* PX_RESTRICT vector)
{
	size_t ptr = size_t(vector);
	if (ptr & 0xf)
	{
		// 8-byte-aligned case: read (top.x, top.y) then (top.z, bottom.xyz)
		float2* top = reinterpret_cast<float2*>(ptr);
		float2 val = *top;
		float4 val2 = *(reinterpret_cast<float4*>(top+1));

		return Cm::UnAlignedSpatialVector(PxVec3(val.x, val.y, val2.x), PxVec3(val2.y, val2.z, val2.w));
	}
	else
	{
		// 16-byte-aligned case: read (top.xyz, bottom.x) then (bottom.y, bottom.z)
		float4* top = reinterpret_cast<float4*>(ptr);
		float4 val = *top;
		float2 val2 = *(reinterpret_cast<float2*>(top + 1));

		return Cm::UnAlignedSpatialVector(PxVec3(val.x, val.y, val.z), PxVec3(val.w, val2.x, val2.y));
	}
}
|
||||
|
||||
// Stores a spatial vector (6 floats = 24 bytes) with two vectorized writes,
// mirroring the alignment-dependent split used by loadSpatialVector() above:
// float2 + float4 for non-16-byte-aligned addresses, float4 + float2 otherwise.
// NOTE(review): assumes the pointer is at least 8-byte aligned - confirm at call sites.
PX_FORCE_INLINE PX_CUDA_CALLABLE void storeSpatialVector(Cm::UnAlignedSpatialVector* PX_RESTRICT vector,
const Cm::UnAlignedSpatialVector& src)
{
	size_t ptr = size_t(vector);
	if (ptr & 0xf)
	{
		// 8-byte-aligned case: write (top.x, top.y) then (top.z, bottom.xyz)
		float2* top = reinterpret_cast<float2*>(ptr);
		float4* bottom = (reinterpret_cast<float4*>(top + 1));

		*top = make_float2(src.top.x, src.top.y);
		*bottom = make_float4(src.top.z, src.bottom.x, src.bottom.y, src.bottom.z);
	}
	else
	{
		// 16-byte-aligned case: write (top.xyz, bottom.x) then (bottom.y, bottom.z)
		float4* top = reinterpret_cast<float4*>(ptr);
		float2* bottom = (reinterpret_cast<float2*>(top + 1));

		*top = make_float4(src.top.x, src.top.y, src.top.z, src.bottom.x);
		*bottom = make_float2(src.bottom.y, src.bottom.z);
	}
}
|
||||
|
||||
|
||||
// One 64-bit bit field per warp lane (32 lanes per block).
struct PxgArticulationBitFieldStackData
{
	PxU64 bitField[32];
};
|
||||
|
||||
typedef PxgArticulationBitFieldStackData PxgArticulationBitFieldData;
|
||||
|
||||
// Per-warp-lane stack entry used while traversing articulation link trees:
// one index per lane plus impulse/deltaV spatial-vector scratch storage.
struct PxgArticulationTraversalStackData
{
	PxU32 indices[32];
	PxgSpatialVectorBlock impulseStack;
	PxgSpatialVectorBlock deltaVStack;
};
|
||||
|
||||
// SOA per-link data for a block of 32 articulations (one warp lane each).
// Accessed via the load*/store* helpers above with threadIndexInWarp as the lane.
struct PxgArticulationBlockLinkData
{
	// Inertia quantities
	PxgSpatialMatrixBlock mSpatialArticulatedInertia;
	PxgSpatialMatrix6x6Block mSpatialResponseMatrix;
	PxgMat33Block mIsolatedInertia;
	PxReal mMass[32];

	//KS - these are questionable - we might want to store velocity/deltaVelocity etc. in a non-SOA format
	//for the constraint solver. To be revisited later!!!!
	PxgSpatialVectorBlock mMotionVelocity;
	PxgSpatialVectorBlock mPosMotionVelocity;
	PxgSpatialVectorBlock mDeltaMotion;
	PxgSpatialVectorBlock mScratchImpulse; // These are non-propagated impulses. They may persist between kernels and are properly processed and then reset in averageLinkImpulsesAndPropagate. Should be reset to zero after propagation.
	PxgSpatialVectorBlock mScratchDeltaV; // Temp! Used only for propagating velocities around the tree structure within a kernel! Left in an undefined state after use.

	// Solver accumulators
	PxgSpatialVectorBlock mSolverSpatialDeltaVel;
	PxgSpatialVectorBlock mSolverSpatialImpulse;
	PxgSpatialVectorBlock mSolverSpatialInternalConstraintImpulse;

	PxgSpatialVectorBlock mZAVector;
	PxgSpatialVectorBlock mZAIntVector;
	PxgSpatialVectorBlock mCoriolis;
	PxgSpatialVectorBlock mConstraintForces;

	PxgSpatialVectorBlock mMotionAcceleration;
	PxgSpatialVectorBlock mMotionAccelerationInternal;
	PxgSpatialVectorBlock mBiasForce;

	PxReal mDeltaScale[32];

	// Parent-relative quantities (NOTE(review): mRw_* presumably the parent->child
	// offset vector, stored componentwise - confirm against the fill-in code)
	float mRw_x[32];
	float mRw_y[32];
	float mRw_z[32];
	float4 mRelativeQuat[32];

	// Static contact/joint bookkeeping per link
	PxU32 mNbStaticContacts[32];
	PxU32 mStaticContactStartIndex[32];

	PxU32 mNbStaticJoints[32];
	PxU32 mStaticJointStartIndex[32];

	// Poses
	PxgSpatialTransformBlock mPreTransform;
	PxgSpatialTransformBlock mAccumulatedPose;
	PxgSpatialTransformBlock mChildPose;
	PxgSpatialTransformBlock mParentPose;
	float4 mDeltaQ[32];
	// Packed per-link scalar properties (component meaning encoded in the names)
	float4 mLinDampingX_AngDampingY_maxLinVelSqZ_maxAngVelSqW[32];
	float4 mInvInertiaXYZ_invMassW[32];
	PxReal mCfm[32];
	// Topology: children/parent/joint layout of the link tree
	PxU32 mChildrenOffset[32];
	PxU32 mNumChildren[32];

	PxU32 mJointOffset[32];
	PxU32 mParents[32];
	PxU8 mDofs[32];
	PxU8 mJointType[32];
	PxU8 mInvDofIds[6][32]; //mapping from axis to joint offset
	PxU8 mDisableGravity[32];
	PxU8 mRetainsAcceleration[32];

	// PT: padded to 128 for coalesced loads
	PxU32 pad[16];
};
|
||||
PX_COMPILE_TIME_ASSERT((sizeof(PxgArticulationBlockLinkData) & 127)==0);
|
||||
|
||||
// SOA per-warp-lane parameters for one spatial tendon of each of 32 articulations.
struct PxgArticulationBlockSpatialTendonData
{
public:
	PxU32 mNumAttachments[32];
	PxU32 mNumConstraints[32];
	PxReal mStiffness[32];
	PxReal mDamping[32];
	PxReal mLimitStiffness[32];
	PxReal mOffset[32];
};
|
||||
|
||||
// SOA per-warp-lane parameters for one fixed tendon of each of 32 articulations.
struct PxgArticulationBlockFixedTendonData
{
public:
	PxU32 mNumTendonJoints[32];
	PxU32 mNumConstraints[32];
	PxReal mStiffness[32];
	PxReal mDamping[32];
	PxReal mLimitStiffness[32];
	PxReal mOffset[32];
	PxReal mLowLimit[32];
	PxReal mHighLimit[32];
	PxReal mRestLength[32];
};
|
||||
|
||||
// Bit flags marking which parts of an articulation's state need processing
// (stored per lane in PxgArticulationBlockData::mStateDirty).
struct PxgArtiStateDirtyFlag
{
	enum
	{
		eVEL_DIRTY = 1 << 0,
		eHAS_IMPULSES = 1 << 1
	};
};
|
||||
|
||||
//This class stores a block of data corresponding to 32 articulations.
//It is filled in at runtime.
struct PxgArticulationBlockData
{
	PxgSpatialMatrixBlock mInvSpatialArticulatedInertia;
	PxU32 mFlags[32]; // PT: seems to be mainly for PxArticulationFlag::eFIX_BASE so only 1 bit is needed? Merge with mNumLinks?
	// Per-articulation element counts and identifiers
	PxU32 mNumLinks[32];
	PxU32 mNumSpatialTendons[32];
	PxU32 mNumFixedTendons[32];
	PxU32 mNumMimicJoints[32];
	PxU32 mTotalDofs[32];
	PxU32 mArticulationIndex[32];
	PxReal mSleepThreshold[32];

	//This one is a bit of a hack - it's the common link velocity.
	PxgSpatialVectorBlock mCommonLinkDeltaVelocity;
	PxU32 mLinkWithDeferredImpulse[32];
	Cm::UnAlignedSpatialVector* mMotionVelocitiesPtr[32];
	PxgSpatialVectorBlock mRootDeferredZ;

	// Centre of mass (xyz) and total inverse mass (w)
	float4 mCOM_TotalInvMassW[32];

	PxU8 mStateDirty[32]; //32 bytes - PxgArtiStateDirtyFlag bits per lane
	PxU32 mTotalSelfConstraintCount; //4 bytes
	PxU32 mSelfConstraintOffset; //4 bytes
	// PT: padded to 128 for coalesced loads
	PxU32 pad[22]; //88 bytes padding, bringing this all up to 128 bytes
};
|
||||
PX_COMPILE_TIME_ASSERT((sizeof(PxgArticulationBlockData) & 127)==0);
|
||||
|
||||
// SOA per-dof internal (joint) constraint data for 32 articulations: constraint
// rows, limits, drives and friction parameters, plus accessors for the packed
// implicit drive description.
struct PxgArticulationInternalConstraintData
{
	// Constraint Jacobian rows and cached deltaV responses
	PxgSpatialVectorBlock mRow0;
	PxgSpatialVectorBlock mRow1;
	PxgSpatialVectorBlock mDeltaVA;
	PxgSpatialVectorBlock mDeltaVB;

	PxReal mRecipResponse[32];
	PxReal mResponse[32];

	// Joint limits (packed low/high pairs) and accumulated limit impulses
	float2 mLimits_LowLimitX_highLimitY[32];
	float2 mLimitError_LowX_highY[32];
	PxReal mHighImpulse[32];
	PxReal mLowImpulse[32];
	PxReal mMaxJointVelocity[32];

	// drive
	PxReal mDriveImpulse[32];
	PxReal mConstraintMaxImpulse[32];
	PxReal mMaxForce[32];
	PxU32 mDriveType[32];
	PxReal mMaxEffort[32];
	PxReal mMaxActuatorVelocity[32];
	PxReal mVelocityDependentResistance[32];
	PxReal mSpeedEffortGradient[32];

	PxReal mDriveTargetPos[32];
	PxReal mArmature[32];

	// Packed implicit drive description (see set/getImplicitDriveDesc below)
	float4 mTargetVelPlusInitialBiasX_DriveBiasCoefficientY_VelMultiplierZ_ImpulseMultiplierW[32];

	float mDriveStiffness[32];
	float mDamping[32];
	float mDriveTargetVel[32];
	float mTargetPosBias[32];

	PxReal mAccumulatedFrictionImpulse[32];

	//old friction
	PxReal mMaxFrictionForce[32];
	PxReal mFrictionCoefficient[32];

	//new friction
	PxReal mStaticFrictionEffort[32];
	PxReal mDynamicFrictionEffort[32];
	PxReal mViscousFrictionCoefficient[32];

	// Packs a Dy::ArticulationImplicitDriveDesc into the SOA float4 + float slot
	// for the given warp lane.
	PX_FORCE_INLINE void PX_CUDA_CALLABLE setImplicitDriveDesc
	(const PxU32 threadIndexInWarp, const Dy::ArticulationImplicitDriveDesc& driveDesc)
	{
		mTargetVelPlusInitialBiasX_DriveBiasCoefficientY_VelMultiplierZ_ImpulseMultiplierW[threadIndexInWarp] =
			make_float4(driveDesc.driveTargetVelPlusInitialBias, driveDesc.driveBiasCoefficient, driveDesc.driveVelMultiplier, driveDesc.driveImpulseMultiplier);
		mTargetPosBias[threadIndexInWarp] = driveDesc.driveTargetPosBias;
	}
	// Reconstructs the Dy::ArticulationImplicitDriveDesc for the given warp lane
	// (inverse of setImplicitDriveDesc).
	PX_FORCE_INLINE PX_CUDA_CALLABLE Dy::ArticulationImplicitDriveDesc getImplicitDriveDesc(const PxU32 threadIndexInWarp) const
	{
		const Dy::ArticulationImplicitDriveDesc driveDesc
		(
			mTargetVelPlusInitialBiasX_DriveBiasCoefficientY_VelMultiplierZ_ImpulseMultiplierW[threadIndexInWarp].x,
			mTargetVelPlusInitialBiasX_DriveBiasCoefficientY_VelMultiplierZ_ImpulseMultiplierW[threadIndexInWarp].y,
			mTargetVelPlusInitialBiasX_DriveBiasCoefficientY_VelMultiplierZ_ImpulseMultiplierW[threadIndexInWarp].z,
			mTargetVelPlusInitialBiasX_DriveBiasCoefficientY_VelMultiplierZ_ImpulseMultiplierW[threadIndexInWarp].w,
			mTargetPosBias[threadIndexInWarp]
		);
		return driveDesc;
	}
};
|
||||
PX_COMPILE_TIME_ASSERT((sizeof(PxgArticulationInternalConstraintData) & 127)==0);
|
||||
|
||||
// SOA per-warp-lane constraint data for one tendon constraint of each of 32
// articulations: constraint rows, solver coefficients and link pair.
struct PxgArticulationInternalTendonConstraintData
{
	PxgSpatialVectorBlock mRow0;
	PxgSpatialVectorBlock mRow1;

	PxgSpatialVectorBlock mDeltaVB;

	PxReal mRecipResponse[32];
	PxReal mAccumulatedLength[32];
	// Constraint solver coefficients and accumulated applied force
	PxReal mBiasCoefficient[32];
	PxReal mVelMultiplier[32];
	PxReal mImpulseMultiplier[32];
	PxReal mAppliedForce[32];

	// Limit-specific counterparts of the above
	PxReal mLimitBiasCoefficient[32];
	PxReal mLimitImpulseMultiplier[32];
	PxReal mLimitAppliedForce[32];

	// The two links coupled by the constraint
	PxU32 mLink0[32];
	PxU32 mLink1[32];
	PxReal mDeltaVA[32];

	PxReal mRestDistance[32];
	PxReal mLowLimit[32];
	PxReal mHighLimit[32];
};
|
||||
|
||||
// SOA per-dof state for a block of 32 articulations: joint positions/velocities,
// motion subspace vectors and the embedded internal constraint data.
struct PxgArticulationBlockDofData
{
	PxReal mJointPositions[32];
	PxReal mJointVelocities[32];
	PxReal mJointUnconstrainedVelocities[32];
	PxReal mPosJointVelocities[32];
	PxReal mJointAccel[32];
	// Per-dof solver scratch accumulators
	PxReal mQstZ[32];
	PxReal mQstZIcInternal[32];
	PxReal mDeferredQstZ[32];
	PxReal mTmpQstZ[32];

	// Motion subspace (joint axis) data in world and local space
	PxgSpatialVectorBlock mWorldMotionMatrix;
	PxgSpatialVectorBlock mLocalMotionMatrix;
	PxgSpatialVectorBlock mIsInvDW;
	PxgSpatialVectorBlock mIsW;
	PxgSpatialVectorBlock mJointAxis;
	float mInvStIsT_x[32];
	float mInvStIsT_y[32];
	float mInvStIsT_z[32];
	PxU8 mDofIds[32]; //mapping from joint offset to axis
	PxU8 mMotion[32];

	PxgArticulationInternalConstraintData mConstraintData;

	// PT: padded to 128 for coalesced loads
	PxU32 pad[16];
};
|
||||
PX_COMPILE_TIME_ASSERT((sizeof(PxgArticulationBlockDofData) & 127)==0);
|
||||
|
||||
// SOA per-warp-lane data for one spatial-tendon attachment of each of 32
// articulations, including its position in the attachment tree.
struct PxgArticulationBlockAttachmentData
{
public:
	PxVec3 mRelativeOffset[32];
	PxReal mRestDistance[32];
	PxReal mCoefficient[32];

	// Tree structure: child bitmask, owning link, parent attachment index
	PxU64 mChildrens[32];
	PxU16 mLinkIndex[32];
	PxU32 mParents[32];
	PxReal mLowLimit[32];
	PxReal mHighLimit[32];
};
|
||||
|
||||
// SOA per-warp-lane data for one fixed-tendon joint of each of 32 articulations,
// including its position in the tendon-joint tree.
struct PxgArticulationBlockTendonJointData
{
public:
	PxU32 mAxis[32];
	PxReal mCoefficient[32];
	PxReal mRecipCoefficient[32];

	// Tree structure: child bitmask, owning link, parent tendon-joint index
	PxU64 mChildrens[32];
	PxU16 mLinkIndex[32];
	PxU32 mParents[32];
	PxU32 mConstraintId[32];
};
|
||||
|
||||
// Solver-internal cached data for a block of 32 mimic joints.
struct PxgArticulationBlockInternallMimicJointData
{
	// The two dofs coupled by the mimic joint
	PxU32 mDofA[32];
	PxU32 mDofB[32];
	//Cache recip effectiveInertia = [J * M^-1 * J^T] = [rAA + gearRatio*(rAB + rBA) + gearRatio*gearRatio*rBB]
	//Impulse = [1, gearRatio]^T * [-b + J*v] /[J * M^-1 * J^T + cfm]
	//Impulse = [1, gearRatio]^T * [-b + J*v] / [recipEffectiveInertia + cfm];
	PxReal recipEffectiveInertia[32];
};
|
||||
|
||||
// SOA per-warp-lane parameters for one mimic joint of each of 32 articulations:
// the coupled (link, axis) pair on each side plus the coupling coefficients.
struct PxgArticulationBlockMimicJointData
{
	PxU32 mLinkA[32];
	PxU32 mAxisA[32];
	PxU32 mLinkB[32];
	PxU32 mAxisB[32];
	PxReal mGearRatio[32];
	PxReal mOffset[32];
	PxReal mNaturalFrequency[32];
	PxReal mDampingRatio[32];
	PxgArticulationBlockInternallMimicJointData mInternalData;
};
|
||||
|
||||
// Per-articulation header: indices, element counts and dirty state.
struct PxgArticulationData
{
	// index in the articulation buffer. This is the same as articulationRemapId in the PxgBodySim
	PxU32 index;
	PxU32 bodySimIndex;
	PxU32 gpuDirtyFlag;
	PxU32 updateDirty;

	// Element counts for this articulation
	PxU16 numLinks;
	PxU16 numJointDofs;
	PxU16 numSpatialTendons;

	PxU16 numFixedTendons;
	PxU16 numMimicJoints;
	PxU8 flags;
	bool confiDirty;	// NOTE(review): presumably "config dirty"; name kept as-is for interface stability

	PxU32 numPathToRoots;
};
|
||||
|
||||
#if PX_VC
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable:4324)
|
||||
#endif
|
||||
PX_ALIGN_PREFIX(16)
// GPU-side view of a single articulation: a header plus device pointers to its
// per-link, per-dof and per-tendon arrays. 16-byte aligned for vectorized access.
class PxgArticulation
{
public:

	PxgArticulationData data;

	// Per-link core data (one element per link unless noted otherwise)
	PxgArticulationLink* links;
	Dy::ArticulationJointCore* joints;
	Dy::ArticulationJointCoreData* jointData;

	Cm::UnAlignedSpatialVector* motionVelocities;
	Cm::UnAlignedSpatialVector* motionAccelerations;
	Cm::UnAlignedSpatialVector* linkIncomingJointForces;
	Cm::UnAlignedSpatialVector* corioliseVectors;
	Cm::UnAlignedSpatialVector* zAForces; // used as temporary propagation buffer in inverseDynamics and to store TGS per substep isolated forces while the solver runs. Not cleared after use.

	Cm::UnAlignedSpatialVector* externalAccelerations;

	Cm::UnAlignedSpatialVector* rootPreMotionVelocity;

	// Per-dof joint state arrays
	PxReal* jointPositions;
	PxReal* jointVelocities;
	PxReal* jointAccelerations;
	PxReal* jointForce;
	PxReal* jointTargetPositions;
	PxReal* jointTargetVelocities;

	PxU32* jointOffsets;

	// Per-link spatial inertia/response and pose data
	PxSpatialMatrix* worldSpatialArticulatedInertia;
	PxSpatialMatrix* spatialResponseMatrixW;
	PxTransform* linkBody2Worlds;
	PxU8* linkJointRootStateDataBuffer;
	PxTransform* linkBody2Actors;
	PxU32* parents;
	//Local space motion matrix - constant unless properties are changed
	Dy::SpatialSubspaceMatrix* motionMatrix;
	//World space motion matrix - computed from local matrix each frame
	Dy::SpatialSubspaceMatrix* worldMotionMatrix; // AD: only inverse dynamics now.

	Cm::UnAlignedSpatialVector* jointAxis;

	// Sleep/wake state and per-link properties
	PxReal* linkWakeCounters;
	PxgArticulationLinkSleepData* linkSleepData;
	PxgArticulationLinkProp* linkProps;
	ArticulationBitField* children;
	PxU32* pathToRoot;

	PxQuat* relativeQuat;

	// Temporary per-link transform scratch
	PxQuat* tempParentToChilds;
	PxVec3* tempRs;

	// Tendon parameters and elements
	PxGpuSpatialTendonData* spatialTendonParams;
	PxgArticulationTendon* spatialTendons;

	PxGpuFixedTendonData* fixedTendonParams;
	PxgArticulationTendon* fixedTendons;

	PxReal* cfms;
	PxReal* cfmScale;

	Dy::ArticulationMimicJointCore* mimicJointCores;

	PX_ALIGN(16, PxSpatialMatrix) invSpatialArticulatedInertiaW;

	Dy::ErrorAccumulator internalResidualAccumulator; //Internal residual means no errors introduces by contacts or non-articulation joints connected to this instance will be included
	Dy::ErrorAccumulator contactResidualAccumulator;
}
PX_ALIGN_SUFFIX(16);
|
||||
#if PX_VC
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
/*
|
||||
\brief We aggregate link, joint, and root link state data into a single char buffer.
|
||||
We do this in two ways:
|
||||
a) a single buffer for each articulation
|
||||
b) a single buffer for all articulations
|
||||
The typical pattern of data flow is as follows:
|
||||
a) we store state data in a unique device buffer for each articulation.
|
||||
b) we copy from the individual device buffers per articulation to the single device buffer for all articulations.
|
||||
c) we copy the single buffer for all articulations from device to host
|
||||
d) we distribute state data from the single host buffer on the host to the individual articulation instances on the host.
|
||||
The state data that we store is as follows:
|
||||
a) link body2Worlds array, link velocities array, link accelerations array, link incoming joint forces array
|
||||
b) joint positions array, joint velocities array, joint accelerations array
|
||||
d) root link pre-sim velocity
|
||||
The struct PxgLinkJointRootStateData contains helper functions for allocating and querying
|
||||
state data buffers.
|
||||
*/
|
||||
struct PxgArticulationLinkJointRootStateData
|
||||
{/**
|
||||
\brief Compute the number of bytes required for an articulation with known
|
||||
maximum link count and known maximum dof count.
|
||||
\param[in] maxNbLinks is the maximum number of links of any articulation in the ensemble of articulations.
|
||||
\param[in] maxNbDofs is the maximum number of dofs of any articulation in the ensemble of articulations.
|
||||
\note This does not return an aligned size, use computeStateDataBufferByteSizeAligned16 for that purpose
|
||||
\return The number of bytes required to store the state data for an articulation.
|
||||
*/
|
||||
// Raw (unaligned) byte size of one articulation's state data:
// per link: pose + velocity + acceleration + incoming joint force;
// per dof: position + velocity + acceleration;
// plus the root link's pre-sim velocity.
static PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 computeSingleArticulationStateDataBufferByteSizeRaw
(const PxU32 maxNbLinks, const PxU32 maxNbDofs)
{
	const PxU32 bytesPerLink = PxU32(sizeof(PxTransform) + 3 * sizeof(Cm::UnAlignedSpatialVector));
	const PxU32 bytesPerDof = PxU32(3 * sizeof(PxReal));	// joint pos + vel + accel
	const PxU32 rootBytes = PxU32(sizeof(Cm::UnAlignedSpatialVector));	// root pre-sim vel
	return bytesPerLink * maxNbLinks + bytesPerDof * maxNbDofs + rootBytes;
}
|
||||
|
||||
/**
|
||||
\brief Compute the number of bytes required for an ensemble of articulations with known
|
||||
maximum link count and known maximum dof count.
|
||||
\param[in] maxNbLinks is the maximum number of links of any articulation in the ensemble of articulations.
|
||||
\param[in] maxNbDofs is the maximum number of dofs of any articulation in the ensemble of articulations.
|
||||
\param[in] nbArticulations is the number of articulations in the ensemble.
|
||||
\note This may be used to compute the number of bytes required for a single articulation by setting
|
||||
nbArticulations to 1 and setting maxNbLinks etc to be the link and dof count of that articulation.
|
||||
\return The number of bytes required to store the state data for an ensemble of articulations.
|
||||
*/
|
||||
// Total byte size for nbArticulations state-data slots, with each slot rounded
// up to a 16-byte boundary so that every articulation's buffer is 16-byte aligned.
static PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 computeStateDataBufferByteSizeAligned16
(const PxU32 maxNbLinks, const PxU32 maxNbDofs, const PxU32 nbArticulations)
{
	const PxU32 rawBytes = computeSingleArticulationStateDataBufferByteSizeRaw(maxNbLinks, maxNbDofs);
	const PxU32 alignedBytes = (rawBytes + 15) & ~15;	// round up to next 16-byte boundary
	return alignedBytes * nbArticulations;
}
|
||||
|
||||
/**
|
||||
\brief Return the pointer to a single articulation's state data buffer.
|
||||
\param[in] inputBufferForAllArticulations is a pointer to the memory containing the state
|
||||
data for the entire ensemble of articulations.
|
||||
\param[in] maxNbLinks is the maximum number of links of any articulation in the ensemble of articulations.
|
||||
\param[in] maxNbDofs is the maximum number of dofs of any articulation in the ensemble of articulations.
|
||||
\param[in] articulationId is the index of a single articulation within the ensemble.
|
||||
\return The pointer to a single articulation's state data buffer.
|
||||
*/
|
||||
static PX_CUDA_CALLABLE PX_FORCE_INLINE PxU8* getArticulationStateDataBuffer
|
||||
(PxU8* inputBufferForAllArticulations,
|
||||
const PxU32 maxNbLinks, const PxU32 maxNbDofs,
|
||||
const PxU32 articulationId)
|
||||
{
|
||||
PxU8* singleArticulationStateBuffer =
|
||||
inputBufferForAllArticulations +
|
||||
computeStateDataBufferByteSizeAligned16(maxNbLinks, maxNbDofs, articulationId);
|
||||
|
||||
return singleArticulationStateBuffer;
|
||||
}
|
||||
|
||||
/**
\brief Decompose the state data buffer of a single articulation into pointers to arrays of
poses, velocities etc.
\param[in] singleArticulationStateBuffer is the state data buffer for a single articulation.
\param[in] nbLinks is the number of links for the single articulation.
\param[in] nbDofs is the number of dofs for the single articulation.
\param[out] linkBody2Worlds is an array of link poses with one element per link.
\param[out] linkVels is a pointer to an array of link spatial velocities with one element per link.
\param[out] linkAccels is a pointer to an array of link spatial accelerations with one element per link.
\param[out] linkIncomingJointForces is a pointer to an array of link incoming joint forces with one element per link.
\param[out] jointPositions is a pointer to an array of joint positions with one element per dof.
\param[out] jointVelocities is a pointer to an array of joint velocities with one element per dof.
\param[out] jointAccelerations is a pointer to an array of joint accelerations with one element per dof.
\param[out] rootPreVel is a pointer to the pre-sim velocity of the single articulation's root link.
\note The layout walked here must stay in sync with computeSingleArticulationStateDataBufferByteSizeRaw().
*/
static PX_CUDA_CALLABLE PX_FORCE_INLINE void decomposeArticulationStateDataBuffer
(PxU8* singleArticulationStateBuffer,
const PxU32 nbLinks, const PxU32 nbDofs,
PxTransform*& linkBody2Worlds, Cm::UnAlignedSpatialVector*& linkVels, Cm::UnAlignedSpatialVector*& linkAccels, Cm::UnAlignedSpatialVector*& linkIncomingJointForces,
PxReal*& jointPositions, PxReal*& jointVelocities, PxReal*& jointAccelerations,
Cm::UnAlignedSpatialVector*& rootPreVel)
{
	PxU8* buffer = singleArticulationStateBuffer;
	//Per-link sections first: poses, spatial velocities, spatial accelerations, incoming joint forces.
	linkBody2Worlds = reinterpret_cast<PxTransform*>(buffer);
	buffer += sizeof(PxTransform) * nbLinks;
	linkVels = reinterpret_cast<Cm::UnAlignedSpatialVector*>(buffer);
	buffer += sizeof(Cm::UnAlignedSpatialVector) * nbLinks;
	linkAccels = reinterpret_cast<Cm::UnAlignedSpatialVector*>(buffer);
	buffer += sizeof(Cm::UnAlignedSpatialVector) * nbLinks;
	linkIncomingJointForces = reinterpret_cast<Cm::UnAlignedSpatialVector*>(buffer);
	buffer += sizeof(Cm::UnAlignedSpatialVector) * nbLinks;
	//Per-dof sections next: joint positions, velocities, accelerations.
	jointPositions = reinterpret_cast<PxReal*>(buffer);
	buffer += sizeof(PxReal) * nbDofs;
	jointVelocities = reinterpret_cast<PxReal*>(buffer);
	buffer += sizeof(PxReal) * nbDofs;
	jointAccelerations = reinterpret_cast<PxReal*>(buffer);
	buffer += sizeof(PxReal) * nbDofs;
	//Final section: the root link's pre-sim spatial velocity.
	rootPreVel = reinterpret_cast<Cm::UnAlignedSpatialVector*>(buffer);
	//Sanity check: the bytes consumed (including rootPreVel, rounded up to 16) must
	//equal the aligned per-articulation slice size used for buffer allocation/indexing.
	PX_ASSERT(
	singleArticulationStateBuffer + computeStateDataBufferByteSizeAligned16(nbLinks, nbDofs, 1) ==
	reinterpret_cast<PxU8*>(((reinterpret_cast<size_t>(buffer) + sizeof(Cm::UnAlignedSpatialVector) + 15) & ~15)));
}
|
||||
|
||||
/**
|
||||
\brief Compute the pointer to the array of link poses for a single articulation.
|
||||
\param[in] singleArticulationStateBuffer is the state data buffer for a single articulation.
|
||||
\param[in] nbLinks is the number of links for the single articulation.
|
||||
\param[in] nbDofs is the number of dofs for the single articulation.
|
||||
\return The pointer to the array of link poses for a single articulation.
|
||||
*/
|
||||
//Get the body2World array from the state data buffer of a single articulation.
|
||||
static PX_CUDA_CALLABLE PX_FORCE_INLINE PxTransform* getArticulationBody2Worlds
|
||||
(PxU8* singleArticulationStateBuffer,
|
||||
const PxU32 nbLinks, const PxU32 nbDofs)
|
||||
{
|
||||
PxTransform* linkBody2Worlds;
|
||||
Cm::UnAlignedSpatialVector* linkVels;
|
||||
Cm::UnAlignedSpatialVector* linkAccels;
|
||||
Cm::UnAlignedSpatialVector* linkIncomingJointForces;
|
||||
PxReal* jointPositions;
|
||||
PxReal* jointVelocities;
|
||||
PxReal* jointAccelerations;
|
||||
Cm::UnAlignedSpatialVector* rootPreVel;
|
||||
|
||||
decomposeArticulationStateDataBuffer(
|
||||
singleArticulationStateBuffer,
|
||||
nbLinks, nbDofs,
|
||||
linkBody2Worlds, linkVels, linkAccels, linkIncomingJointForces,
|
||||
jointPositions, jointVelocities, jointAccelerations,
|
||||
rootPreVel);
|
||||
|
||||
return linkBody2Worlds;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
//GPU-resident per-articulation storage: one CUDA buffer per quantity the GPU
//articulation solver reads or writes. Buffers are allocated through the
//supplied heap memory allocator manager.
class PxgArticulationBuffer : public PxUserAllocated
{
public:

	PxgArticulationBuffer(PxgHeapMemoryAllocatorManager* heapMemoryManager);

	~PxgArticulationBuffer();

	PxgTypedCudaBuffer<PxgArticulationLink> links;
	PxgTypedCudaBuffer<PxReal> linkWakeCounters; //originally set to the same value as the articulation wakeCounter
	PxgTypedCudaBuffer<PxgArticulationLinkSleepData> linkSleepData;
	PxgTypedCudaBuffer<PxgArticulationLinkProp> linkProps;
	PxgTypedCudaBuffer<Dy::ArticulationJointCore> joints;
	PxgTypedCudaBuffer<Dy::ArticulationJointCoreData> jointData;
	PxgTypedCudaBuffer<Cm::UnAlignedSpatialVector> corioliseVectors; //per-link Coriolis vector
	PxgTypedCudaBuffer<Cm::UnAlignedSpatialVector> zAForces; //per-link spatial zero-acceleration force (articulated z.a. force)
	PxgTypedCudaBuffer<PxU32> pathToRoots; //global array storing the path to root for each link contiguously; each link has a start index and numberOfElems

	//Spatial tendons and their per-attachment data (fixed vs runtime-modifiable).
	PxgTypedCudaBuffer<PxGpuSpatialTendonData> spatialTendonParams;
	PxgTypedCudaBuffer<PxgArticulationTendon> spatialTendons;
	PxArray<PxgCudaBuffer*> attachmentFixedData;
	PxArray<PxgCudaBuffer*> attachmentModData;

	//Fixed tendons and their per-tendon-joint data (fixed vs runtime-modifiable).
	PxgTypedCudaBuffer<PxGpuFixedTendonData> fixedTendonParams;
	PxgTypedCudaBuffer<PxgArticulationTendon> fixedTendons;
	PxArray<PxgCudaBuffer*> tendonJointFixData;
	PxArray<PxgCudaBuffer*> tendonJointCoefficientData;

	PxgTypedCudaBuffer<Dy::ArticulationMimicJointCore> mimicJoints;

	PxgTypedCudaBuffer<Cm::UnAlignedSpatialVector> externalAccelerations;

	//Per-dof joint drive/target/state quantities.
	PxgTypedCudaBuffer<PxReal> jointForce;
	PxgTypedCudaBuffer<PxReal> jointTargetPositions;
	PxgTypedCudaBuffer<PxReal> jointTargetVelocities;
	PxgTypedCudaBuffer<PxU32> jointOffsets;
	PxgTypedCudaBuffer<PxU32> parents;
	PxgTypedCudaBuffer<Dy::SpatialSubspaceMatrix> motionMatrix;
	PxgTypedCudaBuffer<Dy::SpatialSubspaceMatrix> motionMatrixW; //world-space variant of motionMatrix
	PxgTypedCudaBuffer<Cm::UnAlignedSpatialVector> jointAxis;

	PxgTypedCudaBuffer<PxSpatialMatrix> spatialArticulatedInertiaW;
	PxgTypedCudaBuffer<PxSpatialMatrix> spatialImpulseResponseW;

	//Packed link/joint/root state slices; see PxgArticulationLinkJointRootStateData
	PxgCudaBuffer linkAndJointAndRootStates;

	PxgTypedCudaBuffer<PxTransform> linkBody2Actors;

	PxgTypedCudaBuffer<ArticulationBitField> children; //per-link child bitfield

	PxgTypedCudaBuffer<PxQuat> relativeQuats;
	PxgTypedCudaBuffer<PxReal> cfms;
	PxgTypedCudaBuffer<PxReal> cfmScale;

	//Scratch buffers — presumably transient per-step data; verify against the kernels using them.
	PxgTypedCudaBuffer<PxQuat> tempParentToChilds;
	PxgTypedCudaBuffer<PxVec3> tempRs;

	PxU32 linkCount;

	PxgHeapMemoryAllocatorManager* mHeapMemoryManager;
};
|
||||
|
||||
//Helper function to compute the index of a particular link's deltaV value in the deltaV buffer.
|
||||
//We store this in a particular order to try and minimize cache misses
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE PxU32 computeDeltaVIndex(const PxU32 maxArticulations, const PxU32 maxLinks,
|
||||
const PxU32 articulationID, const PxU32 linkID, const PxU32 slabID)
|
||||
{
|
||||
return articulationID + linkID * maxArticulations + slabID*maxArticulations*maxLinks;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
109
engine/third_party/physx/source/gpusimulationcontroller/include/PxgArticulationLink.h
vendored
Normal file
109
engine/third_party/physx/source/gpusimulationcontroller/include/PxgArticulationLink.h
vendored
Normal file
@@ -0,0 +1,109 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_ARTICULATION_LINK_H
|
||||
#define PXG_ARTICULATION_LINK_H
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "PxgSolverBody.h"
|
||||
#include "DyFeatherstoneArticulationUtils.h"
|
||||
|
||||
|
||||
namespace physx
|
||||
{
|
||||
typedef PxU64 ArticulationBitField;
|
||||
|
||||
//Per-link intermediate quantities used by the GPU Featherstone articulation solver.
//Arrays of size 3 hold one entry per joint dof — presumably up to 3 dofs per joint; verify.
struct PxgArticulationLinkData
{
public:
	Cm::UnAlignedSpatialVector IsW[3];//stI is the transpose of Is
	Cm::UnAlignedSpatialVector IsInvDW[3];
	//PxVec3 r; //vector from parent com to child com
	//PxVec3 rw; //world space r
	PxReal qstZIc[3];//jointForce - stZIc
	PxReal invStIs[3][3];
};
|
||||
|
||||
//GPU representation of one articulation link. Trailing comments give field size
//and running byte offset. The CUDA build packs the leading vec3+scalar pairs
//into float4s so both layouts occupy the same bytes.
struct PxgArticulationLink
{
#if !PX_CUDA_COMPILER
	PX_ALIGN(16, PxVec3 initialAngVel); // 12 12 initial ang vel
	PxReal penBiasClamp; // 4 16 the penetration bias clamp
	PxVec3 initialLinVel; // 12 28 initial lin vel
	PxReal invMass; // 4 32 inverse mass

#else
	float4 initialAngVelXYZ_penBiasClamp;
	float4 initialLinVelXYZ_invMassW;
#endif

	PxReal maxAngularVelocitySq; // 4 36
	PxReal maxLinearVelocitySq; // 4 40
	PxReal linearDamping; // 4 44
	PxReal angularDamping; // 4 48

	PxU32 pathToRootOffset; // 4 52 start index into the global pathToRoots array
	PxU32 childrenOffset; // 4 56 start index into the global children array
	PxU16 numPathToRoot; // 2 58
	PxU16 numChildren; // 2 60
	PxReal offsetSlop; // 4 64

	ArticulationBitField pathToRoot; // 8 72 path to root, including link and root
	PxReal cfmScale; // 4 76
	bool disableGravity; // 1 77
	bool retainsAccelerations; // 1 78
	bool padding[2]; // 2 80 pad struct size to a 16-byte multiple
};
|
||||
|
||||
|
||||
//Per-link sleep-check accumulators (accumulated linear/angular velocity),
//mirrored as two float4s under the CUDA compiler so both layouts are 32 bytes.
struct PxgArticulationLinkSleepData
{
#if !PX_CUDA_COMPILER
	PX_ALIGN(16, PxVec3 sleepLinVelAcc); //12 12
	PxReal padding0; //4 16
	PX_ALIGN(16, PxVec3 sleepAngVelAcc); //12 28
	PxReal padding1; //4 32
#else
	float4 sleepLinVelAccXYZ;
	float4 sleepAngVelAccXYZ;
#endif
};
|
||||
|
||||
//Per-link mass properties: inverse inertia (diagonal) plus inverse mass,
//packed into one float4 under the CUDA compiler; 16 bytes in both layouts.
struct PxgArticulationLinkProp
{
#if !PX_CUDA_COMPILER
	PX_ALIGN(16, PxVec3 invInertia); //12 12
	PxReal invMass; //4 16
#else
	float4 invInertiaXYZ_invMass;
#endif
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
57
engine/third_party/physx/source/gpusimulationcontroller/include/PxgArticulationTendon.h
vendored
Normal file
57
engine/third_party/physx/source/gpusimulationcontroller/include/PxgArticulationTendon.h
vendored
Normal file
@@ -0,0 +1,57 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_ARTICULATION_TENDON_H
|
||||
#define PXG_ARTICULATION_TENDON_H
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
//Immutable topology of one tendon element (attachment/tendon joint):
//its children bitfield, parent element index and the articulation link it sits on.
class PxgArticulationTendonElementFixedData
{
public:
	PxU64 children; //my children index //8 8
	PxU32 parent; //parent index //4 12
	PxU32 linkInd; //articulation link index //4 16
};
|
||||
|
||||
|
||||
//A GPU tendon: two parallel element arrays of length mNbElements, split by mutability.
//mFixedElements presumably points at PxgArticulationTendonElementFixedData — verify against the kernels.
class PxgArticulationTendon
{
public:
	void* mFixedElements; //element data fixed at initialization
	void* mModElements; //element data that can be modified at run time
	PxU32 mNbElements;
};
|
||||
|
||||
}//namespace physx
|
||||
|
||||
#endif
|
||||
122
engine/third_party/physx/source/gpusimulationcontroller/include/PxgBVH.h
vendored
Normal file
122
engine/third_party/physx/source/gpusimulationcontroller/include/PxgBVH.h
vendored
Normal file
@@ -0,0 +1,122 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_BVH_H
|
||||
#define PXG_BVH_H
|
||||
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "GuSDF.h"
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
namespace physx
|
||||
{
|
||||
#endif
|
||||
|
||||
#if PX_SUPPORT_GPU_PHYSX
|
||||
|
||||
//One half of a BVH node packed into 16 bytes: one corner of the node's bounds
//plus a 31-bit index and a 1-bit flag. See PxgBVH: in the lower half, i is the
//left child and b the leaf flag; in the upper half, i is the right child.
PX_ALIGN_PREFIX(16)
struct PxgPackedNodeHalf
{
	PxReal x; //bound corner x
	PxReal y; //bound corner y
	PxReal z; //bound corner z
	PxU32 i : 31; //child/payload index (meaning depends on which half, see PxgBVH)
	PxU32 b : 1; //flag bit (leaf flag in the lower half; unused in the upper half)

	//Return the bound corner as a vector. Marked const so it is callable on
	//const nodes (the accessor does not modify the node).
	PX_CUDA_CALLABLE PX_FORCE_INLINE PxVec3 getXYZ() const
	{
		return PxVec3(x, y, z);
	}
}
PX_ALIGN_SUFFIX(16);
|
||||
|
||||
//GPU BVH over packed node halves. Non-owning views onto CUDA allocations
//(plain pointers, zeroed by the default constructor).
PX_ALIGN_PREFIX(16)
struct PxgBVH
{
	// for bottom up tree construction the root node does not appear in slot 0
	// this is a single int CUDA alloc that holds the index of the root
	PxU32* mRootNode;

	PxgPackedNodeHalf* mNodeLowers; // stores the lower spatial bound of the node's children, left child stored in i, leaf flag stored in b
	PxgPackedNodeHalf* mNodeUppers; // stores the upper spatial bound of the node's children, right child stored in i, flag is unused

	PxU32 mNumNodes; //number of nodes currently in use
	PxU32 mMaxNodes; //capacity of the node arrays

	PxgBVH() : mRootNode(NULL), mNodeLowers(NULL), mNodeUppers(NULL), mNumNodes(0), mMaxNodes(0)
	{}
}
PX_ALIGN_SUFFIX(16);
|
||||
|
||||
//A triangle mesh paired with its BVH, as seen by GPU kernels. Non-owning views.
//mTriangles is a flat index array — presumably 3 vertex indices per triangle; verify.
PX_ALIGN_PREFIX(16)
struct PxgBvhTriangleMesh
{
	PxgBVH mBvh;

	PxVec3* mVertices;
	PxU32* mTriangles;
	PxU32 mNumTriangles;
	PxU32 mNumVertices;
	PxU32 mPad[2]; //pad to a 16-byte multiple (intentionally left uninitialized)

	PxgBvhTriangleMesh() : mBvh(), mVertices(NULL), mTriangles(NULL), mNumTriangles(0), mNumVertices(0)
	{}
}
PX_ALIGN_SUFFIX(16);
|
||||
|
||||
//Aggregate quantities for a cluster of triangles, used to approximate the
//winding-number contribution of distant geometry — NOTE(review): exact semantics
//are defined by the consuming kernels (see GuSDF); verify there.
PX_ALIGN_PREFIX(16)
struct PxgWindingClusterApproximation
{
public:
	PxVec3 mCentroidTimesArea; //sum of centroid * area
	PxReal mRadius; //cluster bounding radius — presumably; verify
	PxVec3 mWeightedNormalSum; //sum of area-weighted normals
	PxReal mAreaSum; //total area

	//All accumulators start at zero.
	PX_CUDA_CALLABLE PxgWindingClusterApproximation() : mCentroidTimesArea(PxVec3(0.0f)), mRadius(0.0f), mWeightedNormalSum(PxVec3(0.0f)), mAreaSum(0.0f)
	{}
}
PX_ALIGN_SUFFIX(16);
|
||||
|
||||
//CUDA thread-block sizes used by the BVH construction and SDF kernels.
struct PxgBVHKernelBlockDim
{
	enum
	{
		BUILD_HIERARCHY = 256,
		BUILD_SDF = 256,
		SDF_FIX_HOLES = 256
	};
};
|
||||
|
||||
#endif
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
} // namespace physx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
78
engine/third_party/physx/source/gpusimulationcontroller/include/PxgBodySim.h
vendored
Normal file
78
engine/third_party/physx/source/gpusimulationcontroller/include/PxgBodySim.h
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_BODYSIM_H
|
||||
#define PXG_BODYSIM_H
|
||||
|
||||
#include "AlignedTransform.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
//GPU mirror of one rigid body's simulation state. Each float4 packs a vec3 in
//xyz plus an extra scalar in w, as spelled out by the field names. Trailing
//comments give the running end byte offset and the field's alignment/size.
struct PxgBodySim
{
	float4 linearVelocityXYZ_inverseMassW; //16 16
	float4 angularVelocityXYZ_maxPenBiasW; //32 16

	float4 maxLinearVelocitySqX_maxAngularVelocitySqY_linearDampingZ_angularDampingW; //48 16
	float4 inverseInertiaXYZ_contactReportThresholdW; //64 16

	float4 sleepLinVelAccXYZ_freezeCountW; //80 16
	float4 sleepAngVelAccXYZ_accelScaleW; //96 16
	float4 freezeThresholdX_wakeCounterY_sleepThresholdZ_bodySimIndex; //112 16

	PxAlignedTransform body2World; //144 32
	PxAlignedTransform body2Actor_maxImpulseW; //176 32

	PxU32 articulationRemapId; //180 4 remap id when the body belongs to an articulation
	PxU32 internalFlags; //184 4
	PxU16 lockFlags; //186 2
	PxU16 disableGravity; //188 2
	PxReal offsetSlop; //192 4

	float4 externalLinearAcceleration; //208 16
	float4 externalAngularAcceleration; //224 16
};
|
||||
|
||||
//Snapshot of a body's linear/angular velocity (xyz of each float4).
struct PxgBodySimVelocities
{
	float4 linearVelocity;
	float4 angularVelocity;
};
|
||||
|
||||
//One velocity-update record uploaded to the GPU: new velocities and external
//accelerations for the body identified by the w component of the first float4.
struct PxgBodySimVelocityUpdate
{
	float4 linearVelocityXYZ_bodySimIndexW;
	float4 angularVelocityXYZ_maxPenBiasW;
	float4 externalLinearAccelerationXYZ;
	float4 externalAngularAccelerationXYZ;
};
|
||||
|
||||
}//physx
|
||||
|
||||
#endif
|
||||
270
engine/third_party/physx/source/gpusimulationcontroller/include/PxgBodySimManager.h
vendored
Normal file
270
engine/third_party/physx/source/gpusimulationcontroller/include/PxgBodySimManager.h
vendored
Normal file
@@ -0,0 +1,270 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_BODYSIM_MANAGER_H
|
||||
#define PXG_BODYSIM_MANAGER_H
|
||||
|
||||
#include "foundation/PxBitMap.h"
|
||||
#include "foundation/PxPinnedArray.h"
|
||||
#include "PxgCudaMemoryAllocator.h"
|
||||
#include "PxNodeIndex.h"
|
||||
#include "PxgBodySim.h"
|
||||
#include "CmIDPool.h"
|
||||
#include "CmBlockArray.h"
|
||||
#include "foundation/PxHashMap.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
class PxsRigidBody;
|
||||
struct PxsExternalAccelerationProvider;
|
||||
|
||||
namespace IG
|
||||
{
|
||||
class NodeIndex;
|
||||
}
|
||||
|
||||
namespace Dy
|
||||
{
|
||||
class FeatherstoneArticulation;
|
||||
class DeformableSurface;
|
||||
class DeformableVolume;
|
||||
class ParticleSystem;
|
||||
}
|
||||
|
||||
//Pairs a simulation object's node index with its GPU remap slot.
struct PxgRemapIndices
{
public:
	PxU32 nodeIndex; //this is the index to PxgBodySim in GPU
	PxU32 remapIndex; //this is the index map between PxgBodySim and the PxgArticulation in GPU
};
|
||||
|
||||
//Links a CPU articulation object to the GPU slot that must be refreshed from it.
struct PxgArticulationUpdate
{
	PxU32 articulationIndex; //Which articulation on GPU
	Dy::FeatherstoneArticulation* articulation; //Which articulation on CPU
};
|
||||
//Strongly-typed aliases of PxgRemapIndices, one per simulation-object kind,
//so the different remap lists cannot be mixed up.
struct PxgArticulationIndices : public PxgRemapIndices {};
struct PxgSoftBodyIndices : public PxgRemapIndices {};
struct PxgFEMClothIndices : public PxgRemapIndices {};
struct PxgParticleSystemIndices : public PxgRemapIndices {};
|
||||
|
||||
//A contact/joint between one articulation link and a static: the interaction's
//unique id plus the link involved.
struct PxgStaticConstraint
{
	PxU32 uniqueId;
	PxU32 linkID;
};
|
||||
|
||||
//A contact/joint between two links of the same articulation: the interaction's
//unique id plus both links involved.
struct PxgSelfConstraint
{
	PxU32 uniqueId;
	PxU32 linkID0;
	PxU32 linkID1;
};
|
||||
|
||||
//Per-articulation lists of link-vs-static contacts and joints.
//MaxConstraints caps the count — presumably per link or per articulation; verify against usage.
struct PxgStaticConstraints
{
	static const PxU32 MaxConstraints = 16;
	PxArray<PxgStaticConstraint> mStaticContacts;
	PxArray<PxgStaticConstraint> mStaticJoints;

};
|
||||
|
||||
//Per-articulation lists of self-collision contacts and self joints.
//MaxConstraints caps the count — presumably per link or per articulation; verify against usage.
struct PxgArticulationSelfConstraints
{
	static const PxU32 MaxConstraints = 32;
	PxArray<PxgSelfConstraint> mSelfContacts;
	PxArray<PxgSelfConstraint> mSelfJoints;
};
|
||||
|
||||
class PxgBodySimManager
|
||||
{
|
||||
PX_NOCOPY(PxgBodySimManager)
|
||||
public:
|
||||
PxgBodySimManager(const PxVirtualAllocator& allocator) : mNewUpdatedBodies(allocator),
|
||||
mTotalNumBodies(0), mNbUpdatedBodies(0),
|
||||
mTotalNumArticulations(0), mTotalNumSoftBodies(0), mTotalNumFEMCloths(0),
|
||||
mTotalNumPBDParticleSystems(0),
|
||||
mActivePBDParticleSystems(allocator),
|
||||
mActivePBDParticleSystemsDirty(false),
|
||||
mActiveSoftbodies(allocator),
|
||||
mActiveSelfCollisionSoftbodies(allocator),
|
||||
mActiveSoftbodiesDirty(false),
|
||||
mActiveFEMCloths(allocator),
|
||||
mActiveFEMClothsDirty(false),
|
||||
mTotalStaticArticContacts(0),
|
||||
mTotalStaticArticJoints(0),
|
||||
mTotalSelfArticContacts(0),
|
||||
mTotalSelfArticJoints(0),
|
||||
mMaxStaticArticContacts(0),
|
||||
mMaxStaticArticJoints(0),
|
||||
mMaxSelfArticContacts(0),
|
||||
mMaxSelfArticJoints(0),
|
||||
mTotalStaticRBContacts(0),
|
||||
mTotalStaticRBJoints(0),
|
||||
mMaxStaticRBContacts(0),
|
||||
mMaxStaticRBJoints(0),
|
||||
mExternalAccelerations(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
~PxgBodySimManager();
|
||||
|
||||
void addBody(PxsRigidBody* bodyCore, const PxU32 nodeIndex);
|
||||
|
||||
void addArticulation(Dy::FeatherstoneArticulation* articulation, const PxU32 nodeIndex, bool OmniPVDRecordDirectGPUAPI);
|
||||
void releaseArticulation(Dy::FeatherstoneArticulation* articulation, const PxU32 nodeIndex);
|
||||
void releaseDeferredArticulationIds();
|
||||
|
||||
void addSoftBody(Dy::DeformableVolume* deformableVolume, const PxU32 nodeIndex);
|
||||
void releaseSoftBody(Dy::DeformableVolume* deformableVolume);
|
||||
void releaseDeferredSoftBodyIds();
|
||||
bool activateSoftbody(Dy::DeformableVolume* deformableVolume);
|
||||
bool deactivateSoftbody(Dy::DeformableVolume* deformableVolume);
|
||||
|
||||
bool activateSoftbodySelfCollision(Dy::DeformableVolume* deformableVolume);
|
||||
bool deactivateSoftbodySelfCollision(Dy::DeformableVolume* deformableVolume);
|
||||
|
||||
void addFEMCloth(Dy::DeformableSurface*, const PxU32 nodeIndex);
|
||||
void releaseFEMCloth(Dy::DeformableSurface*);
|
||||
void releaseDeferredFEMClothIds();
|
||||
bool activateCloth(Dy::DeformableSurface*);
|
||||
bool deactivateCloth(Dy::DeformableSurface*);
|
||||
|
||||
void addPBDParticleSystem(Dy::ParticleSystem* particleSystem, const PxU32 nodeIndex);
|
||||
void releasePBDParticleSystem(Dy::ParticleSystem* particleSystem);
|
||||
void releaseDeferredPBDParticleSystemIds();
|
||||
|
||||
void updateBodies(PxsRigidBody** rigidBodies, PxU32* nodeIndices, const PxU32 nbBodies, PxsExternalAccelerationProvider* externalAccelerations);
|
||||
void updateBody(const PxNodeIndex&);
|
||||
void destroy();
|
||||
|
||||
void updateArticulation(Dy::FeatherstoneArticulation* articulation, const PxU32 nodeIndex);
|
||||
|
||||
void reset();
|
||||
void reserve(const PxU32 nbBodies);
|
||||
|
||||
bool addStaticArticulationContactManager(PxU32 uniqueIndex, const PxNodeIndex nodeIndex);
|
||||
bool removeStaticArticulationContactManager(PxU32 uniqueIndex, const PxNodeIndex nodeIndex);
|
||||
|
||||
bool addStaticArticulationJoint(PxU32 uniqueIndex, const PxNodeIndex nodeIndex);
|
||||
bool removeStaticArticulationJoint(PxU32 uniqueIndex, const PxNodeIndex nodeIndex);
|
||||
|
||||
bool addSelfArticulationContactManager(PxU32 uniqueIndex, const PxNodeIndex nodeIndex0, const PxNodeIndex nodeIndex1);
|
||||
bool removeSelfArticulationContactManager(PxU32 uniqueIndex, const PxNodeIndex nodeIndex0, const PxNodeIndex nodeIndex1);
|
||||
|
||||
bool addSelfArticulationJoint(PxU32 uniqueIndex, const PxNodeIndex nodeIndex0, const PxNodeIndex nodeIndex1);
|
||||
bool removeSelfArticulationJoint(PxU32 uniqueIndex, const PxNodeIndex nodeIndex0, const PxNodeIndex nodeIndex1);
|
||||
|
||||
bool addStaticRBContactManager(PxU32 uniqueIndex, const PxNodeIndex nodeIndex);
|
||||
bool removeStaticRBContactManager(PxU32 uniqueIndex, const PxNodeIndex nodeIndex);
|
||||
|
||||
bool addStaticRBJoint(PxU32 uniqueIndex, const PxNodeIndex nodeIndex);
|
||||
bool removeStaticRBJoint(PxU32 uniqueIndex, const PxNodeIndex nodeIndex);
|
||||
|
||||
PX_INLINE PxU32 getArticulationRemapIndex(PxU32 nodeIndex) { return mNodeToRemapMap.find(nodeIndex)->second;}
|
||||
|
||||
PX_INLINE PxU32 getNumActiveParticleSystem()
|
||||
{
|
||||
return mActivePBDParticleSystems.size();
|
||||
}
|
||||
|
||||
PxArray<PxgArticulationUpdate> mUpdatedArticulations;
|
||||
PxArray<void*> mBodies; //rigid bodies, articulations, soft bodies and particle systems
|
||||
|
||||
PxArray<PxU32> mNewOrUpdatedBodySims;
|
||||
PxArray<PxgArticulationIndices> mNewArticulationSims;
|
||||
PxArray<PxgSoftBodyIndices> mNewSoftBodySims;
|
||||
PxArray<PxgFEMClothIndices> mNewFEMClothSims;
|
||||
PxArray<Dy::DeformableSurface*> mDeformableSurfaces;
|
||||
PxArray<Dy::DeformableVolume*> mDeformableVolumes;
|
||||
PxArray<PxgParticleSystemIndices> mNewPBDParticleSystemSims;
|
||||
|
||||
|
||||
Cm::DeferredIDPool mArticulationIdPool; //generate the remap id between pxgbodysim and pxgarticulation
|
||||
Cm::DeferredIDPool mSoftBodyIdPool; //generate the remap id between pxgbodysim and pxgsoftbody
|
||||
Cm::DeferredIDPool mFEMClothIdPool; //generate the remap id between pxgbodysim and pxgfemcloth
|
||||
Cm::DeferredIDPool mPBDParticleSystemIdPool; //generate the remap id between pxgbodysim and pxgparticlesystem
|
||||
|
||||
|
||||
PxPinnedArray<PxgBodySimVelocityUpdate> mNewUpdatedBodies;
|
||||
PxU32 mTotalNumBodies; //include rigid body and articulation
|
||||
PxU32 mNbUpdatedBodies; //this is used for multiply threads in the ScBeforeSolverTask to update body information
|
||||
|
||||
PxBitMap mUpdatedMap;
|
||||
PxU32 mTotalNumArticulations;
|
||||
PxU32 mTotalNumSoftBodies;
|
||||
PxU32 mTotalNumFEMCloths;
|
||||
PxU32 mTotalNumPBDParticleSystems;
|
||||
PxArray<PxU32> mActiveFEMClothIndex;
|
||||
PxArray<PxU32> mActiveSoftbodyIndex;
|
||||
PxArray<PxU32> mActiveSelfCollisionSoftbodyIndex;
|
||||
|
||||
PxInt32ArrayPinned mActivePBDParticleSystems;
|
||||
bool mActivePBDParticleSystemsDirty;
|
||||
PxInt32ArrayPinned mActiveSoftbodies;
|
||||
PxInt32ArrayPinned mActiveSelfCollisionSoftbodies;
|
||||
PxArray<PxU32> mActiveSoftbodiesStaging;
|
||||
PxArray<PxU32> mActiveSelfCollisionSoftBodiesStaging;
|
||||
bool mActiveSoftbodiesDirty;
|
||||
PxInt32ArrayPinned mActiveFEMCloths;
|
||||
PxArray<PxU32> mActiveFEMClothStaging;
|
||||
bool mActiveFEMClothsDirty;
|
||||
|
||||
PxHashMap<PxU32, PxU32> mNodeToRemapMap;
|
||||
#if PX_SUPPORT_OMNI_PVD
|
||||
PxHashMap<PxU32, PxU32> mRemapToNodeMap;
|
||||
#endif
|
||||
PxArray<PxU32> mDeferredFreeNodeIDs;
|
||||
|
||||
Cm::BlockArray<PxgStaticConstraints, 1024> mStaticConstraints;
|
||||
Cm::BlockArray<PxgArticulationSelfConstraints, 1024> mArticulationSelfConstraints;
|
||||
|
||||
PxU32 mTotalStaticArticContacts;
|
||||
PxU32 mTotalStaticArticJoints;
|
||||
|
||||
PxU32 mTotalSelfArticContacts;
|
||||
PxU32 mTotalSelfArticJoints;
|
||||
|
||||
PxU32 mMaxStaticArticContacts;
|
||||
PxU32 mMaxStaticArticJoints;
|
||||
|
||||
PxU32 mMaxSelfArticContacts;
|
||||
PxU32 mMaxSelfArticJoints;
|
||||
|
||||
PxU32 mTotalStaticRBContacts;
|
||||
PxU32 mTotalStaticRBJoints;
|
||||
|
||||
PxU32 mMaxStaticRBContacts;
|
||||
PxU32 mMaxStaticRBJoints;
|
||||
|
||||
PxsExternalAccelerationProvider* mExternalAccelerations;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
84
engine/third_party/physx/source/gpusimulationcontroller/include/PxgConstraintIdMap.h
vendored
Normal file
84
engine/third_party/physx/source/gpusimulationcontroller/include/PxgConstraintIdMap.h
vendored
Normal file
@@ -0,0 +1,84 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_CONSTRAINT_ID_MAP_H
|
||||
#define PXG_CONSTRAINT_ID_MAP_H
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxAssert.h"
|
||||
|
||||
#if PX_CUDA_COMPILER
|
||||
#include "assert.h"
|
||||
#endif
|
||||
|
||||
namespace physx
|
||||
{
|
||||
// maps the constraint/joint ID to the internal joint data ID. This is used for direct GPU API
|
||||
// operations to use the same constraint/joint ID on the public interface level as long as the
|
||||
// constraint/joint stays in the same scene. In particular, changing actors of a constraint/joint
|
||||
// should be transparent to users and can be achieved using this map.
|
||||
class PxgConstraintIdMapEntry
|
||||
{
|
||||
public:
|
||||
static const PxU32 eINVALID_ID = 0xffffFFFF;
|
||||
|
||||
PX_CUDA_CALLABLE PX_FORCE_INLINE PxgConstraintIdMapEntry()
|
||||
: mJointDataId(eINVALID_ID)
|
||||
{
|
||||
}
|
||||
|
||||
PX_CUDA_CALLABLE PX_FORCE_INLINE void invalidate()
|
||||
{
|
||||
mJointDataId = eINVALID_ID;
|
||||
}
|
||||
|
||||
PX_CUDA_CALLABLE PX_FORCE_INLINE void setJointDataId(PxU32 jointDataId)
|
||||
{
|
||||
#if PX_CUDA_COMPILER
|
||||
assert(jointDataId < eINVALID_ID); // until PX_ASSERT works on GPU (see PX-4133)
|
||||
#else
|
||||
PX_ASSERT(jointDataId < eINVALID_ID);
|
||||
#endif
|
||||
mJointDataId = jointDataId;
|
||||
}
|
||||
|
||||
PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getJointDataId() const
|
||||
{
|
||||
return mJointDataId;
|
||||
}
|
||||
|
||||
PX_CUDA_CALLABLE PX_FORCE_INLINE bool isJointDataIdValid() const { return (mJointDataId != eINVALID_ID); }
|
||||
|
||||
private:
|
||||
// maps the constraint/joint ID to the internal GPU joint data ID. eINVALID_ID is used if the
|
||||
// joint/constraint is unmapped/removed or inactive.
|
||||
PxU32 mJointDataId;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // PXG_CONSTRAINT_ID_MAP_H
|
||||
75
engine/third_party/physx/source/gpusimulationcontroller/include/PxgDeformableSkinning.h
vendored
Normal file
75
engine/third_party/physx/source/gpusimulationcontroller/include/PxgDeformableSkinning.h
vendored
Normal file
@@ -0,0 +1,75 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_DEFORMABLE_SKINNING_H
|
||||
#define PXG_DEFORMABLE_SKINNING_H
|
||||
|
||||
#include "PxDeformableSkinning.h"
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
|
||||
#include "PxgKernelLauncher.h"
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
namespace physx
|
||||
{
|
||||
#endif
|
||||
|
||||
#if PX_SUPPORT_GPU_PHYSX
|
||||
|
||||
// GPU-backed implementation of the PxDeformableSkinning interface. All three
// operations are dispatched through the stored PxgKernelLauncher and run
// asynchronously on the caller-supplied CUDA stream.
class PxgDeformableSkinning : public PxDeformableSkinning, public PxUserAllocated
{
private:
	PxgKernelLauncher mKernelLauncher; // used to launch the skinning kernels (stored by value)

public:
	PxgDeformableSkinning(PxgKernelLauncher& kernelLauncher);

	virtual ~PxgDeformableSkinning() { }

	// Computes normal vectors for an array of triangle-mesh skinning data sets.
	// NOTE(review): the "D" suffix suggests skinningDataArrayD points to device
	// memory — confirm against the kernel implementation.
	virtual void computeNormalVectors(
		PxTrimeshSkinningGpuData* skinningDataArrayD, PxU32 arrayLength,
		CUstream stream, PxU32 numGpuThreads) PX_OVERRIDE;

	// Evaluates the positions of vertices embedded into a triangle surface mesh.
	virtual void evaluateVerticesEmbeddedIntoSurface(
		PxTrimeshSkinningGpuData* skinningDataArrayD, PxU32 arrayLength,
		CUstream stream, PxU32 numGpuThreads) PX_OVERRIDE;

	// Evaluates the positions of vertices embedded into a tetrahedral volume mesh.
	virtual void evaluateVerticesEmbeddedIntoVolume(
		PxTetmeshSkinningGpuData* skinningDataArrayD, PxU32 arrayLength,
		CUstream stream, PxU32 numGpuThreads) PX_OVERRIDE;
};
|
||||
|
||||
#endif
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
} // namespace physx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
64
engine/third_party/physx/source/gpusimulationcontroller/include/PxgDenseGridData.h
vendored
Normal file
64
engine/third_party/physx/source/gpusimulationcontroller/include/PxgDenseGridData.h
vendored
Normal file
@@ -0,0 +1,64 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_DENSE_GRID_DATA_H
|
||||
#define PXG_DENSE_GRID_DATA_H
|
||||
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
#include "PxSparseGridParams.h"
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
namespace physx
|
||||
{
|
||||
#endif
|
||||
|
||||
struct PxgDenseGridData
|
||||
{
|
||||
PxVec3 mOrigin;
|
||||
PxReal mGridSpacing;
|
||||
PxU32 mNumCellsX;
|
||||
PxU32 mNumCellsY;
|
||||
PxU32 mNumCellsZ;
|
||||
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE PxReal getGridSpacing() const { return mGridSpacing; }
|
||||
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE PxI32 getHaloSize() const { return 0; }
|
||||
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE PxU32 maxNumCells()
|
||||
{
|
||||
return mNumCellsX * mNumCellsY * mNumCellsZ;
|
||||
}
|
||||
};
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
} // namespace physx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
93
engine/third_party/physx/source/gpusimulationcontroller/include/PxgDenseGridDataStandalone.h
vendored
Normal file
93
engine/third_party/physx/source/gpusimulationcontroller/include/PxgDenseGridDataStandalone.h
vendored
Normal file
@@ -0,0 +1,93 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PX_DENSE_GRID_DATA_H
|
||||
#define PX_DENSE_GRID_DATA_H
|
||||
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
#include "PxSparseGridParams.h"
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
namespace physx
|
||||
{
|
||||
#endif
|
||||
|
||||
/**
|
||||
\brief Descriptor for axis aligned dense grids
|
||||
*/
|
||||
/**
\brief Descriptor for axis aligned dense grids
*/
struct PxDenseGridParams
{
	PxVec3 origin;     //!< The origin of the dense grid which is the corner with smallest x/y/z coordinates
	PxU32 numCellsX;   //!< The number of cells in x direction
	PxU32 numCellsY;   //!< The number of cells in y direction
	PxU32 numCellsZ;   //!< The number of cells in z direction
	PxReal gridSpacing; //!< The cell size

	//! Constructs an empty descriptor: zero-sized grid at the world origin.
	PxDenseGridParams() : origin(PxVec3(0.0f)), numCellsX(0), numCellsY(0), numCellsZ(0), gridSpacing(0) {}

	//! Constructs a fully specified grid descriptor.
	PxDenseGridParams(const PxVec3& origin_, PxU32 numCellsX_, PxU32 numCellsY_, PxU32 numCellsZ_, PxReal gridSpacing_)
		: origin(origin_), numCellsX(numCellsX_), numCellsY(numCellsY_), numCellsZ(numCellsZ_), gridSpacing(gridSpacing_) {}
};
|
||||
|
||||
/**
|
||||
\brief Minimal set of data to access cells in a dense grid
|
||||
*/
|
||||
struct PxDenseGridData
|
||||
{
|
||||
PxDenseGridParams mGridParams; //!< The grid descriptor
|
||||
|
||||
PxDenseGridData() : mGridParams() {}
|
||||
|
||||
/**
|
||||
\brief The number of cells in the dense grid
|
||||
|
||||
\return The number of cells
|
||||
*/
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE PxU32 maxNumCells()
|
||||
{
|
||||
return mGridParams.numCellsX * mGridParams.numCellsY * mGridParams.numCellsZ;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief The dense grid's cell size
|
||||
|
||||
\return The cell size
|
||||
*/
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE PxReal getCellSize()
|
||||
{
|
||||
return mGridParams.gridSpacing;
|
||||
}
|
||||
};
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
} // namespace physx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
70
engine/third_party/physx/source/gpusimulationcontroller/include/PxgEssentialCore.h
vendored
Normal file
70
engine/third_party/physx/source/gpusimulationcontroller/include/PxgEssentialCore.h
vendored
Normal file
@@ -0,0 +1,70 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_ESSENTIAL_COMMON_H
|
||||
#define PXG_ESSENTIAL_COMMON_H
|
||||
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
#include "cudamanager/PxCudaTypes.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
class PxgCudaKernelWranglerManager;
|
||||
class PxCudaContextManager;
|
||||
class PxCudaContext;
|
||||
class PxgHeapMemoryAllocatorManager;
|
||||
|
||||
struct PxGpuDynamicsMemoryConfig;
|
||||
|
||||
class PxgSimulationController;
|
||||
class PxgCudaBroadPhaseSap;
|
||||
class PxgGpuNarrowphaseCore;
|
||||
class PxgGpuContext;
|
||||
|
||||
// Bundles the CUDA/GPU plumbing shared by GPU core objects: kernel wrangler,
// CUDA context management, heap allocator manager, back pointers to the
// simulation controller / GPU context, and the stream work is issued on.
// All members are public raw pointers; this class does not own them.
class PxgEssentialCore
{
public:
	PxgEssentialCore(PxgCudaKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager,
		PxgHeapMemoryAllocatorManager* heapMemoryManager, PxgSimulationController* simController,
		PxgGpuContext* context);

	PxgCudaKernelWranglerManager* mGpuKernelWranglerManager; // access to the compiled CUDA kernels
	PxCudaContextManager* mCudaContextManager;
	PxCudaContext* mCudaContext; // NOTE(review): presumably obtained from mCudaContextManager in the ctor — confirm in the .cpp
	PxgHeapMemoryAllocatorManager* mHeapMemoryManager;

	PxgSimulationController* mSimController;
	PxgGpuContext* mGpuContext;

	CUstream mStream; // CUDA stream this core submits its work to
};
|
||||
}
|
||||
|
||||
#endif
|
||||
336
engine/third_party/physx/source/gpusimulationcontroller/include/PxgFEMCloth.h
vendored
Normal file
336
engine/third_party/physx/source/gpusimulationcontroller/include/PxgFEMCloth.h
vendored
Normal file
@@ -0,0 +1,336 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_FEMCLOTH_H
|
||||
#define PXG_FEMCLOTH_H
|
||||
|
||||
#include "PxDeformableSurface.h"
|
||||
#include "PxgCudaBuffer.h"
|
||||
#include "PxsHeapMemoryAllocator.h"
|
||||
#include "cutil_math.h"
|
||||
#include "foundation/PxUserAllocated.h"
|
||||
#include "foundation/PxVec2.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
namespace Gu
|
||||
{
|
||||
class TriangleMesh;
|
||||
};
|
||||
|
||||
struct PxgFemRigidConstraintBlock;
|
||||
struct PxsDeformableSurfaceMaterialData;
|
||||
|
||||
#if PX_VC
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4324) // padding was added at the end of a structure because of a __declspec(align) value.
|
||||
#endif
|
||||
|
||||
// Per-cloth bookkeeping sizes, used when sizing the partition/remap buffers
// referenced by PxgFEMCloth (see the *CP members there).
struct PxgFEMClothData
{
public:
	// Largest number of non-shared triangles found in any solver partition.
	PxU32 mMaxNbNonSharedTrisPerPartition;

	// Sizes of the remap-output buffers for shared / non-shared triangle pairs.
	PxU32 mSharedTriPairRemapOutputSize;
	PxU32 mNonSharedTriPairRemapOutputSize;

	// Largest number of shared / non-shared triangle pairs in any partition.
	PxU32 mMaxNbSharedTriPairsPerPartition;
	PxU32 mMaxNbNonSharedTriPairsPerPartition;

	// NOTE(review): presumably the number of packed bounding-volume nodes
	// (cf. PxgFEMCloth::mPackedNodeBounds) — confirm against the BVH code.
	PxU32 mNbPackedNodes;
};
|
||||
|
||||
// GPU-side representation of one FEM cloth (deformable surface). This is a
// plain data holder: the pointer members reference per-vertex/per-triangle
// arrays whose allocation is mirrored by PxgFEMClothBuffer, and the scalar
// members hold solver parameters and element counts. 16-byte aligned so it
// can be copied to/from the device as-is.
// NOTE(review): members ending in "CP" appear to be combined/partitioned
// solver copies — confirm naming against the partitioning kernels.
PX_ALIGN_PREFIX(16)
class PxgFEMCloth
{
public:

	// to deallocate the host mirror. Make sure you pass in the right allocator!
	void deallocate(PxsHeapMemoryAllocator* allocator);

	void* mTriMeshData;

	// Per-vertex state; the w component packs the inverse mass (see names).
	float4* mVelocity_InvMass;
	float4* mPosition_InvMass;
	float4* mRestPosition;

	float4* mPrevPositionInContactOffset; // After contact pairs are updated, cloth vertices have moved by mPosition_InvMass -
										  // mPrevPositionInContactOffset.
	float4* mPrevPositionInRestOffset; // After previous "step()", cloth vertices moved from mPosition_InvMass to mPrevPositionInRestOffset.

	float* mDynamicFrictions; // dynamic friction per vertex
	PxU16* mMaterialIndices;

	PxU32* mTrianglesWithActiveEdges;
	uint4* mTriangleVertexIndices;
	uint4* mOrderedNonSharedTriangleVertexIndices_triIndex;
	float2* mOrderedSharedTriangleLambdas; // Two lambdas per triangle: ARAP, area
	float2* mOrderedNonSharedTriangleLambdas; // Two lambdas per triangle: ARAP, area
	float4* mOrderedNonSharedTriangleRestPoseInv; // Four components of restPoseInv: m00, m10, m01, m11

	// Shared triangle pair: A pair of triangles where both in-plane and bending constraints are applied together.
	// Non-shared triangle pair: A pair of triangles where only bending constraints are applied.

	uint4* mOrderedSharedTrianglePairVertexIndices;
	uint4* mOrderedNonSharedTrianglePairVertexIndices;

	float4* mOrderedSharedRestBendingAngle_flexuralStiffness_damping;
	float4* mOrderedNonSharedRestBendingAngle_flexuralStiffness_damping;

	float* mSharedBendingLambdas;
	float* mNonSharedBendingLambdas;

	bool mNonSharedTriPair_hasActiveBending;

	// To solve in-plane energies for shared triangle pairs, the rest pose of the two triangles is also stored.
	// Since the two triangles share an edge, only one additional edge needs to be stored for each triangle, which are edge0 and edge1.
	// By choosing the shared edge to be in the direction of (1, 0) in 2D, only the magnitude of the shared edge needs to be stored.
	// Consequently:
	// Rest edges for triangle 0: (RestEdgeLength, 0), (Edge0.x, Edge0.y)
	// Rest edges for triangle 1: (RestEdgeLength, 0), (Edge1.x, Edge1.y)
	// This approach saves significant memory compared to storing restPoseInv matrices for each triangle.

	float4* mOrderedSharedRestEdge0_edge1;
	float4* mOrderedSharedRestEdgeLength_material0_material1;

	float4* mPosition_InvMassCP;

	PxU32* mNonSharedTriAccumulatedPartitionsCP;

	PxU32* mSharedTriPairRemapOutputCP;
	PxU32* mNonSharedTriPairRemapOutputCP;

	PxU32* mSharedTriPairAccumulatedCopiesCP;
	PxU32* mNonSharedTriPairAccumulatedCopiesCP;

	PxU32* mSharedTriPairAccumulatedPartitionsCP;
	PxU32* mNonSharedTriPairAccumulatedPartitionsCP;

	float4* mDeltaPos; // Initialize to zero and zero every time in the apply delta kernel
	float4* mAccumulatedDeltaPos;
	float4* mAccumulatedDeltaVel; // Used for damping

	PxBounds3* mPackedNodeBounds;

	// For cloth-rigid contact preparation.
	PxgFemRigidConstraintBlock* mRigidConstraints; // ((numVerts + 31) / 32) * maxNumContactPerVertex *
												   // sizeof(PxgFemRigidConstraintBlock)

	// Solver/limit parameters.
	PxReal mLinearDamping;
	PxReal mMaxLinearVelocity;
	PxReal mPenBiasClamp;

	// Sleeping/settling parameters.
	PxReal mSettlingThreshold;
	PxReal mSleepThreshold;
	PxReal mSettlingDamping;
	PxReal mSelfCollisionFilterDistance;

	// Element counts.
	PxU32 mNbVerts;
	PxU32 mNbTriangles;
	PxU32 mNbNonSharedTriangles;
	PxU32 mNbTrianglesWithActiveEdges;

	PxU32 mNbTrianglePairs;
	PxU32 mNbSharedTrianglePairs;
	PxU32 mNbNonSharedTrianglePairs;

	PxU32 mNbNonSharedTriPartitions;
	PxU32 mNbSharedTriPairPartitions;
	PxU32 mNbNonSharedTriPairPartitions;

	// For partitions that contain only a small number of elements, run them in a single kernel call instead of launching multiple kernels
	// one by one.
	// clusterId stores the first partition that has fewer elements than PxgFEMClothKernelBlockDim::CLOTH_SOLVESHELL.

	PxU32 mNonSharedTriClusterId;
	PxU32 mSharedTriPairClusterId;
	PxU32 mNonSharedTriPairClusterId;

	// Identity/flags of this cloth within the GPU simulation.
	PxU32 mElementIndex;
	PxU32 mGpuRemapIndex;
	PxU8 mActorFlags;
	PxU8 mBodyFlags;
	PxU16 mSurfaceFlags;

	PxU32 mIsActive;
	PxReal mRestDistance;
	PxReal mOriginalContactOffset;

	PxU32 mNbCollisionPairUpdatesPerTimestep;
	PxU32 mNbCollisionSubsteps;

} PX_ALIGN_SUFFIX(16);
|
||||
|
||||
#if PX_VC
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
// Owns the device-side allocations backing one PxgFEMCloth: each buffer here
// corresponds to one of the raw pointer members of PxgFEMCloth (same names,
// see that class for per-array semantics). Allocated via the heap memory
// manager passed to the constructor.
class PxgFEMClothBuffer : public PxUserAllocated
{
public:
	PxgFEMClothBuffer(PxgHeapMemoryAllocatorManager* heapMemoryManager);

	PxgCudaBuffer triangleMeshData;

	PxgTypedCudaBuffer<float4> deltaPos;
	PxgTypedCudaBuffer<float4> accumulatedDeltaPos;
	PxgTypedCudaBuffer<float4> accumulatedDeltaVel; // Used for damping

	PxgTypedCudaBuffer<float4> prevPositionInContactOffset; // After contact pairs are updated, cloth vertices have moved by
															// mPosition_InvMass - mPrevPositionInContactOffset.
	PxgTypedCudaBuffer<float4> prevPositionInRestOffset; // After cloth-cloth distance is measured, cloth vertices have moved by
														 // mPosition_InvMass - mPrevPositionInRestOffset.


	PxgTypedCudaBuffer<PxU16> materialIndices;
	PxgTypedCudaBuffer<float> dynamicfrictions;

	PxgTypedCudaBuffer<PxU32> trianglesWithActiveEdges;
	PxgTypedCudaBuffer<uint4> triangleVertexIndices;
	PxgTypedCudaBuffer<uint4> orderedNonSharedTriangleVertexIndices_triIndex;

	PxgTypedCudaBuffer<float2> orderedSharedTriangleLambdas;
	PxgTypedCudaBuffer<float2> orderedNonSharedTriangleLambdas;

	PxgTypedCudaBuffer<float4> orderedNonSharedTriangleRestPoseInv;

	PxgTypedCudaBuffer<uint4> orderedSharedTrianglePairVertexIndices;
	PxgTypedCudaBuffer<uint4> orderedNonSharedTrianglePairVertexIndices;

	PxgTypedCudaBuffer<float4> orderedSharedRestBendingAngle_flexuralStiffness_damping;
	PxgTypedCudaBuffer<float4> orderedNonSharedRestBendingAngle_flexuralStiffness_damping;

	PxgTypedCudaBuffer<float4> orderedSharedRestEdge0_edge1;
	PxgTypedCudaBuffer<float4> orderedSharedRestEdgeLength_material0_material1;

	PxgTypedCudaBuffer<float> sharedBendingLambdas;
	PxgTypedCudaBuffer<float> nonSharedBendingLambdas;

	PxgTypedCudaBuffer<float4> position_InvMassCP;

	PxgTypedCudaBuffer<PxU32> nonSharedTriAccumulatedPartitionsCP;

	PxgTypedCudaBuffer<PxU32> sharedTriPairRemapOutputCP;
	PxgTypedCudaBuffer<PxU32> nonSharedTriPairRemapOutputCP;

	PxgTypedCudaBuffer<PxU32> sharedTriPairAccumulatedCopiesCP;
	PxgTypedCudaBuffer<PxU32> nonSharedTriPairAccumulatedCopiesCP;

	PxgTypedCudaBuffer<PxU32> sharedTriPairAccumulatedPartitionsCP;
	PxgTypedCudaBuffer<PxU32> nonSharedTriPairAccumulatedPartitionsCP;

	PxgTypedCudaBuffer<PxBounds3> packedNodeBounds; // for refit

	// NOTE(review): this buffer has no counterpart in PxgFEMCloth and the name
	// refers to tets rather than triangles — confirm it is still used.
	PxgTypedCudaBuffer<PxU32> numPenetratedTets;
};
|
||||
|
||||
// Bit positions for a packed per-triangle edge/vertex activity encoding.
// Two alternative encodings share a single 32-bit word: TYPE0 occupies the
// lower 16 bits and TYPE1 the upper 16 bits. The matching bit masks are
// declared in EdgeEncodingMask.
struct EdgeEncoding
{
	// TYPE0 layout (lower 16 bits)
	static constexpr PxU32 TYPE0_EDGE_BASE_POS = 0; // Edge presence bits: 0-2

	static constexpr PxU32 TYPE0_AUTH_COUNT_POS = 3; // bits 3-4
	static constexpr PxU32 TYPE0_FIRST_EDGE_POS = 5; // bits 5-6
	static constexpr PxU32 TYPE0_SECOND_EDGE_POS = 7; // bits 7-8
	static constexpr PxU32 TYPE0_THIRD_EDGE_POS = 9; // bits 9-10

	// Per-vertex "active" flags, one bit each (bits 11-13)
	static constexpr PxU32 TYPE0_VERTEX0_ACTIVE_POS = 11;
	static constexpr PxU32 TYPE0_VERTEX1_ACTIVE_POS = 12;
	static constexpr PxU32 TYPE0_VERTEX2_ACTIVE_POS = 13;

	// TYPE1 layout (upper 16 bits) - mirrors the TYPE0 fields shifted up by 16
	static constexpr PxU32 TYPE1_EDGE_BASE_POS = 16; // edge presence: bits 16-18

	static constexpr PxU32 TYPE1_AUTH_COUNT_POS = 19; // bits 19-20
	static constexpr PxU32 TYPE1_FIRST_EDGE_POS = 21; // bits 21-22
	static constexpr PxU32 TYPE1_SECOND_EDGE_POS = 23; // bits 23-24
	static constexpr PxU32 TYPE1_THIRD_EDGE_POS = 25; // bits 25-26

	// Per-vertex "active" flags, one bit each (bits 27-29)
	static constexpr PxU32 TYPE1_VERTEX0_ACTIVE_POS = 27;
	static constexpr PxU32 TYPE1_VERTEX1_ACTIVE_POS = 28;
	static constexpr PxU32 TYPE1_VERTEX2_ACTIVE_POS = 29;
};
|
||||
|
||||
struct EdgeEncodingMask
|
||||
{
|
||||
// Type0: minimal triangle set covering all edges and vertices (compact encoding)
|
||||
static constexpr PxU32 TYPE0_AUTH_COUNT_MASK = 0x3 << EdgeEncoding::TYPE0_AUTH_COUNT_POS;
|
||||
static constexpr PxU32 TYPE0_FIRST_EDGE_MASK = 0x3 << EdgeEncoding::TYPE0_FIRST_EDGE_POS;
|
||||
static constexpr PxU32 TYPE0_SECOND_EDGE_MASK = 0x3 << EdgeEncoding::TYPE0_SECOND_EDGE_POS;
|
||||
static constexpr PxU32 TYPE0_THIRD_EDGE_MASK = 0x3 << EdgeEncoding::TYPE0_THIRD_EDGE_POS;
|
||||
|
||||
static constexpr PxU32 TYPE0_VERTEX0_ACTIVE_MASK = 1U << EdgeEncoding::TYPE0_VERTEX0_ACTIVE_POS;
|
||||
static constexpr PxU32 TYPE0_VERTEX1_ACTIVE_MASK = 1U << EdgeEncoding::TYPE0_VERTEX1_ACTIVE_POS;
|
||||
static constexpr PxU32 TYPE0_VERTEX2_ACTIVE_MASK = 1U << EdgeEncoding::TYPE0_VERTEX2_ACTIVE_POS;
|
||||
|
||||
// Type1: more balanced distribution of edges and vertices across triangles (balanced encoding)
|
||||
static constexpr PxU32 TYPE1_FIRST_EDGE_MASK = 0x3 << EdgeEncoding::TYPE1_FIRST_EDGE_POS;
|
||||
static constexpr PxU32 TYPE1_SECOND_EDGE_MASK = 0x3 << EdgeEncoding::TYPE1_SECOND_EDGE_POS;
|
||||
static constexpr PxU32 TYPE1_THIRD_EDGE_MASK = 0x3 << EdgeEncoding::TYPE1_THIRD_EDGE_POS;
|
||||
|
||||
static constexpr PxU32 TYPE1_VERTEX0_ACTIVE_MASK = 1U << EdgeEncoding::TYPE1_VERTEX0_ACTIVE_POS;
|
||||
static constexpr PxU32 TYPE1_VERTEX1_ACTIVE_MASK = 1U << EdgeEncoding::TYPE1_VERTEX1_ACTIVE_POS;
|
||||
static constexpr PxU32 TYPE1_VERTEX2_ACTIVE_MASK = 1U << EdgeEncoding::TYPE1_VERTEX2_ACTIVE_POS;
|
||||
};
|
||||
|
||||
// Static helpers that build GPU FEM-cloth simulation data from a triangle
// mesh: sizing/serialization of the mesh, per-triangle and triangle-pair
// setup, constraint categorization, and rest-configuration computation.
class PxgFEMClothUtil
{
public:
	// Byte size required to store the GPU-side copy of the triangle mesh.
	static PxU32 computeTriangleMeshByteSize(const Gu::TriangleMesh* triangleMesh);

	// Serializes the triangle mesh into 'mem'. Returns a PxU32 — presumably
	// the number of bytes written; verify against the implementation.
	static PxU32 loadOutTriangleMesh(void* mem, const Gu::TriangleMesh* triangleMesh);

	// Initializes per-triangle data on 'femCloth' and collects triangle-pair
	// index arrays from the mesh topology and material tables.
	static PxU32 initialTriangleData(PxgFEMCloth& femCloth, PxArray<uint2>& trianglePairTriangleIndices,
									 PxArray<uint4>& trianglePairVertexIndices, const Gu::TriangleMesh* triangleMesh,
									 const PxU16* materialHandles, PxsDeformableSurfaceMaterialData* materials, const PxU32 nbMaterials,
									 PxsHeapMemoryAllocator* alloc);

	// Splits the cloth constraints into shared triangle pairs, non-shared
	// triangles, and non-shared triangle pairs (output index arrays).
	static void categorizeClothConstraints(PxArray<PxU32>& sharedTrianglePairs, PxArray<PxU32>& nonSharedTriangles,
										   PxArray<PxU32>& nonSharedTrianglePairs, PxgFEMCloth& femCloth,
										   const PxArray<uint2>& trianglePairTriangleIndices);

	// Builds the solver configuration for non-shared triangles in the given order.
	static void computeNonSharedTriangleConfiguration(PxgFEMCloth& femCloth, const PxArray<PxU32>& orderedNonSharedTriangles,
													  const PxArray<PxU32>& activeTriangleIndices,
													  const Gu::TriangleMesh* const triangleMesh);

	// Returns the flexural (bending) stiffness for one triangle pair derived
	// from the pair geometry, thickness, and user stiffness input.
	static float updateFlexuralStiffnessPerTrianglePair(float t0Area, float t1Area, float hingeLength, float thickness, float inputStiffness);

	// Fills the rest configuration (rest bending angle/stiffness/damping and,
	// optionally, rest edge data) for the 'index'-th pair of iteration 'it'.
	// Returns whether the update succeeded — confirm against the implementation.
	static bool updateRestConfiguration(float4* orderedRestAngleAndStiffness_damping, uint4* orderedTrianglePairVertexIndices,
										PxgFEMCloth& femCloth, PxU32 it, PxU32 index, PxArray<uint2>& trianglePairTriangleIndices,
										const PxArray<uint4>& trianglePairVertexIndices, const PxsDeformableSurfaceMaterialData* materials,
										const PxVec3* positions, bool zeroRestBendingAngle, float4* orderedRestEdge0_edge1 = NULL,
										float4* orderedRestEdgeLength_material0_material1 = NULL);

	// Builds the solver configuration for the ordered triangle pairs;
	// 'isSharedPartition' selects the shared vs non-shared pair arrays.
	static void computeTrianglePairConfiguration(PxgFEMCloth& femCloth, PxArray<uint2>& trianglePairTriangleIndices,
												 const PxArray<uint4>& trianglePairVertexIndices, const PxArray<PxU32>& orderedTrianglePairs,
												 const PxArray<PxU32>& activeTrianglePairIndices, const Gu::TriangleMesh* const triangleMesh,
												 const PxsDeformableSurfaceMaterialData* materials, bool zeroRestBendingAngle,
												 bool isSharedPartition);
};
|
||||
|
||||
} // namespace physx
|
||||
|
||||
#endif
|
||||
217
engine/third_party/physx/source/gpusimulationcontroller/include/PxgFEMClothCore.h
vendored
Normal file
217
engine/third_party/physx/source/gpusimulationcontroller/include/PxgFEMClothCore.h
vendored
Normal file
@@ -0,0 +1,217 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_FEMCLOTH_CORE_H
|
||||
#define PXG_FEMCLOTH_CORE_H
|
||||
|
||||
#include "PxgFEMCore.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
#define FEMCLOTH_MAX_NUM_PARTITIONS 32
|
||||
|
||||
namespace Dy
|
||||
{
|
||||
struct DeformableSurfaceCore;
|
||||
class DeformableSurface;
|
||||
}
|
||||
|
||||
// this is needed to force PhysXSimulationControllerGpu linkage as Static Library!
|
||||
void createPxgFEMCloth();
|
||||
|
||||
struct PxGpuDynamicsMemoryConfig;
|
||||
|
||||
class PxgCudaBroadPhaseSap;
|
||||
class PxgGpuNarrowphaseCore;
|
||||
class PxgFEMCloth;
|
||||
struct PxgFEMClothData;
|
||||
|
||||
class PxRenderBuffer;
|
||||
|
||||
class PxRenderOutput;
|
||||
|
||||
struct PxgSolverCoreDesc;
|
||||
struct PxgArticulationCoreDesc;
|
||||
|
||||
class PxPostSolveCallback;
|
||||
|
||||
struct PxgPrePrepDesc;
|
||||
struct PxgConstraintPrepareDesc;
|
||||
struct PxgSolverSharedDescBase;
|
||||
|
||||
// GPU solver core for FEM cloth (deformable surfaces). Drives the per-step
// pipeline: pre-integration, bound refit, collision detection/pair updates,
// constraint prep, iterative position/velocity solves, and write-back.
// Work is distributed over several CUDA streams (cloth, solver, particle,
// broad phase) and synchronized with the CUevent members below.
class PxgFEMClothCore : public PxgFEMCore
{
public:
	PxgFEMClothCore(PxgCudaKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager,
					PxgHeapMemoryAllocatorManager* heapMemoryManager, PxgSimulationController* simController, PxgGpuContext* context,
					PxU32 maxContacts, const PxU32 collisionStackSize, bool isTGS);
	~PxgFEMClothCore();

	// Per-iteration bookkeeping executed before the solver iterations.
	void preIteration();

	// Integrate verts position based on gravity
	void preIntegrateSystems(PxU32 nbActiveFEMCloths, const PxVec3& gravity, PxReal dt);

	// Calculate femCloth's world bound
	void refitBound(PxU32 nbActiveFEMCloths, CUstream stream);

	void resetClothVsNonclothContactCounts();
	void checkBufferOverflows();

	// Decides whether cloth-cloth contact pairs must be regenerated this step.
	void updateClothContactPairValidity(bool forceUpdateClothContactPairs, bool adaptiveCollisionPairUpdate, PxReal dt);

	// Cloth self-collision; 'isVT' selects vertex-triangle (true) vs edge-edge pairs.
	void selfCollision(bool isVT);

	// Collision between two different cloths; 'isVT' as in selfCollision.
	void differentClothCollision(bool isVT);

	void clampContactCounts();

	void sortContacts(PxU32 nbActiveFemClothes);

	// One solver iteration over all cloth constraints.
	void solve(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd, PxgDevicePointer<PxgSolverCoreDesc> solverCoreDescd,
			   PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd, PxgDevicePointer<PxgArticulationCoreDesc> artiCoreDescd,
			   PxReal dt, CUstream solverStream, PxU32 iter, PxU32 maxIter, bool isVelocityIteration, const PxVec3& gravity);

	// Position-level solver iteration.
	void solve_position(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd, PxgDevicePointer<PxgSolverCoreDesc> solverCoreDescd,
						PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd, PxgDevicePointer<PxgArticulationCoreDesc> artiCoreDescd,
						PxReal dt, CUstream solverStream, PxU32 iter, PxU32 maxIter, const PxVec3& gravity);

	// Velocity-level solver iteration.
	void solve_velocity(PxU32 iter, PxU32 maxIter, PxReal dt);

	// Advances all active cloths by one sub-step.
	void step(PxReal dt, CUstream stream, PxU32 nbFEMCloths, const PxVec3& gravity, bool adaptiveCollisionPairUpdate, bool forceUpdateClothContactPairs);

	void finalizeVelocities(PxReal dt);

	// Prepares rigid/attachment constraint blocks for the solver.
	void constraintPrep(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd, PxgDevicePointer<PxgConstraintPrepareDesc> prepDescd,
						PxReal invDt, PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd, CUstream solverStream,
						PxU32 nbSolverBodies, PxU32 nbArticulations);

	// Mirrors user-visible state of the active cloths into GPU buffers.
	bool updateUserData(PxPinnedArray<PxgFEMCloth>& femClothPool, PxArray<PxU32>& femClothNodeIndexPool, const PxU32* activeFEMCloths,
						PxU32 nbActiveFEMCloths, void** bodySimsLL);

	// The cloth stream used by this core.
	CUstream getStream() { return mStream; }

	// Partitions triangle (and triangle-pair) constraints so that entries in
	// one partition can be solved in parallel without write conflicts.
	void partitionTriangleSimData(PxgFEMCloth& femCloth, PxgFEMClothData& clothData, PxArray<PxU32>& orderedTriangles,
								  const PxArray<PxU32>& activeTriangles, PxsHeapMemoryAllocator* alloc);
	void partitionTrianglePairSimData(PxgFEMCloth& femCloth, PxgFEMClothData& clothData, PxU32 maximumPartitions,
									  PxArray<PxU32>& orderedTrianglePairs, const PxArray<PxU32>& activeTrianglePairs,
									  const PxArray<uint4>& trianglePairVertexIndices, bool isSharedTrianglePair,
									  PxsHeapMemoryAllocator* alloc);

	PxgCudaPagedLinearAllocator<PxgHeapMemoryAllocator>& getStackAllocator() { return mIntermStackAlloc; }

	PX_FORCE_INLINE PxU32 getMaxContacts() { return mMaxContacts; }

	void applyDamping(PxU32 nbActiveFemClothes, PxReal dt, CUstream stream);

	// Apply position delta change original triangle mesh
	void applyExternalDelta(PxU32 nbActiveFemClothes, PxReal dt, CUstream stream);

	// Debug visualization of the current contacts.
	void drawContacts(PxRenderOutput& out);

	void syncCloths();

	void createActivatedDeactivatedLists();

private:
	void preIntegrateSystem(PxgFEMCloth* femClothsd, PxU32* activeFemCloths, PxU32 nbActiveFemCloths, PxU32 maxVertices,
							const PxVec3& gravity, PxReal dt, CUstream bpStream);

	void prepRigidContactConstraint(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd, PxgDevicePointer<PxgConstraintPrepareDesc> prepDescd,
									PxReal invDt, PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd, CUstream solverStream,
									PxU32 numSolverBodies, PxU32 numArticulations);

	void prepRigidAttachmentConstraints(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd,
										PxgDevicePointer<PxgConstraintPrepareDesc> prepDescd, PxReal /*invDt*/,
										PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd, CUstream stream);

	void prepClothAttachmentConstraints(CUstream stream);

	void prepClothParticleConstraint();

	// These methods run on the cloth stream.
	void prepClothContactConstraint(bool isVT);

	void solveShellEnergy(PxgFEMCloth* femClothsd, PxgDevicePointer<PxU32> activeFEMClothsd, PxU32 nbActiveFEMCloths, PxReal dt);
	void solveNonSharedTriangles(PxgFEMCloth* femClothsd, PxgDevicePointer<PxU32> activeFEMClothsd, PxU32 nbActiveFEMCloths, PxReal dt);
	void solveTrianglePairs(PxgFEMCloth* femClothsd, PxgDevicePointer<PxU32> activeFEMClothsd, PxU32 nbActiveFEMCloths, PxReal dt,
							bool isSharedTrianglePair);

	void queryRigidContactReferenceCount(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd,
										 PxgDevicePointer<PxgSolverCoreDesc> solverCoreDescd,
										 PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd,
										 PxgDevicePointer<PxgArticulationCoreDesc> artiCoreDescd, CUstream solverStream, PxReal dt);

	// Solve cloth vs rigid body contact
	void solveClothRigidContacts(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd, PxgDevicePointer<PxgSolverCoreDesc> solverCoreDescd,
								 PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd,
								 PxgDevicePointer<PxgArticulationCoreDesc> artiCoreDescd, CUstream solverStream, PxReal dt);

	// Solve cloth vs rigid body attachment
	void solveClothRigidAttachment(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd, PxgDevicePointer<PxgSolverCoreDesc> solverCoreDescd,
								   PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd,
								   PxgDevicePointer<PxgArticulationCoreDesc> artiCoreDescd, CUstream solverStream, PxReal dt);

	void solveClothAttachmentDelta();

	void prepareClothClothCollision(bool forceUpdateClothContactPairs, bool adaptiveCollisionPairUpdate, PxReal dt);

	void solveClothClothCollision(PxU32 nbActiveFEMCloths, PxReal dt);

	// Solve cloth vs cloth contact and output to cloth delta buffer
	void solveClothContactsOutputClothDelta(PxReal dt, bool isVT);

	// Solve cloth vs particle contact and output to cloth delta buffer
	void solveParticleContactsOutputClothDelta(CUstream particleStream);

	// Solve cloth vs particle contact and output to particle delta buffer
	void solveParticleContactsOutputParticleDelta(CUstream particleStream);

	//--------------------------------------------------------------------------------------

	PxgTypedCudaBuffer<PxU8> mUpdateClothContactPairs; // When set to 1, updates the cloth-cloth contact pairs.

	CUevent mBoundUpdateEvent; // This event is used to synchronize the broad phase stream (updateBound runs on the
							   // broad phase stream) and mStream.

	CUevent mSolveRigidEvent; // Recorded on the solver stream; the cloth stream must wait on it before proceeding.

	CUevent mConstraintPrepParticleEvent; // Synchronizes constraint prep (cloth stream) with
										  // solving cloth vs particle system contacts (particle stream).

	CUevent mSolveParticleEvent; // Synchronizes particle system contacts (particle stream)
								 // before applyExternalTetraDelta is called.

public:
	PxArray<Dy::DeformableSurface*> mActivatingDeformableSurfaces;   // Cloths becoming active this step.
	PxArray<Dy::DeformableSurface*> mDeactivatingDeformableSurfaces; // Cloths becoming inactive this step.
	PxPostSolveCallback* mPostSolveCallback;                         // Optional user callback invoked after the solve.
};
|
||||
}
|
||||
|
||||
#endif
|
||||
46
engine/third_party/physx/source/gpusimulationcontroller/include/PxgFEMClothCoreKernelIndices.h
vendored
Normal file
46
engine/third_party/physx/source/gpusimulationcontroller/include/PxgFEMClothCoreKernelIndices.h
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_FEMCLOTH_CORE_KERNEL_INDICES_H
|
||||
#define PXG_FEMCLOTH_CORE_KERNEL_INDICES_H
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
// Thread-block dimensions used when launching the FEM-cloth CUDA kernels.
struct PxgFEMClothKernelBlockDim
{
	enum
	{
		CLOTH_PREINTEGRATION = 512, // pre-integration (gravity/velocity) kernels
		CLOTH_STEP = 1024,          // per-sub-step kernels
		CLOTH_SOLVESHELL = 128      // shell-energy solve kernels
	};
};
|
||||
}
|
||||
|
||||
#endif
|
||||
543
engine/third_party/physx/source/gpusimulationcontroller/include/PxgFEMCore.h
vendored
Normal file
543
engine/third_party/physx/source/gpusimulationcontroller/include/PxgFEMCore.h
vendored
Normal file
@@ -0,0 +1,543 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_FEM_CORE_H
|
||||
#define PXG_FEM_CORE_H
|
||||
|
||||
#include "PxgNonRigidCoreCommon.h"
|
||||
#include "PxgSimulationCoreDesc.h"
|
||||
#include "PxNodeIndex.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
namespace physx
|
||||
{
|
||||
struct PxgPrePrepDesc;
|
||||
struct PxgSolverCoreDesc;
|
||||
struct PxgSolverSharedDescBase;
|
||||
struct PxgArticulationCoreDesc;
|
||||
|
||||
//rigid vs soft body : pairInd0 is rigid , pairInd1 is soft body
|
||||
//particle vs soft body : pairInd0 is particle, pairInd1 is soft body
|
||||
//soft body vs soft body : pairInd0 is soft body, pairInd1 is soft body
|
||||
//soft body vs fem cloth : pairInd0 is soft body, pairInd1 is fem cloth
|
||||
//fem cloth vs fem cloth : pairInd0 is fem cloth, pairInd1 is fem cloth
|
||||
// Identifies one deformable-vs-deformable contact pair (see the pairing
// convention table above this struct). 'auxInd' packs two auxiliary indices
// together with three status flags.
struct PX_ALIGN_PREFIX(16) PxgFemFemContactInfo
{
	PxU64 pairInd0;
	PxU32 pairInd1;
	PxU32 auxInd; // Packed 32-bit auxiliary data, layout below.

	// auxInd bit layout (32 bits total):
	//
	//   | 31    | 30       | 29          | 28 - 14           | 13 - 0          |
	//   | Valid | PairType | InCollision | AuxInd1 (15 bits) | AuxInd0 (14 b)  |
	//
	//   bits [0-13]  : AuxInd0 - first auxiliary index (max 16,383)
	//   bits [14-28] : AuxInd1 - second auxiliary index (max 32,767)
	//   bit  [29]    : in-collision flag (1 = in contact)
	//   bit  [30]    : pair type (0 = vertex-triangle, 1 = edge-edge)
	//   bit  [31]    : validity flag (1 = valid)

	// Field widths, masks, and shifts for auxInd.
	static constexpr PxU32 AUX_IND0_BITS = 14;
	static constexpr PxU32 AUX_IND1_BITS = 15;

	static constexpr PxU32 AUX_IND0_MASK = (1u << AUX_IND0_BITS) - 1; // 0x00003FFF
	static constexpr PxU32 AUX_IND1_MASK = ((1u << AUX_IND1_BITS) - 1) << AUX_IND0_BITS; // 0x1FFFC000
	static constexpr PxU32 AUX_IND1_SHIFT = AUX_IND0_BITS;

	static constexpr PxU32 CONTACT_FLAG_SHIFT = 29;
	static constexpr PxU32 PAIR_TYPE_FLAG_SHIFT = 30;
	static constexpr PxU32 VALIDITY_FLAG_SHIFT = 31;

	static constexpr PxU32 CONTACT_FLAG_MASK = 1u << CONTACT_FLAG_SHIFT;
	static constexpr PxU32 PAIR_TYPE_FLAG_MASK = 1u << PAIR_TYPE_FLAG_SHIFT;
	static constexpr PxU32 VALIDITY_FLAG_MASK = 1u << VALIDITY_FLAG_SHIFT;

	// Writes the 14-bit AuxInd0 field; all other bits are preserved.
	PX_FORCE_INLINE PX_CUDA_CALLABLE void setAuxInd0(PxU32 id)
	{
		const PxU32 preserved = auxInd & ~AUX_IND0_MASK;
		auxInd = preserved | (id & AUX_IND0_MASK);
	}

	// Writes the 15-bit AuxInd1 field; all other bits are preserved.
	PX_FORCE_INLINE PX_CUDA_CALLABLE void setAuxInd1(PxU32 id)
	{
		const PxU32 preserved = auxInd & ~AUX_IND1_MASK;
		auxInd = preserved | ((id << AUX_IND1_SHIFT) & AUX_IND1_MASK);
	}

	// Reads the auxiliary index fields.
	PX_FORCE_INLINE PX_CUDA_CALLABLE PxU32 getAuxInd0() const { return auxInd & AUX_IND0_MASK; }

	PX_FORCE_INLINE PX_CUDA_CALLABLE PxU32 getAuxInd1() const { return (auxInd & AUX_IND1_MASK) >> AUX_IND1_SHIFT; }

	// Validity flag (bit 31).
	PX_FORCE_INLINE PX_CUDA_CALLABLE void markInvalid() { auxInd &= ~VALIDITY_FLAG_MASK; }

	PX_FORCE_INLINE PX_CUDA_CALLABLE void markValid() { auxInd |= VALIDITY_FLAG_MASK; }

	PX_FORCE_INLINE PX_CUDA_CALLABLE void markValidity(bool isValid)
	{
		auxInd = isValid ? (auxInd | VALIDITY_FLAG_MASK) : (auxInd & ~VALIDITY_FLAG_MASK);
	}

	PX_FORCE_INLINE PX_CUDA_CALLABLE bool isValidPair() const { return (auxInd & VALIDITY_FLAG_MASK) != 0; }

	// Pair type flag (bit 30).
	PX_FORCE_INLINE PX_CUDA_CALLABLE void markVertexTrianglePair() { auxInd &= ~PAIR_TYPE_FLAG_MASK; }

	PX_FORCE_INLINE PX_CUDA_CALLABLE void markEdgeEdgePair() { auxInd |= PAIR_TYPE_FLAG_MASK; }

	PX_FORCE_INLINE PX_CUDA_CALLABLE bool isVertexTrianglePair() const { return (auxInd & PAIR_TYPE_FLAG_MASK) == 0; }

	PX_FORCE_INLINE PX_CUDA_CALLABLE bool isEdgeEdgePair() const { return (auxInd & PAIR_TYPE_FLAG_MASK) != 0; }

	// In-collision flag (bit 29).
	PX_FORCE_INLINE PX_CUDA_CALLABLE void markInCollision(bool inContact)
	{
		auxInd = inContact ? (auxInd | CONTACT_FLAG_MASK) : (auxInd & ~CONTACT_FLAG_MASK);
	}

	PX_FORCE_INLINE PX_CUDA_CALLABLE bool isInCollision() const { return (auxInd & CONTACT_FLAG_MASK) != 0; }

} PX_ALIGN_SUFFIX(16);
|
||||
|
||||
struct PX_ALIGN_PREFIX(16) PxgFemOtherContactInfo
|
||||
{
|
||||
PxU64 pairInd0; // Rigid/Particle
|
||||
PxU32 pairInd1; // Fem id
|
||||
PxU32 rigidMatInd_isInCollision;
|
||||
|
||||
// rigidMatInd_isInCollision encodes both rigid material index and collision state
|
||||
// Bit layout of rigidMatInd_isInCollision (32 bits total):
|
||||
//
|
||||
// | 31 | 30 - 0 |
|
||||
// |-------------|---------------------|
|
||||
// | isCollision | rigidMaterialIndex |
|
||||
//
|
||||
// - Bits [0-30] : rigidMaterialIndex (up to 2^31 materials)
|
||||
// - Bit [31] : isInCollision flag (0 = not in contact, 1 = in contact)
|
||||
|
||||
static constexpr PxU32 COLLISION_FLAG_BIT = 31;
|
||||
static constexpr PxU32 COLLISION_FLAG_MASK = 1u << COLLISION_FLAG_BIT;
|
||||
static constexpr PxU32 RIGID_MAT_INDEX_MASK = ~COLLISION_FLAG_MASK;
|
||||
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE void setRigidMaterialIndex(PxU32 matIndex)
|
||||
{
|
||||
rigidMatInd_isInCollision = (rigidMatInd_isInCollision & COLLISION_FLAG_MASK) | (matIndex & RIGID_MAT_INDEX_MASK);
|
||||
}
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE void markInCollision(bool inContact)
|
||||
{
|
||||
if(inContact)
|
||||
rigidMatInd_isInCollision |= COLLISION_FLAG_MASK;
|
||||
else
|
||||
rigidMatInd_isInCollision &= ~COLLISION_FLAG_MASK;
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE PxU32 getRigidMaterialIndex() const { return rigidMatInd_isInCollision & RIGID_MAT_INDEX_MASK; }
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE bool isInCollision() const { return (rigidMatInd_isInCollision & COLLISION_FLAG_MASK) != 0; }
|
||||
}
|
||||
PX_ALIGN_SUFFIX(16);
|
||||
|
||||
PX_COMPILE_TIME_ASSERT(sizeof(PxgFemFemContactInfo) == sizeof(PxgFemOtherContactInfo));
|
||||
|
||||
// Constraint data for FEM-vs-rigid contacts, stored structure-of-arrays in
// batches of 32 constraints (NOTE(review): 32 presumably matches one CUDA
// warp per batch — confirm against the kernels). Field order and array sizes
// define the GPU memory layout; do not reorder.
struct PxgFemRigidConstraintBlock
{
	// resp represents the rigid-body term in the denominator of the impulse calculation (also referred to as the velocity multiplier
	// internally). Also refer to PBD (Position-Based Dynamics) papers.
	float4 raXn_resp[32];
	float4 raXnF0_resp[32];   // same quantity for friction direction 0
	float4 raXnF1_resp[32];   // same quantity for friction direction 1

	float4 normal_errorW[32]; // contact normal (xyz) + error term (w)

	// Friction tangent + invMass of the rigid body (avoids needing to read the mass)
	// Second tangent can be found by cross producting normal with fricTan0
	float4 fricTan0_invMass0[32];
	float4 barycentric[32];   // barycentric coordinates of the FEM contact point
	PxReal maxPenBias[32];    // maximum penetration bias per constraint
};
|
||||
|
||||
// Constraint data for FEM-vs-particle contacts, structure-of-arrays in
// batches of 32 (same batching scheme as PxgFemRigidConstraintBlock).
// Field order and array sizes define the GPU memory layout; do not reorder.
struct PxgFEMParticleConstraintBlock
{
	float4 normal_pen[32];    // contact normal (xyz) + penetration depth (w)
	float4 barycentric[32];   // barycentric coordinates of the FEM contact point
	PxReal velMultiplier[32]; // velocity multiplier (impulse denominator term)
};
|
||||
|
||||
// Attachment constraint between a FEM element and a rigid body, stored
// structure-of-arrays in batches of 32. Field order and array sizes define
// the GPU memory layout; do not reorder.
struct PxgFEMRigidAttachmentConstraint
{
	float4 baryOrType[32];               // barycentric coords or attachment type encoding
	float4 raXn0_biasW[32];              // r cross n for axis 0 (xyz) + bias (w)
	float4 raXn1_biasW[32];              // r cross n for axis 1 (xyz) + bias (w)
	float4 raXn2_biasW[32];              // r cross n for axis 2 (xyz) + bias (w)
	float4 velMultiplierXYZ_invMassW[32]; // velocity multiplier (xyz) + rigid inverse mass (w)
	float4 low_high_limits[32];          // lower/upper constraint limits
	float4 axis_angle[32];               // constraint axis + angle
	PxU32 elemId[32];                    // FEM element id
	PxU64 rigidId[32]; //node index
};
|
||||
|
||||
// Attachment constraint between two FEM elements (cloth-cloth or
// softbody-cloth), structure-of-arrays in batches of 32. The anonymous
// unions alias two interpretations of the same storage, so field order and
// array sizes define the GPU memory layout; do not reorder.
struct PxgFEMFEMAttachmentConstraint
{
	// Either distance limits or angular limits, depending on the constraint kind.
	union
	{
		float4 low_high_limits[32];
		float4 low_high_angles[32];
	};

	// Either an axis/angle or the attachment-point barycentric coordinates.
	union
	{
		float4 axis_angle[32];
		float4 attachmentBarycentric[32];
	};

	float4 barycentric0[32]; // barycentric coords on element 0
	float4 barycentric1[32]; // barycentric coords on element 1
	PxU64 elemId0[32]; //can be triangleId(cloth) or tetrahedron index
	PxU64 elemId1[32];//can be triangleId(cloth) or tetrahedron index
	float constraintOffset[32]; // rest offset between the attached points
};
|
||||
|
||||
|
||||
class PxgFEMCore : public PxgNonRigidCore
|
||||
{
|
||||
public:
|
||||
|
||||
PxgFEMCore(PxgCudaKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager,
|
||||
PxgHeapMemoryAllocatorManager* heapMemoryManager, PxgSimulationController* simController,
|
||||
PxgGpuContext* context, const PxU32 maxContacts, const PxU32 collisionStackSize, bool isTGS, PxsHeapStats::Enum statType);
|
||||
|
||||
virtual ~PxgFEMCore();
|
||||
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<float4>& getRigidContacts() { return mRigidContactPointBuf; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<float4>& getRigidNormalPens() { return mRigidContactNormalPenBuf; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<float4>& getRigidBarycentrics() { return mRigidContactBarycentricBuf; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgFemOtherContactInfo>& getRigidContactInfos() { return mRigidContactInfoBuf; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxU32>& getRigidContactCount() { return mRigidTotalContactCountBuf; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxU32>& getPrevRigidContactCount() { return mRigidPrevContactCountBuf; }
|
||||
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<float4>& getFemContacts() { return mFemContactPointBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<float4>& getFemNormalPens() { return mFemContactNormalPenBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<float4>& getFemBarycentrics0() { return mFemContactBarycentric0Buffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<float4>& getFemBarycentrics1() { return mFemContactBarycentric1Buffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgFemFemContactInfo>& getVolumeContactOrVTContactInfos() { return mVolumeContactOrVTContactInfoBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgFemFemContactInfo>& getEEContactInfos() { return mEEContactInfoBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxU32>& getVolumeContactOrVTContactCount() { return mVolumeContactOrVTContactCountBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxU32>& getEEContactCount() { return mEEContactCountBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxU32>& getPrevFemContactCount() { return mPrevFemContactCountBuffer; }
|
||||
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<float4>& getParticleContacts() { return mParticleContactPointBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<float4>& getParticleNormalPens() { return mParticleContactNormalPenBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<float4>& getParticleBarycentrics() { return mParticleContactBarycentricBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgFemOtherContactInfo>& getParticleContactInfos() { return mParticleContactInfoBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxU32>& getParticleContactCount() { return mParticleTotalContactCountBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxU32>& getPrevParticleContactCount() { return mPrevParticleContactCountBuffer; }
|
||||
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxReal> getSpeculativeCCDContactOffset() { return mSpeculativeCCDContactOffset.getTypedDevicePtr(); }
|
||||
|
||||
void reserveRigidDeltaVelBuf(PxU32 newCapacity);
|
||||
|
||||
void reorderRigidContacts();
|
||||
|
||||
void copyContactCountsToHost();
|
||||
|
||||
protected:
|
||||
|
||||
void accumulateRigidDeltas(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd, PxgDevicePointer<PxgSolverCoreDesc> solverCoreDescd,
|
||||
PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd, PxgDevicePointer<PxgArticulationCoreDesc> artiCoreDescd,
|
||||
PxgDevicePointer<PxNodeIndex> rigidIdsd, PxgDevicePointer<PxU32> numIdsd, CUstream stream, bool isTGS);
|
||||
|
||||
//rigid body and fem contacts
|
||||
PxgTypedCudaBuffer<float4> mRigidContactPointBuf; //float4
|
||||
PxgTypedCudaBuffer<float4> mRigidContactNormalPenBuf; //float4
|
||||
PxgTypedCudaBuffer<float4> mRigidContactBarycentricBuf; //float4
|
||||
PxgTypedCudaBuffer<PxgFemOtherContactInfo> mRigidContactInfoBuf;
|
||||
PxgTypedCudaBuffer<PxU32> mRigidTotalContactCountBuf; //PxU32
|
||||
PxgTypedCudaBuffer<PxU32> mRigidPrevContactCountBuf; //PxU32
|
||||
|
||||
PxgTypedCudaBuffer<float4> mRigidSortedContactPointBuf;
|
||||
PxgTypedCudaBuffer<float4> mRigidSortedContactNormalPenBuf;
|
||||
PxgTypedCudaBuffer<float4> mRigidSortedContactBarycentricBuf; //float4
|
||||
PxgTypedCudaBuffer<PxU64> mRigidSortedRigidIdBuf;
|
||||
PxgTypedCudaBuffer<PxgFemOtherContactInfo> mRigidSortedContactInfoBuf;
|
||||
|
||||
// Reference count of each rigid body that interacts with deformable objects.
|
||||
// A single rigid body can have multiple reference counts when it is in contact with multiple triangles, tetrahedra, vertices, etc.
|
||||
// from deformable surfaces or deformable bodies. Currently, this is used only for contact constraints, but it can also be used for
|
||||
// attachment constraints.
|
||||
PxgTypedCudaBuffer<PxU32> mFemRigidReferenceCount;
|
||||
|
||||
//fem vs fem and fem self collision contacts
|
||||
PxgTypedCudaBuffer<float4> mFemContactPointBuffer; //float4
|
||||
PxgTypedCudaBuffer<float4> mFemContactNormalPenBuffer; //float4
|
||||
PxgTypedCudaBuffer<float4> mFemContactBarycentric0Buffer; //float4
|
||||
PxgTypedCudaBuffer<float4> mFemContactBarycentric1Buffer; //float4
|
||||
PxgTypedCudaBuffer<PxgFemFemContactInfo> mVolumeContactOrVTContactInfoBuffer;
|
||||
PxgTypedCudaBuffer<PxgFemFemContactInfo> mEEContactInfoBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mVolumeContactOrVTContactCountBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mEEContactCountBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mPrevFemContactCountBuffer;
|
||||
|
||||
PxgTypedCudaBuffer<PxReal> mSpeculativeCCDContactOffset;
|
||||
|
||||
//fem body vs particle system collision contacts
|
||||
PxgTypedCudaBuffer<float4> mParticleContactPointBuffer; //float4
|
||||
PxgTypedCudaBuffer<float4> mParticleContactNormalPenBuffer; //float4
|
||||
PxgTypedCudaBuffer<float4> mParticleContactBarycentricBuffer; //float4
|
||||
PxgTypedCudaBuffer<PxgFemOtherContactInfo> mParticleContactInfoBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mParticleTotalContactCountBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mPrevParticleContactCountBuffer;
|
||||
|
||||
PxgTypedCudaBuffer<float4> mParticleSortedContactPointBuffer;
|
||||
PxgTypedCudaBuffer<float4> mParticleSortedContactBarycentricBuffer; //float4
|
||||
PxgTypedCudaBuffer<float4> mParticleSortedContactNormalPenBuffer;
|
||||
PxgTypedCudaBuffer<PxgFemOtherContactInfo> mParticleSortedContactInfoBuffer;
|
||||
|
||||
|
||||
//contact prep buffer
|
||||
PxgTypedCudaBuffer<PxgFemRigidConstraintBlock> mRigidConstraintBuf; //constraint prep for rigid body vs fem
|
||||
PxgCudaBuffer mFemConstraintBuf; //constraint prep for fem vs fem(including self collision)
|
||||
PxgTypedCudaBuffer<PxgFEMParticleConstraintBlock> mParticleConstraintBuf; //constraint prep for particle vs fem
|
||||
|
||||
//To do: ideally, we want to use two separate stream to solve the rigid body collision
|
||||
PxgTypedCudaBuffer<PxReal> mRigidFEMAppliedForcesBuf;
|
||||
|
||||
PxgTypedCudaBuffer<float4> mFemAppliedForcesBuf; //applied force for fem due to collision between fem and fem or self collision
|
||||
|
||||
PxgTypedCudaBuffer<float4> mParticleAppliedFEMForcesBuf; //applied force for fem due to collision between particle system and fem
|
||||
PxgTypedCudaBuffer<float4> mParticleAppliedParticleForcesBuf; //applied force for particle system due to collision between particle system and fem
|
||||
|
||||
PxgTypedCudaBuffer<float4> mRigidDeltaVelBuf;
|
||||
|
||||
//Temp buffer to accumulate rigid delta velocity changes
|
||||
PxgTypedCudaBuffer<PxVec4> mTempBlockDeltaVelBuf;
|
||||
PxgTypedCudaBuffer<PxU64> mTempBlockRigidIdBuf;
|
||||
|
||||
//Temp buffer for sorted particle contacts
|
||||
PxgCudaBuffer mTempCellsHistogramBuf;
|
||||
PxgTypedCudaBuffer<PxU32> mTempBlockCellsHistogramBuf;
|
||||
PxgTypedCudaBuffer<PxU32> mTempHistogramCountBuf;
|
||||
|
||||
bool mIsTGS;
|
||||
PxU32* mRigidContactCountPrevTimestep; //Pinned memory
|
||||
PxU32* mVolumeContactorVTContactCountPrevTimestep; //Pinned memory
|
||||
PxU32* mEEContactCountPrevTimestep; //Pinned memory
|
||||
PxU32* mParticleContactCountPrevTimestep; //Pinned memory
|
||||
|
||||
#if PX_ENABLE_SIM_STATS
|
||||
PxU32 mContactCountStats; // for simStats.
|
||||
#else
|
||||
PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
|
||||
#endif
|
||||
|
||||
CUevent mFinalizeEvent;
|
||||
};
|
||||
|
||||
|
||||
struct PxgFEMContactWriter
|
||||
{
|
||||
float4* outPoint;
|
||||
float4* outNormalPen;
|
||||
float4* outBarycentric;
|
||||
PxgFemOtherContactInfo* outContactInfo;
|
||||
PxU32* totalContactCount;
|
||||
|
||||
//Buffers for sorting. X either stands for Rigid or Particle
|
||||
PxU64* contactByX; //value
|
||||
PxU32* tempContactByX; //the lower 32 bit o value
|
||||
PxU32* contactIndexSortedByX; //rank
|
||||
PxU32* contactSortedByX; // AD: I'm not sure what we use this for. We do use the underlying buffer as the output buffer of RS_COPY_VALUE, so either we're skipping the radix sort in some cases or we write some unnecessary stuff here.
|
||||
|
||||
PxU32 maxNumContacts;
|
||||
|
||||
PxgFEMContactWriter(PxgFEMCore* femCore, bool useParticleForSorting = false)
|
||||
{
|
||||
//Ensure a rigid index has the same size as an encoded particle index
|
||||
PX_COMPILE_TIME_ASSERT(sizeof(PxNodeIndex) == sizeof(PxU64));
|
||||
|
||||
if (useParticleForSorting)
|
||||
{
|
||||
outPoint = femCore->getParticleContacts().getTypedPtr();
|
||||
outNormalPen = femCore->getParticleNormalPens().getTypedPtr();
|
||||
outBarycentric = femCore->getParticleBarycentrics().getTypedPtr();
|
||||
outContactInfo = femCore->getParticleContactInfos().getTypedPtr();
|
||||
totalContactCount = femCore->getParticleContactCount().getTypedPtr();
|
||||
|
||||
contactByX = femCore->getContactSortedByParticle().getTypedPtr();
|
||||
tempContactByX = femCore->getTempContactByParticle().getTypedPtr();
|
||||
contactIndexSortedByX = femCore->getContactRemapSortedByParticle().getTypedPtr();
|
||||
contactSortedByX = NULL; //Does not exist for particles
|
||||
}
|
||||
else
|
||||
{
|
||||
outPoint = femCore->getRigidContacts().getTypedPtr();
|
||||
outNormalPen = femCore->getRigidNormalPens().getTypedPtr();
|
||||
outBarycentric = femCore->getRigidBarycentrics().getTypedPtr();
|
||||
outContactInfo = femCore->getRigidContactInfos().getTypedPtr();
|
||||
totalContactCount = femCore->getRigidContactCount().getTypedPtr();
|
||||
|
||||
contactByX = reinterpret_cast<PxU64*>(femCore->getContactByRigid().getTypedPtr());
|
||||
tempContactByX = femCore->getTempContactByRigid().getTypedPtr();
|
||||
contactIndexSortedByX = femCore->getContactRemapSortedByRigid().getTypedPtr();
|
||||
contactSortedByX = reinterpret_cast<PxU32*>(femCore->getContactSortedByRigid().getTypedPtr()); //Cast from larger type to smaller type - no memory overflow
|
||||
}
|
||||
maxNumContacts = femCore->mMaxContacts;
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE bool writeContactCore(PxU32 index, const float4& contact, const float4& normalPen, PxU64 rigidId)
|
||||
{
|
||||
if (index >= maxNumContacts)
|
||||
return false;
|
||||
|
||||
contactByX[index] = rigidId;
|
||||
tempContactByX[index] = PxU32(rigidId & 0xffffffff);
|
||||
contactIndexSortedByX[index] = index;
|
||||
|
||||
outPoint[index] = contact;
|
||||
outNormalPen[index] = normalPen;
|
||||
/*outContactInfo[index].pairInd0 = pairInd0;
|
||||
outContactInfo[index].pairInd1 = pairInd1;*/
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE bool writeContactNoBarycentric(PxU32 index, const float4& contact, const float4& normalPen,
|
||||
PxU64 pairInd0, PxU32 pairInd1, PxU64 rigidId)
|
||||
{
|
||||
if (index >= maxNumContacts)
|
||||
return false;
|
||||
|
||||
contactByX[index] = rigidId;
|
||||
tempContactByX[index] = PxU32(rigidId & 0xffffffff);
|
||||
contactIndexSortedByX[index] = index;
|
||||
|
||||
outPoint[index] = contact;
|
||||
outNormalPen[index] = normalPen;
|
||||
outContactInfo[index].pairInd0 = pairInd0;
|
||||
outContactInfo[index].pairInd1 = pairInd1;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE bool writeRigidVsDeformableContactNoBarycentric(PxU32 index, const float4& contact, const float4& normalPen,
|
||||
PxU64 pairInd0, PxU32 pairInd1, PxU64 rigidId, PxU32 rigidBodyMaterialId)
|
||||
{
|
||||
if (index >= maxNumContacts)
|
||||
return false;
|
||||
|
||||
contactByX[index] = rigidId;
|
||||
tempContactByX[index] = PxU32(rigidId & 0xffffffff);
|
||||
contactIndexSortedByX[index] = index;
|
||||
|
||||
outPoint[index] = contact;
|
||||
outNormalPen[index] = normalPen;
|
||||
PxgFemOtherContactInfo* ptr = reinterpret_cast<PxgFemOtherContactInfo*>(&outContactInfo[index]);
|
||||
ptr->pairInd0 = pairInd0;
|
||||
ptr->pairInd1 = pairInd1;
|
||||
ptr->setRigidMaterialIndex(rigidBodyMaterialId);
|
||||
ptr->markInCollision(false);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE bool writeContact(PxU32 index, const float4& contact, const float4& normalPen, const float4& barycentric,
|
||||
PxU64 pairInd0, PxU32 pairInd1, PxU64 id)
|
||||
{
|
||||
if (index >= maxNumContacts)
|
||||
return false;
|
||||
|
||||
writeContactNoBarycentric(index, contact, normalPen, pairInd0, pairInd1, id);
|
||||
outBarycentric[index] = barycentric;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE bool writeContact(PxU32 index, const float4& contact, const float4& normalPen, const float4& barycentric,
|
||||
PxU64 pairInd0, PxU32 pairInd1, PxNodeIndex rigidId)
|
||||
{
|
||||
return writeContact(index, contact, normalPen, barycentric, pairInd0, pairInd1, rigidId.getInd());
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE bool writeRigidVsDeformableContact(PxU32 index, const float4& contact, const float4& normalPen, const float4& barycentric,
|
||||
PxU64 pairInd0, PxU32 pairInd1, PxU32 rigidMaterialIndex, PxNodeIndex rigidId)
|
||||
{
|
||||
bool result = writeContactCore(index, contact, normalPen, rigidId.getInd());
|
||||
if (result)
|
||||
{
|
||||
outBarycentric[index] = barycentric;
|
||||
PxgFemOtherContactInfo* femRigidInfo = reinterpret_cast<PxgFemOtherContactInfo*>(outContactInfo);
|
||||
femRigidInfo[index].pairInd0 = pairInd0;
|
||||
femRigidInfo[index].pairInd1 = pairInd1;
|
||||
femRigidInfo[index].setRigidMaterialIndex(rigidMaterialIndex);
|
||||
femRigidInfo[index].markInCollision(false);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE bool writeRigidVsDeformableContact32(PxU32 index, const float4& contact, const float4& normalPen, const float4& barycentric,
|
||||
PxU64 pairInd0, PxU32 pairInd1, PxU32 rigidMaterialIndex, PxU32 contactSortedByRigid_)
|
||||
{
|
||||
if (index >= maxNumContacts)
|
||||
return false;
|
||||
|
||||
outPoint[index] = contact;
|
||||
outNormalPen[index] = normalPen;
|
||||
outBarycentric[index] = barycentric;
|
||||
|
||||
PxgFemOtherContactInfo* femRigidInfo = reinterpret_cast<PxgFemOtherContactInfo*>(outContactInfo);
|
||||
femRigidInfo[index].pairInd0 = pairInd0;
|
||||
femRigidInfo[index].pairInd1 = pairInd1;
|
||||
femRigidInfo[index].setRigidMaterialIndex(rigidMaterialIndex);
|
||||
femRigidInfo[index].markInCollision(false);
|
||||
|
||||
contactSortedByX[index] = contactSortedByRigid_;
|
||||
contactIndexSortedByX[index] = index;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE bool writeContact32(PxU32 index, const float4& contact, const float4& normalPen, const float4& barycentric,
|
||||
PxU64 pairInd0, PxU32 pairInd1, PxU32 contactSortedByRigid_)
|
||||
{
|
||||
if (index >= maxNumContacts)
|
||||
return false;
|
||||
|
||||
outPoint[index] = contact;
|
||||
outNormalPen[index] = normalPen;
|
||||
outBarycentric[index] = barycentric;
|
||||
outContactInfo[index].pairInd0 = pairInd0;
|
||||
outContactInfo[index].pairInd1 = pairInd1;
|
||||
|
||||
contactSortedByX[index] = contactSortedByRigid_;
|
||||
contactIndexSortedByX[index] = index;
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
212
engine/third_party/physx/source/gpusimulationcontroller/include/PxgInterpolation.h
vendored
Normal file
212
engine/third_party/physx/source/gpusimulationcontroller/include/PxgInterpolation.h
vendored
Normal file
@@ -0,0 +1,212 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_INTERPOLATION_H
|
||||
#define PXG_INTERPOLATION_H
|
||||
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
#include "foundation/PxMat44.h"
|
||||
#include "foundation/PxMathUtils.h"
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
namespace physx
|
||||
{
|
||||
#endif
|
||||
// Applies an additional rotation of 'angle' radians around 'rotationAxis' to quaternion q.
// Degenerate inputs (near-zero axis length, or tiny angle) return q unchanged.
// NOTE(review): the early-out uses 'angle < 1e-3f', which also rejects every negative
// angle - presumably callers only pass non-negative angles; confirm.
PX_FORCE_INLINE PX_CUDA_CALLABLE PxQuat rotateAroundAxis(const PxQuat& q, PxVec3 rotationAxis, PxReal angle)
{
	PxReal mag2 = rotationAxis.magnitudeSquared();
	if (mag2 < 1e-8f || angle < 1e-3f)
		return q;
	rotationAxis *= 1.0f / PxSqrt(mag2); // normalize the axis before building the quaternion
	const PxQuat rot(angle, rotationAxis);
	return rot * q; // pre-multiply: rotate q by the new rotation
}
|
||||
|
||||
// Advances a Catmull-Rom knot value: returns t + |p1 - p0|^alpha
// (alpha = 0.5 corresponds to the centripetal parameterization).
PX_FORCE_INLINE PX_CUDA_CALLABLE PxReal getT(PxReal t, PxReal alpha, const PxVec3& p0, const PxVec3& p1)
{
	const PxVec3 delta = p1 - p0;
	const PxReal distSq = delta.magnitudeSquared();
	const PxReal knotStep = PxPow(distSq, alpha * 0.5f);
	return knotStep + t;
}
|
||||
|
||||
// Linear interpolation between p0 and p1 at parameter t (t = 0 -> p0, t = 1 -> p1).
PX_FORCE_INLINE PX_CUDA_CALLABLE PxReal lerp(PxReal p0, PxReal p1, PxReal t)
{
	const PxReal weight0 = 1.0f - t;
	return weight0 * p0 + t * p1;
}
|
||||
|
||||
//https://en.wikipedia.org/wiki/Centripetal_CatmullRom_spline
|
||||
// Evaluates the Catmull-Rom segment between p1 and p2; p0 and p3 are the neighboring
// control points that shape the tangents. alpha = 0.5 gives the centripetal
// parameterization (see the Wikipedia link above).
inline PX_CUDA_CALLABLE PxVec3 evaluateSpline(const PxVec3& p0, const PxVec3& p1, const PxVec3& p2,
	const PxVec3& p3, PxReal t, PxReal alpha = 0.5f)
{
	// Knot values accumulated from the distances between successive control points
	PxReal t0 = 0.0f;
	PxReal t1 = getT(t0, alpha, p0, p1);
	PxReal t2 = getT(t1, alpha, p1, p2);
	PxReal t3 = getT(t2, alpha, p2, p3);
	// Remap the caller's t in [0, 1] onto the knot interval [t1, t2]
	t = lerp(t1, t2, t);
	// Recursive pairwise interpolation down to the final curve point C.
	// The PxMax(0.0001f, ...) guards divide against (near-)zero knot spans
	// caused by coincident control points.
	PxVec3 A1 = ((t1 - t) * p0 + (t - t0) * p1) * (1.0f / PxMax(0.0001f, t1 - t0));
	PxVec3 A2 = ((t2 - t) * p1 + (t - t1) * p2)* (1.0f / PxMax(0.0001f, t2 - t1));
	PxVec3 A3 = ((t3 - t) * p2 + (t - t2) * p3)* (1.0f / PxMax(0.0001f, t3 - t2));
	PxVec3 B1 = ((t2 - t) * A1 + (t - t0) * A2)* (1.0f / PxMax(0.0001f, t2 - t0));
	PxVec3 B2 = ((t3 - t) * A2 + (t - t1) * A3)* (1.0f / PxMax(0.0001f, t3 - t1));
	PxVec3 C = ((t2 - t) * B1 + (t - t1) * B2)* (1.0f / PxMax(0.0001f, t2 - t1));
	return C;
}
|
||||
|
||||
// Evaluates the Catmull-Rom spline defined by four control points.
// The degenerate single-point case returns that point directly.
inline PX_CUDA_CALLABLE PxVec3 evaluateSpline(const PxVec3 controlPoints[4], PxU32 numControlPoints, PxReal t, PxReal alpha = 0.5f)
{
	if (numControlPoints != 1)
		return evaluateSpline(controlPoints[0], controlPoints[1], controlPoints[2], controlPoints[3], t, alpha);
	return controlPoints[0];
}
|
||||
|
||||
// Extracts the xyz components of a PxVec4 as a PxVec3.
PX_FORCE_INLINE PX_CUDA_CALLABLE PxVec3 PxLoad3(const PxVec4& v)
{
	// Read the components directly; the previous local PxVec4 copy was redundant.
	return PxVec3(v.x, v.y, v.z);
}
|
||||
|
||||
// Loads the xyz part of a 4-component value and transforms it (as a point) by 'transf'.
// V4 must be a type PxLoad3 accepts (e.g. PxVec4).
template<typename V4>
inline PX_CUDA_CALLABLE PxVec3 load3(const V4& val, const PxMat44& transf)
{
	return transf.transform(PxLoad3(val));
}
|
||||
|
||||
//Handles boundary case and forwards to standard evaluateSpline
|
||||
// Evaluates the Catmull-Rom spline through 'points' (transformed by 'transf') on the
// segment starting at 'index', at local parameter t. Boundary segments synthesize
// virtual end control points by mirroring the first/last edge; degenerate inputs
// (0 or 1 points) are handled explicitly.
template<typename V4>
inline PX_CUDA_CALLABLE PxVec3 evaluateSpline(const V4* points, PxU32 nbPoints, PxU32 index, PxReal t, const PxMat44& transf, PxReal alpha = 0.5f)
{
	if (nbPoints == 0)
		return PxVec3(0.0f); // no data - return origin
	if (nbPoints < 2)
		return load3(points[0], transf); // single point - no segment to interpolate
	// Clamp so that points[index] and points[index + 1] are always valid
	if (index > nbPoints - 2)
		index = nbPoints - 2;
	PxVec3 p0;
	if (index == 0)
		// First segment: mirror the first edge to create a virtual predecessor point
		p0 = load3(points[0], transf) - (load3(points[1], transf) - load3(points[0], transf));
	else
		p0 = load3(points[index - 1], transf);
	PxVec3 p3;
	if (index == nbPoints - 2)
		// Last segment: mirror the last edge to create a virtual successor point
		p3 = load3(points[nbPoints - 1], transf) + (load3(points[nbPoints - 1], transf) - load3(points[nbPoints - 2], transf));
	else
		p3 = load3(points[index + 2], transf);
	return evaluateSpline(p0, load3(points[index], transf), load3(points[index + 1], transf), p3, t, alpha);
}
|
||||
|
||||
// Fetches the fourth (trailing) control point for the spline segment starting at 'index',
// transformed by 'transf'. At the end of the point list a virtual point is synthesized by
// mirroring the final edge. Returns the number of valid control points: 1 for the
// degenerate cases (0 or 1 input points), 4 otherwise.
template<typename V4>
PX_FORCE_INLINE PX_CUDA_CALLABLE PxU32 catmullRomFetchLastPoint(const V4* points, PxU32 nbPoints, PxU32 index, PxVec3& controlPoint, const PxMat44& transf)
{
	if (nbPoints == 0)
	{
		// No input points: report a single dummy control point at the origin
		controlPoint = PxVec3(0.0f);
		return 1;
	}
	if (nbPoints < 2)
	{
		// A single point cannot form a segment - return it as-is
		controlPoint = load3(points[0], transf);
		return 1;
	}
	if (index >= nbPoints - 2)
	{
		// Last segment: extrapolate a virtual point by mirroring the final edge
		PxVec3 p = load3(points[nbPoints - 1], transf);
		controlPoint = p + (p - load3(points[nbPoints - 2], transf));
	}
	else
		controlPoint = load3(points[index + 2], transf);
	return 4;
}
|
||||
|
||||
// Shifts the 4-point control window one slot to the left and appends the next
// control point at the end (sliding-window advance along the strand).
inline PX_CUDA_CALLABLE void insertNextControlPoint(PxVec3 controlPoints[4], const PxVec3& nextControlPoint)
{
	for (PxU32 slot = 0; slot < 3; ++slot)
		controlPoints[slot] = controlPoints[slot + 1];
	controlPoints[3] = nextControlPoint;
}
|
||||
|
||||
// Fetches all four control points for the spline segment starting at 'index', transformed
// by 'transf'. Boundary segments synthesize virtual end points by mirroring the first/last
// edge. Returns the number of valid control points: 1 for the degenerate cases (0 or 1
// input points), 4 otherwise.
template<typename V4>
inline PX_CUDA_CALLABLE PxU32 catmullRomFetchControlPoints(const V4* points, PxU32 nbPoints, PxU32 index, PxVec3 controlPoints[4], const PxMat44& transf)
{
	if (nbPoints == 0)
	{
		// No input points: report a single dummy control point at the origin
		controlPoints[0] = PxVec3(0.0f);
		return 1;
	}
	if (nbPoints < 2)
	{
		// A single point cannot form a segment - return it as-is
		controlPoints[0] = load3(points[0], transf);
		return 1;
	}
	// Clamp so that points[index] and points[index + 1] are always valid
	if (index > nbPoints - 2)
		index = nbPoints - 2;
	if (index <= 0)
	{
		// First segment: mirror the first edge to create a virtual predecessor point
		PxVec3 p = load3(points[0], transf);
		controlPoints[0] = p - (load3(points[1], transf) - p);
	}
	else
		controlPoints[0] = load3(points[index - 1], transf);
	if (index >= nbPoints - 2)
	{
		// Last segment: mirror the last edge to create a virtual successor point
		PxVec3 p = load3(points[nbPoints - 1], transf);
		controlPoints[3] = p + (p - load3(points[nbPoints - 2], transf));
	}
	else
		controlPoints[3] = load3(points[index + 2], transf);
	controlPoints[1] = load3(points[index], transf);
	controlPoints[2] = load3(points[index + 1], transf);
	return 4;
}
|
||||
|
||||
// Packs a segment-local uniform coordinate and the segment's index within its strand into
// one float: integer part = segment index, fractional part = uniform, sign = 'last' flag.
// Decoded by decodeStrandLocation below.
PX_FORCE_INLINE PX_CUDA_CALLABLE PxReal encodeStrandLocation(PxReal uniform, PxU32 segmentIndexInStrand, bool markAsLast)
{
	// Index must be > 0 - presumably because with index 0 and uniform 0 the sign
	// (and thus the 'last' flag) would be lost in -0.0f; confirm.
	PX_ASSERT(segmentIndexInStrand > 0);
	uniform = PxClamp(uniform, 0.0f, 0.999f); //Avoid the uniform to be smaller 0 or exactly 1 because then fractional and integer part cannot be separated reliably
	return (markAsLast ? -1.0f : 1.0f) * (uniform + segmentIndexInStrand);
}
|
||||
|
||||
// Inverse of encodeStrandLocation: recovers the segment index (integer part), the
// 'last' flag (sign), and returns the segment-local uniform (fractional part).
PX_FORCE_INLINE PX_CUDA_CALLABLE PxReal decodeStrandLocation(PxReal strandValue, PxU32& segmentIndexInStrand, bool& isLast)
{
	isLast = strandValue < 0.0f;
	const PxReal magnitude = PxAbs(strandValue);
	segmentIndexInStrand = PxU32(magnitude);
	return magnitude - segmentIndexInStrand;
}
|
||||
|
||||
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
} // namespace physx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
191
engine/third_party/physx/source/gpusimulationcontroller/include/PxgIsosurfaceData.h
vendored
Normal file
191
engine/third_party/physx/source/gpusimulationcontroller/include/PxgIsosurfaceData.h
vendored
Normal file
@@ -0,0 +1,191 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PX_ISOSURFACE_DATA_H
|
||||
#define PX_ISOSURFACE_DATA_H
|
||||
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
#include "PxSparseGridParams.h"
|
||||
#include "PxgSparseGridDataStandalone.h"
|
||||
#include "PxgDenseGridDataStandalone.h"
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
namespace physx
|
||||
{
|
||||
#endif
|
||||
/**
|
||||
\brief Bundles all data used to extract an isosurface on a dense grid
|
||||
*/
|
||||
struct PxIsosurfaceExtractionData
|
||||
{
|
||||
PxIsosurfaceExtractionData() : mGrid(), kernelSize(0.0f), restDensity(0), threshold(0), firstCellVert(NULL), swapState(0),
|
||||
numVerticesNumIndices(NULL), maxVerts(0), maxTriIds(0), verts(NULL), normals(NULL), triIds(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
PxDenseGridData mGrid;
|
||||
PxReal kernelSize;
|
||||
PxReal restDensity;
|
||||
PxReal threshold;
|
||||
|
||||
// grid
|
||||
PxReal* buffer[2];
|
||||
PxU32* firstCellVert;
|
||||
|
||||
PxU32 swapState;
|
||||
|
||||
// mesh
|
||||
PxU32* numVerticesNumIndices; //Pointer to a GPU buffer to allow for device to host copy
|
||||
PxU32 maxVerts, maxTriIds;
|
||||
|
||||
PxVec4* verts;
|
||||
PxVec4* normals;
|
||||
PxU32* triIds;
|
||||
|
||||
PxVec4* smoothingBuffer;
|
||||
|
||||
/**
|
||||
\brief Access to the density device array
|
||||
|
||||
\return The density devixe array
|
||||
*/
|
||||
PX_CUDA_CALLABLE PxReal* density()
|
||||
{
|
||||
return buffer[swapState];
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Access to the start triangle id per cell device array
|
||||
|
||||
\return The start triangle id per cell device array
|
||||
*/
|
||||
PX_CUDA_CALLABLE PxU32* firstCellTriId()
|
||||
{
|
||||
return reinterpret_cast<PxU32*>(buffer[1 - swapState]);
|
||||
}
|
||||
|
||||
/**
|
||||
\brief The grid's cell size
|
||||
|
||||
\return The cell size
|
||||
*/
|
||||
PX_CUDA_CALLABLE PxReal getSpacing()
|
||||
{
|
||||
return mGrid.mGridParams.gridSpacing;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief The number of cells in the dense grid
|
||||
|
||||
\return The number of cells
|
||||
*/
|
||||
PX_CUDA_CALLABLE PxU32 maxNumCells()
|
||||
{
|
||||
return mGrid.maxNumCells();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
\brief Bundles all data used to extract an isosurface on a sparse grid
|
||||
*/
|
||||
struct PxSparseIsosurfaceExtractionData
|
||||
{
|
||||
PxSparseIsosurfaceExtractionData() : mGrid(), kernelSize(0.0f), restDensity(0), threshold(0), firstCellVert(NULL), swapState(0),
|
||||
numVerticesNumIndices(NULL), maxVerts(0), maxTriIds(0), verts(NULL), normals(NULL), triIds(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
PxSparseGridData mGrid;
|
||||
PxReal* buffer[2];
|
||||
|
||||
PxReal kernelSize;
|
||||
PxReal restDensity;
|
||||
PxReal threshold;
|
||||
PxU32* firstCellVert;
|
||||
|
||||
PxU32 swapState;
|
||||
|
||||
// mesh
|
||||
PxU32* numVerticesNumIndices; //Pointer to a GPU buffer to allow for device to host copy
|
||||
PxU32 maxVerts, maxTriIds;
|
||||
|
||||
PxVec4* verts;
|
||||
PxVec4* normals;
|
||||
PxU32* triIds;
|
||||
|
||||
PxVec4* smoothingBuffer;
|
||||
|
||||
|
||||
/**
|
||||
\brief Access to the density device array
|
||||
|
||||
\return The density device array
|
||||
*/
|
||||
PX_CUDA_CALLABLE PxReal* density()
|
||||
{
|
||||
return buffer[swapState];
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Access to the start triangle id per cell device array
|
||||
|
||||
\return The start triangle id per cell device array
|
||||
*/
|
||||
PX_CUDA_CALLABLE PxU32* firstCellTriId()
|
||||
{
|
||||
return reinterpret_cast<PxU32*>(buffer[1 - swapState]);
|
||||
}
|
||||
|
||||
/**
|
||||
\brief The grid's cell size
|
||||
|
||||
\return The cell size
|
||||
*/
|
||||
PX_CUDA_CALLABLE PxReal getSpacing()
|
||||
{
|
||||
return mGrid.mGridParams.gridSpacing;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief The maximal number of cells in the sparse grid, not all of them are always in use
|
||||
|
||||
\return The number of cells
|
||||
*/
|
||||
PX_CUDA_CALLABLE PxU32 maxNumCells()
|
||||
{
|
||||
return mGrid.maxNumCells();
|
||||
}
|
||||
};
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
} // namespace physx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
301
engine/third_party/physx/source/gpusimulationcontroller/include/PxgIsosurfaceExtraction.h
vendored
Normal file
301
engine/third_party/physx/source/gpusimulationcontroller/include/PxgIsosurfaceExtraction.h
vendored
Normal file
@@ -0,0 +1,301 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_ISOSURFACE_EXTRACTION_H
|
||||
#define PXG_ISOSURFACE_EXTRACTION_H
|
||||
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
|
||||
#include "PxSparseGridParams.h"
|
||||
|
||||
#include "foundation/PxArray.h"
|
||||
|
||||
#include "PxIsosurfaceExtraction.h"
|
||||
|
||||
#include "PxgSparseGridStandalone.h"
|
||||
#include "PxgAlgorithms.h"
|
||||
#include "PxgIsosurfaceData.h"
|
||||
#include "PxgKernelLauncher.h"
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
namespace physx
|
||||
{
|
||||
#endif
|
||||
|
||||
#if PX_SUPPORT_GPU_PHYSX
|
||||
|
||||
// State shared by the dense- and sparse-grid isosurface extractors: extraction
// parameters, a GPU scan helper, the kernel launcher, and the output mesh buffers.
class PxgSharedIsosurfaceExtractor
{
public:
	bool mEnabled;
	PxIsosurfaceParams mIsosurfaceParams;

	PxGpuScan mScan;

	PxgKernelLauncher mKernelLauncher;
	// GPU buffer holding the resulting vertex/index counts, enabling device-to-host readback
	PxU32* mNumVerticesNumIndices;

	// True when this extractor allocated (and is responsible for freeing) the output buffers below
	bool mOwnsOutputGPUBuffers;
	PxVec4* mVertices;
	PxVec4* mNormals;
	PxU32* mTriIndices;

	//public:
	// NOTE(review): mIsosurfaceParams and mScan rely on their default constructors here.
	PxgSharedIsosurfaceExtractor() : mEnabled(true), mKernelLauncher(), mNumVerticesNumIndices(NULL), mOwnsOutputGPUBuffers(false),
		mVertices(NULL), mNormals(NULL), mTriIndices(NULL)
	{}

	virtual ~PxgSharedIsosurfaceExtractor() {}

	// Extracts the isosurface mesh from the given particle positions on the dense or
	// sparse grid package. Optional phase mask / active indices filter the particles;
	// optional anisotropy vectors shape each particle's contribution.
	template<typename DenseOrSparseGpuDataPackage>
	void extractIso(DenseOrSparseGpuDataPackage& mData, PxVec4* deviceParticlePos, const PxU32 numParticles, CUstream stream, PxU32* phases, PxU32 validPhaseMask,
		PxU32* activeIndices = NULL, PxVec4* anisotropy1 = NULL, PxVec4* anisotropy2 = NULL, PxVec4* anisotropy3 = NULL, PxReal anisotropyFactor = 1.0f);

	// Builds the triangle mesh from an already-computed density field.
	template<typename DenseOrSparseGpuDataPackage>
	void meshFromDensity(DenseOrSparseGpuDataPackage& mData, CUstream stream);
};
|
||||
|
||||
/**
\brief GPU based isosurface extractor operating on a sparse grid
*/
class PxgSparseGridIsosurfaceExtractor : public PxSparseGridIsosurfaceExtractor, public PxUserAllocated
{
protected:
	PxgSharedIsosurfaceExtractor mShared;		// state shared with the dense-grid variant (params, output buffers, launcher)
	PxSparseIsosurfaceExtractionData mData;		// sparse-grid marching-cubes data (capacities, intermediate buffers)
	PxSparseGridBuilder mSparseGrid;			// builds/owns the sparse grid; also the source of grid params and particle capacity

	// Propagates mShared.mIsosurfaceParams into the marching-cubes data (mData).
	void paramsToMCData();

	// Re-dimensions the output-related buffers for the given vertex/triangle capacities.
	virtual void setMaxVerticesAndTriangles(PxU32 maxIsosurfaceVertices, PxU32 maxIsosurfaceTriangles);

	virtual void releaseGPUBuffers();

	virtual void allocateGPUBuffers();

public:
	// Creates an uninitialized extractor; initialize() must be called before use.
	PxgSparseGridIsosurfaceExtractor() : mShared()
	{}

	// Convenience constructor: forwards all arguments to initialize().
	PxgSparseGridIsosurfaceExtractor(PxgKernelLauncher& cudaContextManager, const PxSparseGridParams sparseGridParams,
		const PxIsosurfaceParams& isosurfaceParams, PxU32 maxNumParticles, PxU32 maxNumVertices, PxU32 maxNumTriangles) : mShared()
	{
		initialize(cudaContextManager, sparseGridParams, isosurfaceParams, maxNumParticles, maxNumVertices, maxNumTriangles);
	}

	// Sets user-provided device-memory result buffers (presumably clears the ownership flag - confirm in implementation).
	virtual void setResultBufferDevice(PxVec4* vertices, PxU32* triIndices, PxVec4* normals);

	virtual ~PxgSparseGridIsosurfaceExtractor() { }

	void initialize(PxgKernelLauncher& cudaContextManager, const PxSparseGridParams sparseGridParams,
		const PxIsosurfaceParams& isosurfaceParams, PxU32 maxNumParticles, PxU32 maxNumVertices, PxU32 maxNumTriangles);

	virtual void release();

	// Updates the extraction parameters and propagates them to the marching-cubes data.
	virtual void setIsosurfaceParams(const PxIsosurfaceParams& params)
	{
		mShared.mIsosurfaceParams = params;
		paramsToMCData();
	}

	virtual void clearDensity(CUstream stream);

	virtual PxU32 getMaxParticles() const
	{
		return mSparseGrid.getMaxParticles();
	}

	virtual PxU32 getMaxVertices() const
	{
		return mData.maxVerts;
	}

	// Triangle capacity; indices are stored three per triangle in mData.maxTriIds.
	virtual PxU32 getMaxTriangles() const
	{
		return mData.maxTriIds / 3;
	}

	virtual void setMaxParticles(PxU32 maxParticles);

	virtual void extractIsosurface(PxVec4* deviceParticlePos, const PxU32 numParticles, CUstream stream, PxU32* phases = NULL, PxU32 validPhaseMask = PxParticlePhaseFlag::eParticlePhaseFluid,
		PxU32* activeIndices = NULL, PxVec4* anisotropy1 = NULL, PxVec4* anisotropy2 = NULL, PxVec4* anisotropy3 = NULL, PxReal anisotropyFactor = 1.0f);

	// Sets user-provided host-memory result buffers.
	virtual void setResultBufferHost(PxVec4* vertices, PxU32* triIndices, PxVec4* normals);

	virtual PxIsosurfaceParams getIsosurfaceParams() const
	{
		return mShared.mIsosurfaceParams;
	}

	// Returns 0 until the counts buffer exists; element [0] holds the vertex count.
	virtual PxU32 getNumVertices() const
	{
		if (!mShared.mNumVerticesNumIndices)
			return 0;
		return mShared.mNumVerticesNumIndices[0];
	}

	// Element [1] holds the index count; three indices form one triangle.
	virtual PxU32 getNumTriangles() const
	{
		if (!mShared.mNumVerticesNumIndices)
			return 0;
		return mShared.mNumVerticesNumIndices[1] / 3;
	}

	virtual void setEnabled(bool enabled)
	{
		mShared.mEnabled = enabled;
	}

	virtual bool isEnabled() const
	{
		return mShared.mEnabled;
	}

	virtual PxSparseGridParams getSparseGridParams() const
	{
		return mSparseGrid.getGridParameters();
	}

	virtual void setSparseGridParams(const PxSparseGridParams& params)
	{
		mSparseGrid.setGridParameters(params);
	}
};
|
||||
|
||||
|
||||
/**
\brief GPU based isosurface extractor operating on a dense grid
*/
class PxgDenseGridIsosurfaceExtractor : public PxIsosurfaceExtractor, public PxUserAllocated
{
protected:
	PxgSharedIsosurfaceExtractor mShared;	// state shared with the sparse-grid variant (params, output buffers, launcher)
	PxIsosurfaceExtractionData mData;		// dense-grid marching-cubes data (capacities, intermediate buffers)

	PxU32 mMaxParticles; //For compatibility with sparse grid isosurface extractor. There is no upper particle limit on the dense grid extractor.

	// Propagates mShared.mIsosurfaceParams into the marching-cubes data (mData).
	void paramsToMCData();

	// Re-dimensions the output-related buffers for the given vertex/triangle capacities.
	virtual void setMaxVerticesAndTriangles(PxU32 maxIsosurfaceVertices, PxU32 maxIsosurfaceTriangles);

	virtual void releaseGPUBuffers();

	virtual void allocateGPUBuffers();

public:
	// Creates an uninitialized extractor; initialize() must be called before use.
	PxgDenseGridIsosurfaceExtractor() : mShared()
	{}

	// Convenience constructor: forwards all arguments to initialize().
	PxgDenseGridIsosurfaceExtractor(PxgKernelLauncher& cudaContextManager, const PxBounds3& worldBounds,
		PxReal cellSize, const PxIsosurfaceParams& isosurfaceParams, PxU32 maxNumParticles, PxU32 maxNumVertices, PxU32 maxNumTriangles) : mShared()
	{
		initialize(cudaContextManager, worldBounds, cellSize, isosurfaceParams, maxNumParticles, maxNumVertices, maxNumTriangles);
	}

	// Sets user-provided device-memory result buffers (presumably clears the ownership flag - confirm in implementation).
	virtual void setResultBufferDevice(PxVec4* vertices, PxU32* triIndices, PxVec4* normals);

	virtual ~PxgDenseGridIsosurfaceExtractor() { }

	void initialize(PxgKernelLauncher& cudaContextManager, const PxBounds3& worldBounds,
		PxReal cellSize, const PxIsosurfaceParams& isosurfaceParams, PxU32 maxNumParticles, PxU32 maxNumVertices, PxU32 maxNumTriangles);

	virtual void release();

	// Updates the extraction parameters and propagates them to the marching-cubes data.
	virtual void setIsosurfaceParams(const PxIsosurfaceParams& params)
	{
		mShared.mIsosurfaceParams = params;
		paramsToMCData();
	}

	virtual void clearDensity(CUstream stream);

	virtual PxU32 getMaxParticles() const
	{
		return mMaxParticles;
	}

	virtual PxU32 getMaxVertices() const
	{
		return mData.maxVerts;
	}

	// Triangle capacity; indices are stored three per triangle in mData.maxTriIds.
	virtual PxU32 getMaxTriangles() const
	{
		return mData.maxTriIds / 3;
	}

	virtual void setMaxParticles(PxU32 maxParticles)
	{
		//No need to resize internal buffers on the dense grid isosurface;
		mMaxParticles = maxParticles;
	}

	virtual void extractIsosurface(PxVec4* deviceParticlePos, const PxU32 numParticles, CUstream stream, PxU32* phases = NULL, PxU32 validPhaseMask = PxParticlePhaseFlag::eParticlePhaseFluid,
		PxU32* activeIndices = NULL, PxVec4* anisotropy1 = NULL, PxVec4* anisotropy2 = NULL, PxVec4* anisotropy3 = NULL, PxReal anisotropyFactor = 1.0f);

	// Sets user-provided host-memory result buffers.
	virtual void setResultBufferHost(PxVec4* vertices, PxU32* triIndices, PxVec4* normals);

	virtual PxIsosurfaceParams getIsosurfaceParams() const
	{
		return mShared.mIsosurfaceParams;
	}

	// Returns 0 until the counts buffer exists; element [0] holds the vertex count.
	virtual PxU32 getNumVertices() const
	{
		if (!mShared.mNumVerticesNumIndices)
			return 0;
		return mShared.mNumVerticesNumIndices[0];
	}

	// Element [1] holds the index count; three indices form one triangle.
	virtual PxU32 getNumTriangles() const
	{
		if (!mShared.mNumVerticesNumIndices)
			return 0;
		return mShared.mNumVerticesNumIndices[1] / 3;
	}

	virtual void setEnabled(bool enabled)
	{
		mShared.mEnabled = enabled;
	}

	virtual bool isEnabled() const
	{
		return mShared.mEnabled;
	}
};
|
||||
#endif
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
} // namespace physx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
162
engine/third_party/physx/source/gpusimulationcontroller/include/PxgJointManager.h
vendored
Normal file
162
engine/third_party/physx/source/gpusimulationcontroller/include/PxgJointManager.h
vendored
Normal file
@@ -0,0 +1,162 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_JOINT_MANAGER_H
|
||||
#define PXG_JOINT_MANAGER_H
|
||||
|
||||
#include "foundation/PxPinnedArray.h"
|
||||
#include "foundation/PxHashMap.h"
|
||||
#include "CmIDPool.h"
|
||||
#include "PxgD6JointData.h"
|
||||
#include "PxgConstraintPrep.h"
|
||||
#include "PxgConstraintIdMap.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
namespace IG
|
||||
{
|
||||
class IslandSim;
|
||||
class CPUExternalData;
|
||||
class GPUExternalData;
|
||||
}
|
||||
|
||||
namespace Dy
|
||||
{
|
||||
struct Constraint;
|
||||
}
|
||||
|
||||
struct PxgSolverConstraintManagerConstants;
|
||||
|
||||
//This manager should separate the joint types because we just support D6Joint in the constraint pre-prepare code
//
// Tracks joints for the GPU solver, split along two axes:
// - GPU vs CPU: whether the constraint pre-prepare for the joint runs on the GPU (D6 data path) or on the CPU.
// - rigid vs articulation ("Arti"): which kind of bodies the joint connects.
class PxgJointManager
{
public:
	// Host-side (pinned) mirror of the constraint-ID map that gets sent to the GPU.
	typedef PxPinnedArray<PxgConstraintIdMapEntry> ConstraintIdMap;

	PxgJointManager(const PxVirtualAllocator& allocator, bool isDirectGpuApiEnabled);
	~PxgJointManager();

	void reserveMemory(PxU32 maxConstraintRows);
	void reserveMemoryPreAddRemove(); // reserveMemory() above can not be used because it gets called after
									  // joints get added/removed during constraint partitioning

	void registerJoint(const Dy::Constraint& constraint);
	void removeJoint(PxU32 edgeIndex, PxArray<PxU32>& jointIndices, const IG::CPUExternalData& islandSimCpuData, const IG::GPUExternalData& islandSimGpuData);
	void addJoint( PxU32 edgeIndex, const Dy::Constraint* constraint, IG::IslandSim& islandSim, PxArray<PxU32>& jointIndices,
		PxPinnedArray<PxgSolverConstraintManagerConstants>& managerIter, PxU32 uniqueId);
	void updateJoint(PxU32 edgeIndex, const Dy::Constraint* constraint);
	void update(PxArray<PxU32>& jointOutputIndex);
	void reset();
	PxU32 getGpuNbRigidConstraints();
	PxU32 getGpuNbArtiConstraints();
	PxU32 getCpuNbRigidConstraints();
	PxU32 getCpuNbArtiConstraints();
	PxU32 getGpuNbActiveRigidConstraints();
	PxU32 getGpuNbActiveArtiConstraints();

	PX_FORCE_INLINE const PxArray<const Dy::Constraint*>& getCpuRigidConstraints() const { return mCpuRigidConstraints; }
	PX_FORCE_INLINE const PxArray<const Dy::Constraint*>& getCpuArtiConstraints() const { return mCpuArtiConstraints; }

	PX_FORCE_INLINE const PxInt32ArrayPinned& getDirtyGPURigidJointDataIndices() const { return mDirtyGPURigidJointDataIndices; }
	PX_FORCE_INLINE const PxInt32ArrayPinned& getDirtyGPUArtiJointDataIndices() const { return mDirtyGPUArtiJointDataIndices; }

	PX_FORCE_INLINE const PxPinnedArray<PxgD6JointData>& getGpuRigidJointData() const { return mGpuRigidJointData; }
	PX_FORCE_INLINE const PxPinnedArray<PxgD6JointData>& getGpuArtiJointData() const { return mGpuArtiJointData; }

	PX_FORCE_INLINE const PxPinnedArray<PxgConstraintPrePrep>& getGpuRigidJointPrePrep() const { return mGpuRigidJointPrePrep; }
	PX_FORCE_INLINE const PxPinnedArray<PxgConstraintPrePrep>& getGpuArtiJointPrePrep() const { return mGpuArtiJointPrePrep; }

	// PT: these ones are contained in this class but actually filled by external code, PxgJointManager doesn't touch them.(*)
	PX_FORCE_INLINE PxPinnedArray<PxgConstraintData>& getCpuRigidConstraintData() { return mCpuRigidConstraintData; }
	PX_FORCE_INLINE PxPinnedArray<PxgConstraintData>& getCpuArtiConstraintData() { return mCpuArtiConstraintData; }
	PX_FORCE_INLINE PxPinnedArray<Px1DConstraint>& getCpuRigidConstraintRows() { return mCpuRigidConstraintRows; }
	PX_FORCE_INLINE PxPinnedArray<Px1DConstraint>& getCpuArtiConstraintRows() { return mCpuArtiConstraintRows; }

	PX_FORCE_INLINE const ConstraintIdMap& getGpuConstraintIdMapHost() const { return mGpuConstraintIdMapHost; }

	// Returns whether the constraint-ID map changed since the last call, and clears the flag.
	PX_FORCE_INLINE bool getAndClearConstraintIdMapDirtyFlag()
	{
		const bool isDirty = mIsGpuConstraintIdMapDirty;
		mIsGpuConstraintIdMapDirty = false;
		return isDirty;
	}

private:

	// NOTE(review): these maps presumably translate edge indices into slots of the per-category
	// joint arrays below - confirm against the .cpp.
	PxHashMap<PxU32, PxU32> mGpuRigidConstraintIndices;
	PxHashMap<PxU32, PxU32> mGpuArtiConstraintIndices;
	PxHashMap<PxU32, PxU32> mCpuRigidConstraintIndices;
	PxHashMap<PxU32, PxU32> mCpuArtiConstraintIndices;

	PxArray<const Dy::Constraint*> mCpuRigidConstraints;
	PxArray<const Dy::Constraint*> mCpuArtiConstraints;

	PxArray<PxU32> mCpuRigidUniqueIndex;
	PxArray<PxU32> mCpuArtiUniqueIndex;

	PxArray<PxU32> mCpuRigidConstraintEdgeIndices;
	PxArray<PxU32> mCpuArtiConstraintEdgeIndices;

	PxPinnedArray<PxgD6JointData> mGpuRigidJointData;			// this is the input (PxgD6JointData) for rigid body we need to DMA to GPU so that GPU can fill in PxgConstraintData
	PxPinnedArray<PxgD6JointData> mGpuArtiJointData;			// this is the input (PxgD6JointData) for articulation we need to DMA to GPU so that GPU can fill in PxgConstraintData
	PxPinnedArray<PxgConstraintPrePrep> mGpuRigidJointPrePrep;	// this is the input (PxgConstraintPrePrep) for rigid body we need to DMA to GPU so that GPU can fill in PxgConstraintData
	PxPinnedArray<PxgConstraintPrePrep> mGpuArtiJointPrePrep;	// this is the input (PxgConstraintPrePrep) for articulation we need to DMA to GPU so that GPU can fill in PxgConstraintData

	PxPinnedArray<PxgConstraintData> mCpuRigidConstraintData;	// (*) this need to append to the GPU result (PxgConstraintData) after the first past pre-prepare code
	PxPinnedArray<Px1DConstraint> mCpuRigidConstraintRows;		// (*) this need to append to the GPU result (Px1DConstraint) after the first past pre-prepare code

	PxPinnedArray<PxgConstraintData> mCpuArtiConstraintData;	// (*) this need to append to the GPU result (PxgConstraintData) after the first past pre-prepare code
	PxPinnedArray<Px1DConstraint> mCpuArtiConstraintRows;		// (*) this need to append to the GPU result (Px1DConstraint) after the first past pre-prepare code

	PxInt32ArrayPinned mDirtyGPURigidJointDataIndices;			// the dirty list indices of PxgD6JointData
	PxInt32ArrayPinned mDirtyGPUArtiJointDataIndices;			// the dirty list indices of PxgD6JointData

	ConstraintIdMap mGpuConstraintIdMapHost;	// See PxgConstraintIdMapEntry for details. Only used when direct GPU API is enabled and
												// for joint/constraints that have the shader run on GPU.

	PxHashMap<PxU32, PxU32> mEdgeIndexToGpuConstraintIdMap;		// Get from edge index to constraint ID. Only used when direct GPU API is
																// enabled and for joint/constraints that have the shader run on GPU.

	Cm::IDPool mGpuRigidJointDataIDPool; //each PxgD6JointData has an unique id. We can recycle the id when a joint has been removed from the joint manager
	Cm::IDPool mGpuArtiJointDataIDPool;
public:
	// PT: not sure why these are here, it's computed by PxgGpuContext at the same time it fills the CPU constraint data (*)
	PxI32 mNbCpuRigidConstraintRows;
	PxI32 mNbCpuArtiConstraintRows;

private:
	PxU32 mMaxConstraintId;
	// Tracks all-time highest constraint ID to reserve sufficient space for mGpuConstraintIdMapHost.

	bool mIsGpuConstraintIdMapDirty; // set to true when mGpuConstraintIdMapHost changed and needs to get sent to GPU

	const bool mIsDirectGpuApiEnabled;
};
|
||||
}
|
||||
|
||||
#endif
|
||||
298
engine/third_party/physx/source/gpusimulationcontroller/include/PxgKernelLauncher.h
vendored
Normal file
298
engine/third_party/physx/source/gpusimulationcontroller/include/PxgKernelLauncher.h
vendored
Normal file
@@ -0,0 +1,298 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_KERNEL_LAUNCHER_H
|
||||
#define PXG_KERNEL_LAUNCHER_H
|
||||
|
||||
#include "cudamanager/PxCudaContextManager.h"
|
||||
#include "CudaKernelWrangler.h"
|
||||
#include "PxgKernelWrangler.h"
|
||||
#include "cudamanager/PxCudaContext.h"
|
||||
#include "foundation/PxString.h"
|
||||
|
||||
#define KERNEL_LAUNCH_ERROR_CHECK 0
|
||||
|
||||
namespace physx
|
||||
{
|
||||
// PT: do not inline this
|
||||
static PX_NOINLINE void outputKernelLaunchDebugInfo(KernelWrangler* kernelWrangler, PxU16 id, const char* file, PxU32 line)
|
||||
{
|
||||
char errorMsg[4096];
|
||||
physx::Pxsnprintf(errorMsg, 4096, "Launching GPU kernel %s...\n", kernelWrangler->getCuFunctionName(id));
|
||||
PxGetFoundation().error(PxErrorCode::eDEBUG_INFO, file, line, errorMsg);
|
||||
}
|
||||
|
||||
// PT: do not inline this
|
||||
static PX_NOINLINE void outputKernelLaunchError(KernelWrangler* kernelWrangler, PxU16 kernelId, PxCUresult result, const char* file, PxU32 line)
|
||||
{
|
||||
char errorMsg[4096];
|
||||
physx::Pxsnprintf(errorMsg, 4096, "GPU kernel '%s' failed to launch with error %u!!\n",
|
||||
kernelWrangler->getCuFunctionName(kernelId), result.value);
|
||||
PxGetFoundation().error(PxErrorCode::eINTERNAL_ERROR, file, line, errorMsg);
|
||||
}
|
||||
|
||||
//do not inline this
|
||||
static PX_NOINLINE void outputKernelLaunchSyncError(KernelWrangler* kernelWrangler, PxU16 kernelId, PxCUresult result, const char* file, PxU32 line)
|
||||
{
|
||||
char buffer[4096];
|
||||
physx::Pxsnprintf(buffer, 4096, "GPU kernel '%s' execution failed with error %u!\n",
|
||||
kernelWrangler->getCuFunctionName(kernelId), result.value);
|
||||
PxGetFoundation().error(PxErrorCode::eINTERNAL_ERROR, file, line, buffer);
|
||||
}
|
||||
|
||||
// Shared post-launch check. Reports a launch failure via the foundation error stream and,
// when debugExecution is true, additionally synchronizes the stream so that errors which
// only surface while the kernel executes are detected and reported too.
// Returns the original launch result unchanged.
template<const bool debugExecution>
static PX_FORCE_INLINE PxCUresult _testLaunch(PxCUresult resultR, KernelWrangler* kernelWrangler, PxCudaContext* ctx, PxU16 kernelId, CUstream hStream, const char* file, PxU32 line)
{
	// PT: don't assert immediately, we want to print the failing kernel's name first
	if(resultR != CUDA_SUCCESS)
		outputKernelLaunchError(kernelWrangler, kernelId, resultR, file, line);
	else if(debugExecution)
	{
		// PT: launching the kernel might have worked but executing the kernel is a different thing that can also fail.
		// This is only for debugging so code bloat won't matter here, and we can inline the whole thing.
		// This assumes the compiler removes all of it, which should work because we use a template for debugExecution.
		const PxCUresult syncErr = ctx->streamSynchronize(hStream);
		// PT: here again, don't assert immediately
		if(syncErr != CUDA_SUCCESS)
		{
			outputKernelLaunchSyncError(kernelWrangler, kernelId, syncErr, file, line);
			/*char buffer[4096];
			physx::Pxsnprintf(buffer, 4096, "GPU kernel '%s' execution failed with error %u!\n",
				kernelWrangler->getCuFunctionName(kernelId), syncErr.value);
			PxGetFoundation().error(PxErrorCode::eINTERNAL_ERROR, file, line, buffer);*/
		}
		PX_ASSERT(syncErr == CUDA_SUCCESS);
	}
	PX_ASSERT(resultR == CUDA_SUCCESS);
	return resultR;
}
|
||||
|
||||
// PT: this one is for the regular launchKernel function
// Launches the kernel identified by kernelId through the CUDA context, passing arguments
// as an array of PxCudaKernelParam (pointer + size per argument), then runs the shared
// launch/execution error check. The 'if (0)' is a manual trace switch left for debugging.
template<const bool debugExecution>
static PX_FORCE_INLINE PxCUresult _launch(
	KernelWrangler* kernelWrangler, PxCudaContext* ctx, PxU16 kernelId,
	PxU32 gridDimX, PxU32 gridDimY, PxU32 gridDimZ,
	PxU32 blockDimX, PxU32 blockDimY, PxU32 blockDimZ,
	PxU32 sharedMemBytes, CUstream hStream,
	PxCudaKernelParam* kernelParams, size_t kernelParamsSizeInBytes,
	const char* file, PxU32 line)
{
	if (0)	// flip to 1 locally to trace every kernel launch
		outputKernelLaunchDebugInfo(kernelWrangler, kernelId, file, line);

	const PxCUresult resultR = ctx->launchKernel(kernelWrangler->getCuFunction(kernelId),
		gridDimX, gridDimY, gridDimZ,
		blockDimX, blockDimY, blockDimZ,
		sharedMemBytes, hStream,
		kernelParams, kernelParamsSizeInBytes,
		NULL,
		file, line);

	return _testLaunch<debugExecution>(resultR, kernelWrangler, ctx, kernelId, hStream, file, line);
}
|
||||
|
||||
// PT: this one is for the new 'optimized' launchKernel function
// Same as the overload above but takes the arguments as a plain array of pointers
// (void**), matching the CUDA driver kernelParams convention, instead of
// PxCudaKernelParam pointer/size pairs.
template<const bool debugExecution>
static PX_FORCE_INLINE PxCUresult _launch(
	KernelWrangler* kernelWrangler, PxCudaContext* ctx, PxU16 kernelId,
	PxU32 gridDimX, PxU32 gridDimY, PxU32 gridDimZ,
	PxU32 blockDimX, PxU32 blockDimY, PxU32 blockDimZ,
	PxU32 sharedMemBytes, CUstream hStream,
	void** kernelParams,
	const char* file, PxU32 line)
{
	if (0)	// flip to 1 locally to trace every kernel launch
		outputKernelLaunchDebugInfo(kernelWrangler, kernelId, file, line);

	const PxCUresult resultR = ctx->launchKernel(kernelWrangler->getCuFunction(kernelId),
		gridDimX, gridDimY, gridDimZ,
		blockDimX, blockDimY, blockDimZ,
		sharedMemBytes, hStream,
		kernelParams,
		NULL,
		file, line);

	return _testLaunch<debugExecution>(resultR, kernelWrangler, ctx, kernelId, hStream, file, line);
}
|
||||
|
||||
/**
\brief Convenience wrapper around CUDA kernel launches for the PhysX GPU pipeline.

Provides static launch entry points (taking an explicit context and kernel wrangler)
plus instance-level overloads that use the stored PxCudaContextManager and
PxgCudaKernelWranglerManager. The AutoPackParameters/Ptr variants expand a C++
parameter pack into the argument arrays expected by the launch functions.
*/
class PxgKernelLauncher
{
private:
	// Integer ceil-division: number of blocks needed to cover 'a' items with 'b' items per block.
	static PX_FORCE_INLINE PxU32 divideRoundUp(PxU32 a, PxU32 b)
	{
		return (a + b - 1) / b;
	}

	//https://riptutorial.com/cplusplus/example/3208/iterating-over-a-parameter-pack
	// Recursion terminator for packArguments below.
	static void packArguments(PxCudaKernelParam* /*buffer*/, const PxU32 /*index*/)
	{
		// base case
	}

	// Writes each argument's address and size into one PxCudaKernelParam slot, recursing over the pack.
	template <class T, class... Ts>
	static void packArguments(PxCudaKernelParam* buffer, const PxU32 index, T const* first, Ts const*... rest)
	{
		buffer[index].data = const_cast<void*>(static_cast<void const*>(first));
		buffer[index].size = sizeof(*first);
		packArguments(buffer, index + 1, rest...);
	}

	//https://riptutorial.com/cplusplus/example/3208/iterating-over-a-parameter-pack
	// Recursion terminator for packArgumentsPointerOnly below.
	static void packArgumentsPointerOnly(void** /*buffer*/, const PxU32 /*index*/)
	{
		// base case
	}

	// Writes each argument's address into the void* array (driver-API kernelParams style), recursing over the pack.
	template <class T, class... Ts>
	static void packArgumentsPointerOnly(void** buffer, const PxU32 index, T const* first, Ts const*... rest)
	{
		buffer[index] = const_cast<void*>(static_cast<void const*>(first));
		packArgumentsPointerOnly(buffer, index + 1, rest...);
	}


public:
	// Launches with explicit grid/block dimensions and pre-packed PxCudaKernelParam arguments.
	// Error checking beyond the launch result is controlled by KERNEL_LAUNCH_ERROR_CHECK.
	static PX_FORCE_INLINE PxCUresult launchKernel(PxCudaContext* cudaContext, PxgCudaKernelWranglerManager* gpuKernelWranglerManager,
		PxU16 kernelFunctionId, PxU32 gridDimX, PxU32 gridDimY, PxU32 gridDimZ,
		PxU32 blockDimX, PxU32 blockDimY, PxU32 blockDimZ, PxU32 sharedMemBytes, CUstream stream, PxCudaKernelParam* kernelParams, PxU32 kernelParamsSize)
	{
		return _launch<KERNEL_LAUNCH_ERROR_CHECK>(gpuKernelWranglerManager->mKernelWrangler, cudaContext, kernelFunctionId,
			gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ,
			sharedMemBytes, stream, kernelParams, kernelParamsSize, PX_FL);
	}

	// Same as above, but arguments are given as a plain pointer array (driver-API kernelParams style).
	static PX_FORCE_INLINE PxCUresult launchKernel(PxCudaContext* cudaContext, PxgCudaKernelWranglerManager* gpuKernelWranglerManager,
		PxU16 kernelFunctionId, PxU32 gridDimX, PxU32 gridDimY, PxU32 gridDimZ,
		PxU32 blockDimX, PxU32 blockDimY, PxU32 blockDimZ, PxU32 sharedMemBytes, CUstream stream, void** kernelParams)
	{
		return _launch<KERNEL_LAUNCH_ERROR_CHECK>(gpuKernelWranglerManager->mKernelWrangler, cudaContext, kernelFunctionId,
			gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ,
			sharedMemBytes, stream, kernelParams, PX_FL);
	}

	// 1D launch helper: numBlocks x numThreadsPerBlock, Y and Z dimensions fixed to 1.
	static PX_FORCE_INLINE PxCUresult launchKernelBlocks(PxCudaContext* cudaContext, PxgCudaKernelWranglerManager* gpuKernelWranglerManager,
		PxU16 kernelFunctionId, PxU32 numBlocks, PxU32 numThreadsPerBlock, PxU32 sharedMemBytes, CUstream stream, PxCudaKernelParam* kernelParams, PxU32 kernelParamsSize)
	{
		return launchKernel(cudaContext, gpuKernelWranglerManager, kernelFunctionId, numBlocks, 1, 1, numThreadsPerBlock, 1, 1, sharedMemBytes, stream, kernelParams, kernelParamsSize);
	}

	// Packs the argument pack into PxCudaKernelParam entries, then launches with explicit grid/block dims.
	// NOTE: arguments are captured by address, so they must outlive the launch call.
	template <class ...Args>
	static PX_FORCE_INLINE PxCUresult launchKernelAutoPackParameters(PxCudaContext* cudaContext, PxgCudaKernelWranglerManager* gpuKernelWranglerManager,
		PxU16 kernelFunctionId, PxU32 gridDimX, PxU32 gridDimY, PxU32 gridDimZ,
		PxU32 blockDimX, PxU32 blockDimY, PxU32 blockDimZ, PxU32 sharedMemBytes, CUstream stream, const Args&... args)
	{
		PxCudaKernelParam p[sizeof...(args)];
		packArguments(p, 0, &args...);
		return launchKernel(cudaContext, gpuKernelWranglerManager, kernelFunctionId, gridDimX, gridDimY, gridDimZ,
			blockDimX, blockDimY, blockDimZ, sharedMemBytes, stream, p, sizeof(p));
	}

	// 1D variant of launchKernelAutoPackParameters.
	template <class ...Args>
	static PX_FORCE_INLINE PxCUresult launchKernelBlocksAutoPackParameters(PxCudaContext* cudaContext, PxgCudaKernelWranglerManager* gpuKernelWranglerManager,
		PxU16 kernelFunctionId, PxU32 numBlocks, PxU32 numThreadsPerBlock, PxU32 sharedMemBytes, CUstream stream, const Args&... args)
	{
		PxCudaKernelParam p[sizeof...(args)];
		packArguments(p, 0, &args...);
		return launchKernelBlocks(cudaContext, gpuKernelWranglerManager, kernelFunctionId, numBlocks,
			numThreadsPerBlock, sharedMemBytes, stream, p, sizeof(p));
	}

	// 1D launch sized by total thread count: the block count is derived by rounding up.
	static PX_FORCE_INLINE PxCUresult launchKernelThreads(PxCudaContext* cudaContext, PxgCudaKernelWranglerManager* gpuKernelWranglerManager,
		PxU16 kernelFunctionId, PxU32 totalNumThreads, PxU32 numThreadsPerBlock, PxU32 sharedMemBytes, CUstream stream, PxCudaKernelParam* kernelParams, PxU32 kernelParamsSize)
	{
		return launchKernelBlocks(cudaContext, gpuKernelWranglerManager, kernelFunctionId, divideRoundUp(totalNumThreads, numThreadsPerBlock),
			numThreadsPerBlock, sharedMemBytes, stream, kernelParams, kernelParamsSize);
	}

	// Thread-count-sized variant of launchKernelAutoPackParameters.
	template <class ...Args>
	static PX_FORCE_INLINE PxCUresult launchKernelThreadsAutoPackParameters(PxCudaContext* cudaContext, PxgCudaKernelWranglerManager* gpuKernelWranglerManager,
		PxU16 kernelFunctionId, PxU32 totalNumThreads, PxU32 numThreadsPerBlock, PxU32 sharedMemBytes, CUstream stream, const Args&... args)
	{
		PxCudaKernelParam p[sizeof...(args)];
		packArguments(p, 0, &args...);
		return launchKernelThreads(cudaContext, gpuKernelWranglerManager, kernelFunctionId, totalNumThreads,
			numThreadsPerBlock, sharedMemBytes, stream, p, sizeof(p));
	}


	PxgKernelLauncher()
		: mCudaContextManager(NULL), mGpuKernelWranglerManager(NULL) {}

	PxgKernelLauncher(PxCudaContextManager* cudaContextManager, PxgCudaKernelWranglerManager* gpuKernelWranglerManager)
		: mCudaContextManager(cudaContextManager), mGpuKernelWranglerManager(gpuKernelWranglerManager) {}

	PX_FORCE_INLINE PxCudaContextManager* getCudaContextManager() { return mCudaContextManager; }
	PX_FORCE_INLINE const PxCudaContextManager* getCudaContextManager() const { return mCudaContextManager; }
	PX_FORCE_INLINE PxgCudaKernelWranglerManager* getKernelWrangler() { return mGpuKernelWranglerManager; }
	PX_FORCE_INLINE const PxgCudaKernelWranglerManager* getKernelWrangler() const { return mGpuKernelWranglerManager; }

	// Instance-level 1D launch using the stored context manager and kernel wrangler.
	// Arguments are passed by const reference and captured by address.
	template <class ...Args>
	PxCUresult launchKernel(PxU16 kernelId, PxU32 numBlocks, PxU32 numThreadsPerBlock, PxU32 sharedMemorySize, CUstream stream, const Args&... args)
	{
		void* p[sizeof...(args)];
		packArgumentsPointerOnly(p, 0, &args...);
		return PxgKernelLauncher::launchKernel(getCudaContextManager()->getCudaContext(), getKernelWrangler(),
			kernelId, numBlocks, 1, 1, numThreadsPerBlock, 1, 1, sharedMemorySize, stream, p);
	}

	// Instance-level 3D launch using the stored context manager and kernel wrangler.
	template <class ...Args>
	PxCUresult launchKernelXYZ(PxU16 kernelId, PxU32 numBlocksX, PxU32 numBlocksY, PxU32 numBlocksZ,
		PxU32 numThreadsPerBlockX, PxU32 numThreadsPerBlockY, PxU32 numThreadsPerBlockZ, PxU32 sharedMemorySize, CUstream stream, const Args&... args)
	{
		void* p[sizeof...(args)];
		packArgumentsPointerOnly(p, 0, &args...);
		return PxgKernelLauncher::launchKernel(getCudaContextManager()->getCudaContext(), getKernelWrangler(),
			kernelId, numBlocksX, numBlocksY, numBlocksZ, numThreadsPerBlockX, numThreadsPerBlockY, numThreadsPerBlockZ, sharedMemorySize, stream, p);
	}

	// Same as launchKernel above but the caller already holds pointers to the arguments.
	template <class ...Args>
	PxCUresult launchKernelPtr(PxU16 kernelId, PxU32 numBlocks, PxU32 numThreadsPerBlock, PxU32 sharedMemorySize, CUstream stream, const Args*... args)
	{
		void* p[sizeof...(args)];
		packArgumentsPointerOnly(p, 0, args...);
		return PxgKernelLauncher::launchKernel(getCudaContextManager()->getCudaContext(), getKernelWrangler(),
			kernelId, numBlocks, 1, 1, numThreadsPerBlock, 1, 1, sharedMemorySize, stream, p);
	}

	// Same as launchKernelXYZ above but the caller already holds pointers to the arguments.
	template <class ...Args>
	PxCUresult launchKernelXYZPtr(PxU16 kernelId, PxU32 numBlocksX, PxU32 numBlocksY, PxU32 numBlocksZ,
		PxU32 numThreadsPerBlockX, PxU32 numThreadsPerBlockY, PxU32 numThreadsPerBlockZ, PxU32 sharedMemorySize, CUstream stream, const Args*... args)
	{
		void* p[sizeof...(args)];
		packArgumentsPointerOnly(p, 0, args...);
		return PxgKernelLauncher::launchKernel(getCudaContextManager()->getCudaContext(), getKernelWrangler(),
			kernelId, numBlocksX, numBlocksY, numBlocksZ, numThreadsPerBlockX, numThreadsPerBlockY, numThreadsPerBlockZ, sharedMemorySize, stream, p);
	}

private:
	PxCudaContextManager* mCudaContextManager;				// context used for all instance-level launches
	PxgCudaKernelWranglerManager* mGpuKernelWranglerManager;	// resolves kernel IDs to CUfunction handles
};
|
||||
}
|
||||
|
||||
#endif
|
||||
117
engine/third_party/physx/source/gpusimulationcontroller/include/PxgNonRigidCoreCommon.h
vendored
Normal file
117
engine/third_party/physx/source/gpusimulationcontroller/include/PxgNonRigidCoreCommon.h
vendored
Normal file
@@ -0,0 +1,117 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_NONRIGID_CORE_COMMON_H
|
||||
#define PXG_NONRIGID_CORE_COMMON_H
|
||||
|
||||
#include "PxgCudaBuffer.h"
|
||||
#include "common/PxPhysXCommonConfig.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
#include "PxgCudaPagedLinearAllocator.h"
|
||||
#include "PxgRadixSortDesc.h"
|
||||
#include "foundation/PxPinnedArray.h"
|
||||
#include "PxgEssentialCore.h"
|
||||
#include "PxNodeIndex.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
class PxgCudaKernelWranglerManager;
|
||||
class PxCudaContextManager;
|
||||
class PxgHeapMemoryAllocatorManager;
|
||||
|
||||
struct PxGpuDynamicsMemoryConfig;
|
||||
|
||||
class PxgSimulationController;
|
||||
class PxgCudaBroadPhaseSap;
|
||||
class PxgGpuNarrowphaseCore;
|
||||
class PxgGpuContext;
|
||||
|
||||
class PxgNonRigidCore : public PxgEssentialCore
|
||||
{
|
||||
public:
|
||||
PxgNonRigidCore(PxgCudaKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager,
|
||||
PxgHeapMemoryAllocatorManager* heapMemoryManager, PxgSimulationController* simController,
|
||||
PxgGpuContext* context, const PxU32 maxContacts, const PxU32 collisionStackSize, PxsHeapStats::Enum statType);
|
||||
|
||||
virtual ~PxgNonRigidCore();
|
||||
|
||||
void updateGPURadixSortBlockDesc(CUstream stream, CUdeviceptr inputKeyd, CUdeviceptr inputRankd,
|
||||
CUdeviceptr outputKeyd, CUdeviceptr outputRankd, CUdeviceptr radixCountd,
|
||||
CUdeviceptr numKeysd, PxgRadixSortBlockDesc* rsDescs,
|
||||
CUdeviceptr radixSortDescBuf0, CUdeviceptr radixSortDescBuf1);
|
||||
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxNodeIndex>& getContactByRigid() { return mContactByRigidBuf; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxNodeIndex>& getContactSortedByRigid() { return mContactSortedByRigidBuf; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxU32>& getTempContactByRigid() { return mTempContactByRigidBitBuf; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxU32>& getContactRemapSortedByRigid() { return mContactRemapSortedByRigidBuf; }
|
||||
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxU64>& getContactSortedByParticle() { return mContactSortedByParticleBuf; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxU32>& getTempContactByParticle() { return mTempContactByParticleBitBuf; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxU32>& getContactRemapSortedByParticle() { return mContactRemapSortedByParticleBuf; }
|
||||
|
||||
PxgCudaPagedLinearAllocator<PxgHeapMemoryAllocator> mIntermStackAlloc;
|
||||
PxgTypedCudaBuffer<PxU32> mStackSizeNeededOnDevice;
|
||||
PxU32* mStackSizeNeededPinned;
|
||||
|
||||
PxU32 mMaxContacts;
|
||||
PxU32 mCollisionStackSizeBytes;
|
||||
|
||||
//for sorting contacts
|
||||
PxPinnedArray<PxgRadixSortBlockDesc> mRSDesc;
|
||||
PxgCudaBufferN<2> mRadixSortDescBuf; //radix sort with rank
|
||||
PxgCudaBuffer mRadixCountTotalBuf;
|
||||
PxU32 mRadixCountSize;
|
||||
|
||||
//for radix sort
|
||||
PxgTypedCudaBuffer<PxNodeIndex> mContactByRigidBuf; //rigidId is nodeIndex, which is 64 bit
|
||||
PxgTypedCudaBuffer<PxNodeIndex> mContactSortedByRigidBuf; //rigidId is nodeIndex, which is 64 bit
|
||||
PxgTypedCudaBuffer<PxU32> mTempContactByRigidBitBuf; //low/high 32 bit
|
||||
PxgTypedCudaBuffer<PxU32> mContactRemapSortedByRigidBuf; //rank index
|
||||
|
||||
PxgTypedCudaBuffer<PxU64> mContactSortedByParticleBuf; //PxU64 particle system id and particle index
|
||||
PxgTypedCudaBuffer<PxU32> mTempContactByParticleBitBuf; //low/high 32 bit
|
||||
PxgTypedCudaBuffer<PxU32> mContactRemapSortedByParticleBuf; //rank index
|
||||
|
||||
|
||||
PxgTypedCudaBuffer<PxU32> mTempContactBuf;
|
||||
PxgTypedCudaBuffer<PxU32> mTempContactRemapBuf;
|
||||
PxgTypedCudaBuffer<PxU32> mTempContactBuf2;
|
||||
PxgTypedCudaBuffer<PxU32> mTempContactRemapBuf2;
|
||||
|
||||
#if PX_ENABLE_SIM_STATS
|
||||
PxU32 mCollisionStackSizeBytesStats;
|
||||
#else
|
||||
PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
162
engine/third_party/physx/source/gpusimulationcontroller/include/PxgPBDParticleSystemCore.h
vendored
Normal file
162
engine/third_party/physx/source/gpusimulationcontroller/include/PxgPBDParticleSystemCore.h
vendored
Normal file
@@ -0,0 +1,162 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_PBD_PARTICLE_SYSTEM_CORE_H
|
||||
#define PXG_PBD_PARTICLE_SYSTEM_CORE_H
|
||||
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
|
||||
#include "cudamanager/PxCudaTypes.h"
|
||||
|
||||
#include "PxgParticleSystemCore.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
class PxCudaContextManager;
|
||||
class PxgBodySimManager;
|
||||
class PxgCudaKernelWranglerManager;
|
||||
class PxgGpuContext;
|
||||
struct PxGpuParticleBufferIndexPair;
|
||||
class PxgHeapMemoryAllocatorManager;
|
||||
class PxgParticleAndDiffuseBuffer;
|
||||
class PxgParticleClothBuffer;
|
||||
class PxgParticleRigidBuffer;
|
||||
class PxgParticleSystem;
|
||||
class PxgParticleSystemBuffer;
|
||||
class PxgParticleSystemDiffuseBuffer;
|
||||
class PxgSimulationController;
|
||||
|
||||
namespace Dy
|
||||
{
|
||||
class ParticleSystemCore;
|
||||
}
|
||||
|
||||
class PxgPBDParticleSystemCore : public PxgParticleSystemCore, public PxgDiffuseParticleCore
|
||||
{
|
||||
public:
|
||||
PxgPBDParticleSystemCore(PxgCudaKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager,
|
||||
PxgHeapMemoryAllocatorManager* heapMemoryManager, PxgSimulationController* simController,
|
||||
PxgGpuContext* gpuContext, PxU32 maxParticleContacts);
|
||||
virtual ~PxgPBDParticleSystemCore();
|
||||
|
||||
|
||||
// calculate AABB bound for each particle volumes
|
||||
void updateVolumeBound(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemsd, const PxU32 numActiveParticleSystems,
|
||||
CUstream bpStream);
|
||||
|
||||
virtual void preIntegrateSystems(const PxU32 nbActiveParticleSystems, const PxVec3 gravity, const PxReal dt);
|
||||
//virtual void updateBounds(PxgParticleSystem* particleSystems, PxU32* activeParticleSystems, const PxU32 nbActiveParticleSystems);
|
||||
virtual void updateGrid();
|
||||
virtual void selfCollision();
|
||||
//this is for solving selfCollsion and contacts between particles and primitives based on sorted by particle id
|
||||
|
||||
virtual void constraintPrep(CUdeviceptr prePrepDescd, CUdeviceptr prepDescd, CUdeviceptr solverCoreDescd, CUdeviceptr sharedDescd,
|
||||
const PxReal dt, CUstream solverStream, bool isTGS, PxU32 numSolverBodies);
|
||||
virtual void updateParticles(const PxReal dt);
|
||||
virtual void solve(CUdeviceptr prePrepDescd, CUdeviceptr solverCoreDescd,
|
||||
CUdeviceptr sharedDescd, CUdeviceptr artiCoreDescd, const PxReal dt, CUstream solverStream);
|
||||
|
||||
virtual void solveTGS(CUdeviceptr prePrepDescd, CUdeviceptr solverCoreDescd,
|
||||
CUdeviceptr sharedDescd, CUdeviceptr artiCoreDescd, const PxReal dt, const PxReal totalInvDt, CUstream solverStream,
|
||||
const bool isVelocityIteration, PxI32 iterationIndex, PxI32 numTGSIterations, PxReal coefficient);
|
||||
|
||||
virtual void prepParticleConstraint(CUdeviceptr prePrepDescd, CUdeviceptr prepDescd, CUdeviceptr sharedDescd, bool isTGS, const PxReal dt);
|
||||
|
||||
|
||||
virtual void integrateSystems(const PxReal dt, const PxReal epsilonSq);
|
||||
virtual void onPostSolve();
|
||||
virtual void gpuMemDmaUpParticleSystem(PxgBodySimManager& bodySimManager, CUstream stream);
|
||||
virtual void getMaxIterationCount(PxgBodySimManager& bodySimManager, PxI32& maxPosIters, PxI32& maxVelIters);
|
||||
virtual void releaseParticleSystemDataBuffer();
|
||||
|
||||
void solveVelocities(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemsd, const PxU32 nbActiveParticleSystems, const PxReal dt);
|
||||
|
||||
void solveParticleCollision(const PxReal dt, bool isTGS, PxReal coefficient);
|
||||
|
||||
virtual void finalizeVelocities(const PxReal dt, const PxReal scale);
|
||||
|
||||
void solveSprings(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemsd,
|
||||
const PxU32 nbActiveParticleSystems, const PxReal dt, bool isTGS);
|
||||
|
||||
void initializeSprings(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemsd,
|
||||
const PxU32 nbActiveParticleSystems);
|
||||
|
||||
// Direct-GPU API
|
||||
PX_DEPRECATED void applyParticleBufferDataDEPRECATED(const PxU32* indices, const PxGpuParticleBufferIndexPair* indexPairs, const PxParticleBufferFlags* flags, PxU32 nbUpdatedBuffers, CUevent waitEvent, CUevent signalEvent);
|
||||
|
||||
private:
|
||||
|
||||
void allocateParticleBuffer(const PxU32 nbTotalParticleSystems, CUstream stream);
|
||||
void allocateParticleDataBuffer(void** bodySimsLL, CUstream stream);
|
||||
void updateDirtyData(PxgBodySimManager& bodySimManager, CUstream stream);
|
||||
|
||||
void resizeParticleDataBuffer(PxgParticleSystem& particleSystem, PxgParticleSystemBuffer* buffer, const PxU32 maxParticles, const PxU32 maxNeighborhood, CUstream stream);
|
||||
void resizeDiffuseParticleDiffuseBuffer(PxgParticleSystem& particleSystem, PxgParticleSystemDiffuseBuffer* diffuseBuffer, const PxU32 maxDiffuseParticles, CUstream stream);
|
||||
bool createUserParticleData(PxgParticleSystem& particleSystem, Dy::ParticleSystemCore& dyParticleSystemCore, PxgParticleSystemBuffer* buffer, PxgParticleSystemDiffuseBuffer* diffuseBuffer,
|
||||
CUstream stream);
|
||||
|
||||
PX_FORCE_INLINE PxU32 getMaxSpringsPerBuffer() { return mMaxSpringsPerBuffer; }
|
||||
PX_FORCE_INLINE PxU32 getMaxSpringPartitionsPerBuffer() { return mMaxSpringPartitionsPerBuffer; }
|
||||
PX_FORCE_INLINE PxU32 getMaxSpringsPerPartitionPerBuffer() { return mMaxSpringsPerPartitionPerBuffer; }
|
||||
PX_FORCE_INLINE PxU32 getMaxRigidsPerBuffer() { return mMaxRigidsPerBuffer; }
|
||||
|
||||
void calculateHashForDiffuseParticles(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemsd, const PxU32 numActiveParticleSystems);
|
||||
|
||||
void solveDensities(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemd, const PxU32 nbActiveParticleSystems, const PxReal dt,
|
||||
PxReal coefficient);
|
||||
|
||||
void solveInflatables(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemd, const PxU32 nbActiveParticleSystems, const PxReal coefficient, const PxReal dt);
|
||||
|
||||
void solveShapes(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemd, const PxU32 nbActiveParticleSystems, const PxReal dt, const PxReal biasCoefficient);
|
||||
|
||||
void solveAerodynamics(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemd, const PxU32 nbActiveParticleSystems, const PxReal dt);
|
||||
|
||||
void solveDiffuseParticles(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemd, const PxU32 nbActiveParticleSystems, const PxReal dt);
|
||||
|
||||
//-------------------------------------------------------------------------------
|
||||
// Materials
|
||||
void updateMaterials(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemd, const PxU32 nbActiveParticleSystems, CUstream bpStream, const PxReal invTotalDt);
|
||||
|
||||
PxU32 mMaxClothBuffersPerSystem;
|
||||
PxU32 mMaxClothsPerBuffer;
|
||||
PxU32 mMaxSpringsPerBuffer;
|
||||
PxU32 mMaxSpringPartitionsPerBuffer;
|
||||
PxU32 mMaxSpringsPerPartitionPerBuffer;
|
||||
PxU32 mMaxTrianglesPerBuffer;
|
||||
PxU32 mMaxVolumesPerBuffer;
|
||||
PxU32 mMaxRigidBuffersPerSystem;
|
||||
PxU32 mMaxRigidsPerBuffer;//compute the max rigids(shape matching) for each particle system
|
||||
PxU32 mMaxNumPhaseToMaterials; //compute the max number of phase to materials for each particle system
|
||||
bool mComputePotentials;
|
||||
PxU32 mNumActiveParticleSystems;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
112
engine/third_party/physx/source/gpusimulationcontroller/include/PxgParticleNeighborhoodProvider.h
vendored
Normal file
112
engine/third_party/physx/source/gpusimulationcontroller/include/PxgParticleNeighborhoodProvider.h
vendored
Normal file
@@ -0,0 +1,112 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_PARTICLE_NEIGHBORHOOD_PROVIDER_H
|
||||
#define PXG_PARTICLE_NEIGHBORHOOD_PROVIDER_H
|
||||
|
||||
#include "PxParticleNeighborhoodProvider.h"
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
|
||||
#include "PxgSparseGridStandalone.h"
|
||||
#include "PxgKernelLauncher.h"
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
namespace physx
|
||||
{
|
||||
#endif
|
||||
|
||||
#if PX_SUPPORT_GPU_PHYSX
|
||||
|
||||
class PxgParticleNeighborhoodProvider : public PxParticleNeighborhoodProvider, public PxUserAllocated
|
||||
{
|
||||
private:
|
||||
PxgKernelLauncher mKernelLauncher;
|
||||
|
||||
public:
|
||||
PxSparseGridBuilder mSparseGridBuilder;
|
||||
|
||||
|
||||
PxgParticleNeighborhoodProvider(PxgKernelLauncher& cudaContextManager, const PxU32 maxNumParticles, const PxReal particleContactOffset, const PxU32 maxNumSparseGridCells);
|
||||
|
||||
virtual void buildNeighborhood(PxVec4* deviceParticlePos, const PxU32 numParticles, CUstream stream, PxU32* devicePhases = NULL,
|
||||
PxU32 validPhase = PxParticlePhaseFlag::eParticlePhaseFluid, const PxU32* deviceActiveIndices = NULL)
|
||||
{
|
||||
mSparseGridBuilder.updateSparseGrid(deviceParticlePos, numParticles, devicePhases, stream, validPhase, deviceActiveIndices);
|
||||
mSparseGridBuilder.updateSubgridEndIndices(numParticles, stream);
|
||||
}
|
||||
|
||||
PxU32* getSubgridEndIndicesBuffer()
|
||||
{
|
||||
return mSparseGridBuilder.getSubgridEndIndicesBuffer();
|
||||
}
|
||||
|
||||
virtual PxU32 getMaxParticles() const
|
||||
{
|
||||
return mSparseGridBuilder.getMaxParticles();
|
||||
}
|
||||
|
||||
virtual void setMaxParticles(PxU32 maxParticles)
|
||||
{
|
||||
mSparseGridBuilder.setMaxParticles(maxParticles);
|
||||
}
|
||||
|
||||
virtual void release()
|
||||
{
|
||||
mSparseGridBuilder.release();
|
||||
PX_DELETE_THIS;
|
||||
}
|
||||
|
||||
virtual PxU32 getNumGridCellsInUse() const
|
||||
{
|
||||
return mSparseGridBuilder.getNumSubgridsInUse();
|
||||
}
|
||||
|
||||
virtual PxU32 getMaxGridCells() const
|
||||
{
|
||||
return mSparseGridBuilder.getGridParameters().maxNumSubgrids;
|
||||
}
|
||||
|
||||
virtual PxReal getCellSize() const
|
||||
{
|
||||
return mSparseGridBuilder.getGridParameters().gridSpacing;
|
||||
}
|
||||
|
||||
virtual void setCellProperties(PxU32 maxGridCells, PxReal cellSize);
|
||||
|
||||
virtual ~PxgParticleNeighborhoodProvider() {}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
} // namespace physx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
638
engine/third_party/physx/source/gpusimulationcontroller/include/PxgParticleSystem.h
vendored
Normal file
638
engine/third_party/physx/source/gpusimulationcontroller/include/PxgParticleSystem.h
vendored
Normal file
@@ -0,0 +1,638 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_PARTICLE_SYSTEM_H
|
||||
#define PXG_PARTICLE_SYSTEM_H
|
||||
|
||||
#include "foundation/PxPinnedArray.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
#include "foundation/PxUserAllocated.h"
|
||||
|
||||
#include "PxParticleBuffer.h"
|
||||
#include "PxParticleGpu.h"
|
||||
|
||||
#include "PxgCudaBuffer.h"
|
||||
|
||||
#include "DyParticleSystemCore.h"
|
||||
#include "PxsParticleBuffer.h"
|
||||
|
||||
#include <vector_types.h>
|
||||
#include "PxNodeIndex.h"
|
||||
#include "PxgSimulationCoreDesc.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
#define EMPTY_CELL 0xffffffff
|
||||
#define NUM_SPRING_PER_BLOCK 4
|
||||
|
||||
struct PxsParticleMaterialData;
|
||||
struct PxParticleRigidFilterPair;
|
||||
struct PxParticleRigidAttachment;
|
||||
class PxCudaContextManager;
|
||||
class PxsHeapMemoryAllocatorManager;
|
||||
class PxNodeIndex;
|
||||
class PxgPBDMaterialDerived;
|
||||
|
||||
template<typename MaterialType>
|
||||
PX_CUDA_CALLABLE PX_FORCE_INLINE const MaterialType& getParticleMaterial(const PxsParticleMaterialData* PX_RESTRICT materials,
|
||||
const PxU32 id, const PxU32 stride)
|
||||
{
|
||||
const PxU8* PX_RESTRICT data = reinterpret_cast<const PxU8*>(materials) + id * stride;
|
||||
return *reinterpret_cast<const MaterialType*>(data);
|
||||
}
|
||||
|
||||
struct PX_ALIGN_PREFIX(16) PxgPBDParticleMaterialDerived
|
||||
{
|
||||
PxReal surfaceTensionDerived;
|
||||
PxReal cohesion;
|
||||
PxReal cohesion1;
|
||||
PxReal cohesion2;
|
||||
|
||||
|
||||
__device__ PxgPBDParticleMaterialDerived() {}
|
||||
|
||||
__device__ PxgPBDParticleMaterialDerived(const PxReal surfaceTension_,
|
||||
const PxReal cohesion_, const PxReal cohesion1_, const PxReal cohesion2_)
|
||||
: surfaceTensionDerived(surfaceTension_), cohesion(cohesion_), cohesion1(cohesion1_), cohesion2(cohesion2_)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct PxgParticleSystemData
|
||||
{
|
||||
public:
|
||||
PxVec3 mBoundCenter;
|
||||
|
||||
PxU32 mElementIndex;
|
||||
PxU32 mRemapIndex;
|
||||
|
||||
PxReal mRestOffset;
|
||||
PxReal mSolidRestOffset;
|
||||
|
||||
PxReal mSpiky1;
|
||||
PxReal mSpiky2;
|
||||
PxReal mRestDensity;
|
||||
PxReal mRestDensityBoundary;
|
||||
PxReal mFluidSurfaceConstraintScale;
|
||||
PxReal mLambdaScale;
|
||||
|
||||
PxReal mRelaxationFactor;
|
||||
PxReal mFluidRestOffset;
|
||||
PxReal mInvRestDensity;
|
||||
|
||||
PxU32 mFlags;
|
||||
|
||||
PxReal mMaxVelocity;
|
||||
|
||||
PxU16 mLockFlags;
|
||||
PxU32 mNumPhaseToMaterials;
|
||||
|
||||
PxVec3 mWind;
|
||||
};
|
||||
|
||||
|
||||
//each particle has one collision header
|
||||
struct PX_ALIGN_PREFIX(8) PxgParticleCollisionHeader
|
||||
{
|
||||
public:
|
||||
PxU32 mPrimitiveCollisionStartIndex;
|
||||
PxU32 mPrimitiveCounts; //how many primitives are colliding with this particle
|
||||
} PX_ALIGN_SUFFIX(8);
|
||||
|
||||
PX_ALIGN_PREFIX(16)
|
||||
struct PxgParticleContactInfo
|
||||
{
|
||||
static const PxU32 MaxStaticContactsPerParticle = 12;
|
||||
static const PxU32 MaxStaticContactsPerMesh = 6;
|
||||
float4 mNormal_PenW;
|
||||
}PX_ALIGN_SUFFIX(16);
|
||||
|
||||
struct PxgParticleSimBuffer
|
||||
{
|
||||
float4* mPositionInvMasses;
|
||||
float4* mVelocities;
|
||||
float4* mRestPositions;
|
||||
PxU32* mPhases;
|
||||
PxParticleVolume* mVolumes;
|
||||
PxParticleRigidFilterPair* mFilterPairs;
|
||||
PxParticleRigidAttachment* mRigidAttachments;
|
||||
|
||||
PxU32 mNumActiveParticles;
|
||||
PxU32 mNumVolumes;
|
||||
PxU32 mNumFilterPairs;
|
||||
PxU32 mNumRigidAttachments;
|
||||
|
||||
PxU32 mFlags;
|
||||
PxU32 mDiffuseParticleBufferIndex;
|
||||
PxU32 mUniqueId; //Remains unchanged over the whole lifetime of a buffer
|
||||
};
|
||||
|
||||
struct PxgParticleClothSimBuffer
|
||||
{
|
||||
//Cloth
|
||||
PxU32* mAccumulatedSpringsPerPartitions; //numPartitions;
|
||||
PxU32* mAccumulatedCopiesPerParticles; //numSprings
|
||||
PxU32* mRemapOutput; //numSprings * 2
|
||||
PxParticleSpring* mOrderedSprings; //numSprings
|
||||
PxU32* mTriangles; //numTriangles * 3
|
||||
|
||||
PxU32* mSortedClothStartIndices; //numCloths
|
||||
|
||||
PxParticleCloth* mCloths; //numCloths
|
||||
|
||||
float4* mRemapPositions;
|
||||
float4* mRemapVelocities;
|
||||
PxReal* mSpringLambda;
|
||||
PxReal* mInflatableLambda;
|
||||
|
||||
PxU32 mParticleBufferIndex; //which particle buffer this cloth buffer associated with
|
||||
PxU32 mNumSprings;
|
||||
PxU32 mNumPartitions;
|
||||
PxU32 mNumCloths;
|
||||
PxU32 mNumTriangles;
|
||||
};
|
||||
|
||||
struct PxgParticleRigidSimBuffer
|
||||
{
|
||||
PxReal* mRigidCoefficients; //mNumRigids;
|
||||
float4* mRigidTranslations; //mNumRigids
|
||||
float4* mRigidRotations; //mNumRigids
|
||||
PxU32* mRigidOffsets; //mNumRigids + 1
|
||||
float4* mRigidLocalPositions; //mNumActiveParticles
|
||||
float4* mRigidLocalNormals; //mNumActiveParticles
|
||||
|
||||
PxU32 mNumRigids;
|
||||
PxU32 mParticleBufferIndex;
|
||||
};
|
||||
|
||||
struct PxgParticleDiffuseSimBuffer
|
||||
{
|
||||
PxDiffuseParticleParams mParams;
|
||||
|
||||
float4* mDiffusePositions_LifeTime;
|
||||
float4* mDiffuseVelocities;
|
||||
|
||||
PxU32 mMaxNumParticles;
|
||||
int* mNumDiffuseParticles; //device memory
|
||||
int* mNumActiveDiffuseParticles; //pinned memory
|
||||
|
||||
PxU32 mStartIndex;
|
||||
|
||||
PxU32 mFlags;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class BufferClass>
|
||||
class PxgParticleBufferBase : public BufferClass, public PxUserAllocated
|
||||
{
|
||||
public:
|
||||
PxgParticleBufferBase(PxU32 maxNumParticles, PxU32 maxNumVolumes, PxCudaContextManager& contextManager);
|
||||
virtual ~PxgParticleBufferBase();
|
||||
|
||||
//PxsParticleBuffer
|
||||
	// --- PxsParticleBuffer interface -------------------------------------
	// D-suffixed accessors return device (GPU) pointers, H-suffixed ones the
	// pinned host mirrors allocated by allocHostBuffers().
	virtual PxVec4* getPositionInvMassesD() const PX_OVERRIDE PX_FINAL { return mPositionInvMassesD; }
	virtual PxVec4* getVelocitiesD() const PX_OVERRIDE PX_FINAL { return mVelocitiesD; }
	virtual PxU32* getPhasesD() const PX_OVERRIDE PX_FINAL { return mPhasesD; }
	virtual PxParticleVolume* getParticleVolumesD() const PX_OVERRIDE PX_FINAL { return mVolumesD; }
	virtual PxVec4* getPositionInvMassesH() const PX_OVERRIDE PX_FINAL { return mPositionInvMassesH; }
	virtual PxVec4* getVelocitiesH() const PX_OVERRIDE PX_FINAL { return mVelocitiesH; }
	virtual PxU32* getPhasesH() const PX_OVERRIDE PX_FINAL { return mPhasesH; }
	virtual PxParticleVolume* getParticleVolumesH() const PX_OVERRIDE PX_FINAL { return mVolumesH; }
	// Deliberately not PX_FINAL: PxgParticleClothBuffer overrides this below.
	virtual void setNbActiveParticles(PxU32 nbActiveParticles) PX_OVERRIDE;
	virtual PxU32 getNbActiveParticles() const PX_OVERRIDE PX_FINAL { return mNumActiveParticles; }
	virtual PxU32 getMaxParticles() const PX_OVERRIDE PX_FINAL { return mMaxNumParticles; }
	virtual PxU32 getNbParticleVolumes() const PX_OVERRIDE PX_FINAL { return mNumParticleVolumes; }
	virtual void setNbParticleVolumes(PxU32 nbParticleVolumes) PX_OVERRIDE PX_FINAL { mNumParticleVolumes = nbParticleVolumes; }
	virtual PxU32 getMaxParticleVolumes() const PX_OVERRIDE PX_FINAL { return mMaxNumVolumes; }
	virtual void setRigidFilters(PxParticleRigidFilterPair* filters, PxU32 nbFilters) PX_OVERRIDE PX_FINAL;
	virtual void setRigidAttachments(PxParticleRigidAttachment* attachments, PxU32 nbAttachments) PX_OVERRIDE PX_FINAL;
	virtual PxU32 getFlatListStartIndex() const PX_OVERRIDE PX_FINAL { return mFlatListStartIndex; }
	// Flags accumulate (OR) until consumed elsewhere; they are never cleared here.
	virtual void raiseFlags(PxParticleBufferFlag::Enum flags) PX_OVERRIDE PX_FINAL { mBufferFlags |= flags; }
	virtual PxU32 getUniqueId() const PX_OVERRIDE PX_FINAL { return mUniqueId; }
	virtual void allocHostBuffers() PX_OVERRIDE PX_FINAL;
	//~PxsParticleBuffer

	// --- internal (non-interface) helpers --------------------------------
	void setFlatListStartIndex(PxU32 flatListStartIndex) { mFlatListStartIndex = flatListStartIndex; }
	PxParticleRigidAttachment* getRigidAttachments() const { return mRigidAttachments; }
	PxU32 getNbRigidAttachments() const { return mNumRigidAttachments; }
	PxParticleRigidFilterPair* getRigidFilters() const { return mFilterPairs; }
	PxU32 getNbRigidFilters() const { return mNumFilterPairs; }
	// Asynchronous device-to-host copy on the given CUDA stream.
	void copyToHost(CUstream stream);

	PxU32 mBufferFlags;

public:
	PxCudaContextManager& mContextManager;

	// Device-side arrays (D) and their host mirrors (H).
	PxVec4* mPositionInvMassesD;
	PxVec4* mVelocitiesD;
	PxU32* mPhasesD;
	PxParticleVolume* mVolumesD;
	PxVec4* mPositionInvMassesH;
	PxVec4* mVelocitiesH;
	PxU32* mPhasesH;
	PxParticleVolume* mVolumesH;
	PxParticleRigidFilterPair* mFilterPairs;
	PxParticleRigidAttachment* mRigidAttachments;

	// Counts / capacities.
	PxU32 mNumActiveParticles;
	PxU32 mMaxNumParticles;
	PxU32 mNumParticleVolumes;
	PxU32 mMaxNumVolumes;
	PxU32 mNumFilterPairs;
	PxU32 mNumRigidAttachments;
	PxU32 mFlatListStartIndex;
	PxU32 mUniqueId;
};
|
||||
|
||||
// GPU-side implementation of the plain particle buffer (positions, velocities,
// phases, volumes) built on the shared PxgParticleBufferBase machinery.
class PxgParticleBuffer : public PxgParticleBufferBase<PxsParticleBuffer>
{
public:
	PxgParticleBuffer(PxU32 maxNumParticles, PxU32 maxNumVolumes, PxCudaContextManager& contextManager);
	virtual ~PxgParticleBuffer() {}

	//PxsParticleBuffer
	// Self-deleting release: the buffer owns its own lifetime.
	virtual void release() PX_OVERRIDE PX_FINAL { PX_DELETE_THIS; }
	//~PxsParticleBuffer

	// Asynchronous device-to-host copy on the given CUDA stream.
	void copyToHost(CUstream stream);
};
|
||||
|
||||
// Particle buffer variant that additionally carries diffuse particles
// (position/lifetime and velocity arrays plus a device-side counter).
class PxgParticleAndDiffuseBuffer : public PxgParticleBufferBase<PxsParticleAndDiffuseBuffer>
{
public:
	PxgParticleAndDiffuseBuffer(PxU32 maxNumParticles, PxU32 maxNumVolumes, PxU32 maxNumDiffuseParticles, PxCudaContextManager& contextManager);
	virtual ~PxgParticleAndDiffuseBuffer();

	//PxsParticleAndDiffuseBuffer
	// Self-deleting release: the buffer owns its own lifetime.
	virtual void release() PX_OVERRIDE PX_FINAL { PX_DELETE_THIS; }
	virtual PxVec4* getDiffusePositionLifeTimeD() const PX_OVERRIDE PX_FINAL { return mDiffusePositionsLifeTimeD; }
	virtual PxVec4* getDiffuseVelocitiesD() const PX_OVERRIDE PX_FINAL { return mDiffuseVelocitiesD; }
	// Reads the pinned host-side counter; element 0 holds the active count.
	virtual PxU32 getNbActiveDiffuseParticles() const PX_OVERRIDE PX_FINAL { return mNumActiveDiffuseParticlesH[0]; }
	virtual void setMaxActiveDiffuseParticles(PxU32 maxActiveDiffuseParticles) PX_OVERRIDE PX_FINAL;
	virtual PxU32 getMaxDiffuseParticles() const PX_OVERRIDE PX_FINAL { return mMaxNumDiffuseParticles; }
	virtual void setDiffuseParticleParams(const PxDiffuseParticleParams& params) PX_OVERRIDE PX_FINAL;
	virtual const PxDiffuseParticleParams& getDiffuseParticleParams() const PX_OVERRIDE PX_FINAL { return mParams; }
	//~PxsParticleAndDiffuseBuffer

	// Asynchronous device-to-host copy on the given CUDA stream.
	void copyToHost(CUstream stream);

public:
	PxDiffuseParticleParams mParams;
	// Device-side diffuse particle arrays.
	PxVec4* mDiffusePositionsLifeTimeD;
	PxVec4* mDiffuseVelocitiesD;
	PxI32* mNumDiffuseParticlesD;
	PxU32 mMaxNumDiffuseParticles;
	PxU32 mMaxActiveDiffuseParticles;
	PxI32* mNumActiveDiffuseParticlesH; //pinned memory
};
|
||||
|
||||
// Particle buffer variant for cloth: adds rest positions, triangle topology,
// partitioned spring data and per-cloth bookkeeping on the device.
class PxgParticleClothBuffer : public PxgParticleBufferBase<PxsParticleClothBuffer>
{
public:
	PxgParticleClothBuffer(PxU32 maxNumParticles, PxU32 maxNumVolumes, PxU32 maxNumCloths,
		PxU32 maxNumTriangles, PxU32 maxNumSprings, PxCudaContextManager& contextManager);
	virtual ~PxgParticleClothBuffer();

	//PxsParticleClothBuffer
	// Self-deleting release: the buffer owns its own lifetime.
	virtual void release() PX_OVERRIDE PX_FINAL { PX_DELETE_THIS; }
	virtual PxVec4* getRestPositionsD() PX_OVERRIDE PX_FINAL { return mRestPositionsD; }
	virtual PxU32* getTrianglesD() const PX_OVERRIDE PX_FINAL { return mTriangleIndicesD; }
	virtual void setNbTriangles(PxU32 nbTriangles) PX_OVERRIDE PX_FINAL { mNumTriangles = nbTriangles; }
	virtual PxU32 getNbTriangles() const PX_OVERRIDE PX_FINAL { return mNumTriangles; }
	virtual PxU32 getNbSprings() const PX_OVERRIDE PX_FINAL { return mNumSprings; }
	virtual PxParticleSpring* getSpringsD() PX_OVERRIDE PX_FINAL { return mOrderedSpringsD; }
	virtual void setCloths(PxPartitionedParticleCloth& cloths) PX_OVERRIDE PX_FINAL;
	// Overrides the non-final base declaration.
	virtual void setNbActiveParticles(PxU32 nbActiveParticles) PX_OVERRIDE PX_FINAL;
	//~PxsParticleClothBuffer

	// Asynchronous device-to-host copy on the given CUDA stream.
	void copyToHost(CUstream stream);

public:
	PxVec4* mRestPositionsD;
	PxU32* mTriangleIndicesD;

	// Partitioned spring solve data (sizes per the original annotations).
	PxU32* mAccumulatedSpringsPerPartitionsD; //numPartitions;
	PxU32* mAccumulatedCopiesPerParticlesD; //numSprings
	PxU32* mRemapOutputD; //numSprings * 2
	PxParticleSpring* mOrderedSpringsD; //numSprings

	PxU32* mSortedClothStartIndicesD; //numCloths

	PxParticleCloth* mClothsD; //numClothes

	PxVec4* mRemapPositionsD;
	PxVec4* mRemapVelocitiesD;
	PxReal* mSpringLambdaD;
	PxReal* mInflatableLambdaD;

	// Capacities fixed at construction.
	PxU32 mMaxNumCloths;
	PxU32 mMaxNumTriangles;
	PxU32 mMaxNumSprings;
	PxU32 mNumPartitions;
	PxU32 mMaxSpringsPerPartition;

	// Current counts.
	PxU32 mNumSprings;
	PxU32 mNumCloths;
	PxU32 mNumTriangles;
	PxU32 mRemapOutputSize;
};
|
||||
|
||||
// Particle buffer variant for particle rigids: per-rigid offsets, stiffness
// coefficients, local-space positions/normals and per-rigid transforms, all
// as device-side arrays.
class PxgParticleRigidBuffer : public PxgParticleBufferBase<PxsParticleRigidBuffer>
{
public:
	PxgParticleRigidBuffer(PxU32 maxNumParticles, PxU32 maxNumVolumes, PxU32 maxNumRigids, PxCudaContextManager& contextManager);
	virtual ~PxgParticleRigidBuffer();

	//PxsParticleRigidBuffer
	// Self-deleting release: the buffer owns its own lifetime.
	virtual void release() PX_OVERRIDE PX_FINAL { PX_DELETE_THIS; }
	// 'virtual' spelled out on the accessors below for consistency with the
	// sibling buffer classes in this header (PX_OVERRIDE already implies it;
	// no behavioral change).
	virtual PxU32* getRigidOffsetsD() const PX_OVERRIDE PX_FINAL { return mRigidOffsetsD; }
	virtual PxReal* getRigidCoefficientsD() const PX_OVERRIDE PX_FINAL { return mRigidCoefficientsD; }
	virtual PxVec4* getRigidLocalPositionsD() const PX_OVERRIDE PX_FINAL { return mRigidLocalPositionsD; }
	virtual PxVec4* getRigidLocalNormalsD() const PX_OVERRIDE PX_FINAL { return mRigidLocalNormalsD; }
	virtual PxVec4* getRigidTranslationsD() const PX_OVERRIDE PX_FINAL { return mRigidTranslationsD; }
	virtual PxVec4* getRigidRotationsD() const PX_OVERRIDE PX_FINAL { return mRigidRotationsD; }
	virtual void setNbRigids(const PxU32 nbRigids) PX_OVERRIDE PX_FINAL { mNumActiveRigids = nbRigids; }
	virtual PxU32 getNbRigids() const PX_OVERRIDE PX_FINAL { return mNumActiveRigids; }
	//~PxsParticleRigidBuffer

	// Asynchronous device-to-host copy on the given CUDA stream.
	void copyToHost(CUstream stream);

public:
	// Device-side per-rigid arrays.
	PxU32* mRigidOffsetsD;
	PxReal* mRigidCoefficientsD;
	PxVec4* mRigidLocalPositionsD;
	PxVec4* mRigidLocalNormalsD;
	PxVec4* mRigidTranslationsD;
	PxVec4* mRigidRotationsD;
	PxU32 mNumActiveRigids;
	PxU32 mMaxNumRigids;
};
|
||||
|
||||
#if PX_VC
|
||||
#pragma warning(push)
|
||||
#pragma warning( disable : 4324 ) // Padding was added at the end of a structure because of a __declspec(align) value.
|
||||
#endif
|
||||
|
||||
PX_ALIGN_PREFIX(16)
// Device-facing mirror of a complete particle system: raw pointers into the
// CUDA buffers owned by PxgParticleSystemBuffer / PxgParticleSystemDiffuseBuffer.
// This aggregate is copied to the GPU, so it holds no ownership.
// NOTE(review): the trailing "//N M" comments look like historical byte-offset
// bookkeeping and may be stale — verify before relying on them.
class PxgParticleSystem : public PxGpuParticleSystem
{
public:

	float4* mOriginPos_InvMass;						//8 16
	PxU32* mGridParticleHash;						//16 32

	float4* mAccumDeltaP;							//32 80
	//float4* mDeltaP;								//32 80
	float4* mSortedDeltaP;							//32 80
	PxU32* mCellStart;								//36 88
	PxU32* mCellEnd;								//40 96
	PxgParticleCollisionHeader* mCollisionHeaders;	//44 104

	float2* mDensityCollisionImpulses;
	// One-way (particle vs. rigid) contact storage.
	PxgParticleContactInfo* mOneWayContactInfos;	//56 128
	float2* mOneWayForces;							//60
	PxNodeIndex* mOneWayNodeIndex;					//64 136
	float4* mOneWaySurfaceVelocity;					//68 144
	PxU32* mOneWayContactCount;						//72 152

	float4* mRestArray;								//84

	PxgParticleSystemData mData;

	// Fluid (PBD) quantities.
	PxReal* mDensity;
	PxReal* mStaticDensity;
	float4* mSurfaceNormal;
	float4* mDelta;
	float4* mCurl;

	// Normals
	float4* mNormalArray;

	float4* mSortedOriginPos_InvMass;

	// Per-user-buffer sim data plus run-sum/unique-id indexing arrays.
	PxgParticleSimBuffer* mParticleSimBuffers;
	PxU32* mParticleBufferRunsum;
	PxU32* mParticleBufferSortedUniqueIds;
	PxU32* mParticleBufferSortedUniqueIdsOriginalIndex;

	PxgParticleClothSimBuffer* mClothSimBuffers;
	PxgParticleRigidSimBuffer* mRigidSimBuffers;
	PxgParticleDiffuseSimBuffer* mDiffuseSimBuffers;

	PxU32* mAttachmentRunsum;

	PxU32 mNumClothBuffers;
	PxU32 mNumRigidBuffers;
	PxU32 mNumDiffuseBuffers;
	PxU32 mNumRigidAttachments;
	PxU32 mRigidAttachmentOffset;

	// Diffuse particles
	int* mNumDiffuseParticles;
	float4* mDiffusePosition_LifeTime;
	float4* mDiffuseVelocity;
	float2* mDiffusePotentials;
	PxU32* mDiffuseCellStart;
	PxU32* mDiffuseCellEnd;
	PxU32* mDiffuseGridParticleHash;
	float4* mDiffuseOriginPos_LifeTime;
	float4* mDiffuseSortedPos_LifeTime;
	float4* mDiffuseSortedOriginPos_LifeTime;
	float4* mDiffuseSortedVel;
	//GPU pointer to the mapping from sorted particle ID to unsorted particle ID
	PxU32* mDiffuseSortedToUnsortedMapping;
	//GPU pointer to the mapping from unsortedParticle ID to sorted particle ID
	PxU32* mDiffuseUnsortedToSortedMapping;
	PxgParticleContactInfo* mDiffuseOneWayContactInfos;
	float* mDiffuseOneWayForces;
	PxNodeIndex* mDiffuseOneWayNodeIndex;
	PxU32* mDiffuseOneWayContactCount;

	// Material lookup data.
	PxgPBDParticleMaterialDerived* mDerivedPBDMaterialData;
	PxU32 mParticleMaterialStride;

	PxU16* mPhaseGroupToMaterialHandle;

}PX_ALIGN_SUFFIX(16);
|
||||
|
||||
#if PX_VC
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
// Owns the CUDA buffers backing one PxgParticleSystem; the raw device pointers
// in PxgParticleSystem alias this storage.
class PxgParticleSystemBuffer
{
public:

	PxgParticleSystemBuffer(PxgHeapMemoryAllocatorManager* heapMemoryManager);

	// Core particle state and grid-sort scratch.
	PxgTypedCudaBuffer<PxVec4> originalPosition_mass;	//we should be able to get rid of this buffer
	PxgTypedCudaBuffer<PxU32> grid_particle_hash;
	PxgTypedCudaBuffer<PxU32> grid_particle_index;
	PxgTypedCudaBuffer<PxVec4> sorted_position_mass;
	PxgTypedCudaBuffer<PxVec4> sorted_velocity;
	PxgTypedCudaBuffer<PxVec4> accumDeltaV;
	PxgTypedCudaBuffer<PxVec4> sortedDeltaP;
	PxgTypedCudaBuffer<PxU32> cell_start;
	PxgTypedCudaBuffer<PxU32> cell_end;
	PxgTypedCudaBuffer<PxgParticleCollisionHeader> collision_headers;
	PxgTypedCudaBuffer<PxU32> collision_counts;
	PxgTypedCudaBuffer<PxU32> collision_index;
	PxgTypedCudaBuffer<float2> collision_impulses;

	PxgTypedCudaBuffer<PxU32> phases;
	PxgTypedCudaBuffer<float4> unsortedpositions;
	PxgTypedCudaBuffer<float4> unsortedvelocities;
	PxgTypedCudaBuffer<float4> restArray;
	PxgTypedCudaBuffer<float4> normal;

	// Fluid quantities.
	PxgTypedCudaBuffer<PxReal> density;
	PxgTypedCudaBuffer<PxReal> staticDensity;
	PxgTypedCudaBuffer<PxVec4> surfaceNormal;

	PxgTypedCudaBuffer<PxVec4> delta;
	PxgTypedCudaBuffer<PxVec4> curl;

	PxgTypedCudaBuffer<float4> sorted_originalPosition_mass;
	PxgTypedCudaBuffer<PxU32> sortedPhaseArray;

	// One-way particle/rigid contact storage.
	PxgTypedCudaBuffer<PxgParticleContactInfo> particleOneWayContacts;
	PxgTypedCudaBuffer<float2> particleOneWayForces;
	PxgTypedCudaBuffer<PxNodeIndex> particleOneWayContactsNodeIndices;
	PxgTypedCudaBuffer<float4> particleOneWayContactsSurfaceVelocities;
	PxgTypedCudaBuffer<PxU32> particleOneWayContactCount;

	PxgTypedCudaBuffer<PxU32> reverseLookup;

	// Material lookup.
	PxgTypedCudaBuffer<PxU16> phase_group_to_material_handle;
	PxgTypedCudaBuffer<PxgPBDParticleMaterialDerived> derivedPBDMaterialProperties;

	// Per-user-buffer sim data (device side).
	PxgTypedCudaBuffer<PxgParticleSimBuffer> user_particle_buffer;			//PxgPBDParticleBuffer
	PxgTypedCudaBuffer<PxU32> user_particle_buffer_runsum;					//PxU32*
	PxgTypedCudaBuffer<PxU32> user_particle_buffer_sorted_unique_ids;		//PxU32*
	PxgTypedCudaBuffer<PxU32> user_particle_buffer_runsum_sorted_unique_ids_original_index; //PxU32*
	PxgTypedCudaBuffer<PxgParticleClothSimBuffer> user_cloth_buffer;
	PxgTypedCudaBuffer<PxgParticleRigidSimBuffer> user_rigid_buffer;
	PxgTypedCudaBuffer<PxgParticleDiffuseSimBuffer> user_diffuse_buffer;

	PxgTypedCudaBuffer<PxU32> attachmentRunSum;
	PxgTypedCudaBuffer<PxU32> referencedRigidsRunsum;

	// Pinned host mirrors used for upload/readback.
	PxPinnedArray<PxgParticleSimBuffer> mHostParticleBuffers;
	PxPinnedArray<PxgParticleClothSimBuffer> mHostClothBuffers;
	PxPinnedArray<PxgParticleRigidSimBuffer> mHostRigidBuffers;
	PxPinnedArray<PxgParticleDiffuseSimBuffer> mHostDiffuseBuffers;

	PxInt32ArrayPinned mAttachmentRunSum;
	PxInt32ArrayPinned mParticleBufferRunSum;
	PxInt32ArrayPinned mReferencedRigidsRunsum;

	PxInt32ArrayPinned mParticleBufferSortedUniqueIds;
	PxInt32ArrayPinned mParticleBufferSortedUniqueIdsOriginalIndex;
	PxFloatArrayPinned mRandomTable;
	PxInt16ArrayPinned mHostPhaseGroupToMaterialHandle;
};
|
||||
|
||||
// Owns the CUDA buffers backing the diffuse-particle portion of a particle
// system; the diffuse pointers in PxgParticleSystem alias this storage.
class PxgParticleSystemDiffuseBuffer
{
public:
	PxgParticleSystemDiffuseBuffer(PxgHeapMemoryAllocatorManager* heapMemoryManager);

	// Diffuse particle data
	PxgTypedCudaBuffer<PxVec4> diffuse_positions;
	PxgTypedCudaBuffer<PxVec4> diffuse_velocities;
	PxgTypedCudaBuffer<PxVec4> diffuse_potentials;
	PxgTypedCudaBuffer<PxU32> diffuse_cell_start;
	PxgTypedCudaBuffer<PxU32> diffuse_cell_end;
	PxgTypedCudaBuffer<PxU32> diffuse_grid_particle_hash;
	PxgTypedCudaBuffer<PxU32> diffuse_sorted_to_unsorted_mapping;
	PxgTypedCudaBuffer<PxU32> diffuse_unsorted_to_sorted_mapping;
	PxgTypedCudaBuffer<PxVec4> diffuse_origin_pos_life_time;
	PxgTypedCudaBuffer<PxVec4> diffuse_sorted_pos_life_time;
	PxgTypedCudaBuffer<PxVec4> diffuse_sorted_origin_pos_life_time;
	PxgTypedCudaBuffer<PxVec4> diffuse_sorted_vel;
	// One-way diffuse/rigid contact storage.
	PxgTypedCudaBuffer<PxgParticleContactInfo> diffuse_one_way_contacts;
	PxgTypedCudaBuffer<PxReal> diffuse_one_way_forces;
	PxgTypedCudaBuffer<PxNodeIndex> diffuse_one_way_contacts_node_indices;
	PxgTypedCudaBuffer<PxU32> diffuse_one_way_contact_count;
	PxgTypedCudaBuffer<PxU32> diffuse_particle_count;
};
|
||||
|
||||
|
||||
PX_ALIGN_PREFIX(16)
// One particle-vs-primitive contact record as produced by
// PxgParticleContactWriter::writeContact below.
struct PxgParticlePrimitiveContact
{
public:
	PxVec4 normal_pen;	//normal pen
	
	PxU64 rigidId;		//the corresponding rigid body node index
	PxU64 particleId;	//particle index
}PX_ALIGN_SUFFIX(16);
|
||||
|
||||
|
||||
struct PxgParticleContactWriter
|
||||
{
|
||||
PxgParticlePrimitiveContact* PX_RESTRICT particleContacts;
|
||||
PxU32* numTotalContacts;
|
||||
PxU64* contactSortedByParticle;
|
||||
PxU32* tempContactByParticle;
|
||||
PxU32* contactIndexSortedByParticle;
|
||||
PxNodeIndex* contactByRigid;
|
||||
PxU32* tempContactByRigid;
|
||||
PxU32* contactIndexSortedByRigid;
|
||||
PxU32 maxContacts;
|
||||
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE void writeContact(PxU32 index, const PxVec4& normalPen,
|
||||
PxU64 compressedParticleIndex, PxNodeIndex rigidId)
|
||||
{
|
||||
if (index < maxContacts)
|
||||
{
|
||||
contactByRigid[index] = rigidId;
|
||||
tempContactByRigid[index] = PxU32(rigidId.getInd() & 0xffffffff);
|
||||
contactIndexSortedByRigid[index] = index;
|
||||
|
||||
contactSortedByParticle[index] = compressedParticleIndex;
|
||||
tempContactByParticle[index] = PxGetParticleIndex(compressedParticleIndex);
|
||||
contactIndexSortedByParticle[index] = index;
|
||||
|
||||
PxgParticlePrimitiveContact& contact = particleContacts[index];
|
||||
contact.normal_pen = normalPen;
|
||||
contact.rigidId = rigidId.getInd();
|
||||
contact.particleId = compressedParticleIndex;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
334
engine/third_party/physx/source/gpusimulationcontroller/include/PxgParticleSystemCore.h
vendored
Normal file
334
engine/third_party/physx/source/gpusimulationcontroller/include/PxgParticleSystemCore.h
vendored
Normal file
@@ -0,0 +1,334 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_PARTICLE_SYSTEM_CORE_H
|
||||
#define PXG_PARTICLE_SYSTEM_CORE_H
|
||||
|
||||
#include "foundation/PxArray.h"
|
||||
#include "foundation/PxPinnedArray.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
|
||||
#include "PxSparseGridParams.h"
|
||||
|
||||
#include "cudamanager/PxCudaTypes.h"
|
||||
|
||||
#include "PxgCudaBuffer.h"
|
||||
#include "PxgNarrowphaseCore.h"
|
||||
#include "PxgNonRigidCoreCommon.h"
|
||||
#include "PxgParticleSystem.h"
|
||||
#include "PxgRadixSortDesc.h"
|
||||
#include "PxgSimulationCoreDesc.h"
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
namespace physx
|
||||
{
|
||||
	//this is needed to force PhysXSimulationControllerGpu linkage as Static Library!
	void createPxgParticleSystem();

	// Forward declarations — full definitions live in their own headers.
	class PxgBodySimManager;
	class PxgEssentialCore;
	class PxgCudaKernelWranglerManager;
	class PxCudaContextManager;
	class PxgHeapMemoryAllocatorManager;
	class PxgSimulationController;
	class PxgGpuContext;
	class PxBounds3;

	struct PxgShapeDescBuffer;
||||
|
||||
	// Blocked (32-wide) constraint data for particle-vs-primitive contacts,
	// laid out struct-of-arrays per block for coalesced GPU access.
	struct PxgParticlePrimitiveConstraintBlock
	{
		float4 raXn_velMultiplierW[32];
		float4 normal_errorW[32];
		//Friction tangent + invMass of the rigid body (avoids needing to read the mass)
		//Second tangent can be found by cross producting normal with fricTan0
		float4 fricTan0_invMass0[32];
		float4 raXnF0_velMultiplierW[32];
		float4 raXnF1_velMultiplierW[32];
		PxU64 rigidId[32];
		PxU64 particleId[32];
	};
|
||||
|
||||
	// Abstract GPU particle-system core: owns the device buffers, CUDA streams
	// and events used to integrate, grid-sort, collide and solve all particle
	// systems in a scene. Concrete subclasses implement the pure-virtual solver
	// stages.
	class PxgParticleSystemCore : public PxgNonRigidCore
	{
	public:
		PxgParticleSystemCore(PxgCudaKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager,
			PxgHeapMemoryAllocatorManager* heapMemoryManager, PxgSimulationController* simController,
			PxgGpuContext* gpuContext, PxU32 maxParticleContacts);
		virtual ~PxgParticleSystemCore();

		// --- per-step pipeline entry points (pure virtuals are implemented by
		// the concrete PBD core) ------------------------------------------------
		virtual void preIntegrateSystems(const PxU32 nbActiveParticleSystems, const PxVec3 gravity, const PxReal dt) = 0;
		virtual void updateBounds(PxgParticleSystem* particleSystems, PxU32* activeParticleSystems, const PxU32 nbActiveParticleSystems);
		virtual void updateGrid() = 0;
		virtual void selfCollision() = 0;
		void resetContactCounts();
		void sortContacts(const PxU32 nbActiveParticleSystems);

		virtual void gpuMemDmaUpParticleSystem(PxgBodySimManager& bodySimManager, CUstream stream) = 0;

		virtual void constraintPrep(CUdeviceptr prePrepDescd, CUdeviceptr prepDescd, CUdeviceptr solverCoreDescd, CUdeviceptr sharedDescd,
			const PxReal dt, CUstream solverStream, bool isTGS, PxU32 numSolverBodies) = 0;
		virtual void solve(CUdeviceptr prePrepDescd, CUdeviceptr solverCoreDescd,
			CUdeviceptr sharedDescd, CUdeviceptr artiCoreDescd, const PxReal dt, CUstream solverStream) = 0;
		virtual void solveTGS(CUdeviceptr prePrepDescd, CUdeviceptr solverCoreDescd,
			CUdeviceptr sharedDescd, CUdeviceptr artiCoreDescd, const PxReal dt, const PxReal totalInvDt, CUstream solverStream,
			const bool isVelocityIteration, PxI32 iterationIndex, PxI32 numTGSIterations, PxReal coefficient) = 0;

		virtual void integrateSystems(const PxReal dt, const PxReal epsilonSq) = 0;
		virtual void onPostSolve() = 0;

		virtual void getMaxIterationCount(PxgBodySimManager& bodySimManager, PxI32& maxPosIters, PxI32& maxVelIters) = 0;

		void updateParticleSystemData(PxgParticleSystem& sys, Dy::ParticleSystemCore& dyParticleSystemCore);
		//void updateSparseGridParams(PxSparseGridParams& params, Dy::ParticleSystemCore& dyParticleSystemCore);

		// --- buffer / stream / event accessors ---------------------------------
		PxgTypedCudaBuffer<PxgParticleSystem>& getParticleSystemBuffer() { return mParticleSystemBuffer; }
		PxgTypedCudaBuffer<PxU32>& getActiveParticleSystemBuffer() { return mActiveParticleSystemBuffer; }
		PxgCudaBuffer& getTempCellsHistogram() { return mTempCellsHistogramBuf; }
		PxgTypedCudaBuffer<PxU32>& getTempBlockCellsHistogram() { return mTempBlockCellsHistogramBuf; }
		PxgTypedCudaBuffer<PxU32>& getTempHistogramCount() { return mTempHistogramCountBuf; }

		PxgTypedCudaBuffer<PxgParticlePrimitiveContact>& getParticleContacts() { return mPrimitiveContactsBuf; }
		PxgTypedCudaBuffer<PxU32>& getParticleContactCount() { return mPrimitiveContactCountBuf; }

		CUstream getStream() { return mStream; }
		CUstream getFinalizeStream() { return mFinalizeStream; }

		CUevent getBoundsUpdatedEvent() { return mBoundUpdateEvent;}

		PxgDevicePointer<float4> getDeltaVelParticle() { return mDeltaVelParticleBuf.getTypedDevicePtr(); }

		virtual void updateParticles(const PxReal dt) = 0;

		virtual void finalizeVelocities(const PxReal dt, const PxReal scale) = 0;

		virtual void releaseParticleSystemDataBuffer() = 0;

		void gpuDMAActiveParticleIndices(const PxU32* activeParticleSystems, const PxU32 numActiveParticleSystems, CUstream stream);

		PX_FORCE_INLINE PxU32 getMaxParticles() { return mMaxParticles; }

		// Reads the pinned host-side contact counter.
		PxU32 getHostContactCount() { return *mHostContactCount; }

		PxPinnedArray<PxgParticleSystem> mNewParticleSystemPool;	//record the newly created particle system
		PxPinnedArray<PxgParticleSystem> mParticleSystemPool;		//persistent cpu mirror

		PxArray<PxU32> mNewParticleSystemNodeIndexPool;
		PxArray<PxU32> mParticleSystemNodeIndexPool;

		PxInt32ArrayPinned mDirtyParamsParticleSystems;

	protected:

		void releaseInternalParticleSystemDataBuffer();

		void getMaxIterationCount(PxgBodySimManager& bodySimManager, const PxU32 nbActiveParticles, const PxU32* activeParticles, PxI32& maxPosIters, PxI32& maxVelIters);

		void updateGrid(PxgParticleSystem* particleSystems, const PxU32* activeParticleSystems, const PxU32 nbActiveParticles,
			CUdeviceptr particleSystemsd);

		void copyUserBufferToUnsortedArray(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemsd,
			const PxU32 nbActiveParticle, CUstream bpStream);

		void copyUnsortedArrayToUserBuffer(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemsd, const PxU32 nbActiveParticles);

		void copyUserBufferDataToHost(PxgParticleSystem* particleSystems, PxU32* activeParticleSystems, PxU32 nbActiveParticleSystems);

		void copyUserDiffuseBufferToUnsortedArray(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemsd,
			const PxU32 nbActiveParticle, CUstream bpStream);

		//integrate particle position based on gravity
		void preIntegrateSystem(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemsd, const PxU32 nbActiveParticles, const PxVec3 gravity,
			const PxReal dt, CUstream bpStream);

		// calculate particle system's world bound
		void updateBound(const PxgParticleSystem& sys, PxgParticleSystem* particleSystems,
			PxBounds3* boundArray, PxReal* contactDists, CUstream bpStream);

		// calculate grid hash
		void calculateHash(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemsd, const PxU32 numActiveParticleSystems);

		// reorder particle arrays into sorted order and
		// find start and end of each cell
		void reorderDataAndFindCellStart(PxgParticleSystem* particleSystems, CUdeviceptr particleSystemsd, const PxU32 id, const PxU32 numParticles);

		void selfCollision(PxgParticleSystem& particleSystem, PxgParticleSystem* particleSystemsd, const PxU32 id, const PxU32 numParticles);

		//----------------------------------------------------------------------------------------
		//These methods run on the particle stream (mStream).
		virtual void prepParticleConstraint(CUdeviceptr prePrepDescd, CUdeviceptr prepDescd, CUdeviceptr sharedDescd, bool isTGS, const PxReal dt);

		//void solveSelfCollision(PxgParticleSystem& particleSystem, PxgParticleSystem* particleSystemsd, const PxU32 id, const PxU32 numParticles, PxReal dt);

		void applyDeltas(CUdeviceptr particleSystemd, CUdeviceptr activeParticleSystemd, const PxU32 nbActivParticleSystem, const PxReal dt, CUstream stream);
		/*void solveSprings(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemsd,
			const PxU32 nbActiveParticleSystems, const PxReal dt, bool isTGS);
		*/
		void solveOneWayCollision(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemsd,
			const PxU32 nbActiveParticleSystems, const PxReal dt, const PxReal biasCoefficient, const bool isVelocityIteration);

		void updateSortedVelocity(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemsd,
			const PxU32 nbActiveParticleSystems, const PxReal dt, const bool skipNewPositionAdjustment = false);

		void stepParticleSystems(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemsd, const PxU32 nbActiveParticleSystems,
			const PxReal dt, const PxReal totalInvDt, bool isParticleSystem);

		//-------------------------------------------------------------------------------
		//These methods run on the solverStream.
		void prepPrimitiveConstraint(CUdeviceptr prePrepDescd, CUdeviceptr prepDescd, CUdeviceptr sharedDescd,
			const PxReal dt, bool isTGS, CUstream solverStream);

		//this is for solving contacts between particles and primitives based on sorted by rigid id
		void solvePrimitiveCollisionForParticles(CUdeviceptr prePrepDescd, CUdeviceptr solverCoreDescd,
			CUdeviceptr sharedDescd, CUdeviceptr artiCoreDescd, const PxReal dt, bool isTGS, const PxReal coefficient,
			bool isVelIteration);

		void solvePrimitiveCollisionForRigids(CUdeviceptr prePrepDescd, CUdeviceptr solverCoreDescd,
			CUdeviceptr sharedDescd, CUdeviceptr artiCoreDescd, CUstream solverStream, const PxReal dt, bool isTGS, const PxReal coefficient,
			bool isVelIteration);

		void accumulateRigidDeltas(CUdeviceptr prePrepDescd, CUdeviceptr solverCoreDescd,
			CUdeviceptr sharedDescd, CUdeviceptr artiCoreDescd, CUdeviceptr rigidIdsd, CUdeviceptr numIdsd, CUstream stream,
			const bool isTGS);

		void prepRigidAttachments(CUdeviceptr prePrepDescd, CUdeviceptr prepDescd, bool isTGS, const PxReal dt, CUstream stream,
			const PxU32 nbActiveParticleSystems, CUdeviceptr activeParticleSystemsd, PxU32 numSolverBodies);

		void solveRigidAttachments(CUdeviceptr prePrepDescd, CUdeviceptr solverCoreDescd,
			CUdeviceptr sharedDescd, CUdeviceptr artiCoreDescd, CUstream solverStream, const PxReal dt,
			const bool isTGS, const PxReal biasCoefficient, const bool isVelocityIteration, CUdeviceptr particleSystemd, CUdeviceptr activeParticleSystemd, const PxU32 nbActiveParticleSystems);

		//integrate particle position and velocity based on contact constraints
		void integrateSystem(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemd, const PxU32 nbActiveParticleSystems, const PxReal dt, const PxReal epsilonSq);

		PxgTypedCudaBuffer<PxgParticleSystem> mParticleSystemBuffer;	//persistent buffer for particle system
		PxgTypedCudaBuffer<PxU32> mActiveParticleSystemBuffer;

		// Grid-sort scratch.
		PxgTypedCudaBuffer<PxU32> mTempGridParticleHashBuf;
		PxgTypedCudaBuffer<PxU32> mTempGridParticleIndexBuf;
		PxgTypedCudaBuffer<PxU32> mTempGridDiffuseParticleHashBuf;
		PxgTypedCudaBuffer<PxU32> mTempGridDiffuseParticleIndexBuf;

		PxgCudaBuffer mTempCellsHistogramBuf;
		PxgTypedCudaBuffer<PxU32> mTempBlockCellsHistogramBuf;
		PxgTypedCudaBuffer<PxU32> mTempHistogramCountBuf;
		PxgTypedCudaBuffer<PxBounds3> mTempBoundsBuf;

		//-----------------------buffer for primitive vs particles contacts
		PxgTypedCudaBuffer<PxgParticlePrimitiveContact> mPrimitiveContactsBuf;
		PxgTypedCudaBuffer<PxU32> mPrimitiveContactCountBuf;

		PxgTypedCudaBuffer<PxgParticlePrimitiveContact> mPrimitiveContactSortedByParticleBuf;
		PxgTypedCudaBuffer<PxgParticlePrimitiveContact> mPrimitiveContactSortedByRigidBuf;

		PxgTypedCudaBuffer<PxgParticlePrimitiveConstraintBlock> mPrimitiveConstraintSortedByParticleBuf;
		PxgTypedCudaBuffer<PxgParticlePrimitiveConstraintBlock> mPrimitiveConstraintSortedByRigidBuf;

		PxgTypedCudaBuffer<float2> mPrimitiveConstraintAppliedParticleForces;
		PxgTypedCudaBuffer<float2> mPrimitiveConstraintAppliedRigidForces;

		PxgTypedCudaBuffer<float4> mDeltaVelParticleBuf;

		PxgTypedCudaBuffer<float4> mDeltaVelRigidBuf;
		PxgTypedCudaBuffer<PxVec4> mTempBlockDeltaVelBuf;
		PxgTypedCudaBuffer<PxU64> mTempBlockRigidIdBuf;

		// Particle/rigid attachment constraint data.
		PxgTypedCudaBuffer<PxgParticleRigidConstraint> mParticleRigidConstraints;
		PxgTypedCudaBuffer<PxU64> mParticleRigidAttachmentIds;
		PxgTypedCudaBuffer<PxU32> mParticleRigidConstraintCount;
		PxgTypedCudaBuffer<PxReal> mParticleRigidAttachmentScaleBuffer;

		//------------------------------------------------------------------------

		CUstream mFinalizeStream;
		CUevent mFinalizeStartEvent;
		CUevent mBoundUpdateEvent;//this event is used to synchronize the broad phase stream(updateBound is running on broad phase stream) and mStream
		CUevent mSolveParticleEvent; //this event is used to synchronize solve particle/particle, paricle/rigid and solver rigid/particle
		CUevent mSelfCollisionEvent;
		CUevent mSolveParticleRigidEvent;
		CUevent mSolveRigidParticleEvent;
		PxU32 mCurrentTMIndex; //current temp marker buffer index

		PxgCudaBuffer mHasFlipPhase;

		PxArray<PxgParticleSystemBuffer*> mParticleSystemDataBuffer;	//persistent data, map with mParticleSystemBuffer

		// Diffuse particles
		PxPinnedArray<PxgRadixSortBlockDesc> mDiffuseParticlesRSDesc;

		PxVec3 mGravity;	//this get set in preIntegrateSystems

		PxU32 mNbTotalParticleSystems;
		PxU32 mMaxParticles;
		bool mHasNonZeroFluidBoundaryScale;
		PxU32 mMaxParticlesPerBuffer;
		PxU32 mMaxBuffersPerSystem;
		PxU32 mMaxDiffusePerBuffer;
		PxU32 mMaxDiffuseBuffersPerSystem;
		PxU32 mMaxRigidAttachmentsPerSystem;
		PxU32 mTotalRigidAttachments;
		PxU32* mHostContactCount;	// pinned host-side mirror read by getHostContactCount()

		friend class PxgSoftBodyCore;
	};
|
||||
|
||||
class PxgDiffuseParticleCore
|
||||
{
|
||||
public:
|
||||
PxgDiffuseParticleCore(PxgEssentialCore* core);
|
||||
|
||||
virtual ~PxgDiffuseParticleCore();
|
||||
|
||||
void releaseInternalDiffuseParticleDataBuffer();
|
||||
|
||||
void preDiffuseIntegrateSystem(CUdeviceptr particleSystemsd, CUdeviceptr activeParticleSystemsd, const PxU32 nbActiveParticles, const PxVec3 gravity,
|
||||
const PxReal dt, CUstream bpStream);
|
||||
|
||||
PxgEssentialCore* mEssentialCore;
|
||||
PxgCudaBuffer mDiffuseParticlesRandomTableBuf;
|
||||
|
||||
PxArray<PxgParticleSystemDiffuseBuffer*> mDiffuseParticleDataBuffer; //persistent data
|
||||
PxU32 mRandomTableSize;
|
||||
PxU32 mMaxDiffuseParticles; //the max number of diffuse particles
|
||||
|
||||
protected:
|
||||
void resizeDiffuseParticleParticleBuffers(PxgParticleSystem& particleSystem, PxgParticleSystemDiffuseBuffer* buffer, const PxU32 numParticles);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,68 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_PARTICLE_SYSTEM_CORE_KERNEL_INDICES_H
|
||||
#define PXG_PARTICLE_SYSTEM_CORE_KERNEL_INDICES_H
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
struct PxgParticleSystemKernelBlockDim
|
||||
{
|
||||
enum
|
||||
{
|
||||
UPDATEBOUND = 1024, //can't change this. updateBound kernel is relied on numOfWarpPerBlock = 32
|
||||
UPDATEGRID = 1024,
|
||||
BOUNDCELLUPDATE = 512,
|
||||
PS_COLLISION = 256, //128,
|
||||
PS_MESH_COLLISION = 512,
|
||||
PS_HEIGHTFIELD_COLLISION = 64,
|
||||
ACCUMULATE_DELTA = 512,
|
||||
PS_SOLVE = 256,
|
||||
PS_CELL_RECOMPUTE = 256,
|
||||
PS_INFLATABLE = 256,
|
||||
PS_SOLVE_SHAPE = 64,
|
||||
SCAN = 512
|
||||
};
|
||||
};
|
||||
|
||||
struct PxgParticleSystemKernelGridDim
|
||||
{
|
||||
enum
|
||||
{
|
||||
BOUNDCELLUPDATE = 32,
|
||||
PS_COLLISION = 1024,
|
||||
PS_MESH_COLLISION = 16384,
|
||||
PS_HEIGHTFIELD_COLLISION = 4096,
|
||||
ACCUMULATE_DELTA = 32,
|
||||
PS_CELL_RECOMPUTE = 32,
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
101
engine/third_party/physx/source/gpusimulationcontroller/include/PxgRadixSortCore.h
vendored
Normal file
101
engine/third_party/physx/source/gpusimulationcontroller/include/PxgRadixSortCore.h
vendored
Normal file
@@ -0,0 +1,101 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_RADIX_SORT_CORE_H
|
||||
#define PXG_RADIX_SORT_CORE_H
|
||||
|
||||
#include "PxgCudaBuffer.h"
|
||||
#include "PxgEssentialCore.h"
|
||||
#include "PxgKernelWrangler.h"
|
||||
#include "PxgRadixSortDesc.h"
|
||||
|
||||
#include "cudamanager/PxCudaTypes.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
#include "foundation/PxPinnedArray.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
class PxCudaContext;
|
||||
class PxgCudaKernelWranglerManager;
|
||||
|
||||
class PxgRadixSortCore
|
||||
{
|
||||
public:
|
||||
PxPinnedArray<PxgRadixSortBlockDesc> mRSDesc;
|
||||
PxgCudaBufferN<2> mRadixSortDescBuf; //radix sort with rank
|
||||
PxgCudaBuffer mRadixCountTotalBuf;
|
||||
PxU32 mRadixCountSize;
|
||||
PxgEssentialCore* mEssentialCore;
|
||||
|
||||
PxgRadixSortCore(PxgEssentialCore* core);
|
||||
|
||||
void allocate(PxU32 nbRequired = 1);
|
||||
|
||||
static void updateGPURadixSortDesc(PxCudaContext* cudaContext, const CUstream& stream, CUdeviceptr inputKeyd, CUdeviceptr inputRankd,
|
||||
CUdeviceptr outputKeyd, CUdeviceptr outputRankd, CUdeviceptr radixCountd, PxgRadixSortDesc* rsDescs,
|
||||
CUdeviceptr radixSortDescBuf0, CUdeviceptr radixSortDescBuf1, const PxU32 count);
|
||||
|
||||
static void sort(PxgCudaKernelWranglerManager* gpuKernelWranglerManager, PxCudaContext*cudaContext, const CUstream& stream,
|
||||
const PxU32 numOfKeys, PxgCudaBuffer* radixSortDescBuf, const PxU32 numBits, PxgRadixSortDesc* rsDescs);
|
||||
static void sort(PxgCudaKernelWranglerManager* gpuKernelWranglerManager, PxCudaContext*cudaContext, const CUstream& stream,
|
||||
PxgCudaBuffer* radixSortDescBuf, const PxU32 numBits);
|
||||
|
||||
|
||||
static PX_FORCE_INLINE PxI32 getNbBits(PxI32 x)
|
||||
{
|
||||
PxI32 n = 0;
|
||||
while (x >= 2)
|
||||
{
|
||||
++n;
|
||||
x /= 2;
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
void sort(CUdeviceptr inputKeyd, CUdeviceptr inputRankd, CUdeviceptr outputKeyd, CUdeviceptr outputRankd, const PxU32 numOfKeys, const PxU32 numBits, const CUstream& stream, PxU32 id = 0)
|
||||
{
|
||||
PxgRadixSortDesc* rsDescs = &mRSDesc[id * 2];
|
||||
|
||||
updateGPURadixSortDesc(mEssentialCore->mCudaContext, stream, inputKeyd, inputRankd, outputKeyd, outputRankd, mRadixCountTotalBuf.getDevicePtr(), rsDescs,
|
||||
mRadixSortDescBuf[0].getDevicePtr(), mRadixSortDescBuf[1].getDevicePtr(), numOfKeys);
|
||||
|
||||
sort(mEssentialCore->mGpuKernelWranglerManager, mEssentialCore->mCudaContext, stream, numOfKeys, mRadixSortDescBuf.begin(), numBits, rsDescs);
|
||||
}
|
||||
|
||||
void sort(CUdeviceptr inputKeyd, CUdeviceptr inputRankd, CUdeviceptr outputKeyd, CUdeviceptr outputRankd, const PxU32 numOfKeys, const PxU32 numBits, PxU32 id = 0)
|
||||
{
|
||||
sort(inputKeyd, inputRankd, outputKeyd, outputRankd, numOfKeys, numBits, mEssentialCore->mStream, id);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
146
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSDFBuilder.h
vendored
Normal file
146
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSDFBuilder.h
vendored
Normal file
@@ -0,0 +1,146 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_SDF_BUILDER_H
|
||||
#define PXG_SDF_BUILDER_H
|
||||
|
||||
#include "PxSDFBuilder.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "PxgKernelLauncher.h"
|
||||
|
||||
#include "PxgBVH.h"
|
||||
#include "GuSDF.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
#include "PxgAlgorithms.h"
|
||||
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
namespace physx
|
||||
{
|
||||
#endif
|
||||
|
||||
class PxgKernelLauncher;
|
||||
|
||||
// Create a linear BVH as described in Fast and Simple Agglomerative LBVH construction
|
||||
// this is a bottom-up clustering method that outputs one node per-leaf
|
||||
// Taken from Flex
|
||||
class PxgLinearBVHBuilderGPU
|
||||
{
|
||||
public:
|
||||
PxgLinearBVHBuilderGPU(PxgKernelLauncher& kernelLauncher);
|
||||
|
||||
// takes a bvh (host ref), and pointers to the GPU lower and upper bounds for each triangle
|
||||
// the priorities array allows specifying a 5-bit [0-31] value priority such that lower priority
|
||||
// leaves will always be returned first
|
||||
void buildFromLeaveBounds(PxgBVH& bvh, const PxVec4* lowers, const PxVec4* uppers, const PxI32* priorities, PxI32 n, PxBounds3* totalBounds,
|
||||
CUstream stream, bool skipAllocate = false);
|
||||
|
||||
void buildFromTriangles(PxgBVH& bvh, const PxVec3* vertices, const PxU32* triangleIndices, const PxI32* priorities,
|
||||
PxI32 n, PxBounds3* totalBounds, CUstream stream, PxReal boxMargin = 1e-5f);
|
||||
|
||||
void buildTreeAndWindingClustersFromTriangles(PxgBVH& bvh, PxgWindingClusterApproximation* windingNumberClustersD, const PxVec3* vertices, const PxU32* triangleIndices, const PxI32* priorities,
|
||||
PxI32 n, PxBounds3* totalBounds, CUstream stream, PxReal boxMargin = 1e-5f, bool skipAllocate = false);
|
||||
|
||||
|
||||
void resizeBVH(PxgBVH& bvh, PxU32 numNodes);
|
||||
void releaseBVH(PxgBVH& bvh);
|
||||
|
||||
//Allocates or resizes the linear bvh builder including the bvh itself
|
||||
void allocateOrResize(PxgBVH& bvh, PxU32 numItems);
|
||||
void release();
|
||||
|
||||
PxI32* mMaxTreeDepth;
|
||||
private:
|
||||
|
||||
void prepareHierarchConstruction(PxgBVH& bvh, const PxVec4* lowers, const PxVec4* uppers, const PxI32* priorities, PxI32 n, PxBounds3* totalBounds, CUstream stream);
|
||||
|
||||
PxgKernelLauncher mKernelLauncher;
|
||||
PxGpuRadixSort<PxU32> mSort;
|
||||
|
||||
// temporary data used during building
|
||||
PxU32* mIndices;
|
||||
PxI32* mKeys;
|
||||
PxReal* mDeltas;
|
||||
PxI32* mRangeLefts;
|
||||
PxI32* mRangeRights;
|
||||
PxI32* mNumChildren;
|
||||
|
||||
// bounds data when total item bounds built on GPU
|
||||
PxVec3* mTotalLower;
|
||||
PxVec3* mTotalUpper;
|
||||
PxVec3* mTotalInvEdges;
|
||||
|
||||
PxU32 mMaxItems;
|
||||
};
|
||||
|
||||
class PxgSDFBuilder : public PxSDFBuilder, public PxUserAllocated
|
||||
{
|
||||
private:
|
||||
PxgKernelLauncher mKernelLauncher;
|
||||
|
||||
void computeDenseSDF(const PxgBvhTriangleMesh& mesh, const PxgWindingClusterApproximation* windingNumberClustersD,
|
||||
const Gu::GridQueryPointSampler& sampler, PxU32 sizeX, PxU32 sizeY, PxU32 sizeZ, PxReal* sdfDataD, CUstream stream, PxReal* windingNumbersD = NULL);
|
||||
|
||||
// returns NULL if GPU errors occurred.
|
||||
PxReal* buildDenseSDF(const PxVec3* vertices, PxU32 numVertices, const PxU32* indicesOrig, PxU32 numTriangleIndices, PxU32 width, PxU32 height, PxU32 depth,
|
||||
const PxVec3& minExtents, const PxVec3& maxExtents, bool cellCenteredSamples, CUstream stream);
|
||||
|
||||
void compressSDF(PxReal* denseSdfD, PxU32 width, PxU32 height, PxU32 depth,
|
||||
PxU32 subgridSize, PxReal narrowBandThickness, PxU32 bytesPerSubgridPixel, PxReal errorThreshold,
|
||||
PxReal& subgridGlobalMinValue, PxReal& subgridGlobalMaxValue, PxArray<PxReal>& sdfCoarse, PxArray<PxU32>& sdfSubgridsStartSlots, PxArray<PxU8>& sdfDataSubgrids,
|
||||
PxU32& sdfSubgrids3DTexBlockDimX, PxU32& sdfSubgrids3DTexBlockDimY, PxU32& sdfSubgrids3DTexBlockDimZ, CUstream stream);
|
||||
|
||||
void fixHoles(PxU32 width, PxU32 height, PxU32 depth, PxReal* sdfDataD, const PxVec3& cellSize, const PxVec3& minExtents, const PxVec3& maxExtents,
|
||||
Gu::GridQueryPointSampler& sampler, CUstream stream);
|
||||
|
||||
bool allocateBuffersForCompression(PxReal*& backgroundSdfD, PxU32 numBackgroundSdfSamples, PxU32*& subgridAddressesD, PxU8*& subgridActiveD, PxU32 numAddressEntries,
|
||||
PxReal*& subgridGlobalMinValueD, PxReal*& subgridGlobalMaxValueD, PxGpuScan& scan);
|
||||
|
||||
void releaseBuffersForCompression(PxReal*& backgroundSdfD, PxU32*& subgridAddressesD, PxU8*& subgridActiveD, PxReal*& subgridGlobalMinValueD,
|
||||
PxReal*& subgridGlobalMaxValueD, PxGpuScan& scan);
|
||||
|
||||
public:
|
||||
PxgSDFBuilder(PxgKernelLauncher& kernelLauncher);
|
||||
|
||||
virtual bool buildSDF(const PxVec3* vertices, PxU32 numVertices, const PxU32* indicesOrig, PxU32 numTriangleIndices, PxU32 width, PxU32 height, PxU32 depth,
|
||||
const PxVec3& minExtents, const PxVec3& maxExtents, bool cellCenteredSamples, PxReal* sdf, CUstream stream) PX_OVERRIDE;
|
||||
|
||||
virtual bool buildSparseSDF(const PxVec3* vertices, PxU32 numVertices, const PxU32* indicesOrig, PxU32 numTriangleIndices, PxU32 width, PxU32 height, PxU32 depth,
|
||||
const PxVec3& minExtents, const PxVec3& maxExtents, PxReal narrowBandThickness, PxU32 subgridSize, PxSdfBitsPerSubgridPixel::Enum bytesPerSubgridPixel,
|
||||
PxArray<PxReal>& sdfCoarse, PxArray<PxU32>& sdfSubgridsStartSlots, PxArray<PxU8>& sdfDataSubgrids,
|
||||
PxReal& subgridsMinSdfValue, PxReal& subgridsMaxSdfValue,
|
||||
PxU32& sdfSubgrids3DTexBlockDimX, PxU32& sdfSubgrids3DTexBlockDimY, PxU32& sdfSubgrids3DTexBlockDimZ, CUstream stream) PX_OVERRIDE;
|
||||
|
||||
void release();
|
||||
};
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
} // namespace physx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
55
engine/third_party/physx/source/gpusimulationcontroller/include/PxgShapeSim.h
vendored
Normal file
55
engine/third_party/physx/source/gpusimulationcontroller/include/PxgShapeSim.h
vendored
Normal file
@@ -0,0 +1,55 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_SHAPESIM_H
|
||||
#define PXG_SHAPESIM_H
|
||||
|
||||
#include "foundation/PxTransform.h"
|
||||
#include "foundation/PxBounds3.h"
|
||||
#include "PxNodeIndex.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
struct PxgShapeSim
|
||||
{
|
||||
PxTransform mTransform; //
|
||||
PxBounds3 mLocalBounds; // local bounds
|
||||
PxNodeIndex mBodySimIndex; // body sim index
|
||||
PxU32 mHullDataIndex; // this index is shared with the np hull data(hull need to be gpu compatible)
|
||||
PxU16 mShapeFlags; // shape flags
|
||||
PxU16 mShapeType; // this indicates what type of shape(sphere, capsule, box or convexhull)
|
||||
};
|
||||
|
||||
struct PxgNewShapeSim : PxgShapeSim
|
||||
{
|
||||
PxU32 mElementIndex; // transform cache and bound index
|
||||
};
|
||||
|
||||
}//physx
|
||||
|
||||
#endif
|
||||
110
engine/third_party/physx/source/gpusimulationcontroller/include/PxgShapeSimManager.h
vendored
Normal file
110
engine/third_party/physx/source/gpusimulationcontroller/include/PxgShapeSimManager.h
vendored
Normal file
@@ -0,0 +1,110 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_SHAPESIM_MANAGER_H
|
||||
#define PXG_SHAPESIM_MANAGER_H
|
||||
|
||||
#include "foundation/PxArray.h"
|
||||
#include "foundation/PxPinnedArray.h"
|
||||
#include "PxgShapeSim.h"
|
||||
#include "PxgCudaBuffer.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
namespace Cm
|
||||
{
|
||||
class FlushPool;
|
||||
}
|
||||
|
||||
namespace Sc
|
||||
{
|
||||
class ShapeSimBase;
|
||||
}
|
||||
|
||||
struct PxsShapeCore;
|
||||
class PxBaseTask;
|
||||
class PxgHeapMemoryAllocatorManager;
|
||||
class PxgGpuNarrowphaseCore;
|
||||
class KernelWrangler;
|
||||
|
||||
struct PxgShapeSimData
|
||||
{
|
||||
PxgShapeSimData() : mShapeCore(NULL), mElementIndex_GPU(PX_INVALID_U32)
|
||||
{
|
||||
}
|
||||
|
||||
const PxsShapeCore* mShapeCore; // 4 or 8
|
||||
|
||||
// NodeIndex used to look up BodySim in island manager
|
||||
PxNodeIndex mBodySimIndex_GPU; // 8 or 12 unique identified for body
|
||||
|
||||
// ElementID - copy of ElementSim's getElementID()
|
||||
PxU32 mElementIndex_GPU; // 12 or 16 transform cache and bound index
|
||||
};
|
||||
|
||||
class PxgShapeSimManager
|
||||
{
|
||||
PX_NOCOPY(PxgShapeSimManager)
|
||||
public:
|
||||
PxgShapeSimManager(PxgHeapMemoryAllocatorManager* heapMemoryManager);
|
||||
|
||||
void addPxgShape(Sc::ShapeSimBase* shapeSimBase, const PxsShapeCore* shapeCore, PxNodeIndex nodeIndex, PxU32 index);
|
||||
void setPxgShapeBodyNodeIndex(PxNodeIndex nodeIndex, PxU32 index);
|
||||
void removePxgShape(PxU32 index);
|
||||
|
||||
// PT: copies new shapes from CPU memory (mShapeSims) to GPU *host* memory (mPxgShapeSimPool)
|
||||
void copyToGpuShapeSim(PxgGpuNarrowphaseCore* npCore, PxBaseTask* continuation, Cm::FlushPool& flushPool);
|
||||
// PT: copies new shapes from GPU *host* memory (mPxgShapeSimPool) to GPU device memory (mNewShapeSimBuffer)
|
||||
// and *then* copies from device-to-device memory (mNewShapeSimBuffer => mShapeSimBuffer)
|
||||
void gpuMemDmaUpShapeSim(PxCudaContext* cudaContext, CUstream stream, KernelWrangler* kernelWrangler);
|
||||
|
||||
// PT: TODO: figure out the difference between mTotalNumShapes and mNbTotalShapeSim
|
||||
// (they both existed in different places and got logically refactored here)
|
||||
PX_FORCE_INLINE PxU32 getTotalNbShapes() const { return mTotalNumShapes; }
|
||||
PX_FORCE_INLINE PxU32 getNbTotalShapeSims() const { return mNbTotalShapeSim; }
|
||||
|
||||
PX_FORCE_INLINE CUdeviceptr getShapeSimsDevicePtr() const { return mShapeSimBuffer.getDevicePtr(); }
|
||||
PX_FORCE_INLINE const PxgShapeSim* getShapeSimsDeviceTypedPtr() const { return mShapeSimBuffer.getTypedPtr(); }
|
||||
PX_FORCE_INLINE Sc::ShapeSimBase** getShapeSims() { return mShapeSimPtrs.begin(); }
|
||||
|
||||
private:
|
||||
PxArray<PxgShapeSimData> mShapeSims;
|
||||
PxArray<Sc::ShapeSimBase*> mShapeSimPtrs;
|
||||
PxArray<PxU32> mNewShapeSims;
|
||||
PxU32 mTotalNumShapes;
|
||||
PxU32 mNbTotalShapeSim;
|
||||
|
||||
PxPinnedArray<PxgNewShapeSim> mPxgShapeSimPool;
|
||||
PxgTypedCudaBuffer<PxgShapeSim> mShapeSimBuffer;
|
||||
PxgTypedCudaBuffer<PxgNewShapeSim> mNewShapeSimBuffer;
|
||||
|
||||
friend class PxgCopyToShapeSimTask;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
905
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSimulationController.h
vendored
Normal file
905
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSimulationController.h
vendored
Normal file
@@ -0,0 +1,905 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_SIMULATION_CONTROLLER_H
|
||||
#define PXG_SIMULATION_CONTROLLER_H
|
||||
|
||||
#include "PxgBodySimManager.h"
|
||||
#include "PxgJointManager.h"
|
||||
#include "PxsSimulationController.h"
|
||||
#include "PxgHeapMemAllocator.h"
|
||||
#include "CmTask.h"
|
||||
#include "PxgArticulationLink.h"
|
||||
#include "DyArticulationJointCore.h"
|
||||
#include "PxgArticulation.h"
|
||||
#include "PxgArticulationTendon.h"
|
||||
#include "PxgSimulationCoreDesc.h"
|
||||
#include "PxgSoftBody.h"
|
||||
#include "PxgFEMCloth.h"
|
||||
#include "PxgParticleSystem.h"
|
||||
#include "PxArticulationTendonData.h"
|
||||
#include "foundation/PxPreprocessor.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
|
||||
#include "BpAABBManagerBase.h"
|
||||
#include "PxgAABBManager.h"
|
||||
#include "PxsTransformCache.h"
|
||||
#include "PxgNarrowphaseCore.h"
|
||||
#define PXG_SC_DEBUG 0
|
||||
|
||||
namespace physx
|
||||
{
|
||||
//this is needed to force PhysXSimulationControllerGpu linkage as Static Library!
|
||||
void createPxgSimulationController();
|
||||
|
||||
namespace shdfnd
|
||||
{
|
||||
class PxVirtualAllocatorCallback;
|
||||
}
|
||||
|
||||
namespace Dy
|
||||
{
|
||||
class ParticleSystemCore;
|
||||
}
|
||||
|
||||
namespace Bp
|
||||
{
|
||||
class BroadPhase;
|
||||
}
|
||||
|
||||
class PxgSimulationCore;
|
||||
class PxgPBDParticleSystemCore;
|
||||
class PxgSoftBodyCore;
|
||||
class PxgFEMClothCore;
|
||||
class PxgGpuContext;
|
||||
class PxgNphaseImplementationContext;
|
||||
struct PxsCachedTransform;
|
||||
|
||||
class PxgSimulationController;
|
||||
class PxgCudaKernelWranglerManager;
|
||||
class PxgCudaBroadPhaseSap;
|
||||
|
||||
struct SoftBodyAttachmentAndFilterData
|
||||
{
|
||||
public:
|
||||
PxPinnedArray<PxgFEMRigidAttachment>* rigidAttachments;
|
||||
PxPinnedArray<PxgRigidFilterPair>* rigidFilterPairs;
|
||||
bool dirtyRigidAttachments;
|
||||
PxInt32ArrayPinned* activeRigidAttachments;
|
||||
bool dirtyActiveRigidAttachments;
|
||||
PxPinnedArray<PxgFEMFEMAttachment>* softBodyAttachments;
|
||||
bool dirtySoftBodyAttachments;
|
||||
PxInt32ArrayPinned* activeSoftBodyAttachments;
|
||||
bool dirtyActiveSoftBodyAttachments;
|
||||
PxArray<Dy::DeformableVolume*>* dirtyDeformableVolumeForFilterPairs;
|
||||
PxPinnedArray<PxgFEMFEMAttachment>* clothAttachments;
|
||||
PxPinnedArray<PxgNonRigidFilterPair>* clothFilterPairs;
|
||||
bool dirtyClothAttachments;
|
||||
PxInt32ArrayPinned* activeClothAttachments;
|
||||
bool dirtyActiveClothAttachments;
|
||||
PxPinnedArray<PxgFEMFEMAttachment>* particleAttachments;
|
||||
PxPinnedArray<PxgNonRigidFilterPair>* particleFilterPairs;
|
||||
bool dirtyParticleAttachments;
|
||||
PxInt32ArrayPinned* activeParticleAttachments;
|
||||
bool dirtyActiveParticleAttachments;
|
||||
};
|
||||
|
||||
// Parallel task that copies a contiguous range of rigid-body sim data into
// the GPU body-sim buffers. The work itself lives in runInternal() (.cpp).
class PxgCopyToBodySimTask : public Cm::Task
{
	PxgSimulationController& mController;	// controller owning the body-sim data
	PxU32 mNewBodySimOffset;				// offset of the newly added bodies in the body-sim buffer
	PxU32 mStartIndex;						// first body index this task processes
	PxU32 mNbToProcess;						// number of bodies this task processes

public:
	PxgCopyToBodySimTask(PxgSimulationController& controller, PxU32 bodySimOffset, PxU32 startIdx, PxU32 nbToProcess) : Cm::Task(0), mController(controller),
		mNewBodySimOffset(bodySimOffset), mStartIndex(startIdx),
		mNbToProcess(nbToProcess)
	{
	}

	// Performs the copy for [mStartIndex, mStartIndex + mNbToProcess).
	virtual void runInternal();

	virtual const char* getName() const
	{
		return "PxgCopyToBodySimTask";
	}

private:
	PX_NOCOPY(PxgCopyToBodySimTask)
};
|
||||
|
||||
// Parallel task that copies a contiguous range of newly added articulation
// sim data to the GPU. The shared PxI32 counters are used across all copy
// tasks to partition output for links/dofs/tendons/mimic joints — presumably
// incremented atomically in runInternal(); confirm in the .cpp.
class PxgCopyToArticulationSimTask : public Cm::Task
{
	PxgSimulationController& mController;	// controller owning the articulation data
	PxU32 mNewBodySimOffset;				// offset of the new bodies in the body-sim buffer
	PxU32 mStartIndex, mNbToProcess;		// range of articulations this task processes
	PxI32* mSharedArticulationLinksIndex;
	PxI32* mSharedDofIndex;
	PxI32* mSharedSpatialTendonIndex;
	PxI32* mSharedSpatialTendonAttachmentIndex;
	PxI32* mSharedFixedTendonIndex;
	PxI32* mSharedFixedTendonJointIndex;
	PxI32* mSharedArticulationMimicJointIndex;
	PxI32* mSharedPathToRootIndex;

public:
	PxgCopyToArticulationSimTask(PxgSimulationController& controller, const PxU32 bodySimOffset, PxU32 startIdx,
		PxU32 nbToProcess, PxI32* sharedArticulationLinksIndex, PxI32* sharedDofIndex,
		PxI32* sharedSpatialTendonIndex,
		PxI32* sharedSpatialTendonAttachmentsIndex,
		PxI32* sharedFixedTendonIndex,
		PxI32* sharedFixedTendonJointIndex,
		PxI32* sharedArticulationMimicJointIndex,
		PxI32* sharedPathToRootIndex) :
		Cm::Task(0), mController(controller), mNewBodySimOffset(bodySimOffset), mStartIndex(startIdx),
		mNbToProcess(nbToProcess), mSharedArticulationLinksIndex(sharedArticulationLinksIndex),
		mSharedDofIndex(sharedDofIndex),
		mSharedSpatialTendonIndex(sharedSpatialTendonIndex),
		mSharedSpatialTendonAttachmentIndex(sharedSpatialTendonAttachmentsIndex),
		mSharedFixedTendonIndex(sharedFixedTendonIndex),
		mSharedFixedTendonJointIndex(sharedFixedTendonJointIndex),
		mSharedArticulationMimicJointIndex(sharedArticulationMimicJointIndex),
		mSharedPathToRootIndex(sharedPathToRootIndex)
	{
	}

	// Performs the copy for [mStartIndex, mStartIndex + mNbToProcess).
	virtual void runInternal();

	virtual const char* getName() const
	{
		return "PxgCopyToArticulationSimTask";
	}

private:
	PX_NOCOPY(PxgCopyToArticulationSimTask)
};
|
||||
|
||||
class PxgUpdateArticulationSimTask : public Cm::Task
|
||||
{
|
||||
PxgSimulationController& mController;
|
||||
PxU32 mStartIndex, mNbToProcess;
|
||||
PxI32* mSharedArticulationLinksIndex;
|
||||
PxI32* mSharedArticulationDofIndex;
|
||||
PxI32* mSharedSpatialTendonIndex;
|
||||
PxI32* mSharedSpatialTendonAttachmentIndex;
|
||||
PxI32* mSharedFixedTendonIndex;
|
||||
PxI32* mSharedFixedTendonJointIndex;
|
||||
PxI32* mSharedMimicJointIndex;
|
||||
|
||||
public:
|
||||
PxgUpdateArticulationSimTask(PxgSimulationController& controller, PxU32 startIdx,
|
||||
PxU32 nbToProcess, PxI32* sharedArticulationLinksLindex,
|
||||
PxI32* sharedArticulationDofIndex,
|
||||
PxI32* sharedSpatialTendonIndex,
|
||||
PxI32* sharedSpatialTendonAttachmentIndex,
|
||||
PxI32* sharedFixedTendonIndex,
|
||||
PxI32* sharedFixedTendonJointIndex,
|
||||
PxI32* sharedMimicJointIndex) :
|
||||
Cm::Task(0), mController(controller), mStartIndex(startIdx),
|
||||
mNbToProcess(nbToProcess), mSharedArticulationLinksIndex(sharedArticulationLinksLindex),
|
||||
mSharedArticulationDofIndex(sharedArticulationDofIndex),
|
||||
mSharedSpatialTendonIndex(sharedSpatialTendonIndex),
|
||||
mSharedSpatialTendonAttachmentIndex(sharedSpatialTendonAttachmentIndex),
|
||||
mSharedFixedTendonIndex(sharedFixedTendonIndex),
|
||||
mSharedFixedTendonJointIndex(sharedFixedTendonJointIndex),
|
||||
mSharedMimicJointIndex(sharedMimicJointIndex)
|
||||
{
|
||||
}
|
||||
|
||||
virtual void runInternal();
|
||||
|
||||
virtual const char* getName() const
|
||||
{
|
||||
return "PxgUpdateArticulationSimTask";
|
||||
}
|
||||
|
||||
private:
|
||||
PX_NOCOPY(PxgUpdateArticulationSimTask)
|
||||
};
|
||||
|
||||
// Parallel task that copies a contiguous range of soft-body sim data to the
// GPU. Work is split into chunks of NbSoftBodiesPerTask per task instance.
class PxgCopyToSoftBodySimTask : public Cm::Task
{
	PxgSimulationController& mController;	// controller owning the soft-body data
	PxU32 mStartIndex, mNbToProcess;		// range of soft bodies this task processes

public:

	// Work granularity: number of soft bodies handled by one task instance.
	static const PxU32 NbSoftBodiesPerTask = 50;

	PxgCopyToSoftBodySimTask(PxgSimulationController& controller, PxU32 startIdx, PxU32 nbToProcess) :
		Cm::Task(0), mController(controller), mStartIndex(startIdx),
		mNbToProcess(nbToProcess)
	{
	}

	// Performs the copy for [mStartIndex, mStartIndex + mNbToProcess).
	virtual void runInternal();

	virtual const char* getName() const
	{
		return "PxgCopyToSoftBodySimTask";
	}

private:
	PX_NOCOPY(PxgCopyToSoftBodySimTask)
};
|
||||
|
||||
// Parallel task that copies a contiguous range of FEM-cloth sim data to the
// GPU. Work is split into chunks of NbFEMClothsPerTask per task instance.
class PxgCopyToFEMClothSimTask : public Cm::Task
{
	PxgSimulationController& mController;	// controller owning the cloth data
	PxU32 mStartIndex, mNbToProcess;		// range of cloths this task processes

public:

	// Work granularity: number of cloths handled by one task instance.
	static const PxU32 NbFEMClothsPerTask = 50;

	PxgCopyToFEMClothSimTask(PxgSimulationController& controller, PxU32 startIdx, PxU32 nbToProcess) :
		Cm::Task(0), mController(controller), mStartIndex(startIdx),
		mNbToProcess(nbToProcess)
	{
	}

	// Performs the copy for [mStartIndex, mStartIndex + mNbToProcess).
	virtual void runInternal();

	virtual const char* getName() const
	{
		return "PxgCopyToFEMClothSimTask";
	}

private:
	PX_NOCOPY(PxgCopyToFEMClothSimTask)
};
|
||||
|
||||
class PxgCopyToPBDParticleSystemSimTask : public Cm::Task
|
||||
{
|
||||
PxgSimulationController& mController;
|
||||
PxgParticleSystemCore* core;
|
||||
PxU32 mStartIndex, mNbToProcess;
|
||||
|
||||
public:
|
||||
PxgCopyToPBDParticleSystemSimTask(PxgSimulationController& controller, PxU32 startIdx, PxU32 nbToProcess) :
|
||||
Cm::Task(0), mController(controller), mStartIndex(startIdx),
|
||||
mNbToProcess(nbToProcess)
|
||||
{
|
||||
}
|
||||
|
||||
virtual void runInternal();
|
||||
|
||||
virtual const char* getName() const
|
||||
{
|
||||
return "PxgCopyToPBDParticleSystemSimTask";
|
||||
}
|
||||
|
||||
private:
|
||||
PX_NOCOPY(PxgCopyToPBDParticleSystemSimTask)
|
||||
};
|
||||
|
||||
// Continuation task run after the per-chunk shape-sim copy tasks complete;
// forwards to PxgSimulationController::postCopyToShapeSim().
class PxgPostCopyToShapeSimTask : public Cm::Task
{
	PxgSimulationController& mController;	// controller to notify on completion

public:
	PxgPostCopyToShapeSimTask(PxgSimulationController& controller) : Cm::Task(0), mController(controller)
	{
	}

	virtual void runInternal();

	virtual const char* getName() const
	{
		return "PxgPostCopyToShapeSimTask";
	}
private:
	PX_NOCOPY(PxgPostCopyToShapeSimTask)
};
|
||||
|
||||
// Continuation task run after the per-chunk body-sim copy tasks complete;
// forwards to PxgSimulationController::postCopyToBodySim().
class PxgPostCopyToBodySimTask : public Cm::Task
{
	PxgSimulationController& mController;		// controller to notify on completion
	const bool mEnableBodyAccelerations;		// passed through to postCopyToBodySim()

public:
	PxgPostCopyToBodySimTask(PxgSimulationController& controller, bool enableBodyAccelerations) : Cm::Task(0), mController(controller), mEnableBodyAccelerations(enableBodyAccelerations)
	{
	}

	virtual void runInternal();

	virtual const char* getName() const
	{
		return "PxgPostCopyToBodySimTask";
	}
private:
	PX_NOCOPY(PxgPostCopyToBodySimTask)
};
|
||||
class PxgPostUpdateParticleAndSoftBodyTask : public Cm::Task
|
||||
{
|
||||
PxgSimulationController& mController;
|
||||
PxVec3 mGravity;
|
||||
PxReal mDt;
|
||||
|
||||
public:
|
||||
PxgPostUpdateParticleAndSoftBodyTask(PxgSimulationController& controller) : Cm::Task(0), mController(controller) {}
|
||||
|
||||
virtual void runInternal();
|
||||
|
||||
void setGravity(const PxVec3 gravity) { mGravity = gravity; }
|
||||
void setDt(const PxReal dt) { mDt = dt; }
|
||||
|
||||
virtual const char* getName() const
|
||||
{
|
||||
return "PxgPostUpdateParticleAndSoftBodyTask";
|
||||
}
|
||||
private:
|
||||
PX_NOCOPY(PxgPostUpdateParticleAndSoftBodyTask)
|
||||
};
|
||||
|
||||
// Bookkeeping for a set of attachments, each identified by a stable external
// handle. Invariants maintained by the methods below:
//  - mAttachments is dense (erase uses swap-with-last);
//  - mHandles is parallel to mAttachments (index -> handle);
//  - mHandleToAttachmentMapping maps handle -> index in mAttachments;
//  - mActiveAttachments stores *indices into mAttachments* for the active subset;
//  - mHandleToActiveIndex maps handle -> index in mActiveAttachments,
//    and contains an entry only while the attachment is active.
// The dirty flags signal that the dense array / active list changed since the
// last GPU upload (arrays are pinned, i.e. host-mapped for upload).
template <typename Attachment>
class AttachmentManager
{
public:
	PxPinnedArray<Attachment> mAttachments;				// dense attachment storage
	PxInt32ArrayPinned mActiveAttachments;				// indices into mAttachments of active entries
	PxHashMap<PxU32, PxU32> mHandleToAttachmentMapping;	// handle -> index in mAttachments
	PxHashMap<PxU32, PxU32> mHandleToActiveIndex;		// handle -> index in mActiveAttachments
	PxArray<PxU32> mHandles;							// parallel to mAttachments: index -> handle

	PxU32 mBaseHandle;			// note: not read/written by the methods visible here
	bool mAttachmentsDirty;		// mAttachments changed since last upload
	bool mActiveAttachmentsDirty;	// mActiveAttachments changed since last upload

	AttachmentManager(PxgHeapMemoryAllocatorManager* manager) :
		mAttachments(manager->mMappedMemoryAllocators),
		mActiveAttachments(manager->mMappedMemoryAllocators),
		mBaseHandle(0),
		mAttachmentsDirty(false),
		mActiveAttachmentsDirty(false)
	{
	}

	// Appends an attachment (initially inactive) under the given handle.
	// The handle must not already be in use (not checked here).
	void addAttachment(const Attachment& attachment, const PxU32 handle)
	{
		const PxU32 size = mAttachments.size();
		mAttachments.pushBack(attachment);
		mHandles.pushBack(handle);
		mHandleToAttachmentMapping[handle] = size;
		mAttachmentsDirty = true;
	}

	// Removes the attachment for 'handle' (deactivating it first) using
	// swap-with-last removal, then repairs the mappings of the element that
	// was moved into the freed slot. Returns false if the handle is unknown.
	bool removeAttachment(const PxU32 handle)
	{
		deactivateAttachment(handle);

		//Now remove this current handle...
		PxHashMap<PxU32, PxU32>::Entry mapping;
		bool found = mHandleToAttachmentMapping.erase(handle, mapping);
		if (found)
		{
			// mapping.second is the dense index of the removed attachment.
			mAttachments.replaceWithLast(mapping.second);
			mHandles.replaceWithLast(mapping.second);
			// Unless we removed the last element, another attachment was moved
			// into the freed slot: update its handle mapping and, if it is
			// active, the active-list entry that still points at its old index.
			if (mapping.second < mAttachments.size())
			{
				PxU32 newHandle = mHandles[mapping.second];
				mHandleToAttachmentMapping[newHandle] = mapping.second;
				const PxHashMap<PxU32, PxU32>::Entry* activeMapping = mHandleToActiveIndex.find(newHandle);
				if (activeMapping)
				{
					mActiveAttachments[activeMapping->second] = mapping.second;
				}
			}
			mAttachmentsDirty = true;
		}
		return found;
	}

	// Marks the attachment for 'handle' active. The handle must exist and
	// must not already be active (asserted).
	void activateAttachment(const PxU32 handle)
	{
		PX_ASSERT(!mHandleToActiveIndex.find(handle));
		PxU32 index = mHandleToAttachmentMapping[handle];
		mHandleToActiveIndex[handle] = mActiveAttachments.size();
		mActiveAttachments.pushBack(index);
		mActiveAttachmentsDirty = true;
	}

	// Removes 'handle' from the active set (no-op if it is not active) using
	// swap-with-last removal on the active list, then repairs the
	// handle->active-index mapping of the entry that was moved.
	void deactivateAttachment(const PxU32 handle)
	{
		PxHashMap<PxU32, PxU32>::Entry mapping;
		bool found = mHandleToActiveIndex.erase(handle, mapping);
		if (found)
		{
			// mapping.second is the removed entry's slot in mActiveAttachments.
			mActiveAttachments.replaceWithLast(mapping.second);

			// If another entry was moved into the freed slot, repoint its
			// handle's active-index mapping at that slot.
			if (mapping.second < mActiveAttachments.size())
			{
				PxU32 replaceHandle = mHandles[mActiveAttachments[mapping.second]];
				mHandleToActiveIndex[replaceHandle] = mapping.second;
			}

			mActiveAttachmentsDirty = true;
		}
	}
};
|
||||
|
||||
class PxgSimulationController : public PxsSimulationController
|
||||
{
|
||||
PX_NOCOPY(PxgSimulationController)
|
||||
public:
|
||||
PxgSimulationController(PxsKernelWranglerManager* gpuWranglerManagers, PxCudaContextManager* cudaContextManager,
|
||||
PxgGpuContext* dynamicContext, PxgNphaseImplementationContext* npContext, Bp::BroadPhase* bp, bool useGpuBroadphase,
|
||||
PxsSimulationControllerCallback* callback, PxgHeapMemoryAllocatorManager* heapMemoryManager,
|
||||
PxU32 maxSoftBodyContacts, PxU32 maxFemClothContacts, PxU32 maxParticleContacts, PxU32 collisionStackSizeBytes, bool enableBodyAccelerations);
|
||||
|
||||
virtual ~PxgSimulationController();
|
||||
|
||||
virtual void addPxgShape(Sc::ShapeSimBase* shapeSimBase, const PxsShapeCore* shapeCore, PxNodeIndex nodeIndex, PxU32 index) PX_OVERRIDE;
|
||||
virtual void setPxgShapeBodyNodeIndex(PxNodeIndex nodeIndex, PxU32 index) PX_OVERRIDE;
|
||||
virtual void removePxgShape(PxU32 index) PX_OVERRIDE;
|
||||
|
||||
virtual void addDynamic(PxsRigidBody* rigidBody, const PxNodeIndex& nodeIndex) PX_OVERRIDE;
|
||||
virtual void addDynamics(PxsRigidBody** rigidBody, const PxU32* nodeIndex, PxU32 nbBodies) PX_OVERRIDE;
|
||||
|
||||
virtual void addArticulation(Dy::FeatherstoneArticulation* articulation, const PxNodeIndex& nodeIndex) PX_OVERRIDE;
|
||||
virtual void releaseArticulation(Dy::FeatherstoneArticulation* articulation, const PxNodeIndex& nodeIndex) PX_OVERRIDE;
|
||||
virtual void releaseDeferredArticulationIds() PX_OVERRIDE;
|
||||
|
||||
virtual void addParticleFilter(Dy::DeformableVolume* deformableVolume, Dy::ParticleSystem* particleSystem,
|
||||
PxU32 particleId, PxU32 userBufferId, PxU32 tetId) PX_OVERRIDE;
|
||||
virtual void removeParticleFilter(Dy::DeformableVolume* deformableVolume,
|
||||
const Dy::ParticleSystem* particleSystem, PxU32 particleId, PxU32 userBufferId, PxU32 tetId) PX_OVERRIDE;
|
||||
|
||||
virtual PxU32 addParticleAttachment(Dy::DeformableVolume* deformableVolume, const Dy::ParticleSystem* particleSystem,
|
||||
PxU32 particleId, PxU32 userBufferId, PxU32 tetId, const PxVec4& barycentrics, const bool isActive) PX_OVERRIDE;
|
||||
virtual void removeParticleAttachment(Dy::DeformableVolume* deformableVolume, PxU32 handle) PX_OVERRIDE;
|
||||
|
||||
virtual void addRigidFilter(Dy::DeformableVolume* deformableVolume, const PxNodeIndex& rigidNodeIndex, PxU32 vertIndex) PX_OVERRIDE;
|
||||
virtual void removeRigidFilter(Dy::DeformableVolume* deformableVolume, const PxNodeIndex& rigidNodeIndex, PxU32 vertIndex) PX_OVERRIDE;
|
||||
|
||||
virtual PxU32 addRigidAttachment(Dy::DeformableVolume* deformableVolume, const PxNodeIndex& softBodyNodeIndex,
|
||||
PxsRigidBody* rigidBody, const PxNodeIndex& rigidNodeIndex, PxU32 vertIndex, const PxVec3& actorSpacePose,
|
||||
PxConeLimitedConstraint* constraint, const bool isActive, bool doConversion) PX_OVERRIDE;
|
||||
|
||||
virtual PxU32 addTetRigidAttachment(Dy::DeformableVolume* deformableVolume,
|
||||
PxsRigidBody* rigidBody, const PxNodeIndex& rigidNodeIndex, PxU32 tetIdx, const PxVec4& barycentrics, const PxVec3& actorSpacePose,
|
||||
PxConeLimitedConstraint* constraint, const bool isActive, bool doConversion) PX_OVERRIDE;
|
||||
|
||||
virtual void removeRigidAttachment(Dy::DeformableVolume* deformableVolume, PxU32 handle) PX_OVERRIDE;
|
||||
|
||||
virtual void addTetRigidFilter(Dy::DeformableVolume* deformableVolume,
|
||||
const PxNodeIndex& rigidNodeIndex, PxU32 tetId) PX_OVERRIDE;
|
||||
virtual void removeTetRigidFilter(Dy::DeformableVolume* deformableVolume,
|
||||
const PxNodeIndex& rigidNodeIndex, PxU32 tetId) PX_OVERRIDE;
|
||||
|
||||
virtual void addSoftBodyFilter(Dy::DeformableVolume* deformableVolume0, Dy::DeformableVolume* deformableVolume1, PxU32 tetIdx0,
|
||||
PxU32 tetIdx1) PX_OVERRIDE;
|
||||
virtual void removeSoftBodyFilter(Dy::DeformableVolume* deformableVolume0, Dy::DeformableVolume* deformableVolume1, PxU32 tetIdx0,
|
||||
PxU32 tetId1) PX_OVERRIDE;
|
||||
virtual void addSoftBodyFilters(Dy::DeformableVolume* deformableVolume0, Dy::DeformableVolume* deformableVolume1, PxU32* tetIndices0, PxU32* tetIndices1,
|
||||
PxU32 tetIndicesSize) PX_OVERRIDE;
|
||||
virtual void removeSoftBodyFilters(Dy::DeformableVolume* deformableVolume0, Dy::DeformableVolume* deformableVolume1, PxU32* tetIndices0, PxU32* tetIndices1,
|
||||
PxU32 tetIndicesSize) PX_OVERRIDE;
|
||||
|
||||
virtual PxU32 addSoftBodyAttachment(Dy::DeformableVolume* deformableVolume0, Dy::DeformableVolume* deformableVolume1, PxU32 tetIdx0, PxU32 tetIdx1,
|
||||
const PxVec4& tetBarycentric0, const PxVec4& tetBarycentric1, PxConeLimitedConstraint* constraint, PxReal constraintOffset,
|
||||
const bool addToActive, bool doConversion) PX_OVERRIDE;
|
||||
|
||||
virtual void removeSoftBodyAttachment(Dy::DeformableVolume* deformableVolume0, PxU32 handle) PX_OVERRIDE;
|
||||
|
||||
virtual void addClothFilter(Dy::DeformableVolume* deformableVolume, Dy::DeformableSurface* deformableSurface, PxU32 triIdx, PxU32 tetIdx) PX_OVERRIDE;
|
||||
virtual void removeClothFilter(Dy::DeformableVolume* deformableVolume, Dy::DeformableSurface* deformableSurface, PxU32 triId, PxU32 tetIdx) PX_OVERRIDE;
|
||||
|
||||
virtual PxU32 addClothAttachment(Dy::DeformableVolume* deformableVolume, Dy::DeformableSurface* deformableSurface, PxU32 triIdx,
|
||||
const PxVec4& triBarycentric, PxU32 tetIdx, const PxVec4& tetBarycentric,
|
||||
PxConeLimitedConstraint* constraint, PxReal constraintOffset,
|
||||
const bool isActive, bool doConversion) PX_OVERRIDE;
|
||||
|
||||
virtual void removeClothAttachment(Dy::DeformableVolume* deformableVolume, PxU32 handle) PX_OVERRIDE;
|
||||
|
||||
virtual PxU32 addRigidAttachment(Dy::DeformableSurface* deformableSurface, const PxNodeIndex& clothNodeIndex,
|
||||
PxsRigidBody* rigidBody, const PxNodeIndex& rigidNodeIndex, PxU32 vertIndex, const PxVec3& actorSpacePose,
|
||||
PxConeLimitedConstraint* constraint, const bool isActive) PX_OVERRIDE;
|
||||
|
||||
virtual void removeRigidAttachment(Dy::DeformableSurface* deformableSurface, PxU32 handle) PX_OVERRIDE;
|
||||
|
||||
virtual void addTriRigidFilter(Dy::DeformableSurface* deformableSurface,
|
||||
const PxNodeIndex& rigidNodeIndex, PxU32 triIdx) PX_OVERRIDE;
|
||||
|
||||
virtual void removeTriRigidFilter(Dy::DeformableSurface* deformableSurface, const PxNodeIndex& rigidNodeIndex,PxU32 triIdx) PX_OVERRIDE;
|
||||
|
||||
virtual PxU32 addTriRigidAttachment(Dy::DeformableSurface* deformableSurface,
|
||||
PxsRigidBody* rigidBody, const PxNodeIndex& rigidNodeIndex, PxU32 triIdx, const PxVec4& barycentrics,
|
||||
const PxVec3& actorSpacePose, PxConeLimitedConstraint* constraint,
|
||||
const bool isActive) PX_OVERRIDE;
|
||||
|
||||
virtual void removeTriRigidAttachment(Dy::DeformableSurface* deformableSurface, PxU32 handle) PX_OVERRIDE;
|
||||
|
||||
virtual void addClothFilter(Dy::DeformableSurface* deformableSurface0, Dy::DeformableSurface* deformableSurface1, PxU32 triIdx0, PxU32 triIdx1) PX_OVERRIDE;
|
||||
virtual void removeClothFilter(Dy::DeformableSurface* deformableSurface0, Dy::DeformableSurface* deformableSurface1, PxU32 triIdx0, PxU32 triId1) PX_OVERRIDE;
|
||||
|
||||
virtual PxU32 addTriClothAttachment(Dy::DeformableSurface* deformableSurface0, Dy::DeformableSurface* deformableSurface1, PxU32 triIdx0, PxU32 triIdx1,
|
||||
const PxVec4& triBarycentric0, const PxVec4& triBarycentric1, const bool addToActive) PX_OVERRIDE;
|
||||
|
||||
virtual void removeTriClothAttachment(Dy::DeformableSurface* deformableSurface0, PxU32 handle) PX_OVERRIDE;
|
||||
|
||||
PxU32 addRigidAttachmentInternal(const PxU32 nonRigidId, const PxU32 elemId, const bool isVertex, const PxVec4& barycentric, PxsRigidBody* rigidBody,
|
||||
const PxNodeIndex& rigidNodeIndex, const PxVec3& actorSpacePose, PxConeLimitedConstraint* constraint,
|
||||
AttachmentManager<PxgFEMRigidAttachment>& attachments, bool addToActive);
|
||||
|
||||
void addSoftBodyFiltersInternal(Dy::DeformableVolume* deformableVolume0, Dy::DeformableVolume* deformableVolume1, PxU32* tetIndices, PxU32 size);
|
||||
void removeSoftBodyFiltersInternal(Dy::DeformableVolume* deformableVolume0, Dy::DeformableVolume* deformableVolume1, PxU32* tetIndices, PxU32 size);
|
||||
|
||||
void createDeformableSurfaceCore();
|
||||
void createDeformableVolumeCore();
|
||||
virtual void addSoftBody(Dy::DeformableVolume* deformableVolume, const PxNodeIndex& nodeIndex) PX_OVERRIDE;
|
||||
virtual void releaseSoftBody(Dy::DeformableVolume* deformableVolume) PX_OVERRIDE;
|
||||
virtual void releaseDeferredSoftBodyIds() PX_OVERRIDE;
|
||||
virtual void activateSoftbody(Dy::DeformableVolume* deformableVolume) PX_OVERRIDE;
|
||||
virtual void deactivateSoftbody(Dy::DeformableVolume* deformableVolume) PX_OVERRIDE;
|
||||
virtual void activateSoftbodySelfCollision(Dy::DeformableVolume* deformableVolume) PX_OVERRIDE;
|
||||
virtual void deactivateSoftbodySelfCollision(Dy::DeformableVolume* deformableVolume) PX_OVERRIDE;
|
||||
virtual void setSoftBodyWakeCounter(Dy::DeformableVolume* deformableVolume) PX_OVERRIDE;
|
||||
|
||||
virtual void addFEMCloth(Dy::DeformableSurface* deformableSurface, const PxNodeIndex& nodeIndex) PX_OVERRIDE;
|
||||
virtual void releaseFEMCloth(Dy::DeformableSurface* deformableSurface) PX_OVERRIDE;
|
||||
virtual void releaseDeferredFEMClothIds() PX_OVERRIDE;
|
||||
virtual void activateCloth(Dy::DeformableSurface* deformableSurface) PX_OVERRIDE;
|
||||
virtual void deactivateCloth(Dy::DeformableSurface* deformableSurface) PX_OVERRIDE;
|
||||
virtual void setClothWakeCounter(Dy::DeformableSurface* deformableSurface) PX_OVERRIDE;
|
||||
|
||||
virtual void addParticleSystem(Dy::ParticleSystem* particleSystem, const PxNodeIndex& nodeIndex) PX_OVERRIDE;
|
||||
virtual void releaseParticleSystem(Dy::ParticleSystem* particleSystem) PX_OVERRIDE;
|
||||
virtual void releaseDeferredParticleSystemIds() PX_OVERRIDE;
|
||||
|
||||
virtual void updateDynamic(Dy::FeatherstoneArticulation* articulation, const PxNodeIndex& nodeIndex) PX_OVERRIDE;
|
||||
virtual void updateBodies(PxsRigidBody** rigidBodies, PxU32* nodeIndices, const PxU32 nbBodies, PxsExternalAccelerationProvider* externalAccelerations) PX_OVERRIDE;
|
||||
virtual void addJoint(const Dy::Constraint&) PX_OVERRIDE;
|
||||
virtual void updateJoint(const PxU32 edgeIndex, Dy::Constraint* constraint) PX_OVERRIDE;
|
||||
virtual void updateBodies(PxBaseTask* continuation) PX_OVERRIDE;
|
||||
virtual void updateShapes(PxBaseTask* continuation) PX_OVERRIDE;
|
||||
|
||||
virtual void preIntegrateAndUpdateBound(PxBaseTask* continuation, const PxVec3 gravity, const PxReal dt) PX_OVERRIDE;
|
||||
virtual void updateParticleSystemsAndSoftBodies() PX_OVERRIDE;
|
||||
virtual void sortContacts() PX_OVERRIDE;
|
||||
virtual void update(PxBitMapPinned& changedHandleMap) PX_OVERRIDE;
|
||||
virtual void mergeChangedAABBMgHandle() PX_OVERRIDE;
|
||||
virtual void gpuDmabackData(PxsTransformCache& cache, Bp::BoundsArray& boundArray, PxBitMapPinned& changedAABBMgrHandles, bool enableDirectGPUAPI) PX_OVERRIDE;
|
||||
virtual void updateScBodyAndShapeSim(PxsTransformCache& cache, Bp::BoundsArray& boundArray, PxBaseTask* continuation) PX_OVERRIDE;
|
||||
virtual void updateArticulation(Dy::FeatherstoneArticulation* articulation, const PxNodeIndex& nodeIndex) PX_OVERRIDE;
|
||||
virtual void updateArticulationJoint(Dy::FeatherstoneArticulation* articulation, const PxNodeIndex& nodeIndex) PX_OVERRIDE;
|
||||
virtual void updateArticulationExtAccel(Dy::FeatherstoneArticulation* articulation, const PxNodeIndex& nodeIndex) PX_OVERRIDE;
|
||||
virtual void updateArticulationAfterIntegration(PxsContext* /*llContext*/, Bp::AABBManagerBase* /*aabbManager*/,
|
||||
PxArray<Sc::BodySim*>& /*ccdBodies*/, PxBaseTask* /*continuation*/, IG::IslandSim& /*islandSim*/, float /*dt*/) PX_OVERRIDE {}
|
||||
virtual PxU32* getActiveBodies() PX_OVERRIDE;
|
||||
virtual PxU32* getDeactiveBodies() PX_OVERRIDE;
|
||||
virtual void** getRigidBodies() PX_OVERRIDE;
|
||||
virtual PxU32 getNbBodies() PX_OVERRIDE;
|
||||
|
||||
virtual Sc::ShapeSimBase** getShapeSims() PX_OVERRIDE;
|
||||
virtual PxU32* getUnfrozenShapes() PX_OVERRIDE;
|
||||
virtual PxU32* getFrozenShapes() PX_OVERRIDE;
|
||||
virtual PxU32 getNbFrozenShapes() PX_OVERRIDE;
|
||||
virtual PxU32 getNbUnfrozenShapes() PX_OVERRIDE;
|
||||
virtual PxU32 getNbShapes() PX_OVERRIDE;
|
||||
virtual void clear() PX_OVERRIDE { mNbFrozenShapes = 0; mNbUnfrozenShapes = 0; }
|
||||
virtual void setBounds(Bp::BoundsArray* boundArray) PX_OVERRIDE;
|
||||
virtual void reserve(const PxU32 nbBodies) PX_OVERRIDE;
|
||||
|
||||
PX_INLINE PxU32 getArticulationRemapIndex(const PxU32 nodeIndex) { return mBodySimManager.getArticulationRemapIndex(nodeIndex); }
|
||||
|
||||
virtual void setDeformableSurfaceGpuPostSolveCallback(PxPostSolveCallback* postSolveCallback) PX_OVERRIDE PX_FINAL;
|
||||
virtual void setDeformableVolumeGpuPostSolveCallback(PxPostSolveCallback* postSolveCallback) PX_OVERRIDE PX_FINAL;
|
||||
|
||||
// deprecated direct-GPU API
|
||||
|
||||
PX_DEPRECATED virtual void copySoftBodyDataDEPRECATED(void** data, void* dataEndIndices, void* softBodyIndices, PxSoftBodyGpuDataFlag::Enum flag, const PxU32 nbCopySoftBodies, const PxU32 maxSize, CUevent copyEvent) PX_OVERRIDE PX_FINAL;
|
||||
PX_DEPRECATED virtual void applySoftBodyDataDEPRECATED(void** data, void* dataEndIndices, void* softBodyIndices, PxSoftBodyGpuDataFlag::Enum flag, const PxU32 nbUpdatedSoftBodies, const PxU32 maxSize, CUevent applyEvent, CUevent signalEvent) PX_OVERRIDE PX_FINAL;
|
||||
|
||||
PX_DEPRECATED virtual void applyParticleBufferDataDEPRECATED(const PxU32* indices, const PxGpuParticleBufferIndexPair* indexPair, const PxParticleBufferFlags* flags, PxU32 nbUpdatedBuffers, CUevent waitEvent, CUevent signalEvent) PX_OVERRIDE;
|
||||
// end deprecated direct-GPU API
|
||||
|
||||
// new direct-GPU API
|
||||
virtual bool getRigidDynamicData(void* data, const PxRigidDynamicGPUIndex* gpuIndices, PxRigidDynamicGPUAPIReadType::Enum dataType, PxU32 nbElements, float oneOverDt, CUevent startEvent, CUevent finishEvent) const PX_OVERRIDE PX_FINAL;
|
||||
virtual bool setRigidDynamicData(const void* data, const PxRigidDynamicGPUIndex* gpuIndices, PxRigidDynamicGPUAPIWriteType::Enum dataType, PxU32 nbElements, CUevent startEvent, CUevent finishEvent) PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual bool getArticulationData(void* data, const PxArticulationGPUIndex* gpuIndices, PxArticulationGPUAPIReadType::Enum dataType, PxU32 nbElements, CUevent startEvent, CUevent finishEvent) const PX_OVERRIDE PX_FINAL;
|
||||
virtual bool setArticulationData(const void* data, const PxArticulationGPUIndex* gpuIndices, PxArticulationGPUAPIWriteType::Enum dataType, PxU32 nbElements, CUevent startEvent, CUevent finishEvent) PX_OVERRIDE PX_FINAL;
|
||||
virtual bool computeArticulationData(void* data, const PxArticulationGPUIndex* gpuIndices, PxArticulationGPUAPIComputeType::Enum operation, PxU32 nbElements, CUevent startEvent, CUevent finishEvent) PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual bool evaluateSDFDistances(PxVec4* localGradientAndSDFConcatenated, const PxShapeGPUIndex* shapeIndices, const PxVec4* localSamplePointsConcatenated, const PxU32* samplePointCountPerShape, PxU32 nbElements, PxU32 maxPointCount, CUevent startEvent, CUevent finishEvent) PX_OVERRIDE PX_FINAL;
|
||||
virtual bool copyContactData(void* data, PxU32* numContactPairs, const PxU32 maxContactPairs, CUevent startEvent, CUevent copyEvent) PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual PxArticulationGPUAPIMaxCounts getArticulationGPUAPIMaxCounts() const PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual bool getD6JointData(void* data, const PxD6JointGPUIndex* gpuIndices, PxD6JointGPUAPIReadType::Enum dataType, PxU32 nbElements, PxF32 oneOverDt, CUevent startEvent, CUevent finishEvent) const PX_OVERRIDE PX_FINAL;
|
||||
|
||||
// end new direct-GPU API
|
||||
|
||||
virtual PxU32 getInternalShapeIndex(const PxsShapeCore& shapeCore) PX_OVERRIDE PX_FINAL;
|
||||
|
||||
virtual void syncParticleData() PX_OVERRIDE;
|
||||
virtual void updateBoundsAndShapes(Bp::AABBManagerBase& aabbManager, bool useDirectApi) PX_OVERRIDE;
|
||||
|
||||
PX_FORCE_INLINE PxgSimulationCore* getSimulationCore() { return mSimulationCore; }
|
||||
PX_FORCE_INLINE PxgJointManager& getJointManager() { return mJointManager; }
|
||||
PX_FORCE_INLINE PxgBodySimManager& getBodySimManager() { return mBodySimManager; }
|
||||
|
||||
PX_FORCE_INLINE PxgPBDParticleSystemCore* getPBDParticleSystemCore() { return mPBDParticleSystemCore; }
|
||||
|
||||
PX_FORCE_INLINE PxgSoftBodyCore* getSoftBodyCore() { return mSoftBodyCore; }
|
||||
|
||||
PX_FORCE_INLINE PxgFEMClothCore* getFEMClothCore() { return mFEMClothCore; }
|
||||
|
||||
PX_FORCE_INLINE PxgSoftBody* getSoftBodies() { return mSoftBodyPool.begin(); }
|
||||
PX_FORCE_INLINE PxU32 getNbSoftBodies() { return mSoftBodyPool.size(); }
|
||||
|
||||
PX_FORCE_INLINE PxU32* getActiveSoftBodies() { return mBodySimManager.mActiveSoftbodies.begin(); }
|
||||
PX_FORCE_INLINE PxU32 getNbActiveSoftBodies() { return mBodySimManager.mActiveSoftbodies.size(); }
|
||||
|
||||
PX_FORCE_INLINE PxU32* getSoftBodyNodeIndex() { return mSoftBodyNodeIndexPool.begin(); }
|
||||
|
||||
PX_FORCE_INLINE PxgFEMCloth* getFEMCloths() { return mFEMClothPool.begin(); }
|
||||
PX_FORCE_INLINE PxU32 getNbFEMCloths() { return mFEMClothPool.size(); }
|
||||
|
||||
PX_FORCE_INLINE PxU32* getActiveFEMCloths() { return mBodySimManager.mActiveFEMCloths.begin(); }
|
||||
PX_FORCE_INLINE PxU32 getNbActiveFEMCloths() { return mBodySimManager.mActiveFEMCloths.size(); }
|
||||
|
||||
PX_FORCE_INLINE PxU32* getFEMClothNodeIndex() { return mFEMClothNodeIndexPool.begin(); }
|
||||
|
||||
void postCopyToShapeSim();
|
||||
void postCopyToBodySim(bool enableBodyAccelerations);
|
||||
//integrate particle system and update bound/update grid/self collision
|
||||
void preIntegrateAndUpdateBoundParticleSystem(const PxVec3 gravity, const PxReal dt);
|
||||
void preIntegrateAndUpdateBoundSoftBody(const PxVec3 gravity, const PxReal dt);
|
||||
void preIntegrateAndUpdateBoundFEMCloth(const PxVec3 gravity, const PxReal dt);
|
||||
void updateJointsAndSyncData();
|
||||
|
||||
void computeSoftBodySimMeshData(Dy::DeformableVolume* deformableVolume, PxU32 tetId, const PxVec4& tetBarycentric,
|
||||
PxU32& outTetId, PxVec4& outTetBarycentric);
|
||||
|
||||
PX_FORCE_INLINE PxU32 getMaxLinks() { return mMaxLinks; }
|
||||
|
||||
PX_FORCE_INLINE PxU32 getMaxFemContacts() { return mMaxFemClothContacts; }
|
||||
|
||||
virtual PxU32 getNbDeactivatedDeformableSurfaces() const PX_OVERRIDE;
|
||||
virtual PxU32 getNbActivatedDeformableSurfaces() const PX_OVERRIDE;
|
||||
|
||||
virtual Dy::DeformableSurface** getDeactivatedDeformableSurfaces() const PX_OVERRIDE;
|
||||
virtual Dy::DeformableSurface** getActivatedDeformableSurfaces() const PX_OVERRIDE;
|
||||
|
||||
virtual PxU32 getNbDeactivatedDeformableVolumes() const PX_OVERRIDE;
|
||||
virtual PxU32 getNbActivatedDeformableVolumes() const PX_OVERRIDE;
|
||||
|
||||
virtual Dy::DeformableVolume** getDeactivatedDeformableVolumes() const PX_OVERRIDE;
|
||||
virtual Dy::DeformableVolume** getActivatedDeformableVolumes() const PX_OVERRIDE;
|
||||
|
||||
virtual const PxReal* getDeformableVolumeWakeCounters() const PX_OVERRIDE;
|
||||
|
||||
|
||||
virtual void setEnableOVDReadback(bool enableOVDReadback) PX_OVERRIDE;
|
||||
virtual bool getEnableOVDReadback() const PX_OVERRIDE;
|
||||
|
||||
virtual void setEnableOVDCollisionReadback(bool enableOVDCollisionsReadback) PX_OVERRIDE;
|
||||
virtual bool getEnableOVDCollisionReadback() const PX_OVERRIDE;
|
||||
|
||||
#if PX_SUPPORT_OMNI_PVD
|
||||
virtual void setOVDCallbacks(PxsSimulationControllerOVDCallbacks& ovdCallbacks) PX_OVERRIDE;
|
||||
PX_FORCE_INLINE PxsSimulationControllerOVDCallbacks* getOVDCallbacks() { return mOvdCallbacks; }
|
||||
#endif
|
||||
virtual bool hasDeformableSurfaces() const PX_OVERRIDE { return mFEMClothCore != NULL; }
|
||||
virtual bool hasDeformableVolumes() const PX_OVERRIDE { return mSoftBodyCore != NULL; }
|
||||
|
||||
bool getRecomputeArticulationBlockFormat() const { return mRecomputeArticulationBlockFormat; }
|
||||
|
||||
private:
|
||||
|
||||
void copyToGpuBodySim(PxBaseTask* continuation);
|
||||
void copyToGpuParticleSystem(PxBaseTask* continuation);
|
||||
void copyToGpuSoftBody(PxBaseTask* continuation);
|
||||
void copyToGpuFEMCloth(PxBaseTask* continuation);
|
||||
|
||||
void copyToGpuBodySim(const PxU32 bodySimOffset, PxU32 bodyStartIndex, PxU32 nbToCopy);
|
||||
|
||||
void copyToGpuArticulationSim(const PxU32 bodySimOffset, PxU32 startIndex, PxU32 nbToCopy,
|
||||
PxI32* sharedArticulationLinksIndex, PxI32* sharedArticulationDofIndex,
|
||||
PxI32* sharedArticulationSpatialTendonIndex,
|
||||
PxI32* sharedArticulationAttachmentIndex,
|
||||
PxI32* sharedArticulationFixedTendonIndex,
|
||||
PxI32* sharedArticulationTendonJointIndex,
|
||||
PxI32* sharedArticulationMimicJointIndex,
|
||||
PxI32* sharedArticulationPathToRootIndex);
|
||||
|
||||
void updateGpuArticulationSim(PxU32 startIndex, PxU32 nbToCopy,
|
||||
PxI32* sharedArticulationLinksIndex, PxI32* sharedArticulationDofIndex,
|
||||
PxI32* sharedSpatialTendonIndex, PxI32* sharedAttachmentIndex,
|
||||
PxI32* sharedFixedTendonIndex, PxI32* sharedFixedTendonJointIndex,
|
||||
PxI32* sharedMimicJointIndex);
|
||||
|
||||
void copyToGpuSoftBodySim(PxU32 startIndex, PxU32 nbToCopy);
|
||||
|
||||
void copyToGpuFEMClothSim(PxU32 startIndex, PxU32 nbToCopy);
|
||||
|
||||
void copyToGpuPBDParticleSystemSim(PxU32 startIndex, PxU32 nbToCopy);
|
||||
|
||||
// bounds are shared by NP and BP, so we have the update in the simulation controller.
|
||||
// cache is for np, we update it together with bounds due to similar update logic
|
||||
void updateBoundsAndTransformCache(Bp::AABBManagerBase& aabbManager, CUstream stream, PxsTransformCache& cache, PxgCudaBuffer& mGpuTransformCache);
|
||||
|
||||
void copyBoundsAndTransforms(Bp::BoundsArray& boundsArray, PxsTransformCache& transformCache,
|
||||
PxgCudaBuffer& gpuTransformCache, PxU32 boundsArraySize,
|
||||
PxU32 totalTransformCacheSize, CUstream npStream);
|
||||
void mergeBoundsAndTransformsChanges(PxgBoundsArray& directGPUBoundsArray,
|
||||
PxsTransformCache& transformCache,
|
||||
PxgCudaBuffer& gpuTransformCache, PxU32 boundsArraySize, PxU32 totalTransformCacheSize,
|
||||
PxU32 numChanges, CUstream npStream);
|
||||
|
||||
#if PXG_SC_DEBUG
|
||||
void validateCacheAndBounds(PxBoundsArrayPinned& boundArray, PxCachedTransformArrayPinned& cachedTransform);
|
||||
#endif
|
||||
|
||||
PxgPostCopyToShapeSimTask mPostCopyShapeSimTask;
|
||||
PxgPostCopyToBodySimTask mPostCopyBodySimTask;
|
||||
PxgPostUpdateParticleAndSoftBodyTask mPostUpdateParticleSystemTask;
|
||||
PxgBodySimManager mBodySimManager;
|
||||
PxgJointManager mJointManager;
|
||||
|
||||
PxgSimulationCore* mSimulationCore;
|
||||
PxgSoftBodyCore* mSoftBodyCore;
|
||||
PxgFEMClothCore* mFEMClothCore;
|
||||
PxgPBDParticleSystemCore* mPBDParticleSystemCore;
|
||||
|
||||
PxgGpuContext* mDynamicContext;
|
||||
PxgNphaseImplementationContext* mNpContext;
|
||||
|
||||
PxPinnedArray<PxgBodySim> mNewBodySimPool;
|
||||
PxPinnedArray<PxgArticulationLink> mLinksPool;
|
||||
PxFloatArrayPinned mLinkWakeCounterPool;
|
||||
PxPinnedArray<Cm::UnAlignedSpatialVector> mLinkAccelPool;
|
||||
PxPinnedArray<PxgArticulationLinkProp> mLinkPropPool;
|
||||
PxPinnedArray<PxgArticulationLinkSleepData> mLinkSleepDataPool;
|
||||
PxPinnedArray<ArticulationBitField> mLinkChildPool;
|
||||
PxInt32ArrayPinned mLinkParentPool;
|
||||
PxPinnedArray<PxTransform> mLinkBody2WorldPool;
|
||||
PxPinnedArray<PxTransform> mLinkBody2ActorPool;
|
||||
|
||||
PxPinnedArray<Dy::ArticulationJointCore> mJointPool;
|
||||
PxPinnedArray<Dy::ArticulationJointCoreData> mJointDataPool;
|
||||
PxPinnedArray<PxgArticulationSimUpdate> mLinkJointIndexPool; //this record the start index of the link for an articulation in an array
|
||||
PxPinnedArray<PxgArticulation> mArticulationPool;
|
||||
PxPinnedArray<PxGpuSpatialTendonData> mSpatialTendonParamPool;
|
||||
PxPinnedArray<PxgArticulationTendon> mSpatialTendonPool;
|
||||
PxPinnedArray<PxgArticulationTendonElementFixedData> mAttachmentFixedPool;
|
||||
PxPinnedArray<PxGpuTendonAttachmentData> mAttachmentModPool;
|
||||
PxInt32ArrayPinned mTendonAttachmentMapPool; //store each start index of the attachment to the corresponding tendons
|
||||
PxPinnedArray<PxGpuFixedTendonData> mFixedTendonParamPool;
|
||||
PxPinnedArray<PxgArticulationTendon> mFixedTendonPool;
|
||||
PxPinnedArray<PxgArticulationTendonElementFixedData> mTendonJointFixedDataPool;
|
||||
PxPinnedArray<PxGpuTendonJointCoefficientData> mTendonJointCoefficientDataPool;
|
||||
PxInt32ArrayPinned mTendonTendonJointMapPool; //store each start index of the attachment to the corresponding tendons
|
||||
|
||||
PxInt32ArrayPinned mPathToRootPool;
|
||||
|
||||
PxPinnedArray<Dy::ArticulationMimicJointCore> mMimicJointPool;
|
||||
|
||||
PxPinnedArray<PxgArticulationSimUpdate> mArticulationUpdatePool; //Articulation update headers
|
||||
PxFloatArrayPinned mArticulationDofDataPool; //Articulation dof information (jointV, jointP etc.)
|
||||
PxPinnedArray<PxgSoftBody> mNewSoftBodyPool;
|
||||
PxArray<PxgSoftBodyData> mNewSoftBodyDataPool;
|
||||
PxPinnedArray<PxgSoftBody> mSoftBodyPool;
|
||||
PxArray<PxgSoftBodyData> mSoftBodyDataPool;
|
||||
PxInt32ArrayPinned mSoftBodyElementIndexPool;
|
||||
PxArray<PxU32> mNewSoftBodyNodeIndexPool;
|
||||
PxArray<PxU32> mNewSoftBodyElementIndexPool;
|
||||
PxArray<PxU32> mSoftBodyNodeIndexPool;
|
||||
PxArray<PxU32> mNewTetMeshByteSizePool;
|
||||
|
||||
AttachmentManager<PxgFEMFEMAttachment> mParticleSoftBodyAttachments;
|
||||
PxPinnedArray<PxgNonRigidFilterPair> mSoftBodyParticleFilterPairs;
|
||||
PxArray<PxU32> mSoftBodyParticleFilterRefs;
|
||||
|
||||
AttachmentManager<PxgFEMRigidAttachment> mRigidSoftBodyAttachments;
|
||||
PxPinnedArray<PxgRigidFilterPair> mSoftBodyRigidFilterPairs;
|
||||
PxArray<PxU32> mSoftBodyRigidFilterRefs;
|
||||
|
||||
AttachmentManager<PxgFEMFEMAttachment> mSoftBodySoftBodyAttachments;
|
||||
PxArray <Dy::DeformableVolume*> mDirtyDeformableVolumeForFilterPairs;
|
||||
|
||||
AttachmentManager<PxgFEMFEMAttachment> mSoftBodyClothAttachments;
|
||||
PxPinnedArray<PxgNonRigidFilterPair> mSoftBodyClothTetVertFilterPairs;
|
||||
PxArray<PxU32> mSoftBodyClothTetVertFilterRefs;
|
||||
|
||||
AttachmentManager<PxgFEMFEMAttachment> mClothClothAttachments;
|
||||
PxPinnedArray<PxgNonRigidFilterPair> mClothClothVertTriFilterPairs;
|
||||
PxArray<PxU32> mClothClothVertTriFilterRefs;
|
||||
|
||||
PxPinnedArray<PxgFEMCloth> mNewFEMClothPool;
|
||||
PxArray<PxgFEMClothData> mNewFEMClothDataPool;
|
||||
PxPinnedArray<PxgFEMCloth> mFEMClothPool;
|
||||
PxArray<PxgFEMClothData> mFEMClothDataPool;
|
||||
PxInt32ArrayPinned mFEMClothElementIndexPool;
|
||||
PxArray<PxU32> mNewFEMClothNodeIndexPool;
|
||||
PxArray<PxU32> mNewFEMClothElementIndexPool;
|
||||
PxArray<PxU32> mFEMClothNodeIndexPool;
|
||||
PxArray<PxU32> mNewTriangleMeshByteSizePool;
|
||||
|
||||
AttachmentManager<PxgFEMRigidAttachment> mClothRigidAttachments;
|
||||
PxPinnedArray<PxgRigidFilterPair> mClothRigidFilterPairs;
|
||||
PxArray<PxU32> mClothRigidFilterRefs;
|
||||
|
||||
PxInt32ArrayPinned mFrozenPool;
|
||||
PxInt32ArrayPinned mUnfrozenPool;
|
||||
PxInt32ArrayPinned mActivatePool;
|
||||
PxInt32ArrayPinned mDeactivatePool;
|
||||
|
||||
PxU32 mNbFrozenShapes;
|
||||
PxU32 mNbUnfrozenShapes;
|
||||
bool mHasBeenSimulated;//if there are no bodies in the scene, we don't run the update method so that we shouldn't need to syncback data
|
||||
|
||||
PxI32 mSharedLinkIndex;
|
||||
PxI32 mSharedDofIndex;
|
||||
PxI32 mSharedSpatialTendonIndex;
|
||||
PxI32 mSharedSpatialAttachmentIndex;
|
||||
PxI32 mSharedFixedTendonIndex;
|
||||
PxI32 mSharedFixedTendonJointIndex;
|
||||
PxI32 mSharedMimicJointIndex;
|
||||
PxI32 mSharedPathToRootIndex;
|
||||
|
||||
PxgCudaKernelWranglerManager* mGpuWranglerManager;
|
||||
PxCudaContextManager* mCudaContextManager;
|
||||
PxgHeapMemoryAllocatorManager* mHeapMemoryManager;
|
||||
PxgCudaBroadPhaseSap* mBroadPhase;
|
||||
PxU32 mMaxLinks;
|
||||
PxU32 mMaxDofs;
|
||||
PxU32 mMaxMimicJoints;
|
||||
PxU32 mMaxSpatialTendons;
|
||||
PxU32 mMaxAttachments;
|
||||
PxU32 mMaxFixedTendons;
|
||||
PxU32 mMaxTendonJoints;
|
||||
PxU32 mMaxPathToRoots;
|
||||
|
||||
PxU32 mMaxSoftBodyContacts;
|
||||
PxU32 mMaxFemClothContacts;
|
||||
PxU32 mMaxParticleContacts;
|
||||
PxU32 mCollisionStackSizeBytes;
|
||||
|
||||
bool mRecomputeArticulationBlockFormat;
|
||||
bool mEnableOVDReadback;
|
||||
bool mEnableOVDCollisionReadback;
|
||||
#if PX_SUPPORT_OMNI_PVD
|
||||
PxsSimulationControllerOVDCallbacks* mOvdCallbacks;
|
||||
#endif
|
||||
friend class PxgCopyToBodySimTask;
|
||||
friend class PxgCopyToArticulationSimTask;
|
||||
friend class PxgUpdateArticulationSimTask;
|
||||
friend class PxgCopyToSoftBodySimTask;
|
||||
friend class PxgCopyToFEMClothSimTask;
|
||||
friend class PxgCopyToPBDParticleSystemSimTask;
|
||||
friend class PxgSimulationCore;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
594
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSimulationCore.h
vendored
Normal file
594
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSimulationCore.h
vendored
Normal file
@@ -0,0 +1,594 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_SIMULATION_CORE_H
|
||||
#define PXG_SIMULATION_CORE_H
|
||||
|
||||
#include "PxgCudaBuffer.h"
|
||||
#include "PxgSimulationCoreDesc.h"
|
||||
#include "PxgSoftBody.h"
|
||||
#include "PxgSimulationController.h"
|
||||
#include "PxgFEMCloth.h"
|
||||
#include "foundation/PxUserAllocated.h"
|
||||
#include "PxgFEMCore.h"
|
||||
#include "PxgShapeSimManager.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
class PxCudaContextManager;
|
||||
class PxCudaContext;
|
||||
class KernelWrangler;
|
||||
class PxgCudaSolverCore;
|
||||
class PxgGpuNarrowphaseCore;
|
||||
class PxgCudaBroadPhaseSap;
|
||||
class PxgParticleSystemCore;
|
||||
class PxgSoftBodyCore;
|
||||
class PxgFEMClothCore;
|
||||
struct PxsCachedTransform;
|
||||
class PxgCudaKernelWranglerManager;
|
||||
class PxgSoftBodyBuffer;
|
||||
class PxgFEMClothBuffer;
|
||||
class PxgParticleSystemBuffer;
|
||||
class PxgBodySimManager;
|
||||
class PxgGpuContext;
|
||||
class PxSceneDesc;
|
||||
|
||||
class PxgSimulationCore : public PxUserAllocated
|
||||
{
|
||||
PX_NOCOPY(PxgSimulationCore)
|
||||
|
||||
public:
|
||||
PxgSimulationCore(PxgCudaKernelWranglerManager* gpuKernelWrangler,
|
||||
PxCudaContextManager* cudaContextManager,
|
||||
PxgHeapMemoryAllocatorManager* heapMemoryManager,
|
||||
PxgGpuContext* gpuContext,
|
||||
const bool useGpuBroadphase);
|
||||
|
||||
~PxgSimulationCore();
|
||||
|
||||
void gpuMemDmaUpBodySim(PxPinnedArray<PxgBodySimVelocityUpdate>& updatedBodySim,
|
||||
PxPinnedArray<PxgBodySim>& newBodySim,
|
||||
PxPinnedArray<PxgArticulationLink>& newLinkPool,
|
||||
PxFloatArrayPinned& newLinkWakeCounterPool,
|
||||
PxPinnedArray<Cm::UnAlignedSpatialVector>& newLinkExtAccelPool,
|
||||
PxPinnedArray<PxgArticulationLinkProp>& newLinkPropPool,
|
||||
PxInt32ArrayPinned& newLinkParentsPool,
|
||||
PxPinnedArray<Dy::ArticulationBitField>& newLinkChildPool,
|
||||
PxPinnedArray<PxTransform>& newLinkBody2WorldsPool,
|
||||
PxPinnedArray<PxTransform>& newLinkBody2ActorsPool,
|
||||
PxPinnedArray<Dy::ArticulationJointCore>& newJointCorePool,
|
||||
PxPinnedArray<Dy::ArticulationJointCoreData>& newJointDataPool,
|
||||
PxPinnedArray<PxgArticulationSimUpdate>& newLinkJointIndexPool,
|
||||
PxPinnedArray<PxgArticulation>& newArticulationPool,
|
||||
PxPinnedArray<PxGpuSpatialTendonData>& newSpatialTendonParamsPool,
|
||||
PxPinnedArray<PxgArticulationTendon>& newSpatialTendonPool,
|
||||
PxPinnedArray<PxgArticulationTendonElementFixedData>& newAttachmentFixedPool,
|
||||
PxPinnedArray<PxGpuTendonAttachmentData>& newAttachmentModPool,
|
||||
PxInt32ArrayPinned& newTendonToAttachmentRemapPool,
|
||||
PxPinnedArray<PxGpuFixedTendonData>& newFixedTendonParamsPool,
|
||||
PxPinnedArray<PxgArticulationTendon>& newFixedTendonPool,
|
||||
PxPinnedArray<PxgArticulationTendonElementFixedData>& newTendonJointFixedPool,
|
||||
PxPinnedArray<PxGpuTendonJointCoefficientData>& newTendonJointCoefficientPool,
|
||||
PxInt32ArrayPinned& newTendonToTendonJointRemapPool,
|
||||
PxPinnedArray<Dy::ArticulationMimicJointCore>& newMimicJointPool,
|
||||
PxInt32ArrayPinned& newPathToRootPool,
|
||||
PxU32 nbTotalBodies, PxU32 nbTotalArticulations, PxU32 maxLinks,
|
||||
PxU32 maxDofs, PxU32 maxMimicJoints, PxU32 maxSpatialTendons,
|
||||
PxU32 maxAttachments, PxU32 maxFixedTendons, PxU32 maxTendonJoints,
|
||||
bool enableBodyAccelerations);
|
||||
|
||||
void gpuMemDmaUpSoftBodies(PxPinnedArray<PxgSoftBody>& newSoftBodyPool,
|
||||
PxU32* newTetMeshByteSizePool,
|
||||
PxArray<PxgSoftBodyData>& newSoftBodyDataPool,
|
||||
PxArray<PxU32>& newSoftBodyNodeIndexPool,
|
||||
PxArray<PxU32>& newSoftBodyElememtIndexPool,
|
||||
PxPinnedArray<PxgSoftBody>& softBodyPool,
|
||||
PxArray<PxgSoftBodyData>& softBodyDataPool,
|
||||
PxInt32ArrayPinned& softBodyElementIndexPool,
|
||||
PxArray<PxU32>& softBodyNodeIndexPool,
|
||||
PxgBodySimManager& bodySimManager,
|
||||
SoftBodyAttachmentAndFilterData& data);
|
||||
|
||||
void gpuMemDmaUpFEMCloths(PxPinnedArray<PxgFEMCloth>& newFEMClothPool,
|
||||
PxU32* newTriangleMeshByteSizePool,
|
||||
PxArray<PxgFEMClothData>& newFEMClothDataPool,
|
||||
PxArray<PxU32>& newFEMClothNodeIndexPool,
|
||||
PxArray<PxU32>& newFEMClothElememtIndexPool,
|
||||
PxPinnedArray<PxgFEMCloth>& femClothPool,
|
||||
PxArray<PxgFEMClothData>& femClothDataPool,
|
||||
PxInt32ArrayPinned& femClothElementIndexPool,
|
||||
PxArray<PxU32>& femClothNodeIndexPool,
|
||||
PxgBodySimManager& bodySimManager,
|
||||
PxPinnedArray<PxgFEMRigidAttachment>& rigidAttachments,
|
||||
PxPinnedArray<PxgRigidFilterPair>& rigidAttachmentIds,
|
||||
bool dirtyRigidAttachments,
|
||||
PxInt32ArrayPinned& activeRigidAttachments,
|
||||
bool dirtyActiveRigidAttachments,
|
||||
PxPinnedArray<PxgFEMFEMAttachment>& clothAttachments,
|
||||
PxPinnedArray<PxgNonRigidFilterPair>& clothVertTriFilterIds,
|
||||
bool dirtyClothAttachments,
|
||||
PxInt32ArrayPinned& activeClothAttachments,
|
||||
bool dirtyActiveClothAttachments
|
||||
);
|
||||
|
||||
void gpuMemDmaUpParticleSystem(PxgBodySimManager& bodySimManager);
|
||||
|
||||
void mergeChangedAABBMgHandle();
|
||||
|
||||
void gpuMemDmaUp(const PxU32 nbTotalBodies, const PxU32 nbTotalShapes,
|
||||
PxBitMapPinned& changedHandleMap, const bool enableDirectGPUAPI);
|
||||
void gpuMemDmaBack(PxInt32ArrayPinned& frozenArray,
|
||||
PxInt32ArrayPinned& unfrozenArray,
|
||||
PxInt32ArrayPinned& activateArray,
|
||||
PxInt32ArrayPinned& deactiveArray,
|
||||
PxCachedTransformArrayPinned* cachedTransform,
|
||||
const PxU32 cachedCapacity,
|
||||
Bp::BoundsArray& boundArray, PxBitMapPinned& changedAABBMgrHandles,
|
||||
const PxU32 numShapes, const PxU32 numActiveBodies, bool enableDirectGPUAPI);
|
||||
|
||||
void syncDmaback(PxU32& nbFrozenShapesThisFrame, PxU32& nbUnfrozenShapesThisFrame, bool didSimulate);
|
||||
|
||||
void updateBodies(const PxU32 nbUpdatedBodies, const PxU32 nbNewBodies);
|
||||
|
||||
void updateArticulations(const PxU32 nbNewArticulations, PxgArticulationSimUpdate* updates,
|
||||
const PxU32 nbUpdatedArticulations, PxReal* dofData);
|
||||
|
||||
void updateJointsAndSyncData(const PxPinnedArray<PxgD6JointData>& rigidJointData,
|
||||
const PxInt32ArrayPinned& dirtyRigidJointIndices,
|
||||
const PxPinnedArray<PxgD6JointData>& artiJointData,
|
||||
const PxInt32ArrayPinned& dirtyArtiJointIndices,
|
||||
const PxPinnedArray<PxgConstraintPrePrep>& rigidJointPrePrep,
|
||||
const PxPinnedArray<PxgConstraintPrePrep>& artiJointPrePrep,
|
||||
const PxgJointManager::ConstraintIdMap& gpuConstraintIdMapHost,
|
||||
bool isGpuConstraintIdMapDirty,
|
||||
PxU32 nbTotalRigidJoints, PxU32 nbTotalArtiJoints);
|
||||
|
||||
void update(bool enableDirectGPUAPI);
|
||||
|
||||
void setBounds(Bp::BoundsArray* boundArray);
|
||||
|
||||
PxgArticulationBuffer** getArticulationDataBuffer() { return mArticulationDataBuffer.begin(); }
|
||||
PxgTypedCudaBuffer<PxBounds3>* getBoundArrayBuffer();
|
||||
|
||||
void gpuDmaUpdateData();
|
||||
|
||||
bool getRigidDynamicData(void* data, const PxRigidDynamicGPUIndex* gpuIndices, PxRigidDynamicGPUAPIReadType::Enum dataType, PxU32 nbElements, float oneOverDt, CUevent startEvent, CUevent finishEvent) const;
|
||||
bool setRigidDynamicData(const void* data, const PxRigidDynamicGPUIndex* gpuIndices, PxRigidDynamicGPUAPIWriteType::Enum dataType, PxU32 nbElements, CUevent startEvent, CUevent finishEvent);
|
||||
|
||||
void setSoftBodyWakeCounter(const PxU32 remapId, const PxReal wakeCounter, const PxU32 numSoftBodies);
|
||||
void setFEMClothWakeCounter(const PxU32 remapId, const PxReal wakeCounter, const PxU32 numClothes);
|
||||
|
||||
// PT: wrappers to make it easier to find the places where this is used.
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgBodySim> getBodySimBufferDevicePtr() const { return mBodySimCudaBuffer.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgBodySim> getBodySimBufferDeviceData() { return mBodySimCudaBuffer.getTypedDevicePtr(); }
|
||||
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgBodySimVelocities> getBodySimPrevVelocitiesBufferDevicePtr() const { return mBodySimPreviousVelocitiesCudaBuffer.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgBodySimVelocities> getBodySimPrevVelocitiesBufferDeviceData() { return mBodySimPreviousVelocitiesCudaBuffer.getTypedDevicePtr(); }
|
||||
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgArticulation>& getArticulationBuffer() { return mArticulationBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgSolverBodySleepData>& getArticulationSleepDataBuffer() { return mArticulationSleepDataBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgArticulationBlockData>& getArticulationBatchData() { return mArticulationBatchBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgArticulationBlockLinkData>& getArticulationBatchLinkData() { return mArticulationLinkBatchBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgArticulationTraversalStackData>& getArticulationTraversalStackData() { return mArticulationTraversalStackBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgArticulationBitFieldStackData>& getTempPathToRootBitFieldStackData() { return mTempPathToRootBitFieldStackBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgArticulationBitFieldStackData>& getTempSharedBitFieldStackData() { return mTempSharedBitFieldStackBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgArticulationBitFieldStackData>& getTempRootBitFieldStackData() { return mTempRootBitFieldStackBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgArticulationBitFieldData>& getPathToRootBitFieldStackData() { return mPathToRootBitFieldStackBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgArticulationBlockDofData>& getArticulationBatchDofData() { return mArticulationDofBatchBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgArticulationBlockMimicJointData>& getArticulationBatchMimicJointData() { return mArticulationMimicJointBatchBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgArticulationBlockSpatialTendonData>& getArticulationBatchSpatialTendonData() { return mArticulationSpatialTendonBatchBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgArticulationBlockAttachmentData>& getArticulationBatchAttachmentData() { return mArticulationAttachmentBatchBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgArticulationInternalTendonConstraintData>& getArticulationBatchSpatialTendonConstraintData() { return mArticulationSpatialTendonConstraintsBatchBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgArticulationBlockFixedTendonData>& getArticulationBatchFixedTendonData() { return mArticulationFixedTendonBatchBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgArticulationBlockTendonJointData>& getArticulationBatchTendonJointData() { return mArticulationTendonJointBatchBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgArticulationInternalTendonConstraintData>& getArticulationBatchFixedTendonConstraintData() { return mArticulationFixedTendonConstraintsBatchBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgSoftBody>& getSoftBodyBuffer() { return mSoftBodyBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxU32>& getActiveSoftBodyBuffer() { return mActiveSoftBodyBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxU32>& getActiveSelfCollisionSoftBodyBuffer() { return mActiveSelfCollisionSoftBodyBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxU32>& getSoftBodyElementIndexBuffer() { return mSoftBodyElementIndexBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgFEMCloth>& getFEMClothBuffer() { return mFEMClothBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxU32>& getActiveFEMClothBuffer() { return mActiveFEMClothBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxU32>& getFEMClothElementIndexBuffer() { return mFEMClothElementIndexBuffer; }
|
||||
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgD6JointData>& getD6RigidJointBuffer() { return mRigidJointBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgConstraintPrePrep>& getD6RigidJointPrePreBuffer() { return mRigidJointPrePrepBuffer; }
|
||||
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgD6JointData>& getD6ArtiJointBuffer() { return mArtiJointBuffer; }
|
||||
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgConstraintPrePrep>& getD6ArtiJointPrePreBuffer() { return mArtiJointPrePrepBuffer; }
|
||||
PX_FORCE_INLINE CUstream getStream() { return mStream; }
|
||||
|
||||
bool getD6JointData(void* data, const PxD6JointGPUIndex* gpuIndices, PxD6JointGPUAPIReadType::Enum dataType, PxU32 nbElements, PxF32 oneOverDt,
|
||||
PxU32 directGpuApiIndexMapHostSize, CUevent startEvent, CUevent finishEvent) const;
|
||||
|
||||
|
||||
//soft body
|
||||
PX_FORCE_INLINE PxU32 getMaxTetraVerts() { return mMaxTetraVerts; }
|
||||
PX_FORCE_INLINE PxU32 getMaxTetrahedrons() { return mMaxTetrahedrons; }
|
||||
|
||||
PX_FORCE_INLINE PxU32 getGMMaxTetraPartitions() { return mGMMaxPartitions; }
|
||||
PX_FORCE_INLINE PxU32 getGMMaxTetraVerts() { return mGMMaxTetraVerts; }
|
||||
PX_FORCE_INLINE PxU32 getGMMaxTetrahedrons() { return mGMMaxTetrahedrons; }
|
||||
PX_FORCE_INLINE PxU32 getGMMaxTetrahedronsPerPartition() { return mGMMaxTetrahedronsPerPartition; }
|
||||
PX_FORCE_INLINE PxU32 getGMMaxJacobiTetrahedrons() { return mGMMaxJacobiTetrahedrons; }
|
||||
PX_FORCE_INLINE PxU32 getGMMaxJacobiVertices() { return mGMMaxJacobiVertices; }
|
||||
PX_FORCE_INLINE bool getGMUsePartitionAveraging() { return mUsePartitionAveraging; }
|
||||
|
||||
// FEM-cloth
|
||||
PX_FORCE_INLINE PxU32 getMaxClothVerts() { return mMaxNbClothVerts; }
|
||||
PX_FORCE_INLINE PxU32 getMaxClothTriangles() { return mMaxNbClothTriangles; }
|
||||
PX_FORCE_INLINE PxU32 getMaxClothTrianglesWithActiveEdges() { return mMaxNbClothTrianglesWithActiveEdges; }
|
||||
|
||||
PX_FORCE_INLINE PxU32 getMaxNonSharedTrianglePartitions() { return mMaxNbNonSharedTriPartitions; }
|
||||
PX_FORCE_INLINE PxU32 getMaxNonSharedTrianglesPerPartition() { return mMaxNbNonSharedTrianglesPerPartition; }
|
||||
|
||||
PX_FORCE_INLINE PxU32 getMaxNbNonSharedTriangles() { return mMaxNbNonSharedTriangles; }
|
||||
|
||||
PX_FORCE_INLINE PxU32 getMaxSharedTrianglePairPartitions() { return mMaxNbSharedTriPairPartitions; }
|
||||
PX_FORCE_INLINE PxU32 getMaxNonSharedTrianglePairPartitions() { return mMaxNbNonSharedTriPairPartitions; }
|
||||
|
||||
PX_FORCE_INLINE PxU32 getMaxNonSharedTriangleClusterId() { return mMaxNonSharedTriClusterId; }
|
||||
PX_FORCE_INLINE PxU32 getMaxSharedTrianglePairClusterId() { return mMaxSharedTriPairClusterId; }
|
||||
PX_FORCE_INLINE PxU32 getMaxNonSharedTrianglePairClusterId() { return mMaxNonSharedTriPairClusterId; }
|
||||
|
||||
PX_FORCE_INLINE PxU32 getMaxSharedTrianglePairsPerPartition() { return mMaxNbSharedTrianglePairsPerPartition; }
|
||||
PX_FORCE_INLINE PxU32 getMaxNonSharedTrianglePairsPerPartition() { return mMaxNbNonSharedTrianglePairsPerPartition; }
|
||||
|
||||
PX_FORCE_INLINE PxU32 getMaxNbSharedTrianglePairs() { return mMaxNbSharedTrianglePairs; }
|
||||
PX_FORCE_INLINE PxU32 getMaxNbNonSharedTrianglePairs() { return mMaxNbNonSharedTrianglePairs; }
|
||||
|
||||
PX_FORCE_INLINE bool hasActiveBendingPairs() { return mHasActiveBendingPairs; }
|
||||
|
||||
PX_FORCE_INLINE PxU32 getMaxNbCollisionPairUpdatesPerTimestep() { return mMaxNbCollisionPairUpdatesPerTimestep; }
|
||||
PX_FORCE_INLINE PxU32 getMaxNbCollisionSubsteps() { return mMaxNbCollisionSubsteps; }
|
||||
PX_FORCE_INLINE PxU32 getNumTotalFEMCloths() { return mNbTotalFEMCloths; }
|
||||
|
||||
PX_FORCE_INLINE PxU32 getNumTotalShapes() { return mPxgShapeSimManager.getNbTotalShapeSims(); }
|
||||
PX_FORCE_INLINE PxU32 getNumTotalSoftbodies() { return mNbTotalSoftBodies; }
|
||||
|
||||
PX_FORCE_INLINE PxU32 getNbRigidSoftBodyAttachments() const { return mNbRigidSoftBodyAttachments; }
|
||||
PX_FORCE_INLINE PxU32 getNbRigidSoftBodyFilters() const { return mNbRigidSoftBodyFilters; }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgFEMRigidAttachment> getRigidSoftBodyAttachments() { return mSoftBodyRigidAttachments.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxU32> getActiveRigidSoftBodyAttachments() { return mActiveSoftBodyRigidConstraints.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgFEMRigidAttachmentConstraint> getSoftBodyRigidConstraints() { return mSoftBodyRigidConstraints.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxU32> getGpuSoftBodyRigidCounter() { return mNumSoftBodyRigidAttachments.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxNodeIndex> getSoftBodyRigidAttachmentIds() { return mSoftBodyRigidAttachmentIds.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgRigidFilterPair> getRigidSoftBodyFilters() { return mSoftBodyRigidFilterPairs.getTypedDevicePtr(); }
|
||||
|
||||
PX_FORCE_INLINE PxU32 getNbSoftBodySoftBodyAttachments() const { return mNbSoftBodySoftBodyAttachments; }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgFEMFEMAttachment> getSoftBodySoftBodyAttachments() { return mSoftBodySoftBodyAttachments.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxU32> getActiveSoftBodySoftAttachments() { return mActiveSoftBodySoftBodyConstraints.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgFEMFEMAttachmentConstraint> getSoftBodySoftBodyConstraints() { return mSoftBodySoftBodyConstraints.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxU32> getGpuSoftBodySoftBodyCounter() { return mNumSoftBodySoftBodyAttachments.getTypedDevicePtr(); }
|
||||
|
||||
PX_FORCE_INLINE PxU32 getNbClothSoftBodyAttachments() const { return mNbClothSoftBodyAttachments; }
|
||||
PX_FORCE_INLINE PxU32 getNbClothSoftBodyFilters() const { return mNbClothSoftBodyFilters; }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgFEMFEMAttachment> getClothSoftBodyAttachments() { return mSoftBodyClothAttachments.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxU32> getActiveClothSoftBodyAttachments() { return mActiveSoftBodyClothConstraints.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgFEMFEMAttachmentConstraint> getSoftBodyClothConstraints() { return mSoftBodyClothConstraints.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxU32> getGpuSoftBodyClothCounter() { return mNumSoftBodyClothAttachments.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgNonRigidFilterPair> getClothSoftBodyFilters() { return mSoftBodyClothFilterPairs.getTypedDevicePtr(); }
|
||||
|
||||
|
||||
PX_FORCE_INLINE PxU32 getNbSoftBodyParticleAttachments() const { return mNbSoftBodyParticleAttachments; }
|
||||
PX_FORCE_INLINE PxU32 getNbSoftBodyParticleFilters() const { return mNbSoftBodyParticleFilters; }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgFEMFEMAttachment> getSoftBodyParticleAttachments() { return mSoftBodyParticleAttachments.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxU32> getActiveSoftBodyParticleAttachments() { return mActiveSoftBodyParticleConstraints.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgFEMFEMAttachmentConstraint> getSoftBodyParticleConstraints() { return mSoftBodyParticleConstraints.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgNonRigidFilterPair> getSoftBodyParticleFilters() { return mSoftBodyParticleFilterPairs.getTypedDevicePtr(); }
|
||||
|
||||
|
||||
PX_FORCE_INLINE PxU32 getNbActiveRigidClothAttachments() const { return mNbRigidClothAttachments; }
|
||||
PX_FORCE_INLINE PxU32 getNbRigidClothFilters() const { return mNbRigidClothFilters; }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgFEMRigidAttachment> getRigidClothAttachments() { return mClothRigidAttachments.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxU32> getActiveRigidClothAttachments() { return mActiveClothRigidAttachments.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgFEMRigidAttachmentConstraint> getClothRigidConstraints() { return mClothRigidConstraints.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxU32> getGpuClothRigidCounter() { return mNumClothRigidAttachments.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxNodeIndex> getClothRigidAttachmentIds() { return mClothRigidAttachmentIds.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgRigidFilterPair> getRigidClothFilters() { return mClothRigidFilterPairs.getTypedDevicePtr(); }
|
||||
|
||||
|
||||
PX_FORCE_INLINE PxU32 getNbActiveClothClothAttachments() const { return mNbClothClothAttachments; }
|
||||
PX_FORCE_INLINE PxU32 getNbClothClothVertTriFilters() const { return mNbClothClothVertTriFilters; }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgFEMFEMAttachment> getClothClothAttachments() { return mClothClothAttachments.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxU32> getActiveClothClothAttachments() { return mActiveClothClothAttachments.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgFEMFEMAttachmentConstraint> getClothClothConstraints() { return mClothClothConstraints.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxU32> getGpuClothClothCounter() { return mNumClothClothAttachments.getTypedDevicePtr(); }
|
||||
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgNonRigidFilterPair> getClothClothVertTriFilters() { return mClothClothVertTriFilterPairs.getTypedDevicePtr(); }
|
||||
|
||||
PX_FORCE_INLINE PxBitMapPinned& getActiveClothStateChangedMap() { return mActiveFEMClothStateChangedMap; }
|
||||
PX_FORCE_INLINE PxReal* getActiveClothWakeCountsCPU() { return mFEMClothWakeCounts.begin(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxReal> getActiveClothWakeCountsGPU() { return mFEMClothWakeCountsGPU.getTypedDevicePtr(); }
|
||||
|
||||
PX_FORCE_INLINE PxBitMapPinned& getActiveSBStateChangedMap() { return mActiveSBStateChangedMap; }
|
||||
PX_FORCE_INLINE const PxReal* getActiveSBWakeCountsCPU() const { return mSBWakeCounts.begin(); }
|
||||
PX_FORCE_INLINE const PxgDevicePointer<PxReal> getActiveSBWakeCountsGPU() const { return mSBWakeCountsGPU.getTypedDevicePtr(); }
|
||||
PX_FORCE_INLINE PxReal* getActiveSBWakeCountsCPU() { return mSBWakeCounts.begin(); }
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxReal> getActiveSBWakeCountsGPU() { return mSBWakeCountsGPU.getTypedDevicePtr(); }
|
||||
|
||||
PX_FORCE_INLINE PxgDevicePointer<PxgSimulationCoreDesc> getSimulationCoreDesc() { return mUpdatedCacheAndBoundsDescBuffer.getTypedDevicePtr(); }
|
||||
|
||||
PxU32 getMaxArticulationLinks() const;
|
||||
PxU32 getMaxArticulationDofs() const;
|
||||
PxU32 getMaxArticulationMimicJoints() const;
|
||||
PxU32 getMaxArticuationSpatialTendons() const;
|
||||
PxU32 getMaxArticuationAttachments() const;
|
||||
PxU32 getMaxArticuationFixedTendons() const;
|
||||
PxU32 getMaxArticuationTendonJoints() const;
|
||||
|
||||
PxgGpuContext* mGpuContext;
|
||||
PxgCudaKernelWranglerManager* mGpuKernelWranglerManager;
|
||||
PxCudaContextManager* mCudaContextManager;
|
||||
PxCudaContext* mCudaContext;
|
||||
PxgHeapMemoryAllocatorManager* mHeapMemoryManager;
|
||||
Bp::BoundsArray* mBoundArray;
|
||||
bool mUseGpuBp;
|
||||
|
||||
private:
|
||||
|
||||
void constructDescriptor(CUdeviceptr boundsd, CUdeviceptr changedAABBMgrHandlesd, const PxU32 nbTotalShapes, const PxU32 bitMapWordCounts);
|
||||
void createGpuStreamsAndEvents();
|
||||
void releaseGpuStreamsAndEvents();
|
||||
void syncData();
|
||||
|
||||
PxgSimulationCoreDesc* mUpdatedCacheAndBoundsDesc;
|
||||
PxgNewBodiesDesc* mNewBodiesDesc;
|
||||
PxgUpdateArticulationDesc* mUpdateArticulationDesc;
|
||||
PxgUpdatedBodiesDesc* mUpdatedBodiesDesc;
|
||||
PxgUpdatedJointsDesc* mUpdatedJointsDesc;
|
||||
PxgUpdateActorDataDesc* mUpdatedActorDataDesc;
|
||||
|
||||
PxgTypedCudaBuffer<PxU32> mFrozenBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mUnfrozenBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mFrozenBlockAndResBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mUnfrozenBlockAndResBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mUpdatedBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mActivateBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mDeactivateBuffer;
|
||||
|
||||
PxgTypedCudaBuffer<PxU32> mUpdatedDirectBuffer;
|
||||
|
||||
// PT: new naming convention with "CudaBuffer" suffix and specific prefix for easier searching
|
||||
PxgTypedCudaBuffer<PxgBodySim> mBodySimCudaBuffer; // PT: contains PxgBodySim structs.
|
||||
PxgTypedCudaBuffer<PxgBodySimVelocities> mBodySimPreviousVelocitiesCudaBuffer; // PT: contains PxgBodySimVelocities structs. Only for direct GPU acceleration getters.
|
||||
|
||||
public:
|
||||
PxgShapeSimManager mPxgShapeSimManager;
|
||||
private:
|
||||
PxgTypedCudaBuffer<PxgArticulation> mArticulationBuffer; //persistent buffer for articulation
|
||||
PxgTypedCudaBuffer<PxgSolverBodySleepData> mArticulationSleepDataBuffer; //persistent buffer for sleepData
|
||||
PxgTypedCudaBuffer<PxgArticulationBlockData> mArticulationBatchBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationBlockLinkData> mArticulationLinkBatchBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationTraversalStackData> mArticulationTraversalStackBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationBitFieldStackData> mTempPathToRootBitFieldStackBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationBitFieldStackData> mTempSharedBitFieldStackBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationBitFieldStackData> mTempRootBitFieldStackBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationBitFieldData> mPathToRootBitFieldStackBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationBlockDofData> mArticulationDofBatchBuffer;
|
||||
|
||||
PxgTypedCudaBuffer<PxgArticulationBlockMimicJointData> mArticulationMimicJointBatchBuffer;
|
||||
|
||||
PxgTypedCudaBuffer<PxgArticulationBlockSpatialTendonData> mArticulationSpatialTendonBatchBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationInternalTendonConstraintData> mArticulationSpatialTendonConstraintsBatchBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationBlockAttachmentData> mArticulationAttachmentBatchBuffer;
|
||||
|
||||
PxgTypedCudaBuffer<PxgArticulationBlockFixedTendonData> mArticulationFixedTendonBatchBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationInternalTendonConstraintData> mArticulationFixedTendonConstraintsBatchBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationBlockTendonJointData> mArticulationTendonJointBatchBuffer;
|
||||
|
||||
PxArray<PxgArticulationBuffer*> mArticulationDataBuffer;// persistent data, map with mArticulationBuffer
|
||||
//PxU32 mMaxLinks;
|
||||
//PxU32 mMaxDofs;
|
||||
|
||||
PxgTypedCudaBuffer<PxgSoftBody> mSoftBodyBuffer; //persistent buffer for soft bodies
|
||||
PxgTypedCudaBuffer<PxU32> mActiveSoftBodyBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mActiveSelfCollisionSoftBodyBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mSoftBodyElementIndexBuffer;
|
||||
PxgTypedCudaBuffer<PxgFEMCloth> mFEMClothBuffer; // persistent buffer for FEM-cloth
|
||||
PxgTypedCudaBuffer<PxU32> mActiveFEMClothBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mFEMClothElementIndexBuffer;
|
||||
|
||||
PxArray<PxgSoftBodyBuffer*> mSoftBodyDataBuffer; //persistent data, map with mSoftBodyBuffer
|
||||
PxArray<PxgFEMClothBuffer*> mFEMClothDataBuffer; //persistent data, map with mFEMClothBuffer
|
||||
|
||||
PxBitMapPinned mActiveFEMClothStateChangedMap;
|
||||
PxFloatArrayPinned mFEMClothWakeCounts;
|
||||
PxgTypedCudaBuffer<PxReal> mFEMClothWakeCountsGPU;
|
||||
|
||||
PxBitMapPinned mActiveSBStateChangedMap;
|
||||
PxFloatArrayPinned mSBWakeCounts;
|
||||
PxgTypedCudaBuffer<PxReal> mSBWakeCountsGPU;
|
||||
|
||||
PxgTypedCudaBuffer<PxgD6JointData> mRigidJointBuffer;
|
||||
PxgTypedCudaBuffer<PxgD6JointData> mArtiJointBuffer;
|
||||
|
||||
PxgTypedCudaBuffer<PxgConstraintPrePrep> mRigidJointPrePrepBuffer;
|
||||
PxgTypedCudaBuffer<PxgConstraintPrePrep> mArtiJointPrePrepBuffer;
|
||||
|
||||
PxgTypedCudaBuffer<PxgConstraintIdMapEntry> mGpuConstraintIdMapDevice;
|
||||
// See PxgJointManager::mGpuConstraintIdMapHost. This is just the device buffer counterpart.
|
||||
|
||||
PxgTypedCudaBuffer<PxgBodySimVelocityUpdate> mUpdatedBodySimBuffer;
|
||||
PxgTypedCudaBuffer<PxgBodySim> mNewBodySimBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulation> mNewArticulationBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationLink> mNewLinkBuffer;
|
||||
PxgTypedCudaBuffer<PxReal> mNewLinkWakeCounterBuffer;
|
||||
PxgTypedCudaBuffer<Cm::UnAlignedSpatialVector> mNewLinkExtAccelBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationLinkProp> mNewLinkPropBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mNewLinkParentBuffer;
|
||||
PxgTypedCudaBuffer<ArticulationBitField> mNewLinkChildBuffer;
|
||||
PxgTypedCudaBuffer<PxTransform> mNewLinkBody2WorldsBuffer;
|
||||
PxgTypedCudaBuffer<PxTransform> mNewLinkBody2ActorsBuffer;
|
||||
PxgTypedCudaBuffer<Dy::ArticulationJointCore> mNewJointCoreBuffer;
|
||||
PxgTypedCudaBuffer<Dy::ArticulationJointCoreData> mNewJointDataBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationSimUpdate> mNewLinkIndexBuffer;
|
||||
PxgTypedCudaBuffer<PxGpuSpatialTendonData> mNewSpatialTendonParamsBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationTendon> mNewSpatialTendonsBuffer;
|
||||
PxgTypedCudaBuffer<Dy::ArticulationMimicJointCore> mNewMimicJointBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mNewPathToRootBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationTendonElementFixedData> mNewAttachmentFixedBuffer;
|
||||
PxgTypedCudaBuffer<PxGpuTendonAttachmentData> mNewAttachmentModBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mNewTendonAttachmentRemapBuffer;
|
||||
PxgTypedCudaBuffer<PxGpuFixedTendonData> mNewFixedTendonParamsBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationTendon> mNewFixedTendonsBuffer;
|
||||
PxgTypedCudaBuffer<PxgArticulationTendonElementFixedData> mNewTendonJointsFixedBuffer;
|
||||
PxgTypedCudaBuffer<PxGpuTendonJointCoefficientData> mNewTendonJointsCoefficientBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mNewTendonTendonJointRemapBuffer;
|
||||
|
||||
PxgTypedCudaBuffer<PxU32> mActiveNodeIndices; //this is index mapping between solver body data index and the node index
|
||||
|
||||
PxgTypedCudaBuffer<PxgSimulationCoreDesc> mUpdatedCacheAndBoundsDescBuffer;
|
||||
PxgTypedCudaBuffer<PxgNewBodiesDesc> mBodiesDescBuffer;
|
||||
PxgTypedCudaBuffer<PxgUpdateArticulationDesc> mArticulationDescBuffer;
|
||||
PxgTypedCudaBuffer<PxgUpdatedBodiesDesc> mUpdatedBodiesDescBuffer;
|
||||
PxgTypedCudaBuffer<PxgUpdatedJointsDesc> mUpdatedJointDescBuffer;
|
||||
PxgTypedCudaBuffer<PxgUpdateActorDataDesc> mUpdatedActorDescBuffer;
|
||||
|
||||
PxgTypedCudaBuffer<PxBounds3> mBoundsBuffer; //Bp in CPU so we can't use the BoundsBuffer in the GPU BP
|
||||
PxgTypedCudaBuffer<PxU32> mChangedAABBMgrHandlesBuffer; //Bp in CPU so we can't use the changedAABBMgrHandlesBuffer in the GPU AABBManager
|
||||
|
||||
PxgTypedCudaBuffer<PxgFEMRigidAttachment> mSoftBodyRigidAttachments;
|
||||
PxgTypedCudaBuffer<PxgRigidFilterPair> mSoftBodyRigidFilterPairs;
|
||||
PxgTypedCudaBuffer<PxgFEMRigidAttachmentConstraint> mSoftBodyRigidConstraints;
|
||||
PxgTypedCudaBuffer<PxU32> mActiveSoftBodyRigidConstraints;
|
||||
PxgTypedCudaBuffer<PxU32> mNumSoftBodyRigidAttachments;
|
||||
PxgTypedCudaBuffer<PxNodeIndex> mSoftBodyRigidAttachmentIds;
|
||||
|
||||
|
||||
PxgTypedCudaBuffer<PxgFEMFEMAttachment> mSoftBodySoftBodyAttachments;
|
||||
PxgTypedCudaBuffer<PxgFEMFEMAttachmentConstraint> mSoftBodySoftBodyConstraints;
|
||||
PxgTypedCudaBuffer<PxU32> mActiveSoftBodySoftBodyConstraints;
|
||||
PxgTypedCudaBuffer<PxU32> mNumSoftBodySoftBodyAttachments;
|
||||
|
||||
PxgTypedCudaBuffer<PxgFEMFEMAttachment> mSoftBodyClothAttachments;
|
||||
PxgTypedCudaBuffer<PxgNonRigidFilterPair> mSoftBodyClothFilterPairs;
|
||||
PxgTypedCudaBuffer<PxgFEMFEMAttachmentConstraint> mSoftBodyClothConstraints;
|
||||
PxgTypedCudaBuffer<PxU32> mActiveSoftBodyClothConstraints;
|
||||
PxgTypedCudaBuffer<PxU32> mNumSoftBodyClothAttachments;
|
||||
|
||||
PxgTypedCudaBuffer<PxgFEMFEMAttachment> mSoftBodyParticleAttachments;
|
||||
PxgTypedCudaBuffer<PxgNonRigidFilterPair> mSoftBodyParticleFilterPairs;
|
||||
PxgTypedCudaBuffer<PxgFEMFEMAttachmentConstraint> mSoftBodyParticleConstraints;
|
||||
PxgTypedCudaBuffer<PxU32> mActiveSoftBodyParticleConstraints;
|
||||
|
||||
PxgTypedCudaBuffer<PxgFEMRigidAttachment> mClothRigidAttachments;
|
||||
PxgTypedCudaBuffer<PxU32> mActiveClothRigidAttachments;
|
||||
PxgTypedCudaBuffer<PxgRigidFilterPair> mClothRigidFilterPairs;
|
||||
PxgTypedCudaBuffer<PxgFEMRigidAttachmentConstraint> mClothRigidConstraints;
|
||||
PxgTypedCudaBuffer<PxU32> mNumClothRigidAttachments;
|
||||
PxgTypedCudaBuffer<PxNodeIndex> mClothRigidAttachmentIds;
|
||||
|
||||
PxgTypedCudaBuffer<PxgFEMFEMAttachment> mClothClothAttachments;
|
||||
PxgTypedCudaBuffer<PxU32> mActiveClothClothAttachments;
|
||||
PxgTypedCudaBuffer<PxgNonRigidFilterPair> mClothClothVertTriFilterPairs;
|
||||
PxgTypedCudaBuffer<PxgFEMFEMAttachmentConstraint> mClothClothConstraints;
|
||||
PxgTypedCudaBuffer<PxU32> mNumClothClothAttachments;
|
||||
|
||||
PxU32* mPinnedEvent;
|
||||
|
||||
PxU32 mNbRigidSoftBodyAttachments;
|
||||
PxU32 mNbRigidSoftBodyFilters;
|
||||
|
||||
PxU32 mNbSoftBodySoftBodyAttachments;
|
||||
|
||||
PxU32 mNbClothSoftBodyAttachments;
|
||||
PxU32 mNbClothSoftBodyFilters;
|
||||
|
||||
PxU32 mNbSoftBodyParticleAttachments;
|
||||
PxU32 mNbSoftBodyParticleFilters;
|
||||
|
||||
PxU32 mNbRigidClothAttachments;
|
||||
PxU32 mNbRigidClothFilters;
|
||||
|
||||
PxU32 mNbClothClothAttachments;
|
||||
PxU32 mNbClothClothVertTriFilters;
|
||||
|
||||
PxU32 mNbTotalBodySim;
|
||||
PxU32 mNbTotalArticulations; //this is used for articulation
|
||||
PxU32 mNbTotalSoftBodies;
|
||||
PxU32 mNbTotalFEMCloths;
|
||||
|
||||
PxU32 mMaxTetraVerts; //max number of verts for all the tetrahedron mesh
|
||||
PxU32 mMaxTetrahedrons;
|
||||
|
||||
PxU32 mGMMaxPartitions;
|
||||
PxU32 mGMMaxTetraVerts;
|
||||
PxU32 mGMMaxTetrahedrons;
|
||||
PxU32 mGMMaxTetrahedronsPerPartition;
|
||||
PxU32 mGMMaxJacobiTetrahedrons;
|
||||
PxU32 mGMMaxJacobiVertices;
|
||||
|
||||
PxU32 mMaxNbClothVerts;
|
||||
PxU32 mMaxNbClothTriangles;
|
||||
PxU32 mMaxNbClothTrianglesWithActiveEdges;
|
||||
PxU32 mMaxNbNonSharedTriangles;
|
||||
|
||||
PxU32 mMaxNbNonSharedTriPartitions;
|
||||
PxU32 mMaxNbNonSharedTrianglesPerPartition;
|
||||
|
||||
PxU32 mMaxNbSharedTrianglePairs;
|
||||
PxU32 mMaxNbNonSharedTrianglePairs;
|
||||
|
||||
PxU32 mMaxNbSharedTriPairPartitions;
|
||||
PxU32 mMaxNbNonSharedTriPairPartitions;
|
||||
|
||||
PxU32 mMaxNonSharedTriClusterId;
|
||||
PxU32 mMaxSharedTriPairClusterId;
|
||||
PxU32 mMaxNonSharedTriPairClusterId;
|
||||
|
||||
PxU32 mMaxNbSharedTrianglePairsPerPartition;
|
||||
PxU32 mMaxNbNonSharedTrianglePairsPerPartition;
|
||||
PxU32 mMaxNbCollisionPairUpdatesPerTimestep;
|
||||
PxU32 mMaxNbCollisionSubsteps;
|
||||
|
||||
//PxU32 mMaxParticles;
|
||||
PxU32 mNbTotalRigidJoints;
|
||||
PxU32 mNbTotalArtiJoints;
|
||||
|
||||
bool mUsePartitionAveraging;
|
||||
bool mHasActiveBendingPairs;
|
||||
|
||||
CUstream mStream;
|
||||
CUevent mEvent;
|
||||
CUevent mDmaEvent;
|
||||
|
||||
PxVec3 mGravity;
|
||||
|
||||
PxArray<PxgSoftBody> mSoftBodiesToFree;
|
||||
PxArray<PxgFEMCloth> mClothsToFree;
|
||||
#if PX_SUPPORT_OMNI_PVD
|
||||
PxU64 getRigidBodyDataTypeElementSize(PxRigidDynamicGPUAPIWriteType::Enum dataType);
|
||||
void ovdRigidBodyCallback(const void* PX_RESTRICT data, const PxRigidDynamicGPUIndex* PX_RESTRICT gpuIndices, PxRigidDynamicGPUAPIWriteType::Enum dataType, PxU32 nbElements);
|
||||
|
||||
PxPinnedArray<PxU8> mOvdDataBuffer;
|
||||
PxPinnedArray<PxU8> mOvdIndexBuffer;
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
395
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSimulationCoreDesc.h
vendored
Normal file
395
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSimulationCoreDesc.h
vendored
Normal file
@@ -0,0 +1,395 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
|
||||
#ifndef PXG_SIMULATION_CORE_DESC_H
|
||||
#define PXG_SIMULATION_CORE_DESC_H
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "DyVArticulation.h"
|
||||
#include "PxDeformableSurface.h"
|
||||
#include "PxDeformableVolume.h"
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
#define PXG_CHECK_BITSHIFT_32(lowmask, highmask, bitshift) \
|
||||
((1 << bitshift) - 1 == lowmask) && (0xffffffff >> bitshift == highmask)
|
||||
|
||||
#define PXG_BITSHIFT_TET_ID 20
|
||||
#define PXG_BITSHIFT_ELEMENT_ID 20
|
||||
|
||||
PX_COMPILE_TIME_ASSERT(PX_MAX_NB_DEFORMABLE_SURFACE_TRI == PX_MAX_NB_DEFORMABLE_SURFACE_VTX);
|
||||
PX_COMPILE_TIME_ASSERT(PXG_CHECK_BITSHIFT_32(PX_MAX_NB_DEFORMABLE_SURFACE_TRI, PX_MAX_NB_DEFORMABLE_SURFACE, PXG_BITSHIFT_ELEMENT_ID));
|
||||
PX_COMPILE_TIME_ASSERT(PXG_CHECK_BITSHIFT_32(PX_MAX_NB_DEFORMABLE_VOLUME_TET, PX_MAX_NB_DEFORMABLE_VOLUME, PXG_BITSHIFT_TET_ID));
|
||||
|
||||
namespace physx
|
||||
{
|
||||
struct PxgSolverBodySleepData;
|
||||
struct PxgShape;
|
||||
struct PxgBodySim;
|
||||
struct PxgShapeSim;
|
||||
struct PxgArticulationLink;
|
||||
struct PxgArticulationLinkProp;
|
||||
struct PxsCachedTransform;
|
||||
struct PxgBodySimVelocityUpdate;
|
||||
struct PxgD6JointData;
|
||||
struct PxgConstraintPrePrep;
|
||||
class PxgArticulation;
|
||||
class PxNodeIndex;
|
||||
class PxgArticulationTendon;
|
||||
class PxGpuSpatialTendonData;
|
||||
class PxGpuFixedTendonData;
|
||||
class PxGpuTendonAttachmentData;
|
||||
class PxgArticulationTendonElementFixedData;
|
||||
class PxGpuTendonJointCoefficientData;
|
||||
|
||||
namespace Dy
|
||||
{
|
||||
struct ArticulationJointCore;
|
||||
class ArticulationJointCoreData;
|
||||
}
|
||||
|
||||
// Upload descriptor for newly added rigid bodies: the staged source array and
// the persistent device-side buffer the records are copied into.
struct PxgNewBodiesDesc
{
	const PxgBodySim* mNewBodySim;        // staged records of the newly added bodies (source)
	PxgBodySim* mBodySimBufferDeviceData; // persistent device-side body buffer (destination)
	PxU32 mNbNewBodies; //number of newly added bodies
};
|
||||
|
||||
// Per-articulation update record: which data is dirty and where each category of
// staged data (links, dofs, tendons, ...) starts in the upload buffers.
struct PX_ALIGN_PREFIX(16) PxgArticulationSimUpdate
{
	PxU32 dirtyFlags;        //What data is updated (ArticulationDirtyFlag)
	PxU32 articulationIndex; //which articulation on the GPU
	PxU32 linkStartIndex;    //Where the link/joint data starts in memory
	PxU32 dofDataStartIndex; //Where the dof data starts in memory, e.g. jointVelocity, jointPosition, jointForce etc.

	PxU32 spatialTendonStartIndex;           //Where the spatial tendon starts in memory
	PxU32 spatialTendonAttachmentStartIndex; //Where the spatial tendon attachment starts in memory
	PxU32 fixedTendonStartIndex;             //Where the fixed tendon starts in memory
	PxU32 fixedTendonJointStartIndex;        //Where the fixed tendon joint starts in memory

	PxU32 mimicJointStartIndex; //Where the mimic joint data starts in memory
	PxU32 pathToRootIndex;      //Where the pathToRoot array starts in memory

	PxU8 userFlags; //(PxArticulationFlag)
	PxU8 pad[7];    // pads the struct to a 16-byte multiple (10*4 + 1 + 7 = 48 bytes)

}PX_ALIGN_SUFFIX(16);
|
||||
|
||||
// Packs a cloth id and an element (triangle/vertex) id into one 32-bit index:
// the cloth id occupies the high bits, the element id the low PXG_BITSHIFT_ELEMENT_ID bits.
PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 PxEncodeClothIndex(const PxU32 clothId, const PxU32 elementId)
{
	// elementId may equal PX_MAX_NB_DEFORMABLE_SURFACE_TRI/PX_MAX_NB_DEFORMABLE_SURFACE_VTX to encode an "any tri or vtx" mask
	PX_ASSERT(clothId < PX_MAX_NB_DEFORMABLE_SURFACE);
	PX_ASSERT(elementId <= PX_MAX_NB_DEFORMABLE_SURFACE_TRI);
	PX_ASSERT(elementId <= PX_MAX_NB_DEFORMABLE_SURFACE_VTX);
	const PxU32 clothBits = clothId << PXG_BITSHIFT_ELEMENT_ID;
	return clothBits | elementId;
}
|
||||
|
||||
// Packs a soft-body id and a tetrahedron id into one 32-bit index:
// the soft-body id occupies the high bits, the tet id the low PXG_BITSHIFT_TET_ID bits.
PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 PxEncodeSoftBodyIndex(const PxU32 softBodyId, const PxU32 tetId)
{
	// tetId may equal PX_MAX_NB_DEFORMABLE_VOLUME_TET to encode an "any tet" mask
	PX_ASSERT(softBodyId < PX_MAX_NB_DEFORMABLE_VOLUME);
	PX_ASSERT(tetId <= PX_MAX_NB_DEFORMABLE_VOLUME_TET);
	const PxU32 bodyBits = softBodyId << PXG_BITSHIFT_TET_ID;
	return bodyBits | tetId;
}
|
||||
|
||||
// Packs a particle-system id (high 32 bits) and a particle id (low 32 bits)
// into one 64-bit index.
PX_CUDA_CALLABLE PX_FORCE_INLINE PxU64 PxEncodeParticleIndex(const PxU32 particleSystemId, const PxU32 particleId)
{
	PX_ASSERT(particleSystemId < 0xffffffff);
	PX_ASSERT(particleId < 0xffffffff);
	const PxU64 systemBits = PxU64(particleSystemId) << 32ull;
	return systemBits | particleId;
}
|
||||
|
||||
// Extracts the tetrahedron id (low bits) from a compressed soft-body index.
// Inverse of the element part of PxEncodeSoftBodyIndex.
PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 PxGetSoftBodyElementIndex(const PxU32 compressedId)
{
	return compressedId & PX_MAX_NB_DEFORMABLE_VOLUME_TET;
}
|
||||
|
||||
// Extracts the soft-body id (high bits) from a compressed soft-body index.
// Inverse of the id part of PxEncodeSoftBodyIndex.
PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 PxGetSoftBodyId(const PxU32 compressedId)
{
	const PxU32 bodyId = compressedId >> PXG_BITSHIFT_TET_ID;
	return bodyId;
}
|
||||
|
||||
// Extracts the element (triangle/vertex) id from a compressed cloth index.
// Inverse of the element part of PxEncodeClothIndex.
PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 PxGetClothElementIndex(const PxU32 compressedId)
{
	return compressedId & PX_MAX_NB_DEFORMABLE_SURFACE_TRI;
}
|
||||
|
||||
// Extracts the cloth id (high bits) from a compressed cloth index.
// Inverse of the id part of PxEncodeClothIndex.
PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 PxGetClothId(const PxU32 compressedId)
{
	const PxU32 clothId = compressedId >> PXG_BITSHIFT_ELEMENT_ID;
	return clothId;
}
|
||||
|
||||
// Returns true when the packed barycentric/type value is all zero, which this
// encoding uses to distinguish a vertex attachment from a barycentric one.
PX_CUDA_CALLABLE PX_FORCE_INLINE bool PxGetIsVertexType(const float4& baryOrType)
{
	const bool allZero = (baryOrType.x == 0.0f) && (baryOrType.y == 0.0f) &&
						 (baryOrType.z == 0.0f) && (baryOrType.w == 0.0f);
	return allZero;
}
|
||||
|
||||
// Extracts the particle id (low 32 bits) from a compressed 64-bit particle index.
// Inverse of the particle part of PxEncodeParticleIndex.
PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 PxGetParticleIndex(const PxU64 compressedId)
{
	return PxU32(compressedId & 0xffffffff);
}
|
||||
|
||||
// Extracts the particle-system id (high 32 bits) from a compressed 64-bit particle index.
// Inverse of the system part of PxEncodeParticleIndex.
PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 PxGetParticleSystemId(const PxU64 compressedId)
{
	return PxU32(compressedId >> 32);
}
|
||||
|
||||
// 16-byte-aligned key identifying a (rigid body, FEM element) filter pair.
// Pairs order lexicographically on (index0, index1, index2) via compare().
PX_ALIGN_PREFIX(16)
class PxgRigidFilterPair
{
public:
	PX_CUDA_CALLABLE PxgRigidFilterPair() : index2(0)
	{}

	PxU64 index0; // the rigid body index is always stored in index0
	PxU64 index1;
	PxU32 index2;
	PxU32 pading[3]; // explicit padding out to the 16-byte alignment (sic: name kept for ABI/source compat)

	// Three-way lexicographic comparison on (index0, index1, index2):
	// returns -1 if *this < other, 1 if *this > other, 0 if equal.
	PX_CUDA_CALLABLE PxI32 compare(const PxgRigidFilterPair& other) const
	{
		if (index0 != other.index0)
			return (index0 < other.index0) ? -1 : 1;
		if (index1 != other.index1)
			return (index1 < other.index1) ? -1 : 1;
		if (index2 != other.index2)
			return (index2 < other.index2) ? -1 : 1;
		return 0;
	}
}PX_ALIGN_SUFFIX(16);
|
||||
|
||||
|
||||
// Cone-limit parameters for an attachment constraint. Each float4 is a union:
// the same 16 bytes are interpreted as either field depending on the attachment
// type that owns this struct (see PxgFEMRigidAttachment / PxgFEMFEMAttachment).
struct PxgConeLimitParams
{
	union
	{
		float4 low_high_limits;
		float4 low_high_angle;
	};

	union
	{
		float4 axis_angle;
		float4 barycentric;
	};
};
|
||||
|
||||
//KS - consider splitting into 2x structures - 16 bytes and 8 bytes!
|
||||
//soft body/cloth with rigid
|
||||
// Attachment between a FEM object (soft body or cloth) and a rigid body.
// Inherits the sortable (index0, index1, index2) key from PxgRigidFilterPair.
class PxgFEMRigidAttachment : public PxgRigidFilterPair
{
public:
	union
	{
		float4 localPose0;  // rigid-side anchor as a local pose...
		float4 baryOrType0; // ...or a barycentric/vertex-type encoding (all-zero = vertex, see PxGetIsVertexType)
	};

	PxgConeLimitParams coneLimitParams;
	float4 baryOrType1; // FEM-side barycentric coordinates or vertex-type encoding
};
|
||||
|
||||
// Batched particle-rigid constraint data in structure-of-arrays layout,
// 32 constraints per batch (32 presumably matches the CUDA warp size so each
// lane solves one constraint -- TODO confirm against the solver kernels).
PX_ALIGN_PREFIX(16)
struct PxgParticleRigidConstraint
{
	float4 raXn0_biasW[32];
	float4 raXn1_biasW[32];
	float4 raXn2_biasW[32];
	float4 velMultiplierXYZ_invMassW[32];
	float4 low_high_limits[32];
	float4 axis_angle[32];
	PxU64 particleId[32]; // encoded particle ids (see PxEncodeParticleIndex)
	PxU64 rigidId[32];
}PX_ALIGN_SUFFIX(16);
|
||||
|
||||
// 16-byte-aligned key identifying a filter pair between two non-rigid objects
// (soft body / cloth / particle). Pairs order lexicographically on
// (index0, index1, index2) via compare().
PX_ALIGN_PREFIX(16)
class PxgNonRigidFilterPair
{
public:
	PX_CUDA_CALLABLE PxgNonRigidFilterPair() : index2(0) // : referenceCount(0)
	{}

	PxU64 index0;
	PxU32 index1;
	PxU32 index2;


	// Three-way lexicographic comparison on (index0, index1, index2):
	// returns -1 if *this < other, 1 if *this > other, 0 if equal.
	PX_CUDA_CALLABLE PxI32 compare(const PxgNonRigidFilterPair& other) const
	{
		if (index0 != other.index0)
			return (index0 < other.index0) ? -1 : 1;
		if (index1 != other.index1)
			return (index1 < other.index1) ? -1 : 1;
		if (index2 != other.index2)
			return (index2 < other.index2) ? -1 : 1;
		return 0;
	}
}PX_ALIGN_SUFFIX(16);
|
||||
|
||||
// Attachment between two FEM/deformable objects; each side is anchored by
// barycentric coordinates, with ids packed into index0/index1.
PX_ALIGN_PREFIX(16)
class PxgFEMFEMAttachment
{
public:
	PxgConeLimitParams coneLimitParams;
	float4 barycentricCoordinates0; // anchor on object 0
	float4 barycentricCoordinates1; // anchor on object 1
	PxU64 index0; // encoded id of object 0
	PxU32 index1; // encoded id of object 1
	PxReal constraintOffset;

}PX_ALIGN_SUFFIX(16);
|
||||
|
||||
PX_COMPILE_TIME_ASSERT(sizeof(PxgFEMFEMAttachment) % 16 == 0);
|
||||
|
||||
// Upload descriptor for newly added articulations: staged host data (mNew*)
// plus the device-side pools (m*Pool) the data is copied into.
struct PxgUpdateArticulationDesc
{
	// staged per-articulation and per-link data
	PxgArticulation* mNewArticulations;
	PxReal* mNewLinkWakeCounters;
	PxgArticulationLink* mNewLinks;
	PxgArticulationLinkProp* mNewLinkProps;

	PxU32* mNewLinkParents;
	Dy::ArticulationBitField* mNewLinkChildren;
	PxTransform* mNewLinkBody2Worlds;
	PxTransform* mNewLinkBody2Actors;
	Cm::UnAlignedSpatialVector* mNewLinkExtAccels;
	Dy::ArticulationJointCore* mNewJointCores;
	Dy::ArticulationJointCoreData* mNewJointData;
	PxgArticulationSimUpdate* mIndicesOffset; // per-articulation start offsets into the staged arrays

	// destination pools
	PxgArticulation* mArticulationPool;
	PxgSolverBodySleepData* mArticulationSleepDataPool;

	// spatial tendons
	PxGpuSpatialTendonData* mNewSpatialTendonParamsPool;
	PxgArticulationTendon* mNewSpatialTendonPool;
	PxgArticulationTendonElementFixedData* mNewAttachmentFixedPool;
	PxGpuTendonAttachmentData* mNewAttachmentModPool;
	PxU32* mNewTendonAttachmentRemapPool;

	// fixed tendons and mimic joints
	PxGpuFixedTendonData* mNewFixedTendonParamsPool;
	PxgArticulationTendon* mNewFixedTendonPool;
	PxgArticulationTendonElementFixedData* mNewTendonJointFixedPool;
	PxGpuTendonJointCoefficientData* mNewTendonJointCoefficientPool;
	PxU32* mNewTendonTendonJointRemapPool;
	Dy::ArticulationMimicJointCore* mNewArticulationMimicJointPool;

	PxU32* mNewPathToRootPool;

	PxU32 mNbNewArticulations; //number of newly added articulations
};
|
||||
|
||||
// Descriptor for applying staged per-body velocity updates to the persistent
// device-side body buffer.
struct PxgUpdatedBodiesDesc
{
	PxgBodySim* mBodySimBufferDeviceData;      // persistent device-side body buffer (destination)
	PxgBodySimVelocityUpdate* mUpdatedBodySim; // staged velocity updates (source)
	PxU32 mNbUpdatedBodies;
};
|
||||
|
||||
// Descriptor for uploading dirty D6 joint data (rigid-rigid and articulation
// joints) from mapped host memory to device memory.
struct PxgUpdatedJointsDesc
{
	const PxgD6JointData* mD6RigidJointCPUPool; //map memory
	PxgD6JointData* mD6RigidJointGPUPool; //device memory

	const PxgConstraintPrePrep* mD6RigidJointPrePrepCPUPool; //map memory
	PxgConstraintPrePrep* mD6RigidJointPrePrepGPUPool; //device memory

	const PxgD6JointData* mD6ArtiJointCPUPool; //map memory
	PxgD6JointData* mD6ArtiJointGPUPool; //device memory

	const PxgConstraintPrePrep* mD6ArtiJointPrePrepCPUPool; //map memory
	PxgConstraintPrePrep* mD6ArtiJointPrePrepGPUPool; //device memory

	const PxU32* mUpdatedRigidJointIndices; //map memory; indices of dirty rigid joints
	PxU32 mNbUpdatedRigidJoints;

	const PxU32* mUpdatedArtiJointIndices; //map memory; indices of dirty articulation joints
	PxU32 mNbUpdatedArtiJoints;
};
|
||||
|
||||
// Main descriptor for the simulation-core update kernels: scratch arrays for
// frozen/unfrozen/updated shape classification plus device views of body,
// shape, transform-cache and bounds data.
struct PxgSimulationCoreDesc
{
	PxU32* mChangedAABBMgrHandles;
	PxU32* mFrozen;
	PxU32* mUnfrozen;
	PxU32* mFrozenBlockAndRes;//this array is used to store frozen block value for the scan and also store the final frozen shape index
	PxU32* mUnfrozenBlockAndRes;//this array is used to store unfrozen block value for the scan and also store the final unfrozen shape index
	PxU32* mUpdated;
	PxU32* mActivate;
	PxU32* mDeactivate;

	PxgBodySim* mBodySimBufferDeviceData;
	const PxgShapeSim* mShapeSimsBufferDeviceData;
	PxgArticulation* mArticulationPool;
	PxgSolverBodySleepData* mArticulationSleepDataPool;

	PxsCachedTransform* mTransformCache;
	PxBounds3* mBounds;

	PxU32* mBodyDataIndices; // mapping between solver body data index and node index

	PxgSolverBodySleepData* mSleepData;
	PxgShape* mShapes;

	PxU32 mNbTotalShapes; //number of total shapes;
	PxU32 mBitMapWordCounts; //number of words in gChangedAABBMgrHandles, this will include shapes and aggregates

	PxU32 mTotalFrozenShapes; // AD: these two members are the only reason we copy the whole descriptor back to cpu.
	PxU32 mTotalUnfrozenShapes;
};
|
||||
|
||||
// Descriptor for updating actor-related data (transform cache, bounds and
// changed-handle bitmaps) after direct-GPU or CPU API writes.
struct PxgUpdateActorDataDesc
{
	PxgBodySim* mBodySimBufferDeviceData;

	PxgShape* mShapes;

	PxsCachedTransform* mTransformCache;
	PxBounds3* mBounds;

	PxNodeIndex* mRigidNodeIndices;
	PxU32* mShapeIndices;

	PxU32* mUpdated; //Direct API changed handle
	PxU32* mChangedAABBMgrHandles; //CPU API changed handle

	PxU32 mBitMapWordCounts; // word count of the changed-handle bitmaps

	const PxgShapeSim* mShapeSimsBufferDeviceData;
};
|
||||
}
|
||||
#endif
|
||||
86
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSimulationCoreKernelIndices.h
vendored
Normal file
86
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSimulationCoreKernelIndices.h
vendored
Normal file
@@ -0,0 +1,86 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_SIMULATION_CORE_KERNEL_INDICES_H
|
||||
#define PXG_SIMULATION_CORE_KERNEL_INDICES_H
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
// CUDA block dimensions (threads per block) for the simulation-core kernels.
struct PxgSimulationCoreKernelBlockDim
{
	enum
	{
		UPDATE_BODY_EXTERNAL_VELOCITIES = 256,
		UPDATE_BODIES_AND_SHAPES = 256,
		UPDATE_LINKS_AND_JOINTCORES = 256,
		UPDATE_JOINTS = 256,
		UPDATE_TRANSFORMCACHE_AND_BOUNDARRAY = 256,
		MERGE_TRANSFORMCACHE_AND_BOUNDARRAY_CHANGES = 256,
		UPDATE_AABBMGR_HANDLES = 256,
		COMPUTE_FROZEN_UNFROZEN_HISTOGRAM = 256,
		OUTPUT_FROZEN_UNFROZEN_HISTOGRAM = 256,
		CREATE_FROZEN_UNFROZEN_ARRAY = 256,
		NEW_ARTICULATION = 256,
		RIGID_DYNAMIC_GET_GLOBAL_POSE = 256,
		RIGID_DYNAMIC_GET_LINVEL = 256,
		RIGID_DYNAMIC_GET_ANGVEL = 256,
		RIGID_DYNAMIC_GET_LINACCEL = 256,
		RIGID_DYNAMIC_GET_ANGACCEL = 256,
		RIGID_DYNAMIC_SET_GLOBAL_POSE = 256,
		RIGID_DYNAMIC_SET_LINVEL = 256,
		RIGID_DYNAMIC_SET_ANGVEL = 256,
		RIGID_DYNAMIC_SET_FORCE = 256,
		RIGID_DYNAMIC_SET_TORQUE = 256,
		D6_JOINT_GET_FORCE = 256,
		D6_JOINT_GET_TORQUE = 256
	};
};
|
||||
|
||||
// CUDA grid dimensions (blocks per launch) for the simulation-core kernels.
struct PxgSimulationCoreKernelGridDim
{
	enum
	{
		UPDATE_BODY_EXTERNAL_VELOCITIES = 64,
		UPDATE_BODIES_AND_SHAPES = 64,
		NEW_ARTICULATION = 64,
		UPDATE_SOFTBODIES = 64,
		UPDATE_PARTICLESYSTEMS = 64,
		UPDATE_LINKS_AND_JOINTCORES = 64,
		UPDATE_JOINTS = 64,
		UPDATE_TRANSFORMCACHE_AND_BOUNDARRAY = 256,
		UPDATE_AABBMGR_HANDLES = 64,
		COMPUTE_FROZEN_UNFROZEN_HISTOGRAM = 32, //this has to be 32 because we are doing warp scan
		OUTPUT_FROZEN_UNFROZEN_HISTOGRAM = 32, //this has to be 32
		CREATE_FROZEN_UNFROZEN_ARRAY = 64
	};
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
128
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSmoothing.h
vendored
Normal file
128
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSmoothing.h
vendored
Normal file
@@ -0,0 +1,128 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_SMOOTHING_H
|
||||
#define PXG_SMOOTHING_H
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
|
||||
#include "foundation/PxArray.h"
|
||||
|
||||
#include "PxSmoothing.h"
|
||||
#include "PxgKernelWrangler.h"
|
||||
#include "PxgAnisotropyData.h"
|
||||
#include "PxgKernelLauncher.h"
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
namespace physx
|
||||
{
|
||||
#endif
|
||||
|
||||
#if PX_SUPPORT_GPU_PHYSX
|
||||
|
||||
class PxgSmoothedPositionGenerator : public PxSmoothedPositionGenerator, public PxUserAllocated
|
||||
{
|
||||
private:
|
||||
PxgKernelLauncher mKernelLauncher;
|
||||
PxSmoothedPositionData mPositionSmoothingDataHost;
|
||||
PxSmoothedPositionData* mPositionSmoothingDataPerParticleSystemDevice;
|
||||
PxU32 mNumParticles;
|
||||
bool mDirty;
|
||||
bool mOwnsSmoothedPositionGPUBuffers;
|
||||
PxVec4* mSmoothedPositions;
|
||||
bool mEnabled;
|
||||
|
||||
void releaseGPUSmoothedPositionBuffers();
|
||||
|
||||
void allocateGPUSmoothedPositionBuffers();
|
||||
|
||||
public:
|
||||
PxgSmoothedPositionGenerator(PxgKernelLauncher& cudaContextManager, PxU32 maxNumParticles, PxReal smoothingStrenght);
|
||||
|
||||
virtual ~PxgSmoothedPositionGenerator() { }
|
||||
|
||||
virtual void setSmoothing(float smoothingStrenght)
|
||||
{
|
||||
mPositionSmoothingDataHost.mSmoothing = smoothingStrenght;
|
||||
mDirty = true;
|
||||
}
|
||||
|
||||
virtual void release();
|
||||
|
||||
//Replaces the former readData method
|
||||
virtual void setResultBufferHost(PxVec4* smoothedPositions)
|
||||
{
|
||||
mSmoothedPositions = smoothedPositions;
|
||||
allocateGPUSmoothedPositionBuffers();
|
||||
mDirty = true;
|
||||
}
|
||||
|
||||
virtual void setResultBufferDevice(PxVec4* smoothedPositions)
|
||||
{
|
||||
if (mOwnsSmoothedPositionGPUBuffers)
|
||||
releaseGPUSmoothedPositionBuffers();
|
||||
mPositionSmoothingDataHost.mPositions = smoothedPositions;
|
||||
mDirty = true;
|
||||
mSmoothedPositions = NULL;
|
||||
}
|
||||
|
||||
virtual void generateSmoothedPositions(PxGpuParticleSystem* gpuParticleSystem, PxU32 numParticles, CUstream stream);
|
||||
|
||||
virtual void generateSmoothedPositions(PxVec4* particlePositionsGpu, PxParticleNeighborhoodProvider& neighborhoodProvider, PxU32 numParticles, PxReal particleContactOffset, CUstream stream);
|
||||
|
||||
virtual PxU32 getMaxParticles() const
|
||||
{
|
||||
return mNumParticles;
|
||||
}
|
||||
|
||||
virtual void setMaxParticles(PxU32 maxParticles);
|
||||
|
||||
virtual PxVec4* getSmoothedPositionsDevicePointer() const
|
||||
{
|
||||
return mPositionSmoothingDataHost.mPositions;
|
||||
}
|
||||
|
||||
virtual void setEnabled(bool enabled)
|
||||
{
|
||||
mEnabled = enabled;
|
||||
}
|
||||
|
||||
virtual bool isEnabled() const
|
||||
{
|
||||
return mEnabled;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
} // namespace physx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
243
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSoftBody.h
vendored
Normal file
243
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSoftBody.h
vendored
Normal file
@@ -0,0 +1,243 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_SOFTBODY_H
|
||||
#define PXG_SOFTBODY_H
|
||||
|
||||
#include "PxsHeapMemoryAllocator.h"
|
||||
#include "foundation/PxUserAllocated.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
#include "PxgCudaBuffer.h"
|
||||
#include "cutil_math.h"
|
||||
#include "PxDeformableVolume.h"
|
||||
#include "PxDeformableVolumeFlag.h"
|
||||
#include "PxSoftBodyFlag.h" // deprecated
|
||||
#include "PxgSimulationCoreDesc.h"
|
||||
|
||||
#define MAX_SELF_COLLISION_CONTACTS 1000
|
||||
#define NUM_BLOCK_PER_SOFTBODY_SOLVE_TETRA 2
|
||||
|
||||
#define SB_PARTITION_LIMIT 8 // max # partitions allowed. This value SHOULD NOT change. See also GuCookingTetrahedronMesh.cpp.
|
||||
|
||||
namespace physx
|
||||
{
|
||||
namespace Gu
|
||||
{
|
||||
class DeformableVolumeMesh;
|
||||
class BVTetrahedronMesh;
|
||||
class TetrahedronMesh;
|
||||
class DeformableVolumeAuxData;
|
||||
};
|
||||
|
||||
|
||||
struct PxgMat33Block;
|
||||
struct PxgSpatialVectorBlock;
|
||||
class PxgNonRigidFilterPair;
|
||||
|
||||
#if PX_VC
|
||||
#pragma warning(push)
|
||||
#pragma warning( disable : 4324 ) // Padding was added at the end of a structure because of a __declspec(align) value.
|
||||
#endif
|
||||
|
||||
struct PxgSoftBodyData
|
||||
{
|
||||
public:
|
||||
|
||||
PxU32 mRemapOutputSizeGM;
|
||||
PxU32 mMaxTetsPerPartitionsGM;
|
||||
PxU32 mTetsRemapSize; //size of tetrahedrons remap from collision mesh to simulation mesh
|
||||
|
||||
PxU32 mNbPackedNodes;
|
||||
};
|
||||
|
||||
PX_ALIGN_PREFIX(16)
|
||||
class PxgSoftBody
|
||||
{
|
||||
public:
|
||||
|
||||
PX_DEPRECATED static PxU32 dataIndexFromFlagDEPRECATED(PxSoftBodyGpuDataFlag::Enum flag);
|
||||
|
||||
// AD: We only use this for the host mirror as the GPU-side are PxgCudaBuffers and we only assign the pointers.
|
||||
// Make sure to pass the right allocator in here!
|
||||
void deallocate(PxsHeapMemoryAllocator* allocator);
|
||||
|
||||
void* mTetMeshData;
|
||||
PxU8* mTetMeshSurfaceHint;
|
||||
uint4* mTetIndices;
|
||||
PxU32* mTetIndicesRemapTable; //remap the tet indices to the source indices
|
||||
PxMat33* mTetraStresses;
|
||||
PxMat33* mTetraRestPoses;
|
||||
PxReal* mTetraStressCoefficient;
|
||||
float4* mTetraRotations; //sizeof mNumTets
|
||||
float4* mPosition_InvMass;
|
||||
float4* mRestPosition; //rest position of the verts - 4th component unused.
|
||||
|
||||
uint4* mSimTetIndices;
|
||||
float4* mSimVelocity_InvMass;
|
||||
float4* mSimPosition_InvMass;
|
||||
|
||||
const float4* mSimKinematicTarget;
|
||||
bool* mVertsAreDeformed;
|
||||
bool* mVertsCanNotDeform;
|
||||
float4* mSimDeltaPos; //sizeof mNumVertsGM, store the deltaPos change for the grid model verts
|
||||
PxgMat33Block* mSimTetraRestPoses;
|
||||
PxgMat33Block* mOrigQinv;
|
||||
|
||||
PxU32* mSimOrderedTetrahedrons;
|
||||
PxU32* mSimJacobiVertIndices;
|
||||
|
||||
float4* mSimTetraRotations; //sizeof mNumTetsGM
|
||||
|
||||
float4* mSimVelocity_invMassCP;//sizeof mRemapOutputSizeGM
|
||||
float4* mSimPosition_InvMassCP; //sizeof mRemapOutputSizeGM
|
||||
uint4* mSimRemapOutputCP; //sizeof numElements * numVertsPerElement
|
||||
PxU32* mSimAccumulatedCopiesCP; //sizeof mNumVertsGM
|
||||
PxU32* mSimAccumulatedPartitionsCP; //sizeof mMaxPartitionsGM
|
||||
|
||||
float4* mVertsBarycentricInGridModel; //the barycentric of verts(from collision mesh) in simulation mesh
|
||||
PxU32* mVertsRemapInGridModel;// the verts in collision mesh map to the tetrahedrons in simulation mesh
|
||||
|
||||
PxU32* mTetsRemapColToSim; //tetrahedrons remap from collision mesh to simulation mesh
|
||||
PxU32* mTetsAccumulatedRemapColToSim; // runsum of mTetsRemapColToSim sizeof mNumTets
|
||||
|
||||
PxU8* mSurfaceVertsHint;
|
||||
PxU32* mSurfaceVertToTetRemap;
|
||||
|
||||
PxgSpatialVectorBlock* mSimTetraMultipliers;
|
||||
|
||||
float4* mSimDelta; //initialize to zero and zero every time in the apply delta kernel
|
||||
|
||||
PxBounds3* mPackedNodeBounds;
|
||||
|
||||
PxU16* mOrderedMaterialIndices; //indices to global material array ordered by partition, size of mNumVertsGM(simulation mesh)
|
||||
PxU16* mMaterialIndices; //indices to global material array, size of mNumVertsGM(simulation mesh)
|
||||
|
||||
//filter pair for myself and other soft body. Index0 is myself and vertId. Index1 is other soft body and tetId
|
||||
PxgNonRigidFilterPair* mFilteringPairs;
|
||||
PxU32 mNumFilterPairs;
|
||||
|
||||
PxReal mLinearDamping;
|
||||
PxReal mMaxLinearVelocity;
|
||||
PxReal mPenBiasClamp;
|
||||
|
||||
PxReal mSettlingThreshold;
|
||||
PxReal mSleepThreshold;
|
||||
PxReal mSettlingDamping;
|
||||
PxReal mSelfCollisionFilterDistance;
|
||||
PxReal mSelfCollisionStressTolerance;
|
||||
|
||||
PxQuat mInitialRotation;
|
||||
PxU32 mNumVerts;
|
||||
PxU32 mNumTets;
|
||||
|
||||
PxU32 mNumVertsGM;
|
||||
PxU32 mNumTetsGM;
|
||||
PxU32 mNumPartitionsGM;
|
||||
|
||||
PxU32 mElementIndex;
|
||||
PxU32 mGpuRemapIndex;
|
||||
PxU8 mActorFlags;
|
||||
PxU8 mBodyFlags;
|
||||
PxU16 mVolumeFlags;
|
||||
|
||||
uint4* mSimPullIndices;
|
||||
|
||||
PxU32 mAwake;
|
||||
PxU32 mNumTetsPerElement;
|
||||
PxU32 mNumJacobiVertices;
|
||||
|
||||
PxReal mRestDistance;
|
||||
PxReal mOriginalContactOffset;
|
||||
PxReal mJacobiScale;
|
||||
|
||||
}PX_ALIGN_SUFFIX(16);
|
||||
|
||||
#if PX_VC
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
class PxgSoftBodyBuffer : public PxUserAllocated
|
||||
{
|
||||
public:
|
||||
|
||||
PxgSoftBodyBuffer(PxgHeapMemoryAllocatorManager* heapMemoryManager);
|
||||
|
||||
PxgCudaBuffer tetMeshData;
|
||||
PxgTypedCudaBuffer<PxU8> tetMeshSurfaceHint;
|
||||
PxgTypedCudaBuffer<uint4> tetIndices;
|
||||
PxgTypedCudaBuffer<PxU32> tetIndicesRemapTable;
|
||||
PxgTypedCudaBuffer<PxMat33> tetStresses;
|
||||
PxgTypedCudaBuffer<PxReal> tetStressCoefficient;
|
||||
PxgTypedCudaBuffer<PxMat33> tetRestPoses;
|
||||
PxgTypedCudaBuffer<float4> tetRotations;
|
||||
|
||||
PxgTypedCudaBuffer<uint4> tetIndicesGM;
|
||||
PxgTypedCudaBuffer<float4> pPostion_InvMassGM;
|
||||
PxgTypedCudaBuffer<bool> vertsAreDeformed;
|
||||
PxgTypedCudaBuffer<bool> vertsCantDeform;
|
||||
PxgTypedCudaBuffer<PxgMat33Block> tetRestPosesGM;
|
||||
PxgTypedCudaBuffer<PxgMat33Block> origTetRestPosesGM;
|
||||
PxgTypedCudaBuffer<float4> tetRotationsGM;
|
||||
PxgTypedCudaBuffer<PxU32> orderedTetGM;
|
||||
PxgTypedCudaBuffer<PxU32> jacobiVertIndicesGM;
|
||||
PxgTypedCudaBuffer<PxgSpatialVectorBlock> tetMultipliersGM;
|
||||
|
||||
PxgTypedCudaBuffer<float4> pDeltaVGM;
|
||||
|
||||
PxgTypedCudaBuffer<float4> pBarycentricGM;
|
||||
PxgTypedCudaBuffer<PxU32> pRemapGM;
|
||||
PxgTypedCudaBuffer<PxU32> tetRemapColToSim;
|
||||
PxgTypedCudaBuffer<PxU32> tetAccumulatedRemapColToSim;
|
||||
PxgTypedCudaBuffer<PxU8> surfaceVertsHint;
|
||||
PxgTypedCudaBuffer<PxU32> surfaceVertToTetRemap;
|
||||
PxgTypedCudaBuffer<float4> pDeltaPosGM;
|
||||
PxgTypedCudaBuffer<float4> pPosition_InvMassGMCP;
|
||||
PxgTypedCudaBuffer<float4> pVelocity_InvMassGMCP;
|
||||
PxgTypedCudaBuffer<PxU32> remapOutputGMCP;
|
||||
PxgTypedCudaBuffer<PxU32> accumulatedPartitionsGMCP;
|
||||
PxgTypedCudaBuffer<PxU32> accumulatedCopiesGMCP;
|
||||
PxgTypedCudaBuffer<uint4> pullIndices;
|
||||
PxgTypedCudaBuffer<PxU16> orderedMaterialIndices;
|
||||
PxgTypedCudaBuffer<PxU16> materialIndices;
|
||||
PxgTypedCudaBuffer<PxBounds3> packedNodeBounds; //for refit
|
||||
PxgTypedCudaBuffer<PxgNonRigidFilterPair> filterPairs;
|
||||
};
|
||||
|
||||
class PxgSoftBodyUtil
|
||||
{
|
||||
public:
|
||||
static PxU32 computeTetMeshByteSize(const Gu::BVTetrahedronMesh* tetMesh);
|
||||
static PxU32 loadOutTetMesh(void* mem, const Gu::BVTetrahedronMesh* tetMesh);
|
||||
static void initialTetData(PxgSoftBody& softbody, const Gu::BVTetrahedronMesh* colTetMesh, const Gu::TetrahedronMesh* simTetMesh,
|
||||
const Gu::DeformableVolumeAuxData* softBodyAuxData, const PxU16* materialsHandles, PxsHeapMemoryAllocator* alloc);
|
||||
static void computeBasisMatrix(PxMat33* restPoses, const Gu::DeformableVolumeMesh* tetMesh);
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
297
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSoftBodyCore.h
vendored
Normal file
297
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSoftBodyCore.h
vendored
Normal file
@@ -0,0 +1,297 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_SOFTBODY_CORE_H
|
||||
#define PXG_SOFTBODY_CORE_H
|
||||
|
||||
#include "PxgFEMCore.h"
|
||||
#include "PxSoftBodyFlag.h"
|
||||
#include "foundation/PxPreprocessor.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
//this is needed to force PhysXSimulationControllerGpu linkage as Static Library!
|
||||
void createPxgSoftBody();
|
||||
|
||||
struct PxGpuDynamicsMemoryConfig;
|
||||
|
||||
class PxgCudaBroadPhaseSap;
|
||||
class PxgGpuNarrowphaseCore;
|
||||
class PxgSoftBody;
|
||||
|
||||
struct PxgConstraintPrepareDesc;
|
||||
|
||||
class PxPostSolveCallback;
|
||||
|
||||
struct PxgSoftBodySoftBodyConstraintBlock
|
||||
{
|
||||
float4 barycentric0[32];
|
||||
float4 barycentric1[32];
|
||||
float4 normal_pen[32];
|
||||
PxReal velMultiplier[32];
|
||||
PxReal friction[32];
|
||||
};
|
||||
|
||||
namespace Dy
|
||||
{
|
||||
class DeformableVolume;
|
||||
}
|
||||
|
||||
class PxgSoftBodyCore : public PxgFEMCore
|
||||
{
|
||||
public:
|
||||
PxgSoftBodyCore(PxgCudaKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager,
|
||||
PxgHeapMemoryAllocatorManager* heapMemoryManager, PxgSimulationController* simController,
|
||||
PxgGpuContext* context, const PxU32 maxContacts, const PxU32 collisionStackSize, const bool isTGS);
|
||||
~PxgSoftBodyCore();
|
||||
|
||||
//integrate verts position based on gravity
|
||||
void preIntegrateSystems(PxgSoftBody* softbodies, PxU32* activeSoftbodies, const PxU32 nbActiveSoftbodies,
|
||||
const PxVec3 gravity, const PxReal dt);
|
||||
|
||||
// calculate softbody's world bound
|
||||
void refitBound(PxgSoftBody* softbodies, const PxU32 nbActiveSoftbodies);
|
||||
|
||||
void resetContactCounts();
|
||||
void checkBufferOverflows();
|
||||
|
||||
void selfCollision();
|
||||
|
||||
//after narrow phase, we sort soft body contact by rigid id and particle id
|
||||
void sortContacts(const PxU32 nbActiveSoftbodies);
|
||||
|
||||
void updateSimTetraRotations();
|
||||
|
||||
void updateTetraRotations();
|
||||
|
||||
void solve(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd, PxgDevicePointer<PxgConstraintPrepareDesc> prepDescd, PxgDevicePointer<PxgSolverCoreDesc> solverCoreDescd,
|
||||
PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd, PxgDevicePointer<PxgArticulationCoreDesc> artiCoreDescd, const PxReal dt, CUstream solverStream,
|
||||
const bool isFirstIteration);
|
||||
|
||||
void solveTGS(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd, PxgDevicePointer<PxgConstraintPrepareDesc> prepDescd, PxgDevicePointer<PxgSolverCoreDesc> solverCoreDescd,
|
||||
PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd, PxgDevicePointer<PxgArticulationCoreDesc> artiCoreDescd, const PxReal dt, CUstream solverStream,
|
||||
const bool isVelocityIteration, const PxReal biasCoefficient, const bool isFirstIteration, const PxVec3& gravity);
|
||||
|
||||
void calculateStress();
|
||||
|
||||
void plasticDeformation();
|
||||
|
||||
void constraintPrep(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd, PxgDevicePointer<PxgConstraintPrepareDesc> prepDescd, const PxReal invDt, PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd, CUstream solverStream,
|
||||
const bool isTGS, PxU32 nbSolverBodies, PxU32 nbArticulations);
|
||||
|
||||
bool updateUserData(PxPinnedArray<PxgSoftBody>& softBodyPool, PxArray<PxU32>& softBodyNodeIndexPool,
|
||||
const PxU32* activeSoftBodies, const PxU32 nbActiveSoftBodies, void** bodySimsLL);
|
||||
|
||||
void copySoftBodyDataDEPRECATED(void** data, void* dataSizes, void* softBodyIndices, PxSoftBodyGpuDataFlag::Enum flag, const PxU32 nbCopySoftBodies, const PxU32 maxSize, CUevent copyEvent);
|
||||
void applySoftBodyDataDEPRECATED(void** data, void* dataSizes, void* softBodyIndices, PxSoftBodyGpuDataFlag::Enum flag, const PxU32 nbUpdatedSoftBodies, const PxU32 maxSize, CUevent applyEvent, CUevent signalEvent);
|
||||
|
||||
CUstream getStream() { return mStream; }
|
||||
|
||||
void syncSoftBodies();
|
||||
|
||||
void createActivatedDeactivatedLists();
|
||||
|
||||
|
||||
PxgCudaBuffer& getTempCellsHistogram() { return mTempCellsHistogramBuf; }
|
||||
PxgTypedCudaBuffer<PxU32>& getTempBlockCellsHistogram() { return mTempBlockCellsHistogramBuf; }
|
||||
PxgTypedCudaBuffer<PxU32>& getTempHistogramCount() { return mTempHistogramCountBuf; }
|
||||
|
||||
|
||||
PxgTypedCudaBuffer<float4>& getClothVsSoftBodyContacts() { return mSCContactPointBuffer; }
|
||||
PxgTypedCudaBuffer<float4>& getClothVsSoftBodyNormalPens() { return mSCContactNormalPenBuffer; }
|
||||
PxgTypedCudaBuffer<float4>& getClothVsSoftBodyBarycentrics0() { return mSCContactBarycentricBuffer0; } //barycentric toward soft body contact
|
||||
PxgTypedCudaBuffer<float4>& getClothVsSoftBodyBarycentrics1() { return mSCContactBarycentricBuffer1; } //barycentric toward cloth contact
|
||||
PxgTypedCudaBuffer<PxgFemFemContactInfo>& getClothVsSoftBodyContactInfos() { return mSCContactInfoBuffer; }
|
||||
PxgTypedCudaBuffer<PxU32>& getClothVsSoftBodyContactCount() { return mSCTotalContactCountBuffer; }
|
||||
PxgTypedCudaBuffer<PxU32>& getPrevClothVsSoftBodyContactCount() { return mPrevSCContactCountBuffer; }
|
||||
|
||||
PxgCudaPagedLinearAllocator<PxgHeapMemoryAllocator>& getStackAllocator() { return mIntermStackAlloc; }
|
||||
|
||||
//apply position delta change original grid model tetra mesh
|
||||
void finalizeVelocities(const PxReal dt, const PxReal scale, const bool isTGS);
|
||||
|
||||
//apply position delta change original grid model tetra mesh
|
||||
void applyExternalTetraDeltaGM(const PxU32 nbActiveSoftbodies, const PxReal dt, CUstream stream);
|
||||
|
||||
private:
|
||||
PX_DEPRECATED void copyOrApplySoftBodyDataDEPRECATED(PxU32 dataIndex, PxU32* softBodyIndices, PxU8** data, PxU32* dataSizes, PxU32 maxSizeInBytes, const PxU32 nbSoftbodies, const PxU32 applyDataToSoftBodies);
|
||||
|
||||
//integrate verts position based on gravity
|
||||
void preIntegrateSystem(PxgDevicePointer<PxgSoftBody> softbodiesd, PxgDevicePointer<PxU32> activeSoftBodiesd,
|
||||
const PxU32 nbActiveSoftBodies, const PxU32 maxVerts, const PxVec3 gravity, const PxReal dt, CUstream bpStream);
|
||||
|
||||
//These method are running at the solverStream
|
||||
void prepRigidContactConstraint(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd, PxgDevicePointer<PxgConstraintPrepareDesc> prepDescd,
|
||||
const PxReal invDt, PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd, CUstream solverStream, const bool isTGS, PxU32 numSolverBodies, PxU32 numArticulations);
|
||||
|
||||
void prepRigidAttachmentConstraints(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd, PxgDevicePointer<PxgConstraintPrepareDesc> prepDescd,
|
||||
const PxReal invDt, PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd, CUstream stream, bool isTGS);
|
||||
|
||||
void prepSoftBodyAttachmentConstraints(CUstream stream);
|
||||
|
||||
void prepClothAttachmentConstraints(CUstream stream);
|
||||
|
||||
void prepParticleAttachmentConstraints(CUstream stream);
|
||||
|
||||
void solveRSContactsOutputRigidDelta(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd, PxgDevicePointer<PxgSolverCoreDesc> solverCoreDescd,
|
||||
PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd, PxgDevicePointer<PxgArticulationCoreDesc> artiCoreDescd, CUstream solverStream, const PxReal dt);
|
||||
|
||||
void solveRSContactsOutputRigidDeltaTGS(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd,
|
||||
PxgDevicePointer<PxgSolverCoreDesc> solverCoreDescd, PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd, PxgDevicePointer<PxgArticulationCoreDesc> artiCoreDescd, CUstream solverStream,
|
||||
const PxReal dt);
|
||||
|
||||
//run on soft body stream
|
||||
void prepSoftBodyParticleConstraint();
|
||||
|
||||
//run on soft body stream
|
||||
void prepSoftBodyClothConstraint();
|
||||
|
||||
//These method are running at the soft body stream
|
||||
void prepSoftbodyContactConstraint();
|
||||
|
||||
void updateTetModelVerts(PxgDevicePointer<PxgSoftBody> softbodiesd, PxgDevicePointer<PxU32> activeSoftbodiesd,
|
||||
const PxU32 nbActiveSoftbodies, CUstream updateStream);
|
||||
|
||||
//solve in the grid model
|
||||
void solveCorotationalFEM(PxgSoftBody* softbodies, PxgSoftBody* softbodiesd, PxgDevicePointer<PxU32> activeSoftbodiesd,
|
||||
const PxU32 nbActiveSoftbodies, const PxReal dt, CUstream stream, const bool isTGS, const bool isFirstIteration);
|
||||
|
||||
void step(PxReal dt, CUstream stream, const PxU32 nbActiveSoftBodies, const PxVec3& gravity);
|
||||
|
||||
|
||||
void solveRigidAttachment(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd, PxgDevicePointer<PxgSolverCoreDesc> solverCoreDescd, PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd,
|
||||
PxgDevicePointer<PxgArticulationCoreDesc> artiCoreDescd, CUstream solverStream, const PxReal dt);
|
||||
|
||||
void solveSoftBodyAttachmentDelta();
|
||||
|
||||
void solveParticleAttachmentDelta();
|
||||
|
||||
void solveClothAttachmentDelta();
|
||||
|
||||
void solveRigidAttachmentTGS(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd, PxgDevicePointer<PxgSolverCoreDesc> solverCoreDescd, PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd,
|
||||
PxgDevicePointer<PxgArticulationCoreDesc> artiCoreDescd, CUstream solverStream, const PxReal dt, const PxReal biasCoefficient, bool isVelocityIteration);
|
||||
|
||||
//solve soft body vs particle contact and output to soft body delta buffer
|
||||
void solveSPContactsOutputSoftBodyDelta(const PxReal dt, const PxReal biasCoefficient);
|
||||
|
||||
//solve soft body vs particle contact and output to particle delta buffer
|
||||
void solveSPContactsOutputParticleDelta(const PxReal dt, const PxReal biasCoefficient, CUstream particleStream);
|
||||
|
||||
//solve soft body vs cloth contact and update position
|
||||
void solveSCContactsOutputDelta();
|
||||
|
||||
//solve soft body vs soft body contact and output to soft body delta buffer
|
||||
void solveSSContactsOutputSoftBodyDelta(const float dt, const float biasCoefficient, const bool isTGS);
|
||||
|
||||
void queryRigidContactReferenceCount(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd,
|
||||
PxgDevicePointer<PxgSolverCoreDesc> solverCoreDescd, PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd,
|
||||
PxgDevicePointer<PxgArticulationCoreDesc> artiCoreDescd, CUstream solverStream, PxReal dt);
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
|
||||
//soft body vs cloth collision contacts
|
||||
PxgTypedCudaBuffer<float4> mSCContactPointBuffer;
|
||||
PxgTypedCudaBuffer<float4> mSCContactNormalPenBuffer;
|
||||
PxgTypedCudaBuffer<float4> mSCContactBarycentricBuffer0;
|
||||
PxgTypedCudaBuffer<float4> mSCContactBarycentricBuffer1;
|
||||
PxgTypedCudaBuffer<PxgFemFemContactInfo> mSCContactInfoBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mSCTotalContactCountBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mPrevSCContactCountBuffer;
|
||||
//contact prep buffer
|
||||
PxgTypedCudaBuffer<PxgSoftBodySoftBodyConstraintBlock> mSCConstraintBuf; //constraint prep for cloth vs soft body
|
||||
|
||||
//To do: ideally, we want to use two separate stream to solve the rigid body and soft body collision
|
||||
PxgTypedCudaBuffer<PxReal> mSCLambdaNBuf; // accumulated deltaLambdaN for collision between FEMCloth and soft body
|
||||
|
||||
CUevent mBoundUpdateEvent;//this event is used to synchronize the broad phase stream(updateBound is running on broad phase stream) and mStream
|
||||
CUevent mSolveRigidEvent;
|
||||
CUevent mConstraintPrepSoftBodyParticleEvent; //this event is used to synchronize constraint prep(soft body stream) and solve soft body vs particle system contacts (particle stream)
|
||||
CUevent mSolveSoftBodyParticleEvent; //this event is used to synchronize particle system contacts (particle stream) before we call applyExternalTetraDelta
|
||||
|
||||
public:
|
||||
PxArray<Dy::DeformableVolume*> mActivatingDeformableVolumes;
|
||||
PxArray<Dy::DeformableVolume*> mDeactivatingDeformableVolumes;
|
||||
PxPostSolveCallback* mPostSolveCallback;
|
||||
};
|
||||
|
||||
struct PxgSoftBodyContactWriter
|
||||
{
|
||||
float4* outPoint;
|
||||
float4* outNormalPen;
|
||||
float4* outBarycentric0;
|
||||
float4* outBarycentric1;
|
||||
PxgFemFemContactInfo* outContactInfo;
|
||||
PxU32* totalContactCount;
|
||||
PxU32 maxContacts;
|
||||
|
||||
PxgSoftBodyContactWriter(PxgSoftBodyCore* softBodyCore, PxgFEMCore* femClothCore = NULL)
|
||||
{
|
||||
if (femClothCore)
|
||||
{
|
||||
totalContactCount = softBodyCore->getClothVsSoftBodyContactCount().getTypedPtr();
|
||||
outPoint = softBodyCore->getClothVsSoftBodyContacts().getTypedPtr();
|
||||
outNormalPen = softBodyCore->getClothVsSoftBodyNormalPens().getTypedPtr();
|
||||
outBarycentric0 = softBodyCore->getClothVsSoftBodyBarycentrics0().getTypedPtr();
|
||||
outBarycentric1 = softBodyCore->getClothVsSoftBodyBarycentrics1().getTypedPtr();
|
||||
outContactInfo = softBodyCore->getClothVsSoftBodyContactInfos().getTypedPtr();
|
||||
maxContacts = PxMin(softBodyCore->mMaxContacts, femClothCore->mMaxContacts);
|
||||
}
|
||||
else
|
||||
{
|
||||
totalContactCount = softBodyCore->getVolumeContactOrVTContactCount().getTypedPtr();
|
||||
outPoint = softBodyCore->getFemContacts().getTypedPtr();
|
||||
outNormalPen = softBodyCore->getFemNormalPens().getTypedPtr();
|
||||
outBarycentric0 = softBodyCore->getFemBarycentrics0().getTypedPtr();
|
||||
outBarycentric1 = softBodyCore->getFemBarycentrics1().getTypedPtr();
|
||||
outContactInfo = softBodyCore->getVolumeContactOrVTContactInfos().getTypedPtr();
|
||||
maxContacts = softBodyCore->mMaxContacts;
|
||||
}
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE bool writeContact(PxU32 index, const float4& contact, const float4& normalPen, const float4& barycentric0, const float4& barycentric1,
|
||||
PxU32 pairId0, PxU32 pairId1)
|
||||
{
|
||||
if (index >= maxContacts)
|
||||
return false;
|
||||
|
||||
|
||||
outPoint[index] = contact;
|
||||
outNormalPen[index] = normalPen;
|
||||
|
||||
outBarycentric0[index] = barycentric0;
|
||||
outBarycentric1[index] = barycentric1;
|
||||
|
||||
outContactInfo[index].pairInd0 = pairId0;
|
||||
outContactInfo[index].pairInd1 = pairId1;
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
69
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSoftBodyCoreKernelIndices.h
vendored
Normal file
69
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSoftBodyCoreKernelIndices.h
vendored
Normal file
@@ -0,0 +1,69 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_SOFTBODY_CORE_KERNEL_INDICES_H
|
||||
#define PXG_SOFTBODY_CORE_KERNEL_INDICES_H
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
/**
\brief Threads-per-block launch dimensions for the GPU soft body kernels (one entry per solver stage).
*/
struct PxgSoftBodyKernelBlockDim
{
	enum
	{
		SB_PREINTEGRATION	= 1024,
		SB_REFIT			= 256,
		SB_INTERNALSOLVE	= 256,
		SB_UPDATEROTATION	= 256,
		SB_SOLVETETRA		= 64,
		SB_SOLVETETRA_LOW	= 32,
		SB_REORDERCONTACTS	= 256,
		SB_ACCUMULATE_DELTA	= 512
	};
};
|
||||
|
||||
/**
\brief Blocks-per-grid launch dimensions for the GPU soft body kernels (one entry per solver stage).
*/
struct PxgSoftBodyKernelGridDim
{
	enum
	{
		SB_REFIT			= 32,
		SB_SBMIDPHASE		= 1024,
		SB_SBCG				= 1024,
		SB_MESHCG			= 1024,
		SB_HFCG				= 1024,
		SB_UPDATEROTATION	= 1024,
		SB_SOLVETETRA		= 64,
		SB_REORDERCONTACTS	= 1024,
		SB_ACCUMULATE_DELTA	= 32
	};
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
80
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSparseGridDataStandalone.h
vendored
Normal file
80
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSparseGridDataStandalone.h
vendored
Normal file
@@ -0,0 +1,80 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_SPARSE_GRID_DATA_STANDALONE_H
|
||||
#define PXG_SPARSE_GRID_DATA_STANDALONE_H
|
||||
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
#include "PxSparseGridParams.h"
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
namespace physx
|
||||
{
|
||||
#endif
|
||||
|
||||
/**
|
||||
\brief Minimal set of data to access cells in a sparse grid
|
||||
*/
|
||||
struct PxSparseGridData
|
||||
{
|
||||
PxSparseGridParams mGridParams; //!< The grid descriptor
|
||||
PxU32* mUniqueHashkeyPerSubgrid; //!< A unique id for every subgrid that is currently in use
|
||||
PxU32* mSubgridNeighbors; //!< Contains 27 elements for every subgrid in use and provides indices to the neighbors in the 3x3x3 neighborhood
|
||||
PxU32* mNumSubgridsInUse; //!< The number of subgrids that are currently in use
|
||||
PxU32* mSubgridOrderMap; //!< Only used for subgrids that have subgrid reuse enabled for consistent order across frames
|
||||
|
||||
PxSparseGridData() : mSubgridOrderMap(NULL) {}
|
||||
|
||||
/**
|
||||
\brief The number of cells in the sparse grid, not all of them are always in use
|
||||
|
||||
\return The number of cells
|
||||
*/
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE PxU32 maxNumCells()
|
||||
{
|
||||
return mGridParams.maxNumSubgrids * mGridParams.subgridSizeX *mGridParams.subgridSizeY *mGridParams.subgridSizeZ;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief The sparse grid's cell size
|
||||
|
||||
\return The cell size
|
||||
*/
|
||||
PX_FORCE_INLINE PX_CUDA_CALLABLE PxReal getCellSize()
|
||||
{
|
||||
return mGridParams.gridSpacing;
|
||||
}
|
||||
};
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
} // namespace physx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
270
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSparseGridStandalone.h
vendored
Normal file
270
engine/third_party/physx/source/gpusimulationcontroller/include/PxgSparseGridStandalone.h
vendored
Normal file
@@ -0,0 +1,270 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_SPARSE_GRID_STANDALONE_H
|
||||
#define PXG_SPARSE_GRID_STANDALONE_H
|
||||
|
||||
#include "cudamanager/PxCudaContext.h"
|
||||
#include "cudamanager/PxCudaContextManager.h"
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
|
||||
#include "PxgAlgorithms.h"
|
||||
#include "PxSparseGridParams.h"
|
||||
#include "PxParticleSystemFlag.h"
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
namespace physx
|
||||
{
|
||||
#endif
|
||||
|
||||
/**
|
||||
\brief Constructs a sparse grid structure given an array of particle positions
|
||||
*/
|
||||
class PxSparseGridBuilder
|
||||
{
|
||||
protected:
|
||||
PxgKernelLauncher* mKernelLauncher;
|
||||
|
||||
PxGpuScan mScan;
|
||||
PxGpuRadixSort<PxU32> mSort;
|
||||
PxSparseGridParams mSparseGridParams;
|
||||
PxU32* mHashkeyPerParticle;
|
||||
PxU32* mSortedParticleToSubgrid;
|
||||
PxU32* mSortedUniqueHashkeysPerSubgrid;
|
||||
PxU32* mSubgridNeighborLookup;
|
||||
|
||||
//Optional
|
||||
PxU32* mSortedToOriginalParticleIndex;
|
||||
|
||||
PxGpuScan mScanNeighbors;
|
||||
|
||||
PxGpuRadixSort<PxU32> mNeighborSort;
|
||||
PxU32* mNeighborCollector;
|
||||
PxU32* mRequiredNeighborMask;
|
||||
|
||||
PxU32 mMaxParticles;
|
||||
|
||||
PxU32 mNeighborhoodSize; //!< The minimum number of cells available around a particle that are guaranteed to be accessible. It can be computed as PxCeil(particleRadius/sparseGridSpacing)
|
||||
bool mTrackParticleOrder;
|
||||
|
||||
bool mCopySubgridsInUseToHost;
|
||||
PxU32 mNumSubgridsInUse;
|
||||
|
||||
PxU32* updateSubgrids(PxVec4* deviceParticlePos, const PxU32 numParticles, PxU32* devicePhases, CUstream stream,
|
||||
PxU32 validPhase = PxParticlePhaseFlag::eParticlePhaseFluid, const PxU32* activeIndices = NULL);
|
||||
|
||||
void updateSubgridNeighbors(PxU32* totalCountPointer, CUstream stream);
|
||||
|
||||
public:
|
||||
PxSparseGridBuilder() : mHashkeyPerParticle(NULL), mSortedToOriginalParticleIndex(NULL), mCopySubgridsInUseToHost(true), mNumSubgridsInUse(0) { }
|
||||
|
||||
~PxSparseGridBuilder()
|
||||
{
|
||||
release();
|
||||
}
|
||||
|
||||
/**
|
||||
\brief The number of subgrids that are in use. Subgrids in use contain at least one particle or have particles closely nearby
|
||||
|
||||
\return The number of subgrids in use
|
||||
*/
|
||||
PX_FORCE_INLINE PxU32 getNumSubgridsInUse() const
|
||||
{
|
||||
return mNumSubgridsInUse;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Sets the state of the flag that defines if the subgrid in use information gets copied to the host every frame
|
||||
|
||||
\param[in] enabled Enable or disable copying the number of subgrids in use to the host every frame
|
||||
*/
|
||||
PX_FORCE_INLINE void setCopySubgridsInUseToHostEnabled(bool enabled)
|
||||
{
|
||||
mCopySubgridsInUseToHost = enabled;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Gets the state of the flag that defines if the subgrid in use information gets copied to the host every frame
|
||||
|
||||
\return True if enabled
|
||||
*/
|
||||
PX_FORCE_INLINE bool getCopySubgridsInUseToHostEnabled() const
|
||||
{
|
||||
return mCopySubgridsInUseToHost;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Gets the sparse grid parameters
|
||||
|
||||
\return The sparse grid parameters
|
||||
*/
|
||||
PX_FORCE_INLINE PxSparseGridParams getGridParameters() const
|
||||
{
|
||||
return mSparseGridParams;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Sets the sparse grid parameters
|
||||
|
||||
\param[in] params The new sparse grid parameters
|
||||
*/
|
||||
PX_FORCE_INLINE void setGridParameters(PxSparseGridParams params)
|
||||
{
|
||||
release();
|
||||
initialize(mKernelLauncher, params, mMaxParticles, mNeighborhoodSize, mTrackParticleOrder);
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Gets the device pointer to the number of subgrids in use
|
||||
|
||||
\return The device pointer to the number of subgrids in use
|
||||
*/
|
||||
PX_FORCE_INLINE PxU32* getSubgridsInUseGpuPointer()
|
||||
{
|
||||
if (mNeighborhoodSize == 0)
|
||||
return mScan.getSumPointer();
|
||||
else
|
||||
return mScanNeighbors.getSumPointer();
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Gets the device array containing the subgrid hashkey for every subgrid in use
|
||||
|
||||
\return The device array containing the subgrid hashkey for every subgrid in use. Be aware that this data gets overwritten by the subgridEndIndicesBuffer data after a call to updateSubgridEndIndices
|
||||
*/
|
||||
PX_FORCE_INLINE PxU32* getUniqueHashkeysPerSubgrid()
|
||||
{
|
||||
return mSortedUniqueHashkeysPerSubgrid;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Gets the subgrid neighbor lookup table
|
||||
|
||||
\return The device pointer to the subgrid neighbor lookup table. Contains 27 elements for every subgrid in use and provides indices to the neighbors in the 3x3x3 neighborhood
|
||||
*/
|
||||
PX_FORCE_INLINE PxU32* getSubgridNeighborLookup()
|
||||
{
|
||||
return mSubgridNeighborLookup;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Gets the sorted particle to subgrid index device buffer.
|
||||
|
||||
\return The sorted particle to subgrid index device buffer
|
||||
*/
|
||||
PX_FORCE_INLINE PxU32* getSortedParticleToSubgrid()
|
||||
{
|
||||
return mSortedParticleToSubgrid;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Gets the sorted to original particle index device buffer.
|
||||
|
||||
\return The sorted to original particle index device buffer
|
||||
*/
|
||||
PX_FORCE_INLINE PxU32* getSortedToOriginalParticleIndex()
|
||||
{
|
||||
return mSortedToOriginalParticleIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Gets the subgrid end indices buffer. This allows to traverse the (sorted) particles in a subgrid because the buffer holds an inclusive cumulative sum of the active subgrid particle counts.
|
||||
|
||||
\return The subgrid end indices device buffer
|
||||
*/
|
||||
PX_FORCE_INLINE PxU32* getSubgridEndIndicesBuffer()
|
||||
{
|
||||
return mSortedUniqueHashkeysPerSubgrid;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Gets the maximal number of particles that can be processed
|
||||
|
||||
\return The maximal number of particles
|
||||
*/
|
||||
PX_FORCE_INLINE PxU32 getMaxParticles() const
|
||||
{
|
||||
return mMaxParticles;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Sets the maximal number of particles that can be processed
|
||||
|
||||
\param[in] maxParticles The maximal number of particles
|
||||
*/
|
||||
PX_FORCE_INLINE void setMaxParticles(PxU32 maxParticles)
|
||||
{
|
||||
release();
|
||||
initialize(mKernelLauncher, mSparseGridParams, maxParticles, mNeighborhoodSize, mTrackParticleOrder);
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Initializes the sparse grid builder
|
||||
|
||||
\param[in] cudaContextManager A cuda context manager
|
||||
\param[in] sparseGridParams The sparse grid parameters
|
||||
\param[in] maxNumParticles The number of particles
|
||||
\param[in] neighborhoodSize The size of the neighborhood around a particle that must be accessible.
|
||||
\param[in] trackParticleOrder If set to true, the mSortedToOriginalParticleIndex array will be updated during every call of updateSparseGrid
|
||||
*/
|
||||
virtual void initialize(PxgKernelLauncher* cudaContextManager, const PxSparseGridParams& sparseGridParams,
|
||||
PxU32 maxNumParticles, PxU32 neighborhoodSize, bool trackParticleOrder = false);
|
||||
|
||||
virtual void release();
|
||||
|
||||
//Completely rebuilds the sparse grid. Does not support to access data from previous timesteps. Supports a particle radius (neighborhoodSize)
|
||||
|
||||
/**
|
||||
\brief Updates the sparse grid given an array of particle positions
|
||||
|
||||
\param[in] deviceParticlePos The particle positions device array
|
||||
\param[in] numParticles The number of particles
|
||||
\param[in] devicePhases The particle's phases device array (optional, can be null)
|
||||
\param[in] stream The stream on which the work gets scheduled
|
||||
\param[in] validPhase The valid phase mask marking the phase bits that particles must have set in order to contribute to the isosurface
|
||||
\param[in] activeIndices Optional device array of active particle indices
|
||||
*/
|
||||
virtual void updateSparseGrid(PxVec4* deviceParticlePos, PxU32 numParticles, PxU32* devicePhases, CUstream stream,
|
||||
PxU32 validPhase = PxParticlePhaseFlag::eParticlePhaseFluid, const PxU32* activeIndices = NULL);
|
||||
|
||||
/**
|
||||
\brief Updates the subgrid end indices. This is only required if the particles per subgrid need to get processed (e. g. anisotropy generation)
|
||||
|
||||
\param[in] numParticles The number of particles
|
||||
\param[in] stream The stream on which the work gets scheduled
|
||||
*/
|
||||
void updateSubgridEndIndices(PxU32 numParticles, CUstream stream);
|
||||
};
|
||||
|
||||
#if !PX_DOXYGEN
|
||||
} // namespace physx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
1356
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/FEMCloth.cu
vendored
Normal file
1356
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/FEMCloth.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
595
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/FEMClothConstraintPrep.cu
vendored
Normal file
595
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/FEMClothConstraintPrep.cu
vendored
Normal file
@@ -0,0 +1,595 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgFEMCloth.h"
|
||||
#include "PxgFEMCore.h"
|
||||
#include "PxgFEMClothCore.h"
|
||||
#include "vector_types.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxMathUtils.h"
|
||||
#include "copy.cuh"
|
||||
#include "assert.h"
|
||||
#include "stdio.h"
|
||||
#include "PxgSolverCoreDesc.h"
|
||||
#include "PxNodeIndex.h"
|
||||
#include "PxgBodySim.h"
|
||||
#include "PxgArticulation.h"
|
||||
#include "PxgParticleSystem.h"
|
||||
#include "PxgNpKernelIndices.h"
|
||||
#include "PxgSimulationCoreDesc.h"
|
||||
#include "PxsDeformableSurfaceMaterialCore.h"
|
||||
#include "utils.cuh"
|
||||
#include "deformableUtils.cuh"
|
||||
#include "deformableCollision.cuh"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Empty host-side entry point; presumably referenced from host code to force this translation
// unit (and the kernels it defines) to be linked/loaded -- TODO confirm against the kernel
// registration scheme.
extern "C" __host__ void initFEMClothKernels0() {}
|
||||
|
||||
// Interpolates a per-vertex float4 quantity (position + inverse mass in w) across one
// triangle using the given barycentric weights (x, y, z components are used).
static __device__ inline float4 computeBarycentricPos(const uint4 triangleIdx, const float4* PX_RESTRICT position_invmass,
	const float4 barycentric)
{
	const float4 p0 = position_invmass[triangleIdx.x];
	const float4 p1 = position_invmass[triangleIdx.y];
	const float4 p2 = position_invmass[triangleIdx.z];

	return p0 * barycentric.x + p1 * barycentric.y + p2 * barycentric.z;
}
|
||||
|
||||
//!
|
||||
//! \brief : prep cloth vs. rigid body collision
|
||||
//!
|
||||
|
||||
// Constraint-prep kernel for cloth vs. rigid body contacts: for every contact it zeroes the
// accumulated normal impulse and fills one slot of the corresponding 32-wide
// PxgFemRigidConstraintBlock via prepareFEMContacts().
extern "C" __global__ void cloth_rigidContactPrepareLaunch(
	PxgFEMCloth* femClothes,
	float4* contacts_restW,              // xyz = contact point, w = rest distance
	float4* normalPens,                  // xyz = normal, w = penetration
	float4* barycentrics,
	const PxgFemOtherContactInfo* contactInfos,
	PxU32* numContacts,
	PxgFemRigidConstraintBlock* primitiveConstraints,
	PxgPrePrepDesc* preDesc,
	PxgConstraintPrepareDesc* prepareDesc,
	PxReal* rigidLambdaNs,               // per-contact accumulated lambda, reset to 0 here
	const PxReal invDt,
	PxgSolverSharedDescBase* sharedDesc,
	bool isTGS
)
{
	const PxU32 tNumContacts = *numContacts;

	PxU32* solverBodyIndices = preDesc->solverBodyIndices;
	// Distribute tNumContacts work items over the launched grid: each thread handles
	// nbIterationsPerBlock strided items.
	const PxU32 nbBlocksRequired = (tNumContacts + blockDim.x - 1) / blockDim.x;
	const PxU32 nbIterationsPerBlock = (nbBlocksRequired + gridDim.x - 1) / gridDim.x;
	const PxU32 idx = threadIdx.x;

	for(PxU32 i = 0; i < nbIterationsPerBlock; ++i)
	{
		const PxU32 workIndex = i * blockDim.x + idx + nbIterationsPerBlock * blockIdx.x * blockDim.x;

		// workIndex grows monotonically with i for this thread, so once out of range all later
		// iterations are too -- returning (not continuing) is safe.
		if(workIndex >= tNumContacts)
			return;

		// Reset the accumulated impulse for this contact. Note this happens even for contacts
		// skipped by the sentinel check below.
		rigidLambdaNs[workIndex] = 0.0f;

		PxgFemOtherContactInfo contactInfo = contactInfos[workIndex];
		// Constraints are packed in warp-sized (32) blocks; this contact occupies one lane.
		PxgFemRigidConstraintBlock& constraint = primitiveConstraints[workIndex / 32];

		PxU64 pairInd0 = contactInfo.pairInd0;

		// First one is rigid body
		const PxU64 tRigidId = pairInd0;
		const PxNodeIndex& rigidId = reinterpret_cast<const PxNodeIndex&>(tRigidId);

		// Second one is cloth
		PxU32 pairInd1 = PxU32(contactInfo.pairInd1);

		PxgFEMCloth& cloth = femClothes[PxGetClothId(pairInd1)];
		const PxU32 elementId = PxGetClothElementIndex(pairInd1);

		// 0xfffff appears to be the invalid/sentinel element index -- TODO confirm against
		// the element-index packing in PxGetClothElementIndex.
		if(elementId == 0xfffff)
			continue;

		const float4* PX_RESTRICT accumDelta_invMass = cloth.mAccumulatedDeltaPos;

		const float4 contact_restW = contacts_restW[workIndex];
		const float4 normal_pen = normalPens[workIndex];

		const PxVec3 p(contact_restW.x, contact_restW.y, contact_restW.z);

		float4 barycentric = barycentrics[workIndex];

		// barycentric.w == 0 marks a triangle contact (interpolate the accumulated delta over
		// the triangle); otherwise the contact references a vertex directly.
		float4 deltaP;
		if(barycentric.w == 0.f)
		{
			const uint4 vertexIndices = cloth.mTriangleVertexIndices[elementId];
			deltaP = computeBarycentricPos(vertexIndices, accumDelta_invMass, barycentric);
		}
		else
		{
			deltaP = accumDelta_invMass[elementId];
		}

		// Normal is flipped so it points in the direction used by the FEM solver.
		const PxVec3 normal(-normal_pen.x, -normal_pen.y, -normal_pen.z);
		// Penetration corrected by the rest distance stored in contact_restW.w.
		const PxReal pen = normal_pen.w - contact_restW.w;

		const PxVec3 delta(deltaP.x, deltaP.y, deltaP.z);

		prepareFEMContacts(constraint, normal, sharedDesc, p, pen, delta, rigidId, barycentric, prepareDesc, solverBodyIndices, cloth.mPenBiasClamp, invDt, isTGS);
	}
}
|
||||
|
||||
|
||||
//!
|
||||
//! \brief : prep cloth vs. cloth collision.
|
||||
//!
|
||||
|
||||
// Prep kernel for cloth vs. cloth contacts: clamps the contact count to maxContacts and, for
// self-collision pairs that are not yet marked valid, applies exact rest-distance filtering
// (edge-edge or vertex-triangle) -- a pair only becomes a real contact if the two features are
// farther apart than mSelfCollisionFilterDistance in the rest configuration.
extern "C" __global__
void cloth_clothContactPrepareLaunch(
	PxgFEMCloth* clothes,
	PxgFemFemContactInfo* contactInfos,
	PxU32* numContacts,
	PxU32 maxContacts,
	PxsDeformableSurfaceMaterialData* clothMaterials, // NOTE(review): unused in this kernel's visible body
	const PxU8* updateContactPairs
)
{
	// Early exit if contact pairs are not updated.
	if(*updateContactPairs == 0)
		return;

	const PxU32 tNumContacts = PxMin(*numContacts, maxContacts);
	// Distribute tNumContacts work items over the launched grid.
	const PxU32 nbBlocksRequired = (tNumContacts + blockDim.x - 1) / blockDim.x;
	const PxU32 nbIterationsPerBlock = (nbBlocksRequired + gridDim.x - 1) / gridDim.x;

	const PxU32 idx = threadIdx.x;

	for(PxU32 i = 0; i < nbIterationsPerBlock; ++i)
	{
		const PxU32 workIndex = i * blockDim.x + idx + nbIterationsPerBlock * blockIdx.x * blockDim.x;

		if(workIndex == 0) // Clamp the cloth contact count (single writer: thread handling item 0).
		{
			*numContacts = tNumContacts;
		}

		// workIndex is monotone in i for this thread, so returning here skips no valid work.
		if(workIndex >= tNumContacts)
		{
			return;
		}

		PxgFemFemContactInfo contactInfo = contactInfos[workIndex];
		if(contactInfo.isValidPair()) // For different cloths, contactInfo is already set to valid.
		{
			continue;
		}

		const PxU32 pairInd0 = PxU32(contactInfo.pairInd0);
		PxgFEMCloth& cloth0 = clothes[PxGetClothId(pairInd0)];
		const PxU32 elementId0 = PxGetClothElementIndex(pairInd0);

		PxU32 pairInd1 = PxU32(contactInfo.pairInd1);
		PxgFEMCloth& cloth1 = clothes[PxGetClothId(pairInd1)];
		const PxU32 elementId1 = PxGetClothElementIndex(pairInd1);

		if(contactInfo.isEdgeEdgePair()) // Edge-edge collision
		{
			// Edge0: the aux index selects which edge of the triangle (vertex k to k+1 mod 3).
			PxU32 e0_localIndex0 = contactInfo.getAuxInd0();
			PxU32 e0_localIndex1 = (e0_localIndex0 + 1) % 3;

			const uint4 triVertInd0 = cloth0.mTriangleVertexIndices[elementId0];
			const PxU32* vertexIndices0 = reinterpret_cast<const PxU32*>(&triVertInd0);
			const PxU32 e0_v0 = vertexIndices0[e0_localIndex0];
			const PxU32 e0_v1 = vertexIndices0[e0_localIndex1];

			// Edge1
			PxU32 e1_localIndex0 = contactInfo.getAuxInd1();
			PxU32 e1_localIndex1 = (e1_localIndex0 + 1) % 3;

			const uint4 triVertInd1 = cloth1.mTriangleVertexIndices[elementId1];
			const PxU32* vertexIndices1 = reinterpret_cast<const PxU32*>(&triVertInd1);
			const PxU32 e1_v0 = vertexIndices1[e1_localIndex0];
			const PxU32 e1_v1 = vertexIndices1[e1_localIndex1];

			// Compute the exact rest distance for filtering.
			// Mark pairs as valid if their rest distance exceeds the filter threshold.
			const PxVec3 r0 = PxLoad3(cloth0.mRestPosition[e0_v0]);
			const PxVec3 r1 = PxLoad3(cloth0.mRestPosition[e0_v1]);
			const PxVec3 r2 = PxLoad3(cloth1.mRestPosition[e1_v0]);
			const PxVec3 r3 = PxLoad3(cloth1.mRestPosition[e1_v1]);

			// Linear blending coefficients for edge0 and edge1, respectively.
			PxReal s, t;
			PxReal restDistSq;

			// Computationally more expensive than closestPtLineLine.
			closestPtEdgeEdge(r0, r1, r2, r3, s, t, restDistSq);

			// Apply exact (non-approximated) rest distance filtering.
			if(restDistSq > cloth0.mSelfCollisionFilterDistance * cloth0.mSelfCollisionFilterDistance)
			{
				contactInfo.markValid();
				contactInfos[workIndex] = contactInfo;
			}
		}
		else // Vertex-triangle collision
		{
			// elementId0 is the colliding vertex; elementId1 the triangle on the other cloth.
			const uint4 triVertId1 = cloth1.mTriangleVertexIndices[elementId1];
			PxVec4T<PxU32> vertIndices(elementId0, triVertId1.x, triVertId1.y, triVertId1.z);

			// Compute the exact rest distance for filtering.
			// Mark pairs as valid if their rest distance exceeds the filter threshold.
			const PxVec3 r0 = PxLoad3(cloth0.mRestPosition[vertIndices[0]]);
			const PxVec3 r1 = PxLoad3(cloth1.mRestPosition[vertIndices[1]]);
			const PxVec3 r2 = PxLoad3(cloth1.mRestPosition[vertIndices[2]]);
			const PxVec3 r3 = PxLoad3(cloth1.mRestPosition[vertIndices[3]]);

			const PxVec3 r12 = r2 - r1;
			const PxVec3 r13 = r3 - r1;

			// Apply exact (non-approximated) rest distance filtering.
			const PxVec3 closest = Gu::closestPtPointTriangle2(r0, r1, r2, r3, r12, r13);

			const PxReal restDistSq = (r0 - closest).magnitudeSquared();
			if(restDistSq > cloth0.mSelfCollisionFilterDistance * cloth0.mSelfCollisionFilterDistance)
			{
				contactInfo.markValid();
				contactInfos[workIndex] = contactInfo;
			}
		}
	}
}
|
||||
|
||||
// Barycentric interpolation of a per-vertex float4 quantity (e.g. accumulated delta with
// inverse mass in w) over the three vertices of a triangle.
static __device__ float4 computeTriangleContact(const float4* vels, const uint4& triVertId,
	const float4& barycentric)
{
	const float4 q0 = vels[triVertId.x];
	const float4 q1 = vels[triVertId.y];
	const float4 q2 = vels[triVertId.z];

	return q0 * barycentric.x + q1 * barycentric.y + q2 * barycentric.z;
}
|
||||
|
||||
// Prep kernel for cloth vs. particle contacts: zeroes the applied-force accumulators and fills
// one lane of the corresponding 32-wide PxgFEMParticleConstraintBlock (normal+penetration,
// barycentric, velocity multiplier) per contact.
extern "C" __global__ void cloth_particleContactPrepareLaunch(
	PxgFEMCloth* clothes,
	PxgParticleSystem* particlesystems,
	float4* contacts,
	float4* normalPens,                  // xyz = normal, w = penetration
	float4* barycentrics,
	PxgFemOtherContactInfo* contactInfos,
	PxU32* numContacts,
	PxgFEMParticleConstraintBlock* spConstraints, //soft body particle constraint
	float2* softBodyAppliedForces,
	float2* particleAppliedForces
)
{
	const PxU32 tNumContacts = *numContacts;

	// Distribute tNumContacts work items over the launched grid.
	const PxU32 nbBlocksRequired = (tNumContacts + blockDim.x - 1) / blockDim.x;

	const PxU32 nbIterationsPerBlock = (nbBlocksRequired + gridDim.x - 1) / gridDim.x;

	const PxU32 idx = threadIdx.x;
	const PxU32 threadIndexInWarp = threadIdx.x & 31; // lane index within the constraint block

	for (PxU32 i = 0; i < nbIterationsPerBlock; ++i)
	{
		const PxU32 workIndex = i * blockDim.x + idx + nbIterationsPerBlock * blockIdx.x * blockDim.x;

		// Monotone in i for this thread, so returning skips no valid work.
		if (workIndex >= tNumContacts)
			return;

		//initialize appliedForces to be zero
		softBodyAppliedForces[workIndex] = make_float2(0.f, 0.f);
		particleAppliedForces[workIndex] = make_float2(0.f, 0.f);

		PxgFemOtherContactInfo contactInfo = contactInfos[workIndex];
		// Constraints are packed in warp-sized (32) blocks; this contact occupies one lane.
		PxgFEMParticleConstraintBlock& constraint = spConstraints[workIndex / 32];

		PxU64 pairInd0 = contactInfo.pairInd0;

		//pairInd0 is a particle system
		const PxU32 tParticleSystemId = PxGetParticleSystemId(pairInd0);
		const PxU32 tParticleIndex = PxGetParticleIndex(pairInd0);

		//second one will be cloth
		PxU32 pairInd1 = PxU32(contactInfo.pairInd1);
		PxgFEMCloth& cloth = clothes[PxGetClothId(pairInd1)];
		const PxU32 triangleInd = PxGetClothElementIndex(pairInd1);

		const uint4 triVertInd = cloth.mTriangleVertexIndices[triangleInd];

		//get out the contact point
		const float4 contact = contacts[workIndex];
		const float4 normal_pen = normalPens[workIndex];

		// NOTE(review): 'p' is computed but not used anywhere in this kernel's visible body.
		const PxVec3 p(contact.x, contact.y, contact.z);

		float4 barycentric = barycentrics[workIndex];

		// Interpolate the cloth's accumulated delta over the triangle; .w carries the
		// interpolated inverse mass.
		const float4 delta1 = computeTriangleContact(cloth.mAccumulatedDeltaPos, triVertInd, barycentric);
		float invMass1 = delta1.w;

		// Normal is flipped so it points in the direction used by the solver.
		const PxVec3 normal(-normal_pen.x, -normal_pen.y, -normal_pen.z);

		PxgParticleSystem& particleSystem = particlesystems[tParticleSystemId];

		const float4 deltaP_invMass = particleSystem.mSortedDeltaP[tParticleIndex];

		const PxReal invMass0 = deltaP_invMass.w;

		// Relative accumulated displacement (cloth minus particle) since contact generation.
		PxVec3 delta(delta1.x - deltaP_invMass.x, delta1.y - deltaP_invMass.y, delta1.z - deltaP_invMass.z);

		// Penetration updated by the relative motion and corrected by the cloth rest distance.
		const PxReal pen = normal_pen.w + normal.dot(delta) - cloth.mRestDistance;

		const float unitResponse = invMass0 + invMass1;
		//KS - perhaps we don't need the > 0.f check here?
		// Guard against two infinitely heavy (invMass == 0) bodies.
		const float velMultiplier = (unitResponse > 0.f) ? (1.f / unitResponse) : 0.f;

		constraint.normal_pen[threadIndexInWarp] = make_float4(normal.x, normal.y, normal.z, pen);
		constraint.barycentric[threadIndexInWarp] = barycentric;
		constraint.velMultiplier[threadIndexInWarp] = velMultiplier;
	}
}
|
||||
|
||||
|
||||
//! \brief Prepares one cloth-vs-rigid attachment constraint per active attachment.
//!
//! For each attachment this kernel computes the attachment point on the cloth
//! (a vertex or a barycentric point on a triangle), the rigid-side anchor, the
//! per-axis constraint Jacobians (ra x n terms) and the unit-response-based
//! velocity multipliers, and writes them into the 32-wide constraint blocks
//! consumed by the solver.
extern "C" __global__ void cloth_rigidAttachmentPrepareLaunch(
	PxgFEMCloth* clothes,
	PxgFEMRigidAttachment* rigidAttachments,
	PxU32* activeRigidAttachments,
	PxNodeIndex* rigidAttachmentIds,
	PxU32 numRigidAttachments,
	PxgFEMRigidAttachmentConstraint* attachmentConstraints,
	const PxgPrePrepDesc* preDesc,
	const PxgConstraintPrepareDesc* prepareDesc,
	const PxgSolverSharedDescBase* sharedDesc,
	float4* rigidDeltaVel
)
{
	const PxAlignedTransform* bodyFrames = prepareDesc->body2WorldPool;

	const PxU32* solverBodyIndices = preDesc->solverBodyIndices;
	const PxgSolverBodyData* solverBodyData = prepareDesc->solverBodyDataPool;
	const PxgSolverTxIData* solverDataTxIPool = prepareDesc->solverBodyTxIDataPool;

	const PxgBodySim* bodySims = sharedDesc->mBodySimBufferDeviceData;

	// Partition the attachment range across blocks; each block walks
	// nbIterationsPerBlock chunks of blockDim.x attachments.
	const PxU32 nbBlocksRequired = (numRigidAttachments + blockDim.x - 1) / blockDim.x;
	const PxU32 nbIterationsPerBlock = (nbBlocksRequired + gridDim.x - 1) / gridDim.x;
	const PxU32 threadIndexInBlock = threadIdx.x;

	for (PxU32 i = 0; i < nbIterationsPerBlock; ++i)
	{
		const PxU32 workIndex = i * blockDim.x + threadIndexInBlock + nbIterationsPerBlock * blockIdx.x * blockDim.x;

		// workIndex grows monotonically with i, so every later iteration is out
		// of range too and the thread can exit outright.
		if (workIndex >= numRigidAttachments)
			return;

		// Constraints are stored in blocks of 32, one slot per lane.
		const PxU32 index = workIndex / 32;
		const PxU32 offset = workIndex & 31;

		const PxU32 attachmentId = activeRigidAttachments[workIndex];

		const PxgFEMRigidAttachment& attachment = rigidAttachments[attachmentId];
		PxgFEMRigidAttachmentConstraint& constraint = attachmentConstraints[index];

		// Cloth-side element id: encodes cloth index + element index, where the
		// element is either a single vertex or a triangle with barycentric coords.
		const PxU32 elemId = attachment.index1;
		const PxU32 clothId = PxGetClothId(elemId);
		const PxU32 elemIdx = PxGetClothElementIndex(elemId);
		const bool elemIsVertex = PxGetIsVertexType(attachment.baryOrType1);

		PxgFEMCloth& cloth = clothes[clothId];

		const float4* pos_invMass = cloth.mPosition_InvMass;
		const float4 low_high_limits = attachment.coneLimitParams.low_high_limits;
		const float4 axis_angle = attachment.coneLimitParams.axis_angle;

		// Attachment point on the cloth; w carries the (barycentric-blended) inverse mass.
		float4 attachmentPose;
		if (elemIsVertex)
		{
			attachmentPose = pos_invMass[elemIdx];
		}
		else
		{
			const float4 barycentric = attachment.baryOrType1;
			const uint4 triVertInd = cloth.mTriangleVertexIndices[elemIdx];
			const float4 pos_iMass0 = pos_invMass[triVertInd.x];
			const float4 pos_iMass1 = pos_invMass[triVertInd.y];
			const float4 pos_iMass2 = pos_invMass[triVertInd.z];
			attachmentPose = pos_iMass0 * barycentric.x + pos_iMass1 * barycentric.y + pos_iMass2 * barycentric.z;
		}

		float invMass1 = attachmentPose.w;
		const PxVec3 point(attachmentPose.x, attachmentPose.y, attachmentPose.z);
		const PxVec3 axis(axis_angle.x, axis_angle.y, axis_angle.z);

		// Rigid-side local anchor.
		float4 ra4 = attachment.localPose0;

		// Rigid node index; static bodies map to solver body slot 0.
		PxNodeIndex rigidId = reinterpret_cast<const PxNodeIndex&>(attachment.index0);
		PxU32 solverBodyIdx = 0;	// renamed from 'idx': it shadowed the thread-index local above
		if (!rigidId.isStaticBody())
		{
			solverBodyIdx = solverBodyIndices[rigidId.index()];
		}

		rigidAttachmentIds[workIndex] = rigidId;

		// The attachment is constrained along three fixed orthonormal world axes.
		const PxVec3 normal0(1.f, 0.f, 0.f);
		const PxVec3 normal1(0.f, 1.f, 0.f);
		const PxVec3 normal2(0.f, 0.f, 1.f);

		if (rigidId.isArticulation())
		{
			PxU32 nodeIndexA = rigidId.index();
			PxU32 artiId = bodySims[nodeIndexA].articulationRemapId;

			PxgArticulation& articulation = sharedDesc->articulations[artiId];

			const PxU32 linkID = rigidId.articulationLinkId();
			const PxTransform body2World = articulation.linkBody2Worlds[linkID];

			const PxVec3 bodyFrame0p(body2World.p.x, body2World.p.y, body2World.p.z);

			const PxVec3 worldAxis = (body2World.rotate(axis)).getNormalized();

			PxVec3 ra(ra4.x, ra4.y, ra4.z);
			ra = body2World.rotate(ra);
			PxVec3 error = ra + bodyFrame0p - point;

			const PxVec3 raXn0 = ra.cross(normal0);
			const PxVec3 raXn1 = ra.cross(normal1);
			const PxVec3 raXn2 = ra.cross(normal2);

			// Per-axis unit impulse responses from the link's spatial response matrix.
			PxSpatialMatrix& spatialResponse = articulation.spatialResponseMatrixW[linkID];
			const Cm::UnAlignedSpatialVector deltaV0 = spatialResponse * Cm::UnAlignedSpatialVector(normal0, raXn0);
			const Cm::UnAlignedSpatialVector deltaV1 = spatialResponse * Cm::UnAlignedSpatialVector(normal1, raXn1);
			const Cm::UnAlignedSpatialVector deltaV2 = spatialResponse * Cm::UnAlignedSpatialVector(normal2, raXn2);

			// BUGFIX: resp1/resp2 previously read deltaV0, leaving deltaV1/deltaV2
			// unused; each axis response must use its own delta velocity.
			const PxReal resp0 = deltaV0.top.dot(raXn0) + deltaV0.bottom.dot(normal0) + invMass1;
			const PxReal resp1 = deltaV1.top.dot(raXn1) + deltaV1.bottom.dot(normal1) + invMass1;
			const PxReal resp2 = deltaV2.top.dot(raXn2) + deltaV2.bottom.dot(normal2) + invMass1;

			const float velMultiplier0 = (resp0 > 0.f) ? (1.f / resp0) : 0.f;
			const float velMultiplier1 = (resp1 > 0.f) ? (1.f / resp1) : 0.f;
			const float velMultiplier2 = (resp2 > 0.f) ? (1.f / resp2) : 0.f;

			PxReal biasedErr0 = (error.dot(normal0));
			PxReal biasedErr1 = (error.dot(normal1));
			PxReal biasedErr2 = (error.dot(normal2));

			constraint.raXn0_biasW[offset] = make_float4(raXn0.x, raXn0.y, raXn0.z, biasedErr0);
			constraint.raXn1_biasW[offset] = make_float4(raXn1.x, raXn1.y, raXn1.z, biasedErr1);
			constraint.raXn2_biasW[offset] = make_float4(raXn2.x, raXn2.y, raXn2.z, biasedErr2);
			// Articulations don't use invMass0. We set it to 1.0 here so that the impulse scaling
			// for the linear impulse (converting it to a velocity change) remains an impulse when
			// dealing with an articulation.
			constraint.velMultiplierXYZ_invMassW[offset] = make_float4(velMultiplier0, velMultiplier1, velMultiplier2, 1.f);
			constraint.elemId[offset] = elemId;
			constraint.rigidId[offset] = rigidId.getInd();
			constraint.baryOrType[offset] = attachment.baryOrType1;
			constraint.low_high_limits[offset] = low_high_limits;
			constraint.axis_angle[offset] = make_float4(worldAxis.x, worldAxis.y, worldAxis.z, axis_angle.w);
		}
		else
		{
			const float4 linVel_invMass0 = solverBodyData[solverBodyIdx].initialLinVelXYZ_invMassW;
			const PxReal invMass0 = linVel_invMass0.w;

			// Kinematic bodies (zero inv mass, non-static) get identity sqrt-inertia
			// with the rotational response zeroed out via inertiaScale.
			PxMat33 invSqrtInertia0;
			PxReal inertiaScale = 1.f;
			if (invMass0 == 0.f && !rigidId.isStaticBody())
			{
				invSqrtInertia0 = PxMat33(PxIdentity);
				inertiaScale = 0.f;
			}
			else
			{
				invSqrtInertia0 = solverDataTxIPool[solverBodyIdx].sqrtInvInertia;
			}

			PxAlignedTransform bodyFrame0 = bodyFrames[solverBodyIdx];
			const PxVec3 bodyFrame0p(bodyFrame0.p.x, bodyFrame0.p.y, bodyFrame0.p.z);

			PxVec3 ra(ra4.x, ra4.y, ra4.z);
			ra = bodyFrame0.rotate(ra);
			PxVec3 error = ra + bodyFrame0p - point;

			const PxVec3 worldAxis = (bodyFrame0.rotate(axis)).getNormalized();

			const PxVec3 raXn0 = ra.cross(normal0);
			const PxVec3 raXn1 = ra.cross(normal1);
			const PxVec3 raXn2 = ra.cross(normal2);

			// Responses in sqrt-inertia space: |I^-1/2 (ra x n)|^2 = (ra x n)^T I^-1 (ra x n).
			const PxVec3 raXnSqrtInertia0 = invSqrtInertia0 * raXn0;
			const PxVec3 raXnSqrtInertia1 = invSqrtInertia0 * raXn1;
			const PxVec3 raXnSqrtInertia2 = invSqrtInertia0 * raXn2;
			const float resp0 = (raXnSqrtInertia0.dot(raXnSqrtInertia0))*inertiaScale + invMass0 + invMass1;
			const float resp1 = (raXnSqrtInertia1.dot(raXnSqrtInertia1))*inertiaScale + invMass0 + invMass1;
			const float resp2 = (raXnSqrtInertia2.dot(raXnSqrtInertia2))*inertiaScale + invMass0 + invMass1;

			const float velMultiplier0 = (resp0 > 0.f) ? (1.f / resp0) : 0.f;
			const float velMultiplier1 = (resp1 > 0.f) ? (1.f / resp1) : 0.f;
			const float velMultiplier2 = (resp2 > 0.f) ? (1.f / resp2) : 0.f;

			PxReal biasedErr0 = (error.dot(normal0));
			PxReal biasedErr1 = (error.dot(normal1));
			PxReal biasedErr2 = (error.dot(normal2));

			constraint.raXn0_biasW[offset] = make_float4(raXnSqrtInertia0.x, raXnSqrtInertia0.y, raXnSqrtInertia0.z, biasedErr0);
			constraint.raXn1_biasW[offset] = make_float4(raXnSqrtInertia1.x, raXnSqrtInertia1.y, raXnSqrtInertia1.z, biasedErr1);
			constraint.raXn2_biasW[offset] = make_float4(raXnSqrtInertia2.x, raXnSqrtInertia2.y, raXnSqrtInertia2.z, biasedErr2);
			constraint.velMultiplierXYZ_invMassW[offset] = make_float4(velMultiplier0, velMultiplier1, velMultiplier2, invMass0);
			constraint.elemId[offset] = elemId;
			constraint.rigidId[offset] = rigidId.getInd();
			constraint.baryOrType[offset] = attachment.baryOrType1;
			constraint.low_high_limits[offset] = low_high_limits;
			constraint.axis_angle[offset] = make_float4(worldAxis.x, worldAxis.y, worldAxis.z, axis_angle.w);

			// NOTE(review): the delta-velocity accumulators are only zeroed on this
			// rigid-body path; the articulation path above skips it — confirm intended.
			if (rigidDeltaVel)
			{
				rigidDeltaVel[workIndex] = make_float4(0.f);
				rigidDeltaVel[workIndex + numRigidAttachments] = make_float4(0.f);
			}
		}
	}
}
|
||||
|
||||
1683
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/FEMClothExternalSolve.cu
vendored
Normal file
1683
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/FEMClothExternalSolve.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
705
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/FEMClothUtil.cuh
vendored
Normal file
705
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/FEMClothUtil.cuh
vendored
Normal file
@@ -0,0 +1,705 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __CU_FEMCLOTHUTIL_CUH__
|
||||
#define __CU_FEMCLOTHUTIL_CUH__
|
||||
|
||||
#include "PxgFEMCloth.h"
|
||||
#include "vector_types.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
#include "foundation/PxBounds3.h"
|
||||
#include "copy.cuh"
|
||||
#include "shuffle.cuh"
|
||||
#include "assert.h"
|
||||
#include "stdio.h"
|
||||
#include "PxgFEMClothCoreKernelIndices.h"
|
||||
#include "atomic.cuh"
|
||||
#include "PxsDeformableSurfaceMaterialCore.h"
|
||||
#include "femMidphaseScratch.cuh"
|
||||
#include "GuBV32.h"
|
||||
#include "deformableUtils.cuh"
|
||||
#include "particleSystem.cuh"
|
||||
#include "utils.cuh"
|
||||
|
||||
|
||||
using namespace physx;
|
||||
|
||||
|
||||
/*******************************************************************************
|
||||
*
|
||||
*
|
||||
* Definitions
|
||||
*
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#define FEMCLOTH_SQRT2 1.4142135623730950488016887242097f
|
||||
#define FEMCLOTH_SQRT3 1.7320508075688772935274463415059f
|
||||
|
||||
#define FEMCLOTH_THRESHOLD 1.0e-14f
|
||||
#define FEMCLOTH_PI 3.14159265358979323846f
|
||||
#define FEMCLOTH_HALF_PI 1.57079632679489661923f
|
||||
#define FEMCLOTH_2PI 6.28318530717958647692f
|
||||
#define FEMCLOTH_2PI_INV 0.15915494309189533576888376337251f
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*******************************************************************************
|
||||
*
|
||||
*
|
||||
* Math functions
|
||||
*
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
//!
|
||||
//! \brief : Extract rotation R [r0, r1] from F [f0, f1] in 2D
|
||||
//! \reference: https://en.wikipedia.org/wiki/Square_root_of_a_2_by_2_matrix
|
||||
//!
|
||||
|
||||
//! Extracts the rotation R = [r0 r1] from the deformation gradient F = [f0 f1]
//! via polar decomposition, using the closed-form 2x2 matrix square root.
static PX_FORCE_INLINE __device__ void extractRotation2D(PxVec2& r0, PxVec2& r1, const PxVec2& f0, const PxVec2& f1)
{
	// Fallback used whenever the decomposition is (near-)singular: R = identity.
	const auto setIdentity = [&]()
	{
		r0.x = 1.0f;
		r0.y = 0.0f;
		r1.x = 0.0f;
		r1.y = 1.0f;
	};

	// F^T * F packed as (S2[0], S2[1]) on the diagonal and S2[2] off-diagonal:
	// [S2[0], S2[2]]
	// [S2[2], S2[1]]
	const PxVec3 S2(f0.dot(f0), f1.dot(f1), f0.dot(f1));
	const float det = S2[0] * S2[1] - S2[2] * S2[2];

	if (det < FEMCLOTH_THRESHOLD)
	{
		setIdentity();
		return;
	}

	// S = sqrt(F^T F) via the 2x2 square-root formula: S = (F^T F + s0*I) / t.
	const float s0 = sqrtf(det);
	const float t = sqrtf(S2[0] + S2[1] + 2.0f * s0);

	assert(t > 0.0f);
	if (t < FEMCLOTH_THRESHOLD)
	{
		setIdentity();
		return;
	}
	const float tInv = 1.0f / t;

	PxVec3 S(S2);
	S[0] += s0;
	S[1] += s0;
	S *= tInv;

	const float sDet = S[0] * S[1] - S[2] * S[2];
	assert(sDet > 0.0f);

	if (sDet < FEMCLOTH_THRESHOLD)
	{
		setIdentity();
		return;
	}

	// Invert the symmetric 2x2 S (adjugate over determinant).
	const float sDetInv = 1.0f / sDet;

	PxVec3 SInv(S[1], S[0], -S[2]);
	SInv *= sDetInv;

	// R = F * S^-1, written column-wise: R = [r0 r1].
	r0 = SInv[0] * f0 + SInv[2] * f1;
	r1 = SInv[2] * f0 + SInv[1] * f1;
}
|
||||
|
||||
|
||||
|
||||
//!
|
||||
//! \brief : Approximated atan2: max error of ~1/10000
|
||||
//! \reference: https://mazzo.li/posts/vectorized-atan2.html
|
||||
//!
|
||||
|
||||
static PX_FORCE_INLINE __device__ PxReal atanApprox(PxReal x)
{
	// Odd minimax polynomial x * P(x^2), evaluated in Horner form
	// (identical operation order to the nested expression it replaces).
	const PxReal x2 = x * x;
	PxReal poly = -0.01172120f;
	poly = poly * x2 + 0.05265332f;
	poly = poly * x2 + -0.11643287f;
	poly = poly * x2 + 0.19354346f;
	poly = poly * x2 + -0.33262347f;
	poly = poly * x2 + 0.99997726f;
	return x * poly;
}
|
||||
|
||||
static PX_FORCE_INLINE __device__ PxReal atan2Approx(PxReal y, PxReal x)
{
	// Octant reduction: evaluate atan on the ratio with |.| <= 1, then undo it.
	const bool swapped = PxAbs(x) < PxAbs(y);
	const PxReal ratio = swapped ? (x / y) : (y / x);
	PxReal result = atanApprox(ratio);

	if (swapped)
	{
		// atan(y/x) = +-pi/2 - atan(x/y)
		result = (ratio >= 0.f ? FEMCLOTH_HALF_PI : -FEMCLOTH_HALF_PI) - result;
	}

	// Shift into the correct half-plane when x is negative.
	if (x < 0.f)
	{
		result += (y >= 0.f ? FEMCLOTH_PI : -FEMCLOTH_PI);
	}

	return result;
}
|
||||
|
||||
//! Clamps |vel| to maxVel. When clamping occurs, the position is re-integrated
//! from prevPos with the clamped velocity and the correction is accumulated
//! into accumDelta. Returns true iff the velocity was clamped.
static PX_FORCE_INLINE __device__ bool velocityClamping(float4& pos, float4& vel, float4& accumDelta, PxReal maxVel, PxReal dt,
														const float4& prevPos)
{
	const PxReal speedSq = PxLoad3(vel).magnitudeSquared();

	// Negated form of (speedSq > maxVel^2) so NaN speeds still take the no-op path.
	if (!(speedSq > maxVel * maxVel))
		return false;	// Velocity is not clamped.

	// Rescale the velocity to the maximum magnitude.
	vel *= maxVel / PxSqrt(speedSq);

	// Re-integrate the position from the previous one using the clamped velocity.
	float4 clampedPos = prevPos + vel * dt;
	clampedPos.w = pos.w;	// keep inverse mass intact in w
	vel.w = pos.w;

	accumDelta += clampedPos - pos;
	pos = clampedPos;
	return true;	// Velocity is clamped.
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*******************************************************************************
|
||||
*
|
||||
*
|
||||
* Delta lambda updates
|
||||
*
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
//!
|
||||
//! \brief : Returns the delta lambda in XPBD when a constraint has three vertex degrees of freedom, applicable in both 2D and 3D but without damping.
|
||||
//!
|
||||
|
||||
template <typename PxVec2Or3>
|
||||
static PX_FORCE_INLINE __device__ float queryDeltaLambda(float C, const PxVec2Or3& dCdx0, const PxVec2Or3& dCdx1, const PxVec2Or3& dCdx2,
|
||||
float alphaTilde, float lambda, float massInv0, float massInv1, float massInv2)
|
||||
{
|
||||
const float denom =
|
||||
(massInv0 * dCdx0.magnitudeSquared() + massInv1 * dCdx1.magnitudeSquared() + massInv2 * dCdx2.magnitudeSquared()) + alphaTilde;
|
||||
assert(denom != 0.0f);
|
||||
|
||||
if (denom < FEMCLOTH_THRESHOLD)
|
||||
return 0.0f;
|
||||
|
||||
return (-C - alphaTilde * lambda) / denom;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief : Returns the delta lambda in XPBD when a constraint has three vertex degrees of freedom, applicable in both 2D and 3D with damping.
|
||||
//!
|
||||
|
||||
template <typename PxVec2Or3>
|
||||
static PX_FORCE_INLINE __device__ float queryDeltaLambda(float C, const PxVec2Or3& dCdx0, const PxVec2Or3& dCdx1, const PxVec2Or3& dCdx2,
|
||||
float alphaTilde, float lambda, float massInv0, float massInv1, float massInv2,
|
||||
float damping, float dtInv, float dCdT)
|
||||
{
|
||||
const float denom = (1.0f + damping * dtInv) * (massInv0 * dCdx0.magnitudeSquared() + massInv1 * dCdx1.magnitudeSquared() +
|
||||
massInv2 * dCdx2.magnitudeSquared()) +
|
||||
alphaTilde;
|
||||
assert(denom != 0.0f);
|
||||
|
||||
if (denom < FEMCLOTH_THRESHOLD)
|
||||
return 0.0f;
|
||||
|
||||
return -(C + alphaTilde * lambda + damping * dCdT) / denom;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief : Returns the delta lambda in XPBD when a constraint has four vertex degrees of freedom, applicable in 3D but without damping.
|
||||
//!
|
||||
|
||||
//! XPBD delta-lambda for a constraint with four vertex degrees of freedom
//! (3D, undamped), e.g. a triangle-pair bending constraint.
static PX_FORCE_INLINE __device__ float queryDeltaLambda(float C, const PxVec3& dCdx0, const PxVec3& dCdx1, const PxVec3& dCdx2,
														 const PxVec3& dCdx3, float alphaTilde, float lambda, float massInv0,
														 float massInv1, float massInv2, float massInv3)
{
	const float weightedGradSq = (massInv0 * dCdx0.magnitudeSquared() + massInv1 * dCdx1.magnitudeSquared() +
								  massInv2 * dCdx2.magnitudeSquared() + massInv3 * dCdx3.magnitudeSquared());
	const float denom = weightedGradSq + alphaTilde;
	assert(denom != 0.0f);

	// Guard against a (near-)singular system.
	return (denom < FEMCLOTH_THRESHOLD) ? 0.0f : (-C - alphaTilde * lambda) / denom;
}
|
||||
|
||||
//!
|
||||
//! \brief : Returns the delta lambda in XPBD when a constraint has four vertex degrees of freedom, applicable in 3D with damping.
|
||||
//!
|
||||
|
||||
//! XPBD delta-lambda for a constraint with four vertex degrees of freedom
//! (3D) including damping.
static PX_FORCE_INLINE __device__ float queryDeltaLambda(float C, const PxVec3& dCdx0, const PxVec3& dCdx1, const PxVec3& dCdx2,
														 const PxVec3& dCdx3, float alphaTilde, float lambda, float massInv0, float massInv1,
														 float massInv2, float massInv3, float damping, float dtInv, const float dCdT)
{
	const float gamma = 1.0f + damping * dtInv;
	const float weightedGradSq = (massInv0 * dCdx0.magnitudeSquared() + massInv1 * dCdx1.magnitudeSquared() +
								  massInv2 * dCdx2.magnitudeSquared() + massInv3 * dCdx3.magnitudeSquared());
	const float denom = gamma * weightedGradSq + alphaTilde;
	assert(denom != 0.0f);

	// Guard against a (near-)singular system.
	if (denom < FEMCLOTH_THRESHOLD)
		return 0.0f;

	return -(C + alphaTilde * lambda + damping * dCdT) / denom;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*******************************************************************************
|
||||
*
|
||||
*
|
||||
* Deformation gradient and its derivatives
|
||||
*
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
//!
|
||||
//! \brief : query deformation gradients (F \in R^{2x2})
|
||||
//!
|
||||
|
||||
//! Computes the 2x2 deformation gradient F = [f0 f1] from the current edge
//! vectors (xp01, xp02) and the inverse rest-shape matrix QInv (packed row-wise
//! into a float4 as [x y; z w]).
static PX_FORCE_INLINE __device__ void queryDeformationGradient_F2x2(PxVec2& f0, PxVec2& f1, const float4& QInv, const PxVec2& xp01,
																	 const PxVec2& xp02)
{
	const PxVec2 col0 = QInv.x * xp01 + QInv.y * xp02;
	const PxVec2 col1 = QInv.z * xp01 + QInv.w * xp02;
	f0 = col0;
	f1 = col1;
}
|
||||
|
||||
|
||||
|
||||
//!
|
||||
//! \brief : compute gradient of constraint (F \in R^{2x2})
|
||||
//!
|
||||
|
||||
//! Chain rule through the deformation gradient: maps the per-F constraint
//! derivatives (pC_pF0, pC_pF1) back to per-vertex gradients via qInv^T.
static PX_FORCE_INLINE __device__ void queryConstraintGradient_F2x2(PxVec2& grad1, PxVec2& grad2, const float4& qInv, const PxVec2& pC_pF0,
																	const PxVec2& pC_pF1)
{
	const PxVec2 g1 = qInv.x * pC_pF0 + qInv.z * pC_pF1;
	const PxVec2 g2 = qInv.y * pC_pF0 + qInv.w * pC_pF1;
	grad1 = g1;
	grad2 = g2;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*******************************************************************************
|
||||
*
|
||||
*
|
||||
* Constraint functions
|
||||
*
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
//!
|
||||
//! \brief : As-Rigid-As-Possible constraint using {sqrt(||F - R||_F^2)}, F \in R^{2x2}
|
||||
//!
|
||||
|
||||
//! As-Rigid-As-Possible constraint C = sqrt(||F - R||_F^2) on a 2x2 deformation
//! gradient. Accumulates the XPBD position corrections into dx0..dx2 and the
//! total multiplier into lambda.
static inline __device__ void ARAPConstraint_F2X2(float& lambda, PxVec2& dx0, PxVec2& dx1, PxVec2& dx2, float alphaTilde,
												  const float4& QInv, const PxVec2& x01, const PxVec2& x02, float massInv0, float massInv1,
												  float massInv2, const PxgFEMCloth& shFEMCloth)
{
	// F = [f0 f1] and its closest rotation R = [r0 r1].
	PxVec2 f0, f1, r0, r1;
	queryDeformationGradient_F2x2(f0, f1, QInv, x01, x02);
	extractRotation2D(r0, r1, f0, f1);

	const PxVec2 dF0 = f0 - r0;
	const PxVec2 dF1 = f1 - r1;

	// ARAP constraint value: Frobenius norm of (F - R).
	const float C = sqrt(dF0.dot(dF0) + dF1.dot(dF1));

	// Negated comparison so a NaN/zero C takes the no-op path, as before.
	if (!(C > FEMCLOTH_THRESHOLD))
		return;

	const float CInv = 1.0f / C;

	// dC/dF = (F - R) / C, column-wise.
	const PxVec2 pC_pF0 = CInv * dF0;
	const PxVec2 pC_pF1 = CInv * dF1;

	// Per-vertex constraint gradients; grad0 balances the other two.
	PxVec2 grad0, grad1, grad2;
	queryConstraintGradient_F2x2(grad1, grad2, QInv, pC_pF0, pC_pF1);
	grad0 = -grad1 - grad2;

	const float deltaLambda = queryDeltaLambda(C, grad0, grad1, grad2, alphaTilde, lambda, massInv0, massInv1, massInv2);
	lambda += deltaLambda;

	// Mass-weighted position corrections.
	dx0 += massInv0 * deltaLambda * grad0;
	dx1 += massInv1 * deltaLambda * grad1;
	dx2 += massInv2 * deltaLambda * grad2;
}
|
||||
|
||||
|
||||
|
||||
//!
|
||||
//! \brief : Area conservation constraint
|
||||
//!
|
||||
|
||||
//! Area-conservation constraint for a single triangle in its local 2D frame.
//! Accumulates XPBD position corrections into dx0..dx2 and updates lambda.
//! Two equivalent formulations are kept; the first (#if 1) operates on the
//! triangle area directly, the second on det(F) of the deformation gradient.
static inline __device__ void areaConstraint_F2X2(float& lambda, PxVec2& dx0, PxVec2& dx1, PxVec2& dx2, float alphaTilde,
												  const float4& QInv, const PxVec2& x01, const PxVec2& x02, float massInv0, float massInv1,
												  float massInv2, float area, const PxgFEMCloth& shFEMCloth)
{
#if 1

	// Area constraints
	// C = |x01 X x02| / |u01 X u02| - 1.0
	// 2D cross product of the two edge vectors = twice the signed current area.
	const PxReal x01CrossX02 = x01.x * x02.y - x01.y * x02.x;
	// 'area' is the undeformed (rest) triangle area.
	const float undeformedAreaInv = 1.0f / area;

	const float C = 0.5f * x01CrossX02 * undeformedAreaInv - 1.0f;

	// Analytic gradients of C w.r.t. vertices 1 and 2 (perpendicular edge vectors);
	// grad0 balances the other two so the net correction conserves momentum.
	const PxVec2 grad1(0.5f * undeformedAreaInv * x02.y, -0.5f * undeformedAreaInv * x02.x);
	const PxVec2 grad2(-0.5f * undeformedAreaInv * x01.y, 0.5f * undeformedAreaInv * x01.x);
	const PxVec2 grad0 = -grad1 - grad2;

#else

	// Area constraints
	// C = det(F) - 1, F \in R^ { 2x2 }
	PxVec2 f0, f1, r0, r1; // F = [f0 f1], R = [r0 r1]
	PxVec2 grad0, grad1, grad2; // gradient of constraint

	queryDeformationGradient_F2x2(f0, f1, QInv, x01, x02);

	const PxReal C = f0.x * f1.y - f0.y * f1.x - 1.0f;

	// pC/pF = [pCA_pF0 pCA_pF1]
	const PxVec2 pC_pF0(f1.y, -f0.y);
	const PxVec2 pC_pF1(-f1.x, f0.x);

	queryConstraintGradient_F2x2(grad1, grad2, QInv, pC_pF0, pC_pF1);
	grad0 = -grad1 - grad2;

#endif

	// Standard XPBD update: solve for delta lambda, then apply mass-weighted
	// corrections along the constraint gradients.
	const float deltaLambda = queryDeltaLambda(C, grad0, grad1, grad2, alphaTilde, lambda, massInv0, massInv1, massInv2);
	lambda += deltaLambda;

	dx0 += massInv0 * deltaLambda * grad0;
	dx1 += massInv1 * deltaLambda * grad1;
	dx2 += massInv2 * deltaLambda * grad2;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*******************************************************************************
|
||||
*
|
||||
*
|
||||
* Energy models
|
||||
*
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
//!
|
||||
//! \brief : XPBD formulation of fixed corotated model
|
||||
//!
|
||||
|
||||
//! XPBD formulation of the fixed corotated membrane model for one triangle:
//! solves an ARAP (shear/stretch) constraint followed by an optional area
//! (volume-preservation) constraint, updating the vertex positions xx0..xx2
//! in place (w components carry inverse masses and are preserved).
//! vertexScale* weight the per-vertex inverse masses (e.g. reference counts);
//! lambdaIndex selects the persistent lambda slot used in non-TGS mode.
static __device__ inline void membraneEnergySolvePerTriangle(PxgFEMCloth& shFEMCloth, float4& xx0, float4& xx1, float4& xx2, PxReal dt,
															 const PxsDeformableSurfaceMaterialData& material, const float4& QInv,
															 float vertexScale0, float vertexScale1, float vertexScale2, PxU32 lambdaIndex,
															 bool isShared, bool isTGS)
{
	// Zero (or negligible) stiffness: nothing to solve.
	if (material.youngs < FEMCLOTH_THRESHOLD)
	{
		return;
	}

	PxVec3 x0 = PxLoad3(xx0);
	PxVec3 x1 = PxLoad3(xx1);
	PxVec3 x2 = PxLoad3(xx2);

	const PxVec3 x01 = x1 - x0;
	const PxVec3 x02 = x2 - x0;

	// Build an orthonormal 2D frame (axis0, axis1) in the triangle plane.
	const PxVec3 axis0 = x01.getNormalized();
	PxVec3 normal = x01.cross(x02);
	const PxVec3 axis1 = (normal.cross(axis0)).getNormalized();

	const PxReal dt2 = dt * dt;

	// Rest area recovered from det(QInv); volume = area * shell thickness.
	const PxReal det = QInv.x * QInv.w - QInv.y * QInv.z;
	const PxReal area = 1.0f / (2.0f * det);
	const PxReal volume = area * material.thickness;

	PxVec2 dx0(0.0f), dx1(0.0f), dx2(0.0f);
	float lambda0 = 0.0f, lambda1 = 0.0f;

	// Non-TGS: load the accumulated lambdas persisted across iterations.
	if (!isTGS)
	{
		lambda0 = isShared ? shFEMCloth.mOrderedSharedTriangleLambdas[lambdaIndex].x : shFEMCloth.mOrderedNonSharedTriangleLambdas[lambdaIndex].x;
		lambda1 = isShared ? shFEMCloth.mOrderedSharedTriangleLambdas[lambdaIndex].y : shFEMCloth.mOrderedNonSharedTriangleLambdas[lambdaIndex].y;
	}

	// Lame's parameters
	const PxPair<PxReal, PxReal> lames = lameParameters(material.youngs, material.poissons);

	// 1) enforcing ARAP constraint
	// Project the edge vectors into the local 2D frame.
	PxVec2 xp01(axis0.dot(x01), axis1.dot(x01));
	PxVec2 xp02(axis0.dot(x02), axis1.dot(x02));

	// Lame's second parameters
	const PxReal mu = lames.second;
	// XPBD compliance for the ARAP term: alpha~ = 1 / (2*mu*V*dt^2).
	const PxReal alphaTilde0 = 1.0f / (2.0f * mu * volume * dt2);

	ARAPConstraint_F2X2(lambda0, dx0, dx1, dx2, alphaTilde0, QInv, xp01, xp02, vertexScale0 * xx0.w, vertexScale1 * xx1.w,
						vertexScale2 * xx2.w, shFEMCloth);

	// 2) enforcing area constraint
	if (material.poissons > FEMCLOTH_THRESHOLD)
	{
		PxReal alphaTilde1 = 0.0f;

		// Below the incompressible limit, use the first Lame parameter as the
		// stiffness; at poissons ~ 0.5 the compliance stays 0 (hard constraint).
		if(material.poissons < 0.5f - FEMCLOTH_THRESHOLD)
		{
			// Lame's first parameters
			const PxReal lambda = lames.first;
			alphaTilde1 = 1.0f / (lambda * volume * dt2);
		}

		// Apply the ARAP corrections before evaluating the area constraint
		// (Gauss-Seidel style within the triangle).
		xp01 += dx1 - dx0;
		xp02 += dx2 - dx0;

		areaConstraint_F2X2(lambda1, dx0, dx1, dx2, alphaTilde1, QInv, xp01, xp02, vertexScale0 * xx0.w, vertexScale1 * xx1.w,
							vertexScale2 * xx2.w, area, shFEMCloth);
	}

	// Lift the 2D corrections back into 3D along the local frame axes.
	x0 += dx0.x * axis0 + dx0.y * axis1;
	x1 += dx1.x * axis0 + dx1.y * axis1;
	x2 += dx2.x * axis0 + dx2.y * axis1;

	// Non-TGS: persist the accumulated lambdas for the next iteration.
	if (!isTGS)
	{
		if (isShared)
		{
			shFEMCloth.mOrderedSharedTriangleLambdas[lambdaIndex].x = lambda0;
			shFEMCloth.mOrderedSharedTriangleLambdas[lambdaIndex].y = lambda1;
		}
		else
		{
			shFEMCloth.mOrderedNonSharedTriangleLambdas[lambdaIndex].x = lambda0;
			shFEMCloth.mOrderedNonSharedTriangleLambdas[lambdaIndex].y = lambda1;
		}
	}

	// Write back xyz only; w (inverse mass) is left untouched.
	xx0.x = x0.x;
	xx0.y = x0.y;
	xx0.z = x0.z;

	xx1.x = x1.x;
	xx1.y = x1.y;
	xx1.z = x1.z;

	xx2.x = x2.x;
	xx2.y = x2.y;
	xx2.z = x2.z;
}
|
||||
|
||||
|
||||
|
||||
//!
|
||||
//! \brief : XPBD formulation of "Discrete Shells"
|
||||
//!
|
||||
|
||||
//! XPBD formulation of the "Discrete Shells" bending constraint for one pair
//! of triangles (x0,x2,x3) and (x1,x3,x2) sharing the hinge edge x2-x3.
//! The constraint is the deviation of the dihedral angle from its rest angle;
//! positions are updated in place (w components carry inverse masses).
//! vertexReferenceCounts scales each vertex's inverse mass; trianglePairIndex
//! selects the persistent lambda slot used in non-TGS mode.
static __device__ inline void bendingEnergySolvePerTrianglePair(PxgFEMCloth& shFEMCloth, float4& x0, float4& x1, float4& x2, float4& x3,
																const float4& vertexReferenceCounts, float dt, PxU32 trianglePairIndex,
																bool isSharedTrianglePartition, bool isTGS)
{
	// Edge vectors of the two triangles; x23 is the shared hinge edge.
	const PxVec3 x02 = PxLoad3(x2 - x0);
	const PxVec3 x03 = PxLoad3(x3 - x0);
	const PxVec3 x13 = PxLoad3(x3 - x1);
	const PxVec3 x12 = PxLoad3(x2 - x1);
	const PxVec3 x23 = PxLoad3(x3 - x2);
	const PxReal x23Len = x23.magnitude();

	// Degenerate hinge edge: skip.
	if(x23Len < FEMCLOTH_THRESHOLD)
		return;

	const PxReal x23LenInv = 1.f / x23Len;
	const PxVec3 x23Normalized = x23 * x23LenInv;

	// Packed per-pair parameters: x = rest angle, y = inverse flexural stiffness,
	// z = damping (currently unused below).
	const float4 restBendingAngle_flexuralStiffness_damping =
		isSharedTrianglePartition ? shFEMCloth.mOrderedSharedRestBendingAngle_flexuralStiffness_damping[trianglePairIndex]
								  : shFEMCloth.mOrderedNonSharedRestBendingAngle_flexuralStiffness_damping[trianglePairIndex];

	const PxReal restBendingAngle = restBendingAngle_flexuralStiffness_damping.x;
	const PxReal kInv = restBendingAngle_flexuralStiffness_damping.y;

	// Non-positive inverse stiffness disables this bending pair.
	if (kInv <= 0.f)
		return;

	//const PxReal damping = restBendingAngle_flexuralStiffness_damping.z;

	// Unnormalized triangle normals (magnitude = twice the triangle area).
	const PxVec3 scaledN0 = x02.cross(x03);
	const PxVec3 scaledN1 = x13.cross(x12);

	const PxReal n0LenInv = 1.f / scaledN0.magnitude();
	const PxReal n1LenInv = 1.f / scaledN1.magnitude();

	const PxVec3 n0 = scaledN0 * n0LenInv;
	PxVec3 n1 = scaledN1 * n1LenInv;

	// Signed dihedral angle between the two faces about the hinge edge.
	const PxReal cosAngle = n0.dot(n1);
	const PxReal sinAngle = n0.cross(n1).dot(x23Normalized);
	PxReal angle = atan2f(sinAngle, cosAngle);

	PxReal C = 0.f;
	PxReal alphaTilde = 0.f;
	float dtInv = 1.0f / dt;

	// XPBD compliance: alpha~ = kInv / dt^2.
	alphaTilde = kInv * dtInv * dtInv;
	C = angle - restBendingAngle;

	// Wrap C into the branch nearest zero (angle is only defined mod 2*pi).
	if(PxAbs(C + FEMCLOTH_2PI) < PxAbs(C))
	{
		C += FEMCLOTH_2PI;
	}
	else if(PxAbs(C - FEMCLOTH_2PI) < PxAbs(C))
	{
		C -= FEMCLOTH_2PI;
	}

	// Bending constraint clamped.
	C = PxClamp(C, -FEMCLOTH_HALF_PI, FEMCLOTH_HALF_PI);
	// Gradients of the dihedral angle w.r.t. the four vertices ("Discrete
	// Shells" hinge gradients; temp* = normal scaled by inverse face area).
	const PxVec3 temp0 = n0 * n0LenInv;
	const PxVec3 temp1 = n1 * n1LenInv;
	const PxVec3 dCdx0 = -x23Len * temp0;
	const PxVec3 dCdx1 = -x23Len * temp1;
	const PxVec3 dCdx2 = x03.dot(x23Normalized) * temp0 + x13.dot(x23Normalized) * temp1;
	const PxVec3 dCdx3 = -(x02.dot(x23Normalized) * temp0 + x12.dot(x23Normalized) * temp1);

	PxReal lambda = 0.0f;

	// Non-TGS: load the accumulated lambda persisted across iterations.
	if (!isTGS)
	{
		lambda = isSharedTrianglePartition ? shFEMCloth.mSharedBendingLambdas[trianglePairIndex] :
			shFEMCloth.mNonSharedBendingLambdas[trianglePairIndex];
	}

	float deltaLambda =
		queryDeltaLambda(C, dCdx0, dCdx1, dCdx2, dCdx3, alphaTilde, lambda, vertexReferenceCounts.x * x0.w, vertexReferenceCounts.y * x1.w,
						 vertexReferenceCounts.z * x2.w, vertexReferenceCounts.w * x3.w);

	// Non-TGS: persist the updated lambda.
	if (!isTGS)
	{
		if (isSharedTrianglePartition)
		{
			shFEMCloth.mSharedBendingLambdas[trianglePairIndex] = lambda + deltaLambda;
		}
		else
		{
			shFEMCloth.mNonSharedBendingLambdas[trianglePairIndex] = lambda + deltaLambda;
		}
	}

	// Apply mass-weighted corrections along the constraint gradients.
	PxReal scale0 = vertexReferenceCounts.x * x0.w * deltaLambda;
	x0.x += scale0 * dCdx0.x;
	x0.y += scale0 * dCdx0.y;
	x0.z += scale0 * dCdx0.z;

	PxReal scale1 = vertexReferenceCounts.y * x1.w * deltaLambda;
	x1.x += scale1 * dCdx1.x;
	x1.y += scale1 * dCdx1.y;
	x1.z += scale1 * dCdx1.z;

	PxReal scale2 = vertexReferenceCounts.z * x2.w * deltaLambda;
	x2.x += scale2 * dCdx2.x;
	x2.y += scale2 * dCdx2.y;
	x2.z += scale2 * dCdx2.z;

	PxReal scale3 = vertexReferenceCounts.w * x3.w * deltaLambda;
	x3.x += scale3 * dCdx3.x;
	x3.y += scale3 * dCdx3.y;
	x3.z += scale3 * dCdx3.z;

	return;
}
|
||||
|
||||
//!
|
||||
//! \brief : Cloth shell energies in a triangle-pair (two adjacent triangles): in-plane + bending
|
||||
//!
|
||||
|
||||
static __device__ inline
void
clothSharedEnergySolvePerTrianglePair(PxgFEMCloth& shFEMCloth, float4& x0, float4& x1, float4& x2, float4& x3,
	const float4& vertexReferenceCount, const PxsDeformableSurfaceMaterialData* PX_RESTRICT clothMaterials,
	float dt, PxU32 trianglePairIndex, bool isTGS)
{
	// Layout of the pair:
	//   shared edge  - the edge common to both adjacent triangles (x2-x3).
	//   edge0, edge1 - the remaining (non-shared) edge of triangle0 / triangle1.
	// vertexReferenceCount holds, per vertex, how often it is referenced across
	// all triangle pairs (used to scale the per-constraint mass contribution).
	const float4 packedRestEdges = shFEMCloth.mOrderedSharedRestEdge0_edge1[trianglePairIndex];
	const float4 packedLengthAndMaterials = shFEMCloth.mOrderedSharedRestEdgeLength_material0_material1[trianglePairIndex];

	// Material indices are stored as floats in the packed rest data.
	const PxU32 materialId0 = static_cast<PxU32>(packedLengthAndMaterials.y);
	const PxU32 materialId1 = static_cast<PxU32>(packedLengthAndMaterials.z);

	const PxVec2 restEdgeTri0(packedRestEdges.x, packedRestEdges.y);
	const PxVec2 restEdgeTri1(packedRestEdges.z, packedRestEdges.w);
	const float sharedRestLength = packedLengthAndMaterials.x;

	// Determinants of the 2x2 rest-shape matrices; a near-zero value means a
	// degenerate rest triangle, for which the membrane solve is skipped.
	const float detTri0 = sharedRestLength * restEdgeTri0.y;
	const float detTri1 = sharedRestLength * restEdgeTri1.y;

	// In-plane (membrane) constraint of triangle0, spanned by x2, x3 and x0.
	if(PxAbs(detTri0) > FEMCLOTH_THRESHOLD)
	{
		const float4 QInv0 = make_float4(restEdgeTri0.y, 0.0f, -restEdgeTri0.x, sharedRestLength) / detTri0;
		membraneEnergySolvePerTriangle(shFEMCloth, x2, x3, x0, dt, clothMaterials[materialId0], QInv0, vertexReferenceCount.z,
			vertexReferenceCount.w, vertexReferenceCount.x, 2 * trianglePairIndex, true, isTGS);
	}

	// In-plane (membrane) constraint of triangle1, spanned by x2, x3 and x1.
	if(PxAbs(detTri1) > FEMCLOTH_THRESHOLD)
	{
		const float4 QInv1 = make_float4(restEdgeTri1.y, 0.0f, -restEdgeTri1.x, sharedRestLength) / detTri1;
		membraneEnergySolvePerTriangle(shFEMCloth, x2, x3, x1, dt, clothMaterials[materialId1], QInv1, vertexReferenceCount.z,
			vertexReferenceCount.w, vertexReferenceCount.y, 2 * trianglePairIndex + 1, true, isTGS);
	}

	// Bending constraint across the shared edge of the pair.
	bendingEnergySolvePerTrianglePair(shFEMCloth, x0, x1, x2, x3, vertexReferenceCount, dt, trianglePairIndex, true, isTGS);
}
|
||||
|
||||
#endif // FEMCLOTHUTIL
|
||||
790
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/SDFConstruction.cu
vendored
Normal file
790
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/SDFConstruction.cu
vendored
Normal file
@@ -0,0 +1,790 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
|
||||
#include "atomic.cuh"
|
||||
#include "reduction.cuh"
|
||||
#include "bvh.cuh"
|
||||
#include "GuSDF.h"
|
||||
#include "utils.cuh"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Empty host-side entry point; presumably referenced from host code so this
// translation unit's kernels get compiled/linked in — TODO(review): confirm.
extern "C" __host__ void initSdfConstructionKernels0() {}
|
||||
|
||||
// Returns the midpoint of the AABB stored at 'index' in the packed
// float4 lower/upper arrays.
PX_FORCE_INLINE __device__ PxVec3 getCenter(PxU32 index, const float4* PX_RESTRICT itemLowers, const float4* PX_RESTRICT itemUppers)
{
	const PxVec3 boundsMin = PxLoad3(itemLowers[index]);
	const PxVec3 boundsMax = PxLoad3(itemUppers[index]);
	return 0.5f * (boundsMin + boundsMax);
}
|
||||
|
||||
// One thread per item: maps each item's AABB center into the normalized grid
// space [0,1)^3 and emits a Morton sort key plus the item's original index.
// When per-item priorities are provided, a 27-bit Morton code is combined with
// a priority field in the upper bits; otherwise the full key is a Morton code.
extern "C" __global__ void bvhCalculateMortonCodes(const float4* PX_RESTRICT itemLowers, const float4* PX_RESTRICT itemUppers, const PxI32* PX_RESTRICT itemPriorities, PxI32 n,
	const PxVec3* gridLower, const PxVec3* gridInvEdges, PxI32* PX_RESTRICT indices, PxI32* PX_RESTRICT keys)
{
	const PxI32 index = blockDim.x*blockIdx.x + threadIdx.x;

	if (index < n)
	{
		PxVec3 center = getCenter(index, itemLowers, itemUppers);

		// Normalize the center into grid space: (center - origin) * (1 / edgeLength), per axis.
		PxVec3 local = (center - gridLower[0]).multiply(gridInvEdges[0]);

		PxI32 key;
		if (itemPriorities)
		{
			// 9-bit Morton codes stored in lower 27bits (512^3 effective resolution)
			// 5-bit priority code stored in the upper 5-bits
			key = morton3<512>(local.x, local.y, local.z);

			// we invert priorities (so that higher priority items appear first in sorted order)
			key |= (~itemPriorities[index]) << 27;
		}
		else
		{
			// No priorities: use the full 30-bit Morton code (1024^3 resolution).
			key = morton3<1024>(local.x, local.y, local.z);
		}

		// indices[] starts as the identity permutation; the subsequent key sort
		// permutes it so it maps sorted position -> original item.
		indices[index] = index;
		keys[index] = key;
	}
}
|
||||
|
||||
// calculate the index of the first differing bit between two adjacent Morton keys
|
||||
extern "C" __global__ void bvhCalculateKeyDeltas(const PxI32* PX_RESTRICT keys, PxReal* PX_RESTRICT deltas, PxI32 n)
|
||||
{
|
||||
const PxI32 index = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
|
||||
if (index + 1 < n)
|
||||
{
|
||||
PxI32 a = keys[index];
|
||||
PxI32 b = keys[index + 1];
|
||||
|
||||
//if (a > b)
|
||||
// printf("Elements not sorted\n");
|
||||
|
||||
PxI32 x = a ^ b;
|
||||
|
||||
deltas[index] = PxReal(x); // reinterpret_cast<PxReal&>(x);// This should work since x is positive
|
||||
}
|
||||
}
|
||||
|
||||
// calculate the index of the first differing bit between two adjacent Morton keys
|
||||
extern "C" __global__ void bvhCalculateKeyDeltasSquaredDistance(const PxI32* PX_RESTRICT keys, PxReal* PX_RESTRICT deltas, PxI32 n,
|
||||
const float4* PX_RESTRICT itemLowers, const float4* PX_RESTRICT itemUppers)
|
||||
{
|
||||
const PxI32 index = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
|
||||
if (index + 1 < n)
|
||||
{
|
||||
//PxI32 a = keys[index];
|
||||
//PxI32 b = keys[index + 1];
|
||||
|
||||
//itemLowers and itemUppers must be in sorted order
|
||||
PxVec3 centerA = getCenter(index, itemLowers, itemUppers);
|
||||
PxVec3 centerB = getCenter(index + 1, itemLowers, itemUppers);
|
||||
PxReal distanceSquared = (centerA - centerB).magnitudeSquared();
|
||||
|
||||
//if (a > b)
|
||||
// printf("Elements not sorted\n");
|
||||
|
||||
//PxI32 x = a ^ b;
|
||||
|
||||
deltas[index] = distanceSquared;
|
||||
}
|
||||
}
|
||||
|
||||
// Emits the BVH leaf nodes in Morton-sorted order. indices[] maps sorted
// position -> original item, so leaf 'slot' packs item 'indices[slot]'.
// Each leaf's key range is the degenerate interval [slot, slot].
extern "C" __global__ void bvhBuildLeaves(const float4* PX_RESTRICT itemLowers, const float4* PX_RESTRICT itemUppers,
	PxI32 n, const PxI32* PX_RESTRICT indices, PxI32* PX_RESTRICT rangeLefts, PxI32* PX_RESTRICT rangeRights, PxgPackedNodeHalf* PX_RESTRICT lowers, PxgPackedNodeHalf* PX_RESTRICT uppers)
{
	const PxI32 slot = blockDim.x*blockIdx.x + threadIdx.x;

	if (slot >= n)
		return;

	const PxI32 item = indices[slot];
	const float4 upperBound = itemUppers[item];

	// Leaf node: lower half carries the item index and a leaf flag, upper half
	// carries the bound plus the item's w component.
	lowers[slot] = makeNode(PxLoad3(itemLowers[item]), item, true);
	uppers[slot] = makeNode(PxLoad3(upperBound), upperBound.w);

	// Each leaf initially covers exactly its own sorted position.
	rangeLefts[slot] = slot;
	rangeRights[slot] = slot;
}
|
||||
|
||||
// One thread per triangle: computes the triangle's AABB, fattened by
// 'margin', and writes it into the packed float4 lower/upper arrays.
extern "C" __global__ void bvhComputeTriangleBounds(const PxVec3* PX_RESTRICT vertices, const PxU32* PX_RESTRICT triangleIndices, PxU32 numTriangles,
	float4* PX_RESTRICT itemLowers, float4* PX_RESTRICT itemUppers, PxReal margin)
{
	const PxI32 tri = blockDim.x*blockIdx.x + threadIdx.x;

	if (tri >= numTriangles)
		return;

	const PxVec3 v0 = vertices[triangleIndices[3 * tri + 0]];
	const PxVec3 v1 = vertices[triangleIndices[3 * tri + 1]];
	const PxVec3 v2 = vertices[triangleIndices[3 * tri + 2]];

	// Start from a degenerate box at v0, then grow to cover the other two
	// vertices and expand by the margin.
	PxBounds3 box(v0, v0);
	box.include(v1);
	box.include(v2);
	box.fattenFast(margin);

	itemLowers[tri] = make_float4(box.minimum.x, box.minimum.y, box.minimum.z, 0.0f);
	itemUppers[tri] = make_float4(box.maximum.x, box.maximum.y, box.maximum.z, 0.0f);
}
|
||||
|
||||
// Kernel wrapper: builds the BVH interior nodes bottom-up from the leaves and
// key deltas by delegating to the shared buildHierarchy() device routine
// (declared in bvh.cuh). Outputs the root index and the maximum tree depth.
extern "C" __global__ void bvhBuildHierarchy(PxI32 n, PxI32* root, PxU32* maxTreeDepth, const PxReal* PX_RESTRICT deltas, PxI32* PX_RESTRICT numChildren,
	volatile PxI32* PX_RESTRICT rangeLefts, volatile PxI32* PX_RESTRICT rangeRights, volatile PxgPackedNodeHalf* PX_RESTRICT lowers, volatile PxgPackedNodeHalf* PX_RESTRICT uppers)
{
	buildHierarchy(n, root, maxTreeDepth, deltas, numChildren, rangeLefts, rangeRights, lowers, uppers);
}
|
||||
|
||||
|
||||
// Same as bvhBuildHierarchy, but additionally accumulates per-node winding
// number cluster approximations during the build: a WindingClusterBuilder is
// passed to buildHierarchy() and fills 'clusters' from the mesh geometry.
extern "C" __global__ void bvhBuildHierarchyAndWindingClusters(PxI32 n, PxI32* root, PxU32* maxTreeDepth, const PxReal* PX_RESTRICT deltas, PxI32* PX_RESTRICT numChildren,
	volatile PxI32* PX_RESTRICT rangeLefts, volatile PxI32* PX_RESTRICT rangeRights, volatile PxgPackedNodeHalf* PX_RESTRICT lowers, volatile PxgPackedNodeHalf* PX_RESTRICT uppers,
	PxgWindingClusterApproximation* clusters, const PxVec3* vertices, const PxU32* indices)
{
	WindingClusterBuilder w(clusters, vertices, indices, n);
	buildHierarchy(n, root, maxTreeDepth, deltas, numChildren, rangeLefts, rangeRights, lowers, uppers, w);
}
|
||||
|
||||
// Component-wise atomic max of a PxVec3. Each component is updated with its
// own atomic, so the stored vector is not a single atomic snapshot.
PX_FORCE_INLINE __device__ void AtomicMaxVec3(PxVec3* address, const PxVec3 v)
{
	PxReal* components = reinterpret_cast<PxReal*>(address);
	AtomicMax(components + 0, v.x);
	AtomicMax(components + 1, v.y);
	AtomicMax(components + 2, v.z);
}
|
||||
|
||||
// Component-wise atomic min of a PxVec3 (see AtomicMaxVec3 for the
// non-snapshot caveat).
PX_FORCE_INLINE __device__ void AtomicMinVec3(PxVec3* address, const PxVec3 v)
{
	PxReal* components = reinterpret_cast<PxReal*>(address);
	AtomicMin(components + 0, v.x);
	AtomicMin(components + 1, v.y);
	AtomicMin(components + 2, v.z);
}
|
||||
|
||||
// Reduces all item AABBs to a single global bound: each block reduces its
// items' lower/upper corners component-wise, then thread 0 folds the block
// result into totalLower/totalUpper with atomics. totalLower/totalUpper must
// be pre-initialized to +/-FLT_MAX by the caller for the atomics to be correct
// — TODO(review): confirm against the launch site.
extern "C" __global__ void bvhComputeTotalBounds(const float4* itemLowers, const float4* itemUppers, PxVec3* totalLower, PxVec3* totalUpper, PxI32 numItems)
{
	const PxI32 blockStart = blockDim.x*blockIdx.x;
	// NOTE(review): numValid is computed but not used below; presumably a
	// leftover from an earlier reduction variant.
	const PxI32 numValid = min(numItems - blockStart, blockDim.x);

	const PxI32 tid = blockStart + threadIdx.x;

	// Warp mask of in-range lanes, consumed by blockReduction.
	PxU32 mask = __ballot_sync(FULL_MASK, tid < numItems);
	if (tid < numItems)
	{
		PxVec3 lower = PxLoad3(itemLowers[tid]);
		PxVec3 upper = PxLoad3(itemUppers[tid]);

		// NOTE(review): these __syncthreads() sit inside a divergent branch;
		// this assumes blockReduction's shared-storage reuse tolerates the
		// out-of-range threads skipping the barrier — verify for partial blocks.
		PxVec3 blockUpper;
		blockUpper.x = blockReduction<MaxOpFloat, PxReal, 256>(mask, upper.x, -FLT_MAX);
		__syncthreads();
		blockUpper.y = blockReduction<MaxOpFloat, PxReal, 256>(mask, upper.y, -FLT_MAX);
		__syncthreads();
		blockUpper.z = blockReduction<MaxOpFloat, PxReal, 256>(mask, upper.z, -FLT_MAX);

		// sync threads because second reduce uses same temp storage as first
		__syncthreads();

		PxVec3 blockLower;
		blockLower.x = blockReduction<MinOpFloat, PxReal, 256>(mask, lower.x, FLT_MAX);
		__syncthreads();
		blockLower.y = blockReduction<MinOpFloat, PxReal, 256>(mask, lower.y, FLT_MAX);
		__syncthreads();
		blockLower.z = blockReduction<MinOpFloat, PxReal, 256>(mask, lower.z, FLT_MAX);

		if (threadIdx.x == 0)
		{
			// write out block results, expanded by the radius
			AtomicMaxVec3(totalUpper, blockUpper);
			AtomicMinVec3(totalLower, blockLower);
		}
	}
}
|
||||
|
||||
// compute inverse edge length, this is just done on the GPU to avoid a CPU->GPU sync point
|
||||
extern "C" __global__ void bvhComputeTotalInvEdges(const PxVec3* totalLower, const PxVec3* totalUpper, PxVec3* totalInvEdges)
|
||||
{
|
||||
PxVec3 edges = (totalUpper[0] - totalLower[0]);
|
||||
edges += PxVec3(0.0001f);
|
||||
|
||||
totalInvEdges[0] = PxVec3(1.0f / edges.x, 1.0f / edges.y, 1.0f / edges.z);
|
||||
}
|
||||
|
||||
// Reads the per-leaf distance offset stored in the 4th float of a
// PxgPackedNodeHalf, accessed by type-punning the node as a float4.
PX_FORCE_INLINE __device__ PxReal getDistanceOffset(const PxgPackedNodeHalf& o)
{
	return reinterpret_cast<const float4&>(o).w;
}
|
||||
|
||||
// Writes the per-leaf distance offset into the 4th float of a
// PxgPackedNodeHalf (counterpart of getDistanceOffset).
PX_FORCE_INLINE __device__ void setDistanceOffset(PxgPackedNodeHalf& o, PxReal distanceOffset)
{
	reinterpret_cast<float4&>(o).w = distanceOffset;
}
|
||||
|
||||
// BVH traversal functor that finds the closest distance from a query point to
// a point cloud, where each point is encoded as the center of a leaf node's
// bounding box and each leaf carries an extra distance offset in its packed
// 4th float (see getDistanceOffset). Distances combine as
// sqrt(|q - p|^2 + offset^2).
struct ClosestDistanceToPointCloudTraversalWithOffset
{
public:
	PxVec3 mQueryPoint;
	PxReal mClosestDistance;	// best (smallest) distance found so far

	PX_FORCE_INLINE __device__ ClosestDistanceToPointCloudTraversalWithOffset()
	{
	}

	// initialClosestDistance acts as an upper bound; nodes farther than it are pruned.
	PX_FORCE_INLINE __device__ ClosestDistanceToPointCloudTraversalWithOffset(const PxVec3& queryPoint, PxReal initialClosestDistance = 100000000000.0f)
		: mQueryPoint(queryPoint), mClosestDistance(initialClosestDistance)
	{
	}

	// Squared distance from 'point' to the AABB [minimum, maximum]
	// (zero when the point lies inside the box).
	PX_FORCE_INLINE __device__ PxReal distancePointBoxSquared(const PxVec3& minimum, const PxVec3& maximum, const PxVec3& point)
	{
		PxVec3 closestPt = minimum.maximum(maximum.minimum(point));
		return (closestPt - point).magnitudeSquared();
	}

	// Visitor called per node; returns whether the traversal should descend.
	PX_FORCE_INLINE __device__ BvhTraversalControl::Enum operator()(const PxgPackedNodeHalf& lower, const PxgPackedNodeHalf& upper, PxI32 nodeIndex)
	{
		// Prune: if even the node's box is farther than the current best, skip the subtree.
		if (distancePointBoxSquared(PxVec3(lower.x, lower.y, lower.z), PxVec3(upper.x, upper.y, upper.z), mQueryPoint) >= mClosestDistance * mClosestDistance)
			return BvhTraversalControl::eDontGoDeeper;

		if (lower.b)	// leaf node
		{
			// The encoded point is the center of the leaf's bounding box.
			const PxVec3 point = PxVec3(0.5f * (lower.x + upper.x), 0.5f * (lower.y + upper.y), 0.5f * (lower.z + upper.z));

			PxReal distanceOffset = getDistanceOffset(upper);
			PxReal distSq = (mQueryPoint - point).magnitudeSquared() + distanceOffset * distanceOffset;
			if (distSq < mClosestDistance * mClosestDistance)
			{
				mClosestDistance = PxSqrt(distSq);
			}

			return BvhTraversalControl::eDontGoDeeper;
		}

		return BvhTraversalControl::eGoDeeper;
	}
};
|
||||
|
||||
// Casts a ray from 'origin' along 'dir' against the triangle mesh BVH and
// reports whether anything was hit. Outputs the dot product between the ray
// direction and the hit triangle's orientation at the closest hit, plus a flag
// for whether the closest hit landed on a triangle edge — both are used by the
// caller to judge whether the inside/outside classification is trustworthy.
PX_FORCE_INLINE __device__ bool traceInteriorRay(const PxgBvhTriangleMesh& mesh, const PxVec3& origin, const PxVec3& dir, PxI32* stack, PxU32 stackSize, PxReal& closestDotProduct, bool& closestPointOnTriangleEdge)
{
	ClosestRayIntersectionTraversal query(mesh.mVertices, mesh.mTriangles, origin, dir, true);
	queryBVH(mesh.mBvh, query, stack, stackSize);

	closestDotProduct = query.closestDotProduct;
	closestPointOnTriangleEdge = query.closestPointOnTriangleEdge;
	return query.hasHit();
}
|
||||
|
||||
// Computes a dense signed distance field sample per grid point using a hybrid
// inside/outside test: cheap axis-aligned ray parity votes first, falling back
// to a (more expensive) winding-number evaluation whenever any ray result
// looks unreliable (hit near an edge, grazing hit, or disagreeing votes).
extern "C" __global__ __launch_bounds__(256, 1) void sdfCalculateDenseGridHybrid(PxgBvhTriangleMesh mesh, const PxgWindingClusterApproximation* PX_RESTRICT windingNumberClusters,
	Gu::GridQueryPointSampler sampler, PxU32 sizeX, PxU32 sizeY, PxU32 sizeZ, PxReal* PX_RESTRICT sdfData)
{
	// Per-thread traversal stack kept in local memory.
	const PxU32 stackSize = 47;
	//__shared__ PxI32 stackMem[256 * stackSize];
	PxI32 stackMem[stackSize];

	// block addressing
	const PxI32 x = blockIdx.x*blockDim.x + threadIdx.x;
	const PxI32 y = blockIdx.y*blockDim.y + threadIdx.y;
	const PxI32 z = blockIdx.z*blockDim.z + threadIdx.z;

	//const PxI32 threadId = threadIdx.z * 8 * 8 + threadIdx.y * 8 + threadIdx.x;

	if (x < sizeX && y < sizeY && z < sizeZ)
	{
		// Mesh bounds come from the BVH root node ("rood" is a typo kept as-is).
		PxU32 roodNodeId = *mesh.mBvh.mRootNode;
		PxVec3 meshBoundsMin = mesh.mBvh.mNodeLowers[roodNodeId].getXYZ();
		PxVec3 meshBoundsMax = mesh.mBvh.mNodeUppers[roodNodeId].getXYZ();

		const PxVec3 p = sampler.getPoint(x, y, z);

		PxI32* stack = &stackMem[/*stackSize * threadId*/0];

		// Unsigned distance to the mesh surface.
		ClosestDistanceToTriangleMeshTraversal distQuery(mesh.mTriangles, mesh.mVertices, p);
		queryBVH(mesh.mBvh, distQuery, stack, stackSize);
		PxReal d = PxSqrt(distQuery.mClosestDistanceSquared);

		PxReal sign = 1.0f;

		bool repeatInsideTest = false;

		// Three axis-aligned rays vote on inside-ness; a point is accepted as
		// inside only if all three agree (parity == 3). Each ray is shot toward
		// the nearer side of the mesh bounds.
		PxI32 parity = 0;
		PxReal threshold = 0.01f;	// grazing-hit rejection threshold on the dot product
		PxReal closestDotProduct;
		bool closestPointOnTriangleEdge;

		// x-axis
		if (traceInteriorRay(mesh, p, PxVec3(PxAbs(p.x - meshBoundsMin.x) < PxAbs(meshBoundsMax.x - p.x) ? -1.0f : 1.0f, 0.0f, 0.0f), stack, stackSize, closestDotProduct, closestPointOnTriangleEdge))
		{
			if (closestDotProduct < 0.0f)
				parity++;
			if (closestPointOnTriangleEdge || PxAbs(closestDotProduct) <= threshold)
				repeatInsideTest = true;
		}

		// y-axis
		if (!repeatInsideTest && traceInteriorRay(mesh, p, PxVec3(0.0f, PxAbs(p.y - meshBoundsMin.y) < PxAbs(meshBoundsMax.y - p.y) ? -1.0f : 1.0f, 0.0f), stack, stackSize, closestDotProduct, closestPointOnTriangleEdge))
		{
			if (closestDotProduct < 0.0f)
				parity++;
			if (closestPointOnTriangleEdge || PxAbs(closestDotProduct) <= threshold)
				repeatInsideTest = true;
		}

		// z-axis
		if (!repeatInsideTest && traceInteriorRay(mesh, p, PxVec3(0.0f, 0.0f, PxAbs(p.z - meshBoundsMin.z) < PxAbs(meshBoundsMax.z - p.z) ? -1.0f : 1.0f), stack, stackSize, closestDotProduct, closestPointOnTriangleEdge))
		{
			if (closestDotProduct < 0.0f)
				parity++;
			if (closestPointOnTriangleEdge || PxAbs(closestDotProduct) <= threshold)
				repeatInsideTest = true;
		}

		// Unanimous inside vote -> negative sign; a split vote is unreliable,
		// so re-test with winding numbers.
		if (parity == 3)
			sign = -1.0f;
		else if (parity != 0)
		{
			repeatInsideTest = true;
		}

		if (repeatInsideTest)
		{
			//Fall back to winding numbers for problematic points
			WindingNumberTraversal windingNumber(mesh.mTriangles, mesh.mNumTriangles, mesh.mVertices, windingNumberClusters, p);
			queryBVH(mesh.mBvh, windingNumber, stack, stackSize);
			bool inside = windingNumber.mWindingNumber > 0.5f;
			if (inside)
				sign = -1.0f;
		}

		sdfData[Gu::idx3D(x, y, z, sizeX, sizeY)] = d * sign;
	}
	__syncthreads();
}
|
||||
|
||||
// Computes a dense signed distance field sample per grid point using the
// winding number for every point (no ray-parity fast path, unlike
// sdfCalculateDenseGridHybrid). Optionally also writes out the raw winding
// number per sample when 'windingNumbers' is non-null.
extern "C" __global__ __launch_bounds__(256, 1) void sdfCalculateDenseGridBlocks(PxgBvhTriangleMesh mesh, const PxgWindingClusterApproximation* PX_RESTRICT windingNumberClusters,
	Gu::GridQueryPointSampler sampler, PxU32 sizeX, PxU32 sizeY, PxU32 sizeZ, PxReal* PX_RESTRICT sdfData, PxReal* PX_RESTRICT windingNumbers)
{
	// Per-thread traversal stack kept in local memory.
	const PxU32 stackSize = 47;
	//__shared__ PxI32 stackMem[256 * stackSize];
	PxI32 stackMem[stackSize];

	const PxU32 x = blockIdx.x*blockDim.x + threadIdx.x;
	const PxU32 y = blockIdx.y*blockDim.y + threadIdx.y;
	const PxU32 z = blockIdx.z*blockDim.z + threadIdx.z;

	//const PxI32 threadId = threadIdx.z * 8 * 8 + threadIdx.y * 8 + threadIdx.x;

	if (x < sizeX && y < sizeY && z < sizeZ)
	{
		PxVec3 p = sampler.getPoint(x, y, z);

		PxI32* stack = &stackMem[/*stackSize * threadId*/0];

		// Unsigned distance to the mesh surface.
		ClosestDistanceToTriangleMeshTraversal distQuery(mesh.mTriangles, mesh.mVertices, p);
		queryBVH(mesh.mBvh, distQuery, stack, stackSize);
		PxReal closestDistance = PxSqrt(distQuery.mClosestDistanceSquared);

		// Inside/outside from the approximate winding number (> 0.5 => inside).
		WindingNumberTraversal windingNumber(mesh.mTriangles, mesh.mNumTriangles, mesh.mVertices, windingNumberClusters, p);
		queryBVH(mesh.mBvh, windingNumber, stack, stackSize);


		PxU32 resultIndex = Gu::idx3D(x, y, z, sizeX, sizeY);

		bool inside = windingNumber.mWindingNumber > 0.5f;
		sdfData[resultIndex] = (inside ? -1.0f : 1.0f) * closestDistance;

		if (windingNumbers)
			windingNumbers[resultIndex] = windingNumber.mWindingNumber;
	}
}
|
||||
|
||||
// Cube of an unsigned integer (i^3); used for sample/cell counts of cubic grids.
PX_FORCE_INLINE PX_CUDA_CALLABLE PxU32 pow3(PxU32 i)
{
	const PxU32 square = i * i;
	return square * i;
}
|
||||
|
||||
// True when the closed intervals [minA, maxA] and [minB, maxB] intersect.
PX_FORCE_INLINE PX_CUDA_CALLABLE bool rangesOverlaps(PxReal minA, PxReal maxA, PxReal minB, PxReal maxB)
{
	// Disjoint iff one interval ends before the other starts.
	const bool disjoint = (minA > maxB) || (minB > maxA);
	return !disjoint;
}
|
||||
|
||||
// Builds the coarse "background" SDF by copying, for each background grid
// point, the dense SDF sample at the matching subgrid-corner location.
extern "C" __global__ void sdfPopulateBackgroundSDF(PxU32 cellsPerSubgrid, PxReal* PX_RESTRICT backgroundSDF, PxU32 backgroundSizeX, PxU32 backgroundSizeY, PxU32 backgroundSizeZ,
	const PxReal* PX_RESTRICT sdf, PxU32 width, PxU32 height, PxU32 depth)
{
	PxI32 id = (blockIdx.x * blockDim.x) + threadIdx.x;

	if (id >= backgroundSizeX * backgroundSizeY * backgroundSizeZ)
		return;

	// Decompose the linear thread id into background-grid coordinates.
	PxU32 bx, by, bz;
	Gu::idToXYZ(id, backgroundSizeX, backgroundSizeY, bx, by, bz);

	// The dense SDF has (width+1) x (height+1) samples per slice; pick the
	// sample at this background point's subgrid corner.
	const PxU32 denseIndex = Gu::idx3D(bx * cellsPerSubgrid, by * cellsPerSubgrid, bz * cellsPerSubgrid, width + 1, height + 1);
	backgroundSDF[id] = sdf[denseIndex];
}
|
||||
|
||||
|
||||
// One thread block per subgrid: decides whether the subgrid needs dense
// storage. A subgrid is required when its SDF value range intersects the
// narrow band AND the coarse background SDF is not already accurate enough.
// Also accumulates the global min/max over all required subgrids (used later
// for quantization). Writes 1/0 into subgridInfo and subgridActive per subgrid.
extern "C" __global__ void
__launch_bounds__(PxgBVHKernelBlockDim::BUILD_SDF, 1)
sdfMarkRequiredSdfSubgrids(PxReal* PX_RESTRICT backgroundSDF, const PxReal* PX_RESTRICT sdf, PxU32* PX_RESTRICT subgridInfo, PxU8* PX_RESTRICT subgridActive, PxU32 cellsPerSubgrid, PxU32 width, PxU32 height, PxU32 depth,
	PxU32 backgroundSizeX, PxU32 backgroundSizeY, PxU32 backgroundSizeZ, PxReal narrowBandThickness, PxReal* subgridGlobalMinValue, PxReal* subgridGlobalMaxValue, PxReal errorThreshold)
{
	// One shared scratch buffer per reduced quantity (min, max, error).
	__shared__ PxReal sharedMemoryX[PxgBVHKernelBlockDim::BUILD_SDF / WARP_SIZE];
	__shared__ PxReal sharedMemoryY[PxgBVHKernelBlockDim::BUILD_SDF / WARP_SIZE];
	__shared__ PxReal sharedMemoryZ[PxgBVHKernelBlockDim::BUILD_SDF / WARP_SIZE];

	Gu::DenseSDF coarseEval(backgroundSizeX, backgroundSizeY, backgroundSizeZ, backgroundSDF); //TODO: Replace with 3d texture?
	PxReal s = 1.0f / cellsPerSubgrid;

	//A subgrid has pow3(cellsPerSubgrid) cells but pow3(cellsPerSubgrid + 1) samples
	PxU32 numSamplesPerSubgrid = pow3(cellsPerSubgrid + 1);

	// Per-thread partial results over a strided subset of the subgrid samples.
	PxReal sdfMin = FLT_MAX;
	PxReal sdfMax = -FLT_MAX;
	PxReal maxAbsError = 0.0f;

	for (PxU32 i = threadIdx.x; i < numSamplesPerSubgrid; i += blockDim.x)
	{
		PxU32 xLocal, yLocal, zLocal;
		Gu::idToXYZ(i, cellsPerSubgrid + 1, cellsPerSubgrid + 1, xLocal, yLocal, zLocal);

		// Global sample coordinates: blockIdx selects the subgrid.
		PxU32 x = blockIdx.x * cellsPerSubgrid + xLocal;
		PxU32 y = blockIdx.y * cellsPerSubgrid + yLocal;
		PxU32 z = blockIdx.z * cellsPerSubgrid + zLocal;

		const PxU32 index = Gu::idx3D(x, y, z, width + 1, height + 1);
		PxReal sdfValue = sdf[index];

		sdfMin = PxMin(sdfMin, sdfValue);
		sdfMax = PxMax(sdfMax, sdfValue);

		// Error of the coarse background SDF (sampled at this point in
		// background-grid coordinates) against the exact dense value.
		maxAbsError = PxMax(maxAbsError, PxAbs(sdfValue - coarseEval.sampleSDFDirect(PxVec3(blockIdx.x + xLocal * s, blockIdx.y + yLocal * s, blockIdx.z + zLocal * s))));
	}

	__syncthreads();

	// Block-wide reductions of the three per-thread partials.
	sdfMin = blockReduction<MinOpFloat, PxReal>(FULL_MASK, sdfMin, FLT_MAX, blockDim.x, sharedMemoryX);
	sdfMax = blockReduction<MaxOpFloat, PxReal>(FULL_MASK, sdfMax, -FLT_MAX, blockDim.x, sharedMemoryY);
	maxAbsError = blockReduction<MaxOpFloat, PxReal>(FULL_MASK, maxAbsError, -FLT_MAX, blockDim.x, sharedMemoryZ);

	__syncthreads();

	if (threadIdx.x == 0)
	{
		bool subgridRequired = rangesOverlaps(sdfMin, sdfMax, -narrowBandThickness, narrowBandThickness);
		if (maxAbsError < errorThreshold)
			subgridRequired = false; //No need for a subgrid if the coarse SDF is already almost exact

		PxU32 index = Gu::idx3D(blockIdx.x, blockIdx.y, blockIdx.z, backgroundSizeX - 1, backgroundSizeY - 1);

		if (subgridRequired)
		{
			// Fold this subgrid's range into the global quantization range.
			AtomicMin(subgridGlobalMinValue, sdfMin);
			AtomicMax(subgridGlobalMaxValue, sdfMax);

			subgridInfo[index] = 1;
			subgridActive[index] = 1;
		}
		else
		{
			subgridInfo[index] = 0;
			subgridActive[index] = 0;
		}
	}
}
|
||||
|
||||
// Quantizes a [0,1]-normalized SDF sample into the 8- or 16-bit texel layout
// selected by bytesPerSubgridPixel. 4-byte (raw float) pixels are written by
// the caller directly and never reach this helper.
PX_FORCE_INLINE __device__ void storeQuantized(void* PX_RESTRICT destination, PxU32 index, PxReal vNormalized, PxU32 bytesPerSubgridPixel)
{
	const PxReal clamped = PxClamp(vNormalized, 0.0f, 1.0f);

	if (bytesPerSubgridPixel == 1)
	{
		reinterpret_cast<PxU8*>(destination)[index] = PxU8(255.0f * clamped);
	}
	else if (bytesPerSubgridPixel == 2)
	{
		reinterpret_cast<PxU16*>(destination)[index] = PxU16(65535.0f * clamped);
	}
	else
	{
		// Unsupported pixel size.
		assert(0);
	}
}
|
||||
|
||||
// One thread block per subgrid: for every active subgrid, copies its samples
// out of the dense SDF into the sparse 3D-texture-layout buffer, quantizing to
// 8/16 bit when requested. subgridInfo is rewritten in place: inactive
// subgrids get 0xFFFFFFFF, active ones get their encoded (x,y,z) address
// inside the 3D texture.
extern "C" __global__
__launch_bounds__(PxgBVHKernelBlockDim::BUILD_SDF, 1)
void sdfPopulateSdfSubgrids(const PxReal* PX_RESTRICT denseSDF, PxU32 width, PxU32 height, PxU32 depth, PxU32* PX_RESTRICT subgridInfo, PxU8* PX_RESTRICT subgridActive, PxU32 subgridSize, PxU32 w, PxU32 h, PxU32 d,
	void* PX_RESTRICT quantizedSparseSDFIn3DTextureFormat, PxU32 numSubgridsX, PxU32 numSubgridsY, PxU32 numSubgridsZ, const PxReal* subgridsMinSdfValue,
	const PxReal* subgridsMaxSdfValue, PxU32 bytesPerSubgridPixel, PxU32 outputSize)
{
	const PxU32 idx = Gu::idx3D(blockIdx.x, blockIdx.y, blockIdx.z, w, h);
	//if (idx >= w*h*d)
	//	printf("out of range 1\n");

	// addressInfo holds this subgrid's linear slot in the sparse texture
	// (assigned by a prior pass) — read before it is overwritten below.
	const PxU32 addressInfo = subgridInfo[idx];

	__syncthreads(); //Make sure that all threads in thread block have read the addressInfo

	if (subgridActive[idx] == 0)
	{
		// NOTE(review): written by every thread of the block; redundant but harmless.
		subgridInfo[idx] = 0xFFFFFFFF;
		return; //Subgrid does not need to be created
	}

	//if (addressInfo == 0xFFFFFFFF)
	//	printf("address %i %i %i %i\n", addressInfo, PxU32(activeSubgrids[idx]), w, h);

	// Decompose the linear slot into 3D subgrid coordinates inside the texture.
	PxU32 addressX, addressY, addressZ;
	Gu::idToXYZ(addressInfo, numSubgridsX, numSubgridsY, addressX, addressY, addressZ);

	if (threadIdx.x == 0)
		subgridInfo[idx] = Gu::encodeTriple(addressX, addressY, addressZ);

	//if (addressX >= numSubgridsX || addressY >= numSubgridsY || addressZ >= numSubgridsZ)
	//	printf("kernel, subgrid index out of bounds %i %i %i %i\n", addressX, addressY, addressZ, addressInfo);

	// Convert subgrid coordinates to texel coordinates; each subgrid occupies
	// (subgridSize + 1)^3 texels because samples sit on cell corners.
	addressX *= (subgridSize + 1);
	addressY *= (subgridSize + 1);
	addressZ *= (subgridSize + 1);

	//A subgrid has pow3(subgridSize) cells but pow3(subgridSize + 1) samples
	PxU32 numSamplesPerSubgrid = pow3(subgridSize + 1);

	PxU32 tex3DsizeX = numSubgridsX * (subgridSize + 1);
	PxU32 tex3DsizeY = numSubgridsY * (subgridSize + 1);
	//PxU32 tex3DsizeZ = numSubgridsZ * (subgridSize + 1);

	// Strided copy: each thread handles every blockDim.x-th sample.
	for (PxU32 i = threadIdx.x; i < numSamplesPerSubgrid; i += blockDim.x)
	{
		PxU32 xLocal, yLocal, zLocal;
		Gu::idToXYZ(i, subgridSize + 1, subgridSize + 1, xLocal, yLocal, zLocal);

		// Source sample in the dense SDF (blockIdx selects the subgrid).
		const PxU32 index = Gu::idx3D(
			blockIdx.x * subgridSize + xLocal,
			blockIdx.y * subgridSize + yLocal,
			blockIdx.z * subgridSize + zLocal,
			width + 1, height + 1);

		/*if(index >= (width+1)*(height + 1)*(depth + 1))
			printf("out of range 2\n");*/

		PxReal sdfValue = denseSDF[index];
		PxU32 outputIndex = Gu::idx3D(addressX + xLocal, addressY + yLocal, addressZ + zLocal, tex3DsizeX, tex3DsizeY);

		if (outputIndex * bytesPerSubgridPixel < outputSize)
		{
			if (bytesPerSubgridPixel == 4)
			{
				// Raw float storage: no quantization.
				PxReal* ptr32 = reinterpret_cast<PxReal*>(quantizedSparseSDFIn3DTextureFormat);
				ptr32[outputIndex] = sdfValue;
			}
			else
			{
				// Normalize into [0,1] over the global subgrid value range, then
				// quantize to 8 or 16 bit.
				PxReal s = 1.0f / (subgridsMaxSdfValue[0] - subgridsMinSdfValue[0]);
				PxReal vNormalized = (sdfValue - subgridsMinSdfValue[0]) * s;
				storeQuantized(quantizedSparseSDFIn3DTextureFormat, outputIndex, vNormalized, bytesPerSubgridPixel);
			}
		}
		/*else
		{
			printf("out of range %i %i %i %i %i %i %i %i %i %i\n", addressX, xLocal, addressY, yLocal, addressZ, zLocal, bytesPerSubgridPixel, outputIndex, addressInfo, PxU32(activeSubgrids[idx]));
		}*/
	}
	//__syncthreads();
}
|
||||
|
||||
// Scans a dense SDF for "holes": neighboring samples whose sign differs while the value jump
// between them exceeds their physical distance (violating the eikonal property of a true
// distance field). Such jumps occur where the source triangle mesh was not watertight.
// Each corrected sample is compacted into itemLowers/itemUppers via a global exclusive scan.
// sdf                    : dense SDF samples, one PxReal per grid node
// width/height/depth     : number of samples per axis
// cellSize               : sample spacing per axis
// atomicCounter          : global counter driving globalScanExclusive's output compaction
// sampler                : maps integer grid coordinates to world positions (NULL when only counting)
// itemLowers/itemUppers  : output arrays (NULL when only counting); capacity only checked via assert
__device__ void findHoles(const PxReal* PX_RESTRICT sdf, const PxU32 width, const PxU32 height, const PxU32 depth, const PxVec3 cellSize,
	PxU32* atomicCounter, const Gu::GridQueryPointSampler* sampler, float4* PX_RESTRICT itemLowers, float4* PX_RESTRICT itemUppers, PxU32 capacity)
{
	PxI32 id = ((blockIdx.x * blockDim.x) + threadIdx.x);

	bool valueChanged = false;
	PxReal newValue = 0.0f;
	PxU32 px, py, pz;	// only assigned inside the in-range branch; reads below are guarded by valueChanged

	if (id < width * height * depth)
	{
		PxReal initialValue = sdf[id];
		newValue = PxAbs(initialValue);

		Gu::idToXYZ(id, width, height, px, py, pz);

		// Visit the (border-clamped) 26-neighborhood of the current sample.
		for (PxU32 z = PxMax(1u, pz) - 1; z <= PxMin(depth - 1, pz + 1); ++z)
			for (PxU32 y = PxMax(1u, py) - 1; y <= PxMin(height - 1, py + 1); ++y)
				for (PxU32 x = PxMax(1u, px) - 1; x <= PxMin(width - 1, px + 1); ++x)
				{
					if (x == px && y == py && z == pz)
						continue;

					PxU32 index = Gu::idx3D(x, y, z, width, height);
					if (index >= width * height * depth)
						continue;

					PxReal value = sdf[index];

					// A sign change means the surface passes between the two samples.
					if (PxSign(initialValue) != PxSign(value))
					{
						// Euclidean distance between the two neighboring samples.
						PxReal distance = 0;
						if (x != px)
							distance += cellSize.x * cellSize.x;
						if (y != py)
							distance += cellSize.y * cellSize.y;
						if (z != pz)
							distance += cellSize.z * cellSize.z;

						distance = PxSqrt(distance);

						PxReal delta = PxAbs(value - initialValue);

						// If the value jump exceeds the sample distance (with 1% tolerance),
						// shrink this sample's magnitude so the implied gradient stays <= ~1.
						if (0.99f * delta > distance)
						{
							PxReal scaling = distance / delta;
							PxReal v = 0.99f * scaling * initialValue;
							newValue = PxMin(newValue, PxAbs(v));
						}
					}
				}

		// Restore the original inside/outside sign.
		if (initialValue < 0)
			newValue = -newValue;

		valueChanged = newValue != initialValue;
	}

	// Compact corrected samples into a dense output range. Every thread of the launch must
	// reach this call (it is a cooperative scan), including out-of-range threads.
	PxU32 outputIdx = globalScanExclusive<PxgBVHKernelBlockDim::SDF_FIX_HOLES / WARP_SIZE>(valueChanged, atomicCounter);

	if (valueChanged && itemLowers && itemUppers)
	{
		const PxVec3 p = sampler->getPoint(px, py, pz);

		assert(outputIdx < capacity);

		// lower = world-space sample position; upper = grid coordinates plus corrected value.
		itemLowers[outputIdx] = make_float4(p.x, p.y, p.z, 0.0f);
		itemUppers[outputIdx] = make_float4(px, py, pz, newValue);
	}
}
|
||||
|
||||
// Counting-only pass: runs findHoles without output buffers so that, afterwards,
// atomicCounter holds the total number of SDF samples needing hole correction
// (used by the host to size the buffers for sdfFindHoles).
extern "C" __global__
__launch_bounds__(PxgBVHKernelBlockDim::SDF_FIX_HOLES, 1)
void sdfCountHoles(const PxReal* PX_RESTRICT sdf, const PxU32 width, const PxU32 height, const PxU32 depth, const PxVec3 cellSize,
	PxU32* atomicCounter)
{
	findHoles(sdf, width, height, depth, cellSize, atomicCounter, NULL, NULL, NULL, 0);
}
|
||||
|
||||
|
||||
//If the triangle mesh which is used to compute the SDF has a hole, then the SDF values near a sign change will not satisfy the eikonal equation
//This kernel fixes those jumps along sign changes
//Afterwards a jump flood algorithm can be used to fix the vicinity of the sign change
|
||||
// Gathering pass: runs findHoles again (after sdfCountHoles sized the buffers) and emits one
// (position, corrected value) pair per defective sample into itemLowers/itemUppers.
// The sampler is taken by value and forwarded by address to the shared implementation.
// NOTE(review): atomicCounter is presumably reset by the host between the counting and
// gathering passes — confirm in the calling code.
extern "C" __global__
__launch_bounds__(PxgBVHKernelBlockDim::SDF_FIX_HOLES, 1)
void sdfFindHoles(const PxReal* PX_RESTRICT sdf, const PxU32 width, const PxU32 height, const PxU32 depth, const PxVec3 cellSize,
	PxU32* atomicCounter, const Gu::GridQueryPointSampler sampler,
	float4* PX_RESTRICT itemLowers, float4* PX_RESTRICT itemUppers, PxU32 capacity)
{
	findHoles(sdf, width, height, depth, cellSize, atomicCounter, &sampler, itemLowers, itemUppers, capacity);
}
|
||||
|
||||
// Applies the corrections gathered by sdfFindHoles: writes each corrected value back into the
// dense SDF, and rewrites itemUppers[i].xyz from integer grid coordinates to the world-space
// sample position (consumed by the subsequent point-cloud BVH repair pass).
// `depth` is unused here; the layout only needs width/height for 3D indexing.
extern "C" __global__ void sdfApplyHoleCorrections(PxReal* PX_RESTRICT sdf, PxU32 width, PxU32 height, PxU32 depth,
	Gu::GridQueryPointSampler sampler,
	PxVec4* PX_RESTRICT itemUppers, PxU32 numCorrections)
{
	PxI32 id = ((blockIdx.x * blockDim.x) + threadIdx.x);

	if (id < numCorrections)
	{
		// itemUppers holds (x, y, z, correctedSdfValue) with xyz as grid coordinates.
		PxVec4 upper = itemUppers[id];
		PxU32 x = PxU32(upper.x);
		PxU32 y = PxU32(upper.y);
		PxU32 z = PxU32(upper.z);

		const PxVec3 p = sampler.getPoint(x, y, z);

		sdf[Gu::idx3D(x, y, z, width, height)] = upper.w;

		// Replace grid coordinates with the world-space position for later passes.
		itemUppers[id] = PxVec4(p.x, p.y, p.z, upper.w);
	}
}
|
||||
|
||||
//This can be launched on an existing SDF to fix distances given a point cloud where every leaf node was corrected due to a sign change in the SDF causing a gap in distance values larger than the cell size.
//These kinds of gaps can occur at places where the input triangle mesh has holes. Watertight meshes don't need this kind of post-process repair.
//The fast marching method or jump flood could be used as well to fix those defects, but they need either many kernel launches or much more memory compared to the point-cloud tree.
|
||||
// Refines an existing dense SDF against a BVH built over the hole-correction point cloud:
// each grid sample queries its closest corrected point (the current |sdf| value bounds the
// search) and, if that point is strictly closer, replaces the sample's magnitude while
// keeping its inside/outside sign.
extern "C" __global__ __launch_bounds__(256, 1) void sdfCalculateDenseGridPointCloud(PxgBVH bvh,
	Gu::GridQueryPointSampler sampler, PxU32 sizeX, PxU32 sizeY, PxU32 sizeZ, PxReal* PX_RESTRICT sdfData)
{
	// Per-thread BVH traversal stack (thread-local memory; the shared-memory variant is disabled).
	const PxU32 stackSize = 47;
	//__shared__ PxI32 stackMem[256 * stackSize];
	PxI32 stackMem[stackSize];

	// block addressing: one thread per grid sample in a 3D launch
	const PxI32 x = blockIdx.x*blockDim.x + threadIdx.x;
	const PxI32 y = blockIdx.y*blockDim.y + threadIdx.y;
	const PxI32 z = blockIdx.z*blockDim.z + threadIdx.z;

	//const PxI32 threadId = threadIdx.z * 8 * 8 + threadIdx.y * 8 + threadIdx.x;

	if (x < sizeX && y < sizeY && z < sizeZ)
	{
		const PxReal prevSdfValue = sdfData[Gu::idx3D(x, y, z, sizeX, sizeY)];

		const PxVec3 p = sampler.getPoint(x, y, z);

		PxI32* stack = &stackMem[/*stackSize * threadId*/0];

		// Seed the query radius with the current distance magnitude: only strictly closer
		// corrected points can improve this sample.
		ClosestDistanceToPointCloudTraversalWithOffset distQuery(p, PxAbs(prevSdfValue));
		queryBVH(bvh, distQuery, stack, stackSize);

		PxReal d = distQuery.mClosestDistance;
		if (d < PxAbs(prevSdfValue))
		{
			// Preserve the sign of the original value.
			if (prevSdfValue < 0.0f)
				d = -d;

			sdfData[Gu::idx3D(x, y, z, sizeX, sizeY)] = d;
		}
	}
}
|
||||
438
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/algorithms.cu
vendored
Normal file
438
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/algorithms.cu
vendored
Normal file
@@ -0,0 +1,438 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
#include "cuda.h"
|
||||
#include "PxgAlgorithmsData.h"
|
||||
#include "stdio.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Intentionally empty host entry point. NOTE(review): appears to exist so the host linker
// keeps this CUDA translation unit's kernels — confirm against the module loader.
extern "C" __host__ void initAlgorithmsKernels0() {}
|
||||
|
||||
// 16 ints (4x int4) treated as a single value so the radix sort's 16 bucket counters can be
// scanned and accumulated with one vectorized type.
struct int4x4
{
	int4 data[4];
};
|
||||
|
||||
// Packs four int4 vectors into one int4x4 (16 ints total).
PX_FORCE_INLINE PX_CUDA_CALLABLE int4x4 make_int16(const int4& a, const int4& b, const int4& c, const int4& d)
{
	int4x4 packed = { { a, b, c, d } };
	return packed;
}
|
||||
|
||||
// Component-wise addition for CUDA's built-in int4 vector type.
PX_FORCE_INLINE PX_CUDA_CALLABLE int4 operator+(const int4& lhs, const int4& rhs) { return make_int4(lhs.x + rhs.x, lhs.y + rhs.y, lhs.z + rhs.z, lhs.w + rhs.w); }
|
||||
|
||||
// Component-wise addition of all 16 ints of two int4x4 values.
PX_FORCE_INLINE PX_CUDA_CALLABLE int4x4 operator+(const int4x4& lhs, const int4x4& rhs) { return make_int16(lhs.data[0] + rhs.data[0], lhs.data[1] + rhs.data[1], lhs.data[2] + rhs.data[2], lhs.data[3] + rhs.data[3]); }
|
||||
|
||||
// Vector-valued overload of __shfl_up_sync: shuffles each int4 component independently.
PX_FORCE_INLINE __device__ int4 shfl_up_sync(PxU32 mask, int4 var, PxU32 delta, int width)
{
	return make_int4(__shfl_up_sync(mask, var.x, delta, width), __shfl_up_sync(mask, var.y, delta, width), __shfl_up_sync(mask, var.z, delta, width), __shfl_up_sync(mask, var.w, delta, width));
}
|
||||
|
||||
// int4x4 overload of __shfl_up_sync: shuffles each of the four int4 rows independently.
PX_FORCE_INLINE __device__ int4x4 shfl_up_sync(PxU32 mask, int4x4 var, PxU32 delta, int width)
{
	return make_int16(shfl_up_sync(mask, var.data[0], delta, width), shfl_up_sync(mask, var.data[1], delta, width), shfl_up_sync(mask, var.data[2], delta, width), shfl_up_sync(mask, var.data[3], delta, width));
}
|
||||
|
||||
// PxU32 overload forwarding to the CUDA intrinsic, so the scan templates can call
// shfl_up_sync uniformly for scalar and vector types.
PX_FORCE_INLINE __device__ PxU32 shfl_up_sync(PxU32 mask, PxU32 var, PxU32 delta, int width)
{
	return __shfl_up_sync(mask, var, delta, width);
}
|
||||
|
||||
// PxI32 overload forwarding to the CUDA intrinsic (see PxU32 overload above for rationale).
PX_FORCE_INLINE __device__ PxI32 shfl_up_sync(PxU32 mask, PxI32 var, PxU32 delta, int width)
{
	return __shfl_up_sync(mask, var, delta, width);
}
|
||||
|
||||
// 64-bit overload forwarding to the CUDA intrinsic; used by the radix sort's packed
// 6x10-bit bucket counters.
PX_FORCE_INLINE __device__ PxU64 shfl_up_sync(PxU32 mask, PxU64 var, PxU32 delta, int width)
{
	return __shfl_up_sync(mask, var, delta, width);
}
|
||||
|
||||
// Additive identity for the scan element type T; the generic case value-initializes.
// Specialized below because CUDA vector types need explicit make_* construction.
template<typename T>
PX_FORCE_INLINE __device__ T zero()
{
	return T();
}
|
||||
|
||||
// Additive identity for PxU32.
template<>
PX_FORCE_INLINE __device__ PxU32 zero<PxU32>()
{
	return 0;
}
|
||||
|
||||
// Additive identity for int4.
template<>
PX_FORCE_INLINE __device__ int4 zero<int4>()
{
	return make_int4(0, 0, 0, 0);
}
|
||||
|
||||
// Additive identity for int4x4 (all 16 ints zero).
template<>
PX_FORCE_INLINE __device__ int4x4 zero<int4x4>()
{
	return make_int16(make_int4(0, 0, 0, 0), make_int4(0, 0, 0, 0), make_int4(0, 0, 0, 0), make_int4(0, 0, 0, 0));
}
|
||||
|
||||
template<typename T>
|
||||
__device__ void warpScan(T& value, const int lane_id)
|
||||
{
|
||||
#pragma unroll
|
||||
for (int i = 1; i <= 32; i *= 2)
|
||||
{
|
||||
unsigned int mask = 0xffffffff;
|
||||
T n = shfl_up_sync(mask, value, i, 32);
|
||||
|
||||
if (lane_id >= i)
|
||||
value = value + n;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__device__ T scanPerBlock(
|
||||
T value,
|
||||
const PxU32 id,
|
||||
T* sum)
|
||||
{
|
||||
extern __shared__ PxU32 sumsMemory[];
|
||||
T* sums = reinterpret_cast<T*>(sumsMemory);
|
||||
int lane_id = id % warpSize;
|
||||
// determine a warp_id within a block
|
||||
int warp_id = threadIdx.x / warpSize;
|
||||
|
||||
// Now accumulate in log steps up the chain
|
||||
// compute sums, with another thread's value who is
|
||||
// distance delta away (i). Note
|
||||
// those threads where the thread 'i' away would have
|
||||
// been out of bounds of the warp are unaffected. This
|
||||
// creates the scan sum.
|
||||
|
||||
warpScan(value, lane_id);
|
||||
|
||||
// value now holds the scan value for the individual thread
|
||||
// next sum the largest values for each warp
|
||||
|
||||
__syncthreads(); //Required before accessing shared memory because this function can be called inside loops
|
||||
|
||||
// write the sum of the warp to smem
|
||||
if (threadIdx.x % warpSize == warpSize - 1)
|
||||
{
|
||||
sums[warp_id] = value;
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
//
|
||||
// scan sum the warp sums
|
||||
// the same shfl scan operation, but performed on warp sums
|
||||
//
|
||||
if (warp_id == 0 && lane_id < (blockDim.x / warpSize))
|
||||
{
|
||||
T warp_sum = sums[lane_id];
|
||||
|
||||
int mask = (1 << (blockDim.x / warpSize)) - 1;
|
||||
for (int i = 1; i <= (blockDim.x / warpSize); i *= 2)
|
||||
{
|
||||
T n = shfl_up_sync(mask, warp_sum, i, (blockDim.x / warpSize));
|
||||
|
||||
if (lane_id >= i)
|
||||
warp_sum = warp_sum + n;
|
||||
}
|
||||
|
||||
sums[lane_id] = warp_sum;
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
// perform a uniform add across warps in the block
|
||||
// read neighbouring warp's sum and add it to threads value
|
||||
T blockSum = zero<T>();
|
||||
|
||||
if (warp_id > 0)
|
||||
{
|
||||
blockSum = sums[warp_id - 1];
|
||||
}
|
||||
|
||||
value = value + blockSum;
|
||||
|
||||
// last thread has sum, write write out the block's sum
|
||||
if (sum != NULL && threadIdx.x == blockDim.x - 1)
|
||||
*sum = value;
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
// Shared first stage of the two-level scan kernels.
// Scans `data` within each block (inclusive, or exclusive when exclusiveScan != 0), writes
// the per-block total to partialSums[blockIdx.x] (scanned and re-added later by the
// addBlockSums pass), and optionally stores the inclusive grand total in *totalSum.
// For the exclusive variant each block's first output element is zeroed here; the correct
// cross-block carry is produced by the subsequent addBlockSums pass.
template<typename T>
__device__ void scanPerBlockKernelShared(
	int id,
	const T* data,
	T* result,
	T* partialSums,
	const PxU32 length,
	const PxU32 exclusiveScan,
	T* totalSum)
{
	// Out-of-range threads contribute zero so the whole block can participate in the scan.
	T value = id < length ? data[id] : zero<T>();
	value = scanPerBlock(value, id, &partialSums[blockIdx.x]);
	if (totalSum && id == length - 1)
		*totalSum = value;

	// Now write out our result
	if (id < length && result)
	{
		if (exclusiveScan == 0)
			result[id] = value;
		else
		{
			// Shift right by one: element i+1 receives the inclusive scan of element i.
			if (threadIdx.x + 1 < blockDim.x && id + 1 < length)
				result[id + 1] = value;
			if (threadIdx.x == 0)
				result[id] = zero<T>();
		}
	}
}
|
||||
|
||||
// PxU32 instantiation of the per-block scan stage. Must be launched with dynamic shared
// memory for (blockDim.x / warpSize) PxU32 values (see scanPerBlock).
extern "C" __global__ __launch_bounds__(1024, 1) void scanPerBlockKernel(
	const PxU32* data,
	PxU32* result,
	PxU32* partialSums,
	const PxU32 length,
	const PxU32 exclusiveScan,
	PxU32* totalSum)
{
	scanPerBlockKernelShared<PxU32>((blockIdx.x * blockDim.x) + threadIdx.x, data, result, partialSums, length, exclusiveScan, totalSum);
}
|
||||
|
||||
// Converts the 16 bucket totals stored in *values (viewed as 16 PxU32) into an exclusive
// prefix sum, in place — turning bucket counts into bucket start offsets.
// All threads of the block execute the warp scan (lanes >= 16 contribute zeros); only
// threads 0..15 write results.
__device__ void exclusiveSumInt16(int4x4* values)
{
	__syncthreads();
	PxU32* ptr = reinterpret_cast<PxU32*>(values);
	PxU32 value = threadIdx.x < 16 ? ptr[threadIdx.x] : 0;
	warpScan(value, threadIdx.x % warpSize);
	__syncthreads();
	// Shift the inclusive scan right by one and zero the first slot -> exclusive scan.
	if (threadIdx.x < 15)
		ptr[threadIdx.x + 1] = value;
	if (threadIdx.x == 15)
		ptr[0] = 0;
}
|
||||
|
||||
// Per-block scan of int4x4 elements (16 parallel counters per element), used by the 4-bit
// radix sort to scan its 16 bucket counts at once. Mirrors scanPerBlockKernel, applied
// int4 row by int4 row. When the launch consists of a single block, the final 16 totals
// are additionally converted in place to an exclusive sum (bucket start offsets).
extern "C" __global__ __launch_bounds__(512, 1) void scanPerBlockKernel4x4(
	const int4x4* data,
	int4x4* result,
	int4x4* partialSums,
	const PxU32 length,
	const PxU32 exclusiveScan,
	int4x4* totalSum)
{
	int id = ((blockIdx.x * blockDim.x) + threadIdx.x);
	// Resolve this thread's output slot once; r stays NULL for threads that must not write.
	int4x4* r = NULL;
	if (id < length && result)
	{
		if (exclusiveScan == 0)
			r = &result[id];
		else
		{
			// Exclusive variant: shift right by one and zero each block's first element.
			if (threadIdx.x + 1 < blockDim.x && id + 1 < length)
				r = &result[id + 1];
			if (threadIdx.x == 0)
			{
				result[id].data[0] = zero<int4>();
				result[id].data[1] = zero<int4>();
				result[id].data[2] = zero<int4>();
				result[id].data[3] = zero<int4>();
			}
		}
	}

	int4 value;
#pragma unroll
	for (PxI32 i = 0; i < 4; ++i)
	{
		value = id < length ? data[id].data[i] : zero<int4>();
		value = scanPerBlock(value, id, &partialSums[blockIdx.x].data[i]);
		if (r)
			r->data[i] = value;
		if (totalSum && id == length - 1)
			totalSum->data[i] = value;
	}

	// Single-block launch: no addBlockSums pass follows, so finalize the totals here.
	if (totalSum && gridDim.x == 1)
	{
		exclusiveSumInt16(totalSum);
	}
}
|
||||
|
||||
|
||||
// Second stage of the two-level scan: adds this block's (already scanned) partial sum to
// every element the block covers, and folds it into *totalSum at the very last element.
// `partialSums` must already contain the exclusive scan of the per-block totals.
template<typename T>
__device__ void addBlockSumsKernelShared(const T* partialSums, T* data, const PxU32 len, T* totalSum)
{
	const int id = ((blockIdx.x * blockDim.x) + threadIdx.x);

	if (id >= len)
		return;

	// The last element also completes the grand total.
	if (totalSum && id == len - 1)
		*totalSum = *totalSum + partialSums[blockIdx.x];

	if (data)
		data[id] = data[id] + partialSums[blockIdx.x];
}
|
||||
|
||||
// PxU32 instantiation of the add-block-sums stage (completes scanPerBlockKernel's output).
extern "C" __global__ __launch_bounds__(1024, 1) void addBlockSumsKernel(const PxU32* partialSums, PxU32* data, const PxU32 length, PxU32* totalSum)
{
	addBlockSumsKernelShared<PxU32>(partialSums, data, length, totalSum);
}
|
||||
|
||||
// int4x4 variant of the add-block-sums stage (completes scanPerBlockKernel4x4's output).
// Unlike the shared template, the early-out is per-branch so that the last block can still
// finalize the 16 totals into an exclusive sum even when some of its threads are past `len`.
extern "C" __global__ __launch_bounds__(1024, 1) void addBlockSumsKernel4x4(const int4x4* partialSums, int4x4* data, const PxU32 len, int4x4* totalSum)
{
	const int id = ((blockIdx.x * blockDim.x) + threadIdx.x);

	// Complete the grand totals at the very last element.
	if (totalSum && id == len - 1)
	{
		(*totalSum).data[0] = (*totalSum).data[0] + partialSums[blockIdx.x].data[0];
		(*totalSum).data[1] = (*totalSum).data[1] + partialSums[blockIdx.x].data[1];
		(*totalSum).data[2] = (*totalSum).data[2] + partialSums[blockIdx.x].data[2];
		(*totalSum).data[3] = (*totalSum).data[3] + partialSums[blockIdx.x].data[3];
	}

	// Uniform add of the scanned per-block sums.
	if (data && id < len)
	{
		data[id].data[0] = data[id].data[0] + partialSums[blockIdx.x].data[0];
		data[id].data[1] = data[id].data[1] + partialSums[blockIdx.x].data[1];
		data[id].data[2] = data[id].data[2] + partialSums[blockIdx.x].data[2];
		data[id].data[3] = data[id].data[3] + partialSums[blockIdx.x].data[3];
	}

	// The last block converts the 16 bucket totals into bucket start offsets.
	if (totalSum && blockIdx.x == gridDim.x - 1)
	{
		exclusiveSumInt16(totalSum);
	}
}
|
||||
|
||||
// Counting stage of one 4-bit (16-bucket) radix sort pass.
// For each element, extracts the 4-bit digit selected by passIndex and produces:
//  - offsetsPerWarp[id]      : the element's rank among same-digit elements in its block
//  - partialSums[blockIdx.x] : the block's 16 per-digit counts (int4x4 = 16 ints)
//  - totalSum (optional)     : inclusive per-digit counts over the whole array
// The 16 digits are processed in 3 rounds of up to 6 digits each: per round, each digit's
// count occupies a 10-bit field of one PxU64, so a single 64-bit block scan counts 6 digits
// simultaneously. 10 bits suffice because a block holds at most 512 elements.
template<typename T>
__device__ void radixFourBitCountPerBlock(const T* data, PxU16* offsetsPerWarp, PxU32 passIndex, int4x4* partialSums, const PxU32 length, int4x4* totalSum)
{
	int* totalSum1 = reinterpret_cast<int*>(totalSum);
	int* partialSums1 = reinterpret_cast<int*>(&partialSums[blockIdx.x]);

	int id = ((blockIdx.x * blockDim.x) + threadIdx.x);
	int slot = 0;
	if (id < length)
		slot = (data[id] >> (passIndex * 4)) & 15;

	PxU64 value;
	PxU64 partial;
#pragma unroll
	for (int i = 0; i < 3; ++i)
	{
		// Rounds cover digit ranges [0..5], [6..11], [12..15]; `slot` is shifted into the
		// current round's [0..5] window by the `slot -= 6` at the end of each iteration.
		value = 0;
		if (id < length && slot < 6 && slot >= 0)
		{
			value = ((PxU64)1) << (slot * 10);
		}

		value = scanPerBlock<PxU64>(value, id, &partial);
		// The last thread unpacks the block's six 10-bit counters into the partial sums.
		if (threadIdx.x == blockDim.x - 1)
		{
			partialSums1[6 * i] = partial & 0x000003FF;
			partialSums1[6 * i + 1] = (partial >> 10) & 0x000003FF;
			partialSums1[6 * i + 2] = (partial >> 20) & 0x000003FF;
			partialSums1[6 * i + 3] = (partial >> 30) & 0x000003FF;
			// The final round only carries 4 digits (12..15), so fields 4 and 5 are unused.
			if (i < 2)
			{
				partialSums1[6 * i + 4] = (partial >> 40) & 0x000003FF;
				partialSums1[6 * i + 5] = (partial >> 50) & 0x000003FF;
			}
		}

		// The globally last element holds the array-wide inclusive digit counts.
		if (totalSum && id == length - 1)
		{
			totalSum1[6 * i] = value & 0x000003FF;
			totalSum1[6 * i + 1] = (value >> 10) & 0x000003FF;
			totalSum1[6 * i + 2] = (value >> 20) & 0x000003FF;
			totalSum1[6 * i + 3] = (value >> 30) & 0x000003FF;
			if (i < 2)
			{
				totalSum1[6 * i + 4] = (value >> 40) & 0x000003FF;
				totalSum1[6 * i + 5] = (value >> 50) & 0x000003FF;
			}
		}

		// Inclusive count of this element's own digit minus one == rank within the block.
		if (id < length && slot < 6 && slot >= 0)
			offsetsPerWarp[id] = ((value >> (slot * 10)) & 0x000003FF) - 1;
		slot -= 6;
	}
}
|
||||
|
||||
// PxU32-key instantiation of the radix sort counting stage.
extern "C" __global__ __launch_bounds__(512, 1) void radixFourBitCountPerBlockKernel(const PxU32* data, PxU16* offsetsPerWarp, PxU32 passIndex, int4x4* partialSums, const PxU32 length, int4x4* totalSum)
{
	radixFourBitCountPerBlock<PxU32>(data, offsetsPerWarp, passIndex, partialSums, length, totalSum);
}
|
||||
|
||||
// Scatter stage of one 4-bit radix sort pass. Each element's destination is
//   bucketStart (exclusive-summed digit totals in cumulativeSum)
// + rank within its block (offsetsPerWarp, from the counting stage)
// + count of same-digit elements in preceding blocks (exclusive-scanned partialSums).
// When dependent data is supplied it moves along with the keys; on the first pass the
// element's original index is recorded instead, building the sorted->original permutation.
template<typename T, typename U>
__device__ void radixFourBitReorder(const T* data, const PxU16* offsetsPerWarp, T* reordered, PxU32 passIndex, int4x4* partialSums, const PxU32 length, int4x4* cumulativeSum, U* dependentData = NULL, U* dependentDataReordered = NULL)
{
	int* partialSums1 = reinterpret_cast<int*>(partialSums);

	int id = ((blockIdx.x * blockDim.x) + threadIdx.x);
	if (id >= length)
		return;
	int* ptr = reinterpret_cast<int*>(cumulativeSum);
	int slot = (data[id] >> (passIndex * 4)) & 15;

	int newIndex = ptr[slot] + offsetsPerWarp[id] + partialSums1[16 * blockIdx.x + slot];

	if (newIndex < length) //This condition should always be met but in case everything goes wrong, it ensures that no out of bounds access happens
	{
		reordered[newIndex] = data[id];

		if (dependentData && dependentDataReordered)
			dependentDataReordered[newIndex] = passIndex == 0 ? id : dependentData[id];
	}
}
|
||||
|
||||
// PxU32-key / PxU32-payload instantiation of the radix sort scatter stage.
extern "C" __global__ __launch_bounds__(1024, 1) void radixFourBitReorderKernel(const PxU32* data, const PxU16* offsetsPerWarp, PxU32* reordered, PxU32 passIndex, int4x4* partialSums, const PxU32 length, int4x4* cumulativeSum, PxU32* dependentData, PxU32* dependentDataReordered)
{
	radixFourBitReorder<PxU32, PxU32>(data, offsetsPerWarp, reordered, passIndex, partialSums, length, cumulativeSum, dependentData, dependentDataReordered);
}
|
||||
|
||||
// Gathers `data` into `reordered` so that reordered[i] = data[reorderedToOriginalMap[i]];
// used to bring payload arrays into the order produced by the radix sort.
extern "C" __global__ __launch_bounds__(1024, 1) void reorderKernel(const float4* data, float4* reordered, const PxU32 length, const PxU32* reorderedToOriginalMap)
{
	const int tid = blockIdx.x * blockDim.x + threadIdx.x;
	if (tid < length)
		reordered[tid] = data[reorderedToOriginalMap[tid]];
}
|
||||
|
||||
629
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/anisotropy.cu
vendored
Normal file
629
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/anisotropy.cu
vendored
Normal file
@@ -0,0 +1,629 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "vector_types.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
|
||||
#include "matrixDecomposition.cuh"
|
||||
#include "PxParticleGpu.h"
|
||||
#include "PxgAnisotropyData.h"
|
||||
#include "sparseGridStandalone.cuh"
|
||||
|
||||
#define ENABLE_KERNEL_LAUNCH_ERROR_CHECK 0
|
||||
|
||||
// Intentionally empty host entry point. NOTE(review): appears to exist so the host linker
// keeps this CUDA translation unit's kernels — confirm against the module loader.
extern "C" __host__ void initAnisotropyKernels0() {}
|
||||
|
||||
// Converts a CUDA float4 to a PxVec3, discarding the w component.
__device__ inline PxVec3 PxLoad3(const float4& v) { return PxVec3(v.x, v.y, v.z); }
|
||||
// Converts a CUDA float4 to a PxVec4.
__device__ inline PxVec4 PxLoad4(const float4& v) { return PxVec4(v.x, v.y, v.z, v.w); }
|
||||
|
||||
// x^3 via two multiplies (no pow call).
__device__ inline PxReal cube(PxReal x) { const PxReal xx = x * x; return xx * x; }
|
||||
// Smoothing kernel weight W(x) = 1 - (x/r)^3, with invr = 1/r.
// Equals 1 at x = 0 and falls to 0 at x = r. Not clamped: x > r yields negative weights;
// callers in this file only evaluate it for x below the contact distance.
__device__ inline PxReal Wa(PxReal x, PxReal invr)
{
	return 1.f - cube(x*invr);
}
|
||||
template <typename V, typename T>
|
||||
__device__ inline V Lerp(const V& start, const V& end, const T& t)
|
||||
{
|
||||
return start + (end - start) * t;
|
||||
}
|
||||
// Component-wise clamp of a 3-component vector `a` into [s, t].
// NOTE(review): requires V to expose operator[] and a 3-scalar constructor (e.g. PxVec3);
// only the first three components are considered.
template <typename V, typename T>
__device__ inline V Clamp(const V& a, const T s, const T t) {
	return V(PxMin(t, PxMax(s, a[0])),
		PxMin(t, PxMax(s, a[1])),
		PxMin(t, PxMax(s, a[2])));
}
|
||||
|
||||
//
|
||||
//extern "C" __global__ void smoothPositionsLaunch(PxU32* sortedOrder, PxU32* cellEnds, PxU float4* pose, PxU32* phase, float particleContactDistance)
|
||||
//{
|
||||
// const float4* const PX_RESTRICT sortedPose = reinterpret_cast<float4*>(particleSystem.mSortedPositions_InvMass);
|
||||
// const PxU32* const PX_RESTRICT sortedPhases = particleSystem.mSortedPhaseArray;
|
||||
//
|
||||
// const PxU32* const PX_RESTRICT cellStart = particleSystem.mCellStart;
|
||||
// const PxU32* const PX_RESTRICT cellEnd = particleSystem.mCellEnd;
|
||||
//
|
||||
// const PxReal particleContactDistanceSq = particleContactDistance * particleContactDistance;
|
||||
// const PxReal particleContactDistanceInv = 1.0f / particleContactDistance;
|
||||
//
|
||||
// // calculated the sum of weights and weighted avg position for particle neighborhood
|
||||
// PxVec3 xs(0.0f); //sum of positions
|
||||
// PxReal ws = 0.0f; //sum of weights
|
||||
// for (int z = -1; z <= 1; z++)
|
||||
// {
|
||||
// for (int y = -1; y <= 1; y++)
|
||||
// {
|
||||
// for (int x = -1; x <= 1; x++)
|
||||
// {
|
||||
// int3 neighbourPos = make_int3(gridPos.x + x, gridPos.y + y, gridPos.z + z);
|
||||
// PxU32 gridHash = calcGridHash(neighbourPos, gridSize);
|
||||
// PxU32 startIndex = cellStart[gridHash];
|
||||
// PxU32 endIndex = cellEnd[gridHash];
|
||||
//
|
||||
// if (startIndex != EMPTY_CELL)
|
||||
// {
|
||||
// PxU32 nextPhase = sortedPhases[startIndex];
|
||||
// float4 nextPos = fetch(&sortedPose[startIndex]);
|
||||
//
|
||||
// for (PxU32 particleIndex1 = startIndex; particleIndex1 < endIndex /*&& numCollidedParticles < maxNeighborhood*/; particleIndex1++)
|
||||
// {
|
||||
// const PxU32 phase2 = nextPhase;
|
||||
// const float4 pos2 = nextPos;
|
||||
//
|
||||
// if ((particleIndex1 + 1) < endIndex)
|
||||
// {
|
||||
// nextPhase = sortedPhases[particleIndex1 + 1];
|
||||
// nextPos = fetch(&sortedPose[particleIndex1 + 1]);
|
||||
// }
|
||||
//
|
||||
// if (phase2 & validPhaseMask)
|
||||
// {
|
||||
// const PxVec3 xj = PxLoad3(pos2);
|
||||
// const PxVec3 xij = xi - xj;
|
||||
//
|
||||
// const PxReal dsq = xij.magnitudeSquared();
|
||||
//
|
||||
// if (0.0f < dsq && dsq < particleContactDistanceSq)
|
||||
// {
|
||||
// const PxReal w = Wa(sqrtf(dsq), particleContactDistanceInv);
|
||||
// ws += w;
|
||||
// xs += xj * w;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (ws > 0.f)
|
||||
// {
|
||||
// PxReal f = 4.0f*Wa(particleContactDistance*0.5f, particleContactDistanceInv);
|
||||
// PxReal smooth = PxMin(1.0f, ws / f)*smoothing;
|
||||
// xs /= ws;
|
||||
// xi = Lerp(xi, xs, smooth);
|
||||
// }
|
||||
//
|
||||
// //write smoothed positions back in API order
|
||||
// smoothedPositions[origIdx] = make_float4(xi.x, xi.y, xi.z, xi4.w);
|
||||
//}
|
||||
|
||||
// Smooths particle positions in a fluid by moving them closer to the weighted
|
||||
// average position of their local neighborhood
|
||||
// Smooths particle positions in a fluid by moving each fluid particle toward the weighted
// average position of its neighborhood (one thread per particle, in sorted order).
// Neighbors come from the precomputed self-collision lists (mCollisionIndex, strided by
// numParticles per contact slot); results are written back in original API order.
// Non-fluid particles pass through unsmoothed.
extern "C" __global__ void smoothPositionsLaunch(PxGpuParticleSystem* particleSystems, const PxU32 id, PxSmoothedPositionData* smoothingDataPerParticleSystem)
{
	PxGpuParticleSystem& particleSystem = particleSystems[id];

	const PxU32 numParticles = particleSystem.mCommonData.mNumParticles;

	PxSmoothedPositionData& data = smoothingDataPerParticleSystem[id];
	const PxReal smoothing = data.mSmoothing;	// user smoothing strength in [0..1] — TODO confirm range

	//pointers to global memory buffers -- inputs
	const float4* PX_RESTRICT sortedNewPositions = particleSystem.mSortedPositions_InvMass;
	const PxU32* PX_RESTRICT phases = particleSystem.mSortedPhaseArray;
	const PxU32* PX_RESTRICT collisionIndex = particleSystem.mCollisionIndex;
	const PxU32* PX_RESTRICT gridParticleIndices = particleSystem.mSortedToUnsortedMapping;

	//pointers to smoothed position buffers -- outputs
	float4* PX_RESTRICT smoothPosOrig = reinterpret_cast<float4*>(data.mPositions); // particleSystem.mSmoothedOriginPos_InvMass;

	const PxU32 globalThreadIdx = blockIdx.x * blockDim.x + threadIdx.x;

	if (globalThreadIdx >= numParticles)
		return; //skip thread if it's past the end of particle list

	const PxU32 p = globalThreadIdx; //index of particle in sorted order
	const PxU32 origIdx = gridParticleIndices[globalThreadIdx];

	const PxVec4 xi4 = PxLoad4(sortedNewPositions[p]);

	//ignore smoothing for non-fluid particles & write original position
	if (!PxGetFluid(phases[p]))
	{
		PxVec4 x = PxLoad4(sortedNewPositions[p]);
		smoothPosOrig[origIdx] = make_float4(x.x, x.y, x.z, x.w);
		return;
	}

	PxVec3 xi = PxVec3(xi4.x, xi4.y, xi4.z);

	PxU32 contactCount = particleSystem.mParticleSelfCollisionCount[p];

	// calculated the sum of weights and weighted avg position for particle neighborhood
	PxVec3 xs(0.0f); //sum of positions
	PxReal ws = 0.0f; //sum of weights
	// Contact i of particle p lives at collisionIndex[p + i * numParticles].
	for (PxU32 i = 0, offset = p; i < contactCount; ++i, offset += numParticles)
	{
		const PxU32 q = collisionIndex[offset];
		if (PxGetFluid(phases[q])) //ignore non-fluid particles
		{
			const PxVec3 xj = PxLoad3(sortedNewPositions[q]);
			const PxVec3 xij = xi - xj;

			const PxReal dsq = xij.magnitudeSquared();

			// dsq > 0 also excludes exact duplicates (and self, if ever listed).
			if (0.0f < dsq && dsq < particleSystem.mCommonData.mParticleContactDistanceSq)
			{
				const PxReal w = Wa(sqrtf(dsq), particleSystem.mCommonData.mParticleContactDistanceInv);
				ws += w;
				xs += xj * w;
			}
		}
	}

	if (ws > 0.f)
	{
		// Normalize the smoothing strength by a reference weight sum (4 neighbors at half
		// the contact distance) so sparse neighborhoods are smoothed less.
		PxReal f = 4.0f*Wa(particleSystem.mCommonData.mParticleContactDistance*0.5f, particleSystem.mCommonData.mParticleContactDistanceInv);
		PxReal smooth = PxMin(1.0f, ws / f)*smoothing;
		xs /= ws;
		xi = Lerp(xi, xs, smooth);
	}

	//write smoothed positions back in API order (w keeps the original inverse mass)
	smoothPosOrig[origIdx] = make_float4(xi.x, xi.y, xi.z, xi4.w);
}
|
||||
|
||||
// Calculates Eigen-decomposition of the particle covariance matrix according
// to "Reconstructing Surfaces of Particle-Based Fluids Using Anisotropic Kernels"
//
// One thread per particle (sorted order). For each fluid particle the kernel
// computes the weighted covariance of its neighborhood, eigen-decomposes it,
// and writes three anisotropy axes q1/q2/q3 (xyz = eigenvector, w = scaled
// axis length) back in API (unsorted) particle order.
extern "C" __global__ void calculateAnisotropyLaunch(PxGpuParticleSystem* particleSystems, const PxU32 id, PxAnisotropyData* anisotropyDataPerParticleSystem)
{
	PxGpuParticleSystem& particleSystem = particleSystems[id];

	const PxU32 numParticles = particleSystem.mCommonData.mNumParticles;

	//pointers to global memory buffers -- inputs
	const float4* PX_RESTRICT sortedNewPositions = particleSystem.mSortedPositions_InvMass;
	const PxU32* PX_RESTRICT phases = particleSystem.mSortedPhaseArray;
	const PxU32* PX_RESTRICT collisionIndex = particleSystem.mCollisionIndex;
	const PxU32* PX_RESTRICT gridParticleIndices = particleSystem.mSortedToUnsortedMapping;

	const PxAnisotropyData& anisotropyData = anisotropyDataPerParticleSystem[id];
	float4* PX_RESTRICT q1 = reinterpret_cast<float4*>(anisotropyData.mAnisotropy_q1);
	float4* PX_RESTRICT q2 = reinterpret_cast<float4*>(anisotropyData.mAnisotropy_q2);
	float4* PX_RESTRICT q3 = reinterpret_cast<float4*>(anisotropyData.mAnisotropy_q3);

	const PxU32 globalThreadIdx = blockIdx.x * blockDim.x + threadIdx.x;

	if (globalThreadIdx >= numParticles)
		return; //skip thread if it's past the end of particle list

	const PxU32 p = globalThreadIdx; //index of particle in sorted order
	const PxU32 origIdx = gridParticleIndices[globalThreadIdx];

	//ignore anisotropy for non-fluid particles
	if (!PxGetFluid(phases[p]))
	{
		// Non-fluid particles get an axis-aligned, radially symmetric fallback
		// with the minimum allowed axis length.
		float r = anisotropyData.mAnisotropyMin * particleSystem.mCommonData.mParticleContactDistance;
		q1[origIdx] = make_float4(1.0f, 0.0f, 0.0f, r);
		q2[origIdx] = make_float4(0.0f, 1.0f, 0.0f, r);
		q3[origIdx] = make_float4(0.0f, 0.0f, 1.0f, r);
		/*if (globalThreadIdx == 0)
		printf("PxGetFluid\n");*/
		return;
	}

	const PxVec3 xi = PxLoad3(sortedNewPositions[p]);

	PxU32 contactCount = particleSystem.mParticleSelfCollisionCount[p];

	// calculated the sum of weights and weighted avg position for particle neighborhood
	PxVec3 xs(0.f); //sum of positions
	float ws = 0.f; //sum of weights

	// Manual software pipeline: neighbor index/position/phase for the upcoming
	// iterations are fetched ahead of use to hide global-memory latency.
	PxU32 nextQ;
	PxU32 nextNextQ;
	PxVec4 xj4Next;
	PxU32 nextPhase;

	// collisionIndex is laid out strided: the i-th contact of particle p is
	// stored at p + i*numParticles.
	PxU32 offset = p;

	if (contactCount > 0)
	{
		nextQ = collisionIndex[offset];
		xj4Next = PxLoad4(sortedNewPositions[nextQ]);
		nextPhase = phases[nextQ];

		offset += numParticles;
	}
	if (contactCount > 1)
	{
		nextNextQ = collisionIndex[offset];
		offset += numParticles;
	}

	for (PxU32 i = 0; i < contactCount; ++i, offset += numParticles)
	{
		const PxVec4 xj4 = xj4Next;
		const PxU32 phase2 = nextPhase;

		if ((i + 1) < contactCount)
		{
			// Advance the prefetch pipeline one step.
			xj4Next = PxLoad4(sortedNewPositions[nextNextQ]);
			nextPhase = phases[nextNextQ];

			nextQ = nextNextQ; // NOTE(review): nextQ is not read again inside this loop

			if ((i + 2) < contactCount)
				nextNextQ = collisionIndex[offset];

		}
		if (PxGetFluid(phase2)) //ignore non-fluid particles
		{

			const PxVec3 xj(xj4.x, xj4.y, xj4.z);
			const PxVec3 xij = xi - xj;

			const PxReal dsq = xij.magnitudeSquared();

			// dsq > 0 excludes the particle itself / coincident particles;
			// only neighbors strictly inside the contact distance contribute.
			if (0.f < dsq && dsq < particleSystem.mCommonData.mParticleContactDistanceSq)
			{
				const PxReal w = Wa(sqrtf(dsq), particleSystem.mCommonData.mParticleContactDistanceInv);
				ws += w;
				xs += xj * w;
			}
		}
	}

	// set to radial and exit early in case of isolated particles
	if (ws == 0.0f)
	{
		float r = anisotropyData.mAnisotropyMin * particleSystem.mCommonData.mParticleContactDistance;
		q1[origIdx] = make_float4(1.0f, 0.0f, 0.0f, r);
		q2[origIdx] = make_float4(0.0f, 1.0f, 0.0f, r);
		q3[origIdx] = make_float4(0.0f, 0.0f, 1.0f, r);
		//if(globalThreadIdx==0)
		//printf("%i, ws\n", contactCount);
		return;
	}

	//compute inverse sum weight and weight the average position
	float invWs = 1.f / ws;
	xs *= invWs;

	PxMat33 covariance(PxVec3(0.0f), PxVec3(0.0f), PxVec3(0.0f));

	// Second pass over the same neighborhood (same prefetch pipeline as the
	// first pass), this time accumulating the weighted covariance around xs.
	offset = p;

	if (contactCount > 0)
	{
		nextQ = collisionIndex[offset];
		xj4Next = PxLoad4(sortedNewPositions[nextQ]);
		nextPhase = phases[nextQ];

		offset += numParticles;
	}
	if (contactCount > 1)
	{
		nextNextQ = collisionIndex[offset];
		offset += numParticles;
	}

	// use weighted average position to calculate the covariance matrix
	for (PxU32 i = 0; i < contactCount; ++i, offset += numParticles)
	{
		const PxVec4 xj4 = xj4Next;
		const PxU32 phase2 = nextPhase;

		if ((i + 1) < contactCount)
		{
			xj4Next = PxLoad4(sortedNewPositions[nextNextQ]);
			nextPhase = phases[nextNextQ];

			nextQ = nextNextQ;

			if ((i + 2) < contactCount)
				nextNextQ = collisionIndex[offset];

		}
		if (PxGetFluid(phase2)) //ignore non-fluid particles
		{
			const PxVec3 xj(xj4.x, xj4.y, xj4.z);
			const PxVec3 xij = xi - xj;

			const PxReal dsq = xij.magnitudeSquared();

			if (0.f < dsq && dsq < particleSystem.mCommonData.mParticleContactDistanceSq)
			{
				const PxReal w = Wa(sqrtf(dsq), particleSystem.mCommonData.mParticleContactDistanceInv);
				const PxVec3 xjs = xj - xs;
				covariance += PxMat33::outer(w*xjs, xjs);
			}
		}
	}

	covariance *= invWs;

	//calculate the eigen decomposition
	// presumably diagonalizes covariance in place (diagonal = eigenvalues) and
	// returns the eigenvectors as the columns of r -- confirm against the
	// eigenDecomposition implementation
	PxMat33 r;
	eigenDecomposition(covariance, r);

	//sanitize the eigen values (diagonal of covariance matrix)
	covariance[0][0] = max(covariance[0][0], 0.f);
	covariance[1][1] = max(covariance[1][1], 0.f);
	covariance[2][2] = max(covariance[2][2], 0.f);

	// Eigenvalues are variances; sqrt gives the standard deviation per axis.
	PxVec3 lambda(sqrtf(covariance[0][0]), sqrtf(covariance[1][1]), sqrtf(covariance[2][2]));
	//PxVec3 lambda(covariance[0][0], covariance[1][1], covariance[2][2]);

	const float ks = anisotropyData.mAnisotropy;
	const float kmin = anisotropyData.mAnisotropyMin * particleSystem.mCommonData.mParticleContactDistance;
	const float kmax = anisotropyData.mAnisotropyMax * particleSystem.mCommonData.mParticleContactDistance;

	// Scale by the user anisotropy factor and clamp axis lengths to the
	// configured [kmin, kmax] range.
	lambda *= ks;
	lambda = Clamp(lambda, kmin, kmax);

	//write out the anisotropy vectors
	q1[origIdx] = make_float4(r.column0.x, r.column0.y, r.column0.z, lambda.x);
	q2[origIdx] = make_float4(r.column1.x, r.column1.y, r.column1.z, lambda.y);
	q3[origIdx] = make_float4(r.column2.x, r.column2.y, r.column2.z, lambda.z);
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Anisotropy computation based on a sparse-subgrid neighborhood search
// (instead of the per-contact lists used by calculateAnisotropyLaunch).
// One thread per particle; neighbors are gathered from the 3x3x3 block of
// subgrids around the particle's own subgrid. Outputs q1/q2/q3 in original
// (API) particle order: xyz = eigenvector, w = scaled axis length.
extern "C" __global__ __launch_bounds__(256, 1) void anisotropyKernel(const float4* const PX_RESTRICT deviceParticlePos,
	const PxU32* const PX_RESTRICT sortedToOriginalParticleIndex, const PxU32* const PX_RESTRICT sortedParticleToSubgrid, PxU32 maxNumSubgrids,
	const PxU32* const PX_RESTRICT subgridNeighbors, const PxU32* const PX_RESTRICT subgridEndIndices, int numParticles, PxU32* phases, PxU32 validPhaseMask,
	float4* q1, float4* q2, float4* q3, PxReal anisotropy, PxReal anisotropyMin, PxReal anisotropyMax, PxReal particleContactDistance)
{
	PxI32 threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
	if (threadIndex >= numParticles)
		return;

	// Map from sorted thread order to the particle's original index.
	PxI32 pNr = sortedToOriginalParticleIndex[threadIndex];

	PxU32 subgridIndex = sortedParticleToSubgrid[threadIndex];
	// Particles outside the subgrid structure, or whose phase does not match
	// the valid mask (phases may be null = accept all), get the radially
	// symmetric fallback.
	if (subgridIndex >= maxNumSubgrids || (phases && !(phases[pNr] & validPhaseMask)))
	{
		float r = anisotropyMin * particleContactDistance;
		q1[pNr] = make_float4(1.0f, 0.0f, 0.0f, r);
		q2[pNr] = make_float4(0.0f, 1.0f, 0.0f, r);
		q3[pNr] = make_float4(0.0f, 0.0f, 1.0f, r);
		return;
	}

	PxVec3 xi = PxLoad3(deviceParticlePos[pNr]);

	const PxReal particleContactDistanceSq = particleContactDistance * particleContactDistance;
	const PxReal particleContactDistanceInv = 1.0f / particleContactDistance;

	// calculated the sum of weights and weighted avg position for particle neighborhood
	PxVec3 xs(0.f); //sum of positions
	float ws = 0.f; //sum of weights
	// First pass: scan the 3x3x3 neighboring subgrids and accumulate the
	// kernel-weighted mean position of all valid neighbors.
	for (int z = -1; z <= 1; z++)
	{
		for (int y = -1; y <= 1; y++)
		{
			for (int x = -1; x <= 1; x++)
			{
				PxU32 n = subgridNeighborOffset(subgridNeighbors, subgridIndex, x, y, z);
				if (n == EMPTY_SUBGRID)
					continue;

				// subgridEndIndices holds exclusive end offsets per subgrid;
				// the start of subgrid n is the end of subgrid n-1.
				int start = n == 0 ? 0 : subgridEndIndices[n - 1];
				int end = subgridEndIndices[n];
				for (int i = start; i < end; ++i)
				{
					int j = sortedToOriginalParticleIndex[i];
					if (phases && !(phases[j] & validPhaseMask))
						continue;

					PxVec3 xj = PxLoad3(deviceParticlePos[j]);

					const PxVec3 xij = xi - xj;

					const PxReal dsq = xij.magnitudeSquared();

					// dsq > 0 excludes the particle itself / coincident particles.
					if (0.f < dsq && dsq < particleContactDistanceSq)
					{
						const PxReal w = Wa(sqrtf(dsq), particleContactDistanceInv);
						ws += w;
						xs += xj * w;
					}
				}
			}
		}
	}

	// set to radial and exit early in case of isolated particles
	if (ws == 0.0f)
	{
		float r = anisotropyMin * particleContactDistance;
		q1[pNr] = make_float4(1.0f, 0.0f, 0.0f, r);
		q2[pNr] = make_float4(0.0f, 1.0f, 0.0f, r);
		q3[pNr] = make_float4(0.0f, 0.0f, 1.0f, r);
		//if(globalThreadIdx==0)
		//printf("%i, ws\n", contactCount);
		return;
	}

	//compute inverse sum weight and weight the average position
	float invWs = 1.f / ws;
	xs *= invWs;

	PxMat33 covariance(PxVec3(0.0f), PxVec3(0.0f), PxVec3(0.0f));

	// Second pass over the same neighborhood: accumulate the weighted
	// covariance of neighbor positions around the mean xs.
	for (int z = -1; z <= 1; z++)
	{
		for (int y = -1; y <= 1; y++)
		{
			for (int x = -1; x <= 1; x++)
			{
				PxU32 n = subgridNeighborOffset(subgridNeighbors, subgridIndex, x, y, z);
				if (n == EMPTY_SUBGRID)
					continue;

				int start = n == 0 ? 0 : subgridEndIndices[n - 1];
				int end = subgridEndIndices[n];
				for (int i = start; i < end; ++i)
				{
					int j = sortedToOriginalParticleIndex[i];
					if (phases && !(phases[j] & validPhaseMask))
						continue;

					PxVec3 xj = PxLoad3(deviceParticlePos[j]);
					const PxVec3 xij = xi - xj;

					const PxReal dsq = xij.magnitudeSquared();

					if (0.f < dsq && dsq < particleContactDistanceSq)
					{
						const PxReal w = Wa(sqrtf(dsq), particleContactDistanceInv);
						const PxVec3 xjs = xj - xs;
						covariance += PxMat33::outer(w*xjs, xjs);
					}
				}
			}
		}
	}

	covariance *= invWs;

	//calculate the eigen decomposition
	// presumably diagonalizes covariance in place (diagonal = eigenvalues) and
	// returns the eigenvectors as the columns of r -- confirm against the
	// eigenDecomposition implementation
	PxMat33 r;
	eigenDecomposition(covariance, r);

	//sanitize the eigen values (diagonal of covariance matrix)
	covariance[0][0] = max(covariance[0][0], 0.f);
	covariance[1][1] = max(covariance[1][1], 0.f);
	covariance[2][2] = max(covariance[2][2], 0.f);

	// Eigenvalues are variances; sqrt gives the standard deviation per axis.
	PxVec3 lambda(sqrtf(covariance[0][0]), sqrtf(covariance[1][1]), sqrtf(covariance[2][2]));
	//PxVec3 lambda(covariance[0][0], covariance[1][1], covariance[2][2]);

	const float ks = anisotropy;
	const float kmin = anisotropyMin * particleContactDistance;
	const float kmax = anisotropyMax * particleContactDistance;

	// Scale by the user anisotropy factor and clamp to the configured range.
	lambda *= ks;
	lambda = Clamp(lambda, kmin, kmax);

	//write out the anisotropy vectors
	q1[pNr] = make_float4(r.column0.x, r.column0.y, r.column0.z, lambda.x);
	q2[pNr] = make_float4(r.column1.x, r.column1.y, r.column1.z, lambda.y);
	q3[pNr] = make_float4(r.column2.x, r.column2.y, r.column2.z, lambda.z);
}
|
||||
|
||||
// Laplacian-style position smoothing based on the sparse-subgrid neighborhood
// search. One thread per particle: blends each particle's position towards the
// kernel-weighted average of its neighborhood and writes the result to
// smoothPos in original (API) particle order. Input positions are not modified.
extern "C" __global__ void smoothPositionsKernel(float4* deviceParticlePos, PxU32* sortedToOriginalParticleIndex, PxU32* sortedParticleToSubgrid, PxU32 maxNumSubgrids,
	PxU32* subgridNeighbors, PxU32* subgridEndIndices, int numParticles, PxU32* phases, PxU32 validPhaseMask, float4* smoothPos, PxReal smoothing, PxReal particleContactDistance)
{
	PxI32 threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
	if (threadIndex >= numParticles)
		return;

	// Map from sorted thread order to the particle's original index.
	PxI32 pNr = sortedToOriginalParticleIndex[threadIndex];
	float4 xi4 = deviceParticlePos[pNr];

	PxU32 subgridIndex = sortedParticleToSubgrid[threadIndex];
	// Particles outside the subgrid structure, or whose phase does not match
	// the valid mask (phases may be null = accept all), pass through unchanged.
	if (subgridIndex >= maxNumSubgrids || (phases && !(phases[pNr] & validPhaseMask)))
	{
		smoothPos[pNr] = xi4;
		return;
	}

	PxVec3 xi = PxLoad3(deviceParticlePos[pNr]);

	const PxReal particleContactDistanceSq = particleContactDistance * particleContactDistance;
	const PxReal particleContactDistanceInv = 1.0f / particleContactDistance;

	// calculated the sum of weights and weighted avg position for particle neighborhood
	PxVec3 xs(0.0f); //sum of positions
	PxReal ws = 0.0f; //sum of weights
	// Scan the 3x3x3 neighboring subgrids and accumulate the kernel-weighted
	// mean position of all valid neighbors.
	for (int z = -1; z <= 1; z++)
	{
		for (int y = -1; y <= 1; y++)
		{
			for (int x = -1; x <= 1; x++)
			{
				PxU32 n = subgridNeighborOffset(subgridNeighbors, subgridIndex, x, y, z);
				if (n == EMPTY_SUBGRID)
					continue;

				// subgridEndIndices holds exclusive end offsets per subgrid;
				// the start of subgrid n is the end of subgrid n-1.
				int start = n == 0 ? 0 : subgridEndIndices[n - 1];
				int end = subgridEndIndices[n];
				for (int i = start; i < end; ++i)
				{
					int j = sortedToOriginalParticleIndex[i];
					if (phases && !(phases[j] & validPhaseMask))
						continue;

					PxVec3 xj = PxLoad3(deviceParticlePos[j]);

					//Now do the actual calculation
					const PxVec3 xij = xi - xj;

					const PxReal dsq = xij.magnitudeSquared();

					// dsq > 0 excludes the particle itself / coincident particles.
					if (0.0f < dsq && dsq < particleContactDistanceSq)
					{
						const PxReal w = Wa(sqrtf(dsq), particleContactDistanceInv);
						ws += w;
						xs += xj * w;
					}
				}
			}
		}
	}

	if (ws > 0.f)
	{
		// Normalize the smoothing strength by the weight a particle would get
		// from a half-contact-distance neighborhood, so sparsely surrounded
		// particles are smoothed less.
		PxReal f = 4.0f*Wa(particleContactDistance*0.5f, particleContactDistanceInv);
		PxReal smooth = PxMin(1.0f, ws / f)*smoothing;
		xs /= ws;
		xi = Lerp(xi, xs, smooth);
	}

	//write smoothed positions back in API order
	// (w component keeps the original inverse mass stored in xi4.w)
	smoothPos[pNr] = make_float4(xi.x, xi.y, xi.z, xi4.w);
}
|
||||
|
||||
|
||||
213
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/attachments.cuh
vendored
Normal file
213
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/attachments.cuh
vendored
Normal file
@@ -0,0 +1,213 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __ATTACHMENTS_CUH__
|
||||
#define __ATTACHMENTS_CUH__
|
||||
|
||||
#include "foundation/PxVecMath.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// A cone limit is considered active as soon as any one of its three parameters
// has been set to a non-negative value; negative values mean "disabled".
__device__ inline bool isConeLimitedEnabled(float maxAngle, float minDist, float maxDist)
{
	const bool angleLimited = (maxAngle >= 0.f);
	const bool distanceLimited = (minDist >= 0.f) || (maxDist >= 0.f);
	return angleLimited || distanceLimited;
}
|
||||
|
||||
/**
|
||||
* \param maxAngle The cone opening angle measured from the axis.
|
||||
* \param minDist Minimum distance measure from the cone tip.
|
||||
* \param maxDist Maximum distance measured from the cone tip.
|
||||
* \param relPos Position relative to the cone tip for which the error is computed (the minimum translation
|
||||
* vector to get from relPos to the allowed cone volume)
|
||||
*/
|
||||
__device__ inline PxVec3 computeConeLimitedError(float maxAngle, float minDist, float maxDist, const PxVec3& coneAxis, const PxVec3& relPos)
|
||||
{
|
||||
PxReal len = relPos.magnitude();
|
||||
|
||||
// angle constraint
|
||||
PxVec3 dir;
|
||||
if(maxAngle == 0.f)
|
||||
{
|
||||
dir = coneAxis;
|
||||
}
|
||||
else if(maxAngle > 0.f)
|
||||
{
|
||||
dir = (len > 1.0e-6f) ? (relPos / len) : coneAxis;
|
||||
const PxReal cosAngle = dir.dot(coneAxis);
|
||||
PxReal cosMaxAngle;
|
||||
PxReal sinMaxAngle;
|
||||
PxSinCos(maxAngle, sinMaxAngle, cosMaxAngle); // could be precomputed
|
||||
if(cosAngle < cosMaxAngle) // if theta > maxAngle
|
||||
{
|
||||
PxVec3 t1 = dir.cross(coneAxis);
|
||||
PxVec3 b1 = coneAxis.cross(t1).getNormalized();
|
||||
dir = cosMaxAngle * coneAxis + sinMaxAngle * b1; // new direction that is "maxAngle" deviated from the world axis.
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
dir = (len > 1.0e-6f) ? (relPos / len) : coneAxis;
|
||||
}
|
||||
|
||||
// length constraint
|
||||
len = PxClamp(len, minDist, maxDist >= 0.f ? maxDist : FLT_MAX);
|
||||
|
||||
return relPos - len * dir; // ideal relPos = len * dir
|
||||
}
|
||||
|
||||
// Computes the PGS delta impulse for a point attachment constraint between a
// rigid body (vel0) and an attached point moving with linVel1.
//
// Packed inputs (per constraint row K in {0,1,2}):
//   raXnK_biasW.xyz            lever-arm row used to project the rigid angular
//                              velocity onto axis K (presumably rA x basisK --
//                              confirm at the constraint-prep code)
//   raXnK_biasW.w              position error component K (see definition below)
//   velMultiplierXYZ_invMassW  xyz = per-axis velocity multipliers, w = rigid inverse mass
//   low_high_limits            x = min distance, y = max distance of the cone limit
//   worldAxis_angle            xyz = cone axis in world space, w = cone opening angle
//
// Outputs deltaLinVel/deltaAngVel to be applied to the rigid body and returns
// the delta impulse itself.
PX_FORCE_INLINE __device__ PxVec3 calculateAttachmentDeltaImpulsePGS(const float4& raXn0_biasW, const float4& raXn1_biasW,
	const float4& raXn2_biasW, const float4& velMultiplierXYZ_invMassW,
	const float4& low_high_limits, const float4& worldAxis_angle,
	const PxgVelocityPackPGS& vel0, const PxVec3& linVel1, PxReal invDt,
	PxReal biasFactor, PxVec3& deltaLinVel, PxVec3& deltaAngVel)
{
	const PxVec3 raXn0 = PxVec3(raXn0_biasW.x, raXn0_biasW.y, raXn0_biasW.z);
	const PxVec3 raXn1 = PxVec3(raXn1_biasW.x, raXn1_biasW.y, raXn1_biasW.z);
	const PxVec3 raXn2 = PxVec3(raXn2_biasW.x, raXn2_biasW.y, raXn2_biasW.z);

	// Compute the normal velocity of the constraint.

	const PxReal velOfRigidAtAttachmentPointX = vel0.linVel.x + vel0.angVel.dot(raXn0);
	const PxReal velOfRigidAtAttachmentPointY = vel0.linVel.y + vel0.angVel.dot(raXn1);
	const PxReal velOfRigidAtAttachmentPointZ = vel0.linVel.z + vel0.angVel.dot(raXn2);

	// Definition of errors is as follows
	// raXn0_biasW.w = ((rigidBodyCoM + comToPoint) - attachedPointLocation).x;
	// raXn1_biasW.w = ((rigidBodyCoM + comToPoint) - attachedPointLocation).y;
	// raXn2_biasW.w = ((rigidBodyCoM + comToPoint) - attachedPointLocation).z;
	const PxReal& positionErrorX = raXn0_biasW.w;
	const PxReal& positionErrorY = raXn1_biasW.w;
	const PxReal& positionErrorZ = raXn2_biasW.w;

	// For bias see here https://box2d.org/files/ErinCatto_SequentialImpulses_GDC2006.pdf
	// Slide 22, Bias Impulse
	// Target velocity = attached point velocity plus a Baumgarte term that
	// pushes the position error to zero over (dt / biasFactor).
	PxVec3 velError(linVel1.x - positionErrorX * biasFactor * invDt, linVel1.y - positionErrorY * biasFactor * invDt,
		linVel1.z - positionErrorZ * biasFactor * invDt);

	if(isConeLimitedEnabled(worldAxis_angle.w, low_high_limits.x, low_high_limits.y))
	{
		// we don't understand why we need to scale by biasFactor in order to get the right error offset for the cone.
		// Convert the velocity-level error back to a position-level error,
		// clamp it to the allowed cone volume, and convert it back.
		PxReal weirdScale = invDt * biasFactor;
		PxVec3 posError = velError * (1.0f / weirdScale);
		const PxVec3 worldAxis(worldAxis_angle.x, worldAxis_angle.y, worldAxis_angle.z);
		posError = computeConeLimitedError(worldAxis_angle.w, low_high_limits.x, low_high_limits.y, worldAxis, posError);
		velError = posError * weirdScale;
	}

	// deltaF for PGS: impulse
	// Impulse per axis = remaining velocity error times effective mass multiplier.
	const PxReal deltaF0 = (velError.x - velOfRigidAtAttachmentPointX) * velMultiplierXYZ_invMassW.x;
	const PxReal deltaF1 = (velError.y - velOfRigidAtAttachmentPointY) * velMultiplierXYZ_invMassW.y;
	const PxReal deltaF2 = (velError.z - velOfRigidAtAttachmentPointZ) * velMultiplierXYZ_invMassW.z;

	const PxVec3 deltaImpulse = PxVec3(deltaF0, deltaF1, deltaF2);

	const PxReal invMass0 = velMultiplierXYZ_invMassW.w;

	// Resulting velocity changes for the rigid body.
	deltaLinVel = deltaImpulse * invMass0;
	deltaAngVel = raXn0 * deltaF0 + raXn1 * deltaF1 + raXn2 * deltaF2;

	return deltaImpulse;
}
|
||||
|
||||
// Convenience overload: gathers the packed rows of the constraint at index
// `offset` from a structure-of-arrays ConstraintType and forwards to the
// unpacked PGS implementation above.
template <typename ConstraintType>
PX_FORCE_INLINE __device__ PxVec3 calculateAttachmentDeltaImpulsePGS(PxU32 offset, const ConstraintType& constraint,
	const PxgVelocityPackPGS& vel0, const PxVec3& linVel1, PxReal invDt,
	PxReal biasFactor, PxVec3& deltaLinVel, PxVec3& deltaAngVel)
{
	return calculateAttachmentDeltaImpulsePGS(constraint.raXn0_biasW[offset], constraint.raXn1_biasW[offset], constraint.raXn2_biasW[offset],
		constraint.velMultiplierXYZ_invMassW[offset], constraint.low_high_limits[offset],
		constraint.axis_angle[offset], vel0, linVel1, invDt, biasFactor, deltaLinVel, deltaAngVel);
}
|
||||
|
||||
// Computes the TGS delta impulse for a point attachment constraint. Unlike the
// PGS variant this works on position-level errors: the accumulated linear and
// angular deltas of the sub-timestep are folded into the error before it is
// scaled by the bias coefficient.
//
// Packed inputs are laid out as in calculateAttachmentDeltaImpulsePGS.
// Outputs deltaLinVel/deltaAngVel to be applied to the rigid body and returns
// the delta impulse itself.
PX_FORCE_INLINE __device__ PxVec3 calculateAttachmentDeltaImpulseTGS(const float4& raXn0_biasW, const float4& raXn1_biasW,
	const float4& raXn2_biasW, const float4& velMultiplierXYZ_invMassW,
	const float4& low_high_limits, const float4& worldAxis_angle,
	const PxgVelocityPackTGS& vel0, const PxVec3& linDelta1, PxReal dt,
	PxReal biasCoefficient, bool isVelocityIteration, PxVec3& deltaLinVel,
	PxVec3& deltaAngVel)
{
	const PxVec3 raXn0 = PxVec3(raXn0_biasW.x, raXn0_biasW.y, raXn0_biasW.z);
	const PxVec3 raXn1 = PxVec3(raXn1_biasW.x, raXn1_biasW.y, raXn1_biasW.z);
	const PxVec3 raXn2 = PxVec3(raXn2_biasW.x, raXn2_biasW.y, raXn2_biasW.z);

	const PxReal velOfRigidAtAttachmentPoint0 = vel0.linVel.x + vel0.angVel.dot(raXn0);
	const PxReal velOfRigidAtAttachmentPoint1 = vel0.linVel.y + vel0.angVel.dot(raXn1);
	const PxReal velOfRigidAtAttachmentPoint2 = vel0.linVel.z + vel0.angVel.dot(raXn2);

	// Relative linear motion accumulated so far in this sub-timestep.
	const PxVec3 linDelta = linDelta1 - vel0.linDelta;

	// Definition of errors is as follows
	// raXn0_biasW.w = ((rigidBodyCoM + comToPoint) - attachedPointLocation).x;
	// raXn1_biasW.w = ((rigidBodyCoM + comToPoint) - attachedPointLocation).y;
	// raXn2_biasW.w = ((rigidBodyCoM + comToPoint) - attachedPointLocation).z;
	const PxReal& positionErrorX = raXn0_biasW.w;
	const PxReal& positionErrorY = raXn1_biasW.w;
	const PxReal& positionErrorZ = raXn2_biasW.w;

	// This is a position error as well (as opposed to a velocity error for PGS)
	PxVec3 tgsError(linDelta.x - positionErrorX - vel0.angDelta.dot(raXn0), linDelta.y - positionErrorY - vel0.angDelta.dot(raXn1),
		linDelta.z - positionErrorZ - vel0.angDelta.dot(raXn2));

	if(isConeLimitedEnabled(worldAxis_angle.w, low_high_limits.x, low_high_limits.y))
	{
		// tgsError is already a position-level quantity, so it can be clamped
		// to the cone volume directly (no rescaling as in the PGS path).
		const PxVec3 worldAxis(worldAxis_angle.x, worldAxis_angle.y, worldAxis_angle.z);
		tgsError = computeConeLimitedError(worldAxis_angle.w, low_high_limits.x, low_high_limits.y, worldAxis, tgsError);
	}

	// During velocity iterations no further position change is integrated,
	// so the velocity term drops out.
	const PxReal velDt = isVelocityIteration ? 0.f : dt;

	// deltaF for TGS: position delta multiplied by effective inertia

	// Bias coefficient is already multiplied by dt
	// Bias seems to kind of act like damping
	const PxReal deltaF0 = (tgsError.x - velOfRigidAtAttachmentPoint0 * velDt) * velMultiplierXYZ_invMassW.x * biasCoefficient;
	const PxReal deltaF1 = (tgsError.y - velOfRigidAtAttachmentPoint1 * velDt) * velMultiplierXYZ_invMassW.y * biasCoefficient;
	const PxReal deltaF2 = (tgsError.z - velOfRigidAtAttachmentPoint2 * velDt) * velMultiplierXYZ_invMassW.z * biasCoefficient;

	// const PxVec3 deltaImpulse = (normal0 * deltaF0 + normal1 * deltaF1 + normal2 * deltaF2);
	const PxVec3 deltaImpulse = PxVec3(deltaF0, deltaF1, deltaF2);

	// Resulting velocity changes for the rigid body.
	deltaLinVel = deltaImpulse * velMultiplierXYZ_invMassW.w;
	deltaAngVel = raXn0 * deltaF0 + raXn1 * deltaF1 + raXn2 * deltaF2;

	return deltaImpulse;
}
|
||||
|
||||
// Convenience overload: gathers the packed rows of the constraint at index
// `offset` from a structure-of-arrays ConstraintType and forwards to the
// unpacked TGS implementation above.
template <typename ConstraintType>
PX_FORCE_INLINE __device__ PxVec3 calculateAttachmentDeltaImpulseTGS(PxU32 offset, const ConstraintType& constraint,
	const PxgVelocityPackTGS& vel0, const PxVec3& linDelta1, PxReal dt,
	PxReal biasCoefficient, bool isVelocityIteration, PxVec3& deltaLinVel,
	PxVec3& deltaAngVel)
{
	return calculateAttachmentDeltaImpulseTGS(constraint.raXn0_biasW[offset], constraint.raXn1_biasW[offset],
		constraint.raXn2_biasW[offset], constraint.velMultiplierXYZ_invMassW[offset],
		constraint.low_high_limits[offset], constraint.axis_angle[offset], vel0, linDelta1, dt,
		biasCoefficient, isVelocityIteration, deltaLinVel, deltaAngVel);
}
|
||||
|
||||
#endif // __ATTACHMENTS_CUH__
|
||||
763
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/bvh.cuh
vendored
Normal file
763
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/bvh.cuh
vendored
Normal file
@@ -0,0 +1,763 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __CU_BVH_CUH__
|
||||
#define __CU_BVH_CUH__
|
||||
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxBounds3.h"
|
||||
#include "PxgBVH.h"
|
||||
#include "GuDistancePointTriangle.h"
|
||||
#include "foundation/PxMath.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Spreads the low 10 bits of n so that input bit k lands at output bit 3k,
// leaving two zero bits between consecutive bits (building block for
// interleaving three coordinates into a Morton code).
PX_FORCE_INLINE __device__ PxU32 part1by2(PxU32 n)
{
	PxU32 v = n;
	v = (v ^ (v << 16)) & 0xff0000ff;
	v = (v ^ (v << 8)) & 0x0300f00f;
	v = (v ^ (v << 4)) & 0x030c30c3;
	v = (v ^ (v << 2)) & 0x09249249;
	return v;
}
|
||||
|
||||
// Takes values in the range [0, 1] and assigns an index based Morton codes of length 3*log2(Dim) bits
|
||||
template <PxI32 Dim>
|
||||
PX_FORCE_INLINE __device__ PxU32 morton3(PxReal x, PxReal y, PxReal z)
|
||||
{
|
||||
PxU32 ux = PxClamp(PxI32(x*Dim), 0, Dim - 1);
|
||||
PxU32 uy = PxClamp(PxI32(y*Dim), 0, Dim - 1);
|
||||
PxU32 uz = PxClamp(PxI32(z*Dim), 0, Dim - 1);
|
||||
|
||||
return (part1by2(uz) << 2) | (part1by2(uy) << 1) | part1by2(ux);
|
||||
}
|
||||
|
||||
|
||||
// Result codes returned by the traversal functor passed to queryBVH to steer
// the depth-first walk.
struct BvhTraversalControl
{
	enum Enum
	{
		eDontGoDeeper,       // do not visit this node's children
		eGoDeeper,           // descend: lower child is pushed, upper child visited next
		eGoDeeperLowerFirst, // descend: upper child is pushed, lower child visited next
		eAbort               // stop the traversal immediately
	};
};
|
||||
|
||||
// Generic iterative BVH traversal.
//
// For each visited node the functor f is called with the packed lower/upper
// node halves and the node index; its BvhTraversalControl return value decides
// whether to descend into the children, skip them, or abort the whole query.
// Descent only happens for internal nodes (lower.b == 0); for leaves the
// children indices are payload, not nodes. One child is processed immediately
// and the other is pushed onto the caller-provided explicit stack; if the
// stack is full the child is silently dropped. The loop is additionally
// bounded by bvh.mMaxNodes as a safeguard.
template <typename Func>
PX_FORCE_INLINE __device__ void queryBVH(const PxgBVH& bvh, Func& f, PxI32* stack, PxU32 stackSize)
{
	if (bvh.mNumNodes == 0)
		return;

	PxI32 index = *bvh.mRootNode;
	PxI32 count = 0; // number of entries currently on the stack

	const PxU32 maxIter = bvh.mMaxNodes;
	for(PxU32 iter = 0; iter < maxIter; ++iter)
	{
		// union to allow 128-bit loads
		//union { PxgPackedNodeHalf lower; float4 lowerf; };
		//union { PxgPackedNodeHalf upper; float4 upperf; };

		PxgPackedNodeHalf lower = bvh.mNodeLowers[index];
		PxgPackedNodeHalf upper = bvh.mNodeUppers[index];
		//lowerf = tex1Dfetch<float4>(bvh.mNodeLowersTex, index);
		//upperf = tex1Dfetch<float4>(bvh.mNodeUppersTex, index);

		BvhTraversalControl::Enum control = f(lower, upper, index);
		if (control == BvhTraversalControl::eAbort)
			break;
		// Only internal nodes (lower.b == 0) can be descended into.
		if (!lower.b && (control == BvhTraversalControl::eGoDeeper || control == BvhTraversalControl::eGoDeeperLowerFirst))
		{
			if (control == BvhTraversalControl::eGoDeeperLowerFirst)
			{
				if(count < stackSize)
					stack[count++] = upper.i;
				index = lower.i; //index gets processed next - assign lower index to it
			}
			else
			{
				if (count < stackSize)
					stack[count++] = lower.i;
				index = upper.i; //index gets processed next - assign upper index to it
			}
			continue;
		}
		// No descent: pop the next pending node, or finish if none remain.
		if (count == 0)
			break;
		index = stack[--count];
	}
}
|
||||
|
||||
// Packs a bound corner, a child/payload index and a leaf flag into one
// PxgPackedNodeHalf.
PX_FORCE_INLINE __device__ PxgPackedNodeHalf makeNode(const PxVec3& bound, PxI32 child, bool leaf)
{
	PxgPackedNodeHalf node;
	node.x = bound.x;
	node.y = bound.y;
	node.z = bound.z;
	node.i = PxU32(child);
	node.b = leaf ? 1u : 0u;

	return node;
}
|
||||
|
||||
// variation of makeNode through volatile pointers used in BuildHierarchy
// (writes go through a volatile destination so other threads observing the
// node during the bottom-up build see the stores; write order is preserved)
PX_FORCE_INLINE __device__ void makeNode(volatile PxgPackedNodeHalf* n, const PxVec3& bound, PxI32 child, bool leaf)
{
	n->x = bound.x;
	n->y = bound.y;
	n->z = bound.z;
	n->i = (PxU32)child;
	n->b = (PxU32)(leaf ? 1 : 0);
}
|
||||
|
||||
// Builds a node half from a position plus a raw 32-bit float payload written
// into the fourth component.
// NOTE(review): the reinterpret_cast assumes PxgPackedNodeHalf is
// layout-compatible with float4 (16 bytes, w overlapping the i/b bits) -
// confirm against the struct's declaration.
PX_FORCE_INLINE __device__ PxgPackedNodeHalf makeNode(const PxVec3& bound, PxReal w)
{
	PxgPackedNodeHalf n;
	n.x = bound.x;
	n.y = bound.y;
	n.z = bound.z;

	// Store w over the packed index/flag bits via the float4 view.
	reinterpret_cast<float4&>(n).w = w;

	return n;
}
|
||||
|
||||
// this bottom-up process assigns left and right children and combines bounds to form internal nodes
// there is one thread launched per-leaf node, each thread calculates its parent node and assigns
// itself to either the left or right parent slot; the last child to complete a parent moves
// up the hierarchy
//
// \param n             Number of leaf nodes (one thread per leaf).
// \param[out] root     Receives the index of the root node.
// \param[out] maxTreeDepth  Receives the depth counted by the thread that
//                      reaches the root.
// \param deltas        Per-boundary merge metric; deltas[right] vs
//                      deltas[left - 1] decides which neighbour a range
//                      merges with.
// \param numChildren   Per-node atomic completion counters.
//                      NOTE(review): assumed zero-initialized by the caller - confirm.
// \param rangeLefts / rangeRights  Per-node leaf ranges, updated as ranges merge.
//                      NOTE(review): assumed pre-initialized for the leaves - confirm.
// \param lowers/uppers Node halves: leaves occupy [0, n), internal nodes start at n.
// \param f             Callback invoked once per completed internal node,
//                      e.g. to accumulate extra per-node data (see WindingClusterBuilder).
template <typename Func>
PX_FORCE_INLINE __device__ void buildHierarchy(PxI32 n, PxI32* root, PxU32* maxTreeDepth, const PxReal* PX_RESTRICT deltas, PxI32* PX_RESTRICT numChildren,
	volatile PxI32* PX_RESTRICT rangeLefts, volatile PxI32* PX_RESTRICT rangeRights, volatile PxgPackedNodeHalf* PX_RESTRICT lowers, volatile PxgPackedNodeHalf* PX_RESTRICT uppers, Func& f)
{
	PxI32 index = blockDim.x*blockIdx.x + threadIdx.x;

	PxU32 maxDepth = 0;

	if (index < n)
	{
		// Internal nodes are stored directly after the n leaves.
		const PxI32 internalOffset = n;

		for (;;)
		{
			PxI32 left = rangeLefts[index];
			PxI32 right = rangeRights[index];

			// check if we are the root node, if so then store out our index and terminate
			if (left == 0 && right == n - 1)
			{
				*root = index;
				*maxTreeDepth = maxDepth;
				break;
			}

			PxI32 childCount = 0;

			PxI32 parent;

			// Merge with the neighbour that has the smaller delta.
			if (left == 0 || (right != n - 1 && deltas[right] < deltas[left - 1]))
			{
				parent = right + internalOffset;

				// set parent left child
				lowers[parent].i = index;
				rangeLefts[parent] = left;

				childCount = atomicAdd(&numChildren[parent], 1);
			}
			else
			{
				parent = left + internalOffset - 1;

				// set parent right child
				uppers[parent].i = index;
				rangeRights[parent] = right;

				childCount = atomicAdd(&numChildren[parent], 1);
			}

			// ensure above writes are visible to all threads
			__threadfence();

			// if we are the last thread (such that the parent node is now complete)
			// then update its bounds and move onto the next parent in the hierarchy
			if (childCount == 1)
			{
				++maxDepth;

				const PxI32 leftChild = lowers[parent].i;
				const PxI32 rightChild = uppers[parent].i;

				//TODO: float4 loads as in queries?
				volatile PxgPackedNodeHalf& lowerLeft = lowers[leftChild];
				PxVec3 leftLower = PxVec3(lowerLeft.x,
					lowerLeft.y,
					lowerLeft.z);

				PxVec3 leftUpper = PxVec3(uppers[leftChild].x,
					uppers[leftChild].y,
					uppers[leftChild].z);

				volatile PxgPackedNodeHalf& lowerRight = lowers[rightChild];
				PxVec3 rightLower = PxVec3(lowerRight.x,
					lowerRight.y,
					lowerRight.z);

				PxVec3 rightUpper = PxVec3(uppers[rightChild].x,
					uppers[rightChild].y,
					uppers[rightChild].z);

				// union of child bounds
				PxVec3 lower = leftLower.minimum(rightLower);
				PxVec3 upper = leftUpper.maximum(rightUpper);

				// write new BVH nodes
				makeNode(lowers + parent, lower, leftChild, false);
				makeNode(uppers + parent, upper, rightChild, false);

				//Allows to compute additional data per node
				f(parent, leftChild, lowerLeft, rightChild, lowerRight);

				// move onto processing the parent
				index = parent;
			}
			else
			{
				// parent not ready (we are the first child), terminate thread
				break;
			}
		}
	}
}
|
||||
|
||||
// No-op per-node callback for buildHierarchy, used when no additional data
// needs to be computed while the tree is constructed.
struct EmptyBuilder
{
	PX_FORCE_INLINE __device__ EmptyBuilder() {}
	// Signature matches buildHierarchy's Func parameter; intentionally empty.
	PX_FORCE_INLINE __device__ void operator()(PxI32 parentId, PxI32 childLeftId, volatile PxgPackedNodeHalf& childLeft, PxI32 childRightId, volatile PxgPackedNodeHalf& childRight)
	{}
};
|
||||
|
||||
// Convenience overload of buildHierarchy that computes no additional
// per-node data (forwards to the templated version with a no-op callback).
PX_FORCE_INLINE __device__ void buildHierarchy(PxI32 n, PxI32* root, PxU32* maxTreeDepth, const PxReal* PX_RESTRICT deltas, PxI32* PX_RESTRICT numChildren,
	volatile PxI32* PX_RESTRICT rangeLefts, volatile PxI32* PX_RESTRICT rangeRights, volatile PxgPackedNodeHalf* PX_RESTRICT lowers, volatile PxgPackedNodeHalf* PX_RESTRICT uppers)
{
	EmptyBuilder noOpBuilder;
	buildHierarchy(n, root, maxTreeDepth, deltas, numChildren, rangeLefts, rangeRights, lowers, uppers, noOpBuilder);
}
|
||||
|
||||
|
||||
|
||||
// Ray vs axis-aligned box slab test.
//
// \param pos       Ray origin.
// \param rcp_dir   Componentwise reciprocal of the ray direction,
//                  precomputed by the caller.
// \param min, max  Box corners.
// \param[out] lmin, lmax  Parametric entry/exit distances along the ray;
//                  lmin can be negative when the origin is inside the box.
// \return true when the interval is non-empty and the exit point is not
//         behind the ray origin (lmax >= 0 and lmax >= lmin).
__device__ inline bool intersectRayAABBFast(const PxVec3& pos, const PxVec3& rcp_dir, const PxVec3& min, const PxVec3& max, PxReal& lmin, PxReal& lmax)
{
	// x slab
	PxReal l1 = (min.x - pos.x) * rcp_dir.x;
	PxReal l2 = (max.x - pos.x) * rcp_dir.x;
	lmin = PxMin(l1, l2);
	lmax = PxMax(l1, l2);

	// y slab: intersect with the running [lmin, lmax] interval
	l1 = (min.y - pos.y) * rcp_dir.y;
	l2 = (max.y - pos.y) * rcp_dir.y;
	lmin = PxMax(PxMin(l1, l2), lmin);
	lmax = PxMin(PxMax(l1, l2), lmax);

	// z slab
	l1 = (min.z - pos.z) * rcp_dir.z;
	l2 = (max.z - pos.z) * rcp_dir.z;
	lmin = PxMax(PxMin(l1, l2), lmin);
	lmax = PxMin(PxMax(l1, l2), lmax);

	//return ((lmax > 0.f) & (lmax >= lmin));
	//return ((lmax > 0.f) & (lmax > lmin));
	bool hit = ((lmax >= 0.f) & (lmax >= lmin));
	/*if (hit)
		t = lmin;*/
	return hit;
}
|
||||
|
||||
// Returns the index (0 = x, 1 = y, 2 = z) of the component of dir with the
// largest absolute value. Ties prefer x over y over z.
PX_FORCE_INLINE __device__ PxU32 maxAbsDim(PxVec3 dir)
{
	dir.x = PxAbs(dir.x);
	dir.y = PxAbs(dir.y);
	dir.z = PxAbs(dir.z);

	if (dir.x >= dir.y && dir.x >= dir.z)
		return 0;
	return (dir.y >= dir.x && dir.y >= dir.z) ? 1 : 2;
}
|
||||
|
||||
//Specialized implementation guaranteeing watertightness taken from paper "Watertight Ray/Triangle Intersection"
//https://jcgt.org/published/0002/01/05/paper.pdf
//
//Two-sided test: hits are reported regardless of triangle winding/facing.
//\param org     Ray origin.
//\param dir     Ray direction (need not be normalized).
//\param a,b,c   Triangle vertices.
//\param[out] t  Hit distance along dir; can be negative (hit behind origin).
//\param[out] u,v,w  Normalized barycentric coordinates of the hit.
//\return true on intersection.
__device__ inline bool intersectRayTriTwoSidedWatertight(const PxVec3& org, const PxVec3& dir, const PxVec3& a,
	const PxVec3& b, const PxVec3& c, PxReal& t, PxReal& u, PxReal& v, PxReal& w)
{
	//Calculate the dimension where the ray direction is maximal
	PxU32 kz = maxAbsDim(dir);
	PxU32 kx = kz + 1; if (kx == 3) kx = 0;
	PxU32 ky = kx + 1; if (ky == 3) ky = 0;

	//Swap kx and ky dimension to preserve winding direction of triangles
	if (dir[kz] < 0.0f)
		PxSwap(kx, ky);

	//Calculate shear constants
	PxReal Sx = dir[kx] / dir[kz];
	PxReal Sy = dir[ky] / dir[kz];
	PxReal Sz = 1.0f / dir[kz];

	//Calculate vertices relative to ray origin
	const PxVec3 A = a - org;
	const PxVec3 B = b - org;
	const PxVec3 C = c - org;

	//Perform shear and scale of vertices
	const PxReal Ax = A[kx] - Sx * A[kz];
	const PxReal Ay = A[ky] - Sy * A[kz];
	const PxReal Bx = B[kx] - Sx * B[kz];
	const PxReal By = B[ky] - Sy * B[kz];
	const PxReal Cx = C[kx] - Sx * C[kz];
	const PxReal Cy = C[ky] - Sy * C[kz];

	//Calculate scaled barycentric coordinates
	PxReal U = Cx * By - Cy * Bx;
	PxReal V = Ax * Cy - Ay * Cx;
	PxReal W = Bx * Ay - By * Ax;

	//Fallback to test against edges using double precision
	//Happens only in about 1 case out of 1mio tests according to the paper "Watertight Ray/Triangle Intersection"
	if (U == 0.0f || V == 0.0f || W == 0.0f)
	{
		double CxBy = (double)Cx*(double)By;
		double CyBx = (double)Cy*(double)Bx;
		U = (PxReal)(CxBy - CyBx);
		double AxCy = (double)Ax*(double)Cy;
		double AyCx = (double)Ay*(double)Cx;
		V = (PxReal)(AxCy - AyCx);
		double BxAy = (double)Bx*(double)Ay;
		double ByAx = (double)By*(double)Ax;
		W = (PxReal)(BxAy - ByAx);
	}

	//Perform edge tests. Moving this test before and at the end of the previous conditional gives higher performance
	//(reject when the signs of U, V, W are mixed - point outside the triangle on both sides)
	if ((U < 0.0f || V < 0.0f || W < 0.0f) &&
		(U > 0.0f || V > 0.0f || W > 0.0f))
		return false;

	//Calculate determinant
	PxReal det = U + V + W;
	if (det == 0.0f)
		return false;

	//Calculate scaled z-coordinates of vertices and use them to calculate the hit distance
	const PxReal Az = Sz * A[kz];
	const PxReal Bz = Sz * B[kz];
	const PxReal Cz = Sz * C[kz];
	const PxReal T = U * Az + V * Bz + W * Cz;

	//Normalize U, V, W and T
	const PxReal rcpDet = 1.0f / det;
	u = U * rcpDet;
	v = V * rcpDet;
	w = W * rcpDet;
	t = T * rcpDet;

	return true;
}
|
||||
|
||||
// Winding-number contribution of a single triangle as seen from queryPoint:
// the signed solid angle subtended by the triangle, divided by 4*pi.
// Evaluated in closed form via atan2 of the triple product over the
// Van Oosterom-Strackee denominator.
PX_FORCE_INLINE PxReal __device__ windingNumberForTriangle(const PxVec3& triA, const PxVec3& triB, const PxVec3& triC, const PxVec3& queryPoint)
{
	const PxVec3 ra = triA - queryPoint;
	const PxVec3 rb = triB - queryPoint;
	const PxVec3 rc = triC - queryPoint;

	// Scalar triple product ra . (rb x rc)
	const PxReal numer = ra.dot(rb.cross(rc));

	const PxReal la = ra.magnitude();
	const PxReal lb = rb.magnitude();
	const PxReal lc = rc.magnitude();

	const PxReal denom = (la * lb * lc + ra.dot(rb) * lc + rb.dot(rc) * la + rc.dot(ra) * lb);
	const PxReal omega = PxAtan2(numer, denom);

	return (0.5f / PxPi) * omega;
}
|
||||
|
||||
// First-order (dipole) far-field approximation of a cluster's winding-number
// contribution, evaluated from the cluster's area-weighted centroid and
// summed area-weighted normal.
PX_FORCE_INLINE PxReal __device__ firstOrderClusterApproximation(const PxVec3& weightedCentroid, const PxVec3& weightedNormalSum,
	const PxVec3& evaluationPoint)
{
	const PxVec3 toCentroid = weightedCentroid - evaluationPoint;
	const PxReal dist = toCentroid.magnitude();
	const PxReal scale = (0.25f / PxPi) / (dist * dist * dist);
	return scale * weightedNormalSum.dot(toCentroid);
}
|
||||
|
||||
// Radius of a sphere centered at newSphereCenter that fully contains the two
// spheres (centerA, radiusA) and (centerB, radiusB).
PX_FORCE_INLINE PxReal __device__ radiusOfSphereContainingSubSpheres(const PxVec3& newSphereCenter, const PxVec3& centerA, PxReal radiusA, const PxVec3& centerB, PxReal radiusB)
{
	const PxReal reachA = (centerA - newSphereCenter).magnitude() + radiusA;
	const PxReal reachB = (centerB - newSphereCenter).magnitude() + radiusB;
	return PxMax(reachA, reachB);
}
|
||||
|
||||
// Non-normalized triangle normal; its length equals twice the triangle area.
PX_FORCE_INLINE PxVec3 __device__ triangleNormal(const PxVec3& triA, const PxVec3& triB, const PxVec3& triC)
{
	const PxVec3 edge0 = triB - triA;
	const PxVec3 edge1 = triC - triA;
	return edge0.cross(edge1);
}
|
||||
|
||||
// Arithmetic mean of the three triangle vertices.
PX_FORCE_INLINE PxVec3 __device__ triangleCentroid(const PxVec3& triA, const PxVec3& triB, const PxVec3& triC)
{
	const PxReal oneThird = 1.0f / 3.0f;
	return oneThird * (triA + triB + triC);
}
|
||||
|
||||
// Builds a single-triangle winding-number cluster: the leaf-level building
// block later merged by combineClusters during hierarchy construction.
// \param vertices  Mesh vertex positions.
// \param triangle  Pointer to the triangle's three vertex indices.
PX_FORCE_INLINE __device__ PxgWindingClusterApproximation createWindingClusterApproximation(const PxVec3* PX_RESTRICT vertices, const PxU32* PX_RESTRICT triangle)
{
	const PxVec3& triA = vertices[triangle[0]];
	const PxVec3& triB = vertices[triangle[1]];
	const PxVec3& triC = vertices[triangle[2]];

	PxgWindingClusterApproximation result;
	// The cross-product normal has length 2*area, so 0.5x yields the
	// area-weighted normal; its magnitude is then the triangle area.
	result.mWeightedNormalSum = 0.5f * triangleNormal(triA, triB, triC);
	result.mAreaSum = result.mWeightedNormalSum.magnitude();
	// mCentroidTimesArea temporarily holds the plain centroid while the
	// bounding radius (max distance from centroid to a vertex) is computed...
	result.mCentroidTimesArea = triangleCentroid(triA, triB, triC);
	result.mRadius = PxSqrt(PxMax(PxMax((triA - result.mCentroidTimesArea).magnitudeSquared(),
		(triB - result.mCentroidTimesArea).magnitudeSquared()), (triC - result.mCentroidTimesArea).magnitudeSquared()));
	// ...then gets scaled by the area to match the accumulated representation
	// (see clusterCentroid, which divides by mAreaSum).
	result.mCentroidTimesArea = result.mAreaSum * result.mCentroidTimesArea;
	return result;
}
|
||||
|
||||
// Recovers a cluster's area-weighted centroid from its accumulated sums.
PX_FORCE_INLINE __device__ PxVec3 clusterCentroid(const PxgWindingClusterApproximation& c)
{
	const PxReal invArea = 1.0f / c.mAreaSum;
	return c.mCentroidTimesArea * invArea;
}
|
||||
|
||||
// Merges two winding-number clusters into 'result' by summing their
// accumulated quantities and bounding both child spheres.
PX_FORCE_INLINE __device__ void combineClusters(const PxgWindingClusterApproximation& a, const PxgWindingClusterApproximation& b, PxgWindingClusterApproximation& result)
{
	result.mWeightedNormalSum = a.mWeightedNormalSum + b.mWeightedNormalSum;
	result.mAreaSum = a.mAreaSum + b.mAreaSum;
	result.mCentroidTimesArea = a.mCentroidTimesArea + b.mCentroidTimesArea;
	result.mRadius = radiusOfSphereContainingSubSpheres(clusterCentroid(result), clusterCentroid(a), a.mRadius, clusterCentroid(b), b.mRadius); //This is a conservative approximation (meaning the radius might be a bit too big) but that's fine for the winding number algorithm
}
|
||||
|
||||
//Clusters are not stored for child nodes!
|
||||
PX_FORCE_INLINE __device__ PxI32 getClusterIndex(PxI32 bvhNodeIndex, PxU32 numTriangles)
|
||||
{
|
||||
PxI32 result = bvhNodeIndex - numTriangles; //The tree is built such that the leave nodes are at the beginning of the array
|
||||
assert(result >= 0);
|
||||
assert(result < numTriangles);
|
||||
/*if (result < 0 || result >= numTriangles)
|
||||
printf("Winding cluster out of range access\n");*/
|
||||
return result;
|
||||
}
|
||||
|
||||
//Can be passed to the buildHierarchy method to build a winding number hierarchy simultaneously
struct WindingClusterBuilder
{
	PxgWindingClusterApproximation* PX_RESTRICT clusters;	// one slot per internal node (see getClusterIndex)
	const PxVec3* PX_RESTRICT vertices;						// mesh vertex positions
	const PxU32* PX_RESTRICT indices;						// triangle index triplets
	PxU32 numTriangles;

	PX_FORCE_INLINE __device__ WindingClusterBuilder(PxgWindingClusterApproximation* PX_RESTRICT clusters, const PxVec3* PX_RESTRICT vertices, const PxU32* PX_RESTRICT indices, PxU32 numTriangles)
		: clusters(clusters), vertices(vertices), indices(indices), numTriangles(numTriangles)
	{
	}

	// Called by buildHierarchy once per completed internal node: merges the
	// two children's clusters into the parent's slot. Leaf children (b set)
	// have no stored cluster, so theirs is built on the fly from the triangle.
	PX_FORCE_INLINE __device__ void operator()(PxI32 parentId, PxI32 childLeftId, volatile PxgPackedNodeHalf& childLeft, PxI32 childRightId, volatile PxgPackedNodeHalf& childRight)
	{
		PxgWindingClusterApproximation approxLeft = childLeft.b ? createWindingClusterApproximation(vertices, &indices[3 * childLeft.i]) : clusters[getClusterIndex(childLeftId, numTriangles)];
		PxgWindingClusterApproximation approxRight = childRight.b ? createWindingClusterApproximation(vertices, &indices[3 * childRight.i]) : clusters[getClusterIndex(childRightId, numTriangles)];
		combineClusters(approxLeft, approxRight, clusters[getClusterIndex(parentId, numTriangles)]);
	}
};
|
||||
|
||||
|
||||
// Functor for queryBVH: accumulates the generalized winding number of a
// triangle mesh at mQueryPoint. Nearby triangles contribute their exact
// solid angle; distant subtrees are replaced by their cluster's first-order
// approximation (fast-winding-number style evaluation).
struct WindingNumberTraversal
{
public:
	PxReal mWindingNumber = 0;		// accumulated result
	const PxU32* PX_RESTRICT mTriangles;	// triangle index triplets
	PxU32 mNumTriangles;
	const PxVec3* PX_RESTRICT mPoints;	// mesh vertex positions
	const PxgWindingClusterApproximation* PX_RESTRICT mClusters;	// per-internal-node clusters
	PxVec3 mQueryPoint;
	// Accuracy/speed trade-off: a subtree is approximated once the query
	// point is farther than beta times the cluster radius.
	PxReal mDistanceThresholdBeta;

	// NOTE(review): leaves every member except mWindingNumber uninitialized;
	// callers must fill them in before use.
	__device__ WindingNumberTraversal()
	{
	}

	__device__ WindingNumberTraversal(const PxU32* PX_RESTRICT triangles, PxU32 numTriangles, const PxVec3* PX_RESTRICT points,
		const PxgWindingClusterApproximation* PX_RESTRICT clusters, const PxVec3& queryPoint, PxReal distanceThresholdBeta = 2.0f)
		: mTriangles(triangles), mNumTriangles(numTriangles), mPoints(points), mClusters(clusters), mQueryPoint(queryPoint), mDistanceThresholdBeta(distanceThresholdBeta)
	{
	}

	__device__ inline BvhTraversalControl::Enum operator()(const PxgPackedNodeHalf& lower, const PxgPackedNodeHalf& upper, PxI32 nodeIndex)
	{
		if (lower.b)
		{
			// Leaf: add the triangle's exact solid-angle contribution.
			const PxU32* tri = &mTriangles[3 * lower.i];
			mWindingNumber += windingNumberForTriangle(mPoints[tri[0]], mPoints[tri[1]], mPoints[tri[2]], mQueryPoint);
			return BvhTraversalControl::eDontGoDeeper;
		}
		// Internal node: far enough away -> cheap dipole approximation,
		// otherwise descend into the children.
		const PxgWindingClusterApproximation& cluster = mClusters[getClusterIndex(nodeIndex, mNumTriangles)];
		const PxReal distSquared = (mQueryPoint - clusterCentroid(cluster)).magnitudeSquared();
		const PxReal threshold = mDistanceThresholdBeta * cluster.mRadius;
		if (distSquared > threshold * threshold)
		{
			mWindingNumber += firstOrderClusterApproximation(clusterCentroid(cluster), cluster.mWeightedNormalSum, mQueryPoint);
			return BvhTraversalControl::eDontGoDeeper;
		}
		return BvhTraversalControl::eGoDeeper;
	}
};
|
||||
|
||||
|
||||
// Negated alignment between a ray direction and a triangle's normal
// (positive when the ray points against the normal). When 'normalize' is
// set and the triangle is non-degenerate, the normal is unit length first.
PX_FORCE_INLINE __device__ PxReal rayTriangleSign(const PxVec3& dir, const PxVec3& a,
	const PxVec3& b, const PxVec3& c, bool normalize)
{
	const PxVec3 edge0 = b - a;
	const PxVec3 edge1 = c - a;
	PxVec3 normal = edge0.cross(edge1);

	if (normalize)
	{
		const PxReal lenSq = normal.magnitudeSquared();
		if (lenSq > 0.0f)
			normal = normal * (1.0f / PxSqrt(lenSq));
	}

	return -(dir.dot(normal));
}
|
||||
|
||||
struct ClosestRayIntersectionTraversal
|
||||
{
|
||||
const PxVec3* PX_RESTRICT meshVertices;
|
||||
const PxU32* PX_RESTRICT meshIndices;
|
||||
|
||||
const PxVec3 origin;
|
||||
const PxVec3 dir;
|
||||
const PxVec3 rcpDir;
|
||||
|
||||
PxReal closestT;
|
||||
PxReal closestDotProduct;
|
||||
bool includeNegativeRayDirection;
|
||||
bool closestPointOnTriangleEdge;
|
||||
|
||||
|
||||
__device__ inline ClosestRayIntersectionTraversal(const PxVec3* PX_RESTRICT meshVertices, const PxU32* PX_RESTRICT meshIndices, const PxVec3& start, const PxVec3& dir, bool includeNegativeRayDirection) :
|
||||
meshVertices(meshVertices), meshIndices(meshIndices),
|
||||
origin(start),
|
||||
dir(dir),
|
||||
rcpDir(1.0f / dir.x, 1.0f / dir.y, 1.0f / dir.z),
|
||||
closestT(FLT_MAX),
|
||||
closestDotProduct(0.0f),
|
||||
includeNegativeRayDirection(includeNegativeRayDirection),
|
||||
closestPointOnTriangleEdge(false)
|
||||
{
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE __device__ bool hasHit()
|
||||
{
|
||||
return closestT < FLT_MAX;
|
||||
}
|
||||
|
||||
__device__ inline BvhTraversalControl::Enum operator()(const PxgPackedNodeHalf& lower, const PxgPackedNodeHalf& upper, PxI32 nodeIndex)
|
||||
{
|
||||
PxReal t;
|
||||
if (lower.b)
|
||||
{
|
||||
// test each element of the rigid body mesh
|
||||
PxU32 tri = lower.i;
|
||||
PxVec3 a = meshVertices[meshIndices[tri * 3 + 0]];
|
||||
PxVec3 b = meshVertices[meshIndices[tri * 3 + 1]];
|
||||
PxVec3 c = meshVertices[meshIndices[tri * 3 + 2]];
|
||||
|
||||
PxReal u, v, w, s;
|
||||
PxVec3 n;
|
||||
|
||||
|
||||
if (intersectRayTriTwoSidedWatertight(origin, dir, a, b, c, t, u, v, w))
|
||||
{
|
||||
s = rayTriangleSign(dir, a, b, c, true);
|
||||
if (includeNegativeRayDirection)
|
||||
{
|
||||
if (t < 0.0f)
|
||||
{
|
||||
t = -t;
|
||||
s = -s;
|
||||
}
|
||||
}
|
||||
if (t > 0.0f && t < closestT)
|
||||
{
|
||||
closestT = t;
|
||||
closestDotProduct = s;
|
||||
closestPointOnTriangleEdge = u == 0.0f || v == 0.0f || w == 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
return BvhTraversalControl::eDontGoDeeper;
|
||||
}
|
||||
|
||||
//TODO: Does intersectRayAABBFast work for negative t?
|
||||
PxReal tMax;
|
||||
if (intersectRayAABBFast(origin, rcpDir, PxVec3(lower.x, lower.y, lower.z), PxVec3(upper.x, upper.y, upper.z), t, tMax))
|
||||
{
|
||||
if (includeNegativeRayDirection)
|
||||
{
|
||||
if (tMax < 0.0f)
|
||||
t = -tMax;
|
||||
}
|
||||
if (t < closestT)
|
||||
return BvhTraversalControl::eGoDeeper;
|
||||
}
|
||||
return BvhTraversalControl::eDontGoDeeper;
|
||||
}
|
||||
};
|
||||
|
||||
// Functor for queryBVH: computes the squared distance from mQueryPoint to
// the closest point on a triangle mesh, pruning subtrees whose bounding box
// is already farther than the best distance found so far.
struct ClosestDistanceToTriangleMeshTraversal
{
public:
	const PxU32* PX_RESTRICT mTriangles;	// triangle index triplets
	const PxVec3* PX_RESTRICT mPoints;		// mesh vertex positions
	PxVec3 mQueryPoint;
	PxReal mClosestDistanceSquared;			// best squared distance so far

	// NOTE(review): leaves all members uninitialized; callers must fill them
	// in before use.
	__device__ inline ClosestDistanceToTriangleMeshTraversal()
	{
	}

	// The large initial value acts as "no triangle visited yet".
	__device__ inline ClosestDistanceToTriangleMeshTraversal(const PxU32* PX_RESTRICT triangles, const PxVec3* PX_RESTRICT points, const PxVec3& queryPoint)
		: mTriangles(triangles), mPoints(points), mQueryPoint(queryPoint), mClosestDistanceSquared(100000000000.0f)
	{
	}

	// Squared distance from point to the AABB [minimum, maximum]
	// (zero when the point is inside the box).
	PX_FORCE_INLINE __device__ PxReal distancePointBoxSquared(const PxVec3& minimum, const PxVec3& maximum, const PxVec3& point)
	{
		PxVec3 closestPt = minimum.maximum(maximum.minimum(point));
		return (closestPt - point).magnitudeSquared();
	}

	__device__ inline BvhTraversalControl::Enum operator()(const PxgPackedNodeHalf& lower, const PxgPackedNodeHalf& upper, PxI32 nodeIndex)
	{
		// Prune nodes that cannot contain a closer point.
		if (distancePointBoxSquared(PxVec3(lower.x, lower.y, lower.z), PxVec3(upper.x, upper.y, upper.z), mQueryPoint) >= mClosestDistanceSquared)
			return BvhTraversalControl::eDontGoDeeper;

		if (lower.b)
		{
			// Leaf: test the triangle and tighten the bound if closer.
			const PxU32* tri = &mTriangles[3 * lower.i];
			const PxVec3 a = mPoints[tri[0]];
			const PxVec3 b = mPoints[tri[1]];
			const PxVec3 c = mPoints[tri[2]];

			//PxReal s, t;
			PxVec3 closestPt = Gu::closestPtPointTriangle2UnitBox(mQueryPoint, a, b, c); // closestPtPointTriangle(mQueryPoint, a, b, c, s, t);
			PxReal distSq = (closestPt - mQueryPoint).magnitudeSquared();
			if (distSq < mClosestDistanceSquared)
			{
				mClosestDistanceSquared = distSq;
			}

			return BvhTraversalControl::eDontGoDeeper;
		}

		return BvhTraversalControl::eGoDeeper;
	}
};
|
||||
|
||||
//Evaluates the winding number and the closest distance in a single query. Might be faster in some scenarios than two separate queries.
struct WindingNumberAndDistanceTraversal
{
public:
	const PxU32* PX_RESTRICT mTriangles;	// triangle index triplets
	PxU32 mNumTriangles;
	PxReal mWindingNumber;					// accumulated winding number
	const PxVec3* PX_RESTRICT mPoints;		// mesh vertex positions
	const PxgWindingClusterApproximation* mClusters;	// per-internal-node clusters
	PxVec3 mQueryPoint;
	// Far-field cut-off: clusters farther than beta times their radius are
	// approximated instead of descended.
	PxReal mDistanceThresholdBeta;

	PxReal mClosestDistance;				// best (non-squared) distance so far

	// NOTE(review): leaves all members uninitialized; callers must fill them
	// in before use.
	__device__ WindingNumberAndDistanceTraversal()
	{
	}

	// The large initial distance acts as "no triangle visited yet".
	__device__ WindingNumberAndDistanceTraversal(const PxU32* PX_RESTRICT triangles, PxU32 numTriangles, const PxVec3* PX_RESTRICT points,
		const PxgWindingClusterApproximation* clusters, const PxVec3& queryPoint, PxReal distanceThresholdBeta = 2.0f)
		: mTriangles(triangles), mNumTriangles(numTriangles), mWindingNumber(0), mPoints(points), mClusters(clusters), mQueryPoint(queryPoint), mDistanceThresholdBeta(distanceThresholdBeta),
		mClosestDistance(10000000)
	{
	}

	// Leaf handling: exact solid-angle contribution plus closest-distance update.
	__device__ inline void evaluateLeaf(PxU32 payloadIndex)
	{
		const PxU32* tri = &mTriangles[3 * payloadIndex];
		const PxVec3 a = mPoints[tri[0]];
		const PxVec3 b = mPoints[tri[1]];
		const PxVec3 c = mPoints[tri[2]];
		mWindingNumber += windingNumberForTriangle(a, b, c, mQueryPoint);

		//PxReal s, t;
		PxVec3 closestPt = Gu::closestPtPointTriangle2UnitBox(mQueryPoint, a, b, c); //closestPtPointTriangle(mQueryPoint, a, b, c, s, t);
		PxReal distSq = (closestPt - mQueryPoint).magnitudeSquared();
		if (distSq < mClosestDistance * mClosestDistance)
		{
			mClosestDistance = PxSqrt(distSq);
		}
	}

	//Do not pass leaf nodes into this function
	__device__ inline BvhTraversalControl::Enum evaluateBranchNode(const PxgPackedNodeHalf& lower, const PxgPackedNodeHalf& upper, PxI32 nodeIndex)
	{
		const PxgWindingClusterApproximation& cluster = mClusters[getClusterIndex(nodeIndex, mNumTriangles)];
		const PxReal dist = (mQueryPoint - clusterCentroid(cluster)).magnitude();
		// The cluster's sphere could contain a closer surface point, so the
		// distance query forces descent even where the winding number alone
		// could be approximated.
		if (dist - cluster.mRadius < mClosestDistance)
		{
			//Deeper traversal is required
			return BvhTraversalControl::eGoDeeper;
		}
		else if (dist > mDistanceThresholdBeta * cluster.mRadius)
		{
			// Far enough away: cheap first-order winding approximation.
			mWindingNumber += firstOrderClusterApproximation(clusterCentroid(cluster), cluster.mWeightedNormalSum, mQueryPoint);
			return BvhTraversalControl::eDontGoDeeper;
		}
		return BvhTraversalControl::eGoDeeper;
	}

	__device__ inline BvhTraversalControl::Enum operator()(const PxgPackedNodeHalf& lower, const PxgPackedNodeHalf& upper, PxI32 nodeIndex)
	{
		if (lower.b)
		{
			evaluateLeaf(lower.i);
			return BvhTraversalControl::eDontGoDeeper;
		}
		return evaluateBranchNode(lower, upper, nodeIndex);
	}
};
|
||||
|
||||
|
||||
#endif
|
||||
640
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/deformableUtils.cuh
vendored
Normal file
640
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/deformableUtils.cuh
vendored
Normal file
@@ -0,0 +1,640 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __DEFORMABLE_UTILS_CUH__
|
||||
#define __DEFORMABLE_UTILS_CUH__
|
||||
|
||||
#include "foundation/PxMathUtils.h"
|
||||
#include "PxsMaterialCombiner.h"
|
||||
#include "PxgFEMCore.h"
|
||||
#include "PxgFEMCloth.h"
|
||||
#include "PxgSoftBody.h"
|
||||
#include "PxgArticulation.h"
|
||||
#include "PxgBodySim.h"
|
||||
#include "dataReadWriteHelper.cuh"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
//This code is based on Matthias Muller's paper: A robust method to extract the rotational part of deformations
//Basically, this is another way to extract a rotational matrix from deformation gradient instead of using polar
//decomposition
//
//\param A        Matrix whose rotational part is extracted (e.g. a deformation gradient).
//\param q        In/out: initial rotation estimate, refined in place.
//\param maxIter  Iteration limit; the loop exits early once the incremental
//                rotation angle w drops below eps.
__device__ inline void extractRotation(const PxMat33 &A, PxQuat& q, int maxIter)
{
	const PxReal eps = 1.0e-6f;
	for (int iter = 0; iter < maxIter; iter++)
	{
		PxMat33 R(q);
		// Rotation axis (scaled) derived from the mismatch between the
		// current rotation estimate R and the target A.
		PxVec3 omega = R.column0.cross(A.column0) + R.column1.cross(A.column1) + R.column2.cross(A.column2);
		// (Cross(R.cols[0], A.cols[0]) + Cross(R.cols[1], A.cols[1]) + Cross(R.cols[2], A.cols[2]));

		//omega *= 1.0f / (fabsf(Dot(R.cols[0], A.cols[0]) + Dot(R.cols[1], A.cols[1]) + Dot(R.cols[2], A.cols[2])) + 1.0e-6f);
		omega *= 1.0f / (PxAbs(R.column0.dot(A.column0) + R.column1.dot(A.column1) + R.column2.dot(A.column2)) + eps);

		// normalize() leaves omega unit length; w (the pre-normalization
		// magnitude) is used as the incremental rotation angle.
		const float w = omega.normalize();
		const PxQuat tempQ = PxQuat(w, omega);
		q = tempQ * q;
		q = q.getNormalized();

		// early-exit after one update (instead of before) since we've already done the expensive computations to find w
		if (w < eps)
			break;
	}
}
|
||||
|
||||
// Extracts the rotational part of the deformation gradient F with a
// Newton-style "accurate projection onto rotations" (APD) iteration.
// The quaternion q serves as the initial guess and is refined in place.
//
// \param F        Deformation gradient.
// \param q        In/out rotation estimate.
// \param maxIter  Iteration limit; converges early once the update angle is tiny.
//
// NOTE: all literals are kept single precision ('f' suffix) - the original
// mixed in double literals (1.0e-6, 1e-16, 1.0 - l_omega2, fabs), silently
// promoting device math to fp64, which is much slower on most GPUs.
__device__ inline void sb_extractRotationAPD(const PxMat33 &F, PxQuat& q, int maxIter)
{
	const PxReal eps = 1.0e-6f;
	const PxReal threshold = 1.0f - eps;
	//Use properties of Rodriguez's formula to detect degenerate case of exact 180 deg rotation by checking if the matrix' trace is close to -1
	//Rodrigues formula for rotation matrices: trace(R) = 1 + 2*cos(theta)
	//Double3 scaling = new Double3(Math.Max(eps, F.column0.Length), Math.Max(eps, F.column1.Length), Math.Max(eps, F.column2.Length));
	//bool overwriteGradient = F.column0.x / scaling.x + F.column1.y / scaling.y + F.column2.z / scaling.z < -0.99;
	//double wPrev = 0;
	for (int i = 0; i < maxIter; ++i)
	{
		// Rotate F into the frame of the current estimate; for an exact
		// rotation B is symmetric and the gradient below vanishes.
		PxMat33 B = PxMat33(q.getConjugate()) * F;
		PxVec3 gradient = PxVec3(B.column2.y - B.column1.z, B.column0.z - B.column2.x, B.column1.x - B.column0.y);
		/*if (overwriteGradient)
		{
			gradient = new Double3(-2, 0, 0); //Gradient for 90 Degree rotation around x axis, any non-zero gradient should work
			overwriteGradient = false;
		}*/
		if (i == 0 && gradient.magnitudeSquared() < 1e-16f)
		{
			//If loop got stuck already in first iteration (e. g. rotation around 180 deg around an arbitrary axis), distort gradient
			gradient = PxVec3(-2, 0, 0); //Gradient for 90 Degree rotation around x axis, any non-zero gradient should work
		}
		// Symmetric 3x3 system (6 unique entries) of the Newton step.
		PxReal h00 = B.column1.y + B.column2.z;
		PxReal h11 = B.column0.x + B.column2.z;
		PxReal h22 = B.column0.x + B.column1.y;
		PxReal h01 = -0.5f * (B.column1.x + B.column0.y);
		PxReal h02 = -0.5f * (B.column2.x + B.column0.z);
		PxReal h12 = -0.5f * (B.column2.y + B.column1.z);
		PxReal detH = -h02 * h02 * h11 + 2.0f * h01 * h02 * h12 - h00 * h12 * h12 - h01 * h01 * h22 + h00 * h11 * h22;
		PxVec3 omega;
		// Solve H * omega = -gradient via the adjugate of H.
		PxReal factor = -0.25f / detH;
		omega.x = factor * ((h11 * h22 - h12 * h12) * gradient.x + (h02 * h12 - h01 * h22) * gradient.y + (h01 * h12 - h02 * h11) * gradient.z);
		omega.y = factor * ((h02 * h12 - h01 * h22) * gradient.x + (h00 * h22 - h02 * h02) * gradient.y + (h01 * h02 - h00 * h12) * gradient.z);
		omega.z = factor * ((h01 * h12 - h02 * h11) * gradient.x + (h01 * h02 - h00 * h12) * gradient.y + (h00 * h11 - h01 * h01) * gradient.z);
		// Near-singular system: fall back to plain gradient descent.
		if (PxAbs(detH) < 1e-9f)
			omega = -gradient;
		// Guard against ascent directions; take a damped gradient step instead.
		if (omega.dot(gradient) > 0.0f)
			omega = gradient * -0.125f;
		PxReal l_omega2 = omega.magnitudeSquared();
		// Rational (trig-free) quaternion update from the rotation vector omega.
		PxReal w = (1.0f - l_omega2) / (1.0f + l_omega2);
		PxVec3 vec = omega * (2.0f / (1.0f + l_omega2));
		q = q * PxQuat(vec.x, vec.y, vec.z, w);
		// w close to 1 means the incremental rotation is negligible.
		if (w > threshold /*&& wPrev>= w*/)
			break;
		//wPrev = w;
	}
}
|
||||
|
||||
// Removes from v its component along planeNormal (which need not be unit
// length), leaving the projection of v onto the plane.
PX_FORCE_INLINE __device__ PxVec3 projectVectorOntoPlane(PxVec3 v, PxVec3 planeNormal)
{
	const PxReal normalScale = planeNormal.dot(v) / planeNormal.magnitudeSquared();
	return v - normalScale * planeNormal;
}
|
||||
|
||||
// Function to compute Lame's parameters (lambda and mu)
|
||||
PX_FORCE_INLINE __device__ PxPair<PxReal, PxReal> lameParameters(PxReal Young, PxReal Poisson)
|
||||
{
|
||||
const PxReal lambda = Young * Poisson / ((1.0f + Poisson) * (1.0f - 2.0f * Poisson));
|
||||
const PxReal mu = Young / (2.0f * (1.0f + Poisson));
|
||||
|
||||
return PxPair<PxReal, PxReal>(lambda, mu);
|
||||
}
|
||||
|
||||
// Fills one slot (this thread's lane) of a PxgFemRigidConstraintBlock for a FEM-vs-rigid contact:
// contact normal + error term, the rigid body's response terms for the normal and two friction
// tangents, the rigid inverse mass, the penetration bias clamp, and the barycentric coordinate
// of the contact on the deformable element.
// - normal/p/pen/delta: contact normal, contact point, penetration depth, and deformable delta.
// - rigidId: encodes rigid body vs articulation link vs static; selects the response computation.
// - penBiasClampFEM: deformable-side penetration bias clamp, combined with the rigid-side one.
// NOTE(review): invDt and isTGS are not referenced in this body — possibly kept for signature
// parity with related prep functions; confirm before removing.
PX_FORCE_INLINE __device__ void prepareFEMContacts(PxgFemRigidConstraintBlock& constraint, const PxVec3& normal,
	PxgSolverSharedDescBase* sharedDesc, const PxVec3& p, PxReal pen, const PxVec3& delta,
	const PxNodeIndex& rigidId, const float4& barycentric,
	PxgConstraintPrepareDesc* prepareDesc, PxU32* solverBodyIndices, PxReal penBiasClampFEM,
	PxReal invDt, bool isTGS)
{
	// Constraint block arrays are sized 32; lane index selects this thread's slot.
	const PxU32 threadIndexInWarp = threadIdx.x & 31;

	PxAlignedTransform* bodyFrames = prepareDesc->body2WorldPool;

	PxgBodySim* bodySims = sharedDesc->mBodySimBufferDeviceData;

	PxgSolverBodyData* solverBodyData = prepareDesc->solverBodyDataPool;
	PxgSolverTxIData* solverDataTxIPool = prepareDesc->solverBodyTxIDataPool;

	// Select two tangent vectors to the normal.
	// Note that the friction behavior may vary depending on the chosen tangent vectors.

	PxVec3 t0, t1;
	PxComputeBasisVectors(normal, t0, t1);

	// Outputs of the per-body branch below: response terms (xyz = lever arm or
	// inertia-scaled lever arm, w = unit response) for normal and both tangents.
	PxReal penBiasClampRigid;
	float4 raXn_resp;
	float4 raXnF0_resp;
	float4 raXnF1_resp;
	PxReal invMass0;

	if(rigidId.isArticulation())
	{
		// Articulation link: responses come from the link's spatial response matrix.
		PxU32 nodeIndexA = rigidId.index();
		PxU32 artiId = bodySims[nodeIndexA].articulationRemapId;

		PxgArticulation& articulation = sharedDesc->articulations[artiId];

		const PxU32 linkID = rigidId.articulationLinkId();
		const PxTransform body2World = articulation.linkBody2Worlds[linkID];
		penBiasClampRigid = articulation.links[linkID].initialAngVelXYZ_penBiasClamp.w;

		const PxVec3 bodyFrame0p(body2World.p.x, body2World.p.y, body2World.p.z);

		// Lever arms: contact point relative to the link frame, crossed with each direction.
		PxVec3 ra = p - bodyFrame0p;
		PxVec3 raXn = ra.cross(normal);
		PxVec3 raXF0 = ra.cross(t0);
		PxVec3 raXF1 = ra.cross(t1);

		PxSpatialMatrix& spatialResponse = articulation.spatialResponseMatrixW[linkID];

		// Unit response along the normal: velocity change per unit impulse.
		const Cm::UnAlignedSpatialVector deltaV0 = spatialResponse * Cm::UnAlignedSpatialVector(normal, raXn);
		const PxReal resp0 = deltaV0.top.dot(raXn) + deltaV0.bottom.dot(normal);

		const Cm::UnAlignedSpatialVector deltaFV0 = spatialResponse * Cm::UnAlignedSpatialVector(t0, raXF0);
		const Cm::UnAlignedSpatialVector deltaFV1 = spatialResponse * Cm::UnAlignedSpatialVector(t1, raXF1);

		const PxReal respF0 = deltaFV0.top.dot(raXF0) + deltaFV0.bottom.dot(t0);
		const PxReal respF1 = deltaFV1.top.dot(raXF1) + deltaFV1.bottom.dot(t1);

		raXn_resp = make_float4(raXn.x, raXn.y, raXn.z, resp0);
		raXnF0_resp = make_float4(raXF0.x, raXF0.y, raXF0.z, respF0);
		raXnF1_resp = make_float4(raXF1.x, raXF1.y, raXF1.z, respF1);

		// Articulations don't use invMass0. We set it to 1 so we get the linear impulse rather than velocity change.
		invMass0 = 1.f;
	}
	else
	{
		// Rigid body (dynamic, kinematic, or static). Static bodies use solver body slot 0.
		PxU32 idx = 0;
		if(!rigidId.isStaticBody())
		{
			idx = solverBodyIndices[rigidId.index()];
		}

		PxMat33 invSqrtInertia0 = solverDataTxIPool[idx].sqrtInvInertia;
		const float4 linVel_invMass0 = solverBodyData[idx].initialLinVelXYZ_invMassW;
		penBiasClampRigid = solverBodyData[idx].initialAngVelXYZ_penBiasClamp.w;
		invMass0 = linVel_invMass0.w;

		// both static and kinematic object have invMass = 0.f
		const bool isKinematic = (invMass0 == 0.f) && (!rigidId.isStaticBody());

		PxAlignedTransform bodyFrame0 = bodyFrames[idx];
		const PxVec3 bodyFrame0p(bodyFrame0.p.x, bodyFrame0.p.y, bodyFrame0.p.z);

		PxVec3 ra = p - bodyFrame0p;
		PxVec3 raXn = ra.cross(normal);
		PxVec3 raXF0 = ra.cross(t0);
		PxVec3 raXF1 = ra.cross(t1);

		// Unit responses via sqrt(invInertia): resp = |I^-1/2 * raXn|^2 + invMass.
		const PxVec3 raXnSqrtInertia = invSqrtInertia0 * raXn;
		const float resp0 = (raXnSqrtInertia.dot(raXnSqrtInertia)) + invMass0;

		const PxVec3 raXF0SqrtInertia = invSqrtInertia0 * raXF0;
		const PxVec3 raXF1SqrtInertia = invSqrtInertia0 * raXF1;

		const float respF0 = (raXF0SqrtInertia.dot(raXF0SqrtInertia)) + invMass0;
		const float respF1 = (raXF1SqrtInertia.dot(raXF1SqrtInertia)) + invMass0;

		if(isKinematic)
		{
			// Kinematics store the raw lever arms (their velocities are prescribed).
			raXn_resp = make_float4(raXn.x, raXn.y, raXn.z, resp0);
			raXnF0_resp = make_float4(raXF0.x, raXF0.y, raXF0.z, respF0);
			raXnF1_resp = make_float4(raXF1.x, raXF1.y, raXF1.z, respF1);
		}
		else
		{
			// Dynamics store the inertia-scaled lever arms used by the solver.
			raXn_resp = make_float4(raXnSqrtInertia.x, raXnSqrtInertia.y, raXnSqrtInertia.z, resp0);
			raXnF0_resp = make_float4(raXF0SqrtInertia.x, raXF0SqrtInertia.y, raXF0SqrtInertia.z, respF0);
			raXnF1_resp = make_float4(raXF1SqrtInertia.x, raXF1SqrtInertia.y, raXF1SqrtInertia.z, respF1);
		}
	}

	// Combine the two (negative) bias clamps; fmaxf picks the less aggressive clamp.
	PxReal maxPenBias = fmaxf(penBiasClampRigid, penBiasClampFEM);
	PxReal error = pen + delta.dot(normal);

	// KS - TODO - split these into 5 separate vectors to promote coalesced memory accesses!
	constraint.normal_errorW[threadIndexInWarp] = make_float4(normal.x, normal.y, normal.z, error);
	constraint.raXn_resp[threadIndexInWarp] = raXn_resp;
	constraint.raXnF0_resp[threadIndexInWarp] = raXnF0_resp;
	constraint.raXnF1_resp[threadIndexInWarp] = raXnF1_resp;
	constraint.fricTan0_invMass0[threadIndexInWarp] = make_float4(t0.x, t0.y, t0.z, invMass0);
	constraint.maxPenBias[threadIndexInWarp] = maxPenBias;
	constraint.barycentric[threadIndexInWarp] = barycentric;
}
|
||||
|
||||
// Vec: PxVec3 for triangles and PxVec4 for tetrahedra.
// Helper that solves one FEM-vs-rigid contact (normal + friction) in an XPBD-style
// position-based loop. Typical usage per iteration:
//   readRigidBody / readCloth-readSoftBody  ->  initialize  ->
//   computeRigidChange + writeRigidBody     ->  computeFEMChange + writeCloth/writeSoftBody.
template <typename Vec>
struct FEMCollision
{
	// Selects the TGS velocity/delta read path in initialize(); PGS otherwise.
	bool isTGS = true;

	// Rigid body
	// How many constraints reference this rigid body this iteration (used for mass-splitting).
	PxU32 rigidBodyReferenceCount = 1;
	PxReal rigidBodyFriction = 0.0f;
	// PxCombineMode value used when combining rigid and deformable friction.
	PxI32 frictionCombineMode;

	// Deformable body
	PxReal deformableFriction = 0.0f;
	// Barycentrically-interpolated accumulated position delta of the deformable element.
	PxVec3 deformableLinDelta;
	Vec deformableVertexInvMasses; // After mass-splitting.

	// Constraint
	PxVec3 normal = PxVec3(0.0f);
	PxVec3 tangent = PxVec3(0.0f);
	PxVec3 raXn = PxVec3(0.0f);
	PxVec3 raXt = PxVec3(0.0f);
	// Impulse increments computed by initialize(); deltaLambdaT is always <= 0.
	PxReal deltaLambdaN = 0.0f;
	PxReal deltaLambdaT = 0.0f;
	PxReal accumulatedDeltaLambdaN = 0.0f;

	// Loads the prepared constraint data for this lane, evaluates the normal constraint
	// violation CN, and (unless checkOnlyActivity) computes the normal impulse increment
	// deltaLambdaN and friction impulse deltaLambdaT. Returns whether the contact is active.
	// - appliedForceRef: previously accumulated normal impulse (warm start / clamping reference).
	// - bc: barycentric coordinate of the contact on the deformable element.
	PX_FORCE_INLINE __device__ bool initialize(const float4& fricTan0_invMass0, PxgFemRigidConstraintBlock& constraint,
		PxReal appliedForceRef, PxNodeIndex rigidId, PxgVelocityReader& velocityReader, PxReal dt,
		const Vec& bc, bool wasActive, bool checkOnlyActivity)
	{
		// PxgFemRigidConstraintBlock is packed with arrays with size 32.
		assert(blockDim.x % 32 == 0 && blockDim.y == 1 && blockDim.z == 1);

		// PBD way of appying constraints
		const PxU32 threadIndexInWarp = threadIdx.x & 31;

		accumulatedDeltaLambdaN = appliedForceRef;

		// Minimum squared tangential magnitude before friction is applied.
		const PxReal threshold = 1.0e-14f;
		const PxReal invDt = 1.0f / dt;

		const float4 raXn_resp = constraint.raXn_resp[threadIndexInWarp];
		const float4 normal_biasW = constraint.normal_errorW[threadIndexInWarp];
		const float4 raXnF0_resp = constraint.raXnF0_resp[threadIndexInWarp];
		const float4 raXnF1_resp = constraint.raXnF1_resp[threadIndexInWarp];
		const PxReal maxPenBias = constraint.maxPenBias[threadIndexInWarp];

		normal = PxVec3(normal_biasW.x, normal_biasW.y, normal_biasW.z);
		// Second tangent is derived so (fric0, fric1, normal) form a basis.
		const PxVec3 fric0 = PxVec3(fricTan0_invMass0.x, fricTan0_invMass0.y, fricTan0_invMass0.z);
		const PxVec3 fric1 = normal.cross(fric0);

		const float initPen = normal_biasW.w;

		raXn = PxVec3(raXn_resp.x, raXn_resp.y, raXn_resp.z);

		PxReal CN;
		PxReal normalVel;
		PxVec3 relLinDelta;
		PxVec3 angDelta(0.0f);
		PxVec3 linVel;
		PxVec3 angVel;

		if(isTGS)
		{
			// TGS: accumulated linear/angular deltas contribute to the error term.
			PxgVelocityPackTGS rigidStateVec;
			velocityReader.readVelocitiesTGS(rigidId, rigidStateVec);

			linVel = rigidStateVec.linVel;
			angVel = rigidStateVec.angVel;

			normalVel = linVel.dot(normal) + angVel.dot(raXn);
			relLinDelta = rigidStateVec.linDelta - deformableLinDelta;
			angDelta = rigidStateVec.angDelta;

			const PxReal error = (initPen + relLinDelta.dot(normal) + angDelta.dot(raXn)) * invDt;

			// maxPenBias is negative.
			const PxReal errorBiased = PxMax(maxPenBias, error);

			CN = errorBiased + normalVel;
		}
		else
		{
			// PGS: only the deformable's delta feeds the error term.
			PxgVelocityPackPGS rigidStateVec;
			velocityReader.readVelocitiesPGS(rigidId, rigidStateVec);

			linVel = rigidStateVec.linVel;
			angVel = rigidStateVec.angVel;

			normalVel = linVel.dot(normal) + angVel.dot(raXn);
			relLinDelta = -deformableLinDelta;

			const PxReal error = (initPen + relLinDelta.dot(normal)) * invDt;

			// maxPenBias is negative.
			const PxReal errorBiased = PxMax(maxPenBias, error);

			CN = errorBiased + normalVel;
		}

		// A contact stays active once it was active this step, or activates on approach.
		const bool isActive = wasActive || CN < 0.0f;
		deltaLambdaN = 0.0f;

		if(checkOnlyActivity)
		{
			return isActive;
		}

		if(!isActive)
		{
			return false;
		}

		// Deformable body term in the denominator of the impulse calculation. Also, refer to delta lambda in the XPBD paper.
		const PxReal deformableInvMass_massSplitting = bc.multiply(bc).dot(deformableVertexInvMasses);

		const PxReal rigidRefCount = static_cast<PxReal>(rigidBodyReferenceCount);
		const PxReal unitResponse = rigidRefCount * raXn_resp.w + deformableInvMass_massSplitting;
		const PxReal invDenom = (unitResponse > 0.0f) ? (1.0f / unitResponse) : 0.0f;

		// Clamp so the accumulated normal impulse never becomes negative.
		deltaLambdaN = PxMax(-CN * invDenom, -accumulatedDeltaLambdaN);
		accumulatedDeltaLambdaN += deltaLambdaN;

		// Friction constraint in the tangent direction.
		const PxVec3 raXnF0 = PxVec3(raXnF0_resp.x, raXnF0_resp.y, raXnF0_resp.z);
		const PxVec3 raXnF1 = PxVec3(raXnF1_resp.x, raXnF1_resp.y, raXnF1_resp.z);

		const float tanVel0 = linVel.dot(fric0) + angVel.dot(raXnF0);
		const float tanVel1 = linVel.dot(fric1) + angVel.dot(raXnF1);

		const PxReal CT0 = (fric0.dot(relLinDelta) + angDelta.dot(raXnF0)) * invDt + tanVel0;
		const PxReal CT1 = (fric1.dot(relLinDelta) + angDelta.dot(raXnF1)) * invDt + tanVel1;
		const PxVec3 relTanDelta = CT0 * fric0 + CT1 * fric1;
		const PxReal tanMagSq = relTanDelta.magnitudeSquared();

		if(tanMagSq > threshold)
		{
			// Friction acts along the actual tangential slip direction.
			const PxReal CT = PxSqrt(tanMagSq);
			const PxReal invTanMag = 1.0f / CT;
			tangent = relTanDelta * invTanMag;

			const PxReal frac0 = tangent.dot(fric0);
			const PxReal frac1 = tangent.dot(fric1);
			raXt = frac0 * raXnF0 + frac1 * raXnF1;

			// Using two precomputed orthonormal tangent directions.
			const PxReal unitResponseT0 = rigidRefCount * raXnF0_resp.w + deformableInvMass_massSplitting;
			const PxReal invTanDenom0 = (unitResponseT0 > 0.0f) ? (1.0f / unitResponseT0) : 0.0f;

			const PxReal unitResponseT1 = rigidRefCount * raXnF1_resp.w + deformableInvMass_massSplitting;
			const PxReal invTanDenom1 = (unitResponseT1 > 0.0f) ? (1.0f / unitResponseT1) : 0.0f;

			PxReal deltaLambdaT0 = CT0 * invTanDenom0;
			PxReal deltaLambdaT1 = CT1 * invTanDenom1;

			// Coulomb cone: friction impulse magnitude capped by mu * |normal impulse|.
			deltaLambdaT = PxSqrt(deltaLambdaT0 * deltaLambdaT0 + deltaLambdaT1 * deltaLambdaT1);
			deltaLambdaT = -PxMin(deltaLambdaT, getCombinedFriction() * PxAbs(deltaLambdaN));

			assert(deltaLambdaT <= 0.0f);
		}

		return true;
	}

	// Converts the impulses computed by initialize() into the rigid body's linear/angular
	// velocity changes. Returns the accumulated normal impulse (for warm starting).
	// Articulation angular terms skip the reference-count scaling (see initialize's invMass0 note).
	PX_FORCE_INLINE __device__ PxReal computeRigidChange(PxVec3& deltaLinVel0, PxVec3& deltaAngVel0, const PxNodeIndex& rigidId,
		PxReal rigidInvMass)
	{
		const PxReal rigidRefCount = static_cast<PxReal>(rigidBodyReferenceCount);

		deltaAngVel0 = rigidId.isArticulation() ? raXn * deltaLambdaN + raXt * deltaLambdaT
			: (raXn * deltaLambdaN + raXt * deltaLambdaT) * rigidRefCount;

		deltaLinVel0 = (normal * deltaLambdaN + tangent * deltaLambdaT) * rigidInvMass * rigidRefCount;

		return accumulatedDeltaLambdaN;
	}

	// Converts the impulses into the deformable's position delta (opposite sign to the rigid
	// body's change). Returns the accumulated normal impulse.
	PX_FORCE_INLINE __device__ PxReal computeFEMChange(PxVec3& deltaPos, PxReal dt)
	{
		deltaPos = -(deltaLambdaN * normal + deltaLambdaT * tangent) * dt;
		return accumulatedDeltaLambdaN;
	}

	// Combines rigid and deformable friction coefficients using frictionCombineMode.
	PX_FORCE_INLINE __device__ PxReal getCombinedFriction()
	{
		return PxsCombinePxReal(rigidBodyFriction, deformableFriction, frictionCombineMode);
	}

	// Maps a PxNodeIndex to a global solver-body slot; -1 for static bodies.
	PX_FORCE_INLINE __device__ int getGlobalRigidBodyId(const PxgPrePrepDesc* const prePrepDesc, const PxNodeIndex& rigidId,
		PxU32 numSolverBodies)
	{
		// Following PxgVelocityReader style to read rigid body indices.
		if(rigidId.isStaticBody())
		{
			return -1;
		}

		const PxU32 solverBodyIdx = prePrepDesc->solverBodyIndices[rigidId.index()];

		// Placing articulation indices at the end of rigid body indices to distinguish between rigid body reference counts and
		// articulation reference counts.
		return rigidId.isArticulation() ? static_cast<int>(numSolverBodies + solverBodyIdx) : static_cast<int>(solverBodyIdx);
	}

	// Loads the rigid body's reference count (for mass-splitting) and its friction material.
	// Falls back to count 1 / friction 0 / eMAX when data is unavailable.
	PX_FORCE_INLINE __device__ void readRigidBody(const PxNodeIndex& rigidId, int globalRigidBodyId, PxReal rigidInvMass,
		const PxU32* const rigidBodyReferenceCounts, const PxsMaterialData* rigidMaterial)
	{
		rigidBodyReferenceCount = 1;

		// Query the reference count for the rigid body.
		if(rigidBodyReferenceCounts && globalRigidBodyId != -1 && rigidInvMass != 0.0f)
		{
			rigidBodyReferenceCount = rigidBodyReferenceCounts[globalRigidBodyId];
		}

		if(rigidMaterial != NULL)
		{
			rigidBodyFriction = rigidMaterial->dynamicFriction;
			frictionCombineMode = rigidMaterial->fricCombineMode;
		}
		else
		{
			rigidBodyFriction = 0.0f;
			frictionCombineMode = PxCombineMode::eMAX;
		}
	}

	// Writes the rigid body's velocity changes to the two per-contact output slots
	// (linear at workIndex0 with the reference count in w, angular at workIndex1).
	PX_FORCE_INLINE __device__ void writeRigidBody(float4* rigidDeltaVel, const PxVec3& deltaLinVel0, const PxVec3& deltaAngVel0,
		PxU32 workIndex0, PxU32 workIndex1, PxReal count)
	{
		rigidDeltaVel[workIndex0] = make_float4(deltaLinVel0.x, deltaLinVel0.y, deltaLinVel0.z, count);
		rigidDeltaVel[workIndex1] = make_float4(deltaAngVel0.x, deltaAngVel0.y, deltaAngVel0.z, 0.f);
	}

	// Reads the cloth-side contact state: sets deformableLinDelta, deformableVertexInvMasses
	// (mass-split unless countReferenceOnly), and deformableFriction. A contact is either on a
	// triangle (bc.w == 0, barycentric blend of 3 vertices) or directly on a vertex.
	// Returns the (possibly mass-split) inverse masses; zero for the invalid-element sentinel.
	PX_FORCE_INLINE __device__ PxVec3 readCloth(const PxgFEMCloth& cloth, PxU32 elementId, const float4& bc,
		const PxsDeformableSurfaceMaterialData* const materials, bool countReferenceOnly)
	{
		// Note: PX_MAX_NB_DEFORMABLE_SURFACE_TRI == PX_MAX_NB_DEFORMABLE_SURFACE_VTX
		if(elementId == PX_MAX_NB_DEFORMABLE_SURFACE_TRI)
		{
			deformableVertexInvMasses = PxVec3(0.0f);
			return deformableVertexInvMasses;
		}

		const float4* const PX_RESTRICT clothPosDeltas = cloth.mAccumulatedDeltaPos;
		float4 clothDelta;

		if(bc.w == 0) // Cloth triangle
		{
			const uint4 triVertId = cloth.mTriangleVertexIndices[elementId];
			const float4 pd0 = clothPosDeltas[triVertId.x];
			const float4 pd1 = clothPosDeltas[triVertId.y];
			const float4 pd2 = clothPosDeltas[triVertId.z];

			// Barycentric interpolation of the accumulated deltas; w carries inv mass.
			clothDelta = pd0 * bc.x + pd1 * bc.y + pd2 * bc.z;
			deformableVertexInvMasses = PxVec3(pd0.w, pd1.w, pd2.w);

			if(!countReferenceOnly)
			{
				const PxU16 globalMaterialIndex = cloth.mMaterialIndices[elementId];
				deformableFriction = materials ? materials[globalMaterialIndex].dynamicFriction : 0.0f;

				// Query the reference count for the cloth.
				PxVec3 deformableVertexReferenceCount;
				deformableVertexReferenceCount.x = cloth.mDeltaPos[triVertId.x].w;
				deformableVertexReferenceCount.y = cloth.mDeltaPos[triVertId.y].w;
				deformableVertexReferenceCount.z = cloth.mDeltaPos[triVertId.z].w;

				// Mass-splitting
				deformableVertexInvMasses = deformableVertexInvMasses.multiply(deformableVertexReferenceCount);
			}
		}
		else // Cloth vertex
		{
			clothDelta = clothPosDeltas[elementId];
			// Only .x is meaningful for the vertex case.
			deformableVertexInvMasses = PxVec3(clothDelta.w, 0.0f, 0.0f);

			if(!countReferenceOnly)
			{
				deformableFriction = materials ? cloth.mDynamicFrictions[elementId] : 0.0f;

				// Query the reference count for the cloth.
				const PxReal deformableVertexReferenceCount = cloth.mDeltaPos[elementId].w;

				// Mass-splitting
				deformableVertexInvMasses.x *= deformableVertexReferenceCount;
			}
		}

		deformableLinDelta = PxVec3(clothDelta.x, clothDelta.y, clothDelta.z);

		return deformableVertexInvMasses;
	}

	// Scatters deltaPos to the cloth vertices (barycentric-weighted for a triangle contact),
	// using atomics because multiple contacts may touch the same vertex concurrently.
	PX_FORCE_INLINE __device__ void writeCloth(PxgFEMCloth& cloth, PxU32 elementId, const float4& bc, PxVec3 deltaPos)
	{
		if(bc.w == 0.f) // Cloth triangle
		{
			const float* bcPtr = reinterpret_cast<const float*>(&bc.x);
			const uint4 triVertInds = cloth.mTriangleVertexIndices[elementId];
			const PxU32* triVertices = reinterpret_cast<const PxU32*>(&triVertInds.x);

#pragma unroll
			for(PxU32 it = 0; it < 3; ++it)
			{
				// Skip fixed vertices (zero inverse mass).
				if(deformableVertexInvMasses[it] > 0.0f)
				{
					const PxVec3 dP = deltaPos * (deformableVertexInvMasses[it] * bcPtr[it]);
					atomicAdd(&cloth.mDeltaPos[triVertices[it]].x, dP.x);
					atomicAdd(&cloth.mDeltaPos[triVertices[it]].y, dP.y);
					atomicAdd(&cloth.mDeltaPos[triVertices[it]].z, dP.z);
				}
			}
		}
		else // Cloth vertex
		{
			if(deformableVertexInvMasses.x > 0.0f)
			{
				const PxVec3 dP = deltaPos * deformableVertexInvMasses.x;
				atomicAdd(&cloth.mDeltaPos[elementId].x, dP.x);
				atomicAdd(&cloth.mDeltaPos[elementId].y, dP.y);
				atomicAdd(&cloth.mDeltaPos[elementId].z, dP.z);
			}
		}
	}

	// Soft-body analogue of readCloth: barycentric blend over the 4 tetrahedron vertices.
	// Returns the (possibly mass-split) inverse masses; zero for the invalid-element sentinel.
	PX_FORCE_INLINE __device__ PxVec4 readSoftBody(const PxgSoftBody& softbody, PxU32 tetId, const float4& bc,
		const PxsDeformableVolumeMaterialData* const materials, bool checkOnlyActivity)
	{
		if(tetId == PX_MAX_NB_DEFORMABLE_VOLUME_TET)
		{
			deformableVertexInvMasses = PxVec4(0.0f);
			return deformableVertexInvMasses;
		}

		const float4* const PX_RESTRICT posDeltas = softbody.mSimDeltaPos;

		const uint4 tetrahedronId = softbody.mSimTetIndices[tetId];
		const float4 pd0 = posDeltas[tetrahedronId.x];
		const float4 pd1 = posDeltas[tetrahedronId.y];
		const float4 pd2 = posDeltas[tetrahedronId.z];
		const float4 pd3 = posDeltas[tetrahedronId.w];

		const float4 softBodyDelta = pd0 * bc.x + pd1 * bc.y + pd2 * bc.z + pd3 * bc.w;
		deformableLinDelta = PxVec3(softBodyDelta.x, softBodyDelta.y, softBodyDelta.z);
		deformableVertexInvMasses = PxVec4(pd0.w, pd1.w, pd2.w, pd3.w);

		if(!checkOnlyActivity)
		{
			const PxU16 globalMaterialIndex = softbody.mMaterialIndices[tetId];
			deformableFriction = materials ? materials[globalMaterialIndex].dynamicFriction : 0.0f;

			// Query the reference count for soft body.
			PxVec4 deformableVertexReferenceCount;
			deformableVertexReferenceCount.x = softbody.mSimDelta[tetrahedronId.x].w;
			deformableVertexReferenceCount.y = softbody.mSimDelta[tetrahedronId.y].w;
			deformableVertexReferenceCount.z = softbody.mSimDelta[tetrahedronId.z].w;
			deformableVertexReferenceCount.w = softbody.mSimDelta[tetrahedronId.w].w;

			// Mass-splitting
			deformableVertexInvMasses = deformableVertexInvMasses.multiply(deformableVertexReferenceCount);
		}

		return deformableVertexInvMasses;
	}

	// Scatters deltaPos to the 4 tetrahedron vertices, barycentric-weighted, via atomics.
	PX_FORCE_INLINE __device__ void writeSoftBody(const PxgSoftBody& softbody, PxU32 tetId, const float4& bc, PxVec3 deltaPos)
	{
		const float* bcPtr = reinterpret_cast<const float*>(&bc.x);
		const uint4 tetrahedronId = softbody.mSimTetIndices[tetId];
		const PxU32* tetVertices = reinterpret_cast<const PxU32*>(&tetrahedronId.x);

#pragma unroll
		for(PxU32 it = 0; it < 4; ++it)
		{
			// Skip fixed vertices (zero inverse mass).
			if(deformableVertexInvMasses[it] > 0.0f)
			{
				const PxVec3 dP = deltaPos * (deformableVertexInvMasses[it] * bcPtr[it]);
				atomicAdd(&softbody.mSimDelta[tetVertices[it]].x, dP.x);
				atomicAdd(&softbody.mSimDelta[tetVertices[it]].y, dP.y);
				atomicAdd(&softbody.mSimDelta[tetVertices[it]].z, dP.z);
			}
		}
	}
};
|
||||
|
||||
#endif // __DEFORMABLE_UTILS_CUH__
|
||||
131
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/denseGridStandalone.cuh
vendored
Normal file
131
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/denseGridStandalone.cuh
vendored
Normal file
@@ -0,0 +1,131 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "vector_types.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "PxgDenseGridData.h"
|
||||
|
||||
//Should have the same value as the same define in sparseGrid.cuh
|
||||
#define EMPTY_SUBGRID 0xffffffff
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Linearizes (xi, yi, zi) with x varying fastest: index = xi + numCellsX * (yi + numCellsY * zi).
PX_FORCE_INLINE __device__ __host__ int getCellNr(int numCellsX, int numCellsY, int xi, int yi, int zi)
{
	const int sliceRowOffset = zi * numCellsY + yi;
	return sliceRowOffset * numCellsX + xi;
}
|
||||
// Overload taking the grid dimensions packed in an int3 (the z count is not needed for linearization).
PX_FORCE_INLINE __device__ __host__ int getCellNr(const int3& gridSize, int xi, int yi, int zi)
{
	return getCellNr(gridSize.x, gridSize.y, xi, yi, zi);
}
|
||||
|
||||
// Inverse of getCellNr: recovers (x, y, z) from a linear cell index.
// The w component is unused for dense grids and is set to -1.
PX_FORCE_INLINE __device__ __host__ int4 getCellCoords(int numCellsX, int numCellsY, int cellNr)
{
	const int xi = cellNr % numCellsX;
	const int remainder = cellNr / numCellsX;
	const int yi = remainder % numCellsY;
	const int zi = remainder / numCellsY;
	return make_int4(xi, yi, zi, -1);
}
|
||||
// Overload taking the grid dimensions packed in an int3.
PX_FORCE_INLINE __device__ __host__ int4 getCellCoords(const int3& gridSize, int cellNr)
{
	return getCellCoords(gridSize.x, gridSize.y, cellNr);
}
|
||||
|
||||
//Functions for the PxDenseGridData class - make sure they have the same name and arguments as their counterparts of the sparse grid to simplify templating

// Maps a linear thread index to its (x, y, z, -1) cell coordinates.
PX_FORCE_INLINE __device__ int4 getGridCoordinates(const PxDenseGridData& data, int threadIndex)
{
	return getCellCoords(data.mGridParams.numCellsX, data.mGridParams.numCellsY, threadIndex);
}
|
||||
|
||||
// Linear cell index for the given coordinates. applySubgridOrder is unused here;
// it only exists so the signature matches the sparse-grid counterpart.
PX_FORCE_INLINE __device__ PxU32 getCellIndex(PxDenseGridData& data, const int4& index, bool applySubgridOrder = true)
{
	return getCellNr(data.mGridParams.numCellsX, data.mGridParams.numCellsY, index.x, index.y, index.z);
}
|
||||
|
||||
// Linear cell index for the coordinates shifted by (offsetX, offsetY, offsetZ).
// No bounds check is performed — see getCellIndexSafe for the checked variant.
// applySubgridOrder is unused (signature parity with the sparse grid).
PX_FORCE_INLINE __device__ PxU32 getCellIndex(PxDenseGridData& data, const int4& index, PxI32 offsetX, PxI32 offsetY, PxI32 offsetZ, bool applySubgridOrder = true)
{
	return getCellNr(data.mGridParams.numCellsX, data.mGridParams.numCellsY, index.x + offsetX, index.y + offsetY, index.z + offsetZ);
}
|
||||
|
||||
// Bounds-checked variant of getCellIndex: returns EMPTY_SUBGRID when the shifted
// coordinates fall outside the grid. applySubgridOrder is unused (signature parity
// with the sparse-grid counterpart).
PX_FORCE_INLINE __device__ PxU32 getCellIndexSafe(PxDenseGridData& data, const int4& index, PxI32 offsetX, PxI32 offsetY, PxI32 offsetZ, bool applySubgridOrder = true)
{
	const PxI32 x = index.x + offsetX;
	const PxI32 y = index.y + offsetY;
	const PxI32 z = index.z + offsetZ;
	const bool inside = x >= 0 && y >= 0 && z >= 0 &&
		x < data.mGridParams.numCellsX && y < data.mGridParams.numCellsY && z < data.mGridParams.numCellsZ;
	if (!inside)
		return EMPTY_SUBGRID;
	return getCellNr(data.mGridParams.numCellsX, data.mGridParams.numCellsY, x, y, z);
}
|
||||
|
||||
// Reads dataSource at the cell shifted by the given offsets. No bounds check —
// see getGridValueSafe for the checked variant.
PX_FORCE_INLINE __device__ PxReal getGridValue(PxDenseGridData& data, const PxReal* dataSource, const int4& index, PxI32 offsetX, PxI32 offsetY, PxI32 offsetZ)
{
	return dataSource[getCellIndex(data, index, offsetX, offsetY, offsetZ)];
}
|
||||
|
||||
//Assumes that 0.0 is a valid value for access outside of the grid
|
||||
PX_FORCE_INLINE __device__ PxReal getGridValueSafe(PxDenseGridData& data, const PxReal* dataSource, int4 index, PxI32 offsetX, PxI32 offsetY, PxI32 offsetZ)
|
||||
{
|
||||
if (index.x + offsetX < 0 || index.y + offsetY < 0 || index.z + offsetZ < 0 || index.x + offsetX >= data.mGridParams.numCellsX || index.y + offsetY >= data.mGridParams.numCellsY || index.z + offsetZ >= data.mGridParams.numCellsZ)
|
||||
return 0.0f;
|
||||
return dataSource[getCellIndex(data, index, offsetX, offsetY, offsetZ)];
|
||||
}
|
||||
|
||||
// True when threadIndex does not address a valid cell of the grid.
PX_FORCE_INLINE __device__ bool outOfRange(PxDenseGridData& data, const int threadIndex)
{
	return threadIndex >= data.maxNumCells();
}
|
||||
|
||||
// Same as outOfRange for a dense grid; kept separate for signature parity with the sparse grid.
PX_FORCE_INLINE __device__ bool outOfActiveCells(PxDenseGridData& data, const int threadIndex)
{
	return threadIndex >= data.maxNumCells(); //All cells are always active on a dense grid
}
|
||||
|
||||
// True when index lies outside [0, numCells - 1) on any axis. Note the upper bound
// is exclusive of the last cell per axis (numCells - 1), matching isLastCell's count.
PX_FORCE_INLINE __device__ bool outOfBounds(PxDenseGridData& data, const int4& index)
{
	const bool insideX = index.x >= 0 && index.x < data.mGridParams.numCellsX - 1;
	const bool insideY = index.y >= 0 && index.y < data.mGridParams.numCellsY - 1;
	const bool insideZ = index.z >= 0 && index.z < data.mGridParams.numCellsZ - 1;
	return !(insideX && insideY && insideZ);
}
|
||||
|
||||
// True for the thread addressing the last of the (numCells-1)^3-style interior cells
// (each axis contributes numCells - 1 cells, matching outOfBounds' exclusive upper bound).
PX_FORCE_INLINE __device__ bool isLastCell(PxDenseGridData& data, const int threadIndex)
{
	return threadIndex == (data.mGridParams.numCellsX - 1)*(data.mGridParams.numCellsY - 1)*(data.mGridParams.numCellsZ - 1) - 1;
}
|
||||
|
||||
// World-space position of the cell's origin corner: origin + index * gridSpacing.
PX_FORCE_INLINE __device__ PxVec3 getLocation(PxDenseGridData& data, const int4& index)
{
	const PxVec3 cellOffset = PxVec3(index.x, index.y, index.z) * data.mGridParams.gridSpacing;
	return data.mGridParams.origin + cellOffset;
}
|
||||
|
||||
// Returns the (x, y, z, -1) cell containing particle position p.
// Side effect: p is shifted into grid-local coordinates (relative to the grid origin).
PX_FORCE_INLINE __device__ int4 getCellIndexFromParticleAndTransformToLocalCoordinates(PxDenseGridData& data, PxVec3& p)
{
	p = p - data.mGridParams.origin;
	const PxReal invSpacing = 1.0f / data.mGridParams.gridSpacing;
	// PxFloor handles negative local coordinates correctly (rounds toward -inf).
	const PxI32 xi = (int)PxFloor(p.x * invSpacing);
	const PxI32 yi = (int)PxFloor(p.y * invSpacing);
	const PxI32 zi = (int)PxFloor(p.z * invSpacing);
	return make_int4(xi, yi, zi, -1);
}
|
||||
552
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/diffuseParticles.cu
vendored
Normal file
552
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/diffuseParticles.cu
vendored
Normal file
@@ -0,0 +1,552 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
|
||||
#include "vector_types.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
#include "foundation/PxBounds3.h"
|
||||
#include "PxgParticleSystemCore.h"
|
||||
#include "PxgParticleSystem.h"
|
||||
#include "PxgParticleSystemCoreKernelIndices.h"
|
||||
#include "PxgBodySim.h"
|
||||
#include "PxgCommonDefines.h"
|
||||
#include "reduction.cuh"
|
||||
#include "shuffle.cuh"
|
||||
#include "stdio.h"
|
||||
#include "PxgSolverBody.h"
|
||||
#include "PxgSolverCoreDesc.h"
|
||||
#include "PxParticleSystem.h"
|
||||
#include "assert.h"
|
||||
#include "copy.cuh"
|
||||
#include "PxgSimulationCoreDesc.h"
|
||||
#include "gridCal.cuh"
|
||||
#include "particleSystem.cuh"
|
||||
#include "atomic.cuh"
|
||||
#include "utils.cuh"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// simpler kernel for diffuse weighting
|
||||
__device__ inline PxReal WDiffuse(const PxReal h, const PxReal invR)
|
||||
{
|
||||
return (1.0f - h * invR);
|
||||
}
|
||||
|
||||
// Intentionally empty host entry point; presumably referenced from host code so this
// translation unit (and its kernels) gets linked/registered — TODO confirm.
extern "C" __host__ void initDiffuseParticlesKernels0() {}
|
||||
|
||||
// Concatenates the diffuse particles of all per-system diffuse buffers into the particle
// system's single unsorted position/velocity arrays.
// Grid mapping: blockIdx.z selects the active particle system, blockIdx.y selects the
// diffuse buffer within it, and (blockIdx.x, threadIdx.x) index the particles of that buffer.
extern "C" __global__ void ps_updateUnsortedDiffuseArrayLaunch(
	const PxgParticleSystem * PX_RESTRICT particleSystems,
	const PxU32 * PX_RESTRICT activeParticleSystems)
{
	const PxU32 particleId = activeParticleSystems[blockIdx.z];

	const PxgParticleSystem& particleSystem = particleSystems[particleId];

	const PxU32 bufferIndex = blockIdx.y;

	if (bufferIndex < particleSystem.mNumDiffuseBuffers)
	{
		const PxU32 threadIndexInWarp = threadIdx.x & 31;

		// Destination arrays holding all buffers' particles back-to-back.
		float4* PX_RESTRICT unsortedPositions = reinterpret_cast<float4*>(particleSystem.mDiffusePosition_LifeTime);
		float4* PX_RESTRICT unsortedVels = reinterpret_cast<float4*>(particleSystem.mDiffuseVelocity);

		// Exclusive prefix sum of the particle counts of all buffers preceding bufferIndex:
		// each lane sums a strided subset, then the warp reduction combines them.
		PxU32 localSum = 0;

		for (PxU32 i = threadIndexInWarp; i < bufferIndex; i += WARP_SIZE)
		{
			localSum += particleSystem.mDiffuseSimBuffers[i].mNumDiffuseParticles[0];
		}

		PxU32 bufferOffset = warpReduction<AddOpPxU32, PxU32>(FULL_MASK, localSum);

		PxgParticleDiffuseSimBuffer& buffer = particleSystem.mDiffuseSimBuffers[bufferIndex];

		int numDiffuseParticles = buffer.mNumDiffuseParticles[0];

		const float4* particles = buffer.mDiffusePositions_LifeTime;
		const float4* vels = buffer.mDiffuseVelocities;

		const PxU32 globalThreadIndex = threadIdx.x + blockDim.x * blockIdx.x;
		// NOTE(review): unsigned/signed comparison — numDiffuseParticles is converted to
		// PxU32 here; assumes the count is never negative. Confirm upstream invariant.
		if (globalThreadIndex >= numDiffuseParticles)
			return;

		// One thread records where this buffer's particles start in the packed arrays.
		if (globalThreadIndex == 0)
		{
			buffer.mStartIndex = bufferOffset;
		}

		// Copy this thread's particle into its packed slot.
		const PxU32 ind = bufferOffset + globalThreadIndex;
		unsortedPositions[ind] = particles[globalThreadIndex];
		unsortedVels[ind] = vels[globalThreadIndex];
	}
}
|
||||
|
||||
extern "C" __global__ void ps_diffuseParticleOneWayCollision(
|
||||
PxgParticleSystem * PX_RESTRICT particleSystems,
|
||||
const PxU32* PX_RESTRICT activeParticleSystems,
|
||||
const PxU32 count
|
||||
)
|
||||
{
|
||||
__shared__ __align__(16) PxU8 particleSystemMemory[sizeof(PxgParticleSystem)];
|
||||
PxgParticleSystem& shParticleSystem = *(reinterpret_cast<PxgParticleSystem*>(particleSystemMemory));
|
||||
|
||||
const PxU32 id = activeParticleSystems[blockIdx.y];
|
||||
|
||||
const PxgParticleSystem& particleSystem = particleSystems[id];
|
||||
|
||||
const uint2* sParticleSystem = reinterpret_cast<const uint2*>(&particleSystem);
|
||||
uint2* dParticleSystem = reinterpret_cast<uint2*>(&shParticleSystem);
|
||||
|
||||
blockCopy<uint2>(dParticleSystem, sParticleSystem, sizeof(PxgParticleSystem));
|
||||
__syncthreads();
|
||||
|
||||
const PxU32 pi = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
|
||||
const PxU32 numParticles = *shParticleSystem.mNumDiffuseParticles;
|
||||
|
||||
|
||||
if (pi >= numParticles)
|
||||
return;
|
||||
|
||||
float4* PX_RESTRICT newPos = reinterpret_cast<float4*>(shParticleSystem.mDiffuseSortedPos_LifeTime);
|
||||
|
||||
const PxgParticleContactInfo* PX_RESTRICT contacts = shParticleSystem.mDiffuseOneWayContactInfos;
|
||||
const PxU32* PX_RESTRICT contactCounts = shParticleSystem.mDiffuseOneWayContactCount;
|
||||
|
||||
const PxU32 contactCount = PxMin(PxgParticleContactInfo::MaxStaticContactsPerParticle, contactCounts[pi]);
|
||||
|
||||
|
||||
if (contactCount)
|
||||
{
|
||||
PxVec3 posCorr = PxLoad3(newPos[pi]);
|
||||
for (PxU32 c = 0, offset = pi; c < contactCount; ++c, offset += numParticles)
|
||||
{
|
||||
const PxgParticleContactInfo& contact = contacts[offset];
|
||||
|
||||
const PxVec3 surfaceNormal = PxLoad3(contact.mNormal_PenW);
|
||||
|
||||
const PxVec3 deltaP = -surfaceNormal * contact.mNormal_PenW.w;
|
||||
posCorr += deltaP;
|
||||
}
|
||||
|
||||
newPos[pi] = make_float4(posCorr.x, posCorr.y, posCorr.z, newPos[pi].w);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
extern "C" __global__ void ps_diffuseParticleUpdatePBF(
|
||||
PxgParticleSystem* PX_RESTRICT particleSystems,
|
||||
const PxU32* activeParticleSystems,
|
||||
const PxVec3 gravity,
|
||||
const PxReal dt)
|
||||
{
|
||||
__shared__ __align__(16) PxU8 particleSystemMemory[sizeof(PxgParticleSystem)];
|
||||
PxgParticleSystem& shParticleSystem = *(reinterpret_cast<PxgParticleSystem*>(particleSystemMemory));
|
||||
|
||||
__shared__ int offset[3];
|
||||
|
||||
if (threadIdx.x == 0)
|
||||
{
|
||||
offset[0] = 0; offset[1] = -1; offset[2] = 1;
|
||||
}
|
||||
|
||||
const PxU32 id = activeParticleSystems[blockIdx.y];
|
||||
|
||||
const PxgParticleSystem& particleSystem = particleSystems[id];
|
||||
|
||||
const uint2* sParticleSystem = reinterpret_cast<const uint2*>(&particleSystem);
|
||||
uint2* dParticleSystem = reinterpret_cast<uint2*>(&shParticleSystem);
|
||||
|
||||
blockCopy<uint2>(dParticleSystem, sParticleSystem, sizeof(PxgParticleSystem));
|
||||
__syncthreads();
|
||||
|
||||
{
|
||||
int numDiffuse = *shParticleSystem.mNumDiffuseParticles;
|
||||
|
||||
const PxU32 pi = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
|
||||
if (pi >= numDiffuse)
|
||||
return;
|
||||
|
||||
const PxU32* const PX_RESTRICT cellStarts = shParticleSystem.mCellStart;
|
||||
const PxU32* const PX_RESTRICT cellEnds = shParticleSystem.mCellEnd;
|
||||
|
||||
// per-particle data
|
||||
const float4* const PX_RESTRICT sortedPose = reinterpret_cast<float4*>(shParticleSystem.mSortedPositions_InvMass);
|
||||
const float4* const PX_RESTRICT sortedVel = reinterpret_cast<float4*>(shParticleSystem.mSortedVelocities);
|
||||
|
||||
float4* PX_RESTRICT diffusePositions = reinterpret_cast<float4*>(shParticleSystem.mDiffuseSortedPos_LifeTime);
|
||||
|
||||
//Overloading this buffer to store the new velocity...
|
||||
float4* PX_RESTRICT newVel = reinterpret_cast<float4*>(shParticleSystem.mDiffuseSortedOriginPos_LifeTime);
|
||||
|
||||
// get elements
|
||||
const float4 xi4 = diffusePositions[pi];
|
||||
const PxVec3 pos = PxLoad3(xi4);
|
||||
|
||||
// interpolate
|
||||
PxVec3 velAvg(PxZero);
|
||||
PxU32 numNeighbors = 0;
|
||||
|
||||
const PxReal cellWidth = shParticleSystem.mCommonData.mGridCellWidth;
|
||||
const PxReal contactDistanceSq = shParticleSystem.mCommonData.mParticleContactDistanceSq;
|
||||
const PxReal invContactDistance = shParticleSystem.mCommonData.mParticleContactDistanceInv;
|
||||
const int3 gridPos = calcGridPos(xi4, cellWidth);
|
||||
const uint3 gridSize = make_uint3(shParticleSystem.mCommonData.mGridSizeX, shParticleSystem.mCommonData.mGridSizeY, shParticleSystem.mCommonData.mGridSizeZ);
|
||||
|
||||
// Iterate over cell
|
||||
PxReal weightSum = 0.0f;
|
||||
PxVec3 velocitySum(0.f);
|
||||
|
||||
const PxU32 maxNeighbors = 16;
|
||||
|
||||
const PxU32 end = (shParticleSystem.mData.mFlags & PxParticleFlag::eFULL_DIFFUSE_ADVECTION) ? 3 : 1;
|
||||
|
||||
|
||||
for (int z = 0; z < end; ++z)
|
||||
for (int y = 0; y < end; ++y)
|
||||
for (int x = 0; x < end; ++x)
|
||||
{
|
||||
const int3 neighbourPos = make_int3(gridPos.x + offset[x], gridPos.y + offset[y], gridPos.z + offset[z]);
|
||||
const PxU32 gridHash = calcGridHash(neighbourPos, gridSize);
|
||||
const PxU32 startIndex = cellStarts[gridHash];
|
||||
|
||||
if (startIndex != EMPTY_CELL)
|
||||
{
|
||||
const PxU32 endIndex = cellEnds[gridHash];
|
||||
for (PxU32 q = startIndex; q < endIndex; ++q)
|
||||
{
|
||||
const PxVec3 xj = PxLoad3(sortedPose[q]);
|
||||
const PxVec3 xij = pos - xj;
|
||||
|
||||
const PxReal dSq = xij.dot(xij);
|
||||
|
||||
if (dSq < contactDistanceSq)
|
||||
{
|
||||
const PxVec3 vj = PxLoad3(sortedVel[q]);
|
||||
const PxReal w = WDiffuse(sqrtf(dSq), invContactDistance);
|
||||
|
||||
weightSum += w;
|
||||
velocitySum += vj * w;
|
||||
|
||||
++numNeighbors;
|
||||
if (numNeighbors == maxNeighbors)
|
||||
goto weight_sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
weight_sum:
|
||||
if (weightSum > 0)
|
||||
velAvg = velocitySum / weightSum;
|
||||
|
||||
newVel[pi] = make_float4(velAvg.x, velAvg.y, velAvg.z, PxReal(numNeighbors));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ void ps_diffuseParticleCompact(
|
||||
PxgParticleSystem* PX_RESTRICT particleSystems,
|
||||
const PxU32* activeParticleSystems,
|
||||
const PxVec3 gravity,
|
||||
const PxReal dt)
|
||||
{
|
||||
__shared__ __align__(16) PxU8 particleSystemMemory[sizeof(PxgParticleSystem)];
|
||||
PxgParticleSystem& shParticleSystem = *(reinterpret_cast<PxgParticleSystem*>(particleSystemMemory));
|
||||
|
||||
const PxU32 id = activeParticleSystems[blockIdx.z];
|
||||
|
||||
const PxgParticleSystem& particleSystem = particleSystems[id];
|
||||
|
||||
const uint2* sParticleSystem = reinterpret_cast<const uint2*>(&particleSystem);
|
||||
uint2* dParticleSystem = reinterpret_cast<uint2*>(&shParticleSystem);
|
||||
|
||||
blockCopy<uint2>(dParticleSystem, sParticleSystem, sizeof(PxgParticleSystem));
|
||||
__syncthreads();
|
||||
|
||||
const PxU32 bufferIndex = blockIdx.y;
|
||||
if (bufferIndex < shParticleSystem.mNumDiffuseBuffers)
|
||||
{
|
||||
|
||||
PxgParticleDiffuseSimBuffer& buffer = shParticleSystem.mDiffuseSimBuffers[bufferIndex];
|
||||
|
||||
int* numDiffuseParticles = buffer.mNumDiffuseParticles;
|
||||
int numDiffuse = numDiffuseParticles[0];
|
||||
|
||||
const PxU32 pi = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const PxU32 threadIndexInWarp = threadIdx.x & 31;
|
||||
|
||||
if (pi >= numDiffuse)
|
||||
return;
|
||||
|
||||
float4* PX_RESTRICT diffusePositionsNew = buffer.mDiffusePositions_LifeTime;
|
||||
float4* PX_RESTRICT diffuseVelocitiesNew = buffer.mDiffuseVelocities;
|
||||
|
||||
float4* PX_RESTRICT velAvgs = reinterpret_cast<float4*>(shParticleSystem.mDiffuseSortedOriginPos_LifeTime);
|
||||
|
||||
float4* PX_RESTRICT diffusePositions = reinterpret_cast<float4*>(shParticleSystem.mDiffuseSortedPos_LifeTime);
|
||||
float4* PX_RESTRICT diffusePositionsOld = reinterpret_cast<float4*>(shParticleSystem.mDiffuseOriginPos_LifeTime);
|
||||
|
||||
const PxU32* reverseLookup = shParticleSystem.mDiffuseUnsortedToSortedMapping;
|
||||
|
||||
const PxU32 index = pi + buffer.mStartIndex;
|
||||
const PxU32 sortedInd = reverseLookup[index];
|
||||
|
||||
// get elements
|
||||
const float4 xi4 = diffusePositions[sortedInd];
|
||||
const float4 vi4Old = diffusePositionsOld[index];
|
||||
const float4 xiva4 = velAvgs[sortedInd];
|
||||
const PxVec3 pos = PxLoad3(xi4);
|
||||
const PxVec3 oldPos = PxLoad3(vi4Old);
|
||||
const PxVec3 velAvg = PxLoad3(xiva4);
|
||||
|
||||
const PxReal lifeDelta = dt;
|
||||
|
||||
PxVec3 vel = (pos - oldPos)*(1.f / dt);
|
||||
|
||||
// integrate diffuse particle
|
||||
PxVec3 newVel;
|
||||
if (xiva4.w < 4.f)
|
||||
{
|
||||
// spray (ballistic)
|
||||
newVel = vel * (1.0f - buffer.mParams.airDrag * dt);
|
||||
}
|
||||
else if (xiva4.w < 8.f)
|
||||
{
|
||||
// foam
|
||||
newVel = velAvg;
|
||||
}
|
||||
else
|
||||
{
|
||||
// bubble
|
||||
newVel = vel - (1.f + buffer.mParams.buoyancy) * gravity * dt + buffer.mParams.bubbleDrag * (velAvg - vel);
|
||||
}
|
||||
|
||||
const float maxVel = shParticleSystem.mData.mMaxVelocity;
|
||||
if (newVel.magnitudeSquared() > 0)
|
||||
{
|
||||
newVel = PxMin(newVel.magnitude(), maxVel) * newVel.getNormalized();
|
||||
}
|
||||
|
||||
PxVec3 newPosCorr = pos + (newVel - vel) * dt;
|
||||
PxVec3 newVelCorr = newVel;
|
||||
|
||||
__syncwarp();
|
||||
|
||||
const PxReal lifeTime = fmaxf(xi4.w - lifeDelta, 0.0f);
|
||||
|
||||
PxU32 res = __ballot_sync(FULL_MASK, lifeTime > 0.f);
|
||||
|
||||
PxU32 offset = 0;
|
||||
|
||||
if (threadIndexInWarp == 0)
|
||||
offset = atomicAdd(&numDiffuseParticles[1], __popc(res));
|
||||
|
||||
offset = __shfl_sync(FULL_MASK, offset, 0);
|
||||
|
||||
|
||||
|
||||
if (lifeTime > 0.f)
|
||||
{
|
||||
PxU32 newIndex = offset + warpScanExclusive(res, threadIndexInWarp);
|
||||
|
||||
diffusePositionsNew[newIndex] = make_float4(newPosCorr.x, newPosCorr.y, newPosCorr.z, lifeTime);
|
||||
diffuseVelocitiesNew[newIndex] = make_float4(newVelCorr.x, newVelCorr.y, newVelCorr.z, 0.0f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ void ps_diffuseParticleCreate(
|
||||
PxgParticleSystem * PX_RESTRICT particleSystems,
|
||||
const PxU32* const PX_RESTRICT activeParticleSystems,
|
||||
const PxReal* const PX_RESTRICT randomTable,
|
||||
const PxU32 randomTableSize,
|
||||
const PxReal dt)
|
||||
{
|
||||
__shared__ __align__(16) PxU8 particleSystemMemory[sizeof(PxgParticleSystem)];
|
||||
PxgParticleSystem& shParticleSystem = *(reinterpret_cast<PxgParticleSystem*>(particleSystemMemory));
|
||||
|
||||
const PxU32 id = activeParticleSystems[blockIdx.z];
|
||||
const PxgParticleSystem& particleSystem = particleSystems[id];
|
||||
|
||||
const uint2* sParticleSystem = reinterpret_cast<const uint2*>(&particleSystem);
|
||||
uint2* dParticleSystem = reinterpret_cast<uint2*>(&shParticleSystem);
|
||||
|
||||
blockCopy<uint2>(dParticleSystem, sParticleSystem, sizeof(PxgParticleSystem));
|
||||
__syncthreads();
|
||||
|
||||
const PxU32 bufferIndex = blockIdx.y;
|
||||
if (bufferIndex < shParticleSystem.mCommonData.mNumParticleBuffers)
|
||||
{
|
||||
|
||||
const PxgParticleSimBuffer& buffer = shParticleSystem.mParticleSimBuffers[bufferIndex];
|
||||
|
||||
const PxU32 diffuseParticleBufferIndex = buffer.mDiffuseParticleBufferIndex;
|
||||
|
||||
if (diffuseParticleBufferIndex == 0xffffffff)
|
||||
return;
|
||||
|
||||
const PxgParticleSystemData& data = shParticleSystem.mData;
|
||||
|
||||
const PxU32 pi = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
|
||||
const PxU32 numParticles = buffer.mNumActiveParticles;
|
||||
|
||||
if (pi >= numParticles)
|
||||
return;
|
||||
|
||||
|
||||
PxgParticleDiffuseSimBuffer& diffuseBuffer = shParticleSystem.mDiffuseSimBuffers[diffuseParticleBufferIndex];
|
||||
|
||||
if (diffuseBuffer.mMaxNumParticles == 0)
|
||||
return;
|
||||
|
||||
// get arrays
|
||||
const float4* const PX_RESTRICT sortedPose = reinterpret_cast<float4*>(shParticleSystem.mSortedPositions_InvMass);
|
||||
const float4* const PX_RESTRICT sortedVel = reinterpret_cast<float4*>(shParticleSystem.mSortedVelocities);
|
||||
const PxU32* PX_RESTRICT phases = shParticleSystem.mSortedPhaseArray;
|
||||
const float2* const PX_RESTRICT potentials = reinterpret_cast<float2*>(shParticleSystem.mDiffusePotentials);
|
||||
|
||||
float4* PX_RESTRICT diffusePositionsNew = diffuseBuffer.mDiffusePositions_LifeTime;
|
||||
float4* PX_RESTRICT diffuseVelocitiesNew = diffuseBuffer.mDiffuseVelocities;
|
||||
|
||||
int* numDiffuseParticles = diffuseBuffer.mNumDiffuseParticles;
|
||||
|
||||
const PxU32* reverseLookup = shParticleSystem.mUnsortedToSortedMapping;
|
||||
const PxU32 offset = particleSystem.mParticleBufferRunsum[bufferIndex];
|
||||
|
||||
const PxU32 sortedInd = reverseLookup[pi + offset];
|
||||
// get elements
|
||||
const float2 ptnts = potentials[sortedInd];
|
||||
const PxReal threshold = diffuseBuffer.mParams.threshold;
|
||||
const PxU32 phase = phases[sortedInd];
|
||||
|
||||
if (!PxGetFluid(phase))
|
||||
return;
|
||||
|
||||
const float4 vi4 = sortedVel[sortedInd];
|
||||
|
||||
//Kinetic energy + pressure
|
||||
const PxReal kineticEnergy = dot3(vi4, vi4) * diffuseBuffer.mParams.kineticEnergyWeight;
|
||||
const PxReal divergence = diffuseBuffer.mParams.divergenceWeight * ptnts.x;
|
||||
const PxReal pressure = diffuseBuffer.mParams.pressureWeight * ptnts.y;
|
||||
PxReal intensity = pressure - divergence + kineticEnergy;
|
||||
|
||||
//if (pi == 0)
|
||||
// printf("numParticles %i diffuseParticleBufferIndex %i numDiffuseParticles[1] %i threshold %f\n", numParticles, diffuseParticleBufferIndex, numDiffuseParticles[1], threshold);
|
||||
|
||||
const PxReal r0 = randomTable[(sortedInd + 0) % randomTableSize];
|
||||
|
||||
if(r0 * intensity > threshold)
|
||||
{
|
||||
const float4 xi4 = sortedPose[sortedInd];
|
||||
|
||||
|
||||
//for (int i=0; i < 5; ++i)
|
||||
{
|
||||
// try and allocate new diffuse particles
|
||||
const int newIndex = atomicAdd(&numDiffuseParticles[1], 1);
|
||||
|
||||
if (newIndex < diffuseBuffer.mMaxNumParticles)
|
||||
{
|
||||
|
||||
const PxVec3 xi = PxLoad3(xi4);
|
||||
const PxVec3 vi = PxLoad3(vi4);
|
||||
|
||||
const PxReal r1 = randomTable[(sortedInd + 1) % randomTableSize];
|
||||
const PxReal r2 = randomTable[(sortedInd + 2) % randomTableSize];
|
||||
const PxReal r3 = randomTable[(sortedInd + 3) % randomTableSize];
|
||||
|
||||
const PxReal lifeMin = 1.0f;
|
||||
const PxReal lifeMax = diffuseBuffer.mParams.lifetime;
|
||||
const PxReal lifeScale = fminf(intensity / threshold, 1.f) * r1;
|
||||
const PxReal lifetime = lifeMin + lifeScale * (lifeMax - lifeMin);
|
||||
|
||||
const PxVec3 q = xi - r2 * vi * dt + PxVec3(r1, r2, r3) * data.mRestOffset * 0.25f;
|
||||
|
||||
diffusePositionsNew[newIndex] = make_float4(q.x, q.y, q.z, lifetime);
|
||||
diffuseVelocitiesNew[newIndex] = make_float4(vi.x, vi.y, vi.z, 0.0f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
extern "C" __global__ void ps_diffuseParticleCopy(
|
||||
PxgParticleSystem * PX_RESTRICT particleSystems,
|
||||
const PxU32* const PX_RESTRICT activeParticleSystems,
|
||||
const PxU32 count)
|
||||
{
|
||||
const PxU32 id = activeParticleSystems[blockIdx.z];
|
||||
PxgParticleSystem& particleSystem = particleSystems[id];
|
||||
|
||||
const PxU32 numDiffuseBuffers = particleSystem.mNumDiffuseBuffers;
|
||||
|
||||
const PxU32 bufferIndex = blockIdx.y;
|
||||
if (bufferIndex < numDiffuseBuffers)
|
||||
{
|
||||
PxgParticleDiffuseSimBuffer& diffuseBuffer = particleSystem.mDiffuseSimBuffers[bufferIndex];
|
||||
|
||||
int* numDiffuseParticles = diffuseBuffer.mNumDiffuseParticles;
|
||||
const PxU32 numDiffuse = PxMin(PxI32(diffuseBuffer.mMaxNumParticles), numDiffuseParticles[1]);
|
||||
*diffuseBuffer.mNumActiveDiffuseParticles = numDiffuse; //pinned memory
|
||||
numDiffuseParticles[0] = numDiffuse;
|
||||
numDiffuseParticles[1] = 0;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
extern "C" __global__ void ps_diffuseParticleSum(
|
||||
PxgParticleSystem * PX_RESTRICT particleSystems,
|
||||
const PxU32* const PX_RESTRICT activeParticleSystems,
|
||||
const PxU32 count)
|
||||
{
|
||||
const PxU32 id = activeParticleSystems[blockIdx.x];
|
||||
PxgParticleSystem& particleSystem = particleSystems[id];
|
||||
|
||||
const PxU32 numDiffuseBuffers = particleSystem.mNumDiffuseBuffers;
|
||||
|
||||
PxU32 totalDiffuse = 0;
|
||||
for (PxU32 i = threadIdx.x; i < numDiffuseBuffers; i += WARP_SIZE)
|
||||
{
|
||||
PxgParticleDiffuseSimBuffer& diffuseBuffer = particleSystem.mDiffuseSimBuffers[i];
|
||||
totalDiffuse += diffuseBuffer.mNumDiffuseParticles[0];
|
||||
}
|
||||
|
||||
totalDiffuse = warpReduction<AddOpPxU32, PxU32>(FULL_MASK, totalDiffuse);
|
||||
|
||||
|
||||
if(threadIdx.x == 0)
|
||||
{
|
||||
*particleSystem.mNumDiffuseParticles = totalDiffuse;
|
||||
}
|
||||
}
|
||||
1164
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/isosurfaceExtraction.cu
vendored
Normal file
1164
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/isosurfaceExtraction.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
184
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/marchingCubesTables.cuh
vendored
Normal file
184
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/marchingCubesTables.cuh
vendored
Normal file
@@ -0,0 +1,184 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
|
||||
|
||||
|
||||
// point numbering
|
||||
|
||||
// 7-----------6
|
||||
// /| /|
|
||||
// / | / |
|
||||
// / | / |
|
||||
// 4-----------5 |
|
||||
// | | | |
|
||||
// | 3-------|---2
|
||||
// | / | /
|
||||
// | / | /
|
||||
// |/ |/
|
||||
// 0-----------1
|
||||
|
||||
// edge numbering
|
||||
|
||||
// *-----6-----*
|
||||
// /| /|
|
||||
// 7 | 5 |
|
||||
// / 11 / 10
|
||||
// *-----4-----* |
|
||||
// | | | |
|
||||
// | *-----2-|---*
|
||||
// 8 / 9 /
|
||||
// | 3 | 1
|
||||
// |/ |/
|
||||
// *-----0-----*
|
||||
|
||||
|
||||
// z
|
||||
// | y
|
||||
// | /
|
||||
// |/
|
||||
// 0---- x
|
||||
|
||||
__constant__ int marchingCubeCorners[8][3] = { {0,0,0}, {1,0,0},{1,1,0},{0,1,0}, {0,0,1}, {1,0,1},{1,1,1},{0,1,1} };
|
||||
|
||||
__constant__ int firstMarchingCubesId[257] = {
|
||||
0, 0, 3, 6, 12, 15, 21, 27, 36, 39, 45, 51, 60, 66, 75, 84, 90, 93, 99, 105, 114,
|
||||
120, 129, 138, 150, 156, 165, 174, 186, 195, 207, 219, 228, 231, 237, 243, 252, 258, 267, 276, 288,
|
||||
294, 303, 312, 324, 333, 345, 357, 366, 372, 381, 390, 396, 405, 417, 429, 438, 447, 459, 471, 480,
|
||||
492, 507, 522, 528, 531, 537, 543, 552, 558, 567, 576, 588, 594, 603, 612, 624, 633, 645, 657, 666,
|
||||
672, 681, 690, 702, 711, 723, 735, 750, 759, 771, 783, 798, 810, 825, 840, 852, 858, 867, 876, 888,
|
||||
897, 909, 915, 924, 933, 945, 957, 972, 984, 999, 1008, 1014, 1023, 1035, 1047, 1056, 1068, 1083, 1092, 1098,
|
||||
1110, 1125, 1140, 1152, 1167, 1173, 1185, 1188, 1191, 1197, 1203, 1212, 1218, 1227, 1236, 1248, 1254, 1263, 1272, 1284,
|
||||
1293, 1305, 1317, 1326, 1332, 1341, 1350, 1362, 1371, 1383, 1395, 1410, 1419, 1425, 1437, 1446, 1458, 1467, 1482, 1488,
|
||||
1494, 1503, 1512, 1524, 1533, 1545, 1557, 1572, 1581, 1593, 1605, 1620, 1632, 1647, 1662, 1674, 1683, 1695, 1707, 1716,
|
||||
1728, 1743, 1758, 1770, 1782, 1791, 1806, 1812, 1827, 1839, 1845, 1848, 1854, 1863, 1872, 1884, 1893, 1905, 1917, 1932,
|
||||
1941, 1953, 1965, 1980, 1986, 1995, 2004, 2010, 2019, 2031, 2043, 2058, 2070, 2085, 2100, 2106, 2118, 2127, 2142, 2154,
|
||||
2163, 2169, 2181, 2184, 2193, 2205, 2217, 2232, 2244, 2259, 2268, 2280, 2292, 2307, 2322, 2328, 2337, 2349, 2355, 2358,
|
||||
2364, 2373, 2382, 2388, 2397, 2409, 2415, 2418, 2427, 2433, 2445, 2448, 2454, 2457, 2460, 2460 };
|
||||
|
||||
__constant__ int marchingCubesIds[2460] = {
|
||||
0, 8, 3, 0, 1, 9, 1, 8, 3, 9, 8, 1, 1, 2, 10, 0, 8, 3, 1, 2, 10, 9, 2, 10, 0, 2, 9, 2, 8, 3, 2,
|
||||
10, 8, 10, 9, 8, 3, 11, 2, 0, 11, 2, 8, 11, 0, 1, 9, 0, 2, 3, 11, 1, 11, 2, 1, 9, 11, 9, 8, 11, 3,
|
||||
10, 1, 11, 10, 3, 0, 10, 1, 0, 8, 10, 8, 11, 10, 3, 9, 0, 3, 11, 9, 11, 10, 9, 9, 8, 10, 10, 8, 11, 4,
|
||||
7, 8, 4, 3, 0, 7, 3, 4, 0, 1, 9, 8, 4, 7, 4, 1, 9, 4, 7, 1, 7, 3, 1, 1, 2, 10, 8, 4, 7, 3,
|
||||
4, 7, 3, 0, 4, 1, 2, 10, 9, 2, 10, 9, 0, 2, 8, 4, 7, 2, 10, 9, 2, 9, 7, 2, 7, 3, 7, 9, 4, 8,
|
||||
4, 7, 3, 11, 2, 11, 4, 7, 11, 2, 4, 2, 0, 4, 9, 0, 1, 8, 4, 7, 2, 3, 11, 4, 7, 11, 9, 4, 11, 9,
|
||||
11, 2, 9, 2, 1, 3, 10, 1, 3, 11, 10, 7, 8, 4, 1, 11, 10, 1, 4, 11, 1, 0, 4, 7, 11, 4, 4, 7, 8, 9,
|
||||
0, 11, 9, 11, 10, 11, 0, 3, 4, 7, 11, 4, 11, 9, 9, 11, 10, 9, 5, 4, 9, 5, 4, 0, 8, 3, 0, 5, 4, 1,
|
||||
5, 0, 8, 5, 4, 8, 3, 5, 3, 1, 5, 1, 2, 10, 9, 5, 4, 3, 0, 8, 1, 2, 10, 4, 9, 5, 5, 2, 10, 5,
|
||||
4, 2, 4, 0, 2, 2, 10, 5, 3, 2, 5, 3, 5, 4, 3, 4, 8, 9, 5, 4, 2, 3, 11, 0, 11, 2, 0, 8, 11, 4,
|
||||
9, 5, 0, 5, 4, 0, 1, 5, 2, 3, 11, 2, 1, 5, 2, 5, 8, 2, 8, 11, 4, 8, 5, 10, 3, 11, 10, 1, 3, 9,
|
||||
5, 4, 4, 9, 5, 0, 8, 1, 8, 10, 1, 8, 11, 10, 5, 4, 0, 5, 0, 11, 5, 11, 10, 11, 0, 3, 5, 4, 8, 5,
|
||||
8, 10, 10, 8, 11, 9, 7, 8, 5, 7, 9, 9, 3, 0, 9, 5, 3, 5, 7, 3, 0, 7, 8, 0, 1, 7, 1, 5, 7, 1,
|
||||
5, 3, 3, 5, 7, 9, 7, 8, 9, 5, 7, 10, 1, 2, 10, 1, 2, 9, 5, 0, 5, 3, 0, 5, 7, 3, 8, 0, 2, 8,
|
||||
2, 5, 8, 5, 7, 10, 5, 2, 2, 10, 5, 2, 5, 3, 3, 5, 7, 7, 9, 5, 7, 8, 9, 3, 11, 2, 9, 5, 7, 9,
|
||||
7, 2, 9, 2, 0, 2, 7, 11, 2, 3, 11, 0, 1, 8, 1, 7, 8, 1, 5, 7, 11, 2, 1, 11, 1, 7, 7, 1, 5, 9,
|
||||
5, 8, 8, 5, 7, 10, 1, 3, 10, 3, 11, 5, 7, 0, 5, 0, 9, 7, 11, 0, 1, 0, 10, 11, 10, 0, 11, 10, 0, 11,
|
||||
0, 3, 10, 5, 0, 8, 0, 7, 5, 7, 0, 11, 10, 5, 7, 11, 5, 10, 6, 5, 0, 8, 3, 5, 10, 6, 9, 0, 1, 5,
|
||||
10, 6, 1, 8, 3, 1, 9, 8, 5, 10, 6, 1, 6, 5, 2, 6, 1, 1, 6, 5, 1, 2, 6, 3, 0, 8, 9, 6, 5, 9,
|
||||
0, 6, 0, 2, 6, 5, 9, 8, 5, 8, 2, 5, 2, 6, 3, 2, 8, 2, 3, 11, 10, 6, 5, 11, 0, 8, 11, 2, 0, 10,
|
||||
6, 5, 0, 1, 9, 2, 3, 11, 5, 10, 6, 5, 10, 6, 1, 9, 2, 9, 11, 2, 9, 8, 11, 6, 3, 11, 6, 5, 3, 5,
|
||||
1, 3, 0, 8, 11, 0, 11, 5, 0, 5, 1, 5, 11, 6, 3, 11, 6, 0, 3, 6, 0, 6, 5, 0, 5, 9, 6, 5, 9, 6,
|
||||
9, 11, 11, 9, 8, 5, 10, 6, 4, 7, 8, 4, 3, 0, 4, 7, 3, 6, 5, 10, 1, 9, 0, 5, 10, 6, 8, 4, 7, 10,
|
||||
6, 5, 1, 9, 7, 1, 7, 3, 7, 9, 4, 6, 1, 2, 6, 5, 1, 4, 7, 8, 1, 2, 5, 5, 2, 6, 3, 0, 4, 3,
|
||||
4, 7, 8, 4, 7, 9, 0, 5, 0, 6, 5, 0, 2, 6, 7, 3, 9, 7, 9, 4, 3, 2, 9, 5, 9, 6, 2, 6, 9, 3,
|
||||
11, 2, 7, 8, 4, 10, 6, 5, 5, 10, 6, 4, 7, 2, 4, 2, 0, 2, 7, 11, 0, 1, 9, 4, 7, 8, 2, 3, 11, 5,
|
||||
10, 6, 9, 2, 1, 9, 11, 2, 9, 4, 11, 7, 11, 4, 5, 10, 6, 8, 4, 7, 3, 11, 5, 3, 5, 1, 5, 11, 6, 5,
|
||||
1, 11, 5, 11, 6, 1, 0, 11, 7, 11, 4, 0, 4, 11, 0, 5, 9, 0, 6, 5, 0, 3, 6, 11, 6, 3, 8, 4, 7, 6,
|
||||
5, 9, 6, 9, 11, 4, 7, 9, 7, 11, 9, 10, 4, 9, 6, 4, 10, 4, 10, 6, 4, 9, 10, 0, 8, 3, 10, 0, 1, 10,
|
||||
6, 0, 6, 4, 0, 8, 3, 1, 8, 1, 6, 8, 6, 4, 6, 1, 10, 1, 4, 9, 1, 2, 4, 2, 6, 4, 3, 0, 8, 1,
|
||||
2, 9, 2, 4, 9, 2, 6, 4, 0, 2, 4, 4, 2, 6, 8, 3, 2, 8, 2, 4, 4, 2, 6, 10, 4, 9, 10, 6, 4, 11,
|
||||
2, 3, 0, 8, 2, 2, 8, 11, 4, 9, 10, 4, 10, 6, 3, 11, 2, 0, 1, 6, 0, 6, 4, 6, 1, 10, 6, 4, 1, 6,
|
||||
1, 10, 4, 8, 1, 2, 1, 11, 8, 11, 1, 9, 6, 4, 9, 3, 6, 9, 1, 3, 11, 6, 3, 8, 11, 1, 8, 1, 0, 11,
|
||||
6, 1, 9, 1, 4, 6, 4, 1, 3, 11, 6, 3, 6, 0, 0, 6, 4, 6, 4, 8, 11, 6, 8, 7, 10, 6, 7, 8, 10, 8,
|
||||
9, 10, 0, 7, 3, 0, 10, 7, 0, 9, 10, 6, 7, 10, 10, 6, 7, 1, 10, 7, 1, 7, 8, 1, 8, 0, 10, 6, 7, 10,
|
||||
7, 1, 1, 7, 3, 1, 2, 6, 1, 6, 8, 1, 8, 9, 8, 6, 7, 2, 6, 9, 2, 9, 1, 6, 7, 9, 0, 9, 3, 7,
|
||||
3, 9, 7, 8, 0, 7, 0, 6, 6, 0, 2, 7, 3, 2, 6, 7, 2, 2, 3, 11, 10, 6, 8, 10, 8, 9, 8, 6, 7, 2,
|
||||
0, 7, 2, 7, 11, 0, 9, 7, 6, 7, 10, 9, 10, 7, 1, 8, 0, 1, 7, 8, 1, 10, 7, 6, 7, 10, 2, 3, 11, 11,
|
||||
2, 1, 11, 1, 7, 10, 6, 1, 6, 7, 1, 8, 9, 6, 8, 6, 7, 9, 1, 6, 11, 6, 3, 1, 3, 6, 0, 9, 1, 11,
|
||||
6, 7, 7, 8, 0, 7, 0, 6, 3, 11, 0, 11, 6, 0, 7, 11, 6, 7, 6, 11, 3, 0, 8, 11, 7, 6, 0, 1, 9, 11,
|
||||
7, 6, 8, 1, 9, 8, 3, 1, 11, 7, 6, 10, 1, 2, 6, 11, 7, 1, 2, 10, 3, 0, 8, 6, 11, 7, 2, 9, 0, 2,
|
||||
10, 9, 6, 11, 7, 6, 11, 7, 2, 10, 3, 10, 8, 3, 10, 9, 8, 7, 2, 3, 6, 2, 7, 7, 0, 8, 7, 6, 0, 6,
|
||||
2, 0, 2, 7, 6, 2, 3, 7, 0, 1, 9, 1, 6, 2, 1, 8, 6, 1, 9, 8, 8, 7, 6, 10, 7, 6, 10, 1, 7, 1,
|
||||
3, 7, 10, 7, 6, 1, 7, 10, 1, 8, 7, 1, 0, 8, 0, 3, 7, 0, 7, 10, 0, 10, 9, 6, 10, 7, 7, 6, 10, 7,
|
||||
10, 8, 8, 10, 9, 6, 8, 4, 11, 8, 6, 3, 6, 11, 3, 0, 6, 0, 4, 6, 8, 6, 11, 8, 4, 6, 9, 0, 1, 9,
|
||||
4, 6, 9, 6, 3, 9, 3, 1, 11, 3, 6, 6, 8, 4, 6, 11, 8, 2, 10, 1, 1, 2, 10, 3, 0, 11, 0, 6, 11, 0,
|
||||
4, 6, 4, 11, 8, 4, 6, 11, 0, 2, 9, 2, 10, 9, 10, 9, 3, 10, 3, 2, 9, 4, 3, 11, 3, 6, 4, 6, 3, 8,
|
||||
2, 3, 8, 4, 2, 4, 6, 2, 0, 4, 2, 4, 6, 2, 1, 9, 0, 2, 3, 4, 2, 4, 6, 4, 3, 8, 1, 9, 4, 1,
|
||||
4, 2, 2, 4, 6, 8, 1, 3, 8, 6, 1, 8, 4, 6, 6, 10, 1, 10, 1, 0, 10, 0, 6, 6, 0, 4, 4, 6, 3, 4,
|
||||
3, 8, 6, 10, 3, 0, 3, 9, 10, 9, 3, 10, 9, 4, 6, 10, 4, 4, 9, 5, 7, 6, 11, 0, 8, 3, 4, 9, 5, 11,
|
||||
7, 6, 5, 0, 1, 5, 4, 0, 7, 6, 11, 11, 7, 6, 8, 3, 4, 3, 5, 4, 3, 1, 5, 9, 5, 4, 10, 1, 2, 7,
|
||||
6, 11, 6, 11, 7, 1, 2, 10, 0, 8, 3, 4, 9, 5, 7, 6, 11, 5, 4, 10, 4, 2, 10, 4, 0, 2, 3, 4, 8, 3,
|
||||
5, 4, 3, 2, 5, 10, 5, 2, 11, 7, 6, 7, 2, 3, 7, 6, 2, 5, 4, 9, 9, 5, 4, 0, 8, 6, 0, 6, 2, 6,
|
||||
8, 7, 3, 6, 2, 3, 7, 6, 1, 5, 0, 5, 4, 0, 6, 2, 8, 6, 8, 7, 2, 1, 8, 4, 8, 5, 1, 5, 8, 9,
|
||||
5, 4, 10, 1, 6, 1, 7, 6, 1, 3, 7, 1, 6, 10, 1, 7, 6, 1, 0, 7, 8, 7, 0, 9, 5, 4, 4, 0, 10, 4,
|
||||
10, 5, 0, 3, 10, 6, 10, 7, 3, 7, 10, 7, 6, 10, 7, 10, 8, 5, 4, 10, 4, 8, 10, 6, 9, 5, 6, 11, 9, 11,
|
||||
8, 9, 3, 6, 11, 0, 6, 3, 0, 5, 6, 0, 9, 5, 0, 11, 8, 0, 5, 11, 0, 1, 5, 5, 6, 11, 6, 11, 3, 6,
|
||||
3, 5, 5, 3, 1, 1, 2, 10, 9, 5, 11, 9, 11, 8, 11, 5, 6, 0, 11, 3, 0, 6, 11, 0, 9, 6, 5, 6, 9, 1,
|
||||
2, 10, 11, 8, 5, 11, 5, 6, 8, 0, 5, 10, 5, 2, 0, 2, 5, 6, 11, 3, 6, 3, 5, 2, 10, 3, 10, 5, 3, 5,
|
||||
8, 9, 5, 2, 8, 5, 6, 2, 3, 8, 2, 9, 5, 6, 9, 6, 0, 0, 6, 2, 1, 5, 8, 1, 8, 0, 5, 6, 8, 3,
|
||||
8, 2, 6, 2, 8, 1, 5, 6, 2, 1, 6, 1, 3, 6, 1, 6, 10, 3, 8, 6, 5, 6, 9, 8, 9, 6, 10, 1, 0, 10,
|
||||
0, 6, 9, 5, 0, 5, 6, 0, 0, 3, 8, 5, 6, 10, 10, 5, 6, 11, 5, 10, 7, 5, 11, 11, 5, 10, 11, 7, 5, 8,
|
||||
3, 0, 5, 11, 7, 5, 10, 11, 1, 9, 0, 10, 7, 5, 10, 11, 7, 9, 8, 1, 8, 3, 1, 11, 1, 2, 11, 7, 1, 7,
|
||||
5, 1, 0, 8, 3, 1, 2, 7, 1, 7, 5, 7, 2, 11, 9, 7, 5, 9, 2, 7, 9, 0, 2, 2, 11, 7, 7, 5, 2, 7,
|
||||
2, 11, 5, 9, 2, 3, 2, 8, 9, 8, 2, 2, 5, 10, 2, 3, 5, 3, 7, 5, 8, 2, 0, 8, 5, 2, 8, 7, 5, 10,
|
||||
2, 5, 9, 0, 1, 5, 10, 3, 5, 3, 7, 3, 10, 2, 9, 8, 2, 9, 2, 1, 8, 7, 2, 10, 2, 5, 7, 5, 2, 1,
|
||||
3, 5, 3, 7, 5, 0, 8, 7, 0, 7, 1, 1, 7, 5, 9, 0, 3, 9, 3, 5, 5, 3, 7, 9, 8, 7, 5, 9, 7, 5,
|
||||
8, 4, 5, 10, 8, 10, 11, 8, 5, 0, 4, 5, 11, 0, 5, 10, 11, 11, 3, 0, 0, 1, 9, 8, 4, 10, 8, 10, 11, 10,
|
||||
4, 5, 10, 11, 4, 10, 4, 5, 11, 3, 4, 9, 4, 1, 3, 1, 4, 2, 5, 1, 2, 8, 5, 2, 11, 8, 4, 5, 8, 0,
|
||||
4, 11, 0, 11, 3, 4, 5, 11, 2, 11, 1, 5, 1, 11, 0, 2, 5, 0, 5, 9, 2, 11, 5, 4, 5, 8, 11, 8, 5, 9,
|
||||
4, 5, 2, 11, 3, 2, 5, 10, 3, 5, 2, 3, 4, 5, 3, 8, 4, 5, 10, 2, 5, 2, 4, 4, 2, 0, 3, 10, 2, 3,
|
||||
5, 10, 3, 8, 5, 4, 5, 8, 0, 1, 9, 5, 10, 2, 5, 2, 4, 1, 9, 2, 9, 4, 2, 8, 4, 5, 8, 5, 3, 3,
|
||||
5, 1, 0, 4, 5, 1, 0, 5, 8, 4, 5, 8, 5, 3, 9, 0, 5, 0, 3, 5, 9, 4, 5, 4, 11, 7, 4, 9, 11, 9,
|
||||
10, 11, 0, 8, 3, 4, 9, 7, 9, 11, 7, 9, 10, 11, 1, 10, 11, 1, 11, 4, 1, 4, 0, 7, 4, 11, 3, 1, 4, 3,
|
||||
4, 8, 1, 10, 4, 7, 4, 11, 10, 11, 4, 4, 11, 7, 9, 11, 4, 9, 2, 11, 9, 1, 2, 9, 7, 4, 9, 11, 7, 9,
|
||||
1, 11, 2, 11, 1, 0, 8, 3, 11, 7, 4, 11, 4, 2, 2, 4, 0, 11, 7, 4, 11, 4, 2, 8, 3, 4, 3, 2, 4, 2,
|
||||
9, 10, 2, 7, 9, 2, 3, 7, 7, 4, 9, 9, 10, 7, 9, 7, 4, 10, 2, 7, 8, 7, 0, 2, 0, 7, 3, 7, 10, 3,
|
||||
10, 2, 7, 4, 10, 1, 10, 0, 4, 0, 10, 1, 10, 2, 8, 7, 4, 4, 9, 1, 4, 1, 7, 7, 1, 3, 4, 9, 1, 4,
|
||||
1, 7, 0, 8, 1, 8, 7, 1, 4, 0, 3, 7, 4, 3, 4, 8, 7, 9, 10, 8, 10, 11, 8, 3, 0, 9, 3, 9, 11, 11,
|
||||
9, 10, 0, 1, 10, 0, 10, 8, 8, 10, 11, 3, 1, 10, 11, 3, 10, 1, 2, 11, 1, 11, 9, 9, 11, 8, 3, 0, 9, 3,
|
||||
9, 11, 1, 2, 9, 2, 11, 9, 0, 2, 11, 8, 0, 11, 3, 2, 11, 2, 3, 8, 2, 8, 10, 10, 8, 9, 9, 10, 2, 0,
|
||||
9, 2, 2, 3, 8, 2, 8, 10, 0, 1, 8, 1, 10, 8, 1, 10, 2, 1, 3, 8, 9, 1, 8, 0, 9, 1, 0, 3, 8 };
|
||||
|
||||
__constant__ int marchingCubesEdgeLocations[12][4] = {
|
||||
// relative cell coords, edge within cell
|
||||
{0, 0, 0, 0},
|
||||
{1, 0, 0, 1},
|
||||
{0, 1, 0, 0},
|
||||
{0, 0, 0, 1},
|
||||
|
||||
{0, 0, 1, 0},
|
||||
{1, 0, 1, 1},
|
||||
{0, 1, 1, 0},
|
||||
{0, 0, 1, 1},
|
||||
|
||||
{0, 0, 0, 2},
|
||||
{1, 0, 0, 2},
|
||||
{1, 1, 0, 2},
|
||||
{0, 1, 0, 2}
|
||||
};
|
||||
177
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/matrixDecomposition.cuh
vendored
Normal file
177
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/matrixDecomposition.cuh
vendored
Normal file
@@ -0,0 +1,177 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __CU_MATRIXDECOMPOSITION_CUH__
|
||||
#define __CU_MATRIXDECOMPOSITION_CUH__
|
||||
|
||||
#include "foundation/PxMat33.h"
|
||||
|
||||
namespace physx {
|
||||
|
||||
// Eigen decomposition code thanks to Matthias Mueller-Fischer!
|
||||
// Applies one Jacobi (Givens) rotation in the pq-plane of a symmetric 3x3
// matrix stored as a flat float[9], zeroing the (p, q) off-diagonal entry.
// Template parameters p, q, k must be a permutation of {0, 1, 2}; k is the
// single row/column untouched by the rotation. The accumulated rotations are
// stored in R, whose columns converge to the eigenvectors of A.
// Uses fast CUDA intrinsics (__fdividef, rsqrtf), trading a little accuracy
// for speed versus the PxMat33-based jacobiRotate below.
// NOTE(review): pq_index uses 3*q+p while kp uses p*3+k; for a symmetric A
// both index the same values, so the mix appears intentional -- confirm.
template<int p, int q, int k> __device__
inline void jacobiRotateT(float* A, float* R)
{
	const int pq_index = 3 * q + p;
	const int qp_index = 3 * p + q;
	const int pp_index = 3 * p + p;
	const int qq_index = 3 * q + q;

	// rotates A through phi in pq-plane to set A(p,q) = 0
	// rotation stored in R whose columns are eigenvectors of A
	if (A[pq_index] == 0.0f)
		return;

	// Rotation angle parameters: t = tan(phi), c = cos(phi), s = sin(phi).
	float d = __fdividef(A[pp_index] - A[qq_index], 2.0f*A[pq_index]);
	float dSqPlus1 = d * d + 1.0f;
	float t = __fdividef(1.0f, fabs(d) + sqrtf(dSqPlus1));
	t = copysign(t, d);
	float c = 1.0f * rsqrtf(t*t + 1.0f);
	float s = t * c;

	// Update the diagonal and zero the symmetric off-diagonal pair.
	A[pp_index] += t * A[pq_index];
	A[qq_index] -= t * A[pq_index];
	A[pq_index] = A[qp_index] = 0.0f;

	// transform A: only the single remaining index k is affected.
	const int kp = p * 3 + k;
	const int kq = q * 3 + k;
	const int pk = k * 3 + p;
	const int qk = k * 3 + q;

	float Akp = c * A[kp] + s * A[kq];
	float Akq = -s * A[kp] + c * A[kq];
	A[kp] = A[pk] = Akp;
	A[kq] = A[qk] = Akq;

	// store rotation in R (loop unrolled for k = 0,1,2)
	// k = 0
	const int kp0 = p * 3 + 0;
	const int kq0 = q * 3 + 0;

	float Rkp0 = c * R[kp0] + s * R[kq0];
	float Rkq0 = -s * R[kp0] + c * R[kq0];
	R[kp0] = Rkp0;
	R[kq0] = Rkq0;

	// k = 1
	const int kp1 = p * 3 + 1;
	const int kq1 = q * 3 + 1;

	float Rkp1 = c * R[kp1] + s * R[kq1];
	float Rkq1 = -s * R[kp1] + c * R[kq1];
	R[kp1] = Rkp1;
	R[kq1] = Rkq1;

	// k = 2
	const int kp2 = p * 3 + 2;
	const int kq2 = q * 3 + 2;

	float Rkp2 = c * R[kp2] + s * R[kq2];
	float Rkq2 = -s * R[kp2] + c * R[kq2];
	R[kp2] = Rkp2;
	R[kq2] = Rkq2;
}
|
||||
|
||||
// Applies a Jacobi (Givens) rotation in the pq-plane of a symmetric 3x3
// matrix A, zeroing the (p, q) entry. The accumulated rotations are written
// into R, whose columns converge to the eigenvectors of A.
__device__
inline void jacobiRotate(PxMat33 &A, PxMat33 &R, int p, int q)
{
	// Nothing to do if the target entry is already zero.
	if (A(p, q) == 0.0f)
		return;

	// Rotation angle parameters: tanPhi = tan(phi), cosPhi, sinPhi.
	const float theta = (A(p, p) - A(q, q)) / (2.0f*A(p, q));
	float tanPhi = 1.0f / (fabs(theta) + sqrtf(theta*theta + 1.0f));
	if (theta < 0.0f)
		tanPhi = -tanPhi;
	const float cosPhi = 1.0f / sqrtf(tanPhi*tanPhi + 1.0f);
	const float sinPhi = tanPhi * cosPhi;

	// Update the diagonal, then zero the symmetric off-diagonal pair.
	A(p, p) += tanPhi * A(p, q);
	A(q, q) -= tanPhi * A(p, q);
	A(p, q) = A(q, p) = 0.0f;

	// Transform the remaining row/column (the single index k != p, q),
	// keeping A symmetric.
	for (int k = 0; k < 3; k++)
	{
		if (k == p || k == q)
			continue;
		const float newAkp = cosPhi * A(k, p) + sinPhi * A(k, q);
		const float newAkq = -sinPhi * A(k, p) + cosPhi * A(k, q);
		A(k, p) = A(p, k) = newAkp;
		A(k, q) = A(q, k) = newAkq;
	}

	// Accumulate the rotation into R.
	for (int k = 0; k < 3; k++)
	{
		const float newRkp = cosPhi * R(k, p) + sinPhi * R(k, q);
		const float newRkq = -sinPhi * R(k, p) + cosPhi * R(k, q);
		R(k, p) = newRkp;
		R(k, q) = newRkq;
	}
}
|
||||
|
||||
// Iterative Jacobi eigendecomposition of a *symmetric* 3x3 matrix.
// On return, R's columns are the (approximate) eigenvectors of the input and
// A has been rotated towards diagonal form (its diagonal holds the
// eigenvalues). Each iteration zeroes the off-diagonal element of largest
// magnitude; numJacobiIterations bounds the work, with early exit once all
// off-diagonals fall below epsilon.
__device__
inline void eigenDecomposition(PxMat33 &A, PxMat33 &R, int numJacobiIterations = 4)
{
	const float epsilon = 1e-15f;

	// only for symmetric matrices!
	R = PxMat33(PxVec3(1.f, 0.f, 0.f), PxVec3(0.f, 1.f, 0.f), PxVec3(0.f, 0.f, 1.f));

	// Flat float[9] views for the template (fast) rotation path.
	float* fA = static_cast<float*>(&A(0, 0));
	float* fR = static_cast<float*>(&R(0, 0));

#define USE_FAST_JACOBI 1

	for (int i = 0; i < numJacobiIterations; i++)
	{// 3 off diagonal elements
		// find off diagonal element with maximum modulus
		int j = 0;
		float max = fabs(A(0, 1));
		float a = fabs(A(0, 2));
		if (a > max) { j = 1; max = a; }
		a = fabs(A(1, 2));
		if (a > max) { j = 2; max = a; }

		// all small enough -> done
		if (max < epsilon) break;

#if USE_FAST_JACOBI
		// rotate matrix with respect to that element
		if (j == 0) jacobiRotateT<0, 1, 2>(fA, fR);
		else if (j == 1) jacobiRotateT<0, 2, 1>(fA, fR);
		else jacobiRotateT<1, 2, 0>(fA, fR);
#else
		// Map the selected element j to its (p, q) index pair.
		// (The previous code here referenced undeclared p/q and would not
		// have compiled with USE_FAST_JACOBI disabled.)
		if (j == 0) jacobiRotate(A, R, 0, 1);
		else if (j == 1) jacobiRotate(A, R, 0, 2);
		else jacobiRotate(A, R, 1, 2);
#endif
	}
}
|
||||
|
||||
}
|
||||
|
||||
#endif // __CU_MATRIXDECOMPOSITION_CUH__
|
||||
121
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/particleSystem.cuh
vendored
Normal file
121
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/particleSystem.cuh
vendored
Normal file
@@ -0,0 +1,121 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __PARTICLE_SYSTEM_CUH__
|
||||
#define __PARTICLE_SYSTEM_CUH__
|
||||
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
#include "utils.cuh"
|
||||
#include "reduction.cuh"
|
||||
#include "PxParticleGpu.h"
|
||||
#include "PxSparseGridParams.h"
|
||||
#include "PxgParticleSystem.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
// Returns the interior (halo-excluded) world-space extent of one sparse-grid
// subgrid along each axis: gridSpacing * (subgridSize - 2 * haloSize).
__device__ inline PxVec3 getSubgridDomainSize(const PxSparseGridParams& params)
{
	const PxReal spacing = params.gridSpacing;
	const PxReal extentX = spacing * (params.subgridSizeX - 2 * params.haloSize);
	const PxReal extentY = spacing * (params.subgridSizeY - 2 * params.haloSize);
	const PxReal extentZ = spacing * (params.subgridSizeZ - 2 * params.haloSize);
	return PxVec3(extentX, extentY, extentZ);
}
|
||||
|
||||
// Binary-searches the sorted hash-key array for hashToFind. The index located
// by binarySearch is written to result; the return value reports whether that
// slot holds an exact match.
__device__ inline bool tryFindSubgridHashkey(
	const PxU32* const PX_RESTRICT sortedHashkey,
	const PxU32 numSubgrids,
	const PxU32 hashToFind,
	PxU32& result)
{
	const PxU32 foundIndex = binarySearch(sortedHashkey, numSubgrids, hashToFind);
	result = foundIndex;
	const bool exactMatch = (sortedHashkey[foundIndex] == hashToFind);
	return exactMatch;
}
|
||||
|
||||
// Squares a scalar.
__device__ inline float sqr(PxReal value)
{
	return value * value;
}
|
||||
|
||||
// Spiky smoothing kernel: W(x) = kSpiky1 * (1 - x * kInvRadius)^2,
// where kInvRadius is the reciprocal of the kernel radius.
__device__ inline float W(const PxReal x, const PxReal kSpiky1, const PxReal kInvRadius)
{
	const PxReal falloff = 1.0f - x * kInvRadius;
	return kSpiky1 * falloff * falloff;
}
|
||||
|
||||
// Derivative of the spiky kernel: dW/dx = -kSpiky2 * (1 - x * kInvRadius).
__device__ inline float dWdx(const PxReal x, const PxReal kSpiky2, const PxReal kInvRadius)
{
	const PxReal falloff = 1.0f - x * kInvRadius;
	return -kSpiky2 * falloff;
}
|
||||
|
||||
// aerodynamics model in Frozen
// Computes the velocity change a triangle (x0, x1, x2) with vertex velocities
// (vel0, vel1, vel2) receives from wind over a timestep dt, combining a drag
// term (along the relative wind) and a lift term (along the triangle normal).
// The returned vector is the per-vertex delta velocity (scaled by inverseMass).
PX_FORCE_INLINE __device__ PxVec3 disneyWindModelEffect(const PxVec3& x0, const PxVec3& x1, const PxVec3& x2, const PxVec3& vel0, const PxVec3& vel1, const PxVec3& vel2,
	const PxVec3& wind, PxReal inverseMass, PxReal drag, PxReal lift, PxReal dt, PxReal airDensity)
{
	const PxVec3 x01 = x1 - x0;
	const PxVec3 x02 = x2 - x0;
	// Unnormalized triangle normal; its magnitude is twice the triangle area,
	// so the deformed area enters the force implicitly.
	PxVec3 n = x01.cross(x02);

	// airDensity: 1.225 kg / m3, reference: https://en.wikipedia.org/wiki/Density_of_air
	const PxVec3 v = (vel0 + vel1 + vel2) * 0.3333f;	// average triangle velocity
	const PxVec3 vrel = wind - v;						// wind velocity relative to the triangle

	// Flip the normal to face into the relative wind.
	if(vrel.dot(n) < 0.f)
	{
		n *= -1.f;
	}

	// option 1. using current (deformed) triangle area
	const PxReal coef = 0.25f * airDensity * dt * inverseMass;

	//// option 2. using rest (undeformed) triangle area
	// const PxReal area = femCloth.mTriangleAreas[triIndex];
	// n.normalize();
	// const PxReal coef = 0.5 * shFEMCloth.mAirDensity * area * dt * inverseMass;

	return coef * ((drag - lift) * vrel.dot(n) * vrel + lift * vrel.magnitudeSquared() * n);
}
|
||||
|
||||
// finds the bufferIndex for a given UniqueID.
// Upper-bound binary search over the sorted unique-id array; returns the
// original (unsorted) index of the last entry whose id is <= uniqueBufferId.
// NOTE(review): if uniqueBufferId is smaller than every stored id, the loop
// ends with r == 0 and the final access indexes [r - 1] == [0xffffffff] --
// presumably callers guarantee the id is always present; verify.
static __device__ PxU32 findBufferIndexFromUniqueId(const PxgParticleSystem& particleSystem, PxU32 uniqueBufferId)
{
	const PxU32 length = particleSystem.mCommonData.mNumParticleBuffers;
	if (length == 0)
		return 0;

	const PxU32* values = particleSystem.mParticleBufferSortedUniqueIds;

	// Invariant: r converges to the first index with values[index] > uniqueBufferId.
	PxU32 l = 0, r = length;
	while (l < r)
	{
		PxU32 m = (l + r) / 2;
		if (values[m] > uniqueBufferId)
			r = m;
		else
			l = m + 1;
	}
	// Map the sorted position back to the original buffer index.
	return particleSystem.mParticleBufferSortedUniqueIdsOriginalIndex[r - 1];
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
5866
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/particlesystem.cu
vendored
Normal file
5866
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/particlesystem.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
875
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/rigidDeltaAccum.cu
vendored
Normal file
875
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/rigidDeltaAccum.cu
vendored
Normal file
@@ -0,0 +1,875 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "vector_types.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "cutil_math.h"
|
||||
#include "PxgParticleSystemCoreKernelIndices.h"
|
||||
#include "reduction.cuh"
|
||||
#include "shuffle.cuh"
|
||||
#include "PxgSolverCoreDesc.h"
|
||||
#include "PxNodeIndex.h"
|
||||
#include "assert.h"
|
||||
#include "PxgSimulationCoreDesc.h"
|
||||
#include "PxgArticulationCoreDesc.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// No-op host-side entry point; presumably referenced from CPU code so that
// this compilation unit (and the kernels it defines) gets linked in -- TODO confirm.
extern "C" __host__ void initSimulationControllerKernels2() {}
|
||||
|
||||
/*
 * This kernel takes a *sorted* list of PxNodeIndex (of size *numContacts),
 * and a corresponding deltaV list.
 *
 * deltaV is updated with the cumulative delta values, such that each last entry of a rigid body holds the total sum
 * for that rigid body - *however* only for the rigid body entries which are processed within one block.
 * blockRigidId and blockDeltaV are updated such that each entry represents the deltaV sum and rigid body ID of the last
 * occurring rigid body in the corresponding block. This is used in the subsequent kernel to complete the sum
 * for rigid bodies whose entries span a block boundary.
 */
|
||||
// Stage 1 of the rigid-body deltaV accumulation: a per-block segmented sum of
// contact deltas keyed by rigid id. Uses warp-shuffle reductions within each
// warp, then a cross-warp pass, and finally publishes per-block partial sums
// for the second-stage kernel to stitch across block boundaries.
extern "C" __global__ void accumulateDeltaVRigidFirstLaunch(
	const PxU64* sortedRigidIds, //input: contact entries sorted by rigid id
	const PxU32* numContacts, //input
	float4* deltaV, //input/output: [0,n) linear deltas, [n,2n) angular deltas
	float4* blockDeltaV, //output: per-block partial sums (linear, then angular)
	PxU64* blockRigidId //output: rigid id of the last entry each block processed
)
{
	__shared__ PxU64 sRigidId[PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA + 1];

	//numWarpsPerBlock can't be larger than 32
	const PxU32 numWarpsPerBlock = PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA / WARP_SIZE;

	// Per-warp partial sums and the last rigid id seen by each warp.
	__shared__ float4 sLinWarpAccumulator[WARP_SIZE];
	__shared__ float4 sAngWarpAccumulator[WARP_SIZE];
	__shared__ PxU64 sWarpRigidId[WARP_SIZE];
	// Running block-level accumulators, carried across loop iterations.
	__shared__ float4 sLinBlockAccumulator;
	__shared__ float4 sAngBlockAccumulator;
	__shared__ PxU64 sBlockRigidId;

	const PxU32 tNumContacts = *numContacts;
	const PxU32 nbBlocksRequired = (tNumContacts + blockDim.x - 1) / blockDim.x;
	const PxU32 nbIterationsPerBlock = (nbBlocksRequired + gridDim.x - 1) / gridDim.x;
	const PxU32 threadIndexInWarp = threadIdx.x & (WARP_SIZE - 1);
	const PxU32 warpIndex = threadIdx.x / WARP_SIZE;

	// Threads 0..3 each zero one float component of the block accumulators.
	if (threadIdx.x < 4)
	{
		float* tLinBlockAccumulator = reinterpret_cast<float*>(&sLinBlockAccumulator.x);
		tLinBlockAccumulator[threadIdx.x] = 0.f;

		float* tAngBlockAccumulator = reinterpret_cast<float*>(&sAngBlockAccumulator.x);
		tAngBlockAccumulator[threadIdx.x] = 0.f;

		// Sentinel id that should match no real rigid id.
		if (threadIdx.x == 0)
			sBlockRigidId = 0x8fffffffffffffff;
	}
	__syncthreads();

	for (PxU32 i = 0; i < nbIterationsPerBlock; ++i)
	{
		const PxU32 workIndex = blockDim.x*(blockIdx.x*nbIterationsPerBlock + i) + threadIdx.x;

		PxU64 rigidId = 0x8fffffffffffffff; // sentinel for out-of-range threads
		float4 linDeltaV = make_float4(0.f, 0.f, 0.f, 0.f);
		float4 angDeltaV = make_float4(0.f, 0.f, 0.f, 0.f);
		if (workIndex < tNumContacts)
		{
			rigidId = sortedRigidIds[workIndex];
			sRigidId[threadIdx.x] = rigidId;
			linDeltaV = deltaV[workIndex];
			angDeltaV = deltaV[workIndex + tNumContacts];
		}
		__syncthreads();

		// Doubling-stride shuffle pass: a segmented inclusive sum within each
		// warp, only merging lanes that share the same rigid id.
		for (PxU32 reductionRadius = 1; reductionRadius < WARP_SIZE; reductionRadius <<= 1)
		{
			const PxU32 lane = threadIndexInWarp - reductionRadius;

			float4 linVal = shuffle(FULL_MASK, linDeltaV, lane);
			float4 angVal = shuffle(FULL_MASK, angDeltaV, lane);

			//workIndex < tNumContacts guarantees that sRigidId[WARP_SIZE * warpIndex + lane]
			//always points to initialized memory, since lane is always smaller than threadIndexInWarp.
			if (threadIndexInWarp >= reductionRadius && workIndex < tNumContacts &&
				rigidId == sRigidId[WARP_SIZE * warpIndex + lane])
			{
				linDeltaV += linVal;
				angDeltaV += angVal;
			}
		}

		// The last lane of each warp publishes the warp's running sum and id.
		if (threadIndexInWarp == (WARP_SIZE - 1))
		{
			sLinWarpAccumulator[warpIndex] = linDeltaV;
			sAngWarpAccumulator[warpIndex] = angDeltaV;
			sWarpRigidId[warpIndex] = rigidId;
		}

		// Snapshot last iteration's block accumulators before warp 0 rewrites them.
		const float4 prevLinBlockAccumulator = sLinBlockAccumulator;
		const float4 prevAngBlockAccumulator = sAngBlockAccumulator;
		const PxU64 prevBlockRigidId = sBlockRigidId;

		//Don't allow write until we've finished all reading...
		__syncthreads();

		// Warp 0 performs the same segmented sum across the per-warp results.
		if (warpIndex == 0)
		{
			float4 linDeltaV = make_float4(0.f, 0.f, 0.f, 0.f);
			float4 angDeltaV = make_float4(0.f, 0.f, 0.f, 0.f);

			PxU64 warpRigidId = 0x8fffffffffffffff;
			if (threadIndexInWarp < numWarpsPerBlock)
			{
				linDeltaV = sLinWarpAccumulator[threadIndexInWarp];
				angDeltaV = sAngWarpAccumulator[threadIndexInWarp];
				warpRigidId = sWarpRigidId[threadIndexInWarp];
			}

			float4 tLinDeltaV = linDeltaV;
			float4 tAngDeltaV = angDeltaV;

			for (PxU32 reductionRadius = 1; reductionRadius < numWarpsPerBlock; reductionRadius <<= 1)
			{
				const PxU32 lane = threadIndexInWarp - reductionRadius;
				float4 linVal = shuffle(FULL_MASK, tLinDeltaV, lane);
				float4 angVal = shuffle(FULL_MASK, tAngDeltaV, lane);

				if (threadIndexInWarp >= reductionRadius && warpRigidId == sWarpRigidId[lane])
				{
					tLinDeltaV += linVal;
					tAngDeltaV += angVal;
				}
			}

			// The last participating lane folds the iteration's total into the
			// carried block accumulators.
			if (threadIndexInWarp == (numWarpsPerBlock - 1))
			{
				if (sBlockRigidId != warpRigidId)
				{
					//need to clear block accumulators in case previous iteration
					//stored other sBlockRigidId
					sLinBlockAccumulator = make_float4(0.f, 0.f, 0.f, 0.f);
					sAngBlockAccumulator = make_float4(0.f, 0.f, 0.f, 0.f);
				}
				sLinBlockAccumulator += tLinDeltaV;
				sAngBlockAccumulator += tAngDeltaV;
				sBlockRigidId = warpRigidId;
			}

			sLinWarpAccumulator[threadIndexInWarp] = tLinDeltaV;
			sAngWarpAccumulator[threadIndexInWarp] = tAngDeltaV;
		}
		__syncthreads();

		if (workIndex < tNumContacts)
		{
			float4 accumLin = make_float4(0.f, 0.f, 0.f, 0.f);
			float4 accumAng = make_float4(0.f, 0.f, 0.f, 0.f);

			//if rigidId and the previous element rigid Id is the same, we need to add the previous warp accumulate velocity to
			//the current rigid body
			if (warpIndex > 0 && rigidId == sWarpRigidId[warpIndex - 1])
			{
				accumLin = sLinWarpAccumulator[warpIndex - 1];
				accumAng = sAngWarpAccumulator[warpIndex - 1];
			}

			// Carry over the previous iteration's block total when the same
			// rigid id continues across iterations.
			if (i != 0 && rigidId == prevBlockRigidId)
			{
				accumLin += prevLinBlockAccumulator;
				accumAng += prevAngBlockAccumulator;
			}

			//Now output both offsets...
			deltaV[workIndex] = linDeltaV + accumLin;
			deltaV[workIndex + tNumContacts] = angDeltaV + accumAng;
		}
	}

	// Publish this block's final partial sums for the second-stage kernel.
	if (threadIdx.x == 0)
	{
		blockDeltaV[blockIdx.x] = sLinBlockAccumulator;
		blockDeltaV[blockIdx.x + PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA] = sAngBlockAccumulator;
		blockRigidId[blockIdx.x] = sBlockRigidId;
	}
}
|
||||
|
||||
|
||||
//32 blocks. Each block computes the exclusive running sum for the block offsets.
|
||||
// Stage 2 of the rigid-body deltaV accumulation: combines the per-block
// partial sums produced by accumulateDeltaVRigidFirstLaunch, then - for the
// last contact entry of each rigid id - applies the completed sum either to
// the rigid body's solver delta-velocity slots or, for articulation links,
// as a scratch impulse.
// Change vs. previous revision: removed the unused locals accumLin/accumAng
// (declared but never read or written) and corrected the misleading
// "first thread" comment; all executed statements are unchanged.
extern "C" __global__ void accumulateDeltaVRigidSecondLaunch(
	const PxU64* sortedRigidIds, //input: contact entries sorted by rigid id
	const PxU32* numContacts, //input
	const float4* deltaV, //input: [0,n) linear, [n,2n) angular per-entry prefix sums
	const float4* blockDeltaV, //input: stage-1 per-block partial sums
	const PxU64* blockRigidId, //input: stage-1 per-block last rigid ids
	PxgPrePrepDesc* prePrepDesc,
	PxgSolverCoreDesc* solverCoreDesc,
	PxgArticulationCoreDesc* artiCoreDesc,
	PxgSolverSharedDesc<IterativeSolveData>* sharedDesc,
	const bool isTGS
)
{
	__shared__ float4 sBlockLinDeltaV[PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA];
	__shared__ float4 sBlockAngDeltaV[PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA];
	__shared__ PxU64 sBlockRigidId[PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA];
	__shared__ PxU64 sRigidId[PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA + 1];

	const PxU32 tNumContacts = *numContacts;
	const PxU32 nbBlocksRequired = (tNumContacts + blockDim.x - 1) / blockDim.x;
	const PxU32 nbIterationsPerBlock = (nbBlocksRequired + gridDim.x - 1) / gridDim.x;
	const PxU32 threadIndexInWarp = threadIdx.x & (WARP_SIZE - 1);

	float4 linBlockDeltaV = make_float4(0.f, 0.f, 0.f, 0.f);
	float4 angBlockDeltaV = make_float4(0.f, 0.f, 0.f, 0.f);

	// Sentinel id that should match no real rigid id.
	PxU64 tBlockRigidId = 0x8fffffffffffffff;
	if (threadIdx.x < PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA)
	{
		linBlockDeltaV = blockDeltaV[threadIdx.x];
		angBlockDeltaV = blockDeltaV[threadIdx.x + PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA];
		tBlockRigidId = blockRigidId[threadIdx.x];

		sBlockLinDeltaV[threadIdx.x] = linBlockDeltaV;
		sBlockAngDeltaV[threadIdx.x] = angBlockDeltaV;
		sBlockRigidId[threadIdx.x] = tBlockRigidId;
	}

	__syncthreads(); //sBlockRigidId is written above and read below

	float4 tLinDeltaV = linBlockDeltaV;
	float4 tAngDeltaV = angBlockDeltaV;
	//add on block deltaV if blockRigid id match
	for (PxU32 reductionRadius = 1; reductionRadius < PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA; reductionRadius <<= 1)
	{
		const PxU32 lane = threadIndexInWarp - reductionRadius;
		float4 linVal = shuffle(FULL_MASK, tLinDeltaV, lane);
		float4 angVal = shuffle(FULL_MASK, tAngDeltaV, lane);

		if (threadIndexInWarp >= reductionRadius && tBlockRigidId == sBlockRigidId[lane])
		{
			tLinDeltaV += linVal;
			tAngDeltaV += angVal;
		}
	}

	__syncthreads(); //sBlockRigidId is read above and written below

	// Store the stitched cross-block partial sums back to shared memory.
	if (threadIdx.x < PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA)
	{
		sBlockLinDeltaV[threadIdx.x] = tLinDeltaV;
		sBlockAngDeltaV[threadIdx.x] = tAngDeltaV;
		sBlockRigidId[threadIdx.x] = blockRigidId[threadIdx.x];
	}

	__syncthreads();

	float4* solverBodyDeltaVel = sharedDesc->iterativeData.solverBodyVelPool + solverCoreDesc->accumulatedBodyDeltaVOffset;
	const PxU32 numSolverBodies = solverCoreDesc->numSolverBodies;

	PxgArticulationBlockData* artiData = artiCoreDesc->mArticulationBlocks;
	PxgArticulationBlockLinkData* artiLinkData = artiCoreDesc->mArticulationLinkBlocks;

	const PxU32 maxLinks = artiCoreDesc->mMaxLinksPerArticulation;

	for (PxU32 i = 0; i < nbIterationsPerBlock; ++i)
	{
		__syncthreads(); //sRigidId is read and written in the same loop - read and write must be separated by syncs

		const PxU32 workIndex = blockDim.x * (blockIdx.x * nbIterationsPerBlock + i) + threadIdx.x;

		PxU64 rigidId = 0x8fffffffffffffff;
		if (workIndex < tNumContacts)
		{
			rigidId = sortedRigidIds[workIndex];
			// Each thread publishes its id one slot to the left so that slot
			// [threadIdx.x] ends up holding the *successor* entry's id.
			if (threadIdx.x > 0)
				sRigidId[threadIdx.x - 1] = rigidId;

			if (workIndex == tNumContacts - 1)
			{
				sRigidId[threadIdx.x] = 0x8fffffffffffffff;
			}
			else if (threadIdx.x == PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA - 1)
			{
				// last thread in block must fetch its successor id itself
				sRigidId[threadIdx.x] = sortedRigidIds[workIndex + 1];
			}
		}

		__syncthreads();

		if (workIndex < tNumContacts)
		{
			// Only the final entry of each rigid-id run applies the result
			// (successor id differs -> this entry holds the completed sum).
			if (rigidId != sRigidId[threadIdx.x])
			{
				float4 linVel = deltaV[workIndex];
				float4 angVel = deltaV[workIndex + tNumContacts];

				// Add the preceding block's contribution if this rigid id
				// spills across the block boundary.
				PxU64 preBlockRigidId = blockIdx.x > 0 ? sBlockRigidId[blockIdx.x - 1] : 0x8fffffffffffffff;

				if (rigidId == preBlockRigidId)
				{
					linVel += sBlockLinDeltaV[blockIdx.x - 1];
					angVel += sBlockAngDeltaV[blockIdx.x - 1];
				}

				//nodeIndex
				const PxNodeIndex nodeId = reinterpret_cast<PxNodeIndex&>(rigidId);

				PxU32 solverBodyIndex = 0;

				if (!nodeId.isStaticBody())
				{
					PxU32 nodeIndex = nodeId.index();
					solverBodyIndex = prePrepDesc->solverBodyIndices[nodeIndex];

					if (nodeId.isArticulation())
					{
						//solverBodyIndex is the globalThreadIndex for the active articulation in the block format
						const PxU32 blockIndex = solverBodyIndex / WARP_SIZE;

						PxgArticulationBlockData& articulation = artiData[blockIndex];
						PxgArticulationBlockLinkData* artiLinks = &artiLinkData[blockIndex * maxLinks];

						const PxU32 artiIndexInBlock = solverBodyIndex % WARP_SIZE;

						articulation.mStateDirty[artiIndexInBlock] = PxgArtiStateDirtyFlag::eHAS_IMPULSES;

						const PxU32 linkID = nodeId.articulationLinkId();

						// linVel.w carries the contact count; average the
						// accumulated impulse over it (at least 1).
						const PxReal denom = PxMax(1.0f, linVel.w);
						PxReal ratio = 1.f / denom;
						linVel.w = 0.f;

						//for articulation, linVel and angVel accumulate impulse
						Cm::UnAlignedSpatialVector impulse;
						impulse.top = PxVec3(linVel.x, linVel.y, linVel.z );
						impulse.bottom = PxVec3(angVel.x, angVel.y, angVel.z);

						impulse.top *= ratio;
						impulse.bottom *= ratio;

						storeSpatialVector(artiLinks[linkID].mScratchImpulse, -impulse, artiIndexInBlock);
					}
					else
					{
						float4 linearVelocity = solverBodyDeltaVel[solverBodyIndex];
						float4 angularVelocity = solverBodyDeltaVel[solverBodyIndex + numSolverBodies];

						const PxReal denom = PxMax(1.0f, linVel.w);
						PxReal ratio = 1.f / denom;
						linVel.w = 0.f;

						if (isTGS)
						{
							// TGS packs lin.xyz in linearVelocity.xyz and
							// ang.xyz in (linearVelocity.w, angularVelocity.xy).
							linearVelocity.x += linVel.x * ratio;
							linearVelocity.y += linVel.y * ratio;
							linearVelocity.z += linVel.z * ratio;
							linearVelocity.w += angVel.x * ratio;
							angularVelocity.x += angVel.y * ratio;
							angularVelocity.y += angVel.z * ratio;
							//The rest is the delta position buffer
						}
						else
						{
							linearVelocity += linVel * ratio;
							angularVelocity += angVel * ratio;
						}

						solverBodyDeltaVel[solverBodyIndex] = linearVelocity;
						solverBodyDeltaVel[solverBodyIndex + numSolverBodies] = angularVelocity;
					}
				}
			}
		}
	}
}
|
||||
|
||||
|
||||
//32 blocks. Each block computes the exclusive running sum for the block offsets.
|
||||
// Clears the per-rigid accumulation scratch written by the multi-stage
// deltaV accumulation: for the last contact entry of each rigid id, zeroes
// either the articulation link's mDeltaScale or the rigid body's tempDenom.
// Fix vs. previous revision: added the leading __syncthreads() in the work
// loop. sRigidId is read at the end of iteration i and rewritten at the start
// of iteration i+1; without a barrier between the two, a fast thread could
// overwrite a slot before a slower thread reads it. The sibling kernel
// accumulateDeltaVRigidSecondLaunch uses exactly this barrier for the same
// shared-memory pattern.
extern "C" __global__ void clearDeltaVRigidSecondLaunchMulti(
	PxU64* sortedRigidIds, //input: contact entries sorted by rigid id
	PxU32* numContacts, //input
	PxgPrePrepDesc* prePrepDesc,
	PxgSolverCoreDesc* solverCoreDesc,
	PxgArticulationCoreDesc* artiCoreDesc,
	PxReal* tempDenom //output: per-body accumulation denominators to clear
)
{
	__shared__ PxU64 sRigidId[PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA + 1];

	const PxU32 tNumContacts = *numContacts;
	const PxU32 idx = threadIdx.x;

	const PxU32 totalBlockRequired = (tNumContacts + (PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA - 1)) / PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA;
	const PxU32 numIterationPerBlock = (totalBlockRequired + (PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA - 1)) / PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA;

	PxgArticulationBlockLinkData* artiLinkData = artiCoreDesc->mArticulationLinkBlocks;

	const PxU32 maxLinks = artiCoreDesc->mMaxLinksPerArticulation;

	for (PxU32 i = 0; i < numIterationPerBlock; ++i)
	{
		__syncthreads(); //sRigidId is read and written in the same loop - read and write must be separated by syncs

		const PxU32 workIndex = i * PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA + idx + numIterationPerBlock * blockIdx.x * blockDim.x;

		PxU64 rigidId = 0x8fffffffffffffff; // sentinel for out-of-range threads
		if (workIndex < tNumContacts)
		{
			rigidId = sortedRigidIds[workIndex];
			// Publish one slot to the left so [idx] holds the successor id.
			if (idx > 0)
				sRigidId[idx - 1] = rigidId;

			if (workIndex == tNumContacts - 1)
			{
				sRigidId[idx] = 0x8fffffffffffffff;
			}
			else if (threadIdx.x == PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA - 1)
			{
				// last thread in block must fetch its successor id itself
				sRigidId[idx] = sortedRigidIds[workIndex + 1];
			}
		}

		__syncthreads();

		if (workIndex < tNumContacts)
		{
			// Only the final entry of each rigid-id run performs the clear.
			if (rigidId != sRigidId[idx])
			{
				//nodeIndex
				const PxNodeIndex nodeId = reinterpret_cast<PxNodeIndex&>(rigidId);

				PxU32 solverBodyIndex = 0;

				if (!nodeId.isStaticBody())
				{
					PxU32 nodeIndex = nodeId.index();
					solverBodyIndex = prePrepDesc->solverBodyIndices[nodeIndex];

					if (nodeId.isArticulation())
					{
						//solverBodyIndex is the globalThreadIndex for the active articulation in the block format
						const PxU32 blockIndex = solverBodyIndex / WARP_SIZE;

						PxgArticulationBlockLinkData* artiLinks = &artiLinkData[blockIndex * maxLinks];

						const PxU32 artiIndexInBlock = solverBodyIndex % WARP_SIZE;

						const PxU32 linkID = nodeId.articulationLinkId();

						artiLinks[linkID].mDeltaScale[artiIndexInBlock] = 0.f;
					}
					else
					{
						tempDenom[solverBodyIndex] = 0.f;
					}
				}
			}
		}
	}
}
|
||||
|
||||
|
||||
|
||||
//32 blocks. Each block computes the exclusive run sum for the blockOffset.
//Stage 1: completes a segmented (keyed by rigid id) prefix sum of the per-block
//partial weights in blockDeltaV[].w, then — once per unique rigid id — atomically
//accumulates a relaxation denominator (tempDenom for rigid bodies, per-link
//mDeltaScale for articulations) that stage 2 divides by.
extern "C" __global__ void accumulateDeltaVRigidSecondLaunchMultiStage1(
	PxU64* sortedRigidIds, //input: contact rigid ids, sorted so equal ids are contiguous
	PxU32* numContacts, //input
	float4* deltaV, //input: per-contact deltas; only the .w component is read in this stage
	float4* blockDeltaV, //input: per-block partial sums from the first launch
	PxU64* blockRigidId, //input: presumably the rigid id of each block's last contact — TODO confirm against the first-launch kernel
	PxgPrePrepDesc* prePrepDesc, //provides solverBodyIndices (node index -> solver body index)
	PxgSolverCoreDesc* solverCoreDesc, //not read by this kernel; kept for launch-signature parity
	PxgArticulationCoreDesc* artiCoreDesc,
	PxgSolverSharedDesc<IterativeSolveData>* sharedDesc, //not read by this kernel
	PxReal* tempDenom, //output: per-rigid-body denominator (atomically accumulated)
	const bool useLocalRelax, //if set, denominator is max(globalRelaxationCoefficient, run weight)
	const float globalRelaxationCoefficient,
	bool isTGS //not read by this kernel
)
{
	//GridDim::ACCUMULATE_DELTA per-block partials are scanned by a single warp below
	//(see the __shfl_sync over FULL_MASK).
	__shared__ PxReal sBlockLinDeltaVW[PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA];
	__shared__ PxU64 sBlockRigidId[PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA];
	//After the publish step below, sRigidId[t] holds the rigid id of contact t+1.
	__shared__ PxU64 sRigidId[PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA + 1];

	const PxU32 tNumContacts = *numContacts;
	const PxU32 idx = threadIdx.x;

	const PxU32 threadIndexInWarp = idx & (WARP_SIZE - 1);

	const PxU32 totalBlockRequired = (tNumContacts + (PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA - 1)) / PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA;

	float4 linBlockDeltaV = make_float4(0.f, 0.f, 0.f, 0.f);
	float4 angBlockDeltaV = make_float4(0.f, 0.f, 0.f, 0.f); //note: loaded/initialized but never summed in this stage

	PxU64 tBlockRigidId = 0x8fffffffffffffff; //sentinel: never matches a real rigid id
	if (idx < PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA)
	{
		linBlockDeltaV = blockDeltaV[idx];
		tBlockRigidId = blockRigidId[idx];

		sBlockLinDeltaVW[idx] = linBlockDeltaV.w;
		sBlockRigidId[idx] = tBlockRigidId;
	}

	__syncthreads(); //sBlockRigidId is written above and read below

	PxReal tLinDeltaVW = linBlockDeltaV.w;
	//add on block deltaV if blockRigid id matches: a segmented inclusive scan within
	//one warp — a lane only adds its neighbor's value when both partials belong to
	//the same rigid id.
	for (PxU32 reductionRadius = 1; reductionRadius < PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA; reductionRadius <<= 1)
	{
		const PxU32 lane = threadIndexInWarp - reductionRadius;
		PxReal w = __shfl_sync(FULL_MASK, tLinDeltaVW, lane);

		if (threadIndexInWarp >= reductionRadius && tBlockRigidId == sBlockRigidId[lane])
		{
			tLinDeltaVW += w;
		}
	}

	__syncthreads(); //sBlockRigidId is read above and written below

	if (idx < PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA)
	{
		sBlockLinDeltaVW[idx] = tLinDeltaVW;
		sBlockRigidId[idx] = blockRigidId[idx];
	}

	const PxU32 numIterationPerBlock = (totalBlockRequired + (PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA - 1)) / PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA;

	__syncthreads();

	PxgArticulationBlockLinkData* artiLinkData = artiCoreDesc->mArticulationLinkBlocks;

	const PxU32 maxLinks = artiCoreDesc->mMaxLinksPerArticulation;

	for (PxU32 i = 0; i < numIterationPerBlock; ++i)
	{
		const PxU32 workIndex = i * PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA + idx + numIterationPerBlock * blockIdx.x * blockDim.x;

		PxU64 rigidId = 0x8fffffffffffffff;
		if (workIndex < tNumContacts)
		{
			rigidId = sortedRigidIds[workIndex];
			//Publish this id one slot down: sRigidId[t] = id of contact t+1.
			if (idx > 0)
				sRigidId[idx - 1] = rigidId;

			if (workIndex == tNumContacts - 1)
			{
				//Last contact overall: successor is the sentinel, forcing a run end.
				sRigidId[idx] = 0x8fffffffffffffff;
			}
			else if (threadIdx.x == PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA - 1)
			{
				// last thread in the block must load the first id of the next chunk
				sRigidId[idx] = sortedRigidIds[workIndex + 1];
			}
		}

		__syncthreads();

		if (workIndex < tNumContacts)
		{
			//Only the last contact of each run of equal rigid ids contributes —
			//its deltaV[].w presumably carries the run's accumulated weight (TODO
			//confirm against the first-launch kernel).
			if (rigidId != sRigidId[idx])
			{
				PxReal linVelW = deltaV[workIndex].w;

				//If this run started in an earlier block, fold in that block's scanned partial.
				PxU64 preBlockRigidId = blockIdx.x > 0 ? sBlockRigidId[blockIdx.x - 1] : 0x8fffffffffffffff;

				if (rigidId == preBlockRigidId)
				{
					linVelW += sBlockLinDeltaVW[blockIdx.x - 1];
				}

				//nodeIndex
				const PxNodeIndex nodeId = reinterpret_cast<PxNodeIndex&>(rigidId);

				PxU32 solverBodyIndex = 0;

				//Static bodies accumulate no denominator.
				if (!nodeId.isStaticBody())
				{
					PxU32 nodeIndex = nodeId.index();
					solverBodyIndex = prePrepDesc->solverBodyIndices[nodeIndex];

					PxReal denom = globalRelaxationCoefficient;

					if (useLocalRelax)
						denom = PxMax(denom, linVelW);

					if (nodeId.isArticulation())
					{
						//solverBodyIndex is the globalThreadIndex for the active articulation in the block format
						const PxU32 blockIndex = solverBodyIndex / WARP_SIZE;

						PxgArticulationBlockLinkData* artiLinks = &artiLinkData[blockIndex * maxLinks];

						const PxU32 artiIndexInBlock = solverBodyIndex % WARP_SIZE;

						const PxU32 linkID = nodeId.articulationLinkId();

						atomicAdd(&artiLinks[linkID].mDeltaScale[artiIndexInBlock], denom);
					}
					else
					{
						atomicAdd(&tempDenom[solverBodyIndex], denom);
					}
				}
			}
		}
	}
}
|
||||
|
||||
//32 blocks. Each block compute the exclusive ransum for the blockOffset
|
||||
extern "C" __global__ void accumulateDeltaVRigidSecondLaunchMultiStage2(
|
||||
PxU64* sortedRigidIds, //input
|
||||
PxU32* numContacts, //input
|
||||
float4* deltaV, //input
|
||||
float4* blockDeltaV, //input
|
||||
PxU64* blockRigidId, //input
|
||||
PxgPrePrepDesc* prePrepDesc,
|
||||
PxgSolverCoreDesc* solverCoreDesc,
|
||||
PxgArticulationCoreDesc* artiCoreDesc,
|
||||
PxgSolverSharedDesc<IterativeSolveData>* sharedDesc,
|
||||
PxReal* tempDenom,
|
||||
const bool useLocalRelax,
|
||||
const float globalRelaxationCoefficient,
|
||||
bool isTGS
|
||||
)
|
||||
{
|
||||
__shared__ float4 sBlockLinDeltaV[PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA];
|
||||
__shared__ float4 sBlockAngDeltaV[PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA];
|
||||
__shared__ PxU64 sBlockRigidId[PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA];
|
||||
__shared__ PxU64 sRigidId[PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA + 1];
|
||||
|
||||
const PxU32 tNumContacts = *numContacts;
|
||||
const PxU32 idx = threadIdx.x;
|
||||
|
||||
const PxU32 threadIndexInWarp = idx & (WARP_SIZE - 1);
|
||||
|
||||
const PxU32 totalBlockRequired = (tNumContacts + (PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA - 1)) / PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA;
|
||||
|
||||
float4 linBlockDeltaV = make_float4(0.f, 0.f, 0.f, 0.f);
|
||||
float4 angBlockDeltaV = make_float4(0.f, 0.f, 0.f, 0.f);
|
||||
|
||||
PxU64 tBlockRigidId = 0x8fffffffffffffff;
|
||||
if (idx < PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA)
|
||||
{
|
||||
linBlockDeltaV = blockDeltaV[idx];
|
||||
angBlockDeltaV = blockDeltaV[idx + PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA];
|
||||
tBlockRigidId = blockRigidId[idx];
|
||||
|
||||
sBlockLinDeltaV[idx] = linBlockDeltaV;
|
||||
sBlockAngDeltaV[idx] = angBlockDeltaV;
|
||||
sBlockRigidId[idx] = tBlockRigidId;
|
||||
}
|
||||
|
||||
__syncthreads(); //sBlockRigidId is written above and read below
|
||||
|
||||
float4 tLinDeltaV = linBlockDeltaV;
|
||||
float4 tAngDeltaV = angBlockDeltaV;
|
||||
//add on block deltaV if blockRigid id match
|
||||
for (PxU32 reductionRadius = 1; reductionRadius < PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA; reductionRadius <<= 1)
|
||||
{
|
||||
const PxU32 lane = threadIndexInWarp - reductionRadius;
|
||||
float4 linVal = shuffle(FULL_MASK, tLinDeltaV, lane);
|
||||
float4 angVal = shuffle(FULL_MASK, tAngDeltaV, lane);
|
||||
|
||||
if (threadIndexInWarp >= reductionRadius && tBlockRigidId == sBlockRigidId[lane])
|
||||
{
|
||||
tLinDeltaV += linVal;
|
||||
tAngDeltaV += angVal;
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads(); //sBlockRigidId is read above and written below
|
||||
|
||||
if (idx < PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA)
|
||||
{
|
||||
sBlockLinDeltaV[idx] = tLinDeltaV;
|
||||
sBlockAngDeltaV[idx] = tAngDeltaV;
|
||||
sBlockRigidId[idx] = blockRigidId[idx];
|
||||
}
|
||||
|
||||
const PxU32 numIterationPerBlock = (totalBlockRequired + (PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA - 1)) / PxgParticleSystemKernelGridDim::ACCUMULATE_DELTA;
|
||||
|
||||
__syncthreads();
|
||||
|
||||
float4* solverBodyDeltaVel = sharedDesc->iterativeData.solverBodyVelPool + solverCoreDesc->accumulatedBodyDeltaVOffset;
|
||||
//float4* initialVel = solverCoreDesc->outSolverVelocity;
|
||||
const PxU32 numSolverBodies = solverCoreDesc->numSolverBodies;
|
||||
|
||||
PxgArticulationBlockData* artiData = artiCoreDesc->mArticulationBlocks;
|
||||
PxgArticulationBlockLinkData* artiLinkData = artiCoreDesc->mArticulationLinkBlocks;
|
||||
|
||||
const PxU32 maxLinks = artiCoreDesc->mMaxLinksPerArticulation;
|
||||
|
||||
for (PxU32 i = 0; i < numIterationPerBlock; ++i)
|
||||
{
|
||||
const PxU32 workIndex = i * PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA + idx + numIterationPerBlock * blockIdx.x * blockDim.x;
|
||||
|
||||
PxU64 rigidId = 0x8fffffffffffffff;
|
||||
if (workIndex < tNumContacts)
|
||||
{
|
||||
rigidId = sortedRigidIds[workIndex];
|
||||
if (idx > 0)
|
||||
sRigidId[idx - 1] = rigidId;
|
||||
|
||||
if (workIndex == tNumContacts - 1)
|
||||
{
|
||||
sRigidId[idx] = 0x8fffffffffffffff;
|
||||
}
|
||||
else if (threadIdx.x == PxgParticleSystemKernelBlockDim::ACCUMULATE_DELTA - 1)
|
||||
{
|
||||
// first thread in block must load neighbor particle
|
||||
sRigidId[idx] = sortedRigidIds[workIndex + 1];
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (workIndex < tNumContacts)
|
||||
{
|
||||
float4 accumLin = make_float4(0.f, 0.f, 0.f, 0.f);
|
||||
float4 accumAng = make_float4(0.f, 0.f, 0.f, 0.f);
|
||||
|
||||
if (rigidId != sRigidId[idx])
|
||||
{
|
||||
float4 linVel = deltaV[workIndex];
|
||||
float4 angVel = deltaV[workIndex + tNumContacts];
|
||||
|
||||
PxU64 preBlockRigidId = blockIdx.x > 0 ? sBlockRigidId[blockIdx.x - 1] : 0x8fffffffffffffff;
|
||||
|
||||
if (rigidId == preBlockRigidId)
|
||||
{
|
||||
linVel += sBlockLinDeltaV[blockIdx.x - 1];
|
||||
angVel += sBlockAngDeltaV[blockIdx.x - 1];
|
||||
}
|
||||
|
||||
//nodeIndex
|
||||
const PxNodeIndex nodeId = reinterpret_cast<PxNodeIndex&>(rigidId);
|
||||
|
||||
PxU32 solverBodyIndex = 0;
|
||||
|
||||
if (!nodeId.isStaticBody())
|
||||
{
|
||||
PxU32 nodeIndex = nodeId.index();
|
||||
solverBodyIndex = prePrepDesc->solverBodyIndices[nodeIndex];
|
||||
|
||||
if (nodeId.isArticulation())
|
||||
{
|
||||
//solverBodyIndex is the globalThreadIndex for the active articulation in the block format
|
||||
const PxU32 blockIndex = solverBodyIndex / WARP_SIZE;
|
||||
|
||||
PxgArticulationBlockData& articulation = artiData[blockIndex];
|
||||
PxgArticulationBlockLinkData* artiLinks = &artiLinkData[blockIndex * maxLinks];
|
||||
|
||||
|
||||
const PxU32 artiIndexInBlock = solverBodyIndex % WARP_SIZE;
|
||||
|
||||
articulation.mStateDirty[artiIndexInBlock] = PxgArtiStateDirtyFlag::eHAS_IMPULSES;
|
||||
|
||||
const PxU32 linkID = nodeId.articulationLinkId();
|
||||
|
||||
PxReal denom = artiLinks[linkID].mDeltaScale[artiIndexInBlock];
|
||||
PxReal ratio = 1.f / denom;
|
||||
|
||||
//for articulation, linVel and angVel accumulate impulse
|
||||
Cm::UnAlignedSpatialVector impulse;
|
||||
impulse.top = PxVec3(linVel.x, linVel.y, linVel.z);
|
||||
impulse.bottom = PxVec3(angVel.x, angVel.y, angVel.z);
|
||||
|
||||
impulse.top *= ratio;
|
||||
impulse.bottom *= ratio;
|
||||
|
||||
/*printf("blockIndex %i artiIndexInBlock %i linkID %i ratio %f impulse linear(%f, %f, %f) angular(%f, %f, %f)\n", blockIndex, artiIndexInBlock, linkID, ratio,
|
||||
impulse.top.x, impulse.top.y, impulse.top.z, impulse.bottom.x, impulse.bottom.y, impulse.bottom.z);*/
|
||||
|
||||
atomicAddSpatialVector(artiLinks[linkID].mScratchImpulse, -impulse, artiIndexInBlock);
|
||||
}
|
||||
else
|
||||
{
|
||||
PxReal denom = tempDenom[solverBodyIndex];// globalRelaxationCoefficient;
|
||||
|
||||
PxReal ratio = 1.f / denom;
|
||||
|
||||
if (isTGS)
|
||||
{
|
||||
atomicAdd(&solverBodyDeltaVel[solverBodyIndex].x, linVel.x*ratio);
|
||||
atomicAdd(&solverBodyDeltaVel[solverBodyIndex].y, linVel.y*ratio);
|
||||
atomicAdd(&solverBodyDeltaVel[solverBodyIndex].z, linVel.z*ratio);
|
||||
atomicAdd(&solverBodyDeltaVel[solverBodyIndex].w, angVel.x*ratio);
|
||||
atomicAdd(&solverBodyDeltaVel[solverBodyIndex + numSolverBodies].x, angVel.y*ratio);
|
||||
atomicAdd(&solverBodyDeltaVel[solverBodyIndex + numSolverBodies].y, angVel.z*ratio);
|
||||
//The rest is the delta position buffer
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
atomicAdd(&solverBodyDeltaVel[solverBodyIndex].x, linVel.x*ratio);
|
||||
atomicAdd(&solverBodyDeltaVel[solverBodyIndex].y, linVel.y*ratio);
|
||||
atomicAdd(&solverBodyDeltaVel[solverBodyIndex].z, linVel.z*ratio);
|
||||
atomicAdd(&solverBodyDeltaVel[solverBodyIndex + numSolverBodies].x, angVel.x*ratio);
|
||||
atomicAdd(&solverBodyDeltaVel[solverBodyIndex + numSolverBodies].y, angVel.y*ratio);
|
||||
atomicAdd(&solverBodyDeltaVel[solverBodyIndex + numSolverBodies].z, angVel.z*ratio);
|
||||
|
||||
}
|
||||
}
|
||||
//printf("solverBodyIndex %i\n", solverBodyIndex);
|
||||
//printf("linearVelocity(%f, %f, %f, %f)\n", linearVelocity.x, linearVelocity.y, linearVelocity.z, linearVelocity.w);
|
||||
//printf("angularVelocity(%f, %f, %f, %f)\n", angularVelocity.x, angularVelocity.y, angularVelocity.z, angularVelocity.w);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
2128
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/softBody.cu
vendored
Normal file
2128
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/softBody.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
150
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/softBody.cuh
vendored
Normal file
150
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/softBody.cuh
vendored
Normal file
@@ -0,0 +1,150 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __SOFT_BODY_CUH__
|
||||
#define __SOFT_BODY_CUH__
|
||||
|
||||
#include "foundation/PxVecMath.h"
|
||||
#include "atomic.cuh"
|
||||
|
||||
/**
|
||||
TODO, remove. Already has been removed from softbody/softbody, softbody/femcloth and softbody/particle attachments.
|
||||
*/
|
||||
__device__ inline PxReal getSoftBodyInvMass(const PxReal baryMass, const float4& bary)
|
||||
{
|
||||
PxReal scale = PxSqrt(bary.x*bary.x + bary.y*bary.y + bary.z*bary.z + bary.w*bary.w);
|
||||
return baryMass * scale;
|
||||
}
|
||||
|
||||
// Computes the rotation-free deformation gradient of a tetrahedral element:
// F = RTranspose * ([u1 | u2 | u3] * Qinv), transforming the strain into element space.
static __device__ inline PxMat33 calculateDeformationGradient(
	const PxVec3& u1,
	const PxVec3& u2,
	const PxVec3& u3,
	const PxMat33& Qinv,
	const PxMat33& RTranspose)
{
	// Deformation gradient in world space...
	const PxMat33 edgeMatrix(u1, u2, u3);
	const PxMat33 worldF = edgeMatrix * Qinv;
	// ...then strip the rigid rotation by transforming into element space.
	return RTranspose * worldF;
}
|
||||
|
||||
// Interpolates the four vertex values of a tetrahedron at a barycentric location.
// Returns the interpolated float4; invMass receives the four vertices' .w components.
static __device__ float4 computeTetraContact(const float4* const vels, const uint4& tetrahedronId,
	const float4& barycentric, float4& invMass)
{
	const float4 a = vels[tetrahedronId.x];
	const float4 b = vels[tetrahedronId.y];
	const float4 c = vels[tetrahedronId.z];
	const float4 d = vels[tetrahedronId.w];

	invMass = make_float4(a.w, b.w, c.w, d.w);

	return a * barycentric.x + b * barycentric.y + c * barycentric.z + d * barycentric.w;
}
|
||||
|
||||
// Distributes a position delta onto a tetrahedron's four vertices, weighted by each
// vertex's inverse mass times its barycentric coordinate. Vertices with zero inverse
// mass or a negligible (|b| <= 1e-6) barycentric contribution are skipped.
static __device__ void updateTetraPosDelta(const float4& invMasses, const float4& barycentric, const uint4& tetrahedronId,
	const PxVec3& deltaPos, float4* outputDeltaPoses, const PxReal addition = 1.f)
{
	if (invMasses.x > 0.f && PxAbs(barycentric.x) > 1e-6f)
		AtomicAdd(outputDeltaPoses[tetrahedronId.x], deltaPos * (invMasses.x * barycentric.x), addition);

	if (invMasses.y > 0.f && PxAbs(barycentric.y) > 1e-6f)
		AtomicAdd(outputDeltaPoses[tetrahedronId.y], deltaPos * (invMasses.y * barycentric.y), addition);

	if (invMasses.z > 0.f && PxAbs(barycentric.z) > 1e-6f)
		AtomicAdd(outputDeltaPoses[tetrahedronId.z], deltaPos * (invMasses.z * barycentric.z), addition);

	if (invMasses.w > 0.f && PxAbs(barycentric.w) > 1e-6f)
		AtomicAdd(outputDeltaPoses[tetrahedronId.w], deltaPos * (invMasses.w * barycentric.w), addition);
}
|
||||
|
||||
// Distributes a position delta onto a tetrahedron's four vertices. invMassBary holds
// the precomputed per-vertex products of inverse mass and barycentric coordinate
// (barycentrics assumed clamped on construction, hence non-negative), so a single
// > 0 test covers both factors.
static __device__ void updateTetPositionDelta(float4* outputDeltaPositions, const uint4& tetVertIndices,
	const PxVec3& deltaPosition, const float4& invMassBary, const PxReal constraintWeight)
{
	if (invMassBary.x > 0.0f)
		AtomicAdd(outputDeltaPositions[tetVertIndices.x], deltaPosition * invMassBary.x, constraintWeight);

	if (invMassBary.y > 0.0f)
		AtomicAdd(outputDeltaPositions[tetVertIndices.y], deltaPosition * invMassBary.y, constraintWeight);

	if (invMassBary.z > 0.0f)
		AtomicAdd(outputDeltaPositions[tetVertIndices.z], deltaPosition * invMassBary.z, constraintWeight);

	if (invMassBary.w > 0.0f)
		AtomicAdd(outputDeltaPositions[tetVertIndices.w], deltaPosition * invMassBary.w, constraintWeight);
}
|
||||
|
||||
// Distributes a position delta onto a triangle's three vertices (only .x/.y/.z of
// triVertIndices/invMassBary are used). invMassBary holds the precomputed per-vertex
// products of inverse mass and barycentric coordinate (barycentrics assumed clamped
// on construction), so a single > 0 test covers both factors.
static __device__ void updateTriPositionDelta(float4* outputDeltaPositions, const uint4& triVertIndices,
	const PxVec3& deltaPosition, const float4& invMassBary, const PxReal constraintWeight)
{
	if (invMassBary.x > 0.0f)
		AtomicAdd(outputDeltaPositions[triVertIndices.x], deltaPosition * invMassBary.x, constraintWeight);

	if (invMassBary.y > 0.0f)
		AtomicAdd(outputDeltaPositions[triVertIndices.y], deltaPosition * invMassBary.y, constraintWeight);

	if (invMassBary.z > 0.0f)
		AtomicAdd(outputDeltaPositions[triVertIndices.z], deltaPosition * invMassBary.z, constraintWeight);
}
|
||||
|
||||
#endif
|
||||
3272
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/softBodyGM.cu
vendored
Normal file
3272
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/softBodyGM.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
373
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/sparseGridStandalone.cu
vendored
Normal file
373
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/sparseGridStandalone.cu
vendored
Normal file
@@ -0,0 +1,373 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "vector_types.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "foundation/PxVec4.h"
|
||||
#include "stdio.h"
|
||||
#include "assert.h"
|
||||
#include "cuda.h"
|
||||
|
||||
#include "sparseGridStandalone.cuh"
|
||||
|
||||
#define ENABLE_KERNEL_LAUNCH_ERROR_CHECK 0
|
||||
|
||||
#define NEW_SUBGRID 0xfffffffe
|
||||
#define REUSED_SUBGRID 0xfffffffd
|
||||
|
||||
//Empty host-side entry point; presumably referenced so this translation unit's kernels
//get linked/loaded by the module system — TODO confirm against the kernel loader.
extern "C" __host__ void initSparseGridStandaloneKernels0() {}
|
||||
|
||||
//Per particle: writes its own index into 'indices' and its subgrid hash into
//'hashkeyPerParticle' (EMPTY_SUBGRID when the particle's phase fails the mask).
extern "C" __global__ void sg_SparseGridCalcSubgridHashes(
	PxSparseGridParams sparseGridParams,
	PxU32* PX_RESTRICT indices,
	PxU32* PX_RESTRICT hashkeyPerParticle,
	const PxVec4* const PX_RESTRICT positions,
	const PxU32 numParticles,
	const PxU32* PX_RESTRICT phases,
	const PxU32 validPhaseMask,
	const PxU32* PX_RESTRICT activeIndices)
{
	const PxU32 threadId = threadIdx.x + blockIdx.x * blockDim.x;
	if (threadId >= numParticles)
		return;

	//Optional indirection: when an active-index list is supplied, threads address
	//particles through it rather than directly.
	const PxU32 p = activeIndices ? activeIndices[threadId] : threadId;

	const PxVec3 subgridDomainSize = getSubgridDomainSize(sparseGridParams, 0/*sparseGridParams.haloSize*/);

	const int3 subgridId = calcSubgridId(positions[p].getXYZ(), subgridDomainSize);

	const bool isValidPhase = (phases == NULL) || ((phases[p] & validPhaseMask) != 0);

	indices[p] = p;
	hashkeyPerParticle[p] = isValidPhase ? calcSubgridHash(subgridId) : EMPTY_SUBGRID;
}
|
||||
|
||||
//Marks the mask slot of every entry in the sorted 'uniqueSortedHashkey' array that
//equals 'hashkey'. After the search helper locates one match, duplicate occurrences
//are reached by walking outwards in both directions (the array is sorted, so equal
//keys are contiguous).
__device__ void applyMask(PxU32* mask, const PxU32* PX_RESTRICT uniqueSortedHashkey, PxU32 hashkey, PxU32 maxNumSubgrids)
{
	//The empty-subgrid sentinel never needs marking.
	if (hashkey == EMPTY_SUBGRID)
		return;

	PxU32 sortedIdx = 0;
	//The array holds up to 27 candidate entries per subgrid (3x3x3 neighborhood).
	const bool hashFound = tryFindHashkey(uniqueSortedHashkey, 27 * maxNumSubgrids, hashkey, sortedIdx);
	if (hashFound)
	{
		if (mask[sortedIdx] == 1)
			return; //Was already marked by another thread

		mask[sortedIdx] = 1;

		//Mark any duplicate occurrences of the same hash below the found position...
		int i = sortedIdx - 1;
		while (i >= 0 && uniqueSortedHashkey[i] == hashkey)
			mask[i--] = 1;

		//...and above it.
		i = sortedIdx + 1;
		while (i < 27 * maxNumSubgrids && uniqueSortedHashkey[i] == hashkey)
			mask[i++] = 1;
	}
}
|
||||
|
||||
//For every valid-phase particle, determines which neighboring subgrids must exist
//(because the particle sits within 'neighborhoodSize' cells of its subgrid's border)
//and marks them in requiredNeighborMask via applyMask.
extern "C" __global__ void sg_SparseGridMarkRequiredNeighbors(
	PxU32* requiredNeighborMask, //output: one slot per candidate subgrid hash entry
	const PxU32* PX_RESTRICT uniqueSortedHashkey,
	const PxSparseGridParams sparseGridParams,
	PxU32 neighborhoodSize, //cell-count margin near the subgrid border that triggers neighbor allocation
	const PxVec4* particlePositions,
	const PxU32 numParticles,
	const PxU32* PX_RESTRICT phases,
	const PxU32 validPhaseMask,
	const PxU32* PX_RESTRICT activeIndices) //optional indirection into the particle arrays
{
	PxU32 i = threadIdx.x + blockIdx.x * blockDim.x;
	if (i >= numParticles)
		return;

	if (activeIndices)
		i = activeIndices[i];

	if (phases && !(phases[i] & validPhaseMask))
		return; //Avoid allocating sparse grids in regions of non-fluid particles

	const PxVec3 xp = particlePositions[i].getXYZ();

	const PxU32 haloSize = 0; // sparseGridParams.haloSize;
	const PxVec3 subgridDomainSize = getSubgridDomainSize(sparseGridParams, haloSize);
	const int3 subgridId = calcSubgridId(xp, subgridDomainSize); //subgridIdsPerParticle[i]; // flipSubgridHashToId(hashkey);
	const PxReal dx = sparseGridParams.gridSpacing;
	const PxReal invDx = 1.0f / dx;

	//World-space origin of the particle's own subgrid.
	const PxVec3 subgridOrigin = PxVec3(
		subgridId.x * dx * (sparseGridParams.subgridSizeX - 2 * haloSize),
		subgridId.y * dx * (sparseGridParams.subgridSizeY - 2 * haloSize),
		subgridId.z * dx * (sparseGridParams.subgridSizeZ - 2 * haloSize));
	const PxVec3 localXp = xp - subgridOrigin;

	//Cell coordinate of the particle inside its subgrid, clamped to the valid range.
	int3 gridBaseCoord;
	gridBaseCoord.x = PxClamp(int(floor(localXp.x * invDx)), 0, int(int(sparseGridParams.subgridSizeX) - 2 * haloSize - 1));
	gridBaseCoord.y = PxClamp(int(floor(localXp.y * invDx)), 0, int(int(sparseGridParams.subgridSizeY) - 2 * haloSize - 1));
	gridBaseCoord.z = PxClamp(int(floor(localXp.z * invDx)), 0, int(int(sparseGridParams.subgridSizeZ) - 2 * haloSize - 1));

	//Find the neighboring subgrids (step has values -1/0/1 for x/y/z) that need to exist
	int3 step;
	step.x = gridBaseCoord.x < neighborhoodSize ? -1 : (gridBaseCoord.x >= sparseGridParams.subgridSizeX - 2 * haloSize - neighborhoodSize ? 1 : 0);
	step.y = gridBaseCoord.y < neighborhoodSize ? -1 : (gridBaseCoord.y >= sparseGridParams.subgridSizeY - 2 * haloSize - neighborhoodSize ? 1 : 0);
	step.z = gridBaseCoord.z < neighborhoodSize ? -1 : (gridBaseCoord.z >= sparseGridParams.subgridSizeZ - 2 * haloSize - neighborhoodSize ? 1 : 0);

	//Mark the neighbor subgrids that need to exist such that particles with a radius >0 near the subgrid boundary can transfer their density to the grid
	PxU32 buffer[8]; //own subgrid + up to 7 corner/edge/face neighbors
	int indexer = 0;

	buffer[indexer++] = calcSubgridHash(subgridId);

	//Corner neighbor (all three axes near a border).
	if (step.x != 0 && step.y != 0 && step.z != 0) buffer[indexer++] = subgridHashOffset(subgridId, step.x, step.y, step.z);

	//Edge neighbors (two axes near a border).
	if (step.x != 0 && step.y != 0) buffer[indexer++] = subgridHashOffset(subgridId, step.x, step.y, 0);
	if (step.x != 0 && step.z != 0) buffer[indexer++] = subgridHashOffset(subgridId, step.x, 0, step.z);
	if (step.y != 0 && step.z != 0) buffer[indexer++] = subgridHashOffset(subgridId, 0, step.y, step.z);

	//Face neighbors (one axis near a border).
	if (step.x != 0) buffer[indexer++] = subgridHashOffset(subgridId, step.x, 0, 0);
	if (step.y != 0) buffer[indexer++] = subgridHashOffset(subgridId, 0, step.y, 0);
	if (step.z != 0) buffer[indexer++] = subgridHashOffset(subgridId, 0, 0, step.z);

	for (int j = 0; j < indexer; ++j)
		applyMask(requiredNeighborMask, uniqueSortedHashkey, buffer[j], sparseGridParams.maxNumSubgrids);
}
|
||||
|
||||
//Converts a sorted key array into per-element "run end" markers: out[i] is nonzero
//exactly at the last element of each run of equal keys (the value is taken from
//'mask' when provided, otherwise 1) and zero everywhere else.
extern "C" __global__ void sg_SparseGridSortedArrayToDelta(
	const PxU32* in,
	const PxU32* mask,
	PxU32* out,
	PxU32 n)
{
	const PxU32 i = threadIdx.x + blockIdx.x * blockDim.x;
	if (i >= n)
		return;

	//The final element always terminates a run.
	const bool isRunEnd = (i == n - 1) || (in[i] != in[i + 1]);
	out[i] = isRunEnd ? (mask ? mask[i] : 1) : 0;
}
|
||||
|
||||
//Compacts a sorted array: the last element of each run of equal 'indices' values
//writes its key into uniqueValues[indices[i]] and, when a collector is supplied,
//emits the 27 hashes of the subgrid's 3x3x3 neighborhood.
//Fix vs. original: the triple-nested neighborhood loop reused 'i' as its loop
//variable, shadowing the thread index declared above; the offsets now use distinct
//names (dx/dy/dz). Behavior is unchanged (the outer 'i' was not read inside the loop).
extern "C" __global__ void sg_SparseGridGetUniqueValues(
	const PxU32* sortedData, //input: sorted hash keys
	const PxU32* indices, //input: per-element compacted output slot (non-decreasing)
	PxU32* uniqueValues, //output: one key per unique run
	const PxU32 n,
	PxU32* subgridNeighborCollector, //optional output: 27 neighborhood hashes per unique run
	const PxU32 uniqueValuesSize) //capacity guard for uniqueValues
{
	const PxU32 i = threadIdx.x + blockIdx.x * blockDim.x;
	if (i < n)
	{
		//Only the last element of each run of equal 'indices' values writes output.
		if (i == n - 1 || indices[i] != indices[i + 1])
		{
			if (indices[i] < uniqueValuesSize)
			{
				uniqueValues[indices[i]] = sortedData[i];

				if (subgridNeighborCollector)
				{
					const int4 id = subgridHashToId(sortedData[i]);
					int indexer = 27 * indices[i];
					//Collect all 27 hashes of the 3x3x3 neighborhood around this subgrid.
					for (int dx = -1; dx <= 1; ++dx) for (int dy = -1; dy <= 1; ++dy) for (int dz = -1; dz <= 1; ++dz)
						subgridNeighborCollector[indexer++] = calcSubgridHash(make_int3(id.x + dx, id.y + dy, id.z + dz));
				}
			}
		}
	}
}
|
||||
|
||||
//Fills the density entries of all active subgrid cells with 'clearValue'.
extern "C" __global__ void sg_SparseGridClearDensity(
	PxReal* PX_RESTRICT density,
	const PxReal clearValue,
	const PxU32* numActiveSubgrids, //device-side count of active subgrids
	const PxU32 subgridSize //cells per subgrid
)
{
	const PxU32 cellIndex = blockIdx.x * blockDim.x + threadIdx.x;
	const PxU32 totalCells = numActiveSubgrids[0] * subgridSize;
	if (cellIndex < totalCells)
		density[cellIndex] = clearValue;
}
|
||||
|
||||
//Builds, for each subgrid entry, its 27-slot neighbor table: the compacted index of
//every subgrid in the 3x3x3 neighborhood, or EMPTY_SUBGRID where no active neighbor
//exists.
extern "C" __global__ void sg_SparseGridBuildSubgridNeighbors(
	const PxU32* PX_RESTRICT uniqueSortedHashkey, //sorted hashes of the active subgrids
	const PxU32* PX_RESTRICT numActiveSubgrids, //device-side active-subgrid count
	const PxU32 maxNumSubgrids,
	PxU32* PX_RESTRICT subgridNeighbors //output: 27 entries per subgrid
)
{
	const PxU32 si = blockIdx.x * blockDim.x + threadIdx.x;

	if (si >= maxNumSubgrids)
		return;

	const PxU32 hash = uniqueSortedHashkey[si];

	int4 sID = subgridHashToId(hash);

	//A subgrid is always its own center neighbor.
	subgridNeighbors[27 * si + SUBGRID_CENTER_IDX] = si;

	for (int z = -1; z <= 1; ++z) for (int y = -1; y <= 1; ++y) for (int x = -1; x <= 1; ++x)
	{
		const int3 nID = make_int3(sID.x + x, sID.y + y, sID.z + z);
		const PxU32 nHash = calcSubgridHash(nID);

		//Default to "no neighbor"; resolved below when the hash is found among the
		//active subgrids.
		PxU32 n = EMPTY_SUBGRID;
		if (isSubgridInsideRange(nID))
		{
			PxU32 nSortedIdx = 0;
			if (tryFindHashkey(uniqueSortedHashkey, numActiveSubgrids[0]/* + 1*/, nHash, nSortedIdx))
				n = nSortedIdx;
		}
		subgridNeighbors[27 * si + subgridNeighborIndex(x, y, z)] = n;
	}
}
|
||||
|
||||
// Writes, for every subgrid, the exclusive end index of its particle range in
// the sorted particle array.
extern "C" __global__ void sg_MarkSubgridEndIndices(const PxU32* sortedParticleToSubgrid, PxU32 numParticles, PxU32* subgridEndIndices)
{
	const PxI32 particleIdx = blockIdx.x * blockDim.x + threadIdx.x;
	if (particleIdx >= numParticles)
		return;

	if (particleIdx == numParticles - 1)
	{
		// The last particle always terminates its subgrid's range.
		subgridEndIndices[sortedParticleToSubgrid[particleIdx]] = numParticles;
	}
	else if (sortedParticleToSubgrid[particleIdx] != sortedParticleToSubgrid[particleIdx + 1])
	{
		// This particle is the last of its subgrid's run.
		subgridEndIndices[sortedParticleToSubgrid[particleIdx]] = particleIdx + 1;
	}
}
|
||||
|
||||
// Maps each currently active subgrid onto the allocation it had in the
// previous update when its hash already existed; otherwise marks the slot as
// NEW_SUBGRID. Reused entries of the previous-update map are overwritten with
// REUSED_SUBGRID so that sg_AddReleasedSubgridsToUnusedStack can later return
// the remaining (released) allocations to the free stack.
extern "C" __global__ void sg_ReuseSubgrids(
	const PxSparseGridParams sparseGridParams,
	const PxU32* uniqueHashkeysPerSubgridPreviousUpdate,
	const PxU32* numActiveSubgridsPreviousUpdate,
	PxU32* subgridOrderMapPreviousUpdate,

	const PxU32* uniqueHashkeysPerSubgrid,
	const PxU32* numActiveSubgrids,
	PxU32* subgridOrderMap)
{
	const PxI32 ti = blockIdx.x * blockDim.x + threadIdx.x;
	if (ti >= sparseGridParams.maxNumSubgrids)
		return;

	// Slots beyond the active count are explicitly empty.
	if (ti >= numActiveSubgrids[0])
	{
		subgridOrderMap[ti] = EMPTY_SUBGRID;
		return;
	}

	PxU32 previousSortedIdx = 0;
	const bool existedBefore = tryFindHashkey(
		uniqueHashkeysPerSubgridPreviousUpdate, numActiveSubgridsPreviousUpdate[0],
		uniqueHashkeysPerSubgrid[ti], previousSortedIdx);

	if (existedBefore)
	{
		// Carry over the previous allocation and mark it as consumed.
		subgridOrderMap[ti] = subgridOrderMapPreviousUpdate[previousSortedIdx];
		subgridOrderMapPreviousUpdate[previousSortedIdx] = REUSED_SUBGRID;
	}
	else
	{
		subgridOrderMap[ti] = NEW_SUBGRID;
	}
}
|
||||
|
||||
// Pushes a subgrid id onto the free stack (thread-safe via atomicAdd).
PX_FORCE_INLINE __device__ void addIdToUnusedSubgridStack(PxU32 idToAddToStack, PxU32* unusedSubgridStackSize, PxU32* unusedSubgridStack)
{
	const PxU32 slot = atomicAdd(unusedSubgridStackSize, 1);
	unusedSubgridStack[slot] = idToAddToStack;
}
|
||||
|
||||
// Pops a subgrid id from the free stack (thread-safe via a signed atomicAdd
// of -1 on the stack size; the pre-decrement value indexes the popped entry).
PX_FORCE_INLINE __device__ PxU32 getSubgridIdFromUnusedStack(PxU32* unusedSubgridStackSize, PxU32* unusedSubgridStack)
{
	const PxU32 sizeBeforePop = PxU32(atomicAdd(reinterpret_cast<PxI32*>(unusedSubgridStackSize), -1));
	return unusedSubgridStack[sizeBeforePop - 1];
}
|
||||
|
||||
//TODO: This method uses atomics. For better debuging, it might be worth to offer a slower variant that generates 100% reproducible results
|
||||
extern "C" __global__ void sg_AddReleasedSubgridsToUnusedStack(
|
||||
const PxU32* numActiveSubgridsPreviousUpdate,
|
||||
const PxU32* subgridOrderMapPreviousUpdate,
|
||||
|
||||
PxU32* unusedSubgridStackSize,
|
||||
PxU32* unusedSubgridStack)
|
||||
{
|
||||
PxI32 threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (threadIndex >= numActiveSubgridsPreviousUpdate[0])
|
||||
return;
|
||||
|
||||
if (subgridOrderMapPreviousUpdate[threadIndex] != REUSED_SUBGRID)
|
||||
addIdToUnusedSubgridStack(subgridOrderMapPreviousUpdate[threadIndex], unusedSubgridStackSize, unusedSubgridStack);
|
||||
}
|
||||
|
||||
//TODO: This method uses atomics. For better debuging, it might be worth to offer a slower variant that generates 100% reproducible results
// Assigns a physical subgrid allocation to every active slot that
// sg_ReuseSubgrids marked NEW_SUBGRID, popping ids from the unused-subgrid
// stack. One thread per active subgrid slot.
extern "C" __global__ void sg_AllocateNewSubgrids(
	const PxU32* numActiveSubgrids,
	PxU32* subgridOrderMap,

	PxU32* unusedSubgridStackSize,
	PxU32* unusedSubgridStack,

	const PxU32* numActiveSubgridsPreviousUpdate,
	const PxU32 maxNumSubgrids)
{
	PxI32 threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
	if (threadIndex >= numActiveSubgrids[0])
		return;


	if (numActiveSubgridsPreviousUpdate[0] == 0)
	{
		PxU32 numActiveSubgridsClamped = PxMin(maxNumSubgrids, numActiveSubgrids[0]);

		//Special case to simplify debugging: If no subgrids were active in the previous frame, then all subgrids present now must be new
		//Make sure that the subgrid indices in the first frame are always identical. But the order might change in subsequent frames due to the use of atomics
		//subgridOrderMap[threadIndex] = unusedSubgridStack[maxNumSubgrids - threadIndex - 1];
		subgridOrderMap[threadIndex] = unusedSubgridStack[maxNumSubgrids - numActiveSubgridsClamped + threadIndex]; //Use this line to test with non-default subgrid order to ensure that the code does not only work with the default order
		// A single thread shrinks the stack size; all consumed entries came from the top region of the stack.
		if (threadIndex == 0)
			unusedSubgridStackSize[0] -= numActiveSubgridsClamped;
		//If launched with 1024 threads per block, one could do per block scan and support 100% reproducible subgrid allocations using a block scan if maxNumSubgrids<=1024
	}
	else
	{
		// Steady state: pop one id from the free stack for each new subgrid (order is non-deterministic due to the atomic pop).
		if (subgridOrderMap[threadIndex] == NEW_SUBGRID)
		{
			subgridOrderMap[threadIndex] = getSubgridIdFromUnusedStack(unusedSubgridStackSize, unusedSubgridStack);
		}
	}
}
|
||||
|
||||
|
||||
318
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/sparseGridStandalone.cuh
vendored
Normal file
318
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/sparseGridStandalone.cuh
vendored
Normal file
@@ -0,0 +1,318 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "vector_types.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "PxSparseGridParams.h"
|
||||
#include "PxgSparseGridDataStandalone.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#define MAX_SPARSEGRID_DIM 1024
|
||||
#define MIN_SPARSEGRID_ID -512
|
||||
#define MAX_SPARSEGRID_ID 511
|
||||
#define EMPTY_SUBGRID 0xffffffff
|
||||
#define NEW_SUBGRID 0xfffffffe
|
||||
#define REUSED_SUBGRID 0xfffffffd
|
||||
#define OUT_OF_BOUNDS -1
|
||||
#define SUBGRID_CENTER_IDX 13
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Clamps f to the inclusive range [a, b].
PX_FORCE_INLINE __device__ __host__ int clampValue(int f, int a, int b)
{
	const int upperBounded = min(f, b);
	return max(a, upperBounded);
}
|
||||
|
||||
// World-space extent covered by one subgrid, excluding haloSize cells on both
// sides of each axis.
__device__ inline PxVec3 getSubgridDomainSize(const PxSparseGridParams& params, const PxU32 haloSize)
{
	const PxReal cellSize = params.gridSpacing;
	const PxReal sizeX = cellSize * (params.subgridSizeX - 2 * haloSize);
	const PxReal sizeY = cellSize * (params.subgridSizeY - 2 * haloSize);
	const PxReal sizeZ = cellSize * (params.subgridSizeZ - 2 * haloSize);
	return PxVec3(sizeX, sizeY, sizeZ);
}
|
||||
|
||||
// Integer subgrid coordinates of the subgrid containing a world-space position.
PX_FORCE_INLINE __host__ __device__ int3 calcSubgridId(const PxVec3 pos, const PxVec3 domainSize)
{
	const int ix = (int)PxFloor(pos.x / domainSize.x);
	const int iy = (int)PxFloor(pos.y / domainSize.y);
	const int iz = (int)PxFloor(pos.z / domainSize.z);
	return make_int3(ix, iy, iz);
}
|
||||
|
||||
// Maps a (possibly negative) subgrid id to a unique hash key by shifting each
// axis into [0, MAX_SPARSEGRID_DIM) and linearizing with z as the slowest axis.
PX_FORCE_INLINE __host__ __device__ PxU32 calcSubgridHash(const int3 subgridId)
{
	const int sx = subgridId.x - int(MIN_SPARSEGRID_ID);
	const int sy = subgridId.y - int(MIN_SPARSEGRID_ID);
	const int sz = subgridId.z - int(MIN_SPARSEGRID_ID);
	return MAX_SPARSEGRID_DIM * MAX_SPARSEGRID_DIM * sz + MAX_SPARSEGRID_DIM * sy + sx;
}
|
||||
|
||||
// Hash key of the subgrid at the given offset relative to subgridId.
PX_FORCE_INLINE __device__ PxU32 subgridHashOffset(int3 subgridId, int offsetX, int offsetY, int offsetZ)
{
	const int3 shiftedId = make_int3(subgridId.x + offsetX, subgridId.y + offsetY, subgridId.z + offsetZ);
	return calcSubgridHash(shiftedId);
}
|
||||
|
||||
// Linear index (0..26) of a neighbor offset in {-1,0,1}^3; (0,0,0) maps to 13
// (== SUBGRID_CENTER_IDX).
PX_FORCE_INLINE __host__ __device__ PxI32 subgridNeighborIndex(const PxI32 x, const PxI32 y, const PxI32 z)
{
	return (x + 1) + 3 * (y + 1) + 9 * (z + 1);
}
|
||||
|
||||
template<class T>
|
||||
static __device__ PxU32 searchSorted(const T* PX_RESTRICT data, const PxU32 numElements, const T& value)
|
||||
{
|
||||
PxU32 left = 0;
|
||||
PxU32 right = numElements;
|
||||
|
||||
while ((right - left) > 1)
|
||||
{
|
||||
PxU32 pos = (left + right) >> 1;
|
||||
const T& element = data[pos];
|
||||
if (element <= value)
|
||||
left = pos;
|
||||
else
|
||||
right = pos;
|
||||
}
|
||||
|
||||
return left;
|
||||
}
|
||||
|
||||
// Binary-searches the sorted hash key array for hashToFind.
// On success, result holds the index of the match. On failure, result holds
// the index returned by searchSorted and must not be treated as a match.
// Fix: guard against numSubgrids == 0 — the previous code unconditionally
// read sortedHashkey[0], an out-of-bounds access for an empty array (e.g.
// when no subgrids are active yet).
PX_FORCE_INLINE __device__ bool tryFindHashkey(const PxU32* const PX_RESTRICT sortedHashkey, const PxU32 numSubgrids, const PxU32 hashToFind, PxU32& result)
{
	if (numSubgrids == 0)
	{
		result = 0;
		return false;
	}
	result = searchSorted(sortedHashkey, numSubgrids, hashToFind);
	return sortedHashkey[result] == hashToFind;
}
|
||||
|
||||
// True when every component of val lies within the representable subgrid id
// range [MIN_SPARSEGRID_ID, MAX_SPARSEGRID_ID].
PX_FORCE_INLINE __host__ __device__ bool isSubgridInsideRange(const int3 val)
{
	const bool xInRange = val.x >= MIN_SPARSEGRID_ID && val.x <= MAX_SPARSEGRID_ID;
	const bool yInRange = val.y >= MIN_SPARSEGRID_ID && val.y <= MAX_SPARSEGRID_ID;
	const bool zInRange = val.z >= MIN_SPARSEGRID_ID && val.z <= MAX_SPARSEGRID_ID;
	return xInRange && yInRange && zInRange;
}
|
||||
|
||||
// Inverse of calcSubgridHash: decodes a hash key back into signed subgrid
// coordinates (the w component is always 0).
PX_FORCE_INLINE __host__ __device__ int4 subgridHashToId(const PxU32 hashKey)
{
	const int key = static_cast<int>(hashKey);
	const int x = key % MAX_SPARSEGRID_DIM + MIN_SPARSEGRID_ID;
	const int y = (key / MAX_SPARSEGRID_DIM) % MAX_SPARSEGRID_DIM + MIN_SPARSEGRID_ID;
	const int z = key / MAX_SPARSEGRID_DIM / MAX_SPARSEGRID_DIM + MIN_SPARSEGRID_ID;
	return make_int4(x, y, z, 0);
}
|
||||
|
||||
// Looks up the sorted index of subgrid si's neighbor at the given offset in
// the 27-entry-per-subgrid neighbor table.
PX_FORCE_INLINE __host__ __device__ PxI32 subgridNeighborOffset(const PxU32* const PX_RESTRICT subgridNeighbors, PxI32 si, PxI32 offsetX, PxI32 offsetY, PxI32 offsetZ)
{
	const PxI32 neighborSlot = subgridNeighborIndex(offsetX, offsetY, offsetZ);
	return subgridNeighbors[27 * si + neighborSlot];
}
|
||||
|
||||
// Non-negative modulo for a >= -b: always returns a value in [0, b).
PX_FORCE_INLINE __device__ PxI32 mod(PxI32 a, PxI32 b)
{
	return (a + b) % b;
	//return (a + b) & (b - 1); //Assumes b is a power of two
}
|
||||
|
||||
// Linear cell index of (i, j, k) within subgrid si, laid out x-fastest.
// When subgridOrdering is non-null, si is first remapped through it.
PX_FORCE_INLINE __host__ __device__ PxU32 sparseGridAccess(const PxSparseGridParams& sparseGridParams, PxI32 i, PxI32 j, PxI32 k, PxI32 si, const PxU32* subgridOrdering)
{
	if (subgridOrdering)
		si = subgridOrdering[si];
	const PxI32 withinSubgrid = i + sparseGridParams.subgridSizeX * (j + sparseGridParams.subgridSizeY * k);
	return withinSubgrid + sparseGridParams.subgridSizeX * sparseGridParams.subgridSizeY * sparseGridParams.subgridSizeZ * si;
}
|
||||
|
||||
// Resolves a cell coordinate that may lie outside subgrid si to a global cell
// index by stepping into the corresponding neighbor subgrid. Returns
// EMPTY_SUBGRID when the required neighbor does not exist.
PX_FORCE_INLINE __device__ PxU32 getIndex(const PxU32* const PX_RESTRICT subgridNeighbors, const PxSparseGridParams& sparseGridParams, PxI32 coordX, PxI32 coordY, PxI32 coordZ, PxU32 si, const PxU32* subgridOrdering)
{
	const int haloSize = 0; // sparseGridParams.haloSize;
	coordX += haloSize;
	coordY += haloSize;
	coordZ += haloSize;

	// Direction (-1/0/1 per axis) of the neighbor subgrid the coordinate falls into.
	const PxI32 stepX = coordX < 0 ? -1 : (coordX >= sparseGridParams.subgridSizeX ? 1 : 0);
	const PxI32 stepY = coordY < 0 ? -1 : (coordY >= sparseGridParams.subgridSizeY ? 1 : 0);
	const PxI32 stepZ = coordZ < 0 ? -1 : (coordZ >= sparseGridParams.subgridSizeZ ? 1 : 0);

	const PxU32 neighbor = subgridNeighborOffset(subgridNeighbors, si, stepX, stepY, stepZ);
	if (neighbor == EMPTY_SUBGRID)
		return EMPTY_SUBGRID;

	// Wrap the coordinate into the neighbor's local range.
	return sparseGridAccess(sparseGridParams,
		mod(coordX, sparseGridParams.subgridSizeX),
		mod(coordY, sparseGridParams.subgridSizeY),
		mod(coordZ, sparseGridParams.subgridSizeZ),
		neighbor, subgridOrdering);
}
|
||||
|
||||
//Assumes that 0.0 is a valid value for access outside of the grid
|
||||
PX_FORCE_INLINE __device__ PxReal getGridValue(const PxU32* const PX_RESTRICT subgridNeighbors, const PxReal* data, const PxSparseGridParams& sparseGridParams, PxI32 coordX, PxI32 coordY, PxI32 coordZ, PxU32 si, const PxU32* subgridOrdering)
|
||||
{
|
||||
const PxU32 id = getIndex(subgridNeighbors, sparseGridParams, coordX, coordY, coordZ, si, subgridOrdering);
|
||||
if (id == EMPTY_SUBGRID)
|
||||
return 0.0f;
|
||||
return data[id];
|
||||
}
|
||||
|
||||
//This will transform p to cell local coordinates
// Maps a world-space position to (cell x, cell y, cell z, sorted subgrid
// index). Returns w == OUT_OF_BOUNDS when the position's subgrid is not
// active. On success, p is rewritten in place to subgrid-local coordinates.
PX_FORCE_INLINE __device__ int4 getCellIndexFromPosition(PxVec3& p, const PxSparseGridParams& sparseGridParams, const PxU32* uniqueHashkeyPerSubgrid, const PxU32* numSubgridsInUse)
{
	int haloSize = 0; // sparseGridParams.haloSize;
	const PxVec3 subgridDomainSize = getSubgridDomainSize(sparseGridParams, haloSize);
	int3 subgridId = calcSubgridId(p, subgridDomainSize);
	PxU32 subgridHash = calcSubgridHash(subgridId);

	// Locate the subgrid among the active (sorted) hash keys.
	PxU32 sortedIdx = 0;
	const bool hashFound = tryFindHashkey(uniqueHashkeyPerSubgrid, numSubgridsInUse[0], subgridHash, sortedIdx);
	if (!hashFound)
	{
		//printf("Hash not found %i\n", subgridHash);
		return make_int4(-1, -1, -1, OUT_OF_BOUNDS);
	}

	const PxReal dx = sparseGridParams.gridSpacing;
	const PxReal invDx = 1.0f / dx;

	// Shift p into the subgrid's local coordinate frame.
	const PxVec3 subgridOrigin = PxVec3(
		subgridId.x * dx * (sparseGridParams.subgridSizeX - 2 * haloSize),
		subgridId.y * dx * (sparseGridParams.subgridSizeY - 2 * haloSize),
		subgridId.z * dx * (sparseGridParams.subgridSizeZ - 2 * haloSize));
	p = p - subgridOrigin;

	// Cell coordinates, clamped to the subgrid's interior (halo excluded).
	int4 result = make_int4(PxI32(PxFloor(p.x * invDx)), PxI32(PxFloor(p.y * invDx)), PxI32(PxFloor(p.z * invDx)), sortedIdx);
	result.x = PxClamp(result.x, 0, sparseGridParams.subgridSizeX - 2 * haloSize-1);
	result.y = PxClamp(result.y, 0, sparseGridParams.subgridSizeY - 2 * haloSize-1);
	result.z = PxClamp(result.z, 0, sparseGridParams.subgridSizeZ - 2 * haloSize-1);
	return result;
}
|
||||
|
||||
// World-space position of the cell with local coordinates 'index' inside the
// subgrid identified by 'hash'.
PX_FORCE_INLINE __device__ PxVec3 getLocationFromHashkey(const PxU32 hash, const PxSparseGridParams& sparseGridParams, const int4& index)
{
	const int haloSize = 0; // sparseGridParams.haloSize;
	const int4 subgridId = subgridHashToId(hash);
	const PxReal dx = sparseGridParams.gridSpacing;
	const PxVec3 subgridOrigin = PxVec3(
		subgridId.x * dx * (sparseGridParams.subgridSizeX - 2 * haloSize),
		subgridId.y * dx * (sparseGridParams.subgridSizeY - 2 * haloSize),
		subgridId.z * dx * (sparseGridParams.subgridSizeZ - 2 * haloSize));
	return subgridOrigin + PxVec3(index.x, index.y, index.z) * dx;
}
|
||||
|
||||
// Decodes a linear thread index into (x, y, z, subgrid index) grid
// coordinates; cells are laid out x-fastest within each subgrid.
PX_FORCE_INLINE __device__ int4 getGridCoordinates(const PxSparseGridParams& sparseGridParams, int threadIndex)
{
	const PxU32 numSubgridCells = sparseGridParams.subgridSizeX * sparseGridParams.subgridSizeY * sparseGridParams.subgridSizeZ;
	const PxU32 si = threadIndex / numSubgridCells;

	PxI32 localThreadIndex = threadIndex - si * numSubgridCells;
	const PxI32 xi = localThreadIndex % sparseGridParams.subgridSizeX;
	const PxI32 yi = (localThreadIndex / sparseGridParams.subgridSizeX) % sparseGridParams.subgridSizeY;
	const PxI32 zi = localThreadIndex / (sparseGridParams.subgridSizeX * sparseGridParams.subgridSizeY);

	//Following code assumes that subgridSizeX and subgridSizeY are a power of two
	/*const PxI32 xi = localThreadIndex & (sparseGridParams.subgridSizeX - 1);
	const PxI32 yi = (localThreadIndex / sparseGridParams.subgridSizeX) & (sparseGridParams.subgridSizeY - 1);
	const PxI32 zi = localThreadIndex / (sparseGridParams.subgridSizeX * sparseGridParams.subgridSizeY);*/

	/*if(sparseGridParams.haloSize>0 &&
	(xi < sparseGridParams.haloSize || yi < sparseGridParams.haloSize || zi < sparseGridParams.haloSize ||
	xi >= sparseGridParams.subgridSizeX - sparseGridParams.haloSize ||
	yi >= sparseGridParams.subgridSizeY - sparseGridParams.haloSize ||
	zi >= sparseGridParams.subgridSizeZ - sparseGridParams.haloSize))
	return make_int4(-1, -1, -1, OUT_OF_BOUNDS);*/

	// Halo support is currently disabled (haloSize hard-wired to 0).
	const PxU32 haloSize = 0; // sparseGridParams.haloSize;
	return make_int4(xi - haloSize, yi - haloSize, zi - haloSize, si);
}
|
||||
|
||||
//Functions for the PxSparseGridData class - make sure they have the same name and aruments as their counterparts of the dense grid to simplify templating
|
||||
PX_FORCE_INLINE __device__ int4 getGridCoordinates(const PxSparseGridData& data, int threadIndex)
|
||||
{
|
||||
return getGridCoordinates(data.mGridParams, threadIndex);
|
||||
}
|
||||
|
||||
// Linear cell index of 'index' (xyz = cell, w = subgrid) without neighbor
// resolution; optionally remaps the subgrid through the order map.
PX_FORCE_INLINE __device__ PxU32 getCellIndex(PxSparseGridData& data, int4 index, bool applySubgridOrder = true)
{
	const int haloSize = 0; // data.mGridParams.haloSize;
	index.x += haloSize;
	index.y += haloSize;
	index.z += haloSize;
	const PxU32* ordering = applySubgridOrder ? data.mSubgridOrderMap : NULL;
	return sparseGridAccess(data.mGridParams, index.x, index.y, index.z, index.w, ordering);
}
|
||||
|
||||
// Linear cell index of 'index' shifted by the given offset; accesses that
// cross subgrid borders are resolved through the neighbor table and yield
// EMPTY_SUBGRID when the neighbor does not exist.
PX_FORCE_INLINE __device__ PxU32 getCellIndex(PxSparseGridData& data, const int4& index, PxI32 offsetX, PxI32 offsetY, PxI32 offsetZ, bool applySubgridOrder = true)
{
	const PxU32* ordering = applySubgridOrder ? data.mSubgridOrderMap : NULL;
	return getIndex(data.mSubgridNeighbors, data.mGridParams, index.x + offsetX, index.y + offsetY, index.z + offsetZ, index.w, ordering);
}
|
||||
|
||||
// Same as the offset variant of getCellIndex; kept for interface parity with
// the dense-grid counterpart.
PX_FORCE_INLINE __device__ PxU32 getCellIndexSafe(PxSparseGridData& data, const int4& index, PxI32 offsetX, PxI32 offsetY, PxI32 offsetZ, bool applySubgridOrder = true)
{
	return getCellIndex(data, index, offsetX, offsetY, offsetZ, applySubgridOrder);
}
|
||||
|
||||
//Assumes that 0.0 is a valid value for access outside of the grid
|
||||
PX_FORCE_INLINE __device__ PxReal getGridValue(PxSparseGridData& data, const PxReal* dataSource, const int4& index, PxI32 offsetX, PxI32 offsetY, PxI32 offsetZ)
|
||||
{
|
||||
return getGridValue(data.mSubgridNeighbors, dataSource, data.mGridParams, index.x + offsetX, index.y + offsetY, index.z + offsetZ, index.w, data.mSubgridOrderMap);
|
||||
}
|
||||
|
||||
//Assumes that 0.0 is a valid value for access outside of the grid
|
||||
PX_FORCE_INLINE __device__ PxReal getGridValueSafe(PxSparseGridData& data, const PxReal* dataSource, const int4& index, PxI32 offsetX, PxI32 offsetY, PxI32 offsetZ)
|
||||
{
|
||||
return getGridValue(data, dataSource, index, offsetX, offsetY, offsetZ);
|
||||
}
|
||||
|
||||
// True when threadIndex exceeds the total cell capacity of the sparse grid.
PX_FORCE_INLINE __device__ bool outOfRange(PxSparseGridData& data, const int threadIndex)
{
	const PxSparseGridParams& params = data.mGridParams;
	return threadIndex >= params.maxNumSubgrids * params.subgridSizeX * params.subgridSizeY * params.subgridSizeZ;
}
|
||||
|
||||
// True when threadIndex exceeds the number of cells of the currently active subgrids.
PX_FORCE_INLINE __device__ bool outOfActiveCells(PxSparseGridData& data, const int threadIndex)
{
	const PxSparseGridParams& params = data.mGridParams;
	return threadIndex >= data.mNumSubgridsInUse[0] * params.subgridSizeX * params.subgridSizeY * params.subgridSizeZ;
}
|
||||
|
||||
// True when the subgrid slot referenced by index.w is invalid or inactive.
PX_FORCE_INLINE __device__ bool outOfBounds(PxSparseGridData& data, const int4& index)
{
	if (index.w == OUT_OF_BOUNDS)
		return true;
	return index.w >= data.mNumSubgridsInUse[0] || index.w >= data.mGridParams.maxNumSubgrids;
	//return data.subgridMask[index.w] == SUBGRID_INACTIVE;
}
|
||||
|
||||
// True only for the very last cell of the grid's full capacity.
PX_FORCE_INLINE __device__ bool isLastCell(PxSparseGridData& data, const int threadIndex)
{
	const PxSparseGridParams& params = data.mGridParams;
	return threadIndex == params.maxNumSubgrids * params.subgridSizeX * params.subgridSizeY * params.subgridSizeZ - 1;
}
|
||||
|
||||
// World-space position of the cell described by index (w selects the subgrid).
PX_FORCE_INLINE __device__ PxVec3 getLocation(PxSparseGridData& data, const int4& index)
{
	const PxU32 subgridHash = data.mUniqueHashkeyPerSubgrid[index.w];
	return getLocationFromHashkey(subgridHash, data.mGridParams, index);
}
|
||||
|
||||
//This will transform p to cell local coordinates
|
||||
PX_FORCE_INLINE __device__ int4 getCellIndexFromParticleAndTransformToLocalCoordinates(PxSparseGridData& data, PxVec3& p)
|
||||
{
|
||||
return getCellIndexFromPosition(p, data.mGridParams, data.mUniqueHashkeyPerSubgrid, data.mNumSubgridsInUse);
|
||||
}
|
||||
|
||||
1384
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/updateBodiesAndShapes.cu
vendored
Normal file
1384
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/updateBodiesAndShapes.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
428
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/updateTransformAndBoundArray.cu
vendored
Normal file
428
engine/third_party/physx/source/gpusimulationcontroller/src/CUDA/updateTransformAndBoundArray.cu
vendored
Normal file
@@ -0,0 +1,428 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "foundation/PxPreprocessor.h"
|
||||
|
||||
#include "PxgCommonDefines.h"
|
||||
#include "PxgSimulationCoreKernelIndices.h"
|
||||
#include "PxgSimulationCoreDesc.h"
|
||||
#include "PxgSolverBody.h"
|
||||
#include "PxvDynamics.h"
|
||||
#include "PxgShapeSim.h"
|
||||
#include "PxgBodySim.h"
|
||||
#include "cutil_math.h"
|
||||
#include "reduction.cuh"
|
||||
#include "updateCacheAndBound.cuh"
|
||||
#include "PxsRigidBody.h"
|
||||
#include "PxgArticulation.h"
|
||||
#include "PxgAggregate.h"
|
||||
#include "PxgAABBManager.h"
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// No-op host entry point — presumably referenced so the linker retains this translation unit's kernels; confirm against callers.
extern "C" __host__ void initSimulationControllerKernels1() {}
|
||||
|
||||
// Applies a list of bound/transform updates to the device-side arrays.
// The MSB of indexFrom selects the source: set = copy from the separate
// boundsArray/transformsArray inputs (new entry), clear = copy from another
// slot of the device arrays themselves (moved entry).
extern "C" __global__ void mergeTransformCacheAndBoundArrayChanges(
	PxBounds3* PX_RESTRICT deviceBounds,
	PxsCachedTransform* PX_RESTRICT deviceTransforms,
	const PxBounds3* PX_RESTRICT boundsArray,
	const PxsCachedTransform* PX_RESTRICT transformsArray,
	const PxBoundTransformUpdate* PX_RESTRICT changes,
	const PxU32 numChanges
) {
	const int changeIdx = blockIdx.x * blockDim.x + threadIdx.x;
	if (changeIdx >= numChanges)
		return;

	const PxU32 indexTo = changes[changeIdx].indexTo;
	const PxU32 indexFrom = changes[changeIdx].indexFrom & 0x7FFFFFFF;
	const bool isNew = (changes[changeIdx].indexFrom & (1U << 31)) != 0;

	if (isNew) {
		// New entry: pull from the source arrays.
		deviceBounds[indexTo] = boundsArray[indexFrom];
		deviceTransforms[indexTo] = transformsArray[indexFrom];
	} else {
		// Moved entry: copy within the device arrays.
		deviceBounds[indexTo] = deviceBounds[indexFrom];
		deviceTransforms[indexTo] = deviceTransforms[indexFrom];
	}
}
|
||||
|
||||
// Refreshes the transform cache and AABB bounds for every non-static shape
// after the solver produced new body poses (one grid-stride loop over all
// shapes). Also records per-shape frozen/unfrozen/updated flags and per-body
// activate/deactivate flags. Handles rigid bodies and articulation links via
// separate branches on the node index.
extern "C" __global__ void updateTransformCacheAndBoundArrayLaunch(const PxgSimulationCoreDesc* scDesc)
{
	const PxgSolverBodySleepData* PX_RESTRICT gSleepData = scDesc->mSleepData;

	const PxU32* PX_RESTRICT gBodyDataIndices = scDesc->mBodyDataIndices;
	const PxgShape* PX_RESTRICT gShapes = scDesc->mShapes;

	const PxgBodySim* PX_RESTRICT gBodySimPool = scDesc->mBodySimBufferDeviceData;

	const PxU32 gNumShapes = scDesc->mNbTotalShapes;
	const PxgShapeSim* PX_RESTRICT gShapeSimPool = scDesc->mShapeSimsBufferDeviceData;

	const PxgArticulation* PX_RESTRICT gArticulations = scDesc->mArticulationPool;
	const PxgSolverBodySleepData* PX_RESTRICT gArticulationSleepData = scDesc->mArticulationSleepDataPool;

	PxsCachedTransform* PX_RESTRICT gTransformCache = scDesc->mTransformCache;
	PxBounds3* PX_RESTRICT gBounds = scDesc->mBounds;

	//Each shape has a corresponding unfrozen element
	PxU32* PX_RESTRICT frozen = scDesc->mFrozen;
	PxU32* PX_RESTRICT unfrozen = scDesc->mUnfrozen;
	//Each shape has a updated element corresponding to the elementIndex
	PxU32* PX_RESTRICT updated = scDesc->mUpdated;

	//Each body has a corresponding active and deactive element
	PxU32* PX_RESTRICT active = scDesc->mActivate;
	PxU32* PX_RESTRICT deactivate = scDesc->mDeactivate;

	const PxU32 idx = threadIdx.x + blockIdx.x * blockDim.x;

	// Grid-stride loop: each thread may process several shapes.
	for(PxU32 i=idx; i<gNumShapes; i+=blockDim.x * gridDim.x)
	{
		const PxgShapeSim& shapeSim = gShapeSimPool[i];

		const PxNodeIndex bodySimNodeIndex = shapeSim.mBodySimIndex; // bodySimIndex is the same as nodeIndex in the IG

		//not static body or deleted shape
		if (!bodySimNodeIndex.isStaticBody())
		{
			const PxU32 elementIndex = i; // this is the transform cache and bound array index
			const PxU32 bodySimIndex = bodySimNodeIndex.index();
			//printf("i %i bodySimIndex %i\n", idx, bodySimIndex );

			const PxgBodySim& bodySim = gBodySimPool[bodySimIndex];

			const PxU32 shapeFlags = shapeSim.mShapeFlags;
			// Broad-phase participant: simulation or trigger shape.
			bool isBP = (shapeFlags & PxU32(PxShapeFlag::eSIMULATION_SHAPE | PxShapeFlag::eTRIGGER_SHAPE));

			// Broad-phase or scene-query participant: its cache/bounds must be refreshed.
			bool isBPOrSq = (shapeFlags & PxU32(PxShapeFlag::eSIMULATION_SHAPE | PxShapeFlag::eTRIGGER_SHAPE | PxShapeFlag::eSCENE_QUERY_SHAPE));

			if (!bodySimNodeIndex.isArticulation())
			{
				const PxU32 activeNodeIndex = gBodyDataIndices[bodySimIndex];

				//if activeNodeIndex is valid, which means this node is active
				if (activeNodeIndex != 0xFFFFFFFF)
				{
					const PxU32 internalFlags = gSleepData[activeNodeIndex].internalFlags;
					const PxTransform body2World = bodySim.body2World.getTransform();

					// Track freeze-state transitions for this shape.
					if ((internalFlags & PxsRigidBody::eFREEZE_THIS_FRAME) && (internalFlags & PxsRigidBody::eFROZEN))
					{
						frozen[i] = 1;
						gTransformCache[elementIndex].flags = PxsTransformFlag::eFROZEN;
					}
					else if (internalFlags & PxsRigidBody::eUNFREEZE_THIS_FRAME)
					{
						unfrozen[i] = 1;
					}

					// Refresh cache/bounds unless the body stays frozen (a body freezing this frame still gets one final refresh).
					if (!(internalFlags & PxsRigidBody::eFROZEN) || (internalFlags & PxsRigidBody::eFREEZE_THIS_FRAME))
					{
						if (isBP)
							updated[elementIndex] = 1;

						const PxTransform absPos = getAbsPose(body2World, shapeSim.mTransform, bodySim.body2Actor_maxImpulseW.getTransform());

						updateCacheAndBound(absPos, shapeSim, elementIndex, gTransformCache, gBounds, gShapes, isBPOrSq);
					}

					// Record wake/sleep transitions per body.
					if (internalFlags & PxsRigidBody::eACTIVATE_THIS_FRAME)
						active[bodySimIndex] = 1;
					else if (internalFlags & PxsRigidBody::eDEACTIVATE_THIS_FRAME)
						deactivate[bodySimIndex] = 1;
				}
			}
			else
			{
				//This is articulation
				const PxU32 articulationId = bodySim.articulationRemapId;
				const PxgArticulation& articulation = gArticulations[articulationId];
				const PxgSolverBodySleepData artiSleepData = gArticulationSleepData[articulationId];
				const PxU32 internalFlags = artiSleepData.internalFlags;

				const PxU32 linkId = bodySimNodeIndex.articulationLinkId();

				const PxTransform body2World = articulation.linkBody2Worlds[linkId];

				if (isBP)
					updated[elementIndex] = 1;

				const PxTransform body2Actor = articulation.linkBody2Actors[linkId];

				const PxTransform absPos = getAbsPose(body2World, shapeSim.mTransform, body2Actor);

				// NOTE(review): articulation links are always refreshed here — no frozen-state check as in the rigid-body path.
				updateCacheAndBound(absPos, shapeSim, elementIndex, gTransformCache, gBounds, gShapes, isBPOrSq);

				if (internalFlags & PxsRigidBody::eACTIVATE_THIS_FRAME)
					active[bodySimIndex] = 1;
				else if (internalFlags & PxsRigidBody::eDEACTIVATE_THIS_FRAME)
					deactivate[bodySimIndex] = 1;
			}
		}
	}
}
|
||||
|
||||
//after updateTransformCacheAndBoundArrayLaunch, we need to update the flags in the transform cache
|
||||
extern "C" __global__ void updateChangedAABBMgrHandlesLaunch(const PxgSimulationCoreDesc* scDesc)
|
||||
{
|
||||
const PxU32 gNumElements = scDesc->mBitMapWordCounts * 32;
|
||||
|
||||
const PxU32* updated = scDesc->mUpdated;
|
||||
|
||||
PxU32* gChangedAABBMgrHandles = scDesc->mChangedAABBMgrHandles;
|
||||
|
||||
const PxU32 idx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
|
||||
const PxU32 threadIndexInWarp = threadIdx.x & (WARP_SIZE -1);
|
||||
|
||||
for (PxU32 i = idx; i<gNumElements; i += blockDim.x * gridDim.x)
|
||||
{
|
||||
const PxU32 updateBit = updated[i];
|
||||
const PxU32 word = __ballot_sync(FULL_MASK, updateBit);
|
||||
|
||||
if(threadIndexInWarp == 0)
|
||||
{
|
||||
gChangedAABBMgrHandles[i/WARP_SIZE] = word;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//This kernel merge direct API updated handle and the CPU API updated handle
|
||||
extern "C" __global__ void mergeChangedAABBMgrHandlesLaunch(const PxgUpdateActorDataDesc* updateActorDesc)
|
||||
{
|
||||
//max number of shapes
|
||||
const PxU32 gNumElements = updateActorDesc->mBitMapWordCounts * 32;
|
||||
|
||||
//This is Direct API changed handles
|
||||
const PxU32* updated = updateActorDesc->mUpdated;
|
||||
|
||||
//This is CPU API changed handles
|
||||
PxU32* gChangedAABBMgrHandles = updateActorDesc->mChangedAABBMgrHandles;
|
||||
|
||||
const PxU32 idx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
|
||||
const PxU32 threadIndexInWarp = threadIdx.x & (WARP_SIZE - 1);
|
||||
|
||||
for (PxU32 i = idx; i < gNumElements; i += blockDim.x * gridDim.x)
|
||||
{
|
||||
const PxU32 updateBit = updated[i];
|
||||
|
||||
const PxU32 word = __ballot_sync(FULL_MASK, updateBit);
|
||||
|
||||
if (threadIndexInWarp == 0)
|
||||
{
|
||||
gChangedAABBMgrHandles[i / WARP_SIZE] |= word;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ void computeFrozenAndUnfrozenHistogramLaunch(const PxgSimulationCoreDesc* scDesc)
|
||||
{
|
||||
const PxU32 WARP_PERBLOCK_SIZE = PxgSimulationCoreKernelBlockDim::COMPUTE_FROZEN_UNFROZEN_HISTOGRAM/WARP_SIZE;
|
||||
const PxU32 LOG2_WARP_PERBLOCK_SIZE = 3;
|
||||
|
||||
assert((1 << LOG2_WARP_PERBLOCK_SIZE) == WARP_PERBLOCK_SIZE);
|
||||
|
||||
__shared__ PxU32 sFrozenWarpAccumulator[WARP_PERBLOCK_SIZE];
|
||||
__shared__ PxU32 sUnFrozenWarpAccumulator[WARP_PERBLOCK_SIZE];
|
||||
|
||||
__shared__ PxU32 sFrozenBlockAccumulator;
|
||||
__shared__ PxU32 sUnfrozenBlockAccumulator;
|
||||
|
||||
PxU32* gFrozen = scDesc->mFrozen;
|
||||
PxU32* gUnfrozen = scDesc->mUnfrozen;
|
||||
PxU32* gFrozenBlock = scDesc->mFrozenBlockAndRes;
|
||||
PxU32* gUnfrozenBlock = scDesc->mUnfrozenBlockAndRes;
|
||||
|
||||
const PxU32 gNbTotalShapes = scDesc->mNbTotalShapes;
|
||||
|
||||
const PxU32 nbBlocksRequired = (gNbTotalShapes + blockDim.x-1)/blockDim.x;
|
||||
|
||||
const PxU32 nbIterationsPerBlock = (nbBlocksRequired + gridDim.x-1)/gridDim.x;
|
||||
|
||||
const PxU32 threadIndexInWarp = threadIdx.x & (WARP_SIZE-1);
|
||||
const PxU32 warpIndex = threadIdx.x/(WARP_SIZE);
|
||||
const PxU32 idx = threadIdx.x;
|
||||
|
||||
if(threadIdx.x == 0)
|
||||
{
|
||||
sFrozenBlockAccumulator = 0;
|
||||
sUnfrozenBlockAccumulator = 0;
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
for(PxU32 i = 0; i < nbIterationsPerBlock; ++i)
|
||||
{
|
||||
const PxU32 workIndex = i*WARP_SIZE*WARP_PERBLOCK_SIZE + idx + nbIterationsPerBlock * blockIdx.x * blockDim.x;
|
||||
|
||||
//frozen/unfrozen is either 0 or 1
|
||||
PxU32 frozen = 0, unfrozen = 0;
|
||||
if(workIndex < gNbTotalShapes)
|
||||
{
|
||||
frozen = gFrozen[workIndex];
|
||||
unfrozen = gUnfrozen[workIndex];
|
||||
}
|
||||
|
||||
const PxU32 threadMask = (1<<threadIndexInWarp)-1;
|
||||
|
||||
const PxU32 frozenAccumVal = __popc(__ballot_sync(FULL_MASK, frozen)&threadMask);
|
||||
const PxU32 unfrozenAccumVal = __popc(__ballot_sync(FULL_MASK, unfrozen)&threadMask);
|
||||
|
||||
if(threadIndexInWarp == (WARP_SIZE-1))
|
||||
{
|
||||
sFrozenWarpAccumulator[warpIndex] = frozenAccumVal + frozen;
|
||||
sUnFrozenWarpAccumulator[warpIndex] = unfrozenAccumVal + unfrozen;
|
||||
}
|
||||
|
||||
const PxU32 prevFrozenBlockAccumulator = sFrozenBlockAccumulator;
|
||||
const PxU32 prevUnfrozenBlockAccumulator = sUnfrozenBlockAccumulator;
|
||||
|
||||
__syncthreads();
|
||||
|
||||
unsigned mask_idx = __ballot_sync(FULL_MASK, idx < WARP_PERBLOCK_SIZE);
|
||||
if(idx < WARP_PERBLOCK_SIZE)
|
||||
{
|
||||
const PxU32 frozenValue = sFrozenWarpAccumulator[threadIndexInWarp];
|
||||
const PxU32 unfrozenValue = sUnFrozenWarpAccumulator[threadIndexInWarp];
|
||||
const PxU32 frozenOutput = warpScan<AddOpPxU32, PxU32, LOG2_WARP_PERBLOCK_SIZE>(mask_idx, frozenValue) - frozenValue;
|
||||
const PxU32 unfrozenOutput = warpScan<AddOpPxU32, PxU32, LOG2_WARP_PERBLOCK_SIZE>(mask_idx, unfrozenValue) - unfrozenValue;
|
||||
sFrozenWarpAccumulator[threadIndexInWarp] = frozenOutput;
|
||||
sUnFrozenWarpAccumulator[threadIndexInWarp] = unfrozenOutput;
|
||||
//const PxU32 output = warpScanAddWriteToSharedMem<WARP_PERBLOCK_SIZE>(idx, threadIndexInWarp, sWarpAccumulator, value, value);
|
||||
if(threadIndexInWarp == (WARP_PERBLOCK_SIZE-1))
|
||||
{
|
||||
sFrozenBlockAccumulator +=(frozenOutput + frozenValue);
|
||||
sUnfrozenBlockAccumulator +=(unfrozenOutput + unfrozenValue);
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if(workIndex < gNbTotalShapes)
|
||||
{
|
||||
//Now output both histograms...
|
||||
gFrozen[workIndex] = frozenAccumVal + prevFrozenBlockAccumulator + sFrozenWarpAccumulator[warpIndex];
|
||||
gUnfrozen[workIndex] = unfrozenAccumVal + prevUnfrozenBlockAccumulator + sUnFrozenWarpAccumulator[warpIndex];
|
||||
}
|
||||
}
|
||||
|
||||
if(threadIdx.x == 0)
|
||||
{
|
||||
gFrozenBlock[blockIdx.x] = sFrozenBlockAccumulator;
|
||||
gUnfrozenBlock[blockIdx.x] = sUnfrozenBlockAccumulator;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ void outputFrozenAndUnfrozenHistogram(PxgSimulationCoreDesc* scDesc)
|
||||
{
|
||||
const PxU32 nbBlocks = PxgSimulationCoreKernelGridDim::OUTPUT_FROZEN_UNFROZEN_HISTOGRAM;
|
||||
PX_COMPILE_TIME_ASSERT(nbBlocks == 32);
|
||||
|
||||
__shared__ PxU32 sFrozenBlockAccum[nbBlocks];
|
||||
__shared__ PxU32 sUnfrozenBlockAccum[nbBlocks];
|
||||
|
||||
const PxU32 idx = threadIdx.x;
|
||||
|
||||
PxU32* gFrozen = scDesc->mFrozen;
|
||||
PxU32* gUnfrozen = scDesc->mUnfrozen;
|
||||
PxU32* gFrozenBlock = scDesc->mFrozenBlockAndRes;
|
||||
PxU32* gUnfrozenBlock = scDesc->mUnfrozenBlockAndRes;
|
||||
|
||||
const PxU32 gNbTotalShapes = scDesc->mNbTotalShapes;
|
||||
const PxU32 globalThreadIndex = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
|
||||
PxU32 frozen = 0;
|
||||
PxU32 frozenOutput = 0;
|
||||
PxU32 unfrozen = 0;
|
||||
PxU32 unfrozenOutput = 0;
|
||||
|
||||
unsigned mask_idx = __ballot_sync(FULL_MASK, idx < nbBlocks);
|
||||
if(idx < nbBlocks)
|
||||
{
|
||||
frozen = gFrozenBlock[idx];
|
||||
frozenOutput = warpScan<AddOpPxU32, PxU32>(mask_idx, frozen) - frozen;
|
||||
sFrozenBlockAccum[idx] = frozenOutput;
|
||||
|
||||
unfrozen = gUnfrozenBlock[idx];
|
||||
unfrozenOutput = warpScan<AddOpPxU32, PxU32>(mask_idx, unfrozen) - unfrozen;
|
||||
sUnfrozenBlockAccum[idx] = unfrozenOutput;
|
||||
}
|
||||
|
||||
if(globalThreadIndex == (nbBlocks-1))
|
||||
{
|
||||
scDesc->mTotalFrozenShapes = frozenOutput + frozen;
|
||||
scDesc->mTotalUnfrozenShapes = unfrozenOutput + unfrozen;
|
||||
}
|
||||
|
||||
const PxU32 totalBlockRequired = (gNbTotalShapes + (blockDim.x-1))/ blockDim.x;
|
||||
|
||||
const PxU32 numIterationPerBlock = (totalBlockRequired + (nbBlocks-1))/ nbBlocks;
|
||||
|
||||
__syncthreads();
|
||||
|
||||
const PxU32 frozenBlockAccum = sFrozenBlockAccum[blockIdx.x];
|
||||
const PxU32 unfrozenBlockAccum = sUnfrozenBlockAccum[blockIdx.x];
|
||||
|
||||
for(PxU32 i=0; i<numIterationPerBlock; ++i)
|
||||
{
|
||||
const PxU32 workIndex = i * blockDim.x + idx + numIterationPerBlock * blockIdx.x * blockDim.x;
|
||||
|
||||
if(workIndex < gNbTotalShapes)
|
||||
{
|
||||
gFrozen[workIndex] = gFrozen[workIndex] + frozenBlockAccum;
|
||||
gUnfrozen[workIndex] = gUnfrozen[workIndex] + unfrozenBlockAccum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ void createFrozenAndUnfrozenArray(const PxgSimulationCoreDesc* scDesc)
|
||||
{
|
||||
PxU32* gFrozen = scDesc->mFrozen;
|
||||
PxU32* gUnfrozen = scDesc->mUnfrozen;
|
||||
|
||||
PxU32* gFrozenRes = scDesc->mFrozenBlockAndRes;
|
||||
PxU32* gUnfrozenRes = scDesc->mUnfrozenBlockAndRes;
|
||||
|
||||
const PxU32 gNbTotalShapes = scDesc->mNbTotalShapes;
|
||||
const PxU32 gNbFrozenTotalShapes = scDesc->mTotalFrozenShapes;
|
||||
const PxU32 gNbUnfrozenTotalShapes = scDesc->mTotalUnfrozenShapes;
|
||||
|
||||
const PxU32 idx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
|
||||
for(PxU32 i=idx; i < gNbFrozenTotalShapes; i+= blockDim.x * gridDim.x)
|
||||
{
|
||||
gFrozenRes[i] = binarySearch<PxU32>(gFrozen, gNbTotalShapes, i);
|
||||
}
|
||||
|
||||
for(PxU32 i=idx; i< gNbUnfrozenTotalShapes; i+= blockDim.x * gridDim.x)
|
||||
{
|
||||
gUnfrozenRes[i] = binarySearch<PxU32>(gUnfrozen, gNbTotalShapes, i);
|
||||
}
|
||||
}
|
||||
312
engine/third_party/physx/source/gpusimulationcontroller/src/PxgAlgorithms.cpp
vendored
Normal file
312
engine/third_party/physx/source/gpusimulationcontroller/src/PxgAlgorithms.cpp
vendored
Normal file
@@ -0,0 +1,312 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgAlgorithms.h"
|
||||
#include "PxgAlgorithmsData.h"
|
||||
|
||||
#include "PxParticleGpu.h"
|
||||
|
||||
#include "PxgKernelWrangler.h"
|
||||
#include "PxgKernelIndices.h"
|
||||
#include "PxgCudaMemoryAllocator.h"
|
||||
|
||||
#include "cudamanager/PxCudaContext.h"
|
||||
#include "cudamanager/PxCudaContextManager.h"
|
||||
|
||||
#include "PxgCudaMemoryAllocator.h"
|
||||
#include "foundation/PxErrors.h"
|
||||
#include "foundation/PxFoundation.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
void scanPerBlockLaunch(PxgKernelLauncher& launcher, const PxU32* data, PxU32* result, PxU32* partialSums, const PxU32 length, const PxU32 numBlocks,
|
||||
const PxU32 numThreadsPerBlock, CUstream stream, const PxU32 exclusiveScan, PxU32* totalSum)
|
||||
{
|
||||
launcher.launchKernel(PxgKernelIds::scanPerBlockKernel, numBlocks, numThreadsPerBlock, numThreadsPerBlock * sizeof(int) / 32, stream,
|
||||
data, result, partialSums, length, exclusiveScan, totalSum);
|
||||
}
|
||||
|
||||
void addBlockSumsLaunch(PxgKernelLauncher& launcher, const PxU32* partialSums, PxU32* data, const PxU32 length,
|
||||
const PxU32 numBlocks, const PxU32 numThreadsPerBlock, CUstream stream, PxU32* totalSum)
|
||||
{
|
||||
launcher.launchKernel(PxgKernelIds::addBlockSumsKernel, numBlocks, numThreadsPerBlock, 0, stream,
|
||||
partialSums, data, length, totalSum);
|
||||
}
|
||||
|
||||
|
||||
void scanPerBlockLaunch(PxgKernelLauncher& launcher, const PxInt4x4* data, PxInt4x4* result, PxInt4x4* partialSums, const PxU32 length, const PxU32 numBlocks,
|
||||
const PxU32 numThreadsPerBlock, CUstream stream, const PxU32 exclusiveScan, PxInt4x4* totalSum)
|
||||
{
|
||||
launcher.launchKernel(PxgKernelIds::scanPerBlockKernel4x4, numBlocks, numThreadsPerBlock, numThreadsPerBlock * sizeof(PxInt4) / 32, stream,
|
||||
data, result, partialSums, length, exclusiveScan, totalSum);
|
||||
}
|
||||
|
||||
void addBlockSumsLaunch(PxgKernelLauncher& launcher, const PxInt4x4* partialSums, PxInt4x4* data, const PxU32 length,
|
||||
const PxU32 numBlocks, const PxU32 numThreadsPerBlock, CUstream stream, PxInt4x4* totalSum)
|
||||
{
|
||||
launcher.launchKernel(PxgKernelIds::addBlockSumsKernel4x4, numBlocks, numThreadsPerBlock, 0, stream,
|
||||
partialSums, data, length, totalSum);
|
||||
}
|
||||
|
||||
void radixFourBitCountPerBlockLaunch(PxgKernelLauncher& launcher, const PxU32* data, PxU16* offsets, const int passIndex, PxInt4x4* partialSums, const PxU32 length, const PxU32 numBlocks,
|
||||
const PxU32 numThreadsPerBlock, CUstream stream, PxInt4x4* totalSum)
|
||||
{
|
||||
launcher.launchKernel(PxgKernelIds::radixFourBitCountPerBlockKernel, numBlocks, numThreadsPerBlock, numThreadsPerBlock * sizeof(PxInt4x4) / 32, stream,
|
||||
data, offsets, passIndex, partialSums, length, totalSum);
|
||||
}
|
||||
|
||||
void radixFourBitReorderLaunch(PxgKernelLauncher& launcher, const PxU32* data, const PxU16* offsets, PxU32* reordered, PxU32 passIndex, PxInt4x4* partialSums, const PxU32 length, PxInt4x4* cumulativeSum,
|
||||
const PxU32 numBlocks, const PxU32 numThreadsPerBlock, CUstream stream, PxU32* dependentData, PxU32* dependentDataReordered)
|
||||
{
|
||||
launcher.launchKernel(PxgKernelIds::radixFourBitReorderKernel, numBlocks, numThreadsPerBlock, 0, stream,
|
||||
data, offsets, reordered, passIndex, partialSums, length, cumulativeSum, dependentData, dependentDataReordered);
|
||||
}
|
||||
|
||||
void reorderLaunch(PxgKernelLauncher& launcher, const float4* data, float4* reordered, const PxU32 length, const PxU32* reorderedToOriginalMap, CUstream stream)
|
||||
{
|
||||
const PxU32 numThreadsPerBlock = 512;
|
||||
const PxU32 numBlocks = (length + numThreadsPerBlock - 1) / numThreadsPerBlock;
|
||||
launcher.launchKernel(PxgKernelIds::reorderKernel, numBlocks, numThreadsPerBlock, 0, stream,
|
||||
data, reordered, length, reorderedToOriginalMap);
|
||||
}
|
||||
|
||||
|
||||
PX_FORCE_INLINE PxU32 toNextHigherEvenNumber(PxU32 n)
|
||||
{
|
||||
return n + n % 2;
|
||||
}
|
||||
|
||||
static PxU32 computeTmpScanBufferSize(const PxU32 blockSize, const PxU32 n)
|
||||
{
|
||||
if (n > blockSize)
|
||||
{
|
||||
const PxU32 numThreads = blockSize;
|
||||
const PxU32 numBlocks = (n + numThreads - 1) / numThreads;
|
||||
|
||||
return numBlocks + computeTmpScanBufferSize(blockSize, numBlocks);
|
||||
}
|
||||
else
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void computeBlockSum(PxgKernelLauncher& launcher, const T* blockSum, T* blockSumScan, T* blockSumBlockSum, T* result,
|
||||
const PxU32 blockSize, const PxU32 n, const PxU32 numBlocks, const CUstream& stream, T* totalSum = NULL)
|
||||
{
|
||||
PxU32 numThreads2 = blockSize;
|
||||
PxU32 numBlocks2 = (numBlocks + numThreads2 - 1) / numThreads2;
|
||||
T* tmp = NULL;
|
||||
scanPerBlockLaunch(launcher, blockSum, blockSumScan, blockSumBlockSum, numBlocks, numBlocks2, numThreads2, stream, 1, tmp);
|
||||
|
||||
if (numBlocks2 > 1)
|
||||
{
|
||||
computeBlockSum<T>(launcher,
|
||||
blockSumBlockSum,
|
||||
blockSumScan + numBlocks,
|
||||
blockSumBlockSum + numBlocks2,
|
||||
blockSumScan,
|
||||
blockSize,
|
||||
numBlocks,
|
||||
numBlocks2,
|
||||
stream);
|
||||
}
|
||||
|
||||
numThreads2 = blockSize;
|
||||
numBlocks2 = (n + numThreads2 - 1) / numThreads2;
|
||||
addBlockSumsLaunch(launcher, blockSumScan, result, n, numBlocks2, numThreads2, stream, totalSum);
|
||||
}
|
||||
|
||||
PX_FORCE_INLINE PxU32 getBits(PxU32 value, PxU32 numBits)
|
||||
{
|
||||
return value & ((1 << numBits) - 1);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void PxGpuRadixSort<T>::sort(T* inAndOutBuf, PxU32 numBitsToSort, const CUstream& stream, PxU32* outReorderTrackingBuffer, PxU32 numElementsToSort)
|
||||
{
|
||||
if (!mValueReorderBuffer && outReorderTrackingBuffer)
|
||||
{
|
||||
mValueReorderBuffer = PX_DEVICE_MEMORY_ALLOC(PxU32, *mKernelLauncher->getCudaContextManager(), mNumElements);
|
||||
if (!mValueReorderBuffer)
|
||||
{
|
||||
// the allocation above can fail, and if we just continue we're getting a mess with the pointers because of the swap below.
|
||||
PxGetFoundation().error(PxErrorCode::eINTERNAL_ERROR, PX_FL, "PxGpuRadixSort: failed to allocate reorder buffer, aborting sort!\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
PxU32 numActiveElements = numElementsToSort != 0xFFFFFFFF ? PxU32(numElementsToSort) : mNumElements;
|
||||
if (numActiveElements == 0)
|
||||
return;
|
||||
|
||||
PxInt4x4* blockSumsBuf = mTempBlockSumsGpuPtr;
|
||||
PxInt4x4* blockSumScanBuf = mTempBlockSumScanGpuPtr;
|
||||
|
||||
PxU32 numPasses = toNextHigherEvenNumber((numBitsToSort + 3) / 4);
|
||||
PX_ASSERT(numPasses % 2 == 0);
|
||||
|
||||
PxU32 numBlocks = (numActiveElements + mNumThreadsPerBlock - 1) / mNumThreadsPerBlock;
|
||||
for (PxU32 i = 0; i < numPasses; ++i)
|
||||
{
|
||||
radixFourBitCountPerBlockLaunch(*mKernelLauncher, inAndOutBuf, mOffsetBuffer, i, blockSumsBuf, numActiveElements, numBlocks, mNumThreadsPerBlock, stream, mTotalSum);
|
||||
|
||||
computeBlockSum<PxInt4x4>(*mKernelLauncher, blockSumsBuf, blockSumScanBuf, blockSumsBuf + numBlocks, NULL, mNumThreadsPerBlock, numActiveElements, numBlocks, stream, mTotalSum);
|
||||
|
||||
radixFourBitReorderLaunch(*mKernelLauncher, inAndOutBuf, mOffsetBuffer, mReorderBuffer, i, blockSumScanBuf, numActiveElements, mTotalSum, numBlocks, mNumThreadsPerBlock, stream, outReorderTrackingBuffer, mValueReorderBuffer);
|
||||
|
||||
PxSwap(inAndOutBuf, mReorderBuffer);
|
||||
if (outReorderTrackingBuffer)
|
||||
PxSwap(outReorderTrackingBuffer, mValueReorderBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool PxGpuRadixSort<T>::initialize(PxgKernelLauncher* kernelLauncher, PxU32 numElements, PxU32 numThreadsPerBlock)
|
||||
{
|
||||
if (mTempBlockSumsGpuPtr)
|
||||
return false;
|
||||
|
||||
PX_ASSERT(numThreadsPerBlock <= 1024);
|
||||
PX_ASSERT(numThreadsPerBlock >= 32);
|
||||
|
||||
mKernelLauncher = kernelLauncher;
|
||||
|
||||
mNumThreadsPerBlock = numThreadsPerBlock;
|
||||
mNumElements = numElements;
|
||||
|
||||
mTempBufferSize = computeTmpScanBufferSize(numThreadsPerBlock, numElements);
|
||||
mTempBlockSumsGpuPtr = PX_DEVICE_MEMORY_ALLOC(PxInt4x4, *kernelLauncher->getCudaContextManager(), mTempBufferSize);
|
||||
mTempBlockSumScanGpuPtr = PX_DEVICE_MEMORY_ALLOC(PxInt4x4, *kernelLauncher->getCudaContextManager(), mTempBufferSize);
|
||||
mTotalSum = PX_DEVICE_MEMORY_ALLOC(PxInt4x4, *kernelLauncher->getCudaContextManager(), 1);
|
||||
|
||||
mReorderBuffer = PX_DEVICE_MEMORY_ALLOC(T, *kernelLauncher->getCudaContextManager(), numElements);
|
||||
mOffsetBuffer = PX_DEVICE_MEMORY_ALLOC(PxU16, *kernelLauncher->getCudaContextManager(), numElements);
|
||||
|
||||
return true;
|
||||
}
|
||||
template<typename T>
|
||||
PxGpuRadixSort<T>::PxGpuRadixSort(PxgKernelLauncher* kernelLauncher, PxU32 numElements, PxU32 numThreadsPerBlock) : mValueReorderBuffer(NULL)
|
||||
{
|
||||
initialize(kernelLauncher, numElements, numThreadsPerBlock);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool PxGpuRadixSort<T>::release()
|
||||
{
|
||||
if (!mTempBlockSumsGpuPtr)
|
||||
return false;
|
||||
|
||||
PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mTempBlockSumsGpuPtr);
|
||||
PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mTempBlockSumScanGpuPtr);
|
||||
PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mTotalSum);
|
||||
PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mReorderBuffer);
|
||||
PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mOffsetBuffer);
|
||||
|
||||
if (mValueReorderBuffer)
|
||||
PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mValueReorderBuffer);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void PxGpuScan::scan(PxU32* inAndOutBuf, PxU32 exclusiveScan, const CUstream& stream, PxU32 numElementsToScan)
|
||||
{
|
||||
PxU32 numActiveElements = numElementsToScan != 0xFFFFFFFF ? PxU32(numElementsToScan) : mNumElements;
|
||||
if (numActiveElements == 0)
|
||||
return;
|
||||
|
||||
PxU32* blockSumsBuf = mTempBlockSumsGpuPtr;
|
||||
PxU32* blockSumScanBuf = mTempBlockSumScanGpuPtr;
|
||||
|
||||
PxU32 numBlocks = (numActiveElements + mNumThreadsPerBlock - 1) / mNumThreadsPerBlock;
|
||||
scanPerBlockLaunch(*mKernelLauncher, inAndOutBuf, inAndOutBuf, blockSumsBuf, numActiveElements, numBlocks, mNumThreadsPerBlock, stream, exclusiveScan, mTotalSum);
|
||||
|
||||
if (numBlocks > 1)
|
||||
computeBlockSum(*mKernelLauncher, blockSumsBuf, blockSumScanBuf, blockSumsBuf + numBlocks, inAndOutBuf, mNumThreadsPerBlock, numActiveElements, numBlocks, stream, mTotalSum);
|
||||
}
|
||||
|
||||
void PxGpuScan::sumOnly(PxU32* inBuf, const CUstream& stream, PxU32 numElementsToScan)
|
||||
{
|
||||
PxU32 numActiveElements = numElementsToScan != 0xFFFFFFFF ? PxU32(numElementsToScan) : mNumElements;
|
||||
if (numActiveElements == 0)
|
||||
return;
|
||||
|
||||
PxU32* blockSumsBuf = mTempBlockSumsGpuPtr;
|
||||
PxU32* blockSumScanBuf = mTempBlockSumScanGpuPtr;
|
||||
|
||||
PxU32 numBlocks = (numActiveElements + mNumThreadsPerBlock - 1) / mNumThreadsPerBlock;
|
||||
PxU32 exclusiveScan = 1;
|
||||
scanPerBlockLaunch(*mKernelLauncher, inBuf, NULL, blockSumsBuf, numActiveElements, numBlocks, mNumThreadsPerBlock, stream, exclusiveScan, mTotalSum);
|
||||
|
||||
if (numBlocks > 1)
|
||||
computeBlockSum<PxU32>(*mKernelLauncher, blockSumsBuf, blockSumScanBuf, blockSumsBuf + numBlocks, NULL, mNumThreadsPerBlock, numActiveElements, numBlocks, stream, mTotalSum);
|
||||
}
|
||||
|
||||
bool PxGpuScan::initialize(PxgKernelLauncher* kernelLauncher, PxU32 numElements, PxU32 numThreadsPerBlock)
|
||||
{
|
||||
if (mTempBlockSumsGpuPtr)
|
||||
return false;
|
||||
|
||||
PX_ASSERT(numThreadsPerBlock <= 1024);
|
||||
PX_ASSERT(numThreadsPerBlock >= 32);
|
||||
|
||||
mKernelLauncher = kernelLauncher;
|
||||
|
||||
mNumThreadsPerBlock = numThreadsPerBlock;
|
||||
mNumElements = numElements;
|
||||
|
||||
mTempBufferSize = computeTmpScanBufferSize(numThreadsPerBlock, numElements);
|
||||
mTempBlockSumsGpuPtr = PX_DEVICE_MEMORY_ALLOC(PxU32, *kernelLauncher->getCudaContextManager(), mTempBufferSize);
|
||||
mTempBlockSumScanGpuPtr = PX_DEVICE_MEMORY_ALLOC(PxU32, *kernelLauncher->getCudaContextManager(), mTempBufferSize);
|
||||
mTotalSum = PX_DEVICE_MEMORY_ALLOC(PxU32, *kernelLauncher->getCudaContextManager(), 1);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
PxGpuScan::PxGpuScan(PxgKernelLauncher* cudaContextManager, PxU32 numElements, PxU32 numThreadsPerBlock)
|
||||
: mTempBlockSumsGpuPtr(NULL), mTempBlockSumScanGpuPtr(NULL), mTotalSum(NULL)
|
||||
{
|
||||
initialize(cudaContextManager, numElements, numThreadsPerBlock);
|
||||
}
|
||||
|
||||
bool PxGpuScan::release()
|
||||
{
|
||||
if (!mTempBlockSumsGpuPtr)
|
||||
return false;
|
||||
|
||||
PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mTempBlockSumsGpuPtr);
|
||||
PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mTempBlockSumScanGpuPtr);
|
||||
PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mTotalSum);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template class PxGpuRadixSort<PxU32>;
|
||||
}
|
||||
201
engine/third_party/physx/source/gpusimulationcontroller/src/PxgAnisotropy.cpp
vendored
Normal file
201
engine/third_party/physx/source/gpusimulationcontroller/src/PxgAnisotropy.cpp
vendored
Normal file
@@ -0,0 +1,201 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgAnisotropy.h"
|
||||
|
||||
|
||||
|
||||
#include "foundation/PxUserAllocated.h"
|
||||
#include "foundation/PxHashSet.h"
|
||||
|
||||
#include "PxPhysics.h"
|
||||
#include "PxParticleSystem.h"
|
||||
#include "PxParticleGpu.h"
|
||||
#include "PxPhysXGpu.h"
|
||||
#include "PxvGlobals.h"
|
||||
|
||||
#include "PxgParticleNeighborhoodProvider.h"
|
||||
#include "PxgKernelIndices.h"
|
||||
#include "PxgAlgorithms.h"
|
||||
#include "PxgSparseGridStandalone.h"
|
||||
#include "PxgAnisotropyData.h"
|
||||
#include "PxgCudaMemoryAllocator.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
#if ENABLE_KERNEL_LAUNCH_ERROR_CHECK
|
||||
#define checkCudaError() { cudaError_t err = cudaDeviceSynchronize(); if (err != 0) printf("Cuda error file: %s, line: %i, error: %i\n", PX_FL, err); }
|
||||
#else
|
||||
#define checkCudaError() { }
|
||||
#endif
|
||||
|
||||
void updateAnisotropy(PxgKernelLauncher& launcher, PxGpuParticleSystem* particleSystems, const PxU32 id, PxAnisotropyData* anisotropyDataPerParticleSystem, PxU32 numParticles,
|
||||
CUstream stream, PxU32 numThreadsPerBlock = 256)
|
||||
{
|
||||
const PxU32 numBlocks = (numParticles + numThreadsPerBlock - 1) / numThreadsPerBlock;
|
||||
launcher.launchKernel(PxgKernelIds::calculateAnisotropyLaunch, numBlocks, numThreadsPerBlock, 0, stream,
|
||||
particleSystems, id, anisotropyDataPerParticleSystem);
|
||||
checkCudaError();
|
||||
}
|
||||
|
||||
void anisotropyLaunch(PxgKernelLauncher& launcher, float4* deviceParticlePos, PxU32* sortedToOriginalParticleIndex, PxU32* sortedParticleToSubgrid, PxU32 maxNumSubgrids,
|
||||
PxU32* subgridNeighbors, PxU32* subgridEndIndices, int numParticles, PxU32* phases, PxU32 validPhaseMask,
|
||||
float4* q1, float4* q2, float4* q3, PxReal anisotropy, PxReal anisotropyMin, PxReal anisotropyMax, PxReal particleContactDistance, CUstream stream, PxU32 numThreadsPerBlock = 256)
|
||||
{
|
||||
const PxU32 numBlocks = (numParticles + numThreadsPerBlock - 1) / numThreadsPerBlock;
|
||||
launcher.launchKernel(PxgKernelIds::anisotropyKernel, numBlocks, numThreadsPerBlock, 0, stream,
|
||||
deviceParticlePos, sortedToOriginalParticleIndex, sortedParticleToSubgrid, maxNumSubgrids,
|
||||
subgridNeighbors, subgridEndIndices, numParticles, phases, validPhaseMask, q1, q2, q3, anisotropy, anisotropyMin, anisotropyMax, particleContactDistance);
|
||||
checkCudaError();
|
||||
}
|
||||
|
||||
void PxgAnisotropyGenerator::releaseGPUAnisotropyBuffers()
|
||||
{
|
||||
if (!mAnisotropyDataHost.mAnisotropy_q1)
|
||||
return;
|
||||
|
||||
PX_DEVICE_MEMORY_FREE(*mKernelLauncher.getCudaContextManager(), mAnisotropyDataHost.mAnisotropy_q1);
|
||||
PX_DEVICE_MEMORY_FREE(*mKernelLauncher.getCudaContextManager(), mAnisotropyDataHost.mAnisotropy_q2);
|
||||
PX_DEVICE_MEMORY_FREE(*mKernelLauncher.getCudaContextManager(), mAnisotropyDataHost.mAnisotropy_q3);
|
||||
mOwnsAnisotropyGPUBuffers = false;
|
||||
}
|
||||
|
||||
void PxgAnisotropyGenerator::allocateGPUAnisotropyBuffers()
|
||||
{
|
||||
if (mAnisotropyDataHost.mAnisotropy_q1)
|
||||
return;
|
||||
|
||||
mAnisotropyDataHost.mAnisotropy_q1 = PX_DEVICE_MEMORY_ALLOC(PxVec4, *mKernelLauncher.getCudaContextManager(), mNumParticles);
|
||||
mAnisotropyDataHost.mAnisotropy_q2 = PX_DEVICE_MEMORY_ALLOC(PxVec4, *mKernelLauncher.getCudaContextManager(), mNumParticles);
|
||||
mAnisotropyDataHost.mAnisotropy_q3 = PX_DEVICE_MEMORY_ALLOC(PxVec4, *mKernelLauncher.getCudaContextManager(), mNumParticles);
|
||||
mOwnsAnisotropyGPUBuffers = true;
|
||||
}
|
||||
|
||||
PxgAnisotropyGenerator::PxgAnisotropyGenerator(PxgKernelLauncher& cudaContextManager, PxU32 maxNumParticles, PxReal anisotropyScale, PxReal minAnisotropy, PxReal maxAnisotropy)
|
||||
: mAnisotropy1(NULL), mAnisotropy2(NULL), mAnisotropy3(NULL), mEnabled(true)
|
||||
{
|
||||
mAnisotropyDataHost.mAnisotropy_q1 = NULL;
|
||||
mAnisotropyDataHost.mAnisotropy_q2 = NULL;
|
||||
mAnisotropyDataHost.mAnisotropy_q3 = NULL;
|
||||
mKernelLauncher = cudaContextManager;
|
||||
mNumParticles = maxNumParticles;
|
||||
mAnisotropyDataPerParticleSystemDevice = PX_DEVICE_MEMORY_ALLOC(PxAnisotropyData, *mKernelLauncher.getCudaContextManager(), 1);
|
||||
|
||||
mAnisotropyDataHost.mAnisotropy = anisotropyScale;
|
||||
mAnisotropyDataHost.mAnisotropyMin = minAnisotropy;
|
||||
mAnisotropyDataHost.mAnisotropyMax = maxAnisotropy;
|
||||
mDirty = true;
|
||||
mOwnsAnisotropyGPUBuffers = false;
|
||||
}
|
||||
|
||||
void PxgAnisotropyGenerator::release()
|
||||
{
|
||||
if (!mAnisotropyDataPerParticleSystemDevice)
|
||||
return;
|
||||
|
||||
PX_DEVICE_MEMORY_FREE(*mKernelLauncher.getCudaContextManager(), mAnisotropyDataPerParticleSystemDevice);
|
||||
if (mOwnsAnisotropyGPUBuffers)
|
||||
releaseGPUAnisotropyBuffers();
|
||||
|
||||
PX_DELETE_THIS;
|
||||
}
|
||||
|
||||
void PxgAnisotropyGenerator::setResultBufferHost(PxVec4* anisotropy1, PxVec4* anisotropy2, PxVec4* anisotropy3)
|
||||
{
|
||||
mAnisotropy1 = anisotropy1;
|
||||
mAnisotropy2 = anisotropy2;
|
||||
mAnisotropy3 = anisotropy3;
|
||||
allocateGPUAnisotropyBuffers();
|
||||
mDirty = true;
|
||||
}
|
||||
|
||||
// Registers externally owned device-side result buffers for the three
// anisotropy axes. Any internally owned buffers are released first; host
// read-back pointers are cleared so generateAnisotropy() skips the
// device-to-host copies.
void PxgAnisotropyGenerator::setResultBufferDevice(PxVec4* anisotropy1, PxVec4* anisotropy2, PxVec4* anisotropy3)
{
	// Order matters: free our own buffers before overwriting the pointers,
	// otherwise the allocations would leak.
	if (mOwnsAnisotropyGPUBuffers)
		releaseGPUAnisotropyBuffers();
	mAnisotropyDataHost.mAnisotropy_q1 = anisotropy1;
	mAnisotropyDataHost.mAnisotropy_q2 = anisotropy2;
	mAnisotropyDataHost.mAnisotropy_q3 = anisotropy3;
	// Descriptor changed — re-upload on next generate.
	mDirty = true;
	// NULL host pointers disable the DtoH copies in generateAnisotropy().
	mAnisotropy1 = NULL;
	mAnisotropy2 = NULL;
	mAnisotropy3 = NULL;
}
|
||||
|
||||
// Computes per-particle anisotropy for a GPU particle system.
// All work is enqueued asynchronously on 'stream'; results are only valid on
// the host after the stream has been synchronized by the caller.
void PxgAnisotropyGenerator::generateAnisotropy(PxGpuParticleSystem* gpuParticleSystem, PxU32 numParticles, CUstream stream)
{
	if (mDirty)
	{
		// Lazily upload the host-side parameter/pointer descriptor to the device.
		mDirty = false;
		mKernelLauncher.getCudaContextManager()->getCudaContext()->memcpyHtoDAsync(CUdeviceptr(mAnisotropyDataPerParticleSystemDevice), &mAnisotropyDataHost, sizeof(PxAnisotropyData), stream);
	}

	updateAnisotropy(mKernelLauncher, gpuParticleSystem, 0, mAnisotropyDataPerParticleSystemDevice, numParticles, stream);

	// Host read-back only when host result buffers were registered
	// (setResultBufferHost). If mAnisotropy1 is set, q2/q3 are assumed set too.
	if (mAnisotropy1)
	{
		mKernelLauncher.getCudaContextManager()->getCudaContext()->memcpyDtoHAsync(mAnisotropy1, CUdeviceptr(mAnisotropyDataHost.mAnisotropy_q1), numParticles * sizeof(PxVec4), stream);
		mKernelLauncher.getCudaContextManager()->getCudaContext()->memcpyDtoHAsync(mAnisotropy2, CUdeviceptr(mAnisotropyDataHost.mAnisotropy_q2), numParticles * sizeof(PxVec4), stream);
		mKernelLauncher.getCudaContextManager()->getCudaContext()->memcpyDtoHAsync(mAnisotropy3, CUdeviceptr(mAnisotropyDataHost.mAnisotropy_q3), numParticles * sizeof(PxVec4), stream);
	}
}
|
||||
|
||||
// Computes per-particle anisotropy from raw device particle positions using a
// sparse-grid neighborhood provider (standalone path, no PxGpuParticleSystem).
// Enqueued asynchronously on 'stream'.
void PxgAnisotropyGenerator::generateAnisotropy(PxVec4* particlePositionsGpu, PxParticleNeighborhoodProvider& neighborhoodProvider, PxU32 numParticles, PxReal particleContactOffset, CUstream stream)
{
	// Downcast to the GPU implementation to access the sparse grid internals.
	// NOTE(review): assumes the provider is always a PxgParticleNeighborhoodProvider — confirm at call sites.
	PxgParticleNeighborhoodProvider* n = static_cast<PxgParticleNeighborhoodProvider*>(&neighborhoodProvider);

	// 2 * particleContactOffset is passed as the interaction/smoothing radius.
	anisotropyLaunch(mKernelLauncher, reinterpret_cast<float4*>(particlePositionsGpu), n->mSparseGridBuilder.getSortedToOriginalParticleIndex(),
		n->mSparseGridBuilder.getSortedParticleToSubgrid(), n->mSparseGridBuilder.getGridParameters().maxNumSubgrids,
		n->mSparseGridBuilder.getSubgridNeighborLookup(), n->getSubgridEndIndicesBuffer(), numParticles, NULL, 0, reinterpret_cast<float4*>(mAnisotropyDataHost.mAnisotropy_q1),
		reinterpret_cast<float4*>(mAnisotropyDataHost.mAnisotropy_q2), reinterpret_cast<float4*>(mAnisotropyDataHost.mAnisotropy_q3),
		mAnisotropyDataHost.mAnisotropy, mAnisotropyDataHost.mAnisotropyMin, mAnisotropyDataHost.mAnisotropyMax, 2 * particleContactOffset, stream);

	// Optional host read-back, mirroring the particle-system overload.
	if (mAnisotropy1)
	{
		mKernelLauncher.getCudaContextManager()->getCudaContext()->memcpyDtoHAsync(mAnisotropy1, CUdeviceptr(mAnisotropyDataHost.mAnisotropy_q1), numParticles * sizeof(PxVec4), stream);
		mKernelLauncher.getCudaContextManager()->getCudaContext()->memcpyDtoHAsync(mAnisotropy2, CUdeviceptr(mAnisotropyDataHost.mAnisotropy_q2), numParticles * sizeof(PxVec4), stream);
		mKernelLauncher.getCudaContextManager()->getCudaContext()->memcpyDtoHAsync(mAnisotropy3, CUdeviceptr(mAnisotropyDataHost.mAnisotropy_q3), numParticles * sizeof(PxVec4), stream);
	}
}
|
||||
|
||||
|
||||
// Changes the particle capacity. When this object owns the device result
// buffers they are reallocated at the new size; externally supplied buffers
// are left untouched (the caller is responsible for resizing them).
void PxgAnisotropyGenerator::setMaxParticles(PxU32 maxParticles)
{
	if (maxParticles == mNumParticles)
		return;

	// Record the new capacity even when we don't own the buffers, so a later
	// allocateGPUAnisotropyBuffers() uses the right size.
	mNumParticles = maxParticles;

	if (!mOwnsAnisotropyGPUBuffers)
		return;

	releaseGPUAnisotropyBuffers();
	allocateGPUAnisotropyBuffers();
}
|
||||
}
|
||||
60
engine/third_party/physx/source/gpusimulationcontroller/src/PxgArrayConverter.cpp
vendored
Normal file
60
engine/third_party/physx/source/gpusimulationcontroller/src/PxgArrayConverter.cpp
vendored
Normal file
@@ -0,0 +1,60 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgArrayConverter.h"
|
||||
|
||||
#include "foundation/PxUserAllocated.h"
|
||||
|
||||
#include "PxPhysXGpu.h"
|
||||
#include "PxgKernelWrangler.h"
|
||||
#include "PxgKernelIndices.h"
|
||||
#include "PxgInterpolation.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
#if ENABLE_KERNEL_LAUNCH_ERROR_CHECK
|
||||
#define checkCudaError() { cudaError_t err = cudaDeviceSynchronize(); if (err != 0) printf("Cuda error file: %s, line: %i, error: %i\n", PX_FL, err); }
|
||||
#else
|
||||
#define checkCudaError() { }
|
||||
#endif
|
||||
|
||||
#define THREADS_PER_BLOCK 256
|
||||
|
||||
// Stores a copy of the kernel launcher used for all subsequent conversion
// kernel launches.
PxgArrayConverter::PxgArrayConverter(PxgKernelLauncher& kernelLauncher)
{
	mKernelLauncher = kernelLauncher;
}
|
||||
|
||||
// Interleaves two device arrays of PxVec4 (positions and normals) into a single
// device array of PxVec3 pairs, e.g. for rendering vertex buffers.
// All pointers are device pointers; the kernel is enqueued on 'stream' and the
// call returns without synchronizing (unless ENABLE_KERNEL_LAUNCH_ERROR_CHECK).
void PxgArrayConverter::interleaveGpuBuffers(const PxVec4* vertices, const PxVec4* normals, PxU32 length, PxVec3* interleavedResultBuffer, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	// Ceiling division so every element is covered by a thread.
	const PxU32 numBlocks = (length + numThreadsPerBlock - 1) / numThreadsPerBlock;
	mKernelLauncher.launchKernel(PxgKernelIds::util_InterleaveBuffers, numBlocks, numThreadsPerBlock, 0, stream,
		vertices, normals, length, interleavedResultBuffer);
	checkCudaError();
}
|
||||
845
engine/third_party/physx/source/gpusimulationcontroller/src/PxgBodySimManager.cpp
vendored
Normal file
845
engine/third_party/physx/source/gpusimulationcontroller/src/PxgBodySimManager.cpp
vendored
Normal file
@@ -0,0 +1,845 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
|
||||
#include "PxgBodySimManager.h"
|
||||
#include "PxvDynamics.h"
|
||||
#include "PxsRigidBody.h"
|
||||
#include "PxgBodySim.h"
|
||||
#include "foundation/PxFPU.h"
|
||||
#include "foundation/PxAtomic.h"
|
||||
#include "DyFeatherstoneArticulation.h"
|
||||
#include "DyDeformableSurface.h"
|
||||
#include "DyDeformableVolume.h"
|
||||
#include "DyParticleSystem.h"
|
||||
|
||||
#define BODY_SIM_VALIDATE 0
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Registers a rigid body (or re-registers an updated one) with the GPU body
// sim manager under its island-node index. Idempotent per frame: a node that
// is already marked in mUpdatedMap is skipped.
void PxgBodySimManager::addBody(PxsRigidBody* rigidBody, const PxU32 nodeIndex)
{
	if (mUpdatedMap.boundedTest(nodeIndex))
		return;

	// Geometric growth keyed off the node index keeps amortized cost low for
	// monotonically increasing indices.
	if (mBodies.capacity() <= nodeIndex)
	{
		mBodies.resize(2 * nodeIndex + 1);
	}

	mBodies[nodeIndex] = reinterpret_cast<void*>(rigidBody);

	mUpdatedMap.growAndSet(nodeIndex);
	mNewOrUpdatedBodySims.pushBack(nodeIndex);
	mTotalNumBodies = PxMax(mTotalNumBodies, nodeIndex + 1);

	// Static-constraint slots are indexed by node, so keep the array sized to
	// the largest node index seen. Reset this node's slot for reuse.
	mStaticConstraints.reserve(mTotalNumBodies);
	mStaticConstraints.resize(mTotalNumBodies);

	PxgStaticConstraints& constraints = mStaticConstraints[nodeIndex];
	constraints.mStaticContacts.forceSize_Unsafe(0);
	constraints.mStaticJoints.forceSize_Unsafe(0);

	// raise first DMA to GPU flag so we can handle body updates correctly with direct GPU API
	rigidBody->mInternalFlags |= PxsRigidBody::eFIRST_BODY_COPY_GPU;

#if BODY_SIM_VALIDATE
	// FIX: the previous validation block was an orphaned 'else' (no matching
	// 'if' after the early-return refactor) and referenced the nonexistent
	// 'mNewBodySims' array, so it failed to compile when the flag was enabled.
	// Validate instead that the node we just registered is present in the
	// new/updated list.
	{
		bool found = false;
		for (PxU32 i = 0; i < mNewOrUpdatedBodySims.size(); ++i)
		{
			if (mNewOrUpdatedBodySims[i] == nodeIndex)
			{
				found = true;
				break;
			}
		}

		PX_ASSERT(found);
	}
#endif
}
|
||||
|
||||
// All members release their own resources; nothing to do explicitly here.
PxgBodySimManager::~PxgBodySimManager()
{
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////articulation
|
||||
void PxgBodySimManager::addArticulation(Dy::FeatherstoneArticulation* articulation, const PxU32 nodeIndex, bool OmniPVDRecordDirectGPUAPI)
|
||||
{
|
||||
if (mUpdatedMap.boundedTest(nodeIndex))
|
||||
return;
|
||||
|
||||
if (mBodies.capacity() <= nodeIndex)
|
||||
{
|
||||
mBodies.resize(2 * nodeIndex + 1);
|
||||
}
|
||||
|
||||
mBodies[nodeIndex] = reinterpret_cast<void*>(articulation);
|
||||
|
||||
mUpdatedMap.growAndSet(nodeIndex);
|
||||
PxgArticulationIndices index;
|
||||
index.nodeIndex = nodeIndex;
|
||||
index.remapIndex = mArticulationIdPool.getNewID();
|
||||
|
||||
Dy::ArticulationCore* core = articulation->getCore();
|
||||
core->gpuRemapIndex = index.remapIndex;
|
||||
|
||||
mNewArticulationSims.pushBack(index);
|
||||
//Mark as in dirty list so that it doesn't appear in update list!
|
||||
articulation->mGPUDirtyFlags |= Dy::ArticulationDirtyFlag::eIN_DIRTY_LIST;
|
||||
|
||||
mNodeToRemapMap.insert(nodeIndex, index.remapIndex);
|
||||
#if PX_SUPPORT_OMNI_PVD
|
||||
if (OmniPVDRecordDirectGPUAPI)
|
||||
{
|
||||
mRemapToNodeMap.insert(index.remapIndex, nodeIndex);
|
||||
}
|
||||
#else
|
||||
PX_UNUSED(OmniPVDRecordDirectGPUAPI);
|
||||
#endif
|
||||
//articulation->setGpuRemapId(index.remapIndex);
|
||||
|
||||
mTotalNumBodies = PxMax(mTotalNumBodies, nodeIndex + 1);
|
||||
mTotalNumArticulations = PxMax(mTotalNumArticulations, index.remapIndex + 1);
|
||||
|
||||
mStaticConstraints.resize(mTotalNumBodies); //Shared between RBs and articulations...
|
||||
mArticulationSelfConstraints.resize(mTotalNumArticulations);
|
||||
|
||||
PxgStaticConstraints& constraints = mStaticConstraints[nodeIndex];
|
||||
constraints.mStaticContacts.forceSize_Unsafe(0);
|
||||
constraints.mStaticContacts.reserve(articulation->getBodyCount()); //We expect a contact per-body, so bump up the initial reservation
|
||||
constraints.mStaticJoints.forceSize_Unsafe(0);
|
||||
|
||||
PxgArticulationSelfConstraints& selfConstraints = mArticulationSelfConstraints[index.remapIndex];
|
||||
selfConstraints.mSelfContacts.forceSize_Unsafe(0);
|
||||
selfConstraints.mSelfJoints.forceSize_Unsafe(0);
|
||||
}
|
||||
|
||||
void PxgBodySimManager::updateArticulation(Dy::FeatherstoneArticulation* articulation, const PxU32 nodeIndex)
|
||||
{
|
||||
if (!(articulation->mGPUDirtyFlags & Dy::ArticulationDirtyFlag::eIN_DIRTY_LIST))
|
||||
{
|
||||
const PxHashMap<PxU32, PxU32>::Entry* entry = mNodeToRemapMap.find(nodeIndex);
|
||||
//If entry is not there, it means that this articulation is pending insertion (added to scene, but not yet simulated). In this
|
||||
//case, it will not need to appear in the update list
|
||||
if (entry)
|
||||
{
|
||||
articulation->mGPUDirtyFlags |= Dy::ArticulationDirtyFlag::eIN_DIRTY_LIST;
|
||||
PxU32 index = entry->second;
|
||||
|
||||
PxgArticulationUpdate update;
|
||||
update.articulationIndex = index;
|
||||
update.articulation = articulation;
|
||||
//KS - we don't write out startIndex - this happens later in the heavy-lifting code that actually writes out data from the articulation.
|
||||
//We defer this until during the frame because it is quite common that the user might update multiple properties and we would prefer to batc
|
||||
|
||||
mUpdatedArticulations.pushBack(update);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Records a static contact constraint against an articulation link, keeping
// the per-articulation list sorted by link ID (insertion sort — lists are
// expected to stay small). Returns false when the fixed-capacity list is full.
bool PxgBodySimManager::addStaticArticulationContactManager(PxU32 uniqueIndex, const PxNodeIndex nodeIndex)
{
	PX_ASSERT(nodeIndex.isArticulation());
	PxgStaticConstraints& staticConstraints = mStaticConstraints[nodeIndex.index()];

	if (staticConstraints.mStaticContacts.size() < PxgStaticConstraints::MaxConstraints)
	{
		PxgStaticConstraint con;
		con.uniqueId = uniqueIndex;
		con.linkID = nodeIndex.articulationLinkId();
		// Find the sorted insertion point by link ID (linear scan).
		PxU32 i = 0;
		PxU32 count = staticConstraints.mStaticContacts.size();
		for (; i < count; ++i)
		{
			if (con.linkID <= staticConstraints.mStaticContacts[i].linkID)
				break;
		}
		staticConstraints.mStaticContacts.resizeUninitialized(count + 1);

		// Shift the tail up by one to open a slot at position i.
		for (PxU32 j = count; j > i; --j)
		{
			staticConstraints.mStaticContacts[j] = staticConstraints.mStaticContacts[j - 1];
		}

		staticConstraints.mStaticContacts[i] = con;
		mMaxStaticArticContacts = PxMax(staticConstraints.mStaticContacts.size(), mMaxStaticArticContacts);
		mTotalStaticArticContacts++;
		return true;
	}
	return false;
}
|
||||
|
||||
// Removes the first constraint whose uniqueId matches, preserving the relative
// order of the remaining entries (required for constraint batching), and
// decrements the given global counter. Returns false if no match was found.
template<class T>
static bool remove(PxU32 uniqueIndex, PxArray<T>& container, PxU32& counter)
{
	// PT: one of these loops was checking the indices in decreasing order for no clear reason (introduced in CL 30572881)
	PxU32 size = container.size();
	for(PxU32 i=0; i<size; i++)
	{
		if(container[i].uniqueId == uniqueIndex)
		{
			//Need to maintain order for batching to work!
			container.remove(i); // PT: beware, this is an actual shift
			counter--;
			return true;
		}
	}
	return false;
}
|
||||
|
||||
// Removes a static articulation contact by its unique ID; returns false if it
// was not registered.
bool PxgBodySimManager::removeStaticArticulationContactManager(PxU32 uniqueIndex, const PxNodeIndex nodeIndex)
{
	PX_ASSERT(nodeIndex.isArticulation());
	PxgStaticConstraints& constraints = mStaticConstraints[nodeIndex.index()];
	return remove(uniqueIndex, constraints.mStaticContacts, mTotalStaticArticContacts);
}
|
||||
|
||||
// Records a static joint constraint against an articulation link, keeping the
// per-articulation list sorted by link ID. Returns false when full.
bool PxgBodySimManager::addStaticArticulationJoint(PxU32 uniqueIndex, const PxNodeIndex nodeIndex)
{
	PX_ASSERT(nodeIndex.isArticulation());
	PxgStaticConstraints& staticConstraints = mStaticConstraints[nodeIndex.index()];

	if (staticConstraints.mStaticJoints.size() < PxgStaticConstraints::MaxConstraints)
	{
		PxgStaticConstraint con;
		con.uniqueId = uniqueIndex;
		con.linkID = nodeIndex.articulationLinkId();
		//We need to find where to insert this contact into the articulation. The assumption is that we will not have a very
		//large number of static constraints, so we can just do a linear search. If this turns out to be a problem, we can try
		//something like binary search...
		PxU32 i = 0;
		PxU32 count = staticConstraints.mStaticJoints.size();
		for (; i < count; ++i)
		{
			if (con.linkID <= staticConstraints.mStaticJoints[i].linkID)
				break;
		}
		staticConstraints.mStaticJoints.resizeUninitialized(count + 1);

		// Shift the tail up by one to open a slot at position i.
		for (PxU32 j = count; j > i; --j)
		{
			staticConstraints.mStaticJoints[j] = staticConstraints.mStaticJoints[j - 1];
		}

		staticConstraints.mStaticJoints[i] = con;

		mMaxStaticArticJoints = PxMax(mMaxStaticArticJoints, count + 1);
		mTotalStaticArticJoints++;

		return true;
	}
	return false;
}
|
||||
|
||||
// Removes a static articulation joint by its unique ID; returns false if it
// was not registered.
bool PxgBodySimManager::removeStaticArticulationJoint(PxU32 uniqueIndex, const PxNodeIndex nodeIndex)
{
	PX_ASSERT(nodeIndex.isArticulation());
	PxgStaticConstraints& constraints = mStaticConstraints[nodeIndex.index()];
	return remove(uniqueIndex, constraints.mStaticJoints, mTotalStaticArticJoints);
}
|
||||
|
||||
// Records a static contact constraint for a rigid body. Rigid bodies have no
// links, so linkID is always 0 and no sorted insertion is needed; the list is
// unbounded, hence this always succeeds.
bool PxgBodySimManager::addStaticRBContactManager(PxU32 uniqueIndex, const PxNodeIndex nodeIndex)
{
	PX_ASSERT(!nodeIndex.isArticulation());

	PxgStaticConstraint con;
	con.uniqueId = uniqueIndex;
	con.linkID = 0;

	PxgStaticConstraints& staticConstraints = mStaticConstraints[nodeIndex.index()];
	staticConstraints.mStaticContacts.pushBack(con);

	// Track the high-water mark and the global total.
	mMaxStaticRBContacts = PxMax(staticConstraints.mStaticContacts.size(), mMaxStaticRBContacts);
	mTotalStaticRBContacts++;
	return true;
}
|
||||
|
||||
// Removes a static rigid-body contact by its unique ID; returns false if it
// was not registered.
bool PxgBodySimManager::removeStaticRBContactManager(PxU32 uniqueIndex, const PxNodeIndex nodeIndex)
{
	PX_ASSERT(!nodeIndex.isArticulation());
	PxgStaticConstraints& constraints = mStaticConstraints[nodeIndex.index()];
	return remove(uniqueIndex, constraints.mStaticContacts, mTotalStaticRBContacts);
}
|
||||
|
||||
// Records a static joint constraint for a rigid body (linkID 0, unsorted,
// unbounded list — always succeeds).
bool PxgBodySimManager::addStaticRBJoint(PxU32 uniqueIndex, const PxNodeIndex nodeIndex)
{
	PX_ASSERT(!nodeIndex.isArticulation());

	PxgStaticConstraint con;
	con.uniqueId = uniqueIndex;
	con.linkID = 0;

	PxgStaticConstraints& staticConstraints = mStaticConstraints[nodeIndex.index()];
	staticConstraints.mStaticJoints.pushBack(con);

	// Track the high-water mark and the global total.
	mMaxStaticRBJoints = PxMax(mMaxStaticRBJoints, staticConstraints.mStaticJoints.size());
	mTotalStaticRBJoints++;
	return true;
}
|
||||
|
||||
// Removes a static rigid-body joint by its unique ID; returns false if it was
// not registered.
bool PxgBodySimManager::removeStaticRBJoint(PxU32 uniqueIndex, const PxNodeIndex nodeIndex)
{
	PX_ASSERT(!nodeIndex.isArticulation());
	PxgStaticConstraints& constraints = mStaticConstraints[nodeIndex.index()];
	return remove(uniqueIndex, constraints.mStaticJoints, mTotalStaticRBJoints);
}
|
||||
|
||||
// Records a self-collision contact between two links of the same articulation.
// Unlike the joint variant, contacts are simply appended (no sorted insertion).
// Returns false when the fixed-capacity list is full.
bool PxgBodySimManager::addSelfArticulationContactManager(PxU32 uniqueIndex, const PxNodeIndex nodeIndex0, const PxNodeIndex nodeIndex1)
{
	// NOTE(review): uses operator[] on the hash map — assumes the articulation
	// was already registered via addArticulation; confirm at call sites.
	PxU32 remapIndex = mNodeToRemapMap[nodeIndex0.index()];
	PxgArticulationSelfConstraints& selfConstraints = mArticulationSelfConstraints[remapIndex];
	if (selfConstraints.mSelfContacts.size() < PxgArticulationSelfConstraints::MaxConstraints)
	{
		PxgSelfConstraint con;
		con.uniqueId = uniqueIndex;
		con.linkID0 = nodeIndex0.articulationLinkId();
		con.linkID1 = nodeIndex1.articulationLinkId();

		PxU32 count = selfConstraints.mSelfContacts.size();

		// Append at the end (equivalent to a pushBack).
		selfConstraints.mSelfContacts.resizeUninitialized(count + 1);

		PxU32 i = count;

		selfConstraints.mSelfContacts[i] = con;
		mMaxSelfArticContacts = PxMax(selfConstraints.mSelfContacts.size(), mMaxSelfArticContacts);
		mTotalSelfArticContacts++;
		return true;
	}
	return false;
}
|
||||
|
||||
// Removes a self-collision contact by its unique ID. Returns false when the
// articulation is no longer mapped (e.g. already released) or the ID is absent.
bool PxgBodySimManager::removeSelfArticulationContactManager(PxU32 uniqueIndex, const PxNodeIndex nodeIndex0, const PxNodeIndex nodeIndex1)
{
	// Both node indices refer to the same articulation; only the first is needed.
	PX_UNUSED(nodeIndex1);

	const PxHashMap<PxU32, PxU32>::Entry* entry = mNodeToRemapMap.find(nodeIndex0.index());
	if (entry)
	{
		PxgArticulationSelfConstraints& selfConstraints = mArticulationSelfConstraints[entry->second];
		return remove(uniqueIndex, selfConstraints.mSelfContacts, mTotalSelfArticContacts);
	}
	return false;
}
|
||||
|
||||
// Records a self joint between two links of the same articulation, keeping the
// list sorted by the first link ID. Returns false when the list is full.
bool PxgBodySimManager::addSelfArticulationJoint(PxU32 uniqueIndex, const PxNodeIndex nodeIndex0, const PxNodeIndex nodeIndex1)
{
	// NOTE(review): uses operator[] on the hash map — assumes the articulation
	// was already registered via addArticulation; confirm at call sites.
	PxU32 remapIndex = mNodeToRemapMap[nodeIndex0.index()];
	PxgArticulationSelfConstraints& selfConstraints = mArticulationSelfConstraints[remapIndex];
	if (selfConstraints.mSelfJoints.size() < PxgArticulationSelfConstraints::MaxConstraints)
	{
		PxgSelfConstraint con;
		con.uniqueId = uniqueIndex;
		con.linkID0 = nodeIndex0.articulationLinkId();
		con.linkID1 = nodeIndex1.articulationLinkId();
		//We need to find where to insert this contact into the articulation. The assumption is that we will not have a very
		//large number of static constraints, so we can just do a linear search. If this turns out to be a problem, we can try
		//something like binary search...
		PxU32 i = 0;
		PxU32 count = selfConstraints.mSelfJoints.size();
		for (; i < count; ++i)
		{
			if (con.linkID0 <= selfConstraints.mSelfJoints[i].linkID0)
				break;
		}
		selfConstraints.mSelfJoints.resizeUninitialized(count + 1);

		// Shift the tail up by one to open a slot at position i.
		for (PxU32 j = count; j > i; --j)
		{
			selfConstraints.mSelfJoints[j] = selfConstraints.mSelfJoints[j - 1];
		}

		selfConstraints.mSelfJoints[i] = con;

		mMaxSelfArticJoints = PxMax(mMaxSelfArticJoints, count + 1);
		mTotalSelfArticJoints++;
		return true;
	}
	return false;
}
|
||||
|
||||
// Removes a self joint by its unique ID. Returns false when the articulation
// is no longer mapped or the ID is absent.
bool PxgBodySimManager::removeSelfArticulationJoint(PxU32 uniqueIndex, const PxNodeIndex nodeIndex0, const PxNodeIndex nodeIndex1)
{
	// Both node indices refer to the same articulation; only the first is needed.
	PX_UNUSED(nodeIndex1);

	const PxHashMap<PxU32, PxU32>::Entry* entry = mNodeToRemapMap.find(nodeIndex0.index());
	if (entry)
	{
		PxgArticulationSelfConstraints& selfConstraints = mArticulationSelfConstraints[entry->second];
		return remove(uniqueIndex, selfConstraints.mSelfJoints, mTotalSelfArticJoints);
	}
	return false;
}
|
||||
|
||||
void PxgBodySimManager::releaseArticulation(Dy::FeatherstoneArticulation* articulation, const PxU32 nodeIndex)
|
||||
{
|
||||
mDeferredFreeNodeIDs.pushBack(nodeIndex);
|
||||
|
||||
if (articulation->mGPUDirtyFlags & Dy::ArticulationDirtyFlag::eIN_DIRTY_LIST)
|
||||
{
|
||||
for (PxU32 i = 0; i < mUpdatedArticulations.size(); ++i)
|
||||
{
|
||||
if (mUpdatedArticulations[i].articulation == articulation)
|
||||
{
|
||||
mUpdatedArticulations.replaceWithLast(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (PxU32 i = 0; i < mNewArticulationSims.size(); ++i)
|
||||
{
|
||||
if (mNewArticulationSims[i].nodeIndex == nodeIndex)
|
||||
{
|
||||
mNewArticulationSims.replaceWithLast(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Flushes the deferred node-ID list: erases the node→remap mapping (and the
// reverse mapping for OmniPVD) and recycles each remap index in the ID pool.
void PxgBodySimManager::releaseDeferredArticulationIds()
{
	PxHashMap<PxU32, PxU32>::Entry entry;
	PxU32 size = mDeferredFreeNodeIDs.size();
	for(PxU32 i=0;i<size;i++)
	{
		// erase() returns the removed entry so we can recover the remap index.
		bool found = mNodeToRemapMap.erase(mDeferredFreeNodeIDs[i], entry);
		PX_UNUSED(found);
		PX_ASSERT(found);
#if PX_SUPPORT_OMNI_PVD
		mRemapToNodeMap.erase(entry.second);
#endif
		mArticulationIdPool.deferredFreeID(entry.second);
	}
	mDeferredFreeNodeIDs.clear();
	// Make the freed IDs available for reuse.
	mArticulationIdPool.processDeferredIds();
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////softbody
|
||||
|
||||
void PxgBodySimManager::addSoftBody(Dy::DeformableVolume* deformableVolume, const PxU32 nodeIndex)
|
||||
{
|
||||
if (mUpdatedMap.boundedTest(nodeIndex))
|
||||
return;
|
||||
|
||||
if (mBodies.capacity() <= nodeIndex)
|
||||
{
|
||||
mBodies.resize(2 * nodeIndex + 1);
|
||||
}
|
||||
|
||||
mBodies[nodeIndex] = reinterpret_cast<void*>(deformableVolume);
|
||||
|
||||
mUpdatedMap.growAndSet(nodeIndex);
|
||||
PxgSoftBodyIndices index;
|
||||
index.nodeIndex = nodeIndex;
|
||||
index.remapIndex = mSoftBodyIdPool.getNewID();
|
||||
mNewSoftBodySims.pushBack(index);
|
||||
|
||||
deformableVolume->setGpuRemapId(index.remapIndex);
|
||||
|
||||
if (mActiveSoftbodyIndex.size() < (index.remapIndex + 1))
|
||||
{
|
||||
mActiveSoftbodyIndex.resize(PxMax(index.remapIndex + 1, mActiveSoftbodyIndex.size() * 2));
|
||||
mActiveSelfCollisionSoftbodyIndex.resize(PxMax(index.remapIndex + 1, mActiveSelfCollisionSoftbodyIndex.size() * 2));
|
||||
}
|
||||
mActiveSoftbodyIndex[index.remapIndex] = mActiveSoftbodiesStaging.size();
|
||||
|
||||
if (deformableVolume->getCore().bodyFlags & PxDeformableBodyFlag::eDISABLE_SELF_COLLISION)
|
||||
mActiveSelfCollisionSoftbodyIndex[index.remapIndex] = 0xFFFFFFFF;
|
||||
else
|
||||
{
|
||||
mActiveSelfCollisionSoftbodyIndex[index.remapIndex] = mActiveSelfCollisionSoftBodiesStaging.size();
|
||||
mActiveSelfCollisionSoftBodiesStaging.pushBack(index.remapIndex);
|
||||
}
|
||||
|
||||
mTotalNumBodies = PxMax(mTotalNumBodies, nodeIndex + 1);
|
||||
mTotalNumSoftBodies = PxMax(mTotalNumSoftBodies, index.remapIndex + 1);
|
||||
|
||||
mActiveSoftbodiesStaging.pushBack(index.remapIndex);
|
||||
mActiveSoftbodiesDirty = true;
|
||||
if (index.remapIndex == mDeformableVolumes.size())
|
||||
mDeformableVolumes.pushBack(deformableVolume);
|
||||
else
|
||||
mDeformableVolumes[index.remapIndex] = deformableVolume;
|
||||
}
|
||||
|
||||
void PxgBodySimManager::releaseSoftBody(Dy::DeformableVolume* deformableVolume)
|
||||
{
|
||||
PxU32 remapIndex = deformableVolume->getGpuRemapId();
|
||||
PxU32 index = mActiveSoftbodyIndex[remapIndex];
|
||||
|
||||
if (index != 0xFFFFFFFF)
|
||||
{
|
||||
PX_ASSERT(mActiveSoftbodiesStaging[index] == remapIndex);
|
||||
mActiveSoftbodiesDirty = true;
|
||||
mActiveSoftbodyIndex[remapIndex] = 0xFFFFFFFF;
|
||||
|
||||
|
||||
mActiveSoftbodiesStaging.replaceWithLast(index);
|
||||
if (index < mActiveSoftbodiesStaging.size())
|
||||
mActiveSoftbodyIndex[mActiveSoftbodiesStaging[index]] = index;
|
||||
|
||||
PxU32 selfCollisionIndex = mActiveSelfCollisionSoftbodyIndex[remapIndex];
|
||||
|
||||
if (selfCollisionIndex != 0xFFFFFFFF)
|
||||
{
|
||||
PX_ASSERT(mActiveSelfCollisionSoftBodiesStaging[selfCollisionIndex] == remapIndex);
|
||||
mActiveSelfCollisionSoftbodyIndex[remapIndex] = 0xFFFFFFFF;
|
||||
|
||||
|
||||
mActiveSelfCollisionSoftBodiesStaging.replaceWithLast(selfCollisionIndex);
|
||||
if (selfCollisionIndex < mActiveSelfCollisionSoftBodiesStaging.size())
|
||||
mActiveSelfCollisionSoftbodyIndex[mActiveSelfCollisionSoftBodiesStaging[selfCollisionIndex]] = selfCollisionIndex;
|
||||
}
|
||||
}
|
||||
|
||||
for (PxU32 i = 0; i < mNewSoftBodySims.size(); ++i)
|
||||
{
|
||||
if (mNewSoftBodySims[i].remapIndex == remapIndex)
|
||||
{
|
||||
mNewSoftBodySims.replaceWithLast(i);
|
||||
}
|
||||
}
|
||||
|
||||
mDeformableVolumes[remapIndex] = NULL;
|
||||
|
||||
mSoftBodyIdPool.deferredFreeID(deformableVolume->getGpuRemapId());
|
||||
}
|
||||
|
||||
// Makes soft-body remap IDs freed via releaseSoftBody() available for reuse.
void PxgBodySimManager::releaseDeferredSoftBodyIds()
{
	mSoftBodyIdPool.processDeferredIds();
}
|
||||
|
||||
bool PxgBodySimManager::activateSoftbody(Dy::DeformableVolume* deformableVolume)
|
||||
{
|
||||
PxU32 remapIndex = deformableVolume->getGpuRemapId();
|
||||
PxU32 index = mActiveSoftbodyIndex[remapIndex];
|
||||
if (0xFFFFFFFF == index)
|
||||
{
|
||||
mActiveSoftbodyIndex[remapIndex] = mActiveSoftbodiesStaging.size();
|
||||
mActiveSoftbodiesStaging.pushBack(remapIndex);
|
||||
mActiveSoftbodiesDirty = true;
|
||||
|
||||
if (deformableVolume->getCore().bodyFlags & PxDeformableBodyFlag::eDISABLE_SELF_COLLISION)
|
||||
mActiveSelfCollisionSoftbodyIndex[remapIndex] = 0xFFFFFFFF;
|
||||
else
|
||||
{
|
||||
mActiveSelfCollisionSoftbodyIndex[remapIndex] = mActiveSelfCollisionSoftBodiesStaging.size();
|
||||
mActiveSelfCollisionSoftBodiesStaging.pushBack(remapIndex);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool PxgBodySimManager::deactivateSoftbody(Dy::DeformableVolume* deformableVolume)
|
||||
{
|
||||
PxU32 remapIndex = deformableVolume->getGpuRemapId();
|
||||
PxU32 index = mActiveSoftbodyIndex[remapIndex];
|
||||
if (0xFFFFFFFF != index)
|
||||
{
|
||||
mActiveSoftbodyIndex[remapIndex] = 0xFFFFFFFF;
|
||||
mActiveSoftbodiesStaging.replaceWithLast(index);
|
||||
mActiveSoftbodiesDirty = true;
|
||||
if (index < mActiveSoftbodiesStaging.size())
|
||||
mActiveSoftbodyIndex[mActiveSoftbodiesStaging[index]] = index;
|
||||
|
||||
deactivateSoftbodySelfCollision(deformableVolume);
|
||||
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool PxgBodySimManager::activateSoftbodySelfCollision(Dy::DeformableVolume* deformableVolume)
|
||||
{
|
||||
PxU32 remapIndex = deformableVolume->getGpuRemapId();
|
||||
PxU32 index = mActiveSelfCollisionSoftbodyIndex[remapIndex];
|
||||
if (0xFFFFFFFF == index)
|
||||
{
|
||||
mActiveSelfCollisionSoftbodyIndex[remapIndex] = mActiveSelfCollisionSoftBodiesStaging.size();
|
||||
mActiveSelfCollisionSoftBodiesStaging.pushBack(remapIndex);
|
||||
mActiveSoftbodiesDirty = true;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool PxgBodySimManager::deactivateSoftbodySelfCollision(Dy::DeformableVolume* deformableVolume)
|
||||
{
|
||||
PxU32 remapIndex = deformableVolume->getGpuRemapId();
|
||||
PxU32 index = mActiveSelfCollisionSoftbodyIndex[remapIndex];
|
||||
if (0xFFFFFFFF != index)
|
||||
{
|
||||
mActiveSoftbodiesDirty = true;
|
||||
PX_ASSERT(mActiveSelfCollisionSoftBodiesStaging[index] == remapIndex);
|
||||
mActiveSelfCollisionSoftbodyIndex[remapIndex] = 0xFFFFFFFF;
|
||||
|
||||
mActiveSelfCollisionSoftBodiesStaging.replaceWithLast(index);
|
||||
if (index < mActiveSelfCollisionSoftBodiesStaging.size())
|
||||
mActiveSelfCollisionSoftbodyIndex[mActiveSelfCollisionSoftBodiesStaging[index]] = index;
|
||||
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////femCloth
|
||||
|
||||
// Registers a new FEM cloth (deformable surface) with the body-sim manager:
// stores it in the node-indexed body table, assigns it a GPU remap id, and
// appends it to the active-cloth staging list.
// Idempotent per frame: a node already flagged in mUpdatedMap is skipped.
void PxgBodySimManager::addFEMCloth(Dy::DeformableSurface* deformableSurface, const PxU32 nodeIndex)
{
	// Already added/updated this frame - nothing to do.
	if (mUpdatedMap.boundedTest(nodeIndex))
		return;

	// Grow the node-indexed body table geometrically so repeated additions
	// stay amortized O(1).
	if (mBodies.capacity() <= nodeIndex)
	{
		mBodies.resize(2 * nodeIndex + 1);
	}

	mBodies[nodeIndex] = reinterpret_cast<void*>(deformableSurface);

	mUpdatedMap.growAndSet(nodeIndex);

	// Pair the scene node index with a freshly allocated GPU remap id and
	// queue the cloth for GPU-side sim creation.
	PxgFEMClothIndices index;
	index.nodeIndex = nodeIndex;
	index.remapIndex = mFEMClothIdPool.getNewID();
	mNewFEMClothSims.pushBack(index);

	deformableSurface->setGpuRemapId(index.remapIndex);

	// Grow the remap-id -> active-slot table (geometric growth, but always
	// large enough for the new id).
	if (mActiveFEMClothIndex.size() < (index.remapIndex+1))
	{
		mActiveFEMClothIndex.resize(PxMax(index.remapIndex + 1, mActiveFEMClothIndex.size() * 2));
	}
	// New cloths start out active: record its slot in the staging list.
	mActiveFEMClothIndex[index.remapIndex] = mActiveFEMClothStaging.size();

	mTotalNumBodies = PxMax(mTotalNumBodies, nodeIndex + 1);
	mTotalNumFEMCloths = PxMax(mTotalNumFEMCloths, index.remapIndex + 1);

	mActiveFEMClothStaging.pushBack(index.remapIndex);
	mActiveFEMClothsDirty = true;

	// remap ids are allocated densely, so a brand-new id equals the current
	// array size; a recycled id overwrites its old (NULL-ed) slot.
	if (index.remapIndex == mDeformableSurfaces.size())
		mDeformableSurfaces.pushBack(deformableSurface);
	else
		mDeformableSurfaces[index.remapIndex] = deformableSurface;
}
|
||||
|
||||
// Unregisters a FEM cloth: removes it from the active staging list (if
// active), purges any pending "new sim" record, clears its slot in
// mDeformableSurfaces and queues its remap id for deferred reuse.
void PxgBodySimManager::releaseFEMCloth(Dy::DeformableSurface* deformableSurface)
{
	PxU32 remapIndex = deformableSurface->getGpuRemapId();
	PxU32 index = mActiveFEMClothIndex[remapIndex];

	if (index != 0xFFFFFFFF)
	{
		PX_ASSERT(mActiveFEMClothStaging[index] == remapIndex);
		mActiveFEMClothsDirty = true;
		mActiveFEMClothIndex[remapIndex] = 0xFFFFFFFF;

		// Swap-remove and patch the back-pointer of the element that was
		// swapped into this slot (if any).
		mActiveFEMClothStaging.replaceWithLast(index);
		if (index < mActiveFEMClothStaging.size())
			mActiveFEMClothIndex[mActiveFEMClothStaging[index]] = index;
	}

	// Purge pending "new sim" records for this cloth. replaceWithLast(i)
	// swaps the tail into slot i, so only advance i when nothing was removed;
	// the previous loop incremented i unconditionally and skipped examining
	// the swapped-in element.
	for (PxU32 i = 0; i < mNewFEMClothSims.size();)
	{
		if (mNewFEMClothSims[i].remapIndex == remapIndex)
			mNewFEMClothSims.replaceWithLast(i);
		else
			++i;
	}

	mDeformableSurfaces[remapIndex] = NULL;

	// Deferred so the id is not recycled until processDeferredIds() runs.
	mFEMClothIdPool.deferredFreeID(remapIndex);
}
|
||||
|
||||
// Flushes remap ids queued by releaseFEMCloth() back into the free pool.
// NOTE(review): presumably deferred so ids released mid-frame are not
// immediately recycled - confirm against PxgIDPool semantics.
void PxgBodySimManager::releaseDeferredFEMClothIds()
{
	mFEMClothIdPool.processDeferredIds();
}
|
||||
|
||||
// Adds the cloth's remap id to the active staging list if it is not already
// there. Returns true only when the activation state actually changed.
bool PxgBodySimManager::activateCloth(Dy::DeformableSurface* deformableSurface)
{
	const PxU32 remapIndex = deformableSurface->getGpuRemapId();

	// Already active: slot holds a valid staging index.
	if (mActiveFEMClothIndex[remapIndex] != 0xFFFFFFFF)
		return false;

	// Remember the staging slot so deactivation can swap-remove in O(1).
	mActiveFEMClothIndex[remapIndex] = mActiveFEMClothStaging.size();
	mActiveFEMClothStaging.pushBack(remapIndex);
	mActiveFEMClothsDirty = true;
	return true;
}
|
||||
|
||||
// Swap-removes the cloth's remap id from the active staging list, keeping
// the remap-id -> staging-slot table consistent.
// Returns true only when the cloth was actually active.
bool PxgBodySimManager::deactivateCloth(Dy::DeformableSurface* deformableSurface)
{
	const PxU32 remapIndex = deformableSurface->getGpuRemapId();
	const PxU32 slot = mActiveFEMClothIndex[remapIndex];

	// Not active: nothing to remove.
	if (slot == 0xFFFFFFFF)
		return false;

	mActiveFEMClothIndex[remapIndex] = 0xFFFFFFFF;
	mActiveFEMClothStaging.replaceWithLast(slot);
	mActiveFEMClothsDirty = true;

	// The former tail now occupies "slot" (unless we removed the tail
	// itself): refresh its back-pointer.
	if (slot < mActiveFEMClothStaging.size())
		mActiveFEMClothIndex[mActiveFEMClothStaging[slot]] = slot;

	return true;
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////particlesystem
|
||||
|
||||
// Registers a PBD particle system with the body-sim manager: stores it in
// the node-indexed body table, assigns a GPU remap id and marks it active.
// Idempotent per frame via mUpdatedMap.
void PxgBodySimManager::addPBDParticleSystem(Dy::ParticleSystem* particleSystem, const PxU32 nodeIndex)
{
	// Already added/updated this frame.
	if (mUpdatedMap.boundedTest(nodeIndex))
		return;

	// Geometric growth keeps repeated additions amortized O(1).
	if (mBodies.capacity() <= nodeIndex)
	{
		mBodies.resize(2 * nodeIndex + 1);
	}

	mBodies[nodeIndex] = reinterpret_cast<void*>(particleSystem);

	mUpdatedMap.growAndSet(nodeIndex);
	// Pair the scene node index with a new GPU remap id and queue the system
	// for GPU-side sim creation.
	PxgParticleSystemIndices index;
	index.nodeIndex = nodeIndex;
	index.remapIndex = mPBDParticleSystemIdPool.getNewID();
	mNewPBDParticleSystemSims.pushBack(index);

	particleSystem->setGpuRemapId(index.remapIndex);

	mTotalNumBodies = PxMax(mTotalNumBodies, nodeIndex + 1);
	mTotalNumPBDParticleSystems = PxMax(mTotalNumPBDParticleSystems, index.remapIndex + 1);

	// New particle systems start out active. Unlike cloth/softbody there is
	// no slot back-pointer table; removal scans the list linearly.
	mActivePBDParticleSystems.pushBack(index.remapIndex);
	mActivePBDParticleSystemsDirty = true;
}
|
||||
|
||||
// Unregisters a PBD particle system: removes it from the active list, purges
// any pending "new sim" record and queues its remap id for deferred reuse.
void PxgBodySimManager::releasePBDParticleSystem(Dy::ParticleSystem* particleSystem)
{
	PxU32 remapIndex = particleSystem->getGpuRemapId();
	// Linear scan of the active list; at most one entry exists per remap id.
	for (PxU32 i = 0; i < mActivePBDParticleSystems.size(); ++i)
	{
		if (mActivePBDParticleSystems[i] == remapIndex)
		{
			mActivePBDParticleSystems.replaceWithLast(i);
			mActivePBDParticleSystemsDirty = true;
			break;
		}
	}

	// Purge pending "new sim" records. replaceWithLast(i) swaps the tail
	// into slot i, so only advance i when nothing was removed; the previous
	// loop incremented i unconditionally and skipped examining the
	// swapped-in element.
	for (PxU32 i = 0; i < mNewPBDParticleSystemSims.size();)
	{
		if (mNewPBDParticleSystemSims[i].remapIndex == remapIndex)
			mNewPBDParticleSystemSims.replaceWithLast(i);
		else
			++i;
	}

	// Deferred so the id is not recycled until processDeferredIds() runs.
	mPBDParticleSystemIdPool.deferredFreeID(remapIndex);
}
|
||||
|
||||
// Flushes remap ids queued by releasePBDParticleSystem() back into the free
// pool (mirrors releaseDeferredFEMClothIds()).
void PxgBodySimManager::releaseDeferredPBDParticleSystemIds()
{
	mPBDParticleSystemIdPool.processDeferredIds();
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////softbody
|
||||
|
||||
void PxgBodySimManager::updateBody(const PxNodeIndex& nodeIndex)
|
||||
{
|
||||
const PxU32 index = nodeIndex.index();
|
||||
if (!mUpdatedMap.boundedTest(index))
|
||||
{
|
||||
mUpdatedMap.growAndSet(index);
|
||||
mNewOrUpdatedBodySims.pushBack(index);
|
||||
}
|
||||
}
|
||||
|
||||
// Writes velocity (and optional external acceleration) updates for a batch
// of rigid bodies into mNewUpdatedBodies. The atomic add reserves a
// contiguous slice [startIndex, startIndex + nbBodies) of the array, so this
// can be called from multiple tasks concurrently.
// NOTE(review): the array is not grown here - reserve() must have guaranteed
// sufficient capacity before any concurrent calls; confirm with callers.
void PxgBodySimManager::updateBodies(PxsRigidBody** rigidBodies, PxU32* nodeIndices, PxU32 nbBodies,
	PxsExternalAccelerationProvider* externalAccelerations)
{
	mExternalAccelerations = externalAccelerations;
	// Atomically claim nbBodies slots; newVal is the exclusive end of our slice.
	PxU32 newVal = static_cast<PxU32>(PxAtomicAdd(reinterpret_cast<PxI32*>(&mNbUpdatedBodies), static_cast<PxI32>(nbBodies)));

	PxgBodySimVelocityUpdate* newUpdatedBodies = mNewUpdatedBodies.begin();
	const PxU32 startIndex = newVal - nbBodies;
	for(PxU32 i=0; i<nbBodies; ++i)
	{
		PxsRigidBody* rigid = rigidBodies[i];
		PxsBodyCore& bcLL = rigid->getCore();

		// Node index is bit-reinterpreted (PX_FR) into the w component.
		newUpdatedBodies[i + startIndex].linearVelocityXYZ_bodySimIndexW = make_float4(bcLL.linearVelocity.x, bcLL.linearVelocity.y, bcLL.linearVelocity.z, PX_FR(nodeIndices[i]));
		newUpdatedBodies[i + startIndex].angularVelocityXYZ_maxPenBiasW = make_float4(bcLL.angularVelocity.x, bcLL.angularVelocity.y, bcLL.angularVelocity.z, bcLL.maxPenBias);

		// External accelerations are optional; when the provider exists but
		// has none, the fields are explicitly zeroed so stale data is never
		// uploaded.
		if (externalAccelerations)
		{
			if (externalAccelerations->hasAccelerations())
			{
				const PxsRigidBodyExternalAcceleration& acc = externalAccelerations->get(nodeIndices[i]);
				newUpdatedBodies[i + startIndex].externalLinearAccelerationXYZ = make_float4(acc.linearAcceleration.x, acc.linearAcceleration.y, acc.linearAcceleration.z, 0.0f);
				newUpdatedBodies[i + startIndex].externalAngularAccelerationXYZ = make_float4(acc.angularAcceleration.x, acc.angularAcceleration.y, acc.angularAcceleration.z, 0.0f);
			}
			else
			{
				newUpdatedBodies[i + startIndex].externalLinearAccelerationXYZ = make_float4(0.0f);
				newUpdatedBodies[i + startIndex].externalAngularAccelerationXYZ = make_float4(0.0f);
			}
		}
	}
}
|
||||
|
||||
void PxgBodySimManager::reserve(const PxU32 nbBodies)
|
||||
{
|
||||
const PxU32 requiredSize = mNewUpdatedBodies.size() + nbBodies;
|
||||
mNbUpdatedBodies = mNewUpdatedBodies.size();
|
||||
if (mNewUpdatedBodies.capacity() < requiredSize)
|
||||
{
|
||||
mNewUpdatedBodies.reserve(requiredSize * 2);
|
||||
}
|
||||
}
|
||||
|
||||
// Manual teardown: run the destructor in place, then hand the raw allocation
// back to the PhysX allocator. Matches the SDK's placement-construction
// pattern - do not replace with `delete this`.
void PxgBodySimManager::destroy()
{
	this->~PxgBodySimManager();
	PX_FREE_THIS;
}
|
||||
|
||||
void PxgBodySimManager::reset()
|
||||
{
|
||||
mNewOrUpdatedBodySims.forceSize_Unsafe(0);
|
||||
mNewArticulationSims.forceSize_Unsafe(0);
|
||||
|
||||
mNewSoftBodySims.forceSize_Unsafe(0);
|
||||
mNewFEMClothSims.forceSize_Unsafe(0);
|
||||
mNewPBDParticleSystemSims.forceSize_Unsafe(0);
|
||||
|
||||
mNewUpdatedBodies.forceSize_Unsafe(0);
|
||||
mUpdatedArticulations.forceSize_Unsafe(0);
|
||||
mNbUpdatedBodies = 0;
|
||||
}
|
||||
90
engine/third_party/physx/source/gpusimulationcontroller/src/PxgDeformableSkinning.cpp
vendored
Normal file
90
engine/third_party/physx/source/gpusimulationcontroller/src/PxgDeformableSkinning.cpp
vendored
Normal file
@@ -0,0 +1,90 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgDeformableSkinning.h"
|
||||
|
||||
#include "foundation/PxUserAllocated.h"
|
||||
|
||||
#include "PxPhysXGpu.h"
|
||||
#include "PxgKernelWrangler.h"
|
||||
#include "PxgKernelIndices.h"
|
||||
#include "GuAABBTree.h"
|
||||
#include "foundation/PxMathUtils.h"
|
||||
|
||||
|
||||
namespace physx
|
||||
{
|
||||
// Stores a copy of the kernel launcher used to dispatch the skinning kernels.
// NOTE(review): copy-assignment after default construction rather than a
// member-init list - presumably PxgKernelLauncher is cheap to copy; confirm.
PxgDeformableSkinning::PxgDeformableSkinning(PxgKernelLauncher& kernelLauncher)
{
	mKernelLauncher = kernelLauncher;
}
|
||||
|
||||
// Recomputes vertex normals for an array of skinned triangle meshes on the
// GPU using three kernel passes (zero, accumulate, normalize), all enqueued
// on the caller's stream under the CUDA context lock.
void PxgDeformableSkinning::computeNormalVectors(
	PxTrimeshSkinningGpuData* skinningDataArrayD, PxU32 arrayLength,
	CUstream stream, PxU32 numGpuThreads)
{
	physx::PxScopedCudaLock _lock(*mKernelLauncher.getCudaContextManager());

	// The accumulation pass uses smaller blocks than the two trivial passes.
	const PxU32 blockMain = 256;
	const PxU32 blockSimple = 1024;
	const PxU32 gridMain = (numGpuThreads + blockMain - 1) / blockMain;
	const PxU32 gridSimple = (numGpuThreads + blockSimple - 1) / blockSimple;

	// Pass 1: clear the normal accumulators.
	mKernelLauncher.launchKernelXYZ(PxgKernelIds::util_ZeroNormals, gridSimple, arrayLength, 1, blockSimple, 1, 1, 0, stream,
		skinningDataArrayD);
	// Pass 2: accumulate (un-normalized) normals.
	mKernelLauncher.launchKernelXYZ(PxgKernelIds::util_ComputeNormals, gridMain, arrayLength, 1, blockMain, 1, 1, 0, stream,
		skinningDataArrayD);
	// Pass 3: normalize the accumulated vectors.
	mKernelLauncher.launchKernelXYZ(PxgKernelIds::util_NormalizeNormals, gridSimple, arrayLength, 1, blockSimple, 1, 1, 0, stream,
		skinningDataArrayD);
}
|
||||
|
||||
// Interpolates skinned vertices embedded in cloth (triangle mesh) surfaces
// by launching a single interpolation kernel on the caller's stream.
void PxgDeformableSkinning::evaluateVerticesEmbeddedIntoSurface(
	PxTrimeshSkinningGpuData* skinningDataArrayD, PxU32 arrayLength,
	CUstream stream, PxU32 numGpuThreads)
{
	physx::PxScopedCudaLock _lock(*mKernelLauncher.getCudaContextManager());

	const PxU32 threadsPerBlock = 256;
	const PxU32 gridX = (numGpuThreads + threadsPerBlock - 1) / threadsPerBlock;

	mKernelLauncher.launchKernelXYZ(PxgKernelIds::util_InterpolateSkinnedClothVertices,
		gridX, arrayLength, 1, threadsPerBlock, 1, 1, 0, stream,
		skinningDataArrayD);
}
|
||||
|
||||
// Interpolates skinned vertices embedded in soft-body (tetrahedral mesh)
// volumes by launching a single interpolation kernel on the caller's stream.
void PxgDeformableSkinning::evaluateVerticesEmbeddedIntoVolume(
	PxTetmeshSkinningGpuData* skinningDataArrayD, PxU32 arrayLength,
	CUstream stream, PxU32 numGpuThreads)
{
	physx::PxScopedCudaLock _lock(*mKernelLauncher.getCudaContextManager());

	const PxU32 threadsPerBlock = 256;
	const PxU32 gridX = (numGpuThreads + threadsPerBlock - 1) / threadsPerBlock;

	mKernelLauncher.launchKernelXYZ(PxgKernelIds::util_InterpolateSkinnedSoftBodyVertices,
		gridX, arrayLength, 1, threadsPerBlock, 1, 1, 0, stream,
		skinningDataArrayD);
}
|
||||
}
|
||||
1043
engine/third_party/physx/source/gpusimulationcontroller/src/PxgFEMCloth.cpp
vendored
Normal file
1043
engine/third_party/physx/source/gpusimulationcontroller/src/PxgFEMCloth.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3048
engine/third_party/physx/source/gpusimulationcontroller/src/PxgFEMClothCore.cpp
vendored
Normal file
3048
engine/third_party/physx/source/gpusimulationcontroller/src/PxgFEMClothCore.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
335
engine/third_party/physx/source/gpusimulationcontroller/src/PxgFEMCore.cpp
vendored
Normal file
335
engine/third_party/physx/source/gpusimulationcontroller/src/PxgFEMCore.cpp
vendored
Normal file
@@ -0,0 +1,335 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgFEMCore.h"
|
||||
#include "cudamanager/PxCudaContext.h"
|
||||
#include "cudamanager/PxCudaContextManager.h"
|
||||
#include "PxgKernelWrangler.h"
|
||||
#include "CudaKernelWrangler.h"
|
||||
#include "PxgKernelIndices.h"
|
||||
#include "PxgRadixSortKernelIndices.h"
|
||||
#include "PxgSoftBodyCoreKernelIndices.h"
|
||||
#include "PxgCudaMemoryAllocator.h"
|
||||
#include "PxgCudaHelpers.h"
|
||||
|
||||
#define FEM_GPU_DEBUG 0
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Constructs the shared GPU FEM core: allocates all device-side contact,
// constraint and scratch buffers sized by maxContacts, plus pinned host
// mirrors of the previous-timestep contact counters.
// All GPU work happens between acquireContext()/releaseContext().
PxgFEMCore::PxgFEMCore(PxgCudaKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager,
	PxgHeapMemoryAllocatorManager* heapMemoryManager, PxgSimulationController* simController, PxgGpuContext* gpuContext,
	const PxU32 maxContacts, const PxU32 collisionStackSize, bool isTGS, PxsHeapStats::Enum statType) :
	PxgNonRigidCore(gpuKernelWrangler, cudaContextManager, heapMemoryManager, simController, gpuContext, maxContacts, collisionStackSize, statType),
	mRigidContactPointBuf(heapMemoryManager, statType),
	mRigidContactNormalPenBuf(heapMemoryManager, statType),
	mRigidContactBarycentricBuf(heapMemoryManager, statType),
	mRigidContactInfoBuf(heapMemoryManager, statType),
	mRigidTotalContactCountBuf(heapMemoryManager, statType),
	mRigidPrevContactCountBuf(heapMemoryManager, statType),
	mRigidSortedContactPointBuf(heapMemoryManager, statType),
	mRigidSortedContactNormalPenBuf(heapMemoryManager, statType),
	mRigidSortedContactBarycentricBuf(heapMemoryManager, statType),
	mRigidSortedRigidIdBuf(heapMemoryManager, statType),
	mRigidSortedContactInfoBuf(heapMemoryManager, statType),
	mFemRigidReferenceCount(heapMemoryManager, statType),
	mFemContactPointBuffer(heapMemoryManager, statType),
	mFemContactNormalPenBuffer(heapMemoryManager, statType),
	mFemContactBarycentric0Buffer(heapMemoryManager, statType),
	mFemContactBarycentric1Buffer(heapMemoryManager, statType),
	mVolumeContactOrVTContactInfoBuffer(heapMemoryManager, statType),
	mEEContactInfoBuffer(heapMemoryManager, statType),
	mVolumeContactOrVTContactCountBuffer(heapMemoryManager, statType),
	mEEContactCountBuffer(heapMemoryManager, statType),
	mPrevFemContactCountBuffer(heapMemoryManager, statType),
	mSpeculativeCCDContactOffset(heapMemoryManager, statType),
	mParticleContactPointBuffer(heapMemoryManager, statType),
	mParticleContactNormalPenBuffer(heapMemoryManager, statType),
	mParticleContactBarycentricBuffer(heapMemoryManager, statType),
	mParticleContactInfoBuffer(heapMemoryManager, statType),
	mParticleTotalContactCountBuffer(heapMemoryManager, statType),
	mPrevParticleContactCountBuffer(heapMemoryManager, statType),
	mParticleSortedContactPointBuffer(heapMemoryManager, statType),
	mParticleSortedContactBarycentricBuffer(heapMemoryManager, statType),
	mParticleSortedContactNormalPenBuffer(heapMemoryManager, statType),
	mParticleSortedContactInfoBuffer(heapMemoryManager, statType),
	mRigidConstraintBuf(heapMemoryManager, statType),
	mFemConstraintBuf(heapMemoryManager, statType),
	mParticleConstraintBuf(heapMemoryManager, statType),
	mRigidFEMAppliedForcesBuf(heapMemoryManager, statType),
	mFemAppliedForcesBuf(heapMemoryManager, statType),
	mParticleAppliedFEMForcesBuf(heapMemoryManager, statType),
	mParticleAppliedParticleForcesBuf(heapMemoryManager, statType),
	mRigidDeltaVelBuf(heapMemoryManager, statType),
	mTempBlockDeltaVelBuf(heapMemoryManager, statType),
	mTempBlockRigidIdBuf(heapMemoryManager, statType),
	mTempCellsHistogramBuf(heapMemoryManager, statType),
	mTempBlockCellsHistogramBuf(heapMemoryManager, statType),
	mTempHistogramCountBuf(heapMemoryManager, statType),
	mIsTGS(isTGS),
	mRigidContactCountPrevTimestep(NULL),
	mVolumeContactorVTContactCountPrevTimestep(NULL),
	mEEContactCountPrevTimestep(NULL),
	mParticleContactCountPrevTimestep(NULL)
#if PX_ENABLE_SIM_STATS
	, mContactCountStats(0)
#else
	PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
#endif
{
	mCudaContextManager->acquireContext();

	// Pinned host mirrors of the device contact counters, read back each
	// step by copyContactCountsToHost().
	mRigidContactCountPrevTimestep = PX_PINNED_MEMORY_ALLOC(PxU32, *mCudaContextManager, 1);
	mVolumeContactorVTContactCountPrevTimestep = PX_PINNED_MEMORY_ALLOC(PxU32, *mCudaContextManager, 1);
	mEEContactCountPrevTimestep = PX_PINNED_MEMORY_ALLOC(PxU32, *mCudaContextManager, 1);
	mParticleContactCountPrevTimestep = PX_PINNED_MEMORY_ALLOC(PxU32, *mCudaContextManager, 1);
	*mRigidContactCountPrevTimestep = 0;
	*mVolumeContactorVTContactCountPrevTimestep = 0;
	*mEEContactCountPrevTimestep = 0;
	*mParticleContactCountPrevTimestep = 0;

	//fem vs rigid contact buffer
	mRigidContactPointBuf.allocateElements(maxContacts, PX_FL);
	mRigidContactNormalPenBuf.allocateElements(maxContacts, PX_FL);
	mRigidContactBarycentricBuf.allocateElements(maxContacts, PX_FL);
	mRigidContactInfoBuf.allocateElements(maxContacts, PX_FL);

	mRigidSortedContactPointBuf.allocateElements(maxContacts, PX_FL);
	mRigidSortedContactNormalPenBuf.allocateElements(maxContacts, PX_FL);
	mRigidSortedContactBarycentricBuf.allocateElements(maxContacts, PX_FL);
	mRigidSortedContactInfoBuf.allocateElements(maxContacts, PX_FL);

	//PxNodeIndex is sizeof(PxU64)
	mRigidSortedRigidIdBuf.allocateElements(maxContacts, PX_FL);

	mRigidTotalContactCountBuf.allocateElements(1, PX_FL);
	mRigidPrevContactCountBuf.allocateElements(1, PX_FL);

	//fem vs fem contact buffer
	mFemContactPointBuffer.allocateElements(maxContacts, PX_FL);
	mFemContactNormalPenBuffer.allocateElements(maxContacts, PX_FL);
	mFemContactBarycentric0Buffer.allocateElements(maxContacts, PX_FL);
	mFemContactBarycentric1Buffer.allocateElements(maxContacts, PX_FL);

	mVolumeContactOrVTContactInfoBuffer.allocateElements(maxContacts, PX_FL);
	mEEContactInfoBuffer.allocateElements(maxContacts, PX_FL);
	mVolumeContactOrVTContactCountBuffer.allocateElements(1, PX_FL);
	mEEContactCountBuffer.allocateElements(1, PX_FL);
	mPrevFemContactCountBuffer.allocateElements(1, PX_FL);

	//fem vs particle contact buffer
	mParticleContactPointBuffer.allocateElements(maxContacts, PX_FL);
	mParticleContactNormalPenBuffer.allocateElements(maxContacts, PX_FL);
	mParticleContactBarycentricBuffer.allocateElements(maxContacts, PX_FL);
	mParticleContactInfoBuffer.allocateElements(maxContacts, PX_FL);
	mParticleTotalContactCountBuffer.allocateElements(1, PX_FL);
	mPrevParticleContactCountBuffer.allocateElements(1, PX_FL);

	mParticleSortedContactPointBuffer.allocateElements(maxContacts, PX_FL);
	mParticleSortedContactNormalPenBuffer.allocateElements(maxContacts, PX_FL);
	mParticleSortedContactBarycentricBuffer.allocateElements(maxContacts, PX_FL);
	mParticleSortedContactInfoBuffer.allocateElements(maxContacts, PX_FL);

	////KS - this will now store an array of 3 floats - normal force + 2* friction force
	mRigidFEMAppliedForcesBuf.allocateElements(maxContacts, PX_FL);
	mFemAppliedForcesBuf.allocateElements(maxContacts, PX_FL);
	mParticleAppliedFEMForcesBuf.allocateElements(maxContacts, PX_FL);
	mParticleAppliedParticleForcesBuf.allocateElements(maxContacts, PX_FL);

	//linear and angular delta change for rigid body
	mRigidDeltaVelBuf.allocateElements(maxContacts * 2, PX_FL);

	mTempBlockDeltaVelBuf.allocateElements(2 * PxgSoftBodyKernelGridDim::SB_ACCUMULATE_DELTA, PX_FL);
	mTempBlockRigidIdBuf.allocateElements(PxgSoftBodyKernelGridDim::SB_ACCUMULATE_DELTA, PX_FL);

	// NOTE(review): 64 MiB fixed-size scratch histogram - confirm this bound
	// against the kernels that consume it.
	mTempCellsHistogramBuf.allocate(1024 * 1024 * 64, PX_FL);
	mTempBlockCellsHistogramBuf.allocateElements(32, PX_FL);
	mTempHistogramCountBuf.allocateElements(1, PX_FL);

	//KS - we divide by 32 because this is a block data format
	mRigidConstraintBuf.allocateElements((maxContacts + 31) / 32, PX_FL);
	mParticleConstraintBuf.allocateElements((maxContacts + 31) / 32, PX_FL);

	mCudaContext->eventCreate(&mFinalizeEvent, CU_EVENT_DISABLE_TIMING);

	mCudaContextManager->releaseContext();
}
|
||||
|
||||
// Releases the pinned host counters and the finalize event allocated by the
// constructor; buffer members clean themselves up via their own destructors.
PxgFEMCore::~PxgFEMCore()
{
	mCudaContextManager->acquireContext();

	PX_PINNED_MEMORY_FREE(*mCudaContextManager, mRigidContactCountPrevTimestep);
	PX_PINNED_MEMORY_FREE(*mCudaContextManager, mVolumeContactorVTContactCountPrevTimestep);
	PX_PINNED_MEMORY_FREE(*mCudaContextManager, mEEContactCountPrevTimestep);
	PX_PINNED_MEMORY_FREE(*mCudaContextManager, mParticleContactCountPrevTimestep);

	mCudaContext->eventDestroy(mFinalizeEvent);
	mFinalizeEvent = NULL;
	mCudaContextManager->releaseContext();
}
|
||||
|
||||
void PxgFEMCore::copyContactCountsToHost()
|
||||
{
|
||||
PxgCudaHelpers::copyDToHAsync(*mCudaContextManager->getCudaContext(), mRigidContactCountPrevTimestep, reinterpret_cast<PxU32*>(getRigidContactCount().getDevicePtr()), 1, mStream);
|
||||
PxgCudaHelpers::copyDToHAsync(*mCudaContextManager->getCudaContext(), mVolumeContactorVTContactCountPrevTimestep, reinterpret_cast<PxU32*>(getVolumeContactOrVTContactCount().getDevicePtr()), 1, mStream);
|
||||
PxgCudaHelpers::copyDToHAsync(*mCudaContextManager->getCudaContext(), mEEContactCountPrevTimestep, reinterpret_cast<PxU32*>(getEEContactCount().getDevicePtr()), 1, mStream);
|
||||
PxgCudaHelpers::copyDToHAsync(*mCudaContextManager->getCudaContext(), mParticleContactCountPrevTimestep, reinterpret_cast<PxU32*>(getParticleContactCount().getDevicePtr()), 1, mStream);
|
||||
|
||||
PxgCudaHelpers::copyDToHAsync(*mCudaContextManager->getCudaContext(), mStackSizeNeededPinned, mStackSizeNeededOnDevice.getTypedPtr(), 1, mStream);
|
||||
}
|
||||
|
||||
// Grows the rigid delta-velocity buffer to hold the requested number of
// contacts (never below mMaxContacts). Two entries are stored per contact
// (linear and angular delta).
void PxgFEMCore::reserveRigidDeltaVelBuf(PxU32 newCapacity)
{
	const PxU32 numElements = PxMax(mMaxContacts, newCapacity) * 2;
	mRigidDeltaVelBuf.allocateElements(numElements, PX_FL);
}
|
||||
|
||||
// Launches the FEM_REORDER_RS_CONTACTS kernel, which scatters the rigid-vs-
// FEM contact buffers into rigid-sorted order using the precomputed remap
// table (mContactRemapSortedByRigidBuf).
// NOTE: kernelParams order must match the device kernel's parameter list
// exactly - do not reorder.
void PxgFEMCore::reorderRigidContacts()
{
	{
		PxgDevicePointer<PxU32> totalContactCountsd = mRigidTotalContactCountBuf.getTypedDevicePtr();

		//rigid body and fem contacts
		PxgDevicePointer<float4> contactsd = mRigidContactPointBuf.getTypedDevicePtr();
		PxgDevicePointer<float4> normPensd = mRigidContactNormalPenBuf.getTypedDevicePtr();
		PxgDevicePointer<float4> barycentricsd = mRigidContactBarycentricBuf.getTypedDevicePtr();
		PxgDevicePointer<PxgFemOtherContactInfo> infosd = mRigidContactInfoBuf.getTypedDevicePtr();
		PxgDevicePointer<float4> sortedContactsd = mRigidSortedContactPointBuf.getTypedDevicePtr();
		PxgDevicePointer<float4> sortedNormPensd = mRigidSortedContactNormalPenBuf.getTypedDevicePtr();
		PxgDevicePointer<float4> sortedBarycentricsd = mRigidSortedContactBarycentricBuf.getTypedDevicePtr();
		PxgDevicePointer<PxgFemOtherContactInfo> sortedInfosd = mRigidSortedContactInfoBuf.getTypedDevicePtr();

		//sortedInfosd already store the rigid id. However, we are sharing the code with the particle system
		//for rigid delta accumulation so we need to store the sorted rigid id as a PxU32 array.
		PxgDevicePointer<PxU64> sortedRigidsIdd = mRigidSortedRigidIdBuf.getTypedDevicePtr();
		PxgDevicePointer<PxU32> remapByRigidd = mContactRemapSortedByRigidBuf.getTypedDevicePtr();

		CUfunction reorderFunction = mGpuKernelWranglerManager->getKernelWrangler()->getCuFunction(PxgKernelIds::FEM_REORDER_RS_CONTACTS);

		PxCudaKernelParam kernelParams[] =
		{
			PX_CUDA_KERNEL_PARAM(contactsd),
			PX_CUDA_KERNEL_PARAM(normPensd),
			PX_CUDA_KERNEL_PARAM(barycentricsd),
			PX_CUDA_KERNEL_PARAM(infosd),
			PX_CUDA_KERNEL_PARAM(totalContactCountsd),
			PX_CUDA_KERNEL_PARAM(remapByRigidd),
			PX_CUDA_KERNEL_PARAM(sortedContactsd),
			PX_CUDA_KERNEL_PARAM(sortedNormPensd),
			PX_CUDA_KERNEL_PARAM(sortedBarycentricsd),
			PX_CUDA_KERNEL_PARAM(sortedInfosd),
			PX_CUDA_KERNEL_PARAM(sortedRigidsIdd)
		};

		CUresult result = mCudaContext->launchKernel(reorderFunction, PxgSoftBodyKernelGridDim::SB_REORDERCONTACTS, 1, 1, PxgSoftBodyKernelBlockDim::SB_REORDERCONTACTS, 1, 1, 0, mStream, kernelParams, sizeof(kernelParams), 0, PX_FL);
		if (result != CUDA_SUCCESS)
			PxGetFoundation().error(PxErrorCode::eINTERNAL_ERROR, PX_FL, "GPU sb_reorderRSContactsLaunch fail to launch kernel!!\n");
#if FEM_GPU_DEBUG
		// Debug builds synchronize so kernel-side failures surface here.
		result = mCudaContext->streamSynchronize(mStream);
		if (result != CUDA_SUCCESS)
			PxGetFoundation().error(PxErrorCode::eINTERNAL_ERROR, PX_FL, "GPU sb_reorderRSContactsLaunch fail!!\n");
#endif
	}
}
|
||||
|
||||
// Two-pass reduction of per-contact rigid-body velocity deltas
// (mRigidDeltaVelBuf) into the solver's rigid/articulation state:
// pass 1 (ACCUMULATE_DELTAVEL_RIGIDBODY_FIRST) reduces within blocks and
// records per-block partials; pass 2 (..._SECOND) combines the partials and
// applies them via the solver descriptors.
// NOTE: kernelParams order must match each device kernel's parameter list
// exactly - do not reorder.
void PxgFEMCore::accumulateRigidDeltas(PxgDevicePointer<PxgPrePrepDesc> prePrepDescd, PxgDevicePointer<PxgSolverCoreDesc> solverCoreDescd, PxgDevicePointer<PxgSolverSharedDescBase> sharedDescd,
	PxgDevicePointer<PxgArticulationCoreDesc> artiCoreDescd, PxgDevicePointer<PxNodeIndex> rigidIdsd, PxgDevicePointer<PxU32> numIdsd, CUstream stream, bool isTGS)
{
	PX_UNUSED(rigidIdsd);

	// Pass 1: block-level accumulation into mTempBlock* scratch buffers.
	{
		//CUdeviceptr contactInfosd = mRSSortedContactInfoBuffer.getDevicePtr();
		PxgDevicePointer<float4> deltaVd = mRigidDeltaVelBuf.getTypedDevicePtr();
		PxgDevicePointer<PxVec4> blockDeltaVd = mTempBlockDeltaVelBuf.getTypedDevicePtr();
		PxgDevicePointer<PxU64> blockRigidIdd = mTempBlockRigidIdBuf.getTypedDevicePtr();

		const CUfunction rigidFirstKernelFunction = mGpuKernelWranglerManager->getKernelWrangler()->getCuFunction(PxgKernelIds::ACCUMULATE_DELTAVEL_RIGIDBODY_FIRST);

		PxCudaKernelParam kernelParams[] =
		{
			//PX_CUDA_KERNEL_PARAM(contactInfosd),
			PX_CUDA_KERNEL_PARAM(rigidIdsd),
			PX_CUDA_KERNEL_PARAM(numIdsd),
			PX_CUDA_KERNEL_PARAM(deltaVd),
			PX_CUDA_KERNEL_PARAM(blockDeltaVd),
			PX_CUDA_KERNEL_PARAM(blockRigidIdd)
		};

		CUresult result = mCudaContext->launchKernel(rigidFirstKernelFunction, PxgSoftBodyKernelGridDim::SB_ACCUMULATE_DELTA, 1, 1, PxgSoftBodyKernelBlockDim::SB_ACCUMULATE_DELTA, 1, 1, 0, stream, kernelParams, sizeof(kernelParams), 0, PX_FL);
		PX_ASSERT(result == CUDA_SUCCESS);
		PX_UNUSED(result);

#if FEM_GPU_DEBUG
		// Debug builds synchronize so kernel-side failures surface here.
		result = mCudaContext->streamSynchronize(stream);
		PX_ASSERT(result == CUDA_SUCCESS);
		if (result != CUDA_SUCCESS)
			PxGetFoundation().error(PxErrorCode::eINTERNAL_ERROR, PX_FL, "GPU fem accumulateDeltaVRigidFirstLaunch kernel fail!\n");

		int bob = 0;
		PX_UNUSED(bob);
#endif
	}

	// Pass 2: combine block partials and write the deltas into the solver
	// (rigid bodies and articulations), TGS- or PGS-flavored per isTGS.
	{
		//CUdeviceptr contactInfosd = mRSSortedContactInfoBuffer.getDevicePtr();
		PxgDevicePointer<float4> deltaVd = mRigidDeltaVelBuf.getTypedDevicePtr();
		PxgDevicePointer<PxVec4> blockDeltaVd = mTempBlockDeltaVelBuf.getTypedDevicePtr();
		PxgDevicePointer<PxU64> blockRigidIdd = mTempBlockRigidIdBuf.getTypedDevicePtr();

		const CUfunction rigidSecondKernelFunction = mGpuKernelWranglerManager->getKernelWrangler()->getCuFunction(PxgKernelIds::ACCUMULATE_DELTAVEL_RIGIDBODY_SECOND);

		PxCudaKernelParam kernelParams[] =
		{
			//PX_CUDA_KERNEL_PARAM(contactInfosd),
			PX_CUDA_KERNEL_PARAM(rigidIdsd),
			PX_CUDA_KERNEL_PARAM(numIdsd),
			PX_CUDA_KERNEL_PARAM(deltaVd),
			PX_CUDA_KERNEL_PARAM(blockDeltaVd),
			PX_CUDA_KERNEL_PARAM(blockRigidIdd),
			PX_CUDA_KERNEL_PARAM(prePrepDescd),
			PX_CUDA_KERNEL_PARAM(solverCoreDescd),
			PX_CUDA_KERNEL_PARAM(artiCoreDescd),
			PX_CUDA_KERNEL_PARAM(sharedDescd),
			PX_CUDA_KERNEL_PARAM(isTGS)
		};

		CUresult result = mCudaContext->launchKernel(rigidSecondKernelFunction, PxgSoftBodyKernelGridDim::SB_ACCUMULATE_DELTA, 1, 1, PxgSoftBodyKernelBlockDim::SB_ACCUMULATE_DELTA, 1, 1, 0, stream, kernelParams, sizeof(kernelParams), 0, PX_FL);
		PX_ASSERT(result == CUDA_SUCCESS);
		PX_UNUSED(result);

#if FEM_GPU_DEBUG
		result = mCudaContext->streamSynchronize(stream);
		PX_ASSERT(result == CUDA_SUCCESS);
		if (result != CUDA_SUCCESS)
			PxGetFoundation().error(PxErrorCode::eINTERNAL_ERROR, PX_FL, "GPU fem accumulateDeltaVRigidSecondLaunch kernel fail!\n");

		int bob = 0;
		PX_UNUSED(bob);
#endif
	}
}
|
||||
757
engine/third_party/physx/source/gpusimulationcontroller/src/PxgIsosurfaceExtraction.cpp
vendored
Normal file
757
engine/third_party/physx/source/gpusimulationcontroller/src/PxgIsosurfaceExtraction.cpp
vendored
Normal file
@@ -0,0 +1,757 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgIsosurfaceExtraction.h"
|
||||
|
||||
#include "foundation/PxUserAllocated.h"
|
||||
#include "PxgCudaMemoryAllocator.h"
|
||||
|
||||
#include "PxPhysXGpu.h"
|
||||
#include "PxgKernelWrangler.h"
|
||||
#include "PxgKernelIndices.h"
|
||||
#include "PxgCudaMemoryAllocator.h"
|
||||
#include "PxgCudaHelpers.h"
|
||||
|
||||
|
||||
namespace physx
|
||||
{
|
||||
#if ENABLE_KERNEL_LAUNCH_ERROR_CHECK
|
||||
#define checkCudaError() { cudaError_t err = cudaDeviceSynchronize(); if (err != 0) printf("Cuda error file: %s, line: %i, error: %i\n", PX_FL, err); }
|
||||
#else
|
||||
#define checkCudaError() { }
|
||||
#endif
|
||||
|
||||
#define THREADS_PER_BLOCK 256
|
||||
|
||||
|
||||
// Resets the density field of the sparse grid to clearValue before the next splat pass.
// numActiveSubgrids is a device pointer the kernel reads to skip subgrids that are not in use.
void sparseGridClearDensity(PxgKernelLauncher& launcher, const PxSparseGridParams& sparseGridParams, PxReal* density, PxReal clearValue, PxU32* numActiveSubgrids, CUstream stream)
{
	const PxU32 cellsPerSubgrid = sparseGridParams.subgridSizeX * sparseGridParams.subgridSizeY * sparseGridParams.subgridSizeZ;
	const PxU32 totalCells = sparseGridParams.maxNumSubgrids * cellsPerSubgrid;
	const PxU32 blockSize = THREADS_PER_BLOCK;
	// One thread per cell, rounded up to whole blocks.
	const PxU32 gridSize = (totalCells + blockSize - 1) / blockSize;
	launcher.launchKernel(PxgKernelIds::sg_SparseGridClearDensity, gridSize, blockSize, 0, stream,
		density, clearValue, numActiveSubgrids, cellsPerSubgrid);
	checkCudaError();
}
|
||||
|
||||
// Splats particle contributions into the dense-grid density field, one thread per particle.
// Name suggests an SDF-style density kernel; the device code is not visible here — confirm in the .cu source.
// Optional anisotropy1..3 (one ellipsoid axis each, scaled by anisotropyFactor) deform the particle footprint,
// and activeIndices, when non-NULL, presumably restricts the splat to a subset of particles — TODO confirm.
// Only particles whose phase matches validPhaseMask contribute.
void computeParticleDensityLaunchUsingSDF(PxgKernelLauncher& launcher, PxVec4* deviceParticlePos, int numParticles, PxU32* phases, PxU32 validPhaseMask, PxIsosurfaceExtractionData& data, CUstream stream, PxU32* activeIndices = NULL,
	PxVec4* anisotropy1 = NULL, PxVec4* anisotropy2 = NULL, PxVec4* anisotropy3 = NULL, PxReal anisotropyFactor = 1.0f)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	// One thread per particle.
	const PxU32 numBlocks = (numParticles + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::iso_ComputeParticleDensityUsingSDF, numBlocks, numThreadsPerBlock, 0, stream,
		deviceParticlePos, numParticles, phases, validPhaseMask, data,
		activeIndices, reinterpret_cast<float4*>(anisotropy1), reinterpret_cast<float4*>(anisotropy2), reinterpret_cast<float4*>(anisotropy3), anisotropyFactor);
	checkCudaError();
}

// Plain (non-SDF) density splat into the dense grid; same thread-per-particle launch shape as above.
void computeParticleDensityLaunch(PxgKernelLauncher& launcher, PxVec4* deviceParticlePos, int numParticles, PxU32* phases, PxU32 validPhaseMask, PxIsosurfaceExtractionData& data, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = (numParticles + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::iso_ComputeParticleDensity, numBlocks, numThreadsPerBlock, 0, stream,
		deviceParticlePos, numParticles, phases, validPhaseMask, data);
	checkCudaError();
}
|
||||
|
||||
// Marching-cubes pipeline, dense grid, stage 1: count isosurface vertices produced per cell
// (one thread per cell; the result presumably feeds the prefix-sum scan — TODO confirm against the kernel).
void countCellVertsLaunch(PxgKernelLauncher& launcher, PxIsosurfaceExtractionData& data, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = (data.maxNumCells() + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::iso_CountCellVerts, numBlocks, numThreadsPerBlock, 0, stream, data);
	checkCudaError();
}

// Stage 2: emit the actual vertex positions into the (pre-sized) vertex buffer.
void createVertsLaunch(PxgKernelLauncher& launcher, PxIsosurfaceExtractionData& data, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = (data.maxNumCells() + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::iso_CreateVerts, numBlocks, numThreadsPerBlock, 0, stream, data);
	checkCudaError();
}

// Stage 3: count triangle indices produced per cell.
void countTriIdsLaunch(PxgKernelLauncher& launcher, PxIsosurfaceExtractionData& data, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = (data.maxNumCells() + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::iso_CountTriIds, numBlocks, numThreadsPerBlock, 0, stream, data);
	checkCudaError();
}

// Stage 4: emit triangle indices; flipTriangleOrientation reverses winding (passed to the kernel as 0/1).
void createTriIdsLaunch(PxgKernelLauncher& launcher, PxIsosurfaceExtractionData& data, bool flipTriangleOrientation, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = (data.maxNumCells() + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::iso_CreateTriIds, numBlocks, numThreadsPerBlock, 0, stream, data, flipTriangleOrientation ? 1 : 0);
	checkCudaError();
}
|
||||
|
||||
// Mesh-smoothing pass over the generated vertices; writes smoothed positions to `output`.
// numTriIds is a device pointer (count lives on the GPU), hence the fixed 64-block launch —
// the kernel presumably strides over the work itself; TODO confirm in the .cu source.
void smoothVertsLaunch(PxgKernelLauncher& launcher, const PxVec4* vertices, PxVec4* output, const PxU32* triIds, const PxU32* numTriIds, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = 64;
	launcher.launchKernel(PxgKernelIds::iso_SmoothVerts, numBlocks, numThreadsPerBlock, 0, stream, vertices, output, triIds, numTriIds);
	checkCudaError();
}

// Blends `vertices` toward `output` (weight 1 = full replace); `length` is a device-side count.
void averageVertsLaunch(PxgKernelLauncher& launcher, PxVec4* vertices, PxVec4* output, const PxU32* length, CUstream stream, PxReal blendWeight = 1.0f)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = 64;
	launcher.launchKernel(PxgKernelIds::iso_AverageVerts, numBlocks, numThreadsPerBlock, 0, stream, vertices, output, length, blendWeight);
	checkCudaError();
}
|
||||
|
||||
// Dense-grid normal pipeline. All four launches use a fixed 64-block grid; element counts are
// device-side, so the kernels presumably loop over their work — TODO confirm in the .cu source.

// Accumulate (unnormalized) vertex normals from the triangle mesh.
void computeNormalsLaunch(PxgKernelLauncher& launcher, PxIsosurfaceExtractionData& data, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = 64;
	launcher.launchKernel(PxgKernelIds::iso_ComputeNormals, numBlocks, numThreadsPerBlock, 0, stream, data);
	checkCudaError();
}

// Normalize the accumulated normals.
void normalizeNormalsLaunch(PxgKernelLauncher& launcher, PxIsosurfaceExtractionData& data, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = 64;
	launcher.launchKernel(PxgKernelIds::iso_NormalizeNormals, numBlocks, numThreadsPerBlock, 0, stream, data);
	checkCudaError();
}

// Smoothing pass on the normals (accumulation half).
void smoothNormalsLaunch(PxgKernelLauncher& launcher, PxIsosurfaceExtractionData& data, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = 64;
	launcher.launchKernel(PxgKernelIds::iso_SmoothNormals, numBlocks, numThreadsPerBlock, 0, stream, data);
	checkCudaError();
}

// Smoothing pass on the normals (re-normalization half).
void smoothNormalsNormalizeLaunch(PxgKernelLauncher& launcher, PxIsosurfaceExtractionData& data, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = 64;
	launcher.launchKernel(PxgKernelIds::iso_SmoothNormalsNormalize, numBlocks, numThreadsPerBlock, 0, stream, data);
	checkCudaError();
}
|
||||
|
||||
// Gaussian blur of the dense density field (ping-pong between data's two buffers).
// NOTE: flips data.swapState after the launch so the next pass reads what this pass wrote;
// callers must not reorder this relative to other filter launches on the same data.
void gridFilterGaussLaunch(PxgKernelLauncher& launcher, PxIsosurfaceExtractionData& data, PxReal neighborWeight, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = (data.maxNumCells() + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::iso_GridFilterGauss, numBlocks, numThreadsPerBlock, 0, stream, data, neighborWeight);
	data.swapState = 1 - data.swapState;
	checkCudaError();
}

// Morphological dilate (sign > 0) or erode (sign < 0) of the density field — sign semantics
// inferred from the name; confirm against the kernel. Also ping-pongs via data.swapState.
void gridFilterDilateErodeLaunch(PxgKernelLauncher& launcher, PxIsosurfaceExtractionData& data, PxReal sign, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = (data.maxNumCells() + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::iso_GridFilterDilateErode, numBlocks, numThreadsPerBlock, 0, stream, data, sign);
	data.swapState = 1 - data.swapState;
	checkCudaError();
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Sparse-grid overloads of the density splats above: identical launch shape (one thread per
// particle), but dispatch the *Sparse kernel variants and take PxSparseIsosurfaceExtractionData.
void computeParticleDensityLaunchUsingSDF(PxgKernelLauncher& launcher, PxVec4* deviceParticlePos, int numParticles, PxU32* phases, PxU32 validPhaseMask, PxSparseIsosurfaceExtractionData& data, CUstream stream, PxU32* activeIndices = NULL,
	PxVec4* anisotropy1 = NULL, PxVec4* anisotropy2 = NULL, PxVec4* anisotropy3 = NULL, PxReal anisotropyFactor = 1.0f)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = (numParticles + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::iso_ComputeParticleDensityUsingSDFSparse, numBlocks, numThreadsPerBlock, 0, stream, deviceParticlePos, numParticles, phases, validPhaseMask, data,
		activeIndices, reinterpret_cast<float4*>(anisotropy1), reinterpret_cast<float4*>(anisotropy2), reinterpret_cast<float4*>(anisotropy3), anisotropyFactor);
	checkCudaError();
}

// Plain (non-SDF) sparse-grid density splat.
void computeParticleDensityLaunch(PxgKernelLauncher& launcher, PxVec4* deviceParticlePos, int numParticles, PxU32* phases, PxU32 validPhaseMask, PxSparseIsosurfaceExtractionData& data, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = (numParticles + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::iso_ComputeParticleDensitySparse, numBlocks, numThreadsPerBlock, 0, stream, deviceParticlePos, numParticles, phases, validPhaseMask, data);
	checkCudaError();
}
|
||||
|
||||
// Sparse-grid marching-cubes stages 1-4, mirroring the dense overloads above
// (one thread per potentially active cell, *Sparse kernel variants).

// Stage 1: count isosurface vertices per cell.
void countCellVertsLaunch(PxgKernelLauncher& launcher, PxSparseIsosurfaceExtractionData& data, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = (data.maxNumCells() + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::iso_CountCellVertsSparse, numBlocks, numThreadsPerBlock, 0, stream, data);
	checkCudaError();
}

// Stage 2: emit vertex positions.
void createVertsLaunch(PxgKernelLauncher& launcher, PxSparseIsosurfaceExtractionData& data, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = (data.maxNumCells() + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::iso_CreateVertsSparse, numBlocks, numThreadsPerBlock, 0, stream, data);
	checkCudaError();
}

// Stage 3: count triangle indices per cell.
void countTriIdsLaunch(PxgKernelLauncher& launcher, PxSparseIsosurfaceExtractionData& data, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = (data.maxNumCells() + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::iso_CountTriIdsSparse, numBlocks, numThreadsPerBlock, 0, stream, data);
	checkCudaError();
}

// Stage 4: emit triangle indices; flipTriangleOrientation reverses winding.
void createTriIdsLaunch(PxgKernelLauncher& launcher, PxSparseIsosurfaceExtractionData& data, bool flipTriangleOrientation, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = (data.maxNumCells() + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::iso_CreateTriIdsSparse, numBlocks, numThreadsPerBlock, 0, stream, data, flipTriangleOrientation ? 1 : 0);
	checkCudaError();
}
|
||||
|
||||
// Sparse-grid normal pipeline. Note the first two launches use 16 blocks while the smoothing
// pair uses 64 (the dense overloads use 64 throughout) — presumably a deliberate tuning choice,
// but worth confirming against the kernel sources.

// Accumulate vertex normals from the triangle mesh.
void computeNormalsLaunch(PxgKernelLauncher& launcher, PxSparseIsosurfaceExtractionData& data, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = 16;
	launcher.launchKernel(PxgKernelIds::iso_ComputeNormalsSparse, numBlocks, numThreadsPerBlock, 0, stream, data);
	checkCudaError();
}

// Normalize the accumulated normals.
void normalizeNormalsLaunch(PxgKernelLauncher& launcher, PxSparseIsosurfaceExtractionData& data, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = 16;
	launcher.launchKernel(PxgKernelIds::iso_NormalizeNormalsSparse, numBlocks, numThreadsPerBlock, 0, stream, data);
	checkCudaError();
}


// Smoothing pass on the normals (accumulation half).
void smoothNormalsLaunch(PxgKernelLauncher& launcher, PxSparseIsosurfaceExtractionData& data, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = 64;
	launcher.launchKernel(PxgKernelIds::iso_SmoothNormalsSparse, numBlocks, numThreadsPerBlock, 0, stream, data);
	checkCudaError();
}

// Smoothing pass on the normals (re-normalization half).
void smoothNormalsNormalizeLaunch(PxgKernelLauncher& launcher, PxSparseIsosurfaceExtractionData& data, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = 64;
	launcher.launchKernel(PxgKernelIds::iso_SmoothNormalsNormalizeSparse, numBlocks, numThreadsPerBlock, 0, stream, data);
	checkCudaError();
}
|
||||
|
||||
|
||||
|
||||
// Sparse-grid Gaussian blur of the density field; flips data.swapState after the launch
// (ping-pong buffering), so call order relative to other filter passes matters.
void gridFilterGaussLaunch(PxgKernelLauncher& launcher, PxSparseIsosurfaceExtractionData& data, PxReal neighborWeight, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = (data.maxNumCells() + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::iso_GridFilterGaussSparse, numBlocks, numThreadsPerBlock, 0, stream, data, neighborWeight);
	data.swapState = 1 - data.swapState;
	checkCudaError();
}

// Sparse-grid dilate/erode filter (sign selects direction — inferred from the name; confirm
// against the kernel). Also ping-pongs via data.swapState.
void gridFilterDilateErodeLaunch(PxgKernelLauncher& launcher, PxSparseIsosurfaceExtractionData& data, PxReal sign, CUstream stream)
{
	const PxU32 numThreadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 numBlocks = (data.maxNumCells() + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::iso_GridFilterDilateErodeSparse, numBlocks, numThreadsPerBlock, 0, stream, data, sign);
	data.swapState = 1 - data.swapState;
	checkCudaError();
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Resizes the capacity limits for the generated mesh. The smoothing scratch buffer is always
// owned by the extractor and tracks the vertex limit; the output buffers (verts/normals/triIds)
// are only reallocated when this extractor owns them.
void PxgSparseGridIsosurfaceExtractor::setMaxVerticesAndTriangles(PxU32 maxIsosurfaceVertices, PxU32 maxIsosurfaceTriangles)
{
	auto& contextManager = *mShared.mKernelLauncher.getCudaContextManager();
	const PxU32 newMaxTriIds = maxIsosurfaceTriangles * 3;
	const bool vertexLimitChanged = mData.maxVerts != maxIsosurfaceVertices;
	const bool indexLimitChanged = mData.maxTriIds != newMaxTriIds;

	if (vertexLimitChanged)
	{
		mData.maxVerts = maxIsosurfaceVertices;
		// Scratch buffer is extractor-owned regardless of output-buffer ownership.
		PX_DEVICE_MEMORY_FREE(contextManager, mData.smoothingBuffer);
		mData.smoothingBuffer = PX_DEVICE_MEMORY_ALLOC(PxVec4, contextManager, mData.maxVerts);
	}
	if (indexLimitChanged)
		mData.maxTriIds = newMaxTriIds;

	// Caller-provided output buffers are never resized here.
	if (!mShared.mOwnsOutputGPUBuffers)
		return;

	if (vertexLimitChanged)
	{
		PX_DEVICE_MEMORY_FREE(contextManager, mData.verts);
		PX_DEVICE_MEMORY_FREE(contextManager, mData.normals);
		mData.verts = PX_DEVICE_MEMORY_ALLOC(PxVec4, contextManager, mData.maxVerts);
		mData.normals = PX_DEVICE_MEMORY_ALLOC(PxVec4, contextManager, mData.maxVerts);
	}
	if (indexLimitChanged)
	{
		PX_DEVICE_MEMORY_FREE(contextManager, mData.triIds);
		mData.triIds = PX_DEVICE_MEMORY_ALLOC(PxU32, contextManager, mData.maxTriIds);
	}
}
|
||||
|
||||
void PxgSparseGridIsosurfaceExtractor::releaseGPUBuffers()
|
||||
{
|
||||
if (!mData.verts)
|
||||
return;
|
||||
|
||||
PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.verts);
|
||||
PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.normals);
|
||||
PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.triIds);
|
||||
}
|
||||
|
||||
void PxgSparseGridIsosurfaceExtractor::allocateGPUBuffers()
|
||||
{
|
||||
if (mData.verts)
|
||||
return;
|
||||
|
||||
mData.verts = PX_DEVICE_MEMORY_ALLOC(PxVec4, *mShared.mKernelLauncher.getCudaContextManager(), mData.maxVerts);
|
||||
mData.normals = PX_DEVICE_MEMORY_ALLOC(PxVec4, *mShared.mKernelLauncher.getCudaContextManager(), mData.maxVerts);
|
||||
mData.triIds = PX_DEVICE_MEMORY_ALLOC(PxU32, *mShared.mKernelLauncher.getCudaContextManager(), mData.maxTriIds);
|
||||
}
|
||||
|
||||
// Adopts caller-provided *device* buffers as the extraction targets. Any buffers this
// extractor previously allocated are released first, and ownership is relinquished.
void PxgSparseGridIsosurfaceExtractor::setResultBufferDevice(PxVec4* vertices, PxU32* triIndices, PxVec4* normals)
{
	if (mShared.mOwnsOutputGPUBuffers)
		releaseGPUBuffers();
	mShared.mOwnsOutputGPUBuffers = false;

	// Results are written straight into the caller's device memory.
	mData.verts = vertices;
	mData.triIds = triIndices;
	mData.normals = normals;

	// No host-side mirrors in device-buffer mode.
	mShared.mVertices = NULL;
	mShared.mTriIndices = NULL;
	mShared.mNormals = NULL;
}
|
||||
|
||||
// Re-initializes the extractor for a new particle capacity, keeping the current grid,
// isosurface parameters and mesh limits.
void PxgSparseGridIsosurfaceExtractor::setMaxParticles(PxU32 maxParticles)
{
	PxSparseGridParams sparseGridParams = mSparseGrid.getGridParameters();

	// initialize() calls release(), which would free the output buffers if we claimed
	// ownership. Temporarily marking them not-owned preserves them across the re-init.
	bool ownsGpuBuffers = mShared.mOwnsOutputGPUBuffers;
	mShared.mOwnsOutputGPUBuffers = false; //The following call to initialize will release all existing owned memory. If the output buffers are marked as not-owned, they will persist.
	initialize(mShared.mKernelLauncher, sparseGridParams, mShared.mIsosurfaceParams, maxParticles, mData.maxVerts, mData.maxTriIds / 3);
	mShared.mOwnsOutputGPUBuffers = ownsGpuBuffers;
}
|
||||
|
||||
// Directs results to caller-provided *host* buffers. The extractor allocates (and owns)
// device-side staging buffers; results are presumably copied to the host pointers after
// extraction — confirm in PxgIsosurfaceExtractorShared.
void PxgSparseGridIsosurfaceExtractor::setResultBufferHost(PxVec4* vertices, PxU32* triIndices, PxVec4* normals)
{
	if (mShared.mOwnsOutputGPUBuffers)
		releaseGPUBuffers();
	mShared.mOwnsOutputGPUBuffers = true;

	mShared.mVertices = vertices;
	mShared.mTriIndices = triIndices;
	mShared.mNormals = normals;

	// Device staging buffers backing the host outputs.
	allocateGPUBuffers();
}
|
||||
|
||||
// Dense-grid twin of PxgSparseGridIsosurfaceExtractor::setMaxVerticesAndTriangles:
// resizes mesh capacity limits; output buffers are only reallocated when owned.
void PxgDenseGridIsosurfaceExtractor::setMaxVerticesAndTriangles(PxU32 maxIsosurfaceVertices, PxU32 maxIsosurfaceTriangles)
{
	auto& contextManager = *mShared.mKernelLauncher.getCudaContextManager();
	const PxU32 newMaxTriIds = maxIsosurfaceTriangles * 3;
	const bool vertexLimitChanged = mData.maxVerts != maxIsosurfaceVertices;
	const bool indexLimitChanged = mData.maxTriIds != newMaxTriIds;

	if (vertexLimitChanged)
	{
		mData.maxVerts = maxIsosurfaceVertices;
		// Scratch buffer is extractor-owned regardless of output-buffer ownership.
		PX_DEVICE_MEMORY_FREE(contextManager, mData.smoothingBuffer);
		mData.smoothingBuffer = PX_DEVICE_MEMORY_ALLOC(PxVec4, contextManager, mData.maxVerts);
	}
	if (indexLimitChanged)
		mData.maxTriIds = newMaxTriIds;

	// Caller-provided output buffers are never resized here.
	if (!mShared.mOwnsOutputGPUBuffers)
		return;

	if (vertexLimitChanged)
	{
		PX_DEVICE_MEMORY_FREE(contextManager, mData.verts);
		PX_DEVICE_MEMORY_FREE(contextManager, mData.normals);
		mData.verts = PX_DEVICE_MEMORY_ALLOC(PxVec4, contextManager, mData.maxVerts);
		mData.normals = PX_DEVICE_MEMORY_ALLOC(PxVec4, contextManager, mData.maxVerts);
	}
	if (indexLimitChanged)
	{
		PX_DEVICE_MEMORY_FREE(contextManager, mData.triIds);
		mData.triIds = PX_DEVICE_MEMORY_ALLOC(PxU32, contextManager, mData.maxTriIds);
	}
}
|
||||
|
||||
void PxgDenseGridIsosurfaceExtractor::releaseGPUBuffers()
|
||||
{
|
||||
if (!mData.verts)
|
||||
return;
|
||||
|
||||
PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.verts);
|
||||
PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.normals);
|
||||
PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.triIds);
|
||||
}
|
||||
|
||||
void PxgDenseGridIsosurfaceExtractor::allocateGPUBuffers()
|
||||
{
|
||||
if (mData.verts)
|
||||
return;
|
||||
|
||||
mData.verts = PX_DEVICE_MEMORY_ALLOC(PxVec4, *mShared.mKernelLauncher.getCudaContextManager(), mData.maxVerts);
|
||||
mData.normals = PX_DEVICE_MEMORY_ALLOC(PxVec4, *mShared.mKernelLauncher.getCudaContextManager(), mData.maxVerts);
|
||||
mData.triIds = PX_DEVICE_MEMORY_ALLOC(PxU32, *mShared.mKernelLauncher.getCudaContextManager(), mData.maxTriIds);
|
||||
}
|
||||
|
||||
// Adopts caller-provided *device* buffers as extraction targets, releasing any owned ones
// first and relinquishing ownership.
void PxgDenseGridIsosurfaceExtractor::setResultBufferDevice(PxVec4* vertices, PxU32* triIndices, PxVec4* normals)
{
	if (mShared.mOwnsOutputGPUBuffers)
		releaseGPUBuffers();
	mShared.mOwnsOutputGPUBuffers = false;

	// Results are written straight into the caller's device memory.
	mData.verts = vertices;
	mData.triIds = triIndices;
	mData.normals = normals;

	// No host-side mirrors in device-buffer mode.
	mShared.mVertices = NULL;
	mShared.mTriIndices = NULL;
	mShared.mNormals = NULL;
}
|
||||
|
||||
// Directs results to caller-provided *host* buffers; the extractor owns the device-side
// staging buffers it allocates here.
void PxgDenseGridIsosurfaceExtractor::setResultBufferHost(PxVec4* vertices, PxU32* triIndices, PxVec4* normals)
{
	if (mShared.mOwnsOutputGPUBuffers)
		releaseGPUBuffers();
	mShared.mOwnsOutputGPUBuffers = true;

	mShared.mVertices = vertices;
	mShared.mTriIndices = triIndices;
	mShared.mNormals = normals;

	// Device staging buffers backing the host outputs.
	allocateGPUBuffers();
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Clears the sparse density field to zero ahead of the per-frame particle splat.
void PxgSparseGridIsosurfaceExtractor::clearDensity(CUstream stream)
{
	sparseGridClearDensity(mShared.mKernelLauncher, mData.mGrid.mGridParams, mData.density(), 0.0f, mSparseGrid.getSubgridsInUseGpuPointer(), stream);
}
|
||||
|
||||
void PxgDenseGridIsosurfaceExtractor::paramsToMCData()
|
||||
{
|
||||
const PxReal marginFactor = 1.01f;
|
||||
mData.kernelSize = mShared.mIsosurfaceParams.particleCenterToIsosurfaceDistance + marginFactor * mData.getSpacing();
|
||||
mData.threshold = -marginFactor * mData.getSpacing();
|
||||
}
|
||||
|
||||
// (Re)initializes the dense-grid extractor: releases prior state, derives the grid resolution
// from worldBounds/cellSize, and allocates all device/host working buffers.
// NOTE: the first parameter is named `cudaContextManager` but is actually the kernel launcher.
// NOTE: output buffers (verts/normals/triIds) are NOT allocated here — see setResultBuffer*().
void PxgDenseGridIsosurfaceExtractor::initialize(PxgKernelLauncher& cudaContextManager, const PxBounds3& worldBounds,
	PxReal cellSize, const PxIsosurfaceParams& isosurfaceParams, PxU32 maxNumParticles, PxU32 maxNumVertices, PxU32 maxNumTriangles)
{
	// Free any previously allocated state (uses the old mKernelLauncher, so call before reassigning it).
	release();

	mMaxParticles = maxNumParticles;
	mShared.mIsosurfaceParams = isosurfaceParams;
	// gridSpacing must be set before paramsToMCData(), which reads mData.getSpacing().
	mData.mGrid.mGridParams.gridSpacing = cellSize;
	mShared.mKernelLauncher = cudaContextManager;

	paramsToMCData();

	mData.restDensity = 1.0f;
	mData.mGrid.mGridParams.origin = worldBounds.minimum;

	// Grid resolution: one extra cell so the bounds' far edge is covered.
	PxVec3 dim = worldBounds.getDimensions();
	mData.mGrid.mGridParams.numCellsX = (int)PxFloor(dim.x / mData.mGrid.mGridParams.gridSpacing) + 1;
	mData.mGrid.mGridParams.numCellsY = (int)PxFloor(dim.y / mData.mGrid.mGridParams.gridSpacing) + 1;
	mData.mGrid.mGridParams.numCellsZ = (int)PxFloor(dim.z / mData.mGrid.mGridParams.gridSpacing) + 1;
	const PxU32 numCells = mData.maxNumCells();

	// Prefix-sum scan used by the marching-cubes count/create passes.
	mShared.mScan.initialize(&mShared.mKernelLauncher, numCells);

	// Per-cell working buffers: two density ping-pong buffers and the first-vertex index per cell.
	mData.buffer[0] = PX_DEVICE_MEMORY_ALLOC(PxReal, *cudaContextManager.getCudaContextManager(), numCells);
	mData.firstCellVert = PX_DEVICE_MEMORY_ALLOC(PxU32, *cudaContextManager.getCudaContextManager(), numCells);
	mData.buffer[1] = PX_DEVICE_MEMORY_ALLOC(PxReal, *cudaContextManager.getCudaContextManager(), numCells);

	mData.maxVerts = maxNumVertices;
	mData.maxTriIds = maxNumTriangles * 3;

	// Device-side [vertexCount, indexCount] pair written by the kernels...
	mData.numVerticesNumIndices = PX_DEVICE_MEMORY_ALLOC(PxU32, *cudaContextManager.getCudaContextManager(), 2);

	// ...and its pinned host mirror for readback.
	mShared.mNumVerticesNumIndices = PX_PINNED_MEMORY_ALLOC(PxU32, *cudaContextManager.getCudaContextManager(), 2);
	mShared.mNumVerticesNumIndices[0] = 0;
	mShared.mNumVerticesNumIndices[1] = 0;

	// Scratch space for the vertex smoothing passes.
	mData.smoothingBuffer = PX_DEVICE_MEMORY_ALLOC(PxVec4, *cudaContextManager.getCudaContextManager(), maxNumVertices);
}
|
||||
|
||||
// Frees all device and pinned buffers allocated by initialize(); output buffers are only
// freed when owned. Uses mData.firstCellVert as the "is initialized" sentinel, so a second
// call is a no-op provided PX_DEVICE_MEMORY_FREE nulls the pointer — presumably it does; TODO confirm.
void PxgDenseGridIsosurfaceExtractor::release()
{
	if (!mData.firstCellVert)
		return;

	mShared.mScan.release();
	PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.buffer[0]);
	PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.firstCellVert);
	PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.buffer[1]);
	// Caller-provided output buffers (setResultBufferDevice) are the caller's to free.
	if (mShared.mOwnsOutputGPUBuffers)
	{
		PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.verts);
		PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.normals);
		PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.triIds);
	}
	PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.numVerticesNumIndices);

	// Pinned host mirror of the device count pair.
	PX_PINNED_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mShared.mNumVerticesNumIndices);

	PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.smoothingBuffer);

	//PX_DELETE_THIS;
}
|
||||
|
||||
// Per-frame entry point: rebuilds the sparse grid around the particles, clears the density
// field, then runs the shared extraction pipeline (density splat, marching cubes, smoothing).
// All work is enqueued on `stream`; anisotropy/activeIndices arguments are forwarded to the
// density splat. No-op while the extractor is disabled.
void PxgSparseGridIsosurfaceExtractor::extractIsosurface(PxVec4* deviceParticlePos, const PxU32 numParticles, CUstream stream, PxU32* phases, PxU32 validPhaseMask,
	PxU32* activeIndices, PxVec4* anisotropy1, PxVec4* anisotropy2, PxVec4* anisotropy3, PxReal anisotropyFactor)
{
	if (!mShared.mEnabled)
		return;

	PX_CHECK_AND_RETURN(deviceParticlePos, "PxSparseGridIsosurfaceExtractor::extractIsosurface no valid deviceParticlePositions provided");

	// Refresh the active-subgrid count pointer before rebuilding the grid.
	mData.mGrid.mNumSubgridsInUse = mSparseGrid.getSubgridsInUseGpuPointer();
	mSparseGrid.updateSparseGrid(deviceParticlePos, numParticles, phases, stream, validPhaseMask);
	clearDensity(stream);
	mShared.extractIso<PxSparseIsosurfaceExtractionData>(mData, deviceParticlePos, numParticles, stream, phases, validPhaseMask, activeIndices, anisotropy1, anisotropy2, anisotropy3, anisotropyFactor);
}
|
||||
|
||||
void PxgSparseGridIsosurfaceExtractor::paramsToMCData()
|
||||
{
|
||||
const PxReal marginFactor = 1.01f;
|
||||
mData.kernelSize = mShared.mIsosurfaceParams.particleCenterToIsosurfaceDistance + marginFactor * mData.getSpacing();
|
||||
mData.threshold = -marginFactor * mData.getSpacing();
|
||||
}
|
||||
|
||||
|
||||
// (Re)initializes the sparse-grid extractor: releases prior state, copies the sparse-grid
// configuration, sizes the subgrid neighborhood from the isosurface radius, and allocates
// all device/host working buffers.
// NOTE: the first parameter is named `cudaContextManager` but is actually the kernel launcher.
// NOTE: output buffers (verts/normals/triIds) are NOT allocated here — see setResultBuffer*().
void PxgSparseGridIsosurfaceExtractor::initialize(PxgKernelLauncher& cudaContextManager, const PxSparseGridParams sparseGridParams,
	const PxIsosurfaceParams& isosurfaceParams, PxU32 maxNumParticles, PxU32 maxNumVertices, PxU32 maxNumTriangles)
{
	// Free any previously allocated state (uses the old mKernelLauncher, so call before reassigning it).
	release();

	mShared.mKernelLauncher = cudaContextManager;

	mData.restDensity = 1.0f;
	// gridSpacing must be set before paramsToMCData(), which reads mData.getSpacing().
	mData.mGrid.mGridParams.gridSpacing = sparseGridParams.gridSpacing;
	mData.mGrid.mGridParams.maxNumSubgrids = sparseGridParams.maxNumSubgrids;
	mData.mGrid.mGridParams.subgridSizeX = sparseGridParams.subgridSizeX;
	mData.mGrid.mGridParams.subgridSizeY = sparseGridParams.subgridSizeY;
	mData.mGrid.mGridParams.subgridSizeZ = sparseGridParams.subgridSizeZ;

	mShared.mIsosurfaceParams = isosurfaceParams;
	paramsToMCData();

	const PxU32 numCells = mData.maxNumCells();

	// Neighborhood radius (in cells) a particle's isosurface kernel can reach into adjacent
	// subgrids, clamped to the smallest subgrid dimension.
	PxU32 minSubgridRes = PxMin(PxMin(sparseGridParams.subgridSizeX, sparseGridParams.subgridSizeY), sparseGridParams.subgridSizeZ);
	PxU32 neighborhoodSize = PxMin(minSubgridRes, PxU32(PxFloor((isosurfaceParams.particleCenterToIsosurfaceDistance + sparseGridParams.gridSpacing) / sparseGridParams.gridSpacing)) + 1);

	mSparseGrid.initialize(&mShared.mKernelLauncher, mData.mGrid.mGridParams, maxNumParticles, neighborhoodSize);
	// Prefix-sum scan used by the marching-cubes count/create passes.
	mShared.mScan.initialize(&mShared.mKernelLauncher, numCells);

	// Per-cell working buffers: two density ping-pong buffers and the first-vertex index per cell.
	mData.buffer[0] = PX_DEVICE_MEMORY_ALLOC(PxReal, *cudaContextManager.getCudaContextManager(), numCells);
	mData.firstCellVert = PX_DEVICE_MEMORY_ALLOC(PxU32, *cudaContextManager.getCudaContextManager(), numCells);
	mData.buffer[1] = PX_DEVICE_MEMORY_ALLOC(PxReal, *cudaContextManager.getCudaContextManager(), numCells);

	mData.maxVerts = maxNumVertices;
	mData.maxTriIds = maxNumTriangles * 3;

	// Device-side [vertexCount, indexCount] pair written by the kernels...
	mData.numVerticesNumIndices = PX_DEVICE_MEMORY_ALLOC(PxU32, *cudaContextManager.getCudaContextManager(), 2);

	// ...and its pinned host mirror for readback.
	mShared.mNumVerticesNumIndices = PX_PINNED_MEMORY_ALLOC(PxU32, *cudaContextManager.getCudaContextManager(), 2);
	mShared.mNumVerticesNumIndices[0] = 0;
	mShared.mNumVerticesNumIndices[1] = 0;

	//Make sparse grid data available for isosurface extraction
	mData.mGrid.mUniqueHashkeyPerSubgrid = mSparseGrid.getUniqueHashkeysPerSubgrid();
	mData.mGrid.mSubgridNeighbors = mSparseGrid.getSubgridNeighborLookup();

	// Scratch space for the vertex smoothing passes.
	mData.smoothingBuffer = PX_DEVICE_MEMORY_ALLOC(PxVec4, *cudaContextManager.getCudaContextManager(), maxNumVertices);
}
|
||||
|
||||
void PxgSparseGridIsosurfaceExtractor::release()
{
	// firstCellVert doubles as the initialization flag: if it was never
	// allocated, initialize() did not run and there is nothing to free.
	if (!mData.firstCellVert)
		return;

	mSparseGrid.release();
	mShared.mScan.release();
	PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.buffer[0]);
	PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.firstCellVert);
	PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.buffer[1]);
	// Vertex/normal/index buffers are freed only when this extractor allocated
	// them itself (mOwnsOutputGPUBuffers); otherwise they are caller-provided.
	if (mShared.mOwnsOutputGPUBuffers)
	{
		PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.verts);
		PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.normals);
		PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.triIds);
	}
	PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.numVerticesNumIndices);

	// Host-pinned mirror of the vertex/index counters.
	PX_PINNED_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mShared.mNumVerticesNumIndices);

	PX_DEVICE_MEMORY_FREE(*mShared.mKernelLauncher.getCudaContextManager(), mData.smoothingBuffer);

	//PX_DELETE_THIS;
}
|
||||
|
||||
void PxgDenseGridIsosurfaceExtractor::clearDensity(CUstream stream)
{
	// Asynchronously zero the per-cell density field on the GPU so the next
	// extraction starts from an empty grid.
	PxgCudaHelpers::memsetAsync(*mShared.mKernelLauncher.getCudaContextManager(), mData.density(), PxReal(0.f), mData.maxNumCells(), stream);
}
|
||||
|
||||
void PxgDenseGridIsosurfaceExtractor::extractIsosurface(PxVec4* deviceParticlePos, const PxU32 numParticles, CUstream stream, PxU32* phases, PxU32 validPhaseMask,
	PxU32* activeIndices, PxVec4* anisotropy1, PxVec4* anisotropy2, PxVec4* anisotropy3, PxReal anisotropyFactor)
{
	// Dense-grid entry point: clear the density field, then run the shared
	// density-splat + meshing pipeline. No-op while the extractor is disabled.
	if (!mShared.mEnabled)
		return;

	PX_CHECK_AND_RETURN(deviceParticlePos, "PxDenseGridIsosurfaceExtractor::extractIsosurface no valid deviceParticlePositions provided");
	clearDensity(stream);
	mShared.extractIso<PxIsosurfaceExtractionData>(mData, deviceParticlePos, numParticles, stream, phases, validPhaseMask, activeIndices, anisotropy1, anisotropy2, anisotropy3, anisotropyFactor);
}
|
||||
|
||||
// Turns the per-cell density field into a triangle mesh on the GPU:
// optional grid filtering, marching-cubes style vertex/triangle generation,
// Taubin vertex smoothing, normal computation, then async host readback.
// All work is enqueued on 'stream'; nothing here synchronizes.
template<typename DenseOrSparseGpuDataPackage>
void PxgSharedIsosurfaceExtractor::meshFromDensity(DenseOrSparseGpuDataPackage& mData, CUstream stream)
{
	// firstCellVert unset means the extractor was never initialized.
	if (!mData.firstCellVert)
		return;

	PxCudaContext* cudaContext = mKernelLauncher.getCudaContextManager()->getCudaContext();

	PxU32 passIndex = 0;
	PxIsosurfaceGridFilteringType::Enum operation;
	bool useSDFStyleDensityTransfer = true;
	// SDF-style densities are signed the other way around, so grow/shrink
	// operations flip sign accordingly.
	PxReal sign = useSDFStyleDensityTransfer ? -1.0f : 1.0f;

	// Gaussian weight of a direct grid neighbor for the smoothing filter.
	PxReal neighborWeight = PxExp((-mData.getSpacing() * mData.getSpacing()) / (2.0f* mIsosurfaceParams.gridSmoothingRadius*mIsosurfaceParams.gridSmoothingRadius));

	// Up to 32 filtering passes are encoded in the isosurface params.
	while (passIndex < 32 && mIsosurfaceParams.getGridFilteringPass(passIndex, operation))
	{
		if (operation == PxIsosurfaceGridFilteringType::eSMOOTH)
			gridFilterGaussLaunch(mKernelLauncher, mData, neighborWeight, stream);
		else
			gridFilterDilateErodeLaunch(mKernelLauncher, mData, sign * (operation == PxIsosurfaceGridFilteringType::eGROW ? 1.0f : -1.0f), stream);
		++passIndex;
	}

	// create vertices: count per cell, prefix-sum into offsets, then emit
	countCellVertsLaunch(mKernelLauncher, mData, stream);
	mScan.exclusiveScan(mData.firstCellVert, stream);
	createVertsLaunch(mKernelLauncher, mData, stream);

	// same count/scan/emit pattern for the triangle indices
	countTriIdsLaunch(mKernelLauncher, mData, stream);
	mScan.exclusiveScan(mData.firstCellTriId(), stream);

	createTriIdsLaunch(mKernelLauncher, mData, !useSDFStyleDensityTransfer, stream);

	// smooth verts
	//The normals get computed after the vertex smoothing. We can use the normal storage as a temporary storage.
	PxVec4* avgVerts = mData.normals;

	PxgCudaHelpers::memsetAsync(*mKernelLauncher.getCudaContextManager(), reinterpret_cast<PxReal*>(avgVerts), PxReal(0.f), mData.maxVerts * 4, stream);

	//Compute weights for Taubin smoothing
	PxReal a = -1.0f;
	PxReal b = 0.01f; //pass band range 0...1
	PxReal c = 2.0f;

	// Roots of the quadratic give the alternating lambda (positive) and
	// mu (negative) factors used below.
	PxReal d = PxSqrt(b * b - 4 * a * c);
	PxReal solution1 = (-b + d) / (2.0f * a * c);
	PxReal solution2 = (-b - d) / (2.0f * a * c);

	PxReal lambdaMu[2];
	lambdaMu[0] = PxMax(solution1, solution2);
	lambdaMu[1] = PxMin(solution1, solution2);

	//The following parameter convert Taubin smoothing to classical Laplassian smoothing
	//lambdaMu[0] = 1.0f;
	//lambdaMu[1] = 1.0f;

	// Alternate lambda/mu each pass (i % 2) as Taubin smoothing prescribes.
	for (PxU32 i = 0; i < mIsosurfaceParams.numMeshSmoothingPasses; i++)
	{
		smoothVertsLaunch(mKernelLauncher, mData.verts, avgVerts, mData.triIds, &mData.numVerticesNumIndices[1], stream);
		averageVertsLaunch(mKernelLauncher, avgVerts, mData.verts, mData.numVerticesNumIndices, stream, lambdaMu[i % 2]);
	}

	// compute normals
	computeNormalsLaunch(mKernelLauncher, mData, stream);
	normalizeNormalsLaunch(mKernelLauncher, mData, stream);

	for (PxU32 i = 0; i < mIsosurfaceParams.numMeshNormalSmoothingPasses; i++)
	{
		smoothNormalsLaunch(mKernelLauncher, mData, stream);
		smoothNormalsNormalizeLaunch(mKernelLauncher, mData, stream);
	}

	// Async copy of results into any registered host-side output buffers.
	// NOTE(review): full maxVerts/maxTriIds capacity is copied, not just the
	// used counts — presumably the host buffers are sized for the maximum.
	if (mVertices)
		cudaContext->memcpyDtoHAsync(mVertices, CUdeviceptr(mData.verts), mData.maxVerts * sizeof(PxVec4), stream);
	if (mTriIndices)
		cudaContext->memcpyDtoHAsync(mTriIndices, CUdeviceptr(mData.triIds), mData.maxTriIds * sizeof(PxU32), stream);
	if (mNormals)
		cudaContext->memcpyDtoHAsync(mNormals, CUdeviceptr(mData.normals), mData.maxVerts * sizeof(PxVec4), stream);

	// Actual vertex/index counts always come back to the pinned mirror.
	cudaContext->memcpyDtoHAsync(mNumVerticesNumIndices, CUdeviceptr(mData.numVerticesNumIndices), 2 * sizeof(PxU32), stream);
}
|
||||
|
||||
// Full extraction pipeline shared by the dense and sparse extractors: splat
// particle densities into the grid (SDF-style), then triangulate the field
// via meshFromDensity(). Anisotropy inputs are optional kernel parameters.
template<typename DenseOrSparseGpuDataPackage>
void PxgSharedIsosurfaceExtractor::extractIso(DenseOrSparseGpuDataPackage& mData, PxVec4* deviceParticlePos, const PxU32 numParticles, CUstream stream, PxU32* phases, PxU32 validPhaseMask,
	PxU32* activeIndices, PxVec4* anisotropy1, PxVec4* anisotropy2, PxVec4* anisotropy3, PxReal anisotropyFactor)
{
	// Not initialized — nothing allocated to write into.
	if (!mData.firstCellVert)
		return;

	computeParticleDensityLaunchUsingSDF(mKernelLauncher, deviceParticlePos, numParticles, phases, validPhaseMask, mData, stream,
		activeIndices, anisotropy1, anisotropy2, anisotropy3, anisotropyFactor);

	meshFromDensity(mData, stream);
}
|
||||
|
||||
// Explicit instantiations for the two grid data packages (dense and sparse),
// so the template definitions above can live in this translation unit.
template void PxgSharedIsosurfaceExtractor::meshFromDensity<PxIsosurfaceExtractionData>(PxIsosurfaceExtractionData& mData, CUstream stream);
template void PxgSharedIsosurfaceExtractor::meshFromDensity<PxSparseIsosurfaceExtractionData>(PxSparseIsosurfaceExtractionData& mData, CUstream stream);

template void PxgSharedIsosurfaceExtractor::extractIso<PxIsosurfaceExtractionData>(PxIsosurfaceExtractionData& mData, PxVec4* deviceParticlePos, const PxU32 numParticles, CUstream stream, PxU32* phases, PxU32 validPhaseMask,
	PxU32* activeIndices, PxVec4* anisotropy1, PxVec4* anisotropy2, PxVec4* anisotropy3, PxReal anisotropyFactor);
template void PxgSharedIsosurfaceExtractor::extractIso<PxSparseIsosurfaceExtractionData>(PxSparseIsosurfaceExtractionData& mData, PxVec4* deviceParticlePos, const PxU32 numParticles, CUstream stream, PxU32* phases, PxU32 validPhaseMask,
	PxU32* activeIndices, PxVec4* anisotropy1, PxVec4* anisotropy2, PxVec4* anisotropy3, PxReal anisotropyFactor);
|
||||
|
||||
class PxIsosurfaceExtractorCallback : public PxParticleSystemCallback
{
	// Particle-system callback that runs isosurface extraction right after the
	// solver finishes, on the solver's CUDA stream.
public:
	PxIsosurfaceExtractor* mIsosurfaceExtractor;	// not owned by this callback

	void initialize(PxIsosurfaceExtractor* isosurfaceExtractor)
	{
		mIsosurfaceExtractor = isosurfaceExtractor;
	}

	virtual void onPostSolve(const PxGpuMirroredPointer<PxGpuParticleSystem>& gpuParticleSystem, CUstream stream)
	{
		// Extract from the unsorted particle positions; only fluid-phase
		// particles contribute. Active-index filtering is disabled (NULL).
		mIsosurfaceExtractor->extractIsosurface(reinterpret_cast<PxVec4*>(gpuParticleSystem.mHostPtr->mUnsortedPositions_InvMass),
			gpuParticleSystem.mHostPtr->mCommonData.mNumParticles, stream, gpuParticleSystem.mHostPtr->mUnsortedPhaseArray,
			PxParticlePhaseFlag::eParticlePhaseFluid, /*gpuParticleSystem.mHostPtr->mActiveArray*/NULL);
	}

	virtual void onBegin(const PxGpuMirroredPointer<PxGpuParticleSystem>& /*gpuParticleSystem*/, CUstream /*stream*/) { }

	virtual void onAdvance(const PxGpuMirroredPointer<PxGpuParticleSystem>& /*gpuParticleSystem*/, CUstream /*stream*/) { }
};
|
||||
}
|
||||
451
engine/third_party/physx/source/gpusimulationcontroller/src/PxgJointManager.cpp
vendored
Normal file
451
engine/third_party/physx/source/gpusimulationcontroller/src/PxgJointManager.cpp
vendored
Normal file
@@ -0,0 +1,451 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgJointManager.h"
|
||||
#include "DyConstraint.h"
|
||||
#include "PxgD6JointData.h"
|
||||
#include "PxgConstraintPrep.h"
|
||||
#include "common/PxProfileZone.h"
|
||||
#include "PxsPartitionEdge.h"
|
||||
|
||||
#define GPU_JOINT_PREP 1
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Marks a GPU pre-prep slot as unused by invalidating both joint endpoints.
static PX_FORCE_INLINE void resetConstraintPrepPrep(PxgConstraintPrePrep& preData)
{
	const PxNodeIndex invalid(PX_INVALID_NODE);
	preData.mNodeIndexA = invalid;
	preData.mNodeIndexB = invalid;
}
|
||||
|
||||
// Mirrors the CPU-side constraint settings consumed by the GPU prep kernels.
static PX_FORCE_INLINE void setupConstraintPrepPrep(PxgConstraintPrePrep& preData, const Dy::Constraint* constraint)
{
	const Dy::Constraint& src = *constraint;
	preData.mFlags = src.flags;
	preData.mLinBreakForce = src.linBreakForce;
	preData.mAngBreakForce = src.angBreakForce;
}
|
||||
|
||||
// Variant that also records the two island-node endpoints of the joint.
static PX_FORCE_INLINE void setupConstraintPrepPrep(PxgConstraintPrePrep& preData, const Dy::Constraint* constraint,
	const IG::IslandSim& islandSim, const PxNodeIndex nodeIndex0, const PxNodeIndex nodeIndex1)
{
	PX_UNUSED(islandSim);
	// Both endpoints must either be valid island nodes or explicitly invalid.
	PX_ASSERT(nodeIndex0.index() < islandSim.getNbNodes() || nodeIndex0.index() == PX_INVALID_NODE);
	PX_ASSERT(nodeIndex1.index() < islandSim.getNbNodes() || nodeIndex1.index() == PX_INVALID_NODE);

	preData.mNodeIndexA = nodeIndex0;
	preData.mNodeIndexB = nodeIndex1;

	::setupConstraintPrepPrep(preData, constraint);
}
|
||||
|
||||
// Routes all array storage through the supplied virtual allocator; the
// direct-GPU flag decides whether the host constraint-id map is maintained.
PxgJointManager::PxgJointManager(const PxVirtualAllocator& allocator, bool isDirectGpuApiEnabled) :
	mGpuRigidJointData(allocator), mGpuArtiJointData(allocator),
	mGpuRigidJointPrePrep(allocator), mGpuArtiJointPrePrep(allocator),
	mCpuRigidConstraintData(allocator), mCpuRigidConstraintRows(allocator),
	mCpuArtiConstraintData(allocator), mCpuArtiConstraintRows(allocator),
	mDirtyGPURigidJointDataIndices(allocator), mDirtyGPUArtiJointDataIndices(allocator)
	, mGpuConstraintIdMapHost(allocator)
	, mMaxConstraintId(0)
	, mIsGpuConstraintIdMapDirty(false)
	, mIsDirectGpuApiEnabled(isDirectGpuApiEnabled)
{
}
|
||||
|
||||
// Nothing to tear down explicitly: all containers and ID pools release their
// storage through their own destructors.
PxgJointManager::~PxgJointManager()
{
}
|
||||
|
||||
// Sizes the CPU-side scratch arrays for the worst case of maxConstraintRows
// rows per currently tracked CPU-prepped joint, and resets the row counters.
void PxgJointManager::reserveMemory(PxU32 maxConstraintRows)
{
	const PxU32 rigidCount = mCpuRigidConstraints.size();
	const PxU32 artiCount = mCpuArtiConstraints.size();
	const PxU32 rigidRows = rigidCount * maxConstraintRows;
	const PxU32 artiRows = artiCount * maxConstraintRows;

	// forceSize_Unsafe skips element construction; the arrays are filled later.
	mCpuRigidConstraintData.reserve(rigidCount);
	mCpuRigidConstraintData.forceSize_Unsafe(rigidCount);
	mCpuRigidConstraintRows.reserve(rigidRows);
	mCpuRigidConstraintRows.forceSize_Unsafe(rigidRows);

	mCpuArtiConstraintData.reserve(artiCount);
	mCpuArtiConstraintData.forceSize_Unsafe(artiCount);
	mCpuArtiConstraintRows.reserve(artiRows);
	mCpuArtiConstraintRows.forceSize_Unsafe(artiRows);

	mNbCpuRigidConstraintRows = 0;
	mNbCpuArtiConstraintRows = 0;
}
|
||||
|
||||
void PxgJointManager::reserveMemoryPreAddRemove()
|
||||
{
|
||||
if (mMaxConstraintId >= mGpuConstraintIdMapHost.size())
|
||||
{
|
||||
const PxU32 newSize = mMaxConstraintId * 2 + 1;
|
||||
mGpuConstraintIdMapHost.resize(newSize);
|
||||
|
||||
mIsGpuConstraintIdMapDirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Tracks the largest constraint id among GPU-prepped joints so that
// reserveMemoryPreAddRemove() can size the host constraint-id map.
void PxgJointManager::registerJoint(const Dy::Constraint& constraint)
{
	const bool gpuPrepped = (constraint.flags & PxConstraintFlag::eGPU_COMPATIBLE) && GPU_JOINT_PREP;
	if (!(mIsDirectGpuApiEnabled && gpuPrepped))
		return;

	// mMaxConstraintId must be able to hold any constraint index.
	PX_COMPILE_TIME_ASSERT(sizeof(mMaxConstraintId) >= sizeof(constraint.index));

	mMaxConstraintId = PxMax(constraint.index, mMaxConstraintId);
}
|
||||
|
||||
// Removes the bookkeeping for one joint edge, whichever side (GPU- or
// CPU-prepped) currently tracks it. The caller passes either the rigid or the
// articulation container set so both variants share one implementation.
static PX_FORCE_INLINE void removeJointGpuCpu(PxU32 edgeIndex,
	const IG::GPUExternalData& islandSimGpuData,
	PxHashMap<PxU32, PxU32>& gpuConstraintIndexMap,
	PxPinnedArray<PxgConstraintPrePrep>& gpuConstraintPrePrepEntries,
	PxInt32ArrayPinned& gpuDirtyJointDataIndices,
	Cm::IDPool& gpuIdPool,
	PxHashMap<PxU32, PxU32>& cpuConstraintIndexMap,
	PxArray<const Dy::Constraint*>& cpuConstraintList,
	PxArray<PxU32>& cpuConstraintEdgeIndices,
	PxArray<PxU32>& cpuUniqueIndices,
	PxArray<PxU32>& jointIndices,
	PxgJointManager::ConstraintIdMap& gpuConstraintIdMapHost,
	PxHashMap<PxU32, PxU32>& edgeIndexToGpuConstraintIdMap,
	bool& isGpuConstraintIdMapDirty,
	bool isDirectGpuApiEnabled)
{
	const PxPair<const PxU32, PxU32>* gpuPair = gpuConstraintIndexMap.find(edgeIndex);
	if (gpuPair)
	{
		//gpu
		const PxU32 jointDataIndex = gpuPair->second;

		//update the dirty list so the invalidated slot gets re-uploaded
		gpuDirtyJointDataIndices.pushBack(jointDataIndex);

		// Invalidate both node indices so prep kernels skip this slot.
		resetConstraintPrepPrep(gpuConstraintPrePrepEntries[jointDataIndex]);

		// The slot id can now be reused by a later addJointGpu().
		gpuIdPool.freeID(jointDataIndex);

		gpuConstraintIndexMap.erase(edgeIndex);

		if (isDirectGpuApiEnabled)
		{
			PxPair<const PxU32, PxU32> entry;
			const bool found = edgeIndexToGpuConstraintIdMap.erase(edgeIndex, entry);
			PX_ASSERT(found);

			if (found) // should always be the case but extra safety if something went horribly wrong
			{
				const PxU32 mapIndex = entry.second;

				PX_ASSERT(mapIndex < gpuConstraintIdMapHost.size());

				gpuConstraintIdMapHost[mapIndex].invalidate();

				isGpuConstraintIdMapDirty = true;
			}
		}
	}
	else
	{
		//cpu
		const PxPair<const PxU32, PxU32>* cpuPair = cpuConstraintIndexMap.find(edgeIndex);
		if (cpuPair)
		{
			// Swap-remove from the parallel CPU arrays: the last element moves
			// into the freed slot, so its bookkeeping must be patched below.
			const PxU32 index = cpuPair->second;
			cpuConstraintList.replaceWithLast(index);

			// Edge index of the element about to be moved into 'index'.
			PxU32 replaceEdgeIndex = cpuConstraintEdgeIndices.back();

			cpuConstraintEdgeIndices.replaceWithLast(index);

			cpuConstraintIndexMap[replaceEdgeIndex] = index;

			cpuUniqueIndices.replaceWithLast(index);

			// Keep jointIndices consistent for the moved element, if it is
			// currently part of an island partition.
			const PartitionEdge* pEdge = islandSimGpuData.getFirstPartitionEdge(replaceEdgeIndex);
			if (pEdge)
				jointIndices[pEdge->mUniqueIndex] = index;

			cpuConstraintIndexMap.erase(edgeIndex);
		}
	}
}
|
||||
|
||||
// Dispatches joint removal to the articulation or rigid-body bookkeeping,
// depending on the type of the two nodes joined by this island edge.
void PxgJointManager::removeJoint(PxU32 edgeIndex, PxArray<PxU32>& jointIndices, const IG::CPUExternalData& islandSimCpuData, const IG::GPUExternalData& islandSimGpuData)
{
	const PxNodeIndex nodeIndex0 = islandSimCpuData.getNodeIndex1(edgeIndex);
	const PxNodeIndex nodeIndex1 = islandSimCpuData.getNodeIndex2(edgeIndex);

	if (nodeIndex0.isArticulation() || nodeIndex1.isArticulation())
	{
		// Articulation container set.
		removeJointGpuCpu(edgeIndex, islandSimGpuData,
			mGpuArtiConstraintIndices, mGpuArtiJointPrePrep, mDirtyGPUArtiJointDataIndices, mGpuArtiJointDataIDPool,
			mCpuArtiConstraintIndices, mCpuArtiConstraints, mCpuArtiConstraintEdgeIndices, mCpuArtiUniqueIndex,
			jointIndices,
			mGpuConstraintIdMapHost, mEdgeIndexToGpuConstraintIdMap,
			mIsGpuConstraintIdMapDirty, mIsDirectGpuApiEnabled);
	}
	else
	{
		// Rigid-body container set.
		removeJointGpuCpu(edgeIndex, islandSimGpuData,
			mGpuRigidConstraintIndices, mGpuRigidJointPrePrep, mDirtyGPURigidJointDataIndices, mGpuRigidJointDataIDPool,
			mCpuRigidConstraintIndices, mCpuRigidConstraints, mCpuRigidConstraintEdgeIndices, mCpuRigidUniqueIndex,
			jointIndices,
			mGpuConstraintIdMapHost, mEdgeIndexToGpuConstraintIdMap,
			mIsGpuConstraintIdMapDirty, mIsDirectGpuApiEnabled);
	}
}
|
||||
|
||||
// Allocates a GPU joint-data slot for a newly added GPU-prepped joint, copies
// the joint's constant block into it, and wires up all index mappings
// (edge -> slot, unique id -> slot, and the direct-GPU constraint-id map).
static PX_FORCE_INLINE void addJointGpu(const Dy::Constraint& constraint, PxU32 edgeIndex, PxU32 uniqueId,
	PxNodeIndex nodeIndex0, PxNodeIndex nodeIndex1,
	Cm::IDPool& idPool,
	PxPinnedArray<PxgD6JointData>& jointDataEntries,
	PxPinnedArray<PxgConstraintPrePrep>& constraintPrePrepEntries,
	PxInt32ArrayPinned& dirtyJointDataIndices,
	PxHashMap<PxU32, PxU32>& constraintIndexMap,
	PxArray<PxU32>& jointIndices,
	PxPinnedArray<PxgSolverConstraintManagerConstants>& managerIter,
	const IG::IslandSim& islandSim,
	PxgJointManager::ConstraintIdMap& gpuConstraintIdMapHost,
	PxHashMap<PxU32, PxU32>& edgeIndexToGpuConstraintIdMap,
	bool& isGpuConstraintIdMapDirty,
	bool isDirectGpuApiEnabled)
{
	//In GPU, we work with PxgD6JointData and fill in PxgConstraintData
	const PxU32 jointDataId = idPool.getNewID();

	// Grow both pinned arrays geometrically; they are indexed by jointDataId
	// in lockstep, so they must stay the same size.
	if (jointDataId >= jointDataEntries.capacity())
	{
		const PxU32 capacity = jointDataEntries.capacity() * 2 + 1;
		jointDataEntries.resize(capacity);
		constraintPrePrepEntries.resize(capacity);
	}

	PxgD6JointData& jointData = jointDataEntries[jointDataId];

	// The joint's constant block carries the D6 joint data blob.
	PxMemCopy(&jointData, constraint.constantBlock, constraint.constantBlockSize);

	//mark dirty so the new slot gets uploaded to the device
	dirtyJointDataIndices.pushBack(jointDataId);

	constraintIndexMap.insert(edgeIndex, jointDataId);

	::setupConstraintPrepPrep(constraintPrePrepEntries[jointDataId], &constraint, islandSim, nodeIndex0, nodeIndex1);

	jointIndices[uniqueId] = jointDataId;

	managerIter[uniqueId].mConstraintWriteBackIndex = constraint.index; // this is the joint writeback index

	if (isDirectGpuApiEnabled)
	{
		// reserveMemoryPreAddRemove() guarantees the id map is large enough.
		PX_ASSERT(constraint.index < gpuConstraintIdMapHost.size());
		PX_ASSERT(!edgeIndexToGpuConstraintIdMap.find(edgeIndex));

		edgeIndexToGpuConstraintIdMap.insert(edgeIndex, constraint.index);

		gpuConstraintIdMapHost[constraint.index].setJointDataId(jointDataId);

		isGpuConstraintIdMapDirty = true;
	}
}
|
||||
|
||||
// Appends a CPU-prepped joint: its slot in the three parallel arrays is the
// current list size, and all index mappings are updated to point at it.
static PX_FORCE_INLINE void addJointCpu(const Dy::Constraint& constraint, PxU32 edgeIndex, PxU32 uniqueId,
	PxArray<const Dy::Constraint*>& constraintList,
	PxHashMap<PxU32, PxU32>& constraintIndexMap,
	PxArray<PxU32>& constraintEdgeIndices,
	PxArray<PxU32>& uniqueIndices,
	PxArray<PxU32>& jointIndices,
	PxPinnedArray<PxgSolverConstraintManagerConstants>& managerIter)
{
	//In CPU, we work with Dy::Constraint and fill in PxgConstraintData
	const PxU32 slot = constraintList.size();

	constraintList.pushBack(&constraint);
	constraintEdgeIndices.pushBack(edgeIndex);
	uniqueIndices.pushBack(uniqueId);

	constraintIndexMap.insert(edgeIndex, slot);
	jointIndices[uniqueId] = slot;
	managerIter[uniqueId].mConstraintWriteBackIndex = constraint.index; // this is the joint writeback index
}
|
||||
|
||||
// Routes a newly activated joint to one of four bookkeeping paths:
// {GPU-prepped, CPU-prepped} x {articulation, rigid}.
void PxgJointManager::addJoint(PxU32 edgeIndex, const Dy::Constraint* constraint, IG::IslandSim& islandSim, PxArray<PxU32>& jointIndices,
	PxPinnedArray<PxgSolverConstraintManagerConstants>& managerIter, PxU32 uniqueId)
{
	const PxNodeIndex nodeIndex0 = islandSim.mCpuData.getNodeIndex1(edgeIndex);
	const PxNodeIndex nodeIndex1 = islandSim.mCpuData.getNodeIndex2(edgeIndex);

	bool isArticulationJoint = nodeIndex0.isArticulation() || nodeIndex1.isArticulation();
	//d6 joint(articulation + rigid body) with GPU shader
	if ((constraint->flags & PxConstraintFlag::eGPU_COMPATIBLE) && GPU_JOINT_PREP)
	{
		//GPU shader
		if (isArticulationJoint)
		{
			addJointGpu(*constraint, edgeIndex, uniqueId, nodeIndex0, nodeIndex1,
				mGpuArtiJointDataIDPool, mGpuArtiJointData, mGpuArtiJointPrePrep,
				mDirtyGPUArtiJointDataIndices, mGpuArtiConstraintIndices,
				jointIndices, managerIter, islandSim,
				mGpuConstraintIdMapHost, mEdgeIndexToGpuConstraintIdMap,
				mIsGpuConstraintIdMapDirty, mIsDirectGpuApiEnabled);
		}
		else
		{
			addJointGpu(*constraint, edgeIndex, uniqueId, nodeIndex0, nodeIndex1,
				mGpuRigidJointDataIDPool, mGpuRigidJointData, mGpuRigidJointPrePrep,
				mDirtyGPURigidJointDataIndices, mGpuRigidConstraintIndices,
				jointIndices, managerIter, islandSim,
				mGpuConstraintIdMapHost, mEdgeIndexToGpuConstraintIdMap,
				mIsGpuConstraintIdMapDirty, mIsDirectGpuApiEnabled);
		}
	}
	else
	{
		//CPU
		if (isArticulationJoint)
		{
			addJointCpu(*constraint, edgeIndex, uniqueId, mCpuArtiConstraints, mCpuArtiConstraintIndices,
				mCpuArtiConstraintEdgeIndices, mCpuArtiUniqueIndex, jointIndices, managerIter);
		}
		else
		{
			addJointCpu(*constraint, edgeIndex, uniqueId, mCpuRigidConstraints, mCpuRigidConstraintIndices,
				mCpuRigidConstraintEdgeIndices, mCpuRigidUniqueIndex, jointIndices, managerIter);
		}
	}
}
|
||||
|
||||
// Refreshes the mirrored GPU joint data for an edge if it is tracked in the
// given map. Returns true when the edge was found (and updated), false when
// the joint is not in this map (asleep, or tracked by the other map).
static PX_FORCE_INLINE bool updateJointGpu(const Dy::Constraint& constraint, PxU32 edgeIndex,
	const PxHashMap<PxU32, PxU32>& constraintIndexMap,
	PxPinnedArray<PxgD6JointData>& jointDataEntries,
	PxPinnedArray<PxgConstraintPrePrep>& constraintPrePrepEntries,
	PxInt32ArrayPinned& dirtyJointDataIndices)
{
	const PxPair<const PxU32, PxU32>* pair = constraintIndexMap.find(edgeIndex);

	//ML:: if pair is NULL, this means the pair this constraint connect to asleep. In this case, we will let the updateIncrementalIslands to
	//activate this pair of objects and create new gpu joint data in addJoint(). Otherwise, we need to update the jointData and push the jointDataId to the dirty
	//list
	if (pair)
	{
		const PxU32 jointDataId = pair->second;

		// Re-read the joint's constant block (the D6 joint data blob).
		PxgD6JointData jointDataCopy = *reinterpret_cast<const PxgD6JointData*>(constraint.constantBlock);

		jointDataEntries[jointDataId] = jointDataCopy;
		// Schedule the refreshed slot for re-upload to the device.
		dirtyJointDataIndices.pushBack(jointDataId);

		::setupConstraintPrepPrep(constraintPrePrepEntries[jointDataId], &constraint);

		return true;
	}

	return false;
}
|
||||
|
||||
// Refreshes the mirrored GPU joint data after a joint's parameters changed.
// Only GPU-prepped joints store mirrored data; CPU-prepped joints read
// straight from Dy::Constraint during prep, so nothing to do for them.
void PxgJointManager::updateJoint(PxU32 edgeIndex, const Dy::Constraint* constraint)
{
	// Parenthesized bitwise test for consistency with registerJoint()/addJoint()
	// ('&' already binds tighter than '&&', so behavior is unchanged).
	if ((constraint->flags & PxConstraintFlag::eGPU_COMPATIBLE) && GPU_JOINT_PREP)
	{
		// Try the rigid-rigid map first; if the edge is not there, it is either
		// an articulation joint or currently asleep (tracked in neither map).
		if (updateJointGpu(*constraint, edgeIndex, mGpuRigidConstraintIndices,
			mGpuRigidJointData, mGpuRigidJointPrePrep, mDirtyGPURigidJointDataIndices))
		{
			// if a joint does not involve an articulation link, it should not be
			// tracked in mGpuArtiConstraintIndices. Keep in mind though that this
			// function might get called for joints that are asleep, in which case
			// it will not be tracked in either of the two maps.
			PX_ASSERT(mGpuArtiConstraintIndices.find(edgeIndex) == NULL);
		}
		else
		{
			updateJointGpu(*constraint, edgeIndex, mGpuArtiConstraintIndices,
				mGpuArtiJointData, mGpuArtiJointPrePrep, mDirtyGPUArtiJointDataIndices);
		}
	}
}
|
||||
|
||||
// GPU-prepped joint counts. getMaxID() is the high-water mark of slot ids ever
// allocated (used by update() as the base offset for CPU joints), while
// getNumUsedID() is the number of currently live slots.
PxU32 PxgJointManager::getGpuNbRigidConstraints()
{
	return mGpuRigidJointDataIDPool.getMaxID();
}

PxU32 PxgJointManager::getGpuNbArtiConstraints()
{
	return mGpuArtiJointDataIDPool.getMaxID();
}

PxU32 PxgJointManager::getGpuNbActiveRigidConstraints()
{
	return mGpuRigidJointDataIDPool.getNumUsedID();
}

PxU32 PxgJointManager::getGpuNbActiveArtiConstraints()
{
	return mGpuArtiJointDataIDPool.getNumUsedID();
}

// CPU-prepped joint counts currently tracked.
PxU32 PxgJointManager::getCpuNbRigidConstraints()
{
	return mCpuRigidConstraints.size();
}

PxU32 PxgJointManager::getCpuNbArtiConstraints()
{
	return mCpuArtiConstraints.size();
}
|
||||
|
||||
// Rebases the output index of every CPU-prepped joint: CPU joints are laid
// out after the GPU-prepped ones, so each gets (GPU slot count + list index).
void PxgJointManager::update(PxArray<PxU32>& jointOutputIndex)
{
	PX_PROFILE_ZONE("PxgJointManager.update", 0);

	//update constraints transform
	const PxU32 nbGpuRigidConstraints = mGpuRigidJointDataIDPool.getMaxID() ;//mGpuConstraints.size();

	//reassign cpu rigid index
	const PxU32 nbCpuRigidConstraints = mCpuRigidConstraints.size();
	for (PxU32 i = 0; i < nbCpuRigidConstraints; ++i)
	{
		jointOutputIndex[mCpuRigidUniqueIndex[i]] = nbGpuRigidConstraints + i;
	}

	const PxU32 nbGpuArtiConstraints = mGpuArtiJointDataIDPool.getMaxID();

	//reassign cpu articulation index
	const PxU32 nbCpuArtiConstraints = mCpuArtiConstraints.size();
	for (PxU32 i = 0; i < nbCpuArtiConstraints; ++i)
	{
		jointOutputIndex[mCpuArtiUniqueIndex[i]] = nbGpuArtiConstraints + i;
	}
}
|
||||
|
||||
void PxgJointManager::reset()
|
||||
{
|
||||
mDirtyGPURigidJointDataIndices.resize(0);
|
||||
mDirtyGPUArtiJointDataIndices.resize(0);
|
||||
}
|
||||
135
engine/third_party/physx/source/gpusimulationcontroller/src/PxgNonRigidCoreCommon.cpp
vendored
Normal file
135
engine/third_party/physx/source/gpusimulationcontroller/src/PxgNonRigidCoreCommon.cpp
vendored
Normal file
@@ -0,0 +1,135 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgNonRigidCoreCommon.h"
|
||||
#include "PxgCudaMemoryAllocator.h"
|
||||
#include "cudamanager/PxCudaContext.h"
|
||||
#include "cudamanager/PxCudaContextManager.h"
|
||||
#include "PxgRadixSortKernelIndices.h"
|
||||
#include "common/PxPhysXCommonConfig.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Caches the CUDA / kernel-wrangler plumbing shared by every GPU core object.
// All pointers are borrowed from the owning simulation controller; the stream
// starts out null and is created later by the derived core.
PxgEssentialCore::PxgEssentialCore(PxgCudaKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager,
	PxgHeapMemoryAllocatorManager* heapMemoryManager, PxgSimulationController* simController, PxgGpuContext* gpuContext) :
	mGpuKernelWranglerManager(gpuKernelWrangler),
	mCudaContextManager(cudaContextManager),
	mCudaContext(cudaContextManager->getCudaContext()),
	mHeapMemoryManager(heapMemoryManager),
	mSimController(simController),
	mGpuContext(gpuContext),
	mStream(0)
{
}
|
||||
|
||||
// Non-rigid core: owns the device-side collision scratch stack and the full
// set of contact-sorting buffers (contacts sorted by rigid id and by particle
// id, plus radix-sort scratch). 'maxContacts' bounds how many contacts the
// sort buffers can hold; 'collisionStackSize' is the byte size of the
// collision stack; 'statType' attributes the allocations in the heap stats.
PxgNonRigidCore::PxgNonRigidCore(PxgCudaKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager,
	PxgHeapMemoryAllocatorManager* heapMemoryManager, PxgSimulationController* simController, PxgGpuContext* gpuContext,
	const PxU32 maxContacts, const PxU32 collisionStackSize, PxsHeapStats::Enum statType) :
	PxgEssentialCore(gpuKernelWrangler, cudaContextManager, heapMemoryManager, simController, gpuContext),
	mIntermStackAlloc(*heapMemoryManager->mDeviceMemoryAllocators, collisionStackSize),
	mStackSizeNeededOnDevice(heapMemoryManager, statType),
	mStackSizeNeededPinned(NULL),
	mMaxContacts(maxContacts),
	mCollisionStackSizeBytes(collisionStackSize),
	mRSDesc(heapMemoryManager->mMappedMemoryAllocators),
	mRadixSortDescBuf(heapMemoryManager, statType),
	mRadixCountTotalBuf(heapMemoryManager, statType),
	mContactByRigidBuf(heapMemoryManager, statType),
	mContactSortedByRigidBuf(heapMemoryManager, statType),
	mTempContactByRigidBitBuf(heapMemoryManager, statType),
	mContactRemapSortedByRigidBuf(heapMemoryManager, statType),
	mContactSortedByParticleBuf(heapMemoryManager, statType),
	mTempContactByParticleBitBuf(heapMemoryManager, statType),
	mContactRemapSortedByParticleBuf(heapMemoryManager, statType),
	mTempContactBuf(heapMemoryManager, statType),
	mTempContactRemapBuf(heapMemoryManager, statType),
	mTempContactBuf2(heapMemoryManager, statType),
	mTempContactRemapBuf2(heapMemoryManager, statType),
#if PX_ENABLE_SIM_STATS
	mCollisionStackSizeBytesStats(0)
#else
	PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
#endif
{
	mStackSizeNeededOnDevice.allocateElements(1, PX_FL);
	// Pinned host mirror of the device-side stack-size counter so it can be
	// read back without a device allocation per query.
	mStackSizeNeededPinned = PX_PINNED_MEMORY_ALLOC(PxU32, *mCudaContextManager, 1);
	*mStackSizeNeededPinned = 0;

	// One 16-bucket histogram (4-bit digits) per radix-sort grid block.
	mRadixCountSize = sizeof(PxU32) * PxgRadixSortKernelGridDim::RADIX_SORT * 16;

	// Radix-sort scratch buffers are padded to a multiple of 4 elements.
	const PxU32 maxContactsRoundedUp4 = (mMaxContacts + 3) & ~(4 - 1);

	mContactByRigidBuf.allocateElements(mMaxContacts, PX_FL); // PxNodeIndex, no radix sort.
	mContactSortedByRigidBuf.allocateElements(mMaxContacts, PX_FL); // PxNodeIndex, no radix sort.
	mTempContactByRigidBitBuf.allocateElements(maxContactsRoundedUp4, PX_FL); // PxU32, used in radix sort
	mContactRemapSortedByRigidBuf.allocateElements(maxContactsRoundedUp4, PX_FL); // PxU32, used in radix sort

	// Rounded up to a multiple of 2 — NOTE(review): presumably for paired
	// element access in the sort kernels; confirm against the kernel code.
	mContactSortedByParticleBuf.allocateElements((mMaxContacts + 1) & ~(2 - 1), PX_FL);
	mTempContactByParticleBitBuf.allocateElements(maxContactsRoundedUp4, PX_FL); // PxU32, used in radix sort
	mContactRemapSortedByParticleBuf.allocateElements(maxContactsRoundedUp4, PX_FL); // PxU32, used in radix sort

	mTempContactBuf.allocateElements(maxContactsRoundedUp4, PX_FL); // PxU32, used in radix sort
	mTempContactRemapBuf.allocateElements(maxContactsRoundedUp4, PX_FL); // PxU32, used in radix sort
	mTempContactBuf2.allocateElements(maxContactsRoundedUp4, PX_FL); // PxU32, used in radix sort
	mTempContactRemapBuf2.allocateElements(maxContactsRoundedUp4, PX_FL); // PxU32, used in radix sort
}
|
||||
|
||||
// Releases the pinned host counter and destroys the CUDA stream owned by this
// core. The CUDA context is acquired for the duration of the teardown since
// both operations call into the CUDA API.
PxgNonRigidCore::~PxgNonRigidCore()
{
	mCudaContextManager->acquireContext();

	PX_PINNED_MEMORY_FREE(*mCudaContextManager, mStackSizeNeededPinned);

	//destroy stream
	mCudaContext->streamDestroy(mStream);
	mStream = NULL;

	mCudaContextManager->releaseContext();
}
|
||||
|
||||
// Fills the ping/pong radix-sort block descriptors — rsDescs[0] sorts from
// the input buffers into the output buffers, rsDescs[1] goes the opposite
// direction so consecutive passes can alternate — and uploads each descriptor
// to its device-side copy on 'stream'.
void PxgNonRigidCore::updateGPURadixSortBlockDesc(CUstream stream, CUdeviceptr inputKeyd, CUdeviceptr inputRankd,
	CUdeviceptr outputKeyd, CUdeviceptr outputRankd, CUdeviceptr radixCountd, CUdeviceptr numKeysd,
	PxgRadixSortBlockDesc* rsDescs, CUdeviceptr radixSortDescBuf0, CUdeviceptr radixSortDescBuf1)
{
	// Decode the raw device pointers once up front.
	PxU32* const keysA = reinterpret_cast<PxU32*>(inputKeyd);
	PxU32* const ranksA = reinterpret_cast<PxU32*>(inputRankd);
	PxU32* const keysB = reinterpret_cast<PxU32*>(outputKeyd);
	PxU32* const ranksB = reinterpret_cast<PxU32*>(outputRankd);
	PxU32* const blockCounts = reinterpret_cast<PxU32*>(radixCountd);
	PxU32* const numKeys = reinterpret_cast<PxU32*>(numKeysd);

	// Even passes: A -> B.
	PxgRadixSortBlockDesc& evenDesc = rsDescs[0];
	evenDesc.inputKeys = keysA;
	evenDesc.inputRanks = ranksA;
	evenDesc.outputKeys = keysB;
	evenDesc.outputRanks = ranksB;
	evenDesc.radixBlockCounts = blockCounts;
	evenDesc.numKeys = numKeys;

	// Odd passes: B -> A (same histogram and key-count buffers).
	PxgRadixSortBlockDesc& oddDesc = rsDescs[1];
	oddDesc.inputKeys = keysB;
	oddDesc.inputRanks = ranksB;
	oddDesc.outputKeys = keysA;
	oddDesc.outputRanks = ranksA;
	oddDesc.radixBlockCounts = blockCounts;
	oddDesc.numKeys = numKeys;

	// Push both host-side descriptors to their device buffers asynchronously.
	mCudaContext->memcpyHtoDAsync(radixSortDescBuf0, (void*)&rsDescs[0], sizeof(PxgRadixSortBlockDesc), stream);
	mCudaContext->memcpyHtoDAsync(radixSortDescBuf1, (void*)&rsDescs[1], sizeof(PxgRadixSortBlockDesc), stream);
}
|
||||
2304
engine/third_party/physx/source/gpusimulationcontroller/src/PxgPBDParticleSystemCore.cpp
vendored
Normal file
2304
engine/third_party/physx/source/gpusimulationcontroller/src/PxgPBDParticleSystemCore.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
77
engine/third_party/physx/source/gpusimulationcontroller/src/PxgParticleNeighborhood.cpp
vendored
Normal file
77
engine/third_party/physx/source/gpusimulationcontroller/src/PxgParticleNeighborhood.cpp
vendored
Normal file
@@ -0,0 +1,77 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgParticleNeighborhoodProvider.h"
|
||||
#include "PxgAlgorithms.h"
|
||||
#include "PxgSparseGridStandalone.h"
|
||||
|
||||
#include "PxgAnisotropyData.h"
|
||||
|
||||
#include "PxPhysics.h"
|
||||
#include "PxParticleSystem.h"
|
||||
#include "foundation/PxUserAllocated.h"
|
||||
|
||||
#include "PxParticleGpu.h"
|
||||
#include "foundation/PxHashSet.h"
|
||||
|
||||
#include "PxParticleGpu.h"
|
||||
#include "PxgParticleNeighborhoodProvider.h"
|
||||
|
||||
#include "PxPhysXGpu.h"
|
||||
#include "PxvGlobals.h"
|
||||
#include "PxgKernelIndices.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Creates a neighborhood provider backed by a sparse grid sized for
// 'maxNumParticles' particles. The grid spacing is twice the particle contact
// offset — presumably so that potentially-contacting particles always land in
// the same or an adjacent cell (TODO confirm against the sparse-grid kernels).
// Subgrid extents of 1 make each subgrid a single cell.
//
// Fix: the first parameter was misleadingly named 'cudaContextManager' even
// though its type is PxgKernelLauncher& and it is stored in mKernelLauncher;
// renamed to match (parameter names are not part of the call interface).
PxgParticleNeighborhoodProvider::PxgParticleNeighborhoodProvider(PxgKernelLauncher& kernelLauncher, const PxU32 maxNumParticles, const PxReal particleContactOffset, const PxU32 maxNumSparseGridCells)
{
	mKernelLauncher = kernelLauncher;

	PxSparseGridParams p;
	p.maxNumSubgrids = maxNumSparseGridCells;
	p.gridSpacing = 2.0f * particleContactOffset;
	p.subgridSizeX = 1;
	p.subgridSizeY = 1;
	p.subgridSizeZ = 1;
	mSparseGridBuilder.initialize(&mKernelLauncher, p, maxNumParticles, 0, true);
}
|
||||
|
||||
// Rebuilds the underlying sparse grid with a new cell budget and cell size
// while preserving the previous grid's maximum particle capacity.
void PxgParticleNeighborhoodProvider::setCellProperties(PxU32 maxGridCells, PxReal cellSize)
{
	// Capture the capacity before tearing the old grid down.
	const PxU32 particleCapacity = mSparseGridBuilder.getMaxParticles();

	mSparseGridBuilder.release();

	// Single-cell subgrids, same as the constructor's configuration.
	PxSparseGridParams gridParams;
	gridParams.gridSpacing = cellSize;
	gridParams.maxNumSubgrids = maxGridCells;
	gridParams.subgridSizeX = 1;
	gridParams.subgridSizeY = 1;
	gridParams.subgridSizeZ = 1;

	mSparseGridBuilder.initialize(&mKernelLauncher, gridParams, particleCapacity, 0, true);
}
|
||||
2750
engine/third_party/physx/source/gpusimulationcontroller/src/PxgParticleSystemCore.cpp
vendored
Normal file
2750
engine/third_party/physx/source/gpusimulationcontroller/src/PxgParticleSystemCore.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
197
engine/third_party/physx/source/gpusimulationcontroller/src/PxgRadixSortCore.cpp
vendored
Normal file
197
engine/third_party/physx/source/gpusimulationcontroller/src/PxgRadixSortCore.cpp
vendored
Normal file
@@ -0,0 +1,197 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgRadixSortCore.h"
|
||||
#include "CudaKernelWrangler.h"
|
||||
#include "PxgKernelWrangler.h"
|
||||
#include "CudaContextManager.h"
|
||||
#include "cudamanager/PxCudaContext.h"
|
||||
#include "PxgKernelIndices.h"
|
||||
#include "PxgRadixSortKernelIndices.h"
|
||||
#include "PxParticleSystem.h"
|
||||
#include "PxgParticleSystemCoreKernelIndices.h"
|
||||
#include "DyParticleSystem.h"
|
||||
#include "PxgKernelLauncher.h"
|
||||
|
||||
#define PS_GPU_SPARSE_GRID_CORE_DEBUG 0
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Radix-sort helper bound to an existing essential core: all descriptor and
// histogram buffers are carved out of the core's heap memory manager and
// attributed to the shared-particles heap-stats bucket. Buffers are sized
// later via allocate().
PxgRadixSortCore::PxgRadixSortCore(PxgEssentialCore* core) :
	mRSDesc(core->mHeapMemoryManager->mMappedMemoryAllocators),
	mRadixSortDescBuf(core->mHeapMemoryManager, PxsHeapStats::eSHARED_PARTICLES),
	mRadixCountTotalBuf(core->mHeapMemoryManager, PxsHeapStats::eSHARED_PARTICLES)
{
	mEssentialCore = core;
}
|
||||
|
||||
// Sizes the radix-sort buffers for 'nbRequired' concurrent sorts: two host
// descriptors per sort (ping/pong), one histogram region per sort, and the
// two device-side descriptor buffers.
void PxgRadixSortCore::allocate(PxU32 nbRequired)
{
	// Two descriptors per sort: one for even passes, one for odd passes.
	mRSDesc.resize(nbRequired * 2u);
	// 16 buckets (4-bit digits) counted per radix-sort grid block.
	mRadixCountSize = sizeof(PxU32) * PxgRadixSortKernelGridDim::RADIX_SORT * 16;
	mRadixCountTotalBuf.allocate(mRadixCountSize * nbRequired, PX_FL);

	// Device-side copies of the even/odd descriptors for all sorts.
	for (PxU32 i = 0; i < 2; ++i)
	{
		mRadixSortDescBuf[i].allocate(sizeof(PxgRadixSortBlockDesc)*nbRequired, PX_FL);
	}
}
|
||||
|
||||
// Fills the ping/pong radix-sort descriptors for a sort of 'count' keys —
// rsDescs[0] sorts input -> output, rsDescs[1] sorts output -> input so that
// passes can alternate — and uploads both to their device-side buffers.
void PxgRadixSortCore::updateGPURadixSortDesc(PxCudaContext*mCudaContext, const CUstream& stream, CUdeviceptr inputKeyd, CUdeviceptr inputRankd,
	CUdeviceptr outputKeyd, CUdeviceptr outputRankd, CUdeviceptr radixCountd, PxgRadixSortDesc* rsDescs,
	CUdeviceptr radixSortDescBuf0, CUdeviceptr radixSortDescBuf1, const PxU32 count)
{
	// Decode the raw device pointers once.
	PxU32* const keysA = reinterpret_cast<PxU32*>(inputKeyd);
	PxU32* const ranksA = reinterpret_cast<PxU32*>(inputRankd);
	PxU32* const keysB = reinterpret_cast<PxU32*>(outputKeyd);
	PxU32* const ranksB = reinterpret_cast<PxU32*>(outputRankd);
	PxU32* const blockCounts = reinterpret_cast<PxU32*>(radixCountd);

	// Even passes: A -> B.
	PxgRadixSortDesc& evenDesc = rsDescs[0];
	evenDesc.inputKeys = keysA;
	evenDesc.inputRanks = ranksA;
	evenDesc.outputKeys = keysB;
	evenDesc.outputRanks = ranksB;
	evenDesc.radixBlockCounts = blockCounts;
	evenDesc.count = count;

	// Odd passes: B -> A.
	PxgRadixSortDesc& oddDesc = rsDescs[1];
	oddDesc.inputKeys = keysB;
	oddDesc.inputRanks = ranksB;
	oddDesc.outputKeys = keysA;
	oddDesc.outputRanks = ranksA;
	oddDesc.radixBlockCounts = blockCounts;
	oddDesc.count = count;

	// Push both descriptors to the device asynchronously on 'stream'.
	mCudaContext->memcpyHtoDAsync(radixSortDescBuf0, (void*)&rsDescs[0], sizeof(PxgRadixSortDesc), stream);
	mCudaContext->memcpyHtoDAsync(radixSortDescBuf1, (void*)&rsDescs[1], sizeof(PxgRadixSortDesc), stream);
}
|
||||
|
||||
// Multi-pass 4-bit radix sort driven by the ping/pong descriptors in
// 'radixSortDescBuf'. Each pass launches a histogram kernel followed by a
// rank-scatter kernel, alternating between the two descriptors. The pass
// count is rounded up to an even number so the sorted result lands back in
// the descriptor-0 output buffers (no final copy needed).
//
// Fix: the PS_GPU_DEBUG block referenced 'mStream', which is not in scope in
// this function (the stream is the 'stream' parameter), so the debug build
// configuration would not compile. It now synchronizes on 'stream'.
void PxgRadixSortCore::sort(PxgCudaKernelWranglerManager* mGpuKernelWranglerManager, PxCudaContext*mCudaContext,
	const CUstream& stream, PxgCudaBuffer* radixSortDescBuf, const PxU32 numBits)
{
	CUfunction radixFunction = mGpuKernelWranglerManager->getKernelWrangler()->getCuFunction(PxgKernelIds::RS_MULTIBLOCK);
	CUfunction calculateRanksFunction = mGpuKernelWranglerManager->getKernelWrangler()->getCuFunction(PxgKernelIds::RS_CALCULATERANKS_MULTIBLOCK);

	PxU32 startBit = 0;
	PxU32 numPass = (numBits + 3) / 4;	// 4 bits sorted per pass, rounded up
	numPass += numPass & 1; // ensure even number of passes to have results in final buffer

	for (PxU32 i = 0; i < numPass; ++i)
	{
		// Alternate between the even (input->output) and odd (output->input) descriptors.
		const PxU32 descIndex = i & 1;

		CUdeviceptr rsDesc = radixSortDescBuf[descIndex].getDevicePtr();

		PxCudaKernelParam radixSortKernelParams[] =
		{
			PX_CUDA_KERNEL_PARAM(rsDesc),
			PX_CUDA_KERNEL_PARAM(startBit)
		};

		CUresult resultR = mCudaContext->launchKernel(radixFunction, PxgRadixSortKernelGridDim::RADIX_SORT, 1, 1, PxgRadixSortKernelBlockDim::RADIX_SORT, 1, 1, 0, stream, radixSortKernelParams, sizeof(radixSortKernelParams), 0, PX_FL);
		if (resultR != CUDA_SUCCESS)
			PxGetFoundation().error(PxErrorCode::eINTERNAL_ERROR, PX_FL, "GPU sortParticles fail to launch kernel!!\n");

		resultR = mCudaContext->launchKernel(calculateRanksFunction, PxgRadixSortKernelGridDim::RADIX_SORT, 1, 1, PxgRadixSortKernelBlockDim::RADIX_SORT, 1, 1, 0, stream, radixSortKernelParams, sizeof(radixSortKernelParams), 0, PX_FL);
		if (resultR != CUDA_SUCCESS)
			PxGetFoundation().error(PxErrorCode::eINTERNAL_ERROR, PX_FL, "GPU sortParticles fail to launch kernel!!\n");

		startBit += 4;
	}

#if PS_GPU_DEBUG
	// Was 'mStream', which does not exist in this scope — use the parameter.
	CUresult result = mCudaContext->streamSynchronize(stream);
	if (result != CUDA_SUCCESS)
		PxGetFoundation().error(PxErrorCode::eINTERNAL_ERROR, PX_FL, "GPU sort fail!!\n");
#endif
}
|
||||
|
||||
// Multi-pass 4-bit radix sort over exactly 'numOfKeys' keys using the
// fixed-count kernel variants. Unlike the even-pass overload above, this one
// runs the minimal number of passes and, when that number is odd (so the
// sorted data ends up in the temp/B buffers), copies the result back into the
// final buffers afterwards.
void PxgRadixSortCore::sort(PxgCudaKernelWranglerManager* mGpuKernelWranglerManager, PxCudaContext*mCudaContext, const CUstream& stream,
	const PxU32 numOfKeys, PxgCudaBuffer* radixSortDescBuf, const PxU32 numBits, PxgRadixSortDesc* rsDescs)
{
	CUfunction radixFunction = mGpuKernelWranglerManager->getKernelWrangler()->getCuFunction(PxgKernelIds::RS_MULTIBLOCK_NO_COUNT);
	CUfunction calculateRanksFunction = mGpuKernelWranglerManager->getKernelWrangler()->getCuFunction(PxgKernelIds::RS_CALCULATERANKS_MULTIBLOCK_NO_COUNT);

	PxU32 startBit = 0;
	// 4 bits per pass, rounded up; no even-pass padding here.
	const PxU32 numPass = (numBits + 3) / 4;

	for (PxU32 i = 0; i < numPass; ++i)
	{
		// Alternate between the even (input->output) and odd (output->input) descriptors.
		const PxU32 descIndex = i & 1;

		CUdeviceptr rsDesc = radixSortDescBuf[descIndex].getDevicePtr();

		PxCudaKernelParam radixSortKernelParams[] =
		{
			PX_CUDA_KERNEL_PARAM(rsDesc),
			PX_CUDA_KERNEL_PARAM(startBit)
		};

		CUresult resultR = mCudaContext->launchKernel(radixFunction, PxgRadixSortKernelGridDim::RADIX_SORT, 1, 1, PxgRadixSortKernelBlockDim::RADIX_SORT, 1, 1, 0, stream, radixSortKernelParams, sizeof(radixSortKernelParams), 0, PX_FL);
		if (resultR != CUDA_SUCCESS)
			PxGetFoundation().error(PxErrorCode::eINTERNAL_ERROR, PX_FL, "GPU sortParticles fail to launch kernel!!\n");

		resultR = mCudaContext->launchKernel(calculateRanksFunction, PxgRadixSortKernelGridDim::RADIX_SORT, 1, 1, PxgRadixSortKernelBlockDim::RADIX_SORT, 1, 1, 0, stream, radixSortKernelParams, sizeof(radixSortKernelParams), 0, PX_FL);
		if (resultR != CUDA_SUCCESS)
			PxGetFoundation().error(PxErrorCode::eINTERNAL_ERROR, PX_FL, "GPU sortParticles fail to launch kernel!!\n");

		startBit += 4;
	}

	if (numPass & 1)
	{
		//Odd number of passes performed, sorted results are in temp buffer, so copy data across to final buffer
		// (rsDescs[1].input* point at the B buffers, rsDescs[1].output* at the final A buffers).
		mCudaContext->memcpyDtoDAsync(CUdeviceptr(rsDescs[1].outputKeys), CUdeviceptr(rsDescs[1].inputKeys), sizeof(PxU32)*numOfKeys, stream);
		mCudaContext->memcpyDtoDAsync(CUdeviceptr(rsDescs[1].outputRanks), CUdeviceptr(rsDescs[1].inputRanks), sizeof(PxU32)*numOfKeys, stream);
	}

	/*CUresult result = mCudaContext->streamSynchronize(stream);
	if (result != CUDA_SUCCESS)
		PxGetFoundation().error(PxErrorCode::eINTERNAL_ERROR, PX_FL, "GPU sortParticles fail!!\n");*/

#if PS_GPU_SPARSE_GRID_CORE_DEBUG
	CUresult result = mCudaContext->streamSynchronize(stream);
	if (result != CUDA_SUCCESS)
		PxGetFoundation().error(PxErrorCode::eINTERNAL_ERROR, PX_FL, "GPU sortParticles fail!!\n");

	/*PxgParticleSystem* particleSystems = mSimController->getParticleSystems();

	PxgParticleSystem& particleSystem = particleSystems[0];
	PxgParticleSystemData& data = particleSystem.mData;
	const PxU32 numParticles = data.mNumParticles;
	PxArray<PxU32> hash;
	PxArray<PxU32> particleIndex;
	hash.reserve(numParticles);
	hash.forceSize_Unsafe(numParticles);

	particleIndex.reserve(numParticles);
	particleIndex.forceSize_Unsafe(numParticles);

	CUdeviceptr hashd = reinterpret_cast<CUdeviceptr>(particleSystems[0].mGridParticleHash);
	CUdeviceptr particleIndexd = reinterpret_cast<CUdeviceptr>(particleSystems[0].mGridParticleIndex);

	mCudaContext->memcpyDtoH(hash.begin(), hashd, sizeof(PxU32) * numParticles);
	mCudaContext->memcpyDtoH(particleIndex.begin(), particleIndexd, sizeof(PxU32) * numParticles);

	int bob = 0;
	PX_UNUSED(bob);*/
#endif
}
|
||||
864
engine/third_party/physx/source/gpusimulationcontroller/src/PxgSDFBuilder.cpp
vendored
Normal file
864
engine/third_party/physx/source/gpusimulationcontroller/src/PxgSDFBuilder.cpp
vendored
Normal file
@@ -0,0 +1,864 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgSDFBuilder.h"
|
||||
|
||||
#include "PxgKernelWrangler.h"
|
||||
#include "PxgKernelIndices.h"
|
||||
#include "PxgCudaMemoryAllocator.h"
|
||||
#include "PxgCudaHelpers.h"
|
||||
|
||||
#include "foundation/PxBounds3.h"
|
||||
#include "foundation/PxErrors.h"
|
||||
#include "foundation/PxFoundation.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
|
||||
#define EXTENDED_DEBUG 0
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// re-allocation policy of 1.5x
|
||||
// 1.5x growth slack. Returns n + n/2, which equals the previous n * 3 / 2 for
// every n (even n: 3n/2; odd n: (3n-1)/2 either way) but avoids the
// intermediate 3*n, which silently wrapped for n > 0x55555555.
static PX_FORCE_INLINE PxU32 calculateSlack(PxU32 n)
{
	return n + n / 2;
}
|
||||
|
||||
// Grows the node arrays of 'bvh' to hold at least 'numNodes' nodes (with 1.5x
// slack to amortize repeated growth) and records the new logical node count.
// NOTE(review): growth is free-then-alloc, so existing node contents are NOT
// preserved — callers appear to rebuild the tree afterwards; confirm before
// relying on old data surviving a resize.
void PxgLinearBVHBuilderGPU::resizeBVH(PxgBVH& bvh, PxU32 numNodes)
{
	if (numNodes > bvh.mMaxNodes)
	{
		const PxU32 numToAlloc = calculateSlack(numNodes);

		PxCudaContextManager* ccm = mKernelLauncher.getCudaContextManager();

		PX_DEVICE_MEMORY_FREE(*ccm, bvh.mNodeLowers);
		PX_DEVICE_MEMORY_FREE(*ccm, bvh.mNodeUppers);

		bvh.mNodeLowers = PX_DEVICE_MEMORY_ALLOC(PxgPackedNodeHalf, *ccm, numToAlloc);
		bvh.mNodeUppers = PX_DEVICE_MEMORY_ALLOC(PxgPackedNodeHalf, *ccm, numToAlloc);

		bvh.mMaxNodes = numToAlloc;

		// The two-element root buffer is allocated once and never resized.
		if (!bvh.mRootNode)
		{
			bvh.mRootNode = PX_DEVICE_MEMORY_ALLOC(PxU32, *ccm, 2);
		}
	}

	bvh.mNumNodes = numNodes;
}
|
||||
|
||||
// Returns every device allocation held by 'bvh' (node halves and the root
// buffer) to the CUDA context manager.
void PxgLinearBVHBuilderGPU::releaseBVH(PxgBVH& bvh)
{
	PxCudaContextManager* contextManager = mKernelLauncher.getCudaContextManager();

	PX_DEVICE_MEMORY_FREE(*contextManager, bvh.mNodeLowers);
	PX_DEVICE_MEMORY_FREE(*contextManager, bvh.mNodeUppers);
	PX_DEVICE_MEMORY_FREE(*contextManager, bvh.mRootNode);
}
|
||||
|
||||
// Creates the GPU LBVH builder. All per-item scratch arrays start out NULL
// and are sized on first use in allocateOrResize(); only the three
// one-element device buffers for the whole-set bounds (lower, upper, inverse
// edge lengths) are allocated up front.
PxgLinearBVHBuilderGPU::PxgLinearBVHBuilderGPU(PxgKernelLauncher& kernelLauncher)
	: mMaxTreeDepth(NULL)
	, mKernelLauncher(kernelLauncher)
	, mIndices(NULL)
	, mKeys(NULL)
	, mDeltas(NULL)
	, mRangeLefts(NULL)
	, mRangeRights(NULL)
	, mNumChildren(NULL)
	, mTotalLower(NULL)
	, mTotalUpper(NULL)
	, mTotalInvEdges(NULL)
	, mMaxItems(0)
{
	PxCudaContextManager* ccm = kernelLauncher.getCudaContextManager();

	mTotalLower = PX_DEVICE_MEMORY_ALLOC(PxVec3, *ccm, 1);
	mTotalUpper = PX_DEVICE_MEMORY_ALLOC(PxVec3, *ccm, 1);
	mTotalInvEdges = PX_DEVICE_MEMORY_ALLOC(PxVec3, *ccm, 1);
}
|
||||
|
||||
// Ensures 'bvh' and the builder's scratch arrays can handle 'numItems' leaf
// items. A binary tree over numItems leaves needs at most 2*numItems nodes.
// Scratch is grown with 1.5x slack and is NOT shrunk; contents are discarded
// on growth (free-then-alloc).
void PxgLinearBVHBuilderGPU::allocateOrResize(PxgBVH& bvh, PxU32 numItems)
{
	// Worst-case node count for a binary tree with numItems leaves.
	const PxU32 maxNodes = 2 * numItems;

	resizeBVH(bvh, maxNodes);

	if (numItems > mMaxItems)
	{
		const PxU32 itemsToAlloc = (numItems * 3) / 2;
		const PxU32 nodesToAlloc = (maxNodes * 3) / 2;

		PxCudaContextManager* ccm = mKernelLauncher.getCudaContextManager();

		// reallocate temporary storage if necessary
		PX_DEVICE_MEMORY_FREE(*ccm, mIndices);
		PX_DEVICE_MEMORY_FREE(*ccm, mKeys);
		PX_DEVICE_MEMORY_FREE(*ccm, mDeltas);
		PX_DEVICE_MEMORY_FREE(*ccm, mRangeLefts);
		PX_DEVICE_MEMORY_FREE(*ccm, mRangeRights);
		PX_DEVICE_MEMORY_FREE(*ccm, mNumChildren);

		PX_PINNED_MEMORY_FREE(*ccm, mMaxTreeDepth);

		// Per-item arrays.
		mIndices = PX_DEVICE_MEMORY_ALLOC(PxU32, *ccm, itemsToAlloc);
		mKeys = PX_DEVICE_MEMORY_ALLOC(PxI32, *ccm, itemsToAlloc);
		mDeltas = PX_DEVICE_MEMORY_ALLOC(PxReal, *ccm, itemsToAlloc);// highest differenting bit between keys for item i and i+1
		// Per-node arrays.
		mRangeLefts = PX_DEVICE_MEMORY_ALLOC(PxI32, *ccm, nodesToAlloc);
		mRangeRights = PX_DEVICE_MEMORY_ALLOC(PxI32, *ccm, nodesToAlloc);
		mNumChildren = PX_DEVICE_MEMORY_ALLOC(PxI32, *ccm, nodesToAlloc);

		// Pinned so the host can read the tree depth back after a build.
		mMaxTreeDepth = PX_PINNED_MEMORY_ALLOC(PxI32, *ccm, 1);

		mMaxItems = itemsToAlloc;

		// The key sorter is sized for the new item count as well.
		mSort.release();
		mSort.initialize(&mKernelLauncher, numItems);
	}
}
|
||||
|
||||
void PxgLinearBVHBuilderGPU::release()
|
||||
{
|
||||
PxCudaContextManager* ccm = mKernelLauncher.getCudaContextManager();
|
||||
|
||||
mSort.release();
|
||||
|
||||
PX_DEVICE_MEMORY_FREE(*ccm, mIndices);
|
||||
PX_DEVICE_MEMORY_FREE(*ccm, mKeys);
|
||||
PX_DEVICE_MEMORY_FREE(*ccm, mDeltas);
|
||||
PX_DEVICE_MEMORY_FREE(*ccm, mRangeLefts);
|
||||
PX_DEVICE_MEMORY_FREE(*ccm, mRangeRights);
|
||||
PX_DEVICE_MEMORY_FREE(*ccm, mNumChildren);
|
||||
|
||||
PX_PINNED_MEMORY_FREE(*ccm, mMaxTreeDepth);
|
||||
|
||||
PX_DEVICE_MEMORY_FREE(*ccm, mTotalLower);
|
||||
PX_DEVICE_MEMORY_FREE(*ccm, mTotalUpper);
|
||||
PX_DEVICE_MEMORY_FREE(*ccm, mTotalInvEdges);
|
||||
|
||||
mMaxItems = 0;
|
||||
}
|
||||
|
||||
// Builds a BVH over 'numItems' triangles: computes per-triangle AABBs
// (inflated by 'boxMargin') on the GPU, runs the Morton-code / sort /
// delta-range preparation, then launches the hierarchy-build kernel. If
// 'totalBounds' is non-NULL it is used instead of computing the union on the
// GPU (see prepareHierarchConstruction).
void PxgLinearBVHBuilderGPU::buildFromTriangles(PxgBVH& bvh, const PxVec3* vertices, const PxU32* triangleIndices, const PxI32* itemPriorities, PxI32 numItems, PxBounds3* totalBounds, CUstream stream, PxReal boxMargin)
{
	allocateOrResize(bvh, numItems);

	//Since maxNodes is 2*numItems, the second half of bvh.mNodeLowers and bvh.mNodeUppers
	//Can be used as scratch memory until the BuildHierarchy kernel gets launched
	PX_COMPILE_TIME_ASSERT(sizeof(PxVec4) == sizeof(PxgPackedNodeHalf));
	PxVec4* itemLowers = reinterpret_cast<PxVec4*>(&bvh.mNodeLowers[numItems]);
	PxVec4* itemUppers = reinterpret_cast<PxVec4*>(&bvh.mNodeUppers[numItems]);

	PxU32 kNumThreadsPerBlock = PxgBVHKernelBlockDim::BUILD_HIERARCHY;
	PxU32 kNumBlocks = (numItems + kNumThreadsPerBlock - 1) / kNumThreadsPerBlock;
	// One thread per triangle: write each triangle's (margin-inflated) bounds
	// into the scratch half of the node arrays.
	mKernelLauncher.launchKernelPtr(PxgKernelIds::bvh_ComputeTriangleBounds, kNumBlocks, kNumThreadsPerBlock, 0, stream,
		&vertices, &triangleIndices, &numItems, &itemLowers, &itemUppers, &boxMargin);

	prepareHierarchConstruction(bvh, itemLowers, itemUppers, itemPriorities, numItems, totalBounds, stream);

	kNumThreadsPerBlock = PxgBVHKernelBlockDim::BUILD_HIERARCHY;
	kNumBlocks = (numItems + kNumThreadsPerBlock - 1) / kNumThreadsPerBlock;
	// Links the sorted leaves bottom-up into internal nodes; overwrites the
	// scratch region used above.
	mKernelLauncher.launchKernelPtr(PxgKernelIds::bvh_BuildHierarchy, kNumBlocks, kNumThreadsPerBlock, 0, stream,
		&numItems, &bvh.mRootNode, &mMaxTreeDepth, &mDeltas, &mNumChildren, &mRangeLefts, &mRangeRights, &bvh.mNodeLowers, &bvh.mNodeUppers);
}
|
||||
|
||||
// Same as buildFromTriangles, but the hierarchy-build kernel additionally
// fills per-node winding-number cluster approximations ('windingNumberClustersD',
// used for fast inside/outside queries in SDF construction). 'skipAllocate'
// lets a caller that already sized the BVH/scratch avoid re-allocating.
void PxgLinearBVHBuilderGPU::buildTreeAndWindingClustersFromTriangles(PxgBVH& bvh, PxgWindingClusterApproximation* windingNumberClustersD, const PxVec3* vertices, const PxU32* triangleIndices, const PxI32* itemPriorities,
	PxI32 numItems, PxBounds3* totalBounds, CUstream stream, PxReal boxMargin, bool skipAllocate)
{
	if (!skipAllocate)
		allocateOrResize(bvh, numItems);

	//Since maxNodes is 2*numItems, the second half of bvh.mNodeLowers and bvh.mNodeUppers
	//Can be used as scratch memory until the BuildHierarchy kernel gets launched
	PX_COMPILE_TIME_ASSERT(sizeof(PxVec4) == sizeof(PxgPackedNodeHalf));
	PxVec4* itemLowers = reinterpret_cast<PxVec4*>(&bvh.mNodeLowers[numItems]);
	PxVec4* itemUppers = reinterpret_cast<PxVec4*>(&bvh.mNodeUppers[numItems]);

	const PxU32 kNumThreadsPerBlock = PxgBVHKernelBlockDim::BUILD_HIERARCHY;
	const PxU32 kNumBlocks = (numItems + kNumThreadsPerBlock - 1) / kNumThreadsPerBlock;

	// One thread per triangle: margin-inflated AABBs into the scratch region.
	mKernelLauncher.launchKernelPtr(PxgKernelIds::bvh_ComputeTriangleBounds, kNumBlocks, kNumThreadsPerBlock, 0, stream,
		&vertices, &triangleIndices, &numItems, &itemLowers, &itemUppers, &boxMargin);

	prepareHierarchConstruction(bvh, itemLowers, itemUppers, itemPriorities, numItems, totalBounds, stream);

	// Builds the tree and the per-node winding clusters in one pass.
	mKernelLauncher.launchKernelPtr(PxgKernelIds::bvh_BuildHierarchyAndWindingClusters, kNumBlocks, kNumThreadsPerBlock, 0, stream,
		&numItems, &bvh.mRootNode, &mMaxTreeDepth, &mDeltas, &mNumChildren, &mRangeLefts, &mRangeRights, &bvh.mNodeLowers, &bvh.mNodeUppers,
		&windingNumberClustersD, &vertices, &triangleIndices);

#if EXTENDED_DEBUG
	// Debug readback of the built tree; disabled unless EXTENDED_DEBUG is set
	// and 'debugTree' is flipped in a debugger.
	bool debugTree = false;
	if (debugTree)
	{
		PxCudaContextManager* ccm = mKernelLauncher.getCudaContextManager();
		PxCUresult result = ccm->getCudaContext()->streamSynchronize(stream);
		PX_ASSERT(result == CUDA_SUCCESS);

		PxArray<PxVec4> lowerDebug;
		lowerDebug.resize(2 * numItems);
		PxArray<PxVec4> upperDebug;
		upperDebug.resize(2 * numItems);
		PxU32 root = 0xFFFFFFFF;

		result = ccm->getCudaContext()->streamSynchronize(stream);
		PX_ASSERT(result == CUDA_SUCCESS);

		PxgCudaHelpers::copyDToH(*ccm->getCudaContext(), lowerDebug.begin(), reinterpret_cast<PxVec4*>(bvh.mNodeLowers), lowerDebug.size());
		PxgCudaHelpers::copyDToH(*ccm->getCudaContext(), upperDebug.begin(), reinterpret_cast<PxVec4*>(bvh.mNodeUppers), upperDebug.size());
		PxgCudaHelpers::copyDToH(*ccm->getCudaContext(), &root, bvh.mRootNode, 1u);

		result = ccm->getCudaContext()->streamSynchronize(stream);
		PX_ASSERT(result == CUDA_SUCCESS);
	}
#endif
}
|
||||
|
||||
// Builds a BVH directly from caller-provided per-item AABBs ('itemLowers' /
// 'itemUppers' are device pointers). 'skipAllocate' lets a caller that has
// already sized the BVH and scratch arrays skip re-allocation.
void PxgLinearBVHBuilderGPU::buildFromLeaveBounds(PxgBVH& bvh, const PxVec4* itemLowers, const PxVec4* itemUppers, const PxI32* itemPriorities, PxI32 numItems, PxBounds3* totalBounds, CUstream stream, bool skipAllocate)
{
	const PxU32 kNumThreadsPerBlock = PxgBVHKernelBlockDim::BUILD_HIERARCHY;
	const PxU32 kNumBlocks = (numItems + kNumThreadsPerBlock - 1) / kNumThreadsPerBlock;

	if (!skipAllocate)
		allocateOrResize(bvh, numItems);

	// Morton codes, sort, and delta/range setup for the hierarchy kernel.
	prepareHierarchConstruction(bvh, itemLowers, itemUppers, itemPriorities, numItems, totalBounds, stream);

	mKernelLauncher.launchKernelPtr(PxgKernelIds::bvh_BuildHierarchy, kNumBlocks, kNumThreadsPerBlock, 0, stream,
		&numItems, &bvh.mRootNode, &mMaxTreeDepth, &mDeltas, &mNumChildren, &mRangeLefts, &mRangeRights, &bvh.mNodeLowers, &bvh.mNodeUppers);
}
|
||||
|
||||
// Prologue of the LBVH build: establishes the total bounds (either uploaded from the host
// or reduced on the GPU), assigns Morton codes to all items, sorts them, initializes the
// leaf nodes and computes the key deltas consumed by the hierarchy-build kernel.
// All work is enqueued asynchronously on 'stream'; nothing is synchronized here.
void PxgLinearBVHBuilderGPU::prepareHierarchConstruction(PxgBVH& bvh, const PxVec4* itemLowers, const PxVec4* itemUppers, const PxI32* itemPriorities, PxI32 numItems, PxBounds3* totalBounds, CUstream stream)
{
	// A binary tree over numItems leaves has at most 2*numItems nodes (leaves + internals).
	const PxU32 maxNodes = 2 * numItems;

	const PxU32 kNumThreadsPerBlock = PxgBVHKernelBlockDim::BUILD_HIERARCHY;
	const PxU32 kNumBlocks = (numItems + kNumThreadsPerBlock - 1) / kNumThreadsPerBlock;

	PxCudaContextManager* ccm = mKernelLauncher.getCudaContextManager();

	// if total bounds supplied by the host then we just
	// compute our edge length and upload it to the GPU directly
	if (totalBounds)
	{
		// calculate Morton codes
		PxVec3 edges = (*totalBounds).getDimensions();
		// Small epsilon avoids division by zero for degenerate (flat) bounds.
		edges += PxVec3(0.0001f);

		PxVec3 invEdges = PxVec3(1.0f / edges.x, 1.0f / edges.y, 1.0f / edges.z);

		PxgCudaHelpers::copyHToDAsync(*ccm->getCudaContext(), mTotalLower, &totalBounds->minimum, 1, stream);
		PxgCudaHelpers::copyHToDAsync(*ccm->getCudaContext(), mTotalUpper, &totalBounds->maximum, 1, stream);
		PxgCudaHelpers::copyHToDAsync(*ccm->getCudaContext(), mTotalInvEdges, &invEdges, 1, stream);
	}
	else
	{
		// Seed the reduction with an inverted (empty) AABB so any item extends it.
		static const PxVec3 upper(-FLT_MAX);
		static const PxVec3 lower(FLT_MAX);

		PxgCudaHelpers::copyHToDAsync(*ccm->getCudaContext(), mTotalLower, &lower, 1, stream);
		PxgCudaHelpers::copyHToDAsync(*ccm->getCudaContext(), mTotalUpper, &upper, 1, stream);

		// compute the bounds union on the GPU
		mKernelLauncher.launchKernelPtr(PxgKernelIds::bvh_ComputeTotalBounds, kNumBlocks, kNumThreadsPerBlock, 0, stream,
			&itemLowers, &itemUppers, &mTotalLower, &mTotalUpper, &numItems);

		// compute the total edge length
		mKernelLauncher.launchKernelPtr(PxgKernelIds::bvh_ComputeTotalInvEdges, 1, 1, 0, stream,
			&mTotalLower, &mTotalUpper, &mTotalInvEdges);
	}

	// assign 30-bit Morton code based on the centroid of each triangle and bounds for each leaf
	mKernelLauncher.launchKernelPtr(PxgKernelIds::bvh_CalculateMortonCodes, kNumBlocks, kNumThreadsPerBlock, 0, stream,
		&itemLowers, &itemUppers, &itemPriorities, &numItems, &mTotalLower, &mTotalInvEdges, &mIndices, &mKeys);

	// sort items based on Morton key (note the 32-bit sort key corresponds to the template parameter to Morton3, i.e. 3x9 bit keys combined)
	mSort.sort(reinterpret_cast<PxU32*>(mKeys), 32, stream, mIndices, numItems);

	// initialize leaf nodes
	mKernelLauncher.launchKernelPtr(PxgKernelIds::bvh_BuildLeaves, kNumBlocks, kNumThreadsPerBlock, 0, stream,
		&itemLowers, &itemUppers, &numItems, &mIndices, &mRangeLefts, &mRangeRights, &bvh.mNodeLowers, &bvh.mNodeUppers);

	// calculate deltas between adjacent keys
	/*mKernelLauncher.launchKernelPtr(PxgKernelIds::bvh_CalculateKeyDeltas, kNumBlocks, kNumThreadsPerBlock, 0, stream,
		&mKeys, &mDeltas, &numItems);*/
	// NOTE(review): the plain key-delta kernel above was replaced by the squared-distance
	// variant, which additionally reads the leaf bounds — presumably to improve split
	// quality for items with identical Morton keys; verify against the kernel source.
	mKernelLauncher.launchKernelPtr(PxgKernelIds::bvh_CalculateKeyDeltasSquaredDistance, kNumBlocks, kNumThreadsPerBlock, 0, stream,
		&mKeys, &mDeltas, &numItems, &bvh.mNodeLowers, &bvh.mNodeUppers);

	// reset children count, this is our atomic counter so we know when an internal node is complete, only used during building
	PxgCudaHelpers::memsetAsync(*mKernelLauncher.getCudaContextManager()->getCudaContext(), mNumChildren, 0, maxNodes, stream);
}
|
||||
|
||||
void PxgSDFBuilder::computeDenseSDF(const PxgBvhTriangleMesh& mesh, const PxgWindingClusterApproximation* windingNumberClustersD,
|
||||
const Gu::GridQueryPointSampler& sampler, PxU32 sizeX, PxU32 sizeY, PxU32 sizeZ, PxReal* sdfDataD, CUstream stream, PxReal* windingNumbersD)
|
||||
{
|
||||
PxCUresult result = CUDA_SUCCESS;
|
||||
|
||||
PxU32 blockDimX = 8;
|
||||
PxU32 blockDimY = 8;
|
||||
PxU32 blockDimZ = 4;
|
||||
PxU32 gridDimX = (sizeX + blockDimX - 1) / blockDimX;
|
||||
PxU32 gridDimY = (sizeY + blockDimY - 1) / blockDimY;
|
||||
PxU32 gridDimZ = (sizeZ + blockDimZ - 1) / blockDimZ;
|
||||
|
||||
bool useHybrid = true;
|
||||
if (useHybrid)
|
||||
{
|
||||
#if EXTENDED_DEBUG
|
||||
bool enableStatistics = false;
|
||||
if (enableStatistics)
|
||||
atomicCounter = PX_DEVICE_MEMORY_ALLOC(PxU32, *mKernelLauncher.getCudaContextManager(), 1);
|
||||
#endif
|
||||
|
||||
result = mKernelLauncher.launchKernelXYZPtr(PxgKernelIds::sdf_CalculateDenseGridHybrid, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, stream,
|
||||
&mesh, &windingNumberClustersD, &sampler, &sizeX, &sizeY, &sizeZ, &sdfDataD);
|
||||
|
||||
#if EXTENDED_DEBUG
|
||||
if (enableStatistics)
|
||||
{
|
||||
result = mKernelLauncher.getCudaContextManager()->getCudaContext()->streamSynchronize(stream);
|
||||
PX_ASSERT(result == CUDA_SUCCESS);
|
||||
|
||||
PxU32 counter;
|
||||
PxgcudaHelpers::copyDToH(*mKernelLauncher.getCudaContextManager(), &counter, atomicCounter, 1);
|
||||
|
||||
result = mKernelLauncher.getCudaContextManager()->getCudaContext()->streamSynchronize(stream);
|
||||
PX_ASSERT(result == CUDA_SUCCESS);
|
||||
|
||||
PX_DEVICE_MEMORY_FREE(*mKernelLauncher.getCudaContextManager(), atomicCounter);
|
||||
|
||||
//printf("problematic: %f\n", PxReal(counter) / (sizeX*sizeY*sizeZ));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
result = mKernelLauncher.launchKernelXYZPtr(PxgKernelIds::sdf_CalculateDenseGridBlocks, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, stream,
|
||||
&mesh, &windingNumberClustersD, &sampler, &sizeX, &sizeY, &sizeZ, &sdfDataD, &windingNumbersD);
|
||||
}
|
||||
|
||||
PX_ASSERT(result == CUDA_SUCCESS);
|
||||
}
|
||||
|
||||
// Constructs an SDF builder bound to the given kernel launcher; no GPU work or
// allocation happens here — everything is done per-call in buildSDF/buildSparseSDF.
PxgSDFBuilder::PxgSDFBuilder(PxgKernelLauncher& kernelLauncher) : mKernelLauncher(kernelLauncher)
{
}
|
||||
|
||||
// Repairs the dense SDF when the source triangle mesh is not watertight:
// 1) counts "hole" samples, 2) collects them as a point cloud, 3) applies local
// corrections and builds a BVH over the cloud, 4) re-evaluates the dense grid
// against that point cloud. Returns silently on allocation failure/abort mode.
// Fixes vs. original: two 'PxgcudaHelpers' case typos in the EXTENDED_DEBUG block
// (broke the debug build) and a missing PX_ASSERT after the final sync in the
// point-cloud branch, matching the error-checking style used everywhere else here.
void PxgSDFBuilder::fixHoles(PxU32 width, PxU32 height, PxU32 depth, PxReal* sdfDataD, const PxVec3& cellSize, const PxVec3& minExtents, const PxVec3& maxExtents,
	Gu::GridQueryPointSampler& sampler, CUstream stream)
{
	PxCudaContextManager* ccm = mKernelLauncher.getCudaContextManager();

	PxBounds3 totalBounds(minExtents, maxExtents);

	//Fix the sdf in case the source triangle mesh has holes
	const PxU32 numItems = width * height * depth;
	PxU32 kNumThreadsPerBlock = PxgBVHKernelBlockDim::SDF_FIX_HOLES;
	PxU32 kNumBlocks = (numItems + kNumThreadsPerBlock - 1) / kNumThreadsPerBlock;

	PxU32* atomicCounter = PX_DEVICE_MEMORY_ALLOC(PxU32, *ccm, 1);
	if (!atomicCounter)
		return;

	PxgCudaHelpers::memsetAsync(*ccm->getCudaContext(), atomicCounter, 0u, 1u, stream);

	// First pass: only count the hole samples so we know how big the point cloud is.
	mKernelLauncher.launchKernelPtr(PxgKernelIds::sdf_CountHoles, kNumBlocks, kNumThreadsPerBlock, 0, stream,
		&sdfDataD, &width, &height, &depth, &cellSize, &atomicCounter);

	PxU32 numPointsInCloud = 0;

	PxCUresult result = ccm->getCudaContext()->streamSynchronize(stream);
	PX_ASSERT(result == CUDA_SUCCESS);

	PxgCudaHelpers::copyDToH(*ccm->getCudaContext(), &numPointsInCloud, atomicCounter, 1);

	result = ccm->getCudaContext()->streamSynchronize(stream);
	PX_ASSERT(result == CUDA_SUCCESS);

	if (numPointsInCloud > 0)
	{
		PxgBVH pointCloudBvh = PxgBVH();
		PxgLinearBVHBuilderGPU treeBuilder(mKernelLauncher);
		treeBuilder.allocateOrResize(pointCloudBvh, numPointsInCloud);

		// abort here if the allocations fail above.
		if (ccm->getCudaContext()->isInAbortMode())
		{
			treeBuilder.releaseBVH(pointCloudBvh);
			treeBuilder.release();
			PX_DEVICE_MEMORY_FREE(*ccm, atomicCounter);

			return;
		}

		//Since maxNodes is 2*numItems, the second half of bvh.mNodeLowers and bvh.mNodeUppers
		//Can be used as scratch memory until the BuildHierarchy kernel gets launched
		PX_COMPILE_TIME_ASSERT(sizeof(PxVec4) == sizeof(PxgPackedNodeHalf));
		PxVec4* pointCloudLowers = reinterpret_cast<PxVec4*>(&pointCloudBvh.mNodeLowers[numPointsInCloud]);
		PxVec4* pointCloudUppers = reinterpret_cast<PxVec4*>(&pointCloudBvh.mNodeUppers[numPointsInCloud]);

		// Second pass: re-detect the holes and write them into the scratch point-cloud buffers.
		PxgCudaHelpers::memsetAsync(*ccm->getCudaContext(), atomicCounter, 0u, 1u, stream);
		mKernelLauncher.launchKernelPtr(PxgKernelIds::sdf_FindHoles, kNumBlocks, kNumThreadsPerBlock, 0, stream,
			&sdfDataD, &width, &height, &depth, &cellSize, &atomicCounter, &sampler, &pointCloudLowers, &pointCloudUppers, &numPointsInCloud);

		mKernelLauncher.launchKernelPtr(PxgKernelIds::sdf_ApplyHoleCorrections, kNumBlocks, kNumThreadsPerBlock, 0, stream,
			&sdfDataD, &width, &height, &depth, &sampler, &pointCloudUppers, &numPointsInCloud);

		// skipAllocate=true: buffers were sized by allocateOrResize above.
		treeBuilder.buildFromLeaveBounds(pointCloudBvh, pointCloudLowers, pointCloudUppers, NULL, numPointsInCloud, &totalBounds, stream, true);

#if EXTENDED_DEBUG
		bool debugTree = false;
		if (debugTree)
		{
			PxArray<PxVec4> lower;
			PxArray<PxVec4> upper;
			lower.resize(numPointsInCloud);
			upper.resize(numPointsInCloud);

			result = ccm->getCudaContext()->streamSynchronize(stream);
			PX_ASSERT(result == CUDA_SUCCESS);

			// fix: class name was misspelled 'PxgcudaHelpers' (broke the EXTENDED_DEBUG build)
			PxgCudaHelpers::copyDToH(*ccm, lower.begin(), reinterpret_cast<PxVec4*>(&pointCloudBvh.mNodeLowers[0]), numPointsInCloud);
			PxgCudaHelpers::copyDToH(*ccm, upper.begin(), reinterpret_cast<PxVec4*>(&pointCloudBvh.mNodeUppers[0]), numPointsInCloud);

			result = ccm->getCudaContext()->streamSynchronize(stream);
			PX_ASSERT(result == CUDA_SUCCESS);
		}
#endif

		PxU32 blockDimX = 8;
		PxU32 blockDimY = 8;
		PxU32 blockDimZ = 4;
		PxU32 gridDimX = (width + blockDimX - 1) / blockDimX;
		PxU32 gridDimY = (height + blockDimY - 1) / blockDimY;
		PxU32 gridDimZ = (depth + blockDimZ - 1) / blockDimZ;

		// Re-evaluate the dense grid against the hole point cloud via its BVH.
		mKernelLauncher.launchKernelXYZPtr(PxgKernelIds::sdf_CalculateDenseGridPointCloud, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, stream,
			&pointCloudBvh, &sampler, &width, &height, &depth, &sdfDataD);

		result = ccm->getCudaContext()->streamSynchronize(stream);
		PX_ASSERT(result == CUDA_SUCCESS);	// fix: result was previously assigned but never checked here

		//if (treeBuilder.mMaxTreeDepth[0] > 48)
		//	printf("maxDepth: %f\n", PxF64(treeBuilder.mMaxTreeDepth[0]));

		treeBuilder.releaseBVH(pointCloudBvh);
		treeBuilder.release();
	}

	result = ccm->getCudaContext()->streamSynchronize(stream);
	PX_ASSERT(result == CUDA_SUCCESS);

	PX_DEVICE_MEMORY_FREE(*ccm, atomicCounter);
}
|
||||
|
||||
// Builds a dense SDF of width*height*depth samples over [minExtents, maxExtents] for the
// given host-side triangle mesh, entirely on the GPU.
// Returns a device pointer to the sample array (caller frees with PX_DEVICE_MEMORY_FREE),
// or NULL if any allocation failed / the CUDA context entered abort mode.
// All device allocations are made upfront so that failure can be detected early.
PxReal* PxgSDFBuilder::buildDenseSDF(const PxVec3* vertices, PxU32 numVertices, const PxU32* indicesOrig, PxU32 numTriangleIndices, PxU32 width, PxU32 height, PxU32 depth,
	const PxVec3& minExtents, const PxVec3& maxExtents, bool cellCenteredSamples, CUstream stream)
{
	PxCudaContextManager* ccm = mKernelLauncher.getCudaContextManager();

	// AD: we try to get all the allocations done upfront such that we can fail early.
	PxgLinearBVHBuilderGPU treeBuilder(mKernelLauncher);

	// Device-side copy of the input mesh.
	PxgBvhTriangleMesh gpuMesh = PxgBvhTriangleMesh();
	gpuMesh.mVertices = PX_DEVICE_MEMORY_ALLOC(PxVec3, *ccm, numVertices);
	gpuMesh.mTriangles = PX_DEVICE_MEMORY_ALLOC(PxU32, *ccm, numTriangleIndices);
	gpuMesh.mNumVertices = numVertices;
	gpuMesh.mNumTriangles = numTriangleIndices / 3;

	PxgWindingClusterApproximation* windingNumberClustersD = PX_DEVICE_MEMORY_ALLOC(PxgWindingClusterApproximation, *ccm, gpuMesh.mNumTriangles);

	PxU32 numSDFSamples = width * height * depth;
	PxReal* sdfDataD = PX_DEVICE_MEMORY_ALLOC(PxReal, *ccm, numSDFSamples);

	// Only allocated in the EXTENDED_DEBUG winding-number path below.
	PxReal* windingNumbersD = NULL;

	treeBuilder.allocateOrResize(gpuMesh.mBvh, gpuMesh.mNumTriangles);

	// If any of the allocations above failed the context is in abort mode: free
	// everything that may have been allocated and bail out.
	if (ccm->getCudaContext()->isInAbortMode())
	{
		PxGetFoundation().error(PxErrorCode::eABORT, PX_FL, "GPU SDF cooking failed!\n");

		PX_DEVICE_MEMORY_FREE(*ccm, gpuMesh.mVertices);
		PX_DEVICE_MEMORY_FREE(*ccm, gpuMesh.mTriangles);
		PX_DEVICE_MEMORY_FREE(*ccm, windingNumberClustersD);

		treeBuilder.releaseBVH(gpuMesh.mBvh);
		treeBuilder.release();

		PX_DEVICE_MEMORY_FREE(*ccm, sdfDataD);

		return NULL;
	}

	// allocations are done here, let's start computing.

	PxBounds3 totalBounds(minExtents, maxExtents);

	PxgCudaHelpers::copyHToDAsync(*ccm->getCudaContext(), gpuMesh.mVertices, vertices, numVertices, stream);
	PxgCudaHelpers::copyHToDAsync(*ccm->getCudaContext(), gpuMesh.mTriangles, indicesOrig, numTriangleIndices, stream);

	treeBuilder.buildTreeAndWindingClustersFromTriangles(gpuMesh.mBvh, windingNumberClustersD, gpuMesh.mVertices, gpuMesh.mTriangles, NULL, gpuMesh.mNumTriangles, &totalBounds, stream, 1e-5f, true);

	const PxVec3 extents(maxExtents - minExtents);
	const PxVec3 cellSize(extents.x / width, extents.y / height, extents.z / depth);
	Gu::GridQueryPointSampler sampler(minExtents, cellSize, cellCenteredSamples);

#if EXTENDED_DEBUG
	bool debugWindingNumbers = false;
	PxArray<PxReal> windingNumbers;
	if (debugWindingNumbers)
	{
		windingNumbersD = PX_DEVICE_MEMORY_ALLOC(PxReal, *ccm, (width * height * depth));
		windingNumbers.resize(width * height * depth);
	}
#endif

	computeDenseSDF(gpuMesh, windingNumberClustersD, sampler, width, height, depth, sdfDataD, stream, windingNumbersD);

#if EXTENDED_DEBUG
	// Optional diagnostic: copy the winding numbers back and report their range.
	if (debugWindingNumbers)
	{
		PxCUresult result = ccm->getCudaContext()->streamSynchronize(stream);
		PX_ASSERT(result == CUDA_SUCCESS);

		PxgCudaHelpers::copyDToH(*ccm, windingNumbers.begin(), windingNumbersD, width * height * depth);

		result = ccm->getCudaContext()->streamSynchronize(stream);
		PX_ASSERT(result == CUDA_SUCCESS);
		PX_UNUSED(result);

		PxReal minWinding = FLT_MAX;
		PxReal maxWinding = -FLT_MAX;
		PxReal windingClosestToZeroPointFive = FLT_MAX;

		for (PxU32 i = 0; i < windingNumbers.size(); ++i)
		{
			PxReal w = windingNumbers[i];
			minWinding = PxMin(minWinding, w);
			maxWinding = PxMax(maxWinding, w);

			PxReal diffToZeroPointFive = PxAbs(0.5f - w);
			windingClosestToZeroPointFive = PxMin(windingClosestToZeroPointFive, diffToZeroPointFive);
		}

		//printf("windingInfo: %f %f %f\n", minWinding, maxWinding, windingClosestToZeroPointFive);

		PX_DEVICE_MEMORY_FREE(*ccm, windingNumbersD);
	}
#endif

	// Repair the SDF where the input mesh is not watertight.
	fixHoles(width, height, depth, sdfDataD, cellSize, minExtents, maxExtents, sampler, stream);

	ccm->getCudaContext()->streamSynchronize(stream);

	PX_DEVICE_MEMORY_FREE(*ccm, gpuMesh.mVertices);
	PX_DEVICE_MEMORY_FREE(*ccm, gpuMesh.mTriangles);
	PX_DEVICE_MEMORY_FREE(*ccm, windingNumberClustersD);

	treeBuilder.releaseBVH(gpuMesh.mBvh);
	treeBuilder.release();

	// A late failure (during any of the kernels/copies above) also surfaces as abort mode.
	if (ccm->getCudaContext()->isInAbortMode())
	{
		PxGetFoundation().error(PxErrorCode::eABORT, PX_FL, "GPU SDF cooking failed!\n");
		PX_DEVICE_MEMORY_FREE(*ccm, sdfDataD);
		return NULL;
	}

	return sdfDataD;
}
|
||||
|
||||
bool PxgSDFBuilder::buildSDF(const PxVec3* vertices, PxU32 numVertices, const PxU32* indicesOrig, PxU32 numTriangleIndices, PxU32 width, PxU32 height, PxU32 depth,
|
||||
const PxVec3& minExtents, const PxVec3& maxExtents, bool cellCenteredSamples, PxReal* sdf, CUstream stream)
|
||||
{
|
||||
PxCudaContextManager* ccm = mKernelLauncher.getCudaContextManager();
|
||||
PxScopedCudaLock lock(*ccm);
|
||||
|
||||
bool destroyStream = false;
|
||||
if (stream == 0)
|
||||
{
|
||||
mKernelLauncher.getCudaContextManager()->getCudaContext()->streamCreate(&stream, CU_STREAM_NON_BLOCKING);
|
||||
destroyStream = true;
|
||||
}
|
||||
|
||||
PxReal* sdfDataD = buildDenseSDF(vertices, numVertices, indicesOrig, numTriangleIndices, width, height, depth, minExtents, maxExtents, cellCenteredSamples, stream);
|
||||
|
||||
// buildDenseSDF returns NULL if gpu cooking failed.
|
||||
if (!sdfDataD)
|
||||
return false;
|
||||
|
||||
PxU32 numSDFSamples = width * height * depth;
|
||||
PxCUresult result = ccm->getCudaContext()->streamSynchronize(stream);
|
||||
PX_ASSERT(result == CUDA_SUCCESS);
|
||||
PxgCudaHelpers::copyDToH(*ccm->getCudaContext(), sdf, sdfDataD, numSDFSamples);
|
||||
result = ccm->getCudaContext()->streamSynchronize(stream);
|
||||
PX_ASSERT(result == CUDA_SUCCESS);
|
||||
|
||||
PX_DEVICE_MEMORY_FREE(*ccm, sdfDataD);
|
||||
|
||||
if (destroyStream)
|
||||
{
|
||||
mKernelLauncher.getCudaContextManager()->getCudaContext()->streamDestroy(stream);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Self-destructs the builder; after this call the object must not be used.
void PxgSDFBuilder::release()
{
	PX_DELETE_THIS;
}
|
||||
|
||||
// Builds a sparse (two-level) SDF: a coarse background grid plus quantized subgrids
// only where the surface is within 'narrowBandThickness'. Outputs go into the host
// arrays / scalars passed by reference; the 3D texture block dimensions describe how
// the subgrids are packed. Acquires the CUDA context for the duration of the call and
// returns false if the context could not be acquired or GPU cooking failed.
bool PxgSDFBuilder::buildSparseSDF(const PxVec3* vertices, PxU32 numVertices, const PxU32* indicesOrig,
	PxU32 numTriangleIndices, PxU32 width, PxU32 height, PxU32 depth,
	const PxVec3& minExtents, const PxVec3& maxExtents, PxReal narrowBandThickness, PxU32 subgridSize, PxSdfBitsPerSubgridPixel::Enum bytesPerSubgridPixel,
	PxArray<PxReal>& sdfCoarse, PxArray<PxU32>& sdfSubgridsStartSlots, PxArray<PxU8>& sdfDataSubgrids,
	PxReal& subgridsMinSdfValue, PxReal& subgridsMaxSdfValue,
	PxU32& sdfSubgrids3DTexBlockDimX, PxU32& sdfSubgrids3DTexBlockDimY, PxU32& sdfSubgrids3DTexBlockDimZ, CUstream stream)
{
	if (mKernelLauncher.getCudaContextManager()->tryAcquireContext())
	{
		bool success = true;
		// Create a private stream when the caller did not supply one.
		bool destroyStream = false;
		if (stream == 0)
		{
			mKernelLauncher.getCudaContextManager()->getCudaContext()->streamCreate(&stream, CU_STREAM_NON_BLOCKING);
			destroyStream = true;
		}
		// The grid must tile exactly into subgrids.
		PX_ASSERT(width % subgridSize == 0);
		PX_ASSERT(height % subgridSize == 0);
		PX_ASSERT(depth % subgridSize == 0);

		const PxVec3 extents(maxExtents - minExtents);
		const PxVec3 delta(extents.x / width, extents.y / height, extents.z / depth);

		// One extra sample per axis (+1 and bounds extended by one cell) so subgrid
		// corners can be shared.
		PxReal* denseSdfD = buildDenseSDF(vertices, numVertices, indicesOrig, numTriangleIndices, width + 1, height + 1, depth + 1, minExtents, maxExtents + delta, false, stream);

		// Tolerance for the subgrid culling, scaled by the overall size of the bounds.
		const PxReal errorThreshold = 1e-6f * extents.magnitude();

		if (denseSdfD)
		{
			compressSDF(denseSdfD, width, height, depth, subgridSize, narrowBandThickness, bytesPerSubgridPixel, errorThreshold,
				subgridsMinSdfValue, subgridsMaxSdfValue, sdfCoarse, sdfSubgridsStartSlots, sdfDataSubgrids,
				sdfSubgrids3DTexBlockDimX, sdfSubgrids3DTexBlockDimY, sdfSubgrids3DTexBlockDimZ, stream);

			PX_DEVICE_MEMORY_FREE(*mKernelLauncher.getCudaContextManager(), denseSdfD);

			// compressSDF signals failure by leaving sdfCoarse empty.
			if (!sdfCoarse.size())
			{
				PxGetFoundation().error(PxErrorCode::eABORT, PX_FL, "GPU SDF cooking failed!\n");
				success = false;
			}
		}
		else
			success = false;

		if (destroyStream)
		{
			mKernelLauncher.getCudaContextManager()->getCudaContext()->streamDestroy(stream);
		}

		mKernelLauncher.getCudaContextManager()->releaseContext();
		return success;
	}
	return false;
}
|
||||
|
||||
// Allocates every device buffer needed by compressSDF and initializes the GPU scan.
// All output pointers are assigned even on failure (the context's abort mode decides
// the return value), so the caller can unconditionally pass them to
// releaseBuffersForCompression afterwards. Returns false if any allocation failed.
bool PxgSDFBuilder::allocateBuffersForCompression(
	PxReal*& backgroundSdfD,
	PxU32 numBackgroundSdfSamples,
	PxU32*& subgridAddressesD,
	PxU8*& subgridActiveD,
	PxU32 numAddressEntries,
	PxReal*& subgridGlobalMinValueD,
	PxReal*& subgridGlobalMaxValueD,
	PxGpuScan& scan)
{
	PxCudaContextManager& contextManager = *mKernelLauncher.getCudaContextManager();

	backgroundSdfD = PX_DEVICE_MEMORY_ALLOC(PxReal, contextManager, numBackgroundSdfSamples);

	subgridAddressesD = PX_DEVICE_MEMORY_ALLOC(PxU32, contextManager, numAddressEntries);
	subgridActiveD = PX_DEVICE_MEMORY_ALLOC(PxU8, contextManager, numAddressEntries);

	subgridGlobalMinValueD = PX_DEVICE_MEMORY_ALLOC(PxReal, contextManager, 1);
	subgridGlobalMaxValueD = PX_DEVICE_MEMORY_ALLOC(PxReal, contextManager, 1);

	scan.initialize(&mKernelLauncher, numAddressEntries);

	// Any failed allocation above flips the context into abort mode.
	return !contextManager.getCudaContext()->isInAbortMode();
}
|
||||
|
||||
// Releases every device buffer obtained via allocateBuffersForCompression, plus the
// scan's internal storage. Safe to call with pointers that are NULL.
void PxgSDFBuilder::releaseBuffersForCompression(
	PxReal*& backgroundSdfD,
	PxU32*& subgridAddressesD,
	PxU8*& subgridActiveD,
	PxReal*& subgridGlobalMinValueD,
	PxReal*& subgridGlobalMaxValueD,
	PxGpuScan& scan)
{
	PxCudaContextManager& contextManager = *mKernelLauncher.getCudaContextManager();

	PX_DEVICE_MEMORY_FREE(contextManager, backgroundSdfD);
	PX_DEVICE_MEMORY_FREE(contextManager, subgridAddressesD);
	PX_DEVICE_MEMORY_FREE(contextManager, subgridActiveD);
	PX_DEVICE_MEMORY_FREE(contextManager, subgridGlobalMinValueD);
	PX_DEVICE_MEMORY_FREE(contextManager, subgridGlobalMaxValueD);
	scan.release();
}
|
||||
|
||||
// Compresses a dense SDF (device pointer 'denseSdfD', (width+1)*(height+1)*(depth+1)
// samples) into the sparse two-level representation: a coarse background grid plus
// quantized subgrids packed into a 3D-texture-shaped block. On failure the function
// returns early with sdfCoarse forced to size 0, which callers treat as "no SDF".
void PxgSDFBuilder::compressSDF(PxReal* denseSdfD, PxU32 width, PxU32 height, PxU32 depth,
	PxU32 cellsPerSubgrid, PxReal narrowBandThickness, PxU32 bytesPerSubgridPixel, PxReal errorThreshold,
	PxReal& subgridGlobalMinValue, PxReal& subgridGlobalMaxValue, PxArray<PxReal>& sdfCoarse,
	PxArray<PxU32>& sdfSubgridsStartSlots, PxArray<PxU8>& sdfDataSubgrids,
	PxU32& sdfSubgrids3DTexBlockDimX, PxU32& sdfSubgrids3DTexBlockDimY, PxU32& sdfSubgrids3DTexBlockDimZ, CUstream stream)
{
	// allocations upfront
	PxReal* backgroundSdfD = NULL;
	PxU32* subgridAddressesD = NULL;
	PxU8* subgridActiveD = NULL;
	PxReal* subgridGlobalMinValueD = NULL;
	PxReal* subgridGlobalMaxValueD = NULL;
	PxGpuScan scan;

	// Number of subgrids per axis.
	const PxU32 w = width / cellsPerSubgrid;
	const PxU32 h = height / cellsPerSubgrid;
	const PxU32 d = depth / cellsPerSubgrid;

	PX_ASSERT(width % cellsPerSubgrid == 0);
	PX_ASSERT(height % cellsPerSubgrid == 0);
	PX_ASSERT(depth % cellsPerSubgrid == 0);

	// The background grid has one more sample than subgrids per axis (shared corners).
	const PxU32 backgroundSizeX = w + 1;
	const PxU32 backgroundSizeY = h + 1;
	const PxU32 backgroundSizeZ = d + 1;
	const PxU32 numBackgroundSdfSamples = backgroundSizeX * backgroundSizeY * backgroundSizeZ;

	PxU32 numAddressEntries = w * h * d;

	bool success = allocateBuffersForCompression(backgroundSdfD, numBackgroundSdfSamples, subgridAddressesD, subgridActiveD, numAddressEntries, subgridGlobalMinValueD, subgridGlobalMaxValueD, scan);

	if (!success)
	{
		releaseBuffersForCompression(backgroundSdfD, subgridAddressesD, subgridActiveD, subgridGlobalMinValueD, subgridGlobalMaxValueD, scan);
		sdfCoarse.forceSize_Unsafe(0); // this signals that there is no SDF.
		return;
	}

	// then the actual computation
	PxCudaContextManager* ccm = mKernelLauncher.getCudaContextManager();

	// Seed the global min/max reduction with +/-FLT_MAX.
	PxReal val = FLT_MAX;
	PxgCudaHelpers::memsetAsync(*ccm->getCudaContext(), subgridGlobalMinValueD, val, 1, stream);
	val = -FLT_MAX;
	PxgCudaHelpers::memsetAsync(*ccm->getCudaContext(), subgridGlobalMaxValueD, val, 1, stream);

	const PxU32 numItems = backgroundSizeX * backgroundSizeY * backgroundSizeZ;
	const PxU32 kNumThreadsPerBlock = PxgBVHKernelBlockDim::BUILD_SDF;
	const PxU32 kNumBlocks = (numItems + kNumThreadsPerBlock - 1) / kNumThreadsPerBlock;

	// Downsample the dense SDF into the coarse background grid.
	mKernelLauncher.launchKernelPtr(PxgKernelIds::sdf_PopulateBackgroundSDF, kNumBlocks, kNumThreadsPerBlock, 0, stream,
		&cellsPerSubgrid, &backgroundSdfD, &backgroundSizeX, &backgroundSizeY, &backgroundSizeZ,
		&denseSdfD, &width, &height, &depth);

	// Mark which subgrids lie inside the narrow band (and reduce global min/max).
	mKernelLauncher.launchKernelXYZPtr(PxgKernelIds::sdf_MarkRequiredSdfSubgrids, w, h, d, kNumThreadsPerBlock, 1, 1, 0, stream,
		&backgroundSdfD, &denseSdfD, &subgridAddressesD, &subgridActiveD, &cellsPerSubgrid, &width, &height, &depth, &backgroundSizeX, &backgroundSizeY, &backgroundSizeZ, &narrowBandThickness,
		&subgridGlobalMinValueD, &subgridGlobalMaxValueD, &errorThreshold);

	// Exclusive prefix sum over the active flags yields compact subgrid slot addresses.
	scan.exclusiveScan(subgridAddressesD, stream);

	PxU32 numSubgrids = 0;
	PxCUresult result = ccm->getCudaContext()->streamSynchronize(stream);
	PX_ASSERT(result == CUDA_SUCCESS);
	PxgCudaHelpers::copyDToH(*ccm->getCudaContext(), &numSubgrids, scan.getSumPointer(), 1);
	PxgCudaHelpers::copyDToH(*ccm->getCudaContext(), &subgridGlobalMinValue, subgridGlobalMinValueD, 1);
	PxgCudaHelpers::copyDToH(*ccm->getCudaContext(), &subgridGlobalMaxValue, subgridGlobalMaxValueD, 1);

	//Synchronize the stream because the size of following memory allocations depends on calculations done in previously ran kernels
	result = ccm->getCudaContext()->streamSynchronize(stream);
	PX_ASSERT(result == CUDA_SUCCESS);

	const PxU32 valuesPerSubgrid = (cellsPerSubgrid + 1)*(cellsPerSubgrid + 1)*(cellsPerSubgrid + 1);

	// Choose a roughly cubical 3D layout that can hold numSubgrids blocks.
	const PxReal cubicRoot = PxPow(PxReal(numSubgrids), 1.0f / 3.0f);
	const PxU32 up = PxMax(1u, PxU32(PxCeil(cubicRoot)));

	//Arrange numSubgrids in a 3d layout
	PxU32 n = numSubgrids;
	sdfSubgrids3DTexBlockDimX = PxMin(up, n);
	n = (n + up - 1) / up;
	sdfSubgrids3DTexBlockDimY = PxMin(up, n);
	n = (n + up - 1) / up;
	sdfSubgrids3DTexBlockDimZ = PxMin(up, n);
	PxU32 subgridDataSize = valuesPerSubgrid * sdfSubgrids3DTexBlockDimX * sdfSubgrids3DTexBlockDimY * sdfSubgrids3DTexBlockDimZ * bytesPerSubgridPixel;

	//if (sdfSubgrids3DTexBlockDimX*sdfSubgrids3DTexBlockDimY*sdfSubgrids3DTexBlockDimZ < numSubgrids)
	//	printf("3D subgrid texture too small\n");

	//if (subgridDataSize == 0)
	//	printf("subgridDataSize is zero %i %i %i %i %i %i %f %f\n", numSubgrids, valuesPerSubgrid, sdfSubgrids3DTexBlockDimX, sdfSubgrids3DTexBlockDimY, sdfSubgrids3DTexBlockDimZ, PxU32(bytesPerSubgridPixel), subgridGlobalMinValue, subgridGlobalMaxValue);

	// we cannot put that one to the front because it depends on data we calculate just above.
	// (+3)/4 rounds the byte size up to whole PxU32 words.
	PxU32* quantizedSparseSDFIn3DTextureFormatD = PX_DEVICE_MEMORY_ALLOC(PxU32, *ccm, (subgridDataSize + 3)/4);
	if (!quantizedSparseSDFIn3DTextureFormatD)
	{
		releaseBuffersForCompression(backgroundSdfD, subgridAddressesD, subgridActiveD, subgridGlobalMinValueD, subgridGlobalMaxValueD, scan);
		sdfCoarse.forceSize_Unsafe(0);
		return;
	}

	// Quantize and scatter the active subgrids into the packed 3D texture layout.
	mKernelLauncher.launchKernelXYZPtr(PxgKernelIds::sdf_PopulateSdfSubgrids, w, h, d, kNumThreadsPerBlock, 1, 1, 0, stream,
		&denseSdfD, &width, &height, &depth, &subgridAddressesD, &subgridActiveD, &cellsPerSubgrid, &w, &h, &d,
		&quantizedSparseSDFIn3DTextureFormatD, &sdfSubgrids3DTexBlockDimX, &sdfSubgrids3DTexBlockDimY, &sdfSubgrids3DTexBlockDimZ,
		&subgridGlobalMinValueD, &subgridGlobalMaxValueD, &bytesPerSubgridPixel, &subgridDataSize);

	sdfCoarse.resize(numBackgroundSdfSamples);
	sdfSubgridsStartSlots.resize(numAddressEntries);
	sdfDataSubgrids.resize(subgridDataSize);

	// Copy all three result buffers back to the host.
	result = ccm->getCudaContext()->streamSynchronize(stream);
	PX_ASSERT(result == CUDA_SUCCESS);
	PxgCudaHelpers::copyDToH(*ccm->getCudaContext(), sdfCoarse.begin(), backgroundSdfD, numBackgroundSdfSamples);
	PxgCudaHelpers::copyDToH(*ccm->getCudaContext(), sdfSubgridsStartSlots.begin(), subgridAddressesD, numAddressEntries);
	PxgCudaHelpers::copyDToH(*ccm->getCudaContext(), sdfDataSubgrids.begin(), reinterpret_cast<PxU8*>(quantizedSparseSDFIn3DTextureFormatD), subgridDataSize);
	result = ccm->getCudaContext()->streamSynchronize(stream);
	PX_ASSERT(result == CUDA_SUCCESS);

	releaseBuffersForCompression(backgroundSdfD, subgridAddressesD, subgridActiveD, subgridGlobalMinValueD, subgridGlobalMaxValueD, scan);
	PX_DEVICE_MEMORY_FREE(*ccm, quantizedSparseSDFIn3DTextureFormatD);

	if (bytesPerSubgridPixel == 4)
	{
		//32bit values are stored as normal floats while 16bit and 8bit values are scaled to 0...1 range and then scaled back to original range
		subgridGlobalMinValue = 0.0f;
		subgridGlobalMaxValue = 1.0f;
	}
}
|
||||
219
engine/third_party/physx/source/gpusimulationcontroller/src/PxgShapeSimManager.cpp
vendored
Normal file
219
engine/third_party/physx/source/gpusimulationcontroller/src/PxgShapeSimManager.cpp
vendored
Normal file
@@ -0,0 +1,219 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
|
||||
#include "PxgShapeSimManager.h"
|
||||
#include "PxgHeapMemAllocator.h"
|
||||
#include "PxgNarrowphaseCore.h"
|
||||
#include "GuBounds.h"
|
||||
#include "CmTask.h"
|
||||
#include "CmFlushPool.h"
|
||||
#include "PxgSimulationCoreDesc.h"
|
||||
#include "PxgCudaMemoryAllocator.h"
|
||||
#include "cudamanager/PxCudaContext.h"
|
||||
#include "CudaKernelWrangler.h"
|
||||
#include "PxgKernelIndices.h"
|
||||
#include "PxgSimulationCoreKernelIndices.h"
|
||||
|
||||
#define SSM_GPU_DEBUG 0
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Shape-sim bookkeeping for the GPU pipeline. The pool uses the manager's mapped
// (host-visible) allocators; the two buffers are device heap allocations tagged
// under the eSIMULATION heap statistic.
PxgShapeSimManager::PxgShapeSimManager(PxgHeapMemoryAllocatorManager* heapMemoryManager) :
	mTotalNumShapes		(0),
	mNbTotalShapeSim	(0),
	mPxgShapeSimPool	(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mShapeSimBuffer		(heapMemoryManager, PxsHeapStats::eSIMULATION),
	mNewShapeSimBuffer	(heapMemoryManager, PxsHeapStats::eSIMULATION)
{
}
|
||||
|
||||
// Registers (or re-registers) a shape in the GPU shape-sim mirror at a fixed slot.
// @param shapeSimBase  owning high-level shape sim, kept for later lookups
// @param shapeCore     low-level shape data the copy task reads from later
// @param nodeIndex     island-node index of the owning body (may be updated later
//                      for articulation links, see setPxgShapeBodyNodeIndex)
// @param index         element index / slot; also queued in mNewShapeSims so the
//                      next copyToGpuShapeSim() uploads this entry
void PxgShapeSimManager::addPxgShape(Sc::ShapeSimBase* shapeSimBase, const PxsShapeCore* shapeCore, PxNodeIndex nodeIndex, PxU32 index)
{
	// Grow based on size(), not capacity(): resize() only guarantees the new
	// size(), while capacity() may already exceed size() after a previous grow.
	// With the old capacity() test, an index in [size, capacity) skipped the
	// resize and the writes below indexed past the arrays' logical size.
	if(mShapeSims.size() <= index)
	{
		mShapeSims.resize(2*index+1);		// amortized doubling growth
		mShapeSimPtrs.resize(2*index+1);	// kept in lockstep with mShapeSims
	}

	mShapeSims[index].mShapeCore = shapeCore;
	mShapeSims[index].mElementIndex_GPU = index;
	mShapeSims[index].mBodySimIndex_GPU = nodeIndex;

	mShapeSimPtrs[index] = shapeSimBase;

	// Queue for upload by the next copy-to-GPU pass.
	mNewShapeSims.pushBack(index);
	mTotalNumShapes = PxMax(mTotalNumShapes, index+1);
}
|
||||
|
||||
// This method assigns the bodySimIndex for articulation links because the nodeIndex is not available for articulation links
// until after creation, when they are inserted into the articulation and receive their node index.
// @param nodeIndex  the link's island-node index, known only post-insertion
// @param index      the shape slot previously registered via addPxgShape()
void PxgShapeSimManager::setPxgShapeBodyNodeIndex(PxNodeIndex nodeIndex, PxU32 index)
{
	mShapeSims[index].mBodySimIndex_GPU = nodeIndex;
}
|
||||
|
||||
// Marks a shape slot as removed and queues it for upload so the GPU-side
// entry is invalidated on the next copy pass. The slot itself is not
// recycled here; PX_INVALID_NODE / PX_INVALID_U32 act as tombstones that
// the copy task recognizes (it skips bounds computation for such entries).
void PxgShapeSimManager::removePxgShape(PxU32 index)
{
	PxgShapeSimData& sim = mShapeSims[index];
	sim.mBodySimIndex_GPU = PxNodeIndex(PX_INVALID_NODE);
	sim.mElementIndex_GPU = PX_INVALID_U32;

	// Drop the back-pointer to the high-level shape sim.
	mShapeSimPtrs[index] = NULL;

	// Re-queue the slot so the invalidation reaches the device copy.
	mNewShapeSims.pushBack(index);
}
|
||||
|
||||
namespace physx // PT: only in physx namespace for the friend access to work
{
// Parallel task that copies a slice of newly added/removed shape entries from
// the host-side mirror (mShapeSims) into the pinned staging pool
// (mPxgShapeSimPool), ready for the HtoD upload in gpuMemDmaUpShapeSim().
// Each task instance processes [mStartIndex, mStartIndex + mNbToProcess).
class PxgCopyToShapeSimTask : public Cm::Task
{
	PxgShapeSimManager*		mShapeSimManager;	// source arrays + destination pool
	PxgGpuNarrowphaseCore*	mNpCore;			// used to resolve per-shape hull-data indices
	const PxU32				mStartIndex;		// first entry of mNewShapeSims handled by this task
	const PxU32				mNbToProcess;		// number of entries handled by this task

public:
	PxgCopyToShapeSimTask(PxgShapeSimManager* shapeSimManager, PxgGpuNarrowphaseCore* npCore, PxU32 startIdx, PxU32 nbToProcess) :
		Cm::Task			(0), // PT: TODO: add missing context ID ... but then again it's missing from most of the GPU code anyway
		mShapeSimManager	(shapeSimManager),
		mNpCore				(npCore),
		mStartIndex			(startIdx),
		mNbToProcess		(nbToProcess)
	{
	}

	// Fills the pinned staging entries for this task's slice.
	virtual void runInternal()
	{
		PxgNewShapeSim* dst = mShapeSimManager->mPxgShapeSimPool.begin();
		const PxU32* newShapeSimsIndices = mShapeSimManager->mNewShapeSims.begin();
		const PxgShapeSimData* src = mShapeSimManager->mShapeSims.begin();

		PxgGpuNarrowphaseCore* npCore = mNpCore;
		const PxU32 shapeStartIndex = mStartIndex;
		const PxU32 endIndex = mNbToProcess + shapeStartIndex;
		for (PxU32 i = shapeStartIndex; i < endIndex; ++i)
		{
			// Staging entry i corresponds to the i-th queued index, which in
			// turn names the slot in the host mirror to copy from.
			PxgNewShapeSim& shapeSim = dst[i];

			const PxU32 shapeIndex = newShapeSimsIndices[i];

			const PxgShapeSimData& shapeLL = src[shapeIndex];

			const PxsShapeCore* shapeCore = shapeLL.mShapeCore;

			shapeSim.mTransform = shapeCore->getTransform();
			shapeSim.mElementIndex = shapeLL.mElementIndex_GPU;
			shapeSim.mBodySimIndex = shapeLL.mBodySimIndex_GPU;
			shapeSim.mShapeFlags = shapeCore->mShapeFlags;
			// ML: if the shape has been removed, we shouldn't calculate the bound (PT: otherwise it crashes, as the corresponding shape data has already been deleted)
			if (shapeSim.mElementIndex != PX_INVALID_U32)
				shapeSim.mLocalBounds = Gu::computeBounds(shapeCore->mGeometry.getGeometry(), PxTransform(PxIdentity));
			else
				shapeSim.mElementIndex = shapeIndex;	// removed entry: carry the slot index so the GPU knows which entry to invalidate

			shapeSim.mHullDataIndex = npCore->getShapeIndex(*shapeCore);
			shapeSim.mShapeType = PxU16(shapeCore->mGeometry.getType());
		}
	}

	virtual const char* getName() const
	{
		return "PxgCopyToShapeSimTask";
	}

private:
	PX_NOCOPY(PxgCopyToShapeSimTask)
};
}
|
||||
|
||||
// Spawns parallel tasks that stage all queued shape updates (mNewShapeSims)
// into the pinned mPxgShapeSimPool. The actual HtoD transfer happens later in
// gpuMemDmaUpShapeSim(); this only prepares host-visible data.
// @param npCore        narrowphase core, forwarded to the copy tasks
// @param continuation  task the spawned copy tasks run before
// @param flushPool     per-frame scratch allocator for the task objects
void PxgShapeSimManager::copyToGpuShapeSim(PxgGpuNarrowphaseCore* npCore, PxBaseTask* continuation, Cm::FlushPool& flushPool)
{
	const PxU32 nbNewShapes = mNewShapeSims.size();

	// PT: ??? why not resize? you're not supposed to abuse forceSize_Unsafe
	// (Keeps the pool's size in sync without value-initializing entries the
	// tasks below will fully overwrite anyway.)
	mPxgShapeSimPool.forceSize_Unsafe(0);
	mPxgShapeSimPool.reserve(nbNewShapes);
	mPxgShapeSimPool.forceSize_Unsafe(nbNewShapes);
	//mPxgShapeSimPool.resize(nbNewShapes);

	// PT: TODO: better task management....
	const PxU32 maxElementsPerTask = 1024;	// slice size per copy task

	for (PxU32 i = 0; i < nbNewShapes; i += maxElementsPerTask)
	{
		// Task memory comes from the flush pool, so no explicit delete is needed.
		PxgCopyToShapeSimTask* task =
			PX_PLACEMENT_NEW(flushPool.allocate(sizeof(PxgCopyToShapeSimTask)), PxgCopyToShapeSimTask)(this, npCore, i, PxMin(maxElementsPerTask, nbNewShapes - i));

		startTask(task, continuation);
	}
}
|
||||
|
||||
// Uploads the staged shape-sim entries to the device and launches the kernel
// that scatters them into the persistent per-shape device array.
// Sequence (all async on 'stream'):
//   1. grow mShapeSimBuffer if the total shape count increased, preserving old
//      contents and filling the newly added tail with 0xFFFFFFFF (invalid markers)
//   2. HtoD-copy the pinned staging pool into mNewShapeSimBuffer
//   3. launch UPDATE_SHAPES to apply the new entries to mShapeSimBuffer
// Finally the host-side queue of pending updates is cleared.
void PxgShapeSimManager::gpuMemDmaUpShapeSim(PxCudaContext* cudaContext, CUstream stream, KernelWrangler* kernelWrangler)
{
	const PxU32 nbTotalShapes = mTotalNumShapes;

	const PxPinnedArray<PxgNewShapeSim>& newShapeSimPool = mPxgShapeSimPool;

	const PxU32 nbNewShapes = newShapeSimPool.size();

	//This will allocate PxgShapeSim
	if (nbTotalShapes > mNbTotalShapeSim)
	{
		// getSize() is in bytes, so oldCapacity is a byte offset into the buffer.
		PxU64 oldCapacity = mShapeSimBuffer.getSize();
		mShapeSimBuffer.allocateCopyOldDataAsync(nbTotalShapes * sizeof(PxgShapeSim), cudaContext, stream, PX_FL);
		// Initialize only the newly appended tail; memsetD32Async counts in PxU32 words.
		if (oldCapacity < mShapeSimBuffer.getSize())
			cudaContext->memsetD32Async(mShapeSimBuffer.getDevicePtr() + oldCapacity, 0xFFFFFFFF, (mShapeSimBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);

		mNbTotalShapeSim = nbTotalShapes;
	}

	if (nbNewShapes)
	{
		// Stage the pending updates on the device, then scatter them in a kernel.
		mNewShapeSimBuffer.allocate(nbNewShapes * sizeof(PxgNewShapeSim), PX_FL);
		cudaContext->memcpyHtoDAsync(mNewShapeSimBuffer.getDevicePtr(), newShapeSimPool.begin(), sizeof(PxgNewShapeSim)* nbNewShapes, stream);

		const PxgNewShapeSim* newShapeSimsBufferDeviceData = mNewShapeSimBuffer.getTypedPtr();
		PxgShapeSim* shapeSimsBufferDeviceData = mShapeSimBuffer.getTypedPtr();

		void* kernelParams[] =
		{
			PX_CUDA_KERNEL_PARAM2(newShapeSimsBufferDeviceData),
			PX_CUDA_KERNEL_PARAM2(shapeSimsBufferDeviceData),
			PX_CUDA_KERNEL_PARAM2(nbNewShapes)
		};

		const CUfunction kernelFunction = kernelWrangler->getCuFunction(PxgKernelIds::UPDATE_SHAPES);
		CUresult result = cudaContext->launchKernel(kernelFunction, PxgSimulationCoreKernelGridDim::UPDATE_BODIES_AND_SHAPES, 1, 1, PxgSimulationCoreKernelBlockDim::UPDATE_BODIES_AND_SHAPES, 1, 1, 0, stream, kernelParams, 0, PX_FL);
		PX_UNUSED(result);	// launch errors are only checked in debug builds below

#if SSM_GPU_DEBUG
		result = cudaContext->streamSynchronize(stream);
		if (result != CUDA_SUCCESS)
			PxGetFoundation().error(PxErrorCode::eINTERNAL_ERROR, PX_FL, "updateShapesLaunch kernel fail!\n");
#endif
	}

	// The copies above read from pinned memory asynchronously, but the index
	// queue itself is no longer needed — the staged data lives in the pool.
	mNewShapeSims.clear();
}
|
||||
4423
engine/third_party/physx/source/gpusimulationcontroller/src/PxgSimulationController.cpp
vendored
Normal file
4423
engine/third_party/physx/source/gpusimulationcontroller/src/PxgSimulationController.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3181
engine/third_party/physx/source/gpusimulationcontroller/src/PxgSimulationCore.cpp
vendored
Normal file
3181
engine/third_party/physx/source/gpusimulationcontroller/src/PxgSimulationCore.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
162
engine/third_party/physx/source/gpusimulationcontroller/src/PxgSmoothing.cpp
vendored
Normal file
162
engine/third_party/physx/source/gpusimulationcontroller/src/PxgSmoothing.cpp
vendored
Normal file
@@ -0,0 +1,162 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgSmoothing.h"
|
||||
#include "PxgAlgorithms.h"
|
||||
#include "PxgSparseGridStandalone.h"
|
||||
|
||||
#include "PxgAnisotropyData.h"
|
||||
|
||||
#include "PxPhysics.h"
|
||||
#include "PxParticleSystem.h"
|
||||
#include "foundation/PxUserAllocated.h"
|
||||
#include "PxgCudaMemoryAllocator.h"
|
||||
|
||||
#include "PxParticleGpu.h"
|
||||
#include "PxgParticleNeighborhoodProvider.h"
|
||||
|
||||
#include "PxPhysXGpu.h"
|
||||
#include "PxvGlobals.h"
|
||||
#include "PxgKernelIndices.h"
|
||||
#include "PxgCudaMemoryAllocator.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
#if ENABLE_KERNEL_LAUNCH_ERROR_CHECK
|
||||
#define checkCudaError() { cudaError_t err = cudaDeviceSynchronize(); if (err != 0) printf("Cuda error file: %s, line: %i, error: %i\n", PX_FL, err); }
|
||||
#else
|
||||
#define checkCudaError() { }
|
||||
#endif
|
||||
|
||||
// Launches the per-particle-system smoothing kernel (smoothPositionsLaunch id)
// with one thread per particle. 'numParticles' only sizes the grid; the kernel
// reads its particle data through 'particleSystems' and 'id'.
void updateSmoothedPositions(PxgKernelLauncher& launcher, PxGpuParticleSystem* particleSystems, const PxU32 id, PxSmoothedPositionData* smoothingDataPerParticleSystem,
	PxU32 numParticles, CUstream stream, PxU32 numThreadsPerBlock = 256)
{
	// Round up so every particle gets a thread.
	const PxU32 blockCount = (numParticles + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::smoothPositionsLaunch, blockCount, numThreadsPerBlock, 0, stream,
		particleSystems, id, smoothingDataPerParticleSystem);
	checkCudaError();
}
|
||||
|
||||
// Launches the standalone smoothing kernel (smoothPositionsKernel id) over raw
// particle buffers plus the sparse-grid neighborhood structures, one thread per
// particle. Results are written to 'smoothPos'; 'phases'/'validPhaseMask' may
// filter which particles participate.
void smoothPositionsLaunch(PxgKernelLauncher& launcher, float4* deviceParticlePos, PxU32* sortedToOriginalParticleIndex, PxU32* sortedParticleToSubgrid, PxU32 maxNumSubgrids,
	PxU32* subgridNeighbors, PxU32* subgridEndIndices, int numParticles, PxU32* phases, PxU32 validPhaseMask,
	float4* smoothPos, PxReal smoothing, PxReal particleContactDistance, CUstream stream, PxU32 numThreadsPerBlock = 256)
{
	// Round up so every particle gets a thread.
	const PxU32 blockCount = (numParticles + numThreadsPerBlock - 1) / numThreadsPerBlock;
	launcher.launchKernel(PxgKernelIds::smoothPositionsKernel, blockCount, numThreadsPerBlock, 0, stream,
		deviceParticlePos, sortedToOriginalParticleIndex, sortedParticleToSubgrid, maxNumSubgrids,
		subgridNeighbors, subgridEndIndices, numParticles, phases, validPhaseMask, smoothPos, smoothing, particleContactDistance);
	checkCudaError();
}
|
||||
|
||||
// Frees the internally owned device buffer of smoothed positions, if present.
// NOTE(review): allocateGPUSmoothedPositionBuffers() guards on mPositions being
// non-NULL, so this relies on PX_DEVICE_MEMORY_FREE nulling mPositions (the
// deallocator appears to take the pointer by reference) — confirm against
// PxgCudaMemoryAllocator.h; if it doesn't, release-then-allocate would no-op.
void PxgSmoothedPositionGenerator::releaseGPUSmoothedPositionBuffers()
{
	if (!mPositionSmoothingDataHost.mPositions)
		return;

	PX_DEVICE_MEMORY_FREE(*mKernelLauncher.getCudaContextManager(), mPositionSmoothingDataHost.mPositions);
	mOwnsSmoothedPositionGPUBuffers = false;
}
|
||||
|
||||
void PxgSmoothedPositionGenerator::allocateGPUSmoothedPositionBuffers()
|
||||
{
|
||||
if (mPositionSmoothingDataHost.mPositions)
|
||||
return;
|
||||
|
||||
mPositionSmoothingDataHost.mPositions = PX_DEVICE_MEMORY_ALLOC(PxVec4, *mKernelLauncher.getCudaContextManager(), mNumParticles);
|
||||
mOwnsSmoothedPositionGPUBuffers = true;
|
||||
}
|
||||
|
||||
// Creates a smoothed-position generator for up to 'maxNumParticles' particles.
// Allocates the single-element device descriptor (PxSmoothedPositionData) up
// front; the per-particle output buffer is allocated lazily on demand.
// 'smoothingStrenght' [sic — name kept to match the declaration] is the blend
// strength uploaded to the device on the next generate call (mDirty = true).
PxgSmoothedPositionGenerator::PxgSmoothedPositionGenerator(PxgKernelLauncher& cudaContextManager, PxU32 maxNumParticles, PxReal smoothingStrenght)
	: mSmoothedPositions(NULL), mEnabled(true)
{
	mPositionSmoothingDataHost.mPositions = NULL;	// device output buffer, allocated lazily
	mKernelLauncher = cudaContextManager;
	mNumParticles = maxNumParticles;
	mPositionSmoothingDataPerParticleSystemDevice = PX_DEVICE_MEMORY_ALLOC(PxSmoothedPositionData, *mKernelLauncher.getCudaContextManager(), 1);

	mPositionSmoothingDataHost.mSmoothing = smoothingStrenght;
	mDirty = true;	// force descriptor upload on first use
	mOwnsSmoothedPositionGPUBuffers = false;
}
|
||||
|
||||
// Destroys the generator: frees the device descriptor and, if owned, the
// smoothed-position buffer, then self-deletes (PX_DELETE_THIS). Safe to call
// only once; a NULL descriptor means release already ran (or construction failed).
void PxgSmoothedPositionGenerator::release()
{
	if (!mPositionSmoothingDataPerParticleSystemDevice)
		return;

	PX_DEVICE_MEMORY_FREE(*mKernelLauncher.getCudaContextManager(), mPositionSmoothingDataPerParticleSystemDevice);
	if (mOwnsSmoothedPositionGPUBuffers)
		releaseGPUSmoothedPositionBuffers();

	// Object deletes itself; no member access is valid past this point.
	PX_DELETE_THIS;
}
|
||||
|
||||
// Runs the smoothing pass against a full GPU particle system (all async on
// 'stream'). Re-uploads the host descriptor first if parameters changed
// (mDirty), then optionally copies the smoothed positions back to the host
// readback target (mSmoothedPositions), when one was registered.
void PxgSmoothedPositionGenerator::generateSmoothedPositions(PxGpuParticleSystem* gpuParticleSystem, PxU32 numParticles, CUstream stream)
{
	if (mDirty)
	{
		mDirty = false;
		mKernelLauncher.getCudaContextManager()->getCudaContext()->memcpyHtoDAsync(CUdeviceptr(mPositionSmoothingDataPerParticleSystemDevice), &mPositionSmoothingDataHost, sizeof(PxSmoothedPositionData), stream);
	}

	// System index 0: this generator drives a single particle system.
	updateSmoothedPositions(mKernelLauncher, gpuParticleSystem, 0, mPositionSmoothingDataPerParticleSystemDevice, numParticles, stream);

	if (mSmoothedPositions)
	{
		// Optional async device-to-host readback of the results.
		mKernelLauncher.getCudaContextManager()->getCudaContext()->memcpyDtoHAsync(mSmoothedPositions, CUdeviceptr(mPositionSmoothingDataHost.mPositions), numParticles * sizeof(PxVec4), stream);
	}
}
|
||||
|
||||
// Standalone variant: smooths raw particle positions using an external
// neighborhood provider (sparse grid) instead of a full particle system.
// The contact distance passed to the kernel is 2 * particleContactOffset.
// Optionally copies results back to the host readback target afterwards.
void PxgSmoothedPositionGenerator::generateSmoothedPositions(PxVec4* particlePositionsGpu, PxParticleNeighborhoodProvider& neighborhoodProvider, PxU32 numParticles, PxReal particleContactOffset, CUstream stream)
{
	// NOTE(review): unchecked downcast — assumes the provider is always the Pxg
	// implementation; confirm no other PxParticleNeighborhoodProvider subclass
	// can reach this path.
	PxgParticleNeighborhoodProvider* n = static_cast<PxgParticleNeighborhoodProvider*>(&neighborhoodProvider);

	smoothPositionsLaunch(mKernelLauncher, reinterpret_cast<float4*>(particlePositionsGpu/*n->reorderedParticles*/), n->mSparseGridBuilder.getSortedToOriginalParticleIndex(),
		n->mSparseGridBuilder.getSortedParticleToSubgrid(), n->mSparseGridBuilder.getGridParameters().maxNumSubgrids,
		n->mSparseGridBuilder.getSubgridNeighborLookup(), n->getSubgridEndIndicesBuffer(), numParticles, NULL, 0, reinterpret_cast<float4*>(mPositionSmoothingDataHost.mPositions),
		mPositionSmoothingDataHost.mSmoothing, 2 * particleContactOffset, stream);

	if (mSmoothedPositions)
	{
		// Optional async device-to-host readback of the results.
		mKernelLauncher.getCudaContextManager()->getCudaContext()->memcpyDtoHAsync(mSmoothedPositions, CUdeviceptr(mPositionSmoothingDataHost.mPositions), numParticles * sizeof(PxVec4), stream);
	}
}
|
||||
|
||||
// Updates the particle capacity. If this generator owns its GPU output buffer,
// the buffer is reallocated to the new size; otherwise only the count changes
// (the caller-provided buffer is assumed to be large enough).
void PxgSmoothedPositionGenerator::setMaxParticles(PxU32 maxParticles)
{
	if (maxParticles == mNumParticles)
		return;	// nothing to do

	mNumParticles = maxParticles;

	if (mOwnsSmoothedPositionGPUBuffers)
	{
		// Recreate the owned device buffer at the new capacity.
		releaseGPUSmoothedPositionBuffers();
		allocateGPUSmoothedPositionBuffers();
	}
}
|
||||
643
engine/third_party/physx/source/gpusimulationcontroller/src/PxgSoftBody.cpp
vendored
Normal file
643
engine/third_party/physx/source/gpusimulationcontroller/src/PxgSoftBody.cpp
vendored
Normal file
@@ -0,0 +1,643 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgSoftBody.h"
|
||||
#include "geometry/PxSimpleTriangleMesh.h"
|
||||
#include "GuTetrahedronMesh.h"
|
||||
#include "cutil_math.h"
|
||||
//KS - currently need to include this as we are "borrowing" some of the block math types. Need to move them
|
||||
//to a common header
|
||||
#include "PxgArticulation.h"
|
||||
#include "PxsDeformableVolumeMaterialCore.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Computes the number of bytes loadOutTetMesh() will write for this collision
// tet mesh: a uint4 header (nbVerts, numTets, maxDepth, nbBv32TreeNodes)
// followed by the BV32 midphase data (packed nodes, per-depth info, and the
// depth-ordered node remap).
PxU32 PxgSoftBodyUtil::computeTetMeshByteSize(const Gu::BVTetrahedronMesh* tetMesh)
{
	const Gu::BV32Tree* bv32Tree = tetMesh->mGRB_BV32Tree;

	PxU32 total = sizeof(uint4);										// header quad
	total += bv32Tree->mNbPackedNodes * sizeof(Gu::BV32DataPacked);		// packed BV32 nodes
	total += bv32Tree->mMaxTreeDepth * sizeof(Gu::BV32DataDepthInfo);	// per-depth node info
	total += bv32Tree->mNbPackedNodes * sizeof(PxU32);					// depth-ordered node remap

	return total;
}
|
||||
|
||||
// Repacks the grid-model tetrahedron rest poses into the 32-wide SoA block
// layout (PxgMat33Block) the GPU solver consumes, and resolves local material
// indices to global material handles.
// Layout: elements are processed in groups of 32; for an element group starting
// at i, tet 'elem' of element (i+off) lands at writeIndex = i + off + elem*numElements,
// i.e. all first-tets of every element come before all second-tets, etc.
// 'indices' gives, per element, the first tet index of that element in the
// source arrays; consecutive tets of an element are contiguous (index + elem).
// @param destOrderedMaterialIndices  material handle per tet, in the reordered (writeIndex) order
// @param destMaterialIndices         material handle per tet, in original tet order
// @param materialIndices             per-tet local material index; may be NULL (all tets use handle 0)
static void copyTetraRestPoses(PxU16* destOrderedMaterialIndices, PxU16* destMaterialIndices, PxgMat33Block* blockRestPoses, PxMat33* tetRestPoses, PxU32* indices, PxU16* materialIndices,
	const PxU32 numTetsGM, const PxU32 numTetsPerElement, const PxU16* materialHandles)
{
	const PxU32 numElements = numTetsGM / numTetsPerElement;

	for (PxU32 i = 0; i < numElements; i += 32)
	{
		// Last group may be partial.
		const PxU32 offCount = PxMin(numElements -i, 32u);

		for (PxU32 elem = 0; elem < numTetsPerElement; elem++)
		{
			for (PxU32 off = 0; off < offCount; off++)
			{
				// Source tet: elem-th tet of the (i+off)-th ordered element.
				const PxU32 index = indices[i + off] + elem;

				// Destination slot in the tet-major reordered layout.
				const PxU32 writeIndex = (i + off + elem * numElements);

				PxgMat33Block& block = blockRestPoses[writeIndex / 32];
				PxU32 writeOffset = writeIndex & 31;	// lane within the 32-wide block

				PxMat33& mat = tetRestPoses[index];
				//PxMat33& mat = tetRestPoses[i + off];
				// Nine matrix entries packed as float4 + float4 + float per lane.
				block.mCol0[writeOffset].x = mat.column0.x;
				block.mCol0[writeOffset].y = mat.column0.y;
				block.mCol0[writeOffset].z = mat.column0.z;
				block.mCol0[writeOffset].w = mat.column1.x;
				block.mCol1[writeOffset].x = mat.column1.y;
				block.mCol1[writeOffset].y = mat.column1.z;
				block.mCol1[writeOffset].z = mat.column2.x;
				block.mCol1[writeOffset].w = mat.column2.y;
				block.mCol2[writeOffset] = mat.column2.z;

				PxU16 localIndex = 0;
				if (materialIndices)
					localIndex = materialIndices[index];

				destOrderedMaterialIndices[writeIndex] = materialHandles[localIndex];
			}
		}
	}

	// Original-order material table: resolve per-tet local indices, or fall
	// back to the first handle when the mesh has no per-tet materials.
	if (materialIndices)
	{
		for (PxU32 i = 0; i < numTetsGM; ++i)
		{
			const PxU16 localIndex = materialIndices[i];
			destMaterialIndices[i] = materialHandles[localIndex];
		}
	}
	else
	{
		for (PxU32 i = 0; i < numTetsGM; ++i)
		{
			destMaterialIndices[i] = materialHandles[0];
		}
	}
}
|
||||
|
||||
/*static void copyTetRemaps(uint4* dstRemaps, PxU32* srcRemaps, PxU32 numTets)
|
||||
{
|
||||
for (PxU32 i = 0; i < numTets; ++i)
|
||||
{
|
||||
dstRemaps[i].x = srcRemaps[i];
|
||||
dstRemaps[i].y = srcRemaps[i+numTets];
|
||||
dstRemaps[i].z = srcRemaps[i+2*numTets];
|
||||
dstRemaps[i].w = srcRemaps[i+3*numTets];
|
||||
}
|
||||
}*/
|
||||
|
||||
// Serializes the collision tet mesh's GPU midphase into 'mem' in the exact
// layout computeTetMeshByteSize() sized:
//   uint4 header (numVerts, numTets, maxTreeDepth, nbPackedNodes)
//   BV32DataPacked[nbPackedNodes]
//   BV32DataDepthInfo[maxTreeDepth]
//   PxU32 remap[nbPackedNodes]
// The pointer 'm' is bumped past each section; the GPU-side consumer must
// parse the same layout. Returns the number of packed BV32 nodes written.
PxU32 PxgSoftBodyUtil::loadOutTetMesh(void* mem, const Gu::BVTetrahedronMesh* tetMesh)
{
	const PxU32 numTets = tetMesh->getNbTetrahedronsFast();
	const PxU32 numVerts = tetMesh->getNbVerticesFast();
	//const PxU32 numSurfaceTriangles = tetMesh->getNbTrianglesFast();

	Gu::BV32Tree* bv32Tree = tetMesh->mGRB_BV32Tree;

	PxU8* m = (PxU8*)mem;
	*((uint4*)m) = make_uint4(numVerts, numTets, bv32Tree->mMaxTreeDepth, bv32Tree->mNbPackedNodes);
	m += sizeof(uint4);

	//Midphase
	PxMemCopy(m, bv32Tree->mPackedNodes, sizeof(Gu::BV32DataPacked) * bv32Tree->mNbPackedNodes);
	m += sizeof(Gu::BV32DataPacked) * bv32Tree->mNbPackedNodes;

	PX_ASSERT(bv32Tree->mNbPackedNodes > 0);

	PxMemCopy(m, bv32Tree->mTreeDepthInfo, sizeof(Gu::BV32DataDepthInfo) * bv32Tree->mMaxTreeDepth);
	m += sizeof(Gu::BV32DataDepthInfo) * bv32Tree->mMaxTreeDepth;

	PxMemCopy(m, bv32Tree->mRemapPackedNodeIndexWithDepth, sizeof(PxU32) * bv32Tree->mNbPackedNodes);
	m += sizeof(PxU32) * bv32Tree->mNbPackedNodes;

	/*PxMemCopy(m, tetMesh->mGRB_tetraSurfaceHint, sizeof(PxU8) * numTets);
	m += numTets * sizeof(PxU8);
	*/
	//GPU to CPU remap table
	//PxMemCopy(m, tetMesh->mGRB_faceRemap, numTets * sizeof(PxU32));

	// Disabled debug validation: rebuilds node bounds bottom-up from the tet
	// vertices and asserts they match the cooked bounds (which were inflated
	// by 'epsilon' at cook time). References 'grbTetInd'/'verts' that are not
	// defined in this function, so it does not compile as-is.
#if 0
	PxArray<PxBounds3> bounds(bv32Tree->mNbPackedNodes);
	const PxReal eps = 1e-6f;
	const PxReal contactOffset = 0.02f;
	const PxReal epsilon = 0.000199999995f;//we inflated the extents with epsilon in the cooking

	for (PxU32 i = bv32Tree->mMaxTreeDepth; i > 0; i--)
	{
		const PxU32 iOffset = bv32Tree->mTreeDepthInfo[i - 1].offset;
		const PxU32 iCount = bv32Tree->mTreeDepthInfo[i - 1].count;
		PxU32* iRempapNodeIndex = &bv32Tree->mRemapPackedNodeIndexWithDepth[iOffset];

		for (PxU32 j = 0; j < iCount; ++j)
		{
			const PxU32 nodeIndex = iRempapNodeIndex[j];
			Gu::BV32DataPacked& currentNode = bv32Tree->mPackedNodes[nodeIndex];
			PX_ASSERT(currentNode.mDepth == i - 1);

			PxVec3 min(PX_MAX_F32);
			PxVec3 max(-PX_MAX_F32);

			for (PxU32 k = 0; k < currentNode.mNbNodes; ++k)
			{
				if (currentNode.isLeaf(k))
				{
					PxU32 numPrimitives = currentNode.getNbReferencedPrimitives(k);
					PxU32 startIndex = currentNode.getPrimitiveStartIndex(k);

					PX_ASSERT(numPrimitives <= 32);

					PxVec3 curMin(PX_MAX_F32);
					PxVec3 curMax(-PX_MAX_F32);
					for (PxU32 l = 0; l < numPrimitives; ++l)
					{
						const PxU32 index = l + startIndex;

						uint4 tetIndex = grbTetInd[index];
						const PxVec3 worldV0 = verts[tetIndex.x];
						const PxVec3 worldV1 = verts[tetIndex.y];
						const PxVec3 worldV2 = verts[tetIndex.z];
						const PxVec3 worldV3 = verts[tetIndex.w];

						PxReal tMinX0 = PxMin(worldV0.x, worldV1.x);
						PxReal tMinY0 = PxMin(worldV0.y, worldV1.y);
						PxReal tMinZ0 = PxMin(worldV0.z, worldV1.z);

						PxReal tMinX1 = PxMin(worldV2.x, worldV3.x);
						PxReal tMinY1 = PxMin(worldV2.y, worldV3.y);
						PxReal tMinZ1 = PxMin(worldV2.z, worldV3.z);

						tMinX1 = PxMin(tMinX0, tMinX1);
						tMinY1 = PxMin(tMinY0, tMinY1);
						tMinZ1 = PxMin(tMinZ0, tMinZ1);

						curMin.x = PxMin(tMinX1, curMin.x);
						curMin.y = PxMin(tMinY1, curMin.y);
						curMin.z = PxMin(tMinZ1, curMin.z);

						//compute max
						tMinX0 = PxMax(worldV0.x, worldV1.x);
						tMinY0 = PxMax(worldV0.y, worldV1.y);
						tMinZ0 = PxMax(worldV0.z, worldV1.z);

						tMinX1 = PxMax(worldV2.x, worldV3.x);
						tMinY1 = PxMax(worldV2.y, worldV3.y);
						tMinZ1 = PxMax(worldV2.z, worldV3.z);

						tMinX1 = PxMax(tMinX0, tMinX1);
						tMinY1 = PxMax(tMinY0, tMinY1);
						tMinZ1 = PxMax(tMinZ0, tMinZ1);

						curMax.x = PxMax(tMinX1, curMax.x);
						curMax.y = PxMax(tMinY1, curMax.y);
						curMax.z = PxMax(tMinZ1, curMax.z);
					}

					min.x = PxMin(min.x, curMin.x);
					min.y = PxMin(min.y, curMin.y);
					min.z = PxMin(min.z, curMin.z);

					max.x = PxMax(max.x, curMax.x);
					max.y = PxMax(max.y, curMax.y);
					max.z = PxMax(max.z, curMax.z);

					const PxVec4 tempMin = currentNode.mMin[k];
					const PxVec4 tempMax = currentNode.mMax[k];

					PxVec3 rMin(tempMin.x, tempMin.y, tempMin.z);
					PxVec3 rMax(tempMax.x, tempMax.y, tempMax.z);

					const PxVec3 difMin = curMin - rMin;
					const PxVec3 difMax = rMax - curMax;

					PX_UNUSED(difMin);
					PX_UNUSED(difMax);

					PX_ASSERT(PxAbs(difMin.x - epsilon) < eps && PxAbs(difMin.y - epsilon) < eps && PxAbs(difMin.z - epsilon) < eps);
					PX_ASSERT(PxAbs(difMax.x - epsilon) < eps && PxAbs(difMax.y - epsilon) < eps && PxAbs(difMax.z - epsilon) < eps);
				}
				else
				{
					const PxU32 childOffset = currentNode.getChildOffset(k);

					min.x = PxMin(bounds[childOffset].minimum.x, min.x);
					min.y = PxMin(bounds[childOffset].minimum.y, min.y);
					min.z = PxMin(bounds[childOffset].minimum.z, min.z);

					max.x = PxMax(bounds[childOffset].maximum.x, max.x);
					max.y = PxMax(bounds[childOffset].maximum.y, max.y);
					max.z = PxMax(bounds[childOffset].maximum.z, max.z);
				}
			}

			bounds[nodeIndex].minimum = min;
			bounds[nodeIndex].maximum = max;
		}
	}

	bounds[0].minimum.x -= contactOffset;
	bounds[0].minimum.y -= contactOffset;
	bounds[0].minimum.z -= contactOffset;

	bounds[0].maximum.x += contactOffset;
	bounds[0].maximum.y += contactOffset;
	bounds[0].maximum.z += contactOffset;
#endif

	return bv32Tree->mNbPackedNodes;
}
|
||||
|
||||
void PxgSoftBodyUtil::initialTetData(PxgSoftBody& softbody, const Gu::BVTetrahedronMesh* colTetMesh,
|
||||
const Gu::TetrahedronMesh* simTetMesh, const Gu::DeformableVolumeAuxData* softBodyAuxData, const PxU16* materialsHandles,
|
||||
PxsHeapMemoryAllocator* alloc)
|
||||
{
|
||||
const PxU32 numTets = colTetMesh->getNbTetrahedronsFast();
|
||||
uint4* tetIndices = softbody.mTetIndices;
|
||||
|
||||
const PxU32 numTetsGM = simTetMesh->getNbTetrahedronsFast();
|
||||
uint4* tetIndicesGM = softbody.mSimTetIndices;
|
||||
|
||||
PxMat33* tetRestPoses = softbody.mTetraRestPoses;
|
||||
|
||||
const PxU32 numTetsPerElement = softBodyAuxData->mNumTetsPerElement;
|
||||
const PxU32 numElements = numTetsGM / numTetsPerElement;
|
||||
const PxU32 numVertsPerElement = numTetsPerElement == 1 ? 4 : 8;
|
||||
|
||||
//copy tetrahedron indices
|
||||
if (colTetMesh->has16BitIndices())
|
||||
{
|
||||
const PxU16* tetInds = reinterpret_cast<PxU16*>(colTetMesh->mGRB_tetraIndices);
|
||||
for (PxU32 i = 0; i < numTets; ++i)
|
||||
{
|
||||
tetIndices[i].x = tetInds[4 * i + 0];
|
||||
tetIndices[i].y = tetInds[4 * i + 1];
|
||||
tetIndices[i].z = tetInds[4 * i + 2];
|
||||
tetIndices[i].w = tetInds[4 * i + 3];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const PxU32* tetInds = reinterpret_cast<PxU32*>(colTetMesh->mGRB_tetraIndices);
|
||||
for (PxU32 i = 0; i < numTets; ++i)
|
||||
{
|
||||
tetIndices[i].x = tetInds[4 * i + 0];
|
||||
tetIndices[i].y = tetInds[4 * i + 1];
|
||||
tetIndices[i].z = tetInds[4 * i + 2];
|
||||
tetIndices[i].w = tetInds[4 * i + 3];
|
||||
}
|
||||
}
|
||||
for (PxU32 i = 0; i < numTets; ++i)
|
||||
{
|
||||
tetRestPoses[i] = softBodyAuxData->mTetraRestPoses[i];
|
||||
}
|
||||
|
||||
if (simTetMesh->has16BitIndices())
|
||||
{
|
||||
const PxU16* tetIndsGM = reinterpret_cast<const PxU16*>(simTetMesh->getTetrahedrons());
|
||||
for (PxU32 i = 0; i < numTetsGM; ++i)
|
||||
{
|
||||
tetIndicesGM[i].x = tetIndsGM[4 * i + 0];
|
||||
tetIndicesGM[i].y = tetIndsGM[4 * i + 1];
|
||||
tetIndicesGM[i].z = tetIndsGM[4 * i + 2];
|
||||
tetIndicesGM[i].w = tetIndsGM[4 * i + 3];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const PxU32* tetIndsGM = reinterpret_cast<const PxU32*>(simTetMesh->getTetrahedrons());
|
||||
for (PxU32 i = 0; i < numTetsGM; ++i)
|
||||
{
|
||||
tetIndicesGM[i].x = tetIndsGM[4 * i + 0];
|
||||
tetIndicesGM[i].y = tetIndsGM[4 * i + 1];
|
||||
tetIndicesGM[i].z = tetIndsGM[4 * i + 2];
|
||||
tetIndicesGM[i].w = tetIndsGM[4 * i + 3];
|
||||
}
|
||||
}
|
||||
|
||||
const PxU32 numVerts = colTetMesh->getNbVerticesFast();
|
||||
|
||||
PxMemCopy(softbody.mTetMeshSurfaceHint, colTetMesh->mGRB_tetraSurfaceHint, sizeof(PxU8) * numTets);
|
||||
|
||||
PxMemCopy(softbody.mTetIndicesRemapTable, colTetMesh->mGRB_faceRemap, sizeof(PxU32) * numTets);
|
||||
|
||||
const PxU32 numVertsGM = simTetMesh->getNbVerticesFast();
|
||||
|
||||
//PxMemCopy(softbody.mMaterialIndices, simTetMesh->mMaterialIndices, sizeof(PxU16) * numTetsGM);
|
||||
|
||||
copyTetraRestPoses(softbody.mOrderedMaterialIndices, softbody.mMaterialIndices, softbody.mSimTetraRestPoses, softBodyAuxData->mGridModelTetraRestPoses, softBodyAuxData->mGridModelOrderedTetrahedrons, simTetMesh->mMaterialIndices,
|
||||
numTetsGM, softBodyAuxData->mNumTetsPerElement, materialsHandles);
|
||||
PxMemCopy(softbody.mSimOrderedTetrahedrons, softBodyAuxData->mGridModelOrderedTetrahedrons, sizeof(PxU32) * numElements);
|
||||
|
||||
if (softBodyAuxData->mGMRemapOutputSize) // tet mesh only (or old hex mesh)
|
||||
{
|
||||
PxMemCopy(softbody.mSimRemapOutputCP, softBodyAuxData->mGMRemapOutputCP, sizeof(PxU32) * numElements * numVertsPerElement);
|
||||
PxMemCopy(softbody.mSimAccumulatedCopiesCP, softBodyAuxData->mGMAccumulatedCopiesCP, sizeof(PxU32) * numVertsGM);
|
||||
}
|
||||
|
||||
PxMemCopy(softbody.mSimAccumulatedPartitionsCP, softBodyAuxData->mGMAccumulatedPartitionsCP, sizeof(PxU32) * softBodyAuxData->getNbGMPartitionFast());
|
||||
PxMemCopy(softbody.mSimPullIndices, softBodyAuxData->mGMPullIndices, sizeof(PxU32) * numElements * numVertsPerElement);
|
||||
PxMemCopy(softbody.mVertsBarycentricInGridModel, softBodyAuxData->mVertsBarycentricInGridModel, sizeof(float4) * numVerts);
|
||||
PxMemCopy(softbody.mVertsRemapInGridModel, softBodyAuxData->mVertsRemapInGridModel, sizeof(PxU32) * numVerts);
|
||||
PxMemCopy(softbody.mTetsRemapColToSim, softBodyAuxData->mTetsRemapColToSim, sizeof(PxU32) * softBodyAuxData->getNbTetRemapSizeFast());
|
||||
PxMemCopy(softbody.mTetsAccumulatedRemapColToSim, softBodyAuxData->mTetsAccumulatedRemapColToSim, sizeof(PxU32) * numTets);
|
||||
|
||||
PxMemCopy(softbody.mSurfaceVertsHint, softBodyAuxData->mCollisionSurfaceVertsHint, sizeof(PxU8) * numVerts);
|
||||
|
||||
PxMemCopy(softbody.mSurfaceVertToTetRemap, softBodyAuxData->mCollisionSurfaceVertToTetRemap, sizeof(PxU32) * numVerts);
|
||||
|
||||
softbody.mNumTetsPerElement = softBodyAuxData->mNumTetsPerElement;
|
||||
softbody.mJacobiScale = 1.f;
|
||||
softbody.mNumJacobiVertices = 0;
|
||||
const PxU32 PULL_IND_MASK = 0x7fffffff;
|
||||
|
||||
// 1. for the extra Jacobi partition of a hex mesh, store the vertices in the partition to avoid running over
|
||||
// entire softbody vertices.
|
||||
// 2. check average or max number of adjacent voxels per vertex. This will be used to scale delta x in
|
||||
// Jacobi-style update while preserving momentum.
|
||||
if(softBodyAuxData->mNumTetsPerElement > 1 && softBodyAuxData->getNbGMPartitionFast() > SB_PARTITION_LIMIT)
|
||||
{
|
||||
const PxU32 startInd = softBodyAuxData->mGMAccumulatedPartitionsCP[SB_PARTITION_LIMIT - 1];
|
||||
const PxU32 endInd = softBodyAuxData->mGMAccumulatedPartitionsCP[SB_PARTITION_LIMIT];
|
||||
|
||||
const PxU32 numVoxelVertices = (endInd - startInd) * 8; // 8 vertices per element
|
||||
PX_ASSERT(endInd > startInd);
|
||||
|
||||
PxHashMap<PxU32, PxU32> numAdjVoxels; // <vertex id, # adjacent voxels>
|
||||
const uint4* pullIndices = softbody.mSimPullIndices;
|
||||
|
||||
PxArray<PxU32> jacobiVertIndices;
|
||||
jacobiVertIndices.reserve(numVoxelVertices);
|
||||
|
||||
PxU32 localIndexCount = 0;
|
||||
PxU32 maxCount = 1;
|
||||
for (PxU32 i = startInd; i < endInd; ++i)
|
||||
{
|
||||
uint4 pullInd[2];
|
||||
pullInd[0] = pullIndices[i];
|
||||
pullInd[1] = pullIndices[i + numElements];
|
||||
PxU32* pullIndPtr = &pullInd[0].x;
|
||||
pullInd[0].x &= PULL_IND_MASK;
|
||||
|
||||
for (PxU32 j = 0; j < 8; ++j)
|
||||
{
|
||||
if (numAdjVoxels.insert(pullIndPtr[j], 1))
|
||||
{
|
||||
++localIndexCount;
|
||||
jacobiVertIndices.pushBack(pullIndPtr[j]);
|
||||
}
|
||||
else
|
||||
{
|
||||
PxU32 count = ++numAdjVoxels[pullIndPtr[j]];
|
||||
maxCount = PxMax(maxCount, count);
|
||||
}
|
||||
}
|
||||
}
|
||||
PX_ASSERT(localIndexCount == numAdjVoxels.size());
|
||||
|
||||
// Jacobi scale can be defined using
|
||||
// 1. # average adjacency
|
||||
// 2. # max adjacency
|
||||
// 3. a magic number (e.g., 0.5)
|
||||
|
||||
// using average adjacency
|
||||
//softbody.mJacobiScale = PxReal(localIndexCount) / PxReal(numVoxelVertices);
|
||||
|
||||
// using max adjacency
|
||||
softbody.mJacobiScale = PxMax(1.f / PxReal(maxCount), 0.2f); // lower limit of 0.2 in case there are too
|
||||
// many duplicated voxels.
|
||||
|
||||
softbody.mNumJacobiVertices = localIndexCount;
|
||||
softbody.mSimJacobiVertIndices = reinterpret_cast<PxU32*>(
|
||||
alloc->allocate(sizeof(PxU32) * localIndexCount, PxsHeapStats::eSIMULATION, PX_FL));
|
||||
PxMemCopy(softbody.mSimJacobiVertIndices, jacobiVertIndices.begin(), sizeof(PxU32) * localIndexCount);
|
||||
|
||||
PX_ASSERT(numVoxelVertices >= localIndexCount);
|
||||
}
|
||||
|
||||
#if 0 // check GS partition validataion
|
||||
|
||||
const PxVec4T<PxU32>* pullIndices = reinterpret_cast<const PxVec4T<PxU32>*>(softBodyAuxData->mGMPullIndices);
|
||||
|
||||
if(softBodyAuxData->mNumTetsPerElement > 1)
|
||||
{
|
||||
PX_ASSERT(softBodyAuxData->getNbGMPartitionFast() <= (PxU32)(SB_PARTITION_LIMIT + 1));
|
||||
PxHashSet<PxU32> voxelIndices; // checking if every voxel is used for simulation, and each voxel is only
|
||||
// used once.
|
||||
voxelIndices.reserve(numElements);
|
||||
|
||||
for(PxU32 j = 0; j < SB_PARTITION_LIMIT; ++j) // GS partition
|
||||
{
|
||||
const PxU32 startInd = j == 0 ? 0 : softBodyAuxData->mGMAccumulatedPartitionsCP[j - 1];
|
||||
const PxU32 endInd = softBodyAuxData->mGMAccumulatedPartitionsCP[j];
|
||||
PX_ASSERT(endInd >= startInd);
|
||||
|
||||
PxHashSet<PxU32> voxelVertIndices; // non-overlapping voxel vertices
|
||||
voxelVertIndices.reserve(numVertsPerElement * (endInd - startInd));
|
||||
|
||||
for(PxU32 elementId = startInd; elementId < endInd; ++elementId)
|
||||
{
|
||||
// for GS partitions, make sure no two vertices are in the same partition.
|
||||
PxVec4T<PxU32> pullInd[2];
|
||||
pullInd[0] = pullIndices[elementId];
|
||||
pullInd[1] = pullIndices[elementId + numElements];
|
||||
|
||||
PxU32* pullIndPtr = &pullInd[0].x;
|
||||
pullInd[0].x &= PULL_IND_MASK;
|
||||
|
||||
for(PxU32 localVertIndex = 0; localVertIndex < 8; ++localVertIndex)
|
||||
{
|
||||
PX_ASSERT(pullIndPtr[localVertIndex] < numVerts);
|
||||
if(!voxelVertIndices.insert(pullIndPtr[localVertIndex]))
|
||||
{
|
||||
printf("Overlapping vertices found in the same partition\n");
|
||||
PX_ASSERT(false);
|
||||
}
|
||||
}
|
||||
|
||||
// make sure each voxel is used only once.
|
||||
if(!voxelIndices.insert(elementId))
|
||||
{
|
||||
printf("Same voxel is used multiple times\n");
|
||||
PX_ASSERT(false);
|
||||
}
|
||||
}
|
||||
|
||||
// for GS partitions, make sure every vertex in a partition is used only once.
|
||||
if(voxelVertIndices.size() != (endInd - startInd) * numVertsPerElement)
|
||||
{
|
||||
printf("Overlapping vertices found in the same partition\n");
|
||||
PX_ASSERT(false);
|
||||
}
|
||||
}
|
||||
|
||||
if(softBodyAuxData->mGMNbPartitions > SB_PARTITION_LIMIT) // jacobi partition
|
||||
{
|
||||
const PxU32 startInd = softBodyAuxData->mGMAccumulatedPartitionsCP[SB_PARTITION_LIMIT - 1];
|
||||
const PxU32 endInd = softBodyAuxData->mGMAccumulatedPartitionsCP[SB_PARTITION_LIMIT];
|
||||
PX_ASSERT(endInd >= startInd);
|
||||
|
||||
PxHashSet<PxU32> voxelVertIndices; // non-overlapping voxel vertices
|
||||
voxelVertIndices.reserve(numVertsPerElement * (endInd - startInd));
|
||||
|
||||
for(PxU32 elementId = startInd; elementId < endInd; ++elementId)
|
||||
{
|
||||
// make sure each voxel is used only once.
|
||||
if(!voxelIndices.insert(elementId))
|
||||
{
|
||||
printf("Same voxel is used multiple times\n");
|
||||
PX_ASSERT(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// make sure each voxel is used only once.
|
||||
if(voxelIndices.size() != numElements)
|
||||
{
|
||||
printf("Same voxel is used multiple times\n");
|
||||
PX_ASSERT(false);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void PxgSoftBodyUtil::computeBasisMatrix(PxMat33* restPoses, const Gu::DeformableVolumeMesh* tetMesh)
|
||||
{
|
||||
const PxVec3* positions = tetMesh->getCollisionMeshFast()->getVerticesFast();
|
||||
const PxU32 numTets = tetMesh->getCollisionMeshFast()->getNbTetrahedronsFast();
|
||||
//copy tetrahedron indices
|
||||
if (tetMesh->getCollisionMeshFast()->has16BitIndices())
|
||||
{
|
||||
const PxU16* tetInds = reinterpret_cast<PxU16*>(tetMesh->getCollisionMeshFast()->mGRB_tetraIndices);
|
||||
for (PxU32 i = 0; i < numTets; ++i)
|
||||
{
|
||||
PxVec3 v0 = positions[tetInds[4 * i + 0]];
|
||||
PxVec3 v1 = positions[tetInds[4 * i + 1]];
|
||||
PxVec3 v2 = positions[tetInds[4 * i + 2]];
|
||||
PxVec3 v3 = positions[tetInds[4 * i + 3]];
|
||||
|
||||
v1 -= v0;
|
||||
v2 -= v0;
|
||||
v3 -= v0;
|
||||
|
||||
PxMat33 D = PxMat33(v1, v2, v3);
|
||||
restPoses[i] = D.getInverse();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const PxU32* tetInds = reinterpret_cast<PxU32*>(tetMesh->getCollisionMeshFast()->mGRB_tetraIndices);
|
||||
for (PxU32 i = 0; i < numTets; ++i)
|
||||
{
|
||||
PxVec3 v0 = positions[tetInds[4 * i + 0]];
|
||||
PxVec3 v1 = positions[tetInds[4 * i + 1]];
|
||||
PxVec3 v2 = positions[tetInds[4 * i + 2]];
|
||||
PxVec3 v3 = positions[tetInds[4 * i + 3]];
|
||||
|
||||
v1 -= v0;
|
||||
v2 -= v0;
|
||||
v3 -= v0;
|
||||
|
||||
PxMat33 D = PxMat33(v1, v2, v3);
|
||||
restPoses[i] = D.getInverse();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Translates a GPU data flag into the index of the matching pointer-sized slot
// inside PxgSoftBody (member offset divided by sizeof(CUdeviceptr)).
// Asserts and returns 0 for flags with no associated member.
PxU32 PxgSoftBody::dataIndexFromFlagDEPRECATED(PxSoftBodyGpuDataFlag::Enum flag)
{
	switch (flag)
	{
	case PxSoftBodyGpuDataFlag::eTET_INDICES:			return PX_OFFSET_OF_RT(PxgSoftBody, mTetIndices) / sizeof(CUdeviceptr);
	case PxSoftBodyGpuDataFlag::eTET_REST_POSES:		return PX_OFFSET_OF_RT(PxgSoftBody, mTetraRestPoses) / sizeof(CUdeviceptr);
	case PxSoftBodyGpuDataFlag::eTET_ROTATIONS:			return PX_OFFSET_OF_RT(PxgSoftBody, mTetraRotations) / sizeof(CUdeviceptr);
	case PxSoftBodyGpuDataFlag::eTET_POSITION_INV_MASS:	return PX_OFFSET_OF_RT(PxgSoftBody, mPosition_InvMass) / sizeof(CUdeviceptr);
	case PxSoftBodyGpuDataFlag::eSIM_TET_INDICES:		return PX_OFFSET_OF_RT(PxgSoftBody, mSimTetIndices) / sizeof(CUdeviceptr);
	case PxSoftBodyGpuDataFlag::eSIM_TET_ROTATIONS:		return PX_OFFSET_OF_RT(PxgSoftBody, mSimTetraRotations) / sizeof(CUdeviceptr);
	case PxSoftBodyGpuDataFlag::eSIM_VELOCITY_INV_MASS:	return PX_OFFSET_OF_RT(PxgSoftBody, mSimVelocity_InvMass) / sizeof(CUdeviceptr);
	case PxSoftBodyGpuDataFlag::eSIM_POSITION_INV_MASS:	return PX_OFFSET_OF_RT(PxgSoftBody, mSimPosition_InvMass) / sizeof(CUdeviceptr);
	default:
		break;
	}
	PX_ASSERT(false);	// unrecognized flag value
	return 0;
}
|
||||
|
||||
// Returns all heap buffers owned by this soft body to the given allocator.
// The conditional frees at the end mirror conditional allocations made
// elsewhere: the CP remap/copy buffers exist only when mNumTetsPerElement == 1,
// and the Jacobi vertex-index buffer only when mNumJacobiVertices != 0.
// NOTE(review): pointers are not reset after deallocation — presumably the
// object is not reused after this call; verify before calling twice.
void PxgSoftBody::deallocate(PxsHeapMemoryAllocator* allocator)
{
	// Collision-mesh buffers.
	allocator->deallocate(mTetMeshData);
	allocator->deallocate(mTetMeshSurfaceHint);
	allocator->deallocate(mTetIndices);
	allocator->deallocate(mTetIndicesRemapTable);
	allocator->deallocate(mTetraRestPoses);
	// Simulation-mesh buffers.
	allocator->deallocate(mSimTetIndices);
	allocator->deallocate(mSimTetraRestPoses);
	allocator->deallocate(mSimOrderedTetrahedrons);
	// Collision-to-simulation mapping buffers.
	allocator->deallocate(mVertsBarycentricInGridModel);
	allocator->deallocate(mVertsRemapInGridModel);
	allocator->deallocate(mTetsRemapColToSim);
	allocator->deallocate(mTetsAccumulatedRemapColToSim);
	allocator->deallocate(mSurfaceVertsHint);
	allocator->deallocate(mSurfaceVertToTetRemap);
	// Partition / material buffers.
	allocator->deallocate(mSimAccumulatedPartitionsCP);
	allocator->deallocate(mSimPullIndices);
	allocator->deallocate(mOrderedMaterialIndices);
	allocator->deallocate(mMaterialIndices);

	if (mNumTetsPerElement == 1) // used for tet mesh only
	{
		allocator->deallocate(mSimRemapOutputCP);
		allocator->deallocate(mSimAccumulatedCopiesCP);
	}

	if (mNumJacobiVertices) // when Jacobi vertices are used, deallocate mSimJacobiVertIndices.
	{
		allocator->deallocate(mSimJacobiVertIndices);
	}
}
|
||||
3192
engine/third_party/physx/source/gpusimulationcontroller/src/PxgSoftBodyCore.cpp
vendored
Normal file
3192
engine/third_party/physx/source/gpusimulationcontroller/src/PxgSoftBodyCore.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
257
engine/third_party/physx/source/gpusimulationcontroller/src/PxgSparseGridStandalone.cpp
vendored
Normal file
257
engine/third_party/physx/source/gpusimulationcontroller/src/PxgSparseGridStandalone.cpp
vendored
Normal file
@@ -0,0 +1,257 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgSparseGridStandalone.h"
|
||||
#include "PxSparseGridParams.h"
|
||||
#include "foundation/PxArray.h"
|
||||
|
||||
#include "PxgCudaMemoryAllocator.h"
|
||||
|
||||
#include "PxPhysXGpu.h"
|
||||
#include "PxvGlobals.h"
|
||||
#include "PxgKernelWrangler.h"
|
||||
#include "PxgKernelIndices.h"
|
||||
#include "PxgKernelLauncher.h"
|
||||
#include "PxgCudaMemoryAllocator.h"
|
||||
#include "PxgCudaHelpers.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
#if ENABLE_KERNEL_LAUNCH_ERROR_CHECK
|
||||
#define checkCudaError() { cudaError_t err = cudaDeviceSynchronize(); if (err != 0) printf("Cuda error file: %s, line: %i, error: %i\n", PX_FL, err); }
|
||||
#else
|
||||
#define checkCudaError() { }
|
||||
#endif
|
||||
|
||||
#define THREADS_PER_BLOCK 256
|
||||
|
||||
// Launches the sg_ReuseSubgrids kernel with one thread per potential subgrid
// slot, forwarding this update's and the previous update's hash keys, active
// counts, and order maps.
void sparseGridReuseSubgrids(PxgKernelLauncher& launcher, const PxSparseGridParams& sparseGridParams,
	const PxU32* uniqueHashkeysPerSubgridPreviousUpdate, const PxU32* numActiveSubgridsPreviousUpdate, PxU32* subgridOrderMapPreviousUpdate,
	const PxU32* uniqueHashkeysPerSubgrid, const PxU32* numActiveSubgrids, PxU32* subgridOrderMap,
	CUstream stream)
{
	const PxU32 blockSize = THREADS_PER_BLOCK;
	const PxU32 gridSize = (sparseGridParams.maxNumSubgrids + blockSize - 1) / blockSize;
	launcher.launchKernel(PxgKernelIds::sg_ReuseSubgrids, gridSize, blockSize, 0, stream,
		sparseGridParams, uniqueHashkeysPerSubgridPreviousUpdate, numActiveSubgridsPreviousUpdate, subgridOrderMapPreviousUpdate,
		uniqueHashkeysPerSubgrid, numActiveSubgrids, subgridOrderMap);
	checkCudaError();
}
|
||||
|
||||
|
||||
// Launches the sg_AddReleasedSubgridsToUnusedStack kernel (one thread per
// subgrid slot), which pushes slots from the previous update onto the unused
// subgrid stack.
void sparseGridAddReleasedSubgridsToUnusedStack(PxgKernelLauncher& launcher, const PxSparseGridParams& sparseGridParams,
	const PxU32* numActiveSubgridsPreviousUpdate, const PxU32* subgridOrderMapPreviousUpdate,
	PxU32* unusedSubgridStackSize, PxU32* unusedSubgridStack, CUstream stream)
{
	const PxU32 blockSize = THREADS_PER_BLOCK;
	const PxU32 gridSize = (sparseGridParams.maxNumSubgrids + blockSize - 1) / blockSize;
	launcher.launchKernel(PxgKernelIds::sg_AddReleasedSubgridsToUnusedStack, gridSize, blockSize, 0, stream,
		numActiveSubgridsPreviousUpdate, subgridOrderMapPreviousUpdate, unusedSubgridStackSize, unusedSubgridStack);
	checkCudaError();
}
|
||||
|
||||
// Launches the sg_AllocateNewSubgrids kernel (one thread per subgrid slot),
// which assigns slots from the unused-subgrid stack to newly active subgrids
// in subgridOrderMap.
void sparseGridAllocateNewSubgrids(PxgKernelLauncher& launcher, const PxSparseGridParams& sparseGridParams, const PxU32* numActiveSubgrids, PxU32* subgridOrderMap,
	PxU32* unusedSubgridStackSize, PxU32* unusedSubgridStack, const PxU32* numActiveSubgridsPreviousUpdate, CUstream stream)
{
	const PxU32 blockSize = THREADS_PER_BLOCK;
	const PxU32 gridSize = (sparseGridParams.maxNumSubgrids + blockSize - 1) / blockSize;
	launcher.launchKernel(PxgKernelIds::sg_AllocateNewSubgrids, gridSize, blockSize, 0, stream,
		numActiveSubgrids, subgridOrderMap, unusedSubgridStackSize, unusedSubgridStack, numActiveSubgridsPreviousUpdate, sparseGridParams.maxNumSubgrids);
	checkCudaError();
}
|
||||
|
||||
|
||||
// Launches the sg_SparseGridCalcSubgridHashes kernel with one thread per
// particle, writing a subgrid hash key per particle. Particles are filtered by
// phase (validPhaseMask) and, optionally, by an activeIndices list.
void sparseGridCalcSubgridHashes(PxgKernelLauncher& launcher, const PxSparseGridParams& sparseGridParams, PxU32* indices,
	PxU32* hashkeyPerParticle, PxVec4* deviceParticlePos, const int numParticles,
	const PxU32* phases, const PxU32 validPhaseMask, CUstream stream, const PxU32* activeIndices = NULL)
{
	const PxU32 blockSize = THREADS_PER_BLOCK;
	const PxU32 gridSize = (numParticles + blockSize - 1) / blockSize;
	launcher.launchKernel(PxgKernelIds::sg_SparseGridCalcSubgridHashes, gridSize, blockSize, 0, stream,
		sparseGridParams, indices, hashkeyPerParticle, deviceParticlePos,
		numParticles, phases, validPhaseMask, activeIndices);
	checkCudaError();
}
|
||||
|
||||
|
||||
// Launches the sg_SparseGridMarkRequiredNeighbors kernel with one thread per
// particle, filling outRequiredNeighborMask for subgrids needed within
// neighborhoodSize of each (phase-filtered) particle.
void sparseGridMarkRequiredNeighbors(PxgKernelLauncher& launcher, PxU32* outRequiredNeighborMask, PxU32* uniqueSortedHashkey, const PxSparseGridParams sparseGridParams, PxU32 neighborhoodSize,
	PxVec4* particlePositions, const PxU32 numParticles, const PxU32* phases, const PxU32 validPhaseMask, CUstream stream, const PxU32* activeIndices = NULL)
{
	const PxU32 blockSize = THREADS_PER_BLOCK;
	const PxU32 gridSize = (numParticles + blockSize - 1) / blockSize;
	launcher.launchKernel(PxgKernelIds::sg_SparseGridMarkRequiredNeighbors, gridSize, blockSize, 0, stream,
		outRequiredNeighborMask, uniqueSortedHashkey, sparseGridParams, neighborhoodSize, particlePositions,
		numParticles, phases, validPhaseMask, activeIndices);
	checkCudaError();
}
|
||||
|
||||
|
||||
// Launches the sg_SparseGridSortedArrayToDelta kernel over n elements,
// converting the sorted input (optionally gated by mask) into a delta array
// suitable for a subsequent exclusive scan.
void sparseGridSortedArrayToDelta(PxgKernelLauncher& launcher, const PxU32* in, const PxU32* mask, PxU32* out, PxU32 n, CUstream stream)
{
	const PxU32 blockSize = THREADS_PER_BLOCK;
	const PxU32 gridSize = (n + blockSize - 1) / blockSize;
	launcher.launchKernel(PxgKernelIds::sg_SparseGridSortedArrayToDelta, gridSize, blockSize, 0, stream,
		in, mask, out, n);
	checkCudaError();
}
|
||||
|
||||
// Launches the sg_SparseGridGetUniqueValues kernel over n elements, compacting
// sortedData into uniqueValues (capacity uniqueValuesSize) via the scanned
// indices; subgridNeighborCollector may be NULL when neighbor collection is
// not needed.
void sparseGridGetUniqueValues(PxgKernelLauncher& launcher, const PxU32* sortedData, const PxU32* indices, PxU32* uniqueValues,
	const PxU32 n, PxU32* subgridNeighborCollector, const PxU32 uniqueValuesSize, CUstream stream)
{
	const PxU32 blockSize = THREADS_PER_BLOCK;
	const PxU32 gridSize = (n + blockSize - 1) / blockSize;
	launcher.launchKernel(PxgKernelIds::sg_SparseGridGetUniqueValues, gridSize, blockSize, 0, stream,
		sortedData, indices, uniqueValues, n, subgridNeighborCollector, uniqueValuesSize);
	checkCudaError();
}
|
||||
|
||||
// Launches the sg_SparseGridBuildSubgridNeighbors kernel with one thread per
// subgrid slot, populating the subgridNeighbors lookup from the sorted unique
// hash keys.
void sparseGridBuildSubgridNeighbors(PxgKernelLauncher& launcher, const PxU32* uniqueSortedHashkey, const PxU32* numActiveSubgrids,
	const PxU32 maxNumSubgrids, PxU32* subgridNeighbors, CUstream stream)
{
	const PxU32 blockSize = THREADS_PER_BLOCK;
	const PxU32 gridSize = (maxNumSubgrids + blockSize - 1) / blockSize;
	launcher.launchKernel(PxgKernelIds::sg_SparseGridBuildSubgridNeighbors, gridSize, blockSize, 0, stream,
		uniqueSortedHashkey, numActiveSubgrids, maxNumSubgrids, subgridNeighbors);
	checkCudaError();
}
|
||||
|
||||
|
||||
// Launches the sg_MarkSubgridEndIndices kernel with one thread per particle,
// recording into subgridEndIndices where each subgrid's particle run ends in
// the sorted particle-to-subgrid array.
void sparseGridMarkSubgridEndIndicesLaunch(PxgKernelLauncher& launcher, const PxU32* sortedParticleToSubgrid, PxU32 numParticles, PxU32* subgridEndIndices, CUstream stream)
{
	const PxU32 blockSize = THREADS_PER_BLOCK;
	const PxU32 gridSize = (numParticles + blockSize - 1) / blockSize;
	launcher.launchKernel(PxgKernelIds::sg_MarkSubgridEndIndices, gridSize, blockSize, 0, stream,
		sortedParticleToSubgrid, numParticles, subgridEndIndices);
	checkCudaError();
}
|
||||
|
||||
// Allocates the device buffers and scan/sort helpers needed to build a sparse
// grid for up to maxNumParticles particles and sparseGridParams.maxNumSubgrids
// subgrids.
// @param kernelLauncher     launcher (and CUDA context manager) used for all work.
// @param sparseGridParams   grid configuration, copied into this builder.
// @param maxNumParticles    capacity of the per-particle buffers.
// @param neighborhoodSize   when > 0, the 3x3x3 (27-entry) neighbor buffers and
//                           their scan/sort helpers are allocated as well.
// @param trackParticleOrder when true, a sorted-to-original index buffer is kept.
void PxSparseGridBuilder::initialize(PxgKernelLauncher* kernelLauncher, const PxSparseGridParams& sparseGridParams, PxU32 maxNumParticles, PxU32 neighborhoodSize, bool trackParticleOrder)
{
	mMaxParticles = maxNumParticles;

	mKernelLauncher = kernelLauncher;
	mSparseGridParams = sparseGridParams;
	mNeighborhoodSize = neighborhoodSize;
	mTrackParticleOrder = trackParticleOrder;

	mScan.initialize(kernelLauncher, maxNumParticles);
	mSort.initialize(kernelLauncher, maxNumParticles);
	// Per-particle buffers.
	mHashkeyPerParticle = PX_DEVICE_MEMORY_ALLOC(PxU32, *kernelLauncher->getCudaContextManager(), maxNumParticles);
	mSortedParticleToSubgrid = PX_DEVICE_MEMORY_ALLOC(PxU32, *kernelLauncher->getCudaContextManager(), maxNumParticles);
	// Per-subgrid buffers; 27 = one entry per cell of a 3x3x3 neighborhood.
	mSortedUniqueHashkeysPerSubgrid = PX_DEVICE_MEMORY_ALLOC(PxU32, *kernelLauncher->getCudaContextManager(), sparseGridParams.maxNumSubgrids);
	mSubgridNeighborLookup = PX_DEVICE_MEMORY_ALLOC(PxU32, *kernelLauncher->getCudaContextManager(), 27 * sparseGridParams.maxNumSubgrids);

	if (trackParticleOrder)
		mSortedToOriginalParticleIndex = PX_DEVICE_MEMORY_ALLOC(PxU32, *kernelLauncher->getCudaContextManager(), maxNumParticles);

	if (mNeighborhoodSize > 0)
	{
		// Extra scratch used by updateSubgrids() to expand each subgrid into
		// its 27-neighborhood before compaction.
		mScanNeighbors.initialize(kernelLauncher, 27 * sparseGridParams.maxNumSubgrids);
		mNeighborSort.initialize(kernelLauncher, 27 * sparseGridParams.maxNumSubgrids);
		mNeighborCollector = PX_DEVICE_MEMORY_ALLOC(PxU32, *kernelLauncher->getCudaContextManager(), 27 * sparseGridParams.maxNumSubgrids);
		mRequiredNeighborMask = PX_DEVICE_MEMORY_ALLOC(PxU32, *kernelLauncher->getCudaContextManager(), 27 * sparseGridParams.maxNumSubgrids);
	}
}
|
||||
|
||||
// Frees all device buffers allocated by initialize(); mHashkeyPerParticle acts
// as the "was initialized" sentinel, so releasing a never-initialized builder
// is a no-op.
// NOTE(review): the freed pointers are not reset to NULL afterwards, and
// mSortedToOriginalParticleIndex is freed even when trackParticleOrder was
// false at initialization — this presumably relies on PX_DEVICE_MEMORY_FREE
// tolerating null/stale pointers; confirm against the macro before allowing
// release() to be called twice.
void PxSparseGridBuilder::release()
{
	if (!mHashkeyPerParticle)
		return;

	mScan.release();
	mSort.release();

	PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mHashkeyPerParticle);
	PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mSortedParticleToSubgrid);
	PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mSortedUniqueHashkeysPerSubgrid);
	PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mSubgridNeighborLookup);
	PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mSortedToOriginalParticleIndex);

	if (mNeighborhoodSize > 0)
	{
		// Neighbor buffers exist only when a neighborhood was requested.
		mScanNeighbors.release();
		mNeighborSort.release();
		PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mNeighborCollector);
		PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mRequiredNeighborMask);
	}
}
|
||||
|
||||
// Records, per subgrid, where its particle run ends in the sorted
// particle-to-subgrid mapping.
void PxSparseGridBuilder::updateSubgridEndIndices(PxU32 numParticles, CUstream stream)
{
	// mSortedUniqueHashkeysPerSubgrid is not read again after the subgrid
	// update and has exactly the right size, so its storage is reused as the
	// output buffer here instead of allocating a dedicated one.
	PxU32* endIndicesScratch = mSortedUniqueHashkeysPerSubgrid;
	sparseGridMarkSubgridEndIndicesLaunch(*mKernelLauncher, mSortedParticleToSubgrid, numParticles, endIndicesScratch, stream);
}
|
||||
|
||||
// Rebuilds the set of occupied subgrids from the current particle positions.
// Pipeline: hash each particle to a subgrid key -> radix-sort keys (32 bits)
// -> delta + exclusive scan to assign a compact subgrid id per particle ->
// compact the unique keys. When a neighborhood is requested, each subgrid is
// additionally expanded into its 27-neighborhood, masked down to the neighbors
// actually needed, and compacted again.
// @return device pointer to the total number of active subgrids (the sum
//         produced by whichever scan ran last); consumed by
//         updateSubgridNeighbors().
// NOTE: statement order is load-bearing — several buffers are reused as
// scratch between stages (see inline comments).
PxU32* PxSparseGridBuilder::updateSubgrids(PxVec4* deviceParticlePos, PxU32 numParticles, PxU32* devicePhases, CUstream stream, PxU32 validPhase, const PxU32* activeIndices)
{
	// Per-particle subgrid hash keys, then sort so equal keys are contiguous.
	sparseGridCalcSubgridHashes(*mKernelLauncher, mSparseGridParams, mSortedParticleToSubgrid, mHashkeyPerParticle, deviceParticlePos, numParticles, devicePhases, validPhase, stream, activeIndices);
	mSort.sort(mHashkeyPerParticle, 32, stream, mSortedToOriginalParticleIndex, numParticles);

	// Delta array (1 at each new key) + exclusive scan = compact subgrid id
	// per sorted particle.
	sparseGridSortedArrayToDelta(*mKernelLauncher, mHashkeyPerParticle, NULL, mSortedParticleToSubgrid, numParticles, stream);
	mScan.exclusiveScan(mSortedParticleToSubgrid, stream, numParticles);

	PxU32* totalCountPointer;
	if (mNeighborhoodSize > 0)
	{
		totalCountPointer = mScanNeighbors.getSumPointer();

		// Compact unique subgrid keys and expand each into mNeighborCollector.
		sparseGridGetUniqueValues(*mKernelLauncher, mHashkeyPerParticle, mSortedParticleToSubgrid, mSortedUniqueHashkeysPerSubgrid, numParticles, mNeighborCollector, mSparseGridParams.maxNumSubgrids, stream);
		mNeighborSort.sort(mNeighborCollector, 32, stream);

		// Mark which of the collected neighbor subgrids are actually required
		// by any particle's neighborhood.
		PxgCudaHelpers::memsetAsync(*mKernelLauncher->getCudaContextManager(), mRequiredNeighborMask, PxU32(0), 27 * mSparseGridParams.maxNumSubgrids, stream);
		sparseGridMarkRequiredNeighbors(*mKernelLauncher, mRequiredNeighborMask, mNeighborCollector, mSparseGridParams, mNeighborhoodSize, deviceParticlePos, numParticles, devicePhases, validPhase, stream, activeIndices);

		PxU32* tmpBuffer = mSubgridNeighborLookup; //This memory is only used temporary and populated later. It is always large enough to hold the temporary data.
		sparseGridSortedArrayToDelta(*mKernelLauncher, mNeighborCollector, mRequiredNeighborMask, tmpBuffer, 27 * mSparseGridParams.maxNumSubgrids, stream);

		mScanNeighbors.exclusiveScan(tmpBuffer, stream);

		// Final compaction of the required (subgrid + neighbor) key set.
		sparseGridGetUniqueValues(*mKernelLauncher, mNeighborCollector, tmpBuffer, mSortedUniqueHashkeysPerSubgrid, 27 * mSparseGridParams.maxNumSubgrids, NULL, mSparseGridParams.maxNumSubgrids, stream);
	}
	else
	{
		// No neighborhood: the particle scan's sum is already the subgrid count.
		totalCountPointer = mScan.getSumPointer();
		sparseGridGetUniqueValues(*mKernelLauncher, mHashkeyPerParticle, mSortedParticleToSubgrid, mSortedUniqueHashkeysPerSubgrid, numParticles, NULL, mSparseGridParams.maxNumSubgrids, stream);
	}
	return totalCountPointer;
}
|
||||
|
||||
// Builds the per-subgrid neighbor lookup from the compacted unique hash keys
// and, when requested, mirrors the active-subgrid count back to the host.
void PxSparseGridBuilder::updateSubgridNeighbors(PxU32* totalCountPointer, CUstream stream)
{
	sparseGridBuildSubgridNeighbors(*mKernelLauncher, mSortedUniqueHashkeysPerSubgrid, totalCountPointer, mSparseGridParams.maxNumSubgrids, mSubgridNeighborLookup, stream);

	if (!mCopySubgridsInUseToHost)
		return;

	// Async device-to-host copy of the subgrid count (single PxU32).
	mKernelLauncher->getCudaContextManager()->getCudaContext()->memcpyDtoHAsync(&mNumSubgridsInUse, CUdeviceptr(totalCountPointer), sizeof(PxU32), stream);
}
|
||||
|
||||
// Full sparse-grid rebuild: recompute the occupied subgrid set, then rebuild
// the neighbor lookup from the resulting count.
void PxSparseGridBuilder::updateSparseGrid(PxVec4* deviceParticlePos, const PxU32 numParticles, PxU32* devicePhases, CUstream stream, PxU32 validPhase, const PxU32* activeIndices)
{
	updateSubgridNeighbors(updateSubgrids(deviceParticlePos, numParticles, devicePhases, stream, validPhase, activeIndices), stream);
}
|
||||
}
|
||||
Reference in New Issue
Block a user