feat(physics): wire PhysX SDK into build

New vendored file (257 lines added):
engine/third_party/physx/source/gpusimulationcontroller/src/PxgSparseGridStandalone.cpp

@@ -0,0 +1,257 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
#include "PxgSparseGridStandalone.h"
|
||||
#include "PxSparseGridParams.h"
|
||||
#include "foundation/PxArray.h"
|
||||
|
||||
#include "PxgCudaMemoryAllocator.h"
|
||||
|
||||
#include "PxPhysXGpu.h"
|
||||
#include "PxvGlobals.h"
|
||||
#include "PxgKernelWrangler.h"
|
||||
#include "PxgKernelIndices.h"
|
||||
#include "PxgKernelLauncher.h"
|
||||
#include "PxgCudaMemoryAllocator.h"
|
||||
#include "PxgCudaHelpers.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
#if ENABLE_KERNEL_LAUNCH_ERROR_CHECK
// Debug build aid: synchronize the whole device after each kernel launch and
// print any pending CUDA error with file/line context (PX_FL). The sync makes
// asynchronous kernel failures surface at the launch site instead of at some
// later, unrelated call.
#define checkCudaError() { cudaError_t err = cudaDeviceSynchronize(); if (err != 0) printf("Cuda error file: %s, line: %i, error: %i\n", PX_FL, err); }
#else
// Release builds: no-op, so no device-wide synchronization cost per launch.
#define checkCudaError() { }
#endif

// Block size shared by every kernel launch in this file.
#define THREADS_PER_BLOCK 256
|
||||
|
||||
// Dispatches the sg_ReuseSubgrids kernel, which matches this update's unique
// subgrid hashkeys against the previous update's so existing subgrid slots can
// be carried over in the order maps. One thread per potential subgrid slot
// (sparseGridParams.maxNumSubgrids); asynchronous on the given stream.
void sparseGridReuseSubgrids(PxgKernelLauncher& launcher, const PxSparseGridParams& sparseGridParams,
	const PxU32* uniqueHashkeysPerSubgridPreviousUpdate, const PxU32* numActiveSubgridsPreviousUpdate, PxU32* subgridOrderMapPreviousUpdate,
	const PxU32* uniqueHashkeysPerSubgrid, const PxU32* numActiveSubgrids, PxU32* subgridOrderMap,
	CUstream stream)
{
	const PxU32 threadsPerBlock = THREADS_PER_BLOCK;
	// Ceil-div so the final partial block is still covered.
	const PxU32 blockCount = (sparseGridParams.maxNumSubgrids + threadsPerBlock - 1) / threadsPerBlock;
	launcher.launchKernel(PxgKernelIds::sg_ReuseSubgrids, blockCount, threadsPerBlock, 0, stream,
		sparseGridParams, uniqueHashkeysPerSubgridPreviousUpdate, numActiveSubgridsPreviousUpdate, subgridOrderMapPreviousUpdate,
		uniqueHashkeysPerSubgrid, numActiveSubgrids, subgridOrderMap);
	checkCudaError(); // no-op unless ENABLE_KERNEL_LAUNCH_ERROR_CHECK
}
|
||||
|
||||
|
||||
// Dispatches sg_AddReleasedSubgridsToUnusedStack: subgrid slots from the
// previous update that are no longer referenced get pushed onto the
// unused-subgrid stack for later reallocation. One thread per slot
// (sparseGridParams.maxNumSubgrids); asynchronous on the given stream.
void sparseGridAddReleasedSubgridsToUnusedStack(PxgKernelLauncher& launcher, const PxSparseGridParams& sparseGridParams,
	const PxU32* numActiveSubgridsPreviousUpdate, const PxU32* subgridOrderMapPreviousUpdate,
	PxU32* unusedSubgridStackSize, PxU32* unusedSubgridStack, CUstream stream)
{
	const PxU32 threadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 blockCount = (sparseGridParams.maxNumSubgrids + threadsPerBlock - 1) / threadsPerBlock; // ceil-div
	launcher.launchKernel(PxgKernelIds::sg_AddReleasedSubgridsToUnusedStack, blockCount, threadsPerBlock, 0, stream,
		numActiveSubgridsPreviousUpdate, subgridOrderMapPreviousUpdate, unusedSubgridStackSize, unusedSubgridStack);
	checkCudaError();
}
|
||||
|
||||
// Dispatches sg_AllocateNewSubgrids: assigns slots (popped from the
// unused-subgrid stack) to subgrids that did not exist in the previous update.
// One thread per potential subgrid slot; asynchronous on the given stream.
void sparseGridAllocateNewSubgrids(PxgKernelLauncher& launcher, const PxSparseGridParams& sparseGridParams, const PxU32* numActiveSubgrids, PxU32* subgridOrderMap,
	PxU32* unusedSubgridStackSize, PxU32* unusedSubgridStack, const PxU32* numActiveSubgridsPreviousUpdate, CUstream stream)
{
	const PxU32 threadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 blockCount = (sparseGridParams.maxNumSubgrids + threadsPerBlock - 1) / threadsPerBlock; // ceil-div
	launcher.launchKernel(PxgKernelIds::sg_AllocateNewSubgrids, blockCount, threadsPerBlock, 0, stream,
		numActiveSubgrids, subgridOrderMap, unusedSubgridStackSize, unusedSubgridStack, numActiveSubgridsPreviousUpdate, sparseGridParams.maxNumSubgrids);
	checkCudaError();
}
|
||||
|
||||
|
||||
// Dispatches sg_SparseGridCalcSubgridHashes: computes a subgrid hashkey for
// every particle position. Particles are filtered by phase against
// validPhaseMask; activeIndices (optional, may stay NULL) restricts the work
// to an index subset. One thread per particle; asynchronous on the stream.
void sparseGridCalcSubgridHashes(PxgKernelLauncher& launcher, const PxSparseGridParams& sparseGridParams, PxU32* indices,
	PxU32* hashkeyPerParticle, PxVec4* deviceParticlePos, const int numParticles,
	const PxU32* phases, const PxU32 validPhaseMask, CUstream stream, const PxU32* activeIndices = NULL)
{
	const PxU32 threadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 blockCount = (numParticles + threadsPerBlock - 1) / threadsPerBlock; // ceil-div over particles
	launcher.launchKernel(PxgKernelIds::sg_SparseGridCalcSubgridHashes, blockCount, threadsPerBlock, 0, stream,
		sparseGridParams, indices, hashkeyPerParticle, deviceParticlePos,
		numParticles, phases, validPhaseMask, activeIndices);
	checkCudaError();
}
|
||||
|
||||
|
||||
// Dispatches sg_SparseGridMarkRequiredNeighbors: for each particle, flags in
// outRequiredNeighborMask which neighboring subgrids must exist to cover a
// neighborhood of neighborhoodSize around it. Phase-filtered like the hash
// pass; activeIndices optionally restricts the particle set (NULL = all).
// One thread per particle; asynchronous on the given stream.
void sparseGridMarkRequiredNeighbors(PxgKernelLauncher& launcher, PxU32* outRequiredNeighborMask, PxU32* uniqueSortedHashkey, const PxSparseGridParams sparseGridParams, PxU32 neighborhoodSize,
	PxVec4* particlePositions, const PxU32 numParticles, const PxU32* phases, const PxU32 validPhaseMask, CUstream stream, const PxU32* activeIndices = NULL)
{
	const PxU32 threadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 blockCount = (numParticles + threadsPerBlock - 1) / threadsPerBlock; // ceil-div over particles
	launcher.launchKernel(PxgKernelIds::sg_SparseGridMarkRequiredNeighbors, blockCount, threadsPerBlock, 0, stream,
		outRequiredNeighborMask, uniqueSortedHashkey, sparseGridParams, neighborhoodSize, particlePositions,
		numParticles, phases, validPhaseMask, activeIndices);
	checkCudaError();
}
|
||||
|
||||
|
||||
// Dispatches sg_SparseGridSortedArrayToDelta: converts a sorted array into
// per-element deltas (value-change markers), optionally gated by 'mask'
// (NULL = no mask). Feeds the exclusive scans that compact unique hashkeys.
// One thread per element; asynchronous on the given stream.
void sparseGridSortedArrayToDelta(PxgKernelLauncher& launcher, const PxU32* in, const PxU32* mask, PxU32* out, PxU32 n, CUstream stream)
{
	// Grid size computed inline: ceil(n / THREADS_PER_BLOCK) blocks.
	launcher.launchKernel(PxgKernelIds::sg_SparseGridSortedArrayToDelta,
		(n + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK, THREADS_PER_BLOCK, 0, stream,
		in, mask, out, n);
	checkCudaError();
}
|
||||
|
||||
// Dispatches sg_SparseGridGetUniqueValues: compacts the sorted hashkey stream
// into uniqueValues using the scanned 'indices', capped at uniqueValuesSize.
// subgridNeighborCollector may be NULL when no neighbor collection is needed.
// One thread per input element; asynchronous on the given stream.
void sparseGridGetUniqueValues(PxgKernelLauncher& launcher, const PxU32* sortedData, const PxU32* indices, PxU32* uniqueValues,
	const PxU32 n, PxU32* subgridNeighborCollector, const PxU32 uniqueValuesSize, CUstream stream)
{
	const PxU32 threadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 blockCount = (n + threadsPerBlock - 1) / threadsPerBlock; // ceil-div
	launcher.launchKernel(PxgKernelIds::sg_SparseGridGetUniqueValues, blockCount, threadsPerBlock, 0, stream,
		sortedData, indices, uniqueValues, n, subgridNeighborCollector, uniqueValuesSize);
	checkCudaError();
}
|
||||
|
||||
// Dispatches sg_SparseGridBuildSubgridNeighbors: fills the subgrid neighbor
// lookup table from the sorted unique hashkeys. One thread per subgrid slot
// (maxNumSubgrids); asynchronous on the given stream.
void sparseGridBuildSubgridNeighbors(PxgKernelLauncher& launcher, const PxU32* uniqueSortedHashkey, const PxU32* numActiveSubgrids,
	const PxU32 maxNumSubgrids, PxU32* subgridNeighbors, CUstream stream)
{
	const PxU32 threadsPerBlock = THREADS_PER_BLOCK;
	const PxU32 blockCount = (maxNumSubgrids + threadsPerBlock - 1) / threadsPerBlock; // ceil-div
	launcher.launchKernel(PxgKernelIds::sg_SparseGridBuildSubgridNeighbors, blockCount, threadsPerBlock, 0, stream,
		uniqueSortedHashkey, numActiveSubgrids, maxNumSubgrids, subgridNeighbors);
	checkCudaError();
}
|
||||
|
||||
|
||||
// Dispatches sg_MarkSubgridEndIndices: records, per subgrid, the end index of
// its particle range within the sorted particle-to-subgrid mapping.
// One thread per particle; asynchronous on the given stream.
void sparseGridMarkSubgridEndIndicesLaunch(PxgKernelLauncher& launcher, const PxU32* sortedParticleToSubgrid, PxU32 numParticles, PxU32* subgridEndIndices, CUstream stream)
{
	// Grid size computed inline: ceil(numParticles / THREADS_PER_BLOCK) blocks.
	launcher.launchKernel(PxgKernelIds::sg_MarkSubgridEndIndices,
		(numParticles + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK, THREADS_PER_BLOCK, 0, stream,
		sortedParticleToSubgrid, numParticles, subgridEndIndices);
	checkCudaError();
}
|
||||
|
||||
// Allocates all device buffers and initializes the helper scan/sort algorithms
// for building a sparse grid over up to maxNumParticles particles and up to
// sparseGridParams.maxNumSubgrids subgrids.
//
// kernelLauncher     - launcher used for all subsequent kernel dispatches; stored.
// sparseGridParams   - grid configuration; stored by value.
// maxNumParticles    - capacity of all per-particle buffers.
// neighborhoodSize   - when > 0, additional buffers sized 27 * maxNumSubgrids
//                      are allocated for the neighbor-expansion path
//                      (27 presumably being the 3x3x3 subgrid neighborhood —
//                      confirm against the kernels).
// trackParticleOrder - when true, allocates the sorted-to-original index map.
void PxSparseGridBuilder::initialize(PxgKernelLauncher* kernelLauncher, const PxSparseGridParams& sparseGridParams, PxU32 maxNumParticles, PxU32 neighborhoodSize, bool trackParticleOrder)
{
	mMaxParticles = maxNumParticles;

	mKernelLauncher = kernelLauncher;
	mSparseGridParams = sparseGridParams;
	mNeighborhoodSize = neighborhoodSize;
	mTrackParticleOrder = trackParticleOrder;

	// Scan/sort helpers sized for the per-particle passes.
	mScan.initialize(kernelLauncher, maxNumParticles);
	mSort.initialize(kernelLauncher, maxNumParticles);
	// Per-particle buffers.
	mHashkeyPerParticle = PX_DEVICE_MEMORY_ALLOC(PxU32, *kernelLauncher->getCudaContextManager(), maxNumParticles);
	mSortedParticleToSubgrid = PX_DEVICE_MEMORY_ALLOC(PxU32, *kernelLauncher->getCudaContextManager(), maxNumParticles);
	// Per-subgrid buffers.
	mSortedUniqueHashkeysPerSubgrid = PX_DEVICE_MEMORY_ALLOC(PxU32, *kernelLauncher->getCudaContextManager(), sparseGridParams.maxNumSubgrids);
	mSubgridNeighborLookup = PX_DEVICE_MEMORY_ALLOC(PxU32, *kernelLauncher->getCudaContextManager(), 27 * sparseGridParams.maxNumSubgrids);

	// Optional: map from sorted particle slots back to original indices.
	if (trackParticleOrder)
		mSortedToOriginalParticleIndex = PX_DEVICE_MEMORY_ALLOC(PxU32, *kernelLauncher->getCudaContextManager(), maxNumParticles);

	// Neighbor-expansion path only: scan/sort helpers and scratch buffers over
	// the full 27 * maxNumSubgrids candidate-neighbor space.
	if (mNeighborhoodSize > 0)
	{
		mScanNeighbors.initialize(kernelLauncher, 27 * sparseGridParams.maxNumSubgrids);
		mNeighborSort.initialize(kernelLauncher, 27 * sparseGridParams.maxNumSubgrids);
		mNeighborCollector = PX_DEVICE_MEMORY_ALLOC(PxU32, *kernelLauncher->getCudaContextManager(), 27 * sparseGridParams.maxNumSubgrids);
		mRequiredNeighborMask = PX_DEVICE_MEMORY_ALLOC(PxU32, *kernelLauncher->getCudaContextManager(), 27 * sparseGridParams.maxNumSubgrids);
	}
}
|
||||
|
||||
// Releases the helper algorithms and all device buffers allocated by
// initialize(). Safe to call on a never-initialized builder: bails out when
// mHashkeyPerParticle is unset.
void PxSparseGridBuilder::release()
{
	// Guard: nothing was allocated (or release already ran, assuming
	// PX_DEVICE_MEMORY_FREE nulls the pointer — see note below).
	if (!mHashkeyPerParticle)
		return;

	mScan.release();
	mSort.release();

	PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mHashkeyPerParticle);
	PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mSortedParticleToSubgrid);
	PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mSortedUniqueHashkeysPerSubgrid);
	PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mSubgridNeighborLookup);
	// NOTE(review): mSortedToOriginalParticleIndex is freed unconditionally,
	// but initialize() only allocates it when trackParticleOrder was true.
	// This is fine only if the member is NULL-initialized and
	// PX_DEVICE_MEMORY_FREE tolerates (and resets) NULL pointers — verify
	// against the macro's definition in PxgCudaMemoryAllocator.h.
	PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mSortedToOriginalParticleIndex);

	// Neighbor-expansion buffers exist only when a neighborhood was requested.
	if (mNeighborhoodSize > 0)
	{
		mScanNeighbors.release();
		mNeighborSort.release();
		PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mNeighborCollector);
		PX_DEVICE_MEMORY_FREE(*mKernelLauncher->getCudaContextManager(), mRequiredNeighborMask);
	}
}
|
||||
|
||||
// Marks, for each subgrid, where its particle range ends in the sorted
// particle-to-subgrid mapping. Asynchronous on the given stream.
void PxSparseGridBuilder::updateSubgridEndIndices(PxU32 numParticles, CUstream stream)
{
	// Reuse mSortedUniqueHashkeysPerSubgrid as the output: it is not read
	// again after the subgrid update and has exactly the required capacity
	// (maxNumSubgrids entries).
	PxU32* endIndicesScratch = mSortedUniqueHashkeysPerSubgrid;
	sparseGridMarkSubgridEndIndicesLaunch(*mKernelLauncher, mSortedParticleToSubgrid, numParticles, endIndicesScratch, stream);
}
|
||||
|
||||
// Rebuilds the set of active subgrids from the current particle positions.
//
// Pipeline: hash each particle to a subgrid key, sort the keys, convert the
// sorted keys to deltas + exclusive scan to obtain compaction indices, then
// extract the unique subgrid hashkeys. With a neighborhood requested, the
// unique set is further expanded by the required neighbor subgrids.
//
// Returns a DEVICE pointer to the total number of active subgrids (the sum
// pointer of the scan used last); callers must copy it to host themselves.
// activeIndices may be NULL (process all particles); validPhase filters by
// particle phase. All work is asynchronous on 'stream'.
PxU32* PxSparseGridBuilder::updateSubgrids(PxVec4* deviceParticlePos, PxU32 numParticles, PxU32* devicePhases, CUstream stream, PxU32 validPhase, const PxU32* activeIndices)
{
	// Per-particle subgrid hashkeys (mSortedParticleToSubgrid is used as the
	// kernel's index buffer here and is rewritten below).
	sparseGridCalcSubgridHashes(*mKernelLauncher, mSparseGridParams, mSortedParticleToSubgrid, mHashkeyPerParticle, deviceParticlePos, numParticles, devicePhases, validPhase, stream, activeIndices);
	// Full 32-bit radix sort of the hashkeys; also permutes
	// mSortedToOriginalParticleIndex when particle-order tracking is on.
	mSort.sort(mHashkeyPerParticle, 32, stream, mSortedToOriginalParticleIndex, numParticles);

	// Delta-mark key changes, then exclusive scan -> compaction index per particle.
	sparseGridSortedArrayToDelta(*mKernelLauncher, mHashkeyPerParticle, NULL, mSortedParticleToSubgrid, numParticles, stream);
	mScan.exclusiveScan(mSortedParticleToSubgrid, stream, numParticles);

	PxU32* totalCountPointer;
	if (mNeighborhoodSize > 0)
	{
		// Neighbor path: the final count comes from the neighbor scan below.
		totalCountPointer = mScanNeighbors.getSumPointer();

		// Unique particle-occupied subgrids; also seeds mNeighborCollector
		// with candidate neighbor keys.
		sparseGridGetUniqueValues(*mKernelLauncher, mHashkeyPerParticle, mSortedParticleToSubgrid, mSortedUniqueHashkeysPerSubgrid, numParticles, mNeighborCollector, mSparseGridParams.maxNumSubgrids, stream);
		mNeighborSort.sort(mNeighborCollector, 32, stream);

		// Mark which candidate neighbors are actually required by particles.
		PxgCudaHelpers::memsetAsync(*mKernelLauncher->getCudaContextManager(), mRequiredNeighborMask, PxU32(0), 27 * mSparseGridParams.maxNumSubgrids, stream);
		sparseGridMarkRequiredNeighbors(*mKernelLauncher, mRequiredNeighborMask, mNeighborCollector, mSparseGridParams, mNeighborhoodSize, deviceParticlePos, numParticles, devicePhases, validPhase, stream, activeIndices);

		// Scratch reuse: mSubgridNeighborLookup is only populated later (in
		// updateSubgridNeighbors) and is always large enough (27 * maxNumSubgrids).
		PxU32* tmpBuffer = mSubgridNeighborLookup;
		sparseGridSortedArrayToDelta(*mKernelLauncher, mNeighborCollector, mRequiredNeighborMask, tmpBuffer, 27 * mSparseGridParams.maxNumSubgrids, stream);

		mScanNeighbors.exclusiveScan(tmpBuffer, stream);

		// Compact required neighbor keys into the unique-hashkeys buffer.
		sparseGridGetUniqueValues(*mKernelLauncher, mNeighborCollector, tmpBuffer, mSortedUniqueHashkeysPerSubgrid, 27 * mSparseGridParams.maxNumSubgrids, NULL, mSparseGridParams.maxNumSubgrids, stream);
	}
	else
	{
		// No neighborhood: count is simply the number of unique occupied subgrids.
		totalCountPointer = mScan.getSumPointer();
		sparseGridGetUniqueValues(*mKernelLauncher, mHashkeyPerParticle, mSortedParticleToSubgrid, mSortedUniqueHashkeysPerSubgrid, numParticles, NULL, mSparseGridParams.maxNumSubgrids, stream);
	}
	return totalCountPointer;
}
|
||||
|
||||
// Builds the subgrid neighbor lookup table from the compacted unique hashkeys
// and, when mCopySubgridsInUseToHost is set, mirrors the active-subgrid count
// to the host. totalCountPointer is the device pointer returned by
// updateSubgrids(). Asynchronous on the given stream.
void PxSparseGridBuilder::updateSubgridNeighbors(PxU32* totalCountPointer, CUstream stream)
{
	sparseGridBuildSubgridNeighbors(*mKernelLauncher, mSortedUniqueHashkeysPerSubgrid, totalCountPointer, mSparseGridParams.maxNumSubgrids, mSubgridNeighborLookup, stream);
	if (mCopySubgridsInUseToHost)
	{
		// Async D2H copy: mNumSubgridsInUse is only valid after the stream
		// has been synchronized by the caller.
		mKernelLauncher->getCudaContextManager()->getCudaContext()->memcpyDtoHAsync(&mNumSubgridsInUse, CUdeviceptr(totalCountPointer), sizeof(PxU32), stream);
	}
}
|
||||
|
||||
// Convenience entry point: runs the full sparse-grid rebuild — subgrid update
// followed by neighbor-table construction — on the given stream.
// activeIndices may be NULL (all particles); validPhase filters by phase.
void PxSparseGridBuilder::updateSparseGrid(PxVec4* deviceParticlePos, const PxU32 numParticles, PxU32* devicePhases, CUstream stream, PxU32 validPhase, const PxU32* activeIndices)
{
	// updateSubgrids yields the device-side active-subgrid count consumed by
	// the neighbor pass.
	updateSubgridNeighbors(updateSubgrids(deviceParticlePos, numParticles, devicePhases, stream, validPhase, activeIndices), stream);
}
|
||||
}
|
||||
Reference in New Issue
Block a user