feat(physics): wire physx sdk into build
This commit is contained in:
570
engine/third_party/physx/source/gpunarrowphase/src/PxgShapeManager.cpp
vendored
Normal file
570
engine/third_party/physx/source/gpunarrowphase/src/PxgShapeManager.cpp
vendored
Normal file
@@ -0,0 +1,570 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#include "PxgShapeManager.h"
|
||||
#include "PxgCopyManager.h"
|
||||
#include "PxgHeapMemAllocator.h"
|
||||
#include "PxgCudaUtils.h"
|
||||
#include "PxNodeIndex.h"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Builds the shape manager: host-side mirrors live in mapped (pinned) memory
// via the heap manager's mapped allocators, each GPU buffer is tagged with the
// narrowphase heap statistic, and everything starts with room for 128 entries.
PxgShapeManager::PxgShapeManager(PxgHeapMemoryAllocatorManager* heapManager) :
	mHeapManager(heapManager),
	// Host arrays use the mapped-memory allocator so their contents can be
	// addressed from the device (see getMappedDevicePtr in scheduleCopyHtoD).
	mHostShapes(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE)),
	mHostShapesRemapTable(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE)),
	mHostShapeIdTable(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE)),
	mHostTransformCacheIdToActorTable(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE)),
	mGpuShapesBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuShapesRemapTableBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuTransformCacheIdToActorTableBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuRigidIndiceBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuShapeIndiceBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuUnsortedShapeIndicesBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuTempRigidBitIndiceBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuTempRigidIndiceBuffer(heapManager, PxsHeapStats::eNARROWPHASE)
{
	//allocate x4
	// 128 is already a multiple of 4, which the code elsewhere requires for
	// the radix sort over shape ids (see registerShape / registerShapeInstance).
	const PxU32 initialSize = 128;
	// forceSize_Unsafe(0) + reserve + forceSize_Unsafe(initialSize) sets the
	// size without running element constructors on the mapped memory.
	mHostShapes.forceSize_Unsafe(0);
	mHostShapes.reserve(initialSize);
	mHostShapes.forceSize_Unsafe(initialSize);

	mHostShapesRemapTable.forceSize_Unsafe(0);
	mHostShapesRemapTable.reserve(initialSize);
	mHostShapesRemapTable.forceSize_Unsafe(initialSize);

	mHostShapeIdTable.forceSize_Unsafe(0);
	mHostShapeIdTable.reserve(initialSize);
	mHostShapeIdTable.forceSize_Unsafe(initialSize);

	mHostTransformCacheIdToActorTable.forceSize_Unsafe(0);
	mHostTransformCacheIdToActorTable.reserve(initialSize);
	mHostTransformCacheIdToActorTable.forceSize_Unsafe(initialSize);

	// Device-side counterparts, sized to match the host mirrors.
	mGpuShapesBuffer.allocate(sizeof(PxgShape)*initialSize, PX_FL);
	mGpuShapesRemapTableBuffer.allocate(sizeof(PxNodeIndex) * initialSize, PX_FL);
	mGpuTransformCacheIdToActorTableBuffer.allocate(sizeof(PxActor*) * initialSize, PX_FL);
	mGpuRigidIndiceBuffer.allocate(sizeof(PxNodeIndex) * initialSize, PX_FL);
	mGpuShapeIndiceBuffer.allocate(sizeof(PxU32) * initialSize, PX_FL);
	mGpuUnsortedShapeIndicesBuffer.allocate(sizeof(PxU32) * initialSize, PX_FL);
	mGpuTempRigidBitIndiceBuffer.allocate(sizeof(PxU32) * initialSize, PX_FL);
	mGpuTempRigidIndiceBuffer.allocate(sizeof(PxNodeIndex) * initialSize, PX_FL);

	// No pending resizes or dirty data yet; -1 means "no id registered".
	mResizeRequired = false;
	mTransformCacheResizeRequired = false;
	mMaxShapeId = -1;
	mMaxTransformCacheID = -1;

	mHasShapeChanged = false;
	mHasShapeInstanceChanged = false;
}
|
||||
|
||||
// Fills the remap and index device buffers with 0xFFFFFFFF, which this file
// uses as the "invalid" marker (see unregisterShapeInstance). The memsets are
// asynchronous on the given stream; the caller owns synchronization.
// NOTE(review): mGpuTransformCacheIdToActorTableBuffer is not cleared here —
// presumably its invalid value is NULL and it is handled elsewhere; confirm.
void PxgShapeManager::initialize(PxCudaContext* cudaContext, CUstream stream)
{
	cudaContext->memsetD32Async(mGpuShapesRemapTableBuffer.getDevicePtr(), 0xFFFFFFFF, mGpuShapesRemapTableBuffer.getSize()/sizeof(PxU32), stream);
	cudaContext->memsetD32Async(mGpuRigidIndiceBuffer.getDevicePtr(), 0xFFFFFFFF, mGpuRigidIndiceBuffer.getSize() / sizeof(PxU32), stream);
	cudaContext->memsetD32Async(mGpuShapeIndiceBuffer.getDevicePtr(), 0xFFFFFFFF, mGpuShapeIndiceBuffer.getSize() / sizeof(PxU32), stream);
	cudaContext->memsetD32Async(mGpuUnsortedShapeIndicesBuffer.getDevicePtr(), 0xFFFFFFFF, mGpuUnsortedShapeIndicesBuffer.getSize() / sizeof(PxU32), stream);
}
|
||||
|
||||
// Stores a copy of the shape in the host mirror under a freshly allocated id,
// marks the slot dirty for the next host-to-device upload, and returns the id.
PxU32 PxgShapeManager::registerShape(PxgShape& shape)
{
	const PxU32 newId = mIdPool.getNewID();

	// Grow the host-side storage when the new id falls outside the current
	// capacity. The doubled size is rounded up to a multiple of 4 because the
	// radix sort that later orders shape ids by rigid-body index works on
	// x4-sized batches.
	if (newId >= mHostShapes.capacity())
	{
		const PxU32 paddedCapacity = ((newId * 2) + 3) & (~3);
		mHostShapes.resize(paddedCapacity);
		mDirtyShapeMap.resize(paddedCapacity);
		mResizeRequired = true;	// the GPU buffer must grow too (scheduleCopyHtoD)
	}

	mHostShapes[newId] = shape;
	mDirtyShapeMap.growAndSet(newId);
	mMaxShapeId = PxMax(PxI32(newId), mMaxShapeId);
	mHasShapeChanged = true;

	return newId;
}
|
||||
|
||||
void PxgShapeManager::registerShapeInstance(const PxNodeIndex& nodeIndex, const PxU32 transformCacheID, PxActor* actor, bool aggregate)
|
||||
{
|
||||
if (transformCacheID >= mHostShapesRemapTable.capacity())
|
||||
{
|
||||
const PxU32 capacity = transformCacheID*2;
|
||||
//make sure capacity is x4 because we need to use radix sort to sort shape id based on rigid body index later
|
||||
const PxU32 tempCapacity = (capacity + 3)&(~3);
|
||||
mTransformCacheResizeRequired = true;
|
||||
mHostShapesRemapTable.resize(tempCapacity);
|
||||
mHostShapeIdTable.resize(tempCapacity);
|
||||
mHostTransformCacheIdToActorTable.resize(tempCapacity);
|
||||
mDirtyTransformCacheMap.resize(tempCapacity);
|
||||
}
|
||||
|
||||
mHostShapesRemapTable[transformCacheID] = nodeIndex;
|
||||
mHostShapeIdTable[transformCacheID] = aggregate? 0xffffffff : transformCacheID;
|
||||
mHostTransformCacheIdToActorTable[transformCacheID] = aggregate ? NULL : actor;
|
||||
mHasShapeInstanceChanged = true;
|
||||
mDirtyTransformCacheMap.growAndSet(transformCacheID);
|
||||
mMaxTransformCacheID = PxMax(PxI32(transformCacheID), mMaxTransformCacheID);
|
||||
}
|
||||
|
||||
void PxgShapeManager::unregisterShape(const PxU32 id)
|
||||
{
|
||||
mDirtyShapeMap.reset(id);
|
||||
mIdPool.deferredFreeID(id);
|
||||
mHasShapeChanged = true;
|
||||
}
|
||||
|
||||
void PxgShapeManager::unregisterShapeInstance(const PxU32 transformCacheID)
|
||||
{
|
||||
mDirtyTransformCacheMap.set(transformCacheID);
|
||||
mHostShapesRemapTable[transformCacheID] = PxNodeIndex(PX_INVALID_NODE);
|
||||
mHostShapeIdTable[transformCacheID] = 0xffffffff;
|
||||
mHostTransformCacheIdToActorTable[transformCacheID] = NULL;
|
||||
mHasShapeInstanceChanged = true;
|
||||
}
|
||||
|
||||
// Uploads all dirty shapes and shape instances to the GPU.
//
// Resizes the device buffers first if the host mirrors grew, then walks the
// dirty bitmaps and pushes one deferred host-to-device copy per run of
// adjacent dirty slots (coalescing up to maxGrouping entries per copy).
// Host pointers are converted to device-visible addresses via
// getMappedDevicePtr, which is why the host arrays use mapped memory.
// All device work is stream-ordered on 'stream'; the copies themselves are
// deferred into 'copyManager' and executed later.
void PxgShapeManager::scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext, CUstream stream)
{
	// NOTE(review): copyManager IS used below (pushDeferredHtoD); this macro
	// presumably silences warnings in configurations where the loops compile
	// out — confirm before removing.
	PX_UNUSED(copyManager);

	// Upper bound on how many adjacent dirty slots are merged into one copy.
	const PxU32 maxGrouping = 16;

	if (mHasShapeChanged)
	{
		mHasShapeChanged = false;

		if (mResizeRequired)
		{
			//Allocate and copy data across
			mGpuShapesBuffer.allocateCopyOldDataAsync(sizeof(PxgShape)*mHostShapes.capacity(), cudaContext, stream, PX_FL);

			mResizeRequired = false;
		}

		const PxU32* bits = mDirtyShapeMap.getWords();
		if (bits)
		{
			const PxU32 totalNumOfShapes = mMaxShapeId + 1;
			// Round the shape count up to a multiple of 4 (radix-sort batches).
			const PxU32 numShapes = (totalNumOfShapes + 3) &(~3);

			//make sure the dirty shape map cover x4 case and set those to invalid value
			for (PxU32 i = totalNumOfShapes; i < numShapes; ++i)
			{
				mDirtyShapeMap.growAndSet(i);
			}

			// PT: ### bitmap iterator pattern
			const PxU32 lastSetBit = mDirtyShapeMap.findLast();
			for (PxU32 w = 0; w <= lastSetBit >> 5; ++w)
			{
				//b&=b-1 will clear the lowest set bit in b
				for (PxU32 b = bits[w]; b; )
				{
					//dirtyId is the next bit that's set to 1!
					const PxU32 dirtyId = PxU32(w << 5 | PxLowestSetBit(b));

					void* hostPtr = mHostShapes.begin() + dirtyId;

					// One copy descriptor per run of adjacent dirty shapes.
					PxgCopyManager::CopyDesc desc;
					desc.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostPtr));
					desc.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuShapesBuffer.getDevicePtr()) + dirtyId * sizeof(PxgShape));
					desc.bytes = sizeof(PxgShape);

					mDirtyShapeMap.reset(dirtyId);
					//Now we loop to try and find adjacent bits that are set...
					PxU32 currIdx = dirtyId + 1;
					PxU32 groupSize = 1;
					while (currIdx <= lastSetBit && mDirtyShapeMap.test(currIdx) && groupSize < maxGrouping)
					{
						groupSize++;
						mDirtyShapeMap.reset(currIdx);
						currIdx++;
						// Each extra adjacent slot extends the same copy.
						desc.bytes += sizeof(PxgShape);
					}

					// If we grouped past the starting bit, resynchronize the
					// outer iteration state (w, b) with where we ended up.
					if (currIdx != (dirtyId + 1))
					{
						//get the word from the current bit
						w = PxMin(currIdx, lastSetBit) >> 5;
						//reload the word
						b = bits[w]; //Set a 1 here to make sure the b &= (b-1) in the for loop doesn't remove the current bit we're interested in
					}
					else
					{
						b &= (b - 1);
					}

					copyManager.pushDeferredHtoD(desc);
				}
			}
		}

		mDirtyShapeMap.clear();
	}

	if (mHasShapeInstanceChanged)
	{
		//AD: mHasShapeInstanceChanged needs to persist because computeRigidsToShapes() needs to run if we use direct-API
		// we lower the flag in PxgNarrowphaseCore::prepareGpuNarrowphase.

		// AD: the resize of the GPU transform cache is inside PxgNarrowphaseCore::prepareGpuNarrowphase.
		if (mTransformCacheResizeRequired)
		{
			// Grow each device table, preserving old contents, and fill the
			// newly added tail with the invalid marker (0xFFFFFFFF), matching
			// initialize(). Offsets into the device buffers are in bytes.
			PxU64 oldCapacity = mGpuShapesRemapTableBuffer.getSize();
			mGpuShapesRemapTableBuffer.allocateCopyOldDataAsync(sizeof(PxNodeIndex)*mHostShapesRemapTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuShapesRemapTableBuffer.getDevicePtr() + oldCapacity, 0xFFFFFFFF, (mGpuShapesRemapTableBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);

			oldCapacity = mGpuRigidIndiceBuffer.getSize();
			mGpuRigidIndiceBuffer.allocateCopyOldDataAsync(sizeof(PxNodeIndex) * mHostShapesRemapTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuRigidIndiceBuffer.getDevicePtr() + oldCapacity, 0xFFFFFFFF, (mGpuRigidIndiceBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);

			// Temp buffer is scratch space — plain allocate, no copy of old data.
			mGpuTempRigidIndiceBuffer.allocate(sizeof(PxNodeIndex) * mHostShapesRemapTable.capacity(), PX_FL);

			oldCapacity = mGpuShapeIndiceBuffer.getSize();
			mGpuShapeIndiceBuffer.allocateCopyOldDataAsync(sizeof(PxU32) * mHostShapeIdTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuShapeIndiceBuffer.getDevicePtr() + oldCapacity, 0xFFFFFFFF, (mGpuShapeIndiceBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);

			oldCapacity = mGpuUnsortedShapeIndicesBuffer.getSize();
			mGpuUnsortedShapeIndicesBuffer.allocateCopyOldDataAsync(sizeof(PxU32) * mHostShapeIdTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuUnsortedShapeIndicesBuffer.getDevicePtr() + oldCapacity, 0xFFFFFFFF, (mGpuUnsortedShapeIndicesBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);

			mGpuTempRigidBitIndiceBuffer.allocate(sizeof(PxU32) * mHostShapeIdTable.capacity(), PX_FL);

			// Actor table's invalid value is NULL, hence memset to 0 here.
			oldCapacity = mGpuTransformCacheIdToActorTableBuffer.getSize();
			mGpuTransformCacheIdToActorTableBuffer.allocateCopyOldDataAsync(sizeof(PxActor*) * mHostTransformCacheIdToActorTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuTransformCacheIdToActorTableBuffer.getDevicePtr() + oldCapacity, 0, (mGpuTransformCacheIdToActorTableBuffer.getSize() - oldCapacity) / sizeof(PxActor*), stream);

			mTransformCacheResizeRequired = false;
		}

		const PxU32 totalNumOfShapeInstances = mMaxTransformCacheID + 1;
		const PxU32 numShapeInstances = (totalNumOfShapeInstances + 3) &(~3);

		//make sure the dirty shape map cover x4 case and set those to invalid value
		for (PxU32 i = totalNumOfShapeInstances; i < numShapeInstances; ++i)
		{
			if (!mHostShapesRemapTable[i].isStaticBody())
			{
				mDirtyTransformCacheMap.growAndSet(i);
				mHostShapesRemapTable[i] = PxNodeIndex(PX_INVALID_NODE);
				mHostShapeIdTable[i] = 0xffffffff;
				mHostTransformCacheIdToActorTable[i] = NULL;
			}
		}

		const PxU32* bits = mDirtyTransformCacheMap.getWords();

		if (bits)
		{
			// PT: ### bitmap iterator pattern (same scheme as the shape loop
			// above, but four parallel copies per run: remap table, rigid
			// indices, unsorted shape indices, and actor table).
			const PxU32 lastSetBit = mDirtyTransformCacheMap.findLast();
			for (PxU32 w = 0; w <= lastSetBit >> 5; ++w)
			{
				//b&=b-1 will clear the lowest set bit in b
				for (PxU32 b = bits[w]; b; )
				{
					//dirtyId is the next bit that's set to 1!
					const PxU32 dirtyId = PxU32(w << 5 | PxLowestSetBit(b));

					void* hostRemapPtr = mHostShapesRemapTable.begin() + dirtyId;

					void* hostShapeIdPtr = mHostShapeIdTable.begin() + dirtyId;

					void* hostTransformCacheIdToActorPtr = mHostTransformCacheIdToActorTable.begin() + dirtyId;

					// desc1: node-index remap table.
					PxgCopyManager::CopyDesc desc1;
					desc1.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostRemapPtr));
					desc1.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuShapesRemapTableBuffer.getDevicePtr()) + dirtyId * sizeof(PxNodeIndex));
					desc1.bytes = sizeof(PxNodeIndex);

					// desc2: same host source, mirrored into the rigid-index buffer.
					PxgCopyManager::CopyDesc desc2;
					desc2.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostRemapPtr));
					desc2.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuRigidIndiceBuffer.getDevicePtr()) + dirtyId * sizeof(PxNodeIndex));
					desc2.bytes = sizeof(PxNodeIndex);

					// desc3: unsorted shape-index table.
					PxgCopyManager::CopyDesc desc3;
					desc3.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostShapeIdPtr));
					desc3.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuUnsortedShapeIndicesBuffer.getDevicePtr()) + dirtyId * sizeof(PxU32));
					desc3.bytes = sizeof(PxU32);

					// desc4: transform-cache-id -> actor pointer table.
					PxgCopyManager::CopyDesc desc4;
					desc4.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostTransformCacheIdToActorPtr));
					desc4.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuTransformCacheIdToActorTableBuffer.getDevicePtr()) + dirtyId * sizeof(PxActor*));
					desc4.bytes = sizeof(PxActor*);

					mDirtyTransformCacheMap.reset(dirtyId);
					//Now we loop to try and find adjacent bits that are set...
					PxU32 currIdx = dirtyId + 1;
					PxU32 groupSize = 1;
					while (currIdx <= lastSetBit && mDirtyTransformCacheMap.test(currIdx) && groupSize < maxGrouping)
					{
						groupSize++;
						mDirtyTransformCacheMap.reset(currIdx);
						currIdx++;
						// Extend all four copies in lockstep.
						desc1.bytes += sizeof(PxNodeIndex);
						desc2.bytes += sizeof(PxNodeIndex);
						desc3.bytes += sizeof(PxU32);
						desc4.bytes += sizeof(PxActor*);
					}

					// Resynchronize the outer iteration state after grouping.
					if (currIdx != (dirtyId + 1))
					{
						//get the word from the current bit
						w = PxMin(currIdx, lastSetBit) >> 5;
						//reload the word
						b = bits[w]; //Set a 1 here to make sure the b &= (b-1) in the for loop doesn't remove the current bit we're interested in
					}
					else
					{
						b &= (b - 1);
					}

					copyManager.pushDeferredHtoD(desc1);
					copyManager.pushDeferredHtoD(desc2);
					copyManager.pushDeferredHtoD(desc3);
					copyManager.pushDeferredHtoD(desc4);
				}
			}
		}
		mDirtyTransformCacheMap.clear();
	}
}
|
||||
|
||||
void PxgShapeManager::updateShapeMaterial(const PxU32 materialIndex, const PxU32 id)
|
||||
{
|
||||
PX_ASSERT(id < mHostShapes.size());
|
||||
mHostShapes[id].materialIndex = materialIndex;
|
||||
mDirtyShapeMap.growAndSet(id);
|
||||
mHasShapeChanged = true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Materials are stored as raw byte blobs of 'elemSize' bytes each; the host
// mirror lives in mapped memory so it can be addressed from the device during
// the deferred uploads. Initial capacity is 128 materials.
PxgMaterialManager::PxgMaterialManager(PxgHeapMemoryAllocatorManager* heapManager, const PxU32 elemSize) :
	mGpuMaterialBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mHeapManager(heapManager),
	mHostMaterial(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE))
{
	// Byte count for 128 material slots.
	const PxU32 originalSize = elemSize * 128;
	// Set the size without default-constructing elements in mapped memory.
	mHostMaterial.forceSize_Unsafe(0);
	mHostMaterial.reserve(originalSize);
	mHostMaterial.forceSize_Unsafe(originalSize);

	mGpuMaterialBuffer.allocate(originalSize, PX_FL);
	mResizeRequired = false;
}
|
||||
|
||||
// Copies 'elemSize' bytes of material data into a freshly allocated slot,
// marks the slot dirty for upload, and returns the new material id.
PxU32 PxgMaterialManager::registerMaterial(const PxU8* materialData, const PxU32 elemSize)
{
	const PxU32 materialId = mIdPool.getNewID();

	// The host array is sized in bytes; convert to a slot count to compare
	// against the id, and grow (at least doubling) when the id is out of range.
	const PxU32 currentSlots = mHostMaterial.capacity() / elemSize;
	if (materialId >= currentSlots)
	{
		const PxU32 newSlots = PxMax(currentSlots * 2 + 1, materialId + 1);
		mHostMaterial.resize(newSlots * elemSize);
		mResizeRequired = true;	// GPU buffer grows in scheduleCopyHtoD
	}

	PxMemCopy(mHostMaterial.begin() + materialId * elemSize, materialData, elemSize);
	mDirtyMaterialMap.growAndSet(materialId);

	return materialId;
}
|
||||
|
||||
void PxgMaterialManager::unregisterMaterial(const PxU32 id)
|
||||
{
|
||||
mDirtyMaterialMap.reset(id);
|
||||
mIdPool.deferredFreeID(id);
|
||||
}
|
||||
|
||||
// Uploads all dirty materials to the GPU. Resizes the device buffer first if
// the host mirror grew, then walks the dirty bitmap and pushes one deferred
// host-to-device copy per run of adjacent dirty slots (coalescing up to
// maxGrouping elements of 'elemSize' bytes each per copy). 'elemSize' must
// match the value the manager was constructed with.
void PxgMaterialManager::scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext,
	CUstream stream, const PxU32 elemSize)
{
	if (mResizeRequired)
	{
		// Grow the device buffer, preserving the previously uploaded data.
		mGpuMaterialBuffer.allocateCopyOldDataAsync(mHostMaterial.capacity(), cudaContext, stream, PX_FL);
		mResizeRequired = false;
	}
	const PxU32* bits = mDirtyMaterialMap.getWords();

	// Upper bound on how many adjacent dirty slots are merged into one copy.
	const PxU32 maxGrouping = 16;

	if (bits)
	{
		// PT: ### bitmap iterator pattern
		const PxU32 lastSetBit = mDirtyMaterialMap.findLast();
		for (PxU32 w = 0; w <= lastSetBit >> 5; ++w)
		{
			//b&=b-1 will clear the lowest set bit in b
			for (PxU32 b = bits[w]; b; )
			{
				//dirtyId is the next bit that's set to 1!
				const PxU32 dirtyId = PxU32(w << 5 | PxLowestSetBit(b));

				void* hostPtr = mHostMaterial.begin() + dirtyId * elemSize;

				// One copy descriptor per run of adjacent dirty materials;
				// the host pointer is mapped so the device can read it.
				PxgCopyManager::CopyDesc desc;
				desc.source = reinterpret_cast<size_t>(getMappedDevicePtr(cudaContext, hostPtr));
				desc.dest = reinterpret_cast<size_t>(reinterpret_cast<PxU8*>(mGpuMaterialBuffer.getDevicePtr()) + dirtyId * elemSize);
				desc.bytes = elemSize;

				mDirtyMaterialMap.reset(dirtyId);

				//Now we loop to try and find adjacent bits that are set...
				PxU32 currIdx = dirtyId + 1;
				PxU32 groupSize = 1;
				while (currIdx <= lastSetBit && mDirtyMaterialMap.test(currIdx) && (groupSize < maxGrouping))
				{
					groupSize++;
					mDirtyMaterialMap.reset(currIdx);
					currIdx++;
					// Each extra adjacent slot extends the same copy.
					desc.bytes += elemSize;
				}

				// If we grouped past the starting bit, resynchronize the
				// outer iteration state (w, b) with where we ended up.
				if (currIdx != (dirtyId + 1))
				{
					//get the word from the current bit
					w = PxMin(currIdx, lastSetBit) >> 5;
					//reload the word
					b = bits[w]; //Set a 1 here to make sure the b &= (b-1) in the for loop doesn't remove the current bit we're interested in
				}
				else
				{
					b &= (b - 1);
				}

				copyManager.pushDeferredHtoD(desc);
			}
		}
	}

	mDirtyMaterialMap.clear();
}
|
||||
|
||||
void PxgMaterialManager::updateMaterial(const PxU8* materialCore, const PxU32 elemSize, const PxU32 id)
|
||||
{
|
||||
PX_ASSERT(id < mHostMaterial.size());
|
||||
PxU8* destptr = reinterpret_cast<PxU8*>(mHostMaterial.begin() + id * elemSize);
|
||||
PxMemCopy(destptr, materialCore, elemSize);
|
||||
//mHostMaterial[id] = materialCore;
|
||||
mDirtyMaterialMap.growAndSet(id);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// FEM material manager: adds no state of its own, just forwards the element
// size to the generic byte-blob material manager.
PxgFEMMaterialManager::PxgFEMMaterialManager(PxgHeapMemoryAllocatorManager* heapManager, const PxU32 elemSize) :
	PxgMaterialManager(heapManager, elemSize)
{
}
|
||||
|
||||
// Uploads all dirty FEM materials to the GPU.
//
// The original body here was a line-for-line duplicate of
// PxgMaterialManager::scheduleCopyHtoD (resize-if-needed, then the bitmap
// iterator that coalesces adjacent dirty slots into grouped deferred HtoD
// copies). Delegating to the base implementation removes the duplication and
// keeps the two code paths from drifting apart; behavior is unchanged.
void PxgFEMMaterialManager::scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext,
	CUstream stream, const PxU32 elemSize)
{
	PxgMaterialManager::scheduleCopyHtoD(copyManager, cudaContext, stream, elemSize);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Soft-body specialization: fixes the material element size to the
// deformable-volume material layout.
PxgFEMSoftBodyMaterialManager::PxgFEMSoftBodyMaterialManager(PxgHeapMemoryAllocatorManager* heapManager) :
	PxgFEMMaterialManager(heapManager, sizeof(PxsDeformableVolumeMaterialData))
{
}
|
||||
Reference in New Issue
Block a user