// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#include "PxgShapeManager.h"
#include "PxgCopyManager.h"
#include "PxgHeapMemAllocator.h"
#include "PxgCudaUtils.h"
#include "PxNodeIndex.h"

using namespace physx;

PxgShapeManager::PxgShapeManager(PxgHeapMemoryAllocatorManager* heapManager) :
	mHeapManager(heapManager),
	mHostShapes(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE)),
	mHostShapesRemapTable(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE)),
	mHostShapeIdTable(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE)),
	mHostTransformCacheIdToActorTable(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE)),
	mGpuShapesBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuShapesRemapTableBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuTransformCacheIdToActorTableBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuRigidIndiceBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuShapeIndiceBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuUnsortedShapeIndicesBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuTempRigidBitIndiceBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mGpuTempRigidIndiceBuffer(heapManager, PxsHeapStats::eNARROWPHASE)
{
	//allocate x4
	const PxU32 initialSize = 128;

	mHostShapes.forceSize_Unsafe(0);
	mHostShapes.reserve(initialSize);
	mHostShapes.forceSize_Unsafe(initialSize);

	mHostShapesRemapTable.forceSize_Unsafe(0);
	mHostShapesRemapTable.reserve(initialSize);
	mHostShapesRemapTable.forceSize_Unsafe(initialSize);

	mHostShapeIdTable.forceSize_Unsafe(0);
	mHostShapeIdTable.reserve(initialSize);
	mHostShapeIdTable.forceSize_Unsafe(initialSize);

	mHostTransformCacheIdToActorTable.forceSize_Unsafe(0);
	mHostTransformCacheIdToActorTable.reserve(initialSize);
	mHostTransformCacheIdToActorTable.forceSize_Unsafe(initialSize);
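	// Note: the reserve() + forceSize_Unsafe() pairs above grow the pinned
	// host mirrors to initialSize entries without value-initializing them;
	// entries are only written when a shape or shape instance is registered.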
	mGpuShapesBuffer.allocate(sizeof(PxgShape) * initialSize, PX_FL);
	mGpuShapesRemapTableBuffer.allocate(sizeof(PxNodeIndex) * initialSize, PX_FL);
	mGpuTransformCacheIdToActorTableBuffer.allocate(sizeof(PxActor*) * initialSize, PX_FL);
	mGpuRigidIndiceBuffer.allocate(sizeof(PxNodeIndex) * initialSize, PX_FL);
	mGpuShapeIndiceBuffer.allocate(sizeof(PxU32) * initialSize, PX_FL);
	mGpuUnsortedShapeIndicesBuffer.allocate(sizeof(PxU32) * initialSize, PX_FL);
	mGpuTempRigidBitIndiceBuffer.allocate(sizeof(PxU32) * initialSize, PX_FL);
	mGpuTempRigidIndiceBuffer.allocate(sizeof(PxNodeIndex) * initialSize, PX_FL);

	mResizeRequired = false;
	mTransformCacheResizeRequired = false;
	mMaxShapeId = -1;
	mMaxTransformCacheID = -1;
	mHasShapeChanged = false;
	mHasShapeInstanceChanged = false;
}

void PxgShapeManager::initialize(PxCudaContext* cudaContext, CUstream stream)
{
	cudaContext->memsetD32Async(mGpuShapesRemapTableBuffer.getDevicePtr(), 0xFFFFFFFF, mGpuShapesRemapTableBuffer.getSize() / sizeof(PxU32), stream);
	cudaContext->memsetD32Async(mGpuRigidIndiceBuffer.getDevicePtr(), 0xFFFFFFFF, mGpuRigidIndiceBuffer.getSize() / sizeof(PxU32), stream);
	cudaContext->memsetD32Async(mGpuShapeIndiceBuffer.getDevicePtr(), 0xFFFFFFFF, mGpuShapeIndiceBuffer.getSize() / sizeof(PxU32), stream);
	cudaContext->memsetD32Async(mGpuUnsortedShapeIndicesBuffer.getDevicePtr(), 0xFFFFFFFF, mGpuUnsortedShapeIndicesBuffer.getSize() / sizeof(PxU32), stream);
}

PxU32 PxgShapeManager::registerShape(PxgShape& shape)
{
	const PxU32 shapeId = mIdPool.getNewID();
	if (shapeId >= mHostShapes.capacity())
	{
		mResizeRequired = true;
		const PxU32 capacity = shapeId * 2;
		//make sure capacity is a multiple of 4 because we later radix sort shape ids by rigid body index
		const PxU32 tempCapacity = (capacity + 3) & (~3);
		mHostShapes.resize(tempCapacity);
		mDirtyShapeMap.resize(tempCapacity);
	}

	mHostShapes[shapeId] = shape;
	mDirtyShapeMap.growAndSet(shapeId);
	mMaxShapeId = PxMax(PxI32(shapeId), mMaxShapeId);
	mHasShapeChanged = true;
	return shapeId;
}
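// A note on the "(capacity + 3) & (~3)" idiom used in registerShape above and
// registerShapeInstance below: it rounds the requested capacity up to the
// next multiple of 4 (e.g. 5 -> 8, 8 -> 8, 9 -> 12), which the radix sort of
// shape ids by rigid body index relies on.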
void PxgShapeManager::registerShapeInstance(const PxNodeIndex& nodeIndex, const PxU32 transformCacheID, PxActor* actor, bool aggregate)
{
	if (transformCacheID >= mHostShapesRemapTable.capacity())
	{
		const PxU32 capacity = transformCacheID * 2;
		//make sure capacity is a multiple of 4 because we later radix sort shape ids by rigid body index
		const PxU32 tempCapacity = (capacity + 3) & (~3);
		mTransformCacheResizeRequired = true;
		mHostShapesRemapTable.resize(tempCapacity);
		mHostShapeIdTable.resize(tempCapacity);
		mHostTransformCacheIdToActorTable.resize(tempCapacity);
		mDirtyTransformCacheMap.resize(tempCapacity);
	}

	mHostShapesRemapTable[transformCacheID] = nodeIndex;
	mHostShapeIdTable[transformCacheID] = aggregate ? 0xffffffff : transformCacheID;
	mHostTransformCacheIdToActorTable[transformCacheID] = aggregate ? NULL : actor;
	mHasShapeInstanceChanged = true;

	mDirtyTransformCacheMap.growAndSet(transformCacheID);
	mMaxTransformCacheID = PxMax(PxI32(transformCacheID), mMaxTransformCacheID);
}

void PxgShapeManager::unregisterShape(const PxU32 id)
{
	mDirtyShapeMap.reset(id);
	mIdPool.deferredFreeID(id);
	mHasShapeChanged = true;
}

void PxgShapeManager::unregisterShapeInstance(const PxU32 transformCacheID)
{
	mDirtyTransformCacheMap.set(transformCacheID);
	mHostShapesRemapTable[transformCacheID] = PxNodeIndex(PX_INVALID_NODE);
	mHostShapeIdTable[transformCacheID] = 0xffffffff;
	mHostTransformCacheIdToActorTable[transformCacheID] = NULL;
	mHasShapeInstanceChanged = true;
}

void PxgShapeManager::scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext, CUstream stream)
{
	const PxU32 maxGrouping = 16;

	if (mHasShapeChanged)
	{
		mHasShapeChanged = false;
		if (mResizeRequired)
		{
			//Allocate and copy data across
			mGpuShapesBuffer.allocateCopyOldDataAsync(sizeof(PxgShape) * mHostShapes.capacity(), cudaContext, stream, PX_FL);
			mResizeRequired = false;
		}

		const PxU32* bits = mDirtyShapeMap.getWords();
		if (bits)
		{
			const PxU32 totalNumOfShapes = mMaxShapeId + 1;
			const PxU32 numShapes = (totalNumOfShapes + 3) & (~3);
			//make sure the dirty shape map covers the x4 padding so the padded entries get uploaded too
			for (PxU32 i = totalNumOfShapes; i < numShapes; ++i)
			{
				mDirtyShapeMap.growAndSet(i);
			}

			// PT: ### bitmap iterator pattern
			const PxU32 lastSetBit = mDirtyShapeMap.findLast();
			for (PxU32 w = 0; w <= lastSetBit >> 5; ++w)
			{
				//b&=b-1 will clear the lowest set bit in b
				for (PxU32 b = bits[w]; b; )
				{
					//dirtyId is the next bit that's set to 1!
					const PxU32 dirtyId = PxU32(w << 5 | PxLowestSetBit(b));

					void* hostPtr = mHostShapes.begin() + dirtyId;

					PxgCopyManager::CopyDesc desc;
					desc.source = reinterpret_cast<CUdeviceptr>(getMappedDevicePtr(cudaContext, hostPtr));
					desc.dest = reinterpret_cast<CUdeviceptr>(reinterpret_cast<PxU8*>(mGpuShapesBuffer.getDevicePtr()) + dirtyId * sizeof(PxgShape));
					desc.bytes = sizeof(PxgShape);

					mDirtyShapeMap.reset(dirtyId);

					//Now we loop to try and find adjacent bits that are set...
					PxU32 currIdx = dirtyId + 1;
					PxU32 groupSize = 1;
					while (currIdx <= lastSetBit && mDirtyShapeMap.test(currIdx) && groupSize < maxGrouping)
					{
						groupSize++;
						mDirtyShapeMap.reset(currIdx);
						currIdx++;
						desc.bytes += sizeof(PxgShape);
					}

					if (currIdx != (dirtyId + 1))
					{
						//get the word from the current bit
						w = PxMin(currIdx, lastSetBit) >> 5;
						//reload the word; the bits we already consumed were reset in the map, so the reloaded word no longer contains them
						b = bits[w];
					}
					else
					{
						b &= (b - 1);
					}

					copyManager.pushDeferredHtoD(desc);
				}
			}
		}
		mDirtyShapeMap.clear();
	}
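	// The scheduling above (and below, for shape instances) walks the dirty
	// bitmap word by word and merges runs of up to maxGrouping adjacent dirty
	// ids into a single CopyDesc, so that one deferred host-to-device copy
	// covers a whole run instead of issuing one small copy per id.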
	if (mHasShapeInstanceChanged)
	{
		//AD: mHasShapeInstanceChanged needs to persist because computeRigidsToShapes() needs to run if we use direct-API
		// we lower the flag in PxgNarrowphaseCore::prepareGpuNarrowphase.

		// AD: the resize of the GPU transform cache is inside PxgNarrowphaseCore::prepareGpuNarrowphase.
		if (mTransformCacheResizeRequired)
		{
			PxU64 oldCapacity = mGpuShapesRemapTableBuffer.getSize();
			mGpuShapesRemapTableBuffer.allocateCopyOldDataAsync(sizeof(PxNodeIndex) * mHostShapesRemapTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuShapesRemapTableBuffer.getDevicePtr() + oldCapacity, 0xFFFFFFFF, (mGpuShapesRemapTableBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);

			oldCapacity = mGpuRigidIndiceBuffer.getSize();
			mGpuRigidIndiceBuffer.allocateCopyOldDataAsync(sizeof(PxNodeIndex) * mHostShapesRemapTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuRigidIndiceBuffer.getDevicePtr() + oldCapacity, 0xFFFFFFFF, (mGpuRigidIndiceBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);

			mGpuTempRigidIndiceBuffer.allocate(sizeof(PxNodeIndex) * mHostShapesRemapTable.capacity(), PX_FL);

			oldCapacity = mGpuShapeIndiceBuffer.getSize();
			mGpuShapeIndiceBuffer.allocateCopyOldDataAsync(sizeof(PxU32) * mHostShapeIdTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuShapeIndiceBuffer.getDevicePtr() + oldCapacity, 0xFFFFFFFF, (mGpuShapeIndiceBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);

			oldCapacity = mGpuUnsortedShapeIndicesBuffer.getSize();
			mGpuUnsortedShapeIndicesBuffer.allocateCopyOldDataAsync(sizeof(PxU32) * mHostShapeIdTable.capacity(), cudaContext, stream, PX_FL);
			cudaContext->memsetD32Async(mGpuUnsortedShapeIndicesBuffer.getDevicePtr() + oldCapacity, 0xFFFFFFFF, (mGpuUnsortedShapeIndicesBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);

			mGpuTempRigidBitIndiceBuffer.allocate(sizeof(PxU32) * mHostShapeIdTable.capacity(), PX_FL);

			oldCapacity = mGpuTransformCacheIdToActorTableBuffer.getSize();
			mGpuTransformCacheIdToActorTableBuffer.allocateCopyOldDataAsync(sizeof(PxActor*) * mHostTransformCacheIdToActorTable.capacity(), cudaContext, stream, PX_FL);
			//memsetD32Async counts 32-bit words, so divide the appended byte size by sizeof(PxU32); dividing by sizeof(PxActor*) would only clear half the region
			cudaContext->memsetD32Async(mGpuTransformCacheIdToActorTableBuffer.getDevicePtr() + oldCapacity, 0, (mGpuTransformCacheIdToActorTableBuffer.getSize() - oldCapacity) / sizeof(PxU32), stream);

			mTransformCacheResizeRequired = false;
		}

		const PxU32 totalNumOfShapeInstances = mMaxTransformCacheID + 1;
		const PxU32 numShapeInstances = (totalNumOfShapeInstances + 3) & (~3);
		//make sure the dirty transform cache map covers the x4 padding and reset the padded entries to invalid values
		for (PxU32 i = totalNumOfShapeInstances; i < numShapeInstances; ++i)
		{
			if (!mHostShapesRemapTable[i].isStaticBody())
			{
				mDirtyTransformCacheMap.growAndSet(i);
				mHostShapesRemapTable[i] = PxNodeIndex(PX_INVALID_NODE);
				mHostShapeIdTable[i] = 0xffffffff;
				mHostTransformCacheIdToActorTable[i] = NULL;
			}
		}
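		// Every dirty transform cache id has to update four parallel device
		// tables - the node index remap table, the rigid index buffer, the
		// unsorted shape index buffer and the actor pointer table - hence the
		// four CopyDescs per run below.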
		const PxU32* bits = mDirtyTransformCacheMap.getWords();
		if (bits)
		{
			// PT: ### bitmap iterator pattern
			const PxU32 lastSetBit = mDirtyTransformCacheMap.findLast();
			for (PxU32 w = 0; w <= lastSetBit >> 5; ++w)
			{
				//b&=b-1 will clear the lowest set bit in b
				for (PxU32 b = bits[w]; b; )
				{
					//dirtyId is the next bit that's set to 1!
					const PxU32 dirtyId = PxU32(w << 5 | PxLowestSetBit(b));

					void* hostRemapPtr = mHostShapesRemapTable.begin() + dirtyId;
					void* hostShapeIdPtr = mHostShapeIdTable.begin() + dirtyId;
					void* hostTransformCacheIdToActorPtr = mHostTransformCacheIdToActorTable.begin() + dirtyId;

					PxgCopyManager::CopyDesc desc1;
					desc1.source = reinterpret_cast<CUdeviceptr>(getMappedDevicePtr(cudaContext, hostRemapPtr));
					desc1.dest = reinterpret_cast<CUdeviceptr>(reinterpret_cast<PxU8*>(mGpuShapesRemapTableBuffer.getDevicePtr()) + dirtyId * sizeof(PxNodeIndex));
					desc1.bytes = sizeof(PxNodeIndex);

					PxgCopyManager::CopyDesc desc2;
					desc2.source = reinterpret_cast<CUdeviceptr>(getMappedDevicePtr(cudaContext, hostRemapPtr));
					desc2.dest = reinterpret_cast<CUdeviceptr>(reinterpret_cast<PxU8*>(mGpuRigidIndiceBuffer.getDevicePtr()) + dirtyId * sizeof(PxNodeIndex));
					desc2.bytes = sizeof(PxNodeIndex);

					PxgCopyManager::CopyDesc desc3;
					desc3.source = reinterpret_cast<CUdeviceptr>(getMappedDevicePtr(cudaContext, hostShapeIdPtr));
					desc3.dest = reinterpret_cast<CUdeviceptr>(reinterpret_cast<PxU8*>(mGpuUnsortedShapeIndicesBuffer.getDevicePtr()) + dirtyId * sizeof(PxU32));
					desc3.bytes = sizeof(PxU32);

					PxgCopyManager::CopyDesc desc4;
					desc4.source = reinterpret_cast<CUdeviceptr>(getMappedDevicePtr(cudaContext, hostTransformCacheIdToActorPtr));
					desc4.dest = reinterpret_cast<CUdeviceptr>(reinterpret_cast<PxU8*>(mGpuTransformCacheIdToActorTableBuffer.getDevicePtr()) + dirtyId * sizeof(PxActor*));
					desc4.bytes = sizeof(PxActor*);

					mDirtyTransformCacheMap.reset(dirtyId);

					//Now we loop to try and find adjacent bits that are set...
					PxU32 currIdx = dirtyId + 1;
					PxU32 groupSize = 1;
					while (currIdx <= lastSetBit && mDirtyTransformCacheMap.test(currIdx) && groupSize < maxGrouping)
					{
						groupSize++;
						mDirtyTransformCacheMap.reset(currIdx);
						currIdx++;
						desc1.bytes += sizeof(PxNodeIndex);
						desc2.bytes += sizeof(PxNodeIndex);
						desc3.bytes += sizeof(PxU32);
						desc4.bytes += sizeof(PxActor*);
					}

					if (currIdx != (dirtyId + 1))
					{
						//get the word from the current bit
						w = PxMin(currIdx, lastSetBit) >> 5;
						//reload the word; the bits we already consumed were reset in the map, so the reloaded word no longer contains them
						b = bits[w];
					}
					else
					{
						b &= (b - 1);
					}

					copyManager.pushDeferredHtoD(desc1);
					copyManager.pushDeferredHtoD(desc2);
					copyManager.pushDeferredHtoD(desc3);
					copyManager.pushDeferredHtoD(desc4);
				}
			}
		}
		mDirtyTransformCacheMap.clear();
	}
}

void PxgShapeManager::updateShapeMaterial(const PxU32 materialIndex, const PxU32 id)
{
	PX_ASSERT(id < mHostShapes.size());
	mHostShapes[id].materialIndex = materialIndex;
	mDirtyShapeMap.growAndSet(id);
	mHasShapeChanged = true;
}

////////////////////////////////////////////////////////////////////////////////////////////
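// PxgMaterialManager stores each material as a raw blob of elemSize bytes, so
// the same registration and copy machinery can host different material
// structs; PxgFEMSoftBodyMaterialManager below instantiates it with
// sizeof(PxsDeformableVolumeMaterialData).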
PxgMaterialManager::PxgMaterialManager(PxgHeapMemoryAllocatorManager* heapManager, const PxU32 elemSize) :
	mGpuMaterialBuffer(heapManager, PxsHeapStats::eNARROWPHASE),
	mHeapManager(heapManager),
	mHostMaterial(PxVirtualAllocator(heapManager->mMappedMemoryAllocators, PxsHeapStats::eNARROWPHASE))
{
	const PxU32 originalSize = elemSize * 128;
	mHostMaterial.forceSize_Unsafe(0);
	mHostMaterial.reserve(originalSize);
	mHostMaterial.forceSize_Unsafe(originalSize);

	mGpuMaterialBuffer.allocate(originalSize, PX_FL);
	mResizeRequired = false;
}

PxU32 PxgMaterialManager::registerMaterial(const PxU8* materialData, const PxU32 elemSize)
{
	const PxU32 materialId = mIdPool.getNewID();
	PxU32 capacity = mHostMaterial.capacity() / elemSize;
	if (materialId >= capacity)
	{
		capacity = PxMax(capacity * 2 + 1, materialId + 1);
		mHostMaterial.resize(capacity * elemSize);
		mResizeRequired = true;
	}

	PxU8* destPtr = mHostMaterial.begin() + materialId * elemSize;
	PxMemCopy(destPtr, materialData, elemSize);
	mDirtyMaterialMap.growAndSet(materialId);
	return materialId;
}

void PxgMaterialManager::unregisterMaterial(const PxU32 id)
{
	mDirtyMaterialMap.reset(id);
	mIdPool.deferredFreeID(id);
}

void PxgMaterialManager::scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext, CUstream stream, const PxU32 elemSize)
{
	if (mResizeRequired)
	{
		mGpuMaterialBuffer.allocateCopyOldDataAsync(mHostMaterial.capacity(), cudaContext, stream, PX_FL);
		mResizeRequired = false;
	}

	const PxU32* bits = mDirtyMaterialMap.getWords();
	const PxU32 maxGrouping = 16;
	if (bits)
	{
		// PT: ### bitmap iterator pattern
		const PxU32 lastSetBit = mDirtyMaterialMap.findLast();
		for (PxU32 w = 0; w <= lastSetBit >> 5; ++w)
		{
			//b&=b-1 will clear the lowest set bit in b
			for (PxU32 b = bits[w]; b; )
			{
				//dirtyId is the next bit that's set to 1!
				const PxU32 dirtyId = PxU32(w << 5 | PxLowestSetBit(b));

				void* hostPtr = mHostMaterial.begin() + dirtyId * elemSize;

				PxgCopyManager::CopyDesc desc;
				desc.source = reinterpret_cast<CUdeviceptr>(getMappedDevicePtr(cudaContext, hostPtr));
				desc.dest = reinterpret_cast<CUdeviceptr>(reinterpret_cast<PxU8*>(mGpuMaterialBuffer.getDevicePtr()) + dirtyId * elemSize);
				desc.bytes = elemSize;

				mDirtyMaterialMap.reset(dirtyId);

				//Now we loop to try and find adjacent bits that are set...
				PxU32 currIdx = dirtyId + 1;
				PxU32 groupSize = 1;
				while (currIdx <= lastSetBit && mDirtyMaterialMap.test(currIdx) && (groupSize < maxGrouping))
				{
					groupSize++;
					mDirtyMaterialMap.reset(currIdx);
					currIdx++;
					desc.bytes += elemSize;
				}

				if (currIdx != (dirtyId + 1))
				{
					//get the word from the current bit
					w = PxMin(currIdx, lastSetBit) >> 5;
					//reload the word; the bits we already consumed were reset in the map, so the reloaded word no longer contains them
					b = bits[w];
				}
				else
				{
					b &= (b - 1);
				}

				copyManager.pushDeferredHtoD(desc);
			}
		}
	}
	mDirtyMaterialMap.clear();
}

void PxgMaterialManager::updateMaterial(const PxU8* materialCore, const PxU32 elemSize, const PxU32 id)
{
	PX_ASSERT((id + 1) * elemSize <= mHostMaterial.size());
	PxU8* destptr = mHostMaterial.begin() + id * elemSize;
	PxMemCopy(destptr, materialCore, elemSize);
	mDirtyMaterialMap.growAndSet(id);
}

//////////////////////////////////////////////////////////////////////////////////////////////
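// A minimal usage sketch (hypothetical setup; PxsMaterialData stands in for
// whichever material struct the manager was created for):
//
//   PxgMaterialManager manager(heapManager, sizeof(PxsMaterialData));
//   PxsMaterialData data;   // fill in material parameters
//   const PxU32 id = manager.registerMaterial(reinterpret_cast<const PxU8*>(&data), sizeof(PxsMaterialData));
//   // ... once per frame, before the deferred copies are flushed:
//   manager.scheduleCopyHtoD(copyManager, cudaContext, stream, sizeof(PxsMaterialData));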
PxgFEMMaterialManager::PxgFEMMaterialManager(PxgHeapMemoryAllocatorManager* heapManager, const PxU32 elemSize) :
	PxgMaterialManager(heapManager, elemSize)
{
}

void PxgFEMMaterialManager::scheduleCopyHtoD(PxgCopyManager& copyManager, PxCudaContext* cudaContext, CUstream stream, const PxU32 elemSize)
{
	if (mResizeRequired)
	{
		mGpuMaterialBuffer.allocateCopyOldDataAsync(mHostMaterial.capacity(), cudaContext, stream, PX_FL);
		mResizeRequired = false;
	}

	const PxU32* bits = mDirtyMaterialMap.getWords();
	const PxU32 maxGrouping = 16;
	if (bits)
	{
		// PT: ### bitmap iterator pattern
		const PxU32 lastSetBit = mDirtyMaterialMap.findLast();
		for (PxU32 w = 0; w <= lastSetBit >> 5; ++w)
		{
			//b&=b-1 will clear the lowest set bit in b
			for (PxU32 b = bits[w]; b; )
			{
				//dirtyId is the next bit that's set to 1!
				const PxU32 dirtyId = PxU32(w << 5 | PxLowestSetBit(b));

				void* hostPtr = mHostMaterial.begin() + dirtyId * elemSize;

				PxgCopyManager::CopyDesc desc;
				desc.source = reinterpret_cast<CUdeviceptr>(getMappedDevicePtr(cudaContext, hostPtr));
				desc.dest = reinterpret_cast<CUdeviceptr>(reinterpret_cast<PxU8*>(mGpuMaterialBuffer.getDevicePtr()) + dirtyId * elemSize);
				desc.bytes = elemSize;

				mDirtyMaterialMap.reset(dirtyId);

				//Now we loop to try and find adjacent bits that are set...
				PxU32 currIdx = dirtyId + 1;
				PxU32 groupSize = 1;
				while (currIdx <= lastSetBit && mDirtyMaterialMap.test(currIdx) && (groupSize < maxGrouping))
				{
					groupSize++;
					mDirtyMaterialMap.reset(currIdx);
					currIdx++;
					desc.bytes += elemSize;
				}

				if (currIdx != (dirtyId + 1))
				{
					//get the word from the current bit
					w = PxMin(currIdx, lastSetBit) >> 5;
					//reload the word; the bits we already consumed were reset in the map, so the reloaded word no longer contains them
					b = bits[w];
				}
				else
				{
					b &= (b - 1);
				}

				copyManager.pushDeferredHtoD(desc);
			}
		}
	}
	mDirtyMaterialMap.clear();
}

////////////////////////////////////////////////////////////////////////////////////////////

PxgFEMSoftBodyMaterialManager::PxgFEMSoftBodyMaterialManager(PxgHeapMemoryAllocatorManager* heapManager) :
	PxgFEMMaterialManager(heapManager, sizeof(PxsDeformableVolumeMaterialData))
{
}