// Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of NVIDIA CORPORATION nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved. 
#include "common/PxProfileZone.h"
#include "PxgConstraintPartition.h"
#include "PxcNpWorkUnit.h"
#include "PxsContactManager.h"
#include "PxsContactManagerState.h"
#include "PxgConstraintPrep.h"
#include "PxvNphaseImplementationContext.h"
#include "PxgBodySimManager.h"
#include "PxgJointManager.h"
#include "PxvDynamics.h"
#include "CmFlushPool.h"

using namespace physx;

// Compile-time toggles that route some edges to dedicated code paths of the body sim manager
// instead of the regular partitioning (see the "special case" helpers that test them).
// PT: unfortunately these don't seem to be just an optimization, several UTs fail if we disable them
// ===> which means things will fail when the buffers in the body sim manager are full
#define ARTIC_STATIC_EDGES_INTERNAL_SOLVER 1
#define ARTIC_SELF_CONSTRAINT_SOLVER 1
#define RIGID_STATIC_EDGE_SOLVER 1

// PT: getDestroyedContactEdgeIndices() is only called after updateIncrementalIslands(), which resets
// mDestroyedContactEdgeIndices. So it looks like any recorded data in that array before that point is never consumed.
#define RECORD_DESTROYED_EDGES_IN_FOUND_LOST_PASSES 0

// When set to 1, the per-edge add/remove functions emit their own PX_PROFILE_ZONE.
#define USE_FINE_GRAINED_PROFILE_ZONES 0

// PT: sanity check - run default versions
static const bool gRunDefaultVersion = false;

/////

// PT: note that inside PartitionEdgeManager we reuse PartitionEdge::mNextPatch for a linked-list of free
// edges but it has nothing to do with contact patches and we could be using another place in PartitionEdge.
PartitionEdgeManager::PartitionEdgeManager() : mFreeEdges(NULL), mEdgeCount(0) { } PartitionEdgeManager::~PartitionEdgeManager() { const PxU32 size = mMemory.size(); for(PxU32 a=0; a~PartitionEdgeSlab(); void* memory = mMemory[a]; PX_FREE(memory); } mPartitionEdgeSlabs.clear(); mMemory.clear(); } void PartitionEdgeManager::allocateSlab() { PX_ASSERT(mFreeEdges == NULL); PartitionEdgeSlab* newSlab; { // PT: align slab base address so that an edge doesn't cross a cache line void* memory = PX_ALLOC((sizeof(PartitionEdgeSlab)+32), "PartitionEdgeSlab"); const size_t aligned = (size_t(memory) + 31) & size_t(~31); newSlab = reinterpret_cast(aligned); mPartitionEdgeSlabs.pushBack(newSlab); mMemory.pushBack(memory); } PartitionEdge* edges = &newSlab->mEdges[0]; PxU32 currentIndex = mEdgeCount; edges->mUniqueIndex = currentIndex++; for(PxU32 a = 1; a < SLAB_SIZE; ++a) { edges[a-1].mNextPatch = &edges[a]; edges[a].mUniqueIndex = currentIndex++; } edges[SLAB_SIZE-1].mNextPatch = NULL; mEdgeCount += SLAB_SIZE; mFreeEdges = edges; } PX_FORCE_INLINE PartitionEdge* PartitionEdgeManager::getEdge(IG::EdgeIndex index) { if(mFreeEdges == NULL) allocateSlab(); PX_ASSERT(mFreeEdges != NULL); PartitionEdge* edge = mFreeEdges; mFreeEdges = mFreeEdges->mNextPatch; PX_PLACEMENT_NEW(edge, PartitionEdge(index)); return edge; } PX_FORCE_INLINE void PartitionEdgeManager::putEdge(PartitionEdge* edge) { edge->mNextPatch = mFreeEdges; mFreeEdges = edge; } ///// static PX_FORCE_INLINE void increaseNodeInteractionCounts(PxInt32ArrayPinned& nodeInteractionCountArray, PxNodeIndex nodeIndex1, PxNodeIndex nodeIndex2) { if(nodeIndex1.isValid()) nodeInteractionCountArray[nodeIndex1.index()]++; if(nodeIndex2.isValid()) nodeInteractionCountArray[nodeIndex2.index()]++; } static PX_FORCE_INLINE void increaseNodeInteractionCountsMT(PxInt32ArrayPinned& nodeInteractionCountArray, PxNodeIndex nodeIndex1, PxNodeIndex nodeIndex2) { if(nodeIndex1.isValid()) 
PxAtomicIncrement(reinterpret_cast(&nodeInteractionCountArray[nodeIndex1.index()])); if(nodeIndex2.isValid()) PxAtomicIncrement(reinterpret_cast(&nodeInteractionCountArray[nodeIndex2.index()])); } static PX_FORCE_INLINE void decreaseNodeInteractionCounts(PxInt32ArrayPinned& nodeInteractionCountArray, PxNodeIndex nodeIndex1, PxNodeIndex nodeIndex2) { if(nodeIndex1.isValid()) nodeInteractionCountArray[nodeIndex1.index()]--; if(nodeIndex2.isValid()) nodeInteractionCountArray[nodeIndex2.index()]--; } static PX_FORCE_INLINE void decreaseNodeInteractionCountsMT(PxInt32ArrayPinned& nodeInteractionCountArray, PxNodeIndex nodeIndex1, PxNodeIndex nodeIndex2) { if(nodeIndex1.isValid()) PxAtomicDecrement(reinterpret_cast(&nodeInteractionCountArray[nodeIndex1.index()])); if(nodeIndex2.isValid()) PxAtomicDecrement(reinterpret_cast(&nodeInteractionCountArray[nodeIndex2.index()])); } ///// static PX_FORCE_INLINE void increaseForceThresholds(const PxcNpWorkUnit& unit, PartitionEdge* edge, PxU32& nbForceThresholds) { if(unit.mFlags & PxcNpWorkUnitFlag::eFORCE_THRESHOLD) { edge->setHasThreshold(); nbForceThresholds++; } } static PX_FORCE_INLINE void increaseForceThresholdsMT(const PxcNpWorkUnit& unit, PartitionEdge* edge, PxU32* nbForceThresholds) { if(unit.mFlags & PxcNpWorkUnitFlag::eFORCE_THRESHOLD) { edge->setHasThreshold(); PxAtomicIncrement(reinterpret_cast(nbForceThresholds)); } } static PX_FORCE_INLINE void decreaseForceThresholds(const PartitionEdge* edge, PxU32& nbForceThresholds) { if(edge->hasThreshold()) { PX_ASSERT(nbForceThresholds); nbForceThresholds--; } } ///// PxgIncrementalPartition::PxgIncrementalPartition(const PxVirtualAllocator& allocator, PxU32 maxNumPartitions, PxU64 contextID) : mNodeCount(0), mNbContactBatches(0), mNbConstraintBatches(0), mNbPartitions(0), mTotalContacts(0), mTotalConstraints(0), mTotalArticulationContacts(0), mTotalArticulationConstraints(0), mMaxSlabCount(0), mNbForceThresholds(0), mPartitionIndexArray(allocator), 
mPartitionNodeArray(allocator), mSolverConstants(allocator), mNodeInteractionCountArray(allocator), mDestroyedContactEdgeIndices(allocator), mStartSlabPerPartition(allocator), mArticStartSlabPerPartition(allocator), mNbJointsPerPartition(allocator), mNbArtiJointsPerPartition(allocator), mCSlab(maxNumPartitions), mContextID(contextID) { mPartitionSlabs.pushBack(PX_NEW(PartitionSlab)); } PxgIncrementalPartition::~PxgIncrementalPartition() { { const PxU32 nbBatches = mBatches.size(); for(PxU32 i=0;i mNodeCount) { nodeCount = PxMax(nodeCount, 2u * mNodeCount); const PxU32 nbSlabs = mPartitionSlabs.size(); for(PxU32 a = 0; a < nbSlabs; ++a) { PartitionSlab* slab = mPartitionSlabs[a]; // PT: this can use up quite a bit of memory: // nb slabs * nb objects * (PXG_BATCH_SIZE * sizeof(NodeEntryStorage) + bitmask) slab->mNodeBitmap.resize(nodeCount); slab->mNodeEntries.resize(nodeCount); } //mNodeInteractionCountArray.resize(sizeof(PxU32), nodeCount); mNodeInteractionCountArray.reserve(nodeCount); mNodeInteractionCountArray.forceSize_Unsafe(nodeCount); PxMemZero(mNodeInteractionCountArray.begin() + mNodeCount, (nodeCount - mNodeCount)*sizeof(PxU32)); mNodeCount = nodeCount; } } void PxgIncrementalPartition::getPreviousAndNextReferencesInSlab(NodeEntryDecoded& prev, NodeEntryDecoded& next, PxU32 index, PxU32 uniqueId, const PartitionSlab* slab, PxU32 slabMask) const { PX_ASSERT(slabMask); PxU32 partitionId = mPartitionIndexArray[uniqueId].mPartitionIndex; #if PX_DEBUG { // PT: checks that the passed data we already had matches the data we previously computed in this function const PxU32 slabId = partitionId/PXG_BATCH_SIZE; PX_ASSERT(slab == mPartitionSlabs[slabId]); PX_ASSERT(slabMask == mPartitionSlabs[slabId]->mNodeBitmap[index]); } #endif // PT: I think that AND gets back the id between 0 and 31 that we started from when writing mPartitionIndex in the // first place, from "baseId + id". We have to do it this way to reuse that function for both the add & remove cases. 
// In the add case we could directly pass the proper partitionId to the function. const PxU32 partitionMask = PXG_BATCH_SIZE - 1; partitionId = partitionId & partitionMask; // PT: say we are in partition 3. // partitionBit = 1 << 3 = 8 = 00001000 // maskPrev = 8 - 1 = 7 = 00000111 // maskNext = ~(16 - 1) = ~15 = ~00001111 = 11110000 // slabMask marks which partitions are used for this node. // Say the mask is 10101010 (pretending we only have 8 partitions). // bitMaskPrev = 10101010 & 00000111 = xxxxX010 <= we clear out the bits before the partition X we're in // bitMaskNext = 10101010 & 11110000 = 1010Xxxx <= we clear out the bits after the partition X we're in // Ignoring the case where the results are 0, we find: // idprev = PxHighestSetBit(bitMaskPrev) = 1 (highest because we cleared out the top bits) // idnext = PxLowestSetBit(bitMaskNext) = 5 (lowest because we cleared out the bottom bits) // Then we fetch the edges corresponding to these indices. They are the previous and next edges // that will reference the same node (whose slabMask we used). // // So the problem if we want to MT this is that at the same time another edge can be processed, that // references the same node, fetches the same slabMask, updates the same slabMask, then writes itself // to the mEdges array for that node. In one thread the mask was 0 and the edge entry null, while in // another thread that mask was updated to 1 and the edge entry written out. const PxU32 partitionBit = 1u << partitionId; //const PxU32 maskPrev = ((1u<<(partitionId))-1u); const PxU32 maskPrev = partitionBit - 1u; //const PxU32 maskNext = partitionId == partitionMask ? 0 : ~((1u<<(partitionId+1))-1u); const PxU32 maskNext = partitionId == partitionMask ? 0 : ~(partitionBit + partitionBit - 1u); PxU32 bitMaskPrev = slabMask & maskPrev; PxU32 bitMaskNext = slabMask & maskNext; bitMaskPrev = bitMaskPrev == 0 ? slabMask : bitMaskPrev; bitMaskNext = bitMaskNext == 0 ? 
slabMask : bitMaskNext; PX_ASSERT(bitMaskPrev != 0); PX_ASSERT(bitMaskNext != 0); const PxU32 idprev = PxHighestSetBit(bitMaskPrev); const PxU32 idnext = PxLowestSetBit(bitMaskNext); #if STORE_INDICES_IN_NODE_ENTRIES #if STORE_EDGE_DATA_IN_NODE_ENTRIES const NodeEntryStorage* src = slab->mNodeEntries[index].mEdges; prev = src[idprev]; next = src[idnext]; #else const PxU32* src = slab->mNodeEntries[index].mEdges; prev = mEdgeManager.getPartitionEdge(src[idprev]); next = mEdgeManager.getPartitionEdge(src[idnext]); #endif #else prev = slab->mNodeEntries[index].mEdges[idprev]; next = slab->mNodeEntries[index].mEdges[idnext]; #endif } // PT: this function does the actual edge coloring + maintain a linked list of partition edges in PartitionNodeData void PxgIncrementalPartition::addEdgeInternal(const PartitionEdge* PX_RESTRICT partitionEdge, PartitionSlab* PX_RESTRICT slab, PxU16 id, PxU16 baseId) { const PxU32 uniqueId = partitionEdge->mUniqueIndex; //Insert this edge into this partition!!!! 
mPartitionIndexArray[uniqueId].mPartitionIndex = PxU16(baseId + id); slab->mPartitions[id].addToPartition(uniqueId, mPartitionIndexArray[uniqueId]); const PxNodeIndex node0 = partitionEdge->mNode0; const PxNodeIndex node1 = partitionEdge->mNode1; const PxU32 node0Index = node0.index(); const PxU32 node1Index = node1.index(); PxU32 slabMask0 = 0; if(!partitionEdge->hasInfiniteMass0()) { slabMask0 = slab->mNodeBitmap[node0Index] | (1 << id); slab->mNodeBitmap[node0Index] = slabMask0; #if STORE_INDICES_IN_NODE_ENTRIES #if STORE_EDGE_DATA_IN_NODE_ENTRIES slab->mNodeEntries[node0Index].mEdges[id].mUniqueIndex = uniqueId; slab->mNodeEntries[node0Index].mEdges[id].mNode0Index = node0Index; #else slab->mNodeEntries[node0Index].mEdges[id] = uniqueId; #endif #else slab->mNodeEntries[node0Index].mEdges[id] = partitionEdge; #endif } PxU32 slabMask1 = 0; if(!partitionEdge->hasInfiniteMass1()) { slabMask1 = slab->mNodeBitmap[node1Index] | (1 << id); slab->mNodeBitmap[node1Index] = slabMask1; #if STORE_INDICES_IN_NODE_ENTRIES #if STORE_EDGE_DATA_IN_NODE_ENTRIES slab->mNodeEntries[node1Index].mEdges[id].mUniqueIndex = uniqueId; slab->mNodeEntries[node1Index].mEdges[id].mNode0Index = node0Index; #else slab->mNodeEntries[node1Index].mEdges[id] = uniqueId; #endif #else slab->mNodeEntries[node1Index].mEdges[id] = partitionEdge; #endif } // PT: builds data used in constraintContactBlockPrePrepLaunch / constraint1DBlockPrePrepLaunch PartitionNodeData& nodeData = mPartitionNodeArray[uniqueId]; // PT: // The edge was just added to partition P. // The edge involves node0 and node1. // Each node has a bitmask encoding which partitions the node is involved in. // We just updated that bitmask with the bit from partition P. So that bitmask cannot be 0 here. // // getPreviousAndNextReferencesInSlab() looks for the previous and next bits / partitions // involving the same node. Each of these bit was set by / corresponds to another partition edge, // that we actually store in mNodeEntries. 
We have 32 edges in mNodeEntries for each node, so this // is effectively a redundant "copy" of the bitmask, except each bit is an explicit partition edge pointer. // // mPartitionNodeArray stores, for each edge, which nodes are involved in it, and which are the next // partition edges involving the same nodes. So this is a linked list of partition edges. if(!partitionEdge->hasInfiniteMass0()) { NodeEntryDecoded prevEdge; NodeEntryDecoded nextEdge; getPreviousAndNextReferencesInSlab(prevEdge, nextEdge, node0Index, uniqueId, slab, slabMask0); // PT: // Here for example, we retrieved the previous and next edges that contain node0 in this slab. // So we know that: // - either prevEdge.mNode0Index or prevEdge.mNode1Index will be equal to node0Index // - either nextEdge.mNode0Index or nextEdge.mNode1Index will be equal to node0Index // // We have to setup PartitionNodeData for the current edge. We only have a single LL here so we // have to 1) setup the next indices for the current PartitionNodeData, and 2) update the next indices // of the previous PartitionNodeData in the LL. // // 1) We know that the next edge is nextEdge. We are dealing with node0 so we're going to set mNextIndex[0]. // The only question is whether node0 is the first or second node in nextEdge. We use |1 for the second case. // // 2) We know that the previous edge is prevEdge. We know that either its node0 or its node1 will // be our node (node0). In the first case we must update the previous node's mNextIndex[0] (the LL for // the previous node's node0). Otherwise mNextIndex[1]. Either way our own address is uniqueId, and we // don't use |1 because node0 is not the second node for us (i.e. for nodeData). // // Questions remain: // - why do we need to build this LL? What is is needed for in the solver? // - what happens for the first edge, i.e. there is no prev / next ? // - do we really need to fetch the next edge? Can't we just copy the mNextIndex from previous edge? 
// => I think it's just so that it also works for the first edges indeed const PxU32 dstIndex = node0Index == getNode0Index(prevEdge) ? 0 : 1; mPartitionNodeArray[getUniqueId(prevEdge)].mNextIndex[dstIndex] = uniqueId << 1; const PxU32 data = getUniqueId(nextEdge) << 1; nodeData.mNextIndex[0] = getNode0Index(nextEdge) == node0Index ? data : data|1; } else { nodeData.mNextIndex[0] = (uniqueId<<1); } if(!partitionEdge->hasInfiniteMass1()) { NodeEntryDecoded prevEdge; NodeEntryDecoded nextEdge; getPreviousAndNextReferencesInSlab(prevEdge, nextEdge, node1Index, uniqueId, slab, slabMask1); // PT: // Same as for node0, the only notable difference is that node1 will now be the second entry // for the LL data of previous edge, so we have an extra |1 to add. const PxU32 dstIndex = node1Index == getNode0Index(prevEdge) ? 0 : 1; mPartitionNodeArray[getUniqueId(prevEdge)].mNextIndex[dstIndex] = (uniqueId << 1)|1; const PxU32 data = getUniqueId(nextEdge) << 1; nodeData.mNextIndex[1] = getNode0Index(nextEdge) == node1Index ? data : data|1; } else { nodeData.mNextIndex[1] = (uniqueId<<1)|1; } // PT: TODO: try to rebuild this LL in a second pass, all the links at once instead of incrementally? // We could also multi-thread that part but because the same node appears twice in different edges we would probably // get some false sharing when updating the same PartitionNodeData from 2 different threads at the same time. 
} // PT: this function does the actual edge coloring + maintain a linked list of partition edges in PartitionNodeData void PxgIncrementalPartition::removeEdgeInternal(PartitionSlab* PX_RESTRICT slab, const PartitionEdge* PX_RESTRICT edge, PxU32 id) { const PxU32 uniqueId = edge->mUniqueIndex; slab->mPartitions[id].removeFromPartition(uniqueId, mPartitionIndexArray); const PxU32 node0Index = edge->mNode0.index(); const PxU32 node1Index = edge->mNode1.index(); PxU32 slabMask0 = 0; if(!edge->hasInfiniteMass0()) { slabMask0 = slab->mNodeBitmap[node0Index] & (~(1<mNodeBitmap[node0Index] = slabMask0; resetNodeEntryStorage(slab->mNodeEntries[node0Index].mEdges[id]); // PT: this is not really needed. We won't read it if the mask is not set (and we don't initialize the data) } PxU32 slabMask1 = 0; if(!edge->hasInfiniteMass1()) { slabMask1 = slab->mNodeBitmap[node1Index] & (~(1<mNodeBitmap[node1Index] = slabMask1; resetNodeEntryStorage(slab->mNodeEntries[node1Index].mEdges[id]); // PT: this is not really needed. We won't read it if the mask is not set (and we don't initialize the data) } if(slabMask0 && !edge->hasInfiniteMass0()) { NodeEntryDecoded prevEdge; NodeEntryDecoded nextEdge; getPreviousAndNextReferencesInSlab(prevEdge, nextEdge, node0Index, uniqueId, slab, slabMask0); const PxU32 prevUniqueId = getUniqueId(prevEdge); const PxU32 nextUniqueId = getUniqueId(nextEdge); const PxU32 data = nextUniqueId << 1; const bool cndt = node0Index == getNode0Index(nextEdge); const PxU32 dstIndex = node0Index == getNode0Index(prevEdge) ? 0 : 1; mPartitionNodeArray[prevUniqueId].mNextIndex[dstIndex] = cndt ? 
data : data|1; } if(slabMask1 && !edge->hasInfiniteMass1()) { NodeEntryDecoded prevEdge; NodeEntryDecoded nextEdge; getPreviousAndNextReferencesInSlab(prevEdge, nextEdge, node1Index, uniqueId, slab, slabMask1); const PxU32 prevUniqueId = getUniqueId(prevEdge); const PxU32 nextUniqueId = getUniqueId(nextEdge); const PxU32 data = nextUniqueId << 1; const bool cndt = node1Index == getNode0Index(nextEdge); const PxU32 dstIndex = node1Index == getNode0Index(prevEdge) ? 0 : 1; mPartitionNodeArray[prevUniqueId].mNextIndex[dstIndex] = cndt ? data : data|1; } } bool PxgIncrementalPartition::addJointManager(const PartitionEdge* edge, PxgBodySimManager& bodySimManager) { #if USE_FINE_GRAINED_PROFILE_ZONES PX_PROFILE_ZONE("PxgIncrementalPartition::addJointManager", mContextID); #endif PX_UNUSED(bodySimManager); const PxU32 uniqueId = edge->mUniqueIndex; #if RIGID_STATIC_EDGE_SOLVER if (!edge->isArticulation0() && edge->mNode1.isStaticBody()) return bodySimManager.addStaticRBJoint(uniqueId, edge->mNode0); #endif #if ARTIC_STATIC_EDGES_INTERNAL_SOLVER || ARTIC_SELF_CONSTRAINT_SOLVER if (edge->isArticulation0()) { #if ARTIC_SELF_CONSTRAINT_SOLVER if (edge->mNode0.index() == edge->mNode1.index()) return bodySimManager.addSelfArticulationJoint(uniqueId, edge->mNode0, edge->mNode1); #endif #if ARTIC_STATIC_EDGES_INTERNAL_SOLVER if (edge->hasInfiniteMass1()) return bodySimManager.addStaticArticulationJoint(uniqueId, edge->mNode0); //Add to articulation 0 #endif } #if ARTIC_STATIC_EDGES_INTERNAL_SOLVER else if (edge->isArticulation1() && edge->hasInfiniteMass0()) return bodySimManager.addStaticArticulationJoint(uniqueId, edge->mNode1); #endif #endif return false; } static PX_FORCE_INLINE PxgIncrementalPartition::SpecialCase isSpecialCase(const PartitionEdge* edge) { PX_UNUSED(edge); #if RIGID_STATIC_EDGE_SOLVER if (!edge->isArticulation0()) { if(edge->hasInfiniteMass1()) return PxgIncrementalPartition::SPECIAL_CASE_STATIC_RB; } #endif #if ARTIC_STATIC_EDGES_INTERNAL_SOLVER || 
ARTIC_SELF_CONSTRAINT_SOLVER if (edge->isArticulation0()) { #if ARTIC_SELF_CONSTRAINT_SOLVER if (edge->mNode0.index() == edge->mNode1.index()) return PxgIncrementalPartition::SPECIAL_CASE_ARTI_SELF; #endif #if ARTIC_STATIC_EDGES_INTERNAL_SOLVER if (edge->hasInfiniteMass1()) return PxgIncrementalPartition::SPECIAL_CASE_STATIC_ARTI0; #endif } #if ARTIC_STATIC_EDGES_INTERNAL_SOLVER else if (edge->isArticulation1() && edge->hasInfiniteMass0()) return PxgIncrementalPartition::SPECIAL_CASE_STATIC_ARTI1; #endif #endif return PxgIncrementalPartition::SPECIAL_CASE_NONE; } static PX_FORCE_INLINE bool addSpecialCaseContactManager(const PxgIncrementalPartition::Part2WorkItem& item, PxgBodySimManager& manager) { PX_UNUSED(item); PX_UNUSED(manager); #if RIGID_STATIC_EDGE_SOLVER if(item.mSpecialCase == PxgIncrementalPartition::SPECIAL_CASE_STATIC_RB) return manager.addStaticRBContactManager(item.mPartitionEdge->mUniqueIndex, item.mPartitionEdge->mNode0); #endif #if ARTIC_STATIC_EDGES_INTERNAL_SOLVER || ARTIC_SELF_CONSTRAINT_SOLVER #if ARTIC_SELF_CONSTRAINT_SOLVER if(item.mSpecialCase == PxgIncrementalPartition::SPECIAL_CASE_ARTI_SELF) return manager.addSelfArticulationContactManager(item.mPartitionEdge->mUniqueIndex, item.mPartitionEdge->mNode0, item.mPartitionEdge->mNode1); #endif #if ARTIC_STATIC_EDGES_INTERNAL_SOLVER if(item.mSpecialCase == PxgIncrementalPartition::SPECIAL_CASE_STATIC_ARTI0) return manager.addStaticArticulationContactManager(item.mPartitionEdge->mUniqueIndex, item.mPartitionEdge->mNode0); if(item.mSpecialCase == PxgIncrementalPartition::SPECIAL_CASE_STATIC_ARTI1) return manager.addStaticArticulationContactManager(item.mPartitionEdge->mUniqueIndex, item.mPartitionEdge->mNode1); #endif #endif return false; } bool PxgIncrementalPartition::addContactManager(PartitionEdge* edge, const PxcNpWorkUnit& unit, PxgBodySimManager& manager) { #if USE_FINE_GRAINED_PROFILE_ZONES PX_PROFILE_ZONE("PxgIncrementalPartition::addContactManager", mContextID); #endif 
PX_UNUSED(manager); edge->setIsContact(); // PT: TODO: this could have been setup in the ctor directly increaseForceThresholds(unit, edge, mNbForceThresholds); #if RIGID_STATIC_EDGE_SOLVER if (!edge->isArticulation0()) { if(edge->hasInfiniteMass1()) // PT: covers both statics & kinematics return manager.addStaticRBContactManager(edge->mUniqueIndex, edge->mNode0); } #endif #if ARTIC_STATIC_EDGES_INTERNAL_SOLVER || ARTIC_SELF_CONSTRAINT_SOLVER if (edge->isArticulation0()) { #if ARTIC_SELF_CONSTRAINT_SOLVER if (edge->mNode0.index() == edge->mNode1.index()) return manager.addSelfArticulationContactManager(edge->mUniqueIndex, edge->mNode0, edge->mNode1); #endif #if ARTIC_STATIC_EDGES_INTERNAL_SOLVER if (edge->hasInfiniteMass1()) return manager.addStaticArticulationContactManager(edge->mUniqueIndex, edge->mNode0); //Add to articulation 0 #endif } #if ARTIC_STATIC_EDGES_INTERNAL_SOLVER else if (edge->isArticulation1() && edge->hasInfiniteMass0()) return manager.addStaticArticulationContactManager(edge->mUniqueIndex, edge->mNode1); #endif #endif return false; } #if PX_PARTITION_COMPACTION // PT: TODO: consider also passing node indices static PX_FORCE_INLINE void updateDirtyNodeBitmap(PxBitMap& isDirtyNode, const PartitionEdge* edge, PxU32 hasInfiniteMass0, PxU32 hasInfiniteMass1, bool selfConstraint) { if(!selfConstraint) { if(!hasInfiniteMass0) { const PxU32 index = edge->mNode0.index(); if(!isDirtyNode.test(index)) isDirtyNode.set(index); } if(!hasInfiniteMass1) { const PxU32 index = edge->mNode1.index(); if(!isDirtyNode.test(index)) isDirtyNode.set(index); } } } #else static PX_FORCE_INLINE void updateDirtyNodeBitmap(PxBitMap&, const PartitionEdge*, PxU32, PxU32, bool) {} #endif static PX_FORCE_INLINE bool removeSpecialHandled(PartitionEdge* edge, PxU32 uniqueId, PxgBodySimManager& manager, PxU32 hasInfiniteMass0, PxU32 hasInfiniteMass1) { PX_UNUSED(hasInfiniteMass0); PX_UNUSED(hasInfiniteMass1); PX_UNUSED(manager); PX_UNUSED(uniqueId); #if 
ARTIC_STATIC_EDGES_INTERNAL_SOLVER || ARTIC_SELF_CONSTRAINT_SOLVER || RIGID_STATIC_EDGE_SOLVER bool selfConstraint = false; PX_ASSERT(edge->isSpecialHandled()); const PxU32 isArticulation0 = edge->isArticulation0(); const PxU32 isArticulation1 = edge->isArticulation1(); bool specialHandled = false; const PxU32 isContact = edge->isContact(); #if RIGID_STATIC_EDGE_SOLVER if (!isArticulation0 && !isArticulation1) { if(hasInfiniteMass1) { if (isContact) specialHandled = manager.removeStaticRBContactManager(uniqueId, edge->mNode0); else specialHandled = manager.removeStaticRBJoint(uniqueId, edge->mNode0); } } else #endif #if ARTIC_STATIC_EDGES_INTERNAL_SOLVER || ARTIC_SELF_CONSTRAINT_SOLVER #if ARTIC_SELF_CONSTRAINT_SOLVER if (edge->mNode0.index() == edge->mNode1.index()) { selfConstraint = true; if (!isContact) specialHandled = manager.removeSelfArticulationJoint(uniqueId, edge->mNode0, edge->mNode1); else specialHandled = manager.removeSelfArticulationContactManager(uniqueId, edge->mNode0, edge->mNode1); } else #endif #if ARTIC_STATIC_EDGES_INTERNAL_SOLVER if (isArticulation0 && hasInfiniteMass1) { if (!isContact) specialHandled = manager.removeStaticArticulationJoint(uniqueId, edge->mNode0); else specialHandled = manager.removeStaticArticulationContactManager(uniqueId, edge->mNode0); } else if (isArticulation1 && hasInfiniteMass0) { if (!isContact) specialHandled = manager.removeStaticArticulationJoint(uniqueId, edge->mNode1); else specialHandled = manager.removeStaticArticulationContactManager(uniqueId, edge->mNode1); } #endif #endif PX_ASSERT(specialHandled); PX_UNUSED(specialHandled); #endif return selfConstraint; } void PxgIncrementalPartition::removeEdge(PartitionEdge* edge, IG::GPUExternalData& islandSimGpuData, PxgBodySimManager& manager) { #if USE_FINE_GRAINED_PROFILE_ZONES PX_PROFILE_ZONE("PxgIncrementalPartition::removeEdge", mContextID); #endif PX_UNUSED(manager); decreaseForceThresholds(edge, mNbForceThresholds); // PT: TODO: could we encode the proper 
bucket in the edge when we add it into the system, and skip all the tests? const PxU32 hasInfiniteMass0 = edge->hasInfiniteMass0(); const PxU32 hasInfiniteMass1 = edge->hasInfiniteMass1(); const PxU32 uniqueId = edge->mUniqueIndex; // PT: in this new design we encode in the partition edge itself whether it's "special handled" or not. We recorded the real state rather than the // theoretical state: an edge can be a special case in theory, but a regular case in practice, when the PxgBodySimManager/PxgJointManager are full. // The benefits are the following: // - there cannot be a mismatch anymore between the add & remove parts. Potentially the "add" part could have added the edge to the external buffers // (i.e. it's "special handled") but the "remove" part could have failed, making the code fallback to the regular case. This would break the edge // coloring since we would remove an edge that would not have taken the regular codepath before. This mismatch is not possible anymore. // - the code immediately knows whether an edge is special or not, without doing all the tests to determine in which bucket it falls. That's faster. // - we know the state ahead of time, so it makes multithreading easier. 
bool selfConstraint = false; if(edge->isSpecialHandled()) { selfConstraint = removeSpecialHandled(edge, uniqueId, manager, hasInfiniteMass0, hasInfiniteMass1); } else { PxU32 id = mPartitionIndexArray[uniqueId].mPartitionIndex; const PxU32 slabId = id / PXG_BATCH_SIZE; PartitionSlab* slab = mPartitionSlabs[slabId]; PxU32 baseId = PXG_BATCH_SIZE*slabId; id -= baseId; PX_ASSERT(slab); removeEdgeInternal(slab, edge, id); } { const IG::EdgeIndex edgeIndex = edge->getEdgeIndex(); const PartitionEdge* pEdge = islandSimGpuData.getFirstPartitionEdge(edgeIndex); if (pEdge == edge) islandSimGpuData.setFirstPartitionEdge(edgeIndex, edge->mNextPatch); } updateDirtyNodeBitmap(mIsDirtyNode, edge, hasInfiniteMass0, hasInfiniteMass1, selfConstraint); // PT: no need to reset the contact bit, it will be cleared on recycling mEdgeManager.putEdge(edge); } static PX_FORCE_INLINE PxIntBool isKinematic(const IG::IslandSim& islandSim, PxNodeIndex nodeIndex) { PxIntBool infinite = true; if(nodeIndex.isValid()) { // PT: TODO: pretty bad here to access one cache line just to read one bit const IG::Node& node = islandSim.getNode(nodeIndex); infinite = node.isKinematic(); } return infinite; } // PT: this function does multiple things: // 1) allocate a new PartitionEdge from the edge manager // 2) resize/allocate internal edge data buffers (mPartitionIndexArray, mNpIndexArray, mPartitionNodeArray, mSolverConstants) // 3) initialize some of the data for the newly allocated PartitionEdge // 4) initialize some of the data for the newly allocated PartitionIndexData entry in mPartitionIndexArray // 5) initialize some of the data for the newly allocated PartitionNodeData entry in mPartitionNodeArray // 6) initialize the newly allocated entry in mNpIndexArray // 7) initialize the newly allocated entry in mSolverConstants PartitionEdge* PxgIncrementalPartition::addEdge_Stage1(const IG::IslandSim& islandSim, IG::EdgeIndex edgeIndex, PxU32 patchIndex, PxU32 npIndex, PxNodeIndex node1, PxNodeIndex node2) 
{ #if USE_FINE_GRAINED_PROFILE_ZONES PX_PROFILE_ZONE("PxgIncrementalPartition::addEdge_Stage1", mContextID); #endif /////////////////////////////////////////////////////////////////////////// // PT: 1) allocate a new PartitionEdge from the edge manager PartitionEdge* partitionEdge = mEdgeManager.getEdge(edgeIndex); /////////////////////////////////////////////////////////////////////////// // PT: 2) resize/allocate internal edge data buffers (mPartitionIndexArray, mNpIndexArray, mPartitionNodeArray, mSolverConstants) const PxU32 count = mEdgeManager.getEdgeCount(); if (count >= mPartitionIndexArray.capacity()) { PX_PROFILE_ZONE("ResizeEdgeBuffer", mContextID); const PxU32 newSize = PxMax(count, mPartitionIndexArray.capacity() * 2); mPartitionIndexArray.reserve(newSize); mNpIndexArray.reserve(newSize); mPartitionNodeArray.reserve(newSize); } if (count >= mPartitionIndexArray.size()) { mPartitionIndexArray.resizeUninitialized(count); mNpIndexArray.resizeUninitialized(count); mPartitionNodeArray.resizeUninitialized(count); } if(count >= mSolverConstants.capacity()) { const PxU32 newSize = PxMax(count, mSolverConstants.capacity() * 2); mSolverConstants.resize(newSize); //mSolverConstants.resizeUninitialized(newSize); // PT: TODO: one of the dup code was using resize, the other one resizeUninitialized. Investigate if it makes a difference. 
} /////////////////////////////////////////////////////////////////////////// // PT: 3) initialize some of the data for the newly allocated PartitionEdge partitionEdge->mNode0 = node1; partitionEdge->mNode1 = node2; if(isKinematic(islandSim, node1)) partitionEdge->setInfiniteMass0(); if(isKinematic(islandSim, node2)) partitionEdge->setInfiniteMass1(); /////////////////////////////////////////////////////////////////////////// // PT: 4) initialize some of the data for the newly allocated PartitionIndexData entry in mPartitionIndexArray const PxU32 uniqueId = partitionEdge->mUniqueIndex; PartitionIndexData& indexData = mPartitionIndexArray[uniqueId]; indexData.mPatchIndex = PxTo8(patchIndex); const PxU8 articulationOffset = node1.isArticulation() || node2.isArticulation() ? 2 : 0; const PxU32 implicitEdgeType = npIndex == 0xffffffff ? IG::Edge::EdgeType::eCONSTRAINT : IG::Edge::EdgeType::eCONTACT_MANAGER; indexData.mCType = PxU8(implicitEdgeType) + articulationOffset; /////////////////////////////////////////////////////////////////////////// // PT: 5) initialize some of the data for the newly allocated PartitionNodeData entry in mPartitionNodeArray PartitionNodeData& nodeData = mPartitionNodeArray[uniqueId]; nodeData.mNodeIndex0 = node1; nodeData.mNodeIndex1 = node2; /////////////////////////////////////////////////////////////////////////// // PT: 6) initialize the newly allocated entry in mNpIndexArray mNpIndexArray[uniqueId] = npIndex; /////////////////////////////////////////////////////////////////////////// // PT: 7) initialize the newly allocated entry in mSolverConstants mSolverConstants[uniqueId].mEdgeIndex = edgeIndex; // PT: as far as I can tell mConstraintWriteBackIndex remains uninitialized / unused for contact managers! 
return partitionEdge; } static PX_FORCE_INLINE void updatePartitionEdgeLinkedListHead(IG::GPUExternalData& islandSimGpuData, IG::EdgeIndex edgeIndex, PartitionEdge* partitionEdge) { partitionEdge->mNextPatch = islandSimGpuData.getFirstPartitionEdge(edgeIndex); islandSimGpuData.setFirstPartitionEdge(edgeIndex, partitionEdge); } // PT: this function does multiple things: // 1) the actual edge coloring for the new edge, i.e. figuring out in which partition it goes. This touches a bunch of new internal buffers. // 2) initialize the rest of the data for the newly allocated PartitionNodeData entry in mPartitionNodeArray. This is a linked-list of edges // related to the edge coloring bits. // 2b) initialize the rest of the data for the newly allocated PartitionIndexData entry in mPartitionIndexArray. // 3) initialize another linked-list for patches, the one first seen in the island manager where the data is actually stored. void PxgIncrementalPartition::addEdge_Stage2(IG::GPUExternalData& islandSimGpuData, IG::EdgeIndex edgeIndex, PartitionEdge* partitionEdge, bool specialHandled, bool doPart1, bool doPart2) { #if USE_FINE_GRAINED_PROFILE_ZONES PX_PROFILE_ZONE("PxgIncrementalPartition::addEdge_Stage2", mContextID); #endif if(doPart1) { if (!specialHandled) { //Now place this edge in an appropriate slab!!!! 
const PxNodeIndex node1 = partitionEdge->mNode0; const PxNodeIndex node2 = partitionEdge->mNode1; const PxU32 hasInfiniteMass0 = partitionEdge->hasInfiniteMass0(); const PxU32 hasInfiniteMass1 = partitionEdge->hasInfiniteMass1(); const PxU32 index1 = node1.index(); const PxU32 index2 = node2.index(); bool success = false; const PxU32 nbSlabs = mPartitionSlabs.size(); for (PxU32 a = 0; a < nbSlabs; ++a) { PartitionSlab* slab = mPartitionSlabs[a]; PxU32 map = 0; // This map encodes in which of the 32 partitions of this slab either of the rigids/articulations is already present if (!hasInfiniteMass0) map = slab->mNodeBitmap[index1]; if (!hasInfiniteMass1) map |= slab->mNodeBitmap[index2]; if (map != 0xFFFFFFFF) { const PxU32 baseId = a*PXG_BATCH_SIZE; const PxU32 id = PxLowestSetBit(~map); success = true; addEdgeInternal(partitionEdge, slab, PxTo16(id), PxTo16(baseId)); if ((id + baseId) == mMaxSlabCount) mMaxSlabCount = id + baseId + 1; break; } } if (!success) { PX_PROFILE_ZONE("NewPartitionSlab", mContextID); PartitionSlab* slab = PX_NEW(PartitionSlab); slab->mNodeBitmap.resize(mNodeCount); slab->mNodeEntries.resize(mNodeCount); mPartitionSlabs.pushBack(slab); const PxU32 baseId = PXG_BATCH_SIZE * (mPartitionSlabs.size() - 1); addEdgeInternal(partitionEdge, slab, 0, PxTo16(baseId)); if (baseId == mMaxSlabCount) mMaxSlabCount = baseId + 1; } } else partitionEdge->setSpecialHandled(); } if(doPart2) updatePartitionEdgeLinkedListHead(islandSimGpuData, edgeIndex, partitionEdge); } //static bool containedInDestroyedEdges(PxsContactManager* /*manager*/, PartitionEdge** /*destroyedEdges*/, const PxU32 /*destroyedEdgeCount*/) //{ // return false; //} static PX_FORCE_INLINE bool isLostPatch(const PxsContactManagerOutputCounts& output) { if(output.prevPatches >= output.nbPatches) return true; if(!output.nbPatches && output.prevPatches) return true; return false; } static PX_FORCE_INLINE bool isFoundPatch(const PxsContactManagerOutputCounts& output) { return output.prevPatches 
< output.nbPatches;
}

namespace
{
	// Task that runs the serial reference implementations back to back:
	// processLostPatches_Reference() first, then processFoundPatches_Reference().
	// It only stores the references/pointers it is constructed with and forwards them in runInternal().
	class ProcessPatchesTask : public Cm::Task
	{
		PX_NOCOPY(ProcessPatchesTask)
	public:
		PxgIncrementalPartition& mIncrementalPartition;
		IG::IslandSim& mIslandSim;
		PxsContactManager** mLostFoundPatchManagers;
		PxU32 mNbLostFoundPatchManagers;
		const PxsContactManagerOutputCounts* mLostFoundPairOutputs;
		PxgBodySimManager& mBodySimManager;
		PxgJointManager& mJointManager;

		ProcessPatchesTask(
			PxU64 contextID, PxgIncrementalPartition& partition, IG::IslandSim& islandSim,
			PxsContactManager** lostFoundPatchManagers, PxU32 nbLostFoundPatchManagers, const PxsContactManagerOutputCounts* lostFoundPairOutputs,
			PxgBodySimManager& bodySimManager, PxgJointManager& jointManager) :
			Cm::Task (contextID),
			mIncrementalPartition (partition),
			mIslandSim (islandSim),
			mLostFoundPatchManagers (lostFoundPatchManagers),
			mNbLostFoundPatchManagers (nbLostFoundPatchManagers),
			mLostFoundPairOutputs (lostFoundPairOutputs),
			mBodySimManager (bodySimManager),
			mJointManager (jointManager)
		{}

		virtual const char* getName() const PX_OVERRIDE PX_FINAL
		{
			return "PxgIncrementalPartitioning_ProcessPatchesTask";
		}

		virtual void runInternal() PX_OVERRIDE PX_FINAL
		{
			// Lost patches are handled before found patches.
			mIncrementalPartition.processLostPatches_Reference(mIslandSim, mBodySimManager, mJointManager, mLostFoundPatchManagers, mNbLostFoundPatchManagers, mLostFoundPairOutputs);
			mIncrementalPartition.processFoundPatches_Reference(mIslandSim, mBodySimManager, mLostFoundPatchManagers, mNbLostFoundPatchManagers, mLostFoundPairOutputs);
		}
	};
}

// Entry point for processing the combined lost/found patch list.
// - lostFoundPatchManagers / lostFoundPairOutputs are parallel arrays of nbLostFoundPatchManagers entries.
// - When gRunDefaultVersion is false the work is handed to processLostPatchesMT(); otherwise a single
//   ProcessPatchesTask (serial reference path) is placement-allocated from flushPool and started with
//   'continuation' as its continuation.
// - With USE_SPLIT_SECOND_PASS_ISLAND_GEN, the active contact manager bitmap is first copied into mActiveCMBitmapCopy.
void PxgIncrementalPartition::processLostFoundPatches(	Cm::FlushPool& flushPool, PxBaseTask* continuation,
														IG::IslandSim& islandSim, PxgBodySimManager& bodySimManager, PxgJointManager& jointManager,
														PxsContactManager** lostFoundPatchManagers, PxU32 nbLostFoundPatchManagers, const PxsContactManagerOutputCounts* lostFoundPairOutputs)
{
#if USE_SPLIT_SECOND_PASS_ISLAND_GEN
	// PT: this copy is necessary when running postIslandGen in parallel with this function. Specifically
	// Sc::Scene::setEdgesConnected will call mSimpleIslandManager->setEdgeConnected and mSimpleIslandManager->secondPassIslandGenPart1
	// while this is running, and these functions will modify the active contact manager bitmap.
	{
		PX_PROFILE_ZONE("PxgIncrementalPartition::copyData", mContextID);
		PX_ASSERT(islandSim.mGpuData);
		IG::GPUExternalData& islandSimGpuData = *islandSim.mGpuData;
		// PT: TODO: use the scratch allocator instead?
		mActiveCMBitmapCopy.copy(islandSimGpuData.getActiveContactManagerBitmap());
	}
#endif

	if(!gRunDefaultVersion)
	{
		// Multi-threaded path (note the different parameter order compared to this function).
		processLostPatchesMT(	islandSim, flushPool, continuation,
								lostFoundPatchManagers, nbLostFoundPatchManagers, lostFoundPairOutputs,
								bodySimManager, jointManager);
	}
	else
	{
		// Serial reference path, run from a single task.
		ProcessPatchesTask* task = PX_PLACEMENT_NEW(flushPool.allocate(sizeof(ProcessPatchesTask)), ProcessPatchesTask)(mContextID, *this, islandSim, lostFoundPatchManagers, nbLostFoundPatchManagers, lostFoundPairOutputs, bodySimManager, jointManager);
		startTask(task, continuation);
	}
}

///////////////////////////////////////////////////////////////////////////////

namespace
{
	class PreprocessTask;

	// Bundle of the references/pointers shared by the tasks defined below in this namespace.
	// mTaskHead starts as NULL.
	struct SharedContext
	{
		SharedContext(
			PxgIncrementalPartition& ip, IG::IslandSim& islandSim,
			Cm::FlushPool& flushPool, PxBaseTask* continuation,
			PxgBodySimManager& bodySimManager, PxgJointManager& jointManager,
			const PxsContactManagerOutputCounts* lostFoundPairOutputs,
			PxsContactManager** lostFoundPatchManagers, PxU32 nbLostFoundPatchManagers) :
			mIP(ip), mIslandSim(islandSim),
			mFlushPool(flushPool), mContinuation(continuation),
			mBodySimManager(bodySimManager), mJointManager(jointManager),
			mLostFoundPairOutputs(lostFoundPairOutputs),
			mLostFoundPatchManagers(lostFoundPatchManagers), mNbLostFoundPatchManagers(nbLostFoundPatchManagers),
			mTaskHead(NULL)
		{
		}

		PxgIncrementalPartition& mIP;
		IG::IslandSim& mIslandSim;
		Cm::FlushPool& mFlushPool;
		PxBaseTask* mContinuation;
		PxgBodySimManager& mBodySimManager;
		PxgJointManager& mJointManager;
		const PxsContactManagerOutputCounts*
mLostFoundPairOutputs; PxsContactManager** mLostFoundPatchManagers; const PxU32 mNbLostFoundPatchManagers; PreprocessTask* mTaskHead; }; class PreprocessTask : public Cm::Task { PX_NOCOPY(PreprocessTask) public: SharedContext& mContext; const PxU32 mStartIndex; const PxU32 mNbToProcess; PxgIncrementalPartition::PartitionEdgeBatch* mBatch; PreprocessTask* mNext; PreprocessTask(PxU64 contextID, SharedContext& context, PxU32 startIndex, PxU32 nbToProcess) : Cm::Task(contextID), mContext(context), mStartIndex(startIndex), mNbToProcess(nbToProcess), mBatch(NULL), mNext(NULL) {} virtual const char* getName() const PX_OVERRIDE PX_FINAL { return "PxgIncrementalPartitioning_PreprocessTask"; } virtual void runInternal() PX_OVERRIDE PX_FINAL { PX_ASSERT(mContext.mIslandSim.mGpuData); IG::GPUExternalData& islandSimGpuData = *mContext.mIslandSim.mGpuData; const PxsContactManagerOutputCounts* PX_RESTRICT lostFoundPairOutputs = mContext.mLostFoundPairOutputs; const PxsContactManager*const* PX_RESTRICT lostFoundPatchManagers = mContext.mLostFoundPatchManagers; PxInt32ArrayPinned& PX_RESTRICT nodeInteractionCountArray = mContext.mIP.mNodeInteractionCountArray; PxU32* PX_RESTRICT dirtyMap = mContext.mIP.mIsDirtyNode.getWords(); const PxPinnedArray& PX_RESTRICT partitionIndexArray = mContext.mIP.mPartitionIndexArray; PxArray& PX_RESTRICT partitionSlabs = mContext.mIP.mPartitionSlabs; const PxU32 startIndex = mStartIndex; const PxU32 last = startIndex + mNbToProcess; PxI32 localNbForceThresholds = 0; // Process lost patches. These are CMs that reduced the number of contact patches they had. for(PxU32 a=startIndex; agetWorkUnit(); if(unit.mFlags & PxcNpWorkUnitFlag::eDISABLE_RESPONSE) continue; PartitionEdge* partitionEdge = islandSimGpuData.getFirstPartitionEdge(unit.mEdgeIndex); //KS - if this is NULL, it means this unit was also destroyed and will be included in the destroyedEdgeCount (i.e. 
NP detected a lost touch at the same time as BP detected a lost pair //PX_ASSERT(partitionEdge != NULL || containedInDestroyedEdges(manager, destroyedEdges, destroyedEdgeCount)); if(!partitionEdge) continue; if(!output.nbPatches && output.prevPatches) { decreaseNodeInteractionCountsMT(nodeInteractionCountArray, partitionEdge->mNode0, partitionEdge->mNode1); #if RECORD_DESTROYED_EDGES_IN_FOUND_LOST_PASSES #error TODO // PT: that one would need adapting if needed //mDestroyedContactEdgeIndices.pushBack(edgeIndex); #endif } for(PxU32 b = output.nbPatches; b < output.prevPatches; ++b) { if(!partitionEdge) break; // PT: fom removeEdge() { if(partitionEdge->hasThreshold()) localNbForceThresholds++; // PT: accumulate in local variable for now const PxU32 hasInfiniteMass0 = partitionEdge->hasInfiniteMass0(); const PxU32 hasInfiniteMass1 = partitionEdge->hasInfiniteMass1(); const PxU32 node0Index = partitionEdge->mNode0.index(); const PxU32 node1Index = partitionEdge->mNode1.index(); #if PX_PARTITION_COMPACTION { // PT: MT version of updateDirtyNodeBitmap const bool selfConstraint = node0Index == node1Index; if(!selfConstraint) { // PT: in this version we don't bother doing the "test" before the "set" if(!hasInfiniteMass0) PxAtomicOr(reinterpret_cast(&dirtyMap[node0Index >> 5]), 1 << (node0Index & 31)); if(!hasInfiniteMass1) PxAtomicOr(reinterpret_cast(&dirtyMap[node1Index >> 5]), 1 << (node1Index & 31)); } } #endif if(partitionEdge->isSpecialHandled()) { // PT: we cannot remove edges from external managers directly, it's not thread safe. We also // would like to avoid re-parsing the whole input array another time so we'd like to batch // special edges for later processing. // // How do we batch this? We want a system that avoids reallocations and preserve determinism. // Each pair can have N edges because of the patch linked list so we cannot use a static buffer // with a known max number of entries in each task. 
Naively we could just have a PxArray per task, // push to it, and then the next process would loop over the tasks in order and process their array // in order. Not perfect but still better than redoing the full costly loop from scratch. We // don't want a real PxArray per task of course so we could have a pool of PxArrays (or, gasp, // a PxArray of PxArrays) and we pick one up and assign it to each task we start. Not great but // could work. PX_ASSERT(mBatch); mBatch->mEdges.pushBack(partitionEdge); } else { const PxU32 uniqueId = partitionEdge->mUniqueIndex; PxU32 id = partitionIndexArray[uniqueId].mPartitionIndex; const PxU32 slabId = id / PXG_BATCH_SIZE; PartitionSlab* slab = partitionSlabs[slabId]; PxU32 baseId = PXG_BATCH_SIZE*slabId; id -= baseId; PX_ASSERT(slab); // PT: from removeEdgeInternal(slab, edge, id); { // PT: we cannot removeFromPartition here. We will do that later (REMOVE_EDGES_FROM_PARTITIONS). // We can however update the edge coloring: if(!hasInfiniteMass0) PxAtomicAnd(reinterpret_cast(&slab->mNodeBitmap[node0Index]), (~(1<(&slab->mNodeBitmap[node1Index]), (~(1<mNextPatch; partitionEdge = nextPartitionEdge; } } // PT: one atomic add to update the data we accumulated locally. Note the minus sign, as we don't have PxAtomicSub. if(localNbForceThresholds) PxAtomicAdd(reinterpret_cast(&mContext.mIP.mNbForceThresholds), -localNbForceThresholds); } }; class RemoveBatchedSpecialEdgesTask : public Cm::Task { PX_NOCOPY(RemoveBatchedSpecialEdgesTask) SharedContext& mContext; public: RemoveBatchedSpecialEdgesTask(PxU64 contextID, SharedContext& context) : Cm::Task(contextID), mContext(context) {} virtual const char* getName() const PX_OVERRIDE PX_FINAL { return "RemoveBatchedSpecialEdgesTask"; } virtual void runInternal() PX_OVERRIDE PX_FINAL { PxgBodySimManager& bodySimManager = mContext.mBodySimManager; // PT: go over batched data sequentially in a single task, but at least in a separate thread. Better than nothing. 
PreprocessTask* currentTask = mContext.mTaskHead; while(currentTask) { PreprocessTask* nextTask = currentTask->mNext; PxgIncrementalPartition::PartitionEdgeBatch* currentBatch = currentTask->mBatch; const PxU32 size = currentBatch->mEdges.size(); for(PxU32 i=0;imEdges[i]; const PxU32 hasInfiniteMass0 = partitionEdge->hasInfiniteMass0(); const PxU32 hasInfiniteMass1 = partitionEdge->hasInfiniteMass1(); const PxU32 uniqueId = partitionEdge->mUniqueIndex; // PT: part of removeEdge() that we skipped in PreprocessTask removeSpecialHandled(partitionEdge, uniqueId, bodySimManager, hasInfiniteMass0, hasInfiniteMass1); } currentTask = nextTask; } } }; enum Codepath { DESTROY_EDGES_PROCESS_FOUND_PATCHES, REMOVE_EDGES_FROM_PARTITIONS, UPDATE_EDGE_LL, }; static const char* gTaskNames[] = { "PxgPartitioning_DestroyEdgesAndProcessFoundPatches", "PxgPartitioning_RemoveEdgesFromPartitions", "PxgPartitioning_UpdateEgeLL", "PxgPartitioning_RemoveSpecialEdges", }; class ControlTask : public Cm::Task { PX_NOCOPY(ControlTask) SharedContext& mContext; const Codepath mCodepath; const PxU32 mStartIndex; const PxU32 mNbToProcess; public: ControlTask(PxU64 contextID, SharedContext& context, Codepath codepath, PxU32 startIndex=0, PxU32 nbToProcess=0xffffffff) : Cm::Task(contextID), mContext(context), mCodepath(codepath), mStartIndex(startIndex), mNbToProcess(nbToProcess) {} virtual const char* getName() const PX_OVERRIDE PX_FINAL { return gTaskNames[mCodepath]; } virtual void runInternal() PX_OVERRIDE PX_FINAL { parse(); if(mCodepath==DESTROY_EDGES_PROCESS_FOUND_PATCHES) { PX_ASSERT(mContext.mIslandSim.mGpuData); IG::GPUExternalData& islandSimGpuData = *mContext.mIslandSim.mGpuData; // PT: last part of processLostPatches_Reference, not multithreaded yet mContext.mIP.destroyEdges(mContext.mIslandSim.mCpuData, islandSimGpuData, mContext.mBodySimManager, mContext.mJointManager, true, false); mContext.mIP.processFoundPatches_Reference(mContext.mIslandSim, mContext.mBodySimManager, 
mContext.mLostFoundPatchManagers, mContext.mNbLostFoundPatchManagers, mContext.mLostFoundPairOutputs); } } void parse() { PX_ASSERT(mContext.mIslandSim.mGpuData); IG::GPUExternalData& islandSimGpuData = *mContext.mIslandSim.mGpuData; const PxsContactManagerOutputCounts* PX_RESTRICT lostFoundPairOutputs = mContext.mLostFoundPairOutputs; const PxsContactManager*const* PX_RESTRICT lostFoundPatchManagers = mContext.mLostFoundPatchManagers; PxPinnedArray& PX_RESTRICT partitionIndexArray = mContext.mIP.mPartitionIndexArray; PxPinnedArray& PX_RESTRICT partitionNodeArray = mContext.mIP.mPartitionNodeArray; PxArray& PX_RESTRICT partitionSlabs = mContext.mIP.mPartitionSlabs; // PT: TODO: unfortunately for now we do parse the full input array again. Would be great to avoid that. const PxU32 startIndex = mStartIndex; const PxU32 last = mNbToProcess == 0xffffffff ? mContext.mNbLostFoundPatchManagers : mStartIndex + mNbToProcess; for(PxU32 a=startIndex; agetWorkUnit(); if(unit.mFlags & PxcNpWorkUnitFlag::eDISABLE_RESPONSE) continue; PartitionEdge* partitionEdge = islandSimGpuData.getFirstPartitionEdge(unit.mEdgeIndex); if(!partitionEdge) continue; for(PxU32 b = output.nbPatches; b < output.prevPatches; ++b) { if(!partitionEdge) break; const PxU32 hasInfiniteMass0 = partitionEdge->hasInfiniteMass0(); const PxU32 hasInfiniteMass1 = partitionEdge->hasInfiniteMass1(); const PxU32 node0Index = partitionEdge->mNode0.index(); const PxU32 node1Index = partitionEdge->mNode1.index(); const PxU32 uniqueId = partitionEdge->mUniqueIndex; if(partitionEdge->isSpecialHandled()) { } else if(mCodepath == REMOVE_EDGES_FROM_PARTITIONS) { // PT: TODO: the IDs here are now computed twice. Maybe we could do better. PxU32 id = partitionIndexArray[uniqueId].mPartitionIndex; const PxU32 slabId = id / PXG_BATCH_SIZE; PartitionSlab* slab = partitionSlabs[slabId]; PxU32 baseId = PXG_BATCH_SIZE*slabId; id -= baseId; PX_ASSERT(slab); // PT: part of removeEdgeInternal() we skipped in PreprocessTask. 
slab->mPartitions[id].removeFromPartition(uniqueId, partitionIndexArray); } else if(mCodepath == UPDATE_EDGE_LL) { const PxU32 id = partitionIndexArray[uniqueId].mPartitionIndex; const PxU32 slabId = id / PXG_BATCH_SIZE; PartitionSlab* slab = partitionSlabs[slabId]; // PT: part of removeEdgeInternal() we skipped in PreprocessTask. { // PT: at this point mNodeBitmap has already been fully updated but the mNodeEntries haven't, // so we can still use them to find the prev & next edges. if(!hasInfiniteMass0) { const PxU32 slabMask0 = slab->mNodeBitmap[node0Index]; // PT: TODO: refactor if(slabMask0) { NodeEntryDecoded prevEdge; NodeEntryDecoded nextEdge; mContext.mIP.getPreviousAndNextReferencesInSlab(prevEdge, nextEdge, node0Index, uniqueId, slab, slabMask0); const PxU32 prevUniqueId = getUniqueId(prevEdge); const PxU32 nextUniqueId = getUniqueId(nextEdge); const PxU32 data = nextUniqueId << 1; const bool cndt = node0Index == getNode0Index(nextEdge); const PxU32 dstIndex = node0Index == getNode0Index(prevEdge) ? 0 : 1; partitionNodeArray[prevUniqueId].mNextIndex[dstIndex] = cndt ? data : data|1; } } if(!hasInfiniteMass1) { const PxU32 slabMask1 = slab->mNodeBitmap[node1Index]; if(slabMask1) { NodeEntryDecoded prevEdge; NodeEntryDecoded nextEdge; mContext.mIP.getPreviousAndNextReferencesInSlab(prevEdge, nextEdge, node1Index, uniqueId, slab, slabMask1); const PxU32 prevUniqueId = getUniqueId(prevEdge); const PxU32 nextUniqueId = getUniqueId(nextEdge); const PxU32 data = nextUniqueId << 1; const bool cndt = node1Index == getNode0Index(nextEdge); const PxU32 dstIndex = node1Index == getNode0Index(prevEdge) ? 0 : 1; partitionNodeArray[prevUniqueId].mNextIndex[dstIndex] = cndt ? 
data : data|1; } } } } PartitionEdge* nextPartitionEdge = partitionEdge->mNextPatch; if(mCodepath==DESTROY_EDGES_PROCESS_FOUND_PATCHES) { // PT: part of removeEdge() we couldn't run before { const IG::EdgeIndex edgeIndex = partitionEdge->getEdgeIndex(); PartitionEdge* pEdge = islandSimGpuData.getFirstPartitionEdge(edgeIndex); if (pEdge == partitionEdge) islandSimGpuData.setFirstPartitionEdge(edgeIndex, nextPartitionEdge); } mContext.mIP.mEdgeManager.putEdge(partitionEdge); } partitionEdge = nextPartitionEdge; } } } }; class PreprocessEpilogueTask : public Cm::Task { PX_NOCOPY(PreprocessEpilogueTask) SharedContext& mContext; public: PreprocessEpilogueTask(PxU64 contextID, SharedContext& context) : Cm::Task(contextID), mContext(context) {} virtual const char* getName() const PX_OVERRIDE PX_FINAL { return "PxgIncrementalPartitioning_PreprocessEpilogueTask"; } virtual void runInternal() PX_OVERRIDE PX_FINAL { Cm::FlushPool& flushPool = mContext.mFlushPool; ControlTask* removeEdgesFromPartitionsTask = PX_PLACEMENT_NEW(flushPool.allocate(sizeof(ControlTask)), ControlTask)(mContextID, mContext, REMOVE_EDGES_FROM_PARTITIONS); RemoveBatchedSpecialEdgesTask* removeSpecialEdgesTask = PX_PLACEMENT_NEW(flushPool.allocate(sizeof(RemoveBatchedSpecialEdgesTask)), RemoveBatchedSpecialEdgesTask)(mContextID, mContext); ControlTask* removeSerialContinuationTask = PX_PLACEMENT_NEW(flushPool.allocate(sizeof(ControlTask)), ControlTask)(mContextID, mContext, DESTROY_EDGES_PROCESS_FOUND_PATCHES); removeSerialContinuationTask->setContinuation(mContext.mContinuation); removeEdgesFromPartitionsTask->setContinuation(removeSerialContinuationTask); removeSpecialEdgesTask->setContinuation(removeSerialContinuationTask); removeSpecialEdgesTask->removeReference(); removeEdgesFromPartitionsTask->removeReference(); // PT: TODO: the split could be better here const PxU32 nbToGo = mContext.mNbLostFoundPatchManagers; const PxU32 numWorkerTasks = 
mContext.mContinuation->getTaskManager()->getCpuDispatcher()->getWorkerCount(); PxU32 nbPerTask = 0xffffffff; if(numWorkerTasks>2) { nbPerTask = nbToGo/(numWorkerTasks*2); nbPerTask = PxMax(nbPerTask, 32u); } for(PxU32 a=0; aremoveReference(); } }; } // PT: a multi-threaded version of processLostPatches_Reference void PxgIncrementalPartition::processLostPatchesMT( IG::IslandSim& islandSim, Cm::FlushPool& flushPool, PxBaseTask* continuation, PxsContactManager** lostFoundPatchManagers, PxU32 nbLostFoundPatchManagers, const PxsContactManagerOutputCounts* lostFoundPairOutputs, PxgBodySimManager& bodySimManager, PxgJointManager& jointManager) { mDestroyedContactEdgeIndices.forceSize_Unsafe(0); SharedContext* sharedContext = PX_PLACEMENT_NEW(flushPool.allocate(sizeof(SharedContext)), SharedContext)( *this, islandSim, flushPool, continuation, bodySimManager, jointManager, lostFoundPairOutputs, lostFoundPatchManagers, nbLostFoundPatchManagers); // PT: this code is fairly tedious to multithread: // - the input buffer contains both lost & found patches, in arbitrary order. We don't know where the lost patches are so the load balancing is tricky. // - parsing the buffer is actually quite expensive due to all the data it fetches from random places. So ideally we would only do that once. Versions // that redo the parsing in multiple threads just spread the cost to all cores. // - the code does heterogeneous bits of work that all require different multithreading solutions. // - some bits can be trivially multithreaded, some bits must remain sequential (sometimes just to preserve determinism), etc. // // This version uses 2*N + 4 tasks: // - N initial "preprocess" tasks that parse the input data and do as much as possible in parallel (various buffer updates, edge coloring, etc). // - an epilogue task that runs after these N initial tasks. This epilogue task then spawns: // - a task that removes edges from partitions. 
(*) // - a task that removes "special handled" edges from external buffers. (*) // - N tasks to update the partition edge linked list. // - a continuation task for the N + 2 previous tasks that runs whatever serial code is left. // (*) must remain single-threaded / serial to preserve determinism PreprocessEpilogueTask* preprocessEpilogueTask = PX_PLACEMENT_NEW(flushPool.allocate(sizeof(PreprocessEpilogueTask)), PreprocessEpilogueTask)(mContextID, *sharedContext); preprocessEpilogueTask->setContinuation(continuation); // PT: there is no attempt at clever load balancing yet. We just trivially split the input data. const PxU32 nbToGo = nbLostFoundPatchManagers; const PxU32 numWorkerTasks = continuation->getTaskManager()->getCpuDispatcher()->getWorkerCount(); PxU32 nbPerTask = 0xffffffff; if(numWorkerTasks>2) { nbPerTask = nbToGo/(numWorkerTasks*2); nbPerTask = PxMax(nbPerTask, 32u); } PreprocessTask* previousTask = NULL; PreprocessTask* taskHead = NULL; PxU32 nbTasks = 0; for(PxU32 a=0; a=mBatches.size()) { const PxU32 oldSize = mBatches.size(); const PxU32 newSize = oldSize*2 < 32 ? 
32 : oldSize*2; mBatches.resize(newSize); for(PxU32 i=oldSize;imEdges.clear(); } else { batch = PX_NEW(PartitionEdgeBatch); mBatches[nbTasks] = batch; } preprocessTask->mBatch = batch; startTask(preprocessTask, preprocessEpilogueTask); updateTaskLinkedList(previousTask, preprocessTask, taskHead); nbTasks++; } sharedContext->mTaskHead = taskHead; preprocessEpilogueTask->removeReference(); } /////////////////////////////////////////////////////////////////////////////// void PxgIncrementalPartition::processLostPatches_Reference( IG::IslandSim& islandSim, PxgBodySimManager& bodySimManager, PxgJointManager& jointManager, PxsContactManager** lostFoundPatchManagers, PxU32 nbLostFoundPatchManagers, const PxsContactManagerOutputCounts* lostFoundPairOutputs) { PX_PROFILE_ZONE("PxgIncrementalPartition::processLostPatches", mContextID); const IG::CPUExternalData& islandSimCpuData = islandSim.mCpuData; PX_ASSERT(islandSim.mGpuData); IG::GPUExternalData& islandSimGpuData = *islandSim.mGpuData; mDestroyedContactEdgeIndices.forceSize_Unsafe(0); // Process lost patches. These are CMs that reduced the number of contact patches they had. { PX_PROFILE_ZONE("LostPatches", mContextID); for (PxU32 a = 0; a < nbLostFoundPatchManagers; ++a) { // PT: skip found patches const PxsContactManagerOutputCounts& output = lostFoundPairOutputs[a]; if(!isLostPatch(output)) continue; const PxsContactManager* manager = lostFoundPatchManagers[a]; const PxcNpWorkUnit& unit = manager->getWorkUnit(); if (!(unit.mFlags & PxcNpWorkUnitFlag::eDISABLE_RESPONSE)) { const IG::EdgeIndex edgeIndex = unit.mEdgeIndex; PartitionEdge* partitionEdge = islandSimGpuData.getFirstPartitionEdge(edgeIndex); //KS - if this is NULL, it means this unit was also destroyed and will be included in the destroyedEdgeCount (i.e. 
NP detected a lost touch at the same time as BP detected a lost pair //PX_ASSERT(partitionEdge != NULL || containedInDestroyedEdges(manager, destroyedEdges, destroyedEdgeCount)); if (partitionEdge) { if (output.nbPatches == 0 && output.prevPatches != 0) { decreaseNodeInteractionCounts(mNodeInteractionCountArray, partitionEdge->mNode0, partitionEdge->mNode1); #if RECORD_DESTROYED_EDGES_IN_FOUND_LOST_PASSES mDestroyedContactEdgeIndices.pushBack(edgeIndex); #endif } for (PxU32 b = output.nbPatches; b < output.prevPatches; ++b) { if(!partitionEdge) break; PartitionEdge* nextEdge = partitionEdge->mNextPatch; //Patches are stored last->first in the Edge structure, so we can just iterate over the edges removeEdge(partitionEdge, islandSimGpuData, bodySimManager); partitionEdge = nextEdge; } } } } } destroyEdges(islandSimCpuData, islandSimGpuData, bodySimManager, jointManager, true, false); } void PxgIncrementalPartition::processFoundPatches_Reference(IG::IslandSim& islandSim, PxgBodySimManager& bodySimManager, PxsContactManager** lostFoundPatchManagers, PxU32 nbLostFoundPatchManagers, const PxsContactManagerOutputCounts* lostFoundPairOutputs) { PX_PROFILE_ZONE("PxgIncrementalPartition::processFoundPatches", mContextID); const IG::CPUExternalData& islandSimCpuData = islandSim.mCpuData; PX_ASSERT(islandSim.mGpuData); IG::GPUExternalData& islandSimGpuData = *islandSim.mGpuData; { PX_PROFILE_ZONE("FoundPatches", mContextID); #if USE_SPLIT_SECOND_PASS_ISLAND_GEN const PxBitMap& activeCMBitmap = mActiveCMBitmapCopy; #else const PxBitMap& activeCMBitmap = islandSimGpuData.getActiveContactManagerBitmap(); #endif for (PxU32 a = 0; a < nbLostFoundPatchManagers; ++a) { //The list can now contain found and lost patches, so we need to know which we are dealing with. If it's a lost patch, this //will be processed in the next stage! 
// PT: skip lost patches const PxsContactManagerOutputCounts& output = lostFoundPairOutputs[a]; if(!isFoundPatch(output)) continue; const PxU32 prevPatches = output.prevPatches; const PxsContactManager* manager = lostFoundPatchManagers[a]; const PxcNpWorkUnit& unit = manager->getWorkUnit(); const IG::EdgeIndex edgeIndex = unit.mEdgeIndex; if (!(unit.mFlags & PxcNpWorkUnitFlag::eDISABLE_RESPONSE) && activeCMBitmap.boundedTest(unit.mEdgeIndex)) { //We either add all patches, or we add only the new patches. This decision is made based on whether there is already //a partition edge const PxU32 startIndex = islandSimGpuData.getFirstPartitionEdge(edgeIndex) ? prevPatches : 0; const PxNodeIndex node1 = islandSimCpuData.getNodeIndex1(edgeIndex); const PxNodeIndex node2 = islandSimCpuData.getNodeIndex2(edgeIndex); for (PxU32 b = startIndex; b < output.nbPatches; ++b) { PartitionEdge* edge = addEdge_Stage1(islandSim, edgeIndex, b, unit.mNpIndex, node1, node2); const bool specialHandled = addContactManager(edge, unit, bodySimManager); addEdge_Stage2(islandSimGpuData, edgeIndex, edge, specialHandled, true, true); } if (startIndex == 0) { #if RECORD_DESTROYED_EDGES_IN_FOUND_LOST_PASSES mDestroyedContactEdgeIndices.pushBack(edgeIndex); //KS - this will potentially hit *if* CCD is enabled #endif increaseNodeInteractionCounts(mNodeInteractionCountArray, node1, node2); } } } } } // PT: walk the patch linked list and remove all related edges PX_FORCE_INLINE void PxgIncrementalPartition::removeAllEdges(IG::GPUExternalData& islandSimGpuData, PxgBodySimManager& bodySimManager, PartitionEdge* partitionEdge) { while(partitionEdge) { PartitionEdge* nextEdge = partitionEdge->mNextPatch; removeEdge(partitionEdge, islandSimGpuData, bodySimManager); partitionEdge = nextEdge; } } void PxgIncrementalPartition::destroyEdges(const IG::CPUExternalData& islandSimCpuData, IG::GPUExternalData& islandSimGpuData, PxgBodySimManager& bodySimManager, PxgJointManager& jointManager, bool 
clearDestroyedEdges, bool recordDestroyedEdges) { PX_PROFILE_ZONE("DestroyedEdges", mContextID); const PxU32 destroyedEdgeCount = islandSimGpuData.getNbDestroyedPartitionEdges(); //printf("destroyedEdgeCount: %d\n", destroyedEdgeCount); if(!destroyedEdgeCount) return; #if RECORD_DESTROYED_EDGES_IN_FOUND_LOST_PASSES recordDestroyedEdges = true; #endif PartitionEdge** destroyedEdges = islandSimGpuData.getDestroyedPartitionEdges(); for (PxU32 a = 0; a < destroyedEdgeCount; ++a) { PartitionEdge* partitionEdge = destroyedEdges[a]; if (partitionEdge) { decreaseNodeInteractionCounts(mNodeInteractionCountArray, partitionEdge->mNode0, partitionEdge->mNode1); const PxU8 edgeType = mPartitionIndexArray[partitionEdge->mUniqueIndex].mCType; if (edgeType == PxgEdgeType::eCONSTRAINT || edgeType == PxgEdgeType::eARTICULATION_CONSTRAINT) jointManager.removeJoint(partitionEdge->getEdgeIndex(), mNpIndexArray, islandSimCpuData, islandSimGpuData); else if(recordDestroyedEdges) mDestroyedContactEdgeIndices.pushBack(partitionEdge->getEdgeIndex()); removeAllEdges(islandSimGpuData, bodySimManager, partitionEdge); } } if(clearDestroyedEdges) islandSimGpuData.clearDestroyedPartitionEdges(); } /////////////////////////////////////////////////////////////////////////////// namespace { class UpdateIncrementalIslandsTask : public Cm::Task { public: enum Codepath { REFERENCE_VERSION, PART_1_AND_2_0, PART_2_1, PART_2_2_AND_3, PART_2_2_ADD_CONTACT_MANAGER, PART_2_2_EDGE_COLORING, PART_2_2_UPDATE_PARTITION_EDGE_LL_HEAD, PART_3, }; protected: PX_NOCOPY(UpdateIncrementalIslandsTask) PxgIncrementalPartition& mIncrementalPartition; IG::IslandSim& mIslandSim; const IG::AuxCpuData& mIslandManagerData; PxsContactManagerOutputIterator& mIterator; PxgBodySimManager& mBodySimManager; PxgJointManager& mJointManager; const Codepath mCodepath; public: PxU32 mStartIndex; PxU32 mNbToProcess; UpdateIncrementalIslandsTask( PxU64 contextID, PxgIncrementalPartition& partition, IG::IslandSim& islandSim, const 
IG::AuxCpuData& islandManagerData, PxsContactManagerOutputIterator& iterator, PxgBodySimManager& bodySimManager, PxgJointManager& jointManager, Codepath codepath) : Cm::Task (contextID), mIncrementalPartition (partition), mIslandSim (islandSim), mIslandManagerData (islandManagerData), mIterator (iterator), mBodySimManager (bodySimManager), mJointManager (jointManager), mCodepath (codepath), mStartIndex (0), mNbToProcess (0xffffffff) {} virtual const char* getName() const PX_OVERRIDE PX_FINAL { return "PxgIncrementalPartitioning_UpdateIncrementalIslandsTask"; } virtual void runInternal() PX_OVERRIDE PX_FINAL { const Codepath codepath = mCodepath; if(codepath == REFERENCE_VERSION) { mIncrementalPartition.updateIncrementalIslands_Reference(mIslandSim, mIslandManagerData, mIterator, mBodySimManager, mJointManager); } else if(codepath == PART_1_AND_2_0) { mIncrementalPartition.updateIncrementalIslands_Part1(mIslandSim, mIslandManagerData, mIterator, mBodySimManager, mJointManager); mIncrementalPartition.updateIncrementalIslands_Part2_0(mIslandSim, mIslandManagerData, mIterator); } else if(codepath == PART_2_1) { const PxU32 nbToProcess = mNbToProcess == 0xffffffff ? 
mIncrementalPartition.mPart2WorkItems.size() : mNbToProcess;
				// One worker slice of part 2_1: nbToProcess work items starting at mStartIndex.
				mIncrementalPartition.updateIncrementalIslands_Part2_1(mStartIndex, nbToProcess, mIslandSim, mIslandManagerData);
			}
			else if(codepath == PART_2_2_AND_3)
			{
				// All three sub-steps of part 2_2 in this task, immediately followed by part 3.
				mIncrementalPartition.updateIncrementalIslands_Part2_2(mIslandSim, mBodySimManager, true, true, true);
				mIncrementalPartition.updateIncrementalIslands_Part3(mIslandSim, mJointManager);
			}
			else if(codepath == PART_2_2_ADD_CONTACT_MANAGER)
			{
				// Only the first sub-step of part 2_2 (dopart1).
				mIncrementalPartition.updateIncrementalIslands_Part2_2(mIslandSim, mBodySimManager, true, false, false);
			}
			else if(codepath == PART_2_2_EDGE_COLORING)
			{
				// Only the second sub-step of part 2_2 (dopart2).
				mIncrementalPartition.updateIncrementalIslands_Part2_2(mIslandSim, mBodySimManager, false, true, false);
			}
			else if(codepath == PART_2_2_UPDATE_PARTITION_EDGE_LL_HEAD)
			{
				// Only the third sub-step of part 2_2 (dopart3).
				mIncrementalPartition.updateIncrementalIslands_Part2_2(mIslandSim, mBodySimManager, false, false, true);
			}
			else if(codepath == PART_3)
			{
				// Final serial step: process the recorded edge cases, then run part 3.
				mIncrementalPartition.updateIncrementalIslands_Part2_2_ProcessEdgeCases(mIslandSim);
				mIncrementalPartition.updateIncrementalIslands_Part3(mIslandSim, mJointManager);
			}
		}
	};

	// Helper task: when run, it calls removeReference() once on each of the three tasks it was given.
	// updateIncrementalIslands() uses it as the continuation of every PART_2_1 worker task and hands it
	// the three PART_2_2 tasks.
	// NOTE(review): presumably dropping that reference is what allows those three tasks to be dispatched -
	// confirm against Cm::Task / PxBaseTask.
	class UnlockLastIncrementalIslandsTasks : public Cm::Task
	{
		PX_NOCOPY(UnlockLastIncrementalIslandsTasks)

		// The three tasks whose references are dropped in runInternal().
		UpdateIncrementalIslandsTask&	mTask0;
		UpdateIncrementalIslandsTask&	mTask1;
		UpdateIncrementalIslandsTask&	mTask2;
	public:
		UnlockLastIncrementalIslandsTasks(PxU64 contextID,
			UpdateIncrementalIslandsTask& task0,
			UpdateIncrementalIslandsTask& task1,
			UpdateIncrementalIslandsTask& task2
		) :
			Cm::Task(contextID), mTask0(task0), mTask1(task1), mTask2(task2) {}

		virtual const char* getName() const	PX_OVERRIDE PX_FINAL
		{
			return "UnlockLastIncrementalIslandsTasks";
		}

		virtual void runInternal()	PX_OVERRIDE PX_FINAL
		{
			mTask0.removeReference();
			mTask1.removeReference();
			mTask2.removeReference();
		}
	};
}

// Entry point of the incremental partition update.
// - flushPool and continuation both non-NULL: the work is organised as tasks allocated from flushPool
//   (a single REFERENCE_VERSION task when gRunDefaultVersion is set, otherwise the multi-task pipeline below).
// - otherwise: updateIncrementalIslands_Reference() runs directly in the calling thread.
// The remaining parameters are forwarded unchanged to the individual parts / tasks.
void PxgIncrementalPartition::updateIncrementalIslands(
	IG::IslandSim& islandSim, const IG::AuxCpuData& islandManagerData,
	Cm::FlushPool* flushPool, PxBaseTask* continuation,
	PxsContactManagerOutputIterator& iterator, PxgBodySimManager& bodySimManager, PxgJointManager& jointManager)
{
	PX_PROFILE_ZONE("PxgIncrementalPartition::updateIncrementalIslands", mContextID);

	if(flushPool && continuation)
	{
		if(gRunDefaultVersion)
		{
			// PT: run serial reference version in separate task
			UpdateIncrementalIslandsTask* task = PX_PLACEMENT_NEW(flushPool->allocate(sizeof(UpdateIncrementalIslandsTask)), UpdateIncrementalIslandsTask)
				(mContextID, *this, islandSim, islandManagerData, iterator, bodySimManager, jointManager, UpdateIncrementalIslandsTask::REFERENCE_VERSION);
			startTask(task, continuation);
		}
		else
		{
			// PT: see the plan in updateIncrementalIslands_Reference().
			//
			// We want to optimize "Part 2" (i.e. effectively the "ActivatedContacts" profile zone), which itself has multiple subparts. Code is like:
			//
			//	foreach activated contact i:
			//		const IG::EdgeIndex edgeId = activatedContacts[a];
			//		if (activeCMBitmap.test(edgeId))
			//			PxsContactManager* cm = islandManagerData.getContactManager(edgeId);
			//			if (cm)
			//				if (islandSimGpuData.getFirstPartitionEdge(edgeId) == NULL)
			//					PxcNpWorkUnit& unit = cm->getWorkUnit();
			//					increaseNodeInteractionCounts(...);							(2)
			//					foreach contact patch b:
			//						PartitionEdge* edge = addEdge_Stage1(...);				(1)(2)
			//						bool specialHandled = addContactManager(...);			(2)(3)
			//						addEdge_Stage2(...);									(3)
			//					unit.mFrictionPatchCount = 0;								(2)
			//			mDestroyedContactEdgeIndices.pushBack(edgeId);						(1)
			//
			// It is a mix of:
			// (1) things that cannot easily be multithreaded, we will need to run these serially.
			// (2) things that can be trivially multithreaded with multiple tasks.
			// (3) things that can be multithreaded using one "single-threaded" task for each aspect of them.
			//
			// Rough plan then:
			// - run serial part / preallocate buffers (part2_0)
			// - run Stage1 with multiple tasks (part2_1)
			// - run addContactManager and Stage2 in 2 parallel tasks (part2_2) like we did for lost patches
			//		- in theory this is not possible as addContactManager can fail and be re-routed to Stage2. This will need some cleanup edge cases.
			//
			// This is just for part 2. We also need to run parts 1 and 3.
			//
			// We don't need to run the first part of part 2 in a separate task, we can do it right here. So the pipeline would be:
			// - part1 and part2_0 right here
			// - spawn multiple tasks for part2_1
			// - then a task that unlocks the two last parts
			// - part2_2 split into two parts running in parallel in separate tasks
			// - final task to do part3

			// PT: TODO: delegate tasks / static allocs?

			// Serial work executed directly by the caller before any task is spawned.
			updateIncrementalIslands_Part1(islandSim, islandManagerData, iterator, bodySimManager, jointManager);

			updateIncrementalIslands_Part2_0(islandSim, islandManagerData, iterator);

			// PT: pipeline:
			// PART_2_1 => unlockTask => PART_2_2_true_false_false => PART_3
			// PART_2_1                  PART_2_2_false_true_false
			// PART_2_1                  PART_2_2_false_false_true
			// ...
			// PART_2_1

			// PT: part3 is the serial task we run in the end
			UpdateIncrementalIslandsTask* part3task = PX_PLACEMENT_NEW(flushPool->allocate(sizeof(UpdateIncrementalIslandsTask)), UpdateIncrementalIslandsTask)(mContextID, *this, islandSim, islandManagerData, iterator, bodySimManager, jointManager, UpdateIncrementalIslandsTask::PART_3);
			part3task->setContinuation(continuation);

			// PT: part2s run in parallel in separate threads
			// Each of the three part2_2 tasks has part3task as continuation.
			UpdateIncrementalIslandsTask* part2_cm = PX_PLACEMENT_NEW(flushPool->allocate(sizeof(UpdateIncrementalIslandsTask)), UpdateIncrementalIslandsTask)(mContextID, *this, islandSim, islandManagerData, iterator, bodySimManager, jointManager, UpdateIncrementalIslandsTask::PART_2_2_ADD_CONTACT_MANAGER);
			part2_cm->setContinuation(part3task);

			UpdateIncrementalIslandsTask* part2_ec = PX_PLACEMENT_NEW(flushPool->allocate(sizeof(UpdateIncrementalIslandsTask)), UpdateIncrementalIslandsTask)(mContextID, *this, islandSim, islandManagerData, iterator, bodySimManager, jointManager, UpdateIncrementalIslandsTask::PART_2_2_EDGE_COLORING);
			part2_ec->setContinuation(part3task);

			UpdateIncrementalIslandsTask* part2_ll = PX_PLACEMENT_NEW(flushPool->allocate(sizeof(UpdateIncrementalIslandsTask)), UpdateIncrementalIslandsTask)(mContextID, *this, islandSim, islandManagerData, iterator, bodySimManager, jointManager, UpdateIncrementalIslandsTask::PART_2_2_UPDATE_PARTITION_EDGE_LL_HEAD);
			part2_ll->setContinuation(part3task);

			// PT: the unlock task starts the part2_2 tasks after the part2_1 tasks are done
			UnlockLastIncrementalIslandsTasks* unlockTask = PX_PLACEMENT_NEW(flushPool->allocate(sizeof(UnlockLastIncrementalIslandsTasks)), UnlockLastIncrementalIslandsTasks)(mContextID, *part2_cm, *part2_ec, *part2_ll);
			unlockTask->setContinuation(continuation);

			// PT: this spawns the part2_1 tasks
			const PxU32 nbToGo = mPart2WorkItems.size();

			// Slice size: with more than 2 workers each task gets nbToGo/(2*workers) items, but never fewer than 32.
			// Otherwise nbPerTask keeps its initial value 0xffffffff.
			const PxU32 numWorkerTasks = continuation->getTaskManager()->getCpuDispatcher()->getWorkerCount();
			PxU32 nbPerTask = 0xffffffff;
			if(numWorkerTasks>2)
			{
				nbPerTask = nbToGo/(numWorkerTasks*2);
				nbPerTask = PxMax(nbPerTask, 32u);
			}

			// NOTE(review): the loop header below looks damaged in this copy of the file ("aallocate" fuses the loop
			// condition with a later "->allocate(...)" call; "task" and "nbToProcess" are used without a visible
			// declaration). Restore the original statement from version control before building.
			for(PxU32 a=0; aallocate(sizeof(UpdateIncrementalIslandsTask)), UpdateIncrementalIslandsTask)(mContextID, *this, islandSim, islandManagerData, iterator, bodySimManager, jointManager, UpdateIncrementalIslandsTask::PART_2_1);
				task->mStartIndex = a;
				task->mNbToProcess = nbToProcess;
				// Every part2_1 worker has unlockTask as its continuation.
				startTask(task, unlockTask);
			}

			// Drop the references held on the unlock task and on the final part3 task now that setup is complete.
			unlockTask->removeReference();
			part3task->removeReference();
		}
	}
	else
	{
		// PT: run serial version in the calling thread
		updateIncrementalIslands_Reference(islandSim, islandManagerData, iterator, bodySimManager, jointManager);
	}
}

// Serial reference implementation: runs Part1, Part2 and Part3 back to back in the calling thread.
void PxgIncrementalPartition::updateIncrementalIslands_Reference(
	IG::IslandSim& islandSim, const IG::AuxCpuData& islandManagerData,
	PxsContactManagerOutputIterator& iterator, PxgBodySimManager& bodySimManager, PxgJointManager& jointManager)
{
	PX_PROFILE_ZONE("PxgIncrementalPartition::updateIncrementalIslands_Reference", mContextID);

	// PT: the function essentially has 3 parts:
	//
	// Part 1:
	// - various pre-allocations & resizes
	// - destroy edges
	// - process deactivating joints
	// - process deactivating contacts
	// - process activating joints
	//
	// Part 2:
	// - process activated contacts
	//
	// Part 3:
	// - compaction
	// - accumulate slabs
	// - accumulate partitions
	// - finalize partitions
	//
	// Part 2 is the bottleneck in the benchmarks we looked at so far, so the initial plan is simply:
	// - run Part1 single-threaded
	// - then Part2 multi-threaded
	// - then Part3 single-threaded

	updateIncrementalIslands_Part1(islandSim, islandManagerData, iterator, bodySimManager, jointManager);

	updateIncrementalIslands_Part2(islandSim, islandManagerData, iterator, bodySimManager);

	updateIncrementalIslands_Part3(islandSim, jointManager);
}

// PT: first part of reference version that we did not touch:
// - various pre-allocations & resizes
// - destroy edges
// - process deactivating joints
// - process deactivating contacts
// - process activating joints
void PxgIncrementalPartition::updateIncrementalIslands_Part1(
	IG::IslandSim& islandSim, const IG::AuxCpuData& islandManagerData,
	PxsContactManagerOutputIterator& iterator,
	PxgBodySimManager& bodySimManager, PxgJointManager& jointManager)
{
	PX_PROFILE_ZONE("PxgIncrementalPartition::updateIncrementalIslands_Part1", mContextID);

	const IG::CPUExternalData& islandSimCpuData = islandSim.mCpuData;
	PX_ASSERT(islandSim.mGpuData);
	IG::GPUExternalData& islandSimGpuData = *islandSim.mGpuData;

	//KS - TODO - plumb articulation contacts/joints into this!
	const PxU32 destroyedEdgeCount = islandSimGpuData.getNbDestroyedPartitionEdges();
	/*const PxU32 destroyedEdgeCount = islandSim.getNbDestroyedEdges();
	const IG::EdgeIndex* destroyedEdges = islandSim.getDestroyedEdges();*/

	const PxU32 deactivatingJointCount = islandSim.getNbDeactivatingEdges(IG::Edge::eCONSTRAINT);
	const IG::EdgeIndex* const deactivatingJoints = islandSim.getDeactivatingEdges(IG::Edge::eCONSTRAINT);

	const PxU32 deactivatingContactCount = islandSim.getNbDeactivatingEdges(IG::Edge::eCONTACT_MANAGER);
	const IG::EdgeIndex* const deactivatingContacts = islandSim.getDeactivatingEdges(IG::Edge::eCONTACT_MANAGER);

	//const PxU32 newJointCount = islandSim.getNbDirtyEdges(IG::Edge::eCONSTRAINT);

	const PxU32 activatedJointCount = islandSim.getNbActivatedEdges(IG::Edge::eCONSTRAINT);
	const IG::EdgeIndex* const activatedJoints = islandSim.getActivatedEdges(IG::Edge::eCONSTRAINT);

	const PxU32 activatedContactCount = islandSim.getNbActivatedEdges(IG::Edge::eCONTACT_MANAGER);

	reserveNodes(islandSim.getNbNodes());

	/*mDestroyedContactEdgeIndices->setSize(0);
	mDestroyedContactEdgeIndices->reserve(sizeof(PxU32), activatedContactCount + deactivatingContactCount + destroyedEdgeCount);*/

	// Reset the list and reserve room for the sum of the three counts, so the pushBack calls in this
	// update should not need to grow it.
	mDestroyedContactEdgeIndices.forceSize_Unsafe(0);
	mDestroyedContactEdgeIndices.reserve(activatedContactCount + deactivatingContactCount + destroyedEdgeCount);

	mIsDirtyNode.resize(islandSim.getNbNodes());

	//(1) Process destroyed edges. Note that these would have been destroyed last frame so there is a chance that they might have been recreated this frame
	// (in the event of the AABBs temporarily not overlapping in the BP). To simplify logic, we just remove and recreate these later...
	// PT: that comment about AABBs tells me this code was initially based on the speculative island manager...
	destroyEdges(islandSimCpuData, islandSimGpuData, bodySimManager, jointManager, false, true);

	//Deactivating joints - these are joints that have gone to sleep
	{
		PX_PROFILE_ZONE("DeactivatingJoints", mContextID);
		for (PxU32 a = 0; a < deactivatingJointCount; ++a)
		{
			//remove it from the PxgJointManager
			jointManager.removeJoint(deactivatingJoints[a], mNpIndexArray, islandSimCpuData, islandSimGpuData);

			PartitionEdge* partitionEdge = islandSimGpuData.getFirstPartitionEdge(deactivatingJoints[a]);
			if (partitionEdge)
			{
				decreaseNodeInteractionCounts(mNodeInteractionCountArray, partitionEdge->mNode0, partitionEdge->mNode1);
				removeAllEdges(islandSimGpuData, bodySimManager, partitionEdge);
			}
			// The first-partition-edge slot is cleared whether or not an edge was found.
			islandSimGpuData.setFirstPartitionEdge(deactivatingJoints[a], NULL);
		}
	}

	//(2) Now we process the deactivating contacts. As with lost edges, deactivated edges were computed last frame so they may have been activated again.
	// If they're activated again, we will process them afterwards in the activatingContactCount stage.
	// In this case, we set the prevPatches to be the same as numPatches. This ensures that any found/lost events for woken edges will not also get processed, resulting in
	// an incorrect internal state
	{
		PX_PROFILE_ZONE("DeactivatedContacts", mContextID);

		for (PxU32 a = 0; a < deactivatingContactCount; ++a)
		{
			const IG::EdgeIndex edgeId = deactivatingContacts[a];
			PartitionEdge* partitionEdge = islandSimGpuData.getFirstPartitionEdge(edgeId);
			// Edges without a partition edge are skipped entirely (nothing is recorded for them).
			if (partitionEdge)
			{
				decreaseNodeInteractionCounts(mNodeInteractionCountArray, partitionEdge->mNode0, partitionEdge->mNode1);
				removeAllEdges(islandSimGpuData, bodySimManager, partitionEdge);

				islandSimGpuData.setFirstPartitionEdge(edgeId, NULL);

				PxsContactManager* cm = islandManagerData.getContactManager(edgeId);
				if (cm)
				{
					PxcNpWorkUnit& workUnit = cm->getWorkUnit();
					PxsContactManagerOutput& output = iterator.getContactManagerOutput(workUnit.mNpIndex);
					output.prevPatches = output.nbPatches; //Ensure that our internal data does not get corrupted by any touch found/lost events
					workUnit.mFrictionPatchCount = 0; //Zero the friction patch count to make sure that we don't access any memory illegally
				}
				mDestroyedContactEdgeIndices.pushBack(edgeId);
			}
		}
	}

	{
		PX_PROFILE_ZONE("ActivatedJoints", mContextID);
		//Activated joints - joints that were woken this frame.
		for (PxU32 a = 0; a < activatedJointCount; ++a)
		{
			//add it to the PxgJointManager
			Dy::Constraint* constraint = islandManagerData.getConstraint(activatedJoints[a]);

			//PX_ASSERT((!constraint->bodyCore0->isKinematic()) || (!constraint->bodyCore1->isKinematic()));

			const PxU32 edgeIndex = activatedJoints[a];

			const PxNodeIndex node1 = islandSimCpuData.getNodeIndex1(edgeIndex);
			const PxNodeIndex node2 = islandSimCpuData.getNodeIndex2(edgeIndex);

			// Joints are added with patch index 0 and 0xFFFFFFFF as the np index argument.
			PartitionEdge* edge = addEdge_Stage1(islandSim, edgeIndex, 0, 0xFFFFFFFF, node1, node2);
			const bool specialHandled = addJointManager(edge, bodySimManager);
			addEdge_Stage2(islandSimGpuData, edgeIndex, edge, specialHandled, true, true);

			increaseNodeInteractionCounts(mNodeInteractionCountArray, node1, node2);

			jointManager.addJoint(edgeIndex, constraint, islandSim, mNpIndexArray, mSolverConstants, edge->mUniqueIndex);
		}
	}

	//processFoundPatches(islandManager, foundPatchManagers, nbFoundPatchManagers, foundManagerCounts, *simulationController);
}

// Serial preparation step of the task-based part 2.
// - Rebuilds mPart2WorkItems: one item per contact patch of every activated contact-manager edge that is set in
//   the active contact manager bitmap, has a contact manager, and has no first partition edge yet.
//   Each item gets its PartitionEdge from mEdgeManager.getEdge() here.
// - Appends every activated edge that is set in the bitmap to mDestroyedContactEdgeIndices.
// - Grows the per-edge arrays so they cover mEdgeManager.getEdgeCount().
void PxgIncrementalPartition::updateIncrementalIslands_Part2_0(IG::IslandSim& islandSim, const IG::AuxCpuData& islandManagerData, PxsContactManagerOutputIterator& iterator)
{
	PX_PROFILE_ZONE("PxgIncrementalPartition::updateIncrementalIslands_Part2_0", mContextID);

	PX_ASSERT(islandSim.mGpuData);
	IG::GPUExternalData& islandSimGpuData = *islandSim.mGpuData;

	const PxBitMap& activeCMBitmap = islandSimGpuData.getActiveContactManagerBitmap();

	const PxU32 activatedContactCount = islandSim.getNbActivatedEdges(IG::Edge::eCONTACT_MANAGER);
	const IG::EdgeIndex* const activatedContacts = islandSim.getActivatedEdges(IG::Edge::eCONTACT_MANAGER);

	{
		PX_PROFILE_ZONE("PreallocateEdgesAndSerialWork", mContextID);

		// PT: TODO: could we do this part earlier in the pipeline, in parallel with something else?
		// Or actually MT it?

		mPart2WorkItems.clear();
		mPart2EdgeCases.clear();

		for (PxU32 a = 0; a < activatedContactCount; ++a)
		{
			const IG::EdgeIndex edgeId = activatedContacts[a];

			if(activeCMBitmap.test(edgeId))
			{
				mDestroyedContactEdgeIndices.pushBack(edgeId); //KS - looks a bit weird because we didn't "destroy" any edges but this just ensures that we zero the PF count for this edge

				PxsContactManager* cm = islandManagerData.getContactManager(edgeId);
				if(cm)
				{
					if(islandSimGpuData.getFirstPartitionEdge(edgeId) == NULL)
					{
						PxcNpWorkUnit& unit = cm->getWorkUnit();

						const PxsContactManagerOutput& output = iterator.getContactManagerOutput(unit.mNpIndex);

						// One work item (and one partition edge) per contact patch.
						for (PxU32 b = 0; b < output.nbPatches; ++b)
						{
							Part2WorkItem& item = mPart2WorkItems.insert();
							item.mEdgeID = edgeId;
							item.mPatchIndex = PxU16(b);
							item.mPartitionEdge = mEdgeManager.getEdge(edgeId);	// PT: TODO: batch
						}
					}
				}
			}
		}
	}

	// PT: these are the resizes that happened at the start of addEdge_Stage1
	// PT: TODO: consider refactoring
	{
		PX_PROFILE_ZONE("ResizeBuffers", mContextID);

		// Grow capacity to at least the edge count, or double the current capacity, whichever is larger.
		if (mEdgeManager.getEdgeCount() >= mPartitionIndexArray.capacity())
		{
			PX_PROFILE_ZONE("ResizeEdgeBuffer", mContextID);
			const PxU32 newSize = PxMax(mEdgeManager.getEdgeCount(), mPartitionIndexArray.capacity() * 2);
			mPartitionIndexArray.reserve(newSize);
			mNpIndexArray.reserve(newSize);
			mPartitionNodeArray.reserve(newSize);
		}

		// Then set the size of the three arrays to the edge count.
		if (mEdgeManager.getEdgeCount() >= mPartitionIndexArray.size())
		{
			const PxU32 count = mEdgeManager.getEdgeCount();
			mPartitionIndexArray.resizeUninitialized(count);
			mNpIndexArray.resizeUninitialized(count);
			mPartitionNodeArray.resizeUninitialized(count);
		}

		// mSolverConstants is resized (not just reserved) to the grown size.
		if(mEdgeManager.getEdgeCount() >= mSolverConstants.capacity())
		{
			const PxU32 count = mEdgeManager.getEdgeCount();
			const PxU32 newSize = PxMax(count, mSolverConstants.capacity() * 2);
			mSolverConstants.resize(newSize);
		}
	}
}

// PT: this one called from multiple threads
// Per-item set-up for a slice of mPart2WorkItems: fills in the partition edge's nodes and flags and the
// per-edge arrays indexed by the edge's mUniqueIndex, then stores the isSpecialCase() result on the item.
// The end of the slice is computed as startIndex + nbToProcess.
void PxgIncrementalPartition::updateIncrementalIslands_Part2_1(PxU32 startIndex, PxU32 nbToProcess, IG::IslandSim& islandSim, const IG::AuxCpuData& islandManagerData)
{
	PX_PROFILE_ZONE("PxgIncrementalPartition::updateIncrementalIslands_Part2_1", mContextID);

	const IG::CPUExternalData& islandSimCpuData = islandSim.mCpuData;
	PX_ASSERT(islandSim.mGpuData);
	IG::GPUExternalData& islandSimGpuData = *islandSim.mGpuData;

	const PxBitMap& activeCMBitmap = islandSimGpuData.getActiveContactManagerBitmap();
	PX_UNUSED(activeCMBitmap);

	/*const*/ Part2WorkItem* workItems = mPart2WorkItems.begin();

	const PxU32 last = startIndex + nbToProcess;
	// NOTE(review): the loop header below looks damaged in this copy of the file ("igetWorkUnit" fuses the loop
	// condition with a later "->getWorkUnit()" call; "edgeId" and "unit" are used without a visible declaration).
	// Restore the original statement from version control before building.
	for (PxU32 i=startIndex; igetWorkUnit();
		PX_ASSERT((!unit.mRigidCore0->isKinematic()) || (!unit.mRigidCore1->isKinematic()));

		const PxNodeIndex node1 = islandSimCpuData.getNodeIndex1(edgeId);
		const PxNodeIndex node2 = islandSimCpuData.getNodeIndex2(edgeId);

		const PxU32 b = workItems[i].mPatchIndex;
		PartitionEdge* partitionEdge = workItems[i].mPartitionEdge;

		// PT: this block was part 3) of addEdge_Stage1
		partitionEdge->mNode0 = node1;
		partitionEdge->mNode1 = node2;

		if(b==0)	// PT: this happened only once per source edge
		{
			unit.mFrictionPatchCount = 0; //KS - ensure that the friction patch count is 0

			increaseNodeInteractionCountsMT(mNodeInteractionCountArray, node1, node2);
		}

		if(isKinematic(islandSim, node1))
			partitionEdge->setInfiniteMass0();

		if(isKinematic(islandSim, node2))
			partitionEdge->setInfiniteMass1();

		// PT: this block was part 4) of addEdge_Stage1
		const PxU32 uniqueId = partitionEdge->mUniqueIndex;

		PartitionIndexData& indexData = mPartitionIndexArray[uniqueId];
		indexData.mPatchIndex = PxTo8(b);
		// The constraint type is the edge type, plus 2 when either node is an articulation.
		const PxU8 articulationOffset = node1.isArticulation() || node2.isArticulation() ? 2 : 0;
		const PxU32 implicitEdgeType = unit.mNpIndex == 0xffffffff ? IG::Edge::EdgeType::eCONSTRAINT : IG::Edge::EdgeType::eCONTACT_MANAGER;
		indexData.mCType = PxU8(implicitEdgeType) + articulationOffset;

		// PT: this block was part 5) of addEdge_Stage1
		PartitionNodeData& nodeData = mPartitionNodeArray[uniqueId];
		nodeData.mNodeIndex0 = node1;
		nodeData.mNodeIndex1 = node2;

		// PT: this block was part 6) of addEdge_Stage1
		mNpIndexArray[uniqueId] = unit.mNpIndex;

		// PT: this block was part 7) of addEdge_Stage1
		mSolverConstants[uniqueId].mEdgeIndex = edgeId;

		// PT: this block was the first part of addContactManager
		partitionEdge->setIsContact();	// PT: TODO: this could have been setup in the ctor directly
		increaseForceThresholdsMT(unit, partitionEdge, &mNbForceThresholds);

		// PT: mark special edges. Multithreaded classification, not strictly necessary but since we're here...
		workItems[i].mSpecialCase = PxU16(isSpecialCase(partitionEdge));
	}
}

// PT: this is called from 3 different threads for the 3 different parts
// The dopart1 / dopart2 / dopart3 flags select which sub-step(s) a given call executes; the task code passes
// exactly one of them as true per parallel task, or all three for the combined code path.
void PxgIncrementalPartition::updateIncrementalIslands_Part2_2(IG::IslandSim& islandSim, PxgBodySimManager& bodySimManager, bool dopart1, bool dopart2, bool dopart3)
{
	PX_PROFILE_ZONE("PxgIncrementalPartition::updateIncrementalIslands_Part2_2", mContextID);

	PX_ASSERT(islandSim.mGpuData);
	IG::GPUExternalData& islandSimGpuData = *islandSim.mGpuData;

	// PT: we prepared these work items in updateIncrementalIslands_Part2_0, they are faster to parse now than the initial input data
	const PxU32 nbPartitionEdges = mPart2WorkItems.size();
	const Part2WorkItem* workItems = mPart2WorkItems.begin();

	if(dopart1)	// PT: main part of addContactManager
	{
		PX_PROFILE_ZONE("addContactManager", mContextID);

		// NOTE(review): everything from this loop header onwards appears truncated in this copy of the file: the
		// loop condition is fused with a later "clearSpecialHandled()" call, and dopart2 / dopart3 are never read
		// in what remains. The definition of updateIncrementalIslands_Part2_2_ProcessEdgeCases(), which the task
		// code calls, is also not visible in this part of the file. Restore from version control before building.
		for (PxU32 i=0; iclearSpecialHandled();
	}
}

// PT: second part of reference version that we are going to optimize:
// - process activated contacts
// This is the reference implementation.
// Serial handling of activated contact-manager edges. For every activated edge that is set in the active
// contact manager bitmap, has a contact manager and has no first partition edge yet, one partition edge is
// added per contact patch (addEdge_Stage1 -> addContactManager -> addEdge_Stage2).
void PxgIncrementalPartition::updateIncrementalIslands_Part2(
	IG::IslandSim& islandSim, const IG::AuxCpuData& islandManagerData,
	PxsContactManagerOutputIterator& iterator,
	PxgBodySimManager& bodySimManager)
{
	PX_PROFILE_ZONE("PxgIncrementalPartition::updateIncrementalIslands_Part2", mContextID);

	{
		PX_PROFILE_ZONE("ActivatedContacts", mContextID);

		//(2) Process activated edges
		//This edge was activated this frame so we add it to the system. This only does anything if there were not "found" pairs first.
		//Be aware that there is no guarantee that cmOutput being read has completed being written to this frame as this reads from the global buffer.
		//However, we know that, if the state changed, it would have been processed by the found pairs case rather than the activated pairs case.

		const IG::CPUExternalData& islandSimCpuData = islandSim.mCpuData;
		PX_ASSERT(islandSim.mGpuData);
		IG::GPUExternalData& islandSimGpuData = *islandSim.mGpuData;

		const PxU32 activatedContactCount = islandSim.getNbActivatedEdges(IG::Edge::eCONTACT_MANAGER);
		const IG::EdgeIndex* const activatedContacts = islandSim.getActivatedEdges(IG::Edge::eCONTACT_MANAGER);
		//printf("activatedContactCount: %d\n", activatedContactCount);

		const PxBitMap& activeCMBitmap = islandSimGpuData.getActiveContactManagerBitmap();

		for (PxU32 a = 0; a < activatedContactCount; ++a)
		{
			const IG::EdgeIndex edgeId = activatedContacts[a];

			if (activeCMBitmap.test(edgeId))
			{
				PxsContactManager* cm = islandManagerData.getContactManager(edgeId);
				if (cm)
				{
					//If the first patch is NULL, it means that the pair hasn't been activated!
					if (islandSimGpuData.getFirstPartitionEdge(edgeId) == NULL)
					{
						PxcNpWorkUnit& unit = cm->getWorkUnit();
						PX_ASSERT((!unit.mRigidCore0->isKinematic()) || (!unit.mRigidCore1->isKinematic()));

						const PxsContactManagerOutput& output = iterator.getContactManagerOutput(unit.mNpIndex);

						const PxNodeIndex node1 = islandSimCpuData.getNodeIndex1(edgeId);
						const PxNodeIndex node2 = islandSimCpuData.getNodeIndex2(edgeId);

						// Counted once per source edge, not once per patch.
						increaseNodeInteractionCounts(mNodeInteractionCountArray, node1, node2);

						for (PxU32 b = 0; b < output.nbPatches; ++b)
						{
							PartitionEdge* edge = addEdge_Stage1(islandSim, edgeId, b, unit.mNpIndex, node1, node2);
							const bool specialHandled = addContactManager(edge, unit, bodySimManager);
							addEdge_Stage2(islandSimGpuData, edgeId, edge, specialHandled, true, true);
						}

						PX_ASSERT(output.nbPatches == 0 || islandSimGpuData.getFirstPartitionEdge(edgeId) != NULL);

						unit.mFrictionPatchCount = 0; //KS - ensure that the friction patch count is 0
					}
				}
				mDestroyedContactEdgeIndices.pushBack(edgeId); //KS - looks a bit weird because we didn't "destroy" any edges but this just ensures that we zero the PF count for this edge
			}
		}
	}
}

// PT: third part of reference version that we did not touch:
// - compaction
// - accumulate slabs
// - accumulate partitions
// - finalize partitions
void PxgIncrementalPartition::updateIncrementalIslands_Part3(IG::IslandSim& islandSim, PxgJointManager& jointManager)
{
	PX_PROFILE_ZONE("PxgIncrementalPartition::updateIncrementalIslands_Part3", mContextID);

	{
		// removed edges can result in partitions with 0 constraints. If such a partition
		// is in the middle of the partitions array, subsequent partitions might get
		// ignored (see the break statement in the partition for-loop further below).
		// To avoid this, compaction is needed such that all consecutive partition entries
		// have at least one constraint.
		doCompaction();
	}

	PX_ASSERT(islandSim.mGpuData);
	IG::GPUExternalData& islandSimGpuData = *islandSim.mGpuData;

	islandSimGpuData.setEdgeNodeIndexPtr(mNpIndexArray.begin());

	// Running totals over all non-empty partitions: element counts per edge type...
	PxU32 nbPatches = 0;
	PxU32 totalConstraints = 0;
	PxU32 totalArticulationContacts = 0;
	PxU32 totalArticulationConstraints = 0;
	PxU32 nbPartitions = 0;

	// ...and batch counts per edge type (each partition's count rounded up to a multiple of PXG_BATCH_SIZE).
	PxU32 nbContactBatches = 0;
	PxU32 nbConstraintBatches = 0;
	PxU32 nbArtiContactBatches = 0;
	PxU32 nbArtiConstraintBatches = 0;

#if PX_ENABLE_ASSERTS
	// Per-partition running sums, only maintained when asserts are enabled.
	mAccumulatedPartitionCount.clear();
	mAccumulatedConstraintCount.clear();
	mAccumulatedArtiPartitionCount.clear();
	mAccumulatedArtiConstraintCount.clear();

	PxU32 accumulation = 0;
	PxU32 accumulatedConstraint = 0;
	PxU32 accumulatedArtics = 0;
	PxU32 accumulatedArticConstraints = 0;
#endif

	// Added before dividing by PXG_BATCH_SIZE to round the batch count up.
	const PxU32 batchMask = PXG_BATCH_SIZE - 1;

	{
		PX_PROFILE_ZONE("AccumulateSlabs", mContextID);
		const PxU32 nbSlabs = mPartitionSlabs.size();
		for (PxU32 i = 0; i < nbSlabs; ++i)
		{
			const PartitionSlab* slab = mPartitionSlabs[i];

			for (PxU32 localPartitionId = 0; localPartitionId < PXG_BATCH_SIZE; ++localPartitionId)
			{
				const PartitionIndices* partitionIndices = slab->mPartitions[localPartitionId].mPartitionIndices;

				const PxU32 nbContactManagers = partitionIndices[PxgEdgeType::eCONTACT_MANAGER].size();
				const PxU32 nbConstraints = partitionIndices[PxgEdgeType::eCONSTRAINT].size();
				const PxU32 nbArtiContactManagers = partitionIndices[PxgEdgeType::eARTICULATION_CONTACT].size();
				const PxU32 nbArtiConstraints = partitionIndices[PxgEdgeType::eARTICULATION_CONSTRAINT].size();

				// First empty partition ends the scan of this slab (the break only leaves the inner loop).
				if ((nbContactManagers + nbConstraints + nbArtiContactManagers + nbArtiConstraints) == 0)
					break;

				nbPartitions++;

				nbContactBatches += (nbContactManagers + batchMask) / PXG_BATCH_SIZE;
				nbConstraintBatches += (nbConstraints + batchMask) / PXG_BATCH_SIZE;
				nbArtiContactBatches += (nbArtiContactManagers + batchMask) / PXG_BATCH_SIZE;
				nbArtiConstraintBatches += (nbArtiConstraints + batchMask) / PXG_BATCH_SIZE;

				nbPatches += nbContactManagers;
				totalConstraints += nbConstraints;
				totalArticulationContacts += nbArtiContactManagers;
				totalArticulationConstraints += nbArtiConstraints;

#if PX_ENABLE_ASSERTS
				accumulation = nbContactBatches + nbConstraintBatches;
				accumulatedArtics = nbArtiContactBatches + nbArtiConstraintBatches;
				accumulatedConstraint = nbPatches + totalConstraints;
				accumulatedArticConstraints = totalArticulationContacts + totalArticulationConstraints;

				mAccumulatedPartitionCount.pushBack(accumulation);
				mAccumulatedConstraintCount.pushBack(accumulatedConstraint);
				mAccumulatedArtiPartitionCount.pushBack(accumulatedArtics);
				mAccumulatedArtiConstraintCount.pushBack(accumulatedArticConstraints);
#endif
			}
		}
	}

	//mIncrementalPartition.mAccumulatedPartitionCount.pushBack(accumulation);

	// Publish the totals gathered above.
	mNbConstraintBatches = nbConstraintBatches;
	mNbContactBatches = nbContactBatches;
	mNbArtiContactBatches = nbArtiContactBatches;
	mNbArtiConstraintBatches = nbArtiConstraintBatches;
	mNbPartitions = nbPartitions;
	mTotalContacts = nbPatches;
	mTotalConstraints = totalConstraints;
	mTotalArticulationContacts = totalArticulationContacts;
	mTotalArticulationConstraints = totalArticulationConstraints;

	mCSlab.mNbMaxPartitions = mCSlab.mUserNbMaxPartitions; //combined slabs to maximum number of slabs, which is default to be 32 but user can define the size in the descriptor
	mCSlab.clear();
	mCSlab.mNbPartitions = PxMin(mCSlab.mNbMaxPartitions, mNbPartitions);

	// Used as a bit mask below, hence the power-of-two assert.
	const PxU32 maxPartitionsMask = mCSlab.mNbMaxPartitions - 1;
	PX_ASSERT(PxIsPowerOfTwo(mCSlab.mNbMaxPartitions));

	{
		PX_PROFILE_ZONE("AccumulatePartitions", mContextID);
		// Partition i (slab i / PXG_BATCH_SIZE, local index i & batchMask) is appended to combined slot
		// i & maxPartitionsMask.
		for (PxU32 i = 0; i < mNbPartitions; ++i)
		{
			PartitionSlab* slab = mPartitionSlabs[i / PXG_BATCH_SIZE];

			const PxU32 index = i & batchMask;
			Partition& partition = slab->mPartitions[index];

			const PxU32 nbContactManagers = partition.mPartitionIndices[PxgEdgeType::eCONTACT_MANAGER].size();
			const PxU32 nbConstraints = partition.mPartitionIndices[PxgEdgeType::eCONSTRAINT].size();
			const PxU32 nbArticulationContacts = partition.mPartitionIndices[PxgEdgeType::eARTICULATION_CONTACT].size();
			const PxU32 nbArticulationConstraints = partition.mPartitionIndices[PxgEdgeType::eARTICULATION_CONSTRAINT].size();

			// Here the break ends the whole loop: partitions after the first empty one are ignored.
			if ((nbContactManagers + nbConstraints + nbArticulationContacts + nbArticulationConstraints) == 0)
				break;

			const PxU32 combinedPartitionIndex = i & maxPartitionsMask;

			//mCSlab.mPartitions[index].pushBack(&partition);
			mCSlab.mPartitionArray[combinedPartitionIndex].mPartitions.pushBack(&partition);
		}
	}

	// The batch counters are reused as running totals for the finalize pass.
	nbContactBatches = 0;
	nbConstraintBatches = 0;
	nbArtiContactBatches = 0;
	nbArtiConstraintBatches = 0;

	PxInt32ArrayPinned& startSlabIter = mStartSlabPerPartition;
	PxInt32ArrayPinned& articStartSlabIter = mArticStartSlabPerPartition;
	PxInt32ArrayPinned& jointPerPartitionIter = mNbJointsPerPartition;
	PxInt32ArrayPinned& artiJointPerPartitionIter = mNbArtiJointsPerPartition;

	// Size each array to nbPartitions entries: reset the size, reserve, then force the size.
	startSlabIter.forceSize_Unsafe(0);
	startSlabIter.reserve(nbPartitions);
	startSlabIter.forceSize_Unsafe(nbPartitions);

	articStartSlabIter.forceSize_Unsafe(0);
	articStartSlabIter.reserve(nbPartitions);
	articStartSlabIter.forceSize_Unsafe(nbPartitions);

	jointPerPartitionIter.forceSize_Unsafe(0);
	jointPerPartitionIter.reserve(nbPartitions);
	jointPerPartitionIter.forceSize_Unsafe(nbPartitions);

	artiJointPerPartitionIter.forceSize_Unsafe(0);
	artiJointPerPartitionIter.reserve(nbPartitions);
	artiJointPerPartitionIter.forceSize_Unsafe(nbPartitions);

	mJointStartIndices.resize(nbPartitions);
	mContactStartIndices.resize(nbPartitions);
	mArtiJointStartIndices.resize(nbPartitions);
	mArtiContactStartIndices.resize(nbPartitions);

	// Running element offsets per edge type, advanced in the order the partitions are visited below.
	PxU32 jointStartIndices = 0;
	PxU32 contactStartIndices = 0;
	PxU32 artiContactStartIndices = 0;
	PxU32 artiJointStartIndices = 0;

	{
		PX_PROFILE_ZONE("FinalizePartitions", mContextID);
		for (PxU32 i = 0; i < mCSlab.mNbPartitions; ++i)
		{
			Partition** partitions = mCSlab.mPartitionArray[i].mPartitions.begin();//mCSlab.mPartitions[i].begin();

			// Batch counts accumulated inside combined slot i only.
			PxU32 nbPartitionContactBatches = 0;
			PxU32 nbPartitionConstraintBatches = 0;
			PxU32 nbPartitionArtiContactBatches = 0;
			PxU32 nbPartitionArtiConstraintBatches = 0;

			// Batch totals of all previous combined slots.
			PxU32 prevBatches = nbContactBatches + nbConstraintBatches;
			PxU32 prevArticBatches = nbArtiContactBatches + nbArtiConstraintBatches;

			for (PxU32 j = 0; j < mCSlab.mPartitionArray[i].mPartitions.size(); ++j)
			{
				// Output index of the j-th partition stored in combined slot i.
				const PxU32 k = i + mCSlab.mNbMaxPartitions * j;

				// Start values are written before this partition's own batches are added.
				startSlabIter[k] = prevBatches + nbPartitionContactBatches + nbPartitionConstraintBatches;
				articStartSlabIter[k] = prevArticBatches + nbPartitionArtiContactBatches + nbPartitionArtiConstraintBatches;

				Partition& partition = *partitions[j];

				const PxU32 nbContactManagers = partition.mPartitionIndices[PxgEdgeType::eCONTACT_MANAGER].size();
				const PxU32 nbConstraints = partition.mPartitionIndices[PxgEdgeType::eCONSTRAINT].size();
				const PxU32 nbArtiContactManagers = partition.mPartitionIndices[PxgEdgeType::eARTICULATION_CONTACT].size();
				const PxU32 nbArtiConstraints = partition.mPartitionIndices[PxgEdgeType::eARTICULATION_CONSTRAINT].size();

				nbPartitionContactBatches += (nbContactManagers + batchMask) / PXG_BATCH_SIZE;
				nbPartitionArtiContactBatches += (nbArtiContactManagers + batchMask) / PXG_BATCH_SIZE;

				PxU32 nbJointConstraintBatches = (nbConstraints + batchMask) / PXG_BATCH_SIZE;
				PxU32 nbArtiJointConstraintBatches = (nbArtiConstraints + batchMask) / PXG_BATCH_SIZE;
				nbPartitionConstraintBatches += nbJointConstraintBatches;
				nbPartitionArtiConstraintBatches += nbArtiJointConstraintBatches;

				jointPerPartitionIter[k] = nbJointConstraintBatches;
				artiJointPerPartitionIter[k] = nbArtiJointConstraintBatches;

				mJointStartIndices[k] = jointStartIndices;
				mContactStartIndices[k] = contactStartIndices;
				mArtiContactStartIndices[k] = artiContactStartIndices;
				mArtiJointStartIndices[k] = artiJointStartIndices;

				jointStartIndices += nbConstraints;
				contactStartIndices += nbContactManagers;
				artiContactStartIndices += nbArtiContactManagers;
				artiJointStartIndices += nbArtiConstraints;
			}

			nbContactBatches += nbPartitionContactBatches;
			nbConstraintBatches += nbPartitionConstraintBatches;
			mCSlab.mPartitionArray[i].mAccumulatedPartitionCount = nbContactBatches + nbConstraintBatches;

			nbArtiContactBatches += nbPartitionArtiContactBatches;
			nbArtiConstraintBatches += nbPartitionArtiConstraintBatches;
			mCSlab.mPartitionArray[i].mAccumulatedArtiPartitionCount = nbArtiContactBatches + nbArtiConstraintBatches;
		}
	}

	jointManager.update(mNpIndexArray);
}

#if PX_PARTITION_COMPACTION
// For the given node, walks every slab and every partition bit set for that node in the slab's node bitmap, and
// tries to move the corresponding edge to an earlier free partition: either in an earlier slab, or at a lower
// index in the same slab. When an edge is moved, its other node is flagged in mIsDirtyNode, unless that node has
// infinite mass or is already flagged.
void PxgIncrementalPartition::pullForwardConstraints(PxU32 nodeIndex)
{
	//PartitionSlab* writeSlab = mSlabs[slabId];

	// Local partition index currently examined within the slab; reset to 0 for each slab.
	PxU16 currId = 0;
	const PxU32 nbSlabs = mPartitionSlabs.size();
	for (PxU32 a = 0; a < nbSlabs; ++a)
	{
		PartitionSlab* slab = mPartitionSlabs[a];
		while (currId < PXG_BATCH_SIZE)
		{
			// Keep only the bits at positions >= currId.
			PxU32 mask = ~((1u << currId) - 1u);
			PxU32 bitMask = ((slab->mNodeBitmap[nodeIndex]) & mask);
			// Ends the while loop unless a set bit is found below.
			currId = PXG_BATCH_SIZE;
			if (bitMask != 0)
			{
				//There is an entry referencing this body later in this slab. Check to see if it can be pulled forward
				currId = PxTo16(PxLowestSetBit(bitMask));
				//copy to this entry...

#if STORE_INDICES_IN_NODE_ENTRIES
	#if STORE_EDGE_DATA_IN_NODE_ENTRIES
				const PartitionEdge* replaceEdge = mEdgeManager.getPartitionEdge(slab->mNodeEntries[nodeIndex].mEdges[currId].mUniqueIndex);
	#else
				const PartitionEdge* replaceEdge = mEdgeManager.getPartitionEdge(slab->mNodeEntries[nodeIndex].mEdges[currId]);
	#endif
#else
				const PartitionEdge* replaceEdge = slab->mNodeEntries[nodeIndex].mEdges[currId];
#endif

				const PxU32 node0Index = replaceEdge->mNode0.index();
				const PxU32 node1Index = replaceEdge->mNode1.index();

				// Candidate destination slabs: all earlier slabs, then the current one.
				for (PxU32 i = 0; i <= a; ++i)
				{
					PartitionSlab* writeSlab = mPartitionSlabs[i];

					// Union of the partitions already used by the edge's nodes; infinite-mass nodes are left out.
					PxU32 map = 0;
					if (!replaceEdge->hasInfiniteMass0())
						map = writeSlab->mNodeBitmap[node0Index];
					if (!replaceEdge->hasInfiniteMass1())
						map |= writeSlab->mNodeBitmap[node1Index];

					if (map != 0xFFFFFFFF)
					{
						// Lowest partition not used by either node in writeSlab.
						PxU32 newId = PxLowestSetBit(~map);

						if (i < a || newId < currId)
						{
							//replaceEdge can be brought forward to an earlier partition
							removeEdgeInternal(slab, replaceEdge, currId);

							addEdgeInternal(replaceEdge, writeSlab, PxTo16(newId), (PxU16)(i*PXG_BATCH_SIZE));

							if (node0Index != nodeIndex && !replaceEdge->hasInfiniteMass0() && !mIsDirtyNode.test(node0Index))
								mIsDirtyNode.set(node0Index);
							else if (node1Index != nodeIndex && !replaceEdge->hasInfiniteMass1() && !mIsDirtyNode.test(node1Index))
								mIsDirtyNode.set(node1Index);

							break;
						}
					}
				}
				currId++; //Increment currId so that we don't loop indefinitely considering this entry
			}
		}
		currId = 0;
	}
}
#endif

// Two steps:
// 1) when PX_PARTITION_COMPACTION is enabled, call pullForwardConstraints() for dirty nodes (bounded by MaxCount
//    per call) and clear each processed node's dirty bit;
// 2) two-index sweep over partition indices [0, mMaxSlabCount): l looks for an empty partition from the front,
//    r for a non-empty one from the back, and the edges of partition r are moved into partition l. Afterwards
//    mMaxSlabCount is set to r + 1.
// NOTE(review): mMaxSlabCount is used here as a count of partitions (it is divided by 32 to get a slab) - confirm
// the naming against the class declaration.
void PxgIncrementalPartition::doCompaction()
{
	PX_PROFILE_ZONE("PxgIncrementalPartition::doCompaction", mContextID);

#if PX_PARTITION_COMPACTION
	const PxU32 lastIdx = 0;
	PxBitMap::PxCircularIterator iter(mIsDirtyNode, lastIdx);
	//for (PxU32 a = 0; a < mDirtyNodes.size(); ++a)
	const PxU32 MaxCount = 500;
	PxU32 count = 0;
	PxU32 dirtyIdx;
	while ((dirtyIdx = iter.getNext()) != PxBitMap::PxCircularIterator::DONE && (count++ < MaxCount))
	{
		pullForwardConstraints(dirtyIdx);
		mIsDirtyNode.reset(dirtyIdx);
	}
#endif

	if (mMaxSlabCount)
	{
		// l and r are global partition indices: slab = index / 32, local partition = index & 31.
		PxI32 l = 0;
		PxI32 r = PxI32(mMaxSlabCount - 1);

		while (l < r)
		{
			//Search for an empty element...
			while (l < r)
			{
				const Partition& partition = mPartitionSlabs[PxU32(l / 32)]->mPartitions[PxU32(l & 31)];
				if (	partition.mPartitionIndices[PxgEdgeType::eCONTACT_MANAGER].size() == 0
					&&	partition.mPartitionIndices[PxgEdgeType::eCONSTRAINT].size() == 0
					&&	partition.mPartitionIndices[PxgEdgeType::eARTICULATION_CONTACT].size() == 0
					&&	partition.mPartitionIndices[PxgEdgeType::eARTICULATION_CONSTRAINT].size() == 0)
					break;
				l++;
			}

			//Search for a non-empty element
			while (l < r)
			{
				const Partition& partition = mPartitionSlabs[PxU32(r / 32)]->mPartitions[PxU32(r & 31)];
				if (	partition.mPartitionIndices[PxgEdgeType::eCONTACT_MANAGER].size() != 0
					||	partition.mPartitionIndices[PxgEdgeType::eCONSTRAINT].size() != 0
					||	partition.mPartitionIndices[PxgEdgeType::eARTICULATION_CONTACT].size() != 0
					||	partition.mPartitionIndices[PxgEdgeType::eARTICULATION_CONSTRAINT].size() != 0)
					break;
				r--;
			}

			if (l < r)
			{
				//Swap!!!!
				PartitionSlab* slab = mPartitionSlabs[PxU32(r / 32)];
				PartitionSlab* writeSlab = mPartitionSlabs[PxU32(l / 32)];
				Partition& partition = slab->mPartitions[r & 31];

				// NOTE(review): the loop header below looks damaged in this copy of the file ("edgeType 0; --a" fuses an
				// outer loop over edge types with an inner reverse loop over "a", which has no visible declaration, and
				// one opening brace is missing). Restore the original statements from version control before building.
				for (PxU32 edgeType = 0; edgeType 0; --a)
					{
						// Each edge is removed from partition r and re-added at partition l.
						const PartitionEdge* edge = mEdgeManager.getPartitionEdge(partition.mPartitionIndices[PxgEdgeType::Enum(edgeType)][a - 1]);
						removeEdgeInternal(slab, edge, PxU32(r & 31));
						addEdgeInternal(edge, writeSlab, PxU16(l & 31), PxU16(l&(~31)));
					}
				}
				r--;
			}
		}

		mMaxSlabCount = PxU32(r + 1);
	}
}