// Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of NVIDIA CORPORATION nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved. // Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. // Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#include "GuAABBTreeBounds.h"
#include "GuAABBTree.h"
#include "GuAABBTreeBuildStats.h"
#include "GuBounds.h"
#include "GuAABBTreeNode.h"
#include "GuSAH.h"
#include "foundation/PxMathUtils.h"
#include "foundation/PxFPU.h"
#include "foundation/PxInlineArray.h"

using namespace physx;
using namespace Gu;

///////////////////////////////////////////////////////////////////////////////

// NOTE(review): this translation unit was recovered from a dump in which every "<...>"
// span had been stripped (template arguments, "i<n" loop conditions and everything up to
// the following ">") and "&c..."/"&p..." sequences were decoded as HTML entities.
// Damaged statements below were reconstructed - verify against the upstream PhysX sources.

// (Re)allocates storage for nbBounds boxes and optionally copies the caller's bounds in.
// One extra box is allocated so SIMD code can do unaligned 4-float loads off the end.
void AABBTreeBounds::init(PxU32 nbBounds, const PxBounds3* bounds)
{
	PX_FREE(mBounds);
	// PT: we always allocate one extra box, to make sure we can safely use V4 loads on the array
	mBounds = PX_ALLOCATE(PxBounds3, (nbBounds + 1), "AABBTreeBounds");
	if(bounds)
		PxMemCopy(mBounds, bounds, nbBounds*sizeof(PxBounds3));
}

// Grows the bounds array to newSize boxes (plus the extra padding box), preserving the
// first previousSize entries.
void AABBTreeBounds::resize(PxU32 newSize, PxU32 previousSize)
{
	PxBounds3* newBounds = PX_ALLOCATE(PxBounds3, (newSize + 1), "AABBTreeBounds");
	if(mBounds && previousSize)
		PxMemCopy(newBounds, mBounds, sizeof(PxBounds3)*previousSize);
	PX_FREE(mBounds);
	mBounds = newBounds;
}

// Frees the bounds array unless it is owned by the user.
void AABBTreeBounds::release()
{
	if(!mUserAllocated)
		PX_FREE(mBounds);
}

///////////////////////////////////////////////////////////////////////////////

NodeAllocator::NodeAllocator() : mPool(NULL), mCurrentSlabIndex(0), mTotalNbNodes(0)
{
}

NodeAllocator::~NodeAllocator()
{
	release();
}

// Releases all node slabs and resets the allocator to its empty state.
// NOTE(review): loop body was lost in the dump; reconstructed - TODO confirm upstream.
void NodeAllocator::release()
{
	const PxU32 nbSlabs = mSlabs.size();
	for(PxU32 i=0; i<nbSlabs; i++)
	{
		Slab& s = mSlabs[i];
		PX_DELETE_ARRAY(s.mPool);
	}

	mSlabs.reset();
	mCurrentSlabIndex = 0;
	mTotalNbNodes = 1;
	mTotalNbNodes = 0;
}

// Allocates the initial node slab and sets up the root node, which starts out owning the
// complete permutation of the app's primitives.
// NOTE(review): the allocation/size estimate was lost in the dump; reconstructed - TODO confirm.
void NodeAllocator::init(PxU32 nbPrimitives, PxU32 limit)
{
	const PxU32 maxSize = nbPrimitives*2 - 1;	// PT: max possible #nodes for a complete tree
	const PxU32 estimatedFinalSize = maxSize <= 1024 ? maxSize : maxSize/limit;
	mPool = PX_NEW(AABBTreeBuildNode)[estimatedFinalSize];
	PxMemZero(mPool, sizeof(AABBTreeBuildNode)*estimatedFinalSize);

	// Setup initial node. Here we have a complete permutation of the app's primitives.
	mPool->mNodeIndex = 0;
	mPool->mNbPrimitives = nbPrimitives;

	mSlabs.pushBack(Slab(mPool, 1, estimatedFinalSize));
	mCurrentSlabIndex = 0;
	mTotalNbNodes = 1;
}

// PT: TODO: inline this?
AABBTreeBuildNode* NodeAllocator::getBiNode() { mTotalNbNodes += 2; Slab& currentSlab = mSlabs[mCurrentSlabIndex]; if (currentSlab.mNbUsedNodes + 2 <= currentSlab.mMaxNbNodes) { AABBTreeBuildNode* biNode = currentSlab.mPool + currentSlab.mNbUsedNodes; currentSlab.mNbUsedNodes += 2; return biNode; } else { // Allocate new slab const PxU32 size = 1024; AABBTreeBuildNode* pool = PX_NEW(AABBTreeBuildNode)[size]; PxMemZero(pool, sizeof(AABBTreeBuildNode)*size); mSlabs.pushBack(Slab(pool, 2, size)); mCurrentSlabIndex++; return pool; } } /////////////////////////////////////////////////////////////////////////////// PxU32 Gu::reshuffle(PxU32 nb, PxU32* const PX_RESTRICT prims, const PxVec3* PX_RESTRICT centers, float splitValue, PxU32 axis) { // PT: to avoid calling the unsafe [] operator const size_t ptrValue = size_t(centers) + axis*sizeof(float); const PxVec3* PX_RESTRICT centersX = reinterpret_cast(ptrValue); // Loop through all node-related primitives. Their indices range from mNodePrimitives[0] to mNodePrimitives[mNbPrimitives-1]. // Those indices map the global list in the tree builder. PxU32 nbPos = 0; for(PxU32 i=0; i splitValue) { // Swap entries prims[i] = prims[nbPos]; prims[nbPos] = index; // Count primitives assigned to positive space nbPos++; } } return nbPos; } static PxU32 split(const PxBounds3& box, PxU32 nb, PxU32* const PX_RESTRICT prims, PxU32 axis, const AABBTreeBuildParams& params) { // Get node split value float splitValue = 0.0f; //float defaultSplitValue = box.getCenter(axis); //(void)defaultSplitValue; if(params.mBuildStrategy==BVH_SPLATTER_POINTS_SPLIT_GEOM_CENTER) { // PT: experimental attempt at replicating BV4_SPLATTER_POINTS_SPLIT_GEOM_CENTER, but with boxes instead of triangles. 
const PxBounds3* bounds = params.mBounds->getBounds(); for(PxU32 i=0;igetBounds(); PX_ASSERT(boxes); PX_ASSERT(primitives); PX_ASSERT(nbPrims); Vec4V minV = V4LoadU(&boxes[primitives[0]].minimum.x); Vec4V maxV = V4LoadU(&boxes[primitives[0]].maximum.x); meansV = V4LoadU(¶ms.mCache[primitives[0]].x); for (PxU32 i = 1; iparams.mLimit) { nbPos = nbPrims >> 1; } else return; } // Now create children and assign their pointers. mPos = allocator.getBiNode(); stats.increaseCount(2); // Assign children PX_ASSERT(!isLeaf()); AABBTreeBuildNode* Pos = const_cast(mPos); AABBTreeBuildNode* Neg = Pos + 1; Pos->mNodeIndex = mNodeIndex; Pos->mNbPrimitives = nbPos; Neg->mNodeIndex = mNodeIndex + nbPos; Neg->mNbPrimitives = mNbPrimitives - nbPos; } void AABBTreeBuildNode::subdivideSAH(const AABBTreeBuildParams& params, SAH_Buffers& buffers, BuildStats& stats, NodeAllocator& allocator, PxU32* const indices) { PxU32* const PX_RESTRICT primitives = indices + mNodeIndex; const PxU32 nbPrims = mNbPrimitives; // Compute global box for current node. The box is stored in mBV. computeGlobalBox(mBV, nbPrims, params.mBounds->getBounds(), primitives); // Check the user-defined limit. Also ensures we stop subdividing if we reach a leaf node. if (nbPrims <= params.mLimit) return; ///// PxU32 leftCount; if(!buffers.split(leftCount, nbPrims, primitives, params.mBounds->getBounds(), params.mCache)) { // Invalid split => fallback to previous strategy subdivide(params, stats, allocator, indices); return; } ///// // Now create children and assign their pointers. 
mPos = allocator.getBiNode(); stats.increaseCount(2); // Assign children PX_ASSERT(!isLeaf()); AABBTreeBuildNode* Pos = const_cast(mPos); AABBTreeBuildNode* Neg = Pos + 1; Pos->mNodeIndex = mNodeIndex; Pos->mNbPrimitives = leftCount; Neg->mNodeIndex = mNodeIndex + leftCount; Neg->mNbPrimitives = mNbPrimitives - leftCount; } /////////////////////////////////////////////////////////////////////////////// static PxU32* initAABBTreeBuild(const AABBTreeBuildParams& params, NodeAllocator& nodeAllocator, BuildStats& stats) { const PxU32 numPrimitives = params.mNbPrimitives; if(!numPrimitives) return NULL; // Init stats stats.setCount(1); // Initialize indices. This list will be modified during build. PxU32* indices = PX_ALLOCATE(PxU32, numPrimitives, "AABB tree indices"); // Identity permutation for(PxU32 i=0;igetBounds(); const float half = 0.5f; const FloatV halfV = FLoad(half); for(PxU32 i=0;i BuildStack; static void buildHierarchy(AABBTreeBuildNode* root, const AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& nodeBase, PxU32* const indices, bool useSAH) { PxU32 nb = 1; BuildStack stack; stack.forceSize_Unsafe(DEFAULT_BUILD_STACK_SIZE); stack[0] = root; struct Local { static PX_FORCE_INLINE PxU32 pushBack(AABBTreeBuildNode* node, BuildStack& stack, PxU32 nb) { stack[nb++] = node; if(nb == stack.capacity()) stack.resizeUninitialized(stack.capacity() * 2); return nb; } static PX_FORCE_INLINE PxU32 processChildren(AABBTreeBuildNode* node, BuildStack& stack, PxU32 nb, BuildStats& stats) { stats.mTotalPrims += node->mNbPrimitives; if(!node->isLeaf()) { AABBTreeBuildNode* Pos = const_cast(node->getPos()); PX_ASSERT(Pos); nb = pushBack(Pos + 1, stack, nb); nb = pushBack(Pos, stack, nb); } return nb; } }; if(useSAH) { SAH_Buffers sah(params.mNbPrimitives); do { AABBTreeBuildNode* node = stack[--nb]; node->subdivideSAH(params, sah, stats, nodeBase, indices); nb = Local::processChildren(node, stack, nb, stats); }while(nb); } else { do { AABBTreeBuildNode* node = 
stack[--nb]; node->subdivide(params, stats, nodeBase, indices); nb = Local::processChildren(node, stack, nb, stats); }while(nb); } } PxU32* Gu::buildAABBTree(const AABBTreeBuildParams& params, NodeAllocator& nodeAllocator, BuildStats& stats) { PxU32* indices = initAABBTreeBuild(params, nodeAllocator, stats); if(!indices) return NULL; buildHierarchy(nodeAllocator.mPool, params, stats, nodeAllocator, indices, params.mBuildStrategy==BVH_SAH); return indices; } void Gu::flattenTree(const NodeAllocator& nodeAllocator, BVHNode* dest, const PxU32* remap) { // PT: gathers all build nodes allocated so far and flatten them to a linear destination array of smaller runtime nodes PxU32 offset = 0; const PxU32 nbSlabs = nodeAllocator.mSlabs.size(); for(PxU32 s=0;s= nodeAllocator.mSlabs[j].mPool && pool[i].mPos < nodeAllocator.mSlabs[j].mPool + nodeAllocator.mSlabs[j].mNbUsedNodes) { localNodeIndex = PxU32(pool[i].mPos - nodeAllocator.mSlabs[j].mPool); break; } nodeBase += nodeAllocator.mSlabs[j].mNbUsedNodes; } const PxU32 nodeIndex = nodeBase + localNodeIndex; dest[offset].mData = nodeIndex << 1; } offset++; } } } void Gu::buildAABBTree(PxU32 nbBounds, const AABBTreeBounds& bounds, PxArray& tree) { PX_SIMD_GUARD // build the BVH BuildStats stats; NodeAllocator nodeAllocator; PxU32* indices = buildAABBTree(AABBTreeBuildParams(1, nbBounds, &bounds), nodeAllocator, stats); PX_ASSERT(indices); // store the computed hierarchy tree.resize(stats.getCount()); PX_ASSERT(tree.size() == nodeAllocator.mTotalNbNodes); // store the results into BVHNode list flattenTree(nodeAllocator, tree.begin(), indices); PX_FREE(indices); // PT: we don't need the indices for a complete tree } /////////////////////////////////////////////////////////////////////////////// // Progressive building class Gu::FIFOStack : public PxUserAllocated { public: FIFOStack() : mStack("SQFIFOStack"), mCurIndex(0) {} ~FIFOStack() {} PX_FORCE_INLINE PxU32 getNbEntries() const { return mStack.size(); } PX_FORCE_INLINE void 
push(AABBTreeBuildNode* entry) { mStack.pushBack(entry); } bool pop(AABBTreeBuildNode*& entry); private: PxArray mStack; PxU32 mCurIndex; //!< Current index within the container }; bool Gu::FIFOStack::pop(AABBTreeBuildNode*& entry) { const PxU32 NbEntries = mStack.size(); // Get current number of entries if (!NbEntries) return false; // Can be NULL when no value has been pushed. This is an invalid pop call. entry = mStack[mCurIndex++]; // Get oldest entry, move to next one if (mCurIndex == NbEntries) { // All values have been poped mStack.clear(); mCurIndex = 0; } return true; } //~Progressive building /////////////////////////////////////////////////////////////////////////////// BVHPartialRefitData::BVHPartialRefitData() : mParentIndices(NULL), mUpdateMap(NULL), mRefitHighestSetWord(0) { } BVHPartialRefitData::~BVHPartialRefitData() { releasePartialRefitData(true); } void BVHPartialRefitData::releasePartialRefitData(bool clearRefitMap) { PX_FREE(mParentIndices); PX_FREE(mUpdateMap); if(clearRefitMap) mRefitBitmask.clearAll(); mRefitHighestSetWord = 0; } static void createParentArray(PxU32 totalNbNodes, PxU32* parentIndices, const BVHNode* parentNode, const BVHNode* currentNode, const BVHNode* root) { const PxU32 parentIndex = PxU32(parentNode - root); const PxU32 currentIndex = PxU32(currentNode - root); PX_ASSERT(parentIndexisLeaf()) { createParentArray(totalNbNodes, parentIndices, currentNode, currentNode->getPos(root), root); createParentArray(totalNbNodes, parentIndices, currentNode, currentNode->getNeg(root), root); } } PxU32* BVHPartialRefitData::getParentIndices() { // PT: lazy-create parent array. Memory is not wasted for purely static trees, or dynamic trees that only do "full refit". 
if(!mParentIndices) { mParentIndices = PX_ALLOCATE(PxU32, mNbNodes, "AABB parent indices"); createParentArray(mNbNodes, mParentIndices, mNodes, mNodes, mNodes); } return mParentIndices; } void BVHPartialRefitData::createUpdateMap(PxU32 nbObjects) { // PT: we need an "update map" for PxBVH // PT: TODO: consider refactoring with the AABBtree version PX_FREE(mUpdateMap); if(!nbObjects) return; mUpdateMap = PX_ALLOCATE(PxU32, nbObjects, "UpdateMap"); PxMemSet(mUpdateMap, 0xff, sizeof(PxU32)*nbObjects); const PxU32 nbNodes = mNbNodes; const BVHNode* nodes = mNodes; const PxU32* indices = mIndices; for(TreeNodeIndex i=0;i>5) + ((nb_bits&31) ? 1 : 0); } bool BitArray::init(PxU32 nb_bits) { mSize = BitsToDwords(nb_bits); // Get ram for n bits PX_FREE(mBits); mBits = PX_ALLOCATE(PxU32, mSize, "BitArray::mBits"); // Set all bits to 0 clearAll(); return true; } void BitArray::resize(PxU32 maxBitNumber) { const PxU32 newSize = BitsToDwords(maxBitNumber); if (newSize <= mSize) return; PxU32* newBits = PX_ALLOCATE(PxU32, newSize, "BitArray::mBits"); PxMemZero(newBits + mSize, (newSize - mSize) * sizeof(PxU32)); PxMemCopy(newBits, mBits, mSize*sizeof(PxU32)); PX_FREE(mBits); mBits = newBits; mSize = newSize; } /////////////////////////////////////////////////////////////////////////////// static PX_FORCE_INLINE PxU32 getNbPrimitives(PxU32 data) { return (data>>1)&15; } static PX_FORCE_INLINE const PxU32* getPrimitives(const PxU32* base, PxU32 data) { return base + (data>>5); } static PX_FORCE_INLINE const BVHNode* getPos(const BVHNode* base, PxU32 data) { return base + (data>>1); } static PX_FORCE_INLINE PxU32 isLeaf(PxU32 data) { return data&1; } template static PX_FORCE_INLINE void refitNode(BVHNode* PX_RESTRICT current, const PxBounds3* PX_RESTRICT boxes, const PxU32* PX_RESTRICT indices, BVHNode* PX_RESTRICT const nodeBase) { // PT: we can safely use V4 loads on both boxes and nodes here: // - it's safe on boxes because we allocated one extra box in the pruning pool // - it's 
safe on nodes because there's always some data within the node, after the BV const PxU32 data = current->mData; Vec4V resultMinV, resultMaxV; if(isLeaf(data)) { const PxU32 nbPrims = getNbPrimitives(data); if(nbPrims) { if(hasIndices) { const PxU32* primitives = getPrimitives(indices, data); resultMinV = V4LoadU(&boxes[*primitives].minimum.x); resultMaxV = V4LoadU(&boxes[*primitives].maximum.x); if(nbPrims>1) { const PxU32* last = primitives + nbPrims; primitives++; while(primitives!=last) { resultMinV = V4Min(resultMinV, V4LoadU(&boxes[*primitives].minimum.x)); resultMaxV = V4Max(resultMaxV, V4LoadU(&boxes[*primitives].maximum.x)); primitives++; } } } else { PX_ASSERT(nbPrims==1); const PxU32 primIndex = data>>5; resultMinV = V4LoadU(&boxes[primIndex].minimum.x); resultMaxV = V4LoadU(&boxes[primIndex].maximum.x); } } else { // Might happen after a node has been invalidated const float max = GU_EMPTY_BOUNDS_EXTENTS; resultMinV = V4Load(max); resultMaxV = V4Load(-max); } } else { const BVHNode* pos = getPos(nodeBase, data); const BVHNode* neg = pos+1; const PxBounds3& posBox = pos->mBV; const PxBounds3& negBox = neg->mBV; resultMinV = V4Min(V4LoadU(&posBox.minimum.x), V4LoadU(&negBox.minimum.x)); // resultMaxV = V4Max(V4LoadU(&posBox.maximum.x), V4LoadU(&negBox.maximum.x)); #if PX_INTEL_FAMILY && !defined(PX_SIMD_DISABLED) Vec4V posMinV = V4LoadU(&posBox.minimum.z); Vec4V negMinV = V4LoadU(&negBox.minimum.z); posMinV = _mm_shuffle_ps(posMinV, posMinV, _MM_SHUFFLE(0, 3, 2, 1)); negMinV = _mm_shuffle_ps(negMinV, negMinV, _MM_SHUFFLE(0, 3, 2, 1)); resultMaxV = V4Max(posMinV, negMinV); #else // PT: fixes the perf issue but not really convincing resultMaxV = Vec4V_From_Vec3V(V3Max(V3LoadU(&posBox.maximum.x), V3LoadU(&negBox.maximum.x))); #endif } // PT: the V4 stores overwrite the data after the BV, but we just put it back afterwards V4StoreU(resultMinV, ¤t->mBV.minimum.x); V4StoreU(resultMaxV, ¤t->mBV.maximum.x); current->mData = data; } template static void 
refitLoop(const PxBounds3* PX_RESTRICT boxes, BVHNode* const PX_RESTRICT nodeBase, const PxU32* PX_RESTRICT indices, PxU32 nbNodes) { PX_ASSERT(boxes); PX_ASSERT(nodeBase); // Bottom-up update PxU32 index = nbNodes; while(index--) { BVHNode* current = nodeBase + index; if(index) PxPrefetch(current - 1); // PxBounds3 before = current->mBV; if(hasIndices) refitNode<1>(current, boxes, indices, nodeBase); else refitNode<0>(current, boxes, indices, nodeBase); // if(current->mBV.minimum==before.minimum && current->mBV.maximum==before.maximum) // break; } } void BVHCoreData::fullRefit(const PxBounds3* boxes) { if(mIndices) refitLoop<1>(boxes, mNodes, mIndices, mNbNodes); else refitLoop<0>(boxes, mNodes, mIndices, mNbNodes); } void BVHPartialRefitData::markNodeForRefit(TreeNodeIndex nodeIndex) { BitArray* PX_RESTRICT refitBitmask = &mRefitBitmask; if(!refitBitmask->getBits()) refitBitmask->init(mNbNodes); PX_ASSERT(nodeIndexisSet(currentIndex)) { // We can early exit if we already visited the node! 
goto Exit; } else { refitBitmask->setBit(currentIndex); const PxU32 currentMarkedWord = currentIndex>>5; refitHighestSetWord = PxMax(refitHighestSetWord, currentMarkedWord); const PxU32 parentIndex = parentIndices[currentIndex]; PX_ASSERT(parentIndex == 0 || parentIndex < currentIndex); if(currentIndex == parentIndex) break; currentIndex = parentIndex; } } Exit: mRefitHighestSetWord = refitHighestSetWord; } #define FIRST_VERSION #ifdef FIRST_VERSION template static void refitMarkedLoop(const PxBounds3* PX_RESTRICT boxes, BVHNode* const PX_RESTRICT nodeBase, const PxU32* PX_RESTRICT indices, PxU32* PX_RESTRICT bits, PxU32 nbToGo) { #if PX_DEBUG PxU32 nbRefit=0; PX_UNUSED(nbRefit); #endif PxU32 size = nbToGo; while(size--) { // Test 32 bits at a time const PxU32 currentBits = bits[size]; if(!currentBits) continue; PxU32 index = (size+1)<<5; PxU32 mask = PxU32(1<<((index-1)&31)); PxU32 count=32; while(count--) { index--; PxPrefetch(nodeBase + index); PX_ASSERT(size==index>>5); PX_ASSERT(mask==PxU32(1<<(index&31))); if(currentBits & mask) { if(hasIndices) refitNode<1>(nodeBase + index, boxes, indices, nodeBase); else refitNode<0>(nodeBase + index, boxes, indices, nodeBase); #if PX_DEBUG nbRefit++; #endif } mask>>=1; } bits[size] = 0; } } void BVHPartialRefitData::refitMarkedNodes(const PxBounds3* boxes) { if(!mRefitBitmask.getBits()) return; // No refit needed { /*const*/ PxU32* bits = const_cast(mRefitBitmask.getBits()); PxU32 size = mRefitHighestSetWord+1; #if PX_DEBUG if(1) { const PxU32 totalSize = mRefitBitmask.getSize(); for(PxU32 i=size;i(boxes, mNodes, mIndices, bits, size); else refitMarkedLoop<0>(boxes, mNodes, mIndices, bits, size); mRefitHighestSetWord = 0; // mRefitBitmask.clearAll(); } } #endif //#define SECOND_VERSION #ifdef SECOND_VERSION void BVHPartialRefitData::refitMarkedNodes(const PxBounds3* boxes) { /*const*/ PxU32* bits = const_cast(mRefitBitmask.getBits()); if(!bits) return; // No refit needed const PxU32 lastSetBit = mRefitBitmask.findLast(); 
const PxU32* indices = mIndices; BVHNode* const nodeBase = mNodes; // PT: ### bitmap iterator pattern for(PxU32 w = 0; w <= lastSetBit >> 5; ++w) { for(PxU32 b = bits[w]; b; b &= b-1) { const PxU32 index = (PxU32)(w<<5|PxLowestSetBit(b)); while(size--) { // Test 32 bits at a time const PxU32 currentBits = bits[size]; if(!currentBits) continue; PxU32 index = (size+1)<<5; PxU32 mask = PxU32(1<<((index-1)&31)); PxU32 count=32; while(count--) { index--; PxPrefetch(nodeBase + index); PX_ASSERT(size==index>>5); PX_ASSERT(mask==PxU32(1<<(index&31))); if(currentBits & mask) { refitNode(nodeBase + index, boxes, indices, nodeBase); #if PX_DEBUG nbRefit++; #endif } mask>>=1; } bits[size] = 0; } mRefitHighestSetWord = 0; // mRefitBitmask.clearAll(); } } #endif /////////////////////////////////////////////////////////////////////////////// AABBTree::AABBTree() : mTotalPrims(0) { // Progressive building mStack = NULL; //~Progressive building } AABBTree::~AABBTree() { release(false); } void AABBTree::release(bool clearRefitMap) { // Progressive building PX_DELETE(mStack); //~Progressive building releasePartialRefitData(clearRefitMap); // PT: TODO: move some to BVHCoreData dtor PX_DELETE_ARRAY(mNodes); PX_FREE(mIndices); mNbNodes = 0; mNbIndices = 0; } // Initialize nodes/indices from the input tree merge data void AABBTree::initTree(const AABBTreeMergeData& tree) { PX_ASSERT(mIndices == NULL); PX_ASSERT(mNodes == NULL); PX_ASSERT(mParentIndices == NULL); // allocate,copy indices mIndices = PX_ALLOCATE(PxU32, tree.mNbIndices, "AABB tree indices"); mNbIndices = tree.mNbIndices; PxMemCopy(mIndices, tree.mIndices, sizeof(PxU32)*tree.mNbIndices); // allocate,copy nodes mNodes = PX_NEW(BVHNode)[tree.mNbNodes]; mNbNodes = tree.mNbNodes; PxMemCopy(mNodes, tree.mNodes, sizeof(BVHNode)*tree.mNbNodes); } // Shift indices of the tree by offset. 
Used for merged trees, when initial indices needs to be shifted to match indices in current pruning pool void AABBTree::shiftIndices(PxU32 offset) { for (PxU32 i = 0; i < mNbIndices; i++) { mIndices[i] += offset; } } bool AABBTree::buildInit(const AABBTreeBuildParams& params, NodeAllocator& nodeAllocator, BuildStats& stats) { // Checkings const PxU32 nbPrimitives = params.mNbPrimitives; if(!nbPrimitives) return false; // Release previous tree release(); // Initialize indices. This list will be modified during build. mNbIndices = nbPrimitives; PxU32* indices = initAABBTreeBuild(params, nodeAllocator, stats); if(!indices) return false; PX_ASSERT(!mIndices); mIndices = indices; return true; } void AABBTree::buildEnd(const AABBTreeBuildParams& params, NodeAllocator& nodeAllocator, const BuildStats& stats) { PX_FREE(params.mCache); // Get back total number of nodes mNbNodes = stats.getCount(); mTotalPrims = stats.mTotalPrims; mNodes = PX_NEW(BVHNode)[mNbNodes]; PX_ASSERT(mNbNodes==nodeAllocator.mTotalNbNodes); flattenTree(nodeAllocator, mNodes); nodeAllocator.release(); } bool AABBTree::build(const AABBTreeBuildParams& params, NodeAllocator& nodeAllocator) { const PxU32 nbPrimitives = params.mNbPrimitives; if(!nbPrimitives) return false; // Release previous tree release(); BuildStats stats; mNbIndices = nbPrimitives; mIndices = buildAABBTree(params, nodeAllocator, stats); if(!mIndices) return false; buildEnd(params, nodeAllocator, stats); return true; } void AABBTree::shiftOrigin(const PxVec3& shift) { BVHNode* const nodeBase = mNodes; const PxU32 totalNbNodes = mNbNodes; for(PxU32 i=0; isubdivide(params, stats, nodeBase, indices); if(!node->isLeaf()) { AABBTreeBuildNode* pos = const_cast(node->getPos()); PX_ASSERT(pos); AABBTreeBuildNode* neg = pos + 1; stack.push(neg); stack.push(pos); } stats.mTotalPrims += node->mNbPrimitives; return node->mNbPrimitives; } PxU32 AABBTree::progressiveBuild(const AABBTreeBuildParams& params, NodeAllocator& nodeAllocator, BuildStats& 
stats, PxU32 progress, PxU32 limit) { if(progress==0) { if(!buildInit(params, nodeAllocator, stats)) return PX_INVALID_U32; mStack = PX_NEW(FIFOStack); mStack->push(nodeAllocator.mPool); return progress++; } else if(progress==1) { PxU32 stackCount = mStack->getNbEntries(); if(stackCount) { PxU32 Total = 0; const PxU32 Limit = limit; while(Totalpop(Entry)) Total += incrementalBuildHierarchy(*mStack, Entry, params, stats, nodeAllocator, mIndices); else break; } return progress; } buildEnd(params, nodeAllocator, stats); PX_DELETE(mStack); return 0; // Done! } return PX_INVALID_U32; } //~Progressive building PX_FORCE_INLINE static void setLeafData(PxU32& leafData, const BVHNode& node, const PxU32 indicesOffset) { const PxU32 index = indicesOffset + (node.mData >> 5); const PxU32 nbPrims = node.getNbPrimitives(); PX_ASSERT(nbPrims < 16); leafData = (index << 5) | ((nbPrims & 15) << 1) | 1; } // Copy the tree into nodes. Update node indices, leaf indices. void AABBTree::addRuntimeChilds(PxU32& nodeIndex, const AABBTreeMergeData& treeParams) { PX_ASSERT(nodeIndex < mNbNodes + treeParams.mNbNodes + 1); const PxU32 baseNodeIndex = nodeIndex; // copy the src tree into dest tree nodes, update its data for (PxU32 i = 0; i < treeParams.mNbNodes; i++) { PX_ASSERT(nodeIndex < mNbNodes + treeParams.mNbNodes + 1); mNodes[nodeIndex].mBV = treeParams.mNodes[i].mBV; if (treeParams.mNodes[i].isLeaf()) { setLeafData(mNodes[nodeIndex].mData, treeParams.mNodes[i], mNbIndices); } else { const PxU32 srcNodeIndex = baseNodeIndex + (treeParams.mNodes[i].getPosIndex()); mNodes[nodeIndex].mData = srcNodeIndex << 1; mParentIndices[srcNodeIndex] = nodeIndex; mParentIndices[srcNodeIndex + 1] = nodeIndex; } nodeIndex++; } } // Merge tree into targetNode, where target node is a leaf // 1. Allocate new nodes/parent, copy all the nodes/parents // 2. Create new node at the end, copy the data from target node // 3. 
Copy the merge tree after the new node, create the parent map for them, update the leaf indices // Schematic view: // Target Nodes: ...Tn... // Input tree: R1->Rc0, Rc1... // Merged tree: ...Tnc->...->Nc0,R1->Rc0,Rc1... // where new node: Nc0==Tn and Tnc is not a leaf anymore and points to Nc0 void AABBTree::mergeRuntimeLeaf(BVHNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 targetMergeNodeIndex) { PX_ASSERT(mParentIndices); PX_ASSERT(targetNode.isLeaf()); // 1. Allocate new nodes/parent, copy all the nodes/parents // allocate new runtime pool with max combine number of nodes // we allocate only 1 additional node each merge BVHNode* newRuntimePool = PX_NEW(BVHNode)[mNbNodes + treeParams.mNbNodes + 1]; PxU32* newParentIndices = PX_ALLOCATE(PxU32, (mNbNodes + treeParams.mNbNodes + 1), "AABB parent indices"); // copy the whole target nodes, we will add the new node at the end together with the merge tree PxMemCopy(newRuntimePool, mNodes, sizeof(BVHNode)*(mNbNodes)); PxMemCopy(newParentIndices, mParentIndices, sizeof(PxU32)*(mNbNodes)); // 2. Create new node at the end, copy the data from target node PxU32 nodeIndex = mNbNodes; // copy the targetNode at the end of the new nodes newRuntimePool[nodeIndex].mBV = targetNode.mBV; newRuntimePool[nodeIndex].mData = targetNode.mData; // update the parent information newParentIndices[nodeIndex] = targetMergeNodeIndex; // mark for refit if (mRefitBitmask.getBits() && mRefitBitmask.isSet(targetMergeNodeIndex)) { mRefitBitmask.setBit(nodeIndex); const PxU32 currentMarkedWord = nodeIndex >> 5; mRefitHighestSetWord = PxMax(mRefitHighestSetWord, currentMarkedWord); } // swap pointers PX_DELETE_ARRAY(mNodes); mNodes = newRuntimePool; PX_FREE(mParentIndices); mParentIndices = newParentIndices; // 3. 
Copy the merge tree after the new node, create the parent map for them, update the leaf indices nodeIndex++; addRuntimeChilds(nodeIndex, treeParams); PX_ASSERT(nodeIndex == mNbNodes + 1 + treeParams.mNbNodes); // update the parent information for the input tree root node mParentIndices[mNbNodes + 1] = targetMergeNodeIndex; // fix the child information for the target node, was a leaf before mNodes[targetMergeNodeIndex].mData = mNbNodes << 1; // update the total number of nodes mNbNodes = mNbNodes + 1 + treeParams.mNbNodes; } // Merge tree into targetNode, where target node is not a leaf // 1. Allocate new nodes/parent, copy the nodes/parents till targetNodePosIndex // 2. Create new node , copy the data from target node // 3. Copy the rest of the target tree nodes/parents at the end -> targetNodePosIndex + 1 + treeParams.mNbNodes // 4. Copy the merge tree after the new node, create the parent map for them, update the leaf indices // 5. Go through the nodes copied at the end and fix the parents/childs // Schematic view: // Target Nodes: ...Tn->...->Tc0,Tc1... // Input tree: R1->Rc0, Rc1... // Merged tree: ...Tn->...->Nc0,R1->Rc0,Rc1...,Tc0,Tc1... // where new node: Nc0->...->Tc0,Tc1 void AABBTree::mergeRuntimeNode(BVHNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 targetMergeNodeIndex) { PX_ASSERT(mParentIndices); PX_ASSERT(!targetNode.isLeaf()); // Get the target node child pos, this is where we insert the new node and the input tree const PxU32 targetNodePosIndex = targetNode.getPosIndex(); // 1. 
Allocate new nodes/parent, copy the nodes/parents till targetNodePosIndex // allocate new runtime pool with max combine number of nodes // we allocate only 1 additional node each merge BVHNode* newRuntimePool = PX_NEW(BVHNode)[mNbNodes + treeParams.mNbNodes + 1]; PxU32* newParentIndices = PX_ALLOCATE(PxU32, (mNbNodes + treeParams.mNbNodes + 1), "AABB parent indices"); // copy the untouched part of the nodes and parents PxMemCopy(newRuntimePool, mNodes, sizeof(BVHNode)*(targetNodePosIndex)); PxMemCopy(newParentIndices, mParentIndices, sizeof(PxU32)*(targetNodePosIndex)); PxU32 nodeIndex = targetNodePosIndex; // 2. Create new node , copy the data from target node newRuntimePool[nodeIndex].mBV = targetNode.mBV; newRuntimePool[nodeIndex].mData = ((targetNode.mData >> 1) + 1 + treeParams.mNbNodes) << 1; // update parent information newParentIndices[nodeIndex] = targetMergeNodeIndex; // handle mark for refit if(mRefitBitmask.getBits() && mRefitBitmask.isSet(targetMergeNodeIndex)) { mRefitBitmask.setBit(nodeIndex); const PxU32 currentMarkedWord = nodeIndex >> 5; mRefitHighestSetWord = PxMax(mRefitHighestSetWord, currentMarkedWord); } // 3. Copy the rest of the target tree nodes/parents at the end -> targetNodePosIndex + 1 + treeParams.mNbNodes if(mNbNodes - targetNodePosIndex) { PX_ASSERT(mNbNodes - targetNodePosIndex > 0); PxMemCopy(newRuntimePool + targetNodePosIndex + 1 + treeParams.mNbNodes, mNodes + targetNodePosIndex, sizeof(BVHNode)*(mNbNodes - targetNodePosIndex)); PxMemCopy(newParentIndices + targetNodePosIndex + 1 + treeParams.mNbNodes, mParentIndices + targetNodePosIndex, sizeof(PxU32)*(mNbNodes - targetNodePosIndex)); } // swap the pointers, release the old memory PX_DELETE_ARRAY(mNodes); mNodes = newRuntimePool; PX_FREE(mParentIndices); mParentIndices = newParentIndices; // 4. 
Copy the merge tree after the new node, create the parent map for them, update the leaf indices nodeIndex++; addRuntimeChilds(nodeIndex, treeParams); PX_ASSERT(nodeIndex == targetNodePosIndex + 1 + treeParams.mNbNodes); // update the total number of nodes mNbNodes = mNbNodes + 1 + treeParams.mNbNodes; // update the parent information for the input tree root node mParentIndices[targetNodePosIndex + 1] = targetMergeNodeIndex; // 5. Go through the nodes copied at the end and fix the parents/childs for (PxU32 i = targetNodePosIndex + 1 + treeParams.mNbNodes; i < mNbNodes; i++) { // check if the parent is the targetNode, if yes update the parent to new node if(mParentIndices[i] == targetMergeNodeIndex) { mParentIndices[i] = targetNodePosIndex; } else { // if parent node has been moved, update the parent node if(mParentIndices[i] >= targetNodePosIndex) { mParentIndices[i] = mParentIndices[i] + 1 + treeParams.mNbNodes; } else { // if parent has not been moved, update its child information const PxU32 parentIndex = mParentIndices[i]; // update the child information to point to Pos child if(i % 2 != 0) { const PxU32 srcNodeIndex = mNodes[parentIndex].getPosIndex(); // if child index points to a node that has been moved, update the child index PX_ASSERT(!mNodes[parentIndex].isLeaf()); PX_ASSERT(srcNodeIndex > targetNodePosIndex); mNodes[parentIndex].mData = (1 + treeParams.mNbNodes + srcNodeIndex) << 1; } } } if(!mNodes[i].isLeaf()) { // update the child node index const PxU32 srcNodeIndex = 1 + treeParams.mNbNodes + mNodes[i].getPosIndex(); mNodes[i].mData = srcNodeIndex << 1; } } } // traverse the target node, the tree is inside the targetNode, and find the best place where merge the tree void AABBTree::traverseRuntimeNode(BVHNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 nodeIndex) { const BVHNode& srcNode = treeParams.getRootNode(); PX_ASSERT(srcNode.mBV.isInside(targetNode.mBV)); // Check if the srcNode(tree) can fit inside any of the target childs. 
If yes, traverse the target tree child
	// recurse into the Pos child if the incoming tree fits entirely inside its bounds
	BVHNode& targetPosChild = *targetNode.getPos(mNodes);
	if(srcNode.mBV.isInside(targetPosChild.mBV))
	{
		return traverseRuntimeNode(targetPosChild, treeParams, targetNode.getPosIndex());
	}

	// otherwise try the Neg child
	BVHNode& targetNegChild = *targetNode.getNeg(mNodes);
	if (srcNode.mBV.isInside(targetNegChild.mBV))
	{
		return traverseRuntimeNode(targetNegChild, treeParams, targetNode.getNegIndex());
	}

	// we cannot traverse the target any deeper - attach the source tree to the current target node
	if(targetNode.isLeaf())
		mergeRuntimeLeaf(targetNode, treeParams, nodeIndex);
	else
		mergeRuntimeNode(targetNode, treeParams, nodeIndex);
}

// Merge the input tree into the current tree.
// Traverse the tree and find the smallest node where the whole new tree fits. When we find the node
// we create one new node pointing to the original children and to the input tree root.
void AABBTree::mergeTree(const AABBTreeMergeData& treeParams)
{
	// allocate a new indices buffer large enough for both trees and copy the existing indices over
	PxU32* newIndices = PX_ALLOCATE(PxU32, (mNbIndices + treeParams.mNbIndices), "AABB tree indices");
	PxMemCopy(newIndices, mIndices, sizeof(PxU32)*mNbIndices);
	PX_FREE(mIndices);
	mIndices = newIndices;
	mTotalPrims += treeParams.mNbIndices;

	// copy the new indices, re-indexed using the provided indicesOffset. Note that indicesOffset
	// must be provided, as the original mNbIndices can differ from indicesOffset due to object releases.
	for (PxU32 i = 0; i < treeParams.mNbIndices; i++)
	{
		mIndices[mNbIndices + i] = treeParams.mIndicesOffset + treeParams.mIndices[i];
	}

	// grow the refit bitmask so it can hold all the new nodes (a merge adds at most treeParams.mNbNodes + 1)
	mRefitBitmask.resize(mNbNodes + treeParams.mNbNodes + 1);

	// create the parent information so we can update it during the merge
	getParentIndices();

	// if the new tree is inside the root AABB we traverse the tree to find a better node to attach the subtree to;
	// if the root is a leaf we merge with the root.
if(treeParams.getRootNode().mBV.isInside(mNodes[0].mBV) && !mNodes[0].isLeaf()) { traverseRuntimeNode(mNodes[0], treeParams, 0); } else { if(mNodes[0].isLeaf()) { mergeRuntimeLeaf(mNodes[0], treeParams, 0); } else { mergeRuntimeNode(mNodes[0], treeParams, 0); } // increase the tree root AABB mNodes[0].mBV.include(treeParams.getRootNode().mBV); } #if PX_DEBUG //verify parent indices for (PxU32 i = 0; i < mNbNodes; i++) { if (i) { PX_ASSERT(mNodes[mParentIndices[i]].getPosIndex() == i || mNodes[mParentIndices[i]].getNegIndex() == i); } if (!mNodes[i].isLeaf()) { PX_ASSERT(mParentIndices[mNodes[i].getPosIndex()] == i); PX_ASSERT(mParentIndices[mNodes[i].getNegIndex()] == i); } } // verify the tree nodes, leafs for (PxU32 i = 0; i < mNbNodes; i++) { if (mNodes[i].isLeaf()) { const PxU32 index = mNodes[i].mData >> 5; const PxU32 nbPrim = mNodes[i].getNbPrimitives(); PX_ASSERT(index + nbPrim <= mNbIndices + treeParams.mNbIndices); } else { const PxU32 nodeIndex = (mNodes[i].getPosIndex()); PX_ASSERT(nodeIndex < mNbNodes); } } #endif // PX_DEBUG mNbIndices += treeParams.mNbIndices; } void TinyBVH::constructFromTriangles(const PxU32* triangles, const PxU32 numTriangles, const PxVec3* points, TinyBVH& result, PxF32 enlargement) { //Computes a bounding box for every triangle in triangles Gu::AABBTreeBounds boxes; boxes.init(numTriangles); for (PxU32 i = 0; i < numTriangles; ++i) { const PxU32* tri = &triangles[3 * i]; PxBounds3 box = PxBounds3::empty(); box.include(points[tri[0]]); box.include(points[tri[1]]); box.include(points[tri[2]]); box.fattenFast(enlargement); boxes.getBounds()[i] = box; } Gu::buildAABBTree(numTriangles, boxes, result.mTree); } void TinyBVH::constructFromTetrahedra(const PxU32* tetrahedra, const PxU32 numTetrahedra, const PxVec3* points, TinyBVH& result, PxF32 enlargement) { //Computes a bounding box for every tetrahedron in tetrahedra Gu::AABBTreeBounds boxes; boxes.init(numTetrahedra); for (PxU32 i = 0; i < numTetrahedra; ++i) { const PxU32* tri = 
&tetrahedra[4 * i]; PxBounds3 box = PxBounds3::empty(); box.include(points[tri[0]]); box.include(points[tri[1]]); box.include(points[tri[2]]); box.include(points[tri[3]]); box.fattenFast(enlargement); boxes.getBounds()[i] = box; } Gu::buildAABBTree(numTetrahedra, boxes, result.mTree); }